Added 'split_quoted()' function to deal with strings that are quoted in
Unix shell-like syntax (e.g. in Python's Makefile, for one thing). Now that I have this function, I'll probably allow quoted strings in config files too.
This commit is contained in:
parent
c3a43b4f9b
commit
6a2a3dbec5
|
@ -166,3 +166,70 @@ def grok_environment_error (exc, prefix="error: "):
|
|||
error = prefix + str(exc[-1])
|
||||
|
||||
return error
|
||||
|
||||
|
||||
# Needed by 'split_quoted()': precompiled patterns for the three kinds of
# token the scanner recognises.

# A (possibly empty) run of "ordinary" characters: anything that is not a
# backslash, a single quote, a double quote or a space.
_wordchars_re = re.compile(r'[^\\\'\"\ ]*')

# A complete single-quoted string; inside it a backslash escapes any
# single character (including the quote itself).
_squote_re = re.compile(r"'(?:[^'\\]|\\.)*'")

# A complete double-quoted string, with the same escaping rule.
_dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"')


def split_quoted (s):
    """Split a string up according to Unix shell-like rules for quotes and
    backslashes.

    Words are delimited by runs of space characters, as long as those
    spaces are not escaped by a backslash or inside a quoted string.
    Single and double quotes are equivalent.  Leading and trailing
    whitespace of 's' is discarded before splitting.

    Outside quotes, the backslash is stripped from any two-character
    escape sequence, leaving only the escaped character as part of the
    current word.  The quote characters are stripped from any quoted
    string; a quote character may be backslash-escaped inside a quoted
    string, but the text between the quotes is otherwise copied verbatim
    (the backslash of such an escape is kept).

    Returns a list of words (an empty list for an empty or
    all-whitespace string).  Raises ValueError if a quote is opened but
    never closed.
    """

    # This is a nice algorithm for splitting up a single string, since it
    # doesn't require character-by-character examination.  The string 's'
    # is rewritten in place as quotes and backslashes are consumed, so at
    # any time s[:pos] is the already-processed part of the current word.

    s = s.strip()
    words = []
    pos = 0

    while s:
        # The pattern can match the empty string, so 'm' is never None.
        m = _wordchars_re.match(s, pos)
        end = m.end()
        if end == len(s):
            # Ran off the end of the string: everything is the last word.
            words.append(s[:end])
            break

        if s[end] == ' ':               # unescaped, unquoted space: now
            words.append(s[:end])       # we definitely have a word delimiter
            s = s[end:].lstrip()
            pos = 0

        elif s[end] == '\\':            # preserve whatever is being escaped;
                                        # will become part of the current word
            s = s[:end] + s[end+1:]     # drop the backslash itself...
            pos = end + 1               # ...and step over the escaped char

        else:
            if s[end] == "'":           # slurp singly-quoted string
                m = _squote_re.match(s, end)
            elif s[end] == '"':         # slurp doubly-quoted string
                m = _dquote_re.match(s, end)
            else:
                raise RuntimeError(
                    "this can't happen (bad char '%c')" % s[end])

            if m is None:
                raise ValueError(
                    "bad string (mismatched %s quotes?)" % s[end])

            # Splice the quoted text back into 's' without its two quote
            # characters; scanning resumes just past that text, which is
            # two characters earlier than the match end.
            (beg, end) = m.span()
            s = s[:beg] + s[beg+1:end-1] + s[end:]
            pos = m.end() - 2

        if pos >= len(s):
            # The escape or quoted string ended the input: what is left
            # of 's' is the final word (possibly the empty string).
            words.append(s)
            break

    return words
|
||||
|
||||
# split_quoted ()
|
||||
|
|
Loading…
Reference in New Issue