Added 'split_quoted()' function to deal with strings that are quoted in
Unix shell-like syntax (e.g. in Python's Makefile, for one thing) -- now that I have this function, I'll probably allow quoted strings in config files too.
This commit is contained in:
parent
c3a43b4f9b
commit
6a2a3dbec5
|
@ -166,3 +166,70 @@ def grok_environment_error (exc, prefix="error: "):
|
||||||
error = prefix + str(exc[-1])
|
error = prefix + str(exc[-1])
|
||||||
|
|
||||||
return error
|
return error
|
||||||
|
|
||||||
|
|
||||||
|
# Needed by 'split_quoted()':
#   _wordchars_re  matches a (possibly empty) run of ordinary characters,
#                  i.e. anything except a backslash, a quote character or
#                  whitespace
#   _squote_re     matches one complete single-quoted string
#   _dquote_re     matches one complete double-quoted string
# Inside either kind of quoted string a backslash followed by any character
# (other than a newline) is consumed as a pair, so an escaped quote does not
# terminate the string.
_wordchars_re = re.compile(r'[^\\\'\"%s]*' % string.whitespace)
_squote_re = re.compile(r"'(?:[^'\\]|\\.)*'")
_dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"')


def split_quoted(s):
    """Split a string up according to Unix shell-like rules for quotes and
    backslashes.

    Words are delimited by runs of whitespace (any character in
    'string.whitespace'), as long as that whitespace is not escaped by a
    backslash or inside a quoted string.  Single and double quotes are
    equivalent.  Outside of quotes, the backslash is stripped from any
    two-character escape sequence, leaving only the escaped character.
    The quote characters are stripped from any quoted string; the contents
    of a quoted string are otherwise kept verbatim, including any
    backslashes used inside it to escape a quote character.

    Leading and trailing whitespace is ignored; an empty or all-whitespace
    string yields an empty list.

    Returns a list of words.  Raises ValueError if a quoted string is not
    terminated.
    """

    # The string is consumed word by word: 's' always starts with the word
    # currently being assembled, and 'pos' is the offset within 's' up to
    # which that word has already been processed (unquoted/unescaped).
    s = s.strip(string.whitespace)
    words = []
    pos = 0

    while s:
        # Skip over ordinary characters; 'end' is the index of the first
        # special character (whitespace, backslash or quote), if any.
        end = _wordchars_re.match(s, pos).end()
        if end == len(s):
            # Nothing special left: the remainder is the last word.
            words.append(s)
            break

        char = s[end]
        if char in string.whitespace:
            # Unescaped, unquoted whitespace: a definite word delimiter.
            words.append(s[:end])
            s = s[end:].lstrip(string.whitespace)
            pos = 0

        elif char == '\\':
            # Drop the backslash and step over whatever it escaped, so the
            # escaped character becomes part of the current word.
            s = s[:end] + s[end+1:]
            pos = end + 1

        else:
            if char == "'":             # slurp singly-quoted string
                m = _squote_re.match(s, end)
            elif char == '"':           # slurp doubly-quoted string
                m = _dquote_re.match(s, end)
            else:
                raise RuntimeError(
                    "this can't happen (bad char '%c')" % char)

            if m is None:
                raise ValueError(
                    "bad string (mismatched %s quotes?)" % char)

            # Splice the quoted text back in without its two quote
            # characters and resume scanning just past it (hence the -2).
            (beg, end) = m.span()
            s = s[:beg] + s[beg+1:end-1] + s[end:]
            pos = end - 2

        if pos >= len(s):
            # The escape or quoted string ran up to the end of the input.
            words.append(s)
            break

    return words

# split_quoted ()
|
||||||
|
|
Loading…
Reference in New Issue