Added 'split_quoted()' function to deal with strings that are quoted in
Unix shell-like syntax (e.g. in Python's Makefile, for one thing). Now that
I have this function, I'll probably allow quoted strings in config files too.
This commit is contained in:
Greg Ward 2000-06-24 20:40:02 +00:00
parent c3a43b4f9b
commit 6a2a3dbec5
1 changed files with 67 additions and 0 deletions

View File

@ -166,3 +166,70 @@ def grok_environment_error (exc, prefix="error: "):
error = prefix + str(exc[-1]) error = prefix + str(exc[-1])
return error return error
# Needed by 'split_quoted()'
# A (possibly empty) run of characters that are not special to the splitter:
# anything except a backslash, either quote character, or whitespace.
_wordchars_re = re.compile(r'[^\\\'\"%s]*' % string.whitespace)
# A complete singly-/doubly-quoted string.  A backslash inside the quotes
# pairs up with the following character, so an escaped quote character does
# not terminate the string.
_squote_re = re.compile(r"'(?:[^'\\]|\\.)*'")
_dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"')


def split_quoted(s):
    """Split a string up according to Unix shell-like rules for quotes and
    backslashes.

    Words are delimited by runs of whitespace (spaces, tabs, newlines, ...),
    as long as that whitespace is not escaped by a backslash or inside a
    quoted string.  Single and double quotes are equivalent.  Outside of
    quotes, the backslash is stripped from any two-character escape
    sequence, leaving only the escaped character.  The enclosing quote
    characters are stripped from any quoted string; the text between them
    is kept verbatim, including any backslashes.

    's' is the string to split; leading and trailing whitespace is ignored.
    Returns a list of words (empty if 's' is empty or all whitespace).
    Raises ValueError if a quote character has no matching closing quote.
    """
    # The string is consumed word by word: 's' always holds the current
    # (partially processed) word followed by the unprocessed remainder, and
    # 'pos' is the offset in 's' where scanning for the next special
    # character resumes.  Regexes do the scanning, so no character-by-
    # character loop is needed.
    s = s.strip()
    words = []
    pos = 0

    while s:
        m = _wordchars_re.match(s, pos)
        end = m.end()
        if end == len(s):
            # nothing special left: the rest of 's' is the last word
            words.append(s[:end])
            break

        if s[end] in string.whitespace:
            # unescaped, unquoted whitespace: we definitely have a word
            # delimiter, so emit the word and restart on the remainder
            words.append(s[:end])
            s = s[end:].lstrip()
            pos = 0

        elif s[end] == '\\':
            # drop the backslash and step over the character it escaped,
            # which thereby becomes part of the current word
            s = s[:end] + s[end+1:]
            pos = end + 1

        else:
            if s[end] == "'":           # slurp singly-quoted string
                m = _squote_re.match(s, end)
            elif s[end] == '"':         # slurp doubly-quoted string
                m = _dquote_re.match(s, end)
            else:
                # only reachable if '_wordchars_re' and the branches above
                # disagree about which characters are special
                raise RuntimeError(
                    "this can't happen (bad char '%c')" % s[end])

            if m is None:
                raise ValueError(
                    "bad string (mismatched %s quotes?)" % s[end])

            # splice the quoted text into the current word without its
            # enclosing quotes; two characters were removed, so scanning
            # resumes two places before the old end of the match
            (beg, end) = m.span()
            s = s[:beg] + s[beg+1:end-1] + s[end:]
            pos = m.end() - 2

        if pos >= len(s):
            # ran off the end while inside a word: whatever is left is it
            words.append(s)
            break

    return words
# split_quoted ()