cpython/Lib/test/test_shlex.py

import io
import shlex
import string
import unittest


# The original test data set was from shellwords, by Hartmut Goebel.
#
# Table for the compatibility (non-POSIX) lexer, one test case per line in
# the form ``input|token1|token2|...|``.  ShlexTest.setUp() splits each line
# on "|" and drops the empty piece after the trailing "|", so the first field
# is the string to tokenize and the remaining fields are the expected tokens.
# The two-character sequence ``\n`` inside an input field is replaced by a
# real newline in setUp().  This is a raw string: backslashes are literal.

data = r"""x|x|
foo bar|foo|bar|
 foo bar|foo|bar|
 foo bar |foo|bar|
foo   bar    bla     fasel|foo|bar|bla|fasel|
x y  z              xxxx|x|y|z|xxxx|
\x bar|\|x|bar|
\ x bar|\|x|bar|
\ bar|\|bar|
foo \x bar|foo|\|x|bar|
foo \ x bar|foo|\|x|bar|
foo \ bar|foo|\|bar|
foo "bar" bla|foo|"bar"|bla|
"foo" "bar" "bla"|"foo"|"bar"|"bla"|
"foo" bar "bla"|"foo"|bar|"bla"|
"foo" bar bla|"foo"|bar|bla|
foo 'bar' bla|foo|'bar'|bla|
'foo' 'bar' 'bla'|'foo'|'bar'|'bla'|
'foo' bar 'bla'|'foo'|bar|'bla'|
'foo' bar bla|'foo'|bar|bla|
blurb foo"bar"bar"fasel" baz|blurb|foo"bar"bar"fasel"|baz|
blurb foo'bar'bar'fasel' baz|blurb|foo'bar'bar'fasel'|baz|
""|""|
''|''|
foo "" bar|foo|""|bar|
foo '' bar|foo|''|bar|
foo "" "" "" bar|foo|""|""|""|bar|
foo '' '' '' bar|foo|''|''|''|bar|
\""|\|""|
"\"|"\"|
"foo\ bar"|"foo\ bar"|
"foo\\ bar"|"foo\\ bar"|
"foo\\ bar\"|"foo\\ bar\"|
"foo\\" bar\""|"foo\\"|bar|\|""|
"foo\\ bar\" dfadf"|"foo\\ bar\"|dfadf"|
"foo\\\ bar\" dfadf"|"foo\\\ bar\"|dfadf"|
"foo\\\x bar\" dfadf"|"foo\\\x bar\"|dfadf"|
"foo\x bar\" dfadf"|"foo\x bar\"|dfadf"|
\''|\|''|
'foo\ bar'|'foo\ bar'|
'foo\\ bar'|'foo\\ bar'|
"foo\\\x bar\" df'a\ 'df'|"foo\\\x bar\"|df'a|\|'df'|
\"foo"|\|"foo"|
\"foo"\x|\|"foo"|\|x|
"foo\x"|"foo\x"|
"foo\ "|"foo\ "|
foo\ xx|foo|\|xx|
foo\ x\x|foo|\|x|\|x|
foo\ x\x\""|foo|\|x|\|x|\|""|
"foo\ x\x"|"foo\ x\x"|
"foo\ x\x\\"|"foo\ x\x\\"|
"foo\ x\x\\""foobar"|"foo\ x\x\\"|"foobar"|
"foo\ x\x\\"\''"foobar"|"foo\ x\x\\"|\|''|"foobar"|
"foo\ x\x\\"\'"fo'obar"|"foo\ x\x\\"|\|'"fo'|obar"|
"foo\ x\x\\"\'"fo'obar" 'don'\''t'|"foo\ x\x\\"|\|'"fo'|obar"|'don'|\|''|t'|
'foo\ bar'|'foo\ bar'|
'foo\\ bar'|'foo\\ bar'|
foo\ bar|foo|\|bar|
foo#bar\nbaz|foobaz|
:-) ;-)|:|-|)|;|-|)|
áéíóú|á|é|í|ó|ú|
"""

# Table for POSIX-mode splitting, in the same ``input|token1|token2|...|``
# layout as ``data``: the first field is the input, the rest are the expected
# tokens, and a literal ``\n`` in the input stands for a newline.  Here the
# expected tokens have quotes and escapes already processed, so an empty
# field between two "|" denotes an expected empty-string token.

posix_data = r"""x|x|
foo bar|foo|bar|
 foo bar|foo|bar|
 foo bar |foo|bar|
foo   bar    bla     fasel|foo|bar|bla|fasel|
x y  z              xxxx|x|y|z|xxxx|
\x bar|x|bar|
\ x bar| x|bar|
\ bar| bar|
foo \x bar|foo|x|bar|
foo \ x bar|foo| x|bar|
foo \ bar|foo| bar|
foo "bar" bla|foo|bar|bla|
"foo" "bar" "bla"|foo|bar|bla|
"foo" bar "bla"|foo|bar|bla|
"foo" bar bla|foo|bar|bla|
foo 'bar' bla|foo|bar|bla|
'foo' 'bar' 'bla'|foo|bar|bla|
'foo' bar 'bla'|foo|bar|bla|
'foo' bar bla|foo|bar|bla|
blurb foo"bar"bar"fasel" baz|blurb|foobarbarfasel|baz|
blurb foo'bar'bar'fasel' baz|blurb|foobarbarfasel|baz|
""||
''||
foo "" bar|foo||bar|
foo '' bar|foo||bar|
foo "" "" "" bar|foo||||bar|
foo '' '' '' bar|foo||||bar|
\"|"|
"\""|"|
"foo\ bar"|foo\ bar|
"foo\\ bar"|foo\ bar|
"foo\\ bar\""|foo\ bar"|
"foo\\" bar\"|foo\|bar"|
"foo\\ bar\" dfadf"|foo\ bar" dfadf|
"foo\\\ bar\" dfadf"|foo\\ bar" dfadf|
"foo\\\x bar\" dfadf"|foo\\x bar" dfadf|
"foo\x bar\" dfadf"|foo\x bar" dfadf|
\'|'|
'foo\ bar'|foo\ bar|
'foo\\ bar'|foo\\ bar|
"foo\\\x bar\" df'a\ 'df"|foo\\x bar" df'a\ 'df|
\"foo|"foo|
\"foo\x|"foox|
"foo\x"|foo\x|
"foo\ "|foo\ |
foo\ xx|foo xx|
foo\ x\x|foo xx|
foo\ x\x\"|foo xx"|
"foo\ x\x"|foo\ x\x|
"foo\ x\x\\"|foo\ x\x\|
"foo\ x\x\\""foobar"|foo\ x\x\foobar|
"foo\ x\x\\"\'"foobar"|foo\ x\x\'foobar|
"foo\ x\x\\"\'"fo'obar"|foo\ x\x\'fo'obar|
"foo\ x\x\\"\'"fo'obar" 'don'\''t'|foo\ x\x\'fo'obar|don't|
"foo\ x\x\\"\'"fo'obar" 'don'\''t' \\|foo\ x\x\'fo'obar|don't|\|
'foo\ bar'|foo\ bar|
'foo\\ bar'|foo\\ bar|
foo\ bar|foo bar|
foo#bar\nbaz|foo|baz|
:-) ;-)|:-)|;-)|
áéíóú|áéíóú|
"""

class ShlexTest(unittest.TestCase):
    """Tests for shlex tokenizing, punctuation_chars handling and quote().

    The table-driven tests read the module-level ``data`` and ``posix_data``
    strings, which setUp() parses into lists of
    ``[input, expected_token, ...]`` rows.
    """

    @staticmethod
    def _parse_table(table):
        """Turn a ``input|tok|tok|...|`` table into a list of row lists.

        Each line is split on "|" and the empty piece after the trailing
        "|" is dropped.  A literal backslash-n in the input field (index 0)
        is replaced by a real newline; the expected tokens are left as is.
        """
        rows = [line.split("|")[:-1] for line in table.splitlines()]
        for row in rows:
            row[0] = row[0].replace(r"\n", "\n")
        return rows

    def setUp(self):
        """Parse both test tables into ``self.data`` / ``self.posix_data``."""
        self.data = self._parse_table(data)
        self.posix_data = self._parse_table(posix_data)

    def splitTest(self, data, comments):
        """Check shlex.split() against every row of *data*.

        *data* is a sequence of rows whose first item is the input string
        and whose remaining items are the expected tokens; *comments* is
        passed through to shlex.split().  Each row runs in its own subTest
        so one failing row does not hide the others.
        """
        for row in data:
            source, expected = row[0], row[1:]
            with self.subTest(source=source):
                tokens = shlex.split(source, comments=comments)
                self.assertEqual(tokens, expected,
                                 "%s: %s != %s" %
                                 (source, tokens, expected))

    def oldSplit(self, s):
        """Tokenize *s* with a default shlex.shlex reading from a StringIO.

        Tokens are collected with get_token() until it returns a false
        value, and are returned as a list.
        """
        lex = shlex.shlex(io.StringIO(s))
        ret = []
        tok = lex.get_token()
        while tok:
            ret.append(tok)
            tok = lex.get_token()
        return ret

    def testSplitPosix(self):
        """Test data splitting with posix parser"""
        self.splitTest(self.posix_data, comments=True)

    def testCompat(self):
        """Test compatibility interface"""
        for row in self.data:
            source, expected = row[0], row[1:]
            with self.subTest(source=source):
                tokens = self.oldSplit(source)
                self.assertEqual(tokens, expected,
                                 "%s: %s != %s" %
                                 (source, tokens, expected))

    def testSyntaxSplitAmpersandAndPipe(self):
        """Test handling of syntax splitting of &, |"""
        # Could take these forms: &&, &, |&, ;&, ;;&
        # of course, the same applies to | and ||
        # these should all parse to the same output
        for delimiter in ('&&', '&', '|&', ';&', ';;&',
                          '||', '|', '&|', ';|', ';;|'):
            src = ['echo hi %s echo bye' % delimiter,
                   'echo hi%secho bye' % delimiter]
            ref = ['echo', 'hi', delimiter, 'echo', 'bye']
            for ss in src:
                s = shlex.shlex(ss, punctuation_chars=True)
                result = list(s)
                self.assertEqual(ref, result, "While splitting '%s'" % ss)

    def testSyntaxSplitSemicolon(self):
        """Test handling of syntax splitting of ;"""
        # Could take these forms: ;, ;;, ;&, ;;&
        # these should all parse to the same output
        for delimiter in (';', ';;', ';&', ';;&'):
            src = ['echo hi %s echo bye' % delimiter,
                   'echo hi%s echo bye' % delimiter,
                   'echo hi%secho bye' % delimiter]
            ref = ['echo', 'hi', delimiter, 'echo', 'bye']
            for ss in src:
                s = shlex.shlex(ss, punctuation_chars=True)
                result = list(s)
                self.assertEqual(ref, result, "While splitting '%s'" % ss)

    def testSyntaxSplitRedirect(self):
        """Test handling of syntax splitting of >"""
        # of course, the same applies to <, |
        # these should all parse to the same output
        # ('>' itself is included so the case named in the docstring is
        # actually exercised, not only its siblings)
        for delimiter in ('>', '<', '|'):
            src = ['echo hi %s out' % delimiter,
                   'echo hi%s out' % delimiter,
                   'echo hi%sout' % delimiter]
            ref = ['echo', 'hi', delimiter, 'out']
            for ss in src:
                s = shlex.shlex(ss, punctuation_chars=True)
                result = list(s)
                self.assertEqual(ref, result, "While splitting '%s'" % ss)

    def testSyntaxSplitParen(self):
        """Test handling of syntax splitting of ()"""
        # these should all parse to the same output
        src = ['( echo hi )',
               '(echo hi)']
        ref = ['(', 'echo', 'hi', ')']
        for ss in src:
            s = shlex.shlex(ss, punctuation_chars=True)
            result = list(s)
            self.assertEqual(ref, result, "While splitting '%s'" % ss)

    def testSyntaxSplitCustom(self):
        """Test handling of syntax splitting with custom chars"""
        # Only '|' is declared as punctuation here: '||' is expected as one
        # token while '&&' is expected as two separate '&' tokens.
        ref = ['~/a', '&', '&', 'b-c', '--color=auto', '||', 'd', '*.py?']
        ss = "~/a && b-c --color=auto || d *.py?"
        s = shlex.shlex(ss, punctuation_chars="|")
        result = list(s)
        self.assertEqual(ref, result, "While splitting '%s'" % ss)

    def testTokenTypes(self):
        """Test that tokens are split with types as expected."""
        # Each token is tagged 'c' when its first character is one of the
        # lexer's punctuation_chars, otherwise 'a'.
        for source, expected in (
                                ('a && b || c',
                                 [('a', 'a'), ('&&', 'c'), ('b', 'a'),
                                  ('||', 'c'), ('c', 'a')]),
                              ):
            s = shlex.shlex(source, punctuation_chars=True)
            observed = []
            while True:
                t = s.get_token()
                if t == s.eof:
                    break
                tt = 'c' if t[0] in s.punctuation_chars else 'a'
                observed.append((t, tt))
            self.assertEqual(observed, expected)

    def testPunctuationInWordChars(self):
        """Test that any punctuation chars are removed from wordchars"""
        s = shlex.shlex('a_b__c', punctuation_chars='_')
        self.assertNotIn('_', s.wordchars)
        self.assertEqual(list(s), ['a', '_', 'b', '__', 'c'])

    def testPunctuationWithWhitespaceSplit(self):
        """Test that with whitespace_split, behaviour is as expected"""
        s = shlex.shlex('a  && b  ||  c', punctuation_chars='&')
        # whitespace_split is False, so splitting will be based on
        # punctuation_chars
        self.assertEqual(list(s), ['a', '&&', 'b', '|', '|', 'c'])
        s = shlex.shlex('a  && b  ||  c', punctuation_chars='&')
        s.whitespace_split = True
        # whitespace_split is True, so splitting will be based on
        # white space
        self.assertEqual(list(s), ['a', '&&', 'b', '||', 'c'])

    def testEmptyStringHandling(self):
        """Test that parsing of empty strings is correctly handled."""
        # see Issue #21999
        expected = ['', ')', 'abc']
        for punct in (False, True):
            with self.subTest(punctuation_chars=punct):
                s = shlex.shlex("'')abc", posix=True, punctuation_chars=punct)
                slist = list(s)
                self.assertEqual(slist, expected)
        # Without posix=True the quotes are expected to stay in the token.
        expected = ["''", ')', 'abc']
        s = shlex.shlex("'')abc", punctuation_chars=True)
        self.assertEqual(list(s), expected)

    def testQuote(self):
        """Test shlex.quote() on empty, safe and unsafe strings."""
        safeunquoted = string.ascii_letters + string.digits + '@%_-+=:,./'
        unicode_sample = '\xe9\xe0\xdf'  # e + acute accent, a + grave, sharp s
        unsafe = '"`$\\!' + unicode_sample

        self.assertEqual(shlex.quote(''), "''")
        self.assertEqual(shlex.quote(safeunquoted), safeunquoted)
        self.assertEqual(shlex.quote('test file name'), "'test file name'")
        for u in unsafe:
            self.assertEqual(shlex.quote('test%sname' % u),
                             "'test%sname'" % u)
        # Embedded single quotes are expected to come back as '"'"'.
        for u in unsafe:
            self.assertEqual(shlex.quote("test%s'name'" % u),
                             "'test%s'\"'\"'name'\"'\"''" % u)

# Allow this test to be used with old shlex.py
if not getattr(shlex, "split", None):
    # No usable shlex.split: strip every test method from ShlexTest except
    # testCompat, leaving only the compatibility test to run.
    for methname in dir(ShlexTest):
        if methname == "testCompat" or not methname.startswith("test"):
            continue
        delattr(ShlexTest, methname)

# Run the tests when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
Add shlex.quote function, to escape filenames and command lines (#9723). This function used to live as pipes.quote, where it was undocumented but used anyway. (An alias still exists for backward compatibility.) The tests have been moved as is, but the code of the function was changed to use a regex instead of a loop with string comparisons (at Ian Bicking’s suggestion). I’m terrible at regexes, so any feedback is welcome. 2011-07-27 13:29:31 -03:00			`import io`
Implemented posix-mode parsing support in shlex.py, as dicussed in mailing list, and in patch #722686. 2003-04-17 18:31:33 -03:00			`import shlex`
Add shlex.quote function, to escape filenames and command lines (#9723). This function used to live as pipes.quote, where it was undocumented but used anyway. (An alias still exists for backward compatibility.) The tests have been moved as is, but the code of the function was changed to use a regex instead of a loop with string comparisons (at Ian Bicking’s suggestion). I’m terrible at regexes, so any feedback is welcome. 2011-07-27 13:29:31 -03:00			`import string`
			`import unittest`
Implemented posix-mode parsing support in shlex.py, as discussed in mailing list, and in patch #722686. 2003-04-17 18:31:33 -03:00
Get test to work under regrtest when running whole suite 2003-04-17 20:04:22 -03:00
Implemented posix-mode parsing support in shlex.py, as discussed in mailing list, and in patch #722686. 2003-04-17 18:31:33 -03:00
			`# The original test data set was from shellwords, by Hartmut Goebel.`

			`data = r"""x\|x\|`
			`foo bar\|foo\|bar\|`
			`foo bar\|foo\|bar\|`
			`foo bar \|foo\|bar\|`
			`foo bar bla fasel\|foo\|bar\|bla\|fasel\|`
			`x y z xxxx\|x\|y\|z\|xxxx\|`
			`\x bar\|\\|x\|bar\|`
			`\ x bar\|\\|x\|bar\|`
			`\ bar\|\\|bar\|`
			`foo \x bar\|foo\|\\|x\|bar\|`
			`foo \ x bar\|foo\|\\|x\|bar\|`
			`foo \ bar\|foo\|\\|bar\|`
			`foo "bar" bla\|foo\|"bar"\|bla\|`
			`"foo" "bar" "bla"\|"foo"\|"bar"\|"bla"\|`
			`"foo" bar "bla"\|"foo"\|bar\|"bla"\|`
			`"foo" bar bla\|"foo"\|bar\|bla\|`
			`foo 'bar' bla\|foo\|'bar'\|bla\|`
			`'foo' 'bar' 'bla'\|'foo'\|'bar'\|'bla'\|`
			`'foo' bar 'bla'\|'foo'\|bar\|'bla'\|`
			`'foo' bar bla\|'foo'\|bar\|bla\|`
			`blurb foo"bar"bar"fasel" baz\|blurb\|foo"bar"bar"fasel"\|baz\|`
			`blurb foo'bar'bar'fasel' baz\|blurb\|foo'bar'bar'fasel'\|baz\|`
			`""\|""\|`
			`''\|''\|`
			`foo "" bar\|foo\|""\|bar\|`
			`foo '' bar\|foo\|''\|bar\|`
			`foo "" "" "" bar\|foo\|""\|""\|""\|bar\|`
			`foo '' '' '' bar\|foo\|''\|''\|''\|bar\|`
			`\""\|\\|""\|`
			`"\"\|"\"\|`
			`"foo\ bar"\|"foo\ bar"\|`
			`"foo\\ bar"\|"foo\\ bar"\|`
			`"foo\\ bar\"\|"foo\\ bar\"\|`
			`"foo\\" bar\""\|"foo\\"\|bar\|\\|""\|`
			`"foo\\ bar\" dfadf"\|"foo\\ bar\"\|dfadf"\|`
			`"foo\\\ bar\" dfadf"\|"foo\\\ bar\"\|dfadf"\|`
			`"foo\\\x bar\" dfadf"\|"foo\\\x bar\"\|dfadf"\|`
			`"foo\x bar\" dfadf"\|"foo\x bar\"\|dfadf"\|`
			`\''\|\\|''\|`
			`'foo\ bar'\|'foo\ bar'\|`
			`'foo\\ bar'\|'foo\\ bar'\|`
			`"foo\\\x bar\" df'a\ 'df'\|"foo\\\x bar\"\|df'a\|\\|'df'\|`
			`\"foo"\|\\|"foo"\|`
			`\"foo"\x\|\\|"foo"\|\\|x\|`
			`"foo\x"\|"foo\x"\|`
			`"foo\ "\|"foo\ "\|`
			`foo\ xx\|foo\|\\|xx\|`
			`foo\ x\x\|foo\|\\|x\|\\|x\|`
			`foo\ x\x\""\|foo\|\\|x\|\\|x\|\\|""\|`
			`"foo\ x\x"\|"foo\ x\x"\|`
			`"foo\ x\x\\"\|"foo\ x\x\\"\|`
			`"foo\ x\x\\""foobar"\|"foo\ x\x\\"\|"foobar"\|`
			`"foo\ x\x\\"\''"foobar"\|"foo\ x\x\\"\|\\|''\|"foobar"\|`
			`"foo\ x\x\\"\'"fo'obar"\|"foo\ x\x\\"\|\\|'"fo'\|obar"\|`
			`"foo\ x\x\\"\'"fo'obar" 'don'\''t'\|"foo\ x\x\\"\|\\|'"fo'\|obar"\|'don'\|\\|''\|t'\|`
			`'foo\ bar'\|'foo\ bar'\|`
			`'foo\\ bar'\|'foo\\ bar'\|`
			`foo\ bar\|foo\|\\|bar\|`
			`foo#bar\nbaz\|foobaz\|`
			`:-) ;-)\|:\|-\|)\|;\|-\|)\|`
Recode modules from latin-1 to utf-8 2010-10-27 15:52:48 -03:00			`áéíóú\|á\|é\|í\|ó\|ú\|`
Implemented posix-mode parsing support in shlex.py, as dicussed in mailing list, and in patch #722686. 2003-04-17 18:31:33 -03:00			`"""`

			`posix_data = r"""x\|x\|`
			`foo bar\|foo\|bar\|`
			`foo bar\|foo\|bar\|`
			`foo bar \|foo\|bar\|`
			`foo bar bla fasel\|foo\|bar\|bla\|fasel\|`
			`x y z xxxx\|x\|y\|z\|xxxx\|`
			`\x bar\|x\|bar\|`
			`\ x bar\| x\|bar\|`
			`\ bar\| bar\|`
			`foo \x bar\|foo\|x\|bar\|`
			`foo \ x bar\|foo\| x\|bar\|`
			`foo \ bar\|foo\| bar\|`
			`foo "bar" bla\|foo\|bar\|bla\|`
			`"foo" "bar" "bla"\|foo\|bar\|bla\|`
			`"foo" bar "bla"\|foo\|bar\|bla\|`
			`"foo" bar bla\|foo\|bar\|bla\|`
			`foo 'bar' bla\|foo\|bar\|bla\|`
			`'foo' 'bar' 'bla'\|foo\|bar\|bla\|`
			`'foo' bar 'bla'\|foo\|bar\|bla\|`
			`'foo' bar bla\|foo\|bar\|bla\|`
			`blurb foo"bar"bar"fasel" baz\|blurb\|foobarbarfasel\|baz\|`
			`blurb foo'bar'bar'fasel' baz\|blurb\|foobarbarfasel\|baz\|`
			`""\|\|`
			`''\|\|`
			`foo "" bar\|foo\|\|bar\|`
			`foo '' bar\|foo\|\|bar\|`
			`foo "" "" "" bar\|foo\|\|\|\|bar\|`
			`foo '' '' '' bar\|foo\|\|\|\|bar\|`
			`\"\|"\|`
			`"\""\|"\|`
			`"foo\ bar"\|foo\ bar\|`
			`"foo\\ bar"\|foo\ bar\|`
			`"foo\\ bar\""\|foo\ bar"\|`
			`"foo\\" bar\"\|foo\\|bar"\|`
			`"foo\\ bar\" dfadf"\|foo\ bar" dfadf\|`
			`"foo\\\ bar\" dfadf"\|foo\\ bar" dfadf\|`
			`"foo\\\x bar\" dfadf"\|foo\\x bar" dfadf\|`
			`"foo\x bar\" dfadf"\|foo\x bar" dfadf\|`
			`\'\|'\|`
			`'foo\ bar'\|foo\ bar\|`
			`'foo\\ bar'\|foo\\ bar\|`
			`"foo\\\x bar\" df'a\ 'df"\|foo\\x bar" df'a\ 'df\|`
			`\"foo\|"foo\|`
			`\"foo\x\|"foox\|`
			`"foo\x"\|foo\x\|`
			`"foo\ "\|foo\ \|`
			`foo\ xx\|foo xx\|`
			`foo\ x\x\|foo xx\|`
			`foo\ x\x\"\|foo xx"\|`
			`"foo\ x\x"\|foo\ x\x\|`
			`"foo\ x\x\\"\|foo\ x\x\\|`
			`"foo\ x\x\\""foobar"\|foo\ x\x\foobar\|`
			`"foo\ x\x\\"\'"foobar"\|foo\ x\x\'foobar\|`
			`"foo\ x\x\\"\'"fo'obar"\|foo\ x\x\'fo'obar\|`
			`"foo\ x\x\\"\'"fo'obar" 'don'\''t'\|foo\ x\x\'fo'obar\|don't\|`
			`"foo\ x\x\\"\'"fo'obar" 'don'\''t' \\\|foo\ x\x\'fo'obar\|don't\|\\|`
			`'foo\ bar'\|foo\ bar\|`
			`'foo\\ bar'\|foo\\ bar\|`
			`foo\ bar\|foo bar\|`
			`foo#bar\nbaz\|foo\|baz\|`
			`:-) ;-)\|:-)\|;-)\|`
Recode modules from latin-1 to utf-8 2010-10-27 15:52:48 -03:00			`áéíóú\|áéíóú\|`
Implemented posix-mode parsing support in shlex.py, as dicussed in mailing list, and in patch #722686. 2003-04-17 18:31:33 -03:00			`"""`

			`class ShlexTest(unittest.TestCase):`
			`def setUp(self):`
			`self.data = [x.split("\|")[:-1]`
			`for x in data.splitlines()]`
			`self.posix_data = [x.split("\|")[:-1]`
			`for x in posix_data.splitlines()]`
			`for item in self.data:`
			`item[0] = item[0].replace(r"\n", "\n")`
			`for item in self.posix_data:`
			`item[0] = item[0].replace(r"\n", "\n")`

- Changed shlex.split() method to have more useful and meaningful parameters. 2003-04-19 22:57:03 -03:00			`def splitTest(self, data, comments):`
Implemented posix-mode parsing support in shlex.py, as dicussed in mailing list, and in patch #722686. 2003-04-17 18:31:33 -03:00			`for i in range(len(data)):`
- Changed shlex.split() method to have more useful and meaningful parameters. 2003-04-19 22:57:03 -03:00			`l = shlex.split(data[i][0], comments=comments)`
Implemented posix-mode parsing support in shlex.py, as dicussed in mailing list, and in patch #722686. 2003-04-17 18:31:33 -03:00			`self.assertEqual(l, data[i][1:],`
			`"%s: %s != %s" %`
			`(data[i][0], l, data[i][1:]))`

			`def oldSplit(self, s):`
			`ret = []`
Fix test_shlex: Use io.StringIO. 2007-06-12 14:43:43 -03:00			`lex = shlex.shlex(io.StringIO(s))`
Implemented posix-mode parsing support in shlex.py, as dicussed in mailing list, and in patch #722686. 2003-04-17 18:31:33 -03:00			`tok = lex.get_token()`
			`while tok:`
			`ret.append(tok)`
			`tok = lex.get_token()`
			`return ret`
Whitespace normalization. 2003-04-24 13:02:54 -03:00
Implemented posix-mode parsing support in shlex.py, as dicussed in mailing list, and in patch #722686. 2003-04-17 18:31:33 -03:00			`def testSplitPosix(self):`
			`"""Test data splitting with posix parser"""`
Whitespace normalization. 2003-04-24 13:02:54 -03:00			`self.splitTest(self.posix_data, comments=True)`
Implemented posix-mode parsing support in shlex.py, as discussed in mailing list, and in patch #722686. 2003-04-17 18:31:33 -03:00
			`def testCompat(self):`
			`"""Test compatibility interface"""`
			`for i in range(len(self.data)):`
			`l = self.oldSplit(self.data[i][0])`
			`self.assertEqual(l, self.data[i][1:],`
			`"%s: %s != %s" %`
			`(self.data[i][0], l, self.data[i][1:]))`

Closes #1521950: Made shlex parsing more shell-like. 2016-07-29 18:35:03 -03:00			`def testSyntaxSplitAmpersandAndPipe(self):`
			`"""Test handling of syntax splitting of &, \|"""`
			`# Could take these forms: &&, &, \|&, ;&, ;;&`
			`# of course, the same applies to \| and \|\|`
			`# these should all parse to the same output`
			`for delimiter in ('&&', '&', '\|&', ';&', ';;&',`
			`'\|\|', '\|', '&\|', ';\|', ';;\|'):`
			`src = ['echo hi %s echo bye' % delimiter,`
			`'echo hi%secho bye' % delimiter]`
			`ref = ['echo', 'hi', delimiter, 'echo', 'bye']`
			`for ss in src:`
			`s = shlex.shlex(ss, punctuation_chars=True)`
			`result = list(s)`
			`self.assertEqual(ref, result, "While splitting '%s'" % ss)`

			`def testSyntaxSplitSemicolon(self):`
			`"""Test handling of syntax splitting of ;"""`
			`# Could take these forms: ;, ;;, ;&, ;;&`
			`# these should all parse to the same output`
			`for delimiter in (';', ';;', ';&', ';;&'):`
			`src = ['echo hi %s echo bye' % delimiter,`
			`'echo hi%s echo bye' % delimiter,`
			`'echo hi%secho bye' % delimiter]`
			`ref = ['echo', 'hi', delimiter, 'echo', 'bye']`
			`for ss in src:`
			`s = shlex.shlex(ss, punctuation_chars=True)`
			`result = list(s)`
			`self.assertEqual(ref, result, "While splitting '%s'" % ss)`

			`def testSyntaxSplitRedirect(self):`
			`"""Test handling of syntax splitting of >"""`
			`# of course, the same applies to <, \|`
			`# these should all parse to the same output`
			`for delimiter in ('<', '\|'):`
			`src = ['echo hi %s out' % delimiter,`
			`'echo hi%s out' % delimiter,`
			`'echo hi%sout' % delimiter]`
			`ref = ['echo', 'hi', delimiter, 'out']`
			`for ss in src:`
			`s = shlex.shlex(ss, punctuation_chars=True)`
			`result = list(s)`
			`self.assertEqual(ref, result, "While splitting '%s'" % ss)`

			`def testSyntaxSplitParen(self):`
			`"""Test handling of syntax splitting of ()"""`
			`# these should all parse to the same output`
			`src = ['( echo hi )',`
			`'(echo hi)']`
			`ref = ['(', 'echo', 'hi', ')']`
			`for ss in src:`
			`s = shlex.shlex(ss, punctuation_chars=True)`
			`result = list(s)`
			`self.assertEqual(ref, result, "While splitting '%s'" % ss)`

			`def testSyntaxSplitCustom(self):`
			`"""Test handling of syntax splitting with custom chars"""`
			`ref = ['~/a', '&', '&', 'b-c', '--color=auto', '\|\|', 'd', '*.py?']`
			`ss = "~/a && b-c --color=auto \|\| d *.py?"`
			`s = shlex.shlex(ss, punctuation_chars="\|")`
			`result = list(s)`
			`self.assertEqual(ref, result, "While splitting '%s'" % ss)`

			`def testTokenTypes(self):`
			`"""Test that tokens are split with types as expected."""`
			`for source, expected in (`
			`('a && b \|\| c',`
			`[('a', 'a'), ('&&', 'c'), ('b', 'a'),`
			`('\|\|', 'c'), ('c', 'a')]),`
			`):`
			`s = shlex.shlex(source, punctuation_chars=True)`
			`observed = []`
			`while True:`
			`t = s.get_token()`
			`if t == s.eof:`
			`break`
			`if t[0] in s.punctuation_chars:`
			`tt = 'c'`
			`else:`
			`tt = 'a'`
			`observed.append((t, tt))`
			`self.assertEqual(observed, expected)`

			`def testPunctuationInWordChars(self):`
			`"""Test that any punctuation chars are removed from wordchars"""`
			`s = shlex.shlex('a_b__c', punctuation_chars='_')`
			`self.assertNotIn('_', s.wordchars)`
			`self.assertEqual(list(s), ['a', '_', 'b', '__', 'c'])`

			`def testPunctuationWithWhitespaceSplit(self):`
			`"""Test that with whitespace_split, behaviour is as expected"""`
			`s = shlex.shlex('a && b \|\| c', punctuation_chars='&')`
			`# whitespace_split is False, so splitting will be based on`
			`# punctuation_chars`
			`self.assertEqual(list(s), ['a', '&&', 'b', '\|', '\|', 'c'])`
			`s = shlex.shlex('a && b \|\| c', punctuation_chars='&')`
			`s.whitespace_split = True`
			`# whitespace_split is True, so splitting will be based on`
			`# white space`
			`self.assertEqual(list(s), ['a', '&&', 'b', '\|\|', 'c'])`

			`def testEmptyStringHandling(self):`
			`"""Test that parsing of empty strings is correctly handled."""`
			`# see Issue #21999`
			`expected = ['', ')', 'abc']`
			`for punct in (False, True):`
			`s = shlex.shlex("'')abc", posix=True, punctuation_chars=punct)`
			`slist = list(s)`
			`self.assertEqual(slist, expected)`
			`expected = ["''", ')', 'abc']`
			`s = shlex.shlex("'')abc", punctuation_chars=True)`
			`self.assertEqual(list(s), expected)`

Add shlex.quote function, to escape filenames and command lines (#9723). This function used to live as pipes.quote, where it was undocumented but used anyway. (An alias still exists for backward compatibility.) The tests have been moved as is, but the code of the function was changed to use a regex instead of a loop with string comparisons (at Ian Bicking’s suggestion). I’m terrible at regexes, so any feedback is welcome. 2011-07-27 13:29:31 -03:00			`def testQuote(self):`
			`safeunquoted = string.ascii_letters + string.digits + '@%_-+=:,./'`
Avoid unwanted behavior change in shlex.quote (see #9723). I simplified the quote code to use a regex instead of a loop+test when I moved pipes.quote to shlex in 5966eeb0457d; Ezio Melotti pointed out that my regex contained redundant parts (now removed) and allowed non-ASCII characters (now disallowed). I think common UNIX shells don’t quote non-ASCII characters, but there’s no harm in doing so. We’ll see if users request a change. 2011-08-09 18:18:06 -03:00			`unicode_sample = '\xe9\xe0\xdf' # e + acute accent, a + grave, sharp s`
			unsafe = '"`$\\!' + unicode_sample
Add shlex.quote function, to escape filenames and command lines (#9723). This function used to live as pipes.quote, where it was undocumented but used anyway. (An alias still exists for backward compatibility.) The tests have been moved as is, but the code of the function was changed to use a regex instead of a loop with string comparisons (at Ian Bicking’s suggestion). I’m terrible at regexes, so any feedback is welcome. 2011-07-27 13:29:31 -03:00
			`self.assertEqual(shlex.quote(''), "''")`
			`self.assertEqual(shlex.quote(safeunquoted), safeunquoted)`
			`self.assertEqual(shlex.quote('test file name'), "'test file name'")`
			`for u in unsafe:`
			`self.assertEqual(shlex.quote('test%sname' % u),`
			`"'test%sname'" % u)`
			`for u in unsafe:`
			`self.assertEqual(shlex.quote("test%s'name'" % u),`
			`"'test%s'\"'\"'name'\"'\"''" % u)`

Implemented posix-mode parsing support in shlex.py, as dicussed in mailing list, and in patch #722686. 2003-04-17 18:31:33 -03:00			`# Allow this test to be used with old shlex.py`
			`if not getattr(shlex, "split", None):`
			`for methname in dir(ShlexTest):`
			`if methname.startswith("test") and methname != "testCompat":`
			`delattr(ShlexTest, methname)`

			`if __name__ == "__main__":`
Issue #21741: Update 147 test modules to use test discovery. I have compared output between pre- and post-patch runs of these tests to make sure there's nothing missing and nothing broken, on both Windows and Linux. The only differences I found were actually tests that were previously not run. 2015-04-13 17:00:43 -03:00			`unittest.main()`