# cpython/Lib/test/test_shlex.py

import io
import itertools
import shlex
import string
import unittest


# The original test data set was from shellwords, by Hartmut Goebel.

# Expected tokenization for a shlex.shlex instance created with default
# arguments (exercised by ShlexTest.testCompat via ShlexTest.oldSplit).
#
# One test case per line, fields separated by "|":
#     <input>|<expected token 1>|<expected token 2>|...|
# Every line ends with "|"; ShlexTest.setUp() splits on "|" and drops the
# trailing empty field.  Because this is a raw string, a backslash followed
# by "n" in the input column is two characters here; setUp() replaces that
# pair with a real newline before the case is used.
data = r"""x|x|
foo bar|foo|bar|
 foo bar|foo|bar|
 foo bar |foo|bar|
foo   bar    bla     fasel|foo|bar|bla|fasel|
x y  z              xxxx|x|y|z|xxxx|
\x bar|\|x|bar|
\ x bar|\|x|bar|
\ bar|\|bar|
foo \x bar|foo|\|x|bar|
foo \ x bar|foo|\|x|bar|
foo \ bar|foo|\|bar|
foo "bar" bla|foo|"bar"|bla|
"foo" "bar" "bla"|"foo"|"bar"|"bla"|
"foo" bar "bla"|"foo"|bar|"bla"|
"foo" bar bla|"foo"|bar|bla|
foo 'bar' bla|foo|'bar'|bla|
'foo' 'bar' 'bla'|'foo'|'bar'|'bla'|
'foo' bar 'bla'|'foo'|bar|'bla'|
'foo' bar bla|'foo'|bar|bla|
blurb foo"bar"bar"fasel" baz|blurb|foo"bar"bar"fasel"|baz|
blurb foo'bar'bar'fasel' baz|blurb|foo'bar'bar'fasel'|baz|
""|""|
''|''|
foo "" bar|foo|""|bar|
foo '' bar|foo|''|bar|
foo "" "" "" bar|foo|""|""|""|bar|
foo '' '' '' bar|foo|''|''|''|bar|
\""|\|""|
"\"|"\"|
"foo\ bar"|"foo\ bar"|
"foo\\ bar"|"foo\\ bar"|
"foo\\ bar\"|"foo\\ bar\"|
"foo\\" bar\""|"foo\\"|bar|\|""|
"foo\\ bar\" dfadf"|"foo\\ bar\"|dfadf"|
"foo\\\ bar\" dfadf"|"foo\\\ bar\"|dfadf"|
"foo\\\x bar\" dfadf"|"foo\\\x bar\"|dfadf"|
"foo\x bar\" dfadf"|"foo\x bar\"|dfadf"|
\''|\|''|
'foo\ bar'|'foo\ bar'|
'foo\\ bar'|'foo\\ bar'|
"foo\\\x bar\" df'a\ 'df'|"foo\\\x bar\"|df'a|\|'df'|
\"foo"|\|"foo"|
\"foo"\x|\|"foo"|\|x|
"foo\x"|"foo\x"|
"foo\ "|"foo\ "|
foo\ xx|foo|\|xx|
foo\ x\x|foo|\|x|\|x|
foo\ x\x\""|foo|\|x|\|x|\|""|
"foo\ x\x"|"foo\ x\x"|
"foo\ x\x\\"|"foo\ x\x\\"|
"foo\ x\x\\""foobar"|"foo\ x\x\\"|"foobar"|
"foo\ x\x\\"\''"foobar"|"foo\ x\x\\"|\|''|"foobar"|
"foo\ x\x\\"\'"fo'obar"|"foo\ x\x\\"|\|'"fo'|obar"|
"foo\ x\x\\"\'"fo'obar" 'don'\''t'|"foo\ x\x\\"|\|'"fo'|obar"|'don'|\|''|t'|
'foo\ bar'|'foo\ bar'|
'foo\\ bar'|'foo\\ bar'|
foo\ bar|foo|\|bar|
foo#bar\nbaz|foobaz|
:-) ;-)|:|-|)|;|-|)|
áéíóú|á|é|í|ó|ú|
"""

# Expected result of shlex.split(<input>, comments=True) (exercised by
# ShlexTest.testSplitPosix via ShlexTest.splitTest).
#
# One test case per line, fields separated by "|":
#     <input>|<expected token 1>|<expected token 2>|...|
# Every line ends with "|"; ShlexTest.setUp() splits on "|" and drops the
# trailing empty field, so two adjacent "|" denote an expected empty-string
# token.  Because this is a raw string, a backslash followed by "n" in the
# input column is two characters here; setUp() replaces that pair with a
# real newline before the case is used.
posix_data = r"""x|x|
foo bar|foo|bar|
 foo bar|foo|bar|
 foo bar |foo|bar|
foo   bar    bla     fasel|foo|bar|bla|fasel|
x y  z              xxxx|x|y|z|xxxx|
\x bar|x|bar|
\ x bar| x|bar|
\ bar| bar|
foo \x bar|foo|x|bar|
foo \ x bar|foo| x|bar|
foo \ bar|foo| bar|
foo "bar" bla|foo|bar|bla|
"foo" "bar" "bla"|foo|bar|bla|
"foo" bar "bla"|foo|bar|bla|
"foo" bar bla|foo|bar|bla|
foo 'bar' bla|foo|bar|bla|
'foo' 'bar' 'bla'|foo|bar|bla|
'foo' bar 'bla'|foo|bar|bla|
'foo' bar bla|foo|bar|bla|
blurb foo"bar"bar"fasel" baz|blurb|foobarbarfasel|baz|
blurb foo'bar'bar'fasel' baz|blurb|foobarbarfasel|baz|
""||
''||
foo "" bar|foo||bar|
foo '' bar|foo||bar|
foo "" "" "" bar|foo||||bar|
foo '' '' '' bar|foo||||bar|
\"|"|
"\""|"|
"foo\ bar"|foo\ bar|
"foo\\ bar"|foo\ bar|
"foo\\ bar\""|foo\ bar"|
"foo\\" bar\"|foo\|bar"|
"foo\\ bar\" dfadf"|foo\ bar" dfadf|
"foo\\\ bar\" dfadf"|foo\\ bar" dfadf|
"foo\\\x bar\" dfadf"|foo\\x bar" dfadf|
"foo\x bar\" dfadf"|foo\x bar" dfadf|
\'|'|
'foo\ bar'|foo\ bar|
'foo\\ bar'|foo\\ bar|
"foo\\\x bar\" df'a\ 'df"|foo\\x bar" df'a\ 'df|
\"foo|"foo|
\"foo\x|"foox|
"foo\x"|foo\x|
"foo\ "|foo\ |
foo\ xx|foo xx|
foo\ x\x|foo xx|
foo\ x\x\"|foo xx"|
"foo\ x\x"|foo\ x\x|
"foo\ x\x\\"|foo\ x\x\|
"foo\ x\x\\""foobar"|foo\ x\x\foobar|
"foo\ x\x\\"\'"foobar"|foo\ x\x\'foobar|
"foo\ x\x\\"\'"fo'obar"|foo\ x\x\'fo'obar|
"foo\ x\x\\"\'"fo'obar" 'don'\''t'|foo\ x\x\'fo'obar|don't|
"foo\ x\x\\"\'"fo'obar" 'don'\''t' \\|foo\ x\x\'fo'obar|don't|\|
'foo\ bar'|foo\ bar|
'foo\\ bar'|foo\\ bar|
foo\ bar|foo bar|
foo#bar\nbaz|foo|baz|
:-) ;-)|:-)|;-)|
áéíóú|áéíóú|
"""

class ShlexTest(unittest.TestCase):
    """Tests for shlex tokenizing, quoting and joining.

    The table-driven tests read the module-level ``data`` and
    ``posix_data`` strings, which setUp() parses into lists of
    ``[source, expected_token, ...]`` rows.
    """

    @staticmethod
    def _parse_cases(table):
        """Parse a ``|``-separated table into ``[source, token, ...]`` rows."""
        cases = []
        for line in table.splitlines():
            # Every row ends with "|", so the last split field is an empty
            # string that is not part of the expected tokens.
            fields = line.split("|")[:-1]
            # The tables are raw strings: a backslash followed by "n" in
            # the source column stands for a real newline.
            fields[0] = fields[0].replace(r"\n", "\n")
            cases.append(fields)
        return cases

    def setUp(self):
        """Build ``self.data`` and ``self.posix_data`` from the raw tables."""
        self.data = self._parse_cases(data)
        self.posix_data = self._parse_cases(posix_data)

    def splitTest(self, data, comments):
        """Check shlex.split() against every row of *data*.

        *data* is a list of ``[source, expected_token, ...]`` rows and
        *comments* is forwarded to shlex.split().  Each row runs in its own
        subTest so that one failing row does not hide the others.
        """
        for source, *expected in data:
            with self.subTest(source=source):
                tokens = shlex.split(source, comments=comments)
                self.assertEqual(tokens, expected,
                                 "%s: %s != %s" %
                                 (source, tokens, expected))

    def oldSplit(self, s):
        """Tokenize *s* by calling get_token() on a default shlex.shlex.

        The lexer reads from an io.StringIO stream; collection stops at the
        first falsy token returned by get_token().
        """
        ret = []
        lex = shlex.shlex(io.StringIO(s))
        tok = lex.get_token()
        while tok:
            ret.append(tok)
            tok = lex.get_token()
        return ret

    def testSplitPosix(self):
        """Test data splitting with posix parser"""
        self.splitTest(self.posix_data, comments=True)

    def testCompat(self):
        """Test compatibility interface"""
        for source, *expected in self.data:
            with self.subTest(source=source):
                tokens = self.oldSplit(source)
                self.assertEqual(tokens, expected,
                                 "%s: %s != %s" %
                                 (source, tokens, expected))

    def testSyntaxSplitAmpersandAndPipe(self):
        """Test handling of syntax splitting of &, |"""
        # Could take these forms: &&, &, |&, ;&, ;;&
        # of course, the same applies to | and ||
        # these should all parse to the same output
        for delimiter in ('&&', '&', '|&', ';&', ';;&',
                          '||', '|', '&|', ';|', ';;|'):
            src = ['echo hi %s echo bye' % delimiter,
                   'echo hi%secho bye' % delimiter]
            ref = ['echo', 'hi', delimiter, 'echo', 'bye']
            for ss, ws in itertools.product(src, (False, True)):
                s = shlex.shlex(ss, punctuation_chars=True)
                s.whitespace_split = ws
                result = list(s)
                self.assertEqual(ref, result,
                                 "While splitting '%s' [ws=%s]" % (ss, ws))

    def testSyntaxSplitSemicolon(self):
        """Test handling of syntax splitting of ;"""
        # Could take these forms: ;, ;;, ;&, ;;&
        # these should all parse to the same output
        for delimiter in (';', ';;', ';&', ';;&'):
            src = ['echo hi %s echo bye' % delimiter,
                   'echo hi%s echo bye' % delimiter,
                   'echo hi%secho bye' % delimiter]
            ref = ['echo', 'hi', delimiter, 'echo', 'bye']
            for ss, ws in itertools.product(src, (False, True)):
                s = shlex.shlex(ss, punctuation_chars=True)
                s.whitespace_split = ws
                result = list(s)
                self.assertEqual(ref, result,
                                 "While splitting '%s' [ws=%s]" % (ss, ws))

    def testSyntaxSplitRedirect(self):
        """Test handling of syntax splitting of >, < and |"""
        # these should all parse to the same output
        for delimiter in ('>', '<', '|'):
            src = ['echo hi %s out' % delimiter,
                   'echo hi%s out' % delimiter,
                   'echo hi%sout' % delimiter]
            ref = ['echo', 'hi', delimiter, 'out']
            for ss, ws in itertools.product(src, (False, True)):
                s = shlex.shlex(ss, punctuation_chars=True)
                # Apply the mode named in the failure message; without
                # this assignment both iterations test the same mode.
                s.whitespace_split = ws
                result = list(s)
                self.assertEqual(ref, result,
                                 "While splitting '%s' [ws=%s]" % (ss, ws))

    def testSyntaxSplitParen(self):
        """Test handling of syntax splitting of ()"""
        # these should all parse to the same output
        src = ['( echo hi )',
               '(echo hi)']
        ref = ['(', 'echo', 'hi', ')']
        for ss, ws in itertools.product(src, (False, True)):
            s = shlex.shlex(ss, punctuation_chars=True)
            s.whitespace_split = ws
            result = list(s)
            self.assertEqual(ref, result,
                             "While splitting '%s' [ws=%s]" % (ss, ws))

    def testSyntaxSplitCustom(self):
        """Test handling of syntax splitting with custom chars"""
        ss = "~/a&&b-c --color=auto||d *.py?"
        ref = ['~/a', '&', '&', 'b-c', '--color=auto', '||', 'd', '*.py?']
        s = shlex.shlex(ss, punctuation_chars="|")
        result = list(s)
        self.assertEqual(ref, result, "While splitting '%s' [ws=False]" % ss)
        ref = ['~/a&&b-c', '--color=auto', '||', 'd', '*.py?']
        s = shlex.shlex(ss, punctuation_chars="|")
        s.whitespace_split = True
        result = list(s)
        self.assertEqual(ref, result, "While splitting '%s' [ws=True]" % ss)

    def testTokenTypes(self):
        """Test that tokens are split with types as expected."""
        cases = (
            ('a && b || c',
             [('a', 'a'), ('&&', 'c'), ('b', 'a'),
              ('||', 'c'), ('c', 'a')]),
        )
        for source, expected in cases:
            s = shlex.shlex(source, punctuation_chars=True)
            observed = []
            # Read tokens until get_token() returns the lexer's eof marker.
            for t in iter(s.get_token, s.eof):
                # 'c' marks a punctuation token, 'a' any other token.
                tt = 'c' if t[0] in s.punctuation_chars else 'a'
                observed.append((t, tt))
            self.assertEqual(observed, expected)

    def testPunctuationInWordChars(self):
        """Test that any punctuation chars are removed from wordchars"""
        s = shlex.shlex('a_b__c', punctuation_chars='_')
        self.assertNotIn('_', s.wordchars)
        self.assertEqual(list(s), ['a', '_', 'b', '__', 'c'])

    def testPunctuationWithWhitespaceSplit(self):
        """Test that with whitespace_split, behaviour is as expected"""
        s = shlex.shlex('a  && b  ||  c', punctuation_chars='&')
        # whitespace_split is False, so splitting will be based on
        # punctuation_chars
        self.assertEqual(list(s), ['a', '&&', 'b', '|', '|', 'c'])
        s = shlex.shlex('a  && b  ||  c', punctuation_chars='&')
        s.whitespace_split = True
        # whitespace_split is True, so splitting will be based on
        # white space
        self.assertEqual(list(s), ['a', '&&', 'b', '||', 'c'])

    def testPunctuationWithPosix(self):
        """Test that punctuation_chars and posix behave correctly together."""
        # see Issue #29132
        s = shlex.shlex('f >"abc"', posix=True, punctuation_chars=True)
        self.assertEqual(list(s), ['f', '>', 'abc'])
        s = shlex.shlex('f >\\"abc\\"', posix=True, punctuation_chars=True)
        self.assertEqual(list(s), ['f', '>', '"abc"'])

    def testEmptyStringHandling(self):
        """Test that parsing of empty strings is correctly handled."""
        # see Issue #21999
        expected = ['', ')', 'abc']
        for punct in (False, True):
            with self.subTest(punctuation_chars=punct):
                s = shlex.shlex("'')abc", posix=True,
                                punctuation_chars=punct)
                self.assertEqual(list(s), expected)
        # Without posix=True the quotes stay part of the token.
        expected = ["''", ')', 'abc']
        s = shlex.shlex("'')abc", punctuation_chars=True)
        self.assertEqual(list(s), expected)

    def testUnicodeHandling(self):
        """Test punctuation_chars and whitespace_split handle unicode."""
        ss = "\u2119\u01b4\u2602\u210c\u00f8\u1f24"
        # Should be parsed as one complete token (whitespace_split=True).
        ref = ['\u2119\u01b4\u2602\u210c\u00f8\u1f24']
        s = shlex.shlex(ss, punctuation_chars=True)
        s.whitespace_split = True
        self.assertEqual(list(s), ref)
        # Without whitespace_split, uses wordchars and splits on all.
        ref = ['\u2119', '\u01b4', '\u2602', '\u210c', '\u00f8', '\u1f24']
        s = shlex.shlex(ss, punctuation_chars=True)
        self.assertEqual(list(s), ref)

    def testQuote(self):
        """Test shlex.quote() on empty, safe and unsafe strings."""
        safeunquoted = string.ascii_letters + string.digits + '@%_-+=:,./'
        unicode_sample = '\xe9\xe0\xdf'  # e + acute accent, a + grave, sharp s
        unsafe = '"`$\\!' + unicode_sample

        self.assertEqual(shlex.quote(''), "''")
        self.assertEqual(shlex.quote(safeunquoted), safeunquoted)
        self.assertEqual(shlex.quote('test file name'), "'test file name'")
        for u in unsafe:
            with self.subTest(char=u):
                self.assertEqual(shlex.quote('test%sname' % u),
                                 "'test%sname'" % u)
                # An embedded single quote is expected to come out as '"'"'.
                self.assertEqual(shlex.quote("test%s'name'" % u),
                                 "'test%s'\"'\"'name'\"'\"''" % u)

    def testJoin(self):
        """Test shlex.join() against hand-written expected command lines."""
        for split_command, command in [
            (['a ', 'b'], "'a ' b"),
            (['a', ' b'], "a ' b'"),
            (['a', ' ', 'b'], "a ' ' b"),
            (['"a', 'b"'], '\'"a\' \'b"\''),
        ]:
            with self.subTest(command=command):
                joined = shlex.join(split_command)
                self.assertEqual(joined, command)

    def testJoinRoundtrip(self):
        """Test that shlex.split(shlex.join(tokens)) returns *tokens*."""
        all_data = self.data + self.posix_data
        # Only the expected-token columns are used; the source column
        # merely labels the subTest.
        for command, *split_command in all_data:
            with self.subTest(command=command):
                joined = shlex.join(split_command)
                resplit = shlex.split(joined)
                self.assertEqual(split_command, resplit)

    def testPunctuationCharsReadOnly(self):
        """Test that punctuation_chars cannot be reassigned after creation."""
        punctuation_chars = "/|$%^"
        shlex_instance = shlex.shlex(punctuation_chars=punctuation_chars)
        self.assertEqual(shlex_instance.punctuation_chars, punctuation_chars)
        with self.assertRaises(AttributeError):
            shlex_instance.punctuation_chars = False

# Allow this test to be used with old shlex.py: when the shlex module has
# no usable split(), remove every test method except testCompat from
# ShlexTest.
if not getattr(shlex, "split", None):
    for methname in dir(ShlexTest):
        if methname == "testCompat" or not methname.startswith("test"):
            continue
        delattr(ShlexTest, methname)

if __name__ == "__main__":
    unittest.main()
Add shlex.quote function, to escape filenames and command lines (#9723). This function used to live as pipes.quote, where it was undocumented but used anyway. (An alias still exists for backward compatibility.) The tests have been moved as is, but the code of the function was changed to use a regex instead of a loop with string comparisons (at Ian Bicking’s suggestion). I’m terrible at regexes, so any feedback is welcome. 2011-07-27 13:29:31 -03:00			`import io`
bpo-28595: Allow shlex whitespace_split with punctuation_chars (GH-2071) 2019-06-01 16:09:22 -03:00			`import itertools`
Implemented posix-mode parsing support in shlex.py, as dicussed in mailing list, and in patch #722686. 2003-04-17 18:31:33 -03:00			`import shlex`
Add shlex.quote function, to escape filenames and command lines (#9723). This function used to live as pipes.quote, where it was undocumented but used anyway. (An alias still exists for backward compatibility.) The tests have been moved as is, but the code of the function was changed to use a regex instead of a loop with string comparisons (at Ian Bicking’s suggestion). I’m terrible at regexes, so any feedback is welcome. 2011-07-27 13:29:31 -03:00			`import string`
			`import unittest`
Implemented posix-mode parsing support in shlex.py, as dicussed in mailing list, and in patch #722686. 2003-04-17 18:31:33 -03:00
Get test to work under regrtest when running whole suite 2003-04-17 20:04:22 -03:00
Implemented posix-mode parsing support in shlex.py, as dicussed in mailing list, and in patch #722686. 2003-04-17 18:31:33 -03:00
			`# The original test data set was from shellwords, by Hartmut Goebel.`

			`data = r"""x\|x\|`
			`foo bar\|foo\|bar\|`
			`foo bar\|foo\|bar\|`
			`foo bar \|foo\|bar\|`
			`foo bar bla fasel\|foo\|bar\|bla\|fasel\|`
			`x y z xxxx\|x\|y\|z\|xxxx\|`
			`\x bar\|\\|x\|bar\|`
			`\ x bar\|\\|x\|bar\|`
			`\ bar\|\\|bar\|`
			`foo \x bar\|foo\|\\|x\|bar\|`
			`foo \ x bar\|foo\|\\|x\|bar\|`
			`foo \ bar\|foo\|\\|bar\|`
			`foo "bar" bla\|foo\|"bar"\|bla\|`
			`"foo" "bar" "bla"\|"foo"\|"bar"\|"bla"\|`
			`"foo" bar "bla"\|"foo"\|bar\|"bla"\|`
			`"foo" bar bla\|"foo"\|bar\|bla\|`
			`foo 'bar' bla\|foo\|'bar'\|bla\|`
			`'foo' 'bar' 'bla'\|'foo'\|'bar'\|'bla'\|`
			`'foo' bar 'bla'\|'foo'\|bar\|'bla'\|`
			`'foo' bar bla\|'foo'\|bar\|bla\|`
			`blurb foo"bar"bar"fasel" baz\|blurb\|foo"bar"bar"fasel"\|baz\|`
			`blurb foo'bar'bar'fasel' baz\|blurb\|foo'bar'bar'fasel'\|baz\|`
			`""\|""\|`
			`''\|''\|`
			`foo "" bar\|foo\|""\|bar\|`
			`foo '' bar\|foo\|''\|bar\|`
			`foo "" "" "" bar\|foo\|""\|""\|""\|bar\|`
			`foo '' '' '' bar\|foo\|''\|''\|''\|bar\|`
			`\""\|\\|""\|`
			`"\"\|"\"\|`
			`"foo\ bar"\|"foo\ bar"\|`
			`"foo\\ bar"\|"foo\\ bar"\|`
			`"foo\\ bar\"\|"foo\\ bar\"\|`
			`"foo\\" bar\""\|"foo\\"\|bar\|\\|""\|`
			`"foo\\ bar\" dfadf"\|"foo\\ bar\"\|dfadf"\|`
			`"foo\\\ bar\" dfadf"\|"foo\\\ bar\"\|dfadf"\|`
			`"foo\\\x bar\" dfadf"\|"foo\\\x bar\"\|dfadf"\|`
			`"foo\x bar\" dfadf"\|"foo\x bar\"\|dfadf"\|`
			`\''\|\\|''\|`
			`'foo\ bar'\|'foo\ bar'\|`
			`'foo\\ bar'\|'foo\\ bar'\|`
			`"foo\\\x bar\" df'a\ 'df'\|"foo\\\x bar\"\|df'a\|\\|'df'\|`
			`\"foo"\|\\|"foo"\|`
			`\"foo"\x\|\\|"foo"\|\\|x\|`
			`"foo\x"\|"foo\x"\|`
			`"foo\ "\|"foo\ "\|`
			`foo\ xx\|foo\|\\|xx\|`
			`foo\ x\x\|foo\|\\|x\|\\|x\|`
			`foo\ x\x\""\|foo\|\\|x\|\\|x\|\\|""\|`
			`"foo\ x\x"\|"foo\ x\x"\|`
			`"foo\ x\x\\"\|"foo\ x\x\\"\|`
			`"foo\ x\x\\""foobar"\|"foo\ x\x\\"\|"foobar"\|`
			`"foo\ x\x\\"\''"foobar"\|"foo\ x\x\\"\|\\|''\|"foobar"\|`
			`"foo\ x\x\\"\'"fo'obar"\|"foo\ x\x\\"\|\\|'"fo'\|obar"\|`
			`"foo\ x\x\\"\'"fo'obar" 'don'\''t'\|"foo\ x\x\\"\|\\|'"fo'\|obar"\|'don'\|\\|''\|t'\|`
			`'foo\ bar'\|'foo\ bar'\|`
			`'foo\\ bar'\|'foo\\ bar'\|`
			`foo\ bar\|foo\|\\|bar\|`
			`foo#bar\nbaz\|foobaz\|`
			`:-) ;-)\|:\|-\|)\|;\|-\|)\|`
Recode modules from latin-1 to utf-8 2010-10-27 15:52:48 -03:00			`áéíóú\|á\|é\|í\|ó\|ú\|`
Implemented posix-mode parsing support in shlex.py, as dicussed in mailing list, and in patch #722686. 2003-04-17 18:31:33 -03:00			`"""`

			`posix_data = r"""x\|x\|`
			`foo bar\|foo\|bar\|`
			`foo bar\|foo\|bar\|`
			`foo bar \|foo\|bar\|`
			`foo bar bla fasel\|foo\|bar\|bla\|fasel\|`
			`x y z xxxx\|x\|y\|z\|xxxx\|`
			`\x bar\|x\|bar\|`
			`\ x bar\| x\|bar\|`
			`\ bar\| bar\|`
			`foo \x bar\|foo\|x\|bar\|`
			`foo \ x bar\|foo\| x\|bar\|`
			`foo \ bar\|foo\| bar\|`
			`foo "bar" bla\|foo\|bar\|bla\|`
			`"foo" "bar" "bla"\|foo\|bar\|bla\|`
			`"foo" bar "bla"\|foo\|bar\|bla\|`
			`"foo" bar bla\|foo\|bar\|bla\|`
			`foo 'bar' bla\|foo\|bar\|bla\|`
			`'foo' 'bar' 'bla'\|foo\|bar\|bla\|`
			`'foo' bar 'bla'\|foo\|bar\|bla\|`
			`'foo' bar bla\|foo\|bar\|bla\|`
			`blurb foo"bar"bar"fasel" baz\|blurb\|foobarbarfasel\|baz\|`
			`blurb foo'bar'bar'fasel' baz\|blurb\|foobarbarfasel\|baz\|`
			`""\|\|`
			`''\|\|`
			`foo "" bar\|foo\|\|bar\|`
			`foo '' bar\|foo\|\|bar\|`
			`foo "" "" "" bar\|foo\|\|\|\|bar\|`
			`foo '' '' '' bar\|foo\|\|\|\|bar\|`
			`\"\|"\|`
			`"\""\|"\|`
			`"foo\ bar"\|foo\ bar\|`
			`"foo\\ bar"\|foo\ bar\|`
			`"foo\\ bar\""\|foo\ bar"\|`
			`"foo\\" bar\"\|foo\\|bar"\|`
			`"foo\\ bar\" dfadf"\|foo\ bar" dfadf\|`
			`"foo\\\ bar\" dfadf"\|foo\\ bar" dfadf\|`
			`"foo\\\x bar\" dfadf"\|foo\\x bar" dfadf\|`
			`"foo\x bar\" dfadf"\|foo\x bar" dfadf\|`
			`\'\|'\|`
			`'foo\ bar'\|foo\ bar\|`
			`'foo\\ bar'\|foo\\ bar\|`
			`"foo\\\x bar\" df'a\ 'df"\|foo\\x bar" df'a\ 'df\|`
			`\"foo\|"foo\|`
			`\"foo\x\|"foox\|`
			`"foo\x"\|foo\x\|`
			`"foo\ "\|foo\ \|`
			`foo\ xx\|foo xx\|`
			`foo\ x\x\|foo xx\|`
			`foo\ x\x\"\|foo xx"\|`
			`"foo\ x\x"\|foo\ x\x\|`
			`"foo\ x\x\\"\|foo\ x\x\\|`
			`"foo\ x\x\\""foobar"\|foo\ x\x\foobar\|`
			`"foo\ x\x\\"\'"foobar"\|foo\ x\x\'foobar\|`
			`"foo\ x\x\\"\'"fo'obar"\|foo\ x\x\'fo'obar\|`
			`"foo\ x\x\\"\'"fo'obar" 'don'\''t'\|foo\ x\x\'fo'obar\|don't\|`
			`"foo\ x\x\\"\'"fo'obar" 'don'\''t' \\\|foo\ x\x\'fo'obar\|don't\|\\|`
			`'foo\ bar'\|foo\ bar\|`
			`'foo\\ bar'\|foo\\ bar\|`
			`foo\ bar\|foo bar\|`
			`foo#bar\nbaz\|foo\|baz\|`
			`:-) ;-)\|:-)\|;-)\|`
Recode modules from latin-1 to utf-8 2010-10-27 15:52:48 -03:00			`áéíóú\|áéíóú\|`
Implemented posix-mode parsing support in shlex.py, as dicussed in mailing list, and in patch #722686. 2003-04-17 18:31:33 -03:00			`"""`

			`class ShlexTest(unittest.TestCase):`
			`def setUp(self):`
			`self.data = [x.split("\|")[:-1]`
			`for x in data.splitlines()]`
			`self.posix_data = [x.split("\|")[:-1]`
			`for x in posix_data.splitlines()]`
			`for item in self.data:`
			`item[0] = item[0].replace(r"\n", "\n")`
			`for item in self.posix_data:`
			`item[0] = item[0].replace(r"\n", "\n")`

- Changed shlex.split() method to have more useful and meaningful parameters. 2003-04-19 22:57:03 -03:00			`def splitTest(self, data, comments):`
Implemented posix-mode parsing support in shlex.py, as dicussed in mailing list, and in patch #722686. 2003-04-17 18:31:33 -03:00			`for i in range(len(data)):`
- Changed shlex.split() method to have more useful and meaningful parameters. 2003-04-19 22:57:03 -03:00			`l = shlex.split(data[i][0], comments=comments)`
Implemented posix-mode parsing support in shlex.py, as dicussed in mailing list, and in patch #722686. 2003-04-17 18:31:33 -03:00			`self.assertEqual(l, data[i][1:],`
			`"%s: %s != %s" %`
			`(data[i][0], l, data[i][1:]))`

			`def oldSplit(self, s):`
			`ret = []`
Fix test_shlex: Use io.StringIO. 2007-06-12 14:43:43 -03:00			`lex = shlex.shlex(io.StringIO(s))`
Implemented posix-mode parsing support in shlex.py, as dicussed in mailing list, and in patch #722686. 2003-04-17 18:31:33 -03:00			`tok = lex.get_token()`
			`while tok:`
			`ret.append(tok)`
			`tok = lex.get_token()`
			`return ret`
Whitespace normalization. 2003-04-24 13:02:54 -03:00
Implemented posix-mode parsing support in shlex.py, as dicussed in mailing list, and in patch #722686. 2003-04-17 18:31:33 -03:00			`def testSplitPosix(self):`
			`"""Test data splitting with posix parser"""`
Whitespace normalization. 2003-04-24 13:02:54 -03:00			`self.splitTest(self.posix_data, comments=True)`
Implemented posix-mode parsing support in shlex.py, as dicussed in mailing list, and in patch #722686. 2003-04-17 18:31:33 -03:00
			`def testCompat(self):`
			`"""Test compatibility interface"""`
			`for i in range(len(self.data)):`
			`l = self.oldSplit(self.data[i][0])`
			`self.assertEqual(l, self.data[i][1:],`
			`"%s: %s != %s" %`
			`(self.data[i][0], l, self.data[i][1:]))`

Closes #1521950: Made shlex parsing more shell-like. 2016-07-29 18:35:03 -03:00			`def testSyntaxSplitAmpersandAndPipe(self):`
			`"""Test handling of syntax splitting of &, \|"""`
			`# Could take these forms: &&, &, \|&, ;&, ;;&`
			`# of course, the same applies to \| and \|\|`
			`# these should all parse to the same output`
			`for delimiter in ('&&', '&', '\|&', ';&', ';;&',`
			`'\|\|', '\|', '&\|', ';\|', ';;\|'):`
			`src = ['echo hi %s echo bye' % delimiter,`
			`'echo hi%secho bye' % delimiter]`
			`ref = ['echo', 'hi', delimiter, 'echo', 'bye']`
bpo-28595: Allow shlex whitespace_split with punctuation_chars (GH-2071) 2019-06-01 16:09:22 -03:00			`for ss, ws in itertools.product(src, (False, True)):`
Closes #1521950: Made shlex parsing more shell-like. 2016-07-29 18:35:03 -03:00			`s = shlex.shlex(ss, punctuation_chars=True)`
bpo-28595: Allow shlex whitespace_split with punctuation_chars (GH-2071) 2019-06-01 16:09:22 -03:00			`s.whitespace_split = ws`
Closes #1521950: Made shlex parsing more shell-like. 2016-07-29 18:35:03 -03:00			`result = list(s)`
bpo-28595: Allow shlex whitespace_split with punctuation_chars (GH-2071) 2019-06-01 16:09:22 -03:00			`self.assertEqual(ref, result,`
			`"While splitting '%s' [ws=%s]" % (ss, ws))`
Closes #1521950: Made shlex parsing more shell-like. 2016-07-29 18:35:03 -03:00
			`def testSyntaxSplitSemicolon(self):`
			`"""Test handling of syntax splitting of ;"""`
			`# Could take these forms: ;, ;;, ;&, ;;&`
			`# these should all parse to the same output`
			`for delimiter in (';', ';;', ';&', ';;&'):`
			`src = ['echo hi %s echo bye' % delimiter,`
			`'echo hi%s echo bye' % delimiter,`
			`'echo hi%secho bye' % delimiter]`
			`ref = ['echo', 'hi', delimiter, 'echo', 'bye']`
bpo-28595: Allow shlex whitespace_split with punctuation_chars (GH-2071) 2019-06-01 16:09:22 -03:00			`for ss, ws in itertools.product(src, (False, True)):`
Closes #1521950: Made shlex parsing more shell-like. 2016-07-29 18:35:03 -03:00			`s = shlex.shlex(ss, punctuation_chars=True)`
bpo-28595: Allow shlex whitespace_split with punctuation_chars (GH-2071) 2019-06-01 16:09:22 -03:00			`s.whitespace_split = ws`
Closes #1521950: Made shlex parsing more shell-like. 2016-07-29 18:35:03 -03:00			`result = list(s)`
bpo-28595: Allow shlex whitespace_split with punctuation_chars (GH-2071) 2019-06-01 16:09:22 -03:00			`self.assertEqual(ref, result,`
			`"While splitting '%s' [ws=%s]" % (ss, ws))`
Closes #1521950: Made shlex parsing more shell-like. 2016-07-29 18:35:03 -03:00
			`def testSyntaxSplitRedirect(self):`
			`"""Test handling of syntax splitting of >"""`
			`# of course, the same applies to <, \|`
			`# these should all parse to the same output`
			`for delimiter in ('<', '\|'):`
			`src = ['echo hi %s out' % delimiter,`
			`'echo hi%s out' % delimiter,`
			`'echo hi%sout' % delimiter]`
			`ref = ['echo', 'hi', delimiter, 'out']`
bpo-28595: Allow shlex whitespace_split with punctuation_chars (GH-2071) 2019-06-01 16:09:22 -03:00			`for ss, ws in itertools.product(src, (False, True)):`
Closes #1521950: Made shlex parsing more shell-like. 2016-07-29 18:35:03 -03:00			`s = shlex.shlex(ss, punctuation_chars=True)`
			`result = list(s)`
bpo-28595: Allow shlex whitespace_split with punctuation_chars (GH-2071) 2019-06-01 16:09:22 -03:00			`self.assertEqual(ref, result,`
			`"While splitting '%s' [ws=%s]" % (ss, ws))`
Closes #1521950: Made shlex parsing more shell-like. 2016-07-29 18:35:03 -03:00
			`def testSyntaxSplitParen(self):`
			`"""Test handling of syntax splitting of ()"""`
			`# these should all parse to the same output`
			`src = ['( echo hi )',`
			`'(echo hi)']`
			`ref = ['(', 'echo', 'hi', ')']`
bpo-28595: Allow shlex whitespace_split with punctuation_chars (GH-2071) 2019-06-01 16:09:22 -03:00			`for ss, ws in itertools.product(src, (False, True)):`
Closes #1521950: Made shlex parsing more shell-like. 2016-07-29 18:35:03 -03:00			`s = shlex.shlex(ss, punctuation_chars=True)`
bpo-28595: Allow shlex whitespace_split with punctuation_chars (GH-2071) 2019-06-01 16:09:22 -03:00			`s.whitespace_split = ws`
Closes #1521950: Made shlex parsing more shell-like. 2016-07-29 18:35:03 -03:00			`result = list(s)`
bpo-28595: Allow shlex whitespace_split with punctuation_chars (GH-2071) 2019-06-01 16:09:22 -03:00			`self.assertEqual(ref, result,`
			`"While splitting '%s' [ws=%s]" % (ss, ws))`
Closes #1521950: Made shlex parsing more shell-like. 2016-07-29 18:35:03 -03:00
			`def testSyntaxSplitCustom(self):`
			`"""Test handling of syntax splitting with custom chars"""`
bpo-28595: Allow shlex whitespace_split with punctuation_chars (GH-2071) 2019-06-01 16:09:22 -03:00			`ss = "~/a&&b-c --color=auto\|\|d *.py?"`
Closes #1521950: Made shlex parsing more shell-like. 2016-07-29 18:35:03 -03:00			`ref = ['~/a', '&', '&', 'b-c', '--color=auto', '\|\|', 'd', '*.py?']`
			`s = shlex.shlex(ss, punctuation_chars="\|")`
			`result = list(s)`
bpo-28595: Allow shlex whitespace_split with punctuation_chars (GH-2071) 2019-06-01 16:09:22 -03:00			`self.assertEqual(ref, result, "While splitting '%s' [ws=False]" % ss)`
			`ref = ['~/a&&b-c', '--color=auto', '\|\|', 'd', '*.py?']`
			`s = shlex.shlex(ss, punctuation_chars="\|")`
			`s.whitespace_split = True`
			`result = list(s)`
			`self.assertEqual(ref, result, "While splitting '%s' [ws=True]" % ss)`
Closes #1521950: Made shlex parsing more shell-like. 2016-07-29 18:35:03 -03:00
			`def testTokenTypes(self):`
			`"""Test that tokens are split with types as expected."""`
			`for source, expected in (`
			`('a && b \|\| c',`
			`[('a', 'a'), ('&&', 'c'), ('b', 'a'),`
			`('\|\|', 'c'), ('c', 'a')]),`
			`):`
			`s = shlex.shlex(source, punctuation_chars=True)`
			`observed = []`
			`while True:`
			`t = s.get_token()`
			`if t == s.eof:`
			`break`
			`if t[0] in s.punctuation_chars:`
			`tt = 'c'`
			`else:`
			`tt = 'a'`
			`observed.append((t, tt))`
			`self.assertEqual(observed, expected)`

			`def testPunctuationInWordChars(self):`
			`"""Test that any punctuation chars are removed from wordchars"""`
			`s = shlex.shlex('a_b__c', punctuation_chars='_')`
			`self.assertNotIn('_', s.wordchars)`
			`self.assertEqual(list(s), ['a', '_', 'b', '__', 'c'])`

			`def testPunctuationWithWhitespaceSplit(self):`
			`"""Test that with whitespace_split, behaviour is as expected"""`
			`s = shlex.shlex('a && b \|\| c', punctuation_chars='&')`
			`# whitespace_split is False, so splitting will be based on`
			`# punctuation_chars`
			`self.assertEqual(list(s), ['a', '&&', 'b', '\|', '\|', 'c'])`
			`s = shlex.shlex('a && b \|\| c', punctuation_chars='&')`
			`s.whitespace_split = True`
			`# whitespace_split is True, so splitting will be based on`
			`# white space`
			`self.assertEqual(list(s), ['a', '&&', 'b', '\|\|', 'c'])`

Fixed #29132: Updated shlex to work better with punctuation chars in POSIX mode. Thanks to Evan_ for the report and patch. 2017-01-15 06:06:52 -04:00			`def testPunctuationWithPosix(self):`
			`"""Test that punctuation_chars and posix behave correctly together."""`
			`# see Issue #29132`
			`s = shlex.shlex('f >"abc"', posix=True, punctuation_chars=True)`
			`self.assertEqual(list(s), ['f', '>', 'abc'])`
			`s = shlex.shlex('f >\\"abc\\"', posix=True, punctuation_chars=True)`
			`self.assertEqual(list(s), ['f', '>', '"abc"'])`

Closes #1521950: Made shlex parsing more shell-like. 2016-07-29 18:35:03 -03:00			`def testEmptyStringHandling(self):`
			`"""Test that parsing of empty strings is correctly handled."""`
			`# see Issue #21999`
			`expected = ['', ')', 'abc']`
			`for punct in (False, True):`
			`s = shlex.shlex("'')abc", posix=True, punctuation_chars=punct)`
			`slist = list(s)`
			`self.assertEqual(slist, expected)`
			`expected = ["''", ')', 'abc']`
			`s = shlex.shlex("'')abc", punctuation_chars=True)`
			`self.assertEqual(list(s), expected)`

bpo-28595: Allow shlex whitespace_split with punctuation_chars (GH-2071) 2019-06-01 16:09:22 -03:00			`def testUnicodeHandling(self):`
			`"""Test punctuation_chars and whitespace_split handle unicode."""`
			`ss = "\u2119\u01b4\u2602\u210c\u00f8\u1f24"`
			`# Should be parsed as one complete token (whitespace_split=True).`
			`ref = ['\u2119\u01b4\u2602\u210c\u00f8\u1f24']`
			`s = shlex.shlex(ss, punctuation_chars=True)`
			`s.whitespace_split = True`
			`self.assertEqual(list(s), ref)`
			`# Without whitespace_split, uses wordchars and splits on all.`
			`ref = ['\u2119', '\u01b4', '\u2602', '\u210c', '\u00f8', '\u1f24']`
			`s = shlex.shlex(ss, punctuation_chars=True)`
			`self.assertEqual(list(s), ref)`

Add shlex.quote function, to escape filenames and command lines (#9723). This function used to live as pipes.quote, where it was undocumented but used anyway. (An alias still exists for backward compatibility.) The tests have been moved as is, but the code of the function was changed to use a regex instead of a loop with string comparisons (at Ian Bicking’s suggestion). I’m terrible at regexes, so any feedback is welcome. 2011-07-27 13:29:31 -03:00			`def testQuote(self):`
			`safeunquoted = string.ascii_letters + string.digits + '@%_-+=:,./'`
Avoid unwanted behavior change in shlex.quote (see #9723). I simplified the quote code to use a regex instead of a loop+test when I moved pipes.quote to shlex in 5966eeb0457d; Ezio Melotti pointed out that my regex contained redundant parts (now removed) and allowed non-ASCII characters (now disallowed). I think common UNIX shells don’t quote non-ASCII characters, but there’s no harm in doing so. We’ll see if users request a change. 2011-08-09 18:18:06 -03:00			`unicode_sample = '\xe9\xe0\xdf' # e + acute accent, a + grave, sharp s`
			unsafe = '"`$\\!' + unicode_sample
Add shlex.quote function, to escape filenames and command lines (#9723). This function used to live as pipes.quote, where it was undocumented but used anyway. (An alias still exists for backward compatibility.) The tests have been moved as is, but the code of the function was changed to use a regex instead of a loop with string comparisons (at Ian Bicking’s suggestion). I’m terrible at regexes, so any feedback is welcome. 2011-07-27 13:29:31 -03:00
			`self.assertEqual(shlex.quote(''), "''")`
			`self.assertEqual(shlex.quote(safeunquoted), safeunquoted)`
			`self.assertEqual(shlex.quote('test file name'), "'test file name'")`
			`for u in unsafe:`
			`self.assertEqual(shlex.quote('test%sname' % u),`
			`"'test%sname'" % u)`
			`for u in unsafe:`
			`self.assertEqual(shlex.quote("test%s'name'" % u),`
			`"'test%s'\"'\"'name'\"'\"''" % u)`

bpo-22454: Add shlex.join() (the opposite of shlex.split()) (GH-7605) 2019-05-29 05:06:12 -03:00			`def testJoin(self):`
			`for split_command, command in [`
			`(['a ', 'b'], "'a ' b"),`
			`(['a', ' b'], "a ' b'"),`
			`(['a', ' ', 'b'], "a ' ' b"),`
			`(['"a', 'b"'], '\'"a\' \'b"\''),`
			`]:`
			`with self.subTest(command=command):`
			`joined = shlex.join(split_command)`
			`self.assertEqual(joined, command)`

			`def testJoinRoundtrip(self):`
			`all_data = self.data + self.posix_data`
			`for command, *split_command in all_data:`
			`with self.subTest(command=command):`
			`joined = shlex.join(split_command)`
			`resplit = shlex.split(joined)`
			`self.assertEqual(split_command, resplit)`

[3.8] bpo-35168: Make shlex.punctuation_chars read-only (GH-11631) (GH-15927) (cherry picked from commit 972cf5c06a5ba16ad243a442dbb9c15307fbed95) Co-authored-by: Alex <a.v.shkop@gmail.com> 2019-09-11 09:39:52 -03:00			`def testPunctuationCharsReadOnly(self):`
			`punctuation_chars = "/\|$%^"`
			`shlex_instance = shlex.shlex(punctuation_chars=punctuation_chars)`
			`self.assertEqual(shlex_instance.punctuation_chars, punctuation_chars)`
			`with self.assertRaises(AttributeError):`
			`shlex_instance.punctuation_chars = False`

bpo-22454: Add shlex.join() (the opposite of shlex.split()) (GH-7605) 2019-05-29 05:06:12 -03:00
Implemented posix-mode parsing support in shlex.py, as dicussed in mailing list, and in patch #722686. 2003-04-17 18:31:33 -03:00			`# Allow this test to be used with old shlex.py`
			`if not getattr(shlex, "split", None):`
			`for methname in dir(ShlexTest):`
			`if methname.startswith("test") and methname != "testCompat":`
			`delattr(ShlexTest, methname)`

			`if __name__ == "__main__":`
Issue #21741: Update 147 test modules to use test discovery. I have compared output between pre- and post-patch runs of these tests to make sure there's nothing missing and nothing broken, on both Windows and Linux. The only differences I found were actually tests that were previously not run. 2015-04-13 17:00:43 -03:00			`unittest.main()`