From 1ff08b1243dcb07db975640b2f3cbc82985bee81 Mon Sep 17 00:00:00 2001 From: Ka-Ping Yee Date: Mon, 15 Jan 2001 22:04:30 +0000 Subject: [PATCH] Add tokenizer support and tests for u'', U"", uR'', Ur"", etc. --- Lib/test/output/test_tokenize | 786 ++++++++++++++++++---------------- Lib/test/tokenize_tests.py | 14 + Lib/tokenize.py | 34 +- 3 files changed, 460 insertions(+), 374 deletions(-) diff --git a/Lib/test/output/test_tokenize b/Lib/test/output/test_tokenize index 52bf19093bd..ba7ea6eb071 100644 --- a/Lib/test/output/test_tokenize +++ b/Lib/test/output/test_tokenize @@ -1,26 +1,26 @@ test_tokenize -1,0-1,35: COMMENT "# Tests for the 'tokenize' module.\012" -2,0-2,43: COMMENT '# Large bits stolen from test_grammar.py. \012' -3,0-3,1: NL '\012' -4,0-4,11: COMMENT '# Comments\012' +1,0-1,35: COMMENT "# Tests for the 'tokenize' module.\n" +2,0-2,43: COMMENT '# Large bits stolen from test_grammar.py. \n' +3,0-3,1: NL '\n' +4,0-4,11: COMMENT '# Comments\n' 5,0-5,3: STRING '"#"' -5,3-5,4: NEWLINE '\012' -6,0-6,3: COMMENT "#'\012" -7,0-7,3: COMMENT '#"\012' -8,0-8,3: COMMENT '#\\\012' -9,7-9,9: COMMENT '#\012' -10,4-10,10: COMMENT '# abc\012' -11,0-12,4: STRING "'''#\012#'''" -12,4-12,5: NEWLINE '\012' -13,0-13,1: NL '\012' +5,3-5,4: NEWLINE '\n' +6,0-6,3: COMMENT "#'\n" +7,0-7,3: COMMENT '#"\n' +8,0-8,3: COMMENT '#\\\n' +9,7-9,9: COMMENT '#\n' +10,4-10,10: COMMENT '# abc\n' +11,0-12,4: STRING "'''#\n#'''" +12,4-12,5: NEWLINE '\n' +13,0-13,1: NL '\n' 14,0-14,1: NAME 'x' 14,2-14,3: OP '=' 14,4-14,5: NUMBER '1' 14,7-14,8: COMMENT '#' -14,8-14,9: NEWLINE '\012' -15,0-15,1: NL '\012' -16,0-16,25: COMMENT '# Balancing continuation\012' -17,0-17,1: NL '\012' +14,8-14,9: NEWLINE '\n' +15,0-15,1: NL '\n' +16,0-16,25: COMMENT '# Balancing continuation\n' +17,0-17,1: NL '\n' 18,0-18,1: NAME 'a' 18,2-18,3: OP '=' 18,4-18,5: OP '(' @@ -28,12 +28,12 @@ test_tokenize 18,6-18,7: OP ',' 18,8-18,9: NUMBER '4' 18,9-18,10: OP ',' -18,10-18,11: NL '\012' +18,10-18,11: NL '\n' 19,2-19,3: NUMBER '5' 19,3-19,4: OP ',' 19,5-19,6: NUMBER '6' 19,6-19,7: OP ')' -19,7-19,8: NEWLINE '\012' +19,7-19,8: NEWLINE '\n' 20,0-20,1: NAME 'y' 20,2-20,3: OP '=' 20,4-20,5: OP '[' @@ -41,10 +41,10 @@ test_tokenize 20,6-20,7: OP ',' 20,8-20,9: NUMBER '4' 20,9-20,10: OP ',' -20,10-20,11: NL '\012' +20,10-20,11: NL '\n' 21,2-21,3: NUMBER '5' 21,3-21,4: OP ']' -21,4-21,5: NEWLINE '\012' +21,4-21,5: NEWLINE '\n' 22,0-22,1: NAME 'z' 22,2-22,3: OP '=' 22,4-22,5: OP '{' @@ -52,12 +52,12 @@ test_tokenize 22,8-22,9: OP ':' 22,9-22,10: NUMBER '5' 22,10-22,11: OP ',' -22,11-22,12: NL '\012' +22,11-22,12: NL '\n' 23,2-23,5: STRING "'b'" 23,5-23,6: OP ':' 23,6-23,7: NUMBER '6' 23,7-23,8: OP '}' -23,8-23,9: NEWLINE '\012' +23,8-23,9: NEWLINE '\n' 24,0-24,1: NAME 'x' 24,2-24,3: OP '=' 24,4-24,5: OP '(' @@ -74,151 +74,151 @@ test_tokenize 24,20-24,21: OP '-' 24,22-24,23: NAME 'a' 24,23-24,24: OP '[' -24,24-24,25: NL '\012' +24,24-24,25: NL '\n' 25,3-25,4: NUMBER '3' 25,5-25,6: OP ']' -25,6-25,7: NL '\012' +25,6-25,7: NL '\n' 26,3-26,4: OP '-' 26,5-26,6: NAME 'x' 26,7-26,8: OP '+' 26,9-26,12: NAME 'len' 26,12-26,13: OP '(' 26,13-26,14: OP '{' -26,14-26,15: NL '\012' +26,14-26,15: NL '\n' 27,3-27,4: OP '}' -27,4-27,5: NL '\012' +27,4-27,5: NL '\n' 28,4-28,5: OP ')' -28,5-28,6: NL '\012' +28,5-28,6: NL '\n' 29,2-29,3: OP ')' -29,3-29,4: NEWLINE '\012' -30,0-30,1: NL '\012' -31,0-31,37: COMMENT '# Backslash means line continuation:\012' +29,3-29,4: NEWLINE '\n' +30,0-30,1: NL '\n' +31,0-31,37: COMMENT '# Backslash means line continuation:\n' 32,0-32,1: NAME 'x' 32,2-32,3: OP '=' 32,4-32,5: NUMBER '1' 33,0-33,1: OP '+' 33,2-33,3: NUMBER '1' -33,3-33,4: NEWLINE '\012' -34,0-34,1: NL '\012' -35,0-35,55: COMMENT '# Backslash does not means continuation in comments :\\\012' +33,3-33,4: NEWLINE '\n' +34,0-34,1: NL '\n' +35,0-35,55: COMMENT '# Backslash does not means continuation in comments :\\\n' 36,0-36,1: NAME 'x' 36,2-36,3: OP '=' 36,4-36,5: NUMBER '0' -36,5-36,6: NEWLINE '\012' -37,0-37,1: NL '\012' -38,0-38,20: COMMENT '# Ordinary integers\012' +36,5-36,6: NEWLINE '\n' +37,0-37,1: NL '\n' +38,0-38,20: COMMENT '# Ordinary integers\n' 39,0-39,4: NUMBER '0xff' 39,5-39,7: OP '<>' 39,8-39,11: NUMBER '255' -39,11-39,12: NEWLINE '\012' +39,11-39,12: NEWLINE '\n' 40,0-40,4: NUMBER '0377' 40,5-40,7: OP '<>' 40,8-40,11: NUMBER '255' -40,11-40,12: NEWLINE '\012' +40,11-40,12: NEWLINE '\n' 41,0-41,10: NUMBER '2147483647' 41,13-41,15: OP '!=' 41,16-41,28: NUMBER '017777777777' -41,28-41,29: NEWLINE '\012' +41,28-41,29: NEWLINE '\n' 42,0-42,1: OP '-' 42,1-42,11: NUMBER '2147483647' 42,11-42,12: OP '-' 42,12-42,13: NUMBER '1' 42,14-42,16: OP '!=' 42,17-42,29: NUMBER '020000000000' -42,29-42,30: NEWLINE '\012' +42,29-42,30: NEWLINE '\n' 43,0-43,12: NUMBER '037777777777' 43,13-43,15: OP '!=' 43,16-43,17: OP '-' 43,17-43,18: NUMBER '1' -43,18-43,19: NEWLINE '\012' +43,18-43,19: NEWLINE '\n' 44,0-44,10: NUMBER '0xffffffff' 44,11-44,13: OP '!=' 44,14-44,15: OP '-' 44,15-44,16: NUMBER '1' -44,16-44,17: NEWLINE '\012' -45,0-45,1: NL '\012' -46,0-46,16: COMMENT '# Long integers\012' +44,16-44,17: NEWLINE '\n' +45,0-45,1: NL '\n' +46,0-46,16: COMMENT '# Long integers\n' 47,0-47,1: NAME 'x' 47,2-47,3: OP '=' 47,4-47,6: NUMBER '0L' -47,6-47,7: NEWLINE '\012' +47,6-47,7: NEWLINE '\n' 48,0-48,1: NAME 'x' 48,2-48,3: OP '=' 48,4-48,6: NUMBER '0l' -48,6-48,7: NEWLINE '\012' +48,6-48,7: NEWLINE '\n' 49,0-49,1: NAME 'x' 49,2-49,3: OP '=' 49,4-49,23: NUMBER '0xffffffffffffffffL' -49,23-49,24: NEWLINE '\012' +49,23-49,24: NEWLINE '\n' 50,0-50,1: NAME 'x' 50,2-50,3: OP '=' 50,4-50,23: NUMBER '0xffffffffffffffffl' -50,23-50,24: NEWLINE '\012' +50,23-50,24: NEWLINE '\n' 51,0-51,1: NAME 'x' 51,2-51,3: OP '=' 51,4-51,23: NUMBER '077777777777777777L' -51,23-51,24: NEWLINE '\012' +51,23-51,24: NEWLINE '\n' 52,0-52,1: NAME 'x' 52,2-52,3: OP '=' 52,4-52,23: NUMBER '077777777777777777l' -52,23-52,24: NEWLINE '\012' +52,23-52,24: NEWLINE '\n' 53,0-53,1: NAME 'x' 53,2-53,3: OP '=' 53,4-53,35: NUMBER '123456789012345678901234567890L' -53,35-53,36: NEWLINE '\012' +53,35-53,36: NEWLINE '\n' 54,0-54,1: NAME 'x' 54,2-54,3: OP '=' 54,4-54,35: NUMBER '123456789012345678901234567890l' -54,35-54,36: NEWLINE '\012' -55,0-55,1: NL '\012' -56,0-56,25: COMMENT '# Floating-point numbers\012' +54,35-54,36: NEWLINE '\n' +55,0-55,1: NL '\n' +56,0-56,25: COMMENT '# Floating-point numbers\n' 57,0-57,1: NAME 'x' 57,2-57,3: OP '=' 57,4-57,8: NUMBER '3.14' -57,8-57,9: NEWLINE '\012' +57,8-57,9: NEWLINE '\n' 58,0-58,1: NAME 'x' 58,2-58,3: OP '=' 58,4-58,8: NUMBER '314.' -58,8-58,9: NEWLINE '\012' +58,8-58,9: NEWLINE '\n' 59,0-59,1: NAME 'x' 59,2-59,3: OP '=' 59,4-59,9: NUMBER '0.314' -59,9-59,10: NEWLINE '\012' -60,0-60,18: COMMENT '# XXX x = 000.314\012' +59,9-59,10: NEWLINE '\n' +60,0-60,18: COMMENT '# XXX x = 000.314\n' 61,0-61,1: NAME 'x' 61,2-61,3: OP '=' 61,4-61,8: NUMBER '.314' -61,8-61,9: NEWLINE '\012' +61,8-61,9: NEWLINE '\n' 62,0-62,1: NAME 'x' 62,2-62,3: OP '=' 62,4-62,8: NUMBER '3e14' -62,8-62,9: NEWLINE '\012' +62,8-62,9: NEWLINE '\n' 63,0-63,1: NAME 'x' 63,2-63,3: OP '=' 63,4-63,8: NUMBER '3E14' -63,8-63,9: NEWLINE '\012' +63,8-63,9: NEWLINE '\n' 64,0-64,1: NAME 'x' 64,2-64,3: OP '=' 64,4-64,9: NUMBER '3e-14' -64,9-64,10: NEWLINE '\012' +64,9-64,10: NEWLINE '\n' 65,0-65,1: NAME 'x' 65,2-65,3: OP '=' 65,4-65,9: NUMBER '3e+14' -65,9-65,10: NEWLINE '\012' +65,9-65,10: NEWLINE '\n' 66,0-66,1: NAME 'x' 66,2-66,3: OP '=' 66,4-66,9: NUMBER '3.e14' -66,9-66,10: NEWLINE '\012' +66,9-66,10: NEWLINE '\n' 67,0-67,1: NAME 'x' 67,2-67,3: OP '=' 67,4-67,9: NUMBER '.3e14' -67,9-67,10: NEWLINE '\012' +67,9-67,10: NEWLINE '\n' 68,0-68,1: NAME 'x' 68,2-68,3: OP '=' 68,4-68,9: NUMBER '3.1e4' -68,9-68,10: NEWLINE '\012' -69,0-69,1: NL '\012' -70,0-70,18: COMMENT '# String literals\012' +68,9-68,10: NEWLINE '\n' +69,0-69,1: NL '\n' +70,0-70,18: COMMENT '# String literals\n' 71,0-71,1: NAME 'x' 71,2-71,3: OP '=' 71,4-71,6: STRING "''" @@ -227,7 +227,7 @@ test_tokenize 71,10-71,11: OP '=' 71,12-71,14: STRING '""' 71,14-71,15: OP ';' -71,15-71,16: NEWLINE '\012' +71,15-71,16: NEWLINE '\n' 72,0-72,1: NAME 'x' 72,2-72,3: OP '=' 72,4-72,8: STRING "'\\''" @@ -236,7 +236,7 @@ test_tokenize 72,12-72,13: OP '=' 72,14-72,17: STRING '"\'"' 72,17-72,18: OP ';' -72,18-72,19: NEWLINE '\012' +72,18-72,19: NEWLINE '\n' 73,0-73,1: NAME 'x' 73,2-73,3: OP '=' 73,4-73,7: STRING '\'"\'' @@ -245,348 +245,404 @@ test_tokenize 73,11-73,12: OP '=' 73,13-73,17: STRING '"\\""' 73,17-73,18: OP ';' -73,18-73,19: NEWLINE '\012' +73,18-73,19: NEWLINE '\n' 74,0-74,1: NAME 'x' 74,2-74,3: OP '=' 74,4-74,32: STRING '"doesn\'t \\"shrink\\" does it"' -74,32-74,33: NEWLINE '\012' +74,32-74,33: NEWLINE '\n' 75,0-75,1: NAME 'y' 75,2-75,3: OP '=' 75,4-75,31: STRING '\'doesn\\\'t "shrink" does it\'' -75,31-75,32: NEWLINE '\012' +75,31-75,32: NEWLINE '\n' 76,0-76,1: NAME 'x' 76,2-76,3: OP '=' 76,4-76,32: STRING '"does \\"shrink\\" doesn\'t it"' -76,32-76,33: NEWLINE '\012' +76,32-76,33: NEWLINE '\n' 77,0-77,1: NAME 'y' 77,2-77,3: OP '=' 77,4-77,31: STRING '\'does "shrink" doesn\\\'t it\'' -77,31-77,32: NEWLINE '\012' +77,31-77,32: NEWLINE '\n' 78,0-78,1: NAME 'x' 78,2-78,3: OP '=' -78,4-83,3: STRING '"""\012The "quick"\012brown fox\012jumps over\012the \'lazy\' dog.\012"""' -83,3-83,4: NEWLINE '\012' +78,4-83,3: STRING '"""\nThe "quick"\nbrown fox\njumps over\nthe \'lazy\' dog.\n"""' +83,3-83,4: NEWLINE '\n' 84,0-84,1: NAME 'y' 84,2-84,3: OP '=' 84,4-84,63: STRING '\'\\nThe "quick"\\nbrown fox\\njumps over\\nthe \\\'lazy\\\' dog.\\n\'' -84,63-84,64: NEWLINE '\012' +84,63-84,64: NEWLINE '\n' 85,0-85,1: NAME 'y' 85,2-85,3: OP '=' -85,4-90,3: STRING '\'\'\'\012The "quick"\012brown fox\012jumps over\012the \'lazy\' dog.\012\'\'\'' +85,4-90,3: STRING '\'\'\'\nThe "quick"\nbrown fox\njumps over\nthe \'lazy\' dog.\n\'\'\'' 90,3-90,4: OP ';' -90,4-90,5: NEWLINE '\012' +90,4-90,5: NEWLINE '\n' 91,0-91,1: NAME 'y' 91,2-91,3: OP '=' -91,4-96,1: STRING '"\\n\\\012The \\"quick\\"\\n\\\012brown fox\\n\\\012jumps over\\n\\\012the \'lazy\' dog.\\n\\\012"' +91,4-96,1: STRING '"\\n\\\nThe \\"quick\\"\\n\\\nbrown fox\\n\\\njumps over\\n\\\nthe \'lazy\' dog.\\n\\\n"' 96,1-96,2: OP ';' -96,2-96,3: NEWLINE '\012' +96,2-96,3: NEWLINE '\n' 97,0-97,1: NAME 'y' 97,2-97,3: OP '=' -97,4-102,1: STRING '\'\\n\\\012The \\"quick\\"\\n\\\012brown fox\\n\\\012jumps over\\n\\\012the \\\'lazy\\\' dog.\\n\\\012\'' +97,4-102,1: STRING '\'\\n\\\nThe \\"quick\\"\\n\\\nbrown fox\\n\\\njumps over\\n\\\nthe \\\'lazy\\\' dog.\\n\\\n\'' 102,1-102,2: OP ';' -102,2-102,3: NEWLINE '\012' +102,2-102,3: NEWLINE '\n' 103,0-103,1: NAME 'x' 103,2-103,3: OP '=' 103,4-103,9: STRING "r'\\\\'" 103,10-103,11: OP '+' 103,12-103,17: STRING "R'\\\\'" -103,17-103,18: NEWLINE '\012' +103,17-103,18: NEWLINE '\n' 104,0-104,1: NAME 'x' 104,2-104,3: OP '=' 104,4-104,9: STRING "r'\\''" 104,10-104,11: OP '+' 104,12-104,14: STRING "''" -104,14-104,15: NEWLINE '\012' +104,14-104,15: NEWLINE '\n' 105,0-105,1: NAME 'y' 105,2-105,3: OP '=' -105,4-107,6: STRING "r'''\012foo bar \\\\\012baz'''" +105,4-107,6: STRING "r'''\nfoo bar \\\\\nbaz'''" 107,7-107,8: OP '+' -107,9-108,6: STRING "R'''\012foo'''" -108,6-108,7: NEWLINE '\012' +107,9-108,6: STRING "R'''\nfoo'''" +108,6-108,7: NEWLINE '\n' 109,0-109,1: NAME 'y' 109,2-109,3: OP '=' -109,4-111,3: STRING 'r"""foo\012bar \\\\ baz\012"""' +109,4-111,3: STRING 'r"""foo\nbar \\\\ baz\n"""' 111,4-111,5: OP '+' -111,6-112,3: STRING "R'''spam\012'''" -112,3-112,4: NEWLINE '\012' -113,0-113,1: NL '\012' -114,0-114,14: COMMENT '# Indentation\012' -115,0-115,2: NAME 'if' -115,3-115,4: NUMBER '1' -115,4-115,5: OP ':' -115,5-115,6: NEWLINE '\012' -116,0-116,4: INDENT ' ' -116,4-116,5: NAME 'x' -116,6-116,7: OP '=' -116,8-116,9: NUMBER '2' -116,9-116,10: NEWLINE '\012' -117,0-117,0: DEDENT '' -117,0-117,2: NAME 'if' -117,3-117,4: NUMBER '1' -117,4-117,5: OP ':' -117,5-117,6: NEWLINE '\012' -118,0-118,8: INDENT ' ' -118,8-118,9: NAME 'x' -118,10-118,11: OP '=' -118,12-118,13: NUMBER '2' -118,13-118,14: NEWLINE '\012' -119,0-119,0: DEDENT '' -119,0-119,2: NAME 'if' -119,3-119,4: NUMBER '1' -119,4-119,5: OP ':' -119,5-119,6: NEWLINE '\012' -120,0-120,4: INDENT ' ' -120,4-120,9: NAME 'while' -120,10-120,11: NUMBER '0' -120,11-120,12: OP ':' -120,12-120,13: NEWLINE '\012' -121,0-121,5: INDENT ' ' -121,5-121,7: NAME 'if' -121,8-121,9: NUMBER '0' -121,9-121,10: OP ':' -121,10-121,11: NEWLINE '\012' -122,0-122,11: INDENT ' ' -122,11-122,12: NAME 'x' -122,13-122,14: OP '=' -122,15-122,16: NUMBER '2' -122,16-122,17: NEWLINE '\012' -123,5-123,5: DEDENT '' -123,5-123,6: NAME 'x' -123,7-123,8: OP '=' -123,9-123,10: NUMBER '2' -123,10-123,11: NEWLINE '\012' -124,0-124,0: DEDENT '' -124,0-124,0: DEDENT '' -124,0-124,2: NAME 'if' -124,3-124,4: NUMBER '0' -124,4-124,5: OP ':' -124,5-124,6: NEWLINE '\012' -125,0-125,2: INDENT ' ' -125,2-125,4: NAME 'if' -125,5-125,6: NUMBER '2' -125,6-125,7: OP ':' -125,7-125,8: NEWLINE '\012' -126,0-126,3: INDENT ' ' -126,3-126,8: NAME 'while' -126,9-126,10: NUMBER '0' -126,10-126,11: OP ':' -126,11-126,12: NEWLINE '\012' -127,0-127,8: INDENT ' ' -127,8-127,10: NAME 'if' -127,11-127,12: NUMBER '1' -127,12-127,13: OP ':' -127,13-127,14: NEWLINE '\012' -128,0-128,10: INDENT ' ' -128,10-128,11: NAME 'x' -128,12-128,13: OP '=' -128,14-128,15: NUMBER '2' -128,15-128,16: NEWLINE '\012' -129,0-129,1: NL '\012' -130,0-130,12: COMMENT '# Operators\012' -131,0-131,1: NL '\012' -132,0-132,0: DEDENT '' -132,0-132,0: DEDENT '' -132,0-132,0: DEDENT '' -132,0-132,0: DEDENT '' -132,0-132,3: NAME 'def' -132,4-132,7: NAME 'd22' -132,7-132,8: OP '(' -132,8-132,9: NAME 'a' -132,9-132,10: OP ',' -132,11-132,12: NAME 'b' -132,12-132,13: OP ',' -132,14-132,15: NAME 'c' -132,15-132,16: OP '=' -132,16-132,17: NUMBER '1' -132,17-132,18: OP ',' -132,19-132,20: NAME 'd' -132,20-132,21: OP '=' -132,21-132,22: NUMBER '2' -132,22-132,23: OP ')' -132,23-132,24: OP ':' -132,25-132,29: NAME 'pass' -132,29-132,30: NEWLINE '\012' -133,0-133,3: NAME 'def' -133,4-133,8: NAME 'd01v' -133,8-133,9: OP '(' -133,9-133,10: NAME 'a' -133,10-133,11: OP '=' -133,11-133,12: NUMBER '1' -133,12-133,13: OP ',' -133,14-133,15: OP '*' -133,15-133,19: NAME 'rest' -133,19-133,20: OP ',' -133,21-133,23: OP '**' -133,23-133,27: NAME 'rest' -133,27-133,28: OP ')' -133,28-133,29: OP ':' -133,30-133,34: NAME 'pass' -133,34-133,35: NEWLINE '\012' -134,0-134,1: NL '\012' -135,0-135,1: OP '(' -135,1-135,2: NAME 'x' -135,2-135,3: OP ',' -135,4-135,5: NAME 'y' -135,5-135,6: OP ')' -135,7-135,9: OP '<>' -135,10-135,11: OP '(' -135,11-135,12: OP '{' -135,12-135,15: STRING "'a'" -135,15-135,16: OP ':' -135,16-135,17: NUMBER '1' -135,17-135,18: OP '}' -135,18-135,19: OP ',' -135,20-135,21: OP '{' -135,21-135,24: STRING "'b'" -135,24-135,25: OP ':' -135,25-135,26: NUMBER '2' -135,26-135,27: OP '}' -135,27-135,28: OP ')' -135,28-135,29: NEWLINE '\012' -136,0-136,1: NL '\012' -137,0-137,13: COMMENT '# comparison\012' +111,6-112,3: STRING "R'''spam\n'''" +112,3-112,4: NEWLINE '\n' +113,0-113,1: NAME 'x' +113,2-113,3: OP '=' +113,4-113,10: STRING "u'abc'" +113,11-113,12: OP '+' +113,13-113,19: STRING "U'ABC'" +113,19-113,20: NEWLINE '\n' +114,0-114,1: NAME 'y' +114,2-114,3: OP '=' +114,4-114,10: STRING 'u"abc"' +114,11-114,12: OP '+' +114,13-114,19: STRING 'U"ABC"' +114,19-114,20: NEWLINE '\n' +115,0-115,1: NAME 'x' +115,2-115,3: OP '=' +115,4-115,11: STRING "ur'abc'" +115,12-115,13: OP '+' +115,14-115,21: STRING "Ur'ABC'" +115,22-115,23: OP '+' +115,24-115,31: STRING "uR'ABC'" +115,32-115,33: OP '+' +115,34-115,41: STRING "UR'ABC'" +115,41-115,42: NEWLINE '\n' +116,0-116,1: NAME 'y' +116,2-116,3: OP '=' +116,4-116,11: STRING 'ur"abc"' +116,12-116,13: OP '+' +116,14-116,21: STRING 'Ur"ABC"' +116,22-116,23: OP '+' +116,24-116,31: STRING 'uR"ABC"' +116,32-116,33: OP '+' +116,34-116,41: STRING 'UR"ABC"' +116,41-116,42: NEWLINE '\n' +117,0-117,1: NAME 'x' +117,2-117,3: OP '=' +117,4-117,10: STRING "ur'\\\\'" +117,11-117,12: OP '+' +117,13-117,19: STRING "UR'\\\\'" +117,19-117,20: NEWLINE '\n' +118,0-118,1: NAME 'x' +118,2-118,3: OP '=' +118,4-118,10: STRING "ur'\\''" +118,11-118,12: OP '+' +118,13-118,15: STRING "''" +118,15-118,16: NEWLINE '\n' +119,0-119,1: NAME 'y' +119,2-119,3: OP '=' +119,4-121,6: STRING "ur'''\nfoo bar \\\\\nbaz'''" +121,7-121,8: OP '+' +121,9-122,6: STRING "UR'''\nfoo'''" +122,6-122,7: NEWLINE '\n' +123,0-123,1: NAME 'y' +123,2-123,3: OP '=' +123,4-125,3: STRING 'Ur"""foo\nbar \\\\ baz\n"""' +125,4-125,5: OP '+' +125,6-126,3: STRING "uR'''spam\n'''" +126,3-126,4: NEWLINE '\n' +127,0-127,1: NL '\n' +128,0-128,14: COMMENT '# Indentation\n' +129,0-129,2: NAME 'if' +129,3-129,4: NUMBER '1' +129,4-129,5: OP ':' +129,5-129,6: NEWLINE '\n' +130,0-130,4: INDENT ' ' +130,4-130,5: NAME 'x' +130,6-130,7: OP '=' +130,8-130,9: NUMBER '2' +130,9-130,10: NEWLINE '\n' +131,0-131,0: DEDENT '' +131,0-131,2: NAME 'if' +131,3-131,4: NUMBER '1' +131,4-131,5: OP ':' +131,5-131,6: NEWLINE '\n' +132,0-132,8: INDENT ' ' +132,8-132,9: NAME 'x' +132,10-132,11: OP '=' +132,12-132,13: NUMBER '2' +132,13-132,14: NEWLINE '\n' +133,0-133,0: DEDENT '' +133,0-133,2: NAME 'if' +133,3-133,4: NUMBER '1' +133,4-133,5: OP ':' +133,5-133,6: NEWLINE '\n' +134,0-134,4: INDENT ' ' +134,4-134,9: NAME 'while' +134,10-134,11: NUMBER '0' +134,11-134,12: OP ':' +134,12-134,13: NEWLINE '\n' +135,0-135,5: INDENT ' ' +135,5-135,7: NAME 'if' +135,8-135,9: NUMBER '0' +135,9-135,10: OP ':' +135,10-135,11: NEWLINE '\n' +136,0-136,11: INDENT ' ' +136,11-136,12: NAME 'x' +136,13-136,14: OP '=' +136,15-136,16: NUMBER '2' +136,16-136,17: NEWLINE '\n' +137,5-137,5: DEDENT '' +137,5-137,6: NAME 'x' +137,7-137,8: OP '=' +137,9-137,10: NUMBER '2' +137,10-137,11: NEWLINE '\n' +138,0-138,0: DEDENT '' +138,0-138,0: DEDENT '' 138,0-138,2: NAME 'if' -138,3-138,4: NUMBER '1' -138,5-138,6: OP '<' -138,7-138,8: NUMBER '1' -138,9-138,10: OP '>' -138,11-138,12: NUMBER '1' -138,13-138,15: OP '==' -138,16-138,17: NUMBER '1' -138,18-138,20: OP '>=' -138,21-138,22: NUMBER '1' -138,23-138,25: OP '<=' -138,26-138,27: NUMBER '1' -138,28-138,30: OP '<>' -138,31-138,32: NUMBER '1' -138,33-138,35: OP '!=' -138,36-138,37: NUMBER '1' -138,38-138,40: NAME 'in' -138,41-138,42: NUMBER '1' -138,43-138,46: NAME 'not' -138,47-138,49: NAME 'in' -138,50-138,51: NUMBER '1' -138,52-138,54: NAME 'is' -138,55-138,56: NUMBER '1' -138,57-138,59: NAME 'is' -138,60-138,63: NAME 'not' -138,64-138,65: NUMBER '1' -138,65-138,66: OP ':' -138,67-138,71: NAME 'pass' -138,71-138,72: NEWLINE '\012' -139,0-139,1: NL '\012' -140,0-140,9: COMMENT '# binary\012' -141,0-141,1: NAME 'x' -141,2-141,3: OP '=' -141,4-141,5: NUMBER '1' -141,6-141,7: OP '&' -141,8-141,9: NUMBER '1' -141,9-141,10: NEWLINE '\012' -142,0-142,1: NAME 'x' -142,2-142,3: OP '=' -142,4-142,5: NUMBER '1' -142,6-142,7: OP '^' -142,8-142,9: NUMBER '1' -142,9-142,10: NEWLINE '\012' -143,0-143,1: NAME 'x' -143,2-143,3: OP '=' -143,4-143,5: NUMBER '1' -143,6-143,7: OP '|' -143,8-143,9: NUMBER '1' -143,9-143,10: NEWLINE '\012' -144,0-144,1: NL '\012' -145,0-145,8: COMMENT '# shift\012' -146,0-146,1: NAME 'x' -146,2-146,3: OP '=' -146,4-146,5: NUMBER '1' -146,6-146,8: OP '<<' -146,9-146,10: NUMBER '1' -146,11-146,13: OP '>>' -146,14-146,15: NUMBER '1' -146,15-146,16: NEWLINE '\012' -147,0-147,1: NL '\012' -148,0-148,11: COMMENT '# additive\012' -149,0-149,1: NAME 'x' -149,2-149,3: OP '=' -149,4-149,5: NUMBER '1' -149,6-149,7: OP '-' -149,8-149,9: NUMBER '1' -149,10-149,11: OP '+' -149,12-149,13: NUMBER '1' -149,14-149,15: OP '-' +138,3-138,4: NUMBER '0' +138,4-138,5: OP ':' +138,5-138,6: NEWLINE '\n' +139,0-139,2: INDENT ' ' +139,2-139,4: NAME 'if' +139,5-139,6: NUMBER '2' +139,6-139,7: OP ':' +139,7-139,8: NEWLINE '\n' +140,0-140,3: INDENT ' ' +140,3-140,8: NAME 'while' +140,9-140,10: NUMBER '0' +140,10-140,11: OP ':' +140,11-140,12: NEWLINE '\n' +141,0-141,8: INDENT ' ' +141,8-141,10: NAME 'if' +141,11-141,12: NUMBER '1' +141,12-141,13: OP ':' +141,13-141,14: NEWLINE '\n' +142,0-142,10: INDENT ' ' +142,10-142,11: NAME 'x' +142,12-142,13: OP '=' +142,14-142,15: NUMBER '2' +142,15-142,16: NEWLINE '\n' +143,0-143,1: NL '\n' +144,0-144,12: COMMENT '# Operators\n' +145,0-145,1: NL '\n' +146,0-146,0: DEDENT '' +146,0-146,0: DEDENT '' +146,0-146,0: DEDENT '' +146,0-146,0: DEDENT '' +146,0-146,3: NAME 'def' +146,4-146,7: NAME 'd22' +146,7-146,8: OP '(' +146,8-146,9: NAME 'a' +146,9-146,10: OP ',' +146,11-146,12: NAME 'b' +146,12-146,13: OP ',' +146,14-146,15: NAME 'c' +146,15-146,16: OP '=' +146,16-146,17: NUMBER '1' +146,17-146,18: OP ',' +146,19-146,20: NAME 'd' +146,20-146,21: OP '=' +146,21-146,22: NUMBER '2' +146,22-146,23: OP ')' +146,23-146,24: OP ':' +146,25-146,29: NAME 'pass' +146,29-146,30: NEWLINE '\n' +147,0-147,3: NAME 'def' +147,4-147,8: NAME 'd01v' +147,8-147,9: OP '(' +147,9-147,10: NAME 'a' +147,10-147,11: OP '=' +147,11-147,12: NUMBER '1' +147,12-147,13: OP ',' +147,14-147,15: OP '*' +147,15-147,19: NAME 'rest' +147,19-147,20: OP ',' +147,21-147,23: OP '**' +147,23-147,27: NAME 'rest' +147,27-147,28: OP ')' +147,28-147,29: OP ':' +147,30-147,34: NAME 'pass' +147,34-147,35: NEWLINE '\n' +148,0-148,1: NL '\n' +149,0-149,1: OP '(' +149,1-149,2: NAME 'x' +149,2-149,3: OP ',' +149,4-149,5: NAME 'y' +149,5-149,6: OP ')' +149,7-149,9: OP '<>' +149,10-149,11: OP '(' +149,11-149,12: OP '{' +149,12-149,15: STRING "'a'" +149,15-149,16: OP ':' 149,16-149,17: NUMBER '1' -149,18-149,19: OP '+' -149,20-149,21: NUMBER '1' -149,21-149,22: NEWLINE '\012' -150,0-150,1: NL '\012' -151,0-151,17: COMMENT '# multiplicative\012' -152,0-152,1: NAME 'x' -152,2-152,3: OP '=' -152,4-152,5: NUMBER '1' -152,6-152,7: OP '/' -152,8-152,9: NUMBER '1' -152,10-152,11: OP '*' -152,12-152,13: NUMBER '1' -152,14-152,15: OP '%' +149,17-149,18: OP '}' +149,18-149,19: OP ',' +149,20-149,21: OP '{' +149,21-149,24: STRING "'b'" +149,24-149,25: OP ':' +149,25-149,26: NUMBER '2' +149,26-149,27: OP '}' +149,27-149,28: OP ')' +149,28-149,29: NEWLINE '\n' +150,0-150,1: NL '\n' +151,0-151,13: COMMENT '# comparison\n' +152,0-152,2: NAME 'if' +152,3-152,4: NUMBER '1' +152,5-152,6: OP '<' +152,7-152,8: NUMBER '1' +152,9-152,10: OP '>' +152,11-152,12: NUMBER '1' +152,13-152,15: OP '==' 152,16-152,17: NUMBER '1' -152,17-152,18: NEWLINE '\012' -153,0-153,1: NL '\012' -154,0-154,8: COMMENT '# unary\012' +152,18-152,20: OP '>=' +152,21-152,22: NUMBER '1' +152,23-152,25: OP '<=' +152,26-152,27: NUMBER '1' +152,28-152,30: OP '<>' +152,31-152,32: NUMBER '1' +152,33-152,35: OP '!=' +152,36-152,37: NUMBER '1' +152,38-152,40: NAME 'in' +152,41-152,42: NUMBER '1' +152,43-152,46: NAME 'not' +152,47-152,49: NAME 'in' +152,50-152,51: NUMBER '1' +152,52-152,54: NAME 'is' +152,55-152,56: NUMBER '1' +152,57-152,59: NAME 'is' +152,60-152,63: NAME 'not' +152,64-152,65: NUMBER '1' +152,65-152,66: OP ':' +152,67-152,71: NAME 'pass' +152,71-152,72: NEWLINE '\n' +153,0-153,1: NL '\n' +154,0-154,9: COMMENT '# binary\n' 155,0-155,1: NAME 'x' 155,2-155,3: OP '=' -155,4-155,5: OP '~' -155,5-155,6: NUMBER '1' -155,7-155,8: OP '^' -155,9-155,10: NUMBER '1' -155,11-155,12: OP '&' -155,13-155,14: NUMBER '1' -155,15-155,16: OP '|' -155,17-155,18: NUMBER '1' -155,19-155,20: OP '&' -155,21-155,22: NUMBER '1' -155,23-155,24: OP '^' -155,25-155,26: OP '-' -155,26-155,27: NUMBER '1' -155,27-155,28: NEWLINE '\012' +155,4-155,5: NUMBER '1' +155,6-155,7: OP '&' +155,8-155,9: NUMBER '1' +155,9-155,10: NEWLINE '\n' 156,0-156,1: NAME 'x' 156,2-156,3: OP '=' -156,4-156,5: OP '-' -156,5-156,6: NUMBER '1' -156,6-156,7: OP '*' -156,7-156,8: NUMBER '1' -156,8-156,9: OP '/' -156,9-156,10: NUMBER '1' -156,11-156,12: OP '+' -156,13-156,14: NUMBER '1' -156,14-156,15: OP '*' -156,15-156,16: NUMBER '1' -156,17-156,18: OP '-' -156,19-156,20: OP '-' -156,20-156,21: OP '-' -156,21-156,22: OP '-' -156,22-156,23: NUMBER '1' -156,23-156,24: OP '*' -156,24-156,25: NUMBER '1' -156,25-156,26: NEWLINE '\012' -157,0-157,1: NL '\012' -158,0-158,11: COMMENT '# selector\012' -159,0-159,6: NAME 'import' -159,7-159,10: NAME 'sys' -159,10-159,11: OP ',' -159,12-159,16: NAME 'time' -159,16-159,17: NEWLINE '\012' +156,4-156,5: NUMBER '1' +156,6-156,7: OP '^' +156,8-156,9: NUMBER '1' +156,9-156,10: NEWLINE '\n' +157,0-157,1: NAME 'x' +157,2-157,3: OP '=' +157,4-157,5: NUMBER '1' +157,6-157,7: OP '|' +157,8-157,9: NUMBER '1' +157,9-157,10: NEWLINE '\n' +158,0-158,1: NL '\n' +159,0-159,8: COMMENT '# shift\n' 160,0-160,1: NAME 'x' 160,2-160,3: OP '=' -160,4-160,7: NAME 'sys' -160,7-160,8: OP '.' -160,8-160,15: NAME 'modules' -160,15-160,16: OP '[' -160,16-160,22: STRING "'time'" -160,22-160,23: OP ']' -160,23-160,24: OP '.' -160,24-160,28: NAME 'time' -160,28-160,29: OP '(' -160,29-160,30: OP ')' -160,30-160,31: NEWLINE '\012' -161,0-161,1: NL '\012' -162,0-162,0: ENDMARKER '' +160,4-160,5: NUMBER '1' +160,6-160,8: OP '<<' +160,9-160,10: NUMBER '1' +160,11-160,13: OP '>>' +160,14-160,15: NUMBER '1' +160,15-160,16: NEWLINE '\n' +161,0-161,1: NL '\n' +162,0-162,11: COMMENT '# additive\n' +163,0-163,1: NAME 'x' +163,2-163,3: OP '=' +163,4-163,5: NUMBER '1' +163,6-163,7: OP '-' +163,8-163,9: NUMBER '1' +163,10-163,11: OP '+' +163,12-163,13: NUMBER '1' +163,14-163,15: OP '-' +163,16-163,17: NUMBER '1' +163,18-163,19: OP '+' +163,20-163,21: NUMBER '1' +163,21-163,22: NEWLINE '\n' +164,0-164,1: NL '\n' +165,0-165,17: COMMENT '# multiplicative\n' +166,0-166,1: NAME 'x' +166,2-166,3: OP '=' +166,4-166,5: NUMBER '1' +166,6-166,7: OP '/' +166,8-166,9: NUMBER '1' +166,10-166,11: OP '*' +166,12-166,13: NUMBER '1' +166,14-166,15: OP '%' +166,16-166,17: NUMBER '1' +166,17-166,18: NEWLINE '\n' +167,0-167,1: NL '\n' +168,0-168,8: COMMENT '# unary\n' +169,0-169,1: NAME 'x' +169,2-169,3: OP '=' +169,4-169,5: OP '~' +169,5-169,6: NUMBER '1' +169,7-169,8: OP '^' +169,9-169,10: NUMBER '1' +169,11-169,12: OP '&' +169,13-169,14: NUMBER '1' +169,15-169,16: OP '|' +169,17-169,18: NUMBER '1' +169,19-169,20: OP '&' +169,21-169,22: NUMBER '1' +169,23-169,24: OP '^' +169,25-169,26: OP '-' +169,26-169,27: NUMBER '1' +169,27-169,28: NEWLINE '\n' +170,0-170,1: NAME 'x' +170,2-170,3: OP '=' +170,4-170,5: OP '-' +170,5-170,6: NUMBER '1' +170,6-170,7: OP '*' +170,7-170,8: NUMBER '1' +170,8-170,9: OP '/' +170,9-170,10: NUMBER '1' +170,11-170,12: OP '+' +170,13-170,14: NUMBER '1' +170,14-170,15: OP '*' +170,15-170,16: NUMBER '1' +170,17-170,18: OP '-' +170,19-170,20: OP '-' +170,20-170,21: OP '-' +170,21-170,22: OP '-' +170,22-170,23: NUMBER '1' +170,23-170,24: OP '*' +170,24-170,25: NUMBER '1' +170,25-170,26: NEWLINE '\n' +171,0-171,1: NL '\n' +172,0-172,11: COMMENT '# selector\n' +173,0-173,6: NAME 'import' +173,7-173,10: NAME 'sys' +173,10-173,11: OP ',' +173,12-173,16: NAME 'time' +173,16-173,17: NEWLINE '\n' +174,0-174,1: NAME 'x' +174,2-174,3: OP '=' +174,4-174,7: NAME 'sys' +174,7-174,8: OP '.' +174,8-174,15: NAME 'modules' +174,15-174,16: OP '[' +174,16-174,22: STRING "'time'" +174,22-174,23: OP ']' +174,23-174,24: OP '.' +174,24-174,28: NAME 'time' +174,28-174,29: OP '(' +174,29-174,30: OP ')' +174,30-174,31: NEWLINE '\n' +175,0-175,1: NL '\n' +176,0-176,0: ENDMARKER '' diff --git a/Lib/test/tokenize_tests.py b/Lib/test/tokenize_tests.py index 48836683045..4974671f282 100644 --- a/Lib/test/tokenize_tests.py +++ b/Lib/test/tokenize_tests.py @@ -110,6 +110,20 @@ y = r"""foo bar \\ baz """ + R'''spam ''' +x = u'abc' + U'ABC' +y = u"abc" + U"ABC" +x = ur'abc' + Ur'ABC' + uR'ABC' + UR'ABC' +y = ur"abc" + Ur"ABC" + uR"ABC" + UR"ABC" +x = ur'\\' + UR'\\' +x = ur'\'' + '' +y = ur''' +foo bar \\ +baz''' + UR''' +foo''' +y = Ur"""foo +bar \\ baz +""" + uR'''spam +''' # Indentation if 1: diff --git a/Lib/tokenize.py b/Lib/tokenize.py index d2051594bae..3014b1958b1 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -54,10 +54,10 @@ Double = r'[^"\\]*(?:\\.[^"\\]*)*"' Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''" # Tail end of """ string. Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""' -Triple = group("[rR]?'''", '[rR]?"""') +Triple = group("[uU]?[rR]?'''", '[uU]?[rR]?"""') # Single-line ' or " string. -String = group(r"[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'", - r'[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"') +String = group(r"[uU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'", + r'[uU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"') # Because of leftmost-then-longest match semantics, be sure to put the # longest operators first (e.g., if = came before ==, == would get @@ -74,8 +74,10 @@ PlainToken = group(Number, Funny, String, Name) Token = Ignore + PlainToken # First (or only) line of ' or " string. -ContStr = group(r"[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" + group("'", r'\\\r?\n'), - r'[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' + group('"', r'\\\r?\n')) +ContStr = group(r"[uU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" + + group("'", r'\\\r?\n'), + r'[uU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' + + group('"', r'\\\r?\n')) PseudoExtras = group(r'\\\r?\n', Comment, Triple) PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name) @@ -84,7 +86,14 @@ tokenprog, pseudoprog, single3prog, double3prog = map( endprogs = {"'": re.compile(Single), '"': re.compile(Double), "'''": single3prog, '"""': double3prog, "r'''": single3prog, 'r"""': double3prog, - "R'''": single3prog, 'R"""': double3prog, 'r': None, 'R': None} + "u'''": single3prog, 'u"""': double3prog, + "ur'''": single3prog, 'ur"""': double3prog, + "R'''": single3prog, 'R"""': double3prog, + "U'''": single3prog, 'U"""': double3prog, + "uR'''": single3prog, 'uR"""': double3prog, + "Ur'''": single3prog, 'Ur"""': double3prog, + "UR'''": single3prog, 'UR"""': double3prog, + 'r': None, 'R': None, 'u': None, 'U': None} tabsize = 8 @@ -172,7 +181,10 @@ def tokenize(readline, tokeneater=printtoken): elif initial == '#': tokeneater(COMMENT, token, spos, epos, line) elif token in ("'''", '"""', # triple-quoted - "r'''", 'r"""', "R'''", 'R"""'): + "r'''", 'r"""', "R'''", 'R"""', + "u'''", 'u"""', "U'''", 'U"""', + "ur'''", 'ur"""', "Ur'''", 'Ur"""', + "uR'''", 'uR"""', "UR'''", 'UR"""'): endprog = endprogs[token] endmatch = endprog.match(line, pos) if endmatch: # all on one line @@ -185,10 +197,14 @@ def tokenize(readline, tokeneater=printtoken): contline = line break elif initial in ("'", '"') or \ - token[:2] in ("r'", 'r"', "R'", 'R"'): + token[:2] in ("r'", 'r"', "R'", 'R"', + "u'", 'u"', "U'", 'U"') or \ + token[:3] in ("ur'", 'ur"', "Ur'", 'Ur"', + "uR'", 'uR"', "UR'", 'UR"' ): if token[-1] == '\n': # continued string strstart = (lnum, start) - endprog = endprogs[initial] or endprogs[token[1]] + endprog = (endprogs[initial] or endprogs[token[1]] or + endprogs[token[2]]) contstr, needcont = line[start:], 1 contline = line break