mirror of https://github.com/python/cpython

Add tests for the C tokenizer and expose it as a private module (GH-27924)

This commit is contained in:
parent 9ed523159c
commit a24676bedc

Lib/test/test_tokenize.py
@@ -3,7 +3,7 @@ from test.support import os_helper
from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
                      STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
                      open as tokenize_open, Untokenizer, generate_tokens,
-                     NEWLINE)
+                     NEWLINE, _generate_tokens_from_c_tokenizer)
from io import BytesIO, StringIO
import unittest
from unittest import TestCase, mock

@@ -12,7 +12,6 @@ from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
import os
import token


# Converts a source string into a list of textual representation
# of the tokens such as:
# ` NAME 'if' (1, 0) (1, 2)`
@@ -1654,5 +1653,865 @@ class TestRoundtrip(TestCase):
        self.check_roundtrip(code)


class CTokenizeTest(TestCase):
    def check_tokenize(self, s, expected):
        # Format the tokens in s in a table format.
        # The ENDMARKER and final NEWLINE are omitted.
        with self.subTest(source=s):
            result = stringify_tokens_from_source(
                _generate_tokens_from_c_tokenizer(s), s
            )
            self.assertEqual(result, expected.rstrip().splitlines())

    def test_int(self):

        self.check_tokenize('0xff <= 255', """\
NUMBER '0xff' (1, 0) (1, 4)
LESSEQUAL '<=' (1, 5) (1, 7)
NUMBER '255' (1, 8) (1, 11)
""")

        self.check_tokenize('0b10 <= 255', """\
NUMBER '0b10' (1, 0) (1, 4)
LESSEQUAL '<=' (1, 5) (1, 7)
NUMBER '255' (1, 8) (1, 11)
""")

        self.check_tokenize('0o123 <= 0O123', """\
NUMBER '0o123' (1, 0) (1, 5)
LESSEQUAL '<=' (1, 6) (1, 8)
NUMBER '0O123' (1, 9) (1, 14)
""")

        self.check_tokenize('1234567 > ~0x15', """\
NUMBER '1234567' (1, 0) (1, 7)
GREATER '>' (1, 8) (1, 9)
TILDE '~' (1, 10) (1, 11)
NUMBER '0x15' (1, 11) (1, 15)
""")

        self.check_tokenize('2134568 != 1231515', """\
NUMBER '2134568' (1, 0) (1, 7)
NOTEQUAL '!=' (1, 8) (1, 10)
NUMBER '1231515' (1, 11) (1, 18)
""")

        self.check_tokenize('(-124561-1) & 200000000', """\
LPAR '(' (1, 0) (1, 1)
MINUS '-' (1, 1) (1, 2)
NUMBER '124561' (1, 2) (1, 8)
MINUS '-' (1, 8) (1, 9)
NUMBER '1' (1, 9) (1, 10)
RPAR ')' (1, 10) (1, 11)
AMPER '&' (1, 12) (1, 13)
NUMBER '200000000' (1, 14) (1, 23)
""")

        self.check_tokenize('0xdeadbeef != -1', """\
NUMBER '0xdeadbeef' (1, 0) (1, 10)
NOTEQUAL '!=' (1, 11) (1, 13)
MINUS '-' (1, 14) (1, 15)
NUMBER '1' (1, 15) (1, 16)
""")

        self.check_tokenize('0xdeadc0de & 12345', """\
NUMBER '0xdeadc0de' (1, 0) (1, 10)
AMPER '&' (1, 11) (1, 12)
NUMBER '12345' (1, 13) (1, 18)
""")

        self.check_tokenize('0xFF & 0x15 | 1234', """\
NUMBER '0xFF' (1, 0) (1, 4)
AMPER '&' (1, 5) (1, 6)
NUMBER '0x15' (1, 7) (1, 11)
VBAR '|' (1, 12) (1, 13)
NUMBER '1234' (1, 14) (1, 18)
""")

    def test_float(self):

        self.check_tokenize('x = 3.14159', """\
NAME 'x' (1, 0) (1, 1)
EQUAL '=' (1, 2) (1, 3)
NUMBER '3.14159' (1, 4) (1, 11)
""")

        self.check_tokenize('x = 314159.', """\
NAME 'x' (1, 0) (1, 1)
EQUAL '=' (1, 2) (1, 3)
NUMBER '314159.' (1, 4) (1, 11)
""")

        self.check_tokenize('x = .314159', """\
NAME 'x' (1, 0) (1, 1)
EQUAL '=' (1, 2) (1, 3)
NUMBER '.314159' (1, 4) (1, 11)
""")

        self.check_tokenize('x = 3e14159', """\
NAME 'x' (1, 0) (1, 1)
EQUAL '=' (1, 2) (1, 3)
NUMBER '3e14159' (1, 4) (1, 11)
""")

        self.check_tokenize('x = 3E123', """\
NAME 'x' (1, 0) (1, 1)
EQUAL '=' (1, 2) (1, 3)
NUMBER '3E123' (1, 4) (1, 9)
""")

        self.check_tokenize('x+y = 3e-1230', """\
NAME 'x' (1, 0) (1, 1)
PLUS '+' (1, 1) (1, 2)
NAME 'y' (1, 2) (1, 3)
EQUAL '=' (1, 4) (1, 5)
NUMBER '3e-1230' (1, 6) (1, 13)
""")

        self.check_tokenize('x = 3.14e159', """\
NAME 'x' (1, 0) (1, 1)
EQUAL '=' (1, 2) (1, 3)
NUMBER '3.14e159' (1, 4) (1, 12)
""")

    def test_string(self):

        self.check_tokenize('x = \'\'; y = ""', """\
NAME 'x' (1, 0) (1, 1)
EQUAL '=' (1, 2) (1, 3)
STRING "''" (1, 4) (1, 6)
SEMI ';' (1, 6) (1, 7)
NAME 'y' (1, 8) (1, 9)
EQUAL '=' (1, 10) (1, 11)
STRING '""' (1, 12) (1, 14)
""")

        self.check_tokenize('x = \'"\'; y = "\'"', """\
NAME 'x' (1, 0) (1, 1)
EQUAL '=' (1, 2) (1, 3)
STRING '\\'"\\'' (1, 4) (1, 7)
SEMI ';' (1, 7) (1, 8)
NAME 'y' (1, 9) (1, 10)
EQUAL '=' (1, 11) (1, 12)
STRING '"\\'"' (1, 13) (1, 16)
""")

        self.check_tokenize('x = "doesn\'t "shrink", does it"', """\
NAME 'x' (1, 0) (1, 1)
EQUAL '=' (1, 2) (1, 3)
STRING '"doesn\\'t "' (1, 4) (1, 14)
NAME 'shrink' (1, 14) (1, 20)
STRING '", does it"' (1, 20) (1, 31)
""")

        self.check_tokenize("x = 'abc' + 'ABC'", """\
NAME 'x' (1, 0) (1, 1)
EQUAL '=' (1, 2) (1, 3)
STRING "'abc'" (1, 4) (1, 9)
PLUS '+' (1, 10) (1, 11)
STRING "'ABC'" (1, 12) (1, 17)
""")

        self.check_tokenize('y = "ABC" + "ABC"', """\
NAME 'y' (1, 0) (1, 1)
EQUAL '=' (1, 2) (1, 3)
STRING '"ABC"' (1, 4) (1, 9)
PLUS '+' (1, 10) (1, 11)
STRING '"ABC"' (1, 12) (1, 17)
""")

        self.check_tokenize("x = r'abc' + r'ABC' + R'ABC' + R'ABC'", """\
NAME 'x' (1, 0) (1, 1)
EQUAL '=' (1, 2) (1, 3)
STRING "r'abc'" (1, 4) (1, 10)
PLUS '+' (1, 11) (1, 12)
STRING "r'ABC'" (1, 13) (1, 19)
PLUS '+' (1, 20) (1, 21)
STRING "R'ABC'" (1, 22) (1, 28)
PLUS '+' (1, 29) (1, 30)
STRING "R'ABC'" (1, 31) (1, 37)
""")

        self.check_tokenize('y = r"abc" + r"ABC" + R"ABC" + R"ABC"', """\
NAME 'y' (1, 0) (1, 1)
EQUAL '=' (1, 2) (1, 3)
STRING 'r"abc"' (1, 4) (1, 10)
PLUS '+' (1, 11) (1, 12)
STRING 'r"ABC"' (1, 13) (1, 19)
PLUS '+' (1, 20) (1, 21)
STRING 'R"ABC"' (1, 22) (1, 28)
PLUS '+' (1, 29) (1, 30)
STRING 'R"ABC"' (1, 31) (1, 37)
""")

        self.check_tokenize("u'abc' + U'abc'", """\
STRING "u'abc'" (1, 0) (1, 6)
PLUS '+' (1, 7) (1, 8)
STRING "U'abc'" (1, 9) (1, 15)
""")

        self.check_tokenize('u"abc" + U"abc"', """\
STRING 'u"abc"' (1, 0) (1, 6)
PLUS '+' (1, 7) (1, 8)
STRING 'U"abc"' (1, 9) (1, 15)
""")

        self.check_tokenize("b'abc' + B'abc'", """\
STRING "b'abc'" (1, 0) (1, 6)
PLUS '+' (1, 7) (1, 8)
STRING "B'abc'" (1, 9) (1, 15)
""")

        self.check_tokenize('b"abc" + B"abc"', """\
STRING 'b"abc"' (1, 0) (1, 6)
PLUS '+' (1, 7) (1, 8)
STRING 'B"abc"' (1, 9) (1, 15)
""")

        self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\
STRING "br'abc'" (1, 0) (1, 7)
PLUS '+' (1, 8) (1, 9)
STRING "bR'abc'" (1, 10) (1, 17)
PLUS '+' (1, 18) (1, 19)
STRING "Br'abc'" (1, 20) (1, 27)
PLUS '+' (1, 28) (1, 29)
STRING "BR'abc'" (1, 30) (1, 37)
""")

        self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\
STRING 'br"abc"' (1, 0) (1, 7)
PLUS '+' (1, 8) (1, 9)
STRING 'bR"abc"' (1, 10) (1, 17)
PLUS '+' (1, 18) (1, 19)
STRING 'Br"abc"' (1, 20) (1, 27)
PLUS '+' (1, 28) (1, 29)
STRING 'BR"abc"' (1, 30) (1, 37)
""")

        self.check_tokenize("rb'abc' + rB'abc' + Rb'abc' + RB'abc'", """\
STRING "rb'abc'" (1, 0) (1, 7)
PLUS '+' (1, 8) (1, 9)
STRING "rB'abc'" (1, 10) (1, 17)
PLUS '+' (1, 18) (1, 19)
STRING "Rb'abc'" (1, 20) (1, 27)
PLUS '+' (1, 28) (1, 29)
STRING "RB'abc'" (1, 30) (1, 37)
""")

        self.check_tokenize('rb"abc" + rB"abc" + Rb"abc" + RB"abc"', """\
STRING 'rb"abc"' (1, 0) (1, 7)
PLUS '+' (1, 8) (1, 9)
STRING 'rB"abc"' (1, 10) (1, 17)
PLUS '+' (1, 18) (1, 19)
STRING 'Rb"abc"' (1, 20) (1, 27)
PLUS '+' (1, 28) (1, 29)
STRING 'RB"abc"' (1, 30) (1, 37)
""")

        self.check_tokenize('"a\\\nde\\\nfg"', """\
STRING '"a\\\\\\nde\\\\\\nfg"\' (1, 0) (3, 3)
""")

        self.check_tokenize('u"a\\\nde"', """\
STRING 'u"a\\\\\\nde"\' (1, 0) (2, 3)
""")

        self.check_tokenize('rb"a\\\nd"', """\
STRING 'rb"a\\\\\\nd"\' (1, 0) (2, 2)
""")

        self.check_tokenize(r'"""a\
b"""', """\
STRING '\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
""")
        self.check_tokenize(r'u"""a\
b"""', """\
STRING 'u\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
""")
        self.check_tokenize(r'rb"""a\
b\
c"""', """\
STRING 'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4)
""")

        self.check_tokenize('f"abc"', """\
STRING 'f"abc"' (1, 0) (1, 6)
""")

        self.check_tokenize('fR"a{b}c"', """\
STRING 'fR"a{b}c"' (1, 0) (1, 9)
""")

        self.check_tokenize('f"""abc"""', """\
STRING 'f\"\"\"abc\"\"\"' (1, 0) (1, 10)
""")

        self.check_tokenize(r'f"abc\
def"', """\
STRING 'f"abc\\\\\\ndef"' (1, 0) (2, 4)
""")

        self.check_tokenize(r'Rf"abc\
def"', """\
STRING 'Rf"abc\\\\\\ndef"' (1, 0) (2, 4)
""")

    def test_function(self):

        self.check_tokenize('def d22(a, b, c=2, d=2, *k): pass', """\
NAME 'def' (1, 0) (1, 3)
NAME 'd22' (1, 4) (1, 7)
LPAR '(' (1, 7) (1, 8)
NAME 'a' (1, 8) (1, 9)
COMMA ',' (1, 9) (1, 10)
NAME 'b' (1, 11) (1, 12)
COMMA ',' (1, 12) (1, 13)
NAME 'c' (1, 14) (1, 15)
EQUAL '=' (1, 15) (1, 16)
NUMBER '2' (1, 16) (1, 17)
COMMA ',' (1, 17) (1, 18)
NAME 'd' (1, 19) (1, 20)
EQUAL '=' (1, 20) (1, 21)
NUMBER '2' (1, 21) (1, 22)
COMMA ',' (1, 22) (1, 23)
STAR '*' (1, 24) (1, 25)
NAME 'k' (1, 25) (1, 26)
RPAR ')' (1, 26) (1, 27)
COLON ':' (1, 27) (1, 28)
NAME 'pass' (1, 29) (1, 33)
""")

        self.check_tokenize('def d01v_(a=1, *k, **w): pass', """\
NAME 'def' (1, 0) (1, 3)
NAME 'd01v_' (1, 4) (1, 9)
LPAR '(' (1, 9) (1, 10)
NAME 'a' (1, 10) (1, 11)
EQUAL '=' (1, 11) (1, 12)
NUMBER '1' (1, 12) (1, 13)
COMMA ',' (1, 13) (1, 14)
STAR '*' (1, 15) (1, 16)
NAME 'k' (1, 16) (1, 17)
COMMA ',' (1, 17) (1, 18)
DOUBLESTAR '**' (1, 19) (1, 21)
NAME 'w' (1, 21) (1, 22)
RPAR ')' (1, 22) (1, 23)
COLON ':' (1, 23) (1, 24)
NAME 'pass' (1, 25) (1, 29)
""")

        self.check_tokenize('def d23(a: str, b: int=3) -> int: pass', """\
NAME 'def' (1, 0) (1, 3)
NAME 'd23' (1, 4) (1, 7)
LPAR '(' (1, 7) (1, 8)
NAME 'a' (1, 8) (1, 9)
COLON ':' (1, 9) (1, 10)
NAME 'str' (1, 11) (1, 14)
COMMA ',' (1, 14) (1, 15)
NAME 'b' (1, 16) (1, 17)
COLON ':' (1, 17) (1, 18)
NAME 'int' (1, 19) (1, 22)
EQUAL '=' (1, 22) (1, 23)
NUMBER '3' (1, 23) (1, 24)
RPAR ')' (1, 24) (1, 25)
RARROW '->' (1, 26) (1, 28)
NAME 'int' (1, 29) (1, 32)
COLON ':' (1, 32) (1, 33)
NAME 'pass' (1, 34) (1, 38)
""")

    def test_comparison(self):

        self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != "
                            "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\
NAME 'if' (1, 0) (1, 2)
NUMBER '1' (1, 3) (1, 4)
LESS '<' (1, 5) (1, 6)
NUMBER '1' (1, 7) (1, 8)
GREATER '>' (1, 9) (1, 10)
NUMBER '1' (1, 11) (1, 12)
EQEQUAL '==' (1, 13) (1, 15)
NUMBER '1' (1, 16) (1, 17)
GREATEREQUAL '>=' (1, 18) (1, 20)
NUMBER '5' (1, 21) (1, 22)
LESSEQUAL '<=' (1, 23) (1, 25)
NUMBER '0x15' (1, 26) (1, 30)
LESSEQUAL '<=' (1, 31) (1, 33)
NUMBER '0x12' (1, 34) (1, 38)
NOTEQUAL '!=' (1, 39) (1, 41)
NUMBER '1' (1, 42) (1, 43)
NAME 'and' (1, 44) (1, 47)
NUMBER '5' (1, 48) (1, 49)
NAME 'in' (1, 50) (1, 52)
NUMBER '1' (1, 53) (1, 54)
NAME 'not' (1, 55) (1, 58)
NAME 'in' (1, 59) (1, 61)
NUMBER '1' (1, 62) (1, 63)
NAME 'is' (1, 64) (1, 66)
NUMBER '1' (1, 67) (1, 68)
NAME 'or' (1, 69) (1, 71)
NUMBER '5' (1, 72) (1, 73)
NAME 'is' (1, 74) (1, 76)
NAME 'not' (1, 77) (1, 80)
NUMBER '1' (1, 81) (1, 82)
COLON ':' (1, 82) (1, 83)
NAME 'pass' (1, 84) (1, 88)
""")

    def test_additive(self):

        self.check_tokenize('x = 1 - y + 15 - 1 + 0x124 + z + a[5]', """\
NAME 'x' (1, 0) (1, 1)
EQUAL '=' (1, 2) (1, 3)
NUMBER '1' (1, 4) (1, 5)
MINUS '-' (1, 6) (1, 7)
NAME 'y' (1, 8) (1, 9)
PLUS '+' (1, 10) (1, 11)
NUMBER '15' (1, 12) (1, 14)
MINUS '-' (1, 15) (1, 16)
NUMBER '1' (1, 17) (1, 18)
PLUS '+' (1, 19) (1, 20)
NUMBER '0x124' (1, 21) (1, 26)
PLUS '+' (1, 27) (1, 28)
NAME 'z' (1, 29) (1, 30)
PLUS '+' (1, 31) (1, 32)
NAME 'a' (1, 33) (1, 34)
LSQB '[' (1, 34) (1, 35)
NUMBER '5' (1, 35) (1, 36)
RSQB ']' (1, 36) (1, 37)
""")

    def test_multiplicative(self):

        self.check_tokenize('x = 1//1*1/5*12%0x12@42', """\
NAME 'x' (1, 0) (1, 1)
EQUAL '=' (1, 2) (1, 3)
NUMBER '1' (1, 4) (1, 5)
DOUBLESLASH '//' (1, 5) (1, 7)
NUMBER '1' (1, 7) (1, 8)
STAR '*' (1, 8) (1, 9)
NUMBER '1' (1, 9) (1, 10)
SLASH '/' (1, 10) (1, 11)
NUMBER '5' (1, 11) (1, 12)
STAR '*' (1, 12) (1, 13)
NUMBER '12' (1, 13) (1, 15)
PERCENT '%' (1, 15) (1, 16)
NUMBER '0x12' (1, 16) (1, 20)
AT '@' (1, 20) (1, 21)
NUMBER '42' (1, 21) (1, 23)
""")

    def test_unary(self):

        self.check_tokenize('~1 ^ 1 & 1 |1 ^ -1', """\
TILDE '~' (1, 0) (1, 1)
NUMBER '1' (1, 1) (1, 2)
CIRCUMFLEX '^' (1, 3) (1, 4)
NUMBER '1' (1, 5) (1, 6)
AMPER '&' (1, 7) (1, 8)
NUMBER '1' (1, 9) (1, 10)
VBAR '|' (1, 11) (1, 12)
NUMBER '1' (1, 12) (1, 13)
CIRCUMFLEX '^' (1, 14) (1, 15)
MINUS '-' (1, 16) (1, 17)
NUMBER '1' (1, 17) (1, 18)
""")

        self.check_tokenize('-1*1/1+1*1//1 - ---1**1', """\
MINUS '-' (1, 0) (1, 1)
NUMBER '1' (1, 1) (1, 2)
STAR '*' (1, 2) (1, 3)
NUMBER '1' (1, 3) (1, 4)
SLASH '/' (1, 4) (1, 5)
NUMBER '1' (1, 5) (1, 6)
PLUS '+' (1, 6) (1, 7)
NUMBER '1' (1, 7) (1, 8)
STAR '*' (1, 8) (1, 9)
NUMBER '1' (1, 9) (1, 10)
DOUBLESLASH '//' (1, 10) (1, 12)
NUMBER '1' (1, 12) (1, 13)
MINUS '-' (1, 14) (1, 15)
MINUS '-' (1, 16) (1, 17)
MINUS '-' (1, 17) (1, 18)
MINUS '-' (1, 18) (1, 19)
NUMBER '1' (1, 19) (1, 20)
DOUBLESTAR '**' (1, 20) (1, 22)
NUMBER '1' (1, 22) (1, 23)
""")

    def test_selector(self):

        self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\
NAME 'import' (1, 0) (1, 6)
NAME 'sys' (1, 7) (1, 10)
COMMA ',' (1, 10) (1, 11)
NAME 'time' (1, 12) (1, 16)
NEWLINE '' (1, 16) (1, 16)
NAME 'x' (2, 0) (2, 1)
EQUAL '=' (2, 2) (2, 3)
NAME 'sys' (2, 4) (2, 7)
DOT '.' (2, 7) (2, 8)
NAME 'modules' (2, 8) (2, 15)
LSQB '[' (2, 15) (2, 16)
STRING "'time'" (2, 16) (2, 22)
RSQB ']' (2, 22) (2, 23)
DOT '.' (2, 23) (2, 24)
NAME 'time' (2, 24) (2, 28)
LPAR '(' (2, 28) (2, 29)
RPAR ')' (2, 29) (2, 30)
""")

    def test_method(self):

        self.check_tokenize('@staticmethod\ndef foo(x,y): pass', """\
AT '@' (1, 0) (1, 1)
NAME 'staticmethod' (1, 1) (1, 13)
NEWLINE '' (1, 13) (1, 13)
NAME 'def' (2, 0) (2, 3)
NAME 'foo' (2, 4) (2, 7)
LPAR '(' (2, 7) (2, 8)
NAME 'x' (2, 8) (2, 9)
COMMA ',' (2, 9) (2, 10)
NAME 'y' (2, 10) (2, 11)
RPAR ')' (2, 11) (2, 12)
COLON ':' (2, 12) (2, 13)
NAME 'pass' (2, 14) (2, 18)
""")

    def test_tabs(self):

        self.check_tokenize('@staticmethod\ndef foo(x,y): pass', """\
AT '@' (1, 0) (1, 1)
NAME 'staticmethod' (1, 1) (1, 13)
NEWLINE '' (1, 13) (1, 13)
NAME 'def' (2, 0) (2, 3)
NAME 'foo' (2, 4) (2, 7)
LPAR '(' (2, 7) (2, 8)
NAME 'x' (2, 8) (2, 9)
COMMA ',' (2, 9) (2, 10)
NAME 'y' (2, 10) (2, 11)
RPAR ')' (2, 11) (2, 12)
COLON ':' (2, 12) (2, 13)
NAME 'pass' (2, 14) (2, 18)
""")

    def test_async(self):

        self.check_tokenize('async = 1', """\
ASYNC 'async' (1, 0) (1, 5)
EQUAL '=' (1, 6) (1, 7)
NUMBER '1' (1, 8) (1, 9)
""")

        self.check_tokenize('a = (async = 1)', """\
NAME 'a' (1, 0) (1, 1)
EQUAL '=' (1, 2) (1, 3)
LPAR '(' (1, 4) (1, 5)
ASYNC 'async' (1, 5) (1, 10)
EQUAL '=' (1, 11) (1, 12)
NUMBER '1' (1, 13) (1, 14)
RPAR ')' (1, 14) (1, 15)
""")

        self.check_tokenize('async()', """\
ASYNC 'async' (1, 0) (1, 5)
LPAR '(' (1, 5) (1, 6)
RPAR ')' (1, 6) (1, 7)
""")

        self.check_tokenize('class async(Bar):pass', """\
NAME 'class' (1, 0) (1, 5)
ASYNC 'async' (1, 6) (1, 11)
LPAR '(' (1, 11) (1, 12)
NAME 'Bar' (1, 12) (1, 15)
RPAR ')' (1, 15) (1, 16)
COLON ':' (1, 16) (1, 17)
NAME 'pass' (1, 17) (1, 21)
""")

        self.check_tokenize('class async:pass', """\
NAME 'class' (1, 0) (1, 5)
ASYNC 'async' (1, 6) (1, 11)
COLON ':' (1, 11) (1, 12)
NAME 'pass' (1, 12) (1, 16)
""")

        self.check_tokenize('await = 1', """\
AWAIT 'await' (1, 0) (1, 5)
EQUAL '=' (1, 6) (1, 7)
NUMBER '1' (1, 8) (1, 9)
""")

        self.check_tokenize('foo.async', """\
NAME 'foo' (1, 0) (1, 3)
DOT '.' (1, 3) (1, 4)
ASYNC 'async' (1, 4) (1, 9)
""")

        self.check_tokenize('async for a in b: pass', """\
ASYNC 'async' (1, 0) (1, 5)
NAME 'for' (1, 6) (1, 9)
NAME 'a' (1, 10) (1, 11)
NAME 'in' (1, 12) (1, 14)
NAME 'b' (1, 15) (1, 16)
COLON ':' (1, 16) (1, 17)
NAME 'pass' (1, 18) (1, 22)
""")

        self.check_tokenize('async with a as b: pass', """\
ASYNC 'async' (1, 0) (1, 5)
NAME 'with' (1, 6) (1, 10)
NAME 'a' (1, 11) (1, 12)
NAME 'as' (1, 13) (1, 15)
NAME 'b' (1, 16) (1, 17)
COLON ':' (1, 17) (1, 18)
NAME 'pass' (1, 19) (1, 23)
""")

        self.check_tokenize('async.foo', """\
ASYNC 'async' (1, 0) (1, 5)
DOT '.' (1, 5) (1, 6)
NAME 'foo' (1, 6) (1, 9)
""")

        self.check_tokenize('async', """\
ASYNC 'async' (1, 0) (1, 5)
""")

        self.check_tokenize('async\n#comment\nawait', """\
ASYNC 'async' (1, 0) (1, 5)
NEWLINE '' (1, 5) (1, 5)
AWAIT 'await' (3, 0) (3, 5)
""")

        self.check_tokenize('async\n...\nawait', """\
ASYNC 'async' (1, 0) (1, 5)
NEWLINE '' (1, 5) (1, 5)
ELLIPSIS '...' (2, 0) (2, 3)
NEWLINE '' (2, 3) (2, 3)
AWAIT 'await' (3, 0) (3, 5)
""")

        self.check_tokenize('async\nawait', """\
ASYNC 'async' (1, 0) (1, 5)
NEWLINE '' (1, 5) (1, 5)
AWAIT 'await' (2, 0) (2, 5)
""")

        self.check_tokenize('foo.async + 1', """\
NAME 'foo' (1, 0) (1, 3)
DOT '.' (1, 3) (1, 4)
ASYNC 'async' (1, 4) (1, 9)
PLUS '+' (1, 10) (1, 11)
NUMBER '1' (1, 12) (1, 13)
""")

        self.check_tokenize('async def foo(): pass', """\
ASYNC 'async' (1, 0) (1, 5)
NAME 'def' (1, 6) (1, 9)
NAME 'foo' (1, 10) (1, 13)
LPAR '(' (1, 13) (1, 14)
RPAR ')' (1, 14) (1, 15)
COLON ':' (1, 15) (1, 16)
NAME 'pass' (1, 17) (1, 21)
""")

        self.check_tokenize('''\
async def foo():
  def foo(await):
    await = 1
  if 1:
    await
async += 1
''', """\
ASYNC 'async' (1, 0) (1, 5)
NAME 'def' (1, 6) (1, 9)
NAME 'foo' (1, 10) (1, 13)
LPAR '(' (1, 13) (1, 14)
RPAR ')' (1, 14) (1, 15)
COLON ':' (1, 15) (1, 16)
NEWLINE '' (1, 16) (1, 16)
INDENT '' (2, -1) (2, -1)
NAME 'def' (2, 2) (2, 5)
NAME 'foo' (2, 6) (2, 9)
LPAR '(' (2, 9) (2, 10)
AWAIT 'await' (2, 10) (2, 15)
RPAR ')' (2, 15) (2, 16)
COLON ':' (2, 16) (2, 17)
NEWLINE '' (2, 17) (2, 17)
INDENT '' (3, -1) (3, -1)
AWAIT 'await' (3, 4) (3, 9)
EQUAL '=' (3, 10) (3, 11)
NUMBER '1' (3, 12) (3, 13)
NEWLINE '' (3, 13) (3, 13)
DEDENT '' (4, -1) (4, -1)
NAME 'if' (4, 2) (4, 4)
NUMBER '1' (4, 5) (4, 6)
COLON ':' (4, 6) (4, 7)
NEWLINE '' (4, 7) (4, 7)
INDENT '' (5, -1) (5, -1)
AWAIT 'await' (5, 4) (5, 9)
NEWLINE '' (5, 9) (5, 9)
DEDENT '' (6, -1) (6, -1)
DEDENT '' (6, -1) (6, -1)
ASYNC 'async' (6, 0) (6, 5)
PLUSEQUAL '+=' (6, 6) (6, 8)
NUMBER '1' (6, 9) (6, 10)
NEWLINE '' (6, 10) (6, 10)
""")

        self.check_tokenize('async def foo():\n  async for i in 1: pass', """\
ASYNC 'async' (1, 0) (1, 5)
NAME 'def' (1, 6) (1, 9)
NAME 'foo' (1, 10) (1, 13)
LPAR '(' (1, 13) (1, 14)
RPAR ')' (1, 14) (1, 15)
COLON ':' (1, 15) (1, 16)
NEWLINE '' (1, 16) (1, 16)
INDENT '' (2, -1) (2, -1)
ASYNC 'async' (2, 2) (2, 7)
NAME 'for' (2, 8) (2, 11)
NAME 'i' (2, 12) (2, 13)
NAME 'in' (2, 14) (2, 16)
NUMBER '1' (2, 17) (2, 18)
COLON ':' (2, 18) (2, 19)
NAME 'pass' (2, 20) (2, 24)
DEDENT '' (2, -1) (2, -1)
""")

        self.check_tokenize('async def foo(async): await', """\
ASYNC 'async' (1, 0) (1, 5)
NAME 'def' (1, 6) (1, 9)
NAME 'foo' (1, 10) (1, 13)
LPAR '(' (1, 13) (1, 14)
ASYNC 'async' (1, 14) (1, 19)
RPAR ')' (1, 19) (1, 20)
COLON ':' (1, 20) (1, 21)
AWAIT 'await' (1, 22) (1, 27)
""")

        self.check_tokenize('''\
def f():

  def baz(): pass
  async def bar(): pass

  await = 2''', """\
NAME 'def' (1, 0) (1, 3)
NAME 'f' (1, 4) (1, 5)
LPAR '(' (1, 5) (1, 6)
RPAR ')' (1, 6) (1, 7)
COLON ':' (1, 7) (1, 8)
NEWLINE '' (1, 8) (1, 8)
INDENT '' (3, -1) (3, -1)
NAME 'def' (3, 2) (3, 5)
NAME 'baz' (3, 6) (3, 9)
LPAR '(' (3, 9) (3, 10)
RPAR ')' (3, 10) (3, 11)
COLON ':' (3, 11) (3, 12)
NAME 'pass' (3, 13) (3, 17)
NEWLINE '' (3, 17) (3, 17)
ASYNC 'async' (4, 2) (4, 7)
NAME 'def' (4, 8) (4, 11)
NAME 'bar' (4, 12) (4, 15)
LPAR '(' (4, 15) (4, 16)
RPAR ')' (4, 16) (4, 17)
COLON ':' (4, 17) (4, 18)
NAME 'pass' (4, 19) (4, 23)
NEWLINE '' (4, 23) (4, 23)
AWAIT 'await' (6, 2) (6, 7)
EQUAL '=' (6, 8) (6, 9)
NUMBER '2' (6, 10) (6, 11)
DEDENT '' (6, -1) (6, -1)
""")

        self.check_tokenize('''\
async def f():

  def baz(): pass
  async def bar(): pass

  await = 2''', """\
ASYNC 'async' (1, 0) (1, 5)
NAME 'def' (1, 6) (1, 9)
NAME 'f' (1, 10) (1, 11)
LPAR '(' (1, 11) (1, 12)
RPAR ')' (1, 12) (1, 13)
COLON ':' (1, 13) (1, 14)
NEWLINE '' (1, 14) (1, 14)
INDENT '' (3, -1) (3, -1)
NAME 'def' (3, 2) (3, 5)
NAME 'baz' (3, 6) (3, 9)
LPAR '(' (3, 9) (3, 10)
RPAR ')' (3, 10) (3, 11)
COLON ':' (3, 11) (3, 12)
NAME 'pass' (3, 13) (3, 17)
NEWLINE '' (3, 17) (3, 17)
ASYNC 'async' (4, 2) (4, 7)
NAME 'def' (4, 8) (4, 11)
NAME 'bar' (4, 12) (4, 15)
LPAR '(' (4, 15) (4, 16)
RPAR ')' (4, 16) (4, 17)
COLON ':' (4, 17) (4, 18)
NAME 'pass' (4, 19) (4, 23)
NEWLINE '' (4, 23) (4, 23)
AWAIT 'await' (6, 2) (6, 7)
EQUAL '=' (6, 8) (6, 9)
NUMBER '2' (6, 10) (6, 11)
DEDENT '' (6, -1) (6, -1)
""")

    def test_unicode(self):

        self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\
NAME 'Örter' (1, 0) (1, 6)
EQUAL '=' (1, 7) (1, 8)
STRING "u'places'" (1, 9) (1, 18)
NEWLINE '' (1, 18) (1, 18)
NAME 'grün' (2, 0) (2, 5)
EQUAL '=' (2, 6) (2, 7)
STRING "U'green'" (2, 8) (2, 16)
""")

    def test_invalid_syntax(self):
        def get_tokens(string):
            return list(_generate_tokens_from_c_tokenizer(string))

        self.assertRaises(SyntaxError, get_tokens, "(1+2]")
        self.assertRaises(SyntaxError, get_tokens, "(1+2}")
        self.assertRaises(SyntaxError, get_tokens, "{1+2]")

        self.assertRaises(SyntaxError, get_tokens, "1_")
        self.assertRaises(SyntaxError, get_tokens, "1.2_")
        self.assertRaises(SyntaxError, get_tokens, "1e2_")
        self.assertRaises(SyntaxError, get_tokens, "1e+")

        self.assertRaises(SyntaxError, get_tokens, "\xa0")
        self.assertRaises(SyntaxError, get_tokens, "€")

        self.assertRaises(SyntaxError, get_tokens, "0b12")
        self.assertRaises(SyntaxError, get_tokens, "0b1_2")
        self.assertRaises(SyntaxError, get_tokens, "0b2")
        self.assertRaises(SyntaxError, get_tokens, "0b1_")
        self.assertRaises(SyntaxError, get_tokens, "0b")
        self.assertRaises(SyntaxError, get_tokens, "0o18")
        self.assertRaises(SyntaxError, get_tokens, "0o1_8")
        self.assertRaises(SyntaxError, get_tokens, "0o8")
        self.assertRaises(SyntaxError, get_tokens, "0o1_")
        self.assertRaises(SyntaxError, get_tokens, "0o")
        self.assertRaises(SyntaxError, get_tokens, "0x1_")
        self.assertRaises(SyntaxError, get_tokens, "0x")
        self.assertRaises(SyntaxError, get_tokens, "1_")
        self.assertRaises(SyntaxError, get_tokens, "012")
        self.assertRaises(SyntaxError, get_tokens, "1.2_")
        self.assertRaises(SyntaxError, get_tokens, "1e2_")
        self.assertRaises(SyntaxError, get_tokens, "1e+")

        self.assertRaises(SyntaxError, get_tokens, "'sdfsdf")
        self.assertRaises(SyntaxError, get_tokens, "'''sdfsdf''")

        self.assertRaises(SyntaxError, get_tokens, "("*1000+"a"+")"*1000)
        self.assertRaises(SyntaxError, get_tokens, "]")


if __name__ == "__main__":
    unittest.main()

Lib/tokenize.py
@@ -680,5 +680,13 @@ def main():
        perror("unexpected error: %s" % err)
        raise

def _generate_tokens_from_c_tokenizer(source):
    """Tokenize a source reading Python code as unicode strings using the internal C tokenizer"""
    import _tokenize as c_tokenizer
    for info in c_tokenizer.TokenizerIter(source):
        tok, type, lineno, end_lineno, col_off, end_col_off, line = info
        yield TokenInfo(type, tok, (lineno, col_off), (end_lineno, end_col_off), line)


if __name__ == "__main__":
    main()
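The wrapper above is the only Python-level entry point added by this change. A minimal usage sketch (assuming an interpreter built with this patch; the helper is private, hence the leading underscore):

    from tokenize import _generate_tokens_from_c_tokenizer

    # Each yielded item is a tokenize.TokenInfo, the same shape that the
    # pure-Python tokenize.generate_tokens() produces.
    for tok in _generate_tokens_from_c_tokenizer("x = 3.14\n"):
        print(tok.type, tok.string, tok.start, tok.end)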

Makefile.pre.in
@@ -339,6 +339,7 @@ PARSER_HEADERS= \
PYTHON_OBJS= \
        Python/_warnings.o \
        Python/Python-ast.o \
        Python/Python-tokenize.o \
        Python/asdl.o \
        Python/ast.o \
        Python/ast_opt.o \

Modules/config.c.in
@@ -28,6 +28,7 @@ extern PyObject* PyMarshal_Init(void);
extern PyObject* PyInit__imp(void);
extern PyObject* PyInit_gc(void);
extern PyObject* PyInit__ast(void);
extern PyObject* PyInit__tokenize(void);
extern PyObject* _PyWarnings_Init(void);
extern PyObject* PyInit__string(void);

@@ -44,6 +45,9 @@ struct _inittab _PyImport_Inittab[] = {
    /* This lives in Python/Python-ast.c */
    {"_ast", PyInit__ast},

    /* This lives in Python/Python-tokenize.c */
    {"_tokenize", PyInit__tokenize},

    /* These entries are here for sys.builtin_module_names */
    {"builtins", NULL},
    {"sys", NULL},
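As the comment in the inittab notes, entries here feed sys.builtin_module_names, so the new module is compiled into the interpreter rather than loaded from disk. A quick sanity check (a sketch, assuming a build with this patch):

    import sys

    # The inittab entry makes _tokenize a builtin module:
    print("_tokenize" in sys.builtin_module_names)  # True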

PC/config.c
@@ -72,9 +72,8 @@ extern PyObject* _PyWarnings_Init(void);
extern PyObject* PyInit__string(void);
extern PyObject* PyInit__stat(void);
extern PyObject* PyInit__opcode(void);

extern PyObject* PyInit__contextvars(void);

extern PyObject* PyInit__tokenize(void);

/* tools/freeze/makeconfig.py marker for additional "extern" */
/* -- ADDMODULE MARKER 1 -- */

@@ -83,7 +82,6 @@ extern PyObject* PyMarshal_Init(void);
extern PyObject* PyInit__imp(void);

struct _inittab _PyImport_Inittab[] = {

    {"_abc", PyInit__abc},
    {"array", PyInit_array},
    {"_ast", PyInit__ast},

@@ -105,6 +103,7 @@ struct _inittab _PyImport_Inittab[] = {
    {"_blake2", PyInit__blake2},
    {"time", PyInit_time},
    {"_thread", PyInit__thread},
    {"_tokenize", PyInit__tokenize},
    {"_typing", PyInit__typing},
    {"_statistics", PyInit__statistics},
#ifdef WIN32

PCbuild/pythoncore.vcxproj
@@ -488,6 +488,7 @@
    <ClCompile Include="..\Python\pystrtod.c" />
    <ClCompile Include="..\Python\dtoa.c" />
    <ClCompile Include="..\Python\Python-ast.c" />
    <ClCompile Include="..\Python\Python-tokenize.c" />
    <ClCompile Include="..\Python\pythonrun.c" />
    <ClCompile Include="..\Python\specialize.c" />
    <ClCompile Include="..\Python\suggestions.c" />

Python/Python-tokenize.c (new file)
@@ -0,0 +1,195 @@
#include "Python.h"
#include "../Parser/tokenizer.h"

static struct PyModuleDef _tokenizemodule;

typedef struct {
    PyTypeObject* TokenizerIter;
} tokenize_state;

static tokenize_state*
get_tokenize_state(PyObject* module)
{
    return (tokenize_state*)PyModule_GetState(module);
}

#define _tokenize_get_state_by_type(type) \
    get_tokenize_state(_PyType_GetModuleByDef(type, &_tokenizemodule))

#include "clinic/Python-tokenize.c.h"

/*[clinic input]
module _tokenizer
class _tokenizer.tokenizeriter "tokenizeriterobject *" "_tokenize_get_state_by_type(type)->TokenizerIter"
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=96d98ee2fef7a8bc]*/

typedef struct {
    PyObject_HEAD
    struct tok_state* tok;
} tokenizeriterobject;

/*[clinic input]
@classmethod
_tokenizer.tokenizeriter.__new__ as tokenizeriter_new

    source: str
[clinic start generated code]*/

static PyObject *
tokenizeriter_new_impl(PyTypeObject *type, const char *source)
/*[clinic end generated code: output=7fd9f46cf9263cbb input=4384b368407375c6]*/
{
    tokenizeriterobject* self = (tokenizeriterobject*)type->tp_alloc(type, 0);
    if (self == NULL) {
        return NULL;
    }
    PyObject* filename = PyUnicode_FromString("<string>");
    if (filename == NULL) {
        return NULL;
    }
    self->tok = PyTokenizer_FromUTF8(source, 1);
    if (self->tok == NULL) {
        return NULL;
    }
    self->tok->filename = filename;
    return (PyObject*)self;
}

static PyObject*
tokenizeriter_next(tokenizeriterobject* it)
{
    const char* start;
    const char* end;
    int type = PyTokenizer_Get(it->tok, &start, &end);
    if (type == ERRORTOKEN && PyErr_Occurred()) {
        return NULL;
    }
    if (type == ERRORTOKEN || type == ENDMARKER) {
        PyErr_SetString(PyExc_StopIteration, "EOF");
        return NULL;
    }
    PyObject* str = NULL;
    if (start == NULL || end == NULL) {
        str = PyUnicode_FromString("");
    } else {
        str = PyUnicode_FromStringAndSize(start, end - start);
    }
    if (str == NULL) {
        return NULL;
    }

    Py_ssize_t size = it->tok->inp - it->tok->buf;
    PyObject* line = PyUnicode_DecodeUTF8(it->tok->buf, size, "replace");
    if (line == NULL) {
        Py_DECREF(str);
        return NULL;
    }
    const char* line_start = type == STRING ? it->tok->multi_line_start : it->tok->line_start;
    int lineno = type == STRING ? it->tok->first_lineno : it->tok->lineno;
    int end_lineno = it->tok->lineno;
    int col_offset = -1;
    int end_col_offset = -1;
    if (start != NULL && start >= line_start) {
        col_offset = (int)(start - line_start);
    }
    if (end != NULL && end >= it->tok->line_start) {
        end_col_offset = (int)(end - it->tok->line_start);
    }

    return Py_BuildValue("(NiiiiiN)", str, type, lineno, end_lineno, col_offset, end_col_offset, line);
}

static void
tokenizeriter_dealloc(tokenizeriterobject* it)
{
    PyTypeObject* tp = Py_TYPE(it);
    PyTokenizer_Free(it->tok);
    tp->tp_free(it);
    Py_DECREF(tp);
}

static PyType_Slot tokenizeriter_slots[] = {
    {Py_tp_new, tokenizeriter_new},
    {Py_tp_dealloc, tokenizeriter_dealloc},
    {Py_tp_getattro, PyObject_GenericGetAttr},
    {Py_tp_iter, PyObject_SelfIter},
    {Py_tp_iternext, tokenizeriter_next},
    {0, NULL},
};

static PyType_Spec tokenizeriter_spec = {
    .name = "_tokenize.TokenizerIter",
    .basicsize = sizeof(tokenizeriterobject),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
    .slots = tokenizeriter_slots,
};


static int
tokenizemodule_exec(PyObject* m)
{
    tokenize_state* state = get_tokenize_state(m);
    if (state == NULL) {
        return -1;
    }

    state->TokenizerIter = (PyTypeObject *)PyType_FromModuleAndSpec(
        m, &tokenizeriter_spec, NULL);
    if (state->TokenizerIter == NULL) {
        return -1;
    }
    if (PyModule_AddType(m, state->TokenizerIter) < 0) {
        return -1;
    }

    return 0;
}

static PyMethodDef tokenize_methods[] = {
    {NULL, NULL, 0, NULL} /* Sentinel */
};

static PyModuleDef_Slot tokenizemodule_slots[] = {
    {Py_mod_exec, tokenizemodule_exec},
    {0, NULL}
};

static int
tokenizemodule_traverse(PyObject *m, visitproc visit, void *arg)
{
    tokenize_state *state = get_tokenize_state(m);
    Py_VISIT(state->TokenizerIter);
    return 0;
}

static int
tokenizemodule_clear(PyObject *m)
{
    tokenize_state *state = get_tokenize_state(m);
    Py_CLEAR(state->TokenizerIter);
    return 0;
}

static void
tokenizemodule_free(void *m)
{
    tokenizemodule_clear((PyObject *)m);
}

static struct PyModuleDef _tokenizemodule = {
    PyModuleDef_HEAD_INIT,
    .m_name = "_tokenize",
    .m_size = sizeof(tokenize_state),
    .m_slots = tokenizemodule_slots,
    .m_methods = tokenize_methods,
    .m_traverse = tokenizemodule_traverse,
    .m_clear = tokenizemodule_clear,
    .m_free = tokenizemodule_free,
};

PyMODINIT_FUNC
PyInit__tokenize(void)
{
    return PyModuleDef_Init(&_tokenizemodule);
}
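TokenizerIter can also be driven directly from Python; items come out as plain tuples in exactly the order packed by Py_BuildValue("(NiiiiiN)", ...) above, which is what tokenize._generate_tokens_from_c_tokenizer unpacks. A minimal sketch (assuming a build with this patch):

    import _tokenize
    import token

    # Each item is (string, type, lineno, end_lineno, col_offset,
    # end_col_offset, line). As in the test expectations above,
    # INDENT/DEDENT come back with column offset -1.
    for tok, type_, lineno, end_lineno, col, end_col, line in _tokenize.TokenizerIter("x = 1\n"):
        print(token.tok_name[type_], repr(tok), (lineno, col), (end_lineno, end_col))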

Python/clinic/Python-tokenize.c.h (new file)
@@ -0,0 +1,41 @@
/*[clinic input]
preserve
[clinic start generated code]*/

static PyObject *
tokenizeriter_new_impl(PyTypeObject *type, const char *source);

static PyObject *
tokenizeriter_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
    PyObject *return_value = NULL;
    static const char * const _keywords[] = {"source", NULL};
    static _PyArg_Parser _parser = {NULL, _keywords, "tokenizeriter", 0};
    PyObject *argsbuf[1];
    PyObject * const *fastargs;
    Py_ssize_t nargs = PyTuple_GET_SIZE(args);
    const char *source;

    fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, 1, 1, 0, argsbuf);
    if (!fastargs) {
        goto exit;
    }
    if (!PyUnicode_Check(fastargs[0])) {
        _PyArg_BadArgument("tokenizeriter", "argument 'source'", "str", fastargs[0]);
        goto exit;
    }
    Py_ssize_t source_length;
    source = PyUnicode_AsUTF8AndSize(fastargs[0], &source_length);
    if (source == NULL) {
        goto exit;
    }
    if (strlen(source) != (size_t)source_length) {
        PyErr_SetString(PyExc_ValueError, "embedded null character");
        goto exit;
    }
    return_value = tokenizeriter_new_impl(type, source);

exit:
    return return_value;
}
/*[clinic end generated code: output=dfcd64774e01bfe6 input=a9049054013a1b77]*/

Python/stdlib_module_names.h
@@ -80,6 +80,7 @@ static const char* _Py_stdlib_module_names[] = {
"_thread",
"_threading_local",
"_tkinter",
"_tokenize",
"_tracemalloc",
"_typing",
"_uuid",