mirror of https://github.com/python/cpython
bpo-30455: Generate all token related code and docs from Grammar/Tokens. (GH-10370)
"Include/token.h", "Lib/token.py" (containing now some data moved from "Lib/tokenize.py") and new files "Parser/token.c" (containing the code moved from "Parser/tokenizer.c") and "Doc/library/token-list.inc" (included in "Doc/library/token.rst") are now generated from "Grammar/Tokens" by "Tools/scripts/generate_token.py". The script overwrites files only if needed and can be used on the read-only sources tree. "Lib/symbol.py" is now generated by "Tools/scripts/generate_symbol_py.py" instead of been executable itself. Added new make targets "regen-token" and "regen-symbol" which are now dependencies of "regen-all". The documentation contains now strings for operators and punctuation tokens.
parent c1b4b0f616
commit 8ac658114d
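As an illustrative aside (not part of the diff below), a minimal sketch of how the regenerated Lib/token.py data can be consumed; it uses only names that the generated module defines (tok_name, EXACT_TOKEN_TYPES, ISTERMINAL, ISEOF):

    # Sketch: querying the data that the regenerated Lib/token.py carries.
    import token

    print(token.tok_name[token.LPAR])                               # 'LPAR'
    print(token.EXACT_TOKEN_TYPES['**='] == token.DOUBLESTAREQUAL)  # True
    print(token.ISTERMINAL(token.ENDMARKER))    # True: terminals sit below NT_OFFSET
    print(token.ISEOF(token.ENDMARKER))         # True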
@ -55,3 +55,7 @@ Include/opcode.h linguist-generated=true
Python/opcode_targets.h linguist-generated=true
Objects/typeslots.inc linguist-generated=true
Modules/unicodedata_db.h linguist-generated=true
Doc/library/token-list.inc linguist-generated=true
Include/token.h linguist-generated=true
Lib/token.py linguist-generated=true
Parser/token.c linguist-generated=true
@ -0,0 +1,206 @@
|
|||
.. Auto-generated by Tools/scripts/generate_token.py
|
||||
.. data:: ENDMARKER
|
||||
|
||||
.. data:: NAME
|
||||
|
||||
.. data:: NUMBER
|
||||
|
||||
.. data:: STRING
|
||||
|
||||
.. data:: NEWLINE
|
||||
|
||||
.. data:: INDENT
|
||||
|
||||
.. data:: DEDENT
|
||||
|
||||
.. data:: LPAR
|
||||
|
||||
Token value for ``"("``.
|
||||
|
||||
.. data:: RPAR
|
||||
|
||||
Token value for ``")"``.
|
||||
|
||||
.. data:: LSQB
|
||||
|
||||
Token value for ``"["``.
|
||||
|
||||
.. data:: RSQB
|
||||
|
||||
Token value for ``"]"``.
|
||||
|
||||
.. data:: COLON
|
||||
|
||||
Token value for ``":"``.
|
||||
|
||||
.. data:: COMMA
|
||||
|
||||
Token value for ``","``.
|
||||
|
||||
.. data:: SEMI
|
||||
|
||||
Token value for ``";"``.
|
||||
|
||||
.. data:: PLUS
|
||||
|
||||
Token value for ``"+"``.
|
||||
|
||||
.. data:: MINUS
|
||||
|
||||
Token value for ``"-"``.
|
||||
|
||||
.. data:: STAR
|
||||
|
||||
Token value for ``"*"``.
|
||||
|
||||
.. data:: SLASH
|
||||
|
||||
Token value for ``"/"``.
|
||||
|
||||
.. data:: VBAR
|
||||
|
||||
Token value for ``"|"``.
|
||||
|
||||
.. data:: AMPER
|
||||
|
||||
Token value for ``"&"``.
|
||||
|
||||
.. data:: LESS
|
||||
|
||||
Token value for ``"<"``.
|
||||
|
||||
.. data:: GREATER
|
||||
|
||||
Token value for ``">"``.
|
||||
|
||||
.. data:: EQUAL
|
||||
|
||||
Token value for ``"="``.
|
||||
|
||||
.. data:: DOT
|
||||
|
||||
Token value for ``"."``.
|
||||
|
||||
.. data:: PERCENT
|
||||
|
||||
Token value for ``"%"``.
|
||||
|
||||
.. data:: LBRACE
|
||||
|
||||
Token value for ``"{"``.
|
||||
|
||||
.. data:: RBRACE
|
||||
|
||||
Token value for ``"}"``.
|
||||
|
||||
.. data:: EQEQUAL
|
||||
|
||||
Token value for ``"=="``.
|
||||
|
||||
.. data:: NOTEQUAL
|
||||
|
||||
Token value for ``"!="``.
|
||||
|
||||
.. data:: LESSEQUAL
|
||||
|
||||
Token value for ``"<="``.
|
||||
|
||||
.. data:: GREATEREQUAL
|
||||
|
||||
Token value for ``">="``.
|
||||
|
||||
.. data:: TILDE
|
||||
|
||||
Token value for ``"~"``.
|
||||
|
||||
.. data:: CIRCUMFLEX
|
||||
|
||||
Token value for ``"^"``.
|
||||
|
||||
.. data:: LEFTSHIFT
|
||||
|
||||
Token value for ``"<<"``.
|
||||
|
||||
.. data:: RIGHTSHIFT
|
||||
|
||||
Token value for ``">>"``.
|
||||
|
||||
.. data:: DOUBLESTAR
|
||||
|
||||
Token value for ``"**"``.
|
||||
|
||||
.. data:: PLUSEQUAL
|
||||
|
||||
Token value for ``"+="``.
|
||||
|
||||
.. data:: MINEQUAL
|
||||
|
||||
Token value for ``"-="``.
|
||||
|
||||
.. data:: STAREQUAL
|
||||
|
||||
Token value for ``"*="``.
|
||||
|
||||
.. data:: SLASHEQUAL
|
||||
|
||||
Token value for ``"/="``.
|
||||
|
||||
.. data:: PERCENTEQUAL
|
||||
|
||||
Token value for ``"%="``.
|
||||
|
||||
.. data:: AMPEREQUAL
|
||||
|
||||
Token value for ``"&="``.
|
||||
|
||||
.. data:: VBAREQUAL
|
||||
|
||||
Token value for ``"|="``.
|
||||
|
||||
.. data:: CIRCUMFLEXEQUAL
|
||||
|
||||
Token value for ``"^="``.
|
||||
|
||||
.. data:: LEFTSHIFTEQUAL
|
||||
|
||||
Token value for ``"<<="``.
|
||||
|
||||
.. data:: RIGHTSHIFTEQUAL
|
||||
|
||||
Token value for ``">>="``.
|
||||
|
||||
.. data:: DOUBLESTAREQUAL
|
||||
|
||||
Token value for ``"**="``.
|
||||
|
||||
.. data:: DOUBLESLASH
|
||||
|
||||
Token value for ``"//"``.
|
||||
|
||||
.. data:: DOUBLESLASHEQUAL
|
||||
|
||||
Token value for ``"//="``.
|
||||
|
||||
.. data:: AT
|
||||
|
||||
Token value for ``"@"``.
|
||||
|
||||
.. data:: ATEQUAL
|
||||
|
||||
Token value for ``"@="``.
|
||||
|
||||
.. data:: RARROW
|
||||
|
||||
Token value for ``"->"``.
|
||||
|
||||
.. data:: ELLIPSIS
|
||||
|
||||
Token value for ``"..."``.
|
||||
|
||||
.. data:: OP
|
||||
|
||||
.. data:: ERRORTOKEN
|
||||
|
||||
.. data:: N_TOKENS
|
||||
|
||||
.. data:: NT_OFFSET
|
|
@ -44,64 +44,7 @@ functions. The functions mirror definitions in the Python C header files.
|
|||
|
||||
The token constants are:
|
||||
|
||||
.. data:: ENDMARKER
|
||||
NAME
|
||||
NUMBER
|
||||
STRING
|
||||
NEWLINE
|
||||
INDENT
|
||||
DEDENT
|
||||
LPAR
|
||||
RPAR
|
||||
LSQB
|
||||
RSQB
|
||||
COLON
|
||||
COMMA
|
||||
SEMI
|
||||
PLUS
|
||||
MINUS
|
||||
STAR
|
||||
SLASH
|
||||
VBAR
|
||||
AMPER
|
||||
LESS
|
||||
GREATER
|
||||
EQUAL
|
||||
DOT
|
||||
PERCENT
|
||||
LBRACE
|
||||
RBRACE
|
||||
EQEQUAL
|
||||
NOTEQUAL
|
||||
LESSEQUAL
|
||||
GREATEREQUAL
|
||||
TILDE
|
||||
CIRCUMFLEX
|
||||
LEFTSHIFT
|
||||
RIGHTSHIFT
|
||||
DOUBLESTAR
|
||||
PLUSEQUAL
|
||||
MINEQUAL
|
||||
STAREQUAL
|
||||
SLASHEQUAL
|
||||
PERCENTEQUAL
|
||||
AMPEREQUAL
|
||||
VBAREQUAL
|
||||
CIRCUMFLEXEQUAL
|
||||
LEFTSHIFTEQUAL
|
||||
RIGHTSHIFTEQUAL
|
||||
DOUBLESTAREQUAL
|
||||
DOUBLESLASH
|
||||
DOUBLESLASHEQUAL
|
||||
AT
|
||||
ATEQUAL
|
||||
RARROW
|
||||
ELLIPSIS
|
||||
OP
|
||||
ERRORTOKEN
|
||||
N_TOKENS
|
||||
NT_OFFSET
|
||||

.. include:: token-list.inc

The following token type values aren't used by the C tokenizer but are needed for
the :mod:`tokenize` module.
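As an illustrative aside (not part of the diff), the tokenize-only token types mentioned above only ever appear in the output of the pure-Python tokenize module:

    # Sketch: COMMENT, NL and ENCODING are produced by tokenize.py,
    # never by the C tokenizer.
    import io, token, tokenize

    source = b"# a comment\nx = 1\n"
    for tok in tokenize.tokenize(io.BytesIO(source).readline):
        print(token.tok_name[tok.type], repr(tok.string))
    # Prints ENCODING first, then COMMENT and NL for the comment line,
    # followed by the NAME/OP/NUMBER/NEWLINE tokens for "x = 1".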
@ -0,0 +1,62 @@
ENDMARKER
NAME
NUMBER
STRING
NEWLINE
INDENT
DEDENT

LPAR                    '('
RPAR                    ')'
LSQB                    '['
RSQB                    ']'
COLON                   ':'
COMMA                   ','
SEMI                    ';'
PLUS                    '+'
MINUS                   '-'
STAR                    '*'
SLASH                   '/'
VBAR                    '|'
AMPER                   '&'
LESS                    '<'
GREATER                 '>'
EQUAL                   '='
DOT                     '.'
PERCENT                 '%'
LBRACE                  '{'
RBRACE                  '}'
EQEQUAL                 '=='
NOTEQUAL                '!='
LESSEQUAL               '<='
GREATEREQUAL            '>='
TILDE                   '~'
CIRCUMFLEX              '^'
LEFTSHIFT               '<<'
RIGHTSHIFT              '>>'
DOUBLESTAR              '**'
PLUSEQUAL               '+='
MINEQUAL                '-='
STAREQUAL               '*='
SLASHEQUAL              '/='
PERCENTEQUAL            '%='
AMPEREQUAL              '&='
VBAREQUAL               '|='
CIRCUMFLEXEQUAL         '^='
LEFTSHIFTEQUAL          '<<='
RIGHTSHIFTEQUAL         '>>='
DOUBLESTAREQUAL         '**='
DOUBLESLASH             '//'
DOUBLESLASHEQUAL        '//='
AT                      '@'
ATEQUAL                 '@='
RARROW                  '->'
ELLIPSIS                '...'

OP
ERRORTOKEN

# These aren't used by the C tokenizer but are needed for tokenize.py
COMMENT
NL
ENCODING
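The format above is deliberately simple: one token name per line, optionally followed by the token's string form in quotes; blank lines and '#' comments are ignored, and a token's numeric value is its position in the file. A simplified sketch of the parsing that load_tokens() in Tools/scripts/generate_token.py performs (the full version appears later in this diff):

    # Simplified sketch of parsing Grammar/Tokens (see load_tokens() in
    # Tools/scripts/generate_token.py for the real code, which also
    # records the position of ERRORTOKEN).
    def load_tokens(path):
        tok_names = []            # index in this list == numeric token value
        string_to_tok = {}        # operator/punctuation string -> token value
        with open(path) as fp:
            for line in fp:
                line = line.split('#', 1)[0].strip()    # drop comments, blanks
                if not line:
                    continue
                fields = line.split()
                value = len(tok_names)
                if len(fields) > 1:
                    string_to_tok[eval(fields[1])] = value   # e.g. "'('" -> value of LPAR
                tok_names.append(fields[0])
        return tok_names, string_to_tok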
@ -1,3 +1,4 @@
/* Auto-generated by Tools/scripts/generate_token.py */

/* Token types */
#ifndef Py_LIMITED_API

@ -62,25 +63,19 @@ extern "C" {
#define ATEQUAL 50
#define RARROW 51
#define ELLIPSIS 52
/* Don't forget to update the table _PyParser_TokenNames in tokenizer.c! */
#define OP 53
#define ERRORTOKEN 54
/* These aren't used by the C tokenizer but are needed for tokenize.py */
#define COMMENT 55
#define NL 56
#define ENCODING 57
#define N_TOKENS 58
#define NT_OFFSET 256

/* Special definitions for cooperation with parser */

#define NT_OFFSET 256

#define ISTERMINAL(x) ((x) < NT_OFFSET)
#define ISNONTERMINAL(x) ((x) >= NT_OFFSET)
#define ISEOF(x) ((x) == ENDMARKER)


PyAPI_DATA(const char *) _PyParser_TokenNames[]; /* Token names */
PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
PyAPI_FUNC(int) PyToken_OneChar(int);
PyAPI_FUNC(int) PyToken_TwoChars(int, int);
PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int);
@ -1,5 +1,3 @@
|
|||
#! /usr/bin/env python3
|
||||
|
||||
"""Non-terminal symbols of Python grammar (from "graminit.h")."""
|
||||
|
||||
# This file is automatically generated; please don't muck it up!
|
||||
|
@ -7,7 +5,11 @@
|
|||
# To update the symbols in this file, 'cd' to the top directory of
|
||||
# the python source tree after building the interpreter and run:
|
||||
#
|
||||
# ./python Lib/symbol.py
|
||||
# python3 Tools/scripts/generate_symbol_py.py Include/graminit.h Lib/symbol.py
|
||||
#
|
||||
# or just
|
||||
#
|
||||
# make regen-symbol
|
||||
|
||||
#--start constants--
|
||||
single_input = 256
|
||||
|
@ -103,14 +105,4 @@ sym_name = {}
|
|||
for _name, _value in list(globals().items()):
|
||||
if type(_value) is type(0):
|
||||
sym_name[_value] = _name
|
||||
|
||||
|
||||
def _main():
|
||||
import sys
|
||||
import token
|
||||
if len(sys.argv) == 1:
|
||||
sys.argv = sys.argv + ["Include/graminit.h", "Lib/symbol.py"]
|
||||
token._main()
|
||||
|
||||
if __name__ == "__main__":
|
||||
_main()
|
||||
del _name, _value
|
||||
|
|
|
@ -6,6 +6,9 @@ import subprocess
|
|||
|
||||
|
||||
SYMBOL_FILE = support.findfile('symbol.py')
|
||||
GEN_SYMBOL_FILE = os.path.join(os.path.dirname(__file__),
|
||||
'..', '..', 'Tools', 'scripts',
|
||||
'generate_symbol_py.py')
|
||||
GRAMMAR_FILE = os.path.join(os.path.dirname(__file__),
|
||||
'..', '..', 'Include', 'graminit.h')
|
||||
TEST_PY_FILE = 'symbol_test.py'
|
||||
|
@ -22,7 +25,7 @@ class TestSymbolGeneration(unittest.TestCase):
|
|||
|
||||
def _generate_symbols(self, grammar_file, target_symbol_py_file):
|
||||
proc = subprocess.Popen([sys.executable,
|
||||
SYMBOL_FILE,
|
||||
GEN_SYMBOL_FILE,
|
||||
grammar_file,
|
||||
target_symbol_py_file], stderr=subprocess.PIPE)
|
||||
stderr = proc.communicate()[1]
|
||||
|
|
|
@ -1619,6 +1619,8 @@ class TestRoundtrip(TestCase):
|
|||
testfiles = random.sample(testfiles, 10)
|
||||
|
||||
for testfile in testfiles:
|
||||
if support.verbose >= 2:
|
||||
print('tokenize', testfile)
|
||||
with open(testfile, 'rb') as f:
|
||||
with self.subTest(file=testfile):
|
||||
self.check_roundtrip(f)
|
||||
|
|
|
@ -1,15 +1,8 @@
|
|||
"""Token constants (from "token.h")."""
|
||||
"""Token constants."""
|
||||
# Auto-generated by Tools/scripts/generate_token.py
|
||||
|
||||
__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']
|
||||
|
||||
# This file is automatically generated; please don't muck it up!
|
||||
#
|
||||
# To update the symbols in this file, 'cd' to the top directory of
|
||||
# the python source tree after building the interpreter and run:
|
||||
#
|
||||
# ./python Lib/token.py
|
||||
|
||||
#--start constants--
|
||||
ENDMARKER = 0
|
||||
NAME = 1
|
||||
NUMBER = 2
|
||||
|
@ -63,23 +56,70 @@ AT = 49
|
|||
ATEQUAL = 50
|
||||
RARROW = 51
|
||||
ELLIPSIS = 52
|
||||
# Don't forget to update the table _PyParser_TokenNames in tokenizer.c!
|
||||
OP = 53
|
||||
ERRORTOKEN = 54
|
||||
# These aren't used by the C tokenizer but are needed for tokenize.py
|
||||
ERRORTOKEN = 54
|
||||
COMMENT = 55
|
||||
NL = 56
|
||||
ENCODING = 57
|
||||
N_TOKENS = 58
|
||||
# Special definitions for cooperation with parser
|
||||
NT_OFFSET = 256
|
||||
#--end constants--
|
||||
|
||||
tok_name = {value: name
|
||||
for name, value in globals().items()
|
||||
if isinstance(value, int) and not name.startswith('_')}
|
||||
__all__.extend(tok_name.values())
|
||||
|
||||
EXACT_TOKEN_TYPES = {
|
||||
'!=': NOTEQUAL,
|
||||
'%': PERCENT,
|
||||
'%=': PERCENTEQUAL,
|
||||
'&': AMPER,
|
||||
'&=': AMPEREQUAL,
|
||||
'(': LPAR,
|
||||
')': RPAR,
|
||||
'*': STAR,
|
||||
'**': DOUBLESTAR,
|
||||
'**=': DOUBLESTAREQUAL,
|
||||
'*=': STAREQUAL,
|
||||
'+': PLUS,
|
||||
'+=': PLUSEQUAL,
|
||||
',': COMMA,
|
||||
'-': MINUS,
|
||||
'-=': MINEQUAL,
|
||||
'->': RARROW,
|
||||
'.': DOT,
|
||||
'...': ELLIPSIS,
|
||||
'/': SLASH,
|
||||
'//': DOUBLESLASH,
|
||||
'//=': DOUBLESLASHEQUAL,
|
||||
'/=': SLASHEQUAL,
|
||||
':': COLON,
|
||||
';': SEMI,
|
||||
'<': LESS,
|
||||
'<<': LEFTSHIFT,
|
||||
'<<=': LEFTSHIFTEQUAL,
|
||||
'<=': LESSEQUAL,
|
||||
'=': EQUAL,
|
||||
'==': EQEQUAL,
|
||||
'>': GREATER,
|
||||
'>=': GREATEREQUAL,
|
||||
'>>': RIGHTSHIFT,
|
||||
'>>=': RIGHTSHIFTEQUAL,
|
||||
'@': AT,
|
||||
'@=': ATEQUAL,
|
||||
'[': LSQB,
|
||||
']': RSQB,
|
||||
'^': CIRCUMFLEX,
|
||||
'^=': CIRCUMFLEXEQUAL,
|
||||
'{': LBRACE,
|
||||
'|': VBAR,
|
||||
'|=': VBAREQUAL,
|
||||
'}': RBRACE,
|
||||
'~': TILDE,
|
||||
}
|
||||
|
||||
def ISTERMINAL(x):
|
||||
return x < NT_OFFSET
|
||||
|
||||
|
@ -88,73 +128,3 @@ def ISNONTERMINAL(x):
|
|||
|
||||
def ISEOF(x):
|
||||
return x == ENDMARKER
|
||||
|
||||
|
||||
def _main():
|
||||
import re
|
||||
import sys
|
||||
args = sys.argv[1:]
|
||||
inFileName = args and args[0] or "Include/token.h"
|
||||
outFileName = "Lib/token.py"
|
||||
if len(args) > 1:
|
||||
outFileName = args[1]
|
||||
try:
|
||||
fp = open(inFileName)
|
||||
except OSError as err:
|
||||
sys.stdout.write("I/O error: %s\n" % str(err))
|
||||
sys.exit(1)
|
||||
with fp:
|
||||
lines = fp.read().split("\n")
|
||||
prog = re.compile(
|
||||
r"#define[ \t][ \t]*([A-Z0-9][A-Z0-9_]*)[ \t][ \t]*([0-9][0-9]*)",
|
||||
re.IGNORECASE)
|
||||
comment_regex = re.compile(
|
||||
r"^\s*/\*\s*(.+?)\s*\*/\s*$",
|
||||
re.IGNORECASE)
|
||||
|
||||
tokens = {}
|
||||
prev_val = None
|
||||
for line in lines:
|
||||
match = prog.match(line)
|
||||
if match:
|
||||
name, val = match.group(1, 2)
|
||||
val = int(val)
|
||||
tokens[val] = {'token': name} # reverse so we can sort them...
|
||||
prev_val = val
|
||||
else:
|
||||
comment_match = comment_regex.match(line)
|
||||
if comment_match and prev_val is not None:
|
||||
comment = comment_match.group(1)
|
||||
tokens[prev_val]['comment'] = comment
|
||||
keys = sorted(tokens.keys())
|
||||
# load the output skeleton from the target:
|
||||
try:
|
||||
fp = open(outFileName)
|
||||
except OSError as err:
|
||||
sys.stderr.write("I/O error: %s\n" % str(err))
|
||||
sys.exit(2)
|
||||
with fp:
|
||||
format = fp.read().split("\n")
|
||||
try:
|
||||
start = format.index("#--start constants--") + 1
|
||||
end = format.index("#--end constants--")
|
||||
except ValueError:
|
||||
sys.stderr.write("target does not contain format markers")
|
||||
sys.exit(3)
|
||||
lines = []
|
||||
for key in keys:
|
||||
lines.append("%s = %d" % (tokens[key]["token"], key))
|
||||
if "comment" in tokens[key]:
|
||||
lines.append("# %s" % tokens[key]["comment"])
|
||||
format[start:end] = lines
|
||||
try:
|
||||
fp = open(outFileName, 'w')
|
||||
except OSError as err:
|
||||
sys.stderr.write("I/O error: %s\n" % str(err))
|
||||
sys.exit(4)
|
||||
with fp:
|
||||
fp.write("\n".join(format))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
_main()
|
||||
|
|
|
@ -32,6 +32,7 @@ import itertools as _itertools
|
|||
import re
|
||||
import sys
|
||||
from token import *
|
||||
from token import EXACT_TOKEN_TYPES
|
||||
|
||||
cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
|
||||
blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
|
||||
|
@ -41,55 +42,6 @@ __all__ = token.__all__ + ["tokenize", "generate_tokens", "detect_encoding",
|
|||
"untokenize", "TokenInfo"]
|
||||
del token
|
||||
|
||||
EXACT_TOKEN_TYPES = {
|
||||
'(': LPAR,
|
||||
')': RPAR,
|
||||
'[': LSQB,
|
||||
']': RSQB,
|
||||
':': COLON,
|
||||
',': COMMA,
|
||||
';': SEMI,
|
||||
'+': PLUS,
|
||||
'-': MINUS,
|
||||
'*': STAR,
|
||||
'/': SLASH,
|
||||
'|': VBAR,
|
||||
'&': AMPER,
|
||||
'<': LESS,
|
||||
'>': GREATER,
|
||||
'=': EQUAL,
|
||||
'.': DOT,
|
||||
'%': PERCENT,
|
||||
'{': LBRACE,
|
||||
'}': RBRACE,
|
||||
'==': EQEQUAL,
|
||||
'!=': NOTEQUAL,
|
||||
'<=': LESSEQUAL,
|
||||
'>=': GREATEREQUAL,
|
||||
'~': TILDE,
|
||||
'^': CIRCUMFLEX,
|
||||
'<<': LEFTSHIFT,
|
||||
'>>': RIGHTSHIFT,
|
||||
'**': DOUBLESTAR,
|
||||
'+=': PLUSEQUAL,
|
||||
'-=': MINEQUAL,
|
||||
'*=': STAREQUAL,
|
||||
'/=': SLASHEQUAL,
|
||||
'%=': PERCENTEQUAL,
|
||||
'&=': AMPEREQUAL,
|
||||
'|=': VBAREQUAL,
|
||||
'^=': CIRCUMFLEXEQUAL,
|
||||
'<<=': LEFTSHIFTEQUAL,
|
||||
'>>=': RIGHTSHIFTEQUAL,
|
||||
'**=': DOUBLESTAREQUAL,
|
||||
'//': DOUBLESLASH,
|
||||
'//=': DOUBLESLASHEQUAL,
|
||||
'...': ELLIPSIS,
|
||||
'->': RARROW,
|
||||
'@': AT,
|
||||
'@=': ATEQUAL,
|
||||
}
|
||||
|
||||
class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line')):
|
||||
def __repr__(self):
|
||||
annotated_type = '%d (%s)' % (self.type, tok_name[self.type])
|
||||
|
@ -163,17 +115,11 @@ Triple = group(StringPrefix + "'''", StringPrefix + '"""')
|
|||
String = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
|
||||
StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
|
||||
|
||||
# Because of leftmost-then-longest match semantics, be sure to put the
|
||||
# longest operators first (e.g., if = came before ==, == would get
|
||||
# recognized as two instances of =).
|
||||
Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"!=",
|
||||
r"//=?", r"->",
|
||||
r"[+\-*/%&@|^=<>]=?",
|
||||
r"~")
|
||||
|
||||
Bracket = '[][(){}]'
|
||||
Special = group(r'\r?\n', r'\.\.\.', r'[:;.,@]')
|
||||
Funny = group(Operator, Bracket, Special)
|
||||
# Sorting in reverse order puts the long operators before their prefixes.
|
||||
# Otherwise if = came before ==, == would get recognized as two instances
|
||||
# of =.
|
||||
Special = group(*map(re.escape, sorted(EXACT_TOKEN_TYPES, reverse=True)))
|
||||
Funny = group(r'\r?\n', Special)
|
||||
|
||||
PlainToken = group(Number, Funny, String, Name)
|
||||
Token = Ignore + PlainToken
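An aside on the change just above: deriving the Special pattern from EXACT_TOKEN_TYPES sorted in reverse order keeps longer operators ahead of their prefixes in the regex alternation, so for example '**=' is matched before '**' or '*'. A small sketch (not part of the diff):

    # Sketch: reverse-sorting the exact token strings before escaping them
    # mirrors the new Special pattern built in Lib/tokenize.py.
    import re
    from token import EXACT_TOKEN_TYPES

    pattern = '(' + '|'.join(map(re.escape, sorted(EXACT_TOKEN_TYPES, reverse=True))) + ')'
    print(re.match(pattern, '**=').group())   # '**=' rather than '**' or '*'
    print(re.match(pattern, '<<=').group())   # '<<='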
|
||||
|
|
|
@ -302,6 +302,7 @@ POBJS= \
                Parser/metagrammar.o \
                Parser/firstsets.o \
                Parser/grammar.o \
                Parser/token.o \
                Parser/pgen.o

PARSER_OBJS=    $(POBJS) Parser/myreadline.o Parser/parsetok.o Parser/tokenizer.o

@ -559,7 +560,7 @@ coverage-lcov:
        @echo

# Force regeneration of parser and importlib
coverage-report: regen-grammar regen-importlib
coverage-report: regen-grammar regen-token regen-importlib
        @ # build with coverage info
        $(MAKE) coverage
        @ # run tests, ignore failures

@ -741,7 +742,7 @@ regen-importlib: Programs/_freeze_importlib
# Regenerate all generated files

regen-all: regen-opcode regen-opcode-targets regen-typeslots regen-grammar \
                regen-ast regen-importlib clinic
                regen-token regen-symbol regen-ast regen-importlib clinic

############################################################################
# Special rules for object files

@ -849,6 +850,37 @@ regen-opcode:
                        $(srcdir)/Include/opcode.h.new
        $(UPDATE_FILE) $(srcdir)/Include/opcode.h $(srcdir)/Include/opcode.h.new

.PHONY: regen-token
regen-token:
        # Regenerate Doc/library/token-list.inc from Grammar/Tokens
        # using Tools/scripts/generate_token.py
        $(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_token.py rst \
                $(srcdir)/Grammar/Tokens \
                $(srcdir)/Doc/library/token-list.inc
        # Regenerate Include/token.h from Grammar/Tokens
        # using Tools/scripts/generate_token.py
        $(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_token.py h \
                $(srcdir)/Grammar/Tokens \
                $(srcdir)/Include/token.h
        # Regenerate Parser/token.c from Grammar/Tokens
        # using Tools/scripts/generate_token.py
        $(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_token.py c \
                $(srcdir)/Grammar/Tokens \
                $(srcdir)/Parser/token.c
        # Regenerate Lib/token.py from Grammar/Tokens
        # using Tools/scripts/generate_token.py
        $(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_token.py py \
                $(srcdir)/Grammar/Tokens \
                $(srcdir)/Lib/token.py

.PHONY: regen-symbol
regen-symbol: $(srcdir)/Include/graminit.h
        # Regenerate Lib/symbol.py from Include/graminit.h
        # using Tools/scripts/generate_symbol_py.py
        $(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_symbol_py.py \
                $(srcdir)/Include/graminit.h \
                $(srcdir)/Lib/symbol.py

Python/compile.o Python/symtable.o Python/ast_unparse.o Python/ast.o: $(srcdir)/Include/graminit.h $(srcdir)/Include/Python-ast.h

Python/getplatform.o: $(srcdir)/Python/getplatform.c
@ -0,0 +1,2 @@
The C and Python code and the documentation related to tokens are now generated
from a single source file :file:`Grammar/Tokens`.
@ -367,6 +367,7 @@
    <ClCompile Include="..\Parser\parser.c" />
    <ClCompile Include="..\Parser\parsetok.c" />
    <ClCompile Include="..\Parser\tokenizer.c" />
    <ClCompile Include="..\Parser\token.c" />
    <ClCompile Include="..\PC\invalid_parameter_handler.c" />
    <ClCompile Include="..\PC\winreg.c" />
    <ClCompile Include="..\PC\config.c" />

@ -866,6 +866,9 @@
    <ClCompile Include="..\Parser\tokenizer.c">
      <Filter>Parser</Filter>
    </ClCompile>
    <ClCompile Include="..\Parser\token.c">
      <Filter>Parser</Filter>
    </ClCompile>
    <ClCompile Include="..\PC\winreg.c">
      <Filter>PC</Filter>
    </ClCompile>
@ -0,0 +1,233 @@
|
|||
/* Auto-generated by Tools/scripts/generate_token.py */
|
||||
|
||||
#include "Python.h"
|
||||
#include "token.h"
|
||||
|
||||
/* Token names */
|
||||
|
||||
const char * const _PyParser_TokenNames[] = {
|
||||
"ENDMARKER",
|
||||
"NAME",
|
||||
"NUMBER",
|
||||
"STRING",
|
||||
"NEWLINE",
|
||||
"INDENT",
|
||||
"DEDENT",
|
||||
"LPAR",
|
||||
"RPAR",
|
||||
"LSQB",
|
||||
"RSQB",
|
||||
"COLON",
|
||||
"COMMA",
|
||||
"SEMI",
|
||||
"PLUS",
|
||||
"MINUS",
|
||||
"STAR",
|
||||
"SLASH",
|
||||
"VBAR",
|
||||
"AMPER",
|
||||
"LESS",
|
||||
"GREATER",
|
||||
"EQUAL",
|
||||
"DOT",
|
||||
"PERCENT",
|
||||
"LBRACE",
|
||||
"RBRACE",
|
||||
"EQEQUAL",
|
||||
"NOTEQUAL",
|
||||
"LESSEQUAL",
|
||||
"GREATEREQUAL",
|
||||
"TILDE",
|
||||
"CIRCUMFLEX",
|
||||
"LEFTSHIFT",
|
||||
"RIGHTSHIFT",
|
||||
"DOUBLESTAR",
|
||||
"PLUSEQUAL",
|
||||
"MINEQUAL",
|
||||
"STAREQUAL",
|
||||
"SLASHEQUAL",
|
||||
"PERCENTEQUAL",
|
||||
"AMPEREQUAL",
|
||||
"VBAREQUAL",
|
||||
"CIRCUMFLEXEQUAL",
|
||||
"LEFTSHIFTEQUAL",
|
||||
"RIGHTSHIFTEQUAL",
|
||||
"DOUBLESTAREQUAL",
|
||||
"DOUBLESLASH",
|
||||
"DOUBLESLASHEQUAL",
|
||||
"AT",
|
||||
"ATEQUAL",
|
||||
"RARROW",
|
||||
"ELLIPSIS",
|
||||
"OP",
|
||||
"<ERRORTOKEN>",
|
||||
"<COMMENT>",
|
||||
"<NL>",
|
||||
"<ENCODING>",
|
||||
"<N_TOKENS>",
|
||||
};
|
||||
|
||||
/* Return the token corresponding to a single character */
|
||||
|
||||
int
|
||||
PyToken_OneChar(int c1)
|
||||
{
|
||||
switch (c1) {
|
||||
case '%': return PERCENT;
|
||||
case '&': return AMPER;
|
||||
case '(': return LPAR;
|
||||
case ')': return RPAR;
|
||||
case '*': return STAR;
|
||||
case '+': return PLUS;
|
||||
case ',': return COMMA;
|
||||
case '-': return MINUS;
|
||||
case '.': return DOT;
|
||||
case '/': return SLASH;
|
||||
case ':': return COLON;
|
||||
case ';': return SEMI;
|
||||
case '<': return LESS;
|
||||
case '=': return EQUAL;
|
||||
case '>': return GREATER;
|
||||
case '@': return AT;
|
||||
case '[': return LSQB;
|
||||
case ']': return RSQB;
|
||||
case '^': return CIRCUMFLEX;
|
||||
case '{': return LBRACE;
|
||||
case '|': return VBAR;
|
||||
case '}': return RBRACE;
|
||||
case '~': return TILDE;
|
||||
}
|
||||
return OP;
|
||||
}
|
||||
|
||||
int
|
||||
PyToken_TwoChars(int c1, int c2)
|
||||
{
|
||||
switch (c1) {
|
||||
case '!':
|
||||
switch (c2) {
|
||||
case '=': return NOTEQUAL;
|
||||
}
|
||||
break;
|
||||
case '%':
|
||||
switch (c2) {
|
||||
case '=': return PERCENTEQUAL;
|
||||
}
|
||||
break;
|
||||
case '&':
|
||||
switch (c2) {
|
||||
case '=': return AMPEREQUAL;
|
||||
}
|
||||
break;
|
||||
case '*':
|
||||
switch (c2) {
|
||||
case '*': return DOUBLESTAR;
|
||||
case '=': return STAREQUAL;
|
||||
}
|
||||
break;
|
||||
case '+':
|
||||
switch (c2) {
|
||||
case '=': return PLUSEQUAL;
|
||||
}
|
||||
break;
|
||||
case '-':
|
||||
switch (c2) {
|
||||
case '=': return MINEQUAL;
|
||||
case '>': return RARROW;
|
||||
}
|
||||
break;
|
||||
case '/':
|
||||
switch (c2) {
|
||||
case '/': return DOUBLESLASH;
|
||||
case '=': return SLASHEQUAL;
|
||||
}
|
||||
break;
|
||||
case '<':
|
||||
switch (c2) {
|
||||
case '<': return LEFTSHIFT;
|
||||
case '=': return LESSEQUAL;
|
||||
case '>': return NOTEQUAL;
|
||||
}
|
||||
break;
|
||||
case '=':
|
||||
switch (c2) {
|
||||
case '=': return EQEQUAL;
|
||||
}
|
||||
break;
|
||||
case '>':
|
||||
switch (c2) {
|
||||
case '=': return GREATEREQUAL;
|
||||
case '>': return RIGHTSHIFT;
|
||||
}
|
||||
break;
|
||||
case '@':
|
||||
switch (c2) {
|
||||
case '=': return ATEQUAL;
|
||||
}
|
||||
break;
|
||||
case '^':
|
||||
switch (c2) {
|
||||
case '=': return CIRCUMFLEXEQUAL;
|
||||
}
|
||||
break;
|
||||
case '|':
|
||||
switch (c2) {
|
||||
case '=': return VBAREQUAL;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return OP;
|
||||
}
|
||||
|
||||
int
|
||||
PyToken_ThreeChars(int c1, int c2, int c3)
|
||||
{
|
||||
switch (c1) {
|
||||
case '*':
|
||||
switch (c2) {
|
||||
case '*':
|
||||
switch (c3) {
|
||||
case '=': return DOUBLESTAREQUAL;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '.':
|
||||
switch (c2) {
|
||||
case '.':
|
||||
switch (c3) {
|
||||
case '.': return ELLIPSIS;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '/':
|
||||
switch (c2) {
|
||||
case '/':
|
||||
switch (c3) {
|
||||
case '=': return DOUBLESLASHEQUAL;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '<':
|
||||
switch (c2) {
|
||||
case '<':
|
||||
switch (c3) {
|
||||
case '=': return LEFTSHIFTEQUAL;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '>':
|
||||
switch (c2) {
|
||||
case '>':
|
||||
switch (c3) {
|
||||
case '=': return RIGHTSHIFTEQUAL;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return OP;
|
||||
}
|
|
@ -48,72 +48,6 @@ static int tok_nextc(struct tok_state *tok);
|
|||
static void tok_backup(struct tok_state *tok, int c);
|
||||
|
||||
|
||||
/* Token names */
|
||||
|
||||
const char *_PyParser_TokenNames[] = {
|
||||
"ENDMARKER",
|
||||
"NAME",
|
||||
"NUMBER",
|
||||
"STRING",
|
||||
"NEWLINE",
|
||||
"INDENT",
|
||||
"DEDENT",
|
||||
"LPAR",
|
||||
"RPAR",
|
||||
"LSQB",
|
||||
"RSQB",
|
||||
"COLON",
|
||||
"COMMA",
|
||||
"SEMI",
|
||||
"PLUS",
|
||||
"MINUS",
|
||||
"STAR",
|
||||
"SLASH",
|
||||
"VBAR",
|
||||
"AMPER",
|
||||
"LESS",
|
||||
"GREATER",
|
||||
"EQUAL",
|
||||
"DOT",
|
||||
"PERCENT",
|
||||
"LBRACE",
|
||||
"RBRACE",
|
||||
"EQEQUAL",
|
||||
"NOTEQUAL",
|
||||
"LESSEQUAL",
|
||||
"GREATEREQUAL",
|
||||
"TILDE",
|
||||
"CIRCUMFLEX",
|
||||
"LEFTSHIFT",
|
||||
"RIGHTSHIFT",
|
||||
"DOUBLESTAR",
|
||||
"PLUSEQUAL",
|
||||
"MINEQUAL",
|
||||
"STAREQUAL",
|
||||
"SLASHEQUAL",
|
||||
"PERCENTEQUAL",
|
||||
"AMPEREQUAL",
|
||||
"VBAREQUAL",
|
||||
"CIRCUMFLEXEQUAL",
|
||||
"LEFTSHIFTEQUAL",
|
||||
"RIGHTSHIFTEQUAL",
|
||||
"DOUBLESTAREQUAL",
|
||||
"DOUBLESLASH",
|
||||
"DOUBLESLASHEQUAL",
|
||||
"AT",
|
||||
"ATEQUAL",
|
||||
"RARROW",
|
||||
"ELLIPSIS",
|
||||
/* This table must match the #defines in token.h! */
|
||||
"OP",
|
||||
"<ERRORTOKEN>",
|
||||
"COMMENT",
|
||||
"NL",
|
||||
"ENCODING",
|
||||
"<N_TOKENS>"
|
||||
};
|
||||
|
||||
|
||||
/* Create and initialize a new tok_state structure */
|
||||
|
||||
static struct tok_state *
|
||||
|
@ -1114,177 +1048,6 @@ tok_backup(struct tok_state *tok, int c)
|
|||
}
|
||||
|
||||
|
||||
/* Return the token corresponding to a single character */
|
||||
|
||||
int
|
||||
PyToken_OneChar(int c)
|
||||
{
|
||||
switch (c) {
|
||||
case '(': return LPAR;
|
||||
case ')': return RPAR;
|
||||
case '[': return LSQB;
|
||||
case ']': return RSQB;
|
||||
case ':': return COLON;
|
||||
case ',': return COMMA;
|
||||
case ';': return SEMI;
|
||||
case '+': return PLUS;
|
||||
case '-': return MINUS;
|
||||
case '*': return STAR;
|
||||
case '/': return SLASH;
|
||||
case '|': return VBAR;
|
||||
case '&': return AMPER;
|
||||
case '<': return LESS;
|
||||
case '>': return GREATER;
|
||||
case '=': return EQUAL;
|
||||
case '.': return DOT;
|
||||
case '%': return PERCENT;
|
||||
case '{': return LBRACE;
|
||||
case '}': return RBRACE;
|
||||
case '^': return CIRCUMFLEX;
|
||||
case '~': return TILDE;
|
||||
case '@': return AT;
|
||||
default: return OP;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
PyToken_TwoChars(int c1, int c2)
|
||||
{
|
||||
switch (c1) {
|
||||
case '=':
|
||||
switch (c2) {
|
||||
case '=': return EQEQUAL;
|
||||
}
|
||||
break;
|
||||
case '!':
|
||||
switch (c2) {
|
||||
case '=': return NOTEQUAL;
|
||||
}
|
||||
break;
|
||||
case '<':
|
||||
switch (c2) {
|
||||
case '>': return NOTEQUAL;
|
||||
case '=': return LESSEQUAL;
|
||||
case '<': return LEFTSHIFT;
|
||||
}
|
||||
break;
|
||||
case '>':
|
||||
switch (c2) {
|
||||
case '=': return GREATEREQUAL;
|
||||
case '>': return RIGHTSHIFT;
|
||||
}
|
||||
break;
|
||||
case '+':
|
||||
switch (c2) {
|
||||
case '=': return PLUSEQUAL;
|
||||
}
|
||||
break;
|
||||
case '-':
|
||||
switch (c2) {
|
||||
case '=': return MINEQUAL;
|
||||
case '>': return RARROW;
|
||||
}
|
||||
break;
|
||||
case '*':
|
||||
switch (c2) {
|
||||
case '*': return DOUBLESTAR;
|
||||
case '=': return STAREQUAL;
|
||||
}
|
||||
break;
|
||||
case '/':
|
||||
switch (c2) {
|
||||
case '/': return DOUBLESLASH;
|
||||
case '=': return SLASHEQUAL;
|
||||
}
|
||||
break;
|
||||
case '|':
|
||||
switch (c2) {
|
||||
case '=': return VBAREQUAL;
|
||||
}
|
||||
break;
|
||||
case '%':
|
||||
switch (c2) {
|
||||
case '=': return PERCENTEQUAL;
|
||||
}
|
||||
break;
|
||||
case '&':
|
||||
switch (c2) {
|
||||
case '=': return AMPEREQUAL;
|
||||
}
|
||||
break;
|
||||
case '^':
|
||||
switch (c2) {
|
||||
case '=': return CIRCUMFLEXEQUAL;
|
||||
}
|
||||
break;
|
||||
case '@':
|
||||
switch (c2) {
|
||||
case '=': return ATEQUAL;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return OP;
|
||||
}
|
||||
|
||||
int
|
||||
PyToken_ThreeChars(int c1, int c2, int c3)
|
||||
{
|
||||
switch (c1) {
|
||||
case '<':
|
||||
switch (c2) {
|
||||
case '<':
|
||||
switch (c3) {
|
||||
case '=':
|
||||
return LEFTSHIFTEQUAL;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '>':
|
||||
switch (c2) {
|
||||
case '>':
|
||||
switch (c3) {
|
||||
case '=':
|
||||
return RIGHTSHIFTEQUAL;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '*':
|
||||
switch (c2) {
|
||||
case '*':
|
||||
switch (c3) {
|
||||
case '=':
|
||||
return DOUBLESTAREQUAL;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '/':
|
||||
switch (c2) {
|
||||
case '/':
|
||||
switch (c3) {
|
||||
case '=':
|
||||
return DOUBLESLASHEQUAL;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '.':
|
||||
switch (c2) {
|
||||
case '.':
|
||||
switch (c3) {
|
||||
case '.':
|
||||
return ELLIPSIS;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return OP;
|
||||
}
|
||||
|
||||
static int
|
||||
syntaxerror(struct tok_state *tok, const char *format, ...)
|
||||
{
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
#! /usr/bin/env python3
|
||||
# This script generates the symbol.py source file.
|
||||
|
||||
import sys
|
||||
import re
|
||||
|
||||
def main(inFileName="Include/graminit.h", outFileName="Lib/symbol.py"):
|
||||
try:
|
||||
fp = open(inFileName)
|
||||
except OSError as err:
|
||||
sys.stderr.write("I/O error: %s\n" % str(err))
|
||||
sys.exit(1)
|
||||
with fp:
|
||||
lines = fp.read().split("\n")
|
||||
prog = re.compile(
|
||||
"#define[ \t][ \t]*([A-Z0-9][A-Z0-9_]*)[ \t][ \t]*([0-9][0-9]*)",
|
||||
re.IGNORECASE)
|
||||
tokens = {}
|
||||
for line in lines:
|
||||
match = prog.match(line)
|
||||
if match:
|
||||
name, val = match.group(1, 2)
|
||||
val = int(val)
|
||||
tokens[val] = name # reverse so we can sort them...
|
||||
keys = sorted(tokens.keys())
|
||||
# load the output skeleton from the target:
|
||||
try:
|
||||
fp = open(outFileName)
|
||||
except OSError as err:
|
||||
sys.stderr.write("I/O error: %s\n" % str(err))
|
||||
sys.exit(2)
|
||||
with fp:
|
||||
format = fp.read().split("\n")
|
||||
try:
|
||||
start = format.index("#--start constants--") + 1
|
||||
end = format.index("#--end constants--")
|
||||
except ValueError:
|
||||
sys.stderr.write("target does not contain format markers")
|
||||
sys.exit(3)
|
||||
lines = []
|
||||
for val in keys:
|
||||
lines.append("%s = %d" % (tokens[val], val))
|
||||
format[start:end] = lines
|
||||
try:
|
||||
fp = open(outFileName, 'w')
|
||||
except OSError as err:
|
||||
sys.stderr.write("I/O error: %s\n" % str(err))
|
||||
sys.exit(4)
|
||||
with fp:
|
||||
fp.write("\n".join(format))
|
||||
|
||||
if __name__ == '__main__':
|
||||
main(*sys.argv[1:])
|
|
@ -0,0 +1,268 @@
|
|||
#! /usr/bin/env python3
|
||||
# This script generates token related files from Grammar/Tokens:
|
||||
#
|
||||
# Doc/library/token-list.inc
|
||||
# Include/token.h
|
||||
# Parser/token.c
|
||||
# Lib/token.py
|
||||
|
||||
|
||||
NT_OFFSET = 256
|
||||
|
||||
def load_tokens(path):
|
||||
tok_names = []
|
||||
string_to_tok = {}
|
||||
ERRORTOKEN = None
|
||||
with open(path) as fp:
|
||||
for line in fp:
|
||||
line = line.strip()
|
||||
# strip comments
|
||||
i = line.find('#')
|
||||
if i >= 0:
|
||||
line = line[:i].strip()
|
||||
if not line:
|
||||
continue
|
||||
fields = line.split()
|
||||
name = fields[0]
|
||||
value = len(tok_names)
|
||||
if name == 'ERRORTOKEN':
|
||||
ERRORTOKEN = value
|
||||
string = fields[1] if len(fields) > 1 else None
|
||||
if string:
|
||||
string = eval(string)
|
||||
string_to_tok[string] = value
|
||||
tok_names.append(name)
|
||||
return tok_names, ERRORTOKEN, string_to_tok
|
||||
|
||||
|
||||
def update_file(file, content):
|
||||
try:
|
||||
with open(file, 'r') as fobj:
|
||||
if fobj.read() == content:
|
||||
return False
|
||||
except (OSError, ValueError):
|
||||
pass
|
||||
with open(file, 'w') as fobj:
|
||||
fobj.write(content)
|
||||
return True
|
||||
|
||||
|
||||
token_h_template = """\
|
||||
/* Auto-generated by Tools/scripts/generate_token.py */
|
||||
|
||||
/* Token types */
|
||||
#ifndef Py_LIMITED_API
|
||||
#ifndef Py_TOKEN_H
|
||||
#define Py_TOKEN_H
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#undef TILDE /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */
|
||||
|
||||
%s\
|
||||
#define N_TOKENS %d
|
||||
#define NT_OFFSET %d
|
||||
|
||||
/* Special definitions for cooperation with parser */
|
||||
|
||||
#define ISTERMINAL(x) ((x) < NT_OFFSET)
|
||||
#define ISNONTERMINAL(x) ((x) >= NT_OFFSET)
|
||||
#define ISEOF(x) ((x) == ENDMARKER)
|
||||
|
||||
|
||||
PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
|
||||
PyAPI_FUNC(int) PyToken_OneChar(int);
|
||||
PyAPI_FUNC(int) PyToken_TwoChars(int, int);
|
||||
PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif /* !Py_TOKEN_H */
|
||||
#endif /* Py_LIMITED_API */
|
||||
"""
|
||||
|
||||
def make_h(infile, outfile='Include/token.h'):
|
||||
tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
|
||||
|
||||
defines = []
|
||||
for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
|
||||
defines.append("#define %-15s %d\n" % (name, value))
|
||||
|
||||
if update_file(outfile, token_h_template % (
|
||||
''.join(defines),
|
||||
len(tok_names),
|
||||
NT_OFFSET
|
||||
)):
|
||||
print("%s regenerated from %s" % (outfile, infile))
|
||||
|
||||
|
||||
token_c_template = """\
|
||||
/* Auto-generated by Tools/scripts/generate_token.py */
|
||||
|
||||
#include "Python.h"
|
||||
#include "token.h"
|
||||
|
||||
/* Token names */
|
||||
|
||||
const char * const _PyParser_TokenNames[] = {
|
||||
%s\
|
||||
};
|
||||
|
||||
/* Return the token corresponding to a single character */
|
||||
|
||||
int
|
||||
PyToken_OneChar(int c1)
|
||||
{
|
||||
%s\
|
||||
return OP;
|
||||
}
|
||||
|
||||
int
|
||||
PyToken_TwoChars(int c1, int c2)
|
||||
{
|
||||
%s\
|
||||
return OP;
|
||||
}
|
||||
|
||||
int
|
||||
PyToken_ThreeChars(int c1, int c2, int c3)
|
||||
{
|
||||
%s\
|
||||
return OP;
|
||||
}
|
||||
"""
|
||||
|
||||
def generate_chars_to_token(mapping, n=1):
|
||||
result = []
|
||||
write = result.append
|
||||
indent = ' ' * n
|
||||
write(indent)
|
||||
write('switch (c%d) {\n' % (n,))
|
||||
for c in sorted(mapping):
|
||||
write(indent)
|
||||
value = mapping[c]
|
||||
if isinstance(value, dict):
|
||||
write("case '%s':\n" % (c,))
|
||||
write(generate_chars_to_token(value, n + 1))
|
||||
write(indent)
|
||||
write(' break;\n')
|
||||
else:
|
||||
write("case '%s': return %s;\n" % (c, value))
|
||||
write(indent)
|
||||
write('}\n')
|
||||
return ''.join(result)
|
||||
|
||||
def make_c(infile, outfile='Parser/token.c'):
|
||||
tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
|
||||
string_to_tok['<>'] = string_to_tok['!=']
|
||||
chars_to_token = {}
|
||||
for string, value in string_to_tok.items():
|
||||
assert 1 <= len(string) <= 3
|
||||
name = tok_names[value]
|
||||
m = chars_to_token.setdefault(len(string), {})
|
||||
for c in string[:-1]:
|
||||
m = m.setdefault(c, {})
|
||||
m[string[-1]] = name
|
||||
|
||||
names = []
|
||||
for value, name in enumerate(tok_names):
|
||||
if value >= ERRORTOKEN:
|
||||
name = '<%s>' % name
|
||||
names.append(' "%s",\n' % name)
|
||||
names.append(' "<N_TOKENS>",\n')
|
||||
|
||||
if update_file(outfile, token_c_template % (
|
||||
''.join(names),
|
||||
generate_chars_to_token(chars_to_token[1]),
|
||||
generate_chars_to_token(chars_to_token[2]),
|
||||
generate_chars_to_token(chars_to_token[3])
|
||||
)):
|
||||
print("%s regenerated from %s" % (outfile, infile))
|
||||
|
||||
|
||||
token_inc_template = """\
|
||||
.. Auto-generated by Tools/scripts/generate_token.py
|
||||
%s
|
||||
.. data:: N_TOKENS
|
||||
|
||||
.. data:: NT_OFFSET
|
||||
"""
|
||||
|
||||
def make_rst(infile, outfile='Doc/library/token-list.inc'):
|
||||
tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
|
||||
tok_to_string = {value: s for s, value in string_to_tok.items()}
|
||||
|
||||
names = []
|
||||
for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
|
||||
names.append('.. data:: %s' % (name,))
|
||||
if value in tok_to_string:
|
||||
names.append('')
|
||||
names.append(' Token value for ``"%s"``.' % tok_to_string[value])
|
||||
names.append('')
|
||||
|
||||
if update_file(outfile, token_inc_template % '\n'.join(names)):
|
||||
print("%s regenerated from %s" % (outfile, infile))
|
||||
|
||||
|
||||
token_py_template = '''\
|
||||
"""Token constants."""
|
||||
# Auto-generated by Tools/scripts/generate_token.py
|
||||
|
||||
__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']
|
||||
|
||||
%s
|
||||
N_TOKENS = %d
|
||||
# Special definitions for cooperation with parser
|
||||
NT_OFFSET = %d
|
||||
|
||||
tok_name = {value: name
|
||||
for name, value in globals().items()
|
||||
if isinstance(value, int) and not name.startswith('_')}
|
||||
__all__.extend(tok_name.values())
|
||||
|
||||
EXACT_TOKEN_TYPES = {
|
||||
%s
|
||||
}
|
||||
|
||||
def ISTERMINAL(x):
|
||||
return x < NT_OFFSET
|
||||
|
||||
def ISNONTERMINAL(x):
|
||||
return x >= NT_OFFSET
|
||||
|
||||
def ISEOF(x):
|
||||
return x == ENDMARKER
|
||||
'''
|
||||
|
||||
def make_py(infile, outfile='Lib/token.py'):
|
||||
tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
|
||||
|
||||
constants = []
|
||||
for value, name in enumerate(tok_names):
|
||||
constants.append('%s = %d' % (name, value))
|
||||
constants.insert(ERRORTOKEN,
|
||||
"# These aren't used by the C tokenizer but are needed for tokenize.py")
|
||||
|
||||
token_types = []
|
||||
for s, value in sorted(string_to_tok.items()):
|
||||
token_types.append(' %r: %s,' % (s, tok_names[value]))
|
||||
|
||||
if update_file(outfile, token_py_template % (
|
||||
'\n'.join(constants),
|
||||
len(tok_names),
|
||||
NT_OFFSET,
|
||||
'\n'.join(token_types),
|
||||
)):
|
||||
print("%s regenerated from %s" % (outfile, infile))
|
||||
|
||||
|
||||
def main(op, infile='Grammar/Tokens', *args):
|
||||
make = globals()['make_' + op]
|
||||
make(infile, *args)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
import sys
|
||||
main(*sys.argv[1:])