bpo-40939: Remove the old parser (GH-20768)

This commit removes the old LL(1) parser, the deprecated parser module, the old-parser compatibility flags and environment variables, and all associated support code and documentation.
Author: Pablo Galindo, 2020-06-11 17:30:46 +01:00 (committed by GitHub)
parent 311110abcd
commit 1ed83adb0e
53 changed files with 24447 additions and 35978 deletions
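As a user-visible summary, here is a minimal, illustrative sketch (not part of the commit) of what this means on 3.10: the PEG parser is the only parser, the deprecated parser module is gone, and -X oldparser / PYTHONOLDPARSER no longer select anything.

import ast
import importlib.util

# Only the PEG grammar remains, so constructs it enabled (e.g. parenthesized
# context managers) parse without any opt-in flag.
tree = ast.parse("with (open('a') as f, open('b') as g):\n    pass")
print(type(tree).__name__)  # expected: Module

# The deprecated C-based parser module is no longer shipped.
print(importlib.util.find_spec("parser"))  # expected: None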


@ -695,16 +695,6 @@ PyConfig
:data:`sys._xoptions`.
.. c:member:: int _use_peg_parser
Enable PEG parser? Default: 1.
Set to 0 by :option:`-X oldparser <-X>` and :envvar:`PYTHONOLDPARSER`.
See also :pep:`617`.
.. deprecated-removed:: 3.9 3.10
If ``parse_argv`` is non-zero, ``argv`` arguments are parsed the same
way the regular Python parses command line arguments, and Python
arguments are stripped from ``argv``: see :ref:`Command Line Arguments


@ -426,8 +426,6 @@ Miscellaneous options
defines the following possible values:
* ``-X faulthandler`` to enable :mod:`faulthandler`;
* ``-X oldparser``: enable the traditional LL(1) parser. See also
:envvar:`PYTHONOLDPARSER` and :pep:`617`.
* ``-X showrefcount`` to output the total reference count and number of used
memory blocks when the program finishes or after each statement in the
interactive interpreter. This only works on debug builds.
@ -587,15 +585,6 @@ conflict.
:option:`-d` multiple times.
.. envvar:: PYTHONOLDPARSER
If this is set to a non-empty string, enable the traditional LL(1) parser.
See also the :option:`-X` ``oldparser`` option and :pep:`617`.
.. deprecated-removed:: 3.9 3.10
.. envvar:: PYTHONINSPECT
If this is set to a non-empty string it is equivalent to specifying the


@ -144,10 +144,6 @@ typedef struct {
Set to 1 by -X faulthandler and PYTHONFAULTHANDLER. -1 means unset. */
int faulthandler;
/* Enable PEG parser?
1 by default, set to 0 by -X oldparser and PYTHONOLDPARSER */
int _use_peg_parser;
/* Enable tracemalloc?
Set by -X tracemalloc=N and PYTHONTRACEMALLOC. -1 means unset */
int tracemalloc;


@ -326,7 +326,7 @@ def _args_from_interpreter_flags():
if dev_mode:
args.extend(('-X', 'dev'))
for opt in ('faulthandler', 'tracemalloc', 'importtime',
'showrefcount', 'utf8', 'oldparser'):
'showrefcount', 'utf8'):
if opt in xoptions:
value = xoptions[opt]
if value is True:
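The hunk above drops 'oldparser' from the -X options that get re-emitted for child interpreters. A self-contained sketch of that forwarding logic (the function name and the literal option dict are illustrative, not the actual Lib/subprocess.py code):

import sys

def forwarded_xoptions(xoptions=None):
    """Rebuild '-X' arguments for a child process from an xoptions mapping."""
    if xoptions is None:
        xoptions = sys._xoptions
    args = []
    # 'oldparser' is intentionally absent from this whitelist after the change.
    for opt in ('faulthandler', 'tracemalloc', 'importtime', 'showrefcount', 'utf8'):
        if opt in xoptions:
            value = xoptions[opt]
            if value is True:
                args.extend(('-X', opt))
            else:
                args.extend(('-X', f'{opt}={value}'))
    return args

print(forwarded_xoptions({'faulthandler': True, 'tracemalloc': '5', 'oldparser': True}))
# expected: ['-X', 'faulthandler', '-X', 'tracemalloc=5'] -- 'oldparser' is no longer forwarded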


@ -1958,13 +1958,3 @@ def wait_process(pid, *, exitcode, timeout=None):
# sanity check: it should not fail in practice
if pid2 != pid:
raise AssertionError(f"pid {pid2} != pid {pid}")
def use_old_parser():
import _testinternalcapi
config = _testinternalcapi.get_configs()
return (config['config']['_use_peg_parser'] == 0)
def skip_if_new_parser(msg):
return unittest.skipIf(not use_old_parser(), msg)


@ -347,7 +347,6 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
'isolated': 0,
'use_environment': 1,
'dev_mode': 0,
'_use_peg_parser': 1,
'install_signal_handlers': 1,
'use_hash_seed': 0,
@ -733,7 +732,6 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
'import_time': 1,
'show_ref_count': 1,
'malloc_stats': 1,
'_use_peg_parser': 0,
'stdio_encoding': 'iso8859-1',
'stdio_errors': 'replace',
@ -807,7 +805,6 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
'warnoptions': ['EnvVar'],
'platlibdir': 'env_platlibdir',
'module_search_paths': self.IGNORE_CONFIG,
'_use_peg_parser': 0,
}
self.check_all_configs("test_init_compat_env", config, preconfig,
api=API_COMPAT)
@ -837,7 +834,6 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
'warnoptions': ['EnvVar'],
'platlibdir': 'env_platlibdir',
'module_search_paths': self.IGNORE_CONFIG,
'_use_peg_parser': 0,
}
self.check_all_configs("test_init_python_env", config, preconfig,
api=API_PYTHON)


@ -251,9 +251,9 @@ class ExceptionTests(unittest.TestCase):
check('def f():\n x, y: int', 2, 3)
check('[*x for x in xs]', 1, 2)
check('foo(x for x in range(10), 100)', 1, 5)
check('(yield i) = 2', 1, 1 if support.use_old_parser() else 2)
check('def f(*):\n pass', 1, 7 if support.use_old_parser() else 8)
check('for 1 in []: pass', 1, 5 if support.use_old_parser() else 7)
check('(yield i) = 2', 1, 2)
check('def f(*):\n pass', 1, 8)
check('for 1 in []: pass', 1, 7)
@cpython_only
def testSettingException(self):
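The updated expectations reflect that the PEG parser reports the start of the offending token where the old parser reported its end. A hedged, runnable check of one of the cases above (offsets as asserted by the 3.10 test; later releases may refine them again):

try:
    compile("(yield i) = 2", "<string>", "exec")
except SyntaxError as err:
    print(err.lineno, err.offset)  # expected on 3.10: 1 2 (the old parser gave 1 1)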


@ -20,7 +20,7 @@ class FLUFLTests(unittest.TestCase):
self.assertTrue(cm.exception.lineno, 2)
# The old parser reports the end of the token and the new
# parser reports the start of the token
self.assertEqual(cm.exception.offset, 4 if support.use_old_parser() else 3)
self.assertEqual(cm.exception.offset, 3)
def test_guido_as_bdfl(self):
code = '2 {0} 3'
@ -33,7 +33,7 @@ class FLUFLTests(unittest.TestCase):
self.assertEqual(cm.exception.lineno, 1)
# The old parser reports the end of the token and the new
# parser reports the start of the token
self.assertEqual(cm.exception.offset, 4 if support.use_old_parser() else 3)
self.assertEqual(cm.exception.offset, 3)
if __name__ == '__main__':


@ -12,7 +12,7 @@ import os
import types
import decimal
import unittest
from test.support import temp_cwd, use_old_parser
from test.support import temp_cwd
from test.support.script_helper import assert_python_failure
a_global = 'global variable'
@ -1049,7 +1049,6 @@ non-important content
r"f'{1000:j}'",
])
@unittest.skipIf(use_old_parser(), "The old parser only supports <fstring> as the filename")
def test_filename_in_syntaxerror(self):
# see issue 38964
with temp_cwd() as cwd:
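The skip is gone because, under the PEG parser, a syntax error inside an f-string carries the real source filename instead of the old '<fstring>' placeholder. An illustrative check (the filename is an arbitrary example; the behaviour is as asserted by the test above):

try:
    compile("f'{a $ b}'", "some_file.py", "exec")
except SyntaxError as err:
    print(err.filename)  # expected: some_file.py rather than '<fstring>'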


@ -1,7 +1,7 @@
# Python test set -- part 1, grammar.
# This just tests whether the parser accepts them all.
from test.support import check_syntax_error, check_syntax_warning, use_old_parser
from test.support import check_syntax_error, check_syntax_warning
import inspect
import unittest
import sys
@ -1714,69 +1714,53 @@ class GrammarTests(unittest.TestCase):
with manager() as x, manager():
pass
if not use_old_parser():
test_cases = [
"""if 1:
with (
manager()
):
pass
""",
"""if 1:
with (
manager() as x
):
pass
""",
"""if 1:
with (
manager() as (x, y),
manager() as z,
):
pass
""",
"""if 1:
with (
manager(),
manager()
):
pass
""",
"""if 1:
with (
manager() as x,
manager() as y
):
pass
""",
"""if 1:
with (
manager() as x,
manager()
):
pass
""",
"""if 1:
with (
manager() as x,
manager() as y,
manager() as z,
):
pass
""",
"""if 1:
with (
manager() as x,
manager() as y,
manager(),
):
pass
""",
]
for case in test_cases:
with self.subTest(case=case):
compile(case, "<string>", "exec")
with (
manager()
):
pass
with (
manager() as x
):
pass
with (
manager() as (x, y),
manager() as z,
):
pass
with (
manager(),
manager()
):
pass
with (
manager() as x,
manager() as y
):
pass
with (
manager() as x,
manager()
):
pass
with (
manager() as x,
manager() as y,
manager() as z,
):
pass
with (
manager() as x,
manager() as y,
manager(),
):
pass
def test_if_else_expr(self):
# Test ifelse expressions in various cases
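Since every build now uses the PEG parser, the guarded compile() loop becomes plain test-body source above. A compact, hedged demonstration that one of those forms compiles (mirroring a former test string; manager is just a placeholder name and is never called):

source = """if 1:
    with (
        manager() as x,
        manager() as y,
    ):
        pass
"""
compile(source, "<string>", "exec")  # accepted by the PEG parser
print("parenthesized with-targets compile fine")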

File diff suppressed because it is too large.


@ -1,803 +0,0 @@
import ast
import _peg_parser as peg_parser
import unittest
from typing import Any, Union, Iterable, Tuple
from textwrap import dedent
from test import support
TEST_CASES = [
('annotated_assignment', 'x: int = 42'),
('annotated_assignment_with_tuple', 'x: tuple = 1, 2'),
('annotated_assignment_with_parens', '(paren): int = 3+2'),
('annotated_assignment_with_yield', 'x: int = yield 42'),
('annotated_no_assignment', 'x: int'),
('annotation_with_multiple_parens', '((parens)): int'),
('annotation_with_parens', '(parens): int'),
('annotated_assignment_with_attr', 'a.b: int'),
('annotated_assignment_with_subscript', 'a[b]: int'),
('annotated_assignment_with_attr_and_parens', '(a.b): int'),
('annotated_assignment_with_subscript_and_parens', '(a[b]): int'),
('assert', 'assert a'),
('assert_message', 'assert a, b'),
('assignment_false', 'a = False'),
('assignment_none', 'a = None'),
('assignment_true', 'a = True'),
('assignment_paren', '(a) = 42'),
('assignment_paren_multiple', '(a, b) = (0, 1)'),
('asyncfor',
'''
async for i in a:
pass
'''),
('attribute_call', 'a.b()'),
('attribute_multiple_names', 'abcd.efg.hij'),
('attribute_simple', 'a.b'),
('attributes_subscript', 'a.b[0]'),
('augmented_assignment', 'x += 42'),
('augmented_assignment_attribute', 'a.b.c += 42'),
('augmented_assignment_paren', '(x) += 42'),
('augmented_assignment_paren_subscript', '(x[0]) -= 42'),
('binop_add', '1 + 1'),
('binop_add_multiple', '1 + 1 + 1 + 1'),
('binop_all', '1 + 2 * 5 + 3 ** 2 - -3'),
('binop_boolop_comp', '1 + 1 == 2 or 1 + 1 == 3 and not b'),
('boolop_or', 'a or b'),
('boolop_or_multiple', 'a or b or c'),
('class_def_bases',
'''
class C(A, B):
pass
'''),
('class_def_decorators',
'''
@a
class C:
pass
'''),
('class_def_decorator_with_expression',
'''
@lambda x: 42
class C:
pass
'''),
('class_def_decorator_with_expression_and_walrus',
'''
@x:=lambda x: 42
class C:
pass
'''),
('class_def_keywords',
'''
class C(keyword=a+b, **c):
pass
'''),
('class_def_mixed',
'''
class C(A, B, keyword=0, **a):
pass
'''),
('class_def_simple',
'''
class C:
pass
'''),
('class_def_starred_and_kwarg',
'''
class C(A, B, *x, **y):
pass
'''),
('class_def_starred_in_kwargs',
'''
class C(A, x=2, *[B, C], y=3):
pass
'''),
('call_attribute', 'f().b'),
('call_genexp', 'f(i for i in a)'),
('call_mixed_args', 'f(a, b, *c, **d)'),
('call_mixed_args_named', 'f(a, b, *c, d=4, **v)'),
('call_one_arg', 'f(a)'),
('call_posarg_genexp', 'f(a, (i for i in a))'),
('call_simple', 'f()'),
('call_subscript', 'f()[0]'),
('comp', 'a == b'),
('comp_multiple', 'a == b == c'),
('comp_paren_end', 'a == (b-1)'),
('comp_paren_start', '(a-1) == b'),
('decorator',
'''
@a
def f():
pass
'''),
('decorator_async',
'''
@a
async def d():
pass
'''),
('decorator_with_expression',
'''
@lambda x: 42
def f():
pass
'''),
('decorator_with_expression_and_walrus',
'''
@x:=lambda x: 42
def f():
pass
'''),
('del_attribute', 'del a.b'),
('del_call_attribute', 'del a().c'),
('del_call_genexp_attribute', 'del a(i for i in b).c'),
('del_empty', 'del()'),
('del_list', 'del a, [b, c]'),
('del_mixed', 'del a[0].b().c'),
('del_multiple', 'del a, b'),
('del_multiple_calls_attribute', 'del a()().b'),
('del_paren', 'del(a,b)'),
('del_paren_single_target', 'del(a)'),
('del_subscript_attribute', 'del a[0].b'),
('del_tuple', 'del a, (b, c)'),
('delete', 'del a'),
('dict',
'''
{
a: 1,
b: 2,
c: 3
}
'''),
('dict_comp', '{x:1 for x in a}'),
('dict_comp_if', '{x:1+2 for x in a if b}'),
('dict_empty', '{}'),
('empty_line_after_linecont',
r'''
pass
\
pass
'''),
('for',
'''
for i in a:
pass
'''),
('for_else',
'''
for i in a:
pass
else:
pass
'''),
('for_star_target_in_paren', 'for (a) in b: pass'),
('for_star_targets_attribute', 'for a.b in c: pass'),
('for_star_targets_call_attribute', 'for a().c in b: pass'),
('for_star_targets_empty', 'for () in a: pass'),
('for_star_targets_mixed', 'for a[0].b().c in d: pass'),
('for_star_targets_mixed_starred',
'''
for a, *b, (c, d) in e:
pass
'''),
('for_star_targets_multiple', 'for a, b in c: pass'),
('for_star_targets_nested_starred', 'for *[*a] in b: pass'),
('for_star_targets_starred', 'for *a in b: pass'),
('for_star_targets_subscript_attribute', 'for a[0].b in c: pass'),
('for_star_targets_trailing_comma',
'''
for a, (b, c), in d:
pass
'''),
('for_star_targets_tuple', 'for a, (b, c) in d: pass'),
('for_underscore',
'''
for _ in a:
pass
'''),
('function_return_type',
'''
def f() -> Any:
pass
'''),
('f-string_slice', "f'{x[2]}'"),
('f-string_slice_upper', "f'{x[2:3]}'"),
('f-string_slice_step', "f'{x[2:3:-2]}'"),
('f-string_constant', "f'{42}'"),
('f-string_boolop', "f'{x and y}'"),
('f-string_named_expr', "f'{(x:=42)}'"),
('f-string_binop', "f'{x+y}'"),
('f-string_unaryop', "f'{not x}'"),
('f-string_lambda', "f'{(lambda x, /, y, y2=42 , *z, k1, k2=34, **k3: 42)}'"),
('f-string_lambda_call', "f'{(lambda: 2)(2)}'"),
('f-string_ifexpr', "f'{x if y else z}'"),
('f-string_dict', "f'{ {2:34, 3:34} }'"),
('f-string_set', "f'{ {2,-45} }'"),
('f-string_list', "f'{ [2,-45] }'"),
('f-string_tuple', "f'{ (2,-45) }'"),
('f-string_listcomp', "f'{[x for x in y if z]}'"),
('f-string_setcomp', "f'{ {x for x in y if z} }'"),
('f-string_dictcomp', "f'{ {x:x for x in y if z} }'"),
('f-string_genexpr', "f'{ (x for x in y if z) }'"),
('f-string_yield', "f'{ (yield x) }'"),
('f-string_yieldfrom', "f'{ (yield from x) }'"),
('f-string_await', "f'{ await x }'"),
('f-string_compare', "f'{ x == y }'"),
('f-string_call', "f'{ f(x,y,z) }'"),
('f-string_attribute', "f'{ f.x.y.z }'"),
('f-string_starred', "f'{ *x, }'"),
('f-string_doublestarred', "f'{ {**x} }'"),
('f-string_escape_brace', "f'{{Escape'"),
('f-string_escape_closing_brace', "f'Escape}}'"),
('f-string_repr', "f'{a!r}'"),
('f-string_str', "f'{a!s}'"),
('f-string_ascii', "f'{a!a}'"),
('f-string_debug', "f'{a=}'"),
('f-string_padding', "f'{a:03d}'"),
('f-string_multiline',
"""
f'''
{hello}
'''
"""),
('f-string_multiline_in_expr',
"""
f'''
{
hello
}
'''
"""),
('f-string_multiline_in_call',
"""
f'''
{f(
a, b, c
)}
'''
"""),
('global', 'global a, b'),
('group', '(yield a)'),
('if_elif',
'''
if a:
pass
elif b:
pass
'''),
('if_elif_elif',
'''
if a:
pass
elif b:
pass
elif c:
pass
'''),
('if_elif_else',
'''
if a:
pass
elif b:
pass
else:
pass
'''),
('if_else',
'''
if a:
pass
else:
pass
'''),
('if_simple', 'if a: pass'),
('import', 'import a'),
('import_alias', 'import a as b'),
('import_dotted', 'import a.b'),
('import_dotted_alias', 'import a.b as c'),
('import_dotted_multichar', 'import ab.cd'),
('import_from', 'from a import b'),
('import_from_alias', 'from a import b as c'),
('import_from_dotted', 'from a.b import c'),
('import_from_dotted_alias', 'from a.b import c as d'),
('import_from_multiple_aliases', 'from a import b as c, d as e'),
('import_from_one_dot', 'from .a import b'),
('import_from_one_dot_alias', 'from .a import b as c'),
('import_from_star', 'from a import *'),
('import_from_three_dots', 'from ...a import b'),
('import_from_trailing_comma', 'from a import (b,)'),
('kwarg',
'''
def f(**a):
pass
'''),
('kwonly_args',
'''
def f(*, a, b):
pass
'''),
('kwonly_args_with_default',
'''
def f(*, a=2, b):
pass
'''),
('lambda_kwarg', 'lambda **a: 42'),
('lambda_kwonly_args', 'lambda *, a, b: 42'),
('lambda_kwonly_args_with_default', 'lambda *, a=2, b: 42'),
('lambda_mixed_args', 'lambda a, /, b, *, c: 42'),
('lambda_mixed_args_with_default', 'lambda a, b=2, /, c=3, *e, f, **g: 42'),
('lambda_no_args', 'lambda: 42'),
('lambda_pos_args', 'lambda a,b: 42'),
('lambda_pos_args_with_default', 'lambda a, b=2: 42'),
('lambda_pos_only_args', 'lambda a, /: 42'),
('lambda_pos_only_args_with_default', 'lambda a=0, /: 42'),
('lambda_pos_posonly_args', 'lambda a, b, /, c, d: 42'),
('lambda_pos_posonly_args_with_default', 'lambda a, b=0, /, c=2: 42'),
('lambda_vararg', 'lambda *a: 42'),
('lambda_vararg_kwonly_args', 'lambda *a, b: 42'),
('list', '[1, 2, a]'),
('list_comp', '[i for i in a]'),
('list_comp_if', '[i for i in a if b]'),
('list_trailing_comma', '[1+2, a, 3+4,]'),
('mixed_args',
'''
def f(a, /, b, *, c):
pass
'''),
('mixed_args_with_default',
'''
def f(a, b=2, /, c=3, *e, f, **g):
pass
'''),
('multipart_string_bytes', 'b"Hola" b"Hello" b"Bye"'),
('multipart_string_triple', '"""Something here""" "and now"'),
('multipart_string_different_prefixes', 'u"Something" "Other thing" r"last thing"'),
('multiple_assignments', 'x = y = z = 42'),
('multiple_assignments_with_yield', 'x = y = z = yield 42'),
('multiple_pass',
'''
pass; pass
pass
'''),
('namedexpr', '(x := [1, 2, 3])'),
('namedexpr_false', '(x := False)'),
('namedexpr_none', '(x := None)'),
('namedexpr_true', '(x := True)'),
('nonlocal', 'nonlocal a, b'),
('number_complex', '-2.234+1j'),
('number_float', '-34.2333'),
('number_imaginary_literal', '1.1234j'),
('number_integer', '-234'),
('number_underscores', '1_234_567'),
('pass', 'pass'),
('pos_args',
'''
def f(a, b):
pass
'''),
('pos_args_with_default',
'''
def f(a, b=2):
pass
'''),
('pos_only_args',
'''
def f(a, /):
pass
'''),
('pos_only_args_with_default',
'''
def f(a=0, /):
pass
'''),
('pos_posonly_args',
'''
def f(a, b, /, c, d):
pass
'''),
('pos_posonly_args_with_default',
'''
def f(a, b=0, /, c=2):
pass
'''),
('primary_mixed', 'a.b.c().d[0]'),
('raise', 'raise'),
('raise_ellipsis', 'raise ...'),
('raise_expr', 'raise a'),
('raise_from', 'raise a from b'),
('return', 'return'),
('return_expr', 'return a'),
('set', '{1, 2+4, 3+5}'),
('set_comp', '{i for i in a}'),
('set_trailing_comma', '{1, 2, 3,}'),
('simple_assignment', 'x = 42'),
('simple_assignment_with_yield', 'x = yield 42'),
('string_bytes', 'b"hello"'),
('string_concatenation_bytes', 'b"hello" b"world"'),
('string_concatenation_simple', '"abcd" "efgh"'),
('string_format_simple', 'f"hello"'),
('string_format_with_formatted_value', 'f"hello {world}"'),
('string_simple', '"hello"'),
('string_unicode', 'u"hello"'),
('subscript_attribute', 'a[0].b'),
('subscript_call', 'a[b]()'),
('subscript_multiple_slices', 'a[0:a:2, 1]'),
('subscript_simple', 'a[0]'),
('subscript_single_element_tuple', 'a[0,]'),
('subscript_trailing_comma', 'a[0, 1, 2,]'),
('subscript_tuple', 'a[0, 1, 2]'),
('subscript_whole_slice', 'a[0+1:b:c]'),
('try_except',
'''
try:
pass
except:
pass
'''),
('try_except_else',
'''
try:
pass
except:
pass
else:
pass
'''),
('try_except_else_finally',
'''
try:
pass
except:
pass
else:
pass
finally:
pass
'''),
('try_except_expr',
'''
try:
pass
except a:
pass
'''),
('try_except_expr_target',
'''
try:
pass
except a as b:
pass
'''),
('try_except_finally',
'''
try:
pass
except:
pass
finally:
pass
'''),
('try_finally',
'''
try:
pass
finally:
pass
'''),
('unpacking_binop', '[*([1, 2, 3] + [3, 4, 5])]'),
('unpacking_call', '[*b()]'),
('unpacking_compare', '[*(x < y)]'),
('unpacking_constant', '[*3]'),
('unpacking_dict', '[*{1: 2, 3: 4}]'),
('unpacking_dict_comprehension', '[*{x:y for x,y in z}]'),
('unpacking_ifexpr', '[*([1, 2, 3] if x else y)]'),
('unpacking_list', '[*[1,2,3]]'),
('unpacking_list_comprehension', '[*[x for x in y]]'),
('unpacking_namedexpr', '[*(x:=[1, 2, 3])]'),
('unpacking_set', '[*{1,2,3}]'),
('unpacking_set_comprehension', '[*{x for x in y}]'),
('unpacking_string', '[*"myvalue"]'),
('unpacking_tuple', '[*(1,2,3)]'),
('unpacking_unaryop', '[*(not [1, 2, 3])]'),
('unpacking_yield', '[*(yield 42)]'),
('unpacking_yieldfrom', '[*(yield from x)]'),
('tuple', '(1, 2, 3)'),
('vararg',
'''
def f(*a):
pass
'''),
('vararg_kwonly_args',
'''
def f(*a, b):
pass
'''),
('while',
'''
while a:
pass
'''),
('while_else',
'''
while a:
pass
else:
pass
'''),
('with',
'''
with a:
pass
'''),
('with_as',
'''
with a as b:
pass
'''),
('with_as_paren',
'''
with a as (b):
pass
'''),
('with_as_empty', 'with a as (): pass'),
('with_list_recursive',
'''
with a as [x, [y, z]]:
pass
'''),
('with_tuple_recursive',
'''
with a as ((x, y), z):
pass
'''),
('with_tuple_target',
'''
with a as (x, y):
pass
'''),
('with_list_target',
'''
with a as [x, y]:
pass
'''),
('yield', 'yield'),
('yield_expr', 'yield a'),
('yield_from', 'yield from a'),
]
FAIL_TEST_CASES = [
("annotation_multiple_targets", "(a, b): int = 42"),
("annotation_nested_tuple", "((a, b)): int"),
("annotation_list", "[a]: int"),
("annotation_lambda", "lambda: int = 42"),
("annotation_tuple", "(a,): int"),
("annotation_tuple_without_paren", "a,: int"),
("assignment_keyword", "a = if"),
("augmented_assignment_list", "[a, b] += 1"),
("augmented_assignment_tuple", "a, b += 1"),
("augmented_assignment_tuple_paren", "(a, b) += (1, 2)"),
("comprehension_lambda", "(a for a in lambda: b)"),
("comprehension_else", "(a for a in b if c else d"),
("del_call", "del a()"),
("del_call_genexp", "del a(i for i in b)"),
("del_subscript_call", "del a[b]()"),
("del_attribute_call", "del a.b()"),
("del_mixed_call", "del a[0].b().c.d()"),
("for_star_targets_call", "for a() in b: pass"),
("for_star_targets_subscript_call", "for a[b]() in c: pass"),
("for_star_targets_attribute_call", "for a.b() in c: pass"),
("for_star_targets_mixed_call", "for a[0].b().c.d() in e: pass"),
("for_star_targets_in", "for a, in in b: pass"),
("f-string_assignment", "f'{x = 42}'"),
("f-string_empty", "f'{}'"),
("f-string_function_def", "f'{def f(): pass}'"),
("f-string_lambda", "f'{lambda x: 42}'"),
("f-string_singe_brace", "f'{'"),
("f-string_single_closing_brace", "f'}'"),
("from_import_invalid", "from import import a"),
("from_import_trailing_comma", "from a import b,"),
("import_non_ascii_syntax_error", "import ä £"),
# This test case checks error paths involving tokens with uninitialized
# values of col_offset and end_col_offset.
("invalid indentation",
"""
def f():
a
a
"""),
("not_terminated_string", "a = 'example"),
("try_except_attribute_target",
"""
try:
pass
except Exception as a.b:
pass
"""),
("try_except_subscript_target",
"""
try:
pass
except Exception as a[0]:
pass
"""),
]
FAIL_SPECIALIZED_MESSAGE_CASES = [
("f(x, y, z=1, **b, *a", "iterable argument unpacking follows keyword argument unpacking"),
("f(x, y=1, *z, **a, b", "positional argument follows keyword argument unpacking"),
("f(x, y, z=1, a=2, b", "positional argument follows keyword argument"),
("True = 1", "cannot assign to True"),
("a() = 1", "cannot assign to function call"),
("(a, b): int", "only single target (not tuple) can be annotated"),
("[a, b]: int", "only single target (not list) can be annotated"),
("a(): int", "illegal target for annotation"),
("1 += 1", "'literal' is an illegal expression for augmented assignment"),
("pass\n pass", "unexpected indent"),
("def f():\npass", "expected an indented block"),
("def f(*): pass", "named arguments must follow bare *"),
("def f(*,): pass", "named arguments must follow bare *"),
("def f(*, **a): pass", "named arguments must follow bare *"),
("lambda *: pass", "named arguments must follow bare *"),
("lambda *,: pass", "named arguments must follow bare *"),
("lambda *, **a: pass", "named arguments must follow bare *"),
("f(g()=2", "expression cannot contain assignment, perhaps you meant \"==\"?"),
("f(a, b, *c, d.e=2", "expression cannot contain assignment, perhaps you meant \"==\"?"),
("f(*a, **b, c=0, d[1]=3)", "expression cannot contain assignment, perhaps you meant \"==\"?"),
]
GOOD_BUT_FAIL_TEST_CASES = [
('string_concatenation_format', 'f"{hello} world" f"again {and_again}"'),
('string_concatenation_multiple',
'''
f"hello" f"{world} again" f"and_again"
'''),
('f-string_multiline_comp',
"""
f'''
{(i for i in a
if b)}
'''
"""),
]
FSTRINGS_TRACEBACKS = {
'multiline_fstrings_same_line_with_brace': (
"""
f'''
{a$b}
'''
""",
'(a$b)',
),
'multiline_fstring_brace_on_next_line': (
"""
f'''
{a$b
}'''
""",
'(a$b',
),
'multiline_fstring_brace_on_previous_line': (
"""
f'''
{
a$b}'''
""",
'a$b)',
),
}
EXPRESSIONS_TEST_CASES = [
("expression_add", "1+1"),
("expression_add_2", "a+b"),
("expression_call", "f(a, b=2, **kw)"),
("expression_tuple", "1, 2, 3"),
("expression_tuple_one_value", "1,")
]
def cleanup_source(source: Any) -> str:
if isinstance(source, str):
result = dedent(source)
elif not isinstance(source, (list, tuple)):
result = "\n".join(source)
else:
raise TypeError(f"Invalid type for test source: {source}")
return result
def prepare_test_cases(
test_cases: Iterable[Tuple[str, Union[str, Iterable[str]]]]
) -> Tuple[Iterable[str], Iterable[str]]:
test_ids, _test_sources = zip(*test_cases)
test_sources = list(_test_sources)
for index, source in enumerate(test_sources):
result = cleanup_source(source)
test_sources[index] = result
return test_ids, test_sources
TEST_IDS, TEST_SOURCES = prepare_test_cases(TEST_CASES)
GOOD_BUT_FAIL_TEST_IDS, GOOD_BUT_FAIL_SOURCES = prepare_test_cases(
GOOD_BUT_FAIL_TEST_CASES
)
FAIL_TEST_IDS, FAIL_SOURCES = prepare_test_cases(FAIL_TEST_CASES)
EXPRESSIONS_TEST_IDS, EXPRESSIONS_TEST_SOURCES = prepare_test_cases(
EXPRESSIONS_TEST_CASES
)
class ASTGenerationTest(unittest.TestCase):
def test_correct_ast_generation_on_source_files(self) -> None:
self.maxDiff = None
for source in TEST_SOURCES:
actual_ast = peg_parser.parse_string(source)
expected_ast = peg_parser.parse_string(source, oldparser=True)
self.assertEqual(
ast.dump(actual_ast, include_attributes=True),
ast.dump(expected_ast, include_attributes=True),
f"Wrong AST generation for source: {source}",
)
def test_incorrect_ast_generation_on_source_files(self) -> None:
for source in FAIL_SOURCES:
with self.assertRaises(SyntaxError, msg=f"Parsing {source} did not raise an exception"):
peg_parser.parse_string(source)
def test_incorrect_ast_generation_with_specialized_errors(self) -> None:
for source, error_text in FAIL_SPECIALIZED_MESSAGE_CASES:
exc = IndentationError if "indent" in error_text else SyntaxError
with self.assertRaises(exc) as se:
peg_parser.parse_string(source)
self.assertTrue(
error_text in se.exception.msg,
f"Actual error message does not match expexted for {source}"
)
@unittest.expectedFailure
def test_correct_but_known_to_fail_ast_generation_on_source_files(self) -> None:
for source in GOOD_BUT_FAIL_SOURCES:
actual_ast = peg_parser.parse_string(source)
expected_ast = peg_parser.parse_string(source, oldparser=True)
self.assertEqual(
ast.dump(actual_ast, include_attributes=True),
ast.dump(expected_ast, include_attributes=True),
f"Wrong AST generation for source: {source}",
)
def test_correct_ast_generation_without_pos_info(self) -> None:
for source in GOOD_BUT_FAIL_SOURCES:
actual_ast = peg_parser.parse_string(source)
expected_ast = peg_parser.parse_string(source, oldparser=True)
self.assertEqual(
ast.dump(actual_ast),
ast.dump(expected_ast),
f"Wrong AST generation for source: {source}",
)
def test_fstring_parse_error_tracebacks(self) -> None:
for source, error_text in FSTRINGS_TRACEBACKS.values():
with self.assertRaises(SyntaxError) as se:
peg_parser.parse_string(dedent(source))
self.assertEqual(error_text, se.exception.text)
def test_correct_ast_generatrion_eval(self) -> None:
for source in EXPRESSIONS_TEST_SOURCES:
actual_ast = peg_parser.parse_string(source, mode='eval')
expected_ast = peg_parser.parse_string(source, mode='eval', oldparser=True)
self.assertEqual(
ast.dump(actual_ast, include_attributes=True),
ast.dump(expected_ast, include_attributes=True),
f"Wrong AST generation for source: {source}",
)
def test_tokenizer_errors_are_propagated(self) -> None:
n=201
with self.assertRaisesRegex(SyntaxError, "too many nested parentheses"):
peg_parser.parse_string(n*'(' + ')'*n)
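The deleted test above drove the private _peg_parser helper to compare PEG output against the old parser. With a single parser that comparison is moot; the closest public equivalent is simply parsing and dumping with the ast module, sketched here for illustration using strings drawn from the removed TEST_CASES:

import ast

for source in ("x: tuple = 1, 2", "a or b or c", "del a, (b, c)"):
    tree = ast.parse(source)
    print(ast.dump(tree))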


@ -33,7 +33,6 @@ import shutil
import tempfile
import unittest
import warnings
from test.support import use_old_parser
TEMPLATE = r"""# coding: %s
@ -168,8 +167,7 @@ class TestLiterals(unittest.TestCase):
eval("b'''\n\\z'''")
self.assertEqual(len(w), 1)
self.assertEqual(w[0].filename, '<string>')
if use_old_parser():
self.assertEqual(w[0].lineno, 1)
self.assertEqual(w[0].lineno, 1)
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('error', category=DeprecationWarning)
@ -178,8 +176,7 @@ class TestLiterals(unittest.TestCase):
exc = cm.exception
self.assertEqual(w, [])
self.assertEqual(exc.filename, '<string>')
if use_old_parser():
self.assertEqual(exc.lineno, 1)
self.assertEqual(exc.lineno, 1)
def test_eval_bytes_raw(self):
self.assertEqual(eval(""" br'x' """), b'x')


@ -733,7 +733,6 @@ class SyntaxTestCase(unittest.TestCase):
def test_assign_call(self):
self._check_error("f() = 1", "assign")
@unittest.skipIf(support.use_old_parser(), "The old parser cannot generate these error messages")
def test_assign_del(self):
self._check_error("del (,)", "invalid syntax")
self._check_error("del 1", "delete literal")


@ -655,7 +655,6 @@ class BaseExceptionReportingTests:
self.assertIn('inner_raise() # Marker', blocks[2])
self.check_zero_div(blocks[2])
@unittest.skipIf(support.use_old_parser(), "Pegen is arguably better here, so no need to fix this")
def test_syntax_error_offset_at_eol(self):
# See #10186.
def e():


@ -300,32 +300,24 @@ LIBFFI_INCLUDEDIR= @LIBFFI_INCLUDEDIR@
# Parser
PEGEN_OBJS= \
Parser/pegen/pegen.o \
Parser/pegen/parse.o \
Parser/pegen/parse_string.o \
Parser/pegen/peg_api.o
Parser/pegen.o \
Parser/parser.o \
Parser/string_parser.o \
Parser/peg_api.o
PEGEN_HEADERS= \
$(srcdir)/Include/internal/pegen_interface.h \
$(srcdir)/Parser/pegen/pegen.h \
$(srcdir)/Parser/pegen/parse_string.h
$(srcdir)/Parser/pegen.h \
$(srcdir)/Parser/string_parser.h
POBJS= \
Parser/acceler.o \
Parser/grammar1.o \
Parser/listnode.o \
Parser/node.o \
Parser/parser.o \
Parser/token.o \
PARSER_OBJS= $(POBJS) $(PEGEN_OBJS) Parser/myreadline.o Parser/parsetok.o Parser/tokenizer.o
PARSER_OBJS= $(POBJS) $(PEGEN_OBJS) Parser/myreadline.o Parser/tokenizer.o
PARSER_HEADERS= \
$(PEGEN_HEADERS) \
$(srcdir)/Include/grammar.h \
$(srcdir)/Include/parsetok.h \
$(srcdir)/Parser/parser.h \
$(srcdir)/Parser/tokenizer.h
##########################################################################
@ -568,7 +560,7 @@ coverage-lcov:
@echo
# Force regeneration of parser and importlib
coverage-report: regen-grammar regen-token regen-importlib
coverage-report: regen-token regen-importlib
@ # build with coverage info
$(MAKE) coverage
@ # run tests, ignore failures
@ -749,8 +741,8 @@ regen-importlib: Programs/_freeze_importlib
############################################################################
# Regenerate all generated files
regen-all: regen-opcode regen-opcode-targets regen-typeslots regen-grammar \
regen-token regen-keyword regen-symbol regen-ast regen-importlib clinic \
regen-all: regen-opcode regen-opcode-targets regen-typeslots \
regen-token regen-symbol regen-ast regen-importlib clinic \
regen-pegen-metaparser regen-pegen
############################################################################
@ -816,18 +808,6 @@ Python/initconfig.o: $(srcdir)/Python/initconfig.c
$(IO_OBJS): $(IO_H)
.PHONY: regen-grammar
regen-grammar: regen-token
# Regenerate Include/graminit.h and Python/graminit.c
# from Grammar/Grammar using pgen
@$(MKDIR_P) Include
PYTHONPATH=$(srcdir) $(PYTHON_FOR_REGEN) -m Parser.pgen $(srcdir)/Grammar/Grammar \
$(srcdir)/Grammar/Tokens \
$(srcdir)/Include/graminit.h.new \
$(srcdir)/Python/graminit.c.new
$(UPDATE_FILE) $(srcdir)/Include/graminit.h $(srcdir)/Include/graminit.h.new
$(UPDATE_FILE) $(srcdir)/Python/graminit.c $(srcdir)/Python/graminit.c.new
.PHONY: regen-pegen-metaparser
regen-pegen-metaparser:
@$(MKDIR_P) $(srcdir)/Tools/peg_generator/pegen
@ -839,12 +819,12 @@ regen-pegen-metaparser:
.PHONY: regen-pegen
regen-pegen:
@$(MKDIR_P) $(srcdir)/Parser/pegen
@$(MKDIR_P) $(srcdir)/Parser
PYTHONPATH=$(srcdir)/Tools/peg_generator $(PYTHON_FOR_REGEN) -m pegen -q c \
$(srcdir)/Grammar/python.gram \
$(srcdir)/Grammar/Tokens \
-o $(srcdir)/Parser/pegen/parse.new.c
$(UPDATE_FILE) $(srcdir)/Parser/pegen/parse.c $(srcdir)/Parser/pegen/parse.new.c
-o $(srcdir)/Parser/parser.new.c
$(UPDATE_FILE) $(srcdir)/Parser/parser.c $(srcdir)/Parser/parser.new.c
.PHONY=regen-ast
regen-ast:
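After these Makefile changes, make regen-pegen writes the generated C parser to Parser/parser.c instead of Parser/pegen/parse.c, and the pgen-based regen-grammar target is gone. Roughly what the updated rule runs, rendered as a hedged Python sketch of the same command line:

import os
import subprocess
import sys

env = {**os.environ, "PYTHONPATH": "Tools/peg_generator"}
subprocess.run(
    [sys.executable, "-m", "pegen", "-q", "c",
     "Grammar/python.gram", "Grammar/Tokens",
     "-o", "Parser/parser.new.c"],
    env=env,
    check=True,
)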


@ -0,0 +1 @@
Remove the old parser, the :mod:`parser` module and all associated support code, command-line options and environment variables. Patch by Pablo Galindo.


@ -134,9 +134,6 @@ faulthandler faulthandler.c
# can call _PyTraceMalloc_NewReference().
_tracemalloc _tracemalloc.c
# PEG-based parser module -- slated to be *the* parser
_peg_parser _peg_parser.c
# The rest of the modules listed in this file are all commented out by
# default. Usually they can be detected and built as dynamically
# loaded modules by the new setup.py script added in Python 2.1. If
@ -331,10 +328,6 @@ _symtable symtablemodule.c
# Helper module for various ascii-encoders
#binascii binascii.c
# Fred Drake's interface to the Python parser
#parser parsermodule.c
# Andrew Kuchling's zlib module.
# This require zlib 1.1.3 (or later).
# See http://www.gzip.org/zlib/


@ -1,153 +0,0 @@
#include <Python.h>
#include "pegen_interface.h"
static int
_mode_str_to_int(char *mode_str)
{
int mode;
if (strcmp(mode_str, "exec") == 0) {
mode = Py_file_input;
}
else if (strcmp(mode_str, "eval") == 0) {
mode = Py_eval_input;
}
else if (strcmp(mode_str, "single") == 0) {
mode = Py_single_input;
}
else {
mode = -1;
}
return mode;
}
static mod_ty
_run_parser(char *str, char *filename, int mode, PyCompilerFlags *flags, PyArena *arena, int oldparser)
{
mod_ty mod;
if (!oldparser) {
mod = PyPegen_ASTFromString(str, filename, mode, flags, arena);
}
else {
mod = PyParser_ASTFromString(str, filename, mode, flags, arena);
}
return mod;
}
PyObject *
_Py_compile_string(PyObject *self, PyObject *args, PyObject *kwds)
{
static char *keywords[] = {"string", "filename", "mode", "oldparser", NULL};
char *the_string;
char *filename = "<string>";
char *mode_str = "exec";
int oldparser = 0;
if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|ssp", keywords,
&the_string, &filename, &mode_str, &oldparser)) {
return NULL;
}
int mode = _mode_str_to_int(mode_str);
if (mode == -1) {
return PyErr_Format(PyExc_ValueError, "mode must be either 'exec' or 'eval' or 'single'");
}
PyCompilerFlags flags = _PyCompilerFlags_INIT;
flags.cf_flags = PyCF_IGNORE_COOKIE;
PyArena *arena = PyArena_New();
if (arena == NULL) {
return NULL;
}
mod_ty mod = _run_parser(the_string, filename, mode, &flags, arena, oldparser);
if (mod == NULL) {
PyArena_Free(arena);
return NULL;
}
PyObject *filename_ob = PyUnicode_DecodeFSDefault(filename);
if (filename_ob == NULL) {
PyArena_Free(arena);
return NULL;
}
PyCodeObject *result = PyAST_CompileObject(mod, filename_ob, &flags, -1, arena);
Py_XDECREF(filename_ob);
PyArena_Free(arena);
return (PyObject *)result;
}
PyObject *
_Py_parse_string(PyObject *self, PyObject *args, PyObject *kwds)
{
static char *keywords[] = {"string", "filename", "mode", "oldparser", "ast", NULL};
char *the_string;
char *filename = "<string>";
char *mode_str = "exec";
int oldparser = 0;
int ast = 1;
if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|sspp", keywords,
&the_string, &filename, &mode_str, &oldparser, &ast)) {
return NULL;
}
int mode = _mode_str_to_int(mode_str);
if (mode == -1) {
return PyErr_Format(PyExc_ValueError, "mode must be either 'exec' or 'eval' or 'single'");
}
PyCompilerFlags flags = _PyCompilerFlags_INIT;
flags.cf_flags = PyCF_IGNORE_COOKIE;
PyArena *arena = PyArena_New();
if (arena == NULL) {
return NULL;
}
mod_ty mod = _run_parser(the_string, filename, mode, &flags, arena, oldparser);
if (mod == NULL) {
PyArena_Free(arena);
return NULL;
}
PyObject *result;
if (ast) {
result = PyAST_mod2obj(mod);
}
else {
Py_INCREF(Py_None);
result = Py_None;
}
PyArena_Free(arena);
return result;
}
static PyMethodDef ParseMethods[] = {
{
"parse_string",
(PyCFunction)(void (*)(void))_Py_parse_string,
METH_VARARGS|METH_KEYWORDS,
"Parse a string, return an AST."
},
{
"compile_string",
(PyCFunction)(void (*)(void))_Py_compile_string,
METH_VARARGS|METH_KEYWORDS,
"Compile a string, return a code object."
},
{NULL, NULL, 0, NULL} /* Sentinel */
};
static struct PyModuleDef parsemodule = {
PyModuleDef_HEAD_INIT,
.m_name = "peg_parser",
.m_doc = "A parser.",
.m_methods = ParseMethods,
};
PyMODINIT_FUNC
PyInit__peg_parser(void)
{
return PyModule_Create(&parsemodule);
}
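For reference, this is roughly the Python-level surface the deleted _peg_parser extension exposed (keyword names reconstructed from the argument-parsing code above). It only ever worked on 3.9; on 3.10+ the import itself fails:

import _peg_parser  # ModuleNotFoundError on 3.10+

tree = _peg_parser.parse_string("1 + 1", mode="eval")                      # AST via the PEG parser
old_tree = _peg_parser.parse_string("1 + 1", mode="eval", oldparser=True)  # AST via the old parser
code = _peg_parser.compile_string("print('hi')", filename="<demo>")        # code object
exec(code)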

File diff suppressed because it is too large.


@ -45,7 +45,6 @@ extern PyObject* PyInit__symtable(void);
extern PyObject* PyInit_mmap(void);
extern PyObject* PyInit__csv(void);
extern PyObject* PyInit__sre(void);
extern PyObject* PyInit_parser(void);
extern PyObject* PyInit_winreg(void);
extern PyObject* PyInit__struct(void);
extern PyObject* PyInit__datetime(void);
@ -75,7 +74,6 @@ extern PyObject* PyInit__opcode(void);
extern PyObject* PyInit__contextvars(void);
extern PyObject* PyInit__peg_parser(void);
/* tools/freeze/makeconfig.py marker for additional "extern" */
/* -- ADDMODULE MARKER 1 -- */
@ -127,7 +125,6 @@ struct _inittab _PyImport_Inittab[] = {
{"mmap", PyInit_mmap},
{"_csv", PyInit__csv},
{"_sre", PyInit__sre},
{"parser", PyInit_parser},
{"winreg", PyInit_winreg},
{"_struct", PyInit__struct},
{"_datetime", PyInit__datetime},
@ -171,7 +168,6 @@ struct _inittab _PyImport_Inittab[] = {
{"_opcode", PyInit__opcode},
{"_contextvars", PyInit__contextvars},
{"_peg_parser", PyInit__peg_parser},
/* Sentinel */
{0, 0}
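On Windows builds, where PC/config.c links extension modules straight into pythoncore, both the old parser module and the private _peg_parser helper were builtins; after this change neither is. A quick, illustrative check:

import sys

# Expected on a 3.10+ Windows build; on other platforms parser was a shared
# extension rather than a builtin, so only the second line applies everywhere.
print('parser' in sys.builtin_module_names)       # expected: False
print('_peg_parser' in sys.builtin_module_names)  # expected: False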


@ -279,10 +279,9 @@
<ClInclude Include="..\Objects\stringlib\replace.h" />
<ClInclude Include="..\Objects\stringlib\split.h" />
<ClInclude Include="..\Objects\unicodetype_db.h" />
<ClInclude Include="..\Parser\parser.h" />
<ClInclude Include="..\Parser\tokenizer.h" />
<ClInclude Include="..\Parser\pegen\parse_string.h" />
<ClInclude Include="..\Parser\pegen\pegen.h" />
<ClInclude Include="..\Parser\string_parser.h" />
<ClInclude Include="..\Parser\pegen.h" />
<ClInclude Include="..\PC\errmap.h" />
<ClInclude Include="..\PC\pyconfig.h" />
<ClInclude Include="..\Python\ceval_gil.h" />
@ -343,8 +342,6 @@
<ClCompile Include="..\Modules\mmapmodule.c" />
<ClCompile Include="..\Modules\_opcode.c" />
<ClCompile Include="..\Modules\_operator.c" />
<ClCompile Include="..\Modules\parsermodule.c" />
<ClCompile Include="..\Modules\_peg_parser.c" />
<ClCompile Include="..\Modules\posixmodule.c" />
<ClCompile Include="..\Modules\rotatingtree.c" />
<ClCompile Include="..\Modules\sha1module.c" />
@ -417,19 +414,13 @@
<ClCompile Include="..\Objects\unicodectype.c" />
<ClCompile Include="..\Objects\unicodeobject.c" />
<ClCompile Include="..\Objects\weakrefobject.c" />
<ClCompile Include="..\Parser\acceler.c" />
<ClCompile Include="..\Parser\grammar1.c" />
<ClCompile Include="..\Parser\listnode.c" />
<ClCompile Include="..\Parser\myreadline.c" />
<ClCompile Include="..\Parser\node.c" />
<ClCompile Include="..\Parser\parser.c" />
<ClCompile Include="..\Parser\parsetok.c" />
<ClCompile Include="..\Parser\tokenizer.c" />
<ClCompile Include="..\Parser\token.c" />
<ClCompile Include="..\Parser\pegen\pegen.c" />
<ClCompile Include="..\Parser\pegen\parse.c" />
<ClCompile Include="..\Parser\pegen\parse_string.c" />
<ClCompile Include="..\Parser\pegen\peg_api.c" />
<ClCompile Include="..\Parser\pegen.c" />
<ClCompile Include="..\Parser\parser.c" />
<ClCompile Include="..\Parser\string_parser.c" />
<ClCompile Include="..\Parser\peg_api.c" />
<ClCompile Include="..\PC\invalid_parameter_handler.c" />
<ClCompile Include="..\PC\winreg.c" />
<ClCompile Include="..\PC\config.c" />


@ -522,9 +522,6 @@
<ClInclude Include="..\Objects\unicodetype_db.h">
<Filter>Objects</Filter>
</ClInclude>
<ClInclude Include="..\Parser\parser.h">
<Filter>Parser</Filter>
</ClInclude>
<ClInclude Include="..\Parser\tokenizer.h">
<Filter>Parser</Filter>
</ClInclude>
@ -908,12 +905,6 @@
<ClCompile Include="..\Objects\weakrefobject.c">
<Filter>Objects</Filter>
</ClCompile>
<ClCompile Include="..\Parser\acceler.c">
<Filter>Parser</Filter>
</ClCompile>
<ClCompile Include="..\Parser\grammar1.c">
<Filter>Parser</Filter>
</ClCompile>
<ClCompile Include="..\Parser\pegen\pegen.c">
<Filter>Parser</Filter>
</ClCompile>
@ -926,21 +917,9 @@
<ClCompile Include="..\Parser\pegen\peg_api.c">
<Filter>Parser</Filter>
</ClCompile>
<ClCompile Include="..\Parser\listnode.c">
<Filter>Parser</Filter>
</ClCompile>
<ClCompile Include="..\Parser\myreadline.c">
<Filter>Parser</Filter>
</ClCompile>
<ClCompile Include="..\Parser\node.c">
<Filter>Parser</Filter>
</ClCompile>
<ClCompile Include="..\Parser\parser.c">
<Filter>Parser</Filter>
</ClCompile>
<ClCompile Include="..\Parser\parsetok.c">
<Filter>Parser</Filter>
</ClCompile>
<ClCompile Include="..\Parser\tokenizer.c">
<Filter>Parser</Filter>
</ClCompile>


@ -155,22 +155,11 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
<Target Name="_RegenGrammar" BeforeTargets="Build">
<!-- Regenerate Include/graminit.h and Python/graminit.c from Grammar/Grammar using pgen-->
<Exec Command="&quot;$(PythonExe)&quot; -m Parser.pgen &quot;$(PySourcePath)Grammar\Grammar&quot; &quot;$(PySourcePath)Grammar\Tokens&quot; &quot;$(IntDir)graminit.h&quot; &quot;$(IntDir)graminit.c&quot;" />
<Copy SourceFiles="$(IntDir)graminit.h" DestinationFiles="$(PySourcePath)Include\graminit.h">
<Output TaskParameter="CopiedFiles" ItemName="_UpdatedH" />
</Copy>
<Copy SourceFiles="$(IntDir)graminit.c" DestinationFiles="$(PySourcePath)Python\graminit.c">
<Output TaskParameter="CopiedFiles" ItemName="_UpdatedC" />
</Copy>
<Warning Text="Grammar updated. You will need to rebuild pythoncore to see the changes." Condition="'@(_UpdatedH)' != '' and '@(_UpdatedC)' != ''" />
</Target>
<Target Name="_RegenPegen" BeforeTargets="Build">
<!-- Regenerate Parser/pegen/parse.c -->
<!-- Regenerate Parser/parser.c -->
<SetEnv Name="PYTHONPATH" Prefix="true" Value="$(PySourcePath)Tools\peg_generator\" />
<Exec Command="&quot;$(PythonExe)&quot; -m pegen -q c &quot;$(PySourcePath)Grammar\python.gram&quot; &quot;$(PySourcePath)Grammar\Tokens&quot; -o &quot;$(IntDir)parse.c&quot;" />
<Copy SourceFiles="$(IntDir)parse.c" DestinationFiles="$(PySourcePath)Parser\pegen\parse.c">
<Exec Command="&quot;$(PythonExe)&quot; -m pegen -q c &quot;$(PySourcePath)Grammar\python.gram&quot; &quot;$(PySourcePath)Grammar\Tokens&quot; -o &quot;$(IntDir)parser.c&quot;" />
<Copy SourceFiles="$(IntDir)parser.c" DestinationFiles="$(PySourcePath)Parser\parser.c">
<Output TaskParameter="CopiedFiles" ItemName="_UpdatedParse" />
</Copy>
<Warning Text="Pegen updated. You will need to rebuild pythoncore to see the changes." Condition="'@(_UpdatedParse)' != ''" />


@ -1,123 +0,0 @@
/* Parser accelerator module */
/* The parser as originally conceived had disappointing performance.
This module does some precomputation that speeds up the selection
of a DFA based upon a token, turning a search through an array
into a simple indexing operation. The parser now cannot work
without the accelerators installed. Note that the accelerators
are installed dynamically when the parser is initialized, they
are not part of the static data structure written on graminit.[ch]
by the parser generator. */
#include "Python.h"
#include "grammar.h"
#include "node.h"
#include "token.h"
#include "parser.h"
/* Forward references */
static void fixdfa(grammar *, const dfa *);
static void fixstate(grammar *, state *);
void
PyGrammar_AddAccelerators(grammar *g)
{
int i;
const dfa *d = g->g_dfa;
for (i = g->g_ndfas; --i >= 0; d++)
fixdfa(g, d);
g->g_accel = 1;
}
void
PyGrammar_RemoveAccelerators(grammar *g)
{
int i;
g->g_accel = 0;
const dfa *d = g->g_dfa;
for (i = g->g_ndfas; --i >= 0; d++) {
state *s;
int j;
s = d->d_state;
for (j = 0; j < d->d_nstates; j++, s++) {
if (s->s_accel)
PyObject_FREE(s->s_accel);
s->s_accel = NULL;
}
}
}
static void
fixdfa(grammar *g, const dfa *d)
{
state *s;
int j;
s = d->d_state;
for (j = 0; j < d->d_nstates; j++, s++)
fixstate(g, s);
}
static void
fixstate(grammar *g, state *s)
{
const arc *a;
int k;
int *accel;
int nl = g->g_ll.ll_nlabels;
s->s_accept = 0;
accel = (int *) PyObject_MALLOC(nl * sizeof(int));
if (accel == NULL) {
fprintf(stderr, "no mem to build parser accelerators\n");
exit(1);
}
for (k = 0; k < nl; k++)
accel[k] = -1;
a = s->s_arc;
for (k = s->s_narcs; --k >= 0; a++) {
int lbl = a->a_lbl;
const label *l = &g->g_ll.ll_label[lbl];
int type = l->lb_type;
if (a->a_arrow >= (1 << 7)) {
printf("XXX too many states!\n");
continue;
}
if (ISNONTERMINAL(type)) {
const dfa *d1 = PyGrammar_FindDFA(g, type);
int ibit;
if (type - NT_OFFSET >= (1 << 7)) {
printf("XXX too high nonterminal number!\n");
continue;
}
for (ibit = 0; ibit < g->g_ll.ll_nlabels; ibit++) {
if (testbit(d1->d_first, ibit)) {
if (accel[ibit] != -1)
printf("XXX ambiguity!\n");
accel[ibit] = a->a_arrow | (1 << 7) |
((type - NT_OFFSET) << 8);
}
}
}
else if (lbl == EMPTY)
s->s_accept = 1;
else if (lbl >= 0 && lbl < nl)
accel[lbl] = a->a_arrow;
}
while (nl > 0 && accel[nl-1] == -1)
nl--;
for (k = 0; k < nl && accel[k] == -1;)
k++;
if (k < nl) {
int i;
s->s_accel = (int *) PyObject_MALLOC((nl-k) * sizeof(int));
if (s->s_accel == NULL) {
fprintf(stderr, "no mem to add parser accelerators\n");
exit(1);
}
s->s_lower = k;
s->s_upper = nl;
for (i = 0; k < nl; i++, k++)
s->s_accel[i] = accel[k];
}
PyObject_FREE(accel);
}


@ -1,47 +0,0 @@
/* Grammar subroutines needed by parser */
#include "Python.h"
#include "grammar.h"
#include "token.h"
/* Return the DFA for the given type */
const dfa *
PyGrammar_FindDFA(grammar *g, int type)
{
/* Massive speed-up */
const dfa *d = &g->g_dfa[type - NT_OFFSET];
assert(d->d_type == type);
return d;
}
const char *
PyGrammar_LabelRepr(label *lb)
{
static char buf[100];
if (lb->lb_type == ENDMARKER)
return "EMPTY";
else if (ISNONTERMINAL(lb->lb_type)) {
if (lb->lb_str == NULL) {
PyOS_snprintf(buf, sizeof(buf), "NT%d", lb->lb_type);
return buf;
}
else
return lb->lb_str;
}
else if (lb->lb_type < N_TOKENS) {
if (lb->lb_str == NULL)
return _PyParser_TokenNames[lb->lb_type];
else {
PyOS_snprintf(buf, sizeof(buf), "%.32s(%.32s)",
_PyParser_TokenNames[lb->lb_type], lb->lb_str);
return buf;
}
}
else {
Py_FatalError("invalid grammar label");
return NULL;
}
}


@ -1,71 +0,0 @@
/* List a node on a file */
#include "Python.h"
#include "pycore_interp.h" // PyInterpreterState.parser
#include "pycore_pystate.h" // _PyInterpreterState_GET()
#include "token.h"
#include "node.h"
/* Forward */
static void list1node(FILE *, node *);
static void listnode(FILE *, node *);
void
PyNode_ListTree(node *n)
{
listnode(stdout, n);
}
static void
listnode(FILE *fp, node *n)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
interp->parser.listnode.level = 0;
interp->parser.listnode.atbol = 1;
list1node(fp, n);
}
static void
list1node(FILE *fp, node *n)
{
if (n == NULL)
return;
if (ISNONTERMINAL(TYPE(n))) {
int i;
for (i = 0; i < NCH(n); i++)
list1node(fp, CHILD(n, i));
}
else if (ISTERMINAL(TYPE(n))) {
PyInterpreterState *interp = _PyInterpreterState_GET();
struct _Py_parser_state *parser = &interp->parser;
switch (TYPE(n)) {
case INDENT:
parser->listnode.level++;
break;
case DEDENT:
parser->listnode.level--;
break;
default:
if (parser->listnode.atbol) {
int i;
for (i = 0; i < parser->listnode.level; ++i) {
fprintf(fp, "\t");
}
parser->listnode.atbol = 0;
}
if (TYPE(n) == NEWLINE) {
if (STR(n) != NULL)
fprintf(fp, "%s", STR(n));
fprintf(fp, "\n");
parser->listnode.atbol = 1;
}
else
fprintf(fp, "%s ", STR(n));
break;
}
}
else
fprintf(fp, "? ");
}

File diff suppressed because it is too large.


@ -1,49 +0,0 @@
#ifndef Py_PARSER_H
#define Py_PARSER_H
#ifdef __cplusplus
extern "C" {
#endif
/* Parser interface */
#define MAXSTACK 1700
typedef struct {
int s_state; /* State in current DFA */
const dfa *s_dfa; /* Current DFA */
struct _node *s_parent; /* Where to add next node */
} stackentry;
typedef struct {
stackentry *s_top; /* Top entry */
stackentry s_base[MAXSTACK];/* Array of stack entries */
/* NB The stack grows down */
} stack;
typedef struct {
stack p_stack; /* Stack of parser states */
grammar *p_grammar; /* Grammar to use */
node *p_tree; /* Top of parse tree */
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
unsigned long p_flags; /* see co_flags in Include/code.h */
#endif
} parser_state;
parser_state *PyParser_New(grammar *g, int start);
void PyParser_Delete(parser_state *ps);
int PyParser_AddToken(parser_state *ps, int type, char *str,
int lineno, int col_offset,
int end_lineno, int end_col_offset,
int *expected_ret);
void PyGrammar_AddAccelerators(grammar *g);
#define showtree _Py_showtree
#define printtree _Py_printtree
#define dumptree _Py_dumptree
#ifdef __cplusplus
}
#endif
#endif /* !Py_PARSER_H */


@ -1,486 +0,0 @@
/* Parser-tokenizer link implementation */
#include "Python.h"
#include "tokenizer.h"
#include "node.h"
#include "grammar.h"
#include "parser.h"
#include "parsetok.h"
#include "errcode.h"
#include "graminit.h"
/* Forward */
static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
static int initerr(perrdetail *err_ret, PyObject * filename);
typedef struct {
struct {
int lineno;
char *comment;
} *items;
size_t size;
size_t num_items;
} growable_comment_array;
static int
growable_comment_array_init(growable_comment_array *arr, size_t initial_size) {
assert(initial_size > 0);
arr->items = malloc(initial_size * sizeof(*arr->items));
arr->size = initial_size;
arr->num_items = 0;
return arr->items != NULL;
}
static int
growable_comment_array_add(growable_comment_array *arr, int lineno, char *comment) {
if (arr->num_items >= arr->size) {
size_t new_size = arr->size * 2;
void *new_items_array = realloc(arr->items, new_size * sizeof(*arr->items));
if (!new_items_array) {
return 0;
}
arr->items = new_items_array;
arr->size = new_size;
}
arr->items[arr->num_items].lineno = lineno;
arr->items[arr->num_items].comment = comment;
arr->num_items++;
return 1;
}
static void
growable_comment_array_deallocate(growable_comment_array *arr) {
for (unsigned i = 0; i < arr->num_items; i++) {
PyObject_FREE(arr->items[i].comment);
}
free(arr->items);
}
/* Parse input coming from a string. Return error code, print some errors. */
node *
PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret)
{
return PyParser_ParseStringFlagsFilename(s, NULL, g, start, err_ret, 0);
}
node *
PyParser_ParseStringFlags(const char *s, grammar *g, int start,
perrdetail *err_ret, int flags)
{
return PyParser_ParseStringFlagsFilename(s, NULL,
g, start, err_ret, flags);
}
node *
PyParser_ParseStringFlagsFilename(const char *s, const char *filename,
grammar *g, int start,
perrdetail *err_ret, int flags)
{
int iflags = flags;
return PyParser_ParseStringFlagsFilenameEx(s, filename, g, start,
err_ret, &iflags);
}
node *
PyParser_ParseStringObject(const char *s, PyObject *filename,
grammar *g, int start,
perrdetail *err_ret, int *flags)
{
struct tok_state *tok;
int exec_input = start == file_input;
if (initerr(err_ret, filename) < 0)
return NULL;
if (PySys_Audit("compile", "yO", s, err_ret->filename) < 0) {
err_ret->error = E_ERROR;
return NULL;
}
if (*flags & PyPARSE_IGNORE_COOKIE)
tok = PyTokenizer_FromUTF8(s, exec_input);
else
tok = PyTokenizer_FromString(s, exec_input);
if (tok == NULL) {
err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
return NULL;
}
if (*flags & PyPARSE_TYPE_COMMENTS) {
tok->type_comments = 1;
}
Py_INCREF(err_ret->filename);
tok->filename = err_ret->filename;
if (*flags & PyPARSE_ASYNC_HACKS)
tok->async_hacks = 1;
return parsetok(tok, g, start, err_ret, flags);
}
node *
PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename_str,
grammar *g, int start,
perrdetail *err_ret, int *flags)
{
node *n;
PyObject *filename = NULL;
if (filename_str != NULL) {
filename = PyUnicode_DecodeFSDefault(filename_str);
if (filename == NULL) {
err_ret->error = E_ERROR;
return NULL;
}
}
n = PyParser_ParseStringObject(s, filename, g, start, err_ret, flags);
Py_XDECREF(filename);
return n;
}
/* Parse input coming from a file. Return error code, print some errors. */
node *
PyParser_ParseFile(FILE *fp, const char *filename, grammar *g, int start,
const char *ps1, const char *ps2,
perrdetail *err_ret)
{
return PyParser_ParseFileFlags(fp, filename, NULL,
g, start, ps1, ps2, err_ret, 0);
}
node *
PyParser_ParseFileFlags(FILE *fp, const char *filename, const char *enc,
grammar *g, int start,
const char *ps1, const char *ps2,
perrdetail *err_ret, int flags)
{
int iflags = flags;
return PyParser_ParseFileFlagsEx(fp, filename, enc, g, start, ps1,
ps2, err_ret, &iflags);
}
node *
PyParser_ParseFileObject(FILE *fp, PyObject *filename,
const char *enc, grammar *g, int start,
const char *ps1, const char *ps2,
perrdetail *err_ret, int *flags)
{
struct tok_state *tok;
if (initerr(err_ret, filename) < 0)
return NULL;
if (PySys_Audit("compile", "OO", Py_None, err_ret->filename) < 0) {
return NULL;
}
if ((tok = PyTokenizer_FromFile(fp, enc, ps1, ps2)) == NULL) {
err_ret->error = E_NOMEM;
return NULL;
}
if (*flags & PyPARSE_TYPE_COMMENTS) {
tok->type_comments = 1;
}
Py_INCREF(err_ret->filename);
tok->filename = err_ret->filename;
return parsetok(tok, g, start, err_ret, flags);
}
node *
PyParser_ParseFileFlagsEx(FILE *fp, const char *filename,
const char *enc, grammar *g, int start,
const char *ps1, const char *ps2,
perrdetail *err_ret, int *flags)
{
node *n;
PyObject *fileobj = NULL;
if (filename != NULL) {
fileobj = PyUnicode_DecodeFSDefault(filename);
if (fileobj == NULL) {
err_ret->error = E_ERROR;
return NULL;
}
}
n = PyParser_ParseFileObject(fp, fileobj, enc, g,
start, ps1, ps2, err_ret, flags);
Py_XDECREF(fileobj);
return n;
}
/* Parse input coming from the given tokenizer structure.
Return error code. */
static node *
parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
int *flags)
{
parser_state *ps;
node *n;
int started = 0;
int col_offset, end_col_offset;
growable_comment_array type_ignores;
if (!growable_comment_array_init(&type_ignores, 10)) {
err_ret->error = E_NOMEM;
PyTokenizer_Free(tok);
return NULL;
}
if ((ps = PyParser_New(g, start)) == NULL) {
err_ret->error = E_NOMEM;
growable_comment_array_deallocate(&type_ignores);
PyTokenizer_Free(tok);
return NULL;
}
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
if (*flags & PyPARSE_BARRY_AS_BDFL)
ps->p_flags |= CO_FUTURE_BARRY_AS_BDFL;
if (*flags & PyPARSE_TYPE_COMMENTS)
ps->p_flags |= PyCF_TYPE_COMMENTS;
#endif
for (;;) {
const char *a, *b;
int type;
size_t len;
char *str;
col_offset = -1;
int lineno;
const char *line_start;
type = PyTokenizer_Get(tok, &a, &b);
len = (a != NULL && b != NULL) ? b - a : 0;
str = (char *) PyObject_MALLOC(len + 1);
if (str == NULL) {
err_ret->error = E_NOMEM;
break;
}
if (len > 0)
strncpy(str, a, len);
str[len] = '\0';
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
if (type == NOTEQUAL) {
if (!(ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
strcmp(str, "!=")) {
PyObject_FREE(str);
err_ret->error = E_SYNTAX;
break;
}
else if ((ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
strcmp(str, "<>")) {
PyObject_FREE(str);
err_ret->expected = NOTEQUAL;
err_ret->error = E_SYNTAX;
break;
}
}
#endif
/* Nodes of type STRING, especially multi line strings
must be handled differently in order to get both
the starting line number and the column offset right.
(cf. issue 16806) */
lineno = type == STRING ? tok->first_lineno : tok->lineno;
line_start = type == STRING ? tok->multi_line_start : tok->line_start;
if (a != NULL && a >= line_start) {
col_offset = Py_SAFE_DOWNCAST(a - line_start,
intptr_t, int);
}
else {
col_offset = -1;
}
if (b != NULL && b >= tok->line_start) {
end_col_offset = Py_SAFE_DOWNCAST(b - tok->line_start,
intptr_t, int);
}
else {
end_col_offset = -1;
}
if (type == TYPE_IGNORE) {
if (!growable_comment_array_add(&type_ignores, tok->lineno, str)) {
err_ret->error = E_NOMEM;
break;
}
continue;
}
if (type == ERRORTOKEN) {
err_ret->error = tok->done;
break;
}
if (type == ENDMARKER && started) {
type = NEWLINE; /* Add an extra newline */
started = 0;
/* Add the right number of dedent tokens,
except if a certain flag is given --
codeop.py uses this. */
if (tok->indent &&
!(*flags & PyPARSE_DONT_IMPLY_DEDENT))
{
tok->pendin = -tok->indent;
tok->indent = 0;
}
}
else {
started = 1;
}
if ((err_ret->error =
PyParser_AddToken(ps, (int)type, str,
lineno, col_offset, tok->lineno, end_col_offset,
&(err_ret->expected))) != E_OK) {
if (tok->done == E_EOF && !ISWHITESPACE(type)) {
tok->done = E_SYNTAX;
}
if (err_ret->error != E_DONE) {
PyObject_FREE(str);
err_ret->token = type;
}
break;
}
}
if (err_ret->error == E_DONE) {
n = ps->p_tree;
ps->p_tree = NULL;
if (n->n_type == file_input) {
/* Put type_ignore nodes in the ENDMARKER of file_input. */
int num;
node *ch;
size_t i;
num = NCH(n);
ch = CHILD(n, num - 1);
REQ(ch, ENDMARKER);
for (i = 0; i < type_ignores.num_items; i++) {
int res = PyNode_AddChild(ch, TYPE_IGNORE, type_ignores.items[i].comment,
type_ignores.items[i].lineno, 0,
type_ignores.items[i].lineno, 0);
if (res != 0) {
err_ret->error = res;
PyNode_Free(n);
n = NULL;
break;
}
type_ignores.items[i].comment = NULL;
}
}
/* Check that the source for a single input statement really
is a single statement by looking at what is left in the
buffer after parsing. Trailing whitespace and comments
are OK. */
if (err_ret->error == E_DONE && start == single_input) {
const char *cur = tok->cur;
char c = *tok->cur;
for (;;) {
while (c == ' ' || c == '\t' || c == '\n' || c == '\014')
c = *++cur;
if (!c)
break;
if (c != '#') {
err_ret->error = E_BADSINGLE;
PyNode_Free(n);
n = NULL;
break;
}
/* Suck up comment. */
while (c && c != '\n')
c = *++cur;
}
}
}
else
n = NULL;
growable_comment_array_deallocate(&type_ignores);
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
*flags = ps->p_flags;
#endif
PyParser_Delete(ps);
if (n == NULL) {
if (tok->done == E_EOF)
err_ret->error = E_EOF;
err_ret->lineno = tok->lineno;
if (tok->buf != NULL) {
size_t len;
assert(tok->cur - tok->buf < INT_MAX);
/* if we've managed to parse a token, point the offset to its start,
* else use the current reading position of the tokenizer
*/
err_ret->offset = col_offset != -1 ? col_offset + 1 : ((int)(tok->cur - tok->buf));
len = tok->inp - tok->buf;
err_ret->text = (char *) PyObject_MALLOC(len + 1);
if (err_ret->text != NULL) {
if (len > 0)
strncpy(err_ret->text, tok->buf, len);
err_ret->text[len] = '\0';
}
}
} else if (tok->encoding != NULL) {
/* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was
* allocated using PyMem_
*/
node* r = PyNode_New(encoding_decl);
if (r)
r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1);
if (!r || !r->n_str) {
err_ret->error = E_NOMEM;
if (r)
PyObject_FREE(r);
n = NULL;
goto done;
}
strcpy(r->n_str, tok->encoding);
PyMem_FREE(tok->encoding);
tok->encoding = NULL;
r->n_nchildren = 1;
r->n_child = n;
n = r;
}
done:
PyTokenizer_Free(tok);
if (n != NULL) {
_PyNode_FinalizeEndPos(n);
}
return n;
}
static int
initerr(perrdetail *err_ret, PyObject *filename)
{
err_ret->error = E_OK;
err_ret->lineno = 0;
err_ret->offset = 0;
err_ret->text = NULL;
err_ret->token = -1;
err_ret->expected = -1;
if (filename) {
Py_INCREF(filename);
err_ret->filename = filename;
}
else {
err_ret->filename = PyUnicode_FromString("<string>");
if (err_ret->filename == NULL) {
err_ret->error = E_ERROR;
return -1;
}
}
return 0;
}
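The E_BADSINGLE path above is what rejects extra statements when compiling in 'single' (interactive) mode; the PEG parser that replaces this code keeps the same behaviour. A minimal sketch in Python, assuming a current CPython interpreter (the exact message text may vary between versions):

# Compiling more than one statement in 'single' mode is rejected,
# mirroring the E_BADSINGLE error path above.
try:
    compile("x = 1\ny = 2\n", "<string>", "single")
except SyntaxError as exc:
    print(exc.msg)  # e.g. "multiple statements found while compiling a single statement"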

View File

@ -1,6 +1,6 @@
#include "pegen_interface.h"
#include "../tokenizer.h"
#include "tokenizer.h"
#include "pegen.h"
mod_ty

View File

@ -1,9 +1,9 @@
#include <Python.h>
#include <errcode.h>
#include "../tokenizer.h"
#include "tokenizer.h"
#include "pegen.h"
#include "parse_string.h"
#include "string_parser.h"
PyObject *
_PyPegen_new_type_comment(Parser *p, char *s)

File diff suppressed because it is too large

View File

@ -1,43 +0,0 @@
import argparse
from .pgen import ParserGenerator
def main():
parser = argparse.ArgumentParser(description="Parser generator main program.")
parser.add_argument(
"grammar", type=str, help="The file with the grammar definition in EBNF format"
)
parser.add_argument("tokens", type=str, help="The file with the token definitions")
parser.add_argument(
"graminit_h",
type=argparse.FileType("w"),
help="The path to write the grammar's non-terminals as #defines",
)
parser.add_argument(
"graminit_c",
type=argparse.FileType("w"),
help="The path to write the grammar as initialized data",
)
parser.add_argument("--verbose", "-v", action="count")
parser.add_argument(
"--graph",
type=argparse.FileType("w"),
action="store",
metavar="GRAPH_OUTPUT_FILE",
help="Dumps a DOT representation of the generated automata in a file",
)
args = parser.parse_args()
p = ParserGenerator(
args.grammar, args.tokens, verbose=args.verbose, graph_file=args.graph
)
grammar = p.make_grammar()
grammar.produce_graminit_h(args.graminit_h.write)
grammar.produce_graminit_c(args.graminit_c.write)
if __name__ == "__main__":
main()

View File

@ -1,400 +0,0 @@
"""Classes representing state-machine concepts"""
class NFA:
"""A non deterministic finite automata
A non deterministic automata is a form of a finite state
machine. An NFA's rules are less restrictive than a DFA.
The NFA rules are:
* A transition can be non-deterministic and can result in
nothing, one, or two or more states.
* An epsilon transition consuming empty input is valid.
Transitions consuming labeled symbols are also permitted.
This class assumes that there is only one starting state and one
accepting (ending) state.
Attributes:
name (str): The name of the rule the NFA is representing.
start (NFAState): The starting state.
end (NFAState): The ending state
"""
def __init__(self, start, end):
self.name = start.rule_name
self.start = start
self.end = end
def __repr__(self):
return "NFA(start={}, end={})".format(self.start, self.end)
def dump(self, writer=print):
"""Dump a graphical representation of the NFA"""
todo = [self.start]
for i, state in enumerate(todo):
writer(" State", i, state is self.end and "(final)" or "")
for arc in state.arcs:
label = arc.label
next = arc.target
if next in todo:
j = todo.index(next)
else:
j = len(todo)
todo.append(next)
if label is None:
writer(" -> %d" % j)
else:
writer(" %s -> %d" % (label, j))
def dump_graph(self, writer):
"""Dump a DOT representation of the NFA"""
writer('digraph %s_nfa {\n' % self.name)
todo = [self.start]
for i, state in enumerate(todo):
writer(' %d [label="State %d %s"];\n' % (i, i, state is self.end and "(final)" or ""))
for arc in state.arcs:
label = arc.label
next = arc.target
if next in todo:
j = todo.index(next)
else:
j = len(todo)
todo.append(next)
if label is None:
writer(" %d -> %d [style=dotted label=ε];\n" % (i, j))
else:
writer(" %d -> %d [label=%s];\n" % (i, j, label.replace("'", '"')))
writer('}\n')
class NFAArc:
"""An arc representing a transition between two NFA states.
NFA states can be connected via two ways:
* A label transition: An input equal to the label must
be consumed to perform the transition.
* An epsilon transition: The transition can be taken without
consuming any input symbol.
Attributes:
target (NFAState): The ending state of the transition arc.
label (Optional[str]): The label that must be consumed to make
the transition. An epsilon transition is represented
using `None`.
"""
def __init__(self, target, label):
self.target = target
self.label = label
def __repr__(self):
return "<%s: %s>" % (self.__class__.__name__, self.label)
class NFAState:
"""A state of a NFA, non deterministic finite automata.
Attributes:
target (rule_name): The name of the rule used to represent the NFA's
ending state after a transition.
arcs (Dict[Optional[str], NFAState]): A mapping representing transitions
between the current NFA state and another NFA state via following
a label.
"""
def __init__(self, rule_name):
self.rule_name = rule_name
self.arcs = []
def add_arc(self, target, label=None):
"""Add a new arc to connect the state to a target state within the NFA
The method adds a new arc to the list of arcs available as transitions
from the present state. An optional label indicates a named transition
that consumes an input while the absence of a label represents an epsilon
transition.
Parameters:
target (NFAState): The end of the transition that the arc represents.
label (Optional[str]): The label that must be consumed to make
the transition. If the label is not provided the transition is assumed
to be an epsilon-transition.
"""
assert label is None or isinstance(label, str)
assert isinstance(target, NFAState)
self.arcs.append(NFAArc(target, label))
def __repr__(self):
return "<%s: from %s>" % (self.__class__.__name__, self.rule_name)
class DFA:
"""A deterministic finite automata
A deterministic finite automata is a form of a finite state machine
that obeys the following rules:
* Each of the transitions is uniquely determined by
the source state and input symbol
* Reading an input symbol is required for each state
transition (no epsilon transitions).
The finite-state machine will accept or reject a string of symbols
and only produces a unique computation of the automaton for each input
string. The DFA must have a unique starting state (represented as the first
element in the list of states) but can have multiple final states.
Attributes:
name (str): The name of the rule the DFA is representing.
states (List[DFAState]): A collection of DFA states.
"""
def __init__(self, name, states):
self.name = name
self.states = states
@classmethod
def from_nfa(cls, nfa):
"""Constructs a DFA from a NFA using the RabinScott construction algorithm.
To simulate the operation of a DFA on a given input string, it's
necessary to keep track of a single state at any time, or more precisely,
the state that the automaton will reach after seeing a prefix of the
input. In contrast, to simulate an NFA, it's necessary to keep track of
a set of states: all of the states that the automaton could reach after
seeing the same prefix of the input, according to the nondeterministic
choices made by the automaton. There are two possible sources of
non-determinism:
1) Multiple (one or more) transitions with the same label
'A' +-------+
+----------->+ State +----------->+
| | 2 |
+-------+ +-------+
| State |
| 1 | +-------+
+-------+ | State |
+----------->+ 3 +----------->+
'A' +-------+
2) Epsilon transitions (transitions that can be taken without consuming any input)
+-------+ +-------+
| State | ε | State |
| 1 +----------->+ 2 +----------->+
+-------+ +-------+
In the first case above we cannot determine which transition should be
followed when given an input 'A', while in the second case we can choose
either to follow the epsilon transition or not to follow it. To solve this
problem we can imagine that we follow all possibilities at the same time
and construct new states from the set of all reachable states. For each
case in the previous example:
1) For multiple transitions with the same label we collapse all of the
target states into the same one
+-------+ +-------+
| State | 'A' | State |
| 1 +----------->+ 2-3 +----------->+
+-------+ +-------+
2) For epsilon transitions we collapse all epsilon-reachable states
into the same one
+-------+
| State |
| 1-2 +----------->
+-------+
Because the DFA states consist of sets of NFA states, an n-state NFA
may be converted to a DFA with at most 2**n states. Notice that the
constructed DFA is not minimal and can be simplified or reduced
afterwards.
Parameters:
nfa (NFA): The NFA to transform into a DFA.
"""
assert isinstance(nfa, NFA)
def add_closure(nfa_state, base_nfa_set):
"""Calculate the epsilon-closure of a given state
Add to the *base_nfa_set* all the states that are
reachable from *nfa_state* via epsilon-transitions.
"""
assert isinstance(nfa_state, NFAState)
if nfa_state in base_nfa_set:
return
base_nfa_set.add(nfa_state)
for nfa_arc in nfa_state.arcs:
if nfa_arc.label is None:
add_closure(nfa_arc.target, base_nfa_set)
# Calculate the epsilon-closure of the starting state
base_nfa_set = set()
add_closure(nfa.start, base_nfa_set)
# Start by visiting the NFA starting state (there is only one).
states = [DFAState(nfa.name, base_nfa_set, nfa.end)]
for state in states: # NB states grow while we're iterating
# Find transitions from the current state to other reachable states
# and store them in a mapping that correlates the label to all the
# possible reachable states that can be obtained by consuming a
# token equal to the label. Each set of all the states that can
# be reached after following a label will become a DFA state.
arcs = {}
for nfa_state in state.nfa_set:
for nfa_arc in nfa_state.arcs:
if nfa_arc.label is not None:
nfa_set = arcs.setdefault(nfa_arc.label, set())
# All states that can be reached by epsilon-transitions
# are also included in the set of reachable states.
add_closure(nfa_arc.target, nfa_set)
# Now create new DFA states by visiting all possible transitions between
# the current DFA state and the new power-set states (each nfa_set)
# via the different labels. As the nodes are appended to *states* this
# is performing a breadth-first search traversal over the power-set of
# the states of the original NFA.
for label, nfa_set in sorted(arcs.items()):
for existing_state in states:
if existing_state.nfa_set == nfa_set:
# The DFA state already exists for this rule.
next_state = existing_state
break
else:
next_state = DFAState(nfa.name, nfa_set, nfa.end)
states.append(next_state)
# Add a transition between the current DFA state and the new
# DFA state (the power-set state) via the current label.
state.add_arc(next_state, label)
return cls(nfa.name, states)
def __iter__(self):
return iter(self.states)
def simplify(self):
"""Attempt to reduce the number of states of the DFA
Transform the DFA into an equivalent DFA that has fewer states. To
minimize it, two classes of states can be removed or merged from the
original DFA without affecting the language it accepts:
* Unreachable states are those that cannot be reached from the initial
state of the DFA for any input string.
* Nondistinguishable states are those that cannot be distinguished
from one another for any input string.
This algorithm does not achieve the optimal fully-reduced solution, but it
works well enough for the particularities of the Python grammar. The
algorithm repeatedly looks for two states that have the same set of
arcs (same labels pointing to the same nodes) and unifies them, until
things stop changing.
"""
changes = True
while changes:
changes = False
for i, state_i in enumerate(self.states):
for j in range(i + 1, len(self.states)):
state_j = self.states[j]
if state_i == state_j:
del self.states[j]
for state in self.states:
state.unifystate(state_j, state_i)
changes = True
break
def dump(self, writer=print):
"""Dump a graphical representation of the DFA"""
for i, state in enumerate(self.states):
writer(" State", i, state.is_final and "(final)" or "")
for label, next in sorted(state.arcs.items()):
writer(" %s -> %d" % (label, self.states.index(next)))
def dump_graph(self, writer):
"""Dump a DOT representation of the DFA"""
writer('digraph %s_dfa {\n' % self.name)
for i, state in enumerate(self.states):
writer(' %d [label="State %d %s"];\n' % (i, i, state.is_final and "(final)" or ""))
for label, next in sorted(state.arcs.items()):
writer(" %d -> %d [label=%s];\n" % (i, self.states.index(next), label.replace("'", '"')))
writer('}\n')
class DFAState(object):
"""A state of a DFA
Attributes:
rule_name (str): The name of the DFA rule containing the represented state.
nfa_set (Set[NFAState]): The set of NFA states used to create this state.
is_final (bool): True if the state represents an accepting state of the DFA
containing this state.
arcs (Dict[str, DFAState]): A mapping representing transitions between
the current DFA state and another DFA state via following a label.
"""
def __init__(self, rule_name, nfa_set, final):
assert isinstance(nfa_set, set)
assert isinstance(next(iter(nfa_set)), NFAState)
assert isinstance(final, NFAState)
self.rule_name = rule_name
self.nfa_set = nfa_set
self.arcs = {} # map from terminals/nonterminals to DFAState
self.is_final = final in nfa_set
def add_arc(self, target, label):
"""Add a new arc to the current state.
Parameters:
target (DFAState): The DFA state at the end of the arc.
label (str): The label representing the token that must be consumed
to perform this transition.
"""
assert isinstance(label, str)
assert label not in self.arcs
assert isinstance(target, DFAState)
self.arcs[label] = target
def unifystate(self, old, new):
"""Replace all arcs from the current node to *old* with *new*.
Parameters:
old (DFAState): The DFA state to remove from all existing arcs.
new (DFAState): The DFA state to replace in all existing arcs.
"""
for label, next_ in self.arcs.items():
if next_ is old:
self.arcs[label] = new
def __eq__(self, other):
# The nfa_set does not matter for equality
assert isinstance(other, DFAState)
if self.is_final != other.is_final:
return False
# We cannot just return self.arcs == other.arcs because that
# would invoke this method recursively if there are any cycles.
if len(self.arcs) != len(other.arcs):
return False
for label, next_ in self.arcs.items():
if next_ is not other.arcs.get(label):
return False
return True
__hash__ = None # For Py3 compatibility.
def __repr__(self):
return "<%s: %s is_final=%s>" % (
self.__class__.__name__,
self.rule_name,
self.is_final,
)
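For orientation, here is a minimal sketch of how these classes were wired together by pgen. The import assumes the pgen package above (it lived under Parser/pgen/) is on sys.path, and the rule name and labels are made up:

from pgen.automata import NFA, NFAState, DFA  # assumes Parser/ is on sys.path

# Build the NFA for a made-up rule:  demo: 'x' 'y'
start = NFAState("demo")
middle = NFAState("demo")
end = NFAState("demo")
start.add_arc(middle, "'x'")
middle.add_arc(end, "'y'")

nfa = NFA(start, end)
dfa = DFA.from_nfa(nfa)  # Rabin-Scott powerset construction
dfa.simplify()
dfa.dump()               # prints each DFA state and its labelled arcs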

View File

@ -1,147 +0,0 @@
import collections
class Grammar:
"""Pgen parsing tables class.
The instance variables are as follows:
symbol2number -- a dict mapping symbol names to numbers. Symbol
numbers are always 256 or higher, to distinguish
them from token numbers, which are between 0 and
255 (inclusive).
number2symbol -- a dict mapping numbers to symbol names;
these two are each other's inverse.
states -- a list of DFAs, where each DFA is a list of
states, each state is a list of arcs, and each
arc is an (i, j) pair where i is a label and j is
a state number. The DFA number is the index into
this list. (This name is slightly confusing.)
Final states are represented by a special arc of
the form (0, j) where j is its own state number.
dfas -- a dict mapping symbol numbers to (DFA, first)
pairs, where DFA is an item from the states list
above, and first is a set of tokens that can
begin this grammar rule.
labels -- a list of (x, y) pairs where x is either a token
number or a symbol number, and y is either None
or a string; the strings are keywords. The label
number is the index in this list; label numbers
are used to mark state transitions (arcs) in the
DFAs.
start -- the number of the grammar's start symbol.
keywords -- a dict mapping keyword strings to arc labels.
tokens -- a dict mapping token numbers to arc labels.
"""
def __init__(self):
self.symbol2number = collections.OrderedDict()
self.number2symbol = collections.OrderedDict()
self.states = []
self.dfas = collections.OrderedDict()
self.labels = [(0, "EMPTY")]
self.keywords = collections.OrderedDict()
self.tokens = collections.OrderedDict()
self.symbol2label = collections.OrderedDict()
self.start = 256
def produce_graminit_h(self, writer):
writer("/* Generated by Parser/pgen */\n\n")
for number, symbol in self.number2symbol.items():
writer("#define {} {}\n".format(symbol, number))
def produce_graminit_c(self, writer):
writer("/* Generated by Parser/pgen */\n\n")
writer('#include "exports.h"\n')
writer('#include "grammar.h"\n')
writer("Py_EXPORTED_SYMBOL grammar _PyParser_Grammar;\n")
self.print_dfas(writer)
self.print_labels(writer)
writer("Py_EXPORTED_SYMBOL grammar _PyParser_Grammar = {\n")
writer(" {n_dfas},\n".format(n_dfas=len(self.dfas)))
writer(" dfas,\n")
writer(" {{{n_labels}, labels}},\n".format(n_labels=len(self.labels)))
writer(" {start_number}\n".format(start_number=self.start))
writer("};\n")
def print_labels(self, writer):
writer(
"static const label labels[{n_labels}] = {{\n".format(
n_labels=len(self.labels)
)
)
for label, name in self.labels:
label_name = '"{}"'.format(name) if name is not None else 0
writer(
" {{{label}, {label_name}}},\n".format(
label=label, label_name=label_name
)
)
writer("};\n")
def print_dfas(self, writer):
self.print_states(writer)
writer("static const dfa dfas[{}] = {{\n".format(len(self.dfas)))
for dfaindex, dfa_elem in enumerate(self.dfas.items()):
symbol, (dfa, first_sets) = dfa_elem
writer(
' {{{dfa_symbol}, "{symbol_name}", '.format(
dfa_symbol=symbol, symbol_name=self.number2symbol[symbol]
)
+ "{n_states}, states_{dfa_index},\n".format(
n_states=len(dfa), dfa_index=dfaindex
)
+ ' "'
)
bitset = bytearray((len(self.labels) >> 3) + 1)
for token in first_sets:
bitset[token >> 3] |= 1 << (token & 7)
for byte in bitset:
writer("\\%03o" % (byte & 0xFF))
writer('"},\n')
writer("};\n")
def print_states(self, write):
for dfaindex, dfa in enumerate(self.states):
self.print_arcs(write, dfaindex, dfa)
write(
"static state states_{dfa_index}[{n_states}] = {{\n".format(
dfa_index=dfaindex, n_states=len(dfa)
)
)
for stateindex, state in enumerate(dfa):
narcs = len(state)
write(
" {{{n_arcs}, arcs_{dfa_index}_{state_index}}},\n".format(
n_arcs=narcs, dfa_index=dfaindex, state_index=stateindex
)
)
write("};\n")
def print_arcs(self, write, dfaindex, states):
for stateindex, state in enumerate(states):
narcs = len(state)
write(
"static const arc arcs_{dfa_index}_{state_index}[{n_arcs}] = {{\n".format(
dfa_index=dfaindex, state_index=stateindex, n_arcs=narcs
)
)
for a, b in state:
write(
" {{{from_label}, {to_state}}},\n".format(
from_label=a, to_state=b
)
)
write("};\n")

View File

@ -1,59 +0,0 @@
"""Generate Lib/keyword.py from the Grammar and Tokens files using pgen"""
import argparse
from .pgen import ParserGenerator
TEMPLATE = r'''
"""Keywords (from "Grammar/Grammar")
This file is automatically generated; please don't muck it up!
To update the symbols in this file, 'cd' to the top directory of
the python source tree and run:
python3 -m Parser.pgen.keywordgen Grammar/Grammar \
Grammar/Tokens \
Lib/keyword.py
Alternatively, you can run 'make regen-keyword'.
"""
__all__ = ["iskeyword", "kwlist"]
kwlist = [
{keywords}
]
iskeyword = frozenset(kwlist).__contains__
'''.lstrip()
EXTRA_KEYWORDS = ["async", "await"]
def main():
parser = argparse.ArgumentParser(
description="Generate the Lib/keywords.py " "file from the grammar."
)
parser.add_argument(
"grammar", type=str, help="The file with the grammar definition in EBNF format"
)
parser.add_argument("tokens", type=str, help="The file with the token definitions")
parser.add_argument(
"keyword_file",
type=argparse.FileType("w"),
help="The path to write the keyword definitions",
)
args = parser.parse_args()
p = ParserGenerator(args.grammar, args.tokens)
grammar = p.make_grammar()
with args.keyword_file as thefile:
all_keywords = sorted(list(grammar.keywords) + EXTRA_KEYWORDS)
keywords = ",\n ".join(map(repr, all_keywords))
thefile.write(TEMPLATE.format(keywords=keywords))
if __name__ == "__main__":
main()
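The generated Lib/keyword.py is an ordinary module whose public API does not change; a quick usage check of what the generator above produced:

import keyword

print(keyword.iskeyword("async"))  # True
print(keyword.iskeyword("spam"))   # False
print(sorted(keyword.kwlist)[:3])  # ['False', 'None', 'True']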

View File

@ -1,152 +0,0 @@
"""Parser for the Python metagrammar"""
import io
import tokenize # from stdlib
from .automata import NFA, NFAState
class GrammarParser:
"""Parser for Python grammar files."""
_translation_table = {
tokenize.NAME: "NAME",
tokenize.STRING: "STRING",
tokenize.NEWLINE: "NEWLINE",
tokenize.NL: "NL",
tokenize.OP: "OP",
tokenize.ENDMARKER: "ENDMARKER",
tokenize.COMMENT: "COMMENT",
}
def __init__(self, grammar):
self.grammar = grammar
grammar_adaptor = io.StringIO(grammar)
self.generator = tokenize.generate_tokens(grammar_adaptor.readline)
self._gettoken() # Initialize lookahead
self._current_rule_name = None
def parse(self):
"""Turn the grammar into a collection of NFAs"""
# grammar: (NEWLINE | rule)* ENDMARKER
while self.type != tokenize.ENDMARKER:
while self.type == tokenize.NEWLINE:
self._gettoken()
# rule: NAME ':' rhs NEWLINE
self._current_rule_name = self._expect(tokenize.NAME)
self._expect(tokenize.OP, ":")
a, z = self._parse_rhs()
self._expect(tokenize.NEWLINE)
yield NFA(a, z)
def _parse_rhs(self):
# rhs: items ('|' items)*
a, z = self._parse_items()
if self.value != "|":
return a, z
else:
aa = NFAState(self._current_rule_name)
zz = NFAState(self._current_rule_name)
while True:
# Add an epsilon arc from the new common start state to the start of the
# current alternative, and from the end of the current alternative to the
# new common end state, so any of the alternatives can be taken.
aa.add_arc(a)
z.add_arc(zz)
if self.value != "|":
break
self._gettoken()
a, z = self._parse_items()
return aa, zz
def _parse_items(self):
# items: item+
a, b = self._parse_item()
while self.type in (tokenize.NAME, tokenize.STRING) or self.value in ("(", "["):
c, d = self._parse_item()
# Allow a transition between the end of the previous item
# and the beginning of the new one, connecting all the items
# together. In this way we can only reach the end if we visit
# all the items.
b.add_arc(c)
b = d
return a, b
def _parse_item(self):
# item: '[' rhs ']' | atom ['+' | '*']
if self.value == "[":
self._gettoken()
a, z = self._parse_rhs()
self._expect(tokenize.OP, "]")
# Make a transition from the beginning to the end so it is possible to
# advance for free to the next state of this item without consuming
# anything from the rhs.
a.add_arc(z)
return a, z
else:
a, z = self._parse_atom()
value = self.value
if value not in ("+", "*"):
return a, z
self._gettoken()
z.add_arc(a)
if value == "+":
# Create a cycle to the beginning so we go back to the old state in this
# item and repeat.
return a, z
else:
# The end state is the same as the beginning, so we can cycle arbitrarily
# and end in the beginning if necessary.
return a, a
def _parse_atom(self):
# atom: '(' rhs ')' | NAME | STRING
if self.value == "(":
self._gettoken()
a, z = self._parse_rhs()
self._expect(tokenize.OP, ")")
return a, z
elif self.type in (tokenize.NAME, tokenize.STRING):
a = NFAState(self._current_rule_name)
z = NFAState(self._current_rule_name)
# We can transit to the next state only if we consume the value.
a.add_arc(z, self.value)
self._gettoken()
return a, z
else:
self._raise_error(
"expected (...) or NAME or STRING, got {} ({})",
self._translation_table.get(self.type, self.type),
self.value,
)
def _expect(self, type_, value=None):
if self.type != type_:
self._raise_error(
"expected {}, got {} ({})",
self._translation_table.get(type_, type_),
self._translation_table.get(self.type, self.type),
self.value,
)
if value is not None and self.value != value:
self._raise_error("expected {}, got {}", value, self.value)
value = self.value
self._gettoken()
return value
def _gettoken(self):
tup = next(self.generator)
while tup[0] in (tokenize.COMMENT, tokenize.NL):
tup = next(self.generator)
self.type, self.value, self.begin, self.end, self.line = tup
def _raise_error(self, msg, *args):
if args:
try:
msg = msg.format(*args)
except Exception:
msg = " ".join([msg] + list(map(str, args)))
line = self.grammar.splitlines()[self.begin[0] - 1]
raise SyntaxError(msg, ("<grammar>", self.begin[0], self.begin[1], line))
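A minimal sketch of feeding the metaparser a one-rule grammar; the toy rule is made up and the import assumes the pgen package above is on sys.path:

from pgen.metaparser import GrammarParser  # assumes Parser/ is on sys.path

grammar_text = "start: 'a' 'b'\n"
for nfa in GrammarParser(grammar_text).parse():
    print("NFA for rule", nfa.name)
    nfa.dump()  # lists the states and the 'a' / 'b' transitions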

View File

@ -1,310 +0,0 @@
"""Python parser generator
This parser generator transforms a Python grammar file into parsing tables
that can be consumed by Python's LL(1) parser written in C.
Concepts
--------
* An LL(1) parser (Left-to-right, Leftmost derivation, 1 token-lookahead) is a
top-down parser for a subset of context-free languages. It parses the input
from Left to right, performing Leftmost derivation of the sentence, and can
only use 1 token of lookahead when parsing a sentence.
* A parsing table is a collection of data that a generic implementation of the
LL(1) parser consumes to know how to parse a given context-free grammar. In
this case the collection of data involves deterministic finite automata,
calculated first sets, keywords and transition labels.
* A grammar is defined by production rules (or just 'productions') that specify
which symbols may replace which other symbols; these rules may be used to
generate strings, or to parse them. Each such rule has a head, or left-hand
side, which consists of the string that may be replaced, and a body, or
right-hand side, which consists of a string that may replace it. In the
Python grammar, rules are written in the form
rule_name: rule_description;
meaning the rule 'a: b' specifies that a can be replaced by b. A context-free
grammar is a grammar in which the left-hand side of each production rule
consists of only a single nonterminal symbol. Context-free grammars can
always be recognized by a non-deterministic pushdown automaton.
* Terminal symbols are literal symbols which may appear in the outputs of the
production rules of the grammar and which cannot be changed using the rules
of the grammar. Applying the rules recursively to a source string of symbols
will usually terminate in a final output string consisting only of terminal
symbols.
* Nonterminal symbols are those symbols which can be replaced. The grammar
includes a start symbol, a designated member of the set of nonterminals, from
which all the strings in the language may be derived by successive
applications of the production rules.
* The language defined by the grammar is defined as the set of terminal strings
that can be derived using the production rules.
* The first sets of a rule (FIRST(rule)) are defined to be the set of terminals
that can appear in the first position of any string derived from the rule.
This is useful for LL(1) parsers as the parser is only allowed to look at the
next token in the input to know which rule it needs to parse. For example, given
this grammar:
start: '(' A | B ')'
A: 'a' '<'
B: 'b' '<'
and the input '(b<)' the parser can only look at 'b' to know whether it needs
to parse A or B. Because FIRST(A) = {'a'} and FIRST(B) = {'b'} it knows
that it needs to continue parsing rule B because only that rule can start
with 'b'.
Description
-----------
The input for the parser generator is a grammar in extended BNF form (using *
for repetition, + for at-least-once repetition, [] for optional parts, | for
alternatives and () for grouping).
Each rule in the grammar file is considered as a regular expression in its
own right. It is turned into a Non-deterministic Finite Automaton (NFA),
which is then turned into a Deterministic Finite Automaton (DFA), which is
then optimized to reduce the number of states. See [Aho&Ullman 77] chapter 3,
or similar compiler books (this technique is more often used for lexical
analyzers).
The DFAs are used by the parser as parsing tables in a special way that's
probably unique. Before they are usable, the FIRST sets of all non-terminals
are computed so the LL(1) parser consuming the parsing tables can distinguish
between different transitions.
Reference
---------
[Aho&Ullman 77]
Aho&Ullman, Principles of Compiler Design, Addison-Wesley 1977
(first edition)
"""
from ast import literal_eval
import collections
from . import grammar, token
from .automata import DFA
from .metaparser import GrammarParser
import enum
class LabelType(enum.Enum):
NONTERMINAL = 0
NAMED_TOKEN = 1
KEYWORD = 2
OPERATOR = 3
NONE = 4
class Label(str):
def __init__(self, value):
self.type = self._get_type()
def _get_type(self):
if self[0].isalpha():
if self.upper() == self:
# NAMED tokens (ASYNC, NAME...) are all uppercase by convention
return LabelType.NAMED_TOKEN
else:
# If it is not uppercase it must be a nonterminal.
return LabelType.NONTERMINAL
else:
# Keywords and operators are wrapped in quotes
assert self[0] == self[-1] in ('"', "'"), self
value = literal_eval(self)
if value[0].isalpha():
return LabelType.KEYWORD
else:
return LabelType.OPERATOR
def __repr__(self):
return "{}({})".format(self.type, super().__repr__())
class ParserGenerator(object):
def __init__(self, grammar_file, token_file, verbose=False, graph_file=None):
with open(grammar_file) as f:
self.grammar = f.read()
with open(token_file) as tok_file:
token_lines = tok_file.readlines()
self.tokens = dict(token.generate_tokens(token_lines))
self.opmap = dict(token.generate_opmap(token_lines))
# Manually add <> so it does not collide with !=
self.opmap["<>"] = "NOTEQUAL"
self.verbose = verbose
self.filename = grammar_file
self.graph_file = graph_file
self.dfas, self.startsymbol = self.create_dfas()
self.first = {} # map from symbol name to set of tokens
self.calculate_first_sets()
def create_dfas(self):
rule_to_dfas = collections.OrderedDict()
start_nonterminal = None
for nfa in GrammarParser(self.grammar).parse():
if self.verbose:
print("Dump of NFA for", nfa.name)
nfa.dump()
if self.graph_file is not None:
nfa.dump_graph(self.graph_file.write)
dfa = DFA.from_nfa(nfa)
if self.verbose:
print("Dump of DFA for", dfa.name)
dfa.dump()
dfa.simplify()
if self.graph_file is not None:
dfa.dump_graph(self.graph_file.write)
rule_to_dfas[dfa.name] = dfa
if start_nonterminal is None:
start_nonterminal = dfa.name
return rule_to_dfas, start_nonterminal
def make_grammar(self):
c = grammar.Grammar()
c.all_labels = set()
names = list(self.dfas.keys())
names.remove(self.startsymbol)
names.insert(0, self.startsymbol)
for name in names:
i = 256 + len(c.symbol2number)
c.symbol2number[Label(name)] = i
c.number2symbol[i] = Label(name)
c.all_labels.add(name)
for name in names:
self.make_label(c, name)
dfa = self.dfas[name]
states = []
for state in dfa:
arcs = []
for label, next in sorted(state.arcs.items()):
c.all_labels.add(label)
arcs.append((self.make_label(c, label), dfa.states.index(next)))
if state.is_final:
arcs.append((0, dfa.states.index(state)))
states.append(arcs)
c.states.append(states)
c.dfas[c.symbol2number[name]] = (states, self.make_first_sets(c, name))
c.start = c.symbol2number[self.startsymbol]
if self.verbose:
print("")
print("Grammar summary")
print("===============")
print("- {n_labels} labels".format(n_labels=len(c.labels)))
print("- {n_dfas} dfas".format(n_dfas=len(c.dfas)))
print("- {n_tokens} tokens".format(n_tokens=len(c.tokens)))
print("- {n_keywords} keywords".format(n_keywords=len(c.keywords)))
print(
"- Start symbol: {start_symbol}".format(
start_symbol=c.number2symbol[c.start]
)
)
return c
def make_first_sets(self, c, name):
rawfirst = self.first[name]
first = set()
for label in sorted(rawfirst):
ilabel = self.make_label(c, label)
##assert ilabel not in first # XXX failed on <> ... !=
first.add(ilabel)
return first
def make_label(self, c, label):
label = Label(label)
ilabel = len(c.labels)
if label.type == LabelType.NONTERMINAL:
if label in c.symbol2label:
return c.symbol2label[label]
else:
c.labels.append((c.symbol2number[label], None))
c.symbol2label[label] = ilabel
return ilabel
elif label.type == LabelType.NAMED_TOKEN:
# A named token (NAME, NUMBER, STRING)
itoken = self.tokens.get(label, None)
assert isinstance(itoken, int), label
assert itoken in self.tokens.values(), label
if itoken in c.tokens:
return c.tokens[itoken]
else:
c.labels.append((itoken, None))
c.tokens[itoken] = ilabel
return ilabel
elif label.type == LabelType.KEYWORD:
# A keyword
value = literal_eval(label)
if value in c.keywords:
return c.keywords[value]
else:
c.labels.append((self.tokens["NAME"], value))
c.keywords[value] = ilabel
return ilabel
elif label.type == LabelType.OPERATOR:
# An operator (any non-numeric token)
value = literal_eval(label)
tok_name = self.opmap[value] # Fails if unknown token
itoken = self.tokens[tok_name]
if itoken in c.tokens:
return c.tokens[itoken]
else:
c.labels.append((itoken, None))
c.tokens[itoken] = ilabel
return ilabel
else:
raise ValueError("Cannot categorize label {}".format(label))
def calculate_first_sets(self):
names = list(self.dfas.keys())
for name in names:
if name not in self.first:
self.calculate_first_sets_for_rule(name)
if self.verbose:
print("First set for {dfa_name}".format(dfa_name=name))
for item in self.first[name]:
print(" - {terminal}".format(terminal=item))
def calculate_first_sets_for_rule(self, name):
dfa = self.dfas[name]
self.first[name] = None # dummy to detect left recursion
state = dfa.states[0]
totalset = set()
overlapcheck = {}
for label, next in state.arcs.items():
if label in self.dfas:
if label in self.first:
fset = self.first[label]
if fset is None:
raise ValueError("recursion for rule %r" % name)
else:
self.calculate_first_sets_for_rule(label)
fset = self.first[label]
totalset.update(fset)
overlapcheck[label] = fset
else:
totalset.add(label)
overlapcheck[label] = {label}
inverse = {}
for label, itsfirst in overlapcheck.items():
for symbol in itsfirst:
if symbol in inverse:
raise ValueError(
"rule %s is ambiguous; %s is in the"
" first sets of %s as well as %s"
% (name, symbol, label, inverse[symbol])
)
inverse[symbol] = label
self.first[name] = totalset
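The FIRST-set idea described in the module docstring can be shown in a few lines of standalone Python. This is a hand-rolled sketch over a toy grammar (reshaped slightly from the docstring example so it is unambiguous), not the ParserGenerator machinery itself:

# Toy grammar:  start: '(' inner ')'   inner: A | B   A: 'a' '<'   B: 'b' '<'
grammar = {
    "start": [["'('", "inner", "')'"]],
    "inner": [["A"], ["B"]],
    "A": [["'a'", "'<'"]],
    "B": [["'b'", "'<'"]],
}

def first(symbol, grammar, seen=frozenset()):
    if symbol not in grammar:      # quoted terminals are their own FIRST set
        return {symbol}
    if symbol in seen:             # guard against left recursion
        raise ValueError("left recursion on " + symbol)
    result = set()
    for alternative in grammar[symbol]:
        result |= first(alternative[0], grammar, seen | {symbol})
    return result

print(sorted(first("A", grammar)))      # ["'a'"]
print(sorted(first("inner", grammar)))  # ["'a'", "'b'"]
print(sorted(first("start", grammar)))  # ["'('"]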

View File

@ -1,38 +0,0 @@
import itertools
def generate_tokens(tokens):
numbers = itertools.count(0)
for line in tokens:
line = line.strip()
if not line or line.startswith("#"):
continue
name = line.split()[0]
yield (name, next(numbers))
yield ("N_TOKENS", next(numbers))
yield ("NT_OFFSET", 256)
def generate_opmap(tokens):
for line in tokens:
line = line.strip()
if not line or line.startswith("#"):
continue
pieces = line.split()
if len(pieces) != 2:
continue
name, op = pieces
yield (op.strip("'"), name)
# Yield '<>' separately. This is needed so it does not collide
# with the token generation in "generate_tokens" because if this
# symbol is included in Grammar/Tokens, it will collide with !=
# as it has the same name (NOTEQUAL).
yield ("<>", "NOTEQUAL")

View File

@ -1,8 +1,8 @@
#include <Python.h>
#include "../tokenizer.h"
#include "tokenizer.h"
#include "pegen.h"
#include "parse_string.h"
#include "string_parser.h"
//// STRING HANDLING FUNCTIONS ////

View File

@ -486,7 +486,6 @@ static int test_init_from_config(void)
config.install_signal_handlers = 0;
putenv("PYTHONOLDPARSER=1");
config._use_peg_parser = 0;
/* FIXME: test use_environment */

File diff suppressed because it is too large

View File

@ -390,21 +390,6 @@ PyAST_CompileEx(mod_ty mod, const char *filename_str, PyCompilerFlags *flags,
}
PyCodeObject *
PyNode_Compile(struct _node *n, const char *filename)
{
PyCodeObject *co = NULL;
mod_ty mod;
PyArena *arena = PyArena_New();
if (!arena)
return NULL;
mod = PyAST_FromNode(n, NULL, filename, arena);
if (mod)
co = PyAST_Compile(mod, filename, NULL, arena);
PyArena_Free(arena);
return co;
}
static void
compiler_free(struct compiler *c)
{

View File

@ -72,7 +72,6 @@ static const char usage_3[] = "\
-X opt : set implementation-specific option. The following options are available:\n\
\n\
-X faulthandler: enable faulthandler\n\
-X oldparser: enable the traditional LL(1) parser; also PYTHONOLDPARSER\n\
-X showrefcount: output the total reference count and number of used\n\
memory blocks when the program finishes or after each statement in the\n\
interactive interpreter. This only works on debug builds\n\
@ -640,7 +639,6 @@ _PyConfig_InitCompatConfig(PyConfig *config)
#ifdef MS_WINDOWS
config->legacy_windows_stdio = -1;
#endif
config->_use_peg_parser = 1;
}
@ -798,7 +796,6 @@ _PyConfig_Copy(PyConfig *config, const PyConfig *config2)
COPY_ATTR(isolated);
COPY_ATTR(use_environment);
COPY_ATTR(dev_mode);
COPY_ATTR(_use_peg_parser);
COPY_ATTR(install_signal_handlers);
COPY_ATTR(use_hash_seed);
COPY_ATTR(hash_seed);
@ -905,7 +902,6 @@ config_as_dict(const PyConfig *config)
SET_ITEM_INT(isolated);
SET_ITEM_INT(use_environment);
SET_ITEM_INT(dev_mode);
SET_ITEM_INT(_use_peg_parser);
SET_ITEM_INT(install_signal_handlers);
SET_ITEM_INT(use_hash_seed);
SET_ITEM_UINT(hash_seed);
@ -1451,11 +1447,6 @@ config_read_complex_options(PyConfig *config)
config->import_time = 1;
}
if (config_get_env(config, "PYTHONOLDPARSER")
|| config_get_xoption(config, L"oldparser")) {
config->_use_peg_parser = 0;
}
PyStatus status;
if (config->tracemalloc < 0) {
status = config_init_tracemalloc(config);
@ -2549,7 +2540,6 @@ PyConfig_Read(PyConfig *config)
assert(config->isolated >= 0);
assert(config->use_environment >= 0);
assert(config->dev_mode >= 0);
assert(config->_use_peg_parser >= 0);
assert(config->install_signal_handlers >= 0);
assert(config->use_hash_seed >= 0);
assert(config->faulthandler >= 0);

View File

@ -18,7 +18,6 @@
#include "pycore_sysmodule.h" // _PySys_ClearAuditHooks()
#include "pycore_traceback.h" // _Py_DumpTracebackThreads()
#include "grammar.h" // PyGrammar_RemoveAccelerators()
#include <locale.h> // setlocale()
#ifdef HAVE_SIGNAL_H
@ -50,7 +49,6 @@ _Py_IDENTIFIER(threading);
extern "C" {
#endif
extern grammar _PyParser_Grammar; /* From graminit.c */
/* Forward declarations */
static PyStatus add_main_module(PyInterpreterState *interp);
@ -1301,7 +1299,6 @@ finalize_interp_clear(PyThreadState *tstate)
_PyWarnings_Fini(tstate->interp);
if (is_main_interp) {
PyGrammar_RemoveAccelerators(&_PyParser_Grammar);
_PyExc_Fini();
}

View File

@ -58,16 +58,12 @@ _Py_static_string(PyId_string, "<string>");
extern "C" {
#endif
extern Py_EXPORTED_SYMBOL grammar _PyParser_Grammar; /* From graminit.c */
/* Forward */
static void flush_io(void);
static PyObject *run_mod(mod_ty, PyObject *, PyObject *, PyObject *,
PyCompilerFlags *, PyArena *);
static PyObject *run_pyc_file(FILE *, const char *, PyObject *, PyObject *,
PyCompilerFlags *);
static void err_input(perrdetail *);
static void err_free(perrdetail *);
static int PyRun_InteractiveOneObjectEx(FILE *, PyObject *, PyCompilerFlags *);
/* Parse input from a file and execute it */
@ -148,32 +144,6 @@ PyRun_InteractiveLoopFlags(FILE *fp, const char *filename_str, PyCompilerFlags *
return err;
}
/* compute parser flags based on compiler flags */
static int PARSER_FLAGS(PyCompilerFlags *flags)
{
int parser_flags = 0;
if (!flags)
return 0;
if (flags->cf_flags & PyCF_DONT_IMPLY_DEDENT)
parser_flags |= PyPARSE_DONT_IMPLY_DEDENT;
if (flags->cf_flags & PyCF_IGNORE_COOKIE)
parser_flags |= PyPARSE_IGNORE_COOKIE;
if (flags->cf_flags & CO_FUTURE_BARRY_AS_BDFL)
parser_flags |= PyPARSE_BARRY_AS_BDFL;
if (flags->cf_flags & PyCF_TYPE_COMMENTS)
parser_flags |= PyPARSE_TYPE_COMMENTS;
return parser_flags;
}
#if 0
/* Keep an example of flags with future keyword support. */
#define PARSER_FLAGS(flags) \
((flags) ? ((((flags)->cf_flags & PyCF_DONT_IMPLY_DEDENT) ? \
PyPARSE_DONT_IMPLY_DEDENT : 0) \
| ((flags)->cf_flags & CO_FUTURE_WITH_STATEMENT ? \
PyPARSE_WITH_IS_KEYWORD : 0)) : 0)
#endif
/* A PyRun_InteractiveOneObject() auxiliary function that does not print the
* error on failure. */
static int
@ -185,7 +155,6 @@ PyRun_InteractiveOneObjectEx(FILE *fp, PyObject *filename,
PyArena *arena;
const char *ps1 = "", *ps2 = "", *enc = NULL;
int errcode = 0;
int use_peg = _PyInterpreterState_GET()->config._use_peg_parser;
_Py_IDENTIFIER(encoding);
_Py_IDENTIFIER(__main__);
@ -239,15 +208,8 @@ PyRun_InteractiveOneObjectEx(FILE *fp, PyObject *filename,
return -1;
}
if (use_peg) {
mod = PyPegen_ASTFromFileObject(fp, filename, Py_single_input,
enc, ps1, ps2, flags, &errcode, arena);
}
else {
mod = PyParser_ASTFromFileObject(fp, filename, enc,
Py_single_input, ps1, ps2,
flags, &errcode, arena);
}
mod = PyPegen_ASTFromFileObject(fp, filename, Py_single_input,
enc, ps1, ps2, flags, &errcode, arena);
Py_XDECREF(v);
Py_XDECREF(w);
@ -1058,7 +1020,6 @@ PyRun_StringFlags(const char *str, int start, PyObject *globals,
mod_ty mod;
PyArena *arena;
PyObject *filename;
int use_peg = _PyInterpreterState_GET()->config._use_peg_parser;
filename = _PyUnicode_FromId(&PyId_string); /* borrowed */
if (filename == NULL)
@ -1068,12 +1029,7 @@ PyRun_StringFlags(const char *str, int start, PyObject *globals,
if (arena == NULL)
return NULL;
if (use_peg) {
mod = PyPegen_ASTFromStringObject(str, filename, start, flags, arena);
}
else {
mod = PyParser_ASTFromStringObject(str, filename, start, flags, arena);
}
mod = PyPegen_ASTFromStringObject(str, filename, start, flags, arena);
if (mod != NULL)
ret = run_mod(mod, filename, globals, locals, flags, arena);
@ -1089,7 +1045,6 @@ PyRun_FileExFlags(FILE *fp, const char *filename_str, int start, PyObject *globa
mod_ty mod;
PyArena *arena = NULL;
PyObject *filename;
int use_peg = _PyInterpreterState_GET()->config._use_peg_parser;
filename = PyUnicode_DecodeFSDefault(filename_str);
if (filename == NULL)
@ -1099,14 +1054,8 @@ PyRun_FileExFlags(FILE *fp, const char *filename_str, int start, PyObject *globa
if (arena == NULL)
goto exit;
if (use_peg) {
mod = PyPegen_ASTFromFileObject(fp, filename, start, NULL, NULL, NULL,
mod = PyPegen_ASTFromFileObject(fp, filename, start, NULL, NULL, NULL,
flags, NULL, arena);
}
else {
mod = PyParser_ASTFromFileObject(fp, filename, NULL, start, 0, 0,
flags, NULL, arena);
}
if (closeit)
fclose(fp);
@ -1250,17 +1199,11 @@ Py_CompileStringObject(const char *str, PyObject *filename, int start,
{
PyCodeObject *co;
mod_ty mod;
int use_peg = _PyInterpreterState_GET()->config._use_peg_parser;
PyArena *arena = PyArena_New();
if (arena == NULL)
return NULL;
if (use_peg) {
mod = PyPegen_ASTFromStringObject(str, filename, start, flags, arena);
}
else {
mod = PyParser_ASTFromStringObject(str, filename, start, flags, arena);
}
mod = PyPegen_ASTFromStringObject(str, filename, start, flags, arena);
if (mod == NULL) {
PyArena_Free(arena);
return NULL;
@ -1357,19 +1300,13 @@ _Py_SymtableStringObjectFlags(const char *str, PyObject *filename, int start, Py
{
struct symtable *st;
mod_ty mod;
int use_peg = _PyInterpreterState_GET()->config._use_peg_parser;
PyArena *arena;
arena = PyArena_New();
if (arena == NULL)
return NULL;
if (use_peg) {
mod = PyPegen_ASTFromStringObject(str, filename, start, flags, arena);
}
else {
mod = PyParser_ASTFromStringObject(str, filename, start, flags, arena);
}
mod = PyPegen_ASTFromStringObject(str, filename, start, flags, arena);
if (mod == NULL) {
PyArena_Free(arena);
return NULL;
@ -1393,291 +1330,6 @@ Py_SymtableString(const char *str, const char *filename_str, int start)
return st;
}
/* Preferred access to parser is through AST. */
mod_ty
PyParser_ASTFromStringObject(const char *s, PyObject *filename, int start,
PyCompilerFlags *flags, PyArena *arena)
{
mod_ty mod;
PyCompilerFlags localflags = _PyCompilerFlags_INIT;
perrdetail err;
int iflags = PARSER_FLAGS(flags);
if (flags && flags->cf_feature_version < 7)
iflags |= PyPARSE_ASYNC_HACKS;
node *n = PyParser_ParseStringObject(s, filename,
&_PyParser_Grammar, start, &err,
&iflags);
if (flags == NULL) {
flags = &localflags;
}
if (n) {
flags->cf_flags |= iflags & PyCF_MASK;
mod = PyAST_FromNodeObject(n, flags, filename, arena);
PyNode_Free(n);
}
else {
err_input(&err);
mod = NULL;
}
err_free(&err);
return mod;
}
mod_ty
PyParser_ASTFromString(const char *s, const char *filename_str, int start,
PyCompilerFlags *flags, PyArena *arena)
{
PyObject *filename;
mod_ty mod;
filename = PyUnicode_DecodeFSDefault(filename_str);
if (filename == NULL)
return NULL;
mod = PyParser_ASTFromStringObject(s, filename, start, flags, arena);
Py_DECREF(filename);
return mod;
}
mod_ty
PyParser_ASTFromFileObject(FILE *fp, PyObject *filename, const char* enc,
int start, const char *ps1,
const char *ps2, PyCompilerFlags *flags, int *errcode,
PyArena *arena)
{
mod_ty mod;
PyCompilerFlags localflags = _PyCompilerFlags_INIT;
perrdetail err;
int iflags = PARSER_FLAGS(flags);
node *n = PyParser_ParseFileObject(fp, filename, enc,
&_PyParser_Grammar,
start, ps1, ps2, &err, &iflags);
if (flags == NULL) {
flags = &localflags;
}
if (n) {
flags->cf_flags |= iflags & PyCF_MASK;
mod = PyAST_FromNodeObject(n, flags, filename, arena);
PyNode_Free(n);
}
else {
err_input(&err);
if (errcode)
*errcode = err.error;
mod = NULL;
}
err_free(&err);
return mod;
}
mod_ty
PyParser_ASTFromFile(FILE *fp, const char *filename_str, const char* enc,
int start, const char *ps1,
const char *ps2, PyCompilerFlags *flags, int *errcode,
PyArena *arena)
{
mod_ty mod;
PyObject *filename;
filename = PyUnicode_DecodeFSDefault(filename_str);
if (filename == NULL)
return NULL;
mod = PyParser_ASTFromFileObject(fp, filename, enc, start, ps1, ps2,
flags, errcode, arena);
Py_DECREF(filename);
return mod;
}
/* Simplified interface to parsefile -- return node or set exception */
node *
PyParser_SimpleParseFileFlags(FILE *fp, const char *filename, int start, int flags)
{
perrdetail err;
node *n = PyParser_ParseFileFlags(fp, filename, NULL,
&_PyParser_Grammar,
start, NULL, NULL, &err, flags);
if (n == NULL)
err_input(&err);
err_free(&err);
return n;
}
/* Simplified interface to parsestring -- return node or set exception */
node *
PyParser_SimpleParseStringFlags(const char *str, int start, int flags)
{
perrdetail err;
node *n = PyParser_ParseStringFlags(str, &_PyParser_Grammar,
start, &err, flags);
if (n == NULL)
err_input(&err);
err_free(&err);
return n;
}
node *
PyParser_SimpleParseStringFlagsFilename(const char *str, const char *filename,
int start, int flags)
{
perrdetail err;
node *n = PyParser_ParseStringFlagsFilename(str, filename,
&_PyParser_Grammar, start, &err, flags);
if (n == NULL)
err_input(&err);
err_free(&err);
return n;
}
/* May want to move a more generalized form of this to parsetok.c or
even parser modules. */
void
PyParser_ClearError(perrdetail *err)
{
err_free(err);
}
void
PyParser_SetError(perrdetail *err)
{
err_input(err);
}
static void
err_free(perrdetail *err)
{
Py_CLEAR(err->filename);
}
/* Set the error appropriate to the given input error code (see errcode.h) */
static void
err_input(perrdetail *err)
{
PyObject *v, *w, *errtype, *errtext;
PyObject *msg_obj = NULL;
const char *msg = NULL;
int offset = err->offset;
errtype = PyExc_SyntaxError;
switch (err->error) {
case E_ERROR:
goto cleanup;
case E_SYNTAX:
errtype = PyExc_IndentationError;
if (err->expected == INDENT)
msg = "expected an indented block";
else if (err->token == INDENT)
msg = "unexpected indent";
else if (err->token == DEDENT)
msg = "unexpected unindent";
else if (err->expected == NOTEQUAL) {
errtype = PyExc_SyntaxError;
msg = "with Barry as BDFL, use '<>' instead of '!='";
}
else {
errtype = PyExc_SyntaxError;
msg = "invalid syntax";
}
break;
case E_TOKEN:
msg = "invalid token";
break;
case E_EOFS:
msg = "EOF while scanning triple-quoted string literal";
break;
case E_EOLS:
msg = "EOL while scanning string literal";
break;
case E_INTR:
if (!PyErr_Occurred())
PyErr_SetNone(PyExc_KeyboardInterrupt);
goto cleanup;
case E_NOMEM:
PyErr_NoMemory();
goto cleanup;
case E_EOF:
msg = "unexpected EOF while parsing";
break;
case E_TABSPACE:
errtype = PyExc_TabError;
msg = "inconsistent use of tabs and spaces in indentation";
break;
case E_OVERFLOW:
msg = "expression too long";
break;
case E_DEDENT:
errtype = PyExc_IndentationError;
msg = "unindent does not match any outer indentation level";
break;
case E_TOODEEP:
errtype = PyExc_IndentationError;
msg = "too many levels of indentation";
break;
case E_DECODE: {
PyObject *type, *value, *tb;
PyErr_Fetch(&type, &value, &tb);
msg = "unknown decode error";
if (value != NULL)
msg_obj = PyObject_Str(value);
Py_XDECREF(type);
Py_XDECREF(value);
Py_XDECREF(tb);
break;
}
case E_LINECONT:
msg = "unexpected character after line continuation character";
break;
case E_BADSINGLE:
msg = "multiple statements found while compiling a single statement";
break;
default:
fprintf(stderr, "error=%d\n", err->error);
msg = "unknown parsing error";
break;
}
/* err->text may not be UTF-8 in case of decoding errors.
Explicitly convert to an object. */
if (!err->text) {
errtext = Py_None;
Py_INCREF(Py_None);
} else {
errtext = PyUnicode_DecodeUTF8(err->text, err->offset,
"replace");
if (errtext != NULL) {
Py_ssize_t len = strlen(err->text);
offset = (int)PyUnicode_GET_LENGTH(errtext);
if (len != err->offset) {
Py_DECREF(errtext);
errtext = PyUnicode_DecodeUTF8(err->text, len,
"replace");
}
}
}
v = Py_BuildValue("(OiiN)", err->filename,
err->lineno, offset, errtext);
if (v != NULL) {
if (msg_obj)
w = Py_BuildValue("(OO)", msg_obj, v);
else
w = Py_BuildValue("(sO)", msg, v);
} else
w = NULL;
Py_XDECREF(v);
PyErr_SetObject(errtype, w);
Py_XDECREF(w);
cleanup:
Py_XDECREF(msg_obj);
if (err->text != NULL) {
PyObject_FREE(err->text);
err->text = NULL;
}
}
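The mapping above from parser error codes to exception types is still observable from Python, since the PEG parser raises the same exception classes. A small sketch (the message text may differ between versions):

# IndentationError and TabError are subclasses of SyntaxError, so one
# except clause covers the whole family of parse errors.
for source in ("  x = 1",      # unexpected indent
               "x = (1,"):     # unexpected end of input
    try:
        compile(source, "<string>", "exec")
    except SyntaxError as exc:
        print(type(exc).__name__, "-", exc.msg)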
#if defined(USE_STACKCHECK)
#if defined(WIN32) && defined(_MSC_VER)
@ -1715,123 +1367,6 @@ PyOS_CheckStack(void)
#endif /* USE_STACKCHECK */
/* Deprecated C API functions still provided for binary compatibility */
#undef PyParser_SimpleParseFile
PyAPI_FUNC(node *)
PyParser_SimpleParseFile(FILE *fp, const char *filename, int start)
{
return PyParser_SimpleParseFileFlags(fp, filename, start, 0);
}
#undef PyParser_SimpleParseString
PyAPI_FUNC(node *)
PyParser_SimpleParseString(const char *str, int start)
{
return PyParser_SimpleParseStringFlags(str, start, 0);
}
#undef PyRun_AnyFile
PyAPI_FUNC(int)
PyRun_AnyFile(FILE *fp, const char *name)
{
return PyRun_AnyFileExFlags(fp, name, 0, NULL);
}
#undef PyRun_AnyFileEx
PyAPI_FUNC(int)
PyRun_AnyFileEx(FILE *fp, const char *name, int closeit)
{
return PyRun_AnyFileExFlags(fp, name, closeit, NULL);
}
#undef PyRun_AnyFileFlags
PyAPI_FUNC(int)
PyRun_AnyFileFlags(FILE *fp, const char *name, PyCompilerFlags *flags)
{
return PyRun_AnyFileExFlags(fp, name, 0, flags);
}
#undef PyRun_File
PyAPI_FUNC(PyObject *)
PyRun_File(FILE *fp, const char *p, int s, PyObject *g, PyObject *l)
{
return PyRun_FileExFlags(fp, p, s, g, l, 0, NULL);
}
#undef PyRun_FileEx
PyAPI_FUNC(PyObject *)
PyRun_FileEx(FILE *fp, const char *p, int s, PyObject *g, PyObject *l, int c)
{
return PyRun_FileExFlags(fp, p, s, g, l, c, NULL);
}
#undef PyRun_FileFlags
PyAPI_FUNC(PyObject *)
PyRun_FileFlags(FILE *fp, const char *p, int s, PyObject *g, PyObject *l,
PyCompilerFlags *flags)
{
return PyRun_FileExFlags(fp, p, s, g, l, 0, flags);
}
#undef PyRun_SimpleFile
PyAPI_FUNC(int)
PyRun_SimpleFile(FILE *f, const char *p)
{
return PyRun_SimpleFileExFlags(f, p, 0, NULL);
}
#undef PyRun_SimpleFileEx
PyAPI_FUNC(int)
PyRun_SimpleFileEx(FILE *f, const char *p, int c)
{
return PyRun_SimpleFileExFlags(f, p, c, NULL);
}
#undef PyRun_String
PyAPI_FUNC(PyObject *)
PyRun_String(const char *str, int s, PyObject *g, PyObject *l)
{
return PyRun_StringFlags(str, s, g, l, NULL);
}
#undef PyRun_SimpleString
PyAPI_FUNC(int)
PyRun_SimpleString(const char *s)
{
return PyRun_SimpleStringFlags(s, NULL);
}
#undef Py_CompileString
PyAPI_FUNC(PyObject *)
Py_CompileString(const char *str, const char *p, int s)
{
return Py_CompileStringExFlags(str, p, s, NULL, -1);
}
#undef Py_CompileStringFlags
PyAPI_FUNC(PyObject *)
Py_CompileStringFlags(const char *str, const char *p, int s,
PyCompilerFlags *flags)
{
return Py_CompileStringExFlags(str, p, s, flags, -1);
}
#undef PyRun_InteractiveOne
PyAPI_FUNC(int)
PyRun_InteractiveOne(FILE *f, const char *p)
{
return PyRun_InteractiveOneFlags(f, p, NULL);
}
#undef PyRun_InteractiveLoop
PyAPI_FUNC(int)
PyRun_InteractiveLoop(FILE *f, const char *p)
{
return PyRun_InteractiveLoopFlags(f, p, NULL);
}
#ifdef __cplusplus
}
#endif

View File

@ -66,15 +66,14 @@ def compile_c_extension(
str(MOD_DIR.parent.parent.parent / "Python" / "Python-ast.c"),
str(MOD_DIR.parent.parent.parent / "Python" / "asdl.c"),
str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer.c"),
str(MOD_DIR.parent.parent.parent / "Parser" / "pegen" / "pegen.c"),
str(MOD_DIR.parent.parent.parent / "Parser" / "pegen" / "parse_string.c"),
str(MOD_DIR.parent.parent.parent / "Parser" / "pegen.c"),
str(MOD_DIR.parent.parent.parent / "Parser" / "string_parser.c"),
str(MOD_DIR.parent / "peg_extension" / "peg_extension.c"),
generated_source_path,
],
include_dirs=[
str(MOD_DIR.parent.parent.parent / "Include" / "internal"),
str(MOD_DIR.parent.parent.parent / "Parser"),
str(MOD_DIR.parent.parent.parent / "Parser" / "pegen"),
],
extra_compile_args=extra_compile_args,
extra_link_args=extra_link_args,

View File

@ -5438,7 +5438,7 @@ do
done
AC_SUBST(SRCDIRS)
SRCDIRS="Parser Parser/pegen Objects Python Modules Modules/_io Programs"
SRCDIRS="Parser Objects Python Modules Modules/_io Programs"
AC_MSG_CHECKING(for build directories)
for dir in $SRCDIRS; do
if test ! -d $dir; then

View File

@ -917,9 +917,6 @@ class PyBuildExt(build_ext):
# select(2); not on ancient System V
self.add(Extension('select', ['selectmodule.c']))
# Fred Drake's interface to the Python parser
self.add(Extension('parser', ['parsermodule.c']))
# Memory-mapped files (also works on Win32).
self.add(Extension('mmap', ['mmapmodule.c']))
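With the extension gone, the long-deprecated parser module is no longer built; on interpreters that include this change importing it simply fails:

try:
    import parser  # removed by this commit
except ModuleNotFoundError as exc:
    print(exc)  # No module named 'parser'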