cpython/Lib/test/test_asdl_parser.py

126 lines
4.1 KiB
Python
Raw Normal View History

"""Tests for the asdl parser in Parser/asdl.py"""
import importlib.machinery
import importlib.util
import os
from os.path import dirname
import sys
import sysconfig
import unittest
# This test is only relevant for from-source builds of Python: everything it
# loads lives in the Parser/ directory of the source tree.
if not sysconfig.is_python_build():
    raise unittest.SkipTest('test irrelevant for an installed Python')

# Walk three directory levels up from this file to reach the source root,
# then point at its Parser/ directory.
src_base = dirname(dirname(dirname(__file__)))
parser_dir = os.path.join(src_base, 'Parser')
class TestAsdlParser(unittest.TestCase):
    """Tests for Parser/asdl.py, run against the parsed Parser/Python.asdl.

    The asdl module and the parsed definition are loaded once in
    setUpClass() and shared by every test method through class attributes.
    """

    @classmethod
    def setUpClass(cls):
        # Loads the asdl module dynamically, since it's not in a real importable
        # package.
        # Parses Python.asdl and runs the asdl check on the result.
        # There's no need to do this for each test method, hence setUpClass.
        sys.path.insert(0, parser_dir)
        loader = importlib.machinery.SourceFileLoader(
            'asdl', os.path.join(parser_dir, 'asdl.py'))
        # Loader.load_module() is deprecated; create the module from a spec
        # and execute it explicitly instead.
        spec = importlib.util.spec_from_loader('asdl', loader)
        module = importlib.util.module_from_spec(spec)
        loader.exec_module(module)
        cls.asdl = module
        cls.mod = cls.asdl.parse(os.path.join(parser_dir, 'Python.asdl'))
        # assertTrue() is an instance method.  Calling it on the class would
        # bind the check result to ``self`` and evaluate only the message
        # string (always truthy), so a failed check would go unnoticed.
        # Raise explicitly instead.
        if not cls.asdl.check(cls.mod):
            raise AssertionError('Module validation failed')

    @classmethod
    def tearDownClass(cls):
        # Undo the sys.path change made in setUpClass.  Remove the entry by
        # value rather than by position, so an unrelated entry that ended up
        # at index 0 in the meantime is not dropped by mistake.
        if parser_dir in sys.path:
            sys.path.remove(parser_dir)

    def setUp(self):
        # alias stuff from the class, for convenience
        self.asdl = TestAsdlParser.asdl
        self.mod = TestAsdlParser.mod
        self.types = self.mod.types

    def test_module(self):
        # The parsed module is named 'Python' and defines the core types.
        self.assertEqual(self.mod.name, 'Python')
        self.assertIn('stmt', self.types)
        self.assertIn('expr', self.types)
        self.assertIn('mod', self.types)

    def test_definitions(self):
        # The first definition is a Type wrapping a Sum; 'withitem' and
        # 'alias' are looked up by name and must be Product types.
        defs = self.mod.dfns
        self.assertIsInstance(defs[0], self.asdl.Type)
        self.assertIsInstance(defs[0].value, self.asdl.Sum)

        self.assertIsInstance(self.types['withitem'], self.asdl.Product)
        self.assertIsInstance(self.types['alias'], self.asdl.Product)

    def test_product(self):
        # Checks the string form of a Product, including an optional field.
        alias = self.types['alias']
        self.assertEqual(
            str(alias),
            'Product([Field(identifier, name), Field(identifier, asname, opt=True)])')

    def test_attributes(self):
        # 'stmt' carries four position attributes; end_lineno and
        # end_col_offset are the end positions added by bpo-33416 (GH-11605).
        stmt = self.types['stmt']
        self.assertEqual(len(stmt.attributes), 4)
        self.assertEqual(str(stmt.attributes[0]), 'Field(int, lineno)')
        self.assertEqual(str(stmt.attributes[1]), 'Field(int, col_offset)')
        self.assertEqual(str(stmt.attributes[2]), 'Field(int, end_lineno, opt=True)')
        self.assertEqual(str(stmt.attributes[3]), 'Field(int, end_col_offset, opt=True)')

    def test_constructor_fields(self):
        ehandler = self.types['excepthandler']
        self.assertEqual(len(ehandler.types), 1)
        # Four attributes: start and end positions (end positions were added
        # by bpo-33416, GH-11605).
        self.assertEqual(len(ehandler.attributes), 4)

        cons = ehandler.types[0]
        self.assertIsInstance(cons, self.asdl.Constructor)
        self.assertEqual(len(cons.fields), 3)

        f0 = cons.fields[0]
        self.assertEqual(f0.type, 'expr')
        self.assertEqual(f0.name, 'type')
        self.assertTrue(f0.opt)

        f1 = cons.fields[1]
        self.assertEqual(f1.type, 'identifier')
        self.assertEqual(f1.name, 'name')
        self.assertTrue(f1.opt)

        f2 = cons.fields[2]
        self.assertEqual(f2.type, 'stmt')
        self.assertEqual(f2.name, 'body')
        self.assertFalse(f2.opt)
        self.assertTrue(f2.seq)

    def test_visitor(self):
        # Walks the 'mod' type with a custom visitor and records the
        # constructor name once for every sequence field it has.
        class CustomVisitor(self.asdl.VisitorBase):
            def __init__(self):
                super().__init__()
                self.names_with_seq = []

            def visitModule(self, mod):
                for dfn in mod.dfns:
                    self.visit(dfn)

            def visitType(self, type):
                self.visit(type.value)

            def visitSum(self, sum):
                for t in sum.types:
                    self.visit(t)

            def visitConstructor(self, cons):
                for f in cons.fields:
                    if f.seq:
                        self.names_with_seq.append(cons.name)

        v = CustomVisitor()
        v.visit(self.types['mod'])
        self.assertEqual(v.names_with_seq,
                         ['Module', 'Module', 'Interactive', 'FunctionType', 'Suite'])
# Allow running this test file directly as a script.
if __name__ == '__main__':
    unittest.main()