bpo-39411: pyclbr rewrite on AST (#18103)

- Rewrite pyclbr using an AST processor
- Add is_async to the pyclbr.Function
This commit is contained in:
Batuhan Taskaya 2020-11-11 10:14:12 +03:00 committed by GitHub
parent 95ce7cd0a6
commit fa476fe132
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 112 additions and 205 deletions

View File

@ -97,6 +97,13 @@ statements. They have the following attributes:
.. versionadded:: 3.7 .. versionadded:: 3.7
.. attribute:: Function.is_async
``True`` for functions that are defined with the ``async`` prefix, ``False`` otherwise.
.. versionadded:: 3.10
.. _pyclbr-class-objects: .. _pyclbr-class-objects:
Class Objects Class Objects

View File

@ -25,7 +25,9 @@ has the following attributes:
children -- nested objects contained in this object. children -- nested objects contained in this object.
The 'children' attribute is a dictionary mapping names to objects. The 'children' attribute is a dictionary mapping names to objects.
Instances of Function describe functions with the attributes from _Object. Instances of Function describe functions with the attributes from _Object,
plus the following:
is_async -- True if the function is defined with an 'async' prefix, False otherwise
Instances of Class describe classes with the attributes from _Object, Instances of Class describe classes with the attributes from _Object,
plus the following: plus the following:
@ -38,11 +40,10 @@ are recognized and imported modules are scanned as well, this
shouldn't happen often. shouldn't happen often.
""" """
import io import ast
import copy
import sys import sys
import importlib.util import importlib.util
import tokenize
from token import NAME, DEDENT, OP
__all__ = ["readmodule", "readmodule_ex", "Class", "Function"] __all__ = ["readmodule", "readmodule_ex", "Class", "Function"]
@ -58,41 +59,33 @@ class _Object:
self.lineno = lineno self.lineno = lineno
self.parent = parent self.parent = parent
self.children = {} self.children = {}
if parent is not None:
def _addchild(self, name, obj): parent.children[name] = self
self.children[name] = obj
class Function(_Object): class Function(_Object):
"Information about a Python function, including methods." "Information about a Python function, including methods."
def __init__(self, module, name, file, lineno, parent=None): def __init__(self, module, name, file, lineno, parent=None, is_async=False):
_Object.__init__(self, module, name, file, lineno, parent) super().__init__(module, name, file, lineno, parent)
self.is_async = is_async
if isinstance(parent, Class):
parent.methods[name] = lineno
class Class(_Object): class Class(_Object):
"Information about a Python class." "Information about a Python class."
def __init__(self, module, name, super, file, lineno, parent=None): def __init__(self, module, name, super_, file, lineno, parent=None):
_Object.__init__(self, module, name, file, lineno, parent) super().__init__(module, name, file, lineno, parent)
self.super = [] if super is None else super self.super = super_ or []
self.methods = {} self.methods = {}
def _addmethod(self, name, lineno): # These 2 functions are used in these tests
self.methods[name] = lineno # Lib/test/test_pyclbr, Lib/idlelib/idle_test/test_browser.py
def _nest_function(ob, func_name, lineno, is_async=False):
def _nest_function(ob, func_name, lineno):
"Return a Function after nesting within ob." "Return a Function after nesting within ob."
newfunc = Function(ob.module, func_name, ob.file, lineno, ob) return Function(ob.module, func_name, ob.file, lineno, ob, is_async)
ob._addchild(func_name, newfunc)
if isinstance(ob, Class):
ob._addmethod(func_name, lineno)
return newfunc
def _nest_class(ob, class_name, lineno, super=None): def _nest_class(ob, class_name, lineno, super=None):
"Return a Class after nesting within ob." "Return a Class after nesting within ob."
newclass = Class(ob.module, class_name, super, ob.file, lineno, ob) return Class(ob.module, class_name, super, ob.file, lineno, ob)
ob._addchild(class_name, newclass)
return newclass
def readmodule(module, path=None): def readmodule(module, path=None):
"""Return Class objects for the top-level classes in module. """Return Class objects for the top-level classes in module.
@ -179,187 +172,95 @@ def _readmodule(module, path, inpackage=None):
return _create_tree(fullmodule, path, fname, source, tree, inpackage) return _create_tree(fullmodule, path, fname, source, tree, inpackage)
def _create_tree(fullmodule, path, fname, source, tree, inpackage): class _ModuleBrowser(ast.NodeVisitor):
"""Return the tree for a particular module. def __init__(self, module, path, file, tree, inpackage):
self.path = path
self.tree = tree
self.file = file
self.module = module
self.inpackage = inpackage
self.stack = []
fullmodule (full module name), inpackage+module, becomes o.module. def visit_ClassDef(self, node):
path is passed to recursive calls of _readmodule. bases = []
fname becomes o.file. for base in node.bases:
source is tokenized. Imports cause recursive calls to _readmodule. name = ast.unparse(base)
tree is {} or {'__path__': <submodule search locations>}. if name in self.tree:
inpackage, None or string, is passed to recursive calls of _readmodule.
The effect of recursive calls is mutation of global _modules.
"""
f = io.StringIO(source)
stack = [] # Initialize stack of (class, indent) pairs.
g = tokenize.generate_tokens(f.readline)
try:
for tokentype, token, start, _end, _line in g:
if tokentype == DEDENT:
lineno, thisindent = start
# Close previous nested classes and defs.
while stack and stack[-1][1] >= thisindent:
del stack[-1]
elif token == 'def':
lineno, thisindent = start
# Close previous nested classes and defs.
while stack and stack[-1][1] >= thisindent:
del stack[-1]
tokentype, func_name, start = next(g)[0:3]
if tokentype != NAME:
continue # Skip def with syntax error.
cur_func = None
if stack:
cur_obj = stack[-1][0]
cur_func = _nest_function(cur_obj, func_name, lineno)
else:
# It is just a function.
cur_func = Function(fullmodule, func_name, fname, lineno)
tree[func_name] = cur_func
stack.append((cur_func, thisindent))
elif token == 'class':
lineno, thisindent = start
# Close previous nested classes and defs.
while stack and stack[-1][1] >= thisindent:
del stack[-1]
tokentype, class_name, start = next(g)[0:3]
if tokentype != NAME:
continue # Skip class with syntax error.
# Parse what follows the class name.
tokentype, token, start = next(g)[0:3]
inherit = None
if token == '(':
names = [] # Initialize list of superclasses.
level = 1
super = [] # Tokens making up current superclass.
while True:
tokentype, token, start = next(g)[0:3]
if token in (')', ',') and level == 1:
n = "".join(super)
if n in tree:
# We know this super class. # We know this super class.
n = tree[n] bases.append(self.tree[name])
else: elif len(names := name.split(".")) > 1:
c = n.split('.')
if len(c) > 1:
# Super class form is module.class: # Super class form is module.class:
# look in module for class. # look in module for class.
m = c[-2] *_, module, class_ = names
c = c[-1] if module in _modules:
if m in _modules: bases.append(_modules[module].get(class_, name))
d = _modules[m]
if c in d:
n = d[c]
names.append(n)
super = []
if token == '(':
level += 1
elif token == ')':
level -= 1
if level == 0:
break
elif token == ',' and level == 1:
pass
# Only use NAME and OP (== dot) tokens for type name.
elif tokentype in (NAME, OP) and level == 1:
super.append(token)
# Expressions in the base list are not supported.
inherit = names
if stack:
cur_obj = stack[-1][0]
cur_class = _nest_class(
cur_obj, class_name, lineno, inherit)
else: else:
cur_class = Class(fullmodule, class_name, inherit, bases.append(name)
fname, lineno)
tree[class_name] = cur_class parent = self.stack[-1] if self.stack else None
stack.append((cur_class, thisindent)) class_ = Class(
elif token == 'import' and start[1] == 0: self.module, node.name, bases, self.file, node.lineno, parent
modules = _getnamelist(g) )
for mod, _mod2 in modules: if parent is None:
self.tree[node.name] = class_
self.stack.append(class_)
self.generic_visit(node)
self.stack.pop()
def visit_FunctionDef(self, node, *, is_async=False):
parent = self.stack[-1] if self.stack else None
function = Function(
self.module, node.name, self.file, node.lineno, parent, is_async
)
if parent is None:
self.tree[node.name] = function
self.stack.append(function)
self.generic_visit(node)
self.stack.pop()
def visit_AsyncFunctionDef(self, node):
self.visit_FunctionDef(node, is_async=True)
def visit_Import(self, node):
if node.col_offset != 0:
return
for module in node.names:
try: try:
# Recursively read the imported module.
if inpackage is None:
_readmodule(mod, path)
else:
try: try:
_readmodule(mod, path, inpackage) _readmodule(module.name, self.path, self.inpackage)
except ImportError: except ImportError:
_readmodule(mod, []) _readmodule(module.name, [])
except: except (ImportError, SyntaxError):
# If we can't find or parse the imported module, # If we can't find or parse the imported module,
# too bad -- don't die here. # too bad -- don't die here.
pass
elif token == 'from' and start[1] == 0:
mod, token = _getname(g)
if not mod or token != "import":
continue continue
names = _getnamelist(g)
def visit_ImportFrom(self, node):
if node.col_offset != 0:
return
try: try:
# Recursively read the imported module. module = "." * node.level
d = _readmodule(mod, path, inpackage) if node.module:
except: module += node.module
# If we can't find or parse the imported module, module = _readmodule(module, self.path, self.inpackage)
# too bad -- don't die here. except (ImportError, SyntaxError):
return
for name in node.names:
if name.name in module:
self.tree[name.asname or name.name] = module[name.name]
elif name.name == "*":
for import_name, import_value in module.items():
if import_name.startswith("_"):
continue continue
# Add any classes that were defined in the imported module self.tree[import_name] = import_value
# to our name space if they were mentioned in the list.
for n, n2 in names:
if n in d:
tree[n2 or n] = d[n]
elif n == '*':
# Don't add names that start with _.
for n in d:
if n[0] != '_':
tree[n] = d[n]
except StopIteration:
pass
f.close()
return tree
def _getnamelist(g): def _create_tree(fullmodule, path, fname, source, tree, inpackage):
"""Return list of (dotted-name, as-name or None) tuples for token source g. mbrowser = _ModuleBrowser(fullmodule, path, fname, tree, inpackage)
mbrowser.visit(ast.parse(source))
An as-name is the name that follows 'as' in an as clause. return mbrowser.tree
"""
names = []
while True:
name, token = _getname(g)
if not name:
break
if token == 'as':
name2, token = _getname(g)
else:
name2 = None
names.append((name, name2))
while token != "," and "\n" not in token:
token = next(g)[1]
if token != ",":
break
return names
def _getname(g):
"Return (dotted-name or None, next-token) tuple for token source g."
parts = []
tokentype, token = next(g)[0:2]
if tokentype != NAME and token != '*':
return (None, token)
parts.append(token)
while True:
tokentype, token = next(g)[0:2]
if token != '.':
break
tokentype, token = next(g)[0:2]
if tokentype != NAME:
break
parts.append(token)
return (".".join(parts), token)
def _main(): def _main():

View File

@ -150,9 +150,6 @@ class PyclbrTest(TestCase):
self.checkModule('difflib', ignore=("Match",)) self.checkModule('difflib', ignore=("Match",))
def test_decorators(self): def test_decorators(self):
# XXX: See comment in pyclbr_input.py for a test that would fail
# if it were not commented out.
#
self.checkModule('test.pyclbr_input', ignore=['om']) self.checkModule('test.pyclbr_input', ignore=['om'])
def test_nested(self): def test_nested(self):
@ -160,7 +157,7 @@ class PyclbrTest(TestCase):
# Set arguments for descriptor creation and _create_tree call.
m, p, f, t, i = 'test', '', 'test.py', {}, None m, p, f, t, i = 'test', '', 'test.py', {}, None
source = dedent("""\ source = dedent("""\
def f0: def f0():
def f1(a,b,c): def f1(a,b,c):
def f2(a=1, b=2, c=3): pass def f2(a=1, b=2, c=3): pass
return f1(a,b,d) return f1(a,b,d)

View File

@ -0,0 +1,2 @@
Add an ``is_async`` attribute to :mod:`pyclbr`'s ``Function`` objects.
Patch by Batuhan Taskaya.