Tim Peters: Taught pyclbr more "real Python" rules without slowing it appreciably.
Triple-quoted strings no longer confuse it, nor do nested classes or defs, nor comments starting in column 1. It chews through Tkinter.py in < 3 seconds for me; doctest.py no longer confuses it; it no longer misses methods in PyShell.py; etc. It also captures defs starting in column 1 now, but ignores them; an interface should be added so that IDLE's class browser can show the top-level functions too.
This commit is contained in:
parent
9a33707be7
commit
df9f7a3e52
100
Lib/pyclbr.py
100
Lib/pyclbr.py
|
@ -29,10 +29,14 @@ are recognized and imported modules are scanned as well, this
|
||||||
shouldn't happen often.
|
shouldn't happen often.
|
||||||
|
|
||||||
BUGS
|
BUGS
|
||||||
Continuation lines are not dealt with at all and strings may confuse
|
Continuation lines are not dealt with at all.
|
||||||
the hell out of the parser, but it usually works.
|
While triple-quoted strings won't confuse it, lines that look like
|
||||||
Nested classes are not recognized.
|
def, class, import or "from ... import" stmts inside backslash-continued
|
||||||
Nested defs may be mistaken for class methods.''' # ' <-- bow to font lock
|
single-quoted strings are treated like code. The expense of stopping
|
||||||
|
that isn't worth it.
|
||||||
|
Code that doesn't pass tabnanny or python -t will confuse it, unless
|
||||||
|
you set the module TABWIDTH vrbl (default 8) to the correct tab width
|
||||||
|
for the file.''' # ' <-- bow to font lock
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
@ -40,39 +44,35 @@ import imp
|
||||||
import re
|
import re
|
||||||
import string
|
import string
|
||||||
|
|
||||||
|
TABWIDTH = 8
|
||||||
|
|
||||||
_getnext = re.compile(r"""
|
_getnext = re.compile(r"""
|
||||||
## String slows it down by more than a factor of 2 (not because the
|
(?P<String>
|
||||||
## string regexp is slow, but because there are often a lot of strings,
|
\""" [^"\\]* (?:
|
||||||
## which means the regexp has to get called that many more times).
|
(?: \\. | "(?!"") )
|
||||||
## (?P<String>
|
[^"\\]*
|
||||||
## " [^"\\\n]* (?: \\. [^"\\\n]* )* "
|
)*
|
||||||
##
|
\"""
|
||||||
## | ' [^'\\\n]* (?: \\. [^'\\\n]* )* '
|
|
||||||
##
|
| ''' [^'\\]* (?:
|
||||||
## | \""" [^"\\]* (?:
|
(?: \\. | '(?!'') )
|
||||||
## (?: \\. | "(?!"") )
|
[^'\\]*
|
||||||
## [^"\\]*
|
)*
|
||||||
## )*
|
'''
|
||||||
## \"""
|
)
|
||||||
##
|
|
||||||
## | ''' [^'\\]* (?:
|
| (?P<Method>
|
||||||
## (?: \\. | '(?!'') )
|
^
|
||||||
## [^'\\]*
|
(?P<MethodIndent> [ \t]* )
|
||||||
## )*
|
def [ \t]+
|
||||||
## '''
|
|
||||||
## )
|
|
||||||
##
|
|
||||||
##| (?P<Method>
|
|
||||||
(?P<Method>
|
|
||||||
# dicey trick: assume a def not at top level is a method
|
|
||||||
^ [ \t]+ def [ \t]+
|
|
||||||
(?P<MethodName> [a-zA-Z_] \w* )
|
(?P<MethodName> [a-zA-Z_] \w* )
|
||||||
[ \t]* \(
|
[ \t]* \(
|
||||||
)
|
)
|
||||||
|
|
||||||
| (?P<Class>
|
| (?P<Class>
|
||||||
# lightly questionable: assume only top-level classes count
|
^
|
||||||
^ class [ \t]+
|
(?P<ClassIndent> [ \t]* )
|
||||||
|
class [ \t]+
|
||||||
(?P<ClassName> [a-zA-Z_] \w* )
|
(?P<ClassName> [a-zA-Z_] \w* )
|
||||||
[ \t]*
|
[ \t]*
|
||||||
(?P<ClassSupers> \( [^)\n]* \) )?
|
(?P<ClassSupers> \( [^)\n]* \) )?
|
||||||
|
@ -96,11 +96,6 @@ _getnext = re.compile(r"""
|
||||||
import [ \t]+
|
import [ \t]+
|
||||||
(?P<ImportFromList> [^#;\n]+ )
|
(?P<ImportFromList> [^#;\n]+ )
|
||||||
)
|
)
|
||||||
|
|
||||||
| (?P<AtTopLevel>
|
|
||||||
# cheap trick: anything other than ws in first column
|
|
||||||
^ \S
|
|
||||||
)
|
|
||||||
""", re.VERBOSE | re.DOTALL | re.MULTILINE).search
|
""", re.VERBOSE | re.DOTALL | re.MULTILINE).search
|
||||||
|
|
||||||
_modules = {} # cache of modules we've seen
|
_modules = {} # cache of modules we've seen
|
||||||
|
@ -169,10 +164,10 @@ def readmodule(module, path=[], inpackage=0):
|
||||||
_modules[module] = dict
|
_modules[module] = dict
|
||||||
return dict
|
return dict
|
||||||
|
|
||||||
cur_class = None
|
|
||||||
dict = {}
|
dict = {}
|
||||||
_modules[module] = dict
|
_modules[module] = dict
|
||||||
imports = []
|
imports = []
|
||||||
|
classstack = [] # stack of (class, indent) pairs
|
||||||
src = f.read()
|
src = f.read()
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
|
@ -191,26 +186,33 @@ def readmodule(module, path=[], inpackage=0):
|
||||||
break
|
break
|
||||||
start, i = m.span()
|
start, i = m.span()
|
||||||
|
|
||||||
if m.start("AtTopLevel") >= 0:
|
if m.start("Method") >= 0:
|
||||||
# end of class definition
|
# found a method definition or function
|
||||||
cur_class = None
|
thisindent = _indent(m.group("MethodIndent"))
|
||||||
|
# close all classes indented at least as much
|
||||||
## elif m.start("String") >= 0:
|
while classstack and \
|
||||||
## pass
|
classstack[-1][1] >= thisindent:
|
||||||
|
del classstack[-1]
|
||||||
elif m.start("Method") >= 0:
|
if classstack:
|
||||||
# found a method definition
|
|
||||||
if cur_class:
|
|
||||||
# and we know the class it belongs to
|
# and we know the class it belongs to
|
||||||
meth_name = m.group("MethodName")
|
meth_name = m.group("MethodName")
|
||||||
lineno = lineno + \
|
lineno = lineno + \
|
||||||
countnl(src, '\n',
|
countnl(src, '\n',
|
||||||
last_lineno_pos, start)
|
last_lineno_pos, start)
|
||||||
last_lineno_pos = start
|
last_lineno_pos = start
|
||||||
|
cur_class = classstack[-1][0]
|
||||||
cur_class._addmethod(meth_name, lineno)
|
cur_class._addmethod(meth_name, lineno)
|
||||||
|
|
||||||
|
elif m.start("String") >= 0:
|
||||||
|
pass
|
||||||
|
|
||||||
elif m.start("Class") >= 0:
|
elif m.start("Class") >= 0:
|
||||||
# we found a class definition
|
# we found a class definition
|
||||||
|
thisindent = _indent(m.group("ClassIndent"))
|
||||||
|
# close all classes indented at least as much
|
||||||
|
while classstack and \
|
||||||
|
classstack[-1][1] >= thisindent:
|
||||||
|
del classstack[-1]
|
||||||
lineno = lineno + \
|
lineno = lineno + \
|
||||||
countnl(src, '\n', last_lineno_pos, start)
|
countnl(src, '\n', last_lineno_pos, start)
|
||||||
last_lineno_pos = start
|
last_lineno_pos = start
|
||||||
|
@ -245,6 +247,7 @@ def readmodule(module, path=[], inpackage=0):
|
||||||
cur_class = Class(module, class_name, inherit,
|
cur_class = Class(module, class_name, inherit,
|
||||||
file, lineno)
|
file, lineno)
|
||||||
dict[class_name] = cur_class
|
dict[class_name] = cur_class
|
||||||
|
classstack.append((cur_class, thisindent))
|
||||||
|
|
||||||
elif m.start("Import") >= 0:
|
elif m.start("Import") >= 0:
|
||||||
# import module
|
# import module
|
||||||
|
@ -287,3 +290,6 @@ def readmodule(module, path=[], inpackage=0):
|
||||||
assert 0, "regexp _getnext found something unexpected"
|
assert 0, "regexp _getnext found something unexpected"
|
||||||
|
|
||||||
return dict
|
return dict
|
||||||
|
|
||||||
|
def _indent(ws, _expandtabs=string.expandtabs):
|
||||||
|
return len(_expandtabs(ws, TABWIDTH))
|
||||||
|
|
Loading…
Reference in New Issue