Tim Peters: Taught it more "real Python" rules without slowing it

appreciably.  Triple-quoted strings no longer confuse it, nor nested
classes or defs, nor comments starting in column 1.  Chews thru
Tkinter.py in < 3 seconds for me; doctest.py no longer confuses it; no
longer missing methods in PyShell.py; etc.  Also captures defs
starting in column 1 now, but ignores them; an interface should be
added so that IDLE's class browser can show the top-level functions
too.
This commit is contained in:
Guido van Rossum 1999-06-08 12:53:21 +00:00
parent 9a33707be7
commit df9f7a3e52
1 changed files with 53 additions and 47 deletions

View File

@ -29,10 +29,14 @@ are recognized and imported modules are scanned as well, this
shouldn't happen often.
BUGS
Continuation lines are not dealt with at all and strings may confuse
the hell out of the parser, but it usually works.
Nested classes are not recognized.
Nested defs may be mistaken for class methods.''' # ' <-- bow to font lock
Continuation lines are not dealt with at all.
While triple-quoted strings won't confuse it, lines that look like
def, class, import or "from ... import" stmts inside backslash-continued
single-quoted strings are treated like code. The expense of stopping
that isn't worth it.
Code that doesn't pass tabnanny or python -t will confuse it, unless
you set the module TABWIDTH vrbl (default 8) to the correct tab width
for the file.''' # ' <-- bow to font lock
import os
import sys
@ -40,39 +44,35 @@ import imp
import re
import string
TABWIDTH = 8
_getnext = re.compile(r"""
## String slows it down by more than a factor of 2 (not because the
## string regexp is slow, but because there are often a lot of strings,
## which means the regexp has to get called that many more times).
## (?P<String>
## " [^"\\\n]* (?: \\. [^"\\\n]* )* "
##
## | ' [^'\\\n]* (?: \\. [^'\\\n]* )* '
##
## | \""" [^"\\]* (?:
## (?: \\. | "(?!"") )
## [^"\\]*
## )*
## \"""
##
## | ''' [^'\\]* (?:
## (?: \\. | '(?!'') )
## [^'\\]*
## )*
## '''
## )
##
##| (?P<Method>
(?P<Method>
# dicey trick: assume a def not at top level is a method
^ [ \t]+ def [ \t]+
(?P<String>
\""" [^"\\]* (?:
(?: \\. | "(?!"") )
[^"\\]*
)*
\"""
| ''' [^'\\]* (?:
(?: \\. | '(?!'') )
[^'\\]*
)*
'''
)
| (?P<Method>
^
(?P<MethodIndent> [ \t]* )
def [ \t]+
(?P<MethodName> [a-zA-Z_] \w* )
[ \t]* \(
)
| (?P<Class>
# lightly questionable: assume only top-level classes count
^ class [ \t]+
^
(?P<ClassIndent> [ \t]* )
class [ \t]+
(?P<ClassName> [a-zA-Z_] \w* )
[ \t]*
(?P<ClassSupers> \( [^)\n]* \) )?
@ -96,11 +96,6 @@ _getnext = re.compile(r"""
import [ \t]+
(?P<ImportFromList> [^#;\n]+ )
)
| (?P<AtTopLevel>
# cheap trick: anything other than ws in first column
^ \S
)
""", re.VERBOSE | re.DOTALL | re.MULTILINE).search
_modules = {} # cache of modules we've seen
@ -169,10 +164,10 @@ def readmodule(module, path=[], inpackage=0):
_modules[module] = dict
return dict
cur_class = None
dict = {}
_modules[module] = dict
imports = []
classstack = [] # stack of (class, indent) pairs
src = f.read()
f.close()
@ -191,26 +186,33 @@ def readmodule(module, path=[], inpackage=0):
break
start, i = m.span()
if m.start("AtTopLevel") >= 0:
# end of class definition
cur_class = None
## elif m.start("String") >= 0:
## pass
elif m.start("Method") >= 0:
# found a method definition
if cur_class:
if m.start("Method") >= 0:
# found a method definition or function
thisindent = _indent(m.group("MethodIndent"))
# close all classes indented at least as much
while classstack and \
classstack[-1][1] >= thisindent:
del classstack[-1]
if classstack:
# and we know the class it belongs to
meth_name = m.group("MethodName")
lineno = lineno + \
countnl(src, '\n',
last_lineno_pos, start)
last_lineno_pos = start
cur_class = classstack[-1][0]
cur_class._addmethod(meth_name, lineno)
elif m.start("String") >= 0:
pass
elif m.start("Class") >= 0:
# we found a class definition
thisindent = _indent(m.group("ClassIndent"))
# close all classes indented at least as much
while classstack and \
classstack[-1][1] >= thisindent:
del classstack[-1]
lineno = lineno + \
countnl(src, '\n', last_lineno_pos, start)
last_lineno_pos = start
@ -245,6 +247,7 @@ def readmodule(module, path=[], inpackage=0):
cur_class = Class(module, class_name, inherit,
file, lineno)
dict[class_name] = cur_class
classstack.append((cur_class, thisindent))
elif m.start("Import") >= 0:
# import module
@ -287,3 +290,6 @@ def readmodule(module, path=[], inpackage=0):
assert 0, "regexp _getnext found something unexpected"
return dict
def _indent(ws, _expandtabs=string.expandtabs):
return len(_expandtabs(ws, TABWIDTH))