Tim's latest version (supports old and new tokenize modules)

This commit is contained in:
Guido van Rossum 1998-04-06 14:41:20 +00:00
parent 5d97ebf2ac
commit f4b44fa6ef
3 changed files with 360 additions and 207 deletions

View File

@ -1,29 +1,16 @@
#! /home/guido/python/src/sparc/python
#! /usr/bin/env python #! /usr/bin/env python
"""The Tab Nanny despises ambiguous indentation. She knows no mercy. """The Tab Nanny despises ambiguous indentation. She knows no mercy."""
CAUTION: this version requires Guido's "NL" patch to lib/tokenize.py, # Released to the public domain, by Tim Peters, 4 April 1998.
posted 30-Mar-98. This version will not run at all with an unpatched
tokenize (it will raise AttributeError while loading), while previous
versions will run incorrectly with the patched tokenize.
"""
# Released to the public domain, by Tim Peters, 30 March 1998. __version__ = "3"
__version__ = "2"
import os import os
import sys import sys
import getopt import getopt
import tokenize import tokenize
try:
tokenize.NL
except AttributeError:
raise AttributeError, "Sorry, I need a version of tokenize.py " \
"that supports the NL pseudo-token."
verbose = 0 verbose = 0
def main(): def main():
@ -235,67 +222,131 @@ def format_witnesses(w):
prefix = prefix + "s" prefix = prefix + "s"
return prefix + " " + string.join(firsts, ', ') return prefix + " " + string.join(firsts, ', ')
indents = [] # The collection of globals, the reset_globals() function, and the
check_equal = 0 # tokeneater() function, depend on which version of tokenize is
# in use.
def reset_globals(): if hasattr(tokenize, 'NL'):
global indents, check_equal # take advantage of Guido's patch!
check_equal = 0
indents = [Whitespace("")]
def tokeneater(type, token, start, end, line, indents = []
INDENT=tokenize.INDENT, check_equal = 0
DEDENT=tokenize.DEDENT,
NEWLINE=tokenize.NEWLINE,
COMMENT=tokenize.COMMENT,
NL=tokenize.NL):
global indents, check_equal
# test in decreasing order of frequency, although the check_equal def reset_globals():
# test *must* be last; INDENT and DEDENT appear equally often global indents, check_equal
check_equal = 0
indents = [Whitespace("")]
if type in (COMMENT, NL): def tokeneater(type, token, start, end, line,
# the indentation of these guys is meaningless INDENT=tokenize.INDENT,
pass DEDENT=tokenize.DEDENT,
NEWLINE=tokenize.NEWLINE,
COMMENT=tokenize.COMMENT,
NL=tokenize.NL):
global indents, check_equal
elif type == NEWLINE: # test in decreasing order of frequency, although the check_equal
# a program statement, or ENDMARKER, will eventually follow, # test *must* be last; INDENT and DEDENT appear equally often
# after some (possibly empty) run of tokens of the form
# (NL | COMMENT)* (INDENT | DEDENT+)?
# If an INDENT appears, setting check_equal is wrong, and will
# be undone when we see the INDENT.
check_equal = 1
elif type == INDENT: if type in (COMMENT, NL):
check_equal = 0 # the indentation of these guys is meaningless
thisguy = Whitespace(token) pass
if not indents[-1].less(thisguy):
witness = indents[-1].not_less_witness(thisguy)
msg = "indent not greater e.g. " + format_witnesses(witness)
raise NannyNag(start[0], msg, line)
indents.append(thisguy)
elif type == DEDENT: elif type == NEWLINE:
# there's nothing we need to check here! what's important is # a program statement, or ENDMARKER, will eventually follow,
# that when the run of DEDENTs ends, the indentation of the # after some (possibly empty) run of tokens of the form
# program statement (or ENDMARKER) that triggered the run is # (NL | COMMENT)* (INDENT | DEDENT+)?
# equal to what's left at the top of the indents stack # If an INDENT appears, setting check_equal is wrong, and will
assert check_equal # else no earlier NEWLINE, or an earlier INDENT # be undone when we see the INDENT.
del indents[-1] check_equal = 1
elif check_equal: elif type == INDENT:
# this is the first "real token" following a NEWLINE, so it check_equal = 0
# must be the first token of the next program statement, or an thisguy = Whitespace(token)
# ENDMARKER; the "line" argument exposes the leading whitespace if not indents[-1].less(thisguy):
# for this statement; in the case of ENDMARKER, line is an empty witness = indents[-1].not_less_witness(thisguy)
# string, so will properly match the empty string with which the msg = "indent not greater e.g. " + format_witnesses(witness)
# "indents" stack was seeded raise NannyNag(start[0], msg, line)
check_equal = 0 indents.append(thisguy)
thisguy = Whitespace(line)
if not indents[-1].equal(thisguy): elif type == DEDENT:
witness = indents[-1].not_equal_witness(thisguy) # there's nothing we need to check here! what's important is
msg = "indent not equal e.g. " + format_witnesses(witness) # that when the run of DEDENTs ends, the indentation of the
raise NannyNag(start[0], msg, line) # program statement (or ENDMARKER) that triggered the run is
# equal to what's left at the top of the indents stack
assert check_equal # else no earlier NEWLINE, or an earlier INDENT
del indents[-1]
elif check_equal:
# this is the first "real token" following a NEWLINE, so it
# must be the first token of the next program statement, or an
# ENDMARKER; the "line" argument exposes the leading whitespace
# for this statement; in the case of ENDMARKER, line is an empty
# string, so will properly match the empty string with which the
# "indents" stack was seeded
check_equal = 0
thisguy = Whitespace(line)
if not indents[-1].equal(thisguy):
witness = indents[-1].not_equal_witness(thisguy)
msg = "indent not equal e.g. " + format_witnesses(witness)
raise NannyNag(start[0], msg, line)
else:
# unpatched version of tokenize
nesting_level = 0
indents = []
check_equal = 0
def reset_globals():
    """Restore the tokeneater bookkeeping state before scanning a new file."""
    global nesting_level, indents, check_equal
    nesting_level = 0
    check_equal = 0
    # seed the stack with "no indentation" so the first statement compares
    # equal to an empty prefix
    indents = [Whitespace("")]
def tokeneater(type, token, start, end, line,
               INDENT=tokenize.INDENT,
               DEDENT=tokenize.DEDENT,
               NEWLINE=tokenize.NEWLINE,
               COMMENT=tokenize.COMMENT,
               OP=tokenize.OP):
    """Token callback for the old tokenize module (no NL pseudo-token).

    Without NL, this version tracks bracket nesting itself so that a
    NEWLINE emitted inside (), [] or {} does not arm the indentation
    check.  Raises NannyNag when a statement's leading whitespace is
    ambiguous relative to the indents stack.
    """
    global nesting_level, indents, check_equal
    if type == INDENT:
        # a deeper block starts: its whitespace must compare strictly
        # "greater" than the enclosing block's, else it's ambiguous
        check_equal = 0
        thisguy = Whitespace(token)
        if not indents[-1].less(thisguy):
            witness = indents[-1].not_less_witness(thisguy)
            msg = "indent not greater e.g. " + format_witnesses(witness)
            raise NannyNag(start[0], msg, line)
        indents.append(thisguy)
    elif type == DEDENT:
        # pop back to the enclosing block's indentation
        del indents[-1]
    elif type == NEWLINE:
        # only a NEWLINE outside brackets ends a logical statement, so
        # only then should the next token trigger an equality check
        if nesting_level == 0:
            check_equal = 1
    elif type == COMMENT:
        # a comment's indentation is meaningless
        pass
    elif check_equal:
        # first "real" token after a statement-ending NEWLINE; "line"
        # exposes the new statement's leading whitespace, which must
        # match the whitespace at the top of the indents stack
        check_equal = 0
        thisguy = Whitespace(line)
        if not indents[-1].equal(thisguy):
            witness = indents[-1].not_equal_witness(thisguy)
            msg = "indent not equal e.g. " + format_witnesses(witness)
            raise NannyNag(start[0], msg, line)
    # bracket bookkeeping runs last, independent of the chain above,
    # so an opening/closing OP token is counted even on lines that
    # also triggered an indentation check
    if type == OP and token in ('{', '[', '('):
        nesting_level = nesting_level + 1
    elif type == OP and token in ('}', ']', ')'):
        if nesting_level == 0:
            raise NannyNag(start[0],
                           "unbalanced bracket '" + token + "'",
                           line)
        nesting_level = nesting_level - 1
if __name__ == '__main__': if __name__ == '__main__':
main() main()

View File

@ -1,29 +1,16 @@
#! /home/guido/python/src/sparc/python
#! /usr/bin/env python #! /usr/bin/env python
"""The Tab Nanny despises ambiguous indentation. She knows no mercy. """The Tab Nanny despises ambiguous indentation. She knows no mercy."""
CAUTION: this version requires Guido's "NL" patch to lib/tokenize.py, # Released to the public domain, by Tim Peters, 4 April 1998.
posted 30-Mar-98. This version will not run at all with an unpatched
tokenize (it will raise AttributeError while loading), while previous
versions will run incorrectly with the patched tokenize.
"""
# Released to the public domain, by Tim Peters, 30 March 1998. __version__ = "3"
__version__ = "2"
import os import os
import sys import sys
import getopt import getopt
import tokenize import tokenize
try:
tokenize.NL
except AttributeError:
raise AttributeError, "Sorry, I need a version of tokenize.py " \
"that supports the NL pseudo-token."
verbose = 0 verbose = 0
def main(): def main():
@ -235,67 +222,131 @@ def format_witnesses(w):
prefix = prefix + "s" prefix = prefix + "s"
return prefix + " " + string.join(firsts, ', ') return prefix + " " + string.join(firsts, ', ')
indents = [] # The collection of globals, the reset_globals() function, and the
check_equal = 0 # tokeneater() function, depend on which version of tokenize is
# in use.
def reset_globals(): if hasattr(tokenize, 'NL'):
global indents, check_equal # take advantage of Guido's patch!
check_equal = 0
indents = [Whitespace("")]
def tokeneater(type, token, start, end, line, indents = []
INDENT=tokenize.INDENT, check_equal = 0
DEDENT=tokenize.DEDENT,
NEWLINE=tokenize.NEWLINE,
COMMENT=tokenize.COMMENT,
NL=tokenize.NL):
global indents, check_equal
# test in decreasing order of frequency, although the check_equal def reset_globals():
# test *must* be last; INDENT and DEDENT appear equally often global indents, check_equal
check_equal = 0
indents = [Whitespace("")]
if type in (COMMENT, NL): def tokeneater(type, token, start, end, line,
# the indentation of these guys is meaningless INDENT=tokenize.INDENT,
pass DEDENT=tokenize.DEDENT,
NEWLINE=tokenize.NEWLINE,
COMMENT=tokenize.COMMENT,
NL=tokenize.NL):
global indents, check_equal
elif type == NEWLINE: # test in decreasing order of frequency, although the check_equal
# a program statement, or ENDMARKER, will eventually follow, # test *must* be last; INDENT and DEDENT appear equally often
# after some (possibly empty) run of tokens of the form
# (NL | COMMENT)* (INDENT | DEDENT+)?
# If an INDENT appears, setting check_equal is wrong, and will
# be undone when we see the INDENT.
check_equal = 1
elif type == INDENT: if type in (COMMENT, NL):
check_equal = 0 # the indentation of these guys is meaningless
thisguy = Whitespace(token) pass
if not indents[-1].less(thisguy):
witness = indents[-1].not_less_witness(thisguy)
msg = "indent not greater e.g. " + format_witnesses(witness)
raise NannyNag(start[0], msg, line)
indents.append(thisguy)
elif type == DEDENT: elif type == NEWLINE:
# there's nothing we need to check here! what's important is # a program statement, or ENDMARKER, will eventually follow,
# that when the run of DEDENTs ends, the indentation of the # after some (possibly empty) run of tokens of the form
# program statement (or ENDMARKER) that triggered the run is # (NL | COMMENT)* (INDENT | DEDENT+)?
# equal to what's left at the top of the indents stack # If an INDENT appears, setting check_equal is wrong, and will
assert check_equal # else no earlier NEWLINE, or an earlier INDENT # be undone when we see the INDENT.
del indents[-1] check_equal = 1
elif check_equal: elif type == INDENT:
# this is the first "real token" following a NEWLINE, so it check_equal = 0
# must be the first token of the next program statement, or an thisguy = Whitespace(token)
# ENDMARKER; the "line" argument exposes the leading whitespace if not indents[-1].less(thisguy):
# for this statement; in the case of ENDMARKER, line is an empty witness = indents[-1].not_less_witness(thisguy)
# string, so will properly match the empty string with which the msg = "indent not greater e.g. " + format_witnesses(witness)
# "indents" stack was seeded raise NannyNag(start[0], msg, line)
check_equal = 0 indents.append(thisguy)
thisguy = Whitespace(line)
if not indents[-1].equal(thisguy): elif type == DEDENT:
witness = indents[-1].not_equal_witness(thisguy) # there's nothing we need to check here! what's important is
msg = "indent not equal e.g. " + format_witnesses(witness) # that when the run of DEDENTs ends, the indentation of the
raise NannyNag(start[0], msg, line) # program statement (or ENDMARKER) that triggered the run is
# equal to what's left at the top of the indents stack
assert check_equal # else no earlier NEWLINE, or an earlier INDENT
del indents[-1]
elif check_equal:
# this is the first "real token" following a NEWLINE, so it
# must be the first token of the next program statement, or an
# ENDMARKER; the "line" argument exposes the leading whitespace
# for this statement; in the case of ENDMARKER, line is an empty
# string, so will properly match the empty string with which the
# "indents" stack was seeded
check_equal = 0
thisguy = Whitespace(line)
if not indents[-1].equal(thisguy):
witness = indents[-1].not_equal_witness(thisguy)
msg = "indent not equal e.g. " + format_witnesses(witness)
raise NannyNag(start[0], msg, line)
else:
# unpatched version of tokenize
nesting_level = 0
indents = []
check_equal = 0
def reset_globals():
    """Restore the tokeneater bookkeeping state before scanning a new file."""
    global nesting_level, indents, check_equal
    nesting_level = 0
    check_equal = 0
    # seed the stack with "no indentation" so the first statement compares
    # equal to an empty prefix
    indents = [Whitespace("")]
def tokeneater(type, token, start, end, line,
               INDENT=tokenize.INDENT,
               DEDENT=tokenize.DEDENT,
               NEWLINE=tokenize.NEWLINE,
               COMMENT=tokenize.COMMENT,
               OP=tokenize.OP):
    """Token callback for the old tokenize module (no NL pseudo-token).

    Without NL, this version tracks bracket nesting itself so that a
    NEWLINE emitted inside (), [] or {} does not arm the indentation
    check.  Raises NannyNag when a statement's leading whitespace is
    ambiguous relative to the indents stack.
    """
    global nesting_level, indents, check_equal
    if type == INDENT:
        # a deeper block starts: its whitespace must compare strictly
        # "greater" than the enclosing block's, else it's ambiguous
        check_equal = 0
        thisguy = Whitespace(token)
        if not indents[-1].less(thisguy):
            witness = indents[-1].not_less_witness(thisguy)
            msg = "indent not greater e.g. " + format_witnesses(witness)
            raise NannyNag(start[0], msg, line)
        indents.append(thisguy)
    elif type == DEDENT:
        # pop back to the enclosing block's indentation
        del indents[-1]
    elif type == NEWLINE:
        # only a NEWLINE outside brackets ends a logical statement, so
        # only then should the next token trigger an equality check
        if nesting_level == 0:
            check_equal = 1
    elif type == COMMENT:
        # a comment's indentation is meaningless
        pass
    elif check_equal:
        # first "real" token after a statement-ending NEWLINE; "line"
        # exposes the new statement's leading whitespace, which must
        # match the whitespace at the top of the indents stack
        check_equal = 0
        thisguy = Whitespace(line)
        if not indents[-1].equal(thisguy):
            witness = indents[-1].not_equal_witness(thisguy)
            msg = "indent not equal e.g. " + format_witnesses(witness)
            raise NannyNag(start[0], msg, line)
    # bracket bookkeeping runs last, independent of the chain above,
    # so an opening/closing OP token is counted even on lines that
    # also triggered an indentation check
    if type == OP and token in ('{', '[', '('):
        nesting_level = nesting_level + 1
    elif type == OP and token in ('}', ']', ')'):
        if nesting_level == 0:
            raise NannyNag(start[0],
                           "unbalanced bracket '" + token + "'",
                           line)
        nesting_level = nesting_level - 1
if __name__ == '__main__': if __name__ == '__main__':
main() main()

View File

@ -1,29 +1,16 @@
#! /home/guido/python/src/sparc/python
#! /usr/bin/env python #! /usr/bin/env python
"""The Tab Nanny despises ambiguous indentation. She knows no mercy. """The Tab Nanny despises ambiguous indentation. She knows no mercy."""
CAUTION: this version requires Guido's "NL" patch to lib/tokenize.py, # Released to the public domain, by Tim Peters, 4 April 1998.
posted 30-Mar-98. This version will not run at all with an unpatched
tokenize (it will raise AttributeError while loading), while previous
versions will run incorrectly with the patched tokenize.
"""
# Released to the public domain, by Tim Peters, 30 March 1998. __version__ = "3"
__version__ = "2"
import os import os
import sys import sys
import getopt import getopt
import tokenize import tokenize
try:
tokenize.NL
except AttributeError:
raise AttributeError, "Sorry, I need a version of tokenize.py " \
"that supports the NL pseudo-token."
verbose = 0 verbose = 0
def main(): def main():
@ -235,67 +222,131 @@ def format_witnesses(w):
prefix = prefix + "s" prefix = prefix + "s"
return prefix + " " + string.join(firsts, ', ') return prefix + " " + string.join(firsts, ', ')
indents = [] # The collection of globals, the reset_globals() function, and the
check_equal = 0 # tokeneater() function, depend on which version of tokenize is
# in use.
def reset_globals(): if hasattr(tokenize, 'NL'):
global indents, check_equal # take advantage of Guido's patch!
check_equal = 0
indents = [Whitespace("")]
def tokeneater(type, token, start, end, line, indents = []
INDENT=tokenize.INDENT, check_equal = 0
DEDENT=tokenize.DEDENT,
NEWLINE=tokenize.NEWLINE,
COMMENT=tokenize.COMMENT,
NL=tokenize.NL):
global indents, check_equal
# test in decreasing order of frequency, although the check_equal def reset_globals():
# test *must* be last; INDENT and DEDENT appear equally often global indents, check_equal
check_equal = 0
indents = [Whitespace("")]
if type in (COMMENT, NL): def tokeneater(type, token, start, end, line,
# the indentation of these guys is meaningless INDENT=tokenize.INDENT,
pass DEDENT=tokenize.DEDENT,
NEWLINE=tokenize.NEWLINE,
COMMENT=tokenize.COMMENT,
NL=tokenize.NL):
global indents, check_equal
elif type == NEWLINE: # test in decreasing order of frequency, although the check_equal
# a program statement, or ENDMARKER, will eventually follow, # test *must* be last; INDENT and DEDENT appear equally often
# after some (possibly empty) run of tokens of the form
# (NL | COMMENT)* (INDENT | DEDENT+)?
# If an INDENT appears, setting check_equal is wrong, and will
# be undone when we see the INDENT.
check_equal = 1
elif type == INDENT: if type in (COMMENT, NL):
check_equal = 0 # the indentation of these guys is meaningless
thisguy = Whitespace(token) pass
if not indents[-1].less(thisguy):
witness = indents[-1].not_less_witness(thisguy)
msg = "indent not greater e.g. " + format_witnesses(witness)
raise NannyNag(start[0], msg, line)
indents.append(thisguy)
elif type == DEDENT: elif type == NEWLINE:
# there's nothing we need to check here! what's important is # a program statement, or ENDMARKER, will eventually follow,
# that when the run of DEDENTs ends, the indentation of the # after some (possibly empty) run of tokens of the form
# program statement (or ENDMARKER) that triggered the run is # (NL | COMMENT)* (INDENT | DEDENT+)?
# equal to what's left at the top of the indents stack # If an INDENT appears, setting check_equal is wrong, and will
assert check_equal # else no earlier NEWLINE, or an earlier INDENT # be undone when we see the INDENT.
del indents[-1] check_equal = 1
elif check_equal: elif type == INDENT:
# this is the first "real token" following a NEWLINE, so it check_equal = 0
# must be the first token of the next program statement, or an thisguy = Whitespace(token)
# ENDMARKER; the "line" argument exposes the leading whitespace if not indents[-1].less(thisguy):
# for this statement; in the case of ENDMARKER, line is an empty witness = indents[-1].not_less_witness(thisguy)
# string, so will properly match the empty string with which the msg = "indent not greater e.g. " + format_witnesses(witness)
# "indents" stack was seeded raise NannyNag(start[0], msg, line)
check_equal = 0 indents.append(thisguy)
thisguy = Whitespace(line)
if not indents[-1].equal(thisguy): elif type == DEDENT:
witness = indents[-1].not_equal_witness(thisguy) # there's nothing we need to check here! what's important is
msg = "indent not equal e.g. " + format_witnesses(witness) # that when the run of DEDENTs ends, the indentation of the
raise NannyNag(start[0], msg, line) # program statement (or ENDMARKER) that triggered the run is
# equal to what's left at the top of the indents stack
assert check_equal # else no earlier NEWLINE, or an earlier INDENT
del indents[-1]
elif check_equal:
# this is the first "real token" following a NEWLINE, so it
# must be the first token of the next program statement, or an
# ENDMARKER; the "line" argument exposes the leading whitespace
# for this statement; in the case of ENDMARKER, line is an empty
# string, so will properly match the empty string with which the
# "indents" stack was seeded
check_equal = 0
thisguy = Whitespace(line)
if not indents[-1].equal(thisguy):
witness = indents[-1].not_equal_witness(thisguy)
msg = "indent not equal e.g. " + format_witnesses(witness)
raise NannyNag(start[0], msg, line)
else:
# unpatched version of tokenize
nesting_level = 0
indents = []
check_equal = 0
def reset_globals():
    """Restore the tokeneater bookkeeping state before scanning a new file."""
    global nesting_level, indents, check_equal
    nesting_level = 0
    check_equal = 0
    # seed the stack with "no indentation" so the first statement compares
    # equal to an empty prefix
    indents = [Whitespace("")]
def tokeneater(type, token, start, end, line,
               INDENT=tokenize.INDENT,
               DEDENT=tokenize.DEDENT,
               NEWLINE=tokenize.NEWLINE,
               COMMENT=tokenize.COMMENT,
               OP=tokenize.OP):
    """Token callback for the old tokenize module (no NL pseudo-token).

    Without NL, this version tracks bracket nesting itself so that a
    NEWLINE emitted inside (), [] or {} does not arm the indentation
    check.  Raises NannyNag when a statement's leading whitespace is
    ambiguous relative to the indents stack.
    """
    global nesting_level, indents, check_equal
    if type == INDENT:
        # a deeper block starts: its whitespace must compare strictly
        # "greater" than the enclosing block's, else it's ambiguous
        check_equal = 0
        thisguy = Whitespace(token)
        if not indents[-1].less(thisguy):
            witness = indents[-1].not_less_witness(thisguy)
            msg = "indent not greater e.g. " + format_witnesses(witness)
            raise NannyNag(start[0], msg, line)
        indents.append(thisguy)
    elif type == DEDENT:
        # pop back to the enclosing block's indentation
        del indents[-1]
    elif type == NEWLINE:
        # only a NEWLINE outside brackets ends a logical statement, so
        # only then should the next token trigger an equality check
        if nesting_level == 0:
            check_equal = 1
    elif type == COMMENT:
        # a comment's indentation is meaningless
        pass
    elif check_equal:
        # first "real" token after a statement-ending NEWLINE; "line"
        # exposes the new statement's leading whitespace, which must
        # match the whitespace at the top of the indents stack
        check_equal = 0
        thisguy = Whitespace(line)
        if not indents[-1].equal(thisguy):
            witness = indents[-1].not_equal_witness(thisguy)
            msg = "indent not equal e.g. " + format_witnesses(witness)
            raise NannyNag(start[0], msg, line)
    # bracket bookkeeping runs last, independent of the chain above,
    # so an opening/closing OP token is counted even on lines that
    # also triggered an indentation check
    if type == OP and token in ('{', '[', '('):
        nesting_level = nesting_level + 1
    elif type == OP and token in ('}', ']', ')'):
        if nesting_level == 0:
            raise NannyNag(start[0],
                           "unbalanced bracket '" + token + "'",
                           line)
        nesting_level = nesting_level - 1
if __name__ == '__main__': if __name__ == '__main__':
main() main()