cpython/Tools/scripts/reindent.py

305 lines
10 KiB
Python
Raw Normal View History

#! /usr/bin/env python
# Released to the public domain, by Tim Peters, 03 October 2000.
"""reindent [-d][-r][-v] [ path ... ]
-d (--dryrun) Dry run. Analyze, but don't make any changes to, files.
-r (--recurse) Recurse. Search for all .py files in subdirectories too.
-n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
-v (--verbose) Verbose. Print informative msgs; else no output.
-h (--help) Help. Print this usage information and exit.
Change Python (.py) files to use 4-space indents and no hard tab characters.
Also trim excess spaces and tabs from ends of lines, and remove empty lines
at the end of files. Also ensure the last line ends with a newline.
If no paths are given on the command line, reindent operates as a filter,
reading a single source file from standard input and writing the transformed
source to standard output. In this case, the -d, -r and -v flags are
ignored.
You can pass one or more file and/or directory paths. When a directory
path, all .py files within the directory will be examined, and, if the -r
option is given, likewise recursively for subdirectories.
If output is not to standard output, reindent overwrites files in place,
renaming the originals with a .bak extension. If it finds nothing to
change, the file is left alone. If reindent does change a file, the changed
file is a fixed-point for future runs (i.e., running reindent on the
resulting .py file won't change it again).
The hard part of reindenting is figuring out what to do with comment
lines. So long as the input files get a clean bill of health from
tabnanny.py, reindent should do a good job.
The backup file is a copy of the one that is being reindented. The ".bak"
file is generated with shutil.copy(), but some corner cases regarding
user/group and permissions could leave the backup file more readable that
you'd prefer. You can always use the --nobackup option to prevent this.
"""
__version__ = "1"
import tokenize
import os, shutil
import sys
verbose = 0
recurse = 0
dryrun = 0
makebackup = True
def usage(msg=None):
if msg is not None:
print >> sys.stderr, msg
print >> sys.stderr, __doc__
def errprint(*args):
sep = ""
for arg in args:
sys.stderr.write(sep + str(arg))
sep = " "
sys.stderr.write("\n")
def main():
import getopt
global verbose, recurse, dryrun, makebackup
try:
opts, args = getopt.getopt(sys.argv[1:], "drnvh",
["dryrun", "recurse", "nobackup", "verbose", "help"])
except getopt.error, msg:
usage(msg)
return
for o, a in opts:
if o in ('-d', '--dryrun'):
dryrun += 1
elif o in ('-r', '--recurse'):
recurse += 1
elif o in ('-n', '--nobackup'):
makebackup = False
elif o in ('-v', '--verbose'):
verbose += 1
elif o in ('-h', '--help'):
usage()
return
if not args:
r = Reindenter(sys.stdin)
r.run()
r.write(sys.stdout)
return
for arg in args:
check(arg)
def check(file):
if os.path.isdir(file) and not os.path.islink(file):
if verbose:
print "listing directory", file
names = os.listdir(file)
for name in names:
fullname = os.path.join(file, name)
if ((recurse and os.path.isdir(fullname) and
Merged revisions 66801,66803-66804,66813,66854-66856,66866,66870-66872,66874,66887,66903,66905,66911,66913,66927,66932,66938,66942,66962,66964,66973-66974,66977,66992,66998-66999,67002,67005,67007,67028,67040-67041,67044,67070,67089,67091,67101,67117-67119,67123-67124 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ................ r66801 | andrew.kuchling | 2008-10-04 23:51:59 +0200 (Sat, 04 Oct 2008) | 1 line Punctuation fix; expand dict.update docstring to be clearer ................ r66803 | benjamin.peterson | 2008-10-05 00:15:31 +0200 (Sun, 05 Oct 2008) | 1 line fix typo ................ r66804 | andrew.kuchling | 2008-10-05 02:11:56 +0200 (Sun, 05 Oct 2008) | 1 line #1415508 from Rocky Bernstein: add docstrings for enable_interspersed_args(), disable_interspersed_args() ................ r66813 | andrew.kuchling | 2008-10-06 14:07:04 +0200 (Mon, 06 Oct 2008) | 3 lines Per Greg Ward, optparse is no longer being externally maintained. I'll look at the bugs in the Optik bug tracker and copy them to the Python bug tracker if they're still relevant. ................ r66854 | georg.brandl | 2008-10-08 19:20:20 +0200 (Wed, 08 Oct 2008) | 2 lines #4059: patch up some sqlite docs. ................ r66855 | georg.brandl | 2008-10-08 19:30:55 +0200 (Wed, 08 Oct 2008) | 2 lines #4058: fix some whatsnew markup. ................ r66856 | georg.brandl | 2008-10-08 20:47:17 +0200 (Wed, 08 Oct 2008) | 3 lines #3935: properly support list subclasses in the C impl. of bisect. Patch reviewed by Raymond. ................ r66866 | benjamin.peterson | 2008-10-09 22:54:43 +0200 (Thu, 09 Oct 2008) | 1 line update paragraph about __future__ for 2.6 ................ r66870 | armin.rigo | 2008-10-10 10:40:44 +0200 (Fri, 10 Oct 2008) | 2 lines Typo: "ThreadError" is the name in the C source. ................ r66871 | benjamin.peterson | 2008-10-10 22:38:49 +0200 (Fri, 10 Oct 2008) | 1 line fix a small typo ................ r66872 | benjamin.peterson | 2008-10-10 22:51:37 +0200 (Fri, 10 Oct 2008) | 1 line talk about how you can unzip with zip ................ r66874 | benjamin.peterson | 2008-10-11 00:23:41 +0200 (Sat, 11 Oct 2008) | 1 line PyGILState_Acquire -> PyGILState_Ensure ................ r66887 | benjamin.peterson | 2008-10-13 23:51:40 +0200 (Mon, 13 Oct 2008) | 1 line document how to disable fixers ................ r66903 | benjamin.peterson | 2008-10-15 22:34:09 +0200 (Wed, 15 Oct 2008) | 1 line don't recurse into directories that start with '.' ................ r66905 | benjamin.peterson | 2008-10-15 23:05:55 +0200 (Wed, 15 Oct 2008) | 1 line support the optional line argument for idle ................ r66911 | benjamin.peterson | 2008-10-16 01:10:28 +0200 (Thu, 16 Oct 2008) | 41 lines Merged revisions 66805,66841,66860,66884-66886,66893,66907,66910 via svnmerge from svn+ssh://pythondev@svn.python.org/sandbox/trunk/2to3/lib2to3 ........ r66805 | benjamin.peterson | 2008-10-04 20:11:02 -0500 (Sat, 04 Oct 2008) | 1 line mention what the fixes directory is for ........ r66841 | benjamin.peterson | 2008-10-07 17:48:12 -0500 (Tue, 07 Oct 2008) | 1 line use assertFalse and assertTrue ........ r66860 | benjamin.peterson | 2008-10-08 16:05:07 -0500 (Wed, 08 Oct 2008) | 1 line instead of abusing the pattern matcher, use start_tree to find a next binding ........ r66884 | benjamin.peterson | 2008-10-13 15:50:30 -0500 (Mon, 13 Oct 2008) | 1 line don't print tokens to stdout when -v is given ........ r66885 | benjamin.peterson | 2008-10-13 16:28:57 -0500 (Mon, 13 Oct 2008) | 1 line add the -x option to disable fixers ........ r66886 | benjamin.peterson | 2008-10-13 16:33:53 -0500 (Mon, 13 Oct 2008) | 1 line cut down on some crud ........ r66893 | benjamin.peterson | 2008-10-14 17:16:54 -0500 (Tue, 14 Oct 2008) | 1 line add an optional set literal fixer ........ r66907 | benjamin.peterson | 2008-10-15 16:59:41 -0500 (Wed, 15 Oct 2008) | 1 line don't write backup files by default ........ r66910 | benjamin.peterson | 2008-10-15 17:43:10 -0500 (Wed, 15 Oct 2008) | 1 line add the -n option; it stops backupfiles from being written ........ ................ r66913 | benjamin.peterson | 2008-10-16 20:52:14 +0200 (Thu, 16 Oct 2008) | 1 line document that deque indexing is O(n) #4123 ................ r66927 | andrew.kuchling | 2008-10-16 22:15:47 +0200 (Thu, 16 Oct 2008) | 1 line Fix wording (2.6.1 backport candidate) ................ r66932 | benjamin.peterson | 2008-10-16 23:09:28 +0200 (Thu, 16 Oct 2008) | 1 line check for error conditions in _json #3623 ................ r66938 | benjamin.peterson | 2008-10-16 23:27:54 +0200 (Thu, 16 Oct 2008) | 1 line fix possible ref leak ................ r66942 | benjamin.peterson | 2008-10-16 23:48:06 +0200 (Thu, 16 Oct 2008) | 1 line fix more possible ref leaks in _json and use Py_CLEAR ................ r66962 | benjamin.peterson | 2008-10-17 22:01:01 +0200 (Fri, 17 Oct 2008) | 1 line clarify CALL_FUNCTION #4141 ................ r66964 | georg.brandl | 2008-10-17 23:41:49 +0200 (Fri, 17 Oct 2008) | 2 lines Fix duplicate word. ................ r66973 | armin.ronacher | 2008-10-19 10:27:43 +0200 (Sun, 19 Oct 2008) | 3 lines Fixed #4067 by implementing _attributes and _fields for the AST root node. ................ r66974 | benjamin.peterson | 2008-10-19 15:59:01 +0200 (Sun, 19 Oct 2008) | 1 line fix compiler warning ................ r66977 | benjamin.peterson | 2008-10-19 21:39:16 +0200 (Sun, 19 Oct 2008) | 1 line mention -n ................ r66992 | benjamin.peterson | 2008-10-21 22:51:13 +0200 (Tue, 21 Oct 2008) | 1 line make sure to call iteritems() ................ r66998 | benjamin.peterson | 2008-10-22 22:57:43 +0200 (Wed, 22 Oct 2008) | 1 line fix a few typos ................ r66999 | benjamin.peterson | 2008-10-22 23:05:30 +0200 (Wed, 22 Oct 2008) | 1 line and another typo... ................ r67002 | hirokazu.yamamoto | 2008-10-23 02:37:33 +0200 (Thu, 23 Oct 2008) | 1 line Issue #4183: Some tests didn't run with pickle.HIGHEST_PROTOCOL. ................ r67005 | walter.doerwald | 2008-10-23 15:11:39 +0200 (Thu, 23 Oct 2008) | 2 lines Use the correct names of the stateless codec functions (Fixes issue 4178). ................ r67007 | benjamin.peterson | 2008-10-23 23:43:48 +0200 (Thu, 23 Oct 2008) | 1 line only nonempty __slots__ don't work ................ r67028 | benjamin.peterson | 2008-10-26 01:27:07 +0200 (Sun, 26 Oct 2008) | 1 line don't use a catch-all ................ r67040 | armin.rigo | 2008-10-28 18:01:21 +0100 (Tue, 28 Oct 2008) | 5 lines Fix one of the tests: it relied on being present in an "output test" in order to actually test what it was supposed to test, i.e. that the code in the __del__ method did not crash. Use instead the new helper test_support.captured_output(). ................ r67041 | benjamin.peterson | 2008-10-29 21:33:00 +0100 (Wed, 29 Oct 2008) | 1 line mention the version gettempdir() was added ................ r67044 | amaury.forgeotdarc | 2008-10-30 00:15:57 +0100 (Thu, 30 Oct 2008) | 3 lines Correct error message in io.open(): closefd=True is the only accepted value with a file name. ................ r67070 | benjamin.peterson | 2008-10-31 21:41:44 +0100 (Fri, 31 Oct 2008) | 1 line rephrase has_key doc ................ r67089 | benjamin.peterson | 2008-11-03 21:43:20 +0100 (Mon, 03 Nov 2008) | 1 line clarify by splitting into multiple paragraphs ................ r67091 | benjamin.peterson | 2008-11-03 23:34:57 +0100 (Mon, 03 Nov 2008) | 1 line move a FileIO test to test_fileio ................ r67101 | georg.brandl | 2008-11-04 21:49:35 +0100 (Tue, 04 Nov 2008) | 2 lines #4167: fix markup glitches. ................ r67117 | georg.brandl | 2008-11-06 11:17:58 +0100 (Thu, 06 Nov 2008) | 2 lines #4268: Use correct module for two toplevel functions. ................ r67118 | georg.brandl | 2008-11-06 11:19:11 +0100 (Thu, 06 Nov 2008) | 2 lines #4267: small fixes in sqlite3 docs. ................ r67119 | georg.brandl | 2008-11-06 11:20:49 +0100 (Thu, 06 Nov 2008) | 2 lines #4245: move Thread section to the top. ................ r67123 | georg.brandl | 2008-11-06 19:49:15 +0100 (Thu, 06 Nov 2008) | 2 lines #4247: add "pass" examples to tutorial. ................ r67124 | andrew.kuchling | 2008-11-06 20:23:02 +0100 (Thu, 06 Nov 2008) | 1 line Fix grammar error; reword two paragraphs ................
2008-11-07 04:56:27 -04:00
not os.path.islink(fullname) and
not os.path.split(fullname)[1].startswith("."))
or name.lower().endswith(".py")):
check(fullname)
return
if verbose:
print "checking", file, "...",
try:
f = open(file)
except IOError, msg:
errprint("%s: I/O Error: %s" % (file, str(msg)))
return
r = Reindenter(f)
f.close()
if r.run():
if verbose:
print "changed."
if dryrun:
print "But this is a dry run, so leaving it alone."
if not dryrun:
bak = file + ".bak"
if makebackup:
shutil.copyfile(file, bak)
if verbose:
print "backed up", file, "to", bak
f = open(file, "w")
r.write(f)
f.close()
if verbose:
print "wrote new", file
return True
else:
if verbose:
print "unchanged."
return False
def _rstrip(line, JUNK='\n \t'):
"""Return line stripped of trailing spaces, tabs, newlines.
Note that line.rstrip() instead also strips sundry control characters,
but at least one known Emacs user expects to keep junk like that, not
mentioning Barry by name or anything <wink>.
"""
i = len(line)
while i > 0 and line[i-1] in JUNK:
i -= 1
return line[:i]
class Reindenter:
def __init__(self, f):
self.find_stmt = 1 # next token begins a fresh stmt?
self.level = 0 # current indent level
# Raw file lines.
self.raw = f.readlines()
# File lines, rstripped & tab-expanded. Dummy at start is so
# that we can use tokenize's 1-based line numbering easily.
# Note that a line is all-blank iff it's "\n".
self.lines = [_rstrip(line).expandtabs() + "\n"
for line in self.raw]
self.lines.insert(0, None)
self.index = 1 # index into self.lines of next line
# List of (lineno, indentlevel) pairs, one for each stmt and
# comment line. indentlevel is -1 for comment lines, as a
# signal that tokenize doesn't know what to do about them;
# indeed, they're our headache!
self.stats = []
def run(self):
tokenize.tokenize(self.getline, self.tokeneater)
# Remove trailing empty lines.
lines = self.lines
while lines and lines[-1] == "\n":
lines.pop()
# Sentinel.
stats = self.stats
stats.append((len(lines), 0))
# Map count of leading spaces to # we want.
have2want = {}
# Program after transformation.
after = self.after = []
# Copy over initial empty lines -- there's nothing to do until
# we see a line with *something* on it.
i = stats[0][0]
after.extend(lines[1:i])
for i in range(len(stats)-1):
thisstmt, thislevel = stats[i]
nextstmt = stats[i+1][0]
have = getlspace(lines[thisstmt])
want = thislevel * 4
if want < 0:
# A comment line.
if have:
# An indented comment line. If we saw the same
# indentation before, reuse what it most recently
# mapped to.
want = have2want.get(have, -1)
if want < 0:
# Then it probably belongs to the next real stmt.
for j in xrange(i+1, len(stats)-1):
jline, jlevel = stats[j]
if jlevel >= 0:
if have == getlspace(lines[jline]):
want = jlevel * 4
break
if want < 0: # Maybe it's a hanging
# comment like this one,
# in which case we should shift it like its base
# line got shifted.
for j in xrange(i-1, -1, -1):
jline, jlevel = stats[j]
if jlevel >= 0:
want = have + getlspace(after[jline-1]) - \
getlspace(lines[jline])
break
if want < 0:
# Still no luck -- leave it alone.
want = have
else:
want = 0
assert want >= 0
have2want[have] = want
diff = want - have
if diff == 0 or have == 0:
after.extend(lines[thisstmt:nextstmt])
else:
for line in lines[thisstmt:nextstmt]:
if diff > 0:
if line == "\n":
after.append(line)
else:
after.append(" " * diff + line)
else:
remove = min(getlspace(line), -diff)
after.append(line[remove:])
return self.raw != self.after
def write(self, f):
f.writelines(self.after)
# Line-getter for tokenize.
def getline(self):
if self.index >= len(self.lines):
line = ""
else:
line = self.lines[self.index]
self.index += 1
return line
# Line-eater for tokenize.
def tokeneater(self, type, token, (sline, scol), end, line,
INDENT=tokenize.INDENT,
DEDENT=tokenize.DEDENT,
NEWLINE=tokenize.NEWLINE,
COMMENT=tokenize.COMMENT,
NL=tokenize.NL):
if type == NEWLINE:
# A program statement, or ENDMARKER, will eventually follow,
# after some (possibly empty) run of tokens of the form
# (NL | COMMENT)* (INDENT | DEDENT+)?
self.find_stmt = 1
elif type == INDENT:
self.find_stmt = 1
self.level += 1
elif type == DEDENT:
self.find_stmt = 1
self.level -= 1
elif type == COMMENT:
if self.find_stmt:
self.stats.append((sline, -1))
# but we're still looking for a new stmt, so leave
# find_stmt alone
elif type == NL:
pass
elif self.find_stmt:
# This is the first "real token" following a NEWLINE, so it
# must be the first token of the next program statement, or an
# ENDMARKER.
self.find_stmt = 0
if line: # not endmarker
self.stats.append((sline, self.level))
# Count number of leading blanks.
def getlspace(line):
i, n = 0, len(line)
while i < n and line[i] == " ":
i += 1
return i
if __name__ == '__main__':
main()