Patch [ 784089 ] A program to scan python files and list those that require a coding declaration

This commit is contained in:
Georg Brandl 2005-08-24 18:32:30 +00:00
parent 0a5d4a20e7
commit 568973181a
3 changed files with 241 additions and 0 deletions

View File

@ -502,6 +502,11 @@ New platforms
Tools/Demos
-----------
- Added two new files to Tools/scripts: pysource.py, which recursively
finds Python source files, and findnocoding.py, which finds Python
source files that need an encoding declaration.
Patch #784089, credits to Oleg Broytmann.
- Bug #1072853: pindent.py used an uninitialized variable.
- Patch #1177597: Correct Complex.__init__.

106
Tools/scripts/findnocoding.py Executable file
View File

@ -0,0 +1,106 @@
#!/usr/bin/env python
"""List all those Python files that require a coding directive
Usage: findnocoding.py [-cd] dir1 [dir2...]
"""
__author__ = "Oleg Broytmann, Reinhold Birkenfeld"
import sys, os, re, getopt
# our pysource module finds Python source files
try:
    import pysource
except ImportError:
    # Only a missing module triggers the fallback; any other error raised
    # while importing pysource is a real problem and must not be hidden.

    # emulate the module with a simple os.walk
    class pysource:
        """Minimal stand-in for the pysource module.

        Only walk_python_files() is functional; it recognizes Python
        source files purely by their '.py' extension.
        """

        # The real predicates are unavailable; the attributes exist only so
        # that the option handling below can reference them.
        has_python_ext = looks_like_python = can_be_compiled = None

        def walk_python_files(self, paths, *args, **kwargs):
            """Yield the path of every '.py' file named in or below *paths*.

            paths: an iterable of file and/or directory names.
            Any further arguments are accepted for compatibility with
            pysource.walk_python_files() and are ignored.
            """
            for path in paths:
                if os.path.isfile(path):
                    # Yield the path itself, not the result of the
                    # extension test.
                    if path.endswith(".py"):
                        yield path
                elif os.path.isdir(path):
                    for root, dirs, files in os.walk(path):
                        for filename in files:
                            if filename.endswith(".py"):
                                yield os.path.join(root, filename)

    pysource = pysource()

    sys.stderr.write("The pysource module is not available; "
                     "no sophisticated Python source file search will be done."
                     "\n")
# Matches "coding:" or "coding=" followed by an encoding name.
decl_re = re.compile(r"coding[=:]\s*([-\w.]+)")


def get_declaration(line):
    """Return the encoding name declared in *line*, or '' if there is none."""
    found = decl_re.search(line)
    if found is None:
        return ''
    return found.group(1)
def has_correct_encoding(text, codec):
    """Return True if *text* can be decoded with *codec*, False otherwise.

    Only UnicodeDecodeError is treated as "cannot be decoded"; any other
    exception raised by the conversion propagates to the caller.
    """
    try:
        unicode(text, codec)
    except UnicodeDecodeError:
        return False
    return True
def needs_declaration(fullpath):
    """Tell whether the file *fullpath* lacks a needed encoding declaration.

    Returns:
        None  - the file could not be opened;
        False - one of the first two lines carries an encoding declaration,
                or the whole file decodes as ASCII;
        True  - there is no declaration and the file contains data that
                cannot be decoded as ASCII.
    """
    try:
        infile = open(fullpath, 'rU')
    except IOError: # Oops, the file was removed - ignore it
        return None

    # The finally clause guarantees the file is closed on every path,
    # including when reading raises.
    try:
        line1 = infile.readline()
        line2 = infile.readline()

        if get_declaration(line1) or get_declaration(line2):
            # the file does have an encoding declaration, so trust it
            return False

        # check the whole file for non-ASCII characters
        rest = infile.read()
    finally:
        infile.close()

    return not has_correct_encoding(line1+line2+rest, "ascii")
usage = """Usage: %s [-cd] paths...
-c: recognize Python source files trying to compile them
-d: debug output""" % sys.argv[0]
try:
opts, args = getopt.getopt(sys.argv[1:], 'cd')
except getopt.error, msg:
print >>sys.stderr, msg
print >>sys.stderr, usage
sys.exit(1)
is_python = pysource.looks_like_python
debug = False
for o, a in opts:
if o == '-c':
is_python = pysource.can_be_compiled
elif o == '-d':
debug = True
if not args:
print >>sys.stderr, usage
sys.exit(1)
for fullpath in pysource.walk_python_files(args, is_python):
if debug:
print "Testing for coding: %s" % fullpath
result = needs_declaration(fullpath)
if result:
print fullpath

130
Tools/scripts/pysource.py Normal file
View File

@ -0,0 +1,130 @@
#!/usr/bin/env python
"""\
List python source files.
There are three functions to check whether a file is a Python source, listed
here with increasing complexity:
- has_python_ext() checks whether a file name ends in '.py[w]'.
- looks_like_python() checks whether the file is not binary and either has
the '.py[w]' extension or the first line contains the word 'python'.
- can_be_compiled() checks whether the file can be compiled by compile().
The file also must be of appropriate size - not bigger than a megabyte.
walk_python_files() recursively lists all Python files under the given directories.
"""
__author__ = "Oleg Broytmann, Reinhold Birkenfeld"
__all__ = ["has_python_ext", "looks_like_python", "can_be_compiled", "walk_python_files"]
import sys, os, re
binary_re = re.compile('[\x00-\x08\x0E-\x1F\x7F]')
debug = False
def print_debug(msg):
if debug: print msg
def _open(fullpath):
    """Open *fullpath* for reading, or return None if it must be skipped.

    A file is skipped (with a debug message) when it cannot be stat'ed,
    when it is bigger than one megabyte, or when it cannot be opened.
    """
    try:
        nbytes = os.stat(fullpath).st_size
    except OSError, err: # Permission denied - ignore the file
        print_debug("%s: permission denied: %s" % (fullpath, err))
        return None

    limit = 1024*1024
    if nbytes > limit: # too big
        print_debug("%s: the file is too big: %d bytes" % (fullpath, nbytes))
        return None

    try:
        handle = open(fullpath, 'rU')
    except IOError, err: # Access denied, or a special file - ignore it
        print_debug("%s: access denied: %s" % (fullpath, err))
        handle = None
    return handle
def has_python_ext(fullpath):
    """Return True if the file name *fullpath* ends in '.py' or '.pyw'."""
    for extension in (".py", ".pyw"):
        if fullpath.endswith(extension):
            return True
    return False
def looks_like_python(fullpath):
    """Guess from the name and the first line whether a file is Python source.

    Returns False if the file cannot be opened by _open() or if its first
    line contains a character matched by binary_re.  Otherwise returns True
    when the name ends in '.py' or '.pyw', or when the first line contains
    the word 'python'.
    """
    infile = _open(fullpath)
    if infile is None:
        return False

    # Only the first line is inspected.
    first_line = infile.readline()
    infile.close()

    if binary_re.search(first_line):
        # file appears to be binary
        print_debug("%s: appears to be binary" % fullpath)
        return False

    # Either a regular source file name, or a disguised Python script
    # (e.g. CGI) that mentions python on its first line.
    return has_python_ext(fullpath) or "python" in first_line
def can_be_compiled(fullpath):
infile = _open(fullpath)
if infile is None:
return False
code = infile.read()
infile.close()
try:
compile(code, fullpath, "exec")
except Exception, err:
print_debug("%s: cannot compile: %s" % (fullpath, err))
return False
return True
def walk_python_files(paths, is_python=looks_like_python, exclude_dirs=None):
    """\
    Recursively yield all Python source files below the given paths.

    paths: a list of files and/or directories to be checked.
    is_python: a function that takes a file name and checks whether it is a
    Python source file
    exclude_dirs: a list of directory base names that should be excluded in
    the search
    """
    if exclude_dirs is None:
        exclude_dirs = []

    for path in paths:
        print_debug("testing: %s" % path)

        # A plain file is tested directly.
        if os.path.isfile(path):
            if is_python(path):
                yield path
            continue

        # Anything that is neither a file nor a directory is only reported.
        if not os.path.isdir(path):
            print_debug(" unknown type")
            continue

        print_debug(" it is a directory")
        for dirpath, dirnames, filenames in os.walk(path):
            # Remove excluded names from dirnames in place so that os.walk
            # does not descend into those directories.
            for skipped in exclude_dirs:
                if skipped in dirnames:
                    dirnames.remove(skipped)
            for filename in filenames:
                candidate = os.path.join(dirpath, filename)
                print_debug("testing: %s" % candidate)
                if is_python(candidate):
                    yield candidate
if __name__ == "__main__":
# Two simple examples/tests
for fullpath in walk_python_files(['.']):
print fullpath
print "----------"
for fullpath in walk_python_files(['.'], is_python=can_be_compiled):
print fullpath