diff --git a/Doc/Makefile b/Doc/Makefile
index b1b0e851294..4e84b8c6855 100644
--- a/Doc/Makefile
+++ b/Doc/Makefile
@@ -24,6 +24,7 @@ help:
 	@echo " text to make plain text files"
 	@echo " changes to make an overview over all changed/added/deprecated items"
 	@echo " linkcheck to check all external links for integrity"
+	@echo " suspicious to check for suspicious markup in output text"
 	@echo " coverage to check documentation coverage for library and C API"
 	@echo " dist to create a \"dist\" directory with archived docs for download"
@@ -84,6 +85,11 @@ linkcheck: build
 	@echo "Link check complete; look for any errors in the above output " \
 	      "or in build/$(BUILDER)/output.txt"
 
+suspicious: BUILDER = suspicious
+suspicious: build
+	@echo "Suspicious check complete; look for any errors in the above output " \
+	      "or in build/$(BUILDER)/suspicious.txt"
+
 coverage: BUILDER = coverage
 coverage: build
 	@echo "Coverage finished; see c.txt and python.txt in build/coverage"
diff --git a/Doc/make.bat b/Doc/make.bat
index d3379c114e4..a1291bec1e9 100644
--- a/Doc/make.bat
+++ b/Doc/make.bat
@@ -8,28 +8,35 @@ if "%HTMLHELP%" EQU "" set HTMLHELP=%ProgramFiles%\HTML Help Workshop\hhc.exe
 if "%1" EQU "" goto help
 if "%1" EQU "html" goto build
 if "%1" EQU "htmlhelp" goto build
-if "%1" EQU "web" goto build
-if "%1" EQU "webrun" goto webrun
+if "%1" EQU "latex" goto build
+if "%1" EQU "text" goto build
+if "%1" EQU "suspicious" goto build
+if "%1" EQU "linkcheck" goto build
+if "%1" EQU "changes" goto build
 if "%1" EQU "checkout" goto checkout
 if "%1" EQU "update" goto update
 
 :help
+set this=%~n0
 echo HELP
 echo.
-echo builddoc checkout
-echo builddoc update
-echo builddoc html
-echo builddoc htmlhelp
-echo builddoc web
-echo builddoc webrun
+echo %this% checkout
+echo %this% update
+echo %this% html
+echo %this% htmlhelp
+echo %this% latex
+echo %this% text
+echo %this% suspicious
+echo %this% linkcheck
+echo %this% changes
 echo.
 goto end
 
 :checkout
 svn co %SVNROOT%/doctools/trunk/sphinx tools/sphinx
-svn co %SVNROOT%/external/docutils-0.4/docutils tools/docutils
-svn co %SVNROOT%/external/Jinja-1.1/jinja tools/jinja
-svn co %SVNROOT%/external/Pygments-0.9/pygments tools/pygments
+svn co %SVNROOT%/external/docutils-0.5/docutils tools/docutils
+svn co %SVNROOT%/external/Jinja-1.2/jinja tools/jinja
+svn co %SVNROOT%/external/Pygments-0.11.1/pygments tools/pygments
 goto end
 
 :update
@@ -43,7 +50,7 @@ goto end
 if not exist build mkdir build
 if not exist build\%1 mkdir build\%1
 if not exist build\doctrees mkdir build\doctrees
-cmd /C %PYTHON% tools\sphinx-build.py -b%1 -dbuild\doctrees . build\%1
+cmd /C %PYTHON% tools\sphinx-build.py -b%1 -dbuild\doctrees . build\%*
 if "%1" EQU "htmlhelp" "%HTMLHELP%" build\htmlhelp\pydoc.hhp
 goto end
diff --git a/Doc/tools/sphinxext/pyspecific.py b/Doc/tools/sphinxext/pyspecific.py
index d389243e7f9..c3268e946d1 100644
--- a/Doc/tools/sphinxext/pyspecific.py
+++ b/Doc/tools/sphinxext/pyspecific.py
@@ -92,6 +92,9 @@ class PydocTopicsBuilder(Builder):
         finally:
             f.close()
 
+# Support for checking for suspicious markup
+
+import suspicious
 
 # Support for documenting Opcodes
 
@@ -116,5 +119,6 @@ def parse_opcode_signature(env, sig, signode):
 def setup(app):
     app.add_role('issue', issue_role)
     app.add_builder(PydocTopicsBuilder)
+    app.add_builder(suspicious.CheckSuspiciousMarkupBuilder)
     app.add_description_unit('opcode', 'opcode', '%s (opcode)',
                              parse_opcode_signature)
diff --git a/Doc/tools/sphinxext/susp-ignored.csv b/Doc/tools/sphinxext/susp-ignored.csv
new file mode 100644
index 00000000000..7e1a2890542
--- /dev/null
+++ b/Doc/tools/sphinxext/susp-ignored.csv
@@ -0,0 +1,164 @@
+c-api/arg,,:ref,"PyArg_ParseTuple(args, ""O|O:ref"", &object, &callback)"
+c-api/list,,:high,list[low:high]
+c-api/list,,:high,list[low:high] = itemlist
+c-api/sequence,,:i2,o[i1:i2]
+c-api/sequence,,:i2,o[i1:i2] = v
+c-api/sequence,,:i2,del o[i1:i2]
+c-api/unicode,,:end,str[start:end]
+distutils/apiref,,:action,http://pypi.python.org/pypi?:action=list_classifiers
+distutils/setupscript,,::,
+extending/embedding,,:numargs,"if(!PyArg_ParseTuple(args, "":numargs""))"
+extending/extending,,:set,"if (PyArg_ParseTuple(args, ""O:set_callback"", &temp)) {"
+extending/extending,,:myfunction,"PyArg_ParseTuple(args, ""D:myfunction"", &c);"
+extending/newtypes,,:call,"if (!PyArg_ParseTuple(args, ""sss:call"", &arg1, &arg2, &arg3)) {"
+extending/windows,,:initspam,/export:initspam
+howto/cporting,,:add,"if (!PyArg_ParseTuple(args, ""ii:add_ints"", &one, &two))"
+howto/cporting,,:encode,"if (!PyArg_ParseTuple(args, ""O:encode_object"", &myobj))"
+howto/cporting,,:say,"if (!PyArg_ParseTuple(args, ""U:say_hello"", &name))"
+howto/curses,,:black,"They are: 0:black, 1:red, 2:green, 3:yellow, 4:blue, 5:magenta, 6:cyan, and"
+howto/curses,,:blue,"They are: 0:black, 1:red, 2:green, 3:yellow, 4:blue, 5:magenta, 6:cyan, and"
+howto/curses,,:cyan,"They are: 0:black, 1:red, 2:green, 3:yellow, 4:blue, 5:magenta, 6:cyan, and"
+howto/curses,,:green,"They are: 0:black, 1:red, 2:green, 3:yellow, 4:blue, 5:magenta, 6:cyan, and"
+howto/curses,,:magenta,"They are: 0:black, 1:red, 2:green, 3:yellow, 4:blue, 5:magenta, 6:cyan, and"
+howto/curses,,:red,"They are: 0:black, 1:red, 2:green, 3:yellow, 4:blue, 5:magenta, 6:cyan, and"
+howto/curses,,:white,"7:white."
+howto/curses,,:yellow,"They are: 0:black, 1:red, 2:green, 3:yellow, 4:blue, 5:magenta, 6:cyan, and"
+howto/regex,,::,
+howto/regex,,:foo,(?:foo)
+howto/urllib2,,:example,"for example ""joe@password:example.com"""
+howto/webservers,,.. image:,.. image:: http.png
+library/audioop,,:ipos,"# factor = audioop.findfactor(in_test[ipos*2:ipos*2+len(out_test)],"
+library/datetime,,:MM,
+library/datetime,,:SS,
+library/decimal,,:optional,"trailneg:optional trailing minus indicator"
+library/difflib,,:ahi,a[alo:ahi]
+library/difflib,,:bhi,b[blo:bhi]
+library/difflib,,:i2,
+library/difflib,,:j2,
+library/difflib,,:i1,
+library/dis,,:TOS,
+library/dis,,`,TOS = `TOS`
+library/doctest,,`,``factorial`` from the ``example`` module:
+library/doctest,,`,The ``example`` module
+library/doctest,,`,Using ``factorial``
+library/functions,,:step,a[start:stop:step]
+library/functions,,:stop,"a[start:stop, i]"
+library/functions,,:stop,a[start:stop:step]
+library/hotshot,,:lineno,"ncalls tottime percall cumtime percall filename:lineno(function)"
+library/httplib,,:port,host:port
+library/imaplib,,:MM,"""DD-Mmm-YYYY HH:MM:SS +HHMM"""
+library/imaplib,,:SS,"""DD-Mmm-YYYY HH:MM:SS +HHMM"""
+library/linecache,,:sys,"sys:x:3:3:sys:/dev:/bin/sh"
+library/logging,,:And,
+library/logging,,:package1,
+library/logging,,:package2,
+library/logging,,:root,
+library/logging,,:This,
+library/logging,,:port,host:port
+library/mmap,,:i2,obj[i1:i2]
+library/multiprocessing,,:queue,">>> QueueManager.register('get_queue', callable=lambda:queue)"
+library/multiprocessing,,`,">>> l._callmethod('__getitem__', (20,)) # equiv to `l[20]`"
+library/multiprocessing,,`,">>> l._callmethod('__getslice__', (2, 7)) # equiv to `l[2:7]`"
+library/multiprocessing,,`,# `BaseManager`.
+library/multiprocessing,,`,# `Pool.imap()` (which will save on the amount of code needed anyway).
+library/multiprocessing,,`,# A test file for the `multiprocessing` package
+library/multiprocessing,,`,# A test of `multiprocessing.Pool` class
+library/multiprocessing,,`,# Add more tasks using `put()`
+library/multiprocessing,,`,# create server for a `HostManager` object
+library/multiprocessing,,`,# Depends on `multiprocessing` package -- tested with `processing-0.60`
+library/multiprocessing,,`,# in the original order then consider using `Pool.map()` or
+library/multiprocessing,,`,# Not sure if we should synchronize access to `socket.accept()` method by
+library/multiprocessing,,`,# object. (We import `multiprocessing.reduction` to enable this pickling.)
+library/multiprocessing,,`,# register the Foo class; make `f()` and `g()` accessible via proxy
+library/multiprocessing,,`,# register the Foo class; make `g()` and `_h()` accessible via proxy
+library/multiprocessing,,`,# register the generator function baz; use `GeneratorProxy` to make proxies
+library/multiprocessing,,`,`Cluster` is a subclass of `SyncManager` so it allows creation of
+library/multiprocessing,,`,`hostname` gives the name of the host. If hostname is not
+library/multiprocessing,,`,`slots` is used to specify the number of slots for processes on
+library/optparse,,:len,"del parser.rargs[:len(value)]"
+library/os.path,,:foo,c:foo
+library/parser,,`,"""Make a function that raises an argument to the exponent `exp`."""
+library/posix,,`,"CFLAGS=""`getconf LFS_CFLAGS`"" OPT=""-g -O2 $CFLAGS"""
+library/profile,,:lineno,ncalls tottime percall cumtime percall filename:lineno(function)
+library/profile,,:lineno,filename:lineno(function)
+library/pyexpat,,:elem1,
+library/pyexpat,,:py,"xmlns:py = ""http://www.python.org/ns/"">"
+library/repr,,`,"return `obj`"
+library/smtplib,,:port,"as well as a regular host:port server."
+library/socket,,::,'5aef:2b::8'
+library/sqlite3,,:memory,
+library/sqlite3,,:age,"select name_last, age from people where name_last=:who and age=:age"
+library/sqlite3,,:who,"select name_last, age from people where name_last=:who and age=:age"
+library/ssl,,:My,"Organization Name (eg, company) [Internet Widgits Pty Ltd]:My Organization, Inc."
+library/ssl,,:My,"Organizational Unit Name (eg, section) []:My Group"
+library/ssl,,:myserver,"Common Name (eg, YOUR name) []:myserver.mygroup.myorganization.com"
+library/ssl,,:MyState,State or Province Name (full name) [Some-State]:MyState
+library/ssl,,:ops,Email Address []:ops@myserver.mygroup.myorganization.com
+library/ssl,,:Some,"Locality Name (eg, city) []:Some City"
+library/ssl,,:US,Country Name (2 letter code) [AU]:US
+library/stdtypes,,:len,s[len(s):len(s)]
+library/stdtypes,,:len,s[len(s):len(s)]
+library/string,,:end,s[start:end]
+library/string,,:end,s[start:end]
+library/subprocess,,`,"output=`mycmd myarg`"
+library/subprocess,,`,"output=`dmesg | grep hda`"
+library/tarfile,,:compression,filemode[:compression]
+library/tarfile,,:gz,
+library/tarfile,,:bz2,
+library/time,,:mm,
+library/time,,:ss,
+library/turtle,,::,Example::
+library/urllib,,:port,:port
+library/urllib2,,:password,"""joe:password@python.org"""
+library/uuid,,:uuid,urn:uuid:12345678-1234-5678-1234-567812345678
+library/xmlrpclib,,:pass,http://user:pass@host:port/path
+library/xmlrpclib,,:pass,user:pass
+library/xmlrpclib,,:port,http://user:pass@host:port/path
+license,,`,THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+license,,:zooko,mailto:zooko@zooko.com
+license,,`,THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+reference/datamodel,,:step,a[i:j:step]
+reference/datamodel,,:max,
+reference/expressions,,:index,x[index:index]
+reference/expressions,,:datum,{key:datum...}
+reference/expressions,,`,`expressions...`
+reference/grammar,,:output,#diagram:output
+reference/grammar,,:rules,#diagram:rules
+reference/grammar,,:token,#diagram:token
+reference/grammar,,`,'`' testlist1 '`'
+reference/lexical_analysis,,:fileencoding,# vim:fileencoding=
+reference/lexical_analysis,,`,", : . ` = ;"
+tutorial/datastructures,,:value,key:value pairs within the braces adds initial key:value pairs
+tutorial/datastructures,,:value,It is also possible to delete a key:value
+tutorial/stdlib2,,:start,"fields = struct.unpack('
+                               # don't care
+        self.issue = issue # the markup fragment that triggered this rule
+        self.line = line # text of the container element (single line only)
+
+
+class CheckSuspiciousMarkupBuilder(Builder):
+    """
+    Checks for possibly invalid markup that may leak into the output
+    """
+    name = 'suspicious'
+
+    def init(self):
+        # create output file
+        self.log_file_name = os.path.join(self.outdir, 'suspicious.csv')
+        open(self.log_file_name, 'w').close()
+        # load database of previously ignored issues
+        self.load_rules(os.path.join(os.path.dirname(__file__), 'susp-ignored.csv'))
+
+    def get_outdated_docs(self):
+        return self.env.found_docs
+
+    def get_target_uri(self, docname, typ=None):
+        return ''
+
+    def prepare_writing(self, docnames):
+        ### PYTHON PROJECT SPECIFIC ###
+        for name in set(docnames):
+            if name.split('/', 1)[0] == 'documenting':
+                docnames.remove(name)
+        ### PYTHON PROJECT SPECIFIC ###
+
+    def write_doc(self, docname, doctree):
+        self.any_issue = False # set when any issue is encountered in this document
+        self.docname = docname
+        visitor = SuspiciousVisitor(doctree, self)
+        doctree.walk(visitor)
+
+    def finish(self):
+        return
+
+    def check_issue(self, line, lineno, issue):
+        if not self.is_ignored(line, lineno, issue):
+            self.report_issue(line, lineno, issue)
+
+    def is_ignored(self, line, lineno, issue):
+        """Determine whether this issue should be ignored.
+        """
+        docname = self.docname
+        for rule in self.rules:
+            if rule.docname != docname: continue
+            if rule.issue != issue: continue
+            # Both lines must match *exactly*. This is rather strict,
+            # and probably should be improved.
+            # Doing fuzzy matches with levenshtein distance could work,
+            # but that means bringing in other libraries...
+            # Ok, relax that requirement: just check if the rule fragment
+            # is contained in the document line
+            if rule.line not in line: continue
+            # Check both line numbers. If they're "near"
+            # this rule matches. (lineno=None means "don't care")
+            if (rule.lineno is not None) and \
+                abs(rule.lineno - lineno) > 5: continue
+            # if it came this far, the rule matched
+            return True
+        return False
+
+    def report_issue(self, text, lineno, issue):
+        if not self.any_issue: self.info()
+        self.any_issue = True
+        self.write_log_entry(lineno, issue, text)
+        self.warn('[%s:%d] "%s" found in "%-.120s"' % (
+            self.docname.encode(sys.getdefaultencoding(),'replace'),
+            lineno,
+            issue.encode(sys.getdefaultencoding(),'replace'),
+            text.strip().encode(sys.getdefaultencoding(),'replace')))
+        self.app.statuscode = 1
+
+    def write_log_entry(self, lineno, issue, text):
+        f = open(self.log_file_name, 'ab')
+        writer = csv.writer(f)
+        writer.writerow([self.docname.encode('utf-8'),
+                         lineno,
+                         issue.encode('utf-8'),
+                         text.strip().encode('utf-8')])
+        del writer
+        f.close()
+
+    def load_rules(self, filename):
+        """Load database of previously ignored issues.
+
+        A csv file, with exactly the same format as suspicious.csv
+        Fields: document name (normalized), line number, issue, surrounding text
+        """
+        self.info("loading ignore rules... ", nonl=1)
+        self.rules = rules = []
+        try: f = open(filename, 'rb')
+        except IOError: return
+        for i, row in enumerate(csv.reader(f)):
+            if len(row) != 4:
+                raise ValueError, "wrong format in %s, line %d: %s" % (filename, i+1, row)
+            docname, lineno, issue, text = row
+            docname = docname.decode('utf-8')
+            if lineno: lineno = int(lineno)
+            else: lineno = None
+            issue = issue.decode('utf-8')
+            text = text.decode('utf-8')
+            rule = Rule(docname, lineno, issue, text)
+            rules.append(rule)
+        f.close()
+        self.info('done, %d rules loaded' % len(self.rules))
+
+
+def get_lineno(node):
+    "Obtain line number information for a node"
+    lineno = None
+    while lineno is None and node:
+        node = node.parent
+        lineno = node.line
+    return lineno
+
+
+def extract_line(text, index):
+    """text may be a multiline string; extract
+    only the line containing the given character index.
+
+    >>> extract_line("abc\ndefgh\ni", 6)
+    'defgh'
+    >>> for i in (0, 2, 3, 4, 10):
+    ...   print extract_line("abc\ndefgh\ni", i)
+    abc
+    abc
+    abc
+    defgh
+    defgh
+    i
+    """
+    p = text.rfind('\n', 0, index) + 1
+    q = text.find('\n', index)
+    if q<0: q = len(text)
+    return text[p:q]
+
+
+class SuspiciousVisitor(nodes.GenericNodeVisitor):
+
+    lastlineno = 0
+
+    def __init__(self, document, builder):
+        nodes.GenericNodeVisitor.__init__(self, document)
+        self.builder = builder
+
+    def default_visit(self, node):
+        if isinstance(node, (nodes.Text, nodes.image)): # direct text containers
+            text = node.astext()
+            # lineno seems to go backwards sometimes (?)
+            self.lastlineno = lineno = max(get_lineno(node) or 0, self.lastlineno)
+            seen = set() # don't report the same issue more than once per line
+            for match in detect_all(text):
+                #import pdb; pdb.set_trace()
+                issue = match.group()
+                line = extract_line(text, match.start())
+                if (issue, line) not in seen:
+                    self.builder.check_issue(line, lineno, issue)
+                    seen.add((issue, line))
+
+    unknown_visit = default_visit
+
+    def visit_document(self, node):
+        self.lastlineno = 0
+
+    def visit_comment(self, node):
+        # ignore comments -- too many false positives
+        # (although doing this could miss some errors;
+        # there were two sections "commented out" by mistake
+        # in the Python docs that would not have been caught)
+        raise nodes.SkipNode