From 7c748469c7688092152ef9bc6b893ffcdfb2c328 Mon Sep 17 00:00:00 2001 From: Edward Loper Date: Mon, 9 Aug 2004 02:06:06 +0000 Subject: [PATCH] Rewrote Parser, using regular expressions instead of walking through the string one line at a time. The resulting code is (in my opinion, anyway), much easier to read. In the process, I found and fixed a bug in the original parser's line numbering in error messages (it was inconsistent between 0-based and 1-based). Also, check for a missing blank after the prompt on all prompt lines, not just PS1 lines (test added). --- Lib/doctest.py | 435 +++++++++++++++++++-------------------- Lib/test/test_doctest.py | 13 +- 2 files changed, 227 insertions(+), 221 deletions(-) diff --git a/Lib/doctest.py b/Lib/doctest.py index b09c40d1545..4c651ed8799 100644 --- a/Lib/doctest.py +++ b/Lib/doctest.py @@ -354,15 +354,16 @@ ELLIPSIS_MARKER = '...' ###################################################################### ## Table of Contents ###################################################################### -# 1. Utility Functions -# 2. Example & DocTest -- store test cases -# 3. DocTest Finder -- extracts test cases from objects -# 4. DocTest Runner -- runs test cases -# 5. Test Functions -- convenient wrappers for testing -# 6. Tester Class -- for backwards compatibility -# 7. Unittest Support -# 8. Debugging Support -# 9. Example Usage +# 1. Utility Functions +# 2. Example & DocTest -- store test cases +# 3. DocTest Parser -- extracts examples from strings +# 4. DocTest Finder -- extracts test cases from objects +# 5. DocTest Runner -- runs test cases +# 6. Test Functions -- convenient wrappers for testing +# 7. Tester Class -- for backwards compatibility +# 8. Unittest Support +# 9. Debugging Support +# 10. Example Usage ###################################################################### ## 1.
Utility Functions @@ -475,209 +476,6 @@ class _SpoofOut(StringIO): if hasattr(self, "softspace"): del self.softspace -class Parser: - """ - Extract doctests from a string. - """ - - _PS1 = ">>>" - _PS2 = "..." - _isPS1 = re.compile(r"(\s*)" + re.escape(_PS1)).match - _isPS2 = re.compile(r"(\s*)" + re.escape(_PS2)).match - _isEmpty = re.compile(r"\s*$").match - _isComment = re.compile(r"\s*#").match - - def __init__(self, name, string): - """ - Prepare to extract doctests from string `string`. - - `name` is an arbitrary (string) name associated with the string, - and is used only in error messages. - """ - self.name = name - self.source = string - - def get_examples(self): - """ - Return the doctest examples from the string. - - This is a list of (source, want, lineno) triples, one per example - in the string. "source" is a single Python statement; it ends - with a newline iff the statement contains more than one - physical line. "want" is the expected output from running the - example (either from stdout, or a traceback in case of exception). - "want" always ends with a newline, unless no output is expected, - in which case "want" is an empty string. "lineno" is the 0-based - line number of the first line of "source" within the string. It's - 0-based because it's most common in doctests that nothing - interesting appears on the same line as opening triple-quote, - and so the first interesting line is called "line 1" then. - - >>> text = ''' - ... >>> x, y = 2, 3 # no output expected - ... >>> if 1: - ... ... print x - ... ... print y - ... 2 - ... 3 - ... - ... Some text. - ... >>> x+y - ... 5 - ... ''' - >>> for x in Parser('', text).get_examples(): - ... print x - ('x, y = 2, 3 # no output expected', '', 1) - ('if 1:\\n print x\\n print y\\n', '2\\n3\\n', 2) - ('x+y', '5\\n', 9) - """ - return self._parse(kind='examples') - - def get_program(self): - """ - Return an executable program from the string, as a string. - - The format of this isn't rigidly defined. 
In general, doctest - examples become the executable statements in the result, and - their expected outputs become comments, preceded by an "#Expected:" - comment. Everything else (text, comments, everything not part of - a doctest test) is also placed in comments. - - >>> text = ''' - ... >>> x, y = 2, 3 # no output expected - ... >>> if 1: - ... ... print x - ... ... print y - ... 2 - ... 3 - ... - ... Some text. - ... >>> x+y - ... 5 - ... ''' - >>> print Parser('', text).get_program() - x, y = 2, 3 # no output expected - if 1: - print x - print y - # Expected: - # 2 - # 3 - # - # Some text. - x+y - # Expected: - # 5 - """ - return self._parse(kind='program') - - def _parse(self, kind): - assert kind in ('examples', 'program') - do_program = kind == 'program' - output = [] - push = output.append - - string = self.source - if not string.endswith('\n'): - string += '\n' - - isPS1, isPS2 = self._isPS1, self._isPS2 - isEmpty, isComment = self._isEmpty, self._isComment - lines = string.split("\n") - i, n = 0, len(lines) - while i < n: - # Search for an example (a PS1 line). - line = lines[i] - i += 1 - m = isPS1(line) - if m is None: - if do_program: - line = line.rstrip() - if line: - line = ' ' + line - push('#' + line) - continue - # line is a PS1 line. - j = m.end(0) # beyond the prompt - if isEmpty(line, j) or isComment(line, j): - # a bare prompt or comment -- not interesting - if do_program: - push("# " + line[j:]) - continue - # line is a non-trivial PS1 line. 
- lineno = i - 1 - if line[j] != " ": - raise ValueError('line %r of the docstring for %s lacks ' - 'blank after %s: %r' % - (lineno, self.name, self._PS1, line)) - - j += 1 - blanks = m.group(1) - nblanks = len(blanks) - # suck up this and following PS2 lines - source = [] - while 1: - source.append(line[j:]) - line = lines[i] - m = isPS2(line) - if m: - if m.group(1) != blanks: - raise ValueError('line %r of the docstring for %s ' - 'has inconsistent leading whitespace: %r' % - (i, self.name, line)) - i += 1 - else: - break - - if do_program: - output.extend(source) - else: - # get rid of useless null line from trailing empty "..." - if source[-1] == "": - assert len(source) > 1 - del source[-1] - if len(source) == 1: - source = source[0] - else: - source = "\n".join(source) + "\n" - - # suck up response - if isPS1(line) or isEmpty(line): - if not do_program: - push((source, "", lineno)) - continue - - # There is a response. - want = [] - if do_program: - push("# Expected:") - while 1: - if line[:nblanks] != blanks: - raise ValueError('line %r of the docstring for %s ' - 'has inconsistent leading whitespace: %r' % - (i, self.name, line)) - want.append(line[nblanks:]) - i += 1 - line = lines[i] - if isPS1(line) or isEmpty(line): - break - - if do_program: - output.extend(['# ' + x for x in want]) - else: - want = "\n".join(want) + "\n" - push((source, want, lineno)) - - if do_program: - # Trim junk on both ends. - while output and output[-1] == '#': - output.pop() - while output and output[0] == '#': - output.pop(0) - output = '\n'.join(output) - - return output - ###################################################################### ## 2. Example & DocTest ###################################################################### @@ -774,7 +572,206 @@ class DocTest: (other.name, other.filename, other.lineno, id(other))) ###################################################################### -## 3. DocTest Finder +## 3.
Example Parser +###################################################################### + +class Parser: + """ + Extract doctests from a string. + """ + def __init__(self, name, string): + """ + Prepare to extract doctests from string `string`. + + `name` is an arbitrary (string) name associated with the string, + and is used only in error messages. + """ + self.name = name + self.string = string.expandtabs() + + _EXAMPLE_RE = re.compile(r''' + # Source consists of a PS1 line followed by zero or more PS2 lines. + (?P<source> + (?:^(?P<indent> [ ]*) >>> .*) # PS1 line + (?:\n [ ]* \.\.\. .*)*) # PS2 lines + \n? + # Want consists of any non-blank lines that do not start with PS1. + (?P<want> (?:(?![ ]*$) # Not a blank line + (?![ ]*>>>) # Not a line starting with PS1 + .*$\n? # But any other line + )*) + ''', re.MULTILINE | re.VERBOSE) + _IS_BLANK_OR_COMMENT = re.compile('^[ ]*(#.*)?$') + + def get_examples(self): + """ + Return the doctest examples from the string. + + This is a list of (source, want, lineno) triples, one per example + in the string. "source" is a single Python statement; it ends + with a newline iff the statement contains more than one + physical line. "want" is the expected output from running the + example (either from stdout, or a traceback in case of exception). + "want" always ends with a newline, unless no output is expected, + in which case "want" is an empty string. "lineno" is the 0-based + line number of the first line of "source" within the string. It's + 0-based because it's most common in doctests that nothing + interesting appears on the same line as opening triple-quote, + and so the first interesting line is called "line 1" then. + + >>> text = ''' + ... >>> x, y = 2, 3 # no output expected + ... >>> if 1: + ... ... print x + ... ... print y + ... 2 + ... 3 + ... + ... Some text. + ... >>> x+y + ... 5 + ... ''' + >>> for x in Parser('', text).get_examples(): + ...
print x + ('x, y = 2, 3 # no output expected', '', 1) + ('if 1:\\n print x\\n print y\\n', '2\\n3\\n', 2) + ('x+y', '5\\n', 9) + """ + examples = [] + charno, lineno = 0, 0 + # Find all doctest examples in the string: + for m in self._EXAMPLE_RE.finditer(self.string): + # Update lineno (lines before this example) + lineno += self.string.count('\n', charno, m.start()) + + # Extract source/want from the regexp match. + (source, want) = self._parse_example(m, lineno) + if self._IS_BLANK_OR_COMMENT.match(source): + continue + examples.append( (source, want, lineno) ) + + # Update lineno (lines inside this example) + lineno += self.string.count('\n', m.start(), m.end()) + # Update charno. + charno = m.end() + return examples + + def get_program(self): + """ + Return an executable program from the string, as a string. + + The format of this isn't rigidly defined. In general, doctest + examples become the executable statements in the result, and + their expected outputs become comments, preceded by an \"#Expected:\" + comment. Everything else (text, comments, everything not part of + a doctest test) is also placed in comments. + + >>> text = ''' + ... >>> x, y = 2, 3 # no output expected + ... >>> if 1: + ... ... print x + ... ... print y + ... 2 + ... 3 + ... + ... Some text. + ... >>> x+y + ... 5 + ... ''' + >>> print Parser('', text).get_program() + x, y = 2, 3 # no output expected + if 1: + print x + print y + # Expected: + # 2 + # 3 + # + # Some text. + x+y + # Expected: + # 5 + """ + output = [] + charnum, lineno = 0, 0 + # Find all doctest examples in the string: + for m in self._EXAMPLE_RE.finditer(self.string): + # Add any text before this example, as a comment. + if m.start() > charnum: + lines = self.string[charnum:m.start()-1].split('\n') + output.extend([self._comment_line(l) for l in lines]) + lineno += len(lines) + + # Extract source/want from the regexp match. 
+ (source, want) = self._parse_example(m, lineno, False) + # Display the source + output.append(source) + # Display the expected output, if any + if want: + output.append('# Expected:') + output.extend(['# '+l for l in want.split('\n')]) + + # Update the line number & char number. + lineno += self.string.count('\n', m.start(), m.end()) + charnum = m.end() + # Add any remaining text, as comments. + output.extend([self._comment_line(l) + for l in self.string[charnum:].split('\n')]) + # Trim junk on both ends. + while output and output[-1] == '#': + output.pop() + while output and output[0] == '#': + output.pop(0) + # Combine the output, and return it. + return '\n'.join(output) + + def _parse_example(self, m, lineno, add_newlines=True): + # Get the example's indentation level. + indent = len(m.group('indent')) + + # Divide source into lines; check that they're properly + # indented; and then strip their indentation & prompts. + source_lines = m.group('source').split('\n') + self._check_prompt_blank(source_lines, indent, lineno) + self._check_prefix(source_lines[1:], ' '*indent+'.', lineno) + source = '\n'.join([sl[indent+4:] for sl in source_lines]) + if len(source_lines) > 1 and add_newlines: + source += '\n' + + # Divide want into lines; check that it's properly + # indented; and then strip the indentation. 
+ want_lines = m.group('want').rstrip().split('\n') + self._check_prefix(want_lines, ' '*indent, + lineno+len(source_lines)) + want = '\n'.join([wl[indent:] for wl in want_lines]) + if len(want) > 0 and add_newlines: + want += '\n' + + return source, want + + def _comment_line(self, line): + line = line.rstrip() + if line: return '# '+line + else: return '#' + + def _check_prompt_blank(self, lines, indent, lineno): + for i, line in enumerate(lines): + if len(line) >= indent+4 and line[indent+3] != ' ': + raise ValueError('line %r of the docstring for %s ' + 'lacks blank after %s: %r' % + (lineno+i+1, self.name, + line[indent:indent+3], line)) + + def _check_prefix(self, lines, prefix, lineno): + for i, line in enumerate(lines): + if line and not line.startswith(prefix): + raise ValueError('line %r of the docstring for %s has ' + 'inconsistent leading whitespace: %r' % + (lineno+i+1, self.name, line)) + + +###################################################################### +## 4. DocTest Finder ###################################################################### class DocTestFinder: @@ -1062,7 +1059,7 @@ class DocTestFinder: return None ###################################################################### -## 4. DocTest Runner +## 5. DocTest Runner ###################################################################### # [XX] Should overridable methods (eg DocTestRunner.check_output) be @@ -1698,7 +1695,7 @@ class DebugRunner(DocTestRunner): raise DocTestFailure(test, example, got) ###################################################################### -## 5. Test Functions +## 6. Test Functions ###################################################################### # These should be backwards compatible. @@ -1860,7 +1857,7 @@ def run_docstring_examples(f, globs, verbose=False, name="NoName", runner.run(test, compileflags=compileflags) ###################################################################### -## 6. Tester +## 7. 
Tester ###################################################################### # This is provided only for backwards compatibility. It's not # actually used in any way. @@ -1935,7 +1932,7 @@ class Tester: d[name] = f, t ###################################################################### -## 7. Unittest Support +## 8. Unittest Support ###################################################################### class DocTestCase(unittest.TestCase): @@ -2180,7 +2177,7 @@ def DocFileSuite(*paths, **kw): return suite ###################################################################### -## 8. Debugging Support +## 9. Debugging Support ###################################################################### def script_from_examples(s): @@ -2315,7 +2312,7 @@ def debug(module, name, pm=False): debug_script(testsrc, pm, module.__dict__) ###################################################################### -## 9. Example Usage +## 10. Example Usage ###################################################################### class _TestClass: """ diff --git a/Lib/test/test_doctest.py b/Lib/test/test_doctest.py index aae93c46c91..d9d0674d889 100644 --- a/Lib/test/test_doctest.py +++ b/Lib/test/test_doctest.py @@ -209,7 +209,7 @@ expected output of an example, then `DocTest` will raise a ValueError: ... 
''' >>> doctest.DocTest(docstring, globs, 'some_test', 'filename', 0) Traceback (most recent call last): - ValueError: line 3 of the docstring for some_test has inconsistent leading whitespace: ' indentation' + ValueError: line 4 of the docstring for some_test has inconsistent leading whitespace: ' indentation' If the docstring contains inconsistent leading whitespace on continuation lines, then `DocTest` will raise a ValueError: @@ -229,7 +229,16 @@ will raise a ValueError: >>> docstring = '>>>print 1\n1' >>> doctest.DocTest(docstring, globs, 'some_test', 'filename', 0) Traceback (most recent call last): - ValueError: line 0 of the docstring for some_test lacks blank after >>>: '>>>print 1' + ValueError: line 1 of the docstring for some_test lacks blank after >>>: '>>>print 1' + +If there's no blank space after a PS2 prompt ('...'), then `DocTest` +will raise a ValueError: + + >>> docstring = '>>> if 1:\n...print 1\n1' + >>> doctest.DocTest(docstring, globs, 'some_test', 'filename', 0) + Traceback (most recent call last): + ValueError: line 2 of the docstring for some_test lacks blank after ...: '...print 1' + """ # [XX] test that it's getting line numbers right.