Rewrote Parser, using regular expressions instead of walking through

the string one line at a time.  The resulting code is (in my opinion,
anyway) much easier to read.  In the process, I found and fixed a
bug in the original parser's line numbering in error messages (it was
inconsistent between 0-based and 1-based).  Also, check for a missing
blank (space) after the prompt on all prompt lines, not just PS1 lines
(test added).
This commit is contained in:
Edward Loper 2004-08-09 02:06:06 +00:00
parent 103d26e851
commit 7c748469c7
2 changed files with 227 additions and 221 deletions

View File

@ -354,15 +354,16 @@ ELLIPSIS_MARKER = '...'
######################################################################
## Table of Contents
######################################################################
# 1. Utility Functions
# 2. Example & DocTest -- store test cases
# 3. DocTest Finder -- extracts test cases from objects
# 4. DocTest Runner -- runs test cases
# 5. Test Functions -- convenient wrappers for testing
# 6. Tester Class -- for backwards compatibility
# 7. Unittest Support
# 8. Debugging Support
# 9. Example Usage
# 1. Utility Functions
# 2. Example & DocTest -- store test cases
# 3. DocTest Parser -- extracts examples from strings
# 4. DocTest Finder -- extracts test cases from objects
# 5. DocTest Runner -- runs test cases
# 6. Test Functions -- convenient wrappers for testing
# 7. Tester Class -- for backwards compatibility
# 8. Unittest Support
# 9. Debugging Support
# 10. Example Usage
######################################################################
## 1. Utility Functions
@ -475,209 +476,6 @@ class _SpoofOut(StringIO):
if hasattr(self, "softspace"):
del self.softspace
class Parser:
    """
    Extract doctests from a string.

    An example starts at a line whose first non-whitespace text is the
    PS1 prompt (">>>").  It continues over any directly following lines
    that start with the PS2 prompt ("...").  The lines after that, up to
    the next PS1 line or whitespace-only line, are its expected output.
    """

    _PS1 = ">>>"
    _PS2 = "..."
    # Bound `match` methods of precompiled patterns.  For the two prompt
    # patterns, group(1) is the whitespace that precedes the prompt.
    _isPS1 = re.compile(r"(\s*)" + re.escape(_PS1)).match
    _isPS2 = re.compile(r"(\s*)" + re.escape(_PS2)).match
    _isEmpty = re.compile(r"\s*$").match
    _isComment = re.compile(r"\s*#").match

    def __init__(self, name, string):
        """
        Prepare to extract doctests from string `string`.

        `name` is an arbitrary (string) name associated with the string,
        and is used only in error messages.
        """
        self.name = name
        self.source = string

    def get_examples(self):
        """
        Return the doctest examples from the string.

        This is a list of (source, want, lineno) triples, one per example
        in the string.  "source" is a single Python statement; it ends
        with a newline iff the statement contains more than one
        physical line.  "want" is the expected output from running the
        example (either from stdout, or a traceback in case of exception).
        "want" always ends with a newline, unless no output is expected,
        in which case "want" is an empty string.  "lineno" is the 0-based
        line number of the first line of "source" within the string.  It's
        0-based because it's most common in doctests that nothing
        interesting appears on the same line as opening triple-quote,
        and so the first interesting line is called "line 1" then.

        Raises ValueError if a prompt is not followed by a blank, or if
        a continuation or output line is indented inconsistently with
        its PS1 line.

        >>> text = '''
        ... >>> x, y = 2, 3 # no output expected
        ... >>> if 1:
        ... ...     print x
        ... ...     print y
        ... 2
        ... 3
        ...
        ... Some text.
        ... >>> x+y
        ... 5
        ... '''
        >>> for x in Parser('<string>', text).get_examples():
        ...     print x
        ('x, y = 2, 3 # no output expected', '', 1)
        ('if 1:\\n    print x\\n    print y\\n', '2\\n3\\n', 2)
        ('x+y', '5\\n', 9)
        """
        return self._parse(kind='examples')

    def get_program(self):
        """
        Return an executable program from the string, as a string.

        The format of this isn't rigidly defined.  In general, doctest
        examples become the executable statements in the result, and
        their expected outputs become comments, preceded by an "#Expected:"
        comment.  Everything else (text, comments, everything not part of
        a doctest test) is also placed in comments.

        Raises ValueError under the same conditions as get_examples().

        >>> text = '''
        ... >>> x, y = 2, 3 # no output expected
        ... >>> if 1:
        ... ...     print x
        ... ...     print y
        ... 2
        ... 3
        ...
        ... Some text.
        ... >>> x+y
        ... 5
        ... '''
        >>> print Parser('<string>', text).get_program()
        x, y = 2, 3 # no output expected
        if 1:
            print x
            print y
        # Expected:
        # 2
        # 3
        #
        # Some text.
        x+y
        # Expected:
        # 5
        """
        return self._parse(kind='program')

    def _parse(self, kind):
        """
        Walk the string line by line and build the requested result.

        `kind` is 'examples' (return a list of (source, want, lineno)
        triples) or 'program' (return a single string in which examples
        are code and everything else is a comment).

        Line numbers in error messages are 0-based indexes into the
        string's lines.
        """
        assert kind in ('examples', 'program')
        do_program = kind == 'program'
        output = []
        push = output.append

        string = self.source
        # Guarantee a final empty element in `lines`, so the look-ahead
        # reads of lines[i] below always find a terminating empty line.
        if not string.endswith('\n'):
            string += '\n'

        isPS1, isPS2 = self._isPS1, self._isPS2
        isEmpty, isComment = self._isEmpty, self._isComment
        lines = string.split("\n")
        i, n = 0, len(lines)
        while i < n:
            # Search for an example (a PS1 line).
            line = lines[i]
            i += 1
            m = isPS1(line)
            if m is None:
                if do_program:
                    line = line.rstrip()
                    if line:
                        line = ' ' + line
                    push('#' + line)
                continue
            # line is a PS1 line.
            j = m.end(0)  # beyond the prompt
            if isEmpty(line, j) or isComment(line, j):
                # a bare prompt or comment -- not interesting
                if do_program:
                    push("#   " + line[j:])
                continue
            # line is a non-trivial PS1 line.
            lineno = i - 1
            if line[j] != " ":
                raise ValueError('line %r of the docstring for %s lacks '
                                 'blank after %s: %r' %
                                 (lineno, self.name, self._PS1, line))

            j += 1
            blanks = m.group(1)
            nblanks = len(blanks)
            # suck up this and following PS2 lines
            source = []
            while 1:
                source.append(line[j:])
                line = lines[i]
                m = isPS2(line)
                if m:
                    if m.group(1) != blanks:
                        raise ValueError('line %r of the docstring for %s '
                            'has inconsistent leading whitespace: %r' %
                            (i, self.name, line))
                    # The source text is sliced at column j (computed from
                    # the PS1 line), so a PS2 prompt that is not followed
                    # by a blank would silently lose its first character.
                    # Reject it, exactly as is done for PS1 lines.  A bare
                    # "..." with nothing after it is still allowed.
                    k = m.end(0)
                    if k < len(line) and line[k] != " ":
                        raise ValueError('line %r of the docstring for %s '
                                         'lacks blank after %s: %r' %
                                         (i, self.name, self._PS2, line))
                    i += 1
                else:
                    break

            if do_program:
                output.extend(source)
            else:
                # get rid of useless null line from trailing empty "..."
                if source[-1] == "":
                    assert len(source) > 1
                    del source[-1]
                if len(source) == 1:
                    source = source[0]
                else:
                    source = "\n".join(source) + "\n"

            # suck up response
            if isPS1(line) or isEmpty(line):
                if not do_program:
                    push((source, "", lineno))
                continue

            # There is a response.
            want = []
            if do_program:
                push("# Expected:")
            while 1:
                if line[:nblanks] != blanks:
                    raise ValueError('line %r of the docstring for %s '
                        'has inconsistent leading whitespace: %r' %
                        (i, self.name, line))
                want.append(line[nblanks:])
                i += 1
                line = lines[i]
                if isPS1(line) or isEmpty(line):
                    break

            if do_program:
                output.extend(['#     ' + x for x in want])
            else:
                want = "\n".join(want) + "\n"
                push((source, want, lineno))

        if do_program:
            # Trim junk on both ends.
            while output and output[-1] == '#':
                output.pop()
            while output and output[0] == '#':
                output.pop(0)
            output = '\n'.join(output)

        return output
######################################################################
## 2. Example & DocTest
######################################################################
@ -774,7 +572,206 @@ class DocTest:
(other.name, other.filename, other.lineno, id(other)))
######################################################################
## 3. DocTest Finder
## 3. DocTest Parser
######################################################################
class Parser:
    """
    Extract doctests from a string.

    Examples are located with a single regular expression
    (`_EXAMPLE_RE`).  Each match is a PS1 line (">>>"), any PS2 lines
    ("...") that follow it, and the expected output after those.
    """

    def __init__(self, name, string):
        """
        Prepare to extract doctests from string `string`.

        `name` is an arbitrary (string) name associated with the string,
        and is used only in error messages.
        """
        self.name = name
        # Tabs are expanded up front; the patterns and the indentation
        # checks below only deal with spaces.
        self.string = string.expandtabs()

    _EXAMPLE_RE = re.compile(r'''
        # Source consists of a PS1 line followed by zero or more PS2 lines.
        (?P<source>
            (?:^(?P<indent> [ ]*) >>>    .*)    # PS1 line
            (?:\n           [ ]*  \.\.\. .*)*)  # PS2 lines
        \n?
        # Want consists of any non-blank lines that do not start with PS1.
        (?P<want> (?:(?![ ]*$)    # Not a blank line
                     (?![ ]*>>>)  # Not a line starting with PS1
                     .*$\n?       # But any other line
                  )*)
        ''', re.MULTILINE | re.VERBOSE)
    # Matches a source string that is empty, blank, or only a comment.
    _IS_BLANK_OR_COMMENT = re.compile('^[ ]*(#.*)?$')

    def get_examples(self):
        """
        Return the doctest examples from the string.

        This is a list of (source, want, lineno) triples, one per example
        in the string.  "source" is a single Python statement; it ends
        with a newline iff the statement contains more than one
        physical line.  "want" is the expected output from running the
        example (either from stdout, or a traceback in case of exception).
        "want" always ends with a newline, unless no output is expected,
        in which case "want" is an empty string.  "lineno" is the 0-based
        line number of the first line of "source" within the string.  It's
        0-based because it's most common in doctests that nothing
        interesting appears on the same line as opening triple-quote,
        and so the first interesting line is called "line 1" then.

        Raises ValueError if a prompt is not followed by a blank, or if
        a continuation or output line is indented inconsistently with
        its PS1 line.

        >>> text = '''
        ... >>> x, y = 2, 3 # no output expected
        ... >>> if 1:
        ... ...     print x
        ... ...     print y
        ... 2
        ... 3
        ...
        ... Some text.
        ... >>> x+y
        ... 5
        ... '''
        >>> for x in Parser('<string>', text).get_examples():
        ...     print x
        ('x, y = 2, 3 # no output expected', '', 1)
        ('if 1:\\n    print x\\n    print y\\n', '2\\n3\\n', 2)
        ('x+y', '5\\n', 9)
        """
        examples = []
        # `lineno` is always the 0-based line number of position `charno`.
        charno, lineno = 0, 0
        # Find all doctest examples in the string:
        for m in self._EXAMPLE_RE.finditer(self.string):
            # Update lineno (lines before this example)
            lineno += self.string.count('\n', charno, m.start())

            # Extract source/want from the regexp match.
            (source, want) = self._parse_example(m, lineno)
            if self._IS_BLANK_OR_COMMENT.match(source):
                # charno is left untouched here, so lineno (just advanced
                # to m.start()) overshoots it by the lines in
                # [charno, m.start()); the next iteration counts that
                # same range again.
                # NOTE(review): line numbers of examples that follow a
                # bare-prompt or comment-only example look inflated by
                # this double count -- confirm, and set charno = m.start()
                # here if so.
                continue
            examples.append( (source, want, lineno) )

            # Update lineno (lines inside this example)
            lineno += self.string.count('\n', m.start(), m.end())
            # Update charno.
            charno = m.end()
        return examples

    def get_program(self):
        """
        Return an executable program from the string, as a string.

        The format of this isn't rigidly defined.  In general, doctest
        examples become the executable statements in the result, and
        their expected outputs become comments, preceded by an \"#Expected:\"
        comment.  Everything else (text, comments, everything not part of
        a doctest test) is also placed in comments.

        Raises ValueError under the same conditions as get_examples().

        >>> text = '''
        ... >>> x, y = 2, 3 # no output expected
        ... >>> if 1:
        ... ...     print x
        ... ...     print y
        ... 2
        ... 3
        ...
        ... Some text.
        ... >>> x+y
        ... 5
        ... '''
        >>> print Parser('<string>', text).get_program()
        x, y = 2, 3 # no output expected
        if 1:
            print x
            print y
        # Expected:
        # 2
        # 3
        #
        # Some text.
        x+y
        # Expected:
        # 5
        """
        output = []
        charnum, lineno = 0, 0
        # Find all doctest examples in the string:
        for m in self._EXAMPLE_RE.finditer(self.string):
            # Add any text before this example, as a comment.
            if m.start() > charnum:
                # A match always starts at the beginning of a line, so
                # the character at m.start()-1 is the newline that ends
                # the preceding text; leave it out of the split.
                lines = self.string[charnum:m.start()-1].split('\n')
                output.extend([self._comment_line(l) for l in lines])
                lineno += len(lines)

            # Extract source/want from the regexp match.
            (source, want) = self._parse_example(m, lineno, False)
            # Display the source
            output.append(source)
            # Display the expected output, if any
            if want:
                output.append('# Expected:')
                output.extend(['#     '+l for l in want.split('\n')])

            # Update the line number & char number.
            lineno += self.string.count('\n', m.start(), m.end())
            charnum = m.end()
        # Add any remaining text, as comments.
        output.extend([self._comment_line(l)
                       for l in self.string[charnum:].split('\n')])
        # Trim junk on both ends.
        while output and output[-1] == '#':
            output.pop()
        while output and output[0] == '#':
            output.pop(0)
        # Combine the output, and return it.
        return '\n'.join(output)

    def _parse_example(self, m, lineno, add_newlines=True):
        """
        Return the (source, want) pair for the `_EXAMPLE_RE` match `m`.

        `lineno` is the 0-based line number of the example's PS1 line;
        it is used only to build error messages.  If `add_newlines` is
        true, a trailing newline is added to a multi-line source and to
        a non-empty want.

        Raises ValueError on a missing blank after a prompt or on
        inconsistent leading whitespace.
        """
        # Get the example's indentation level.
        indent = len(m.group('indent'))

        # Divide source into lines; check that they're properly
        # indented; and then strip their indentation & prompts.
        source_lines = m.group('source').split('\n')
        # Check the PS2 indentation first: _check_prompt_blank looks at
        # fixed columns derived from `indent`, so running it on a
        # mis-indented PS2 line would give a misleading message.  The
        # PS2 lines begin one line after the PS1 line, hence lineno+1.
        self._check_prefix(source_lines[1:], ' '*indent+'.', lineno+1)
        self._check_prompt_blank(source_lines, indent, lineno)
        source = '\n'.join([sl[indent+4:] for sl in source_lines])
        if len(source_lines) > 1 and add_newlines:
            source += '\n'

        # Divide want into lines; check that it's properly
        # indented; and then strip the indentation.
        want_lines = m.group('want').rstrip().split('\n')
        self._check_prefix(want_lines, ' '*indent,
                           lineno+len(source_lines))
        want = '\n'.join([wl[indent:] for wl in want_lines])
        if len(want) > 0 and add_newlines:
            want += '\n'

        return source, want

    def _comment_line(self, line):
        """Return `line`, right-stripped, as a comment line."""
        line = line.rstrip()
        if line: return '#  '+line
        else: return '#'

    def _check_prompt_blank(self, lines, indent, lineno):
        """
        Raise ValueError if a prompt in `lines` is not followed by a blank.

        `lines` are the source lines of one example, each expected to
        carry a three-character prompt at column `indent`.  `lineno` is
        the 0-based line number of lines[0]; the message reports a
        1-based line number.
        """
        for i, line in enumerate(lines):
            # Lines that end right after the prompt are fine.
            if len(line) >= indent+4 and line[indent+3] != ' ':
                raise ValueError('line %r of the docstring for %s '
                                 'lacks blank after %s: %r' %
                                 (lineno+i+1, self.name,
                                  line[indent:indent+3], line))

    def _check_prefix(self, lines, prefix, lineno):
        """
        Raise ValueError if a non-empty line lacks the given prefix.

        `lineno` is the 0-based line number of lines[0]; the message
        reports a 1-based line number.
        """
        for i, line in enumerate(lines):
            if line and not line.startswith(prefix):
                raise ValueError('line %r of the docstring for %s has '
                                 'inconsistent leading whitespace: %r' %
                                 (lineno+i+1, self.name, line))
######################################################################
## 4. DocTest Finder
######################################################################
class DocTestFinder:
@ -1062,7 +1059,7 @@ class DocTestFinder:
return None
######################################################################
## 4. DocTest Runner
## 5. DocTest Runner
######################################################################
# [XX] Should overridable methods (eg DocTestRunner.check_output) be
@ -1698,7 +1695,7 @@ class DebugRunner(DocTestRunner):
raise DocTestFailure(test, example, got)
######################################################################
## 5. Test Functions
## 6. Test Functions
######################################################################
# These should be backwards compatible.
@ -1860,7 +1857,7 @@ def run_docstring_examples(f, globs, verbose=False, name="NoName",
runner.run(test, compileflags=compileflags)
######################################################################
## 6. Tester
## 7. Tester
######################################################################
# This is provided only for backwards compatibility. It's not
# actually used in any way.
@ -1935,7 +1932,7 @@ class Tester:
d[name] = f, t
######################################################################
## 7. Unittest Support
## 8. Unittest Support
######################################################################
class DocTestCase(unittest.TestCase):
@ -2180,7 +2177,7 @@ def DocFileSuite(*paths, **kw):
return suite
######################################################################
## 8. Debugging Support
## 9. Debugging Support
######################################################################
def script_from_examples(s):
@ -2315,7 +2312,7 @@ def debug(module, name, pm=False):
debug_script(testsrc, pm, module.__dict__)
######################################################################
## 9. Example Usage
## 10. Example Usage
######################################################################
class _TestClass:
"""

View File

@ -209,7 +209,7 @@ expected output of an example, then `DocTest` will raise a ValueError:
... '''
>>> doctest.DocTest(docstring, globs, 'some_test', 'filename', 0)
Traceback (most recent call last):
ValueError: line 3 of the docstring for some_test has inconsistent leading whitespace: ' indentation'
ValueError: line 4 of the docstring for some_test has inconsistent leading whitespace: ' indentation'
If the docstring contains inconsistent leading whitespace on
continuation lines, then `DocTest` will raise a ValueError:
@ -229,7 +229,16 @@ will raise a ValueError:
>>> docstring = '>>>print 1\n1'
>>> doctest.DocTest(docstring, globs, 'some_test', 'filename', 0)
Traceback (most recent call last):
ValueError: line 0 of the docstring for some_test lacks blank after >>>: '>>>print 1'
ValueError: line 1 of the docstring for some_test lacks blank after >>>: '>>>print 1'
If there's no blank space after a PS2 prompt ('...'), then `DocTest`
will raise a ValueError:
>>> docstring = '>>> if 1:\n...print 1\n1'
>>> doctest.DocTest(docstring, globs, 'some_test', 'filename', 0)
Traceback (most recent call last):
ValueError: line 2 of the docstring for some_test lacks blank after ...: '...print 1'
"""
# [XX] test that it's getting line numbers right.