mirror of https://github.com/python/cpython
Issue #20750, Enable roundtrip tests for new 5-tuple untokenize. The
constructed examples and all but 7 of the test/test_*.py files (run with -ucpu) pass. Remove those that fail the new test from the selection list. Patch partly based on patches by G. Brandl (#8478) and G. Rees (#12691).
This commit is contained in:
parent
220cc21cec
commit
938ba685dc
|
@ -578,9 +578,15 @@ pass the '-ucpu' option to process the full directory.
|
|||
>>> tempdir = os.path.dirname(f) or os.curdir
|
||||
>>> testfiles = glob.glob(os.path.join(tempdir, "test*.py"))
|
||||
|
||||
tokenize is broken on test_pep3131.py because regular expressions are broken on
|
||||
the obscure unicode identifiers in it. *sigh*
|
||||
Tokenize is broken on test_pep3131.py because regular expressions are
|
||||
broken on the obscure unicode identifiers in it. *sigh*
|
||||
With roundtrip extended to test the 5-tuple mode of untokenize,
|
||||
7 more testfiles fail. Remove them also until the failure is diagnosed.
|
||||
|
||||
>>> testfiles.remove(os.path.join(tempdir, "test_pep3131.py"))
|
||||
>>> for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'):
|
||||
... testfiles.remove(os.path.join(tempdir, "test_%s.py") % f)
|
||||
...
|
||||
>>> if not support.is_resource_enabled("cpu"):
|
||||
... testfiles = random.sample(testfiles, 10)
|
||||
...
|
||||
|
@ -659,21 +665,39 @@ def dump_tokens(s):
|
|||
def roundtrip(f):
|
||||
"""
|
||||
Test roundtrip for `untokenize`. `f` is an open file or a string.
|
||||
The source code in f is tokenized, converted back to source code via
|
||||
tokenize.untokenize(), and tokenized again from the latter. The test
|
||||
fails if the second tokenization doesn't match the first.
|
||||
The source code in f is tokenized to both 5- and 2-tuples.
|
||||
Both sequences are converted back to source code via
|
||||
tokenize.untokenize(), and the latter tokenized again to 2-tuples.
|
||||
The test fails if the 3 pair tokenizations do not match.
|
||||
|
||||
When untokenize bugs are fixed, untokenize with 5-tuples should
|
||||
reproduce code that does not contain a backslash continuation
|
||||
following spaces. A proper test should test this.
|
||||
|
||||
This function would be more useful for correcting bugs if it reported
|
||||
the first point of failure, like assertEqual, rather than just
|
||||
returning False -- or if it were only used in unittests and not
|
||||
doctest and actually used assertEqual.
|
||||
"""
|
||||
# Get source code and original tokenizations
|
||||
if isinstance(f, str):
|
||||
f = BytesIO(f.encode('utf-8'))
|
||||
try:
|
||||
token_list = list(tokenize(f.readline))
|
||||
finally:
|
||||
code = f.encode('utf-8')
|
||||
else:
|
||||
code = f.read()
|
||||
f.close()
|
||||
tokens1 = [tok[:2] for tok in token_list]
|
||||
new_bytes = untokenize(tokens1)
|
||||
readline = (line for line in new_bytes.splitlines(keepends=True)).__next__
|
||||
tokens2 = [tok[:2] for tok in tokenize(readline)]
|
||||
return tokens1 == tokens2
|
||||
readline = iter(code.splitlines(keepends=True)).__next__
|
||||
tokens5 = list(tokenize(readline))
|
||||
tokens2 = [tok[:2] for tok in tokens5]
|
||||
# Reproduce tokens2 from pairs
|
||||
bytes_from2 = untokenize(tokens2)
|
||||
readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__
|
||||
tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]
|
||||
# Reproduce tokens2 from 5-tuples
|
||||
bytes_from5 = untokenize(tokens5)
|
||||
readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__
|
||||
tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]
|
||||
# Compare 3 versions
|
||||
return tokens2 == tokens2_from2 == tokens2_from5
|
||||
|
||||
# This is an example from the docs, set up as a doctest.
|
||||
def decistmt(s):
|
||||
|
|
Loading…
Reference in New Issue