2005-06-10 08:05:19 -03:00
|
|
|
import os, glob, random
|
2006-03-30 23:17:30 -04:00
|
|
|
from cStringIO import StringIO
|
|
|
|
from test.test_support import (verbose, findfile, is_resource_enabled,
|
|
|
|
TestFailed)
|
2005-06-10 08:05:19 -03:00
|
|
|
from tokenize import (tokenize, generate_tokens, untokenize,
|
|
|
|
NUMBER, NAME, OP, STRING)
|
1997-10-27 18:15:06 -04:00
|
|
|
|
2006-03-30 23:17:30 -04:00
|
|
|
# Test roundtrip for `untokenize`. `f` is a file path. The source code in f
|
|
|
|
# is tokenized, converted back to source code via tokenize.untokenize(),
|
|
|
|
# and tokenized again from the latter. The test fails if the second
|
|
|
|
# tokenization doesn't match the first.
|
2005-06-10 08:05:19 -03:00
|
|
|
def test_roundtrip(f):
    """Verify that untokenize() output re-tokenizes to the same token stream.

    `f` is the path of a source file.  Its contents are tokenized, the
    (type, string) pairs are turned back into source text with
    untokenize(), and that text is tokenized a second time.

    Raises TestFailed if the two (type, string) sequences differ.
    """
    source = open(f)
    try:
        # Materialize every token before closing the file.
        all_tokens = list(generate_tokens(source.readline))
    finally:
        source.close()

    # Only the token type and string take part in the comparison;
    # positions are not expected to survive the roundtrip.
    first_pass = [tok[:2] for tok in all_tokens]
    regenerated = untokenize(first_pass)
    next_line = iter(regenerated.splitlines(True)).next
    second_pass = [tok[:2] for tok in generate_tokens(next_line)]

    if first_pass == second_pass:
        return
    raise TestFailed("untokenize() roundtrip failed for %r" % f)
|
2005-06-10 08:05:19 -03:00
|
|
|
|
2006-03-30 23:17:30 -04:00
|
|
|
# This is an example from the docs, set up as a doctest.
|
2005-06-10 08:05:19 -03:00
|
|
|
def decistmt(s):
    """Rewrite float literals in the statement string `s` as Decimal calls.

    Every NUMBER token whose text contains a '.' is replaced by the
    four tokens Decimal ( '<literal text>' ); all other tokens are passed
    through unchanged.  The new source text is returned.

    >>> from decimal import Decimal
    >>> s = 'print +21.3e-5*-.1234/81.7'
    >>> decistmt(s)
    "print +Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')"

    The exponent format comes from the platform C library: "e-007" is
    known on Windows and "e-07" elsewhere.  Only 12 digits are shown and
    the 13th isn't close to 5, so the remainder of the output should not
    depend on the platform.

    >>> exec(s) #doctest: +ELLIPSIS
    -3.21716034272e-0...7

    Results computed with Decimal should be the same on every platform.

    >>> exec(decistmt(s))
    -3.217160342717258261933904529E-7
    """
    rewritten = []
    for kind, text, _, _, _ in generate_tokens(StringIO(s).readline):
        if kind != NUMBER or '.' not in text:
            # Not a float literal: keep the token as it is.
            rewritten.append((kind, text))
            continue
        # Float literal: wrap its source text in a Decimal(...) call.
        rewritten += [
            (NAME, 'Decimal'),
            (OP, '('),
            (STRING, repr(text)),
            (OP, ')'),
        ]
    return untokenize(rewritten)
|
|
|
|
|
2006-03-30 23:17:30 -04:00
|
|
|
def test_main():
    """Run the tokenize regression tests.

    Steps, in order:
      1. Print the tokenization of tokenize_tests.txt to stdout.
      2. Run test_roundtrip() over tokenize_tests.txt and over the
         test*.py files that sit next to it.
      3. Check that badly dedented source raises IndentationError.
      4. Run the doctests contained in this module.

    Raises TestFailed (directly or through test_roundtrip) on failure.
    """
    if verbose:
        print('starting...')

    tests_path = findfile('tokenize_tests' + os.extsep + 'txt')

    # This displays the tokenization of tokenize_tests.txt to stdout, and
    # regrtest.py checks that this equals the expected output (in the
    # test/output/ directory).
    f = open(tests_path)
    try:
        tokenize(f.readline)
    finally:
        # Close the file even if tokenizing raises.
        f.close()

    # Now run test_roundtrip() over tokenize_tests.txt too, and over all
    # (if the "compiler" resource is enabled) or a small random sample (if
    # "compiler" is not enabled) of the test*.py files.
    test_roundtrip(tests_path)

    testdir = os.path.dirname(tests_path) or os.curdir
    testfiles = glob.glob(os.path.join(testdir, 'test*.py'))
    if not is_resource_enabled('compiler'):
        # random.sample() raises ValueError when asked for more items than
        # the population holds, so cap the sample size.
        testfiles = random.sample(testfiles, min(10, len(testfiles)))

    for f in testfiles:
        test_roundtrip(f)

    # Test detection of IndentationError.  The last line dedents to a
    # column that matches no enclosing indentation level.
    sampleBadText = """\
def foo():
    bar
  baz
"""

    try:
        for tok in generate_tokens(StringIO(sampleBadText).readline):
            pass
    except IndentationError:
        pass
    else:
        raise TestFailed("Did not detect IndentationError:")

    # Run the doctests in this module.
    from test import test_tokenize  # i.e., this module
    from test.test_support import run_doctest
    run_doctest(test_tokenize)

    if verbose:
        print('finished')
|
2005-06-10 08:05:19 -03:00
|
|
|
|
2006-03-30 23:17:30 -04:00
|
|
|
# Allow the test to be run directly as a script.
if __name__ == "__main__":
    test_main()
|