gh-108303: Move tokenize-related data to Lib/test/tokenizedata (GH-109265)

This commit is contained in:
Nikita Sobolev 2023-09-12 09:37:42 +03:00 committed by GitHub
parent 8c813faf86
commit 1110c5bc82
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 40 additions and 25 deletions

2
.gitattributes vendored
View File

@ -24,7 +24,7 @@ PC/classicAppCompat.* binary
[attr]noeol -text [attr]noeol -text
Lib/test/cjkencodings/* noeol Lib/test/cjkencodings/* noeol
Lib/test/coding20731.py noeol Lib/test/tokenizedata/coding20731.py noeol
Lib/test/decimaltestdata/*.decTest noeol Lib/test/decimaltestdata/*.decTest noeol
Lib/test/test_email/data/*.txt noeol Lib/test/test_email/data/*.txt noeol
Lib/test/test_importlib/resources/data01/* noeol Lib/test/test_importlib/resources/data01/* noeol

View File

@ -5,7 +5,7 @@ repos:
- id: check-yaml - id: check-yaml
- id: end-of-file-fixer - id: end-of-file-fixer
types: [python] types: [python]
exclude: Lib/test/coding20731.py exclude: Lib/test/tokenizedata/coding20731.py
- id: trailing-whitespace - id: trailing-whitespace
types_or: [c, python, rst] types_or: [c, python, rst]

View File

@ -132,7 +132,9 @@ class PyCompileTestsBase:
os.chmod(self.directory, mode.st_mode) os.chmod(self.directory, mode.st_mode)
def test_bad_coding(self): def test_bad_coding(self):
bad_coding = os.path.join(os.path.dirname(__file__), 'bad_coding2.py') bad_coding = os.path.join(os.path.dirname(__file__),
'tokenizedata',
'bad_coding2.py')
with support.captured_stderr(): with support.captured_stderr():
self.assertIsNone(py_compile.compile(bad_coding, doraise=False)) self.assertIsNone(py_compile.compile(bad_coding, doraise=False))
self.assertFalse(os.path.exists( self.assertFalse(os.path.exists(
@ -195,7 +197,9 @@ class PyCompileTestsBase:
self.assertEqual(flags, 0b1) self.assertEqual(flags, 0b1)
def test_quiet(self): def test_quiet(self):
bad_coding = os.path.join(os.path.dirname(__file__), 'bad_coding2.py') bad_coding = os.path.join(os.path.dirname(__file__),
'tokenizedata',
'bad_coding2.py')
with support.captured_stderr() as stderr: with support.captured_stderr() as stderr:
self.assertIsNone(py_compile.compile(bad_coding, doraise=False, quiet=2)) self.assertIsNone(py_compile.compile(bad_coding, doraise=False, quiet=2))
self.assertIsNone(py_compile.compile(bad_coding, doraise=True, quiet=2)) self.assertIsNone(py_compile.compile(bad_coding, doraise=True, quiet=2))
@ -260,14 +264,18 @@ class PyCompileCLITestCase(unittest.TestCase):
self.assertTrue(os.path.exists(self.cache_path)) self.assertTrue(os.path.exists(self.cache_path))
def test_bad_syntax(self): def test_bad_syntax(self):
bad_syntax = os.path.join(os.path.dirname(__file__), 'badsyntax_3131.py') bad_syntax = os.path.join(os.path.dirname(__file__),
'tokenizedata',
'badsyntax_3131.py')
rc, stdout, stderr = self.pycompilecmd_failure(bad_syntax) rc, stdout, stderr = self.pycompilecmd_failure(bad_syntax)
self.assertEqual(rc, 1) self.assertEqual(rc, 1)
self.assertEqual(stdout, b'') self.assertEqual(stdout, b'')
self.assertIn(b'SyntaxError', stderr) self.assertIn(b'SyntaxError', stderr)
def test_bad_syntax_with_quiet(self): def test_bad_syntax_with_quiet(self):
bad_syntax = os.path.join(os.path.dirname(__file__), 'badsyntax_3131.py') bad_syntax = os.path.join(os.path.dirname(__file__),
'tokenizedata',
'badsyntax_3131.py')
rc, stdout, stderr = self.pycompilecmd_failure('-q', bad_syntax) rc, stdout, stderr = self.pycompilecmd_failure('-q', bad_syntax)
self.assertEqual(rc, 1) self.assertEqual(rc, 1)
self.assertEqual(stdout, b'') self.assertEqual(stdout, b'')

View File

@ -68,6 +68,7 @@ class MiscSourceEncodingTest(unittest.TestCase):
def test_20731(self): def test_20731(self):
sub = subprocess.Popen([sys.executable, sub = subprocess.Popen([sys.executable,
os.path.join(os.path.dirname(__file__), os.path.join(os.path.dirname(__file__),
'tokenizedata',
'coding20731.py')], 'coding20731.py')],
stderr=subprocess.PIPE) stderr=subprocess.PIPE)
err = sub.communicate()[1] err = sub.communicate()[1]
@ -100,10 +101,10 @@ class MiscSourceEncodingTest(unittest.TestCase):
self.verify_bad_module(module_name) self.verify_bad_module(module_name)
def verify_bad_module(self, module_name): def verify_bad_module(self, module_name):
self.assertRaises(SyntaxError, __import__, 'test.' + module_name) self.assertRaises(SyntaxError, __import__, 'test.tokenizedata.' + module_name)
path = os.path.dirname(__file__) path = os.path.dirname(__file__)
filename = os.path.join(path, module_name + '.py') filename = os.path.join(path, 'tokenizedata', module_name + '.py')
with open(filename, "rb") as fp: with open(filename, "rb") as fp:
bytes = fp.read() bytes = fp.read()
self.assertRaises(SyntaxError, compile, bytes, filename, 'exec') self.assertRaises(SyntaxError, compile, bytes, filename, 'exec')

View File

@ -2587,16 +2587,17 @@ class CommandLineTest(unittest.TestCase):
return script_helper.assert_python_failure('-m', 'tarfile', *args) return script_helper.assert_python_failure('-m', 'tarfile', *args)
def make_simple_tarfile(self, tar_name): def make_simple_tarfile(self, tar_name):
files = [support.findfile('tokenize_tests.txt'), files = [support.findfile('tokenize_tests.txt',
subdir='tokenizedata'),
support.findfile('tokenize_tests-no-coding-cookie-' support.findfile('tokenize_tests-no-coding-cookie-'
'and-utf8-bom-sig-only.txt')] 'and-utf8-bom-sig-only.txt',
subdir='tokenizedata')]
self.addCleanup(os_helper.unlink, tar_name) self.addCleanup(os_helper.unlink, tar_name)
with tarfile.open(tar_name, 'w') as tf: with tarfile.open(tar_name, 'w') as tf:
for tardata in files: for tardata in files:
tf.add(tardata, arcname=os.path.basename(tardata)) tf.add(tardata, arcname=os.path.basename(tardata))
def make_evil_tarfile(self, tar_name): def make_evil_tarfile(self, tar_name):
files = [support.findfile('tokenize_tests.txt')]
self.addCleanup(os_helper.unlink, tar_name) self.addCleanup(os_helper.unlink, tar_name)
with tarfile.open(tar_name, 'w') as tf: with tarfile.open(tar_name, 'w') as tf:
benign = tarfile.TarInfo('benign') benign = tarfile.TarInfo('benign')
@ -2677,9 +2678,11 @@ class CommandLineTest(unittest.TestCase):
self.assertEqual(rc, 1) self.assertEqual(rc, 1)
def test_create_command(self): def test_create_command(self):
files = [support.findfile('tokenize_tests.txt'), files = [support.findfile('tokenize_tests.txt',
subdir='tokenizedata'),
support.findfile('tokenize_tests-no-coding-cookie-' support.findfile('tokenize_tests-no-coding-cookie-'
'and-utf8-bom-sig-only.txt')] 'and-utf8-bom-sig-only.txt',
subdir='tokenizedata')]
for opt in '-c', '--create': for opt in '-c', '--create':
try: try:
out = self.tarfilecmd(opt, tmpname, *files) out = self.tarfilecmd(opt, tmpname, *files)
@ -2690,9 +2693,11 @@ class CommandLineTest(unittest.TestCase):
os_helper.unlink(tmpname) os_helper.unlink(tmpname)
def test_create_command_verbose(self): def test_create_command_verbose(self):
files = [support.findfile('tokenize_tests.txt'), files = [support.findfile('tokenize_tests.txt',
subdir='tokenizedata'),
support.findfile('tokenize_tests-no-coding-cookie-' support.findfile('tokenize_tests-no-coding-cookie-'
'and-utf8-bom-sig-only.txt')] 'and-utf8-bom-sig-only.txt',
subdir='tokenizedata')]
for opt in '-v', '--verbose': for opt in '-v', '--verbose':
try: try:
out = self.tarfilecmd(opt, '-c', tmpname, *files, out = self.tarfilecmd(opt, '-c', tmpname, *files,
@ -2704,7 +2709,7 @@ class CommandLineTest(unittest.TestCase):
os_helper.unlink(tmpname) os_helper.unlink(tmpname)
def test_create_command_dotless_filename(self): def test_create_command_dotless_filename(self):
files = [support.findfile('tokenize_tests.txt')] files = [support.findfile('tokenize_tests.txt', subdir='tokenizedata')]
try: try:
out = self.tarfilecmd('-c', dotlessname, *files) out = self.tarfilecmd('-c', dotlessname, *files)
self.assertEqual(out, b'') self.assertEqual(out, b'')
@ -2715,7 +2720,7 @@ class CommandLineTest(unittest.TestCase):
def test_create_command_dot_started_filename(self): def test_create_command_dot_started_filename(self):
tar_name = os.path.join(TEMPDIR, ".testtar") tar_name = os.path.join(TEMPDIR, ".testtar")
files = [support.findfile('tokenize_tests.txt')] files = [support.findfile('tokenize_tests.txt', subdir='tokenizedata')]
try: try:
out = self.tarfilecmd('-c', tar_name, *files) out = self.tarfilecmd('-c', tar_name, *files)
self.assertEqual(out, b'') self.assertEqual(out, b'')
@ -2725,9 +2730,11 @@ class CommandLineTest(unittest.TestCase):
os_helper.unlink(tar_name) os_helper.unlink(tar_name)
def test_create_command_compressed(self): def test_create_command_compressed(self):
files = [support.findfile('tokenize_tests.txt'), files = [support.findfile('tokenize_tests.txt',
subdir='tokenizedata'),
support.findfile('tokenize_tests-no-coding-cookie-' support.findfile('tokenize_tests-no-coding-cookie-'
'and-utf8-bom-sig-only.txt')] 'and-utf8-bom-sig-only.txt',
subdir='tokenizedata')]
for filetype in (GzipTest, Bz2Test, LzmaTest): for filetype in (GzipTest, Bz2Test, LzmaTest):
if not filetype.open: if not filetype.open:
continue continue

View File

@ -1200,7 +1200,7 @@ class TestTokenizerAdheresToPep0263(TestCase):
""" """
def _testFile(self, filename): def _testFile(self, filename):
path = os.path.join(os.path.dirname(__file__), filename) path = os.path.join(os.path.dirname(__file__), 'tokenizedata', filename)
with open(path, 'rb') as f: with open(path, 'rb') as f:
TestRoundtrip.check_roundtrip(self, f) TestRoundtrip.check_roundtrip(self, f)
@ -1794,7 +1794,7 @@ class TestRoundtrip(TestCase):
self.check_roundtrip("if x == 1 : \n" self.check_roundtrip("if x == 1 : \n"
" print(x)\n") " print(x)\n")
fn = support.findfile("tokenize_tests.txt") fn = support.findfile("tokenize_tests.txt", subdir="tokenizedata")
with open(fn, 'rb') as f: with open(fn, 'rb') as f:
self.check_roundtrip(f) self.check_roundtrip(f)
self.check_roundtrip("if x == 1:\n" self.check_roundtrip("if x == 1:\n"
@ -1849,8 +1849,7 @@ class TestRoundtrip(TestCase):
# pass the '-ucpu' option to process the full directory. # pass the '-ucpu' option to process the full directory.
import glob, random import glob, random
fn = support.findfile("tokenize_tests.txt") tempdir = os.path.dirname(__file__) or os.curdir
tempdir = os.path.dirname(fn) or os.curdir
testfiles = glob.glob(os.path.join(glob.escape(tempdir), "test*.py")) testfiles = glob.glob(os.path.join(glob.escape(tempdir), "test*.py"))
# Tokenize is broken on test_pep3131.py because regular expressions are # Tokenize is broken on test_pep3131.py because regular expressions are

View File

@ -25,7 +25,7 @@ class ReindentTests(unittest.TestCase):
self.assertGreater(err, b'') self.assertGreater(err, b'')
def test_reindent_file_with_bad_encoding(self): def test_reindent_file_with_bad_encoding(self):
bad_coding_path = findfile('bad_coding.py') bad_coding_path = findfile('bad_coding.py', subdir='tokenizedata')
rc, out, err = assert_python_ok(self.script, '-r', bad_coding_path) rc, out, err = assert_python_ok(self.script, '-r', bad_coding_path)
self.assertEqual(out, b'') self.assertEqual(out, b'')
self.assertNotEqual(err, b'') self.assertNotEqual(err, b'')

View File

@ -19,7 +19,7 @@ class PEP3131Test(unittest.TestCase):
def test_invalid(self): def test_invalid(self):
try: try:
from test import badsyntax_3131 from test.tokenizedata import badsyntax_3131
except SyntaxError as err: except SyntaxError as err:
self.assertEqual(str(err), self.assertEqual(str(err),
"invalid character '€' (U+20AC) (badsyntax_3131.py, line 2)") "invalid character '€' (U+20AC) (badsyntax_3131.py, line 2)")

View File