From 1110c5bc828218086f6397ec05a9312fb73ea30a Mon Sep 17 00:00:00 2001
From: Nikita Sobolev
Date: Tue, 12 Sep 2023 09:37:42 +0300
Subject: [PATCH] gh-108303: Move tokenize-related data to
 Lib/test/tokenizedata (GH-109265)

---
 .gitattributes                                |  2 +-
 .pre-commit-config.yaml                       |  2 +-
 Lib/test/test_py_compile.py                   | 16 +++++++---
 Lib/test/test_source_encoding.py              |  5 ++--
 Lib/test/test_tarfile.py                      | 29 ++++++++++++-------
 Lib/test/test_tokenize.py                     |  7 ++---
 Lib/test/test_tools/test_reindent.py          |  2 +-
 Lib/test/test_unicode_identifiers.py          |  2 +-
 Lib/test/tokenizedata/__init__.py             |  0
 Lib/test/{ => tokenizedata}/bad_coding.py     |  0
 Lib/test/{ => tokenizedata}/bad_coding2.py    |  0
 Lib/test/{ => tokenizedata}/badsyntax_3131.py |  0
 Lib/test/{ => tokenizedata}/coding20731.py    |  0
 ...-latin1-coding-cookie-and-utf8-bom-sig.txt |  0
 ...no-coding-cookie-and-utf8-bom-sig-only.txt |  0
 ...utf8-coding-cookie-and-no-utf8-bom-sig.txt |  0
 ...ts-utf8-coding-cookie-and-utf8-bom-sig.txt |  0
 .../{ => tokenizedata}/tokenize_tests.txt     |  0
 18 files changed, 40 insertions(+), 25 deletions(-)
 create mode 100644 Lib/test/tokenizedata/__init__.py
 rename Lib/test/{ => tokenizedata}/bad_coding.py (100%)
 rename Lib/test/{ => tokenizedata}/bad_coding2.py (100%)
 rename Lib/test/{ => tokenizedata}/badsyntax_3131.py (100%)
 rename Lib/test/{ => tokenizedata}/coding20731.py (100%)
 rename Lib/test/{ => tokenizedata}/tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt (100%)
 rename Lib/test/{ => tokenizedata}/tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt (100%)
 rename Lib/test/{ => tokenizedata}/tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt (100%)
 rename Lib/test/{ => tokenizedata}/tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt (100%)
 rename Lib/test/{ => tokenizedata}/tokenize_tests.txt (100%)

diff --git a/.gitattributes b/.gitattributes
index e05ff900bf1..8c37dbbb631 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -24,7 +24,7 @@ PC/classicAppCompat.* binary
 [attr]noeol -text
 
 Lib/test/cjkencodings/* noeol
-Lib/test/coding20731.py noeol
+Lib/test/tokenizedata/coding20731.py noeol
 Lib/test/decimaltestdata/*.decTest noeol
 Lib/test/test_email/data/*.txt noeol
 Lib/test/test_importlib/resources/data01/* noeol
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 451cbe8bc84..68e75fa44fa 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -5,7 +5,7 @@ repos:
       - id: check-yaml
       - id: end-of-file-fixer
         types: [python]
-        exclude: Lib/test/coding20731.py
+        exclude: Lib/test/tokenizedata/coding20731.py
       - id: trailing-whitespace
         types_or: [c, python, rst]
 
diff --git a/Lib/test/test_py_compile.py b/Lib/test/test_py_compile.py
index 5e0a44ad969..c4e6551f605 100644
--- a/Lib/test/test_py_compile.py
+++ b/Lib/test/test_py_compile.py
@@ -132,7 +132,9 @@ class PyCompileTestsBase:
         os.chmod(self.directory, mode.st_mode)
 
     def test_bad_coding(self):
-        bad_coding = os.path.join(os.path.dirname(__file__), 'bad_coding2.py')
+        bad_coding = os.path.join(os.path.dirname(__file__),
+                                  'tokenizedata',
+                                  'bad_coding2.py')
         with support.captured_stderr():
             self.assertIsNone(py_compile.compile(bad_coding, doraise=False))
         self.assertFalse(os.path.exists(
@@ -195,7 +197,9 @@ class PyCompileTestsBase:
         self.assertEqual(flags, 0b1)
 
     def test_quiet(self):
-        bad_coding = os.path.join(os.path.dirname(__file__), 'bad_coding2.py')
+        bad_coding = os.path.join(os.path.dirname(__file__),
+                                  'tokenizedata',
+                                  'bad_coding2.py')
         with support.captured_stderr() as stderr:
             self.assertIsNone(py_compile.compile(bad_coding, doraise=False, quiet=2))
             self.assertIsNone(py_compile.compile(bad_coding, doraise=True, quiet=2))
@@ -260,14 +264,18 @@ class PyCompileCLITestCase(unittest.TestCase):
         self.assertTrue(os.path.exists(self.cache_path))
 
     def test_bad_syntax(self):
-        bad_syntax = os.path.join(os.path.dirname(__file__), 'badsyntax_3131.py')
+        bad_syntax = os.path.join(os.path.dirname(__file__),
+                                  'tokenizedata',
+                                  'badsyntax_3131.py')
         rc, stdout, stderr = self.pycompilecmd_failure(bad_syntax)
         self.assertEqual(rc, 1)
         self.assertEqual(stdout, b'')
         self.assertIn(b'SyntaxError', stderr)
 
     def test_bad_syntax_with_quiet(self):
-        bad_syntax = os.path.join(os.path.dirname(__file__), 'badsyntax_3131.py')
+        bad_syntax = os.path.join(os.path.dirname(__file__),
+                                  'tokenizedata',
+                                  'badsyntax_3131.py')
         rc, stdout, stderr = self.pycompilecmd_failure('-q', bad_syntax)
         self.assertEqual(rc, 1)
         self.assertEqual(stdout, b'')
diff --git a/Lib/test/test_source_encoding.py b/Lib/test/test_source_encoding.py
index 72c2b47779e..27871378f1c 100644
--- a/Lib/test/test_source_encoding.py
+++ b/Lib/test/test_source_encoding.py
@@ -68,6 +68,7 @@ class MiscSourceEncodingTest(unittest.TestCase):
     def test_20731(self):
         sub = subprocess.Popen([sys.executable,
                         os.path.join(os.path.dirname(__file__),
+                                     'tokenizedata',
                                      'coding20731.py')],
             stderr=subprocess.PIPE)
         err = sub.communicate()[1]
@@ -100,10 +101,10 @@ class MiscSourceEncodingTest(unittest.TestCase):
         self.verify_bad_module(module_name)
 
     def verify_bad_module(self, module_name):
-        self.assertRaises(SyntaxError, __import__, 'test.' + module_name)
+        self.assertRaises(SyntaxError, __import__, 'test.tokenizedata.' + module_name)
 
         path = os.path.dirname(__file__)
-        filename = os.path.join(path, module_name + '.py')
+        filename = os.path.join(path, 'tokenizedata', module_name + '.py')
         with open(filename, "rb") as fp:
             bytes = fp.read()
         self.assertRaises(SyntaxError, compile, bytes, filename, 'exec')
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index 67009a3d2e9..9a39dd4a4e5 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -2587,16 +2587,17 @@ class CommandLineTest(unittest.TestCase):
         return script_helper.assert_python_failure('-m', 'tarfile', *args)
 
     def make_simple_tarfile(self, tar_name):
-        files = [support.findfile('tokenize_tests.txt'),
+        files = [support.findfile('tokenize_tests.txt',
+                                  subdir='tokenizedata'),
                  support.findfile('tokenize_tests-no-coding-cookie-'
-                                  'and-utf8-bom-sig-only.txt')]
+                                  'and-utf8-bom-sig-only.txt',
+                                  subdir='tokenizedata')]
         self.addCleanup(os_helper.unlink, tar_name)
         with tarfile.open(tar_name, 'w') as tf:
             for tardata in files:
                 tf.add(tardata, arcname=os.path.basename(tardata))
 
     def make_evil_tarfile(self, tar_name):
-        files = [support.findfile('tokenize_tests.txt')]
         self.addCleanup(os_helper.unlink, tar_name)
         with tarfile.open(tar_name, 'w') as tf:
             benign = tarfile.TarInfo('benign')
@@ -2677,9 +2678,11 @@ class CommandLineTest(unittest.TestCase):
         self.assertEqual(rc, 1)
 
     def test_create_command(self):
-        files = [support.findfile('tokenize_tests.txt'),
+        files = [support.findfile('tokenize_tests.txt',
+                                  subdir='tokenizedata'),
                  support.findfile('tokenize_tests-no-coding-cookie-'
-                                  'and-utf8-bom-sig-only.txt')]
+                                  'and-utf8-bom-sig-only.txt',
+                                  subdir='tokenizedata')]
         for opt in '-c', '--create':
             try:
                 out = self.tarfilecmd(opt, tmpname, *files)
@@ -2690,9 +2693,11 @@ class CommandLineTest(unittest.TestCase):
             os_helper.unlink(tmpname)
 
     def test_create_command_verbose(self):
-        files = [support.findfile('tokenize_tests.txt'),
+        files = [support.findfile('tokenize_tests.txt',
+                                  subdir='tokenizedata'),
                  support.findfile('tokenize_tests-no-coding-cookie-'
-                                  'and-utf8-bom-sig-only.txt')]
+                                  'and-utf8-bom-sig-only.txt',
+                                  subdir='tokenizedata')]
         for opt in '-v', '--verbose':
             try:
                 out = self.tarfilecmd(opt, '-c', tmpname, *files,
@@ -2704,7 +2709,7 @@ class CommandLineTest(unittest.TestCase):
             os_helper.unlink(tmpname)
 
     def test_create_command_dotless_filename(self):
-        files = [support.findfile('tokenize_tests.txt')]
+        files = [support.findfile('tokenize_tests.txt', subdir='tokenizedata')]
         try:
             out = self.tarfilecmd('-c', dotlessname, *files)
             self.assertEqual(out, b'')
@@ -2715,7 +2720,7 @@ class CommandLineTest(unittest.TestCase):
 
     def test_create_command_dot_started_filename(self):
         tar_name = os.path.join(TEMPDIR, ".testtar")
-        files = [support.findfile('tokenize_tests.txt')]
+        files = [support.findfile('tokenize_tests.txt', subdir='tokenizedata')]
         try:
             out = self.tarfilecmd('-c', tar_name, *files)
             self.assertEqual(out, b'')
@@ -2725,9 +2730,11 @@ class CommandLineTest(unittest.TestCase):
             os_helper.unlink(tar_name)
 
     def test_create_command_compressed(self):
-        files = [support.findfile('tokenize_tests.txt'),
+        files = [support.findfile('tokenize_tests.txt',
+                                  subdir='tokenizedata'),
                  support.findfile('tokenize_tests-no-coding-cookie-'
-                                  'and-utf8-bom-sig-only.txt')]
+                                  'and-utf8-bom-sig-only.txt',
+                                  subdir='tokenizedata')]
         for filetype in (GzipTest, Bz2Test, LzmaTest):
             if not filetype.open:
                 continue
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index dbefee655c3..94fb6d933de 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -1200,7 +1200,7 @@ class TestTokenizerAdheresToPep0263(TestCase):
     """
 
     def _testFile(self, filename):
-        path = os.path.join(os.path.dirname(__file__), filename)
+        path = os.path.join(os.path.dirname(__file__), 'tokenizedata', filename)
         with open(path, 'rb') as f:
             TestRoundtrip.check_roundtrip(self, f)
 
@@ -1794,7 +1794,7 @@ class TestRoundtrip(TestCase):
         self.check_roundtrip("if x == 1 : \n"
                              "  print(x)\n")
 
-        fn = support.findfile("tokenize_tests.txt")
+        fn = support.findfile("tokenize_tests.txt", subdir="tokenizedata")
         with open(fn, 'rb') as f:
             self.check_roundtrip(f)
         self.check_roundtrip("if x == 1:\n"
@@ -1849,8 +1849,7 @@ class TestRoundtrip(TestCase):
         # pass the '-ucpu' option to process the full directory.
 
         import glob, random
-        fn = support.findfile("tokenize_tests.txt")
-        tempdir = os.path.dirname(fn) or os.curdir
+        tempdir = os.path.dirname(__file__) or os.curdir
         testfiles = glob.glob(os.path.join(glob.escape(tempdir), "test*.py"))
 
         # Tokenize is broken on test_pep3131.py because regular expressions are
diff --git a/Lib/test/test_tools/test_reindent.py b/Lib/test/test_tools/test_reindent.py
index 3b0c793a38e..64e31c2b770 100644
--- a/Lib/test/test_tools/test_reindent.py
+++ b/Lib/test/test_tools/test_reindent.py
@@ -25,7 +25,7 @@ class ReindentTests(unittest.TestCase):
         self.assertGreater(err, b'')
 
     def test_reindent_file_with_bad_encoding(self):
-        bad_coding_path = findfile('bad_coding.py')
+        bad_coding_path = findfile('bad_coding.py', subdir='tokenizedata')
         rc, out, err = assert_python_ok(self.script, '-r', bad_coding_path)
         self.assertEqual(out, b'')
         self.assertNotEqual(err, b'')
diff --git a/Lib/test/test_unicode_identifiers.py b/Lib/test/test_unicode_identifiers.py
index 5b9ced5d1cb..63c6c055824 100644
--- a/Lib/test/test_unicode_identifiers.py
+++ b/Lib/test/test_unicode_identifiers.py
@@ -19,7 +19,7 @@ class PEP3131Test(unittest.TestCase):
 
     def test_invalid(self):
         try:
-            from test import badsyntax_3131
+            from test.tokenizedata import badsyntax_3131
         except SyntaxError as err:
             self.assertEqual(str(err),
                 "invalid character '€' (U+20AC) (badsyntax_3131.py, line 2)")
diff --git a/Lib/test/tokenizedata/__init__.py b/Lib/test/tokenizedata/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/Lib/test/bad_coding.py b/Lib/test/tokenizedata/bad_coding.py
similarity index 100%
rename from Lib/test/bad_coding.py
rename to Lib/test/tokenizedata/bad_coding.py
diff --git a/Lib/test/bad_coding2.py b/Lib/test/tokenizedata/bad_coding2.py
similarity index 100%
rename from Lib/test/bad_coding2.py
rename to Lib/test/tokenizedata/bad_coding2.py
diff --git a/Lib/test/badsyntax_3131.py b/Lib/test/tokenizedata/badsyntax_3131.py
similarity index 100%
rename from Lib/test/badsyntax_3131.py
rename to Lib/test/tokenizedata/badsyntax_3131.py
diff --git a/Lib/test/coding20731.py b/Lib/test/tokenizedata/coding20731.py
similarity index 100%
rename from Lib/test/coding20731.py
rename to Lib/test/tokenizedata/coding20731.py
diff --git a/Lib/test/tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt b/Lib/test/tokenizedata/tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt
similarity index 100%
rename from Lib/test/tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt
rename to Lib/test/tokenizedata/tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt
diff --git a/Lib/test/tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt b/Lib/test/tokenizedata/tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt
similarity index 100%
rename from Lib/test/tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt
rename to Lib/test/tokenizedata/tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt
diff --git a/Lib/test/tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt b/Lib/test/tokenizedata/tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt
similarity index 100%
rename from Lib/test/tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt
rename to Lib/test/tokenizedata/tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt
diff --git a/Lib/test/tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt b/Lib/test/tokenizedata/tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt
similarity index 100%
rename from Lib/test/tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt
rename to Lib/test/tokenizedata/tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt
diff --git a/Lib/test/tokenize_tests.txt b/Lib/test/tokenizedata/tokenize_tests.txt
similarity index 100%
rename from Lib/test/tokenize_tests.txt
rename to Lib/test/tokenizedata/tokenize_tests.txt
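
Every call-site change above reduces to one of two lookup idioms for the
relocated data files. A minimal side-by-side sketch (illustrative only, not
part of the patch; it assumes it runs from a test module inside Lib/test/):

    import os
    from test import support

    # Idiom 1: test.support.findfile() with its subdir argument, which
    # resolves the name under Lib/test/tokenizedata/.
    fn = support.findfile('tokenize_tests.txt', subdir='tokenizedata')

    # Idiom 2: an explicit path built relative to the calling test module
    # (__file__ stands in for that module's own location).
    bad_coding = os.path.join(os.path.dirname(__file__),
                              'tokenizedata', 'bad_coding2.py')

Data files that must also be importable, such as badsyntax_3131.py, are
instead reached through the new test.tokenizedata package, which is why the
patch creates an empty Lib/test/tokenizedata/__init__.py.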