From 700cfa8c90a90016638bac13c4efd03786b2b2a0 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 25 Jun 2020 17:56:31 +0300 Subject: [PATCH] bpo-41069: Make TESTFN and the CWD for tests containing non-ascii characters. (GH-21035) --- Lib/test/libregrtest/main.py | 1 + Lib/test/support/__init__.py | 2 +- Lib/test/support/os_helper.py | 21 +++++------ Lib/test/test_binhex.py | 7 ++-- Lib/test/test_cgitb.py | 10 +++--- Lib/test/test_compileall.py | 6 ++-- Lib/test/test_embed.py | 4 +-- Lib/test/test_fstring.py | 5 +-- Lib/test/test_genericpath.py | 2 +- Lib/test/test_gzip.py | 18 +++++++--- Lib/test/test_msilib.py | 4 +-- Lib/test/test_ntpath.py | 2 +- Lib/test/test_os.py | 6 ++-- Lib/test/test_pdb.py | 11 +++--- Lib/test/test_posixpath.py | 2 +- Lib/test/test_tarfile.py | 9 +++-- Lib/test/test_tools/test_pathfix.py | 8 +++-- Lib/test/test_trace.py | 9 +++-- Lib/test/test_urllib.py | 7 ++-- Lib/test/test_uu.py | 5 +-- Lib/test/test_venv.py | 4 +-- Lib/test/test_warnings/__init__.py | 6 ++-- .../2020-06-22-00-21-12.bpo-41069.bLZkX-.rst | 2 ++ Modules/_testcapimodule.c | 36 +++++++++---------- 24 files changed, 110 insertions(+), 77 deletions(-) create mode 100644 Misc/NEWS.d/next/Tests/2020-06-22-00-21-12.bpo-41069.bLZkX-.rst diff --git a/Lib/test/libregrtest/main.py b/Lib/test/libregrtest/main.py index 3f9771b9308..7675a97b5b4 100644 --- a/Lib/test/libregrtest/main.py +++ b/Lib/test/libregrtest/main.py @@ -597,6 +597,7 @@ class Regrtest: test_cwd = 'test_python_worker_{}'.format(pid) else: test_cwd = 'test_python_{}'.format(pid) + test_cwd += support.FS_NONASCII test_cwd = os.path.join(self.tmp_dir, test_cwd) return test_cwd diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 5707d8eeaa2..f8f60fb6c27 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -20,7 +20,7 @@ from .import_helper import ( forget, import_fresh_module, import_module, make_legacy_pyc, modules_cleanup, modules_setup, unload) from .os_helper import ( - FS_NONASCII, SAVEDCWD, TESTFN, TESTFN_NONASCII, + FS_NONASCII, SAVEDCWD, TESTFN, TESTFN_ASCII, TESTFN_NONASCII, TESTFN_UNENCODABLE, TESTFN_UNDECODABLE, TESTFN_UNICODE, can_symlink, can_xattr, change_cwd, create_empty_file, fd_count, diff --git a/Lib/test/support/os_helper.py b/Lib/test/support/os_helper.py index d3347027cf2..d9807a1e114 100644 --- a/Lib/test/support/os_helper.py +++ b/Lib/test/support/os_helper.py @@ -13,16 +13,16 @@ import warnings # Filename used for testing if os.name == 'java': # Jython disallows @ in module names - TESTFN = '$test' + TESTFN_ASCII = '$test' else: - TESTFN = '@test' + TESTFN_ASCII = '@test' # Disambiguate TESTFN for parallel testing, while letting it remain a valid # module name. -TESTFN = "{}_{}_tmp".format(TESTFN, os.getpid()) +TESTFN_ASCII = "{}_{}_tmp".format(TESTFN_ASCII, os.getpid()) # TESTFN_UNICODE is a non-ascii filename -TESTFN_UNICODE = TESTFN + "-\xe0\xf2\u0258\u0141\u011f" +TESTFN_UNICODE = TESTFN_ASCII + "-\xe0\xf2\u0258\u0141\u011f" if sys.platform == 'darwin': # In Mac OS X's VFS API file names are, by definition, canonically # decomposed Unicode, encoded using UTF-8. See QA1173: @@ -39,7 +39,7 @@ if os.name == 'nt': if sys.getwindowsversion().platform >= 2: # Different kinds of characters from various languages to minimize the # probability that the whole name is encodable to MBCS (issue #9819) - TESTFN_UNENCODABLE = TESTFN + "-\u5171\u0141\u2661\u0363\uDC80" + TESTFN_UNENCODABLE = TESTFN_ASCII + "-\u5171\u0141\u2661\u0363\uDC80" try: TESTFN_UNENCODABLE.encode(sys.getfilesystemencoding()) except UnicodeEncodeError: @@ -56,7 +56,7 @@ elif sys.platform != 'darwin': b'\xff'.decode(sys.getfilesystemencoding()) except UnicodeDecodeError: # 0xff will be encoded using the surrogate character u+DCFF - TESTFN_UNENCODABLE = TESTFN \ + TESTFN_UNENCODABLE = TESTFN_ASCII \ + b'-\xff'.decode(sys.getfilesystemencoding(), 'surrogateescape') else: # File system encoding (eg. ISO-8859-* encodings) can encode @@ -64,8 +64,8 @@ elif sys.platform != 'darwin': pass # FS_NONASCII: non-ASCII character encodable by os.fsencode(), -# or None if there is no such character. -FS_NONASCII = None +# or an empty string if there is no such character. +FS_NONASCII = '' for character in ( # First try printable and common characters to have a readable filename. # For each character, the encoding list are just example of encodings able @@ -141,13 +141,14 @@ for name in ( try: name.decode(sys.getfilesystemencoding()) except UnicodeDecodeError: - TESTFN_UNDECODABLE = os.fsencode(TESTFN) + name + TESTFN_UNDECODABLE = os.fsencode(TESTFN_ASCII) + name break if FS_NONASCII: - TESTFN_NONASCII = TESTFN + '-' + FS_NONASCII + TESTFN_NONASCII = TESTFN_ASCII + FS_NONASCII else: TESTFN_NONASCII = None +TESTFN = TESTFN_NONASCII or TESTFN_ASCII def make_bad_fd(): diff --git a/Lib/test/test_binhex.py b/Lib/test/test_binhex.py index 859553222a3..591f32a4f0f 100644 --- a/Lib/test/test_binhex.py +++ b/Lib/test/test_binhex.py @@ -13,9 +13,10 @@ with support.check_warnings(('', DeprecationWarning)): class BinHexTestCase(unittest.TestCase): def setUp(self): - self.fname1 = support.TESTFN + "1" - self.fname2 = support.TESTFN + "2" - self.fname3 = support.TESTFN + "very_long_filename__very_long_filename__very_long_filename__very_long_filename__" + # binhex supports only file names encodable to Latin1 + self.fname1 = support.TESTFN_ASCII + "1" + self.fname2 = support.TESTFN_ASCII + "2" + self.fname3 = support.TESTFN_ASCII + "very_long_filename__very_long_filename__very_long_filename__very_long_filename__" def tearDown(self): support.unlink(self.fname1) diff --git a/Lib/test/test_cgitb.py b/Lib/test/test_cgitb.py index 8991bc1ff34..bab152d8554 100644 --- a/Lib/test/test_cgitb.py +++ b/Lib/test/test_cgitb.py @@ -41,8 +41,9 @@ class TestCgitb(unittest.TestCase): rc, out, err = assert_python_failure( '-c', ('import cgitb; cgitb.enable(logdir=%s); ' - 'raise ValueError("Hello World")') % repr(tracedir)) - out = out.decode(sys.getfilesystemencoding()) + 'raise ValueError("Hello World")') % repr(tracedir), + PYTHONIOENCODING='utf-8') + out = out.decode() self.assertIn("ValueError", out) self.assertIn("Hello World", out) self.assertIn("<module>", out) @@ -56,8 +57,9 @@ class TestCgitb(unittest.TestCase): rc, out, err = assert_python_failure( '-c', ('import cgitb; cgitb.enable(format="text", logdir=%s); ' - 'raise ValueError("Hello World")') % repr(tracedir)) - out = out.decode(sys.getfilesystemencoding()) + 'raise ValueError("Hello World")') % repr(tracedir), + PYTHONIOENCODING='utf-8') + out = out.decode() self.assertIn("ValueError", out) self.assertIn("Hello World", out) self.assertNotIn('

', out) diff --git a/Lib/test/test_compileall.py b/Lib/test/test_compileall.py index b4061b79357..3bbc6817f8d 100644 --- a/Lib/test/test_compileall.py +++ b/Lib/test/test_compileall.py @@ -456,13 +456,15 @@ class CommandLineTestsBase: def assertRunOK(self, *args, **env_vars): rc, out, err = script_helper.assert_python_ok( - *self._get_run_args(args), **env_vars) + *self._get_run_args(args), **env_vars, + PYTHONIOENCODING='utf-8') self.assertEqual(b'', err) return out def assertRunNotOK(self, *args, **env_vars): rc, out, err = script_helper.assert_python_failure( - *self._get_run_args(args), **env_vars) + *self._get_run_args(args), **env_vars, + PYTHONIOENCODING='utf-8') return rc, out, err def assertCompiled(self, fn): diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index e740fe89529..fe47289777a 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -1349,7 +1349,7 @@ class AuditingTests(EmbeddingTestsMixin, unittest.TestCase): returncode=1) def test_audit_run_interactivehook(self): - startup = os.path.join(self.oldcwd, support.TESTFN) + (support.FS_NONASCII or '') + ".py" + startup = os.path.join(self.oldcwd, support.TESTFN) + ".py" with open(startup, "w", encoding="utf-8") as f: print("import sys", file=f) print("sys.__interactivehook__ = lambda: None", file=f) @@ -1362,7 +1362,7 @@ class AuditingTests(EmbeddingTestsMixin, unittest.TestCase): os.unlink(startup) def test_audit_run_startup(self): - startup = os.path.join(self.oldcwd, support.TESTFN) + (support.FS_NONASCII or '') + ".py" + startup = os.path.join(self.oldcwd, support.TESTFN) + ".py" with open(startup, "w", encoding="utf-8") as f: print("pass", file=f) try: diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py index 9eb7ebe1055..7ffe01d2d8c 100644 --- a/Lib/test/test_fstring.py +++ b/Lib/test/test_fstring.py @@ -1055,8 +1055,9 @@ non-important content file_path = os.path.join(cwd, 't.py') with open(file_path, 'w') as f: f.write('f"{a b}"') # This generates a SyntaxError - _, _, stderr = assert_python_failure(file_path) - self.assertIn(file_path, stderr.decode('utf-8')) + _, _, stderr = assert_python_failure(file_path, + PYTHONIOENCODING='ascii') + self.assertIn(file_path.encode('ascii', 'backslashreplace'), stderr) def test_loop(self): for i in range(1000): diff --git a/Lib/test/test_genericpath.py b/Lib/test/test_genericpath.py index 9d5ac44b6d0..e7acbcd2908 100644 --- a/Lib/test/test_genericpath.py +++ b/Lib/test/test_genericpath.py @@ -534,7 +534,7 @@ class CommonTest(GenericTest): class PathLikeTests(unittest.TestCase): def setUp(self): - self.file_name = support.TESTFN.lower() + self.file_name = support.TESTFN self.file_path = FakePath(support.TESTFN) self.addCleanup(support.unlink, self.file_name) create_file(self.file_name, b"test_genericpath.PathLikeTests") diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py index 78334213f24..0f235d1805e 100644 --- a/Lib/test/test_gzip.py +++ b/Lib/test/test_gzip.py @@ -328,8 +328,15 @@ class TestGzip(BaseTest): cmByte = fRead.read(1) self.assertEqual(cmByte, b'\x08') # deflate + try: + expectedname = self.filename.encode('Latin-1') + b'\x00' + expectedflags = b'\x08' # only the FNAME flag is set + except UnicodeEncodeError: + expectedname = b'' + expectedflags = b'\x00' + flagsByte = fRead.read(1) - self.assertEqual(flagsByte, b'\x08') # only the FNAME flag is set + self.assertEqual(flagsByte, expectedflags) mtimeBytes = fRead.read(4) self.assertEqual(mtimeBytes, struct.pack('