From e667e98faad93e76b8b569d853eb02d91591f5fb Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 12 Nov 2012 01:23:15 +0100 Subject: [PATCH] Issue #16218, #16444: Backport improvment on tests for non-ASCII characters --- Lib/test/support.py | 75 ++++++++++++++++++++++++++++++++ Lib/test/test_cmd_line.py | 6 +-- Lib/test/test_cmd_line_script.py | 30 ++++++++++--- Lib/test/test_os.py | 4 ++ 4 files changed, 105 insertions(+), 10 deletions(-) diff --git a/Lib/test/support.py b/Lib/test/support.py index c5640e0a087..d0a37ea9262 100644 --- a/Lib/test/support.py +++ b/Lib/test/support.py @@ -603,6 +603,49 @@ else: # module name. TESTFN = "{}_{}_tmp".format(TESTFN, os.getpid()) +# FS_NONASCII: non-ASCII character encodable by os.fsencode(), +# or None if there is no such character. +FS_NONASCII = None +for character in ( + # First try printable and common characters to have a readable filename. + # For each character, the encoding list are just example of encodings able + # to encode the character (the list is not exhaustive). + + # U+00E6 (Latin Small Letter Ae): cp1252, iso-8859-1 + '\u00E6', + # U+0130 (Latin Capital Letter I With Dot Above): cp1254, iso8859_3 + '\u0130', + # U+0141 (Latin Capital Letter L With Stroke): cp1250, cp1257 + '\u0141', + # U+03C6 (Greek Small Letter Phi): cp1253 + '\u03C6', + # U+041A (Cyrillic Capital Letter Ka): cp1251 + '\u041A', + # U+05D0 (Hebrew Letter Alef): Encodable to cp424 + '\u05D0', + # U+060C (Arabic Comma): cp864, cp1006, iso8859_6, mac_arabic + '\u060C', + # U+062A (Arabic Letter Teh): cp720 + '\u062A', + # U+0E01 (Thai Character Ko Kai): cp874 + '\u0E01', + + # Then try more "special" characters. "special" because they may be + # interpreted or displayed differently depending on the exact locale + # encoding and the font. + + # U+00A0 (No-Break Space) + '\u00A0', + # U+20AC (Euro Sign) + '\u20AC', +): + try: + os.fsdecode(os.fsencode(character)) + except UnicodeError: + pass + else: + FS_NONASCII = character + break # TESTFN_UNICODE is a non-ascii filename TESTFN_UNICODE = TESTFN + "-\xe0\xf2\u0258\u0141\u011f" @@ -647,6 +690,38 @@ elif sys.platform != 'darwin': # the byte 0xff. Skip some unicode filename tests. pass +# TESTFN_UNDECODABLE is a filename (bytes type) that should *not* be able to be +# decoded from the filesystem encoding (in strict mode). It can be None if we +# cannot generate such filename (ex: the latin1 encoding can decode any byte +# sequence). On UNIX, TESTFN_UNDECODABLE can be decoded by os.fsdecode() thanks +# to the surrogateescape error handler (PEP 383), but not from the filesystem +# encoding in strict mode. +TESTFN_UNDECODABLE = None +for name in ( + # b'\xff' is not decodable by os.fsdecode() with code page 932. Windows + # accepts it to create a file or a directory, or don't accept to enter to + # such directory (when the bytes name is used). So test b'\xe7' first: it is + # not decodable from cp932. + b'\xe7w\xf0', + # undecodable from ASCII, UTF-8 + b'\xff', + # undecodable from iso8859-3, iso8859-6, iso8859-7, cp424, iso8859-8, cp856 + # and cp857 + b'\xae\xd5' + # undecodable from UTF-8 (UNIX and Mac OS X) + b'\xed\xb2\x80', b'\xed\xb4\x80', +): + try: + name.decode(TESTFN_ENCODING) + except UnicodeDecodeError: + TESTFN_UNDECODABLE = os.fsencode(TESTFN) + name + break + +if FS_NONASCII: + TESTFN_NONASCII = TESTFN + '-' + FS_NONASCII +else: + TESTFN_NONASCII = None + # Save the initial cwd SAVEDCWD = os.getcwd() diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py index 2b0c6e2437a..f617c2f3a00 100644 --- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py @@ -93,15 +93,15 @@ class CmdLineTest(unittest.TestCase): # All good if execution is successful assert_python_ok('-c', 'pass') - @unittest.skipIf(sys.getfilesystemencoding() == 'ascii', - 'need a filesystem encoding different than ASCII') + @unittest.skipUnless(test.support.FS_NONASCII, 'need support.FS_NONASCII') def test_non_ascii(self): # Test handling of non-ascii data if test.support.verbose: import locale print('locale encoding = %s, filesystem encoding = %s' % (locale.getpreferredencoding(), sys.getfilesystemencoding())) - command = "assert(ord('\xe9') == 0xe9)" + command = ("assert(ord(%r) == %s)" + % (test.support.FS_NONASCII, ord(test.support.FS_NONASCII))) assert_python_ok('-c', command) # On Windows, pass bytes to subprocess doesn't test how Python decodes the diff --git a/Lib/test/test_cmd_line_script.py b/Lib/test/test_cmd_line_script.py index 6e097e37a4f..f066204393a 100644 --- a/Lib/test/test_cmd_line_script.py +++ b/Lib/test/test_cmd_line_script.py @@ -363,14 +363,30 @@ class CmdLineTest(unittest.TestCase): self.assertTrue(text[1].startswith(' File ')) self.assertTrue(text[3].startswith('NameError')) - def test_non_utf8(self): + def test_non_ascii(self): + # Mac OS X denies the creation of a file with an invalid UTF-8 name. + # Windows allows to create a name with an arbitrary bytes name, but + # Python cannot a undecodable bytes argument to a subprocess. + #if (support.TESTFN_UNDECODABLE + #and sys.platform not in ('win32', 'darwin')): + # name = os.fsdecode(support.TESTFN_UNDECODABLE) + #elif support.TESTFN_NONASCII: + if support.TESTFN_NONASCII: + name = support.TESTFN_NONASCII + else: + self.skipTest("need support.TESTFN_NONASCII") + # Issue #16218 - with temp_dir() as script_dir: - script_name = _make_test_script(script_dir, - '\udcf1\udcea\udcf0\udce8\udcef\udcf2') - self._check_script(script_name, script_name, script_name, - script_dir, None, - importlib.machinery.SourceFileLoader) + source = 'print(ascii(__file__))\n' + script_name = _make_test_script(os.curdir, name, source) + self.addCleanup(support.unlink, script_name) + rc, stdout, stderr = assert_python_ok(script_name) + self.assertEqual( + ascii(script_name), + stdout.rstrip().decode('ascii'), + 'stdout=%r stderr=%r' % (stdout, stderr)) + self.assertEqual(0, rc) + def test_main(): support.run_unittest(CmdLineTest) diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index 7d6b3779a42..13a2b381c51 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -1243,6 +1243,8 @@ if sys.platform != 'win32': def setUp(self): if support.TESTFN_UNENCODABLE: self.dir = support.TESTFN_UNENCODABLE + elif support.TESTFN_NONASCII: + self.dir = support.TESTFN_NONASCII else: self.dir = support.TESTFN self.bdir = os.fsencode(self.dir) @@ -1257,6 +1259,8 @@ if sys.platform != 'win32': add_filename(support.TESTFN_UNICODE) if support.TESTFN_UNENCODABLE: add_filename(support.TESTFN_UNENCODABLE) + if support.TESTFN_NONASCII: + add_filename(support.TESTFN_NONASCII) if not bytesfn: self.skipTest("couldn't create any non-ascii filename")