bpo-39337: encodings.normalize_encoding() now ignores non-ASCII characters (GH-22219)

2020-10-14 23:43:31 +08:00 · 2020-10-14 23:43:31 +08:00 · c5b049b91c
parent b4d895336a
commit c5b049b91c
4 changed files with 21 additions and 2 deletions
--- a/Doc/whatsnew/3.10.rst
+++ b/Doc/whatsnew/3.10.rst
@@ -186,6 +186,11 @@ by :func:`curses.color_content`, :func:`curses.init_color`,
 support is provided by the underlying ncurses library.
 (Contributed by Jeffrey Kintscher and Hans Petter Jansson in :issue:`36982`.)
 encodings
 ---------
 :func:`encodings.normalize_encoding` now ignores non-ASCII characters.
 (Contributed by Hai Shi in :issue:`39337`.)
 glob
 ----
--- a/Lib/encodings/__init__.py
+++ b/Lib/encodings/__init__.py
@@ -61,7 +61,8 @@ def normalize_encoding(encoding):
        if c.isalnum() or c == '.':
            if punct and chars:
                chars.append('_')
-            chars.append(c)
+            if c.isascii():
+                chars.append(c)
            punct = False
        else:
            punct = True
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -3417,7 +3417,7 @@ class Rot13UtilTest(unittest.TestCase):
 class CodecNameNormalizationTest(unittest.TestCase):
    """Test codec name normalization"""
-    def test_normalized_encoding(self):
+    def test_codecs_lookup(self):
        FOUND = (1, 2, 3, 4)
        NOT_FOUND = (None, None, None, None)
        def search_function(encoding):
@@ -3439,6 +3439,18 @@ class CodecNameNormalizationTest(unittest.TestCase):
        self.assertEqual(NOT_FOUND, codecs.lookup('BBB.8'))
        self.assertEqual(NOT_FOUND, codecs.lookup('a\xe9\u20ac-8'))
    def test_encodings_normalize_encoding(self):
        # encodings.normalize_encoding() ignores non-ASCII characters.
        normalize = encodings.normalize_encoding
        self.assertEqual(normalize('utf_8'), 'utf_8')
        self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8')
        self.assertEqual(normalize('utf   8'), 'utf_8')
        # encodings.normalize_encoding() doesn't convert
        # characters to lower case.
        self.assertEqual(normalize('UTF 8'), 'UTF_8')
        self.assertEqual(normalize('utf.8'), 'utf.8')
        self.assertEqual(normalize('utf...8'), 'utf...8')
 if __name__ == "__main__":
    unittest.main()
--- a/Misc/NEWS.d/next/Library/2020-09-13-02-02-18.bpo-39337.L3NXTt.rst
+++ b/Misc/NEWS.d/next/Library/2020-09-13-02-02-18.bpo-39337.L3NXTt.rst
@@ -0,0 +1 @@
 :func:`encodings.normalize_encoding` now ignores non-ASCII characters.
		`@ -0,0 +1 @@`
							:func:`encodings.normalize_encoding` now ignores non-ASCII characters.