bpo-39337: encodings.normalize_encoding() now ignores non-ASCII characters (GH-22219)
This commit is contained in:
parent
b4d895336a
commit
c5b049b91c
|
@@ -186,6 +186,11 @@ by :func:`curses.color_content`, :func:`curses.init_color`,
|
||||||
support is provided by the underlying ncurses library.
|
support is provided by the underlying ncurses library.
|
||||||
(Contributed by Jeffrey Kintscher and Hans Petter Jansson in :issue:`36982`.)
|
(Contributed by Jeffrey Kintscher and Hans Petter Jansson in :issue:`36982`.)
|
||||||
|
|
||||||
|
encodings
|
||||||
|
---------
|
||||||
|
:func:`encodings.normalize_encoding` now ignores non-ASCII characters.
|
||||||
|
(Contributed by Hai Shi in :issue:`39337`.)
|
||||||
|
|
||||||
glob
|
glob
|
||||||
----
|
----
|
||||||
|
|
||||||
|
|
|
@@ -61,6 +61,7 @@ def normalize_encoding(encoding):
|
||||||
if c.isalnum() or c == '.':
|
if c.isalnum() or c == '.':
|
||||||
if punct and chars:
|
if punct and chars:
|
||||||
chars.append('_')
|
chars.append('_')
|
||||||
|
if c.isascii():
|
||||||
chars.append(c)
|
chars.append(c)
|
||||||
punct = False
|
punct = False
|
||||||
else:
|
else:
|
||||||
|
|
|
@@ -3417,7 +3417,7 @@ class Rot13UtilTest(unittest.TestCase):
|
||||||
|
|
||||||
class CodecNameNormalizationTest(unittest.TestCase):
|
class CodecNameNormalizationTest(unittest.TestCase):
|
||||||
"""Test codec name normalization"""
|
"""Test codec name normalization"""
|
||||||
def test_normalized_encoding(self):
|
def test_codecs_lookup(self):
|
||||||
FOUND = (1, 2, 3, 4)
|
FOUND = (1, 2, 3, 4)
|
||||||
NOT_FOUND = (None, None, None, None)
|
NOT_FOUND = (None, None, None, None)
|
||||||
def search_function(encoding):
|
def search_function(encoding):
|
||||||
|
@@ -3439,6 +3439,18 @@ class CodecNameNormalizationTest(unittest.TestCase):
|
||||||
self.assertEqual(NOT_FOUND, codecs.lookup('BBB.8'))
|
self.assertEqual(NOT_FOUND, codecs.lookup('BBB.8'))
|
||||||
self.assertEqual(NOT_FOUND, codecs.lookup('a\xe9\u20ac-8'))
|
self.assertEqual(NOT_FOUND, codecs.lookup('a\xe9\u20ac-8'))
|
||||||
|
|
||||||
|
def test_encodings_normalize_encoding(self):
|
||||||
|
# encodings.normalize_encoding() ignores non-ASCII characters.
|
||||||
|
normalize = encodings.normalize_encoding
|
||||||
|
self.assertEqual(normalize('utf_8'), 'utf_8')
|
||||||
|
self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8')
|
||||||
|
self.assertEqual(normalize('utf 8'), 'utf_8')
|
||||||
|
# encodings.normalize_encoding() doesn't convert
|
||||||
|
# characters to lower case.
|
||||||
|
self.assertEqual(normalize('UTF 8'), 'UTF_8')
|
||||||
|
self.assertEqual(normalize('utf.8'), 'utf.8')
|
||||||
|
self.assertEqual(normalize('utf...8'), 'utf...8')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@@ -0,0 +1 @@
|
||||||
|
:func:`encodings.normalize_encoding` now ignores non-ASCII characters.
|
Loading…
Reference in New Issue