diff --git a/Lib/test/test_normalization.py b/Lib/test/test_normalization.py deleted file mode 100644 index ba877e73f7d..00000000000 --- a/Lib/test/test_normalization.py +++ /dev/null @@ -1,117 +0,0 @@ -from test.support import open_urlresource -import unittest - -from http.client import HTTPException -import sys -from unicodedata import normalize, is_normalized, unidata_version - -TESTDATAFILE = "NormalizationTest.txt" -TESTDATAURL = "http://www.pythontest.net/unicode/" + unidata_version + "/" + TESTDATAFILE - -def check_version(testfile): - hdr = testfile.readline() - return unidata_version in hdr - -class RangeError(Exception): - pass - -def NFC(str): - return normalize("NFC", str) - -def NFKC(str): - return normalize("NFKC", str) - -def NFD(str): - return normalize("NFD", str) - -def NFKD(str): - return normalize("NFKD", str) - -def unistr(data): - data = [int(x, 16) for x in data.split(" ")] - for x in data: - if x > sys.maxunicode: - raise RangeError - return "".join([chr(x) for x in data]) - -class NormalizationTest(unittest.TestCase): - def test_main(self): - # Hit the exception early - try: - testdata = open_urlresource(TESTDATAURL, encoding="utf-8", - check=check_version) - except PermissionError: - self.skipTest(f"Permission error when downloading {TESTDATAURL} " - f"into the test data directory") - except (OSError, HTTPException): - self.fail(f"Could not retrieve {TESTDATAURL}") - - with testdata: - self.run_normalization_tests(testdata) - - def run_normalization_tests(self, testdata): - part = None - part1_data = {} - - for line in testdata: - if '#' in line: - line = line.split('#')[0] - line = line.strip() - if not line: - continue - if line.startswith("@Part"): - part = line.split()[0] - continue - try: - c1,c2,c3,c4,c5 = [unistr(x) for x in line.split(';')[:-1]] - except RangeError: - # Skip unsupported characters; - # try at least adding c1 if we are in part1 - if part == "@Part1": - try: - c1 = unistr(line.split(';')[0]) - except RangeError: - pass - else: - part1_data[c1] = 1 - continue - - # Perform tests - self.assertTrue(c2 == NFC(c1) == NFC(c2) == NFC(c3), line) - self.assertTrue(c4 == NFC(c4) == NFC(c5), line) - self.assertTrue(c3 == NFD(c1) == NFD(c2) == NFD(c3), line) - self.assertTrue(c5 == NFD(c4) == NFD(c5), line) - self.assertTrue(c4 == NFKC(c1) == NFKC(c2) == \ - NFKC(c3) == NFKC(c4) == NFKC(c5), - line) - self.assertTrue(c5 == NFKD(c1) == NFKD(c2) == \ - NFKD(c3) == NFKD(c4) == NFKD(c5), - line) - - self.assertTrue(is_normalized("NFC", c2)) - self.assertTrue(is_normalized("NFC", c4)) - - self.assertTrue(is_normalized("NFD", c3)) - self.assertTrue(is_normalized("NFD", c5)) - - self.assertTrue(is_normalized("NFKC", c4)) - self.assertTrue(is_normalized("NFKD", c5)) - - # Record part 1 data - if part == "@Part1": - part1_data[c1] = 1 - - # Perform tests for all other data - for c in range(sys.maxunicode+1): - X = chr(c) - if X in part1_data: - continue - self.assertTrue(X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c) - - def test_bug_834676(self): - # Check for bug 834676 - normalize('NFC', '\ud55c\uae00') - - -if __name__ == "__main__": - unittest.main() diff --git a/Lib/test/test_ucn.py b/Lib/test/test_ucn.py index 8febf0af862..503df6a51d5 100644 --- a/Lib/test/test_ucn.py +++ b/Lib/test/test_ucn.py @@ -12,7 +12,6 @@ import unicodedata from test import support from http.client import HTTPException -from test.test_normalization import check_version try: from _testcapi import INT_MAX, PY_SSIZE_T_MAX, UINT_MAX @@ -172,6 +171,9 @@ class UnicodeNamesTest(unittest.TestCase): def test_named_sequences_full(self): # Check all the named sequences + def check_version(testfile): + hdr = testfile.readline() + return unicodedata.unidata_version in hdr url = ("http://www.pythontest.net/unicode/%s/NamedSequences.txt" % unicodedata.unidata_version) try: diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py index 9ec2f114972..40c38c1c426 100644 --- a/Lib/test/test_unicodedata.py +++ b/Lib/test/test_unicodedata.py @@ -7,10 +7,11 @@ """ import hashlib +from http.client import HTTPException import sys import unicodedata import unittest -from test.support import script_helper +from test.support import open_urlresource, script_helper class UnicodeMethodsTest(unittest.TestCase): @@ -171,13 +172,6 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest): self.assertRaises(TypeError, self.db.combining) self.assertRaises(TypeError, self.db.combining, 'xx') - def test_normalize(self): - self.assertRaises(TypeError, self.db.normalize) - self.assertRaises(ValueError, self.db.normalize, 'unknown', 'xx') - self.assertEqual(self.db.normalize('NFKC', ''), '') - # The rest can be found in test_normalization.py - # which requires an external file. - def test_pr29(self): # http://www.unicode.org/review/pr-29.html # See issues #1054943 and #10254. @@ -208,9 +202,6 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest): self.assertEqual(self.db.normalize('NFC', u11a7_str_a), u11a7_str_b) self.assertEqual(self.db.normalize('NFC', u11c3_str_a), u11c3_str_b) - # For tests of unicodedata.is_normalized / self.db.is_normalized , - # see test_normalization.py . - def test_east_asian_width(self): eaw = self.db.east_asian_width self.assertRaises(TypeError, eaw, b'a') @@ -315,5 +306,102 @@ class UnicodeMiscTest(UnicodeDatabaseTest): self.assertEqual(len(lines), 1, r"\u%.4x should not be a linebreak" % i) +class NormalizationTest(unittest.TestCase): + @staticmethod + def check_version(testfile): + hdr = testfile.readline() + return unicodedata.unidata_version in hdr + + @staticmethod + def unistr(data): + data = [int(x, 16) for x in data.split(" ")] + return "".join([chr(x) for x in data]) + + def test_normalization(self): + TESTDATAFILE = "NormalizationTest.txt" + TESTDATAURL = f"http://www.pythontest.net/unicode/{unicodedata.unidata_version}/{TESTDATAFILE}" + + # Hit the exception early + try: + testdata = open_urlresource(TESTDATAURL, encoding="utf-8", + check=self.check_version) + except PermissionError: + self.skipTest(f"Permission error when downloading {TESTDATAURL} " + f"into the test data directory") + except (OSError, HTTPException): + self.fail(f"Could not retrieve {TESTDATAURL}") + + with testdata: + self.run_normalization_tests(testdata) + + def run_normalization_tests(self, testdata): + part = None + part1_data = {} + + def NFC(str): + return unicodedata.normalize("NFC", str) + + def NFKC(str): + return unicodedata.normalize("NFKC", str) + + def NFD(str): + return unicodedata.normalize("NFD", str) + + def NFKD(str): + return unicodedata.normalize("NFKD", str) + + for line in testdata: + if '#' in line: + line = line.split('#')[0] + line = line.strip() + if not line: + continue + if line.startswith("@Part"): + part = line.split()[0] + continue + c1,c2,c3,c4,c5 = [self.unistr(x) for x in line.split(';')[:-1]] + + # Perform tests + self.assertTrue(c2 == NFC(c1) == NFC(c2) == NFC(c3), line) + self.assertTrue(c4 == NFC(c4) == NFC(c5), line) + self.assertTrue(c3 == NFD(c1) == NFD(c2) == NFD(c3), line) + self.assertTrue(c5 == NFD(c4) == NFD(c5), line) + self.assertTrue(c4 == NFKC(c1) == NFKC(c2) == \ + NFKC(c3) == NFKC(c4) == NFKC(c5), + line) + self.assertTrue(c5 == NFKD(c1) == NFKD(c2) == \ + NFKD(c3) == NFKD(c4) == NFKD(c5), + line) + + self.assertTrue(unicodedata.is_normalized("NFC", c2)) + self.assertTrue(unicodedata.is_normalized("NFC", c4)) + + self.assertTrue(unicodedata.is_normalized("NFD", c3)) + self.assertTrue(unicodedata.is_normalized("NFD", c5)) + + self.assertTrue(unicodedata.is_normalized("NFKC", c4)) + self.assertTrue(unicodedata.is_normalized("NFKD", c5)) + + # Record part 1 data + if part == "@Part1": + part1_data[c1] = 1 + + # Perform tests for all other data + for c in range(sys.maxunicode+1): + X = chr(c) + if X in part1_data: + continue + self.assertTrue(X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c) + + def test_edge_cases(self): + self.assertRaises(TypeError, unicodedata.normalize) + self.assertRaises(ValueError, unicodedata.normalize, 'unknown', 'xx') + self.assertEqual(unicodedata.normalize('NFKC', ''), '') + + def test_bug_834676(self): + # Check for bug 834676 + unicodedata.normalize('NFC', '\ud55c\uae00') + + if __name__ == "__main__": unittest.main() diff --git a/PCbuild/lib.pyproj b/PCbuild/lib.pyproj index 0ddeef3eaa3..401e207ae57 100644 --- a/PCbuild/lib.pyproj +++ b/PCbuild/lib.pyproj @@ -1191,7 +1191,6 @@ -