diff --git a/Lib/test/test_normalization.py b/Lib/test/test_normalization.py
deleted file mode 100644
index ba877e73f7d..00000000000
--- a/Lib/test/test_normalization.py
+++ /dev/null
@@ -1,117 +0,0 @@
-from test.support import open_urlresource
-import unittest
-
-from http.client import HTTPException
-import sys
-from unicodedata import normalize, is_normalized, unidata_version
-
-TESTDATAFILE = "NormalizationTest.txt"
-TESTDATAURL = "http://www.pythontest.net/unicode/" + unidata_version + "/" + TESTDATAFILE
-
-def check_version(testfile):
- hdr = testfile.readline()
- return unidata_version in hdr
-
-class RangeError(Exception):
- pass
-
-def NFC(str):
- return normalize("NFC", str)
-
-def NFKC(str):
- return normalize("NFKC", str)
-
-def NFD(str):
- return normalize("NFD", str)
-
-def NFKD(str):
- return normalize("NFKD", str)
-
-def unistr(data):
- data = [int(x, 16) for x in data.split(" ")]
- for x in data:
- if x > sys.maxunicode:
- raise RangeError
- return "".join([chr(x) for x in data])
-
-class NormalizationTest(unittest.TestCase):
- def test_main(self):
- # Hit the exception early
- try:
- testdata = open_urlresource(TESTDATAURL, encoding="utf-8",
- check=check_version)
- except PermissionError:
- self.skipTest(f"Permission error when downloading {TESTDATAURL} "
- f"into the test data directory")
- except (OSError, HTTPException):
- self.fail(f"Could not retrieve {TESTDATAURL}")
-
- with testdata:
- self.run_normalization_tests(testdata)
-
- def run_normalization_tests(self, testdata):
- part = None
- part1_data = {}
-
- for line in testdata:
- if '#' in line:
- line = line.split('#')[0]
- line = line.strip()
- if not line:
- continue
- if line.startswith("@Part"):
- part = line.split()[0]
- continue
- try:
- c1,c2,c3,c4,c5 = [unistr(x) for x in line.split(';')[:-1]]
- except RangeError:
- # Skip unsupported characters;
- # try at least adding c1 if we are in part1
- if part == "@Part1":
- try:
- c1 = unistr(line.split(';')[0])
- except RangeError:
- pass
- else:
- part1_data[c1] = 1
- continue
-
- # Perform tests
- self.assertTrue(c2 == NFC(c1) == NFC(c2) == NFC(c3), line)
- self.assertTrue(c4 == NFC(c4) == NFC(c5), line)
- self.assertTrue(c3 == NFD(c1) == NFD(c2) == NFD(c3), line)
- self.assertTrue(c5 == NFD(c4) == NFD(c5), line)
- self.assertTrue(c4 == NFKC(c1) == NFKC(c2) == \
- NFKC(c3) == NFKC(c4) == NFKC(c5),
- line)
- self.assertTrue(c5 == NFKD(c1) == NFKD(c2) == \
- NFKD(c3) == NFKD(c4) == NFKD(c5),
- line)
-
- self.assertTrue(is_normalized("NFC", c2))
- self.assertTrue(is_normalized("NFC", c4))
-
- self.assertTrue(is_normalized("NFD", c3))
- self.assertTrue(is_normalized("NFD", c5))
-
- self.assertTrue(is_normalized("NFKC", c4))
- self.assertTrue(is_normalized("NFKD", c5))
-
- # Record part 1 data
- if part == "@Part1":
- part1_data[c1] = 1
-
- # Perform tests for all other data
- for c in range(sys.maxunicode+1):
- X = chr(c)
- if X in part1_data:
- continue
- self.assertTrue(X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c)
-
- def test_bug_834676(self):
- # Check for bug 834676
- normalize('NFC', '\ud55c\uae00')
-
-
-if __name__ == "__main__":
- unittest.main()
diff --git a/Lib/test/test_ucn.py b/Lib/test/test_ucn.py
index 8febf0af862..503df6a51d5 100644
--- a/Lib/test/test_ucn.py
+++ b/Lib/test/test_ucn.py
@@ -12,7 +12,6 @@ import unicodedata
from test import support
from http.client import HTTPException
-from test.test_normalization import check_version
try:
from _testcapi import INT_MAX, PY_SSIZE_T_MAX, UINT_MAX
@@ -172,6 +171,9 @@ class UnicodeNamesTest(unittest.TestCase):
def test_named_sequences_full(self):
# Check all the named sequences
+        def check_version(testfile):
+            # The first line of the file must name our Unicode version.
+            return unicodedata.unidata_version in testfile.readline()
url = ("http://www.pythontest.net/unicode/%s/NamedSequences.txt" %
unicodedata.unidata_version)
try:
diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py
index 9ec2f114972..40c38c1c426 100644
--- a/Lib/test/test_unicodedata.py
+++ b/Lib/test/test_unicodedata.py
@@ -7,10 +7,11 @@
"""
import hashlib
+from http.client import HTTPException
import sys
import unicodedata
import unittest
-from test.support import script_helper
+from test.support import open_urlresource, script_helper
class UnicodeMethodsTest(unittest.TestCase):
@@ -171,13 +172,6 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest):
self.assertRaises(TypeError, self.db.combining)
self.assertRaises(TypeError, self.db.combining, 'xx')
- def test_normalize(self):
- self.assertRaises(TypeError, self.db.normalize)
- self.assertRaises(ValueError, self.db.normalize, 'unknown', 'xx')
- self.assertEqual(self.db.normalize('NFKC', ''), '')
- # The rest can be found in test_normalization.py
- # which requires an external file.
-
def test_pr29(self):
# http://www.unicode.org/review/pr-29.html
# See issues #1054943 and #10254.
@@ -208,9 +202,6 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest):
self.assertEqual(self.db.normalize('NFC', u11a7_str_a), u11a7_str_b)
self.assertEqual(self.db.normalize('NFC', u11c3_str_a), u11c3_str_b)
- # For tests of unicodedata.is_normalized / self.db.is_normalized ,
- # see test_normalization.py .
-
def test_east_asian_width(self):
eaw = self.db.east_asian_width
self.assertRaises(TypeError, eaw, b'a')
@@ -315,5 +306,102 @@ class UnicodeMiscTest(UnicodeDatabaseTest):
self.assertEqual(len(lines), 1,
r"\u%.4x should not be a linebreak" % i)
+class NormalizationTest(unittest.TestCase):
+    # Tests normalize() and is_normalized() against NormalizationTest.txt.
+
+    @staticmethod
+    def check_version(testfile):
+        # open_urlresource() check: first line must name unidata_version.
+        hdr = testfile.readline()
+        return unicodedata.unidata_version in hdr
+
+    @staticmethod
+    def unistr(data):
+        # Space-separated hex code points -> str, e.g. "0041 030A".
+        return "".join(chr(int(x, 16)) for x in data.split(" "))
+
+    def test_normalization(self):
+        TESTDATAFILE = "NormalizationTest.txt"
+        TESTDATAURL = f"http://www.pythontest.net/unicode/{unicodedata.unidata_version}/{TESTDATAFILE}"
+
+        # Hit the exception early
+        try:
+            testdata = open_urlresource(TESTDATAURL, encoding="utf-8",
+                                        check=self.check_version)
+        except PermissionError:
+            self.skipTest(f"Permission error when downloading {TESTDATAURL} "
+                          f"into the test data directory")
+        except (OSError, HTTPException):
+            self.fail(f"Could not retrieve {TESTDATAURL}")
+
+        with testdata:
+            self.run_normalization_tests(testdata)
+
+    def run_normalization_tests(self, testdata):
+        # testdata: iterable of text lines in NormalizationTest.txt format.
+        part = None
+        part1_data = set()  # every c1 seen while in @Part1
+
+        def NFC(s):
+            return unicodedata.normalize("NFC", s)
+
+        def NFKC(s):
+            return unicodedata.normalize("NFKC", s)
+
+        def NFD(s):
+            return unicodedata.normalize("NFD", s)
+
+        def NFKD(s):
+            return unicodedata.normalize("NFKD", s)
+
+        for line in testdata:
+            line = line.split('#')[0].strip()  # drop trailing comment
+            if not line:
+                continue
+            if line.startswith("@Part"):
+                part = line.split()[0]
+                continue
+            c1,c2,c3,c4,c5 = [self.unistr(x) for x in line.split(';')[:-1]]
+
+            # Perform tests
+            self.assertTrue(c2 == NFC(c1) == NFC(c2) == NFC(c3), line)
+            self.assertTrue(c4 == NFC(c4) == NFC(c5), line)
+            self.assertTrue(c3 == NFD(c1) == NFD(c2) == NFD(c3), line)
+            self.assertTrue(c5 == NFD(c4) == NFD(c5), line)
+            self.assertTrue(c4 == NFKC(c1) == NFKC(c2) ==
+                            NFKC(c3) == NFKC(c4) == NFKC(c5),
+                            line)
+            self.assertTrue(c5 == NFKD(c1) == NFKD(c2) ==
+                            NFKD(c3) == NFKD(c4) == NFKD(c5),
+                            line)
+
+            # Expected results are themselves normalized (msg names the line).
+            for form, s in (("NFC", c2), ("NFC", c4),
+                            ("NFD", c3), ("NFD", c5),
+                            ("NFKC", c4), ("NFKD", c5)):
+                self.assertTrue(unicodedata.is_normalized(form, s), line)
+
+            # Record part 1 data
+            if part == "@Part1":
+                part1_data.add(c1)
+
+        # Perform tests for all other data
+        for c in range(sys.maxunicode+1):
+            X = chr(c)
+            if X in part1_data:
+                continue
+            self.assertTrue(X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c)
+
+    def test_edge_cases(self):
+        # Argument validation, and the empty string normalizes to itself.
+        self.assertRaises(TypeError, unicodedata.normalize)
+        self.assertRaises(ValueError, unicodedata.normalize, 'unknown', 'xx')
+        self.assertEqual(unicodedata.normalize('NFKC', ''), '')
+
+    def test_bug_834676(self):
+        # Check for bug 834676
+        unicodedata.normalize('NFC', '\ud55c\uae00')
+
+
if __name__ == "__main__":
unittest.main()
diff --git a/PCbuild/lib.pyproj b/PCbuild/lib.pyproj
index 0ddeef3eaa3..401e207ae57 100644
--- a/PCbuild/lib.pyproj
+++ b/PCbuild/lib.pyproj
@@ -1191,7 +1191,6 @@
-