gh-116608: Ignore UTF-16 BOM in importlib.resources._functional tests (GH-117569)

gh-116608: Ignore UTF-16 BOM in importlib.resources._functional tests. To test the `errors` argument, we read a UTF-16 file as UTF-8 with "backslashreplace" error handling. However, the utf-16 codec adds an endian-specific byte-order mark, so on big-endian machines the expectation doesn't match the test file (which was saved on a little-endian machine). Use an endswith check to ignore the BOM.
2024-04-05 17:00:29 +02:00 · 2024-04-05 17:00:29 +02:00 · 4d4a6f1b6a
parent 687616877b
commit 4d4a6f1b6a
1 changed file with 10 additions and 4 deletions
--- a/Lib/test/test_importlib/resources/test_functional.py
+++ b/Lib/test/test_importlib/resources/test_functional.py
@@ -32,6 +32,12 @@ class FunctionalAPIBase:
            with self.subTest(path_parts=path_parts):
                yield path_parts

+    def assertEndsWith(self, string, suffix):
+        """Assert that `string` ends with `suffix`.
+
+        Used to ignore an architecture-specific UTF-16 byte-order mark."""
+        self.assertEqual(string[-len(suffix):], suffix)
+
    def test_read_text(self):
        self.assertEqual(
            resources.read_text(self.anchor01, 'utf-8.file'),
@@ -65,12 +71,12 @@ class FunctionalAPIBase:
            ),
            '\x00\x01\x02\x03',
        )
-        self.assertEqual(
+        self.assertEndsWith(  # ignore the BOM
            resources.read_text(
                self.anchor01, 'utf-16.file',
                errors='backslashreplace',
            ),
-            'Hello, UTF-16 world!\n'.encode('utf-16').decode(
+            'Hello, UTF-16 world!\n'.encode('utf-16-le').decode(
                errors='backslashreplace',
            ),
        )
@@ -112,9 +118,9 @@ class FunctionalAPIBase:
            self.anchor01, 'utf-16.file',
            errors='backslashreplace',
        ) as f:
-            self.assertEqual(
+            self.assertEndsWith(  # ignore the BOM
                f.read(),
-                'Hello, UTF-16 world!\n'.encode('utf-16').decode(
+                'Hello, UTF-16 world!\n'.encode('utf-16-le').decode(
                    errors='backslashreplace',
                ),
            )