From 7731139b7af655b9f5df6d1b5493f8dfdf41d569 Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Mon, 29 Jun 2020 05:07:41 -0700 Subject: [PATCH] bpo-41048: mimetypes should read the rule file using UTF-8, not the locale encoding (GH-20998) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (cherry picked from commit 7f569c9bc0079906012b3034d30fe8abc742e7fc) Co-authored-by: Srinivas Reddy Thatiparthy (శ్రీనివాస్ రెడ్డి తాటిపర్తి) --- Lib/mimetypes.py | 2 +- Lib/test/test_mimetypes.py | 12 ++++++++++++ Misc/ACKS | 1 + .../Library/2020-06-20-10-16-57.bpo-41048.hEXB-B.rst | 2 ++ 4 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2020-06-20-10-16-57.bpo-41048.hEXB-B.rst diff --git a/Lib/mimetypes.py b/Lib/mimetypes.py index 434f5b37c58..954bb0a7453 100644 --- a/Lib/mimetypes.py +++ b/Lib/mimetypes.py @@ -372,7 +372,7 @@ def init(files=None): def read_mime_types(file): try: - f = open(file) + f = open(file, encoding='utf-8') except OSError: return None with f: diff --git a/Lib/test/test_mimetypes.py b/Lib/test/test_mimetypes.py index 9cac6ce0225..683d393fdb4 100644 --- a/Lib/test/test_mimetypes.py +++ b/Lib/test/test_mimetypes.py @@ -67,6 +67,18 @@ class MimeTypesTestCase(unittest.TestCase): mime_dict = mimetypes.read_mime_types(file) eq(mime_dict[".pyunit"], "x-application/x-unittest") + # bpo-41048: read_mime_types should read the rule file with 'utf-8' encoding. + # Not with locale encoding. _bootlocale has been imported because io.open(...) + # uses it. + with support.temp_dir() as directory: + data = "application/no-mans-land Fran\u00E7ais" + file = pathlib.Path(directory, "sample.mimetype") + file.write_text(data, encoding='utf-8') + import _bootlocale + with support.swap_attr(_bootlocale, 'getpreferredencoding', lambda do_setlocale=True: 'ASCII'): + mime_dict = mimetypes.read_mime_types(file) + eq(mime_dict[".Français"], "application/no-mans-land") + def test_non_standard_types(self): eq = self.assertEqual # First try strict diff --git a/Misc/ACKS b/Misc/ACKS index 8098637a32c..a08e917b307 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1669,6 +1669,7 @@ Mikhail Terekhov Victor Terrón Pablo Galindo Richard M. Tew +Srinivas Reddy Thatiparthy Tobias Thelen Christian Theune Févry Thibault diff --git a/Misc/NEWS.d/next/Library/2020-06-20-10-16-57.bpo-41048.hEXB-B.rst b/Misc/NEWS.d/next/Library/2020-06-20-10-16-57.bpo-41048.hEXB-B.rst new file mode 100644 index 00000000000..2595900137d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-06-20-10-16-57.bpo-41048.hEXB-B.rst @@ -0,0 +1,2 @@ +:func:`mimetypes.read_mime_types` function reads the rule file using UTF-8 encoding, not the locale encoding. +Patch by Srinivas Reddy Thatiparthy. \ No newline at end of file