diff --git a/Lib/mimetypes.py b/Lib/mimetypes.py index 434f5b37c58..954bb0a7453 100644 --- a/Lib/mimetypes.py +++ b/Lib/mimetypes.py @@ -372,7 +372,7 @@ def init(files=None): def read_mime_types(file): try: - f = open(file) + f = open(file, encoding='utf-8') except OSError: return None with f: diff --git a/Lib/test/test_mimetypes.py b/Lib/test/test_mimetypes.py index 9cac6ce0225..683d393fdb4 100644 --- a/Lib/test/test_mimetypes.py +++ b/Lib/test/test_mimetypes.py @@ -67,6 +67,18 @@ class MimeTypesTestCase(unittest.TestCase): mime_dict = mimetypes.read_mime_types(file) eq(mime_dict[".pyunit"], "x-application/x-unittest") + # bpo-41048: read_mime_types should read the rule file with 'utf-8' encoding, + # not with the locale encoding. _bootlocale is imported because io.open(...) + # uses it. + with support.temp_dir() as directory: + data = "application/no-mans-land Fran\u00E7ais" + file = pathlib.Path(directory, "sample.mimetype") + file.write_text(data, encoding='utf-8') + import _bootlocale + with support.swap_attr(_bootlocale, 'getpreferredencoding', lambda do_setlocale=True: 'ASCII'): + mime_dict = mimetypes.read_mime_types(file) + eq(mime_dict[".Français"], "application/no-mans-land") + def test_non_standard_types(self): eq = self.assertEqual # First try strict diff --git a/Misc/ACKS b/Misc/ACKS index 8098637a32c..a08e917b307 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1669,6 +1669,7 @@ Mikhail Terekhov Victor Terrón Pablo Galindo Richard M. Tew +Srinivas Reddy Thatiparthy Tobias Thelen Christian Theune Févry Thibault diff --git a/Misc/NEWS.d/next/Library/2020-06-20-10-16-57.bpo-41048.hEXB-B.rst b/Misc/NEWS.d/next/Library/2020-06-20-10-16-57.bpo-41048.hEXB-B.rst new file mode 100644 index 00000000000..2595900137d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-06-20-10-16-57.bpo-41048.hEXB-B.rst @@ -0,0 +1,2 @@ +:func:`mimetypes.read_mime_types` function reads the rule file using UTF-8 encoding, not the locale encoding. +Patch by Srinivas Reddy Thatiparthy. 
\ No newline at end of file