bpo-41048: mimetypes should read the rule file using UTF-8, not the locale encoding (GH-20998)

(cherry picked from commit 7f569c9bc0)

Co-authored-by: Srinivas Reddy Thatiparthy (శ్రీనివాస్  రెడ్డి తాటిపర్తి) <thatiparthysreenivas@gmail.com>
This commit is contained in:
Miss Islington (bot) 2020-06-29 05:07:41 -07:00 committed by GitHub
parent 12bb0b69ec
commit 7731139b7a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 16 additions and 1 deletions

View File

@ -372,7 +372,7 @@ def init(files=None):
def read_mime_types(file): def read_mime_types(file):
try: try:
f = open(file) f = open(file, encoding='utf-8')
except OSError: except OSError:
return None return None
with f: with f:

View File

@ -67,6 +67,18 @@ class MimeTypesTestCase(unittest.TestCase):
mime_dict = mimetypes.read_mime_types(file) mime_dict = mimetypes.read_mime_types(file)
eq(mime_dict[".pyunit"], "x-application/x-unittest") eq(mime_dict[".pyunit"], "x-application/x-unittest")
# bpo-41048: read_mime_types should read the rule file with 'utf-8' encoding,
# not with the locale encoding. _bootlocale is imported (and its
# getpreferredencoding patched) because io.open(...) uses it.
with support.temp_dir() as directory:
data = "application/no-mans-land Fran\u00E7ais"
file = pathlib.Path(directory, "sample.mimetype")
file.write_text(data, encoding='utf-8')
import _bootlocale
with support.swap_attr(_bootlocale, 'getpreferredencoding', lambda do_setlocale=True: 'ASCII'):
mime_dict = mimetypes.read_mime_types(file)
eq(mime_dict[".Français"], "application/no-mans-land")
def test_non_standard_types(self): def test_non_standard_types(self):
eq = self.assertEqual eq = self.assertEqual
# First try strict # First try strict

View File

@ -1669,6 +1669,7 @@ Mikhail Terekhov
Victor Terrón Victor Terrón
Pablo Galindo Pablo Galindo
Richard M. Tew Richard M. Tew
Srinivas Reddy Thatiparthy
Tobias Thelen Tobias Thelen
Christian Theune Christian Theune
Févry Thibault Févry Thibault

View File

@ -0,0 +1,2 @@
The :func:`mimetypes.read_mime_types` function now reads the rule file using UTF-8 encoding instead of the locale encoding.
Patch by Srinivas Reddy Thatiparthy.