bpo-4963: Fix for initialization and non-deterministic behavior issues in mimetypes (GH-3062)

This commit is contained in:
David K. Hess 2019-06-24 18:46:59 -05:00 committed by Steve Dower
parent 8bd2872adb
commit 9fc720e5e4
4 changed files with 192 additions and 124 deletions

View File

@ -93,6 +93,10 @@ behavior of the module.
Specifying an empty list for *files* will prevent the system defaults from
being applied: only the well-known values will be present from a built-in list.

If *files* is ``None``, the internal data structure is completely rebuilt to its
initial default value. This is a stable operation and will produce the same results
when called multiple times.

.. versionchanged:: 3.2
   Previously, Windows registry settings were ignored.

View File

@ -66,13 +66,13 @@ class MimeTypes:
def __init__(self, filenames=(), strict=True):
if not inited:
init()
self.encodings_map = encodings_map.copy()
self.suffix_map = suffix_map.copy()
self.encodings_map = _encodings_map_default.copy()
self.suffix_map = _suffix_map_default.copy()
self.types_map = ({}, {}) # dict for (non-strict, strict)
self.types_map_inv = ({}, {})
for (ext, type) in types_map.items():
for (ext, type) in _types_map_default.items():
self.add_type(type, ext, True)
for (ext, type) in common_types.items():
for (ext, type) in _common_types_default.items():
self.add_type(type, ext, False)
for name in filenames:
self.read(name, strict)
@ -346,11 +346,19 @@ def init(files=None):
global suffix_map, types_map, encodings_map, common_types
global inited, _db
inited = True # so that MimeTypes.__init__() doesn't call us again
if files is None or _db is None:
db = MimeTypes()
if files is None:
if _winreg:
db.read_windows_registry()
if files is None:
files = knownfiles
else:
files = knownfiles + list(files)
else:
db = _db
for file in files:
if os.path.isfile(file):
db.read(file)
@ -374,12 +382,12 @@ def read_mime_types(file):
def _default_mime_types():
global suffix_map
global encodings_map
global types_map
global common_types
global suffix_map, _suffix_map_default
global encodings_map, _encodings_map_default
global types_map, _types_map_default
global common_types, _common_types_default
suffix_map = {
suffix_map = _suffix_map_default = {
'.svgz': '.svg.gz',
'.tgz': '.tar.gz',
'.taz': '.tar.gz',
@ -388,7 +396,7 @@ def _default_mime_types():
'.txz': '.tar.xz',
}
encodings_map = {
encodings_map = _encodings_map_default = {
'.gz': 'gzip',
'.Z': 'compress',
'.bz2': 'bzip2',
@ -399,152 +407,155 @@ def _default_mime_types():
# at http://www.iana.org/assignments/media-types
# or extensions, i.e. using the x- prefix
# If you add to these, please keep them sorted!
types_map = {
'.a' : 'application/octet-stream',
'.ai' : 'application/postscript',
'.aif' : 'audio/x-aiff',
'.aifc' : 'audio/x-aiff',
'.aiff' : 'audio/x-aiff',
'.au' : 'audio/basic',
'.avi' : 'video/x-msvideo',
'.bat' : 'text/plain',
'.bcpio' : 'application/x-bcpio',
'.bin' : 'application/octet-stream',
'.bmp' : 'image/bmp',
'.c' : 'text/plain',
'.cdf' : 'application/x-netcdf',
'.cpio' : 'application/x-cpio',
'.csh' : 'application/x-csh',
'.css' : 'text/css',
'.csv' : 'text/csv',
'.dll' : 'application/octet-stream',
# If you add to these, please keep them sorted by mime type.
# Make sure the entry with the preferred file extension for a particular mime type
# appears before any others of the same mimetype.
types_map = _types_map_default = {
'.js' : 'application/javascript',
'.mjs' : 'application/javascript',
'.json' : 'application/json',
'.doc' : 'application/msword',
'.dot' : 'application/msword',
'.dvi' : 'application/x-dvi',
'.eml' : 'message/rfc822',
'.eps' : 'application/postscript',
'.etx' : 'text/x-setext',
'.wiz' : 'application/msword',
'.bin' : 'application/octet-stream',
'.a' : 'application/octet-stream',
'.dll' : 'application/octet-stream',
'.exe' : 'application/octet-stream',
'.gif' : 'image/gif',
'.gtar' : 'application/x-gtar',
'.h' : 'text/plain',
'.hdf' : 'application/x-hdf',
'.htm' : 'text/html',
'.html' : 'text/html',
'.ico' : 'image/vnd.microsoft.icon',
'.ief' : 'image/ief',
'.jpe' : 'image/jpeg',
'.jpeg' : 'image/jpeg',
'.jpg' : 'image/jpeg',
'.js' : 'application/javascript',
'.json' : 'application/json',
'.ksh' : 'text/plain',
'.latex' : 'application/x-latex',
'.m1v' : 'video/mpeg',
'.m3u' : 'application/vnd.apple.mpegurl',
'.m3u8' : 'application/vnd.apple.mpegurl',
'.man' : 'application/x-troff-man',
'.me' : 'application/x-troff-me',
'.mht' : 'message/rfc822',
'.mhtml' : 'message/rfc822',
'.mif' : 'application/x-mif',
'.mjs' : 'application/javascript',
'.mov' : 'video/quicktime',
'.movie' : 'video/x-sgi-movie',
'.mp2' : 'audio/mpeg',
'.mp3' : 'audio/mpeg',
'.mp4' : 'video/mp4',
'.mpa' : 'video/mpeg',
'.mpe' : 'video/mpeg',
'.mpeg' : 'video/mpeg',
'.mpg' : 'video/mpeg',
'.ms' : 'application/x-troff-ms',
'.nc' : 'application/x-netcdf',
'.nws' : 'message/rfc822',
'.o' : 'application/octet-stream',
'.obj' : 'application/octet-stream',
'.so' : 'application/octet-stream',
'.oda' : 'application/oda',
'.p12' : 'application/x-pkcs12',
'.p7c' : 'application/pkcs7-mime',
'.pbm' : 'image/x-portable-bitmap',
'.pdf' : 'application/pdf',
'.pfx' : 'application/x-pkcs12',
'.pgm' : 'image/x-portable-graymap',
'.pl' : 'text/plain',
'.png' : 'image/png',
'.pnm' : 'image/x-portable-anymap',
'.p7c' : 'application/pkcs7-mime',
'.ps' : 'application/postscript',
'.ai' : 'application/postscript',
'.eps' : 'application/postscript',
'.m3u' : 'application/vnd.apple.mpegurl',
'.m3u8' : 'application/vnd.apple.mpegurl',
'.xls' : 'application/vnd.ms-excel',
'.xlb' : 'application/vnd.ms-excel',
'.ppt' : 'application/vnd.ms-powerpoint',
'.pot' : 'application/vnd.ms-powerpoint',
'.ppa' : 'application/vnd.ms-powerpoint',
'.ppm' : 'image/x-portable-pixmap',
'.pps' : 'application/vnd.ms-powerpoint',
'.ppt' : 'application/vnd.ms-powerpoint',
'.ps' : 'application/postscript',
'.pwz' : 'application/vnd.ms-powerpoint',
'.py' : 'text/x-python',
'.wasm' : 'application/wasm',
'.bcpio' : 'application/x-bcpio',
'.cpio' : 'application/x-cpio',
'.csh' : 'application/x-csh',
'.dvi' : 'application/x-dvi',
'.gtar' : 'application/x-gtar',
'.hdf' : 'application/x-hdf',
'.latex' : 'application/x-latex',
'.mif' : 'application/x-mif',
'.cdf' : 'application/x-netcdf',
'.nc' : 'application/x-netcdf',
'.p12' : 'application/x-pkcs12',
'.pfx' : 'application/x-pkcs12',
'.ram' : 'application/x-pn-realaudio',
'.pyc' : 'application/x-python-code',
'.pyo' : 'application/x-python-code',
'.qt' : 'video/quicktime',
'.ra' : 'audio/x-pn-realaudio',
'.ram' : 'application/x-pn-realaudio',
'.ras' : 'image/x-cmu-raster',
'.rdf' : 'application/xml',
'.rgb' : 'image/x-rgb',
'.roff' : 'application/x-troff',
'.rtx' : 'text/richtext',
'.sgm' : 'text/x-sgml',
'.sgml' : 'text/x-sgml',
'.sh' : 'application/x-sh',
'.shar' : 'application/x-shar',
'.snd' : 'audio/basic',
'.so' : 'application/octet-stream',
'.src' : 'application/x-wais-source',
'.swf' : 'application/x-shockwave-flash',
'.sv4cpio': 'application/x-sv4cpio',
'.sv4crc' : 'application/x-sv4crc',
'.svg' : 'image/svg+xml',
'.swf' : 'application/x-shockwave-flash',
'.t' : 'application/x-troff',
'.tar' : 'application/x-tar',
'.tcl' : 'application/x-tcl',
'.tex' : 'application/x-tex',
'.texi' : 'application/x-texinfo',
'.texinfo': 'application/x-texinfo',
'.tif' : 'image/tiff',
'.tiff' : 'image/tiff',
'.roff' : 'application/x-troff',
'.t' : 'application/x-troff',
'.tr' : 'application/x-troff',
'.tsv' : 'text/tab-separated-values',
'.txt' : 'text/plain',
'.man' : 'application/x-troff-man',
'.me' : 'application/x-troff-me',
'.ms' : 'application/x-troff-ms',
'.ustar' : 'application/x-ustar',
'.vcf' : 'text/x-vcard',
'.wasm' : 'application/wasm',
'.wav' : 'audio/x-wav',
'.webm' : 'video/webm',
'.wiz' : 'application/msword',
'.wsdl' : 'application/xml',
'.xbm' : 'image/x-xbitmap',
'.xlb' : 'application/vnd.ms-excel',
'.xls' : 'application/vnd.ms-excel',
'.xml' : 'text/xml',
'.xpdl' : 'application/xml',
'.xpm' : 'image/x-xpixmap',
'.src' : 'application/x-wais-source',
'.xsl' : 'application/xml',
'.xwd' : 'image/x-xwindowdump',
'.rdf' : 'application/xml',
'.wsdl' : 'application/xml',
'.xpdl' : 'application/xml',
'.zip' : 'application/zip',
'.au' : 'audio/basic',
'.snd' : 'audio/basic',
'.mp3' : 'audio/mpeg',
'.mp2' : 'audio/mpeg',
'.aif' : 'audio/x-aiff',
'.aifc' : 'audio/x-aiff',
'.aiff' : 'audio/x-aiff',
'.ra' : 'audio/x-pn-realaudio',
'.wav' : 'audio/x-wav',
'.bmp' : 'image/bmp',
'.gif' : 'image/gif',
'.ief' : 'image/ief',
'.jpg' : 'image/jpeg',
'.jpe' : 'image/jpeg',
'.jpeg' : 'image/jpeg',
'.png' : 'image/png',
'.svg' : 'image/svg+xml',
'.tiff' : 'image/tiff',
'.tif' : 'image/tiff',
'.ico' : 'image/vnd.microsoft.icon',
'.ras' : 'image/x-cmu-raster',
'.bmp' : 'image/x-ms-bmp',
'.pnm' : 'image/x-portable-anymap',
'.pbm' : 'image/x-portable-bitmap',
'.pgm' : 'image/x-portable-graymap',
'.ppm' : 'image/x-portable-pixmap',
'.rgb' : 'image/x-rgb',
'.xbm' : 'image/x-xbitmap',
'.xpm' : 'image/x-xpixmap',
'.xwd' : 'image/x-xwindowdump',
'.eml' : 'message/rfc822',
'.mht' : 'message/rfc822',
'.mhtml' : 'message/rfc822',
'.nws' : 'message/rfc822',
'.css' : 'text/css',
'.csv' : 'text/csv',
'.html' : 'text/html',
'.htm' : 'text/html',
'.txt' : 'text/plain',
'.bat' : 'text/plain',
'.c' : 'text/plain',
'.h' : 'text/plain',
'.ksh' : 'text/plain',
'.pl' : 'text/plain',
'.rtx' : 'text/richtext',
'.tsv' : 'text/tab-separated-values',
'.py' : 'text/x-python',
'.etx' : 'text/x-setext',
'.sgm' : 'text/x-sgml',
'.sgml' : 'text/x-sgml',
'.vcf' : 'text/x-vcard',
'.xml' : 'text/xml',
'.mp4' : 'video/mp4',
'.mpeg' : 'video/mpeg',
'.m1v' : 'video/mpeg',
'.mpa' : 'video/mpeg',
'.mpe' : 'video/mpeg',
'.mpg' : 'video/mpeg',
'.mov' : 'video/quicktime',
'.qt' : 'video/quicktime',
'.webm' : 'video/webm',
'.avi' : 'video/x-msvideo',
'.movie' : 'video/x-sgi-movie',
}
# These are non-standard types, commonly found in the wild. They will
# only match if strict=0 flag is given to the API methods.
# Please sort these too
common_types = {
'.jpg' : 'image/jpg',
'.mid' : 'audio/midi',
common_types = _common_types_default = {
'.rtf' : 'application/rtf',
'.midi': 'audio/midi',
'.mid' : 'audio/midi',
'.jpg' : 'image/jpg',
'.pict': 'image/pict',
'.pct' : 'image/pict',
'.pic' : 'image/pict',
'.pict': 'image/pict',
'.rtf' : 'application/rtf',
'.xul' : 'text/xul'
'.xul' : 'text/xul',
}

View File

@ -79,6 +79,57 @@ class MimeTypesTestCase(unittest.TestCase):
strict=True)
self.assertEqual(exts, ['.g3', '.g\xb3'])
def test_init_reinitializes(self):
# Regression test: a call to mimetypes.init() must discard any type that
# was registered at runtime through mimetypes.add_type().
# Issue 4963: make sure an init starts clean
# First, put some poison into the types table
mimetypes.add_type('foo/bar', '.foobar')
# Sanity check: the poisoned mapping is visible before reinitializing.
self.assertEqual(mimetypes.guess_extension('foo/bar'), '.foobar')
# Reinitialize
mimetypes.init()
# Poison should be gone.
self.assertEqual(mimetypes.guess_extension('foo/bar'), None)
def test_preferred_extension(self):
# Verify that guess_extension() returns one fixed, expected extension for
# MIME types that have several extensions registered, and that the answer
# is the same after the module has been reinitialized.
def check_extensions():
# Each assertion pins the single extension expected for a type.
self.assertEqual(mimetypes.guess_extension('application/octet-stream'), '.bin')
self.assertEqual(mimetypes.guess_extension('application/postscript'), '.ps')
self.assertEqual(mimetypes.guess_extension('application/vnd.apple.mpegurl'), '.m3u')
self.assertEqual(mimetypes.guess_extension('application/vnd.ms-excel'), '.xls')
self.assertEqual(mimetypes.guess_extension('application/vnd.ms-powerpoint'), '.ppt')
self.assertEqual(mimetypes.guess_extension('application/x-texinfo'), '.texi')
self.assertEqual(mimetypes.guess_extension('application/x-troff'), '.roff')
self.assertEqual(mimetypes.guess_extension('application/xml'), '.xsl')
self.assertEqual(mimetypes.guess_extension('audio/mpeg'), '.mp3')
self.assertEqual(mimetypes.guess_extension('image/jpeg'), '.jpg')
self.assertEqual(mimetypes.guess_extension('image/tiff'), '.tiff')
self.assertEqual(mimetypes.guess_extension('message/rfc822'), '.eml')
self.assertEqual(mimetypes.guess_extension('text/html'), '.html')
self.assertEqual(mimetypes.guess_extension('text/plain'), '.txt')
self.assertEqual(mimetypes.guess_extension('video/mpeg'), '.mpeg')
self.assertEqual(mimetypes.guess_extension('video/quicktime'), '.mov')
# NOTE(review): indentation is not visible here; the three statements
# below presumably belong to the test body, not to check_extensions().
# Run the checks, reinitialize the module, then run the same checks again.
check_extensions()
mimetypes.init()
check_extensions()
def test_init_stability(self):
# Verify that calling mimetypes.init() twice yields module-level maps that
# are distinct objects but compare equal.
mimetypes.init()
# Keep references to the maps produced by the first init() call.
suffix_map = mimetypes.suffix_map
encodings_map = mimetypes.encodings_map
types_map = mimetypes.types_map
common_types = mimetypes.common_types
# Second init(): the module attributes are expected to be rebound.
mimetypes.init()
# Identity: each map must be a new object, not the one saved above.
self.assertIsNot(suffix_map, mimetypes.suffix_map)
self.assertIsNot(encodings_map, mimetypes.encodings_map)
self.assertIsNot(types_map, mimetypes.types_map)
self.assertIsNot(common_types, mimetypes.common_types)
# Equality: the rebuilt maps must hold the same content as before.
self.assertEqual(suffix_map, mimetypes.suffix_map)
self.assertEqual(encodings_map, mimetypes.encodings_map)
self.assertEqual(types_map, mimetypes.types_map)
self.assertEqual(common_types, mimetypes.common_types)
def test_path_like_ob(self):
filename = "LICENSE.txt"
filepath = pathlib.Path(filename)

View File

@ -0,0 +1,2 @@
Fixed non-deterministic behavior related to mimetypes extension mapping and
module reinitialization.