gh-66543: Add mimetypes.guess_file_type() (GH-117258)

This commit is contained in:
Serhiy Storchaka 2024-05-06 15:50:52 +03:00 committed by GitHub
parent d3c7821335
commit d6fa1d4bee
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 129 additions and 35 deletions

View File

@ -53,7 +53,7 @@ must be running an SMTP server.
# Guess the content type based on the file's extension. Encoding # Guess the content type based on the file's extension. Encoding
# will be ignored, although we should check for simple things like # will be ignored, although we should check for simple things like
# gzip'd or compressed files. # gzip'd or compressed files.
ctype, encoding = mimetypes.guess_type(path) ctype, encoding = mimetypes.guess_file_type(path)
if ctype is None or encoding is not None: if ctype is None or encoding is not None:
# No guess could be made, or the file is encoded (compressed), so # No guess could be made, or the file is encoded (compressed), so
# use a generic bag-of-bits type. # use a generic bag-of-bits type.

View File

@ -52,7 +52,22 @@ the information :func:`init` sets up.
are also recognized. are also recognized.
.. versionchanged:: 3.8 .. versionchanged:: 3.8
Added support for url being a :term:`path-like object`. Added support for *url* being a :term:`path-like object`.
.. deprecated:: 3.13
Passing a file path instead of a URL is :term:`soft deprecated`.
Use :func:`guess_file_type` for this.
.. function:: guess_file_type(path, *, strict=True)
.. index:: pair: MIME; headers
Guess the type of a file based on its path, given by *path*.
Similar to the :func:`guess_type` function, but accepts a path instead of a URL.
*path* can be a string, a bytes object or a :term:`path-like object`.
.. versionadded:: 3.13
.. function:: guess_all_extensions(type, strict=True) .. function:: guess_all_extensions(type, strict=True)
@ -61,7 +76,7 @@ the information :func:`init` sets up.
return value is a list of strings giving all possible filename extensions, return value is a list of strings giving all possible filename extensions,
including the leading dot (``'.'``). The extensions are not guaranteed to have including the leading dot (``'.'``). The extensions are not guaranteed to have
been associated with any particular data stream, but would be mapped to the MIME been associated with any particular data stream, but would be mapped to the MIME
type *type* by :func:`guess_type`. type *type* by :func:`guess_type` and :func:`guess_file_type`.
The optional *strict* argument has the same meaning as with the :func:`guess_type` function. The optional *strict* argument has the same meaning as with the :func:`guess_type` function.
@ -72,8 +87,8 @@ the information :func:`init` sets up.
return value is a string giving a filename extension, including the leading dot return value is a string giving a filename extension, including the leading dot
(``'.'``). The extension is not guaranteed to have been associated with any (``'.'``). The extension is not guaranteed to have been associated with any
particular data stream, but would be mapped to the MIME type *type* by particular data stream, but would be mapped to the MIME type *type* by
:func:`guess_type`. If no extension can be guessed for *type*, ``None`` is :func:`guess_type` and :func:`guess_file_type`.
returned. If no extension can be guessed for *type*, ``None`` is returned.
The optional *strict* argument has the same meaning as with the :func:`guess_type` function. The optional *strict* argument has the same meaning as with the :func:`guess_type` function.
@ -238,6 +253,14 @@ than one MIME-type database; it provides an interface similar to the one of the
the object. the object.
.. method:: MimeTypes.guess_file_type(path, *, strict=True)
Similar to the :func:`guess_file_type` function, using the tables stored
as part of the object.
.. versionadded:: 3.13
.. method:: MimeTypes.guess_all_extensions(type, strict=True) .. method:: MimeTypes.guess_all_extensions(type, strict=True)
Similar to the :func:`guess_all_extensions` function, using the tables stored Similar to the :func:`guess_all_extensions` function, using the tables stored

View File

@ -865,7 +865,7 @@ directory and port number (default: 8000) on the command line::
fn = os.path.join(path, environ["PATH_INFO"][1:]) fn = os.path.join(path, environ["PATH_INFO"][1:])
if "." not in fn.split(os.path.sep)[-1]: if "." not in fn.split(os.path.sep)[-1]:
fn = os.path.join(fn, "index.html") fn = os.path.join(fn, "index.html")
mime_type = mimetypes.guess_type(fn)[0] mime_type = mimetypes.guess_file_type(fn)[0]
# Return 200 OK if file exists, otherwise 404 Not Found # Return 200 OK if file exists, otherwise 404 Not Found
if os.path.exists(fn): if os.path.exists(fn):

View File

@ -623,6 +623,13 @@ math
"fusedMultiplyAdd" operation for special cases. "fusedMultiplyAdd" operation for special cases.
(Contributed by Mark Dickinson and Victor Stinner in :gh:`73468`.) (Contributed by Mark Dickinson and Victor Stinner in :gh:`73468`.)
mimetypes
---------
* Add the :func:`~mimetypes.guess_file_type` function, which works with file paths.
Passing a file path instead of a URL to :func:`~mimetypes.guess_type` is :term:`soft deprecated`.
(Contributed by Serhiy Storchaka in :gh:`66543`.)
mmap mmap
---- ----
@ -1167,6 +1174,10 @@ Deprecated
.. Add deprecations above alphabetically, not here at the end. .. Add deprecations above alphabetically, not here at the end.
* Passing a file path instead of a URL to :func:`~mimetypes.guess_type` is :term:`soft deprecated`.
Use :func:`~mimetypes.guess_file_type` instead.
(Contributed by Serhiy Storchaka in :gh:`66543`.)
Pending Removal in Python 3.14 Pending Removal in Python 3.14
------------------------------ ------------------------------

View File

@ -897,7 +897,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
ext = ext.lower() ext = ext.lower()
if ext in self.extensions_map: if ext in self.extensions_map:
return self.extensions_map[ext] return self.extensions_map[ext]
guess, _ = mimetypes.guess_type(path) guess, _ = mimetypes.guess_file_type(path)
if guess: if guess:
return guess return guess
return 'application/octet-stream' return 'application/octet-stream'

View File

@ -40,7 +40,7 @@ except ImportError:
__all__ = [ __all__ = [
"knownfiles", "inited", "MimeTypes", "knownfiles", "inited", "MimeTypes",
"guess_type", "guess_all_extensions", "guess_extension", "guess_type", "guess_file_type", "guess_all_extensions", "guess_extension",
"add_type", "init", "read_mime_types", "add_type", "init", "read_mime_types",
"suffix_map", "encodings_map", "types_map", "common_types" "suffix_map", "encodings_map", "types_map", "common_types"
] ]
@ -119,14 +119,14 @@ class MimeTypes:
Optional `strict' argument when False adds a bunch of commonly found, Optional `strict' argument when False adds a bunch of commonly found,
but non-standard types. but non-standard types.
""" """
# TODO: Deprecate accepting file paths (in particular path-like objects).
url = os.fspath(url) url = os.fspath(url)
p = urllib.parse.urlparse(url) p = urllib.parse.urlparse(url)
if p.scheme and len(p.scheme) > 1: if p.scheme and len(p.scheme) > 1:
scheme = p.scheme scheme = p.scheme
url = p.path url = p.path
else: else:
scheme = None return self.guess_file_type(url, strict=strict)
url = os.path.splitdrive(url)[1]
if scheme == 'data': if scheme == 'data':
# syntax of data URLs: # syntax of data URLs:
# dataurl := "data:" [ mediatype ] [ ";base64" ] "," data # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
@ -146,13 +146,25 @@ class MimeTypes:
if '=' in type or '/' not in type: if '=' in type or '/' not in type:
type = 'text/plain' type = 'text/plain'
return type, None # never compressed, so encoding is None return type, None # never compressed, so encoding is None
base, ext = posixpath.splitext(url) return self._guess_file_type(url, strict, posixpath.splitext)
def guess_file_type(self, path, *, strict=True):
"""Guess the type of a file based on its path.
Similar to guess_type(), but takes file path instead of URL.
"""
path = os.fsdecode(path)
path = os.path.splitdrive(path)[1]
return self._guess_file_type(path, strict, os.path.splitext)
def _guess_file_type(self, path, strict, splitext):
base, ext = splitext(path)
while (ext_lower := ext.lower()) in self.suffix_map: while (ext_lower := ext.lower()) in self.suffix_map:
base, ext = posixpath.splitext(base + self.suffix_map[ext_lower]) base, ext = splitext(base + self.suffix_map[ext_lower])
# encodings_map is case sensitive # encodings_map is case sensitive
if ext in self.encodings_map: if ext in self.encodings_map:
encoding = self.encodings_map[ext] encoding = self.encodings_map[ext]
base, ext = posixpath.splitext(base) base, ext = splitext(base)
else: else:
encoding = None encoding = None
ext = ext.lower() ext = ext.lower()
@ -310,6 +322,16 @@ def guess_type(url, strict=True):
return _db.guess_type(url, strict) return _db.guess_type(url, strict)
def guess_file_type(path, *, strict=True):
"""Guess the type of a file based on its path.
Similar to guess_type(), but takes file path instead of URL.
"""
if _db is None:
init()
return _db.guess_file_type(path, strict=strict)
def guess_all_extensions(type, strict=True): def guess_all_extensions(type, strict=True):
"""Guess the extensions for a file based on its MIME type. """Guess the extensions for a file based on its MIME type.

View File

@ -36,20 +36,28 @@ class MimeTypesTestCase(unittest.TestCase):
def test_case_sensitivity(self): def test_case_sensitivity(self):
eq = self.assertEqual eq = self.assertEqual
eq(self.db.guess_type("foobar.HTML"), self.db.guess_type("foobar.html")) eq(self.db.guess_file_type("foobar.html"), ("text/html", None))
eq(self.db.guess_type("foobar.TGZ"), self.db.guess_type("foobar.tgz")) eq(self.db.guess_type("scheme:foobar.html"), ("text/html", None))
eq(self.db.guess_type("foobar.tar.Z"), ("application/x-tar", "compress")) eq(self.db.guess_file_type("foobar.HTML"), ("text/html", None))
eq(self.db.guess_type("foobar.tar.z"), (None, None)) eq(self.db.guess_type("scheme:foobar.HTML"), ("text/html", None))
eq(self.db.guess_file_type("foobar.tgz"), ("application/x-tar", "gzip"))
eq(self.db.guess_type("scheme:foobar.tgz"), ("application/x-tar", "gzip"))
eq(self.db.guess_file_type("foobar.TGZ"), ("application/x-tar", "gzip"))
eq(self.db.guess_type("scheme:foobar.TGZ"), ("application/x-tar", "gzip"))
eq(self.db.guess_file_type("foobar.tar.Z"), ("application/x-tar", "compress"))
eq(self.db.guess_type("scheme:foobar.tar.Z"), ("application/x-tar", "compress"))
eq(self.db.guess_file_type("foobar.tar.z"), (None, None))
eq(self.db.guess_type("scheme:foobar.tar.z"), (None, None))
def test_default_data(self): def test_default_data(self):
eq = self.assertEqual eq = self.assertEqual
eq(self.db.guess_type("foo.html"), ("text/html", None)) eq(self.db.guess_file_type("foo.html"), ("text/html", None))
eq(self.db.guess_type("foo.HTML"), ("text/html", None)) eq(self.db.guess_file_type("foo.HTML"), ("text/html", None))
eq(self.db.guess_type("foo.tgz"), ("application/x-tar", "gzip")) eq(self.db.guess_file_type("foo.tgz"), ("application/x-tar", "gzip"))
eq(self.db.guess_type("foo.tar.gz"), ("application/x-tar", "gzip")) eq(self.db.guess_file_type("foo.tar.gz"), ("application/x-tar", "gzip"))
eq(self.db.guess_type("foo.tar.Z"), ("application/x-tar", "compress")) eq(self.db.guess_file_type("foo.tar.Z"), ("application/x-tar", "compress"))
eq(self.db.guess_type("foo.tar.bz2"), ("application/x-tar", "bzip2")) eq(self.db.guess_file_type("foo.tar.bz2"), ("application/x-tar", "bzip2"))
eq(self.db.guess_type("foo.tar.xz"), ("application/x-tar", "xz")) eq(self.db.guess_file_type("foo.tar.xz"), ("application/x-tar", "xz"))
def test_data_urls(self): def test_data_urls(self):
eq = self.assertEqual eq = self.assertEqual
@ -63,7 +71,7 @@ class MimeTypesTestCase(unittest.TestCase):
eq = self.assertEqual eq = self.assertEqual
sio = io.StringIO("x-application/x-unittest pyunit\n") sio = io.StringIO("x-application/x-unittest pyunit\n")
self.db.readfp(sio) self.db.readfp(sio)
eq(self.db.guess_type("foo.pyunit"), eq(self.db.guess_file_type("foo.pyunit"),
("x-application/x-unittest", None)) ("x-application/x-unittest", None))
eq(self.db.guess_extension("x-application/x-unittest"), ".pyunit") eq(self.db.guess_extension("x-application/x-unittest"), ".pyunit")
@ -95,12 +103,12 @@ class MimeTypesTestCase(unittest.TestCase):
def test_non_standard_types(self): def test_non_standard_types(self):
eq = self.assertEqual eq = self.assertEqual
# First try strict # First try strict
eq(self.db.guess_type('foo.xul', strict=True), (None, None)) eq(self.db.guess_file_type('foo.xul', strict=True), (None, None))
eq(self.db.guess_extension('image/jpg', strict=True), None) eq(self.db.guess_extension('image/jpg', strict=True), None)
# And then non-strict # And then non-strict
eq(self.db.guess_type('foo.xul', strict=False), ('text/xul', None)) eq(self.db.guess_file_type('foo.xul', strict=False), ('text/xul', None))
eq(self.db.guess_type('foo.XUL', strict=False), ('text/xul', None)) eq(self.db.guess_file_type('foo.XUL', strict=False), ('text/xul', None))
eq(self.db.guess_type('foo.invalid', strict=False), (None, None)) eq(self.db.guess_file_type('foo.invalid', strict=False), (None, None))
eq(self.db.guess_extension('image/jpg', strict=False), '.jpg') eq(self.db.guess_extension('image/jpg', strict=False), '.jpg')
eq(self.db.guess_extension('image/JPG', strict=False), '.jpg') eq(self.db.guess_extension('image/JPG', strict=False), '.jpg')
@ -124,15 +132,26 @@ class MimeTypesTestCase(unittest.TestCase):
'//share/server/', '\\\\share\\server\\'): '//share/server/', '\\\\share\\server\\'):
path = prefix + name path = prefix + name
with self.subTest(path=path): with self.subTest(path=path):
eq(self.db.guess_file_type(path), gzip_expected)
eq(self.db.guess_type(path), gzip_expected) eq(self.db.guess_type(path), gzip_expected)
expected = (None, None) if os.name == 'nt' else gzip_expected expected = (None, None) if os.name == 'nt' else gzip_expected
for prefix in ('//', '\\\\', '//share/', '\\\\share\\'): for prefix in ('//', '\\\\', '//share/', '\\\\share\\'):
path = prefix + name path = prefix + name
with self.subTest(path=path): with self.subTest(path=path):
eq(self.db.guess_file_type(path), expected)
eq(self.db.guess_type(path), expected) eq(self.db.guess_type(path), expected)
eq(self.db.guess_file_type(r" \"\`;b&b&c |.tar.gz"), gzip_expected)
eq(self.db.guess_type(r" \"\`;b&b&c |.tar.gz"), gzip_expected) eq(self.db.guess_type(r" \"\`;b&b&c |.tar.gz"), gzip_expected)
eq(self.db.guess_file_type(r'foo/.tar.gz'), (None, 'gzip'))
eq(self.db.guess_type(r'foo/.tar.gz'), (None, 'gzip'))
expected = (None, 'gzip') if os.name == 'nt' else gzip_expected
eq(self.db.guess_file_type(r'foo\.tar.gz'), expected)
eq(self.db.guess_type(r'foo\.tar.gz'), expected)
eq(self.db.guess_type(r'scheme:foo\.tar.gz'), gzip_expected)
def test_url(self): def test_url(self):
result = self.db.guess_type('http://example.com/host.html')
result = self.db.guess_type('http://host.html') result = self.db.guess_type('http://host.html')
msg = 'URL only has a host name, not a file' msg = 'URL only has a host name, not a file'
self.assertSequenceEqual(result, (None, None), msg) self.assertSequenceEqual(result, (None, None), msg)
@ -242,22 +261,38 @@ class MimeTypesTestCase(unittest.TestCase):
def test_path_like_ob(self): def test_path_like_ob(self):
filename = "LICENSE.txt" filename = "LICENSE.txt"
filepath = pathlib.Path(filename) filepath = os_helper.FakePath(filename)
filepath_with_abs_dir = pathlib.Path('/dir/'+filename) filepath_with_abs_dir = os_helper.FakePath('/dir/'+filename)
filepath_relative = pathlib.Path('../dir/'+filename) filepath_relative = os_helper.FakePath('../dir/'+filename)
path_dir = pathlib.Path('./') path_dir = os_helper.FakePath('./')
expected = self.db.guess_type(filename) expected = self.db.guess_file_type(filename)
self.assertEqual(self.db.guess_file_type(filepath), expected)
self.assertEqual(self.db.guess_type(filepath), expected) self.assertEqual(self.db.guess_type(filepath), expected)
self.assertEqual(self.db.guess_file_type(
filepath_with_abs_dir), expected)
self.assertEqual(self.db.guess_type( self.assertEqual(self.db.guess_type(
filepath_with_abs_dir), expected) filepath_with_abs_dir), expected)
self.assertEqual(self.db.guess_file_type(filepath_relative), expected)
self.assertEqual(self.db.guess_type(filepath_relative), expected) self.assertEqual(self.db.guess_type(filepath_relative), expected)
self.assertEqual(self.db.guess_file_type(path_dir), (None, None))
self.assertEqual(self.db.guess_type(path_dir), (None, None)) self.assertEqual(self.db.guess_type(path_dir), (None, None))
def test_bytes_path(self):
self.assertEqual(self.db.guess_file_type(b'foo.html'),
self.db.guess_file_type('foo.html'))
self.assertEqual(self.db.guess_file_type(b'foo.tar.gz'),
self.db.guess_file_type('foo.tar.gz'))
self.assertEqual(self.db.guess_file_type(b'foo.tgz'),
self.db.guess_file_type('foo.tgz'))
def test_keywords_args_api(self): def test_keywords_args_api(self):
self.assertEqual(self.db.guess_file_type(
path="foo.html", strict=True), ("text/html", None))
self.assertEqual(self.db.guess_type( self.assertEqual(self.db.guess_type(
url="foo.html", strict=True), ("text/html", None)) url="scheme:foo.html", strict=True), ("text/html", None))
self.assertEqual(self.db.guess_all_extensions( self.assertEqual(self.db.guess_all_extensions(
type='image/jpg', strict=True), []) type='image/jpg', strict=True), [])
self.assertEqual(self.db.guess_extension( self.assertEqual(self.db.guess_extension(

View File

@ -0,0 +1,3 @@
Add the :func:`mimetypes.guess_file_type` function, which works with file
paths. Passing a file path instead of a URL to :func:`~mimetypes.guess_type` is
:term:`soft deprecated`.