mirror of https://github.com/python/cpython
gh-66543: Add mimetypes.guess_file_type() (GH-117258)
This commit is contained in:
parent
d3c7821335
commit
d6fa1d4bee
|
@ -53,7 +53,7 @@ must be running an SMTP server.
|
|||
# Guess the content type based on the file's extension. Encoding
|
||||
# will be ignored, although we should check for simple things like
|
||||
# gzip'd or compressed files.
|
||||
ctype, encoding = mimetypes.guess_type(path)
|
||||
ctype, encoding = mimetypes.guess_file_type(path)
|
||||
if ctype is None or encoding is not None:
|
||||
# No guess could be made, or the file is encoded (compressed), so
|
||||
# use a generic bag-of-bits type.
|
||||
|
|
|
@ -52,7 +52,22 @@ the information :func:`init` sets up.
|
|||
are also recognized.
|
||||
|
||||
.. versionchanged:: 3.8
|
||||
Added support for url being a :term:`path-like object`.
|
||||
Added support for *url* being a :term:`path-like object`.
|
||||
|
||||
.. deprecated:: 3.13
|
||||
Passing a file path instead of a URL is :term:`soft deprecated`.
|
||||
Use :func:`guess_file_type` for this.
|
||||
|
||||
|
||||
.. function:: guess_file_type(path, *, strict=True)
|
||||
|
||||
.. index:: pair: MIME; headers
|
||||
|
||||
Guess the type of a file based on its path, given by *path*.
|
||||
Similar to the :func:`guess_type` function, but accepts a path instead of a URL.
|
||||
*path* can be a string, a bytes object or a :term:`path-like object`.
|
||||
|
||||
.. versionadded:: 3.13
|
||||
|
||||
|
||||
.. function:: guess_all_extensions(type, strict=True)
|
||||
|
@ -61,7 +76,7 @@ the information :func:`init` sets up.
|
|||
return value is a list of strings giving all possible filename extensions,
|
||||
including the leading dot (``'.'``). The extensions are not guaranteed to have
|
||||
been associated with any particular data stream, but would be mapped to the MIME
|
||||
type *type* by :func:`guess_type`.
|
||||
type *type* by :func:`guess_type` and :func:`guess_file_type`.
|
||||
|
||||
The optional *strict* argument has the same meaning as with the :func:`guess_type` function.
|
||||
|
||||
|
@ -72,8 +87,8 @@ the information :func:`init` sets up.
|
|||
return value is a string giving a filename extension, including the leading dot
|
||||
(``'.'``). The extension is not guaranteed to have been associated with any
|
||||
particular data stream, but would be mapped to the MIME type *type* by
|
||||
:func:`guess_type`. If no extension can be guessed for *type*, ``None`` is
|
||||
returned.
|
||||
:func:`guess_type` and :func:`guess_file_type`.
|
||||
If no extension can be guessed for *type*, ``None`` is returned.
|
||||
|
||||
The optional *strict* argument has the same meaning as with the :func:`guess_type` function.
|
||||
|
||||
|
@ -238,6 +253,14 @@ than one MIME-type database; it provides an interface similar to the one of the
|
|||
the object.
|
||||
|
||||
|
||||
.. method:: MimeTypes.guess_file_type(path, *, strict=True)
|
||||
|
||||
Similar to the :func:`guess_file_type` function, using the tables stored
|
||||
as part of the object.
|
||||
|
||||
.. versionadded:: 3.13
|
||||
|
||||
|
||||
.. method:: MimeTypes.guess_all_extensions(type, strict=True)
|
||||
|
||||
Similar to the :func:`guess_all_extensions` function, using the tables stored
|
||||
|
|
|
@ -865,7 +865,7 @@ directory and port number (default: 8000) on the command line::
|
|||
fn = os.path.join(path, environ["PATH_INFO"][1:])
|
||||
if "." not in fn.split(os.path.sep)[-1]:
|
||||
fn = os.path.join(fn, "index.html")
|
||||
mime_type = mimetypes.guess_type(fn)[0]
|
||||
mime_type = mimetypes.guess_file_type(fn)[0]
|
||||
|
||||
# Return 200 OK if file exists, otherwise 404 Not Found
|
||||
if os.path.exists(fn):
|
||||
|
|
|
@ -623,6 +623,13 @@ math
|
|||
"fusedMultiplyAdd" operation for special cases.
|
||||
(Contributed by Mark Dickinson and Victor Stinner in :gh:`73468`.)
|
||||
|
||||
mimetypes
|
||||
---------
|
||||
|
||||
* Add the :func:`~mimetypes.guess_file_type` function which works with a file path.
|
||||
Passing a file path instead of a URL in :func:`~mimetypes.guess_type` is :term:`soft deprecated`.
|
||||
(Contributed by Serhiy Storchaka in :gh:`66543`.)
|
||||
|
||||
mmap
|
||||
----
|
||||
|
||||
|
@ -1167,6 +1174,10 @@ Deprecated
|
|||
|
||||
.. Add deprecations above alphabetically, not here at the end.
|
||||
|
||||
* Passing a file path instead of a URL in :func:`~mimetypes.guess_type` is :term:`soft deprecated`.
|
||||
Use :func:`~mimetypes.guess_file_type` instead.
|
||||
(Contributed by Serhiy Storchaka in :gh:`66543`.)
|
||||
|
||||
Pending Removal in Python 3.14
|
||||
------------------------------
|
||||
|
||||
|
|
|
@ -897,7 +897,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
|
|||
ext = ext.lower()
|
||||
if ext in self.extensions_map:
|
||||
return self.extensions_map[ext]
|
||||
guess, _ = mimetypes.guess_type(path)
|
||||
guess, _ = mimetypes.guess_file_type(path)
|
||||
if guess:
|
||||
return guess
|
||||
return 'application/octet-stream'
|
||||
|
|
|
@ -40,7 +40,7 @@ except ImportError:
|
|||
|
||||
__all__ = [
|
||||
"knownfiles", "inited", "MimeTypes",
|
||||
"guess_type", "guess_all_extensions", "guess_extension",
|
||||
"guess_type", "guess_file_type", "guess_all_extensions", "guess_extension",
|
||||
"add_type", "init", "read_mime_types",
|
||||
"suffix_map", "encodings_map", "types_map", "common_types"
|
||||
]
|
||||
|
@ -119,14 +119,14 @@ class MimeTypes:
|
|||
Optional `strict' argument when False adds a bunch of commonly found,
|
||||
but non-standard types.
|
||||
"""
|
||||
# TODO: Deprecate accepting file paths (in particular path-like objects).
|
||||
url = os.fspath(url)
|
||||
p = urllib.parse.urlparse(url)
|
||||
if p.scheme and len(p.scheme) > 1:
|
||||
scheme = p.scheme
|
||||
url = p.path
|
||||
else:
|
||||
scheme = None
|
||||
url = os.path.splitdrive(url)[1]
|
||||
return self.guess_file_type(url, strict=strict)
|
||||
if scheme == 'data':
|
||||
# syntax of data URLs:
|
||||
# dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
|
||||
|
@ -146,13 +146,25 @@ class MimeTypes:
|
|||
if '=' in type or '/' not in type:
|
||||
type = 'text/plain'
|
||||
return type, None # never compressed, so encoding is None
|
||||
base, ext = posixpath.splitext(url)
|
||||
return self._guess_file_type(url, strict, posixpath.splitext)
|
||||
|
||||
def guess_file_type(self, path, *, strict=True):
|
||||
"""Guess the type of a file based on its path.
|
||||
|
||||
Similar to guess_type(), but takes file path instead of URL.
|
||||
"""
|
||||
path = os.fsdecode(path)
|
||||
path = os.path.splitdrive(path)[1]
|
||||
return self._guess_file_type(path, strict, os.path.splitext)
|
||||
|
||||
def _guess_file_type(self, path, strict, splitext):
|
||||
base, ext = splitext(path)
|
||||
while (ext_lower := ext.lower()) in self.suffix_map:
|
||||
base, ext = posixpath.splitext(base + self.suffix_map[ext_lower])
|
||||
base, ext = splitext(base + self.suffix_map[ext_lower])
|
||||
# encodings_map is case sensitive
|
||||
if ext in self.encodings_map:
|
||||
encoding = self.encodings_map[ext]
|
||||
base, ext = posixpath.splitext(base)
|
||||
base, ext = splitext(base)
|
||||
else:
|
||||
encoding = None
|
||||
ext = ext.lower()
|
||||
|
@ -310,6 +322,16 @@ def guess_type(url, strict=True):
|
|||
return _db.guess_type(url, strict)
|
||||
|
||||
|
||||
def guess_file_type(path, *, strict=True):
|
||||
"""Guess the type of a file based on its path.
|
||||
|
||||
Similar to guess_type(), but takes file path instead of URL.
|
||||
"""
|
||||
if _db is None:
|
||||
init()
|
||||
return _db.guess_file_type(path, strict=strict)
|
||||
|
||||
|
||||
def guess_all_extensions(type, strict=True):
|
||||
"""Guess the extensions for a file based on its MIME type.
|
||||
|
||||
|
|
|
@ -36,20 +36,28 @@ class MimeTypesTestCase(unittest.TestCase):
|
|||
|
||||
def test_case_sensitivity(self):
|
||||
eq = self.assertEqual
|
||||
eq(self.db.guess_type("foobar.HTML"), self.db.guess_type("foobar.html"))
|
||||
eq(self.db.guess_type("foobar.TGZ"), self.db.guess_type("foobar.tgz"))
|
||||
eq(self.db.guess_type("foobar.tar.Z"), ("application/x-tar", "compress"))
|
||||
eq(self.db.guess_type("foobar.tar.z"), (None, None))
|
||||
eq(self.db.guess_file_type("foobar.html"), ("text/html", None))
|
||||
eq(self.db.guess_type("scheme:foobar.html"), ("text/html", None))
|
||||
eq(self.db.guess_file_type("foobar.HTML"), ("text/html", None))
|
||||
eq(self.db.guess_type("scheme:foobar.HTML"), ("text/html", None))
|
||||
eq(self.db.guess_file_type("foobar.tgz"), ("application/x-tar", "gzip"))
|
||||
eq(self.db.guess_type("scheme:foobar.tgz"), ("application/x-tar", "gzip"))
|
||||
eq(self.db.guess_file_type("foobar.TGZ"), ("application/x-tar", "gzip"))
|
||||
eq(self.db.guess_type("scheme:foobar.TGZ"), ("application/x-tar", "gzip"))
|
||||
eq(self.db.guess_file_type("foobar.tar.Z"), ("application/x-tar", "compress"))
|
||||
eq(self.db.guess_type("scheme:foobar.tar.Z"), ("application/x-tar", "compress"))
|
||||
eq(self.db.guess_file_type("foobar.tar.z"), (None, None))
|
||||
eq(self.db.guess_type("scheme:foobar.tar.z"), (None, None))
|
||||
|
||||
def test_default_data(self):
|
||||
eq = self.assertEqual
|
||||
eq(self.db.guess_type("foo.html"), ("text/html", None))
|
||||
eq(self.db.guess_type("foo.HTML"), ("text/html", None))
|
||||
eq(self.db.guess_type("foo.tgz"), ("application/x-tar", "gzip"))
|
||||
eq(self.db.guess_type("foo.tar.gz"), ("application/x-tar", "gzip"))
|
||||
eq(self.db.guess_type("foo.tar.Z"), ("application/x-tar", "compress"))
|
||||
eq(self.db.guess_type("foo.tar.bz2"), ("application/x-tar", "bzip2"))
|
||||
eq(self.db.guess_type("foo.tar.xz"), ("application/x-tar", "xz"))
|
||||
eq(self.db.guess_file_type("foo.html"), ("text/html", None))
|
||||
eq(self.db.guess_file_type("foo.HTML"), ("text/html", None))
|
||||
eq(self.db.guess_file_type("foo.tgz"), ("application/x-tar", "gzip"))
|
||||
eq(self.db.guess_file_type("foo.tar.gz"), ("application/x-tar", "gzip"))
|
||||
eq(self.db.guess_file_type("foo.tar.Z"), ("application/x-tar", "compress"))
|
||||
eq(self.db.guess_file_type("foo.tar.bz2"), ("application/x-tar", "bzip2"))
|
||||
eq(self.db.guess_file_type("foo.tar.xz"), ("application/x-tar", "xz"))
|
||||
|
||||
def test_data_urls(self):
|
||||
eq = self.assertEqual
|
||||
|
@ -63,7 +71,7 @@ class MimeTypesTestCase(unittest.TestCase):
|
|||
eq = self.assertEqual
|
||||
sio = io.StringIO("x-application/x-unittest pyunit\n")
|
||||
self.db.readfp(sio)
|
||||
eq(self.db.guess_type("foo.pyunit"),
|
||||
eq(self.db.guess_file_type("foo.pyunit"),
|
||||
("x-application/x-unittest", None))
|
||||
eq(self.db.guess_extension("x-application/x-unittest"), ".pyunit")
|
||||
|
||||
|
@ -95,12 +103,12 @@ class MimeTypesTestCase(unittest.TestCase):
|
|||
def test_non_standard_types(self):
|
||||
eq = self.assertEqual
|
||||
# First try strict
|
||||
eq(self.db.guess_type('foo.xul', strict=True), (None, None))
|
||||
eq(self.db.guess_file_type('foo.xul', strict=True), (None, None))
|
||||
eq(self.db.guess_extension('image/jpg', strict=True), None)
|
||||
# And then non-strict
|
||||
eq(self.db.guess_type('foo.xul', strict=False), ('text/xul', None))
|
||||
eq(self.db.guess_type('foo.XUL', strict=False), ('text/xul', None))
|
||||
eq(self.db.guess_type('foo.invalid', strict=False), (None, None))
|
||||
eq(self.db.guess_file_type('foo.xul', strict=False), ('text/xul', None))
|
||||
eq(self.db.guess_file_type('foo.XUL', strict=False), ('text/xul', None))
|
||||
eq(self.db.guess_file_type('foo.invalid', strict=False), (None, None))
|
||||
eq(self.db.guess_extension('image/jpg', strict=False), '.jpg')
|
||||
eq(self.db.guess_extension('image/JPG', strict=False), '.jpg')
|
||||
|
||||
|
@ -124,15 +132,26 @@ class MimeTypesTestCase(unittest.TestCase):
|
|||
'//share/server/', '\\\\share\\server\\'):
|
||||
path = prefix + name
|
||||
with self.subTest(path=path):
|
||||
eq(self.db.guess_file_type(path), gzip_expected)
|
||||
eq(self.db.guess_type(path), gzip_expected)
|
||||
expected = (None, None) if os.name == 'nt' else gzip_expected
|
||||
for prefix in ('//', '\\\\', '//share/', '\\\\share\\'):
|
||||
path = prefix + name
|
||||
with self.subTest(path=path):
|
||||
eq(self.db.guess_file_type(path), expected)
|
||||
eq(self.db.guess_type(path), expected)
|
||||
eq(self.db.guess_file_type(r" \"\`;b&b&c |.tar.gz"), gzip_expected)
|
||||
eq(self.db.guess_type(r" \"\`;b&b&c |.tar.gz"), gzip_expected)
|
||||
|
||||
eq(self.db.guess_file_type(r'foo/.tar.gz'), (None, 'gzip'))
|
||||
eq(self.db.guess_type(r'foo/.tar.gz'), (None, 'gzip'))
|
||||
expected = (None, 'gzip') if os.name == 'nt' else gzip_expected
|
||||
eq(self.db.guess_file_type(r'foo\.tar.gz'), expected)
|
||||
eq(self.db.guess_type(r'foo\.tar.gz'), expected)
|
||||
eq(self.db.guess_type(r'scheme:foo\.tar.gz'), gzip_expected)
|
||||
|
||||
def test_url(self):
|
||||
result = self.db.guess_type('http://example.com/host.html')
|
||||
result = self.db.guess_type('http://host.html')
|
||||
msg = 'URL only has a host name, not a file'
|
||||
self.assertSequenceEqual(result, (None, None), msg)
|
||||
|
@ -242,22 +261,38 @@ class MimeTypesTestCase(unittest.TestCase):
|
|||
|
||||
def test_path_like_ob(self):
|
||||
filename = "LICENSE.txt"
|
||||
filepath = pathlib.Path(filename)
|
||||
filepath_with_abs_dir = pathlib.Path('/dir/'+filename)
|
||||
filepath_relative = pathlib.Path('../dir/'+filename)
|
||||
path_dir = pathlib.Path('./')
|
||||
filepath = os_helper.FakePath(filename)
|
||||
filepath_with_abs_dir = os_helper.FakePath('/dir/'+filename)
|
||||
filepath_relative = os_helper.FakePath('../dir/'+filename)
|
||||
path_dir = os_helper.FakePath('./')
|
||||
|
||||
expected = self.db.guess_type(filename)
|
||||
expected = self.db.guess_file_type(filename)
|
||||
|
||||
self.assertEqual(self.db.guess_file_type(filepath), expected)
|
||||
self.assertEqual(self.db.guess_type(filepath), expected)
|
||||
self.assertEqual(self.db.guess_file_type(
|
||||
filepath_with_abs_dir), expected)
|
||||
self.assertEqual(self.db.guess_type(
|
||||
filepath_with_abs_dir), expected)
|
||||
self.assertEqual(self.db.guess_file_type(filepath_relative), expected)
|
||||
self.assertEqual(self.db.guess_type(filepath_relative), expected)
|
||||
|
||||
self.assertEqual(self.db.guess_file_type(path_dir), (None, None))
|
||||
self.assertEqual(self.db.guess_type(path_dir), (None, None))
|
||||
|
||||
def test_bytes_path(self):
|
||||
self.assertEqual(self.db.guess_file_type(b'foo.html'),
|
||||
self.db.guess_file_type('foo.html'))
|
||||
self.assertEqual(self.db.guess_file_type(b'foo.tar.gz'),
|
||||
self.db.guess_file_type('foo.tar.gz'))
|
||||
self.assertEqual(self.db.guess_file_type(b'foo.tgz'),
|
||||
self.db.guess_file_type('foo.tgz'))
|
||||
|
||||
def test_keywords_args_api(self):
|
||||
self.assertEqual(self.db.guess_file_type(
|
||||
path="foo.html", strict=True), ("text/html", None))
|
||||
self.assertEqual(self.db.guess_type(
|
||||
url="foo.html", strict=True), ("text/html", None))
|
||||
url="scheme:foo.html", strict=True), ("text/html", None))
|
||||
self.assertEqual(self.db.guess_all_extensions(
|
||||
type='image/jpg', strict=True), [])
|
||||
self.assertEqual(self.db.guess_extension(
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
Add the :func:`mimetypes.guess_file_type` function which works with a file
|
||||
path. Passing a file path instead of a URL in :func:`~mimetypes.guess_type` is
|
||||
:term:`soft deprecated`.
|
Loading…
Reference in New Issue