mirror of https://github.com/python/cpython
Handy utility to guess MIME type from extension.
This commit is contained in:
parent
3a74993118
commit
ac8a9f3ee9
|
@ -0,0 +1,190 @@
|
|||
"""Guess the MIME type of a file.
|
||||
|
||||
This module defines one useful function:
|
||||
|
||||
guess_type(url) -- guess the MIME type and encoding of a URL.
|
||||
|
||||
It also contains the following, for tuning the behavior:
|
||||
|
||||
Data:
|
||||
|
||||
knownfiles -- list of files to parse
|
||||
inited -- flag set when init() has been called
|
||||
suffix_map -- dictionary mapping suffixes to suffixes
|
||||
encodings_map -- dictionary mapping suffixes to encodings
|
||||
types_map -- dictionary mapping suffixes to types
|
||||
|
||||
Functions:
|
||||
|
||||
init([files]) -- parse a list of files, default knownfiles
|
||||
read_mime_types(file) -- parse one file, return a dictionary or None
|
||||
|
||||
"""
|
||||
|
||||
import string
|
||||
import posixpath
|
||||
|
||||
# Files that init() parses when it is called without an explicit list.
# A file that cannot be opened is skipped, because read_mime_types()
# returns None for it.
knownfiles = [
    "/usr/local/etc/httpd/conf/mime.types",
    "/usr/local/lib/netscape/mime.types",
]

# Set to 1 by init(); guess_type() calls init() first while this is 0.
inited = 0
|
||||
|
||||
def guess_type(url):
    """Guess the type of a file based on its URL.

    Return value is a tuple (type, encoding) where type is None if the
    type can't be guessed (no or unknown suffix) or a string of the
    form type/subtype, usable for a MIME Content-type header; and
    encoding is None for no encoding or the name of the program used
    to encode (e.g. compress or gzip).  The mappings are table
    driven.  Encoding suffixes are case sensitive; type suffixes are
    first tried case sensitive, then case insensitive.

    The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
    to ".tar.gz".  (This is table-driven too, using the dictionary
    suffix_map).

    """
    # Load the mime.types files lazily on the first lookup.
    if not inited:
        init()
    base, ext = posixpath.splitext(url)
    # Expand shorthand suffixes (e.g. ".tgz" -> ".tar.gz") and re-split, so
    # that ext ends up holding only the last component (".gz").
    while ext in suffix_map:
        base, ext = posixpath.splitext(base + suffix_map[ext])
    # A trailing encoding suffix is peeled off; the type is then guessed
    # from the suffix underneath it.
    if ext in encodings_map:
        encoding = encodings_map[ext]
        base, ext = posixpath.splitext(base)
    else:
        encoding = None
    # Exact-case match wins; fall back to the lower-cased suffix.
    if ext in types_map:
        return types_map[ext], encoding
    lowered = ext.lower()
    if lowered in types_map:
        return types_map[lowered], encoding
    return None, encoding
|
||||
|
||||
def init(files=None):
    """Merge the types found in mime.types files into types_map.

    files is a list of file names to parse; when it is omitted (or
    empty) the module-level knownfiles list is used instead.  Files
    that cannot be read, or that yield no entries, are skipped.
    Entries read from a file replace existing entries for the same
    suffix.  The module-level flag inited is set to 1 afterwards.

    """
    global inited
    for name in files or knownfiles:
        entries = read_mime_types(name)
        if entries:
            types_map.update(entries)
    inited = 1
|
||||
|
||||
def read_mime_types(file):
    """Parse one mime.types-style file.

    file is the name of the file to read.  Each line is split on
    whitespace; the first word is a MIME type and the remaining words
    are suffixes (without the leading dot) that map to it.  A word
    starting with '#' begins a comment that runs to the end of the
    line.  Blank and comment-only lines are ignored.

    Return a dictionary mapping '.' + suffix to type, or None if the
    file cannot be opened.

    """
    try:
        f = open(file)
    except IOError:
        return None
    suffix_to_type = {}
    # try/finally guarantees the file is closed even if reading fails.
    try:
        for line in f:
            words = line.split()
            # Drop everything from the first comment word onwards.
            for i, word in enumerate(words):
                if word[0] == '#':
                    del words[i:]
                    break
            if not words:
                continue
            mime_type, suffixes = words[0], words[1:]
            for suff in suffixes:
                suffix_to_type['.' + suff] = mime_type
    finally:
        f.close()
    return suffix_to_type
|
||||
|
||||
# Shorthand suffixes that guess_type() expands (case sensitively) before
# it looks up the encoding and the type.
suffix_map = {
    '.tgz': '.tar.gz',
    '.taz': '.tar.gz',
    '.tz': '.tar.gz',
}
|
||||
|
||||
# Suffixes that name an encoding rather than a type, mapped to the
# encoding name that guess_type() returns.  Keys are looked up exactly
# as written, so '.Z' does not match '.z'.
encodings_map = {
    '.gz': 'gzip',
    '.Z': 'compress',
}
|
||||
|
||||
# Built-in table mapping a file suffix (including the leading dot) to a
# MIME type of the form type/subtype.  init() adds to and overrides
# these entries with whatever it reads from the mime.types files.
types_map = {
    '.a':       'application/octet-stream',
    '.ai':      'application/postscript',
    '.aif':     'audio/x-aiff',
    '.aifc':    'audio/x-aiff',
    '.aiff':    'audio/x-aiff',
    '.au':      'audio/basic',
    '.avi':     'video/x-msvideo',
    '.bcpio':   'application/x-bcpio',
    '.bin':     'application/octet-stream',
    '.cdf':     'application/x-netcdf',
    '.cpio':    'application/x-cpio',
    '.csh':     'application/x-csh',
    '.dll':     'application/octet-stream',
    '.dvi':     'application/x-dvi',
    '.exe':     'application/octet-stream',
    '.eps':     'application/postscript',
    '.etx':     'text/x-setext',
    '.gif':     'image/gif',
    '.gtar':    'application/x-gtar',
    '.hdf':     'application/x-hdf',
    '.htm':     'text/html',
    '.html':    'text/html',
    '.ief':     'image/ief',
    '.jpe':     'image/jpeg',
    '.jpeg':    'image/jpeg',
    '.jpg':     'image/jpeg',
    '.latex':   'application/x-latex',
    '.man':     'application/x-troff-man',
    '.me':      'application/x-troff-me',
    '.mif':     'application/x-mif',
    '.mov':     'video/quicktime',
    '.movie':   'video/x-sgi-movie',
    '.mpe':     'video/mpeg',
    '.mpeg':    'video/mpeg',
    '.mpg':     'video/mpeg',
    '.ms':      'application/x-troff-ms',
    '.nc':      'application/x-netcdf',
    '.o':       'application/octet-stream',
    '.obj':     'application/octet-stream',
    '.oda':     'application/oda',
    '.pbm':     'image/x-portable-bitmap',
    '.pdf':     'application/pdf',
    '.pgm':     'image/x-portable-graymap',
    '.pnm':     'image/x-portable-anymap',
    '.png':     'image/png',
    '.ppm':     'image/x-portable-pixmap',
    '.py':      'text/x-python',
    '.pyc':     'application/x-python-code',
    '.ps':      'application/postscript',
    '.qt':      'video/quicktime',
    '.ras':     'image/x-cmu-raster',
    '.rgb':     'image/x-rgb',
    '.roff':    'application/x-troff',
    '.rtf':     'application/rtf',
    '.rtx':     'text/richtext',
    '.sgm':     'text/x-sgml',
    '.sgml':    'text/x-sgml',
    '.sh':      'application/x-sh',
    '.shar':    'application/x-shar',
    '.snd':     'audio/basic',
    '.so':      'application/octet-stream',
    '.src':     'application/x-wais-source',
    '.sv4cpio': 'application/x-sv4cpio',
    '.sv4crc':  'application/x-sv4crc',
    '.t':       'application/x-troff',
    '.tar':     'application/x-tar',
    '.tcl':     'application/x-tcl',
    '.tex':     'application/x-tex',
    '.texi':    'application/x-texinfo',
    '.texinfo': 'application/x-texinfo',
    '.tif':     'image/tiff',
    '.tiff':    'image/tiff',
    '.tr':      'application/x-troff',
    '.tsv':     'text/tab-separated-values',
    '.txt':     'text/plain',
    '.ustar':   'application/x-ustar',
    '.wav':     'audio/x-wav',
    '.xbm':     'image/x-xbitmap',
    '.xpm':     'image/x-xpixmap',
    '.xwd':     'image/x-xwindowdump',
    '.zip':     'application/zip',
}
|
Loading…
Reference in New Issue