Refactor so that it is easier to work with alternate MIME types databases,

and programmatically extend the database in different ways. This closes the SF bug (feature request) #439710.
2001-08-03 21:01:44 +00:00 · 2001-08-03 21:01:44 +00:00 · eeee4ec4f1
parent e861365dab
commit eeee4ec4f1
1 changed files with 138 additions and 67 deletions
--- a/Lib/mimetypes.py
+++ b/Lib/mimetypes.py
@ -12,7 +12,7 @@ Data:

 knownfiles -- list of files to parse
 inited -- flag set when init() has been called
-suffixes_map -- dictionary mapping suffixes to suffixes
+suffix_map -- dictionary mapping suffixes to suffixes
 encodings_map -- dictionary mapping suffixes to encodings
 types_map -- dictionary mapping suffixes to types

@ -23,6 +23,7 @@ read_mime_types(file) -- parse one file, return a dictionary or None

 """

+import os
 import posixpath
 import urllib

@ -37,24 +38,40 @@ knownfiles = [

 inited = 0

-def guess_type(url):
-    """Guess the type of a file based on its URL.

-    Return value is a tuple (type, encoding) where type is None if the
-    type can't be guessed (no or unknown suffix) or a string of the
-    form type/subtype, usable for a MIME Content-type header; and
-    encoding is None for no encoding or the name of the program used
-    to encode (e.g. compress or gzip).  The mappings are table
-    driven.  Encoding suffixes are case sensitive; type suffixes are
-    first tried case sensitive, then case insensitive.
-
-    The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
-    to ".tar.gz".  (This is table-driven too, using the dictionary
-    suffix_map).
+class MimeTypes:
+    """MIME-types datastore.

+    This datastore can handle information from mime.types-style files
+    and supports basic determination of MIME type from a filename or
+    URL, and can guess a reasonable extension given a MIME type.
    """
+
+    def __init__(self, filenames=()):
        if not inited:
            init()
+        self.encodings_map = encodings_map.copy()
+        self.suffix_map = suffix_map.copy()
+        self.types_map = types_map.copy()
+        for name in filenames:
+            self.read(name)
+
+    def guess_type(self, url):
+        """Guess the type of a file based on its URL.
+
+        Return value is a tuple (type, encoding) where type is None if
+        the type can't be guessed (no or unknown suffix) or a string
+        of the form type/subtype, usable for a MIME Content-type
+        header; and encoding is None for no encoding or the name of
+        the program used to encode (e.g. compress or gzip).  The
+        mappings are table driven.  Encoding suffixes are case
+        sensitive; type suffixes are first tried case sensitive, then
+        case insensitive.
+
+        The suffixes .tgz, .taz and .tz (case sensitive!) are all
+        mapped to '.tar.gz'.  (This is table-driven too, using the
+        dictionary suffix_map.)
+        """
        scheme, url = urllib.splittype(url)
        if scheme == 'data':
            # syntax of data URLs:
@ -76,13 +93,14 @@ def guess_type(url):
                type = 'text/plain'
            return type, None           # never compressed, so encoding is None
        base, ext = posixpath.splitext(url)
-    while suffix_map.has_key(ext):
-        base, ext = posixpath.splitext(base + suffix_map[ext])
-    if encodings_map.has_key(ext):
-        encoding = encodings_map[ext]
+        while self.suffix_map.has_key(ext):
+            base, ext = posixpath.splitext(base + self.suffix_map[ext])
+        if self.encodings_map.has_key(ext):
+            encoding = self.encodings_map[ext]
            base, ext = posixpath.splitext(base)
        else:
            encoding = None
+        types_map = self.types_map
        if types_map.has_key(ext):
            return types_map[ext], encoding
        elif types_map.has_key(ext.lower()):
@ -90,6 +108,66 @@ def guess_type(url):
        else:
            return None, encoding

+    def guess_extension(self, type):
+        """Guess the extension for a file based on its MIME type.
+
+        Return value is a string giving a filename extension,
+        including the leading dot ('.').  The extension is not
+        guaranteed to have been associated with any particular data
+        stream, but would be mapped to the MIME type `type' by
+        guess_type().  If no extension can be guessed for `type', None
+        is returned.
+        """
+        type = type.lower()
+        for ext, stype in self.types_map.items():
+            if type == stype:
+                return ext
+        return None
+
+    def read(self, filename):
+        """Read a single mime.types-format file, specified by pathname."""
+        fp = open(filename)
+        self.readfp(fp)
+        fp.close()
+
+    def readfp(self):
+        """Read a single mime.types-format file."""
+        map = self.types_map
+        while 1:
+            line = f.readline()
+            if not line:
+                break
+            words = line.split()
+            for i in range(len(words)):
+                if words[i][0] == '#':
+                    del words[i:]
+                    break
+            if not words:
+                continue
+            type, suffixes = words[0], words[1:]
+            for suff in suffixes:
+                map['.' + suff] = type
+
+
+def guess_type(url):
+    """Guess the type of a file based on its URL.
+
+    Return value is a tuple (type, encoding) where type is None if the
+    type can't be guessed (no or unknown suffix) or a string of the
+    form type/subtype, usable for a MIME Content-type header; and
+    encoding is None for no encoding or the name of the program used
+    to encode (e.g. compress or gzip).  The mappings are table
+    driven.  Encoding suffixes are case sensitive; type suffixes are
+    first tried case sensitive, then case insensitive.
+
+    The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
+    to ".tar.gz".  (This is table-driven too, using the dictionary
+    suffix_map).
+    """
+    init()
+    return guess_type(url)
+
+
 def guess_extension(type):
    """Guess the extension for a file based on its MIME type.

@ -99,44 +177,37 @@ def guess_extension(type):
    MIME type `type' by guess_type().  If no extension can be guessed for
    `type', None is returned.
    """
-    global inited
-    if not inited:
    init()
-    type = type.lower()
-    for ext, stype in types_map.items():
-        if type == stype:
-            return ext
-    return None
+    return guess_extension(type)
+

 def init(files=None):
+    global guess_extension, guess_type
+    global suffix_map, types_map, encodings_map
    global inited
-    for file in files or knownfiles:
-        s = read_mime_types(file)
-        if s:
-            for key, value in s.items():
-                types_map[key] = value
    inited = 1
+    db = MimeTypes()
+    if files is None:
+        files = knownfiles
+    for file in files:
+        if os.path.isfile(file):
+            db.readfp(open(file))
+    encodings_map = db.encodings_map
+    suffix_map = db.encodings_map
+    types_map = db.types_map
+    guess_extension = db.guess_extension
+    guess_type = db.guess_type
+

 def read_mime_types(file):
    try:
        f = open(file)
    except IOError:
        return None
-    map = {}
-    while 1:
-        line = f.readline()
-        if not line: break
-        words = line.split()
-        for i in range(len(words)):
-            if words[i][0] == '#':
-                del words[i:]
-                break
-        if not words: continue
-        type, suffixes = words[0], words[1:]
-        for suff in suffixes:
-            map['.'+suff] = type
-    f.close()
-    return map
+    db = MimeTypes()
+    db.readfp(f)
+    return db.types_map
+

 suffix_map = {
    '.tgz': '.tar.gz',