Finalize this module for Python 2.0 based on feedback and input from

Martin von Loewis, Peter Funk, James Henstridge, Francois Pinard, and Marc-Andre Lemburg.
2000-08-30 03:29:58 +00:00 · 2000-08-30 03:29:58 +00:00 · 33d8d705b8
parent 28b815f28a
commit 33d8d705b8
1 changed files with 141 additions and 174 deletions
--- a/Lib/gettext.py
+++ b/Lib/gettext.py
@ -6,83 +6,7 @@ message catalog library.
 I18N refers to the operation by which a program is made aware of multiple
 languages.  L10N refers to the adaptation of your program, once
-internationalized, to the local language and cultural habits.  In order to
+internationalized, to the local language and cultural habits.
 provide multilingual messages for your Python programs, you need to take the
 following steps:
    - prepare your program by specially marking translatable strings
    - run a suite of tools over your marked program files to generate raw
      messages catalogs
    - create language specific translations of the message catalogs
    - use this module so that message strings are properly translated
 In order to prepare your program for I18N, you need to look at all the strings
 in your program.  Any string that needs to be translated should be marked by
 wrapping it in _('...') -- i.e. a call to the function `_'.  For example:
    filename = 'mylog.txt'
    message = _('writing a log message')
    fp = open(filename, 'w')
    fp.write(message)
    fp.close()
 In this example, the string `writing a log message' is marked as a candidate
 for translation, while the strings `mylog.txt' and `w' are not.
 The GNU gettext package provides a tool, called xgettext, that scans C and C++
 source code looking for these specially marked strings.  xgettext generates
 what are called `.pot' files, essentially structured human readable files
 which contain every marked string in the source code.  These .pot files are
 copied and handed over to translators who write language-specific versions for
 every supported language.
 For I18N Python programs however, xgettext won't work; it doesn't understand
 the myriad of string types support by Python.  The standard Python
 distribution provides a tool called pygettext that does though (found in the
 Tools/i18n directory).  This is a command line script that supports a similar
 interface as xgettext; see its documentation for details.  Once you've used
 pygettext to create your .pot files, you can use the standard GNU gettext
 tools to generate your machine-readable .mo files, which are what's used by
 this module.
 In the simple case, to use this module then, you need only add the following
 bit of code to the main driver file of your application:
    import gettext
    gettext.install()
 This sets everything up so that your _('...') function calls Just Work.  In
 other words, it installs `_' in the builtins namespace for convenience.  You
 can skip this step and do it manually by the equivalent code:
    import gettext
    import __builtin__
    __builtin__['_'] = gettext.gettext
 Once you've done this, you probably want to call bindtextdomain() and
 textdomain() to get the domain set up properly.  Again, for convenience, you
 can pass the domain and localedir to install to set everything up in one fell
 swoop:
    import gettext
    gettext.install('mydomain', '/my/locale/dir')
 If your program needs to support many languages at the same time, you will
 want to create Translation objects explicitly, like so:
    import gettext
    gettext.install()
    lang1 = gettext.Translations(open('/path/to/my/lang1/messages.mo'))
    lang2 = gettext.Translations(open('/path/to/my/lang2/messages.mo'))
    lang3 = gettext.Translations(open('/path/to/my/lang3/messages.mo'))
    gettext.set(lang1)
    # all _() will now translate to language 1
    gettext.set(lang2)
    # all _() will now translate to language 2
 Currently, only GNU gettext format binary .mo files are supported.
 """
@ -104,21 +28,27 @@ Currently, only GNU gettext format binary .mo files are supported.
 #
 # Barry Warsaw integrated these modules, wrote the .install() API and code,
 # and conformed all C and Python code to Python's coding standards.
 #
 # Francois Pinard and Marc-Andre Lemburg also contributed valuably to this
 # module.
 #
 # TODO:
 # - Lazy loading of .mo files.  Currently the entire catalog is loaded into
 #   memory, but that's probably bad for large translated programs.  Instead,
 #   the lexical sort of original strings in GNU .mo files should be exploited
 #   to do binary searches and lazy initializations.  Or you might want to use
 #   the undocumented double-hash algorithm for .mo files with hash tables, but
 #   you'll need to study the GNU gettext code to do this.
 #
 # - Support Solaris .mo file formats.  Unfortunately, we've been unable to
 #   find this format documented anywhere.
 import os
 import sys
 import struct
-from UserDict import UserDict
+from errno import ENOENT
-
+_default_localedir = os.path.join(sys.prefix, 'share', 'locale')
 # globals
 _translations = {}
 _current_translation = None
 _current_domain = 'messages'
 # Domain to directory mapping, for use by bindtextdomain()
 _localedirs = {}
@ -165,16 +95,37 @@ def _expand_lang(locale):
-class GNUTranslations(UserDict):
+class NullTranslations:
-    # Magic number of .mo files
+    def __init__(self, fp=None):
-    MAGIC = 0x950412de
+        self._info = {}
        self._charset = None
        if fp:
            self._parse(fp)
-    def __init__(self, fp):
+    def _parse(self, fp):
-        if fp is None:
+        pass
-            d = {}
+
-        else:
+    def gettext(self, message):
-            d = self._parse(fp)
+        return message
-        UserDict.__init__(self, d)
+
    def ugettext(self, message):
        return unicode(message)
    def info(self):
        return self._info
    def charset(self):
        return self._charset
    def install(self, unicode=0):
        import __builtin__
        __builtin__.__dict__['_'] = unicode and self.ugettext or self.gettext
 class GNUTranslations(NullTranslations):
    # Magic number of .mo files
    LE_MAGIC = 0x950412de
    BE_MAGIC = struct.unpack('>i', struct.pack('<i', LE_MAGIC))[0]
    def _parse(self, fp):
        """Override this method to support alternative .mo formats."""
@ -182,51 +133,62 @@ class GNUTranslations(UserDict):
        filename = getattr(fp, 'name', '')
        # Parse the .mo file header, which consists of 5 little endian 32
        # bit words.
-        catalog = {}
+        self._catalog = catalog = {}
        buf = fp.read()
-        magic, version, msgcount, masteridx, transidx = unpack(
+        # Are we big endian or little endian?
-            '<5i', buf[:20])
+        magic = unpack('<i', buf[:4])[0]
-        if magic <> self.MAGIC:
+        if magic == self.LE_MAGIC:
            version, msgcount, masteridx, transidx = unpack('<4i', buf[4:20])
            ii = '<ii'
        elif magic == self.BE_MAGIC:
            version, msgcount, masteridx, transidx = unpack('>4i', buf[4:20])
            ii = '>ii'
        else:
            raise IOError(0, 'Bad magic number', filename)
        #
        # Now put all messages from the .mo file buffer into the catalog
        # dictionary.
        for i in xrange(0, msgcount):
-            mstart = unpack('<i', buf[masteridx+4:masteridx+8])[0]
+            mlen, moff = unpack(ii, buf[masteridx:masteridx+8])
-            mend = mstart + unpack('<i', buf[masteridx:masteridx+4])[0]
+            mend = moff + mlen
-            tstart = unpack('<i', buf[transidx+4:transidx+8])[0]
+            tlen, toff = unpack(ii, buf[transidx:transidx+8])
-            tend = tstart + unpack('<i', buf[transidx:transidx+4])[0]
+            tend = toff + tlen
            if mend < len(buf) and tend < len(buf):
-                catalog[buf[mstart:mend]] = buf[tstart:tend]
+                tmsg = buf[toff:tend]
                catalog[buf[moff:mend]] = tmsg
            else:
                raise IOError(0, 'File is corrupt', filename)
-            #
+            # See if we're looking at GNU .mo conventions for metadata
            if mlen == 0 and tmsg.lower().startswith('project-id-version:'):
                # Catalog description
                for item in tmsg.split('\n'):
                    item = item.strip()
                    if not item:
                        continue
                    k, v = item.split(':', 1)
                    k = k.strip().lower()
                    v = v.strip()
                    self._info[k] = v
                    if k == 'content-type':
                        self._charset = v.split('charset=')[1]
            # advance to next entry in the seek tables
            masteridx += 8
            transidx += 8
-        return catalog
+
    def gettext(self, message):
        return self._catalog.get(message, message)
    def ugettext(self, message):
        tmsg = self._catalog.get(message, message)
        return unicode(tmsg, self._charset)
 # By default, use GNU gettext format .mo files
 Translations = GNUTranslations
 # Locate a .mo file using the gettext strategy
-def _find(localedir=None, languages=None, domain=None):
+def find(domain, localedir=None, languages=None):
    global _current_domain
    global _localedirs
    # Get some reasonable defaults for arguments that were not supplied
    if domain is None:
        domain = _current_domain
    if localedir is None:
-        localedir = _localedirs.get(
+        localedir = _default_localedir
            domain,
            # TBD: The default localedir is actually system dependent.  I
            # don't know of a good platform-consistent and portable way to
            # default it, so instead, we'll just use sys.prefix.  Most
            # programs should be calling bindtextdomain() or such explicitly
            # anyway.
            os.path.join(sys.prefix, 'share', 'locale'))
    if languages is None:
        languages = []
        for envar in ('LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG'):
@ -247,72 +209,77 @@ def _find(localedir=None, languages=None, domain=None):
        if lang == 'C':
            break
        mofile = os.path.join(localedir, lang, 'LC_MESSAGES', '%s.mo' % domain)
-        # see if it's in the cache
+        if os.path.exists(mofile):
-        mo = _translations.get(mofile)
+            return mofile
-        if mo:
+    return None
            return mo
        fp = None
        try:
            try:
                fp = open(mofile, 'rb')
                t = Translations(fp)
                _translations[mofile] = t
                return t
            except IOError:
                pass
        finally:
            if fp:
                fp.close()
    return {}
-def bindtextdomain(domain=None, localedir=None):
+# a mapping between absolute .mo file path and Translation object
-    """Bind domain to a file in the specified directory."""
+_translations = {}
-    global _localedirs
+
-    if domain is None:
+def translation(domain, localedir=None, languages=None, class_=None):
-        return None
+    if class_ is None:
-    if localedir is None:
+        class_ = GNUTranslations
-        return _localedirs.get(domain, _localedirs.get('C'))
+    mofile = find(domain, localedir, languages)
-    _localedirs[domain] = localedir
+    if mofile is None:
-    return localedir
+        raise IOError(ENOENT, 'No translation file found for domain', domain)
    key = os.path.abspath(mofile)
    # TBD: do we need to worry about the file pointer getting collected?
    t = _translations.setdefault(key, class_(open(mofile, 'rb')))
    return t
 def install(domain, localedir=None, unicode=0):
    translation(domain, localedir).install(unicode)
 # a mapping b/w domains and locale directories
 _localedirs = {}
 # current global domain, `messages' used for compatibility w/ GNU gettext
 _current_domain = 'messages'
 def textdomain(domain=None):
    """Change or query the current global domain."""
    global _current_domain
-    if domain is None:
+    if domain is not None:
        return _current_domain
    else:
        _current_domain = domain
-        return domain
+    return _current_domain
-def gettext(message):
+def bindtextdomain(domain, localedir=None):
-    """Return localized version of a message."""
+    global _localedirs
-    return _find().get(message, message)
+    if localedir is not None:
        _localedirs[domain] = localedir
    return _localedirs.get(domain, _default_localedir)
 def dgettext(domain, message):
-    """Like gettext(), but look up message in specified domain."""
+    try:
-    return _find(domain=domain).get(message, message)
+        t = translation(domain, _localedirs.get(domain, None))
    except IOError:
        return message
    return t.gettext(message)
 def gettext(message):
    return dgettext(_current_domain, message)
-
+# dcgettext() has been deemed unnecessary and is not implemented.
 # A higher level API
 def set(translation):
    global _current_translation
    _current_translation = translation
 # James Henstridge's Catalog constructor from GNOME gettext.  Documented usage
 # was:
 #
 #    import gettext
 #    cat = gettext.Catalog(PACKAGE, localedir=LOCALEDIR)
 #    _ = cat.gettext
 #    print _('Hello World')
-def get():
+# The resulting catalog object currently don't support access through a
-    global _current_translation
+# dictionary API, which was supported (but apparently unused) in GNOME
-    return _current_translation
+# gettext.
-
+Catalog = translation
 def install(domain=None, localedir=None):
    import __builtin__
    __builtin__.__dict__['_'] = gettext
    if domain is not None:
        bindtextdomain(domain, localedir)
        textdomain(domain)