From 33d8d705b88ca6fb227d75d9b7f5bf8efda1d0a7 Mon Sep 17 00:00:00 2001 From: Barry Warsaw Date: Wed, 30 Aug 2000 03:29:58 +0000 Subject: [PATCH] Finalize this module for Python 2.0 based on feedback and input from Martin von Loewis, Peter Funk, James Henstridge, Francois Pinard, and Marc-Andre Lemburg. --- Lib/gettext.py | 315 ++++++++++++++++++++++--------------------------- 1 file changed, 141 insertions(+), 174 deletions(-) diff --git a/Lib/gettext.py b/Lib/gettext.py index c216089de96..647fe56bf90 100644 --- a/Lib/gettext.py +++ b/Lib/gettext.py @@ -6,83 +6,7 @@ message catalog library. I18N refers to the operation by which a program is made aware of multiple languages. L10N refers to the adaptation of your program, once -internationalized, to the local language and cultural habits. In order to -provide multilingual messages for your Python programs, you need to take the -following steps: - - - prepare your program by specially marking translatable strings - - run a suite of tools over your marked program files to generate raw - messages catalogs - - create language specific translations of the message catalogs - - use this module so that message strings are properly translated - -In order to prepare your program for I18N, you need to look at all the strings -in your program. Any string that needs to be translated should be marked by -wrapping it in _('...') -- i.e. a call to the function `_'. For example: - - filename = 'mylog.txt' - message = _('writing a log message') - fp = open(filename, 'w') - fp.write(message) - fp.close() - -In this example, the string `writing a log message' is marked as a candidate -for translation, while the strings `mylog.txt' and `w' are not. - -The GNU gettext package provides a tool, called xgettext, that scans C and C++ -source code looking for these specially marked strings. xgettext generates -what are called `.pot' files, essentially structured human readable files -which contain every marked string in the source code. These .pot files are -copied and handed over to translators who write language-specific versions for -every supported language. - -For I18N Python programs however, xgettext won't work; it doesn't understand -the myriad of string types support by Python. The standard Python -distribution provides a tool called pygettext that does though (found in the -Tools/i18n directory). This is a command line script that supports a similar -interface as xgettext; see its documentation for details. Once you've used -pygettext to create your .pot files, you can use the standard GNU gettext -tools to generate your machine-readable .mo files, which are what's used by -this module. - -In the simple case, to use this module then, you need only add the following -bit of code to the main driver file of your application: - - import gettext - gettext.install() - -This sets everything up so that your _('...') function calls Just Work. In -other words, it installs `_' in the builtins namespace for convenience. You -can skip this step and do it manually by the equivalent code: - - import gettext - import __builtin__ - __builtin__['_'] = gettext.gettext - -Once you've done this, you probably want to call bindtextdomain() and -textdomain() to get the domain set up properly. Again, for convenience, you -can pass the domain and localedir to install to set everything up in one fell -swoop: - - import gettext - gettext.install('mydomain', '/my/locale/dir') - -If your program needs to support many languages at the same time, you will -want to create Translation objects explicitly, like so: - - import gettext - gettext.install() - - lang1 = gettext.Translations(open('/path/to/my/lang1/messages.mo')) - lang2 = gettext.Translations(open('/path/to/my/lang2/messages.mo')) - lang3 = gettext.Translations(open('/path/to/my/lang3/messages.mo')) - - gettext.set(lang1) - # all _() will now translate to language 1 - gettext.set(lang2) - # all _() will now translate to language 2 - -Currently, only GNU gettext format binary .mo files are supported. +internationalized, to the local language and cultural habits. """ @@ -104,21 +28,27 @@ Currently, only GNU gettext format binary .mo files are supported. # # Barry Warsaw integrated these modules, wrote the .install() API and code, # and conformed all C and Python code to Python's coding standards. +# +# Francois Pinard and Marc-Andre Lemburg also contributed valuably to this +# module. +# +# TODO: +# - Lazy loading of .mo files. Currently the entire catalog is loaded into +# memory, but that's probably bad for large translated programs. Instead, +# the lexical sort of original strings in GNU .mo files should be exploited +# to do binary searches and lazy initializations. Or you might want to use +# the undocumented double-hash algorithm for .mo files with hash tables, but +# you'll need to study the GNU gettext code to do this. +# +# - Support Solaris .mo file formats. Unfortunately, we've been unable to +# find this format documented anywhere. import os import sys import struct -from UserDict import UserDict +from errno import ENOENT - - -# globals -_translations = {} -_current_translation = None -_current_domain = 'messages' - -# Domain to directory mapping, for use by bindtextdomain() -_localedirs = {} +_default_localedir = os.path.join(sys.prefix, 'share', 'locale') @@ -165,16 +95,37 @@ def _expand_lang(locale): -class GNUTranslations(UserDict): - # Magic number of .mo files - MAGIC = 0x950412de +class NullTranslations: + def __init__(self, fp=None): + self._info = {} + self._charset = None + if fp: + self._parse(fp) - def __init__(self, fp): - if fp is None: - d = {} - else: - d = self._parse(fp) - UserDict.__init__(self, d) + def _parse(self, fp): + pass + + def gettext(self, message): + return message + + def ugettext(self, message): + return unicode(message) + + def info(self): + return self._info + + def charset(self): + return self._charset + + def install(self, unicode=0): + import __builtin__ + __builtin__.__dict__['_'] = unicode and self.ugettext or self.gettext + + +class GNUTranslations(NullTranslations): + # Magic number of .mo files + LE_MAGIC = 0x950412de + BE_MAGIC = struct.unpack('>i', struct.pack(' self.MAGIC: + # Are we big endian or little endian? + magic = unpack('4i', buf[4:20]) + ii = '>ii' + else: raise IOError(0, 'Bad magic number', filename) # # Now put all messages from the .mo file buffer into the catalog # dictionary. for i in xrange(0, msgcount): - mstart = unpack('