This change implements the following gettext features, as

discussed recently in python-dev:

In _locale module:

- bind_textdomain_codeset() binding

In gettext module:

- bind_textdomain_codeset() function
- lgettext(), lngettext(), ldgettext(), ldngettext(),
  which return translated strings encoded in
  preferred system encoding, if
  bind_textdomain_codeset() was not used.
- Added equivalent functionality in translate()
  function and catalog classes.

Every change was also documented.
This commit is contained in:
Gustavo Niemeyer 2004-07-22 18:44:01 +00:00
parent 5980ff2d92
commit 7bd33c5e22
7 changed files with 256 additions and 29 deletions

View File

@ -51,6 +51,14 @@ for \var{domain} is returned.\footnote{
the start of your application.}
\end{funcdesc}
\begin{funcdesc}{bind_textdomain_codeset}{domain\optional{, codeset}}
Bind the \var{domain} to \var{codeset}, changing the encoding of
strings returned by the \function{gettext()} family of functions.
If \var{codeset} is omitted, then the current binding is returned.
\versionadded{2.4}
\end{funcdesc}
\begin{funcdesc}{textdomain}{\optional{domain}}
Change or query the current global domain. If \var{domain} is
\code{None}, then the current global domain is returned, otherwise the
@ -64,11 +72,27 @@ is usually aliased as \function{_} in the local namespace (see
examples below).
\end{funcdesc}
\begin{funcdesc}{lgettext}{message}
Equivalent to \function{gettext()}, but the translation is returned
in the preferred system encoding, if no other encoding was explicitly
set with \function{bind_textdomain_codeset()}.
\versionadded{2.4}
\end{funcdesc}
\begin{funcdesc}{dgettext}{domain, message}
Like \function{gettext()}, but look the message up in the specified
\var{domain}.
\end{funcdesc}
\begin{funcdesc}{ldgettext}{domain, message}
Equivalent to \function{dgettext()}, but the translation is returned
in the preferred system encoding, if no other encoding was explicitly
set with \function{bind_textdomain_codeset()}.
\versionadded{2.4}
\end{funcdesc}
\begin{funcdesc}{ngettext}{singular, plural, n}
Like \function{gettext()}, but consider plural forms. If a translation
@ -87,6 +111,14 @@ formulas for a variety of languages.
\end{funcdesc}
\begin{funcdesc}{lngettext}{singular, plural, n}
Equivalent to \function{ngettext()}, but the translation is returned
in the preferred system encoding, if no other encoding was explicitly
set with \function{bind_textdomain_codeset()}.
\versionadded{2.4}
\end{funcdesc}
\begin{funcdesc}{dngettext}{domain, singular, plural, n}
Like \function{ngettext()}, but look the message up in the specified
\var{domain}.
@ -94,6 +126,15 @@ Like \function{ngettext()}, but look the message up in the specified
\versionadded{2.3}
\end{funcdesc}
\begin{funcdesc}{ldngettext}{domain, singular, plural, n}
Equivalent to \function{dngettext()}, but the translation is returned
in the preferred system encoding, if no other encoding was explicitly
set with \function{bind_textdomain_codeset()}.
\versionadded{2.4}
\end{funcdesc}
Note that GNU \program{gettext} also defines a \function{dcgettext()}
method, but this was deemed not useful and so it is currently
@ -152,8 +193,8 @@ they appear in the languages list or the environment variables.
\end{funcdesc}
\begin{funcdesc}{translation}{domain\optional{, localedir\optional{,
languages\optional{,
class_,\optional{fallback}}}}}
languages\optional{, class_\optional{,
fallback\optional{, codeset}}}}}}
Return a \class{Translations} instance based on the \var{domain},
\var{localedir}, and \var{languages}, which are first passed to
\function{find()} to get a list of the
@ -161,7 +202,8 @@ associated \file{.mo} file paths. Instances with
identical \file{.mo} file names are cached. The actual class instantiated
is either \var{class_} if provided, otherwise
\class{GNUTranslations}. The class's constructor must take a single
file object argument.
file object argument. If provided, \var{codeset} will change the
charset used to encode translated strings.
If multiple files are found, later files are used as fallbacks for
earlier ones. To allow setting the fallback, \function{copy.copy}
@ -172,13 +214,17 @@ If no \file{.mo} file is found, this function raises
\exception{IOError} if \var{fallback} is false (which is the default),
and returns a \class{NullTranslations} instance if \var{fallback} is
true.
\versionchanged[Added the \var{codeset} parameter]{2.4}
\end{funcdesc}
\begin{funcdesc}{install}{domain\optional{, localedir\optional{, unicode}}}
\begin{funcdesc}{install}{domain\optional{, localedir\optional{, unicode
\optional{, codeset}}}}
This installs the function \function{_} in Python's builtin namespace,
based on \var{domain}, and \var{localedir} which are passed to the
function \function{translation()}. The \var{unicode} flag is passed to
the resulting translation object's \method{install} method.
based on \var{domain}, \var{localedir}, and \var{codeset} which are
passed to the function \function{translation()}. The \var{unicode}
flag is passed to the resulting translation object's \method{install}
method.
As seen below, you usually mark the strings in your application that are
candidates for translation, by wrapping them in a call to the
@ -191,6 +237,8 @@ print _('This string will be translated.')
For convenience, you want the \function{_()} function to be installed in
Python's builtin namespace, so it is easily accessible in all modules
of your application.
\versionchanged[Added the \var{codeset} parameter]{2.4}
\end{funcdesc}
\subsubsection{The \class{NullTranslations} class}
@ -223,25 +271,39 @@ provide a translation for a given message.
\end{methoddesc}
\begin{methoddesc}[NullTranslations]{gettext}{message}
If a fallback has been set, forward \method{gettext} to the fallback.
If a fallback has been set, forward \method{gettext()} to the fallback.
Otherwise, return the translated message. Overridden in derived classes.
\end{methoddesc}
\begin{methoddesc}[NullTranslations]{lgettext}{message}
If a fallback has been set, forward \method{lgettext()} to the fallback.
Otherwise, return the translated message. Overridden in derived classes.
\versionadded{2.4}
\end{methoddesc}
\begin{methoddesc}[NullTranslations]{ugettext}{message}
If a fallback has been set, forward \method{ugettext} to the fallback.
If a fallback has been set, forward \method{ugettext()} to the fallback.
Otherwise, return the translated message as a Unicode string.
Overridden in derived classes.
\end{methoddesc}
\begin{methoddesc}[NullTranslations]{ngettext}{singular, plural, n}
If a fallback has been set, forward \method{ngettext} to the fallback.
If a fallback has been set, forward \method{ngettext()} to the fallback.
Otherwise, return the translated message. Overridden in derived classes.
\versionadded{2.3}
\end{methoddesc}
\begin{methoddesc}[NullTranslations]{lngettext}{singular, plural, n}
If a fallback has been set, forward \method{lngettext()} to the fallback.
Otherwise, return the translated message. Overridden in derived classes.
\versionadded{2.4}
\end{methoddesc}
\begin{methoddesc}[NullTranslations]{ungettext}{singular, plural, n}
If a fallback has been set, forward \method{ungettext} to the fallback.
If a fallback has been set, forward \method{ungettext()} to the fallback.
Otherwise, return the translated message as a Unicode string.
Overridden in derived classes.
@ -256,6 +318,20 @@ Return the ``protected'' \member{_info} variable.
Return the ``protected'' \member{_charset} variable.
\end{methoddesc}
\begin{methoddesc}[NullTranslations]{output_charset}{}
Return the ``protected'' \member{_output_charset} variable, which
defines the encoding used to return translated messages.
\versionadded{2.4}
\end{methoddesc}
\begin{methoddesc}[NullTranslations]{set_output_charset}{charset}
Change the ``protected'' \member{_output_charset} variable, which
defines the encoding used to return translated messages.
\versionadded{2.4}
\end{methoddesc}
\begin{methoddesc}[NullTranslations]{install}{\optional{unicode}}
If the \var{unicode} flag is false, this method installs
\method{self.gettext()} into the built-in namespace, binding it to
@ -323,6 +399,14 @@ look up is forwarded to the fallback's \method{gettext()} method.
Otherwise, the \var{message} id is returned.
\end{methoddesc}
\begin{methoddesc}[GNUTranslations]{lgettext}{message}
Equivalent to \method{gettext()}, but the translation is returned
in the preferred system encoding, if no other encoding was explicitly
set with \method{set_output_charset()}.
\versionadded{2.4}
\end{methoddesc}
\begin{methoddesc}[GNUTranslations]{ugettext}{message}
Look up the \var{message} id in the catalog and return the
corresponding message string, as a Unicode string. If there is no
@ -346,6 +430,14 @@ returned, and \var{plural} is returned in all other cases.
\versionadded{2.3}
\end{methoddesc}
\begin{methoddesc}[GNUTranslations]{lngettext}{singular, plural, n}
Equivalent to \method{ngettext()}, but the translation is returned
in the preferred system encoding, if no other encoding was explicitly
set with \method{set_output_charset()}.
\versionadded{2.4}
\end{methoddesc}
\begin{methoddesc}[GNUTranslations]{ungettext}{singular, plural, n}
Do a plural-forms lookup of a message id. \var{singular} is used as
the message id for purposes of lookup in the catalog, while \var{n} is
@ -495,7 +587,7 @@ you would put at the top of your module:
\begin{verbatim}
import gettext
t = gettext.translation('spam', '/usr/share/locale')
_ = t.gettext
_ = t.lgettext
\end{verbatim}
If your translators were providing you with Unicode strings in their
@ -633,6 +725,21 @@ program to look for translatable strings marked with \function{N_()}.
\program{pygettext} and \program{xpot} both support this through the
use of command line switches.
\subsubsection{\function{gettext()} vs. \function{lgettext()}}
In Python 2.4 the \function{lgettext()} family of functions was
introduced. The intention of these functions is to provide an
alternative which is more compliant with the current
implementation of GNU gettext. Unlike \function{gettext()}, which
returns strings encoded with the same codeset used in the
translation file, \function{lgettext()} will return strings
encoded with the preferred system encoding, as returned by
\function{locale.getpreferredencoding()}. Also notice that
Python 2.4 introduces new functions to explicitly choose
the codeset used in translated strings. If a codeset is explicitly
set, even \function{lgettext()} will return translated strings in
the requested codeset, as would be expected in the GNU gettext
implementation.
\subsection{Acknowledgements}
The following people contributed code, feedback, design suggestions,
@ -647,4 +754,5 @@ this module:
\item Martin von L\"owis
\item Fran\c cois Pinard
\item Barry Warsaw
\item Gustavo Niemeyer
\end{itemize}

View File

@ -469,15 +469,16 @@ that the \module{_locale} module is not accessible as a shared library.
The locale module exposes the C library's gettext interface on systems
that provide this interface. It consists of the functions
\function{gettext()}, \function{dgettext()}, \function{dcgettext()},
\function{textdomain()}, and \function{bindtextdomain()}. These are
similar to the same functions in the \refmodule{gettext} module, but use
the C library's binary format for message catalogs, and the C
library's search algorithms for locating message catalogs.
\function{textdomain()}, \function{bindtextdomain()}, and
\function{bind_textdomain_codeset()}. These are similar to the same
functions in the \refmodule{gettext} module, but use the C library's
binary format for message catalogs, and the C library's search
algorithms for locating message catalogs.
Python applications should normally find no need to invoke these
functions, and should use \refmodule{gettext} instead. A known
exception to this rule are applications that link with additional C
libraries which internally invoke \cfunction{gettext()} or
\function{cdgettext()}. For these applications, it may be necessary to
\function{dcgettext()}. For these applications, it may be necessary to
bind the text domain, so that the libraries can properly locate their
message catalogs.

View File

@ -46,7 +46,7 @@ internationalized, to the local language and cultural habits.
# find this format documented anywhere.
import copy, os, re, struct, sys
import locale, copy, os, re, struct, sys
from errno import ENOENT
@ -171,6 +171,7 @@ class NullTranslations:
def __init__(self, fp=None):
self._info = {}
self._charset = None
self._output_charset = None
self._fallback = None
if fp is not None:
self._parse(fp)
@ -189,6 +190,11 @@ class NullTranslations:
return self._fallback.gettext(message)
return message
def lgettext(self, message):
if self._fallback:
return self._fallback.lgettext(message)
return message
def ngettext(self, msgid1, msgid2, n):
if self._fallback:
return self._fallback.ngettext(msgid1, msgid2, n)
@ -197,6 +203,14 @@ class NullTranslations:
else:
return msgid2
def lngettext(self, msgid1, msgid2, n):
if self._fallback:
return self._fallback.lngettext(msgid1, msgid2, n)
if n == 1:
return msgid1
else:
return msgid2
def ugettext(self, message):
if self._fallback:
return self._fallback.ugettext(message)
@ -216,6 +230,12 @@ class NullTranslations:
def charset(self):
return self._charset
def output_charset(self):
return self._output_charset
def set_output_charset(self, charset):
self._output_charset = charset
def install(self, unicode=False):
import __builtin__
__builtin__.__dict__['_'] = unicode and self.ugettext or self.gettext
@ -315,14 +335,29 @@ class GNUTranslations(NullTranslations):
return self._fallback.gettext(message)
return message
# Encode the Unicode tmsg back to an 8-bit string, if possible
if self._charset:
if self._output_charset:
return tmsg.encode(self._output_charset)
elif self._charset:
return tmsg.encode(self._charset)
return tmsg
def lgettext(self, message):
missing = object()
tmsg = self._catalog.get(message, missing)
if tmsg is missing:
if self._fallback:
return self._fallback.lgettext(message)
return message
if self._output_charset:
return tmsg.encode(self._output_charset)
return tmsg.encode(locale.getpreferredencoding())
def ngettext(self, msgid1, msgid2, n):
try:
tmsg = self._catalog[(msgid1, self.plural(n))]
if self._charset:
if self._output_charset:
return tmsg.encode(self._output_charset)
elif self._charset:
return tmsg.encode(self._charset)
return tmsg
except KeyError:
@ -333,6 +368,20 @@ class GNUTranslations(NullTranslations):
else:
return msgid2
def lngettext(self, msgid1, msgid2, n):
try:
tmsg = self._catalog[(msgid1, self.plural(n))]
if self._output_charset:
return tmsg.encode(self._output_charset)
return tmsg.encode(locale.getpreferredencoding())
except KeyError:
if self._fallback:
return self._fallback.lngettext(msgid1, msgid2, n)
if n == 1:
return msgid1
else:
return msgid2
def ugettext(self, message):
missing = object()
tmsg = self._catalog.get(message, missing)
@ -397,7 +446,7 @@ def find(domain, localedir=None, languages=None, all=0):
_translations = {}
def translation(domain, localedir=None, languages=None,
class_=None, fallback=False):
class_=None, fallback=False, codeset=None):
if class_ is None:
class_ = GNUTranslations
mofiles = find(domain, localedir, languages, all=1)
@ -414,9 +463,12 @@ def translation(domain, localedir=None, languages=None,
t = _translations.get(key)
if t is None:
t = _translations.setdefault(key, class_(open(mofile, 'rb')))
# Copy the translation object to allow setting fallbacks.
# All other instance data is shared with the cached object.
# Copy the translation object to allow setting fallbacks and
# output charset. All other instance data is shared with the
# cached object.
t = copy.copy(t)
if codeset:
t.set_output_charset(codeset)
if result is None:
result = t
else:
@ -424,13 +476,16 @@ def translation(domain, localedir=None, languages=None,
return result
def install(domain, localedir=None, unicode=False):
translation(domain, localedir, fallback=True).install(unicode)
def install(domain, localedir=None, unicode=False, codeset=None):
t = translation(domain, localedir, fallback=True, codeset=codeset)
t.install(unicode)
# a mapping b/w domains and locale directories
_localedirs = {}
# a mapping b/w domains and codesets
_localecodesets = {}
# current global domain, `messages' used for compatibility w/ GNU gettext
_current_domain = 'messages'
@ -449,17 +504,33 @@ def bindtextdomain(domain, localedir=None):
return _localedirs.get(domain, _default_localedir)
def bind_textdomain_codeset(domain, codeset=None):
global _localecodesets
if codeset is not None:
_localecodesets[domain] = codeset
return _localecodesets.get(domain)
def dgettext(domain, message):
try:
t = translation(domain, _localedirs.get(domain, None))
t = translation(domain, _localedirs.get(domain, None),
codeset=_localecodesets.get(domain))
except IOError:
return message
return t.gettext(message)
def ldgettext(domain, message):
try:
t = translation(domain, _localedirs.get(domain, None),
codeset=_localecodesets.get(domain))
except IOError:
return message
return t.lgettext(message)
def dngettext(domain, msgid1, msgid2, n):
try:
t = translation(domain, _localedirs.get(domain, None))
t = translation(domain, _localedirs.get(domain, None),
codeset=_localecodesets.get(domain))
except IOError:
if n == 1:
return msgid1
@ -467,14 +538,28 @@ def dngettext(domain, msgid1, msgid2, n):
return msgid2
return t.ngettext(msgid1, msgid2, n)
def ldngettext(domain, msgid1, msgid2, n):
try:
t = translation(domain, _localedirs.get(domain, None),
codeset=_localecodesets.get(domain))
except IOError:
if n == 1:
return msgid1
else:
return msgid2
return t.lngettext(msgid1, msgid2, n)
def gettext(message):
return dgettext(_current_domain, message)
def lgettext(message):
return ldgettext(_current_domain, message)
def ngettext(msgid1, msgid2, n):
return dngettext(_current_domain, msgid1, msgid2, n)
def lngettext(msgid1, msgid2, n):
return ldngettext(_current_domain, msgid1, msgid2, n)
# dcgettext() has been deemed unnecessary and is not implemented.

View File

@ -33,6 +33,8 @@ Core and builtins
will cause a TypeError to be raised. This matches the behavior of
Jython.
- Implemented bind_textdomain_codeset() in locale module.
Extension modules
-----------------
@ -112,6 +114,12 @@ Library
- Bug #990307: when keep_empty_values is True, cgi.parse_qsl()
no longer returns spurious empty fields.
- Implemented bind_textdomain_codeset() in gettext module.
- Introduced in gettext module the l*gettext() family of functions,
which return translated strings encoded in the preferred encoding,
as reported by the locale module's getpreferredencoding().
Tools/Demos
-----------

View File

@ -649,6 +649,24 @@ PyIntl_bindtextdomain(PyObject* self,PyObject*args)
return PyString_FromString(dirname);
}
#ifdef HAVE_BIND_TEXTDOMAIN_CODESET
PyDoc_STRVAR(bind_textdomain_codeset__doc__,
"bind_textdomain_codeset(domain, codeset) -> string\n"
"Bind the C library's domain to codeset.");
/* Python-level wrapper around the C library's bind_textdomain_codeset().
 *
 * The argument tuple is parsed with the format "sz": a domain string
 * followed by a codeset that may be a string or None.  Whatever pointer
 * the C library hands back is converted to a Python string; a NULL
 * result from the C library is reported to the caller as None.
 * Returns NULL when the arguments cannot be parsed.
 */
static PyObject*
PyIntl_bind_textdomain_codeset(PyObject* self,PyObject*args)
{
    char *domain;
    char *codeset;
    char *bound;

    if (!PyArg_ParseTuple(args, "sz", &domain, &codeset)) {
        return NULL;
    }

    bound = bind_textdomain_codeset(domain, codeset);
    if (bound == NULL) {
        /* Nothing returned by the C library: map to None. */
        Py_RETURN_NONE;
    }
    return PyString_FromString(bound);
}
#endif
#endif
static struct PyMethodDef PyLocale_Methods[] = {
@ -678,6 +696,10 @@ static struct PyMethodDef PyLocale_Methods[] = {
textdomain__doc__},
{"bindtextdomain",(PyCFunction)PyIntl_bindtextdomain,METH_VARARGS,
bindtextdomain__doc__},
#ifdef HAVE_BIND_TEXTDOMAIN_CODESET
{"bind_textdomain_codeset",(PyCFunction)PyIntl_bind_textdomain_codeset,
METH_VARARGS, bind_textdomain_codeset__doc__},
#endif
#endif
{NULL, NULL}
};

View File

@ -2044,8 +2044,8 @@ fi
AC_MSG_RESULT(MACHDEP_OBJS)
# checks for library functions
AC_CHECK_FUNCS(alarm chown clock confstr ctermid execv \
fork fpathconf ftime ftruncate \
AC_CHECK_FUNCS(alarm bind_textdomain_codeset chown clock confstr ctermid \
execv fork fpathconf ftime ftruncate \
gai_strerror getgroups getlogin getloadavg getpeername getpgid getpid \
getpriority getpwent getsid getwd \
kill killpg lchown lstat mkfifo mknod mktime \

View File

@ -37,6 +37,9 @@
/* Define this if your time.h defines altzone. */
#undef HAVE_ALTZONE
/* Define to 1 if you have the `bind_textdomain_codeset' function. */
#undef HAVE_BIND_TEXTDOMAIN_CODESET
/* Define to 1 if you have the <bluetooth/bluetooth.h> header file. */
#undef HAVE_BLUETOOTH_BLUETOOTH_H