diff --git a/Doc/lib/liblocale.tex b/Doc/lib/liblocale.tex index 3c4506d7821..415f0cf197f 100644 --- a/Doc/lib/liblocale.tex +++ b/Doc/lib/liblocale.tex @@ -155,6 +155,20 @@ for which symbolic constants are available in the locale module. \versionadded{2.0} \end{funcdesc} +\begin{funcdesc}{getpreferredencoding}{\optional{do_setlocale}} + Return the encoding used for text data, according to user + preferences. User preferences are expressed differently on + different systems, and might not be available programmatically on + some systems, so this function only returns a guess. + + On some systems, it is necessary to invoke \function{setlocale} + to obtain the user preferences, so this function is not thread-safe. + If invoking \function{setlocale} is not necessary or desired, \var{do_setlocale} + should be set to \code{False}. + + \versionadded{2.3} +\end{funcdesc} + \begin{funcdesc}{normalize}{localename} Returns a normalized locale code for the given locale name. The returned locale code is formatted for use with diff --git a/Lib/locale.py b/Lib/locale.py index 9078eed9164..9ae981f9a13 100644 --- a/Lib/locale.py +++ b/Lib/locale.py @@ -264,6 +264,15 @@ def _parse_localename(localename): """ code = normalize(localename) + if '@' in localename: + # Deal with locale modifiers + code, modifier = code.split('@') + if modifier == 'euro' and '.' not in code: + # Assume Latin-9 for @euro locales. This is bogus, + # since some systems may use other encodings for these + # locales. Also, we ignore other modifiers. + return code, 'iso-8859-15' + if '.' 
in code: return code.split('.')[:2] elif code == 'C': @@ -381,6 +390,38 @@ def resetlocale(category=LC_ALL): """ _setlocale(category, _build_localename(getdefaultlocale())) +if sys.platform in ('win32', 'darwin', 'mac'): + # On Win32, this will return the ANSI code page + # On the Mac, it should return the system encoding; + # it might return "ascii" instead + def getpreferredencoding(do_setlocale = True): + """Return the charset that the user is likely using.""" + import _locale + return _locale.getdefaultlocale()[1] +else: + # On Unix, if CODESET is available, use that. + try: + CODESET + except NameError: + # Fall back to parsing environment variables :-( + def getpreferredencoding(do_setlocale = True): + """Return the charset that the user is likely using, + by looking at environment variables.""" + return getdefaultlocale()[1] + else: + def getpreferredencoding(do_setlocale = True): + """Return the charset that the user is likely using, + according to the system configuration.""" + if do_setlocale: + oldloc = setlocale(LC_CTYPE) + setlocale(LC_CTYPE, "") + result = nl_langinfo(CODESET) + setlocale(LC_CTYPE, oldloc) + return result + else: + return nl_langinfo(CODESET) + + ### Database # # The following data was extracted from the locale.alias file which diff --git a/Lib/test/test_locale.py b/Lib/test/test_locale.py index 2e056cfe103..46cbee2b53d 100644 --- a/Lib/test/test_locale.py +++ b/Lib/test/test_locale.py @@ -38,5 +38,7 @@ try: testformat("%20.f", -42, grouping=1, output=' -42') testformat("%+10.f", -4200, grouping=1, output=' -4,200') testformat("%-10.f", 4200, grouping=1, output='4,200 ') + # Invoke getpreferredencoding to make sure it does not cause exceptions. + locale.getpreferredencoding() finally: locale.setlocale(locale.LC_NUMERIC, oldlocale) diff --git a/Misc/NEWS b/Misc/NEWS index 3a026df54f2..71eacc84d4c 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -325,7 +325,8 @@ Extension modules - posix.lchown, posix.killpg, posix.mknod, and posix.getpgid 
have been added where available. -- The locale module now exposes the C library's gettext interface. +- The locale module now exposes the C library's gettext interface. It + also has a new function getpreferredencoding. - A security hole ("double free") was found in zlib-1.1.3, a popular third party compression library used by some Python modules. The