diff --git a/Doc/library/locale.rst b/Doc/library/locale.rst index 0246f991570..04035b33d0e 100644 --- a/Doc/library/locale.rst +++ b/Doc/library/locale.rst @@ -314,6 +314,15 @@ The :mod:`locale` module defines the following exception and functions: Get a representation of up to 100 values used to represent the values 0 to 99. + The function temporarily sets the ``LC_CTYPE`` locale to the locale + of the category that determines the requested value (``LC_TIME``, + ``LC_NUMERIC``, ``LC_MONETARY`` or ``LC_MESSAGES``) if locales are + different and the resulting string is non-ASCII. + This temporary change affects other threads. + + .. versionchanged:: 3.14 + The function now temporarily sets the ``LC_CTYPE`` locale in some cases. + .. function:: getdefaultlocale([envvars]) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index f1f78ed843f..4d71a24e9cc 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -587,6 +587,12 @@ Changes in the Python API Wrap it in :func:`staticmethod` if you want to preserve the old behavior. (Contributed by Serhiy Storchaka and Dominykas Grigonis in :gh:`121027`.) +* The :func:`locale.nl_langinfo` function now sets temporarily the ``LC_CTYPE`` + locale in some cases. + This temporary change affects other threads. + (Contributed by Serhiy Storchaka in :gh:`69998`.) + + Build Changes ============= diff --git a/Lib/test/test__locale.py b/Lib/test/test__locale.py index 0947464bb8c..ba2d31f9c1e 100644 --- a/Lib/test/test__locale.py +++ b/Lib/test/test__locale.py @@ -115,16 +115,17 @@ class _LocaleTests(unittest.TestCase): def test_lc_numeric_nl_langinfo(self): # Test nl_langinfo against known values tested = False + oldloc = setlocale(LC_CTYPE) for loc in candidate_locales: try: setlocale(LC_NUMERIC, loc) - setlocale(LC_CTYPE, loc) except Error: continue for li, lc in ((RADIXCHAR, "decimal_point"), (THOUSEP, "thousands_sep")): if self.numeric_tester('nl_langinfo', nl_langinfo(li), lc, loc): tested = True + self.assertEqual(setlocale(LC_CTYPE), oldloc) if not tested: self.skipTest('no suitable locales') @@ -135,10 +136,10 @@ class _LocaleTests(unittest.TestCase): def test_lc_numeric_localeconv(self): # Test localeconv against known values tested = False + oldloc = setlocale(LC_CTYPE) for loc in candidate_locales: try: setlocale(LC_NUMERIC, loc) - setlocale(LC_CTYPE, loc) except Error: continue formatting = localeconv() @@ -146,6 +147,7 @@ class _LocaleTests(unittest.TestCase): "thousands_sep"): if self.numeric_tester('localeconv', formatting[lc], lc, loc): tested = True + self.assertEqual(setlocale(LC_CTYPE), oldloc) if not tested: self.skipTest('no suitable locales') @@ -153,10 +155,10 @@ class _LocaleTests(unittest.TestCase): def test_lc_numeric_basic(self): # Test nl_langinfo against localeconv tested = False + oldloc = setlocale(LC_CTYPE) for loc in candidate_locales: try: setlocale(LC_NUMERIC, loc) - setlocale(LC_CTYPE, loc) except Error: continue for li, lc in ((RADIXCHAR, "decimal_point"), @@ -173,6 +175,7 @@ class _LocaleTests(unittest.TestCase): nl_radixchar, li_radixchar, loc, set_locale)) tested = True + self.assertEqual(setlocale(LC_CTYPE), oldloc) if not tested: self.skipTest('no suitable locales') @@ -180,10 +183,10 @@ class _LocaleTests(unittest.TestCase): # Bug #1391872: Test whether float parsing is okay on European # locales. tested = False + oldloc = setlocale(LC_CTYPE) for loc in candidate_locales: try: setlocale(LC_NUMERIC, loc) - setlocale(LC_CTYPE, loc) except Error: continue @@ -199,6 +202,7 @@ class _LocaleTests(unittest.TestCase): self.assertRaises(ValueError, float, localeconv()['decimal_point'].join(['1', '23'])) tested = True + self.assertEqual(setlocale(LC_CTYPE), oldloc) if not tested: self.skipTest('no suitable locales') diff --git a/Misc/NEWS.d/next/Library/2024-10-04-12-43-03.gh-issue-69998.DVqOXX.rst b/Misc/NEWS.d/next/Library/2024-10-04-12-43-03.gh-issue-69998.DVqOXX.rst new file mode 100644 index 00000000000..65388e0b4e7 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-04-12-43-03.gh-issue-69998.DVqOXX.rst @@ -0,0 +1,3 @@ +Fix :func:`locale.nl_langinfo` in case when different categories have +different locales. The function now sets temporarily the ``LC_CTYPE`` locale +in some cases. This temporary change affects other threads. diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c index 9452df492bb..ce77c4072a6 100644 --- a/Modules/_localemodule.c +++ b/Modules/_localemodule.c @@ -144,6 +144,17 @@ locale_is_ascii(const char *str) return (strlen(str) == 1 && ((unsigned char)str[0]) <= 127); } +static int +is_all_ascii(const char *str) +{ + for (; *str; str++) { + if ((unsigned char)*str > 127) { + return 0; + } + } + return 1; +} + static int locale_decode_monetary(PyObject *dict, struct lconv *lc) { @@ -478,113 +489,153 @@ _locale__getdefaultlocale_impl(PyObject *module) #endif #ifdef HAVE_LANGINFO_H -#define LANGINFO(X) {#X, X} +#define LANGINFO(X, Y) {#X, X, Y} static struct langinfo_constant{ - char* name; + const char *name; int value; + int category; } langinfo_constants[] = { /* These constants should exist on any langinfo implementation */ - LANGINFO(DAY_1), - LANGINFO(DAY_2), - LANGINFO(DAY_3), - LANGINFO(DAY_4), - LANGINFO(DAY_5), - LANGINFO(DAY_6), - LANGINFO(DAY_7), + LANGINFO(DAY_1, LC_TIME), + LANGINFO(DAY_2, LC_TIME), + LANGINFO(DAY_3, LC_TIME), + LANGINFO(DAY_4, LC_TIME), + LANGINFO(DAY_5, LC_TIME), + LANGINFO(DAY_6, LC_TIME), + LANGINFO(DAY_7, LC_TIME), - LANGINFO(ABDAY_1), - LANGINFO(ABDAY_2), - LANGINFO(ABDAY_3), - LANGINFO(ABDAY_4), - LANGINFO(ABDAY_5), - LANGINFO(ABDAY_6), - LANGINFO(ABDAY_7), + LANGINFO(ABDAY_1, LC_TIME), + LANGINFO(ABDAY_2, LC_TIME), + LANGINFO(ABDAY_3, LC_TIME), + LANGINFO(ABDAY_4, LC_TIME), + LANGINFO(ABDAY_5, LC_TIME), + LANGINFO(ABDAY_6, LC_TIME), + LANGINFO(ABDAY_7, LC_TIME), - LANGINFO(MON_1), - LANGINFO(MON_2), - LANGINFO(MON_3), - LANGINFO(MON_4), - LANGINFO(MON_5), - LANGINFO(MON_6), - LANGINFO(MON_7), - LANGINFO(MON_8), - LANGINFO(MON_9), - LANGINFO(MON_10), - LANGINFO(MON_11), - LANGINFO(MON_12), + LANGINFO(MON_1, LC_TIME), + LANGINFO(MON_2, LC_TIME), + LANGINFO(MON_3, LC_TIME), + LANGINFO(MON_4, LC_TIME), + LANGINFO(MON_5, LC_TIME), + LANGINFO(MON_6, LC_TIME), + LANGINFO(MON_7, LC_TIME), + LANGINFO(MON_8, LC_TIME), + LANGINFO(MON_9, LC_TIME), + LANGINFO(MON_10, LC_TIME), + LANGINFO(MON_11, LC_TIME), + LANGINFO(MON_12, LC_TIME), - LANGINFO(ABMON_1), - LANGINFO(ABMON_2), - LANGINFO(ABMON_3), - LANGINFO(ABMON_4), - LANGINFO(ABMON_5), - LANGINFO(ABMON_6), - LANGINFO(ABMON_7), - LANGINFO(ABMON_8), - LANGINFO(ABMON_9), - LANGINFO(ABMON_10), - LANGINFO(ABMON_11), - LANGINFO(ABMON_12), + LANGINFO(ABMON_1, LC_TIME), + LANGINFO(ABMON_2, LC_TIME), + LANGINFO(ABMON_3, LC_TIME), + LANGINFO(ABMON_4, LC_TIME), + LANGINFO(ABMON_5, LC_TIME), + LANGINFO(ABMON_6, LC_TIME), + LANGINFO(ABMON_7, LC_TIME), + LANGINFO(ABMON_8, LC_TIME), + LANGINFO(ABMON_9, LC_TIME), + LANGINFO(ABMON_10, LC_TIME), + LANGINFO(ABMON_11, LC_TIME), + LANGINFO(ABMON_12, LC_TIME), #ifdef RADIXCHAR /* The following are not available with glibc 2.0 */ - LANGINFO(RADIXCHAR), - LANGINFO(THOUSEP), + LANGINFO(RADIXCHAR, LC_NUMERIC), + LANGINFO(THOUSEP, LC_NUMERIC), /* YESSTR and NOSTR are deprecated in glibc, since they are a special case of message translation, which should be rather done using gettext. So we don't expose it to Python in the first place. - LANGINFO(YESSTR), - LANGINFO(NOSTR), + LANGINFO(YESSTR, LC_MESSAGES), + LANGINFO(NOSTR, LC_MESSAGES), */ - LANGINFO(CRNCYSTR), + LANGINFO(CRNCYSTR, LC_MONETARY), #endif - LANGINFO(D_T_FMT), - LANGINFO(D_FMT), - LANGINFO(T_FMT), - LANGINFO(AM_STR), - LANGINFO(PM_STR), + LANGINFO(D_T_FMT, LC_TIME), + LANGINFO(D_FMT, LC_TIME), + LANGINFO(T_FMT, LC_TIME), + LANGINFO(AM_STR, LC_TIME), + LANGINFO(PM_STR, LC_TIME), /* The following constants are available only with XPG4, but... OpenBSD doesn't have CODESET but has T_FMT_AMPM, and doesn't have a few of the others. Solution: ifdef-test them all. */ #ifdef CODESET - LANGINFO(CODESET), + LANGINFO(CODESET, LC_CTYPE), #endif #ifdef T_FMT_AMPM - LANGINFO(T_FMT_AMPM), + LANGINFO(T_FMT_AMPM, LC_TIME), #endif #ifdef ERA - LANGINFO(ERA), + LANGINFO(ERA, LC_TIME), #endif #ifdef ERA_D_FMT - LANGINFO(ERA_D_FMT), + LANGINFO(ERA_D_FMT, LC_TIME), #endif #ifdef ERA_D_T_FMT - LANGINFO(ERA_D_T_FMT), + LANGINFO(ERA_D_T_FMT, LC_TIME), #endif #ifdef ERA_T_FMT - LANGINFO(ERA_T_FMT), + LANGINFO(ERA_T_FMT, LC_TIME), #endif #ifdef ALT_DIGITS - LANGINFO(ALT_DIGITS), + LANGINFO(ALT_DIGITS, LC_TIME), #endif #ifdef YESEXPR - LANGINFO(YESEXPR), + LANGINFO(YESEXPR, LC_MESSAGES), #endif #ifdef NOEXPR - LANGINFO(NOEXPR), + LANGINFO(NOEXPR, LC_MESSAGES), #endif #ifdef _DATE_FMT /* This is not available in all glibc versions that have CODESET. */ - LANGINFO(_DATE_FMT), + LANGINFO(_DATE_FMT, LC_TIME), #endif - {0, 0} + {0, 0, 0} }; +/* Temporary make the LC_CTYPE locale to be the same as + * the locale of the specified category. */ +static int +change_locale(int category, char **oldloc) +{ + /* Keep a copy of the LC_CTYPE locale */ + *oldloc = setlocale(LC_CTYPE, NULL); + if (!*oldloc) { + PyErr_SetString(PyExc_RuntimeError, "faild to get LC_CTYPE locale"); + return -1; + } + *oldloc = _PyMem_Strdup(*oldloc); + if (!*oldloc) { + PyErr_NoMemory(); + return -1; + } + + /* Set a new locale if it is different. */ + char *loc = setlocale(category, NULL); + if (loc == NULL || strcmp(loc, *oldloc) == 0) { + PyMem_Free(*oldloc); + *oldloc = NULL; + return 0; + } + + setlocale(LC_CTYPE, loc); + return 1; +} + +/* Restore the old LC_CTYPE locale. */ +static void +restore_locale(char *oldloc) +{ + if (oldloc != NULL) { + setlocale(LC_CTYPE, oldloc); + PyMem_Free(oldloc); + } +} + /*[clinic input] _locale.nl_langinfo @@ -602,14 +653,24 @@ _locale_nl_langinfo_impl(PyObject *module, int item) /* Check whether this is a supported constant. GNU libc sometimes returns numeric values in the char* return value, which would crash PyUnicode_FromString. */ - for (i = 0; langinfo_constants[i].name; i++) + for (i = 0; langinfo_constants[i].name; i++) { if (langinfo_constants[i].value == item) { /* Check NULL as a workaround for GNU libc's returning NULL instead of an empty string for nl_langinfo(ERA). */ const char *result = nl_langinfo(item); result = result != NULL ? result : ""; - return PyUnicode_DecodeLocale(result, NULL); + char *oldloc = NULL; + if (langinfo_constants[i].category != LC_CTYPE + && !is_all_ascii(result) + && change_locale(langinfo_constants[i].category, &oldloc) < 0) + { + return NULL; + } + PyObject *unicode = PyUnicode_DecodeLocale(result, NULL); + restore_locale(oldloc); + return unicode; } + } PyErr_SetString(PyExc_ValueError, "unsupported langinfo constant"); return NULL; }