[3.13] gh-124969: Make locale.nl_langinfo(locale.ALT_DIGITS) return a string again (GH-125774) (GH-125804)

This is a follow-up to GH-124974. Only Glibc needed a fix.
Now the returned value is a string consisting of semicolon-separated
symbols on all POSIX platforms.
(cherry picked from commit dcc4fb2c90)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
Miss Islington (bot) 2024-10-21 21:30:23 +02:00 committed by GitHub
parent 829d650ccb
commit c1c3f5d19e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 52 additions and 30 deletions

View File

@ -158,8 +158,7 @@ The :mod:`locale` module defines the following exception and functions:
.. function:: nl_langinfo(option)
Return some locale-specific information as a string (or a tuple for
``ALT_DIGITS``). This function is not
Return some locale-specific information as a string. This function is not
available on all systems, and the set of possible options might also vary
across platforms. The possible argument values are numbers, for which
symbolic constants are available in the locale module.
@ -312,7 +311,9 @@ The :mod:`locale` module defines the following exception and functions:
.. data:: ALT_DIGITS
Get a tuple of up to 100 strings used to represent the values 0 to 99.
Get a string consisting of up to 100 semicolon-separated symbols used
to represent the values 0 to 99 in a locale-specific way.
In most locales this is an empty string.
.. function:: getdefaultlocale([envvars])

View File

@ -26,7 +26,10 @@ candidate_locales = ['es_UY', 'fr_FR', 'fi_FI', 'es_CO', 'pt_PT', 'it_IT',
'bs_BA', 'fr_LU', 'kl_GL', 'fa_IR', 'de_BE', 'sv_SE', 'it_CH', 'uk_UA',
'eu_ES', 'vi_VN', 'af_ZA', 'nb_NO', 'en_DK', 'tg_TJ', 'ps_AF', 'en_US',
'fr_FR.ISO8859-1', 'fr_FR.UTF-8', 'fr_FR.ISO8859-15@euro',
'ru_RU.KOI8-R', 'ko_KR.eucKR']
'ru_RU.KOI8-R', 'ko_KR.eucKR',
'ja_JP.UTF-8', 'lzh_TW.UTF-8', 'my_MM.UTF-8', 'or_IN.UTF-8', 'shn_MM.UTF-8',
'ar_AE.UTF-8', 'bn_IN.UTF-8', 'mr_IN.UTF-8', 'th_TH.TIS620',
]
def setUpModule():
global candidate_locales
@ -78,11 +81,13 @@ known_alt_digits = {
'C': (0, {}),
'en_US': (0, {}),
'fa_IR': (100, {0: '\u06f0\u06f0', 10: '\u06f1\u06f0', 99: '\u06f9\u06f9'}),
'ja_JP': (100, {0: '\u3007', 10: '\u5341', 99: '\u4e5d\u5341\u4e5d'}),
'ja_JP': (100, {1: '\u4e00', 10: '\u5341', 99: '\u4e5d\u5341\u4e5d'}),
'lzh_TW': (32, {0: '\u3007', 10: '\u5341', 31: '\u5345\u4e00'}),
'my_MM': (100, {0: '\u1040\u1040', 10: '\u1041\u1040', 99: '\u1049\u1049'}),
'or_IN': (100, {0: '\u0b66', 10: '\u0b67\u0b66', 99: '\u0b6f\u0b6f'}),
'shn_MM': (100, {0: '\u1090\u1090', 10: '\u1091\u1090', 99: '\u1099\u1099'}),
'ar_AE': (100, {0: '\u0660', 10: '\u0661\u0660', 99: '\u0669\u0669'}),
'bn_IN': (100, {0: '\u09e6', 10: '\u09e7\u09e6', 99: '\u09ef\u09ef'}),
}
if sys.platform == 'win32':
@ -196,7 +201,7 @@ class _LocaleTests(unittest.TestCase):
def test_alt_digits_nl_langinfo(self):
# Test nl_langinfo(ALT_DIGITS)
tested = False
for loc, (count, samples) in known_alt_digits.items():
for loc in candidate_locales:
with self.subTest(locale=loc):
try:
setlocale(LC_TIME, loc)
@ -204,12 +209,19 @@ class _LocaleTests(unittest.TestCase):
except Error:
self.skipTest(f'no locale {loc!r}')
continue
with self.subTest(locale=loc):
alt_digits = nl_langinfo(locale.ALT_DIGITS)
self.assertIsInstance(alt_digits, tuple)
if count and not alt_digits and support.is_apple:
self.skipTest(f'ALT_DIGITS is not set for locale {loc!r} on Apple platforms')
self.assertEqual(len(alt_digits), count)
self.assertIsInstance(alt_digits, str)
alt_digits = alt_digits.split(';') if alt_digits else []
if alt_digits:
self.assertGreaterEqual(len(alt_digits), 10, alt_digits)
loc1 = loc.split('.', 1)[0]
if loc1 in known_alt_digits:
count, samples = known_alt_digits[loc1]
if count and not alt_digits:
self.skipTest(f'ALT_DIGITS is not set for locale {loc!r} on this platform')
self.assertEqual(len(alt_digits), count, alt_digits)
for i in samples:
self.assertEqual(alt_digits[i], samples[i])
tested = True

View File

@ -1,3 +1,4 @@
Fix ``locale.nl_langinfo(locale.ALT_DIGITS)``. Now it returns a tuple of up
to 100 strings (an empty tuple on most locales). Previously it returned the
first item of that tuple or an empty string.
Fix ``locale.nl_langinfo(locale.ALT_DIGITS)`` on platforms with glibc.
Now it returns a string consisting of up to 100 semicolon-separated symbols
(an empty string in most locales) on all POSIX platforms.
Previously it only returned the first symbol or an empty string.

View File

@ -609,28 +609,36 @@ _locale_nl_langinfo_impl(PyObject *module, int item)
const char *result = nl_langinfo(item);
result = result != NULL ? result : "";
PyObject *pyresult;
#ifdef __GLIBC__
#ifdef ALT_DIGITS
if (item == ALT_DIGITS) {
/* The result is a sequence of up to 100 NUL-separated strings. */
const char *s = result;
if (item == ALT_DIGITS && *result) {
/* According to the POSIX specification the result must be
* a sequence of up to 100 semicolon-separated strings.
* But in Glibc they are NUL-separated. */
Py_ssize_t i = 0;
int count = 0;
for (; count < 100 && *s; count++) {
s += strlen(s) + 1;
for (; count < 100 && result[i]; count++) {
i += strlen(result + i) + 1;
}
pyresult = PyTuple_New(count);
if (pyresult != NULL) {
for (int i = 0; i < count; i++) {
PyObject *unicode = PyUnicode_DecodeLocale(result, NULL);
if (unicode == NULL) {
Py_CLEAR(pyresult);
break;
char *buf = PyMem_Malloc(i);
if (buf == NULL) {
PyErr_NoMemory();
pyresult = NULL;
}
PyTuple_SET_ITEM(pyresult, i, unicode);
result += strlen(result) + 1;
else {
memcpy(buf, result, i);
/* Replace all NULs with semicolons. */
i = 0;
while (--count) {
i += strlen(buf + i);
buf[i++] = ';';
}
pyresult = PyUnicode_DecodeLocale(buf, NULL);
PyMem_Free(buf);
}
}
else
#endif
#endif
{
pyresult = PyUnicode_DecodeLocale(result, NULL);