bpo-31900: Fix localeconv() encoding for LC_NUMERIC (#4174)

* Add _Py_GetLocaleconvNumeric() function: decode decimal_point and
  thousands_sep fields of localeconv() from the LC_NUMERIC encoding,
  rather than decoding from the LC_CTYPE encoding.
* Modify locale.localeconv() and the "n" formatter of str.format() (for
  int, float and complex) to use _Py_GetLocaleconvNumeric()
  internally.
This commit is contained in:
Victor Stinner 2018-01-15 15:58:02 +01:00 committed by GitHub
parent 7ed7aead95
commit cb064fc232
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 151 additions and 19 deletions

View File

@ -147,6 +147,16 @@ The :mod:`locale` module defines the following exception and functions:
| ``CHAR_MAX`` | Nothing is specified in this locale. | | ``CHAR_MAX`` | Nothing is specified in this locale. |
+--------------+-----------------------------------------+ +--------------+-----------------------------------------+
The function sets temporarily the ``LC_CTYPE`` locale to the ``LC_NUMERIC``
locale to decode ``decimal_point`` and ``thousands_sep`` byte strings if
they are non-ASCII or longer than 1 byte, and the ``LC_NUMERIC`` locale is
different than the ``LC_CTYPE`` locale. This temporary change affects other
threads.
.. versionchanged:: 3.7
The function now sets temporarily the ``LC_CTYPE`` locale to the
``LC_NUMERIC`` locale in some cases.
.. function:: nl_langinfo(option) .. function:: nl_langinfo(option)

View File

@ -1599,6 +1599,20 @@ expression support in the :mod:`re` module).
See :ref:`formatstrings` for a description of the various formatting options See :ref:`formatstrings` for a description of the various formatting options
that can be specified in format strings. that can be specified in format strings.
.. note::
When formatting a number (:class:`int`, :class:`float`, :class:`complex`
and subclasses) with the ``n`` type (ex: ``'{:n}'.format(1234)``), the
function sets temporarily the ``LC_CTYPE`` locale to the ``LC_NUMERIC``
locale to decode ``decimal_point`` and ``thousands_sep`` fields of
:c:func:`localeconv` if they are non-ASCII or longer than 1 byte, and the
``LC_NUMERIC`` locale is different than the ``LC_CTYPE`` locale. This
temporary change affects other threads.
.. versionchanged:: 3.7
When formatting a number with the ``n`` type, the function sets
temporarily the ``LC_CTYPE`` locale to the ``LC_NUMERIC`` locale in some
cases.
.. method:: str.format_map(mapping) .. method:: str.format_map(mapping)

View File

@ -866,6 +866,9 @@ Changes in Python behavior
Changes in the Python API Changes in the Python API
------------------------- -------------------------
* The :func:`locale.localeconv` function now sets temporarily the ``LC_CTYPE``
locale to the ``LC_NUMERIC`` locale in some cases.
* The ``asyncio.windows_utils.socketpair()`` function has been * The ``asyncio.windows_utils.socketpair()`` function has been
removed: use directly :func:`socket.socketpair` which is available on all removed: use directly :func:`socket.socketpair` which is available on all
platforms since Python 3.5 (before, it wasn't available on Windows). platforms since Python 3.5 (before, it wasn't available on Windows).

View File

@ -160,6 +160,11 @@ PyAPI_FUNC(int) _Py_get_blocking(int fd);
PyAPI_FUNC(int) _Py_set_blocking(int fd, int blocking); PyAPI_FUNC(int) _Py_set_blocking(int fd, int blocking);
#endif /* !MS_WINDOWS */ #endif /* !MS_WINDOWS */
PyAPI_FUNC(int) _Py_GetLocaleconvNumeric(
PyObject **decimal_point,
PyObject **thousands_sep,
const char **grouping);
#endif /* Py_LIMITED_API */ #endif /* Py_LIMITED_API */
#ifdef __cplusplus #ifdef __cplusplus

View File

@ -0,0 +1,9 @@
The :func:`locale.localeconv` function now sets temporarily the ``LC_CTYPE``
locale to the ``LC_NUMERIC`` locale to decode ``decimal_point`` and
``thousands_sep`` byte strings if they are non-ASCII or longer than 1 byte, and
the ``LC_NUMERIC`` locale is different than the ``LC_CTYPE`` locale. This
temporary change affects other threads.
Same change for the :meth:`str.format` method when formatting a number
(:class:`int`, :class:`float`, :class:`complex` and subclasses) with the ``n``
type (ex: ``'{:n}'.format(1234)``).

View File

@ -139,8 +139,9 @@ PyLocale_localeconv(PyObject* self)
PyObject *x; PyObject *x;
result = PyDict_New(); result = PyDict_New();
if (!result) if (!result) {
return NULL; return NULL;
}
/* if LC_NUMERIC is different in the C library, use saved value */ /* if LC_NUMERIC is different in the C library, use saved value */
l = localeconv(); l = localeconv();
@ -171,12 +172,6 @@ PyLocale_localeconv(PyObject* self)
RESULT(#i, x); \ RESULT(#i, x); \
} while (0) } while (0)
/* Numeric information */
RESULT_STRING(decimal_point);
RESULT_STRING(thousands_sep);
x = copy_grouping(l->grouping);
RESULT("grouping", x);
/* Monetary information */ /* Monetary information */
RESULT_STRING(int_curr_symbol); RESULT_STRING(int_curr_symbol);
RESULT_STRING(currency_symbol); RESULT_STRING(currency_symbol);
@ -195,10 +190,36 @@ PyLocale_localeconv(PyObject* self)
RESULT_INT(n_sep_by_space); RESULT_INT(n_sep_by_space);
RESULT_INT(p_sign_posn); RESULT_INT(p_sign_posn);
RESULT_INT(n_sign_posn); RESULT_INT(n_sign_posn);
/* Numeric information */
PyObject *decimal_point, *thousands_sep;
const char *grouping;
if (_Py_GetLocaleconvNumeric(&decimal_point,
&thousands_sep,
&grouping) < 0) {
goto failed;
}
if (PyDict_SetItemString(result, "decimal_point", decimal_point) < 0) {
Py_DECREF(decimal_point);
Py_DECREF(thousands_sep);
goto failed;
}
Py_DECREF(decimal_point);
if (PyDict_SetItemString(result, "thousands_sep", thousands_sep) < 0) {
Py_DECREF(thousands_sep);
goto failed;
}
Py_DECREF(thousands_sep);
x = copy_grouping(grouping);
RESULT("grouping", x);
return result; return result;
failed: failed:
Py_XDECREF(result); Py_DECREF(result);
return NULL; return NULL;
} }

View File

@ -1746,3 +1746,80 @@ error:
return -1; return -1;
} }
#endif #endif
/* Decode the numeric fields of localeconv() into Python objects.

   decimal_point and thousands_sep are decoded with PyUnicode_DecodeLocale().
   If one of the requested fields is non-ASCII or longer than 1 byte, and the
   LC_NUMERIC locale is different than the LC_CTYPE locale, the LC_CTYPE
   locale is temporarily set to the LC_NUMERIC locale while decoding, and
   restored before returning.

   Every output parameter may be NULL, in which case that field is skipped:

   - *decimal_point and *thousands_sep receive new references;
   - *grouping receives lc->grouping, a pointer into the structure returned
     by localeconv() (it is not copied).

   Return 0 on success. Return -1 on failure; the early failure paths set an
   exception explicitly. On failure, *decimal_point and *thousands_sep are
   each either NULL or a new reference, so the caller can release them with
   Py_XDECREF(). */
int
_Py_GetLocaleconvNumeric(PyObject **decimal_point, PyObject **thousands_sep,
                         const char **grouping)
{
    int res = -1;

    /* Give the object outputs a defined value on every failure path, so the
       caller never sees an uninitialized pointer. */
    if (decimal_point != NULL) {
        *decimal_point = NULL;
    }
    if (thousands_sep != NULL) {
        *thousands_sep = NULL;
    }

    struct lconv *lc = localeconv();

    /* A field that is a single ASCII byte decodes the same way whatever the
       LC_CTYPE locale is: only switch the locale for other values. */
    int change_locale = 0;
    if (decimal_point != NULL &&
        (strlen(lc->decimal_point) > 1 ||
         ((unsigned char)lc->decimal_point[0]) > 127))
    {
        change_locale = 1;
    }
    if (thousands_sep != NULL &&
        (strlen(lc->thousands_sep) > 1 ||
         ((unsigned char)lc->thousands_sep[0]) > 127))
    {
        change_locale = 1;
    }

    /* Keep a copy of the LC_CTYPE locale */
    char *oldloc = NULL, *loc = NULL;
    if (change_locale) {
        oldloc = setlocale(LC_CTYPE, NULL);
        if (!oldloc) {
            PyErr_SetString(PyExc_RuntimeWarning,
                            "failed to get LC_CTYPE locale");
            return -1;
        }

        /* Duplicate the name: the string returned by setlocale() may be
           overwritten by the setlocale() calls below. */
        oldloc = _PyMem_Strdup(oldloc);
        if (!oldloc) {
            PyErr_NoMemory();
            return -1;
        }

        loc = setlocale(LC_NUMERIC, NULL);
        if (loc != NULL && strcmp(loc, oldloc) == 0) {
            /* Same locale for both categories: nothing to switch. */
            loc = NULL;
        }

        if (loc != NULL) {
            /* Only set temporarily the LC_CTYPE locale if the LC_NUMERIC
               locale is different than the LC_CTYPE locale and
               decimal_point and/or thousands_sep are non-ASCII or longer
               than 1 byte */
            setlocale(LC_CTYPE, loc);
        }
    }

    if (decimal_point != NULL) {
        *decimal_point = PyUnicode_DecodeLocale(lc->decimal_point, NULL);
        if (*decimal_point == NULL) {
            goto error;
        }
    }
    if (thousands_sep != NULL) {
        *thousands_sep = PyUnicode_DecodeLocale(lc->thousands_sep, NULL);
        if (*thousands_sep == NULL) {
            goto error;
        }
    }

    if (grouping != NULL) {
        *grouping = lc->grouping;
    }

    res = 0;

error:
    /* loc is non-NULL only if LC_CTYPE was switched above: restore it. */
    if (loc != NULL) {
        setlocale(LC_CTYPE, oldloc);
    }
    PyMem_Free(oldloc);
    return res;
}

View File

@ -704,18 +704,11 @@ get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
{ {
switch (type) { switch (type) {
case LT_CURRENT_LOCALE: { case LT_CURRENT_LOCALE: {
struct lconv *locale_data = localeconv(); if (_Py_GetLocaleconvNumeric(&locale_info->decimal_point,
locale_info->decimal_point = PyUnicode_DecodeLocale( &locale_info->thousands_sep,
locale_data->decimal_point, &locale_info->grouping) < 0) {
NULL);
if (locale_info->decimal_point == NULL)
return -1; return -1;
locale_info->thousands_sep = PyUnicode_DecodeLocale( }
locale_data->thousands_sep,
NULL);
if (locale_info->thousands_sep == NULL)
return -1;
locale_info->grouping = locale_data->grouping;
break; break;
} }
case LT_DEFAULT_LOCALE: case LT_DEFAULT_LOCALE: