Patch #774665: Make Python LC_NUMERIC agnostic.

This commit is contained in:
Martin v. Löwis 2004-06-08 18:52:54 +00:00
parent 6ccc9a99df
commit 737ea82a5a
14 changed files with 317 additions and 103 deletions

View File

@ -456,25 +456,6 @@ find out what the current locale is. But since the return value can
only be used portably to restore it, that is not very useful (except
perhaps to find out whether or not the locale is \samp{C}).
When Python is embedded in an application, if the application sets the
locale to something specific before initializing Python, that is
generally okay, and Python will use whatever locale is set,
\emph{except} that the \constant{LC_NUMERIC} locale should always be
\samp{C}.
The \function{setlocale()} function in the \module{locale} module
gives the Python programmer the impression that you can manipulate the
\constant{LC_NUMERIC} locale setting, but this is not the case at the C
level: C code will always find that the \constant{LC_NUMERIC} locale
setting is \samp{C}. This is because too much would break when the
decimal point character is set to something other than a period
(e.g. the Python parser would break). Caveat: threads that run
without holding Python's global interpreter lock may occasionally find
that the numeric locale setting differs; this is because the only
portable way to implement this feature is to set the numeric locale
settings to what the user requests, extract the relevant
characteristics, and then restore the \samp{C} numeric locale.
When Python code uses the \module{locale} module to change the locale,
this also affects the embedding application. If the embedding
application doesn't want this to happen, it should remove the

View File

@ -119,6 +119,8 @@
#include "compile.h"
#include "eval.h"
#include "pystrtod.h"
/* _Py_Mangle is defined in compile.c */
PyAPI_FUNC(int) _Py_Mangle(char *p, char *name, \
char *buffer, size_t maxlen);

18
Include/pystrtod.h Normal file
View File

@ -0,0 +1,18 @@
#ifndef Py_STRTOD_H
#define Py_STRTOD_H
/* Conversions between doubles and strings that always use '.' as the
   decimal point, whatever LC_NUMERIC category is currently in effect.
   The definitions live in Python/pystrtod.c. */
#ifdef __cplusplus
extern "C" {
#endif
/* Parse a double from str the way strtod() does in the "C" locale.
   If ptr is non-NULL, *ptr receives the position just past the last
   character consumed.  errno is reset to 0 before the conversion. */
double PyOS_ascii_strtod(const char *str, char **ptr);
/* Same as PyOS_ascii_strtod(str, NULL): the end position is not reported. */
double PyOS_ascii_atof(const char *str);
/* Format d into buffer (capacity buf_len) using a single printf-style
   conversion in format, then rewrite the locale's decimal point as '.'.
   format must start with '%', end in one of e E f F g G, and must not
   contain ', l or a second %.  Returns buffer, or NULL if format is
   rejected. */
char * PyOS_ascii_formatd(char *buffer, int buf_len, const char *format, double d);
#ifdef __cplusplus
}
#endif
#endif /* !Py_STRTOD_H */

View File

@ -247,6 +247,7 @@ PYTHON_OBJS= \
Python/sysmodule.o \
Python/traceback.o \
Python/getopt.o \
Python/pystrtod.o \
Python/$(DYNLOADFILE) \
$(MACHDEP_OBJS) \
$(THREADOBJ)

View File

@ -12,6 +12,10 @@ What's New in Python 2.4 alpha 1?
Core and builtins
-----------------
- Python no longer relies on the LC_NUMERIC locale setting to be
the "C" locale; as a result, it no longer tries to prevent changing
the LC_NUMERIC category.
- Bug #952807: Unpickling pickled instances of subclasses of
datetime.date, datetime.datetime and datetime.time could yield insane
objects. Thanks to Jiwon Seo for a fix.

View File

@ -51,13 +51,6 @@ static PyObject *Error;
PyDoc_STRVAR(setlocale__doc__,
"(integer,string=None) -> string. Activates/queries locale processing.");
/* to record the LC_NUMERIC settings */
static PyObject* grouping = NULL;
static PyObject* thousands_sep = NULL;
static PyObject* decimal_point = NULL;
/* if non-null, indicates that LC_NUMERIC is different from "C" */
static char* saved_numeric = NULL;
/* the grouping is terminated by either 0 or CHAR_MAX */
static PyObject*
copy_grouping(char* s)
@ -167,7 +160,6 @@ PyLocale_setlocale(PyObject* self, PyObject* args)
int category;
char *locale = NULL, *result;
PyObject *result_object;
struct lconv *lc;
if (!PyArg_ParseTuple(args, "i|z:setlocale", &category, &locale))
return NULL;
@ -183,29 +175,6 @@ PyLocale_setlocale(PyObject* self, PyObject* args)
result_object = PyString_FromString(result);
if (!result_object)
return NULL;
/* record changes to LC_NUMERIC */
if (category == LC_NUMERIC || category == LC_ALL) {
if (strcmp(locale, "C") == 0 || strcmp(locale, "POSIX") == 0) {
/* user just asked for default numeric locale */
if (saved_numeric)
free(saved_numeric);
saved_numeric = NULL;
} else {
/* remember values */
lc = localeconv();
Py_XDECREF(grouping);
grouping = copy_grouping(lc->grouping);
Py_XDECREF(thousands_sep);
thousands_sep = PyString_FromString(lc->thousands_sep);
Py_XDECREF(decimal_point);
decimal_point = PyString_FromString(lc->decimal_point);
if (saved_numeric)
free(saved_numeric);
saved_numeric = strdup(locale);
/* restore to "C" */
setlocale(LC_NUMERIC, "C");
}
}
/* record changes to LC_CTYPE */
if (category == LC_CTYPE || category == LC_ALL)
fixup_ulcase();
@ -213,18 +182,12 @@ PyLocale_setlocale(PyObject* self, PyObject* args)
PyErr_Clear();
} else {
/* get locale */
/* restore LC_NUMERIC first, if appropriate */
if (saved_numeric)
setlocale(LC_NUMERIC, saved_numeric);
result = setlocale(category, NULL);
if (!result) {
PyErr_SetString(Error, "locale query failed");
return NULL;
}
result_object = PyString_FromString(result);
/* restore back to "C" */
if (saved_numeric)
setlocale(LC_NUMERIC, "C");
}
return result_object;
}
@ -262,20 +225,13 @@ PyLocale_localeconv(PyObject* self)
Py_XDECREF(x)
/* Numeric information */
if (saved_numeric){
/* cannot use localeconv results */
PyDict_SetItemString(result, "decimal_point", decimal_point);
PyDict_SetItemString(result, "grouping", grouping);
PyDict_SetItemString(result, "thousands_sep", thousands_sep);
} else {
RESULT_STRING(decimal_point);
RESULT_STRING(thousands_sep);
x = copy_grouping(l->grouping);
if (!x)
goto failed;
PyDict_SetItemString(result, "grouping", x);
Py_XDECREF(x);
}
RESULT_STRING(decimal_point);
RESULT_STRING(thousands_sep);
x = copy_grouping(l->grouping);
if (!x)
goto failed;
PyDict_SetItemString(result, "grouping", x);
Py_XDECREF(x);
/* Monetary information */
RESULT_STRING(int_curr_symbol);
@ -579,18 +535,6 @@ PyLocale_nl_langinfo(PyObject* self, PyObject* args)
/* Check whether this is a supported constant. GNU libc sometimes
returns numeric values in the char* return value, which would
crash PyString_FromString. */
#ifdef RADIXCHAR
if (saved_numeric) {
if(item == RADIXCHAR) {
Py_INCREF(decimal_point);
return decimal_point;
}
if(item == THOUSEP) {
Py_INCREF(thousands_sep);
return thousands_sep;
}
}
#endif
for (i = 0; langinfo_constants[i].name; i++)
if (langinfo_constants[i].value == item) {
/* Check NULL as a workaround for GNU libc's returning NULL

View File

@ -3319,7 +3319,7 @@ load_float(Unpicklerobject *self)
if (!( s=pystrndup(s,len))) return -1;
errno = 0;
d = strtod(s, &endptr);
d = PyOS_ascii_strtod(s, &endptr);
if (errno || (endptr[0] != '\n') || (endptr[1] != '\0')) {
PyErr_SetString(PyExc_ValueError,

View File

@ -838,7 +838,6 @@ PyDoc_STRVAR(atof__doc__,
static PyObject *
strop_atof(PyObject *self, PyObject *args)
{
extern double strtod(const char *, char **);
char *s, *end;
double x;
char buffer[256]; /* For errors */
@ -854,7 +853,7 @@ strop_atof(PyObject *self, PyObject *args)
}
errno = 0;
PyFPE_START_PROTECT("strop_atof", return 0)
x = strtod(s, &end);
x = PyOS_ascii_strtod(s, &end);
PyFPE_END_PROTECT(x)
while (*end && isspace(Py_CHARMASK(*end)))
end++;

View File

@ -272,13 +272,19 @@ complex_dealloc(PyObject *op)
/* Write the textual form of the complex number *v into buf, which has room
   for bufsz bytes, using `precision` significant digits per component.

   A value whose real part is zero is written as "<imag>j"; any other value
   is written as "(<real><sign><imag>j)".  The components are produced by
   PyOS_ascii_formatd() so that the decimal point is always '.'. */
static void
complex_to_buf(char *buf, int bufsz, PyComplexObject *v, int precision)
{
    char format[32];

    if (v->cval.real == 0.) {
        size_t len;

        PyOS_snprintf(format, sizeof(format), "%%.%ig", precision);
        PyOS_ascii_formatd(buf, bufsz, format, v->cval.imag);
        /* Append the 'j' suffix only if it fits.  strncat()'s size
           argument limits the source, not the destination, so it cannot
           be used as a bounds check here. */
        len = strlen(buf);
        if (len + 1 < (size_t)bufsz) {
            buf[len] = 'j';
            buf[len + 1] = '\0';
        }
    } else {
        char re[64], im[64];

        /* Real part without an explicit sign ... */
        PyOS_snprintf(format, sizeof(format), "%%.%ig", precision);
        PyOS_ascii_formatd(re, (int)sizeof(re), format, v->cval.real);
        /* ... imaginary part always signed, so that a negative value
           yields "(1-2j)" rather than "(1+-2j)". */
        PyOS_snprintf(format, sizeof(format), "%%+.%ig", precision);
        PyOS_ascii_formatd(im, (int)sizeof(im), format, v->cval.imag);
        PyOS_snprintf(buf, bufsz, "(%s%sj)", re, im);
    }
}
static int
@ -662,7 +668,6 @@ static PyMemberDef complex_members[] = {
static PyObject *
complex_subtype_from_string(PyTypeObject *type, PyObject *v)
{
extern double strtod(const char *, char **);
const char *s, *start;
char *end;
double x=0.0, y=0.0, z;
@ -774,7 +779,7 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v)
}
errno = 0;
PyFPE_START_PROTECT("strtod", return 0)
z = strtod(s, &end) ;
z = PyOS_ascii_strtod(s, &end) ;
PyFPE_END_PROTECT(z)
if (errno != 0) {
PyOS_snprintf(buffer, sizeof(buffer),

View File

@ -132,7 +132,7 @@ PyFloat_FromString(PyObject *v, char **pend)
* key off errno.
*/
PyFPE_START_PROTECT("strtod", return NULL)
x = strtod(s, (char **)&end);
x = PyOS_ascii_strtod(s, (char **)&end);
PyFPE_END_PROTECT(x)
errno = 0;
/* Believe it or not, Solaris 2.6 can move end *beyond* the null
@ -164,7 +164,7 @@ PyFloat_FromString(PyObject *v, char **pend)
/* See above -- may have been strtod being anal
about denorms. */
PyFPE_START_PROTECT("atof", return NULL)
x = atof(s);
x = PyOS_ascii_atof(s);
PyFPE_END_PROTECT(x)
errno = 0; /* whether atof ever set errno is undefined */
}
@ -223,6 +223,7 @@ static void
format_float(char *buf, size_t buflen, PyFloatObject *v, int precision)
{
register char *cp;
char format[32];
/* Subroutine for float_repr and float_print.
We want float numbers to be recognizable as such,
i.e., they should contain a decimal point or an exponent.
@ -230,7 +231,8 @@ format_float(char *buf, size_t buflen, PyFloatObject *v, int precision)
in such cases, we append ".0" to the string. */
assert(PyFloat_Check(v));
PyOS_snprintf(buf, buflen, "%.*g", precision, v->ob_fval);
PyOS_snprintf(format, 32, "%%.%ig", precision);
PyOS_ascii_formatd(buf, buflen, format, v->ob_fval);
cp = buf;
if (*cp == '-')
cp++;

View File

@ -3582,7 +3582,7 @@ formatfloat(char *buf, size_t buflen, int flags,
PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
(flags&F_ALT) ? "#" : "",
prec, type);
PyOS_snprintf(buf, buflen, fmt, x);
PyOS_ascii_formatd(buf, buflen, fmt, x);
return strlen(buf);
}

View File

@ -1379,7 +1379,7 @@ parsenumber(struct compiling *c, char *s)
Py_complex z;
z.real = 0.;
PyFPE_START_PROTECT("atof", return 0)
z.imag = atof(s);
z.imag = PyOS_ascii_atof(s);
PyFPE_END_PROTECT(z)
return PyComplex_FromCComplex(z);
}
@ -1387,7 +1387,7 @@ parsenumber(struct compiling *c, char *s)
#endif
{
PyFPE_START_PROTECT("atof", return 0)
dx = atof(s);
dx = PyOS_ascii_atof(s);
PyFPE_END_PROTECT(dx)
return PyFloat_FromDouble(dx);
}

View File

@ -457,7 +457,7 @@ r_object(RFILE *p)
}
buf[n] = '\0';
PyFPE_START_PROTECT("atof", return 0)
dx = atof(buf);
dx = PyOS_ascii_atof(buf);
PyFPE_END_PROTECT(dx)
return PyFloat_FromDouble(dx);
}
@ -475,7 +475,7 @@ r_object(RFILE *p)
}
buf[n] = '\0';
PyFPE_START_PROTECT("atof", return 0)
c.real = atof(buf);
c.real = PyOS_ascii_atof(buf);
PyFPE_END_PROTECT(c)
n = r_byte(p);
if (n == EOF || r_string(buf, (int)n, p) != n) {
@ -485,7 +485,7 @@ r_object(RFILE *p)
}
buf[n] = '\0';
PyFPE_START_PROTECT("atof", return 0)
c.imag = atof(buf);
c.imag = PyOS_ascii_atof(buf);
PyFPE_END_PROTECT(c)
return PyComplex_FromCComplex(c);
}

258
Python/pystrtod.c Normal file
View File

@ -0,0 +1,258 @@
/* -*- Mode: C; c-file-style: "python" -*- */
#include <Python.h>
#include <locale.h>
/* ASCII character tests (as opposed to the locale-dependent <ctype.h> tests) */
#define ISSPACE(c)  ((c) == ' ' || (c) == '\f' || (c) == '\n' || \
                     (c) == '\r' || (c) == '\t' || (c) == '\v')
#define ISDIGIT(c)  ((c) >= '0' && (c) <= '9')
#define ISXDIGIT(c) (ISDIGIT(c) || ((c) >= 'a' && (c) <= 'f') || ((c) >= 'A' && (c) <= 'F'))

/**
 * PyOS_ascii_strtod:
 * @nptr:   the string to convert to a numeric value; must not be NULL.
 * @endptr: if non-NULL, receives a pointer to the character after
 *          the last character used in the conversion.
 *
 * Converts a string to a double.  A '.' in the input is treated as the
 * decimal point regardless of the current LC_NUMERIC setting.  The
 * current locale is not changed to achieve this, since that would not
 * be thread-safe; instead, when the locale's decimal point is not ".",
 * a temporary copy of the number is made in which '.' is replaced by
 * the locale's decimal point, and that copy is handed to strtod().
 *
 * This function is typically used when reading configuration
 * files or other non-user input that should be locale independent.
 * To handle input from the user you should normally use the
 * locale-sensitive system strtod() function.
 *
 * errno is reset to 0 before strtod() is called, so that overflow and
 * underflow (ERANGE, as reported by strtod()) can be detected reliably.
 * If the temporary copy cannot be allocated, errno is set to ENOMEM,
 * *endptr (if requested) is set to @nptr, and 0.0 is returned.
 *
 * NOTE: input without a '.' is passed to strtod() unchanged, so text
 * that already contains the locale's own decimal point is still
 * interpreted according to the current locale.
 *
 * Return value: the converted double.
 **/
double
PyOS_ascii_strtod(const char *nptr,
                  char **endptr)
{
    char *fail_pos = NULL;
    double val;
    struct lconv *locale_data;
    const char *decimal_point;
    size_t decimal_point_len;
    const char *p;
    const char *decimal_point_pos = NULL;
    const char *end = NULL; /* Silence gcc */

    assert(nptr != NULL);

    locale_data = localeconv();
    decimal_point = locale_data->decimal_point;
    decimal_point_len = strlen(decimal_point);

    assert(decimal_point_len != 0);

    if (decimal_point[0] != '.' ||
        decimal_point[1] != 0)
    {
        /* The locale does not use ".": find out whether the number
           contains a '.' that has to be translated, and where the
           number ends. */
        p = nptr;
        /* Skip leading space */
        while (ISSPACE(*p))
            p++;

        /* Skip leading optional sign */
        if (*p == '+' || *p == '-')
            p++;

        if (p[0] == '0' &&
            (p[1] == 'x' || p[1] == 'X'))
        {
            p += 2;
            /* HEX - find the (optional) decimal point */
            while (ISXDIGIT(*p))
                p++;

            if (*p == '.')
            {
                decimal_point_pos = p++;

                while (ISXDIGIT(*p))
                    p++;

                if (*p == 'p' || *p == 'P')
                    p++;
                if (*p == '+' || *p == '-')
                    p++;
                while (ISDIGIT(*p))
                    p++;
                end = p;
            }
        }
        else
        {
            while (ISDIGIT(*p))
                p++;

            if (*p == '.')
            {
                decimal_point_pos = p++;

                while (ISDIGIT(*p))
                    p++;

                if (*p == 'e' || *p == 'E')
                    p++;
                if (*p == '+' || *p == '-')
                    p++;
                while (ISDIGIT(*p))
                    p++;
                end = p;
            }
        }
        /* For the other cases, we need not convert the decimal point */
    }

    /* Set errno to zero, so that we can distinguish zero results
       and underflows */
    errno = 0;

    if (decimal_point_pos)
    {
        char *copy, *c;
        /* Bytes before the '.', and bytes after it up to `end`. */
        size_t prefix_len = (size_t)(decimal_point_pos - nptr);
        size_t suffix_len = (size_t)(end - (decimal_point_pos + 1));

        /* We need to convert the '.' to the locale specific decimal point */
        copy = malloc(prefix_len + decimal_point_len + suffix_len + 1);
        if (copy == NULL)
        {
            if (endptr)
                *endptr = (char *)nptr;
            errno = ENOMEM;
            return 0.0;
        }

        c = copy;
        memcpy(c, nptr, prefix_len);
        c += prefix_len;
        memcpy(c, decimal_point, decimal_point_len);
        c += decimal_point_len;
        memcpy(c, decimal_point_pos + 1, suffix_len);
        c += suffix_len;
        *c = 0;

        val = strtod(copy, &fail_pos);

        if (fail_pos)
        {
            /* Translate the stop position from an offset in `copy` back
               to an offset in `nptr`.  Offsets are compared (not the raw
               pointers, which belong to different objects); anything past
               the decimal point is shifted by the extra bytes the locale's
               decimal point occupies compared to a single '.'. */
            size_t consumed = (size_t)(fail_pos - copy);

            if (consumed > prefix_len)
                consumed -= decimal_point_len - 1;
            fail_pos = (char *)nptr + consumed;
        }

        free(copy);
    }
    else
        val = strtod(nptr, &fail_pos);

    if (endptr)
        *endptr = fail_pos;

    return val;
}
/**
 * PyOS_ascii_formatd:
 * @buffer:  A buffer to place the resulting string in
 * @buf_len: The length of the buffer.
 * @format:  The printf()-style format to use for converting.
 * @d:       The double to convert
 *
 * Converts a double to a string, using '.' as the decimal point
 * regardless of the current LC_NUMERIC setting.  The number is first
 * formatted with PyOS_snprintf() and the locale's decimal point, if it
 * differs from ".", is then rewritten in place.
 *
 * @format must consist of a single conversion: it has to start with
 * '%', end in one of 'e', 'E', 'f', 'F', 'g' or 'G', and must not
 * contain "'", 'l' or a further '%'.
 *
 * Return value: The pointer to the buffer with the converted string,
 * or NULL (with @buffer left untouched) if @format is rejected.
 **/
char *
PyOS_ascii_formatd(char *buffer,
                   int buf_len,
                   const char *format,
                   double d)
{
    struct lconv *locale_data;
    const char *decimal_point;
    size_t decimal_point_len;
    size_t format_len;
    size_t rest_len;
    char *p;
    char format_char;

    /* Validate the format before looking at its last character: an
       empty string has none. */
    format_len = strlen(format);
    if (format_len == 0 || format[0] != '%')
        return NULL;

    if (strpbrk(format + 1, "'l%"))
        return NULL;

    format_char = format[format_len - 1];
    if (!(format_char == 'e' || format_char == 'E' ||
          format_char == 'f' || format_char == 'F' ||
          format_char == 'g' || format_char == 'G'))
        return NULL;

    PyOS_snprintf(buffer, buf_len, format, d);

    locale_data = localeconv();
    decimal_point = locale_data->decimal_point;
    decimal_point_len = strlen(decimal_point);

    assert(decimal_point_len != 0);

    if (decimal_point[0] != '.' ||
        decimal_point[1] != 0)
    {
        p = buffer;

        /* A field width or the ' ' flag puts blanks in front of the
           number; step over them so the decimal point is still found. */
        while (*p == ' ')
            p++;

        if (*p == '+' || *p == '-')
            p++;

        while (isdigit((unsigned char)*p))
            p++;

        if (strncmp(p, decimal_point, decimal_point_len) == 0)
        {
            *p = '.';
            p++;
            if (decimal_point_len > 1) {
                /* Close the gap left by a multi-byte decimal point. */
                rest_len = strlen(p + (decimal_point_len - 1));
                memmove(p, p + (decimal_point_len - 1),
                        rest_len);
                p[rest_len] = 0;
            }
        }
    }

    return buffer;
}
/**
 * PyOS_ascii_atof:
 * @nptr: the string to convert to a numeric value.
 *
 * Convenience wrapper around PyOS_ascii_strtod() for callers that do
 * not need to know where the conversion stopped.
 *
 * Return value: the converted double.
 **/
double
PyOS_ascii_atof(const char *nptr)
{
    /* No end pointer is requested, so unparsed trailing text is not reported. */
    double value = PyOS_ascii_strtod(nptr, NULL);

    return value;
}