mirror of https://github.com/python/cpython
Issue #10557: Fixed error messages from float() and other numeric
types. Added a new API function, PyUnicode_TransformDecimalToASCII(), which transforms non-ASCII decimal digits in a Unicode string to their ASCII equivalents.
This commit is contained in:
parent
36526bf3d9
commit
942af5a9a4
|
@ -328,6 +328,13 @@ APIs:
|
|||
Identical to :c:func:`PyUnicode_FromFormat` except that it takes exactly two
|
||||
arguments.
|
||||
|
||||
.. c:function:: PyObject* PyUnicode_TransformDecimalToASCII(Py_UNICODE *s, Py_ssize_t size)
|
||||
|
||||
Create a Unicode object by replacing all decimal digits in the
|
||||
:c:type:`Py_UNICODE` buffer of the given size by ASCII digits 0--9
|
||||
according to their decimal value. Return *NULL* if an exception
|
||||
occurs.
|
||||
|
||||
|
||||
.. c:function:: Py_UNICODE* PyUnicode_AsUnicode(PyObject *unicode)
|
||||
|
||||
|
|
|
@ -1225,6 +1225,17 @@ PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
|
|||
);
|
||||
#endif
|
||||
|
||||
/* Transforms code points that have the decimal digit property to the
|
||||
corresponding ASCII digit code points.
|
||||
|
||||
Returns a new Unicode string on success, NULL on failure.
|
||||
*/
|
||||
|
||||
PyAPI_FUNC(PyObject*) PyUnicode_TransformDecimalToASCII(
|
||||
Py_UNICODE *s, /* Unicode buffer */
|
||||
Py_ssize_t length /* Number of Py_UNICODE chars to transform */
|
||||
);
|
||||
|
||||
/* --- File system encoding ---------------------------------------------- */
|
||||
|
||||
/* ParseTuple converter: encode str objects to bytes using
|
||||
|
|
|
@ -220,6 +220,7 @@ class ComplexTest(unittest.TestCase):
|
|||
self.assertEqual(complex(NS(1+10j)), 1+10j)
|
||||
self.assertRaises(TypeError, complex, OS(None))
|
||||
self.assertRaises(TypeError, complex, NS(None))
|
||||
self.assertRaises(TypeError, complex, {})
|
||||
|
||||
self.assertAlmostEqual(complex("1+10j"), 1+10j)
|
||||
self.assertAlmostEqual(complex(10), 10+0j)
|
||||
|
@ -325,6 +326,8 @@ class ComplexTest(unittest.TestCase):
|
|||
|
||||
# check that complex accepts long unicode strings
|
||||
self.assertEqual(type(complex("1"*500)), complex)
|
||||
# check whitespace processing
|
||||
self.assertEqual(complex('\N{EM SPACE}(\N{EN SPACE}1+1j ) '), 1+1j)
|
||||
|
||||
class EvilExc(Exception):
|
||||
pass
|
||||
|
|
|
@ -43,14 +43,30 @@ class GeneralFloatCases(unittest.TestCase):
|
|||
self.assertRaises(ValueError, float, "+.inf")
|
||||
self.assertRaises(ValueError, float, ".")
|
||||
self.assertRaises(ValueError, float, "-.")
|
||||
self.assertRaises(ValueError, float, b"-")
|
||||
self.assertRaises(TypeError, float, {})
|
||||
# Lone surrogate
|
||||
self.assertRaises(UnicodeEncodeError, float, '\uD8F0')
|
||||
# check that we don't accept alternate exponent markers
|
||||
self.assertRaises(ValueError, float, "-1.7d29")
|
||||
self.assertRaises(ValueError, float, "3D-14")
|
||||
self.assertEqual(float(b" \u0663.\u0661\u0664 ".decode('raw-unicode-escape')), 3.14)
|
||||
self.assertEqual(float(" \u0663.\u0661\u0664 "), 3.14)
|
||||
self.assertEqual(float("\N{EM SPACE}3.14\N{EN SPACE}"), 3.14)
|
||||
# extra long strings should not be a problem
|
||||
float(b'.' + b'1'*1000)
|
||||
float('.' + '1'*1000)
|
||||
|
||||
def test_error_message(self):
|
||||
testlist = ('\xbd', '123\xbd', ' 123 456 ')
|
||||
for s in testlist:
|
||||
try:
|
||||
float(s)
|
||||
except ValueError as e:
|
||||
self.assertIn(s.strip(), e.args[0])
|
||||
else:
|
||||
self.fail("Expected int(%r) to raise a ValueError", s)
|
||||
|
||||
|
||||
@support.run_with_locale('LC_NUMERIC', 'fr_FR', 'de_DE')
|
||||
def test_float_with_comma(self):
|
||||
# set locale to something that doesn't use '.' for the decimal point
|
||||
|
|
|
@ -20,7 +20,8 @@ L = [
|
|||
(' 1\02 ', ValueError),
|
||||
('', ValueError),
|
||||
(' ', ValueError),
|
||||
(' \t\t ', ValueError)
|
||||
(' \t\t ', ValueError),
|
||||
("\u0200", ValueError)
|
||||
]
|
||||
|
||||
class IntTestCases(unittest.TestCase):
|
||||
|
@ -35,6 +36,8 @@ class IntTestCases(unittest.TestCase):
|
|||
self.assertEqual(int(3.5), 3)
|
||||
self.assertEqual(int(-3.5), -3)
|
||||
self.assertEqual(int("-3"), -3)
|
||||
self.assertEqual(int(" -3 "), -3)
|
||||
self.assertEqual(int("\N{EM SPACE}-3\N{EN SPACE}"), -3)
|
||||
# Different base:
|
||||
self.assertEqual(int("10",16), 16)
|
||||
# Test conversion from strings and various anomalies
|
||||
|
@ -302,6 +305,16 @@ class IntTestCases(unittest.TestCase):
|
|||
self.fail("Failed to raise TypeError with %s" %
|
||||
((base, trunc_result_base),))
|
||||
|
||||
def test_error_message(self):
|
||||
testlist = ('\xbd', '123\xbd', ' 123 456 ')
|
||||
for s in testlist:
|
||||
try:
|
||||
int(s)
|
||||
except ValueError as e:
|
||||
self.assertIn(s.strip(), e.args[0])
|
||||
else:
|
||||
self.fail("Expected int(%r) to raise a ValueError", s)
|
||||
|
||||
def test_main():
|
||||
run_unittest(IntTestCases)
|
||||
|
||||
|
|
|
@ -1168,8 +1168,13 @@ class UnicodeTest(string_tests.CommonTest,
|
|||
# Error handling (wrong arguments)
|
||||
self.assertRaises(TypeError, "hello".encode, 42, 42, 42)
|
||||
|
||||
# Error handling (PyUnicode_EncodeDecimal())
|
||||
self.assertRaises(UnicodeError, int, "\u0200")
|
||||
# Error handling (lone surrogate in PyUnicode_TransformDecimalToASCII())
|
||||
self.assertRaises(UnicodeError, int, "\ud800")
|
||||
self.assertRaises(UnicodeError, int, "\udf00")
|
||||
self.assertRaises(UnicodeError, float, "\ud800")
|
||||
self.assertRaises(UnicodeError, float, "\udf00")
|
||||
self.assertRaises(UnicodeError, complex, "\ud800")
|
||||
self.assertRaises(UnicodeError, complex, "\udf00")
|
||||
|
||||
def test_codecs(self):
|
||||
# Encoding
|
||||
|
|
|
@ -222,6 +222,10 @@ Library
|
|||
C-API
|
||||
-----
|
||||
|
||||
- Issue #10557: Added a new API function, PyUnicode_TransformDecimalToASCII(),
|
||||
which transforms non-ASCII decimal digits in a Unicode string to their
|
||||
ASCII equivalents.
|
||||
|
||||
- Issue #9518: Extend the PyModuleDef_HEAD_INIT macro to explicitly
|
||||
zero-initialize all fields, fixing compiler warnings seen when building
|
||||
extension modules with gcc with "-Wmissing-field-initializers" (implied by
|
||||
|
|
|
@ -766,20 +766,26 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v)
|
|||
char *end;
|
||||
double x=0.0, y=0.0, z;
|
||||
int got_bracket=0;
|
||||
char *s_buffer = NULL;
|
||||
PyObject *s_buffer = NULL;
|
||||
Py_ssize_t len;
|
||||
|
||||
if (PyUnicode_Check(v)) {
|
||||
s_buffer = (char *)PyMem_MALLOC(PyUnicode_GET_SIZE(v) + 1);
|
||||
Py_ssize_t i, buflen = PyUnicode_GET_SIZE(v);
|
||||
Py_UNICODE *bufptr;
|
||||
s_buffer = PyUnicode_TransformDecimalToASCII(
|
||||
PyUnicode_AS_UNICODE(v), buflen);
|
||||
if (s_buffer == NULL)
|
||||
return PyErr_NoMemory();
|
||||
if (PyUnicode_EncodeDecimal(PyUnicode_AS_UNICODE(v),
|
||||
PyUnicode_GET_SIZE(v),
|
||||
s_buffer,
|
||||
NULL))
|
||||
return NULL;
|
||||
/* Replace non-ASCII whitespace with ' ' */
|
||||
bufptr = PyUnicode_AS_UNICODE(s_buffer);
|
||||
for (i = 0; i < buflen; i++) {
|
||||
Py_UNICODE ch = bufptr[i];
|
||||
if (ch > 127 && Py_UNICODE_ISSPACE(ch))
|
||||
bufptr[i] = ' ';
|
||||
}
|
||||
s = _PyUnicode_AsStringAndSize(s_buffer, &len);
|
||||
if (s == NULL)
|
||||
goto error;
|
||||
s = s_buffer;
|
||||
len = strlen(s);
|
||||
}
|
||||
else if (PyObject_AsCharBuffer(v, &s, &len)) {
|
||||
PyErr_SetString(PyExc_TypeError,
|
||||
|
@ -894,16 +900,14 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v)
|
|||
if (s-start != len)
|
||||
goto parse_error;
|
||||
|
||||
if (s_buffer)
|
||||
PyMem_FREE(s_buffer);
|
||||
Py_XDECREF(s_buffer);
|
||||
return complex_subtype_from_doubles(type, x, y);
|
||||
|
||||
parse_error:
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"complex() arg is a malformed string");
|
||||
error:
|
||||
if (s_buffer)
|
||||
PyMem_FREE(s_buffer);
|
||||
Py_XDECREF(s_buffer);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
|
|
@ -174,22 +174,30 @@ PyFloat_FromString(PyObject *v)
|
|||
{
|
||||
const char *s, *last, *end;
|
||||
double x;
|
||||
char buffer[256]; /* for errors */
|
||||
char *s_buffer = NULL;
|
||||
PyObject *s_buffer = NULL;
|
||||
Py_ssize_t len;
|
||||
PyObject *result = NULL;
|
||||
|
||||
if (PyUnicode_Check(v)) {
|
||||
s_buffer = (char *)PyMem_MALLOC(PyUnicode_GET_SIZE(v)+1);
|
||||
Py_ssize_t i, buflen = PyUnicode_GET_SIZE(v);
|
||||
Py_UNICODE *bufptr;
|
||||
s_buffer = PyUnicode_TransformDecimalToASCII(
|
||||
PyUnicode_AS_UNICODE(v), buflen);
|
||||
if (s_buffer == NULL)
|
||||
return PyErr_NoMemory();
|
||||
if (PyUnicode_EncodeDecimal(PyUnicode_AS_UNICODE(v),
|
||||
PyUnicode_GET_SIZE(v),
|
||||
s_buffer,
|
||||
NULL))
|
||||
goto error;
|
||||
s = s_buffer;
|
||||
len = strlen(s);
|
||||
return NULL;
|
||||
/* Replace non-ASCII whitespace with ' ' */
|
||||
bufptr = PyUnicode_AS_UNICODE(s_buffer);
|
||||
for (i = 0; i < buflen; i++) {
|
||||
Py_UNICODE ch = bufptr[i];
|
||||
if (ch > 127 && Py_UNICODE_ISSPACE(ch))
|
||||
bufptr[i] = ' ';
|
||||
}
|
||||
s = _PyUnicode_AsStringAndSize(s_buffer, &len);
|
||||
if (s == NULL) {
|
||||
Py_DECREF(s_buffer);
|
||||
return NULL;
|
||||
}
|
||||
last = s + len;
|
||||
}
|
||||
else if (PyObject_AsCharBuffer(v, &s, &len)) {
|
||||
PyErr_SetString(PyExc_TypeError,
|
||||
|
@ -197,29 +205,27 @@ PyFloat_FromString(PyObject *v)
|
|||
return NULL;
|
||||
}
|
||||
last = s + len;
|
||||
|
||||
while (Py_ISSPACE(*s))
|
||||
/* strip space */
|
||||
while (s < last && Py_ISSPACE(*s))
|
||||
s++;
|
||||
while (s < last - 1 && Py_ISSPACE(last[-1]))
|
||||
last--;
|
||||
/* We don't care about overflow or underflow. If the platform
|
||||
* supports them, infinities and signed zeroes (on underflow) are
|
||||
* fine. */
|
||||
x = PyOS_string_to_double(s, (char **)&end, NULL);
|
||||
if (x == -1.0 && PyErr_Occurred())
|
||||
goto error;
|
||||
while (Py_ISSPACE(*end))
|
||||
end++;
|
||||
if (end == last)
|
||||
result = PyFloat_FromDouble(x);
|
||||
else {
|
||||
PyOS_snprintf(buffer, sizeof(buffer),
|
||||
"invalid literal for float(): %.200s", s);
|
||||
PyErr_SetString(PyExc_ValueError, buffer);
|
||||
if (end != last) {
|
||||
PyErr_Format(PyExc_ValueError,
|
||||
"could not convert string to float: "
|
||||
"%R", v);
|
||||
result = NULL;
|
||||
}
|
||||
else if (x == -1.0 && PyErr_Occurred())
|
||||
result = NULL;
|
||||
else
|
||||
result = PyFloat_FromDouble(x);
|
||||
|
||||
error:
|
||||
if (s_buffer)
|
||||
PyMem_FREE(s_buffer);
|
||||
Py_XDECREF(s_buffer);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
@ -2133,17 +2133,34 @@ PyObject *
|
|||
PyLong_FromUnicode(Py_UNICODE *u, Py_ssize_t length, int base)
|
||||
{
|
||||
PyObject *result;
|
||||
char *buffer = (char *)PyMem_MALLOC(length+1);
|
||||
PyObject *asciidig;
|
||||
char *buffer, *end;
|
||||
Py_ssize_t i, buflen;
|
||||
Py_UNICODE *ptr;
|
||||
|
||||
if (buffer == NULL)
|
||||
asciidig = PyUnicode_TransformDecimalToASCII(u, length);
|
||||
if (asciidig == NULL)
|
||||
return NULL;
|
||||
|
||||
if (PyUnicode_EncodeDecimal(u, length, buffer, NULL)) {
|
||||
PyMem_FREE(buffer);
|
||||
/* Replace non-ASCII whitespace with ' ' */
|
||||
ptr = PyUnicode_AS_UNICODE(asciidig);
|
||||
for (i = 0; i < length; i++) {
|
||||
Py_UNICODE ch = ptr[i];
|
||||
if (ch > 127 && Py_UNICODE_ISSPACE(ch))
|
||||
ptr[i] = ' ';
|
||||
}
|
||||
buffer = _PyUnicode_AsStringAndSize(asciidig, &buflen);
|
||||
if (buffer == NULL) {
|
||||
Py_DECREF(asciidig);
|
||||
return NULL;
|
||||
}
|
||||
result = PyLong_FromString(buffer, NULL, base);
|
||||
PyMem_FREE(buffer);
|
||||
result = PyLong_FromString(buffer, &end, base);
|
||||
if (result != NULL && end != buffer + buflen) {
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"null byte in argument for int()");
|
||||
Py_DECREF(result);
|
||||
result = NULL;
|
||||
}
|
||||
Py_DECREF(asciidig);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
@ -6206,6 +6206,30 @@ PyObject *PyUnicode_Translate(PyObject *str,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
PyObject *
|
||||
PyUnicode_TransformDecimalToASCII(Py_UNICODE *s,
|
||||
Py_ssize_t length)
|
||||
{
|
||||
PyObject *result;
|
||||
Py_UNICODE *p; /* write pointer into result */
|
||||
Py_ssize_t i;
|
||||
/* Copy to a new string */
|
||||
result = (PyObject *)_PyUnicode_New(length);
|
||||
Py_UNICODE_COPY(PyUnicode_AS_UNICODE(result), s, length);
|
||||
if (result == NULL)
|
||||
return result;
|
||||
p = PyUnicode_AS_UNICODE(result);
|
||||
/* Iterate over code points */
|
||||
for (i = 0; i < length; i++) {
|
||||
Py_UNICODE ch =s[i];
|
||||
if (ch > 127) {
|
||||
int decimal = Py_UNICODE_TODECIMAL(ch);
|
||||
if (decimal >= 0)
|
||||
p[i] = '0' + decimal;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
/* --- Decimal Encoder ---------------------------------------------------- */
|
||||
|
||||
int PyUnicode_EncodeDecimal(Py_UNICODE *s,
|
||||
|
@ -8967,6 +8991,13 @@ unicode_freelistsize(PyUnicodeObject *self)
|
|||
{
|
||||
return PyLong_FromLong(numfree);
|
||||
}
|
||||
|
||||
/* Method wrapper: run PyUnicode_TransformDecimalToASCII() over the
   contents of `self` and return its result. */
static PyObject *
unicode__decimal2ascii(PyObject *self)
{
    Py_UNICODE *chars = PyUnicode_AS_UNICODE(self);
    Py_ssize_t nchars = PyUnicode_GET_SIZE(self);

    return PyUnicode_TransformDecimalToASCII(chars, nchars);
}
|
||||
#endif
|
||||
|
||||
PyDoc_STRVAR(startswith__doc__,
|
||||
|
@ -9108,7 +9139,6 @@ unicode_getnewargs(PyUnicodeObject *v)
|
|||
return Py_BuildValue("(u#)", v->str, v->length);
|
||||
}
|
||||
|
||||
|
||||
static PyMethodDef unicode_methods[] = {
|
||||
|
||||
/* Order is according to common usage: often used methods should
|
||||
|
@ -9170,8 +9200,9 @@ static PyMethodDef unicode_methods[] = {
|
|||
#endif
|
||||
|
||||
#if 0
|
||||
/* This one is just used for debugging the implementation. */
|
||||
/* These methods are just used for debugging the implementation. */
|
||||
{"freelistsize", (PyCFunction) unicode_freelistsize, METH_NOARGS},
|
||||
{"_decimal2ascii", (PyCFunction) unicode__decimal2ascii, METH_NOARGS},
|
||||
#endif
|
||||
|
||||
{"__getnewargs__", (PyCFunction)unicode_getnewargs, METH_NOARGS},
|
||||
|
|
Loading…
Reference in New Issue