Issue #10557: Fixed error messages from float() and other numeric

types.  Added a new API function, PyUnicode_TransformDecimalToASCII(),
which transforms non-ASCII decimal digits in a Unicode string to their
ASCII equivalents.
This commit is contained in:
Alexander Belopolsky 2010-12-04 03:38:46 +00:00
parent 36526bf3d9
commit 942af5a9a4
11 changed files with 169 additions and 52 deletions

View File

@ -328,6 +328,13 @@ APIs:
Identical to :c:func:`PyUnicode_FromFormat` except that it takes exactly two
arguments.
.. c:function:: PyObject* PyUnicode_TransformDecimalToASCII(Py_UNICODE *s, Py_ssize_t size)
Create a Unicode object by replacing all decimal digits in the
:c:type:`Py_UNICODE` buffer of the given size by ASCII digits 0--9
according to their decimal value. Return *NULL* if an exception
occurs.
.. c:function:: Py_UNICODE* PyUnicode_AsUnicode(PyObject *unicode)

View File

@ -1225,6 +1225,17 @@ PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
);
#endif
/* Transforms code points that have decimal digit property to the
corresponding ASCII digit code points ('0'..'9').  Code points without
a decimal value are copied to the result unchanged; the input buffer
is only read.
Returns a new Unicode string on success, NULL on failure.
*/
PyAPI_FUNC(PyObject*) PyUnicode_TransformDecimalToASCII(
Py_UNICODE *s, /* Unicode buffer */
Py_ssize_t length /* Number of Py_UNICODE chars to transform */
);
/* --- File system encoding ---------------------------------------------- */
/* ParseTuple converter: encode str objects to bytes using

View File

@ -220,6 +220,7 @@ class ComplexTest(unittest.TestCase):
self.assertEqual(complex(NS(1+10j)), 1+10j)
self.assertRaises(TypeError, complex, OS(None))
self.assertRaises(TypeError, complex, NS(None))
self.assertRaises(TypeError, complex, {})
self.assertAlmostEqual(complex("1+10j"), 1+10j)
self.assertAlmostEqual(complex(10), 10+0j)
@ -325,6 +326,8 @@ class ComplexTest(unittest.TestCase):
# check that complex accepts long unicode strings
self.assertEqual(type(complex("1"*500)), complex)
# check whitespace processing
self.assertEqual(complex('\N{EM SPACE}(\N{EN SPACE}1+1j ) '), 1+1j)
class EvilExc(Exception):
pass

View File

@ -43,14 +43,30 @@ class GeneralFloatCases(unittest.TestCase):
self.assertRaises(ValueError, float, "+.inf")
self.assertRaises(ValueError, float, ".")
self.assertRaises(ValueError, float, "-.")
self.assertRaises(ValueError, float, b"-")
self.assertRaises(TypeError, float, {})
# Lone surrogate
self.assertRaises(UnicodeEncodeError, float, '\uD8F0')
# check that we don't accept alternate exponent markers
self.assertRaises(ValueError, float, "-1.7d29")
self.assertRaises(ValueError, float, "3D-14")
self.assertEqual(float(b" \u0663.\u0661\u0664 ".decode('raw-unicode-escape')), 3.14)
self.assertEqual(float(" \u0663.\u0661\u0664 "), 3.14)
self.assertEqual(float("\N{EM SPACE}3.14\N{EN SPACE}"), 3.14)
# extra long strings should not be a problem
float(b'.' + b'1'*1000)
float('.' + '1'*1000)
def test_error_message(self):
testlist = ('\xbd', '123\xbd', ' 123 456 ')
for s in testlist:
try:
float(s)
except ValueError as e:
self.assertIn(s.strip(), e.args[0])
else:
self.fail("Expected int(%r) to raise a ValueError", s)
@support.run_with_locale('LC_NUMERIC', 'fr_FR', 'de_DE')
def test_float_with_comma(self):
# set locale to something that doesn't use '.' for the decimal point

View File

@ -20,7 +20,8 @@ L = [
(' 1\02 ', ValueError),
('', ValueError),
(' ', ValueError),
(' \t\t ', ValueError)
(' \t\t ', ValueError),
("\u0200", ValueError)
]
class IntTestCases(unittest.TestCase):
@ -35,6 +36,8 @@ class IntTestCases(unittest.TestCase):
self.assertEqual(int(3.5), 3)
self.assertEqual(int(-3.5), -3)
self.assertEqual(int("-3"), -3)
self.assertEqual(int(" -3 "), -3)
self.assertEqual(int("\N{EM SPACE}-3\N{EN SPACE}"), -3)
# Different base:
self.assertEqual(int("10",16), 16)
# Test conversion from strings and various anomalies
@ -302,6 +305,16 @@ class IntTestCases(unittest.TestCase):
self.fail("Failed to raise TypeError with %s" %
((base, trunc_result_base),))
def test_error_message(self):
testlist = ('\xbd', '123\xbd', ' 123 456 ')
for s in testlist:
try:
int(s)
except ValueError as e:
self.assertIn(s.strip(), e.args[0])
else:
self.fail("Expected int(%r) to raise a ValueError", s)
def test_main():
run_unittest(IntTestCases)

View File

@ -1168,8 +1168,13 @@ class UnicodeTest(string_tests.CommonTest,
# Error handling (wrong arguments)
self.assertRaises(TypeError, "hello".encode, 42, 42, 42)
# Error handling (PyUnicode_EncodeDecimal())
self.assertRaises(UnicodeError, int, "\u0200")
# Error handling (lone surrogate in PyUnicode_TransformDecimalToASCII())
self.assertRaises(UnicodeError, int, "\ud800")
self.assertRaises(UnicodeError, int, "\udf00")
self.assertRaises(UnicodeError, float, "\ud800")
self.assertRaises(UnicodeError, float, "\udf00")
self.assertRaises(UnicodeError, complex, "\ud800")
self.assertRaises(UnicodeError, complex, "\udf00")
def test_codecs(self):
# Encoding

View File

@ -222,6 +222,10 @@ Library
C-API
-----
- Issue #10557: Added a new API function, PyUnicode_TransformDecimalToASCII(),
which transforms non-ASCII decimal digits in a Unicode string to their
ASCII equivalents.
- Issue #9518: Extend the PyModuleDef_HEAD_INIT macro to explicitly
zero-initialize all fields, fixing compiler warnings seen when building
extension modules with gcc with "-Wmissing-field-initializers" (implied by

View File

@ -766,20 +766,26 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v)
char *end;
double x=0.0, y=0.0, z;
int got_bracket=0;
char *s_buffer = NULL;
PyObject *s_buffer = NULL;
Py_ssize_t len;
if (PyUnicode_Check(v)) {
s_buffer = (char *)PyMem_MALLOC(PyUnicode_GET_SIZE(v) + 1);
Py_ssize_t i, buflen = PyUnicode_GET_SIZE(v);
Py_UNICODE *bufptr;
s_buffer = PyUnicode_TransformDecimalToASCII(
PyUnicode_AS_UNICODE(v), buflen);
if (s_buffer == NULL)
return PyErr_NoMemory();
if (PyUnicode_EncodeDecimal(PyUnicode_AS_UNICODE(v),
PyUnicode_GET_SIZE(v),
s_buffer,
NULL))
return NULL;
/* Replace non-ASCII whitespace with ' ' */
bufptr = PyUnicode_AS_UNICODE(s_buffer);
for (i = 0; i < buflen; i++) {
Py_UNICODE ch = bufptr[i];
if (ch > 127 && Py_UNICODE_ISSPACE(ch))
bufptr[i] = ' ';
}
s = _PyUnicode_AsStringAndSize(s_buffer, &len);
if (s == NULL)
goto error;
s = s_buffer;
len = strlen(s);
}
else if (PyObject_AsCharBuffer(v, &s, &len)) {
PyErr_SetString(PyExc_TypeError,
@ -894,16 +900,14 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v)
if (s-start != len)
goto parse_error;
if (s_buffer)
PyMem_FREE(s_buffer);
Py_XDECREF(s_buffer);
return complex_subtype_from_doubles(type, x, y);
parse_error:
PyErr_SetString(PyExc_ValueError,
"complex() arg is a malformed string");
error:
if (s_buffer)
PyMem_FREE(s_buffer);
Py_XDECREF(s_buffer);
return NULL;
}

View File

@ -174,22 +174,30 @@ PyFloat_FromString(PyObject *v)
{
const char *s, *last, *end;
double x;
char buffer[256]; /* for errors */
char *s_buffer = NULL;
PyObject *s_buffer = NULL;
Py_ssize_t len;
PyObject *result = NULL;
if (PyUnicode_Check(v)) {
s_buffer = (char *)PyMem_MALLOC(PyUnicode_GET_SIZE(v)+1);
Py_ssize_t i, buflen = PyUnicode_GET_SIZE(v);
Py_UNICODE *bufptr;
s_buffer = PyUnicode_TransformDecimalToASCII(
PyUnicode_AS_UNICODE(v), buflen);
if (s_buffer == NULL)
return PyErr_NoMemory();
if (PyUnicode_EncodeDecimal(PyUnicode_AS_UNICODE(v),
PyUnicode_GET_SIZE(v),
s_buffer,
NULL))
goto error;
s = s_buffer;
len = strlen(s);
return NULL;
/* Replace non-ASCII whitespace with ' ' */
bufptr = PyUnicode_AS_UNICODE(s_buffer);
for (i = 0; i < buflen; i++) {
Py_UNICODE ch = bufptr[i];
if (ch > 127 && Py_UNICODE_ISSPACE(ch))
bufptr[i] = ' ';
}
s = _PyUnicode_AsStringAndSize(s_buffer, &len);
if (s == NULL) {
Py_DECREF(s_buffer);
return NULL;
}
last = s + len;
}
else if (PyObject_AsCharBuffer(v, &s, &len)) {
PyErr_SetString(PyExc_TypeError,
@ -197,29 +205,27 @@ PyFloat_FromString(PyObject *v)
return NULL;
}
last = s + len;
while (Py_ISSPACE(*s))
/* strip space */
while (s < last && Py_ISSPACE(*s))
s++;
while (s < last - 1 && Py_ISSPACE(last[-1]))
last--;
/* We don't care about overflow or underflow. If the platform
* supports them, infinities and signed zeroes (on underflow) are
* fine. */
x = PyOS_string_to_double(s, (char **)&end, NULL);
if (x == -1.0 && PyErr_Occurred())
goto error;
while (Py_ISSPACE(*end))
end++;
if (end == last)
result = PyFloat_FromDouble(x);
else {
PyOS_snprintf(buffer, sizeof(buffer),
"invalid literal for float(): %.200s", s);
PyErr_SetString(PyExc_ValueError, buffer);
if (end != last) {
PyErr_Format(PyExc_ValueError,
"could not convert string to float: "
"%R", v);
result = NULL;
}
else if (x == -1.0 && PyErr_Occurred())
result = NULL;
else
result = PyFloat_FromDouble(x);
error:
if (s_buffer)
PyMem_FREE(s_buffer);
Py_XDECREF(s_buffer);
return result;
}

View File

@ -2133,17 +2133,34 @@ PyObject *
PyLong_FromUnicode(Py_UNICODE *u, Py_ssize_t length, int base)
{
PyObject *result;
char *buffer = (char *)PyMem_MALLOC(length+1);
PyObject *asciidig;
char *buffer, *end;
Py_ssize_t i, buflen;
Py_UNICODE *ptr;
if (buffer == NULL)
asciidig = PyUnicode_TransformDecimalToASCII(u, length);
if (asciidig == NULL)
return NULL;
if (PyUnicode_EncodeDecimal(u, length, buffer, NULL)) {
PyMem_FREE(buffer);
/* Replace non-ASCII whitespace with ' ' */
ptr = PyUnicode_AS_UNICODE(asciidig);
for (i = 0; i < length; i++) {
Py_UNICODE ch = ptr[i];
if (ch > 127 && Py_UNICODE_ISSPACE(ch))
ptr[i] = ' ';
}
buffer = _PyUnicode_AsStringAndSize(asciidig, &buflen);
if (buffer == NULL) {
Py_DECREF(asciidig);
return NULL;
}
result = PyLong_FromString(buffer, NULL, base);
PyMem_FREE(buffer);
result = PyLong_FromString(buffer, &end, base);
if (result != NULL && end != buffer + buflen) {
PyErr_SetString(PyExc_ValueError,
"null byte in argument for int()");
Py_DECREF(result);
result = NULL;
}
Py_DECREF(asciidig);
return result;
}

View File

@ -6206,6 +6206,30 @@ PyObject *PyUnicode_Translate(PyObject *str,
return NULL;
}
/* Return a new Unicode object holding a copy of the `length` Py_UNICODE
   characters at `s`, in which every non-ASCII character that has a
   decimal digit value is replaced by the matching ASCII digit
   '0'..'9'.  All other characters are copied unchanged and the input
   buffer is only read.

   Returns NULL if the result object could not be created. */
PyObject *
PyUnicode_TransformDecimalToASCII(Py_UNICODE *s,
                                  Py_ssize_t length)
{
    PyObject *result;
    Py_UNICODE *p; /* write pointer into result */
    Py_ssize_t i;

    /* Allocate the result and bail out *before* touching its buffer:
       copying into a NULL result would dereference a null pointer. */
    result = (PyObject *)_PyUnicode_New(length);
    if (result == NULL)
        return NULL;

    /* Start from a verbatim copy; only the digits get patched below. */
    p = PyUnicode_AS_UNICODE(result);
    Py_UNICODE_COPY(p, s, length);

    /* Iterate over code points */
    for (i = 0; i < length; i++) {
        Py_UNICODE ch = s[i];
        /* ASCII characters never need rewriting, so skip the lookup. */
        if (ch > 127) {
            int decimal = Py_UNICODE_TODECIMAL(ch);
            /* A negative value means "not a decimal digit". */
            if (decimal >= 0)
                p[i] = '0' + decimal;
        }
    }
    return result;
}
/* --- Decimal Encoder ---------------------------------------------------- */
int PyUnicode_EncodeDecimal(Py_UNICODE *s,
@ -8967,6 +8991,13 @@ unicode_freelistsize(PyUnicodeObject *self)
{
return PyLong_FromLong(numfree);
}
/* Method wrapper exposing PyUnicode_TransformDecimalToASCII() on a
   Unicode object: applies the transformation to the object's own
   character buffer and returns the resulting new object. */
static PyObject *
unicode__decimal2ascii(PyObject *self)
{
    Py_UNICODE *chars = PyUnicode_AS_UNICODE(self);
    Py_ssize_t nchars = PyUnicode_GET_SIZE(self);

    return PyUnicode_TransformDecimalToASCII(chars, nchars);
}
#endif
PyDoc_STRVAR(startswith__doc__,
@ -9108,7 +9139,6 @@ unicode_getnewargs(PyUnicodeObject *v)
return Py_BuildValue("(u#)", v->str, v->length);
}
static PyMethodDef unicode_methods[] = {
/* Order is according to common usage: often used methods should
@ -9170,8 +9200,9 @@ static PyMethodDef unicode_methods[] = {
#endif
#if 0
/* This one is just used for debugging the implementation. */
/* These methods are just used for debugging the implementation. */
{"freelistsize", (PyCFunction) unicode_freelistsize, METH_NOARGS},
{"_decimal2ascii", (PyCFunction) unicode__decimal2ascii, METH_NOARGS},
#endif
{"__getnewargs__", (PyCFunction)unicode_getnewargs, METH_NOARGS},