Marc-Andre's third try at this bulk patch seems to work (except that

his copy of test_contains.py seems to be broken -- the lines he
deleted were already absent).  Checkin messages:


New Unicode support for int(), float(), complex() and long().

- new APIs PyInt_FromUnicode() and PyLong_FromUnicode()
- added support for Unicode to PyFloat_FromString()
- new encoding API PyUnicode_EncodeDecimal() which converts
  Unicode to a decimal char* string (used in the above new
  APIs)
- shortcuts for calls like int(<int object>) and float(<float obj>)
- tests for all of the above

Unicode compares and contains checks:
- comparing Unicode and non-string types now works; TypeErrors
  are masked, all other errors such as ValueError during
  Unicode coercion are passed through (note that PyUnicode_Compare
  does not implement the masking -- PyObject_Compare does this)
- contains now works for non-string types too; TypeErrors are
  masked and 0 returned; all other errors are passed through

Better testing support for the standard codecs.

Misc minor enhancements, such as an alias dbcs for the mbcs codec.

Changes:
- PyLong_FromString() now applies the same error checks as
  does PyInt_FromString(): trailing garbage is reported
  as an error and no longer silently ignored. The only characters
  which may be trailing the digits are 'L' and 'l' -- these
  are still silently ignored.
- string.ato?() now directly interface to int(), long() and
  float(). The error strings are now a little different, but
  the type still remains the same. These functions are now
  ready to get declared obsolete ;-)
- PyNumber_Int() now also does a check for embedded NULL chars
  in the input string; PyNumber_Long() already did this (and
  still does)

Followed by:

Looks like I've gone a step too far there... (and test_contains.py
seems to have a bug too).

I've changed back to reporting all errors in PyUnicode_Contains()
and added a few more test cases to test_contains.py (plus corrected
the join() NameError).
This commit is contained in:
Guido van Rossum 2000-04-05 20:11:21 +00:00
parent 457855a5f0
commit 9e896b37c7
17 changed files with 421 additions and 115 deletions

View File

@ -72,6 +72,7 @@ PERFORMANCE OF THIS SOFTWARE.
#include "pydebug.h" #include "pydebug.h"
#include "unicodeobject.h"
#include "intobject.h" #include "intobject.h"
#include "longobject.h" #include "longobject.h"
#include "floatobject.h" #include "floatobject.h"
@ -92,7 +93,6 @@ PERFORMANCE OF THIS SOFTWARE.
#include "cobject.h" #include "cobject.h"
#include "traceback.h" #include "traceback.h"
#include "sliceobject.h" #include "sliceobject.h"
#include "unicodeobject.h"
#include "codecs.h" #include "codecs.h"
#include "pyerrors.h" #include "pyerrors.h"

View File

@ -60,6 +60,7 @@ extern DL_IMPORT(PyTypeObject) PyInt_Type;
#define PyInt_Check(op) ((op)->ob_type == &PyInt_Type) #define PyInt_Check(op) ((op)->ob_type == &PyInt_Type)
extern DL_IMPORT(PyObject *) PyInt_FromString Py_PROTO((char*, char**, int)); extern DL_IMPORT(PyObject *) PyInt_FromString Py_PROTO((char*, char**, int));
extern DL_IMPORT(PyObject *) PyInt_FromUnicode Py_PROTO((Py_UNICODE*, int, int));
extern DL_IMPORT(PyObject *) PyInt_FromLong Py_PROTO((long)); extern DL_IMPORT(PyObject *) PyInt_FromLong Py_PROTO((long));
extern DL_IMPORT(long) PyInt_AsLong Py_PROTO((PyObject *)); extern DL_IMPORT(long) PyInt_AsLong Py_PROTO((PyObject *));
extern DL_IMPORT(long) PyInt_GetMax Py_PROTO((void)); extern DL_IMPORT(long) PyInt_GetMax Py_PROTO((void));

View File

@ -82,6 +82,7 @@ extern DL_IMPORT(unsigned LONG_LONG) PyLong_AsUnsignedLongLong Py_PROTO((PyObjec
#endif /* HAVE_LONG_LONG */ #endif /* HAVE_LONG_LONG */
DL_IMPORT(PyObject *) PyLong_FromString Py_PROTO((char *, char **, int)); DL_IMPORT(PyObject *) PyLong_FromString Py_PROTO((char *, char **, int));
DL_IMPORT(PyObject *) PyLong_FromUnicode Py_PROTO((Py_UNICODE*, int, int));
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -358,7 +358,7 @@ extern DL_IMPORT(PyObject*) PyUnicode_EncodeUTF8(
/* --- UTF-16 Codecs ------------------------------------------------------ */ /* --- UTF-16 Codecs ------------------------------------------------------ */
/* Decodes length bytes from a UTF-16 encoded buffer string and return /* Decodes length bytes from a UTF-16 encoded buffer string and returns
the corresponding Unicode object. the corresponding Unicode object.
errors (if non-NULL) defines the error handling. It defaults errors (if non-NULL) defines the error handling. It defaults
@ -397,7 +397,7 @@ extern DL_IMPORT(PyObject*) PyUnicode_AsUTF16String(
); );
/* Returns a Python string object holding the UTF-16 encoded value of /* Returns a Python string object holding the UTF-16 encoded value of
the Unicode data in s. the Unicode data.
If byteorder is not 0, output is written according to the following If byteorder is not 0, output is written according to the following
byte order: byte order:
@ -587,6 +587,37 @@ extern DL_IMPORT(PyObject*) PyUnicode_EncodeMBCS(
#endif /* MS_WIN32 */ #endif /* MS_WIN32 */
/* --- Decimal Encoder ---------------------------------------------------- */
/* Takes a Unicode string holding a decimal value and writes it into
an output buffer using standard ASCII digit codes.
The output buffer has to provide at least length+1 bytes of storage
area (one byte per input character plus the terminator). The output
string is 0-terminated.
The encoder converts whitespace to ' ', decimal characters to their
corresponding ASCII digit and all other Latin-1 characters except
\0 as-is. Characters outside this range (Unicode ordinals 1-255)
are treated as errors. This includes embedded NULL bytes.
Error handling is defined by the errors argument:
NULL or "strict": raise a ValueError
"ignore": ignore the wrong characters (these are not copied to the
output buffer)
"replace": replaces illegal characters with '?'
Returns 0 on success, -1 on failure.
*/
extern DL_IMPORT(int) PyUnicode_EncodeDecimal(
Py_UNICODE *s, /* Unicode buffer */
int length, /* Number of Py_UNICODE chars to encode */
char *output, /* Output buffer; must have size >= length+1 */
const char *errors /* error handling */
);
/* --- Methods & Slots ---------------------------------------------------- /* --- Methods & Slots ----------------------------------------------------
These are capable of handling Unicode objects and strings on input These are capable of handling Unicode objects and strings on input

View File

@ -4,8 +4,8 @@
directory. directory.
Codec modules must have names corresponding to standard lower-case Codec modules must have names corresponding to standard lower-case
encoding names. Hyphens are automatically converted to encoding names with hyphens mapped to underscores, e.g. 'utf-8' is
underscores, e.g. 'utf-8' is looked up as module utf_8. implemented by the module 'utf_8.py'.
Each codec module must export the following interface: Each codec module must export the following interface:
@ -40,7 +40,7 @@ def search_function(encoding):
return entry return entry
# Import the module # Import the module
modname = string.replace(encoding,'-','_') modname = string.replace(encoding, '-', '_')
modname = aliases.aliases.get(modname,modname) modname = aliases.aliases.get(modname,modname)
try: try:
mod = __import__(modname,globals(),locals(),'*') mod = __import__(modname,globals(),locals(),'*')

View File

@ -54,4 +54,7 @@ aliases = {
'macroman': 'mac_roman', 'macroman': 'mac_roman',
'macturkish': 'mac_turkish', 'macturkish': 'mac_turkish',
# MBCS
'dbcs': 'mbcs',
} }

View File

@ -196,14 +196,11 @@ def atof(s):
Return the floating point number represented by the string s. Return the floating point number represented by the string s.
""" """
if type(s) == _StringType: return _float(s)
return _float(s)
else:
raise TypeError('argument 1: expected string, %s found' %
type(s).__name__)
# Convert string to integer # Convert string to integer
def atoi(*args): def atoi(s , base=10):
"""atoi(s [,base]) -> int """atoi(s [,base]) -> int
Return the integer represented by the string s in the given Return the integer represented by the string s in the given
@ -214,23 +211,11 @@ def atoi(*args):
accepted. accepted.
""" """
try: return _int(s, base)
s = args[0]
except IndexError:
raise TypeError('function requires at least 1 argument: %d given' %
len(args))
# Don't catch type error resulting from too many arguments to int(). The
# error message isn't compatible but the error type is, and this function
# is complicated enough already.
if type(s) == _StringType:
return _apply(_int, args)
else:
raise TypeError('argument 1: expected string, %s found' %
type(s).__name__)
# Convert string to long integer # Convert string to long integer
def atol(*args): def atol(s, base=10):
"""atol(s [,base]) -> long """atol(s [,base]) -> long
Return the long integer represented by the string s in the Return the long integer represented by the string s in the
@ -242,19 +227,7 @@ def atol(*args):
unless base is 0. unless base is 0.
""" """
try: return _long(s, base)
s = args[0]
except IndexError:
raise TypeError('function requires at least 1 argument: %d given' %
len(args))
# Don't catch type error resulting from too many arguments to long(). The
# error message isn't compatible but the error type is, and this function
# is complicated enough already.
if type(s) == _StringType:
return _apply(_long, args)
else:
raise TypeError('argument 1: expected string, %s found' %
type(s).__name__)
# Left-justify a string # Left-justify a string

View File

@ -3,3 +3,4 @@ Testing Unicode comparisons... done.
Testing Unicode contains method... done. Testing Unicode contains method... done.
Testing Unicode formatting strings... done. Testing Unicode formatting strings... done.
Testing builtin codecs... done. Testing builtin codecs... done.
Testing standard mapping codecs... 0-127... 128-255... done.

View File

@ -95,6 +95,7 @@ if complex(0.0, 3.14j) <> -3.14+0j: raise TestFailed, 'complex(0.0, 3.14j)'
if complex(0j, 3.14) <> 3.14j: raise TestFailed, 'complex(0j, 3.14)' if complex(0j, 3.14) <> 3.14j: raise TestFailed, 'complex(0j, 3.14)'
if complex(0.0, 3.14) <> 3.14j: raise TestFailed, 'complex(0.0, 3.14)' if complex(0.0, 3.14) <> 3.14j: raise TestFailed, 'complex(0.0, 3.14)'
if complex(" 3.14+J ") <> 3.14+1j: raise TestFailed, 'complex(" 3.14+J )"' if complex(" 3.14+J ") <> 3.14+1j: raise TestFailed, 'complex(" 3.14+J )"'
if complex(u" 3.14+J ") <> 3.14+1j: raise TestFailed, 'complex(u" 3.14+J )"'
class Z: class Z:
def __complex__(self): return 3.14j def __complex__(self): return 3.14j
z = Z() z = Z()
@ -208,6 +209,9 @@ if float(3.14) <> 3.14: raise TestFailed, 'float(3.14)'
if float(314) <> 314.0: raise TestFailed, 'float(314)' if float(314) <> 314.0: raise TestFailed, 'float(314)'
if float(314L) <> 314.0: raise TestFailed, 'float(314L)' if float(314L) <> 314.0: raise TestFailed, 'float(314L)'
if float(" 3.14 ") <> 3.14: raise TestFailed, 'float(" 3.14 ")' if float(" 3.14 ") <> 3.14: raise TestFailed, 'float(" 3.14 ")'
if float(u" 3.14 ") <> 3.14: raise TestFailed, 'float(u" 3.14 ")'
if float(u" \u0663.\u0661\u0664 ") <> 3.14:
raise TestFailed, 'float(u" \u0663.\u0661\u0664 ")'
print 'getattr' print 'getattr'
import sys import sys
@ -254,6 +258,9 @@ if int(3.9) <> 3: raise TestFailed, 'int(3.9)'
if int(-3.9) <> -3: raise TestFailed, 'int(-3.9)' if int(-3.9) <> -3: raise TestFailed, 'int(-3.9)'
if int(3.5) <> 3: raise TestFailed, 'int(3.5)' if int(3.5) <> 3: raise TestFailed, 'int(3.5)'
if int(-3.5) <> -3: raise TestFailed, 'int(-3.5)' if int(-3.5) <> -3: raise TestFailed, 'int(-3.5)'
# Different base:
if int("10",16) <> 16L: raise TestFailed, 'int("10",16)'
if int(u"10",16) <> 16L: raise TestFailed, 'int(u"10",16)'
# Test conversion fron strings and various anomalies # Test conversion fron strings and various anomalies
L = [ L = [
('0', 0), ('0', 0),
@ -267,9 +274,28 @@ L = [
('314 ', 314), ('314 ', 314),
(' \t\t 314 \t\t ', 314), (' \t\t 314 \t\t ', 314),
(`sys.maxint`, sys.maxint), (`sys.maxint`, sys.maxint),
(' 1x', ValueError),
(' 1 ', 1),
(' 1\02 ', ValueError),
('', ValueError), ('', ValueError),
(' ', ValueError), (' ', ValueError),
(' \t\t ', ValueError), (' \t\t ', ValueError),
(u'0', 0),
(u'1', 1),
(u'9', 9),
(u'10', 10),
(u'99', 99),
(u'100', 100),
(u'314', 314),
(u' 314', 314),
(u'\u0663\u0661\u0664 ', 314),
(u' \t\t 314 \t\t ', 314),
(u' 1x', ValueError),
(u' 1 ', 1),
(u' 1\02 ', ValueError),
(u'', ValueError),
(u' ', ValueError),
(u' \t\t ', ValueError),
] ]
for s, v in L: for s, v in L:
for sign in "", "+", "-": for sign in "", "+", "-":
@ -349,10 +375,17 @@ if long(3.9) <> 3L: raise TestFailed, 'long(3.9)'
if long(-3.9) <> -3L: raise TestFailed, 'long(-3.9)' if long(-3.9) <> -3L: raise TestFailed, 'long(-3.9)'
if long(3.5) <> 3L: raise TestFailed, 'long(3.5)' if long(3.5) <> 3L: raise TestFailed, 'long(3.5)'
if long(-3.5) <> -3L: raise TestFailed, 'long(-3.5)' if long(-3.5) <> -3L: raise TestFailed, 'long(-3.5)'
if long("-3") <> -3L: raise TestFailed, 'long("-3")'
if long(u"-3") <> -3L: raise TestFailed, 'long(u"-3")'
# Different base:
if long("10",16) <> 16L: raise TestFailed, 'long("10",16)'
if long(u"10",16) <> 16L: raise TestFailed, 'long(u"10",16)'
# Check conversions from string (same test set as for int(), and then some) # Check conversions from string (same test set as for int(), and then some)
LL = [ LL = [
('1' + '0'*20, 10L**20), ('1' + '0'*20, 10L**20),
('1' + '0'*100, 10L**100), ('1' + '0'*100, 10L**100),
(u'1' + u'0'*20, 10L**20),
(u'1' + u'0'*100, 10L**100),
] ]
for s, v in L + LL: for s, v in L + LL:
for sign in "", "+", "-": for sign in "", "+", "-":
@ -363,11 +396,11 @@ for s, v in L + LL:
vv = -v vv = -v
try: try:
if long(ss) != long(vv): if long(ss) != long(vv):
raise TestFailed, "int(%s)" % `ss` raise TestFailed, "long(%s)" % `ss`
except v: except v:
pass pass
except ValueError, e: except ValueError, e:
raise TestFailed, "int(%s) raised ValueError: %s" % (`ss`, e) raise TestFailed, "long(%s) raised ValueError: %s" % (`ss`, e)
print 'map' print 'map'
if map(None, 'hello world') <> ['h','e','l','l','o',' ','w','o','r','l','d']: if map(None, 'hello world') <> ['h','e','l','l','o',' ','w','o','r','l','d']:

View File

@ -221,15 +221,23 @@ test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c
# Contains: # Contains:
print 'Testing Unicode contains method...', print 'Testing Unicode contains method...',
assert ('a' in 'abdb') == 1 assert ('a' in u'abdb') == 1
assert ('a' in 'bdab') == 1 assert ('a' in u'bdab') == 1
assert ('a' in 'bdaba') == 1 assert ('a' in u'bdaba') == 1
assert ('a' in 'bdba') == 1 assert ('a' in u'bdba') == 1
assert ('a' in u'bdba') == 1 assert ('a' in u'bdba') == 1
assert (u'a' in u'bdba') == 1 assert (u'a' in u'bdba') == 1
assert (u'a' in u'bdb') == 0 assert (u'a' in u'bdb') == 0
assert (u'a' in 'bdb') == 0 assert (u'a' in 'bdb') == 0
assert (u'a' in 'bdba') == 1 assert (u'a' in 'bdba') == 1
assert (u'a' in ('a',1,None)) == 1
assert (u'a' in (1,None,'a')) == 1
assert (u'a' in (1,None,u'a')) == 1
assert ('a' in ('a',1,None)) == 1
assert ('a' in (1,None,'a')) == 1
assert ('a' in (1,None,u'a')) == 1
assert ('a' in ('x',1,u'y')) == 0
assert ('a' in ('x',1,None)) == 0
print 'done.' print 'done.'
# Formatting: # Formatting:
@ -270,11 +278,88 @@ for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
assert unicode(u.encode(encoding),encoding) == u assert unicode(u.encode(encoding),encoding) == u
u = u''.join(map(unichr, range(256))) u = u''.join(map(unichr, range(256)))
for encoding in ('latin-1',): for encoding in (
assert unicode(u.encode(encoding),encoding) == u 'latin-1',
):
try:
assert unicode(u.encode(encoding),encoding) == u
except AssertionError:
print '*** codec "%s" failed round-trip' % encoding
except ValueError,why:
print '*** codec for "%s" failed: %s' % (encoding, why)
u = u''.join(map(unichr, range(128))) u = u''.join(map(unichr, range(128)))
for encoding in ('ascii',): for encoding in (
assert unicode(u.encode(encoding),encoding) == u 'ascii',
):
try:
assert unicode(u.encode(encoding),encoding) == u
except AssertionError:
print '*** codec "%s" failed round-trip' % encoding
except ValueError,why:
print '*** codec for "%s" failed: %s' % (encoding, why)
print 'done.'
print 'Testing standard mapping codecs...',
print '0-127...',
s = ''.join(map(chr, range(128)))
for encoding in (
'cp037', 'cp1026',
'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
'cp863', 'cp865', 'cp866',
'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
'mac_cyrillic', 'mac_latin2',
'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
'cp1256', 'cp1257', 'cp1258',
'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
'cp1006', 'cp875', 'iso8859_8',
### These have undefined mappings:
#'cp424',
):
try:
assert unicode(s,encoding).encode(encoding) == s
except AssertionError:
print '*** codec "%s" failed round-trip' % encoding
except ValueError,why:
print '*** codec for "%s" failed: %s' % (encoding, why)
print '128-255...',
s = ''.join(map(chr, range(128,256)))
for encoding in (
'cp037', 'cp1026',
'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
'cp863', 'cp865', 'cp866',
'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
'mac_cyrillic', 'mac_latin2',
### These have undefined mappings:
#'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
#'cp1256', 'cp1257', 'cp1258',
#'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
#'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
### These fail the round-trip:
#'cp1006', 'cp875', 'iso8859_8',
):
try:
assert unicode(s,encoding).encode(encoding) == s
except AssertionError:
print '*** codec "%s" failed round-trip' % encoding
except ValueError,why:
print '*** codec for "%s" failed: %s' % (encoding, why)
print 'done.' print 'done.'

View File

@ -726,6 +726,27 @@ PyNumber_Absolute(o)
return type_error("bad operand type for abs()"); return type_error("bad operand type for abs()");
} }
/* Convert the string s to a Python int (base 10) and additionally
   verify that the conversion consumed exactly len bytes.  If it
   stopped anywhere else, a ValueError reporting a null byte in the
   argument is raised.  Returns a new reference, or NULL with an
   exception set. */
static PyObject *
int_from_string(s, len)
	const char *s;
	int len;
{
	char *stop;
	PyObject *result = PyInt_FromString((char*)s, &stop, 10);

	if (result == NULL)
		return NULL;
	if (stop == s + len)
		return result;

	/* The parser stopped before the declared end of the data. */
	PyErr_SetString(PyExc_ValueError,
			"null byte in argument for int()");
	Py_DECREF(result);
	return NULL;
}
PyObject * PyObject *
PyNumber_Int(o) PyNumber_Int(o)
PyObject *o; PyObject *o;
@ -736,69 +757,42 @@ PyNumber_Int(o)
if (o == NULL) if (o == NULL)
return null_error(); return null_error();
if (PyInt_Check(o)) {
Py_INCREF(o);
return o;
}
if (PyString_Check(o)) if (PyString_Check(o))
return PyInt_FromString(PyString_AS_STRING(o), NULL, 10); return int_from_string(PyString_AS_STRING(o),
PyString_GET_SIZE(o));
if (PyUnicode_Check(o))
return PyInt_FromUnicode(PyUnicode_AS_UNICODE(o),
PyUnicode_GET_SIZE(o),
10);
m = o->ob_type->tp_as_number; m = o->ob_type->tp_as_number;
if (m && m->nb_int) if (m && m->nb_int)
return m->nb_int(o); return m->nb_int(o);
if (!PyObject_AsCharBuffer(o, &buffer, &buffer_len)) if (!PyObject_AsCharBuffer(o, &buffer, &buffer_len))
return PyInt_FromString((char*)buffer, NULL, 10); return int_from_string((char*)buffer, buffer_len);
return type_error("object can't be converted to int"); return type_error("object can't be converted to int");
} }
/* There are two C API functions for converting a string to a long, /* Add a check for embedded NULL-bytes in the argument. */
* PyNumber_Long() and PyLong_FromString(). Both are used in builtin_long,
* reachable from Python with the built-in function long().
*
* The difference is this: PyNumber_Long will raise an exception when the
* string cannot be converted to a long. The most common situation is
* where a float string is passed in; this raises a ValueError.
* PyLong_FromString does not raise an exception; it silently truncates the
* float to an integer.
*
* You can see the different behavior from Python with the following:
*
* long('9.5')
* => ValueError: invalid literal for long(): 9.5
*
* long('9.5', 10)
* => 9L
*
* The first example ends up calling PyNumber_Long(), while the second one
* calls PyLong_FromString().
*/
static PyObject * static PyObject *
long_from_string(s, len) long_from_string(s, len)
const char *s; const char *s;
int len; int len;
{ {
const char *start;
char *end; char *end;
PyObject *x; PyObject *x;
char buffer[256]; /* For errors */
start = s;
while (*s && isspace(Py_CHARMASK(*s)))
s++;
x = PyLong_FromString((char*)s, &end, 10); x = PyLong_FromString((char*)s, &end, 10);
if (x == NULL) { if (x == NULL)
if (PyErr_ExceptionMatches(PyExc_ValueError))
goto bad;
return NULL; return NULL;
} if (end != s + len) {
while (*end && isspace(Py_CHARMASK(*end)))
end++;
if (*end != '\0') {
bad:
sprintf(buffer, "invalid literal for long(): %.200s", s);
PyErr_SetString(PyExc_ValueError, buffer);
Py_XDECREF(x);
return NULL;
}
else if (end != start + len) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"null byte in argument for long()"); "null byte in argument for long()");
Py_DECREF(x);
return NULL; return NULL;
} }
return x; return x;
@ -814,6 +808,10 @@ PyNumber_Long(o)
if (o == NULL) if (o == NULL)
return null_error(); return null_error();
if (PyLong_Check(o)) {
Py_INCREF(o);
return o;
}
if (PyString_Check(o)) if (PyString_Check(o))
/* need to do extra error checking that PyLong_FromString() /* need to do extra error checking that PyLong_FromString()
* doesn't do. In particular long('9.5') must raise an * doesn't do. In particular long('9.5') must raise an
@ -821,6 +819,11 @@ PyNumber_Long(o)
*/ */
return long_from_string(PyString_AS_STRING(o), return long_from_string(PyString_AS_STRING(o),
PyString_GET_SIZE(o)); PyString_GET_SIZE(o));
if (PyUnicode_Check(o))
/* The above check is done in PyLong_FromUnicode(). */
return PyLong_FromUnicode(PyUnicode_AS_UNICODE(o),
PyUnicode_GET_SIZE(o),
10);
m = o->ob_type->tp_as_number; m = o->ob_type->tp_as_number;
if (m && m->nb_long) if (m && m->nb_long)
return m->nb_long(o); return m->nb_long(o);
@ -838,6 +841,10 @@ PyNumber_Float(o)
if (o == NULL) if (o == NULL)
return null_error(); return null_error();
if (PyFloat_Check(o)) {
Py_INCREF(o);
return o;
}
if (!PyString_Check(o)) { if (!PyString_Check(o)) {
m = o->ob_type->tp_as_number; m = o->ob_type->tp_as_number;
if (m && m->nb_float) if (m && m->nb_float)

View File

@ -164,6 +164,22 @@ PyFloat_FromString(v, pend)
s = PyString_AS_STRING(v); s = PyString_AS_STRING(v);
len = PyString_GET_SIZE(v); len = PyString_GET_SIZE(v);
} }
else if (PyUnicode_Check(v)) {
char s_buffer[256];
if (PyUnicode_GET_SIZE(v) >= sizeof(s_buffer)) {
PyErr_SetString(PyExc_ValueError,
"float() literal too large to convert");
return NULL;
}
if (PyUnicode_EncodeDecimal(PyUnicode_AS_UNICODE(v),
PyUnicode_GET_SIZE(v),
s_buffer,
NULL))
return NULL;
s = s_buffer;
len = strlen(s);
}
else if (PyObject_AsCharBuffer(v, &s, &len)) { else if (PyObject_AsCharBuffer(v, &s, &len)) {
PyErr_SetString(PyExc_TypeError, PyErr_SetString(PyExc_TypeError,
"float() needs a string argument"); "float() needs a string argument");

View File

@ -261,6 +261,24 @@ PyInt_FromString(s, pend, base)
return PyInt_FromLong(x); return PyInt_FromLong(x);
} }
/* Create a Python int from the first length characters of the Unicode
   buffer s, interpreted in the given base.  The characters are first
   encoded into a fixed-size local char buffer with
   PyUnicode_EncodeDecimal() and then handed to PyInt_FromString().
   Inputs that do not fit the local buffer raise a ValueError.
   Returns NULL with an exception set on failure. */
PyObject *
PyInt_FromUnicode(s, length, base)
	Py_UNICODE *s;
	int length;
	int base;
{
	char ascii[256];

	/* Keep one byte free for the terminating 0 written by the encoder. */
	if (length >= sizeof(ascii)) {
		PyErr_SetString(PyExc_ValueError,
				"int() literal too large to convert");
		return NULL;
	}
	if (PyUnicode_EncodeDecimal(s, length, ascii, NULL) != 0)
		return NULL;
	return PyInt_FromString(ascii, NULL, base);
}
/* Methods */ /* Methods */
/* ARGSUSED */ /* ARGSUSED */

View File

@ -724,7 +724,7 @@ PyLong_FromString(str, pend, base)
int base; int base;
{ {
int sign = 1; int sign = 1;
char *start; char *start, *orig_str = str;
PyLongObject *z; PyLongObject *z;
if ((base != 0 && base < 2) || base > 36) { if ((base != 0 && base < 2) || base > 36) {
@ -772,17 +772,44 @@ PyLong_FromString(str, pend, base)
} }
if (z == NULL) if (z == NULL)
return NULL; return NULL;
if (str == start) { if (str == start)
PyErr_SetString(PyExc_ValueError, goto onError;
"no digits in long int constant");
Py_DECREF(z);
return NULL;
}
if (sign < 0 && z != NULL && z->ob_size != 0) if (sign < 0 && z != NULL && z->ob_size != 0)
z->ob_size = -(z->ob_size); z->ob_size = -(z->ob_size);
if (*str == 'L' || *str == 'l')
str++;
while (*str && isspace(Py_CHARMASK(*str)))
str++;
if (*str != '\0')
goto onError;
if (pend) if (pend)
*pend = str; *pend = str;
return (PyObject *) z; return (PyObject *) z;
onError:
PyErr_Format(PyExc_ValueError,
"invalid literal for long(): %.200s", orig_str);
Py_XDECREF(z);
return NULL;
}
/* Create a Python long from the first length characters of the Unicode
   buffer u, interpreted in the given base.  The characters are first
   encoded into a fixed-size local char buffer with
   PyUnicode_EncodeDecimal() and then handed to PyLong_FromString().
   Inputs that do not fit the local buffer raise a ValueError.
   Returns NULL with an exception set on failure. */
PyObject *
PyLong_FromUnicode(u, length, base)
	Py_UNICODE *u;
	int length;
	int base;
{
	char ascii[256];

	/* Keep one byte free for the terminating 0 written by the encoder. */
	if (length >= sizeof(ascii)) {
		PyErr_SetString(PyExc_ValueError,
				"long() literal too large to convert");
		return NULL;
	}
	if (PyUnicode_EncodeDecimal(u, length, ascii, NULL) != 0)
		return NULL;
	return PyLong_FromString(ascii, NULL, base);
}
static PyLongObject *x_divrem static PyLongObject *x_divrem

View File

@ -329,8 +329,14 @@ PyObject *PyUnicode_FromObject(register PyObject *obj)
s = PyString_AS_STRING(obj); s = PyString_AS_STRING(obj);
len = PyString_GET_SIZE(obj); len = PyString_GET_SIZE(obj);
} }
else if (PyObject_AsCharBuffer(obj, &s, &len)) else if (PyObject_AsCharBuffer(obj, &s, &len)) {
/* Overwrite the error message with something more useful in
case of a TypeError. */
if (PyErr_ExceptionMatches(PyExc_TypeError))
PyErr_SetString(PyExc_TypeError,
"coercing to Unicode: need string or charbuffer");
return NULL; return NULL;
}
if (len == 0) { if (len == 0) {
Py_INCREF(unicode_empty); Py_INCREF(unicode_empty);
return (PyObject *)unicode_empty; return (PyObject *)unicode_empty;
@ -1923,6 +1929,60 @@ PyObject *PyUnicode_Translate(PyObject *str,
return NULL; return NULL;
} }
/* --- Decimal Encoder ---------------------------------------------------- */
/* Encode length Py_UNICODE characters starting at s into output as a
   0-terminated char string.

   Whitespace characters are written as ' ', characters with a decimal
   digit value are written as the matching ASCII digit, and any other
   character in the range 1-255 is copied unchanged.  Everything else
   (including embedded 0 characters and ordinals >= 256) is handled
   according to errors:

     NULL or "strict":  raise ValueError
     "ignore":          drop the character
     "replace":         write '?' instead

   Any other errors value raises ValueError.  At most one byte is
   written per input character plus the terminator, so output must
   provide at least length+1 bytes.

   Returns 0 on success, -1 with an exception set on failure. */

int PyUnicode_EncodeDecimal(Py_UNICODE *s,
                            int length,
                            char *output,
                            const char *errors)
{
    Py_UNICODE *p, *end;

    if (output == NULL) {
        PyErr_BadArgument();
        return -1;
    }

    p = s;
    end = s + length;
    while (p < end) {
        register Py_UNICODE ch = *p++;
        int decimal;

        if (Py_UNICODE_ISSPACE(ch)) {
            *output++ = ' ';
            continue;
        }
        decimal = Py_UNICODE_TODECIMAL(ch);
        if (decimal >= 0) {
            *output++ = (char)('0' + decimal);
            continue;
        }
        /* The range test must be spelled out: `0 < ch < 256` parses as
           `(0 < ch) < 256`, which is always true and would let 0 and
           non-Latin-1 characters through. */
        if (0 < ch && ch < 256) {
            *output++ = (char)ch;
            continue;
        }
        /* All other characters are considered invalid */
        if (errors == NULL || strcmp(errors, "strict") == 0) {
            PyErr_SetString(PyExc_ValueError,
                            "invalid decimal Unicode string");
            goto onError;
        }
        else if (strcmp(errors, "ignore") == 0)
            continue;
        else if (strcmp(errors, "replace") == 0) {
            *output++ = '?';
            continue;
        }
        else {
            /* Do not silently treat an unknown mode like "ignore". */
            PyErr_Format(PyExc_ValueError,
                         "decimal encoding error; "
                         "unknown error handling code: %.400s",
                         errors);
            goto onError;
        }
    }
    /* 0-terminate the output string */
    *output++ = '\0';
    return 0;

 onError:
    return -1;
}
/* --- Helpers ------------------------------------------------------------ */ /* --- Helpers ------------------------------------------------------------ */
static static
@ -2811,12 +2871,14 @@ int PyUnicode_Contains(PyObject *container,
register Py_UNICODE ch; register Py_UNICODE ch;
/* Coerce the two arguments */ /* Coerce the two arguments */
u = (PyUnicodeObject *)PyUnicode_FromObject(container);
if (u == NULL)
goto onError;
v = (PyUnicodeObject *)PyUnicode_FromObject(element); v = (PyUnicodeObject *)PyUnicode_FromObject(element);
if (v == NULL) if (v == NULL)
goto onError; goto onError;
u = (PyUnicodeObject *)PyUnicode_FromObject(container);
if (u == NULL) {
Py_DECREF(v);
goto onError;
}
/* Check v in u */ /* Check v in u */
if (PyUnicode_GET_SIZE(v) != 1) { if (PyUnicode_GET_SIZE(v) != 1) {

View File

@ -449,17 +449,44 @@ complex_from_string(v)
PyObject *v; PyObject *v;
{ {
extern double strtod Py_PROTO((const char *, char **)); extern double strtod Py_PROTO((const char *, char **));
char *s, *start, *end; const char *s, *start;
char *end;
double x=0.0, y=0.0, z; double x=0.0, y=0.0, z;
int got_re=0, got_im=0, done=0; int got_re=0, got_im=0, done=0;
int digit_or_dot; int digit_or_dot;
int sw_error=0; int sw_error=0;
int sign; int sign;
char buffer[256]; /* For errors */ char buffer[256]; /* For errors */
int len;
start = s = PyString_AS_STRING(v); if (PyString_Check(v)) {
s = PyString_AS_STRING(v);
len = PyString_GET_SIZE(v);
}
else if (PyUnicode_Check(v)) {
char s_buffer[256];
if (PyUnicode_GET_SIZE(v) >= sizeof(s_buffer)) {
PyErr_SetString(PyExc_ValueError,
"complex() literal too large to convert");
return NULL;
}
if (PyUnicode_EncodeDecimal(PyUnicode_AS_UNICODE(v),
PyUnicode_GET_SIZE(v),
s_buffer,
NULL))
return NULL;
s = s_buffer;
len = strlen(s);
}
else if (PyObject_AsCharBuffer(v, &s, &len)) {
PyErr_SetString(PyExc_TypeError,
"complex() needs a string first argument");
return NULL;
}
/* position on first nonblank */ /* position on first nonblank */
start = s;
while (*s && isspace(Py_CHARMASK(*s))) while (*s && isspace(Py_CHARMASK(*s)))
s++; s++;
if (s[0] == '\0') { if (s[0] == '\0') {
@ -475,7 +502,7 @@ complex_from_string(v)
switch (*s) { switch (*s) {
case '\0': case '\0':
if (s-start != PyString_GET_SIZE(v)) { if (s-start != len) {
PyErr_SetString( PyErr_SetString(
PyExc_ValueError, PyExc_ValueError,
"null byte in argument for complex()"); "null byte in argument for complex()");
@ -584,7 +611,7 @@ builtin_complex(self, args)
i = NULL; i = NULL;
if (!PyArg_ParseTuple(args, "O|O:complex", &r, &i)) if (!PyArg_ParseTuple(args, "O|O:complex", &r, &i))
return NULL; return NULL;
if (PyString_Check(r)) if (PyString_Check(r) || PyUnicode_Check(r))
return complex_from_string(r); return complex_from_string(r);
if ((nbr = r->ob_type->tp_as_number) == NULL || if ((nbr = r->ob_type->tp_as_number) == NULL ||
nbr->nb_float == NULL || nbr->nb_float == NULL ||
@ -1289,12 +1316,17 @@ builtin_int(self, args)
return NULL; return NULL;
if (base == -909) if (base == -909)
return PyNumber_Int(v); return PyNumber_Int(v);
else if (!PyString_Check(v)) { else if (PyString_Check(v))
return PyInt_FromString(PyString_AS_STRING(v), NULL, base);
else if (PyUnicode_Check(v))
return PyInt_FromUnicode(PyUnicode_AS_UNICODE(v),
PyUnicode_GET_SIZE(v),
base);
else {
PyErr_SetString(PyExc_TypeError, PyErr_SetString(PyExc_TypeError,
"can't convert non-string with explicit base"); "can't convert non-string with explicit base");
return NULL; return NULL;
} }
return PyInt_FromString(PyString_AS_STRING(v), NULL, base);
} }
static char int_doc[] = static char int_doc[] =
@ -1319,12 +1351,17 @@ builtin_long(self, args)
return NULL; return NULL;
if (base == -909) if (base == -909)
return PyNumber_Long(v); return PyNumber_Long(v);
else if (!PyString_Check(v)) { else if (PyString_Check(v))
return PyLong_FromString(PyString_AS_STRING(v), NULL, base);
else if (PyUnicode_Check(v))
return PyLong_FromUnicode(PyUnicode_AS_UNICODE(v),
PyUnicode_GET_SIZE(v),
base);
else {
PyErr_SetString(PyExc_TypeError, PyErr_SetString(PyExc_TypeError,
"can't convert non-string with explicit base"); "can't convert non-string with explicit base");
return NULL; return NULL;
} }
return PyLong_FromString(PyString_AS_STRING(v), NULL, base);
} }
static char long_doc[] = static char long_doc[] =

View File

@ -84,8 +84,11 @@ int PyCodec_Register(PyObject *search_function)
return -1; return -1;
} }
/* Convert a string to a normalized Python string: all characters are
converted to lower case, spaces are replaced with hyphens. */
static static
PyObject *lowercasestring(const char *string) PyObject *normalizestring(const char *string)
{ {
register int i; register int i;
int len = strlen(string); int len = strlen(string);
@ -96,8 +99,14 @@ PyObject *lowercasestring(const char *string)
if (v == NULL) if (v == NULL)
return NULL; return NULL;
p = PyString_AS_STRING(v); p = PyString_AS_STRING(v);
for (i = 0; i < len; i++) for (i = 0; i < len; i++) {
p[i] = tolower(string[i]); register char ch = string[i];
if (ch == ' ')
ch = '-';
else
ch = tolower(ch);
p[i] = ch;
}
return v; return v;
} }
@ -132,8 +141,10 @@ PyObject *_PyCodec_Lookup(const char *encoding)
goto onError; goto onError;
} }
/* Convert the encoding to a lower-cased Python string */ /* Convert the encoding to a normalized Python string: all
v = lowercasestring(encoding); characters are converted to lower case, spaces and hypens are
replaced with underscores. */
v = normalizestring(encoding);
if (v == NULL) if (v == NULL)
goto onError; goto onError;
PyString_InternInPlace(&v); PyString_InternInPlace(&v);