Add 'U'/'U#' format characters to Py_BuildValue (and thus

to PyObject_CallFunction()) that take a char * (and a size
in the case of 'U#') and create a unicode object out of it.

Add functions PyUnicode_FromFormat() and PyUnicode_FromFormatV()
that work similarly to PyString_FromFormat(), but create a unicode
object (a %U format character has also been added, which takes
a PyObject * that must point to a unicode object).

Change the encoding and reason attributes of UnicodeEncodeError,
UnicodeDecodeError and UnicodeTranslateError to be unicode
objects.
This commit is contained in:
Walter Dörwald 2007-05-18 16:29:38 +00:00
parent 5550731d9c
commit d2034310d6
6 changed files with 376 additions and 113 deletions

View File

@ -848,6 +848,15 @@ PyArg_ParseTuple(args, "O|O:ref", &object, &callback)
to a Python Unicode object. If the Unicode buffer pointer
is \NULL, the length is ignored and \code{None} is returned.
\item[\samp{U} (string) {[char *]}]
Convert a null-terminated C string to a Python unicode object.
If the C string pointer is \NULL, \code{None} is used.
\item[\samp{U\#} (string) {[char *, int]}]
Convert a C string and its length to a Python unicode object.
If the C string pointer is \NULL, the length is ignored and \code{None}
is returned.
\item[\samp{i} (integer) {[int]}]
Convert a plain C \ctype{int} to a Python integer object.

View File

@ -173,7 +173,9 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_FromOrdinal PyUnicodeUCS2_FromOrdinal
# define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode
# define PyUnicode_FromString PyUnicodeUCS2_FromString
# define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar
# define PyUnicode_FromStringAndSize PyUnicodeUCS2_FromStringAndSize
# define PyUnicode_FromFormatV PyUnicodeUCS2_FromFormatV
# define PyUnicode_FromFormat PyUnicodeUCS2_FromFormat
# define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
# define PyUnicode_GetMax PyUnicodeUCS2_GetMax
# define PyUnicode_GetSize PyUnicodeUCS2_GetSize
@ -252,6 +254,9 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_FromOrdinal PyUnicodeUCS4_FromOrdinal
# define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode
# define PyUnicode_FromString PyUnicodeUCS4_FromString
# define PyUnicode_FromStringAndSize PyUnicodeUCS4_FromStringAndSize
# define PyUnicode_FromFormatV PyUnicodeUCS4_FromFormatV
# define PyUnicode_FromFormat PyUnicodeUCS4_FromFormat
# define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar
# define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
# define PyUnicode_GetMax PyUnicodeUCS4_GetMax
@ -429,6 +434,12 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
Py_ssize_t size /* size of buffer */
);
/* Similar to PyUnicode_FromUnicode(), but u points to Latin-1 encoded bytes */
PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize(
const char *u, /* char buffer */
Py_ssize_t size /* size of buffer */
);
/* Similar to PyUnicode_FromUnicode(), but u points to null-terminated
Latin-1 encoded bytes */
PyAPI_FUNC(PyObject*) PyUnicode_FromString(
@ -510,6 +521,9 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
register PyObject *obj /* Object */
);
/* Build a unicode object from a printf-style format string, taking the
   arguments from a va_list.  Besides the usual %c, %d, %u, %i, %x, %s, %p
   and %% conversions (with %ld, %lu, %zd and %zu variants), %U inserts the
   contents of a PyObject * that must point to a unicode object.
   Returns NULL if the result object cannot be allocated. */
PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(const char*, va_list);
/* Variadic convenience wrapper around PyUnicode_FromFormatV(): collects its
   trailing arguments into a va_list and forwards them unchanged. */
PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(const char*, ...);
/* --- wchar_t support for platforms which support it --------------------- */
#ifdef HAVE_WCHAR_H

View File

@ -21,43 +21,43 @@ class PosReturn:
# A UnicodeEncodeError object with a bad start attribute
class BadStartUnicodeEncodeError(UnicodeEncodeError):
def __init__(self):
UnicodeEncodeError.__init__(self, str8("ascii"), "", 0, 1, str8("bad"))
UnicodeEncodeError.__init__(self, "ascii", "", 0, 1, "bad")
self.start = []
# A UnicodeEncodeError object with a bad object attribute
class BadObjectUnicodeEncodeError(UnicodeEncodeError):
def __init__(self):
UnicodeEncodeError.__init__(self, str8("ascii"), "", 0, 1, str8("bad"))
UnicodeEncodeError.__init__(self, "ascii", "", 0, 1, "bad")
self.object = []
# A UnicodeDecodeError object without an end attribute
class NoEndUnicodeDecodeError(UnicodeDecodeError):
def __init__(self):
UnicodeDecodeError.__init__(self, str8("ascii"), b"", 0, 1, str8("bad"))
UnicodeDecodeError.__init__(self, "ascii", b"", 0, 1, "bad")
del self.end
# A UnicodeDecodeError object with a bad object attribute
class BadObjectUnicodeDecodeError(UnicodeDecodeError):
def __init__(self):
UnicodeDecodeError.__init__(self, str8("ascii"), b"", 0, 1, str8("bad"))
UnicodeDecodeError.__init__(self, "ascii", b"", 0, 1, "bad")
self.object = []
# A UnicodeTranslateError object without a start attribute
class NoStartUnicodeTranslateError(UnicodeTranslateError):
def __init__(self):
UnicodeTranslateError.__init__(self, "", 0, 1, str8("bad"))
UnicodeTranslateError.__init__(self, "", 0, 1, "bad")
del self.start
# A UnicodeTranslateError object without an end attribute
class NoEndUnicodeTranslateError(UnicodeTranslateError):
def __init__(self):
UnicodeTranslateError.__init__(self, "", 0, 1, str8("bad"))
UnicodeTranslateError.__init__(self, "", 0, 1, "bad")
del self.end
# A UnicodeTranslateError object without an object attribute
class NoObjectUnicodeTranslateError(UnicodeTranslateError):
def __init__(self):
UnicodeTranslateError.__init__(self, "", 0, 1, str8("bad"))
UnicodeTranslateError.__init__(self, "", 0, 1, "bad")
del self.object
class CodecCallbackTest(unittest.TestCase):
@ -328,73 +328,73 @@ class CodecCallbackTest(unittest.TestCase):
def test_unicodeencodeerror(self):
self.check_exceptionobjectargs(
UnicodeEncodeError,
[str8("ascii"), "g\xfcrk", 1, 2, str8("ouch")],
["ascii", "g\xfcrk", 1, 2, "ouch"],
"'ascii' codec can't encode character u'\\xfc' in position 1: ouch"
)
self.check_exceptionobjectargs(
UnicodeEncodeError,
[str8("ascii"), "g\xfcrk", 1, 4, str8("ouch")],
["ascii", "g\xfcrk", 1, 4, "ouch"],
"'ascii' codec can't encode characters in position 1-3: ouch"
)
self.check_exceptionobjectargs(
UnicodeEncodeError,
[str8("ascii"), "\xfcx", 0, 1, str8("ouch")],
["ascii", "\xfcx", 0, 1, "ouch"],
"'ascii' codec can't encode character u'\\xfc' in position 0: ouch"
)
self.check_exceptionobjectargs(
UnicodeEncodeError,
[str8("ascii"), "\u0100x", 0, 1, str8("ouch")],
["ascii", "\u0100x", 0, 1, "ouch"],
"'ascii' codec can't encode character u'\\u0100' in position 0: ouch"
)
self.check_exceptionobjectargs(
UnicodeEncodeError,
[str8("ascii"), "\uffffx", 0, 1, str8("ouch")],
["ascii", "\uffffx", 0, 1, "ouch"],
"'ascii' codec can't encode character u'\\uffff' in position 0: ouch"
)
if sys.maxunicode > 0xffff:
self.check_exceptionobjectargs(
UnicodeEncodeError,
[str8("ascii"), "\U00010000x", 0, 1, str8("ouch")],
["ascii", "\U00010000x", 0, 1, "ouch"],
"'ascii' codec can't encode character u'\\U00010000' in position 0: ouch"
)
def test_unicodedecodeerror(self):
self.check_exceptionobjectargs(
UnicodeDecodeError,
[str8("ascii"), b"g\xfcrk", 1, 2, str8("ouch")],
["ascii", b"g\xfcrk", 1, 2, "ouch"],
"'ascii' codec can't decode byte 0xfc in position 1: ouch"
)
self.check_exceptionobjectargs(
UnicodeDecodeError,
[str8("ascii"), b"g\xfcrk", 1, 3, str8("ouch")],
["ascii", b"g\xfcrk", 1, 3, "ouch"],
"'ascii' codec can't decode bytes in position 1-2: ouch"
)
def test_unicodetranslateerror(self):
self.check_exceptionobjectargs(
UnicodeTranslateError,
["g\xfcrk", 1, 2, str8("ouch")],
["g\xfcrk", 1, 2, "ouch"],
"can't translate character u'\\xfc' in position 1: ouch"
)
self.check_exceptionobjectargs(
UnicodeTranslateError,
["g\u0100rk", 1, 2, str8("ouch")],
["g\u0100rk", 1, 2, "ouch"],
"can't translate character u'\\u0100' in position 1: ouch"
)
self.check_exceptionobjectargs(
UnicodeTranslateError,
["g\uffffrk", 1, 2, str8("ouch")],
["g\uffffrk", 1, 2, "ouch"],
"can't translate character u'\\uffff' in position 1: ouch"
)
if sys.maxunicode > 0xffff:
self.check_exceptionobjectargs(
UnicodeTranslateError,
["g\U00010000rk", 1, 2, str8("ouch")],
["g\U00010000rk", 1, 2, "ouch"],
"can't translate character u'\\U00010000' in position 1: ouch"
)
self.check_exceptionobjectargs(
UnicodeTranslateError,
["g\xfcrk", 1, 3, str8("ouch")],
["g\xfcrk", 1, 3, "ouch"],
"can't translate characters in position 1-2: ouch"
)
@ -416,7 +416,7 @@ class CodecCallbackTest(unittest.TestCase):
self.assertRaises(
UnicodeEncodeError,
codecs.strict_errors,
UnicodeEncodeError(str8("ascii"), "\u3042", 0, 1, str8("ouch"))
UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")
)
def test_badandgoodignoreexceptions(self):
@ -435,17 +435,17 @@ class CodecCallbackTest(unittest.TestCase):
# If the correct exception is passed in, "ignore" returns an empty replacement
self.assertEquals(
codecs.ignore_errors(
UnicodeEncodeError(str8("ascii"), "\u3042", 0, 1, str8("ouch"))),
UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")),
("", 1)
)
self.assertEquals(
codecs.ignore_errors(
UnicodeDecodeError(str8("ascii"), b"\xff", 0, 1, str8("ouch"))),
UnicodeDecodeError("ascii", b"\xff", 0, 1, "ouch")),
("", 1)
)
self.assertEquals(
codecs.ignore_errors(
UnicodeTranslateError("\u3042", 0, 1, str8("ouch"))),
UnicodeTranslateError("\u3042", 0, 1, "ouch")),
("", 1)
)
@ -475,17 +475,17 @@ class CodecCallbackTest(unittest.TestCase):
# With the correct exception, "replace" returns an "?" or "\ufffd" replacement
self.assertEquals(
codecs.replace_errors(
UnicodeEncodeError(str8("ascii"), "\u3042", 0, 1, str8("ouch"))),
UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")),
("?", 1)
)
self.assertEquals(
codecs.replace_errors(
UnicodeDecodeError(str8("ascii"), b"\xff", 0, 1, str8("ouch"))),
UnicodeDecodeError("ascii", b"\xff", 0, 1, "ouch")),
("\ufffd", 1)
)
self.assertEquals(
codecs.replace_errors(
UnicodeTranslateError("\u3042", 0, 1, str8("ouch"))),
UnicodeTranslateError("\u3042", 0, 1, "ouch")),
("\ufffd", 1)
)
@ -506,19 +506,19 @@ class CodecCallbackTest(unittest.TestCase):
self.assertRaises(
TypeError,
codecs.xmlcharrefreplace_errors,
UnicodeDecodeError(str8("ascii"), b"\xff", 0, 1, str8("ouch"))
UnicodeDecodeError("ascii", b"\xff", 0, 1, "ouch")
)
self.assertRaises(
TypeError,
codecs.xmlcharrefreplace_errors,
UnicodeTranslateError("\u3042", 0, 1, str8("ouch"))
UnicodeTranslateError("\u3042", 0, 1, "ouch")
)
# Use the correct exception
cs = (0, 1, 9, 10, 99, 100, 999, 1000, 9999, 10000, 0x3042)
s = "".join(chr(c) for c in cs)
self.assertEquals(
codecs.xmlcharrefreplace_errors(
UnicodeEncodeError(str8("ascii"), s, 0, len(s), str8("ouch"))
UnicodeEncodeError("ascii", s, 0, len(s), "ouch")
),
("".join("&#%d;" % ord(c) for c in s), len(s))
)
@ -540,48 +540,48 @@ class CodecCallbackTest(unittest.TestCase):
self.assertRaises(
TypeError,
codecs.backslashreplace_errors,
UnicodeDecodeError(str8("ascii"), b"\xff", 0, 1, str8("ouch"))
UnicodeDecodeError("ascii", b"\xff", 0, 1, "ouch")
)
self.assertRaises(
TypeError,
codecs.backslashreplace_errors,
UnicodeTranslateError("\u3042", 0, 1, str8("ouch"))
UnicodeTranslateError("\u3042", 0, 1, "ouch")
)
# Use the correct exception
self.assertEquals(
codecs.backslashreplace_errors(
UnicodeEncodeError(str8("ascii"), "\u3042", 0, 1, str8("ouch"))),
UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")),
("\\u3042", 1)
)
self.assertEquals(
codecs.backslashreplace_errors(
UnicodeEncodeError(str8("ascii"), "\x00", 0, 1, str8("ouch"))),
UnicodeEncodeError("ascii", "\x00", 0, 1, "ouch")),
("\\x00", 1)
)
self.assertEquals(
codecs.backslashreplace_errors(
UnicodeEncodeError(str8("ascii"), "\xff", 0, 1, str8("ouch"))),
UnicodeEncodeError("ascii", "\xff", 0, 1, "ouch")),
("\\xff", 1)
)
self.assertEquals(
codecs.backslashreplace_errors(
UnicodeEncodeError(str8("ascii"), "\u0100", 0, 1, str8("ouch"))),
UnicodeEncodeError("ascii", "\u0100", 0, 1, "ouch")),
("\\u0100", 1)
)
self.assertEquals(
codecs.backslashreplace_errors(
UnicodeEncodeError(str8("ascii"), "\uffff", 0, 1, str8("ouch"))),
UnicodeEncodeError("ascii", "\uffff", 0, 1, "ouch")),
("\\uffff", 1)
)
if sys.maxunicode>0xffff:
self.assertEquals(
codecs.backslashreplace_errors(
UnicodeEncodeError(str8("ascii"), "\U00010000", 0, 1, str8("ouch"))),
UnicodeEncodeError("ascii", "\U00010000", 0, 1, "ouch")),
("\\U00010000", 1)
)
self.assertEquals(
codecs.backslashreplace_errors(
UnicodeEncodeError(str8("ascii"), "\U0010ffff", 0, 1, str8("ouch"))),
UnicodeEncodeError("ascii", "\U0010ffff", 0, 1, "ouch")),
("\\U0010ffff", 1)
)

View File

@ -1186,35 +1186,6 @@ set_ssize_t(PyObject **attr, Py_ssize_t value)
return 0;
}
static PyObject *
get_string(PyObject *attr, const char *name)
{
if (!attr) {
PyErr_Format(PyExc_TypeError, "%.200s attribute not set", name);
return NULL;
}
if (!PyString_Check(attr)) {
PyErr_Format(PyExc_TypeError, "%.200s attribute must be str", name);
return NULL;
}
Py_INCREF(attr);
return attr;
}
static int
set_string(PyObject **attr, const char *value)
{
PyObject *obj = PyString_FromString(value);
if (!obj)
return -1;
Py_CLEAR(*attr);
*attr = obj;
return 0;
}
static PyObject *
get_bytes(PyObject *attr, const char *name)
{
@ -1248,16 +1219,27 @@ get_unicode(PyObject *attr, const char *name)
return attr;
}
/* Store a unicode object built from the C string `value` in *attr.
   The previous content of *attr is released with Py_CLEAR before the new
   object is stored.
   Returns 0 on success.  Returns -1, leaving *attr untouched, when
   PyUnicode_FromString() fails to produce an object. */
static int
set_unicodefromstring(PyObject **attr, const char *value)
{
    PyObject *converted = PyUnicode_FromString(value);

    if (converted == NULL) {
        return -1;
    }
    Py_CLEAR(*attr);
    *attr = converted;
    return 0;
}
PyObject *
PyUnicodeEncodeError_GetEncoding(PyObject *exc)
{
return get_string(((PyUnicodeErrorObject *)exc)->encoding, "encoding");
return get_unicode(((PyUnicodeErrorObject *)exc)->encoding, "encoding");
}
PyObject *
PyUnicodeDecodeError_GetEncoding(PyObject *exc)
{
return get_string(((PyUnicodeErrorObject *)exc)->encoding, "encoding");
return get_unicode(((PyUnicodeErrorObject *)exc)->encoding, "encoding");
}
PyObject *
@ -1416,42 +1398,45 @@ PyUnicodeTranslateError_SetEnd(PyObject *exc, Py_ssize_t end)
PyObject *
PyUnicodeEncodeError_GetReason(PyObject *exc)
{
return get_string(((PyUnicodeErrorObject *)exc)->reason, "reason");
return get_unicode(((PyUnicodeErrorObject *)exc)->reason, "reason");
}
PyObject *
PyUnicodeDecodeError_GetReason(PyObject *exc)
{
return get_string(((PyUnicodeErrorObject *)exc)->reason, "reason");
return get_unicode(((PyUnicodeErrorObject *)exc)->reason, "reason");
}
PyObject *
PyUnicodeTranslateError_GetReason(PyObject *exc)
{
return get_string(((PyUnicodeErrorObject *)exc)->reason, "reason");
return get_unicode(((PyUnicodeErrorObject *)exc)->reason, "reason");
}
int
PyUnicodeEncodeError_SetReason(PyObject *exc, const char *reason)
{
return set_string(&((PyUnicodeErrorObject *)exc)->reason, reason);
return set_unicodefromstring(&((PyUnicodeErrorObject *)exc)->reason,
reason);
}
int
PyUnicodeDecodeError_SetReason(PyObject *exc, const char *reason)
{
return set_string(&((PyUnicodeErrorObject *)exc)->reason, reason);
return set_unicodefromstring(&((PyUnicodeErrorObject *)exc)->reason,
reason);
}
int
PyUnicodeTranslateError_SetReason(PyObject *exc, const char *reason)
{
return set_string(&((PyUnicodeErrorObject *)exc)->reason, reason);
return set_unicodefromstring(&((PyUnicodeErrorObject *)exc)->reason,
reason);
}
@ -1466,11 +1451,11 @@ UnicodeError_init(PyUnicodeErrorObject *self, PyObject *args, PyObject *kwds,
Py_CLEAR(self->reason);
if (!PyArg_ParseTuple(args, "O!O!O!O!O!",
&PyString_Type, &self->encoding,
&PyUnicode_Type, &self->encoding,
objecttype, &self->object,
&PyLong_Type, &self->start,
&PyLong_Type, &self->end,
&PyString_Type, &self->reason)) {
&PyUnicode_Type, &self->reason)) {
self->encoding = self->object = self->start = self->end =
self->reason = NULL;
return -1;
@ -1564,20 +1549,20 @@ UnicodeEncodeError_str(PyObject *self)
PyOS_snprintf(badchar_str, sizeof(badchar_str), "u%04x", badchar);
else
PyOS_snprintf(badchar_str, sizeof(badchar_str), "U%08x", badchar);
return PyString_FromFormat(
"'%.400s' codec can't encode character u'\\%s' in position %zd: %.400s",
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->encoding),
return PyUnicode_FromFormat(
"'%U' codec can't encode character u'\\%s' in position %zd: %U",
((PyUnicodeErrorObject *)self)->encoding,
badchar_str,
start,
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->reason)
((PyUnicodeErrorObject *)self)->reason
);
}
return PyString_FromFormat(
"'%.400s' codec can't encode characters in position %zd-%zd: %.400s",
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->encoding),
return PyUnicode_FromFormat(
"'%U' codec can't encode characters in position %zd-%zd: %U",
((PyUnicodeErrorObject *)self)->encoding,
start,
(end-1),
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->reason)
((PyUnicodeErrorObject *)self)->reason
);
}
@ -1601,7 +1586,7 @@ PyUnicodeEncodeError_Create(
const char *encoding, const Py_UNICODE *object, Py_ssize_t length,
Py_ssize_t start, Py_ssize_t end, const char *reason)
{
return PyObject_CallFunction(PyExc_UnicodeEncodeError, "su#nns",
return PyObject_CallFunction(PyExc_UnicodeEncodeError, "Uu#nnU",
encoding, object, length, start, end, reason);
}
@ -1626,30 +1611,30 @@ UnicodeDecodeError_str(PyObject *self)
Py_ssize_t end = 0;
if (PyUnicodeDecodeError_GetStart(self, &start))
return NULL;
return NULL;
if (PyUnicodeDecodeError_GetEnd(self, &end))
return NULL;
return NULL;
if (end==start+1) {
/* FromFormat does not support %02x, so format that separately */
char byte[4];
PyOS_snprintf(byte, sizeof(byte), "%02x",
((int)PyBytes_AS_STRING(((PyUnicodeErrorObject *)self)->object)[start])&0xff);
return PyString_FromFormat(
"'%.400s' codec can't decode byte 0x%s in position %zd: %.400s",
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->encoding),
return PyUnicode_FromFormat(
"'%U' codec can't decode byte 0x%s in position %zd: %U",
((PyUnicodeErrorObject *)self)->encoding,
byte,
start,
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->reason)
((PyUnicodeErrorObject *)self)->reason
);
}
return PyString_FromFormat(
"'%.400s' codec can't decode bytes in position %zd-%zd: %.400s",
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->encoding),
return PyUnicode_FromFormat(
"'%U' codec can't decode bytes in position %zd-%zd: %U",
((PyUnicodeErrorObject *)self)->encoding,
start,
(end-1),
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->reason)
((PyUnicodeErrorObject *)self)->reason
);
}
@ -1676,7 +1661,7 @@ PyUnicodeDecodeError_Create(
assert(length < INT_MAX);
assert(start < INT_MAX);
assert(end < INT_MAX);
return PyObject_CallFunction(PyExc_UnicodeDecodeError, "sy#nns",
return PyObject_CallFunction(PyExc_UnicodeDecodeError, "Uy#nnU",
encoding, object, length, start, end, reason);
}
@ -1701,7 +1686,7 @@ UnicodeTranslateError_init(PyUnicodeErrorObject *self, PyObject *args,
&PyUnicode_Type, &self->object,
&PyLong_Type, &self->start,
&PyLong_Type, &self->end,
&PyString_Type, &self->reason)) {
&PyUnicode_Type, &self->reason)) {
self->object = self->start = self->end = self->reason = NULL;
return -1;
}
@ -1736,18 +1721,18 @@ UnicodeTranslateError_str(PyObject *self)
PyOS_snprintf(badchar_str, sizeof(badchar_str), "u%04x", badchar);
else
PyOS_snprintf(badchar_str, sizeof(badchar_str), "U%08x", badchar);
return PyString_FromFormat(
"can't translate character u'\\%s' in position %zd: %.400s",
return PyUnicode_FromFormat(
"can't translate character u'\\%s' in position %zd: %U",
badchar_str,
start,
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->reason)
((PyUnicodeErrorObject *)self)->reason
);
}
return PyString_FromFormat(
"can't translate characters in position %zd-%zd: %.400s",
return PyUnicode_FromFormat(
"can't translate characters in position %zd-%zd: %U",
start,
(end-1),
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->reason)
((PyUnicodeErrorObject *)self)->reason
);
}

View File

@ -393,15 +393,9 @@ PyObject *PyUnicode_FromUnicode(const Py_UNICODE *u,
return (PyObject *)unicode;
}
PyObject *PyUnicode_FromString(const char *u)
PyObject *PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size)
{
PyUnicodeObject *unicode;
size_t size = strlen(u);
if (size > PY_SSIZE_T_MAX) {
PyErr_SetString(PyExc_OverflowError, "input too long");
return NULL;
}
/* If the Unicode data is known at construction time, we can apply
some optimizations which share commonly used objects. */
if (u != NULL) {
@ -441,6 +435,17 @@ PyObject *PyUnicode_FromString(const char *u)
return (PyObject *)unicode;
}
/* Create a unicode object from the null-terminated byte string `u`.
   The length is measured with strlen() and the actual conversion is
   delegated to PyUnicode_FromStringAndSize().
   Raises OverflowError and returns NULL when the string is longer than
   PY_SSIZE_T_MAX. */
PyObject *PyUnicode_FromString(const char *u)
{
    size_t length = strlen(u);

    if (length <= PY_SSIZE_T_MAX)
        return PyUnicode_FromStringAndSize(u, length);

    PyErr_SetString(PyExc_OverflowError, "input too long");
    return NULL;
}
#ifdef HAVE_WCHAR_H
PyObject *PyUnicode_FromWideChar(register const wchar_t *w,
@ -473,6 +478,223 @@ PyObject *PyUnicode_FromWideChar(register const wchar_t *w,
return (PyObject *)unicode;
}
#define appendstring(string) {for (copy = string;*copy;) *s++ = *copy++;}
PyObject *
PyUnicode_FromFormatV(const char *format, va_list vargs)
{
va_list count;
Py_ssize_t n = 0;
const char* f;
Py_UNICODE *s;
PyObject *string;
/* used by sprintf */
char buffer[21];
const char *copy;
#ifdef VA_LIST_IS_ARRAY
Py_MEMCPY(count, vargs, sizeof(va_list));
#else
#ifdef __va_copy
__va_copy(count, vargs);
#else
count = vargs;
#endif
#endif
/* step 1: figure out how large a buffer we need */
for (f = format; *f; f++) {
if (*f == '%') {
const char* p = f;
while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
;
/* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
* they don't affect the amount of space we reserve.
*/
if ((*f == 'l' || *f == 'z') &&
(f[1] == 'd' || f[1] == 'u'))
++f;
switch (*f) {
case 'c':
(void)va_arg(count, int);
/* fall through... */
case '%':
n++;
break;
case 'd': case 'u': case 'i': case 'x':
(void) va_arg(count, int);
/* 20 bytes is enough to hold a 64-bit
integer. Decimal takes the most space.
This isn't enough for octal. */
n += 20;
break;
case 's':
n += strlen(va_arg(count, char*));
break;
case 'U':
{
PyObject *obj = va_arg(count, PyObject *);
assert(obj && PyUnicode_Check(obj));
n += PyUnicode_GET_SIZE(obj);
break;
}
case 'p':
(void) va_arg(count, int);
/* maximum 64-bit pointer representation:
* 0xffffffffffffffff
* so 19 characters is enough.
* XXX I count 18 -- what's the extra for?
*/
n += 19;
break;
default:
/* if we stumble upon an unknown
formatting code, copy the rest of
the format string to the output
string. (we cannot just skip the
code, since there's no way to know
what's in the argument list) */
n += strlen(p);
goto expand;
}
} else
n++;
}
expand:
/* step 2: fill the buffer */
/* Since we've analyzed how much space we need for the worst case,
we don't have to resize the string. */
string = PyUnicode_FromUnicode(NULL, n);
if (!string)
return NULL;
s = PyUnicode_AS_UNICODE(string);
for (f = format; *f; f++) {
if (*f == '%') {
const char* p = f++;
int longflag = 0;
int size_tflag = 0;
/* parse the width.precision part (we're only
interested in the precision value, if any) */
n = 0;
while (isdigit(Py_CHARMASK(*f)))
n = (n*10) + *f++ - '0';
if (*f == '.') {
f++;
n = 0;
while (isdigit(Py_CHARMASK(*f)))
n = (n*10) + *f++ - '0';
}
while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
f++;
/* handle the long flag, but only for %ld and %lu.
others can be added when necessary. */
if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
longflag = 1;
++f;
}
/* handle the size_t flag. */
if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
size_tflag = 1;
++f;
}
switch (*f) {
case 'c':
*s++ = va_arg(vargs, int);
break;
case 'd':
if (longflag)
sprintf(buffer, "%ld", va_arg(vargs, long));
else if (size_tflag)
sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
va_arg(vargs, Py_ssize_t));
else
sprintf(buffer, "%d", va_arg(vargs, int));
appendstring(buffer);
break;
case 'u':
if (longflag)
sprintf(buffer, "%lu",
va_arg(vargs, unsigned long));
else if (size_tflag)
sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
va_arg(vargs, size_t));
else
sprintf(buffer, "%u",
va_arg(vargs, unsigned int));
appendstring(buffer);
break;
case 'i':
sprintf(buffer, "%i", va_arg(vargs, int));
appendstring(buffer);
break;
case 'x':
sprintf(buffer, "%x", va_arg(vargs, int));
appendstring(buffer);
break;
case 's':
p = va_arg(vargs, char*);
appendstring(p);
break;
case 'U':
{
PyObject *obj = va_arg(vargs, PyObject *);
Py_UNICODE *ucopy = PyUnicode_AS_UNICODE(obj);
Py_ssize_t usize = PyUnicode_GET_SIZE(obj);
Py_ssize_t upos;
for (upos = 0; upos<usize;)
*s++ = ucopy[upos++];
break;
}
case 'p':
sprintf(buffer, "%p", va_arg(vargs, void*));
/* %p is ill-defined: ensure leading 0x. */
if (buffer[1] == 'X')
buffer[1] = 'x';
else if (buffer[1] != 'x') {
memmove(buffer+2, buffer, strlen(buffer)+1);
buffer[0] = '0';
buffer[1] = 'x';
}
appendstring(buffer);
break;
case '%':
*s++ = '%';
break;
default:
appendstring(p);
goto end;
}
} else
*s++ = *f;
}
end:
_PyUnicode_Resize(&string, s - PyUnicode_AS_UNICODE(string));
return string;
}
#undef appendstring
/* Variadic front end for PyUnicode_FromFormatV(): gathers the arguments
   that follow `format` into a va_list, hands them over, and returns
   whatever PyUnicode_FromFormatV() returned. */
PyObject *
PyUnicode_FromFormat(const char *format, ...)
{
    va_list vargs;
    PyObject *result;

    /* Without stdarg prototypes va_start() takes no second argument. */
#ifdef HAVE_STDARG_PROTOTYPES
    va_start(vargs, format);
#else
    va_start(vargs);
#endif
    result = PyUnicode_FromFormatV(format, vargs);
    va_end(vargs);

    return result;
}
Py_ssize_t PyUnicode_AsWideChar(PyUnicodeObject *unicode,
wchar_t *w,
Py_ssize_t size)

View File

@ -424,6 +424,39 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags)
return v;
}
/* 'U' / 'U#': build a unicode object from a char * argument; with '#'
   an explicit length argument follows the pointer. */
case 'U':
{
PyObject *v;
char *str = va_arg(*p_va, char *);
Py_ssize_t n;
if (**p_format == '#') {
++*p_format;
/* the length is passed as Py_ssize_t or as int, depending on
   FLAG_SIZE_T */
if (flags & FLAG_SIZE_T)
n = va_arg(*p_va, Py_ssize_t);
else
n = va_arg(*p_va, int);
}
else
/* no explicit length: a negative n means "measure with strlen()" */
n = -1;
if (str == NULL) {
/* a NULL pointer yields None; any length argument is ignored */
v = Py_None;
Py_INCREF(v);
}
else {
/* also taken when a negative length was passed explicitly via 'U#' */
if (n < 0) {
size_t m = strlen(str);
if (m > PY_SSIZE_T_MAX) {
PyErr_SetString(PyExc_OverflowError,
"string too long for Python string");
return NULL;
}
n = (Py_ssize_t)m;
}
v = PyUnicode_FromStringAndSize(str, n);
}
return v;
}
case 'y':
{
PyObject *v;