mirror of https://github.com/python/cpython
Add 'U'/'U#' format characters to Py_BuildValue (and thus
to PyObject_CallFunction()) that take a char * (and a size in the case of 'U#') and create a unicode object out of it. Add functions PyUnicode_FromFormat() and PyUnicode_FromFormatV() that work similarly to PyString_FromFormat(), but create a unicode object (a %U format character has also been added; it takes a PyObject *, which must point to a unicode object). Change the encoding and reason attributes of UnicodeEncodeError, UnicodeDecodeError and UnicodeTranslateError to be unicode objects.
This commit is contained in:
parent
5550731d9c
commit
d2034310d6
|
@ -848,6 +848,15 @@ PyArg_ParseTuple(args, "O|O:ref", &object, &callback)
|
|||
to a Python Unicode object. If the Unicode buffer pointer
|
||||
is \NULL, the length is ignored and \code{None} is returned.
|
||||
|
||||
\item[\samp{U} (string) {[char *]}]
|
||||
Convert a null-terminated C string to a Python unicode object.
|
||||
If the C string pointer is \NULL, \code{None} is used.
|
||||
|
||||
\item[\samp{U\#} (string) {[char *, int]}]
|
||||
Convert a C string and its length to a Python unicode object.
|
||||
If the C string pointer is \NULL, the length is ignored and \code{None}
|
||||
is returned.
|
||||
|
||||
\item[\samp{i} (integer) {[int]}]
|
||||
Convert a plain C \ctype{int} to a Python integer object.
|
||||
|
||||
|
|
|
@ -173,7 +173,9 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
|
|||
# define PyUnicode_FromOrdinal PyUnicodeUCS2_FromOrdinal
|
||||
# define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode
|
||||
# define PyUnicode_FromString PyUnicodeUCS2_FromString
|
||||
# define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar
|
||||
# define PyUnicode_FromStringAndSize PyUnicodeUCS2_FromStringAndSize
|
||||
# define PyUnicode_FromFormatV PyUnicodeUCS2_FromFormatV
|
||||
# define PyUnicode_FromFormat PyUnicodeUCS2_FromFormat
|
||||
# define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
|
||||
# define PyUnicode_GetMax PyUnicodeUCS2_GetMax
|
||||
# define PyUnicode_GetSize PyUnicodeUCS2_GetSize
|
||||
|
@ -252,6 +254,9 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
|
|||
# define PyUnicode_FromOrdinal PyUnicodeUCS4_FromOrdinal
|
||||
# define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode
|
||||
# define PyUnicode_FromString PyUnicodeUCS4_FromString
|
||||
# define PyUnicode_FromStringAndSize PyUnicodeUCS4_FromStringAndSize
|
||||
# define PyUnicode_FromFormatV PyUnicodeUCS4_FromFormatV
|
||||
# define PyUnicode_FromFormat PyUnicodeUCS4_FromFormat
|
||||
# define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar
|
||||
# define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
|
||||
# define PyUnicode_GetMax PyUnicodeUCS4_GetMax
|
||||
|
@ -429,6 +434,12 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
|
|||
Py_ssize_t size /* size of buffer */
|
||||
);
|
||||
|
||||
/* Similar to PyUnicode_FromUnicode(), but u points to Latin-1 encoded bytes */
|
||||
PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize(
|
||||
const char *u, /* char buffer */
|
||||
Py_ssize_t size /* size of buffer */
|
||||
);
|
||||
|
||||
/* Similar to PyUnicode_FromUnicode(), but u points to null-terminated
|
||||
Latin-1 encoded bytes */
|
||||
PyAPI_FUNC(PyObject*) PyUnicode_FromString(
|
||||
|
@ -510,6 +521,9 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
|
|||
register PyObject *obj /* Object */
|
||||
);
|
||||
|
||||
PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(const char*, va_list);
|
||||
PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(const char*, ...);
|
||||
|
||||
/* --- wchar_t support for platforms which support it --------------------- */
|
||||
|
||||
#ifdef HAVE_WCHAR_H
|
||||
|
|
|
@ -21,43 +21,43 @@ class PosReturn:
|
|||
# A UnicodeEncodeError object with a bad start attribute
|
||||
class BadStartUnicodeEncodeError(UnicodeEncodeError):
|
||||
def __init__(self):
|
||||
UnicodeEncodeError.__init__(self, str8("ascii"), "", 0, 1, str8("bad"))
|
||||
UnicodeEncodeError.__init__(self, "ascii", "", 0, 1, "bad")
|
||||
self.start = []
|
||||
|
||||
# A UnicodeEncodeError object with a bad object attribute
|
||||
class BadObjectUnicodeEncodeError(UnicodeEncodeError):
|
||||
def __init__(self):
|
||||
UnicodeEncodeError.__init__(self, str8("ascii"), "", 0, 1, str8("bad"))
|
||||
UnicodeEncodeError.__init__(self, "ascii", "", 0, 1, "bad")
|
||||
self.object = []
|
||||
|
||||
# A UnicodeDecodeError object without an end attribute
|
||||
class NoEndUnicodeDecodeError(UnicodeDecodeError):
|
||||
def __init__(self):
|
||||
UnicodeDecodeError.__init__(self, str8("ascii"), b"", 0, 1, str8("bad"))
|
||||
UnicodeDecodeError.__init__(self, "ascii", b"", 0, 1, "bad")
|
||||
del self.end
|
||||
|
||||
# A UnicodeDecodeError object with a bad object attribute
|
||||
class BadObjectUnicodeDecodeError(UnicodeDecodeError):
|
||||
def __init__(self):
|
||||
UnicodeDecodeError.__init__(self, str8("ascii"), b"", 0, 1, str8("bad"))
|
||||
UnicodeDecodeError.__init__(self, "ascii", b"", 0, 1, "bad")
|
||||
self.object = []
|
||||
|
||||
# A UnicodeTranslateError object without a start attribute
|
||||
class NoStartUnicodeTranslateError(UnicodeTranslateError):
|
||||
def __init__(self):
|
||||
UnicodeTranslateError.__init__(self, "", 0, 1, str8("bad"))
|
||||
UnicodeTranslateError.__init__(self, "", 0, 1, "bad")
|
||||
del self.start
|
||||
|
||||
# A UnicodeTranslateError object without an end attribute
|
||||
class NoEndUnicodeTranslateError(UnicodeTranslateError):
|
||||
def __init__(self):
|
||||
UnicodeTranslateError.__init__(self, "", 0, 1, str8("bad"))
|
||||
UnicodeTranslateError.__init__(self, "", 0, 1, "bad")
|
||||
del self.end
|
||||
|
||||
# A UnicodeTranslateError object without an object attribute
|
||||
class NoObjectUnicodeTranslateError(UnicodeTranslateError):
|
||||
def __init__(self):
|
||||
UnicodeTranslateError.__init__(self, "", 0, 1, str8("bad"))
|
||||
UnicodeTranslateError.__init__(self, "", 0, 1, "bad")
|
||||
del self.object
|
||||
|
||||
class CodecCallbackTest(unittest.TestCase):
|
||||
|
@ -328,73 +328,73 @@ class CodecCallbackTest(unittest.TestCase):
|
|||
def test_unicodeencodeerror(self):
|
||||
self.check_exceptionobjectargs(
|
||||
UnicodeEncodeError,
|
||||
[str8("ascii"), "g\xfcrk", 1, 2, str8("ouch")],
|
||||
["ascii", "g\xfcrk", 1, 2, "ouch"],
|
||||
"'ascii' codec can't encode character u'\\xfc' in position 1: ouch"
|
||||
)
|
||||
self.check_exceptionobjectargs(
|
||||
UnicodeEncodeError,
|
||||
[str8("ascii"), "g\xfcrk", 1, 4, str8("ouch")],
|
||||
["ascii", "g\xfcrk", 1, 4, "ouch"],
|
||||
"'ascii' codec can't encode characters in position 1-3: ouch"
|
||||
)
|
||||
self.check_exceptionobjectargs(
|
||||
UnicodeEncodeError,
|
||||
[str8("ascii"), "\xfcx", 0, 1, str8("ouch")],
|
||||
["ascii", "\xfcx", 0, 1, "ouch"],
|
||||
"'ascii' codec can't encode character u'\\xfc' in position 0: ouch"
|
||||
)
|
||||
self.check_exceptionobjectargs(
|
||||
UnicodeEncodeError,
|
||||
[str8("ascii"), "\u0100x", 0, 1, str8("ouch")],
|
||||
["ascii", "\u0100x", 0, 1, "ouch"],
|
||||
"'ascii' codec can't encode character u'\\u0100' in position 0: ouch"
|
||||
)
|
||||
self.check_exceptionobjectargs(
|
||||
UnicodeEncodeError,
|
||||
[str8("ascii"), "\uffffx", 0, 1, str8("ouch")],
|
||||
["ascii", "\uffffx", 0, 1, "ouch"],
|
||||
"'ascii' codec can't encode character u'\\uffff' in position 0: ouch"
|
||||
)
|
||||
if sys.maxunicode > 0xffff:
|
||||
self.check_exceptionobjectargs(
|
||||
UnicodeEncodeError,
|
||||
[str8("ascii"), "\U00010000x", 0, 1, str8("ouch")],
|
||||
["ascii", "\U00010000x", 0, 1, "ouch"],
|
||||
"'ascii' codec can't encode character u'\\U00010000' in position 0: ouch"
|
||||
)
|
||||
|
||||
def test_unicodedecodeerror(self):
|
||||
self.check_exceptionobjectargs(
|
||||
UnicodeDecodeError,
|
||||
[str8("ascii"), b"g\xfcrk", 1, 2, str8("ouch")],
|
||||
["ascii", b"g\xfcrk", 1, 2, "ouch"],
|
||||
"'ascii' codec can't decode byte 0xfc in position 1: ouch"
|
||||
)
|
||||
self.check_exceptionobjectargs(
|
||||
UnicodeDecodeError,
|
||||
[str8("ascii"), b"g\xfcrk", 1, 3, str8("ouch")],
|
||||
["ascii", b"g\xfcrk", 1, 3, "ouch"],
|
||||
"'ascii' codec can't decode bytes in position 1-2: ouch"
|
||||
)
|
||||
|
||||
def test_unicodetranslateerror(self):
|
||||
self.check_exceptionobjectargs(
|
||||
UnicodeTranslateError,
|
||||
["g\xfcrk", 1, 2, str8("ouch")],
|
||||
["g\xfcrk", 1, 2, "ouch"],
|
||||
"can't translate character u'\\xfc' in position 1: ouch"
|
||||
)
|
||||
self.check_exceptionobjectargs(
|
||||
UnicodeTranslateError,
|
||||
["g\u0100rk", 1, 2, str8("ouch")],
|
||||
["g\u0100rk", 1, 2, "ouch"],
|
||||
"can't translate character u'\\u0100' in position 1: ouch"
|
||||
)
|
||||
self.check_exceptionobjectargs(
|
||||
UnicodeTranslateError,
|
||||
["g\uffffrk", 1, 2, str8("ouch")],
|
||||
["g\uffffrk", 1, 2, "ouch"],
|
||||
"can't translate character u'\\uffff' in position 1: ouch"
|
||||
)
|
||||
if sys.maxunicode > 0xffff:
|
||||
self.check_exceptionobjectargs(
|
||||
UnicodeTranslateError,
|
||||
["g\U00010000rk", 1, 2, str8("ouch")],
|
||||
["g\U00010000rk", 1, 2, "ouch"],
|
||||
"can't translate character u'\\U00010000' in position 1: ouch"
|
||||
)
|
||||
self.check_exceptionobjectargs(
|
||||
UnicodeTranslateError,
|
||||
["g\xfcrk", 1, 3, str8("ouch")],
|
||||
["g\xfcrk", 1, 3, "ouch"],
|
||||
"can't translate characters in position 1-2: ouch"
|
||||
)
|
||||
|
||||
|
@ -416,7 +416,7 @@ class CodecCallbackTest(unittest.TestCase):
|
|||
self.assertRaises(
|
||||
UnicodeEncodeError,
|
||||
codecs.strict_errors,
|
||||
UnicodeEncodeError(str8("ascii"), "\u3042", 0, 1, str8("ouch"))
|
||||
UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")
|
||||
)
|
||||
|
||||
def test_badandgoodignoreexceptions(self):
|
||||
|
@ -435,17 +435,17 @@ class CodecCallbackTest(unittest.TestCase):
|
|||
# If the correct exception is passed in, "ignore" returns an empty replacement
|
||||
self.assertEquals(
|
||||
codecs.ignore_errors(
|
||||
UnicodeEncodeError(str8("ascii"), "\u3042", 0, 1, str8("ouch"))),
|
||||
UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")),
|
||||
("", 1)
|
||||
)
|
||||
self.assertEquals(
|
||||
codecs.ignore_errors(
|
||||
UnicodeDecodeError(str8("ascii"), b"\xff", 0, 1, str8("ouch"))),
|
||||
UnicodeDecodeError("ascii", b"\xff", 0, 1, "ouch")),
|
||||
("", 1)
|
||||
)
|
||||
self.assertEquals(
|
||||
codecs.ignore_errors(
|
||||
UnicodeTranslateError("\u3042", 0, 1, str8("ouch"))),
|
||||
UnicodeTranslateError("\u3042", 0, 1, "ouch")),
|
||||
("", 1)
|
||||
)
|
||||
|
||||
|
@ -475,17 +475,17 @@ class CodecCallbackTest(unittest.TestCase):
|
|||
# With the correct exception, "replace" returns an "?" or "\ufffd" replacement
|
||||
self.assertEquals(
|
||||
codecs.replace_errors(
|
||||
UnicodeEncodeError(str8("ascii"), "\u3042", 0, 1, str8("ouch"))),
|
||||
UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")),
|
||||
("?", 1)
|
||||
)
|
||||
self.assertEquals(
|
||||
codecs.replace_errors(
|
||||
UnicodeDecodeError(str8("ascii"), b"\xff", 0, 1, str8("ouch"))),
|
||||
UnicodeDecodeError("ascii", b"\xff", 0, 1, "ouch")),
|
||||
("\ufffd", 1)
|
||||
)
|
||||
self.assertEquals(
|
||||
codecs.replace_errors(
|
||||
UnicodeTranslateError("\u3042", 0, 1, str8("ouch"))),
|
||||
UnicodeTranslateError("\u3042", 0, 1, "ouch")),
|
||||
("\ufffd", 1)
|
||||
)
|
||||
|
||||
|
@ -506,19 +506,19 @@ class CodecCallbackTest(unittest.TestCase):
|
|||
self.assertRaises(
|
||||
TypeError,
|
||||
codecs.xmlcharrefreplace_errors,
|
||||
UnicodeDecodeError(str8("ascii"), b"\xff", 0, 1, str8("ouch"))
|
||||
UnicodeDecodeError("ascii", b"\xff", 0, 1, "ouch")
|
||||
)
|
||||
self.assertRaises(
|
||||
TypeError,
|
||||
codecs.xmlcharrefreplace_errors,
|
||||
UnicodeTranslateError("\u3042", 0, 1, str8("ouch"))
|
||||
UnicodeTranslateError("\u3042", 0, 1, "ouch")
|
||||
)
|
||||
# Use the correct exception
|
||||
cs = (0, 1, 9, 10, 99, 100, 999, 1000, 9999, 10000, 0x3042)
|
||||
s = "".join(chr(c) for c in cs)
|
||||
self.assertEquals(
|
||||
codecs.xmlcharrefreplace_errors(
|
||||
UnicodeEncodeError(str8("ascii"), s, 0, len(s), str8("ouch"))
|
||||
UnicodeEncodeError("ascii", s, 0, len(s), "ouch")
|
||||
),
|
||||
("".join("&#%d;" % ord(c) for c in s), len(s))
|
||||
)
|
||||
|
@ -540,48 +540,48 @@ class CodecCallbackTest(unittest.TestCase):
|
|||
self.assertRaises(
|
||||
TypeError,
|
||||
codecs.backslashreplace_errors,
|
||||
UnicodeDecodeError(str8("ascii"), b"\xff", 0, 1, str8("ouch"))
|
||||
UnicodeDecodeError("ascii", b"\xff", 0, 1, "ouch")
|
||||
)
|
||||
self.assertRaises(
|
||||
TypeError,
|
||||
codecs.backslashreplace_errors,
|
||||
UnicodeTranslateError("\u3042", 0, 1, str8("ouch"))
|
||||
UnicodeTranslateError("\u3042", 0, 1, "ouch")
|
||||
)
|
||||
# Use the correct exception
|
||||
self.assertEquals(
|
||||
codecs.backslashreplace_errors(
|
||||
UnicodeEncodeError(str8("ascii"), "\u3042", 0, 1, str8("ouch"))),
|
||||
UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")),
|
||||
("\\u3042", 1)
|
||||
)
|
||||
self.assertEquals(
|
||||
codecs.backslashreplace_errors(
|
||||
UnicodeEncodeError(str8("ascii"), "\x00", 0, 1, str8("ouch"))),
|
||||
UnicodeEncodeError("ascii", "\x00", 0, 1, "ouch")),
|
||||
("\\x00", 1)
|
||||
)
|
||||
self.assertEquals(
|
||||
codecs.backslashreplace_errors(
|
||||
UnicodeEncodeError(str8("ascii"), "\xff", 0, 1, str8("ouch"))),
|
||||
UnicodeEncodeError("ascii", "\xff", 0, 1, "ouch")),
|
||||
("\\xff", 1)
|
||||
)
|
||||
self.assertEquals(
|
||||
codecs.backslashreplace_errors(
|
||||
UnicodeEncodeError(str8("ascii"), "\u0100", 0, 1, str8("ouch"))),
|
||||
UnicodeEncodeError("ascii", "\u0100", 0, 1, "ouch")),
|
||||
("\\u0100", 1)
|
||||
)
|
||||
self.assertEquals(
|
||||
codecs.backslashreplace_errors(
|
||||
UnicodeEncodeError(str8("ascii"), "\uffff", 0, 1, str8("ouch"))),
|
||||
UnicodeEncodeError("ascii", "\uffff", 0, 1, "ouch")),
|
||||
("\\uffff", 1)
|
||||
)
|
||||
if sys.maxunicode>0xffff:
|
||||
self.assertEquals(
|
||||
codecs.backslashreplace_errors(
|
||||
UnicodeEncodeError(str8("ascii"), "\U00010000", 0, 1, str8("ouch"))),
|
||||
UnicodeEncodeError("ascii", "\U00010000", 0, 1, "ouch")),
|
||||
("\\U00010000", 1)
|
||||
)
|
||||
self.assertEquals(
|
||||
codecs.backslashreplace_errors(
|
||||
UnicodeEncodeError(str8("ascii"), "\U0010ffff", 0, 1, str8("ouch"))),
|
||||
UnicodeEncodeError("ascii", "\U0010ffff", 0, 1, "ouch")),
|
||||
("\\U0010ffff", 1)
|
||||
)
|
||||
|
||||
|
|
|
@ -1186,35 +1186,6 @@ set_ssize_t(PyObject **attr, Py_ssize_t value)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
get_string(PyObject *attr, const char *name)
|
||||
{
|
||||
if (!attr) {
|
||||
PyErr_Format(PyExc_TypeError, "%.200s attribute not set", name);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (!PyString_Check(attr)) {
|
||||
PyErr_Format(PyExc_TypeError, "%.200s attribute must be str", name);
|
||||
return NULL;
|
||||
}
|
||||
Py_INCREF(attr);
|
||||
return attr;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
set_string(PyObject **attr, const char *value)
|
||||
{
|
||||
PyObject *obj = PyString_FromString(value);
|
||||
if (!obj)
|
||||
return -1;
|
||||
Py_CLEAR(*attr);
|
||||
*attr = obj;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static PyObject *
|
||||
get_bytes(PyObject *attr, const char *name)
|
||||
{
|
||||
|
@ -1248,16 +1219,27 @@ get_unicode(PyObject *attr, const char *name)
|
|||
return attr;
|
||||
}
|
||||
|
||||
static int
|
||||
set_unicodefromstring(PyObject **attr, const char *value)
|
||||
{
|
||||
PyObject *obj = PyUnicode_FromString(value);
|
||||
if (!obj)
|
||||
return -1;
|
||||
Py_CLEAR(*attr);
|
||||
*attr = obj;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PyObject *
|
||||
PyUnicodeEncodeError_GetEncoding(PyObject *exc)
|
||||
{
|
||||
return get_string(((PyUnicodeErrorObject *)exc)->encoding, "encoding");
|
||||
return get_unicode(((PyUnicodeErrorObject *)exc)->encoding, "encoding");
|
||||
}
|
||||
|
||||
PyObject *
|
||||
PyUnicodeDecodeError_GetEncoding(PyObject *exc)
|
||||
{
|
||||
return get_string(((PyUnicodeErrorObject *)exc)->encoding, "encoding");
|
||||
return get_unicode(((PyUnicodeErrorObject *)exc)->encoding, "encoding");
|
||||
}
|
||||
|
||||
PyObject *
|
||||
|
@ -1416,42 +1398,45 @@ PyUnicodeTranslateError_SetEnd(PyObject *exc, Py_ssize_t end)
|
|||
PyObject *
|
||||
PyUnicodeEncodeError_GetReason(PyObject *exc)
|
||||
{
|
||||
return get_string(((PyUnicodeErrorObject *)exc)->reason, "reason");
|
||||
return get_unicode(((PyUnicodeErrorObject *)exc)->reason, "reason");
|
||||
}
|
||||
|
||||
|
||||
PyObject *
|
||||
PyUnicodeDecodeError_GetReason(PyObject *exc)
|
||||
{
|
||||
return get_string(((PyUnicodeErrorObject *)exc)->reason, "reason");
|
||||
return get_unicode(((PyUnicodeErrorObject *)exc)->reason, "reason");
|
||||
}
|
||||
|
||||
|
||||
PyObject *
|
||||
PyUnicodeTranslateError_GetReason(PyObject *exc)
|
||||
{
|
||||
return get_string(((PyUnicodeErrorObject *)exc)->reason, "reason");
|
||||
return get_unicode(((PyUnicodeErrorObject *)exc)->reason, "reason");
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
PyUnicodeEncodeError_SetReason(PyObject *exc, const char *reason)
|
||||
{
|
||||
return set_string(&((PyUnicodeErrorObject *)exc)->reason, reason);
|
||||
return set_unicodefromstring(&((PyUnicodeErrorObject *)exc)->reason,
|
||||
reason);
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
PyUnicodeDecodeError_SetReason(PyObject *exc, const char *reason)
|
||||
{
|
||||
return set_string(&((PyUnicodeErrorObject *)exc)->reason, reason);
|
||||
return set_unicodefromstring(&((PyUnicodeErrorObject *)exc)->reason,
|
||||
reason);
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
PyUnicodeTranslateError_SetReason(PyObject *exc, const char *reason)
|
||||
{
|
||||
return set_string(&((PyUnicodeErrorObject *)exc)->reason, reason);
|
||||
return set_unicodefromstring(&((PyUnicodeErrorObject *)exc)->reason,
|
||||
reason);
|
||||
}
|
||||
|
||||
|
||||
|
@ -1466,11 +1451,11 @@ UnicodeError_init(PyUnicodeErrorObject *self, PyObject *args, PyObject *kwds,
|
|||
Py_CLEAR(self->reason);
|
||||
|
||||
if (!PyArg_ParseTuple(args, "O!O!O!O!O!",
|
||||
&PyString_Type, &self->encoding,
|
||||
&PyUnicode_Type, &self->encoding,
|
||||
objecttype, &self->object,
|
||||
&PyLong_Type, &self->start,
|
||||
&PyLong_Type, &self->end,
|
||||
&PyString_Type, &self->reason)) {
|
||||
&PyUnicode_Type, &self->reason)) {
|
||||
self->encoding = self->object = self->start = self->end =
|
||||
self->reason = NULL;
|
||||
return -1;
|
||||
|
@ -1564,20 +1549,20 @@ UnicodeEncodeError_str(PyObject *self)
|
|||
PyOS_snprintf(badchar_str, sizeof(badchar_str), "u%04x", badchar);
|
||||
else
|
||||
PyOS_snprintf(badchar_str, sizeof(badchar_str), "U%08x", badchar);
|
||||
return PyString_FromFormat(
|
||||
"'%.400s' codec can't encode character u'\\%s' in position %zd: %.400s",
|
||||
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->encoding),
|
||||
return PyUnicode_FromFormat(
|
||||
"'%U' codec can't encode character u'\\%s' in position %zd: %U",
|
||||
((PyUnicodeErrorObject *)self)->encoding,
|
||||
badchar_str,
|
||||
start,
|
||||
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->reason)
|
||||
((PyUnicodeErrorObject *)self)->reason
|
||||
);
|
||||
}
|
||||
return PyString_FromFormat(
|
||||
"'%.400s' codec can't encode characters in position %zd-%zd: %.400s",
|
||||
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->encoding),
|
||||
return PyUnicode_FromFormat(
|
||||
"'%U' codec can't encode characters in position %zd-%zd: %U",
|
||||
((PyUnicodeErrorObject *)self)->encoding,
|
||||
start,
|
||||
(end-1),
|
||||
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->reason)
|
||||
((PyUnicodeErrorObject *)self)->reason
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -1601,7 +1586,7 @@ PyUnicodeEncodeError_Create(
|
|||
const char *encoding, const Py_UNICODE *object, Py_ssize_t length,
|
||||
Py_ssize_t start, Py_ssize_t end, const char *reason)
|
||||
{
|
||||
return PyObject_CallFunction(PyExc_UnicodeEncodeError, "su#nns",
|
||||
return PyObject_CallFunction(PyExc_UnicodeEncodeError, "Uu#nnU",
|
||||
encoding, object, length, start, end, reason);
|
||||
}
|
||||
|
||||
|
@ -1626,30 +1611,30 @@ UnicodeDecodeError_str(PyObject *self)
|
|||
Py_ssize_t end = 0;
|
||||
|
||||
if (PyUnicodeDecodeError_GetStart(self, &start))
|
||||
return NULL;
|
||||
return NULL;
|
||||
|
||||
if (PyUnicodeDecodeError_GetEnd(self, &end))
|
||||
return NULL;
|
||||
return NULL;
|
||||
|
||||
if (end==start+1) {
|
||||
/* FromFormat does not support %02x, so format that separately */
|
||||
char byte[4];
|
||||
PyOS_snprintf(byte, sizeof(byte), "%02x",
|
||||
((int)PyBytes_AS_STRING(((PyUnicodeErrorObject *)self)->object)[start])&0xff);
|
||||
return PyString_FromFormat(
|
||||
"'%.400s' codec can't decode byte 0x%s in position %zd: %.400s",
|
||||
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->encoding),
|
||||
return PyUnicode_FromFormat(
|
||||
"'%U' codec can't decode byte 0x%s in position %zd: %U",
|
||||
((PyUnicodeErrorObject *)self)->encoding,
|
||||
byte,
|
||||
start,
|
||||
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->reason)
|
||||
((PyUnicodeErrorObject *)self)->reason
|
||||
);
|
||||
}
|
||||
return PyString_FromFormat(
|
||||
"'%.400s' codec can't decode bytes in position %zd-%zd: %.400s",
|
||||
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->encoding),
|
||||
return PyUnicode_FromFormat(
|
||||
"'%U' codec can't decode bytes in position %zd-%zd: %U",
|
||||
((PyUnicodeErrorObject *)self)->encoding,
|
||||
start,
|
||||
(end-1),
|
||||
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->reason)
|
||||
((PyUnicodeErrorObject *)self)->reason
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -1676,7 +1661,7 @@ PyUnicodeDecodeError_Create(
|
|||
assert(length < INT_MAX);
|
||||
assert(start < INT_MAX);
|
||||
assert(end < INT_MAX);
|
||||
return PyObject_CallFunction(PyExc_UnicodeDecodeError, "sy#nns",
|
||||
return PyObject_CallFunction(PyExc_UnicodeDecodeError, "Uy#nnU",
|
||||
encoding, object, length, start, end, reason);
|
||||
}
|
||||
|
||||
|
@ -1701,7 +1686,7 @@ UnicodeTranslateError_init(PyUnicodeErrorObject *self, PyObject *args,
|
|||
&PyUnicode_Type, &self->object,
|
||||
&PyLong_Type, &self->start,
|
||||
&PyLong_Type, &self->end,
|
||||
&PyString_Type, &self->reason)) {
|
||||
&PyUnicode_Type, &self->reason)) {
|
||||
self->object = self->start = self->end = self->reason = NULL;
|
||||
return -1;
|
||||
}
|
||||
|
@ -1736,18 +1721,18 @@ UnicodeTranslateError_str(PyObject *self)
|
|||
PyOS_snprintf(badchar_str, sizeof(badchar_str), "u%04x", badchar);
|
||||
else
|
||||
PyOS_snprintf(badchar_str, sizeof(badchar_str), "U%08x", badchar);
|
||||
return PyString_FromFormat(
|
||||
"can't translate character u'\\%s' in position %zd: %.400s",
|
||||
return PyUnicode_FromFormat(
|
||||
"can't translate character u'\\%s' in position %zd: %U",
|
||||
badchar_str,
|
||||
start,
|
||||
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->reason)
|
||||
((PyUnicodeErrorObject *)self)->reason
|
||||
);
|
||||
}
|
||||
return PyString_FromFormat(
|
||||
"can't translate characters in position %zd-%zd: %.400s",
|
||||
return PyUnicode_FromFormat(
|
||||
"can't translate characters in position %zd-%zd: %U",
|
||||
start,
|
||||
(end-1),
|
||||
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->reason)
|
||||
((PyUnicodeErrorObject *)self)->reason
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
@ -393,15 +393,9 @@ PyObject *PyUnicode_FromUnicode(const Py_UNICODE *u,
|
|||
return (PyObject *)unicode;
|
||||
}
|
||||
|
||||
PyObject *PyUnicode_FromString(const char *u)
|
||||
PyObject *PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size)
|
||||
{
|
||||
PyUnicodeObject *unicode;
|
||||
size_t size = strlen(u);
|
||||
if (size > PY_SSIZE_T_MAX) {
|
||||
PyErr_SetString(PyExc_OverflowError, "input too long");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* If the Unicode data is known at construction time, we can apply
|
||||
some optimizations which share commonly used objects. */
|
||||
if (u != NULL) {
|
||||
|
@ -441,6 +435,17 @@ PyObject *PyUnicode_FromString(const char *u)
|
|||
return (PyObject *)unicode;
|
||||
}
|
||||
|
||||
PyObject *PyUnicode_FromString(const char *u)
|
||||
{
|
||||
size_t size = strlen(u);
|
||||
if (size > PY_SSIZE_T_MAX) {
|
||||
PyErr_SetString(PyExc_OverflowError, "input too long");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return PyUnicode_FromStringAndSize(u, size);
|
||||
}
|
||||
|
||||
#ifdef HAVE_WCHAR_H
|
||||
|
||||
PyObject *PyUnicode_FromWideChar(register const wchar_t *w,
|
||||
|
@ -473,6 +478,223 @@ PyObject *PyUnicode_FromWideChar(register const wchar_t *w,
|
|||
return (PyObject *)unicode;
|
||||
}
|
||||
|
||||
#define appendstring(string) {for (copy = string;*copy;) *s++ = *copy++;}
|
||||
|
||||
PyObject *
|
||||
PyUnicode_FromFormatV(const char *format, va_list vargs)
|
||||
{
|
||||
va_list count;
|
||||
Py_ssize_t n = 0;
|
||||
const char* f;
|
||||
Py_UNICODE *s;
|
||||
PyObject *string;
|
||||
/* used by sprintf */
|
||||
char buffer[21];
|
||||
const char *copy;
|
||||
|
||||
#ifdef VA_LIST_IS_ARRAY
|
||||
Py_MEMCPY(count, vargs, sizeof(va_list));
|
||||
#else
|
||||
#ifdef __va_copy
|
||||
__va_copy(count, vargs);
|
||||
#else
|
||||
count = vargs;
|
||||
#endif
|
||||
#endif
|
||||
/* step 1: figure out how large a buffer we need */
|
||||
for (f = format; *f; f++) {
|
||||
if (*f == '%') {
|
||||
const char* p = f;
|
||||
while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
|
||||
;
|
||||
|
||||
/* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
|
||||
* they don't affect the amount of space we reserve.
|
||||
*/
|
||||
if ((*f == 'l' || *f == 'z') &&
|
||||
(f[1] == 'd' || f[1] == 'u'))
|
||||
++f;
|
||||
|
||||
switch (*f) {
|
||||
case 'c':
|
||||
(void)va_arg(count, int);
|
||||
/* fall through... */
|
||||
case '%':
|
||||
n++;
|
||||
break;
|
||||
case 'd': case 'u': case 'i': case 'x':
|
||||
(void) va_arg(count, int);
|
||||
/* 20 bytes is enough to hold a 64-bit
|
||||
integer. Decimal takes the most space.
|
||||
This isn't enough for octal. */
|
||||
n += 20;
|
||||
break;
|
||||
case 's':
|
||||
n += strlen(va_arg(count, char*));
|
||||
break;
|
||||
case 'U':
|
||||
{
|
||||
PyObject *obj = va_arg(count, PyObject *);
|
||||
assert(obj && PyUnicode_Check(obj));
|
||||
n += PyUnicode_GET_SIZE(obj);
|
||||
break;
|
||||
}
|
||||
case 'p':
|
||||
(void) va_arg(count, int);
|
||||
/* maximum 64-bit pointer representation:
|
||||
* 0xffffffffffffffff
|
||||
* so 19 characters is enough.
|
||||
* XXX I count 18 -- what's the extra for?
|
||||
*/
|
||||
n += 19;
|
||||
break;
|
||||
default:
|
||||
/* if we stumble upon an unknown
|
||||
formatting code, copy the rest of
|
||||
the format string to the output
|
||||
string. (we cannot just skip the
|
||||
code, since there's no way to know
|
||||
what's in the argument list) */
|
||||
n += strlen(p);
|
||||
goto expand;
|
||||
}
|
||||
} else
|
||||
n++;
|
||||
}
|
||||
expand:
|
||||
/* step 2: fill the buffer */
|
||||
/* Since we've analyzed how much space we need for the worst case,
|
||||
we don't have to resize the string. */
|
||||
string = PyUnicode_FromUnicode(NULL, n);
|
||||
if (!string)
|
||||
return NULL;
|
||||
|
||||
s = PyUnicode_AS_UNICODE(string);
|
||||
|
||||
for (f = format; *f; f++) {
|
||||
if (*f == '%') {
|
||||
const char* p = f++;
|
||||
int longflag = 0;
|
||||
int size_tflag = 0;
|
||||
/* parse the width.precision part (we're only
|
||||
interested in the precision value, if any) */
|
||||
n = 0;
|
||||
while (isdigit(Py_CHARMASK(*f)))
|
||||
n = (n*10) + *f++ - '0';
|
||||
if (*f == '.') {
|
||||
f++;
|
||||
n = 0;
|
||||
while (isdigit(Py_CHARMASK(*f)))
|
||||
n = (n*10) + *f++ - '0';
|
||||
}
|
||||
while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
|
||||
f++;
|
||||
/* handle the long flag, but only for %ld and %lu.
|
||||
others can be added when necessary. */
|
||||
if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
|
||||
longflag = 1;
|
||||
++f;
|
||||
}
|
||||
/* handle the size_t flag. */
|
||||
if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
|
||||
size_tflag = 1;
|
||||
++f;
|
||||
}
|
||||
|
||||
switch (*f) {
|
||||
case 'c':
|
||||
*s++ = va_arg(vargs, int);
|
||||
break;
|
||||
case 'd':
|
||||
if (longflag)
|
||||
sprintf(buffer, "%ld", va_arg(vargs, long));
|
||||
else if (size_tflag)
|
||||
sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
|
||||
va_arg(vargs, Py_ssize_t));
|
||||
else
|
||||
sprintf(buffer, "%d", va_arg(vargs, int));
|
||||
appendstring(buffer);
|
||||
break;
|
||||
case 'u':
|
||||
if (longflag)
|
||||
sprintf(buffer, "%lu",
|
||||
va_arg(vargs, unsigned long));
|
||||
else if (size_tflag)
|
||||
sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
|
||||
va_arg(vargs, size_t));
|
||||
else
|
||||
sprintf(buffer, "%u",
|
||||
va_arg(vargs, unsigned int));
|
||||
appendstring(buffer);
|
||||
break;
|
||||
case 'i':
|
||||
sprintf(buffer, "%i", va_arg(vargs, int));
|
||||
appendstring(buffer);
|
||||
break;
|
||||
case 'x':
|
||||
sprintf(buffer, "%x", va_arg(vargs, int));
|
||||
appendstring(buffer);
|
||||
break;
|
||||
case 's':
|
||||
p = va_arg(vargs, char*);
|
||||
appendstring(p);
|
||||
break;
|
||||
case 'U':
|
||||
{
|
||||
PyObject *obj = va_arg(vargs, PyObject *);
|
||||
Py_UNICODE *ucopy = PyUnicode_AS_UNICODE(obj);
|
||||
Py_ssize_t usize = PyUnicode_GET_SIZE(obj);
|
||||
Py_ssize_t upos;
|
||||
for (upos = 0; upos<usize;)
|
||||
*s++ = ucopy[upos++];
|
||||
break;
|
||||
}
|
||||
case 'p':
|
||||
sprintf(buffer, "%p", va_arg(vargs, void*));
|
||||
/* %p is ill-defined: ensure leading 0x. */
|
||||
if (buffer[1] == 'X')
|
||||
buffer[1] = 'x';
|
||||
else if (buffer[1] != 'x') {
|
||||
memmove(buffer+2, buffer, strlen(buffer)+1);
|
||||
buffer[0] = '0';
|
||||
buffer[1] = 'x';
|
||||
}
|
||||
appendstring(buffer);
|
||||
break;
|
||||
case '%':
|
||||
*s++ = '%';
|
||||
break;
|
||||
default:
|
||||
appendstring(p);
|
||||
goto end;
|
||||
}
|
||||
} else
|
||||
*s++ = *f;
|
||||
}
|
||||
|
||||
end:
|
||||
_PyUnicode_Resize(&string, s - PyUnicode_AS_UNICODE(string));
|
||||
return string;
|
||||
}
|
||||
|
||||
#undef appendstring
|
||||
|
||||
PyObject *
|
||||
PyUnicode_FromFormat(const char *format, ...)
|
||||
{
|
||||
PyObject* ret;
|
||||
va_list vargs;
|
||||
|
||||
#ifdef HAVE_STDARG_PROTOTYPES
|
||||
va_start(vargs, format);
|
||||
#else
|
||||
va_start(vargs);
|
||||
#endif
|
||||
ret = PyUnicode_FromFormatV(format, vargs);
|
||||
va_end(vargs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
Py_ssize_t PyUnicode_AsWideChar(PyUnicodeObject *unicode,
|
||||
wchar_t *w,
|
||||
Py_ssize_t size)
|
||||
|
|
|
@ -424,6 +424,39 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags)
|
|||
return v;
|
||||
}
|
||||
|
||||
case 'U':
|
||||
{
|
||||
PyObject *v;
|
||||
char *str = va_arg(*p_va, char *);
|
||||
Py_ssize_t n;
|
||||
if (**p_format == '#') {
|
||||
++*p_format;
|
||||
if (flags & FLAG_SIZE_T)
|
||||
n = va_arg(*p_va, Py_ssize_t);
|
||||
else
|
||||
n = va_arg(*p_va, int);
|
||||
}
|
||||
else
|
||||
n = -1;
|
||||
if (str == NULL) {
|
||||
v = Py_None;
|
||||
Py_INCREF(v);
|
||||
}
|
||||
else {
|
||||
if (n < 0) {
|
||||
size_t m = strlen(str);
|
||||
if (m > PY_SSIZE_T_MAX) {
|
||||
PyErr_SetString(PyExc_OverflowError,
|
||||
"string too long for Python string");
|
||||
return NULL;
|
||||
}
|
||||
n = (Py_ssize_t)m;
|
||||
}
|
||||
v = PyUnicode_FromStringAndSize(str, n);
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
case 'y':
|
||||
{
|
||||
PyObject *v;
|
||||
|
|
Loading…
Reference in New Issue