PEP 293 implementation (from SF patch http://www.python.org/sf/432401)
This commit is contained in:
parent
94fab762de
commit
3aeb632c31
|
@ -17,7 +17,7 @@
|
|||
|
||||
This module defines base classes for standard Python codecs (encoders
|
||||
and decoders) and provides access to the internal Python codec
|
||||
registry which manages the codec lookup process.
|
||||
registry which manages the codec and error handling lookup process.
|
||||
|
||||
It defines the following functions:
|
||||
|
||||
|
@ -98,6 +98,43 @@ Raises a \exception{LookupError} in case the encoding cannot be found.
|
|||
To simplify working with encoded files or streams, the module
|
||||
also defines these utility functions:
|
||||
|
||||
\begin{funcdesc}{register_error}{name, error_handler}
|
||||
Register the error handling function \var{error_handler} under the
|
||||
name \var{name}. \var{error_handler} will be called during encoding
|
||||
and decoding in case of an error, when \var{name} is specified as the
|
||||
errors parameter. \var{error_handler} will be called with an
|
||||
\exception{UnicodeEncodeError}, \exception{UnicodeDecodeError} or
|
||||
\exception{UnicodeTranslateError} instance and must return a tuple
|
||||
with a replacement for the unencodable/undecodable part of the input
|
||||
and a position where encoding/decoding should continue.
|
||||
\end{funcdesc}
|
||||
|
||||
\begin{funcdesc}{lookup_error}{name}
|
||||
Return the error handler previously registered under the name \var{name}.
|
||||
|
||||
Raises a \exception{LookupError} in case the handler cannot be found.
|
||||
\end{funcdesc}
|
||||
|
||||
\begin{funcdesc}{strict_errors}{exception}
|
||||
Implements the \code{strict} error handling.
|
||||
\end{funcdesc}
|
||||
|
||||
\begin{funcdesc}{replace_errors}{exception}
|
||||
Implements the \code{replace} error handling.
|
||||
\end{funcdesc}
|
||||
|
||||
\begin{funcdesc}{ignore_errors}{exception}
|
||||
Implements the \code{ignore} error handling.
|
||||
\end{funcdesc}
|
||||
|
||||
\begin{funcdesc}{xmlcharrefreplace_errors}{exception}
|
||||
Implements the \code{xmlcharrefreplace} error handling.
|
||||
\end{funcdesc}
|
||||
|
||||
\begin{funcdesc}{backslashreplace_errors}{exception}
|
||||
Implements the \code{backslashreplace} error handling.
|
||||
\end{funcdesc}
|
||||
|
||||
\begin{funcdesc}{open}{filename, mode\optional{, encoding\optional{,
|
||||
errors\optional{, buffering}}}}
|
||||
Open an encoded file using the given \var{mode} and return
|
||||
|
|
|
@ -335,6 +335,24 @@ Raised when an \keyword{assert} statement fails.
|
|||
\versionadded{2.0}
|
||||
\end{excdesc}
|
||||
|
||||
\begin{excdesc}{UnicodeEncodeError}
|
||||
Raised when a Unicode-related error occurs during encoding. It
|
||||
is a subclass of \exception{UnicodeError}.
|
||||
\versionadded{2.3}
|
||||
\end{excdesc}
|
||||
|
||||
\begin{excdesc}{UnicodeDecodeError}
|
||||
Raised when a Unicode-related error occurs during decoding. It
|
||||
is a subclass of \exception{UnicodeError}.
|
||||
\versionadded{2.3}
|
||||
\end{excdesc}
|
||||
|
||||
\begin{excdesc}{UnicodeTranslateError}
|
||||
Raised when a Unicode-related error occurs during translating. It
|
||||
is a subclass of \exception{UnicodeError}.
|
||||
\versionadded{2.3}
|
||||
\end{excdesc}
|
||||
|
||||
\begin{excdesc}{ValueError}
|
||||
Raised when a built-in operation or function receives an argument
|
||||
that has the right type but an inappropriate value, and the
|
||||
|
@ -426,6 +444,9 @@ The class hierarchy for built-in exceptions is:
|
|||
| | +-- FloatingPointError
|
||||
| +-- ValueError
|
||||
| | +-- UnicodeError
|
||||
| | +-- UnicodeEncodeError
|
||||
| | +-- UnicodeDecodeError
|
||||
| | +-- UnicodeTranslateError
|
||||
| +-- ReferenceError
|
||||
| +-- SystemError
|
||||
| +-- MemoryError
|
||||
|
|
|
@ -117,6 +117,36 @@ PyAPI_FUNC(PyObject *) PyCodec_StreamWriter(
|
|||
const char *errors
|
||||
);
|
||||
|
||||
/* Unicode encoding error handling callback registry API */
|
||||
|
||||
/* Register the error handling callback function error under the name
|
||||
name. This function will be called by the codec when it encounters
|
||||
unencodable characters/undecodable bytes and doesn't know the
|
||||
callback name, when name is specified as the error parameter
|
||||
in the call to the encode/decode function.
|
||||
Return 0 on success, -1 on error */
|
||||
PyAPI_FUNC(int) PyCodec_RegisterError(const char *name, PyObject *error);
|
||||
|
||||
/* Lookup the error handling callback function registered under the
|
||||
name error. As a special case NULL can be passed, in which case
|
||||
the error handling callback for "strict" will be returned. */
|
||||
PyAPI_FUNC(PyObject *) PyCodec_LookupError(const char *name);
|
||||
|
||||
/* raise exc as an exception */
|
||||
PyAPI_FUNC(PyObject *) PyCodec_StrictErrors(PyObject *exc);
|
||||
|
||||
/* ignore the unicode error, skipping the faulty input */
|
||||
PyAPI_FUNC(PyObject *) PyCodec_IgnoreErrors(PyObject *exc);
|
||||
|
||||
/* replace the unicode error with ? or U+FFFD */
|
||||
PyAPI_FUNC(PyObject *) PyCodec_ReplaceErrors(PyObject *exc);
|
||||
|
||||
/* replace the unicode encode error with XML character references */
|
||||
PyAPI_FUNC(PyObject *) PyCodec_XMLCharRefReplaceErrors(PyObject *exc);
|
||||
|
||||
/* replace the unicode encode error with backslash escapes (\x, \u and \U) */
|
||||
PyAPI_FUNC(PyObject *) PyCodec_BackslashReplaceErrors(PyObject *exc);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -54,6 +54,9 @@ PyAPI_DATA(PyObject *) PyExc_SystemExit;
|
|||
PyAPI_DATA(PyObject *) PyExc_TypeError;
|
||||
PyAPI_DATA(PyObject *) PyExc_UnboundLocalError;
|
||||
PyAPI_DATA(PyObject *) PyExc_UnicodeError;
|
||||
PyAPI_DATA(PyObject *) PyExc_UnicodeEncodeError;
|
||||
PyAPI_DATA(PyObject *) PyExc_UnicodeDecodeError;
|
||||
PyAPI_DATA(PyObject *) PyExc_UnicodeTranslateError;
|
||||
PyAPI_DATA(PyObject *) PyExc_ValueError;
|
||||
PyAPI_DATA(PyObject *) PyExc_ZeroDivisionError;
|
||||
#ifdef MS_WINDOWS
|
||||
|
@ -114,6 +117,69 @@ PyAPI_FUNC(void) PyErr_SetInterrupt(void);
|
|||
PyAPI_FUNC(void) PyErr_SyntaxLocation(char *, int);
|
||||
PyAPI_FUNC(PyObject *) PyErr_ProgramText(char *, int);
|
||||
|
||||
/* The following functions are used to create and modify unicode
|
||||
exceptions from C */
|
||||
/* create a UnicodeDecodeError object */
|
||||
PyAPI_FUNC(PyObject *) PyUnicodeDecodeError_Create(
|
||||
const char *, const char *, int, int, int, const char *);
|
||||
|
||||
/* create a UnicodeEncodeError object */
|
||||
PyAPI_FUNC(PyObject *) PyUnicodeEncodeError_Create(
|
||||
const char *, const Py_UNICODE *, int, int, int, const char *);
|
||||
|
||||
/* create a UnicodeTranslateError object */
|
||||
PyAPI_FUNC(PyObject *) PyUnicodeTranslateError_Create(
|
||||
const Py_UNICODE *, int, int, int, const char *);
|
||||
|
||||
/* get the encoding attribute */
|
||||
PyAPI_FUNC(PyObject *) PyUnicodeEncodeError_GetEncoding(PyObject *);
|
||||
PyAPI_FUNC(PyObject *) PyUnicodeDecodeError_GetEncoding(PyObject *);
|
||||
PyAPI_FUNC(PyObject *) PyUnicodeTranslateError_GetEncoding(PyObject *);
|
||||
|
||||
/* get the object attribute */
|
||||
PyAPI_FUNC(PyObject *) PyUnicodeEncodeError_GetObject(PyObject *);
|
||||
PyAPI_FUNC(PyObject *) PyUnicodeDecodeError_GetObject(PyObject *);
|
||||
PyAPI_FUNC(PyObject *) PyUnicodeTranslateError_GetObject(PyObject *);
|
||||
|
||||
/* get the value of the start attribute (the int * may not be NULL)
|
||||
return 0 on success, -1 on failure */
|
||||
PyAPI_FUNC(int) PyUnicodeEncodeError_GetStart(PyObject *, int *);
|
||||
PyAPI_FUNC(int) PyUnicodeDecodeError_GetStart(PyObject *, int *);
|
||||
PyAPI_FUNC(int) PyUnicodeTranslateError_GetStart(PyObject *, int *);
|
||||
|
||||
/* assign a new value to the start attribute
|
||||
return 0 on success, -1 on failure */
|
||||
PyAPI_FUNC(int) PyUnicodeEncodeError_SetStart(PyObject *, int);
|
||||
PyAPI_FUNC(int) PyUnicodeDecodeError_SetStart(PyObject *, int);
|
||||
PyAPI_FUNC(int) PyUnicodeTranslateError_SetStart(PyObject *, int);
|
||||
|
||||
/* get the value of the end attribute (the int * may not be NULL)
|
||||
return 0 on success, -1 on failure */
|
||||
PyAPI_FUNC(int) PyUnicodeEncodeError_GetEnd(PyObject *, int *);
|
||||
PyAPI_FUNC(int) PyUnicodeDecodeError_GetEnd(PyObject *, int *);
|
||||
PyAPI_FUNC(int) PyUnicodeTranslateError_GetEnd(PyObject *, int *);
|
||||
|
||||
/* assign a new value to the end attribute
|
||||
return 0 on success, -1 on failure */
|
||||
PyAPI_FUNC(int) PyUnicodeEncodeError_SetEnd(PyObject *, int);
|
||||
PyAPI_FUNC(int) PyUnicodeDecodeError_SetEnd(PyObject *, int);
|
||||
PyAPI_FUNC(int) PyUnicodeTranslateError_SetEnd(PyObject *, int);
|
||||
|
||||
/* get the value of the reason attribute */
|
||||
PyAPI_FUNC(PyObject *) PyUnicodeEncodeError_GetReason(PyObject *);
|
||||
PyAPI_FUNC(PyObject *) PyUnicodeDecodeError_GetReason(PyObject *);
|
||||
PyAPI_FUNC(PyObject *) PyUnicodeTranslateError_GetReason(PyObject *);
|
||||
|
||||
/* assign a new value to the reason attribute
|
||||
return 0 on success, -1 on failure */
|
||||
PyAPI_FUNC(int) PyUnicodeEncodeError_SetReason(
|
||||
PyObject *, const char *);
|
||||
PyAPI_FUNC(int) PyUnicodeDecodeError_SetReason(
|
||||
PyObject *, const char *);
|
||||
PyAPI_FUNC(int) PyUnicodeTranslateError_SetReason(
|
||||
PyObject *, const char *);
|
||||
|
||||
|
||||
/* These APIs aren't really part of the error implementation, but
|
||||
often needed to format error messages; the native C lib APIs are
|
||||
not available on all platforms, which is why we provide emulations
|
||||
|
|
|
@ -20,7 +20,10 @@ except ImportError, why:
|
|||
# Names exported by "from codecs import *".  Every built-in error handler
# alias defined in this module is listed, including backslashreplace_errors.
__all__ = ["register", "lookup", "open", "EncodedFile", "BOM", "BOM_BE",
           "BOM_LE", "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE",
           "BOM_UTF8", "BOM_UTF16", "BOM_UTF16_LE", "BOM_UTF16_BE",
           "BOM_UTF32", "BOM_UTF32_LE", "BOM_UTF32_BE",
           "strict_errors", "ignore_errors", "replace_errors",
           "xmlcharrefreplace_errors", "backslashreplace_errors",
           "register_error", "lookup_error"]
|
||||
|
||||
### Constants
|
||||
|
||||
|
@ -632,6 +635,14 @@ def make_encoding_map(decoding_map):
|
|||
m[v] = None
|
||||
return m
|
||||
|
||||
### error handlers
|
||||
|
||||
# Module-level aliases for the built-in error handling callbacks; each one
# is fetched by its registered name through lookup_error().
strict_errors = lookup_error("strict")
|
||||
ignore_errors = lookup_error("ignore")
|
||||
replace_errors = lookup_error("replace")
|
||||
xmlcharrefreplace_errors = lookup_error("xmlcharrefreplace")
|
||||
backslashreplace_errors = lookup_error("backslashreplace")
|
||||
|
||||
# Tell modulefinder that using codecs probably needs the encodings
|
||||
# package
|
||||
_false = 0
|
||||
|
|
|
@ -0,0 +1,483 @@
|
|||
import test.test_support, unittest
|
||||
import sys, codecs, htmlentitydefs, unicodedata
|
||||
|
||||
class CodecCallbackTest(unittest.TestCase):
|
||||
|
||||
def test_xmlcharrefreplace(self):
|
||||
# replace unencodable characters which numeric character entities.
|
||||
# For ascii, latin-1 and charmaps this is completely implemented
|
||||
# in C and should be reasonably fast.
|
||||
s = u"\u30b9\u30d1\u30e2 \xe4nd eggs"
|
||||
self.assertEqual(
|
||||
s.encode("ascii", "xmlcharrefreplace"),
|
||||
"スパモ änd eggs"
|
||||
)
|
||||
self.assertEqual(
|
||||
s.encode("latin-1", "xmlcharrefreplace"),
|
||||
"スパモ \xe4nd eggs"
|
||||
)
|
||||
|
||||
def test_xmlcharnamereplace(self):
|
||||
# This time use a named character entity for unencodable
|
||||
# characters, if one is available.
|
||||
names = {}
|
||||
for (key, value) in htmlentitydefs.entitydefs.items():
|
||||
if len(value)==1:
|
||||
names[unicode(value, "latin-1")] = unicode(key, "latin-1")
|
||||
else:
|
||||
names[unichr(int(value[2:-1]))] = unicode(key, "latin-1")
|
||||
|
||||
def xmlcharnamereplace(exc):
|
||||
if not isinstance(exc, UnicodeEncodeError):
|
||||
raise TypeError("don't know how to handle %r" % exc)
|
||||
l = []
|
||||
for c in exc.object[exc.start:exc.end]:
|
||||
try:
|
||||
l.append(u"&%s;" % names[c])
|
||||
except KeyError:
|
||||
l.append(u"&#%d;" % ord(c))
|
||||
return (u"".join(l), exc.end)
|
||||
|
||||
codecs.register_error(
|
||||
"test.xmlcharnamereplace", xmlcharnamereplace)
|
||||
|
||||
sin = u"\xab\u211c\xbb = \u2329\u1234\u20ac\u232a"
|
||||
sout = "«ℜ» = ⟨ሴ€⟩"
|
||||
self.assertEqual(sin.encode("ascii", "test.xmlcharnamereplace"), sout)
|
||||
sout = "\xabℜ\xbb = ⟨ሴ€⟩"
|
||||
self.assertEqual(sin.encode("latin-1", "test.xmlcharnamereplace"), sout)
|
||||
sout = "\xabℜ\xbb = ⟨ሴ\xa4⟩"
|
||||
self.assertEqual(sin.encode("iso-8859-15", "test.xmlcharnamereplace"), sout)
|
||||
|
||||
def test_uninamereplace(self):
|
||||
# We're using the names from the unicode database this time,
|
||||
# and we're doing "systax highlighting" here, i.e. we include
|
||||
# the replaced text in ANSI escape sequences. For this it is
|
||||
# useful that the error handler is not called for every single
|
||||
# unencodable character, but for a complete sequence of
|
||||
# unencodable characters, otherwise we would output many
|
||||
# unneccessary escape sequences.
|
||||
|
||||
def uninamereplace(exc):
|
||||
if not isinstance(exc, UnicodeEncodeError):
|
||||
raise TypeError("don't know how to handle %r" % exc)
|
||||
l = []
|
||||
for c in exc.object[exc.start:exc.end]:
|
||||
l.append(unicodedata.name(c, u"0x%x" % ord(c)))
|
||||
return (u"\033[1m%s\033[0m" % u", ".join(l), exc.end)
|
||||
|
||||
codecs.register_error(
|
||||
"test.uninamereplace", uninamereplace)
|
||||
|
||||
sin = u"\xac\u1234\u20ac\u8000"
|
||||
sout = "\033[1mNOT SIGN, ETHIOPIC SYLLABLE SEE, EURO SIGN, 0x8000\033[0m"
|
||||
self.assertEqual(sin.encode("ascii", "test.uninamereplace"), sout)
|
||||
|
||||
sout = "\xac\033[1mETHIOPIC SYLLABLE SEE, EURO SIGN, 0x8000\033[0m"
|
||||
self.assertEqual(sin.encode("latin-1", "test.uninamereplace"), sout)
|
||||
|
||||
sout = "\xac\033[1mETHIOPIC SYLLABLE SEE\033[0m\xa4\033[1m0x8000\033[0m"
|
||||
self.assertEqual(sin.encode("iso-8859-15", "test.uninamereplace"), sout)
|
||||
|
||||
def test_backslashescape(self):
|
||||
# Does the same as the "unicode-escape" encoding, but with different
|
||||
# base encodings.
|
||||
sin = u"a\xac\u1234\u20ac\u8000"
|
||||
if sys.maxunicode > 0xffff:
|
||||
sin += unichr(sys.maxunicode)
|
||||
sout = "a\\xac\\u1234\\u20ac\\u8000"
|
||||
if sys.maxunicode > 0xffff:
|
||||
sout += "\\U%08x" % sys.maxunicode
|
||||
self.assertEqual(sin.encode("ascii", "backslashreplace"), sout)
|
||||
|
||||
sout = "a\xac\\u1234\\u20ac\\u8000"
|
||||
if sys.maxunicode > 0xffff:
|
||||
sout += "\\U%08x" % sys.maxunicode
|
||||
self.assertEqual(sin.encode("latin-1", "backslashreplace"), sout)
|
||||
|
||||
sout = "a\xac\\u1234\xa4\\u8000"
|
||||
if sys.maxunicode > 0xffff:
|
||||
sout += "\\U%08x" % sys.maxunicode
|
||||
self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout)
|
||||
|
||||
def test_relaxedutf8(self):
|
||||
# This is the test for a decoding callback handler,
|
||||
# that relaxes the UTF-8 minimal encoding restriction.
|
||||
# A null byte that is encoded as "\xc0\x80" will be
|
||||
# decoded as a null byte. All other illegal sequences
|
||||
# will be handled strictly.
|
||||
def relaxedutf8(exc):
|
||||
if not isinstance(exc, UnicodeDecodeError):
|
||||
raise TypeError("don't know how to handle %r" % exc)
|
||||
if exc.object[exc.start:exc.end].startswith("\xc0\x80"):
|
||||
return (u"\x00", exc.start+2) # retry after two bytes
|
||||
else:
|
||||
raise exc
|
||||
|
||||
codecs.register_error(
|
||||
"test.relaxedutf8", relaxedutf8)
|
||||
|
||||
sin = "a\x00b\xc0\x80c\xc3\xbc\xc0\x80\xc0\x80"
|
||||
sout = u"a\x00b\x00c\xfc\x00\x00"
|
||||
self.assertEqual(sin.decode("utf-8", "test.relaxedutf8"), sout)
|
||||
sin = "\xc0\x80\xc0\x81"
|
||||
self.assertRaises(UnicodeError, sin.decode, "utf-8", "test.relaxedutf8")
|
||||
|
||||
def test_charmapencode(self):
|
||||
# For charmap encodings the replacement string will be
|
||||
# mapped through the encoding again. This means, that
|
||||
# to be able to use e.g. the "replace" handler, the
|
||||
# charmap has to have a mapping for "?".
|
||||
charmap = dict([ (ord(c), 2*c.upper()) for c in "abcdefgh"])
|
||||
sin = u"abc"
|
||||
sout = "AABBCC"
|
||||
self.assertEquals(codecs.charmap_encode(sin, "strict", charmap)[0], sout)
|
||||
|
||||
sin = u"abcA"
|
||||
self.assertRaises(UnicodeError, codecs.charmap_encode, sin, "strict", charmap)
|
||||
|
||||
charmap[ord("?")] = "XYZ"
|
||||
sin = u"abcDEF"
|
||||
sout = "AABBCCXYZXYZXYZ"
|
||||
self.assertEquals(codecs.charmap_encode(sin, "replace", charmap)[0], sout)
|
||||
|
||||
charmap[ord("?")] = u"XYZ"
|
||||
self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
|
||||
|
||||
charmap[ord("?")] = u"XYZ"
|
||||
self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
|
||||
|
||||
def test_callbacks(self):
|
||||
def handler1(exc):
|
||||
if not isinstance(exc, UnicodeEncodeError) \
|
||||
and not isinstance(exc, UnicodeDecodeError):
|
||||
raise TypeError("don't know how to handle %r" % exc)
|
||||
l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)]
|
||||
return (u"[%s]" % u"".join(l), exc.end)
|
||||
|
||||
codecs.register_error("test.handler1", handler1)
|
||||
|
||||
def handler2(exc):
|
||||
if not isinstance(exc, UnicodeDecodeError):
|
||||
raise TypeError("don't know how to handle %r" % exc)
|
||||
l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)]
|
||||
return (u"[%s]" % u"".join(l), exc.end+1) # skip one character
|
||||
|
||||
codecs.register_error("test.handler2", handler2)
|
||||
|
||||
s = "\x00\x81\x7f\x80\xff"
|
||||
|
||||
self.assertEqual(
|
||||
s.decode("ascii", "test.handler1"),
|
||||
u"\x00[<129>]\x7f[<128>][<255>]"
|
||||
)
|
||||
self.assertEqual(
|
||||
s.decode("ascii", "test.handler2"),
|
||||
u"\x00[<129>][<128>]"
|
||||
)
|
||||
|
||||
self.assertEqual(
|
||||
"\\u3042\u3xxx".decode("unicode-escape", "test.handler1"),
|
||||
u"\u3042[<92><117><51><120>]xx"
|
||||
)
|
||||
|
||||
self.assertEqual(
|
||||
"\\u3042\u3xx".decode("unicode-escape", "test.handler1"),
|
||||
u"\u3042[<92><117><51><120><120>]"
|
||||
)
|
||||
|
||||
self.assertEqual(
|
||||
codecs.charmap_decode("abc", "test.handler1", {ord("a"): u"z"})[0],
|
||||
u"z[<98>][<99>]"
|
||||
)
|
||||
|
||||
self.assertEqual(
|
||||
u"g\xfc\xdfrk".encode("ascii", "test.handler1"),
|
||||
u"g[<252><223>]rk"
|
||||
)
|
||||
|
||||
self.assertEqual(
|
||||
u"g\xfc\xdf".encode("ascii", "test.handler1"),
|
||||
u"g[<252><223>]"
|
||||
)
|
||||
|
||||
def test_longstrings(self):
|
||||
# test long strings to check for memory overflow problems
|
||||
errors = [ "strict", "ignore", "replace", "xmlcharrefreplace", "backslashreplace"]
|
||||
# register the handlers under different names,
|
||||
# to prevent the codec from recognizing the name
|
||||
for err in errors:
|
||||
codecs.register_error("test." + err, codecs.lookup_error(err))
|
||||
l = 1000
|
||||
errors += [ "test." + err for err in errors ]
|
||||
for uni in [ s*l for s in (u"x", u"\u3042", u"a\xe4") ]:
|
||||
for enc in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15", "utf-8", "utf-7", "utf-16"):
|
||||
for err in errors:
|
||||
try:
|
||||
uni.encode(enc, err)
|
||||
except UnicodeError:
|
||||
pass
|
||||
|
||||
def check_exceptionobjectargs(self, exctype, args, msg):
|
||||
# Test UnicodeError subclasses: construction, attribute assignment and __str__ conversion
|
||||
# check with one missing argument
|
||||
self.assertRaises(TypeError, exctype, *args[:-1])
|
||||
# check with one missing argument
|
||||
self.assertRaises(TypeError, exctype, *(args + ["too much"]))
|
||||
# check with one argument of the wrong type
|
||||
wrongargs = [ "spam", u"eggs", 42, 1.0, None ]
|
||||
for i in xrange(len(args)):
|
||||
for wrongarg in wrongargs:
|
||||
if type(wrongarg) is type(args[i]):
|
||||
continue
|
||||
# build argument array
|
||||
callargs = []
|
||||
for j in xrange(len(args)):
|
||||
if i==j:
|
||||
callargs.append(wrongarg)
|
||||
else:
|
||||
callargs.append(args[i])
|
||||
self.assertRaises(TypeError, exctype, *callargs)
|
||||
exc = exctype(*args)
|
||||
self.assertEquals(str(exc), msg)
|
||||
|
||||
def test_unicodeencodeerror(self):
|
||||
self.check_exceptionobjectargs(
|
||||
UnicodeEncodeError,
|
||||
["ascii", u"g\xfcrk", 1, 2, "ouch"],
|
||||
"'ascii' codec can't encode character '\ufc' in position 1: ouch"
|
||||
)
|
||||
self.check_exceptionobjectargs(
|
||||
UnicodeEncodeError,
|
||||
["ascii", u"g\xfcrk", 1, 4, "ouch"],
|
||||
"'ascii' codec can't encode characters in position 1-3: ouch"
|
||||
)
|
||||
self.check_exceptionobjectargs(
|
||||
UnicodeEncodeError,
|
||||
["ascii", u"\xfcx", 0, 1, "ouch"],
|
||||
"'ascii' codec can't encode character '\ufc' in position 0: ouch"
|
||||
)
|
||||
|
||||
def test_unicodedecodeerror(self):
|
||||
self.check_exceptionobjectargs(
|
||||
UnicodeDecodeError,
|
||||
["ascii", "g\xfcrk", 1, 2, "ouch"],
|
||||
"'ascii' codec can't decode byte 0xfc in position 1: ouch"
|
||||
)
|
||||
self.check_exceptionobjectargs(
|
||||
UnicodeDecodeError,
|
||||
["ascii", "g\xfcrk", 1, 3, "ouch"],
|
||||
"'ascii' codec can't decode bytes in position 1-2: ouch"
|
||||
)
|
||||
|
||||
def test_unicodetranslateerror(self):
|
||||
self.check_exceptionobjectargs(
|
||||
UnicodeTranslateError,
|
||||
[u"g\xfcrk", 1, 2, "ouch"],
|
||||
"can't translate character '\\ufc' in position 1: ouch"
|
||||
)
|
||||
self.check_exceptionobjectargs(
|
||||
UnicodeTranslateError,
|
||||
[u"g\xfcrk", 1, 3, "ouch"],
|
||||
"can't translate characters in position 1-2: ouch"
|
||||
)
|
||||
|
||||
def test_badandgoodstrictexceptions(self):
|
||||
self.assertRaises(
|
||||
TypeError,
|
||||
codecs.strict_errors,
|
||||
42
|
||||
)
|
||||
self.assertRaises(
|
||||
Exception,
|
||||
codecs.strict_errors,
|
||||
Exception("ouch")
|
||||
)
|
||||
|
||||
self.assertRaises(
|
||||
UnicodeEncodeError,
|
||||
codecs.strict_errors,
|
||||
UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")
|
||||
)
|
||||
|
||||
def test_badandgoodignoreexceptions(self):
|
||||
self.assertRaises(
|
||||
TypeError,
|
||||
codecs.ignore_errors,
|
||||
42
|
||||
)
|
||||
self.assertRaises(
|
||||
TypeError,
|
||||
codecs.ignore_errors,
|
||||
UnicodeError("ouch")
|
||||
)
|
||||
self.assertEquals(
|
||||
codecs.ignore_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
|
||||
(u"", 1)
|
||||
)
|
||||
self.assertEquals(
|
||||
codecs.ignore_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
|
||||
(u"", 1)
|
||||
)
|
||||
self.assertEquals(
|
||||
codecs.ignore_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")),
|
||||
(u"", 1)
|
||||
)
|
||||
|
||||
def test_badandgoodreplaceexceptions(self):
|
||||
self.assertRaises(
|
||||
TypeError,
|
||||
codecs.replace_errors,
|
||||
42
|
||||
)
|
||||
self.assertRaises(
|
||||
TypeError,
|
||||
codecs.replace_errors,
|
||||
UnicodeError("ouch")
|
||||
)
|
||||
self.assertEquals(
|
||||
codecs.replace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
|
||||
(u"?", 1)
|
||||
)
|
||||
self.assertEquals(
|
||||
codecs.replace_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
|
||||
(u"\ufffd", 1)
|
||||
)
|
||||
self.assertEquals(
|
||||
codecs.replace_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")),
|
||||
(u"\ufffd", 1)
|
||||
)
|
||||
|
||||
def test_badandgoodxmlcharrefreplaceexceptions(self):
|
||||
self.assertRaises(
|
||||
TypeError,
|
||||
codecs.xmlcharrefreplace_errors,
|
||||
42
|
||||
)
|
||||
self.assertRaises(
|
||||
TypeError,
|
||||
codecs.xmlcharrefreplace_errors,
|
||||
UnicodeError("ouch")
|
||||
)
|
||||
self.assertEquals(
|
||||
codecs.xmlcharrefreplace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
|
||||
(u"&#%d;" % 0x3042, 1)
|
||||
)
|
||||
self.assertRaises(
|
||||
TypeError,
|
||||
codecs.xmlcharrefreplace_errors,
|
||||
UnicodeError("ouch")
|
||||
)
|
||||
self.assertRaises(
|
||||
TypeError,
|
||||
codecs.xmlcharrefreplace_errors,
|
||||
UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
|
||||
)
|
||||
self.assertRaises(
|
||||
TypeError,
|
||||
codecs.xmlcharrefreplace_errors,
|
||||
UnicodeTranslateError(u"\u3042", 0, 1, "ouch")
|
||||
)
|
||||
|
||||
def test_badandgoodbackslashreplaceexceptions(self):
|
||||
self.assertRaises(
|
||||
TypeError,
|
||||
codecs.backslashreplace_errors,
|
||||
42
|
||||
)
|
||||
self.assertRaises(
|
||||
TypeError,
|
||||
codecs.backslashreplace_errors,
|
||||
UnicodeError("ouch")
|
||||
)
|
||||
self.assertEquals(
|
||||
codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
|
||||
(u"\\u3042", 1)
|
||||
)
|
||||
self.assertEquals(
|
||||
codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\x00", 0, 1, "ouch")),
|
||||
(u"\\x00", 1)
|
||||
)
|
||||
self.assertEquals(
|
||||
codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\xff", 0, 1, "ouch")),
|
||||
(u"\\xff", 1)
|
||||
)
|
||||
self.assertEquals(
|
||||
codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u0100", 0, 1, "ouch")),
|
||||
(u"\\u0100", 1)
|
||||
)
|
||||
self.assertEquals(
|
||||
codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\uffff", 0, 1, "ouch")),
|
||||
(u"\\uffff", 1)
|
||||
)
|
||||
if sys.maxunicode>0xffff:
|
||||
self.assertEquals(
|
||||
codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U00010000", 0, 1, "ouch")),
|
||||
(u"\\U00010000", 1)
|
||||
)
|
||||
self.assertEquals(
|
||||
codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U0010ffff", 0, 1, "ouch")),
|
||||
(u"\\U0010ffff", 1)
|
||||
)
|
||||
|
||||
self.assertRaises(
|
||||
TypeError,
|
||||
codecs.backslashreplace_errors,
|
||||
UnicodeError("ouch")
|
||||
)
|
||||
self.assertRaises(
|
||||
TypeError,
|
||||
codecs.backslashreplace_errors,
|
||||
UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
|
||||
)
|
||||
self.assertRaises(
|
||||
TypeError,
|
||||
codecs.backslashreplace_errors,
|
||||
UnicodeTranslateError(u"\u3042", 0, 1, "ouch")
|
||||
)
|
||||
|
||||
def test_badhandlerresults(self):
|
||||
results = ( 42, u"foo", (1,2,3), (u"foo", 1, 3), (u"foo", None), (u"foo",), ("foo", 1, 3), ("foo", None), ("foo",) )
|
||||
encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15")
|
||||
|
||||
for res in results:
|
||||
codecs.register_error("test.badhandler", lambda: res)
|
||||
for enc in encs:
|
||||
self.assertRaises(
|
||||
TypeError,
|
||||
u"\u3042".encode,
|
||||
enc,
|
||||
"test.badhandler"
|
||||
)
|
||||
for (enc, bytes) in (
|
||||
("ascii", "\xff"),
|
||||
("utf-8", "\xff"),
|
||||
("utf-7", "+x-")
|
||||
):
|
||||
self.assertRaises(
|
||||
TypeError,
|
||||
bytes.decode,
|
||||
enc,
|
||||
"test.badhandler"
|
||||
)
|
||||
|
||||
def test_lookup(self):
|
||||
self.assertEquals(codecs.strict_errors, codecs.lookup_error("strict"))
|
||||
self.assertEquals(codecs.ignore_errors, codecs.lookup_error("ignore"))
|
||||
self.assertEquals(codecs.strict_errors, codecs.lookup_error("strict"))
|
||||
self.assertEquals(
|
||||
codecs.xmlcharrefreplace_errors,
|
||||
codecs.lookup_error("xmlcharrefreplace")
|
||||
)
|
||||
self.assertEquals(
|
||||
codecs.backslashreplace_errors,
|
||||
codecs.lookup_error("backslashreplace")
|
||||
)
|
||||
|
||||
def test_main():
    # Collect all CodecCallbackTest cases and hand them to the
    # test_support runner.
    callback_tests = unittest.makeSuite(CodecCallbackTest)
    suite = unittest.TestSuite()
    suite.addTest(callback_tests)
    test.test_support.run_suite(suite)
|
||||
|
||||
# Run the suite when this file is executed as a script.
if __name__ == "__main__":
|
||||
test_main()
|
|
@ -57,6 +57,9 @@ Type/class unification and new-style classes
|
|||
|
||||
Core and builtins
|
||||
|
||||
- Codec error handling callbacks (PEP 293) are implemented.
|
||||
Error handling in unicode.encode or str.decode can now be customized.
|
||||
|
||||
- A subtle change to the semantics of the built-in function intern():
|
||||
interned strings are no longer immortal. You must keep a reference
|
||||
to the return value intern() around to get the benefit.
|
||||
|
|
|
@ -706,6 +706,32 @@ mbcs_encode(PyObject *self,
|
|||
#endif /* MS_WINDOWS */
|
||||
#endif /* Py_USING_UNICODE */
|
||||
|
||||
/* --- Error handler registry --------------------------------------------- */
|
||||
|
||||
/* register_error(name, handler): parse the two arguments and hand them to
   PyCodec_RegisterError().  Returns None on success, NULL on failure. */
static PyObject *register_error(PyObject *self, PyObject *args)
{
    const char *errors_name;
    PyObject *callback;
    int parsed;

    parsed = PyArg_ParseTuple(args, "sO:register_error",
                              &errors_name, &callback);
    if (!parsed)
        return NULL;
    if (PyCodec_RegisterError(errors_name, callback) != 0)
        return NULL;
    Py_INCREF(Py_None);
    return Py_None;
}
|
||||
|
||||
/* lookup_error(name): parse the name argument and return whatever
   PyCodec_LookupError() yields for it (NULL if parsing fails). */
static PyObject *lookup_error(PyObject *self, PyObject *args)
{
    const char *errors_name;

    if (PyArg_ParseTuple(args, "s:lookup_error", &errors_name))
        return PyCodec_LookupError(errors_name);
    return NULL;
}
|
||||
|
||||
/* --- Module API --------------------------------------------------------- */
|
||||
|
||||
static PyMethodDef _codecs_functions[] = {
|
||||
|
@ -744,6 +770,8 @@ static PyMethodDef _codecs_functions[] = {
|
|||
{"mbcs_decode", mbcs_decode, METH_VARARGS},
|
||||
#endif
|
||||
#endif /* Py_USING_UNICODE */
|
||||
{"register_error", register_error, METH_VARARGS},
|
||||
{"lookup_error", lookup_error, METH_VARARGS},
|
||||
{NULL, NULL} /* sentinel */
|
||||
};
|
||||
|
||||
|
|
|
@ -2468,7 +2468,9 @@ PyDoc_STRVAR(encode__doc__,
|
|||
Encodes S using the codec registered for encoding. encoding defaults\n\
|
||||
to the default encoding. errors may be given to set a different error\n\
|
||||
handling scheme. Default is 'strict' meaning that encoding errors raise\n\
|
||||
a ValueError. Other possible values are 'ignore' and 'replace'.");
|
||||
a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
|
||||
'xmlcharrefreplace' as well as any other name registered with\n\
|
||||
codecs.register_error that is able to handle UnicodeEncodeErrors.");
|
||||
|
||||
static PyObject *
|
||||
string_encode(PyStringObject *self, PyObject *args)
|
||||
|
@ -2487,7 +2489,9 @@ PyDoc_STRVAR(decode__doc__,
|
|||
Decodes S using the codec registered for encoding. encoding defaults\n\
|
||||
to the default encoding. errors may be given to set a different error\n\
|
||||
handling scheme. Default is 'strict' meaning that encoding errors raise\n\
|
||||
a ValueError. Other possible values are 'ignore' and 'replace'.");
|
||||
a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
|
||||
as well as any other name registered with codecs.register_error that is\n\
|
||||
able to handle UnicodeDecodeErrors.");
|
||||
|
||||
static PyObject *
|
||||
string_decode(PyStringObject *self, PyObject *args)
|
||||
|
|
File diff suppressed because it is too large
Load Diff
399
Python/codecs.c
399
Python/codecs.c
|
@ -422,12 +422,409 @@ PyObject *PyCodec_Decode(PyObject *object,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static PyObject *_PyCodec_ErrorRegistry;
|
||||
|
||||
/* Register the error handling callback function error under the name
|
||||
name. This function will be called by the codec when it encounters
|
||||
an unencodable characters/undecodable bytes and doesn't know the
|
||||
callback name, when name is specified as the error parameter
|
||||
in the call to the encode/decode function.
|
||||
Return 0 on success, -1 on error */
|
||||
int PyCodec_RegisterError(const char *name, PyObject *error)
|
||||
{
|
||||
if (!PyCallable_Check(error)) {
|
||||
PyErr_SetString(PyExc_TypeError, "handler must be callable");
|
||||
return -1;
|
||||
}
|
||||
return PyDict_SetItemString( _PyCodec_ErrorRegistry, (char *)name, error);
|
||||
}
|
||||
|
||||
/* Lookup the error handling callback function registered under the
|
||||
name error. As a special case NULL can be passed, in which case
|
||||
the error handling callback for strict encoding will be returned. */
|
||||
PyObject *PyCodec_LookupError(const char *name)
|
||||
{
|
||||
PyObject *handler = NULL;
|
||||
|
||||
if (name==NULL)
|
||||
name = "strict";
|
||||
handler = PyDict_GetItemString(_PyCodec_ErrorRegistry, (char *)name);
|
||||
if (!handler)
|
||||
PyErr_Format(PyExc_LookupError, "unknown error handler name '%.400s'", name);
|
||||
else
|
||||
Py_INCREF(handler);
|
||||
return handler;
|
||||
}
|
||||
|
||||
/* Set a TypeError saying that the built-in error callbacks cannot
   handle an exception of exc's class. The class name is read from
   exc.__class__.__name__; if any step of that lookup fails, the
   exception raised by the failing step is left in place instead. */
static void wrong_exception_type(PyObject *exc)
{
    PyObject *type = PyObject_GetAttrString(exc, "__class__");
    if (type != NULL) {
        PyObject *name = PyObject_GetAttrString(type, "__name__");
        Py_DECREF(type);
        if (name != NULL) {
            PyObject *string = PyObject_Str(name);
            Py_DECREF(name);
            /* PyObject_Str can fail; never dereference a NULL result */
            if (string != NULL) {
                PyErr_Format(PyExc_TypeError,
                             "don't know how to handle %.400s in error callback",
                             PyString_AS_STRING(string));
                Py_DECREF(string);
            }
        }
    }
}
|
||||
|
||||
/* "strict" error handler: re-raise the exception passed in.
   Always returns NULL. For an instance object the pending error is
   set to (instance's class, instance); anything else produces a
   TypeError. */
PyObject *PyCodec_StrictErrors(PyObject *exc)
{
    if (!PyInstance_Check(exc)) {
        PyErr_SetString(PyExc_TypeError, "codec must pass exception instance");
        return NULL;
    }
    PyErr_SetObject((PyObject *)((PyInstanceObject *)exc)->in_class, exc);
    return NULL;
}
|
||||
|
||||
|
||||
/* "ignore" error handler: skip the offending input.
   Returns the tuple (u"", end) where end is the exception's end
   position, or NULL with an exception set when exc is not one of the
   three Unicode error types or its end position cannot be read. */
PyObject *PyCodec_IgnoreErrors(PyObject *exc)
{
    int end;
    int failed;

    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError))
        failed = PyUnicodeEncodeError_GetEnd(exc, &end);
    else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError))
        failed = PyUnicodeDecodeError_GetEnd(exc, &end);
    else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError))
        failed = PyUnicodeTranslateError_GetEnd(exc, &end);
    else {
        wrong_exception_type(exc);
        return NULL;
    }
    if (failed)
        return NULL;
    /* A NULL buffer with length 0 would yield None rather than u'',
       so any non-NULL pointer is passed along with a zero length. */
    return Py_BuildValue("(u#i)", &end, 0, end);
}
|
||||
|
||||
|
||||
PyObject *PyCodec_ReplaceErrors(PyObject *exc)
|
||||
{
|
||||
PyObject *restuple;
|
||||
int start;
|
||||
int end;
|
||||
int i;
|
||||
|
||||
if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
|
||||
PyObject *res;
|
||||
Py_UNICODE *p;
|
||||
if (PyUnicodeEncodeError_GetStart(exc, &start))
|
||||
return NULL;
|
||||
if (PyUnicodeEncodeError_GetEnd(exc, &end))
|
||||
return NULL;
|
||||
res = PyUnicode_FromUnicode(NULL, end-start);
|
||||
if (res == NULL)
|
||||
return NULL;
|
||||
for (p = PyUnicode_AS_UNICODE(res), i = start;
|
||||
i<end; ++p, ++i)
|
||||
*p = '?';
|
||||
restuple = Py_BuildValue("(Oi)", res, end);
|
||||
Py_DECREF(res);
|
||||
return restuple;
|
||||
}
|
||||
else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
|
||||
Py_UNICODE res = Py_UNICODE_REPLACEMENT_CHARACTER;
|
||||
if (PyUnicodeDecodeError_GetEnd(exc, &end))
|
||||
return NULL;
|
||||
return Py_BuildValue("(u#i)", &res, 1, end);
|
||||
}
|
||||
else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
|
||||
PyObject *res;
|
||||
Py_UNICODE *p;
|
||||
if (PyUnicodeTranslateError_GetStart(exc, &start))
|
||||
return NULL;
|
||||
if (PyUnicodeTranslateError_GetEnd(exc, &end))
|
||||
return NULL;
|
||||
res = PyUnicode_FromUnicode(NULL, end-start);
|
||||
if (res == NULL)
|
||||
return NULL;
|
||||
for (p = PyUnicode_AS_UNICODE(res), i = start;
|
||||
i<end; ++p, ++i)
|
||||
*p = Py_UNICODE_REPLACEMENT_CHARACTER;
|
||||
restuple = Py_BuildValue("(Oi)", res, end);
|
||||
Py_DECREF(res);
|
||||
return restuple;
|
||||
}
|
||||
else {
|
||||
wrong_exception_type(exc);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
|
||||
{
|
||||
if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
|
||||
PyObject *restuple;
|
||||
PyObject *object;
|
||||
int start;
|
||||
int end;
|
||||
PyObject *res;
|
||||
Py_UNICODE *p;
|
||||
Py_UNICODE *startp;
|
||||
Py_UNICODE *outp;
|
||||
int ressize;
|
||||
if (PyUnicodeEncodeError_GetStart(exc, &start))
|
||||
return NULL;
|
||||
if (PyUnicodeEncodeError_GetEnd(exc, &end))
|
||||
return NULL;
|
||||
if (!(object = PyUnicodeEncodeError_GetObject(exc)))
|
||||
return NULL;
|
||||
startp = PyUnicode_AS_UNICODE(object);
|
||||
for (p = startp+start, ressize = 0; p < startp+end; ++p) {
|
||||
if (*p<10)
|
||||
ressize += 2+1+1;
|
||||
else if (*p<100)
|
||||
ressize += 2+2+1;
|
||||
else if (*p<1000)
|
||||
ressize += 2+3+1;
|
||||
else if (*p<10000)
|
||||
ressize += 2+4+1;
|
||||
else if (*p<100000)
|
||||
ressize += 2+5+1;
|
||||
else if (*p<1000000)
|
||||
ressize += 2+6+1;
|
||||
else
|
||||
ressize += 2+7+1;
|
||||
}
|
||||
/* allocate replacement */
|
||||
res = PyUnicode_FromUnicode(NULL, ressize);
|
||||
if (res == NULL) {
|
||||
Py_DECREF(object);
|
||||
return NULL;
|
||||
}
|
||||
/* generate replacement */
|
||||
for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
|
||||
p < startp+end; ++p) {
|
||||
Py_UNICODE c = *p;
|
||||
int digits;
|
||||
int base;
|
||||
*outp++ = '&';
|
||||
*outp++ = '#';
|
||||
if (*p<10) {
|
||||
digits = 1;
|
||||
base = 1;
|
||||
}
|
||||
else if (*p<100) {
|
||||
digits = 2;
|
||||
base = 10;
|
||||
}
|
||||
else if (*p<1000) {
|
||||
digits = 3;
|
||||
base = 100;
|
||||
}
|
||||
else if (*p<10000) {
|
||||
digits = 4;
|
||||
base = 1000;
|
||||
}
|
||||
else if (*p<100000) {
|
||||
digits = 5;
|
||||
base = 10000;
|
||||
}
|
||||
else if (*p<1000000) {
|
||||
digits = 6;
|
||||
base = 100000;
|
||||
}
|
||||
else {
|
||||
digits = 7;
|
||||
base = 1000000;
|
||||
}
|
||||
while (digits-->0) {
|
||||
*outp++ = '0' + c/base;
|
||||
c %= base;
|
||||
base /= 10;
|
||||
}
|
||||
*outp++ = ';';
|
||||
}
|
||||
restuple = Py_BuildValue("(Oi)", res, end);
|
||||
Py_DECREF(res);
|
||||
Py_DECREF(object);
|
||||
return restuple;
|
||||
}
|
||||
else {
|
||||
wrong_exception_type(exc);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Lowercase hexadecimal digits indexed by nibble value (0..15).
   The table is only ever read, so it is declared const. */
static const Py_UNICODE hexdigits[] = {
    '0', '1', '2', '3', '4', '5', '6', '7',
    '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
};
|
||||
|
||||
/* "backslashreplace" error handler (encoding only): each offending
   character is replaced by a backslash escape:
     \xNN        for code points below 0x100,
     \uNNNN      for code points below 0x10000,
     \UNNNNNNNN  otherwise.
   Returns (replacement, end), or NULL with an exception set; any
   exception other than UnicodeEncodeError is rejected with TypeError. */
PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
{
    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
        PyObject *restuple;
        PyObject *object;
        int start;
        int end;
        PyObject *res;
        Py_UNICODE *p;
        Py_UNICODE *startp;
        Py_UNICODE *outp;
        int ressize;

        if (PyUnicodeEncodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeEncodeError_GetEnd(exc, &end))
            return NULL;
        if (!(object = PyUnicodeEncodeError_GetObject(exc)))
            return NULL;
        startp = PyUnicode_AS_UNICODE(object);

        /* size of the replacement: backslash + type letter + hex digits */
        for (p = startp+start, ressize = 0; p < startp+end; ++p) {
            if (*p >= 0x00010000)
                ressize += 1+1+8;
            else if (*p >= 0x100)
                ressize += 1+1+4;
            else
                ressize += 1+1+2;
        }
        res = PyUnicode_FromUnicode(NULL, ressize);
        if (res == NULL) {
            /* release the reference obtained from GetObject */
            Py_DECREF(object);
            return NULL;
        }
        for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
             p < startp+end; ++p) {
            Py_UNICODE c = *p;

            *outp++ = '\\';
            if (c >= 0x00010000) {
                *outp++ = 'U';
                *outp++ = hexdigits[(c>>28)&0xf];
                *outp++ = hexdigits[(c>>24)&0xf];
                *outp++ = hexdigits[(c>>20)&0xf];
                *outp++ = hexdigits[(c>>16)&0xf];
                *outp++ = hexdigits[(c>>12)&0xf];
                *outp++ = hexdigits[(c>>8)&0xf];
            }
            else if (c >= 0x100) {
                *outp++ = 'u';
                *outp++ = hexdigits[(c>>12)&0xf];
                *outp++ = hexdigits[(c>>8)&0xf];
            }
            else
                *outp++ = 'x';
            /* the two low nibbles are common to all three forms */
            *outp++ = hexdigits[(c>>4)&0xf];
            *outp++ = hexdigits[c&0xf];
        }

        restuple = Py_BuildValue("(Oi)", res, end);
        Py_DECREF(res);
        Py_DECREF(object);
        return restuple;
    }
    else {
        wrong_exception_type(exc);
        return NULL;
    }
}
|
||||
|
||||
/* Python-callable wrapper around PyCodec_StrictErrors; `self` is unused. */
static PyObject *strict_errors(PyObject *self, PyObject *exc)
{
    PyObject *outcome = PyCodec_StrictErrors(exc);

    return outcome;
}
|
||||
|
||||
|
||||
/* Python-callable wrapper around PyCodec_IgnoreErrors; `self` is unused. */
static PyObject *ignore_errors(PyObject *self, PyObject *exc)
{
    PyObject *outcome = PyCodec_IgnoreErrors(exc);

    return outcome;
}
|
||||
|
||||
|
||||
/* Python-callable wrapper around PyCodec_ReplaceErrors; `self` is unused. */
static PyObject *replace_errors(PyObject *self, PyObject *exc)
{
    PyObject *outcome = PyCodec_ReplaceErrors(exc);

    return outcome;
}
|
||||
|
||||
|
||||
/* Python-callable wrapper around PyCodec_XMLCharRefReplaceErrors;
   `self` is unused. */
static PyObject *xmlcharrefreplace_errors(PyObject *self, PyObject *exc)
{
    PyObject *outcome = PyCodec_XMLCharRefReplaceErrors(exc);

    return outcome;
}
|
||||
|
||||
|
||||
/* Python-callable wrapper around PyCodec_BackslashReplaceErrors;
   `self` is unused. */
static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc)
{
    PyObject *outcome = PyCodec_BackslashReplaceErrors(exc);

    return outcome;
}
|
||||
|
||||
|
||||
void _PyCodecRegistry_Init(void)
|
||||
{
|
||||
static struct {
|
||||
char *name;
|
||||
PyMethodDef def;
|
||||
} methods[] =
|
||||
{
|
||||
{
|
||||
"strict",
|
||||
{
|
||||
"strict_errors",
|
||||
strict_errors,
|
||||
METH_O
|
||||
}
|
||||
},
|
||||
{
|
||||
"ignore",
|
||||
{
|
||||
"ignore_errors",
|
||||
ignore_errors,
|
||||
METH_O
|
||||
}
|
||||
},
|
||||
{
|
||||
"replace",
|
||||
{
|
||||
"replace_errors",
|
||||
replace_errors,
|
||||
METH_O
|
||||
}
|
||||
},
|
||||
{
|
||||
"xmlcharrefreplace",
|
||||
{
|
||||
"xmlcharrefreplace_errors",
|
||||
xmlcharrefreplace_errors,
|
||||
METH_O
|
||||
}
|
||||
},
|
||||
{
|
||||
"backslashreplace",
|
||||
{
|
||||
"backslashreplace_errors",
|
||||
backslashreplace_errors,
|
||||
METH_O
|
||||
}
|
||||
}
|
||||
};
|
||||
if (_PyCodec_SearchPath == NULL)
|
||||
_PyCodec_SearchPath = PyList_New(0);
|
||||
if (_PyCodec_SearchCache == NULL)
|
||||
_PyCodec_SearchCache = PyDict_New();
|
||||
if (_PyCodec_ErrorRegistry == NULL) {
|
||||
int i;
|
||||
_PyCodec_ErrorRegistry = PyDict_New();
|
||||
|
||||
if (_PyCodec_ErrorRegistry) {
|
||||
for (i = 0; i < 5; ++i) {
|
||||
PyObject *func = PyCFunction_New(&methods[i].def, NULL);
|
||||
int res;
|
||||
if (!func)
|
||||
Py_FatalError("can't initialize codec error registry");
|
||||
res = PyCodec_RegisterError(methods[i].name, func);
|
||||
Py_DECREF(func);
|
||||
if (res)
|
||||
Py_FatalError("can't initialize codec error registry");
|
||||
}
|
||||
}
|
||||
}
|
||||
if (_PyCodec_SearchPath == NULL ||
|
||||
_PyCodec_SearchCache == NULL)
|
||||
Py_FatalError("can't initialize codec registry");
|
||||
|
@ -439,4 +836,6 @@ void _PyCodecRegistry_Fini(void)
|
|||
_PyCodec_SearchPath = NULL;
|
||||
Py_XDECREF(_PyCodec_SearchCache);
|
||||
_PyCodec_SearchCache = NULL;
|
||||
Py_XDECREF(_PyCodec_ErrorRegistry);
|
||||
_PyCodec_ErrorRegistry = NULL;
|
||||
}
|
||||
|
|
|
@ -100,6 +100,10 @@ Exception\n\
|
|||
| +-- ValueError\n\
|
||||
| | |\n\
|
||||
| | +-- UnicodeError\n\
|
||||
| | |\n\
|
||||
| | +-- UnicodeEncodeError\n\
|
||||
| | +-- UnicodeDecodeError\n\
|
||||
| | +-- UnicodeTranslateError\n\
|
||||
| |\n\
|
||||
| +-- ReferenceError\n\
|
||||
| +-- SystemError\n\
|
||||
|
@ -840,6 +844,590 @@ static PyMethodDef SyntaxError_methods[] = {
|
|||
};
|
||||
|
||||
|
||||
static
|
||||
int get_int(PyObject *exc, const char *name, int *value)
|
||||
{
|
||||
PyObject *attr = PyObject_GetAttrString(exc, (char *)name);
|
||||
|
||||
if (!attr)
|
||||
return -1;
|
||||
if (!PyInt_Check(attr)) {
|
||||
PyErr_Format(PyExc_TypeError, "%s attribute must be int", name);
|
||||
Py_DECREF(attr);
|
||||
return -1;
|
||||
}
|
||||
*value = PyInt_AS_LONG(attr);
|
||||
Py_DECREF(attr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static
|
||||
int set_int(PyObject *exc, const char *name, int value)
|
||||
{
|
||||
PyObject *obj = PyInt_FromLong(value);
|
||||
int result;
|
||||
|
||||
if (!obj)
|
||||
return -1;
|
||||
result = PyObject_SetAttrString(exc, (char *)name, obj);
|
||||
Py_DECREF(obj);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
static
|
||||
PyObject *get_string(PyObject *exc, const char *name)
|
||||
{
|
||||
PyObject *attr = PyObject_GetAttrString(exc, (char *)name);
|
||||
|
||||
if (!attr)
|
||||
return NULL;
|
||||
if (!PyString_Check(attr)) {
|
||||
PyErr_Format(PyExc_TypeError, "%s attribute must be str", name);
|
||||
Py_DECREF(attr);
|
||||
return NULL;
|
||||
}
|
||||
return attr;
|
||||
}
|
||||
|
||||
|
||||
static
|
||||
int set_string(PyObject *exc, const char *name, const char *value)
|
||||
{
|
||||
PyObject *obj = PyString_FromString(value);
|
||||
int result;
|
||||
|
||||
if (!obj)
|
||||
return -1;
|
||||
result = PyObject_SetAttrString(exc, (char *)name, obj);
|
||||
Py_DECREF(obj);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
static
|
||||
PyObject *get_unicode(PyObject *exc, const char *name)
|
||||
{
|
||||
PyObject *attr = PyObject_GetAttrString(exc, (char *)name);
|
||||
|
||||
if (!attr)
|
||||
return NULL;
|
||||
if (!PyUnicode_Check(attr)) {
|
||||
PyErr_Format(PyExc_TypeError, "%s attribute must be unicode", name);
|
||||
Py_DECREF(attr);
|
||||
return NULL;
|
||||
}
|
||||
return attr;
|
||||
}
|
||||
|
||||
/* Return the str stored in exc.encoding, or NULL with an exception set. */
PyObject * PyUnicodeEncodeError_GetEncoding(PyObject *exc)
{
    PyObject *encoding = get_string(exc, "encoding");

    return encoding;
}
|
||||
|
||||
/* Return the str stored in exc.encoding, or NULL with an exception set. */
PyObject * PyUnicodeDecodeError_GetEncoding(PyObject *exc)
{
    PyObject *encoding = get_string(exc, "encoding");

    return encoding;
}
|
||||
|
||||
/* Return the str stored in exc.encoding, or NULL with an exception set.
   NOTE(review): UnicodeTranslateError__init__ in this file does not
   assign an "encoding" attribute, so this lookup presumably fails
   unless a caller sets the attribute itself -- confirm intent. */
PyObject * PyUnicodeTranslateError_GetEncoding(PyObject *exc)
{
    PyObject *encoding = get_string(exc, "encoding");

    return encoding;
}
|
||||
|
||||
/* Return the unicode object stored in exc.object, or NULL with an
   exception set. */
PyObject *PyUnicodeEncodeError_GetObject(PyObject *exc)
{
    PyObject *input = get_unicode(exc, "object");

    return input;
}
|
||||
|
||||
/* Return the str object stored in exc.object, or NULL with an
   exception set. */
PyObject *PyUnicodeDecodeError_GetObject(PyObject *exc)
{
    PyObject *input = get_string(exc, "object");

    return input;
}
|
||||
|
||||
/* Return the unicode object stored in exc.object, or NULL with an
   exception set. */
PyObject *PyUnicodeTranslateError_GetObject(PyObject *exc)
{
    PyObject *input = get_unicode(exc, "object");

    return input;
}
|
||||
|
||||
/* Read exc.start into *start, clamped to [0, len(exc.object)-1].
   Negative values become 0; values at or past the end become the
   last index (which is -1 for an empty object).
   Returns 0 on success, -1 with an exception set on failure. */
int PyUnicodeEncodeError_GetStart(PyObject *exc, int *start)
{
    PyObject *text;
    int limit;

    if (get_int(exc, "start", start))
        return -1;
    text = PyUnicodeEncodeError_GetObject(exc);
    if (text == NULL)
        return -1;
    limit = PyUnicode_GET_SIZE(text);
    if (*start < 0)
        *start = 0;
    if (*start >= limit)
        *start = limit-1;
    Py_DECREF(text);
    return 0;
}
|
||||
|
||||
|
||||
/* Read exc.start into *start, clamped to [0, len(exc.object)-1].
   Negative values become 0; values at or past the end become the
   last index (which is -1 for an empty object).
   Returns 0 on success, -1 with an exception set on failure. */
int PyUnicodeDecodeError_GetStart(PyObject *exc, int *start)
{
    PyObject *bytes;
    int limit;

    if (get_int(exc, "start", start))
        return -1;
    bytes = PyUnicodeDecodeError_GetObject(exc);
    if (bytes == NULL)
        return -1;
    limit = PyString_GET_SIZE(bytes);
    if (*start < 0)
        *start = 0;
    if (*start >= limit)
        *start = limit-1;
    Py_DECREF(bytes);
    return 0;
}
|
||||
|
||||
|
||||
/* Same as PyUnicodeEncodeError_GetStart: both exception types keep a
   unicode object in exc.object. */
int PyUnicodeTranslateError_GetStart(PyObject *exc, int *start)
{
    int status = PyUnicodeEncodeError_GetStart(exc, start);

    return status;
}
|
||||
|
||||
|
||||
/* Assign `start` to exc.start; returns the result of set_int. */
int PyUnicodeEncodeError_SetStart(PyObject *exc, int start)
{
    int status = set_int(exc, "start", start);

    return status;
}
|
||||
|
||||
|
||||
/* Assign `start` to exc.start; returns the result of set_int. */
int PyUnicodeDecodeError_SetStart(PyObject *exc, int start)
{
    int status = set_int(exc, "start", start);

    return status;
}
|
||||
|
||||
|
||||
/* Assign `start` to exc.start; returns the result of set_int. */
int PyUnicodeTranslateError_SetStart(PyObject *exc, int start)
{
    int status = set_int(exc, "start", start);

    return status;
}
|
||||
|
||||
|
||||
/* Read exc.end into *end. Values below 1 are raised to 1 and the
   result is then capped at len(exc.object).
   Returns 0 on success, -1 with an exception set on failure. */
int PyUnicodeEncodeError_GetEnd(PyObject *exc, int *end)
{
    PyObject *text;
    int limit;

    if (get_int(exc, "end", end))
        return -1;
    text = PyUnicodeEncodeError_GetObject(exc);
    if (text == NULL)
        return -1;
    limit = PyUnicode_GET_SIZE(text);
    if (*end < 1)
        *end = 1;
    if (*end > limit)
        *end = limit;
    Py_DECREF(text);
    return 0;
}
|
||||
|
||||
|
||||
/* Read exc.end into *end. Values below 1 are raised to 1 and the
   result is then capped at len(exc.object).
   Returns 0 on success, -1 with an exception set on failure. */
int PyUnicodeDecodeError_GetEnd(PyObject *exc, int *end)
{
    PyObject *bytes;
    int limit;

    if (get_int(exc, "end", end))
        return -1;
    bytes = PyUnicodeDecodeError_GetObject(exc);
    if (bytes == NULL)
        return -1;
    limit = PyString_GET_SIZE(bytes);
    if (*end < 1)
        *end = 1;
    if (*end > limit)
        *end = limit;
    Py_DECREF(bytes);
    return 0;
}
|
||||
|
||||
|
||||
/* Read the clamped exc.end into *end. Delegates to
   PyUnicodeEncodeError_GetEnd, since both exception types keep a
   unicode object in exc.object.
   Returns 0 on success, -1 with an exception set on failure. */
int PyUnicodeTranslateError_GetEnd(PyObject *exc, int *end)
{
    return PyUnicodeEncodeError_GetEnd(exc, end);
}
|
||||
|
||||
|
||||
/* Assign `end` to exc.end; returns the result of set_int. */
int PyUnicodeEncodeError_SetEnd(PyObject *exc, int end)
{
    int status = set_int(exc, "end", end);

    return status;
}
|
||||
|
||||
|
||||
/* Assign `end` to exc.end; returns the result of set_int. */
int PyUnicodeDecodeError_SetEnd(PyObject *exc, int end)
{
    int status = set_int(exc, "end", end);

    return status;
}
|
||||
|
||||
|
||||
/* Assign `end` to exc.end; returns the result of set_int. */
int PyUnicodeTranslateError_SetEnd(PyObject *exc, int end)
{
    int status = set_int(exc, "end", end);

    return status;
}
|
||||
|
||||
|
||||
/* Return the str stored in exc.reason, or NULL with an exception set. */
PyObject *PyUnicodeEncodeError_GetReason(PyObject *exc)
{
    PyObject *reason = get_string(exc, "reason");

    return reason;
}
|
||||
|
||||
|
||||
/* Return the str stored in exc.reason, or NULL with an exception set. */
PyObject *PyUnicodeDecodeError_GetReason(PyObject *exc)
{
    PyObject *reason = get_string(exc, "reason");

    return reason;
}
|
||||
|
||||
|
||||
/* Return the str stored in exc.reason, or NULL with an exception set. */
PyObject *PyUnicodeTranslateError_GetReason(PyObject *exc)
{
    PyObject *reason = get_string(exc, "reason");

    return reason;
}
|
||||
|
||||
|
||||
/* Assign the C string `reason` to exc.reason; returns the result of
   set_string. */
int PyUnicodeEncodeError_SetReason(PyObject *exc, const char *reason)
{
    int status = set_string(exc, "reason", reason);

    return status;
}
|
||||
|
||||
|
||||
/* Assign the C string `reason` to exc.reason; returns the result of
   set_string. */
int PyUnicodeDecodeError_SetReason(PyObject *exc, const char *reason)
{
    int status = set_string(exc, "reason", reason);

    return status;
}
|
||||
|
||||
|
||||
/* Assign the C string `reason` to exc.reason; returns the result of
   set_string. */
int PyUnicodeTranslateError_SetReason(PyObject *exc, const char *reason)
{
    int status = set_string(exc, "reason", reason);

    return status;
}
|
||||
|
||||
|
||||
static PyObject *
|
||||
UnicodeError__init__(PyObject *self, PyObject *args, PyTypeObject *objecttype)
|
||||
{
|
||||
PyObject *rtnval = NULL;
|
||||
PyObject *encoding;
|
||||
PyObject *object;
|
||||
PyObject *start;
|
||||
PyObject *end;
|
||||
PyObject *reason;
|
||||
|
||||
if (!(self = get_self(args)))
|
||||
return NULL;
|
||||
|
||||
if (!(args = PySequence_GetSlice(args, 1, PySequence_Size(args))))
|
||||
return NULL;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "O!O!O!O!O!",
|
||||
&PyString_Type, &encoding,
|
||||
objecttype, &object,
|
||||
&PyInt_Type, &start,
|
||||
&PyInt_Type, &end,
|
||||
&PyString_Type, &reason))
|
||||
return NULL;
|
||||
|
||||
if (PyObject_SetAttrString(self, "args", args))
|
||||
goto finally;
|
||||
|
||||
if (PyObject_SetAttrString(self, "encoding", encoding))
|
||||
goto finally;
|
||||
if (PyObject_SetAttrString(self, "object", object))
|
||||
goto finally;
|
||||
if (PyObject_SetAttrString(self, "start", start))
|
||||
goto finally;
|
||||
if (PyObject_SetAttrString(self, "end", end))
|
||||
goto finally;
|
||||
if (PyObject_SetAttrString(self, "reason", reason))
|
||||
goto finally;
|
||||
|
||||
Py_INCREF(Py_None);
|
||||
rtnval = Py_None;
|
||||
|
||||
finally:
|
||||
Py_DECREF(args);
|
||||
return rtnval;
|
||||
}
|
||||
|
||||
|
||||
/* __init__ of UnicodeEncodeError: the shared initializer with the
   `object` argument required to be unicode. */
static PyObject *
UnicodeEncodeError__init__(PyObject *self, PyObject *args)
{
    PyObject *outcome = UnicodeError__init__(self, args, &PyUnicode_Type);

    return outcome;
}
|
||||
|
||||
/* __str__ of UnicodeEncodeError (METH_O: the instance arrives in
   `arg`). Builds the message from the instance's encoding, object,
   start, end and reason attributes.

   Returns a new str, or NULL with an exception set if an attribute is
   missing or has the wrong type. */
static PyObject *
UnicodeEncodeError__str__(PyObject *self, PyObject *arg)
{
    PyObject *encodingObj = NULL;
    PyObject *objectObj = NULL;
    int length;
    int start;
    int end;
    PyObject *reasonObj = NULL;
    char buffer[1000];
    PyObject *result = NULL;

    self = arg;

    if (!(encodingObj = PyUnicodeEncodeError_GetEncoding(self)))
        goto error;

    if (!(objectObj = PyUnicodeEncodeError_GetObject(self)))
        goto error;

    length = PyUnicode_GET_SIZE(objectObj);

    if (PyUnicodeEncodeError_GetStart(self, &start))
        goto error;

    if (PyUnicodeEncodeError_GetEnd(self, &end))
        goto error;

    if (!(reasonObj = PyUnicodeEncodeError_GetReason(self)))
        goto error;

    /* Only index the object when start is a valid position: for an
       empty object the clamped start is -1. */
    if (start >= 0 && start < length && end==start+1) {
        PyOS_snprintf(buffer, sizeof(buffer),
            "'%.400s' codec can't encode character '\\u%x' in position %d: %.400s",
            PyString_AS_STRING(encodingObj),
            (int)PyUnicode_AS_UNICODE(objectObj)[start],
            start,
            PyString_AS_STRING(reasonObj)
        );
    }
    else {
        PyOS_snprintf(buffer, sizeof(buffer),
            "'%.400s' codec can't encode characters in position %d-%d: %.400s",
            PyString_AS_STRING(encodingObj),
            start,
            end-1,
            PyString_AS_STRING(reasonObj)
        );
    }
    result = PyString_FromString(buffer);

error:
    Py_XDECREF(reasonObj);
    Py_XDECREF(objectObj);
    Py_XDECREF(encodingObj);
    return result;
}
|
||||
|
||||
static PyMethodDef UnicodeEncodeError_methods[] = {
|
||||
{"__init__", UnicodeEncodeError__init__, METH_VARARGS},
|
||||
{"__str__", UnicodeEncodeError__str__, METH_O},
|
||||
{NULL, NULL}
|
||||
};
|
||||
|
||||
|
||||
PyObject * PyUnicodeEncodeError_Create(
|
||||
const char *encoding, const Py_UNICODE *object, int length,
|
||||
int start, int end, const char *reason)
|
||||
{
|
||||
return PyObject_CallFunction(PyExc_UnicodeEncodeError, "su#iis",
|
||||
encoding, object, length, start, end, reason);
|
||||
}
|
||||
|
||||
|
||||
/* __init__ of UnicodeDecodeError: the shared initializer with the
   `object` argument required to be str. */
static PyObject *
UnicodeDecodeError__init__(PyObject *self, PyObject *args)
{
    PyObject *outcome = UnicodeError__init__(self, args, &PyString_Type);

    return outcome;
}
|
||||
|
||||
/* __str__ of UnicodeDecodeError (METH_O: the instance arrives in
   `arg`). Builds the message from the instance's encoding, object,
   start, end and reason attributes.

   Returns a new str, or NULL with an exception set if an attribute is
   missing or has the wrong type. */
static PyObject *
UnicodeDecodeError__str__(PyObject *self, PyObject *arg)
{
    PyObject *encodingObj = NULL;
    PyObject *objectObj = NULL;
    int length;
    int start;
    int end;
    PyObject *reasonObj = NULL;
    char buffer[1000];
    PyObject *result = NULL;

    self = arg;

    if (!(encodingObj = PyUnicodeDecodeError_GetEncoding(self)))
        goto error;

    if (!(objectObj = PyUnicodeDecodeError_GetObject(self)))
        goto error;

    length = PyString_GET_SIZE(objectObj);

    if (PyUnicodeDecodeError_GetStart(self, &start))
        goto error;

    if (PyUnicodeDecodeError_GetEnd(self, &end))
        goto error;

    if (!(reasonObj = PyUnicodeDecodeError_GetReason(self)))
        goto error;

    /* Only index the object when start is a valid position: for an
       empty object the clamped start is -1. */
    if (start >= 0 && start < length && end==start+1) {
        PyOS_snprintf(buffer, sizeof(buffer),
            "'%.400s' codec can't decode byte 0x%x in position %d: %.400s",
            PyString_AS_STRING(encodingObj),
            ((int)PyString_AS_STRING(objectObj)[start])&0xff,
            start,
            PyString_AS_STRING(reasonObj)
        );
    }
    else {
        PyOS_snprintf(buffer, sizeof(buffer),
            "'%.400s' codec can't decode bytes in position %d-%d: %.400s",
            PyString_AS_STRING(encodingObj),
            start,
            end-1,
            PyString_AS_STRING(reasonObj)
        );
    }
    result = PyString_FromString(buffer);

error:
    Py_XDECREF(reasonObj);
    Py_XDECREF(objectObj);
    Py_XDECREF(encodingObj);
    return result;
}
|
||||
|
||||
static PyMethodDef UnicodeDecodeError_methods[] = {
|
||||
{"__init__", UnicodeDecodeError__init__, METH_VARARGS},
|
||||
{"__str__", UnicodeDecodeError__str__, METH_O},
|
||||
{NULL, NULL}
|
||||
};
|
||||
|
||||
|
||||
PyObject * PyUnicodeDecodeError_Create(
|
||||
const char *encoding, const char *object, int length,
|
||||
int start, int end, const char *reason)
|
||||
{
|
||||
return PyObject_CallFunction(PyExc_UnicodeDecodeError, "ss#iis",
|
||||
encoding, object, length, start, end, reason);
|
||||
}
|
||||
|
||||
|
||||
/* __init__ of UnicodeTranslateError.

   args is (self, object, start, end, reason). The arguments after
   self are type-checked (unicode, int, int, str) and stored on the
   instance as args, object, start, end and reason.

   Returns None on success, NULL with an exception set on failure. */
static PyObject *
UnicodeTranslateError__init__(PyObject *self, PyObject *args)
{
    PyObject *object;
    PyObject *start;
    PyObject *end;
    PyObject *reason;
    PyObject *rtnval = NULL;

    self = get_self(args);
    if (self == NULL)
        return NULL;

    /* from here on `args` is a new tuple without self that we own */
    args = PySequence_GetSlice(args, 1, PySequence_Size(args));
    if (args == NULL)
        return NULL;

    if (!PyArg_ParseTuple(args, "O!O!O!O!",
                          &PyUnicode_Type, &object,
                          &PyInt_Type, &start,
                          &PyInt_Type, &end,
                          &PyString_Type, &reason))
        goto finally;

    /* stop at the first assignment that fails */
    if (PyObject_SetAttrString(self, "args", args) ||
        PyObject_SetAttrString(self, "object", object) ||
        PyObject_SetAttrString(self, "start", start) ||
        PyObject_SetAttrString(self, "end", end) ||
        PyObject_SetAttrString(self, "reason", reason))
        goto finally;

    rtnval = Py_None;
    Py_INCREF(rtnval);

  finally:
    Py_DECREF(args);
    return rtnval;
}
|
||||
|
||||
|
||||
/* __str__ of UnicodeTranslateError (METH_O: the instance arrives in
   `arg`). Builds the message from the instance's object, start, end
   and reason attributes.

   Returns a new str, or NULL with an exception set if an attribute is
   missing or has the wrong type. */
static PyObject *
UnicodeTranslateError__str__(PyObject *self, PyObject *arg)
{
    PyObject *objectObj = NULL;
    int length;
    int start;
    int end;
    PyObject *reasonObj = NULL;
    char buffer[1000];
    PyObject *result = NULL;

    self = arg;

    if (!(objectObj = PyUnicodeTranslateError_GetObject(self)))
        goto error;

    length = PyUnicode_GET_SIZE(objectObj);

    if (PyUnicodeTranslateError_GetStart(self, &start))
        goto error;

    if (PyUnicodeTranslateError_GetEnd(self, &end))
        goto error;

    if (!(reasonObj = PyUnicodeTranslateError_GetReason(self)))
        goto error;

    /* Only index the object when start is a valid position: for an
       empty object the clamped start is -1. */
    if (start >= 0 && start < length && end==start+1) {
        PyOS_snprintf(buffer, sizeof(buffer),
            "can't translate character '\\u%x' in position %d: %.400s",
            (int)PyUnicode_AS_UNICODE(objectObj)[start],
            start,
            PyString_AS_STRING(reasonObj)
        );
    }
    else {
        PyOS_snprintf(buffer, sizeof(buffer),
            "can't translate characters in position %d-%d: %.400s",
            start,
            end-1,
            PyString_AS_STRING(reasonObj)
        );
    }
    result = PyString_FromString(buffer);

error:
    Py_XDECREF(reasonObj);
    Py_XDECREF(objectObj);
    return result;
}
|
||||
|
||||
static PyMethodDef UnicodeTranslateError_methods[] = {
|
||||
{"__init__", UnicodeTranslateError__init__, METH_VARARGS},
|
||||
{"__str__", UnicodeTranslateError__str__, METH_O},
|
||||
{NULL, NULL}
|
||||
};
|
||||
|
||||
|
||||
PyObject * PyUnicodeTranslateError_Create(
|
||||
const Py_UNICODE *object, int length,
|
||||
int start, int end, const char *reason)
|
||||
{
|
||||
return PyObject_CallFunction(PyExc_UnicodeTranslateError, "u#iis",
|
||||
object, length, start, end, reason);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Exception doc strings */
|
||||
|
||||
|
@ -865,6 +1453,12 @@ PyDoc_STRVAR(ValueError__doc__,
|
|||
|
||||
PyDoc_STRVAR(UnicodeError__doc__, "Unicode related error.");
|
||||
|
||||
PyDoc_STRVAR(UnicodeEncodeError__doc__, "Unicode encoding error.");
|
||||
|
||||
PyDoc_STRVAR(UnicodeDecodeError__doc__, "Unicode decoding error.");
|
||||
|
||||
PyDoc_STRVAR(UnicodeTranslateError__doc__, "Unicode translation error.");
|
||||
|
||||
PyDoc_STRVAR(SystemError__doc__,
|
||||
"Internal error in the Python interpreter.\n\
|
||||
\n\
|
||||
|
@ -949,6 +1543,9 @@ PyObject *PyExc_SystemError;
|
|||
PyObject *PyExc_SystemExit;
|
||||
PyObject *PyExc_UnboundLocalError;
|
||||
PyObject *PyExc_UnicodeError;
|
||||
PyObject *PyExc_UnicodeEncodeError;
|
||||
PyObject *PyExc_UnicodeDecodeError;
|
||||
PyObject *PyExc_UnicodeTranslateError;
|
||||
PyObject *PyExc_TypeError;
|
||||
PyObject *PyExc_ValueError;
|
||||
PyObject *PyExc_ZeroDivisionError;
|
||||
|
@ -1035,6 +1632,12 @@ static struct {
|
|||
FloatingPointError__doc__},
|
||||
{"ValueError", &PyExc_ValueError, 0, ValueError__doc__},
|
||||
{"UnicodeError", &PyExc_UnicodeError, &PyExc_ValueError, UnicodeError__doc__},
|
||||
{"UnicodeEncodeError", &PyExc_UnicodeEncodeError, &PyExc_UnicodeError,
|
||||
UnicodeEncodeError__doc__, UnicodeEncodeError_methods},
|
||||
{"UnicodeDecodeError", &PyExc_UnicodeDecodeError, &PyExc_UnicodeError,
|
||||
UnicodeDecodeError__doc__, UnicodeDecodeError_methods},
|
||||
{"UnicodeTranslateError", &PyExc_UnicodeTranslateError, &PyExc_UnicodeError,
|
||||
UnicodeTranslateError__doc__, UnicodeTranslateError_methods},
|
||||
{"ReferenceError", &PyExc_ReferenceError, 0, ReferenceError__doc__},
|
||||
{"SystemError", &PyExc_SystemError, 0, SystemError__doc__},
|
||||
{"MemoryError", &PyExc_MemoryError, 0, MemoryError__doc__},
|
||||
|
|
Loading…
Reference in New Issue