PEP 293 implementation (from SF patch http://www.python.org/sf/432401)

This commit is contained in:
Walter Dörwald 2002-09-02 13:14:32 +00:00
parent 94fab762de
commit 3aeb632c31
12 changed files with 2936 additions and 563 deletions

View File

@ -17,7 +17,7 @@
This module defines base classes for standard Python codecs (encoders
and decoders) and provides access to the internal Python codec
registry which manages the codec lookup process.
registry which manages the codec and error handling lookup process.
It defines the following functions:
@ -98,6 +98,43 @@ Raises a \exception{LookupError} in case the encoding cannot be found.
To simplify working with encoded files or stream, the module
also defines these utility functions:
\begin{funcdesc}{register_error}{name, error_handler}
Register the error handling function \var{error_handler} under the
name \var{name}. \var{error_handler} will be called during encoding
and decoding in case of an error, when \var{name} is specified as the
errors parameter. \var{error_handler} will be called with an
\exception{UnicodeEncodeError}, \exception{UnicodeDecodeError} or
\exception{UnicodeTranslateError} instance and must return a tuple
with a replacement for the unencodable/undecodable part of the input
and a position where encoding/decoding should continue.
\end{funcdesc}
\begin{funcdesc}{lookup_error}{name}
Return the error handler previously registered under the name \var{name}.
Raises a \exception{LookupError} in case the handler cannot be found.
\end{funcdesc}
\begin{funcdesc}{strict_errors}{exception}
Implements the \code{strict} error handling.
\end{funcdesc}
\begin{funcdesc}{replace_errors}{exception}
Implements the \code{replace} error handling.
\end{funcdesc}
\begin{funcdesc}{ignore_errors}{exception}
Implements the \code{ignore} error handling.
\end{funcdesc}
\begin{funcdesc}{xmlcharrefreplace_errors}{exception}
Implements the \code{xmlcharrefreplace} error handling.
\end{funcdesc}
\begin{funcdesc}{backslashreplace_errors}{exception}
Implements the \code{backslashreplace} error handling.
\end{funcdesc}
\begin{funcdesc}{open}{filename, mode\optional{, encoding\optional{,
errors\optional{, buffering}}}}
Open an encoded file using the given \var{mode} and return

View File

@ -335,6 +335,24 @@ Raised when an \keyword{assert} statement fails.
\versionadded{2.0}
\end{excdesc}
\begin{excdesc}{UnicodeEncodeError}
Raised when a Unicode-related error occurs during encoding. It
is a subclass of \exception{UnicodeError}.
\versionadded{2.3}
\end{excdesc}
\begin{excdesc}{UnicodeDecodeError}
Raised when a Unicode-related error occurs during decoding. It
is a subclass of \exception{UnicodeError}.
\versionadded{2.3}
\end{excdesc}
\begin{excdesc}{UnicodeTranslateError}
Raised when a Unicode-related error occurs during translating. It
is a subclass of \exception{UnicodeError}.
\versionadded{2.3}
\end{excdesc}
\begin{excdesc}{ValueError}
Raised when a built-in operation or function receives an argument
that has the right type but an inappropriate value, and the
@ -426,6 +444,9 @@ The class hierarchy for built-in exceptions is:
| | +-- FloatingPointError
| +-- ValueError
| | +-- UnicodeError
| | +-- UnicodeEncodeError
| | +-- UnicodeDecodeError
| | +-- UnicodeTranslateError
| +-- ReferenceError
| +-- SystemError
| +-- MemoryError

View File

@ -117,6 +117,36 @@ PyAPI_FUNC(PyObject *) PyCodec_StreamWriter(
const char *errors
);
/* Unicode encoding error handling callback registry API */
/* Register the error handling callback function error under the name
name. This function will be called by the codec when it encounters
unencodable characters/undecodable bytes and doesn't know the
callback name, when name is specified as the error parameter
in the call to the encode/decode function.
Return 0 on success, -1 on error */
PyAPI_FUNC(int) PyCodec_RegisterError(const char *name, PyObject *error);
/* Lookup the error handling callback function registered under the
name error. As a special case NULL can be passed, in which case
the error handling callback for "strict" will be returned. */
PyAPI_FUNC(PyObject *) PyCodec_LookupError(const char *name);
/* raise exc as an exception */
PyAPI_FUNC(PyObject *) PyCodec_StrictErrors(PyObject *exc);
/* ignore the unicode error, skipping the faulty input */
PyAPI_FUNC(PyObject *) PyCodec_IgnoreErrors(PyObject *exc);
/* replace the unicode error with ? or U+FFFD */
PyAPI_FUNC(PyObject *) PyCodec_ReplaceErrors(PyObject *exc);
/* replace the unicode encode error with XML character references */
PyAPI_FUNC(PyObject *) PyCodec_XMLCharRefReplaceErrors(PyObject *exc);
/* replace the unicode encode error with backslash escapes (\x, \u and \U) */
PyAPI_FUNC(PyObject *) PyCodec_BackslashReplaceErrors(PyObject *exc);
#ifdef __cplusplus
}
#endif

View File

@ -54,6 +54,9 @@ PyAPI_DATA(PyObject *) PyExc_SystemExit;
PyAPI_DATA(PyObject *) PyExc_TypeError;
PyAPI_DATA(PyObject *) PyExc_UnboundLocalError;
PyAPI_DATA(PyObject *) PyExc_UnicodeError;
PyAPI_DATA(PyObject *) PyExc_UnicodeEncodeError;
PyAPI_DATA(PyObject *) PyExc_UnicodeDecodeError;
PyAPI_DATA(PyObject *) PyExc_UnicodeTranslateError;
PyAPI_DATA(PyObject *) PyExc_ValueError;
PyAPI_DATA(PyObject *) PyExc_ZeroDivisionError;
#ifdef MS_WINDOWS
@ -114,6 +117,69 @@ PyAPI_FUNC(void) PyErr_SetInterrupt(void);
PyAPI_FUNC(void) PyErr_SyntaxLocation(char *, int);
PyAPI_FUNC(PyObject *) PyErr_ProgramText(char *, int);
/* The following functions are used to create and modify unicode
exceptions from C */
/* create a UnicodeDecodeError object */
PyAPI_FUNC(PyObject *) PyUnicodeDecodeError_Create(
const char *, const char *, int, int, int, const char *);
/* create a UnicodeEncodeError object */
PyAPI_FUNC(PyObject *) PyUnicodeEncodeError_Create(
const char *, const Py_UNICODE *, int, int, int, const char *);
/* create a UnicodeTranslateError object */
PyAPI_FUNC(PyObject *) PyUnicodeTranslateError_Create(
const Py_UNICODE *, int, int, int, const char *);
/* get the encoding attribute */
PyAPI_FUNC(PyObject *) PyUnicodeEncodeError_GetEncoding(PyObject *);
PyAPI_FUNC(PyObject *) PyUnicodeDecodeError_GetEncoding(PyObject *);
PyAPI_FUNC(PyObject *) PyUnicodeTranslateError_GetEncoding(PyObject *);
/* get the object attribute */
PyAPI_FUNC(PyObject *) PyUnicodeEncodeError_GetObject(PyObject *);
PyAPI_FUNC(PyObject *) PyUnicodeDecodeError_GetObject(PyObject *);
PyAPI_FUNC(PyObject *) PyUnicodeTranslateError_GetObject(PyObject *);
/* get the value of the start attribute (the int * may not be NULL)
return 0 on success, -1 on failure */
PyAPI_FUNC(int) PyUnicodeEncodeError_GetStart(PyObject *, int *);
PyAPI_FUNC(int) PyUnicodeDecodeError_GetStart(PyObject *, int *);
PyAPI_FUNC(int) PyUnicodeTranslateError_GetStart(PyObject *, int *);
/* assign a new value to the start attribute
return 0 on success, -1 on failure */
PyAPI_FUNC(int) PyUnicodeEncodeError_SetStart(PyObject *, int);
PyAPI_FUNC(int) PyUnicodeDecodeError_SetStart(PyObject *, int);
PyAPI_FUNC(int) PyUnicodeTranslateError_SetStart(PyObject *, int);
/* get the value of the end attribute (the int *may not be NULL)
return 0 on success, -1 on failure */
PyAPI_FUNC(int) PyUnicodeEncodeError_GetEnd(PyObject *, int *);
PyAPI_FUNC(int) PyUnicodeDecodeError_GetEnd(PyObject *, int *);
PyAPI_FUNC(int) PyUnicodeTranslateError_GetEnd(PyObject *, int *);
/* assign a new value to the end attribute
return 0 on success, -1 on failure */
PyAPI_FUNC(int) PyUnicodeEncodeError_SetEnd(PyObject *, int);
PyAPI_FUNC(int) PyUnicodeDecodeError_SetEnd(PyObject *, int);
PyAPI_FUNC(int) PyUnicodeTranslateError_SetEnd(PyObject *, int);
/* get the value of the reason attribute */
PyAPI_FUNC(PyObject *) PyUnicodeEncodeError_GetReason(PyObject *);
PyAPI_FUNC(PyObject *) PyUnicodeDecodeError_GetReason(PyObject *);
PyAPI_FUNC(PyObject *) PyUnicodeTranslateError_GetReason(PyObject *);
/* assign a new value to the reason attribute
return 0 on success, -1 on failure */
PyAPI_FUNC(int) PyUnicodeEncodeError_SetReason(
PyObject *, const char *);
PyAPI_FUNC(int) PyUnicodeDecodeError_SetReason(
PyObject *, const char *);
PyAPI_FUNC(int) PyUnicodeTranslateError_SetReason(
PyObject *, const char *);
/* These APIs aren't really part of the error implementation, but
often needed to format error messages; the native C lib APIs are
not available on all platforms, which is why we provide emulations

View File

@ -20,7 +20,10 @@ except ImportError, why:
__all__ = ["register", "lookup", "open", "EncodedFile", "BOM", "BOM_BE",
"BOM_LE", "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE",
"BOM_UTF8", "BOM_UTF16", "BOM_UTF16_LE", "BOM_UTF16_BE",
"BOM_UTF32", "BOM_UTF32_LE", "BOM_UTF32_BE"]
"BOM_UTF32", "BOM_UTF32_LE", "BOM_UTF32_BE",
"strict_errors", "ignore_errors", "replace_errors",
"xmlcharrefreplace_errors",
"register_error", "lookup_error"]
### Constants
@ -632,6 +635,14 @@ def make_encoding_map(decoding_map):
m[v] = None
return m
### error handlers
strict_errors = lookup_error("strict")
ignore_errors = lookup_error("ignore")
replace_errors = lookup_error("replace")
xmlcharrefreplace_errors = lookup_error("xmlcharrefreplace")
backslashreplace_errors = lookup_error("backslashreplace")
# Tell modulefinder that using codecs probably needs the encodings
# package
_false = 0

View File

@ -0,0 +1,483 @@
import test.test_support, unittest
import sys, codecs, htmlentitydefs, unicodedata
class CodecCallbackTest(unittest.TestCase):
def test_xmlcharrefreplace(self):
# replace unencodable characters which numeric character entities.
# For ascii, latin-1 and charmaps this is completely implemented
# in C and should be reasonably fast.
s = u"\u30b9\u30d1\u30e2 \xe4nd eggs"
self.assertEqual(
s.encode("ascii", "xmlcharrefreplace"),
"スパモ änd eggs"
)
self.assertEqual(
s.encode("latin-1", "xmlcharrefreplace"),
"スパモ \xe4nd eggs"
)
def test_xmlcharnamereplace(self):
# This time use a named character entity for unencodable
# characters, if one is available.
names = {}
for (key, value) in htmlentitydefs.entitydefs.items():
if len(value)==1:
names[unicode(value, "latin-1")] = unicode(key, "latin-1")
else:
names[unichr(int(value[2:-1]))] = unicode(key, "latin-1")
def xmlcharnamereplace(exc):
if not isinstance(exc, UnicodeEncodeError):
raise TypeError("don't know how to handle %r" % exc)
l = []
for c in exc.object[exc.start:exc.end]:
try:
l.append(u"&%s;" % names[c])
except KeyError:
l.append(u"&#%d;" % ord(c))
return (u"".join(l), exc.end)
codecs.register_error(
"test.xmlcharnamereplace", xmlcharnamereplace)
sin = u"\xab\u211c\xbb = \u2329\u1234\u20ac\u232a"
sout = "«ℜ» = ⟨ሴ€⟩"
self.assertEqual(sin.encode("ascii", "test.xmlcharnamereplace"), sout)
sout = "\xabℜ\xbb = ⟨ሴ€⟩"
self.assertEqual(sin.encode("latin-1", "test.xmlcharnamereplace"), sout)
sout = "\xabℜ\xbb = ⟨ሴ\xa4⟩"
self.assertEqual(sin.encode("iso-8859-15", "test.xmlcharnamereplace"), sout)
def test_uninamereplace(self):
# We're using the names from the unicode database this time,
# and we're doing "systax highlighting" here, i.e. we include
# the replaced text in ANSI escape sequences. For this it is
# useful that the error handler is not called for every single
# unencodable character, but for a complete sequence of
# unencodable characters, otherwise we would output many
# unneccessary escape sequences.
def uninamereplace(exc):
if not isinstance(exc, UnicodeEncodeError):
raise TypeError("don't know how to handle %r" % exc)
l = []
for c in exc.object[exc.start:exc.end]:
l.append(unicodedata.name(c, u"0x%x" % ord(c)))
return (u"\033[1m%s\033[0m" % u", ".join(l), exc.end)
codecs.register_error(
"test.uninamereplace", uninamereplace)
sin = u"\xac\u1234\u20ac\u8000"
sout = "\033[1mNOT SIGN, ETHIOPIC SYLLABLE SEE, EURO SIGN, 0x8000\033[0m"
self.assertEqual(sin.encode("ascii", "test.uninamereplace"), sout)
sout = "\xac\033[1mETHIOPIC SYLLABLE SEE, EURO SIGN, 0x8000\033[0m"
self.assertEqual(sin.encode("latin-1", "test.uninamereplace"), sout)
sout = "\xac\033[1mETHIOPIC SYLLABLE SEE\033[0m\xa4\033[1m0x8000\033[0m"
self.assertEqual(sin.encode("iso-8859-15", "test.uninamereplace"), sout)
def test_backslashescape(self):
# Does the same as the "unicode-escape" encoding, but with different
# base encodings.
sin = u"a\xac\u1234\u20ac\u8000"
if sys.maxunicode > 0xffff:
sin += unichr(sys.maxunicode)
sout = "a\\xac\\u1234\\u20ac\\u8000"
if sys.maxunicode > 0xffff:
sout += "\\U%08x" % sys.maxunicode
self.assertEqual(sin.encode("ascii", "backslashreplace"), sout)
sout = "a\xac\\u1234\\u20ac\\u8000"
if sys.maxunicode > 0xffff:
sout += "\\U%08x" % sys.maxunicode
self.assertEqual(sin.encode("latin-1", "backslashreplace"), sout)
sout = "a\xac\\u1234\xa4\\u8000"
if sys.maxunicode > 0xffff:
sout += "\\U%08x" % sys.maxunicode
self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout)
def test_relaxedutf8(self):
# This is the test for a decoding callback handler,
# that relaxes the UTF-8 minimal encoding restriction.
# A null byte that is encoded as "\xc0\x80" will be
# decoded as a null byte. All other illegal sequences
# will be handled strictly.
def relaxedutf8(exc):
if not isinstance(exc, UnicodeDecodeError):
raise TypeError("don't know how to handle %r" % exc)
if exc.object[exc.start:exc.end].startswith("\xc0\x80"):
return (u"\x00", exc.start+2) # retry after two bytes
else:
raise exc
codecs.register_error(
"test.relaxedutf8", relaxedutf8)
sin = "a\x00b\xc0\x80c\xc3\xbc\xc0\x80\xc0\x80"
sout = u"a\x00b\x00c\xfc\x00\x00"
self.assertEqual(sin.decode("utf-8", "test.relaxedutf8"), sout)
sin = "\xc0\x80\xc0\x81"
self.assertRaises(UnicodeError, sin.decode, "utf-8", "test.relaxedutf8")
def test_charmapencode(self):
# For charmap encodings the replacement string will be
# mapped through the encoding again. This means, that
# to be able to use e.g. the "replace" handler, the
# charmap has to have a mapping for "?".
charmap = dict([ (ord(c), 2*c.upper()) for c in "abcdefgh"])
sin = u"abc"
sout = "AABBCC"
self.assertEquals(codecs.charmap_encode(sin, "strict", charmap)[0], sout)
sin = u"abcA"
self.assertRaises(UnicodeError, codecs.charmap_encode, sin, "strict", charmap)
charmap[ord("?")] = "XYZ"
sin = u"abcDEF"
sout = "AABBCCXYZXYZXYZ"
self.assertEquals(codecs.charmap_encode(sin, "replace", charmap)[0], sout)
charmap[ord("?")] = u"XYZ"
self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
charmap[ord("?")] = u"XYZ"
self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
def test_callbacks(self):
def handler1(exc):
if not isinstance(exc, UnicodeEncodeError) \
and not isinstance(exc, UnicodeDecodeError):
raise TypeError("don't know how to handle %r" % exc)
l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)]
return (u"[%s]" % u"".join(l), exc.end)
codecs.register_error("test.handler1", handler1)
def handler2(exc):
if not isinstance(exc, UnicodeDecodeError):
raise TypeError("don't know how to handle %r" % exc)
l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)]
return (u"[%s]" % u"".join(l), exc.end+1) # skip one character
codecs.register_error("test.handler2", handler2)
s = "\x00\x81\x7f\x80\xff"
self.assertEqual(
s.decode("ascii", "test.handler1"),
u"\x00[<129>]\x7f[<128>][<255>]"
)
self.assertEqual(
s.decode("ascii", "test.handler2"),
u"\x00[<129>][<128>]"
)
self.assertEqual(
"\\u3042\u3xxx".decode("unicode-escape", "test.handler1"),
u"\u3042[<92><117><51><120>]xx"
)
self.assertEqual(
"\\u3042\u3xx".decode("unicode-escape", "test.handler1"),
u"\u3042[<92><117><51><120><120>]"
)
self.assertEqual(
codecs.charmap_decode("abc", "test.handler1", {ord("a"): u"z"})[0],
u"z[<98>][<99>]"
)
self.assertEqual(
u"g\xfc\xdfrk".encode("ascii", "test.handler1"),
u"g[<252><223>]rk"
)
self.assertEqual(
u"g\xfc\xdf".encode("ascii", "test.handler1"),
u"g[<252><223>]"
)
def test_longstrings(self):
# test long strings to check for memory overflow problems
errors = [ "strict", "ignore", "replace", "xmlcharrefreplace", "backslashreplace"]
# register the handlers under different names,
# to prevent the codec from recognizing the name
for err in errors:
codecs.register_error("test." + err, codecs.lookup_error(err))
l = 1000
errors += [ "test." + err for err in errors ]
for uni in [ s*l for s in (u"x", u"\u3042", u"a\xe4") ]:
for enc in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15", "utf-8", "utf-7", "utf-16"):
for err in errors:
try:
uni.encode(enc, err)
except UnicodeError:
pass
def check_exceptionobjectargs(self, exctype, args, msg):
    # Test UnicodeError subclasses: construction, attribute assignment
    # and __str__ conversion.
    #
    # exctype -- the exception class under test
    # args    -- a list of valid constructor arguments
    # msg     -- the expected result of str(exctype(*args))

    # check with one missing argument
    self.assertRaises(TypeError, exctype, *args[:-1])
    # check with one argument too many
    self.assertRaises(TypeError, exctype, *(args + ["too much"]))
    # check with exactly one argument of the wrong type; every other
    # position keeps its valid value so that only the substituted
    # argument can be responsible for the TypeError
    wrongargs = [ "spam", u"eggs", 42, 1.0, None ]
    for i in range(len(args)):
        for wrongarg in wrongargs:
            if type(wrongarg) is type(args[i]):
                continue
            # build argument array: a copy of args with slot i replaced
            callargs = list(args)
            callargs[i] = wrongarg
            self.assertRaises(TypeError, exctype, *callargs)
    exc = exctype(*args)
    self.assertEqual(str(exc), msg)
def test_unicodeencodeerror(self):
self.check_exceptionobjectargs(
UnicodeEncodeError,
["ascii", u"g\xfcrk", 1, 2, "ouch"],
"'ascii' codec can't encode character '\ufc' in position 1: ouch"
)
self.check_exceptionobjectargs(
UnicodeEncodeError,
["ascii", u"g\xfcrk", 1, 4, "ouch"],
"'ascii' codec can't encode characters in position 1-3: ouch"
)
self.check_exceptionobjectargs(
UnicodeEncodeError,
["ascii", u"\xfcx", 0, 1, "ouch"],
"'ascii' codec can't encode character '\ufc' in position 0: ouch"
)
def test_unicodedecodeerror(self):
self.check_exceptionobjectargs(
UnicodeDecodeError,
["ascii", "g\xfcrk", 1, 2, "ouch"],
"'ascii' codec can't decode byte 0xfc in position 1: ouch"
)
self.check_exceptionobjectargs(
UnicodeDecodeError,
["ascii", "g\xfcrk", 1, 3, "ouch"],
"'ascii' codec can't decode bytes in position 1-2: ouch"
)
def test_unicodetranslateerror(self):
self.check_exceptionobjectargs(
UnicodeTranslateError,
[u"g\xfcrk", 1, 2, "ouch"],
"can't translate character '\\ufc' in position 1: ouch"
)
self.check_exceptionobjectargs(
UnicodeTranslateError,
[u"g\xfcrk", 1, 3, "ouch"],
"can't translate characters in position 1-2: ouch"
)
def test_badandgoodstrictexceptions(self):
self.assertRaises(
TypeError,
codecs.strict_errors,
42
)
self.assertRaises(
Exception,
codecs.strict_errors,
Exception("ouch")
)
self.assertRaises(
UnicodeEncodeError,
codecs.strict_errors,
UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")
)
def test_badandgoodignoreexceptions(self):
self.assertRaises(
TypeError,
codecs.ignore_errors,
42
)
self.assertRaises(
TypeError,
codecs.ignore_errors,
UnicodeError("ouch")
)
self.assertEquals(
codecs.ignore_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
(u"", 1)
)
self.assertEquals(
codecs.ignore_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
(u"", 1)
)
self.assertEquals(
codecs.ignore_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")),
(u"", 1)
)
def test_badandgoodreplaceexceptions(self):
self.assertRaises(
TypeError,
codecs.replace_errors,
42
)
self.assertRaises(
TypeError,
codecs.replace_errors,
UnicodeError("ouch")
)
self.assertEquals(
codecs.replace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
(u"?", 1)
)
self.assertEquals(
codecs.replace_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
(u"\ufffd", 1)
)
self.assertEquals(
codecs.replace_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")),
(u"\ufffd", 1)
)
def test_badandgoodxmlcharrefreplaceexceptions(self):
self.assertRaises(
TypeError,
codecs.xmlcharrefreplace_errors,
42
)
self.assertRaises(
TypeError,
codecs.xmlcharrefreplace_errors,
UnicodeError("ouch")
)
self.assertEquals(
codecs.xmlcharrefreplace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
(u"&#%d;" % 0x3042, 1)
)
self.assertRaises(
TypeError,
codecs.xmlcharrefreplace_errors,
UnicodeError("ouch")
)
self.assertRaises(
TypeError,
codecs.xmlcharrefreplace_errors,
UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
)
self.assertRaises(
TypeError,
codecs.xmlcharrefreplace_errors,
UnicodeTranslateError(u"\u3042", 0, 1, "ouch")
)
def test_badandgoodbackslashreplaceexceptions(self):
self.assertRaises(
TypeError,
codecs.backslashreplace_errors,
42
)
self.assertRaises(
TypeError,
codecs.backslashreplace_errors,
UnicodeError("ouch")
)
self.assertEquals(
codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
(u"\\u3042", 1)
)
self.assertEquals(
codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\x00", 0, 1, "ouch")),
(u"\\x00", 1)
)
self.assertEquals(
codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\xff", 0, 1, "ouch")),
(u"\\xff", 1)
)
self.assertEquals(
codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u0100", 0, 1, "ouch")),
(u"\\u0100", 1)
)
self.assertEquals(
codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\uffff", 0, 1, "ouch")),
(u"\\uffff", 1)
)
if sys.maxunicode>0xffff:
self.assertEquals(
codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U00010000", 0, 1, "ouch")),
(u"\\U00010000", 1)
)
self.assertEquals(
codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U0010ffff", 0, 1, "ouch")),
(u"\\U0010ffff", 1)
)
self.assertRaises(
TypeError,
codecs.backslashreplace_errors,
UnicodeError("ouch")
)
self.assertRaises(
TypeError,
codecs.backslashreplace_errors,
UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
)
self.assertRaises(
TypeError,
codecs.backslashreplace_errors,
UnicodeTranslateError(u"\u3042", 0, 1, "ouch")
)
def test_badhandlerresults(self):
    # Every value in results is an invalid return value for an error
    # handler; the codec must reject each of them with a TypeError.
    results = ( 42, u"foo", (1,2,3), (u"foo", 1, 3), (u"foo", None), (u"foo",), ("foo", 1, 3), ("foo", None), ("foo",) )
    encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15")

    for res in results:
        # The handler has to accept the exception argument; otherwise the
        # TypeError would come from calling the handler with a wrong
        # number of arguments and the bad result would never be examined.
        codecs.register_error("test.badhandler", lambda exc: res)
        for enc in encs:
            self.assertRaises(
                TypeError,
                u"\u3042".encode,
                enc,
                "test.badhandler"
            )
        for (enc, bytes) in (
            ("ascii", "\xff"),
            ("utf-8", "\xff"),
            ("utf-7", "+x-")
        ):
            self.assertRaises(
                TypeError,
                bytes.decode,
                enc,
                "test.badhandler"
            )
def test_lookup(self):
self.assertEquals(codecs.strict_errors, codecs.lookup_error("strict"))
self.assertEquals(codecs.ignore_errors, codecs.lookup_error("ignore"))
self.assertEquals(codecs.strict_errors, codecs.lookup_error("strict"))
self.assertEquals(
codecs.xmlcharrefreplace_errors,
codecs.lookup_error("xmlcharrefreplace")
)
self.assertEquals(
codecs.backslashreplace_errors,
codecs.lookup_error("backslashreplace")
)
def test_main():
suite = unittest.TestSuite()
suite.addTest(unittest.makeSuite(CodecCallbackTest))
test.test_support.run_suite(suite)
if __name__ == "__main__":
test_main()

View File

@ -57,6 +57,9 @@ Type/class unification and new-style classes
Core and builtins
- Codec error handling callbacks (PEP 293) are implemented.
Error handling in unicode.encode or str.decode can now be customized.
- A subtle change to the semantics of the built-in function intern():
interned strings are no longer immortal. You must keep a reference
to the return value intern() around to get the benefit.

View File

@ -706,6 +706,32 @@ mbcs_encode(PyObject *self,
#endif /* MS_WINDOWS */
#endif /* Py_USING_UNICODE */
/* --- Error handler registry --------------------------------------------- */
/* Module function register_error(name, handler): unpack a string and an
   arbitrary object from args and hand them to PyCodec_RegisterError().
   Returns None on success, NULL (with an exception set by the failing
   call) otherwise. */
static PyObject *
register_error(PyObject *self, PyObject *args)
{
    const char *errname;
    PyObject *callback;

    if (!PyArg_ParseTuple(args, "sO:register_error", &errname, &callback))
        return NULL;

    if (PyCodec_RegisterError(errname, callback) != 0)
        return NULL;

    Py_INCREF(Py_None);
    return Py_None;
}
/* Module function lookup_error(name): unpack a single string from args
   and return whatever PyCodec_LookupError() yields for it. */
static PyObject *
lookup_error(PyObject *self, PyObject *args)
{
    const char *errname;

    if (PyArg_ParseTuple(args, "s:lookup_error", &errname))
        return PyCodec_LookupError(errname);
    return NULL;
}
/* --- Module API --------------------------------------------------------- */
static PyMethodDef _codecs_functions[] = {
@ -744,6 +770,8 @@ static PyMethodDef _codecs_functions[] = {
{"mbcs_decode", mbcs_decode, METH_VARARGS},
#endif
#endif /* Py_USING_UNICODE */
{"register_error", register_error, METH_VARARGS},
{"lookup_error", lookup_error, METH_VARARGS},
{NULL, NULL} /* sentinel */
};

View File

@ -2468,7 +2468,9 @@ PyDoc_STRVAR(encode__doc__,
Encodes S using the codec registered for encoding. encoding defaults\n\
to the default encoding. errors may be given to set a different error\n\
handling scheme. Default is 'strict' meaning that encoding errors raise\n\
a ValueError. Other possible values are 'ignore' and 'replace'.");
a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
'xmlcharrefreplace' as well as any other name registered with\n\
codecs.register_error that is able to handle UnicodeEncodeErrors.");
static PyObject *
string_encode(PyStringObject *self, PyObject *args)
@ -2487,7 +2489,9 @@ PyDoc_STRVAR(decode__doc__,
Decodes S using the codec registered for encoding. encoding defaults\n\
to the default encoding. errors may be given to set a different error\n\
handling scheme. Default is 'strict' meaning that encoding errors raise\n\
a ValueError. Other possible values are 'ignore' and 'replace'.");
a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
as well as any other name registerd with codecs.register_error that is\n\
able to handle UnicodeDecodeErrors.");
static PyObject *
string_decode(PyStringObject *self, PyObject *args)

File diff suppressed because it is too large Load Diff

View File

@ -422,12 +422,409 @@ PyObject *PyCodec_Decode(PyObject *object,
return NULL;
}
static PyObject *_PyCodec_ErrorRegistry;
/* Register the error handling callback function error under the name
name. This function will be called by the codec when it encounters
an unencodable characters/undecodable bytes and doesn't know the
callback name, when name is specified as the error parameter
in the call to the encode/decode function.
Return 0 on success, -1 on error */
int PyCodec_RegisterError(const char *name, PyObject *error)
{
if (!PyCallable_Check(error)) {
PyErr_SetString(PyExc_TypeError, "handler must be callable");
return -1;
}
return PyDict_SetItemString( _PyCodec_ErrorRegistry, (char *)name, error);
}
/* Lookup the error handling callback function registered under the
name error. As a special case NULL can be passed, in which case
the error handling callback for strict encoding will be returned. */
PyObject *PyCodec_LookupError(const char *name)
{
PyObject *handler = NULL;
if (name==NULL)
name = "strict";
handler = PyDict_GetItemString(_PyCodec_ErrorRegistry, (char *)name);
if (!handler)
PyErr_Format(PyExc_LookupError, "unknown error handler name '%.400s'", name);
else
Py_INCREF(handler);
return handler;
}
/* Set a TypeError saying that an error callback cannot handle the class
   of `exc`.  The class name is obtained via exc.__class__.__name__.
   If any step of obtaining that name fails, the function returns without
   setting the TypeError; callers return NULL right after calling this. */
static void wrong_exception_type(PyObject *exc)
{
    PyObject *type;
    PyObject *name;
    PyObject *string;

    type = PyObject_GetAttrString(exc, "__class__");
    if (type == NULL)
        return;

    name = PyObject_GetAttrString(type, "__name__");
    Py_DECREF(type);
    if (name == NULL)
        return;

    string = PyObject_Str(name);
    Py_DECREF(name);
    /* PyObject_Str() can fail; the result must not be handed to
       PyString_AS_STRING() or Py_DECREF() unchecked. */
    if (string == NULL)
        return;

    PyErr_Format(PyExc_TypeError, "don't know how to handle %.400s in error callback",
                 PyString_AS_STRING(string));
    Py_DECREF(string);
}
/* "strict" error callback: re-raise `exc`.  If `exc` passes
   PyInstance_Check() it is set as the current exception with its
   in_class as the exception type; otherwise a TypeError is set.
   This callback always returns NULL. */
PyObject *
PyCodec_StrictErrors(PyObject *exc)
{
    if (!PyInstance_Check(exc)) {
        PyErr_SetString(PyExc_TypeError, "codec must pass exception instance");
        return NULL;
    }
    PyErr_SetObject((PyObject*)((PyInstanceObject*)exc)->in_class, exc);
    return NULL;
}
/* "ignore" error callback: return the tuple (u"", end), where end is the
   end attribute of the UnicodeEncodeError, UnicodeDecodeError or
   UnicodeTranslateError passed in.  Any other object results in
   wrong_exception_type() being called and NULL being returned. */
PyObject *
PyCodec_IgnoreErrors(PyObject *exc)
{
    int end;
    int failed;

    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError))
        failed = PyUnicodeEncodeError_GetEnd(exc, &end);
    else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError))
        failed = PyUnicodeDecodeError_GetEnd(exc, &end);
    else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError))
        failed = PyUnicodeTranslateError_GetEnd(exc, &end);
    else {
        wrong_exception_type(exc);
        return NULL;
    }
    if (failed)
        return NULL;

    /* A NULL buffer with length 0 would produce None rather than u'',
       so the address of `end` serves as a dummy non-NULL pointer; with
       a length of 0 nothing is read from it. */
    return Py_BuildValue("(u#i)", &end, 0, end);
}
/* "replace" error callback.  Returns a tuple (replacement, end):
   - UnicodeEncodeError:    end-start copies of '?'
   - UnicodeDecodeError:    a single Py_UNICODE_REPLACEMENT_CHARACTER
   - UnicodeTranslateError: end-start copies of
                            Py_UNICODE_REPLACEMENT_CHARACTER
   Any other object results in wrong_exception_type() being called and
   NULL being returned. */
PyObject *
PyCodec_ReplaceErrors(PyObject *exc)
{
    PyObject *filled;
    PyObject *result;
    Py_UNICODE *out;
    Py_UNICODE fill;
    int start;
    int end;
    int remaining;

    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
        if (PyUnicodeEncodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeEncodeError_GetEnd(exc, &end))
            return NULL;
        fill = '?';
    }
    else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
        /* decoding always yields exactly one replacement character */
        Py_UNICODE single = Py_UNICODE_REPLACEMENT_CHARACTER;
        if (PyUnicodeDecodeError_GetEnd(exc, &end))
            return NULL;
        return Py_BuildValue("(u#i)", &single, 1, end);
    }
    else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
        if (PyUnicodeTranslateError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeTranslateError_GetEnd(exc, &end))
            return NULL;
        fill = Py_UNICODE_REPLACEMENT_CHARACTER;
    }
    else {
        wrong_exception_type(exc);
        return NULL;
    }

    /* shared tail for the encode and translate cases: one fill
       character per position in [start, end) */
    filled = PyUnicode_FromUnicode(NULL, end-start);
    if (filled == NULL)
        return NULL;
    out = PyUnicode_AS_UNICODE(filled);
    for (remaining = end-start; remaining > 0; --remaining)
        *out++ = fill;

    result = Py_BuildValue("(Oi)", filled, end);
    Py_DECREF(filled);
    return result;
}
/* "xmlcharrefreplace" error callback.  For a UnicodeEncodeError every
   character of the object attribute in the range [start, end) is replaced
   by "&#" + its decimal value + ";".  Returns the tuple
   (replacement, end), or NULL on failure.  Any other object results in
   wrong_exception_type() being called and NULL being returned. */
PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
{
if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
PyObject *restuple;
PyObject *object;
int start;
int end;
PyObject *res;
Py_UNICODE *p;
Py_UNICODE *startp;
Py_UNICODE *outp;
int ressize;
if (PyUnicodeEncodeError_GetStart(exc, &start))
return NULL;
if (PyUnicodeEncodeError_GetEnd(exc, &end))
return NULL;
/* object is released with Py_DECREF on every exit path below */
if (!(object = PyUnicodeEncodeError_GetObject(exc)))
return NULL;
startp = PyUnicode_AS_UNICODE(object);
/* first pass: compute the output length; each character needs
   2 ("&#") + number of decimal digits + 1 (";") */
for (p = startp+start, ressize = 0; p < startp+end; ++p) {
if (*p<10)
ressize += 2+1+1;
else if (*p<100)
ressize += 2+2+1;
else if (*p<1000)
ressize += 2+3+1;
else if (*p<10000)
ressize += 2+4+1;
else if (*p<100000)
ressize += 2+5+1;
else if (*p<1000000)
ressize += 2+6+1;
else
ressize += 2+7+1;
}
/* allocate replacement */
res = PyUnicode_FromUnicode(NULL, ressize);
if (res == NULL) {
Py_DECREF(object);
return NULL;
}
/* generate replacement */
for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
p < startp+end; ++p) {
Py_UNICODE c = *p;
int digits;
int base;
*outp++ = '&';
*outp++ = '#';
/* digits is the decimal length of *p, base the matching power of
   ten (10^(digits-1)); the thresholds mirror the sizing pass above */
if (*p<10) {
digits = 1;
base = 1;
}
else if (*p<100) {
digits = 2;
base = 10;
}
else if (*p<1000) {
digits = 3;
base = 100;
}
else if (*p<10000) {
digits = 4;
base = 1000;
}
else if (*p<100000) {
digits = 5;
base = 10000;
}
else if (*p<1000000) {
digits = 6;
base = 100000;
}
else {
digits = 7;
base = 1000000;
}
/* emit the digits most significant first */
while (digits-->0) {
*outp++ = '0' + c/base;
c %= base;
base /= 10;
}
*outp++ = ';';
}
restuple = Py_BuildValue("(Oi)", res, end);
Py_DECREF(res);
Py_DECREF(object);
return restuple;
}
else {
wrong_exception_type(exc);
return NULL;
}
}
/* Lowercase hexadecimal digits indexed by nibble value (0..15); used by
   PyCodec_BackslashReplaceErrors() to build \x, \u and \U escapes. */
static Py_UNICODE hexdigits[] = {
'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
};
/* "backslashreplace" error callback.  For a UnicodeEncodeError every
   character of the object attribute in the range [start, end) is replaced
   by a backslash escape:
       c >= 0x10000  ->  \Uxxxxxxxx   (8 hex digits)
       c >= 0x100    ->  \uxxxx       (4 hex digits)
       otherwise     ->  \xxx         (2 hex digits)
   Returns the tuple (replacement, end), or NULL on failure.  Any other
   object results in wrong_exception_type() being called and NULL being
   returned. */
PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
{
    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
        PyObject *restuple;
        PyObject *object;
        int start;
        int end;
        PyObject *res;
        Py_UNICODE *p;
        Py_UNICODE *startp;
        Py_UNICODE *outp;
        int ressize;
        if (PyUnicodeEncodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeEncodeError_GetEnd(exc, &end))
            return NULL;
        /* object must be released on every exit path below */
        if (!(object = PyUnicodeEncodeError_GetObject(exc)))
            return NULL;
        startp = PyUnicode_AS_UNICODE(object);
        /* first pass: compute the output length; each character needs
           1 (backslash) + 1 (escape letter) + number of hex digits */
        for (p = startp+start, ressize = 0; p < startp+end; ++p) {
            if (*p >= 0x00010000)
                ressize += 1+1+8;
            else if (*p >= 0x100) {
                ressize += 1+1+4;
            }
            else
                ressize += 1+1+2;
        }
        res = PyUnicode_FromUnicode(NULL, ressize);
        if (res==NULL) {
            /* do not leak the reference obtained from GetObject */
            Py_DECREF(object);
            return NULL;
        }
        for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
            p < startp+end; ++p) {
            Py_UNICODE c = *p;
            *outp++ = '\\';
            if (c >= 0x00010000) {
                *outp++ = 'U';
                *outp++ = hexdigits[(c>>28)&0xf];
                *outp++ = hexdigits[(c>>24)&0xf];
                *outp++ = hexdigits[(c>>20)&0xf];
                *outp++ = hexdigits[(c>>16)&0xf];
                *outp++ = hexdigits[(c>>12)&0xf];
                *outp++ = hexdigits[(c>>8)&0xf];
            }
            else if (c >= 0x100) {
                *outp++ = 'u';
                *outp++ = hexdigits[(c>>12)&0xf];
                *outp++ = hexdigits[(c>>8)&0xf];
            }
            else
                *outp++ = 'x';
            /* the two lowest hex digits are common to all three forms */
            *outp++ = hexdigits[(c>>4)&0xf];
            *outp++ = hexdigits[c&0xf];
        }
        restuple = Py_BuildValue("(Oi)", res, end);
        Py_DECREF(res);
        Py_DECREF(object);
        return restuple;
    }
    else {
        wrong_exception_type(exc);
        return NULL;
    }
}
/* Method-object entry point for the "strict" handler: `self` is unused,
   the exception is forwarded to PyCodec_StrictErrors(). */
static PyObject *
strict_errors(PyObject *self, PyObject *exc)
{
    return PyCodec_StrictErrors(exc);
}
/* Method-object entry point for the "ignore" handler: `self` is unused,
   the exception is forwarded to PyCodec_IgnoreErrors(). */
static PyObject *
ignore_errors(PyObject *self, PyObject *exc)
{
    return PyCodec_IgnoreErrors(exc);
}
/* Method-object entry point for the "replace" handler: `self` is unused,
   the exception is forwarded to PyCodec_ReplaceErrors(). */
static PyObject *
replace_errors(PyObject *self, PyObject *exc)
{
    return PyCodec_ReplaceErrors(exc);
}
/* Method-object entry point for the "xmlcharrefreplace" handler: `self` is
   unused, the exception is forwarded to PyCodec_XMLCharRefReplaceErrors(). */
static PyObject *
xmlcharrefreplace_errors(PyObject *self, PyObject *exc)
{
    return PyCodec_XMLCharRefReplaceErrors(exc);
}
/* Method-object entry point for the "backslashreplace" handler: `self` is
   unused, the exception is forwarded to PyCodec_BackslashReplaceErrors(). */
static PyObject *
backslashreplace_errors(PyObject *self, PyObject *exc)
{
    return PyCodec_BackslashReplaceErrors(exc);
}
/* Initialise the codec registry globals: create the search path list, the
   search cache dict and the error handler registry while they are still
   NULL, and register the five built-in error handlers from methods[]. */
void _PyCodecRegistry_Init(void)
{
/* Pairs the name an error handler is registered under with the PyMethodDef
   that wraps its C implementation. */
static struct {
char *name;
PyMethodDef def;
} methods[] =
{
{
"strict",
{
"strict_errors",
strict_errors,
METH_O
}
},
{
"ignore",
{
"ignore_errors",
ignore_errors,
METH_O
}
},
{
"replace",
{
"replace_errors",
replace_errors,
METH_O
}
},
{
"xmlcharrefreplace",
{
"xmlcharrefreplace_errors",
xmlcharrefreplace_errors,
METH_O
}
},
{
"backslashreplace",
{
"backslashreplace_errors",
backslashreplace_errors,
METH_O
}
}
};
if (_PyCodec_SearchPath == NULL)
_PyCodec_SearchPath = PyList_New(0);
if (_PyCodec_SearchCache == NULL)
_PyCodec_SearchCache = PyDict_New();
if (_PyCodec_ErrorRegistry == NULL) {
int i;
_PyCodec_ErrorRegistry = PyDict_New();
/* NOTE(review): a failed PyDict_New() here is not reported in the code
   visible below; only the search path and cache are checked. */
if (_PyCodec_ErrorRegistry) {
/* 5 is the number of entries in methods[] above. */
for (i = 0; i < 5; ++i) {
PyObject *func = PyCFunction_New(&methods[i].def, NULL);
int res;
if (!func)
Py_FatalError("can't initialize codec error registry");
res = PyCodec_RegisterError(methods[i].name, func);
/* Drop our reference to func whether or not registration succeeded. */
Py_DECREF(func);
if (res)
Py_FatalError("can't initialize codec error registry");
}
}
}
if (_PyCodec_SearchPath == NULL ||
_PyCodec_SearchCache == NULL)
Py_FatalError("can't initialize codec registry");
@ -439,4 +836,6 @@ void _PyCodecRegistry_Fini(void)
_PyCodec_SearchPath = NULL;
Py_XDECREF(_PyCodec_SearchCache);
_PyCodec_SearchCache = NULL;
Py_XDECREF(_PyCodec_ErrorRegistry);
_PyCodec_ErrorRegistry = NULL;
}

View File

@ -100,6 +100,10 @@ Exception\n\
| +-- ValueError\n\
| | |\n\
| | +-- UnicodeError\n\
| | |\n\
| | +-- UnicodeEncodeError\n\
| | +-- UnicodeDecodeError\n\
| | +-- UnicodeTranslateError\n\
| |\n\
| +-- ReferenceError\n\
| +-- SystemError\n\
@ -840,6 +844,590 @@ static PyMethodDef SyntaxError_methods[] = {
};
/* Read attribute `name` of `exc` into *value.
   Returns 0 on success.  Returns -1 if the attribute lookup fails or, with a
   TypeError raised, if the attribute is not an int. */
static int
get_int(PyObject *exc, const char *name, int *value)
{
    int status = -1;
    PyObject *attr = PyObject_GetAttrString(exc, (char *)name);

    if (attr == NULL)
        return -1;
    if (PyInt_Check(attr)) {
        *value = PyInt_AS_LONG(attr);
        status = 0;
    }
    else {
        PyErr_Format(PyExc_TypeError, "%s attribute must be int", name);
    }
    Py_DECREF(attr);
    return status;
}
/* Store `value` as an int object in attribute `name` of `exc`.
   Returns -1 if the int object cannot be created, otherwise the result of
   PyObject_SetAttrString(). */
static int
set_int(PyObject *exc, const char *name, int value)
{
    int status;
    PyObject *boxed = PyInt_FromLong(value);

    if (boxed == NULL)
        return -1;
    status = PyObject_SetAttrString(exc, (char *)name, boxed);
    Py_DECREF(boxed);
    return status;
}
/* Return attribute `name` of `exc` as a new reference.
   Returns NULL if the lookup fails or, with a TypeError raised, if the
   attribute is not a str. */
static PyObject *
get_string(PyObject *exc, const char *name)
{
    PyObject *attr = PyObject_GetAttrString(exc, (char *)name);

    if (attr != NULL && !PyString_Check(attr)) {
        PyErr_Format(PyExc_TypeError, "%s attribute must be str", name);
        Py_DECREF(attr);
        attr = NULL;
    }
    return attr;
}
/* Store the C string `value` as a str object in attribute `name` of `exc`.
   Returns -1 if the str object cannot be created, otherwise the result of
   PyObject_SetAttrString(). */
static int
set_string(PyObject *exc, const char *name, const char *value)
{
    int status;
    PyObject *text = PyString_FromString(value);

    if (text == NULL)
        return -1;
    status = PyObject_SetAttrString(exc, (char *)name, text);
    Py_DECREF(text);
    return status;
}
/* Return attribute `name` of `exc` as a new reference.
   Returns NULL if the lookup fails or, with a TypeError raised, if the
   attribute is not a unicode object. */
static PyObject *
get_unicode(PyObject *exc, const char *name)
{
    PyObject *attr = PyObject_GetAttrString(exc, (char *)name);

    if (attr != NULL && !PyUnicode_Check(attr)) {
        PyErr_Format(PyExc_TypeError, "%s attribute must be unicode", name);
        Py_DECREF(attr);
        attr = NULL;
    }
    return attr;
}
/* Return the "encoding" attribute of `exc` (must be a str) as a new
   reference, or NULL on failure. */
PyObject *
PyUnicodeEncodeError_GetEncoding(PyObject *exc)
{
    return get_string(exc, "encoding");
}
/* Return the "encoding" attribute of `exc` (must be a str) as a new
   reference, or NULL on failure. */
PyObject *
PyUnicodeDecodeError_GetEncoding(PyObject *exc)
{
    return get_string(exc, "encoding");
}
/* Return the "encoding" attribute of `exc` (must be a str) as a new
   reference, or NULL on failure.
   NOTE(review): UnicodeTranslateError__init__ in this file does not set an
   "encoding" attribute -- confirm this accessor is intended. */
PyObject *
PyUnicodeTranslateError_GetEncoding(PyObject *exc)
{
    return get_string(exc, "encoding");
}
/* Return the "object" attribute of `exc` (must be a unicode object) as a
   new reference, or NULL on failure. */
PyObject *
PyUnicodeEncodeError_GetObject(PyObject *exc)
{
    return get_unicode(exc, "object");
}
/* Return the "object" attribute of `exc` (must be a str) as a new
   reference, or NULL on failure. */
PyObject *
PyUnicodeDecodeError_GetObject(PyObject *exc)
{
    return get_string(exc, "object");
}
/* Return the "object" attribute of `exc` (must be a unicode object) as a
   new reference, or NULL on failure. */
PyObject *
PyUnicodeTranslateError_GetObject(PyObject *exc)
{
    return get_unicode(exc, "object");
}
/* Store the "start" attribute of `exc` in *start, clamped first to be at
   least 0 and then to be at most size-1, where size is the length of the
   exception's unicode object.
   Returns 0 on success, -1 if either attribute cannot be fetched. */
int
PyUnicodeEncodeError_GetStart(PyObject *exc, int *start)
{
    PyObject *object;
    int size;

    if (get_int(exc, "start", start) != 0)
        return -1;
    object = PyUnicodeEncodeError_GetObject(exc);
    if (object == NULL)
        return -1;
    size = PyUnicode_GET_SIZE(object);
    Py_DECREF(object);

    if (*start < 0)
        *start = 0;
    if (*start >= size)
        *start = size-1;
    return 0;
}
/* Store the "start" attribute of `exc` in *start, clamped first to be at
   least 0 and then to be at most size-1, where size is the length of the
   exception's str object.
   Returns 0 on success, -1 if either attribute cannot be fetched. */
int
PyUnicodeDecodeError_GetStart(PyObject *exc, int *start)
{
    PyObject *object;
    int size;

    if (get_int(exc, "start", start) != 0)
        return -1;
    object = PyUnicodeDecodeError_GetObject(exc);
    if (object == NULL)
        return -1;
    size = PyString_GET_SIZE(object);
    Py_DECREF(object);

    if (*start < 0)
        *start = 0;
    if (*start >= size)
        *start = size-1;
    return 0;
}
/* Same as PyUnicodeEncodeError_GetStart(): both exception kinds keep a
   unicode object and an int "start" attribute. */
int
PyUnicodeTranslateError_GetStart(PyObject *exc, int *start)
{
    return PyUnicodeEncodeError_GetStart(exc, start);
}
/* Set the "start" attribute of `exc` to `start`; returns the result of
   set_int(). */
int
PyUnicodeEncodeError_SetStart(PyObject *exc, int start)
{
    return set_int(exc, "start", start);
}
/* Set the "start" attribute of `exc` to `start`; returns the result of
   set_int(). */
int
PyUnicodeDecodeError_SetStart(PyObject *exc, int start)
{
    return set_int(exc, "start", start);
}
/* Set the "start" attribute of `exc` to `start`; returns the result of
   set_int(). */
int
PyUnicodeTranslateError_SetStart(PyObject *exc, int start)
{
    return set_int(exc, "start", start);
}
/* Store the "end" attribute of `exc` in *end, clamped first to be at least
   1 and then to be at most the length of the exception's unicode object.
   Returns 0 on success, -1 if either attribute cannot be fetched. */
int
PyUnicodeEncodeError_GetEnd(PyObject *exc, int *end)
{
    PyObject *object;
    int size;

    if (get_int(exc, "end", end) != 0)
        return -1;
    object = PyUnicodeEncodeError_GetObject(exc);
    if (object == NULL)
        return -1;
    size = PyUnicode_GET_SIZE(object);
    Py_DECREF(object);

    if (*end < 1)
        *end = 1;
    if (*end > size)
        *end = size;
    return 0;
}
/* Store the "end" attribute of `exc` in *end, clamped first to be at least
   1 and then to be at most the length of the exception's str object.
   Returns 0 on success, -1 if either attribute cannot be fetched. */
int
PyUnicodeDecodeError_GetEnd(PyObject *exc, int *end)
{
    PyObject *object;
    int size;

    if (get_int(exc, "end", end) != 0)
        return -1;
    object = PyUnicodeDecodeError_GetObject(exc);
    if (object == NULL)
        return -1;
    size = PyString_GET_SIZE(object);
    Py_DECREF(object);

    if (*end < 1)
        *end = 1;
    if (*end > size)
        *end = size;
    return 0;
}
/* Same as PyUnicodeEncodeError_GetEnd(): stores the clamped "end" attribute
   of `exc` in *end.  Returns 0 on success, -1 on failure.
   The out-parameter is named `end` (it was misleadingly called `start`). */
int
PyUnicodeTranslateError_GetEnd(PyObject *exc, int *end)
{
    return PyUnicodeEncodeError_GetEnd(exc, end);
}
/* Set the "end" attribute of `exc` to `end`; returns the result of
   set_int(). */
int
PyUnicodeEncodeError_SetEnd(PyObject *exc, int end)
{
    return set_int(exc, "end", end);
}
/* Set the "end" attribute of `exc` to `end`; returns the result of
   set_int(). */
int
PyUnicodeDecodeError_SetEnd(PyObject *exc, int end)
{
    return set_int(exc, "end", end);
}
/* Set the "end" attribute of `exc` to `end`; returns the result of
   set_int(). */
int
PyUnicodeTranslateError_SetEnd(PyObject *exc, int end)
{
    return set_int(exc, "end", end);
}
/* Return the "reason" attribute of `exc` (must be a str) as a new
   reference, or NULL on failure. */
PyObject *
PyUnicodeEncodeError_GetReason(PyObject *exc)
{
    return get_string(exc, "reason");
}
/* Return the "reason" attribute of `exc` (must be a str) as a new
   reference, or NULL on failure. */
PyObject *
PyUnicodeDecodeError_GetReason(PyObject *exc)
{
    return get_string(exc, "reason");
}
/* Return the "reason" attribute of `exc` (must be a str) as a new
   reference, or NULL on failure. */
PyObject *
PyUnicodeTranslateError_GetReason(PyObject *exc)
{
    return get_string(exc, "reason");
}
/* Set the "reason" attribute of `exc` to the C string `reason`; returns
   the result of set_string(). */
int
PyUnicodeEncodeError_SetReason(PyObject *exc, const char *reason)
{
    return set_string(exc, "reason", reason);
}
/* Set the "reason" attribute of `exc` to the C string `reason`; returns
   the result of set_string(). */
int
PyUnicodeDecodeError_SetReason(PyObject *exc, const char *reason)
{
    return set_string(exc, "reason", reason);
}
/* Set the "reason" attribute of `exc` to the C string `reason`; returns
   the result of set_string(). */
int
PyUnicodeTranslateError_SetReason(PyObject *exc, const char *reason)
{
    return set_string(exc, "reason", reason);
}
/* Shared __init__ body for UnicodeEncodeError and UnicodeDecodeError.
 *
 * `args` is the full argument tuple including the instance; the instance is
 * extracted with get_self() and the remaining items must be
 * (encoding: str, object: objecttype, start: int, end: int, reason: str).
 * The remaining items are stored as the "args" attribute and each value is
 * stored under its own attribute name.
 *
 * Returns a new reference to None on success, NULL on failure.
 */
static PyObject *
UnicodeError__init__(PyObject *self, PyObject *args, PyTypeObject *objecttype)
{
    PyObject *rtnval = NULL;
    PyObject *encoding;
    PyObject *object;
    PyObject *start;
    PyObject *end;
    PyObject *reason;

    if (!(self = get_self(args)))
        return NULL;

    /* Strip the instance; from here on `args` is a new reference that must
       be released on every exit path. */
    if (!(args = PySequence_GetSlice(args, 1, PySequence_Size(args))))
        return NULL;

    /* On a parse failure go through `finally` so the slice is not leaked. */
    if (!PyArg_ParseTuple(args, "O!O!O!O!O!",
                          &PyString_Type, &encoding,
                          objecttype, &object,
                          &PyInt_Type, &start,
                          &PyInt_Type, &end,
                          &PyString_Type, &reason))
        goto finally;

    if (PyObject_SetAttrString(self, "args", args))
        goto finally;
    if (PyObject_SetAttrString(self, "encoding", encoding))
        goto finally;
    if (PyObject_SetAttrString(self, "object", object))
        goto finally;
    if (PyObject_SetAttrString(self, "start", start))
        goto finally;
    if (PyObject_SetAttrString(self, "end", end))
        goto finally;
    if (PyObject_SetAttrString(self, "reason", reason))
        goto finally;

    Py_INCREF(Py_None);
    rtnval = Py_None;

  finally:
    Py_DECREF(args);
    return rtnval;
}
/* __init__ for UnicodeEncodeError: the shared initialiser, with the
   "object" argument required to be a unicode object. */
static PyObject *UnicodeEncodeError__init__(PyObject *self, PyObject *args)
{
    PyTypeObject *objecttype = &PyUnicode_Type;

    return UnicodeError__init__(self, args, objecttype);
}
/* __str__ for UnicodeEncodeError.
 *
 * Builds "'<encoding>' codec can't encode character ... in position N:
 * <reason>" when exactly one valid character is affected, otherwise the
 * "characters in position N-M" form.
 *
 * Returns a new str, or NULL if one of the attributes cannot be fetched.
 */
static PyObject *
UnicodeEncodeError__str__(PyObject *self, PyObject *arg)
{
    PyObject *encodingObj = NULL;
    PyObject *objectObj = NULL;
    PyObject *reasonObj = NULL;
    PyObject *result = NULL;
    char buffer[1000];
    int length;
    int start;
    int end;

    /* The instance arrives as the single METH_O argument. */
    self = arg;

    if (!(encodingObj = PyUnicodeEncodeError_GetEncoding(self)))
        goto error;

    if (!(objectObj = PyUnicodeEncodeError_GetObject(self)))
        goto error;

    length = PyUnicode_GET_SIZE(objectObj);

    if (PyUnicodeEncodeError_GetStart(self, &start))
        goto error;

    if (PyUnicodeEncodeError_GetEnd(self, &end))
        goto error;

    if (!(reasonObj = PyUnicodeEncodeError_GetReason(self)))
        goto error;

    /* For an empty object the accessors clamp start to -1 and end to 0;
       the range check keeps that case from indexing outside the buffer. */
    if (start >= 0 && start < length && end == start+1) {
        PyOS_snprintf(buffer, sizeof(buffer),
            "'%.400s' codec can't encode character '\\u%x' in position %d: %.400s",
            PyString_AS_STRING(encodingObj),
            (int)PyUnicode_AS_UNICODE(objectObj)[start],
            start,
            PyString_AS_STRING(reasonObj)
        );
    }
    else {
        PyOS_snprintf(buffer, sizeof(buffer),
            "'%.400s' codec can't encode characters in position %d-%d: %.400s",
            PyString_AS_STRING(encodingObj),
            start,
            end-1,
            PyString_AS_STRING(reasonObj)
        );
    }
    result = PyString_FromString(buffer);

  error:
    Py_XDECREF(reasonObj);
    Py_XDECREF(objectObj);
    Py_XDECREF(encodingObj);
    return result;
}
/* Method table referenced by the "UnicodeEncodeError" entry of the
   exception table. */
static PyMethodDef UnicodeEncodeError_methods[] = {
{"__init__", UnicodeEncodeError__init__, METH_VARARGS},
{"__str__", UnicodeEncodeError__str__, METH_O},
/* end-of-table sentinel */
{NULL, NULL}
};
/* Create a UnicodeEncodeError by calling the exception class with
   (encoding, object[0:length], start, end, reason).
   Returns the result of the call (NULL on failure). */
PyObject *
PyUnicodeEncodeError_Create(const char *encoding, const Py_UNICODE *object,
                            int length, int start, int end,
                            const char *reason)
{
    PyObject *exc;

    exc = PyObject_CallFunction(PyExc_UnicodeEncodeError, "su#iis",
                                encoding, object, length,
                                start, end, reason);
    return exc;
}
/* __init__ for UnicodeDecodeError: the shared initialiser, with the
   "object" argument required to be a str. */
static PyObject *UnicodeDecodeError__init__(PyObject *self, PyObject *args)
{
    PyTypeObject *objecttype = &PyString_Type;

    return UnicodeError__init__(self, args, objecttype);
}
/* __str__ for UnicodeDecodeError.
 *
 * Builds "'<encoding>' codec can't decode byte 0x.. in position N:
 * <reason>" when exactly one valid byte is affected, otherwise the
 * "bytes in position N-M" form.
 *
 * Returns a new str, or NULL if one of the attributes cannot be fetched.
 */
static PyObject *
UnicodeDecodeError__str__(PyObject *self, PyObject *arg)
{
    PyObject *encodingObj = NULL;
    PyObject *objectObj = NULL;
    PyObject *reasonObj = NULL;
    PyObject *result = NULL;
    char buffer[1000];
    int length;
    int start;
    int end;

    /* The instance arrives as the single METH_O argument. */
    self = arg;

    if (!(encodingObj = PyUnicodeDecodeError_GetEncoding(self)))
        goto error;

    if (!(objectObj = PyUnicodeDecodeError_GetObject(self)))
        goto error;

    length = PyString_GET_SIZE(objectObj);

    if (PyUnicodeDecodeError_GetStart(self, &start))
        goto error;

    if (PyUnicodeDecodeError_GetEnd(self, &end))
        goto error;

    if (!(reasonObj = PyUnicodeDecodeError_GetReason(self)))
        goto error;

    /* For an empty object the accessors clamp start to -1 and end to 0;
       the range check keeps that case from indexing outside the buffer. */
    if (start >= 0 && start < length && end == start+1) {
        PyOS_snprintf(buffer, sizeof(buffer),
            "'%.400s' codec can't decode byte 0x%x in position %d: %.400s",
            PyString_AS_STRING(encodingObj),
            ((int)PyString_AS_STRING(objectObj)[start])&0xff,
            start,
            PyString_AS_STRING(reasonObj)
        );
    }
    else {
        PyOS_snprintf(buffer, sizeof(buffer),
            "'%.400s' codec can't decode bytes in position %d-%d: %.400s",
            PyString_AS_STRING(encodingObj),
            start,
            end-1,
            PyString_AS_STRING(reasonObj)
        );
    }
    result = PyString_FromString(buffer);

  error:
    Py_XDECREF(reasonObj);
    Py_XDECREF(objectObj);
    Py_XDECREF(encodingObj);
    return result;
}
/* Method table referenced by the "UnicodeDecodeError" entry of the
   exception table. */
static PyMethodDef UnicodeDecodeError_methods[] = {
{"__init__", UnicodeDecodeError__init__, METH_VARARGS},
{"__str__", UnicodeDecodeError__str__, METH_O},
/* end-of-table sentinel */
{NULL, NULL}
};
/* Create a UnicodeDecodeError by calling the exception class with
   (encoding, object[0:length], start, end, reason).
   Returns the result of the call (NULL on failure). */
PyObject *
PyUnicodeDecodeError_Create(const char *encoding, const char *object,
                            int length, int start, int end,
                            const char *reason)
{
    PyObject *exc;

    exc = PyObject_CallFunction(PyExc_UnicodeDecodeError, "ss#iis",
                                encoding, object, length,
                                start, end, reason);
    return exc;
}
/* __init__ for UnicodeTranslateError.
 *
 * `args` is the full argument tuple including the instance; after the
 * instance is removed the remaining items must be
 * (object: unicode, start: int, end: int, reason: str).  They are stored as
 * the "args" attribute and individually under their own attribute names.
 *
 * Returns a new reference to None on success, NULL on failure.
 */
static PyObject *
UnicodeTranslateError__init__(PyObject *self, PyObject *args)
{
    PyObject *rtnval = NULL;
    PyObject *object;
    PyObject *start;
    PyObject *end;
    PyObject *reason;

    self = get_self(args);
    if (self == NULL)
        return NULL;

    /* `args` becomes an owned slice without the instance. */
    args = PySequence_GetSlice(args, 1, PySequence_Size(args));
    if (args == NULL)
        return NULL;

    /* The && chain stops at the first failing step, exactly like the
       original sequence of early exits. */
    if (PyArg_ParseTuple(args, "O!O!O!O!",
                         &PyUnicode_Type, &object,
                         &PyInt_Type, &start,
                         &PyInt_Type, &end,
                         &PyString_Type, &reason) &&
        !PyObject_SetAttrString(self, "args", args) &&
        !PyObject_SetAttrString(self, "object", object) &&
        !PyObject_SetAttrString(self, "start", start) &&
        !PyObject_SetAttrString(self, "end", end) &&
        !PyObject_SetAttrString(self, "reason", reason)) {
        Py_INCREF(Py_None);
        rtnval = Py_None;
    }

    Py_DECREF(args);
    return rtnval;
}
/* __str__ for UnicodeTranslateError.
 *
 * Builds "can't translate character ... in position N: <reason>" when
 * exactly one valid character is affected, otherwise the
 * "characters in position N-M" form.
 *
 * Returns a new str, or NULL if one of the attributes cannot be fetched.
 */
static PyObject *
UnicodeTranslateError__str__(PyObject *self, PyObject *arg)
{
    PyObject *objectObj = NULL;
    PyObject *reasonObj = NULL;
    PyObject *result = NULL;
    char buffer[1000];
    int length;
    int start;
    int end;

    /* The instance arrives as the single METH_O argument. */
    self = arg;

    if (!(objectObj = PyUnicodeTranslateError_GetObject(self)))
        goto error;

    length = PyUnicode_GET_SIZE(objectObj);

    if (PyUnicodeTranslateError_GetStart(self, &start))
        goto error;

    if (PyUnicodeTranslateError_GetEnd(self, &end))
        goto error;

    if (!(reasonObj = PyUnicodeTranslateError_GetReason(self)))
        goto error;

    /* For an empty object the accessors clamp start to -1 and end to 0;
       the range check keeps that case from indexing outside the buffer. */
    if (start >= 0 && start < length && end == start+1) {
        PyOS_snprintf(buffer, sizeof(buffer),
            "can't translate character '\\u%x' in position %d: %.400s",
            (int)PyUnicode_AS_UNICODE(objectObj)[start],
            start,
            PyString_AS_STRING(reasonObj)
        );
    }
    else {
        PyOS_snprintf(buffer, sizeof(buffer),
            "can't translate characters in position %d-%d: %.400s",
            start,
            end-1,
            PyString_AS_STRING(reasonObj)
        );
    }
    result = PyString_FromString(buffer);

  error:
    Py_XDECREF(reasonObj);
    Py_XDECREF(objectObj);
    return result;
}
/* Method table referenced by the "UnicodeTranslateError" entry of the
   exception table. */
static PyMethodDef UnicodeTranslateError_methods[] = {
{"__init__", UnicodeTranslateError__init__, METH_VARARGS},
{"__str__", UnicodeTranslateError__str__, METH_O},
/* end-of-table sentinel */
{NULL, NULL}
};
/* Create a UnicodeTranslateError by calling the exception class with
   (object[0:length], start, end, reason).
   Returns the result of the call (NULL on failure). */
PyObject *
PyUnicodeTranslateError_Create(const Py_UNICODE *object, int length,
                               int start, int end, const char *reason)
{
    PyObject *exc;

    exc = PyObject_CallFunction(PyExc_UnicodeTranslateError, "u#iis",
                                object, length, start, end, reason);
    return exc;
}
/* Exception doc strings */
@ -865,6 +1453,12 @@ PyDoc_STRVAR(ValueError__doc__,
/* One-line docstrings for UnicodeError and the encode, decode and
   translate error classes derived from it. */
PyDoc_STRVAR(UnicodeError__doc__, "Unicode related error.");
PyDoc_STRVAR(UnicodeEncodeError__doc__, "Unicode encoding error.");
PyDoc_STRVAR(UnicodeDecodeError__doc__, "Unicode decoding error.");
PyDoc_STRVAR(UnicodeTranslateError__doc__, "Unicode translation error.");
PyDoc_STRVAR(SystemError__doc__,
"Internal error in the Python interpreter.\n\
\n\
@ -949,6 +1543,9 @@ PyObject *PyExc_SystemError;
/* Global pointers to exception classes (part of a longer list). */
PyObject *PyExc_SystemExit;
PyObject *PyExc_UnboundLocalError;
PyObject *PyExc_UnicodeError;
/* Specialised Unicode errors for encoding, decoding and translating. */
PyObject *PyExc_UnicodeEncodeError;
PyObject *PyExc_UnicodeDecodeError;
PyObject *PyExc_UnicodeTranslateError;
PyObject *PyExc_TypeError;
PyObject *PyExc_ValueError;
PyObject *PyExc_ZeroDivisionError;
@ -1035,6 +1632,12 @@ static struct {
FloatingPointError__doc__},
{"ValueError", &PyExc_ValueError, 0, ValueError__doc__},
{"UnicodeError", &PyExc_UnicodeError, &PyExc_ValueError, UnicodeError__doc__},
{"UnicodeEncodeError", &PyExc_UnicodeEncodeError, &PyExc_UnicodeError,
UnicodeEncodeError__doc__, UnicodeEncodeError_methods},
{"UnicodeDecodeError", &PyExc_UnicodeDecodeError, &PyExc_UnicodeError,
UnicodeDecodeError__doc__, UnicodeDecodeError_methods},
{"UnicodeTranslateError", &PyExc_UnicodeTranslateError, &PyExc_UnicodeError,
UnicodeTranslateError__doc__, UnicodeTranslateError_methods},
{"ReferenceError", &PyExc_ReferenceError, 0, ReferenceError__doc__},
{"SystemError", &PyExc_SystemError, 0, SystemError__doc__},
{"MemoryError", &PyExc_MemoryError, 0, MemoryError__doc__},