Rename utf8b error handler to surrogateescape.

This commit is contained in:
Martin v. Löwis 2009-05-10 08:15:24 +00:00
parent e0a2b72e61
commit 43c57785d3
9 changed files with 30 additions and 30 deletions

View File

@ -322,7 +322,7 @@ and implemented by all standard Python codecs:
| ``'backslashreplace'`` | Replace with backslashed escape sequences | | ``'backslashreplace'`` | Replace with backslashed escape sequences |
| | (only for encoding). | | | (only for encoding). |
+-------------------------+-----------------------------------------------+ +-------------------------+-----------------------------------------------+
| ``'utf8b'`` | Replace byte with surrogate U+DCxx. | | ``'surrogateescape'`` | Replace byte with surrogate U+DCxx. |
+-------------------------+-----------------------------------------------+ +-------------------------+-----------------------------------------------+
In addition, the following error handlers are specific to a single codec: In addition, the following error handlers are specific to a single codec:
@ -335,7 +335,7 @@ In addition, the following error handlers are specific to a single codec:
+-------------------+---------+-------------------------------------------+ +-------------------+---------+-------------------------------------------+
.. versionadded:: 3.1 .. versionadded:: 3.1
The ``'utf8b'`` and ``'surrogatepass'`` error handlers. The ``'surrogateescape'`` and ``'surrogatepass'`` error handlers.
The set of allowed values can be extended via :meth:`register_error`. The set of allowed values can be extended via :meth:`register_error`.

View File

@ -64,8 +64,8 @@ perform this conversion (see :func:`sys.getfilesystemencoding`).
.. versionchanged:: 3.1 .. versionchanged:: 3.1
On some systems, conversion using the file system encoding may On some systems, conversion using the file system encoding may
fail. In this case, Python uses the ``utf8b`` encoding error fail. In this case, Python uses the ``surrogateescape`` encoding
handler, which means that undecodable bytes are replaced by a error handler, which means that undecodable bytes are replaced by a
Unicode character U+DCxx on decoding, and these are again Unicode character U+DCxx on decoding, and these are again
translated to the original byte on encoding. translated to the original byte on encoding.

View File

@ -1521,32 +1521,32 @@ class TypesTest(unittest.TestCase):
self.assertEquals(codecs.raw_unicode_escape_decode(r"\u1234"), ("\u1234", 6)) self.assertEquals(codecs.raw_unicode_escape_decode(r"\u1234"), ("\u1234", 6))
self.assertEquals(codecs.raw_unicode_escape_decode(br"\u1234"), ("\u1234", 6)) self.assertEquals(codecs.raw_unicode_escape_decode(br"\u1234"), ("\u1234", 6))
class Utf8bTest(unittest.TestCase): class SurrogateEscapeTest(unittest.TestCase):
def test_utf8(self): def test_utf8(self):
# Bad byte # Bad byte
self.assertEqual(b"foo\x80bar".decode("utf-8", "utf8b"), self.assertEqual(b"foo\x80bar".decode("utf-8", "surrogateescape"),
"foo\udc80bar") "foo\udc80bar")
self.assertEqual("foo\udc80bar".encode("utf-8", "utf8b"), self.assertEqual("foo\udc80bar".encode("utf-8", "surrogateescape"),
b"foo\x80bar") b"foo\x80bar")
# bad-utf-8 encoded surrogate # bad-utf-8 encoded surrogate
self.assertEqual(b"\xed\xb0\x80".decode("utf-8", "utf8b"), self.assertEqual(b"\xed\xb0\x80".decode("utf-8", "surrogateescape"),
"\udced\udcb0\udc80") "\udced\udcb0\udc80")
self.assertEqual("\udced\udcb0\udc80".encode("utf-8", "utf8b"), self.assertEqual("\udced\udcb0\udc80".encode("utf-8", "surrogateescape"),
b"\xed\xb0\x80") b"\xed\xb0\x80")
def test_ascii(self): def test_ascii(self):
# bad byte # bad byte
self.assertEqual(b"foo\x80bar".decode("ascii", "utf8b"), self.assertEqual(b"foo\x80bar".decode("ascii", "surrogateescape"),
"foo\udc80bar") "foo\udc80bar")
self.assertEqual("foo\udc80bar".encode("ascii", "utf8b"), self.assertEqual("foo\udc80bar".encode("ascii", "surrogateescape"),
b"foo\x80bar") b"foo\x80bar")
def test_charmap(self): def test_charmap(self):
# bad byte: \xa5 is unmapped in iso-8859-3 # bad byte: \xa5 is unmapped in iso-8859-3
self.assertEqual(b"foo\xa5bar".decode("iso-8859-3", "utf8b"), self.assertEqual(b"foo\xa5bar".decode("iso-8859-3", "surrogateescape"),
"foo\udca5bar") "foo\udca5bar")
self.assertEqual("foo\udca5bar".encode("iso-8859-3", "utf8b"), self.assertEqual("foo\udca5bar".encode("iso-8859-3", "surrogateescape"),
b"foo\xa5bar") b"foo\xa5bar")
@ -1576,7 +1576,7 @@ def test_main():
CharmapTest, CharmapTest,
WithStmtTest, WithStmtTest,
TypesTest, TypesTest,
Utf8bTest, SurrogateEscapeTest,
) )

View File

@ -708,13 +708,13 @@ if sys.platform != 'win32':
self.fsencoding = sys.getfilesystemencoding() self.fsencoding = sys.getfilesystemencoding()
sys.setfilesystemencoding("utf-8") sys.setfilesystemencoding("utf-8")
self.dir = support.TESTFN self.dir = support.TESTFN
self.bdir = self.dir.encode("utf-8", "utf8b") self.bdir = self.dir.encode("utf-8", "surrogateescape")
os.mkdir(self.dir) os.mkdir(self.dir)
self.unicodefn = [] self.unicodefn = []
for fn in self.filenames: for fn in self.filenames:
f = open(os.path.join(self.bdir, fn), "w") f = open(os.path.join(self.bdir, fn), "w")
f.close() f.close()
self.unicodefn.append(fn.decode("utf-8", "utf8b")) self.unicodefn.append(fn.decode("utf-8", "surrogateescape"))
def tearDown(self): def tearDown(self):
shutil.rmtree(self.dir) shutil.rmtree(self.dir)

View File

@ -245,7 +245,7 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds)
return -1; return -1;
stringobj = PyUnicode_AsEncodedString( stringobj = PyUnicode_AsEncodedString(
u, Py_FileSystemDefaultEncoding, "utf8b"); u, Py_FileSystemDefaultEncoding, "surrogateescape");
Py_DECREF(u); Py_DECREF(u);
if (stringobj == NULL) if (stringobj == NULL)
return -1; return -1;

View File

@ -494,13 +494,13 @@ convertenviron(void)
if (p == NULL) if (p == NULL)
continue; continue;
k = PyUnicode_Decode(*e, (int)(p-*e), k = PyUnicode_Decode(*e, (int)(p-*e),
Py_FileSystemDefaultEncoding, "utf8b"); Py_FileSystemDefaultEncoding, "surrogateescape");
if (k == NULL) { if (k == NULL) {
PyErr_Clear(); PyErr_Clear();
continue; continue;
} }
v = PyUnicode_Decode(p+1, strlen(p+1), v = PyUnicode_Decode(p+1, strlen(p+1),
Py_FileSystemDefaultEncoding, "utf8b"); Py_FileSystemDefaultEncoding, "surrogateescape");
if (v == NULL) { if (v == NULL) {
PyErr_Clear(); PyErr_Clear();
Py_DECREF(k); Py_DECREF(k);
@ -2167,7 +2167,7 @@ posix_getcwd(int use_bytes)
return posix_error(); return posix_error();
if (use_bytes) if (use_bytes)
return PyBytes_FromStringAndSize(buf, strlen(buf)); return PyBytes_FromStringAndSize(buf, strlen(buf));
return PyUnicode_Decode(buf, strlen(buf), Py_FileSystemDefaultEncoding,"utf8b"); return PyUnicode_Decode(buf, strlen(buf), Py_FileSystemDefaultEncoding,"surrogateescape");
} }
PyDoc_STRVAR(posix_getcwd__doc__, PyDoc_STRVAR(posix_getcwd__doc__,
@ -2513,7 +2513,7 @@ posix_listdir(PyObject *self, PyObject *args)
w = PyUnicode_FromEncodedObject(v, w = PyUnicode_FromEncodedObject(v,
Py_FileSystemDefaultEncoding, Py_FileSystemDefaultEncoding,
"utf8b"); "surrogateescape");
Py_DECREF(v); Py_DECREF(v);
if (w != NULL) if (w != NULL)
v = w; v = w;
@ -4695,7 +4695,7 @@ posix_readlink(PyObject *self, PyObject *args)
w = PyUnicode_FromEncodedObject(v, w = PyUnicode_FromEncodedObject(v,
Py_FileSystemDefaultEncoding, Py_FileSystemDefaultEncoding,
"utf8b"); "surrogateescape");
if (w != NULL) { if (w != NULL) {
Py_DECREF(v); Py_DECREF(v);
v = w; v = w;

View File

@ -42,7 +42,7 @@ char2wchar(char* arg)
return res; return res;
PyMem_Free(res); PyMem_Free(res);
} }
/* Conversion failed. Fall back to escaping with utf8b. */ /* Conversion failed. Fall back to escaping with surrogateescape. */
#ifdef HAVE_MBRTOWC #ifdef HAVE_MBRTOWC
/* Try conversion with mbrtowc (C99), and escape non-decodable bytes. */ /* Try conversion with mbrtowc (C99), and escape non-decodable bytes. */

View File

@ -1549,7 +1549,7 @@ PyUnicode_FSConverter(PyObject* arg, void* addr)
return 0; return 0;
output = PyUnicode_AsEncodedObject(arg, output = PyUnicode_AsEncodedObject(arg,
Py_FileSystemDefaultEncoding, Py_FileSystemDefaultEncoding,
"utf8b"); "surrogateescape");
Py_DECREF(arg); Py_DECREF(arg);
if (!output) if (!output)
return 0; return 0;

View File

@ -830,7 +830,7 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
} }
static PyObject * static PyObject *
PyCodec_UTF8bErrors(PyObject *exc) PyCodec_SurrogateEscapeErrors(PyObject *exc)
{ {
PyObject *restuple; PyObject *restuple;
PyObject *object; PyObject *object;
@ -940,9 +940,9 @@ static PyObject *surrogatepass_errors(PyObject *self, PyObject *exc)
return PyCodec_SurrogatePassErrors(exc); return PyCodec_SurrogatePassErrors(exc);
} }
static PyObject *utf8b_errors(PyObject *self, PyObject *exc) static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc)
{ {
return PyCodec_UTF8bErrors(exc); return PyCodec_SurrogateEscapeErrors(exc);
} }
static int _PyCodecRegistry_Init(void) static int _PyCodecRegistry_Init(void)
@ -1001,10 +1001,10 @@ static int _PyCodecRegistry_Init(void)
} }
}, },
{ {
"utf8b", "surrogateescape",
{ {
"utf8b", "surrogateescape",
utf8b_errors, surrogateescape_errors,
METH_O METH_O
} }
} }