Issue #13560: Locale codec functions use the classic "errors" parameter,

instead of surrogateescape

So it would be possible to support more error handlers later.
This commit is contained in:
Victor Stinner 2011-12-17 05:47:23 +01:00
parent ab59594326
commit 1b57967b96
6 changed files with 49 additions and 17 deletions

View File

@ -1608,14 +1608,14 @@ PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocaleAndSize(
const char *str,
Py_ssize_t len,
int surrogateescape);
const char *errors);
/* Similar to PyUnicode_DecodeLocaleAndSize(), but compute the string
length using strlen(). */
PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale(
const char *str,
int surrogateescape);
const char *errors);
/* Encode a Unicode object to the current locale encoding. The encoder is
strict if *surrogateescape* is equal to zero, otherwise the
@ -1624,7 +1624,7 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale(
PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale(
PyObject *unicode,
int surrogateescape
const char *errors
);
/* --- File system encoding ---------------------------------------------- */

View File

@ -495,7 +495,7 @@ Py_Main(int argc, wchar_t **argv)
/* Use utf-8 on Mac OS X */
unicode = PyUnicode_FromString(p);
#else
unicode = PyUnicode_DecodeLocale(p, 1);
unicode = PyUnicode_DecodeLocale(p, "surrogateescape");
#endif
if (unicode == NULL) {
/* ignore errors */

View File

@ -7891,7 +7891,7 @@ posix_strerror(PyObject *self, PyObject *args)
"strerror() argument out of range");
return NULL;
}
return PyUnicode_DecodeLocale(message, 1);
return PyUnicode_DecodeLocale(message, "surrogateescape");
}

View File

@ -486,7 +486,7 @@ time_strftime(PyObject *self, PyObject *args)
fmt = format;
#else
/* Convert the unicode string to an ascii one */
format = PyUnicode_EncodeLocale(format_arg, 1);
format = PyUnicode_EncodeLocale(format_arg, "surrogateescape");
if (format == NULL)
return NULL;
fmt = PyBytes_AS_STRING(format);
@ -532,7 +532,8 @@ time_strftime(PyObject *self, PyObject *args)
#ifdef HAVE_WCSFTIME
ret = PyUnicode_FromWideChar(outbuf, buflen);
#else
ret = PyUnicode_DecodeLocaleAndSize(outbuf, buflen, 1);
ret = PyUnicode_DecodeLocaleAndSize(outbuf, buflen,
"surrogateescape");
#endif
PyMem_Free(outbuf);
break;
@ -764,8 +765,8 @@ PyInit_timezone(PyObject *m) {
#endif /* PYOS_OS2 */
#endif
PyModule_AddIntConstant(m, "daylight", daylight);
otz0 = PyUnicode_DecodeLocale(tzname[0], 1);
otz1 = PyUnicode_DecodeLocale(tzname[1], 1);
otz0 = PyUnicode_DecodeLocale(tzname[0], "surrogateescape");
otz1 = PyUnicode_DecodeLocale(tzname[1], "surrogateescape");
PyModule_AddObject(m, "tzname", Py_BuildValue("(NN)", otz0, otz1));
#else /* !HAVE_TZNAME || __GLIBC__ || __CYGWIN__*/
#ifdef HAVE_STRUCT_TM_TM_ZONE

View File

@ -3125,8 +3125,31 @@ wcstombs_errorpos(const wchar_t *wstr)
return 0;
}
/* Map a codec-style error handler name onto the boolean flag used by the
   locale codec functions.

   errors:           handler name; NULL is treated like "strict".
   surrogateescape:  output flag, set to 0 for NULL/"strict" and to 1 for
                     "surrogateescape".

   Return 0 on success.  For any other handler name, set a ValueError and
   return -1 without writing to *surrogateescape. */
static int
locale_error_handler(const char *errors, int *surrogateescape)
{
    if (errors != NULL && strcmp(errors, "strict") != 0) {
        /* Not the strict default: only one other handler is accepted. */
        if (strcmp(errors, "surrogateescape") != 0) {
            PyErr_Format(PyExc_ValueError,
                         "only 'strict' and 'surrogateescape' error handlers "
                         "are supported, not '%s'",
                         errors);
            return -1;
        }
        *surrogateescape = 1;
        return 0;
    }

    /* NULL or "strict" */
    *surrogateescape = 0;
    return 0;
}
PyObject *
PyUnicode_EncodeLocale(PyObject *unicode, int surrogateescape)
PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
{
Py_ssize_t wlen, wlen2;
wchar_t *wstr;
@ -3135,6 +3158,10 @@ PyUnicode_EncodeLocale(PyObject *unicode, int surrogateescape)
PyObject *reason;
PyObject *exc;
size_t error_pos;
int surrogateescape;
if (locale_error_handler(errors, &surrogateescape) < 0)
return NULL;
wstr = PyUnicode_AsWideCharString(unicode, &wlen);
if (wstr == NULL)
@ -3198,7 +3225,7 @@ encode_error:
Py_XDECREF(bytes);
if (errmsg != NULL)
reason = PyUnicode_DecodeLocale(errmsg, 1);
reason = PyUnicode_DecodeLocale(errmsg, "surrogateescape");
else
reason = PyUnicode_FromString(
"wcstombs() encountered an unencodable "
@ -3243,7 +3270,7 @@ PyUnicode_EncodeFSDefault(PyObject *unicode)
"surrogateescape");
}
else {
return PyUnicode_EncodeLocale(unicode, 1);
return PyUnicode_EncodeLocale(unicode, "surrogateescape");
}
#endif
}
@ -3351,13 +3378,17 @@ PyUnicode_AsEncodedUnicode(PyObject *unicode,
PyObject*
PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
int surrogateescape)
const char *errors)
{
wchar_t smallbuf[256];
size_t smallbuf_len = Py_ARRAY_LENGTH(smallbuf);
wchar_t *wstr;
size_t wlen, wlen2;
PyObject *unicode;
int surrogateescape;
if (locale_error_handler(errors, &surrogateescape) < 0)
return NULL;
if (str[len] != '\0' || len != strlen(str)) {
PyErr_SetString(PyExc_TypeError, "embedded null character");
@ -3419,10 +3450,10 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
}
/* Convenience wrapper around PyUnicode_DecodeLocaleAndSize(): the length of
   *str* is computed with strlen(), so *str* must be null-terminated, and the
   error-handling argument is forwarded unchanged.  The result is whatever
   PyUnicode_DecodeLocaleAndSize() returns. */
PyObject*
PyUnicode_DecodeLocale(const char *str, int surrogateescape)
PyUnicode_DecodeLocale(const char *str, const char *errors)
{
Py_ssize_t size = (Py_ssize_t)strlen(str);
return PyUnicode_DecodeLocaleAndSize(str, size, surrogateescape);
return PyUnicode_DecodeLocaleAndSize(str, size, errors);
}
@ -3456,7 +3487,7 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
"surrogateescape");
}
else {
return PyUnicode_DecodeLocaleAndSize(s, size, 1);
return PyUnicode_DecodeLocaleAndSize(s, size, "surrogateescape");
}
#endif
}

View File

@ -355,7 +355,7 @@ PyErr_SetFromErrnoWithFilenameObject(PyObject *exc, PyObject *filenameObject)
#ifndef MS_WINDOWS
if (i != 0) {
char *s = strerror(i);
message = PyUnicode_DecodeLocale(s, 1);
message = PyUnicode_DecodeLocale(s, "surrogateescape");
}
else {
/* Sometimes errno didn't get set */