Issue #13560: Locale codec functions use the classic "errors" parameter,

instead of surrogateescape

So it would be possible to support more error handlers later.
This commit is contained in:
Victor Stinner 2011-12-17 05:47:23 +01:00
parent ab59594326
commit 1b57967b96
6 changed files with 49 additions and 17 deletions

View File

@ -1608,14 +1608,14 @@ PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocaleAndSize( PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocaleAndSize(
const char *str, const char *str,
Py_ssize_t len, Py_ssize_t len,
int surrogateescape); const char *errors);
/* Similar to PyUnicode_DecodeLocaleAndSize(), but compute the string /* Similar to PyUnicode_DecodeLocaleAndSize(), but compute the string
length using strlen(). */ length using strlen(). */
PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale( PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale(
const char *str, const char *str,
int surrogateescape); const char *errors);
/* Encode a Unicode object to the current locale encoding. The encoder is /* Encode a Unicode object to the current locale encoding. The encoder is
strict if *surrogateescape* is equal to zero, otherwise the strict if *surrogateescape* is equal to zero, otherwise the
@ -1624,7 +1624,7 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale(
PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale( PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale(
PyObject *unicode, PyObject *unicode,
int surrogateescape const char *errors
); );
/* --- File system encoding ---------------------------------------------- */ /* --- File system encoding ---------------------------------------------- */

View File

@ -495,7 +495,7 @@ Py_Main(int argc, wchar_t **argv)
/* Use utf-8 on Mac OS X */ /* Use utf-8 on Mac OS X */
unicode = PyUnicode_FromString(p); unicode = PyUnicode_FromString(p);
#else #else
unicode = PyUnicode_DecodeLocale(p, 1); unicode = PyUnicode_DecodeLocale(p, "surrogateescape");
#endif #endif
if (unicode == NULL) { if (unicode == NULL) {
/* ignore errors */ /* ignore errors */

View File

@ -7891,7 +7891,7 @@ posix_strerror(PyObject *self, PyObject *args)
"strerror() argument out of range"); "strerror() argument out of range");
return NULL; return NULL;
} }
return PyUnicode_DecodeLocale(message, 1); return PyUnicode_DecodeLocale(message, "surrogateescape");
} }

View File

@ -486,7 +486,7 @@ time_strftime(PyObject *self, PyObject *args)
fmt = format; fmt = format;
#else #else
/* Convert the unicode string to an ascii one */ /* Convert the unicode string to an ascii one */
format = PyUnicode_EncodeLocale(format_arg, 1); format = PyUnicode_EncodeLocale(format_arg, "surrogateescape");
if (format == NULL) if (format == NULL)
return NULL; return NULL;
fmt = PyBytes_AS_STRING(format); fmt = PyBytes_AS_STRING(format);
@ -532,7 +532,8 @@ time_strftime(PyObject *self, PyObject *args)
#ifdef HAVE_WCSFTIME #ifdef HAVE_WCSFTIME
ret = PyUnicode_FromWideChar(outbuf, buflen); ret = PyUnicode_FromWideChar(outbuf, buflen);
#else #else
ret = PyUnicode_DecodeLocaleAndSize(outbuf, buflen, 1); ret = PyUnicode_DecodeLocaleAndSize(outbuf, buflen,
"surrogateescape");
#endif #endif
PyMem_Free(outbuf); PyMem_Free(outbuf);
break; break;
@ -764,8 +765,8 @@ PyInit_timezone(PyObject *m) {
#endif /* PYOS_OS2 */ #endif /* PYOS_OS2 */
#endif #endif
PyModule_AddIntConstant(m, "daylight", daylight); PyModule_AddIntConstant(m, "daylight", daylight);
otz0 = PyUnicode_DecodeLocale(tzname[0], 1); otz0 = PyUnicode_DecodeLocale(tzname[0], "surrogateescape");
otz1 = PyUnicode_DecodeLocale(tzname[1], 1); otz1 = PyUnicode_DecodeLocale(tzname[1], "surrogateescape");
PyModule_AddObject(m, "tzname", Py_BuildValue("(NN)", otz0, otz1)); PyModule_AddObject(m, "tzname", Py_BuildValue("(NN)", otz0, otz1));
#else /* !HAVE_TZNAME || __GLIBC__ || __CYGWIN__*/ #else /* !HAVE_TZNAME || __GLIBC__ || __CYGWIN__*/
#ifdef HAVE_STRUCT_TM_TM_ZONE #ifdef HAVE_STRUCT_TM_TM_ZONE

View File

@ -3125,8 +3125,31 @@ wcstombs_errorpos(const wchar_t *wstr)
return 0; return 0;
} }
/* Translate an error handler name into the flag used by the locale codecs.

   errors: NULL or "strict" store 0 in *surrogateescape;
           "surrogateescape" stores 1 in *surrogateescape.

   Return 0 when the name is recognised.  For any other name, set a
   ValueError with PyErr_Format() and return -1 without writing to
   *surrogateescape. */
static int
locale_error_handler(const char *errors, int *surrogateescape)
{
    /* A NULL name is handled exactly like "strict". */
    if (errors == NULL || strcmp(errors, "strict") == 0) {
        *surrogateescape = 0;
    }
    else if (strcmp(errors, "surrogateescape") == 0) {
        *surrogateescape = 1;
    }
    else {
        PyErr_Format(PyExc_ValueError,
                     "only 'strict' and 'surrogateescape' error handlers "
                     "are supported, not '%s'",
                     errors);
        return -1;
    }
    return 0;
}
PyObject * PyObject *
PyUnicode_EncodeLocale(PyObject *unicode, int surrogateescape) PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
{ {
Py_ssize_t wlen, wlen2; Py_ssize_t wlen, wlen2;
wchar_t *wstr; wchar_t *wstr;
@ -3135,6 +3158,10 @@ PyUnicode_EncodeLocale(PyObject *unicode, int surrogateescape)
PyObject *reason; PyObject *reason;
PyObject *exc; PyObject *exc;
size_t error_pos; size_t error_pos;
int surrogateescape;
if (locale_error_handler(errors, &surrogateescape) < 0)
return NULL;
wstr = PyUnicode_AsWideCharString(unicode, &wlen); wstr = PyUnicode_AsWideCharString(unicode, &wlen);
if (wstr == NULL) if (wstr == NULL)
@ -3198,7 +3225,7 @@ encode_error:
Py_XDECREF(bytes); Py_XDECREF(bytes);
if (errmsg != NULL) if (errmsg != NULL)
reason = PyUnicode_DecodeLocale(errmsg, 1); reason = PyUnicode_DecodeLocale(errmsg, "surrogateescape");
else else
reason = PyUnicode_FromString( reason = PyUnicode_FromString(
"wcstombs() encountered an unencodable " "wcstombs() encountered an unencodable "
@ -3243,7 +3270,7 @@ PyUnicode_EncodeFSDefault(PyObject *unicode)
"surrogateescape"); "surrogateescape");
} }
else { else {
return PyUnicode_EncodeLocale(unicode, 1); return PyUnicode_EncodeLocale(unicode, "surrogateescape");
} }
#endif #endif
} }
@ -3351,13 +3378,17 @@ PyUnicode_AsEncodedUnicode(PyObject *unicode,
PyObject* PyObject*
PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len, PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
int surrogateescape) const char *errors)
{ {
wchar_t smallbuf[256]; wchar_t smallbuf[256];
size_t smallbuf_len = Py_ARRAY_LENGTH(smallbuf); size_t smallbuf_len = Py_ARRAY_LENGTH(smallbuf);
wchar_t *wstr; wchar_t *wstr;
size_t wlen, wlen2; size_t wlen, wlen2;
PyObject *unicode; PyObject *unicode;
int surrogateescape;
if (locale_error_handler(errors, &surrogateescape) < 0)
return NULL;
if (str[len] != '\0' || len != strlen(str)) { if (str[len] != '\0' || len != strlen(str)) {
PyErr_SetString(PyExc_TypeError, "embedded null character"); PyErr_SetString(PyExc_TypeError, "embedded null character");
@ -3419,10 +3450,10 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
} }
PyObject* PyObject*
PyUnicode_DecodeLocale(const char *str, int surrogateescape) PyUnicode_DecodeLocale(const char *str, const char *errors)
{ {
Py_ssize_t size = (Py_ssize_t)strlen(str); Py_ssize_t size = (Py_ssize_t)strlen(str);
return PyUnicode_DecodeLocaleAndSize(str, size, surrogateescape); return PyUnicode_DecodeLocaleAndSize(str, size, errors);
} }
@ -3456,7 +3487,7 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
"surrogateescape"); "surrogateescape");
} }
else { else {
return PyUnicode_DecodeLocaleAndSize(s, size, 1); return PyUnicode_DecodeLocaleAndSize(s, size, "surrogateescape");
} }
#endif #endif
} }

View File

@ -355,7 +355,7 @@ PyErr_SetFromErrnoWithFilenameObject(PyObject *exc, PyObject *filenameObject)
#ifndef MS_WINDOWS #ifndef MS_WINDOWS
if (i != 0) { if (i != 0) {
char *s = strerror(i); char *s = strerror(i);
message = PyUnicode_DecodeLocale(s, 1); message = PyUnicode_DecodeLocale(s, "surrogateescape");
} }
else { else {
/* Sometimes errno didn't get set */ /* Sometimes errno didn't get set */