mirror of https://github.com/python/cpython
Issue #13560: Locale codec functions use the classic "errors" parameter
instead of a surrogateescape flag, so that it will be possible to support more error handlers later.
This commit is contained in:
parent
ab59594326
commit
1b57967b96
|
@ -1608,14 +1608,14 @@ PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
|
|||
PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocaleAndSize(
|
||||
const char *str,
|
||||
Py_ssize_t len,
|
||||
int surrogateescape);
|
||||
const char *errors);
|
||||
|
||||
/* Similar to PyUnicode_DecodeLocaleAndSize(), but compute the string
|
||||
length using strlen(). */
|
||||
|
||||
PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale(
|
||||
const char *str,
|
||||
int surrogateescape);
|
||||
const char *errors);
|
||||
|
||||
/* Encode a Unicode object to the current locale encoding. The encoder is
|
||||
strict if *surrogateescape* is equal to zero, otherwise the
|
||||
|
@ -1624,7 +1624,7 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale(
|
|||
|
||||
PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale(
|
||||
PyObject *unicode,
|
||||
int surrogateescape
|
||||
const char *errors
|
||||
);
|
||||
|
||||
/* --- File system encoding ---------------------------------------------- */
|
||||
|
|
|
@ -495,7 +495,7 @@ Py_Main(int argc, wchar_t **argv)
|
|||
/* Use utf-8 on Mac OS X */
|
||||
unicode = PyUnicode_FromString(p);
|
||||
#else
|
||||
unicode = PyUnicode_DecodeLocale(p, 1);
|
||||
unicode = PyUnicode_DecodeLocale(p, "surrogateescape");
|
||||
#endif
|
||||
if (unicode == NULL) {
|
||||
/* ignore errors */
|
||||
|
|
|
@ -7891,7 +7891,7 @@ posix_strerror(PyObject *self, PyObject *args)
|
|||
"strerror() argument out of range");
|
||||
return NULL;
|
||||
}
|
||||
return PyUnicode_DecodeLocale(message, 1);
|
||||
return PyUnicode_DecodeLocale(message, "surrogateescape");
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -486,7 +486,7 @@ time_strftime(PyObject *self, PyObject *args)
|
|||
fmt = format;
|
||||
#else
|
||||
/* Convert the unicode string to an ascii one */
|
||||
format = PyUnicode_EncodeLocale(format_arg, 1);
|
||||
format = PyUnicode_EncodeLocale(format_arg, "surrogateescape");
|
||||
if (format == NULL)
|
||||
return NULL;
|
||||
fmt = PyBytes_AS_STRING(format);
|
||||
|
@ -532,7 +532,8 @@ time_strftime(PyObject *self, PyObject *args)
|
|||
#ifdef HAVE_WCSFTIME
|
||||
ret = PyUnicode_FromWideChar(outbuf, buflen);
|
||||
#else
|
||||
ret = PyUnicode_DecodeLocaleAndSize(outbuf, buflen, 1);
|
||||
ret = PyUnicode_DecodeLocaleAndSize(outbuf, buflen,
|
||||
"surrogateescape");
|
||||
#endif
|
||||
PyMem_Free(outbuf);
|
||||
break;
|
||||
|
@ -764,8 +765,8 @@ PyInit_timezone(PyObject *m) {
|
|||
#endif /* PYOS_OS2 */
|
||||
#endif
|
||||
PyModule_AddIntConstant(m, "daylight", daylight);
|
||||
otz0 = PyUnicode_DecodeLocale(tzname[0], 1);
|
||||
otz1 = PyUnicode_DecodeLocale(tzname[1], 1);
|
||||
otz0 = PyUnicode_DecodeLocale(tzname[0], "surrogateescape");
|
||||
otz1 = PyUnicode_DecodeLocale(tzname[1], "surrogateescape");
|
||||
PyModule_AddObject(m, "tzname", Py_BuildValue("(NN)", otz0, otz1));
|
||||
#else /* !HAVE_TZNAME || __GLIBC__ || __CYGWIN__*/
|
||||
#ifdef HAVE_STRUCT_TM_TM_ZONE
|
||||
|
|
|
@ -3125,8 +3125,31 @@ wcstombs_errorpos(const wchar_t *wstr)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
locale_error_handler(const char *errors, int *surrogateescape)
|
||||
{
|
||||
if (errors == NULL) {
|
||||
*surrogateescape = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (strcmp(errors, "strict") == 0) {
|
||||
*surrogateescape = 0;
|
||||
return 0;
|
||||
}
|
||||
if (strcmp(errors, "surrogateescape") == 0) {
|
||||
*surrogateescape = 1;
|
||||
return 0;
|
||||
}
|
||||
PyErr_Format(PyExc_ValueError,
|
||||
"only 'strict' and 'surrogateescape' error handlers "
|
||||
"are supported, not '%s'",
|
||||
errors);
|
||||
return -1;
|
||||
}
|
||||
|
||||
PyObject *
|
||||
PyUnicode_EncodeLocale(PyObject *unicode, int surrogateescape)
|
||||
PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
|
||||
{
|
||||
Py_ssize_t wlen, wlen2;
|
||||
wchar_t *wstr;
|
||||
|
@ -3135,6 +3158,10 @@ PyUnicode_EncodeLocale(PyObject *unicode, int surrogateescape)
|
|||
PyObject *reason;
|
||||
PyObject *exc;
|
||||
size_t error_pos;
|
||||
int surrogateescape;
|
||||
|
||||
if (locale_error_handler(errors, &surrogateescape) < 0)
|
||||
return NULL;
|
||||
|
||||
wstr = PyUnicode_AsWideCharString(unicode, &wlen);
|
||||
if (wstr == NULL)
|
||||
|
@ -3198,7 +3225,7 @@ encode_error:
|
|||
Py_XDECREF(bytes);
|
||||
|
||||
if (errmsg != NULL)
|
||||
reason = PyUnicode_DecodeLocale(errmsg, 1);
|
||||
reason = PyUnicode_DecodeLocale(errmsg, "surrogateescape");
|
||||
else
|
||||
reason = PyUnicode_FromString(
|
||||
"wcstombs() encountered an unencodable "
|
||||
|
@ -3243,7 +3270,7 @@ PyUnicode_EncodeFSDefault(PyObject *unicode)
|
|||
"surrogateescape");
|
||||
}
|
||||
else {
|
||||
return PyUnicode_EncodeLocale(unicode, 1);
|
||||
return PyUnicode_EncodeLocale(unicode, "surrogateescape");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@ -3351,13 +3378,17 @@ PyUnicode_AsEncodedUnicode(PyObject *unicode,
|
|||
|
||||
PyObject*
|
||||
PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
|
||||
int surrogateescape)
|
||||
const char *errors)
|
||||
{
|
||||
wchar_t smallbuf[256];
|
||||
size_t smallbuf_len = Py_ARRAY_LENGTH(smallbuf);
|
||||
wchar_t *wstr;
|
||||
size_t wlen, wlen2;
|
||||
PyObject *unicode;
|
||||
int surrogateescape;
|
||||
|
||||
if (locale_error_handler(errors, &surrogateescape) < 0)
|
||||
return NULL;
|
||||
|
||||
if (str[len] != '\0' || len != strlen(str)) {
|
||||
PyErr_SetString(PyExc_TypeError, "embedded null character");
|
||||
|
@ -3419,10 +3450,10 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
|
|||
}
|
||||
|
||||
PyObject*
|
||||
PyUnicode_DecodeLocale(const char *str, int surrogateescape)
|
||||
PyUnicode_DecodeLocale(const char *str, const char *errors)
|
||||
{
|
||||
Py_ssize_t size = (Py_ssize_t)strlen(str);
|
||||
return PyUnicode_DecodeLocaleAndSize(str, size, surrogateescape);
|
||||
return PyUnicode_DecodeLocaleAndSize(str, size, errors);
|
||||
}
|
||||
|
||||
|
||||
|
@ -3456,7 +3487,7 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
|
|||
"surrogateescape");
|
||||
}
|
||||
else {
|
||||
return PyUnicode_DecodeLocaleAndSize(s, size, 1);
|
||||
return PyUnicode_DecodeLocaleAndSize(s, size, "surrogateescape");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -355,7 +355,7 @@ PyErr_SetFromErrnoWithFilenameObject(PyObject *exc, PyObject *filenameObject)
|
|||
#ifndef MS_WINDOWS
|
||||
if (i != 0) {
|
||||
char *s = strerror(i);
|
||||
message = PyUnicode_DecodeLocale(s, 1);
|
||||
message = PyUnicode_DecodeLocale(s, "surrogateescape");
|
||||
}
|
||||
else {
|
||||
/* Sometimes errno didn't get set */
|
||||
|
|
Loading…
Reference in New Issue