mirror of https://github.com/python/cpython
Issue #13560: Locale codec functions use the classic "errors" parameter
instead of the integer "surrogateescape" flag, so that it will be possible to support more error handlers later.
This commit is contained in:
parent
ab59594326
commit
1b57967b96
|
@ -1608,14 +1608,14 @@ PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
|
||||||
PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocaleAndSize(
|
PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocaleAndSize(
|
||||||
const char *str,
|
const char *str,
|
||||||
Py_ssize_t len,
|
Py_ssize_t len,
|
||||||
int surrogateescape);
|
const char *errors);
|
||||||
|
|
||||||
/* Similar to PyUnicode_DecodeLocaleAndSize(), but compute the string
|
/* Similar to PyUnicode_DecodeLocaleAndSize(), but compute the string
|
||||||
length using strlen(). */
|
length using strlen(). */
|
||||||
|
|
||||||
PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale(
|
PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale(
|
||||||
const char *str,
|
const char *str,
|
||||||
int surrogateescape);
|
const char *errors);
|
||||||
|
|
||||||
/* Encode a Unicode object to the current locale encoding. The encoder is
|
/* Encode a Unicode object to the current locale encoding. The encoder is
|
||||||
strict if *surrogateescape* is equal to zero, otherwise the
|
strict if *surrogateescape* is equal to zero, otherwise the
|
||||||
|
@ -1624,7 +1624,7 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale(
|
||||||
|
|
||||||
PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale(
|
PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale(
|
||||||
PyObject *unicode,
|
PyObject *unicode,
|
||||||
int surrogateescape
|
const char *errors
|
||||||
);
|
);
|
||||||
|
|
||||||
/* --- File system encoding ---------------------------------------------- */
|
/* --- File system encoding ---------------------------------------------- */
|
||||||
|
|
|
@ -495,7 +495,7 @@ Py_Main(int argc, wchar_t **argv)
|
||||||
/* Use utf-8 on Mac OS X */
|
/* Use utf-8 on Mac OS X */
|
||||||
unicode = PyUnicode_FromString(p);
|
unicode = PyUnicode_FromString(p);
|
||||||
#else
|
#else
|
||||||
unicode = PyUnicode_DecodeLocale(p, 1);
|
unicode = PyUnicode_DecodeLocale(p, "surrogateescape");
|
||||||
#endif
|
#endif
|
||||||
if (unicode == NULL) {
|
if (unicode == NULL) {
|
||||||
/* ignore errors */
|
/* ignore errors */
|
||||||
|
|
|
@ -7891,7 +7891,7 @@ posix_strerror(PyObject *self, PyObject *args)
|
||||||
"strerror() argument out of range");
|
"strerror() argument out of range");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
return PyUnicode_DecodeLocale(message, 1);
|
return PyUnicode_DecodeLocale(message, "surrogateescape");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -486,7 +486,7 @@ time_strftime(PyObject *self, PyObject *args)
|
||||||
fmt = format;
|
fmt = format;
|
||||||
#else
|
#else
|
||||||
/* Convert the unicode string to an ascii one */
|
/* Convert the unicode string to an ascii one */
|
||||||
format = PyUnicode_EncodeLocale(format_arg, 1);
|
format = PyUnicode_EncodeLocale(format_arg, "surrogateescape");
|
||||||
if (format == NULL)
|
if (format == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
fmt = PyBytes_AS_STRING(format);
|
fmt = PyBytes_AS_STRING(format);
|
||||||
|
@ -532,7 +532,8 @@ time_strftime(PyObject *self, PyObject *args)
|
||||||
#ifdef HAVE_WCSFTIME
|
#ifdef HAVE_WCSFTIME
|
||||||
ret = PyUnicode_FromWideChar(outbuf, buflen);
|
ret = PyUnicode_FromWideChar(outbuf, buflen);
|
||||||
#else
|
#else
|
||||||
ret = PyUnicode_DecodeLocaleAndSize(outbuf, buflen, 1);
|
ret = PyUnicode_DecodeLocaleAndSize(outbuf, buflen,
|
||||||
|
"surrogateescape");
|
||||||
#endif
|
#endif
|
||||||
PyMem_Free(outbuf);
|
PyMem_Free(outbuf);
|
||||||
break;
|
break;
|
||||||
|
@ -764,8 +765,8 @@ PyInit_timezone(PyObject *m) {
|
||||||
#endif /* PYOS_OS2 */
|
#endif /* PYOS_OS2 */
|
||||||
#endif
|
#endif
|
||||||
PyModule_AddIntConstant(m, "daylight", daylight);
|
PyModule_AddIntConstant(m, "daylight", daylight);
|
||||||
otz0 = PyUnicode_DecodeLocale(tzname[0], 1);
|
otz0 = PyUnicode_DecodeLocale(tzname[0], "surrogateescape");
|
||||||
otz1 = PyUnicode_DecodeLocale(tzname[1], 1);
|
otz1 = PyUnicode_DecodeLocale(tzname[1], "surrogateescape");
|
||||||
PyModule_AddObject(m, "tzname", Py_BuildValue("(NN)", otz0, otz1));
|
PyModule_AddObject(m, "tzname", Py_BuildValue("(NN)", otz0, otz1));
|
||||||
#else /* !HAVE_TZNAME || __GLIBC__ || __CYGWIN__*/
|
#else /* !HAVE_TZNAME || __GLIBC__ || __CYGWIN__*/
|
||||||
#ifdef HAVE_STRUCT_TM_TM_ZONE
|
#ifdef HAVE_STRUCT_TM_TM_ZONE
|
||||||
|
|
|
@ -3125,8 +3125,31 @@ wcstombs_errorpos(const wchar_t *wstr)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
locale_error_handler(const char *errors, int *surrogateescape)
|
||||||
|
{
|
||||||
|
if (errors == NULL) {
|
||||||
|
*surrogateescape = 0;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (strcmp(errors, "strict") == 0) {
|
||||||
|
*surrogateescape = 0;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if (strcmp(errors, "surrogateescape") == 0) {
|
||||||
|
*surrogateescape = 1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
PyErr_Format(PyExc_ValueError,
|
||||||
|
"only 'strict' and 'surrogateescape' error handlers "
|
||||||
|
"are supported, not '%s'",
|
||||||
|
errors);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
PyObject *
|
PyObject *
|
||||||
PyUnicode_EncodeLocale(PyObject *unicode, int surrogateescape)
|
PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
|
||||||
{
|
{
|
||||||
Py_ssize_t wlen, wlen2;
|
Py_ssize_t wlen, wlen2;
|
||||||
wchar_t *wstr;
|
wchar_t *wstr;
|
||||||
|
@ -3135,6 +3158,10 @@ PyUnicode_EncodeLocale(PyObject *unicode, int surrogateescape)
|
||||||
PyObject *reason;
|
PyObject *reason;
|
||||||
PyObject *exc;
|
PyObject *exc;
|
||||||
size_t error_pos;
|
size_t error_pos;
|
||||||
|
int surrogateescape;
|
||||||
|
|
||||||
|
if (locale_error_handler(errors, &surrogateescape) < 0)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
wstr = PyUnicode_AsWideCharString(unicode, &wlen);
|
wstr = PyUnicode_AsWideCharString(unicode, &wlen);
|
||||||
if (wstr == NULL)
|
if (wstr == NULL)
|
||||||
|
@ -3198,7 +3225,7 @@ encode_error:
|
||||||
Py_XDECREF(bytes);
|
Py_XDECREF(bytes);
|
||||||
|
|
||||||
if (errmsg != NULL)
|
if (errmsg != NULL)
|
||||||
reason = PyUnicode_DecodeLocale(errmsg, 1);
|
reason = PyUnicode_DecodeLocale(errmsg, "surrogateescape");
|
||||||
else
|
else
|
||||||
reason = PyUnicode_FromString(
|
reason = PyUnicode_FromString(
|
||||||
"wcstombs() encountered an unencodable "
|
"wcstombs() encountered an unencodable "
|
||||||
|
@ -3243,7 +3270,7 @@ PyUnicode_EncodeFSDefault(PyObject *unicode)
|
||||||
"surrogateescape");
|
"surrogateescape");
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
return PyUnicode_EncodeLocale(unicode, 1);
|
return PyUnicode_EncodeLocale(unicode, "surrogateescape");
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -3351,13 +3378,17 @@ PyUnicode_AsEncodedUnicode(PyObject *unicode,
|
||||||
|
|
||||||
PyObject*
|
PyObject*
|
||||||
PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
|
PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
|
||||||
int surrogateescape)
|
const char *errors)
|
||||||
{
|
{
|
||||||
wchar_t smallbuf[256];
|
wchar_t smallbuf[256];
|
||||||
size_t smallbuf_len = Py_ARRAY_LENGTH(smallbuf);
|
size_t smallbuf_len = Py_ARRAY_LENGTH(smallbuf);
|
||||||
wchar_t *wstr;
|
wchar_t *wstr;
|
||||||
size_t wlen, wlen2;
|
size_t wlen, wlen2;
|
||||||
PyObject *unicode;
|
PyObject *unicode;
|
||||||
|
int surrogateescape;
|
||||||
|
|
||||||
|
if (locale_error_handler(errors, &surrogateescape) < 0)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
if (str[len] != '\0' || len != strlen(str)) {
|
if (str[len] != '\0' || len != strlen(str)) {
|
||||||
PyErr_SetString(PyExc_TypeError, "embedded null character");
|
PyErr_SetString(PyExc_TypeError, "embedded null character");
|
||||||
|
@ -3419,10 +3450,10 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
|
||||||
}
|
}
|
||||||
|
|
||||||
PyObject*
|
PyObject*
|
||||||
PyUnicode_DecodeLocale(const char *str, int surrogateescape)
|
PyUnicode_DecodeLocale(const char *str, const char *errors)
|
||||||
{
|
{
|
||||||
Py_ssize_t size = (Py_ssize_t)strlen(str);
|
Py_ssize_t size = (Py_ssize_t)strlen(str);
|
||||||
return PyUnicode_DecodeLocaleAndSize(str, size, surrogateescape);
|
return PyUnicode_DecodeLocaleAndSize(str, size, errors);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -3456,7 +3487,7 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
|
||||||
"surrogateescape");
|
"surrogateescape");
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
return PyUnicode_DecodeLocaleAndSize(s, size, 1);
|
return PyUnicode_DecodeLocaleAndSize(s, size, "surrogateescape");
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
|
@ -355,7 +355,7 @@ PyErr_SetFromErrnoWithFilenameObject(PyObject *exc, PyObject *filenameObject)
|
||||||
#ifndef MS_WINDOWS
|
#ifndef MS_WINDOWS
|
||||||
if (i != 0) {
|
if (i != 0) {
|
||||||
char *s = strerror(i);
|
char *s = strerror(i);
|
||||||
message = PyUnicode_DecodeLocale(s, 1);
|
message = PyUnicode_DecodeLocale(s, "surrogateescape");
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
/* Sometimes errno didn't get set */
|
/* Sometimes errno didn't get set */
|
||||||
|
|
Loading…
Reference in New Issue