Issue #13560: Add PyUnicode_EncodeLocale()
* Use PyUnicode_EncodeLocale() in time.strftime() if wcsftime() is not available * Document my last changes in Misc/NEWS
This commit is contained in:
parent
9987d9351c
commit
f2ea71fcc8
|
@ -713,7 +713,7 @@ system.
|
|||
bytes. If a byte sequence can be decoded as a surrogate character and
|
||||
*surrogateescape* is not equal to zero, the byte sequence is escaped using
|
||||
the ``'surrogateescape'`` error handler instead of being decoded. *str*
|
||||
must end with a null character but cannot contain embedded null character.
|
||||
must end with a null character but cannot contain embedded null characters.
|
||||
|
||||
.. seealso::
|
||||
|
||||
|
@ -732,6 +732,22 @@ system.
|
|||
.. versionadded:: 3.3
|
||||
|
||||
|
||||
.. c:function:: PyObject* PyUnicode_EncodeLocale(PyObject *unicode, int surrogateescape)
|
||||
|
||||
Encode a Unicode object to the current locale encoding. The encoder is
|
||||
strict if *surrogateescape* is equal to zero, otherwise it uses the
|
||||
``'surrogateescape'`` error handler (:pep:`383`). Return a :class:`bytes`
|
||||
object. *unicode* cannot contain embedded null characters.
|
||||
|
||||
.. seealso::
|
||||
|
||||
Use :c:func:`PyUnicode_EncodeFSDefault` to encode a string to
|
||||
:c:data:`Py_FileSystemDefaultEncoding` (the locale encoding read at
|
||||
Python startup).
|
||||
|
||||
.. versionadded:: 3.3
|
||||
|
||||
|
||||
File System Encoding
|
||||
""""""""""""""""""""
|
||||
|
||||
|
@ -806,6 +822,13 @@ used, passing :c:func:`PyUnicode_FSDecoder` as the conversion function:
|
|||
If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to the
|
||||
locale encoding.
|
||||
|
||||
.. seealso::
|
||||
|
||||
:c:data:`Py_FileSystemDefaultEncoding` is initialized at startup from the
|
||||
locale encoding and cannot be modified later. If you need to encode a
|
||||
string to the current locale encoding, use
|
||||
:c:func:`PyUnicode_EncodeLocale`.
|
||||
|
||||
.. versionadded:: 3.2
|
||||
|
||||
|
||||
|
|
|
@ -1603,7 +1603,7 @@ PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
|
|||
be decoded as a surrogate character and *surrogateescape* is not equal to
|
||||
zero, the byte sequence is escaped using the 'surrogateescape' error handler
|
||||
instead of being decoded. *str* must end with a null character but cannot
|
||||
contain embedded null character. */
|
||||
contain embedded null characters. */
|
||||
|
||||
PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocaleAndSize(
|
||||
const char *str,
|
||||
|
@ -1617,6 +1617,16 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale(
|
|||
const char *str,
|
||||
int surrogateescape);
|
||||
|
||||
/* Encode a Unicode object to the current locale encoding. The encoder is
|
||||
strict if *surrogateescape* is equal to zero, otherwise the
|
||||
"surrogateescape" error handler is used. Return a bytes object. The string
|
||||
cannot contain embedded null characters. */
|
||||
|
||||
PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale(
|
||||
PyObject *unicode,
|
||||
int surrogateescape
|
||||
);
|
||||
|
||||
/* --- File system encoding ---------------------------------------------- */
|
||||
|
||||
/* ParseTuple converter: encode str objects to bytes using
|
||||
|
|
|
@ -419,6 +419,10 @@ Core and Builtins
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #13560: Add PyUnicode_DecodeLocale(), PyUnicode_DecodeLocaleAndSize()
|
||||
and PyUnicode_EncodeLocale() functions to the C API to decode/encode from/to
|
||||
the current locale encoding.
|
||||
|
||||
- Issue #8373: The filesystem path of AF_UNIX sockets now uses the filesystem
|
||||
encoding and the surrogateescape error handler, rather than UTF-8. Patch
|
||||
by David Watson.
|
||||
|
@ -451,8 +455,8 @@ Library
|
|||
'importlib.abc.PyPycLoader', 'nntplib.NNTP.xgtitle', 'nntplib.NNTP.xpath',
|
||||
and private attributes of 'smtpd.SMTPChannel'.
|
||||
|
||||
- Issue #5905: time.strftime() is now using the locale encoding, instead of
|
||||
UTF-8, if the wcsftime() function is not available.
|
||||
- Issue #5905, #13560: time.strftime() is now using the current locale
|
||||
encoding, instead of UTF-8, if the wcsftime() function is not available.
|
||||
|
||||
- Issue #8641: Update IDLE 3 syntax coloring to recognize b".." and not u"..".
|
||||
Patch by Tal Einat.
|
||||
|
|
|
@ -486,7 +486,7 @@ time_strftime(PyObject *self, PyObject *args)
|
|||
fmt = format;
|
||||
#else
|
||||
/* Convert the unicode string to an ascii one */
|
||||
format = PyUnicode_EncodeFSDefault(format_arg);
|
||||
format = PyUnicode_EncodeLocale(format_arg, 1);
|
||||
if (format == NULL)
|
||||
return NULL;
|
||||
fmt = PyBytes_AS_STRING(format);
|
||||
|
|
|
@ -3073,6 +3073,140 @@ PyUnicode_AsEncodedObject(PyObject *unicode,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/* Find the first wide character of wstr that wcstombs() fails to convert.

   The string is walked one character at a time (a high+low surrogate pair
   counts as one character when SIZEOF_WCHAR_T == 2); each character is
   copied into a small null-terminated buffer and converted on its own.

   Return the offset, counted in wchar_t units from the start of wstr, of the
   first character for which wcstombs() returns (size_t)-1.  Return 0 if no
   such character is found, so a result of 0 does not distinguish "failure at
   offset 0" from "nothing found".

   errno is saved on entry and restored before every return, so the value
   seen by the caller is unchanged by the conversions attempted here. */
static size_t
|
||||
wcstombs_errorpos(const wchar_t *wstr)
|
||||
{
|
||||
size_t len;
|
||||
/* buf holds one character plus its null terminator; a 16-bit wchar_t needs
   an extra slot for the second half of a surrogate pair */
#if SIZEOF_WCHAR_T == 2
|
||||
wchar_t buf[3];
|
||||
#else
|
||||
wchar_t buf[2];
|
||||
#endif
|
||||
/* scratch output for a single converted character; its content is never read */
char outbuf[MB_LEN_MAX];
|
||||
const wchar_t *start, *previous;
|
||||
int save_errno;
|
||||
|
||||
/* remember errno: the wcstombs() calls below may overwrite it */
save_errno = errno;
|
||||
/* the last slot of buf is always the terminator and is written only once */
#if SIZEOF_WCHAR_T == 2
|
||||
buf[2] = 0;
|
||||
#else
|
||||
buf[1] = 0;
|
||||
#endif
|
||||
start = wstr;
|
||||
while (*wstr != L'\0')
|
||||
{
|
||||
/* position of the character about to be tested, reported on failure */
previous = wstr;
|
||||
#if SIZEOF_WCHAR_T == 2
|
||||
if (Py_UNICODE_IS_HIGH_SURROGATE(wstr[0])
|
||||
&& Py_UNICODE_IS_LOW_SURROGATE(wstr[1]))
|
||||
{
|
||||
/* surrogate pair: convert both units together */
buf[0] = wstr[0];
|
||||
buf[1] = wstr[1];
|
||||
wstr += 2;
|
||||
}
|
||||
else {
|
||||
/* single unit: terminate right after it, since buf[1] may still hold
   the low surrogate of a previous pair */
buf[0] = *wstr;
|
||||
buf[1] = 0;
|
||||
wstr++;
|
||||
}
|
||||
#else
|
||||
buf[0] = *wstr;
|
||||
wstr++;
|
||||
#endif
|
||||
/* try to convert only this character */
len = wcstombs(outbuf, buf, sizeof(outbuf));
|
||||
if (len == (size_t)-1) {
|
||||
errno = save_errno;
|
||||
return previous - start;
|
||||
}
|
||||
}
|
||||
|
||||
/* failed to find the unencodable character */
|
||||
errno = save_errno;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Encode a Unicode object to a bytes object using the C library's wide
   character to multibyte conversion.

   unicode:         object to encode; it is first converted to a wchar_t
                    string with PyUnicode_AsWideCharString().
   surrogateescape: if non-zero, the conversion is done by _Py_wchar2char();
                    if zero, it is done by wcstombs().

   Return a new bytes object, or NULL with an exception set:
   - TypeError ("embedded null character") if the wide string contains a
     null character before its end;
   - a memory error if _Py_wchar2char() fails with error_pos == (size_t)-1,
     or whatever PyBytes_FromStringAndSize() set if it fails;
   - otherwise the exception produced by raise_encode_exception(), using
     "locale" as the encoding name, the position of the failing character and
     strerror(errno) as the reason.

   The wchar_t buffer is released with PyMem_Free() on every path. */
PyObject *
|
||||
PyUnicode_EncodeLocale(PyObject *unicode, int surrogateescape)
|
||||
{
|
||||
Py_ssize_t wlen, wlen2;
|
||||
wchar_t *wstr;
|
||||
PyObject *bytes = NULL;
|
||||
char *errmsg;
|
||||
PyObject *exc;
|
||||
size_t error_pos;
|
||||
|
||||
wstr = PyUnicode_AsWideCharString(unicode, &wlen);
|
||||
if (wstr == NULL)
|
||||
return NULL;
|
||||
|
||||
/* wcslen() stops at the first null character; a length different from wlen
   means the string contains an embedded null */
wlen2 = wcslen(wstr);
|
||||
if (wlen2 != wlen) {
|
||||
PyMem_Free(wstr);
|
||||
PyErr_SetString(PyExc_TypeError, "embedded null character");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (surrogateescape) {
|
||||
/* locale encoding with surrogateescape */
|
||||
char *str;
|
||||
|
||||
str = _Py_wchar2char(wstr, &error_pos);
|
||||
if (str == NULL) {
|
||||
/* (size_t)-1 is handled as an allocation failure; any other value is used
   below as the index of the character that could not be encoded */
if (error_pos == (size_t)-1) {
|
||||
PyErr_NoMemory();
|
||||
PyMem_Free(wstr);
|
||||
return NULL;
|
||||
}
|
||||
else {
|
||||
goto encode_error;
|
||||
}
|
||||
}
|
||||
PyMem_Free(wstr);
|
||||
|
||||
/* copy the encoded C string into a bytes object, then release the
   temporary buffer with PyMem_Free() */
bytes = PyBytes_FromString(str);
|
||||
PyMem_Free(str);
|
||||
}
|
||||
else {
|
||||
size_t len, len2;
|
||||
|
||||
/* first pass: no destination buffer, the result is only used as the size
   of the bytes object to allocate */
len = wcstombs(NULL, wstr, 0);
|
||||
if (len == (size_t)-1) {
|
||||
error_pos = wcstombs_errorpos(wstr);
|
||||
goto encode_error;
|
||||
}
|
||||
|
||||
bytes = PyBytes_FromStringAndSize(NULL, len);
|
||||
if (bytes == NULL) {
|
||||
PyMem_Free(wstr);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* second pass: convert directly into the bytes buffer; the limit of len+1
   presumably accounts for a terminating null byte stored after the len
   payload bytes -- NOTE(review): confirm the bytes object reserves it */
len2 = wcstombs(PyBytes_AS_STRING(bytes), wstr, len+1);
|
||||
if (len2 == (size_t)-1 || len2 > len) {
|
||||
error_pos = wcstombs_errorpos(wstr);
|
||||
goto encode_error;
|
||||
}
|
||||
PyMem_Free(wstr);
|
||||
}
|
||||
return bytes;
|
||||
|
||||
/* Common failure path: error_pos has been set by the code that jumped here.
   errno is read before the cleanup calls that follow. */
encode_error:
|
||||
errmsg = strerror(errno);
|
||||
assert(errmsg != NULL);
|
||||
if (errmsg == NULL)
|
||||
errmsg = "wcstombs() encountered an unencodable wide character";
|
||||
PyMem_Free(wstr);
|
||||
Py_XDECREF(bytes);
|
||||
|
||||
/* report the single character at error_pos as the unencodable range */
exc = NULL;
|
||||
raise_encode_exception(&exc,
|
||||
"locale", unicode,
|
||||
error_pos, error_pos+1,
|
||||
errmsg);
|
||||
Py_XDECREF(exc);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
PyObject *
|
||||
PyUnicode_EncodeFSDefault(PyObject *unicode)
|
||||
{
|
||||
|
@ -3097,38 +3231,7 @@ PyUnicode_EncodeFSDefault(PyObject *unicode)
|
|||
"surrogateescape");
|
||||
}
|
||||
else {
|
||||
/* locale encoding with surrogateescape */
|
||||
wchar_t *wchar;
|
||||
char *bytes;
|
||||
PyObject *bytes_obj;
|
||||
size_t error_pos;
|
||||
|
||||
wchar = PyUnicode_AsWideCharString(unicode, NULL);
|
||||
if (wchar == NULL)
|
||||
return NULL;
|
||||
bytes = _Py_wchar2char(wchar, &error_pos);
|
||||
if (bytes == NULL) {
|
||||
if (error_pos != (size_t)-1) {
|
||||
char *errmsg = strerror(errno);
|
||||
PyObject *exc = NULL;
|
||||
if (errmsg == NULL)
|
||||
errmsg = "Py_wchar2char() failed";
|
||||
raise_encode_exception(&exc,
|
||||
"filesystemencoding", unicode,
|
||||
error_pos, error_pos+1,
|
||||
errmsg);
|
||||
Py_XDECREF(exc);
|
||||
}
|
||||
else
|
||||
PyErr_NoMemory();
|
||||
PyMem_Free(wchar);
|
||||
return NULL;
|
||||
}
|
||||
PyMem_Free(wchar);
|
||||
|
||||
bytes_obj = PyBytes_FromString(bytes);
|
||||
PyMem_Free(bytes);
|
||||
return bytes_obj;
|
||||
return PyUnicode_EncodeLocale(unicode, 1);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue