Issue #9542: Create PyUnicode_FSDecoder() function

It's a ParseTuple converter: decode bytes objects to unicode using
PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is.

 * Don't mention the surrogateescape error handler in the comments or the
   documentation; refer to PyUnicode_DecodeFSDefaultAndSize() and
   PyUnicode_EncodeFSDefault() instead, because these functions use the strict
   error handler for the mbcs encoding (on Windows).
 * Remove PyUnicode_FSConverter() comment in unicodeobject.c to avoid
   inconsistency with unicodeobject.h.
This commit is contained in:
Victor Stinner 2010-08-13 23:59:58 +00:00
parent f2e08b34f1
commit 47fcb5b4c3
4 changed files with 70 additions and 10 deletions

View File

@ -380,13 +380,25 @@ used, passsing :func:`PyUnicode_FSConverter` as the conversion function:
.. cfunction:: int PyUnicode_FSConverter(PyObject* obj, void* result)
Convert *obj* into *result*, using :cdata:`Py_FileSystemDefaultEncoding`,
and the ``"surrogateescape"`` error handler. *result* must be a
``PyObject*``, return a :func:`bytes` object which must be released if it
is no longer used.
ParseTuple converter: encode :class:`str` objects to :class:`bytes` using
:cfunc:`PyUnicode_EncodeFSDefault`; :class:`bytes` objects are output as-is.
*result* must be a :ctype:`PyBytesObject*` which must be released when it is
no longer used.
.. versionadded:: 3.1
To decode file names during argument parsing, the ``"O&"`` converter should be
used, passing :func:`PyUnicode_FSDecoder` as the conversion function:
.. cfunction:: int PyUnicode_FSDecoder(PyObject* obj, void* result)
ParseTuple converter: decode :class:`bytes` objects to :class:`str` using
:cfunc:`PyUnicode_DecodeFSDefaultAndSize`; :class:`str` objects are output
as-is. *result* must be a :ctype:`PyUnicodeObject*` which must be released
when it is no longer used.
.. versionadded:: 3.2
.. cfunction:: PyObject* PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
Decode a null-terminated string using :cdata:`Py_FileSystemDefaultEncoding`

View File

@ -200,6 +200,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode
# define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar
# define PyUnicode_FSConverter PyUnicodeUCS2_FSConverter
# define PyUnicode_FSDecoder PyUnicodeUCS2_FSDecoder
# define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
# define PyUnicode_GetMax PyUnicodeUCS2_GetMax
# define PyUnicode_GetSize PyUnicodeUCS2_GetSize
@ -300,6 +301,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode
# define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar
# define PyUnicode_FSConverter PyUnicodeUCS4_FSConverter
# define PyUnicode_FSDecoder PyUnicodeUCS4_FSDecoder
# define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
# define PyUnicode_GetMax PyUnicodeUCS4_GetMax
# define PyUnicode_GetSize PyUnicodeUCS4_GetSize
@ -1239,12 +1241,16 @@ PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
/* --- File system encoding ---------------------------------------------- */
/* ParseTuple converter which converts a Unicode object into the file
system encoding as a bytes object, using the "surrogateescape" error
handler; bytes objects are output as-is. */
/* ParseTuple converter: encode str objects to bytes using
PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */
PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*);
/* ParseTuple converter: decode bytes objects to unicode using
PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */
PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*);
/* Decode a null-terminated string using Py_FileSystemDefaultEncoding
and the "surrogateescape" error handler.

View File

@ -12,6 +12,10 @@ What's New in Python 3.2 Alpha 2?
Core and Builtins
-----------------
- Issue #9542: Create PyUnicode_FSDecoder() function, a ParseTuple converter:
decode bytes objects to unicode using PyUnicode_DecodeFSDefaultAndSize();
str objects are output as-is.
- Issue #9203: Computed gotos are now enabled by default on supported
compilers (which are detected by the configure script). They can still
be disabled selectively by specifying --without-computed-gotos.

View File

@ -1652,9 +1652,6 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
}
}
/* Convert the argument to a bytes object, according to the file
system encoding. The addr param must be a PyObject**.
This is designed to be used with "O&" in PyArg_Parse APIs. */
int
PyUnicode_FSConverter(PyObject* arg, void* addr)
@ -1696,6 +1693,47 @@ PyUnicode_FSConverter(PyObject* arg, void* addr)
}
/* ParseTuple "O&" converter: produce a str object from a file name argument.

   arg  - the object to convert.  A NULL arg is a cleanup request: the
          object previously stored through addr is released.
   addr - must point to a PyObject*.  On success it receives a new
          reference to a str object.

   str objects are output as-is (with a new reference).  Any other object
   is first converted with PyBytes_FromObject() and then decoded with
   PyUnicode_DecodeFSDefaultAndSize().

   Returns Py_CLEANUP_SUPPORTED on success, 1 after a cleanup request, and
   0 on failure.  A result containing an embedded NUL character, or a
   decoder result that is not a str, is rejected with TypeError. */
int
PyUnicode_FSDecoder(PyObject* arg, void* addr)
{
    PyObject *output = NULL;
    Py_ssize_t size;
    void *data;

    if (arg == NULL) {
        /* Cleanup request: release the stored object and clear the slot so
           the caller is not left holding a dangling pointer. */
        Py_DECREF(*(PyObject**)addr);
        *(PyObject**)addr = NULL;
        return 1;
    }

    if (PyUnicode_Check(arg)) {
        /* Already a str: hand out a new reference to the same object. */
        output = arg;
        Py_INCREF(output);
    }
    else {
        /* From here on, arg is a temporary bytes object owned by this
           function; it is released as soon as decoding is done. */
        arg = PyBytes_FromObject(arg);
        if (!arg)
            return 0;
        output = PyUnicode_DecodeFSDefaultAndSize(PyBytes_AS_STRING(arg),
                                                  PyBytes_GET_SIZE(arg));
        Py_DECREF(arg);
        if (!output)
            return 0;
        if (!PyUnicode_Check(output)) {
            Py_DECREF(output);
            PyErr_SetString(PyExc_TypeError, "decoder failed to return unicode");
            return 0;
        }
    }

    /* If the NUL-terminated length differs from the object's size, the
       string contains an embedded NUL; such results are rejected. */
    size = PyUnicode_GET_SIZE(output);
    data = PyUnicode_AS_UNICODE(output);
    if ((size_t)size != Py_UNICODE_strlen(data)) {
        PyErr_SetString(PyExc_TypeError, "embedded NUL character");
        Py_DECREF(output);
        return 0;
    }

    *(PyObject**)addr = output;
    return Py_CLEANUP_SUPPORTED;
}
char*
_PyUnicode_AsStringAndSize(PyObject *unicode, Py_ssize_t *psize)
{