From 47fcb5b4c31eb6ed2042e2e558a640524dc0c986 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 13 Aug 2010 23:59:58 +0000 Subject: [PATCH] Issue #9542: Create PyUnicode_FSDecoder() function It's a ParseTuple converter: decode bytes objects to unicode using PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. * Don't specify surrogateescape error handler in the comments nor the documentation, but PyUnicode_DecodeFSDefaultAndSize() and PyUnicode_EncodeFSDefault() because these functions use strict error handler for the mbcs encoding (on Windows). * Remove PyUnicode_FSConverter() comment in unicodeobject.c to avoid inconsistency with unicodeobject.h. --- Doc/c-api/unicode.rst | 20 +++++++++++++++---- Include/unicodeobject.h | 12 ++++++++--- Misc/NEWS | 4 ++++ Objects/unicodeobject.c | 44 ++++++++++++++++++++++++++++++++++++++--- 4 files changed, 70 insertions(+), 10 deletions(-) diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index fa460a8baa5..913fd69fce0 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -380,13 +380,25 @@ used, passsing :func:`PyUnicode_FSConverter` as the conversion function: .. cfunction:: int PyUnicode_FSConverter(PyObject* obj, void* result) - Convert *obj* into *result*, using :cdata:`Py_FileSystemDefaultEncoding`, - and the ``"surrogateescape"`` error handler. *result* must be a - ``PyObject*``, return a :func:`bytes` object which must be released if it - is no longer used. + ParseTuple converter: encode :class:`str` objects to :class:`bytes` using + :cfunc:`PyUnicode_EncodeFSDefault`; :class:`bytes` objects are output as-is. + *result* must be a :ctype:`PyBytesObject*` which must be released when it is + no longer used. .. versionadded:: 3.1 +To decode file names during argument parsing, the ``"O&"`` converter should be +used, passing :func:`PyUnicode_FSDecoder` as the conversion function: + +.. 
cfunction:: int PyUnicode_FSDecoder(PyObject* obj, void* result) + + ParseTuple converter: decode :class:`bytes` objects to :class:`str` using + :cfunc:`PyUnicode_DecodeFSDefaultAndSize`; :class:`str` objects are output + as-is. *result* must be a :ctype:`PyUnicodeObject*` which must be released + when it is no longer used. + + .. versionadded:: 3.2 + .. cfunction:: PyObject* PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size) Decode a null-terminated string using :cdata:`Py_FileSystemDefaultEncoding` diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 7f5e8fdacfd..cee75cc8a1f 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -200,6 +200,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE; # define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode # define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar # define PyUnicode_FSConverter PyUnicodeUCS2_FSConverter +# define PyUnicode_FSDecoder PyUnicodeUCS2_FSDecoder # define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding # define PyUnicode_GetMax PyUnicodeUCS2_GetMax # define PyUnicode_GetSize PyUnicodeUCS2_GetSize @@ -300,6 +301,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE; # define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode # define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar # define PyUnicode_FSConverter PyUnicodeUCS4_FSConverter +# define PyUnicode_FSDecoder PyUnicodeUCS4_FSDecoder # define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding # define PyUnicode_GetMax PyUnicodeUCS4_GetMax # define PyUnicode_GetSize PyUnicodeUCS4_GetSize @@ -1239,12 +1241,16 @@ PyAPI_FUNC(int) PyUnicode_EncodeDecimal( /* --- File system encoding ---------------------------------------------- */ -/* ParseTuple converter which converts a Unicode object into the file - system encoding as a bytes object, using the "surrogateescape" error - handler; bytes objects are output as-is. 
*/ +/* ParseTuple converter: encode str objects to bytes using + PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */ PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*); +/* ParseTuple converter: decode bytes objects to unicode using + PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */ + +PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*); + /* Decode a null-terminated string using Py_FileSystemDefaultEncoding and the "surrogateescape" error handler. diff --git a/Misc/NEWS b/Misc/NEWS index 28e03140dff..3388aece9e0 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,10 @@ What's New in Python 3.2 Alpha 2? Core and Builtins ----------------- +- Issue #9542: Create PyUnicode_FSDecoder() function, a ParseTuple converter: + decode bytes objects to unicode using PyUnicode_DecodeFSDefaultAndSize(); + str objects are output as-is. + - Issue #9203: Computed gotos are now enabled by default on supported compilers (which are detected by the configure script). They can still be disable selectively by specifying --without-computed-gotos. diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 7c9b8827389..676c6930401 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1652,9 +1652,6 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size) } } -/* Convert the argument to a bytes object, according to the file - system encoding. The addr param must be a PyObject**. - This is designed to be used with "O&" in PyArg_Parse APIs. 
*/ int PyUnicode_FSConverter(PyObject* arg, void* addr) @@ -1696,6 +1693,47 @@ PyUnicode_FSConverter(PyObject* arg, void* addr) } +int +PyUnicode_FSDecoder(PyObject* arg, void* addr) +{ + PyObject *output = NULL; + Py_ssize_t size; + void *data; + if (arg == NULL) { + Py_DECREF(*(PyObject**)addr); + return 1; + } + if (PyUnicode_Check(arg)) { + output = arg; + Py_INCREF(output); + } + else { + arg = PyBytes_FromObject(arg); + if (!arg) + return 0; + output = PyUnicode_DecodeFSDefaultAndSize(PyBytes_AS_STRING(arg), + PyBytes_GET_SIZE(arg)); + Py_DECREF(arg); + if (!output) + return 0; + if (!PyUnicode_Check(output)) { + Py_DECREF(output); + PyErr_SetString(PyExc_TypeError, "decoder failed to return unicode"); + return 0; + } + } + size = PyUnicode_GET_SIZE(output); + data = PyUnicode_AS_UNICODE(output); + if (size != Py_UNICODE_strlen(data)) { + PyErr_SetString(PyExc_TypeError, "embedded NUL character"); + Py_DECREF(output); + return 0; + } + *(PyObject**)addr = output; + return Py_CLEANUP_SUPPORTED; +} + + char* _PyUnicode_AsStringAndSize(PyObject *unicode, Py_ssize_t *psize) {