Issue #8715: Create PyUnicode_EncodeFSDefault() function: Encode a Unicode

object to Py_FileSystemDefaultEncoding with the "surrogateescape" error
handler, return a bytes object. If Py_FileSystemDefaultEncoding is not set,
fall back to UTF-8.
This commit is contained in:
Victor Stinner 2010-05-15 16:27:27 +00:00
parent 59e62db0a3
commit ae6265f8d0
10 changed files with 46 additions and 26 deletions

View File

@ -396,6 +396,7 @@ used, passsing :func:PyUnicode_FSConverter as the conversion function:
Use :func:`PyUnicode_DecodeFSDefaultAndSize` if you know the string length.
.. cfunction:: PyObject* PyUnicode_DecodeFSDefault(const char *s)
Decode a string using :cdata:`Py_FileSystemDefaultEncoding` and
@ -404,6 +405,16 @@ used, passsing :func:PyUnicode_FSConverter as the conversion function:
If :cdata:`Py_FileSystemDefaultEncoding` is not set, fall back to UTF-8.
.. cfunction:: PyObject* PyUnicode_EncodeFSDefault(PyObject *unicode)
Encode a Unicode object to :cdata:`Py_FileSystemDefaultEncoding` with the
``'surrogateescape'`` error handler, and return a :func:`bytes` object.
If :cdata:`Py_FileSystemDefaultEncoding` is not set, fall back to UTF-8.
.. versionadded:: 3.2
wchar_t Support
"""""""""""""""

View File

@ -1268,6 +1268,16 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
Py_ssize_t size /* size */
);
/* Encode a Unicode object to Py_FileSystemDefaultEncoding with the
"surrogateescape" error handler and return the result as a bytes object.
If Py_FileSystemDefaultEncoding is not set (NULL), UTF-8 is used instead,
still with the "surrogateescape" error handler.
*/
PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault(
PyObject *unicode /* Unicode object to encode */
);
/* --- Methods & Slots ----------------------------------------------------
These are capable of handling Unicode objects and strings on input

View File

@ -12,6 +12,11 @@ What's New in Python 3.2 Alpha 1?
Core and Builtins
-----------------
- Issue #8715: Create PyUnicode_EncodeFSDefault() function: Encode a Unicode
object to Py_FileSystemDefaultEncoding with the "surrogateescape" error
handler, return a bytes object. If Py_FileSystemDefaultEncoding is not set,
fall back to UTF-8.
- Enable shortcuts for common encodings in PyUnicode_AsEncodedString() for any
error handler, not only the default error handler (strict)

View File

@ -247,8 +247,7 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds)
if (u == NULL)
return -1;
stringobj = PyUnicode_AsEncodedString(
u, Py_FileSystemDefaultEncoding, "surrogateescape");
stringobj = PyUnicode_EncodeFSDefault(u);
Py_DECREF(u);
if (stringobj == NULL)
return -1;

View File

@ -3147,9 +3147,7 @@ PyInit__tkinter(void)
it also helps Tcl find its encodings. */
uexe = PyUnicode_FromWideChar(Py_GetProgramName(), -1);
if (uexe) {
cexe = PyUnicode_AsEncodedString(uexe,
Py_FileSystemDefaultEncoding,
NULL);
cexe = PyUnicode_EncodeFSDefault(uexe);
if (cexe)
Tcl_FindExecutable(PyBytes_AsString(cexe));
Py_XDECREF(cexe);

View File

@ -111,8 +111,7 @@ grp_getgrnam(PyObject *self, PyObject *args)
if (!PyArg_ParseTuple(args, "U:getgrnam", &arg))
return NULL;
if ((bytes = PyUnicode_AsEncodedString(arg, Py_FileSystemDefaultEncoding,
"surrogateescape")) == NULL)
if ((bytes = PyUnicode_EncodeFSDefault(arg)) == NULL)
return NULL;
if (PyBytes_AsStringAndSize(bytes, &name, NULL) == -1)
goto out;

View File

@ -132,9 +132,7 @@ pwd_getpwnam(PyObject *self, PyObject *args)
if (!PyArg_ParseTuple(args, "U:getpwnam", &arg))
return NULL;
if ((bytes = PyUnicode_AsEncodedString(arg,
Py_FileSystemDefaultEncoding,
"surrogateescape")) == NULL)
if ((bytes = PyUnicode_EncodeFSDefault(arg)) == NULL)
return NULL;
if (PyBytes_AsStringAndSize(bytes, &name, NULL) == -1)
goto out;

View File

@ -118,9 +118,7 @@ static PyObject* spwd_getspnam(PyObject *self, PyObject *args)
if (!PyArg_ParseTuple(args, "U:getspnam", &arg))
return NULL;
if ((bytes = PyUnicode_AsEncodedString(arg,
Py_FileSystemDefaultEncoding,
"surrogateescape")) == NULL)
if ((bytes = PyUnicode_EncodeFSDefault(arg)) == NULL)
return NULL;
if (PyBytes_AsStringAndSize(bytes, &name, NULL) == -1)
goto out;

View File

@ -1461,6 +1461,18 @@ PyObject *PyUnicode_AsEncodedObject(PyObject *unicode,
return NULL;
}
/* Encode a Unicode object to the file system encoding.

   unicode: the object to encode; it must be a Unicode object.

   When Py_FileSystemDefaultEncoding is set, the text is encoded with that
   codec through PyUnicode_AsEncodedString(); otherwise it is encoded as
   UTF-8 through PyUnicode_EncodeUTF8().  Both paths use the
   "surrogateescape" error handler.

   Returns the encoder's result, or NULL with an exception set when
   `unicode` is not a Unicode object. */
PyObject *PyUnicode_EncodeFSDefault(PyObject *unicode)
{
    /* The UTF-8 fallback below reads the buffer through the unchecked
       PyUnicode_AS_UNICODE / PyUnicode_GET_SIZE macros, so reject anything
       that is not a Unicode object before either branch runs. */
    if (!PyUnicode_Check(unicode)) {
        PyErr_BadArgument();
        return NULL;
    }

    if (Py_FileSystemDefaultEncoding)
        return PyUnicode_AsEncodedString(unicode,
                                         Py_FileSystemDefaultEncoding,
                                         "surrogateescape");

    /* No file system encoding configured: fall back to UTF-8. */
    return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
                                PyUnicode_GET_SIZE(unicode),
                                "surrogateescape");
}
PyObject *PyUnicode_AsEncodedString(PyObject *unicode,
const char *encoding,
const char *errors)
@ -1646,9 +1658,7 @@ PyUnicode_FSConverter(PyObject* arg, void* addr)
arg = PyUnicode_FromObject(arg);
if (!arg)
return 0;
output = PyUnicode_AsEncodedObject(arg,
Py_FileSystemDefaultEncoding,
"surrogateescape");
output = PyUnicode_EncodeFSDefault(arg);
Py_DECREF(arg);
if (!output)
return 0;

View File

@ -1633,8 +1633,7 @@ find_module(char *fullname, char *subname, PyObject *path, char *buf,
if (!v)
return NULL;
if (PyUnicode_Check(v)) {
v = PyUnicode_AsEncodedString(v,
Py_FileSystemDefaultEncoding, NULL);
v = PyUnicode_EncodeFSDefault(v);
if (v == NULL)
return NULL;
}
@ -2752,14 +2751,7 @@ ensure_fromlist(PyObject *mod, PyObject *fromlist, char *buf, Py_ssize_t buflen,
char *subname;
PyObject *submod;
char *p;
if (!Py_FileSystemDefaultEncoding) {
item8 = PyUnicode_EncodeASCII(PyUnicode_AsUnicode(item),
PyUnicode_GetSize(item),
NULL);
} else {
item8 = PyUnicode_AsEncodedString(item,
Py_FileSystemDefaultEncoding, NULL);
}
item8 = PyUnicode_EncodeFSDefault(item);
if (!item8) {
PyErr_SetString(PyExc_ValueError, "Cannot encode path item");
return 0;