Use locale encoding if Py_FileSystemDefaultEncoding is not set

* PyUnicode_EncodeFSDefault(), PyUnicode_DecodeFSDefaultAndSize() and
   PyUnicode_DecodeFSDefault() use the locale encoding instead of UTF-8 if
   Py_FileSystemDefaultEncoding is NULL
 * redecode_filenames() functions and _Py_code_object_list (issue #9630)
   are no longer needed: remove them
This commit is contained in:
Victor Stinner 2010-10-15 12:04:23 +00:00
parent 6a4aff10f0
commit f3170ccef8
8 changed files with 48 additions and 297 deletions

View File

@ -415,7 +415,8 @@ used, passing :c:func:`PyUnicode_FSDecoder` as the conversion function:
Decode a string using :c:data:`Py_FileSystemDefaultEncoding` and the
``'surrogateescape'`` error handler, or ``'strict'`` on Windows.
If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to UTF-8.
If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to the
locale encoding.
.. versionchanged:: 3.2
Use ``'strict'`` error handler on Windows.
@ -426,7 +427,8 @@ used, passing :c:func:`PyUnicode_FSDecoder` as the conversion function:
Decode a null-terminated string using :c:data:`Py_FileSystemDefaultEncoding`
and the ``'surrogateescape'`` error handler, or ``'strict'`` on Windows.
If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to UTF-8.
If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to the
locale encoding.
Use :c:func:`PyUnicode_DecodeFSDefaultAndSize` if you know the string length.
@ -440,7 +442,8 @@ used, passing :c:func:`PyUnicode_FSDecoder` as the conversion function:
``'surrogateescape'`` error handler, or ``'strict'`` on Windows, and return
:class:`bytes`.
If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to UTF-8.
If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to the
locale encoding.
.. versionadded:: 3.2

View File

@ -99,13 +99,6 @@ PyAPI_FUNC(int) _PyCode_CheckLineNumber(PyCodeObject* co,
PyAPI_FUNC(PyObject*) PyCode_Optimize(PyObject *code, PyObject* consts,
PyObject *names, PyObject *lineno_obj);
/* List of weak references to all code objects. The list is used by
initfsencoding() to redecode code filenames at startup if the filesystem
encoding changes. When initfsencoding() exits, the list is set to NULL and
is no longer used. */
extern PyObject *_Py_code_object_list;
#ifdef __cplusplus
}
#endif

View File

@ -1193,7 +1193,8 @@ PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*);
/* Decode a null-terminated string using Py_FileSystemDefaultEncoding
and the "surrogateescape" error handler.
If Py_FileSystemDefaultEncoding is not set, fall back to UTF-8.
If Py_FileSystemDefaultEncoding is not set, fall back to the locale
encoding.
Use PyUnicode_DecodeFSDefaultAndSize() if the string length is known.
*/
@ -1205,7 +1206,8 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
/* Decode a string using Py_FileSystemDefaultEncoding
and the "surrogateescape" error handler.
If Py_FileSystemDefaultEncoding is not set, fall back to UTF-8.
If Py_FileSystemDefaultEncoding is not set, fall back to the locale
encoding.
*/
PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
@ -1216,7 +1218,8 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
/* Encode a Unicode object to Py_FileSystemDefaultEncoding with the
"surrogateescape" error handler, and return bytes.
If Py_FileSystemDefaultEncoding is not set, fall back to UTF-8.
If Py_FileSystemDefaultEncoding is not set, fall back to the locale
encoding.
*/
PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault(

View File

@ -10,6 +10,9 @@ What's New in Python 3.2 Beta 1?
Core and Builtins
-----------------
- Use locale encoding instead of UTF-8 to encode and decode filenames if
Py_FileSystemDefaultEncoding is not set.
- Issue #10095: fp_setreadl() doesn't reopen the file, reuse instead the file
descriptor.

View File

@ -5,8 +5,6 @@
#define NAME_CHARS \
"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"
PyObject *_Py_code_object_list = NULL;
/* all_name_chars(s): true iff all chars in s are valid NAME_CHARS */
static int
@ -111,17 +109,6 @@ PyCode_New(int argcount, int kwonlyargcount,
co->co_lnotab = lnotab;
co->co_zombieframe = NULL;
co->co_weakreflist = NULL;
if (_Py_code_object_list != NULL) {
int err;
PyObject *ref = PyWeakref_NewRef((PyObject*)co, NULL);
if (ref == NULL)
goto error;
err = PyList_Append(_Py_code_object_list, ref);
Py_DECREF(ref);
if (err)
goto error;
}
}
return co;

View File

@ -1604,10 +1604,6 @@ _Py_ReadyTypes(void)
if (PyType_Ready(&PyCode_Type) < 0)
Py_FatalError("Can't initialize code type");
_Py_code_object_list = PyList_New(0);
if (_Py_code_object_list == NULL)
Py_FatalError("Can't initialize code type");
if (PyType_Ready(&PyFrame_Type) < 0)
Py_FatalError("Can't initialize frame type");

View File

@ -1597,11 +1597,22 @@ PyObject *PyUnicode_EncodeFSDefault(PyObject *unicode)
"surrogateescape");
}
else {
/* if you change the default encoding, update also
PyUnicode_DecodeFSDefaultAndSize() and redecode_filenames() */
return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
PyUnicode_GET_SIZE(unicode),
"surrogateescape");
/* locale encoding with surrogateescape */
wchar_t *wchar;
char *bytes;
PyObject *bytes_obj;
wchar = PyUnicode_AsWideCharString(unicode, NULL);
if (wchar == NULL)
return NULL;
bytes = _Py_wchar2char(wchar);
PyMem_Free(wchar);
if (bytes == NULL)
return NULL;
bytes_obj = PyBytes_FromString(bytes);
PyMem_Free(bytes);
return bytes_obj;
}
}
@ -1769,9 +1780,22 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
"surrogateescape");
}
else {
/* if you change the default encoding, update also
PyUnicode_EncodeFSDefault() and redecode_filenames() */
return PyUnicode_DecodeUTF8(s, size, "surrogateescape");
/* locale encoding with surrogateescape */
wchar_t *wchar;
PyObject *unicode;
if (s[size] != '\0' || size != strlen(s)) {
PyErr_SetString(PyExc_TypeError, "embedded NUL character");
return NULL;
}
wchar = _Py_char2wchar(s);
if (wchar == NULL)
return NULL;
unicode = PyUnicode_FromWideChar(wchar, -1);
PyMem_Free(wchar);
return unicode;
}
}

View File

@ -719,259 +719,6 @@ initmain(void)
}
}
/* Re-interpret a filename under another encoding.

   'file' is first encoded back to bytes with PyUnicode_EncodeFSDefault()
   (the default filesystem encoding, utf-8), then those bytes are decoded
   with 'new_encoding' using the 'errors' error handler.

   Return the object produced by PyUnicode_Decode(), or NULL if either the
   encoding or the decoding step fails. */
static PyObject*
redecode_filename(PyObject *file, const char *new_encoding,
                  const char *errors)
{
    PyObject *redecoded;
    PyObject *encoded = PyUnicode_EncodeFSDefault(file);

    if (encoded != NULL) {
        redecoded = PyUnicode_Decode(PyBytes_AsString(encoded),
                                     PyBytes_GET_SIZE(encoded),
                                     new_encoding,
                                     errors);
        /* The intermediate bytes object is only needed for the decode. */
        Py_DECREF(encoded);
    }
    else {
        redecoded = NULL;
    }
    return redecoded;
}
/* Redecode every entry of a path list in place.

   Each item of 'paths' is passed through redecode_filename() with
   'new_encoding' and 'errors', and the result replaces the item at the
   same index.

   Return 0 on success, -1 on failure. */
static int
redecode_path_list(PyObject *paths,
                   const char *new_encoding, const char *errors)
{
    PyObject *filename, *new_filename;
    Py_ssize_t i, size;

    size = PyList_Size(paths);
    /* A negative size means PyList_Size() failed (for example 'paths' is
       not a list); do not report success in that case. */
    if (size < 0)
        return -1;

    for (i = 0; i < size; i++) {
        /* Borrowed reference: nothing to release for 'filename'. */
        filename = PyList_GetItem(paths, i);
        if (filename == NULL)
            return -1;

        new_filename = redecode_filename(filename, new_encoding, errors);
        if (new_filename == NULL)
            return -1;

        /* PyList_SetItem() takes over the reference to new_filename, also
           when it fails, so it must not be released again here. */
        if (PyList_SetItem(paths, i, new_filename))
            return -1;
    }
    return 0;
}
/* Redecode the __file__ and __path__ attributes of every module found in
   the current interpreter's module dictionary (sys.modules).

   For each module, the filename returned by PyModule_GetFilenameObject()
   is redecoded and stored back as __file__, and the __path__ list (if the
   module has one) is redecoded in place. A module lacking either
   attribute is not an error: the lookup failure is cleared and the
   attribute is skipped.

   Return 0 on success, -1 on failure. */
static int
redecode_sys_modules(const char *new_encoding, const char *errors)
{
    PyInterpreterState *interp;
    PyObject *modules, *values, *file, *new_file;
    PyObject *iter = NULL, *module = NULL, *paths = NULL;
    int err;

    interp = PyThreadState_GET()->interp;
    modules = interp->modules;

    values = PyObject_CallMethod(modules, "values", "");
    if (values == NULL)
        goto error;

    iter = PyObject_GetIter(values);
    Py_DECREF(values);
    if (iter == NULL)
        goto error;

    while (1)
    {
        module = PyIter_Next(iter);
        if (module == NULL) {
            /* NULL means either exhaustion or an error. */
            if (PyErr_Occurred())
                goto error;
            else
                break;
        }

        file = PyModule_GetFilenameObject(module);
        if (file != NULL) {
            new_file = redecode_filename(file, new_encoding, errors);
            Py_DECREF(file);
            if (new_file == NULL)
                goto error;
            err = PyObject_SetAttrString(module, "__file__", new_file);
            Py_DECREF(new_file);
            if (err)
                goto error;
        }
        else
            PyErr_Clear();

        paths = PyObject_GetAttrString(module, "__path__");
        if (paths != NULL) {
            if (redecode_path_list(paths, new_encoding, errors))
                goto error;
            /* PyObject_GetAttrString() returned a new reference: release
               it once the list has been updated. */
            Py_CLEAR(paths);
        }
        else
            PyErr_Clear();

        Py_CLEAR(module);
    }
    Py_CLEAR(iter);
    return 0;

error:
    Py_XDECREF(iter);
    Py_XDECREF(module);
    Py_XDECREF(paths);
    return -1;
}
/* Redecode the keys of sys.path_importer_cache.

   A fresh dictionary is built in which every key of the current cache is
   replaced by its redecoded form (the importer values are kept as is), and
   that dictionary is then installed as sys.path_importer_cache.

   Return 0 on success, -1 on failure (including when
   sys.path_importer_cache cannot be fetched). */
static int
redecode_sys_path_importer_cache(const char *new_encoding, const char *errors)
{
    PyObject *path_importer_cache, *items, *path, *importer, *new_path;
    PyObject *new_cache = NULL, *iter = NULL, *item = NULL;
    int err;

    path_importer_cache = PySys_GetObject("path_importer_cache");
    if (path_importer_cache == NULL)
        goto error;

    items = PyObject_CallMethod(path_importer_cache, "items", "");
    if (items == NULL)
        goto error;

    iter = PyObject_GetIter(items);
    Py_DECREF(items);
    if (iter == NULL)
        goto error;

    new_cache = PyDict_New();
    if (new_cache == NULL)
        goto error;

    while (1)
    {
        item = PyIter_Next(iter);
        if (item == NULL) {
            /* NULL means either exhaustion or an error. */
            if (PyErr_Occurred())
                goto error;
            else
                break;
        }
        /* 'path' and 'importer' are borrowed from 'item', so 'item' has to
           stay alive until they have been used. */
        path = PyTuple_GET_ITEM(item, 0);
        importer = PyTuple_GET_ITEM(item, 1);

        new_path = redecode_filename(path, new_encoding, errors);
        if (new_path == NULL)
            goto error;
        err = PyDict_SetItem(new_cache, new_path, importer);
        Py_DECREF(new_path);
        if (err)
            goto error;

        /* PyIter_Next() returned a new reference: release it before
           fetching the next entry. */
        Py_CLEAR(item);
    }
    Py_CLEAR(iter);
    if (PySys_SetObject("path_importer_cache", new_cache))
        goto error;
    Py_CLEAR(new_cache);
    return 0;

error:
    Py_XDECREF(iter);
    Py_XDECREF(new_cache);
    Py_XDECREF(item);
    return -1;
}
/* Redecode co_filename attribute of all code objects */
static int
redecode_code_objects(const char *new_encoding, const char *errors)
{
Py_ssize_t i, len;
PyCodeObject *co;
PyObject *ref, *new_file;
len = Py_SIZE(_Py_code_object_list);
for (i=0; i < len; i++) {
ref = PyList_GET_ITEM(_Py_code_object_list, i);
co = (PyCodeObject *)PyWeakref_GetObject(ref);
if ((PyObject*)co == Py_None)
continue;
if (co == NULL)
return -1;
new_file = redecode_filename(co->co_filename, new_encoding, errors);
if (new_file == NULL)
return -1;
Py_DECREF(co->co_filename);
co->co_filename = new_file;
}
Py_CLEAR(_Py_code_object_list);
return 0;
}
/* Redecode all filenames that were decoded with the default encoding
   (utf-8) once the filesystem encoding is known to be 'new_encoding'.

   In order, this covers: the __file__ and __path__ attributes of the
   modules, sys.path, sys.meta_path, sys.executable, the keys of
   sys.path_importer_cache, and the co_filename attribute of the code
   objects.

   Nothing is done when 'new_encoding' is "utf-8". The error handler is
   "surrogateescape", except for "mbcs" where NULL is passed instead.

   Return 0 on success, -1 as soon as one step fails. */
static int
redecode_filenames(const char *new_encoding)
{
    const char *errors;
    PyObject *sys_list, *old_executable, *new_executable;
    int status;

    /* PyUnicode_DecodeFSDefault() and PyUnicode_EncodeFSDefault() already
       use utf-8 when Py_FileSystemDefaultEncoding is NULL, so there is
       nothing to convert. */
    if (strcmp(new_encoding, "utf-8") == 0)
        return 0;

    errors = (strcmp(new_encoding, "mbcs") == 0) ? NULL : "surrogateescape";

    /* sys.modules */
    if (redecode_sys_modules(new_encoding, errors) != 0)
        return -1;

    /* sys.path and sys.meta_path: a missing list is silently skipped */
    sys_list = PySys_GetObject("path");
    if (sys_list != NULL
        && redecode_path_list(sys_list, new_encoding, errors) != 0)
        return -1;

    sys_list = PySys_GetObject("meta_path");
    if (sys_list != NULL
        && redecode_path_list(sys_list, new_encoding, errors) != 0)
        return -1;

    /* sys.executable: unlike the lists above, it has to exist */
    old_executable = PySys_GetObject("executable");
    if (old_executable == NULL)
        return -1;
    new_executable = redecode_filename(old_executable, new_encoding, errors);
    if (new_executable == NULL)
        return -1;
    status = PySys_SetObject("executable", new_executable);
    Py_DECREF(new_executable);
    if (status != 0)
        return -1;

    /* sys.path_importer_cache */
    if (redecode_sys_path_importer_cache(new_encoding, errors) != 0)
        return -1;

    /* code objects */
    if (redecode_code_objects(new_encoding, errors) != 0)
        return -1;

    return 0;
}
static void
initfsencoding(void)
{
@ -987,11 +734,8 @@ initfsencoding(void)
stdin and stdout if these are terminals. */
codeset = get_codeset();
if (codeset != NULL) {
if (redecode_filenames(codeset))
Py_FatalError("Py_Initialize: can't redecode filenames");
Py_FileSystemDefaultEncoding = codeset;
Py_HasFileSystemDefaultEncoding = 0;
Py_CLEAR(_Py_code_object_list);
return;
} else {
fprintf(stderr, "Unable to get the locale encoding:\n");
@ -1004,8 +748,6 @@ initfsencoding(void)
}
#endif
Py_CLEAR(_Py_code_object_list);
/* the encoding is mbcs, utf-8 or ascii */
codec = _PyCodec_Lookup(Py_FileSystemDefaultEncoding);
if (!codec) {