From 4ee65a96858ab9b54c505bad7f2338a475243343 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sat, 22 Jan 2011 10:30:29 +0000 Subject: [PATCH] Issue #10955: zipimport uses ASCII at bootstrap zipimport uses ASCII encoding instead of cp437 to decode filenames, at bootstrap, if the codec registry is not ready yet. It is still possible to have non-ASCII filenames using the Unicode flag (UTF-8 encoding) for file entries in the ZIP file. --- Misc/NEWS | 5 +++++ Modules/zipimport.c | 20 +++++++++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/Misc/NEWS b/Misc/NEWS index 0ae7d7d8003..e6a23bdc13a 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -16,6 +16,11 @@ Core and Builtins Library ------- +- Issue #10955: zipimport uses ASCII encoding instead of cp437 to decode + filenames, at bootstrap, if the codec registry is not ready yet. It is still + possible to have non-ASCII filenames using the Unicode flag (UTF-8 encoding) + for file entries in the ZIP file. + - Issue #10949: Improved robustness of rotating file handlers. - Issue #10955: Fix a potential crash when trying to mmap() a file past its diff --git a/Modules/zipimport.c b/Modules/zipimport.c index ba449c0d222..e38587f7d94 100644 --- a/Modules/zipimport.c +++ b/Modules/zipimport.c @@ -725,6 +725,7 @@ read_directory(PyObject *archive_obj) long arc_offset; /* offset from beginning of file to start of zip-archive */ PyObject *pathobj; const char *charset; + int bootstrap; if (PyUnicode_GET_SIZE(archive_obj) > MAXPATHLEN) { PyErr_SetString(PyExc_OverflowError, @@ -801,13 +802,30 @@ read_directory(PyObject *archive_obj) *p = 0; /* Add terminating null byte */ header_offset += header_size; + bootstrap = 0; if (flags & 0x0800) charset = "utf-8"; + else if (!PyThreadState_GET()->interp->codecs_initialized) { + /* During bootstrap, we may need to load the encodings + package from a ZIP file. But the cp437 encoding is implemented + in Python in the encodings package. 
+ + Break out of this dependency by assuming that the path to + the encodings module is ASCII-only. */ + charset = "ascii"; + bootstrap = 1; + } else charset = "cp437"; nameobj = PyUnicode_Decode(name, name_size, charset, NULL); - if (nameobj == NULL) + if (nameobj == NULL) { + if (bootstrap) + PyErr_Format(PyExc_NotImplementedError, + "bootstrap issue: python%i%i.zip contains non-ASCII " + "filenames without the unicode flag", + PY_MAJOR_VERSION, PY_MINOR_VERSION); goto error; + } Py_UNICODE_strncpy(path + length + 1, PyUnicode_AS_UNICODE(nameobj), MAXPATHLEN - length - 1); pathobj = PyUnicode_FromUnicode(path, Py_UNICODE_strlen(path));