From 9bee329130aae5a13050c08dab9d349b76e66835 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 21 Dec 2017 16:49:13 +0100 Subject: [PATCH] bpo-32030: Add _Py_FindEnvConfigValue() (#4963) Add a new _Py_FindEnvConfigValue() function: code shared between Windows and Unix implementations of _PyPathConfig_Calculate() to read the pyvenv.cfg file. _Py_FindEnvConfigValue() now uses _Py_DecodeUTF8_surrogateescape() instead of using a Python Unicode string, because the Python API must not be used early during Python initialization. Same change in Unix search_for_exec_prefix(): use _Py_DecodeUTF8_surrogateescape(). Also clean up encode_current_locale(): PyMem_RawFree/PyMem_Free can be called with NULL. Also fix the "NUL byte" => "NULL byte" typo. --- Include/fileutils.h | 7 ++++ Include/pylifecycle.h | 5 +++ Modules/getpath.c | 81 +++++-------------------------------------- PC/getpathp.c | 56 ++---------------------------- Python/fileutils.c | 16 ++++----- Python/pathconfig.c | 50 ++++++++++++++++++++++++++ 6 files changed, 80 insertions(+), 135 deletions(-) diff --git a/Include/fileutils.h b/Include/fileutils.h index b7b6cd26c00..d027e18348f 100644 --- a/Include/fileutils.h +++ b/Include/fileutils.h @@ -19,6 +19,13 @@ PyAPI_FUNC(char*) _Py_EncodeLocaleRaw( size_t *error_pos); #endif +#ifdef Py_BUILD_CORE +PyAPI_FUNC(wchar_t*) _Py_DecodeUTF8_surrogateescape( + const char *s, + Py_ssize_t size, + size_t *p_wlen); +#endif + #ifndef Py_LIMITED_API PyAPI_FUNC(PyObject *) _Py_device_encoding(int); diff --git a/Include/pylifecycle.h b/Include/pylifecycle.h index da740fef23a..659c6df644e 100644 --- a/Include/pylifecycle.h +++ b/Include/pylifecycle.h @@ -118,6 +118,11 @@ PyAPI_FUNC(wchar_t *) Py_GetPath(void); #ifdef Py_BUILD_CORE PyAPI_FUNC(_PyInitError) _PyPathConfig_Init(const _PyCoreConfig *core_config); PyAPI_FUNC(PyObject*) _PyPathConfig_ComputeArgv0(int argc, wchar_t **argv); +PyAPI_FUNC(int) _Py_FindEnvConfigValue( + FILE *env_file, + const wchar_t *key, + wchar_t *value, + size_t
value_size); #endif PyAPI_FUNC(void) Py_SetPath(const wchar_t *); #ifdef MS_WINDOWS diff --git a/Modules/getpath.c b/Modules/getpath.c index 494fa19bdf3..85e737b61d0 100644 --- a/Modules/getpath.c +++ b/Modules/getpath.c @@ -296,62 +296,6 @@ absolutize(wchar_t *path) } -/* search for a prefix value in an environment file. If found, copy it - to the provided buffer, which is expected to be no more than MAXPATHLEN - bytes long. -*/ -static int -find_env_config_value(FILE * env_file, const wchar_t * key, wchar_t * value) -{ - int result = 0; /* meaning not found */ - char buffer[MAXPATHLEN*2+1]; /* allow extra for key, '=', etc. */ - - fseek(env_file, 0, SEEK_SET); - while (!feof(env_file)) { - char * p = fgets(buffer, MAXPATHLEN*2, env_file); - wchar_t tmpbuffer[MAXPATHLEN*2+1]; - PyObject * decoded; - int n; - - if (p == NULL) { - break; - } - n = strlen(p); - if (p[n - 1] != '\n') { - /* line has overflowed - bail */ - break; - } - if (p[0] == '#') { - /* Comment - skip */ - continue; - } - decoded = PyUnicode_DecodeUTF8(buffer, n, "surrogateescape"); - if (decoded != NULL) { - Py_ssize_t k; - wchar_t * state; - k = PyUnicode_AsWideChar(decoded, - tmpbuffer, MAXPATHLEN * 2); - Py_DECREF(decoded); - if (k >= 0) { - wchar_t * tok = wcstok(tmpbuffer, L" \t\r\n", &state); - if ((tok != NULL) && !wcscmp(tok, key)) { - tok = wcstok(NULL, L" \t", &state); - if ((tok != NULL) && !wcscmp(tok, L"=")) { - tok = wcstok(NULL, L"\r\n", &state); - if (tok != NULL) { - wcsncpy(value, tok, MAXPATHLEN); - result = 1; - break; - } - } - } - } - } - } - return result; -} - - /* search_for_prefix requires that argv0_path be no more than MAXPATHLEN bytes long. 
*/ @@ -501,24 +445,17 @@ search_for_exec_prefix(const _PyCoreConfig *core_config, } else { char buf[MAXPATHLEN+1]; - PyObject *decoded; - wchar_t rel_builddir_path[MAXPATHLEN+1]; + wchar_t *rel_builddir_path; n = fread(buf, 1, MAXPATHLEN, f); buf[n] = '\0'; fclose(f); - decoded = PyUnicode_DecodeUTF8(buf, n, "surrogateescape"); - if (decoded != NULL) { - Py_ssize_t k; - k = PyUnicode_AsWideChar(decoded, - rel_builddir_path, MAXPATHLEN); - Py_DECREF(decoded); - if (k >= 0) { - rel_builddir_path[k] = L'\0'; - wcsncpy(exec_prefix, calculate->argv0_path, MAXPATHLEN); - exec_prefix[MAXPATHLEN] = L'\0'; - joinpath(exec_prefix, rel_builddir_path); - return -1; - } + rel_builddir_path = _Py_DecodeUTF8_surrogateescape(buf, n, NULL); + if (rel_builddir_path != NULL) { + wcsncpy(exec_prefix, calculate->argv0_path, MAXPATHLEN); + exec_prefix[MAXPATHLEN] = L'\0'; + joinpath(exec_prefix, rel_builddir_path); + PyMem_RawFree(rel_builddir_path ); + return -1; } } } @@ -784,7 +721,7 @@ calculate_read_pyenv(PyCalculatePath *calculate) } /* Look for a 'home' variable and set argv0_path to it, if found */ - if (find_env_config_value(env_file, L"home", tmpbuffer)) { + if (_Py_FindEnvConfigValue(env_file, L"home", tmpbuffer, MAXPATHLEN)) { wcscpy(calculate->argv0_path, tmpbuffer); } fclose(env_file); diff --git a/PC/getpathp.c b/PC/getpathp.c index fe4ef1a1ab3..c4977e79979 100644 --- a/PC/getpathp.c +++ b/PC/getpathp.c @@ -564,58 +564,6 @@ done: } -static int -find_env_config_value(FILE * env_file, const wchar_t * key, wchar_t * value) -{ - int result = 0; /* meaning not found */ - char buffer[MAXPATHLEN*2+1]; /* allow extra for key, '=', etc. 
*/ - - fseek(env_file, 0, SEEK_SET); - while (!feof(env_file)) { - char * p = fgets(buffer, MAXPATHLEN*2, env_file); - wchar_t tmpbuffer[MAXPATHLEN*2+1]; - PyObject * decoded; - size_t n; - - if (p == NULL) { - break; - } - n = strlen(p); - if (p[n - 1] != '\n') { - /* line has overflowed - bail */ - break; - } - if (p[0] == '#') { - /* Comment - skip */ - continue; - } - decoded = PyUnicode_DecodeUTF8(buffer, n, "surrogateescape"); - if (decoded != NULL) { - Py_ssize_t k; - k = PyUnicode_AsWideChar(decoded, - tmpbuffer, MAXPATHLEN * 2); - Py_DECREF(decoded); - if (k >= 0) { - wchar_t * context = NULL; - wchar_t * tok = wcstok_s(tmpbuffer, L" \t\r\n", &context); - if ((tok != NULL) && !wcscmp(tok, key)) { - tok = wcstok_s(NULL, L" \t", &context); - if ((tok != NULL) && !wcscmp(tok, L"=")) { - tok = wcstok_s(NULL, L"\r\n", &context); - if (tok != NULL) { - wcsncpy(value, tok, MAXPATHLEN); - result = 1; - break; - } - } - } - } - } - } - return result; -} - - static int read_pth_file(_PyPathConfig *config, wchar_t *prefix, const wchar_t *path, int *isolated, int *nosite) @@ -765,9 +713,11 @@ calculate_pyvenv_file(PyCalculatePath *calculate) FILE *env_file = _Py_wfopen(envbuffer, L"r"); if (env_file == NULL) { errno = 0; + reduce(envbuffer); reduce(envbuffer); join(envbuffer, env_cfg); + env_file = _Py_wfopen(envbuffer, L"r"); if (env_file == NULL) { errno = 0; @@ -780,7 +730,7 @@ calculate_pyvenv_file(PyCalculatePath *calculate) /* Look for a 'home' variable and set argv0_path to it, if found */ wchar_t tmpbuffer[MAXPATHLEN+1]; - if (find_env_config_value(env_file, L"home", tmpbuffer)) { + if (_Py_FindEnvConfigValue(env_file, L"home", tmpbuffer, MAXPATHLEN)) { wcscpy_s(calculate->argv0_path, MAXPATHLEN+1, tmpbuffer); } fclose(env_file); diff --git a/Python/fileutils.c b/Python/fileutils.c index 1ccd4baa6d2..645a1793664 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -20,8 +20,6 @@ extern int winerror_to_errno(int); #include <fcntl.h> #endif /* HAVE_FCNTL_H */
-extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size, - size_t *p_wlen); extern char* _Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos, int raw_malloc); @@ -194,7 +192,7 @@ encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos, int raw_mal len = wcslen(text); - /* +1 for NUL byte */ + /* +1 for NULL byte */ if (raw_malloc) { result = PyMem_RawMalloc(len + 1); } @@ -467,13 +465,11 @@ encode_current_locale(const wchar_t *text, size_t *error_pos, int raw_malloc) else converted = wcstombs(NULL, buf, 0); if (converted == (size_t)-1) { - if (result != NULL) { - if (raw_malloc) { - PyMem_RawFree(result); - } - else { - PyMem_Free(result); - } + if (raw_malloc) { + PyMem_RawFree(result); + } + else { + PyMem_Free(result); } if (error_pos != NULL) *error_pos = i; diff --git a/Python/pathconfig.c b/Python/pathconfig.c index acb25b6fa89..9591fcc4911 100644 --- a/Python/pathconfig.c +++ b/Python/pathconfig.c @@ -354,6 +354,56 @@ _PyPathConfig_ComputeArgv0(int argc, wchar_t **argv) return PyUnicode_FromWideChar(argv0, n); } + +/* Search for a prefix value in an environment file (pyvenv.cfg). + If found, copy it into the provided buffer. */ +int +_Py_FindEnvConfigValue(FILE *env_file, const wchar_t *key, + wchar_t *value, size_t value_size) +{ + int result = 0; /* meaning not found */ + char buffer[MAXPATHLEN*2+1]; /* allow extra for key, '=', etc. 
*/ + + fseek(env_file, 0, SEEK_SET); + while (!feof(env_file)) { + char * p = fgets(buffer, MAXPATHLEN*2, env_file); + wchar_t *tmpbuffer; + int n; + + if (p == NULL) { + break; + } + n = strlen(p); + if (p[n - 1] != '\n') { + /* line has overflowed - bail */ + break; + } + if (p[0] == '#') { + /* Comment - skip */ + continue; + } + tmpbuffer = _Py_DecodeUTF8_surrogateescape(buffer, n, NULL); + if (tmpbuffer != NULL) { + wchar_t * state; + wchar_t * tok = wcstok(tmpbuffer, L" \t\r\n", &state); + if ((tok != NULL) && !wcscmp(tok, key)) { + tok = wcstok(NULL, L" \t", &state); + if ((tok != NULL) && !wcscmp(tok, L"=")) { + tok = wcstok(NULL, L"\r\n", &state); + if (tok != NULL) { + wcsncpy(value, tok, MAXPATHLEN); + result = 1; + PyMem_RawFree(tmpbuffer); + break; + } + } + } + PyMem_RawFree(tmpbuffer); + } + } + return result; +} + #ifdef __cplusplus } #endif