From 9dd762013fd9fcf975ad51700b55d050ca9ed60e Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 21 Dec 2017 16:20:32 +0100 Subject: [PATCH] bpo-32030: Add _Py_EncodeLocaleRaw() (#4961) Replace Py_EncodeLocale() with _Py_EncodeLocaleRaw() in: * _Py_wfopen() * _Py_wreadlink() * _Py_wrealpath() * _Py_wstat() * pymain_open_filename() These functions are called early during Python initialization, so only the RAW memory allocator must be used. --- Include/fileutils.h | 5 +- Modules/getpath.c | 4 +- Modules/main.c | 4 +- Objects/unicodeobject.c | 25 ++++++++-- Python/fileutils.c | 104 +++++++++++++++++++++++++++------------- 5 files changed, 101 insertions(+), 41 deletions(-) diff --git a/Include/fileutils.h b/Include/fileutils.h index 900c70faad7..b7b6cd26c00 100644 --- a/Include/fileutils.h +++ b/Include/fileutils.h @@ -13,10 +13,13 @@ PyAPI_FUNC(wchar_t *) Py_DecodeLocale( PyAPI_FUNC(char*) Py_EncodeLocale( const wchar_t *text, size_t *error_pos); + +PyAPI_FUNC(char*) _Py_EncodeLocaleRaw( + const wchar_t *text, + size_t *error_pos); #endif #ifndef Py_LIMITED_API - PyAPI_FUNC(PyObject *) _Py_device_encoding(int); #ifdef MS_WINDOWS diff --git a/Modules/getpath.c b/Modules/getpath.c index b4b33437b6f..494fa19bdf3 100644 --- a/Modules/getpath.c +++ b/Modules/getpath.c @@ -140,13 +140,13 @@ _Py_wstat(const wchar_t* path, struct stat *buf) { int err; char *fname; - fname = Py_EncodeLocale(path, NULL); + fname = _Py_EncodeLocaleRaw(path, NULL); if (fname == NULL) { errno = EINVAL; return -1; } err = stat(fname, buf); - PyMem_Free(fname); + PyMem_RawFree(fname); return err; } diff --git a/Modules/main.c b/Modules/main.c index 1bf706b162c..dff7894bf35 100644 --- a/Modules/main.c +++ b/Modules/main.c @@ -1490,14 +1490,14 @@ pymain_open_filename(_PyMain *pymain) char *cfilename_buffer; const char *cfilename; int err = errno; - cfilename_buffer = Py_EncodeLocale(pymain->filename, NULL); + cfilename_buffer = _Py_EncodeLocaleRaw(pymain->filename, NULL); if (cfilename_buffer != 
NULL) cfilename = cfilename_buffer; else cfilename = ""; fprintf(stderr, "%ls: can't open file '%s': [Errno %d] %s\n", pymain->config.program, cfilename, err, strerror(err)); - PyMem_Free(cfilename_buffer); + PyMem_RawFree(cfilename_buffer); pymain->status = 2; return NULL; } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 716e352dea6..92a6ad6b979 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -5158,7 +5158,8 @@ _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size, size_t *p_wlen) On memory allocation failure, return NULL and write (size_t)-1 into *error_pos (if error_pos is set). */ char* -_Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos) +_Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos, + int raw_malloc) { const Py_ssize_t max_char_size = 4; Py_ssize_t len = wcslen(text); @@ -5167,7 +5168,12 @@ _Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos) char *bytes; if (len <= PY_SSIZE_T_MAX / max_char_size - 1) { - bytes = PyMem_Malloc((len + 1) * max_char_size); + if (raw_malloc) { + bytes = PyMem_RawMalloc((len + 1) * max_char_size); + } + else { + bytes = PyMem_Malloc((len + 1) * max_char_size); + } } else { bytes = NULL; @@ -5221,7 +5227,13 @@ _Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos) *p++ = '\0'; size_t final_size = (p - bytes); - char *bytes2 = PyMem_Realloc(bytes, final_size); + char *bytes2; + if (raw_malloc) { + bytes2 = PyMem_RawRealloc(bytes, final_size); + } + else { + bytes2 = PyMem_Realloc(bytes, final_size); + } if (bytes2 == NULL) { if (error_pos != NULL) { *error_pos = (size_t)-1; @@ -5231,7 +5243,12 @@ _Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos) return bytes2; error: - PyMem_Free(bytes); + if (raw_malloc) { + PyMem_RawFree(bytes); + } + else { + PyMem_Free(bytes); + } return NULL; } diff --git a/Python/fileutils.c b/Python/fileutils.c index eeb5f2e89d2..1ccd4baa6d2 100644 --- 
a/Python/fileutils.c +++ b/Python/fileutils.c @@ -23,7 +23,7 @@ extern int winerror_to_errno(int); extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size, size_t *p_wlen); extern char* _Py_EncodeUTF8_surrogateescape(const wchar_t *text, - size_t *error_pos); + size_t *error_pos, int raw_malloc); #ifdef O_CLOEXEC /* Does open() support the O_CLOEXEC flag? Possible values: @@ -183,7 +183,7 @@ error: } static char* -encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos) +encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos, int raw_malloc) { char *result = NULL, *out; size_t len, i; @@ -194,7 +194,13 @@ encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos) len = wcslen(text); - result = PyMem_Malloc(len + 1); /* +1 for NUL byte */ + /* +1 for NUL byte */ + if (raw_malloc) { + result = PyMem_RawMalloc(len + 1); + } + else { + result = PyMem_Malloc(len + 1); + } if (result == NULL) return NULL; @@ -211,9 +217,15 @@ encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos) *out++ = (char)(ch - 0xdc00); } else { - if (error_pos != NULL) + if (error_pos != NULL) { *error_pos = i; - PyMem_Free(result); + } + if (raw_malloc) { + PyMem_RawFree(result); + } + else { + PyMem_Free(result); + } return NULL; } } @@ -423,7 +435,7 @@ Py_DecodeLocale(const char* arg, size_t *size) #if !defined(__APPLE__) && !defined(__ANDROID__) static char* -encode_locale(const wchar_t *text, size_t *error_pos) +encode_current_locale(const wchar_t *text, size_t *error_pos, int raw_malloc) { const size_t len = wcslen(text); char *result = NULL, *bytes = NULL; @@ -455,8 +467,14 @@ encode_locale(const wchar_t *text, size_t *error_pos) else converted = wcstombs(NULL, buf, 0); if (converted == (size_t)-1) { - if (result != NULL) - PyMem_Free(result); + if (result != NULL) { + if (raw_malloc) { + PyMem_RawFree(result); + } + else { + PyMem_Free(result); + } + } if (error_pos != NULL) *error_pos = i; return NULL; @@ -475,10 
+493,16 @@ encode_locale(const wchar_t *text, size_t *error_pos) } size += 1; /* nul byte at the end */ - result = PyMem_Malloc(size); + if (raw_malloc) { + result = PyMem_RawMalloc(size); + } + else { + result = PyMem_Malloc(size); + } if (result == NULL) { - if (error_pos != NULL) + if (error_pos != NULL) { *error_pos = (size_t)-1; + } return NULL; } bytes = result; @@ -487,6 +511,28 @@ encode_locale(const wchar_t *text, size_t *error_pos) } #endif +static char* +encode_locale(const wchar_t *text, size_t *error_pos, int raw_malloc) +{ +#if defined(__APPLE__) || defined(__ANDROID__) + return _Py_EncodeUTF8_surrogateescape(text, error_pos, raw_malloc); +#else /* __APPLE__ */ + if (Py_UTF8Mode == 1) { + return _Py_EncodeUTF8_surrogateescape(text, error_pos, raw_malloc); + } + +#ifndef MS_WINDOWS + if (force_ascii == -1) + force_ascii = check_force_ascii(); + + if (force_ascii) + return encode_ascii_surrogateescape(text, error_pos, raw_malloc); +#endif + + return encode_current_locale(text, error_pos, raw_malloc); +#endif /* __APPLE__ or __ANDROID__ */ +} + /* Encode a wide character string to the locale encoding with the surrogateescape error handler: surrogate characters in the range U+DC80..U+DCFF are converted to bytes 0x80..0xFF. 
@@ -502,23 +548,16 @@ encode_locale(const wchar_t *text, size_t *error_pos) char* Py_EncodeLocale(const wchar_t *text, size_t *error_pos) { -#if defined(__APPLE__) || defined(__ANDROID__) - return _Py_EncodeUTF8_surrogateescape(text, error_pos); -#else /* __APPLE__ */ - if (Py_UTF8Mode == 1) { - return _Py_EncodeUTF8_surrogateescape(text, error_pos); - } + return encode_locale(text, error_pos, 0); +} -#ifndef MS_WINDOWS - if (force_ascii == -1) - force_ascii = check_force_ascii(); - if (force_ascii) - return encode_ascii_surrogateescape(text, error_pos); -#endif - - return encode_locale(text, error_pos); -#endif /* __APPLE__ or __ANDROID__ */ +/* Similar to Py_EncodeLocale(), but result must be freed by PyMem_RawFree() + instead of PyMem_Free(). */ +char* +_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos) +{ + return encode_locale(text, error_pos, 1); } @@ -1029,11 +1068,12 @@ _Py_wfopen(const wchar_t *path, const wchar_t *mode) errno = EINVAL; return NULL; } - cpath = Py_EncodeLocale(path, NULL); - if (cpath == NULL) + cpath = _Py_EncodeLocaleRaw(path, NULL); + if (cpath == NULL) { return NULL; + } f = fopen(cpath, cmode); - PyMem_Free(cpath); + PyMem_RawFree(cpath); #else f = _wfopen(path, mode); #endif @@ -1341,13 +1381,13 @@ _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz) int res; size_t r1; - cpath = Py_EncodeLocale(path, NULL); + cpath = _Py_EncodeLocaleRaw(path, NULL); if (cpath == NULL) { errno = EINVAL; return -1; } res = (int)readlink(cpath, cbuf, Py_ARRAY_LENGTH(cbuf)); - PyMem_Free(cpath); + PyMem_RawFree(cpath); if (res == -1) return -1; if (res == Py_ARRAY_LENGTH(cbuf)) { @@ -1386,13 +1426,13 @@ _Py_wrealpath(const wchar_t *path, wchar_t *wresolved_path; char *res; size_t r; - cpath = Py_EncodeLocale(path, NULL); + cpath = _Py_EncodeLocaleRaw(path, NULL); if (cpath == NULL) { errno = EINVAL; return NULL; } res = realpath(cpath, cresolved_path); - PyMem_Free(cpath); + PyMem_RawFree(cpath); if (res == NULL) return NULL;