diff --git a/Include/fileutils.h b/Include/fileutils.h index 11ebebfba9c..2fade9b95cd 100644 --- a/Include/fileutils.h +++ b/Include/fileutils.h @@ -10,7 +10,8 @@ PyAPI_FUNC(wchar_t *) _Py_char2wchar( size_t *size); PyAPI_FUNC(char*) _Py_wchar2char( - const wchar_t *text); + const wchar_t *text, + size_t *error_pos); #if defined(HAVE_STAT) && !defined(MS_WINDOWS) PyAPI_FUNC(int) _Py_wstat( diff --git a/Modules/main.c b/Modules/main.c index 008b6a4d10e..590104d8883 100644 --- a/Modules/main.c +++ b/Modules/main.c @@ -646,7 +646,7 @@ Py_Main(int argc, wchar_t **argv) if (fp == NULL) { char *cfilename_buffer; const char *cfilename; - cfilename_buffer = _Py_wchar2char(filename); + cfilename_buffer = _Py_wchar2char(filename, NULL); if (cfilename_buffer != NULL) cfilename = cfilename_buffer; else diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index cff756a1cec..2250f45e9a3 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1606,14 +1606,31 @@ PyUnicode_EncodeFSDefault(PyObject *unicode) wchar_t *wchar; char *bytes; PyObject *bytes_obj; + size_t error_pos; wchar = PyUnicode_AsWideCharString(unicode, NULL); if (wchar == NULL) return NULL; - bytes = _Py_wchar2char(wchar); - PyMem_Free(wchar); - if (bytes == NULL) + bytes = _Py_wchar2char(wchar, &error_pos); + if (bytes == NULL) { + if (error_pos != (size_t)-1) { + char *errmsg = strerror(errno); + PyObject *exc = NULL; + if (errmsg == NULL) + errmsg = "Py_wchar2char() failed"; + raise_encode_exception(&exc, + "filesystemencoding", + PyUnicode_AS_UNICODE(unicode), PyUnicode_GET_SIZE(unicode), + error_pos, error_pos+1, + errmsg); + Py_XDECREF(exc); + } + else + PyErr_NoMemory(); + PyMem_Free(wchar); return NULL; + } + PyMem_Free(wchar); bytes_obj = PyBytes_FromString(bytes); PyMem_Free(bytes); diff --git a/Python/fileutils.c b/Python/fileutils.c index 03fc0cb79dd..18e98e513c6 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -132,15 +132,21 @@ oom: This function is the reverse of _Py_char2wchar(). Return a pointer to a newly allocated byte string (use PyMem_Free() to free - the memory), or NULL on error (conversion error or memory error). */ + the memory), or NULL on conversion or memory allocation error. + + If error_pos is not NULL: *error_pos is the index of the invalid character + on conversion error, or (size_t)-1 otherwise. */ char* -_Py_wchar2char(const wchar_t *text) +_Py_wchar2char(const wchar_t *text, size_t *error_pos) { const size_t len = wcslen(text); char *result = NULL, *bytes = NULL; size_t i, size, converted; wchar_t c, buf[2]; + if (error_pos != NULL) + *error_pos = (size_t)-1; + /* The function works in two steps: 1. compute the length of the output buffer in bytes (size) 2. outputs the bytes */ @@ -168,6 +174,8 @@ _Py_wchar2char(const wchar_t *text) if (converted == (size_t)-1) { if (result != NULL) PyMem_Free(result); + if (error_pos != NULL) + *error_pos = i; return NULL; } if (bytes != NULL) { @@ -208,7 +216,7 @@ _Py_wstat(const wchar_t* path, struct stat *buf) { int err; char *fname; - fname = _Py_wchar2char(path); + fname = _Py_wchar2char(path, NULL); if (fname == NULL) { errno = EINVAL; return -1; @@ -263,7 +271,7 @@ _Py_wfopen(const wchar_t *path, const wchar_t *mode) errno = EINVAL; return NULL; } - cpath = _Py_wchar2char(path); + cpath = _Py_wchar2char(path, NULL); if (cpath == NULL) return NULL; f = fopen(cpath, cmode); @@ -317,7 +325,7 @@ _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz) int res; size_t r1; - cpath = _Py_wchar2char(path); + cpath = _Py_wchar2char(path, NULL); if (cpath == NULL) { errno = EINVAL; return -1; @@ -361,7 +369,7 @@ _Py_wrealpath(const wchar_t *path, wchar_t *wresolved_path; char *res; size_t r; - cpath = _Py_wchar2char(path); + cpath = _Py_wchar2char(path, NULL); if (cpath == NULL) { errno = EINVAL; return NULL;