Issue #16416: OS data are now always encoded/decoded to/from

UTF-8/surrogateescape, instead of the locale encoding (which may be ASCII if no
locale environment variable is set), to avoid inconsistencies with
os.fsencode() and os.fsdecode() functions which are already using
UTF-8/surrogateescape.
This commit is contained in:
Victor Stinner 2012-11-12 23:04:02 +01:00
parent 29824550b1
commit e262377cab
3 changed files with 51 additions and 10 deletions

View File

@ -10,6 +10,12 @@ What's New in Python 3.4.0 Alpha 1?
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #16416: On Mac OS X, OS data are now always encoded/decoded to/from
UTF-8/surrogateescape, instead of the locale encoding (which may be ASCII if
no locale environment variable is set), to avoid inconsistencies with
os.fsencode() and os.fsdecode() functions which are already using
UTF-8/surrogateescape.
- Issue #16453: Fix equality testing of dead weakref objects. - Issue #16453: Fix equality testing of dead weakref objects.
- Issue #9535: Fix pending signals that have been received but not yet - Issue #9535: Fix pending signals that have been received but not yet

View File

@ -15,10 +15,6 @@ wmain(int argc, wchar_t **argv)
} }
#else #else
#ifdef __APPLE__
extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size);
#endif
int int
main(int argc, char **argv) main(int argc, char **argv)
{ {
@ -45,11 +41,7 @@ main(int argc, char **argv)
oldloc = strdup(setlocale(LC_ALL, NULL)); oldloc = strdup(setlocale(LC_ALL, NULL));
setlocale(LC_ALL, ""); setlocale(LC_ALL, "");
for (i = 0; i < argc; i++) { for (i = 0; i < argc; i++) {
#ifdef __APPLE__
argv_copy[i] = _Py_DecodeUTF8_surrogateescape(argv[i], strlen(argv[i]));
#else
argv_copy[i] = _Py_char2wchar(argv[i], NULL); argv_copy[i] = _Py_char2wchar(argv[i], NULL);
#endif
if (!argv_copy[i]) { if (!argv_copy[i]) {
free(oldloc); free(oldloc);
fprintf(stderr, "Fatal Python error: " fprintf(stderr, "Fatal Python error: "

View File

@ -8,6 +8,10 @@
#include <langinfo.h> #include <langinfo.h>
#endif #endif
#ifdef __APPLE__
extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size);
#endif
PyObject * PyObject *
_Py_device_encoding(int fd) _Py_device_encoding(int fd)
{ {
@ -60,6 +64,15 @@ _Py_device_encoding(int fd)
wchar_t* wchar_t*
_Py_char2wchar(const char* arg, size_t *size) _Py_char2wchar(const char* arg, size_t *size)
{ {
#ifdef __APPLE__
wchar_t *wstr;

/* Mac OS X branch: decode the byte string with
   _Py_DecodeUTF8_surrogateescape() instead of the locale-based
   conversion used by the #else branch. */
wstr = _Py_DecodeUTF8_surrogateescape(arg, strlen(arg));
if (size != NULL) {
    /* Always write *size when the caller asked for it.  On failure store
       (size_t)-1, the same value the non-Apple path stores at its "oom"
       label, so callers see one convention on every platform. */
    if (wstr != NULL)
        *size = wcslen(wstr);
    else
        *size = (size_t)-1;
}
/* NULL on failure, otherwise the decoded wide string. */
return wstr;
#else
wchar_t *res; wchar_t *res;
#ifdef HAVE_BROKEN_MBSTOWCS #ifdef HAVE_BROKEN_MBSTOWCS
/* Some platforms have a broken implementation of /* Some platforms have a broken implementation of
@ -145,7 +158,7 @@ _Py_char2wchar(const char* arg, size_t *size)
argsize -= converted; argsize -= converted;
out++; out++;
} }
#else #else /* HAVE_MBRTOWC */
/* Cannot use C locale for escaping; manually escape as if charset /* Cannot use C locale for escaping; manually escape as if charset
is ASCII (i.e. escape all bytes > 128. This will still roundtrip is ASCII (i.e. escape all bytes > 128. This will still roundtrip
correctly in the locale's charset, which must be an ASCII superset. */ correctly in the locale's charset, which must be an ASCII superset. */
@ -160,7 +173,7 @@ _Py_char2wchar(const char* arg, size_t *size)
else else
*out++ = 0xdc00 + *in++; *out++ = 0xdc00 + *in++;
*out = 0; *out = 0;
#endif #endif /* HAVE_MBRTOWC */
if (size != NULL) if (size != NULL)
*size = out - res; *size = out - res;
return res; return res;
@ -168,6 +181,7 @@ oom:
if (size != NULL) if (size != NULL)
*size = (size_t)-1; *size = (size_t)-1;
return NULL; return NULL;
#endif /* __APPLE__ */
} }
/* Encode a (wide) character string to the locale encoding with the /* Encode a (wide) character string to the locale encoding with the
@ -184,6 +198,34 @@ oom:
char* char*
_Py_wchar2char(const wchar_t *text, size_t *error_pos) _Py_wchar2char(const wchar_t *text, size_t *error_pos)
{ {
#ifdef __APPLE__
Py_ssize_t len;
PyObject *unicode, *bytes = NULL;
char *cpath;

/* Mac OS X branch: go through a temporary str object and encode it with
   UTF-8/surrogateescape instead of using the locale encoding. */
unicode = PyUnicode_FromWideChar(text, wcslen(text));
if (unicode == NULL) {
    /* There is no object to release here: Py_DECREF() must never be
       called on NULL (only Py_XDECREF() tolerates it). */
    return NULL;
}

bytes = _PyUnicode_AsUTF8String(unicode, "surrogateescape");
Py_DECREF(unicode);
if (bytes == NULL) {
    /* Failure is reported through the NULL return value only, so drop the
       pending Python exception. */
    PyErr_Clear();
    return NULL;
}

/* Copy the encoded bytes, including the trailing NUL byte, into a
   PyMem_Malloc() buffer that outlives the temporary bytes object. */
len = PyBytes_GET_SIZE(bytes);
cpath = PyMem_Malloc(len+1);
if (cpath == NULL) {
    Py_DECREF(bytes);
    return NULL;
}
memcpy(cpath, PyBytes_AsString(bytes), len + 1);
Py_DECREF(bytes);
/* NOTE(review): this branch never writes *error_pos on any path --
   confirm callers on Mac OS X do not read it after a NULL return. */
return cpath;
#else /* __APPLE__ */
const size_t len = wcslen(text); const size_t len = wcslen(text);
char *result = NULL, *bytes = NULL; char *result = NULL, *bytes = NULL;
size_t i, size, converted; size_t i, size, converted;
@ -243,6 +285,7 @@ _Py_wchar2char(const wchar_t *text, size_t *error_pos)
bytes = result; bytes = result;
} }
return result; return result;
#endif /* __APPLE__ */
} }
/* In principle, this should use HAVE__WSTAT, and _wstat /* In principle, this should use HAVE__WSTAT, and _wstat