bpo-36775: _PyCoreConfig only uses wchar_t* (GH-13062)

_PyCoreConfig: Change filesystem_encoding, filesystem_errors,
stdio_encoding and stdio_errors fields type from char* to wchar_t*.

Changes:

* PyInterpreterState: replace fscodec_initialized (int) with fs_codec
  structure.
* Add get_error_handler_wide() and unicode_encode_utf8() helper
  functions.
* Add error_handler parameter to unicode_encode_locale()
  and unicode_decode_locale().
* Remove _PyCoreConfig_SetString().
* Rename _PyCoreConfig_SetWideString() to _PyCoreConfig_SetString().
* Rename _PyCoreConfig_SetWideStringFromString()
  to _PyCoreConfig_DecodeLocale().
This commit is contained in:
Victor Stinner 2019-05-02 14:56:30 -04:00 committed by GitHub
parent 6ae2bbbdfc
commit 709d23dee6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 357 additions and 220 deletions

View File

@ -207,8 +207,8 @@ typedef struct {
See Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors.
*/
char *filesystem_encoding;
char *filesystem_errors;
wchar_t *filesystem_encoding;
wchar_t *filesystem_errors;
wchar_t *pycache_prefix; /* PYTHONPYCACHEPREFIX, -X pycache_prefix=PATH */
wchar_t *program_name; /* Program name, see also Py_GetProgramName() */
@ -334,13 +334,13 @@ typedef struct {
Value set from PYTHONIOENCODING environment variable and
Py_SetStandardStreamEncoding() function.
See also 'stdio_errors' attribute. */
char *stdio_encoding;
wchar_t *stdio_encoding;
/* Error handler of sys.stdin and sys.stdout.
Value set from PYTHONIOENCODING environment variable and
Py_SetStandardStreamEncoding() function.
See also 'stdio_encoding' attribute. */
char *stdio_errors;
wchar_t *stdio_errors;
#ifdef MS_WINDOWS
/* If greater than zero, use io.FileIO instead of WindowsConsoleIO for sys

View File

@ -106,12 +106,9 @@ PyAPI_FUNC(_PyInitError) _PyCoreConfig_Copy(
_PyCoreConfig *config,
const _PyCoreConfig *config2);
PyAPI_FUNC(_PyInitError) _PyCoreConfig_SetString(
char **config_str,
const char *str);
PyAPI_FUNC(_PyInitError) _PyCoreConfig_SetWideString(
wchar_t **config_str,
const wchar_t *str);
PyAPI_FUNC(_PyInitError) _PyCoreConfig_SetWideStringFromString(
PyAPI_FUNC(_PyInitError) _PyCoreConfig_DecodeLocale(
wchar_t **config_str,
const char *str);
PyAPI_FUNC(_PyInitError) _PyCoreConfig_InitPathConfig(_PyCoreConfig *config);

View File

@ -21,6 +21,9 @@ extern int _Py_SetFileSystemEncoding(
const char *errors);
extern void _Py_ClearFileSystemEncoding(void);
extern _PyInitError _PyUnicode_InitEncodings(PyInterpreterState *interp);
#ifdef MS_WINDOWS
extern int _PyUnicode_EnableLegacyWindowsFSEncoding(void);
#endif
PyAPI_FUNC(void) _Py_ClearStandardStreamEncoding(void);

View File

@ -56,7 +56,14 @@ struct _is {
PyObject *codec_search_cache;
PyObject *codec_error_registry;
int codecs_initialized;
int fscodec_initialized;
/* fs_codec.encoding is initialized to NULL.
Later, it is set to a non-NULL string by _PyUnicode_InitEncodings(). */
struct {
char *encoding; /* Filesystem encoding (encoded to UTF-8) */
char *errors; /* Filesystem errors (encoded to UTF-8) */
_Py_error_handler error_handler;
} fs_codec;
_PyCoreConfig core_config;
#ifdef HAVE_DLOPEN

View File

@ -260,6 +260,7 @@ Py_LOCAL_INLINE(PyObject *)
STRINGLIB(utf8_encoder)(PyObject *unicode,
STRINGLIB_CHAR *data,
Py_ssize_t size,
_Py_error_handler error_handler,
const char *errors)
{
Py_ssize_t i; /* index into data of next input character */
@ -268,7 +269,6 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
PyObject *error_handler_obj = NULL;
PyObject *exc = NULL;
PyObject *rep = NULL;
_Py_error_handler error_handler = _Py_ERROR_UNKNOWN;
#endif
#if STRINGLIB_SIZEOF_CHAR == 1
const Py_ssize_t max_char_size = 2;

View File

@ -40,6 +40,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#define PY_SSIZE_T_CLEAN
#include "Python.h"
#include "pycore_coreconfig.h"
#include "pycore_fileutils.h"
#include "pycore_object.h"
#include "pycore_pylifecycle.h"
@ -264,6 +265,13 @@ unicode_fill(enum PyUnicode_Kind kind, void *data, Py_UCS4 value,
/* Forward declaration */
static inline int
_PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch);
static PyObject *
unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
const char *errors);
static PyObject *
unicode_decode_utf8(const char *s, Py_ssize_t size,
_Py_error_handler error_handler, const char *errors,
Py_ssize_t *consumed);
/* List of static strings. */
static _Py_Identifier *static_strings = NULL;
@ -388,6 +396,35 @@ _Py_GetErrorHandler(const char *errors)
return _Py_ERROR_OTHER;
}
/* Translate a wide-character error handler name into the matching
   _Py_error_handler value.

   A NULL name is treated like L"strict". A name that matches none of the
   entries in the table below yields _Py_ERROR_OTHER. */
static _Py_error_handler
get_error_handler_wide(const wchar_t *errors)
{
    static const struct {
        const wchar_t *name;
        _Py_error_handler handler;
    } known_handlers[] = {
        {L"strict", _Py_ERROR_STRICT},
        {L"surrogateescape", _Py_ERROR_SURROGATEESCAPE},
        {L"replace", _Py_ERROR_REPLACE},
        {L"ignore", _Py_ERROR_IGNORE},
        {L"backslashreplace", _Py_ERROR_BACKSLASHREPLACE},
        {L"surrogatepass", _Py_ERROR_SURROGATEPASS},
        {L"xmlcharrefreplace", _Py_ERROR_XMLCHARREFREPLACE},
    };
    const size_t count = sizeof(known_handlers) / sizeof(known_handlers[0]);

    if (errors == NULL) {
        return _Py_ERROR_STRICT;
    }

    /* The names are all distinct, so the first match is the only match. */
    for (size_t i = 0; i < count; i++) {
        if (wcscmp(errors, known_handlers[i].name) == 0) {
            return known_handlers[i].handler;
        }
    }
    return _Py_ERROR_OTHER;
}
/* The max unicode value is always 0x10FFFF while using the PEP-393 API.
This function is kept for backward compatibility with the old API. */
Py_UNICODE
@ -3445,11 +3482,9 @@ PyUnicode_AsEncodedObject(PyObject *unicode,
static PyObject *
unicode_encode_locale(PyObject *unicode, const char *errors,
unicode_encode_locale(PyObject *unicode, _Py_error_handler error_handler,
int current_locale)
{
_Py_error_handler error_handler = _Py_GetErrorHandler(errors);
Py_ssize_t wlen;
wchar_t *wstr = PyUnicode_AsWideCharString(unicode, &wlen);
if (wstr == NULL) {
@ -3499,30 +3534,44 @@ unicode_encode_locale(PyObject *unicode, const char *errors,
/* Encode 'unicode' with the locale codec.

   'errors' is an error handler name. It is converted to a
   _Py_error_handler value by _Py_GetErrorHandler() because
   unicode_encode_locale() takes the handler as an enum value, not as a
   string. The call is made with current_locale set to 1.

   Returns whatever unicode_encode_locale() returns. */
PyObject *
PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
{
    _Py_error_handler error_handler = _Py_GetErrorHandler(errors);
    return unicode_encode_locale(unicode, error_handler, 1);
}
/* Encode 'unicode' with the filesystem encoding and error handler.

   Two states are distinguished through interp->fs_codec.encoding:

   - non-NULL: the values cached in interp->fs_codec are used;
   - NULL: the error handler is derived from core_config.filesystem_errors
     (a wchar_t* string) with get_error_handler_wide() and a C codec is
     used directly.

   When _Py_FORCE_UTF8_FS_ENCODING is defined, both states go through
   unicode_encode_utf8(). */
PyObject *
PyUnicode_EncodeFSDefault(PyObject *unicode)
{
    PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
#ifdef _Py_FORCE_UTF8_FS_ENCODING
    if (interp->fs_codec.encoding) {
        return unicode_encode_utf8(unicode,
                                   interp->fs_codec.error_handler,
                                   interp->fs_codec.errors);
    }
    else {
        const _PyCoreConfig *config = &interp->core_config;
        _Py_error_handler errors;
        errors = get_error_handler_wide(config->filesystem_errors);
        assert(errors != _Py_ERROR_UNKNOWN);
        return unicode_encode_utf8(unicode, errors, NULL);
    }
#else
    /* Bootstrap check: if the filesystem codec is implemented in Python, we
       cannot use it to encode and decode filenames before it is loaded.
       Loading the Python codec requires encoding at least its own filename.
       Use the C implementation of the locale codec until the codec registry
       is initialized and the Python codec is loaded.
       See init_fs_encoding(). */
    if (interp->fs_codec.encoding) {
        return PyUnicode_AsEncodedString(unicode,
                                         interp->fs_codec.encoding,
                                         interp->fs_codec.errors);
    }
    else {
        const _PyCoreConfig *config = &interp->core_config;
        _Py_error_handler errors;
        errors = get_error_handler_wide(config->filesystem_errors);
        assert(errors != _Py_ERROR_UNKNOWN);
        return unicode_encode_locale(unicode, errors, 0);
    }
#endif
}
@ -3663,11 +3712,9 @@ PyUnicode_AsEncodedUnicode(PyObject *unicode,
}
static PyObject*
unicode_decode_locale(const char *str, Py_ssize_t len, const char *errors,
int current_locale)
unicode_decode_locale(const char *str, Py_ssize_t len,
_Py_error_handler errors, int current_locale)
{
_Py_error_handler error_handler = _Py_GetErrorHandler(errors);
if (str[len] != '\0' || (size_t)len != strlen(str)) {
PyErr_SetString(PyExc_ValueError, "embedded null byte");
return NULL;
@ -3677,7 +3724,7 @@ unicode_decode_locale(const char *str, Py_ssize_t len, const char *errors,
size_t wlen;
const char *reason;
int res = _Py_DecodeLocaleEx(str, &wstr, &wlen, &reason,
current_locale, error_handler);
current_locale, errors);
if (res != 0) {
if (res == -2) {
PyObject *exc;
@ -3709,14 +3756,16 @@ PyObject*
PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
const char *errors)
{
return unicode_decode_locale(str, len, errors, 1);
_Py_error_handler error_handler = _Py_GetErrorHandler(errors);
return unicode_decode_locale(str, len, error_handler, 1);
}
/* Decode the NUL-terminated byte string 'str' with the locale codec.

   The length is computed with strlen(), so 'str' must not be NULL.
   'errors' is an error handler name. It is converted to a
   _Py_error_handler value by _Py_GetErrorHandler() because
   unicode_decode_locale() takes the handler as an enum value. The call is
   made with current_locale set to 1.

   Returns whatever unicode_decode_locale() returns. */
PyObject*
PyUnicode_DecodeLocale(const char *str, const char *errors)
{
    Py_ssize_t size = (Py_ssize_t)strlen(str);
    _Py_error_handler error_handler = _Py_GetErrorHandler(errors);
    return unicode_decode_locale(str, size, error_handler, 1);
}
@ -3730,23 +3779,36 @@ PyObject*
PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
{
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
const _PyCoreConfig *config = &interp->core_config;
#ifdef _Py_FORCE_UTF8_FS_ENCODING
return PyUnicode_DecodeUTF8Stateful(s, size, config->filesystem_errors, NULL);
if (interp->fs_codec.encoding) {
return unicode_decode_utf8(s, size,
interp->fs_codec.error_handler,
interp->fs_codec.errors,
NULL);
}
else {
const _PyCoreConfig *config = &interp->core_config;
_Py_error_handler errors;
errors = get_error_handler_wide(config->filesystem_errors);
assert(errors != _Py_ERROR_UNKNOWN);
return unicode_decode_utf8(s, size, errors, NULL, NULL);
}
#else
/* Bootstrap check: if the filesystem codec is implemented in Python, we
cannot use it to encode and decode filenames before it is loaded. Loading
the Python codec requires encoding at least its own filename. Use the C
implementation of the locale codec until the codec registry is
initialized and the Python codec is loaded. See init_fs_encoding(). */
if (interp->fscodec_initialized) {
if (interp->fs_codec.encoding) {
return PyUnicode_Decode(s, size,
config->filesystem_encoding,
config->filesystem_errors);
interp->fs_codec.encoding,
interp->fs_codec.errors);
}
else {
return unicode_decode_locale(s, size,
config->filesystem_errors, 0);
const _PyCoreConfig *config = &interp->core_config;
_Py_error_handler errors;
errors = get_error_handler_wide(config->filesystem_errors);
return unicode_decode_locale(s, size, errors, 0);
}
#endif
}
@ -4810,11 +4872,10 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
return p - start;
}
PyObject *
PyUnicode_DecodeUTF8Stateful(const char *s,
Py_ssize_t size,
const char *errors,
Py_ssize_t *consumed)
static PyObject *
unicode_decode_utf8(const char *s, Py_ssize_t size,
_Py_error_handler error_handler, const char *errors,
Py_ssize_t *consumed)
{
_PyUnicodeWriter writer;
const char *starts = s;
@ -4825,7 +4886,6 @@ PyUnicode_DecodeUTF8Stateful(const char *s,
const char *errmsg = "";
PyObject *error_handler_obj = NULL;
PyObject *exc = NULL;
_Py_error_handler error_handler = _Py_ERROR_UNKNOWN;
if (size == 0) {
if (consumed)
@ -4948,6 +5008,16 @@ onError:
}
/* Public UTF-8 decoder entry point taking the error handler as a name.

   Forwards every argument to unicode_decode_utf8() and passes
   _Py_ERROR_UNKNOWN as the enum handler alongside the 'errors' name.
   NOTE(review): presumably unicode_decode_utf8() resolves 'errors' itself
   when it receives _Py_ERROR_UNKNOWN -- confirm in its body. */
PyObject *
PyUnicode_DecodeUTF8Stateful(const char *s,
                             Py_ssize_t size,
                             const char *errors,
                             Py_ssize_t *consumed)
{
    const _Py_error_handler unresolved = _Py_ERROR_UNKNOWN;
    PyObject *decoded = unicode_decode_utf8(s, size, unresolved,
                                            errors, consumed);
    return decoded;
}
/* UTF-8 decoder: use surrogateescape error handler if 'surrogateescape' is
non-zero, use strict error handler otherwise.
@ -5231,8 +5301,9 @@ _Py_EncodeUTF8Ex(const wchar_t *text, char **str, size_t *error_pos,
maximum possible needed (4 result bytes per Unicode character), and return
the excess memory at the end.
*/
PyObject *
_PyUnicode_AsUTF8String(PyObject *unicode, const char *errors)
static PyObject *
unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
const char *errors)
{
enum PyUnicode_Kind kind;
void *data;
@ -5260,14 +5331,21 @@ _PyUnicode_AsUTF8String(PyObject *unicode, const char *errors)
case PyUnicode_1BYTE_KIND:
/* the string cannot be ASCII, or PyUnicode_UTF8() would be set */
assert(!PyUnicode_IS_ASCII(unicode));
return ucs1lib_utf8_encoder(unicode, data, size, errors);
return ucs1lib_utf8_encoder(unicode, data, size, error_handler, errors);
case PyUnicode_2BYTE_KIND:
return ucs2lib_utf8_encoder(unicode, data, size, errors);
return ucs2lib_utf8_encoder(unicode, data, size, error_handler, errors);
case PyUnicode_4BYTE_KIND:
return ucs4lib_utf8_encoder(unicode, data, size, errors);
return ucs4lib_utf8_encoder(unicode, data, size, error_handler, errors);
}
}
/* UTF-8 encoder entry point taking the error handler as a name.

   Forwards to unicode_encode_utf8() and passes _Py_ERROR_UNKNOWN as the
   enum handler alongside the 'errors' name.
   NOTE(review): presumably the encoder resolves 'errors' itself when it
   receives _Py_ERROR_UNKNOWN -- confirm in the utf8_encoder template. */
PyObject *
_PyUnicode_AsUTF8String(PyObject *unicode, const char *errors)
{
    const _Py_error_handler unresolved = _Py_ERROR_UNKNOWN;
    PyObject *encoded = unicode_encode_utf8(unicode, unresolved, errors);
    return encoded;
}
PyObject *
PyUnicode_EncodeUTF8(const Py_UNICODE *s,
Py_ssize_t size,
@ -15575,12 +15653,35 @@ PyUnicode_AsUnicodeCopy(PyObject *unicode)
}
static char*
get_codec_name(const char *encoding)
/* Encode the wide string 'wstr' to UTF-8 with the strict error handler and
   store the result in *str. Callers in this file release *str with
   PyMem_RawFree().

   'name' is only used to build the error message.

   Return 0 on success. On failure, set an exception and return -1:
   RuntimeWarning when _Py_EncodeUTF8Ex() returns -2, MemoryError for any
   other negative result. */
static int
encode_wstr_utf8(wchar_t *wstr, char **str, const char *name)
{
    int res = _Py_EncodeUTF8Ex(wstr, str, NULL, NULL, 1, _Py_ERROR_STRICT);
    if (res == -2) {
        /* This step encodes wchar_t* to UTF-8, so report an encode error. */
        PyErr_Format(PyExc_RuntimeWarning, "cannot encode %s", name);
        return -1;
    }
    if (res < 0) {
        PyErr_NoMemory();
        return -1;
    }
    return 0;
}
static int
config_get_codec_name(wchar_t **config_encoding)
{
char *encoding;
if (encode_wstr_utf8(*config_encoding, &encoding, "stdio_encoding") < 0) {
return -1;
}
PyObject *name_obj = NULL;
PyObject *codec = _PyCodec_Lookup(encoding);
PyMem_RawFree(encoding);
codec = _PyCodec_Lookup(encoding);
if (!codec)
goto error;
@ -15590,71 +15691,107 @@ get_codec_name(const char *encoding)
goto error;
}
const char *name_utf8 = PyUnicode_AsUTF8(name_obj);
if (name_utf8 == NULL) {
wchar_t *wname = PyUnicode_AsWideCharString(name_obj, NULL);
Py_DECREF(name_obj);
if (wname == NULL) {
goto error;
}
char *name = _PyMem_RawStrdup(name_utf8);
Py_DECREF(name_obj);
if (name == NULL) {
wchar_t *raw_wname = _PyMem_RawWcsdup(wname);
if (raw_wname == NULL) {
PyMem_Free(wname);
PyErr_NoMemory();
return NULL;
goto error;
}
return name;
PyMem_RawFree(*config_encoding);
*config_encoding = raw_wname;
PyMem_Free(wname);
return 0;
error:
Py_XDECREF(codec);
Py_XDECREF(name_obj);
return NULL;
return -1;
}
/* Normalize core_config.stdio_encoding to the Python codec name.

   config_get_codec_name() replaces the string in place, so nothing has to
   be freed or reassigned here.

   Return _Py_INIT_OK() on success, or an init error if the codec name
   cannot be obtained. */
static _PyInitError
init_stdio_encoding(PyInterpreterState *interp)
{
    /* Update the stdio encoding to the normalized Python codec name. */
    _PyCoreConfig *config = &interp->core_config;
    if (config_get_codec_name(&config->stdio_encoding) < 0) {
        return _Py_INIT_ERR("failed to get the Python codec name "
                            "of the stdio encoding");
    }
    return _Py_INIT_OK();
}
/* Cache the filesystem encoding and error handler on the interpreter.

   core_config.filesystem_encoding and core_config.filesystem_errors
   (wchar_t* strings) are encoded to UTF-8 and stored in interp->fs_codec
   together with the matching _Py_error_handler value. The UTF-8 strings
   are then passed to _Py_SetFileSystemEncoding().

   Return 0 on success. Return -1 with an exception set on failure.
   interp->fs_codec is left untouched by every failure except one coming
   from _Py_SetFileSystemEncoding(), which runs after the cache has been
   updated. */
static int
init_fs_codec(PyInterpreterState *interp)
{
    _PyCoreConfig *config = &interp->core_config;

    _Py_error_handler error_handler;
    error_handler = get_error_handler_wide(config->filesystem_errors);
    /* Defensive check: get_error_handler_wide() falls back to
       _Py_ERROR_OTHER for unrecognized names. */
    if (error_handler == _Py_ERROR_UNKNOWN) {
        PyErr_SetString(PyExc_RuntimeError, "unknown filesystem error handler");
        return -1;
    }

    /* Encode both strings before touching interp->fs_codec, so that a
       failure leaves the previous values in place. */
    char *encoding, *errors;
    if (encode_wstr_utf8(config->filesystem_encoding,
                         &encoding,
                         "filesystem_encoding") < 0) {
        return -1;
    }

    if (encode_wstr_utf8(config->filesystem_errors,
                         &errors,
                         "filesystem_errors") < 0) {
        PyMem_RawFree(encoding);
        return -1;
    }

    PyMem_RawFree(interp->fs_codec.encoding);
    interp->fs_codec.encoding = encoding;
    PyMem_RawFree(interp->fs_codec.errors);
    interp->fs_codec.errors = errors;
    interp->fs_codec.error_handler = error_handler;

    /* At this point, PyUnicode_EncodeFSDefault() and
       PyUnicode_DecodeFSDefault() can now use the Python codec rather than
       the C implementation of the filesystem encoding. */

    /* Set Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
       global configuration variables. */
    if (_Py_SetFileSystemEncoding(interp->fs_codec.encoding,
                                  interp->fs_codec.errors) < 0) {
        PyErr_NoMemory();
        return -1;
    }
    return 0;
}
/* Normalize core_config.filesystem_encoding and initialize the
   interpreter's filesystem codec cache.

   config_get_codec_name() replaces config->filesystem_encoding in place.
   init_fs_codec() then fills interp->fs_codec and sets the
   Py_FileSystemDefaultEncoding / Py_FileSystemDefaultEncodeErrors globals.

   Return _Py_INIT_OK() on success, or an init error describing the step
   that failed. */
static _PyInitError
init_fs_encoding(PyInterpreterState *interp)
{
    /* Update the filesystem encoding to the normalized Python codec name.
       For example, replace "ANSI_X3.4-1968" (locale encoding) with "ascii"
       (Python codec name). */
    _PyCoreConfig *config = &interp->core_config;
    if (config_get_codec_name(&config->filesystem_encoding) < 0) {
        return _Py_INIT_ERR("failed to get the Python codec "
                            "of the filesystem encoding");
    }

    if (init_fs_codec(interp) < 0) {
        return _Py_INIT_ERR("cannot initialize filesystem codec");
    }
    return _Py_INIT_OK();
}
@ -15671,6 +15808,33 @@ _PyUnicode_InitEncodings(PyInterpreterState *interp)
}
#ifdef MS_WINDOWS
/* Switch the filesystem encoding of the current interpreter to
   mbcs/replace (PEP 529) and refresh the filesystem codec cache.

   Both replacement strings are allocated before the configuration is
   touched. If either allocation fails, the configuration is left
   unchanged, MemoryError is set and -1 is returned. Otherwise the result
   of init_fs_codec() is returned. */
int
_PyUnicode_EnableLegacyWindowsFSEncoding(void)
{
    PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
    _PyCoreConfig *config = &interp->core_config;

    wchar_t *mbcs = _PyMem_RawWcsdup(L"mbcs");
    wchar_t *replace = _PyMem_RawWcsdup(L"replace");

    if (mbcs != NULL && replace != NULL) {
        /* The config takes ownership of both new strings. */
        PyMem_RawFree(config->filesystem_encoding);
        config->filesystem_encoding = mbcs;
        PyMem_RawFree(config->filesystem_errors);
        config->filesystem_errors = replace;

        return init_fs_codec(interp);
    }

    /* At least one duplication failed: release whichever succeeded. */
    PyMem_RawFree(mbcs);
    PyMem_RawFree(replace);
    PyErr_NoMemory();
    return -1;
}
#endif
void
_PyUnicode_Fini(void)
{
@ -15694,6 +15858,12 @@ _PyUnicode_Fini(void)
}
_PyUnicode_ClearStaticStrings();
(void)PyUnicode_ClearFreeList();
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
PyMem_RawFree(interp->fs_codec.encoding);
interp->fs_codec.encoding = NULL;
PyMem_RawFree(interp->fs_codec.errors);
interp->fs_codec.errors = NULL;
}

View File

@ -488,8 +488,8 @@ static int test_init_from_config(void)
Force it to 0 through the config. */
config.legacy_windows_stdio = 0;
#endif
config.stdio_encoding = "iso8859-1";
config.stdio_errors = "replace";
config.stdio_encoding = L"iso8859-1";
config.stdio_errors = L"replace";
putenv("PYTHONNOUSERSITE=");
Py_NoUserSiteDirectory = 0;

View File

@ -523,27 +523,7 @@ _PyCoreConfig_Clear(_PyCoreConfig *config)
/* Copy str into *config_str (duplicate the string) */
_PyInitError
_PyCoreConfig_SetString(char **config_str, const char *str)
{
char *str2;
if (str != NULL) {
str2 = _PyMem_RawStrdup(str);
if (str2 == NULL) {
return _Py_INIT_NO_MEMORY();
}
}
else {
str2 = NULL;
}
PyMem_RawFree(*config_str);
*config_str = str2;
return _Py_INIT_OK();
}
/* Copy str into *config_str (duplicate the string) */
_PyInitError
_PyCoreConfig_SetWideString(wchar_t **config_str, const wchar_t *str)
_PyCoreConfig_SetString(wchar_t **config_str, const wchar_t *str)
{
wchar_t *str2;
if (str != NULL) {
@ -563,8 +543,8 @@ _PyCoreConfig_SetWideString(wchar_t **config_str, const wchar_t *str)
/* Decode str using Py_DecodeLocale() and set the result into *config_str */
static _PyInitError
_PyCoreConfig_SetWideStringFromStringErr(wchar_t **config_str, const char *str,
const char *decode_err_msg)
_PyCoreConfig_DecodeLocaleErr(wchar_t **config_str, const char *str,
const char *decode_err_msg)
{
wchar_t *str2;
if (str != NULL) {
@ -588,17 +568,15 @@ _PyCoreConfig_SetWideStringFromStringErr(wchar_t **config_str, const char *str,
}
_PyInitError
_PyCoreConfig_SetWideStringFromString(wchar_t **config_str, const char *str)
{
return _PyCoreConfig_SetWideStringFromStringErr(
config_str, str, "cannot decode string");
}
#define CONFIG_DECODE_LOCALE(config_str, str, NAME) \
_PyCoreConfig_SetWideStringFromStringErr(config_str, str, \
"cannot decode " NAME)
_PyCoreConfig_DecodeLocaleErr(config_str, str, "cannot decode " NAME)
_PyInitError
_PyCoreConfig_DecodeLocale(wchar_t **config_str, const char *str)
{
return CONFIG_DECODE_LOCALE(config_str, str, "string");
}
_PyInitError
@ -608,16 +586,9 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
_PyCoreConfig_Clear(config);
#define COPY_ATTR(ATTR) config->ATTR = config2->ATTR
#define COPY_STR_ATTR(ATTR) \
do { \
err = _PyCoreConfig_SetString(&config->ATTR, config2->ATTR); \
if (_Py_INIT_FAILED(err)) { \
return err; \
} \
} while (0)
#define COPY_WSTR_ATTR(ATTR) \
do { \
err = _PyCoreConfig_SetWideString(&config->ATTR, config2->ATTR); \
err = _PyCoreConfig_SetString(&config->ATTR, config2->ATTR); \
if (_Py_INIT_FAILED(err)) { \
return err; \
} \
@ -676,10 +647,10 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
COPY_ATTR(quiet);
COPY_ATTR(user_site_directory);
COPY_ATTR(buffered_stdio);
COPY_STR_ATTR(filesystem_encoding);
COPY_STR_ATTR(filesystem_errors);
COPY_STR_ATTR(stdio_encoding);
COPY_STR_ATTR(stdio_errors);
COPY_WSTR_ATTR(filesystem_encoding);
COPY_WSTR_ATTR(filesystem_errors);
COPY_WSTR_ATTR(stdio_encoding);
COPY_WSTR_ATTR(stdio_errors);
#ifdef MS_WINDOWS
COPY_ATTR(legacy_windows_stdio);
#endif
@ -692,7 +663,6 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
COPY_ATTR(_init_main);
#undef COPY_ATTR
#undef COPY_STR_ATTR
#undef COPY_WSTR_ATTR
#undef COPY_WSTRLIST
return _Py_INIT_OK();
@ -721,16 +691,10 @@ _PyCoreConfig_AsDict(const _PyCoreConfig *config)
goto fail; \
} \
} while (0)
#define FROM_STRING(STR) \
((STR != NULL) ? \
PyUnicode_FromString(STR) \
: (Py_INCREF(Py_None), Py_None))
#define SET_ITEM_INT(ATTR) \
SET_ITEM(#ATTR, PyLong_FromLong(config->ATTR))
#define SET_ITEM_UINT(ATTR) \
SET_ITEM(#ATTR, PyLong_FromUnsignedLong(config->ATTR))
#define SET_ITEM_STR(ATTR) \
SET_ITEM(#ATTR, FROM_STRING(config->ATTR))
#define FROM_WSTRING(STR) \
((STR != NULL) ? \
PyUnicode_FromWideChar(STR, -1) \
@ -753,8 +717,8 @@ _PyCoreConfig_AsDict(const _PyCoreConfig *config)
SET_ITEM_INT(show_alloc_count);
SET_ITEM_INT(dump_refs);
SET_ITEM_INT(malloc_stats);
SET_ITEM_STR(filesystem_encoding);
SET_ITEM_STR(filesystem_errors);
SET_ITEM_WSTR(filesystem_encoding);
SET_ITEM_WSTR(filesystem_errors);
SET_ITEM_WSTR(pycache_prefix);
SET_ITEM_WSTR(program_name);
SET_ITEM_WSTRLIST(argv);
@ -783,8 +747,8 @@ _PyCoreConfig_AsDict(const _PyCoreConfig *config)
SET_ITEM_INT(quiet);
SET_ITEM_INT(user_site_directory);
SET_ITEM_INT(buffered_stdio);
SET_ITEM_STR(stdio_encoding);
SET_ITEM_STR(stdio_errors);
SET_ITEM_WSTR(stdio_encoding);
SET_ITEM_WSTR(stdio_errors);
#ifdef MS_WINDOWS
SET_ITEM_INT(legacy_windows_stdio);
#endif
@ -803,12 +767,10 @@ fail:
Py_DECREF(dict);
return NULL;
#undef FROM_STRING
#undef FROM_WSTRING
#undef SET_ITEM
#undef SET_ITEM_INT
#undef SET_ITEM_UINT
#undef SET_ITEM_STR
#undef SET_ITEM_WSTR
#undef SET_ITEM_WSTRLIST
}
@ -845,7 +807,7 @@ _PyCoreConfig_GetEnvDup(const _PyCoreConfig *config,
return _Py_INIT_OK();
}
return _PyCoreConfig_SetWideString(dest, var);
return _PyCoreConfig_SetString(dest, var);
#else
const char *var = getenv(name);
if (!var || var[0] == '\0') {
@ -853,7 +815,7 @@ _PyCoreConfig_GetEnvDup(const _PyCoreConfig *config,
return _Py_INIT_OK();
}
return _PyCoreConfig_SetWideStringFromStringErr(dest, var, decode_err_msg);
return _PyCoreConfig_DecodeLocaleErr(dest, var, decode_err_msg);
#endif
}
@ -996,8 +958,7 @@ config_init_program_name(_PyCoreConfig *config)
/* Use argv[0] by default, if available */
if (config->program != NULL) {
err = _PyCoreConfig_SetWideString(&config->program_name,
config->program);
err = _PyCoreConfig_SetString(&config->program_name, config->program);
if (_Py_INIT_FAILED(err)) {
return err;
}
@ -1010,7 +971,7 @@ config_init_program_name(_PyCoreConfig *config)
#else
const wchar_t *default_program_name = L"python3";
#endif
err = _PyCoreConfig_SetWideString(&config->program_name, default_program_name);
err = _PyCoreConfig_SetString(&config->program_name, default_program_name);
if (_Py_INIT_FAILED(err)) {
return err;
}
@ -1025,8 +986,8 @@ config_init_executable(_PyCoreConfig *config)
/* If Py_SetProgramFullPath() was called, use its value */
const wchar_t *program_full_path = _Py_path_config.program_full_path;
if (program_full_path != NULL) {
_PyInitError err = _PyCoreConfig_SetWideString(&config->executable,
program_full_path);
_PyInitError err = _PyCoreConfig_SetString(&config->executable,
program_full_path);
if (_Py_INIT_FAILED(err)) {
return err;
}
@ -1051,7 +1012,7 @@ config_init_home(_PyCoreConfig *config)
/* If Py_SetPythonHome() was called, use its value */
wchar_t *home = _Py_path_config.home;
if (home) {
_PyInitError err = _PyCoreConfig_SetWideString(&config->home, home);
_PyInitError err = _PyCoreConfig_SetString(&config->home, home);
if (_Py_INIT_FAILED(err)) {
return err;
}
@ -1280,7 +1241,7 @@ config_read_complex_options(_PyCoreConfig *config)
}
static const char *
static const wchar_t *
config_get_stdio_errors(const _PyCoreConfig *config)
{
#ifndef MS_WINDOWS
@ -1288,43 +1249,44 @@ config_get_stdio_errors(const _PyCoreConfig *config)
if (loc != NULL) {
/* surrogateescape is the default in the legacy C and POSIX locales */
if (strcmp(loc, "C") == 0 || strcmp(loc, "POSIX") == 0) {
return "surrogateescape";
return L"surrogateescape";
}
#ifdef PY_COERCE_C_LOCALE
/* surrogateescape is the default in locale coercion target locales */
if (_Py_IsLocaleCoercionTarget(loc)) {
return "surrogateescape";
return L"surrogateescape";
}
#endif
}
return "strict";
return L"strict";
#else
/* On Windows, always use surrogateescape by default */
return "surrogateescape";
return L"surrogateescape";
#endif
}
static _PyInitError
config_get_locale_encoding(char **locale_encoding)
config_get_locale_encoding(wchar_t **locale_encoding)
{
#ifdef MS_WINDOWS
char encoding[20];
PyOS_snprintf(encoding, sizeof(encoding), "cp%u", GetACP());
return _PyCoreConfig_DecodeLocale(locale_encoding, encoding);
#elif defined(_Py_FORCE_UTF8_LOCALE)
const char *encoding = "UTF-8";
return _PyCoreConfig_SetString(locale_encoding, L"utf-8");
#else
const char *encoding = nl_langinfo(CODESET);
if (!encoding || encoding[0] == '\0') {
return _Py_INIT_ERR("failed to get the locale encoding: "
"nl_langinfo(CODESET) failed");
}
/* nl_langinfo(CODESET) is decoded by Py_DecodeLocale() */
return CONFIG_DECODE_LOCALE(locale_encoding, encoding,
"nl_langinfo(CODESET)");
#endif
assert(*locale_encoding == NULL);
return _PyCoreConfig_SetString(locale_encoding, encoding);
}
@ -1337,16 +1299,18 @@ config_init_stdio_encoding(_PyCoreConfig *config,
/* If Py_SetStandardStreamEncoding() has been called, use these
parameters. */
if (config->stdio_encoding == NULL && _Py_StandardStreamEncoding != NULL) {
err = _PyCoreConfig_SetString(&config->stdio_encoding,
_Py_StandardStreamEncoding);
err = CONFIG_DECODE_LOCALE(&config->stdio_encoding,
_Py_StandardStreamEncoding,
"_Py_StandardStreamEncoding");
if (_Py_INIT_FAILED(err)) {
return err;
}
}
if (config->stdio_errors == NULL && _Py_StandardStreamErrors != NULL) {
err = _PyCoreConfig_SetString(&config->stdio_errors,
_Py_StandardStreamErrors);
err = CONFIG_DECODE_LOCALE(&config->stdio_errors,
_Py_StandardStreamErrors,
"_Py_StandardStreamErrors");
if (_Py_INIT_FAILED(err)) {
return err;
}
@ -1359,11 +1323,9 @@ config_init_stdio_encoding(_PyCoreConfig *config,
/* PYTHONIOENCODING environment variable */
const char *opt = _PyCoreConfig_GetEnv(config, "PYTHONIOENCODING");
if (opt) {
/* _PyCoreConfig_SetString() requires dest to be initialized to NULL */
char *pythonioencoding = NULL;
err = _PyCoreConfig_SetString(&pythonioencoding, opt);
if (_Py_INIT_FAILED(err)) {
return err;
char *pythonioencoding = _PyMem_RawStrdup(opt);
if (pythonioencoding == NULL) {
return _Py_INIT_NO_MEMORY();
}
char *errors = strchr(pythonioencoding, ':');
@ -1378,8 +1340,9 @@ config_init_stdio_encoding(_PyCoreConfig *config,
/* Does PYTHONIOENCODING contain an encoding? */
if (pythonioencoding[0]) {
if (config->stdio_encoding == NULL) {
err = _PyCoreConfig_SetString(&config->stdio_encoding,
pythonioencoding);
err = CONFIG_DECODE_LOCALE(&config->stdio_encoding,
pythonioencoding,
"PYTHONIOENCODING environment variable");
if (_Py_INIT_FAILED(err)) {
PyMem_RawFree(pythonioencoding);
return err;
@ -1396,7 +1359,9 @@ config_init_stdio_encoding(_PyCoreConfig *config,
}
if (config->stdio_errors == NULL && errors != NULL) {
err = _PyCoreConfig_SetString(&config->stdio_errors, errors);
err = CONFIG_DECODE_LOCALE(&config->stdio_errors,
errors,
"PYTHONIOENCODING environment variable");
if (_Py_INIT_FAILED(err)) {
PyMem_RawFree(pythonioencoding);
return err;
@ -1409,15 +1374,14 @@ config_init_stdio_encoding(_PyCoreConfig *config,
/* UTF-8 Mode uses UTF-8/surrogateescape */
if (preconfig->utf8_mode) {
if (config->stdio_encoding == NULL) {
err = _PyCoreConfig_SetString(&config->stdio_encoding,
"utf-8");
err = _PyCoreConfig_SetString(&config->stdio_encoding, L"utf-8");
if (_Py_INIT_FAILED(err)) {
return err;
}
}
if (config->stdio_errors == NULL) {
err = _PyCoreConfig_SetString(&config->stdio_errors,
"surrogateescape");
L"surrogateescape");
if (_Py_INIT_FAILED(err)) {
return err;
}
@ -1432,7 +1396,7 @@ config_init_stdio_encoding(_PyCoreConfig *config,
}
}
if (config->stdio_errors == NULL) {
const char *errors = config_get_stdio_errors(config);
const wchar_t *errors = config_get_stdio_errors(config);
assert(errors != NULL);
err = _PyCoreConfig_SetString(&config->stdio_errors, errors);
@ -1452,33 +1416,32 @@ config_init_fs_encoding(_PyCoreConfig *config, const _PyPreConfig *preconfig)
if (config->filesystem_encoding == NULL) {
#ifdef _Py_FORCE_UTF8_FS_ENCODING
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
"utf-8");
err = _PyCoreConfig_SetString(&config->filesystem_encoding, L"utf-8");
#else
#ifdef MS_WINDOWS
if (preconfig->legacy_windows_fs_encoding) {
/* Legacy Windows filesystem encoding: mbcs/replace */
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
"mbcs");
L"mbcs");
}
else
#endif
if (preconfig->utf8_mode) {
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
"utf-8");
L"utf-8");
}
#ifndef MS_WINDOWS
else if (_Py_GetForceASCII()) {
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
"ascii");
L"ascii");
}
#endif
else {
#ifdef MS_WINDOWS
/* Windows defaults to utf-8/surrogatepass (PEP 529). */
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
"utf-8");
L"utf-8");
#else
err = config_get_locale_encoding(&config->filesystem_encoding);
#endif
@ -1491,16 +1454,16 @@ config_init_fs_encoding(_PyCoreConfig *config, const _PyPreConfig *preconfig)
}
if (config->filesystem_errors == NULL) {
const char *errors;
const wchar_t *errors;
#ifdef MS_WINDOWS
if (preconfig->legacy_windows_fs_encoding) {
errors = "replace";
errors = L"replace";
}
else {
errors = "surrogatepass";
errors = L"surrogatepass";
}
#else
errors = "surrogateescape";
errors = L"surrogateescape";
#endif
err = _PyCoreConfig_SetString(&config->filesystem_errors, errors);
if (_Py_INIT_FAILED(err)) {
@ -1745,8 +1708,8 @@ config_parse_cmdline(_PyCoreConfig *config, _PyPreCmdline *precmdline,
|| wcscmp(_PyOS_optarg, L"never") == 0
|| wcscmp(_PyOS_optarg, L"default") == 0)
{
err = _PyCoreConfig_SetWideString(&config->check_hash_pycs_mode,
_PyOS_optarg);
err = _PyCoreConfig_SetString(&config->check_hash_pycs_mode,
_PyOS_optarg);
if (_Py_INIT_FAILED(err)) {
return err;
}
@ -2119,7 +2082,7 @@ config_read_cmdline(_PyCoreConfig *config, _PyPreCmdline *precmdline)
}
if (config->check_hash_pycs_mode == NULL) {
err = _PyCoreConfig_SetWideString(&config->check_hash_pycs_mode, L"default");
err = _PyCoreConfig_SetString(&config->check_hash_pycs_mode, L"default");
if (_Py_INIT_FAILED(err)) {
goto done;
}

View File

@ -14,7 +14,10 @@
/* --- File system encoding/errors -------------------------------- */
/* The filesystem encoding is chosen by config_init_fs_encoding(),
see also init_fs_encoding().
Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
are encoded to UTF-8. */
const char *Py_FileSystemDefaultEncoding = NULL;
int Py_HasFileSystemDefaultEncoding = 0;
const char *Py_FileSystemDefaultEncodeErrors = NULL;

View File

@ -1668,7 +1668,7 @@ is_valid_fd(int fd)
static PyObject*
create_stdio(const _PyCoreConfig *config, PyObject* io,
int fd, int write_mode, const char* name,
const char* encoding, const char* errors)
const wchar_t* encoding, const wchar_t* errors)
{
PyObject *buf = NULL, *stream = NULL, *text = NULL, *raw = NULL, *res;
const char* mode;
@ -1718,7 +1718,7 @@ create_stdio(const _PyCoreConfig *config, PyObject* io,
#ifdef MS_WINDOWS
/* Windows console IO is always UTF-8 encoded */
if (PyWindowsConsoleIO_Check(raw))
encoding = "utf-8";
encoding = L"utf-8";
#endif
text = PyUnicode_FromString(name);
@ -1754,10 +1754,25 @@ create_stdio(const _PyCoreConfig *config, PyObject* io,
newline = "\n";
#endif
stream = _PyObject_CallMethodId(io, &PyId_TextIOWrapper, "OsssOO",
buf, encoding, errors,
PyObject *encoding_str = PyUnicode_FromWideChar(encoding, -1);
if (encoding_str == NULL) {
Py_CLEAR(buf);
goto error;
}
PyObject *errors_str = PyUnicode_FromWideChar(errors, -1);
if (errors_str == NULL) {
Py_CLEAR(buf);
Py_CLEAR(encoding_str);
goto error;
}
stream = _PyObject_CallMethodId(io, &PyId_TextIOWrapper, "OOOsOO",
buf, encoding_str, errors_str,
newline, line_buffering, write_through);
Py_CLEAR(buf);
Py_CLEAR(encoding_str);
Py_CLEAR(errors_str);
if (stream == NULL)
goto error;
@ -1874,7 +1889,7 @@ init_sys_streams(PyInterpreterState *interp)
fd = fileno(stderr);
std = create_stdio(config, iomod, fd, 1, "<stderr>",
config->stdio_encoding,
"backslashreplace");
L"backslashreplace");
if (std == NULL)
goto error;

View File

@ -424,7 +424,7 @@ sys_getfilesystemencoding_impl(PyObject *module)
{
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
const _PyCoreConfig *config = &interp->core_config;
return PyUnicode_FromString(config->filesystem_encoding);
return PyUnicode_FromWideChar(config->filesystem_encoding, -1);
}
/*[clinic input]
@ -439,7 +439,7 @@ sys_getfilesystemencodeerrors_impl(PyObject *module)
{
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
const _PyCoreConfig *config = &interp->core_config;
return PyUnicode_FromString(config->filesystem_errors);
return PyUnicode_FromWideChar(config->filesystem_errors, -1);
}
/*[clinic input]
@ -1211,30 +1211,9 @@ static PyObject *
sys__enablelegacywindowsfsencoding_impl(PyObject *module)
/*[clinic end generated code: output=f5c3855b45e24fe9 input=2bfa931a20704492]*/
{
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
_PyCoreConfig *config = &interp->core_config;
/* Set the filesystem encoding to mbcs/replace (PEP 529) */
char *encoding = _PyMem_RawStrdup("mbcs");
char *errors = _PyMem_RawStrdup("replace");
if (encoding == NULL || errors == NULL) {
PyMem_Free(encoding);
PyMem_Free(errors);
PyErr_NoMemory();
if (_PyUnicode_EnableLegacyWindowsFSEncoding() < 0) {
return NULL;
}
PyMem_RawFree(config->filesystem_encoding);
config->filesystem_encoding = encoding;
PyMem_RawFree(config->filesystem_errors);
config->filesystem_errors = errors;
if (_Py_SetFileSystemEncoding(config->filesystem_encoding,
config->filesystem_errors) < 0) {
PyErr_NoMemory();
return NULL;
}
Py_RETURN_NONE;
}