bpo-36775: _PyCoreConfig only uses wchar_t* (GH-13062)
_PyCoreConfig: Change the type of the filesystem_encoding, filesystem_errors, stdio_encoding and stdio_errors fields from char* to wchar_t*. Changes: * PyInterpreterState: replace fscodec_initialized (int) with the fs_codec structure. * Add get_error_handler_wide() and unicode_encode_utf8() helper functions. * Add an error_handler parameter to unicode_encode_locale() and unicode_decode_locale(). * Remove _PyCoreConfig_SetString(). * Rename _PyCoreConfig_SetWideString() to _PyCoreConfig_SetString(). * Rename _PyCoreConfig_SetWideStringFromString() to _PyCoreConfig_DecodeLocale().
This commit is contained in:
parent
6ae2bbbdfc
commit
709d23dee6
|
@ -207,8 +207,8 @@ typedef struct {
|
|||
|
||||
See Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors.
|
||||
*/
|
||||
char *filesystem_encoding;
|
||||
char *filesystem_errors;
|
||||
wchar_t *filesystem_encoding;
|
||||
wchar_t *filesystem_errors;
|
||||
|
||||
wchar_t *pycache_prefix; /* PYTHONPYCACHEPREFIX, -X pycache_prefix=PATH */
|
||||
wchar_t *program_name; /* Program name, see also Py_GetProgramName() */
|
||||
|
@ -334,13 +334,13 @@ typedef struct {
|
|||
Value set from PYTHONIOENCODING environment variable and
|
||||
Py_SetStandardStreamEncoding() function.
|
||||
See also 'stdio_errors' attribute. */
|
||||
char *stdio_encoding;
|
||||
wchar_t *stdio_encoding;
|
||||
|
||||
/* Error handler of sys.stdin and sys.stdout.
|
||||
Value set from PYTHONIOENCODING environment variable and
|
||||
Py_SetStandardStreamEncoding() function.
|
||||
See also 'stdio_encoding' attribute. */
|
||||
char *stdio_errors;
|
||||
wchar_t *stdio_errors;
|
||||
|
||||
#ifdef MS_WINDOWS
|
||||
/* If greater than zero, use io.FileIO instead of WindowsConsoleIO for sys
|
||||
|
|
|
@ -106,12 +106,9 @@ PyAPI_FUNC(_PyInitError) _PyCoreConfig_Copy(
|
|||
_PyCoreConfig *config,
|
||||
const _PyCoreConfig *config2);
|
||||
PyAPI_FUNC(_PyInitError) _PyCoreConfig_SetString(
|
||||
char **config_str,
|
||||
const char *str);
|
||||
PyAPI_FUNC(_PyInitError) _PyCoreConfig_SetWideString(
|
||||
wchar_t **config_str,
|
||||
const wchar_t *str);
|
||||
PyAPI_FUNC(_PyInitError) _PyCoreConfig_SetWideStringFromString(
|
||||
PyAPI_FUNC(_PyInitError) _PyCoreConfig_DecodeLocale(
|
||||
wchar_t **config_str,
|
||||
const char *str);
|
||||
PyAPI_FUNC(_PyInitError) _PyCoreConfig_InitPathConfig(_PyCoreConfig *config);
|
||||
|
|
|
@ -21,6 +21,9 @@ extern int _Py_SetFileSystemEncoding(
|
|||
const char *errors);
|
||||
extern void _Py_ClearFileSystemEncoding(void);
|
||||
extern _PyInitError _PyUnicode_InitEncodings(PyInterpreterState *interp);
|
||||
#ifdef MS_WINDOWS
|
||||
extern int _PyUnicode_EnableLegacyWindowsFSEncoding(void);
|
||||
#endif
|
||||
|
||||
PyAPI_FUNC(void) _Py_ClearStandardStreamEncoding(void);
|
||||
|
||||
|
|
|
@ -56,7 +56,14 @@ struct _is {
|
|||
PyObject *codec_search_cache;
|
||||
PyObject *codec_error_registry;
|
||||
int codecs_initialized;
|
||||
int fscodec_initialized;
|
||||
|
||||
/* fs_codec.encoding is initialized to NULL.
|
||||
Later, it is set to a non-NULL string by _PyUnicode_InitEncodings(). */
|
||||
struct {
|
||||
char *encoding; /* Filesystem encoding (encoded to UTF-8) */
|
||||
char *errors; /* Filesystem errors (encoded to UTF-8) */
|
||||
_Py_error_handler error_handler;
|
||||
} fs_codec;
|
||||
|
||||
_PyCoreConfig core_config;
|
||||
#ifdef HAVE_DLOPEN
|
||||
|
|
|
@ -260,6 +260,7 @@ Py_LOCAL_INLINE(PyObject *)
|
|||
STRINGLIB(utf8_encoder)(PyObject *unicode,
|
||||
STRINGLIB_CHAR *data,
|
||||
Py_ssize_t size,
|
||||
_Py_error_handler error_handler,
|
||||
const char *errors)
|
||||
{
|
||||
Py_ssize_t i; /* index into data of next input character */
|
||||
|
@ -268,7 +269,6 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
|
|||
PyObject *error_handler_obj = NULL;
|
||||
PyObject *exc = NULL;
|
||||
PyObject *rep = NULL;
|
||||
_Py_error_handler error_handler = _Py_ERROR_UNKNOWN;
|
||||
#endif
|
||||
#if STRINGLIB_SIZEOF_CHAR == 1
|
||||
const Py_ssize_t max_char_size = 2;
|
||||
|
|
|
@ -40,6 +40,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|||
|
||||
#define PY_SSIZE_T_CLEAN
|
||||
#include "Python.h"
|
||||
#include "pycore_coreconfig.h"
|
||||
#include "pycore_fileutils.h"
|
||||
#include "pycore_object.h"
|
||||
#include "pycore_pylifecycle.h"
|
||||
|
@ -264,6 +265,13 @@ unicode_fill(enum PyUnicode_Kind kind, void *data, Py_UCS4 value,
|
|||
/* Forward declaration */
|
||||
static inline int
|
||||
_PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch);
|
||||
static PyObject *
|
||||
unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
|
||||
const char *errors);
|
||||
static PyObject *
|
||||
unicode_decode_utf8(const char *s, Py_ssize_t size,
|
||||
_Py_error_handler error_handler, const char *errors,
|
||||
Py_ssize_t *consumed);
|
||||
|
||||
/* List of static strings. */
|
||||
static _Py_Identifier *static_strings = NULL;
|
||||
|
@ -388,6 +396,35 @@ _Py_GetErrorHandler(const char *errors)
|
|||
return _Py_ERROR_OTHER;
|
||||
}
|
||||
|
||||
|
||||
static _Py_error_handler
|
||||
get_error_handler_wide(const wchar_t *errors)
|
||||
{
|
||||
if (errors == NULL || wcscmp(errors, L"strict") == 0) {
|
||||
return _Py_ERROR_STRICT;
|
||||
}
|
||||
if (wcscmp(errors, L"surrogateescape") == 0) {
|
||||
return _Py_ERROR_SURROGATEESCAPE;
|
||||
}
|
||||
if (wcscmp(errors, L"replace") == 0) {
|
||||
return _Py_ERROR_REPLACE;
|
||||
}
|
||||
if (wcscmp(errors, L"ignore") == 0) {
|
||||
return _Py_ERROR_IGNORE;
|
||||
}
|
||||
if (wcscmp(errors, L"backslashreplace") == 0) {
|
||||
return _Py_ERROR_BACKSLASHREPLACE;
|
||||
}
|
||||
if (wcscmp(errors, L"surrogatepass") == 0) {
|
||||
return _Py_ERROR_SURROGATEPASS;
|
||||
}
|
||||
if (wcscmp(errors, L"xmlcharrefreplace") == 0) {
|
||||
return _Py_ERROR_XMLCHARREFREPLACE;
|
||||
}
|
||||
return _Py_ERROR_OTHER;
|
||||
}
|
||||
|
||||
|
||||
/* The max unicode value is always 0x10FFFF while using the PEP-393 API.
|
||||
This function is kept for backward compatibility with the old API. */
|
||||
Py_UNICODE
|
||||
|
@ -3445,11 +3482,9 @@ PyUnicode_AsEncodedObject(PyObject *unicode,
|
|||
|
||||
|
||||
static PyObject *
|
||||
unicode_encode_locale(PyObject *unicode, const char *errors,
|
||||
unicode_encode_locale(PyObject *unicode, _Py_error_handler error_handler,
|
||||
int current_locale)
|
||||
{
|
||||
_Py_error_handler error_handler = _Py_GetErrorHandler(errors);
|
||||
|
||||
Py_ssize_t wlen;
|
||||
wchar_t *wstr = PyUnicode_AsWideCharString(unicode, &wlen);
|
||||
if (wstr == NULL) {
|
||||
|
@ -3499,30 +3534,44 @@ unicode_encode_locale(PyObject *unicode, const char *errors,
|
|||
PyObject *
|
||||
PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
|
||||
{
|
||||
return unicode_encode_locale(unicode, errors, 1);
|
||||
_Py_error_handler error_handler = _Py_GetErrorHandler(errors);
|
||||
return unicode_encode_locale(unicode, error_handler, 1);
|
||||
}
|
||||
|
||||
PyObject *
|
||||
PyUnicode_EncodeFSDefault(PyObject *unicode)
|
||||
{
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
|
||||
const _PyCoreConfig *config = &interp->core_config;
|
||||
#ifdef _Py_FORCE_UTF8_FS_ENCODING
|
||||
return _PyUnicode_AsUTF8String(unicode, config->filesystem_errors);
|
||||
if (interp->fs_codec.encoding) {
|
||||
return unicode_encode_utf8(unicode,
|
||||
interp->fs_codec.error_handler,
|
||||
interp->fs_codec.errors);
|
||||
}
|
||||
else {
|
||||
const _PyCoreConfig *config = &interp->core_config;
|
||||
_Py_error_handler errors;
|
||||
errors = get_error_handler_wide(config->filesystem_errors);
|
||||
assert(errors != _Py_ERROR_UNKNOWN);
|
||||
return unicode_encode_utf8(unicode, errors, NULL);
|
||||
}
|
||||
#else
|
||||
/* Bootstrap check: if the filesystem codec is implemented in Python, we
|
||||
cannot use it to encode and decode filenames before it is loaded. Load
|
||||
the Python codec requires to encode at least its own filename. Use the C
|
||||
implementation of the locale codec until the codec registry is
|
||||
initialized and the Python codec is loaded. See initfsencoding(). */
|
||||
if (interp->fscodec_initialized) {
|
||||
if (interp->fs_codec.encoding) {
|
||||
return PyUnicode_AsEncodedString(unicode,
|
||||
config->filesystem_encoding,
|
||||
config->filesystem_errors);
|
||||
interp->fs_codec.encoding,
|
||||
interp->fs_codec.errors);
|
||||
}
|
||||
else {
|
||||
return unicode_encode_locale(unicode,
|
||||
config->filesystem_errors, 0);
|
||||
const _PyCoreConfig *config = &interp->core_config;
|
||||
_Py_error_handler errors;
|
||||
errors = get_error_handler_wide(config->filesystem_errors);
|
||||
assert(errors != _Py_ERROR_UNKNOWN);
|
||||
return unicode_encode_locale(unicode, errors, 0);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@ -3663,11 +3712,9 @@ PyUnicode_AsEncodedUnicode(PyObject *unicode,
|
|||
}
|
||||
|
||||
static PyObject*
|
||||
unicode_decode_locale(const char *str, Py_ssize_t len, const char *errors,
|
||||
int current_locale)
|
||||
unicode_decode_locale(const char *str, Py_ssize_t len,
|
||||
_Py_error_handler errors, int current_locale)
|
||||
{
|
||||
_Py_error_handler error_handler = _Py_GetErrorHandler(errors);
|
||||
|
||||
if (str[len] != '\0' || (size_t)len != strlen(str)) {
|
||||
PyErr_SetString(PyExc_ValueError, "embedded null byte");
|
||||
return NULL;
|
||||
|
@ -3677,7 +3724,7 @@ unicode_decode_locale(const char *str, Py_ssize_t len, const char *errors,
|
|||
size_t wlen;
|
||||
const char *reason;
|
||||
int res = _Py_DecodeLocaleEx(str, &wstr, &wlen, &reason,
|
||||
current_locale, error_handler);
|
||||
current_locale, errors);
|
||||
if (res != 0) {
|
||||
if (res == -2) {
|
||||
PyObject *exc;
|
||||
|
@ -3709,14 +3756,16 @@ PyObject*
|
|||
PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
|
||||
const char *errors)
|
||||
{
|
||||
return unicode_decode_locale(str, len, errors, 1);
|
||||
_Py_error_handler error_handler = _Py_GetErrorHandler(errors);
|
||||
return unicode_decode_locale(str, len, error_handler, 1);
|
||||
}
|
||||
|
||||
PyObject*
|
||||
PyUnicode_DecodeLocale(const char *str, const char *errors)
|
||||
{
|
||||
Py_ssize_t size = (Py_ssize_t)strlen(str);
|
||||
return unicode_decode_locale(str, size, errors, 1);
|
||||
_Py_error_handler error_handler = _Py_GetErrorHandler(errors);
|
||||
return unicode_decode_locale(str, size, error_handler, 1);
|
||||
}
|
||||
|
||||
|
||||
|
@ -3730,23 +3779,36 @@ PyObject*
|
|||
PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
|
||||
{
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
|
||||
const _PyCoreConfig *config = &interp->core_config;
|
||||
#ifdef _Py_FORCE_UTF8_FS_ENCODING
|
||||
return PyUnicode_DecodeUTF8Stateful(s, size, config->filesystem_errors, NULL);
|
||||
if (interp->fs_codec.encoding) {
|
||||
return unicode_decode_utf8(s, size,
|
||||
interp->fs_codec.error_handler,
|
||||
interp->fs_codec.errors,
|
||||
NULL);
|
||||
}
|
||||
else {
|
||||
const _PyCoreConfig *config = &interp->core_config;
|
||||
_Py_error_handler errors;
|
||||
errors = get_error_handler_wide(config->filesystem_errors);
|
||||
assert(errors != _Py_ERROR_UNKNOWN);
|
||||
return unicode_decode_utf8(s, size, errors, NULL, NULL);
|
||||
}
|
||||
#else
|
||||
/* Bootstrap check: if the filesystem codec is implemented in Python, we
|
||||
cannot use it to encode and decode filenames before it is loaded. Load
|
||||
the Python codec requires to encode at least its own filename. Use the C
|
||||
implementation of the locale codec until the codec registry is
|
||||
initialized and the Python codec is loaded. See initfsencoding(). */
|
||||
if (interp->fscodec_initialized) {
|
||||
if (interp->fs_codec.encoding) {
|
||||
return PyUnicode_Decode(s, size,
|
||||
config->filesystem_encoding,
|
||||
config->filesystem_errors);
|
||||
interp->fs_codec.encoding,
|
||||
interp->fs_codec.errors);
|
||||
}
|
||||
else {
|
||||
return unicode_decode_locale(s, size,
|
||||
config->filesystem_errors, 0);
|
||||
const _PyCoreConfig *config = &interp->core_config;
|
||||
_Py_error_handler errors;
|
||||
errors = get_error_handler_wide(config->filesystem_errors);
|
||||
return unicode_decode_locale(s, size, errors, 0);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@ -4810,11 +4872,10 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
|
|||
return p - start;
|
||||
}
|
||||
|
||||
PyObject *
|
||||
PyUnicode_DecodeUTF8Stateful(const char *s,
|
||||
Py_ssize_t size,
|
||||
const char *errors,
|
||||
Py_ssize_t *consumed)
|
||||
static PyObject *
|
||||
unicode_decode_utf8(const char *s, Py_ssize_t size,
|
||||
_Py_error_handler error_handler, const char *errors,
|
||||
Py_ssize_t *consumed)
|
||||
{
|
||||
_PyUnicodeWriter writer;
|
||||
const char *starts = s;
|
||||
|
@ -4825,7 +4886,6 @@ PyUnicode_DecodeUTF8Stateful(const char *s,
|
|||
const char *errmsg = "";
|
||||
PyObject *error_handler_obj = NULL;
|
||||
PyObject *exc = NULL;
|
||||
_Py_error_handler error_handler = _Py_ERROR_UNKNOWN;
|
||||
|
||||
if (size == 0) {
|
||||
if (consumed)
|
||||
|
@ -4948,6 +5008,16 @@ onError:
|
|||
}
|
||||
|
||||
|
||||
/* Public UTF-8 decoder entry point: forwards s, size, errors and consumed
   unchanged to unicode_decode_utf8(), passing _Py_ERROR_UNKNOWN as the
   error handler.
   NOTE(review): presumably unicode_decode_utf8() resolves the actual handler
   from `errors` when it first needs one -- confirm against its body. */
PyObject *
|
||||
PyUnicode_DecodeUTF8Stateful(const char *s,
|
||||
Py_ssize_t size,
|
||||
const char *errors,
|
||||
Py_ssize_t *consumed)
|
||||
{
|
||||
return unicode_decode_utf8(s, size, _Py_ERROR_UNKNOWN, errors, consumed);
|
||||
}
|
||||
|
||||
|
||||
/* UTF-8 decoder: use surrogateescape error handler if 'surrogateescape' is
|
||||
non-zero, use strict error handler otherwise.
|
||||
|
||||
|
@ -5231,8 +5301,9 @@ _Py_EncodeUTF8Ex(const wchar_t *text, char **str, size_t *error_pos,
|
|||
maximum possible needed (4 result bytes per Unicode character), and return
|
||||
the excess memory at the end.
|
||||
*/
|
||||
PyObject *
|
||||
_PyUnicode_AsUTF8String(PyObject *unicode, const char *errors)
|
||||
static PyObject *
|
||||
unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
|
||||
const char *errors)
|
||||
{
|
||||
enum PyUnicode_Kind kind;
|
||||
void *data;
|
||||
|
@ -5260,14 +5331,21 @@ _PyUnicode_AsUTF8String(PyObject *unicode, const char *errors)
|
|||
case PyUnicode_1BYTE_KIND:
|
||||
/* the string cannot be ASCII, or PyUnicode_UTF8() would be set */
|
||||
assert(!PyUnicode_IS_ASCII(unicode));
|
||||
return ucs1lib_utf8_encoder(unicode, data, size, errors);
|
||||
return ucs1lib_utf8_encoder(unicode, data, size, error_handler, errors);
|
||||
case PyUnicode_2BYTE_KIND:
|
||||
return ucs2lib_utf8_encoder(unicode, data, size, errors);
|
||||
return ucs2lib_utf8_encoder(unicode, data, size, error_handler, errors);
|
||||
case PyUnicode_4BYTE_KIND:
|
||||
return ucs4lib_utf8_encoder(unicode, data, size, errors);
|
||||
return ucs4lib_utf8_encoder(unicode, data, size, error_handler, errors);
|
||||
}
|
||||
}
|
||||
|
||||
/* Encode `unicode` to UTF-8 by delegating to unicode_encode_utf8() with
   _Py_ERROR_UNKNOWN as the error handler and `errors` passed through.
   NOTE(review): presumably the encoder derives the handler from `errors`
   on demand when given _Py_ERROR_UNKNOWN -- confirm against its body. */
PyObject *
|
||||
_PyUnicode_AsUTF8String(PyObject *unicode, const char *errors)
|
||||
{
|
||||
return unicode_encode_utf8(unicode, _Py_ERROR_UNKNOWN, errors);
|
||||
}
|
||||
|
||||
|
||||
PyObject *
|
||||
PyUnicode_EncodeUTF8(const Py_UNICODE *s,
|
||||
Py_ssize_t size,
|
||||
|
@ -15575,12 +15653,35 @@ PyUnicode_AsUnicodeCopy(PyObject *unicode)
|
|||
}
|
||||
|
||||
|
||||
static char*
|
||||
get_codec_name(const char *encoding)
|
||||
static int
|
||||
encode_wstr_utf8(wchar_t *wstr, char **str, const char *name)
|
||||
{
|
||||
PyObject *codec, *name_obj = NULL;
|
||||
int res;
|
||||
res = _Py_EncodeUTF8Ex(wstr, str, NULL, NULL, 1, _Py_ERROR_STRICT);
|
||||
if (res == -2) {
|
||||
PyErr_Format(PyExc_RuntimeWarning, "cannot decode %s", name);
|
||||
return -1;
|
||||
}
|
||||
if (res < 0) {
|
||||
PyErr_NoMemory();
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
config_get_codec_name(wchar_t **config_encoding)
|
||||
{
|
||||
char *encoding;
|
||||
if (encode_wstr_utf8(*config_encoding, &encoding, "stdio_encoding") < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
PyObject *name_obj = NULL;
|
||||
PyObject *codec = _PyCodec_Lookup(encoding);
|
||||
PyMem_RawFree(encoding);
|
||||
|
||||
codec = _PyCodec_Lookup(encoding);
|
||||
if (!codec)
|
||||
goto error;
|
||||
|
||||
|
@ -15590,71 +15691,107 @@ get_codec_name(const char *encoding)
|
|||
goto error;
|
||||
}
|
||||
|
||||
const char *name_utf8 = PyUnicode_AsUTF8(name_obj);
|
||||
if (name_utf8 == NULL) {
|
||||
wchar_t *wname = PyUnicode_AsWideCharString(name_obj, NULL);
|
||||
Py_DECREF(name_obj);
|
||||
if (wname == NULL) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
char *name = _PyMem_RawStrdup(name_utf8);
|
||||
Py_DECREF(name_obj);
|
||||
if (name == NULL) {
|
||||
wchar_t *raw_wname = _PyMem_RawWcsdup(wname);
|
||||
if (raw_wname == NULL) {
|
||||
PyMem_Free(wname);
|
||||
PyErr_NoMemory();
|
||||
return NULL;
|
||||
goto error;
|
||||
}
|
||||
return name;
|
||||
|
||||
PyMem_RawFree(*config_encoding);
|
||||
*config_encoding = raw_wname;
|
||||
|
||||
PyMem_Free(wname);
|
||||
return 0;
|
||||
|
||||
error:
|
||||
Py_XDECREF(codec);
|
||||
Py_XDECREF(name_obj);
|
||||
return NULL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
static _PyInitError
|
||||
init_stdio_encoding(PyInterpreterState *interp)
|
||||
{
|
||||
/* Update the stdio encoding to the normalized Python codec name. */
|
||||
_PyCoreConfig *config = &interp->core_config;
|
||||
|
||||
char *codec_name = get_codec_name(config->stdio_encoding);
|
||||
if (codec_name == NULL) {
|
||||
if (config_get_codec_name(&config->stdio_encoding) < 0) {
|
||||
return _Py_INIT_ERR("failed to get the Python codec name "
|
||||
"of the stdio encoding");
|
||||
}
|
||||
PyMem_RawFree(config->stdio_encoding);
|
||||
config->stdio_encoding = codec_name;
|
||||
return _Py_INIT_OK();
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
init_fs_codec(PyInterpreterState *interp)
|
||||
{
|
||||
_PyCoreConfig *config = &interp->core_config;
|
||||
|
||||
_Py_error_handler error_handler;
|
||||
error_handler = get_error_handler_wide(config->filesystem_errors);
|
||||
if (error_handler == _Py_ERROR_UNKNOWN) {
|
||||
PyErr_SetString(PyExc_RuntimeError, "unknow filesystem error handler");
|
||||
return -1;
|
||||
}
|
||||
|
||||
char *encoding, *errors;
|
||||
if (encode_wstr_utf8(config->filesystem_encoding,
|
||||
&encoding,
|
||||
"filesystem_encoding") < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (encode_wstr_utf8(config->filesystem_errors,
|
||||
&errors,
|
||||
"filesystem_errors") < 0) {
|
||||
PyMem_RawFree(encoding);
|
||||
return -1;
|
||||
}
|
||||
|
||||
PyMem_RawFree(interp->fs_codec.encoding);
|
||||
interp->fs_codec.encoding = encoding;
|
||||
PyMem_RawFree(interp->fs_codec.errors);
|
||||
interp->fs_codec.errors = errors;
|
||||
interp->fs_codec.error_handler = error_handler;
|
||||
|
||||
/* At this point, PyUnicode_EncodeFSDefault() and
|
||||
PyUnicode_DecodeFSDefault() can now use the Python codec rather than
|
||||
the C implementation of the filesystem encoding. */
|
||||
|
||||
/* Set Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
|
||||
global configuration variables. */
|
||||
if (_Py_SetFileSystemEncoding(interp->fs_codec.encoding,
|
||||
interp->fs_codec.errors) < 0) {
|
||||
PyErr_NoMemory();
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static _PyInitError
|
||||
init_fs_encoding(PyInterpreterState *interp)
|
||||
{
|
||||
/* Update the filesystem encoding to the normalized Python codec name.
|
||||
For example, replace "ANSI_X3.4-1968" (locale encoding) with "ascii"
|
||||
(Python codec name). */
|
||||
_PyCoreConfig *config = &interp->core_config;
|
||||
|
||||
char *encoding = get_codec_name(config->filesystem_encoding);
|
||||
if (encoding == NULL) {
|
||||
/* Such error can only occurs in critical situations: no more
|
||||
memory, import a module of the standard library failed, etc. */
|
||||
if (config_get_codec_name(&config->filesystem_encoding) < 0) {
|
||||
return _Py_INIT_ERR("failed to get the Python codec "
|
||||
"of the filesystem encoding");
|
||||
}
|
||||
|
||||
/* Update the filesystem encoding to the normalized Python codec name.
|
||||
For example, replace "ANSI_X3.4-1968" (locale encoding) with "ascii"
|
||||
(Python codec name). */
|
||||
PyMem_RawFree(config->filesystem_encoding);
|
||||
config->filesystem_encoding = encoding;
|
||||
|
||||
/* Set Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
|
||||
global configuration variables. */
|
||||
if (_Py_SetFileSystemEncoding(config->filesystem_encoding,
|
||||
config->filesystem_errors) < 0) {
|
||||
return _Py_INIT_NO_MEMORY();
|
||||
if (init_fs_codec(interp) < 0) {
|
||||
return _Py_INIT_ERR("cannot initialize filesystem codec");
|
||||
}
|
||||
|
||||
/* PyUnicode can now use the Python codec rather than C implementation
|
||||
for the filesystem encoding */
|
||||
interp->fscodec_initialized = 1;
|
||||
return _Py_INIT_OK();
|
||||
}
|
||||
|
||||
|
@ -15671,6 +15808,33 @@ _PyUnicode_InitEncodings(PyInterpreterState *interp)
|
|||
}
|
||||
|
||||
|
||||
#ifdef MS_WINDOWS
|
||||
int
|
||||
_PyUnicode_EnableLegacyWindowsFSEncoding(void)
|
||||
{
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
|
||||
_PyCoreConfig *config = &interp->core_config;
|
||||
|
||||
/* Set the filesystem encoding to mbcs/replace (PEP 529) */
|
||||
wchar_t *encoding = _PyMem_RawWcsdup(L"mbcs");
|
||||
wchar_t *errors = _PyMem_RawWcsdup(L"replace");
|
||||
if (encoding == NULL || errors == NULL) {
|
||||
PyMem_RawFree(encoding);
|
||||
PyMem_RawFree(errors);
|
||||
PyErr_NoMemory();
|
||||
return -1;
|
||||
}
|
||||
|
||||
PyMem_RawFree(config->filesystem_encoding);
|
||||
config->filesystem_encoding = encoding;
|
||||
PyMem_RawFree(config->filesystem_errors);
|
||||
config->filesystem_errors = errors;
|
||||
|
||||
return init_fs_codec(interp);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
void
|
||||
_PyUnicode_Fini(void)
|
||||
{
|
||||
|
@ -15694,6 +15858,12 @@ _PyUnicode_Fini(void)
|
|||
}
|
||||
_PyUnicode_ClearStaticStrings();
|
||||
(void)PyUnicode_ClearFreeList();
|
||||
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
|
||||
PyMem_RawFree(interp->fs_codec.encoding);
|
||||
interp->fs_codec.encoding = NULL;
|
||||
PyMem_RawFree(interp->fs_codec.errors);
|
||||
interp->fs_codec.errors = NULL;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -488,8 +488,8 @@ static int test_init_from_config(void)
|
|||
Force it to 0 through the config. */
|
||||
config.legacy_windows_stdio = 0;
|
||||
#endif
|
||||
config.stdio_encoding = "iso8859-1";
|
||||
config.stdio_errors = "replace";
|
||||
config.stdio_encoding = L"iso8859-1";
|
||||
config.stdio_errors = L"replace";
|
||||
|
||||
putenv("PYTHONNOUSERSITE=");
|
||||
Py_NoUserSiteDirectory = 0;
|
||||
|
|
|
@ -523,27 +523,7 @@ _PyCoreConfig_Clear(_PyCoreConfig *config)
|
|||
|
||||
/* Copy str into *config_str (duplicate the string) */
|
||||
_PyInitError
|
||||
_PyCoreConfig_SetString(char **config_str, const char *str)
|
||||
{
|
||||
char *str2;
|
||||
if (str != NULL) {
|
||||
str2 = _PyMem_RawStrdup(str);
|
||||
if (str2 == NULL) {
|
||||
return _Py_INIT_NO_MEMORY();
|
||||
}
|
||||
}
|
||||
else {
|
||||
str2 = NULL;
|
||||
}
|
||||
PyMem_RawFree(*config_str);
|
||||
*config_str = str2;
|
||||
return _Py_INIT_OK();
|
||||
}
|
||||
|
||||
|
||||
/* Copy str into *config_str (duplicate the string) */
|
||||
_PyInitError
|
||||
_PyCoreConfig_SetWideString(wchar_t **config_str, const wchar_t *str)
|
||||
_PyCoreConfig_SetString(wchar_t **config_str, const wchar_t *str)
|
||||
{
|
||||
wchar_t *str2;
|
||||
if (str != NULL) {
|
||||
|
@ -563,8 +543,8 @@ _PyCoreConfig_SetWideString(wchar_t **config_str, const wchar_t *str)
|
|||
|
||||
/* Decode str using Py_DecodeLocale() and set the result into *config_str */
|
||||
static _PyInitError
|
||||
_PyCoreConfig_SetWideStringFromStringErr(wchar_t **config_str, const char *str,
|
||||
const char *decode_err_msg)
|
||||
_PyCoreConfig_DecodeLocaleErr(wchar_t **config_str, const char *str,
|
||||
const char *decode_err_msg)
|
||||
{
|
||||
wchar_t *str2;
|
||||
if (str != NULL) {
|
||||
|
@ -588,17 +568,15 @@ _PyCoreConfig_SetWideStringFromStringErr(wchar_t **config_str, const char *str,
|
|||
}
|
||||
|
||||
|
||||
_PyInitError
|
||||
_PyCoreConfig_SetWideStringFromString(wchar_t **config_str, const char *str)
|
||||
{
|
||||
return _PyCoreConfig_SetWideStringFromStringErr(
|
||||
config_str, str, "cannot decode string");
|
||||
}
|
||||
|
||||
|
||||
#define CONFIG_DECODE_LOCALE(config_str, str, NAME) \
|
||||
_PyCoreConfig_SetWideStringFromStringErr(config_str, str, \
|
||||
"cannot decode " NAME)
|
||||
_PyCoreConfig_DecodeLocaleErr(config_str, str, "cannot decode " NAME)
|
||||
|
||||
|
||||
/* Decode the byte string `str` and store the result in *config_str.
   Expands CONFIG_DECODE_LOCALE with the name "string", i.e. calls
   _PyCoreConfig_DecodeLocaleErr() with the decode error message
   "cannot decode string". */
_PyInitError
|
||||
_PyCoreConfig_DecodeLocale(wchar_t **config_str, const char *str)
|
||||
{
|
||||
return CONFIG_DECODE_LOCALE(config_str, str, "string");
|
||||
}
|
||||
|
||||
|
||||
_PyInitError
|
||||
|
@ -608,16 +586,9 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
|
|||
_PyCoreConfig_Clear(config);
|
||||
|
||||
#define COPY_ATTR(ATTR) config->ATTR = config2->ATTR
|
||||
#define COPY_STR_ATTR(ATTR) \
|
||||
do { \
|
||||
err = _PyCoreConfig_SetString(&config->ATTR, config2->ATTR); \
|
||||
if (_Py_INIT_FAILED(err)) { \
|
||||
return err; \
|
||||
} \
|
||||
} while (0)
|
||||
#define COPY_WSTR_ATTR(ATTR) \
|
||||
do { \
|
||||
err = _PyCoreConfig_SetWideString(&config->ATTR, config2->ATTR); \
|
||||
err = _PyCoreConfig_SetString(&config->ATTR, config2->ATTR); \
|
||||
if (_Py_INIT_FAILED(err)) { \
|
||||
return err; \
|
||||
} \
|
||||
|
@ -676,10 +647,10 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
|
|||
COPY_ATTR(quiet);
|
||||
COPY_ATTR(user_site_directory);
|
||||
COPY_ATTR(buffered_stdio);
|
||||
COPY_STR_ATTR(filesystem_encoding);
|
||||
COPY_STR_ATTR(filesystem_errors);
|
||||
COPY_STR_ATTR(stdio_encoding);
|
||||
COPY_STR_ATTR(stdio_errors);
|
||||
COPY_WSTR_ATTR(filesystem_encoding);
|
||||
COPY_WSTR_ATTR(filesystem_errors);
|
||||
COPY_WSTR_ATTR(stdio_encoding);
|
||||
COPY_WSTR_ATTR(stdio_errors);
|
||||
#ifdef MS_WINDOWS
|
||||
COPY_ATTR(legacy_windows_stdio);
|
||||
#endif
|
||||
|
@ -692,7 +663,6 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
|
|||
COPY_ATTR(_init_main);
|
||||
|
||||
#undef COPY_ATTR
|
||||
#undef COPY_STR_ATTR
|
||||
#undef COPY_WSTR_ATTR
|
||||
#undef COPY_WSTRLIST
|
||||
return _Py_INIT_OK();
|
||||
|
@ -721,16 +691,10 @@ _PyCoreConfig_AsDict(const _PyCoreConfig *config)
|
|||
goto fail; \
|
||||
} \
|
||||
} while (0)
|
||||
#define FROM_STRING(STR) \
|
||||
((STR != NULL) ? \
|
||||
PyUnicode_FromString(STR) \
|
||||
: (Py_INCREF(Py_None), Py_None))
|
||||
#define SET_ITEM_INT(ATTR) \
|
||||
SET_ITEM(#ATTR, PyLong_FromLong(config->ATTR))
|
||||
#define SET_ITEM_UINT(ATTR) \
|
||||
SET_ITEM(#ATTR, PyLong_FromUnsignedLong(config->ATTR))
|
||||
#define SET_ITEM_STR(ATTR) \
|
||||
SET_ITEM(#ATTR, FROM_STRING(config->ATTR))
|
||||
#define FROM_WSTRING(STR) \
|
||||
((STR != NULL) ? \
|
||||
PyUnicode_FromWideChar(STR, -1) \
|
||||
|
@ -753,8 +717,8 @@ _PyCoreConfig_AsDict(const _PyCoreConfig *config)
|
|||
SET_ITEM_INT(show_alloc_count);
|
||||
SET_ITEM_INT(dump_refs);
|
||||
SET_ITEM_INT(malloc_stats);
|
||||
SET_ITEM_STR(filesystem_encoding);
|
||||
SET_ITEM_STR(filesystem_errors);
|
||||
SET_ITEM_WSTR(filesystem_encoding);
|
||||
SET_ITEM_WSTR(filesystem_errors);
|
||||
SET_ITEM_WSTR(pycache_prefix);
|
||||
SET_ITEM_WSTR(program_name);
|
||||
SET_ITEM_WSTRLIST(argv);
|
||||
|
@ -783,8 +747,8 @@ _PyCoreConfig_AsDict(const _PyCoreConfig *config)
|
|||
SET_ITEM_INT(quiet);
|
||||
SET_ITEM_INT(user_site_directory);
|
||||
SET_ITEM_INT(buffered_stdio);
|
||||
SET_ITEM_STR(stdio_encoding);
|
||||
SET_ITEM_STR(stdio_errors);
|
||||
SET_ITEM_WSTR(stdio_encoding);
|
||||
SET_ITEM_WSTR(stdio_errors);
|
||||
#ifdef MS_WINDOWS
|
||||
SET_ITEM_INT(legacy_windows_stdio);
|
||||
#endif
|
||||
|
@ -803,12 +767,10 @@ fail:
|
|||
Py_DECREF(dict);
|
||||
return NULL;
|
||||
|
||||
#undef FROM_STRING
|
||||
#undef FROM_WSTRING
|
||||
#undef SET_ITEM
|
||||
#undef SET_ITEM_INT
|
||||
#undef SET_ITEM_UINT
|
||||
#undef SET_ITEM_STR
|
||||
#undef SET_ITEM_WSTR
|
||||
#undef SET_ITEM_WSTRLIST
|
||||
}
|
||||
|
@ -845,7 +807,7 @@ _PyCoreConfig_GetEnvDup(const _PyCoreConfig *config,
|
|||
return _Py_INIT_OK();
|
||||
}
|
||||
|
||||
return _PyCoreConfig_SetWideString(dest, var);
|
||||
return _PyCoreConfig_SetString(dest, var);
|
||||
#else
|
||||
const char *var = getenv(name);
|
||||
if (!var || var[0] == '\0') {
|
||||
|
@ -853,7 +815,7 @@ _PyCoreConfig_GetEnvDup(const _PyCoreConfig *config,
|
|||
return _Py_INIT_OK();
|
||||
}
|
||||
|
||||
return _PyCoreConfig_SetWideStringFromStringErr(dest, var, decode_err_msg);
|
||||
return _PyCoreConfig_DecodeLocaleErr(dest, var, decode_err_msg);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -996,8 +958,7 @@ config_init_program_name(_PyCoreConfig *config)
|
|||
|
||||
/* Use argv[0] by default, if available */
|
||||
if (config->program != NULL) {
|
||||
err = _PyCoreConfig_SetWideString(&config->program_name,
|
||||
config->program);
|
||||
err = _PyCoreConfig_SetString(&config->program_name, config->program);
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
return err;
|
||||
}
|
||||
|
@ -1010,7 +971,7 @@ config_init_program_name(_PyCoreConfig *config)
|
|||
#else
|
||||
const wchar_t *default_program_name = L"python3";
|
||||
#endif
|
||||
err = _PyCoreConfig_SetWideString(&config->program_name, default_program_name);
|
||||
err = _PyCoreConfig_SetString(&config->program_name, default_program_name);
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
return err;
|
||||
}
|
||||
|
@ -1025,8 +986,8 @@ config_init_executable(_PyCoreConfig *config)
|
|||
/* If Py_SetProgramFullPath() was called, use its value */
|
||||
const wchar_t *program_full_path = _Py_path_config.program_full_path;
|
||||
if (program_full_path != NULL) {
|
||||
_PyInitError err = _PyCoreConfig_SetWideString(&config->executable,
|
||||
program_full_path);
|
||||
_PyInitError err = _PyCoreConfig_SetString(&config->executable,
|
||||
program_full_path);
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
return err;
|
||||
}
|
||||
|
@ -1051,7 +1012,7 @@ config_init_home(_PyCoreConfig *config)
|
|||
/* If Py_SetPythonHome() was called, use its value */
|
||||
wchar_t *home = _Py_path_config.home;
|
||||
if (home) {
|
||||
_PyInitError err = _PyCoreConfig_SetWideString(&config->home, home);
|
||||
_PyInitError err = _PyCoreConfig_SetString(&config->home, home);
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
return err;
|
||||
}
|
||||
|
@ -1280,7 +1241,7 @@ config_read_complex_options(_PyCoreConfig *config)
|
|||
}
|
||||
|
||||
|
||||
static const char *
|
||||
static const wchar_t *
|
||||
config_get_stdio_errors(const _PyCoreConfig *config)
|
||||
{
|
||||
#ifndef MS_WINDOWS
|
||||
|
@ -1288,43 +1249,44 @@ config_get_stdio_errors(const _PyCoreConfig *config)
|
|||
if (loc != NULL) {
|
||||
/* surrogateescape is the default in the legacy C and POSIX locales */
|
||||
if (strcmp(loc, "C") == 0 || strcmp(loc, "POSIX") == 0) {
|
||||
return "surrogateescape";
|
||||
return L"surrogateescape";
|
||||
}
|
||||
|
||||
#ifdef PY_COERCE_C_LOCALE
|
||||
/* surrogateescape is the default in locale coercion target locales */
|
||||
if (_Py_IsLocaleCoercionTarget(loc)) {
|
||||
return "surrogateescape";
|
||||
return L"surrogateescape";
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
return "strict";
|
||||
return L"strict";
|
||||
#else
|
||||
/* On Windows, always use surrogateescape by default */
|
||||
return "surrogateescape";
|
||||
return L"surrogateescape";
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
static _PyInitError
|
||||
config_get_locale_encoding(char **locale_encoding)
|
||||
config_get_locale_encoding(wchar_t **locale_encoding)
|
||||
{
|
||||
#ifdef MS_WINDOWS
|
||||
char encoding[20];
|
||||
PyOS_snprintf(encoding, sizeof(encoding), "cp%u", GetACP());
|
||||
return _PyCoreConfig_DecodeLocale(locale_encoding, encoding);
|
||||
#elif defined(_Py_FORCE_UTF8_LOCALE)
|
||||
const char *encoding = "UTF-8";
|
||||
return _PyCoreConfig_SetString(locale_encoding, L"utf-8");
|
||||
#else
|
||||
const char *encoding = nl_langinfo(CODESET);
|
||||
if (!encoding || encoding[0] == '\0') {
|
||||
return _Py_INIT_ERR("failed to get the locale encoding: "
|
||||
"nl_langinfo(CODESET) failed");
|
||||
}
|
||||
/* nl_langinfo(CODESET) is decoded by Py_DecodeLocale() */
|
||||
return CONFIG_DECODE_LOCALE(locale_encoding, encoding,
|
||||
"nl_langinfo(CODESET)");
|
||||
#endif
|
||||
|
||||
assert(*locale_encoding == NULL);
|
||||
return _PyCoreConfig_SetString(locale_encoding, encoding);
|
||||
}
|
||||
|
||||
|
||||
|
@ -1337,16 +1299,18 @@ config_init_stdio_encoding(_PyCoreConfig *config,
|
|||
/* If Py_SetStandardStreamEncoding() has been called, use these
|
||||
parameters. */
|
||||
if (config->stdio_encoding == NULL && _Py_StandardStreamEncoding != NULL) {
|
||||
err = _PyCoreConfig_SetString(&config->stdio_encoding,
|
||||
_Py_StandardStreamEncoding);
|
||||
err = CONFIG_DECODE_LOCALE(&config->stdio_encoding,
|
||||
_Py_StandardStreamEncoding,
|
||||
"_Py_StandardStreamEncoding");
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
if (config->stdio_errors == NULL && _Py_StandardStreamErrors != NULL) {
|
||||
err = _PyCoreConfig_SetString(&config->stdio_errors,
|
||||
_Py_StandardStreamErrors);
|
||||
err = CONFIG_DECODE_LOCALE(&config->stdio_errors,
|
||||
_Py_StandardStreamErrors,
|
||||
"_Py_StandardStreamErrors");
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
return err;
|
||||
}
|
||||
|
@ -1359,11 +1323,9 @@ config_init_stdio_encoding(_PyCoreConfig *config,
|
|||
/* PYTHONIOENCODING environment variable */
|
||||
const char *opt = _PyCoreConfig_GetEnv(config, "PYTHONIOENCODING");
|
||||
if (opt) {
|
||||
/* _PyCoreConfig_SetString() requires dest to be initialized to NULL */
|
||||
char *pythonioencoding = NULL;
|
||||
err = _PyCoreConfig_SetString(&pythonioencoding, opt);
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
return err;
|
||||
char *pythonioencoding = _PyMem_RawStrdup(opt);
|
||||
if (pythonioencoding == NULL) {
|
||||
return _Py_INIT_NO_MEMORY();
|
||||
}
|
||||
|
||||
char *errors = strchr(pythonioencoding, ':');
|
||||
|
@ -1378,8 +1340,9 @@ config_init_stdio_encoding(_PyCoreConfig *config,
|
|||
/* Does PYTHONIOENCODING contain an encoding? */
|
||||
if (pythonioencoding[0]) {
|
||||
if (config->stdio_encoding == NULL) {
|
||||
err = _PyCoreConfig_SetString(&config->stdio_encoding,
|
||||
pythonioencoding);
|
||||
err = CONFIG_DECODE_LOCALE(&config->stdio_encoding,
|
||||
pythonioencoding,
|
||||
"PYTHONIOENCODING environment variable");
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
PyMem_RawFree(pythonioencoding);
|
||||
return err;
|
||||
|
@ -1396,7 +1359,9 @@ config_init_stdio_encoding(_PyCoreConfig *config,
|
|||
}
|
||||
|
||||
if (config->stdio_errors == NULL && errors != NULL) {
|
||||
err = _PyCoreConfig_SetString(&config->stdio_errors, errors);
|
||||
err = CONFIG_DECODE_LOCALE(&config->stdio_errors,
|
||||
errors,
|
||||
"PYTHONIOENCODING environment variable");
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
PyMem_RawFree(pythonioencoding);
|
||||
return err;
|
||||
|
@ -1409,15 +1374,14 @@ config_init_stdio_encoding(_PyCoreConfig *config,
|
|||
/* UTF-8 Mode uses UTF-8/surrogateescape */
|
||||
if (preconfig->utf8_mode) {
|
||||
if (config->stdio_encoding == NULL) {
|
||||
err = _PyCoreConfig_SetString(&config->stdio_encoding,
|
||||
"utf-8");
|
||||
err = _PyCoreConfig_SetString(&config->stdio_encoding, L"utf-8");
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
if (config->stdio_errors == NULL) {
|
||||
err = _PyCoreConfig_SetString(&config->stdio_errors,
|
||||
"surrogateescape");
|
||||
L"surrogateescape");
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
return err;
|
||||
}
|
||||
|
@ -1432,7 +1396,7 @@ config_init_stdio_encoding(_PyCoreConfig *config,
|
|||
}
|
||||
}
|
||||
if (config->stdio_errors == NULL) {
|
||||
const char *errors = config_get_stdio_errors(config);
|
||||
const wchar_t *errors = config_get_stdio_errors(config);
|
||||
assert(errors != NULL);
|
||||
|
||||
err = _PyCoreConfig_SetString(&config->stdio_errors, errors);
|
||||
|
@ -1452,33 +1416,32 @@ config_init_fs_encoding(_PyCoreConfig *config, const _PyPreConfig *preconfig)
|
|||
|
||||
if (config->filesystem_encoding == NULL) {
|
||||
#ifdef _Py_FORCE_UTF8_FS_ENCODING
|
||||
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
|
||||
"utf-8");
|
||||
err = _PyCoreConfig_SetString(&config->filesystem_encoding, L"utf-8");
|
||||
#else
|
||||
|
||||
#ifdef MS_WINDOWS
|
||||
if (preconfig->legacy_windows_fs_encoding) {
|
||||
/* Legacy Windows filesystem encoding: mbcs/replace */
|
||||
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
|
||||
"mbcs");
|
||||
L"mbcs");
|
||||
}
|
||||
else
|
||||
#endif
|
||||
if (preconfig->utf8_mode) {
|
||||
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
|
||||
"utf-8");
|
||||
L"utf-8");
|
||||
}
|
||||
#ifndef MS_WINDOWS
|
||||
else if (_Py_GetForceASCII()) {
|
||||
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
|
||||
"ascii");
|
||||
L"ascii");
|
||||
}
|
||||
#endif
|
||||
else {
|
||||
#ifdef MS_WINDOWS
|
||||
/* Windows defaults to utf-8/surrogatepass (PEP 529). */
|
||||
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
|
||||
"utf-8");
|
||||
L"utf-8");
|
||||
#else
|
||||
err = config_get_locale_encoding(&config->filesystem_encoding);
|
||||
#endif
|
||||
|
@ -1491,16 +1454,16 @@ config_init_fs_encoding(_PyCoreConfig *config, const _PyPreConfig *preconfig)
|
|||
}
|
||||
|
||||
if (config->filesystem_errors == NULL) {
|
||||
const char *errors;
|
||||
const wchar_t *errors;
|
||||
#ifdef MS_WINDOWS
|
||||
if (preconfig->legacy_windows_fs_encoding) {
|
||||
errors = "replace";
|
||||
errors = L"replace";
|
||||
}
|
||||
else {
|
||||
errors = "surrogatepass";
|
||||
errors = L"surrogatepass";
|
||||
}
|
||||
#else
|
||||
errors = "surrogateescape";
|
||||
errors = L"surrogateescape";
|
||||
#endif
|
||||
err = _PyCoreConfig_SetString(&config->filesystem_errors, errors);
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
|
@ -1745,8 +1708,8 @@ config_parse_cmdline(_PyCoreConfig *config, _PyPreCmdline *precmdline,
|
|||
|| wcscmp(_PyOS_optarg, L"never") == 0
|
||||
|| wcscmp(_PyOS_optarg, L"default") == 0)
|
||||
{
|
||||
err = _PyCoreConfig_SetWideString(&config->check_hash_pycs_mode,
|
||||
_PyOS_optarg);
|
||||
err = _PyCoreConfig_SetString(&config->check_hash_pycs_mode,
|
||||
_PyOS_optarg);
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
return err;
|
||||
}
|
||||
|
@ -2119,7 +2082,7 @@ config_read_cmdline(_PyCoreConfig *config, _PyPreCmdline *precmdline)
|
|||
}
|
||||
|
||||
if (config->check_hash_pycs_mode == NULL) {
|
||||
err = _PyCoreConfig_SetWideString(&config->check_hash_pycs_mode, L"default");
|
||||
err = _PyCoreConfig_SetString(&config->check_hash_pycs_mode, L"default");
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
goto done;
|
||||
}
|
||||
|
|
|
@ -14,7 +14,10 @@
|
|||
/* --- File system encoding/errors -------------------------------- */
|
||||
|
||||
/* The filesystem encoding is chosen by config_init_fs_encoding(),
|
||||
see also initfsencoding(). */
|
||||
see also initfsencoding().
|
||||
|
||||
Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
|
||||
are encoded to UTF-8. */
|
||||
const char *Py_FileSystemDefaultEncoding = NULL;
|
||||
int Py_HasFileSystemDefaultEncoding = 0;
|
||||
const char *Py_FileSystemDefaultEncodeErrors = NULL;
|
||||
|
|
|
@ -1668,7 +1668,7 @@ is_valid_fd(int fd)
|
|||
static PyObject*
|
||||
create_stdio(const _PyCoreConfig *config, PyObject* io,
|
||||
int fd, int write_mode, const char* name,
|
||||
const char* encoding, const char* errors)
|
||||
const wchar_t* encoding, const wchar_t* errors)
|
||||
{
|
||||
PyObject *buf = NULL, *stream = NULL, *text = NULL, *raw = NULL, *res;
|
||||
const char* mode;
|
||||
|
@ -1718,7 +1718,7 @@ create_stdio(const _PyCoreConfig *config, PyObject* io,
|
|||
#ifdef MS_WINDOWS
|
||||
/* Windows console IO is always UTF-8 encoded */
|
||||
if (PyWindowsConsoleIO_Check(raw))
|
||||
encoding = "utf-8";
|
||||
encoding = L"utf-8";
|
||||
#endif
|
||||
|
||||
text = PyUnicode_FromString(name);
|
||||
|
@ -1754,10 +1754,25 @@ create_stdio(const _PyCoreConfig *config, PyObject* io,
|
|||
newline = "\n";
|
||||
#endif
|
||||
|
||||
stream = _PyObject_CallMethodId(io, &PyId_TextIOWrapper, "OsssOO",
|
||||
buf, encoding, errors,
|
||||
PyObject *encoding_str = PyUnicode_FromWideChar(encoding, -1);
|
||||
if (encoding_str == NULL) {
|
||||
Py_CLEAR(buf);
|
||||
goto error;
|
||||
}
|
||||
|
||||
PyObject *errors_str = PyUnicode_FromWideChar(errors, -1);
|
||||
if (errors_str == NULL) {
|
||||
Py_CLEAR(buf);
|
||||
Py_CLEAR(encoding_str);
|
||||
goto error;
|
||||
}
|
||||
|
||||
stream = _PyObject_CallMethodId(io, &PyId_TextIOWrapper, "OOOsOO",
|
||||
buf, encoding_str, errors_str,
|
||||
newline, line_buffering, write_through);
|
||||
Py_CLEAR(buf);
|
||||
Py_CLEAR(encoding_str);
|
||||
Py_CLEAR(errors_str);
|
||||
if (stream == NULL)
|
||||
goto error;
|
||||
|
||||
|
@ -1874,7 +1889,7 @@ init_sys_streams(PyInterpreterState *interp)
|
|||
fd = fileno(stderr);
|
||||
std = create_stdio(config, iomod, fd, 1, "<stderr>",
|
||||
config->stdio_encoding,
|
||||
"backslashreplace");
|
||||
L"backslashreplace");
|
||||
if (std == NULL)
|
||||
goto error;
|
||||
|
||||
|
|
|
@ -424,7 +424,7 @@ sys_getfilesystemencoding_impl(PyObject *module)
|
|||
{
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
|
||||
const _PyCoreConfig *config = &interp->core_config;
|
||||
return PyUnicode_FromString(config->filesystem_encoding);
|
||||
return PyUnicode_FromWideChar(config->filesystem_encoding, -1);
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
|
@ -439,7 +439,7 @@ sys_getfilesystemencodeerrors_impl(PyObject *module)
|
|||
{
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
|
||||
const _PyCoreConfig *config = &interp->core_config;
|
||||
return PyUnicode_FromString(config->filesystem_errors);
|
||||
return PyUnicode_FromWideChar(config->filesystem_errors, -1);
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
|
@ -1211,30 +1211,9 @@ static PyObject *
|
|||
sys__enablelegacywindowsfsencoding_impl(PyObject *module)
|
||||
/*[clinic end generated code: output=f5c3855b45e24fe9 input=2bfa931a20704492]*/
|
||||
{
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
|
||||
_PyCoreConfig *config = &interp->core_config;
|
||||
|
||||
/* Set the filesystem encoding to mbcs/replace (PEP 529) */
|
||||
char *encoding = _PyMem_RawStrdup("mbcs");
|
||||
char *errors = _PyMem_RawStrdup("replace");
|
||||
if (encoding == NULL || errors == NULL) {
|
||||
PyMem_Free(encoding);
|
||||
PyMem_Free(errors);
|
||||
PyErr_NoMemory();
|
||||
if (_PyUnicode_EnableLegacyWindowsFSEncoding() < 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
PyMem_RawFree(config->filesystem_encoding);
|
||||
config->filesystem_encoding = encoding;
|
||||
PyMem_RawFree(config->filesystem_errors);
|
||||
config->filesystem_errors = errors;
|
||||
|
||||
if (_Py_SetFileSystemEncoding(config->filesystem_encoding,
|
||||
config->filesystem_errors) < 0) {
|
||||
PyErr_NoMemory();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue