bpo-36775: Add _PyUnicode_InitEncodings() (GH-13057)
Move get_codec_name() and initfsencoding() from pylifecycle.c to unicodeobject.c. Also rename the "init" functions in pylifecycle.c.
This commit is contained in:
parent
e251095a3f
commit
43fc3bb7cf
|
@ -16,10 +16,11 @@ PyAPI_DATA(int) _Py_UnhandledKeyboardInterrupt;
|
|||
|
||||
PyAPI_FUNC(int) _Py_UnixMain(int argc, char **argv);
|
||||
|
||||
PyAPI_FUNC(int) _Py_SetFileSystemEncoding(
|
||||
extern int _Py_SetFileSystemEncoding(
|
||||
const char *encoding,
|
||||
const char *errors);
|
||||
PyAPI_FUNC(void) _Py_ClearFileSystemEncoding(void);
|
||||
extern void _Py_ClearFileSystemEncoding(void);
|
||||
extern _PyInitError _PyUnicode_InitEncodings(PyInterpreterState *interp);
|
||||
|
||||
PyAPI_FUNC(void) _Py_ClearStandardStreamEncoding(void);
|
||||
|
||||
|
|
|
@ -42,6 +42,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|||
#include "Python.h"
|
||||
#include "pycore_fileutils.h"
|
||||
#include "pycore_object.h"
|
||||
#include "pycore_pylifecycle.h"
|
||||
#include "pycore_pystate.h"
|
||||
#include "ucnhash.h"
|
||||
#include "bytes_methods.h"
|
||||
|
@ -15574,6 +15575,102 @@ PyUnicode_AsUnicodeCopy(PyObject *unicode)
|
|||
}
|
||||
|
||||
|
||||
static char*
|
||||
get_codec_name(const char *encoding)
|
||||
{
|
||||
PyObject *codec, *name_obj = NULL;
|
||||
|
||||
codec = _PyCodec_Lookup(encoding);
|
||||
if (!codec)
|
||||
goto error;
|
||||
|
||||
name_obj = PyObject_GetAttrString(codec, "name");
|
||||
Py_CLEAR(codec);
|
||||
if (!name_obj) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
const char *name_utf8 = PyUnicode_AsUTF8(name_obj);
|
||||
if (name_utf8 == NULL) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
char *name = _PyMem_RawStrdup(name_utf8);
|
||||
Py_DECREF(name_obj);
|
||||
if (name == NULL) {
|
||||
PyErr_NoMemory();
|
||||
return NULL;
|
||||
}
|
||||
return name;
|
||||
|
||||
error:
|
||||
Py_XDECREF(codec);
|
||||
Py_XDECREF(name_obj);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
static _PyInitError
|
||||
init_stdio_encoding(PyInterpreterState *interp)
|
||||
{
|
||||
_PyCoreConfig *config = &interp->core_config;
|
||||
|
||||
char *codec_name = get_codec_name(config->stdio_encoding);
|
||||
if (codec_name == NULL) {
|
||||
return _Py_INIT_ERR("failed to get the Python codec name "
|
||||
"of the stdio encoding");
|
||||
}
|
||||
PyMem_RawFree(config->stdio_encoding);
|
||||
config->stdio_encoding = codec_name;
|
||||
return _Py_INIT_OK();
|
||||
}
|
||||
|
||||
|
||||
static _PyInitError
|
||||
init_fs_encoding(PyInterpreterState *interp)
|
||||
{
|
||||
_PyCoreConfig *config = &interp->core_config;
|
||||
|
||||
char *encoding = get_codec_name(config->filesystem_encoding);
|
||||
if (encoding == NULL) {
|
||||
/* Such error can only occurs in critical situations: no more
|
||||
memory, import a module of the standard library failed, etc. */
|
||||
return _Py_INIT_ERR("failed to get the Python codec "
|
||||
"of the filesystem encoding");
|
||||
}
|
||||
|
||||
/* Update the filesystem encoding to the normalized Python codec name.
|
||||
For example, replace "ANSI_X3.4-1968" (locale encoding) with "ascii"
|
||||
(Python codec name). */
|
||||
PyMem_RawFree(config->filesystem_encoding);
|
||||
config->filesystem_encoding = encoding;
|
||||
|
||||
/* Set Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
|
||||
global configuration variables. */
|
||||
if (_Py_SetFileSystemEncoding(config->filesystem_encoding,
|
||||
config->filesystem_errors) < 0) {
|
||||
return _Py_INIT_NO_MEMORY();
|
||||
}
|
||||
|
||||
/* PyUnicode can now use the Python codec rather than C implementation
|
||||
for the filesystem encoding */
|
||||
interp->fscodec_initialized = 1;
|
||||
return _Py_INIT_OK();
|
||||
}
|
||||
|
||||
|
||||
_PyInitError
|
||||
_PyUnicode_InitEncodings(PyInterpreterState *interp)
|
||||
{
|
||||
_PyInitError err = init_fs_encoding(interp);
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
return err;
|
||||
}
|
||||
|
||||
return init_stdio_encoding(interp);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
_PyUnicode_Fini(void)
|
||||
{
|
||||
|
|
|
@ -59,10 +59,9 @@ extern grammar _PyParser_Grammar; /* From graminit.c */
|
|||
|
||||
/* Forward */
|
||||
static _PyInitError add_main_module(PyInterpreterState *interp);
|
||||
static _PyInitError initfsencoding(PyInterpreterState *interp);
|
||||
static _PyInitError initsite(void);
|
||||
static _PyInitError init_import_size(void);
|
||||
static _PyInitError init_sys_streams(PyInterpreterState *interp);
|
||||
static _PyInitError initsigs(void);
|
||||
static _PyInitError init_signals(void);
|
||||
static void call_py_exitfuncs(PyInterpreterState *);
|
||||
static void wait_for_thread_shutdown(void);
|
||||
static void call_ll_exitfuncs(_PyRuntimeState *runtime);
|
||||
|
@ -144,42 +143,8 @@ Py_IsInitialized(void)
|
|||
|
||||
*/
|
||||
|
||||
static char*
|
||||
get_codec_name(const char *encoding)
|
||||
{
|
||||
const char *name_utf8;
|
||||
char *name_str;
|
||||
PyObject *codec, *name = NULL;
|
||||
|
||||
codec = _PyCodec_Lookup(encoding);
|
||||
if (!codec)
|
||||
goto error;
|
||||
|
||||
name = _PyObject_GetAttrId(codec, &PyId_name);
|
||||
Py_CLEAR(codec);
|
||||
if (!name)
|
||||
goto error;
|
||||
|
||||
name_utf8 = PyUnicode_AsUTF8(name);
|
||||
if (name_utf8 == NULL)
|
||||
goto error;
|
||||
name_str = _PyMem_RawStrdup(name_utf8);
|
||||
Py_DECREF(name);
|
||||
if (name_str == NULL) {
|
||||
PyErr_NoMemory();
|
||||
return NULL;
|
||||
}
|
||||
return name_str;
|
||||
|
||||
error:
|
||||
Py_XDECREF(codec);
|
||||
Py_XDECREF(name);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
static _PyInitError
|
||||
initimport(PyInterpreterState *interp, PyObject *sysmod)
|
||||
init_importlib(PyInterpreterState *interp, PyObject *sysmod)
|
||||
{
|
||||
PyObject *importlib;
|
||||
PyObject *impmod;
|
||||
|
@ -229,7 +194,7 @@ initimport(PyInterpreterState *interp, PyObject *sysmod)
|
|||
}
|
||||
|
||||
static _PyInitError
|
||||
initexternalimport(PyInterpreterState *interp)
|
||||
init_importlib_external(PyInterpreterState *interp)
|
||||
{
|
||||
PyObject *value;
|
||||
value = PyObject_CallMethod(interp->importlib,
|
||||
|
@ -661,7 +626,7 @@ pycore_init_import_warnings(PyInterpreterState *interp, PyObject *sysmod)
|
|||
|
||||
/* This call sets up builtin and frozen import support */
|
||||
if (interp->core_config._install_importlib) {
|
||||
err = initimport(interp, sysmod);
|
||||
err = init_importlib(interp, sysmod);
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
return err;
|
||||
}
|
||||
|
@ -940,7 +905,7 @@ _Py_InitializeMainInterpreter(_PyRuntimeState *runtime,
|
|||
return _Py_INIT_ERR("can't finish initializing sys");
|
||||
}
|
||||
|
||||
_PyInitError err = initexternalimport(interp);
|
||||
_PyInitError err = init_importlib_external(interp);
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
return err;
|
||||
}
|
||||
|
@ -951,13 +916,13 @@ _Py_InitializeMainInterpreter(_PyRuntimeState *runtime,
|
|||
return err;
|
||||
}
|
||||
|
||||
err = initfsencoding(interp);
|
||||
err = _PyUnicode_InitEncodings(interp);
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
return err;
|
||||
}
|
||||
|
||||
if (core_config->install_signal_handlers) {
|
||||
err = initsigs(); /* Signal handling stuff, including initintr() */
|
||||
err = init_signals();
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
return err;
|
||||
}
|
||||
|
@ -992,7 +957,7 @@ _Py_InitializeMainInterpreter(_PyRuntimeState *runtime,
|
|||
runtime->initialized = 1;
|
||||
|
||||
if (core_config->site_import) {
|
||||
err = initsite(); /* Module site */
|
||||
err = init_import_size(); /* Module site */
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
return err;
|
||||
}
|
||||
|
@ -1497,17 +1462,17 @@ new_interpreter(PyThreadState **tstate_p)
|
|||
return err;
|
||||
}
|
||||
|
||||
err = initimport(interp, sysmod);
|
||||
err = init_importlib(interp, sysmod);
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
return err;
|
||||
}
|
||||
|
||||
err = initexternalimport(interp);
|
||||
err = init_importlib_external(interp);
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
return err;
|
||||
}
|
||||
|
||||
err = initfsencoding(interp);
|
||||
err = _PyUnicode_InitEncodings(interp);
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
return err;
|
||||
}
|
||||
|
@ -1523,7 +1488,7 @@ new_interpreter(PyThreadState **tstate_p)
|
|||
}
|
||||
|
||||
if (core_config->site_import) {
|
||||
err = initsite();
|
||||
err = init_import_size();
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
return err;
|
||||
}
|
||||
|
@ -1649,42 +1614,10 @@ add_main_module(PyInterpreterState *interp)
|
|||
return _Py_INIT_OK();
|
||||
}
|
||||
|
||||
static _PyInitError
|
||||
initfsencoding(PyInterpreterState *interp)
|
||||
{
|
||||
_PyCoreConfig *config = &interp->core_config;
|
||||
|
||||
char *encoding = get_codec_name(config->filesystem_encoding);
|
||||
if (encoding == NULL) {
|
||||
/* Such error can only occurs in critical situations: no more
|
||||
memory, import a module of the standard library failed, etc. */
|
||||
return _Py_INIT_ERR("failed to get the Python codec "
|
||||
"of the filesystem encoding");
|
||||
}
|
||||
|
||||
/* Update the filesystem encoding to the normalized Python codec name.
|
||||
For example, replace "ANSI_X3.4-1968" (locale encoding) with "ascii"
|
||||
(Python codec name). */
|
||||
PyMem_RawFree(config->filesystem_encoding);
|
||||
config->filesystem_encoding = encoding;
|
||||
|
||||
/* Set Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
|
||||
global configuration variables. */
|
||||
if (_Py_SetFileSystemEncoding(config->filesystem_encoding,
|
||||
config->filesystem_errors) < 0) {
|
||||
return _Py_INIT_NO_MEMORY();
|
||||
}
|
||||
|
||||
/* PyUnicode can now use the Python codec rather than C implementation
|
||||
for the filesystem encoding */
|
||||
interp->fscodec_initialized = 1;
|
||||
return _Py_INIT_OK();
|
||||
}
|
||||
|
||||
/* Import the site module (not into __main__ though) */
|
||||
|
||||
static _PyInitError
|
||||
initsite(void)
|
||||
init_import_size(void)
|
||||
{
|
||||
PyObject *m;
|
||||
m = PyImport_ImportModule("site");
|
||||
|
@ -1880,14 +1813,6 @@ init_sys_streams(PyInterpreterState *interp)
|
|||
}
|
||||
#endif
|
||||
|
||||
char *codec_name = get_codec_name(config->stdio_encoding);
|
||||
if (codec_name == NULL) {
|
||||
return _Py_INIT_ERR("failed to get the Python codec name "
|
||||
"of the stdio encoding");
|
||||
}
|
||||
PyMem_RawFree(config->stdio_encoding);
|
||||
config->stdio_encoding = codec_name;
|
||||
|
||||
/* Hack to avoid a nasty recursion issue when Python is invoked
|
||||
in verbose mode: pre-import the Latin-1 and UTF-8 codecs */
|
||||
if ((m = PyImport_ImportModule("encodings.utf_8")) == NULL) {
|
||||
|
@ -2287,7 +2212,7 @@ Py_Exit(int sts)
|
|||
}
|
||||
|
||||
static _PyInitError
|
||||
initsigs(void)
|
||||
init_signals(void)
|
||||
{
|
||||
#ifdef SIGPIPE
|
||||
PyOS_setsig(SIGPIPE, SIG_IGN);
|
||||
|
|
Loading…
Reference in New Issue