Add PyInterpreterState.fs_codec.utf8 (GH-18367)
Add a fast-path for UTF-8 encoding in PyUnicode_EncodeFSDefault() and PyUnicode_DecodeFSDefaultAndSize(). Add _PyUnicode_FiniEncodings() helper function for _PyUnicode_Fini().
This commit is contained in:
parent
0e4e735d06
commit
bf305cc6f0
|
@ -102,6 +102,7 @@ struct _is {
|
|||
Later, it is set to a non-NULL string by _PyUnicode_InitEncodings(). */
|
||||
struct {
|
||||
char *encoding; /* Filesystem encoding (encoded to UTF-8) */
|
||||
int utf8; /* encoding=="utf-8"? */
|
||||
char *errors; /* Filesystem errors (encoded to UTF-8) */
|
||||
_Py_error_handler error_handler;
|
||||
} fs_codec;
|
||||
|
|
|
@ -3615,39 +3615,32 @@ PyObject *
|
|||
PyUnicode_EncodeFSDefault(PyObject *unicode)
|
||||
{
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
|
||||
#ifdef _Py_FORCE_UTF8_FS_ENCODING
|
||||
if (interp->fs_codec.encoding) {
|
||||
if (interp->fs_codec.utf8) {
|
||||
return unicode_encode_utf8(unicode,
|
||||
interp->fs_codec.error_handler,
|
||||
interp->fs_codec.errors);
|
||||
}
|
||||
else {
|
||||
const wchar_t *filesystem_errors = interp->config.filesystem_errors;
|
||||
_Py_error_handler errors;
|
||||
errors = get_error_handler_wide(filesystem_errors);
|
||||
assert(errors != _Py_ERROR_UNKNOWN);
|
||||
return unicode_encode_utf8(unicode, errors, NULL);
|
||||
}
|
||||
#else
|
||||
/* Bootstrap check: if the filesystem codec is implemented in Python, we
|
||||
cannot use it to encode and decode filenames before it is loaded. Loading
|
||||
the Python codec requires encoding at least its own filename. Use the C
|
||||
implementation of the locale codec until the codec registry is
|
||||
initialized and the Python codec is loaded.
|
||||
See _PyUnicode_InitEncodings(). */
|
||||
if (interp->fs_codec.encoding) {
|
||||
#ifndef _Py_FORCE_UTF8_FS_ENCODING
|
||||
else if (interp->fs_codec.encoding) {
|
||||
return PyUnicode_AsEncodedString(unicode,
|
||||
interp->fs_codec.encoding,
|
||||
interp->fs_codec.errors);
|
||||
}
|
||||
else {
|
||||
const wchar_t *filesystem_errors = interp->config.filesystem_errors;
|
||||
_Py_error_handler errors;
|
||||
errors = get_error_handler_wide(filesystem_errors);
|
||||
assert(errors != _Py_ERROR_UNKNOWN);
|
||||
return unicode_encode_locale(unicode, errors, 0);
|
||||
}
|
||||
#endif
|
||||
else {
|
||||
/* Before _PyUnicode_InitEncodings() is called, the Python codec
|
||||
machinery is not ready and so cannot be used:
|
||||
use wcstombs() in this case. */
|
||||
const wchar_t *filesystem_errors = interp->config.filesystem_errors;
|
||||
assert(filesystem_errors != NULL);
|
||||
_Py_error_handler errors = get_error_handler_wide(filesystem_errors);
|
||||
assert(errors != _Py_ERROR_UNKNOWN);
|
||||
#ifdef _Py_FORCE_UTF8_FS_ENCODING
|
||||
return unicode_encode_utf8(unicode, errors, NULL);
|
||||
#else
|
||||
return unicode_encode_locale(unicode, errors, 0);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
PyObject *
|
||||
|
@ -3857,39 +3850,33 @@ PyObject*
|
|||
PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
|
||||
{
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
|
||||
#ifdef _Py_FORCE_UTF8_FS_ENCODING
|
||||
if (interp->fs_codec.encoding) {
|
||||
if (interp->fs_codec.utf8) {
|
||||
return unicode_decode_utf8(s, size,
|
||||
interp->fs_codec.error_handler,
|
||||
interp->fs_codec.errors,
|
||||
NULL);
|
||||
}
|
||||
else {
|
||||
const wchar_t *filesystem_errors = interp->config.filesystem_errors;
|
||||
_Py_error_handler errors;
|
||||
errors = get_error_handler_wide(filesystem_errors);
|
||||
assert(errors != _Py_ERROR_UNKNOWN);
|
||||
return unicode_decode_utf8(s, size, errors, NULL, NULL);
|
||||
}
|
||||
#else
|
||||
/* Bootstrap check: if the filesystem codec is implemented in Python, we
|
||||
cannot use it to encode and decode filenames before it is loaded. Loading
|
||||
the Python codec requires encoding at least its own filename. Use the C
|
||||
implementation of the locale codec until the codec registry is
|
||||
initialized and the Python codec is loaded.
|
||||
See _PyUnicode_InitEncodings(). */
|
||||
if (interp->fs_codec.encoding) {
|
||||
#ifndef _Py_FORCE_UTF8_FS_ENCODING
|
||||
else if (interp->fs_codec.encoding) {
|
||||
return PyUnicode_Decode(s, size,
|
||||
interp->fs_codec.encoding,
|
||||
interp->fs_codec.errors);
|
||||
}
|
||||
else {
|
||||
const wchar_t *filesystem_errors = interp->config.filesystem_errors;
|
||||
_Py_error_handler errors;
|
||||
errors = get_error_handler_wide(filesystem_errors);
|
||||
return unicode_decode_locale(s, size, errors, 0);
|
||||
}
|
||||
#endif
|
||||
else {
|
||||
/* Before _PyUnicode_InitEncodings() is called, the Python codec
|
||||
machinery is not ready and so cannot be used:
|
||||
use mbstowcs() in this case. */
|
||||
const wchar_t *filesystem_errors = interp->config.filesystem_errors;
|
||||
assert(filesystem_errors != NULL);
|
||||
_Py_error_handler errors = get_error_handler_wide(filesystem_errors);
|
||||
assert(errors != _Py_ERROR_UNKNOWN);
|
||||
#ifdef _Py_FORCE_UTF8_FS_ENCODING
|
||||
return unicode_decode_utf8(s, size, errors, NULL, NULL);
|
||||
#else
|
||||
return unicode_decode_locale(s, size, errors, 0);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -15849,10 +15836,16 @@ init_fs_codec(PyInterpreterState *interp)
|
|||
|
||||
PyMem_RawFree(interp->fs_codec.encoding);
|
||||
interp->fs_codec.encoding = encoding;
|
||||
/* encoding has been normalized by init_fs_encoding() */
|
||||
interp->fs_codec.utf8 = (strcmp(encoding, "utf-8") == 0);
|
||||
PyMem_RawFree(interp->fs_codec.errors);
|
||||
interp->fs_codec.errors = errors;
|
||||
interp->fs_codec.error_handler = error_handler;
|
||||
|
||||
#ifdef _Py_FORCE_UTF8_FS_ENCODING
|
||||
assert(interp->fs_codec.utf8 == 1);
|
||||
#endif
|
||||
|
||||
/* At this point, PyUnicode_EncodeFSDefault() and
|
||||
PyUnicode_DecodeFSDefault() can now use the Python codec rather than
|
||||
the C implementation of the filesystem encoding. */
|
||||
|
@ -15902,6 +15895,19 @@ _PyUnicode_InitEncodings(PyThreadState *tstate)
|
|||
}
|
||||
|
||||
|
||||
static void
|
||||
_PyUnicode_FiniEncodings(PyThreadState *tstate)
|
||||
{
|
||||
PyInterpreterState *interp = tstate->interp;
|
||||
PyMem_RawFree(interp->fs_codec.encoding);
|
||||
interp->fs_codec.encoding = NULL;
|
||||
interp->fs_codec.utf8 = 0;
|
||||
PyMem_RawFree(interp->fs_codec.errors);
|
||||
interp->fs_codec.errors = NULL;
|
||||
interp->fs_codec.error_handler = _Py_ERROR_UNKNOWN;
|
||||
}
|
||||
|
||||
|
||||
#ifdef MS_WINDOWS
|
||||
int
|
||||
_PyUnicode_EnableLegacyWindowsFSEncoding(void)
|
||||
|
@ -15954,12 +15960,7 @@ _PyUnicode_Fini(PyThreadState *tstate)
|
|||
_PyUnicode_ClearStaticStrings();
|
||||
}
|
||||
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
|
||||
PyMem_RawFree(interp->fs_codec.encoding);
|
||||
interp->fs_codec.encoding = NULL;
|
||||
PyMem_RawFree(interp->fs_codec.errors);
|
||||
interp->fs_codec.errors = NULL;
|
||||
interp->config.filesystem_errors = (wchar_t *)_Py_ERROR_UNKNOWN;
|
||||
_PyUnicode_FiniEncodings(tstate);
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue