mirror of https://github.com/python/cpython
Issue #18395: Rename ``_Py_char2wchar()`` to :c:func:`Py_DecodeLocale`, rename
``_Py_wchar2char()`` to :c:func:`Py_EncodeLocale`, and document these functions.
This commit is contained in:
parent
c6f8c0a1de
commit
f6a271ae98
|
@ -47,6 +47,60 @@ Operating System Utilities
|
|||
not call those functions directly! :c:type:`PyOS_sighandler_t` is a typedef
|
||||
alias for :c:type:`void (\*)(int)`.
|
||||
|
||||
.. c:function:: wchar_t* Py_DecodeLocale(const char* arg, size_t *size)
|
||||
|
||||
Decode a byte string from the locale encoding with the :ref:`surrogateescape
|
||||
error handler <surrogateescape>`: undecodable bytes are decoded as
|
||||
characters in range U+DC80..U+DCFF. If a byte sequence can be decoded as a
|
||||
surrogate character, escape the bytes using the surrogateescape error
|
||||
handler instead of decoding them.
|
||||
|
||||
Return a pointer to a newly allocated wide character string, use
|
||||
:c:func:`PyMem_RawFree` to free the memory. If *size* is not ``NULL``, write
|
||||
the number of wide characters excluding the null character into ``*size``.
|
||||
|
||||
Return ``NULL`` on decoding error or memory allocation error. If *size* is
|
||||
not ``NULL``, ``*size`` is set to ``(size_t)-1`` on memory error or set to
|
||||
``(size_t)-2`` on decoding error.
|
||||
|
||||
Decoding errors should never happen, unless there is a bug in the C
|
||||
library.
|
||||
|
||||
Use the :c:func:`Py_EncodeLocale` function to encode the character string
|
||||
back to a byte string.
|
||||
|
||||
.. seealso::
|
||||
|
||||
The :c:func:`PyUnicode_DecodeFSDefaultAndSize` and
|
||||
:c:func:`PyUnicode_DecodeLocaleAndSize` functions.
|
||||
|
||||
.. versionadded:: 3.5
|
||||
|
||||
|
||||
.. c:function:: char* Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
|
||||
|
||||
Encode a wide character string to the locale encoding with the
|
||||
:ref:`surrogateescape error handler <surrogateescape>`: surrogate characters
|
||||
in the range U+DC80..U+DCFF are converted to bytes 0x80..0xFF.
|
||||
|
||||
Return a pointer to a newly allocated byte string, use :c:func:`PyMem_Free`
|
||||
to free the memory. Return ``NULL`` on encoding error or memory allocation
|
||||
error.
|
||||
|
||||
If *error_pos* is not ``NULL``, ``*error_pos`` is set to the index of the
|
||||
invalid character on encoding error, or set to ``(size_t)-1`` otherwise.
|
||||
|
||||
Use the :c:func:`Py_DecodeLocale` function to decode the byte string back
|
||||
to a wide character string.
|
||||
|
||||
.. seealso::
|
||||
|
||||
The :c:func:`PyUnicode_EncodeFSDefault` and
|
||||
:c:func:`PyUnicode_EncodeLocale` functions.
|
||||
|
||||
.. versionadded:: 3.5
|
||||
|
||||
|
||||
.. _systemfunctions:
|
||||
|
||||
System Functions
|
||||
|
|
|
@ -758,12 +758,14 @@ system.
|
|||
*errors* is ``NULL``. *str* must end with a null character but
|
||||
cannot contain embedded null characters.
|
||||
|
||||
.. seealso::
|
||||
|
||||
Use :c:func:`PyUnicode_DecodeFSDefaultAndSize` to decode a string from
|
||||
:c:data:`Py_FileSystemDefaultEncoding` (the locale encoding read at
|
||||
Python startup).
|
||||
|
||||
.. seealso::
|
||||
|
||||
The :c:func:`Py_DecodeLocale` function.
|
||||
|
||||
.. versionadded:: 3.3
|
||||
|
||||
|
||||
|
@ -783,12 +785,14 @@ system.
|
|||
*errors* is ``NULL``. Return a :class:`bytes` object. *str* cannot
|
||||
contain embedded null characters.
|
||||
|
||||
.. seealso::
|
||||
|
||||
Use :c:func:`PyUnicode_EncodeFSDefault` to encode a string to
|
||||
:c:data:`Py_FileSystemDefaultEncoding` (the locale encoding read at
|
||||
Python startup).
|
||||
|
||||
.. seealso::
|
||||
|
||||
The :c:func:`Py_EncodeLocale` function.
|
||||
|
||||
.. versionadded:: 3.3
|
||||
|
||||
|
||||
|
@ -832,12 +836,14 @@ used, passing :c:func:`PyUnicode_FSDecoder` as the conversion function:
|
|||
If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to the
|
||||
locale encoding.
|
||||
|
||||
:c:data:`Py_FileSystemDefaultEncoding` is initialized at startup from the
|
||||
locale encoding and cannot be modified later. If you need to decode a string
|
||||
from the current locale encoding, use
|
||||
:c:func:`PyUnicode_DecodeLocaleAndSize`.
|
||||
|
||||
.. seealso::
|
||||
|
||||
:c:data:`Py_FileSystemDefaultEncoding` is initialized at startup from the
|
||||
locale encoding and cannot be modified later. If you need to decode a
|
||||
string from the current locale encoding, use
|
||||
:c:func:`PyUnicode_DecodeLocaleAndSize`.
|
||||
The :c:func:`Py_DecodeLocale` function.
|
||||
|
||||
.. versionchanged:: 3.2
|
||||
Use ``"strict"`` error handler on Windows.
|
||||
|
@ -867,12 +873,13 @@ used, passing :c:func:`PyUnicode_FSDecoder` as the conversion function:
|
|||
If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to the
|
||||
locale encoding.
|
||||
|
||||
:c:data:`Py_FileSystemDefaultEncoding` is initialized at startup from the
|
||||
locale encoding and cannot be modified later. If you need to encode a string
|
||||
to the current locale encoding, use :c:func:`PyUnicode_EncodeLocale`.
|
||||
|
||||
.. seealso::
|
||||
|
||||
:c:data:`Py_FileSystemDefaultEncoding` is initialized at startup from the
|
||||
locale encoding and cannot be modified later. If you need to encode a
|
||||
string to the current locale encoding, use
|
||||
:c:func:`PyUnicode_EncodeLocale`.
|
||||
The :c:func:`Py_EncodeLocale` function.
|
||||
|
||||
.. versionadded:: 3.2
|
||||
|
||||
|
|
|
@ -318,6 +318,7 @@ and writing to platform dependent files:
|
|||
encodings.
|
||||
|
||||
|
||||
.. _surrogateescape:
|
||||
.. _codec-base-classes:
|
||||
|
||||
Codec Base Classes
|
||||
|
|
|
@ -78,9 +78,10 @@ uses the file system encoding to perform this conversion (see
|
|||
|
||||
.. versionchanged:: 3.1
|
||||
On some systems, conversion using the file system encoding may fail. In this
|
||||
case, Python uses the ``surrogateescape`` encoding error handler, which means
|
||||
that undecodable bytes are replaced by a Unicode character U+DCxx on
|
||||
decoding, and these are again translated to the original byte on encoding.
|
||||
case, Python uses the :ref:`surrogateescape encoding error handler
|
||||
<surrogateescape>`, which means that undecodable bytes are replaced by a
|
||||
Unicode character U+DCxx on decoding, and these are again translated to the
|
||||
original byte on encoding.
|
||||
|
||||
|
||||
The file system encoding must guarantee to successfully decode all bytes
|
||||
|
|
|
@ -7,11 +7,11 @@ extern "C" {
|
|||
|
||||
PyAPI_FUNC(PyObject *) _Py_device_encoding(int);
|
||||
|
||||
PyAPI_FUNC(wchar_t *) _Py_char2wchar(
|
||||
PyAPI_FUNC(wchar_t *) Py_DecodeLocale(
|
||||
const char *arg,
|
||||
size_t *size);
|
||||
|
||||
PyAPI_FUNC(char*) _Py_wchar2char(
|
||||
PyAPI_FUNC(char*) Py_EncodeLocale(
|
||||
const wchar_t *text,
|
||||
size_t *error_pos);
|
||||
|
||||
|
|
|
@ -10,6 +10,10 @@ Release date: TBA
|
|||
Core and Builtins
|
||||
-----------------
|
||||
|
||||
- Issue #18395: Rename ``_Py_char2wchar()`` to :c:func:`Py_DecodeLocale`,
|
||||
rename ``_Py_wchar2char()`` to :c:func:`Py_EncodeLocale`, and document
|
||||
these functions.
|
||||
|
||||
- Issue #20179: Apply Argument Clinic to bytes and bytearray.
|
||||
Patch by Tal Einat.
|
||||
|
||||
|
|
|
@ -85,7 +85,7 @@ PyObject *PyErr_SetFromErrnoWithFilename(PyObject *exc, const char *filename)
|
|||
}
|
||||
|
||||
/* Python/fileutils.c */
|
||||
wchar_t *_Py_char2wchar(const char* arg, size_t *size)
|
||||
wchar_t *Py_DecodeLocale(const char* arg, size_t *size)
|
||||
{
|
||||
wchar_t *w;
|
||||
__coverity_tainted_data_sink__(arg);
|
||||
|
|
|
@ -336,7 +336,7 @@ search_for_prefix(wchar_t *argv0_path, wchar_t *home, wchar_t *_prefix,
|
|||
joinpath(prefix, L"Modules/Setup");
|
||||
if (isfile(prefix)) {
|
||||
/* Check VPATH to see if argv0_path is in the build directory. */
|
||||
vpath = _Py_char2wchar(VPATH, NULL);
|
||||
vpath = Py_DecodeLocale(VPATH, NULL);
|
||||
if (vpath != NULL) {
|
||||
wcsncpy(prefix, argv0_path, MAXPATHLEN);
|
||||
prefix[MAXPATHLEN] = L'\0';
|
||||
|
@ -491,10 +491,10 @@ calculate_path(void)
|
|||
wchar_t *_pythonpath, *_prefix, *_exec_prefix;
|
||||
wchar_t *lib_python;
|
||||
|
||||
_pythonpath = _Py_char2wchar(PYTHONPATH, NULL);
|
||||
_prefix = _Py_char2wchar(PREFIX, NULL);
|
||||
_exec_prefix = _Py_char2wchar(EXEC_PREFIX, NULL);
|
||||
lib_python = _Py_char2wchar("lib/python" VERSION, NULL);
|
||||
_pythonpath = Py_DecodeLocale(PYTHONPATH, NULL);
|
||||
_prefix = Py_DecodeLocale(PREFIX, NULL);
|
||||
_exec_prefix = Py_DecodeLocale(EXEC_PREFIX, NULL);
|
||||
lib_python = Py_DecodeLocale("lib/python" VERSION, NULL);
|
||||
|
||||
if (!_pythonpath || !_prefix || !_exec_prefix || !lib_python) {
|
||||
Py_FatalError(
|
||||
|
@ -503,7 +503,7 @@ calculate_path(void)
|
|||
}
|
||||
|
||||
if (_path) {
|
||||
path_buffer = _Py_char2wchar(_path, NULL);
|
||||
path_buffer = Py_DecodeLocale(_path, NULL);
|
||||
path = path_buffer;
|
||||
}
|
||||
|
||||
|
@ -584,7 +584,7 @@ calculate_path(void)
|
|||
** be running the interpreter in the build directory, so we use the
|
||||
** build-directory-specific logic to find Lib and such.
|
||||
*/
|
||||
wchar_t* wbuf = _Py_char2wchar(modPath, NULL);
|
||||
wchar_t* wbuf = Py_DecodeLocale(modPath, NULL);
|
||||
if (wbuf == NULL) {
|
||||
Py_FatalError("Cannot decode framework location");
|
||||
}
|
||||
|
@ -709,7 +709,7 @@ calculate_path(void)
|
|||
|
||||
if (_rtpypath && _rtpypath[0] != '\0') {
|
||||
size_t rtpypath_len;
|
||||
rtpypath = _Py_char2wchar(_rtpypath, &rtpypath_len);
|
||||
rtpypath = Py_DecodeLocale(_rtpypath, &rtpypath_len);
|
||||
if (rtpypath != NULL)
|
||||
bufsz += rtpypath_len + 1;
|
||||
}
|
||||
|
|
|
@ -647,7 +647,7 @@ Py_Main(int argc, wchar_t **argv)
|
|||
/* Used by Mac/Tools/pythonw.c to forward
|
||||
* the argv0 of the stub executable
|
||||
*/
|
||||
wchar_t* wbuf = _Py_char2wchar(pyvenv_launcher, NULL);
|
||||
wchar_t* wbuf = Py_DecodeLocale(pyvenv_launcher, NULL);
|
||||
|
||||
if (wbuf == NULL) {
|
||||
Py_FatalError("Cannot decode __PYVENV_LAUNCHER__");
|
||||
|
@ -730,7 +730,7 @@ Py_Main(int argc, wchar_t **argv)
|
|||
char *cfilename_buffer;
|
||||
const char *cfilename;
|
||||
int err = errno;
|
||||
cfilename_buffer = _Py_wchar2char(filename, NULL);
|
||||
cfilename_buffer = Py_EncodeLocale(filename, NULL);
|
||||
if (cfilename_buffer != NULL)
|
||||
cfilename = cfilename_buffer;
|
||||
else
|
||||
|
|
|
@ -3255,7 +3255,7 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
|
|||
/* "surrogateescape" error handler */
|
||||
char *str;
|
||||
|
||||
str = _Py_wchar2char(wstr, &error_pos);
|
||||
str = Py_EncodeLocale(wstr, &error_pos);
|
||||
if (str == NULL) {
|
||||
if (error_pos == (size_t)-1) {
|
||||
PyErr_NoMemory();
|
||||
|
@ -3308,7 +3308,7 @@ encode_error:
|
|||
|
||||
if (errmsg != NULL) {
|
||||
size_t errlen;
|
||||
wstr = _Py_char2wchar(errmsg, &errlen);
|
||||
wstr = Py_DecodeLocale(errmsg, &errlen);
|
||||
if (wstr != NULL) {
|
||||
reason = PyUnicode_FromWideChar(wstr, errlen);
|
||||
PyMem_RawFree(wstr);
|
||||
|
@ -3526,7 +3526,7 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
|
|||
|
||||
if (surrogateescape) {
|
||||
/* "surrogateescape" error handler */
|
||||
wstr = _Py_char2wchar(str, &wlen);
|
||||
wstr = Py_DecodeLocale(str, &wlen);
|
||||
if (wstr == NULL) {
|
||||
if (wlen == (size_t)-1)
|
||||
PyErr_NoMemory();
|
||||
|
@ -3581,7 +3581,7 @@ decode_error:
|
|||
error_pos = mbstowcs_errorpos(str, len);
|
||||
if (errmsg != NULL) {
|
||||
size_t errlen;
|
||||
wstr = _Py_char2wchar(errmsg, &errlen);
|
||||
wstr = Py_DecodeLocale(errmsg, &errlen);
|
||||
if (wstr != NULL) {
|
||||
reason = PyUnicode_FromWideChar(wstr, errlen);
|
||||
PyMem_RawFree(wstr);
|
||||
|
|
|
@ -52,7 +52,7 @@ main(int argc, char **argv)
|
|||
|
||||
setlocale(LC_ALL, "");
|
||||
for (i = 0; i < argc; i++) {
|
||||
argv_copy[i] = _Py_char2wchar(argv[i], NULL);
|
||||
argv_copy[i] = Py_DecodeLocale(argv[i], NULL);
|
||||
if (!argv_copy[i]) {
|
||||
PyMem_RawFree(oldloc);
|
||||
fprintf(stderr, "Fatal Python error: "
|
||||
|
|
|
@ -82,11 +82,11 @@ extern int _Py_normalize_encoding(const char *, char *, size_t);
|
|||
|
||||
Values of force_ascii:
|
||||
|
||||
1: the workaround is used: _Py_wchar2char() uses
|
||||
encode_ascii_surrogateescape() and _Py_char2wchar() uses
|
||||
1: the workaround is used: Py_EncodeLocale() uses
|
||||
encode_ascii_surrogateescape() and Py_DecodeLocale() uses
|
||||
decode_ascii_surrogateescape()
|
||||
0: the workaround is not used: _Py_wchar2char() uses wcstombs() and
|
||||
_Py_char2wchar() uses mbstowcs()
|
||||
0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
|
||||
Py_DecodeLocale() uses mbstowcs()
|
||||
-1: unknown, need to call check_force_ascii() to get the value
|
||||
*/
|
||||
static int force_ascii = -1;
|
||||
|
@ -241,24 +241,26 @@ decode_ascii_surrogateescape(const char *arg, size_t *size)
|
|||
|
||||
|
||||
/* Decode a byte string from the locale encoding with the
|
||||
surrogateescape error handler (undecodable bytes are decoded as characters
|
||||
in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate
|
||||
surrogateescape error handler: undecodable bytes are decoded as characters
|
||||
in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
|
||||
character, escape the bytes using the surrogateescape error handler instead
|
||||
of decoding them.
|
||||
|
||||
Use _Py_wchar2char() to encode the character string back to a byte string.
|
||||
Return a pointer to a newly allocated wide character string, use
|
||||
PyMem_RawFree() to free the memory. If size is not NULL, write the number of
|
||||
wide characters excluding the null character into *size.
|
||||
|
||||
Return a pointer to a newly allocated wide character string (use
|
||||
PyMem_RawFree() to free the memory) and write the number of written wide
|
||||
characters excluding the null character into *size if size is not NULL, or
|
||||
NULL on error (decoding or memory allocation error). If size is not NULL,
|
||||
*size is set to (size_t)-1 on memory error and (size_t)-2 on decoding
|
||||
error.
|
||||
Return NULL on decoding error or memory allocation error. If size is not
|
||||
NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
|
||||
decoding error.
|
||||
|
||||
Conversion errors should never happen, unless there is a bug in the C
|
||||
library. */
|
||||
Decoding errors should never happen, unless there is a bug in the C
|
||||
library.
|
||||
|
||||
Use the Py_EncodeLocale() function to encode the character string back to a
|
||||
byte string. */
|
||||
wchar_t*
|
||||
_Py_char2wchar(const char* arg, size_t *size)
|
||||
Py_DecodeLocale(const char* arg, size_t *size)
|
||||
{
|
||||
#ifdef __APPLE__
|
||||
wchar_t *wstr;
|
||||
|
@ -389,19 +391,20 @@ oom:
|
|||
#endif /* __APPLE__ */
|
||||
}
|
||||
|
||||
/* Encode a (wide) character string to the locale encoding with the
|
||||
surrogateescape error handler (characters in range U+DC80..U+DCFF are
|
||||
converted to bytes 0x80..0xFF).
|
||||
/* Encode a wide character string to the locale encoding with the
|
||||
surrogateescape error handler: surrogate characters in the range
|
||||
U+DC80..U+DCFF are converted to bytes 0x80..0xFF.
|
||||
|
||||
This function is the reverse of _Py_char2wchar().
|
||||
Return a pointer to a newly allocated byte string, use PyMem_Free() to free
|
||||
the memory. Return NULL on encoding or memory allocation error.
|
||||
|
||||
Return a pointer to a newly allocated byte string (use PyMem_Free() to free
|
||||
the memory), or NULL on encoding or memory allocation error.
|
||||
If error_pos is not NULL, *error_pos is set to the index of the invalid
|
||||
character on encoding error, or set to (size_t)-1 otherwise.
|
||||
|
||||
If error_pos is not NULL: *error_pos is the index of the invalid character
|
||||
on encoding error, or (size_t)-1 otherwise. */
|
||||
Use the Py_DecodeLocale() function to decode the byte string back to a wide
|
||||
character string. */
|
||||
char*
|
||||
_Py_wchar2char(const wchar_t *text, size_t *error_pos)
|
||||
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
|
||||
{
|
||||
#ifdef __APPLE__
|
||||
Py_ssize_t len;
|
||||
|
@ -520,7 +523,7 @@ _Py_wstat(const wchar_t* path, struct stat *buf)
|
|||
{
|
||||
int err;
|
||||
char *fname;
|
||||
fname = _Py_wchar2char(path, NULL);
|
||||
fname = Py_EncodeLocale(path, NULL);
|
||||
if (fname == NULL) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
|
@ -784,7 +787,7 @@ _Py_wfopen(const wchar_t *path, const wchar_t *mode)
|
|||
errno = EINVAL;
|
||||
return NULL;
|
||||
}
|
||||
cpath = _Py_wchar2char(path, NULL);
|
||||
cpath = Py_EncodeLocale(path, NULL);
|
||||
if (cpath == NULL)
|
||||
return NULL;
|
||||
f = fopen(cpath, cmode);
|
||||
|
@ -875,7 +878,7 @@ _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
|
|||
int res;
|
||||
size_t r1;
|
||||
|
||||
cpath = _Py_wchar2char(path, NULL);
|
||||
cpath = Py_EncodeLocale(path, NULL);
|
||||
if (cpath == NULL) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
|
@ -889,7 +892,7 @@ _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
|
|||
return -1;
|
||||
}
|
||||
cbuf[res] = '\0'; /* buf will be null terminated */
|
||||
wbuf = _Py_char2wchar(cbuf, &r1);
|
||||
wbuf = Py_DecodeLocale(cbuf, &r1);
|
||||
if (wbuf == NULL) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
|
@ -920,7 +923,7 @@ _Py_wrealpath(const wchar_t *path,
|
|||
wchar_t *wresolved_path;
|
||||
char *res;
|
||||
size_t r;
|
||||
cpath = _Py_wchar2char(path, NULL);
|
||||
cpath = Py_EncodeLocale(path, NULL);
|
||||
if (cpath == NULL) {
|
||||
errno = EINVAL;
|
||||
return NULL;
|
||||
|
@ -930,7 +933,7 @@ _Py_wrealpath(const wchar_t *path,
|
|||
if (res == NULL)
|
||||
return NULL;
|
||||
|
||||
wresolved_path = _Py_char2wchar(cresolved_path, &r);
|
||||
wresolved_path = Py_DecodeLocale(cresolved_path, &r);
|
||||
if (wresolved_path == NULL) {
|
||||
errno = EINVAL;
|
||||
return NULL;
|
||||
|
@ -963,7 +966,7 @@ _Py_wgetcwd(wchar_t *buf, size_t size)
|
|||
|
||||
if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
|
||||
return NULL;
|
||||
wname = _Py_char2wchar(fname, &len);
|
||||
wname = Py_DecodeLocale(fname, &len);
|
||||
if (wname == NULL)
|
||||
return NULL;
|
||||
if (size <= len) {
|
||||
|
|
|
@ -52,7 +52,7 @@ Py_FrozenMain(int argc, char **argv)
|
|||
|
||||
setlocale(LC_ALL, "");
|
||||
for (i = 0; i < argc; i++) {
|
||||
argv_copy[i] = _Py_char2wchar(argv[i], NULL);
|
||||
argv_copy[i] = Py_DecodeLocale(argv[i], NULL);
|
||||
argv_copy2[i] = argv_copy[i];
|
||||
if (!argv_copy[i]) {
|
||||
fprintf(stderr, "Unable to decode the command line argument #%i\n",
|
||||
|
|
Loading…
Reference in New Issue