From 2ad93821a69e6efac3b0efe1d205d6e5ef030791 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 3 Dec 2020 12:46:16 +0200 Subject: [PATCH] bpo-42431: Fix outdated bytes comments (GH-23458) Also move definitions of internal macros F_LJUST etc to private header. --- Include/bytesobject.h | 35 ++++++++++-------------------- Include/cpython/bytesobject.h | 2 +- Include/internal/pycore_format.h | 27 +++++++++++++++++++++++ Makefile.pre.in | 1 + Objects/bytearrayobject.c | 17 ++++++--------- Objects/bytesobject.c | 30 ++++++++----------------- Objects/clinic/bytearrayobject.c.h | 4 ++-- Objects/clinic/bytesobject.c.h | 4 ++-- Objects/unicodeobject.c | 1 + PCbuild/pythoncore.vcxproj | 1 + PCbuild/pythoncore.vcxproj.filters | 3 +++ 11 files changed, 65 insertions(+), 60 deletions(-) create mode 100644 Include/internal/pycore_format.h diff --git a/Include/bytesobject.h b/Include/bytesobject.h index 5062d8d123a..39c241a2dcf 100644 --- a/Include/bytesobject.h +++ b/Include/bytesobject.h @@ -1,5 +1,5 @@ -/* Bytes (String) object interface */ +/* Bytes object interface */ #ifndef Py_BYTESOBJECT_H #define Py_BYTESOBJECT_H @@ -10,23 +10,20 @@ extern "C" { #include /* -Type PyBytesObject represents a character string. An extra zero byte is +Type PyBytesObject represents a byte string. An extra zero byte is reserved at the end to ensure it is zero-terminated, but a size is present so strings with null bytes in them can be represented. This is an immutable object type. -There are functions to create new string objects, to test -an object for string-ness, and to get the -string value. The latter function returns a null pointer +There are functions to create new bytes objects, to test +an object for bytes-ness, and to get the +byte string value. The latter function returns a null pointer if the object is not of the proper type. There is a variant that takes an explicit size as well as a variant that assumes a zero-terminated string. Note that none of the -functions should be applied to nil objects. +functions should be applied to NULL pointer. */ -/* Caching the hash (ob_shash) saves recalculation of a string's hash value. - This significantly speeds up dict lookups. */ - PyAPI_DATA(PyTypeObject) PyBytes_Type; PyAPI_DATA(PyTypeObject) PyBytesIter_Type; @@ -50,26 +47,16 @@ PyAPI_FUNC(PyObject *) PyBytes_DecodeEscape(const char *, Py_ssize_t, const char *, Py_ssize_t, const char *); -/* Provides access to the internal data buffer and size of a string - object or the default encoded version of a Unicode object. Passing - NULL as *len parameter will force the string buffer to be - 0-terminated (passing a string with embedded NULL characters will +/* Provides access to the internal data buffer and size of a bytes object. + Passing NULL as len parameter will force the string buffer to be + 0-terminated (passing a string with embedded NUL characters will cause an exception). */ PyAPI_FUNC(int) PyBytes_AsStringAndSize( - PyObject *obj, /* string or Unicode object */ + PyObject *obj, /* bytes object */ char **s, /* pointer to buffer variable */ - Py_ssize_t *len /* pointer to length variable or NULL - (only possible for 0-terminated - strings) */ + Py_ssize_t *len /* pointer to length variable or NULL */ ); -/* Flags used by string formatting */ -#define F_LJUST (1<<0) -#define F_SIGN (1<<1) -#define F_BLANK (1<<2) -#define F_ALT (1<<3) -#define F_ZERO (1<<4) - #ifndef Py_LIMITED_API # define Py_CPYTHON_BYTESOBJECT_H # include "cpython/bytesobject.h" diff --git a/Include/cpython/bytesobject.h b/Include/cpython/bytesobject.h index f284c5835df..6b3f55224fc 100644 --- a/Include/cpython/bytesobject.h +++ b/Include/cpython/bytesobject.h @@ -10,7 +10,7 @@ typedef struct { /* Invariants: * ob_sval contains space for 'ob_size+1' elements. * ob_sval[ob_size] == 0. - * ob_shash is the hash of the string or -1 if not computed yet. + * ob_shash is the hash of the byte string or -1 if not computed yet. */ } PyBytesObject; diff --git a/Include/internal/pycore_format.h b/Include/internal/pycore_format.h new file mode 100644 index 00000000000..1b8d57539ca --- /dev/null +++ b/Include/internal/pycore_format.h @@ -0,0 +1,27 @@ +#ifndef Py_INTERNAL_FORMAT_H +#define Py_INTERNAL_FORMAT_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +/* Format codes + * F_LJUST '-' + * F_SIGN '+' + * F_BLANK ' ' + * F_ALT '#' + * F_ZERO '0' + */ +#define F_LJUST (1<<0) +#define F_SIGN (1<<1) +#define F_BLANK (1<<2) +#define F_ALT (1<<3) +#define F_ZERO (1<<4) + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_FORMAT_H */ diff --git a/Makefile.pre.in b/Makefile.pre.in index ee801ec46df..082945f58a7 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1112,6 +1112,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_context.h \ $(srcdir)/Include/internal/pycore_dtoa.h \ $(srcdir)/Include/internal/pycore_fileutils.h \ + $(srcdir)/Include/internal/pycore_format.h \ $(srcdir)/Include/internal/pycore_getopt.h \ $(srcdir)/Include/internal/pycore_gil.h \ $(srcdir)/Include/internal/pycore_hamt.h \ diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index 805707a4529..7cb2b1478cf 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -13,10 +13,9 @@ class bytearray "PyByteArrayObject *" "&PyByteArray_Type" [clinic start generated code]*/ /*[clinic end generated code: output=da39a3ee5e6b4b0d input=5535b77c37a119e0]*/ +/* For PyByteArray_AS_STRING(). */ char _PyByteArray_empty_string[] = ""; -/* end nullbytes support */ - /* Helpers */ static int @@ -266,7 +265,7 @@ PyByteArray_Concat(PyObject *a, PyObject *b) result = (PyByteArrayObject *) \ PyByteArray_FromStringAndSize(NULL, va.len + vb.len); - // result->ob_bytes is NULL if result is an empty string: + // result->ob_bytes is NULL if result is an empty bytearray: // if va.len + vb.len equals zero. if (result != NULL && result->ob_bytes != NULL) { memcpy(result->ob_bytes, va.buf, va.len); @@ -1007,9 +1006,6 @@ bytearray_richcompare(PyObject *self, PyObject *other, int op) Py_buffer self_bytes, other_bytes; int cmp; - /* Bytes can be compared to anything that supports the (binary) - buffer API. Except that a comparison with Unicode is always an - error, even if the comparison is for equality. */ if (!PyObject_CheckBuffer(self) || !PyObject_CheckBuffer(other)) { if (PyUnicode_Check(self) || PyUnicode_Check(other)) { if (_Py_GetConfig()->bytes_warning && (op == Py_EQ || op == Py_NE)) { @@ -1021,6 +1017,7 @@ bytearray_richcompare(PyObject *self, PyObject *other, int op) Py_RETURN_NOTIMPLEMENTED; } + /* Bytearrays can be compared to anything that supports the buffer API. */ if (PyObject_GetBuffer(self, &self_bytes, PyBUF_SIMPLE) != 0) { PyErr_Clear(); Py_RETURN_NOTIMPLEMENTED; @@ -1328,7 +1325,7 @@ bytearray_translate_impl(PyByteArrayObject *self, PyObject *table, if (trans_table[c] != -1) *output++ = (char)trans_table[c]; } - /* Fix the size of the resulting string */ + /* Fix the size of the resulting bytearray */ if (inlen > 0) if (PyByteArray_Resize(result, output - output_start) < 0) { Py_CLEAR(result); @@ -2083,7 +2080,7 @@ bytearray.hex How many bytes between separators. Positive values count from the right, negative values count from the left. -Create a str of hexadecimal numbers from a bytearray object. +Create a string of hexadecimal numbers from a bytearray object. Example: >>> value = bytearray([0xb9, 0x01, 0xef]) @@ -2099,7 +2096,7 @@ Example: static PyObject * bytearray_hex_impl(PyByteArrayObject *self, PyObject *sep, int bytes_per_sep) -/*[clinic end generated code: output=29c4e5ef72c565a0 input=814c15830ac8c4b5]*/ +/*[clinic end generated code: output=29c4e5ef72c565a0 input=808667e49bcccb54]*/ { char* argbuf = PyByteArray_AS_STRING(self); Py_ssize_t arglen = PyByteArray_GET_SIZE(self); @@ -2358,7 +2355,7 @@ PyTypeObject PyByteArray_Type = { PyObject_Del, /* tp_free */ }; -/*********************** Bytes Iterator ****************************/ +/*********************** Bytearray Iterator ****************************/ typedef struct { PyObject_HEAD diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 13216b9bb21..ccabbdca1d5 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -5,6 +5,7 @@ #include "Python.h" #include "pycore_abstract.h" // _PyIndex_Check() #include "pycore_bytes_methods.h" // _Py_bytes_startswith() +#include "pycore_format.h" // F_LJUST #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_object.h" // _PyObject_GC_TRACK #include "pycore_pymem.h" // PYMEM_CLEANBYTE @@ -21,11 +22,11 @@ class bytes "PyBytesObject *" "&PyBytes_Type" _Py_IDENTIFIER(__bytes__); -/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation - for a string of length n should request PyBytesObject_SIZE + n bytes. +/* PyBytesObject_SIZE gives the basic size of a bytes object; any memory allocation + for a bytes object of length n should request PyBytesObject_SIZE + n bytes. Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves - 3 bytes per string allocation on a typical system. + 3 or 7 bytes per bytes object allocation on a typical system. */ #define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1) @@ -439,19 +440,6 @@ getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx) return NULL; } -/* Format codes - * F_LJUST '-' - * F_SIGN '+' - * F_BLANK ' ' - * F_ALT '#' - * F_ZERO '0' - */ -#define F_LJUST (1<<0) -#define F_SIGN (1<<1) -#define F_BLANK (1<<2) -#define F_ALT (1<<3) -#define F_ZERO (1<<4) - /* Returns a new reference to a PyBytes object, or NULL on failure. */ static char* @@ -1560,7 +1548,7 @@ bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op) case Py_EQ: case Py_LE: case Py_GE: - /* a string is equal to itself */ + /* a byte string is equal to itself */ Py_RETURN_TRUE; case Py_NE: case Py_LT: @@ -2149,7 +2137,7 @@ bytes_translate_impl(PyBytesObject *self, PyObject *table, Py_INCREF(input_obj); return input_obj; } - /* Fix the size of the resulting string */ + /* Fix the size of the resulting byte string */ if (inlen > 0) _PyBytes_Resize(&result, output - output_start); return result; @@ -2453,7 +2441,7 @@ bytes.hex How many bytes between separators. Positive values count from the right, negative values count from the left. -Create a str of hexadecimal numbers from a bytes object. +Create a string of hexadecimal numbers from a bytes object. Example: >>> value = b'\xb9\x01\xef' @@ -2469,7 +2457,7 @@ Example: static PyObject * bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep) -/*[clinic end generated code: output=1f134da504064139 input=f1238d3455990218]*/ +/*[clinic end generated code: output=1f134da504064139 input=1a21282b1f1ae595]*/ { const char *argbuf = PyBytes_AS_STRING(self); Py_ssize_t arglen = PyBytes_GET_SIZE(self); @@ -2771,7 +2759,7 @@ _PyBytes_FromIterator(PyObject *it, PyObject *x) Py_ssize_t i, size; _PyBytesWriter writer; - /* For iterator version, create a string object and resize as needed */ + /* For iterator version, create a bytes object and resize as needed */ size = PyObject_LengthHint(x, 64); if (size == -1 && PyErr_Occurred()) return NULL; diff --git a/Objects/clinic/bytearrayobject.c.h b/Objects/clinic/bytearrayobject.c.h index 3452b241740..1e3f1975615 100644 --- a/Objects/clinic/bytearrayobject.c.h +++ b/Objects/clinic/bytearrayobject.c.h @@ -990,7 +990,7 @@ PyDoc_STRVAR(bytearray_hex__doc__, "hex($self, /, sep=, bytes_per_sep=1)\n" "--\n" "\n" -"Create a str of hexadecimal numbers from a bytearray object.\n" +"Create a string of hexadecimal numbers from a bytearray object.\n" "\n" " sep\n" " An optional single character or byte to separate hex bytes.\n" @@ -1120,4 +1120,4 @@ bytearray_sizeof(PyByteArrayObject *self, PyObject *Py_UNUSED(ignored)) { return bytearray_sizeof_impl(self); } -/*[clinic end generated code: output=47cd9ad3fdc3ac0c input=a9049054013a1b77]*/ +/*[clinic end generated code: output=a82659f581e55629 input=a9049054013a1b77]*/ diff --git a/Objects/clinic/bytesobject.c.h b/Objects/clinic/bytesobject.c.h index 27ac6b10674..9e365ce1a08 100644 --- a/Objects/clinic/bytesobject.c.h +++ b/Objects/clinic/bytesobject.c.h @@ -750,7 +750,7 @@ PyDoc_STRVAR(bytes_hex__doc__, "hex($self, /, sep=, bytes_per_sep=1)\n" "--\n" "\n" -"Create a str of hexadecimal numbers from a bytes object.\n" +"Create a string of hexadecimal numbers from a bytes object.\n" "\n" " sep\n" " An optional single character or byte to separate hex bytes.\n" @@ -878,4 +878,4 @@ skip_optional_pos: exit: return return_value; } -/*[clinic end generated code: output=6101b417d6a6a717 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=b3f0ec2753246b9c input=a9049054013a1b77]*/ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index f6473c02d30..409355534a2 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -42,6 +42,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. #include "Python.h" #include "pycore_abstract.h" // _PyIndex_Check() #include "pycore_bytes_methods.h" // _Py_bytes_lower() +#include "pycore_format.h" // F_LJUST #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_interp.h" // PyInterpreterState.fs_codec #include "pycore_object.h" // _PyObject_GC_TRACK() diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 18edba855d6..cf78714920b 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -176,6 +176,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 281bce1c5f4..ba84ab902b6 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -510,6 +510,9 @@ Include\internal + + Include\internal + Include\internal