Improve string forms and PyUnicode_Resize() documentation

Also remove the FIXME for resize_copy(): as discussed with Martin, copying the
string on resize if the string is not resizable is just fine.
This commit is contained in:
Victor Stinner 2011-10-03 23:19:21 +02:00
parent 77bb47b312
commit 8cfcbed4e3
2 changed files with 24 additions and 15 deletions

View File

@ -206,7 +206,7 @@ extern "C" {
immediately follow the structure. utf8_length and wstr_length can be found
in the length field; the utf8 pointer is equal to the data pointer. */
typedef struct {
/* Unicode strings can be in 4 states:
/* There are 4 forms of Unicode strings:
- compact ascii:
@ -227,7 +227,7 @@ typedef struct {
* ascii = 0
* utf8 != data
- string created by the legacy API (not ready):
- legacy string, not ready:
* structure = PyUnicodeObject
* kind = PyUnicode_WCHAR_KIND
@ -239,7 +239,7 @@ typedef struct {
* interned = SSTATE_NOT_INTERNED
* ascii = 0
- string created by the legacy API, ready:
- legacy string, ready:
* structure = PyUnicodeObject structure
* kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
@ -249,10 +249,16 @@ typedef struct {
* data.any is not NULL
* utf8 = data if ascii is 1
String created by the legacy API becomes ready when calling
PyUnicode_READY().
Compact strings use only one memory block (structure + characters),
whereas legacy strings use one block for the structure and one block
for characters.
See also _PyUnicode_CheckConsistency(). */
Legacy strings are created by the PyUnicode_FromUnicode() and
PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
when PyUnicode_READY() is called.
See also _PyUnicode_CheckConsistency().
*/
PyObject_HEAD
Py_ssize_t length; /* Number of code points in the string */
Py_hash_t hash; /* Hash value; -1 if not set */
@ -721,19 +727,22 @@ PyAPI_FUNC(int) PyUnicode_WriteChar(
PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
#endif
/* Resize an already allocated Unicode object to the new size length.
/* Resize a Unicode object allocated by the legacy API (e.g.
PyUnicode_FromUnicode). Unicode objects allocated by the new API (e.g.
PyUnicode_New) cannot be resized by this function.
The length is a number of Py_UNICODE characters (and not the number of code
points).
*unicode is modified to point to the new (resized) object and 0 is
returned on success.
This API may only be called by the function which also called the
Unicode constructor. The refcount on the object must be 1. Otherwise,
an error is returned.
If the refcount on the object is 1, the function resizes the string in
place, which is usually faster than allocating a new string (and copying
characters).
Error handling is implemented as follows: an exception is set, -1
is returned and *unicode left untouched.
*/
is returned and *unicode is left untouched. */
PyAPI_FUNC(int) PyUnicode_Resize(
PyObject **unicode, /* Pointer to the Unicode object */

View File

@ -536,7 +536,8 @@ resize_copy(PyObject *unicode, Py_ssize_t length)
return NULL;
}
return copy;
} else {
}
else {
PyUnicodeObject *w;
assert(_PyUnicode_WSTR(unicode) != NULL);
assert(_PyUnicode_DATA_ANY(unicode) == NULL);
@ -1294,7 +1295,6 @@ unicode_resize(PyObject **p_unicode, Py_ssize_t length)
if (old_length == length)
return 0;
/* FIXME: really create a new object? */
if (!unicode_resizable(unicode)) {
PyObject *copy = resize_copy(unicode, length);
if (copy == NULL)