mirror of https://github.com/python/cpython
Check the consistency of newly created strings using _PyUnicode_CheckConsistency(str, 1)
* In debug mode, fill the string data with invalid characters * Also simplify reference counting in PyCodec_BackslashReplaceErrors() and PyCodec_XMLCharRefReplaceErrors()
This commit is contained in:
parent
990eff0776
commit
8f825060f1
|
@ -246,6 +246,7 @@ ascii_escape_unicode(PyObject *pystr)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
output[chars++] = '"';
|
output[chars++] = '"';
|
||||||
|
assert(_PyUnicode_CheckConsistency(rval, 1));
|
||||||
return rval;
|
return rval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -397,6 +397,7 @@ MD5_hexdigest(MD5object *self, PyObject *unused)
|
||||||
c = (digest[i] & 0xf);
|
c = (digest[i] & 0xf);
|
||||||
hex_digest[j++] = Py_hexdigits[c];
|
hex_digest[j++] = Py_hexdigits[c];
|
||||||
}
|
}
|
||||||
|
assert(_PyUnicode_CheckConsistency(retval, 1));
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -373,6 +373,7 @@ SHA1_hexdigest(SHA1object *self, PyObject *unused)
|
||||||
c = (digest[i] & 0xf);
|
c = (digest[i] & 0xf);
|
||||||
hex_digest[j++] = Py_hexdigits[c];
|
hex_digest[j++] = Py_hexdigits[c];
|
||||||
}
|
}
|
||||||
|
assert(_PyUnicode_CheckConsistency(retval, 1));
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -466,6 +466,7 @@ SHA256_hexdigest(SHAobject *self, PyObject *unused)
|
||||||
c = (digest[i] & 0xf);
|
c = (digest[i] & 0xf);
|
||||||
hex_digest[j++] = Py_hexdigits[c];
|
hex_digest[j++] = Py_hexdigits[c];
|
||||||
}
|
}
|
||||||
|
assert(_PyUnicode_CheckConsistency(retval, 1));
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -532,6 +532,7 @@ SHA512_hexdigest(SHAobject *self, PyObject *unused)
|
||||||
c = (digest[i] & 0xf);
|
c = (digest[i] & 0xf);
|
||||||
hex_digest[j++] = Py_hexdigits[c];
|
hex_digest[j++] = Py_hexdigits[c];
|
||||||
}
|
}
|
||||||
|
assert(_PyUnicode_CheckConsistency(retval, 1));
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -626,6 +626,7 @@ PyBytes_Repr(PyObject *obj, int smartquotes)
|
||||||
*p++ = c;
|
*p++ = c;
|
||||||
}
|
}
|
||||||
*p++ = quote;
|
*p++ = quote;
|
||||||
|
assert(_PyUnicode_CheckConsistency(v, 1));
|
||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -967,7 +967,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
|
||||||
PyObject *obj;
|
PyObject *obj;
|
||||||
PyCompactUnicodeObject *unicode;
|
PyCompactUnicodeObject *unicode;
|
||||||
void *data;
|
void *data;
|
||||||
int kind_state;
|
enum PyUnicode_Kind kind;
|
||||||
int is_sharing, is_ascii;
|
int is_sharing, is_ascii;
|
||||||
Py_ssize_t char_size;
|
Py_ssize_t char_size;
|
||||||
Py_ssize_t struct_size;
|
Py_ssize_t struct_size;
|
||||||
|
@ -986,17 +986,17 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
|
||||||
is_sharing = 0;
|
is_sharing = 0;
|
||||||
struct_size = sizeof(PyCompactUnicodeObject);
|
struct_size = sizeof(PyCompactUnicodeObject);
|
||||||
if (maxchar < 128) {
|
if (maxchar < 128) {
|
||||||
kind_state = PyUnicode_1BYTE_KIND;
|
kind = PyUnicode_1BYTE_KIND;
|
||||||
char_size = 1;
|
char_size = 1;
|
||||||
is_ascii = 1;
|
is_ascii = 1;
|
||||||
struct_size = sizeof(PyASCIIObject);
|
struct_size = sizeof(PyASCIIObject);
|
||||||
}
|
}
|
||||||
else if (maxchar < 256) {
|
else if (maxchar < 256) {
|
||||||
kind_state = PyUnicode_1BYTE_KIND;
|
kind = PyUnicode_1BYTE_KIND;
|
||||||
char_size = 1;
|
char_size = 1;
|
||||||
}
|
}
|
||||||
else if (maxchar < 65536) {
|
else if (maxchar < 65536) {
|
||||||
kind_state = PyUnicode_2BYTE_KIND;
|
kind = PyUnicode_2BYTE_KIND;
|
||||||
char_size = 2;
|
char_size = 2;
|
||||||
if (sizeof(wchar_t) == 2)
|
if (sizeof(wchar_t) == 2)
|
||||||
is_sharing = 1;
|
is_sharing = 1;
|
||||||
|
@ -1007,7 +1007,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
|
||||||
"invalid maximum character passed to PyUnicode_New");
|
"invalid maximum character passed to PyUnicode_New");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
kind_state = PyUnicode_4BYTE_KIND;
|
kind = PyUnicode_4BYTE_KIND;
|
||||||
char_size = 4;
|
char_size = 4;
|
||||||
if (sizeof(wchar_t) == 4)
|
if (sizeof(wchar_t) == 4)
|
||||||
is_sharing = 1;
|
is_sharing = 1;
|
||||||
|
@ -1041,7 +1041,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
|
||||||
_PyUnicode_LENGTH(unicode) = size;
|
_PyUnicode_LENGTH(unicode) = size;
|
||||||
_PyUnicode_HASH(unicode) = -1;
|
_PyUnicode_HASH(unicode) = -1;
|
||||||
_PyUnicode_STATE(unicode).interned = 0;
|
_PyUnicode_STATE(unicode).interned = 0;
|
||||||
_PyUnicode_STATE(unicode).kind = kind_state;
|
_PyUnicode_STATE(unicode).kind = kind;
|
||||||
_PyUnicode_STATE(unicode).compact = 1;
|
_PyUnicode_STATE(unicode).compact = 1;
|
||||||
_PyUnicode_STATE(unicode).ready = 1;
|
_PyUnicode_STATE(unicode).ready = 1;
|
||||||
_PyUnicode_STATE(unicode).ascii = is_ascii;
|
_PyUnicode_STATE(unicode).ascii = is_ascii;
|
||||||
|
@ -1049,19 +1049,19 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
|
||||||
((char*)data)[size] = 0;
|
((char*)data)[size] = 0;
|
||||||
_PyUnicode_WSTR(unicode) = NULL;
|
_PyUnicode_WSTR(unicode) = NULL;
|
||||||
}
|
}
|
||||||
else if (kind_state == PyUnicode_1BYTE_KIND) {
|
else if (kind == PyUnicode_1BYTE_KIND) {
|
||||||
((char*)data)[size] = 0;
|
((char*)data)[size] = 0;
|
||||||
_PyUnicode_WSTR(unicode) = NULL;
|
_PyUnicode_WSTR(unicode) = NULL;
|
||||||
_PyUnicode_WSTR_LENGTH(unicode) = 0;
|
_PyUnicode_WSTR_LENGTH(unicode) = 0;
|
||||||
unicode->utf8 = NULL;
|
unicode->utf8 = NULL;
|
||||||
unicode->utf8_length = 0;
|
unicode->utf8_length = 0;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
unicode->utf8 = NULL;
|
unicode->utf8 = NULL;
|
||||||
unicode->utf8_length = 0;
|
unicode->utf8_length = 0;
|
||||||
if (kind_state == PyUnicode_2BYTE_KIND)
|
if (kind == PyUnicode_2BYTE_KIND)
|
||||||
((Py_UCS2*)data)[size] = 0;
|
((Py_UCS2*)data)[size] = 0;
|
||||||
else /* kind_state == PyUnicode_4BYTE_KIND */
|
else /* kind == PyUnicode_4BYTE_KIND */
|
||||||
((Py_UCS4*)data)[size] = 0;
|
((Py_UCS4*)data)[size] = 0;
|
||||||
if (is_sharing) {
|
if (is_sharing) {
|
||||||
_PyUnicode_WSTR_LENGTH(unicode) = size;
|
_PyUnicode_WSTR_LENGTH(unicode) = size;
|
||||||
|
@ -1072,6 +1072,13 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
|
||||||
_PyUnicode_WSTR(unicode) = NULL;
|
_PyUnicode_WSTR(unicode) = NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#ifdef Py_DEBUG
|
||||||
|
/* Fill the data with invalid characters to detect bugs earlier.
|
||||||
|
_PyUnicode_CheckConsistency(str, 1) detects invalid characters,
|
||||||
|
at least for ASCII and UCS-4 strings. U+00FF is invalid in ASCII
|
||||||
|
and U+FFFFFFFF is an invalid character in Unicode 6.0. */
|
||||||
|
memset(data, 0xff, size * kind);
|
||||||
|
#endif
|
||||||
assert(_PyUnicode_CheckConsistency((PyObject*)unicode, 0));
|
assert(_PyUnicode_CheckConsistency((PyObject*)unicode, 0));
|
||||||
return obj;
|
return obj;
|
||||||
}
|
}
|
||||||
|
|
|
@ -534,6 +534,7 @@ PyObject *PyCodec_ReplaceErrors(PyObject *exc)
|
||||||
data = PyUnicode_DATA(res);
|
data = PyUnicode_DATA(res);
|
||||||
for (i = 0; i < len; ++i)
|
for (i = 0; i < len; ++i)
|
||||||
PyUnicode_WRITE(kind, data, i, '?');
|
PyUnicode_WRITE(kind, data, i, '?');
|
||||||
|
assert(_PyUnicode_CheckConsistency(res, 1));
|
||||||
return Py_BuildValue("(Nn)", res, end);
|
return Py_BuildValue("(Nn)", res, end);
|
||||||
}
|
}
|
||||||
else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
|
else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
|
||||||
|
@ -559,6 +560,7 @@ PyObject *PyCodec_ReplaceErrors(PyObject *exc)
|
||||||
data = PyUnicode_DATA(res);
|
data = PyUnicode_DATA(res);
|
||||||
for (i=0; i < len; i++)
|
for (i=0; i < len; i++)
|
||||||
PyUnicode_WRITE(kind, data, i, Py_UNICODE_REPLACEMENT_CHARACTER);
|
PyUnicode_WRITE(kind, data, i, Py_UNICODE_REPLACEMENT_CHARACTER);
|
||||||
|
assert(_PyUnicode_CheckConsistency(res, 1));
|
||||||
return Py_BuildValue("(Nn)", res, end);
|
return Py_BuildValue("(Nn)", res, end);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@ -652,8 +654,8 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
|
||||||
}
|
}
|
||||||
*outp++ = ';';
|
*outp++ = ';';
|
||||||
}
|
}
|
||||||
restuple = Py_BuildValue("(On)", res, end);
|
assert(_PyUnicode_CheckConsistency(res, 1));
|
||||||
Py_DECREF(res);
|
restuple = Py_BuildValue("(Nn)", res, end);
|
||||||
Py_DECREF(object);
|
Py_DECREF(object);
|
||||||
return restuple;
|
return restuple;
|
||||||
}
|
}
|
||||||
|
@ -720,8 +722,8 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
|
||||||
*outp++ = Py_hexdigits[c&0xf];
|
*outp++ = Py_hexdigits[c&0xf];
|
||||||
}
|
}
|
||||||
|
|
||||||
restuple = Py_BuildValue("(On)", res, end);
|
assert(_PyUnicode_CheckConsistency(res, 1));
|
||||||
Py_DECREF(res);
|
restuple = Py_BuildValue("(Nn)", res, end);
|
||||||
Py_DECREF(object);
|
Py_DECREF(object);
|
||||||
return restuple;
|
return restuple;
|
||||||
}
|
}
|
||||||
|
|
|
@ -263,6 +263,7 @@ _Py_Mangle(PyObject *privateobj, PyObject *ident)
|
||||||
Py_DECREF(result);
|
Py_DECREF(result);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
assert(_PyUnicode_CheckConsistency(result, 1));
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -992,6 +992,7 @@ make_source_pathname(PyObject *path)
|
||||||
(j = dot0-right));
|
(j = dot0-right));
|
||||||
PyUnicode_WRITE(kind, data, i+j, 'p');
|
PyUnicode_WRITE(kind, data, i+j, 'p');
|
||||||
PyUnicode_WRITE(kind, data, i+j+1, 'y');
|
PyUnicode_WRITE(kind, data, i+j+1, 'y');
|
||||||
|
assert(_PyUnicode_CheckConsistency(result, 1));
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue