bpo-37587: json: Use _PyUnicodeWriter when scanning string. (GH-15591)
This commit is contained in:
parent
a661392f8f
commit
9c11029bb4
|
@ -0,0 +1,2 @@
|
||||||
|
``_json.scanstring`` is now up to 3x faster when there are many backslash
|
||||||
|
escaped characters in the JSON string.
|
|
@ -73,19 +73,6 @@ static PyMemberDef encoder_members[] = {
|
||||||
{NULL}
|
{NULL}
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Concatenate the items of `lst` with an empty separator, i.e. the C
 * counterpart of the Python expression ''.join(lst).
 *
 * The empty separator string is created the first time the function is
 * called and kept in a function-local static for later calls.
 *
 * Returns the result of PyUnicode_Join(), or NULL if the separator could
 * not be created.
 */
static PyObject *
join_list_unicode(PyObject *lst)
{
    static PyObject *empty_sep = NULL;

    if (empty_sep == NULL) {
        /* First call (or a previous attempt failed): build the separator. */
        empty_sep = PyUnicode_FromStringAndSize("", 0);
    }

    return (empty_sep != NULL) ? PyUnicode_Join(empty_sep, lst) : NULL;
}
|
|
||||||
|
|
||||||
/* Forward decls */
|
/* Forward decls */
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
|
@ -385,21 +372,6 @@ _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
|
||||||
return tpl;
|
return tpl;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Flush the pending `chunk` (if any) into the `chunks` list.
 *
 * Expands in the caller's scope and relies on three names being visible
 * there: the object pointers `chunk` and `chunks`, and a `bail` label.
 *
 *  - Does nothing when `chunk` is NULL.
 *  - Creates `chunks` with PyList_New(0) on first use; if that fails, jumps
 *    to `bail` with `chunk` left untouched.
 *  - Appends `chunk` to `chunks`, then clears `chunk` whether or not the
 *    append succeeded; a failed append jumps to `bail`.
 *
 * The expansion is a bare `if` block with no trailing `while (0)`, so call
 * sites write the macro name without a semicolon.
 */
#define APPEND_OLD_CHUNK \
    if (chunk != NULL) { \
        int append_failed_; \
        if (chunks == NULL && (chunks = PyList_New(0)) == NULL) { \
            goto bail; \
        } \
        append_failed_ = PyList_Append(chunks, chunk); \
        Py_CLEAR(chunk); \
        if (append_failed_) { \
            goto bail; \
        } \
    }
|
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
|
scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
|
||||||
{
|
{
|
||||||
|
@ -417,12 +389,14 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
|
||||||
Py_ssize_t next /* = begin */;
|
Py_ssize_t next /* = begin */;
|
||||||
const void *buf;
|
const void *buf;
|
||||||
int kind;
|
int kind;
|
||||||
PyObject *chunks = NULL;
|
|
||||||
PyObject *chunk = NULL;
|
|
||||||
|
|
||||||
if (PyUnicode_READY(pystr) == -1)
|
if (PyUnicode_READY(pystr) == -1)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
_PyUnicodeWriter writer;
|
||||||
|
_PyUnicodeWriter_Init(&writer);
|
||||||
|
writer.overallocate = 1;
|
||||||
|
|
||||||
len = PyUnicode_GET_LENGTH(pystr);
|
len = PyUnicode_GET_LENGTH(pystr);
|
||||||
buf = PyUnicode_DATA(pystr);
|
buf = PyUnicode_DATA(pystr);
|
||||||
kind = PyUnicode_KIND(pystr);
|
kind = PyUnicode_KIND(pystr);
|
||||||
|
@ -449,18 +423,26 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
|
||||||
}
|
}
|
||||||
c = d;
|
c = d;
|
||||||
}
|
}
|
||||||
if (!(c == '"' || c == '\\')) {
|
|
||||||
|
if (c == '"') {
|
||||||
|
// Fast path for simple case.
|
||||||
|
if (writer.buffer == NULL) {
|
||||||
|
PyObject *ret = PyUnicode_Substring(pystr, end, next);
|
||||||
|
if (ret == NULL) {
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
*next_end_ptr = next + 1;;
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (c != '\\') {
|
||||||
raise_errmsg("Unterminated string starting at", pystr, begin);
|
raise_errmsg("Unterminated string starting at", pystr, begin);
|
||||||
goto bail;
|
goto bail;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Pick up this chunk if it's not zero length */
|
/* Pick up this chunk if it's not zero length */
|
||||||
if (next != end) {
|
if (next != end) {
|
||||||
APPEND_OLD_CHUNK
|
if (_PyUnicodeWriter_WriteSubstring(&writer, pystr, end, next) < 0) {
|
||||||
chunk = PyUnicode_FromKindAndData(
|
|
||||||
kind,
|
|
||||||
(char*)buf + kind * end,
|
|
||||||
next - end);
|
|
||||||
if (chunk == NULL) {
|
|
||||||
goto bail;
|
goto bail;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -551,34 +533,18 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
|
||||||
end -= 6;
|
end -= 6;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
APPEND_OLD_CHUNK
|
if (_PyUnicodeWriter_WriteChar(&writer, c) < 0) {
|
||||||
chunk = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, &c, 1);
|
|
||||||
if (chunk == NULL) {
|
|
||||||
goto bail;
|
goto bail;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (chunks == NULL) {
|
rval = _PyUnicodeWriter_Finish(&writer);
|
||||||
if (chunk != NULL)
|
|
||||||
rval = chunk;
|
|
||||||
else
|
|
||||||
rval = PyUnicode_FromStringAndSize("", 0);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
APPEND_OLD_CHUNK
|
|
||||||
rval = join_list_unicode(chunks);
|
|
||||||
if (rval == NULL) {
|
|
||||||
goto bail;
|
|
||||||
}
|
|
||||||
Py_CLEAR(chunks);
|
|
||||||
}
|
|
||||||
|
|
||||||
*next_end_ptr = end;
|
*next_end_ptr = end;
|
||||||
return rval;
|
return rval;
|
||||||
|
|
||||||
bail:
|
bail:
|
||||||
*next_end_ptr = -1;
|
*next_end_ptr = -1;
|
||||||
Py_XDECREF(chunks);
|
_PyUnicodeWriter_Dealloc(&writer);
|
||||||
Py_XDECREF(chunk);
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue