bpo-37587: optimize json.loads (GH-15134)

Use a more tightly scoped temporary variable to help register allocation.
About 1% speedup for large strings.

Use PyDict_SetDefault() for memoizing keys.
Up to 4% speedup when the cache hit ratio is low.
This commit is contained in:
Inada Naoki 2019-08-08 17:57:10 +09:00 committed by GitHub
parent e43e7ed364
commit 2a570af12a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 19 additions and 20 deletions

View File

@ -433,16 +433,21 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
}
while (1) {
/* Find the end of the string or the next escape */
Py_UCS4 c = 0;
for (next = end; next < len; next++) {
c = PyUnicode_READ(kind, buf, next);
if (c == '"' || c == '\\') {
break;
}
else if (c <= 0x1f && strict) {
raise_errmsg("Invalid control character at", pystr, next);
goto bail;
Py_UCS4 c;
{
// Use tight scope variable to help register allocation.
Py_UCS4 d = 0;
for (next = end; next < len; next++) {
d = PyUnicode_READ(kind, buf, next);
if (d == '"' || d == '\\') {
break;
}
if (d <= 0x1f && strict) {
raise_errmsg("Invalid control character at", pystr, next);
goto bail;
}
}
c = d;
}
if (!(c == '"' || c == '\\')) {
raise_errmsg("Unterminated string starting at", pystr, begin);
@ -749,19 +754,13 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
key = scanstring_unicode(pystr, idx + 1, s->strict, &next_idx);
if (key == NULL)
goto bail;
memokey = PyDict_GetItemWithError(s->memo, key);
if (memokey != NULL) {
Py_INCREF(memokey);
Py_DECREF(key);
key = memokey;
}
else if (PyErr_Occurred()) {
memokey = PyDict_SetDefault(s->memo, key, key);
if (memokey == NULL) {
goto bail;
}
else {
if (PyDict_SetItem(s->memo, key, key) < 0)
goto bail;
}
Py_INCREF(memokey);
Py_DECREF(key);
key = memokey;
idx = next_idx;
/* skip whitespace between key and : delimiter, read :, skip whitespace */