Issue 5381: Add object_pairs_hook to the json module.

This commit is contained in:
Raymond Hettinger 2009-03-19 19:19:03 +00:00
parent 2124599eaa
commit 91852ca673
7 changed files with 150 additions and 23 deletions

View File

@ -166,7 +166,7 @@ Basic Usage
:func:`dump`.
.. function:: load(fp[, encoding[, cls[, object_hook[, parse_float[, parse_int[, parse_constant[, **kw]]]]]]])
.. function:: load(fp[, encoding[, cls[, object_hook[, parse_float[, parse_int[, parse_constant[, object_pairs_hook[, **kw]]]]]]]])
Deserialize *fp* (a ``.read()``-supporting file-like object containing a JSON
document) to a Python object.
@ -182,6 +182,17 @@ Basic Usage
*object_hook* will be used instead of the :class:`dict`. This feature can be used
to implement custom decoders (e.g. JSON-RPC class hinting).
*object_pairs_hook* is an optional function that will be called with the
result of any object literal decoded with an ordered list of pairs. The
return value of *object_pairs_hook* will be used instead of the
:class:`dict`. This feature can be used to implement custom decoders that
rely on the order that the key and value pairs are decoded (for example,
:func:`collections.OrderedDict` will remember the order of insertion). If
*object_hook* is also defined, the *object_pairs_hook* takes priority.
.. versionchanged:: 2.7
Added support for *object_pairs_hook*.
*parse_float*, if specified, will be called with the string of every JSON
float to be decoded. By default, this is equivalent to ``float(num_str)``.
This can be used to use another datatype or parser for JSON floats
@ -202,7 +213,7 @@ Basic Usage
class.
.. function:: loads(s[, encoding[, cls[, object_hook[, parse_float[, parse_int[, parse_constant[, **kw]]]]]]])
.. function:: loads(s[, encoding[, cls[, object_hook[, parse_float[, parse_int[, parse_constant[, object_pairs_hook[, **kw]]]]]]]])
Deserialize *s* (a :class:`str` or :class:`unicode` instance containing a JSON
document) to a Python object.
@ -218,7 +229,7 @@ Basic Usage
Encoders and decoders
---------------------
.. class:: JSONDecoder([encoding[, object_hook[, parse_float[, parse_int[, parse_constant[, strict]]]]]])
.. class:: JSONDecoder([encoding[, object_hook[, parse_float[, parse_int[, parse_constant[, strict[, object_pairs_hook]]]]]]])
Simple JSON decoder.
@ -259,6 +270,17 @@ Encoders and decoders
:class:`dict`. This can be used to provide custom deserializations (e.g. to
support JSON-RPC class hinting).
*object_pairs_hook*, if specified, will be called with the result of every
JSON object decoded with an ordered list of pairs. The return value of
*object_pairs_hook* will be used instead of the :class:`dict`. This
feature can be used to implement custom decoders that rely on the order
that the key and value pairs are decoded (for example,
:func:`collections.OrderedDict` will remember the order of insertion). If
*object_hook* is also defined, the *object_pairs_hook* takes priority.
.. versionchanged:: 2.7
Added support for *object_pairs_hook*.
*parse_float*, if specified, will be called with the string of every JSON
float to be decoded. By default, this is equivalent to ``float(num_str)``.
This can be used to use another datatype or parser for JSON floats

View File

@ -238,11 +238,12 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
**kw).encode(obj)
_default_decoder = JSONDecoder(encoding=None, object_hook=None)
_default_decoder = JSONDecoder(encoding=None, object_hook=None,
object_pairs_hook=None)
def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, **kw):
parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
"""Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
a JSON document) to a Python object.
@ -265,11 +266,11 @@ def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
return loads(fp.read(),
encoding=encoding, cls=cls, object_hook=object_hook,
parse_float=parse_float, parse_int=parse_int,
parse_constant=parse_constant, **kw)
parse_constant=parse_constant, object_pairs_hook=None, **kw)
def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, **kw):
parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
"""Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON
document) to a Python object.
@ -304,12 +305,14 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
"""
if (cls is None and encoding is None and object_hook is None and
parse_int is None and parse_float is None and
parse_constant is None and not kw):
parse_constant is None and object_pairs_hook is None and not kw):
return _default_decoder.decode(s)
if cls is None:
cls = JSONDecoder
if object_hook is not None:
kw['object_hook'] = object_hook
if object_pairs_hook is not None:
kw['object_pairs_hook'] = object_pairs_hook
if parse_float is not None:
kw['parse_float'] = parse_float
if parse_int is not None:

View File

@ -147,8 +147,9 @@ WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
WHITESPACE_STR = ' \t\n\r'
def JSONObject((s, end), encoding, strict, scan_once, object_hook,
_w=WHITESPACE.match, _ws=WHITESPACE_STR):
pairs = {}
object_pairs_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
pairs = []
pairs_append = pairs.append
# Use a slice to prevent IndexError from being raised, the following
# check will raise a more specific ValueError if the string is empty
nextchar = s[end:end + 1]
@ -187,7 +188,7 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook,
value, end = scan_once(s, end)
except StopIteration:
raise ValueError(errmsg("Expecting object", s, end))
pairs[key] = value
pairs_append((key, value))
try:
nextchar = s[end]
@ -218,6 +219,10 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook,
if nextchar != '"':
raise ValueError(errmsg("Expecting property name", s, end - 1))
if object_pairs_hook is not None:
result = object_pairs_hook(pairs)
return result, end
pairs = dict(pairs)
if object_hook is not None:
pairs = object_hook(pairs)
return pairs, end
@ -289,7 +294,8 @@ class JSONDecoder(object):
"""
def __init__(self, encoding=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, strict=True):
parse_int=None, parse_constant=None, strict=True,
object_pairs_hook=None):
"""``encoding`` determines the encoding used to interpret any ``str``
objects decoded by this instance (utf-8 by default). It has no
effect when decoding ``unicode`` objects.
@ -320,6 +326,7 @@ class JSONDecoder(object):
"""
self.encoding = encoding
self.object_hook = object_hook
self.object_pairs_hook = object_pairs_hook
self.parse_float = parse_float or float
self.parse_int = parse_int or int
self.parse_constant = parse_constant or _CONSTANTS.__getitem__

View File

@ -2,6 +2,7 @@ import decimal
from unittest import TestCase
import json
from collections import OrderedDict
class TestDecode(TestCase):
def test_decimal(self):
@ -20,3 +21,18 @@ class TestDecode(TestCase):
# exercise the uncommon cases. The array cases are already covered.
rval = json.loads('{ "key" : "value" , "k":"v" }')
self.assertEquals(rval, {"key":"value", "k":"v"})
def test_object_pairs_hook(self):
s = '{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}'
p = [("xkd", 1), ("kcw", 2), ("art", 3), ("hxm", 4),
("qrt", 5), ("pad", 6), ("hoy", 7)]
self.assertEqual(json.loads(s), eval(s))
self.assertEqual(json.loads(s, object_pairs_hook = lambda x: x), p)
od = json.loads(s, object_pairs_hook = OrderedDict)
self.assertEqual(od, OrderedDict(p))
self.assertEqual(type(od), OrderedDict)
# the object_pairs_hook takes priority over the object_hook
self.assertEqual(json.loads(s,
object_pairs_hook = OrderedDict,
object_hook = lambda x: None),
OrderedDict(p))

View File

@ -1,6 +1,7 @@
from unittest import TestCase
import json
from collections import OrderedDict
class TestUnicode(TestCase):
def test_encoding1(self):
@ -54,6 +55,21 @@ class TestUnicode(TestCase):
s = '"\\u{0:04x}"'.format(i)
self.assertEquals(json.loads(s), u)
def test_object_pairs_hook_with_unicode(self):
s = u'{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}'
p = [(u"xkd", 1), (u"kcw", 2), (u"art", 3), (u"hxm", 4),
(u"qrt", 5), (u"pad", 6), (u"hoy", 7)]
self.assertEqual(json.loads(s), eval(s))
self.assertEqual(json.loads(s, object_pairs_hook = lambda x: x), p)
od = json.loads(s, object_pairs_hook = OrderedDict)
self.assertEqual(od, OrderedDict(p))
self.assertEqual(type(od), OrderedDict)
# the object_pairs_hook takes priority over the object_hook
self.assertEqual(json.loads(s,
object_pairs_hook = OrderedDict,
object_hook = lambda x: None),
OrderedDict(p))
def test_default_encoding(self):
self.assertEquals(json.loads(u'{"a": "\xe9"}'.encode('utf-8')),
{'a': u'\xe9'})

View File

@ -181,6 +181,9 @@ Core and Builtins
Library
-------
- Issue #5381: Added object_pairs_hook to the json module. This allows
OrderedDicts to be built by the decoder.
- Issue #2110: Add support for thousands separator and 'n' type
specifier to Decimal.__format__

View File

@ -35,6 +35,7 @@ typedef struct _PyScannerObject {
PyObject *encoding;
PyObject *strict;
PyObject *object_hook;
PyObject *pairs_hook;
PyObject *parse_float;
PyObject *parse_int;
PyObject *parse_constant;
@ -44,6 +45,7 @@ static PyMemberDef scanner_members[] = {
{"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
{"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
{"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
{"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"},
{"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
{"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
{"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
@ -891,6 +893,7 @@ scanner_traverse(PyObject *self, visitproc visit, void *arg)
Py_VISIT(s->encoding);
Py_VISIT(s->strict);
Py_VISIT(s->object_hook);
Py_VISIT(s->pairs_hook);
Py_VISIT(s->parse_float);
Py_VISIT(s->parse_int);
Py_VISIT(s->parse_constant);
@ -906,6 +909,7 @@ scanner_clear(PyObject *self)
Py_CLEAR(s->encoding);
Py_CLEAR(s->strict);
Py_CLEAR(s->object_hook);
Py_CLEAR(s->pairs_hook);
Py_CLEAR(s->parse_float);
Py_CLEAR(s->parse_int);
Py_CLEAR(s->parse_constant);
@ -923,13 +927,17 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
*/
char *str = PyString_AS_STRING(pystr);
Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
PyObject *rval = PyDict_New();
PyObject *rval;
PyObject *pairs;
PyObject *item;
PyObject *key = NULL;
PyObject *val = NULL;
char *encoding = PyString_AS_STRING(s->encoding);
int strict = PyObject_IsTrue(s->strict);
Py_ssize_t next_idx;
if (rval == NULL)
pairs = PyList_New(0);
if (pairs == NULL)
return NULL;
/* skip whitespace after { */
@ -962,11 +970,16 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
if (val == NULL)
goto bail;
if (PyDict_SetItem(rval, key, val) == -1)
item = PyTuple_Pack(2, key, val);
if (item == NULL)
goto bail;
Py_CLEAR(key);
Py_CLEAR(val);
if (PyList_Append(pairs, item) == -1) {
Py_DECREF(item);
goto bail;
}
Py_DECREF(item);
idx = next_idx;
/* skip whitespace before } or , */
@ -992,6 +1005,23 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
raise_errmsg("Expecting object", pystr, end_idx);
goto bail;
}
/* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
if (s->pairs_hook != Py_None) {
val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
if (val == NULL)
goto bail;
Py_DECREF(pairs);
*next_idx_ptr = idx + 1;
return val;
}
rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
pairs, NULL);
if (rval == NULL)
goto bail;
Py_CLEAR(pairs);
/* if object_hook is not None: rval = object_hook(rval) */
if (s->object_hook != Py_None) {
val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
@ -1006,7 +1036,7 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
bail:
Py_XDECREF(key);
Py_XDECREF(val);
Py_DECREF(rval);
Py_XDECREF(pairs);
return NULL;
}
@ -1021,12 +1051,16 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
*/
Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
PyObject *val = NULL;
PyObject *rval = PyDict_New();
PyObject *rval;
PyObject *pairs;
PyObject *item;
PyObject *key = NULL;
PyObject *val = NULL;
int strict = PyObject_IsTrue(s->strict);
Py_ssize_t next_idx;
if (rval == NULL)
pairs = PyList_New(0);
if (pairs == NULL)
return NULL;
/* skip whitespace after { */
@ -1059,11 +1093,16 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
if (val == NULL)
goto bail;
if (PyDict_SetItem(rval, key, val) == -1)
item = PyTuple_Pack(2, key, val);
if (item == NULL)
goto bail;
Py_CLEAR(key);
Py_CLEAR(val);
if (PyList_Append(pairs, item) == -1) {
Py_DECREF(item);
goto bail;
}
Py_DECREF(item);
idx = next_idx;
/* skip whitespace before } or , */
@ -1091,6 +1130,22 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
goto bail;
}
/* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
if (s->pairs_hook != Py_None) {
val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
if (val == NULL)
goto bail;
Py_DECREF(pairs);
*next_idx_ptr = idx + 1;
return val;
}
rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
pairs, NULL);
if (rval == NULL)
goto bail;
Py_CLEAR(pairs);
/* if object_hook is not None: rval = object_hook(rval) */
if (s->object_hook != Py_None) {
val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
@ -1105,7 +1160,7 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
bail:
Py_XDECREF(key);
Py_XDECREF(val);
Py_DECREF(rval);
Py_XDECREF(pairs);
return NULL;
}
@ -1648,6 +1703,7 @@ scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
s->encoding = NULL;
s->strict = NULL;
s->object_hook = NULL;
s->pairs_hook = NULL;
s->parse_float = NULL;
s->parse_int = NULL;
s->parse_constant = NULL;
@ -1688,6 +1744,9 @@ scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
if (s->strict == NULL)
goto bail;
s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
if (s->object_hook == NULL)
goto bail;
s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
/* Check the attribute that was just fetched: testing object_hook here
   would let a failed lookup go unnoticed and leave pairs_hook NULL. */
if (s->pairs_hook == NULL)
    goto bail;
s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
@ -1706,6 +1765,7 @@ bail:
Py_CLEAR(s->encoding);
Py_CLEAR(s->strict);
Py_CLEAR(s->object_hook);
Py_CLEAR(s->pairs_hook);
Py_CLEAR(s->parse_float);
Py_CLEAR(s->parse_int);
Py_CLEAR(s->parse_constant);