From 91852ca673adcfa4e9ceacbb36f2cd45767efa58 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Thu, 19 Mar 2009 19:19:03 +0000 Subject: [PATCH] Issue 5381: Add object_pairs_hook to the json module. --- Doc/library/json.rst | 28 ++++++++++-- Lib/json/__init__.py | 13 +++--- Lib/json/decoder.py | 15 +++++-- Lib/json/tests/test_decode.py | 16 +++++++ Lib/json/tests/test_unicode.py | 16 +++++++ Misc/NEWS | 3 ++ Modules/_json.c | 82 +++++++++++++++++++++++++++++----- 7 files changed, 150 insertions(+), 23 deletions(-) diff --git a/Doc/library/json.rst b/Doc/library/json.rst index 8a644af823b..9eaa3b3ee5e 100644 --- a/Doc/library/json.rst +++ b/Doc/library/json.rst @@ -166,7 +166,7 @@ Basic Usage :func:`dump`. -.. function:: load(fp[, encoding[, cls[, object_hook[, parse_float[, parse_int[, parse_constant[, **kw]]]]]]]) +.. function:: load(fp[, encoding[, cls[, object_hook[, parse_float[, parse_int[, parse_constant[, object_pairs_hook[, **kw]]]]]]]]) Deserialize *fp* (a ``.read()``-supporting file-like object containing a JSON document) to a Python object. @@ -182,6 +182,17 @@ Basic Usage *object_hook* will be used instead of the :class:`dict`. This feature can be used to implement custom decoders (e.g. JSON-RPC class hinting). + *object_pairs_hook* is an optional function that will be called with the + result of any object literal decode with an ordered list of pairs. The + return value of *object_pairs_hook* will be used instead of the + :class:`dict`. This feature can be used to implement custom decoders that + rely on the order that the key and value pairs are decoded (for example, + :func:`collections.OrderedDict` will remember the order of insertion). If + *object_hook* is also defined, the *object_pairs_hook* takes priority. + + .. versionchanged:: 2.7 + Added support for *object_pairs_hook*. + *parse_float*, if specified, will be called with the string of every JSON float to be decoded. By default, this is equivalent to ``float(num_str)``. 
This can be used to use another datatype or parser for JSON floats @@ -202,7 +213,7 @@ Basic Usage class. -.. function:: loads(s[, encoding[, cls[, object_hook[, parse_float[, parse_int[, parse_constant[, **kw]]]]]]]) +.. function:: loads(s[, encoding[, cls[, object_hook[, parse_float[, parse_int[, parse_constant[, object_pairs_hook[, **kw]]]]]]]]) Deserialize *s* (a :class:`str` or :class:`unicode` instance containing a JSON document) to a Python object. @@ -218,7 +229,7 @@ Basic Usage Encoders and decoders --------------------- -.. class:: JSONDecoder([encoding[, object_hook[, parse_float[, parse_int[, parse_constant[, strict]]]]]]) +.. class:: JSONDecoder([encoding[, object_hook[, parse_float[, parse_int[, parse_constant[, strict[, object_pairs_hook]]]]]]]) Simple JSON decoder. @@ -259,6 +270,17 @@ Encoders and decoders :class:`dict`. This can be used to provide custom deserializations (e.g. to support JSON-RPC class hinting). + *object_pairs_hook*, if specified, will be called with the result of every + JSON object decoded with an ordered list of pairs. The return value of + *object_pairs_hook* will be used instead of the :class:`dict`. This + feature can be used to implement custom decoders that rely on the order + that the key and value pairs are decoded (for example, + :func:`collections.OrderedDict` will remember the order of insertion). If + *object_hook* is also defined, the *object_pairs_hook* takes priority. + + .. versionchanged:: 2.7 + Added support for *object_pairs_hook*. + *parse_float*, if specified, will be called with the string of every JSON float to be decoded. By default, this is equivalent to ``float(num_str)``. 
This can be used to use another datatype or parser for JSON floats diff --git a/Lib/json/__init__.py b/Lib/json/__init__.py index 4338a03988a..01c26d9f602 100644 --- a/Lib/json/__init__.py +++ b/Lib/json/__init__.py @@ -238,11 +238,12 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, **kw).encode(obj) -_default_decoder = JSONDecoder(encoding=None, object_hook=None) +_default_decoder = JSONDecoder(encoding=None, object_hook=None, + object_pairs_hook=None) def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, - parse_int=None, parse_constant=None, **kw): + parse_int=None, parse_constant=None, object_pairs_hook=None, **kw): """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing a JSON document) to a Python object. @@ -265,11 +266,11 @@ def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, return loads(fp.read(), encoding=encoding, cls=cls, object_hook=object_hook, parse_float=parse_float, parse_int=parse_int, - parse_constant=parse_constant, **kw) + parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, **kw) def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, - parse_int=None, parse_constant=None, **kw): + parse_int=None, parse_constant=None, object_pairs_hook=None, **kw): """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON document) to a Python object. 
@@ -304,12 +305,14 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, """ if (cls is None and encoding is None and object_hook is None and parse_int is None and parse_float is None and - parse_constant is None and not kw): + parse_constant is None and object_pairs_hook is None and not kw): return _default_decoder.decode(s) if cls is None: cls = JSONDecoder if object_hook is not None: kw['object_hook'] = object_hook + if object_pairs_hook is not None: + kw['object_pairs_hook'] = object_pairs_hook if parse_float is not None: kw['parse_float'] = parse_float if parse_int is not None: diff --git a/Lib/json/decoder.py b/Lib/json/decoder.py index c7f04f9eec8..44635a00e74 100644 --- a/Lib/json/decoder.py +++ b/Lib/json/decoder.py @@ -147,8 +147,9 @@ WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS) WHITESPACE_STR = ' \t\n\r' def JSONObject((s, end), encoding, strict, scan_once, object_hook, - _w=WHITESPACE.match, _ws=WHITESPACE_STR): - pairs = {} + object_pairs_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR): + pairs = [] + pairs_append = pairs.append # Use a slice to prevent IndexError from being raised, the following # check will raise a more specific ValueError if the string is empty nextchar = s[end:end + 1] @@ -187,7 +188,7 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, value, end = scan_once(s, end) except StopIteration: raise ValueError(errmsg("Expecting object", s, end)) - pairs[key] = value + pairs_append((key, value)) try: nextchar = s[end] @@ -218,6 +219,10 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, if nextchar != '"': raise ValueError(errmsg("Expecting property name", s, end - 1)) + if object_pairs_hook is not None: + result = object_pairs_hook(pairs) + return result, end + pairs = dict(pairs) if object_hook is not None: pairs = object_hook(pairs) return pairs, end @@ -289,7 +294,8 @@ class JSONDecoder(object): """ def __init__(self, encoding=None, object_hook=None, parse_float=None, - 
parse_int=None, parse_constant=None, strict=True): + parse_int=None, parse_constant=None, strict=True, + object_pairs_hook=None): """``encoding`` determines the encoding used to interpret any ``str`` objects decoded by this instance (utf-8 by default). It has no effect when decoding ``unicode`` objects. @@ -320,6 +326,7 @@ class JSONDecoder(object): """ self.encoding = encoding self.object_hook = object_hook + self.object_pairs_hook = object_pairs_hook self.parse_float = parse_float or float self.parse_int = parse_int or int self.parse_constant = parse_constant or _CONSTANTS.__getitem__ diff --git a/Lib/json/tests/test_decode.py b/Lib/json/tests/test_decode.py index 484cc946598..0744b52b246 100644 --- a/Lib/json/tests/test_decode.py +++ b/Lib/json/tests/test_decode.py @@ -2,6 +2,7 @@ import decimal from unittest import TestCase import json +from collections import OrderedDict class TestDecode(TestCase): def test_decimal(self): @@ -20,3 +21,18 @@ class TestDecode(TestCase): # exercise the uncommon cases. The array cases are already covered. 
rval = json.loads('{ "key" : "value" , "k":"v" }') self.assertEquals(rval, {"key":"value", "k":"v"}) + + def test_object_pairs_hook(self): + s = '{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}' + p = [("xkd", 1), ("kcw", 2), ("art", 3), ("hxm", 4), + ("qrt", 5), ("pad", 6), ("hoy", 7)] + self.assertEqual(json.loads(s), eval(s)) + self.assertEqual(json.loads(s, object_pairs_hook = lambda x: x), p) + od = json.loads(s, object_pairs_hook = OrderedDict) + self.assertEqual(od, OrderedDict(p)) + self.assertEqual(type(od), OrderedDict) + # the object_pairs_hook takes priority over the object_hook + self.assertEqual(json.loads(s, + object_pairs_hook = OrderedDict, + object_hook = lambda x: None), + OrderedDict(p)) diff --git a/Lib/json/tests/test_unicode.py b/Lib/json/tests/test_unicode.py index 0b47cbb8145..13759f8a3f9 100644 --- a/Lib/json/tests/test_unicode.py +++ b/Lib/json/tests/test_unicode.py @@ -1,6 +1,7 @@ from unittest import TestCase import json +from collections import OrderedDict class TestUnicode(TestCase): def test_encoding1(self): @@ -54,6 +55,21 @@ class TestUnicode(TestCase): s = '"\\u{0:04x}"'.format(i) self.assertEquals(json.loads(s), u) + def test_object_pairs_hook_with_unicode(self): + s = u'{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}' + p = [(u"xkd", 1), (u"kcw", 2), (u"art", 3), (u"hxm", 4), + (u"qrt", 5), (u"pad", 6), (u"hoy", 7)] + self.assertEqual(json.loads(s), eval(s)) + self.assertEqual(json.loads(s, object_pairs_hook = lambda x: x), p) + od = json.loads(s, object_pairs_hook = OrderedDict) + self.assertEqual(od, OrderedDict(p)) + self.assertEqual(type(od), OrderedDict) + # the object_pairs_hook takes priority over the object_hook + self.assertEqual(json.loads(s, + object_pairs_hook = OrderedDict, + object_hook = lambda x: None), + OrderedDict(p)) + def test_default_encoding(self): self.assertEquals(json.loads(u'{"a": "\xe9"}'.encode('utf-8')), {'a': u'\xe9'}) diff --git a/Misc/NEWS b/Misc/NEWS index 
d3abb41904a..09bbc7a7063 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -181,6 +181,9 @@ Core and Builtins Library ------- +- Issue #5381: Added object_pairs_hook to the json module. This allows + OrderedDicts to be built by the decoder. + - Issue #2110: Add support for thousands separator and 'n' type specifier to Decimal.__format__ diff --git a/Modules/_json.c b/Modules/_json.c index d75a019ae26..d4721d39502 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -35,6 +35,7 @@ typedef struct _PyScannerObject { PyObject *encoding; PyObject *strict; PyObject *object_hook; + PyObject *pairs_hook; PyObject *parse_float; PyObject *parse_int; PyObject *parse_constant; @@ -44,6 +45,7 @@ static PyMemberDef scanner_members[] = { {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"}, {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"}, {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"}, + {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"}, {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"}, {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"}, {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"}, @@ -891,6 +893,7 @@ scanner_traverse(PyObject *self, visitproc visit, void *arg) Py_VISIT(s->encoding); Py_VISIT(s->strict); Py_VISIT(s->object_hook); + Py_VISIT(s->pairs_hook); Py_VISIT(s->parse_float); Py_VISIT(s->parse_int); Py_VISIT(s->parse_constant); @@ -906,6 +909,7 @@ scanner_clear(PyObject *self) Py_CLEAR(s->encoding); Py_CLEAR(s->strict); Py_CLEAR(s->object_hook); + Py_CLEAR(s->pairs_hook); Py_CLEAR(s->parse_float); Py_CLEAR(s->parse_int); Py_CLEAR(s->parse_constant); @@ -923,13 +927,17 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ */ char *str = PyString_AS_STRING(pystr); 
Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; - PyObject *rval = PyDict_New(); + PyObject *rval; + PyObject *pairs; + PyObject *item; PyObject *key = NULL; PyObject *val = NULL; char *encoding = PyString_AS_STRING(s->encoding); int strict = PyObject_IsTrue(s->strict); Py_ssize_t next_idx; - if (rval == NULL) + + pairs = PyList_New(0); + if (pairs == NULL) return NULL; /* skip whitespace after { */ @@ -962,11 +970,16 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ if (val == NULL) goto bail; - if (PyDict_SetItem(rval, key, val) == -1) + item = PyTuple_Pack(2, key, val); + if (item == NULL) goto bail; - Py_CLEAR(key); Py_CLEAR(val); + if (PyList_Append(pairs, item) == -1) { + Py_DECREF(item); + goto bail; + } + Py_DECREF(item); idx = next_idx; /* skip whitespace before } or , */ @@ -992,6 +1005,23 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ raise_errmsg("Expecting object", pystr, end_idx); goto bail; } + + /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */ + if (s->pairs_hook != Py_None) { + val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL); + if (val == NULL) + goto bail; + Py_DECREF(pairs); + *next_idx_ptr = idx + 1; + return val; + } + + rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type), + pairs, NULL); + if (rval == NULL) + goto bail; + Py_CLEAR(pairs); + /* if object_hook is not None: rval = object_hook(rval) */ if (s->object_hook != Py_None) { val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); @@ -1006,7 +1036,7 @@ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ bail: Py_XDECREF(key); Py_XDECREF(val); - Py_DECREF(rval); + Py_XDECREF(pairs); return NULL; } @@ -1021,12 +1051,16 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss */ Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; - PyObject *val = NULL; - PyObject 
*rval = PyDict_New(); + PyObject *rval; + PyObject *pairs; + PyObject *item; PyObject *key = NULL; + PyObject *val = NULL; int strict = PyObject_IsTrue(s->strict); Py_ssize_t next_idx; - if (rval == NULL) + + pairs = PyList_New(0); + if (pairs == NULL) return NULL; /* skip whitespace after { */ @@ -1059,11 +1093,16 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss if (val == NULL) goto bail; - if (PyDict_SetItem(rval, key, val) == -1) + item = PyTuple_Pack(2, key, val); + if (item == NULL) goto bail; - Py_CLEAR(key); Py_CLEAR(val); + if (PyList_Append(pairs, item) == -1) { + Py_DECREF(item); + goto bail; + } + Py_DECREF(item); idx = next_idx; /* skip whitespace before } or , */ @@ -1091,6 +1130,22 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss goto bail; } + /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */ + if (s->pairs_hook != Py_None) { + val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL); + if (val == NULL) + goto bail; + Py_DECREF(pairs); + *next_idx_ptr = idx + 1; + return val; + } + + rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type), + pairs, NULL); + if (rval == NULL) + goto bail; + Py_CLEAR(pairs); + /* if object_hook is not None: rval = object_hook(rval) */ if (s->object_hook != Py_None) { val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); @@ -1105,7 +1160,7 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss bail: Py_XDECREF(key); Py_XDECREF(val); - Py_DECREF(rval); + Py_XDECREF(pairs); return NULL; } @@ -1648,6 +1703,7 @@ scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds) s->encoding = NULL; s->strict = NULL; s->object_hook = NULL; + s->pairs_hook = NULL; s->parse_float = NULL; s->parse_int = NULL; s->parse_constant = NULL; @@ -1688,6 +1744,9 @@ scanner_init(PyObject *self, PyObject *args, PyObject *kwds) if (s->strict == NULL) goto bail; s->object_hook = PyObject_GetAttrString(ctx, 
"object_hook"); if (s->object_hook == NULL) goto bail; + s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook"); + if (s->pairs_hook == NULL) + goto bail; s->parse_float = PyObject_GetAttrString(ctx, "parse_float"); @@ -1706,6 +1765,7 @@ bail: Py_CLEAR(s->encoding); Py_CLEAR(s->strict); Py_CLEAR(s->object_hook); + Py_CLEAR(s->pairs_hook); Py_CLEAR(s->parse_float); Py_CLEAR(s->parse_int); Py_CLEAR(s->parse_constant);