Issue #19361: JSON decoder now raises JSONDecodeError instead of ValueError.

This commit is contained in:
Serhiy Storchaka 2015-01-26 13:16:30 +02:00
parent 4e5d9eac2c
commit 47efb4a5dc
10 changed files with 144 additions and 83 deletions

View File

@ -250,7 +250,7 @@ Basic Usage
will be passed to the constructor of the class. will be passed to the constructor of the class.
If the data being deserialized is not a valid JSON document, a If the data being deserialized is not a valid JSON document, a
:exc:`ValueError` will be raised. :exc:`JSONDecodeError` will be raised.
.. function:: loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, object_pairs_hook=None, **kw) .. function:: loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, object_pairs_hook=None, **kw)
@ -261,7 +261,7 @@ Basic Usage
*encoding* which is ignored and deprecated. *encoding* which is ignored and deprecated.
If the data being deserialized is not a valid JSON document, a If the data being deserialized is not a valid JSON document, a
:exc:`ValueError` will be raised. :exc:`JSONDecodeError` will be raised.
Encoders and Decoders Encoders and Decoders
--------------------- ---------------------
@ -334,13 +334,16 @@ Encoders and Decoders
``'\n'``, ``'\r'`` and ``'\0'``. ``'\n'``, ``'\r'`` and ``'\0'``.
If the data being deserialized is not a valid JSON document, a If the data being deserialized is not a valid JSON document, a
:exc:`ValueError` will be raised. :exc:`JSONDecodeError` will be raised.
.. method:: decode(s) .. method:: decode(s)
Return the Python representation of *s* (a :class:`str` instance Return the Python representation of *s* (a :class:`str` instance
containing a JSON document). containing a JSON document).
:exc:`JSONDecodeError` will be raised if the given JSON document is not
valid.
.. method:: raw_decode(s) .. method:: raw_decode(s)
Decode a JSON document from *s* (a :class:`str` beginning with a Decode a JSON document from *s* (a :class:`str` beginning with a
@ -469,6 +472,36 @@ Encoders and Decoders
mysocket.write(chunk) mysocket.write(chunk)
Exceptions
----------
.. exception:: JSONDecodeError(msg, doc, pos)
Subclass of :exc:`ValueError` with the following additional attributes:
.. attribute:: msg
The unformatted error message.
.. attribute:: doc
The JSON document being parsed.
.. attribute:: pos
The start index of *doc* where parsing failed.
.. attribute:: lineno
The line corresponding to *pos*.
.. attribute:: colno
The column corresponding to *pos*.
.. versionadded:: 3.5
Standard Compliance and Interoperability Standard Compliance and Interoperability
---------------------------------------- ----------------------------------------

View File

@ -230,6 +230,9 @@ json
of dictionaries alphabetically by key. (Contributed by Berker Peksag in of dictionaries alphabetically by key. (Contributed by Berker Peksag in
:issue:`21650`.) :issue:`21650`.)
* JSON decoder now raises :exc:`json.JSONDecodeError` instead of
:exc:`ValueError`. (Contributed by Serhiy Storchaka in :issue:`19361`.)
os os
-- --

View File

@ -98,12 +98,12 @@ Using json.tool from the shell to validate and pretty-print::
__version__ = '2.0.9' __version__ = '2.0.9'
__all__ = [ __all__ = [
'dump', 'dumps', 'load', 'loads', 'dump', 'dumps', 'load', 'loads',
'JSONDecoder', 'JSONEncoder', 'JSONDecoder', 'JSONDecodeError', 'JSONEncoder',
] ]
__author__ = 'Bob Ippolito <bob@redivi.com>' __author__ = 'Bob Ippolito <bob@redivi.com>'
from .decoder import JSONDecoder from .decoder import JSONDecoder, JSONDecodeError
from .encoder import JSONEncoder from .encoder import JSONEncoder
_default_encoder = JSONEncoder( _default_encoder = JSONEncoder(
@ -311,7 +311,8 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
raise TypeError('the JSON object must be str, not {!r}'.format( raise TypeError('the JSON object must be str, not {!r}'.format(
s.__class__.__name__)) s.__class__.__name__))
if s.startswith(u'\ufeff'): if s.startswith(u'\ufeff'):
raise ValueError("Unexpected UTF-8 BOM (decode using utf-8-sig)") raise JSONDecodeError("Unexpected UTF-8 BOM (decode using utf-8-sig)",
s, 0)
if (cls is None and object_hook is None and if (cls is None and object_hook is None and
parse_int is None and parse_float is None and parse_int is None and parse_float is None and
parse_constant is None and object_pairs_hook is None and not kw): parse_constant is None and object_pairs_hook is None and not kw):

View File

@ -8,7 +8,7 @@ try:
except ImportError: except ImportError:
c_scanstring = None c_scanstring = None
__all__ = ['JSONDecoder'] __all__ = ['JSONDecoder', 'JSONDecodeError']
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
@ -17,32 +17,30 @@ PosInf = float('inf')
NegInf = float('-inf') NegInf = float('-inf')
def linecol(doc, pos): class JSONDecodeError(ValueError):
if isinstance(doc, bytes): """Subclass of ValueError with the following additional properties:
newline = b'\n'
else:
newline = '\n'
lineno = doc.count(newline, 0, pos) + 1
if lineno == 1:
colno = pos + 1
else:
colno = pos - doc.rindex(newline, 0, pos)
return lineno, colno
msg: The unformatted error message
doc: The JSON document being parsed
pos: The start index of doc where parsing failed
lineno: The line corresponding to pos
colno: The column corresponding to pos
def errmsg(msg, doc, pos, end=None): """
# Note that this function is called from _json # Note that this exception is used from _json
lineno, colno = linecol(doc, pos) def __init__(self, msg, doc, pos):
if end is None: lineno = doc.count('\n', 0, pos) + 1
fmt = '{0}: line {1} column {2} (char {3})' colno = pos - doc.rfind('\n', 0, pos)
return fmt.format(msg, lineno, colno, pos) errmsg = '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos)
#fmt = '%s: line %d column %d (char %d)' ValueError.__init__(self, errmsg)
#return fmt % (msg, lineno, colno, pos) self.msg = msg
endlineno, endcolno = linecol(doc, end) self.doc = doc
fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})' self.pos = pos
return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end) self.lineno = lineno
#fmt = '%s: line %d column %d - line %d column %d (char %d - %d)' self.colno = colno
#return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)
def __reduce__(self):
return self.__class__, (self.msg, self.doc, self.pos)
_CONSTANTS = { _CONSTANTS = {
@ -66,7 +64,7 @@ def _decode_uXXXX(s, pos):
except ValueError: except ValueError:
pass pass
msg = "Invalid \\uXXXX escape" msg = "Invalid \\uXXXX escape"
raise ValueError(errmsg(msg, s, pos)) raise JSONDecodeError(msg, s, pos)
def py_scanstring(s, end, strict=True, def py_scanstring(s, end, strict=True,
_b=BACKSLASH, _m=STRINGCHUNK.match): _b=BACKSLASH, _m=STRINGCHUNK.match):
@ -84,8 +82,7 @@ def py_scanstring(s, end, strict=True,
while 1: while 1:
chunk = _m(s, end) chunk = _m(s, end)
if chunk is None: if chunk is None:
raise ValueError( raise JSONDecodeError("Unterminated string starting at", s, begin)
errmsg("Unterminated string starting at", s, begin))
end = chunk.end() end = chunk.end()
content, terminator = chunk.groups() content, terminator = chunk.groups()
# Content contains zero or more unescaped string characters # Content contains zero or more unescaped string characters
@ -99,22 +96,21 @@ def py_scanstring(s, end, strict=True,
if strict: if strict:
#msg = "Invalid control character %r at" % (terminator,) #msg = "Invalid control character %r at" % (terminator,)
msg = "Invalid control character {0!r} at".format(terminator) msg = "Invalid control character {0!r} at".format(terminator)
raise ValueError(errmsg(msg, s, end)) raise JSONDecodeError(msg, s, end)
else: else:
_append(terminator) _append(terminator)
continue continue
try: try:
esc = s[end] esc = s[end]
except IndexError: except IndexError:
raise ValueError( raise JSONDecodeError("Unterminated string starting at", s, begin)
errmsg("Unterminated string starting at", s, begin))
# If not a unicode escape sequence, must be in the lookup table # If not a unicode escape sequence, must be in the lookup table
if esc != 'u': if esc != 'u':
try: try:
char = _b[esc] char = _b[esc]
except KeyError: except KeyError:
msg = "Invalid \\escape: {0!r}".format(esc) msg = "Invalid \\escape: {0!r}".format(esc)
raise ValueError(errmsg(msg, s, end)) raise JSONDecodeError(msg, s, end)
end += 1 end += 1
else: else:
uni = _decode_uXXXX(s, end) uni = _decode_uXXXX(s, end)
@ -163,8 +159,8 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
pairs = object_hook(pairs) pairs = object_hook(pairs)
return pairs, end + 1 return pairs, end + 1
elif nextchar != '"': elif nextchar != '"':
raise ValueError(errmsg( raise JSONDecodeError(
"Expecting property name enclosed in double quotes", s, end)) "Expecting property name enclosed in double quotes", s, end)
end += 1 end += 1
while True: while True:
key, end = scanstring(s, end, strict) key, end = scanstring(s, end, strict)
@ -174,7 +170,7 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
if s[end:end + 1] != ':': if s[end:end + 1] != ':':
end = _w(s, end).end() end = _w(s, end).end()
if s[end:end + 1] != ':': if s[end:end + 1] != ':':
raise ValueError(errmsg("Expecting ':' delimiter", s, end)) raise JSONDecodeError("Expecting ':' delimiter", s, end)
end += 1 end += 1
try: try:
@ -188,7 +184,7 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
try: try:
value, end = scan_once(s, end) value, end = scan_once(s, end)
except StopIteration as err: except StopIteration as err:
raise ValueError(errmsg("Expecting value", s, err.value)) from None raise JSONDecodeError("Expecting value", s, err.value) from None
pairs_append((key, value)) pairs_append((key, value))
try: try:
nextchar = s[end] nextchar = s[end]
@ -202,13 +198,13 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
if nextchar == '}': if nextchar == '}':
break break
elif nextchar != ',': elif nextchar != ',':
raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1)) raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)
end = _w(s, end).end() end = _w(s, end).end()
nextchar = s[end:end + 1] nextchar = s[end:end + 1]
end += 1 end += 1
if nextchar != '"': if nextchar != '"':
raise ValueError(errmsg( raise JSONDecodeError(
"Expecting property name enclosed in double quotes", s, end - 1)) "Expecting property name enclosed in double quotes", s, end - 1)
if object_pairs_hook is not None: if object_pairs_hook is not None:
result = object_pairs_hook(pairs) result = object_pairs_hook(pairs)
return result, end return result, end
@ -232,7 +228,7 @@ def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
try: try:
value, end = scan_once(s, end) value, end = scan_once(s, end)
except StopIteration as err: except StopIteration as err:
raise ValueError(errmsg("Expecting value", s, err.value)) from None raise JSONDecodeError("Expecting value", s, err.value) from None
_append(value) _append(value)
nextchar = s[end:end + 1] nextchar = s[end:end + 1]
if nextchar in _ws: if nextchar in _ws:
@ -242,7 +238,7 @@ def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
if nextchar == ']': if nextchar == ']':
break break
elif nextchar != ',': elif nextchar != ',':
raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1)) raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)
try: try:
if s[end] in _ws: if s[end] in _ws:
end += 1 end += 1
@ -343,7 +339,7 @@ class JSONDecoder(object):
obj, end = self.raw_decode(s, idx=_w(s, 0).end()) obj, end = self.raw_decode(s, idx=_w(s, 0).end())
end = _w(s, end).end() end = _w(s, end).end()
if end != len(s): if end != len(s):
raise ValueError(errmsg("Extra data", s, end, len(s))) raise JSONDecodeError("Extra data", s, end)
return obj return obj
def raw_decode(self, s, idx=0): def raw_decode(self, s, idx=0):
@ -358,5 +354,5 @@ class JSONDecoder(object):
try: try:
obj, end = self.scan_once(s, idx) obj, end = self.scan_once(s, idx)
except StopIteration as err: except StopIteration as err:
raise ValueError(errmsg("Expecting value", s, err.value)) from None raise JSONDecodeError("Expecting value", s, err.value) from None
return obj, end return obj, end

View File

@ -9,12 +9,15 @@ from test import support
# import json with and without accelerations # import json with and without accelerations
cjson = support.import_fresh_module('json', fresh=['_json']) cjson = support.import_fresh_module('json', fresh=['_json'])
pyjson = support.import_fresh_module('json', blocked=['_json']) pyjson = support.import_fresh_module('json', blocked=['_json'])
# JSONDecodeError is cached inside the _json module
cjson.JSONDecodeError = cjson.decoder.JSONDecodeError = json.JSONDecodeError
# create two base classes that will be used by the other tests # create two base classes that will be used by the other tests
class PyTest(unittest.TestCase): class PyTest(unittest.TestCase):
json = pyjson json = pyjson
loads = staticmethod(pyjson.loads) loads = staticmethod(pyjson.loads)
dumps = staticmethod(pyjson.dumps) dumps = staticmethod(pyjson.dumps)
JSONDecodeError = staticmethod(pyjson.JSONDecodeError)
@unittest.skipUnless(cjson, 'requires _json') @unittest.skipUnless(cjson, 'requires _json')
class CTest(unittest.TestCase): class CTest(unittest.TestCase):
@ -22,6 +25,7 @@ class CTest(unittest.TestCase):
json = cjson json = cjson
loads = staticmethod(cjson.loads) loads = staticmethod(cjson.loads)
dumps = staticmethod(cjson.dumps) dumps = staticmethod(cjson.dumps)
JSONDecodeError = staticmethod(cjson.JSONDecodeError)
# test PyTest and CTest checking if the functions come from the right module # test PyTest and CTest checking if the functions come from the right module
class TestPyTest(PyTest): class TestPyTest(PyTest):

View File

@ -63,12 +63,12 @@ class TestDecode:
def test_extra_data(self): def test_extra_data(self):
s = '[1, 2, 3]5' s = '[1, 2, 3]5'
msg = 'Extra data' msg = 'Extra data'
self.assertRaisesRegex(ValueError, msg, self.loads, s) self.assertRaisesRegex(self.JSONDecodeError, msg, self.loads, s)
def test_invalid_escape(self): def test_invalid_escape(self):
s = '["abc\\y"]' s = '["abc\\y"]'
msg = 'escape' msg = 'escape'
self.assertRaisesRegex(ValueError, msg, self.loads, s) self.assertRaisesRegex(self.JSONDecodeError, msg, self.loads, s)
def test_invalid_input_type(self): def test_invalid_input_type(self):
msg = 'the JSON object must be str' msg = 'the JSON object must be str'
@ -80,10 +80,10 @@ class TestDecode:
def test_string_with_utf8_bom(self): def test_string_with_utf8_bom(self):
# see #18958 # see #18958
bom_json = "[1,2,3]".encode('utf-8-sig').decode('utf-8') bom_json = "[1,2,3]".encode('utf-8-sig').decode('utf-8')
with self.assertRaises(ValueError) as cm: with self.assertRaises(self.JSONDecodeError) as cm:
self.loads(bom_json) self.loads(bom_json)
self.assertIn('BOM', str(cm.exception)) self.assertIn('BOM', str(cm.exception))
with self.assertRaises(ValueError) as cm: with self.assertRaises(self.JSONDecodeError) as cm:
self.json.load(StringIO(bom_json)) self.json.load(StringIO(bom_json))
self.assertIn('BOM', str(cm.exception)) self.assertIn('BOM', str(cm.exception))
# make sure that the BOM is not detected in the middle of a string # make sure that the BOM is not detected in the middle of a string

View File

@ -87,7 +87,7 @@ class TestFail:
continue continue
try: try:
self.loads(doc) self.loads(doc)
except ValueError: except self.JSONDecodeError:
pass pass
else: else:
self.fail("Expected failure for fail{0}.json: {1!r}".format(idx, doc)) self.fail("Expected failure for fail{0}.json: {1!r}".format(idx, doc))
@ -124,10 +124,16 @@ class TestFail:
('"spam', 'Unterminated string starting at', 0), ('"spam', 'Unterminated string starting at', 0),
] ]
for data, msg, idx in test_cases: for data, msg, idx in test_cases:
self.assertRaisesRegex(ValueError, with self.assertRaises(self.JSONDecodeError) as cm:
r'^{0}: line 1 column {1} \(char {2}\)'.format( self.loads(data)
re.escape(msg), idx + 1, idx), err = cm.exception
self.loads, data) self.assertEqual(err.msg, msg)
self.assertEqual(err.pos, idx)
self.assertEqual(err.lineno, 1)
self.assertEqual(err.colno, idx + 1)
self.assertEqual(str(err),
'%s: line 1 column %d (char %d)' %
(msg, idx + 1, idx))
def test_unexpected_data(self): def test_unexpected_data(self):
test_cases = [ test_cases = [
@ -154,10 +160,16 @@ class TestFail:
('{"spam":42,}', 'Expecting property name enclosed in double quotes', 11), ('{"spam":42,}', 'Expecting property name enclosed in double quotes', 11),
] ]
for data, msg, idx in test_cases: for data, msg, idx in test_cases:
self.assertRaisesRegex(ValueError, with self.assertRaises(self.JSONDecodeError) as cm:
r'^{0}: line 1 column {1} \(char {2}\)'.format( self.loads(data)
re.escape(msg), idx + 1, idx), err = cm.exception
self.loads, data) self.assertEqual(err.msg, msg)
self.assertEqual(err.pos, idx)
self.assertEqual(err.lineno, 1)
self.assertEqual(err.colno, idx + 1)
self.assertEqual(str(err),
'%s: line 1 column %d (char %d)' %
(msg, idx + 1, idx))
def test_extra_data(self): def test_extra_data(self):
test_cases = [ test_cases = [
@ -171,11 +183,16 @@ class TestFail:
('"spam",42', 'Extra data', 6), ('"spam",42', 'Extra data', 6),
] ]
for data, msg, idx in test_cases: for data, msg, idx in test_cases:
self.assertRaisesRegex(ValueError, with self.assertRaises(self.JSONDecodeError) as cm:
r'^{0}: line 1 column {1} - line 1 column {2}' self.loads(data)
r' \(char {3} - {4}\)'.format( err = cm.exception
re.escape(msg), idx + 1, len(data) + 1, idx, len(data)), self.assertEqual(err.msg, msg)
self.loads, data) self.assertEqual(err.pos, idx)
self.assertEqual(err.lineno, 1)
self.assertEqual(err.colno, idx + 1)
self.assertEqual(str(err),
'%s: line 1 column %d (char %d)' %
(msg, idx + 1, idx))
def test_linecol(self): def test_linecol(self):
test_cases = [ test_cases = [
@ -185,10 +202,16 @@ class TestFail:
('\n \n\n !', 4, 6, 10), ('\n \n\n !', 4, 6, 10),
] ]
for data, line, col, idx in test_cases: for data, line, col, idx in test_cases:
self.assertRaisesRegex(ValueError, with self.assertRaises(self.JSONDecodeError) as cm:
r'^Expecting value: line {0} column {1}' self.loads(data)
r' \(char {2}\)$'.format(line, col, idx), err = cm.exception
self.loads, data) self.assertEqual(err.msg, 'Expecting value')
self.assertEqual(err.pos, idx)
self.assertEqual(err.lineno, line)
self.assertEqual(err.colno, col)
self.assertEqual(str(err),
'Expecting value: line %s column %d (char %d)' %
(line, col, idx))
class TestPyFail(TestFail, PyTest): pass class TestPyFail(TestFail, PyTest): pass
class TestCFail(TestFail, CTest): pass class TestCFail(TestFail, CTest): pass

View File

@ -129,7 +129,7 @@ class TestScanstring:
'"\\ud834\\u0X20"', '"\\ud834\\u0X20"',
] ]
for s in bad_escapes: for s in bad_escapes:
with self.assertRaises(ValueError, msg=s): with self.assertRaises(self.JSONDecodeError, msg=s):
scanstring(s, 1, True) scanstring(s, 1, True)
def test_overflow(self): def test_overflow(self):

View File

@ -218,6 +218,8 @@ Core and Builtins
Library Library
------- -------
- Issue #19361: JSON decoder now raises JSONDecodeError instead of ValueError.
- Issue #18518: timeit now rejects statements which can't be compiled outside - Issue #18518: timeit now rejects statements which can't be compiled outside
a function or a loop (e.g. "return" or "break"). a function or a loop (e.g. "return" or "break").

View File

@ -312,23 +312,22 @@ escape_unicode(PyObject *pystr)
static void static void
raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
{ {
/* Use the Python function json.decoder.errmsg to raise a nice /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
looking ValueError exception */ static PyObject *JSONDecodeError = NULL;
static PyObject *errmsg_fn = NULL; PyObject *exc;
PyObject *pymsg; if (JSONDecodeError == NULL) {
if (errmsg_fn == NULL) {
PyObject *decoder = PyImport_ImportModule("json.decoder"); PyObject *decoder = PyImport_ImportModule("json.decoder");
if (decoder == NULL) if (decoder == NULL)
return; return;
errmsg_fn = PyObject_GetAttrString(decoder, "errmsg"); JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError");
Py_DECREF(decoder); Py_DECREF(decoder);
if (errmsg_fn == NULL) if (JSONDecodeError == NULL)
return; return;
} }
pymsg = PyObject_CallFunction(errmsg_fn, "(zOn)", msg, s, end); exc = PyObject_CallFunction(JSONDecodeError, "(zOn)", msg, s, end);
if (pymsg) { if (exc) {
PyErr_SetObject(PyExc_ValueError, pymsg); PyErr_SetObject(JSONDecodeError, exc);
Py_DECREF(pymsg); Py_DECREF(exc);
} }
} }