Issue #19361: JSON decoder now raises JSONDecodeError instead of ValueError.

This commit is contained in:
Serhiy Storchaka 2015-01-26 13:16:30 +02:00
parent 4e5d9eac2c
commit 47efb4a5dc
10 changed files with 144 additions and 83 deletions

View File

@ -250,7 +250,7 @@ Basic Usage
will be passed to the constructor of the class.
If the data being deserialized is not a valid JSON document, a
:exc:`ValueError` will be raised.
:exc:`JSONDecodeError` will be raised.
.. function:: loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, object_pairs_hook=None, **kw)
@ -261,7 +261,7 @@ Basic Usage
*encoding* which is ignored and deprecated.
If the data being deserialized is not a valid JSON document, a
:exc:`ValueError` will be raised.
:exc:`JSONDecodeError` will be raised.
Encoders and Decoders
---------------------
@ -334,13 +334,16 @@ Encoders and Decoders
``'\n'``, ``'\r'`` and ``'\0'``.
If the data being deserialized is not a valid JSON document, a
:exc:`ValueError` will be raised.
:exc:`JSONDecodeError` will be raised.
.. method:: decode(s)
Return the Python representation of *s* (a :class:`str` instance
containing a JSON document).
:exc:`JSONDecodeError` will be raised if the given JSON document is not
valid.
.. method:: raw_decode(s)
Decode a JSON document from *s* (a :class:`str` beginning with a
@ -469,6 +472,36 @@ Encoders and Decoders
mysocket.write(chunk)
Exceptions
----------
.. exception:: JSONDecodeError(msg, doc, pos)
Subclass of :exc:`ValueError` with the following additional attributes:
.. attribute:: msg
The unformatted error message.
.. attribute:: doc
The JSON document being parsed.
.. attribute:: pos
The start index of *doc* where parsing failed.
.. attribute:: lineno
The line corresponding to *pos*.
.. attribute:: colno
The column corresponding to *pos*.
.. versionadded:: 3.5
Standard Compliance and Interoperability
----------------------------------------

View File

@ -230,6 +230,9 @@ json
of dictionaries alphabetically by key. (Contributed by Berker Peksag in
:issue:`21650`.)
* JSON decoder now raises :exc:`json.JSONDecodeError` instead of
:exc:`ValueError`. (Contributed by Serhiy Storchaka in :issue:`19361`.)
os
--

View File

@ -98,12 +98,12 @@ Using json.tool from the shell to validate and pretty-print::
__version__ = '2.0.9'
__all__ = [
'dump', 'dumps', 'load', 'loads',
'JSONDecoder', 'JSONEncoder',
'JSONDecoder', 'JSONDecodeError', 'JSONEncoder',
]
__author__ = 'Bob Ippolito <bob@redivi.com>'
from .decoder import JSONDecoder
from .decoder import JSONDecoder, JSONDecodeError
from .encoder import JSONEncoder
_default_encoder = JSONEncoder(
@ -311,7 +311,8 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
raise TypeError('the JSON object must be str, not {!r}'.format(
s.__class__.__name__))
if s.startswith(u'\ufeff'):
raise ValueError("Unexpected UTF-8 BOM (decode using utf-8-sig)")
raise JSONDecodeError("Unexpected UTF-8 BOM (decode using utf-8-sig)",
s, 0)
if (cls is None and object_hook is None and
parse_int is None and parse_float is None and
parse_constant is None and object_pairs_hook is None and not kw):

View File

@ -8,7 +8,7 @@ try:
except ImportError:
c_scanstring = None
__all__ = ['JSONDecoder']
__all__ = ['JSONDecoder', 'JSONDecodeError']
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
@ -17,32 +17,30 @@ PosInf = float('inf')
NegInf = float('-inf')
def linecol(doc, pos):
if isinstance(doc, bytes):
newline = b'\n'
else:
newline = '\n'
lineno = doc.count(newline, 0, pos) + 1
if lineno == 1:
colno = pos + 1
else:
colno = pos - doc.rindex(newline, 0, pos)
return lineno, colno
class JSONDecodeError(ValueError):
"""Subclass of ValueError with the following additional properties:
msg: The unformatted error message
doc: The JSON document being parsed
pos: The start index of doc where parsing failed
lineno: The line corresponding to pos
colno: The column corresponding to pos
def errmsg(msg, doc, pos, end=None):
# Note that this function is called from _json
lineno, colno = linecol(doc, pos)
if end is None:
fmt = '{0}: line {1} column {2} (char {3})'
return fmt.format(msg, lineno, colno, pos)
#fmt = '%s: line %d column %d (char %d)'
#return fmt % (msg, lineno, colno, pos)
endlineno, endcolno = linecol(doc, end)
fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end)
#fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
#return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)
"""
# Note that this exception is used from _json
def __init__(self, msg, doc, pos):
lineno = doc.count('\n', 0, pos) + 1
colno = pos - doc.rfind('\n', 0, pos)
errmsg = '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos)
ValueError.__init__(self, errmsg)
self.msg = msg
self.doc = doc
self.pos = pos
self.lineno = lineno
self.colno = colno
def __reduce__(self):
return self.__class__, (self.msg, self.doc, self.pos)
_CONSTANTS = {
@ -66,7 +64,7 @@ def _decode_uXXXX(s, pos):
except ValueError:
pass
msg = "Invalid \\uXXXX escape"
raise ValueError(errmsg(msg, s, pos))
raise JSONDecodeError(msg, s, pos)
def py_scanstring(s, end, strict=True,
_b=BACKSLASH, _m=STRINGCHUNK.match):
@ -84,8 +82,7 @@ def py_scanstring(s, end, strict=True,
while 1:
chunk = _m(s, end)
if chunk is None:
raise ValueError(
errmsg("Unterminated string starting at", s, begin))
raise JSONDecodeError("Unterminated string starting at", s, begin)
end = chunk.end()
content, terminator = chunk.groups()
# Content contains zero or more unescaped string characters
@ -99,22 +96,21 @@ def py_scanstring(s, end, strict=True,
if strict:
#msg = "Invalid control character %r at" % (terminator,)
msg = "Invalid control character {0!r} at".format(terminator)
raise ValueError(errmsg(msg, s, end))
raise JSONDecodeError(msg, s, end)
else:
_append(terminator)
continue
try:
esc = s[end]
except IndexError:
raise ValueError(
errmsg("Unterminated string starting at", s, begin))
raise JSONDecodeError("Unterminated string starting at", s, begin)
# If not a unicode escape sequence, must be in the lookup table
if esc != 'u':
try:
char = _b[esc]
except KeyError:
msg = "Invalid \\escape: {0!r}".format(esc)
raise ValueError(errmsg(msg, s, end))
raise JSONDecodeError(msg, s, end)
end += 1
else:
uni = _decode_uXXXX(s, end)
@ -163,8 +159,8 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
pairs = object_hook(pairs)
return pairs, end + 1
elif nextchar != '"':
raise ValueError(errmsg(
"Expecting property name enclosed in double quotes", s, end))
raise JSONDecodeError(
"Expecting property name enclosed in double quotes", s, end)
end += 1
while True:
key, end = scanstring(s, end, strict)
@ -174,7 +170,7 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
if s[end:end + 1] != ':':
end = _w(s, end).end()
if s[end:end + 1] != ':':
raise ValueError(errmsg("Expecting ':' delimiter", s, end))
raise JSONDecodeError("Expecting ':' delimiter", s, end)
end += 1
try:
@ -188,7 +184,7 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
try:
value, end = scan_once(s, end)
except StopIteration as err:
raise ValueError(errmsg("Expecting value", s, err.value)) from None
raise JSONDecodeError("Expecting value", s, err.value) from None
pairs_append((key, value))
try:
nextchar = s[end]
@ -202,13 +198,13 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
if nextchar == '}':
break
elif nextchar != ',':
raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1))
raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)
end = _w(s, end).end()
nextchar = s[end:end + 1]
end += 1
if nextchar != '"':
raise ValueError(errmsg(
"Expecting property name enclosed in double quotes", s, end - 1))
raise JSONDecodeError(
"Expecting property name enclosed in double quotes", s, end - 1)
if object_pairs_hook is not None:
result = object_pairs_hook(pairs)
return result, end
@ -232,7 +228,7 @@ def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
try:
value, end = scan_once(s, end)
except StopIteration as err:
raise ValueError(errmsg("Expecting value", s, err.value)) from None
raise JSONDecodeError("Expecting value", s, err.value) from None
_append(value)
nextchar = s[end:end + 1]
if nextchar in _ws:
@ -242,7 +238,7 @@ def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
if nextchar == ']':
break
elif nextchar != ',':
raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1))
raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)
try:
if s[end] in _ws:
end += 1
@ -343,7 +339,7 @@ class JSONDecoder(object):
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
end = _w(s, end).end()
if end != len(s):
raise ValueError(errmsg("Extra data", s, end, len(s)))
raise JSONDecodeError("Extra data", s, end)
return obj
def raw_decode(self, s, idx=0):
@ -358,5 +354,5 @@ class JSONDecoder(object):
try:
obj, end = self.scan_once(s, idx)
except StopIteration as err:
raise ValueError(errmsg("Expecting value", s, err.value)) from None
raise JSONDecodeError("Expecting value", s, err.value) from None
return obj, end

View File

@ -9,12 +9,15 @@ from test import support
# import json with and without accelerations
cjson = support.import_fresh_module('json', fresh=['_json'])
pyjson = support.import_fresh_module('json', blocked=['_json'])
# JSONDecodeError is cached inside the _json module
cjson.JSONDecodeError = cjson.decoder.JSONDecodeError = json.JSONDecodeError
# create two base classes that will be used by the other tests
class PyTest(unittest.TestCase):
json = pyjson
loads = staticmethod(pyjson.loads)
dumps = staticmethod(pyjson.dumps)
JSONDecodeError = staticmethod(pyjson.JSONDecodeError)
@unittest.skipUnless(cjson, 'requires _json')
class CTest(unittest.TestCase):
@ -22,6 +25,7 @@ class CTest(unittest.TestCase):
json = cjson
loads = staticmethod(cjson.loads)
dumps = staticmethod(cjson.dumps)
JSONDecodeError = staticmethod(cjson.JSONDecodeError)
# test PyTest and CTest checking if the functions come from the right module
class TestPyTest(PyTest):

View File

@ -63,12 +63,12 @@ class TestDecode:
def test_extra_data(self):
s = '[1, 2, 3]5'
msg = 'Extra data'
self.assertRaisesRegex(ValueError, msg, self.loads, s)
self.assertRaisesRegex(self.JSONDecodeError, msg, self.loads, s)
def test_invalid_escape(self):
s = '["abc\\y"]'
msg = 'escape'
self.assertRaisesRegex(ValueError, msg, self.loads, s)
self.assertRaisesRegex(self.JSONDecodeError, msg, self.loads, s)
def test_invalid_input_type(self):
msg = 'the JSON object must be str'
@ -80,10 +80,10 @@ class TestDecode:
def test_string_with_utf8_bom(self):
# see #18958
bom_json = "[1,2,3]".encode('utf-8-sig').decode('utf-8')
with self.assertRaises(ValueError) as cm:
with self.assertRaises(self.JSONDecodeError) as cm:
self.loads(bom_json)
self.assertIn('BOM', str(cm.exception))
with self.assertRaises(ValueError) as cm:
with self.assertRaises(self.JSONDecodeError) as cm:
self.json.load(StringIO(bom_json))
self.assertIn('BOM', str(cm.exception))
# make sure that the BOM is not detected in the middle of a string

View File

@ -87,7 +87,7 @@ class TestFail:
continue
try:
self.loads(doc)
except ValueError:
except self.JSONDecodeError:
pass
else:
self.fail("Expected failure for fail{0}.json: {1!r}".format(idx, doc))
@ -124,10 +124,16 @@ class TestFail:
('"spam', 'Unterminated string starting at', 0),
]
for data, msg, idx in test_cases:
self.assertRaisesRegex(ValueError,
r'^{0}: line 1 column {1} \(char {2}\)'.format(
re.escape(msg), idx + 1, idx),
self.loads, data)
with self.assertRaises(self.JSONDecodeError) as cm:
self.loads(data)
err = cm.exception
self.assertEqual(err.msg, msg)
self.assertEqual(err.pos, idx)
self.assertEqual(err.lineno, 1)
self.assertEqual(err.colno, idx + 1)
self.assertEqual(str(err),
'%s: line 1 column %d (char %d)' %
(msg, idx + 1, idx))
def test_unexpected_data(self):
test_cases = [
@ -154,10 +160,16 @@ class TestFail:
('{"spam":42,}', 'Expecting property name enclosed in double quotes', 11),
]
for data, msg, idx in test_cases:
self.assertRaisesRegex(ValueError,
r'^{0}: line 1 column {1} \(char {2}\)'.format(
re.escape(msg), idx + 1, idx),
self.loads, data)
with self.assertRaises(self.JSONDecodeError) as cm:
self.loads(data)
err = cm.exception
self.assertEqual(err.msg, msg)
self.assertEqual(err.pos, idx)
self.assertEqual(err.lineno, 1)
self.assertEqual(err.colno, idx + 1)
self.assertEqual(str(err),
'%s: line 1 column %d (char %d)' %
(msg, idx + 1, idx))
def test_extra_data(self):
test_cases = [
@ -171,11 +183,16 @@ class TestFail:
('"spam",42', 'Extra data', 6),
]
for data, msg, idx in test_cases:
self.assertRaisesRegex(ValueError,
r'^{0}: line 1 column {1} - line 1 column {2}'
r' \(char {3} - {4}\)'.format(
re.escape(msg), idx + 1, len(data) + 1, idx, len(data)),
self.loads, data)
with self.assertRaises(self.JSONDecodeError) as cm:
self.loads(data)
err = cm.exception
self.assertEqual(err.msg, msg)
self.assertEqual(err.pos, idx)
self.assertEqual(err.lineno, 1)
self.assertEqual(err.colno, idx + 1)
self.assertEqual(str(err),
'%s: line 1 column %d (char %d)' %
(msg, idx + 1, idx))
def test_linecol(self):
test_cases = [
@ -185,10 +202,16 @@ class TestFail:
('\n \n\n !', 4, 6, 10),
]
for data, line, col, idx in test_cases:
self.assertRaisesRegex(ValueError,
r'^Expecting value: line {0} column {1}'
r' \(char {2}\)$'.format(line, col, idx),
self.loads, data)
with self.assertRaises(self.JSONDecodeError) as cm:
self.loads(data)
err = cm.exception
self.assertEqual(err.msg, 'Expecting value')
self.assertEqual(err.pos, idx)
self.assertEqual(err.lineno, line)
self.assertEqual(err.colno, col)
self.assertEqual(str(err),
'Expecting value: line %s column %d (char %d)' %
(line, col, idx))
class TestPyFail(TestFail, PyTest): pass
class TestCFail(TestFail, CTest): pass

View File

@ -129,7 +129,7 @@ class TestScanstring:
'"\\ud834\\u0X20"',
]
for s in bad_escapes:
with self.assertRaises(ValueError, msg=s):
with self.assertRaises(self.JSONDecodeError, msg=s):
scanstring(s, 1, True)
def test_overflow(self):

View File

@ -218,6 +218,8 @@ Core and Builtins
Library
-------
- Issue #19361: JSON decoder now raises JSONDecodeError instead of ValueError.
- Issue #18518: timeit now rejects statements which can't be compiled outside
a function or a loop (e.g. "return" or "break").

View File

@ -312,23 +312,22 @@ escape_unicode(PyObject *pystr)
static void
raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
{
/* Use the Python function json.decoder.errmsg to raise a nice
looking ValueError exception */
static PyObject *errmsg_fn = NULL;
PyObject *pymsg;
if (errmsg_fn == NULL) {
/* Use the JSONDecodeError exception to raise a nice-looking ValueError subclass */
static PyObject *JSONDecodeError = NULL;
PyObject *exc;
if (JSONDecodeError == NULL) {
PyObject *decoder = PyImport_ImportModule("json.decoder");
if (decoder == NULL)
return;
errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError");
Py_DECREF(decoder);
if (errmsg_fn == NULL)
if (JSONDecodeError == NULL)
return;
}
pymsg = PyObject_CallFunction(errmsg_fn, "(zOn)", msg, s, end);
if (pymsg) {
PyErr_SetObject(PyExc_ValueError, pymsg);
Py_DECREF(pymsg);
exc = PyObject_CallFunction(JSONDecodeError, "(zOn)", msg, s, end);
if (exc) {
PyErr_SetObject(JSONDecodeError, exc);
Py_DECREF(exc);
}
}