diff --git a/Lib/json/decoder.py b/Lib/json/decoder.py index d69a45d6793..ff4bfcdcc40 100644 --- a/Lib/json/decoder.py +++ b/Lib/json/decoder.py @@ -50,17 +50,18 @@ _CONSTANTS = { } +HEXDIGITS = re.compile(r'[0-9A-Fa-f]{4}', FLAGS) STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) BACKSLASH = { '"': '"', '\\': '\\', '/': '/', 'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t', } -def _decode_uXXXX(s, pos): - esc = s[pos + 1:pos + 5] - if len(esc) == 4 and esc[1] not in 'xX': +def _decode_uXXXX(s, pos, _m=HEXDIGITS.match): + esc = _m(s, pos + 1) + if esc is not None: try: - return int(esc, 16) + return int(esc.group(), 16) except ValueError: pass msg = "Invalid \\uXXXX escape" diff --git a/Lib/test/test_json/test_scanstring.py b/Lib/test/test_json/test_scanstring.py index 2d3ee8a8bf0..cca556a3b95 100644 --- a/Lib/test/test_json/test_scanstring.py +++ b/Lib/test/test_json/test_scanstring.py @@ -116,6 +116,11 @@ class TestScanstring: '"\\u012z"', '"\\u0x12"', '"\\u0X12"', + '"\\u{0}"'.format("\uff10" * 4), + '"\\u 123"', + '"\\u-123"', + '"\\u+123"', + '"\\u1_23"', '"\\ud834\\"', '"\\ud834\\u"', '"\\ud834\\ud"', @@ -127,6 +132,11 @@ class TestScanstring: '"\\ud834\\udd2z"', '"\\ud834\\u0x20"', '"\\ud834\\u0X20"', + '"\\ud834\\u{0}"'.format("\uff10" * 4), + '"\\ud834\\u 123"', + '"\\ud834\\u-123"', + '"\\ud834\\u+123"', + '"\\ud834\\u1_23"', ] for s in bad_escapes: with self.assertRaises(self.JSONDecodeError, msg=s): diff --git a/Misc/NEWS.d/next/Library/2024-10-18-08-58-10.gh-issue-125660.sDdDqO.rst b/Misc/NEWS.d/next/Library/2024-10-18-08-58-10.gh-issue-125660.sDdDqO.rst new file mode 100644 index 00000000000..74d76c7bdda --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-18-08-58-10.gh-issue-125660.sDdDqO.rst @@ -0,0 +1 @@ +Reject invalid unicode escapes for Python implementation of :func:`json.loads`.