From 4dbc30500218204eace01fa4d429f3087df5376f Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 27 Jan 2015 22:18:46 +0200 Subject: [PATCH 1/2] Issue #23055: Fixed a buffer overflow in PyUnicode_FromFormatV. Analysis and fix by Guido Vranken. --- Lib/test/test_unicode.py | 157 +++++++++++++++++++++++++++++---------- Misc/NEWS | 12 +++ Objects/unicodeobject.c | 2 + 3 files changed, 131 insertions(+), 40 deletions(-) diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 6ab9ed0c3ed..f359082267d 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -2016,9 +2016,10 @@ class UnicodeTest(string_tests.CommonTest, # Test PyUnicode_FromFormat() def test_from_format(self): support.import_module('ctypes') - from ctypes import (pythonapi, py_object, + from ctypes import ( + pythonapi, py_object, sizeof, c_int, c_long, c_longlong, c_ssize_t, - c_uint, c_ulong, c_ulonglong, c_size_t) + c_uint, c_ulong, c_ulonglong, c_size_t, c_void_p) name = "PyUnicode_FromFormat" _PyUnicode_FromFormat = getattr(pythonapi, name) _PyUnicode_FromFormat.restype = py_object @@ -2029,9 +2030,13 @@ class UnicodeTest(string_tests.CommonTest, for arg in args) return _PyUnicode_FromFormat(format, *cargs) + def check_format(expected, format, *args): + text = PyUnicode_FromFormat(format, *args) + self.assertEqual(expected, text) + # ascii format, non-ascii argument - text = PyUnicode_FromFormat(b'ascii\x7f=%U', 'unicode\xe9') - self.assertEqual(text, 'ascii\x7f=unicode\xe9') + check_format('ascii\x7f=unicode\xe9', + b'ascii\x7f=%U', 'unicode\xe9') # non-ascii format, ascii argument: ensure that PyUnicode_FromFormatV() # raises an error @@ -2041,64 +2046,136 @@ class UnicodeTest(string_tests.CommonTest, PyUnicode_FromFormat, b'unicode\xe9=%s', 'ascii') # test "%c" - self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0xabcd)), '\uabcd') - self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0x10ffff)), '\U0010ffff') + check_format('\uabcd', + b'%c', c_int(0xabcd)) + check_format('\U0010ffff', + b'%c', c_int(0x10ffff)) with self.assertRaises(OverflowError): PyUnicode_FromFormat(b'%c', c_int(0x110000)) # Issue #18183 - self.assertEqual( - PyUnicode_FromFormat(b'%c%c', c_int(0x10000), c_int(0x100000)), - '\U00010000\U00100000') + check_format('\U00010000\U00100000', + b'%c%c', c_int(0x10000), c_int(0x100000)) # test "%" - self.assertEqual(PyUnicode_FromFormat(b'%'), '%') - self.assertEqual(PyUnicode_FromFormat(b'%%'), '%') - self.assertEqual(PyUnicode_FromFormat(b'%%s'), '%s') - self.assertEqual(PyUnicode_FromFormat(b'[%%]'), '[%]') - self.assertEqual(PyUnicode_FromFormat(b'%%%s', b'abc'), '%abc') + check_format('%', + b'%') + check_format('%', + b'%%') + check_format('%s', + b'%%s') + check_format('[%]', + b'[%%]') + check_format('%abc', + b'%%%s', b'abc') + + # test %S + check_format("repr=\u20acABC", + b'repr=%S', '\u20acABC') + + # test %R + check_format("repr='\u20acABC'", + b'repr=%R', '\u20acABC') # test integer formats (%i, %d, %u) - self.assertEqual(PyUnicode_FromFormat(b'%03i', c_int(10)), '010') - self.assertEqual(PyUnicode_FromFormat(b'%0.4i', c_int(10)), '0010') - self.assertEqual(PyUnicode_FromFormat(b'%i', c_int(-123)), '-123') - self.assertEqual(PyUnicode_FromFormat(b'%li', c_long(-123)), '-123') - self.assertEqual(PyUnicode_FromFormat(b'%lli', c_longlong(-123)), '-123') - self.assertEqual(PyUnicode_FromFormat(b'%zi', c_ssize_t(-123)), '-123') + check_format('010', + b'%03i', c_int(10)) + check_format('0010', + b'%0.4i', c_int(10)) + check_format('-123', + b'%i', c_int(-123)) + check_format('-123', + b'%li', c_long(-123)) + check_format('-123', + b'%lli', c_longlong(-123)) + check_format('-123', + b'%zi', c_ssize_t(-123)) - self.assertEqual(PyUnicode_FromFormat(b'%d', c_int(-123)), '-123') - self.assertEqual(PyUnicode_FromFormat(b'%ld', c_long(-123)), '-123') - self.assertEqual(PyUnicode_FromFormat(b'%lld', c_longlong(-123)), '-123') - self.assertEqual(PyUnicode_FromFormat(b'%zd', c_ssize_t(-123)), '-123') + check_format('-123', + b'%d', c_int(-123)) + check_format('-123', + b'%ld', c_long(-123)) + check_format('-123', + b'%lld', c_longlong(-123)) + check_format('-123', + b'%zd', c_ssize_t(-123)) - self.assertEqual(PyUnicode_FromFormat(b'%u', c_uint(123)), '123') - self.assertEqual(PyUnicode_FromFormat(b'%lu', c_ulong(123)), '123') - self.assertEqual(PyUnicode_FromFormat(b'%llu', c_ulonglong(123)), '123') - self.assertEqual(PyUnicode_FromFormat(b'%zu', c_size_t(123)), '123') + check_format('123', + b'%u', c_uint(123)) + check_format('123', + b'%lu', c_ulong(123)) + check_format('123', + b'%llu', c_ulonglong(123)) + check_format('123', + b'%zu', c_size_t(123)) + + # test long output + min_longlong = -(2 ** (8 * sizeof(c_longlong) - 1)) + max_longlong = -min_longlong - 1 + check_format(str(min_longlong), + b'%lld', c_longlong(min_longlong)) + check_format(str(max_longlong), + b'%lld', c_longlong(max_longlong)) + max_ulonglong = 2 ** (8 * sizeof(c_ulonglong)) - 1 + check_format(str(max_ulonglong), + b'%llu', c_ulonglong(max_ulonglong)) + PyUnicode_FromFormat(b'%p', c_void_p(-1)) + + # test padding (width and/or precision) + check_format('123'.rjust(10, '0'), + b'%010i', c_int(123)) + check_format('123'.rjust(100), + b'%100i', c_int(123)) + check_format('123'.rjust(300, '0'), + b'%.300i', c_int(123)) + check_format('123'.rjust(80, '0').rjust(100), + b'%100.80i', c_int(123)) + + check_format('123'.rjust(10, '0'), + b'%010u', c_uint(123)) + check_format('123'.rjust(100), + b'%100u', c_uint(123)) + check_format('123'.rjust(300, '0'), + b'%.300u', c_uint(123)) + check_format('123'.rjust(80, '0').rjust(100), + b'%100.80u', c_uint(123)) + + check_format('123'.rjust(10, '0'), + b'%010x', c_int(0x123)) + check_format('123'.rjust(100), + b'%100x', c_int(0x123)) + check_format('123'.rjust(300, '0'), + b'%.300x', c_int(0x123)) + check_format('123'.rjust(80, '0').rjust(100), + b'%100.80x', c_int(0x123)) # test %A - text = PyUnicode_FromFormat(b'%%A:%A', 'abc\xe9\uabcd\U0010ffff') - self.assertEqual(text, r"%A:'abc\xe9\uabcd\U0010ffff'") + check_format(r"%A:'abc\xe9\uabcd\U0010ffff'", + b'%%A:%A', 'abc\xe9\uabcd\U0010ffff') # test %V - text = PyUnicode_FromFormat(b'repr=%V', 'abc', b'xyz') - self.assertEqual(text, 'repr=abc') + check_format('repr=abc', + b'repr=%V', 'abc', b'xyz') # Test string decode from parameter of %s using utf-8. # b'\xe4\xba\xba\xe6\xb0\x91' is utf-8 encoded byte sequence of # '\u4eba\u6c11' - text = PyUnicode_FromFormat(b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91') - self.assertEqual(text, 'repr=\u4eba\u6c11') + check_format('repr=\u4eba\u6c11', + b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91') #Test replace error handler. - text = PyUnicode_FromFormat(b'repr=%V', None, b'abc\xff') - self.assertEqual(text, 'repr=abc\ufffd') + check_format('repr=abc\ufffd', + b'repr=%V', None, b'abc\xff') # not supported: copy the raw format string. these tests are just here # to check for crashs and should not be considered as specifications - self.assertEqual(PyUnicode_FromFormat(b'%1%s', b'abc'), '%s') - self.assertEqual(PyUnicode_FromFormat(b'%1abc'), '%1abc') - self.assertEqual(PyUnicode_FromFormat(b'%+i', c_int(10)), '%+i') - self.assertEqual(PyUnicode_FromFormat(b'%.%s', b'abc'), '%.%s') + check_format('%s', + b'%1%s', b'abc') + check_format('%1abc', + b'%1abc') + check_format('%+i', + b'%+i', c_int(10)) + check_format('%.%s', + b'%.%s', b'abc') # Test PyUnicode_AsWideChar() @support.cpython_only diff --git a/Misc/NEWS b/Misc/NEWS index b853c699279..b203462fea9 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -2,6 +2,18 @@ Python News +++++++++++ +What's New in Python 3.3.7? +============================ + +*Release date: XXXX-XX-XX* + +Core and Builtins +----------------- + +- Issue #23055: Fixed a buffer overflow in PyUnicode_FromFormatV. Analysis + and fix by Guido Vranken. + + What's New in Python 3.3.6? =========================== diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index e896aba4ccd..156316b1543 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2335,6 +2335,8 @@ parse_format_flags(const char *f, f--; } } + if (width < precision) + width = precision; if (*f == '\0') { /* bogus format "%.1" => go backward, f points to "1" */ f--; From e3bfe19358f9d6747676fcdb9af415972d276671 Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Sun, 1 Feb 2015 17:53:53 -0500 Subject: [PATCH 2/2] fix possible overflow in encode_basestring_ascii (closes #23369) --- .../test_json/test_encode_basestring_ascii.py | 9 ++++++++- Misc/NEWS | 6 ++++++ Modules/_json.c | 15 +++++++++++---- 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_json/test_encode_basestring_ascii.py b/Lib/test/test_json/test_encode_basestring_ascii.py index 480afd686e4..3c014d327c1 100644 --- a/Lib/test/test_json/test_encode_basestring_ascii.py +++ b/Lib/test/test_json/test_encode_basestring_ascii.py @@ -1,5 +1,6 @@ from collections import OrderedDict from test.test_json import PyTest, CTest +from test.support import bigaddrspacetest CASES = [ @@ -41,4 +42,10 @@ class TestEncodeBasestringAscii: class TestPyEncodeBasestringAscii(TestEncodeBasestringAscii, PyTest): pass -class TestCEncodeBasestringAscii(TestEncodeBasestringAscii, CTest): pass +class TestCEncodeBasestringAscii(TestEncodeBasestringAscii, CTest): + @bigaddrspacetest + def test_overflow(self): + s = "\uffff"*((2**32)//6 + 1) + with self.assertRaises(OverflowError): + self.json.encoder.encode_basestring_ascii(s) + diff --git a/Misc/NEWS b/Misc/NEWS index b203462fea9..b69955aa61a 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -13,6 +13,12 @@ Core and Builtins - Issue #23055: Fixed a buffer overflow in PyUnicode_FromFormatV. Analysis and fix by Guido Vranken. +Library +------- + +- Issue #23369: Fixed possible integer overflow in + _json.encode_basestring_ascii. + What's New in Python 3.3.6? =========================== diff --git a/Modules/_json.c b/Modules/_json.c index 4bc585dc2ab..397037e56b9 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -216,17 +216,24 @@ ascii_escape_unicode(PyObject *pystr) /* Compute the output size */ for (i = 0, output_size = 2; i < input_chars; i++) { Py_UCS4 c = PyUnicode_READ(kind, input, i); - if (S_CHAR(c)) - output_size++; + Py_ssize_t d; + if (S_CHAR(c)) { + d = 1; + } else { switch(c) { case '\\': case '"': case '\b': case '\f': case '\n': case '\r': case '\t': - output_size += 2; break; + d = 2; break; default: - output_size += c >= 0x10000 ? 12 : 6; + d = c >= 0x10000 ? 12 : 6; } } + if (output_size > PY_SSIZE_T_MAX - d) { + PyErr_SetString(PyExc_OverflowError, "string is too long to escape"); + return NULL; + } + output_size += d; } rval = PyUnicode_New(output_size, 127);