From 3f95292be69ac09ed173e4241d220d30b1b059ff Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 27 Jan 2015 22:18:34 +0200 Subject: [PATCH] Issue #23055: Fixed a buffer overflow in PyUnicode_FromFormatV. Analysis and fix by Guido Vranken. --- Lib/test/test_unicode.py | 141 +++++++++++++++++++++++++++++++++++---- Misc/NEWS | 12 ++++ Objects/unicodeobject.c | 37 +++++----- 3 files changed, 159 insertions(+), 31 deletions(-) diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 47af8b938f9..21005eb7a6f 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -1661,7 +1661,10 @@ class UnicodeTest(string_tests.CommonTest, # Test PyUnicode_FromFormat() def test_from_format(self): support.import_module('ctypes') - from ctypes import pythonapi, py_object, c_int + from ctypes import ( + pythonapi, py_object, sizeof, + c_int, c_long, c_longlong, c_ssize_t, + c_uint, c_ulong, c_ulonglong, c_size_t, c_void_p) if sys.maxunicode == 65535: name = "PyUnicodeUCS2_FromFormat" else: @@ -1675,9 +1678,13 @@ class UnicodeTest(string_tests.CommonTest, for arg in args) return _PyUnicode_FromFormat(format, *cargs) + def check_format(expected, format, *args): + text = PyUnicode_FromFormat(format, *args) + self.assertEqual(expected, text) + # ascii format, non-ascii argument - text = PyUnicode_FromFormat(b'ascii\x7f=%U', 'unicode\xe9') - self.assertEqual(text, 'ascii\x7f=unicode\xe9') + check_format('ascii\x7f=unicode\xe9', + b'ascii\x7f=%U', 'unicode\xe9') # non-ascii format, ascii argument: ensure that PyUnicode_FromFormatV() # raises an error @@ -1686,25 +1693,131 @@ class UnicodeTest(string_tests.CommonTest, 'string, got a non-ASCII byte: 0xe9$', PyUnicode_FromFormat, b'unicode\xe9=%s', 'ascii') - self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0xabcd)), '\uabcd') - self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0x10ffff)), '\U0010ffff') + # test "%c" + check_format('\uabcd', + b'%c', c_int(0xabcd)) + check_format('\U0010ffff', + b'%c', c_int(0x10ffff)) + with self.assertRaises(OverflowError): + PyUnicode_FromFormat(b'%c', c_int(0x110000)) + # Issue #18183 + check_format('\U00010000\U00100000', + b'%c%c', c_int(0x10000), c_int(0x100000)) - # other tests - text = PyUnicode_FromFormat(b'%%A:%A', 'abc\xe9\uabcd\U0010ffff') - self.assertEqual(text, r"%A:'abc\xe9\uabcd\U0010ffff'") + # test "%" + check_format('%', + b'%') + check_format('%', + b'%%') + check_format('%s', + b'%%s') + check_format('[%]', + b'[%%]') + check_format('%abc', + b'%%%s', b'abc') - text = PyUnicode_FromFormat(b'repr=%V', 'abc', b'xyz') - self.assertEqual(text, 'repr=abc') + # test %S + check_format("repr=\u20acABC", + b'repr=%S', '\u20acABC') + + # test %R + check_format("repr='\u20acABC'", + b'repr=%R', '\u20acABC') + + # test integer formats (%i, %d, %u) + check_format('010', + b'%03i', c_int(10)) + check_format('0010', + b'%0.4i', c_int(10)) + check_format('-123', + b'%i', c_int(-123)) + + check_format('-123', + b'%d', c_int(-123)) + check_format('-123', + b'%ld', c_long(-123)) + check_format('-123', + b'%lld', c_longlong(-123)) + check_format('-123', + b'%zd', c_ssize_t(-123)) + + check_format('123', + b'%u', c_uint(123)) + check_format('123', + b'%lu', c_ulong(123)) + check_format('123', + b'%llu', c_ulonglong(123)) + check_format('123', + b'%zu', c_size_t(123)) + + # test long output + min_longlong = -(2 ** (8 * sizeof(c_longlong) - 1)) + max_longlong = -min_longlong - 1 + check_format(str(min_longlong), + b'%lld', c_longlong(min_longlong)) + check_format(str(max_longlong), + b'%lld', c_longlong(max_longlong)) + max_ulonglong = 2 ** (8 * sizeof(c_ulonglong)) - 1 + check_format(str(max_ulonglong), + b'%llu', c_ulonglong(max_ulonglong)) + PyUnicode_FromFormat(b'%p', c_void_p(-1)) + + # test padding (width and/or precision) + check_format('123'.rjust(10, '0'), + b'%010i', c_int(123)) + check_format('123'.rjust(100), + b'%100i', c_int(123)) + check_format('123'.rjust(100, '0'), + b'%.100i', c_int(123)) + check_format('123'.rjust(80, '0').rjust(100), + b'%100.80i', c_int(123)) + + check_format('123'.rjust(10, '0'), + b'%010u', c_uint(123)) + check_format('123'.rjust(100), + b'%100u', c_uint(123)) + check_format('123'.rjust(100, '0'), + b'%.100u', c_uint(123)) + check_format('123'.rjust(80, '0').rjust(100), + b'%100.80u', c_uint(123)) + + check_format('123'.rjust(10, '0'), + b'%010x', c_int(0x123)) + check_format('123'.rjust(100), + b'%100x', c_int(0x123)) + check_format('123'.rjust(100, '0'), + b'%.100x', c_int(0x123)) + check_format('123'.rjust(80, '0').rjust(100), + b'%100.80x', c_int(0x123)) + + # test %A + check_format(r"%A:'abc\xe9\uabcd\U0010ffff'", + b'%%A:%A', 'abc\xe9\uabcd\U0010ffff') + + # test %V + check_format('repr=abc', + b'repr=%V', 'abc', b'xyz') # Test string decode from parameter of %s using utf-8. # b'\xe4\xba\xba\xe6\xb0\x91' is utf-8 encoded byte sequence of # '\u4eba\u6c11' - text = PyUnicode_FromFormat(b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91') - self.assertEqual(text, 'repr=\u4eba\u6c11') + check_format('repr=\u4eba\u6c11', + b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91') #Test replace error handler. - text = PyUnicode_FromFormat(b'repr=%V', None, b'abc\xff') - self.assertEqual(text, 'repr=abc\ufffd') + check_format('repr=abc\ufffd', + b'repr=%V', None, b'abc\xff') + + # not supported: copy the raw format string. these tests are just here + # to check for crashs and should not be considered as specifications + check_format('%s', + b'%1%s', b'abc') + check_format('%1abc', + b'%1abc') + check_format('%+i', + b'%+i', c_int(10)) + check_format('%s', + b'%.%s', b'abc') # Test PyUnicode_AsWideChar() def test_aswidechar(self): diff --git a/Misc/NEWS b/Misc/NEWS index e84186259d9..b54f2670832 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -2,6 +2,18 @@ Python News +++++++++++ +What's New in Python 3.2.7? +============================ + +*Release date: XXXX-XX-XX* + +Core and Builtins +----------------- + +- Issue #23055: Fixed a buffer overflow in PyUnicode_FromFormatV. Analysis + and fix by Guido Vranken. + + What's New in Python 3.2.6? =========================== diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index b7988509fbe..1d1d5313c34 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -759,15 +759,10 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) * result in an array) */ for (f = format; *f; f++) { if (*f == '%') { - if (*(f+1)=='%') - continue; - if (*(f+1)=='S' || *(f+1)=='R' || *(f+1)=='A' || *(f+1) == 'V') - ++callcount; - while (Py_ISDIGIT((unsigned)*f)) - width = (width*10) + *f++ - '0'; - while (*++f && *f != '%' && !Py_ISALPHA((unsigned)*f)) - ; - if (*f == 's') + f++; + while (*f && *f != '%' && !Py_ISALPHA((unsigned)*f)) + f++; + if (*f == 's' || *f=='S' || *f=='R' || *f=='A' || *f=='V') ++callcount; } else if (128 <= (unsigned char)*f) { @@ -794,12 +789,16 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) #ifdef HAVE_LONG_LONG int longlongflag = 0; #endif - const char* p = f; + const char* p = f++; width = 0; while (Py_ISDIGIT((unsigned)*f)) width = (width*10) + *f++ - '0'; - while (*++f && *f != '%' && !Py_ISALPHA((unsigned)*f)) - ; + precision = 0; + if (*f == '.') { + f++; + while (Py_ISDIGIT((unsigned)*f)) + precision = (precision*10) + *f++ - '0'; + } /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since * they don't affect the amount of space we reserve. @@ -823,16 +822,18 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) switch (*f) { case 'c': { -#ifndef Py_UNICODE_WIDE int ordinal = va_arg(count, int); + if (ordinal < 0 || ordinal > 0x10ffff) { + PyErr_SetString(PyExc_OverflowError, + "%c arg not in range(0x110000)"); + goto fail; + } +#ifndef Py_UNICODE_WIDE if (ordinal > 0xffff) n += 2; else - n++; -#else - (void)va_arg(count, int); - n++; #endif + n++; break; } case '%': @@ -840,6 +841,8 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) break; case 'd': case 'u': case 'i': case 'x': (void) va_arg(count, int); + if (width < precision) + width = precision; #ifdef HAVE_LONG_LONG if (longlongflag) { if (width < MAX_LONG_LONG_CHARS)