diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index db821be031c..13bb3a2fbf6 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -542,6 +542,27 @@ if 1: check_limit("a", "[0]") check_limit("a", "*a") + def test_null_terminated(self): + # The source code is null-terminated internally, but bytes-like + # objects are accepted, which could be not terminated. + # Exception changed from TypeError to ValueError in 3.5 + with self.assertRaisesRegex(Exception, "cannot contain null"): + compile("123\x00", "", "eval") + with self.assertRaisesRegex(Exception, "cannot contain null"): + compile(memoryview(b"123\x00"), "", "eval") + code = compile(memoryview(b"123\x00")[1:-1], "", "eval") + self.assertEqual(eval(code), 23) + code = compile(memoryview(b"1234")[1:-1], "", "eval") + self.assertEqual(eval(code), 23) + code = compile(memoryview(b"$23$")[1:-1], "", "eval") + self.assertEqual(eval(code), 23) + + # Also test when eval() and exec() do the compilation step + self.assertEqual(eval(memoryview(b"1234")[1:-1]), 23) + namespace = dict() + exec(memoryview(b"ax = 123")[1:-1], namespace) + self.assertEqual(namespace['x'], 12) + class TestStackSize(unittest.TestCase): # These tests check that the computed stack size for a code object diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py index 4251090522e..723beb37623 100644 --- a/Lib/test/test_float.py +++ b/Lib/test/test_float.py @@ -31,7 +31,6 @@ class GeneralFloatCases(unittest.TestCase): self.assertEqual(float(3.14), 3.14) self.assertEqual(float(314), 314.0) self.assertEqual(float(" 3.14 "), 3.14) - self.assertEqual(float(b" 3.14 "), 3.14) self.assertRaises(ValueError, float, " 0x3.1 ") self.assertRaises(ValueError, float, " -0x3.p-1 ") self.assertRaises(ValueError, float, " +0x3.p-1 ") @@ -43,7 +42,6 @@ class GeneralFloatCases(unittest.TestCase): self.assertRaises(ValueError, float, "+.inf") self.assertRaises(ValueError, float, ".") self.assertRaises(ValueError, float, "-.") - self.assertRaises(ValueError, float, b"-") self.assertRaises(TypeError, float, {}) self.assertRaisesRegex(TypeError, "not 'dict'", float, {}) # Lone surrogate @@ -57,6 +55,42 @@ class GeneralFloatCases(unittest.TestCase): float(b'.' + b'1'*1000) float('.' + '1'*1000) + def test_non_numeric_input_types(self): + # Test possible non-numeric types for the argument x, including + # subclasses of the explicitly documented accepted types. + class CustomStr(str): pass + class CustomBytes(bytes): pass + class CustomByteArray(bytearray): pass + + factories = [ + bytes, + bytearray, + lambda b: CustomStr(b.decode()), + CustomBytes, + CustomByteArray, + memoryview, + ] + try: + from array import array + except ImportError: + pass + else: + factories.append(lambda b: array('B', b)) + + for f in factories: + x = f(b" 3.14 ") + with self.subTest(type(x)): + self.assertEqual(float(x), 3.14) + with self.assertRaisesRegex(ValueError, "could not convert"): + float(f(b'A' * 0x10)) + + def test_float_memoryview(self): + self.assertEqual(float(memoryview(b'12.3')[1:4]), 2.3) + self.assertEqual(float(memoryview(b'12.3\x00')[1:4]), 2.3) + self.assertEqual(float(memoryview(b'12.3 ')[1:4]), 2.3) + self.assertEqual(float(memoryview(b'12.3A')[1:4]), 2.3) + self.assertEqual(float(memoryview(b'12.34')[1:4]), 2.3) + def test_error_message(self): testlist = ('\xbd', '123\xbd', ' 123 456 ') for s in testlist: diff --git a/Lib/test/test_int.py b/Lib/test/test_int.py index cb57f15425c..3e4b4fc2fdc 100644 --- a/Lib/test/test_int.py +++ b/Lib/test/test_int.py @@ -276,16 +276,40 @@ class IntTestCases(unittest.TestCase): class CustomBytes(bytes): pass class CustomByteArray(bytearray): pass - values = [b'100', - bytearray(b'100'), - CustomStr('100'), - CustomBytes(b'100'), - CustomByteArray(b'100')] + factories = [ + bytes, + bytearray, + lambda b: CustomStr(b.decode()), + CustomBytes, + CustomByteArray, + memoryview, + ] + try: + from array import array + except ImportError: + pass + else: + factories.append(lambda b: array('B', b)) - for x in values: - msg = 'x has type %s' % type(x).__name__ - self.assertEqual(int(x), 100, msg=msg) - self.assertEqual(int(x, 2), 4, msg=msg) + for f in factories: + x = f(b'100') + with self.subTest(type(x)): + self.assertEqual(int(x), 100) + if isinstance(x, (str, bytes, bytearray)): + self.assertEqual(int(x, 2), 4) + else: + msg = "can't convert non-string" + with self.assertRaisesRegex(TypeError, msg): + int(x, 2) + with self.assertRaisesRegex(ValueError, 'invalid literal'): + int(f(b'A' * 0x10)) + + def test_int_memoryview(self): + self.assertEqual(int(memoryview(b'123')[1:3]), 23) + self.assertEqual(int(memoryview(b'123\x00')[1:3]), 23) + self.assertEqual(int(memoryview(b'123 ')[1:3]), 23) + self.assertEqual(int(memoryview(b'123A')[1:3]), 23) + self.assertEqual(int(memoryview(b'1234')[1:3]), 23) def test_string_float(self): self.assertRaises(ValueError, int, '1.2') diff --git a/Misc/NEWS b/Misc/NEWS index 68de9581553..1b308a12541 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,10 @@ Release date: XXXX-XX-XX Core and Builtins ----------------- +- Issue #24802: Avoid buffer overreads when int(), float(), compile(), exec() + and eval() are passed bytes-like objects. These objects are not + necessarily terminated by a null byte, but the functions assumed they were. + - Issue #25555: Fix parser and AST: fill lineno and col_offset of "arg" node when compiling AST from Python objects. @@ -357,6 +361,10 @@ Release date: TBA Core and Builtins ----------------- +- Issue #24802: Avoid buffer overreads when int(), float(), compile(), exec() + and eval() are passed bytes-like objects. These objects are not + necessarily terminated by a null byte, but the functions assumed they were. + - Issue #24402: Fix input() to prompt to the redirected stdout when sys.stdout.fileno() fails. diff --git a/Objects/abstract.c b/Objects/abstract.c index 2c1c76e4bdd..3e1ff97547e 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -1312,12 +1312,30 @@ PyNumber_Long(PyObject *o) /* The below check is done in PyLong_FromUnicode(). */ return PyLong_FromUnicodeObject(o, 10); - if (PyObject_GetBuffer(o, &view, PyBUF_SIMPLE) == 0) { + if (PyBytes_Check(o)) /* need to do extra error checking that PyLong_FromString() * doesn't do. In particular int('9\x005') must raise an * exception, not truncate at the null. */ - PyObject *result = _PyLong_FromBytes(view.buf, view.len, 10); + return _PyLong_FromBytes(PyBytes_AS_STRING(o), + PyBytes_GET_SIZE(o), 10); + + if (PyByteArray_Check(o)) + return _PyLong_FromBytes(PyByteArray_AS_STRING(o), + PyByteArray_GET_SIZE(o), 10); + + if (PyObject_GetBuffer(o, &view, PyBUF_SIMPLE) == 0) { + PyObject *result, *bytes; + + /* Copy to NUL-terminated buffer. */ + bytes = PyBytes_FromStringAndSize((const char *)view.buf, view.len); + if (bytes == NULL) { + PyBuffer_Release(&view); + return NULL; + } + result = _PyLong_FromBytes(PyBytes_AS_STRING(bytes), + PyBytes_GET_SIZE(bytes), 10); + Py_DECREF(bytes); PyBuffer_Release(&view); return result; } diff --git a/Objects/complexobject.c b/Objects/complexobject.c index dc1212e4b79..a5bfb667c46 100644 --- a/Objects/complexobject.c +++ b/Objects/complexobject.c @@ -767,7 +767,6 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v) int got_bracket=0; PyObject *s_buffer = NULL; Py_ssize_t len; - Py_buffer view = {NULL, NULL}; if (PyUnicode_Check(v)) { s_buffer = _PyUnicode_TransformDecimalAndSpaceToASCII(v); @@ -777,10 +776,6 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v) if (s == NULL) goto error; } - else if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) == 0) { - s = (const char *)view.buf; - len = view.len; - } else { PyErr_Format(PyExc_TypeError, "complex() argument must be a string or a number, not '%.200s'", @@ -895,7 +890,6 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v) if (s-start != len) goto parse_error; - PyBuffer_Release(&view); Py_XDECREF(s_buffer); return complex_subtype_from_doubles(type, x, y); @@ -903,7 +897,6 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v) PyErr_SetString(PyExc_ValueError, "complex() arg is a malformed string"); error: - PyBuffer_Release(&view); Py_XDECREF(s_buffer); return NULL; } diff --git a/Objects/floatobject.c b/Objects/floatobject.c index d6819814ef8..b8d6f2b52e0 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -144,9 +144,24 @@ PyFloat_FromString(PyObject *v) return NULL; } } + else if (PyBytes_Check(v)) { + s = PyBytes_AS_STRING(v); + len = PyBytes_GET_SIZE(v); + } + else if (PyByteArray_Check(v)) { + s = PyByteArray_AS_STRING(v); + len = PyByteArray_GET_SIZE(v); + } else if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) == 0) { s = (const char *)view.buf; len = view.len; + /* Copy to NUL-terminated buffer. */ + s_buffer = PyBytes_FromStringAndSize(s, len); + if (s_buffer == NULL) { + PyBuffer_Release(&view); + return NULL; + } + s = PyBytes_AS_STRING(s_buffer); } else { PyErr_Format(PyExc_TypeError, diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 3b707184fb1..31d9e0e4d1b 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -599,20 +599,37 @@ builtin_chr_impl(PyModuleDef *module, int i) static const char * -source_as_string(PyObject *cmd, const char *funcname, const char *what, PyCompilerFlags *cf, Py_buffer *view) +source_as_string(PyObject *cmd, const char *funcname, const char *what, PyCompilerFlags *cf, PyObject **cmd_copy) { const char *str; Py_ssize_t size; + Py_buffer view; + *cmd_copy = NULL; if (PyUnicode_Check(cmd)) { cf->cf_flags |= PyCF_IGNORE_COOKIE; str = PyUnicode_AsUTF8AndSize(cmd, &size); if (str == NULL) return NULL; } - else if (PyObject_GetBuffer(cmd, view, PyBUF_SIMPLE) == 0) { - str = (const char *)view->buf; - size = view->len; + else if (PyBytes_Check(cmd)) { + str = PyBytes_AS_STRING(cmd); + size = PyBytes_GET_SIZE(cmd); + } + else if (PyByteArray_Check(cmd)) { + str = PyByteArray_AS_STRING(cmd); + size = PyByteArray_GET_SIZE(cmd); + } + else if (PyObject_GetBuffer(cmd, &view, PyBUF_SIMPLE) == 0) { + /* Copy to NUL-terminated buffer. */ + *cmd_copy = PyBytes_FromStringAndSize( + (const char *)view.buf, view.len); + PyBuffer_Release(&view); + if (*cmd_copy == NULL) { + return NULL; + } + str = PyBytes_AS_STRING(*cmd_copy); + size = PyBytes_GET_SIZE(*cmd_copy); } else { PyErr_Format(PyExc_TypeError, @@ -624,7 +641,7 @@ source_as_string(PyObject *cmd, const char *funcname, const char *what, PyCompil if (strlen(str) != (size_t)size) { PyErr_SetString(PyExc_ValueError, "source code string cannot contain null bytes"); - PyBuffer_Release(view); + Py_CLEAR(*cmd_copy); return NULL; } return str; @@ -660,7 +677,7 @@ builtin_compile_impl(PyModuleDef *module, PyObject *source, int dont_inherit, int optimize) /*[clinic end generated code: output=31881762c1bb90c4 input=9d53e8cfb3c86414]*/ { - Py_buffer view = {NULL, NULL}; + PyObject *source_copy; const char *str; int compile_mode = -1; int is_ast; @@ -732,12 +749,12 @@ builtin_compile_impl(PyModuleDef *module, PyObject *source, goto finally; } - str = source_as_string(source, "compile", "string, bytes or AST", &cf, &view); + str = source_as_string(source, "compile", "string, bytes or AST", &cf, &source_copy); if (str == NULL) goto error; result = Py_CompileStringObject(str, filename, start[compile_mode], &cf, optimize); - PyBuffer_Release(&view); + Py_XDECREF(source_copy); goto finally; error: @@ -812,8 +829,7 @@ builtin_eval_impl(PyModuleDef *module, PyObject *source, PyObject *globals, PyObject *locals) /*[clinic end generated code: output=7284501fb7b4d666 input=11ee718a8640e527]*/ { - PyObject *result, *tmp = NULL; - Py_buffer view = {NULL, NULL}; + PyObject *result, *source_copy; const char *str; PyCompilerFlags cf; @@ -861,7 +877,7 @@ builtin_eval_impl(PyModuleDef *module, PyObject *source, PyObject *globals, } cf.cf_flags = PyCF_SOURCE_IS_UTF8; - str = source_as_string(source, "eval", "string, bytes or code", &cf, &view); + str = source_as_string(source, "eval", "string, bytes or code", &cf, &source_copy); if (str == NULL) return NULL; @@ -870,8 +886,7 @@ builtin_eval_impl(PyModuleDef *module, PyObject *source, PyObject *globals, (void)PyEval_MergeCompilerFlags(&cf); result = PyRun_StringFlags(str, Py_eval_input, globals, locals, &cf); - PyBuffer_Release(&view); - Py_XDECREF(tmp); + Py_XDECREF(source_copy); return result; } @@ -942,12 +957,13 @@ builtin_exec_impl(PyModuleDef *module, PyObject *source, PyObject *globals, v = PyEval_EvalCode(source, globals, locals); } else { - Py_buffer view = {NULL, NULL}; + PyObject *source_copy; const char *str; PyCompilerFlags cf; cf.cf_flags = PyCF_SOURCE_IS_UTF8; str = source_as_string(source, "exec", - "string, bytes or code", &cf, &view); + "string, bytes or code", &cf, + &source_copy); if (str == NULL) return NULL; if (PyEval_MergeCompilerFlags(&cf)) @@ -955,7 +971,7 @@ builtin_exec_impl(PyModuleDef *module, PyObject *source, PyObject *globals, locals, &cf); else v = PyRun_String(str, Py_file_input, globals, locals); - PyBuffer_Release(&view); + Py_XDECREF(source_copy); } if (v == NULL) return NULL;