bpo-30416: Protect the optimizer during constant folding. (#4860)

It no longer spends much time doing complex calculations and no
longer consumes much memory for creating large constants that will
be dropped later.

This fixes also bpo-21074.
This commit is contained in:
Serhiy Storchaka 2017-12-15 14:11:43 +02:00 committed by GitHub
parent a5552f023e
commit 2e3f570185
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 144 additions and 27 deletions

View File

@ -759,7 +759,8 @@ class CBytesIOTest(PyBytesIOTest):
check = self.check_sizeof check = self.check_sizeof
self.assertEqual(object.__sizeof__(io.BytesIO()), basesize) self.assertEqual(object.__sizeof__(io.BytesIO()), basesize)
check(io.BytesIO(), basesize ) check(io.BytesIO(), basesize )
check(io.BytesIO(b'a' * 1000), basesize + sys.getsizeof(b'a' * 1000)) n = 1000 # use a variable to prevent constant folding
check(io.BytesIO(b'a' * n), basesize + sys.getsizeof(b'a' * n))
# Various tests of copy-on-write behaviour for BytesIO. # Various tests of copy-on-write behaviour for BytesIO.

View File

@ -175,8 +175,15 @@ class TestTranforms(BytecodeTestCase):
self.assertInBytecode(code, 'LOAD_CONST', 'b') self.assertInBytecode(code, 'LOAD_CONST', 'b')
# Verify that large sequences do not result from folding # Verify that large sequences do not result from folding
code = compile('a="x"*1000', '', 'single') code = compile('a="x"*10000', '', 'single')
self.assertInBytecode(code, 'LOAD_CONST', 10000)
self.assertNotIn("x"*10000, code.co_consts)
code = compile('a=1<<1000', '', 'single')
self.assertInBytecode(code, 'LOAD_CONST', 1000) self.assertInBytecode(code, 'LOAD_CONST', 1000)
self.assertNotIn(1<<1000, code.co_consts)
code = compile('a=2**1000', '', 'single')
self.assertInBytecode(code, 'LOAD_CONST', 1000)
self.assertNotIn(2**1000, code.co_consts)
def test_binary_subscr_on_unicode(self): def test_binary_subscr_on_unicode(self):
# valid code get optimized # valid code get optimized

View File

@ -0,0 +1,4 @@
The optimizer is now protected from spending much time doing complex
calculations and consuming much memory for creating large constants in
constant folding. Increased limits for constants that can be produced in
constant folding.

View File

@ -125,6 +125,132 @@ fold_unaryop(expr_ty node, PyArena *arena)
return make_const(node, newval, arena); return make_const(node, newval, arena);
} }
/* Check whether a collection doesn't containing too much items (including
subcollections). This protects from creating a constant that needs
too much time for calculating a hash.
"limit" is the maximal number of items.
Returns the negative number if the total number of items exceeds the
limit. Otherwise returns the limit minus the total number of items.
*/
static Py_ssize_t
check_complexity(PyObject *obj, Py_ssize_t limit)
{
if (PyTuple_Check(obj)) {
Py_ssize_t i;
limit -= PyTuple_GET_SIZE(obj);
for (i = 0; limit >= 0 && i < PyTuple_GET_SIZE(obj); i++) {
limit = check_complexity(PyTuple_GET_ITEM(obj, i), limit);
}
return limit;
}
else if (PyFrozenSet_Check(obj)) {
Py_ssize_t i = 0;
PyObject *item;
Py_hash_t hash;
limit -= PySet_GET_SIZE(obj);
while (limit >= 0 && _PySet_NextEntry(obj, &i, &item, &hash)) {
limit = check_complexity(item, limit);
}
}
return limit;
}
#define MAX_INT_SIZE 128 /* bits */
#define MAX_COLLECTION_SIZE 256 /* items */
#define MAX_STR_SIZE 4096 /* characters */
#define MAX_TOTAL_ITEMS 1024 /* including nested collections */
static PyObject *
safe_multiply(PyObject *v, PyObject *w)
{
if (PyLong_Check(v) && PyLong_Check(w) && Py_SIZE(v) && Py_SIZE(w)) {
size_t vbits = _PyLong_NumBits(v);
size_t wbits = _PyLong_NumBits(w);
if (vbits == (size_t)-1 || wbits == (size_t)-1) {
return NULL;
}
if (vbits + wbits > MAX_INT_SIZE) {
return NULL;
}
}
else if (PyLong_Check(v) && (PyTuple_Check(w) || PyFrozenSet_Check(w))) {
Py_ssize_t size = PyTuple_Check(w) ? PyTuple_GET_SIZE(w) :
PySet_GET_SIZE(w);
if (size) {
long n = PyLong_AsLong(v);
if (n < 0 || n > MAX_COLLECTION_SIZE / size) {
return NULL;
}
if (n && check_complexity(w, MAX_TOTAL_ITEMS / n) < 0) {
return NULL;
}
}
}
else if (PyLong_Check(v) && (PyUnicode_Check(w) || PyBytes_Check(w))) {
Py_ssize_t size = PyUnicode_Check(w) ? PyUnicode_GET_LENGTH(w) :
PyBytes_GET_SIZE(w);
if (size) {
long n = PyLong_AsLong(v);
if (n < 0 || n > MAX_STR_SIZE / size) {
return NULL;
}
}
}
else if (PyLong_Check(w) &&
(PyTuple_Check(v) || PyFrozenSet_Check(v) ||
PyUnicode_Check(v) || PyBytes_Check(v)))
{
return safe_multiply(w, v);
}
return PyNumber_Multiply(v, w);
}
static PyObject *
safe_power(PyObject *v, PyObject *w)
{
if (PyLong_Check(v) && PyLong_Check(w) && Py_SIZE(v) && Py_SIZE(w) > 0) {
size_t vbits = _PyLong_NumBits(v);
size_t wbits = PyLong_AsSize_t(w);
if (vbits == (size_t)-1 || wbits == (size_t)-1) {
return NULL;
}
if (vbits > MAX_INT_SIZE / wbits) {
return NULL;
}
}
return PyNumber_Power(v, w, Py_None);
}
static PyObject *
safe_lshift(PyObject *v, PyObject *w)
{
if (PyLong_Check(v) && PyLong_Check(w) && Py_SIZE(v) && Py_SIZE(w)) {
size_t vbits = _PyLong_NumBits(v);
size_t wbits = PyLong_AsSize_t(w);
if (vbits == (size_t)-1 || wbits == (size_t)-1) {
return NULL;
}
if (wbits > MAX_INT_SIZE || vbits > MAX_INT_SIZE - wbits) {
return NULL;
}
}
return PyNumber_Lshift(v, w);
}
static PyObject *
safe_mod(PyObject *v, PyObject *w)
{
if (PyUnicode_Check(v) || PyBytes_Check(v)) {
return NULL;
}
return PyNumber_Remainder(v, w);
}
static int static int
fold_binop(expr_ty node, PyArena *arena) fold_binop(expr_ty node, PyArena *arena)
{ {
@ -147,7 +273,7 @@ fold_binop(expr_ty node, PyArena *arena)
newval = PyNumber_Subtract(lv, rv); newval = PyNumber_Subtract(lv, rv);
break; break;
case Mult: case Mult:
newval = PyNumber_Multiply(lv, rv); newval = safe_multiply(lv, rv);
break; break;
case Div: case Div:
newval = PyNumber_TrueDivide(lv, rv); newval = PyNumber_TrueDivide(lv, rv);
@ -156,13 +282,13 @@ fold_binop(expr_ty node, PyArena *arena)
newval = PyNumber_FloorDivide(lv, rv); newval = PyNumber_FloorDivide(lv, rv);
break; break;
case Mod: case Mod:
newval = PyNumber_Remainder(lv, rv); newval = safe_mod(lv, rv);
break; break;
case Pow: case Pow:
newval = PyNumber_Power(lv, rv, Py_None); newval = safe_power(lv, rv);
break; break;
case LShift: case LShift:
newval = PyNumber_Lshift(lv, rv); newval = safe_lshift(lv, rv);
break; break;
case RShift: case RShift:
newval = PyNumber_Rshift(lv, rv); newval = PyNumber_Rshift(lv, rv);
@ -180,27 +306,6 @@ fold_binop(expr_ty node, PyArena *arena)
return 1; return 1;
} }
if (newval == NULL) {
if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
return 0;
}
PyErr_Clear();
return 1;
}
/* Avoid creating large constants. */
Py_ssize_t size = PyObject_Size(newval);
if (size == -1) {
if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
Py_DECREF(newval);
return 0;
}
PyErr_Clear();
}
else if (size > 20) {
Py_DECREF(newval);
return 1;
}
return make_const(node, newval, arena); return make_const(node, newval, arena);
} }