bpo-43693: Clean up the PyCodeObject fields. (GH-26364)

* Move up the comment about fields using in hashing/comparision.

* Group the fields more clearly.

* Add co_ncellvars and co_nfreevars.

* Raise ValueError if nlocals != len(varnames), rather than aborting.
This commit is contained in:
Eric Snow 2021-05-26 13:15:40 -06:00 committed by GitHub
parent e6c815d2e3
commit 6cc800d363
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 148 additions and 83 deletions

View File

@ -17,31 +17,62 @@ typedef struct _PyOpcache _PyOpcache;
/* Bytecode object */
struct PyCodeObject {
PyObject_HEAD
int co_argcount; /* #arguments, except *args */
int co_posonlyargcount; /* #positional only arguments */
int co_kwonlyargcount; /* #keyword only arguments */
int co_nlocals; /* #local variables */
int co_stacksize; /* #entries needed for evaluation stack */
int co_flags; /* CO_..., see below */
int co_firstlineno; /* first source line number */
/* Note only the following fields are used in hash and/or comparisons
*
* - co_name
* - co_argcount
* - co_posonlyargcount
* - co_kwonlyargcount
* - co_nlocals
* - co_stacksize
* - co_flags
* - co_firstlineno
* - co_code
* - co_consts
* - co_names
* - co_varnames
* - co_freevars
* - co_cellvars
*
* This is done to preserve the name and line number for tracebacks
* and debuggers; otherwise, constant de-duplication would collapse
* identical functions/lambdas defined on different lines.
*/
/* These fields are set with provided values on new code objects. */
// The hottest fields (in the eval loop) are grouped here at the top.
PyObject *co_code; /* instruction opcodes */
PyObject *co_consts; /* list (constants used) */
PyObject *co_names; /* list of strings (names used) */
int co_flags; /* CO_..., see below */
// The rest are not so impactful on performance.
int co_argcount; /* #arguments, except *args */
int co_posonlyargcount; /* #positional only arguments */
int co_kwonlyargcount; /* #keyword only arguments */
int co_stacksize; /* #entries needed for evaluation stack */
int co_firstlineno; /* first source line number */
PyObject *co_varnames; /* tuple of strings (local variable names) */
PyObject *co_freevars; /* tuple of strings (free variable names) */
PyObject *co_cellvars; /* tuple of strings (cell variable names) */
/* The rest aren't used in either hash or comparisons, except for co_name,
used in both. This is done to preserve the name and line number
for tracebacks and debuggers; otherwise, constant de-duplication
would collapse identical functions/lambdas defined on different lines.
*/
Py_ssize_t *co_cell2arg; /* Maps cell vars which are arguments. */
PyObject *co_freevars; /* tuple of strings (free variable names) */
PyObject *co_filename; /* unicode (where it was loaded from) */
PyObject *co_name; /* unicode (name, for reference) */
PyObject *co_linetable; /* string (encoding addr<->lineno mapping) See
Objects/lnotab_notes.txt for details. */
int co_nlocalsplus; /* Number of locals + free + cell variables */
PyObject *co_exceptiontable; /* Byte string encoding exception handling table */
/* These fields are set with computed values on new code objects. */
Py_ssize_t *co_cell2arg; /* Maps cell vars which are arguments. */
// These are redundant but offer some performance benefit.
int co_nlocalsplus; /* number of local + cell + free variables */
int co_nlocals; /* number of local variables */
int co_ncellvars; /* number of cell variables */
int co_nfreevars; /* number of free variables */
/* The remaining fields are zeroed out on new code objects. */
PyObject *co_weakreflist; /* to support weakrefs to code objects */
/* Scratch space for extra data relating to the code object.
Type is a void* to keep the format private in codeobject.c to force

View File

@ -240,6 +240,7 @@ class CodeTest(unittest.TestCase):
# different co_name, co_varnames, co_consts
def func2():
y = 2
z = 3
return y
code2 = func2.__code__
@ -247,14 +248,14 @@ class CodeTest(unittest.TestCase):
("co_argcount", 0),
("co_posonlyargcount", 0),
("co_kwonlyargcount", 0),
("co_nlocals", 0),
("co_nlocals", 1),
("co_stacksize", 0),
("co_flags", code.co_flags | inspect.CO_COROUTINE),
("co_firstlineno", 100),
("co_code", code2.co_code),
("co_consts", code2.co_consts),
("co_names", ("myname",)),
("co_varnames", code2.co_varnames),
("co_varnames", ('spam',)),
("co_freevars", ("freevar",)),
("co_cellvars", ("cellvar",)),
("co_filename", "newfilename"),
@ -265,6 +266,47 @@ class CodeTest(unittest.TestCase):
new_code = code.replace(**{attr: value})
self.assertEqual(getattr(new_code, attr), value)
new_code = code.replace(co_varnames=code2.co_varnames,
co_nlocals=code2.co_nlocals)
self.assertEqual(new_code.co_varnames, code2.co_varnames)
self.assertEqual(new_code.co_nlocals, code2.co_nlocals)
def test_nlocals_mismatch(self):
def func():
x = 1
return x
co = func.__code__
assert co.co_nlocals > 0;
# First we try the constructor.
CodeType = type(co)
for diff in (-1, 1):
with self.assertRaises(ValueError):
CodeType(co.co_argcount,
co.co_posonlyargcount,
co.co_kwonlyargcount,
# This is the only change.
co.co_nlocals + diff,
co.co_stacksize,
co.co_flags,
co.co_code,
co.co_consts,
co.co_names,
co.co_varnames,
co.co_filename,
co.co_name,
co.co_firstlineno,
co.co_lnotab,
co.co_exceptiontable,
co.co_freevars,
co.co_cellvars,
)
# Then we try the replace method.
with self.assertRaises(ValueError):
co.replace(co_nlocals=co.co_nlocals - 1)
with self.assertRaises(ValueError):
co.replace(co_nlocals=co.co_nlocals + 1)
def test_empty_linetable(self):
def func():
pass

View File

@ -164,7 +164,8 @@ PyCode_NewWithPosOnlyArgs(int argcount, int posonlyargcount, int kwonlyargcount,
{
PyCodeObject *co;
Py_ssize_t *cell2arg = NULL;
Py_ssize_t i, n_cellvars, n_varnames, total_args;
Py_ssize_t i, total_args;
int ncellvars, nfreevars;
/* Check argument types */
if (argcount < posonlyargcount || posonlyargcount < 0 ||
@ -184,6 +185,19 @@ PyCode_NewWithPosOnlyArgs(int argcount, int posonlyargcount, int kwonlyargcount,
return NULL;
}
/* Make sure that code is indexable with an int, this is
a long running assumption in ceval.c and many parts of
the interpreter. */
if (PyBytes_GET_SIZE(code) > INT_MAX) {
PyErr_SetString(PyExc_OverflowError, "co_code larger than INT_MAX");
return NULL;
}
if (nlocals != PyTuple_GET_SIZE(varnames)) {
PyErr_SetString(PyExc_ValueError, "co_nlocals != len(co_varnames)");
return NULL;
}
/* Ensure that strings are ready Unicode string */
if (PyUnicode_READY(name) < 0) {
return NULL;
@ -208,46 +222,40 @@ PyCode_NewWithPosOnlyArgs(int argcount, int posonlyargcount, int kwonlyargcount,
return NULL;
}
/* Make sure that code is indexable with an int, this is
a long running assumption in ceval.c and many parts of
the interpreter. */
if (PyBytes_GET_SIZE(code) > INT_MAX) {
PyErr_SetString(PyExc_OverflowError, "co_code larger than INT_MAX");
return NULL;
}
/* Check for any inner or outer closure references */
n_cellvars = PyTuple_GET_SIZE(cellvars);
if (!n_cellvars && !PyTuple_GET_SIZE(freevars)) {
assert(PyTuple_GET_SIZE(cellvars) < INT_MAX);
ncellvars = (int)PyTuple_GET_SIZE(cellvars);
assert(PyTuple_GET_SIZE(freevars) < INT_MAX);
nfreevars = (int)PyTuple_GET_SIZE(freevars);
if (!ncellvars && !nfreevars) {
flags |= CO_NOFREE;
} else {
flags &= ~CO_NOFREE;
}
n_varnames = PyTuple_GET_SIZE(varnames);
if (argcount <= n_varnames && kwonlyargcount <= n_varnames) {
if (argcount <= nlocals && kwonlyargcount <= nlocals) {
/* Never overflows. */
total_args = (Py_ssize_t)argcount + (Py_ssize_t)kwonlyargcount +
((flags & CO_VARARGS) != 0) + ((flags & CO_VARKEYWORDS) != 0);
}
else {
total_args = n_varnames + 1;
total_args = nlocals + 1;
}
if (total_args > n_varnames) {
if (total_args > nlocals) {
PyErr_SetString(PyExc_ValueError, "code: varnames is too small");
return NULL;
}
/* Create mapping between cells and arguments if needed. */
if (n_cellvars) {
if (ncellvars) {
bool used_cell2arg = false;
cell2arg = PyMem_NEW(Py_ssize_t, n_cellvars);
cell2arg = PyMem_NEW(Py_ssize_t, ncellvars);
if (cell2arg == NULL) {
PyErr_NoMemory();
return NULL;
}
/* Find cells which are also arguments. */
for (i = 0; i < n_cellvars; i++) {
for (i = 0; i < ncellvars; i++) {
Py_ssize_t j;
PyObject *cell = PyTuple_GET_ITEM(cellvars, i);
cell2arg[i] = CO_CELL_NOT_AN_ARG;
@ -279,9 +287,10 @@ PyCode_NewWithPosOnlyArgs(int argcount, int posonlyargcount, int kwonlyargcount,
co->co_argcount = argcount;
co->co_posonlyargcount = posonlyargcount;
co->co_kwonlyargcount = kwonlyargcount;
co->co_nlocalsplus = nlocals + ncellvars + nfreevars;
co->co_nlocals = nlocals;
co->co_nlocalsplus = nlocals +
(int)PyTuple_GET_SIZE(freevars) + (int)PyTuple_GET_SIZE(cellvars);
co->co_ncellvars = ncellvars;
co->co_nfreevars = nfreevars;
co->co_stacksize = stacksize;
co->co_flags = flags;
Py_INCREF(code);
@ -1139,7 +1148,7 @@ code_sizeof(PyCodeObject *co, PyObject *Py_UNUSED(args))
_PyCodeObjectExtra *co_extra = (_PyCodeObjectExtra*) co->co_extra;
if (co->co_cell2arg != NULL && co->co_cellvars != NULL) {
res += PyTuple_GET_SIZE(co->co_cellvars) * sizeof(Py_ssize_t);
res += co->co_ncellvars * sizeof(Py_ssize_t);
}
if (co_extra != NULL) {
res += sizeof(_PyCodeObjectExtra) +

View File

@ -1023,7 +1023,6 @@ PyFrame_FastToLocalsWithError(PyFrameObject *f)
PyObject **fast;
PyCodeObject *co;
Py_ssize_t j;
Py_ssize_t ncells, nfreevars;
if (f == NULL) {
PyErr_BadInternalCall();
@ -1051,10 +1050,8 @@ PyFrame_FastToLocalsWithError(PyFrameObject *f)
if (map_to_dict(map, j, locals, fast, 0) < 0)
return -1;
}
ncells = PyTuple_GET_SIZE(co->co_cellvars);
nfreevars = PyTuple_GET_SIZE(co->co_freevars);
if (ncells || nfreevars) {
if (map_to_dict(co->co_cellvars, ncells,
if (co->co_ncellvars || co->co_nfreevars) {
if (map_to_dict(co->co_cellvars, co->co_ncellvars,
locals, fast + co->co_nlocals, 1))
return -1;
@ -1067,8 +1064,8 @@ PyFrame_FastToLocalsWithError(PyFrameObject *f)
into the locals dict used by the class.
*/
if (co->co_flags & CO_OPTIMIZED) {
if (map_to_dict(co->co_freevars, nfreevars,
locals, fast + co->co_nlocals + ncells, 1) < 0)
if (map_to_dict(co->co_freevars, co->co_nfreevars, locals,
fast + co->co_nlocals + co->co_ncellvars, 1) < 0)
return -1;
}
}
@ -1096,7 +1093,6 @@ PyFrame_LocalsToFast(PyFrameObject *f, int clear)
PyObject *error_type, *error_value, *error_traceback;
PyCodeObject *co;
Py_ssize_t j;
Py_ssize_t ncells, nfreevars;
if (f == NULL)
return;
locals = _PyFrame_Specials(f)[FRAME_SPECIALS_LOCALS_OFFSET];
@ -1113,16 +1109,14 @@ PyFrame_LocalsToFast(PyFrameObject *f, int clear)
j = co->co_nlocals;
if (co->co_nlocals)
dict_to_map(co->co_varnames, j, locals, fast, 0, clear);
ncells = PyTuple_GET_SIZE(co->co_cellvars);
nfreevars = PyTuple_GET_SIZE(co->co_freevars);
if (ncells || nfreevars) {
dict_to_map(co->co_cellvars, ncells,
if (co->co_ncellvars || co->co_nfreevars) {
dict_to_map(co->co_cellvars, co->co_ncellvars,
locals, fast + co->co_nlocals, 1, clear);
/* Same test as in PyFrame_FastToLocals() above. */
if (co->co_flags & CO_OPTIMIZED) {
dict_to_map(co->co_freevars, nfreevars,
locals, fast + co->co_nlocals + ncells, 1,
clear);
dict_to_map(co->co_freevars, co->co_nfreevars, locals,
fast + co->co_nlocals + co->co_ncellvars, 1,
clear);
}
}
PyErr_Restore(error_type, error_value, error_traceback);

View File

@ -279,7 +279,8 @@ func_get_code(PyFunctionObject *op, void *Py_UNUSED(ignored))
static int
func_set_code(PyFunctionObject *op, PyObject *value, void *Py_UNUSED(ignored))
{
Py_ssize_t nfree, nclosure;
Py_ssize_t nclosure;
int nfree;
/* Not legal to del f.func_code or to set it to anything
* other than a code object. */
@ -294,7 +295,7 @@ func_set_code(PyFunctionObject *op, PyObject *value, void *Py_UNUSED(ignored))
return -1;
}
nfree = PyCode_GetNumFree((PyCodeObject *)value);
nfree = ((PyCodeObject *)value)->co_nfreevars;
nclosure = (op->func_closure == NULL ? 0 :
PyTuple_GET_SIZE(op->func_closure));
if (nclosure != nfree) {
@ -538,7 +539,7 @@ func_new_impl(PyTypeObject *type, PyCodeObject *code, PyObject *globals,
/*[clinic end generated code: output=99c6d9da3a24e3be input=93611752fc2daf11]*/
{
PyFunctionObject *newfunc;
Py_ssize_t nfree, nclosure;
Py_ssize_t nclosure;
if (name != Py_None && !PyUnicode_Check(name)) {
PyErr_SetString(PyExc_TypeError,
@ -550,9 +551,8 @@ func_new_impl(PyTypeObject *type, PyCodeObject *code, PyObject *globals,
"arg 4 (defaults) must be None or tuple");
return NULL;
}
nfree = PyTuple_GET_SIZE(code->co_freevars);
if (!PyTuple_Check(closure)) {
if (nfree && closure == Py_None) {
if (code->co_nfreevars && closure == Py_None) {
PyErr_SetString(PyExc_TypeError,
"arg 5 (closure) must be tuple");
return NULL;
@ -566,10 +566,10 @@ func_new_impl(PyTypeObject *type, PyCodeObject *code, PyObject *globals,
/* check that the closure is well-formed */
nclosure = closure == Py_None ? 0 : PyTuple_GET_SIZE(closure);
if (nfree != nclosure)
if (code->co_nfreevars != nclosure)
return PyErr_Format(PyExc_ValueError,
"%U requires closure of length %zd, not %zd",
code->co_name, nfree, nclosure);
code->co_name, code->co_nfreevars, nclosure);
if (nclosure) {
Py_ssize_t i;
for (i = 0; i < nclosure; i++) {

View File

@ -8848,11 +8848,10 @@ super_init_without_args(PyFrameObject *f, PyCodeObject *co,
}
PyObject *obj = f->f_localsptr[0];
Py_ssize_t i, n;
Py_ssize_t i;
if (obj == NULL && co->co_cell2arg) {
/* The first argument might be a cell. */
n = PyTuple_GET_SIZE(co->co_cellvars);
for (i = 0; i < n; i++) {
for (i = 0; i < co->co_ncellvars; i++) {
if (co->co_cell2arg[i] == 0) {
PyObject *cell = f->f_localsptr[co->co_nlocals + i];
assert(PyCell_Check(cell));
@ -8867,21 +8866,12 @@ super_init_without_args(PyFrameObject *f, PyCodeObject *co,
return -1;
}
if (co->co_freevars == NULL) {
n = 0;
}
else {
assert(PyTuple_Check(co->co_freevars));
n = PyTuple_GET_SIZE(co->co_freevars);
}
PyTypeObject *type = NULL;
for (i = 0; i < n; i++) {
for (i = 0; i < co->co_nfreevars; i++) {
PyObject *name = PyTuple_GET_ITEM(co->co_freevars, i);
assert(PyUnicode_Check(name));
if (_PyUnicode_EqualToASCIIId(name, &PyId___class__)) {
Py_ssize_t index = co->co_nlocals +
PyTuple_GET_SIZE(co->co_cellvars) + i;
Py_ssize_t index = co->co_nlocals + co->co_ncellvars + i;
PyObject *cell = f->f_localsptr[index];
if (cell == NULL || !PyCell_Check(cell)) {
PyErr_SetString(PyExc_RuntimeError,

View File

@ -3081,9 +3081,9 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
PyObject *name, *value, *locals = LOCALS();
Py_ssize_t idx;
assert(locals);
assert(oparg >= PyTuple_GET_SIZE(co->co_cellvars));
idx = oparg - PyTuple_GET_SIZE(co->co_cellvars);
assert(idx >= 0 && idx < PyTuple_GET_SIZE(co->co_freevars));
assert(oparg >= co->co_ncellvars);
idx = oparg - co->co_ncellvars;
assert(idx >= 0 && idx < co->co_nfreevars);
name = PyTuple_GET_ITEM(co->co_freevars, idx);
if (PyDict_CheckExact(locals)) {
value = PyDict_GetItemWithError(locals, name);
@ -5062,7 +5062,7 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con,
/* Allocate and initialize storage for cell vars, and copy free
vars into frame. */
for (i = 0; i < PyTuple_GET_SIZE(co->co_cellvars); ++i) {
for (i = 0; i < co->co_ncellvars; ++i) {
PyObject *c;
Py_ssize_t arg;
/* Possibly account for the cell variable being an argument. */
@ -5081,10 +5081,10 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con,
}
/* Copy closure variables to free variables */
for (i = 0; i < PyTuple_GET_SIZE(co->co_freevars); ++i) {
for (i = 0; i < co->co_nfreevars; ++i) {
PyObject *o = PyTuple_GET_ITEM(con->fc_closure, i);
Py_INCREF(o);
freevars[PyTuple_GET_SIZE(co->co_cellvars) + i] = o;
freevars[co->co_ncellvars + i] = o;
}
return 0;
@ -6417,7 +6417,7 @@ format_exc_unbound(PyThreadState *tstate, PyCodeObject *co, int oparg)
/* Don't stomp existing exception */
if (_PyErr_Occurred(tstate))
return;
if (oparg < PyTuple_GET_SIZE(co->co_cellvars)) {
if (oparg < co->co_ncellvars) {
name = PyTuple_GET_ITEM(co->co_cellvars,
oparg);
format_exc_check_arg(tstate,
@ -6425,8 +6425,7 @@ format_exc_unbound(PyThreadState *tstate, PyCodeObject *co, int oparg)
UNBOUNDLOCAL_ERROR_MSG,
name);
} else {
name = PyTuple_GET_ITEM(co->co_freevars, oparg -
PyTuple_GET_SIZE(co->co_cellvars));
name = PyTuple_GET_ITEM(co->co_freevars, oparg - co->co_ncellvars);
format_exc_check_arg(tstate, PyExc_NameError,
UNBOUNDFREE_ERROR_MSG, name);
}