gh-98831: Use opcode metadata for stack_effect() (#101704)

* Write output and metadata in a single run.
  This halves the time to run the cases generator
  (most of the time goes into parsing the input).
* Declare or define opcode metadata based on NEED_OPCODE_TABLES
* Use generated metadata for stack_effect()
* compile.o depends on opcode_metadata.h
* Return -1 from _PyOpcode_num_popped() and _PyOpcode_num_pushed() for an unknown opcode
This commit is contained in:
Guido van Rossum 2023-02-08 16:23:19 -08:00 committed by GitHub
parent 0e0c5d8baa
commit 65b7b6bd23
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 88 additions and 256 deletions

View File

@ -1445,24 +1445,21 @@ regen-opcode-targets:
.PHONY: regen-cases
regen-cases:
# Regenerate Python/generated_cases.c.h from Python/bytecodes.c
# Regenerate Python/generated_cases.c.h
# and Python/opcode_metadata.h
# from Python/bytecodes.c
# using Tools/cases_generator/generate_cases.py
PYTHONPATH=$(srcdir)/Tools/cases_generator \
$(PYTHON_FOR_REGEN) \
$(srcdir)/Tools/cases_generator/generate_cases.py \
-i $(srcdir)/Python/bytecodes.c \
-o $(srcdir)/Python/generated_cases.c.h.new
-o $(srcdir)/Python/generated_cases.c.h.new \
-m $(srcdir)/Python/opcode_metadata.h.new
$(UPDATE_FILE) $(srcdir)/Python/generated_cases.c.h $(srcdir)/Python/generated_cases.c.h.new
# Regenerate Python/opcode_metadata.h from Python/bytecodes.c
# using Tools/cases_generator/generate_cases.py --metadata
PYTHONPATH=$(srcdir)/Tools/cases_generator \
$(PYTHON_FOR_REGEN) \
$(srcdir)/Tools/cases_generator/generate_cases.py \
--metadata \
-i $(srcdir)/Python/bytecodes.c \
-o $(srcdir)/Python/opcode_metadata.h.new
$(UPDATE_FILE) $(srcdir)/Python/opcode_metadata.h $(srcdir)/Python/opcode_metadata.h.new
Python/compile.o: $(srcdir)/Python/opcode_metadata.h
Python/ceval.o: \
$(srcdir)/Python/ceval_macros.h \
$(srcdir)/Python/condvar.h \

View File

@ -1074,135 +1074,49 @@ basicblock_next_instr(basicblock *b)
static int
stack_effect(int opcode, int oparg, int jump)
{
if (0 <= opcode && opcode <= MAX_REAL_OPCODE) {
if (_PyOpcode_Deopt[opcode] != opcode) {
// Specialized instructions are not supported.
return PY_INVALID_STACK_EFFECT;
}
int popped, pushed;
if (jump > 0) {
popped = _PyOpcode_num_popped(opcode, oparg, true);
pushed = _PyOpcode_num_pushed(opcode, oparg, true);
}
else {
popped = _PyOpcode_num_popped(opcode, oparg, false);
pushed = _PyOpcode_num_pushed(opcode, oparg, false);
}
if (popped < 0 || pushed < 0) {
return PY_INVALID_STACK_EFFECT;
}
if (jump >= 0) {
return pushed - popped;
}
if (jump < 0) {
// Compute max(pushed - popped, alt_pushed - alt_popped)
int alt_popped = _PyOpcode_num_popped(opcode, oparg, true);
int alt_pushed = _PyOpcode_num_pushed(opcode, oparg, true);
if (alt_popped < 0 || alt_pushed < 0) {
return PY_INVALID_STACK_EFFECT;
}
int diff = pushed - popped;
int alt_diff = alt_pushed - alt_popped;
if (alt_diff > diff) {
return alt_diff;
}
return diff;
}
}
// Pseudo ops
switch (opcode) {
case NOP:
case EXTENDED_ARG:
case RESUME:
case CACHE:
return 0;
/* Stack manipulation */
case POP_TOP:
return -1;
case SWAP:
return 0;
case END_FOR:
return -2;
/* Unary operators */
case UNARY_NEGATIVE:
case UNARY_NOT:
case UNARY_INVERT:
return 0;
case SET_ADD:
case LIST_APPEND:
return -1;
case MAP_ADD:
return -2;
case BINARY_SUBSCR:
return -1;
case BINARY_SLICE:
return -2;
case STORE_SUBSCR:
return -3;
case STORE_SLICE:
return -4;
case DELETE_SUBSCR:
return -2;
case GET_ITER:
return 0;
case LOAD_BUILD_CLASS:
return 1;
case RETURN_VALUE:
return -1;
case RETURN_CONST:
return 0;
case SETUP_ANNOTATIONS:
return 0;
case YIELD_VALUE:
return 0;
case POP_BLOCK:
return 0;
case POP_EXCEPT:
return -1;
case STORE_NAME:
return -1;
case DELETE_NAME:
return 0;
case UNPACK_SEQUENCE:
return oparg-1;
case UNPACK_EX:
return (oparg&0xFF) + (oparg>>8);
case FOR_ITER:
return 1;
case SEND:
return jump > 0 ? -1 : 0;
case STORE_ATTR:
return -2;
case DELETE_ATTR:
return -1;
case STORE_GLOBAL:
return -1;
case DELETE_GLOBAL:
return 0;
case LOAD_CONST:
return 1;
case LOAD_NAME:
return 1;
case BUILD_TUPLE:
case BUILD_LIST:
case BUILD_SET:
case BUILD_STRING:
return 1-oparg;
case BUILD_MAP:
return 1 - 2*oparg;
case BUILD_CONST_KEY_MAP:
return -oparg;
case LOAD_ATTR:
return (oparg & 1);
case COMPARE_OP:
case IS_OP:
case CONTAINS_OP:
return -1;
case CHECK_EXC_MATCH:
return 0;
case CHECK_EG_MATCH:
return 0;
case IMPORT_NAME:
return -1;
case IMPORT_FROM:
return 1;
/* Jumps */
case JUMP_FORWARD:
case JUMP_BACKWARD:
case JUMP:
case JUMP_BACKWARD_NO_INTERRUPT:
case JUMP_NO_INTERRUPT:
return 0;
case JUMP_IF_TRUE_OR_POP:
case JUMP_IF_FALSE_OR_POP:
return jump ? 0 : -1;
case POP_JUMP_IF_NONE:
case POP_JUMP_IF_NOT_NONE:
case POP_JUMP_IF_FALSE:
case POP_JUMP_IF_TRUE:
return -1;
case COMPARE_AND_BRANCH:
return -2;
case LOAD_GLOBAL:
return (oparg & 1) + 1;
/* Exception handling pseudo-instructions */
case SETUP_FINALLY:
/* 0 in the normal flow.
@ -1218,109 +1132,13 @@ stack_effect(int opcode, int oparg, int jump)
* of __(a)enter__ and push 2 values before jumping to the handler
* if an exception be raised. */
return jump ? 1 : 0;
case PREP_RERAISE_STAR:
return -1;
case RERAISE:
return -1;
case PUSH_EXC_INFO:
return 1;
case WITH_EXCEPT_START:
return 1;
case LOAD_FAST:
case LOAD_FAST_CHECK:
return 1;
case STORE_FAST:
return -1;
case DELETE_FAST:
return 0;
case RETURN_GENERATOR:
return 0;
case RAISE_VARARGS:
return -oparg;
/* Functions and calls */
case KW_NAMES:
return 0;
case CALL:
return -1-oparg;
case CALL_INTRINSIC_1:
return 0;
case CALL_FUNCTION_EX:
return -2 - ((oparg & 0x01) != 0);
case MAKE_FUNCTION:
return 0 - ((oparg & 0x01) != 0) - ((oparg & 0x02) != 0) -
((oparg & 0x04) != 0) - ((oparg & 0x08) != 0);
case BUILD_SLICE:
if (oparg == 3)
return -2;
else
return -1;
/* Closures */
case MAKE_CELL:
case COPY_FREE_VARS:
return 0;
case LOAD_CLOSURE:
return 1;
case LOAD_DEREF:
case LOAD_CLASSDEREF:
return 1;
case STORE_DEREF:
return -1;
case DELETE_DEREF:
return 0;
/* Iterators and generators */
case GET_AWAITABLE:
return 0;
case BEFORE_ASYNC_WITH:
case BEFORE_WITH:
return 1;
case GET_AITER:
return 0;
case GET_ANEXT:
return 1;
case GET_YIELD_FROM_ITER:
return 0;
case END_ASYNC_FOR:
return -2;
case CLEANUP_THROW:
return -2;
case FORMAT_VALUE:
/* If there's a fmt_spec on the stack, we go from 2->1,
else 1->1. */
return (oparg & FVS_MASK) == FVS_HAVE_SPEC ? -1 : 0;
case LOAD_METHOD:
return 1;
case LOAD_ASSERTION_ERROR:
return 1;
case LIST_EXTEND:
case SET_UPDATE:
case DICT_MERGE:
case DICT_UPDATE:
return -1;
case MATCH_CLASS:
return -2;
case GET_LEN:
case MATCH_MAPPING:
case MATCH_SEQUENCE:
case MATCH_KEYS:
return 1;
case COPY:
case PUSH_NULL:
return 1;
case BINARY_OP:
return -1;
case INTERPRETER_EXIT:
return -1;
default:
return PY_INVALID_STACK_EFFECT;
}
return PY_INVALID_STACK_EFFECT; /* not reachable */
}

View File

@ -2,8 +2,10 @@
// from Python/bytecodes.c
// Do not edit!
#ifndef NDEBUG
static int
#ifndef NEED_OPCODE_TABLES
extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump);
#else
int
_PyOpcode_num_popped(int opcode, int oparg, bool jump) {
switch(opcode) {
case NOP:
@ -345,13 +347,15 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) {
case CACHE:
return 0;
default:
Py_UNREACHABLE();
return -1;
}
}
#endif
#ifndef NDEBUG
static int
#ifndef NEED_OPCODE_TABLES
extern int _PyOpcode_num_pushed(int opcode, int oparg, bool jump);
#else
int
_PyOpcode_num_pushed(int opcode, int oparg, bool jump) {
switch(opcode) {
case NOP:
@ -693,10 +697,11 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) {
case CACHE:
return 0;
default:
Py_UNREACHABLE();
return -1;
}
}
#endif
enum Direction { DIR_NONE, DIR_READ, DIR_WRITE };
enum InstructionFormat { INSTR_FMT_IB, INSTR_FMT_IBC, INSTR_FMT_IBC0, INSTR_FMT_IBC000, INSTR_FMT_IBC0000, INSTR_FMT_IBC00000000, INSTR_FMT_IBIB, INSTR_FMT_IX, INSTR_FMT_IXC, INSTR_FMT_IXC000 };
struct opcode_metadata {
@ -705,7 +710,12 @@ struct opcode_metadata {
enum Direction dir_op3;
bool valid_entry;
enum InstructionFormat instr_format;
} _PyOpcode_opcode_metadata[256] = {
};
#ifndef NEED_OPCODE_TABLES
extern const struct opcode_metadata _PyOpcode_opcode_metadata[256];
#else
const struct opcode_metadata _PyOpcode_opcode_metadata[256] = {
[NOP] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IX },
[RESUME] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB },
[LOAD_CLOSURE] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB },
@ -876,3 +886,4 @@ struct opcode_metadata {
[EXTENDED_ARG] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB },
[CACHE] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IX },
};
#endif

View File

@ -43,10 +43,7 @@ arg_parser.add_argument(
"-o", "--output", type=str, help="Generated code", default=DEFAULT_OUTPUT
)
arg_parser.add_argument(
"-m",
"--metadata",
action="store_true",
help=f"Generate metadata instead, changes output default to {DEFAULT_METADATA_OUTPUT}",
"-m", "--metadata", type=str, help="Generated metadata", default=DEFAULT_METADATA_OUTPUT
)
@ -498,13 +495,15 @@ class Analyzer:
filename: str
output_filename: str
metadata_filename: str
src: str
errors: int = 0
def __init__(self, filename: str, output_filename: str):
def __init__(self, filename: str, output_filename: str, metadata_filename: str):
"""Read the input file."""
self.filename = filename
self.output_filename = output_filename
self.metadata_filename = metadata_filename
with open(filename) as f:
self.src = f.read()
@ -889,21 +888,25 @@ class Analyzer:
def write_function(
direction: str, data: list[tuple[AnyInstruction, str]]
) -> None:
self.out.emit("\n#ifndef NDEBUG")
self.out.emit("static int")
self.out.emit("")
self.out.emit("#ifndef NEED_OPCODE_TABLES")
self.out.emit(f"extern int _PyOpcode_num_{direction}(int opcode, int oparg, bool jump);")
self.out.emit("#else")
self.out.emit("int")
self.out.emit(f"_PyOpcode_num_{direction}(int opcode, int oparg, bool jump) {{")
self.out.emit(" switch(opcode) {")
for instr, effect in data:
self.out.emit(f" case {instr.name}:")
self.out.emit(f" return {effect};")
self.out.emit(" default:")
self.out.emit(" Py_UNREACHABLE();")
self.out.emit(" return -1;")
self.out.emit(" }")
self.out.emit("}")
self.out.emit("#endif")
write_function("popped", popped_data)
write_function("pushed", pushed_data)
self.out.emit("")
def write_metadata(self) -> None:
"""Write instruction metadata to output file."""
@ -924,7 +927,7 @@ class Analyzer:
# Turn it into a list of enum definitions.
format_enums = [INSTR_FMT_PREFIX + format for format in sorted(all_formats)]
with open(self.output_filename, "w") as f:
with open(self.metadata_filename, "w") as f:
# Write provenance header
f.write(f"// This file is generated by {THIS} --metadata\n")
f.write(f"// from {os.path.relpath(self.filename, ROOT)}\n")
@ -935,7 +938,7 @@ class Analyzer:
self.write_stack_effect_functions()
# Write variable definition
# Write type definitions
self.out.emit("enum Direction { DIR_NONE, DIR_READ, DIR_WRITE };")
self.out.emit(f"enum InstructionFormat {{ {', '.join(format_enums)} }};")
self.out.emit("struct opcode_metadata {")
@ -945,7 +948,14 @@ class Analyzer:
self.out.emit("enum Direction dir_op3;")
self.out.emit("bool valid_entry;")
self.out.emit("enum InstructionFormat instr_format;")
self.out.emit("} _PyOpcode_opcode_metadata[256] = {")
self.out.emit("};")
self.out.emit("")
# Write metadata array declaration
self.out.emit("#ifndef NEED_OPCODE_TABLES")
self.out.emit("extern const struct opcode_metadata _PyOpcode_opcode_metadata[256];")
self.out.emit("#else")
self.out.emit("const struct opcode_metadata _PyOpcode_opcode_metadata[256] = {")
# Write metadata for each instruction
for thing in self.everything:
@ -962,6 +972,7 @@ class Analyzer:
# Write end of array
self.out.emit("};")
self.out.emit("#endif")
def write_metadata_for_inst(self, instr: Instruction) -> None:
"""Write metadata for a single instruction."""
@ -1184,18 +1195,13 @@ def variable_used(node: parser.Node, name: str) -> bool:
def main():
"""Parse command line, parse input, analyze, write output."""
args = arg_parser.parse_args() # Prints message and sys.exit(2) on error
if args.metadata:
if args.output == DEFAULT_OUTPUT:
args.output = DEFAULT_METADATA_OUTPUT
a = Analyzer(args.input, args.output) # Raises OSError if input unreadable
a = Analyzer(args.input, args.output, args.metadata) # Raises OSError if input unreadable
a.parse() # Raises SyntaxError on failure
a.analyze() # Prints messages and sets a.errors on failure
if a.errors:
sys.exit(f"Found {a.errors} errors")
if args.metadata:
a.write_metadata()
else:
a.write_instructions() # Raises OSError if output can't be written
a.write_instructions() # Raises OSError if output can't be written
a.write_metadata()
if __name__ == "__main__":