From c1e9647107c854439a9864b6ec4f6784aeb94ed5 Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Wed, 29 May 2024 10:47:56 +0100 Subject: [PATCH] gh-119689: generate stack effect metadata for pseudo instructions (#119691) --- Include/internal/pycore_opcode_metadata.h | 32 +++++++++++ Lib/test/test_generated_cases.py | 4 +- ...-05-28-22-49-56.gh-issue-119689.VwLFD5.rst | 1 + Python/bytecodes.c | 24 +++++--- Python/compile.c | 57 +++++-------------- Tools/cases_generator/analyzer.py | 4 +- .../cases_generator/interpreter_definition.md | 8 ++- .../opcode_metadata_generator.py | 9 ++- Tools/cases_generator/parsing.py | 24 ++++---- Tools/cases_generator/stack.py | 22 ++++--- 10 files changed, 112 insertions(+), 73 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-05-28-22-49-56.gh-issue-119689.VwLFD5.rst diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index f805be04985..d3535800139 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -259,12 +259,16 @@ int _PyOpcode_num_popped(int opcode, int oparg) { return 1; case IS_OP: return 2; + case JUMP: + return 0; case JUMP_BACKWARD: return 0; case JUMP_BACKWARD_NO_INTERRUPT: return 0; case JUMP_FORWARD: return 0; + case JUMP_NO_INTERRUPT: + return 0; case LIST_APPEND: return 2 + (oparg-1); case LIST_EXTEND: @@ -297,6 +301,8 @@ int _PyOpcode_num_popped(int opcode, int oparg) { return 1; case LOAD_BUILD_CLASS: return 0; + case LOAD_CLOSURE: + return 0; case LOAD_COMMON_CONSTANT: return 0; case LOAD_CONST: @@ -347,6 +353,8 @@ int _PyOpcode_num_popped(int opcode, int oparg) { return 1; case NOP: return 0; + case POP_BLOCK: + return 0; case POP_EXCEPT: return 1; case POP_JUMP_IF_FALSE: @@ -385,6 +393,12 @@ int _PyOpcode_num_popped(int opcode, int oparg) { return 2; case SETUP_ANNOTATIONS: return 0; + case SETUP_CLEANUP: + return 0; + case SETUP_FINALLY: + return 0; + case SETUP_WITH: + return 0; case SET_ADD: return 2 + (oparg-1); case SET_FUNCTION_ATTRIBUTE: @@ -405,6 +419,8 @@ int _PyOpcode_num_popped(int opcode, int oparg) { return 1; case STORE_FAST_LOAD_FAST: return 1; + case STORE_FAST_MAYBE_NULL: + return 1; case STORE_FAST_STORE_FAST: return 2; case STORE_GLOBAL: @@ -692,12 +708,16 @@ int _PyOpcode_num_pushed(int opcode, int oparg) { return 0; case IS_OP: return 1; + case JUMP: + return 0; case JUMP_BACKWARD: return 0; case JUMP_BACKWARD_NO_INTERRUPT: return 0; case JUMP_FORWARD: return 0; + case JUMP_NO_INTERRUPT: + return 0; case LIST_APPEND: return 1 + (oparg-1); case LIST_EXTEND: @@ -730,6 +750,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg) { return 1 + (oparg & 1); case LOAD_BUILD_CLASS: return 1; + case LOAD_CLOSURE: + return 1; case LOAD_COMMON_CONSTANT: return 1; case LOAD_CONST: @@ -780,6 +802,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg) { return 2; case NOP: return 0; + case POP_BLOCK: + return 0; case POP_EXCEPT: return 0; case POP_JUMP_IF_FALSE: @@ -818,6 +842,12 @@ int _PyOpcode_num_pushed(int opcode, int oparg) { return 2; case SETUP_ANNOTATIONS: return 0; + case SETUP_CLEANUP: + return 2; + case SETUP_FINALLY: + return 1; + case SETUP_WITH: + return 1; case SET_ADD: return 1 + (oparg-1); case SET_FUNCTION_ATTRIBUTE: @@ -838,6 +868,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg) { return 0; case STORE_FAST_LOAD_FAST: return 1; + case STORE_FAST_MAYBE_NULL: + return 0; case STORE_FAST_STORE_FAST: return 0; case STORE_GLOBAL: diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py index fb85222fdcc..41eeb9c0705 100644 --- a/Lib/test/test_generated_cases.py +++ b/Lib/test/test_generated_cases.py @@ -485,7 +485,7 @@ class TestGeneratedCases(unittest.TestCase): def test_pseudo_instruction_no_flags(self): input = """ - pseudo(OP) = { + pseudo(OP, (in -- out1, out2)) = { OP1, }; @@ -504,7 +504,7 @@ class TestGeneratedCases(unittest.TestCase): def test_pseudo_instruction_with_flags(self): input = """ - pseudo(OP, (HAS_ARG, HAS_JUMP)) = { + pseudo(OP, (in1, in2 --), (HAS_ARG, HAS_JUMP)) = { OP1, }; diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-05-28-22-49-56.gh-issue-119689.VwLFD5.rst b/Misc/NEWS.d/next/Core and Builtins/2024-05-28-22-49-56.gh-issue-119689.VwLFD5.rst new file mode 100644 index 00000000000..56be3132621 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-05-28-22-49-56.gh-issue-119689.VwLFD5.rst @@ -0,0 +1 @@ +Generate stack effect metadata for pseudo instructions from bytecodes.c. diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 274c5c22447..9a8198515de 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -213,7 +213,7 @@ dummy_func( } } - pseudo(LOAD_CLOSURE) = { + pseudo(LOAD_CLOSURE, (-- unused)) = { LOAD_FAST, }; @@ -259,7 +259,7 @@ dummy_func( SETLOCAL(oparg, value); } - pseudo(STORE_FAST_MAYBE_NULL) = { + pseudo(STORE_FAST_MAYBE_NULL, (unused --)) = { STORE_FAST, }; @@ -2393,12 +2393,12 @@ dummy_func( #endif /* _Py_TIER2 */ } - pseudo(JUMP) = { + pseudo(JUMP, (--)) = { JUMP_FORWARD, JUMP_BACKWARD, }; - pseudo(JUMP_NO_INTERRUPT) = { + pseudo(JUMP_NO_INTERRUPT, (--)) = { JUMP_FORWARD, JUMP_BACKWARD_NO_INTERRUPT, }; @@ -2895,19 +2895,27 @@ dummy_func( ERROR_IF(res == NULL, error); } - pseudo(SETUP_FINALLY, (HAS_ARG)) = { + pseudo(SETUP_FINALLY, (-- unused), (HAS_ARG)) = { + /* If an exception is raised, restore the stack position + * and push one value before jumping to the handler. + */ NOP, }; - pseudo(SETUP_CLEANUP, (HAS_ARG)) = { + pseudo(SETUP_CLEANUP, (-- unused, unused), (HAS_ARG)) = { + /* As SETUP_FINALLY, but push lasti as well */ NOP, }; - pseudo(SETUP_WITH, (HAS_ARG)) = { + pseudo(SETUP_WITH, (-- unused), (HAS_ARG)) = { + /* If an exception is raised, restore the stack position to the + * position before the result of __(a)enter__ and push 2 values + * before jumping to the handler. + */ NOP, }; - pseudo(POP_BLOCK) = { + pseudo(POP_BLOCK, (--)) = { NOP, }; diff --git a/Python/compile.c b/Python/compile.c index e6efae33eb4..3a80577e0f2 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -703,51 +703,22 @@ compiler_set_qualname(struct compiler *c) static int stack_effect(int opcode, int oparg, int jump) { - if (0 <= opcode && opcode <= MAX_REAL_OPCODE) { - if (_PyOpcode_Deopt[opcode] != opcode) { - // Specialized instructions are not supported. - return PY_INVALID_STACK_EFFECT; - } - int popped = _PyOpcode_num_popped(opcode, oparg); - int pushed = _PyOpcode_num_pushed(opcode, oparg); - if (popped < 0 || pushed < 0) { - return PY_INVALID_STACK_EFFECT; - } - return pushed - popped; + if (opcode < 0) { + return PY_INVALID_STACK_EFFECT; } - - // Pseudo ops - switch (opcode) { - case POP_BLOCK: - case JUMP: - case JUMP_NO_INTERRUPT: - return 0; - - /* Exception handling pseudo-instructions */ - case SETUP_FINALLY: - /* 0 in the normal flow. - * Restore the stack position and push 1 value before jumping to - * the handler if an exception be raised. */ - return jump ? 1 : 0; - case SETUP_CLEANUP: - /* As SETUP_FINALLY, but pushes lasti as well */ - return jump ? 2 : 0; - case SETUP_WITH: - /* 0 in the normal flow. - * Restore the stack position to the position before the result - * of __(a)enter__ and push 2 values before jumping to the handler - * if an exception be raised. */ - return jump ? 1 : 0; - - case STORE_FAST_MAYBE_NULL: - return -1; - case LOAD_CLOSURE: - return 1; - default: - return PY_INVALID_STACK_EFFECT; + if ((opcode <= MAX_REAL_OPCODE) && (_PyOpcode_Deopt[opcode] != opcode)) { + // Specialized instructions are not supported. + return PY_INVALID_STACK_EFFECT; } - - return PY_INVALID_STACK_EFFECT; /* not reachable */ + int popped = _PyOpcode_num_popped(opcode, oparg); + int pushed = _PyOpcode_num_pushed(opcode, oparg); + if (popped < 0 || pushed < 0) { + return PY_INVALID_STACK_EFFECT; + } + if (IS_BLOCK_PUSH_OPCODE(opcode) && !jump) { + return 0; + } + return pushed - popped; } int diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index fdb635486b9..e44bebd8f3c 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -235,6 +235,7 @@ class Instruction: @dataclass class PseudoInstruction: name: str + stack: StackEffect targets: list[Instruction] flags: list[str] opcode: int = -1 @@ -295,7 +296,7 @@ def convert_stack_item(item: parser.StackEffect, replace_op_arg_1: str | None) - item.name, item.type, cond, (item.size or "1") ) -def analyze_stack(op: parser.InstDef, replace_op_arg_1: str | None = None) -> StackEffect: +def analyze_stack(op: parser.InstDef | parser.Pseudo, replace_op_arg_1: str | None = None) -> StackEffect: inputs: list[StackItem] = [ convert_stack_item(i, replace_op_arg_1) for i in op.inputs if isinstance(i, parser.StackEffect) ] @@ -706,6 +707,7 @@ def add_pseudo( ) -> None: pseudos[pseudo.name] = PseudoInstruction( pseudo.name, + analyze_stack(pseudo), [instructions[target] for target in pseudo.targets], pseudo.flags, ) diff --git a/Tools/cases_generator/interpreter_definition.md b/Tools/cases_generator/interpreter_definition.md index 889f58fc3e1..ba09931c541 100644 --- a/Tools/cases_generator/interpreter_definition.md +++ b/Tools/cases_generator/interpreter_definition.md @@ -124,7 +124,13 @@ and a piece of C code describing its semantics:: "family" "(" NAME ")" = "{" NAME ("," NAME)+ [","] "}" ";" pseudo: - "pseudo" "(" NAME ")" = "{" NAME ("," NAME)+ [","] "}" ";" + "pseudo" "(" NAME "," stack_effect ["," "(" flags ")"]")" = "{" NAME ("," NAME)+ [","] "}" ";" + + flags: + flag ("|" flag)* + + flag: + HAS_ARG | HAS_DEOPT | etc.. ``` The following definitions may occur: diff --git a/Tools/cases_generator/opcode_metadata_generator.py b/Tools/cases_generator/opcode_metadata_generator.py index 04fecb235f1..2632eb89ce8 100644 --- a/Tools/cases_generator/opcode_metadata_generator.py +++ b/Tools/cases_generator/opcode_metadata_generator.py @@ -10,6 +10,7 @@ import sys from analyzer import ( Analysis, Instruction, + PseudoInstruction, analyze_files, Skip, Uop, @@ -94,12 +95,18 @@ def emit_stack_effect_function( def generate_stack_effect_functions(analysis: Analysis, out: CWriter) -> None: popped_data: list[tuple[str, str]] = [] pushed_data: list[tuple[str, str]] = [] - for inst in analysis.instructions.values(): + def add(inst: Instruction | PseudoInstruction) -> None: stack = get_stack_effect(inst) popped = (-stack.base_offset).to_c() pushed = (stack.top_offset - stack.base_offset).to_c() popped_data.append((inst.name, popped)) pushed_data.append((inst.name, pushed)) + + for inst in analysis.instructions.values(): + add(inst) + for pseudo in analysis.pseudos.values(): + add(pseudo) + emit_stack_effect_function(out, "popped", sorted(popped_data)) emit_stack_effect_function(out, "pushed", sorted(pushed_data)) diff --git a/Tools/cases_generator/parsing.py b/Tools/cases_generator/parsing.py index 0d54820e4e7..cc897ff2cbe 100644 --- a/Tools/cases_generator/parsing.py +++ b/Tools/cases_generator/parsing.py @@ -138,6 +138,8 @@ class Family(Node): @dataclass class Pseudo(Node): name: str + inputs: list[InputEffect] + outputs: list[OutputEffect] flags: list[str] # instr flags to set on the pseudo instruction targets: list[str] # opcodes this can be replaced by @@ -409,16 +411,18 @@ class Parser(PLexer): if self.expect(lx.LPAREN): if tkn := self.expect(lx.IDENTIFIER): if self.expect(lx.COMMA): - flags = self.flags() - else: - flags = [] - if self.expect(lx.RPAREN): - if self.expect(lx.EQUALS): - if not self.expect(lx.LBRACE): - raise self.make_syntax_error("Expected {") - if members := self.members(): - if self.expect(lx.RBRACE) and self.expect(lx.SEMI): - return Pseudo(tkn.text, flags, members) + inp, outp = self.io_effect() + if self.expect(lx.COMMA): + flags = self.flags() + else: + flags = [] + if self.expect(lx.RPAREN): + if self.expect(lx.EQUALS): + if not self.expect(lx.LBRACE): + raise self.make_syntax_error("Expected {") + if members := self.members(): + if self.expect(lx.RBRACE) and self.expect(lx.SEMI): + return Pseudo(tkn.text, inp, outp, flags, members) return None def members(self) -> list[str] | None: diff --git a/Tools/cases_generator/stack.py b/Tools/cases_generator/stack.py index 5aecac39aef..7f07a6805b1 100644 --- a/Tools/cases_generator/stack.py +++ b/Tools/cases_generator/stack.py @@ -1,7 +1,8 @@ import re -from analyzer import StackItem, Instruction, Uop +from analyzer import StackItem, StackEffect, Instruction, Uop, PseudoInstruction from dataclasses import dataclass from cwriter import CWriter +from typing import Iterator UNUSED = {"unused"} @@ -208,13 +209,20 @@ class Stack: return f"/* Variables: {[v.name for v in self.variables]}. Base offset: {self.base_offset.to_c()}. Top offset: {self.top_offset.to_c()} */" -def get_stack_effect(inst: Instruction) -> Stack: +def get_stack_effect(inst: Instruction | PseudoInstruction) -> Stack: stack = Stack() - for uop in inst.parts: - if not isinstance(uop, Uop): - continue - for var in reversed(uop.stack.inputs): + def stacks(inst : Instruction | PseudoInstruction) -> Iterator[StackEffect]: + if isinstance(inst, Instruction): + for uop in inst.parts: + if isinstance(uop, Uop): + yield uop.stack + else: + assert isinstance(inst, PseudoInstruction) + yield inst.stack + + for s in stacks(inst): + for var in reversed(s.inputs): stack.pop(var) - for i, var in enumerate(uop.stack.outputs): + for var in s.outputs: stack.push(var) return stack