gh-120642: Move private PyCode APIs to the internal C API (#120643)

* Move _Py_CODEUNIT and related functions to pycore_code.h.
* Move _Py_BackoffCounter to pycore_backoff.h.
* Move Include/cpython/optimizer.h content to pycore_optimizer.h.
* Remove Include/cpython/optimizer.h.
* Remove PyUnstable_Replace_Executor().

Rename functions:

* PyUnstable_GetExecutor() => _Py_GetExecutor()
* PyUnstable_GetOptimizer() => _Py_GetOptimizer()
* PyUnstable_SetOptimizer() => _Py_SetTier2Optimizer()
* PyUnstable_Optimizer_NewCounter() => _PyOptimizer_NewCounter()
* PyUnstable_Optimizer_NewUOpOptimizer() => _PyOptimizer_NewUOpOptimizer()
This commit is contained in:
Victor Stinner 2024-06-26 13:54:03 +02:00 committed by GitHub
parent 9e45fd9858
commit 9e4a81f00f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
18 changed files with 212 additions and 227 deletions

View File

@ -132,6 +132,5 @@
#include "fileutils.h"
#include "cpython/pyfpe.h"
#include "cpython/tracemalloc.h"
#include "cpython/optimizer.h"
#endif /* !Py_PYTHON_H */

View File

@ -24,58 +24,6 @@ typedef struct _Py_GlobalMonitors {
uint8_t tools[_PY_MONITORING_UNGROUPED_EVENTS];
} _Py_GlobalMonitors;
typedef struct {
union {
struct {
uint16_t backoff : 4;
uint16_t value : 12;
};
uint16_t as_counter; // For printf("%#x", ...)
};
} _Py_BackoffCounter;
/* Each instruction in a code object is a fixed-width value,
* currently 2 bytes: 1-byte opcode + 1-byte oparg. The EXTENDED_ARG
* opcode allows for larger values but the current limit is 3 uses
* of EXTENDED_ARG (see Python/compile.c), for a maximum
* 32-bit value. This aligns with the note in Python/compile.c
* (compiler_addop_i_line) indicating that the max oparg value is
* 2**32 - 1, rather than INT_MAX.
*/
typedef union {
uint16_t cache;
struct {
uint8_t code;
uint8_t arg;
} op;
_Py_BackoffCounter counter; // First cache entry of specializable op
} _Py_CODEUNIT;
/* These macros only remain defined for compatibility. */
#define _Py_OPCODE(word) ((word).op.code)
#define _Py_OPARG(word) ((word).op.arg)
static inline _Py_CODEUNIT
_py_make_codeunit(uint8_t opcode, uint8_t oparg)
{
// No designated initialisers because of C++ compat
_Py_CODEUNIT word;
word.op.code = opcode;
word.op.arg = oparg;
return word;
}
static inline void
_py_set_opcode(_Py_CODEUNIT *word, uint8_t opcode)
{
word->op.code = opcode;
}
#define _Py_MAKE_CODEUNIT(opcode, oparg) _py_make_codeunit((opcode), (oparg))
#define _Py_SET_OPCODE(word, opcode) _py_set_opcode(&(word), (opcode))
typedef struct {
PyObject *_co_code;

View File

@ -1,135 +0,0 @@
#ifndef Py_LIMITED_API
#ifndef Py_OPTIMIZER_H
#define Py_OPTIMIZER_H
#ifdef __cplusplus
extern "C" {
#endif
typedef struct _PyExecutorLinkListNode {
struct _PyExecutorObject *next;
struct _PyExecutorObject *previous;
} _PyExecutorLinkListNode;
/* Bloom filter with m = 256
* https://en.wikipedia.org/wiki/Bloom_filter */
#define _Py_BLOOM_FILTER_WORDS 8
typedef struct {
uint32_t bits[_Py_BLOOM_FILTER_WORDS];
} _PyBloomFilter;
typedef struct {
uint8_t opcode;
uint8_t oparg;
uint8_t valid;
uint8_t linked;
int index; // Index of ENTER_EXECUTOR (if code isn't NULL, below).
_PyBloomFilter bloom;
_PyExecutorLinkListNode links;
PyCodeObject *code; // Weak (NULL if no corresponding ENTER_EXECUTOR).
} _PyVMData;
/* Depending on the format,
* the 32 bits between the oparg and operand are:
* UOP_FORMAT_TARGET:
* uint32_t target;
* UOP_FORMAT_EXIT
* uint16_t exit_index;
* uint16_t error_target;
* UOP_FORMAT_JUMP
* uint16_t jump_target;
* uint16_t error_target;
*/
typedef struct {
uint16_t opcode:14;
uint16_t format:2;
uint16_t oparg;
union {
uint32_t target;
struct {
union {
uint16_t exit_index;
uint16_t jump_target;
};
uint16_t error_target;
};
};
uint64_t operand; // A cache entry
} _PyUOpInstruction;
typedef struct {
uint32_t target;
_Py_BackoffCounter temperature;
const struct _PyExecutorObject *executor;
} _PyExitData;
typedef struct _PyExecutorObject {
PyObject_VAR_HEAD
const _PyUOpInstruction *trace;
_PyVMData vm_data; /* Used by the VM, but opaque to the optimizer */
uint32_t exit_count;
uint32_t code_size;
size_t jit_size;
void *jit_code;
void *jit_side_entry;
_PyExitData exits[1];
} _PyExecutorObject;
typedef struct _PyOptimizerObject _PyOptimizerObject;
/* Should return > 0 if a new executor is created. O if no executor is produced and < 0 if an error occurred. */
typedef int (*_Py_optimize_func)(
_PyOptimizerObject* self, struct _PyInterpreterFrame *frame,
_Py_CODEUNIT *instr, _PyExecutorObject **exec_ptr,
int curr_stackentries);
struct _PyOptimizerObject {
PyObject_HEAD
_Py_optimize_func optimize;
/* Data needed by the optimizer goes here, but is opaque to the VM */
};
/** Test support **/
typedef struct {
_PyOptimizerObject base;
int64_t count;
} _PyCounterOptimizerObject;
PyAPI_FUNC(int) PyUnstable_Replace_Executor(PyCodeObject *code, _Py_CODEUNIT *instr, _PyExecutorObject *executor);
_PyOptimizerObject *_Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject* optimizer);
PyAPI_FUNC(int) PyUnstable_SetOptimizer(_PyOptimizerObject* optimizer);
PyAPI_FUNC(_PyOptimizerObject *) PyUnstable_GetOptimizer(void);
PyAPI_FUNC(_PyExecutorObject *) PyUnstable_GetExecutor(PyCodeObject *code, int offset);
void _Py_ExecutorInit(_PyExecutorObject *, const _PyBloomFilter *);
void _Py_ExecutorDetach(_PyExecutorObject *);
void _Py_BloomFilter_Init(_PyBloomFilter *);
void _Py_BloomFilter_Add(_PyBloomFilter *bloom, void *obj);
PyAPI_FUNC(void) _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj);
/* For testing */
PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewCounter(void);
PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewUOpOptimizer(void);
#define _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS 3
#define _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS 6
#ifdef _Py_TIER2
PyAPI_FUNC(void) _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is_invalidation);
PyAPI_FUNC(void) _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation);
#else
# define _Py_Executors_InvalidateDependency(A, B, C) ((void)0)
# define _Py_Executors_InvalidateAll(A, B) ((void)0)
#endif
#ifdef __cplusplus
}
#endif
#endif /* !Py_OPTIMIZER_H */
#endif /* Py_LIMITED_API */

View File

@ -13,6 +13,18 @@ extern "C" {
#include <stdbool.h>
#include <stdint.h>
typedef struct {
union {
struct {
uint16_t backoff : 4;
uint16_t value : 12;
};
uint16_t as_counter; // For printf("%#x", ...)
};
} _Py_BackoffCounter;
/* 16-bit countdown counters using exponential backoff.
These are used by the adaptive specializer to count down until

View File

@ -9,6 +9,50 @@ extern "C" {
#endif
#include "pycore_lock.h" // PyMutex
#include "pycore_backoff.h" // _Py_BackoffCounter
/* Each instruction in a code object is a fixed-width value,
* currently 2 bytes: 1-byte opcode + 1-byte oparg. The EXTENDED_ARG
* opcode allows for larger values but the current limit is 3 uses
* of EXTENDED_ARG (see Python/compile.c), for a maximum
* 32-bit value. This aligns with the note in Python/compile.c
* (compiler_addop_i_line) indicating that the max oparg value is
* 2**32 - 1, rather than INT_MAX.
*/
typedef union {
uint16_t cache;
struct {
uint8_t code;
uint8_t arg;
} op;
_Py_BackoffCounter counter; // First cache entry of specializable op
} _Py_CODEUNIT;
/* These macros only remain defined for compatibility. */
#define _Py_OPCODE(word) ((word).op.code)
#define _Py_OPARG(word) ((word).op.arg)
static inline _Py_CODEUNIT
_py_make_codeunit(uint8_t opcode, uint8_t oparg)
{
// No designated initialisers because of C++ compat
_Py_CODEUNIT word;
word.op.code = opcode;
word.op.arg = oparg;
return word;
}
static inline void
_py_set_opcode(_Py_CODEUNIT *word, uint8_t opcode)
{
word->op.code = opcode;
}
#define _Py_MAKE_CODEUNIT(opcode, oparg) _py_make_codeunit((opcode), (oparg))
#define _Py_SET_OPCODE(word, opcode) _py_set_opcode(&(word), (opcode))
// We hide some of the newer PyCodeObject fields behind macros.

View File

@ -30,6 +30,7 @@ extern "C" {
#include "pycore_list.h" // struct _Py_list_state
#include "pycore_mimalloc.h" // struct _mimalloc_interp_state
#include "pycore_object_state.h" // struct _py_object_state
#include "pycore_optimizer.h" // _PyOptimizerObject
#include "pycore_obmalloc.h" // struct _obmalloc_state
#include "pycore_qsbr.h" // struct _qsbr_state
#include "pycore_tstate.h" // _PyThreadStateImpl

View File

@ -11,12 +11,135 @@ extern "C" {
#include "pycore_uop_ids.h"
#include <stdbool.h>
typedef struct _PyExecutorLinkListNode {
struct _PyExecutorObject *next;
struct _PyExecutorObject *previous;
} _PyExecutorLinkListNode;
/* Bloom filter with m = 256
* https://en.wikipedia.org/wiki/Bloom_filter */
#define _Py_BLOOM_FILTER_WORDS 8
typedef struct {
uint32_t bits[_Py_BLOOM_FILTER_WORDS];
} _PyBloomFilter;
typedef struct {
uint8_t opcode;
uint8_t oparg;
uint8_t valid;
uint8_t linked;
int index; // Index of ENTER_EXECUTOR (if code isn't NULL, below).
_PyBloomFilter bloom;
_PyExecutorLinkListNode links;
PyCodeObject *code; // Weak (NULL if no corresponding ENTER_EXECUTOR).
} _PyVMData;
/* Depending on the format,
* the 32 bits between the oparg and operand are:
* UOP_FORMAT_TARGET:
* uint32_t target;
* UOP_FORMAT_EXIT
* uint16_t exit_index;
* uint16_t error_target;
* UOP_FORMAT_JUMP
* uint16_t jump_target;
* uint16_t error_target;
*/
typedef struct {
uint16_t opcode:14;
uint16_t format:2;
uint16_t oparg;
union {
uint32_t target;
struct {
union {
uint16_t exit_index;
uint16_t jump_target;
};
uint16_t error_target;
};
};
uint64_t operand; // A cache entry
} _PyUOpInstruction;
typedef struct {
uint32_t target;
_Py_BackoffCounter temperature;
const struct _PyExecutorObject *executor;
} _PyExitData;
typedef struct _PyExecutorObject {
PyObject_VAR_HEAD
const _PyUOpInstruction *trace;
_PyVMData vm_data; /* Used by the VM, but opaque to the optimizer */
uint32_t exit_count;
uint32_t code_size;
size_t jit_size;
void *jit_code;
void *jit_side_entry;
_PyExitData exits[1];
} _PyExecutorObject;
typedef struct _PyOptimizerObject _PyOptimizerObject;
/* Should return > 0 if a new executor is created. O if no executor is produced and < 0 if an error occurred. */
typedef int (*_Py_optimize_func)(
_PyOptimizerObject* self, struct _PyInterpreterFrame *frame,
_Py_CODEUNIT *instr, _PyExecutorObject **exec_ptr,
int curr_stackentries);
struct _PyOptimizerObject {
PyObject_HEAD
_Py_optimize_func optimize;
/* Data needed by the optimizer goes here, but is opaque to the VM */
};
/** Test support **/
typedef struct {
_PyOptimizerObject base;
int64_t count;
} _PyCounterOptimizerObject;
_PyOptimizerObject *_Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject* optimizer);
// Export for '_opcode' shared extension (JIT compiler).
PyAPI_FUNC(_PyExecutorObject*) _Py_GetExecutor(PyCodeObject *code, int offset);
void _Py_ExecutorInit(_PyExecutorObject *, const _PyBloomFilter *);
void _Py_ExecutorDetach(_PyExecutorObject *);
void _Py_BloomFilter_Init(_PyBloomFilter *);
void _Py_BloomFilter_Add(_PyBloomFilter *bloom, void *obj);
PyAPI_FUNC(void) _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj);
// For testing
// Export for '_testinternalcapi' shared extension.
PyAPI_FUNC(_PyOptimizerObject *) _Py_GetOptimizer(void);
PyAPI_FUNC(int) _Py_SetTier2Optimizer(_PyOptimizerObject* optimizer);
PyAPI_FUNC(PyObject *) _PyOptimizer_NewCounter(void);
PyAPI_FUNC(PyObject *) _PyOptimizer_NewUOpOptimizer(void);
#define _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS 3
#define _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS 6
#ifdef _Py_TIER2
PyAPI_FUNC(void) _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is_invalidation);
PyAPI_FUNC(void) _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation);
#else
# define _Py_Executors_InvalidateDependency(A, B, C) ((void)0)
# define _Py_Executors_InvalidateAll(A, B) ((void)0)
#endif
// This is the length of the trace we project initially.
#define UOP_MAX_TRACE_LENGTH 800
#define TRACE_STACK_SIZE 5
int _Py_uop_analyze_and_optimize(_PyInterpreterFrame *frame,
int _Py_uop_analyze_and_optimize(struct _PyInterpreterFrame *frame,
_PyUOpInstruction *trace, int trace_len, int curr_stackentries,
_PyBloomFilter *dependencies);
@ -148,7 +271,7 @@ extern int _Py_uop_frame_pop(_Py_UOpsContext *ctx);
PyAPI_FUNC(PyObject *) _Py_uop_symbols_test(PyObject *self, PyObject *ignored);
PyAPI_FUNC(int) _PyOptimizer_Optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer, _PyExecutorObject **exec_ptr);
PyAPI_FUNC(int) _PyOptimizer_Optimize(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer, _PyExecutorObject **exec_ptr);
#ifdef __cplusplus
}

View File

@ -1104,7 +1104,6 @@ PYTHON_HEADERS= \
$(srcdir)/Include/cpython/object.h \
$(srcdir)/Include/cpython/objimpl.h \
$(srcdir)/Include/cpython/odictobject.h \
$(srcdir)/Include/cpython/optimizer.h \
$(srcdir)/Include/cpython/picklebufobject.h \
$(srcdir)/Include/cpython/pthread_stubs.h \
$(srcdir)/Include/cpython/pyatomic.h \

View File

@ -0,0 +1,10 @@
Remove the following unstable functions:
* ``PyUnstable_Replace_Executor()``
* ``PyUnstable_SetOptimizer()``
* ``PyUnstable_GetOptimizer()``
* ``PyUnstable_GetExecutor()``
* ``PyUnstable_Optimizer_NewCounter()``
* ``PyUnstable_Optimizer_NewUOpOptimizer()``
Patch by Victor Stinner.

View File

@ -0,0 +1,3 @@
Remove the private ``_Py_CODEUNIT`` type from the public C API. The internal
``pycore_code.h`` header should now be used to get this internal type. Patch by
Victor Stinner.

View File

@ -5,10 +5,11 @@
#include "Python.h"
#include "compile.h"
#include "opcode.h"
#include "internal/pycore_ceval.h"
#include "internal/pycore_code.h"
#include "internal/pycore_compile.h"
#include "internal/pycore_intrinsics.h"
#include "pycore_ceval.h"
#include "pycore_code.h"
#include "pycore_compile.h"
#include "pycore_intrinsics.h"
#include "pycore_optimizer.h" // _Py_GetExecutor()
/*[clinic input]
module _opcode
@ -395,7 +396,7 @@ _opcode_get_executor_impl(PyObject *module, PyObject *code, int offset)
return NULL;
}
#ifdef _Py_TIER2
return (PyObject *)PyUnstable_GetExecutor((PyCodeObject *)code, offset);
return (PyObject *)_Py_GetExecutor((PyCodeObject *)code, offset);
#else
PyErr_Format(PyExc_RuntimeError,
"Executors are not available in this build");

View File

@ -991,13 +991,13 @@ get_co_framesize(PyObject *self, PyObject *arg)
static PyObject *
new_counter_optimizer(PyObject *self, PyObject *arg)
{
return PyUnstable_Optimizer_NewCounter();
return _PyOptimizer_NewCounter();
}
static PyObject *
new_uop_optimizer(PyObject *self, PyObject *arg)
{
return PyUnstable_Optimizer_NewUOpOptimizer();
return _PyOptimizer_NewUOpOptimizer();
}
static PyObject *
@ -1006,7 +1006,7 @@ set_optimizer(PyObject *self, PyObject *opt)
if (opt == Py_None) {
opt = NULL;
}
if (PyUnstable_SetOptimizer((_PyOptimizerObject*)opt) < 0) {
if (_Py_SetTier2Optimizer((_PyOptimizerObject*)opt) < 0) {
return NULL;
}
Py_RETURN_NONE;
@ -1017,7 +1017,7 @@ get_optimizer(PyObject *self, PyObject *Py_UNUSED(ignored))
{
PyObject *opt = NULL;
#ifdef _Py_TIER2
opt = (PyObject *)PyUnstable_GetOptimizer();
opt = (PyObject *)_Py_GetOptimizer();
#endif
if (opt == NULL) {
Py_RETURN_NONE;

View File

@ -166,7 +166,6 @@
<ClInclude Include="..\Include\cpython\object.h" />
<ClInclude Include="..\Include\cpython\objimpl.h" />
<ClInclude Include="..\Include\cpython\odictobject.h" />
<ClInclude Include="..\Include\cpython\optimizer.h" />
<ClInclude Include="..\Include\cpython\parser_interface.h" />
<ClInclude Include="..\Include\cpython\picklebufobject.h" />
<ClInclude Include="..\Include\cpython\pyarena.h" />

View File

@ -420,9 +420,6 @@
<ClInclude Include="..\Include\cpython\odictobject.h">
<Filter>Include</Filter>
</ClInclude>
<ClInclude Include="..\Include\cpython\optimizer.h">
<Filter>Include</Filter>
</ClInclude>
<ClInclude Include="..\Include\cpython\unicodeobject.h">
<Filter>Include\cpython</Filter>
</ClInclude>

View File

@ -12,7 +12,6 @@
#include "pycore_pystate.h" // _PyInterpreterState_GET()
#include "pycore_uop_ids.h"
#include "pycore_jit.h"
#include "cpython/optimizer.h"
#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>
@ -105,18 +104,6 @@ insert_executor(PyCodeObject *code, _Py_CODEUNIT *instr, int index, _PyExecutorO
instr->op.arg = index;
}
int
PyUnstable_Replace_Executor(PyCodeObject *code, _Py_CODEUNIT *instr, _PyExecutorObject *new)
{
if (instr->op.code != ENTER_EXECUTOR) {
PyErr_Format(PyExc_ValueError, "No executor to replace");
return -1;
}
int index = instr->op.arg;
assert(index >= 0);
insert_executor(code, instr, index, new);
return 0;
}
static int
never_optimize(
@ -144,7 +131,7 @@ static _PyOptimizerObject _PyOptimizer_Default = {
};
_PyOptimizerObject *
PyUnstable_GetOptimizer(void)
_Py_GetOptimizer(void)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
if (interp->optimizer == &_PyOptimizer_Default) {
@ -195,7 +182,7 @@ _Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject *optimizer)
}
int
PyUnstable_SetOptimizer(_PyOptimizerObject *optimizer)
_Py_SetTier2Optimizer(_PyOptimizerObject *optimizer)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
_PyOptimizerObject *old = _Py_SetOptimizer(interp, optimizer);
@ -240,7 +227,7 @@ _PyOptimizer_Optimize(
}
_PyExecutorObject *
PyUnstable_GetExecutor(PyCodeObject *code, int offset)
_Py_GetExecutor(PyCodeObject *code, int offset)
{
int code_len = (int)Py_SIZE(code);
for (int i = 0 ; i < code_len;) {
@ -1354,7 +1341,7 @@ PyTypeObject _PyUOpOptimizer_Type = {
};
PyObject *
PyUnstable_Optimizer_NewUOpOptimizer(void)
_PyOptimizer_NewUOpOptimizer(void)
{
_PyOptimizerObject *opt = PyObject_New(_PyOptimizerObject, &_PyUOpOptimizer_Type);
if (opt == NULL) {
@ -1442,7 +1429,7 @@ PyTypeObject _PyCounterOptimizer_Type = {
};
PyObject *
PyUnstable_Optimizer_NewCounter(void)
_PyOptimizer_NewCounter(void)
{
_PyCounterOptimizerObject *opt = (_PyCounterOptimizerObject *)_PyObject_New(&_PyCounterOptimizer_Type);
if (opt == NULL) {

View File

@ -21,7 +21,6 @@
#include "pycore_uop_metadata.h"
#include "pycore_dict.h"
#include "pycore_long.h"
#include "cpython/optimizer.h"
#include "pycore_optimizer.h"
#include "pycore_object.h"
#include "pycore_dict.h"

View File

@ -2,7 +2,6 @@
#include "Python.h"
#include "cpython/optimizer.h"
#include "pycore_code.h"
#include "pycore_frame.h"
#include "pycore_long.h"

View File

@ -31,7 +31,6 @@
#include "pycore_typevarobject.h" // _Py_clear_generic_types()
#include "pycore_unicodeobject.h" // _PyUnicode_InitTypes()
#include "pycore_weakref.h" // _PyWeakref_GET_REF()
#include "cpython/optimizer.h" // _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS
#include "pycore_obmalloc.h" // _PyMem_init_obmalloc()
#include "opcode.h"
@ -1298,11 +1297,11 @@ init_interp_main(PyThreadState *tstate)
enabled = *env != '0';
}
if (enabled) {
PyObject *opt = PyUnstable_Optimizer_NewUOpOptimizer();
PyObject *opt = _PyOptimizer_NewUOpOptimizer();
if (opt == NULL) {
return _PyStatus_ERR("can't initialize optimizer");
}
if (PyUnstable_SetOptimizer((_PyOptimizerObject *)opt)) {
if (_Py_SetTier2Optimizer((_PyOptimizerObject *)opt)) {
return _PyStatus_ERR("can't install optimizer");
}
Py_DECREF(opt);