diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h index 8fc9fb62aeb..6d7b8bc3c14 100644 --- a/Include/cpython/optimizer.h +++ b/Include/cpython/optimizer.h @@ -92,9 +92,6 @@ PyAPI_FUNC(_PyOptimizerObject *) PyUnstable_GetOptimizer(void); PyAPI_FUNC(_PyExecutorObject *) PyUnstable_GetExecutor(PyCodeObject *code, int offset); -int -_PyOptimizer_Optimize(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer, _PyExecutorObject **exec_ptr); - void _Py_ExecutorInit(_PyExecutorObject *, const _PyBloomFilter *); void _Py_ExecutorClear(_PyExecutorObject *); void _Py_BloomFilter_Init(_PyBloomFilter *); diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index bf77526cf75..6eab2ba1dae 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -181,22 +181,26 @@ extern PyObject* _Py_MakeCoro(PyFunctionObject *func); /* Handle signals, pending calls, GIL drop request and asynchronous exception */ -extern int _Py_HandlePending(PyThreadState *tstate); +PyAPI_FUNC(int) _Py_HandlePending(PyThreadState *tstate); extern PyObject * _PyEval_GetFrameLocals(void); -extern const binaryfunc _PyEval_BinaryOps[]; -int _PyEval_CheckExceptStarTypeValid(PyThreadState *tstate, PyObject* right); -int _PyEval_CheckExceptTypeValid(PyThreadState *tstate, PyObject* right); -int _PyEval_ExceptionGroupMatch(PyObject* exc_value, PyObject *match_type, PyObject **match, PyObject **rest); -void _PyEval_FormatAwaitableError(PyThreadState *tstate, PyTypeObject *type, int oparg); -void _PyEval_FormatExcCheckArg(PyThreadState *tstate, PyObject *exc, const char *format_str, PyObject *obj); -void _PyEval_FormatExcUnbound(PyThreadState *tstate, PyCodeObject *co, int oparg); -void _PyEval_FormatKwargsError(PyThreadState *tstate, PyObject *func, PyObject *kwargs); -PyObject *_PyEval_MatchClass(PyThreadState *tstate, PyObject *subject, PyObject *type, Py_ssize_t nargs, PyObject *kwargs); -PyObject *_PyEval_MatchKeys(PyThreadState *tstate, PyObject *map, PyObject *keys); -int _PyEval_UnpackIterable(PyThreadState *tstate, PyObject *v, int argcnt, int argcntafter, PyObject **sp); -void _PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame); +typedef PyObject *(*conversion_func)(PyObject *); + +PyAPI_DATA(const binaryfunc) _PyEval_BinaryOps[]; +PyAPI_DATA(const conversion_func) _PyEval_ConversionFuncs[]; + +PyAPI_FUNC(int) _PyEval_CheckExceptStarTypeValid(PyThreadState *tstate, PyObject* right); +PyAPI_FUNC(int) _PyEval_CheckExceptTypeValid(PyThreadState *tstate, PyObject* right); +PyAPI_FUNC(int) _PyEval_ExceptionGroupMatch(PyObject* exc_value, PyObject *match_type, PyObject **match, PyObject **rest); +PyAPI_FUNC(void) _PyEval_FormatAwaitableError(PyThreadState *tstate, PyTypeObject *type, int oparg); +PyAPI_FUNC(void) _PyEval_FormatExcCheckArg(PyThreadState *tstate, PyObject *exc, const char *format_str, PyObject *obj); +PyAPI_FUNC(void) _PyEval_FormatExcUnbound(PyThreadState *tstate, PyCodeObject *co, int oparg); +PyAPI_FUNC(void) _PyEval_FormatKwargsError(PyThreadState *tstate, PyObject *func, PyObject *kwargs); +PyAPI_FUNC(PyObject *)_PyEval_MatchClass(PyThreadState *tstate, PyObject *subject, PyObject *type, Py_ssize_t nargs, PyObject *kwargs); +PyAPI_FUNC(PyObject *)_PyEval_MatchKeys(PyThreadState *tstate, PyObject *map, PyObject *keys); +PyAPI_FUNC(int) _PyEval_UnpackIterable(PyThreadState *tstate, PyObject *v, int argcnt, int argcntafter, PyObject **sp); +PyAPI_FUNC(void) _PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame); /* Bits that can be set in PyThreadState.eval_breaker */ diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index d1a0010b9a8..cd171a4384d 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -52,7 +52,7 @@ PyAPI_FUNC(Py_ssize_t) _PyDict_SizeOf(PyDictObject *); of a key wins, if override is 2, a KeyError with conflicting key as argument is raised. */ -extern int _PyDict_MergeEx(PyObject *mp, PyObject *other, int override); +PyAPI_FUNC(int) _PyDict_MergeEx(PyObject *mp, PyObject *other, int override); extern void _PyDict_DebugMallocStats(FILE *out); @@ -100,10 +100,10 @@ extern Py_ssize_t _Py_dict_lookup(PyDictObject *mp, PyObject *key, Py_hash_t has extern Py_ssize_t _PyDict_LookupIndex(PyDictObject *, PyObject *); extern Py_ssize_t _PyDictKeys_StringLookup(PyDictKeysObject* dictkeys, PyObject *key); -extern PyObject *_PyDict_LoadGlobal(PyDictObject *, PyDictObject *, PyObject *); +PyAPI_FUNC(PyObject *)_PyDict_LoadGlobal(PyDictObject *, PyDictObject *, PyObject *); /* Consumes references to key and value */ -extern int _PyDict_SetItem_Take2(PyDictObject *op, PyObject *key, PyObject *value); +PyAPI_FUNC(int) _PyDict_SetItem_Take2(PyDictObject *op, PyObject *key, PyObject *value); extern int _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, PyObject *name, PyObject *value); extern int _PyDict_Pop_KnownHash( @@ -247,8 +247,8 @@ _PyDict_NotifyEvent(PyInterpreterState *interp, } extern PyObject *_PyObject_MakeDictFromInstanceAttributes(PyObject *obj, PyDictValues *values); -extern bool _PyObject_MakeInstanceAttributesFromDict(PyObject *obj, PyDictOrValues *dorv); -extern PyObject *_PyDict_FromItems( +PyAPI_FUNC(bool) _PyObject_MakeInstanceAttributesFromDict(PyObject *obj, PyDictOrValues *dorv); +PyAPI_FUNC(PyObject *)_PyDict_FromItems( PyObject *const *keys, Py_ssize_t keys_offset, PyObject *const *values, Py_ssize_t values_offset, Py_ssize_t length); diff --git a/Include/internal/pycore_floatobject.h b/Include/internal/pycore_floatobject.h index 3767df5506d..f984df69569 100644 --- a/Include/internal/pycore_floatobject.h +++ b/Include/internal/pycore_floatobject.h @@ -34,7 +34,7 @@ struct _Py_float_runtime_state { -void _PyFloat_ExactDealloc(PyObject *op); +PyAPI_FUNC(void) _PyFloat_ExactDealloc(PyObject *op); extern void _PyFloat_DebugMallocStats(FILE* out); diff --git a/Include/internal/pycore_function.h b/Include/internal/pycore_function.h index 3f3da8a44b7..dad6a89af77 100644 --- a/Include/internal/pycore_function.h +++ b/Include/internal/pycore_function.h @@ -29,7 +29,7 @@ struct _py_func_state { extern PyFunctionObject* _PyFunction_FromConstructor(PyFrameConstructor *constr); extern uint32_t _PyFunction_GetVersionForCurrentState(PyFunctionObject *func); -extern void _PyFunction_SetVersion(PyFunctionObject *func, uint32_t version); +PyAPI_FUNC(void) _PyFunction_SetVersion(PyFunctionObject *func, uint32_t version); PyFunctionObject *_PyFunction_LookupByVersion(uint32_t version); extern PyObject *_Py_set_function_type_params( diff --git a/Include/internal/pycore_genobject.h b/Include/internal/pycore_genobject.h index b2aa0175984..9463c822ad8 100644 --- a/Include/internal/pycore_genobject.h +++ b/Include/internal/pycore_genobject.h @@ -10,7 +10,7 @@ extern "C" { #include "pycore_freelist.h" -extern PyObject *_PyGen_yf(PyGenObject *); +PyAPI_FUNC(PyObject *)_PyGen_yf(PyGenObject *); extern void _PyGen_Finalize(PyObject *self); // Export for '_asyncio' shared extension @@ -19,7 +19,7 @@ PyAPI_FUNC(int) _PyGen_SetStopIterationValue(PyObject *); // Export for '_asyncio' shared extension PyAPI_FUNC(int) _PyGen_FetchStopIterationValue(PyObject **); -extern PyObject *_PyCoro_GetAwaitableIter(PyObject *o); +PyAPI_FUNC(PyObject *)_PyCoro_GetAwaitableIter(PyObject *o); extern PyObject *_PyAsyncGenValueWrapperNew(PyThreadState *state, PyObject *); extern PyTypeObject _PyCoroWrapper_Type; diff --git a/Include/internal/pycore_intrinsics.h b/Include/internal/pycore_intrinsics.h index 3a8dd95cff8..8fa88ea3f74 100644 --- a/Include/internal/pycore_intrinsics.h +++ b/Include/internal/pycore_intrinsics.h @@ -44,7 +44,7 @@ typedef struct { const char *name; } intrinsic_func2_info; -extern const intrinsic_func1_info _PyIntrinsics_UnaryFunctions[]; -extern const intrinsic_func2_info _PyIntrinsics_BinaryFunctions[]; +PyAPI_DATA(const intrinsic_func1_info) _PyIntrinsics_UnaryFunctions[]; +PyAPI_DATA(const intrinsic_func2_info) _PyIntrinsics_BinaryFunctions[]; #endif // !Py_INTERNAL_INTRINSIC_H diff --git a/Include/internal/pycore_list.h b/Include/internal/pycore_list.h index 50dc13c4da4..2a82912e41d 100644 --- a/Include/internal/pycore_list.h +++ b/Include/internal/pycore_list.h @@ -10,12 +10,12 @@ extern "C" { #include "pycore_freelist.h" // _PyFreeListState -extern PyObject* _PyList_Extend(PyListObject *, PyObject *); +PyAPI_FUNC(PyObject*) _PyList_Extend(PyListObject *, PyObject *); extern void _PyList_DebugMallocStats(FILE *out); #define _PyList_ITEMS(op) _Py_RVALUE(_PyList_CAST(op)->ob_item) -extern int +PyAPI_FUNC(int) _PyList_AppendTakeRefListResize(PyListObject *self, PyObject *newitem); // In free-threaded build: self should be locked by the caller, if it should be thread-safe. @@ -54,7 +54,7 @@ typedef struct { PyListObject *it_seq; /* Set to NULL when iterator is exhausted */ } _PyListIterObject; -extern PyObject *_PyList_FromArraySteal(PyObject *const *src, Py_ssize_t n); +PyAPI_FUNC(PyObject *)_PyList_FromArraySteal(PyObject *const *src, Py_ssize_t n); #ifdef __cplusplus } diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index ec27df9e416..f04f66d053b 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -121,9 +121,9 @@ PyAPI_DATA(PyObject*) _PyLong_Rshift(PyObject *, size_t); // Export for 'math' shared extension PyAPI_DATA(PyObject*) _PyLong_Lshift(PyObject *, size_t); -extern PyObject* _PyLong_Add(PyLongObject *left, PyLongObject *right); -extern PyObject* _PyLong_Multiply(PyLongObject *left, PyLongObject *right); -extern PyObject* _PyLong_Subtract(PyLongObject *left, PyLongObject *right); +PyAPI_FUNC(PyObject*) _PyLong_Add(PyLongObject *left, PyLongObject *right); +PyAPI_FUNC(PyObject*) _PyLong_Multiply(PyLongObject *left, PyLongObject *right); +PyAPI_FUNC(PyObject*) _PyLong_Subtract(PyLongObject *left, PyLongObject *right); // Export for 'binascii' shared extension. PyAPI_DATA(unsigned char) _PyLong_DigitValue[256]; diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 34a83ea228e..9809f5f2e02 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -73,7 +73,7 @@ PyAPI_FUNC(int) _PyObject_IsFreed(PyObject *); .ob_size = size \ } -extern void _Py_NO_RETURN _Py_FatalRefcountErrorFunc( +PyAPI_FUNC(void) _Py_NO_RETURN _Py_FatalRefcountErrorFunc( const char *func, const char *message); @@ -684,7 +684,7 @@ PyAPI_FUNC(PyObject*) _PyObject_LookupSpecial(PyObject *, PyObject *); extern int _PyObject_IsAbstract(PyObject *); -extern int _PyObject_GetMethod(PyObject *obj, PyObject *name, PyObject **method); +PyAPI_FUNC(int) _PyObject_GetMethod(PyObject *obj, PyObject *name, PyObject **method); extern PyObject* _PyObject_NextNotImplemented(PyObject *); // Pickle support. diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 614850468ec..4894f613b5f 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -111,6 +111,8 @@ extern int _Py_uop_frame_pop(_Py_UOpsContext *ctx); PyAPI_FUNC(PyObject *) _Py_uop_symbols_test(PyObject *self, PyObject *ignored); +PyAPI_FUNC(int) _PyOptimizer_Optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer, _PyExecutorObject **exec_ptr); + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_pyerrors.h b/Include/internal/pycore_pyerrors.h index 0f16fb894d1..910335fd2cf 100644 --- a/Include/internal/pycore_pyerrors.h +++ b/Include/internal/pycore_pyerrors.h @@ -95,7 +95,7 @@ extern void _PyErr_Fetch( extern PyObject* _PyErr_GetRaisedException(PyThreadState *tstate); -extern int _PyErr_ExceptionMatches( +PyAPI_FUNC(int) _PyErr_ExceptionMatches( PyThreadState *tstate, PyObject *exc); @@ -114,18 +114,18 @@ extern void _PyErr_SetObject( extern void _PyErr_ChainStackItem(void); -extern void _PyErr_Clear(PyThreadState *tstate); +PyAPI_FUNC(void) _PyErr_Clear(PyThreadState *tstate); extern void _PyErr_SetNone(PyThreadState *tstate, PyObject *exception); extern PyObject* _PyErr_NoMemory(PyThreadState *tstate); -extern void _PyErr_SetString( +PyAPI_FUNC(void) _PyErr_SetString( PyThreadState *tstate, PyObject *exception, const char *string); -extern PyObject* _PyErr_Format( +PyAPI_FUNC(PyObject*) _PyErr_Format( PyThreadState *tstate, PyObject *exception, const char *format, diff --git a/Include/internal/pycore_sliceobject.h b/Include/internal/pycore_sliceobject.h index 89086f67683..ba8b1f1cb27 100644 --- a/Include/internal/pycore_sliceobject.h +++ b/Include/internal/pycore_sliceobject.h @@ -11,7 +11,7 @@ extern "C" { /* runtime lifecycle */ -extern PyObject * +PyAPI_FUNC(PyObject *) _PyBuildSlice_ConsumeRefs(PyObject *start, PyObject *stop); #ifdef __cplusplus diff --git a/Include/internal/pycore_tuple.h b/Include/internal/pycore_tuple.h index 4605f355ccb..14a9e42c3a3 100644 --- a/Include/internal/pycore_tuple.h +++ b/Include/internal/pycore_tuple.h @@ -21,7 +21,7 @@ extern PyStatus _PyTuple_InitGlobalObjects(PyInterpreterState *); #define _PyTuple_ITEMS(op) _Py_RVALUE(_PyTuple_CAST(op)->ob_item) extern PyObject *_PyTuple_FromArray(PyObject *const *, Py_ssize_t); -extern PyObject *_PyTuple_FromArraySteal(PyObject *const *, Py_ssize_t); +PyAPI_FUNC(PyObject *)_PyTuple_FromArraySteal(PyObject *const *, Py_ssize_t); typedef struct { PyObject_HEAD diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index 9134ab45cd0..c214111fed6 100644 --- a/Include/internal/pycore_typeobject.h +++ b/Include/internal/pycore_typeobject.h @@ -147,7 +147,7 @@ extern PyObject* _Py_slot_tp_getattr_hook(PyObject *self, PyObject *name); extern PyTypeObject _PyBufferWrapper_Type; -extern PyObject* _PySuper_Lookup(PyTypeObject *su_type, PyObject *su_obj, +PyAPI_FUNC(PyObject*) _PySuper_Lookup(PyTypeObject *su_type, PyObject *su_obj, PyObject *name, int *meth_found); diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index 7ee540154b2..fea5ceea095 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -31,7 +31,7 @@ PyAPI_FUNC(int) _PyUnicode_CheckConsistency( PyObject *op, int check_content); -extern void _PyUnicode_ExactDealloc(PyObject *op); +PyAPI_FUNC(void) _PyUnicode_ExactDealloc(PyObject *op); extern Py_ssize_t _PyUnicode_InternedSize(void); // Get a copy of a Unicode string. @@ -202,7 +202,7 @@ PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII( /* --- Methods & Slots ---------------------------------------------------- */ -extern PyObject* _PyUnicode_JoinArray( +PyAPI_FUNC(PyObject*) _PyUnicode_JoinArray( PyObject *separator, PyObject *const *items, Py_ssize_t seqlen diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 565379afc4b..1d515098f6c 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2755,7 +2755,7 @@ dummy_func( GOTO_ERROR(error); } DECREF_INPUTS(); - res = _PyObject_CallNoArgsTstate(tstate, enter); + res = PyObject_CallNoArgs(enter); Py_DECREF(enter); if (res == NULL) { Py_DECREF(exit); @@ -2790,7 +2790,7 @@ dummy_func( GOTO_ERROR(error); } DECREF_INPUTS(); - res = _PyObject_CallNoArgsTstate(tstate, enter); + res = PyObject_CallNoArgs(enter); Py_DECREF(enter); if (res == NULL) { Py_DECREF(exit); @@ -3822,9 +3822,9 @@ dummy_func( } inst(CONVERT_VALUE, (value -- result)) { - convertion_func_ptr conv_fn; + conversion_func conv_fn; assert(oparg >= FVC_STR && oparg <= FVC_ASCII); - conv_fn = CONVERSION_FUNCTIONS[oparg]; + conv_fn = _PyEval_ConversionFuncs[oparg]; result = conv_fn(value); Py_DECREF(value); ERROR_IF(result == NULL, error); diff --git a/Python/ceval.c b/Python/ceval.c index 41e9310938d..34f286e4e17 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -337,6 +337,12 @@ const binaryfunc _PyEval_BinaryOps[] = { [NB_INPLACE_XOR] = PyNumber_InPlaceXor, }; +const conversion_func _PyEval_ConversionFuncs[4] = { + [FVC_STR] = PyObject_Str, + [FVC_REPR] = PyObject_Repr, + [FVC_ASCII] = PyObject_ASCII +}; + // PEP 634: Structural Pattern Matching diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index 6ad41950ea3..9674a7824a4 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -352,13 +352,6 @@ do { \ } \ } while (0); -typedef PyObject *(*convertion_func_ptr)(PyObject *); - -static const convertion_func_ptr CONVERSION_FUNCTIONS[4] = { - [FVC_STR] = PyObject_Str, - [FVC_REPR] = PyObject_Repr, - [FVC_ASCII] = PyObject_ASCII -}; // GH-89279: Force inlining by using a macro. #if defined(_MSC_VER) && SIZEOF_INT == 4 diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 20fab8f4c61..9ec1be9076a 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2548,7 +2548,7 @@ GOTO_ERROR(error); } Py_DECREF(mgr); - res = _PyObject_CallNoArgsTstate(tstate, enter); + res = PyObject_CallNoArgs(enter); Py_DECREF(enter); if (res == NULL) { Py_DECREF(exit); @@ -2591,7 +2591,7 @@ GOTO_ERROR(error); } Py_DECREF(mgr); - res = _PyObject_CallNoArgsTstate(tstate, enter); + res = PyObject_CallNoArgs(enter); Py_DECREF(enter); if (res == NULL) { Py_DECREF(exit); @@ -3570,9 +3570,9 @@ PyObject *result; oparg = CURRENT_OPARG(); value = stack_pointer[-1]; - convertion_func_ptr conv_fn; + conversion_func conv_fn; assert(oparg >= FVC_STR && oparg <= FVC_ASCII); - conv_fn = CONVERSION_FUNCTIONS[oparg]; + conv_fn = _PyEval_ConversionFuncs[oparg]; result = conv_fn(value); Py_DECREF(value); if (result == NULL) goto pop_1_error_tier_two; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index bb26dac0e2b..3312078e9a2 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -40,7 +40,7 @@ GOTO_ERROR(error); } Py_DECREF(mgr); - res = _PyObject_CallNoArgsTstate(tstate, enter); + res = PyObject_CallNoArgs(enter); Py_DECREF(enter); if (res == NULL) { Py_DECREF(exit); @@ -86,7 +86,7 @@ GOTO_ERROR(error); } Py_DECREF(mgr); - res = _PyObject_CallNoArgsTstate(tstate, enter); + res = PyObject_CallNoArgs(enter); Py_DECREF(enter); if (res == NULL) { Py_DECREF(exit); @@ -2140,9 +2140,9 @@ PyObject *value; PyObject *result; value = stack_pointer[-1]; - convertion_func_ptr conv_fn; + conversion_func conv_fn; assert(oparg >= FVC_STR && oparg <= FVC_ASCII); - conv_fn = CONVERSION_FUNCTIONS[oparg]; + conv_fn = _PyEval_ConversionFuncs[oparg]; result = conv_fn(value); Py_DECREF(value); if (result == NULL) goto pop_1_error; diff --git a/Python/jit.c b/Python/jit.c index ac2c60ed925..9f9e123ab91 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -203,13 +203,14 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches) *loc32 = (uint32_t)value; continue; case HoleKind_ARM64_RELOC_UNSIGNED: - case HoleKind_IMAGE_REL_AMD64_ADDR64: case HoleKind_R_AARCH64_ABS64: case HoleKind_X86_64_RELOC_UNSIGNED: case HoleKind_R_X86_64_64: // 64-bit absolute address. *loc64 = value; continue; + case HoleKind_IMAGE_REL_AMD64_REL32: + case HoleKind_IMAGE_REL_I386_REL32: case HoleKind_R_X86_64_GOTPCRELX: case HoleKind_R_X86_64_REX_GOTPCRELX: case HoleKind_X86_64_RELOC_GOT: @@ -249,7 +250,7 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches) // Check that we're not out of range of 32 signed bits: assert((int64_t)value >= -(1LL << 31)); assert((int64_t)value < (1LL << 31)); - loc32[0] = (uint32_t)value; + *loc32 = (uint32_t)value; continue; case HoleKind_R_AARCH64_CALL26: case HoleKind_R_AARCH64_JUMP26: @@ -307,23 +308,23 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches) next_hole->addend == hole->addend && next_hole->value == hole->value) { - unsigned char rd = get_bits(loc32[0], 0, 5); + unsigned char reg = get_bits(loc32[0], 0, 5); assert(IS_AARCH64_LDR_OR_STR(loc32[1])); - unsigned char rt = get_bits(loc32[1], 0, 5); - unsigned char rn = get_bits(loc32[1], 5, 5); - assert(rd == rn && rn == rt); + // There should be only one register involved: + assert(reg == get_bits(loc32[1], 0, 5)); // ldr's output register. + assert(reg == get_bits(loc32[1], 5, 5)); // ldr's input register. uint64_t relaxed = *(uint64_t *)value; if (relaxed < (1UL << 16)) { // adrp reg, AAA; ldr reg, [reg + BBB] -> movz reg, XXX; nop - loc32[0] = 0xD2800000 | (get_bits(relaxed, 0, 16) << 5) | rd; + loc32[0] = 0xD2800000 | (get_bits(relaxed, 0, 16) << 5) | reg; loc32[1] = 0xD503201F; i++; continue; } if (relaxed < (1ULL << 32)) { // adrp reg, AAA; ldr reg, [reg + BBB] -> movz reg, XXX; movk reg, YYY - loc32[0] = 0xD2800000 | (get_bits(relaxed, 0, 16) << 5) | rd; - loc32[1] = 0xF2A00000 | (get_bits(relaxed, 16, 16) << 5) | rd; + loc32[0] = 0xD2800000 | (get_bits(relaxed, 0, 16) << 5) | reg; + loc32[1] = 0xF2A00000 | (get_bits(relaxed, 16, 16) << 5) | reg; i++; continue; } @@ -332,13 +333,15 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches) (int64_t)relaxed >= -(1L << 19) && (int64_t)relaxed < (1L << 19)) { - // adrp reg, AAA; ldr reg, [reg + BBB] -> ldr x0, XXX; nop - loc32[0] = 0x58000000 | (get_bits(relaxed, 2, 19) << 5) | rd; + // adrp reg, AAA; ldr reg, [reg + BBB] -> ldr reg, XXX; nop + loc32[0] = 0x58000000 | (get_bits(relaxed, 2, 19) << 5) | reg; loc32[1] = 0xD503201F; i++; continue; } } + // Fall through... + case HoleKind_ARM64_RELOC_PAGE21: // Number of pages between this page and the value's page: value = (value >> 12) - ((uint64_t)location >> 12); // Check that we're not out of range of 21 signed bits: @@ -350,6 +353,7 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches) set_bits(loc32, 5, value, 2, 19); continue; case HoleKind_ARM64_RELOC_GOT_LOAD_PAGEOFF12: + case HoleKind_ARM64_RELOC_PAGEOFF12: case HoleKind_R_AARCH64_LD64_GOT_LO12_NC: // 12-bit low part of an absolute address. Pairs nicely with // ARM64_RELOC_GOT_LOAD_PAGE21 (above). diff --git a/Tools/jit/_schema.py b/Tools/jit/_schema.py index 975ca650a13..14e5fc2aae8 100644 --- a/Tools/jit/_schema.py +++ b/Tools/jit/_schema.py @@ -4,9 +4,12 @@ import typing HoleKind: typing.TypeAlias = typing.Literal[ "ARM64_RELOC_GOT_LOAD_PAGE21", "ARM64_RELOC_GOT_LOAD_PAGEOFF12", + "ARM64_RELOC_PAGE21", + "ARM64_RELOC_PAGEOFF12", "ARM64_RELOC_UNSIGNED", - "IMAGE_REL_AMD64_ADDR64", + "IMAGE_REL_AMD64_REL32", "IMAGE_REL_I386_DIR32", + "IMAGE_REL_I386_REL32", "R_AARCH64_ABS64", "R_AARCH64_ADR_GOT_PAGE", "R_AARCH64_CALL26", diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py index 71c678e04fb..eddec731984 100644 --- a/Tools/jit/_stencils.py +++ b/Tools/jit/_stencils.py @@ -96,7 +96,7 @@ class Stencil: instruction |= ((base - hole.offset) >> 2) & 0x03FFFFFF self.body[where] = instruction.to_bytes(4, sys.byteorder) self.disassembly += [ - f"{base + 4 * 0: x}: d2800008 mov x8, #0x0", + f"{base + 4 * 0:x}: d2800008 mov x8, #0x0", f"{base + 4 * 0:016x}: R_AARCH64_MOVW_UABS_G0_NC {hole.symbol}", f"{base + 4 * 1:x}: f2a00008 movk x8, #0x0, lsl #16", f"{base + 4 * 1:016x}: R_AARCH64_MOVW_UABS_G1_NC {hole.symbol}", @@ -162,6 +162,13 @@ class StencilGroup: ): self.code.emit_aarch64_trampoline(hole) continue + elif ( + hole.kind in {"IMAGE_REL_AMD64_REL32"} + and hole.value is HoleValue.ZERO + ): + raise ValueError( + f"Add PyAPI_FUNC(...) or PyAPI_DATA(...) to declaration of {hole.symbol}!" + ) holes.append(hole) stencil.holes[:] = holes self.code.pad(alignment) diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index 06dc4e7acc6..07959b15b6c 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -106,7 +106,7 @@ class _Target(typing.Generic[_S, _R]): o = tempdir / f"{opname}.o" args = [ f"--target={self.triple}", - "-DPy_BUILD_CORE", + "-DPy_BUILD_CORE_MODULE", "-D_DEBUG" if self.debug else "-DNDEBUG", f"-D_JIT_OPCODE={opname}", "-D_PyJIT_ACTIVE", @@ -118,12 +118,17 @@ class _Target(typing.Generic[_S, _R]): f"-I{CPYTHON / 'Python'}", "-O3", "-c", + # This debug info isn't necessary, and bloats out the JIT'ed code. + # We *may* be able to re-enable this, process it, and JIT it for a + # nicer debugging experience... but that needs a lot more research: "-fno-asynchronous-unwind-tables", + # Don't call built-in functions that we can't find or patch: "-fno-builtin", - # SET_FUNCTION_ATTRIBUTE on 32-bit Windows debug builds: - "-fno-jump-tables", + # Emit relaxable 64-bit calls/jumps, so we don't have to worry about + # about emitting in-range trampolines for out-of-range targets. + # We can probably remove this and emit trampolines in the future: "-fno-plt", - # Don't make calls to weird stack-smashing canaries: + # Don't call stack-smashing canaries that we can't find or patch: "-fno-stack-protector", "-o", f"{o}", @@ -194,34 +199,45 @@ class _COFF( offset = base + symbol["Value"] name = symbol["Name"] name = name.removeprefix(self.prefix) - group.symbols[name] = value, offset + if name not in group.symbols: + group.symbols[name] = value, offset for wrapped_relocation in section["Relocations"]: relocation = wrapped_relocation["Relocation"] hole = self._handle_relocation(base, relocation, stencil.body) stencil.holes.append(hole) + def _unwrap_dllimport(self, name: str) -> tuple[_stencils.HoleValue, str | None]: + if name.startswith("__imp_"): + name = name.removeprefix("__imp_") + name = name.removeprefix(self.prefix) + return _stencils.HoleValue.GOT, name + name = name.removeprefix(self.prefix) + return _stencils.symbol_to_value(name) + def _handle_relocation( self, base: int, relocation: _schema.COFFRelocation, raw: bytes ) -> _stencils.Hole: match relocation: - case { - "Offset": offset, - "Symbol": s, - "Type": {"Value": "IMAGE_REL_AMD64_ADDR64" as kind}, - }: - offset += base - s = s.removeprefix(self.prefix) - value, symbol = _stencils.symbol_to_value(s) - addend = int.from_bytes(raw[offset : offset + 8], "little") case { "Offset": offset, "Symbol": s, "Type": {"Value": "IMAGE_REL_I386_DIR32" as kind}, }: offset += base - s = s.removeprefix(self.prefix) - value, symbol = _stencils.symbol_to_value(s) + value, symbol = self._unwrap_dllimport(s) addend = int.from_bytes(raw[offset : offset + 4], "little") + case { + "Offset": offset, + "Symbol": s, + "Type": { + "Value": "IMAGE_REL_AMD64_REL32" | "IMAGE_REL_I386_REL32" as kind + }, + }: + offset += base + value, symbol = self._unwrap_dllimport(s) + addend = ( + int.from_bytes(raw[offset : offset + 4], "little", signed=True) - 4 + ) case _: raise NotImplementedError(relocation) return _stencils.Hole(offset, kind, value, symbol, addend) @@ -423,12 +439,12 @@ def get_target(host: str) -> _COFF | _ELF | _MachO: args = ["-mcmodel=large"] return _ELF(host, alignment=8, args=args) if re.fullmatch(r"i686-pc-windows-msvc", host): - args = ["-mcmodel=large"] + args = ["-DPy_NO_ENABLE_SHARED"] return _COFF(host, args=args, prefix="_") if re.fullmatch(r"x86_64-apple-darwin.*", host): return _MachO(host, prefix="_") if re.fullmatch(r"x86_64-pc-windows-msvc", host): - args = ["-mcmodel=large"] + args = ["-fms-runtime-lib=dll"] return _COFF(host, args=args) if re.fullmatch(r"x86_64-.*-linux-gnu", host): return _ELF(host) diff --git a/Tools/jit/template.c b/Tools/jit/template.c index d79c6efb8f6..8aaf4581de3 100644 --- a/Tools/jit/template.c +++ b/Tools/jit/template.c @@ -9,6 +9,7 @@ #include "pycore_long.h" #include "pycore_opcode_metadata.h" #include "pycore_opcode_utils.h" +#include "pycore_optimizer.h" #include "pycore_range.h" #include "pycore_setobject.h" #include "pycore_sliceobject.h" @@ -58,11 +59,11 @@ do { \ } while (0) #define PATCH_VALUE(TYPE, NAME, ALIAS) \ - extern void ALIAS; \ + PyAPI_DATA(void) ALIAS; \ TYPE NAME = (TYPE)(uint64_t)&ALIAS; #define PATCH_JUMP(ALIAS) \ - extern void ALIAS; \ + PyAPI_DATA(void) ALIAS; \ __attribute__((musttail)) \ return ((jit_func)&ALIAS)(frame, stack_pointer, tstate);