GH-115802: JIT "small" code for Windows (GH-115964)

This commit is contained in:
Brandt Bucher 2024-02-29 08:11:28 -08:00 committed by GitHub
parent 45d8871dc4
commit f0df35eeca
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
26 changed files with 129 additions and 96 deletions

View File

@ -92,9 +92,6 @@ PyAPI_FUNC(_PyOptimizerObject *) PyUnstable_GetOptimizer(void);
PyAPI_FUNC(_PyExecutorObject *) PyUnstable_GetExecutor(PyCodeObject *code, int offset); PyAPI_FUNC(_PyExecutorObject *) PyUnstable_GetExecutor(PyCodeObject *code, int offset);
int
_PyOptimizer_Optimize(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer, _PyExecutorObject **exec_ptr);
void _Py_ExecutorInit(_PyExecutorObject *, const _PyBloomFilter *); void _Py_ExecutorInit(_PyExecutorObject *, const _PyBloomFilter *);
void _Py_ExecutorClear(_PyExecutorObject *); void _Py_ExecutorClear(_PyExecutorObject *);
void _Py_BloomFilter_Init(_PyBloomFilter *); void _Py_BloomFilter_Init(_PyBloomFilter *);

View File

@ -181,22 +181,26 @@ extern PyObject* _Py_MakeCoro(PyFunctionObject *func);
/* Handle signals, pending calls, GIL drop request /* Handle signals, pending calls, GIL drop request
and asynchronous exception */ and asynchronous exception */
extern int _Py_HandlePending(PyThreadState *tstate); PyAPI_FUNC(int) _Py_HandlePending(PyThreadState *tstate);
extern PyObject * _PyEval_GetFrameLocals(void); extern PyObject * _PyEval_GetFrameLocals(void);
extern const binaryfunc _PyEval_BinaryOps[]; typedef PyObject *(*conversion_func)(PyObject *);
int _PyEval_CheckExceptStarTypeValid(PyThreadState *tstate, PyObject* right);
int _PyEval_CheckExceptTypeValid(PyThreadState *tstate, PyObject* right); PyAPI_DATA(const binaryfunc) _PyEval_BinaryOps[];
int _PyEval_ExceptionGroupMatch(PyObject* exc_value, PyObject *match_type, PyObject **match, PyObject **rest); PyAPI_DATA(const conversion_func) _PyEval_ConversionFuncs[];
void _PyEval_FormatAwaitableError(PyThreadState *tstate, PyTypeObject *type, int oparg);
void _PyEval_FormatExcCheckArg(PyThreadState *tstate, PyObject *exc, const char *format_str, PyObject *obj); PyAPI_FUNC(int) _PyEval_CheckExceptStarTypeValid(PyThreadState *tstate, PyObject* right);
void _PyEval_FormatExcUnbound(PyThreadState *tstate, PyCodeObject *co, int oparg); PyAPI_FUNC(int) _PyEval_CheckExceptTypeValid(PyThreadState *tstate, PyObject* right);
void _PyEval_FormatKwargsError(PyThreadState *tstate, PyObject *func, PyObject *kwargs); PyAPI_FUNC(int) _PyEval_ExceptionGroupMatch(PyObject* exc_value, PyObject *match_type, PyObject **match, PyObject **rest);
PyObject *_PyEval_MatchClass(PyThreadState *tstate, PyObject *subject, PyObject *type, Py_ssize_t nargs, PyObject *kwargs); PyAPI_FUNC(void) _PyEval_FormatAwaitableError(PyThreadState *tstate, PyTypeObject *type, int oparg);
PyObject *_PyEval_MatchKeys(PyThreadState *tstate, PyObject *map, PyObject *keys); PyAPI_FUNC(void) _PyEval_FormatExcCheckArg(PyThreadState *tstate, PyObject *exc, const char *format_str, PyObject *obj);
int _PyEval_UnpackIterable(PyThreadState *tstate, PyObject *v, int argcnt, int argcntafter, PyObject **sp); PyAPI_FUNC(void) _PyEval_FormatExcUnbound(PyThreadState *tstate, PyCodeObject *co, int oparg);
void _PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame); PyAPI_FUNC(void) _PyEval_FormatKwargsError(PyThreadState *tstate, PyObject *func, PyObject *kwargs);
PyAPI_FUNC(PyObject *)_PyEval_MatchClass(PyThreadState *tstate, PyObject *subject, PyObject *type, Py_ssize_t nargs, PyObject *kwargs);
PyAPI_FUNC(PyObject *)_PyEval_MatchKeys(PyThreadState *tstate, PyObject *map, PyObject *keys);
PyAPI_FUNC(int) _PyEval_UnpackIterable(PyThreadState *tstate, PyObject *v, int argcnt, int argcntafter, PyObject **sp);
PyAPI_FUNC(void) _PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame);
/* Bits that can be set in PyThreadState.eval_breaker */ /* Bits that can be set in PyThreadState.eval_breaker */

View File

@ -52,7 +52,7 @@ PyAPI_FUNC(Py_ssize_t) _PyDict_SizeOf(PyDictObject *);
of a key wins, if override is 2, a KeyError with conflicting key as of a key wins, if override is 2, a KeyError with conflicting key as
argument is raised. argument is raised.
*/ */
extern int _PyDict_MergeEx(PyObject *mp, PyObject *other, int override); PyAPI_FUNC(int) _PyDict_MergeEx(PyObject *mp, PyObject *other, int override);
extern void _PyDict_DebugMallocStats(FILE *out); extern void _PyDict_DebugMallocStats(FILE *out);
@ -100,10 +100,10 @@ extern Py_ssize_t _Py_dict_lookup(PyDictObject *mp, PyObject *key, Py_hash_t has
extern Py_ssize_t _PyDict_LookupIndex(PyDictObject *, PyObject *); extern Py_ssize_t _PyDict_LookupIndex(PyDictObject *, PyObject *);
extern Py_ssize_t _PyDictKeys_StringLookup(PyDictKeysObject* dictkeys, PyObject *key); extern Py_ssize_t _PyDictKeys_StringLookup(PyDictKeysObject* dictkeys, PyObject *key);
extern PyObject *_PyDict_LoadGlobal(PyDictObject *, PyDictObject *, PyObject *); PyAPI_FUNC(PyObject *)_PyDict_LoadGlobal(PyDictObject *, PyDictObject *, PyObject *);
/* Consumes references to key and value */ /* Consumes references to key and value */
extern int _PyDict_SetItem_Take2(PyDictObject *op, PyObject *key, PyObject *value); PyAPI_FUNC(int) _PyDict_SetItem_Take2(PyDictObject *op, PyObject *key, PyObject *value);
extern int _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, PyObject *name, PyObject *value); extern int _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, PyObject *name, PyObject *value);
extern int _PyDict_Pop_KnownHash( extern int _PyDict_Pop_KnownHash(
@ -247,8 +247,8 @@ _PyDict_NotifyEvent(PyInterpreterState *interp,
} }
extern PyObject *_PyObject_MakeDictFromInstanceAttributes(PyObject *obj, PyDictValues *values); extern PyObject *_PyObject_MakeDictFromInstanceAttributes(PyObject *obj, PyDictValues *values);
extern bool _PyObject_MakeInstanceAttributesFromDict(PyObject *obj, PyDictOrValues *dorv); PyAPI_FUNC(bool) _PyObject_MakeInstanceAttributesFromDict(PyObject *obj, PyDictOrValues *dorv);
extern PyObject *_PyDict_FromItems( PyAPI_FUNC(PyObject *)_PyDict_FromItems(
PyObject *const *keys, Py_ssize_t keys_offset, PyObject *const *keys, Py_ssize_t keys_offset,
PyObject *const *values, Py_ssize_t values_offset, PyObject *const *values, Py_ssize_t values_offset,
Py_ssize_t length); Py_ssize_t length);

View File

@ -34,7 +34,7 @@ struct _Py_float_runtime_state {
void _PyFloat_ExactDealloc(PyObject *op); PyAPI_FUNC(void) _PyFloat_ExactDealloc(PyObject *op);
extern void _PyFloat_DebugMallocStats(FILE* out); extern void _PyFloat_DebugMallocStats(FILE* out);

View File

@ -29,7 +29,7 @@ struct _py_func_state {
extern PyFunctionObject* _PyFunction_FromConstructor(PyFrameConstructor *constr); extern PyFunctionObject* _PyFunction_FromConstructor(PyFrameConstructor *constr);
extern uint32_t _PyFunction_GetVersionForCurrentState(PyFunctionObject *func); extern uint32_t _PyFunction_GetVersionForCurrentState(PyFunctionObject *func);
extern void _PyFunction_SetVersion(PyFunctionObject *func, uint32_t version); PyAPI_FUNC(void) _PyFunction_SetVersion(PyFunctionObject *func, uint32_t version);
PyFunctionObject *_PyFunction_LookupByVersion(uint32_t version); PyFunctionObject *_PyFunction_LookupByVersion(uint32_t version);
extern PyObject *_Py_set_function_type_params( extern PyObject *_Py_set_function_type_params(

View File

@ -10,7 +10,7 @@ extern "C" {
#include "pycore_freelist.h" #include "pycore_freelist.h"
extern PyObject *_PyGen_yf(PyGenObject *); PyAPI_FUNC(PyObject *)_PyGen_yf(PyGenObject *);
extern void _PyGen_Finalize(PyObject *self); extern void _PyGen_Finalize(PyObject *self);
// Export for '_asyncio' shared extension // Export for '_asyncio' shared extension
@ -19,7 +19,7 @@ PyAPI_FUNC(int) _PyGen_SetStopIterationValue(PyObject *);
// Export for '_asyncio' shared extension // Export for '_asyncio' shared extension
PyAPI_FUNC(int) _PyGen_FetchStopIterationValue(PyObject **); PyAPI_FUNC(int) _PyGen_FetchStopIterationValue(PyObject **);
extern PyObject *_PyCoro_GetAwaitableIter(PyObject *o); PyAPI_FUNC(PyObject *)_PyCoro_GetAwaitableIter(PyObject *o);
extern PyObject *_PyAsyncGenValueWrapperNew(PyThreadState *state, PyObject *); extern PyObject *_PyAsyncGenValueWrapperNew(PyThreadState *state, PyObject *);
extern PyTypeObject _PyCoroWrapper_Type; extern PyTypeObject _PyCoroWrapper_Type;

View File

@ -44,7 +44,7 @@ typedef struct {
const char *name; const char *name;
} intrinsic_func2_info; } intrinsic_func2_info;
extern const intrinsic_func1_info _PyIntrinsics_UnaryFunctions[]; PyAPI_DATA(const intrinsic_func1_info) _PyIntrinsics_UnaryFunctions[];
extern const intrinsic_func2_info _PyIntrinsics_BinaryFunctions[]; PyAPI_DATA(const intrinsic_func2_info) _PyIntrinsics_BinaryFunctions[];
#endif // !Py_INTERNAL_INTRINSIC_H #endif // !Py_INTERNAL_INTRINSIC_H

View File

@ -10,12 +10,12 @@ extern "C" {
#include "pycore_freelist.h" // _PyFreeListState #include "pycore_freelist.h" // _PyFreeListState
extern PyObject* _PyList_Extend(PyListObject *, PyObject *); PyAPI_FUNC(PyObject*) _PyList_Extend(PyListObject *, PyObject *);
extern void _PyList_DebugMallocStats(FILE *out); extern void _PyList_DebugMallocStats(FILE *out);
#define _PyList_ITEMS(op) _Py_RVALUE(_PyList_CAST(op)->ob_item) #define _PyList_ITEMS(op) _Py_RVALUE(_PyList_CAST(op)->ob_item)
extern int PyAPI_FUNC(int)
_PyList_AppendTakeRefListResize(PyListObject *self, PyObject *newitem); _PyList_AppendTakeRefListResize(PyListObject *self, PyObject *newitem);
// In free-threaded build: self should be locked by the caller, if it should be thread-safe. // In free-threaded build: self should be locked by the caller, if it should be thread-safe.
@ -54,7 +54,7 @@ typedef struct {
PyListObject *it_seq; /* Set to NULL when iterator is exhausted */ PyListObject *it_seq; /* Set to NULL when iterator is exhausted */
} _PyListIterObject; } _PyListIterObject;
extern PyObject *_PyList_FromArraySteal(PyObject *const *src, Py_ssize_t n); PyAPI_FUNC(PyObject *)_PyList_FromArraySteal(PyObject *const *src, Py_ssize_t n);
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -121,9 +121,9 @@ PyAPI_DATA(PyObject*) _PyLong_Rshift(PyObject *, size_t);
// Export for 'math' shared extension // Export for 'math' shared extension
PyAPI_DATA(PyObject*) _PyLong_Lshift(PyObject *, size_t); PyAPI_DATA(PyObject*) _PyLong_Lshift(PyObject *, size_t);
extern PyObject* _PyLong_Add(PyLongObject *left, PyLongObject *right); PyAPI_FUNC(PyObject*) _PyLong_Add(PyLongObject *left, PyLongObject *right);
extern PyObject* _PyLong_Multiply(PyLongObject *left, PyLongObject *right); PyAPI_FUNC(PyObject*) _PyLong_Multiply(PyLongObject *left, PyLongObject *right);
extern PyObject* _PyLong_Subtract(PyLongObject *left, PyLongObject *right); PyAPI_FUNC(PyObject*) _PyLong_Subtract(PyLongObject *left, PyLongObject *right);
// Export for 'binascii' shared extension. // Export for 'binascii' shared extension.
PyAPI_DATA(unsigned char) _PyLong_DigitValue[256]; PyAPI_DATA(unsigned char) _PyLong_DigitValue[256];

View File

@ -73,7 +73,7 @@ PyAPI_FUNC(int) _PyObject_IsFreed(PyObject *);
.ob_size = size \ .ob_size = size \
} }
extern void _Py_NO_RETURN _Py_FatalRefcountErrorFunc( PyAPI_FUNC(void) _Py_NO_RETURN _Py_FatalRefcountErrorFunc(
const char *func, const char *func,
const char *message); const char *message);
@ -684,7 +684,7 @@ PyAPI_FUNC(PyObject*) _PyObject_LookupSpecial(PyObject *, PyObject *);
extern int _PyObject_IsAbstract(PyObject *); extern int _PyObject_IsAbstract(PyObject *);
extern int _PyObject_GetMethod(PyObject *obj, PyObject *name, PyObject **method); PyAPI_FUNC(int) _PyObject_GetMethod(PyObject *obj, PyObject *name, PyObject **method);
extern PyObject* _PyObject_NextNotImplemented(PyObject *); extern PyObject* _PyObject_NextNotImplemented(PyObject *);
// Pickle support. // Pickle support.

View File

@ -111,6 +111,8 @@ extern int _Py_uop_frame_pop(_Py_UOpsContext *ctx);
PyAPI_FUNC(PyObject *) _Py_uop_symbols_test(PyObject *self, PyObject *ignored); PyAPI_FUNC(PyObject *) _Py_uop_symbols_test(PyObject *self, PyObject *ignored);
PyAPI_FUNC(int) _PyOptimizer_Optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer, _PyExecutorObject **exec_ptr);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View File

@ -95,7 +95,7 @@ extern void _PyErr_Fetch(
extern PyObject* _PyErr_GetRaisedException(PyThreadState *tstate); extern PyObject* _PyErr_GetRaisedException(PyThreadState *tstate);
extern int _PyErr_ExceptionMatches( PyAPI_FUNC(int) _PyErr_ExceptionMatches(
PyThreadState *tstate, PyThreadState *tstate,
PyObject *exc); PyObject *exc);
@ -114,18 +114,18 @@ extern void _PyErr_SetObject(
extern void _PyErr_ChainStackItem(void); extern void _PyErr_ChainStackItem(void);
extern void _PyErr_Clear(PyThreadState *tstate); PyAPI_FUNC(void) _PyErr_Clear(PyThreadState *tstate);
extern void _PyErr_SetNone(PyThreadState *tstate, PyObject *exception); extern void _PyErr_SetNone(PyThreadState *tstate, PyObject *exception);
extern PyObject* _PyErr_NoMemory(PyThreadState *tstate); extern PyObject* _PyErr_NoMemory(PyThreadState *tstate);
extern void _PyErr_SetString( PyAPI_FUNC(void) _PyErr_SetString(
PyThreadState *tstate, PyThreadState *tstate,
PyObject *exception, PyObject *exception,
const char *string); const char *string);
extern PyObject* _PyErr_Format( PyAPI_FUNC(PyObject*) _PyErr_Format(
PyThreadState *tstate, PyThreadState *tstate,
PyObject *exception, PyObject *exception,
const char *format, const char *format,

View File

@ -11,7 +11,7 @@ extern "C" {
/* runtime lifecycle */ /* runtime lifecycle */
extern PyObject * PyAPI_FUNC(PyObject *)
_PyBuildSlice_ConsumeRefs(PyObject *start, PyObject *stop); _PyBuildSlice_ConsumeRefs(PyObject *start, PyObject *stop);
#ifdef __cplusplus #ifdef __cplusplus

View File

@ -21,7 +21,7 @@ extern PyStatus _PyTuple_InitGlobalObjects(PyInterpreterState *);
#define _PyTuple_ITEMS(op) _Py_RVALUE(_PyTuple_CAST(op)->ob_item) #define _PyTuple_ITEMS(op) _Py_RVALUE(_PyTuple_CAST(op)->ob_item)
extern PyObject *_PyTuple_FromArray(PyObject *const *, Py_ssize_t); extern PyObject *_PyTuple_FromArray(PyObject *const *, Py_ssize_t);
extern PyObject *_PyTuple_FromArraySteal(PyObject *const *, Py_ssize_t); PyAPI_FUNC(PyObject *)_PyTuple_FromArraySteal(PyObject *const *, Py_ssize_t);
typedef struct { typedef struct {
PyObject_HEAD PyObject_HEAD

View File

@ -147,7 +147,7 @@ extern PyObject* _Py_slot_tp_getattr_hook(PyObject *self, PyObject *name);
extern PyTypeObject _PyBufferWrapper_Type; extern PyTypeObject _PyBufferWrapper_Type;
extern PyObject* _PySuper_Lookup(PyTypeObject *su_type, PyObject *su_obj, PyAPI_FUNC(PyObject*) _PySuper_Lookup(PyTypeObject *su_type, PyObject *su_obj,
PyObject *name, int *meth_found); PyObject *name, int *meth_found);

View File

@ -31,7 +31,7 @@ PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
PyObject *op, PyObject *op,
int check_content); int check_content);
extern void _PyUnicode_ExactDealloc(PyObject *op); PyAPI_FUNC(void) _PyUnicode_ExactDealloc(PyObject *op);
extern Py_ssize_t _PyUnicode_InternedSize(void); extern Py_ssize_t _PyUnicode_InternedSize(void);
// Get a copy of a Unicode string. // Get a copy of a Unicode string.
@ -202,7 +202,7 @@ PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
/* --- Methods & Slots ---------------------------------------------------- */ /* --- Methods & Slots ---------------------------------------------------- */
extern PyObject* _PyUnicode_JoinArray( PyAPI_FUNC(PyObject*) _PyUnicode_JoinArray(
PyObject *separator, PyObject *separator,
PyObject *const *items, PyObject *const *items,
Py_ssize_t seqlen Py_ssize_t seqlen

View File

@ -2755,7 +2755,7 @@ dummy_func(
GOTO_ERROR(error); GOTO_ERROR(error);
} }
DECREF_INPUTS(); DECREF_INPUTS();
res = _PyObject_CallNoArgsTstate(tstate, enter); res = PyObject_CallNoArgs(enter);
Py_DECREF(enter); Py_DECREF(enter);
if (res == NULL) { if (res == NULL) {
Py_DECREF(exit); Py_DECREF(exit);
@ -2790,7 +2790,7 @@ dummy_func(
GOTO_ERROR(error); GOTO_ERROR(error);
} }
DECREF_INPUTS(); DECREF_INPUTS();
res = _PyObject_CallNoArgsTstate(tstate, enter); res = PyObject_CallNoArgs(enter);
Py_DECREF(enter); Py_DECREF(enter);
if (res == NULL) { if (res == NULL) {
Py_DECREF(exit); Py_DECREF(exit);
@ -3822,9 +3822,9 @@ dummy_func(
} }
inst(CONVERT_VALUE, (value -- result)) { inst(CONVERT_VALUE, (value -- result)) {
convertion_func_ptr conv_fn; conversion_func conv_fn;
assert(oparg >= FVC_STR && oparg <= FVC_ASCII); assert(oparg >= FVC_STR && oparg <= FVC_ASCII);
conv_fn = CONVERSION_FUNCTIONS[oparg]; conv_fn = _PyEval_ConversionFuncs[oparg];
result = conv_fn(value); result = conv_fn(value);
Py_DECREF(value); Py_DECREF(value);
ERROR_IF(result == NULL, error); ERROR_IF(result == NULL, error);

View File

@ -337,6 +337,12 @@ const binaryfunc _PyEval_BinaryOps[] = {
[NB_INPLACE_XOR] = PyNumber_InPlaceXor, [NB_INPLACE_XOR] = PyNumber_InPlaceXor,
}; };
const conversion_func _PyEval_ConversionFuncs[4] = {
[FVC_STR] = PyObject_Str,
[FVC_REPR] = PyObject_Repr,
[FVC_ASCII] = PyObject_ASCII
};
// PEP 634: Structural Pattern Matching // PEP 634: Structural Pattern Matching

View File

@ -352,13 +352,6 @@ do { \
} \ } \
} while (0); } while (0);
typedef PyObject *(*convertion_func_ptr)(PyObject *);
static const convertion_func_ptr CONVERSION_FUNCTIONS[4] = {
[FVC_STR] = PyObject_Str,
[FVC_REPR] = PyObject_Repr,
[FVC_ASCII] = PyObject_ASCII
};
// GH-89279: Force inlining by using a macro. // GH-89279: Force inlining by using a macro.
#if defined(_MSC_VER) && SIZEOF_INT == 4 #if defined(_MSC_VER) && SIZEOF_INT == 4

View File

@ -2548,7 +2548,7 @@
GOTO_ERROR(error); GOTO_ERROR(error);
} }
Py_DECREF(mgr); Py_DECREF(mgr);
res = _PyObject_CallNoArgsTstate(tstate, enter); res = PyObject_CallNoArgs(enter);
Py_DECREF(enter); Py_DECREF(enter);
if (res == NULL) { if (res == NULL) {
Py_DECREF(exit); Py_DECREF(exit);
@ -2591,7 +2591,7 @@
GOTO_ERROR(error); GOTO_ERROR(error);
} }
Py_DECREF(mgr); Py_DECREF(mgr);
res = _PyObject_CallNoArgsTstate(tstate, enter); res = PyObject_CallNoArgs(enter);
Py_DECREF(enter); Py_DECREF(enter);
if (res == NULL) { if (res == NULL) {
Py_DECREF(exit); Py_DECREF(exit);
@ -3570,9 +3570,9 @@
PyObject *result; PyObject *result;
oparg = CURRENT_OPARG(); oparg = CURRENT_OPARG();
value = stack_pointer[-1]; value = stack_pointer[-1];
convertion_func_ptr conv_fn; conversion_func conv_fn;
assert(oparg >= FVC_STR && oparg <= FVC_ASCII); assert(oparg >= FVC_STR && oparg <= FVC_ASCII);
conv_fn = CONVERSION_FUNCTIONS[oparg]; conv_fn = _PyEval_ConversionFuncs[oparg];
result = conv_fn(value); result = conv_fn(value);
Py_DECREF(value); Py_DECREF(value);
if (result == NULL) goto pop_1_error_tier_two; if (result == NULL) goto pop_1_error_tier_two;

View File

@ -40,7 +40,7 @@
GOTO_ERROR(error); GOTO_ERROR(error);
} }
Py_DECREF(mgr); Py_DECREF(mgr);
res = _PyObject_CallNoArgsTstate(tstate, enter); res = PyObject_CallNoArgs(enter);
Py_DECREF(enter); Py_DECREF(enter);
if (res == NULL) { if (res == NULL) {
Py_DECREF(exit); Py_DECREF(exit);
@ -86,7 +86,7 @@
GOTO_ERROR(error); GOTO_ERROR(error);
} }
Py_DECREF(mgr); Py_DECREF(mgr);
res = _PyObject_CallNoArgsTstate(tstate, enter); res = PyObject_CallNoArgs(enter);
Py_DECREF(enter); Py_DECREF(enter);
if (res == NULL) { if (res == NULL) {
Py_DECREF(exit); Py_DECREF(exit);
@ -2140,9 +2140,9 @@
PyObject *value; PyObject *value;
PyObject *result; PyObject *result;
value = stack_pointer[-1]; value = stack_pointer[-1];
convertion_func_ptr conv_fn; conversion_func conv_fn;
assert(oparg >= FVC_STR && oparg <= FVC_ASCII); assert(oparg >= FVC_STR && oparg <= FVC_ASCII);
conv_fn = CONVERSION_FUNCTIONS[oparg]; conv_fn = _PyEval_ConversionFuncs[oparg];
result = conv_fn(value); result = conv_fn(value);
Py_DECREF(value); Py_DECREF(value);
if (result == NULL) goto pop_1_error; if (result == NULL) goto pop_1_error;

View File

@ -203,13 +203,14 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches)
*loc32 = (uint32_t)value; *loc32 = (uint32_t)value;
continue; continue;
case HoleKind_ARM64_RELOC_UNSIGNED: case HoleKind_ARM64_RELOC_UNSIGNED:
case HoleKind_IMAGE_REL_AMD64_ADDR64:
case HoleKind_R_AARCH64_ABS64: case HoleKind_R_AARCH64_ABS64:
case HoleKind_X86_64_RELOC_UNSIGNED: case HoleKind_X86_64_RELOC_UNSIGNED:
case HoleKind_R_X86_64_64: case HoleKind_R_X86_64_64:
// 64-bit absolute address. // 64-bit absolute address.
*loc64 = value; *loc64 = value;
continue; continue;
case HoleKind_IMAGE_REL_AMD64_REL32:
case HoleKind_IMAGE_REL_I386_REL32:
case HoleKind_R_X86_64_GOTPCRELX: case HoleKind_R_X86_64_GOTPCRELX:
case HoleKind_R_X86_64_REX_GOTPCRELX: case HoleKind_R_X86_64_REX_GOTPCRELX:
case HoleKind_X86_64_RELOC_GOT: case HoleKind_X86_64_RELOC_GOT:
@ -249,7 +250,7 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches)
// Check that we're not out of range of 32 signed bits: // Check that we're not out of range of 32 signed bits:
assert((int64_t)value >= -(1LL << 31)); assert((int64_t)value >= -(1LL << 31));
assert((int64_t)value < (1LL << 31)); assert((int64_t)value < (1LL << 31));
loc32[0] = (uint32_t)value; *loc32 = (uint32_t)value;
continue; continue;
case HoleKind_R_AARCH64_CALL26: case HoleKind_R_AARCH64_CALL26:
case HoleKind_R_AARCH64_JUMP26: case HoleKind_R_AARCH64_JUMP26:
@ -307,23 +308,23 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches)
next_hole->addend == hole->addend && next_hole->addend == hole->addend &&
next_hole->value == hole->value) next_hole->value == hole->value)
{ {
unsigned char rd = get_bits(loc32[0], 0, 5); unsigned char reg = get_bits(loc32[0], 0, 5);
assert(IS_AARCH64_LDR_OR_STR(loc32[1])); assert(IS_AARCH64_LDR_OR_STR(loc32[1]));
unsigned char rt = get_bits(loc32[1], 0, 5); // There should be only one register involved:
unsigned char rn = get_bits(loc32[1], 5, 5); assert(reg == get_bits(loc32[1], 0, 5)); // ldr's output register.
assert(rd == rn && rn == rt); assert(reg == get_bits(loc32[1], 5, 5)); // ldr's input register.
uint64_t relaxed = *(uint64_t *)value; uint64_t relaxed = *(uint64_t *)value;
if (relaxed < (1UL << 16)) { if (relaxed < (1UL << 16)) {
// adrp reg, AAA; ldr reg, [reg + BBB] -> movz reg, XXX; nop // adrp reg, AAA; ldr reg, [reg + BBB] -> movz reg, XXX; nop
loc32[0] = 0xD2800000 | (get_bits(relaxed, 0, 16) << 5) | rd; loc32[0] = 0xD2800000 | (get_bits(relaxed, 0, 16) << 5) | reg;
loc32[1] = 0xD503201F; loc32[1] = 0xD503201F;
i++; i++;
continue; continue;
} }
if (relaxed < (1ULL << 32)) { if (relaxed < (1ULL << 32)) {
// adrp reg, AAA; ldr reg, [reg + BBB] -> movz reg, XXX; movk reg, YYY // adrp reg, AAA; ldr reg, [reg + BBB] -> movz reg, XXX; movk reg, YYY
loc32[0] = 0xD2800000 | (get_bits(relaxed, 0, 16) << 5) | rd; loc32[0] = 0xD2800000 | (get_bits(relaxed, 0, 16) << 5) | reg;
loc32[1] = 0xF2A00000 | (get_bits(relaxed, 16, 16) << 5) | rd; loc32[1] = 0xF2A00000 | (get_bits(relaxed, 16, 16) << 5) | reg;
i++; i++;
continue; continue;
} }
@ -332,13 +333,15 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches)
(int64_t)relaxed >= -(1L << 19) && (int64_t)relaxed >= -(1L << 19) &&
(int64_t)relaxed < (1L << 19)) (int64_t)relaxed < (1L << 19))
{ {
// adrp reg, AAA; ldr reg, [reg + BBB] -> ldr x0, XXX; nop // adrp reg, AAA; ldr reg, [reg + BBB] -> ldr reg, XXX; nop
loc32[0] = 0x58000000 | (get_bits(relaxed, 2, 19) << 5) | rd; loc32[0] = 0x58000000 | (get_bits(relaxed, 2, 19) << 5) | reg;
loc32[1] = 0xD503201F; loc32[1] = 0xD503201F;
i++; i++;
continue; continue;
} }
} }
// Fall through...
case HoleKind_ARM64_RELOC_PAGE21:
// Number of pages between this page and the value's page: // Number of pages between this page and the value's page:
value = (value >> 12) - ((uint64_t)location >> 12); value = (value >> 12) - ((uint64_t)location >> 12);
// Check that we're not out of range of 21 signed bits: // Check that we're not out of range of 21 signed bits:
@ -350,6 +353,7 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches)
set_bits(loc32, 5, value, 2, 19); set_bits(loc32, 5, value, 2, 19);
continue; continue;
case HoleKind_ARM64_RELOC_GOT_LOAD_PAGEOFF12: case HoleKind_ARM64_RELOC_GOT_LOAD_PAGEOFF12:
case HoleKind_ARM64_RELOC_PAGEOFF12:
case HoleKind_R_AARCH64_LD64_GOT_LO12_NC: case HoleKind_R_AARCH64_LD64_GOT_LO12_NC:
// 12-bit low part of an absolute address. Pairs nicely with // 12-bit low part of an absolute address. Pairs nicely with
// ARM64_RELOC_GOT_LOAD_PAGE21 (above). // ARM64_RELOC_GOT_LOAD_PAGE21 (above).

View File

@ -4,9 +4,12 @@ import typing
HoleKind: typing.TypeAlias = typing.Literal[ HoleKind: typing.TypeAlias = typing.Literal[
"ARM64_RELOC_GOT_LOAD_PAGE21", "ARM64_RELOC_GOT_LOAD_PAGE21",
"ARM64_RELOC_GOT_LOAD_PAGEOFF12", "ARM64_RELOC_GOT_LOAD_PAGEOFF12",
"ARM64_RELOC_PAGE21",
"ARM64_RELOC_PAGEOFF12",
"ARM64_RELOC_UNSIGNED", "ARM64_RELOC_UNSIGNED",
"IMAGE_REL_AMD64_ADDR64", "IMAGE_REL_AMD64_REL32",
"IMAGE_REL_I386_DIR32", "IMAGE_REL_I386_DIR32",
"IMAGE_REL_I386_REL32",
"R_AARCH64_ABS64", "R_AARCH64_ABS64",
"R_AARCH64_ADR_GOT_PAGE", "R_AARCH64_ADR_GOT_PAGE",
"R_AARCH64_CALL26", "R_AARCH64_CALL26",

View File

@ -96,7 +96,7 @@ class Stencil:
instruction |= ((base - hole.offset) >> 2) & 0x03FFFFFF instruction |= ((base - hole.offset) >> 2) & 0x03FFFFFF
self.body[where] = instruction.to_bytes(4, sys.byteorder) self.body[where] = instruction.to_bytes(4, sys.byteorder)
self.disassembly += [ self.disassembly += [
f"{base + 4 * 0: x}: d2800008 mov x8, #0x0", f"{base + 4 * 0:x}: d2800008 mov x8, #0x0",
f"{base + 4 * 0:016x}: R_AARCH64_MOVW_UABS_G0_NC {hole.symbol}", f"{base + 4 * 0:016x}: R_AARCH64_MOVW_UABS_G0_NC {hole.symbol}",
f"{base + 4 * 1:x}: f2a00008 movk x8, #0x0, lsl #16", f"{base + 4 * 1:x}: f2a00008 movk x8, #0x0, lsl #16",
f"{base + 4 * 1:016x}: R_AARCH64_MOVW_UABS_G1_NC {hole.symbol}", f"{base + 4 * 1:016x}: R_AARCH64_MOVW_UABS_G1_NC {hole.symbol}",
@ -162,6 +162,13 @@ class StencilGroup:
): ):
self.code.emit_aarch64_trampoline(hole) self.code.emit_aarch64_trampoline(hole)
continue continue
elif (
hole.kind in {"IMAGE_REL_AMD64_REL32"}
and hole.value is HoleValue.ZERO
):
raise ValueError(
f"Add PyAPI_FUNC(...) or PyAPI_DATA(...) to declaration of {hole.symbol}!"
)
holes.append(hole) holes.append(hole)
stencil.holes[:] = holes stencil.holes[:] = holes
self.code.pad(alignment) self.code.pad(alignment)

View File

@ -106,7 +106,7 @@ class _Target(typing.Generic[_S, _R]):
o = tempdir / f"{opname}.o" o = tempdir / f"{opname}.o"
args = [ args = [
f"--target={self.triple}", f"--target={self.triple}",
"-DPy_BUILD_CORE", "-DPy_BUILD_CORE_MODULE",
"-D_DEBUG" if self.debug else "-DNDEBUG", "-D_DEBUG" if self.debug else "-DNDEBUG",
f"-D_JIT_OPCODE={opname}", f"-D_JIT_OPCODE={opname}",
"-D_PyJIT_ACTIVE", "-D_PyJIT_ACTIVE",
@ -118,12 +118,17 @@ class _Target(typing.Generic[_S, _R]):
f"-I{CPYTHON / 'Python'}", f"-I{CPYTHON / 'Python'}",
"-O3", "-O3",
"-c", "-c",
# This debug info isn't necessary, and bloats out the JIT'ed code.
# We *may* be able to re-enable this, process it, and JIT it for a
# nicer debugging experience... but that needs a lot more research:
"-fno-asynchronous-unwind-tables", "-fno-asynchronous-unwind-tables",
# Don't call built-in functions that we can't find or patch:
"-fno-builtin", "-fno-builtin",
# SET_FUNCTION_ATTRIBUTE on 32-bit Windows debug builds: # Emit relaxable 64-bit calls/jumps, so we don't have to worry about
"-fno-jump-tables", # emitting in-range trampolines for out-of-range targets.
# We can probably remove this and emit trampolines in the future:
"-fno-plt", "-fno-plt",
# Don't make calls to weird stack-smashing canaries: # Don't call stack-smashing canaries that we can't find or patch:
"-fno-stack-protector", "-fno-stack-protector",
"-o", "-o",
f"{o}", f"{o}",
@ -194,34 +199,45 @@ class _COFF(
offset = base + symbol["Value"] offset = base + symbol["Value"]
name = symbol["Name"] name = symbol["Name"]
name = name.removeprefix(self.prefix) name = name.removeprefix(self.prefix)
group.symbols[name] = value, offset if name not in group.symbols:
group.symbols[name] = value, offset
for wrapped_relocation in section["Relocations"]: for wrapped_relocation in section["Relocations"]:
relocation = wrapped_relocation["Relocation"] relocation = wrapped_relocation["Relocation"]
hole = self._handle_relocation(base, relocation, stencil.body) hole = self._handle_relocation(base, relocation, stencil.body)
stencil.holes.append(hole) stencil.holes.append(hole)
def _unwrap_dllimport(self, name: str) -> tuple[_stencils.HoleValue, str | None]:
if name.startswith("__imp_"):
name = name.removeprefix("__imp_")
name = name.removeprefix(self.prefix)
return _stencils.HoleValue.GOT, name
name = name.removeprefix(self.prefix)
return _stencils.symbol_to_value(name)
def _handle_relocation( def _handle_relocation(
self, base: int, relocation: _schema.COFFRelocation, raw: bytes self, base: int, relocation: _schema.COFFRelocation, raw: bytes
) -> _stencils.Hole: ) -> _stencils.Hole:
match relocation: match relocation:
case {
"Offset": offset,
"Symbol": s,
"Type": {"Value": "IMAGE_REL_AMD64_ADDR64" as kind},
}:
offset += base
s = s.removeprefix(self.prefix)
value, symbol = _stencils.symbol_to_value(s)
addend = int.from_bytes(raw[offset : offset + 8], "little")
case { case {
"Offset": offset, "Offset": offset,
"Symbol": s, "Symbol": s,
"Type": {"Value": "IMAGE_REL_I386_DIR32" as kind}, "Type": {"Value": "IMAGE_REL_I386_DIR32" as kind},
}: }:
offset += base offset += base
s = s.removeprefix(self.prefix) value, symbol = self._unwrap_dllimport(s)
value, symbol = _stencils.symbol_to_value(s)
addend = int.from_bytes(raw[offset : offset + 4], "little") addend = int.from_bytes(raw[offset : offset + 4], "little")
case {
"Offset": offset,
"Symbol": s,
"Type": {
"Value": "IMAGE_REL_AMD64_REL32" | "IMAGE_REL_I386_REL32" as kind
},
}:
offset += base
value, symbol = self._unwrap_dllimport(s)
addend = (
int.from_bytes(raw[offset : offset + 4], "little", signed=True) - 4
)
case _: case _:
raise NotImplementedError(relocation) raise NotImplementedError(relocation)
return _stencils.Hole(offset, kind, value, symbol, addend) return _stencils.Hole(offset, kind, value, symbol, addend)
@ -423,12 +439,12 @@ def get_target(host: str) -> _COFF | _ELF | _MachO:
args = ["-mcmodel=large"] args = ["-mcmodel=large"]
return _ELF(host, alignment=8, args=args) return _ELF(host, alignment=8, args=args)
if re.fullmatch(r"i686-pc-windows-msvc", host): if re.fullmatch(r"i686-pc-windows-msvc", host):
args = ["-mcmodel=large"] args = ["-DPy_NO_ENABLE_SHARED"]
return _COFF(host, args=args, prefix="_") return _COFF(host, args=args, prefix="_")
if re.fullmatch(r"x86_64-apple-darwin.*", host): if re.fullmatch(r"x86_64-apple-darwin.*", host):
return _MachO(host, prefix="_") return _MachO(host, prefix="_")
if re.fullmatch(r"x86_64-pc-windows-msvc", host): if re.fullmatch(r"x86_64-pc-windows-msvc", host):
args = ["-mcmodel=large"] args = ["-fms-runtime-lib=dll"]
return _COFF(host, args=args) return _COFF(host, args=args)
if re.fullmatch(r"x86_64-.*-linux-gnu", host): if re.fullmatch(r"x86_64-.*-linux-gnu", host):
return _ELF(host) return _ELF(host)

View File

@ -9,6 +9,7 @@
#include "pycore_long.h" #include "pycore_long.h"
#include "pycore_opcode_metadata.h" #include "pycore_opcode_metadata.h"
#include "pycore_opcode_utils.h" #include "pycore_opcode_utils.h"
#include "pycore_optimizer.h"
#include "pycore_range.h" #include "pycore_range.h"
#include "pycore_setobject.h" #include "pycore_setobject.h"
#include "pycore_sliceobject.h" #include "pycore_sliceobject.h"
@ -58,11 +59,11 @@ do { \
} while (0) } while (0)
#define PATCH_VALUE(TYPE, NAME, ALIAS) \ #define PATCH_VALUE(TYPE, NAME, ALIAS) \
extern void ALIAS; \ PyAPI_DATA(void) ALIAS; \
TYPE NAME = (TYPE)(uint64_t)&ALIAS; TYPE NAME = (TYPE)(uint64_t)&ALIAS;
#define PATCH_JUMP(ALIAS) \ #define PATCH_JUMP(ALIAS) \
extern void ALIAS; \ PyAPI_DATA(void) ALIAS; \
__attribute__((musttail)) \ __attribute__((musttail)) \
return ((jit_func)&ALIAS)(frame, stack_pointer, tstate); return ((jit_func)&ALIAS)(frame, stack_pointer, tstate);