GH-117512: Allow 64-bit JIT operands on 32-bit platforms (GH-117527)

This commit is contained in:
Brandt Bucher 2024-04-06 08:26:43 -07:00 committed by GitHub
parent df4d84c3cd
commit 62aeb0ee69
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 47 additions and 31 deletions

View File

@ -149,12 +149,12 @@ set_bits(uint32_t *loc, uint8_t loc_start, uint64_t value, uint8_t value_start,
// Fill all of stencil's holes in the memory pointed to by base, using the
// values in patches.
static void
patch(unsigned char *base, const Stencil *stencil, uint64_t *patches)
patch(unsigned char *base, const Stencil *stencil, uintptr_t patches[])
{
for (uint64_t i = 0; i < stencil->holes_size; i++) {
for (size_t i = 0; i < stencil->holes_size; i++) {
const Hole *hole = &stencil->holes[i];
unsigned char *location = base + hole->offset;
uint64_t value = patches[hole->value] + (uint64_t)hole->symbol + hole->addend;
uint64_t value = patches[hole->value] + (uintptr_t)hole->symbol + hole->addend;
uint8_t *loc8 = (uint8_t *)location;
uint32_t *loc32 = (uint32_t *)location;
uint64_t *loc64 = (uint64_t *)location;
@ -228,7 +228,7 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches)
case HoleKind_X86_64_RELOC_SIGNED:
case HoleKind_X86_64_RELOC_BRANCH:
// 32-bit relative address.
value -= (uint64_t)location;
value -= (uintptr_t)location;
// Check that we're not out of range of 32 signed bits:
assert((int64_t)value >= -(1LL << 31));
assert((int64_t)value < (1LL << 31));
@ -239,7 +239,7 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches)
case HoleKind_R_AARCH64_JUMP26:
// 28-bit relative branch.
assert(IS_AARCH64_BRANCH(*loc32));
value -= (uint64_t)location;
value -= (uintptr_t)location;
// Check that we're not out of range of 28 signed bits:
assert((int64_t)value >= -(1 << 27));
assert((int64_t)value < (1 << 27));
@ -313,7 +313,7 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches)
i++;
continue;
}
relaxed = (uint64_t)value - (uint64_t)location;
relaxed = value - (uintptr_t)location;
if ((relaxed & 0x3) == 0 &&
(int64_t)relaxed >= -(1L << 19) &&
(int64_t)relaxed < (1L << 19))
@ -328,7 +328,7 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches)
// Fall through...
case HoleKind_ARM64_RELOC_PAGE21:
// Number of pages between this page and the value's page:
value = (value >> 12) - ((uint64_t)location >> 12);
value = (value >> 12) - ((uintptr_t)location >> 12);
// Check that we're not out of range of 21 signed bits:
assert((int64_t)value >= -(1 << 20));
assert((int64_t)value < (1 << 20));
@ -363,14 +363,14 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches)
}
static void
copy_and_patch(unsigned char *base, const Stencil *stencil, uint64_t *patches)
copy_and_patch(unsigned char *base, const Stencil *stencil, uintptr_t patches[])
{
memcpy(base, stencil->body, stencil->body_size);
patch(base, stencil, patches);
}
static void
emit(const StencilGroup *group, uint64_t patches[])
emit(const StencilGroup *group, uintptr_t patches[])
{
copy_and_patch((unsigned char *)patches[HoleValue_DATA], &group->data, patches);
copy_and_patch((unsigned char *)patches[HoleValue_CODE], &group->code, patches);
@ -381,9 +381,9 @@ int
_PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction *trace, size_t length)
{
// Loop once to find the total compiled size:
uint32_t instruction_starts[UOP_MAX_TRACE_LENGTH];
uint32_t code_size = 0;
uint32_t data_size = 0;
size_t instruction_starts[UOP_MAX_TRACE_LENGTH];
size_t code_size = 0;
size_t data_size = 0;
for (size_t i = 0; i < length; i++) {
_PyUOpInstruction *instruction = (_PyUOpInstruction *)&trace[i];
const StencilGroup *group = &stencil_groups[instruction->opcode];
@ -409,14 +409,20 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction *trace, size
for (size_t i = 0; i < length; i++) {
_PyUOpInstruction *instruction = (_PyUOpInstruction *)&trace[i];
const StencilGroup *group = &stencil_groups[instruction->opcode];
// Think of patches as a dictionary mapping HoleValue to uint64_t:
uint64_t patches[] = GET_PATCHES();
patches[HoleValue_CODE] = (uint64_t)code;
patches[HoleValue_CONTINUE] = (uint64_t)code + group->code.body_size;
patches[HoleValue_DATA] = (uint64_t)data;
patches[HoleValue_EXECUTOR] = (uint64_t)executor;
// Think of patches as a dictionary mapping HoleValue to uintptr_t:
uintptr_t patches[] = GET_PATCHES();
patches[HoleValue_CODE] = (uintptr_t)code;
patches[HoleValue_CONTINUE] = (uintptr_t)code + group->code.body_size;
patches[HoleValue_DATA] = (uintptr_t)data;
patches[HoleValue_EXECUTOR] = (uintptr_t)executor;
patches[HoleValue_OPARG] = instruction->oparg;
#if SIZEOF_VOID_P == 8
patches[HoleValue_OPERAND] = instruction->operand;
#else
assert(SIZEOF_VOID_P == 4);
patches[HoleValue_OPERAND_HI] = instruction->operand >> 32;
patches[HoleValue_OPERAND_LO] = instruction->operand & UINT32_MAX;
#endif
switch (instruction->format) {
case UOP_FORMAT_TARGET:
patches[HoleValue_TARGET] = instruction->target;
@ -425,21 +431,21 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction *trace, size
assert(instruction->exit_index < executor->exit_count);
patches[HoleValue_EXIT_INDEX] = instruction->exit_index;
if (instruction->error_target < length) {
patches[HoleValue_ERROR_TARGET] = (uint64_t)memory + instruction_starts[instruction->error_target];
patches[HoleValue_ERROR_TARGET] = (uintptr_t)memory + instruction_starts[instruction->error_target];
}
break;
case UOP_FORMAT_JUMP:
assert(instruction->jump_target < length);
patches[HoleValue_JUMP_TARGET] = (uint64_t)memory + instruction_starts[instruction->jump_target];
patches[HoleValue_JUMP_TARGET] = (uintptr_t)memory + instruction_starts[instruction->jump_target];
if (instruction->error_target < length) {
patches[HoleValue_ERROR_TARGET] = (uint64_t)memory + instruction_starts[instruction->error_target];
patches[HoleValue_ERROR_TARGET] = (uintptr_t)memory + instruction_starts[instruction->error_target];
}
break;
default:
assert(0);
Py_FatalError("Illegal instruction format");
}
patches[HoleValue_TOP] = (uint64_t)memory + instruction_starts[1];
patches[HoleValue_TOP] = (uintptr_t)memory + instruction_starts[1];
patches[HoleValue_ZERO] = 0;
emit(group, patches);
code += group->code.body_size;
@ -447,12 +453,12 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction *trace, size
}
// Protect against accidental buffer overrun into data:
const StencilGroup *group = &stencil_groups[_FATAL_ERROR];
uint64_t patches[] = GET_PATCHES();
patches[HoleValue_CODE] = (uint64_t)code;
patches[HoleValue_CONTINUE] = (uint64_t)code;
patches[HoleValue_DATA] = (uint64_t)data;
patches[HoleValue_EXECUTOR] = (uint64_t)executor;
patches[HoleValue_TOP] = (uint64_t)code;
uintptr_t patches[] = GET_PATCHES();
patches[HoleValue_CODE] = (uintptr_t)code;
patches[HoleValue_CONTINUE] = (uintptr_t)code;
patches[HoleValue_DATA] = (uintptr_t)data;
patches[HoleValue_EXECUTOR] = (uintptr_t)executor;
patches[HoleValue_TOP] = (uintptr_t)code;
patches[HoleValue_ZERO] = 0;
emit(group, patches);
code += group->code.body_size;

View File

@ -27,8 +27,11 @@ class HoleValue(enum.Enum):
GOT = enum.auto()
# The current uop's oparg (exposed as _JIT_OPARG):
OPARG = enum.auto()
# The current uop's operand (exposed as _JIT_OPERAND):
# The current uop's operand on 64-bit platforms (exposed as _JIT_OPERAND):
OPERAND = enum.auto()
# The current uop's operand on 32-bit platforms (exposed as _JIT_OPERAND_HI and _JIT_OPERAND_LO):
OPERAND_HI = enum.auto()
OPERAND_LO = enum.auto()
# The current uop's target (exposed as _JIT_TARGET):
TARGET = enum.auto()
# The base address of the machine code for the jump target (exposed as _JIT_JUMP_TARGET):

View File

@ -17,7 +17,7 @@ def _dump_header() -> typing.Iterator[str]:
yield "} HoleValue;"
yield ""
yield "typedef struct {"
yield " const uint64_t offset;"
yield " const size_t offset;"
yield " const HoleKind kind;"
yield " const HoleValue value;"
yield " const void *symbol;"
@ -58,7 +58,7 @@ def _dump_footer(opnames: typing.Iterable[str]) -> typing.Iterator[str]:
yield ""
yield "#define GET_PATCHES() { \\"
for value in _stencils.HoleValue:
yield f" [HoleValue_{value.name}] = (uint64_t)0xBADBADBADBADBADB, \\"
yield f" [HoleValue_{value.name}] = (uintptr_t)0xBADBADBADBADBADB, \\"
yield "}"

View File

@ -88,7 +88,14 @@ _JIT_ENTRY(_PyInterpreterFrame *frame, PyObject **stack_pointer, PyThreadState *
int uopcode = _JIT_OPCODE;
// Other stuff we need handy:
PATCH_VALUE(uint16_t, _oparg, _JIT_OPARG)
#if SIZEOF_VOID_P == 8
PATCH_VALUE(uint64_t, _operand, _JIT_OPERAND)
#else
assert(SIZEOF_VOID_P == 4);
PATCH_VALUE(uint32_t, _operand_hi, _JIT_OPERAND_HI)
PATCH_VALUE(uint32_t, _operand_lo, _JIT_OPERAND_LO)
uint64_t _operand = ((uint64_t)_operand_hi << 32) | _operand_lo;
#endif
PATCH_VALUE(uint32_t, _target, _JIT_TARGET)
PATCH_VALUE(uint16_t, _exit_index, _JIT_EXIT_INDEX)