GH-113464: Add a JIT backend for tier 2 (GH-113465)

Add an option (--enable-experimental-jit for configure-based builds
or --experimental-jit for PCbuild-based ones) to build an
*experimental* just-in-time compiler, based on copy-and-patch (https://fredrikbk.com/publications/copy-and-patch.pdf).

See Tools/jit/README.md for more information on how to install the required build-time tooling.
This commit is contained in:
Brandt Bucher 2024-01-28 18:48:48 -08:00 committed by GitHub
parent f7c05d7ad3
commit f6d9e5926b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
29 changed files with 1738 additions and 5 deletions

112
.github/workflows/jit.yml vendored Normal file
View File

@ -0,0 +1,112 @@
name: JIT
on:
pull_request:
paths: '**jit**'
push:
paths: '**jit**'
workflow_dispatch:
jobs:
jit:
name: ${{ matrix.target }} (${{ matrix.debug && 'Debug' || 'Release' }})
runs-on: ${{ matrix.runner }}
strategy:
fail-fast: false
matrix:
target:
- i686-pc-windows-msvc/msvc
- x86_64-pc-windows-msvc/msvc
- x86_64-apple-darwin/clang
- x86_64-unknown-linux-gnu/gcc
- x86_64-unknown-linux-gnu/clang
- aarch64-unknown-linux-gnu/gcc
- aarch64-unknown-linux-gnu/clang
debug:
- true
- false
llvm:
- 16
include:
- target: i686-pc-windows-msvc/msvc
architecture: Win32
runner: windows-latest
compiler: msvc
- target: x86_64-pc-windows-msvc/msvc
architecture: x64
runner: windows-latest
compiler: msvc
- target: x86_64-apple-darwin/clang
architecture: x86_64
runner: macos-latest
compiler: clang
exclude: test_embed
- target: x86_64-unknown-linux-gnu/gcc
architecture: x86_64
runner: ubuntu-latest
compiler: gcc
- target: x86_64-unknown-linux-gnu/clang
architecture: x86_64
runner: ubuntu-latest
compiler: clang
- target: aarch64-unknown-linux-gnu/gcc
architecture: aarch64
runner: ubuntu-latest
compiler: gcc
# These fail because of emulation, not because of the JIT:
exclude: test_unix_events test_init test_process_pool test_shutdown test_multiprocessing_fork test_cmd_line test_faulthandler test_os test_perf_profiler test_posix test_signal test_socket test_subprocess test_threading test_venv
- target: aarch64-unknown-linux-gnu/clang
architecture: aarch64
runner: ubuntu-latest
compiler: clang
# These fail because of emulation, not because of the JIT:
exclude: test_unix_events test_init test_process_pool test_shutdown test_multiprocessing_fork test_cmd_line test_faulthandler test_os test_perf_profiler test_posix test_signal test_socket test_subprocess test_threading test_venv
env:
CC: ${{ matrix.compiler }}
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: '3.11'
- name: Windows
if: runner.os == 'Windows'
run: |
choco install llvm --allow-downgrade --no-progress --version ${{ matrix.llvm }}
./PCbuild/build.bat --experimental-jit ${{ matrix.debug && '-d' || '--pgo' }} -p ${{ matrix.architecture }}
./PCbuild/rt.bat ${{ matrix.debug && '-d' }} -p ${{ matrix.architecture }} -q --exclude ${{ matrix.exclude }} --multiprocess 0 --timeout 3600 --verbose2 --verbose3
- name: macOS
if: runner.os == 'macOS'
run: |
brew install llvm@${{ matrix.llvm }}
export SDKROOT="$(xcrun --show-sdk-path)"
./configure --enable-experimental-jit ${{ matrix.debug && '--with-pydebug' || '--enable-optimizations --with-lto' }}
make all --jobs 3
./python.exe -m test --exclude ${{ matrix.exclude }} --multiprocess 0 --timeout 3600 --verbose2 --verbose3
- name: Native Linux
if: runner.os == 'Linux' && matrix.architecture == 'x86_64'
run: |
sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ./llvm.sh ${{ matrix.llvm }}
export PATH="$(llvm-config-${{ matrix.llvm }} --bindir):$PATH"
./configure --enable-experimental-jit ${{ matrix.debug && '--with-pydebug' || '--enable-optimizations --with-lto' }}
make all --jobs 4
./python -m test --exclude ${{ matrix.exclude }} --multiprocess 0 --timeout 3600 --verbose2 --verbose3
- name: Emulated Linux
if: runner.os == 'Linux' && matrix.architecture != 'x86_64'
run: |
sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ./llvm.sh ${{ matrix.llvm }}
export PATH="$(llvm-config-${{ matrix.llvm }} --bindir):$PATH"
./configure --prefix="$(pwd)/../build"
make install --jobs 4
make clean --jobs 4
export HOST=${{ matrix.architecture }}-linux-gnu
sudo apt install --yes "gcc-$HOST" qemu-user
${{ !matrix.debug && matrix.compiler == 'clang' && './configure --enable-optimizations' || '' }}
${{ !matrix.debug && matrix.compiler == 'clang' && 'make profile-run-stamp --jobs 4' || '' }}
export CC="${{ matrix.compiler == 'clang' && 'clang --target=$HOST' || '$HOST-gcc' }}"
export CPP="$CC --preprocess"
export HOSTRUNNER=qemu-${{ matrix.architecture }}
export QEMU_LD_PREFIX="/usr/$HOST"
./configure --enable-experimental-jit ${{ matrix.debug && '--with-pydebug' || '--enable-optimizations --with-lto' }} --build=x86_64-linux-gnu --host="$HOST" --with-build-python=../build/bin/python3 --with-pkg-config=no ac_cv_buggy_getaddrinfo=no ac_cv_file__dev_ptc=no ac_cv_file__dev_ptmx=yes
make all --jobs 4
./python -m test --exclude ${{ matrix.exclude }} --multiprocess 0 --timeout 3600 --verbose2 --verbose3

View File

@ -12,6 +12,7 @@ on:
- "Tools/build/generate_sbom.py"
- "Tools/cases_generator/**"
- "Tools/clinic/**"
- "Tools/jit/**"
- "Tools/peg_generator/**"
- "Tools/requirements-dev.txt"
- "Tools/wasm/**"
@ -38,6 +39,7 @@ jobs:
"Tools/build/",
"Tools/cases_generator",
"Tools/clinic",
"Tools/jit",
"Tools/peg_generator",
"Tools/wasm",
]

1
.gitignore vendored
View File

@ -126,6 +126,7 @@ Tools/unicode/data/
# hendrikmuhs/ccache-action@v1
/.ccache
/cross-build/
/jit_stencils.h
/platform
/profile-clean-stamp
/profile-run-stamp

View File

@ -39,6 +39,8 @@ typedef struct {
typedef struct _PyExecutorObject {
PyObject_VAR_HEAD
_PyVMData vm_data; /* Used by the VM, but opaque to the optimizer */
void *jit_code;
size_t jit_size;
_PyUOpInstruction trace[1];
} _PyExecutorObject;

View File

@ -0,0 +1,25 @@
#ifndef Py_INTERNAL_JIT_H
#define Py_INTERNAL_JIT_H
#ifdef __cplusplus
extern "C" {
#endif
#ifndef Py_BUILD_CORE
# error "this header requires Py_BUILD_CORE define"
#endif
#ifdef _Py_JIT
// Signature of a JIT-compiled trace. The caller in ceval.c stores the returned
// pointer in next_instr and treats a NULL result as an error.
typedef _Py_CODEUNIT *(*jit_func)(_PyInterpreterFrame *frame, PyObject **stack_pointer, PyThreadState *tstate);
// Compile the `length` uops in `trace` and, on success, store the resulting
// memory in executor->jit_code / executor->jit_size. Returns 0 on success and
// -1 on failure.
int _PyJIT_Compile(_PyExecutorObject *executor, _PyUOpInstruction *trace, size_t length);
// Release the memory recorded in executor->jit_code (if any) and reset
// executor->jit_code / executor->jit_size.
void _PyJIT_Free(_PyExecutorObject *executor);
#endif // _Py_JIT
#ifdef __cplusplus
}
#endif
#endif // !Py_INTERNAL_JIT_H

View File

@ -178,7 +178,7 @@ _Py_DECREF_SPECIALIZED(PyObject *op, const destructor destruct)
}
_Py_DECREF_STAT_INC();
#ifdef Py_REF_DEBUG
_Py_DEC_REFTOTAL(_PyInterpreterState_GET());
_Py_DEC_REFTOTAL(PyInterpreterState_Get());
#endif
if (--op->ob_refcnt != 0) {
assert(op->ob_refcnt > 0);
@ -199,7 +199,7 @@ _Py_DECREF_NO_DEALLOC(PyObject *op)
}
_Py_DECREF_STAT_INC();
#ifdef Py_REF_DEBUG
_Py_DEC_REFTOTAL(_PyInterpreterState_GET());
_Py_DEC_REFTOTAL(PyInterpreterState_Get());
#endif
op->ob_refcnt--;
#ifdef Py_DEBUG

View File

@ -433,6 +433,7 @@ PYTHON_OBJS= \
Python/initconfig.o \
Python/instrumentation.o \
Python/intrinsics.o \
Python/jit.o \
Python/legacy_tracing.o \
Python/lock.o \
Python/marshal.o \
@ -1365,7 +1366,7 @@ regen-unicodedata:
regen-all: regen-cases regen-typeslots \
regen-token regen-ast regen-keyword regen-sre regen-frozen \
regen-pegen-metaparser regen-pegen regen-test-frozenmain \
regen-test-levenshtein regen-global-objects regen-sbom
regen-test-levenshtein regen-global-objects regen-sbom regen-jit
@echo
@echo "Note: make regen-stdlib-module-names, make regen-limited-abi, "
@echo "make regen-configure and make regen-unicodedata should be run manually"
@ -1846,6 +1847,7 @@ PYTHON_HEADERS= \
$(srcdir)/Include/internal/pycore_initconfig.h \
$(srcdir)/Include/internal/pycore_interp.h \
$(srcdir)/Include/internal/pycore_intrinsics.h \
$(srcdir)/Include/internal/pycore_jit.h \
$(srcdir)/Include/internal/pycore_list.h \
$(srcdir)/Include/internal/pycore_llist.h \
$(srcdir)/Include/internal/pycore_lock.h \
@ -2641,6 +2643,12 @@ config.status: $(srcdir)/configure
Python/asm_trampoline.o: $(srcdir)/Python/asm_trampoline.S
$(CC) -c $(PY_CORE_CFLAGS) -o $@ $<
Python/jit.o: regen-jit
.PHONY: regen-jit
regen-jit:
@REGEN_JIT_COMMAND@
# Some make's put the object file in the current directory
.c.o:
$(CC) -c $(PY_CORE_CFLAGS) -o $@ $<
@ -2733,6 +2741,7 @@ clean-retain-profile: pycremoval
-rm -f Python/deepfreeze/*.[co]
-rm -f Python/frozen_modules/*.h
-rm -f Python/frozen_modules/MANIFEST
-rm -f jit_stencils.h
-find build -type f -a ! -name '*.gc??' -exec rm -f {} ';'
-rm -f Include/pydtrace_probes.h
-rm -f profile-gen-stamp

View File

@ -0,0 +1,4 @@
Add an option (``--enable-experimental-jit`` for ``configure``-based builds
or ``--experimental-jit`` for ``PCbuild``-based ones) to build an
*experimental* just-in-time compiler, based on `copy-and-patch
<https://fredrikbk.com/publications/copy-and-patch.pdf>`_.

View File

@ -224,6 +224,7 @@
<ClCompile Include="..\Python\initconfig.c" />
<ClCompile Include="..\Python\intrinsics.c" />
<ClCompile Include="..\Python\instrumentation.c" />
<ClCompile Include="..\Python\jit.c" />
<ClCompile Include="..\Python\legacy_tracing.c" />
<ClCompile Include="..\Python\lock.c" />
<ClCompile Include="..\Python\marshal.c" />

View File

@ -250,6 +250,9 @@
<ClCompile Include="..\Objects\iterobject.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\Python\jit.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\Objects\listobject.c">
<Filter>Source Files</Filter>
</ClCompile>

View File

@ -36,6 +36,7 @@ echo. overrides -c and -d
echo. --disable-gil Enable experimental support for running without the GIL.
echo. --test-marker Enable the test marker within the build.
echo. --regen Regenerate all opcodes, grammar and tokens.
echo. --experimental-jit Enable the experimental just-in-time compiler.
echo.
echo.Available flags to avoid building certain modules.
echo.These flags have no effect if '-e' is not given:
@ -85,6 +86,7 @@ if "%~1"=="--disable-gil" (set UseDisableGil=true) & shift & goto CheckOpts
if "%~1"=="--test-marker" (set UseTestMarker=true) & shift & goto CheckOpts
if "%~1"=="-V" shift & goto Version
if "%~1"=="--regen" (set Regen=true) & shift & goto CheckOpts
if "%~1"=="--experimental-jit" (set UseJIT=true) & shift & goto CheckOpts
rem These use the actual property names used by MSBuild. We could just let
rem them in through the environment, but we specify them on the command line
rem anyway for visibility so set defaults after this
@ -176,6 +178,7 @@ echo on
/p:IncludeSSL=%IncludeSSL% /p:IncludeTkinter=%IncludeTkinter%^
/p:DisableGil=%UseDisableGil%^
/p:UseTestMarker=%UseTestMarker% %GITProperty%^
/p:UseJIT=%UseJIT%^
%1 %2 %3 %4 %5 %6 %7 %8 %9
@echo off

View File

@ -104,6 +104,7 @@
<AdditionalIncludeDirectories Condition="$(IncludeExternals)">$(zlibDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>_USRDLL;Py_BUILD_CORE;Py_BUILD_CORE_BUILTIN;Py_ENABLE_SHARED;MS_DLL_ID="$(SysWinVer)";%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="$(IncludeExternals)">_Py_HAVE_ZLIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="'$(UseJIT)' == 'true'">_Py_JIT;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<AdditionalDependencies>version.lib;ws2_32.lib;pathcch.lib;bcrypt.lib;%(AdditionalDependencies)</AdditionalDependencies>
@ -247,6 +248,7 @@
<ClInclude Include="..\Include\internal\pycore_initconfig.h" />
<ClInclude Include="..\Include\internal\pycore_interp.h" />
<ClInclude Include="..\Include\internal\pycore_intrinsics.h" />
<ClInclude Include="..\Include\internal\pycore_jit.h" />
<ClInclude Include="..\Include\internal\pycore_list.h" />
<ClInclude Include="..\Include\internal\pycore_llist.h" />
<ClInclude Include="..\Include\internal\pycore_lock.h" />
@ -585,6 +587,7 @@
<ClCompile Include="..\Python\initconfig.c" />
<ClCompile Include="..\Python\intrinsics.c" />
<ClCompile Include="..\Python\instrumentation.c" />
<ClCompile Include="..\Python\jit.c" />
<ClCompile Include="..\Python\legacy_tracing.c" />
<ClCompile Include="..\Python\lock.c" />
<ClCompile Include="..\Python\marshal.c" />

View File

@ -669,6 +669,9 @@
<ClInclude Include="..\Include\internal\pycore_intrinsics.h">
<Filter>Include\cpython</Filter>
</ClInclude>
<ClInclude Include="..\Include\internal\pycore_jit.h">
<Filter>Include\internal</Filter>
</ClInclude>
<ClInclude Include="..\Include\internal\pycore_list.h">
<Filter>Include\internal</Filter>
</ClInclude>
@ -1337,6 +1340,9 @@
<ClCompile Include="..\Python\instrumentation.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\Python\jit.c">
<Filter>Python</Filter>
</ClCompile>
<ClCompile Include="..\Python\legacy_tracing.c">
<Filter>Source Files</Filter>
</ClCompile>

View File

@ -28,6 +28,9 @@
</_TokenOutputs>
<_KeywordSources Include="$(PySourcePath)Grammar\python.gram;$(PySourcePath)Grammar\Tokens" />
<_KeywordOutputs Include="$(PySourcePath)Lib\keyword.py" />
<!-- Taken from _Target._compute_digest in Tools\jit\_targets.py: -->
<_JITSources Include="$(PySourcePath)Python\executor_cases.c.h;$(GeneratedPyConfigDir)pyconfig.h;$(PySourcePath)Tools\jit\**"/>
<_JITOutputs Include="$(GeneratedPyConfigDir)jit_stencils.h"/>
</ItemGroup>
<Target Name="_TouchRegenSources" Condition="$(ForceRegen) == 'true'">
@ -76,10 +79,28 @@
<Exec Command="$(PythonForBuild) Tools\build\generate_global_objects.py"
WorkingDirectory="$(PySourcePath)" />
</Target>
<Target Name="_RegenJIT"
Condition="'$(UseJIT)' == 'true'"
DependsOnTargets="_UpdatePyconfig;FindPythonForBuild"
Inputs="@(_JITSources)"
Outputs="@(_JITOutputs)">
<PropertyGroup>
<JITArgs Condition="$(Platform) == 'ARM64'">aarch64-pc-windows-msvc</JITArgs>
<JITArgs Condition="$(Platform) == 'Win32'">i686-pc-windows-msvc</JITArgs>
<JITArgs Condition="$(Platform) == 'x64'">x86_64-pc-windows-msvc</JITArgs>
<JITArgs Condition="$(Configuration) == 'Debug'">$(JITArgs) --debug</JITArgs>
</PropertyGroup>
<Exec Command='$(PythonForBuild) "$(PySourcePath)Tools\jit\build.py" $(JITArgs)'
WorkingDirectory="$(GeneratedPyConfigDir)"/>
</Target>
<Target Name="Regen"
<Target Name="_RegenNoPGUpdate"
Condition="$(Configuration) != 'PGUpdate'"
DependsOnTargets="_TouchRegenSources;_RegenPegen;_RegenAST_H;_RegenTokens;_RegenKeywords;_RegenGlobalObjects">
</Target>
<Target Name="Regen" DependsOnTargets="_RegenNoPGUpdate;_RegenJIT">
<Message Text="Generated sources are up to date" Importance="high" />
</Target>

View File

@ -11,6 +11,7 @@
#include "pycore_function.h"
#include "pycore_instruments.h"
#include "pycore_intrinsics.h"
#include "pycore_jit.h"
#include "pycore_long.h" // _PyLong_GetZero()
#include "pycore_moduleobject.h" // PyModuleObject
#include "pycore_object.h" // _PyObject_GC_TRACK()
@ -955,9 +956,24 @@ resume_with_error:
// The Tier 2 interpreter is also here!
// Tier 2 is also here!
enter_tier_two:
#ifdef _Py_JIT
; // ;)
jit_func jitted = current_executor->jit_code;
next_instr = jitted(frame, stack_pointer, tstate);
frame = tstate->current_frame;
Py_DECREF(current_executor);
if (next_instr == NULL) {
goto resume_with_error;
}
stack_pointer = _PyFrame_GetStackPointer(frame);
DISPATCH();
#else
#undef LOAD_IP
#define LOAD_IP(UNUSED) (void)0
@ -1073,6 +1089,8 @@ deoptimize:
Py_DECREF(current_executor);
DISPATCH();
#endif // _Py_JIT
}
#if defined(__GNUC__)
# pragma GCC diagnostic pop

369
Python/jit.c Normal file
View File

@ -0,0 +1,369 @@
#ifdef _Py_JIT
#include "Python.h"
#include "pycore_abstract.h"
#include "pycore_call.h"
#include "pycore_ceval.h"
#include "pycore_dict.h"
#include "pycore_intrinsics.h"
#include "pycore_long.h"
#include "pycore_opcode_metadata.h"
#include "pycore_opcode_utils.h"
#include "pycore_optimizer.h"
#include "pycore_pyerrors.h"
#include "pycore_setobject.h"
#include "pycore_sliceobject.h"
#include "pycore_jit.h"
#include "jit_stencils.h"
// Memory management stuff: ////////////////////////////////////////////////////
#ifndef MS_WINDOWS
#include <sys/mman.h>
#endif
// Return the size, in bytes, of a memory page on the current platform.
static size_t
get_page_size(void)
{
    size_t page_size;
#ifdef MS_WINDOWS
    SYSTEM_INFO system_info;
    GetSystemInfo(&system_info);
    page_size = system_info.dwPageSize;
#else
    page_size = sysconf(_SC_PAGESIZE);
#endif
    return page_size;
}
// Set a RuntimeWarning exception of the form "JIT <message> (<code>)", where
// <code> is the platform's most recent error code (GetLastError() on Windows,
// errno everywhere else).
static void
jit_error(const char *message)
{
#ifdef MS_WINDOWS
    int code = GetLastError();
#else
    int code = errno;
#endif
    PyErr_Format(PyExc_RuntimeWarning, "JIT %s (%d)", message, code);
}
// Allocate `size` bytes of fresh read/write memory straight from the OS.
// `size` must be a nonzero multiple of the page size. Returns NULL (with an
// exception set by jit_error) if the allocation fails.
static char *
jit_alloc(size_t size)
{
    assert(size);
    assert(size % get_page_size() == 0);
    char *block;
#ifdef MS_WINDOWS
    block = VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
    if (block == NULL) {
        jit_error("unable to allocate memory");
        return NULL;
    }
#else
    block = mmap(NULL, size, PROT_READ | PROT_WRITE,
                 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
    if (block == MAP_FAILED) {
        jit_error("unable to allocate memory");
        return NULL;
    }
#endif
    return block;
}
// Return memory obtained from jit_alloc to the OS. `size` must be the same
// nonzero, page-aligned size that was allocated. Returns 0 on success, or -1
// (with an exception set by jit_error) on failure.
static int
jit_free(char *memory, size_t size)
{
    assert(size);
    assert(size % get_page_size() == 0);
#ifdef MS_WINDOWS
    int ok = VirtualFree(memory, 0, MEM_RELEASE) != 0;
#else
    int ok = munmap(memory, size) == 0;
#endif
    if (ok) {
        return 0;
    }
    jit_error("unable to free memory");
    return -1;
}
// Flush the instruction cache for [memory, memory + size) and make the region
// read + execute (and no longer writable). A size of zero is a no-op;
// otherwise size must be a multiple of the page size. Returns 0 on success,
// or -1 (with an exception set by jit_error) on failure.
static int
mark_executable(char *memory, size_t size)
{
    if (size == 0) {
        return 0;
    }
    assert(size % get_page_size() == 0);
    // Do NOT ever leave the memory writable! Also, don't forget to flush the
    // i-cache (I cannot begin to tell you how horrible that is to debug):
#ifdef MS_WINDOWS
    if (!FlushInstructionCache(GetCurrentProcess(), memory, size)) {
        jit_error("unable to flush instruction cache");
        return -1;
    }
    // VirtualProtect reports the previous protection through a PDWORD, so the
    // out-parameter must be a DWORD (as in mark_readable), not an int:
    DWORD old;
    int failed = !VirtualProtect(memory, size, PAGE_EXECUTE_READ, &old);
#else
    __builtin___clear_cache((char *)memory, (char *)memory + size);
    int failed = mprotect(memory, size, PROT_EXEC | PROT_READ);
#endif
    if (failed) {
        jit_error("unable to protect executable memory");
        return -1;
    }
    return 0;
}
// Make [memory, memory + size) read-only. A size of zero is a no-op;
// otherwise size must be a multiple of the page size. Returns 0 on success,
// or -1 (with an exception set by jit_error) on failure.
static int
mark_readable(char *memory, size_t size)
{
    if (size != 0) {
        assert(size % get_page_size() == 0);
#ifdef MS_WINDOWS
        DWORD previous;
        int ok = VirtualProtect(memory, size, PAGE_READONLY, &previous) != 0;
#else
        int ok = mprotect(memory, size, PROT_READ) == 0;
#endif
        if (!ok) {
            jit_error("unable to protect readable memory");
            return -1;
        }
    }
    return 0;
}
// JIT compiler stuff: /////////////////////////////////////////////////////////
// Warning! AArch64 requires you to get your hands dirty. These are your gloves:
// Extract `width` bits (at most 32) of value, starting at bit value_start:
// value[value_start : value_start + width]
static uint32_t
get_bits(uint64_t value, uint8_t value_start, uint8_t width)
{
    assert(width <= 32);
    uint64_t mask = (1ULL << width) - 1;
    uint64_t shifted = value >> value_start;
    return shifted & mask;
}
// Overwrite `width` bits of *loc (starting at bit loc_start) with `width` bits
// of value (starting at bit value_start), leaving every other bit untouched:
// *loc[loc_start : loc_start + width] = value[value_start : value_start + width]
static void
set_bits(uint32_t *loc, uint8_t loc_start, uint64_t value, uint8_t value_start,
         uint8_t width)
{
    assert(loc_start + width <= 32);
    uint32_t field = get_bits(value, value_start, width);
    // Zero out the destination field first:
    uint64_t field_mask = ((1ULL << width) - 1) << loc_start;
    *loc &= ~field_mask;
    assert(get_bits(*loc, loc_start, width) == 0);
    // Then drop the new bits into place:
    *loc |= field << loc_start;
    assert(get_bits(*loc, loc_start, width) == field);
}
// See https://developer.arm.com/documentation/ddi0602/2023-09/Base-Instructions
// for instruction encodings:
// Each macro masks the 32-bit instruction word I and compares the result with
// a fixed bit pattern. patch() uses them only inside assert()s, to check that
// a hole really sits on the kind of instruction its relocation expects.
#define IS_AARCH64_ADD_OR_SUB(I) (((I) & 0x11C00000) == 0x11000000)
#define IS_AARCH64_ADRP(I) (((I) & 0x9F000000) == 0x90000000)
#define IS_AARCH64_BRANCH(I) (((I) & 0x7C000000) == 0x14000000)
#define IS_AARCH64_LDR_OR_STR(I) (((I) & 0x3B000000) == 0x39000000)
#define IS_AARCH64_MOV(I) (((I) & 0x9F800000) == 0x92800000)
// Fill all of stencil's holes in the memory pointed to by base, using the
// values in patches.
// - base: start of the already-copied stencil body; hole offsets are relative
//   to it.
// - stencil: provides the holes to fill (stencil->holes, stencil->holes_size).
// - patches: table indexed by hole->value; the selected entry is combined with
//   the hole's symbol address and addend to form the value written.
// A hole kind that the switch below does not handle reaches Py_UNREACHABLE().
static void
patch(char *base, const Stencil *stencil, uint64_t *patches)
{
for (uint64_t i = 0; i < stencil->holes_size; i++) {
const Hole *hole = &stencil->holes[i];
void *location = base + hole->offset;
uint64_t value = patches[hole->value] + (uint64_t)hole->symbol + hole->addend;
// Two views of the same location, for 32-bit and 64-bit writes:
uint32_t *loc32 = (uint32_t *)location;
uint64_t *loc64 = (uint64_t *)location;
// LLD is a great reference for performing relocations... just keep in
// mind that Tools/jit/build.py does filtering and preprocessing for us!
// Here's a good place to start for each platform:
// - aarch64-apple-darwin:
// - https://github.com/llvm/llvm-project/blob/main/lld/MachO/Arch/ARM64Common.cpp
// - https://github.com/llvm/llvm-project/blob/main/lld/MachO/Arch/ARM64Common.h
// - aarch64-unknown-linux-gnu:
// - https://github.com/llvm/llvm-project/blob/main/lld/ELF/Arch/AArch64.cpp
// - i686-pc-windows-msvc:
// - https://github.com/llvm/llvm-project/blob/main/lld/COFF/Chunks.cpp
// - x86_64-apple-darwin:
// - https://github.com/llvm/llvm-project/blob/main/lld/MachO/Arch/X86_64.cpp
// - x86_64-pc-windows-msvc:
// - https://github.com/llvm/llvm-project/blob/main/lld/COFF/Chunks.cpp
// - x86_64-unknown-linux-gnu:
// - https://github.com/llvm/llvm-project/blob/main/lld/ELF/Arch/X86_64.cpp
// Every handled case ends in "continue", so falling out of the switch means
// the hole kind was not recognized:
switch (hole->kind) {
case HoleKind_IMAGE_REL_I386_DIR32:
// 32-bit absolute address.
// Check that we're not out of range of 32 unsigned bits:
assert(value < (1ULL << 32));
*loc32 = (uint32_t)value;
continue;
case HoleKind_ARM64_RELOC_UNSIGNED:
case HoleKind_IMAGE_REL_AMD64_ADDR64:
case HoleKind_R_AARCH64_ABS64:
case HoleKind_X86_64_RELOC_UNSIGNED:
case HoleKind_R_X86_64_64:
// 64-bit absolute address.
*loc64 = value;
continue;
case HoleKind_R_AARCH64_CALL26:
case HoleKind_R_AARCH64_JUMP26:
// 28-bit relative branch.
assert(IS_AARCH64_BRANCH(*loc32));
// Make the value relative to the instruction being patched:
value -= (uint64_t)location;
// Check that we're not out of range of 28 signed bits:
assert((int64_t)value >= -(1 << 27));
assert((int64_t)value < (1 << 27));
// Since instructions are 4-byte aligned, only use 26 bits:
assert(get_bits(value, 0, 2) == 0);
set_bits(loc32, 0, value, 2, 26);
continue;
case HoleKind_R_AARCH64_MOVW_UABS_G0_NC:
// 16-bit low part of an absolute address.
assert(IS_AARCH64_MOV(*loc32));
// Check the implicit shift (this is "part 0 of 3"):
assert(get_bits(*loc32, 21, 2) == 0);
set_bits(loc32, 5, value, 0, 16);
continue;
case HoleKind_R_AARCH64_MOVW_UABS_G1_NC:
// 16-bit middle-low part of an absolute address.
assert(IS_AARCH64_MOV(*loc32));
// Check the implicit shift (this is "part 1 of 3"):
assert(get_bits(*loc32, 21, 2) == 1);
set_bits(loc32, 5, value, 16, 16);
continue;
case HoleKind_R_AARCH64_MOVW_UABS_G2_NC:
// 16-bit middle-high part of an absolute address.
assert(IS_AARCH64_MOV(*loc32));
// Check the implicit shift (this is "part 2 of 3"):
assert(get_bits(*loc32, 21, 2) == 2);
set_bits(loc32, 5, value, 32, 16);
continue;
case HoleKind_R_AARCH64_MOVW_UABS_G3:
// 16-bit high part of an absolute address.
assert(IS_AARCH64_MOV(*loc32));
// Check the implicit shift (this is "part 3 of 3"):
assert(get_bits(*loc32, 21, 2) == 3);
set_bits(loc32, 5, value, 48, 16);
continue;
case HoleKind_ARM64_RELOC_GOT_LOAD_PAGE21:
// 21-bit count of pages between this page and an absolute address's
// page... I know, I know, it's weird. Pairs nicely with
// ARM64_RELOC_GOT_LOAD_PAGEOFF12 (below).
assert(IS_AARCH64_ADRP(*loc32));
// Number of pages between this page and the value's page:
value = (value >> 12) - ((uint64_t)location >> 12);
// Check that we're not out of range of 21 signed bits:
assert((int64_t)value >= -(1 << 20));
assert((int64_t)value < (1 << 20));
// value[0:2] goes in loc[29:31]:
set_bits(loc32, 29, value, 0, 2);
// value[2:21] goes in loc[5:24]:
set_bits(loc32, 5, value, 2, 19);
continue;
case HoleKind_ARM64_RELOC_GOT_LOAD_PAGEOFF12:
// 12-bit low part of an absolute address. Pairs nicely with
// ARM64_RELOC_GOT_LOAD_PAGE21 (above).
assert(IS_AARCH64_LDR_OR_STR(*loc32) || IS_AARCH64_ADD_OR_SUB(*loc32));
// There might be an implicit shift encoded in the instruction:
uint8_t shift = 0;
if (IS_AARCH64_LDR_OR_STR(*loc32)) {
shift = (uint8_t)get_bits(*loc32, 30, 2);
// If both of these are set, the shift is supposed to be 4.
// That's pretty weird, and it's never actually been observed...
assert(get_bits(*loc32, 23, 1) == 0 || get_bits(*loc32, 26, 1) == 0);
}
// Keep only the low 12 bits, which must be aligned to the implicit shift:
value = get_bits(value, 0, 12);
assert(get_bits(value, 0, shift) == 0);
set_bits(loc32, 10, value, shift, 12);
continue;
}
Py_UNREACHABLE();
}
}
// Copy stencil's body to base, then fill in its holes using patches.
static void
copy_and_patch(char *base, const Stencil *stencil, uint64_t *patches)
{
    const void *template_bytes = stencil->body;
    memcpy(base, template_bytes, stencil->body_size);
    patch(base, stencil, patches);
}
// Emit both halves of a stencil group: its code goes to the address stored in
// patches[HoleValue_CODE], and its data to the one in patches[HoleValue_DATA].
static void
emit(const StencilGroup *group, uint64_t patches[])
{
    char *code_dest = (char *)patches[HoleValue_CODE];
    copy_and_patch(code_dest, &group->code, patches);
    char *data_dest = (char *)patches[HoleValue_DATA];
    copy_and_patch(data_dest, &group->data, patches);
}
// Compiles executor in-place. Don't forget to call _PyJIT_Free later!
//
// The `length` uops in `trace` are translated by concatenating (and patching)
// the stencil group for each opcode. On success, the resulting memory is
// recorded in executor->jit_code / executor->jit_size and 0 is returned. On
// failure, -1 is returned and the executor is left untouched.
int
_PyJIT_Compile(_PyExecutorObject *executor, _PyUOpInstruction *trace, size_t length)
{
    // First pass: add up how much code and data the trace needs.
    size_t code_size = 0;
    size_t data_size = 0;
    for (size_t i = 0; i < length; i++) {
        const StencilGroup *group = &stencil_groups[trace[i].opcode];
        code_size += group->code.body_size;
        data_size += group->data.body_size;
    }
    // Pad each region out to a page boundary, since code and data get
    // different protections and so can't share a page:
    size_t page_size = get_page_size();
    size_t page_mask = page_size - 1;
    assert((page_size & page_mask) == 0);
    code_size += page_size - (code_size & page_mask);
    data_size += page_size - (data_size & page_mask);
    size_t total_size = code_size + data_size;
    char *memory = jit_alloc(total_size);
    if (memory == NULL) {
        return -1;
    }
    // Second pass: emit each instruction, code first and data after it.
    char *code = memory;
    char *data = memory + code_size;
    for (size_t i = 0; i < length; i++) {
        _PyUOpInstruction *instruction = &trace[i];
        const StencilGroup *group = &stencil_groups[instruction->opcode];
        // Think of patches as a dictionary mapping HoleValue to uint64_t:
        uint64_t patches[] = GET_PATCHES();
        patches[HoleValue_CODE] = (uint64_t)code;
        patches[HoleValue_CONTINUE] = (uint64_t)code + group->code.body_size;
        patches[HoleValue_DATA] = (uint64_t)data;
        patches[HoleValue_EXECUTOR] = (uint64_t)executor;
        patches[HoleValue_OPARG] = instruction->oparg;
        patches[HoleValue_OPERAND] = instruction->operand;
        patches[HoleValue_TARGET] = instruction->target;
        patches[HoleValue_TOP] = (uint64_t)memory;
        patches[HoleValue_ZERO] = 0;
        emit(group, patches);
        code += group->code.body_size;
        data += group->data.body_size;
    }
    // Lock everything down: code becomes executable, data becomes read-only.
    int failed = mark_executable(memory, code_size) ||
                 mark_readable(memory + code_size, data_size);
    if (failed) {
        jit_free(memory, total_size);
        return -1;
    }
    executor->jit_code = memory;
    executor->jit_size = total_size;
    return 0;
}
// Release the JIT memory owned by executor, if there is any. The executor's
// jit_code and jit_size fields are reset before the memory is freed; a failure
// to free is reported with PyErr_WriteUnraisable.
void
_PyJIT_Free(_PyExecutorObject *executor)
{
    char *memory = (char *)executor->jit_code;
    size_t size = executor->jit_size;
    if (!memory) {
        return;
    }
    executor->jit_code = NULL;
    executor->jit_size = 0;
    if (jit_free(memory, size) != 0) {
        PyErr_WriteUnraisable(NULL);
    }
}
#endif // _Py_JIT

View File

@ -7,6 +7,7 @@
#include "pycore_optimizer.h" // _Py_uop_analyze_and_optimize()
#include "pycore_pystate.h" // _PyInterpreterState_GET()
#include "pycore_uop_ids.h"
#include "pycore_jit.h"
#include "cpython/optimizer.h"
#include <stdbool.h>
#include <stdint.h>
@ -227,6 +228,9 @@ static PyMethodDef executor_methods[] = {
static void
uop_dealloc(_PyExecutorObject *self) {
_Py_ExecutorClear(self);
#ifdef _Py_JIT
_PyJIT_Free(self);
#endif
PyObject_Free(self);
}
@ -789,6 +793,14 @@ make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies)
executor->trace[i].operand);
}
}
#endif
#ifdef _Py_JIT
executor->jit_code = NULL;
executor->jit_size = 0;
if (_PyJIT_Compile(executor, executor->trace, Py_SIZE(executor))) {
Py_DECREF(executor);
return NULL;
}
#endif
return executor;
}

View File

@ -1240,12 +1240,19 @@ init_interp_main(PyThreadState *tstate)
// Turn on experimental tier 2 (uops-based) optimizer
if (is_main_interp) {
#ifndef _Py_JIT
// No JIT, maybe use the tier two interpreter:
char *envvar = Py_GETENV("PYTHON_UOPS");
int enabled = envvar != NULL && *envvar > '0';
if (_Py_get_xoption(&config->xoptions, L"uops") != NULL) {
enabled = 1;
}
if (enabled) {
#else
// Always enable tier two for JIT builds (ignoring the environment
// variable and command-line option above):
if (true) {
#endif
PyObject *opt = PyUnstable_Optimizer_NewUOpOptimizer();
if (opt == NULL) {
return _PyStatus_ERR("can't initialize optimizer");

46
Tools/jit/README.md Normal file
View File

@ -0,0 +1,46 @@
The JIT Compiler
================
This version of CPython can be built with an experimental just-in-time compiler. While almost everything you already know about building and using CPython is unchanged, you will probably need to install a compatible version of LLVM first.
## Installing LLVM
The JIT compiler does not require end users to install any third-party dependencies, but part of it must be *built* using LLVM[^why-llvm]. You are *not* required to build the rest of CPython using LLVM, or even the same version of LLVM (in fact, this is uncommon).
LLVM version 16 is required. Both `clang` and `llvm-readobj` need to be installed and discoverable (version suffixes, like `clang-16`, are okay). It's highly recommended that you also have `llvm-objdump` available, since this allows the build script to dump human-readable assembly for the generated code.
It's easy to install all of the required tools:
### Linux
Install LLVM 16 on Ubuntu/Debian:
```sh
wget https://apt.llvm.org/llvm.sh
chmod +x llvm.sh
sudo ./llvm.sh 16
```
### macOS
Install LLVM 16 with [Homebrew](https://brew.sh):
```sh
brew install llvm@16
```
Homebrew won't add any of the tools to your `$PATH`. That's okay; the build script knows how to find them.
### Windows
Install LLVM 16 [by searching for it on LLVM's GitHub releases page](https://github.com/llvm/llvm-project/releases?q=16), clicking on "Assets", downloading the appropriate Windows installer for your platform (likely the file ending with `-win64.exe`), and running it. **When installing, be sure to select the option labeled "Add LLVM to the system PATH".**
## Building
For `PCbuild`-based builds, pass the new `--experimental-jit` option to `build.bat`.
For all other builds, pass the new `--enable-experimental-jit` option to `configure`.
Otherwise, just configure and build as you normally would. Cross-compiling "just works", since the JIT is built for the host platform.
[^why-llvm]: Clang is specifically needed because it's the only C compiler with support for guaranteed tail calls (`musttail`), which are required by CPython's continuation-passing-style approach to JIT compilation. Since LLVM also includes other functionalities we need (namely, object file parsing and disassembly), it's convenient to only support one toolchain at this time.

99
Tools/jit/_llvm.py Normal file
View File

@ -0,0 +1,99 @@
"""Utilities for invoking LLVM tools."""
import asyncio
import functools
import os
import re
import shlex
import subprocess
import typing
_LLVM_VERSION = 16
_LLVM_VERSION_PATTERN = re.compile(rf"version\s+{_LLVM_VERSION}\.\d+\.\d+\s+")
_P = typing.ParamSpec("_P")
_R = typing.TypeVar("_R")
_C = typing.Callable[_P, typing.Awaitable[_R]]
def _async_cache(f: _C[_P, _R]) -> _C[_P, _R]:
    """Memoize the result of an async function.

    Results are keyed on the positional arguments only; keyword arguments are
    forwarded to the wrapped function but are not part of the cache key. A
    single lock is held while looking up or computing a result.
    """
    results = {}
    guard = asyncio.Lock()

    @functools.wraps(f)
    async def wrapper(
        *args: _P.args, **kwargs: _P.kwargs  # pylint: disable = no-member
    ) -> _R:
        async with guard:
            if args in results:
                return results[args]
            value = await f(*args, **kwargs)
            results[args] = value
            return value

    return wrapper
_CORES = asyncio.BoundedSemaphore(os.cpu_count() or 1)
async def _run(tool: str, args: typing.Iterable[str], echo: bool = False) -> str | None:
    """Run tool with args and return its decoded standard output.

    Returns None if the executable cannot be found, and raises RuntimeError if
    it exits with a nonzero return code. If echo is true, the command line is
    printed before it is run. The _CORES semaphore is held while the
    subprocess runs.
    """
    argv = [tool, *args]
    async with _CORES:
        if echo:
            print(shlex.join(argv))
        try:
            child = await asyncio.create_subprocess_exec(
                *argv, stdout=subprocess.PIPE
            )
        except FileNotFoundError:
            return None
        stdout, _ = await child.communicate()
    status = child.returncode
    if status:
        raise RuntimeError(f"{tool} exited with return code {status}")
    return stdout.decode()
@_async_cache
async def _check_tool_version(name: str, *, echo: bool = False) -> bool:
    """Return True if `name --version` reports the required LLVM version."""
    version_text = await _run(name, ["--version"], echo=echo)
    if not version_text:
        # The tool is missing, or printed nothing at all.
        return False
    return _LLVM_VERSION_PATTERN.search(version_text) is not None
@_async_cache
async def _get_brew_llvm_prefix(*, echo: bool = False) -> str | None:
    """Ask Homebrew for the install prefix of the required LLVM version.

    Returns None when brew itself cannot be found.
    """
    formula = f"llvm@{_LLVM_VERSION}"
    output = await _run("brew", ["--prefix", formula], echo=echo)
    if not output:
        return output
    return output.removesuffix("\n")
@_async_cache
async def _find_tool(tool: str, *, echo: bool = False) -> str | None:
    """
    Locate an LLVM tool of the expected version.

    Tries, in order: the bare tool name, the name with a "-<version>" suffix,
    and the copy under Homebrew's LLVM prefix. Returns the first candidate that
    passes the version check, or None if none does.
    """
    # Unversioned, then versioned executables:
    for candidate in (tool, f"{tool}-{_LLVM_VERSION}"):
        if await _check_tool_version(candidate, echo=echo):
            return candidate
    # Homebrew-installed executables (only consulted as a last resort):
    prefix = await _get_brew_llvm_prefix(echo=echo)
    if prefix is None:
        return None
    candidate = os.path.join(prefix, "bin", tool)
    if await _check_tool_version(candidate, echo=echo):
        return candidate
    # Nothing found:
    return None
async def maybe_run(
    tool: str, args: typing.Iterable[str], echo: bool = False
) -> str | None:
    """Run an LLVM tool if it can be found. Otherwise, return None."""
    location = await _find_tool(tool, echo=echo)
    if not location:
        return location
    return await _run(location, args, echo=echo)
async def run(tool: str, args: typing.Iterable[str], echo: bool = False) -> str:
    """Run an LLVM tool if it can be found. Otherwise, raise RuntimeError."""
    result = await maybe_run(tool, args, echo=echo)
    if result is not None:
        return result
    raise RuntimeError(f"Can't find {tool}-{_LLVM_VERSION}!")

99
Tools/jit/_schema.py Normal file
View File

@ -0,0 +1,99 @@
"""Schema for the JSON produced by llvm-readobj --elf-output-style=JSON."""
import typing
# The relocation type names that can appear as the "kind" of a hole. The C
# header writer emits one "HoleKind_<name>" enum member per entry, in exactly
# this order, so entries should only ever be added together with matching
# support in the code that applies the patches.
HoleKind: typing.TypeAlias = typing.Literal[
    "ARM64_RELOC_GOT_LOAD_PAGE21",
    "ARM64_RELOC_GOT_LOAD_PAGEOFF12",
    "ARM64_RELOC_UNSIGNED",
    "IMAGE_REL_AMD64_ADDR64",
    "IMAGE_REL_I386_DIR32",
    "R_AARCH64_ABS64",
    "R_AARCH64_CALL26",
    "R_AARCH64_JUMP26",
    "R_AARCH64_MOVW_UABS_G0_NC",
    "R_AARCH64_MOVW_UABS_G1_NC",
    "R_AARCH64_MOVW_UABS_G2_NC",
    "R_AARCH64_MOVW_UABS_G3",
    "R_X86_64_64",
    "X86_64_RELOC_UNSIGNED",
]
class COFFRelocation(typing.TypedDict):
    """A COFF object file relocation record."""

    # The relocation type, wrapped as {"Value": <kind>}:
    Type: dict[typing.Literal["Value"], HoleKind]
    # The name of the symbol being referenced (a plain string for COFF):
    Symbol: str
    # Where to patch, relative to the start of the containing section:
    Offset: int
class ELFRelocation(typing.TypedDict):
    """An ELF object file relocation record."""

    # Constant to add to the symbol's address (ELF stores this explicitly):
    Addend: int
    # Where to patch, relative to the start of the section being relocated:
    Offset: int
    # The name of the symbol being referenced, wrapped as {"Value": <name>}:
    Symbol: dict[typing.Literal["Value"], str]
    # The relocation type, wrapped as {"Value": <kind>}:
    Type: dict[typing.Literal["Value"], HoleKind]
class MachORelocation(typing.TypedDict):
    """A Mach-O object file relocation record."""

    # Where to patch, relative to the start of the containing section:
    Offset: int
    # A record names its target with either "Section" or "Symbol" (both are
    # optional here, and the parser accepts either one):
    Section: typing.NotRequired[dict[typing.Literal["Value"], str]]
    Symbol: typing.NotRequired[dict[typing.Literal["Value"], str]]
    # The relocation type, wrapped as {"Value": <kind>}:
    Type: dict[typing.Literal["Value"], HoleKind]
class _COFFSymbol(typing.TypedDict):
    """A symbol listed under a COFF section."""

    # The symbol's name (a plain string for COFF):
    Name: str
    # The parser treats this as an offset from the start of the section:
    Value: int
class _ELFSymbol(typing.TypedDict):
    """A symbol listed under an ELF section."""

    # The symbol's name, wrapped as {"Value": <name>}:
    Name: dict[typing.Literal["Value"], str]
    # The parser treats this as an offset from the start of the section:
    Value: int
class _MachOSymbol(typing.TypedDict):
    """A symbol listed under a Mach-O section."""

    # The symbol's name, wrapped as {"Value": <name>}:
    Name: dict[typing.Literal["Value"], str]
    # The parser subtracts a per-stencil start address from this, so it appears
    # to be an address within the object file rather than a section offset:
    Value: int
class COFFSection(typing.TypedDict):
    """A COFF object file section."""

    # Section flags, as {"Flags": [{"Name": <flag>}, ...]}:
    Characteristics: dict[
        typing.Literal["Flags"], list[dict[typing.Literal["Name"], str]]
    ]
    # The number that relocations and symbols use to refer to this section:
    Number: int
    # Size of the section; used to synthesize zeros when SectionData is absent:
    RawDataSize: int
    Relocations: list[dict[typing.Literal["Relocation"], COFFRelocation]]
    # The section's contents, one int per byte (missing for zeroed sections):
    SectionData: typing.NotRequired[dict[typing.Literal["Bytes"], list[int]]]
    Symbols: list[dict[typing.Literal["Symbol"], _COFFSymbol]]
class ELFSection(typing.TypedDict):
    """An ELF object file section."""

    # Section flags, as {"Flags": [{"Name": <flag>}, ...]}:
    Flags: dict[typing.Literal["Flags"], list[dict[typing.Literal["Name"], str]]]
    # This section's index, which other sections use to refer to it:
    Index: int
    # For relocation sections, the index of the section being relocated:
    Info: int
    Relocations: list[dict[typing.Literal["Relocation"], ELFRelocation]]
    # The section's contents, one int per byte:
    SectionData: dict[typing.Literal["Bytes"], list[int]]
    Symbols: list[dict[typing.Literal["Symbol"], _ELFSymbol]]
    # The section type (for example "SHT_PROGBITS"), wrapped as {"Value": ...}:
    Type: dict[typing.Literal["Value"], str]
class MachOSection(typing.TypedDict):
    """A Mach-O object file section."""

    # The section's address within the object file:
    Address: int
    # Section attributes, as {"Flags": [{"Name": <flag>}, ...]}:
    Attributes: dict[typing.Literal["Flags"], list[dict[typing.Literal["Name"], str]]]
    # This section's index, which relocations use to refer to it:
    Index: int
    # The section's name, wrapped as {"Value": <name>}:
    Name: dict[typing.Literal["Value"], str]
    # Typed as optional, although the parser asserts that all three of the
    # following keys are present:
    Relocations: typing.NotRequired[
        list[dict[typing.Literal["Relocation"], MachORelocation]]
    ]
    SectionData: typing.NotRequired[dict[typing.Literal["Bytes"], list[int]]]
    Symbols: typing.NotRequired[list[dict[typing.Literal["Symbol"], _MachOSymbol]]]

220
Tools/jit/_stencils.py Normal file
View File

@ -0,0 +1,220 @@
"""Core data structures for compiled code templates."""
import dataclasses
import enum
import sys
import _schema
@enum.unique
class HoleValue(enum.Enum):
    """
    Different "base" values that can be patched into holes (usually combined with the
    address of a symbol and/or an addend).

    The C header writer emits one "HoleValue_<name>" enum member per value, in
    definition order. Symbols named "_JIT_<name>" in compiled templates are
    translated to the member with the matching name by symbol_to_value().
    """

    # The base address of the machine code for the current uop (exposed as _JIT_ENTRY):
    CODE = enum.auto()
    # The base address of the machine code for the next uop (exposed as _JIT_CONTINUE):
    CONTINUE = enum.auto()
    # The base address of the read-only data for this uop:
    DATA = enum.auto()
    # The address of the current executor (exposed as _JIT_EXECUTOR):
    EXECUTOR = enum.auto()
    # The base address of the "global" offset table located in the read-only data.
    # Shouldn't be present in the final stencils, since these are all replaced with
    # equivalent DATA values:
    GOT = enum.auto()
    # The current uop's oparg (exposed as _JIT_OPARG):
    OPARG = enum.auto()
    # The current uop's operand (exposed as _JIT_OPERAND):
    OPERAND = enum.auto()
    # The current uop's target (exposed as _JIT_TARGET):
    TARGET = enum.auto()
    # The base address of the machine code for the first uop (exposed as _JIT_TOP):
    TOP = enum.auto()
    # A hardcoded value of zero (used for symbol lookups):
    ZERO = enum.auto()
@dataclasses.dataclass
class Hole:
    """
    One location in a stencil that is filled in with a value computed at runtime.

    Plays the same role as a relocation record in an object file: the patched
    value is "value" + the address of "symbol" (if any) + "addend".
    """

    # Where in the stencil body the patch is applied:
    offset: int
    # How the patch is encoded at that location:
    kind: _schema.HoleKind
    # Patch with this base value:
    value: HoleValue
    # ...plus the address of this symbol:
    symbol: str | None
    # ...plus this addend:
    addend: int
    # Convenience method (hole.replace(field=...) returns a modified copy):
    replace = dataclasses.replace

    def as_c(self) -> str:
        """Render this hole as a brace-enclosed initializer for a C Hole struct."""
        symbol_address = f"&{self.symbol}" if self.symbol else "NULL"
        fields = (
            f"{self.offset:#x}",
            f"HoleKind_{self.kind}",
            f"HoleValue_{self.value.name}",
            symbol_address,
            _format_addend(self.addend),
        )
        return "{" + ", ".join(fields) + "}"
@dataclasses.dataclass
class Stencil:
    """
    A contiguous block of machine code or data to be copied-and-patched.
    Analogous to a section or segment in an object file.
    """

    # The raw bytes of the block:
    body: bytearray = dataclasses.field(default_factory=bytearray, init=False)
    # The locations in the body that need to be patched at runtime:
    holes: list[Hole] = dataclasses.field(default_factory=list, init=False)
    # Human-readable listing of the body (only emitted as comments):
    disassembly: list[str] = dataclasses.field(default_factory=list, init=False)

    def pad(self, alignment: int) -> None:
        """
        Pad the stencil with zero bytes to the given alignment.

        Nothing is added (to the body or to the disassembly) when the body is
        already aligned.
        """
        offset = len(self.body)
        padding = -offset % alignment
        if padding:
            self.disassembly.append(f"{offset:x}: {' '.join(['00'] * padding)}")
            self.body.extend([0] * padding)

    def emit_aarch64_trampoline(self, hole: Hole) -> None:
        """
        Even with the large code model, AArch64 Linux insists on 28-bit jumps.

        Redirect the branch instruction at hole.offset to a trampoline appended
        to the end of this stencil. The trampoline loads the full 64-bit address
        of hole.symbol into x8 (16 bits at a time) and branches to it, so four
        new holes (one per 16-bit chunk) replace the original one.
        """
        base = len(self.body)
        where = slice(hole.offset, hole.offset + 4)
        # A64 instruction words are little-endian regardless of the byte order
        # of the machine running this build script:
        instruction = int.from_bytes(self.body[where], "little")
        # Keep the opcode bits, and replace the 26-bit immediate with the
        # distance to the trampoline (in units of 4-byte instructions):
        instruction &= 0xFC000000
        instruction |= ((base - hole.offset) >> 2) & 0x03FFFFFF
        self.body[where] = instruction.to_bytes(4, "little")
        # (encoded instruction, assembly text, relocation kind to patch it with):
        trampoline: list[tuple[int, str, str | None]] = [
            (0xD2800008, "mov x8, #0x0", "R_AARCH64_MOVW_UABS_G0_NC"),
            (0xF2A00008, "movk x8, #0x0, lsl #16", "R_AARCH64_MOVW_UABS_G1_NC"),
            (0xF2C00008, "movk x8, #0x0, lsl #32", "R_AARCH64_MOVW_UABS_G2_NC"),
            (0xF2E00008, "movk x8, #0x0, lsl #48", "R_AARCH64_MOVW_UABS_G3"),
            (0xD61F0100, "br x8", None),
        ]
        for i, (opcode, text, kind) in enumerate(trampoline):
            offset = base + 4 * i
            self.disassembly.append(f"{offset:x}: {opcode:08x} {text}")
            if kind is not None:
                self.disassembly.append(f"{offset:016x}: {kind} {hole.symbol}")
                self.holes.append(hole.replace(offset=offset, kind=kind))
            self.body.extend(opcode.to_bytes(4, "little"))
@dataclasses.dataclass
class StencilGroup:
    """
    Code and data corresponding to a given micro-opcode.
    Analogous to an entire object file.
    """

    # The executable part of the group:
    code: Stencil = dataclasses.field(default_factory=Stencil, init=False)
    # The read-only data part of the group (the GOT is appended to this):
    data: Stencil = dataclasses.field(default_factory=Stencil, init=False)
    # Maps section numbers and symbol names defined by this group to the
    # stencil they live in (CODE or DATA) and their offset within it:
    symbols: dict[int | str, tuple[HoleValue, int]] = dataclasses.field(
        default_factory=dict, init=False
    )
    # Maps each symbol that needs a GOT entry to its offset within the GOT:
    _got: dict[str, int] = dataclasses.field(default_factory=dict, init=False)

    def process_relocations(self, *, alignment: int = 1) -> None:
        """Fix up all GOT and internal relocations for this stencil group."""
        self.code.pad(alignment)
        self.data.pad(8)
        for stencil in [self.code, self.data]:
            holes = []
            # NOTE: emit_aarch64_trampoline() appends to self.code.holes, which
            # may be the very list being iterated here. Holes added that way
            # are visited later in this same loop and kept by the final append:
            for hole in stencil.holes:
                if hole.value is HoleValue.GOT:
                    # Point GOT-relative holes at the entry's future location
                    # in the data stencil (the GOT is emitted after this loop):
                    assert hole.symbol is not None
                    hole.value = HoleValue.DATA
                    hole.addend += self._global_offset_table_lookup(hole.symbol)
                    hole.symbol = None
                elif hole.symbol in self.symbols:
                    # The symbol is defined inside this group, so it can be
                    # expressed relative to the code or data base address:
                    hole.value, addend = self.symbols[hole.symbol]
                    hole.addend += addend
                    hole.symbol = None
                elif (
                    hole.kind in {"R_AARCH64_CALL26", "R_AARCH64_JUMP26"}
                    and hole.value is HoleValue.ZERO
                ):
                    # A short-range branch to an external symbol: route it
                    # through a trampoline, and drop the original hole.
                    self.code.emit_aarch64_trampoline(hole)
                    continue
                holes.append(hole)
            stencil.holes[:] = holes
        # Trampolines may have grown the code, so pad it again:
        self.code.pad(alignment)
        self._emit_global_offset_table()
        self.code.holes.sort(key=lambda hole: hole.offset)
        self.data.holes.sort(key=lambda hole: hole.offset)

    def _global_offset_table_lookup(self, symbol: str) -> int:
        # Returns the offset (from the start of the data stencil) of the
        # symbol's 8-byte GOT entry, allocating a new entry on first use. This
        # assumes the data body doesn't grow between lookups and GOT emission.
        return len(self.data.body) + self._got.setdefault(symbol, 8 * len(self._got))

    def _emit_global_offset_table(self) -> None:
        # Appends one zeroed 8-byte slot per GOT entry to the data stencil,
        # together with a hole that fills the slot in with the entry's address.
        got = len(self.data.body)
        for s, offset in self._got.items():
            if s in self.symbols:
                value, addend = self.symbols[s]
                symbol = None
            else:
                value, symbol = symbol_to_value(s)
                addend = 0
            self.data.holes.append(
                Hole(got + offset, "R_X86_64_64", value, symbol, addend)
            )
            # The rest of the loop body only builds a readable description of
            # the entry ("VALUE", "&symbol+0x0", "VALUE+0x8", ...):
            value_part = value.name if value is not HoleValue.ZERO else ""
            if value_part and not symbol and not addend:
                addend_part = ""
            else:
                addend_part = f"&{symbol}" if symbol else ""
                addend_part += _format_addend(addend, signed=symbol is not None)
                if value_part:
                    value_part += "+"
            self.data.disassembly.append(
                f"{len(self.data.body):x}: {value_part}{addend_part}"
            )
            self.data.body.extend([0] * 8)
def symbol_to_value(symbol: str) -> tuple[HoleValue, str | None]:
    """
    Split a symbol name into a base HoleValue and a remaining symbol name.

    A symbol spelled "_JIT_<NAME>", where <NAME> is a HoleValue member, becomes
    that member with no symbol. Anything else is an ordinary symbol, paired
    with a base value of HoleValue.ZERO.
    """
    marker = "_JIT_"
    if symbol.startswith(marker):
        special = HoleValue.__members__.get(symbol[len(marker) :])
        if special is not None:
            return special, None
    return HoleValue.ZERO, symbol
def _format_addend(addend: int, signed: bool = False) -> str:
addend %= 1 << 64
if addend & (1 << 63):
addend -= 1 << 64
return f"{addend:{'+#x' if signed else '#x'}}"

394
Tools/jit/_targets.py Normal file
View File

@ -0,0 +1,394 @@
"""Target-specific code generation, parsing, and processing."""
import asyncio
import dataclasses
import hashlib
import json
import os
import pathlib
import re
import sys
import tempfile
import typing
import _llvm
import _schema
import _stencils
import _writer
# Fail early with a clear message instead of an obscure error further down:
if sys.version_info < (3, 11):
    raise RuntimeError("Building the JIT compiler requires Python 3.11 or newer!")
# The path of this very file, and the directories above it:
TOOLS_JIT_BUILD = pathlib.Path(__file__).resolve()
TOOLS_JIT = TOOLS_JIT_BUILD.parent
TOOLS = TOOLS_JIT.parent
CPYTHON = TOOLS.parent
# The generated uop implementations, and the C template that includes them:
PYTHON_EXECUTOR_CASES_C_H = CPYTHON / "Python" / "executor_cases.c.h"
TOOLS_JIT_TEMPLATE_C = TOOLS_JIT / "template.c"
# Section and relocation schema types, one per supported object file format:
_S = typing.TypeVar("_S", _schema.COFFSection, _schema.ELFSection, _schema.MachOSection)
_R = typing.TypeVar(
    "_R", _schema.COFFRelocation, _schema.ELFRelocation, _schema.MachORelocation
)
@dataclasses.dataclass
class _Target(typing.Generic[_S, _R]):
    """
    A platform that the JIT's code templates can be built for.

    Subclasses implement _handle_section() and _handle_relocation() for one
    object file format; this class drives compilation, parsing, and the
    writing of jit_stencils.h.
    """

    # The target triple passed to clang:
    triple: str
    _: dataclasses.KW_ONLY
    # Alignment (in bytes) that each code stencil is padded to:
    alignment: int = 1
    # Prefix that the platform adds to symbol names (stripped while parsing):
    prefix: str = ""
    # Compile the templates for a debug build of Python:
    debug: bool = False
    # Rebuild even if the existing header looks up to date:
    force: bool = False
    # Echo the commands that are run:
    verbose: bool = False

    def _compute_digest(self, out: pathlib.Path) -> str:
        """Hash everything that influences the contents of jit_stencils.h."""
        hasher = hashlib.sha256()
        hasher.update(self.triple.encode())
        hasher.update(self.alignment.to_bytes())
        hasher.update(self.prefix.encode())
        # Debug and release builds compile the templates with different flags:
        hasher.update(self.debug.to_bytes())
        # These dependencies are also reflected in _JITSources in regen.targets:
        hasher.update(PYTHON_EXECUTOR_CASES_C_H.read_bytes())
        hasher.update((out / "pyconfig.h").read_bytes())
        for dirpath, _, filenames in sorted(os.walk(TOOLS_JIT)):
            # Directory listings come back in arbitrary order, so sort the
            # names to keep the digest stable from one run to the next:
            for filename in sorted(filenames):
                hasher.update(pathlib.Path(dirpath, filename).read_bytes())
        return hasher.hexdigest()

    async def _parse(self, path: pathlib.Path) -> _stencils.StencilGroup:
        """Turn a compiled object file into a StencilGroup."""
        group = _stencils.StencilGroup()
        # The disassembly is optional; it only ends up in comments:
        args = ["--disassemble", "--reloc", f"{path}"]
        output = await _llvm.maybe_run("llvm-objdump", args, echo=self.verbose)
        if output is not None:
            group.code.disassembly.extend(
                line.expandtabs().strip()
                for line in output.splitlines()
                if not line.isspace()
            )
        args = [
            "--elf-output-style=JSON",
            "--expand-relocs",
            # "--pretty-print",
            "--section-data",
            "--section-relocations",
            "--section-symbols",
            "--sections",
            f"{path}",
        ]
        output = await _llvm.run("llvm-readobj", args, echo=self.verbose)
        # --elf-output-style=JSON is only *slightly* broken on Mach-O...
        output = output.replace("PrivateExtern\n", "\n")
        output = output.replace("Extern\n", "\n")
        # ...and also COFF:
        output = output[output.index("[", 1, None) :]
        output = output[: output.rindex("]", None, -1) + 1]
        sections: list[dict[typing.Literal["Section"], _S]] = json.loads(output)
        for wrapped_section in sections:
            self._handle_section(wrapped_section["Section"], group)
        # The entry point must be the very first thing in the code stencil:
        assert group.symbols["_JIT_ENTRY"] == (_stencils.HoleValue.CODE, 0)
        if group.data.body:
            line = f"0: {str(bytes(group.data.body)).removeprefix('b')}"
            group.data.disassembly.append(line)
        # Honor the alignment that this target was configured with:
        group.process_relocations(alignment=self.alignment)
        return group

    def _handle_section(self, section: _S, group: _stencils.StencilGroup) -> None:
        """Add one parsed section to the group (format-specific)."""
        raise NotImplementedError(type(self))

    def _handle_relocation(
        self, base: int, relocation: _R, raw: bytes
    ) -> _stencils.Hole:
        """Convert one parsed relocation record into a Hole (format-specific)."""
        raise NotImplementedError(type(self))

    async def _compile(
        self, opname: str, c: pathlib.Path, tempdir: pathlib.Path
    ) -> _stencils.StencilGroup:
        """Compile the template for a single uop and parse the resulting object."""
        o = tempdir / f"{opname}.o"
        args = [
            f"--target={self.triple}",
            "-DPy_BUILD_CORE",
            "-D_DEBUG" if self.debug else "-DNDEBUG",
            f"-D_JIT_OPCODE={opname}",
            "-D_PyJIT_ACTIVE",
            "-D_Py_JIT",
            "-I.",
            f"-I{CPYTHON / 'Include'}",
            f"-I{CPYTHON / 'Include' / 'internal'}",
            f"-I{CPYTHON / 'Include' / 'internal' / 'mimalloc'}",
            f"-I{CPYTHON / 'Python'}",
            "-O3",
            "-c",
            "-fno-asynchronous-unwind-tables",
            # SET_FUNCTION_ATTRIBUTE on 32-bit Windows debug builds:
            "-fno-jump-tables",
            # Position-independent code adds indirection to every load and jump:
            "-fno-pic",
            # Don't make calls to weird stack-smashing canaries:
            "-fno-stack-protector",
            # We have three options for code model:
            # - "small": the default, assumes that code and data reside in the
            #   lowest 2GB of memory (128MB on aarch64)
            # - "medium": assumes that code resides in the lowest 2GB of memory,
            #   and makes no assumptions about data (not available on aarch64)
            # - "large": makes no assumptions about either code or data
            "-mcmodel=large",
            "-o",
            f"{o}",
            "-std=c11",
            f"{c}",
        ]
        await _llvm.run("clang", args, echo=self.verbose)
        return await self._parse(o)

    async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]:
        """Compile every uop found in executor_cases.c.h, concurrently."""
        generated_cases = PYTHON_EXECUTOR_CASES_C_H.read_text()
        opnames = sorted(re.findall(r"\n {8}case (\w+): \{\n", generated_cases))
        tasks = []
        with tempfile.TemporaryDirectory() as tempdir:
            work = pathlib.Path(tempdir).resolve()
            async with asyncio.TaskGroup() as group:
                for opname in opnames:
                    coro = self._compile(opname, TOOLS_JIT_TEMPLATE_C, work)
                    tasks.append(group.create_task(coro, name=opname))
        return {task.get_name(): task.result() for task in tasks}

    def build(self, out: pathlib.Path, *, comment: str = "") -> None:
        """
        Build jit_stencils.h in the given directory.

        The header starts with a digest of its inputs. If an existing header
        already starts with the current digest (and force is not set), nothing
        is rebuilt.
        """
        digest = f"// {self._compute_digest(out)}\n"
        jit_stencils = out / "jit_stencils.h"
        if (
            not self.force
            and jit_stencils.exists()
            and jit_stencils.read_text().startswith(digest)
        ):
            return
        stencil_groups = asyncio.run(self._build_stencils())
        # Write to a scratch file and move it into place only once it is
        # complete. Otherwise, a failure part of the way through would leave
        # behind a truncated header that starts with a valid digest, which
        # later runs would mistake for an up-to-date one:
        jit_stencils_new = out / "jit_stencils.h.new"
        try:
            with jit_stencils_new.open("w") as file:
                file.write(digest)
                if comment:
                    file.write(f"// {comment}\n")
                # Separate the leading comments from the generated code:
                file.write("\n")
                for line in _writer.dump(stencil_groups):
                    file.write(f"{line}\n")
            jit_stencils_new.replace(jit_stencils)
        finally:
            jit_stencils_new.unlink(missing_ok=True)
class _COFF(
_Target[_schema.COFFSection, _schema.COFFRelocation]
): # pylint: disable = too-few-public-methods
def _handle_section(
self, section: _schema.COFFSection, group: _stencils.StencilGroup
) -> None:
flags = {flag["Name"] for flag in section["Characteristics"]["Flags"]}
if "SectionData" in section:
section_data_bytes = section["SectionData"]["Bytes"]
else:
# Zeroed BSS data, seen with printf debugging calls:
section_data_bytes = [0] * section["RawDataSize"]
if "IMAGE_SCN_MEM_EXECUTE" in flags:
value = _stencils.HoleValue.CODE
stencil = group.code
elif "IMAGE_SCN_MEM_READ" in flags:
value = _stencils.HoleValue.DATA
stencil = group.data
else:
return
base = len(stencil.body)
group.symbols[section["Number"]] = value, base
stencil.body.extend(section_data_bytes)
for wrapped_symbol in section["Symbols"]:
symbol = wrapped_symbol["Symbol"]
offset = base + symbol["Value"]
name = symbol["Name"]
name = name.removeprefix(self.prefix)
group.symbols[name] = value, offset
for wrapped_relocation in section["Relocations"]:
relocation = wrapped_relocation["Relocation"]
hole = self._handle_relocation(base, relocation, stencil.body)
stencil.holes.append(hole)
def _handle_relocation(
self, base: int, relocation: _schema.COFFRelocation, raw: bytes
) -> _stencils.Hole:
match relocation:
case {
"Offset": offset,
"Symbol": s,
"Type": {"Value": "IMAGE_REL_AMD64_ADDR64" as kind},
}:
offset += base
s = s.removeprefix(self.prefix)
value, symbol = _stencils.symbol_to_value(s)
addend = int.from_bytes(raw[offset : offset + 8], "little")
case {
"Offset": offset,
"Symbol": s,
"Type": {"Value": "IMAGE_REL_I386_DIR32" as kind},
}:
offset += base
s = s.removeprefix(self.prefix)
value, symbol = _stencils.symbol_to_value(s)
addend = int.from_bytes(raw[offset : offset + 4], "little")
case _:
raise NotImplementedError(relocation)
return _stencils.Hole(offset, kind, value, symbol, addend)
class _ELF(
    _Target[_schema.ELFSection, _schema.ELFRelocation]
):  # pylint: disable = too-few-public-methods
    # A target whose object files are parsed as ELF.

    def _handle_section(
        self, section: _schema.ELFSection, group: _stencils.StencilGroup
    ) -> None:
        """Process one ELF section: either its contents or its relocations."""
        section_type = section["Type"]["Value"]
        flags = {flag["Name"] for flag in section["Flags"]["Flags"]}
        if section_type == "SHT_RELA":
            # A relocation section. "Info" is the index of the section that it
            # applies to, which must already have been recorded in
            # group.symbols by the SHT_PROGBITS branch below:
            assert "SHF_INFO_LINK" in flags, flags
            assert not section["Symbols"]
            value, base = group.symbols[section["Info"]]
            if value is _stencils.HoleValue.CODE:
                stencil = group.code
            else:
                assert value is _stencils.HoleValue.DATA
                stencil = group.data
            for wrapped_relocation in section["Relocations"]:
                relocation = wrapped_relocation["Relocation"]
                hole = self._handle_relocation(base, relocation, stencil.body)
                stencil.holes.append(hole)
        elif section_type == "SHT_PROGBITS":
            # Actual contents. Sections that aren't loaded are skipped:
            if "SHF_ALLOC" not in flags:
                return
            if "SHF_EXECINSTR" in flags:
                value = _stencils.HoleValue.CODE
                stencil = group.code
            else:
                value = _stencils.HoleValue.DATA
                stencil = group.data
            # The section and its symbols are recorded *before* the body is
            # extended, while len(stencil.body) is still the section's start:
            group.symbols[section["Index"]] = value, len(stencil.body)
            for wrapped_symbol in section["Symbols"]:
                symbol = wrapped_symbol["Symbol"]
                offset = len(stencil.body) + symbol["Value"]
                name = symbol["Name"]["Value"]
                name = name.removeprefix(self.prefix)
                group.symbols[name] = value, offset
            stencil.body.extend(section["SectionData"]["Bytes"])
            assert not section["Relocations"]
        else:
            # Every other section type that is known to show up is ignored;
            # anything unexpected fails loudly:
            assert section_type in {
                "SHT_GROUP",
                "SHT_LLVM_ADDRSIG",
                "SHT_NULL",
                "SHT_STRTAB",
                "SHT_SYMTAB",
            }, section_type

    def _handle_relocation(
        self, base: int, relocation: _schema.ELFRelocation, raw: bytes
    ) -> _stencils.Hole:
        """
        Convert an ELF relocation into a Hole.

        The addend comes straight from the record, so raw is unused here.
        """
        match relocation:
            case {
                "Addend": addend,
                "Offset": offset,
                "Symbol": {"Value": s},
                "Type": {"Value": kind},
            }:
                # Make the offset relative to the stencil, not the section:
                offset += base
                s = s.removeprefix(self.prefix)
                value, symbol = _stencils.symbol_to_value(s)
            case _:
                raise NotImplementedError(relocation)
        return _stencils.Hole(offset, kind, value, symbol, addend)
class _MachO(
    _Target[_schema.MachOSection, _schema.MachORelocation]
):  # pylint: disable = too-few-public-methods
    # A target whose object files are parsed as Mach-O.

    def _handle_section(
        self, section: _schema.MachOSection, group: _stencils.StencilGroup
    ) -> None:
        """Append one Mach-O section to the code or data stencil."""
        assert section["Address"] >= len(group.code.body)
        assert "SectionData" in section
        flags = {flag["Name"] for flag in section["Attributes"]["Flags"]}
        name = section["Name"]["Value"]
        name = name.removeprefix(self.prefix)
        if "SomeInstructions" in flags:
            value = _stencils.HoleValue.CODE
            stencil = group.code
            start_address = 0
            group.symbols[name] = value, section["Address"] - start_address
        else:
            value = _stencils.HoleValue.DATA
            stencil = group.data
            # Data addresses are rebased by the size of the code seen so far.
            # NOTE(review): this presumably relies on all code sections coming
            # before any data section - confirm.
            start_address = len(group.code.body)
            # NOTE(review): unlike the code branch, the section name is mapped
            # to len(group.code.body) rather than to an offset derived from
            # section["Address"] - confirm that this is intended.
            group.symbols[name] = value, len(group.code.body)
        base = section["Address"] - start_address
        group.symbols[section["Index"]] = value, base
        # Fill the gap between what has been emitted so far and this section's
        # address with zeros:
        stencil.body.extend(
            [0] * (section["Address"] - len(group.code.body) - len(group.data.body))
        )
        stencil.body.extend(section["SectionData"]["Bytes"])
        assert "Symbols" in section
        for wrapped_symbol in section["Symbols"]:
            symbol = wrapped_symbol["Symbol"]
            # Rebased with start_address, just like the section itself:
            offset = symbol["Value"] - start_address
            name = symbol["Name"]["Value"]
            name = name.removeprefix(self.prefix)
            group.symbols[name] = value, offset
        assert "Relocations" in section
        for wrapped_relocation in section["Relocations"]:
            relocation = wrapped_relocation["Relocation"]
            hole = self._handle_relocation(base, relocation, stencil.body)
            stencil.holes.append(hole)

    def _handle_relocation(
        self, base: int, relocation: _schema.MachORelocation, raw: bytes
    ) -> _stencils.Hole:
        """
        Convert a Mach-O relocation into a Hole.

        The addend is always zero here, and raw is unused.
        """
        symbol: str | None
        match relocation:
            case {
                "Offset": offset,
                "Symbol": {"Value": s},
                "Type": {
                    "Value": "ARM64_RELOC_GOT_LOAD_PAGE21"
                    | "ARM64_RELOC_GOT_LOAD_PAGEOFF12" as kind
                },
            }:
                # Loads through the global offset table: keep the symbol name,
                # and let StencilGroup.process_relocations() allocate the entry.
                offset += base
                s = s.removeprefix(self.prefix)
                value, symbol = _stencils.HoleValue.GOT, s
                addend = 0
            case {
                "Offset": offset,
                "Section": {"Value": s},
                "Type": {"Value": kind},
            } | {
                "Offset": offset,
                "Symbol": {"Value": s},
                "Type": {"Value": kind},
            }:
                # Everything else, whether it names a section or a symbol:
                offset += base
                s = s.removeprefix(self.prefix)
                value, symbol = _stencils.symbol_to_value(s)
                addend = 0
            case _:
                raise NotImplementedError(relocation)
        # Turn Clang's weird __bzero calls into normal bzero calls:
        if symbol == "__bzero":
            symbol = "bzero"
        return _stencils.Hole(offset, kind, value, symbol, addend)
def get_target(host: str) -> _COFF | _ELF | _MachO:
    """
    Create the _Target that knows how to build for the given host "triple".

    Raises ValueError (carrying the triple) for unsupported platforms.
    """

    def is_host(pattern: str) -> bool:
        return re.fullmatch(pattern, host) is not None

    target: _COFF | _ELF | _MachO
    if is_host(r"aarch64-apple-darwin.*"):
        target = _MachO(host, alignment=8, prefix="_")
    elif is_host(r"aarch64-.*-linux-gnu"):
        target = _ELF(host, alignment=8)
    elif is_host(r"i686-pc-windows-msvc"):
        target = _COFF(host, prefix="_")
    elif is_host(r"x86_64-apple-darwin.*"):
        target = _MachO(host, prefix="_")
    elif is_host(r"x86_64-pc-windows-msvc"):
        target = _COFF(host)
    elif is_host(r"x86_64-.*-linux-gnu"):
        target = _ELF(host)
    else:
        raise ValueError(host)
    return target

95
Tools/jit/_writer.py Normal file
View File

@ -0,0 +1,95 @@
"""Utilities for writing StencilGroups out to a C header file."""
import typing
import _schema
import _stencils
def _dump_header() -> typing.Iterator[str]:
    """Yield the C type definitions that the rest of the header relies on."""
    # One enum member per supported relocation kind:
    yield "typedef enum {"
    yield from (f" HoleKind_{kind}," for kind in typing.get_args(_schema.HoleKind))
    yield "} HoleKind;"
    yield ""
    # One enum member per patchable base value:
    yield "typedef enum {"
    yield from (f" HoleValue_{value.name}," for value in _stencils.HoleValue)
    yield "} HoleValue;"
    yield ""
    yield from [
        "typedef struct {",
        " const uint64_t offset;",
        " const HoleKind kind;",
        " const HoleValue value;",
        " const void *symbol;",
        " const uint64_t addend;",
        "} Hole;",
        "",
        "typedef struct {",
        " const size_t body_size;",
        " const unsigned char * const body;",
        " const size_t holes_size;",
        " const Hole * const holes;",
        "} Stencil;",
        "",
        "typedef struct {",
        " const Stencil code;",
        " const Stencil data;",
        "} StencilGroup;",
        "",
    ]
def _dump_footer(opnames: typing.Iterable[str]) -> typing.Iterator[str]:
    """Yield the initializer macros and the table of stencil groups."""
    # The arrays are declared one element longer than their contents, hence
    # the "- 1" when computing sizes:
    yield from [
        "#define INIT_STENCIL(STENCIL) { \\",
        " .body_size = Py_ARRAY_LENGTH(STENCIL##_body) - 1, \\",
        " .body = STENCIL##_body, \\",
        " .holes_size = Py_ARRAY_LENGTH(STENCIL##_holes) - 1, \\",
        " .holes = STENCIL##_holes, \\",
        "}",
        "",
        "#define INIT_STENCIL_GROUP(OP) { \\",
        " .code = INIT_STENCIL(OP##_code), \\",
        " .data = INIT_STENCIL(OP##_data), \\",
        "}",
        "",
    ]
    # The table is indexed by opcode:
    yield "static const StencilGroup stencil_groups[512] = {"
    yield from (f" [{opname}] = INIT_STENCIL_GROUP({opname})," for opname in opnames)
    yield "};"
    yield ""
    # Every patch value starts out as an obviously-bogus placeholder:
    yield "#define GET_PATCHES() { \\"
    yield from (
        f" [HoleValue_{value.name}] = (uint64_t)0xBADBADBADBADBADB, \\"
        for value in _stencils.HoleValue
    )
    yield "}"
def _dump_stencil(opname: str, group: _stencils.StencilGroup) -> typing.Iterator[str]:
    """
    Yield the C arrays for one uop's code and data stencils.

    Each array is declared with one extra trailing element, so that empty
    bodies and hole lists are still valid (single-element) C arrays.
    """
    yield f"// {opname}"
    parts = {"code": group.code, "data": group.data}
    for part, stencil in parts.items():
        # The disassembly is only included as commentary:
        yield from (f"// {line}" for line in stencil.disassembly)
        body, holes = stencil.body, stencil.holes
        if not body:
            yield f"static const unsigned char {opname}_{part}_body[1];"
        else:
            yield (
                f"static const unsigned char {opname}_{part}_body[{len(body) + 1}] = {{"
            )
            # Eight bytes per row:
            for start in range(0, len(body), 8):
                chunk = body[start : start + 8]
                yield " " + " ".join(f"{byte:#04x}," for byte in chunk)
            yield "};"
        if not holes:
            yield f"static const Hole {opname}_{part}_holes[1];"
        else:
            yield f"static const Hole {opname}_{part}_holes[{len(holes) + 1}] = {{"
            yield from (f" {hole.as_c()}," for hole in holes)
            yield "};"
    yield ""
def dump(groups: dict[str, _stencils.StencilGroup]) -> typing.Iterator[str]:
    """Yield a JIT compiler line-by-line as a C header file."""
    # Type definitions first, then every stencil, then the lookup table:
    yield from _dump_header()
    for opname in groups:
        yield from _dump_stencil(opname, groups[opname])
    yield from _dump_footer(groups)

28
Tools/jit/build.py Normal file
View File

@ -0,0 +1,28 @@
"""Build an experimental just-in-time compiler for CPython."""
import argparse
import pathlib
import shlex
import sys
import _targets
if __name__ == "__main__":
    # Recorded at the top of the generated header, so it's clear how it was made:
    comment = f"$ {shlex.join([sys.executable] + sys.argv)}"
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "target", type=_targets.get_target, help="a PEP 11 target triple to compile for"
    )
    # Each switch is a boolean flag that is copied onto the target as-is:
    switches = {
        "debug": "compile for a debug build of Python",
        "force": "force the entire JIT to be rebuilt",
        "verbose": "echo commands as they are run",
    }
    for switch, description in switches.items():
        parser.add_argument(
            f"-{switch[0]}", f"--{switch}", action="store_true", help=description
        )
    args = parser.parse_args()
    target = args.target
    for switch in switches:
        setattr(target, switch, getattr(args, switch))
    target.build(pathlib.Path.cwd(), comment=comment)

5
Tools/jit/mypy.ini Normal file
View File

@ -0,0 +1,5 @@
[mypy]
files = Tools/jit
pretty = True
python_version = 3.11
strict = True

98
Tools/jit/template.c Normal file
View File

@ -0,0 +1,98 @@
#include "Python.h"
#include "pycore_call.h"
#include "pycore_ceval.h"
#include "pycore_dict.h"
#include "pycore_emscripten_signal.h"
#include "pycore_intrinsics.h"
#include "pycore_jit.h"
#include "pycore_long.h"
#include "pycore_opcode_metadata.h"
#include "pycore_opcode_utils.h"
#include "pycore_range.h"
#include "pycore_setobject.h"
#include "pycore_sliceobject.h"
#include "ceval_macros.h"
// The macros below replace definitions pulled in by the headers above, so that
// the included instruction bodies work inside a standalone, patchable function.

// The oparg and operand are read from locals that _JIT_ENTRY initializes with
// patched-in values:
#undef CURRENT_OPARG
#define CURRENT_OPARG() (_oparg)

#undef CURRENT_OPERAND
#define CURRENT_OPERAND() (_operand)

// A failed guard jumps to the "deoptimize" label at the end of _JIT_ENTRY
// (INSTNAME is ignored):
#undef DEOPT_IF
#define DEOPT_IF(COND, INSTNAME) \
    do {                         \
        if ((COND)) {            \
            goto deoptimize;     \
        }                        \
    } while (0)

#undef ENABLE_SPECIALIZATION
#define ENABLE_SPECIALIZATION (0)

// Errors jump to the "<LABEL>_tier_two" labels at the end of _JIT_ENTRY:
#undef GOTO_ERROR
#define GOTO_ERROR(LABEL)        \
    do {                         \
        goto LABEL ## _tier_two; \
    } while (0)

// Does nothing here:
#undef LOAD_IP
#define LOAD_IP(UNUSED) \
    do {                \
    } while (0)

// Declare a local NAME whose value is the *address* of the external symbol
// ALIAS. The symbol is never defined; the build scripts turn references to the
// recognized _JIT_* names into holes that get patched with the real value.
#define PATCH_VALUE(TYPE, NAME, ALIAS) \
    extern void ALIAS;                 \
    TYPE NAME = (TYPE)(uint64_t)&ALIAS;

// Tail call the code located at the (patched-in) address of ALIAS, passing
// along the current frame, stack pointer, and thread state:
#define PATCH_JUMP(ALIAS)     \
    extern void ALIAS;        \
    __attribute__((musttail)) \
    return ((jit_func)&ALIAS)(frame, stack_pointer, tstate);
// The template for a single micro-op. The build scripts compile this function
// once per opcode, with _JIT_OPCODE defined to that opcode on the command
// line. It ends by tail calling the next piece of code, by returning NULL on
// error, or by returning the bytecode instruction to resume at after a
// deoptimization.
_Py_CODEUNIT *
_JIT_ENTRY(_PyInterpreterFrame *frame, PyObject **stack_pointer, PyThreadState *tstate)
{
    // Locals that the instruction implementations expect to exist:
    PATCH_VALUE(_PyExecutorObject *, current_executor, _JIT_EXECUTOR)
    int oparg;
    // A compile-time constant, so only one of the cases below applies:
    int opcode = _JIT_OPCODE;
    _PyUOpInstruction *next_uop;
    // Other stuff we need handy:
    PATCH_VALUE(uint16_t, _oparg, _JIT_OPARG)
    PATCH_VALUE(uint64_t, _operand, _JIT_OPERAND)
    PATCH_VALUE(uint32_t, _target, _JIT_TARGET)
    // The actual instruction definitions (only one will be used):
    if (opcode == _JUMP_TO_TOP) {
        // Handled here rather than in the switch below: jump to _JIT_TOP
        // instead of continuing with the next uop.
        CHECK_EVAL_BREAKER();
        PATCH_JUMP(_JIT_TOP);
    }
    switch (opcode) {
#include "executor_cases.c.h"
        default:
            Py_UNREACHABLE();
    }
    // The instruction completed normally, so continue with the next one:
    PATCH_JUMP(_JIT_CONTINUE);
    // Labels that the instruction implementations expect to exist:
unbound_local_error_tier_two:
    _PyEval_FormatExcCheckArg(
        tstate, PyExc_UnboundLocalError, UNBOUNDLOCAL_ERROR_MSG,
        PyTuple_GetItem(_PyFrame_GetCode(frame)->co_localsplusnames, oparg));
    goto error_tier_two;
    // Each of these labels shrinks the stack by one and then falls through to
    // the next, so pop_N_error_tier_two shrinks it by N in total:
pop_4_error_tier_two:
    STACK_SHRINK(1);
pop_3_error_tier_two:
    STACK_SHRINK(1);
pop_2_error_tier_two:
    STACK_SHRINK(1);
pop_1_error_tier_two:
    STACK_SHRINK(1);
error_tier_two:
    // Save the stack pointer to the frame, and report the error with NULL:
    _PyFrame_SetStackPointer(frame, stack_pointer);
    return NULL;
deoptimize:
    // Save the stack pointer to the frame, and return the instruction at
    // offset _target in the frame's code object:
    _PyFrame_SetStackPointer(frame, stack_pointer);
    return _PyCode_CODE(_PyFrame_GetCode(frame)) + _target;
}

31
configure generated vendored
View File

@ -920,6 +920,7 @@ LLVM_AR
PROFILE_TASK
DEF_MAKE_RULE
DEF_MAKE_ALL_RULE
REGEN_JIT_COMMAND
ABIFLAGS
LN
MKDIR_P
@ -1074,6 +1075,7 @@ with_pydebug
with_trace_refs
enable_pystats
with_assertions
enable_experimental_jit
enable_optimizations
with_lto
enable_bolt
@ -1801,6 +1803,9 @@ Optional Features:
--disable-gil enable experimental support for running without the
GIL (default is no)
--enable-pystats enable internal statistics gathering (default is no)
--enable-experimental-jit
build the experimental just-in-time compiler
(default is no)
--enable-optimizations enable expensive, stable optimizations (PGO, etc.)
(default is no)
--enable-bolt enable usage of the llvm-bolt post-link optimizer
@ -7997,6 +8002,32 @@ else
printf "%s\n" "no" >&6; }
fi
# Check for --enable-experimental-jit:
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for --enable-experimental-jit" >&5
printf %s "checking for --enable-experimental-jit... " >&6; }
# Check whether --enable-experimental-jit was given.
if test ${enable_experimental_jit+y}
then :
enableval=$enable_experimental_jit;
else $as_nop
enable_experimental_jit=no
fi
if test "x$enable_experimental_jit" = xno
then :
else $as_nop
as_fn_append CFLAGS_NODIST " -D_Py_JIT"
REGEN_JIT_COMMAND="\$(PYTHON_FOR_REGEN) \$(srcdir)/Tools/jit/build.py $host"
if test "x$Py_DEBUG" = xtrue
then :
as_fn_append REGEN_JIT_COMMAND " --debug"
fi
fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_experimental_jit" >&5
printf "%s\n" "$enable_experimental_jit" >&6; }
# Enable optimization flags

View File

@ -1579,6 +1579,26 @@ else
AC_MSG_RESULT([no])
fi
# Check for --enable-experimental-jit:
AC_MSG_CHECKING([for --enable-experimental-jit])
AC_ARG_ENABLE([experimental-jit],
[AS_HELP_STRING([--enable-experimental-jit],
[build the experimental just-in-time compiler (default is no)])],
[],
[enable_experimental_jit=no])
AS_VAR_IF([enable_experimental_jit],
[no],
[],
[AS_VAR_APPEND([CFLAGS_NODIST], [" -D_Py_JIT"])
AS_VAR_SET([REGEN_JIT_COMMAND],
["\$(PYTHON_FOR_REGEN) \$(srcdir)/Tools/jit/build.py $host"])
AS_VAR_IF([Py_DEBUG],
[true],
[AS_VAR_APPEND([REGEN_JIT_COMMAND], [" --debug"])],
[])])
AC_SUBST([REGEN_JIT_COMMAND])
AC_MSG_RESULT([$enable_experimental_jit])
# Enable optimization flags
AC_SUBST([DEF_MAKE_ALL_RULE])
AC_SUBST([DEF_MAKE_RULE])