mirror of https://github.com/python/cpython
GH-113464: Add a JIT backend for tier 2 (GH-113465)
Add an option (--enable-experimental-jit for configure-based builds or --experimental-jit for PCbuild-based ones) to build an *experimental* just-in-time compiler, based on copy-and-patch (https://fredrikbk.com/publications/copy-and-patch.pdf). See Tools/jit/README.md for more information on how to install the required build-time tooling.
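For example, enabling the JIT at build time looks roughly like this (a minimal sketch; LLVM 16 must be installed first, as described in Tools/jit/README.md):

```sh
# configure-based builds (Linux, macOS, etc.):
./configure --enable-experimental-jit
# PCbuild-based builds (Windows), roughly equivalent:
#   PCbuild\build.bat --experimental-jit
```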
This commit is contained in:
parent
f7c05d7ad3
commit
f6d9e5926b

@@ -0,0 +1,112 @@
name: JIT
on:
  pull_request:
    paths: '**jit**'
  push:
    paths: '**jit**'
  workflow_dispatch:
jobs:
  jit:
    name: ${{ matrix.target }} (${{ matrix.debug && 'Debug' || 'Release' }})
    runs-on: ${{ matrix.runner }}
    strategy:
      fail-fast: false
      matrix:
        target:
          - i686-pc-windows-msvc/msvc
          - x86_64-pc-windows-msvc/msvc
          - x86_64-apple-darwin/clang
          - x86_64-unknown-linux-gnu/gcc
          - x86_64-unknown-linux-gnu/clang
          - aarch64-unknown-linux-gnu/gcc
          - aarch64-unknown-linux-gnu/clang
        debug:
          - true
          - false
        llvm:
          - 16
        include:
          - target: i686-pc-windows-msvc/msvc
            architecture: Win32
            runner: windows-latest
            compiler: msvc
          - target: x86_64-pc-windows-msvc/msvc
            architecture: x64
            runner: windows-latest
            compiler: msvc
          - target: x86_64-apple-darwin/clang
            architecture: x86_64
            runner: macos-latest
            compiler: clang
            exclude: test_embed
          - target: x86_64-unknown-linux-gnu/gcc
            architecture: x86_64
            runner: ubuntu-latest
            compiler: gcc
          - target: x86_64-unknown-linux-gnu/clang
            architecture: x86_64
            runner: ubuntu-latest
            compiler: clang
          - target: aarch64-unknown-linux-gnu/gcc
            architecture: aarch64
            runner: ubuntu-latest
            compiler: gcc
            # These fail because of emulation, not because of the JIT:
            exclude: test_unix_events test_init test_process_pool test_shutdown test_multiprocessing_fork test_cmd_line test_faulthandler test_os test_perf_profiler test_posix test_signal test_socket test_subprocess test_threading test_venv
          - target: aarch64-unknown-linux-gnu/clang
            architecture: aarch64
            runner: ubuntu-latest
            compiler: clang
            # These fail because of emulation, not because of the JIT:
            exclude: test_unix_events test_init test_process_pool test_shutdown test_multiprocessing_fork test_cmd_line test_faulthandler test_os test_perf_profiler test_posix test_signal test_socket test_subprocess test_threading test_venv
    env:
      CC: ${{ matrix.compiler }}
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Windows
        if: runner.os == 'Windows'
        run: |
          choco install llvm --allow-downgrade --no-progress --version ${{ matrix.llvm }}
          ./PCbuild/build.bat --experimental-jit ${{ matrix.debug && '-d' || '--pgo' }} -p ${{ matrix.architecture }}
          ./PCbuild/rt.bat ${{ matrix.debug && '-d' }} -p ${{ matrix.architecture }} -q --exclude ${{ matrix.exclude }} --multiprocess 0 --timeout 3600 --verbose2 --verbose3

      - name: macOS
        if: runner.os == 'macOS'
        run: |
          brew install llvm@${{ matrix.llvm }}
          export SDKROOT="$(xcrun --show-sdk-path)"
          ./configure --enable-experimental-jit ${{ matrix.debug && '--with-pydebug' || '--enable-optimizations --with-lto' }}
          make all --jobs 3
          ./python.exe -m test --exclude ${{ matrix.exclude }} --multiprocess 0 --timeout 3600 --verbose2 --verbose3

      - name: Native Linux
        if: runner.os == 'Linux' && matrix.architecture == 'x86_64'
        run: |
          sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ./llvm.sh ${{ matrix.llvm }}
          export PATH="$(llvm-config-${{ matrix.llvm }} --bindir):$PATH"
          ./configure --enable-experimental-jit ${{ matrix.debug && '--with-pydebug' || '--enable-optimizations --with-lto' }}
          make all --jobs 4
          ./python -m test --exclude ${{ matrix.exclude }} --multiprocess 0 --timeout 3600 --verbose2 --verbose3
      - name: Emulated Linux
        if: runner.os == 'Linux' && matrix.architecture != 'x86_64'
        run: |
          sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ./llvm.sh ${{ matrix.llvm }}
          export PATH="$(llvm-config-${{ matrix.llvm }} --bindir):$PATH"
          ./configure --prefix="$(pwd)/../build"
          make install --jobs 4
          make clean --jobs 4
          export HOST=${{ matrix.architecture }}-linux-gnu
          sudo apt install --yes "gcc-$HOST" qemu-user
          ${{ !matrix.debug && matrix.compiler == 'clang' && './configure --enable-optimizations' || '' }}
          ${{ !matrix.debug && matrix.compiler == 'clang' && 'make profile-run-stamp --jobs 4' || '' }}
          export CC="${{ matrix.compiler == 'clang' && 'clang --target=$HOST' || '$HOST-gcc' }}"
          export CPP="$CC --preprocess"
          export HOSTRUNNER=qemu-${{ matrix.architecture }}
          export QEMU_LD_PREFIX="/usr/$HOST"
          ./configure --enable-experimental-jit ${{ matrix.debug && '--with-pydebug' || '--enable-optimizations --with-lto' }} --build=x86_64-linux-gnu --host="$HOST" --with-build-python=../build/bin/python3 --with-pkg-config=no ac_cv_buggy_getaddrinfo=no ac_cv_file__dev_ptc=no ac_cv_file__dev_ptmx=yes
          make all --jobs 4
          ./python -m test --exclude ${{ matrix.exclude }} --multiprocess 0 --timeout 3600 --verbose2 --verbose3

@@ -12,6 +12,7 @@ on:
- "Tools/build/generate_sbom.py"
- "Tools/cases_generator/**"
- "Tools/clinic/**"
- "Tools/jit/**"
- "Tools/peg_generator/**"
- "Tools/requirements-dev.txt"
- "Tools/wasm/**"

@@ -38,6 +39,7 @@ jobs:
"Tools/build/",
"Tools/cases_generator",
"Tools/clinic",
"Tools/jit",
"Tools/peg_generator",
"Tools/wasm",
]

@@ -126,6 +126,7 @@ Tools/unicode/data/
# hendrikmuhs/ccache-action@v1
/.ccache
/cross-build/
/jit_stencils.h
/platform
/profile-clean-stamp
/profile-run-stamp

@@ -39,6 +39,8 @@ typedef struct {
typedef struct _PyExecutorObject {
    PyObject_VAR_HEAD
    _PyVMData vm_data; /* Used by the VM, but opaque to the optimizer */
    void *jit_code;
    size_t jit_size;
    _PyUOpInstruction trace[1];
} _PyExecutorObject;

@@ -0,0 +1,25 @@
#ifndef Py_INTERNAL_JIT_H
#define Py_INTERNAL_JIT_H

#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
#  error "this header requires Py_BUILD_CORE define"
#endif

#ifdef _Py_JIT

typedef _Py_CODEUNIT *(*jit_func)(_PyInterpreterFrame *frame, PyObject **stack_pointer, PyThreadState *tstate);

int _PyJIT_Compile(_PyExecutorObject *executor, _PyUOpInstruction *trace, size_t length);
void _PyJIT_Free(_PyExecutorObject *executor);

#endif // _Py_JIT

#ifdef __cplusplus
}
#endif

#endif // !Py_INTERNAL_JIT_H
@ -178,7 +178,7 @@ _Py_DECREF_SPECIALIZED(PyObject *op, const destructor destruct)
|
|||
}
|
||||
_Py_DECREF_STAT_INC();
|
||||
#ifdef Py_REF_DEBUG
|
||||
_Py_DEC_REFTOTAL(_PyInterpreterState_GET());
|
||||
_Py_DEC_REFTOTAL(PyInterpreterState_Get());
|
||||
#endif
|
||||
if (--op->ob_refcnt != 0) {
|
||||
assert(op->ob_refcnt > 0);
|
||||
|
@ -199,7 +199,7 @@ _Py_DECREF_NO_DEALLOC(PyObject *op)
|
|||
}
|
||||
_Py_DECREF_STAT_INC();
|
||||
#ifdef Py_REF_DEBUG
|
||||
_Py_DEC_REFTOTAL(_PyInterpreterState_GET());
|
||||
_Py_DEC_REFTOTAL(PyInterpreterState_Get());
|
||||
#endif
|
||||
op->ob_refcnt--;
|
||||
#ifdef Py_DEBUG
|
||||
|
|
|
@ -433,6 +433,7 @@ PYTHON_OBJS= \
|
|||
Python/initconfig.o \
|
||||
Python/instrumentation.o \
|
||||
Python/intrinsics.o \
|
||||
Python/jit.o \
|
||||
Python/legacy_tracing.o \
|
||||
Python/lock.o \
|
||||
Python/marshal.o \
|
||||
|
@ -1365,7 +1366,7 @@ regen-unicodedata:
|
|||
regen-all: regen-cases regen-typeslots \
|
||||
regen-token regen-ast regen-keyword regen-sre regen-frozen \
|
||||
regen-pegen-metaparser regen-pegen regen-test-frozenmain \
|
||||
regen-test-levenshtein regen-global-objects regen-sbom
|
||||
regen-test-levenshtein regen-global-objects regen-sbom regen-jit
|
||||
@echo
|
||||
@echo "Note: make regen-stdlib-module-names, make regen-limited-abi, "
|
||||
@echo "make regen-configure and make regen-unicodedata should be run manually"
|
||||
|
@ -1846,6 +1847,7 @@ PYTHON_HEADERS= \
|
|||
$(srcdir)/Include/internal/pycore_initconfig.h \
|
||||
$(srcdir)/Include/internal/pycore_interp.h \
|
||||
$(srcdir)/Include/internal/pycore_intrinsics.h \
|
||||
$(srcdir)/Include/internal/pycore_jit.h \
|
||||
$(srcdir)/Include/internal/pycore_list.h \
|
||||
$(srcdir)/Include/internal/pycore_llist.h \
|
||||
$(srcdir)/Include/internal/pycore_lock.h \
|
||||
|
@ -2641,6 +2643,12 @@ config.status: $(srcdir)/configure
|
|||
Python/asm_trampoline.o: $(srcdir)/Python/asm_trampoline.S
|
||||
$(CC) -c $(PY_CORE_CFLAGS) -o $@ $<
|
||||
|
||||
Python/jit.o: regen-jit
|
||||
|
||||
.PHONY: regen-jit
|
||||
regen-jit:
|
||||
@REGEN_JIT_COMMAND@
|
||||
|
||||
# Some make's put the object file in the current directory
|
||||
.c.o:
|
||||
$(CC) -c $(PY_CORE_CFLAGS) -o $@ $<
|
||||
|
@ -2733,6 +2741,7 @@ clean-retain-profile: pycremoval
|
|||
-rm -f Python/deepfreeze/*.[co]
|
||||
-rm -f Python/frozen_modules/*.h
|
||||
-rm -f Python/frozen_modules/MANIFEST
|
||||
-rm -f jit_stencils.h
|
||||
-find build -type f -a ! -name '*.gc??' -exec rm -f {} ';'
|
||||
-rm -f Include/pydtrace_probes.h
|
||||
-rm -f profile-gen-stamp
|
||||
|
|
|

@@ -0,0 +1,4 @@
Add an option (``--enable-experimental-jit`` for ``configure``-based builds
or ``--experimental-jit`` for ``PCbuild``-based ones) to build an
*experimental* just-in-time compiler, based on `copy-and-patch
<https://fredrikbk.com/publications/copy-and-patch.pdf>`_
@ -224,6 +224,7 @@
|
|||
<ClCompile Include="..\Python\initconfig.c" />
|
||||
<ClCompile Include="..\Python\intrinsics.c" />
|
||||
<ClCompile Include="..\Python\instrumentation.c" />
|
||||
<ClCompile Include="..\Python\jit.c" />
|
||||
<ClCompile Include="..\Python\legacy_tracing.c" />
|
||||
<ClCompile Include="..\Python\lock.c" />
|
||||
<ClCompile Include="..\Python\marshal.c" />
|
||||
|
|
|
@ -250,6 +250,9 @@
|
|||
<ClCompile Include="..\Objects\iterobject.c">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Python\jit.c">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Objects\listobject.c">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
|
|
|
@ -36,6 +36,7 @@ echo. overrides -c and -d
|
|||
echo. --disable-gil Enable experimental support for running without the GIL.
|
||||
echo. --test-marker Enable the test marker within the build.
|
||||
echo. --regen Regenerate all opcodes, grammar and tokens.
|
||||
echo. --experimental-jit Enable the experimental just-in-time compiler.
|
||||
echo.
|
||||
echo.Available flags to avoid building certain modules.
|
||||
echo.These flags have no effect if '-e' is not given:
|
||||
|
@ -85,6 +86,7 @@ if "%~1"=="--disable-gil" (set UseDisableGil=true) & shift & goto CheckOpts
|
|||
if "%~1"=="--test-marker" (set UseTestMarker=true) & shift & goto CheckOpts
|
||||
if "%~1"=="-V" shift & goto Version
|
||||
if "%~1"=="--regen" (set Regen=true) & shift & goto CheckOpts
|
||||
if "%~1"=="--experimental-jit" (set UseJIT=true) & shift & goto CheckOpts
|
||||
rem These use the actual property names used by MSBuild. We could just let
|
||||
rem them in through the environment, but we specify them on the command line
|
||||
rem anyway for visibility so set defaults after this
|
||||
|
@ -176,6 +178,7 @@ echo on
|
|||
/p:IncludeSSL=%IncludeSSL% /p:IncludeTkinter=%IncludeTkinter%^
|
||||
/p:DisableGil=%UseDisableGil%^
|
||||
/p:UseTestMarker=%UseTestMarker% %GITProperty%^
|
||||
/p:UseJIT=%UseJIT%^
|
||||
%1 %2 %3 %4 %5 %6 %7 %8 %9
|
||||
|
||||
@echo off
|
||||
|
|
|
@ -104,6 +104,7 @@
|
|||
<AdditionalIncludeDirectories Condition="$(IncludeExternals)">$(zlibDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>_USRDLL;Py_BUILD_CORE;Py_BUILD_CORE_BUILTIN;Py_ENABLE_SHARED;MS_DLL_ID="$(SysWinVer)";%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<PreprocessorDefinitions Condition="$(IncludeExternals)">_Py_HAVE_ZLIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<PreprocessorDefinitions Condition="'$(UseJIT)' == 'true'">_Py_JIT;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<AdditionalDependencies>version.lib;ws2_32.lib;pathcch.lib;bcrypt.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
|
@ -247,6 +248,7 @@
|
|||
<ClInclude Include="..\Include\internal\pycore_initconfig.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_interp.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_intrinsics.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_jit.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_list.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_llist.h" />
|
||||
<ClInclude Include="..\Include\internal\pycore_lock.h" />
|
||||
|
@ -585,6 +587,7 @@
|
|||
<ClCompile Include="..\Python\initconfig.c" />
|
||||
<ClCompile Include="..\Python\intrinsics.c" />
|
||||
<ClCompile Include="..\Python\instrumentation.c" />
|
||||
<ClCompile Include="..\Python\jit.c" />
|
||||
<ClCompile Include="..\Python\legacy_tracing.c" />
|
||||
<ClCompile Include="..\Python\lock.c" />
|
||||
<ClCompile Include="..\Python\marshal.c" />
|
||||
|
|
|
@ -669,6 +669,9 @@
|
|||
<ClInclude Include="..\Include\internal\pycore_intrinsics.h">
|
||||
<Filter>Include\cpython</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Include\internal\pycore_jit.h">
|
||||
<Filter>Include\internal</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Include\internal\pycore_list.h">
|
||||
<Filter>Include\internal</Filter>
|
||||
</ClInclude>
|
||||
|
@ -1337,6 +1340,9 @@
|
|||
<ClCompile Include="..\Python\instrumentation.c">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Python\jit.c">
|
||||
<Filter>Python</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Python\legacy_tracing.c">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
|
|
|
@ -28,6 +28,9 @@
|
|||
</_TokenOutputs>
|
||||
<_KeywordSources Include="$(PySourcePath)Grammar\python.gram;$(PySourcePath)Grammar\Tokens" />
|
||||
<_KeywordOutputs Include="$(PySourcePath)Lib\keyword.py" />
|
||||
<!-- Taken from _Target._compute_digest in Tools\jit\_targets.py: -->
|
||||
<_JITSources Include="$(PySourcePath)Python\executor_cases.c.h;$(GeneratedPyConfigDir)pyconfig.h;$(PySourcePath)Tools\jit\**"/>
|
||||
<_JITOutputs Include="$(GeneratedPyConfigDir)jit_stencils.h"/>
|
||||
</ItemGroup>
|
||||
|
||||
<Target Name="_TouchRegenSources" Condition="$(ForceRegen) == 'true'">
|
||||
|
@ -76,10 +79,28 @@
|
|||
<Exec Command="$(PythonForBuild) Tools\build\generate_global_objects.py"
|
||||
WorkingDirectory="$(PySourcePath)" />
|
||||
</Target>
|
||||
|
||||
<Target Name="_RegenJIT"
|
||||
Condition="'$(UseJIT)' == 'true'"
|
||||
DependsOnTargets="_UpdatePyconfig;FindPythonForBuild"
|
||||
Inputs="@(_JITSources)"
|
||||
Outputs="@(_JITOutputs)">
|
||||
<PropertyGroup>
|
||||
<JITArgs Condition="$(Platform) == 'ARM64'">aarch64-pc-windows-msvc</JITArgs>
|
||||
<JITArgs Condition="$(Platform) == 'Win32'">i686-pc-windows-msvc</JITArgs>
|
||||
<JITArgs Condition="$(Platform) == 'x64'">x86_64-pc-windows-msvc</JITArgs>
|
||||
<JITArgs Condition="$(Configuration) == 'Debug'">$(JITArgs) --debug</JITArgs>
|
||||
</PropertyGroup>
|
||||
<Exec Command='$(PythonForBuild) "$(PySourcePath)Tools\jit\build.py" $(JITArgs)'
|
||||
WorkingDirectory="$(GeneratedPyConfigDir)"/>
|
||||
</Target>
|
||||
|
||||
<Target Name="Regen"
|
||||
<Target Name="_RegenNoPGUpdate"
|
||||
Condition="$(Configuration) != 'PGUpdate'"
|
||||
DependsOnTargets="_TouchRegenSources;_RegenPegen;_RegenAST_H;_RegenTokens;_RegenKeywords;_RegenGlobalObjects">
|
||||
</Target>
|
||||
|
||||
<Target Name="Regen" DependsOnTargets="_RegenNoPGUpdate;_RegenJIT">
|
||||
<Message Text="Generated sources are up to date" Importance="high" />
|
||||
</Target>
|
||||
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include "pycore_function.h"
|
||||
#include "pycore_instruments.h"
|
||||
#include "pycore_intrinsics.h"
|
||||
#include "pycore_jit.h"
|
||||
#include "pycore_long.h" // _PyLong_GetZero()
|
||||
#include "pycore_moduleobject.h" // PyModuleObject
|
||||
#include "pycore_object.h" // _PyObject_GC_TRACK()
|
||||
|
@ -955,9 +956,24 @@ resume_with_error:
|
|||
|
||||
|
||||
|
||||
// The Tier 2 interpreter is also here!
|
||||
// Tier 2 is also here!
|
||||
enter_tier_two:
|
||||
|
||||
#ifdef _Py_JIT
|
||||
|
||||
; // ;)
|
||||
jit_func jitted = current_executor->jit_code;
|
||||
next_instr = jitted(frame, stack_pointer, tstate);
|
||||
frame = tstate->current_frame;
|
||||
Py_DECREF(current_executor);
|
||||
if (next_instr == NULL) {
|
||||
goto resume_with_error;
|
||||
}
|
||||
stack_pointer = _PyFrame_GetStackPointer(frame);
|
||||
DISPATCH();
|
||||
|
||||
#else
|
||||
|
||||
#undef LOAD_IP
|
||||
#define LOAD_IP(UNUSED) (void)0
|
||||
|
||||
|
@ -1073,6 +1089,8 @@ deoptimize:
|
|||
Py_DECREF(current_executor);
|
||||
DISPATCH();
|
||||
|
||||
#endif // _Py_JIT
|
||||
|
||||
}
|
||||
#if defined(__GNUC__)
|
||||
# pragma GCC diagnostic pop
|
||||
|
|
|
@ -0,0 +1,369 @@
|
|||
#ifdef _Py_JIT
|
||||
|
||||
#include "Python.h"
|
||||
|
||||
#include "pycore_abstract.h"
|
||||
#include "pycore_call.h"
|
||||
#include "pycore_ceval.h"
|
||||
#include "pycore_dict.h"
|
||||
#include "pycore_intrinsics.h"
|
||||
#include "pycore_long.h"
|
||||
#include "pycore_opcode_metadata.h"
|
||||
#include "pycore_opcode_utils.h"
|
||||
#include "pycore_optimizer.h"
|
||||
#include "pycore_pyerrors.h"
|
||||
#include "pycore_setobject.h"
|
||||
#include "pycore_sliceobject.h"
|
||||
#include "pycore_jit.h"
|
||||
|
||||
#include "jit_stencils.h"
|
||||
|
||||
// Memory management stuff: ////////////////////////////////////////////////////
|
||||
|
||||
#ifndef MS_WINDOWS
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
static size_t
|
||||
get_page_size(void)
|
||||
{
|
||||
#ifdef MS_WINDOWS
|
||||
SYSTEM_INFO si;
|
||||
GetSystemInfo(&si);
|
||||
return si.dwPageSize;
|
||||
#else
|
||||
return sysconf(_SC_PAGESIZE);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
jit_error(const char *message)
|
||||
{
|
||||
#ifdef MS_WINDOWS
|
||||
int hint = GetLastError();
|
||||
#else
|
||||
int hint = errno;
|
||||
#endif
|
||||
PyErr_Format(PyExc_RuntimeWarning, "JIT %s (%d)", message, hint);
|
||||
}
|
||||
|
||||
static char *
|
||||
jit_alloc(size_t size)
|
||||
{
|
||||
assert(size);
|
||||
assert(size % get_page_size() == 0);
|
||||
#ifdef MS_WINDOWS
|
||||
int flags = MEM_COMMIT | MEM_RESERVE;
|
||||
char *memory = VirtualAlloc(NULL, size, flags, PAGE_READWRITE);
|
||||
int failed = memory == NULL;
|
||||
#else
|
||||
int flags = MAP_ANONYMOUS | MAP_PRIVATE;
|
||||
char *memory = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0);
|
||||
int failed = memory == MAP_FAILED;
|
||||
#endif
|
||||
if (failed) {
|
||||
jit_error("unable to allocate memory");
|
||||
return NULL;
|
||||
}
|
||||
return memory;
|
||||
}
|
||||
|
||||
static int
|
||||
jit_free(char *memory, size_t size)
|
||||
{
|
||||
assert(size);
|
||||
assert(size % get_page_size() == 0);
|
||||
#ifdef MS_WINDOWS
|
||||
int failed = !VirtualFree(memory, 0, MEM_RELEASE);
|
||||
#else
|
||||
int failed = munmap(memory, size);
|
||||
#endif
|
||||
if (failed) {
|
||||
jit_error("unable to free memory");
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
mark_executable(char *memory, size_t size)
|
||||
{
|
||||
if (size == 0) {
|
||||
return 0;
|
||||
}
|
||||
assert(size % get_page_size() == 0);
|
||||
// Do NOT ever leave the memory writable! Also, don't forget to flush the
|
||||
// i-cache (I cannot begin to tell you how horrible that is to debug):
|
||||
#ifdef MS_WINDOWS
|
||||
if (!FlushInstructionCache(GetCurrentProcess(), memory, size)) {
|
||||
jit_error("unable to flush instruction cache");
|
||||
return -1;
|
||||
}
|
||||
int old;
|
||||
int failed = !VirtualProtect(memory, size, PAGE_EXECUTE_READ, &old);
|
||||
#else
|
||||
__builtin___clear_cache((char *)memory, (char *)memory + size);
|
||||
int failed = mprotect(memory, size, PROT_EXEC | PROT_READ);
|
||||
#endif
|
||||
if (failed) {
|
||||
jit_error("unable to protect executable memory");
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
mark_readable(char *memory, size_t size)
|
||||
{
|
||||
if (size == 0) {
|
||||
return 0;
|
||||
}
|
||||
assert(size % get_page_size() == 0);
|
||||
#ifdef MS_WINDOWS
|
||||
DWORD old;
|
||||
int failed = !VirtualProtect(memory, size, PAGE_READONLY, &old);
|
||||
#else
|
||||
int failed = mprotect(memory, size, PROT_READ);
|
||||
#endif
|
||||
if (failed) {
|
||||
jit_error("unable to protect readable memory");
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// JIT compiler stuff: /////////////////////////////////////////////////////////
|
||||
|
||||
// Warning! AArch64 requires you to get your hands dirty. These are your gloves:
|
||||
|
||||
// value[value_start : value_start + len]
|
||||
static uint32_t
|
||||
get_bits(uint64_t value, uint8_t value_start, uint8_t width)
|
||||
{
|
||||
assert(width <= 32);
|
||||
return (value >> value_start) & ((1ULL << width) - 1);
|
||||
}
|
||||
|
||||
// *loc[loc_start : loc_start + width] = value[value_start : value_start + width]
|
||||
static void
|
||||
set_bits(uint32_t *loc, uint8_t loc_start, uint64_t value, uint8_t value_start,
|
||||
uint8_t width)
|
||||
{
|
||||
assert(loc_start + width <= 32);
|
||||
// Clear the bits we're about to patch:
|
||||
*loc &= ~(((1ULL << width) - 1) << loc_start);
|
||||
assert(get_bits(*loc, loc_start, width) == 0);
|
||||
// Patch the bits:
|
||||
*loc |= get_bits(value, value_start, width) << loc_start;
|
||||
assert(get_bits(*loc, loc_start, width) == get_bits(value, value_start, width));
|
||||
}
|
||||
|
||||
// See https://developer.arm.com/documentation/ddi0602/2023-09/Base-Instructions
|
||||
// for instruction encodings:
|
||||
#define IS_AARCH64_ADD_OR_SUB(I) (((I) & 0x11C00000) == 0x11000000)
|
||||
#define IS_AARCH64_ADRP(I) (((I) & 0x9F000000) == 0x90000000)
|
||||
#define IS_AARCH64_BRANCH(I) (((I) & 0x7C000000) == 0x14000000)
|
||||
#define IS_AARCH64_LDR_OR_STR(I) (((I) & 0x3B000000) == 0x39000000)
|
||||
#define IS_AARCH64_MOV(I) (((I) & 0x9F800000) == 0x92800000)
|
||||
|
||||
// Fill all of stencil's holes in the memory pointed to by base, using the
|
||||
// values in patches.
|
||||
static void
|
||||
patch(char *base, const Stencil *stencil, uint64_t *patches)
|
||||
{
|
||||
for (uint64_t i = 0; i < stencil->holes_size; i++) {
|
||||
const Hole *hole = &stencil->holes[i];
|
||||
void *location = base + hole->offset;
|
||||
uint64_t value = patches[hole->value] + (uint64_t)hole->symbol + hole->addend;
|
||||
uint32_t *loc32 = (uint32_t *)location;
|
||||
uint64_t *loc64 = (uint64_t *)location;
|
||||
// LLD is a great reference for performing relocations... just keep in
|
||||
// mind that Tools/jit/build.py does filtering and preprocessing for us!
|
||||
// Here's a good place to start for each platform:
|
||||
// - aarch64-apple-darwin:
|
||||
// - https://github.com/llvm/llvm-project/blob/main/lld/MachO/Arch/ARM64Common.cpp
|
||||
// - https://github.com/llvm/llvm-project/blob/main/lld/MachO/Arch/ARM64Common.h
|
||||
// - aarch64-unknown-linux-gnu:
|
||||
// - https://github.com/llvm/llvm-project/blob/main/lld/ELF/Arch/AArch64.cpp
|
||||
// - i686-pc-windows-msvc:
|
||||
// - https://github.com/llvm/llvm-project/blob/main/lld/COFF/Chunks.cpp
|
||||
// - x86_64-apple-darwin:
|
||||
// - https://github.com/llvm/llvm-project/blob/main/lld/MachO/Arch/X86_64.cpp
|
||||
// - x86_64-pc-windows-msvc:
|
||||
// - https://github.com/llvm/llvm-project/blob/main/lld/COFF/Chunks.cpp
|
||||
// - x86_64-unknown-linux-gnu:
|
||||
// - https://github.com/llvm/llvm-project/blob/main/lld/ELF/Arch/X86_64.cpp
|
||||
switch (hole->kind) {
|
||||
case HoleKind_IMAGE_REL_I386_DIR32:
|
||||
// 32-bit absolute address.
|
||||
// Check that we're not out of range of 32 unsigned bits:
|
||||
assert(value < (1ULL << 32));
|
||||
*loc32 = (uint32_t)value;
|
||||
continue;
|
||||
case HoleKind_ARM64_RELOC_UNSIGNED:
|
||||
case HoleKind_IMAGE_REL_AMD64_ADDR64:
|
||||
case HoleKind_R_AARCH64_ABS64:
|
||||
case HoleKind_X86_64_RELOC_UNSIGNED:
|
||||
case HoleKind_R_X86_64_64:
|
||||
// 64-bit absolute address.
|
||||
*loc64 = value;
|
||||
continue;
|
||||
case HoleKind_R_AARCH64_CALL26:
|
||||
case HoleKind_R_AARCH64_JUMP26:
|
||||
// 28-bit relative branch.
|
||||
assert(IS_AARCH64_BRANCH(*loc32));
|
||||
value -= (uint64_t)location;
|
||||
// Check that we're not out of range of 28 signed bits:
|
||||
assert((int64_t)value >= -(1 << 27));
|
||||
assert((int64_t)value < (1 << 27));
|
||||
// Since instructions are 4-byte aligned, only use 26 bits:
|
||||
assert(get_bits(value, 0, 2) == 0);
|
||||
set_bits(loc32, 0, value, 2, 26);
|
||||
continue;
|
||||
case HoleKind_R_AARCH64_MOVW_UABS_G0_NC:
|
||||
// 16-bit low part of an absolute address.
|
||||
assert(IS_AARCH64_MOV(*loc32));
|
||||
// Check the implicit shift (this is "part 0 of 3"):
|
||||
assert(get_bits(*loc32, 21, 2) == 0);
|
||||
set_bits(loc32, 5, value, 0, 16);
|
||||
continue;
|
||||
case HoleKind_R_AARCH64_MOVW_UABS_G1_NC:
|
||||
// 16-bit middle-low part of an absolute address.
|
||||
assert(IS_AARCH64_MOV(*loc32));
|
||||
// Check the implicit shift (this is "part 1 of 3"):
|
||||
assert(get_bits(*loc32, 21, 2) == 1);
|
||||
set_bits(loc32, 5, value, 16, 16);
|
||||
continue;
|
||||
case HoleKind_R_AARCH64_MOVW_UABS_G2_NC:
|
||||
// 16-bit middle-high part of an absolute address.
|
||||
assert(IS_AARCH64_MOV(*loc32));
|
||||
// Check the implicit shift (this is "part 2 of 3"):
|
||||
assert(get_bits(*loc32, 21, 2) == 2);
|
||||
set_bits(loc32, 5, value, 32, 16);
|
||||
continue;
|
||||
case HoleKind_R_AARCH64_MOVW_UABS_G3:
|
||||
// 16-bit high part of an absolute address.
|
||||
assert(IS_AARCH64_MOV(*loc32));
|
||||
// Check the implicit shift (this is "part 3 of 3"):
|
||||
assert(get_bits(*loc32, 21, 2) == 3);
|
||||
set_bits(loc32, 5, value, 48, 16);
|
||||
continue;
|
||||
case HoleKind_ARM64_RELOC_GOT_LOAD_PAGE21:
|
||||
// 21-bit count of pages between this page and an absolute address's
|
||||
// page... I know, I know, it's weird. Pairs nicely with
|
||||
// ARM64_RELOC_GOT_LOAD_PAGEOFF12 (below).
|
||||
assert(IS_AARCH64_ADRP(*loc32));
|
||||
// Number of pages between this page and the value's page:
|
||||
value = (value >> 12) - ((uint64_t)location >> 12);
|
||||
// Check that we're not out of range of 21 signed bits:
|
||||
assert((int64_t)value >= -(1 << 20));
|
||||
assert((int64_t)value < (1 << 20));
|
||||
// value[0:2] goes in loc[29:31]:
|
||||
set_bits(loc32, 29, value, 0, 2);
|
||||
// value[2:21] goes in loc[5:26]:
|
||||
set_bits(loc32, 5, value, 2, 19);
|
||||
continue;
|
||||
case HoleKind_ARM64_RELOC_GOT_LOAD_PAGEOFF12:
|
||||
// 12-bit low part of an absolute address. Pairs nicely with
|
||||
// ARM64_RELOC_GOT_LOAD_PAGE21 (above).
|
||||
assert(IS_AARCH64_LDR_OR_STR(*loc32) || IS_AARCH64_ADD_OR_SUB(*loc32));
|
||||
// There might be an implicit shift encoded in the instruction:
|
||||
uint8_t shift = 0;
|
||||
if (IS_AARCH64_LDR_OR_STR(*loc32)) {
|
||||
shift = (uint8_t)get_bits(*loc32, 30, 2);
|
||||
// If both of these are set, the shift is supposed to be 4.
|
||||
// That's pretty weird, and it's never actually been observed...
|
||||
assert(get_bits(*loc32, 23, 1) == 0 || get_bits(*loc32, 26, 1) == 0);
|
||||
}
|
||||
value = get_bits(value, 0, 12);
|
||||
assert(get_bits(value, 0, shift) == 0);
|
||||
set_bits(loc32, 10, value, shift, 12);
|
||||
continue;
|
||||
}
|
||||
Py_UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
copy_and_patch(char *base, const Stencil *stencil, uint64_t *patches)
|
||||
{
|
||||
memcpy(base, stencil->body, stencil->body_size);
|
||||
patch(base, stencil, patches);
|
||||
}
|
||||
|
||||
static void
|
||||
emit(const StencilGroup *group, uint64_t patches[])
|
||||
{
|
||||
copy_and_patch((char *)patches[HoleValue_CODE], &group->code, patches);
|
||||
copy_and_patch((char *)patches[HoleValue_DATA], &group->data, patches);
|
||||
}
|
||||
|
||||
// Compiles executor in-place. Don't forget to call _PyJIT_Free later!
|
||||
int
|
||||
_PyJIT_Compile(_PyExecutorObject *executor, _PyUOpInstruction *trace, size_t length)
|
||||
{
|
||||
// Loop once to find the total compiled size:
|
||||
size_t code_size = 0;
|
||||
size_t data_size = 0;
|
||||
for (size_t i = 0; i < length; i++) {
|
||||
_PyUOpInstruction *instruction = &trace[i];
|
||||
const StencilGroup *group = &stencil_groups[instruction->opcode];
|
||||
code_size += group->code.body_size;
|
||||
data_size += group->data.body_size;
|
||||
}
|
||||
// Round up to the nearest page (code and data need separate pages):
|
||||
size_t page_size = get_page_size();
|
||||
assert((page_size & (page_size - 1)) == 0);
|
||||
code_size += page_size - (code_size & (page_size - 1));
|
||||
data_size += page_size - (data_size & (page_size - 1));
|
||||
char *memory = jit_alloc(code_size + data_size);
|
||||
if (memory == NULL) {
|
||||
return -1;
|
||||
}
|
||||
// Loop again to emit the code:
|
||||
char *code = memory;
|
||||
char *data = memory + code_size;
|
||||
for (size_t i = 0; i < length; i++) {
|
||||
_PyUOpInstruction *instruction = &trace[i];
|
||||
const StencilGroup *group = &stencil_groups[instruction->opcode];
|
||||
// Think of patches as a dictionary mapping HoleValue to uint64_t:
|
||||
uint64_t patches[] = GET_PATCHES();
|
||||
patches[HoleValue_CODE] = (uint64_t)code;
|
||||
patches[HoleValue_CONTINUE] = (uint64_t)code + group->code.body_size;
|
||||
patches[HoleValue_DATA] = (uint64_t)data;
|
||||
patches[HoleValue_EXECUTOR] = (uint64_t)executor;
|
||||
patches[HoleValue_OPARG] = instruction->oparg;
|
||||
patches[HoleValue_OPERAND] = instruction->operand;
|
||||
patches[HoleValue_TARGET] = instruction->target;
|
||||
patches[HoleValue_TOP] = (uint64_t)memory;
|
||||
patches[HoleValue_ZERO] = 0;
|
||||
emit(group, patches);
|
||||
code += group->code.body_size;
|
||||
data += group->data.body_size;
|
||||
}
|
||||
if (mark_executable(memory, code_size) ||
|
||||
mark_readable(memory + code_size, data_size))
|
||||
{
|
||||
jit_free(memory, code_size + data_size);
|
||||
return -1;
|
||||
}
|
||||
executor->jit_code = memory;
|
||||
executor->jit_size = code_size + data_size;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
_PyJIT_Free(_PyExecutorObject *executor)
|
||||
{
|
||||
char *memory = (char *)executor->jit_code;
|
||||
size_t size = executor->jit_size;
|
||||
if (memory) {
|
||||
executor->jit_code = NULL;
|
||||
executor->jit_size = 0;
|
||||
if (jit_free(memory, size)) {
|
||||
PyErr_WriteUnraisable(NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif // _Py_JIT
|
|
@ -7,6 +7,7 @@
|
|||
#include "pycore_optimizer.h" // _Py_uop_analyze_and_optimize()
|
||||
#include "pycore_pystate.h" // _PyInterpreterState_GET()
|
||||
#include "pycore_uop_ids.h"
|
||||
#include "pycore_jit.h"
|
||||
#include "cpython/optimizer.h"
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
@ -227,6 +228,9 @@ static PyMethodDef executor_methods[] = {
|
|||
static void
|
||||
uop_dealloc(_PyExecutorObject *self) {
|
||||
_Py_ExecutorClear(self);
|
||||
#ifdef _Py_JIT
|
||||
_PyJIT_Free(self);
|
||||
#endif
|
||||
PyObject_Free(self);
|
||||
}
|
||||
|
||||
|
@ -789,6 +793,14 @@ make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies)
|
|||
executor->trace[i].operand);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#ifdef _Py_JIT
|
||||
executor->jit_code = NULL;
|
||||
executor->jit_size = 0;
|
||||
if (_PyJIT_Compile(executor, executor->trace, Py_SIZE(executor))) {
|
||||
Py_DECREF(executor);
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
return executor;
|
||||
}
|
||||
|
|
|
@ -1240,12 +1240,19 @@ init_interp_main(PyThreadState *tstate)
|
|||
|
||||
// Turn on experimental tier 2 (uops-based) optimizer
|
||||
if (is_main_interp) {
|
||||
#ifndef _Py_JIT
|
||||
// No JIT, maybe use the tier two interpreter:
|
||||
char *envvar = Py_GETENV("PYTHON_UOPS");
|
||||
int enabled = envvar != NULL && *envvar > '0';
|
||||
if (_Py_get_xoption(&config->xoptions, L"uops") != NULL) {
|
||||
enabled = 1;
|
||||
}
|
||||
if (enabled) {
|
||||
#else
|
||||
// Always enable tier two for JIT builds (ignoring the environment
|
||||
// variable and command-line option above):
|
||||
if (true) {
|
||||
#endif
|
||||
PyObject *opt = PyUnstable_Optimizer_NewUOpOptimizer();
|
||||
if (opt == NULL) {
|
||||
return _PyStatus_ERR("can't initialize optimizer");
|
||||
|
|
|

@@ -0,0 +1,46 @@
The JIT Compiler
================

This version of CPython can be built with an experimental just-in-time compiler. While most everything you already know about building and using CPython is unchanged, you will probably need to install a compatible version of LLVM first.

## Installing LLVM

The JIT compiler does not require end users to install any third-party dependencies, but part of it must be *built* using LLVM[^why-llvm]. You are *not* required to build the rest of CPython using LLVM, or even the same version of LLVM (in fact, this is uncommon).

LLVM version 16 is required. Both `clang` and `llvm-readobj` need to be installed and discoverable (version suffixes, like `clang-16`, are okay). It's highly recommended that you also have `llvm-objdump` available, since this allows the build script to dump human-readable assembly for the generated code.
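
If you're not sure whether the tools are discoverable, a quick check along these lines can help (a sketch; the build script itself tries the unversioned name, then a `-16` suffix, then Homebrew's prefix, so any one spelling per tool is enough):

```sh
clang --version || clang-16 --version
llvm-readobj --version || llvm-readobj-16 --version
llvm-objdump --version || llvm-objdump-16 --version   # optional, but recommended
```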

It's easy to install all of the required tools:

### Linux

Install LLVM 16 on Ubuntu/Debian:

```sh
wget https://apt.llvm.org/llvm.sh
chmod +x llvm.sh
sudo ./llvm.sh 16
```

### macOS

Install LLVM 16 with [Homebrew](https://brew.sh):

```sh
brew install llvm@16
```

Homebrew won't add any of the tools to your `$PATH`. That's okay; the build script knows how to find them.
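
Under the hood, the build script simply asks Homebrew where LLVM lives; you can run the same query yourself to see what it will find (this mirrors the lookup done by the new LLVM helper added under `Tools/jit/`):

```sh
brew --prefix llvm@16    # the tools live under this prefix's bin/ directory
```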

### Windows

Install LLVM 16 [by searching for it on LLVM's GitHub releases page](https://github.com/llvm/llvm-project/releases?q=16), clicking on "Assets", downloading the appropriate Windows installer for your platform (likely the file ending with `-win64.exe`), and running it. **When installing, be sure to select the option labeled "Add LLVM to the system PATH".**

## Building

For `PCbuild`-based builds, pass the new `--experimental-jit` option to `build.bat`.

For all other builds, pass the new `--enable-experimental-jit` option to `configure`.

Otherwise, just configure and build as you normally would. Cross-compiling "just works", since the JIT is built for the host platform.
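
For example, a typical JIT-enabled build and test run on Linux or macOS might look like this (a minimal sketch mirroring the CI steps added in this commit; adjust the job count and any extra `configure` options to taste):

```sh
./configure --enable-experimental-jit
make all --jobs 4
./python -m test    # ./python.exe on macOS
```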

[^why-llvm]: Clang is specifically needed because it's the only C compiler with support for guaranteed tail calls (`musttail`), which are required by CPython's continuation-passing-style approach to JIT compilation. Since LLVM also includes other functionalities we need (namely, object file parsing and disassembly), it's convenient to only support one toolchain at this time.
@ -0,0 +1,99 @@
|
|||
"""Utilities for invoking LLVM tools."""
|
||||
import asyncio
|
||||
import functools
|
||||
import os
|
||||
import re
|
||||
import shlex
|
||||
import subprocess
|
||||
import typing
|
||||
|
||||
_LLVM_VERSION = 16
|
||||
_LLVM_VERSION_PATTERN = re.compile(rf"version\s+{_LLVM_VERSION}\.\d+\.\d+\s+")
|
||||
|
||||
_P = typing.ParamSpec("_P")
|
||||
_R = typing.TypeVar("_R")
|
||||
_C = typing.Callable[_P, typing.Awaitable[_R]]
|
||||
|
||||
|
||||
def _async_cache(f: _C[_P, _R]) -> _C[_P, _R]:
|
||||
cache = {}
|
||||
lock = asyncio.Lock()
|
||||
|
||||
@functools.wraps(f)
|
||||
async def wrapper(
|
||||
*args: _P.args, **kwargs: _P.kwargs # pylint: disable = no-member
|
||||
) -> _R:
|
||||
async with lock:
|
||||
if args not in cache:
|
||||
cache[args] = await f(*args, **kwargs)
|
||||
return cache[args]
|
||||
|
||||
return wrapper
|
||||
|
||||
|
||||
_CORES = asyncio.BoundedSemaphore(os.cpu_count() or 1)
|
||||
|
||||
|
||||
async def _run(tool: str, args: typing.Iterable[str], echo: bool = False) -> str | None:
|
||||
command = [tool, *args]
|
||||
async with _CORES:
|
||||
if echo:
|
||||
print(shlex.join(command))
|
||||
try:
|
||||
process = await asyncio.create_subprocess_exec(
|
||||
*command, stdout=subprocess.PIPE
|
||||
)
|
||||
except FileNotFoundError:
|
||||
return None
|
||||
out, _ = await process.communicate()
|
||||
if process.returncode:
|
||||
raise RuntimeError(f"{tool} exited with return code {process.returncode}")
|
||||
return out.decode()
|
||||
|
||||
|
||||
@_async_cache
|
||||
async def _check_tool_version(name: str, *, echo: bool = False) -> bool:
|
||||
output = await _run(name, ["--version"], echo=echo)
|
||||
return bool(output and _LLVM_VERSION_PATTERN.search(output))
|
||||
|
||||
|
||||
@_async_cache
|
||||
async def _get_brew_llvm_prefix(*, echo: bool = False) -> str | None:
|
||||
output = await _run("brew", ["--prefix", f"llvm@{_LLVM_VERSION}"], echo=echo)
|
||||
return output and output.removesuffix("\n")
|
||||
|
||||
|
||||
@_async_cache
|
||||
async def _find_tool(tool: str, *, echo: bool = False) -> str | None:
|
||||
# Unversioned executables:
|
||||
path = tool
|
||||
if await _check_tool_version(path, echo=echo):
|
||||
return path
|
||||
# Versioned executables:
|
||||
path = f"{tool}-{_LLVM_VERSION}"
|
||||
if await _check_tool_version(path, echo=echo):
|
||||
return path
|
||||
# Homebrew-installed executables:
|
||||
prefix = await _get_brew_llvm_prefix(echo=echo)
|
||||
if prefix is not None:
|
||||
path = os.path.join(prefix, "bin", tool)
|
||||
if await _check_tool_version(path, echo=echo):
|
||||
return path
|
||||
# Nothing found:
|
||||
return None
|
||||
|
||||
|
||||
async def maybe_run(
|
||||
tool: str, args: typing.Iterable[str], echo: bool = False
|
||||
) -> str | None:
|
||||
"""Run an LLVM tool if it can be found. Otherwise, return None."""
|
||||
path = await _find_tool(tool, echo=echo)
|
||||
return path and await _run(path, args, echo=echo)
|
||||
|
||||
|
||||
async def run(tool: str, args: typing.Iterable[str], echo: bool = False) -> str:
|
||||
"""Run an LLVM tool if it can be found. Otherwise, raise RuntimeError."""
|
||||
output = await maybe_run(tool, args, echo=echo)
|
||||
if output is None:
|
||||
raise RuntimeError(f"Can't find {tool}-{_LLVM_VERSION}!")
|
||||
return output
|
|
@ -0,0 +1,99 @@
|
|||
"""Schema for the JSON produced by llvm-readobj --elf-output-style=JSON."""
|
||||
import typing
|
||||
|
||||
HoleKind: typing.TypeAlias = typing.Literal[
|
||||
"ARM64_RELOC_GOT_LOAD_PAGE21",
|
||||
"ARM64_RELOC_GOT_LOAD_PAGEOFF12",
|
||||
"ARM64_RELOC_UNSIGNED",
|
||||
"IMAGE_REL_AMD64_ADDR64",
|
||||
"IMAGE_REL_I386_DIR32",
|
||||
"R_AARCH64_ABS64",
|
||||
"R_AARCH64_CALL26",
|
||||
"R_AARCH64_JUMP26",
|
||||
"R_AARCH64_MOVW_UABS_G0_NC",
|
||||
"R_AARCH64_MOVW_UABS_G1_NC",
|
||||
"R_AARCH64_MOVW_UABS_G2_NC",
|
||||
"R_AARCH64_MOVW_UABS_G3",
|
||||
"R_X86_64_64",
|
||||
"X86_64_RELOC_UNSIGNED",
|
||||
]
|
||||
|
||||
|
||||
class COFFRelocation(typing.TypedDict):
|
||||
"""A COFF object file relocation record."""
|
||||
|
||||
Type: dict[typing.Literal["Value"], HoleKind]
|
||||
Symbol: str
|
||||
Offset: int
|
||||
|
||||
|
||||
class ELFRelocation(typing.TypedDict):
|
||||
"""An ELF object file relocation record."""
|
||||
|
||||
Addend: int
|
||||
Offset: int
|
||||
Symbol: dict[typing.Literal["Value"], str]
|
||||
Type: dict[typing.Literal["Value"], HoleKind]
|
||||
|
||||
|
||||
class MachORelocation(typing.TypedDict):
|
||||
"""A Mach-O object file relocation record."""
|
||||
|
||||
Offset: int
|
||||
Section: typing.NotRequired[dict[typing.Literal["Value"], str]]
|
||||
Symbol: typing.NotRequired[dict[typing.Literal["Value"], str]]
|
||||
Type: dict[typing.Literal["Value"], HoleKind]
|
||||
|
||||
|
||||
class _COFFSymbol(typing.TypedDict):
|
||||
Name: str
|
||||
Value: int
|
||||
|
||||
|
||||
class _ELFSymbol(typing.TypedDict):
|
||||
Name: dict[typing.Literal["Value"], str]
|
||||
Value: int
|
||||
|
||||
|
||||
class _MachOSymbol(typing.TypedDict):
|
||||
Name: dict[typing.Literal["Value"], str]
|
||||
Value: int
|
||||
|
||||
|
||||
class COFFSection(typing.TypedDict):
|
||||
"""A COFF object file section."""
|
||||
|
||||
Characteristics: dict[
|
||||
typing.Literal["Flags"], list[dict[typing.Literal["Name"], str]]
|
||||
]
|
||||
Number: int
|
||||
RawDataSize: int
|
||||
Relocations: list[dict[typing.Literal["Relocation"], COFFRelocation]]
|
||||
SectionData: typing.NotRequired[dict[typing.Literal["Bytes"], list[int]]]
|
||||
Symbols: list[dict[typing.Literal["Symbol"], _COFFSymbol]]
|
||||
|
||||
|
||||
class ELFSection(typing.TypedDict):
|
||||
"""An ELF object file section."""
|
||||
|
||||
Flags: dict[typing.Literal["Flags"], list[dict[typing.Literal["Name"], str]]]
|
||||
Index: int
|
||||
Info: int
|
||||
Relocations: list[dict[typing.Literal["Relocation"], ELFRelocation]]
|
||||
SectionData: dict[typing.Literal["Bytes"], list[int]]
|
||||
Symbols: list[dict[typing.Literal["Symbol"], _ELFSymbol]]
|
||||
Type: dict[typing.Literal["Value"], str]
|
||||
|
||||
|
||||
class MachOSection(typing.TypedDict):
|
||||
"""A Mach-O object file section."""
|
||||
|
||||
Address: int
|
||||
Attributes: dict[typing.Literal["Flags"], list[dict[typing.Literal["Name"], str]]]
|
||||
Index: int
|
||||
Name: dict[typing.Literal["Value"], str]
|
||||
Relocations: typing.NotRequired[
|
||||
list[dict[typing.Literal["Relocation"], MachORelocation]]
|
||||
]
|
||||
SectionData: typing.NotRequired[dict[typing.Literal["Bytes"], list[int]]]
|
||||
Symbols: typing.NotRequired[list[dict[typing.Literal["Symbol"], _MachOSymbol]]]
|
|
@ -0,0 +1,220 @@
|
|||
"""Core data structures for compiled code templates."""
|
||||
import dataclasses
|
||||
import enum
|
||||
import sys
|
||||
|
||||
import _schema
|
||||
|
||||
|
||||
@enum.unique
|
||||
class HoleValue(enum.Enum):
|
||||
"""
|
||||
Different "base" values that can be patched into holes (usually combined with the
|
||||
address of a symbol and/or an addend).
|
||||
"""
|
||||
|
||||
# The base address of the machine code for the current uop (exposed as _JIT_ENTRY):
|
||||
CODE = enum.auto()
|
||||
# The base address of the machine code for the next uop (exposed as _JIT_CONTINUE):
|
||||
CONTINUE = enum.auto()
|
||||
# The base address of the read-only data for this uop:
|
||||
DATA = enum.auto()
|
||||
# The address of the current executor (exposed as _JIT_EXECUTOR):
|
||||
EXECUTOR = enum.auto()
|
||||
# The base address of the "global" offset table located in the read-only data.
|
||||
# Shouldn't be present in the final stencils, since these are all replaced with
|
||||
# equivalent DATA values:
|
||||
GOT = enum.auto()
|
||||
# The current uop's oparg (exposed as _JIT_OPARG):
|
||||
OPARG = enum.auto()
|
||||
# The current uop's operand (exposed as _JIT_OPERAND):
|
||||
OPERAND = enum.auto()
|
||||
# The current uop's target (exposed as _JIT_TARGET):
|
||||
TARGET = enum.auto()
|
||||
# The base address of the machine code for the first uop (exposed as _JIT_TOP):
|
||||
TOP = enum.auto()
|
||||
# A hardcoded value of zero (used for symbol lookups):
|
||||
ZERO = enum.auto()
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class Hole:
|
||||
"""
|
||||
A "hole" in the stencil to be patched with a computed runtime value.
|
||||
|
||||
Analogous to relocation records in an object file.
|
||||
"""
|
||||
|
||||
offset: int
|
||||
kind: _schema.HoleKind
|
||||
# Patch with this base value:
|
||||
value: HoleValue
|
||||
# ...plus the address of this symbol:
|
||||
symbol: str | None
|
||||
# ...plus this addend:
|
||||
addend: int
|
||||
# Convenience method:
|
||||
replace = dataclasses.replace
|
||||
|
||||
def as_c(self) -> str:
|
||||
"""Dump this hole as an initialization of a C Hole struct."""
|
||||
parts = [
|
||||
f"{self.offset:#x}",
|
||||
f"HoleKind_{self.kind}",
|
||||
f"HoleValue_{self.value.name}",
|
||||
f"&{self.symbol}" if self.symbol else "NULL",
|
||||
_format_addend(self.addend),
|
||||
]
|
||||
return f"{{{', '.join(parts)}}}"
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class Stencil:
|
||||
"""
|
||||
A contiguous block of machine code or data to be copied-and-patched.
|
||||
|
||||
Analogous to a section or segment in an object file.
|
||||
"""
|
||||
|
||||
body: bytearray = dataclasses.field(default_factory=bytearray, init=False)
|
||||
holes: list[Hole] = dataclasses.field(default_factory=list, init=False)
|
||||
disassembly: list[str] = dataclasses.field(default_factory=list, init=False)
|
||||
|
||||
def pad(self, alignment: int) -> None:
|
||||
"""Pad the stencil to the given alignment."""
|
||||
offset = len(self.body)
|
||||
padding = -offset % alignment
|
||||
self.disassembly.append(f"{offset:x}: {' '.join(['00'] * padding)}")
|
||||
self.body.extend([0] * padding)
|
||||
|
||||
def emit_aarch64_trampoline(self, hole: Hole) -> None:
|
||||
"""Even with the large code model, AArch64 Linux insists on 28-bit jumps."""
|
||||
base = len(self.body)
|
||||
where = slice(hole.offset, hole.offset + 4)
|
||||
instruction = int.from_bytes(self.body[where], sys.byteorder)
|
||||
instruction &= 0xFC000000
|
||||
instruction |= ((base - hole.offset) >> 2) & 0x03FFFFFF
|
||||
self.body[where] = instruction.to_bytes(4, sys.byteorder)
|
||||
self.disassembly += [
|
||||
f"{base + 4 * 0: x}: d2800008 mov x8, #0x0",
|
||||
f"{base + 4 * 0:016x}: R_AARCH64_MOVW_UABS_G0_NC {hole.symbol}",
|
||||
f"{base + 4 * 1:x}: f2a00008 movk x8, #0x0, lsl #16",
|
||||
f"{base + 4 * 1:016x}: R_AARCH64_MOVW_UABS_G1_NC {hole.symbol}",
|
||||
f"{base + 4 * 2:x}: f2c00008 movk x8, #0x0, lsl #32",
|
||||
f"{base + 4 * 2:016x}: R_AARCH64_MOVW_UABS_G2_NC {hole.symbol}",
|
||||
f"{base + 4 * 3:x}: f2e00008 movk x8, #0x0, lsl #48",
|
||||
f"{base + 4 * 3:016x}: R_AARCH64_MOVW_UABS_G3 {hole.symbol}",
|
||||
f"{base + 4 * 4:x}: d61f0100 br x8",
|
||||
]
|
||||
for code in [
|
||||
0xD2800008.to_bytes(4, sys.byteorder),
|
||||
0xF2A00008.to_bytes(4, sys.byteorder),
|
||||
0xF2C00008.to_bytes(4, sys.byteorder),
|
||||
0xF2E00008.to_bytes(4, sys.byteorder),
|
||||
0xD61F0100.to_bytes(4, sys.byteorder),
|
||||
]:
|
||||
self.body.extend(code)
|
||||
for i, kind in enumerate(
|
||||
[
|
||||
"R_AARCH64_MOVW_UABS_G0_NC",
|
||||
"R_AARCH64_MOVW_UABS_G1_NC",
|
||||
"R_AARCH64_MOVW_UABS_G2_NC",
|
||||
"R_AARCH64_MOVW_UABS_G3",
|
||||
]
|
||||
):
|
||||
self.holes.append(hole.replace(offset=base + 4 * i, kind=kind))
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class StencilGroup:
|
||||
"""
|
||||
Code and data corresponding to a given micro-opcode.
|
||||
|
||||
Analogous to an entire object file.
|
||||
"""
|
||||
|
||||
code: Stencil = dataclasses.field(default_factory=Stencil, init=False)
|
||||
data: Stencil = dataclasses.field(default_factory=Stencil, init=False)
|
||||
symbols: dict[int | str, tuple[HoleValue, int]] = dataclasses.field(
|
||||
default_factory=dict, init=False
|
||||
)
|
||||
_got: dict[str, int] = dataclasses.field(default_factory=dict, init=False)
|
||||
|
||||
def process_relocations(self, *, alignment: int = 1) -> None:
|
||||
"""Fix up all GOT and internal relocations for this stencil group."""
|
||||
self.code.pad(alignment)
|
||||
self.data.pad(8)
|
||||
for stencil in [self.code, self.data]:
|
||||
holes = []
|
||||
for hole in stencil.holes:
|
||||
if hole.value is HoleValue.GOT:
|
||||
assert hole.symbol is not None
|
||||
hole.value = HoleValue.DATA
|
||||
hole.addend += self._global_offset_table_lookup(hole.symbol)
|
||||
hole.symbol = None
|
||||
elif hole.symbol in self.symbols:
|
||||
hole.value, addend = self.symbols[hole.symbol]
|
||||
hole.addend += addend
|
||||
hole.symbol = None
|
||||
elif (
|
||||
hole.kind in {"R_AARCH64_CALL26", "R_AARCH64_JUMP26"}
|
||||
and hole.value is HoleValue.ZERO
|
||||
):
|
||||
self.code.emit_aarch64_trampoline(hole)
|
||||
continue
|
||||
holes.append(hole)
|
||||
stencil.holes[:] = holes
|
||||
self.code.pad(alignment)
|
||||
self._emit_global_offset_table()
|
||||
self.code.holes.sort(key=lambda hole: hole.offset)
|
||||
self.data.holes.sort(key=lambda hole: hole.offset)
|
||||
|
||||
def _global_offset_table_lookup(self, symbol: str) -> int:
|
||||
return len(self.data.body) + self._got.setdefault(symbol, 8 * len(self._got))
|
||||
|
||||
def _emit_global_offset_table(self) -> None:
|
||||
got = len(self.data.body)
|
||||
for s, offset in self._got.items():
|
||||
if s in self.symbols:
|
||||
value, addend = self.symbols[s]
|
||||
symbol = None
|
||||
else:
|
||||
value, symbol = symbol_to_value(s)
|
||||
addend = 0
|
||||
self.data.holes.append(
|
||||
Hole(got + offset, "R_X86_64_64", value, symbol, addend)
|
||||
)
|
||||
value_part = value.name if value is not HoleValue.ZERO else ""
|
||||
if value_part and not symbol and not addend:
|
||||
addend_part = ""
|
||||
else:
|
||||
addend_part = f"&{symbol}" if symbol else ""
|
||||
addend_part += _format_addend(addend, signed=symbol is not None)
|
||||
if value_part:
|
||||
value_part += "+"
|
||||
self.data.disassembly.append(
|
||||
f"{len(self.data.body):x}: {value_part}{addend_part}"
|
||||
)
|
||||
self.data.body.extend([0] * 8)
|
||||
|
||||
|
||||
def symbol_to_value(symbol: str) -> tuple[HoleValue, str | None]:
|
||||
"""
|
||||
Convert a symbol name to a HoleValue and a symbol name.
|
||||
|
||||
Some symbols (starting with "_JIT_") are special and are converted to their
|
||||
own HoleValues.
|
||||
"""
|
||||
if symbol.startswith("_JIT_"):
|
||||
try:
|
||||
return HoleValue[symbol.removeprefix("_JIT_")], None
|
||||
except KeyError:
|
||||
pass
|
||||
return HoleValue.ZERO, symbol
|
||||
|
||||
|
||||
def _format_addend(addend: int, signed: bool = False) -> str:
|
||||
addend %= 1 << 64
|
||||
if addend & (1 << 63):
|
||||
addend -= 1 << 64
|
||||
return f"{addend:{'+#x' if signed else '#x'}}"
|
|
@ -0,0 +1,394 @@
|
|||
"""Target-specific code generation, parsing, and processing."""
|
||||
import asyncio
|
||||
import dataclasses
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import pathlib
|
||||
import re
|
||||
import sys
|
||||
import tempfile
|
||||
import typing
|
||||
|
||||
import _llvm
|
||||
import _schema
|
||||
import _stencils
|
||||
import _writer
|
||||
|
||||
if sys.version_info < (3, 11):
|
||||
raise RuntimeError("Building the JIT compiler requires Python 3.11 or newer!")
|
||||
|
||||
TOOLS_JIT_BUILD = pathlib.Path(__file__).resolve()
|
||||
TOOLS_JIT = TOOLS_JIT_BUILD.parent
|
||||
TOOLS = TOOLS_JIT.parent
|
||||
CPYTHON = TOOLS.parent
|
||||
PYTHON_EXECUTOR_CASES_C_H = CPYTHON / "Python" / "executor_cases.c.h"
|
||||
TOOLS_JIT_TEMPLATE_C = TOOLS_JIT / "template.c"
|
||||
|
||||
|
||||
_S = typing.TypeVar("_S", _schema.COFFSection, _schema.ELFSection, _schema.MachOSection)
|
||||
_R = typing.TypeVar(
|
||||
"_R", _schema.COFFRelocation, _schema.ELFRelocation, _schema.MachORelocation
|
||||
)
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class _Target(typing.Generic[_S, _R]):
|
||||
triple: str
|
||||
_: dataclasses.KW_ONLY
|
||||
alignment: int = 1
|
||||
prefix: str = ""
|
||||
debug: bool = False
|
||||
force: bool = False
|
||||
verbose: bool = False
|
||||
|
||||
def _compute_digest(self, out: pathlib.Path) -> str:
|
||||
hasher = hashlib.sha256()
|
||||
hasher.update(self.triple.encode())
|
||||
hasher.update(self.alignment.to_bytes())
|
||||
hasher.update(self.prefix.encode())
|
||||
# These dependencies are also reflected in _JITSources in regen.targets:
|
||||
hasher.update(PYTHON_EXECUTOR_CASES_C_H.read_bytes())
|
||||
hasher.update((out / "pyconfig.h").read_bytes())
|
||||
for dirpath, _, filenames in sorted(os.walk(TOOLS_JIT)):
|
||||
for filename in filenames:
|
||||
hasher.update(pathlib.Path(dirpath, filename).read_bytes())
|
||||
return hasher.hexdigest()
|
||||
|
||||
async def _parse(self, path: pathlib.Path) -> _stencils.StencilGroup:
|
||||
group = _stencils.StencilGroup()
|
||||
args = ["--disassemble", "--reloc", f"{path}"]
|
||||
output = await _llvm.maybe_run("llvm-objdump", args, echo=self.verbose)
|
||||
if output is not None:
|
||||
group.code.disassembly.extend(
|
||||
line.expandtabs().strip()
|
||||
for line in output.splitlines()
|
||||
if not line.isspace()
|
||||
)
|
||||
args = [
|
||||
"--elf-output-style=JSON",
|
||||
"--expand-relocs",
|
||||
# "--pretty-print",
|
||||
"--section-data",
|
||||
"--section-relocations",
|
||||
"--section-symbols",
|
||||
"--sections",
|
||||
f"{path}",
|
||||
]
|
||||
output = await _llvm.run("llvm-readobj", args, echo=self.verbose)
|
||||
# --elf-output-style=JSON is only *slightly* broken on Mach-O...
|
||||
output = output.replace("PrivateExtern\n", "\n")
|
||||
output = output.replace("Extern\n", "\n")
|
||||
# ...and also COFF:
|
||||
output = output[output.index("[", 1, None) :]
|
||||
output = output[: output.rindex("]", None, -1) + 1]
|
||||
sections: list[dict[typing.Literal["Section"], _S]] = json.loads(output)
|
||||
for wrapped_section in sections:
|
||||
self._handle_section(wrapped_section["Section"], group)
|
||||
assert group.symbols["_JIT_ENTRY"] == (_stencils.HoleValue.CODE, 0)
|
||||
if group.data.body:
|
||||
line = f"0: {str(bytes(group.data.body)).removeprefix('b')}"
|
||||
group.data.disassembly.append(line)
|
||||
group.process_relocations()
|
||||
return group
|
||||
|
||||
def _handle_section(self, section: _S, group: _stencils.StencilGroup) -> None:
|
||||
raise NotImplementedError(type(self))
|
||||
|
||||
def _handle_relocation(
|
||||
self, base: int, relocation: _R, raw: bytes
|
||||
) -> _stencils.Hole:
|
||||
raise NotImplementedError(type(self))
|
||||
|
||||
async def _compile(
|
||||
self, opname: str, c: pathlib.Path, tempdir: pathlib.Path
|
||||
) -> _stencils.StencilGroup:
|
||||
o = tempdir / f"{opname}.o"
|
||||
args = [
|
||||
f"--target={self.triple}",
|
||||
"-DPy_BUILD_CORE",
|
||||
"-D_DEBUG" if self.debug else "-DNDEBUG",
|
||||
f"-D_JIT_OPCODE={opname}",
|
||||
"-D_PyJIT_ACTIVE",
|
||||
"-D_Py_JIT",
|
||||
"-I.",
|
||||
f"-I{CPYTHON / 'Include'}",
|
||||
f"-I{CPYTHON / 'Include' / 'internal'}",
|
||||
f"-I{CPYTHON / 'Include' / 'internal' / 'mimalloc'}",
|
||||
f"-I{CPYTHON / 'Python'}",
|
||||
"-O3",
|
||||
"-c",
|
||||
"-fno-asynchronous-unwind-tables",
|
||||
# SET_FUNCTION_ATTRIBUTE on 32-bit Windows debug builds:
|
||||
"-fno-jump-tables",
|
||||
# Position-independent code adds indirection to every load and jump:
|
||||
"-fno-pic",
|
||||
# Don't make calls to weird stack-smashing canaries:
|
||||
"-fno-stack-protector",
|
||||
# We have three options for code model:
|
||||
# - "small": the default, assumes that code and data reside in the
|
||||
# lowest 2GB of memory (128MB on aarch64)
|
||||
# - "medium": assumes that code resides in the lowest 2GB of memory,
|
||||
# and makes no assumptions about data (not available on aarch64)
|
||||
# - "large": makes no assumptions about either code or data
|
||||
"-mcmodel=large",
|
||||
"-o",
|
||||
f"{o}",
|
||||
"-std=c11",
|
||||
f"{c}",
|
||||
]
|
||||
await _llvm.run("clang", args, echo=self.verbose)
|
||||
return await self._parse(o)

    async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]:
        generated_cases = PYTHON_EXECUTOR_CASES_C_H.read_text()
        opnames = sorted(re.findall(r"\n {8}case (\w+): \{\n", generated_cases))
        tasks = []
        with tempfile.TemporaryDirectory() as tempdir:
            work = pathlib.Path(tempdir).resolve()
            async with asyncio.TaskGroup() as group:
                for opname in opnames:
                    coro = self._compile(opname, TOOLS_JIT_TEMPLATE_C, work)
                    tasks.append(group.create_task(coro, name=opname))
        return {task.get_name(): task.result() for task in tasks}

    def build(self, out: pathlib.Path, *, comment: str = "") -> None:
        """Build jit_stencils.h in the given directory."""
        digest = f"// {self._compute_digest(out)}\n"
        jit_stencils = out / "jit_stencils.h"
        if (
            not self.force
            and jit_stencils.exists()
            and jit_stencils.read_text().startswith(digest)
        ):
            return
        stencil_groups = asyncio.run(self._build_stencils())
        with jit_stencils.open("w") as file:
            file.write(digest)
            if comment:
                file.write(f"// {comment}\n")
            file.write("")
            for line in _writer.dump(stencil_groups):
                file.write(f"{line}\n")


class _COFF(
    _Target[_schema.COFFSection, _schema.COFFRelocation]
):  # pylint: disable = too-few-public-methods
    def _handle_section(
        self, section: _schema.COFFSection, group: _stencils.StencilGroup
    ) -> None:
        flags = {flag["Name"] for flag in section["Characteristics"]["Flags"]}
        if "SectionData" in section:
            section_data_bytes = section["SectionData"]["Bytes"]
        else:
            # Zeroed BSS data, seen with printf debugging calls:
            section_data_bytes = [0] * section["RawDataSize"]
        if "IMAGE_SCN_MEM_EXECUTE" in flags:
            value = _stencils.HoleValue.CODE
            stencil = group.code
        elif "IMAGE_SCN_MEM_READ" in flags:
            value = _stencils.HoleValue.DATA
            stencil = group.data
        else:
            return
        base = len(stencil.body)
        group.symbols[section["Number"]] = value, base
        stencil.body.extend(section_data_bytes)
        for wrapped_symbol in section["Symbols"]:
            symbol = wrapped_symbol["Symbol"]
            offset = base + symbol["Value"]
            name = symbol["Name"]
            name = name.removeprefix(self.prefix)
            group.symbols[name] = value, offset
        for wrapped_relocation in section["Relocations"]:
            relocation = wrapped_relocation["Relocation"]
            hole = self._handle_relocation(base, relocation, stencil.body)
            stencil.holes.append(hole)

    def _handle_relocation(
        self, base: int, relocation: _schema.COFFRelocation, raw: bytes
    ) -> _stencils.Hole:
        match relocation:
            case {
                "Offset": offset,
                "Symbol": s,
                "Type": {"Value": "IMAGE_REL_AMD64_ADDR64" as kind},
            }:
                offset += base
                s = s.removeprefix(self.prefix)
                value, symbol = _stencils.symbol_to_value(s)
                addend = int.from_bytes(raw[offset : offset + 8], "little")
            case {
                "Offset": offset,
                "Symbol": s,
                "Type": {"Value": "IMAGE_REL_I386_DIR32" as kind},
            }:
                offset += base
                s = s.removeprefix(self.prefix)
                value, symbol = _stencils.symbol_to_value(s)
                addend = int.from_bytes(raw[offset : offset + 4], "little")
            case _:
                raise NotImplementedError(relocation)
        return _stencils.Hole(offset, kind, value, symbol, addend)


class _ELF(
    _Target[_schema.ELFSection, _schema.ELFRelocation]
):  # pylint: disable = too-few-public-methods
    def _handle_section(
        self, section: _schema.ELFSection, group: _stencils.StencilGroup
    ) -> None:
        section_type = section["Type"]["Value"]
        flags = {flag["Name"] for flag in section["Flags"]["Flags"]}
        if section_type == "SHT_RELA":
            assert "SHF_INFO_LINK" in flags, flags
            assert not section["Symbols"]
            value, base = group.symbols[section["Info"]]
            if value is _stencils.HoleValue.CODE:
                stencil = group.code
            else:
                assert value is _stencils.HoleValue.DATA
                stencil = group.data
            for wrapped_relocation in section["Relocations"]:
                relocation = wrapped_relocation["Relocation"]
                hole = self._handle_relocation(base, relocation, stencil.body)
                stencil.holes.append(hole)
        elif section_type == "SHT_PROGBITS":
            if "SHF_ALLOC" not in flags:
                return
            if "SHF_EXECINSTR" in flags:
                value = _stencils.HoleValue.CODE
                stencil = group.code
            else:
                value = _stencils.HoleValue.DATA
                stencil = group.data
            group.symbols[section["Index"]] = value, len(stencil.body)
            for wrapped_symbol in section["Symbols"]:
                symbol = wrapped_symbol["Symbol"]
                offset = len(stencil.body) + symbol["Value"]
                name = symbol["Name"]["Value"]
                name = name.removeprefix(self.prefix)
                group.symbols[name] = value, offset
            stencil.body.extend(section["SectionData"]["Bytes"])
            assert not section["Relocations"]
        else:
            assert section_type in {
                "SHT_GROUP",
                "SHT_LLVM_ADDRSIG",
                "SHT_NULL",
                "SHT_STRTAB",
                "SHT_SYMTAB",
            }, section_type

    def _handle_relocation(
        self, base: int, relocation: _schema.ELFRelocation, raw: bytes
    ) -> _stencils.Hole:
        match relocation:
            case {
                "Addend": addend,
                "Offset": offset,
                "Symbol": {"Value": s},
                "Type": {"Value": kind},
            }:
                offset += base
                s = s.removeprefix(self.prefix)
                value, symbol = _stencils.symbol_to_value(s)
            case _:
                raise NotImplementedError(relocation)
        return _stencils.Hole(offset, kind, value, symbol, addend)


class _MachO(
    _Target[_schema.MachOSection, _schema.MachORelocation]
):  # pylint: disable = too-few-public-methods
    def _handle_section(
        self, section: _schema.MachOSection, group: _stencils.StencilGroup
    ) -> None:
        assert section["Address"] >= len(group.code.body)
        assert "SectionData" in section
        flags = {flag["Name"] for flag in section["Attributes"]["Flags"]}
        name = section["Name"]["Value"]
        name = name.removeprefix(self.prefix)
        if "SomeInstructions" in flags:
            value = _stencils.HoleValue.CODE
            stencil = group.code
            start_address = 0
            group.symbols[name] = value, section["Address"] - start_address
        else:
            value = _stencils.HoleValue.DATA
            stencil = group.data
            start_address = len(group.code.body)
            group.symbols[name] = value, len(group.code.body)
        base = section["Address"] - start_address
        group.symbols[section["Index"]] = value, base
        stencil.body.extend(
            [0] * (section["Address"] - len(group.code.body) - len(group.data.body))
        )
        stencil.body.extend(section["SectionData"]["Bytes"])
        assert "Symbols" in section
        for wrapped_symbol in section["Symbols"]:
            symbol = wrapped_symbol["Symbol"]
            offset = symbol["Value"] - start_address
            name = symbol["Name"]["Value"]
            name = name.removeprefix(self.prefix)
            group.symbols[name] = value, offset
        assert "Relocations" in section
        for wrapped_relocation in section["Relocations"]:
            relocation = wrapped_relocation["Relocation"]
            hole = self._handle_relocation(base, relocation, stencil.body)
            stencil.holes.append(hole)

    def _handle_relocation(
        self, base: int, relocation: _schema.MachORelocation, raw: bytes
    ) -> _stencils.Hole:
        symbol: str | None
        match relocation:
            case {
                "Offset": offset,
                "Symbol": {"Value": s},
                "Type": {
                    "Value": "ARM64_RELOC_GOT_LOAD_PAGE21"
                    | "ARM64_RELOC_GOT_LOAD_PAGEOFF12" as kind
                },
            }:
                offset += base
                s = s.removeprefix(self.prefix)
                value, symbol = _stencils.HoleValue.GOT, s
                addend = 0
            case {
                "Offset": offset,
                "Section": {"Value": s},
                "Type": {"Value": kind},
            } | {
                "Offset": offset,
                "Symbol": {"Value": s},
                "Type": {"Value": kind},
            }:
                offset += base
                s = s.removeprefix(self.prefix)
                value, symbol = _stencils.symbol_to_value(s)
                addend = 0
            case _:
                raise NotImplementedError(relocation)
        # Turn Clang's weird __bzero calls into normal bzero calls:
        if symbol == "__bzero":
            symbol = "bzero"
        return _stencils.Hole(offset, kind, value, symbol, addend)


def get_target(host: str) -> _COFF | _ELF | _MachO:
    """Build a _Target for the given host "triple" and options."""
    if re.fullmatch(r"aarch64-apple-darwin.*", host):
        return _MachO(host, alignment=8, prefix="_")
    if re.fullmatch(r"aarch64-.*-linux-gnu", host):
        return _ELF(host, alignment=8)
    if re.fullmatch(r"i686-pc-windows-msvc", host):
        return _COFF(host, prefix="_")
    if re.fullmatch(r"x86_64-apple-darwin.*", host):
        return _MachO(host, prefix="_")
    if re.fullmatch(r"x86_64-pc-windows-msvc", host):
        return _COFF(host)
    if re.fullmatch(r"x86_64-.*-linux-gnu", host):
        return _ELF(host)
    raise ValueError(host)
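
# Illustrative usage sketch (the host triple is just an example; the real
# command-line entry point is Tools/jit/build.py, added later in this commit):
#
#     import pathlib
#     target = get_target("x86_64-unknown-linux-gnu")  # returns an _ELF target
#     target.verbose = True
#     target.build(pathlib.Path.cwd(), comment="example run")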

Tools/jit/_writer.py
@ -0,0 +1,95 @@
"""Utilities for writing StencilGroups out to a C header file."""
import typing

import _schema
import _stencils


def _dump_header() -> typing.Iterator[str]:
    yield "typedef enum {"
    for kind in typing.get_args(_schema.HoleKind):
        yield f"    HoleKind_{kind},"
    yield "} HoleKind;"
    yield ""
    yield "typedef enum {"
    for value in _stencils.HoleValue:
        yield f"    HoleValue_{value.name},"
    yield "} HoleValue;"
    yield ""
    yield "typedef struct {"
    yield "    const uint64_t offset;"
    yield "    const HoleKind kind;"
    yield "    const HoleValue value;"
    yield "    const void *symbol;"
    yield "    const uint64_t addend;"
    yield "} Hole;"
    yield ""
    yield "typedef struct {"
    yield "    const size_t body_size;"
    yield "    const unsigned char * const body;"
    yield "    const size_t holes_size;"
    yield "    const Hole * const holes;"
    yield "} Stencil;"
    yield ""
    yield "typedef struct {"
    yield "    const Stencil code;"
    yield "    const Stencil data;"
    yield "} StencilGroup;"
    yield ""


def _dump_footer(opnames: typing.Iterable[str]) -> typing.Iterator[str]:
    yield "#define INIT_STENCIL(STENCIL) { \\"
    yield "    .body_size = Py_ARRAY_LENGTH(STENCIL##_body) - 1, \\"
    yield "    .body = STENCIL##_body, \\"
    yield "    .holes_size = Py_ARRAY_LENGTH(STENCIL##_holes) - 1, \\"
    yield "    .holes = STENCIL##_holes, \\"
    yield "}"
    yield ""
    yield "#define INIT_STENCIL_GROUP(OP) { \\"
    yield "    .code = INIT_STENCIL(OP##_code), \\"
    yield "    .data = INIT_STENCIL(OP##_data), \\"
    yield "}"
    yield ""
    yield "static const StencilGroup stencil_groups[512] = {"
    for opname in opnames:
        yield f"    [{opname}] = INIT_STENCIL_GROUP({opname}),"
    yield "};"
    yield ""
    yield "#define GET_PATCHES() { \\"
    for value in _stencils.HoleValue:
        yield f"    [HoleValue_{value.name}] = (uint64_t)0xBADBADBADBADBADB, \\"
    yield "}"


def _dump_stencil(opname: str, group: _stencils.StencilGroup) -> typing.Iterator[str]:
    yield f"// {opname}"
    for part, stencil in [("code", group.code), ("data", group.data)]:
        for line in stencil.disassembly:
            yield f"// {line}"
        if stencil.body:
            size = len(stencil.body) + 1
            yield f"static const unsigned char {opname}_{part}_body[{size}] = {{"
            for i in range(0, len(stencil.body), 8):
                row = " ".join(f"{byte:#04x}," for byte in stencil.body[i : i + 8])
                yield f"    {row}"
            yield "};"
        else:
            yield f"static const unsigned char {opname}_{part}_body[1];"
        if stencil.holes:
            size = len(stencil.holes) + 1
            yield f"static const Hole {opname}_{part}_holes[{size}] = {{"
            for hole in stencil.holes:
                yield f"    {hole.as_c()},"
            yield "};"
        else:
            yield f"static const Hole {opname}_{part}_holes[1];"
        yield ""


def dump(groups: dict[str, _stencils.StencilGroup]) -> typing.Iterator[str]:
    """Yield a JIT compiler line-by-line as a C header file."""
    yield from _dump_header()
    for opname, group in groups.items():
        yield from _dump_stencil(opname, group)
    yield from _dump_footer(groups)
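
# Minimal usage sketch: a caller can persist dump()'s output line by line.
# This mirrors _Target.build in Tools/jit/_targets.py (shown earlier in this
# commit), which writes the same stream to jit_stencils.h; stencil_groups is
# the dict produced by that module.
#
#     with open("jit_stencils.h", "w", encoding="utf-8") as file:
#         for line in dump(stencil_groups):
#             file.write(f"{line}\n")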

Tools/jit/build.py
@ -0,0 +1,28 @@
"""Build an experimental just-in-time compiler for CPython."""
import argparse
import pathlib
import shlex
import sys

import _targets

if __name__ == "__main__":
    comment = f"$ {shlex.join([sys.executable] + sys.argv)}"
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "target", type=_targets.get_target, help="a PEP 11 target triple to compile for"
    )
    parser.add_argument(
        "-d", "--debug", action="store_true", help="compile for a debug build of Python"
    )
    parser.add_argument(
        "-f", "--force", action="store_true", help="force the entire JIT to be rebuilt"
    )
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="echo commands as they are run"
    )
    args = parser.parse_args()
    args.target.debug = args.debug
    args.target.force = args.force
    args.target.verbose = args.verbose
    args.target.build(pathlib.Path.cwd(), comment=comment)
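    # Example invocation (mirrors the REGEN_JIT_COMMAND that configure.ac adds
    # later in this commit; the target triple here is just an example):
    #
    #     python3 Tools/jit/build.py x86_64-unknown-linux-gnu --debug --verbose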

Tools/jit/mypy.ini
@ -0,0 +1,5 @@
[mypy]
files = Tools/jit
pretty = True
python_version = 3.11
strict = True

Tools/jit/template.c
@ -0,0 +1,98 @@
#include "Python.h"

#include "pycore_call.h"
#include "pycore_ceval.h"
#include "pycore_dict.h"
#include "pycore_emscripten_signal.h"
#include "pycore_intrinsics.h"
#include "pycore_jit.h"
#include "pycore_long.h"
#include "pycore_opcode_metadata.h"
#include "pycore_opcode_utils.h"
#include "pycore_range.h"
#include "pycore_setobject.h"
#include "pycore_sliceobject.h"

#include "ceval_macros.h"

#undef CURRENT_OPARG
#define CURRENT_OPARG() (_oparg)

#undef CURRENT_OPERAND
#define CURRENT_OPERAND() (_operand)

#undef DEOPT_IF
#define DEOPT_IF(COND, INSTNAME) \
    do { \
        if ((COND)) { \
            goto deoptimize; \
        } \
    } while (0)

#undef ENABLE_SPECIALIZATION
#define ENABLE_SPECIALIZATION (0)

#undef GOTO_ERROR
#define GOTO_ERROR(LABEL) \
    do { \
        goto LABEL ## _tier_two; \
    } while (0)

#undef LOAD_IP
#define LOAD_IP(UNUSED) \
    do { \
    } while (0)

#define PATCH_VALUE(TYPE, NAME, ALIAS) \
    extern void ALIAS; \
    TYPE NAME = (TYPE)(uint64_t)&ALIAS;

#define PATCH_JUMP(ALIAS) \
    extern void ALIAS; \
    __attribute__((musttail)) \
    return ((jit_func)&ALIAS)(frame, stack_pointer, tstate);

_Py_CODEUNIT *
_JIT_ENTRY(_PyInterpreterFrame *frame, PyObject **stack_pointer, PyThreadState *tstate)
{
    // Locals that the instruction implementations expect to exist:
    PATCH_VALUE(_PyExecutorObject *, current_executor, _JIT_EXECUTOR)
    int oparg;
    int opcode = _JIT_OPCODE;
    _PyUOpInstruction *next_uop;
    // Other stuff we need handy:
    PATCH_VALUE(uint16_t, _oparg, _JIT_OPARG)
    PATCH_VALUE(uint64_t, _operand, _JIT_OPERAND)
    PATCH_VALUE(uint32_t, _target, _JIT_TARGET)
    // The actual instruction definitions (only one will be used):
    if (opcode == _JUMP_TO_TOP) {
        CHECK_EVAL_BREAKER();
        PATCH_JUMP(_JIT_TOP);
    }
    switch (opcode) {
#include "executor_cases.c.h"
        default:
            Py_UNREACHABLE();
    }
    PATCH_JUMP(_JIT_CONTINUE);
    // Labels that the instruction implementations expect to exist:
unbound_local_error_tier_two:
    _PyEval_FormatExcCheckArg(
        tstate, PyExc_UnboundLocalError, UNBOUNDLOCAL_ERROR_MSG,
        PyTuple_GetItem(_PyFrame_GetCode(frame)->co_localsplusnames, oparg));
    goto error_tier_two;
pop_4_error_tier_two:
    STACK_SHRINK(1);
pop_3_error_tier_two:
    STACK_SHRINK(1);
pop_2_error_tier_two:
    STACK_SHRINK(1);
pop_1_error_tier_two:
    STACK_SHRINK(1);
error_tier_two:
    _PyFrame_SetStackPointer(frame, stack_pointer);
    return NULL;
deoptimize:
    _PyFrame_SetStackPointer(frame, stack_pointer);
    return _PyCode_CODE(_PyFrame_GetCode(frame)) + _target;
}

configure
@ -920,6 +920,7 @@ LLVM_AR
PROFILE_TASK
DEF_MAKE_RULE
DEF_MAKE_ALL_RULE
REGEN_JIT_COMMAND
ABIFLAGS
LN
MKDIR_P

@ -1074,6 +1075,7 @@ with_pydebug
with_trace_refs
enable_pystats
with_assertions
enable_experimental_jit
enable_optimizations
with_lto
enable_bolt

@ -1801,6 +1803,9 @@ Optional Features:
  --disable-gil           enable experimental support for running without the
                          GIL (default is no)
  --enable-pystats        enable internal statistics gathering (default is no)
  --enable-experimental-jit
                          build the experimental just-in-time compiler
                          (default is no)
  --enable-optimizations  enable expensive, stable optimizations (PGO, etc.)
                          (default is no)
  --enable-bolt           enable usage of the llvm-bolt post-link optimizer

@ -7997,6 +8002,32 @@ else
printf "%s\n" "no" >&6; }
fi

# Check for --enable-experimental-jit:
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for --enable-experimental-jit" >&5
printf %s "checking for --enable-experimental-jit... " >&6; }
# Check whether --enable-experimental-jit was given.
if test ${enable_experimental_jit+y}
then :
  enableval=$enable_experimental_jit;
else $as_nop
  enable_experimental_jit=no
fi

if test "x$enable_experimental_jit" = xno
then :

else $as_nop
  as_fn_append CFLAGS_NODIST " -D_Py_JIT"
  REGEN_JIT_COMMAND="\$(PYTHON_FOR_REGEN) \$(srcdir)/Tools/jit/build.py $host"
  if test "x$Py_DEBUG" = xtrue
then :
  as_fn_append REGEN_JIT_COMMAND " --debug"
fi
fi

{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_experimental_jit" >&5
printf "%s\n" "$enable_experimental_jit" >&6; }

# Enable optimization flags

configure.ac
@ -1579,6 +1579,26 @@ else
AC_MSG_RESULT([no])
fi

# Check for --enable-experimental-jit:
AC_MSG_CHECKING([for --enable-experimental-jit])
AC_ARG_ENABLE([experimental-jit],
[AS_HELP_STRING([--enable-experimental-jit],
[build the experimental just-in-time compiler (default is no)])],
[],
[enable_experimental_jit=no])
AS_VAR_IF([enable_experimental_jit],
[no],
[],
[AS_VAR_APPEND([CFLAGS_NODIST], [" -D_Py_JIT"])
AS_VAR_SET([REGEN_JIT_COMMAND],
["\$(PYTHON_FOR_REGEN) \$(srcdir)/Tools/jit/build.py $host"])
AS_VAR_IF([Py_DEBUG],
[true],
[AS_VAR_APPEND([REGEN_JIT_COMMAND], [" --debug"])],
[])])
AC_SUBST([REGEN_JIT_COMMAND])
AC_MSG_RESULT([$enable_experimental_jit])

# Enable optimization flags
AC_SUBST([DEF_MAKE_ALL_RULE])
AC_SUBST([DEF_MAKE_RULE])