/*
 * cpython/Modules/_testexternalinspection.c
 *
 * Source-viewer metadata: 630 lines, 17 KiB, C (Raw / Normal View / History).
 */

#define _GNU_SOURCE
#ifdef __linux__
# include <elf.h>
# include <sys/uio.h>
# if INTPTR_MAX == INT64_MAX
# define Elf_Ehdr Elf64_Ehdr
# define Elf_Shdr Elf64_Shdr
# define Elf_Phdr Elf64_Phdr
# else
# define Elf_Ehdr Elf32_Ehdr
# define Elf_Shdr Elf32_Shdr
# define Elf_Phdr Elf32_Phdr
# endif
# include <sys/mman.h>
#endif
#ifdef __APPLE__
# include <libproc.h>
# include <mach-o/fat.h>
# include <mach-o/loader.h>
# include <mach-o/nlist.h>
# include <mach/mach.h>
# include <mach/mach_vm.h>
# include <mach/machine.h>
# include <sys/mman.h>
# include <sys/proc.h>
# include <sys/sysctl.h>
#endif
#include <errno.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#ifndef Py_BUILD_CORE_BUILTIN
# define Py_BUILD_CORE_MODULE 1
#endif
#include "Python.h"
#include <internal/pycore_runtime.h>
#ifndef HAVE_PROCESS_VM_READV
# define HAVE_PROCESS_VM_READV 0
#endif
#ifdef __APPLE__
/*
 * Locate the "PyRuntime" section of a 64-bit Mach-O image.
 *
 *   proc_ref  task port of the inspected process
 *   base      address of the VM region in which the image was found
 *   map       local mapping of the image file, starting at its Mach-O header
 *
 * The load commands are walked in order.  For each LC_SEGMENT_64 command named
 * "__DATA" (at most two are examined) the task's VM regions are stepped
 * through until one whose size equals the segment's file size is reported;
 * that region's address minus the segment's vmaddr is used as the load bias
 * for the segment's sections.
 *
 * Returns the biased address of the "PyRuntime" section, or NULL.  NULL comes
 * with a RuntimeError set when mach_vm_region() fails, and with no exception
 * when no matching section is found.
 */
static void*
analyze_macho64(mach_port_t proc_ref, void* base, void* map)
{
    struct mach_header_64* header = (struct mach_header_64*)map;
    int command_total = header->ncmds;
    struct segment_command_64* segment = map + sizeof(struct mach_header_64);

    // Region-walk state.  It is shared by every "__DATA" segment, so a second
    // segment resumes the walk where the first one stopped.
    mach_vm_address_t region_address = (mach_vm_address_t)base;
    mach_vm_size_t region_size = 0;
    mach_msg_type_number_t info_count = sizeof(vm_region_basic_info_data_64_t);
    vm_region_basic_info_data_64_t region_info;
    mach_port_t object_name;

    int data_segments_seen = 0;
    for (int index = 0;
         data_segments_seen < 2 && index < command_total;
         index++,
         segment = (struct segment_command_64*)((void*)segment + segment->cmdsize))
    {
        if (segment->cmd != LC_SEGMENT_64 || strcmp(segment->segname, "__DATA") != 0) {
            continue;
        }

        // Step through the task's regions until one has the segment's size.
        while (segment->filesize != region_size) {
            region_address += region_size;
            kern_return_t status = mach_vm_region(
                    proc_ref,
                    &region_address,
                    &region_size,
                    VM_REGION_BASIC_INFO_64,
                    (vm_region_info_t)&region_info,  // cppcheck-suppress [uninitvar]
                    &info_count,
                    &object_name);
            if (status != KERN_SUCCESS) {
                PyErr_SetString(PyExc_RuntimeError, "Cannot get any more VM maps.\n");
                return NULL;
            }
        }

        void* load_bias = (void*)region_address - segment->vmaddr;

        // The section headers follow the segment command directly.
        struct section_64* section = (struct section_64*)(segment + 1);
        int section_total = segment->nsects;
        for (int k = 0; k < section_total; k++, section++) {
            if (strcmp(section->sectname, "PyRuntime") == 0) {
                return load_bias + section->addr;
            }
        }
        data_segments_seen++;
    }
    return NULL;
}
/*
 * Map the Mach-O file at `path` and look up the address of its PyRuntime
 * section via analyze_macho64().
 *
 *   path      file backing the region found in the target process
 *   base      address of that region in the target process
 *   size      size of that region (currently unused)
 *   proc_ref  task port of the target process
 *
 * Returns the section address, or NULL.  Whenever an exception is set by this
 * function the return value is NULL; NULL without an exception means
 * analyze_macho64() found no PyRuntime section.
 */
static void*
analyze_macho(char* path, void* base, mach_vm_size_t size, mach_port_t proc_ref)
{
    (void)size;  // kept in the signature for callers; not needed here

    int fd = open(path, O_RDONLY);
    if (fd == -1) {
        PyErr_Format(PyExc_RuntimeError, "Cannot open binary %s\n", path);
        return NULL;
    }

    struct stat fs;
    if (fstat(fd, &fs) == -1) {
        PyErr_Format(PyExc_RuntimeError, "Cannot get size of binary %s\n", path);
        close(fd);
        return NULL;
    }

    void* map = mmap(0, fs.st_size, PROT_READ, MAP_SHARED, fd, 0);
    if (map == MAP_FAILED) {
        PyErr_Format(PyExc_RuntimeError, "Cannot map binary %s\n", path);
        close(fd);
        return NULL;
    }

    void* result = NULL;
    struct mach_header_64* hdr = (struct mach_header_64*)map;
    switch (hdr->magic) {
        case MH_MAGIC:
        case MH_CIGAM:
            PyErr_SetString(PyExc_RuntimeError, "32-bit Mach-O binaries are not supported");
            break;
        case FAT_MAGIC:
        case FAT_CIGAM:
            // A fat header is a container of per-architecture slices, not a
            // 32-bit image, so it gets its own message.
            PyErr_SetString(PyExc_RuntimeError,
                            "Fat (universal) Mach-O binaries are not supported");
            break;
        case MH_MAGIC_64:
        case MH_CIGAM_64:
            // NOTE(review): the byte-swapped magic is handed to the same
            // parser, which reads fields without swapping -- confirm that a
            // swapped image can never reach this point.
            result = analyze_macho64(proc_ref, base, map);
            break;
        default:
            PyErr_SetString(PyExc_RuntimeError, "Unknown Mach-O magic");
            break;
    }

    munmap(map, fs.st_size);
    if (close(fd) != 0) {
        PyErr_SetFromErrno(PyExc_OSError);
        // Never hand back a usable result together with a pending exception.
        result = NULL;
    }
    return result;
}
/*
 * Obtain the Mach task port for process `pid`.
 *
 * Returns the port on success.  On failure sets PermissionError and
 * returns 0.
 */
static mach_port_t
pid_to_task(pid_t pid)
{
    mach_port_t task_port;
    if (task_for_pid(mach_task_self(), pid, &task_port) == KERN_SUCCESS) {
        return task_port;
    }
    PyErr_Format(PyExc_PermissionError, "Cannot get task for PID %d", pid);
    return 0;
}
static void*
get_py_runtime_macos(pid_t pid)
{
mach_vm_address_t address = 0;
mach_vm_size_t size = 0;
mach_msg_type_number_t count = sizeof(vm_region_basic_info_data_64_t);
vm_region_basic_info_data_64_t region_info;
mach_port_t object_name;
mach_port_t proc_ref = pid_to_task(pid);
if (proc_ref == 0) {
PyErr_SetString(PyExc_PermissionError, "Cannot get task for PID");
return NULL;
}
int match_found = 0;
char map_filename[MAXPATHLEN + 1];
void* result_address = NULL;
while (mach_vm_region(
proc_ref,
&address,
&size,
VM_REGION_BASIC_INFO_64,
(vm_region_info_t)&region_info,
&count,
&object_name)
== KERN_SUCCESS)
{
int path_len = proc_regionfilename(pid, address, map_filename, MAXPATHLEN);
if (path_len == 0) {
address += size;
continue;
}
char* filename = strrchr(map_filename, '/');
if (filename != NULL) {
filename++; // Move past the '/'
} else {
filename = map_filename; // No path, use the whole string
}
// Check if the filename starts with "python" or "libpython"
if (!match_found && strncmp(filename, "python", 6) == 0) {
match_found = 1;
result_address = analyze_macho(map_filename, (void*)address, size, proc_ref);
}
if (strncmp(filename, "libpython", 9) == 0) {
match_found = 1;
result_address = analyze_macho(map_filename, (void*)address, size, proc_ref);
break;
}
address += size;
}
return result_address;
}
#endif
#ifdef __linux__
/*
 * Find where the Python executable or libpython is mapped in process `pid`.
 *
 * Reads /proc/<pid>/maps line by line.  The first mapping whose file basename
 * starts with "python" is remembered; the first mapping whose basename starts
 * with "libpython" replaces it and ends the scan.
 *
 *   result_filename  receives the path of the selected file, or an empty
 *                    string when nothing matched.  It must have room for at
 *                    least 256 bytes: paths are limited to 255 characters
 *                    because no buffer size is passed in.
 *
 * Returns the start address of the selected mapping, or NULL when nothing
 * matched.  NULL is also returned, with OSError set, when the maps file
 * cannot be opened.
 *
 * Limitations: lines that do not fit the 256-byte line buffer are ignored, and
 * a path is read up to its first whitespace character.
 */
void*
find_python_map_start_address(pid_t pid, char* result_filename)
{
    char maps_file_path[64];
    snprintf(maps_file_path, sizeof(maps_file_path), "/proc/%d/maps", pid);

    FILE* maps_file = fopen(maps_file_path, "r");
    if (maps_file == NULL) {
        PyErr_SetFromErrno(PyExc_OSError);
        return NULL;
    }

    int match_found = 0;
    char line[256];
    char map_filename[256];  // width must match the %255s conversion below
    void* result_address = NULL;

    while (fgets(line, sizeof(line), maps_file) != NULL) {
        // A full buffer without a newline means the line was cut short.  Its
        // path is truncated, so drop the rest of the line instead of letting
        // the tail be parsed as a separate entry.
        size_t line_length = strlen(line);
        if (line_length == sizeof(line) - 1 && line[line_length - 1] != '\n') {
            int ch;
            do {
                ch = fgetc(maps_file);
            } while (ch != EOF && ch != '\n');
            continue;
        }

        // Fields: address range, perms, offset, dev, inode, [pathname].
        // Entries without a pathname leave map_filename unassigned, so they
        // are skipped rather than matched against stale data.
        unsigned long start_address = 0;
        if (sscanf(line, "%lx-%*x %*s %*s %*s %*s %255s",
                   &start_address, map_filename) != 2)
        {
            continue;
        }

        char* filename = strrchr(map_filename, '/');
        if (filename != NULL) {
            filename++;  // Move past the '/'
        } else {
            filename = map_filename;  // No path, use the whole string
        }

        // Check if the filename starts with "python" or "libpython"
        if (!match_found && strncmp(filename, "python", 6) == 0) {
            match_found = 1;
            result_address = (void*)start_address;
            strcpy(result_filename, map_filename);
        }
        if (strncmp(filename, "libpython", 9) == 0) {
            match_found = 1;
            result_address = (void*)start_address;
            strcpy(result_filename, map_filename);
            break;
        }
    }

    fclose(maps_file);

    if (!match_found) {
        // Give the caller a defined (empty) string when nothing was found.
        result_filename[0] = '\0';
    }
    return result_address;
}
void*
get_py_runtime_linux(pid_t pid)
{
char elf_file[256];
void* start_address = (void*)find_python_map_start_address(pid, elf_file);
if (start_address == 0) {
PyErr_SetString(PyExc_RuntimeError, "No memory map associated with python or libpython found");
return NULL;
}
void* result = NULL;
void* file_memory = NULL;
int fd = open(elf_file, O_RDONLY);
if (fd < 0) {
PyErr_SetFromErrno(PyExc_OSError);
goto exit;
}
struct stat file_stats;
if (fstat(fd, &file_stats) != 0) {
PyErr_SetFromErrno(PyExc_OSError);
goto exit;
}
file_memory = mmap(NULL, file_stats.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
if (file_memory == MAP_FAILED) {
PyErr_SetFromErrno(PyExc_OSError);
goto exit;
}
Elf_Ehdr* elf_header = (Elf_Ehdr*)file_memory;
Elf_Shdr* section_header_table = (Elf_Shdr*)(file_memory + elf_header->e_shoff);
Elf_Shdr* shstrtab_section = &section_header_table[elf_header->e_shstrndx];
char* shstrtab = (char*)(file_memory + shstrtab_section->sh_offset);
Elf_Shdr* py_runtime_section = NULL;
for (int i = 0; i < elf_header->e_shnum; i++) {
if (strcmp(".PyRuntime", shstrtab + section_header_table[i].sh_name) == 0) {
py_runtime_section = &section_header_table[i];
break;
}
}
Elf_Phdr* program_header_table = (Elf_Phdr*)(file_memory + elf_header->e_phoff);
// Find the first PT_LOAD segment
Elf_Phdr* first_load_segment = NULL;
for (int i = 0; i < elf_header->e_phnum; i++) {
if (program_header_table[i].p_type == PT_LOAD) {
first_load_segment = &program_header_table[i];
break;
}
}
if (py_runtime_section != NULL && first_load_segment != NULL) {
uintptr_t elf_load_addr = first_load_segment->p_vaddr
- (first_load_segment->p_vaddr % first_load_segment->p_align);
result = start_address + py_runtime_section->sh_addr - elf_load_addr;
}
exit:
if (close(fd) != 0) {
PyErr_SetFromErrno(PyExc_OSError);
}
if (file_memory != NULL) {
munmap(file_memory, file_stats.st_size);
}
return result;
}
#endif
ssize_t
read_memory(pid_t pid, void* remote_address, size_t len, void* dst)
{
ssize_t total_bytes_read = 0;
#if defined(__linux__) && HAVE_PROCESS_VM_READV
struct iovec local[1];
struct iovec remote[1];
ssize_t result = 0;
ssize_t read = 0;
do {
local[0].iov_base = dst + result;
local[0].iov_len = len - result;
remote[0].iov_base = (void*)(remote_address + result);
remote[0].iov_len = len - result;
read = process_vm_readv(pid, local, 1, remote, 1, 0);
if (read < 0) {
PyErr_SetFromErrno(PyExc_OSError);
return -1;
}
result += read;
} while ((size_t)read != local[0].iov_len);
total_bytes_read = result;
#elif defined(__APPLE__)
ssize_t result = -1;
kern_return_t kr = mach_vm_read_overwrite(
pid_to_task(pid),
(mach_vm_address_t)remote_address,
len,
(mach_vm_address_t)dst,
(mach_vm_size_t*)&result);
if (kr != KERN_SUCCESS) {
switch (kr) {
case KERN_PROTECTION_FAILURE:
PyErr_SetString(PyExc_PermissionError, "Not enough permissions to read memory");
break;
case KERN_INVALID_ARGUMENT:
PyErr_SetString(PyExc_PermissionError, "Invalid argument to mach_vm_read_overwrite");
break;
default:
PyErr_SetString(PyExc_RuntimeError, "Unknown error reading memory");
}
return -1;
}
total_bytes_read = len;
#else
return -1;
#endif
return total_bytes_read;
}
/*
 * Read the text of a str object living in process `pid` into `buffer`.
 *
 *   debug_offsets  field offsets published by the target runtime
 *   address        remote address of the str object
 *   buffer, size   destination and its capacity in bytes
 *
 * The length is read from `unicode_object.length`, and that many bytes are
 * then copied from `unicode_object.asciiobject_size` past the object start.
 * NOTE(review): this treats the length as a byte count, which presumably
 * assumes a compact ASCII string -- confirm against the unicode object layout.
 *
 * Returns 0 and NUL-terminates `buffer` on success.  Returns -1 with an
 * exception set when a read fails, when the length read from the target is
 * negative, or when the string plus its terminator does not fit.
 */
int
read_string(pid_t pid, _Py_DebugOffsets* debug_offsets, void* address, char* buffer, Py_ssize_t size)
{
    char* object = (char*)address;

    Py_ssize_t len;
    ssize_t bytes_read = read_memory(
            pid,
            object + debug_offsets->unicode_object.length,
            sizeof(Py_ssize_t),
            &len);
    if (bytes_read == -1) {
        return -1;
    }

    // The length comes from another process's memory and cannot be trusted.
    // A negative value would pass the capacity check below and then index
    // before the start of `buffer`.
    if (len < 0) {
        PyErr_SetString(PyExc_RuntimeError, "Invalid string length read from remote process");
        return -1;
    }
    if (len >= size) {
        PyErr_SetString(PyExc_RuntimeError, "Buffer too small");
        return -1;
    }

    size_t offset = debug_offsets->unicode_object.asciiobject_size;
    bytes_read = read_memory(pid, object + offset, (size_t)len, buffer);
    if (bytes_read == -1) {
        return -1;
    }

    buffer[len] = '\0';
    return 0;
}
/*
 * Platform dispatcher: return the address of the PyRuntime section in
 * process `pid`, using the Linux or macOS implementation.  On any other
 * platform the result is always NULL.
 */
void*
get_py_runtime(pid_t pid)
{
    void* runtime_address = NULL;
#if defined(__linux__)
    runtime_address = get_py_runtime_linux(pid);
#elif defined(__APPLE__)
    runtime_address = get_py_runtime_macos(pid);
#endif
    return runtime_address;
}
/*
 * Append the name of a remote code object to the list `result`.
 *
 *   pid             target process
 *   result          list receiving the function name as a str
 *   offsets         field offsets published by the target runtime
 *   address         remote address of the code object
 *   previous_frame  unused; kept so the signature mirrors parse_frame_object()
 *
 * Returns 0 on success and -1 with an exception set on failure.
 */
static int
parse_code_object(
    int pid,
    PyObject* result,
    struct _Py_DebugOffsets* offsets,
    void* address,
    void** previous_frame)
{
    (void)previous_frame;

    // A failed read leaves the destination untouched, so the result must be
    // checked before the pointer is looked at.
    void* address_of_function_name = NULL;
    if (read_memory(
            pid,
            (void*)((char*)address + offsets->code_object.name),
            sizeof(void*),
            &address_of_function_name)
        == -1)
    {
        return -1;
    }
    if (address_of_function_name == NULL) {
        PyErr_SetString(PyExc_RuntimeError, "No function name found");
        return -1;
    }

    char function_name[256];
    if (read_string(pid, offsets, address_of_function_name, function_name, sizeof(function_name)) != 0) {
        return -1;
    }

    PyObject* py_function_name = PyUnicode_FromString(function_name);
    if (py_function_name == NULL) {
        return -1;
    }

    // On success the list holds its own reference; ours is dropped either way.
    int append_status = PyList_Append(result, py_function_name);
    Py_DECREF(py_function_name);
    return (append_status == -1) ? -1 : 0;
}
/*
 * Process one remote interpreter frame.
 *
 * Stores the frame's `previous` link in *previous_frame, then, unless the
 * frame's owner is FRAME_OWNED_BY_CSTACK or its executable pointer is NULL,
 * passes the executable to parse_code_object() so its name is appended to
 * `result`.
 *
 * Returns 0 on success (including the two skipped cases) and -1 when a
 * remote read or parse_code_object() fails.
 */
static int
parse_frame_object(
    int pid,
    PyObject* result,
    struct _Py_DebugOffsets* offsets,
    void* address,
    void** previous_frame)
{
    // Work from a private copy of the frame address: the caller may pass the
    // same variable for `address` and `*previous_frame`.
    char* frame = (char*)address;

    if (read_memory(
            pid,
            (void*)(frame + offsets->interpreter_frame.previous),
            sizeof(void*),
            previous_frame)
        == -1)
    {
        return -1;
    }

    char frame_owner;
    if (read_memory(
            pid,
            (void*)(frame + offsets->interpreter_frame.owner),
            sizeof(char),
            &frame_owner)
        < 0)
    {
        return -1;
    }
    if (frame_owner == FRAME_OWNED_BY_CSTACK) {
        return 0;
    }

    void* executable;
    if (read_memory(
            pid,
            (void*)(frame + offsets->interpreter_frame.executable),
            sizeof(void*),
            &executable)
        == -1)
    {
        return -1;
    }

    return (executable == NULL)
               ? 0
               : parse_code_object(pid, result, offsets, executable, previous_frame);
}
/*
 * get_stack_trace(pid) -> list[str]
 *
 * Reads the Python call stack of another process.  Starting from the
 * PyRuntime section of `pid`, follows the first interpreter's first thread to
 * its current frame and walks the `previous` links, collecting one function
 * name per frame that has a code object.
 *
 * Returns a new list, empty when the interpreter has no thread.  Returns NULL
 * with an exception set when the platform is unsupported, the arguments are
 * invalid, or any remote read fails.
 */
static PyObject*
get_stack_trace(PyObject* self, PyObject* args)
{
#if (!defined(__linux__) && !defined(__APPLE__)) || (defined(__linux__) && !HAVE_PROCESS_VM_READV)
    (void)self;
    (void)args;
    PyErr_SetString(PyExc_RuntimeError, "get_stack_trace is not supported on this platform");
    return NULL;
#else
    (void)self;

    int pid;
    if (!PyArg_ParseTuple(args, "i", &pid)) {
        return NULL;
    }

    void* runtime_start_address = get_py_runtime(pid);
    if (runtime_start_address == NULL) {
        if (!PyErr_Occurred()) {
            PyErr_SetString(PyExc_RuntimeError, "Failed to get .PyRuntime address");
        }
        return NULL;
    }

    // The debug offsets table is read from the start of the PyRuntime section.
    size_t size = sizeof(struct _Py_DebugOffsets);
    struct _Py_DebugOffsets local_debug_offsets;
    ssize_t bytes_read = read_memory(pid, runtime_start_address, size, &local_debug_offsets);
    if (bytes_read == -1) {
        return NULL;
    }

    off_t thread_state_list_head = local_debug_offsets.runtime_state.interpreters_head;
    void* address_of_interpreter_state;
    bytes_read = read_memory(
            pid,
            (void*)((char*)runtime_start_address + thread_state_list_head),
            sizeof(void*),
            &address_of_interpreter_state);
    if (bytes_read == -1) {
        return NULL;
    }
    if (address_of_interpreter_state == NULL) {
        PyErr_SetString(PyExc_RuntimeError, "No interpreter state found");
        return NULL;
    }

    void* address_of_thread;
    bytes_read = read_memory(
            pid,
            (void*)((char*)address_of_interpreter_state
                    + local_debug_offsets.interpreter_state.threads_head),
            sizeof(void*),
            &address_of_thread);
    if (bytes_read == -1) {
        return NULL;
    }

    PyObject* result = PyList_New(0);
    if (result == NULL) {
        return NULL;
    }

    // No Python frames are available for us (can happen at tear-down).
    if (address_of_thread != NULL) {
        // A failed read leaves the frame pointer undefined and an exception
        // pending, so it must stop the walk here.
        void* address_of_current_frame = NULL;
        if (read_memory(
                pid,
                (void*)((char*)address_of_thread
                        + local_debug_offsets.thread_state.current_frame),
                sizeof(void*),
                &address_of_current_frame)
            == -1)
        {
            Py_DECREF(result);
            return NULL;
        }

        // parse_frame_object() overwrites address_of_current_frame with the
        // frame's `previous` link, which advances the walk.
        while (address_of_current_frame != NULL) {
            if (parse_frame_object(
                    pid,
                    result,
                    &local_debug_offsets,
                    address_of_current_frame,
                    &address_of_current_frame)
                < 0)
            {
                Py_DECREF(result);
                return NULL;
            }
        }
    }

    return result;
#endif
}
static PyMethodDef methods[] = {
{"get_stack_trace", get_stack_trace, METH_VARARGS, "Get the Python stack from a given PID"},
{NULL, NULL, 0, NULL},
};
static struct PyModuleDef module = {
.m_base = PyModuleDef_HEAD_INIT,
.m_name = "_testexternalinspection",
.m_size = -1,
.m_methods = methods,
};
/*
 * Module initialisation: create the module and publish
 * PROCESS_VM_READV_SUPPORTED, set to the build-time value of
 * HAVE_PROCESS_VM_READV.
 *
 * Returns the new module, or NULL with an exception set on failure.
 */
PyMODINIT_FUNC
PyInit__testexternalinspection(void)
{
    PyObject* mod = PyModule_Create(&module);
    if (mod == NULL) {
        // Creation failed; the exception is already set and there is nothing
        // to add a constant to.
        return NULL;
    }
    if (PyModule_AddIntConstant(mod, "PROCESS_VM_READV_SUPPORTED", HAVE_PROCESS_VM_READV) < 0) {
        Py_DECREF(mod);
        return NULL;
    }
    return mod;
}