diff options
author | L. E. Segovia <amy@centricular.com> | 2024-04-29 16:22:19 -0300 |
---|---|---|
committer | GStreamer Marge Bot <gitlab-merge-bot@gstreamer-foundation.org> | 2024-05-15 06:30:20 +0000 |
commit | 170804fa41410f1ced15fe37e4d4bd64786c05d2 (patch) | |
tree | a7684dc078ae4c372e0fdd05cb95d818ff729800 | |
parent | cfc3780a9cee71ec977bc7416a5663bb8e21d4a9 (diff) |
orccompiler: support stack unwinding on Windows
Part-of: <https://gitlab.freedesktop.org/gstreamer/orc/-/merge_requests/181>
-rw-r--r-- | meson.build | 8 | ||||
-rw-r--r-- | orc/orccode.c | 11 | ||||
-rw-r--r-- | orc/orccompiler.c | 173 | ||||
-rw-r--r-- | orc/orcprogram-x86.c | 3 | ||||
-rw-r--r-- | orc/orcx86.c | 22 |
5 files changed, 208 insertions, 9 deletions
diff --git a/meson.build b/meson.build index 38e7de4..a50c198 100644 --- a/meson.build +++ b/meson.build @@ -110,6 +110,14 @@ if host_os == 'windows' if cc.compiles(code, name : 'building for UWP') cdata.set('ORC_WINAPI_ONLY_APP', true) endif + + # RtlAddFunctionTable is only available for UWP starting with + # Windows SDK 10.0.22621.0. A simple check is to validate if it's usable. + if cc.has_function('RtlAddFunctionTable', prefix: '#include <windows.h>') \ + and cc.has_function('RtlLookupFunctionEntry', prefix: '#include <windows.h>') \ + and cc.has_function('RtlDeleteFunctionTable', prefix: '#include <windows.h>') + cdata.set('ORC_SUPPORTS_BACKTRACE_FROM_JIT', true) + endif else # If it is not windows, we just assume it is a unix of sorts for now. cdata.set('HAVE_CODEMEM_MMAP', true) diff --git a/orc/orccode.c b/orc/orccode.c index f0438dd..ccf091f 100644 --- a/orc/orccode.c +++ b/orc/orccode.c @@ -9,6 +9,9 @@ #include <orc/orcdebug.h> #include <orc/orcinternal.h> +#ifdef _WIN64 +#include <windows.h> +#endif OrcCode * orc_code_new (void) @@ -31,6 +34,14 @@ orc_code_free (OrcCode *code) code->vars = NULL; } if (code->chunk) { +#if defined(_WIN64) && defined(ORC_SUPPORTS_BACKTRACE_FROM_JIT) + DWORD64 dyn_base = 0; + PRUNTIME_FUNCTION p = + RtlLookupFunctionEntry((DWORD64)code->code, &dyn_base, NULL); + if (p != NULL) { + RtlDeleteFunctionTable((PRUNTIME_FUNCTION)((DWORD64)code->code | 0x3)); + } +#endif orc_code_chunk_free (code->chunk); code->chunk = NULL; } diff --git a/orc/orccompiler.c b/orc/orccompiler.c index b3e29e0..338ce36 100644 --- a/orc/orccompiler.c +++ b/orc/orccompiler.c @@ -35,6 +35,50 @@ #include <valgrind/valgrind.h> #endif +#if defined(_WIN64) +#if defined(_M_ARM64) +#define THUNK_SIZE 20 +typedef struct { + orc_uint32 FunctionLength : 18; + orc_uint32 Version : 2; + orc_uint32 HasExceptionHandler : 1; + orc_uint32 PackedEpilog : 1; + orc_uint32 EpilogCount : 5; + orc_uint32 CodeWords : 5; + orc_uint8 UnwindCode[4]; + orc_uint32 
ExceptionHandler; +} UNWIND_INFO; +#else // _M_X64 +#define THUNK_SIZE 12 +#define UWOP_SET_FPREG 3 +#define UWOP_PUSH_NONVOL 0 +#define UWOP_REG_RBP 5 +typedef union { + struct { + orc_uint8 CodeOffset; + orc_uint8 UnwindOp : 4; + orc_uint8 OpInfo : 4; + }; + orc_uint16 FrameOffset; +} UNWIND_CODE; +typedef struct { + orc_uint8 Version : 3; + orc_uint8 Flags : 5; + orc_uint8 SizeOfProlog; + orc_uint8 CountOfCodes; + orc_uint8 FrameRegister : 4; + orc_uint8 FrameOffset : 4; + UNWIND_CODE UnwindCode[2]; + orc_uint32 ExceptionHandler; +} UNWIND_INFO; +#endif // defined(_M_ARM64) +typedef struct { + RUNTIME_FUNCTION function_table; + UNWIND_INFO unwind_info; + orc_uint8 thunk[THUNK_SIZE]; +} OrcUnwindInfo; +#endif + /** * SECTION:orccompiler * @title: OrcCompiler @@ -72,6 +116,16 @@ int _orc_compiler_flag_randomize; /* For Windows */ int _orc_codemem_alignment; +#if defined(_WIN64) +static DWORD orc_exception_handler(PEXCEPTION_RECORD exceptionRecord, + PEXCEPTION_REGISTRATION_RECORD _unused, + PCONTEXT context, + PEXCEPTION_REGISTRATION_RECORD *_unused2) +{ + return ExceptionContinueSearch; +} +#endif + void _orc_compiler_init (void) { @@ -425,7 +479,95 @@ orc_compiler_compile_program (OrcCompiler *compiler, OrcProgram *program, OrcTar goto error; } +#if defined(_WIN64) && defined(ORC_SUPPORTS_BACKTRACE_FROM_JIT) + OrcUnwindInfo table; + // The structures must be DWORD aligned in memory. + const unsigned char *alignas_offset = + (unsigned char *)((DWORD64)(compiler->codeptr + 3) & (~3)); + + if (compiler->use_frame_pointer) { + memset(&table, 0, sizeof(OrcUnwindInfo)); + const DWORD64 start_of_orcunwindinfo = alignas_offset - compiler->code; + + table.function_table.BeginAddress = 0; + + // The following Arm64 block is based on the Firefox changeset: + // https://hg.mozilla.org/mozilla-central/rev/4d932b82695c + // which I fixed to work with pure C. 
+ + // The program counter (and the stack pointer, on Arm64) must be modified + // for the handler to be recognised as valid. +#ifdef _M_ARM64 + table.function_table.UnwindData = + start_of_orcunwindinfo + ORC_STRUCT_OFFSET(OrcUnwindInfo, unwind_info); + + // one 32-bit word gives us up to 4 codes + table.unwind_info.CodeWords = 1; + // alloc_s small stack of size 1*16 + table.unwind_info.UnwindCode[0] = 0x1; + // end + table.unwind_info.UnwindCode[1] = 0xe4; + + // xip0/r16 should be safe to clobber: Windows just used it to call the thunk. + const orc_uint8 reg = 16; + + const void *handler = (void *)&orc_exception_handler; + const uint16_t *addr = (uint16_t *)&handler; + + // Say `handler` is 0x4444333322221111, then: + table.thunk[0] = 0xd2800000 | addr[0] << 5 | reg; // mov xip0, 1111 + table.thunk[1] = 0xf2a00000 | addr[1] << 5 | reg; // movk xip0, 2222 lsl #0x10 + table.thunk[2] = 0xf2c00000 | addr[2] << 5 | reg; // movk xip0, 3333 lsl #0x20 + table.thunk[3] = 0xf2e00000 | addr[3] << 5 | reg; // movk xip0, 4444 lsl #0x30 + table.thunk[4] = 0xd61f0000 | reg << 5; // br xip0 +#else + table.function_table.EndAddress = compiler->codeptr - compiler->code; + table.function_table.UnwindInfoAddress = + start_of_orcunwindinfo + ORC_STRUCT_OFFSET(OrcUnwindInfo, unwind_info); + + table.unwind_info.Version = 1; + // We handle the exception (let the UCRT terminate the app) + table.unwind_info.Flags = UNW_FLAG_EHANDLER; + table.unwind_info.SizeOfProlog = 8; + // See below + table.unwind_info.CountOfCodes = 2; + // RAX = 50, RBP = 55 + // https://learn.microsoft.com/en-us/cpp/build/exception-handling-x64?view=msvc-170#operation-info + // https://www.felixcloutier.com/x86/mov + table.unwind_info.FrameRegister = UWOP_REG_RBP; + // No offset from RSP + table.unwind_info.FrameOffset = 0; + // Bytes 0-4 are the ENDBR64 CET instruction + // The docs kind of lie when they say that offsets are not allowed + // until SET_FPREG. I guess they apply to OpInfo instead? 
+ table.unwind_info.UnwindCode[0].CodeOffset = 5; + table.unwind_info.UnwindCode[0].UnwindOp = UWOP_SET_FPREG; + table.unwind_info.UnwindCode[0].OpInfo = 0; // mov rbp, rsp + table.unwind_info.UnwindCode[1].CodeOffset = 4; + table.unwind_info.UnwindCode[1].UnwindOp = UWOP_PUSH_NONVOL; + table.unwind_info.UnwindCode[1].OpInfo = UWOP_REG_RBP; // push rbp + // Same as above -- the exception handler is required, and must do + // something to actually be dispatched. + table.unwind_info.ExceptionHandler = + start_of_orcunwindinfo + ORC_STRUCT_OFFSET(OrcUnwindInfo, thunk); + // mov %rax, (address of handler as imm64) + // jmp %rax + static const orc_uint8 thunk[THUNK_SIZE] = { + 0x48, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xe0, + }; + memcpy (&table.thunk, &thunk, THUNK_SIZE); + memcpy (&table.thunk[2], &orc_exception_handler, 8); +#endif + + program->orccode->code_size = (unsigned char *)alignas_offset - + compiler->code + sizeof(OrcUnwindInfo); + } else { + program->orccode->code_size = compiler->codeptr - compiler->code; + } +#else program->orccode->code_size = compiler->codeptr - compiler->code; +#endif + orc_code_allocate_codemem (program->orccode, program->orccode->code_size); #if defined(__APPLE__) && TARGET_OS_OSX @@ -441,7 +583,36 @@ orc_compiler_compile_program (OrcCompiler *compiler, OrcProgram *program, OrcTar _set_virtual_protect (program->orccode->code, program->orccode->code_size, PAGE_READWRITE); #endif - memcpy (program->orccode->code, compiler->code, program->orccode->code_size); +#if defined(_WIN64) && defined(ORC_SUPPORTS_BACKTRACE_FROM_JIT) + if (compiler->use_frame_pointer) { + void *const program_unwind_info = + program->orccode->code + (alignas_offset - compiler->code); + PRUNTIME_FUNCTION const runtime_function_address = + (PRUNTIME_FUNCTION)((orc_uint8*)program_unwind_info + + ORC_STRUCT_OFFSET(OrcUnwindInfo, function_table)); + const size_t real_code_size = compiler->codeptr - compiler->code; + memcpy (program->orccode->code, compiler->code, 
real_code_size); + memcpy (program_unwind_info, &table, sizeof (OrcUnwindInfo)); + if (RtlAddFunctionTable (runtime_function_address, 1, (DWORD64)program->orccode->code)) { + DWORD64 dyn_base = 0; + PRUNTIME_FUNCTION const p = RtlLookupFunctionEntry ( + (DWORD64)program->orccode->code + 20, &dyn_base, NULL); + if (p != runtime_function_address) { + ORC_ERROR ("Runtime function for program %s %p is bogus " + "(dynbase=%llx, info=%p, expected=%p)", + program->name, program->orccode->code, dyn_base, p, + runtime_function_address); + } + } else { + ORC_WARNING ("Unable to install unwind info for program %s %p", + program->name, program->orccode->code); + } + } else { + memcpy(program->orccode->code, compiler->code, program->orccode->code_size); + } +#else + memcpy(program->orccode->code, compiler->code, program->orccode->code_size); +#endif #ifdef VALGRIND_DISCARD_TRANSLATIONS VALGRIND_DISCARD_TRANSLATIONS (program->orccode->exec, diff --git a/orc/orcprogram-x86.c b/orc/orcprogram-x86.c index eb55f26..3701b53 100644 --- a/orc/orcprogram-x86.c +++ b/orc/orcprogram-x86.c @@ -82,6 +82,9 @@ orc_x86_compiler_init (OrcCompiler *c) c->valid_regs[i] = 1; } c->valid_regs[X86_ESP] = 0; + if (c->use_frame_pointer) { + c->valid_regs[X86_EBP] = 0; + } orc_x86_validate_registers (t, c); diff --git a/orc/orcx86.c b/orc/orcx86.c index 0b77b90..d4fd270 100644 --- a/orc/orcx86.c +++ b/orc/orcx86.c @@ -453,11 +453,16 @@ orc_x86_emit_prologue (OrcCompiler *compiler) if (compiler->is_64bit) { int i; - orc_x86_emit_cpuinsn_none (compiler, ORC_X86_endbr64); + orc_x86_emit_cpuinsn_none(compiler, ORC_X86_endbr64); + orc_x86_emit_push (compiler, 8, X86_EBP); + if (compiler->use_frame_pointer) { + orc_x86_emit_mov_reg_reg (compiler, 8, X86_ESP, X86_EBP); + } - for(i=0;i<16;i++){ - if (compiler->used_regs[ORC_GP_REG_BASE+i] && - compiler->save_regs[ORC_GP_REG_BASE+i]) { + for (i = 0; i < 16; i++) { + const int reg = ORC_GP_REG_BASE+i; + if (compiler->used_regs[reg] && + compiler->save_regs[reg] 
&& reg != X86_EBP) { orc_x86_emit_push (compiler, 8, ORC_GP_REG_BASE+i); } } @@ -497,12 +502,13 @@ orc_x86_emit_epilogue (OrcCompiler *compiler) if (compiler->is_64bit) { int i; - for(i=15;i>=0;i--){ - if (compiler->used_regs[ORC_GP_REG_BASE+i] && - compiler->save_regs[ORC_GP_REG_BASE+i]) { - orc_x86_emit_pop (compiler, 8, ORC_GP_REG_BASE+i); + for (i = 15; i >= 0; i--) { + const int reg = ORC_GP_REG_BASE+i; + if (compiler->used_regs[reg] && compiler->save_regs[reg] && reg != X86_EBP) { + orc_x86_emit_pop (compiler, 8, reg); } } + orc_x86_emit_pop (compiler, 8, X86_EBP); } else { if (compiler->used_regs[X86_EBX]) { orc_x86_emit_pop (compiler, 4, X86_EBX); |