/*
 * Copyright (c) 2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

// #define STANDALONE

#ifndef STANDALONE
#include <darwintest.h>
#endif
/*
 * Reconstructed include list: the original header names were lost, so the
 * headers below are inferred from the APIs this file uses.
 */
#include <architecture/i386/table.h>
#include <i386/user_ldt.h>
#include <mach/i386/thread_status.h>
#include <mach/mach.h>
#include <mach/mach_error.h>
#include <mach/thread_act.h>
#include <mach/vm_param.h>
#include <assert.h>
#include <errno.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <sys/mman.h>
#include <sys/sysctl.h>
#include <sys/types.h>
#include <sys/ucontext.h>
#include <unistd.h>

#ifndef STANDALONE
T_GLOBAL_META(
	T_META_NAMESPACE("xnu.intel"),
	T_META_RADAR_COMPONENT_NAME("xnu"),
	T_META_RADAR_COMPONENT_VERSION("intel"),
	T_META_OWNER("seth_goldberg"),
	T_META_CHECK_LEAKS(false)
	);
#endif

#define COMPAT_MODE_CS_SELECTOR 0x1f
#define SYSENTER_SELECTOR 0xb
/* #define DEBUG 1 */
#define P2ROUNDUP(x, align) (-(-((long)x) & -((long)align)))
#define MSG 2048

#define NORMAL_RUN_TIME  (10)
#define TIMEOUT_OVERHEAD (10)
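/*
 * Worked example of the P2ROUNDUP() macro above, which rounds x up to a
 * multiple of align (a power of two) via two's-complement identities:
 * P2ROUNDUP(0x1234, 0x1000) == -(-0x1234 & -0x1000)
 *                           == -(0x...EDCC & 0x...F000)
 *                           == -(0x...E000) == 0x2000.
 */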
/*
 * General theory of operation:
 * ----------------------------
 * (1) Ensure that all code and data to be accessed from compatibility mode is
 *     located in the low 4GiB of virtual address space.
 * (2) Allocate required segments via the i386_set_ldt() system call, making
 *     sure to set the descriptor type correctly (code vs. data). Creating
 *     64-bit code segments is not allowed (just use the existing 0x2b selector.)
 * (3) Once you know which selector is associated with the desired code, use a
 *     trampoline (or thunk) to (a) switch to a stack that's located below 4GiB
 *     and (b) save ABI-mandated caller-saved state so that if it's trashed by
 *     compatibility-mode code, it can be restored before returning to 64-bit
 *     mode (if desired), and finally (c) long-jump or long-call (aka far call)
 *     to the segment and desired offset (this example uses an offset of 0 for
 *     simplicity; a condensed sketch of steps (2) and (3) follows these
 *     comments.)
 * (4) Once in compatibility mode, if a framework call or system call is
 *     required, the code must trampoline back to 64-bit mode to do so. System
 *     calls from compatibility-mode code are not supported and will result in
 *     invalid opcode exceptions. This example includes a simple 64-bit
 *     trampoline (which must be located in the low 4GiB of virtual address
 *     space, since it's executed by compatibility-mode code.) Note that since
 *     the 64-bit ABI mandates that the stack must be aligned to a 16-byte
 *     boundary, the sample trampoline performs that rounding, to simplify
 *     compatibility-mode code. Additionally, since 64-bit native code makes
 *     use of thread-local storage, the user-mode GSbase must be restored.
 *     This sample includes two ways to do that: (a) by calling into a C
 *     implementation that associates the thread-local storage pointer with a
 *     stack range (which will be unique for each thread), and (b) by storing
 *     the original GSbase in a block of memory installed into GSbase before
 *     calling into compatibility-mode code. A special machdep system call
 *     restores GSbase as needed. Note that the sample trampoline does not
 *     save and restore %gs (or most other register state), so that is an
 *     area that may be tailored to the application's requirements.
 * (5) Once running in compatibility mode, should synchronous or asynchronous
 *     exceptions occur, this sample shows how a Mach exception handler
 *     (running in a detached thread, handling exceptions for the entire task)
 *     can catch such exceptions and manipulate thread state to perform
 *     recovery (or not.) Other ways to handle exceptions include installing
 *     per-thread exception servers. Alternatively, BSD signal handlers can be
 *     used. Note that once a process installs a custom LDT, *ALL* future
 *     signal deliveries will include ucontext pointers to mcontext structures
 *     that include enhanced thread state embedded (e.g. the %ds, %es, %ss,
 *     and GSbase registers) [This assumes that SA_SIGINFO is passed to
 *     sigaction(2) when registering handlers]. The mcontext size (part of the
 *     ucontext) can be used to differentiate between mcontext flavors (e.g.
 *     those with/without full thread state plus x87 FP state, AVX state, or
 *     AVX512 state).
 */

/*
 * This test exercises the custom LDT functionality exposed via the
 * i386_{get,set}_ldt system calls.
 *
 * Tests include:
 * (1a) Exception handling (due to an exception or another thread sending a
 *      signal) while running in compatibility mode;
 * (1b) Signal handling while running in compatibility mode;
 * (2)  Thunking back to 64-bit mode and executing a framework function
 *      (e.g. printf);
 * (3)  Ensuring that transitions to compatibility mode and back to 64-bit
 *      mode do not negatively impact system calls and framework calls in
 *      64-bit mode;
 * (4)  Use of thread_get_state / thread_set_state to configure a thread to
 *      execute in compatibility mode with the proper LDT code segment (this
 *      is effectively what the exception handler does when the passed-in
 *      new_state is changed, and what the BSD signal handler return handling
 *      does when the mcontext is modified);
 * (5)  Ensuring that compatibility-mode code cannot make system calls via
 *      sysenter or old-style int {0x80..0x82};
 * (6)  Negative testing to ensure errors are returned if the consumer tries
 *      to set a disallowed segment type / Long flag. [TBD]
 */

/*
 * Note that these addresses are not necessarily available due to ASLR, so
 * a robust implementation should determine the proper range to use via
 * another means.
 */
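/*
 * Illustrative sketch of steps (2) and (3) above. This is NOT compiled into
 * the test: the real descriptor setup lives in ldt64_test_setup() below, and
 * the real far call is made by the external compat_mode_trampoline assembly.
 * `slot` and `code_base` are hypothetical placeholders, and LDT_AUTO_ALLOC
 * (let the kernel pick a free slot) comes from <i386/user_ldt.h>; this test
 * instead chooses its slot explicitly via i386_get_ldt().
 */
#if 0
	union ldt_entry desc = { 0 };

	/* 32-bit, ring-3, page-granular code segment based at code_base */
	desc.code.base00   = (unsigned short)(code_base & 0xFFFF);
	desc.code.base16   = (unsigned char)((code_base >> 16) & 0xFF);
	desc.code.base24   = (unsigned char)((code_base >> 24) & 0xFF);
	desc.code.limit00  = 0xFFFF;
	desc.code.limit16  = 0xF;
	desc.code.type     = DESC_CODE_READ;
	desc.code.opsz     = DESC_CODE_32B;
	desc.code.granular = DESC_GRAN_PAGE;
	desc.code.dpl      = 3;
	desc.code.present  = 1;

	int slot = i386_set_ldt(LDT_AUTO_ALLOC, &desc, 1);

	/*
	 * Selector = (slot << 3) | 4 (table indicator: LDT) | 3 (RPL 3);
	 * e.g. slot 3 yields 0x1f == COMPAT_MODE_CS_SELECTOR.
	 */
	far_call_t fc = { .off = 0, .seg = (uint16_t)((slot << 3) | 0x7) };

	/* A far call through &fc enters compatibility mode at offset 0 */
#endif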
#ifndef STANDALONE
/* libdarwintest needs LOTs of stack */
#endif

#define FIXED_STACK_SIZE   (PAGE_SIZE * 16)
#define FIXED_TRAMP_MAXLEN (PAGE_SIZE * 8)

#pragma pack(1)
typedef struct {
	uint64_t off;
	uint16_t seg;
} far_call_t;
#pragma pack()

typedef struct {
	uint64_t stack_base;
	uint64_t stack_limit;
	uint64_t GSbase;
} stackaddr_to_gsbase_t;

typedef struct thread_arg {
	pthread_mutex_t mutex;
	pthread_cond_t  condvar;
	volatile boolean_t done;
	uint32_t compat_stackaddr;      /* Compatibility mode stack address */
} thread_arg_t;

typedef struct custom_tsd {
	struct custom_tsd *this_tsd_base;
	uint64_t orig_tsd_base;
} custom_tsd_t;

typedef uint64_t (*compat_tramp_t)(far_call_t *fcp, void *lowmemstk,
    uint64_t arg_for_32bit, uint64_t callback, uint64_t absolute_addr_of_thunk64);

/* address_space(256) makes loads/stores through this pointer %gs-relative */
#define GS_RELATIVE volatile __attribute__((address_space(256)))
static custom_tsd_t GS_RELATIVE *mytsd = (custom_tsd_t GS_RELATIVE *)0;

static far_call_t input_desc = { .seg = COMPAT_MODE_CS_SELECTOR, .off = 0 };
static uint64_t stackAddr = 0;
static compat_tramp_t thunkit = NULL;
static uint64_t thunk64_addr;
/* stack2gs[0] is initialized in map_lowmem_stack() */
static stackaddr_to_gsbase_t stack2gs[] = { { 0 } };

extern int compat_mode_trampoline(far_call_t *, void *, uint64_t);
extern void long_mode_trampoline(void);
extern boolean_t mach_exc_server(mach_msg_header_t *InHeadP,
    mach_msg_header_t *OutHeadP);
extern void code_32(void);

kern_return_t
catch_mach_exception_raise_state_identity(mach_port_t exception_port,
    mach_port_t thread,
    mach_port_t task,
    exception_type_t exception,
    mach_exception_data_t code,
    mach_msg_type_number_t code_count,
    int *flavor,
    thread_state_t old_state,
    mach_msg_type_number_t old_state_count,
    thread_state_t new_state,
    mach_msg_type_number_t *new_state_count);

kern_return_t
catch_mach_exception_raise_state(mach_port_t exception_port,
    exception_type_t exception,
    const mach_exception_data_t code,
    mach_msg_type_number_t codeCnt,
    int *flavor,
    const thread_state_t old_state,
    mach_msg_type_number_t old_stateCnt,
    thread_state_t new_state,
    mach_msg_type_number_t *new_stateCnt);

kern_return_t
catch_mach_exception_raise(mach_port_t exception_port,
    mach_port_t thread,
    mach_port_t task,
    exception_type_t exception,
    mach_exception_data_t code,
    mach_msg_type_number_t codeCnt,
    int *flavor,
    thread_state_t old_state,
    mach_msg_type_number_t old_stateCnt,
    thread_state_t new_state,
    mach_msg_type_number_t *new_stateCnt);

extern void _thread_set_tsd_base(uint64_t);

static uint64_t stack_range_to_GSbase(uint64_t stackptr, uint64_t GSbase);
void restore_gsbase(uint64_t stackptr);

static uint64_t
get_gsbase(void)
{
	struct thread_identifier_info tiinfo;
	unsigned int info_count = THREAD_IDENTIFIER_INFO_COUNT;
	kern_return_t kr;

	if ((kr = thread_info(mach_thread_self(), THREAD_IDENTIFIER_INFO,
	    (thread_info_t)&tiinfo, &info_count)) != KERN_SUCCESS) {
		fprintf(stderr, "Could not get tsd base address. This will not end well.\n");
		return 0;
	}

	return (uint64_t)tiinfo.thread_handle;
}

void
restore_gsbase(uint64_t stackptr)
{
	/* Restore GSbase so TSD is accessible in long mode */
	uint64_t orig_GSbase = stack_range_to_GSbase(stackptr, 0);

	assert(orig_GSbase != 0);
	_thread_set_tsd_base(orig_GSbase);
}

/*
 * Though we've directed all exceptions through the
 * catch_mach_exception_raise_state_identity entry point, we must still
 * provide the other two entry points; otherwise a linker error will occur.
 */
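/*
 * Background: mach_exc_server() is generated by MIG from mach_exc.defs and
 * dispatches each incoming exception message to one of the three
 * catch_mach_exception_raise*() entry points according to the behavior the
 * exception port was registered with: EXCEPTION_RAISE ->
 * catch_mach_exception_raise(), EXCEPTION_STATE ->
 * catch_mach_exception_raise_state(), EXCEPTION_STATE_IDENTITY ->
 * catch_mach_exception_raise_state_identity(). Since this test registers
 * with (EXCEPTION_STATE_IDENTITY | MACH_EXCEPTION_CODES), only the identity
 * variant should ever run; the other two are unreachable stubs.
 */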
kern_return_t
catch_mach_exception_raise(mach_port_t exception_port,
    mach_port_t thread,
    mach_port_t task,
    exception_type_t exception,
    mach_exception_data_t code,
    mach_msg_type_number_t codeCnt,
    int *flavor,
    thread_state_t old_state,
    mach_msg_type_number_t old_stateCnt,
    thread_state_t new_state,
    mach_msg_type_number_t *new_stateCnt)
{
#pragma unused(exception_port, thread, task, exception, code, codeCnt, flavor, old_state, old_stateCnt, new_state, new_stateCnt)
	fprintf(stderr, "Unexpected exception handler called: %s\n", __func__);
	return KERN_FAILURE;
}

kern_return_t
catch_mach_exception_raise_state(mach_port_t exception_port,
    exception_type_t exception,
    const mach_exception_data_t code,
    mach_msg_type_number_t codeCnt,
    int *flavor,
    const thread_state_t old_state,
    mach_msg_type_number_t old_stateCnt,
    thread_state_t new_state,
    mach_msg_type_number_t *new_stateCnt)
{
#pragma unused(exception_port, exception, code, codeCnt, flavor, old_state, old_stateCnt, new_state, new_stateCnt)
	fprintf(stderr, "Unexpected exception handler called: %s\n", __func__);
	return KERN_FAILURE;
}

static void
handle_arithmetic_exception(_STRUCT_X86_THREAD_FULL_STATE64 *xtfs64, uint64_t *ip_skip_countp)
{
	fprintf(stderr, "Caught divide-error exception\n");
	fprintf(stderr, "cs=0x%x rip=0x%llx gs=0x%x ss=0x%x rsp=0x%llx\n",
	    (unsigned)xtfs64->__ss64.__cs,
	    xtfs64->__ss64.__rip,
	    (unsigned)xtfs64->__ss64.__gs,
	    (unsigned)xtfs64->__ss,
	    xtfs64->__ss64.__rsp);
	/* Skip the 2-byte div instruction that raised the fault */
	*ip_skip_countp = 2;
}

static void
handle_badinsn_exception(_STRUCT_X86_THREAD_FULL_STATE64 *xtfs64, uint64_t __unused *ip_skip_countp)
{
	extern void first_invalid_opcode(void);
	extern void last_invalid_opcode(void);

	uint64_t start_addr = ((uintptr_t)first_invalid_opcode - (uintptr_t)code_32);
	uint64_t end_addr = ((uintptr_t)last_invalid_opcode - (uintptr_t)code_32);

	fprintf(stderr, "Caught invalid opcode exception\n");
	fprintf(stderr, "cs=0x%x rip=0x%llx gs=0x%x ss=0x%x rsp=0x%llx | handling between 0x%llx and 0x%llx\n",
	    (unsigned)xtfs64->__ss64.__cs,
	    xtfs64->__ss64.__rip,
	    (unsigned)xtfs64->__ss64.__gs,
	    (unsigned)xtfs64->__ss,
	    xtfs64->__ss64.__rsp,
	    start_addr, end_addr);

	/*
	 * We expect to handle 4 invalid opcode exceptions:
	 * (1) sysenter
	 * (2) int $0x80
	 * (3) int $0x81
	 * (4) int $0x82
	 * (Note that due to the way the invalid opcode indication was
	 * implemented, %rip is already set to the next instruction, so no
	 * skip count is needed.)
	 */
	if (xtfs64->__ss64.__rip >= start_addr && xtfs64->__ss64.__rip <= end_addr) {
		/*
		 * On return from the failed sysenter, %cs is changed to the
		 * sysenter code selector and %ss is set to 0x23, so switch them
		 * back to sane values.
		 */
		if ((unsigned)xtfs64->__ss64.__cs == SYSENTER_SELECTOR) {
			xtfs64->__ss64.__cs = COMPAT_MODE_CS_SELECTOR;
			xtfs64->__ss = 0x23; /* XXX */
		}
	}
}
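/*
 * Sketch (not compiled) of the "longjmp-style" recovery alternative mentioned
 * in the handler below: rather than skipping the faulting instruction, the
 * new_state could aim the victim thread at a 64-bit recovery routine.
 * `recovery_entry` and `recovery_stack` are hypothetical.
 */
#if 0
	new_xtfs64->__ss64.__cs  = 0x2b;                 /* existing 64-bit code selector, see step (2) above */
	new_xtfs64->__ss64.__rip = (uint64_t)recovery_entry;
	new_xtfs64->__ss64.__rsp = recovery_stack & ~0xfULL;  /* 16-byte aligned per the 64-bit ABI */
	return KERN_SUCCESS;                             /* resume with the modified state */
#endif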
kern_return_t
catch_mach_exception_raise_state_identity(mach_port_t exception_port,
    mach_port_t thread,
    mach_port_t task,
    exception_type_t exception,
    mach_exception_data_t code,
    mach_msg_type_number_t codeCnt,
    int *flavor,
    thread_state_t old_state,
    mach_msg_type_number_t old_stateCnt,
    thread_state_t new_state,
    mach_msg_type_number_t *new_stateCnt)
{
#pragma unused(exception_port, thread, task)
	_STRUCT_X86_THREAD_FULL_STATE64 *xtfs64 =
	    (_STRUCT_X86_THREAD_FULL_STATE64 *)(void *)old_state;
	_STRUCT_X86_THREAD_FULL_STATE64 *new_xtfs64 =
	    (_STRUCT_X86_THREAD_FULL_STATE64 *)(void *)new_state;
	uint64_t rip_skip_count = 0;

	/*
	 * Check the exception code and thread state.
	 * If we were executing 32-bit code (or 64-bit code on behalf of
	 * 32-bit code), we could update the thread state to effectively longjmp
	 * back to a safe location where the victim thread can recover.
	 * Then again, we could return KERN_NOT_SUPPORTED and allow the process
	 * to be nuked.
	 */
	switch (exception) {
	case EXC_ARITHMETIC:
		if (codeCnt >= 1 && code[0] == EXC_I386_DIV) {
			handle_arithmetic_exception(xtfs64, &rip_skip_count);
		}
		break;

	case EXC_BAD_INSTRUCTION:
		if (codeCnt >= 1 && code[0] == EXC_I386_INVOP) {
			handle_badinsn_exception(xtfs64, &rip_skip_count);
		}
		break;

	default:
		fprintf(stderr, "Unsupported catch_mach_exception_raise_state_identity: code 0x%llx sub 0x%llx\n",
		    code[0], codeCnt > 1 ? code[1] : 0LL);
		fprintf(stderr, "flavor=%d %%cs=0x%x %%rip=0x%llx\n",
		    *flavor, (unsigned)xtfs64->__ss64.__cs, xtfs64->__ss64.__rip);
	}

	/*
	 * If this exception happened in compatibility mode, assume it was one
	 * of the intentional faults and advance the new state's instruction
	 * pointer just past the faulting instruction so the thread can resume.
	 */
	if ((unsigned)xtfs64->__ss64.__cs == COMPAT_MODE_CS_SELECTOR) {
		*new_stateCnt = old_stateCnt;
		*new_xtfs64 = *xtfs64;
		new_xtfs64->__ss64.__rip += rip_skip_count;
		fprintf(stderr, "new cs=0x%x rip=0x%llx\n",
		    (unsigned)new_xtfs64->__ss64.__cs,
		    new_xtfs64->__ss64.__rip);
		return KERN_SUCCESS;
	} else {
		return KERN_NOT_SUPPORTED;
	}
}

static void *
handle_exceptions(void *arg)
{
	mach_port_t ePort = (mach_port_t)arg;
	kern_return_t kret;

	kret = mach_msg_server(mach_exc_server, MACH_MSG_SIZE_RELIABLE, ePort, 0);
	if (kret != KERN_SUCCESS) {
		fprintf(stderr, "mach_msg_server: %s (%d)\n", mach_error_string(kret), kret);
	}

	return NULL;
}

static void
init_task_exception_server(void)
{
	kern_return_t kr;
	task_t me = mach_task_self();
	pthread_t handler_thread;
	pthread_attr_t attr;
	mach_port_t ePort;

	kr = mach_port_allocate(me, MACH_PORT_RIGHT_RECEIVE, &ePort);
	if (kr != KERN_SUCCESS) {
		fprintf(stderr, "allocate receive right: %d\n", kr);
		return;
	}

	kr = mach_port_insert_right(me, ePort, ePort, MACH_MSG_TYPE_MAKE_SEND);
	if (kr != KERN_SUCCESS) {
		fprintf(stderr, "insert right into port=[%d]: %d\n", ePort, kr);
		return;
	}

	kr = task_set_exception_ports(me, EXC_MASK_BAD_INSTRUCTION | EXC_MASK_ARITHMETIC, ePort,
	    (exception_behavior_t)(EXCEPTION_STATE_IDENTITY | MACH_EXCEPTION_CODES),
	    x86_THREAD_FULL_STATE64);
	if (kr != KERN_SUCCESS) {
		fprintf(stderr, "abort: error setting task exception ports on task=[%d], handler=[%d]: %d\n",
		    me, ePort, kr);
		exit(1);
	}

	pthread_attr_init(&attr);
	pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);

	if (pthread_create(&handler_thread, &attr, handle_exceptions, (void *)(uintptr_t)ePort) != 0) {
		perror("pthread create error");
		return;
	}

	pthread_attr_destroy(&attr);
}
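/*
 * Sketch (not compiled) of the per-thread alternative mentioned in item (5)
 * of the theory-of-operation comment: the same handler can be scoped to a
 * single thread instead of the whole task. `ePort` would be created and
 * given a send right exactly as in init_task_exception_server() above.
 */
#if 0
	kr = thread_set_exception_ports(mach_thread_self(),
	    EXC_MASK_BAD_INSTRUCTION | EXC_MASK_ARITHMETIC, ePort,
	    (exception_behavior_t)(EXCEPTION_STATE_IDENTITY | MACH_EXCEPTION_CODES),
	    x86_THREAD_FULL_STATE64);
#endif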
static union ldt_entry *descs = 0;
static uint64_t idx;
static int saw_ud2 = 0;
static boolean_t ENV_set_ldt_in_sighandler = FALSE;

static void
signal_handler(int signo, siginfo_t *sinfop, void *ucontext)
{
	uint64_t rip_skip_count = 0;
	ucontext_t *uctxp = (ucontext_t *)ucontext;
	union {
		_STRUCT_MCONTEXT_AVX512_64      *avx512_basep;
		_STRUCT_MCONTEXT_AVX512_64_FULL *avx512_fullp;
		_STRUCT_MCONTEXT_AVX64          *avx64_basep;
		_STRUCT_MCONTEXT_AVX64_FULL     *avx64_fullp;
		_STRUCT_MCONTEXT64              *fp_basep;
		_STRUCT_MCONTEXT64_FULL         *fp_fullp;
	} mctx;

	mctx.fp_fullp = (_STRUCT_MCONTEXT64_FULL *)uctxp->uc_mcontext;

	/*
	 * Note that GSbase must be restored before calling into any frameworks
	 * that might access anything %gs-relative (e.g. TSD) if the signal
	 * handler was triggered while the thread was running with a non-default
	 * (system-established) GSbase.
	 */

	if ((signo != SIGFPE && signo != SIGILL) || sinfop->si_signo != signo) {
#ifndef STANDALONE
		T_ASSERT_FAIL("Unexpected signal %d\n", signo);
#else
		restore_gsbase(mctx.fp_fullp->__ss.__ss64.__rsp);
		fprintf(stderr, "Not handling signal %d\n", signo);
		abort();
#endif
	}

	if (uctxp->uc_mcsize == sizeof(_STRUCT_MCONTEXT_AVX512_64) ||
	    uctxp->uc_mcsize == sizeof(_STRUCT_MCONTEXT_AVX64) ||
	    uctxp->uc_mcsize == sizeof(_STRUCT_MCONTEXT64)) {
		_STRUCT_X86_THREAD_STATE64 *ss64 = &mctx.fp_basep->__ss;

		/*
		 * The following block is an illustration of what NOT to do.
		 * Configuring an LDT for the first time in a signal handler
		 * will likely cause the process to crash.
		 */
		if (ENV_set_ldt_in_sighandler == TRUE && !saw_ud2) {
			/* Set the LDT: */
			int cnt = i386_set_ldt((int)idx, &descs[idx], 1);
			if (cnt != (int)idx) {
#ifdef DEBUG
				fprintf(stderr, "i386_set_ldt unexpectedly returned %d (errno: %s)\n",
				    cnt, strerror(errno));
#endif
#ifndef STANDALONE
				T_LOG("i386_set_ldt unexpectedly returned %d (errno: %s)\n", cnt, strerror(errno));
				T_ASSERT_FAIL("i386_set_ldt failure");
#else
				exit(1);
#endif
			}
#ifdef DEBUG
			printf("i386_set_ldt returned %d\n", cnt);
#endif
			ss64->__rip += 2;       /* ud2 is 2 bytes */

			saw_ud2 = 1;

			/*
			 * When we return here, the sigreturn processing code will
			 * try to copy a FULL thread context from the signal stack,
			 * which will likely cause the resumed thread to fault and
			 * be terminated.
			 */
			return;
		}

		restore_gsbase(ss64->__rsp);

		/*
		 * If we're in this block, either we are dispatching a signal
		 * received before we installed a custom LDT, or we are on a
		 * kernel that does not deliver full thread state with BSD
		 * signals. It's likely the latter.
		 */
#ifndef STANDALONE
		T_ASSERT_FAIL("This system doesn't support BSD signals with full thread state.");
#else
		fprintf(stderr, "This system doesn't support BSD signals with full thread state. Aborting.\n");
		abort();
#endif
	} else if (uctxp->uc_mcsize == sizeof(_STRUCT_MCONTEXT_AVX512_64_FULL) ||
	    uctxp->uc_mcsize == sizeof(_STRUCT_MCONTEXT_AVX64_FULL) ||
	    uctxp->uc_mcsize == sizeof(_STRUCT_MCONTEXT64_FULL)) {
		_STRUCT_X86_THREAD_FULL_STATE64 *ss64 = &mctx.fp_fullp->__ss;

		/*
		 * Since we're handling this signal on the same thread, we may
		 * need to restore GSbase.
		 */
		uint64_t orig_gsbase = stack_range_to_GSbase(ss64->__ss64.__rsp, 0);
		if (orig_gsbase != 0 && orig_gsbase != ss64->__gsbase) {
			restore_gsbase(ss64->__ss64.__rsp);
		}

		if (signo == SIGFPE) {
			handle_arithmetic_exception(ss64, &rip_skip_count);
		} else if (signo == SIGILL) {
			handle_badinsn_exception(ss64, &rip_skip_count);
		}

		/*
		 * If this exception happened in compatibility mode, assume it
		 * was one of the intentional faults and advance the saved
		 * instruction pointer just past the faulting instruction so
		 * the thread can resume on sigreturn.
		 */
		if ((unsigned)ss64->__ss64.__cs == COMPAT_MODE_CS_SELECTOR) {
			ss64->__ss64.__rip += rip_skip_count;
			fprintf(stderr, "new cs=0x%x rip=0x%llx\n",
			    (unsigned)ss64->__ss64.__cs, ss64->__ss64.__rip);
		}
	} else {
		_STRUCT_X86_THREAD_STATE64 *ss64 = &mctx.fp_basep->__ss;

		restore_gsbase(ss64->__rsp);
#ifndef STANDALONE
		T_ASSERT_FAIL("Unknown mcontext size %lu: Aborting.", uctxp->uc_mcsize);
#else
		fprintf(stderr, "Unknown mcontext size %lu: Aborting.\n", uctxp->uc_mcsize);
		abort();
#endif
	}
}
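/*
 * Quick reference for the handler above: the deliberate faults raised by
 * code_32 arrive as SIGFPE (EXC_ARITHMETIC / EXC_I386_DIV, the divide error)
 * or SIGILL (EXC_BAD_INSTRUCTION / EXC_I386_INVOP, the sysenter and
 * int $0x80..$0x82 attempts), depending on whether the BSD-signal or the
 * Mach-exception variant of the test is running.
 */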
static void
setup_signal_handling(void)
{
	int rv;

	struct sigaction sa = {
		.__sigaction_u = { .__sa_sigaction = signal_handler },
		.sa_flags = SA_SIGINFO
	};

	sigfillset(&sa.sa_mask);

	rv = sigaction(SIGFPE, &sa, NULL);
	if (rv != 0) {
#ifndef STANDALONE
		T_ASSERT_FAIL("Failed to configure SIGFPE signal handler\n");
#else
		fprintf(stderr, "Failed to configure SIGFPE signal handler\n");
		abort();
#endif
	}

	rv = sigaction(SIGILL, &sa, NULL);
	if (rv != 0) {
#ifndef STANDALONE
		T_ASSERT_FAIL("Failed to configure SIGILL signal handler\n");
#else
		fprintf(stderr, "Failed to configure SIGILL signal handler\n");
		abort();
#endif
	}
}

static void
teardown_signal_handling(void)
{
	if (signal(SIGFPE, SIG_DFL) == SIG_ERR) {
#ifndef STANDALONE
		T_ASSERT_FAIL("Error resetting SIGFPE signal disposition\n");
#else
		fprintf(stderr, "Error resetting SIGFPE signal disposition\n");
		abort();
#endif
	}

	if (signal(SIGILL, SIG_DFL) == SIG_ERR) {
#ifndef STANDALONE
		T_ASSERT_FAIL("Error resetting SIGILL signal disposition\n");
#else
		fprintf(stderr, "Error resetting SIGILL signal disposition\n");
		abort();
#endif
	}
}

#ifdef DEBUG
static void
dump_desc(union ldt_entry *entp)
{
	printf("base %p lim %p type 0x%x dpl %x present %x opsz %x granular %x\n",
	    (void *)(uintptr_t)(entp->code.base00 + (entp->code.base16 << 16) + (entp->code.base24 << 24)),
	    (void *)(uintptr_t)(entp->code.limit00 + (entp->code.limit16 << 16)),
	    entp->code.type,
	    entp->code.dpl,
	    entp->code.present,
	    entp->code.opsz,
	    entp->code.granular);
}
#endif

static int
map_lowmem_stack(void **lowmemstk)
{
	void *addr;
	int err;

	if ((addr = mmap(0, FIXED_STACK_SIZE + PAGE_SIZE, PROT_READ | PROT_WRITE,
	    MAP_32BIT | MAP_PRIVATE | MAP_ANON, -1, 0)) == MAP_FAILED) {
		return errno;
	}

	if ((uintptr_t)addr > 0xFFFFF000ULL) {
		/* Error: This kernel does not support MAP_32BIT or there's a bug. */
#ifndef STANDALONE
		T_ASSERT_FAIL("%s: failed to map a 32-bit-accessible stack", __func__);
#else
		fprintf(stderr, "This kernel returned a virtual address > 4G (%p) despite MAP_32BIT. Aborting.\n", addr);
		exit(1);
#endif
	}

	/* Enforce one page of redzone at the bottom of the stack */
	if (mprotect(addr, PAGE_SIZE, PROT_NONE) < 0) {
		err = errno;
		(void) munmap(addr, FIXED_STACK_SIZE + PAGE_SIZE);
		return err;
	}

	if (lowmemstk) {
		stack2gs[0].stack_base = (uintptr_t)addr + PAGE_SIZE;
		stack2gs[0].stack_limit = stack2gs[0].stack_base + FIXED_STACK_SIZE;
		*lowmemstk = (void *)((uintptr_t)addr + PAGE_SIZE);
	}

	return 0;
}
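/*
 * Usage sketch (not compiled): map_lowmem_stack() hands back the usable base
 * just above the PROT_NONE redzone page; a 16-byte-aligned top-of-stack for
 * the compatibility-mode thread is then derived from it the same way
 * ldt64_test_setup() does below.
 */
#if 0
	void *stk;

	if (map_lowmem_stack(&stk) == 0) {
		uint32_t compat_sp = (uint32_t)((uintptr_t)stk + FIXED_STACK_SIZE - 16);
		/* compat_sp fits in 32 bits thanks to MAP_32BIT */
	}
#endif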
static int
map_32bit_code_impl(uint8_t *code_src, size_t code_len, void **codeptr, size_t szlimit)
{
	void *addr;
	size_t sz = (size_t)P2ROUNDUP(code_len, (unsigned)PAGE_SIZE);

	if (code_len > szlimit) {
		return E2BIG;
	}

#ifdef DEBUG
	printf("size = %lu, szlimit = %u\n", sz, (unsigned)szlimit);
#endif

	if ((addr = mmap(0, sz, PROT_READ | PROT_WRITE | PROT_EXEC,
	    MAP_32BIT | MAP_PRIVATE | MAP_ANON, -1, 0)) == MAP_FAILED) {
		return errno;
	}

	if ((uintptr_t)addr > 0xFFFFF000ULL) {
		/* Error: This kernel does not support MAP_32BIT or there's a bug. */
#ifndef STANDALONE
		T_ASSERT_FAIL("%s: failed to map a 32-bit-accessible trampoline", __func__);
#else
		fprintf(stderr, "This kernel returned a virtual address > 4G (%p) despite MAP_32BIT. Aborting.\n", addr);
		exit(1);
#endif
	}

#ifdef DEBUG
	printf("Mapping code @%p..%p => %p..%p\n", (void *)code_src,
	    (void *)((uintptr_t)code_src + (unsigned)code_len),
	    addr, (void *)((uintptr_t)addr + (unsigned)code_len));
#endif

	bcopy(code_src, addr, code_len);

	/* Fill the rest of the page with NOPs */
	if ((sz - code_len) > 0) {
		memset((void *)((uintptr_t)addr + code_len), 0x90, sz - code_len);
	}

	if (codeptr) {
		*codeptr = addr;
	}

	return 0;
}

static int
map_32bit_trampoline(compat_tramp_t *lowmemtrampp)
{
	extern int compat_mode_trampoline_len;

	return map_32bit_code_impl((uint8_t *)&compat_mode_trampoline,
	    (size_t)compat_mode_trampoline_len, (void **)lowmemtrampp,
	    FIXED_TRAMP_MAXLEN);
}

static uint64_t
stack_range_to_GSbase(uint64_t stackptr, uint64_t GSbase)
{
	unsigned long i;

	for (i = 0; i < sizeof(stack2gs) / sizeof(stack2gs[0]); i++) {
		if (stackptr >= stack2gs[i].stack_base &&
		    stackptr < stack2gs[i].stack_limit) {
			if (GSbase != 0) {
#ifdef DEBUG
				fprintf(stderr, "Updated gsbase for stack at 0x%llx..0x%llx to 0x%llx\n",
				    stack2gs[i].stack_base, stack2gs[i].stack_limit, GSbase);
#endif
				stack2gs[i].GSbase = GSbase;
			}
			return stack2gs[i].GSbase;
		}
	}
	return 0;
}

static uint64_t
call_compatmode(uint32_t stackaddr, uint64_t compat_arg, uint64_t callback)
{
	uint64_t rv;

	/*
	 * Depending on how this is used, this allocation may need to be
	 * made with an allocator that returns virtual addresses below 4G.
	 */
	custom_tsd_t *new_GSbase = malloc(PAGE_SIZE);

	/*
	 * Change the GSbase (so things like printf will fail unless GSbase is
	 * restored)
	 */
	if (new_GSbase != NULL) {
#ifdef DEBUG
		fprintf(stderr, "Setting new GS base: %p\n", (void *)new_GSbase);
#endif
		new_GSbase->this_tsd_base = new_GSbase;
		new_GSbase->orig_tsd_base = get_gsbase();
		_thread_set_tsd_base((uintptr_t)new_GSbase);
	} else {
#ifndef STANDALONE
		T_ASSERT_FAIL("Failed to allocate a page for new GSbase");
#else
		fprintf(stderr, "Failed to allocate a page for new GSbase\n");
		abort();
#endif
	}

	rv = thunkit(&input_desc, (void *)(uintptr_t)stackaddr, compat_arg,
	    callback, thunk64_addr);

	restore_gsbase(stackaddr);

	free(new_GSbase);

	return rv;
}

static uint64_t
get_cursp(void)
{
	uint64_t curstk;

	__asm__ __volatile__ ("movq %%rsp, %0" : "=r" (curstk) :: "memory");
	return curstk;
}

static void
hello_from_32bit(void)
{
	uint64_t cur_tsd_base = (uint64_t)(uintptr_t)mytsd->this_tsd_base;

	restore_gsbase(get_cursp());

	printf("Hello on behalf of 32-bit compatibility mode!\n");

	_thread_set_tsd_base(cur_tsd_base);
}
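/*
 * Control-flow summary of one call_compatmode() round trip, as wired up by
 * ldt64_test_setup() below (the assembly pieces are external to this file):
 *
 *   call_compatmode()                          64-bit mode
 *     -> thunkit (compat_mode_trampoline copied below 4GiB; switches to the
 *        low-memory stack and saves caller-saved state)
 *        -> far call through COMPAT_MODE_CS_SELECTOR:0 into code_32
 *                                              compatibility (32-bit) mode
 *           -> thunk64 (returns to a 64-bit code segment)
 *              -> long_mode_trampoline -> callback (hello_from_32bit), which
 *                 restores GSbase before touching anything %gs-relative
 *   <- back in call_compatmode(), restore_gsbase() reinstalls the original
 *      TSD base recorded for this thread's stack range
 */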
/*
 * Thread for executing 32-bit code
 */
static void *
thread_32bit(void *arg)
{
	thread_arg_t *targp = (thread_arg_t *)arg;
	uint64_t cthread_self = 0;

	/* Save the GSbase for context switch back to 64-bit mode */
	cthread_self = get_gsbase();

	/*
	 * Associate GSbase with the compat-mode stack (which will be used for
	 * long mode thunk calls as well.)
	 */
	(void)stack_range_to_GSbase(targp->compat_stackaddr, cthread_self);

#ifdef DEBUG
	printf("[thread %p] tsd base => %p\n", (void *)pthread_self(), (void *)cthread_self);
#endif

	pthread_mutex_lock(&targp->mutex);

	do {
		if (targp->done == FALSE) {
			pthread_cond_wait(&targp->condvar, &targp->mutex);
		}

		/* Finally, execute the test */
		if (call_compatmode(targp->compat_stackaddr, 0,
		    (uint64_t)&hello_from_32bit) == 1) {
			printf("32-bit code test passed\n");
		} else {
			printf("32-bit code test failed\n");
		}
	} while (targp->done == FALSE);

	pthread_mutex_unlock(&targp->mutex);

	return 0;
}

static void
join_32bit_thread(pthread_t *thridp, thread_arg_t *cmargp)
{
	(void)pthread_mutex_lock(&cmargp->mutex);
	cmargp->done = TRUE;
	(void)pthread_cond_signal(&cmargp->condvar);
	(void)pthread_mutex_unlock(&cmargp->mutex);
	(void)pthread_join(*thridp, NULL);
	*thridp = 0;
}

static int
create_worker_thread(thread_arg_t *cmargp, uint32_t stackaddr, pthread_t *cmthreadp)
{
	*cmargp = (thread_arg_t) {
		.mutex = PTHREAD_MUTEX_INITIALIZER,
		.condvar = PTHREAD_COND_INITIALIZER,
		.done = FALSE,
		.compat_stackaddr = stackaddr
	};

	return pthread_create(cmthreadp, NULL, thread_32bit, cmargp);
}
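/*
 * Worked example of the descriptor encoding performed in ldt64_test_setup()
 * below, using a hypothetical code_addr of 0x00234000: base00/base16/base24
 * encode the split base 0x234000, and the page-granular limit field
 * (code_addr >> 12) + 1 == 0x235 gives an effective limit of
 * (0x235 << 12) | 0xFFF == 0x235FFF, so the segment spans offsets
 * 0..0x235FFF from that base, comfortably covering the copied code_32 region.
 */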
static void
ldt64_test_setup(pthread_t *cmthreadp, thread_arg_t *cmargp, boolean_t setldt_in_sighandler)
{
	extern void thunk64(void);
	extern void thunk64_movabs(void);
	int cnt = 0, err;
	void *addr;
	uintptr_t code_addr;
	uintptr_t thunk64_movabs_addr;

	descs = malloc(sizeof(union ldt_entry) * 256);
	if (descs == 0) {
#ifndef STANDALONE
		T_ASSERT_FAIL("Could not allocate descriptor storage");
#else
		fprintf(stderr, "Could not allocate descriptor storage\n");
		abort();
#endif
	}

#ifdef DEBUG
	printf("32-bit code is at %p\n", (void *)&code_32);
#endif

	if ((err = map_lowmem_stack(&addr)) != 0) {
#ifndef STANDALONE
		T_ASSERT_FAIL("failed to mmap lowmem stack: %s", strerror(err));
#else
		fprintf(stderr, "Failed to mmap lowmem stack: %s\n", strerror(err));
		exit(1);
#endif
	}

	stackAddr = (uintptr_t)addr + FIXED_STACK_SIZE - 16;

#ifdef DEBUG
	printf("lowstack addr = %p\n", (void *)stackAddr);
#endif

	if ((err = map_32bit_trampoline(&thunkit)) != 0) {
#ifndef STANDALONE
		T_LOG("Failed to map trampoline into lowmem: %s\n", strerror(err));
		T_ASSERT_FAIL("Failed to map trampoline into lowmem");
#else
		fprintf(stderr, "Failed to map trampoline into lowmem: %s\n", strerror(err));
		exit(1);
#endif
	}

	/*
	 * Store long_mode_trampoline's address into the constant part of the
	 * movabs instruction in thunk64
	 */
	thunk64_movabs_addr = (uintptr_t)thunkit +
	    ((uintptr_t)thunk64_movabs - (uintptr_t)compat_mode_trampoline);
	*((uint64_t *)(thunk64_movabs_addr + 2)) = (uint64_t)&long_mode_trampoline;

	bzero(descs, sizeof(union ldt_entry) * 256);

	if ((cnt = i386_get_ldt(0, descs, 1)) <= 0) {
#ifndef STANDALONE
		T_LOG("i386_get_ldt unexpectedly returned %d (errno: %s)\n", cnt, strerror(errno));
		T_ASSERT_FAIL("i386_get_ldt failure");
#else
		fprintf(stderr, "i386_get_ldt unexpectedly returned %d (errno: %s)\n", cnt, strerror(errno));
		exit(1);
#endif
	}

#ifdef DEBUG
	printf("i386_get_ldt returned %d\n", cnt);
#endif

	idx = (unsigned)cnt;    /* Put the desired descriptor in the first available slot */

	/*
	 * code_32's address for the purposes of this descriptor is the base
	 * mapped address of the thunkit function + the offset of code_32 from
	 * compat_mode_trampoline.
	 */
	code_addr = (uintptr_t)thunkit + ((uintptr_t)code_32 - (uintptr_t)compat_mode_trampoline);
	thunk64_addr = (uintptr_t)thunkit + ((uintptr_t)thunk64 - (uintptr_t)compat_mode_trampoline);

	/* Initialize the desired descriptor */
	descs[idx].code.limit00 = (unsigned short)(((code_addr >> 12) + 1) & 0xFFFF);
	descs[idx].code.limit16 = (unsigned char)((((code_addr >> 12) + 1) >> 16) & 0xF);
	descs[idx].code.base00 = (unsigned short)((code_addr) & 0xFFFF);
	descs[idx].code.base16 = (unsigned char)((code_addr >> 16) & 0xFF);
	descs[idx].code.base24 = (unsigned char)((code_addr >> 24) & 0xFF);
	descs[idx].code.type = DESC_CODE_READ;
	descs[idx].code.opsz = DESC_CODE_32B;
	descs[idx].code.granular = DESC_GRAN_PAGE;
	descs[idx].code.dpl = 3;
	descs[idx].code.present = 1;

	if (setldt_in_sighandler == FALSE) {
		/* Set the LDT: */
		cnt = i386_set_ldt((int)idx, &descs[idx], 1);
		if (cnt != (int)idx) {
#ifndef STANDALONE
			T_LOG("i386_set_ldt unexpectedly returned %d (errno: %s)\n", cnt, strerror(errno));
			T_ASSERT_FAIL("i386_set_ldt failure");
#else
			fprintf(stderr, "i386_set_ldt unexpectedly returned %d (errno: %s)\n", cnt, strerror(errno));
			exit(1);
#endif
		}
#ifdef DEBUG
		printf("i386_set_ldt returned %d\n", cnt);
#endif
	} else {
		/* Trigger SIGILL; the handler then sets the LDT (deliberately, as a what-NOT-to-do case) */
		__asm__ __volatile__ ("ud2" ::: "memory");
	}

	/* Read back the LDT to ensure it was set properly */
	if ((cnt = i386_get_ldt(0, descs, (int)idx)) > 0) {
#ifdef DEBUG
		for (int i = 0; i < cnt; i++) {
			dump_desc(&descs[i]);
		}
#endif
	} else {
#ifndef STANDALONE
		T_LOG("i386_get_ldt unexpectedly returned %d (errno: %s)\n", cnt, strerror(errno));
		T_ASSERT_FAIL("i386_get_ldt failure");
#else
		fprintf(stderr, "i386_get_ldt unexpectedly returned %d (errno: %s)\n", cnt, strerror(errno));
		exit(1);
#endif
	}

	free(descs);

	if ((err = create_worker_thread(cmargp, (uint32_t)stackAddr, cmthreadp)) != 0) {
#ifdef DEBUG
		fprintf(stderr, "Fatal: Could not create thread: %s\n", strerror(err));
#endif
#ifndef STANDALONE
		T_LOG("Fatal: Could not create thread: %s\n", strerror(err));
		T_ASSERT_FAIL("Thread creation failure");
#else
		exit(1);
#endif
	}
}
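/*
 * Usage note: the BSD-signal variant below exercises the "set an LDT for the
 * first time inside a signal handler" what-NOT-to-do path when
 * LDT_SET_IN_SIGHANDLER is present in the environment, e.g. (binary name
 * hypothetical):
 *
 *     LDT_SET_IN_SIGHANDLER=1 ./ldt_test
 */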
#ifdef STANDALONE
static void
test_ldt64_with_bsdsig(void)
#else
/*
 * Main test declarations
 */
T_DECL(ldt64_with_bsd_sighandling,
    "Ensures that a 64-bit process can create LDT entries and can execute code in "
    "compatibility mode with BSD signal handling",
    T_META_TIMEOUT(NORMAL_RUN_TIME + TIMEOUT_OVERHEAD))
#endif
{
	pthread_t cmthread;
	thread_arg_t cmarg;

	int translated = 0;
	size_t translated_size = sizeof(int);

	sysctlbyname("sysctl.proc_translated", &translated, &translated_size, NULL, 0);

	if (translated) {
#ifndef STANDALONE
		T_SKIP("Skipping this test because it is translated");
#else
		fprintf(stderr, "SKIPPED: running translated\n");
		return;
#endif
	}

	setup_signal_handling();

#ifndef STANDALONE
	T_SETUPBEGIN;
#endif
	ENV_set_ldt_in_sighandler = (getenv("LDT_SET_IN_SIGHANDLER") != NULL) ? TRUE : FALSE;
	ldt64_test_setup(&cmthread, &cmarg, ENV_set_ldt_in_sighandler);
#ifndef STANDALONE
	T_SETUPEND;
#endif

	join_32bit_thread(&cmthread, &cmarg);

	teardown_signal_handling();

#ifndef STANDALONE
	T_PASS("Successfully completed ldt64 test with BSD signal handling");
#else
	fprintf(stderr, "PASSED: ldt64_with_bsd_signal_handling\n");
#endif
}

#ifdef STANDALONE
static void
test_ldt64_with_machexc(void)
#else
T_DECL(ldt64_with_mach_exception_handling,
    "Ensures that a 64-bit process can create LDT entries and can execute code in "
    "compatibility mode with Mach exception handling",
    T_META_TIMEOUT(NORMAL_RUN_TIME + TIMEOUT_OVERHEAD))
#endif
{
	pthread_t cmthread;
	thread_arg_t cmarg;

	int translated = 0;
	size_t translated_size = sizeof(int);

	sysctlbyname("sysctl.proc_translated", &translated, &translated_size, NULL, 0);

	if (translated) {
#ifndef STANDALONE
		T_SKIP("Skipping this test because it is translated");
#else
		fprintf(stderr, "SKIPPED: running translated\n");
		return;
#endif
	}

#ifndef STANDALONE
	T_SETUPBEGIN;
#endif
	ldt64_test_setup(&cmthread, &cmarg, FALSE);
#ifndef STANDALONE
	T_SETUPEND;
#endif

	/* Now repeat with Mach exception handling */
	init_task_exception_server();

	join_32bit_thread(&cmthread, &cmarg);

#ifndef STANDALONE
	T_PASS("Successfully completed ldt64 test with mach exception handling");
#else
	fprintf(stderr, "PASSED: ldt64_with_mach_exception_handling\n");
#endif
}

#ifdef STANDALONE
int
main(int __unused argc, char ** __unused argv)
{
	test_ldt64_with_bsdsig();
	test_ldt64_with_machexc();
}
#endif