//===--- Implementation of a Linux thread class -----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/__support/threads/thread.h"
#include "src/__support/CPP/atomic.h"
#include "src/__support/CPP/error.h"
#include "src/__support/OSUtil/syscall.h"           // For syscall functions.
#include "src/__support/threads/linux/futex_word.h" // For FutexWordType

#ifdef LLVM_LIBC_ARCH_AARCH64
#include <arm_acle.h>
#endif

#include <linux/futex.h>
#include <linux/sched.h> // For CLONE_* flags.
#include <stdint.h>
#include <sys/mman.h>    // For PROT_* and MAP_* definitions.
#include <sys/syscall.h> // For syscall numbers.
24fe801747SSiva Chandra Reddy 25fe801747SSiva Chandra Reddy namespace __llvm_libc { 26fe801747SSiva Chandra Reddy 27fe801747SSiva Chandra Reddy #ifdef SYS_mmap2 28fe801747SSiva Chandra Reddy static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap2; 29fe801747SSiva Chandra Reddy #elif SYS_mmap 30fe801747SSiva Chandra Reddy static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap; 31fe801747SSiva Chandra Reddy #else 32fe801747SSiva Chandra Reddy #error "SYS_mmap or SYS_mmap2 not available on the target platform" 33fe801747SSiva Chandra Reddy #endif 34fe801747SSiva Chandra Reddy 35fe801747SSiva Chandra Reddy static constexpr size_t DEFAULT_STACK_SIZE = (1 << 16); // 64KB 36fe801747SSiva Chandra Reddy static constexpr uint32_t CLEAR_TID_VALUE = 0xABCD1234; 37fe801747SSiva Chandra Reddy static constexpr unsigned CLONE_SYSCALL_FLAGS = 38fe801747SSiva Chandra Reddy CLONE_VM // Share the memory space with the parent. 39fe801747SSiva Chandra Reddy | CLONE_FS // Share the file system with the parent. 40fe801747SSiva Chandra Reddy | CLONE_FILES // Share the files with the parent. 41fe801747SSiva Chandra Reddy | CLONE_SIGHAND // Share the signal handlers with the parent. 42fe801747SSiva Chandra Reddy | CLONE_THREAD // Same thread group as the parent. 43fe801747SSiva Chandra Reddy | CLONE_SYSVSEM // Share a single list of System V semaphore adjustment 44fe801747SSiva Chandra Reddy // values 45fe801747SSiva Chandra Reddy | CLONE_PARENT_SETTID // Set child thread ID in |ptid| of the parent. 46fe801747SSiva Chandra Reddy | CLONE_CHILD_CLEARTID; // Let the kernel clear the tid address 47fe801747SSiva Chandra Reddy // wake the joining thread. 48fe801747SSiva Chandra Reddy // TODO: Add the CLONE_SETTLS flag and setup the TLS area correctly 49fe801747SSiva Chandra Reddy // when making the clone syscall. 
50fe801747SSiva Chandra Reddy 51fe801747SSiva Chandra Reddy static inline cpp::ErrorOr<void *> alloc_stack(size_t size) { 52fe801747SSiva Chandra Reddy long mmap_result = 53fe801747SSiva Chandra Reddy __llvm_libc::syscall(MMAP_SYSCALL_NUMBER, 54fe801747SSiva Chandra Reddy 0, // No special address 55fe801747SSiva Chandra Reddy size, 56fe801747SSiva Chandra Reddy PROT_READ | PROT_WRITE, // Read and write stack 57fe801747SSiva Chandra Reddy MAP_ANONYMOUS | MAP_PRIVATE, // Process private 58fe801747SSiva Chandra Reddy -1, // Not backed by any file 59fe801747SSiva Chandra Reddy 0 // No offset 60fe801747SSiva Chandra Reddy ); 61fe801747SSiva Chandra Reddy if (mmap_result < 0 && (uintptr_t(mmap_result) >= UINTPTR_MAX - size)) 62fe801747SSiva Chandra Reddy return cpp::Error{int(-mmap_result)}; 63fe801747SSiva Chandra Reddy return reinterpret_cast<void *>(mmap_result); 64fe801747SSiva Chandra Reddy } 65fe801747SSiva Chandra Reddy 66fe801747SSiva Chandra Reddy static inline void free_stack(void *stack, size_t size) { 67fe801747SSiva Chandra Reddy __llvm_libc::syscall(SYS_munmap, stack, size); 68fe801747SSiva Chandra Reddy } 69fe801747SSiva Chandra Reddy 70fe801747SSiva Chandra Reddy struct Thread; 71fe801747SSiva Chandra Reddy 72fe801747SSiva Chandra Reddy // We align the start args to 16-byte boundary as we adjust the allocated 73fe801747SSiva Chandra Reddy // stack memory with its size. We want the adjusted address to be at a 74fe801747SSiva Chandra Reddy // 16-byte boundary to satisfy the x86_64 and aarch64 ABI requirements. 75fe801747SSiva Chandra Reddy // If different architecture in future requires higher alignment, then we 76fe801747SSiva Chandra Reddy // can add a platform specific alignment spec. 
77fe801747SSiva Chandra Reddy struct alignas(STACK_ALIGNMENT) StartArgs { 78*379428c2SSiva Chandra Reddy ThreadAttributes *thread_attrib; 79fe801747SSiva Chandra Reddy ThreadRunner runner; 80fe801747SSiva Chandra Reddy void *arg; 81fe801747SSiva Chandra Reddy }; 82fe801747SSiva Chandra Reddy 83fe801747SSiva Chandra Reddy __attribute__((always_inline)) inline uintptr_t get_start_args_addr() { 84fe801747SSiva Chandra Reddy // NOTE: For __builtin_frame_address to work reliably across compilers, 85fe801747SSiva Chandra Reddy // architectures and various optimization levels, the TU including this file 86fe801747SSiva Chandra Reddy // should be compiled with -fno-omit-frame-pointer. 87fe801747SSiva Chandra Reddy #ifdef LLVM_LIBC_ARCH_X86_64 88fe801747SSiva Chandra Reddy return reinterpret_cast<uintptr_t>(__builtin_frame_address(0)) 89fe801747SSiva Chandra Reddy // The x86_64 call instruction pushes resume address on to the stack. 90fe801747SSiva Chandra Reddy // Next, The x86_64 SysV ABI requires that the frame pointer be pushed 91fe801747SSiva Chandra Reddy // on to the stack. So, we have to step past two 64-bit values to get 92fe801747SSiva Chandra Reddy // to the start args. 93fe801747SSiva Chandra Reddy + sizeof(uintptr_t) * 2; 94fe801747SSiva Chandra Reddy #elif defined(LLVM_LIBC_ARCH_AARCH64) 95fe801747SSiva Chandra Reddy // The frame pointer after cloning the new thread in the Thread::run method 96fe801747SSiva Chandra Reddy // is set to the stack pointer where start args are stored. So, we fetch 97fe801747SSiva Chandra Reddy // from there. 
98fe801747SSiva Chandra Reddy return reinterpret_cast<uintptr_t>(__builtin_frame_address(1)); 99fe801747SSiva Chandra Reddy #endif 100fe801747SSiva Chandra Reddy } 101fe801747SSiva Chandra Reddy 102fe801747SSiva Chandra Reddy static void start_thread() __attribute__((noinline)) { 103fe801747SSiva Chandra Reddy auto *start_args = reinterpret_cast<StartArgs *>(get_start_args_addr()); 104*379428c2SSiva Chandra Reddy auto *attrib = start_args->thread_attrib; 105fe801747SSiva Chandra Reddy long retval; 106fe801747SSiva Chandra Reddy if (attrib->style == ThreadStyle::POSIX) { 107fe801747SSiva Chandra Reddy attrib->retval.posix_retval = 108fe801747SSiva Chandra Reddy start_args->runner.posix_runner(start_args->arg); 109fe801747SSiva Chandra Reddy retval = long(attrib->retval.posix_retval); 110fe801747SSiva Chandra Reddy } else { 111fe801747SSiva Chandra Reddy attrib->retval.stdc_retval = 112fe801747SSiva Chandra Reddy start_args->runner.stdc_runner(start_args->arg); 113fe801747SSiva Chandra Reddy retval = long(attrib->retval.stdc_retval); 114fe801747SSiva Chandra Reddy } 115fe801747SSiva Chandra Reddy 116fe801747SSiva Chandra Reddy uint32_t joinable_state = uint32_t(DetachState::JOINABLE); 117*379428c2SSiva Chandra Reddy if (!attrib->detach_state.compare_exchange_strong( 118fe801747SSiva Chandra Reddy joinable_state, uint32_t(DetachState::EXITING))) { 119fe801747SSiva Chandra Reddy // Thread is detached so cleanup the resources. 
120*379428c2SSiva Chandra Reddy if (attrib->owned_stack) 121*379428c2SSiva Chandra Reddy free_stack(attrib->stack, attrib->stack_size); 122fe801747SSiva Chandra Reddy } 123fe801747SSiva Chandra Reddy 124fe801747SSiva Chandra Reddy __llvm_libc::syscall(SYS_exit, retval); 125fe801747SSiva Chandra Reddy } 126fe801747SSiva Chandra Reddy 127fe801747SSiva Chandra Reddy int Thread::run(ThreadStyle style, ThreadRunner runner, void *arg, void *stack, 128fe801747SSiva Chandra Reddy size_t size, bool detached) { 129fe801747SSiva Chandra Reddy bool owned_stack = false; 130fe801747SSiva Chandra Reddy if (stack == nullptr) { 131fe801747SSiva Chandra Reddy if (size == 0) 132fe801747SSiva Chandra Reddy size = DEFAULT_STACK_SIZE; 133fe801747SSiva Chandra Reddy auto alloc = alloc_stack(size); 134fe801747SSiva Chandra Reddy if (!alloc) 135fe801747SSiva Chandra Reddy return alloc.error_code(); 136fe801747SSiva Chandra Reddy else 137fe801747SSiva Chandra Reddy stack = alloc.value(); 138fe801747SSiva Chandra Reddy owned_stack = true; 139fe801747SSiva Chandra Reddy } 140fe801747SSiva Chandra Reddy 141fe801747SSiva Chandra Reddy // When the new thread is spawned by the kernel, the new thread gets the 142fe801747SSiva Chandra Reddy // stack we pass to the clone syscall. However, this stack is empty and does 143fe801747SSiva Chandra Reddy // not have any local vars present in this function. Hence, one cannot 144fe801747SSiva Chandra Reddy // pass arguments to the thread start function, or use any local vars from 145fe801747SSiva Chandra Reddy // here. So, we pack them into the new stack from where the thread can sniff 146fe801747SSiva Chandra Reddy // them out. 147fe801747SSiva Chandra Reddy // 148fe801747SSiva Chandra Reddy // Likewise, the actual thread state information is also stored on the 149fe801747SSiva Chandra Reddy // stack memory. 
150fe801747SSiva Chandra Reddy uintptr_t adjusted_stack = reinterpret_cast<uintptr_t>(stack) + size - 151fe801747SSiva Chandra Reddy sizeof(StartArgs) - sizeof(ThreadAttributes) - 152fe801747SSiva Chandra Reddy sizeof(cpp::Atomic<FutexWordType>); 153fe801747SSiva Chandra Reddy adjusted_stack &= ~(uintptr_t(STACK_ALIGNMENT) - 1); 154fe801747SSiva Chandra Reddy 155fe801747SSiva Chandra Reddy auto *start_args = reinterpret_cast<StartArgs *>(adjusted_stack); 156fe801747SSiva Chandra Reddy 157fe801747SSiva Chandra Reddy attrib = 158fe801747SSiva Chandra Reddy reinterpret_cast<ThreadAttributes *>(adjusted_stack + sizeof(StartArgs)); 159fe801747SSiva Chandra Reddy attrib->style = style; 160fe801747SSiva Chandra Reddy attrib->detach_state = 161fe801747SSiva Chandra Reddy uint32_t(detached ? DetachState::DETACHED : DetachState::JOINABLE); 162fe801747SSiva Chandra Reddy attrib->stack = stack; 163fe801747SSiva Chandra Reddy attrib->stack_size = size; 164fe801747SSiva Chandra Reddy attrib->owned_stack = owned_stack; 165fe801747SSiva Chandra Reddy 166*379428c2SSiva Chandra Reddy start_args->thread_attrib = attrib; 167*379428c2SSiva Chandra Reddy start_args->runner = runner; 168*379428c2SSiva Chandra Reddy start_args->arg = arg; 169*379428c2SSiva Chandra Reddy 170fe801747SSiva Chandra Reddy auto clear_tid = reinterpret_cast<cpp::Atomic<FutexWordType> *>( 171fe801747SSiva Chandra Reddy adjusted_stack + sizeof(StartArgs) + sizeof(ThreadAttributes)); 172fe801747SSiva Chandra Reddy clear_tid->val = CLEAR_TID_VALUE; 173fe801747SSiva Chandra Reddy platform_data = clear_tid; 174fe801747SSiva Chandra Reddy 175fe801747SSiva Chandra Reddy // The clone syscall takes arguments in an architecture specific order. 176fe801747SSiva Chandra Reddy // Also, we want the result of the syscall to be in a register as the child 177fe801747SSiva Chandra Reddy // thread gets a completely different stack after it is created. 
The stack 178fe801747SSiva Chandra Reddy // variables from this function will not be availalbe to the child thread. 179fe801747SSiva Chandra Reddy #ifdef LLVM_LIBC_ARCH_X86_64 180fe801747SSiva Chandra Reddy long register clone_result asm("rax"); 181fe801747SSiva Chandra Reddy clone_result = __llvm_libc::syscall( 182fe801747SSiva Chandra Reddy SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack, 183fe801747SSiva Chandra Reddy &attrib->tid, // The address where the child tid is written 184fe801747SSiva Chandra Reddy &clear_tid->val, // The futex where the child thread status is signalled 185fe801747SSiva Chandra Reddy 0 // Set TLS to null for now. 186fe801747SSiva Chandra Reddy ); 187fe801747SSiva Chandra Reddy #elif defined(LLVM_LIBC_ARCH_AARCH64) 188fe801747SSiva Chandra Reddy long register clone_result asm("x0"); 189fe801747SSiva Chandra Reddy clone_result = __llvm_libc::syscall( 190fe801747SSiva Chandra Reddy SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack, 191fe801747SSiva Chandra Reddy &attrib->tid, // The address where the child tid is written 192fe801747SSiva Chandra Reddy 0, // Set TLS to null for now. 193fe801747SSiva Chandra Reddy &clear_tid->val // The futex where the child thread status is signalled 194fe801747SSiva Chandra Reddy ); 195fe801747SSiva Chandra Reddy #else 196fe801747SSiva Chandra Reddy #error "Unsupported architecture for the clone syscall." 197fe801747SSiva Chandra Reddy #endif 198fe801747SSiva Chandra Reddy 199fe801747SSiva Chandra Reddy if (clone_result == 0) { 200fe801747SSiva Chandra Reddy #ifdef LLVM_LIBC_ARCH_AARCH64 201fe801747SSiva Chandra Reddy // We set the frame pointer to be the same as the "sp" so that start args 202fe801747SSiva Chandra Reddy // can be sniffed out from start_thread. 
203fe801747SSiva Chandra Reddy __arm_wsr64("x29", __arm_rsr64("sp")); 204fe801747SSiva Chandra Reddy #endif 205fe801747SSiva Chandra Reddy start_thread(); 206fe801747SSiva Chandra Reddy } else if (clone_result < 0) { 207fe801747SSiva Chandra Reddy if (attrib->owned_stack) 208fe801747SSiva Chandra Reddy free_stack(attrib->stack, attrib->stack_size); 209fe801747SSiva Chandra Reddy return -clone_result; 210fe801747SSiva Chandra Reddy } 211fe801747SSiva Chandra Reddy 212fe801747SSiva Chandra Reddy return 0; 213fe801747SSiva Chandra Reddy } 214fe801747SSiva Chandra Reddy 215fe801747SSiva Chandra Reddy int Thread::join(ThreadReturnValue &retval) { 216fe801747SSiva Chandra Reddy wait(); 217fe801747SSiva Chandra Reddy 218fe801747SSiva Chandra Reddy if (attrib->style == ThreadStyle::POSIX) 219fe801747SSiva Chandra Reddy retval.posix_retval = attrib->retval.posix_retval; 220fe801747SSiva Chandra Reddy else 221fe801747SSiva Chandra Reddy retval.stdc_retval = attrib->retval.stdc_retval; 222fe801747SSiva Chandra Reddy 223fe801747SSiva Chandra Reddy if (attrib->owned_stack) 224fe801747SSiva Chandra Reddy free_stack(attrib->stack, attrib->stack_size); 225fe801747SSiva Chandra Reddy 226fe801747SSiva Chandra Reddy return 0; 227fe801747SSiva Chandra Reddy } 228fe801747SSiva Chandra Reddy 229fe801747SSiva Chandra Reddy int Thread::detach() { 230fe801747SSiva Chandra Reddy uint32_t joinable_state = uint32_t(DetachState::JOINABLE); 231fe801747SSiva Chandra Reddy if (attrib->detach_state.compare_exchange_strong( 232fe801747SSiva Chandra Reddy joinable_state, uint32_t(DetachState::DETACHED))) { 233fe801747SSiva Chandra Reddy return int(DetachType::SIMPLE); 234fe801747SSiva Chandra Reddy } 235fe801747SSiva Chandra Reddy 236fe801747SSiva Chandra Reddy // If the thread was already detached, then the detach method should not 237fe801747SSiva Chandra Reddy // be called at all. If the thread is exiting, then we wait for it to exit 238fe801747SSiva Chandra Reddy // and free up resources. 
239fe801747SSiva Chandra Reddy wait(); 240fe801747SSiva Chandra Reddy 241fe801747SSiva Chandra Reddy if (attrib->owned_stack) 242fe801747SSiva Chandra Reddy free_stack(attrib->stack, attrib->stack_size); 243fe801747SSiva Chandra Reddy return int(DetachType::CLEANUP); 244fe801747SSiva Chandra Reddy } 245fe801747SSiva Chandra Reddy 246fe801747SSiva Chandra Reddy void Thread::wait() { 247fe801747SSiva Chandra Reddy // The kernel should set the value at the clear tid address to zero. 248fe801747SSiva Chandra Reddy // If not, it is a spurious wake and we should continue to wait on 249fe801747SSiva Chandra Reddy // the futex. 250fe801747SSiva Chandra Reddy auto *clear_tid = 251fe801747SSiva Chandra Reddy reinterpret_cast<cpp::Atomic<FutexWordType> *>(platform_data); 252fe801747SSiva Chandra Reddy while (clear_tid->load() != 0) { 253fe801747SSiva Chandra Reddy // We cannot do a FUTEX_WAIT_PRIVATE here as the kernel does a 254fe801747SSiva Chandra Reddy // FUTEX_WAKE and not a FUTEX_WAKE_PRIVATE. 255fe801747SSiva Chandra Reddy __llvm_libc::syscall(SYS_futex, &clear_tid->val, FUTEX_WAIT, 256fe801747SSiva Chandra Reddy CLEAR_TID_VALUE, nullptr); 257fe801747SSiva Chandra Reddy } 258fe801747SSiva Chandra Reddy } 259fe801747SSiva Chandra Reddy 260fe801747SSiva Chandra Reddy } // namespace __llvm_libc 261