1 //===--- Implementation of a Linux thread class -----------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "src/__support/threads/thread.h" 10 #include "src/__support/CPP/atomic.h" 11 #include "src/__support/CPP/error.h" 12 #include "src/__support/OSUtil/syscall.h" // For syscall functions. 13 #include "src/__support/threads/linux/futex_word.h" // For FutexWordType 14 15 #ifdef LLVM_LIBC_ARCH_AARCH64 16 #include <arm_acle.h> 17 #endif 18 19 #include <linux/futex.h> 20 #include <linux/sched.h> // For CLONE_* flags. 21 #include <stdint.h> 22 #include <sys/mman.h> // For PROT_* and MAP_* definitions. 23 #include <sys/syscall.h> // For syscall numbers. 24 25 namespace __llvm_libc { 26 27 #ifdef SYS_mmap2 28 static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap2; 29 #elif SYS_mmap 30 static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap; 31 #else 32 #error "SYS_mmap or SYS_mmap2 not available on the target platform" 33 #endif 34 35 static constexpr size_t DEFAULT_STACK_SIZE = (1 << 16); // 64KB 36 static constexpr uint32_t CLEAR_TID_VALUE = 0xABCD1234; 37 static constexpr unsigned CLONE_SYSCALL_FLAGS = 38 CLONE_VM // Share the memory space with the parent. 39 | CLONE_FS // Share the file system with the parent. 40 | CLONE_FILES // Share the files with the parent. 41 | CLONE_SIGHAND // Share the signal handlers with the parent. 42 | CLONE_THREAD // Same thread group as the parent. 43 | CLONE_SYSVSEM // Share a single list of System V semaphore adjustment 44 // values 45 | CLONE_PARENT_SETTID // Set child thread ID in |ptid| of the parent. 46 | CLONE_CHILD_CLEARTID; // Let the kernel clear the tid address 47 // wake the joining thread. 48 // TODO: Add the CLONE_SETTLS flag and setup the TLS area correctly 49 // when making the clone syscall. 50 51 static inline cpp::ErrorOr<void *> alloc_stack(size_t size) { 52 long mmap_result = 53 __llvm_libc::syscall(MMAP_SYSCALL_NUMBER, 54 0, // No special address 55 size, 56 PROT_READ | PROT_WRITE, // Read and write stack 57 MAP_ANONYMOUS | MAP_PRIVATE, // Process private 58 -1, // Not backed by any file 59 0 // No offset 60 ); 61 if (mmap_result < 0 && (uintptr_t(mmap_result) >= UINTPTR_MAX - size)) 62 return cpp::Error{int(-mmap_result)}; 63 return reinterpret_cast<void *>(mmap_result); 64 } 65 66 static inline void free_stack(void *stack, size_t size) { 67 __llvm_libc::syscall(SYS_munmap, stack, size); 68 } 69 70 struct Thread; 71 72 // We align the start args to 16-byte boundary as we adjust the allocated 73 // stack memory with its size. We want the adjusted address to be at a 74 // 16-byte boundary to satisfy the x86_64 and aarch64 ABI requirements. 75 // If different architecture in future requires higher alignment, then we 76 // can add a platform specific alignment spec. 77 struct alignas(STACK_ALIGNMENT) StartArgs { 78 Thread *thread; 79 ThreadRunner runner; 80 void *arg; 81 }; 82 83 __attribute__((always_inline)) inline uintptr_t get_start_args_addr() { 84 // NOTE: For __builtin_frame_address to work reliably across compilers, 85 // architectures and various optimization levels, the TU including this file 86 // should be compiled with -fno-omit-frame-pointer. 87 #ifdef LLVM_LIBC_ARCH_X86_64 88 return reinterpret_cast<uintptr_t>(__builtin_frame_address(0)) 89 // The x86_64 call instruction pushes resume address on to the stack. 90 // Next, The x86_64 SysV ABI requires that the frame pointer be pushed 91 // on to the stack. So, we have to step past two 64-bit values to get 92 // to the start args. 93 + sizeof(uintptr_t) * 2; 94 #elif defined(LLVM_LIBC_ARCH_AARCH64) 95 // The frame pointer after cloning the new thread in the Thread::run method 96 // is set to the stack pointer where start args are stored. So, we fetch 97 // from there. 98 return reinterpret_cast<uintptr_t>(__builtin_frame_address(1)); 99 #endif 100 } 101 102 static void start_thread() __attribute__((noinline)) { 103 auto *start_args = reinterpret_cast<StartArgs *>(get_start_args_addr()); 104 auto *thread = start_args->thread; 105 auto *attrib = thread->attrib; 106 long retval; 107 if (attrib->style == ThreadStyle::POSIX) { 108 attrib->retval.posix_retval = 109 start_args->runner.posix_runner(start_args->arg); 110 retval = long(attrib->retval.posix_retval); 111 } else { 112 attrib->retval.stdc_retval = 113 start_args->runner.stdc_runner(start_args->arg); 114 retval = long(attrib->retval.stdc_retval); 115 } 116 117 uint32_t joinable_state = uint32_t(DetachState::JOINABLE); 118 if (!thread->attrib->detach_state.compare_exchange_strong( 119 joinable_state, uint32_t(DetachState::EXITING))) { 120 // Thread is detached so cleanup the resources. 121 if (thread->attrib->owned_stack) 122 free_stack(thread->attrib->stack, thread->attrib->stack_size); 123 } 124 125 __llvm_libc::syscall(SYS_exit, retval); 126 } 127 128 int Thread::run(ThreadStyle style, ThreadRunner runner, void *arg, void *stack, 129 size_t size, bool detached) { 130 bool owned_stack = false; 131 if (stack == nullptr) { 132 if (size == 0) 133 size = DEFAULT_STACK_SIZE; 134 auto alloc = alloc_stack(size); 135 if (!alloc) 136 return alloc.error_code(); 137 else 138 stack = alloc.value(); 139 owned_stack = true; 140 } 141 142 // When the new thread is spawned by the kernel, the new thread gets the 143 // stack we pass to the clone syscall. However, this stack is empty and does 144 // not have any local vars present in this function. Hence, one cannot 145 // pass arguments to the thread start function, or use any local vars from 146 // here. So, we pack them into the new stack from where the thread can sniff 147 // them out. 148 // 149 // Likewise, the actual thread state information is also stored on the 150 // stack memory. 151 uintptr_t adjusted_stack = reinterpret_cast<uintptr_t>(stack) + size - 152 sizeof(StartArgs) - sizeof(ThreadAttributes) - 153 sizeof(cpp::Atomic<FutexWordType>); 154 adjusted_stack &= ~(uintptr_t(STACK_ALIGNMENT) - 1); 155 156 auto *start_args = reinterpret_cast<StartArgs *>(adjusted_stack); 157 start_args->thread = this; 158 start_args->runner = runner; 159 start_args->arg = arg; 160 161 attrib = 162 reinterpret_cast<ThreadAttributes *>(adjusted_stack + sizeof(StartArgs)); 163 attrib->style = style; 164 attrib->detach_state = 165 uint32_t(detached ? DetachState::DETACHED : DetachState::JOINABLE); 166 attrib->stack = stack; 167 attrib->stack_size = size; 168 attrib->owned_stack = owned_stack; 169 170 auto clear_tid = reinterpret_cast<cpp::Atomic<FutexWordType> *>( 171 adjusted_stack + sizeof(StartArgs) + sizeof(ThreadAttributes)); 172 clear_tid->val = CLEAR_TID_VALUE; 173 platform_data = clear_tid; 174 175 // The clone syscall takes arguments in an architecture specific order. 176 // Also, we want the result of the syscall to be in a register as the child 177 // thread gets a completely different stack after it is created. The stack 178 // variables from this function will not be availalbe to the child thread. 179 #ifdef LLVM_LIBC_ARCH_X86_64 180 long register clone_result asm("rax"); 181 clone_result = __llvm_libc::syscall( 182 SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack, 183 &attrib->tid, // The address where the child tid is written 184 &clear_tid->val, // The futex where the child thread status is signalled 185 0 // Set TLS to null for now. 186 ); 187 #elif defined(LLVM_LIBC_ARCH_AARCH64) 188 long register clone_result asm("x0"); 189 clone_result = __llvm_libc::syscall( 190 SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack, 191 &attrib->tid, // The address where the child tid is written 192 0, // Set TLS to null for now. 193 &clear_tid->val // The futex where the child thread status is signalled 194 ); 195 #else 196 #error "Unsupported architecture for the clone syscall." 197 #endif 198 199 if (clone_result == 0) { 200 #ifdef LLVM_LIBC_ARCH_AARCH64 201 // We set the frame pointer to be the same as the "sp" so that start args 202 // can be sniffed out from start_thread. 203 __arm_wsr64("x29", __arm_rsr64("sp")); 204 #endif 205 start_thread(); 206 } else if (clone_result < 0) { 207 if (attrib->owned_stack) 208 free_stack(attrib->stack, attrib->stack_size); 209 return -clone_result; 210 } 211 212 return 0; 213 } 214 215 int Thread::join(ThreadReturnValue &retval) { 216 wait(); 217 218 if (attrib->style == ThreadStyle::POSIX) 219 retval.posix_retval = attrib->retval.posix_retval; 220 else 221 retval.stdc_retval = attrib->retval.stdc_retval; 222 223 if (attrib->owned_stack) 224 free_stack(attrib->stack, attrib->stack_size); 225 226 return 0; 227 } 228 229 int Thread::detach() { 230 uint32_t joinable_state = uint32_t(DetachState::JOINABLE); 231 if (attrib->detach_state.compare_exchange_strong( 232 joinable_state, uint32_t(DetachState::DETACHED))) { 233 return int(DetachType::SIMPLE); 234 } 235 236 // If the thread was already detached, then the detach method should not 237 // be called at all. If the thread is exiting, then we wait for it to exit 238 // and free up resources. 239 wait(); 240 241 if (attrib->owned_stack) 242 free_stack(attrib->stack, attrib->stack_size); 243 return int(DetachType::CLEANUP); 244 } 245 246 void Thread::wait() { 247 // The kernel should set the value at the clear tid address to zero. 248 // If not, it is a spurious wake and we should continue to wait on 249 // the futex. 250 auto *clear_tid = 251 reinterpret_cast<cpp::Atomic<FutexWordType> *>(platform_data); 252 while (clear_tid->load() != 0) { 253 // We cannot do a FUTEX_WAIT_PRIVATE here as the kernel does a 254 // FUTEX_WAKE and not a FUTEX_WAKE_PRIVATE. 255 __llvm_libc::syscall(SYS_futex, &clear_tid->val, FUTEX_WAIT, 256 CLEAR_TID_VALUE, nullptr); 257 } 258 } 259 260 } // namespace __llvm_libc 261