1 //===--- Implementation of a Linux thread class -----------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "src/__support/threads/thread.h" 10 #include "src/__support/CPP/atomic.h" 11 #include "src/__support/CPP/error.h" 12 #include "src/__support/OSUtil/syscall.h" // For syscall functions. 13 #include "src/__support/threads/linux/futex_word.h" // For FutexWordType 14 15 #ifdef LLVM_LIBC_ARCH_AARCH64 16 #include <arm_acle.h> 17 #endif 18 19 #include <linux/futex.h> 20 #include <linux/sched.h> // For CLONE_* flags. 21 #include <stdint.h> 22 #include <sys/mman.h> // For PROT_* and MAP_* definitions. 23 #include <sys/syscall.h> // For syscall numbers. 24 25 namespace __llvm_libc { 26 27 #ifdef SYS_mmap2 28 static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap2; 29 #elif SYS_mmap 30 static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap; 31 #else 32 #error "SYS_mmap or SYS_mmap2 not available on the target platform" 33 #endif 34 35 static constexpr size_t DEFAULT_STACK_SIZE = (1 << 16); // 64KB 36 static constexpr uint32_t CLEAR_TID_VALUE = 0xABCD1234; 37 static constexpr unsigned CLONE_SYSCALL_FLAGS = 38 CLONE_VM // Share the memory space with the parent. 39 | CLONE_FS // Share the file system with the parent. 40 | CLONE_FILES // Share the files with the parent. 41 | CLONE_SIGHAND // Share the signal handlers with the parent. 42 | CLONE_THREAD // Same thread group as the parent. 43 | CLONE_SYSVSEM // Share a single list of System V semaphore adjustment 44 // values 45 | CLONE_PARENT_SETTID // Set child thread ID in |ptid| of the parent. 46 | CLONE_CHILD_CLEARTID; // Let the kernel clear the tid address 47 // wake the joining thread. 48 // TODO: Add the CLONE_SETTLS flag and setup the TLS area correctly 49 // when making the clone syscall. 50 51 static inline cpp::ErrorOr<void *> alloc_stack(size_t size) { 52 long mmap_result = 53 __llvm_libc::syscall(MMAP_SYSCALL_NUMBER, 54 0, // No special address 55 size, 56 PROT_READ | PROT_WRITE, // Read and write stack 57 MAP_ANONYMOUS | MAP_PRIVATE, // Process private 58 -1, // Not backed by any file 59 0 // No offset 60 ); 61 if (mmap_result < 0 && (uintptr_t(mmap_result) >= UINTPTR_MAX - size)) 62 return cpp::Error{int(-mmap_result)}; 63 return reinterpret_cast<void *>(mmap_result); 64 } 65 66 static inline void free_stack(void *stack, size_t size) { 67 __llvm_libc::syscall(SYS_munmap, stack, size); 68 } 69 70 struct Thread; 71 72 // We align the start args to 16-byte boundary as we adjust the allocated 73 // stack memory with its size. We want the adjusted address to be at a 74 // 16-byte boundary to satisfy the x86_64 and aarch64 ABI requirements. 75 // If different architecture in future requires higher alignment, then we 76 // can add a platform specific alignment spec. 77 struct alignas(STACK_ALIGNMENT) StartArgs { 78 ThreadAttributes *thread_attrib; 79 ThreadRunner runner; 80 void *arg; 81 }; 82 83 __attribute__((always_inline)) inline uintptr_t get_start_args_addr() { 84 // NOTE: For __builtin_frame_address to work reliably across compilers, 85 // architectures and various optimization levels, the TU including this file 86 // should be compiled with -fno-omit-frame-pointer. 87 #ifdef LLVM_LIBC_ARCH_X86_64 88 return reinterpret_cast<uintptr_t>(__builtin_frame_address(0)) 89 // The x86_64 call instruction pushes resume address on to the stack. 90 // Next, The x86_64 SysV ABI requires that the frame pointer be pushed 91 // on to the stack. So, we have to step past two 64-bit values to get 92 // to the start args. 93 + sizeof(uintptr_t) * 2; 94 #elif defined(LLVM_LIBC_ARCH_AARCH64) 95 // The frame pointer after cloning the new thread in the Thread::run method 96 // is set to the stack pointer where start args are stored. So, we fetch 97 // from there. 98 return reinterpret_cast<uintptr_t>(__builtin_frame_address(1)); 99 #endif 100 } 101 102 static void start_thread() __attribute__((noinline)) { 103 auto *start_args = reinterpret_cast<StartArgs *>(get_start_args_addr()); 104 auto *attrib = start_args->thread_attrib; 105 long retval; 106 if (attrib->style == ThreadStyle::POSIX) { 107 attrib->retval.posix_retval = 108 start_args->runner.posix_runner(start_args->arg); 109 retval = long(attrib->retval.posix_retval); 110 } else { 111 attrib->retval.stdc_retval = 112 start_args->runner.stdc_runner(start_args->arg); 113 retval = long(attrib->retval.stdc_retval); 114 } 115 116 uint32_t joinable_state = uint32_t(DetachState::JOINABLE); 117 if (!attrib->detach_state.compare_exchange_strong( 118 joinable_state, uint32_t(DetachState::EXITING))) { 119 // Thread is detached so cleanup the resources. 120 if (attrib->owned_stack) 121 free_stack(attrib->stack, attrib->stack_size); 122 123 // Set the CLEAR_TID address to nullptr to prevent the kernel 124 // from signalling at a non-existent futex location. 125 __llvm_libc::syscall(SYS_set_tid_address, 0); 126 } 127 128 __llvm_libc::syscall(SYS_exit, retval); 129 } 130 131 int Thread::run(ThreadStyle style, ThreadRunner runner, void *arg, void *stack, 132 size_t size, bool detached) { 133 bool owned_stack = false; 134 if (stack == nullptr) { 135 if (size == 0) 136 size = DEFAULT_STACK_SIZE; 137 auto alloc = alloc_stack(size); 138 if (!alloc) 139 return alloc.error_code(); 140 else 141 stack = alloc.value(); 142 owned_stack = true; 143 } 144 145 // When the new thread is spawned by the kernel, the new thread gets the 146 // stack we pass to the clone syscall. However, this stack is empty and does 147 // not have any local vars present in this function. Hence, one cannot 148 // pass arguments to the thread start function, or use any local vars from 149 // here. So, we pack them into the new stack from where the thread can sniff 150 // them out. 151 // 152 // Likewise, the actual thread state information is also stored on the 153 // stack memory. 154 uintptr_t adjusted_stack = reinterpret_cast<uintptr_t>(stack) + size - 155 sizeof(StartArgs) - sizeof(ThreadAttributes) - 156 sizeof(cpp::Atomic<FutexWordType>); 157 adjusted_stack &= ~(uintptr_t(STACK_ALIGNMENT) - 1); 158 159 auto *start_args = reinterpret_cast<StartArgs *>(adjusted_stack); 160 161 attrib = 162 reinterpret_cast<ThreadAttributes *>(adjusted_stack + sizeof(StartArgs)); 163 attrib->style = style; 164 attrib->detach_state = 165 uint32_t(detached ? DetachState::DETACHED : DetachState::JOINABLE); 166 attrib->stack = stack; 167 attrib->stack_size = size; 168 attrib->owned_stack = owned_stack; 169 170 start_args->thread_attrib = attrib; 171 start_args->runner = runner; 172 start_args->arg = arg; 173 174 auto clear_tid = reinterpret_cast<cpp::Atomic<FutexWordType> *>( 175 adjusted_stack + sizeof(StartArgs) + sizeof(ThreadAttributes)); 176 clear_tid->val = CLEAR_TID_VALUE; 177 platform_data = clear_tid; 178 179 // The clone syscall takes arguments in an architecture specific order. 180 // Also, we want the result of the syscall to be in a register as the child 181 // thread gets a completely different stack after it is created. The stack 182 // variables from this function will not be availalbe to the child thread. 183 #ifdef LLVM_LIBC_ARCH_X86_64 184 long register clone_result asm("rax"); 185 clone_result = __llvm_libc::syscall( 186 SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack, 187 &attrib->tid, // The address where the child tid is written 188 &clear_tid->val, // The futex where the child thread status is signalled 189 0 // Set TLS to null for now. 190 ); 191 #elif defined(LLVM_LIBC_ARCH_AARCH64) 192 long register clone_result asm("x0"); 193 clone_result = __llvm_libc::syscall( 194 SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack, 195 &attrib->tid, // The address where the child tid is written 196 0, // Set TLS to null for now. 197 &clear_tid->val // The futex where the child thread status is signalled 198 ); 199 #else 200 #error "Unsupported architecture for the clone syscall." 201 #endif 202 203 if (clone_result == 0) { 204 #ifdef LLVM_LIBC_ARCH_AARCH64 205 // We set the frame pointer to be the same as the "sp" so that start args 206 // can be sniffed out from start_thread. 207 __arm_wsr64("x29", __arm_rsr64("sp")); 208 #endif 209 start_thread(); 210 } else if (clone_result < 0) { 211 if (attrib->owned_stack) 212 free_stack(attrib->stack, attrib->stack_size); 213 return -clone_result; 214 } 215 216 return 0; 217 } 218 219 int Thread::join(ThreadReturnValue &retval) { 220 wait(); 221 222 if (attrib->style == ThreadStyle::POSIX) 223 retval.posix_retval = attrib->retval.posix_retval; 224 else 225 retval.stdc_retval = attrib->retval.stdc_retval; 226 227 if (attrib->owned_stack) 228 free_stack(attrib->stack, attrib->stack_size); 229 230 return 0; 231 } 232 233 int Thread::detach() { 234 uint32_t joinable_state = uint32_t(DetachState::JOINABLE); 235 if (attrib->detach_state.compare_exchange_strong( 236 joinable_state, uint32_t(DetachState::DETACHED))) { 237 return int(DetachType::SIMPLE); 238 } 239 240 // If the thread was already detached, then the detach method should not 241 // be called at all. If the thread is exiting, then we wait for it to exit 242 // and free up resources. 243 wait(); 244 245 if (attrib->owned_stack) 246 free_stack(attrib->stack, attrib->stack_size); 247 return int(DetachType::CLEANUP); 248 } 249 250 void Thread::wait() { 251 // The kernel should set the value at the clear tid address to zero. 252 // If not, it is a spurious wake and we should continue to wait on 253 // the futex. 254 auto *clear_tid = 255 reinterpret_cast<cpp::Atomic<FutexWordType> *>(platform_data); 256 while (clear_tid->load() != 0) { 257 // We cannot do a FUTEX_WAIT_PRIVATE here as the kernel does a 258 // FUTEX_WAKE and not a FUTEX_WAKE_PRIVATE. 259 __llvm_libc::syscall(SYS_futex, &clear_tid->val, FUTEX_WAIT, 260 CLEAR_TID_VALUE, nullptr); 261 } 262 } 263 264 } // namespace __llvm_libc 265