1 //===--- Implementation of a Linux thread class -----------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "src/__support/threads/thread.h" 10 #include "config/linux/app.h" 11 #include "src/__support/CPP/atomic.h" 12 #include "src/__support/CPP/error.h" 13 #include "src/__support/OSUtil/syscall.h" // For syscall functions. 14 #include "src/__support/threads/linux/futex_word.h" // For FutexWordType 15 16 #ifdef LLVM_LIBC_ARCH_AARCH64 17 #include <arm_acle.h> 18 #endif 19 20 #include <linux/futex.h> 21 #include <linux/sched.h> // For CLONE_* flags. 22 #include <stdint.h> 23 #include <sys/mman.h> // For PROT_* and MAP_* definitions. 24 #include <sys/syscall.h> // For syscall numbers. 25 26 namespace __llvm_libc { 27 28 #ifdef SYS_mmap2 29 static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap2; 30 #elif SYS_mmap 31 static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap; 32 #else 33 #error "SYS_mmap or SYS_mmap2 not available on the target platform" 34 #endif 35 36 static constexpr size_t DEFAULT_STACK_SIZE = (1 << 16); // 64KB 37 static constexpr uint32_t CLEAR_TID_VALUE = 0xABCD1234; 38 static constexpr unsigned CLONE_SYSCALL_FLAGS = 39 CLONE_VM // Share the memory space with the parent. 40 | CLONE_FS // Share the file system with the parent. 41 | CLONE_FILES // Share the files with the parent. 42 | CLONE_SIGHAND // Share the signal handlers with the parent. 43 | CLONE_THREAD // Same thread group as the parent. 44 | CLONE_SYSVSEM // Share a single list of System V semaphore adjustment 45 // values 46 | CLONE_PARENT_SETTID // Set child thread ID in |ptid| of the parent. 47 | CLONE_CHILD_CLEARTID // Let the kernel clear the tid address 48 // wake the joining thread. 49 | CLONE_SETTLS; // Setup the thread pointer of the new thread. 50 51 static inline cpp::ErrorOr<void *> alloc_stack(size_t size) { 52 long mmap_result = 53 __llvm_libc::syscall(MMAP_SYSCALL_NUMBER, 54 0, // No special address 55 size, 56 PROT_READ | PROT_WRITE, // Read and write stack 57 MAP_ANONYMOUS | MAP_PRIVATE, // Process private 58 -1, // Not backed by any file 59 0 // No offset 60 ); 61 if (mmap_result < 0 && (uintptr_t(mmap_result) >= UINTPTR_MAX - size)) 62 return cpp::Error{int(-mmap_result)}; 63 return reinterpret_cast<void *>(mmap_result); 64 } 65 66 static inline void free_stack(void *stack, size_t size) { 67 __llvm_libc::syscall(SYS_munmap, stack, size); 68 } 69 70 struct Thread; 71 72 // We align the start args to 16-byte boundary as we adjust the allocated 73 // stack memory with its size. We want the adjusted address to be at a 74 // 16-byte boundary to satisfy the x86_64 and aarch64 ABI requirements. 75 // If different architecture in future requires higher alignment, then we 76 // can add a platform specific alignment spec. 77 struct alignas(STACK_ALIGNMENT) StartArgs { 78 ThreadAttributes *thread_attrib; 79 ThreadRunner runner; 80 void *arg; 81 }; 82 83 static void cleanup_thread_resources(ThreadAttributes *attrib) { 84 // Cleanup the TLS before the stack as the TLS information is stored on 85 // the stack. 86 cleanup_tls(attrib->tls, attrib->tls_size); 87 if (attrib->owned_stack) 88 free_stack(attrib->stack, attrib->stack_size); 89 } 90 91 __attribute__((always_inline)) inline uintptr_t get_start_args_addr() { 92 // NOTE: For __builtin_frame_address to work reliably across compilers, 93 // architectures and various optimization levels, the TU including this file 94 // should be compiled with -fno-omit-frame-pointer. 95 #ifdef LLVM_LIBC_ARCH_X86_64 96 return reinterpret_cast<uintptr_t>(__builtin_frame_address(0)) 97 // The x86_64 call instruction pushes resume address on to the stack. 98 // Next, The x86_64 SysV ABI requires that the frame pointer be pushed 99 // on to the stack. So, we have to step past two 64-bit values to get 100 // to the start args. 101 + sizeof(uintptr_t) * 2; 102 #elif defined(LLVM_LIBC_ARCH_AARCH64) 103 // The frame pointer after cloning the new thread in the Thread::run method 104 // is set to the stack pointer where start args are stored. So, we fetch 105 // from there. 106 return reinterpret_cast<uintptr_t>(__builtin_frame_address(1)); 107 #endif 108 } 109 110 __attribute__((noinline)) 111 static void start_thread() { 112 auto *start_args = reinterpret_cast<StartArgs *>(get_start_args_addr()); 113 auto *attrib = start_args->thread_attrib; 114 self.attrib = attrib; 115 116 long retval; 117 if (attrib->style == ThreadStyle::POSIX) { 118 attrib->retval.posix_retval = 119 start_args->runner.posix_runner(start_args->arg); 120 retval = long(attrib->retval.posix_retval); 121 } else { 122 attrib->retval.stdc_retval = 123 start_args->runner.stdc_runner(start_args->arg); 124 retval = long(attrib->retval.stdc_retval); 125 } 126 127 uint32_t joinable_state = uint32_t(DetachState::JOINABLE); 128 if (!attrib->detach_state.compare_exchange_strong( 129 joinable_state, uint32_t(DetachState::EXITING))) { 130 // Thread is detached so cleanup the resources. 131 cleanup_thread_resources(attrib); 132 133 // Set the CLEAR_TID address to nullptr to prevent the kernel 134 // from signalling at a non-existent futex location. 135 __llvm_libc::syscall(SYS_set_tid_address, 0); 136 } 137 138 __llvm_libc::syscall(SYS_exit, retval); 139 } 140 141 int Thread::run(ThreadStyle style, ThreadRunner runner, void *arg, void *stack, 142 size_t size, bool detached) { 143 bool owned_stack = false; 144 if (stack == nullptr) { 145 if (size == 0) 146 size = DEFAULT_STACK_SIZE; 147 auto alloc = alloc_stack(size); 148 if (!alloc) 149 return alloc.error_code(); 150 else 151 stack = alloc.value(); 152 owned_stack = true; 153 } 154 155 TLSDescriptor tls; 156 init_tls(tls); 157 158 // When the new thread is spawned by the kernel, the new thread gets the 159 // stack we pass to the clone syscall. However, this stack is empty and does 160 // not have any local vars present in this function. Hence, one cannot 161 // pass arguments to the thread start function, or use any local vars from 162 // here. So, we pack them into the new stack from where the thread can sniff 163 // them out. 164 // 165 // Likewise, the actual thread state information is also stored on the 166 // stack memory. 167 uintptr_t adjusted_stack = reinterpret_cast<uintptr_t>(stack) + size - 168 sizeof(StartArgs) - sizeof(ThreadAttributes) - 169 sizeof(cpp::Atomic<FutexWordType>); 170 adjusted_stack &= ~(uintptr_t(STACK_ALIGNMENT) - 1); 171 172 auto *start_args = reinterpret_cast<StartArgs *>(adjusted_stack); 173 174 attrib = 175 reinterpret_cast<ThreadAttributes *>(adjusted_stack + sizeof(StartArgs)); 176 attrib->style = style; 177 attrib->detach_state = 178 uint32_t(detached ? DetachState::DETACHED : DetachState::JOINABLE); 179 attrib->stack = stack; 180 attrib->stack_size = size; 181 attrib->owned_stack = owned_stack; 182 attrib->tls = tls.addr; 183 attrib->tls_size = tls.size; 184 185 start_args->thread_attrib = attrib; 186 start_args->runner = runner; 187 start_args->arg = arg; 188 189 auto clear_tid = reinterpret_cast<cpp::Atomic<FutexWordType> *>( 190 adjusted_stack + sizeof(StartArgs) + sizeof(ThreadAttributes)); 191 clear_tid->val = CLEAR_TID_VALUE; 192 attrib->platform_data = clear_tid; 193 194 // The clone syscall takes arguments in an architecture specific order. 195 // Also, we want the result of the syscall to be in a register as the child 196 // thread gets a completely different stack after it is created. The stack 197 // variables from this function will not be availalbe to the child thread. 198 #ifdef LLVM_LIBC_ARCH_X86_64 199 long register clone_result asm("rax"); 200 clone_result = __llvm_libc::syscall( 201 SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack, 202 &attrib->tid, // The address where the child tid is written 203 &clear_tid->val, // The futex where the child thread status is signalled 204 tls.tp // The thread pointer value for the new thread. 205 ); 206 #elif defined(LLVM_LIBC_ARCH_AARCH64) 207 long register clone_result asm("x0"); 208 clone_result = __llvm_libc::syscall( 209 SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack, 210 &attrib->tid, // The address where the child tid is written 211 tls.tp, // The thread pointer value for the new thread. 212 &clear_tid->val // The futex where the child thread status is signalled 213 ); 214 #else 215 #error "Unsupported architecture for the clone syscall." 216 #endif 217 218 if (clone_result == 0) { 219 #ifdef LLVM_LIBC_ARCH_AARCH64 220 // We set the frame pointer to be the same as the "sp" so that start args 221 // can be sniffed out from start_thread. 222 __arm_wsr64("x29", __arm_rsr64("sp")); 223 #endif 224 start_thread(); 225 } else if (clone_result < 0) { 226 cleanup_thread_resources(attrib); 227 return -clone_result; 228 } 229 230 return 0; 231 } 232 233 int Thread::join(ThreadReturnValue &retval) { 234 wait(); 235 236 if (attrib->style == ThreadStyle::POSIX) 237 retval.posix_retval = attrib->retval.posix_retval; 238 else 239 retval.stdc_retval = attrib->retval.stdc_retval; 240 241 cleanup_thread_resources(attrib); 242 243 return 0; 244 } 245 246 int Thread::detach() { 247 uint32_t joinable_state = uint32_t(DetachState::JOINABLE); 248 if (attrib->detach_state.compare_exchange_strong( 249 joinable_state, uint32_t(DetachState::DETACHED))) { 250 return int(DetachType::SIMPLE); 251 } 252 253 // If the thread was already detached, then the detach method should not 254 // be called at all. If the thread is exiting, then we wait for it to exit 255 // and free up resources. 256 wait(); 257 258 cleanup_thread_resources(attrib); 259 260 return int(DetachType::CLEANUP); 261 } 262 263 void Thread::wait() { 264 // The kernel should set the value at the clear tid address to zero. 265 // If not, it is a spurious wake and we should continue to wait on 266 // the futex. 267 auto *clear_tid = 268 reinterpret_cast<cpp::Atomic<FutexWordType> *>(attrib->platform_data); 269 while (clear_tid->load() != 0) { 270 // We cannot do a FUTEX_WAIT_PRIVATE here as the kernel does a 271 // FUTEX_WAKE and not a FUTEX_WAKE_PRIVATE. 272 __llvm_libc::syscall(SYS_futex, &clear_tid->val, FUTEX_WAIT, 273 CLEAR_TID_VALUE, nullptr); 274 } 275 } 276 277 bool Thread::operator==(const Thread &thread) const { 278 return attrib->tid == thread.attrib->tid; 279 } 280 281 } // namespace __llvm_libc 282