1fe801747SSiva Chandra Reddy //===--- Implementation of a Linux thread class -----------------*- C++ -*-===//
2fe801747SSiva Chandra Reddy //
3fe801747SSiva Chandra Reddy // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4fe801747SSiva Chandra Reddy // See https://llvm.org/LICENSE.txt for license information.
5fe801747SSiva Chandra Reddy // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6fe801747SSiva Chandra Reddy //
7fe801747SSiva Chandra Reddy //===----------------------------------------------------------------------===//
8fe801747SSiva Chandra Reddy 
9fe801747SSiva Chandra Reddy #include "src/__support/threads/thread.h"
10859c1897SSiva Chandra Reddy #include "config/linux/app.h"
11fe801747SSiva Chandra Reddy #include "src/__support/CPP/atomic.h"
12fe801747SSiva Chandra Reddy #include "src/__support/CPP/error.h"
13fe801747SSiva Chandra Reddy #include "src/__support/OSUtil/syscall.h"           // For syscall functions.
14fe801747SSiva Chandra Reddy #include "src/__support/threads/linux/futex_word.h" // For FutexWordType
15fe801747SSiva Chandra Reddy 
16fe801747SSiva Chandra Reddy #ifdef LLVM_LIBC_ARCH_AARCH64
17fe801747SSiva Chandra Reddy #include <arm_acle.h>
18fe801747SSiva Chandra Reddy #endif
19fe801747SSiva Chandra Reddy 
20fe801747SSiva Chandra Reddy #include <linux/futex.h>
21fe801747SSiva Chandra Reddy #include <linux/sched.h> // For CLONE_* flags.
22fe801747SSiva Chandra Reddy #include <stdint.h>
23fe801747SSiva Chandra Reddy #include <sys/mman.h>    // For PROT_* and MAP_* definitions.
24fe801747SSiva Chandra Reddy #include <sys/syscall.h> // For syscall numbers.
25fe801747SSiva Chandra Reddy 
26fe801747SSiva Chandra Reddy namespace __llvm_libc {
27fe801747SSiva Chandra Reddy 
// Syscall number used to map memory for thread stacks. SYS_mmap2 is used when
// the target defines it, otherwise SYS_mmap. Both branches test whether the
// macro is defined (not its value), so an absent macro falls through to the
// diagnostic below without relying on it evaluating to zero.
#ifdef SYS_mmap2
static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap2;
#elif defined(SYS_mmap)
static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap;
#else
#error "SYS_mmap or SYS_mmap2 not available on the target platform"
#endif
35fe801747SSiva Chandra Reddy 
// Size of the stack mapped by Thread::run when the caller supplies neither a
// stack nor a stack size.
static constexpr size_t DEFAULT_STACK_SIZE = 64 * 1024; // 64KB

// Value stored in the futex word whose address is passed to the clone syscall
// as the clear-tid address. Thread::wait keeps waiting while the word is
// non-zero.
static constexpr uint32_t CLEAR_TID_VALUE = 0xABCD1234;

// Flags passed to the clone syscall when spawning a new thread.
static constexpr unsigned CLONE_SYSCALL_FLAGS =
    // State shared with the parent: the memory space, the file system, the
    // files, the signal handlers and the single list of System V semaphore
    // adjustment values.
    (CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_SYSVSEM) |
    // Same thread group as the parent.
    CLONE_THREAD |
    // Set child thread ID in |ptid| of the parent.
    CLONE_PARENT_SETTID |
    // Let the kernel clear the tid address and wake the joining thread.
    CLONE_CHILD_CLEARTID |
    // Setup the thread pointer of the new thread.
    CLONE_SETTLS;
50fe801747SSiva Chandra Reddy 
alloc_stack(size_t size)51fe801747SSiva Chandra Reddy static inline cpp::ErrorOr<void *> alloc_stack(size_t size) {
52fe801747SSiva Chandra Reddy   long mmap_result =
53fe801747SSiva Chandra Reddy       __llvm_libc::syscall(MMAP_SYSCALL_NUMBER,
54fe801747SSiva Chandra Reddy                            0, // No special address
55fe801747SSiva Chandra Reddy                            size,
56fe801747SSiva Chandra Reddy                            PROT_READ | PROT_WRITE,      // Read and write stack
57fe801747SSiva Chandra Reddy                            MAP_ANONYMOUS | MAP_PRIVATE, // Process private
58fe801747SSiva Chandra Reddy                            -1, // Not backed by any file
59fe801747SSiva Chandra Reddy                            0   // No offset
60fe801747SSiva Chandra Reddy       );
61fe801747SSiva Chandra Reddy   if (mmap_result < 0 && (uintptr_t(mmap_result) >= UINTPTR_MAX - size))
62fe801747SSiva Chandra Reddy     return cpp::Error{int(-mmap_result)};
63fe801747SSiva Chandra Reddy   return reinterpret_cast<void *>(mmap_result);
64fe801747SSiva Chandra Reddy }
65fe801747SSiva Chandra Reddy 
free_stack(void * stack,size_t size)66fe801747SSiva Chandra Reddy static inline void free_stack(void *stack, size_t size) {
67fe801747SSiva Chandra Reddy   __llvm_libc::syscall(SYS_munmap, stack, size);
68fe801747SSiva Chandra Reddy }
69fe801747SSiva Chandra Reddy 
70fe801747SSiva Chandra Reddy struct Thread;
71fe801747SSiva Chandra Reddy 
72fe801747SSiva Chandra Reddy // We align the start args to 16-byte boundary as we adjust the allocated
73fe801747SSiva Chandra Reddy // stack memory with its size. We want the adjusted address to be at a
74fe801747SSiva Chandra Reddy // 16-byte boundary to satisfy the x86_64 and aarch64 ABI requirements.
75fe801747SSiva Chandra Reddy // If different architecture in future requires higher alignment, then we
76fe801747SSiva Chandra Reddy // can add a platform specific alignment spec.
77fe801747SSiva Chandra Reddy struct alignas(STACK_ALIGNMENT) StartArgs {
78379428c2SSiva Chandra Reddy   ThreadAttributes *thread_attrib;
79fe801747SSiva Chandra Reddy   ThreadRunner runner;
80fe801747SSiva Chandra Reddy   void *arg;
81fe801747SSiva Chandra Reddy };
82fe801747SSiva Chandra Reddy 
cleanup_thread_resources(ThreadAttributes * attrib)83859c1897SSiva Chandra Reddy static void cleanup_thread_resources(ThreadAttributes *attrib) {
84859c1897SSiva Chandra Reddy   // Cleanup the TLS before the stack as the TLS information is stored on
85859c1897SSiva Chandra Reddy   // the stack.
86859c1897SSiva Chandra Reddy   cleanup_tls(attrib->tls, attrib->tls_size);
87859c1897SSiva Chandra Reddy   if (attrib->owned_stack)
88859c1897SSiva Chandra Reddy     free_stack(attrib->stack, attrib->stack_size);
89859c1897SSiva Chandra Reddy }
90859c1897SSiva Chandra Reddy 
// Returns the address at which Thread::run stored the StartArgs of the calling
// thread. It is always inlined, so the frame addresses below are those seen
// from the function it is expanded into (start_thread).
//
// NOTE: For __builtin_frame_address to work reliably across compilers,
// architectures and various optimization levels, the TU including this file
// should be compiled with -fno-omit-frame-pointer.
//
// NOTE: Only x86_64 and aarch64 are handled; for any other architecture no
// return statement is compiled in.
__attribute__((always_inline)) inline uintptr_t get_start_args_addr() {
#ifdef LLVM_LIBC_ARCH_X86_64
  const uintptr_t frame_addr =
      reinterpret_cast<uintptr_t>(__builtin_frame_address(0));
  // The x86_64 call instruction pushes the resume address on to the stack.
  // Next, the x86_64 SysV ABI requires that the frame pointer be pushed on to
  // the stack. So, we have to step past two 64-bit values to get to the start
  // args.
  constexpr size_t SKIPPED_BYTES = 2 * sizeof(uintptr_t);
  return frame_addr + SKIPPED_BYTES;
#elif defined(LLVM_LIBC_ARCH_AARCH64)
  // The frame pointer after cloning the new thread in the Thread::run method
  // is set to the stack pointer where start args are stored. So, we fetch
  // from there.
  void *caller_frame = __builtin_frame_address(1);
  return reinterpret_cast<uintptr_t>(caller_frame);
#endif
}
109fe801747SSiva Chandra Reddy 
1103c5d6312SSiva Chandra Reddy __attribute__((noinline))
start_thread()1113c5d6312SSiva Chandra Reddy static void start_thread() {
112fe801747SSiva Chandra Reddy   auto *start_args = reinterpret_cast<StartArgs *>(get_start_args_addr());
113379428c2SSiva Chandra Reddy   auto *attrib = start_args->thread_attrib;
114*8dc42802SSiva Chandra Reddy   self.attrib = attrib;
115*8dc42802SSiva Chandra Reddy 
116fe801747SSiva Chandra Reddy   long retval;
117fe801747SSiva Chandra Reddy   if (attrib->style == ThreadStyle::POSIX) {
118fe801747SSiva Chandra Reddy     attrib->retval.posix_retval =
119fe801747SSiva Chandra Reddy         start_args->runner.posix_runner(start_args->arg);
120fe801747SSiva Chandra Reddy     retval = long(attrib->retval.posix_retval);
121fe801747SSiva Chandra Reddy   } else {
122fe801747SSiva Chandra Reddy     attrib->retval.stdc_retval =
123fe801747SSiva Chandra Reddy         start_args->runner.stdc_runner(start_args->arg);
124fe801747SSiva Chandra Reddy     retval = long(attrib->retval.stdc_retval);
125fe801747SSiva Chandra Reddy   }
126fe801747SSiva Chandra Reddy 
127fe801747SSiva Chandra Reddy   uint32_t joinable_state = uint32_t(DetachState::JOINABLE);
128379428c2SSiva Chandra Reddy   if (!attrib->detach_state.compare_exchange_strong(
129fe801747SSiva Chandra Reddy           joinable_state, uint32_t(DetachState::EXITING))) {
130fe801747SSiva Chandra Reddy     // Thread is detached so cleanup the resources.
131859c1897SSiva Chandra Reddy     cleanup_thread_resources(attrib);
132badda4acSSiva Chandra Reddy 
133badda4acSSiva Chandra Reddy     // Set the CLEAR_TID address to nullptr to prevent the kernel
134badda4acSSiva Chandra Reddy     // from signalling at a non-existent futex location.
135badda4acSSiva Chandra Reddy     __llvm_libc::syscall(SYS_set_tid_address, 0);
136fe801747SSiva Chandra Reddy   }
137fe801747SSiva Chandra Reddy 
138fe801747SSiva Chandra Reddy   __llvm_libc::syscall(SYS_exit, retval);
139fe801747SSiva Chandra Reddy }
140fe801747SSiva Chandra Reddy 
run(ThreadStyle style,ThreadRunner runner,void * arg,void * stack,size_t size,bool detached)141fe801747SSiva Chandra Reddy int Thread::run(ThreadStyle style, ThreadRunner runner, void *arg, void *stack,
142fe801747SSiva Chandra Reddy                 size_t size, bool detached) {
143fe801747SSiva Chandra Reddy   bool owned_stack = false;
144fe801747SSiva Chandra Reddy   if (stack == nullptr) {
145fe801747SSiva Chandra Reddy     if (size == 0)
146fe801747SSiva Chandra Reddy       size = DEFAULT_STACK_SIZE;
147fe801747SSiva Chandra Reddy     auto alloc = alloc_stack(size);
148fe801747SSiva Chandra Reddy     if (!alloc)
149fe801747SSiva Chandra Reddy       return alloc.error_code();
150fe801747SSiva Chandra Reddy     else
151fe801747SSiva Chandra Reddy       stack = alloc.value();
152fe801747SSiva Chandra Reddy     owned_stack = true;
153fe801747SSiva Chandra Reddy   }
154fe801747SSiva Chandra Reddy 
155859c1897SSiva Chandra Reddy   TLSDescriptor tls;
156859c1897SSiva Chandra Reddy   init_tls(tls);
157859c1897SSiva Chandra Reddy 
158fe801747SSiva Chandra Reddy   // When the new thread is spawned by the kernel, the new thread gets the
159fe801747SSiva Chandra Reddy   // stack we pass to the clone syscall. However, this stack is empty and does
160fe801747SSiva Chandra Reddy   // not have any local vars present in this function. Hence, one cannot
161fe801747SSiva Chandra Reddy   // pass arguments to the thread start function, or use any local vars from
162fe801747SSiva Chandra Reddy   // here. So, we pack them into the new stack from where the thread can sniff
163fe801747SSiva Chandra Reddy   // them out.
164fe801747SSiva Chandra Reddy   //
165fe801747SSiva Chandra Reddy   // Likewise, the actual thread state information is also stored on the
166fe801747SSiva Chandra Reddy   // stack memory.
167fe801747SSiva Chandra Reddy   uintptr_t adjusted_stack = reinterpret_cast<uintptr_t>(stack) + size -
168fe801747SSiva Chandra Reddy                              sizeof(StartArgs) - sizeof(ThreadAttributes) -
169fe801747SSiva Chandra Reddy                              sizeof(cpp::Atomic<FutexWordType>);
170fe801747SSiva Chandra Reddy   adjusted_stack &= ~(uintptr_t(STACK_ALIGNMENT) - 1);
171fe801747SSiva Chandra Reddy 
172fe801747SSiva Chandra Reddy   auto *start_args = reinterpret_cast<StartArgs *>(adjusted_stack);
173fe801747SSiva Chandra Reddy 
174fe801747SSiva Chandra Reddy   attrib =
175fe801747SSiva Chandra Reddy       reinterpret_cast<ThreadAttributes *>(adjusted_stack + sizeof(StartArgs));
176fe801747SSiva Chandra Reddy   attrib->style = style;
177fe801747SSiva Chandra Reddy   attrib->detach_state =
178fe801747SSiva Chandra Reddy       uint32_t(detached ? DetachState::DETACHED : DetachState::JOINABLE);
179fe801747SSiva Chandra Reddy   attrib->stack = stack;
180fe801747SSiva Chandra Reddy   attrib->stack_size = size;
181fe801747SSiva Chandra Reddy   attrib->owned_stack = owned_stack;
182859c1897SSiva Chandra Reddy   attrib->tls = tls.addr;
183859c1897SSiva Chandra Reddy   attrib->tls_size = tls.size;
184fe801747SSiva Chandra Reddy 
185379428c2SSiva Chandra Reddy   start_args->thread_attrib = attrib;
186379428c2SSiva Chandra Reddy   start_args->runner = runner;
187379428c2SSiva Chandra Reddy   start_args->arg = arg;
188379428c2SSiva Chandra Reddy 
189fe801747SSiva Chandra Reddy   auto clear_tid = reinterpret_cast<cpp::Atomic<FutexWordType> *>(
190fe801747SSiva Chandra Reddy       adjusted_stack + sizeof(StartArgs) + sizeof(ThreadAttributes));
191fe801747SSiva Chandra Reddy   clear_tid->val = CLEAR_TID_VALUE;
1923c5d6312SSiva Chandra Reddy   attrib->platform_data = clear_tid;
193fe801747SSiva Chandra Reddy 
194fe801747SSiva Chandra Reddy   // The clone syscall takes arguments in an architecture specific order.
195fe801747SSiva Chandra Reddy   // Also, we want the result of the syscall to be in a register as the child
196fe801747SSiva Chandra Reddy   // thread gets a completely different stack after it is created. The stack
197fe801747SSiva Chandra Reddy   // variables from this function will not be availalbe to the child thread.
198fe801747SSiva Chandra Reddy #ifdef LLVM_LIBC_ARCH_X86_64
199fe801747SSiva Chandra Reddy   long register clone_result asm("rax");
200fe801747SSiva Chandra Reddy   clone_result = __llvm_libc::syscall(
201fe801747SSiva Chandra Reddy       SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack,
202fe801747SSiva Chandra Reddy       &attrib->tid,    // The address where the child tid is written
203fe801747SSiva Chandra Reddy       &clear_tid->val, // The futex where the child thread status is signalled
204859c1897SSiva Chandra Reddy       tls.tp           // The thread pointer value for the new thread.
205fe801747SSiva Chandra Reddy   );
206fe801747SSiva Chandra Reddy #elif defined(LLVM_LIBC_ARCH_AARCH64)
207fe801747SSiva Chandra Reddy   long register clone_result asm("x0");
208fe801747SSiva Chandra Reddy   clone_result = __llvm_libc::syscall(
209fe801747SSiva Chandra Reddy       SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack,
210fe801747SSiva Chandra Reddy       &attrib->tid,   // The address where the child tid is written
211859c1897SSiva Chandra Reddy       tls.tp,         // The thread pointer value for the new thread.
212fe801747SSiva Chandra Reddy       &clear_tid->val // The futex where the child thread status is signalled
213fe801747SSiva Chandra Reddy   );
214fe801747SSiva Chandra Reddy #else
215fe801747SSiva Chandra Reddy #error "Unsupported architecture for the clone syscall."
216fe801747SSiva Chandra Reddy #endif
217fe801747SSiva Chandra Reddy 
218fe801747SSiva Chandra Reddy   if (clone_result == 0) {
219fe801747SSiva Chandra Reddy #ifdef LLVM_LIBC_ARCH_AARCH64
220fe801747SSiva Chandra Reddy     // We set the frame pointer to be the same as the "sp" so that start args
221fe801747SSiva Chandra Reddy     // can be sniffed out from start_thread.
222fe801747SSiva Chandra Reddy     __arm_wsr64("x29", __arm_rsr64("sp"));
223fe801747SSiva Chandra Reddy #endif
224fe801747SSiva Chandra Reddy     start_thread();
225fe801747SSiva Chandra Reddy   } else if (clone_result < 0) {
226859c1897SSiva Chandra Reddy     cleanup_thread_resources(attrib);
227fe801747SSiva Chandra Reddy     return -clone_result;
228fe801747SSiva Chandra Reddy   }
229fe801747SSiva Chandra Reddy 
230fe801747SSiva Chandra Reddy   return 0;
231fe801747SSiva Chandra Reddy }
232fe801747SSiva Chandra Reddy 
join(ThreadReturnValue & retval)233fe801747SSiva Chandra Reddy int Thread::join(ThreadReturnValue &retval) {
234fe801747SSiva Chandra Reddy   wait();
235fe801747SSiva Chandra Reddy 
236fe801747SSiva Chandra Reddy   if (attrib->style == ThreadStyle::POSIX)
237fe801747SSiva Chandra Reddy     retval.posix_retval = attrib->retval.posix_retval;
238fe801747SSiva Chandra Reddy   else
239fe801747SSiva Chandra Reddy     retval.stdc_retval = attrib->retval.stdc_retval;
240fe801747SSiva Chandra Reddy 
241859c1897SSiva Chandra Reddy   cleanup_thread_resources(attrib);
242fe801747SSiva Chandra Reddy 
243fe801747SSiva Chandra Reddy   return 0;
244fe801747SSiva Chandra Reddy }
245fe801747SSiva Chandra Reddy 
detach()246fe801747SSiva Chandra Reddy int Thread::detach() {
247fe801747SSiva Chandra Reddy   uint32_t joinable_state = uint32_t(DetachState::JOINABLE);
248fe801747SSiva Chandra Reddy   if (attrib->detach_state.compare_exchange_strong(
249fe801747SSiva Chandra Reddy           joinable_state, uint32_t(DetachState::DETACHED))) {
250fe801747SSiva Chandra Reddy     return int(DetachType::SIMPLE);
251fe801747SSiva Chandra Reddy   }
252fe801747SSiva Chandra Reddy 
253fe801747SSiva Chandra Reddy   // If the thread was already detached, then the detach method should not
254fe801747SSiva Chandra Reddy   // be called at all. If the thread is exiting, then we wait for it to exit
255fe801747SSiva Chandra Reddy   // and free up resources.
256fe801747SSiva Chandra Reddy   wait();
257fe801747SSiva Chandra Reddy 
258859c1897SSiva Chandra Reddy   cleanup_thread_resources(attrib);
259859c1897SSiva Chandra Reddy 
260fe801747SSiva Chandra Reddy   return int(DetachType::CLEANUP);
261fe801747SSiva Chandra Reddy }
262fe801747SSiva Chandra Reddy 
wait()263fe801747SSiva Chandra Reddy void Thread::wait() {
264fe801747SSiva Chandra Reddy   // The kernel should set the value at the clear tid address to zero.
265fe801747SSiva Chandra Reddy   // If not, it is a spurious wake and we should continue to wait on
266fe801747SSiva Chandra Reddy   // the futex.
267fe801747SSiva Chandra Reddy   auto *clear_tid =
2683c5d6312SSiva Chandra Reddy       reinterpret_cast<cpp::Atomic<FutexWordType> *>(attrib->platform_data);
269fe801747SSiva Chandra Reddy   while (clear_tid->load() != 0) {
270fe801747SSiva Chandra Reddy     // We cannot do a FUTEX_WAIT_PRIVATE here as the kernel does a
271fe801747SSiva Chandra Reddy     // FUTEX_WAKE and not a FUTEX_WAKE_PRIVATE.
272fe801747SSiva Chandra Reddy     __llvm_libc::syscall(SYS_futex, &clear_tid->val, FUTEX_WAIT,
273fe801747SSiva Chandra Reddy                          CLEAR_TID_VALUE, nullptr);
274fe801747SSiva Chandra Reddy   }
275fe801747SSiva Chandra Reddy }
276fe801747SSiva Chandra Reddy 
operator ==(const Thread & thread) const277*8dc42802SSiva Chandra Reddy bool Thread::operator==(const Thread &thread) const {
278*8dc42802SSiva Chandra Reddy   return attrib->tid == thread.attrib->tid;
279*8dc42802SSiva Chandra Reddy }
280*8dc42802SSiva Chandra Reddy 
281fe801747SSiva Chandra Reddy } // namespace __llvm_libc
282