1 //===--- Implementation of a Linux thread class -----------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "src/__support/threads/thread.h"
10 #include "config/linux/app.h"
11 #include "src/__support/CPP/atomic.h"
12 #include "src/__support/CPP/error.h"
13 #include "src/__support/OSUtil/syscall.h" // For syscall functions.
14 #include "src/__support/threads/linux/futex_word.h" // For FutexWordType
15
16 #ifdef LLVM_LIBC_ARCH_AARCH64
17 #include <arm_acle.h>
18 #endif
19
20 #include <linux/futex.h>
21 #include <linux/sched.h> // For CLONE_* flags.
22 #include <stdint.h>
23 #include <sys/mman.h> // For PROT_* and MAP_* definitions.
24 #include <sys/syscall.h> // For syscall numbers.
25
26 namespace __llvm_libc {
27
// Syscall number used to create memory mappings. SYS_mmap2 is preferred when
// the target defines it; otherwise SYS_mmap is used. Both branches test
// whether the macro is *defined* (rather than evaluating its value), so the
// selection does not depend on the numeric value of the syscall number and
// does not trip -Wundef.
#ifdef SYS_mmap2
static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap2;
#elif defined(SYS_mmap)
static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap;
#else
#error "SYS_mmap or SYS_mmap2 not available on the target platform"
#endif
35
// Size of the stack mapped for a thread when the caller supplies neither a
// stack nor a stack size: 64KB.
static constexpr size_t DEFAULT_STACK_SIZE = size_t(64) * 1024;

// Initial value of the clear-tid futex word. Joiners wait on the futex for as
// long as it does not read zero.
static constexpr uint32_t CLEAR_TID_VALUE = 0xABCD1234;

// Flags passed to the clone syscall when spawning a thread.
static constexpr unsigned CLONE_SYSCALL_FLAGS =
    // State shared with the parent: memory space, file system information,
    // open files, signal handlers and the list of System V semaphore
    // adjustment values.
    CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_SYSVSEM
    // Put the new thread in the same thread group as the parent.
    | CLONE_THREAD
    // Set the child thread ID in |ptid| of the parent.
    | CLONE_PARENT_SETTID
    // Let the kernel clear the tid address and wake the joining thread.
    | CLONE_CHILD_CLEARTID
    // Set up the thread pointer of the new thread.
    | CLONE_SETTLS;
50
alloc_stack(size_t size)51 static inline cpp::ErrorOr<void *> alloc_stack(size_t size) {
52 long mmap_result =
53 __llvm_libc::syscall(MMAP_SYSCALL_NUMBER,
54 0, // No special address
55 size,
56 PROT_READ | PROT_WRITE, // Read and write stack
57 MAP_ANONYMOUS | MAP_PRIVATE, // Process private
58 -1, // Not backed by any file
59 0 // No offset
60 );
61 if (mmap_result < 0 && (uintptr_t(mmap_result) >= UINTPTR_MAX - size))
62 return cpp::Error{int(-mmap_result)};
63 return reinterpret_cast<void *>(mmap_result);
64 }
65
free_stack(void * stack,size_t size)66 static inline void free_stack(void *stack, size_t size) {
67 __llvm_libc::syscall(SYS_munmap, stack, size);
68 }
69
struct Thread;

// Bundle of values which the spawning thread writes into the new thread's
// stack memory so that the new thread can pick them up once it starts
// running.
//
// We align the start args to 16-byte boundary as we adjust the allocated
// stack memory with its size. We want the adjusted address to be at a
// 16-byte boundary to satisfy the x86_64 and aarch64 ABI requirements.
// If a different architecture in the future requires higher alignment, then
// we can add a platform specific alignment spec.
struct alignas(STACK_ALIGNMENT) StartArgs {
  // Attributes of the thread being started; also stored in the stack memory.
  ThreadAttributes *thread_attrib;
  // The function the new thread should execute. Which member of the runner is
  // used depends on the thread's style.
  ThreadRunner runner;
  // Opaque argument forwarded to the runner.
  void *arg;
};
82
cleanup_thread_resources(ThreadAttributes * attrib)83 static void cleanup_thread_resources(ThreadAttributes *attrib) {
84 // Cleanup the TLS before the stack as the TLS information is stored on
85 // the stack.
86 cleanup_tls(attrib->tls, attrib->tls_size);
87 if (attrib->owned_stack)
88 free_stack(attrib->stack, attrib->stack_size);
89 }
90
// Returns the address at which the StartArgs object for the current (newly
// cloned) thread is stored. The address is derived from frame addresses, so
// the function is force-inlined and is meant to be called from start_thread.
//
// NOTE(review): there is no #else branch, so on an architecture other than
// x86_64 or aarch64 this function has no return statement. Thread::run in
// this file emits an #error for such architectures.
__attribute__((always_inline)) inline uintptr_t get_start_args_addr() {
// NOTE: For __builtin_frame_address to work reliably across compilers,
// architectures and various optimization levels, the TU including this file
// should be compiled with -fno-omit-frame-pointer.
#ifdef LLVM_LIBC_ARCH_X86_64
  return reinterpret_cast<uintptr_t>(__builtin_frame_address(0))
         // The x86_64 call instruction pushes resume address on to the stack.
         // Next, The x86_64 SysV ABI requires that the frame pointer be pushed
         // on to the stack. So, we have to step past two 64-bit values to get
         // to the start args.
         + sizeof(uintptr_t) * 2;
#elif defined(LLVM_LIBC_ARCH_AARCH64)
  // The frame pointer after cloning the new thread in the Thread::run method
  // is set to the stack pointer where start args are stored. So, we fetch
  // from there.
  return reinterpret_cast<uintptr_t>(__builtin_frame_address(1));
#endif
}
109
// Body of a newly cloned thread. It locates the StartArgs which Thread::run
// stored in the new stack, calls the user supplied runner, records the
// runner's return value in the thread attributes and then terminates the
// thread with the exit syscall.
//
// The function is marked noinline; get_start_args_addr computes the StartArgs
// address from frame addresses, which presumably requires this function to
// have a call frame of its own.
__attribute__((noinline))
static void start_thread() {
  auto *start_args = reinterpret_cast<StartArgs *>(get_start_args_addr());
  auto *attrib = start_args->thread_attrib;
  // |self| is declared outside of this file; presumably it is the Thread
  // object describing the currently running thread - confirm.
  self.attrib = attrib;

  // Call the runner matching the thread's style. The return value is stored
  // in the attributes for a joiner to read, and is also used as the exit
  // status below.
  long retval;
  if (attrib->style == ThreadStyle::POSIX) {
    attrib->retval.posix_retval =
        start_args->runner.posix_runner(start_args->arg);
    retval = long(attrib->retval.posix_retval);
  } else {
    attrib->retval.stdc_retval =
        start_args->runner.stdc_runner(start_args->arg);
    retval = long(attrib->retval.stdc_retval);
  }

  // Try to move the thread from JOINABLE to EXITING. If that succeeds, the
  // resources are left in place for a later join or detach to release. If it
  // fails, the state was not JOINABLE and the thread cleans up after itself.
  uint32_t joinable_state = uint32_t(DetachState::JOINABLE);
  if (!attrib->detach_state.compare_exchange_strong(
          joinable_state, uint32_t(DetachState::EXITING))) {
    // Thread is detached so cleanup the resources.
    // NOTE(review): when the stack is owned, this unmaps the stack the thread
    // is currently running on. Verify that nothing executed after this point
    // (including the return from the cleanup call) touches the stack.
    cleanup_thread_resources(attrib);

    // Set the CLEAR_TID address to nullptr to prevent the kernel
    // from signalling at a non-existent futex location.
    __llvm_libc::syscall(SYS_set_tid_address, 0);
  }

  __llvm_libc::syscall(SYS_exit, retval);
}
140
// Spawns a new thread which executes |runner| with |arg|.
//
// |style| selects which member of |runner| is called and how the return value
// is stored. |stack| and |size| describe the stack of the new thread: if
// |stack| is null, a stack of |size| bytes (DEFAULT_STACK_SIZE if |size| is
// zero) is allocated and owned by the thread. |detached| selects the initial
// detach state.
//
// Returns 0 on success in the calling thread. Returns the error code from the
// stack allocation if that fails, or the negated result of the clone syscall
// if that fails. In the new thread, control passes to start_thread, which
// finishes by issuing the exit syscall.
//
// NOTE(review): |size| is not checked against the space needed for the
// bookkeeping data placed at the top of the stack; a very small |size| makes
// the adjusted stack address fall below |stack|. Confirm that callers
// validate the size.
int Thread::run(ThreadStyle style, ThreadRunner runner, void *arg, void *stack,
                size_t size, bool detached) {
  bool owned_stack = false;
  if (stack == nullptr) {
    if (size == 0)
      size = DEFAULT_STACK_SIZE;
    auto alloc = alloc_stack(size);
    if (!alloc)
      return alloc.error_code();
    else
      stack = alloc.value();
    owned_stack = true;
  }

  // NOTE(review): init_tls is defined outside of this file; whether it can
  // fail, and how it would report that, cannot be told from here.
  TLSDescriptor tls;
  init_tls(tls);

  // When the new thread is spawned by the kernel, the new thread gets the
  // stack we pass to the clone syscall. However, this stack is empty and does
  // not have any local vars present in this function. Hence, one cannot
  // pass arguments to the thread start function, or use any local vars from
  // here. So, we pack them into the new stack from where the thread can sniff
  // them out.
  //
  // Likewise, the actual thread state information is also stored on the
  // stack memory.
  //
  // Starting at the (aligned down) adjusted stack address, the layout is:
  //   StartArgs, ThreadAttributes, clear-tid futex word.
  uintptr_t adjusted_stack = reinterpret_cast<uintptr_t>(stack) + size -
                             sizeof(StartArgs) - sizeof(ThreadAttributes) -
                             sizeof(cpp::Atomic<FutexWordType>);
  adjusted_stack &= ~(uintptr_t(STACK_ALIGNMENT) - 1);

  auto *start_args = reinterpret_cast<StartArgs *>(adjusted_stack);

  // |attrib| is a member of this Thread object; it points into the new
  // thread's stack memory.
  attrib =
      reinterpret_cast<ThreadAttributes *>(adjusted_stack + sizeof(StartArgs));
  attrib->style = style;
  attrib->detach_state =
      uint32_t(detached ? DetachState::DETACHED : DetachState::JOINABLE);
  attrib->stack = stack;
  attrib->stack_size = size;
  attrib->owned_stack = owned_stack;
  attrib->tls = tls.addr;
  attrib->tls_size = tls.size;

  start_args->thread_attrib = attrib;
  start_args->runner = runner;
  start_args->arg = arg;

  // The futex word which the kernel clears when the thread exits. It starts
  // out as CLEAR_TID_VALUE so that waiters can tell "not yet exited" apart
  // from the zero written by the kernel.
  auto clear_tid = reinterpret_cast<cpp::Atomic<FutexWordType> *>(
      adjusted_stack + sizeof(StartArgs) + sizeof(ThreadAttributes));
  clear_tid->val = CLEAR_TID_VALUE;
  attrib->platform_data = clear_tid;

  // The clone syscall takes arguments in an architecture specific order.
  // Also, we want the result of the syscall to be in a register as the child
  // thread gets a completely different stack after it is created. The stack
  // variables from this function will not be available to the child thread.
#ifdef LLVM_LIBC_ARCH_X86_64
  long register clone_result asm("rax");
  clone_result = __llvm_libc::syscall(
      SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack,
      &attrib->tid,    // The address where the child tid is written
      &clear_tid->val, // The futex where the child thread status is signalled
      tls.tp           // The thread pointer value for the new thread.
  );
#elif defined(LLVM_LIBC_ARCH_AARCH64)
  long register clone_result asm("x0");
  clone_result = __llvm_libc::syscall(
      SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack,
      &attrib->tid,   // The address where the child tid is written
      tls.tp,         // The thread pointer value for the new thread.
      &clear_tid->val // The futex where the child thread status is signalled
  );
#else
#error "Unsupported architecture for the clone syscall."
#endif

  if (clone_result == 0) {
    // A zero result means we are running in the newly created thread.
#ifdef LLVM_LIBC_ARCH_AARCH64
    // We set the frame pointer to be the same as the "sp" so that start args
    // can be sniffed out from start_thread.
    __arm_wsr64("x29", __arm_rsr64("sp"));
#endif
    start_thread();
  } else if (clone_result < 0) {
    // The clone syscall failed: release the TLS and the stack (if owned) and
    // report the error code to the caller.
    cleanup_thread_resources(attrib);
    return -clone_result;
  }

  return 0;
}
232
join(ThreadReturnValue & retval)233 int Thread::join(ThreadReturnValue &retval) {
234 wait();
235
236 if (attrib->style == ThreadStyle::POSIX)
237 retval.posix_retval = attrib->retval.posix_retval;
238 else
239 retval.stdc_retval = attrib->retval.stdc_retval;
240
241 cleanup_thread_resources(attrib);
242
243 return 0;
244 }
245
detach()246 int Thread::detach() {
247 uint32_t joinable_state = uint32_t(DetachState::JOINABLE);
248 if (attrib->detach_state.compare_exchange_strong(
249 joinable_state, uint32_t(DetachState::DETACHED))) {
250 return int(DetachType::SIMPLE);
251 }
252
253 // If the thread was already detached, then the detach method should not
254 // be called at all. If the thread is exiting, then we wait for it to exit
255 // and free up resources.
256 wait();
257
258 cleanup_thread_resources(attrib);
259
260 return int(DetachType::CLEANUP);
261 }
262
wait()263 void Thread::wait() {
264 // The kernel should set the value at the clear tid address to zero.
265 // If not, it is a spurious wake and we should continue to wait on
266 // the futex.
267 auto *clear_tid =
268 reinterpret_cast<cpp::Atomic<FutexWordType> *>(attrib->platform_data);
269 while (clear_tid->load() != 0) {
270 // We cannot do a FUTEX_WAIT_PRIVATE here as the kernel does a
271 // FUTEX_WAKE and not a FUTEX_WAKE_PRIVATE.
272 __llvm_libc::syscall(SYS_futex, &clear_tid->val, FUTEX_WAIT,
273 CLEAR_TID_VALUE, nullptr);
274 }
275 }
276
operator ==(const Thread & thread) const277 bool Thread::operator==(const Thread &thread) const {
278 return attrib->tid == thread.attrib->tid;
279 }
280
281 } // namespace __llvm_libc
282