/*
 * Copyright (c) 2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/**
 * On devices that support it, this test ensures that a mach exception is
 * generated when a matrix-math instruction is executed before the matrix
 * unit has been started, and that the matrix register file is correctly
 * preserved or zeroed across context switches.
 */

/*
 * IMPLEMENTATION NOTE:
 *
 * This test code goes to some unusual lengths to avoid calling out to libc or
 * libdarwintest while the CPU is in streaming SVE mode (i.e., between
 * ops->start() and ops->stop()). Both of these libraries are built with SIMD
 * instructions that will cause the test executable to crash while in streaming
 * SVE mode.
 *
 * Ordinarily this is the wrong way to solve this problem. Functions that use
 * streaming SVE mode should have annotations telling the compiler so, and the
 * compiler will automatically generate appropriate interworking code. However,
 * this interworking code will stash SME state to memory and temporarily exit
 * streaming SVE mode. We're specifically testing how xnu manages live SME
 * register state, so we can't let the compiler stash and disable this state
 * behind our backs.
 */
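
/*
 * For illustration only: a minimal sketch of the compiler-managed
 * interworking described above, assuming a toolchain that implements the
 * ACLE SME attributes (the helper name below is hypothetical and is not
 * part of this test):
 *
 *     // The compiler emits smstart/smstop and spills live SME state as
 *     // needed around calls into and out of a streaming function:
 *     void accumulate_tile(const float *src) __arm_streaming;
 *
 * Relying on that machinery here would let the compiler stash and disable
 * the very register state these tests need xnu to manage, which is why the
 * code between ops->start() and ops->stop() below avoids all library calls.
 */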

#ifdef __arm64__
#include <mach/error.h>
#endif /* __arm64__ */

#include <darwintest.h>
#include <pthread.h>
#include <stdlib.h>
#include <mach/mach.h>
#include <mach/thread_status.h>
#include <mach/exception.h>
#include <machine/cpu_capabilities.h>
#include <sys/types.h>
#include <sys/sysctl.h>

#include "arm_matrix.h"
#include "exc_helpers.h"
#include "test_utils.h"

T_GLOBAL_META(
	T_META_NAMESPACE("xnu.arm"),
	T_META_RADAR_COMPONENT_NAME("xnu"),
	T_META_RADAR_COMPONENT_VERSION("arm"),
	T_META_OWNER("ghackmann"),
	T_META_RUN_CONCURRENTLY(true)
	);

#ifdef __arm64__

#ifndef EXC_ARM_SME_DISALLOWED
#define EXC_ARM_SME_DISALLOWED 2
#endif

/* Whether we caught the EXC_BAD_INSTRUCTION mach exception or not. */
static volatile bool mach_exc_caught = false;

static size_t
bad_instruction_exception_handler(
	__unused mach_port_t task,
	__unused mach_port_t thread,
	exception_type_t type,
	mach_exception_data_t codes)
{
	T_QUIET; T_ASSERT_EQ(type, EXC_BAD_INSTRUCTION, "Caught an EXC_BAD_INSTRUCTION exception");
	T_QUIET; T_ASSERT_EQ(codes[0], (uint64_t)EXC_ARM_SME_DISALLOWED, "The exception code is EXC_ARM_SME_DISALLOWED");

	mach_exc_caught = true;
	/* Ask the exception helper to advance the PC past the 4-byte faulting instruction. */
	return 4;
}
#endif


#ifdef __arm64__
static void
test_matrix_not_started(const struct arm_matrix_operations *ops)
{
	if (!ops->is_available()) {
		T_SKIP("Running on non-%s target, skipping...", ops->name);
	}

	mach_port_t exc_port = create_exception_port(EXC_MASK_BAD_INSTRUCTION);

	size_t size = ops->data_size();
	uint8_t *d = ops->alloc_data();
	bzero(d, size);

	ops->start();
	ops->load_one_vector(d);
	ops->stop();
	T_PASS("%s instruction after start instruction should not cause an exception", ops->name);

	mach_exc_caught = false;
	run_exception_handler(exc_port, bad_instruction_exception_handler);
	ops->load_one_vector(d);
	T_EXPECT_TRUE(mach_exc_caught, "%s instruction before start instruction should cause an exception", ops->name);

	free(d);
}
#endif


T_DECL(sme_not_started,
    "Test that SME instructions before smstart generate mach exceptions.", T_META_TAG_VM_NOT_ELIGIBLE)
{
#ifndef __arm64__
	T_SKIP("Running on non-arm64 target, skipping...");
#else
	test_matrix_not_started(&sme_operations);
#endif
}

#ifdef __arm64__
typedef bool (*thread_fn_t)(const struct arm_matrix_operations *, uint32_t);

struct test_thread {
	pthread_t thread;
	thread_fn_t thread_fn;
	uint32_t cpuid;
	uint32_t thread_id;
	const struct arm_matrix_operations *ops;
};

static uint32_t barrier;
static pthread_cond_t barrier_cond = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t barrier_lock = PTHREAD_MUTEX_INITIALIZER;

static void
test_thread_barrier(void)
{
	/* Wait for all threads to reach this barrier */
	pthread_mutex_lock(&barrier_lock);
	barrier--;
	if (barrier) {
		while (barrier) {
			pthread_cond_wait(&barrier_cond, &barrier_lock);
		}
	} else {
		pthread_cond_broadcast(&barrier_cond);
	}
	pthread_mutex_unlock(&barrier_lock);
}

static uint32_t
ncpus(void)
{
	uint32_t ncpu;
	size_t ncpu_size = sizeof(ncpu);
	int err = sysctlbyname("hw.ncpu", &ncpu, &ncpu_size, NULL, 0);
	T_QUIET; T_ASSERT_POSIX_ZERO(err, "Retrieved CPU count");

	return ncpu;
}

static int
thread_bind_cpu_unchecked(uint32_t cpuid)
{
	/*
	 * libc's sysctl() implementation calls strlen(name), which is
	 * SIMD-accelerated. Avoid this by directly invoking the libsyscall
	 * wrapper with namelen computed at compile time.
	 */
#define THREAD_BIND_CPU "kern.sched_thread_bind_cpu"
	extern int __sysctlbyname(const char *name, size_t namelen, void *oldp, size_t *oldlenp, void *newp, size_t newlen);
	const char *name = THREAD_BIND_CPU;
	size_t namelen = sizeof(THREAD_BIND_CPU) - 1;
	return __sysctlbyname(name, namelen, NULL, 0, &cpuid, sizeof(cpuid));
}

static void
thread_bind_cpu(uint32_t cpuid)
{
	int err = thread_bind_cpu_unchecked(cpuid);
	T_QUIET; T_ASSERT_POSIX_ZERO(err, "Bound thread to CPU %u", cpuid);
}

static void *
test_thread_shim(void *arg)
{
	struct test_thread *thread = arg;

	thread_bind_cpu(thread->cpuid);
	bool ret = thread->thread_fn(thread->ops, thread->thread_id);
	return (void *)(uintptr_t)ret;
}

static void
test_on_each_cpu(thread_fn_t thread_fn, const struct arm_matrix_operations *ops, const char *desc)
{
	uint32_t ncpu = ncpus();
	uint32_t nthreads = ncpu * 2;
	barrier = nthreads;
	struct test_thread *threads = calloc(nthreads, sizeof(threads[0]));
	for (uint32_t i = 0; i < nthreads; i++) {
		threads[i].thread_fn = thread_fn;
		threads[i].cpuid = i % ncpu;
		threads[i].thread_id = i;
		threads[i].ops = ops;

		int err = pthread_create(&threads[i].thread, NULL, test_thread_shim, &threads[i]);
		T_QUIET; T_ASSERT_EQ(err, 0, "%s: created thread #%u", desc, i);
	}

	for (uint32_t i = 0; i < nthreads; i++) {
		void *thread_ret_ptr;
		int err = pthread_join(threads[i].thread, &thread_ret_ptr);
		T_QUIET; T_ASSERT_EQ(err, 0, "%s: joined thread #%u", desc, i);

		bool thread_ret = (uintptr_t)thread_ret_ptr;
		if (thread_ret) {
			T_PASS("%s: thread #%u passed", desc, i);
		} else {
			T_FAIL("%s: thread #%u failed", desc, i);
		}
	}

	free(threads);
}

static bool
active_context_switch_thread(const struct arm_matrix_operations *ops, uint32_t thread_id)
{
	size_t size = ops->data_size();
	uint8_t *d1 = ops->alloc_data();
	memset(d1, (char)thread_id, size);

	uint8_t *d2 = ops->alloc_data();

	test_thread_barrier();

	bool ok = true;
	for (unsigned int i = 0; i < 100000 && ok; i++) {
		ops->start();
		ops->load_data(d1);

		/*
		 * Rescheduling with the matrix registers active must preserve
		 * state, even after a context switch.
		 */
		sched_yield();

		ops->store_data(d2);
		ops->stop();

		if (memcmp(d1, d2, size)) {
			ok = false;
		}
	}

	free(d2);
	free(d1);
	return ok;
}

static bool
inactive_context_switch_thread(const struct arm_matrix_operations *ops, uint32_t thread_id)
{
	size_t size = ops->data_size();
	uint8_t *d1 = ops->alloc_data();
	memset(d1, (char)thread_id, size);

	uint8_t *d2 = ops->alloc_data();

	test_thread_barrier();

	bool ok = true;
	for (unsigned int i = 0; i < 100000 && ok; i++) {
		ops->start();
		ops->load_data(d1);
		ops->stop();

		/*
		 * Rescheduling with the matrix registers inactive may preserve
		 * state or may zero it out.
		 */
		sched_yield();

		ops->start();
		ops->store_data(d2);
		ops->stop();

		for (size_t j = 0; j < size; j++) {
			if (d1[j] != d2[j] && d2[j] != 0) {
				ok = false;
			}
		}
	}

	free(d2);
	free(d1);
	return ok;
}

static void
test_thread_migration(const struct arm_matrix_operations *ops)
{
	size_t size = ops->data_size();
	uint8_t *d = ops->alloc_data();
	arc4random_buf(d, size);

	uint32_t ncpu = ncpus();
	uint8_t *cpu_d[ncpu];
	for (uint32_t cpuid = 0; cpuid < ncpu; cpuid++) {
		cpu_d[cpuid] = ops->alloc_data();
		memset(cpu_d[cpuid], 0, size);
	}

	ops->start();
	ops->load_data(d);
	for (uint32_t cpuid = 0; cpuid < ncpu; cpuid++) {
		int err = thread_bind_cpu_unchecked(cpuid);
		if (err) {
			ops->stop();
			T_ASSERT_POSIX_ZERO(err, "Bound thread to CPU %u", cpuid);
		}
		ops->store_data(cpu_d[cpuid]);
	}
	ops->stop();

	for (uint32_t cpuid = 0; cpuid < ncpu; cpuid++) {
		int cmp = memcmp(d, cpu_d[cpuid], size);
		T_EXPECT_EQ(cmp, 0, "Matrix state migrated to CPU %u", cpuid);
		free(cpu_d[cpuid]);
	}
	free(d);
}
#endif


T_DECL(sme_context_switch,
    "Test that SME contexts are migrated during context switch and do not leak between process contexts.",
    T_META_BOOTARGS_SET("enable_skstb=1"),
    T_META_REQUIRES_SYSCTL_EQ("hw.optional.arm.FEAT_SME2", 1),
    XNU_T_META_SOC_SPECIFIC, T_META_TAG_VM_NOT_ELIGIBLE)
{
#ifndef __arm64__
	T_SKIP("Running on non-arm64 target, skipping...");
#else
	if (!sme_operations.is_available()) {
		T_SKIP("Running on non-SME target, skipping...");
	}

	test_thread_migration(&sme_operations);
	test_on_each_cpu(active_context_switch_thread, &sme_operations, "SME context migrates when active");
	test_on_each_cpu(inactive_context_switch_thread, &sme_operations, "SME context does not leak across processes");
#endif
}