xref: /xnu-11215/tests/arm_matrix.c (revision 8d741a5d)
/*
 * Copyright (c) 2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/**
 * On devices that support it, this test ensures that a mach exception is
 * generated when a matrix-math instruction is executed before the matrix
 * unit has been started, and that the matrix register file is correctly
 * preserved or zeroed on context switch.
 */

/*
 * IMPLEMENTATION NOTE:
 *
 * This test code goes to some unusual lengths to avoid calling out to libc or
 * libdarwintest while the CPU is in streaming SVE mode (i.e., between
 * ops->start() and ops->stop()).  Both of these libraries are built with SIMD
 * instructions that will cause the test executable to crash while in streaming
 * SVE mode.
 *
 * Ordinarily this is the wrong way to solve this problem.  Functions that use
 * streaming SVE mode should have annotations telling the compiler so, and the
 * compiler will automatically generate appropriate interworking code.  However
 * this interworking code will stash SME state to memory and temporarily exit
 * streaming SVE mode.  We're specifically testing how xnu manages live SME
 * register state, so we can't let the compiler stash and disable this state
 * behind our backs.
 */
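
/*
 * (For reference, those annotations would typically be the ACLE streaming
 * keywords such as __arm_streaming or __arm_locally_streaming, at which
 * point the compiler emits the interworking described above around calls.)
 */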

#ifdef __arm64__
#include <mach/error.h>
#endif /* __arm64__ */

#include <darwintest.h>
#include <pthread.h>
#include <stdlib.h>
#include <mach/mach.h>
#include <mach/thread_status.h>
#include <mach/exception.h>
#include <machine/cpu_capabilities.h>
#include <sys/types.h>
#include <sys/sysctl.h>

#include "arm_matrix.h"
#include "exc_helpers.h"
#include "test_utils.h"

T_GLOBAL_META(
	T_META_NAMESPACE("xnu.arm"),
	T_META_RADAR_COMPONENT_NAME("xnu"),
	T_META_RADAR_COMPONENT_VERSION("arm"),
	T_META_OWNER("ghackmann"),
	T_META_RUN_CONCURRENTLY(true)
	);

#ifdef __arm64__

#ifndef EXC_ARM_SME_DISALLOWED
#define EXC_ARM_SME_DISALLOWED 2
#endif

/* Whether we caught the EXC_BAD_INSTRUCTION mach exception or not. */
static volatile bool mach_exc_caught = false;

static size_t
bad_instruction_exception_handler(
	__unused mach_port_t task,
	__unused mach_port_t thread,
	exception_type_t type,
	mach_exception_data_t codes)
{
	T_QUIET; T_ASSERT_EQ(type, EXC_BAD_INSTRUCTION, "Caught an EXC_BAD_INSTRUCTION exception");
	T_QUIET; T_ASSERT_EQ(codes[0], (uint64_t)EXC_ARM_UNDEFINED, "The exception code is EXC_ARM_UNDEFINED");

	mach_exc_caught = true;
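	/* The return value tells the exc_helpers harness how far to advance the PC; 4 skips the faulting instruction. */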
	return 4;
}
#endif


#ifdef __arm64__
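/*
 * Verifies that ops->load_one_vector() only traps when the matrix unit is
 * off: it should succeed between ops->start() and ops->stop(), and raise
 * EXC_BAD_INSTRUCTION once the unit has been stopped again.
 */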
static void
test_matrix_not_started(const struct arm_matrix_operations *ops)
{
	if (!ops->is_available()) {
		T_SKIP("Running on non-%s target, skipping...", ops->name);
	}

	mach_port_t exc_port = create_exception_port(EXC_MASK_BAD_INSTRUCTION);

	size_t size = ops->data_size();
	uint8_t *d = ops->alloc_data();
	bzero(d, size);

	ops->start();
	ops->load_one_vector(d);
	ops->stop();
	T_PASS("%s instruction after start instruction should not cause an exception", ops->name);

	mach_exc_caught = false;
	run_exception_handler(exc_port, bad_instruction_exception_handler);
	ops->load_one_vector(d);
	T_EXPECT_TRUE(mach_exc_caught, "%s instruction before start instruction should cause an exception", ops->name);

	free(d);
}
#endif


T_DECL(sme_not_started,
    "Test that SME instructions before smstart generate mach exceptions.", T_META_TAG_VM_NOT_ELIGIBLE)
{
#ifndef __arm64__
	T_SKIP("Running on non-arm64 target, skipping...");
#else
	test_matrix_not_started(&sme_operations);
#endif
}

#ifdef __arm64__
typedef bool (*thread_fn_t)(const struct arm_matrix_operations *, uint32_t);

struct test_thread {
	pthread_t thread;
	thread_fn_t thread_fn;
	uint32_t cpuid;
	uint32_t thread_id;
	const struct arm_matrix_operations *ops;
};

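/* Count of threads that have not yet reached the barrier; protected by barrier_lock. */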
static uint32_t barrier;
static pthread_cond_t barrier_cond = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t barrier_lock = PTHREAD_MUTEX_INITIALIZER;

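/*
 * Blocks until every test thread has arrived, so that all threads start
 * their workloads at roughly the same time.
 */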
static void
test_thread_barrier(void)
{
	/* Wait for all threads to reach this barrier */
	pthread_mutex_lock(&barrier_lock);
	barrier--;
	if (barrier) {
		while (barrier) {
			pthread_cond_wait(&barrier_cond, &barrier_lock);
		}
	} else {
		pthread_cond_broadcast(&barrier_cond);
	}
	pthread_mutex_unlock(&barrier_lock);
}

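/* Returns the number of logical CPUs reported by the hw.ncpu sysctl. */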
static uint32_t
ncpus(void)
{
	uint32_t ncpu;
	size_t ncpu_size = sizeof(ncpu);
	int err = sysctlbyname("hw.ncpu", &ncpu, &ncpu_size, NULL, 0);
	T_QUIET; T_ASSERT_POSIX_ZERO(err, "Retrieved CPU count");

	return ncpu;
}

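/*
 * Binds the calling thread to the given CPU via kern.sched_thread_bind_cpu.
 * Returns the raw sysctl result so that callers running in streaming SVE
 * mode can defer error reporting until after they have stopped the unit.
 */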
static int
thread_bind_cpu_unchecked(uint32_t cpuid)
{
	/*
	 * libc's sysctl() implementation calls strlen(name), which is
	 * SIMD-accelerated.  Avoid this by directly invoking the libsyscall
	 * wrapper with namelen computed at compile time.
	 */
#define THREAD_BIND_CPU "kern.sched_thread_bind_cpu"
	extern int __sysctlbyname(const char *name, size_t namelen, void *oldp, size_t *oldlenp, void *newp, size_t newlen);
	const char *name = THREAD_BIND_CPU;
	size_t namelen = sizeof(THREAD_BIND_CPU) - 1;
	return __sysctlbyname(name, namelen, NULL, 0, &cpuid, sizeof(cpuid));
}

static void
thread_bind_cpu(uint32_t cpuid)
{
	int err = thread_bind_cpu_unchecked(cpuid);
	T_QUIET; T_ASSERT_POSIX_ZERO(err, "Bound thread to CPU %u", cpuid);
}

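/* pthread entry point: pins the thread to its assigned CPU, then runs the per-thread test body. */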
static void *
test_thread_shim(void *arg)
{
	struct test_thread *thread = arg;

	thread_bind_cpu(thread->cpuid);
	bool ret = thread->thread_fn(thread->ops, thread->thread_id);
	return (void *)(uintptr_t)ret;
}

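/*
 * Spawns two threads per CPU, each bound to a specific CPU, runs thread_fn
 * on all of them concurrently, and reports a pass/fail result per thread.
 * Oversubscribing the CPUs forces context switches between test threads.
 */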
static void
test_on_each_cpu(thread_fn_t thread_fn, const struct arm_matrix_operations *ops, const char *desc)
{
	uint32_t ncpu = ncpus();
	uint32_t nthreads = ncpu * 2;
	barrier = nthreads;
	struct test_thread *threads = calloc(nthreads, sizeof(threads[0]));
	for (uint32_t i = 0; i < nthreads; i++) {
		threads[i].thread_fn = thread_fn;
		threads[i].cpuid = i % ncpu;
		threads[i].thread_id = i;
		threads[i].ops = ops;

		int err = pthread_create(&threads[i].thread, NULL, test_thread_shim, &threads[i]);
		T_QUIET; T_ASSERT_EQ(err, 0, "%s: created thread #%u", desc, i);
	}

	for (uint32_t i = 0; i < nthreads; i++) {
		void *thread_ret_ptr;
		int err = pthread_join(threads[i].thread, &thread_ret_ptr);
		T_QUIET; T_ASSERT_EQ(err, 0, "%s: joined thread #%u", desc, i);

		bool thread_ret = (uintptr_t)thread_ret_ptr;
		if (thread_ret) {
			T_PASS("%s: thread #%u passed", desc, i);
		} else {
			T_FAIL("%s: thread #%u failed", desc, i);
		}
	}

	free(threads);
}

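/*
 * Fills the matrix registers with a per-thread pattern and keeps the unit
 * active across sched_yield(), verifying that live state survives context
 * switches unchanged.
 */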
static bool
active_context_switch_thread(const struct arm_matrix_operations *ops, uint32_t thread_id)
{
	size_t size = ops->data_size();
	uint8_t *d1 = ops->alloc_data();
	memset(d1, (char)thread_id, size);

	uint8_t *d2 = ops->alloc_data();

	test_thread_barrier();

	bool ok = true;
	for (unsigned int i = 0; i < 100000 && ok; i++) {
		ops->start();
		ops->load_data(d1);

		/*
		 * Rescheduling with the matrix registers active must preserve
		 * state, even after a context switch.
		 */
		sched_yield();

		ops->store_data(d2);
		ops->stop();

		if (memcmp(d1, d2, size)) {
			ok = false;
		}
	}

	free(d2);
	free(d1);
	return ok;
}

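/*
 * Loads a per-thread pattern, then stops the matrix unit before yielding.
 * After rescheduling, each byte must either match the original pattern or
 * have been zeroed; anything else indicates state leaking between threads.
 */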
static bool
inactive_context_switch_thread(const struct arm_matrix_operations *ops, uint32_t thread_id)
{
	size_t size = ops->data_size();
	uint8_t *d1 = ops->alloc_data();
	memset(d1, (char)thread_id, size);

	uint8_t *d2 = ops->alloc_data();

	test_thread_barrier();

	bool ok = true;
	for (unsigned int i = 0; i < 100000 && ok; i++) {
		ops->start();
		ops->load_data(d1);
		ops->stop();

		/*
		 * Rescheduling with the matrix registers inactive may preserve
		 * state or may zero it out.
		 */
		sched_yield();

		ops->start();
		ops->store_data(d2);
		ops->stop();

		for (size_t j = 0; j < size; j++) {
			if (d1[j] != d2[j] && d2[j] != 0) {
				ok = false;
			}
		}
	}

	free(d2);
	free(d1);
	return ok;
}

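/*
 * Loads random data into the matrix registers, then binds the thread to
 * each CPU in turn and verifies that the state followed the thread across
 * every migration.
 */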
static void
test_thread_migration(const struct arm_matrix_operations *ops)
{
	size_t size = ops->data_size();
	uint8_t *d = ops->alloc_data();
	arc4random_buf(d, size);

	uint32_t ncpu = ncpus();
	uint8_t *cpu_d[ncpu];
	for (uint32_t cpuid = 0; cpuid < ncpu; cpuid++) {
		cpu_d[cpuid] = ops->alloc_data();
		memset(cpu_d[cpuid], 0, size);
	}

	ops->start();
	ops->load_data(d);
	for (uint32_t cpuid = 0; cpuid < ncpu; cpuid++) {
		int err = thread_bind_cpu_unchecked(cpuid);
		if (err) {
			ops->stop();
			T_ASSERT_POSIX_ZERO(err, "Bound thread to CPU %u", cpuid);
		}
		ops->store_data(cpu_d[cpuid]);
	}
	ops->stop();

	for (uint32_t cpuid = 0; cpuid < ncpu; cpuid++) {
		int cmp = memcmp(d, cpu_d[cpuid], size);
		T_EXPECT_EQ(cmp, 0, "Matrix state migrated to CPU %u", cpuid);
		free(cpu_d[cpuid]);
	}
	free(d);
}
#endif


T_DECL(sme_context_switch,
    "Test that SME contexts are migrated during context switch and do not leak between process contexts.",
    T_META_BOOTARGS_SET("enable_skstb=1"),
    T_META_REQUIRES_SYSCTL_EQ("hw.optional.arm.FEAT_SME2", 1),
    XNU_T_META_SOC_SPECIFIC, T_META_TAG_VM_NOT_ELIGIBLE)
{
#ifndef __arm64__
	T_SKIP("Running on non-arm64 target, skipping...");
#else
	if (!sme_operations.is_available()) {
		T_SKIP("Running on non-SME target, skipping...");
	}

	test_thread_migration(&sme_operations);
	test_on_each_cpu(active_context_switch_thread, &sme_operations, "SME context migrates when active");
	test_on_each_cpu(inactive_context_switch_thread, &sme_operations, "SME context does not leak across processes");
#endif
}