1 /*
2 * Copyright (c) 2020 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <darwintest.h>
30 #include <machine/cpu_capabilities.h>
31 #include <sys/sysctl.h>
32
33 #include "exc_helpers.h"
34
/*
 * File-wide darwintest metadata: namespace, radar component name/version,
 * owner, the run-concurrently setting and the "SoCSpecific" tag.
 */
T_GLOBAL_META(
	T_META_NAMESPACE("xnu.arm"),
	T_META_RADAR_COMPONENT_NAME("xnu"),
	T_META_RADAR_COMPONENT_VERSION("arm"),
	T_META_OWNER("sdooher"),
	T_META_RUN_CONCURRENTLY(true),
	T_META_TAG("SoCSpecific")
	);
43
/*
 * Outcome of the probe currently being run. test_cpu_capability() sets it
 * to true before calling a try_* routine; bad_instruction_handler() and the
 * register-based probes clear it when the capability is not usable.
 */
static volatile bool cap_usable;
45
46 static size_t
bad_instruction_handler(mach_port_t task __unused,mach_port_t thread __unused,exception_type_t type __unused,mach_exception_data_t codes __unused)47 bad_instruction_handler(mach_port_t task __unused, mach_port_t thread __unused,
48 exception_type_t type __unused, mach_exception_data_t codes __unused)
49 {
50 cap_usable = false;
51 return 4;
52 }
53
/* FP16 probe: executes a half-precision FMOV into h0. */
static void
try_fp16(void)
{
	__asm__ __volatile__ (
                "fmov	h0, #0\n"
                : /* no outputs */
                : /* no inputs */
                : "v0");
}
64
/*
 * LSE probe: executes SWP on a stack scratch word.
 *
 * SWP stores to the scratch location through a pointer input, so the asm
 * carries a "memory" clobber to tell the compiler that memory it cannot see
 * in the operand list is modified.
 */
static void
try_atomics(void)
{
	uint64_t dword;
	__asm__ __volatile__ (
                "swp		xzr, xzr, [%[dword]]"
                :
                : [dword] "r" (&dword)
                : "memory"
        );
}
75
/* CRC32 probe: executes CRC32B with the zero register for every operand. */
static void
try_crc32(void)
{
	__asm__ __volatile__ (
                "crc32b	wzr, wzr, wzr"
        );
}
81
/* FHM probe: zeroes d0, then executes FMLAL on v0. */
static void
try_fhm(void)
{
	__asm__ __volatile__ (
                "fmov	d0, #0\n"
                "fmlal	v0.2s, v0.2h, v0.2h\n"
                : /* no outputs */
                : /* no inputs */
                : "v0");
}
93
/*
 * SHA512 probe: zeroes d0 and d1, then executes SHA512H on v0.
 *
 * Both v0 and v1 are written by the asm, so both are listed as clobbered.
 */
static void
try_sha512(void)
{
	__asm__ __volatile__ (
                "fmov	d0, #0\n"
                "fmov	d1, #0\n"
                "sha512h	q0, q0, v0.2d\n"
                :
                :
                : "v0", "v1"
        );
}
106
/*
 * SHA3 probe: zeroes d0 and d1, then executes EOR3 on v0.
 *
 * Both v0 and v1 are written by the asm, so both are listed as clobbered.
 */
static void
try_sha3(void)
{
	__asm__ __volatile__ (
                "fmov	d0, #0\n"
                "fmov	d1, #0\n"
                "eor3	v0.16b, v0.16b, v0.16b, v0.16b\n"
                :
                :
                : "v0", "v1"
        );
}
119
/* SHA1 probe: zeroes s0, then executes SHA1H on it. */
static void
try_sha1(void)
{
	__asm__ __volatile__ (
                "fmov	s0, #0\n"
                "sha1h	s0, s0\n"
                : /* no outputs */
                : /* no inputs */
                : "v0");
}
131
/* PMULL probe: zeroes d0, then executes the 64x64 -> 128 bit PMULL on v0. */
static void
try_pmull(void)
{
	__asm__ __volatile__ (
                "fmov	d0, #0\n"
                "pmull	v0.1q, v0.1d, v0.1d\n"
                : /* no outputs */
                : /* no inputs */
                : "v0");
}
143
/*
 * AES probe: zeroes d0 and d1, then executes AESD on v0.
 *
 * Both v0 and v1 are written by the asm, so both are listed as clobbered.
 */
static void
try_aes(void)
{
	__asm__ __volatile__ (
                "fmov	d0, #0\n"
                "fmov	d1, #0\n"
                "aesd	v0.16B, v0.16B\n"
                :
                :
                : "v0", "v1"
        );
}
156
157
/*
 * SHA256 probe: zeroes d0 and d1, then executes SHA256H on v0.
 *
 * Both v0 and v1 are written by the asm, so both are listed as clobbered.
 */
static void
try_sha256(void)
{
	__asm__ __volatile__ (
                "fmov	d0, #0\n"
                "fmov	d1, #0\n"
                "sha256h	q0, q0, v0.4s\n"
                :
                :
                : "v0", "v1"
        );
}
170
171
/* FCMA probe: zeroes d0, then executes FCADD with a 90 degree rotation. */
static void
try_compnum(void)
{
	__asm__ __volatile__ (
                "fmov	d0, #0\n"
                "fcadd	v0.2s, v0.2s, v0.2s, #90\n"
                : /* no outputs */
                : /* no inputs */
                : "v0");
}
183
184
/* FlagM probe: executes CFINV twice in a row. */
static void
try_flagm(void)
{
	__asm__ __volatile__ (
                "cfinv\n"
                "cfinv\n"
        );
}
193
/*
 * FlagM2 probe: executes AXFLAG followed by XAFLAG.
 *
 * Both instructions rewrite the NZCV condition flags, so the asm declares
 * a "cc" clobber.
 */
static void
try_flagm2(void)
{
	__asm__ __volatile__ (
                "axflag\n"
                "xaflag\n"
                :
                :
                : "cc"
        );
}
202
/* DotProd probe: executes UDOT accumulating into v0 from v1 and v2. */
static void
try_dotprod(void)
{
	__asm__ __volatile__ (
                "udot v0.4S,v1.16B,v2.16B"
                : /* no outputs */
                : /* no inputs */
                : "v0");
}
213
/* RDM probe: executes the scalar SQRDMLAH into s0. */
static void
try_rdm(void)
{
	__asm__ __volatile__ (
                "sqrdmlah s0, s1, s2"
                : /* no outputs */
                : /* no inputs */
                : "s0");
}
224
/* SB probe: executes a single SB instruction. */
static void
try_sb(void)
{
	__asm__ __volatile__ ("sb");
}
232
/* FRINTTS probe: executes FRINT32X on s0. */
static void
try_frintts(void)
{
	__asm__ __volatile__ (
                "frint32x s0, s0"
                : /* no outputs */
                : /* no inputs */
                : "s0");
}
243
/*
 * JSCVT probe: zeroes d0, then executes FJCVTZS into w1.
 *
 * FJCVTZS also writes the NZCV condition flags, so "cc" is listed as
 * clobbered alongside the two registers.
 */
static void
try_jscvt(void)
{
	__asm__ __volatile__ (
                "fmov	d0, #0\n"
                "fjcvtzs w1, d0\n"
                :
                :
                : "w1", "d0", "cc"
        );
}
255
/* PAuth probe: executes PACGA with x0 as destination and both sources. */
static void
try_pauth(void)
{
	__asm__ __volatile__ (
                "pacga x0, x0, x0"
                : /* no outputs */
                : /* no inputs */
                : "x0");
}
266
/* DPB probe: executes DC CVAP on the address of a stack scratch variable. */
static void
try_dpb(void)
{
	int scratch;

	__asm__ __volatile__ (
                "dc cvap, %[addr]"
                : /* no outputs */
                : [addr] "r" (&scratch));
}
277
/* DPB2 probe: executes DC CVADP on the address of a stack scratch variable. */
static void
try_dpb2(void)
{
	int scratch;

	__asm__ __volatile__ (
                "dc cvadp, %[addr]"
                : /* no outputs */
                : [addr] "r" (&scratch));
}
288
/* LRCPC probe: executes LDAPRB from a stack scratch variable into w0. */
static void
try_lrcpc(void)
{
	int scratch;

	__asm__ __volatile__ (
                "ldaprb w0, [%[addr]]"
                : /* no outputs */
                : [addr] "r" (&scratch)
                : "w0");
}
300
/* LRCPC2 probe: executes LDAPURB from a stack scratch variable into w0. */
static void
try_lrcpc2(void)
{
	int scratch;

	__asm__ __volatile__ (
                "ldapurb w0, [%[addr]]"
                : /* no outputs */
                : [addr] "r" (&scratch)
                : "w0");
}
312
313
/*
 * SPECRES probe: executes CFP RCTX, passing the address of a stack scratch
 * variable as the register operand.
 */
static void
try_specres(void)
{
	int scratch;

	__asm__ __volatile__ (
                "cfp rctx, %[arg]"
                : /* no outputs */
                : [arg] "r" (&scratch));
}
324
/* BF16 probe: executes BFDOT accumulating into v0 from v1 and v2. */
static void
try_bf16(void)
{
	__asm__ __volatile__ (
                "bfdot v0.4S,v1.8H,v2.8H"
                : /* no outputs */
                : /* no inputs */
                : "v0");
}
335
/* I8MM probe: executes the by-element SUDOT accumulating into v0. */
static void
try_i8mm(void)
{
	__asm__ __volatile__ (
                "sudot v0.4S,v1.16B,v2.4B[0]"
                : /* no outputs */
                : /* no inputs */
                : "v0");
}
346
/*
 * ECV probe: reads both self-synchronised counter registers.
 *
 * CNTPCTSS_EL0 and CNTVCTSS_EL0 exist only when FEAT_ECV is implemented;
 * without it, direct accesses to them are UNDEFINED.
 */
static void
try_ecv(void)
{
	uint64_t physical_count = __builtin_arm_rsr64("CNTPCTSS_EL0");
	uint64_t virtual_count = __builtin_arm_rsr64("CNTVCTSS_EL0");

	/* Only the act of reading matters; the values are discarded. */
	(void)physical_count;
	(void)virtual_count;
}
357
358 static void
try_afp(void)359 try_afp(void)
360 {
361 /*
362 * FEAT_AFP can be detected via three new FPCR bits which were
363 * previously marked read-as-zero.
364 */
365 const uint64_t FPCR_AFP_FLAGS = (1 << 0) | (1 << 1) | (1 << 2);
366
367 uint64_t old_fpcr = __builtin_arm_rsr64("FPCR");
368 __builtin_arm_wsr64("FPCR", old_fpcr | FPCR_AFP_FLAGS);
369 uint64_t new_fpcr = __builtin_arm_rsr64("FPCR");
370 __builtin_arm_wsr64("FPCR", old_fpcr);
371
372 if ((new_fpcr & FPCR_AFP_FLAGS) != FPCR_AFP_FLAGS) {
373 cap_usable = false;
374 }
375 }
376
377 static void
try_rpres(void)378 try_rpres(void)
379 {
380 /*
381 * When FEAT_RPRES is enabled via FPCR.AH, floating-point reciprocal
382 * estimate instructions increase precision from 8 mantissa bits to 12
383 * mantissa bits. This can be detected by estimating 1/10.0 (which has
384 * no exact floating-point representation) and checking bits 11-14.
385 */
386 const uint64_t FPCR_AH = (1 << 1);
387 const uint32_t EXTRA_MANTISSA_BITS = (0xf << 11);
388
389 uint32_t recip;
390 uint64_t old_fpcr = __builtin_arm_rsr64("FPCR");
391 __builtin_arm_wsr64("FPCR", old_fpcr | FPCR_AH);
392 asm volatile (
393 "fmov s0, #10.0" "\n"
394 "frecpe s0, s0" "\n"
395 "fmov %w0, s0" "\n"
396 : "=r"(recip)
397 :
398 : "s0"
399 );
400 __builtin_arm_wsr64("FPCR", old_fpcr);
401
402 if ((recip & EXTRA_MANTISSA_BITS) == 0) {
403 cap_usable = false;
404 }
405 }
406
407 __attribute__((target("wfxt")))
408 static void
try_wfxt(void)409 try_wfxt(void)
410 {
411 asm volatile ("wfet xzr");
412 }
413
/* SME probe: executes RDSVL into x0. */
static void
try_sme(void)
{
	__asm__ __volatile__ (
                "rdsvl x0, #1"
                : /* no outputs */
                : /* no inputs */
                : "x0");
}
424
/* SME2 probe: zeroes ZT0 between "smstart za" and "smstop za". */
static void
try_sme2(void)
{
	__asm__ __volatile__ (
                "smstart za\n"
                "zero { zt0 }\n"
                "smstop za\n"
        );
}
434
/* SME_F32F32 probe: one FMOPA on .s sources into za0.s, inside smstart/smstop. */
static void
try_sme_f32f32(void)
{
	__asm__ __volatile__ (
                "smstart\n"
                "fmopa za0.s, p0/m, p0/m, z0.s, z0.s\n"
                "smstop\n"
        );
}
444
/* SME_BI32I32 probe: one BMOPA on .s sources into za0.s, inside smstart/smstop. */
static void
try_sme_bi32i32(void)
{
	__asm__ __volatile__ (
                "smstart\n"
                "bmopa za0.s, p0/m, p0/m, z0.s, z0.s\n"
                "smstop\n"
        );
}
454
/* SME_B16F32 probe: one BFMOPA on .h sources into za0.s, inside smstart/smstop. */
static void
try_sme_b16f32(void)
{
	__asm__ __volatile__ (
                "smstart\n"
                "bfmopa za0.s, p0/m, p0/m, z0.h, z0.h\n"
                "smstop\n"
        );
}
464
/* SME_F16F32 probe: one FMOPA on .h sources into za0.s, inside smstart/smstop. */
static void
try_sme_f16f32(void)
{
	__asm__ __volatile__ (
                "smstart\n"
                "fmopa za0.s, p0/m, p0/m, z0.h, z0.h\n"
                "smstop\n"
        );
}
474
/* SME_I8I32 probe: one SMOPA on .b sources into za0.s, inside smstart/smstop. */
static void
try_sme_i8i32(void)
{
	__asm__ __volatile__ (
                "smstart\n"
                "smopa za0.s, p0/m, p0/m, z0.b, z0.b\n"
                "smstop\n"
        );
}
484
/* SME_I16I32 probe: one SMOPA on .h sources into za0.s, inside smstart/smstop. */
static void
try_sme_i16i32(void)
{
	__asm__ __volatile__ (
                "smstart\n"
                "smopa za0.s, p0/m, p0/m, z0.h, z0.h\n"
                "smstop\n"
        );
}
494
495 __attribute__((target("sme-f64f64")))
496 static void
try_sme_f64f64(void)497 try_sme_f64f64(void)
498 {
499 asm volatile (
500 "smstart" "\n"
501 "fmopa za0.d, p0/m, p0/m, z0.d, z0.d" "\n"
502 "smstop" "\n"
503 );
504 }
505
506 __attribute__((target("sme-i16i64")))
507 static void
try_sme_i16i64(void)508 try_sme_i16i64(void)
509 {
510 asm volatile (
511 "smstart" "\n"
512 "smopa za0.d, p0/m, p0/m, z0.h, z0.h" "\n"
513 "smstop" "\n"
514 );
515 }
516
517 static void
try_fpexcp(void)518 try_fpexcp(void)
519 {
520 /* FP Exceptions are supported if all exceptions bit can be set. */
521 const uint64_t flags = (1 << 8) | (1 << 9) | (1 << 10) | (1 << 11) | (1 << 12) | (1 << 15);
522
523 uint64_t old_fpcr = __builtin_arm_rsr64("FPCR");
524 __builtin_arm_wsr64("FPCR", old_fpcr | flags);
525 uint64_t new_fpcr = __builtin_arm_rsr64("FPCR");
526 __builtin_arm_wsr64("FPCR", old_fpcr);
527
528 if ((new_fpcr & flags) != flags) {
529 cap_usable = false;
530 }
531 }
532
/*
 * DIT probe: reads the DIT field and writes the same value straight back.
 *
 * Writing an uninitialised x0 would leave the thread's DIT state arbitrary
 * for the rest of the test, so the current value is fetched first.
 * Both the MRS and the MSR access the DIT register, so either one raises
 * the bad-instruction exception when the register is not available.
 */
static void
try_dit(void)
{
	__asm__ __volatile__ (
                "mrs x0, DIT\n"
                "msr DIT, x0\n"
                :
                :
                : "x0"
        );
}
543
/*
 * Exception port created by the cpu_capabilities test for
 * EXC_MASK_BAD_INSTRUCTION and handed to repeat_exception_handler().
 */
static mach_port_t exc_port;
545
546 static void
test_cpu_capability(const char * cap_name,uint64_t cap_flag,bool has_commpage_entry,const char * cap_sysctl,void (* try_cpu_capability)(void))547 test_cpu_capability(const char *cap_name, uint64_t cap_flag, bool has_commpage_entry, const char *cap_sysctl, void (*try_cpu_capability)(void))
548 {
549 uint64_t caps = _get_cpu_capabilities();
550 bool has_cap_flag = (caps & cap_flag);
551
552 int sysctl_val;
553 bool has_sysctl_flag = 0;
554 if (cap_sysctl != NULL) {
555 size_t sysctl_size = sizeof(sysctl_val);
556 int err = sysctlbyname(cap_sysctl, &sysctl_val, &sysctl_size, NULL, 0);
557 has_sysctl_flag = (err == 0 && sysctl_val > 0);
558 }
559
560 bool has_capability = has_commpage_entry ? has_cap_flag : has_sysctl_flag;
561
562 if (!has_commpage_entry && cap_sysctl == NULL) {
563 T_FAIL("Tested capability must have either sysctl or commpage flag");
564 return;
565 }
566
567 if (has_commpage_entry && cap_sysctl != NULL) {
568 T_EXPECT_EQ(has_cap_flag, has_sysctl_flag, "%s commpage flag matches sysctl flag", cap_name);
569 }
570
571 if (try_cpu_capability != NULL) {
572 cap_usable = true;
573 try_cpu_capability();
574 T_EXPECT_EQ(has_capability, cap_usable, "%s capability matches actual usability", cap_name);
575 }
576 }
577
578 T_DECL(cpu_capabilities, "Verify ARM CPU capabilities", T_META_TAG_VM_NOT_ELIGIBLE) {
579 exc_port = create_exception_port(EXC_MASK_BAD_INSTRUCTION);
580 repeat_exception_handler(exc_port, bad_instruction_handler);
581
582 test_cpu_capability("FP16 (deprecated sysctl)", kHasFeatFP16, true, "hw.optional.neon_fp16", NULL);
583 test_cpu_capability("FP16", kHasFeatFP16, true, "hw.optional.arm.FEAT_FP16", try_fp16);
584 test_cpu_capability("LSE (deprecated sysctl)", kHasFeatLSE, true, "hw.optional.armv8_1_atomics", NULL);
585 test_cpu_capability("LSE", kHasFeatLSE, true, "hw.optional.arm.FEAT_LSE", try_atomics);
586 test_cpu_capability("CRC32", kHasARMv8Crc32, true, "hw.optional.armv8_crc32", try_crc32);
587 test_cpu_capability("FHM (deprecated sysctl)", kHasFeatFHM, true, "hw.optional.armv8_2_fhm", NULL);
588 test_cpu_capability("FHM", kHasFeatFHM, true, "hw.optional.arm.FEAT_FHM", try_fhm);
589 test_cpu_capability("SHA512", kHasFeatSHA512, true, "hw.optional.armv8_2_sha512", try_sha512);
590 test_cpu_capability("SHA3", kHasFeatSHA3, true, "hw.optional.armv8_2_sha3", try_sha3);
591 test_cpu_capability("AES", kHasFeatAES, true, "hw.optional.arm.FEAT_AES", try_aes);
592 test_cpu_capability("SHA1", kHasFeatSHA1, true, "hw.optional.arm.FEAT_SHA1", try_sha1);
593 test_cpu_capability("SHA256", kHasFeatSHA256, true, "hw.optional.arm.FEAT_SHA256", try_sha256);
594 test_cpu_capability("PMULL", kHasFeatPMULL, true, "hw.optional.arm.FEAT_PMULL", try_pmull);
595 test_cpu_capability("FCMA (deprecated sysctl)", kHasFeatFCMA, true, "hw.optional.armv8_3_compnum", NULL);
596 test_cpu_capability("FCMA", kHasFeatFCMA, true, "hw.optional.arm.FEAT_FCMA", try_compnum);
597 test_cpu_capability("FlagM", kHasFEATFlagM, true, "hw.optional.arm.FEAT_FlagM", try_flagm);
598 test_cpu_capability("FlagM2", kHasFEATFlagM2, true, "hw.optional.arm.FEAT_FlagM2", try_flagm2);
599 test_cpu_capability("DotProd", kHasFeatDotProd, true, "hw.optional.arm.FEAT_DotProd", try_dotprod);
600 test_cpu_capability("RDM", kHasFeatRDM, true, "hw.optional.arm.FEAT_RDM", try_rdm);
601 test_cpu_capability("SB", kHasFeatSB, true, "hw.optional.arm.FEAT_SB", try_sb);
602 test_cpu_capability("FRINTTS", kHasFeatFRINTTS, true, "hw.optional.arm.FEAT_FRINTTS", try_frintts);
603 test_cpu_capability("JSCVT", kHasFeatJSCVT, true, "hw.optional.arm.FEAT_JSCVT", try_jscvt);
604 test_cpu_capability("PAuth", kHasFeatPAuth, true, "hw.optional.arm.FEAT_PAuth", try_pauth);
605 test_cpu_capability("DBP", kHasFeatDPB, true, "hw.optional.arm.FEAT_DPB", try_dpb);
606 test_cpu_capability("DBP2", kHasFeatDPB2, true, "hw.optional.arm.FEAT_DPB2", try_dpb2);
607 test_cpu_capability("SPECRES", kHasFeatSPECRES, true, "hw.optional.arm.FEAT_SPECRES", try_specres);
608 test_cpu_capability("LRCPC", kHasFeatLRCPC, true, "hw.optional.arm.FEAT_LRCPC", try_lrcpc);
609 test_cpu_capability("LRCPC2", kHasFeatLRCPC2, true, "hw.optional.arm.FEAT_LRCPC2", try_lrcpc2);
610 test_cpu_capability("AFP", kHasFeatAFP, true, "hw.optional.arm.FEAT_AFP", try_afp);
611 test_cpu_capability("DIT", kHasFeatDIT, true, "hw.optional.arm.FEAT_DIT", try_dit);
612 test_cpu_capability("FP16", kHasFP_SyncExceptions, true, "hw.optional.arm.FP_SyncExceptions", try_fpexcp);
613 test_cpu_capability("SME", kHasFeatSME, true, "hw.optional.arm.FEAT_SME", try_sme);
614 test_cpu_capability("SME2", kHasFeatSME2, true, "hw.optional.arm.FEAT_SME2", try_sme2);
615
616 // The following features do not have a commpage entry
617 test_cpu_capability("BF16", 0, false, "hw.optional.arm.FEAT_BF16", try_bf16);
618 test_cpu_capability("I8MM", 0, false, "hw.optional.arm.FEAT_I8MM", try_i8mm);
619 test_cpu_capability("ECV", 0, false, "hw.optional.arm.FEAT_ECV", try_ecv);
620 test_cpu_capability("RPRES", 0, false, "hw.optional.arm.FEAT_RPRES", try_rpres);
621 test_cpu_capability("WFxT", 0, false, "hw.optional.arm.FEAT_WFxT", try_wfxt);
622 test_cpu_capability("SME_F32F32", 0, false, "hw.optional.arm.SME_F32F32", try_sme_f32f32);
623 test_cpu_capability("SME_BI32I32", 0, false, "hw.optional.arm.SME_BI32I32", try_sme_bi32i32);
624 test_cpu_capability("SME_B16F32", 0, false, "hw.optional.arm.SME_B16F32", try_sme_b16f32);
625 test_cpu_capability("SME_F16F32", 0, false, "hw.optional.arm.SME_F16F32", try_sme_f16f32);
626 test_cpu_capability("SME_I8I32", 0, false, "hw.optional.arm.SME_I8I32", try_sme_i8i32);
627 test_cpu_capability("SME_I16I32", 0, false, "hw.optional.arm.SME_I16I32", try_sme_i16i32);
628 test_cpu_capability("SME_F64F64", 0, false, "hw.optional.arm.FEAT_SME_F64F64", try_sme_f64f64);
629 test_cpu_capability("SME_I16I64", 0, false, "hw.optional.arm.FEAT_SME_I16I64", try_sme_i16i64);
630
631 // The following features do not add instructions or registers to test for the presence of
632 test_cpu_capability("LSE2", kHasFeatLSE2, true, "hw.optional.arm.FEAT_LSE2", NULL);
633 test_cpu_capability("CSV2", kHasFeatCSV2, true, "hw.optional.arm.FEAT_CSV2", NULL);
634 test_cpu_capability("CSV3", kHasFeatCSV3, true, "hw.optional.arm.FEAT_CSV3", NULL);
635 }
636