1 /*
2 * Copyright (c) 2007-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <arm64/machine_machdep.h>
30 #include <arm64/proc_reg.h>
31 #include <arm/machine_cpu.h>
32 #include <arm/cpu_internal.h>
33 #include <arm/cpuid.h>
34 #include <arm/cpu_data.h>
35 #include <arm/cpu_data_internal.h>
36 #include <arm/caches_internal.h>
37 #include <arm/misc_protos.h>
38 #include <arm/machdep_call.h>
39 #include <arm/machine_routines.h>
40 #include <arm/rtclock.h>
41 #include <arm/cpuid_internal.h>
42 #include <arm/cpu_capabilities.h>
43 #include <console/serial_protos.h>
44 #include <kern/machine.h>
45 #include <kern/misc_protos.h>
46 #include <prng/random.h>
47 #include <kern/startup.h>
48 #include <kern/thread.h>
49 #include <kern/timer_queue.h>
50 #include <mach/machine.h>
51 #include <machine/atomic.h>
52 #include <machine/config.h>
53 #include <vm/pmap.h>
54 #include <vm/vm_page.h>
55 #include <vm/vm_shared_region_xnu.h>
56 #include <vm/vm_map_xnu.h>
57 #include <vm/vm_kern_xnu.h>
58 #include <sys/codesign.h>
59 #include <sys/kdebug.h>
60 #include <kern/coalition.h>
61 #include <pexpert/device_tree.h>
62 #include <pexpert/arm64/board_config.h>
63
64 #include <IOKit/IOPlatformExpert.h>
65 #if HIBERNATION
66 #include <IOKit/IOHibernatePrivate.h>
67 #endif /* HIBERNATION */
68
69 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
70 #include <arm64/amcc_rorgn.h>
71 #endif
72
73
74 #if CONFIG_SPTM
75 #include <arm64/sptm/sptm.h>
76 #endif /* CONFIG_SPTM */
77
78 #include <libkern/section_keywords.h>
79
80 /**
81 * On supported hardware, debuggable builds make the HID bits read-only
82 * without locking them. This lets people manually modify HID bits while
83 * debugging, since they can use a debugging tool to first reset the HID
84 * bits back to read/write. However it will still catch xnu changes that
85 * accidentally write to HID bits after they've been made read-only.
86 */
87 SECURITY_READ_ONLY_LATE(bool) skip_spr_lockdown_glb = 0;
88
89 /*
90 * On some SoCs, PIO lockdown is applied in assembly in early boot by
91 * secondary CPUs.
92 * Since the cluster_pio_ro_ctl value is dynamic, it is stored here by the
93 * primary CPU so that it doesn't have to be computed each time by the
94 * startup code.
95 */
96 SECURITY_READ_ONLY_LATE(uint64_t) cluster_pio_ro_ctl_mask_glb = 0;
97
98 #if CONFIG_CPU_COUNTERS
99 #include <kern/kpc.h>
100 #endif /* CONFIG_CPU_COUNTERS */
101
/*
 * Field extractors for an MPIDR_EL1 value: MPIDR_CPU_ID() yields the Aff0
 * field and MPIDR_CLUSTER_ID() yields the Aff1 field, each masked and shifted
 * down to bit 0.
 */
#define MPIDR_CPU_ID(mpidr_el1_val)             (((mpidr_el1_val) & MPIDR_AFF0_MASK) >> MPIDR_AFF0_SHIFT)
#define MPIDR_CLUSTER_ID(mpidr_el1_val)         (((mpidr_el1_val) & MPIDR_AFF1_MASK) >> MPIDR_AFF1_SHIFT)
104
105 #if HAS_CLUSTER
106 static uint8_t cluster_initialized = 0;
107 #endif
108
/*
 * Lock-related machine timeouts. The defaults below are expressed in timebase
 * units; ml_init_lock_timeout() may override them from the "slto_us",
 * "tlto_us" and "mtxspin" boot-args.
 */
MACHINE_TIMEOUT_DEV_WRITEABLE(LockTimeOut, "lock", 6e6 /* 0.25s */, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
machine_timeout_t LockTimeOutUsec; // computed in ml_init_lock_timeout

MACHINE_TIMEOUT_DEV_WRITEABLE(TLockTimeOut, "ticket-lock", 3e6 /* 0.125s */, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);

MACHINE_TIMEOUT_DEV_WRITEABLE(MutexSpin, "mutex-spin", 240 /* 10us */, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);

/* Both are initialized from MutexSpin in ml_init_lock_timeout(). */
uint64_t low_MutexSpin;
int64_t high_MutexSpin;
118
119
120
/*
 * Absolute-time value computed in ml_init_lock_timeout() from the
 * "max_wfe_us" boot-arg, which defaults to MAX_WFE_HINT_INTERVAL_US
 * microseconds. Presumably the upper bound applied to WFE hint intervals;
 * the consumer is outside this part of the file.
 */
static uint64_t ml_wfe_hint_max_interval;
#define MAX_WFE_HINT_INTERVAL_US (500ULL)
123
124 /* Must be less than cpu_idle_latency to ensure ml_delay_should_spin is true */
125 TUNABLE(uint32_t, yield_delay_us, "yield_delay_us", 0);
126
127 extern vm_offset_t segLOWEST;
128 extern vm_offset_t segLOWESTTEXT;
129 extern vm_offset_t segLASTB;
130 extern unsigned long segSizeLAST;
131
132 /* ARM64 specific bounds; used to test for presence in the kernelcache. */
133 extern vm_offset_t vm_kernelcache_base;
134 extern vm_offset_t vm_kernelcache_top;
135
136 /* Location of the physmap / physical aperture */
137 extern uint64_t physmap_base;
138
139 #if defined(CONFIG_SPTM)
140 extern const arm_physrange_t *arm_vm_kernelcache_ranges;
141 extern int arm_vm_kernelcache_numranges;
142 #else /* defined(CONFIG_SPTM) */
143 extern vm_offset_t arm_vm_kernelcache_phys_start;
144 extern vm_offset_t arm_vm_kernelcache_phys_end;
145 #endif /* defined(CONFIG_SPTM) */
146
#if defined(HAS_IPI)
/*
 * Fast IPI flag. On DEVELOPMENT/DEBUG kernels machine_startup() lets the
 * "fastipi" boot-arg override it.
 */
unsigned int gFastIPI = 1;
#define kDeferredIPITimerDefault (64 * NSEC_PER_USEC) /* in nanoseconds */
/*
 * Deferred IPI timer, in nanoseconds, tunable via "fastipitimeout".
 * ml_cpu_signal_deferred_adjust_timer() rewrites it with the clamped value
 * that was actually programmed.
 */
static TUNABLE_WRITEABLE(uint64_t, deferred_ipi_timer_ns, "fastipitimeout",
    kDeferredIPITimerDefault);
#endif /* defined(HAS_IPI) */
153
154 thread_t Idle_context(void);
155
156 SECURITY_READ_ONLY_LATE(bool) cpu_config_correct = true;
157
158 SECURITY_READ_ONLY_LATE(static ml_topology_cpu_t) topology_cpu_array[MAX_CPUS];
159 SECURITY_READ_ONLY_LATE(static ml_topology_cluster_t) topology_cluster_array[MAX_CPU_CLUSTERS];
160 SECURITY_READ_ONLY_LATE(static ml_topology_info_t) topology_info = {
161 .version = CPU_TOPOLOGY_VERSION,
162 .cpus = topology_cpu_array,
163 .clusters = topology_cluster_array,
164 };
165
166 _Atomic unsigned int cluster_type_num_active_cpus[MAX_CPU_TYPES];
167
168 /**
169 * Represents the offset of each cluster within a hypothetical array of MAX_CPUS
170 * entries of an arbitrary data type. This is intended for use by specialized consumers
171 * that must quickly access per-CPU data using only the physical CPU ID (MPIDR_EL1),
172 * as follows:
173 * hypothetical_array[cluster_offsets[AFF1] + AFF0]
174 * Most consumers should instead use general-purpose facilities such as PERCPU or
175 * ml_get_cpu_number().
176 */
177 SECURITY_READ_ONLY_LATE(int64_t) cluster_offsets[MAX_CPU_CLUSTER_PHY_ID + 1];
178
179 SECURITY_READ_ONLY_LATE(static uint32_t) arm64_eventi = UINT32_MAX;
180
181 extern uint32_t lockdown_done;
182
183 /**
184 * Represents regions of virtual address space that should be reserved
185 * (pre-mapped) in each user address space.
186 */
187 static const struct vm_reserved_region vm_reserved_regions[] = {
188 {
189 .vmrr_name = "GPU Carveout",
190 .vmrr_addr = MACH_VM_MIN_GPU_CARVEOUT_ADDRESS,
191 .vmrr_size = (vm_map_size_t)(MACH_VM_MAX_GPU_CARVEOUT_ADDRESS - MACH_VM_MIN_GPU_CARVEOUT_ADDRESS)
192 },
193 /*
194 * Reserve the virtual memory space representing the commpage nesting region
195 * to prevent user processes from allocating memory within it. The actual
196 * page table entries for the commpage are inserted by vm_commpage_enter().
197 * This vm_map_enter() just prevents userspace from allocating/deallocating
198 * anything within the entire commpage nested region.
199 */
200 {
201 .vmrr_name = "commpage nesting",
202 .vmrr_addr = _COMM_PAGE64_NESTING_START,
203 .vmrr_size = _COMM_PAGE64_NESTING_SIZE
204 }
205 };
206
207 uint32_t get_arm_cpu_version(void);
208
209
#if defined(HAS_IPI)
/**
 * Write an IPI request of the given type for the CPU identified by cpu_mpidr.
 *
 * With HAS_CLUSTER, a target in the caller's own cluster is signalled through
 * S3_5_C15_C0_0; any other target is signalled through S3_5_C15_C0_1 with the
 * target cluster ID placed at IPI_RR_TARGET_CLUSTER_SHIFT. Without
 * HAS_CLUSTER, S3_5_C15_C0_1 is always used.
 *
 * @param cpu_mpidr  MPIDR value of the target CPU.
 * @param type       Request type bits, OR-ed into the register value.
 *
 * @note The caller must be non-preemptible, or at least bound to one CPU;
 *       otherwise the thread could migrate between choosing the IPI
 *       mechanism and issuing the IPI.
 */
static inline void
ml_cpu_signal_type(unsigned int cpu_mpidr, uint32_t type)
{
	uint64_t request;

#if HAS_CLUSTER
	uint64_t self_mpidr;

	MRS(self_mpidr, "MPIDR_EL1");
	if (MPIDR_CLUSTER_ID(self_mpidr) != MPIDR_CLUSTER_ID(cpu_mpidr)) {
#define IPI_RR_TARGET_CLUSTER_SHIFT 16
		/* Target lives in another cluster: include its cluster ID. */
		request = type | (MPIDR_CLUSTER_ID(cpu_mpidr) << IPI_RR_TARGET_CLUSTER_SHIFT) | MPIDR_CPU_ID(cpu_mpidr);
		MSR("S3_5_C15_C0_1", request);
	} else {
		/* Target shares our cluster: only the CPU ID is needed. */
		request = type | MPIDR_CPU_ID(cpu_mpidr);
		MSR("S3_5_C15_C0_0", request);
	}
#else
	request = type | MPIDR_CPU_ID(cpu_mpidr);
	MSR("S3_5_C15_C0_1", request);
#endif
	/*
	 * The recommended local/global IPI sequence is:
	 *   DSB <sys>  (makes older stores, e.g. to the pending CPU signals
	 *              bit vector in DRAM, visible before IPI reception; this
	 *              is present in cpu_signal_internal())
	 *   MSR S3_5_C15_C0_1, Xt
	 *   ISB
	 */
	__builtin_arm_isb(ISB_SY);
}
#endif
243
244 #if !defined(HAS_IPI)
245 __dead2
246 #endif
247 void
ml_cpu_signal(unsigned int cpu_mpidr __unused)248 ml_cpu_signal(unsigned int cpu_mpidr __unused)
249 {
250 #if defined(HAS_IPI)
251 ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_IMMEDIATE);
252 #else
253 panic("Platform does not support ACC Fast IPI");
254 #endif
255 }
256
257 #if !defined(HAS_IPI)
258 __dead2
259 #endif
260 void
ml_cpu_signal_deferred_adjust_timer(uint64_t nanosecs)261 ml_cpu_signal_deferred_adjust_timer(uint64_t nanosecs)
262 {
263 #if defined(HAS_IPI)
264 /* adjust IPI_CR timer countdown value for deferred IPI
265 * accepts input in nanosecs, convert to absolutetime (REFCLK ticks),
266 * clamp maximum REFCLK ticks to 0xFFFF (16 bit field)
267 *
268 * global register, should only require a single write to update all
269 * CPU cores: from Skye ACC user spec section 5.7.3.3
270 *
271 * IPICR is a global register but there are two copies in ACC: one at pBLK and one at eBLK.
272 * IPICR write SPR token also traverses both pCPM and eCPM rings and updates both copies.
273 */
274 uint64_t abstime;
275
276 nanoseconds_to_absolutetime(nanosecs, &abstime);
277
278 abstime = MIN(abstime, 0xFFFF);
279
280 /* update deferred_ipi_timer_ns with the new clamped value */
281 absolutetime_to_nanoseconds(abstime, &deferred_ipi_timer_ns);
282
283 MSR("S3_5_C15_C3_1", abstime);
284 #else
285 (void)nanosecs;
286 panic("Platform does not support ACC Fast IPI");
287 #endif
288 }
289
/**
 * Return the current deferred-IPI timer value.
 *
 * @return deferred_ipi_timer_ns (nanoseconds) when built with HAS_IPI,
 *         0 otherwise.
 */
uint64_t
ml_cpu_signal_deferred_get_timer(void)
{
#if defined(HAS_IPI)
	return deferred_ipi_timer_ns;
#else
	return 0;
#endif
}
299
300 #if !defined(HAS_IPI)
301 __dead2
302 #endif
303 void
ml_cpu_signal_deferred(unsigned int cpu_mpidr __unused)304 ml_cpu_signal_deferred(unsigned int cpu_mpidr __unused)
305 {
306 #if defined(HAS_IPI)
307 ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_DEFERRED);
308 #else
309 panic("Platform does not support ACC Fast IPI deferral");
310 #endif
311 }
312
313 #if !defined(HAS_IPI)
314 __dead2
315 #endif
316 void
ml_cpu_signal_retract(unsigned int cpu_mpidr __unused)317 ml_cpu_signal_retract(unsigned int cpu_mpidr __unused)
318 {
319 #if defined(HAS_IPI)
320 ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_RETRACT);
321 #else
322 panic("Platform does not support ACC Fast IPI retraction");
323 #endif
324 }
325
326 extern uint32_t idle_proximate_io_wfe_unmasked;
327
328 #define CPUPM_IDLE_WFE 0x5310300
329 static bool
wfe_process_recommendation(void)330 wfe_process_recommendation(void)
331 {
332 bool ipending = false;
333 if (__probable(idle_proximate_io_wfe_unmasked == 1)) {
334 /* Check for an active perf. controller generated
335 * WFE recommendation for this cluster.
336 */
337 cpu_data_t *cdp = getCpuDatap();
338 uint32_t cid = cdp->cpu_cluster_id;
339 uint64_t wfe_ttd = 0;
340 uint64_t wfe_deadline = 0;
341
342 if ((wfe_ttd = ml_cluster_wfe_timeout(cid)) != 0) {
343 wfe_deadline = mach_absolute_time() + wfe_ttd;
344 }
345
346 if (wfe_deadline != 0) {
347 /* Poll issuing event-bounded WFEs until an interrupt
348 * arrives or the WFE recommendation expires
349 */
350 #if DEVELOPMENT || DEBUG
351 uint64_t wc = cdp->wfe_count;
352 KDBG(CPUPM_IDLE_WFE | DBG_FUNC_START, ipending, wc, wfe_ttd, cdp->cpu_stat.irq_ex_cnt_wake);
353 #endif
354 /* Issue WFE until the recommendation expires,
355 * with IRQs unmasked.
356 */
357 ipending = wfe_to_deadline_or_interrupt(cid, wfe_deadline, cdp, true, true);
358 #if DEVELOPMENT || DEBUG
359 KDBG(CPUPM_IDLE_WFE | DBG_FUNC_END, ipending, cdp->wfe_count - wc, wfe_deadline, cdp->cpu_stat.irq_ex_cnt_wake);
360 #endif
361 }
362 }
363 return ipending;
364 }
365
366 void
machine_idle(void)367 machine_idle(void)
368 {
369 /* Interrupts are expected to be masked on entry or re-entry via
370 * Idle_load_context()
371 */
372 assert((__builtin_arm_rsr("DAIF") & DAIF_STANDARD_DISABLE) == DAIF_STANDARD_DISABLE);
373 /* Check for, and act on, a WFE recommendation.
374 * Bypasses context spill/fill for a minor perf. increment.
375 * May unmask and restore IRQ+FIQ mask.
376 */
377 if (wfe_process_recommendation() == false) {
378 /* If WFE recommendation absent, or WFE deadline
379 * arrived with no interrupt pending/processed,
380 * fall back to WFI.
381 */
382 Idle_context();
383 }
384 __builtin_arm_wsr("DAIFClr", DAIFSC_STANDARD_DISABLE);
385 }
386
387 void
OSSynchronizeIO(void)388 OSSynchronizeIO(void)
389 {
390 __builtin_arm_dsb(DSB_SY);
391 }
392
393 uint64_t
get_aux_control(void)394 get_aux_control(void)
395 {
396 uint64_t value;
397
398 MRS(value, "ACTLR_EL1");
399 return value;
400 }
401
402 uint64_t
get_mmu_control(void)403 get_mmu_control(void)
404 {
405 uint64_t value;
406
407 MRS(value, "SCTLR_EL1");
408 return value;
409 }
410
411 uint64_t
get_tcr(void)412 get_tcr(void)
413 {
414 uint64_t value;
415
416 MRS(value, "TCR_EL1");
417 return value;
418 }
419
420 boolean_t
ml_get_interrupts_enabled(void)421 ml_get_interrupts_enabled(void)
422 {
423 uint64_t value;
424
425 MRS(value, "DAIF");
426 if ((value & DAIF_STANDARD_DISABLE) == DAIF_STANDARD_DISABLE) {
427 return FALSE;
428 }
429 return TRUE;
430 }
431
432 pmap_paddr_t
get_mmu_ttb(void)433 get_mmu_ttb(void)
434 {
435 pmap_paddr_t value;
436
437 MRS(value, "TTBR0_EL1");
438 return value;
439 }
440
441 uint32_t
get_arm_cpu_version(void)442 get_arm_cpu_version(void)
443 {
444 uint32_t value = machine_read_midr();
445
446 /* Compose the register values into 8 bits; variant[7:4], revision[3:0]. */
447 return ((value & MIDR_EL1_REV_MASK) >> MIDR_EL1_REV_SHIFT) | ((value & MIDR_EL1_VAR_MASK) >> (MIDR_EL1_VAR_SHIFT - 4));
448 }
449
450 bool
ml_feature_supported(uint64_t feature_bit)451 ml_feature_supported(uint64_t feature_bit)
452 {
453 uint64_t aidr_el1_value = 0;
454
455 MRS(aidr_el1_value, "AIDR_EL1");
456
457 #ifdef APPLEAVALANCHE
458 #endif // APPLEAVALANCHE
459
460 return aidr_el1_value & feature_bit;
461 }
462
463 /*
464 * user_cont_hwclock_allowed()
465 *
466 * Indicates whether we allow EL0 to read the virtual timebase (CNTVCT_EL0)
467 * as a continuous time source (e.g. from mach_continuous_time)
468 */
469 boolean_t
user_cont_hwclock_allowed(void)470 user_cont_hwclock_allowed(void)
471 {
472 #if HAS_CONTINUOUS_HWCLOCK
473 return TRUE;
474 #else
475 return FALSE;
476 #endif
477 }
478
479 /*
480 * user_timebase_type()
481 *
482 * Indicates type of EL0 virtual timebase read (CNTVCT_EL0).
483 *
484 * USER_TIMEBASE_NONE: EL0 has no access to timebase register
485 * USER_TIMEBASE_SPEC: EL0 has access to speculative timebase reads (CNTVCT_EL0)
486 * USER_TIMEBASE_NOSPEC: EL0 has access to non speculative timebase reads (CNTVCTSS_EL0)
487 *
488 */
489
490 uint8_t
user_timebase_type(void)491 user_timebase_type(void)
492 {
493 #if HAS_ACNTVCT
494 return USER_TIMEBASE_NOSPEC_APPLE;
495 #elif HAS_APPLE_GENERIC_TIMER
496 // Conveniently, AGTCNTVCTSS_EL0 and ACNTVCT_EL0 have identical encodings
497 return USER_TIMEBASE_NOSPEC_APPLE;
498 #elif __ARM_ARCH_8_6__
499 return USER_TIMEBASE_NOSPEC;
500 #else
501 return USER_TIMEBASE_SPEC;
502 #endif
503 }
504
505 void
machine_startup(__unused boot_args * args)506 machine_startup(__unused boot_args * args)
507 {
508 #if defined(HAS_IPI) && (DEVELOPMENT || DEBUG)
509 if (!PE_parse_boot_argn("fastipi", &gFastIPI, sizeof(gFastIPI))) {
510 gFastIPI = 1;
511 }
512 #endif /* defined(HAS_IPI) && (DEVELOPMENT || DEBUG)*/
513
514
515 machine_conf();
516
517
518 /*
519 * Kick off the kernel bootstrap.
520 */
521 kernel_bootstrap();
522 /* NOTREACHED */
523 }
524
525 typedef void (*invalidate_fn_t)(void);
526
527 static SECURITY_READ_ONLY_LATE(invalidate_fn_t) invalidate_hmac_function = NULL;
528
529 void set_invalidate_hmac_function(invalidate_fn_t fn);
530
531 void
set_invalidate_hmac_function(invalidate_fn_t fn)532 set_invalidate_hmac_function(invalidate_fn_t fn)
533 {
534 if (NULL != invalidate_hmac_function) {
535 panic("Invalidate HMAC function already set");
536 }
537
538 invalidate_hmac_function = fn;
539 }
540
/**
 * Report whether secure hibernation is supported.
 *
 * @return Always false in this configuration.
 */
bool
ml_is_secure_hib_supported(void)
{
	return false;
}
546
547 void
machine_lockdown(void)548 machine_lockdown(void)
549 {
550
551 #if CONFIG_SPTM
552
553 /**
554 * On devices that make use of the SPTM, the SPTM is responsible for
555 * managing system register locks. Due to this, we skip the call to
556 * spr_lockdown() below.
557 */
558 #else
559 #endif
560
561 arm_vm_prot_finalize(PE_state.bootArgs);
562
563 #if CONFIG_KERNEL_INTEGRITY
564 #if KERNEL_INTEGRITY_WT
565 /* Watchtower
566 *
567 * Notify the monitor about the completion of early kernel bootstrap.
568 * From this point forward it will enforce the integrity of kernel text,
569 * rodata and page tables.
570 */
571
572 #ifdef MONITOR
573 monitor_call(MONITOR_LOCKDOWN, 0, 0, 0);
574 #endif
575 #endif /* KERNEL_INTEGRITY_WT */
576
577 #if CONFIG_SPTM
578 extern void pmap_prepare_commpages(void);
579 pmap_prepare_commpages();
580
581 /**
582 * sptm_lockdown_xnu() disables preemption like all SPTM calls, but may take
583 * a fair amount of time as it involves retyping a large number of pages.
584 * This preemption latency is not really a concern since we're still fairly
585 * early in the boot process, so just explicitly disable preemption before
586 * invoking the SPTM and abandon preemption latency measurements before
587 * re-enabling it.
588 */
589 disable_preemption();
590 /* Signal the SPTM that XNU is ready for RO memory to actually become read-only */
591 sptm_lockdown_xnu();
592 #if SCHED_HYGIENE_DEBUG
593 abandon_preemption_disable_measurement();
594 #endif /* SCHED_HYGIENE_DEBUG */
595 enable_preemption();
596 #else
597 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
598 /* KTRR
599 *
600 * Lock physical KTRR region. KTRR region is read-only. Memory outside
601 * the region is not executable at EL1.
602 */
603
604 rorgn_lockdown();
605 #endif /* defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) */
606 #endif /* CONFIG_SPTM */
607
608 #if XNU_MONITOR
609 pmap_lockdown_ppl();
610 #endif
611
612 #endif /* CONFIG_KERNEL_INTEGRITY */
613
614
615 /**
616 * For platforms that use SEP-backed hibernation, invoke kext-provided
617 * functionality to invalidate HMAC key in SIO used to sign a variety of
618 * data (e.g., the RO region).
619 *
620 * Just for paranoia's sake, let's make it so that if an attacker is
621 * capable of corrupting EDT early that they have to do so in a way that
622 * prevents invaldidate_hmac_function from running properly yet still
623 * makes it so that the invalidate HMAC function receives an OK
624 * response, which seems hard.
625 *
626 * This only makes sense for PPL-based systems seeing as SPTM-based systems
627 * will have iBoot invalidate Key1 for us.
628 */
629 if (NULL != invalidate_hmac_function) {
630 #if !defined(CONFIG_SPTM)
631 invalidate_hmac_function();
632 #endif /* !defined(CONFIG_SPTM) */
633 }
634
635 lockdown_done = 1;
636 }
637
638
639 char *
machine_boot_info(__unused char * buf,__unused vm_size_t size)640 machine_boot_info(
641 __unused char *buf,
642 __unused vm_size_t size)
643 {
644 return PE_boot_args();
645 }
646
647 void
machine_cpu_reinit(__unused void * param)648 machine_cpu_reinit(__unused void *param)
649 {
650 cpu_machine_init(); /* Initialize the processor */
651 clock_init(); /* Init the clock */
652 }
653
654 /*
655 * Routine: machine_processor_shutdown
656 * Function:
657 */
658 thread_t
machine_processor_shutdown(__unused thread_t thread,void (* doshutdown)(processor_t),processor_t processor)659 machine_processor_shutdown(
660 __unused thread_t thread,
661 void (*doshutdown)(processor_t),
662 processor_t processor)
663 {
664 return Shutdown_context(doshutdown, processor);
665 }
666
667 /*
668 * Routine: ml_init_lock_timeout
669 * Function:
670 */
671 static void __startup_func
ml_init_lock_timeout(void)672 ml_init_lock_timeout(void)
673 {
674 /*
675 * This function is called after STARTUP_SUB_TIMEOUTS
676 * initialization, so using the "legacy" boot-args here overrides
677 * the ml-timeout-... configuration. (Given that these boot-args
678 * here are usually explicitly specified, this makes sense by
679 * overriding ml-timeout-..., which may come from the device tree.
680 */
681
682 uint64_t lto_timeout_ns;
683 uint64_t lto_abstime;
684 uint32_t slto;
685
686 if (PE_parse_boot_argn("slto_us", &slto, sizeof(slto))) {
687 lto_timeout_ns = slto * NSEC_PER_USEC;
688 nanoseconds_to_absolutetime(lto_timeout_ns, <o_abstime);
689 os_atomic_store(&LockTimeOut, lto_abstime, relaxed);
690 } else {
691 lto_abstime = os_atomic_load(&LockTimeOut, relaxed);
692 absolutetime_to_nanoseconds(lto_abstime, <o_timeout_ns);
693 }
694
695 os_atomic_store(&LockTimeOutUsec, lto_timeout_ns / NSEC_PER_USEC, relaxed);
696
697 if (PE_parse_boot_argn("tlto_us", &slto, sizeof(slto))) {
698 nanoseconds_to_absolutetime(slto * NSEC_PER_USEC, <o_abstime);
699 os_atomic_store(&TLockTimeOut, lto_abstime, relaxed);
700 } else if (lto_abstime != 0) {
701 os_atomic_store(&TLockTimeOut, lto_abstime >> 1, relaxed);
702 } // else take default from MACHINE_TIMEOUT.
703
704 uint64_t mtxspin;
705 uint64_t mtx_abstime;
706 if (PE_parse_boot_argn("mtxspin", &mtxspin, sizeof(mtxspin))) {
707 if (mtxspin > USEC_PER_SEC >> 4) {
708 mtxspin = USEC_PER_SEC >> 4;
709 }
710 nanoseconds_to_absolutetime(mtxspin * NSEC_PER_USEC, &mtx_abstime);
711 os_atomic_store(&MutexSpin, mtx_abstime, relaxed);
712 } else {
713 mtx_abstime = os_atomic_load(&MutexSpin, relaxed);
714 }
715
716 low_MutexSpin = os_atomic_load(&MutexSpin, relaxed);
717 /*
718 * high_MutexSpin should be initialized as low_MutexSpin * real_ncpus, but
719 * real_ncpus is not set at this time
720 *
721 * NOTE: active spinning is disabled in arm. It can be activated
722 * by setting high_MutexSpin through the sysctl.
723 */
724 high_MutexSpin = low_MutexSpin;
725
726 uint64_t maxwfeus = MAX_WFE_HINT_INTERVAL_US;
727 PE_parse_boot_argn("max_wfe_us", &maxwfeus, sizeof(maxwfeus));
728 nanoseconds_to_absolutetime(maxwfeus * NSEC_PER_USEC, &ml_wfe_hint_max_interval);
729 }
730 STARTUP(TIMEOUTS, STARTUP_RANK_MIDDLE, ml_init_lock_timeout);
731
732
/*
 * Called once all of the ml_processor_info_t structures have been
 * initialized and all the processors have been started through
 * processor_boot().
 *
 * Required by the scheduler subsystem; this simply forwards to
 * sched_cpu_init_completed().
 */
void
ml_cpu_init_completed(void)
{
	sched_cpu_init_completed();
}
744
745 /*
746 * This tracks which cpus are between ml_cpu_down and ml_cpu_up
747 */
748 _Atomic uint64_t ml_cpu_up_processors = 0;
749
750 void
ml_cpu_up(void)751 ml_cpu_up(void)
752 {
753 cpu_data_t *cpu_data_ptr = getCpuDatap();
754
755 assert(!bit_test(os_atomic_load(&ml_cpu_up_processors, relaxed), cpu_data_ptr->cpu_number));
756
757 atomic_bit_set(&ml_cpu_up_processors, cpu_data_ptr->cpu_number, memory_order_relaxed);
758 }
759
760 /*
761 * These are called from the machine-independent routine cpu_up()
762 * to perform machine-dependent info updates.
763 *
764 * The update to CPU counts needs to be separate from other actions
765 * because we don't update the counts when CLPC causes temporary
766 * cluster powerdown events, as these must be transparent to the user.
767 */
768
769 void
ml_cpu_up_update_counts(int cpu_id)770 ml_cpu_up_update_counts(int cpu_id)
771 {
772 ml_topology_cpu_t *cpu = &ml_get_topology_info()->cpus[cpu_id];
773
774 os_atomic_inc(&cluster_type_num_active_cpus[cpu->cluster_type], relaxed);
775
776 os_atomic_inc(&machine_info.physical_cpu, relaxed);
777 os_atomic_inc(&machine_info.logical_cpu, relaxed);
778 }
779
780 int
ml_find_next_up_processor()781 ml_find_next_up_processor()
782 {
783 if (BootCpuData.cpu_running) {
784 return BootCpuData.cpu_number;
785 }
786
787 int next_active_cpu = lsb_first(os_atomic_load(&ml_cpu_up_processors, relaxed));
788
789 if (next_active_cpu == -1) {
790 assertf(ml_is_quiescing(), "can only have no active CPUs in quiesce state");
791 next_active_cpu = BootCpuData.cpu_number;
792 }
793
794 return next_active_cpu;
795 }
796
797 /*
798 * These are called from the machine-independent routine cpu_down()
799 * to perform machine-dependent info updates.
800 *
801 * The update to CPU counts needs to be separate from other actions
802 * because we don't update the counts when CLPC causes temporary
803 * cluster powerdown events, as these must be transparent to the user.
804 */
805 void
ml_cpu_down(void)806 ml_cpu_down(void)
807 {
808 /*
809 * If we want to deal with outstanding IPIs, we need to
810 * do relatively early in the processor_doshutdown path,
811 * as we pend decrementer interrupts using the IPI
812 * mechanism if we cannot immediately service them (if
813 * IRQ is masked). Do so now.
814 *
815 * We aren't on the interrupt stack here; would it make
816 * more sense to disable signaling and then enable
817 * interrupts? It might be a bit cleaner.
818 */
819 cpu_data_t *cpu_data_ptr = getCpuDatap();
820 cpu_data_ptr->cpu_running = FALSE;
821
822 assert((cpu_data_ptr->cpu_signal & SIGPdisabled) == 0);
823 assert(bit_test(os_atomic_load(&ml_cpu_up_processors, relaxed), cpu_data_ptr->cpu_number));
824
825 atomic_bit_clear(&ml_cpu_up_processors, cpu_data_ptr->cpu_number, memory_order_release);
826
827 if (cpu_data_ptr == &BootCpuData && ml_is_quiescing()) {
828 /*
829 * This is the boot CPU powering down for S2R, don't try to migrate its timers,
830 * because there is nobody else active to migrate it to.
831 */
832 assert3u(os_atomic_load(&ml_cpu_up_processors, relaxed), ==, 0);
833 } else if (cpu_data_ptr != &BootCpuData || (support_bootcpu_shutdown && !ml_is_quiescing())) {
834 int next_cpu = ml_find_next_up_processor();
835
836 cpu_data_t* new_cpu_datap = cpu_datap(next_cpu);
837
838 /*
839 * Move all of this cpu's timers to another cpu that has not gone through ml_cpu_down,
840 * and poke it in case there's a sooner deadline for it to schedule.
841 *
842 * This depends on ml_cpu_down never running concurrently, which is guaranteed by
843 * the processor_updown_lock.
844 */
845 timer_queue_shutdown(next_cpu, &cpu_data_ptr->rtclock_timer.queue,
846 &new_cpu_datap->rtclock_timer.queue);
847
848 /*
849 * Trigger timer_queue_expire_local to execute on the remote CPU.
850 *
851 * Because we have interrupts disabled here, we cannot use a
852 * standard cpu_xcall, which would deadlock against the stackshot
853 * IPI. This must be a fire-and-forget IPI.
854 */
855 kern_return_t rv = cpu_signal(new_cpu_datap, SIGPTimerLocal, NULL, NULL);
856
857 if (rv != KERN_SUCCESS) {
858 panic("ml_cpu_down: cpu_signal of cpu %d failure %d", next_cpu, rv);
859 }
860 } else {
861 panic("boot cpu powering down with nowhere for its timers to go");
862 }
863
864 cpu_signal_handler_internal(TRUE);
865
866 /* There should be no more pending IPIs on this core. */
867 assert3u(getCpuDatap()->cpu_signal, ==, SIGPdisabled);
868 }
869
870 void
ml_cpu_down_update_counts(int cpu_id)871 ml_cpu_down_update_counts(int cpu_id)
872 {
873 ml_topology_cpu_t *cpu = &ml_get_topology_info()->cpus[cpu_id];
874
875 os_atomic_dec(&cluster_type_num_active_cpus[cpu->cluster_type], relaxed);
876
877 os_atomic_dec(&machine_info.physical_cpu, relaxed);
878 os_atomic_dec(&machine_info.logical_cpu, relaxed);
879 }
880
881
882 unsigned int
ml_get_machine_mem(void)883 ml_get_machine_mem(void)
884 {
885 return machine_info.memory_size;
886 }
887
888 __attribute__((noreturn))
889 void
halt_all_cpus(boolean_t reboot)890 halt_all_cpus(boolean_t reboot)
891 {
892 if (reboot) {
893 printf("MACH Reboot\n");
894 PEHaltRestart(kPERestartCPU);
895 } else {
896 printf("CPU halted\n");
897 PEHaltRestart(kPEHaltCPU);
898 }
899 while (1) {
900 ;
901 }
902 }
903
904 __attribute__((noreturn))
905 void
halt_cpu(void)906 halt_cpu(void)
907 {
908 halt_all_cpus(FALSE);
909 }
910
911 /*
912 * Routine: machine_signal_idle
913 * Function:
914 */
915 void
machine_signal_idle(processor_t processor)916 machine_signal_idle(
917 processor_t processor)
918 {
919 cpu_signal(processor_to_cpu_datap(processor), SIGPnop, (void *)NULL, (void *)NULL);
920 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
921 }
922
923 void
machine_signal_idle_deferred(processor_t processor)924 machine_signal_idle_deferred(
925 processor_t processor)
926 {
927 cpu_signal_deferred(processor_to_cpu_datap(processor));
928 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_DEFERRED_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
929 }
930
931 void
machine_signal_idle_cancel(processor_t processor)932 machine_signal_idle_cancel(
933 processor_t processor)
934 {
935 cpu_signal_cancel(processor_to_cpu_datap(processor));
936 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_CANCEL_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0);
937 }
938
939 /*
940 * Routine: ml_install_interrupt_handler
941 * Function: Initialize Interrupt Handler
942 */
943 void
ml_install_interrupt_handler(void * nub,int source,void * target,IOInterruptHandler handler,void * refCon)944 ml_install_interrupt_handler(
945 void *nub,
946 int source,
947 void *target,
948 IOInterruptHandler handler,
949 void *refCon)
950 {
951 cpu_data_t *cpu_data_ptr;
952 boolean_t current_state;
953
954 current_state = ml_set_interrupts_enabled(FALSE);
955 cpu_data_ptr = getCpuDatap();
956
957 cpu_data_ptr->interrupt_nub = nub;
958 cpu_data_ptr->interrupt_source = source;
959 cpu_data_ptr->interrupt_target = target;
960 cpu_data_ptr->interrupt_handler = handler;
961 cpu_data_ptr->interrupt_refCon = refCon;
962
963 (void) ml_set_interrupts_enabled(current_state);
964 }
965
/*
 *	Routine:        ml_init_interrupt
 *	Function:       Initialize Interrupts. With HAS_IPI this programs the
 *	                deferred IPI timer; otherwise it does nothing.
 */
void
ml_init_interrupt(void)
{
#if defined(HAS_IPI)
	/*
	 * ml_init_interrupt gets called once for each CPU, but programming the
	 * timer every time is redundant because there is only one global copy
	 * of the register for skye. The write is therefore limited to CPUs
	 * whose cluster_master flag is set.
	 * NOTE(review): this comment previously said "only on the bootstrap
	 * cpu"; confirm whether cluster_master is the intended condition.
	 */
	cpu_data_t *cdp = getCpuDatap();

	if (!cdp->cluster_master) {
		return;
	}

	ml_cpu_signal_deferred_adjust_timer(deferred_ipi_timer_ns);
#endif
}
984
985 /*
986 * Routine: ml_init_timebase
987 * Function: register and setup Timebase, Decremeter services
988 */
989 void
ml_init_timebase(void * args,tbd_ops_t tbd_funcs,vm_offset_t int_address,vm_offset_t int_value __unused)990 ml_init_timebase(
991 void *args,
992 tbd_ops_t tbd_funcs,
993 vm_offset_t int_address,
994 vm_offset_t int_value __unused)
995 {
996 cpu_data_t *cpu_data_ptr;
997
998 cpu_data_ptr = (cpu_data_t *)args;
999
1000 if ((cpu_data_ptr == &BootCpuData)
1001 && (rtclock_timebase_func.tbd_fiq_handler == (void *)NULL)) {
1002 rtclock_timebase_func = *tbd_funcs;
1003 rtclock_timebase_addr = int_address;
1004 }
1005 }
1006
1007 #define ML_READPROP_MANDATORY UINT64_MAX
1008
1009 static uint64_t
ml_readprop(const DTEntry entry,const char * propertyName,uint64_t default_value)1010 ml_readprop(const DTEntry entry, const char *propertyName, uint64_t default_value)
1011 {
1012 void const *prop;
1013 unsigned int propSize;
1014
1015 if (SecureDTGetProperty(entry, propertyName, &prop, &propSize) == kSuccess) {
1016 if (propSize == sizeof(uint8_t)) {
1017 return *((uint8_t const *)prop);
1018 } else if (propSize == sizeof(uint16_t)) {
1019 return *((uint16_t const *)prop);
1020 } else if (propSize == sizeof(uint32_t)) {
1021 return *((uint32_t const *)prop);
1022 } else if (propSize == sizeof(uint64_t)) {
1023 return *((uint64_t const *)prop);
1024 } else {
1025 panic("CPU property '%s' has bad size %u", propertyName, propSize);
1026 }
1027 } else {
1028 if (default_value == ML_READPROP_MANDATORY) {
1029 panic("Missing mandatory property '%s'", propertyName);
1030 }
1031 return default_value;
1032 }
1033 }
1034
1035 static boolean_t
ml_read_reg_range(const DTEntry entry,const char * propertyName,uint64_t * pa_ptr,uint64_t * len_ptr)1036 ml_read_reg_range(const DTEntry entry, const char *propertyName, uint64_t *pa_ptr, uint64_t *len_ptr)
1037 {
1038 uint64_t const *prop;
1039 unsigned int propSize;
1040
1041 if (SecureDTGetProperty(entry, propertyName, (void const **)&prop, &propSize) != kSuccess) {
1042 return FALSE;
1043 }
1044
1045 if (propSize != sizeof(uint64_t) * 2) {
1046 panic("Wrong property size for %s", propertyName);
1047 }
1048
1049 *pa_ptr = prop[0];
1050 *len_ptr = prop[1];
1051 return TRUE;
1052 }
1053
1054 static boolean_t
ml_is_boot_cpu(const DTEntry entry)1055 ml_is_boot_cpu(const DTEntry entry)
1056 {
1057 void const *prop;
1058 unsigned int propSize;
1059
1060 if (SecureDTGetProperty(entry, "state", &prop, &propSize) != kSuccess) {
1061 panic("unable to retrieve state for cpu");
1062 }
1063
1064 if (strncmp((char const *)prop, "running", propSize) == 0) {
1065 return TRUE;
1066 } else {
1067 return FALSE;
1068 }
1069 }
1070
1071 static void
ml_cluster_power_override(unsigned int * flag)1072 ml_cluster_power_override(unsigned int *flag)
1073 {
1074 #if XNU_CLUSTER_POWER_DOWN
1075 /*
1076 * Old method (H14/H15): enable CPD in the kernel build
1077 * For H16+, *flag may have be set to 1 through EDT
1078 */
1079 *flag = 1;
1080 #endif
1081
1082 /*
1083 * If a boot-arg is set that allows threads to be bound
1084 * to a cpu or cluster, cluster_power_down must
1085 * default to false.
1086 */
1087 #ifdef CONFIG_XNUPOST
1088 uint64_t kernel_post = 0;
1089 PE_parse_boot_argn("kernPOST", &kernel_post, sizeof(kernel_post));
1090 if (kernel_post != 0) {
1091 *flag = 0;
1092 }
1093 #endif
1094 if (PE_parse_boot_argn("enable_skstb", NULL, 0)) {
1095 *flag = 0;
1096 }
1097 if (PE_parse_boot_argn("enable_skstsct", NULL, 0)) {
1098 *flag = 0;
1099 }
1100
1101 /* Always let the user manually override, even if it's unsupported */
1102 PE_parse_boot_argn("cluster_power", flag, sizeof(*flag));
1103 }
1104
1105 static void
ml_read_chip_revision(unsigned int * rev __unused)1106 ml_read_chip_revision(unsigned int *rev __unused)
1107 {
1108 // The CPU_VERSION_* macros are only defined on APPLE_ARM64_ARCH_FAMILY builds
1109 #ifdef APPLE_ARM64_ARCH_FAMILY
1110 DTEntry entryP;
1111
1112 if ((SecureDTFindEntry("name", "arm-io", &entryP) == kSuccess)) {
1113 *rev = (unsigned int)ml_readprop(entryP, "chip-revision", CPU_VERSION_UNKNOWN);
1114 } else {
1115 *rev = CPU_VERSION_UNKNOWN;
1116 }
1117 #endif
1118 }
1119
1120 void
ml_parse_cpu_topology(void)1121 ml_parse_cpu_topology(void)
1122 {
1123 DTEntry entry, child __unused;
1124 OpaqueDTEntryIterator iter;
1125 uint32_t cpu_boot_arg = MAX_CPUS;
1126 uint64_t cpumask_boot_arg = ULLONG_MAX;
1127 int err;
1128
1129 int64_t cluster_phys_to_logical[MAX_CPU_CLUSTER_PHY_ID + 1];
1130 int64_t cluster_max_cpu_phys_id[MAX_CPU_CLUSTER_PHY_ID + 1];
1131 const boolean_t cpus_boot_arg_present = PE_parse_boot_argn("cpus", &cpu_boot_arg, sizeof(cpu_boot_arg));
1132 const boolean_t cpumask_boot_arg_present = PE_parse_boot_argn("cpumask", &cpumask_boot_arg, sizeof(cpumask_boot_arg));
1133
1134 // The cpus=N and cpumask=N boot args cannot be used simultaneously. Flag this
1135 // so that we trigger a panic later in the boot process, once serial is enabled.
1136 if (cpus_boot_arg_present && cpumask_boot_arg_present) {
1137 cpu_config_correct = false;
1138 }
1139
1140 err = SecureDTLookupEntry(NULL, "/cpus", &entry);
1141 assert(err == kSuccess);
1142
1143 err = SecureDTInitEntryIterator(entry, &iter);
1144 assert(err == kSuccess);
1145
1146 for (int i = 0; i <= MAX_CPU_CLUSTER_PHY_ID; i++) {
1147 cluster_offsets[i] = -1;
1148 cluster_phys_to_logical[i] = -1;
1149 cluster_max_cpu_phys_id[i] = 0;
1150 }
1151
1152 while (kSuccess == SecureDTIterateEntries(&iter, &child)) {
1153 boolean_t is_boot_cpu = ml_is_boot_cpu(child);
1154 boolean_t cpu_enabled = cpumask_boot_arg & 1;
1155 cpumask_boot_arg >>= 1;
1156
1157 // Boot CPU disabled in cpumask. Flag this so that we trigger a panic
1158 // later in the boot process, once serial is enabled.
1159 if (is_boot_cpu && !cpu_enabled) {
1160 cpu_config_correct = false;
1161 }
1162
1163 // Ignore this CPU if it has been disabled by the cpumask= boot-arg.
1164 if (!is_boot_cpu && !cpu_enabled) {
1165 continue;
1166 }
1167
1168 // If the number of CPUs is constrained by the cpus= boot-arg, and the boot CPU hasn't
1169 // been added to the topology struct yet, and we only have one slot left, then skip
1170 // every other non-boot CPU in order to leave room for the boot CPU.
1171 //
1172 // e.g. if the boot-args say "cpus=3" and CPU4 is the boot CPU, then the cpus[]
1173 // array will list CPU0, CPU1, and CPU4. CPU2-CPU3 and CPU5-CPUn will be omitted.
1174 if (topology_info.num_cpus >= (cpu_boot_arg - 1) && topology_info.boot_cpu == NULL && !is_boot_cpu) {
1175 continue;
1176 }
1177 if (topology_info.num_cpus >= cpu_boot_arg) {
1178 break;
1179 }
1180
1181 ml_topology_cpu_t *cpu = &topology_info.cpus[topology_info.num_cpus];
1182
1183 cpu->cpu_id = topology_info.num_cpus++;
1184 assert(cpu->cpu_id < MAX_CPUS);
1185 topology_info.max_cpu_id = MAX(topology_info.max_cpu_id, cpu->cpu_id);
1186
1187 cpu->die_id = (int)ml_readprop(child, "die-id", 0);
1188 topology_info.max_die_id = MAX(topology_info.max_die_id, cpu->die_id);
1189
1190 cpu->phys_id = (uint32_t)ml_readprop(child, "reg", ML_READPROP_MANDATORY);
1191
1192 cpu->l2_access_penalty = (uint32_t)ml_readprop(child, "l2-access-penalty", 0);
1193 cpu->l2_cache_size = (uint32_t)ml_readprop(child, "l2-cache-size", 0);
1194 cpu->l2_cache_id = (uint32_t)ml_readprop(child, "l2-cache-id", 0);
1195 cpu->l3_cache_size = (uint32_t)ml_readprop(child, "l3-cache-size", 0);
1196 cpu->l3_cache_id = (uint32_t)ml_readprop(child, "l3-cache-id", 0);
1197
1198 ml_read_reg_range(child, "cpu-uttdbg-reg", &cpu->cpu_UTTDBG_pa, &cpu->cpu_UTTDBG_len);
1199 ml_read_reg_range(child, "cpu-impl-reg", &cpu->cpu_IMPL_pa, &cpu->cpu_IMPL_len);
1200 ml_read_reg_range(child, "coresight-reg", &cpu->coresight_pa, &cpu->coresight_len);
1201 cpu->cluster_type = CLUSTER_TYPE_SMP;
1202
1203 int cluster_type = (int)ml_readprop(child, "cluster-type", 0);
1204 if (cluster_type == 'E') {
1205 cpu->cluster_type = CLUSTER_TYPE_E;
1206 } else if (cluster_type == 'P') {
1207 cpu->cluster_type = CLUSTER_TYPE_P;
1208 }
1209
1210 if (ml_readprop(child, "cluster-power-down", 0)) {
1211 topology_info.cluster_power_down = 1;
1212 }
1213
1214 topology_info.cluster_type_num_cpus[cpu->cluster_type]++;
1215
1216 /*
1217 * Since we want to keep a linear cluster ID space, we cannot just rely
1218 * on the value provided by EDT. Instead, use the MPIDR value to see if we have
1219 * seen this exact cluster before. If so, then reuse that cluster ID for this CPU.
1220 */
1221 #if HAS_CLUSTER
1222 uint32_t phys_cluster_id = MPIDR_CLUSTER_ID(cpu->phys_id);
1223 #else
1224 uint32_t phys_cluster_id = (cpu->cluster_type == CLUSTER_TYPE_P);
1225 #endif
1226 assert(phys_cluster_id <= MAX_CPU_CLUSTER_PHY_ID);
1227 cpu->cluster_id = ((cluster_phys_to_logical[phys_cluster_id] == -1) ?
1228 topology_info.num_clusters : cluster_phys_to_logical[phys_cluster_id]);
1229
1230 assert(cpu->cluster_id < MAX_CPU_CLUSTERS);
1231
1232 ml_topology_cluster_t *cluster = &topology_info.clusters[cpu->cluster_id];
1233 if (cluster->num_cpus == 0) {
1234 assert(topology_info.num_clusters < MAX_CPU_CLUSTERS);
1235
1236 topology_info.num_clusters++;
1237 topology_info.max_cluster_id = MAX(topology_info.max_cluster_id, cpu->cluster_id);
1238 topology_info.cluster_types |= (1 << cpu->cluster_type);
1239
1240 cluster->cluster_id = cpu->cluster_id;
1241 cluster->die_id = cpu->die_id;
1242 cluster->cluster_type = cpu->cluster_type;
1243 cluster->first_cpu_id = cpu->cpu_id;
1244 assert(cluster_phys_to_logical[phys_cluster_id] == -1);
1245 cluster_phys_to_logical[phys_cluster_id] = cpu->cluster_id;
1246
1247 topology_info.cluster_type_num_clusters[cluster->cluster_type]++;
1248
1249 // Since we don't have a per-cluster EDT node, this is repeated in each CPU node.
1250 // If we wind up with a bunch of these, we might want to create separate per-cluster
1251 // EDT nodes and have the CPU nodes reference them through a phandle.
1252 ml_read_reg_range(child, "acc-impl-reg", &cluster->acc_IMPL_pa, &cluster->acc_IMPL_len);
1253 ml_read_reg_range(child, "cpm-impl-reg", &cluster->cpm_IMPL_pa, &cluster->cpm_IMPL_len);
1254 }
1255
1256 #if HAS_CLUSTER
1257 if (MPIDR_CPU_ID(cpu->phys_id) > cluster_max_cpu_phys_id[phys_cluster_id]) {
1258 cluster_max_cpu_phys_id[phys_cluster_id] = MPIDR_CPU_ID(cpu->phys_id);
1259 }
1260 #endif
1261
1262 cpu->die_cluster_id = (int)ml_readprop(child, "die-cluster-id", MPIDR_CLUSTER_ID(cpu->phys_id));
1263 cluster->die_cluster_id = cpu->die_cluster_id;
1264
1265 cpu->cluster_core_id = (int)ml_readprop(child, "cluster-core-id", MPIDR_CPU_ID(cpu->phys_id));
1266
1267 cluster->num_cpus++;
1268 cluster->cpu_mask |= 1ULL << cpu->cpu_id;
1269
1270 if (is_boot_cpu) {
1271 assert(topology_info.boot_cpu == NULL);
1272 topology_info.boot_cpu = cpu;
1273 topology_info.boot_cluster = cluster;
1274 }
1275
1276 #if CONFIG_SPTM
1277 sptm_register_cpu(cpu->phys_id);
1278 #endif
1279 }
1280
1281 #if HAS_CLUSTER
1282 /*
1283 * Build the cluster offset array, ensuring that the region reserved
1284 * for each physical cluster contains enough entries to be indexed
1285 * by the maximum physical CPU ID (AFF0) within the cluster.
1286 */
1287 unsigned int cur_cluster_offset = 0;
1288 for (int i = 0; i <= MAX_CPU_CLUSTER_PHY_ID; i++) {
1289 if (cluster_phys_to_logical[i] != -1) {
1290 cluster_offsets[i] = cur_cluster_offset;
1291 cur_cluster_offset += (cluster_max_cpu_phys_id[i] + 1);
1292 }
1293 }
1294 assert(cur_cluster_offset <= MAX_CPUS);
1295 #else
1296 /*
1297 * For H10, there are really 2 physical clusters, but they are not separated
1298 * into distinct ACCs. AFF1 therefore always reports 0, and AFF0 numbering
1299 * is linear across both clusters. For the purpose of MPIDR_EL1-based indexing,
1300 * treat H10 and earlier devices as though they contain a single cluster.
1301 */
1302 cluster_offsets[0] = 0;
1303 #endif
1304 assert(topology_info.boot_cpu != NULL);
1305 ml_read_chip_revision(&topology_info.chip_revision);
1306 ml_cluster_power_override(&topology_info.cluster_power_down);
1307
1308 /*
1309 * Set TPIDR_EL0 to indicate the correct cpu number & cluster id,
1310 * as we may not be booting from cpu 0. Userspace will consume
1311 * the current CPU number through this register. For non-boot
1312 * cores, this is done in start.s (start_cpu) using the per-cpu
1313 * data object.
1314 */
1315 ml_topology_cpu_t *boot_cpu = topology_info.boot_cpu;
1316 uint64_t tpidr_el0 = ((boot_cpu->cpu_id << MACHDEP_TPIDR_CPUNUM_SHIFT) & MACHDEP_TPIDR_CPUNUM_MASK) | \
1317 ((boot_cpu->cluster_id << MACHDEP_TPIDR_CLUSTERID_SHIFT) & MACHDEP_TPIDR_CLUSTERID_MASK);
1318 assert(((tpidr_el0 & MACHDEP_TPIDR_CPUNUM_MASK) >> MACHDEP_TPIDR_CPUNUM_SHIFT) == boot_cpu->cpu_id);
1319 assert(((tpidr_el0 & MACHDEP_TPIDR_CLUSTERID_MASK) >> MACHDEP_TPIDR_CLUSTERID_SHIFT) == boot_cpu->cluster_id);
1320 __builtin_arm_wsr64("TPIDR_EL0", tpidr_el0);
1321
1322 __builtin_arm_wsr64("TPIDRRO_EL0", 0);
1323 }
1324
1325 const ml_topology_info_t *
ml_get_topology_info(void)1326 ml_get_topology_info(void)
1327 {
1328 return &topology_info;
1329 }
1330
1331 void
ml_map_cpu_pio(void)1332 ml_map_cpu_pio(void)
1333 {
1334 unsigned int i;
1335
1336 for (i = 0; i < topology_info.num_cpus; i++) {
1337 ml_topology_cpu_t *cpu = &topology_info.cpus[i];
1338 if (cpu->cpu_IMPL_pa) {
1339 cpu->cpu_IMPL_regs = (vm_offset_t)ml_io_map(cpu->cpu_IMPL_pa, cpu->cpu_IMPL_len);
1340 cpu->coresight_regs = (vm_offset_t)ml_io_map(cpu->coresight_pa, cpu->coresight_len);
1341 }
1342 if (cpu->cpu_UTTDBG_pa) {
1343 cpu->cpu_UTTDBG_regs = (vm_offset_t)ml_io_map(cpu->cpu_UTTDBG_pa, cpu->cpu_UTTDBG_len);
1344 }
1345 }
1346
1347 for (i = 0; i < topology_info.num_clusters; i++) {
1348 ml_topology_cluster_t *cluster = &topology_info.clusters[i];
1349 if (cluster->acc_IMPL_pa) {
1350 cluster->acc_IMPL_regs = (vm_offset_t)ml_io_map(cluster->acc_IMPL_pa, cluster->acc_IMPL_len);
1351 }
1352 if (cluster->cpm_IMPL_pa) {
1353 cluster->cpm_IMPL_regs = (vm_offset_t)ml_io_map(cluster->cpm_IMPL_pa, cluster->cpm_IMPL_len);
1354 }
1355 }
1356 }
1357
1358 unsigned int
ml_get_cpu_count(void)1359 ml_get_cpu_count(void)
1360 {
1361 return topology_info.num_cpus;
1362 }
1363
1364 unsigned int
ml_get_cluster_count(void)1365 ml_get_cluster_count(void)
1366 {
1367 return topology_info.num_clusters;
1368 }
1369
1370 int
ml_get_boot_cpu_number(void)1371 ml_get_boot_cpu_number(void)
1372 {
1373 return topology_info.boot_cpu->cpu_id;
1374 }
1375
1376 cluster_type_t
ml_get_boot_cluster_type(void)1377 ml_get_boot_cluster_type(void)
1378 {
1379 return topology_info.boot_cluster->cluster_type;
1380 }
1381
1382 int
ml_get_cpu_number(uint32_t phys_id)1383 ml_get_cpu_number(uint32_t phys_id)
1384 {
1385 phys_id &= MPIDR_AFF1_MASK | MPIDR_AFF0_MASK;
1386
1387 for (unsigned i = 0; i < topology_info.num_cpus; i++) {
1388 if (topology_info.cpus[i].phys_id == phys_id) {
1389 return i;
1390 }
1391 }
1392
1393 return -1;
1394 }
1395
1396 int
ml_get_cluster_number(uint32_t phys_id)1397 ml_get_cluster_number(uint32_t phys_id)
1398 {
1399 int cpu_id = ml_get_cpu_number(phys_id);
1400 if (cpu_id < 0) {
1401 return -1;
1402 }
1403
1404 ml_topology_cpu_t *cpu = &topology_info.cpus[cpu_id];
1405
1406 return cpu->cluster_id;
1407 }
1408
/*
 * Return the logical CPU number of the calling CPU, derived from MPIDR_EL1.
 *
 * The lookup result is only validated by an assert against
 * ml_get_max_cpu_number().
 */
unsigned int
ml_get_cpu_number_local(void)
{
	uint64_t mpidr = 0;

	/* We identify the CPU based on the constant bits of MPIDR_EL1. */
	MRS(mpidr, "MPIDR_EL1");

	const unsigned int cpu_num = ml_get_cpu_number((uint32_t)mpidr);

	assert(cpu_num <= (unsigned int)ml_get_max_cpu_number());

	return cpu_num;
}
1423
/*
 * Return the logical cluster number of the calling CPU, derived from
 * MPIDR_EL1.
 *
 * The lookup result is only validated by an assert against
 * ml_get_max_cluster_number().
 */
int
ml_get_cluster_number_local(void)
{
	uint64_t mpidr_el1_value = 0;
	unsigned cluster_id;

	/* We identify the cluster based on the constant bits of MPIDR_EL1. */
	MRS(mpidr_el1_value, "MPIDR_EL1");
	cluster_id = ml_get_cluster_number((uint32_t)mpidr_el1_value);

	assert(cluster_id <= (unsigned int)ml_get_max_cluster_number());

	return cluster_id;
}
1438
1439 int
ml_get_max_cpu_number(void)1440 ml_get_max_cpu_number(void)
1441 {
1442 return topology_info.max_cpu_id;
1443 }
1444
1445 int
ml_get_max_cluster_number(void)1446 ml_get_max_cluster_number(void)
1447 {
1448 return topology_info.max_cluster_id;
1449 }
1450
1451 unsigned int
ml_get_first_cpu_id(unsigned int cluster_id)1452 ml_get_first_cpu_id(unsigned int cluster_id)
1453 {
1454 return topology_info.clusters[cluster_id].first_cpu_id;
1455 }
1456
1457 static_assert(MAX_CPUS <= 256, "MAX_CPUS must fit in _COMM_PAGE_CPU_TO_CLUSTER; Increase table size if needed");
1458
1459 void
ml_map_cpus_to_clusters(uint8_t * table)1460 ml_map_cpus_to_clusters(uint8_t *table)
1461 {
1462 for (uint16_t cpu_id = 0; cpu_id < topology_info.num_cpus; cpu_id++) {
1463 *(table + cpu_id) = (uint8_t)(topology_info.cpus[cpu_id].cluster_id);
1464 }
1465 }
1466
1467 /*
1468 * Return the die id of a cluster.
1469 */
1470 unsigned int
ml_get_die_id(unsigned int cluster_id)1471 ml_get_die_id(unsigned int cluster_id)
1472 {
1473 /*
1474 * The current implementation gets the die_id from the
1475 * first CPU of the cluster.
1476 * rdar://80917654 (Add the die_id field to the cluster topology info)
1477 */
1478 unsigned int first_cpu = ml_get_first_cpu_id(cluster_id);
1479 return topology_info.cpus[first_cpu].die_id;
1480 }
1481
1482 /*
1483 * Return the index of a cluster in its die.
1484 */
1485 unsigned int
ml_get_die_cluster_id(unsigned int cluster_id)1486 ml_get_die_cluster_id(unsigned int cluster_id)
1487 {
1488 /*
1489 * The current implementation gets the die_id from the
1490 * first CPU of the cluster.
1491 * rdar://80917654 (Add the die_id field to the cluster topology info)
1492 */
1493 unsigned int first_cpu = ml_get_first_cpu_id(cluster_id);
1494 return topology_info.cpus[first_cpu].die_cluster_id;
1495 }
1496
1497 /*
1498 * Return the highest die id of the system.
1499 */
1500 unsigned int
ml_get_max_die_id(void)1501 ml_get_max_die_id(void)
1502 {
1503 return topology_info.max_die_id;
1504 }
1505
/*
 * Lockdown initialization hook.  On KERNEL_INTEGRITY_KTRR / _CTRR builds it
 * calls rorgn_stash_range(); on other builds it does nothing.
 */
void
ml_lockdown_init(void)
{
#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
	rorgn_stash_range();
#endif
}
1513
1514 kern_return_t
ml_lockdown_handler_register(lockdown_handler_t f,void * this)1515 ml_lockdown_handler_register(lockdown_handler_t f, void *this)
1516 {
1517 if (!f) {
1518 return KERN_FAILURE;
1519 }
1520
1521 assert(lockdown_done);
1522 f(this); // XXX: f this whole function
1523
1524 return KERN_SUCCESS;
1525 }
1526
1527 static mcache_flush_function mcache_flush_func;
1528 static void* mcache_flush_service;
1529 kern_return_t
ml_mcache_flush_callback_register(mcache_flush_function func,void * service)1530 ml_mcache_flush_callback_register(mcache_flush_function func, void *service)
1531 {
1532 mcache_flush_service = service;
1533 mcache_flush_func = func;
1534
1535 return KERN_SUCCESS;
1536 }
1537
1538 kern_return_t
ml_mcache_flush(void)1539 ml_mcache_flush(void)
1540 {
1541 if (!mcache_flush_func) {
1542 panic("Cannot flush M$ with no flush callback registered");
1543
1544 return KERN_FAILURE;
1545 } else {
1546 return mcache_flush_func(mcache_flush_service);
1547 }
1548 }
1549
1550
1551 kern_return_t ml_mem_fault_report_enable_register(void);
1552 kern_return_t
ml_mem_fault_report_enable_register(void)1553 ml_mem_fault_report_enable_register(void)
1554 {
1555 return KERN_SUCCESS;
1556 }
1557
1558 kern_return_t ml_amcc_error_inject_register(void);
1559 kern_return_t
ml_amcc_error_inject_register(void)1560 ml_amcc_error_inject_register(void)
1561 {
1562 return KERN_SUCCESS;
1563 }
1564
1565 kern_return_t ml_dcs_error_inject_register(void);
1566 kern_return_t
ml_dcs_error_inject_register(void)1567 ml_dcs_error_inject_register(void)
1568 {
1569 return KERN_SUCCESS;
1570 }
1571
1572
1573 extern lck_mtx_t pset_create_lock;
1574
1575 kern_return_t
ml_processor_register(ml_processor_info_t * in_processor_info,processor_t * processor_out,ipi_handler_t * ipi_handler_out,perfmon_interrupt_handler_func * pmi_handler_out)1576 ml_processor_register(ml_processor_info_t *in_processor_info,
1577 processor_t *processor_out, ipi_handler_t *ipi_handler_out,
1578 perfmon_interrupt_handler_func *pmi_handler_out)
1579 {
1580 cpu_data_t *this_cpu_datap;
1581 processor_set_t pset;
1582 boolean_t is_boot_cpu;
1583 static unsigned int reg_cpu_count = 0;
1584
1585 if (in_processor_info->log_id > (uint32_t)ml_get_max_cpu_number()) {
1586 return KERN_FAILURE;
1587 }
1588
1589 if ((unsigned)OSIncrementAtomic((SInt32*)®_cpu_count) >= topology_info.num_cpus) {
1590 return KERN_FAILURE;
1591 }
1592
1593 if (in_processor_info->log_id != (uint32_t)ml_get_boot_cpu_number()) {
1594 is_boot_cpu = FALSE;
1595 this_cpu_datap = cpu_data_alloc(FALSE);
1596 cpu_data_init(this_cpu_datap);
1597 } else {
1598 this_cpu_datap = &BootCpuData;
1599 is_boot_cpu = TRUE;
1600 /*
1601 * Note that ml_processor_register happens for the boot cpu
1602 * *after* it starts running arbitrary threads, possibly
1603 * including *userspace*, depending on how long the CPU
1604 * services take to match.
1605 */
1606 }
1607
1608 assert(in_processor_info->log_id <= (uint32_t)ml_get_max_cpu_number());
1609
1610 this_cpu_datap->cpu_id = in_processor_info->cpu_id;
1611
1612 if (!is_boot_cpu) {
1613 this_cpu_datap->cpu_number = (unsigned short)(in_processor_info->log_id);
1614 cpu_data_register(this_cpu_datap);
1615 assert((this_cpu_datap->cpu_number & MACHDEP_TPIDR_CPUNUM_MASK) == this_cpu_datap->cpu_number);
1616 }
1617
1618 this_cpu_datap->cpu_idle_notify = in_processor_info->processor_idle;
1619 this_cpu_datap->cpu_cache_dispatch = (cache_dispatch_t)in_processor_info->platform_cache_dispatch;
1620 nanoseconds_to_absolutetime((uint64_t) in_processor_info->powergate_latency, &this_cpu_datap->cpu_idle_latency);
1621 this_cpu_datap->cpu_reset_assist = kvtophys(in_processor_info->powergate_stub_addr);
1622
1623 this_cpu_datap->idle_timer_notify = in_processor_info->idle_timer;
1624 this_cpu_datap->idle_timer_refcon = in_processor_info->idle_timer_refcon;
1625
1626 this_cpu_datap->platform_error_handler = in_processor_info->platform_error_handler;
1627 this_cpu_datap->cpu_regmap_paddr = in_processor_info->regmap_paddr;
1628 this_cpu_datap->cpu_phys_id = in_processor_info->phys_id;
1629 this_cpu_datap->cpu_l2_access_penalty = in_processor_info->l2_access_penalty;
1630
1631 this_cpu_datap->cpu_cluster_type = in_processor_info->cluster_type;
1632 this_cpu_datap->cpu_cluster_id = in_processor_info->cluster_id;
1633 this_cpu_datap->cpu_l2_id = in_processor_info->l2_cache_id;
1634 this_cpu_datap->cpu_l2_size = in_processor_info->l2_cache_size;
1635 this_cpu_datap->cpu_l3_id = in_processor_info->l3_cache_id;
1636 this_cpu_datap->cpu_l3_size = in_processor_info->l3_cache_size;
1637
1638 /*
1639 * Encode cpu_id, cluster_id to be stored in TPIDR_EL0 (see
1640 * cswitch.s:set_thread_registers, start.s:start_cpu) for consumption
1641 * by userspace.
1642 */
1643 this_cpu_datap->cpu_tpidr_el0 = ((this_cpu_datap->cpu_number << MACHDEP_TPIDR_CPUNUM_SHIFT) & MACHDEP_TPIDR_CPUNUM_MASK) | \
1644 ((this_cpu_datap->cpu_cluster_id << MACHDEP_TPIDR_CLUSTERID_SHIFT) & MACHDEP_TPIDR_CLUSTERID_MASK);
1645 assert(((this_cpu_datap->cpu_tpidr_el0 & MACHDEP_TPIDR_CPUNUM_MASK) >> MACHDEP_TPIDR_CPUNUM_SHIFT) == this_cpu_datap->cpu_number);
1646 assert(((this_cpu_datap->cpu_tpidr_el0 & MACHDEP_TPIDR_CLUSTERID_MASK) >> MACHDEP_TPIDR_CLUSTERID_SHIFT) == this_cpu_datap->cpu_cluster_id);
1647
1648 #if HAS_CLUSTER
1649 this_cpu_datap->cluster_master = !OSTestAndSet(this_cpu_datap->cpu_cluster_id, &cluster_initialized);
1650 #else /* HAS_CLUSTER */
1651 this_cpu_datap->cluster_master = is_boot_cpu;
1652 #endif /* HAS_CLUSTER */
1653 lck_mtx_lock(&pset_create_lock);
1654 pset = pset_find(in_processor_info->cluster_id, NULL);
1655 kprintf("[%d]%s>pset_find(cluster_id=%d) returned pset %d\n", current_processor()->cpu_id, __FUNCTION__, in_processor_info->cluster_id, pset ? pset->pset_id : -1);
1656 if (pset == NULL) {
1657 pset_cluster_type_t pset_cluster_type = cluster_type_to_pset_cluster_type(this_cpu_datap->cpu_cluster_type);
1658 pset_node_t pset_node = cluster_type_to_pset_node(this_cpu_datap->cpu_cluster_type);
1659 pset = pset_create(pset_node, pset_cluster_type, this_cpu_datap->cpu_cluster_id, this_cpu_datap->cpu_cluster_id);
1660 assert(pset != PROCESSOR_SET_NULL);
1661 #if __AMP__
1662 kprintf("[%d]%s>pset_create(cluster_id=%d) returned pset %d\n", current_processor()->cpu_id, __FUNCTION__, this_cpu_datap->cpu_cluster_id, pset->pset_id);
1663 #endif /* __AMP__ */
1664 }
1665 kprintf("[%d]%s>cpu_id %p cluster_id %d cpu_number %d is type %d\n", current_processor()->cpu_id, __FUNCTION__, in_processor_info->cpu_id, in_processor_info->cluster_id, this_cpu_datap->cpu_number, in_processor_info->cluster_type);
1666 lck_mtx_unlock(&pset_create_lock);
1667
1668 processor_t processor = PERCPU_GET_RELATIVE(processor, cpu_data, this_cpu_datap);
1669 if (!is_boot_cpu) {
1670 processor_init(processor, this_cpu_datap->cpu_number, pset);
1671
1672 if (this_cpu_datap->cpu_l2_access_penalty) {
1673 /*
1674 * Cores that have a non-zero L2 access penalty compared
1675 * to the boot processor should be de-prioritized by the
1676 * scheduler, so that threads use the cores with better L2
1677 * preferentially.
1678 */
1679 processor_set_primary(processor, master_processor);
1680 }
1681 }
1682
1683 *processor_out = processor;
1684 *ipi_handler_out = cpu_signal_handler;
1685 #if CPMU_AIC_PMI && CONFIG_CPU_COUNTERS
1686 *pmi_handler_out = mt_cpmu_aic_pmi;
1687 #else
1688 *pmi_handler_out = NULL;
1689 #endif /* CPMU_AIC_PMI && CONFIG_CPU_COUNTERS */
1690 if (in_processor_info->idle_tickle != (idle_tickle_t *) NULL) {
1691 *in_processor_info->idle_tickle = (idle_tickle_t) cpu_idle_tickle;
1692 }
1693
1694 #if CONFIG_CPU_COUNTERS
1695 kpc_register_cpu(this_cpu_datap);
1696 #endif /* CONFIG_CPU_COUNTERS */
1697
1698 #ifdef APPLEEVEREST
1699 /**
1700 * H15 SoCs have PIO lockdown applied at early boot for secondary CPUs.
1701 * Save PIO lock base addreses.
1702 */
1703 const uint32_t log_id = in_processor_info->log_id;
1704 const unsigned int cluster_id = topology_info.cpus[log_id].cluster_id;
1705 this_cpu_datap->cpu_reg_paddr = topology_info.cpus[log_id].cpu_IMPL_pa;
1706 this_cpu_datap->acc_reg_paddr = topology_info.clusters[cluster_id].acc_IMPL_pa;
1707 this_cpu_datap->cpm_reg_paddr = topology_info.clusters[cluster_id].cpm_IMPL_pa;
1708 #endif
1709
1710
1711 if (!is_boot_cpu) {
1712 random_cpu_init(this_cpu_datap->cpu_number);
1713 // now let next CPU register itself
1714 OSIncrementAtomic((SInt32*)&real_ncpus);
1715 }
1716
1717 os_atomic_or(&this_cpu_datap->cpu_flags, InitState, relaxed);
1718
1719 #if !USE_APPLEARMSMP
1720 /*
1721 * AppleARMCPU's external processor_start call is now a no-op, so
1722 * boot the processor directly when it's registered.
1723 *
1724 * It needs to be booted here for the boot processor to finish the
1725 * subsequent registerInterrupt operations and unblock the other cores.
1726 */
1727 processor_boot(processor);
1728 #endif /* !USE_APPLEARMSMP */
1729
1730 return KERN_SUCCESS;
1731 }
1732
1733 void
ml_init_arm_debug_interface(void * in_cpu_datap,vm_offset_t virt_address)1734 ml_init_arm_debug_interface(
1735 void * in_cpu_datap,
1736 vm_offset_t virt_address)
1737 {
1738 ((cpu_data_t *)in_cpu_datap)->cpu_debug_interface_map = virt_address;
1739 do_debugid();
1740 }
1741
1742 /*
1743 * Routine: init_ast_check
1744 * Function:
1745 */
1746 void
init_ast_check(__unused processor_t processor)1747 init_ast_check(
1748 __unused processor_t processor)
1749 {
1750 }
1751
1752 /*
1753 * Routine: cause_ast_check
1754 * Function:
1755 */
1756 void
cause_ast_check(processor_t processor)1757 cause_ast_check(
1758 processor_t processor)
1759 {
1760 assert(processor != PROCESSOR_NULL);
1761
1762 if (current_processor() != processor) {
1763 cpu_signal(processor_to_cpu_datap(processor), SIGPast, (void *)NULL, (void *)NULL);
1764 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), processor->cpu_id, 1 /* ast */, 0, 0, 0);
1765 }
1766 }
1767
1768 extern uint32_t cpu_idle_count;
1769
1770 void
ml_get_power_state(boolean_t * icp,boolean_t * pidlep)1771 ml_get_power_state(boolean_t *icp, boolean_t *pidlep)
1772 {
1773 *icp = ml_at_interrupt_context();
1774 *pidlep = (cpu_idle_count == real_ncpus);
1775 }
1776
/*
 *	Routine:        ml_cause_interrupt
 *	Function:	Generate a fake interrupt
 *
 *	Currently a no-op (marked BS_XXX in the original source).
 */
void
ml_cause_interrupt(void)
{
	/* BS_XXX: intentionally does nothing. */
}
1786
1787 /* Map memory map IO space */
1788 vm_offset_t
ml_io_map(vm_offset_t phys_addr,vm_size_t size)1789 ml_io_map(
1790 vm_offset_t phys_addr,
1791 vm_size_t size)
1792 {
1793 return io_map(phys_addr, size, VM_WIMG_IO, VM_PROT_DEFAULT, false);
1794 }
1795
1796 /* Map memory map IO space (with protections specified) */
1797 vm_offset_t
ml_io_map_with_prot(vm_offset_t phys_addr,vm_size_t size,vm_prot_t prot)1798 ml_io_map_with_prot(
1799 vm_offset_t phys_addr,
1800 vm_size_t size,
1801 vm_prot_t prot)
1802 {
1803 return io_map(phys_addr, size, VM_WIMG_IO, prot, false);
1804 }
1805
1806 vm_offset_t
ml_io_map_unmappable(vm_offset_t phys_addr,vm_size_t size,unsigned int flags)1807 ml_io_map_unmappable(
1808 vm_offset_t phys_addr,
1809 vm_size_t size,
1810 unsigned int flags)
1811 {
1812 return io_map(phys_addr, size, flags, VM_PROT_DEFAULT, true);
1813 }
1814
1815 vm_offset_t
ml_io_map_wcomb(vm_offset_t phys_addr,vm_size_t size)1816 ml_io_map_wcomb(
1817 vm_offset_t phys_addr,
1818 vm_size_t size)
1819 {
1820 return io_map(phys_addr, size, VM_WIMG_WCOMB, VM_PROT_DEFAULT, false);
1821 }
1822
1823 void
ml_io_unmap(vm_offset_t addr,vm_size_t sz)1824 ml_io_unmap(vm_offset_t addr, vm_size_t sz)
1825 {
1826 pmap_remove(kernel_pmap, addr, addr + sz);
1827 kmem_free(kernel_map, addr, sz);
1828 }
1829
1830 vm_map_address_t
ml_map_high_window(vm_offset_t phys_addr,vm_size_t len)1831 ml_map_high_window(
1832 vm_offset_t phys_addr,
1833 vm_size_t len)
1834 {
1835 return pmap_map_high_window_bd(phys_addr, len, VM_PROT_READ | VM_PROT_WRITE);
1836 }
1837
1838 vm_offset_t
ml_static_ptovirt(vm_offset_t paddr)1839 ml_static_ptovirt(
1840 vm_offset_t paddr)
1841 {
1842 return phystokv(paddr);
1843 }
1844
1845 vm_offset_t
ml_static_slide(vm_offset_t vaddr)1846 ml_static_slide(
1847 vm_offset_t vaddr)
1848 {
1849 vm_offset_t slid_vaddr = 0;
1850
1851 #if CONFIG_SPTM
1852 if ((vaddr >= vm_sptm_offsets.unslid_base) && (vaddr < vm_sptm_offsets.unslid_top)) {
1853 slid_vaddr = vaddr + vm_sptm_offsets.slide;
1854 } else if ((vaddr >= vm_txm_offsets.unslid_base) && (vaddr < vm_txm_offsets.unslid_top)) {
1855 slid_vaddr = vaddr + vm_txm_offsets.slide;
1856 } else
1857 #endif /* CONFIG_SPTM */
1858 {
1859 slid_vaddr = vaddr + vm_kernel_slide;
1860 }
1861
1862 if (!VM_KERNEL_IS_SLID(slid_vaddr)) {
1863 /* This is only intended for use on static kernel addresses. */
1864 return 0;
1865 }
1866
1867 return slid_vaddr;
1868 }
1869
1870 vm_offset_t
ml_static_unslide(vm_offset_t vaddr)1871 ml_static_unslide(
1872 vm_offset_t vaddr)
1873 {
1874 if (!VM_KERNEL_IS_SLID(vaddr)) {
1875 /* This is only intended for use on static kernel addresses. */
1876 return 0;
1877 }
1878
1879 #if CONFIG_SPTM
1880 /**
1881 * Addresses coming from the SPTM and TXM have a different slide than the
1882 * rest of the kernel.
1883 */
1884 if ((vaddr >= vm_sptm_offsets.slid_base) && (vaddr < vm_sptm_offsets.slid_top)) {
1885 return vaddr - vm_sptm_offsets.slide;
1886 }
1887
1888 if ((vaddr >= vm_txm_offsets.slid_base) && (vaddr < vm_txm_offsets.slid_top)) {
1889 return vaddr - vm_txm_offsets.slide;
1890 }
1891 #endif /* CONFIG_SPTM */
1892
1893 return vaddr - vm_kernel_slide;
1894 }
1895
/*
 * Defined outside this block; used below to obtain the translation table
 * entry that is inspected for vaddr_cur (block vs. table type).
 */
extern tt_entry_t *arm_kva_to_tte(vm_offset_t va);

/**
 * Apply new protections to a range of static kernel mappings.
 *
 * With CONFIG_SPTM, new_prot is not consulted: every page in the range whose
 * frame type is not XNU_DEFAULT is retyped to XNU_DEFAULT and KERN_SUCCESS is
 * returned.
 *
 * Without CONFIG_SPTM, the function edits the kernel page tables directly.
 * It panics if vaddr is below physmap_base, if both write and execute are
 * requested, or if execute is requested once lockdown_done is set. Pages
 * without a translation in kernel_pmap are skipped. Block mappings and
 * contiguous-hint mappings cannot be modified; they are accepted only if they
 * already carry the requested protections.
 *
 * @param vaddr    Page-aligned kernel virtual address (asserted in the
 *                 non-SPTM path).
 * @param size     Length of the range; pages are visited while the current
 *                 address is below trunc_page_64(vaddr + size).
 * @param new_prot Requested VM_PROT_* bits (only used without CONFIG_SPTM).
 *
 * @return KERN_SUCCESS, or KERN_FAILURE when a block or contiguous-hint
 *         mapping with mismatched protections is encountered.
 */
kern_return_t
ml_static_protect(
	vm_offset_t vaddr, /* kernel virtual address */
	vm_size_t size,
	vm_prot_t new_prot __unused)
{
#if CONFIG_SPTM
	/**
	 * Retype any frames that may be passed to the VM to XNU_DEFAULT.
	 */
	for (vm_offset_t sptm_vaddr_cur = vaddr; sptm_vaddr_cur < trunc_page_64(vaddr + size); sptm_vaddr_cur += PAGE_SIZE) {
		/* Check if this frame is XNU_DEFAULT and only retype it if is not */
		sptm_paddr_t sptm_paddr_cur = kvtophys_nofail(sptm_vaddr_cur);
		sptm_frame_type_t current_type = sptm_get_frame_type(sptm_paddr_cur);
		if (current_type != XNU_DEFAULT) {
			sptm_retype_params_t retype_params = {.raw = SPTM_RETYPE_PARAMS_NULL};
			sptm_retype(sptm_paddr_cur, current_type, XNU_DEFAULT, retype_params);
		}
	}

	return KERN_SUCCESS;
#else /* CONFIG_SPTM */
	/* Desired PTE-level and block-TTE-level protection bits, built below. */
	pt_entry_t arm_prot = 0;
	pt_entry_t arm_block_prot = 0;
	vm_offset_t vaddr_cur;
	ppnum_t ppn;
	kern_return_t result = KERN_SUCCESS;

	if (vaddr < physmap_base) {
		panic("ml_static_protect(): %p < %p", (void *) vaddr, (void *) physmap_base);
		return KERN_FAILURE;
	}

	assert((vaddr & (PAGE_SIZE - 1)) == 0); /* must be page aligned */

	/* Writable + executable is never allowed. */
	if ((new_prot & VM_PROT_WRITE) && (new_prot & VM_PROT_EXECUTE)) {
		panic("ml_static_protect(): WX request on %p", (void *) vaddr);
	}
	/* No new executable mappings once lockdown_done is set. */
	if (lockdown_done && (new_prot & VM_PROT_EXECUTE)) {
		panic("ml_static_protect(): attempt to inject executable mapping on %p", (void *) vaddr);
	}

	/* Set up the protection bits, and block bits so we can validate block mappings. */
	if (new_prot & VM_PROT_WRITE) {
		arm_prot |= ARM_PTE_AP(AP_RWNA);
		arm_block_prot |= ARM_TTE_BLOCK_AP(AP_RWNA);
	} else {
		arm_prot |= ARM_PTE_AP(AP_RONA);
		arm_block_prot |= ARM_TTE_BLOCK_AP(AP_RONA);
	}

	/* NX is always set; PNX is set as well unless execute was requested. */
	arm_prot |= ARM_PTE_NX;
	arm_block_prot |= ARM_TTE_BLOCK_NX;

	if (!(new_prot & VM_PROT_EXECUTE)) {
		arm_prot |= ARM_PTE_PNX;
		arm_block_prot |= ARM_TTE_BLOCK_PNX;
	}

	for (vaddr_cur = vaddr;
	    vaddr_cur < trunc_page_64(vaddr + size);
	    vaddr_cur += PAGE_SIZE) {
		ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
		/* A zero ppn means no translation was found; such pages are skipped. */
		if (ppn != (vm_offset_t) NULL) {
			tt_entry_t *tte2;
			pt_entry_t *pte_p;
			pt_entry_t ptmp;

#if XNU_MONITOR
			assert(!pmap_is_monitor(ppn));
			assert(!TEST_PAGE_RATIO_4);
#endif

			tte2 = arm_kva_to_tte(vaddr_cur);

			if (((*tte2) & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE) {
				if ((((*tte2) & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) &&
				    ((*tte2 & (ARM_TTE_BLOCK_NXMASK | ARM_TTE_BLOCK_PNXMASK | ARM_TTE_BLOCK_APMASK)) == arm_block_prot)) {
					/*
					 * We can support ml_static_protect on a block mapping if the mapping already has
					 * the desired protections.  We still want to run checks on a per-page basis.
					 */
					continue;
				}

				/* Neither a table entry nor a matching block: cannot honor the request. */
				result = KERN_FAILURE;
				break;
			}

			/* Locate the PTE: index the table referenced by *tte2 with the L3 index of vaddr_cur. */
			pte_p = (pt_entry_t *)&((tt_entry_t*)(phystokv((*tte2) & ARM_TTE_TABLE_MASK)))[(((vaddr_cur) & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT)];
			ptmp = *pte_p;

			if ((ptmp & ARM_PTE_HINT_MASK) && ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot)) {
				/*
				 * The contiguous hint is similar to a block mapping for ml_static_protect; if the existing
				 * protections do not match the desired protections, then we will fail (as we cannot update
				 * this mapping without updating other mappings as well).
				 */
				result = KERN_FAILURE;
				break;
			}

			__unreachable_ok_push
			if (TEST_PAGE_RATIO_4) {
				{
					unsigned int i;
					pt_entry_t *ptep_iter;

					/* Update the four consecutive PTEs starting at pte_p. */
					ptep_iter = pte_p;
					for (i = 0; i < 4; i++, ptep_iter++) {
						/* Note that there is a hole in the HINT sanity checking here. */
						ptmp = *ptep_iter;

						/* We only need to update the page tables if the protections do not match. */
						if ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot) {
							ptmp = (ptmp & ~(ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) | arm_prot;
							*ptep_iter = ptmp;
						}
					}
				}
			} else {
				ptmp = *pte_p;
				/* We only need to update the page tables if the protections do not match. */
				if ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot) {
					ptmp = (ptmp & ~(ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) | arm_prot;
					*pte_p = ptmp;
				}
			}
			__unreachable_ok_pop
		}
	}

	/*
	 * Flush the TLB for the part of the range that was walked (this also
	 * runs after an early break). The length must fit in 32 bits because it
	 * is passed as a uint32_t.
	 */
	if (vaddr_cur > vaddr) {
		assert(((vaddr_cur - vaddr) & 0xFFFFFFFF00000000ULL) == 0);
		flush_mmu_tlb_region(vaddr, (uint32_t)(vaddr_cur - vaddr));
	}


	return result;
#endif /* CONFIG_SPTM */
}
2039
2040 #if defined(CONFIG_SPTM)
2041 /*
2042 * Returns true if the given physical address is in one of the boot kernelcache ranges.
2043 */
2044 static bool
ml_physaddr_in_bootkc_range(vm_offset_t physaddr)2045 ml_physaddr_in_bootkc_range(vm_offset_t physaddr)
2046 {
2047 for (int i = 0; i < arm_vm_kernelcache_numranges; i++) {
2048 if (physaddr >= arm_vm_kernelcache_ranges[i].start_phys && physaddr < arm_vm_kernelcache_ranges[i].end_phys) {
2049 return true;
2050 }
2051 }
2052 return false;
2053 }
2054 #endif /* defined(CONFIG_SPTM) */
2055
2056 /*
2057 * Routine: ml_static_mfree
2058 * Function:
2059 */
2060 void
ml_static_mfree(vm_offset_t vaddr,vm_size_t size)2061 ml_static_mfree(
2062 vm_offset_t vaddr,
2063 vm_size_t size)
2064 {
2065 vm_offset_t vaddr_cur;
2066 vm_offset_t paddr_cur;
2067 ppnum_t ppn;
2068 uint32_t freed_pages = 0;
2069 uint32_t freed_kernelcache_pages = 0;
2070
2071
2072 /* It is acceptable (if bad) to fail to free. */
2073 if (vaddr < physmap_base) {
2074 return;
2075 }
2076
2077 assert((vaddr & (PAGE_SIZE - 1)) == 0); /* must be page aligned */
2078
2079 for (vaddr_cur = vaddr;
2080 vaddr_cur < trunc_page_64(vaddr + size);
2081 vaddr_cur += PAGE_SIZE) {
2082 ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
2083 if (ppn != (vm_offset_t) NULL) {
2084 /*
2085 * It is not acceptable to fail to update the protections on a page
2086 * we will release to the VM. We need to either panic or continue.
2087 * For now, we'll panic (to help flag if there is memory we can
2088 * reclaim).
2089 */
2090 if (ml_static_protect(vaddr_cur, PAGE_SIZE, VM_PROT_WRITE | VM_PROT_READ) != KERN_SUCCESS) {
2091 panic("Failed ml_static_mfree on %p", (void *) vaddr_cur);
2092 }
2093
2094 paddr_cur = ptoa(ppn);
2095
2096
2097 vm_page_create(ppn, (ppn + 1));
2098 freed_pages++;
2099 #if defined(CONFIG_SPTM)
2100 if (ml_physaddr_in_bootkc_range(paddr_cur)) {
2101 #else
2102 if (paddr_cur >= arm_vm_kernelcache_phys_start && paddr_cur < arm_vm_kernelcache_phys_end) {
2103 #endif
2104 freed_kernelcache_pages++;
2105 }
2106 }
2107 }
2108 vm_page_lockspin_queues();
2109 vm_page_wire_count -= freed_pages;
2110 vm_page_wire_count_initial -= freed_pages;
2111 vm_page_kernelcache_count -= freed_kernelcache_pages;
2112 vm_page_unlock_queues();
2113 #if DEBUG
2114 kprintf("ml_static_mfree: Released 0x%x pages at VA %p, size:0x%llx, last ppn: 0x%x, +%d bad\n", freed_pages, (void *)vaddr, (uint64_t)size, ppn, bad_page_cnt);
2115 #endif
2116 }
2117
2118 /*
2119 * Routine: ml_page_protection_type
2120 * Function: Returns the type of page protection that the system supports.
2121 */
2122 ml_page_protection_t
2123 ml_page_protection_type(void)
2124 {
2125 #if CONFIG_SPTM
2126 return 2;
2127 #elif XNU_MONITOR
2128 return 1;
2129 #else
2130 return 0;
2131 #endif
2132 }
2133
2134 /* virtual to physical on wired pages */
2135 vm_offset_t
2136 ml_vtophys(vm_offset_t vaddr)
2137 {
2138 return kvtophys(vaddr);
2139 }
2140
2141 /*
2142 * Routine: ml_nofault_copy
2143 * Function: Perform a physical mode copy if the source and destination have
2144 * valid translations in the kernel pmap. If translations are present, they are
2145 * assumed to be wired; e.g., no attempt is made to guarantee that the
2146 * translations obtained remain valid for the duration of the copy process.
2147 */
2148 vm_size_t
2149 ml_nofault_copy(vm_offset_t virtsrc, vm_offset_t virtdst, vm_size_t size)
2150 {
2151 addr64_t cur_phys_dst, cur_phys_src;
2152 vm_size_t count, nbytes = 0;
2153
2154 while (size > 0) {
2155 if (!(cur_phys_src = kvtophys(virtsrc))) {
2156 break;
2157 }
2158 if (!(cur_phys_dst = kvtophys(virtdst))) {
2159 break;
2160 }
2161 if (!pmap_valid_address(trunc_page_64(cur_phys_dst)) ||
2162 !pmap_valid_address(trunc_page_64(cur_phys_src))) {
2163 break;
2164 }
2165 count = PAGE_SIZE - (cur_phys_src & PAGE_MASK);
2166 if (count > (PAGE_SIZE - (cur_phys_dst & PAGE_MASK))) {
2167 count = PAGE_SIZE - (cur_phys_dst & PAGE_MASK);
2168 }
2169 if (count > size) {
2170 count = size;
2171 }
2172
2173 bcopy_phys(cur_phys_src, cur_phys_dst, count);
2174
2175 nbytes += count;
2176 virtsrc += count;
2177 virtdst += count;
2178 size -= count;
2179 }
2180
2181 return nbytes;
2182 }
2183
2184 /*
2185 * Routine: ml_validate_nofault
2186 * Function: Validate that ths address range has a valid translations
2187 * in the kernel pmap. If translations are present, they are
2188 * assumed to be wired; i.e. no attempt is made to guarantee
2189 * that the translation persist after the check.
2190 * Returns: TRUE if the range is mapped and will not cause a fault,
2191 * FALSE otherwise.
2192 */
2193
2194 boolean_t
2195 ml_validate_nofault(
2196 vm_offset_t virtsrc, vm_size_t size)
2197 {
2198 addr64_t cur_phys_src;
2199 uint32_t count;
2200
2201 while (size > 0) {
2202 if (!(cur_phys_src = kvtophys(virtsrc))) {
2203 return FALSE;
2204 }
2205 if (!pmap_valid_address(trunc_page_64(cur_phys_src))) {
2206 return FALSE;
2207 }
2208 count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK));
2209 if (count > size) {
2210 count = (uint32_t)size;
2211 }
2212
2213 virtsrc += count;
2214 size -= count;
2215 }
2216
2217 return TRUE;
2218 }
2219
2220 void
2221 ml_get_bouncepool_info(vm_offset_t * phys_addr, vm_size_t * size)
2222 {
2223 *phys_addr = 0;
2224 *size = 0;
2225 }
2226
2227 void
2228 active_rt_threads(__unused boolean_t active)
2229 {
2230 }
2231
2232 static void
2233 cpu_qos_cb_default(__unused int urgency, __unused uint64_t qos_param1, __unused uint64_t qos_param2)
2234 {
2235 return;
2236 }
2237
2238 cpu_qos_update_t cpu_qos_update = cpu_qos_cb_default;
2239
2240 void
2241 cpu_qos_update_register(cpu_qos_update_t cpu_qos_cb)
2242 {
2243 if (cpu_qos_cb != NULL) {
2244 cpu_qos_update = cpu_qos_cb;
2245 } else {
2246 cpu_qos_update = cpu_qos_cb_default;
2247 }
2248 }
2249
2250 void
2251 thread_tell_urgency(thread_urgency_t urgency, uint64_t rt_period, uint64_t rt_deadline, uint64_t sched_latency __unused, __unused thread_t nthread)
2252 {
2253 SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) | DBG_FUNC_START, urgency, rt_period, rt_deadline, sched_latency, 0);
2254
2255 cpu_qos_update((int)urgency, rt_period, rt_deadline);
2256
2257 SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) | DBG_FUNC_END, urgency, rt_period, rt_deadline, 0, 0);
2258 }
2259
2260 void
2261 machine_run_count(__unused uint32_t count)
2262 {
2263 }
2264
2265 processor_t
2266 machine_choose_processor(__unused processor_set_t pset, processor_t processor)
2267 {
2268 return processor;
2269 }
2270
2271 #if KASAN
2272 vm_offset_t ml_stack_base(void);
2273 vm_size_t ml_stack_size(void);
2274
2275 vm_offset_t
2276 ml_stack_base(void)
2277 {
2278 uintptr_t local = (uintptr_t) &local;
2279 vm_offset_t intstack_top_ptr;
2280
2281 intstack_top_ptr = getCpuDatap()->intstack_top;
2282 if ((local < intstack_top_ptr) && (local > intstack_top_ptr - INTSTACK_SIZE)) {
2283 return intstack_top_ptr - INTSTACK_SIZE;
2284 } else {
2285 return current_thread()->kernel_stack;
2286 }
2287 }
2288 vm_size_t
2289 ml_stack_size(void)
2290 {
2291 uintptr_t local = (uintptr_t) &local;
2292 vm_offset_t intstack_top_ptr;
2293
2294 intstack_top_ptr = getCpuDatap()->intstack_top;
2295 if ((local < intstack_top_ptr) && (local > intstack_top_ptr - INTSTACK_SIZE)) {
2296 return INTSTACK_SIZE;
2297 } else {
2298 return kernel_stack_size;
2299 }
2300 }
2301 #endif
2302
2303 #ifdef CONFIG_KCOV
2304
2305 kcov_cpu_data_t *
2306 current_kcov_data(void)
2307 {
2308 return ¤t_cpu_datap()->cpu_kcov_data;
2309 }
2310
2311 kcov_cpu_data_t *
2312 cpu_kcov_data(int cpuid)
2313 {
2314 return &cpu_datap(cpuid)->cpu_kcov_data;
2315 }
2316
2317 #endif /* CONFIG_KCOV */
2318
2319 boolean_t
2320 machine_timeout_suspended(void)
2321 {
2322 return FALSE;
2323 }
2324
2325 kern_return_t
2326 ml_interrupt_prewarm(__unused uint64_t deadline)
2327 {
2328 return KERN_FAILURE;
2329 }
2330
/*
 * KERNEL_TIMEBASE(reg) yields the system-register name string used for the
 * kernel's timer register accesses in this file. With HAS_APPLE_GENERIC_TIMER
 * the name gains an "AGT" prefix through string-literal concatenation;
 * otherwise reg is used unchanged. reg must be a string literal.
 */
#if HAS_APPLE_GENERIC_TIMER
/* The kernel timer APIs always use the Apple timebase */
#define KERNEL_TIMEBASE(reg) "AGT"reg
#else
#define KERNEL_TIMEBASE(reg) reg
#endif
2337
2338 /*
2339 * Assumes fiq, irq disabled.
2340 */
2341 void
2342 ml_set_decrementer(uint32_t dec_value)
2343 {
2344 cpu_data_t *cdp = getCpuDatap();
2345
2346 assert(ml_get_interrupts_enabled() == FALSE);
2347 cdp->cpu_decrementer = dec_value;
2348
2349 if (cdp->cpu_set_decrementer_func) {
2350 cdp->cpu_set_decrementer_func(dec_value);
2351 } else {
2352 __builtin_arm_wsr64(KERNEL_TIMEBASE("CNTV_TVAL_EL0"), (uint64_t)dec_value);
2353 }
2354 }
2355
/**
 * Read the CNTVCT_EL0 kernel timebase register. No barrier is issued, so the
 * read is permitted to be executed speculatively and/or out of program order.
 */
static inline uint64_t
speculative_timebase(void)
{
	const uint64_t now = __builtin_arm_rsr64(KERNEL_TIMEBASE("CNTVCT_EL0"));

	return now;
}
2365
2366 /**
2367 * Read a non-speculative view of the timebase if one is available,
2368 * otherwise fallback on an ISB to prevent prevent speculation and
2369 * enforce ordering.
2370 */
2371 static inline uint64_t
2372 nonspeculative_timebase(void)
2373 {
2374 #if defined(HAS_ACNTVCT)
2375 return __builtin_arm_rsr64("ACNTVCT_EL0");
2376 #elif __ARM_ARCH_8_6__
2377 return __builtin_arm_rsr64(KERNEL_TIMEBASE("CNTVCTSS_EL0"));
2378 #else
2379 // ISB required by ARMV7C.b section B8.1.2 & ARMv8 section D6.1.2
2380 // "Reads of CNT[PV]CT[_EL0] can occur speculatively and out of order relative
2381 // to other instructions executed on the same processor."
2382 __builtin_arm_isb(ISB_SY);
2383 return speculative_timebase();
2384 #endif
2385 }
2386
2387
/*
 * Return the raw hardware timebase as read by nonspeculative_timebase().
 * No per-CPU base offset is applied.
 */
uint64_t
ml_get_hwclock(void)
{
	return nonspeculative_timebase();
}
2394
2395 uint64_t
2396 ml_get_timebase()
2397 {
2398 uint64_t clock, timebase;
2399
2400 //the retry is for the case where S2R catches us in the middle of this. see rdar://77019633
2401 do {
2402 timebase = getCpuDatap()->cpu_base_timebase;
2403 os_compiler_barrier();
2404 clock = ml_get_hwclock();
2405 os_compiler_barrier();
2406 } while (getCpuDatap()->cpu_base_timebase != timebase);
2407
2408 return clock + timebase;
2409 }
2410
/**
 * Issue a barrier that guarantees all prior memory accesses will complete
 * before any subsequent timebase reads.
 *
 * The sequence is: a full-system DMB, a load of a constant zero followed by a
 * conditional backwards branch on the loaded value, and finally a discarded
 * ml_get_hwclock() read.
 */
void
ml_memory_to_timebase_fence(void)
{
	__builtin_arm_dmb(DMB_SY);
	/* Always zero, so the cbnz below never actually branches back. */
	const uint64_t take_backwards_branch = 0;
	/*
	 * NOTE(review): presumably the load + dependent branch exists to create
	 * a dependency that later instructions cannot be hoisted above - confirm
	 * against the architecture documentation before changing it.
	 */
	asm volatile (
        "1:"
                "ldr x0, [%[take_backwards_branch]]" "\n"
                "cbnz x0, 1b" "\n"
                :
                : [take_backwards_branch] "r"(&take_backwards_branch)
                : "x0"
        );

	/* throwaway read to prevent ml_get_speculative_timebase() reordering */
	(void)ml_get_hwclock();
}
2432
2433 /**
2434 * Issue a barrier that guarantees all prior timebase reads will
2435 * be ordered before any subsequent memory accesses.
2436 */
2437 void
2438 ml_timebase_to_memory_fence(void)
2439 {
2440 __builtin_arm_isb(ISB_SY);
2441 }
2442
2443 /*
2444 * Get the speculative timebase without an ISB.
2445 */
2446 uint64_t
2447 ml_get_speculative_timebase(void)
2448 {
2449 uint64_t clock, timebase;
2450
2451 //the retry is for the case where S2R catches us in the middle of this. see rdar://77019633&77697482
2452 do {
2453 timebase = getCpuDatap()->cpu_base_timebase;
2454 os_compiler_barrier();
2455 clock = speculative_timebase();
2456
2457 os_compiler_barrier();
2458 } while (getCpuDatap()->cpu_base_timebase != timebase);
2459
2460 return clock + timebase;
2461 }
2462
/* Returns the value of ml_get_speculative_timebase() for use as entropy. */
uint64_t
ml_get_timebase_entropy(void)
{
	const uint64_t sample = ml_get_speculative_timebase();

	return sample;
}
2468
2469 uint32_t
2470 ml_get_decrementer(void)
2471 {
2472 cpu_data_t *cdp = getCpuDatap();
2473 uint32_t dec;
2474
2475 assert(ml_get_interrupts_enabled() == FALSE);
2476
2477 if (cdp->cpu_get_decrementer_func) {
2478 dec = cdp->cpu_get_decrementer_func();
2479 } else {
2480 uint64_t wide_val;
2481
2482 wide_val = __builtin_arm_rsr64(KERNEL_TIMEBASE("CNTV_TVAL_EL0"));
2483 dec = (uint32_t)wide_val;
2484 assert(wide_val == (uint64_t)dec);
2485 }
2486
2487 return dec;
2488 }
2489
2490 boolean_t
2491 ml_get_timer_pending(void)
2492 {
2493 uint64_t cntv_ctl = __builtin_arm_rsr64(KERNEL_TIMEBASE("CNTV_CTL_EL0"));
2494 return ((cntv_ctl & CNTV_CTL_EL0_ISTATUS) != 0) ? TRUE : FALSE;
2495 }
2496
2497 __attribute__((noreturn))
2498 void
2499 platform_syscall(arm_saved_state_t *state)
2500 {
2501 uint32_t code;
2502
2503 #define platform_syscall_kprintf(x...) /* kprintf("platform_syscall: " x) */
2504
2505 code = (uint32_t)get_saved_state_reg(state, 3);
2506
2507 KDBG(MACHDBG_CODE(DBG_MACH_MACHDEP_EXCP_SC_ARM, code) | DBG_FUNC_START,
2508 get_saved_state_reg(state, 0),
2509 get_saved_state_reg(state, 1),
2510 get_saved_state_reg(state, 2));
2511
2512 switch (code) {
2513 case 2:
2514 /* set cthread */
2515 platform_syscall_kprintf("set cthread self.\n");
2516 thread_set_cthread_self(get_saved_state_reg(state, 0));
2517 break;
2518 case 3:
2519 /* get cthread */
2520 platform_syscall_kprintf("get cthread self.\n");
2521 set_user_saved_state_reg(state, 0, thread_get_cthread_self());
2522 break;
2523 case 0: /* I-Cache flush (removed) */
2524 case 1: /* D-Cache flush (removed) */
2525 default:
2526 platform_syscall_kprintf("unknown: %d\n", code);
2527 break;
2528 }
2529
2530 KDBG(MACHDBG_CODE(DBG_MACH_MACHDEP_EXCP_SC_ARM, code) | DBG_FUNC_END,
2531 get_saved_state_reg(state, 0));
2532
2533 thread_exception_return();
2534 }
2535
2536 static void
2537 _enable_timebase_event_stream(uint32_t bit_index)
2538 {
2539 if (bit_index >= 64) {
2540 panic("%s: invalid bit index (%u)", __FUNCTION__, bit_index);
2541 }
2542
2543 uint64_t cntkctl = __builtin_arm_rsr64(KERNEL_TIMEBASE("CNTKCTL_EL1"));
2544
2545 cntkctl |= (bit_index << CNTKCTL_EL1_EVENTI_SHIFT);
2546 cntkctl |= CNTKCTL_EL1_EVNTEN;
2547 cntkctl |= CNTKCTL_EL1_EVENTDIR; /* 1->0; why not? */
2548
2549 /*
2550 * If the SOC supports it (and it isn't broken), enable
2551 * EL0 access to the timebase registers.
2552 */
2553 if (user_timebase_type() != USER_TIMEBASE_NONE) {
2554 cntkctl |= (CNTKCTL_EL1_PL0PCTEN | CNTKCTL_EL1_PL0VCTEN);
2555 }
2556
2557 __builtin_arm_wsr64(KERNEL_TIMEBASE("CNTKCTL_EL1"), cntkctl);
2558
2559 #if HAS_APPLE_GENERIC_TIMER
2560 /* Enable EL0 access to the ARM timebase registers too */
2561 uint64_t arm_cntkctl = __builtin_arm_rsr64("CNTKCTL_EL1");
2562 arm_cntkctl |= (CNTKCTL_EL1_PL0PCTEN | CNTKCTL_EL1_PL0VCTEN);
2563 __builtin_arm_wsr64("CNTKCTL_EL1", arm_cntkctl);
2564 #endif
2565 }
2566
2567 /*
2568 * Turn timer on, unmask that interrupt.
2569 */
2570 static void
2571 _enable_virtual_timer(void)
2572 {
2573 uint64_t cntvctl = CNTV_CTL_EL0_ENABLE; /* One wants to use 32 bits, but "mrs" prefers it this way */
2574
2575 __builtin_arm_wsr64(KERNEL_TIMEBASE("CNTV_CTL_EL0"), cntvctl);
2576 /* disable the physical timer as a precaution, as its registers reset to architecturally unknown values */
2577 __builtin_arm_wsr64("CNTP_CTL_EL0", CNTP_CTL_EL0_IMASKED);
2578 #if HAS_APPLE_GENERIC_TIMER
2579 __builtin_arm_wsr64("AGTCNTP_CTL_EL0", CNTP_CTL_EL0_IMASKED);
2580 #endif
2581 }
2582
2583 void
2584 fiq_context_init(boolean_t enable_fiq __unused)
2585 {
2586 /* Interrupts still disabled. */
2587 assert(ml_get_interrupts_enabled() == FALSE);
2588 _enable_virtual_timer();
2589 }
2590
2591 void
2592 wfe_timeout_init(void)
2593 {
2594 _enable_timebase_event_stream(arm64_eventi);
2595 }
2596
2597 /**
2598 * Configures, but does not enable, the WFE event stream. The event stream
2599 * generates an event at a set interval to act as a timeout for WFEs.
2600 *
2601 * This function sets the static global variable arm64_eventi to be the proper
2602 * bit index for the CNTKCTL_EL1.EVENTI field to generate events at the correct
2603 * period (1us unless specified by the "wfe_events_sec" boot-arg). arm64_eventi
2604 * is used by wfe_timeout_init to actually poke the registers and enable the
2605 * event stream.
2606 *
2607 * The CNTKCTL_EL1.EVENTI field contains the index of the bit of CNTVCT_EL0 that
2608 * is the trigger for the system to generate an event. The trigger can occur on
2609 * either the rising or falling edge of the bit depending on the value of
2610 * CNTKCTL_EL1.EVNTDIR. This is arbitrary for our purposes, so we use the
2611 * falling edge (1->0) transition to generate events.
2612 */
2613 void
2614 wfe_timeout_configure(void)
2615 {
2616 /* Could fill in our own ops here, if we needed them */
2617 uint64_t ticks_per_sec, ticks_per_event, events_per_sec = 0;
2618 uint32_t bit_index;
2619
2620 if (PE_parse_boot_argn("wfe_events_sec", &events_per_sec, sizeof(events_per_sec))) {
2621 if (events_per_sec <= 0) {
2622 events_per_sec = 1;
2623 } else if (events_per_sec > USEC_PER_SEC) {
2624 events_per_sec = USEC_PER_SEC;
2625 }
2626 } else {
2627 events_per_sec = USEC_PER_SEC;
2628 }
2629 ticks_per_sec = gPEClockFrequencyInfo.timebase_frequency_hz;
2630 ticks_per_event = ticks_per_sec / events_per_sec;
2631
2632 /* Bit index of next power of two greater than ticks_per_event */
2633 bit_index = flsll(ticks_per_event) - 1;
2634 /* Round up to next power of two if ticks_per_event is initially power of two */
2635 if ((ticks_per_event & ((1 << bit_index) - 1)) != 0) {
2636 bit_index++;
2637 }
2638
2639 /*
2640 * The timer can only trigger on rising or falling edge, not both; we don't
2641 * care which we trigger on, but we do need to adjust which bit we are
2642 * interested in to account for this.
2643 *
2644 * In particular, we set CNTKCTL_EL1.EVENTDIR to trigger events on the
2645 * falling edge of the given bit. Therefore, we must decrement the bit index
2646 * by one as when the bit before the one we care about makes a 1 -> 0
2647 * transition, the bit we care about makes a 0 -> 1 transition.
2648 *
2649 * For example if we want an event generated every 8 ticks (if we calculated
2650 * a bit_index of 3), we would want the event to be generated whenever the
2651 * lower four bits of the counter transition from 0b0111 -> 0b1000. We can
2652 * see that the bit at index 2 makes a falling transition in this scenario,
2653 * so we would want EVENTI to be 2 instead of 3.
2654 */
2655 if (bit_index != 0) {
2656 bit_index--;
2657 }
2658
2659 arm64_eventi = bit_index;
2660 }
2661
2662 boolean_t
2663 ml_delay_should_spin(uint64_t interval)
2664 {
2665 cpu_data_t *cdp = getCpuDatap();
2666
2667 if (cdp->cpu_idle_latency) {
2668 return (interval < cdp->cpu_idle_latency) ? TRUE : FALSE;
2669 } else {
2670 /*
2671 * Early boot, latency is unknown. Err on the side of blocking,
2672 * which should always be safe, even if slow
2673 */
2674 return FALSE;
2675 }
2676 }
2677
2678 boolean_t
2679 ml_thread_is64bit(thread_t thread)
2680 {
2681 return thread_is_64bit_addr(thread);
2682 }
2683
/*
 * On DEVELOPMENT and DEBUG builds, delay for yield_delay_us when it is
 * non-zero. Does nothing on other builds.
 */
void
ml_delay_on_yield(void)
{
#if DEVELOPMENT || DEBUG
	if (!yield_delay_us) {
		return;
	}
	delay(yield_delay_us);
#endif
}
2693
/* Intentionally empty in this implementation. */
void
ml_timer_evaluate(void)
{
}
2698
2699 boolean_t
2700 ml_timer_forced_evaluation(void)
2701 {
2702 return FALSE;
2703 }
2704
2705 void
2706 ml_gpu_stat_update(__unused uint64_t gpu_ns_delta)
2707 {
2708 /*
2709 * For now: update the resource coalition stats of the
2710 * current thread's coalition
2711 */
2712 task_coalition_update_gpu_stats(current_task(), gpu_ns_delta);
2713 }
2714
2715 uint64_t
2716 ml_gpu_stat(__unused thread_t t)
2717 {
2718 return 0;
2719 }
2720
2721 thread_t
2722 current_thread(void)
2723 {
2724 return current_thread_fast();
2725 }
2726
2727 #if defined(HAS_APPLE_PAC)
2728 uint8_t
2729 ml_task_get_disable_user_jop(task_t task)
2730 {
2731 assert(task);
2732 return task->disable_user_jop;
2733 }
2734
2735 void
2736 ml_task_set_disable_user_jop(task_t task, uint8_t disable_user_jop)
2737 {
2738 assert(task);
2739 task->disable_user_jop = disable_user_jop;
2740 }
2741
2742 void
2743 ml_thread_set_disable_user_jop(thread_t thread, uint8_t disable_user_jop)
2744 {
2745 assert(thread);
2746 if (disable_user_jop) {
2747 thread->machine.arm_machine_flags |= ARM_MACHINE_THREAD_DISABLE_USER_JOP;
2748 } else {
2749 thread->machine.arm_machine_flags &= ~ARM_MACHINE_THREAD_DISABLE_USER_JOP;
2750 }
2751 }
2752
2753 void
2754 ml_task_set_rop_pid(task_t task, task_t parent_task, boolean_t inherit)
2755 {
2756 if (inherit) {
2757 task->rop_pid = parent_task->rop_pid;
2758 } else {
2759 task->rop_pid = early_random();
2760 }
2761 }
2762
2763 /**
2764 * jop_pid may be inherited from the parent task or generated inside the shared
2765 * region. Unfortunately these two parameters are available at very different
2766 * times during task creation, so we need to split this into two steps.
2767 */
2768 void
2769 ml_task_set_jop_pid(task_t task, task_t parent_task, boolean_t inherit, boolean_t disable_user_jop)
2770 {
2771 if (inherit) {
2772 task->jop_pid = parent_task->jop_pid;
2773 } else if (disable_user_jop) {
2774 task->jop_pid = ml_non_arm64e_user_jop_pid();
2775 } else {
2776 task->jop_pid = ml_default_jop_pid();
2777 }
2778 }
2779
2780 void
2781 ml_task_set_jop_pid_from_shared_region(task_t task, boolean_t disable_user_jop)
2782 {
2783 if (disable_user_jop) {
2784 task->jop_pid = ml_non_arm64e_user_jop_pid();
2785 return;
2786 }
2787
2788 vm_shared_region_t sr = vm_shared_region_get(task);
2789 /*
2790 * If there's no shared region, we can assign the key arbitrarily. This
2791 * typically happens when Mach-O image activation failed part of the way
2792 * through, and this task is in the middle of dying with SIGKILL anyway.
2793 */
2794 if (__improbable(!sr)) {
2795 task->jop_pid = early_random();
2796 return;
2797 }
2798 vm_shared_region_deallocate(sr);
2799
2800 /*
2801 * Similarly we have to worry about jetsam having killed the task and
2802 * already cleared the shared_region_id.
2803 */
2804 task_lock(task);
2805 if (task->shared_region_id != NULL) {
2806 task->jop_pid = shared_region_find_key(task->shared_region_id);
2807 } else {
2808 task->jop_pid = early_random();
2809 }
2810 task_unlock(task);
2811 }
2812
2813 void
2814 ml_thread_set_jop_pid(thread_t thread, task_t task)
2815 {
2816 thread->machine.jop_pid = task->jop_pid;
2817 }
2818 #endif /* defined(HAS_APPLE_PAC) */
2819
2820 #if DEVELOPMENT || DEBUG
2821 static uint64_t minor_badness_suffered = 0;
2822 #endif
2823 void
2824 ml_report_minor_badness(uint32_t __unused badness_id)
2825 {
2826 #if DEVELOPMENT || DEBUG
2827 (void)os_atomic_or(&minor_badness_suffered, 1ULL << badness_id, relaxed);
2828 #endif
2829 }
2830
2831 #if HAS_APPLE_PAC && (__ARM_ARCH_8_6__ || APPLEVIRTUALPLATFORM)
2832 /**
2833 * Emulates the poisoning done by ARMv8.3-PAuth instructions on auth failure.
2834 */
2835 void *
2836 ml_poison_ptr(void *ptr, ptrauth_key key)
2837 {
2838 bool b_key = key & (1ULL << 0);
2839 uint64_t error_code;
2840 if (b_key) {
2841 error_code = 2;
2842 } else {
2843 error_code = 1;
2844 }
2845
2846 bool kernel_pointer = (uintptr_t)ptr & (1ULL << 55);
2847 bool data_key = key & (1ULL << 1);
2848 /* When PAC is enabled, only userspace data pointers use TBI, regardless of boot parameters */
2849 bool tbi = data_key && !kernel_pointer;
2850 unsigned int poison_shift;
2851 if (tbi) {
2852 poison_shift = 53;
2853 } else {
2854 poison_shift = 61;
2855 }
2856
2857 uintptr_t poisoned = (uintptr_t)ptr;
2858 poisoned &= ~(3ULL << poison_shift);
2859 poisoned |= error_code << poison_shift;
2860 return (void *)poisoned;
2861 }
2862 #endif /* HAS_APPLE_PAC && (__ARM_ARCH_8_6__ || APPLEVIRTUALPLATFORM) */
2863
2864 #ifdef CONFIG_XNUPOST
2865 void
2866 ml_expect_fault_begin(expected_fault_handler_t expected_fault_handler, uintptr_t expected_fault_addr)
2867 {
2868 thread_t thread = current_thread();
2869 thread->machine.expected_fault_handler = expected_fault_handler;
2870 thread->machine.expected_fault_addr = expected_fault_addr;
2871 thread->machine.expected_fault_pc = 0;
2872 }
2873
2874 /** Expect an exception to be thrown at EXPECTED_FAULT_PC */
2875 void
2876 ml_expect_fault_pc_begin(expected_fault_handler_t expected_fault_handler, uintptr_t expected_fault_pc)
2877 {
2878 thread_t thread = current_thread();
2879 thread->machine.expected_fault_handler = expected_fault_handler;
2880 thread->machine.expected_fault_addr = 0;
2881 uintptr_t raw_func = (uintptr_t)ptrauth_strip(
2882 (void *)expected_fault_pc,
2883 ptrauth_key_function_pointer);
2884 thread->machine.expected_fault_pc = raw_func;
2885 }
2886
2887 void
2888 ml_expect_fault_end(void)
2889 {
2890 thread_t thread = current_thread();
2891 thread->machine.expected_fault_handler = NULL;
2892 thread->machine.expected_fault_addr = 0;
2893 thread->machine.expected_fault_pc = 0;
2894 }
2895 #endif /* CONFIG_XNUPOST */
2896
/*
 * With HIBERNATION, rebuild the VM structures when the system is waking from
 * hibernation (gIOHibernateState is kIOHibernateStateWakingFromHibernate).
 * Does nothing otherwise.
 */
void
ml_hibernate_active_pre(void)
{
#if HIBERNATION
	if (gIOHibernateState != kIOHibernateStateWakingFromHibernate) {
		return;
	}

	hibernate_rebuild_vm_structs();
#endif /* HIBERNATION */
}
2907
/*
 * With HIBERNATION, when the system is waking from hibernation: run
 * hibernate_machine_init(), then hibernate_vm_lock_end(), then clear the
 * current CPU's cpu_hibernate field. Does nothing otherwise.
 */
void
ml_hibernate_active_post(void)
{
#if HIBERNATION
	if (gIOHibernateState != kIOHibernateStateWakingFromHibernate) {
		return;
	}

	hibernate_machine_init();
	hibernate_vm_lock_end();
	current_cpu_datap()->cpu_hibernate = 0;
#endif /* HIBERNATION */
}
2919
2920 /**
2921 * Return back a machine-dependent array of address space regions that should be
2922 * reserved by the VM (pre-mapped in the address space). This will prevent user
2923 * processes from allocating or deallocating from within these regions.
2924 *
2925 * @param vm_is64bit True if the process has a 64-bit address space.
2926 * @param regions An out parameter representing an array of regions to reserve.
2927 *
2928 * @return The number of reserved regions returned through `regions`.
2929 */
2930 size_t
2931 ml_get_vm_reserved_regions(bool vm_is64bit, const struct vm_reserved_region **regions)
2932 {
2933 assert(regions != NULL);
2934
2935 /**
2936 * Reserved regions only apply to 64-bit address spaces. This is because
2937 * we only expect to grow the maximum user VA address on 64-bit address spaces
2938 * (we've essentially already reached the max for 32-bit spaces). The reserved
2939 * regions should safely fall outside of the max user VA for 32-bit processes.
2940 */
2941 if (vm_is64bit) {
2942 *regions = vm_reserved_regions;
2943 return ARRAY_COUNT(vm_reserved_regions);
2944 } else {
2945 /* Don't reserve any VA regions on arm64_32 processes. */
2946 *regions = NULL;
2947 return 0;
2948 }
2949 }
2950
2951 /* These WFE recommendations are expected to be updated on a relatively
2952 * infrequent cadence, possibly from a different cluster, hence
2953 * false cacheline sharing isn't expected to be material
2954 */
2955 static uint64_t arm64_cluster_wfe_recs[MAX_CPU_CLUSTERS];
2956
2957 uint32_t
2958 ml_update_cluster_wfe_recommendation(uint32_t wfe_cluster_id, uint64_t wfe_timeout_abstime_interval, __unused uint64_t wfe_hint_flags)
2959 {
2960 assert(wfe_cluster_id < MAX_CPU_CLUSTERS);
2961 assert(wfe_timeout_abstime_interval <= ml_wfe_hint_max_interval);
2962 os_atomic_store(&arm64_cluster_wfe_recs[wfe_cluster_id], wfe_timeout_abstime_interval, relaxed);
2963 return 0; /* Success */
2964 }
2965
2966 #if DEVELOPMENT || DEBUG
2967 int wfe_rec_max = 0;
2968 int wfe_rec_none = 0;
2969 uint64_t wfe_rec_override_mat = 0;
2970 uint64_t wfe_rec_clamp = 0;
2971 #endif
2972
2973 uint64_t
2974 ml_cluster_wfe_timeout(uint32_t wfe_cluster_id)
2975 {
2976 /* This and its consumer does not synchronize vis-a-vis updates
2977 * of the recommendation; races are acceptable.
2978 */
2979 uint64_t wfet = os_atomic_load(&arm64_cluster_wfe_recs[wfe_cluster_id], relaxed);
2980 #if DEVELOPMENT || DEBUG
2981 if (wfe_rec_clamp) {
2982 wfet = MIN(wfe_rec_clamp, wfet);
2983 }
2984
2985 if (wfe_rec_max) {
2986 for (int i = 0; i < MAX_CPU_CLUSTERS; i++) {
2987 if (arm64_cluster_wfe_recs[i] > wfet) {
2988 wfet = arm64_cluster_wfe_recs[i];
2989 }
2990 }
2991 }
2992
2993 if (wfe_rec_none) {
2994 wfet = 0;
2995 }
2996
2997 if (wfe_rec_override_mat) {
2998 wfet = wfe_rec_override_mat;
2999 }
3000 #endif
3001 return wfet;
3002 }
3003
3004 __pure2 bool
3005 ml_addr_in_non_xnu_stack(__unused uintptr_t addr)
3006 {
3007 #if CONFIG_SPTM
3008 /**
3009 * If the address is within one of the SPTM-allocated per-cpu stacks, then
3010 * return true.
3011 */
3012 if ((addr >= SPTMArgs->cpu_stack_papt_start) &&
3013 (addr < SPTMArgs->cpu_stack_papt_end)) {
3014 return true;
3015 }
3016
3017 /**
3018 * If the address is within one of the TXM thread stacks, then return true.
3019 * The SPTM guarantees that these stacks are virtually contiguous.
3020 */
3021 if ((addr >= SPTMArgs->txm_thread_stacks[0]) &&
3022 (addr < SPTMArgs->txm_thread_stacks[MAX_CPUS - 1])) {
3023 return true;
3024 }
3025
3026 return false;
3027 #elif XNU_MONITOR
3028 return (addr >= (uintptr_t)pmap_stacks_start) && (addr < (uintptr_t)pmap_stacks_end);
3029 #else
3030 return false;
3031 #endif /* CONFIG_SPTM || XNU_MONITOR */
3032 }
3033
3034 uint64_t
3035 ml_get_backtrace_pc(struct arm_saved_state *state)
3036 {
3037 assert((state != NULL) && is_saved_state64(state));
3038
3039 #if CONFIG_SPTM
3040 /**
3041 * On SPTM-based systems, when a non-XNU domain (e.g., SPTM) is interrupted,
3042 * the PC value saved into the state is not the actual PC at the interrupted
3043 * point, but a fixed value to a handler that knows how to re-enter the
3044 * interrupted domain. The interrupted domain's actual PC value is saved
3045 * into x14, so let's return that instead.
3046 */
3047 if (ml_addr_in_non_xnu_stack(get_saved_state_fp(state))) {
3048 return saved_state64(state)->x[14];
3049 }
3050 #endif /* CONFIG_SPTM */
3051
3052 return get_saved_state_pc(state);
3053 }
3054
3055
3056 bool
3057 ml_paddr_is_exclaves_owned(vm_offset_t paddr)
3058 {
3059 #if CONFIG_SPTM
3060 const sptm_frame_type_t type = sptm_get_frame_type(paddr);
3061 return type == SK_DEFAULT || type == SK_IO; // SK_SHARED_R[OW] are not exclusively exclaves frames
3062 #else
3063 #pragma unused(paddr)
3064 return false;
3065 #endif /* CONFIG_SPTM */
3066 }
3067
3068 /**
3069 * Panic because an ARM saved-state accessor expected user saved-state but was
3070 * passed non-user saved-state.
3071 *
3072 * @param ss invalid saved-state (CPSR.M != EL0)
3073 */
3074 void
3075 ml_panic_on_invalid_old_cpsr(const arm_saved_state_t *ss)
3076 {
3077 panic("invalid CPSR in user saved-state %p", ss);
3078 }
3079
3080 /**
3081 * Panic because an ARM saved-state accessor was passed user saved-state and
3082 * asked to assign a non-user CPSR.
3083 *
3084 * @param ss original EL0 saved-state
3085 * @param cpsr invalid new CPSR value (CPSR.M != EL0)
3086 */
3087 void
3088 ml_panic_on_invalid_new_cpsr(const arm_saved_state_t *ss, uint32_t cpsr)
3089 {
3090 panic("attempt to set non-user CPSR %#010x on user saved-state %p", cpsr, ss);
3091 }
3092
3093
/**
 * Hook for explicitly preallocating a floating point save area.
 *
 * ARM does not require preallocation at this time, so this does nothing.
 */
void
ml_fp_save_area_prealloc(void)
{
	/* Intentionally empty. */
}
3102
3103
3104 void
3105 ml_task_post_signature_processing_hook(__unused task_t task)
3106 {
3107 /**
3108 * Have an acquire barrier here to make sure the machine flags read that is going
3109 * to happen below is not speculated before the task->t_returnwaitflags earlier
3110 * in task_wait_to_return().
3111 */
3112 os_atomic_thread_fence(acquire);
3113
3114 }
3115