1 /*-
2 * Copyright (c) 2014 Andrew Turner
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 */
27
28 #include "opt_acpi.h"
29 #include "opt_kstack_pages.h"
30 #include "opt_platform.h"
31 #include "opt_ddb.h"
32
33 #include <sys/cdefs.h>
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/asan.h>
37 #include <sys/buf.h>
38 #include <sys/bus.h>
39 #include <sys/cons.h>
40 #include <sys/cpu.h>
41 #include <sys/csan.h>
42 #include <sys/devmap.h>
43 #include <sys/efi.h>
44 #include <sys/exec.h>
45 #include <sys/imgact.h>
46 #include <sys/kdb.h>
47 #include <sys/kernel.h>
48 #include <sys/ktr.h>
49 #include <sys/limits.h>
50 #include <sys/linker.h>
51 #include <sys/msgbuf.h>
52 #include <sys/pcpu.h>
53 #include <sys/physmem.h>
54 #include <sys/proc.h>
55 #include <sys/ptrace.h>
56 #include <sys/reboot.h>
57 #include <sys/reg.h>
58 #include <sys/rwlock.h>
59 #include <sys/sched.h>
60 #include <sys/signalvar.h>
61 #include <sys/syscallsubr.h>
62 #include <sys/sysent.h>
63 #include <sys/sysproto.h>
64 #include <sys/ucontext.h>
65 #include <sys/vdso.h>
66 #include <sys/vmmeter.h>
67
68 #include <vm/vm.h>
69 #include <vm/vm_param.h>
70 #include <vm/vm_kern.h>
71 #include <vm/vm_object.h>
72 #include <vm/vm_page.h>
73 #include <vm/vm_phys.h>
74 #include <vm/pmap.h>
75 #include <vm/vm_map.h>
76 #include <vm/vm_pager.h>
77
78 #include <machine/armreg.h>
79 #include <machine/cpu.h>
80 #include <machine/debug_monitor.h>
81 #include <machine/hypervisor.h>
82 #include <machine/kdb.h>
83 #include <machine/machdep.h>
84 #include <machine/metadata.h>
85 #include <machine/md_var.h>
86 #include <machine/pcb.h>
87 #include <machine/undefined.h>
88 #include <machine/vmparam.h>
89
90 #ifdef VFP
91 #include <machine/vfp.h>
92 #endif
93
94 #ifdef DEV_ACPI
95 #include <contrib/dev/acpica/include/acpi.h>
96 #include <machine/acpica_machdep.h>
97 #endif
98
99 #ifdef FDT
100 #include <dev/fdt/fdt_common.h>
101 #include <dev/ofw/openfirm.h>
102 #endif
103
104 #include <dev/smbios/smbios.h>
105
106 _Static_assert(sizeof(struct pcb) == 1248, "struct pcb is incorrect size");
107 _Static_assert(offsetof(struct pcb, pcb_fpusaved) == 136,
108 "pcb_fpusaved changed offset");
109 _Static_assert(offsetof(struct pcb, pcb_fpustate) == 192,
110 "pcb_fpustate changed offset");
111
112 enum arm64_bus arm64_bus_method = ARM64_BUS_NONE;
113
114 /*
115 * XXX: The .bss is assumed to be in the boot CPU NUMA domain. If not we
116 * could relocate this, but will need to keep the same virtual address as
117 * it's referenced by the EARLY_COUNTER macro.
118 */
119 struct pcpu pcpu0;
120
121 #if defined(PERTHREAD_SSP)
122 /*
123 * The boot SSP canary. Will be replaced with a per-thread canary when
124 * scheduling has started.
125 */
126 uintptr_t boot_canary = 0x49a2d892bc05a0b1ul;
127 #endif
128
129 static struct trapframe proc0_tf;
130
131 int early_boot = 1;
132 int cold = 1;
133 static int boot_el;
134 static uint64_t hcr_el2;
135
136 struct kva_md_info kmi;
137
138 int64_t dczva_line_size; /* The size of cache line the dc zva zeroes */
139 int has_pan;
140
141 /*
142 * Physical address of the EFI System Table. Stashed from the metadata hints
143 * passed into the kernel and used by the EFI code to call runtime services.
144 */
145 vm_paddr_t efi_systbl_phys;
146 static struct efi_map_header *efihdr;
147
148 /* pagezero_* implementations are provided in support.S */
149 void pagezero_simple(void *);
150 void pagezero_cache(void *);
151
152 /* pagezero_simple is default pagezero */
153 void (*pagezero)(void *p) = pagezero_simple;
154
155 int (*apei_nmi)(void);
156
157 #if defined(PERTHREAD_SSP_WARNING)
158 static void
print_ssp_warning(void * data __unused)159 print_ssp_warning(void *data __unused)
160 {
161 printf("WARNING: Per-thread SSP is enabled but the compiler is too old to support it\n");
162 }
163 SYSINIT(ssp_warn, SI_SUB_COPYRIGHT, SI_ORDER_ANY, print_ssp_warning, NULL);
164 SYSINIT(ssp_warn2, SI_SUB_LAST, SI_ORDER_ANY, print_ssp_warning, NULL);
165 #endif
166
167 static void
pan_setup(void)168 pan_setup(void)
169 {
170 uint64_t id_aa64mfr1;
171
172 id_aa64mfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
173 if (ID_AA64MMFR1_PAN_VAL(id_aa64mfr1) != ID_AA64MMFR1_PAN_NONE)
174 has_pan = 1;
175 }
176
177 void
pan_enable(void)178 pan_enable(void)
179 {
180
181 /*
182 * The LLVM integrated assembler doesn't understand the PAN
183 * PSTATE field. Because of this we need to manually create
184 * the instruction in an asm block. This is equivalent to:
185 * msr pan, #1
186 *
187 * This sets the PAN bit, stopping the kernel from accessing
188 * memory when userspace can also access it unless the kernel
189 * uses the userspace load/store instructions.
190 */
191 if (has_pan) {
192 WRITE_SPECIALREG(sctlr_el1,
193 READ_SPECIALREG(sctlr_el1) & ~SCTLR_SPAN);
194 __asm __volatile(".inst 0xd500409f | (0x1 << 8)");
195 }
196 }
197
198 bool
has_hyp(void)199 has_hyp(void)
200 {
201
202 /*
203 * XXX The E2H check is wrong, but it's close enough for now. Needs to
204 * be re-evaluated once we're running regularly in EL2.
205 */
206 return (boot_el == 2 && (hcr_el2 & HCR_E2H) == 0);
207 }
208
209 static void
cpu_startup(void * dummy)210 cpu_startup(void *dummy)
211 {
212 vm_paddr_t size;
213 int i;
214
215 printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)realmem),
216 ptoa((uintmax_t)realmem) / 1024 / 1024);
217
218 if (bootverbose) {
219 printf("Physical memory chunk(s):\n");
220 for (i = 0; phys_avail[i + 1] != 0; i += 2) {
221 size = phys_avail[i + 1] - phys_avail[i];
222 printf("%#016jx - %#016jx, %ju bytes (%ju pages)\n",
223 (uintmax_t)phys_avail[i],
224 (uintmax_t)phys_avail[i + 1] - 1,
225 (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
226 }
227 }
228
229 printf("avail memory = %ju (%ju MB)\n",
230 ptoa((uintmax_t)vm_free_count()),
231 ptoa((uintmax_t)vm_free_count()) / 1024 / 1024);
232
233 undef_init();
234 install_cpu_errata();
235
236 vm_ksubmap_init(&kmi);
237 bufinit();
238 vm_pager_bufferinit();
239 }
240
241 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
242
243 static void
late_ifunc_resolve(void * dummy __unused)244 late_ifunc_resolve(void *dummy __unused)
245 {
246 link_elf_late_ireloc();
247 }
248 SYSINIT(late_ifunc_resolve, SI_SUB_CPU, SI_ORDER_ANY, late_ifunc_resolve, NULL);
249
/*
 * Idle wakeup hook.  This implementation takes no action for any CPU
 * and always returns 0.
 */
int
cpu_idle_wakeup(int cpu)
{
	const int result = 0;

	return (result);
}
256
257 void
cpu_idle(int busy)258 cpu_idle(int busy)
259 {
260
261 spinlock_enter();
262 if (!busy)
263 cpu_idleclock();
264 if (!sched_runnable())
265 __asm __volatile(
266 "dsb sy \n"
267 "wfi \n");
268 if (!busy)
269 cpu_activeclock();
270 spinlock_exit();
271 }
272
/*
 * Halt the CPU.  We should have shut down by now; if not, disable
 * interrupts and sit in a low power wait forever.  Never returns.
 */
void
cpu_halt(void)
{

	intr_disable();
	for (;;)
		__asm __volatile("wfi");
}
283
/*
 * Flush the D-cache for non-DMA I/O so that the I-cache can
 * be made coherent later.
 *
 * Not implemented yet: both arguments are currently ignored.
 */
void
cpu_flush_dcache(void *ptr, size_t len)
{

	/* ARM64TODO TBD */
	return;
}
294
295 /* Get current clock frequency for the given CPU ID. */
296 int
cpu_est_clockrate(int cpu_id,uint64_t * rate)297 cpu_est_clockrate(int cpu_id, uint64_t *rate)
298 {
299 struct pcpu *pc;
300
301 pc = pcpu_find(cpu_id);
302 if (pc == NULL || rate == NULL)
303 return (EINVAL);
304
305 if (pc->pc_clock == 0)
306 return (EOPNOTSUPP);
307
308 *rate = pc->pc_clock;
309 return (0);
310 }
311
312 void
cpu_pcpu_init(struct pcpu * pcpu,int cpuid,size_t size)313 cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
314 {
315
316 pcpu->pc_acpi_id = 0xffffffff;
317 pcpu->pc_mpidr = UINT64_MAX;
318 }
319
320 void
spinlock_enter(void)321 spinlock_enter(void)
322 {
323 struct thread *td;
324 register_t daif;
325
326 td = curthread;
327 if (td->td_md.md_spinlock_count == 0) {
328 daif = intr_disable();
329 td->td_md.md_spinlock_count = 1;
330 td->td_md.md_saved_daif = daif;
331 critical_enter();
332 } else
333 td->td_md.md_spinlock_count++;
334 }
335
336 void
spinlock_exit(void)337 spinlock_exit(void)
338 {
339 struct thread *td;
340 register_t daif;
341
342 td = curthread;
343 daif = td->td_md.md_saved_daif;
344 td->td_md.md_spinlock_count--;
345 if (td->td_md.md_spinlock_count == 0) {
346 critical_exit();
347 intr_restore(daif);
348 }
349 }
350
351 /*
352 * Construct a PCB from a trapframe. This is called from kdb_trap() where
353 * we want to start a backtrace from the function that caused us to enter
354 * the debugger. We have the context in the trapframe, but base the trace
355 * on the PCB. The PCB doesn't have to be perfect, as long as it contains
356 * enough for a backtrace.
357 */
358 void
makectx(struct trapframe * tf,struct pcb * pcb)359 makectx(struct trapframe *tf, struct pcb *pcb)
360 {
361 int i;
362
363 /* NB: pcb_x[PCB_LR] is the PC, see PC_REGS() in db_machdep.h */
364 for (i = 0; i < nitems(pcb->pcb_x); i++) {
365 if (i == PCB_LR)
366 pcb->pcb_x[i] = tf->tf_elr;
367 else
368 pcb->pcb_x[i] = tf->tf_x[i + PCB_X_START];
369 }
370
371 pcb->pcb_sp = tf->tf_sp;
372 }
373
374 static void
init_proc0(vm_offset_t kstack)375 init_proc0(vm_offset_t kstack)
376 {
377 struct pcpu *pcpup;
378
379 pcpup = cpuid_to_pcpu[0];
380 MPASS(pcpup != NULL);
381
382 proc_linkup0(&proc0, &thread0);
383 thread0.td_kstack = kstack;
384 thread0.td_kstack_pages = KSTACK_PAGES;
385 #if defined(PERTHREAD_SSP)
386 thread0.td_md.md_canary = boot_canary;
387 #endif
388 thread0.td_pcb = (struct pcb *)(thread0.td_kstack +
389 thread0.td_kstack_pages * PAGE_SIZE) - 1;
390 thread0.td_pcb->pcb_flags = 0;
391 thread0.td_pcb->pcb_fpflags = 0;
392 thread0.td_pcb->pcb_fpusaved = &thread0.td_pcb->pcb_fpustate;
393 thread0.td_pcb->pcb_vfpcpu = UINT_MAX;
394 thread0.td_frame = &proc0_tf;
395 ptrauth_thread0(&thread0);
396 pcpup->pc_curpcb = thread0.td_pcb;
397
398 /*
399 * Unmask SError exceptions. They are used to signal a RAS failure,
400 * or other hardware error.
401 */
402 serror_enable();
403 }
404
405 /*
406 * Get an address to be used to write to kernel data that may be mapped
407 * read-only, e.g. to patch kernel code.
408 */
409 bool
arm64_get_writable_addr(vm_offset_t addr,vm_offset_t * out)410 arm64_get_writable_addr(vm_offset_t addr, vm_offset_t *out)
411 {
412 vm_paddr_t pa;
413
414 /* Check if the page is writable */
415 if (PAR_SUCCESS(arm64_address_translate_s1e1w(addr))) {
416 *out = addr;
417 return (true);
418 }
419
420 /*
421 * Find the physical address of the given page.
422 */
423 if (!pmap_klookup(addr, &pa)) {
424 return (false);
425 }
426
427 /*
428 * If it is within the DMAP region and is writable use that.
429 */
430 if (PHYS_IN_DMAP_RANGE(pa)) {
431 addr = PHYS_TO_DMAP(pa);
432 if (PAR_SUCCESS(arm64_address_translate_s1e1w(addr))) {
433 *out = addr;
434 return (true);
435 }
436 }
437
438 return (false);
439 }
440
441 typedef void (*efi_map_entry_cb)(struct efi_md *, void *argp);
442
443 static void
foreach_efi_map_entry(struct efi_map_header * efihdr,efi_map_entry_cb cb,void * argp)444 foreach_efi_map_entry(struct efi_map_header *efihdr, efi_map_entry_cb cb, void *argp)
445 {
446 struct efi_md *map, *p;
447 size_t efisz;
448 int ndesc, i;
449
450 /*
451 * Memory map data provided by UEFI via the GetMemoryMap
452 * Boot Services API.
453 */
454 efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
455 map = (struct efi_md *)((uint8_t *)efihdr + efisz);
456
457 if (efihdr->descriptor_size == 0)
458 return;
459 ndesc = efihdr->memory_size / efihdr->descriptor_size;
460
461 for (i = 0, p = map; i < ndesc; i++,
462 p = efi_next_descriptor(p, efihdr->descriptor_size)) {
463 cb(p, argp);
464 }
465 }
466
467 /*
468 * Handle the EFI memory map list.
469 *
470 * We will make two passes at this, the first (exclude == false) to populate
471 * physmem with valid physical memory ranges from recognized map entry types.
472 * In the second pass we will exclude memory ranges from physmem which must not
473 * be used for general allocations, either because they are used by runtime
474 * firmware or otherwise reserved.
475 *
476 * Adding the runtime-reserved memory ranges to physmem and excluding them
477 * later ensures that they are included in the DMAP, but excluded from
478 * phys_avail[].
479 *
480 * Entry types not explicitly listed here are ignored and not mapped.
481 */
482 static void
handle_efi_map_entry(struct efi_md * p,void * argp)483 handle_efi_map_entry(struct efi_md *p, void *argp)
484 {
485 bool exclude = *(bool *)argp;
486
487 switch (p->md_type) {
488 case EFI_MD_TYPE_RECLAIM:
489 /*
490 * The recomended location for ACPI tables. Map into the
491 * DMAP so we can access them from userspace via /dev/mem.
492 */
493 case EFI_MD_TYPE_RT_CODE:
494 /*
495 * Some UEFI implementations put the system table in the
496 * runtime code section. Include it in the DMAP, but will
497 * be excluded from phys_avail.
498 */
499 case EFI_MD_TYPE_RT_DATA:
500 /*
501 * Runtime data will be excluded after the DMAP
502 * region is created to stop it from being added
503 * to phys_avail.
504 */
505 if (exclude) {
506 physmem_exclude_region(p->md_phys,
507 p->md_pages * EFI_PAGE_SIZE, EXFLAG_NOALLOC);
508 break;
509 }
510 /* FALLTHROUGH */
511 case EFI_MD_TYPE_CODE:
512 case EFI_MD_TYPE_DATA:
513 case EFI_MD_TYPE_BS_CODE:
514 case EFI_MD_TYPE_BS_DATA:
515 case EFI_MD_TYPE_FREE:
516 /*
517 * We're allowed to use any entry with these types.
518 */
519 if (!exclude)
520 physmem_hardware_region(p->md_phys,
521 p->md_pages * EFI_PAGE_SIZE);
522 break;
523 default:
524 /* Other types shall not be handled by physmem. */
525 break;
526 }
527 }
528
529 static void
add_efi_map_entries(struct efi_map_header * efihdr)530 add_efi_map_entries(struct efi_map_header *efihdr)
531 {
532 bool exclude = false;
533 foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude);
534 }
535
536 static void
exclude_efi_map_entries(struct efi_map_header * efihdr)537 exclude_efi_map_entries(struct efi_map_header *efihdr)
538 {
539 bool exclude = true;
540 foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude);
541 }
542
543 static void
print_efi_map_entry(struct efi_md * p,void * argp __unused)544 print_efi_map_entry(struct efi_md *p, void *argp __unused)
545 {
546 const char *type;
547 static const char *types[] = {
548 "Reserved",
549 "LoaderCode",
550 "LoaderData",
551 "BootServicesCode",
552 "BootServicesData",
553 "RuntimeServicesCode",
554 "RuntimeServicesData",
555 "ConventionalMemory",
556 "UnusableMemory",
557 "ACPIReclaimMemory",
558 "ACPIMemoryNVS",
559 "MemoryMappedIO",
560 "MemoryMappedIOPortSpace",
561 "PalCode",
562 "PersistentMemory"
563 };
564
565 if (p->md_type < nitems(types))
566 type = types[p->md_type];
567 else
568 type = "<INVALID>";
569 printf("%23s %012lx %012lx %08lx ", type, p->md_phys,
570 p->md_virt, p->md_pages);
571 if (p->md_attr & EFI_MD_ATTR_UC)
572 printf("UC ");
573 if (p->md_attr & EFI_MD_ATTR_WC)
574 printf("WC ");
575 if (p->md_attr & EFI_MD_ATTR_WT)
576 printf("WT ");
577 if (p->md_attr & EFI_MD_ATTR_WB)
578 printf("WB ");
579 if (p->md_attr & EFI_MD_ATTR_UCE)
580 printf("UCE ");
581 if (p->md_attr & EFI_MD_ATTR_WP)
582 printf("WP ");
583 if (p->md_attr & EFI_MD_ATTR_RP)
584 printf("RP ");
585 if (p->md_attr & EFI_MD_ATTR_XP)
586 printf("XP ");
587 if (p->md_attr & EFI_MD_ATTR_NV)
588 printf("NV ");
589 if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE)
590 printf("MORE_RELIABLE ");
591 if (p->md_attr & EFI_MD_ATTR_RO)
592 printf("RO ");
593 if (p->md_attr & EFI_MD_ATTR_RT)
594 printf("RUNTIME");
595 printf("\n");
596 }
597
598 static void
print_efi_map_entries(struct efi_map_header * efihdr)599 print_efi_map_entries(struct efi_map_header *efihdr)
600 {
601
602 printf("%23s %12s %12s %8s %4s\n",
603 "Type", "Physical", "Virtual", "#Pages", "Attr");
604 foreach_efi_map_entry(efihdr, print_efi_map_entry, NULL);
605 }
606
607 /*
608 * Map the passed in VA in EFI space to a void * using the efi memory table to
609 * find the PA and return it in the DMAP, if it exists. We're used between the
610 * calls to pmap_bootstrap() and physmem_init_kernel_globals() to parse CFG
611 * tables. We assume that either the entry you are mapping fits within its page,
612 * or if it spills to the next page, that's contiguous in PA and in the DMAP.
613 * All observed tables obey the first part of this precondition.
614 */
615 struct early_map_data
616 {
617 vm_offset_t va;
618 vm_offset_t pa;
619 };
620
621 static void
efi_early_map_entry(struct efi_md * p,void * argp)622 efi_early_map_entry(struct efi_md *p, void *argp)
623 {
624 struct early_map_data *emdp = argp;
625 vm_offset_t s, e;
626
627 if (emdp->pa != 0)
628 return;
629 if ((p->md_attr & EFI_MD_ATTR_RT) == 0)
630 return;
631 s = p->md_virt;
632 e = p->md_virt + p->md_pages * EFI_PAGE_SIZE;
633 if (emdp->va < s || emdp->va >= e)
634 return;
635 emdp->pa = p->md_phys + (emdp->va - p->md_virt);
636 }
637
638 static void *
efi_early_map(vm_offset_t va)639 efi_early_map(vm_offset_t va)
640 {
641 struct early_map_data emd = { .va = va };
642
643 foreach_efi_map_entry(efihdr, efi_early_map_entry, &emd);
644 if (emd.pa == 0)
645 return NULL;
646 return (void *)PHYS_TO_DMAP(emd.pa);
647 }
648
649
650 /*
651 * When booted via kboot, the prior kernel will pass in reserved memory areas in
652 * a EFI config table. We need to find that table and walk through it excluding
653 * the memory ranges in it. btw, this is called too early for the printf to do
654 * anything since msgbufp isn't initialized, let alone a console...
655 */
656 static void
exclude_efi_memreserve(vm_offset_t efi_systbl_phys)657 exclude_efi_memreserve(vm_offset_t efi_systbl_phys)
658 {
659 struct efi_systbl *systbl;
660 struct uuid efi_memreserve = LINUX_EFI_MEMRESERVE_TABLE;
661
662 systbl = (struct efi_systbl *)PHYS_TO_DMAP(efi_systbl_phys);
663 if (systbl == NULL) {
664 printf("can't map systbl\n");
665 return;
666 }
667 if (systbl->st_hdr.th_sig != EFI_SYSTBL_SIG) {
668 printf("Bad signature for systbl %#lx\n", systbl->st_hdr.th_sig);
669 return;
670 }
671
672 /*
673 * We don't yet have the pmap system booted enough to create a pmap for
674 * the efi firmware's preferred address space from the GetMemoryMap()
675 * table. The st_cfgtbl is a VA in this space, so we need to do the
676 * mapping ourselves to a kernel VA with efi_early_map. We assume that
677 * the cfgtbl entries don't span a page. Other pointers are PAs, as
678 * noted below.
679 */
680 if (systbl->st_cfgtbl == 0) /* Failsafe st_entries should == 0 in this case */
681 return;
682 for (int i = 0; i < systbl->st_entries; i++) {
683 struct efi_cfgtbl *cfgtbl;
684 struct linux_efi_memreserve *mr;
685
686 cfgtbl = efi_early_map(systbl->st_cfgtbl + i * sizeof(*cfgtbl));
687 if (cfgtbl == NULL)
688 panic("Can't map the config table entry %d\n", i);
689 if (memcmp(&cfgtbl->ct_uuid, &efi_memreserve, sizeof(struct uuid)) != 0)
690 continue;
691
692 /*
693 * cfgtbl points are either VA or PA, depending on the GUID of
694 * the table. memreserve GUID pointers are PA and not converted
695 * after a SetVirtualAddressMap(). The list's mr_next pointer
696 * is also a PA.
697 */
698 mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(
699 (vm_offset_t)cfgtbl->ct_data);
700 while (true) {
701 for (int j = 0; j < mr->mr_count; j++) {
702 struct linux_efi_memreserve_entry *mre;
703
704 mre = &mr->mr_entry[j];
705 physmem_exclude_region(mre->mre_base, mre->mre_size,
706 EXFLAG_NODUMP | EXFLAG_NOALLOC);
707 }
708 if (mr->mr_next == 0)
709 break;
710 mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(mr->mr_next);
711 };
712 }
713
714 }
715
716 #ifdef FDT
717 static void
try_load_dtb(caddr_t kmdp)718 try_load_dtb(caddr_t kmdp)
719 {
720 vm_offset_t dtbp;
721
722 dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t);
723 #if defined(FDT_DTB_STATIC)
724 /*
725 * In case the device tree blob was not retrieved (from metadata) try
726 * to use the statically embedded one.
727 */
728 if (dtbp == 0)
729 dtbp = (vm_offset_t)&fdt_static_dtb;
730 #endif
731
732 if (dtbp == (vm_offset_t)NULL) {
733 #ifndef TSLOG
734 printf("ERROR loading DTB\n");
735 #endif
736 return;
737 }
738
739 if (OF_install(OFW_FDT, 0) == FALSE)
740 panic("Cannot install FDT");
741
742 if (OF_init((void *)dtbp) != 0)
743 panic("OF_init failed with the found device tree");
744
745 parse_fdt_bootargs();
746 }
747 #endif
748
749 static bool
bus_probe(void)750 bus_probe(void)
751 {
752 bool has_acpi, has_fdt;
753 char *order, *env;
754
755 has_acpi = has_fdt = false;
756
757 #ifdef FDT
758 has_fdt = (OF_peer(0) != 0);
759 #endif
760 #ifdef DEV_ACPI
761 has_acpi = (AcpiOsGetRootPointer() != 0);
762 #endif
763
764 env = kern_getenv("kern.cfg.order");
765 if (env != NULL) {
766 order = env;
767 while (order != NULL) {
768 if (has_acpi &&
769 strncmp(order, "acpi", 4) == 0 &&
770 (order[4] == ',' || order[4] == '\0')) {
771 arm64_bus_method = ARM64_BUS_ACPI;
772 break;
773 }
774 if (has_fdt &&
775 strncmp(order, "fdt", 3) == 0 &&
776 (order[3] == ',' || order[3] == '\0')) {
777 arm64_bus_method = ARM64_BUS_FDT;
778 break;
779 }
780 order = strchr(order, ',');
781 if (order != NULL)
782 order++; /* Skip comma */
783 }
784 freeenv(env);
785
786 /* If we set the bus method it is valid */
787 if (arm64_bus_method != ARM64_BUS_NONE)
788 return (true);
789 }
790 /* If no order or an invalid order was set use the default */
791 if (arm64_bus_method == ARM64_BUS_NONE) {
792 if (has_fdt)
793 arm64_bus_method = ARM64_BUS_FDT;
794 else if (has_acpi)
795 arm64_bus_method = ARM64_BUS_ACPI;
796 }
797
798 /*
799 * If no option was set the default is valid, otherwise we are
800 * setting one to get cninit() working, then calling panic to tell
801 * the user about the invalid bus setup.
802 */
803 return (env == NULL);
804 }
805
806 static void
cache_setup(void)807 cache_setup(void)
808 {
809 int dczva_line_shift;
810 uint32_t dczid_el0;
811
812 identify_cache(READ_SPECIALREG(ctr_el0));
813
814 dczid_el0 = READ_SPECIALREG(dczid_el0);
815
816 /* Check if dc zva is not prohibited */
817 if (dczid_el0 & DCZID_DZP)
818 dczva_line_size = 0;
819 else {
820 /* Same as with above calculations */
821 dczva_line_shift = DCZID_BS_SIZE(dczid_el0);
822 dczva_line_size = sizeof(int) << dczva_line_shift;
823
824 /* Change pagezero function */
825 pagezero = pagezero_cache;
826 }
827 }
828
829 int
memory_mapping_mode(vm_paddr_t pa)830 memory_mapping_mode(vm_paddr_t pa)
831 {
832 struct efi_md *map, *p;
833 size_t efisz;
834 int ndesc, i;
835
836 if (efihdr == NULL)
837 return (VM_MEMATTR_WRITE_BACK);
838
839 /*
840 * Memory map data provided by UEFI via the GetMemoryMap
841 * Boot Services API.
842 */
843 efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
844 map = (struct efi_md *)((uint8_t *)efihdr + efisz);
845
846 if (efihdr->descriptor_size == 0)
847 return (VM_MEMATTR_WRITE_BACK);
848 ndesc = efihdr->memory_size / efihdr->descriptor_size;
849
850 for (i = 0, p = map; i < ndesc; i++,
851 p = efi_next_descriptor(p, efihdr->descriptor_size)) {
852 if (pa < p->md_phys ||
853 pa >= p->md_phys + p->md_pages * EFI_PAGE_SIZE)
854 continue;
855 if (p->md_type == EFI_MD_TYPE_IOMEM ||
856 p->md_type == EFI_MD_TYPE_IOPORT)
857 return (VM_MEMATTR_DEVICE);
858 else if ((p->md_attr & EFI_MD_ATTR_WB) != 0 ||
859 p->md_type == EFI_MD_TYPE_RECLAIM)
860 return (VM_MEMATTR_WRITE_BACK);
861 else if ((p->md_attr & EFI_MD_ATTR_WT) != 0)
862 return (VM_MEMATTR_WRITE_THROUGH);
863 else if ((p->md_attr & EFI_MD_ATTR_WC) != 0)
864 return (VM_MEMATTR_WRITE_COMBINING);
865 break;
866 }
867
868 return (VM_MEMATTR_DEVICE);
869 }
870
871 void
initarm(struct arm64_bootparams * abp)872 initarm(struct arm64_bootparams *abp)
873 {
874 struct efi_fb *efifb;
875 struct pcpu *pcpup;
876 char *env;
877 #ifdef FDT
878 struct mem_region mem_regions[FDT_MEM_REGIONS];
879 int mem_regions_sz;
880 phandle_t root;
881 char dts_version[255];
882 #endif
883 vm_offset_t lastaddr;
884 caddr_t kmdp;
885 bool valid;
886
887 TSRAW(&thread0, TS_ENTER, __func__, NULL);
888
889 boot_el = abp->boot_el;
890 hcr_el2 = abp->hcr_el2;
891
892 /* Parse loader or FDT boot parameters. Determine last used address. */
893 lastaddr = parse_boot_param(abp);
894
895 /* Find the kernel address */
896 kmdp = preload_search_by_type("elf kernel");
897 if (kmdp == NULL)
898 kmdp = preload_search_by_type("elf64 kernel");
899
900 identify_cpu(0);
901 identify_hypervisor_smbios();
902
903 update_special_regs(0);
904
905 link_elf_ireloc(kmdp);
906 #ifdef FDT
907 try_load_dtb(kmdp);
908 #endif
909
910 efi_systbl_phys = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t);
911
912 /* Load the physical memory ranges */
913 efihdr = (struct efi_map_header *)preload_search_info(kmdp,
914 MODINFO_METADATA | MODINFOMD_EFI_MAP);
915 if (efihdr != NULL)
916 add_efi_map_entries(efihdr);
917 #ifdef FDT
918 else {
919 /* Grab physical memory regions information from device tree. */
920 if (fdt_get_mem_regions(mem_regions, &mem_regions_sz,
921 NULL) != 0)
922 panic("Cannot get physical memory regions");
923 physmem_hardware_regions(mem_regions, mem_regions_sz);
924 }
925 if (fdt_get_reserved_mem(mem_regions, &mem_regions_sz) == 0)
926 physmem_exclude_regions(mem_regions, mem_regions_sz,
927 EXFLAG_NODUMP | EXFLAG_NOALLOC);
928 #endif
929
930 /* Exclude the EFI framebuffer from our view of physical memory. */
931 efifb = (struct efi_fb *)preload_search_info(kmdp,
932 MODINFO_METADATA | MODINFOMD_EFI_FB);
933 if (efifb != NULL)
934 physmem_exclude_region(efifb->fb_addr, efifb->fb_size,
935 EXFLAG_NOALLOC);
936
937 /* Set the pcpu data, this is needed by pmap_bootstrap */
938 pcpup = &pcpu0;
939 pcpu_init(pcpup, 0, sizeof(struct pcpu));
940
941 /*
942 * Set the pcpu pointer with a backup in tpidr_el1 to be
943 * loaded when entering the kernel from userland.
944 */
945 __asm __volatile(
946 "mov x18, %0 \n"
947 "msr tpidr_el1, %0" :: "r"(pcpup));
948
949 /* locore.S sets sp_el0 to &thread0 so no need to set it here. */
950 PCPU_SET(curthread, &thread0);
951 PCPU_SET(midr, get_midr());
952
953 /* Do basic tuning, hz etc */
954 init_param1();
955
956 cache_setup();
957 pan_setup();
958
959 /* Bootstrap enough of pmap to enter the kernel proper */
960 pmap_bootstrap(lastaddr - KERNBASE);
961 /* Exclude entries needed in the DMAP region, but not phys_avail */
962 if (efihdr != NULL)
963 exclude_efi_map_entries(efihdr);
964 /* Do the same for reserve entries in the EFI MEMRESERVE table */
965 if (efi_systbl_phys != 0)
966 exclude_efi_memreserve(efi_systbl_phys);
967
968 /*
969 * We carefully bootstrap the sanitizer map after we've excluded
970 * absolutely everything else that could impact phys_avail. There's not
971 * always enough room for the initial shadow map after the kernel, so
972 * we'll end up searching for segments that we can safely use. Those
973 * segments also get excluded from phys_avail.
974 */
975 #if defined(KASAN)
976 pmap_bootstrap_san();
977 #endif
978
979 physmem_init_kernel_globals();
980
981 devmap_bootstrap(0, NULL);
982
983 valid = bus_probe();
984
985 cninit();
986 set_ttbr0(abp->kern_ttbr0);
987 cpu_tlb_flushID();
988
989 if (!valid)
990 panic("Invalid bus configuration: %s",
991 kern_getenv("kern.cfg.order"));
992
993 /*
994 * Check if pointer authentication is available on this system, and
995 * if so enable its use. This needs to be called before init_proc0
996 * as that will configure the thread0 pointer authentication keys.
997 */
998 ptrauth_init();
999
1000 /*
1001 * Dump the boot metadata. We have to wait for cninit() since console
1002 * output is required. If it's grossly incorrect the kernel will never
1003 * make it this far.
1004 */
1005 if (getenv_is_true("debug.dump_modinfo_at_boot"))
1006 preload_dump();
1007
1008 init_proc0(abp->kern_stack);
1009 msgbufinit(msgbufp, msgbufsize);
1010 mutex_init();
1011 init_param2(physmem);
1012
1013 dbg_init();
1014 kdb_init();
1015 #ifdef KDB
1016 if ((boothowto & RB_KDB) != 0)
1017 kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
1018 #endif
1019 pan_enable();
1020
1021 kcsan_cpu_init(0);
1022 kasan_init();
1023
1024 env = kern_getenv("kernelname");
1025 if (env != NULL)
1026 strlcpy(kernelname, env, sizeof(kernelname));
1027
1028 #ifdef FDT
1029 if (arm64_bus_method == ARM64_BUS_FDT) {
1030 root = OF_finddevice("/");
1031 if (OF_getprop(root, "freebsd,dts-version", dts_version, sizeof(dts_version)) > 0) {
1032 if (strcmp(LINUX_DTS_VERSION, dts_version) != 0)
1033 printf("WARNING: DTB version is %s while kernel expects %s, "
1034 "please update the DTB in the ESP\n",
1035 dts_version,
1036 LINUX_DTS_VERSION);
1037 } else {
1038 printf("WARNING: Cannot find freebsd,dts-version property, "
1039 "cannot check DTB compliance\n");
1040 }
1041 }
1042 #endif
1043
1044 if (boothowto & RB_VERBOSE) {
1045 if (efihdr != NULL)
1046 print_efi_map_entries(efihdr);
1047 physmem_print_tables();
1048 }
1049
1050 early_boot = 0;
1051
1052 if (bootverbose && kstack_pages != KSTACK_PAGES)
1053 printf("kern.kstack_pages = %d ignored for thread0\n",
1054 kstack_pages);
1055
1056 TSEXIT();
1057 }
1058
1059 void
dbg_init(void)1060 dbg_init(void)
1061 {
1062
1063 /* Clear OS lock */
1064 WRITE_SPECIALREG(oslar_el1, 0);
1065
1066 /* This permits DDB to use debug registers for watchpoints. */
1067 dbg_monitor_init();
1068
1069 /* TODO: Eventually will need to initialize debug registers here. */
1070 }
1071
1072 #ifdef DDB
1073 #include <ddb/ddb.h>
1074
DB_SHOW_COMMAND(specialregs,db_show_spregs)1075 DB_SHOW_COMMAND(specialregs, db_show_spregs)
1076 {
1077 #define PRINT_REG(reg) \
1078 db_printf(__STRING(reg) " = %#016lx\n", READ_SPECIALREG(reg))
1079
1080 PRINT_REG(actlr_el1);
1081 PRINT_REG(afsr0_el1);
1082 PRINT_REG(afsr1_el1);
1083 PRINT_REG(aidr_el1);
1084 PRINT_REG(amair_el1);
1085 PRINT_REG(ccsidr_el1);
1086 PRINT_REG(clidr_el1);
1087 PRINT_REG(contextidr_el1);
1088 PRINT_REG(cpacr_el1);
1089 PRINT_REG(csselr_el1);
1090 PRINT_REG(ctr_el0);
1091 PRINT_REG(currentel);
1092 PRINT_REG(daif);
1093 PRINT_REG(dczid_el0);
1094 PRINT_REG(elr_el1);
1095 PRINT_REG(esr_el1);
1096 PRINT_REG(far_el1);
1097 #if 0
1098 /* ARM64TODO: Enable VFP before reading floating-point registers */
1099 PRINT_REG(fpcr);
1100 PRINT_REG(fpsr);
1101 #endif
1102 PRINT_REG(id_aa64afr0_el1);
1103 PRINT_REG(id_aa64afr1_el1);
1104 PRINT_REG(id_aa64dfr0_el1);
1105 PRINT_REG(id_aa64dfr1_el1);
1106 PRINT_REG(id_aa64isar0_el1);
1107 PRINT_REG(id_aa64isar1_el1);
1108 PRINT_REG(id_aa64pfr0_el1);
1109 PRINT_REG(id_aa64pfr1_el1);
1110 PRINT_REG(id_afr0_el1);
1111 PRINT_REG(id_dfr0_el1);
1112 PRINT_REG(id_isar0_el1);
1113 PRINT_REG(id_isar1_el1);
1114 PRINT_REG(id_isar2_el1);
1115 PRINT_REG(id_isar3_el1);
1116 PRINT_REG(id_isar4_el1);
1117 PRINT_REG(id_isar5_el1);
1118 PRINT_REG(id_mmfr0_el1);
1119 PRINT_REG(id_mmfr1_el1);
1120 PRINT_REG(id_mmfr2_el1);
1121 PRINT_REG(id_mmfr3_el1);
1122 #if 0
1123 /* Missing from llvm */
1124 PRINT_REG(id_mmfr4_el1);
1125 #endif
1126 PRINT_REG(id_pfr0_el1);
1127 PRINT_REG(id_pfr1_el1);
1128 PRINT_REG(isr_el1);
1129 PRINT_REG(mair_el1);
1130 PRINT_REG(midr_el1);
1131 PRINT_REG(mpidr_el1);
1132 PRINT_REG(mvfr0_el1);
1133 PRINT_REG(mvfr1_el1);
1134 PRINT_REG(mvfr2_el1);
1135 PRINT_REG(revidr_el1);
1136 PRINT_REG(sctlr_el1);
1137 PRINT_REG(sp_el0);
1138 PRINT_REG(spsel);
1139 PRINT_REG(spsr_el1);
1140 PRINT_REG(tcr_el1);
1141 PRINT_REG(tpidr_el0);
1142 PRINT_REG(tpidr_el1);
1143 PRINT_REG(tpidrro_el0);
1144 PRINT_REG(ttbr0_el1);
1145 PRINT_REG(ttbr1_el1);
1146 PRINT_REG(vbar_el1);
1147 #undef PRINT_REG
1148 }
1149
DB_SHOW_COMMAND(vtop,db_show_vtop)1150 DB_SHOW_COMMAND(vtop, db_show_vtop)
1151 {
1152 uint64_t phys;
1153
1154 if (have_addr) {
1155 phys = arm64_address_translate_s1e1r(addr);
1156 db_printf("EL1 physical address reg (read): 0x%016lx\n", phys);
1157 phys = arm64_address_translate_s1e1w(addr);
1158 db_printf("EL1 physical address reg (write): 0x%016lx\n", phys);
1159 phys = arm64_address_translate_s1e0r(addr);
1160 db_printf("EL0 physical address reg (read): 0x%016lx\n", phys);
1161 phys = arm64_address_translate_s1e0w(addr);
1162 db_printf("EL0 physical address reg (write): 0x%016lx\n", phys);
1163 } else
1164 db_printf("show vtop <virt_addr>\n");
1165 }
1166 #endif
1167