xref: /freebsd-14.2/sys/arm64/arm64/machdep.c (revision c22bc90d)
1 /*-
2  * Copyright (c) 2014 Andrew Turner
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  */
27 
28 #include "opt_acpi.h"
29 #include "opt_kstack_pages.h"
30 #include "opt_platform.h"
31 #include "opt_ddb.h"
32 
33 #include <sys/cdefs.h>
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/asan.h>
37 #include <sys/buf.h>
38 #include <sys/bus.h>
39 #include <sys/cons.h>
40 #include <sys/cpu.h>
41 #include <sys/csan.h>
42 #include <sys/devmap.h>
43 #include <sys/efi.h>
44 #include <sys/exec.h>
45 #include <sys/imgact.h>
46 #include <sys/kdb.h>
47 #include <sys/kernel.h>
48 #include <sys/ktr.h>
49 #include <sys/limits.h>
50 #include <sys/linker.h>
51 #include <sys/msgbuf.h>
52 #include <sys/pcpu.h>
53 #include <sys/physmem.h>
54 #include <sys/proc.h>
55 #include <sys/ptrace.h>
56 #include <sys/reboot.h>
57 #include <sys/reg.h>
58 #include <sys/rwlock.h>
59 #include <sys/sched.h>
60 #include <sys/signalvar.h>
61 #include <sys/syscallsubr.h>
62 #include <sys/sysent.h>
63 #include <sys/sysproto.h>
64 #include <sys/ucontext.h>
65 #include <sys/vdso.h>
66 #include <sys/vmmeter.h>
67 
68 #include <vm/vm.h>
69 #include <vm/vm_param.h>
70 #include <vm/vm_kern.h>
71 #include <vm/vm_object.h>
72 #include <vm/vm_page.h>
73 #include <vm/vm_phys.h>
74 #include <vm/pmap.h>
75 #include <vm/vm_map.h>
76 #include <vm/vm_pager.h>
77 
78 #include <machine/armreg.h>
79 #include <machine/cpu.h>
80 #include <machine/debug_monitor.h>
81 #include <machine/hypervisor.h>
82 #include <machine/kdb.h>
83 #include <machine/machdep.h>
84 #include <machine/metadata.h>
85 #include <machine/md_var.h>
86 #include <machine/pcb.h>
87 #include <machine/undefined.h>
88 #include <machine/vmparam.h>
89 
90 #ifdef VFP
91 #include <machine/vfp.h>
92 #endif
93 
94 #ifdef DEV_ACPI
95 #include <contrib/dev/acpica/include/acpi.h>
96 #include <machine/acpica_machdep.h>
97 #endif
98 
99 #ifdef FDT
100 #include <dev/fdt/fdt_common.h>
101 #include <dev/ofw/openfirm.h>
102 #endif
103 
104 #include <dev/smbios/smbios.h>
105 
106 _Static_assert(sizeof(struct pcb) == 1248, "struct pcb is incorrect size");
107 _Static_assert(offsetof(struct pcb, pcb_fpusaved) == 136,
108     "pcb_fpusaved changed offset");
109 _Static_assert(offsetof(struct pcb, pcb_fpustate) == 192,
110     "pcb_fpustate changed offset");
111 
112 enum arm64_bus arm64_bus_method = ARM64_BUS_NONE;
113 
114 /*
115  * XXX: The .bss is assumed to be in the boot CPU NUMA domain. If not we
116  * could relocate this, but will need to keep the same virtual address as
117  * it's reverenced by the EARLY_COUNTER macro.
118  */
119 struct pcpu pcpu0;
120 
121 #if defined(PERTHREAD_SSP)
122 /*
123  * The boot SSP canary. Will be replaced with a per-thread canary when
124  * scheduling has started.
125  */
126 uintptr_t boot_canary = 0x49a2d892bc05a0b1ul;
127 #endif
128 
129 static struct trapframe proc0_tf;
130 
131 int early_boot = 1;
132 int cold = 1;
133 static int boot_el;
134 static uint64_t hcr_el2;
135 
136 struct kva_md_info kmi;
137 
138 int64_t dczva_line_size;	/* The size of cache line the dc zva zeroes */
139 int has_pan;
140 
141 /*
142  * Physical address of the EFI System Table. Stashed from the metadata hints
143  * passed into the kernel and used by the EFI code to call runtime services.
144  */
145 vm_paddr_t efi_systbl_phys;
146 static struct efi_map_header *efihdr;
147 
148 /* pagezero_* implementations are provided in support.S */
149 void pagezero_simple(void *);
150 void pagezero_cache(void *);
151 
152 /* pagezero_simple is default pagezero */
153 void (*pagezero)(void *p) = pagezero_simple;
154 
155 int (*apei_nmi)(void);
156 
#if defined(PERTHREAD_SSP_WARNING)
/*
 * Warn that per-thread SSP was requested but the compiler cannot provide it.
 * The two SYSINIT registrations below run this at SI_SUB_COPYRIGHT and again
 * at SI_SUB_LAST, so the message is printed twice during boot.
 */
static void
print_ssp_warning(void *data __unused)
{

	printf("WARNING: Per-thread SSP is enabled but the compiler is too old to support it\n");
}
SYSINIT(ssp_warn, SI_SUB_COPYRIGHT, SI_ORDER_ANY, print_ssp_warning, NULL);
SYSINIT(ssp_warn2, SI_SUB_LAST, SI_ORDER_ANY, print_ssp_warning, NULL);
#endif
166 
167 static void
pan_setup(void)168 pan_setup(void)
169 {
170 	uint64_t id_aa64mfr1;
171 
172 	id_aa64mfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
173 	if (ID_AA64MMFR1_PAN_VAL(id_aa64mfr1) != ID_AA64MMFR1_PAN_NONE)
174 		has_pan = 1;
175 }
176 
177 void
pan_enable(void)178 pan_enable(void)
179 {
180 
181 	/*
182 	 * The LLVM integrated assembler doesn't understand the PAN
183 	 * PSTATE field. Because of this we need to manually create
184 	 * the instruction in an asm block. This is equivalent to:
185 	 * msr pan, #1
186 	 *
187 	 * This sets the PAN bit, stopping the kernel from accessing
188 	 * memory when userspace can also access it unless the kernel
189 	 * uses the userspace load/store instructions.
190 	 */
191 	if (has_pan) {
192 		WRITE_SPECIALREG(sctlr_el1,
193 		    READ_SPECIALREG(sctlr_el1) & ~SCTLR_SPAN);
194 		__asm __volatile(".inst 0xd500409f | (0x1 << 8)");
195 	}
196 }
197 
198 bool
has_hyp(void)199 has_hyp(void)
200 {
201 
202 	/*
203 	 * XXX The E2H check is wrong, but it's close enough for now.  Needs to
204 	 * be re-evaluated once we're running regularly in EL2.
205 	 */
206 	return (boot_el == 2 && (hcr_el2 & HCR_E2H) == 0);
207 }
208 
209 static void
cpu_startup(void * dummy)210 cpu_startup(void *dummy)
211 {
212 	vm_paddr_t size;
213 	int i;
214 
215 	printf("real memory  = %ju (%ju MB)\n", ptoa((uintmax_t)realmem),
216 	    ptoa((uintmax_t)realmem) / 1024 / 1024);
217 
218 	if (bootverbose) {
219 		printf("Physical memory chunk(s):\n");
220 		for (i = 0; phys_avail[i + 1] != 0; i += 2) {
221 			size = phys_avail[i + 1] - phys_avail[i];
222 			printf("%#016jx - %#016jx, %ju bytes (%ju pages)\n",
223 			    (uintmax_t)phys_avail[i],
224 			    (uintmax_t)phys_avail[i + 1] - 1,
225 			    (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
226 		}
227 	}
228 
229 	printf("avail memory = %ju (%ju MB)\n",
230 	    ptoa((uintmax_t)vm_free_count()),
231 	    ptoa((uintmax_t)vm_free_count()) / 1024 / 1024);
232 
233 	undef_init();
234 	install_cpu_errata();
235 
236 	vm_ksubmap_init(&kmi);
237 	bufinit();
238 	vm_pager_bufferinit();
239 }
240 
241 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
242 
243 static void
late_ifunc_resolve(void * dummy __unused)244 late_ifunc_resolve(void *dummy __unused)
245 {
246 	link_elf_late_ireloc();
247 }
248 SYSINIT(late_ifunc_resolve, SI_SUB_CPU, SI_ORDER_ANY, late_ifunc_resolve, NULL);
249 
/*
 * Idle wakeup hook.  This implementation takes no action for any CPU and
 * always returns 0.
 */
int
cpu_idle_wakeup(int cpu)
{
	(void)cpu;

	return (0);
}
256 
257 void
cpu_idle(int busy)258 cpu_idle(int busy)
259 {
260 
261 	spinlock_enter();
262 	if (!busy)
263 		cpu_idleclock();
264 	if (!sched_runnable())
265 		__asm __volatile(
266 		    "dsb sy \n"
267 		    "wfi    \n");
268 	if (!busy)
269 		cpu_activeclock();
270 	spinlock_exit();
271 }
272 
/*
 * Park the CPU forever.  We should have shut down by now; if not, disable
 * interrupts and sit in a low power wait.  Never returns.
 */
void
cpu_halt(void)
{

	intr_disable();
	for (;;)
		__asm __volatile("wfi");
}
283 
/*
 * Flush the D-cache for non-DMA I/O so that the I-cache can
 * be made coherent later.
 *
 * Currently a placeholder: neither argument is used and nothing is flushed.
 */
void
cpu_flush_dcache(void *ptr, size_t len)
{
	(void)ptr;
	(void)len;

	/* ARM64TODO TBD */
}
294 
295 /* Get current clock frequency for the given CPU ID. */
296 int
cpu_est_clockrate(int cpu_id,uint64_t * rate)297 cpu_est_clockrate(int cpu_id, uint64_t *rate)
298 {
299 	struct pcpu *pc;
300 
301 	pc = pcpu_find(cpu_id);
302 	if (pc == NULL || rate == NULL)
303 		return (EINVAL);
304 
305 	if (pc->pc_clock == 0)
306 		return (EOPNOTSUPP);
307 
308 	*rate = pc->pc_clock;
309 	return (0);
310 }
311 
312 void
cpu_pcpu_init(struct pcpu * pcpu,int cpuid,size_t size)313 cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
314 {
315 
316 	pcpu->pc_acpi_id = 0xffffffff;
317 	pcpu->pc_mpidr = UINT64_MAX;
318 }
319 
320 void
spinlock_enter(void)321 spinlock_enter(void)
322 {
323 	struct thread *td;
324 	register_t daif;
325 
326 	td = curthread;
327 	if (td->td_md.md_spinlock_count == 0) {
328 		daif = intr_disable();
329 		td->td_md.md_spinlock_count = 1;
330 		td->td_md.md_saved_daif = daif;
331 		critical_enter();
332 	} else
333 		td->td_md.md_spinlock_count++;
334 }
335 
336 void
spinlock_exit(void)337 spinlock_exit(void)
338 {
339 	struct thread *td;
340 	register_t daif;
341 
342 	td = curthread;
343 	daif = td->td_md.md_saved_daif;
344 	td->td_md.md_spinlock_count--;
345 	if (td->td_md.md_spinlock_count == 0) {
346 		critical_exit();
347 		intr_restore(daif);
348 	}
349 }
350 
351 /*
352  * Construct a PCB from a trapframe. This is called from kdb_trap() where
353  * we want to start a backtrace from the function that caused us to enter
354  * the debugger. We have the context in the trapframe, but base the trace
355  * on the PCB. The PCB doesn't have to be perfect, as long as it contains
356  * enough for a backtrace.
357  */
358 void
makectx(struct trapframe * tf,struct pcb * pcb)359 makectx(struct trapframe *tf, struct pcb *pcb)
360 {
361 	int i;
362 
363 	/* NB: pcb_x[PCB_LR] is the PC, see PC_REGS() in db_machdep.h */
364 	for (i = 0; i < nitems(pcb->pcb_x); i++) {
365 		if (i == PCB_LR)
366 			pcb->pcb_x[i] = tf->tf_elr;
367 		else
368 			pcb->pcb_x[i] = tf->tf_x[i + PCB_X_START];
369 	}
370 
371 	pcb->pcb_sp = tf->tf_sp;
372 }
373 
374 static void
init_proc0(vm_offset_t kstack)375 init_proc0(vm_offset_t kstack)
376 {
377 	struct pcpu *pcpup;
378 
379 	pcpup = cpuid_to_pcpu[0];
380 	MPASS(pcpup != NULL);
381 
382 	proc_linkup0(&proc0, &thread0);
383 	thread0.td_kstack = kstack;
384 	thread0.td_kstack_pages = KSTACK_PAGES;
385 #if defined(PERTHREAD_SSP)
386 	thread0.td_md.md_canary = boot_canary;
387 #endif
388 	thread0.td_pcb = (struct pcb *)(thread0.td_kstack +
389 	    thread0.td_kstack_pages * PAGE_SIZE) - 1;
390 	thread0.td_pcb->pcb_flags = 0;
391 	thread0.td_pcb->pcb_fpflags = 0;
392 	thread0.td_pcb->pcb_fpusaved = &thread0.td_pcb->pcb_fpustate;
393 	thread0.td_pcb->pcb_vfpcpu = UINT_MAX;
394 	thread0.td_frame = &proc0_tf;
395 	ptrauth_thread0(&thread0);
396 	pcpup->pc_curpcb = thread0.td_pcb;
397 
398 	/*
399 	 * Unmask SError exceptions. They are used to signal a RAS failure,
400 	 * or other hardware error.
401 	 */
402 	serror_enable();
403 }
404 
405 /*
406  * Get an address to be used to write to kernel data that may be mapped
407  * read-only, e.g. to patch kernel code.
408  */
409 bool
arm64_get_writable_addr(vm_offset_t addr,vm_offset_t * out)410 arm64_get_writable_addr(vm_offset_t addr, vm_offset_t *out)
411 {
412 	vm_paddr_t pa;
413 
414 	/* Check if the page is writable */
415 	if (PAR_SUCCESS(arm64_address_translate_s1e1w(addr))) {
416 		*out = addr;
417 		return (true);
418 	}
419 
420 	/*
421 	 * Find the physical address of the given page.
422 	 */
423 	if (!pmap_klookup(addr, &pa)) {
424 		return (false);
425 	}
426 
427 	/*
428 	 * If it is within the DMAP region and is writable use that.
429 	 */
430 	if (PHYS_IN_DMAP_RANGE(pa)) {
431 		addr = PHYS_TO_DMAP(pa);
432 		if (PAR_SUCCESS(arm64_address_translate_s1e1w(addr))) {
433 			*out = addr;
434 			return (true);
435 		}
436 	}
437 
438 	return (false);
439 }
440 
441 typedef void (*efi_map_entry_cb)(struct efi_md *, void *argp);
442 
443 static void
foreach_efi_map_entry(struct efi_map_header * efihdr,efi_map_entry_cb cb,void * argp)444 foreach_efi_map_entry(struct efi_map_header *efihdr, efi_map_entry_cb cb, void *argp)
445 {
446 	struct efi_md *map, *p;
447 	size_t efisz;
448 	int ndesc, i;
449 
450 	/*
451 	 * Memory map data provided by UEFI via the GetMemoryMap
452 	 * Boot Services API.
453 	 */
454 	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
455 	map = (struct efi_md *)((uint8_t *)efihdr + efisz);
456 
457 	if (efihdr->descriptor_size == 0)
458 		return;
459 	ndesc = efihdr->memory_size / efihdr->descriptor_size;
460 
461 	for (i = 0, p = map; i < ndesc; i++,
462 	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
463 		cb(p, argp);
464 	}
465 }
466 
467 /*
468  * Handle the EFI memory map list.
469  *
470  * We will make two passes at this, the first (exclude == false) to populate
471  * physmem with valid physical memory ranges from recognized map entry types.
472  * In the second pass we will exclude memory ranges from physmem which must not
473  * be used for general allocations, either because they are used by runtime
474  * firmware or otherwise reserved.
475  *
476  * Adding the runtime-reserved memory ranges to physmem and excluding them
477  * later ensures that they are included in the DMAP, but excluded from
478  * phys_avail[].
479  *
480  * Entry types not explicitly listed here are ignored and not mapped.
481  */
482 static void
handle_efi_map_entry(struct efi_md * p,void * argp)483 handle_efi_map_entry(struct efi_md *p, void *argp)
484 {
485 	bool exclude = *(bool *)argp;
486 
487 	switch (p->md_type) {
488 	case EFI_MD_TYPE_RECLAIM:
489 		/*
490 		 * The recomended location for ACPI tables. Map into the
491 		 * DMAP so we can access them from userspace via /dev/mem.
492 		 */
493 	case EFI_MD_TYPE_RT_CODE:
494 		/*
495 		 * Some UEFI implementations put the system table in the
496 		 * runtime code section. Include it in the DMAP, but will
497 		 * be excluded from phys_avail.
498 		 */
499 	case EFI_MD_TYPE_RT_DATA:
500 		/*
501 		 * Runtime data will be excluded after the DMAP
502 		 * region is created to stop it from being added
503 		 * to phys_avail.
504 		 */
505 		if (exclude) {
506 			physmem_exclude_region(p->md_phys,
507 			    p->md_pages * EFI_PAGE_SIZE, EXFLAG_NOALLOC);
508 			break;
509 		}
510 		/* FALLTHROUGH */
511 	case EFI_MD_TYPE_CODE:
512 	case EFI_MD_TYPE_DATA:
513 	case EFI_MD_TYPE_BS_CODE:
514 	case EFI_MD_TYPE_BS_DATA:
515 	case EFI_MD_TYPE_FREE:
516 		/*
517 		 * We're allowed to use any entry with these types.
518 		 */
519 		if (!exclude)
520 			physmem_hardware_region(p->md_phys,
521 			    p->md_pages * EFI_PAGE_SIZE);
522 		break;
523 	default:
524 		/* Other types shall not be handled by physmem. */
525 		break;
526 	}
527 }
528 
529 static void
add_efi_map_entries(struct efi_map_header * efihdr)530 add_efi_map_entries(struct efi_map_header *efihdr)
531 {
532 	bool exclude = false;
533 	foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude);
534 }
535 
536 static void
exclude_efi_map_entries(struct efi_map_header * efihdr)537 exclude_efi_map_entries(struct efi_map_header *efihdr)
538 {
539 	bool exclude = true;
540 	foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude);
541 }
542 
543 static void
print_efi_map_entry(struct efi_md * p,void * argp __unused)544 print_efi_map_entry(struct efi_md *p, void *argp __unused)
545 {
546 	const char *type;
547 	static const char *types[] = {
548 		"Reserved",
549 		"LoaderCode",
550 		"LoaderData",
551 		"BootServicesCode",
552 		"BootServicesData",
553 		"RuntimeServicesCode",
554 		"RuntimeServicesData",
555 		"ConventionalMemory",
556 		"UnusableMemory",
557 		"ACPIReclaimMemory",
558 		"ACPIMemoryNVS",
559 		"MemoryMappedIO",
560 		"MemoryMappedIOPortSpace",
561 		"PalCode",
562 		"PersistentMemory"
563 	};
564 
565 	if (p->md_type < nitems(types))
566 		type = types[p->md_type];
567 	else
568 		type = "<INVALID>";
569 	printf("%23s %012lx %012lx %08lx ", type, p->md_phys,
570 	    p->md_virt, p->md_pages);
571 	if (p->md_attr & EFI_MD_ATTR_UC)
572 		printf("UC ");
573 	if (p->md_attr & EFI_MD_ATTR_WC)
574 		printf("WC ");
575 	if (p->md_attr & EFI_MD_ATTR_WT)
576 		printf("WT ");
577 	if (p->md_attr & EFI_MD_ATTR_WB)
578 		printf("WB ");
579 	if (p->md_attr & EFI_MD_ATTR_UCE)
580 		printf("UCE ");
581 	if (p->md_attr & EFI_MD_ATTR_WP)
582 		printf("WP ");
583 	if (p->md_attr & EFI_MD_ATTR_RP)
584 		printf("RP ");
585 	if (p->md_attr & EFI_MD_ATTR_XP)
586 		printf("XP ");
587 	if (p->md_attr & EFI_MD_ATTR_NV)
588 		printf("NV ");
589 	if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE)
590 		printf("MORE_RELIABLE ");
591 	if (p->md_attr & EFI_MD_ATTR_RO)
592 		printf("RO ");
593 	if (p->md_attr & EFI_MD_ATTR_RT)
594 		printf("RUNTIME");
595 	printf("\n");
596 }
597 
598 static void
print_efi_map_entries(struct efi_map_header * efihdr)599 print_efi_map_entries(struct efi_map_header *efihdr)
600 {
601 
602 	printf("%23s %12s %12s %8s %4s\n",
603 	    "Type", "Physical", "Virtual", "#Pages", "Attr");
604 	foreach_efi_map_entry(efihdr, print_efi_map_entry, NULL);
605 }
606 
607 /*
608  * Map the passed in VA in EFI space to a void * using the efi memory table to
609  * find the PA and return it in the DMAP, if it exists. We're used between the
610  * calls to pmap_bootstrap() and physmem_init_kernel_globals() to parse CFG
611  * tables We assume that either the entry you are mapping fits within its page,
612  * or if it spills to the next page, that's contiguous in PA and in the DMAP.
613  * All observed tables obey the first part of this precondition.
614  */
615 struct early_map_data
616 {
617 	vm_offset_t va;
618 	vm_offset_t pa;
619 };
620 
621 static void
efi_early_map_entry(struct efi_md * p,void * argp)622 efi_early_map_entry(struct efi_md *p, void *argp)
623 {
624 	struct early_map_data *emdp = argp;
625 	vm_offset_t s, e;
626 
627 	if (emdp->pa != 0)
628 		return;
629 	if ((p->md_attr & EFI_MD_ATTR_RT) == 0)
630 		return;
631 	s = p->md_virt;
632 	e = p->md_virt + p->md_pages * EFI_PAGE_SIZE;
633 	if (emdp->va < s  || emdp->va >= e)
634 		return;
635 	emdp->pa = p->md_phys + (emdp->va - p->md_virt);
636 }
637 
638 static void *
efi_early_map(vm_offset_t va)639 efi_early_map(vm_offset_t va)
640 {
641 	struct early_map_data emd = { .va = va };
642 
643 	foreach_efi_map_entry(efihdr, efi_early_map_entry, &emd);
644 	if (emd.pa == 0)
645 		return NULL;
646 	return (void *)PHYS_TO_DMAP(emd.pa);
647 }
648 
649 
650 /*
651  * When booted via kboot, the prior kernel will pass in reserved memory areas in
652  * a EFI config table. We need to find that table and walk through it excluding
653  * the memory ranges in it. btw, this is called too early for the printf to do
654  * anything since msgbufp isn't initialized, let alone a console...
655  */
656 static void
exclude_efi_memreserve(vm_offset_t efi_systbl_phys)657 exclude_efi_memreserve(vm_offset_t efi_systbl_phys)
658 {
659 	struct efi_systbl *systbl;
660 	struct uuid efi_memreserve = LINUX_EFI_MEMRESERVE_TABLE;
661 
662 	systbl = (struct efi_systbl *)PHYS_TO_DMAP(efi_systbl_phys);
663 	if (systbl == NULL) {
664 		printf("can't map systbl\n");
665 		return;
666 	}
667 	if (systbl->st_hdr.th_sig != EFI_SYSTBL_SIG) {
668 		printf("Bad signature for systbl %#lx\n", systbl->st_hdr.th_sig);
669 		return;
670 	}
671 
672 	/*
673 	 * We don't yet have the pmap system booted enough to create a pmap for
674 	 * the efi firmware's preferred address space from the GetMemoryMap()
675 	 * table. The st_cfgtbl is a VA in this space, so we need to do the
676 	 * mapping ourselves to a kernel VA with efi_early_map. We assume that
677 	 * the cfgtbl entries don't span a page. Other pointers are PAs, as
678 	 * noted below.
679 	 */
680 	if (systbl->st_cfgtbl == 0)	/* Failsafe st_entries should == 0 in this case */
681 		return;
682 	for (int i = 0; i < systbl->st_entries; i++) {
683 		struct efi_cfgtbl *cfgtbl;
684 		struct linux_efi_memreserve *mr;
685 
686 		cfgtbl = efi_early_map(systbl->st_cfgtbl + i * sizeof(*cfgtbl));
687 		if (cfgtbl == NULL)
688 			panic("Can't map the config table entry %d\n", i);
689 		if (memcmp(&cfgtbl->ct_uuid, &efi_memreserve, sizeof(struct uuid)) != 0)
690 			continue;
691 
692 		/*
693 		 * cfgtbl points are either VA or PA, depending on the GUID of
694 		 * the table. memreserve GUID pointers are PA and not converted
695 		 * after a SetVirtualAddressMap(). The list's mr_next pointer
696 		 * is also a PA.
697 		 */
698 		mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(
699 			(vm_offset_t)cfgtbl->ct_data);
700 		while (true) {
701 			for (int j = 0; j < mr->mr_count; j++) {
702 				struct linux_efi_memreserve_entry *mre;
703 
704 				mre = &mr->mr_entry[j];
705 				physmem_exclude_region(mre->mre_base, mre->mre_size,
706 				    EXFLAG_NODUMP | EXFLAG_NOALLOC);
707 			}
708 			if (mr->mr_next == 0)
709 				break;
710 			mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(mr->mr_next);
711 		};
712 	}
713 
714 }
715 
#ifdef FDT
/*
 * Locate the device tree blob (from the loader metadata, or the statically
 * embedded copy when FDT_DTB_STATIC is configured) and initialise the
 * Open Firmware layer with it.  Returns quietly if no blob is found and
 * panics if the FDT backend cannot be installed or initialised.
 */
static void
try_load_dtb(caddr_t kmdp)
{
	vm_offset_t blob;

	blob = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t);
#if defined(FDT_DTB_STATIC)
	/*
	 * In case the device tree blob was not retrieved (from metadata) try
	 * to use the statically embedded one.
	 */
	if (blob == 0)
		blob = (vm_offset_t)&fdt_static_dtb;
#endif

	if (blob == (vm_offset_t)NULL) {
#ifndef TSLOG
		printf("ERROR loading DTB\n");
#endif
		return;
	}

	if (OF_install(OFW_FDT, 0) == FALSE)
		panic("Cannot install FDT");
	if (OF_init((void *)blob) != 0)
		panic("OF_init failed with the found device tree");

	parse_fdt_bootargs();
}
#endif
748 
749 static bool
bus_probe(void)750 bus_probe(void)
751 {
752 	bool has_acpi, has_fdt;
753 	char *order, *env;
754 
755 	has_acpi = has_fdt = false;
756 
757 #ifdef FDT
758 	has_fdt = (OF_peer(0) != 0);
759 #endif
760 #ifdef DEV_ACPI
761 	has_acpi = (AcpiOsGetRootPointer() != 0);
762 #endif
763 
764 	env = kern_getenv("kern.cfg.order");
765 	if (env != NULL) {
766 		order = env;
767 		while (order != NULL) {
768 			if (has_acpi &&
769 			    strncmp(order, "acpi", 4) == 0 &&
770 			    (order[4] == ',' || order[4] == '\0')) {
771 				arm64_bus_method = ARM64_BUS_ACPI;
772 				break;
773 			}
774 			if (has_fdt &&
775 			    strncmp(order, "fdt", 3) == 0 &&
776 			    (order[3] == ',' || order[3] == '\0')) {
777 				arm64_bus_method = ARM64_BUS_FDT;
778 				break;
779 			}
780 			order = strchr(order, ',');
781 			if (order != NULL)
782 				order++;	/* Skip comma */
783 		}
784 		freeenv(env);
785 
786 		/* If we set the bus method it is valid */
787 		if (arm64_bus_method != ARM64_BUS_NONE)
788 			return (true);
789 	}
790 	/* If no order or an invalid order was set use the default */
791 	if (arm64_bus_method == ARM64_BUS_NONE) {
792 		if (has_fdt)
793 			arm64_bus_method = ARM64_BUS_FDT;
794 		else if (has_acpi)
795 			arm64_bus_method = ARM64_BUS_ACPI;
796 	}
797 
798 	/*
799 	 * If no option was set the default is valid, otherwise we are
800 	 * setting one to get cninit() working, then calling panic to tell
801 	 * the user about the invalid bus setup.
802 	 */
803 	return (env == NULL);
804 }
805 
806 static void
cache_setup(void)807 cache_setup(void)
808 {
809 	int dczva_line_shift;
810 	uint32_t dczid_el0;
811 
812 	identify_cache(READ_SPECIALREG(ctr_el0));
813 
814 	dczid_el0 = READ_SPECIALREG(dczid_el0);
815 
816 	/* Check if dc zva is not prohibited */
817 	if (dczid_el0 & DCZID_DZP)
818 		dczva_line_size = 0;
819 	else {
820 		/* Same as with above calculations */
821 		dczva_line_shift = DCZID_BS_SIZE(dczid_el0);
822 		dczva_line_size = sizeof(int) << dczva_line_shift;
823 
824 		/* Change pagezero function */
825 		pagezero = pagezero_cache;
826 	}
827 }
828 
829 int
memory_mapping_mode(vm_paddr_t pa)830 memory_mapping_mode(vm_paddr_t pa)
831 {
832 	struct efi_md *map, *p;
833 	size_t efisz;
834 	int ndesc, i;
835 
836 	if (efihdr == NULL)
837 		return (VM_MEMATTR_WRITE_BACK);
838 
839 	/*
840 	 * Memory map data provided by UEFI via the GetMemoryMap
841 	 * Boot Services API.
842 	 */
843 	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
844 	map = (struct efi_md *)((uint8_t *)efihdr + efisz);
845 
846 	if (efihdr->descriptor_size == 0)
847 		return (VM_MEMATTR_WRITE_BACK);
848 	ndesc = efihdr->memory_size / efihdr->descriptor_size;
849 
850 	for (i = 0, p = map; i < ndesc; i++,
851 	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
852 		if (pa < p->md_phys ||
853 		    pa >= p->md_phys + p->md_pages * EFI_PAGE_SIZE)
854 			continue;
855 		if (p->md_type == EFI_MD_TYPE_IOMEM ||
856 		    p->md_type == EFI_MD_TYPE_IOPORT)
857 			return (VM_MEMATTR_DEVICE);
858 		else if ((p->md_attr & EFI_MD_ATTR_WB) != 0 ||
859 		    p->md_type == EFI_MD_TYPE_RECLAIM)
860 			return (VM_MEMATTR_WRITE_BACK);
861 		else if ((p->md_attr & EFI_MD_ATTR_WT) != 0)
862 			return (VM_MEMATTR_WRITE_THROUGH);
863 		else if ((p->md_attr & EFI_MD_ATTR_WC) != 0)
864 			return (VM_MEMATTR_WRITE_COMBINING);
865 		break;
866 	}
867 
868 	return (VM_MEMATTR_DEVICE);
869 }
870 
871 void
initarm(struct arm64_bootparams * abp)872 initarm(struct arm64_bootparams *abp)
873 {
874 	struct efi_fb *efifb;
875 	struct pcpu *pcpup;
876 	char *env;
877 #ifdef FDT
878 	struct mem_region mem_regions[FDT_MEM_REGIONS];
879 	int mem_regions_sz;
880 	phandle_t root;
881 	char dts_version[255];
882 #endif
883 	vm_offset_t lastaddr;
884 	caddr_t kmdp;
885 	bool valid;
886 
887 	TSRAW(&thread0, TS_ENTER, __func__, NULL);
888 
889 	boot_el = abp->boot_el;
890 	hcr_el2 = abp->hcr_el2;
891 
892 	/* Parse loader or FDT boot parameters. Determine last used address. */
893 	lastaddr = parse_boot_param(abp);
894 
895 	/* Find the kernel address */
896 	kmdp = preload_search_by_type("elf kernel");
897 	if (kmdp == NULL)
898 		kmdp = preload_search_by_type("elf64 kernel");
899 
900 	identify_cpu(0);
901 	identify_hypervisor_smbios();
902 
903 	update_special_regs(0);
904 
905 	link_elf_ireloc(kmdp);
906 #ifdef FDT
907 	try_load_dtb(kmdp);
908 #endif
909 
910 	efi_systbl_phys = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t);
911 
912 	/* Load the physical memory ranges */
913 	efihdr = (struct efi_map_header *)preload_search_info(kmdp,
914 	    MODINFO_METADATA | MODINFOMD_EFI_MAP);
915 	if (efihdr != NULL)
916 		add_efi_map_entries(efihdr);
917 #ifdef FDT
918 	else {
919 		/* Grab physical memory regions information from device tree. */
920 		if (fdt_get_mem_regions(mem_regions, &mem_regions_sz,
921 		    NULL) != 0)
922 			panic("Cannot get physical memory regions");
923 		physmem_hardware_regions(mem_regions, mem_regions_sz);
924 	}
925 	if (fdt_get_reserved_mem(mem_regions, &mem_regions_sz) == 0)
926 		physmem_exclude_regions(mem_regions, mem_regions_sz,
927 		    EXFLAG_NODUMP | EXFLAG_NOALLOC);
928 #endif
929 
930 	/* Exclude the EFI framebuffer from our view of physical memory. */
931 	efifb = (struct efi_fb *)preload_search_info(kmdp,
932 	    MODINFO_METADATA | MODINFOMD_EFI_FB);
933 	if (efifb != NULL)
934 		physmem_exclude_region(efifb->fb_addr, efifb->fb_size,
935 		    EXFLAG_NOALLOC);
936 
937 	/* Set the pcpu data, this is needed by pmap_bootstrap */
938 	pcpup = &pcpu0;
939 	pcpu_init(pcpup, 0, sizeof(struct pcpu));
940 
941 	/*
942 	 * Set the pcpu pointer with a backup in tpidr_el1 to be
943 	 * loaded when entering the kernel from userland.
944 	 */
945 	__asm __volatile(
946 	    "mov x18, %0 \n"
947 	    "msr tpidr_el1, %0" :: "r"(pcpup));
948 
949 	/* locore.S sets sp_el0 to &thread0 so no need to set it here. */
950 	PCPU_SET(curthread, &thread0);
951 	PCPU_SET(midr, get_midr());
952 
953 	/* Do basic tuning, hz etc */
954 	init_param1();
955 
956 	cache_setup();
957 	pan_setup();
958 
959 	/* Bootstrap enough of pmap  to enter the kernel proper */
960 	pmap_bootstrap(lastaddr - KERNBASE);
961 	/* Exclude entries needed in the DMAP region, but not phys_avail */
962 	if (efihdr != NULL)
963 		exclude_efi_map_entries(efihdr);
964 	/*  Do the same for reserve entries in the EFI MEMRESERVE table */
965 	if (efi_systbl_phys != 0)
966 		exclude_efi_memreserve(efi_systbl_phys);
967 
968 	/*
969 	 * We carefully bootstrap the sanitizer map after we've excluded
970 	 * absolutely everything else that could impact phys_avail.  There's not
971 	 * always enough room for the initial shadow map after the kernel, so
972 	 * we'll end up searching for segments that we can safely use.  Those
973 	 * segments also get excluded from phys_avail.
974 	 */
975 #if defined(KASAN)
976 	pmap_bootstrap_san();
977 #endif
978 
979 	physmem_init_kernel_globals();
980 
981 	devmap_bootstrap(0, NULL);
982 
983 	valid = bus_probe();
984 
985 	cninit();
986 	set_ttbr0(abp->kern_ttbr0);
987 	cpu_tlb_flushID();
988 
989 	if (!valid)
990 		panic("Invalid bus configuration: %s",
991 		    kern_getenv("kern.cfg.order"));
992 
993 	/*
994 	 * Check if pointer authentication is available on this system, and
995 	 * if so enable its use. This needs to be called before init_proc0
996 	 * as that will configure the thread0 pointer authentication keys.
997 	 */
998 	ptrauth_init();
999 
1000 	/*
1001 	 * Dump the boot metadata. We have to wait for cninit() since console
1002 	 * output is required. If it's grossly incorrect the kernel will never
1003 	 * make it this far.
1004 	 */
1005 	if (getenv_is_true("debug.dump_modinfo_at_boot"))
1006 		preload_dump();
1007 
1008 	init_proc0(abp->kern_stack);
1009 	msgbufinit(msgbufp, msgbufsize);
1010 	mutex_init();
1011 	init_param2(physmem);
1012 
1013 	dbg_init();
1014 	kdb_init();
1015 #ifdef KDB
1016 	if ((boothowto & RB_KDB) != 0)
1017 		kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
1018 #endif
1019 	pan_enable();
1020 
1021 	kcsan_cpu_init(0);
1022 	kasan_init();
1023 
1024 	env = kern_getenv("kernelname");
1025 	if (env != NULL)
1026 		strlcpy(kernelname, env, sizeof(kernelname));
1027 
1028 #ifdef FDT
1029 	if (arm64_bus_method == ARM64_BUS_FDT) {
1030 		root = OF_finddevice("/");
1031 		if (OF_getprop(root, "freebsd,dts-version", dts_version, sizeof(dts_version)) > 0) {
1032 			if (strcmp(LINUX_DTS_VERSION, dts_version) != 0)
1033 				printf("WARNING: DTB version is %s while kernel expects %s, "
1034 				    "please update the DTB in the ESP\n",
1035 				    dts_version,
1036 				    LINUX_DTS_VERSION);
1037 		} else {
1038 			printf("WARNING: Cannot find freebsd,dts-version property, "
1039 			    "cannot check DTB compliance\n");
1040 		}
1041 	}
1042 #endif
1043 
1044 	if (boothowto & RB_VERBOSE) {
1045 		if (efihdr != NULL)
1046 			print_efi_map_entries(efihdr);
1047 		physmem_print_tables();
1048 	}
1049 
1050 	early_boot = 0;
1051 
1052 	if (bootverbose && kstack_pages != KSTACK_PAGES)
1053 		printf("kern.kstack_pages = %d ignored for thread0\n",
1054 		    kstack_pages);
1055 
1056 	TSEXIT();
1057 }
1058 
1059 void
dbg_init(void)1060 dbg_init(void)
1061 {
1062 
1063 	/* Clear OS lock */
1064 	WRITE_SPECIALREG(oslar_el1, 0);
1065 
1066 	/* This permits DDB to use debug registers for watchpoints. */
1067 	dbg_monitor_init();
1068 
1069 	/* TODO: Eventually will need to initialize debug registers here. */
1070 }
1071 
1072 #ifdef DDB
1073 #include <ddb/ddb.h>
1074 
DB_SHOW_COMMAND(specialregs,db_show_spregs)1075 DB_SHOW_COMMAND(specialregs, db_show_spregs)
1076 {
1077 #define	PRINT_REG(reg)	\
1078     db_printf(__STRING(reg) " = %#016lx\n", READ_SPECIALREG(reg))
1079 
1080 	PRINT_REG(actlr_el1);
1081 	PRINT_REG(afsr0_el1);
1082 	PRINT_REG(afsr1_el1);
1083 	PRINT_REG(aidr_el1);
1084 	PRINT_REG(amair_el1);
1085 	PRINT_REG(ccsidr_el1);
1086 	PRINT_REG(clidr_el1);
1087 	PRINT_REG(contextidr_el1);
1088 	PRINT_REG(cpacr_el1);
1089 	PRINT_REG(csselr_el1);
1090 	PRINT_REG(ctr_el0);
1091 	PRINT_REG(currentel);
1092 	PRINT_REG(daif);
1093 	PRINT_REG(dczid_el0);
1094 	PRINT_REG(elr_el1);
1095 	PRINT_REG(esr_el1);
1096 	PRINT_REG(far_el1);
1097 #if 0
1098 	/* ARM64TODO: Enable VFP before reading floating-point registers */
1099 	PRINT_REG(fpcr);
1100 	PRINT_REG(fpsr);
1101 #endif
1102 	PRINT_REG(id_aa64afr0_el1);
1103 	PRINT_REG(id_aa64afr1_el1);
1104 	PRINT_REG(id_aa64dfr0_el1);
1105 	PRINT_REG(id_aa64dfr1_el1);
1106 	PRINT_REG(id_aa64isar0_el1);
1107 	PRINT_REG(id_aa64isar1_el1);
1108 	PRINT_REG(id_aa64pfr0_el1);
1109 	PRINT_REG(id_aa64pfr1_el1);
1110 	PRINT_REG(id_afr0_el1);
1111 	PRINT_REG(id_dfr0_el1);
1112 	PRINT_REG(id_isar0_el1);
1113 	PRINT_REG(id_isar1_el1);
1114 	PRINT_REG(id_isar2_el1);
1115 	PRINT_REG(id_isar3_el1);
1116 	PRINT_REG(id_isar4_el1);
1117 	PRINT_REG(id_isar5_el1);
1118 	PRINT_REG(id_mmfr0_el1);
1119 	PRINT_REG(id_mmfr1_el1);
1120 	PRINT_REG(id_mmfr2_el1);
1121 	PRINT_REG(id_mmfr3_el1);
1122 #if 0
1123 	/* Missing from llvm */
1124 	PRINT_REG(id_mmfr4_el1);
1125 #endif
1126 	PRINT_REG(id_pfr0_el1);
1127 	PRINT_REG(id_pfr1_el1);
1128 	PRINT_REG(isr_el1);
1129 	PRINT_REG(mair_el1);
1130 	PRINT_REG(midr_el1);
1131 	PRINT_REG(mpidr_el1);
1132 	PRINT_REG(mvfr0_el1);
1133 	PRINT_REG(mvfr1_el1);
1134 	PRINT_REG(mvfr2_el1);
1135 	PRINT_REG(revidr_el1);
1136 	PRINT_REG(sctlr_el1);
1137 	PRINT_REG(sp_el0);
1138 	PRINT_REG(spsel);
1139 	PRINT_REG(spsr_el1);
1140 	PRINT_REG(tcr_el1);
1141 	PRINT_REG(tpidr_el0);
1142 	PRINT_REG(tpidr_el1);
1143 	PRINT_REG(tpidrro_el0);
1144 	PRINT_REG(ttbr0_el1);
1145 	PRINT_REG(ttbr1_el1);
1146 	PRINT_REG(vbar_el1);
1147 #undef PRINT_REG
1148 }
1149 
DB_SHOW_COMMAND(vtop,db_show_vtop)1150 DB_SHOW_COMMAND(vtop, db_show_vtop)
1151 {
1152 	uint64_t phys;
1153 
1154 	if (have_addr) {
1155 		phys = arm64_address_translate_s1e1r(addr);
1156 		db_printf("EL1 physical address reg (read):  0x%016lx\n", phys);
1157 		phys = arm64_address_translate_s1e1w(addr);
1158 		db_printf("EL1 physical address reg (write): 0x%016lx\n", phys);
1159 		phys = arm64_address_translate_s1e0r(addr);
1160 		db_printf("EL0 physical address reg (read):  0x%016lx\n", phys);
1161 		phys = arm64_address_translate_s1e0w(addr);
1162 		db_printf("EL0 physical address reg (write): 0x%016lx\n", phys);
1163 	} else
1164 		db_printf("show vtop <virt_addr>\n");
1165 }
1166 #endif
1167