xref: /linux-6.15/arch/x86/kernel/cpu/microcode/core.c (revision 7eb314a2)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * CPU Microcode Update Driver for Linux
4  *
5  * Copyright (C) 2000-2006 Tigran Aivazian <[email protected]>
6  *	      2006	Shaohua Li <[email protected]>
7  *	      2013-2016	Borislav Petkov <[email protected]>
8  *
9  * X86 CPU microcode early update for Linux:
10  *
11  *	Copyright (C) 2012 Fenghua Yu <[email protected]>
12  *			   H Peter Anvin" <[email protected]>
13  *		  (C) 2015 Borislav Petkov <[email protected]>
14  *
15  * This driver allows to upgrade microcode on x86 processors.
16  */
17 
18 #define pr_fmt(fmt) "microcode: " fmt
19 
20 #include <linux/platform_device.h>
21 #include <linux/stop_machine.h>
22 #include <linux/syscore_ops.h>
23 #include <linux/miscdevice.h>
24 #include <linux/capability.h>
25 #include <linux/firmware.h>
26 #include <linux/cpumask.h>
27 #include <linux/kernel.h>
28 #include <linux/delay.h>
29 #include <linux/mutex.h>
30 #include <linux/cpu.h>
31 #include <linux/nmi.h>
32 #include <linux/fs.h>
33 #include <linux/mm.h>
34 
35 #include <asm/apic.h>
36 #include <asm/cpu_device_id.h>
37 #include <asm/perf_event.h>
38 #include <asm/processor.h>
39 #include <asm/cmdline.h>
40 #include <asm/setup.h>
41 
42 #include "internal.h"
43 
/* Version string reported in the log by microcode_init() */
#define DRIVER_VERSION "2.2"

/* Vendor specific callbacks; assigned once by microcode_init() */
static struct microcode_ops *microcode_ops;

/*
 * Loader disable switch. It starts out as "disabled" and is cleared by
 * check_loader_disabled_bsp() when the "dis_ucode_ldr" option is not found
 * on the kernel command line.
 */
bool dis_ucode_ldr = true;
48 
49 /*
50  * Synchronization.
51  *
52  * All non cpu-hotplug-callback call sites use:
53  *
54  * - cpus_read_lock/unlock() to synchronize with
55  *   the cpu-hotplug-callback call sites.
56  *
57  * We guarantee that only a single cpu is being
58  * updated at any particular moment of time.
59  */
60 struct ucode_cpu_info		ucode_cpu_info[NR_CPUS];
61 
/*
 * Bundles a CPU signature pointer with an error code.
 * NOTE(review): no user of this structure is visible in this file - confirm
 * whether it is still needed.
 */
struct cpu_info_ctx {
	struct cpu_signature *cpu_sig;	/* signature storage */
	int err;			/* error code */
};
66 
67 /*
68  * Those patch levels cannot be updated to newer ones and thus should be final.
69  */
70 static u32 final_levels[] = {
71 	0x01000098,
72 	0x0100009f,
73 	0x010000af,
74 	0, /* T-101 terminator */
75 };
76 
77 /*
78  * Check the current patch level on this CPU.
79  *
80  * Returns:
81  *  - true: if update should stop
82  *  - false: otherwise
83  */
84 static bool amd_check_current_patch_level(void)
85 {
86 	u32 lvl, dummy, i;
87 	u32 *levels;
88 
89 	native_rdmsr(MSR_AMD64_PATCH_LEVEL, lvl, dummy);
90 
91 	levels = final_levels;
92 
93 	for (i = 0; levels[i]; i++) {
94 		if (lvl == levels[i])
95 			return true;
96 	}
97 	return false;
98 }
99 
100 static bool __init check_loader_disabled_bsp(void)
101 {
102 	static const char *__dis_opt_str = "dis_ucode_ldr";
103 	const char *cmdline = boot_command_line;
104 	const char *option  = __dis_opt_str;
105 
106 	/*
107 	 * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not
108 	 * completely accurate as xen pv guests don't see that CPUID bit set but
109 	 * that's good enough as they don't land on the BSP path anyway.
110 	 */
111 	if (native_cpuid_ecx(1) & BIT(31))
112 		return true;
113 
114 	if (x86_cpuid_vendor() == X86_VENDOR_AMD) {
115 		if (amd_check_current_patch_level())
116 			return true;
117 	}
118 
119 	if (cmdline_find_option_bool(cmdline, option) <= 0)
120 		dis_ucode_ldr = false;
121 
122 	return dis_ucode_ldr;
123 }
124 
125 void __init load_ucode_bsp(void)
126 {
127 	unsigned int cpuid_1_eax;
128 	bool intel = true;
129 
130 	if (!have_cpuid_p())
131 		return;
132 
133 	cpuid_1_eax = native_cpuid_eax(1);
134 
135 	switch (x86_cpuid_vendor()) {
136 	case X86_VENDOR_INTEL:
137 		if (x86_family(cpuid_1_eax) < 6)
138 			return;
139 		break;
140 
141 	case X86_VENDOR_AMD:
142 		if (x86_family(cpuid_1_eax) < 0x10)
143 			return;
144 		intel = false;
145 		break;
146 
147 	default:
148 		return;
149 	}
150 
151 	if (check_loader_disabled_bsp())
152 		return;
153 
154 	if (intel)
155 		load_ucode_intel_bsp();
156 	else
157 		load_ucode_amd_bsp(cpuid_1_eax);
158 }
159 
160 void load_ucode_ap(void)
161 {
162 	unsigned int cpuid_1_eax;
163 
164 	if (dis_ucode_ldr)
165 		return;
166 
167 	cpuid_1_eax = native_cpuid_eax(1);
168 
169 	switch (x86_cpuid_vendor()) {
170 	case X86_VENDOR_INTEL:
171 		if (x86_family(cpuid_1_eax) >= 6)
172 			load_ucode_intel_ap();
173 		break;
174 	case X86_VENDOR_AMD:
175 		if (x86_family(cpuid_1_eax) >= 0x10)
176 			load_ucode_amd_ap(cpuid_1_eax);
177 		break;
178 	default:
179 		break;
180 	}
181 }
182 
183 struct cpio_data __init find_microcode_in_initrd(const char *path)
184 {
185 #ifdef CONFIG_BLK_DEV_INITRD
186 	unsigned long start = 0;
187 	size_t size;
188 
189 #ifdef CONFIG_X86_32
190 	size = boot_params.hdr.ramdisk_size;
191 	/* Early load on BSP has a temporary mapping. */
192 	if (size)
193 		start = initrd_start_early;
194 
195 #else /* CONFIG_X86_64 */
196 	size  = (unsigned long)boot_params.ext_ramdisk_size << 32;
197 	size |= boot_params.hdr.ramdisk_size;
198 
199 	if (size) {
200 		start  = (unsigned long)boot_params.ext_ramdisk_image << 32;
201 		start |= boot_params.hdr.ramdisk_image;
202 		start += PAGE_OFFSET;
203 	}
204 #endif
205 
206 	/*
207 	 * Fixup the start address: after reserve_initrd() runs, initrd_start
208 	 * has the virtual address of the beginning of the initrd. It also
209 	 * possibly relocates the ramdisk. In either case, initrd_start contains
210 	 * the updated address so use that instead.
211 	 */
212 	if (initrd_start)
213 		start = initrd_start;
214 
215 	return find_cpio_data(path, (void *)start, size, NULL);
216 #else /* !CONFIG_BLK_DEV_INITRD */
217 	return (struct cpio_data){ NULL, 0, "" };
218 #endif
219 }
220 
221 static void reload_early_microcode(unsigned int cpu)
222 {
223 	int vendor, family;
224 
225 	vendor = x86_cpuid_vendor();
226 	family = x86_cpuid_family();
227 
228 	switch (vendor) {
229 	case X86_VENDOR_INTEL:
230 		if (family >= 6)
231 			reload_ucode_intel();
232 		break;
233 	case X86_VENDOR_AMD:
234 		if (family >= 0x10)
235 			reload_ucode_amd(cpu);
236 		break;
237 	default:
238 		break;
239 	}
240 }
241 
/*
 * Fake platform device, registered by microcode_init(). Its embedded device
 * is what gets handed to the request_microcode_fw() callback.
 */
static struct platform_device *microcode_pdev;
244 
245 #ifdef CONFIG_MICROCODE_LATE_LOADING
/*
 * Late loading dance. Why is the heavy-handed stop_machine() effort needed?
 *
 * - While one HT sibling loads microcode, the other sibling has to be idle
 *   and must not execute other code, so that negative interactions caused
 *   by the loading are avoided.
 *
 * - On top of that, the microcode update on the cores has to be serialized
 *   until that requirement can be relaxed in the future. For now this is
 *   conservative and good.
 *
 * The control states below are what a sibling observes in its per CPU
 * control word.
 */
enum sibling_ctrl {
	SCTRL_WAIT,	/* Spinwait with timeout */
	SCTRL_APPLY,	/* Invoke the microcode_apply() callback */
	SCTRL_DONE,	/* Proceed without invoking the microcode_apply() callback */
};
265 
266 struct microcode_ctrl {
267 	enum sibling_ctrl	ctrl;
268 	enum ucode_state	result;
269 	unsigned int		ctrl_cpu;
270 	bool			nmi_enabled;
271 };
272 
273 DEFINE_STATIC_KEY_FALSE(microcode_nmi_handler_enable);
274 static DEFINE_PER_CPU(struct microcode_ctrl, ucode_ctrl);
275 static atomic_t late_cpus_in;
276 
277 static bool wait_for_cpus(atomic_t *cnt)
278 {
279 	unsigned int timeout;
280 
281 	WARN_ON_ONCE(atomic_dec_return(cnt) < 0);
282 
283 	for (timeout = 0; timeout < USEC_PER_SEC; timeout++) {
284 		if (!atomic_read(cnt))
285 			return true;
286 
287 		udelay(1);
288 
289 		/* If invoked directly, tickle the NMI watchdog */
290 		if (!microcode_ops->use_nmi && !(timeout % USEC_PER_MSEC))
291 			touch_nmi_watchdog();
292 	}
293 	/* Prevent the late comers from making progress and let them time out */
294 	atomic_inc(cnt);
295 	return false;
296 }
297 
298 static bool wait_for_ctrl(void)
299 {
300 	unsigned int timeout;
301 
302 	for (timeout = 0; timeout < USEC_PER_SEC; timeout++) {
303 		if (this_cpu_read(ucode_ctrl.ctrl) != SCTRL_WAIT)
304 			return true;
305 		udelay(1);
306 		/* If invoked directly, tickle the NMI watchdog */
307 		if (!microcode_ops->use_nmi && !(timeout % 1000))
308 			touch_nmi_watchdog();
309 	}
310 	return false;
311 }
312 
313 static void load_secondary(unsigned int cpu)
314 {
315 	unsigned int ctrl_cpu = this_cpu_read(ucode_ctrl.ctrl_cpu);
316 	enum ucode_state ret;
317 
318 	/* Initial rendezvous to ensure that all CPUs have arrived */
319 	if (!wait_for_cpus(&late_cpus_in)) {
320 		pr_err_once("load: %d CPUs timed out\n", atomic_read(&late_cpus_in) - 1);
321 		this_cpu_write(ucode_ctrl.result, UCODE_TIMEOUT);
322 		return;
323 	}
324 
325 	/*
326 	 * Wait for primary threads to complete. If one of them hangs due
327 	 * to the update, there is no way out. This is non-recoverable
328 	 * because the CPU might hold locks or resources and confuse the
329 	 * scheduler, watchdogs etc. There is no way to safely evacuate the
330 	 * machine.
331 	 */
332 	if (!wait_for_ctrl())
333 		panic("Microcode load: Primary CPU %d timed out\n", ctrl_cpu);
334 
335 	/*
336 	 * If the primary succeeded then invoke the apply() callback,
337 	 * otherwise copy the state from the primary thread.
338 	 */
339 	if (this_cpu_read(ucode_ctrl.ctrl) == SCTRL_APPLY)
340 		ret = microcode_ops->apply_microcode(cpu);
341 	else
342 		ret = per_cpu(ucode_ctrl.result, ctrl_cpu);
343 
344 	this_cpu_write(ucode_ctrl.result, ret);
345 	this_cpu_write(ucode_ctrl.ctrl, SCTRL_DONE);
346 }
347 
348 static void load_primary(unsigned int cpu)
349 {
350 	struct cpumask *secondaries = topology_sibling_cpumask(cpu);
351 	enum sibling_ctrl ctrl;
352 	enum ucode_state ret;
353 	unsigned int sibling;
354 
355 	/* Initial rendezvous to ensure that all CPUs have arrived */
356 	if (!wait_for_cpus(&late_cpus_in)) {
357 		this_cpu_write(ucode_ctrl.result, UCODE_TIMEOUT);
358 		pr_err_once("load: %d CPUs timed out\n", atomic_read(&late_cpus_in) - 1);
359 		return;
360 	}
361 
362 	ret = microcode_ops->apply_microcode(cpu);
363 	this_cpu_write(ucode_ctrl.result, ret);
364 	this_cpu_write(ucode_ctrl.ctrl, SCTRL_DONE);
365 
366 	/*
367 	 * If the update was successful, let the siblings run the apply()
368 	 * callback. If not, tell them it's done. This also covers the
369 	 * case where the CPU has uniform loading at package or system
370 	 * scope implemented but does not advertise it.
371 	 */
372 	if (ret == UCODE_UPDATED || ret == UCODE_OK)
373 		ctrl = SCTRL_APPLY;
374 	else
375 		ctrl = SCTRL_DONE;
376 
377 	for_each_cpu(sibling, secondaries) {
378 		if (sibling != cpu)
379 			per_cpu(ucode_ctrl.ctrl, sibling) = ctrl;
380 	}
381 }
382 
383 static bool microcode_update_handler(void)
384 {
385 	unsigned int cpu = smp_processor_id();
386 
387 	if (this_cpu_read(ucode_ctrl.ctrl_cpu) == cpu)
388 		load_primary(cpu);
389 	else
390 		load_secondary(cpu);
391 
392 	touch_nmi_watchdog();
393 	return true;
394 }
395 
396 bool microcode_nmi_handler(void)
397 {
398 	if (!this_cpu_read(ucode_ctrl.nmi_enabled))
399 		return false;
400 
401 	this_cpu_write(ucode_ctrl.nmi_enabled, false);
402 	return microcode_update_handler();
403 }
404 
405 static int load_cpus_stopped(void *unused)
406 {
407 	if (microcode_ops->use_nmi) {
408 		/* Enable the NMI handler and raise NMI */
409 		this_cpu_write(ucode_ctrl.nmi_enabled, true);
410 		apic->send_IPI(smp_processor_id(), NMI_VECTOR);
411 	} else {
412 		/* Just invoke the handler directly */
413 		microcode_update_handler();
414 	}
415 	return 0;
416 }
417 
418 static int load_late_stop_cpus(void)
419 {
420 	unsigned int cpu, updated = 0, failed = 0, timedout = 0, siblings = 0;
421 	int old_rev = boot_cpu_data.microcode;
422 	struct cpuinfo_x86 prev_info;
423 
424 	pr_err("Attempting late microcode loading - it is dangerous and taints the kernel.\n");
425 	pr_err("You should switch to early loading, if possible.\n");
426 
427 	atomic_set(&late_cpus_in, num_online_cpus());
428 
429 	/*
430 	 * Take a snapshot before the microcode update in order to compare and
431 	 * check whether any bits changed after an update.
432 	 */
433 	store_cpu_caps(&prev_info);
434 
435 	if (microcode_ops->use_nmi)
436 		static_branch_enable_cpuslocked(&microcode_nmi_handler_enable);
437 
438 	stop_machine_cpuslocked(load_cpus_stopped, NULL, cpu_online_mask);
439 
440 	if (microcode_ops->use_nmi)
441 		static_branch_disable_cpuslocked(&microcode_nmi_handler_enable);
442 
443 	/* Analyze the results */
444 	for_each_cpu_and(cpu, cpu_present_mask, &cpus_booted_once_mask) {
445 		switch (per_cpu(ucode_ctrl.result, cpu)) {
446 		case UCODE_UPDATED:	updated++; break;
447 		case UCODE_TIMEOUT:	timedout++; break;
448 		case UCODE_OK:		siblings++; break;
449 		default:		failed++; break;
450 		}
451 	}
452 
453 	if (microcode_ops->finalize_late_load)
454 		microcode_ops->finalize_late_load(!updated);
455 
456 	if (!updated) {
457 		/* Nothing changed. */
458 		if (!failed && !timedout)
459 			return 0;
460 		pr_err("update failed: %u CPUs failed %u CPUs timed out\n",
461 		       failed, timedout);
462 		return -EIO;
463 	}
464 
465 	add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
466 	pr_info("load: updated on %u primary CPUs with %u siblings\n", updated, siblings);
467 	if (failed || timedout) {
468 		pr_err("load incomplete. %u CPUs timed out or failed\n",
469 		       num_online_cpus() - (updated + siblings));
470 	}
471 	pr_info("revision: 0x%x -> 0x%x\n", old_rev, boot_cpu_data.microcode);
472 	microcode_check(&prev_info);
473 
474 	return updated + siblings == num_online_cpus() ? 0 : -EIO;
475 }
476 
477 /*
478  * This function does two things:
479  *
480  * 1) Ensure that all required CPUs which are present and have been booted
481  *    once are online.
482  *
483  *    To pass this check, all primary threads must be online.
484  *
485  *    If the microcode load is not safe against NMI then all SMT threads
486  *    must be online as well because they still react to NMIs when they are
487  *    soft-offlined and parked in one of the play_dead() variants. So if a
488  *    NMI hits while the primary thread updates the microcode the resulting
489  *    behaviour is undefined. The default play_dead() implementation on
490  *    modern CPUs uses MWAIT, which is also not guaranteed to be safe
491  *    against a microcode update which affects MWAIT.
492  *
493  * 2) Initialize the per CPU control structure
494  */
495 static bool setup_cpus(void)
496 {
497 	struct microcode_ctrl ctrl = { .ctrl = SCTRL_WAIT, .result = -1, };
498 	unsigned int cpu;
499 
500 	for_each_cpu_and(cpu, cpu_present_mask, &cpus_booted_once_mask) {
501 		if (!cpu_online(cpu)) {
502 			if (topology_is_primary_thread(cpu) || !microcode_ops->nmi_safe) {
503 				pr_err("CPU %u not online\n", cpu);
504 				return false;
505 			}
506 		}
507 
508 		/*
509 		 * Initialize the per CPU state. This is core scope for now,
510 		 * but prepared to take package or system scope into account.
511 		 */
512 		ctrl.ctrl_cpu = cpumask_first(topology_sibling_cpumask(cpu));
513 		per_cpu(ucode_ctrl, cpu) = ctrl;
514 	}
515 	return true;
516 }
517 
518 static int load_late_locked(void)
519 {
520 	if (!setup_cpus())
521 		return -EBUSY;
522 
523 	switch (microcode_ops->request_microcode_fw(0, &microcode_pdev->dev)) {
524 	case UCODE_NEW:
525 		return load_late_stop_cpus();
526 	case UCODE_NFOUND:
527 		return -ENOENT;
528 	default:
529 		return -EBADFD;
530 	}
531 }
532 
533 static ssize_t reload_store(struct device *dev,
534 			    struct device_attribute *attr,
535 			    const char *buf, size_t size)
536 {
537 	unsigned long val;
538 	ssize_t ret;
539 
540 	ret = kstrtoul(buf, 0, &val);
541 	if (ret || val != 1)
542 		return -EINVAL;
543 
544 	cpus_read_lock();
545 	ret = load_late_locked();
546 	cpus_read_unlock();
547 
548 	return ret ? : size;
549 }
550 
551 static DEVICE_ATTR_WO(reload);
552 #endif
553 
554 static ssize_t version_show(struct device *dev,
555 			struct device_attribute *attr, char *buf)
556 {
557 	struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
558 
559 	return sprintf(buf, "0x%x\n", uci->cpu_sig.rev);
560 }
561 
562 static ssize_t processor_flags_show(struct device *dev,
563 			struct device_attribute *attr, char *buf)
564 {
565 	struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
566 
567 	return sprintf(buf, "0x%x\n", uci->cpu_sig.pf);
568 }
569 
570 static DEVICE_ATTR_RO(version);
571 static DEVICE_ATTR_RO(processor_flags);
572 
573 static struct attribute *mc_default_attrs[] = {
574 	&dev_attr_version.attr,
575 	&dev_attr_processor_flags.attr,
576 	NULL
577 };
578 
579 static const struct attribute_group mc_attr_group = {
580 	.attrs			= mc_default_attrs,
581 	.name			= "microcode",
582 };
583 
584 static void microcode_fini_cpu(int cpu)
585 {
586 	if (microcode_ops->microcode_fini_cpu)
587 		microcode_ops->microcode_fini_cpu(cpu);
588 }
589 
590 /**
591  * microcode_bsp_resume - Update boot CPU microcode during resume.
592  */
593 void microcode_bsp_resume(void)
594 {
595 	int cpu = smp_processor_id();
596 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
597 
598 	if (uci->mc)
599 		microcode_ops->apply_microcode(cpu);
600 	else
601 		reload_early_microcode(cpu);
602 }
603 
604 static struct syscore_ops mc_syscore_ops = {
605 	.resume	= microcode_bsp_resume,
606 };
607 
608 static int mc_cpu_online(unsigned int cpu)
609 {
610 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
611 	struct device *dev = get_cpu_device(cpu);
612 
613 	memset(uci, 0, sizeof(*uci));
614 
615 	microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig);
616 	cpu_data(cpu).microcode = uci->cpu_sig.rev;
617 	if (!cpu)
618 		boot_cpu_data.microcode = uci->cpu_sig.rev;
619 
620 	if (sysfs_create_group(&dev->kobj, &mc_attr_group))
621 		pr_err("Failed to create group for CPU%d\n", cpu);
622 	return 0;
623 }
624 
625 static int mc_cpu_down_prep(unsigned int cpu)
626 {
627 	struct device *dev = get_cpu_device(cpu);
628 
629 	microcode_fini_cpu(cpu);
630 	sysfs_remove_group(&dev->kobj, &mc_attr_group);
631 	return 0;
632 }
633 
634 static struct attribute *cpu_root_microcode_attrs[] = {
635 #ifdef CONFIG_MICROCODE_LATE_LOADING
636 	&dev_attr_reload.attr,
637 #endif
638 	NULL
639 };
640 
641 static const struct attribute_group cpu_root_microcode_group = {
642 	.name  = "microcode",
643 	.attrs = cpu_root_microcode_attrs,
644 };
645 
646 static int __init microcode_init(void)
647 {
648 	struct device *dev_root;
649 	struct cpuinfo_x86 *c = &boot_cpu_data;
650 	int error;
651 
652 	if (dis_ucode_ldr)
653 		return -EINVAL;
654 
655 	if (c->x86_vendor == X86_VENDOR_INTEL)
656 		microcode_ops = init_intel_microcode();
657 	else if (c->x86_vendor == X86_VENDOR_AMD)
658 		microcode_ops = init_amd_microcode();
659 	else
660 		pr_err("no support for this CPU vendor\n");
661 
662 	if (!microcode_ops)
663 		return -ENODEV;
664 
665 	microcode_pdev = platform_device_register_simple("microcode", -1, NULL, 0);
666 	if (IS_ERR(microcode_pdev))
667 		return PTR_ERR(microcode_pdev);
668 
669 	dev_root = bus_get_dev_root(&cpu_subsys);
670 	if (dev_root) {
671 		error = sysfs_create_group(&dev_root->kobj, &cpu_root_microcode_group);
672 		put_device(dev_root);
673 		if (error) {
674 			pr_err("Error creating microcode group!\n");
675 			goto out_pdev;
676 		}
677 	}
678 
679 	register_syscore_ops(&mc_syscore_ops);
680 	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/microcode:online",
681 			  mc_cpu_online, mc_cpu_down_prep);
682 
683 	pr_info("Microcode Update Driver: v%s.", DRIVER_VERSION);
684 
685 	return 0;
686 
687  out_pdev:
688 	platform_device_unregister(microcode_pdev);
689 	return error;
690 
691 }
692 late_initcall(microcode_init);
693