xref: /linux-6.15/arch/x86/kernel/cpu/microcode/core.c (revision 0bf87165)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * CPU Microcode Update Driver for Linux
4  *
5  * Copyright (C) 2000-2006 Tigran Aivazian <[email protected]>
6  *	      2006	Shaohua Li <[email protected]>
7  *	      2013-2016	Borislav Petkov <[email protected]>
8  *
9  * X86 CPU microcode early update for Linux:
10  *
11  *	Copyright (C) 2012 Fenghua Yu <[email protected]>
 *			   H Peter Anvin <[email protected]>
13  *		  (C) 2015 Borislav Petkov <[email protected]>
14  *
 * This driver allows upgrading microcode on x86 processors.
16  */
17 
18 #define pr_fmt(fmt) "microcode: " fmt
19 
20 #include <linux/platform_device.h>
21 #include <linux/stop_machine.h>
22 #include <linux/syscore_ops.h>
23 #include <linux/miscdevice.h>
24 #include <linux/capability.h>
25 #include <linux/firmware.h>
26 #include <linux/kernel.h>
27 #include <linux/delay.h>
28 #include <linux/mutex.h>
29 #include <linux/cpu.h>
30 #include <linux/nmi.h>
31 #include <linux/fs.h>
32 #include <linux/mm.h>
33 
34 #include <asm/cpu_device_id.h>
35 #include <asm/perf_event.h>
36 #include <asm/processor.h>
37 #include <asm/cmdline.h>
38 #include <asm/setup.h>
39 
40 #include "internal.h"
41 
42 #define DRIVER_VERSION	"2.2"
43 
44 static struct microcode_ops	*microcode_ops;
45 bool dis_ucode_ldr = true;
46 
47 /*
48  * Synchronization.
49  *
50  * All non cpu-hotplug-callback call sites use:
51  *
52  * - cpus_read_lock/unlock() to synchronize with
53  *   the cpu-hotplug-callback call sites.
54  *
55  * We guarantee that only a single cpu is being
56  * updated at any particular moment of time.
57  */
58 struct ucode_cpu_info		ucode_cpu_info[NR_CPUS];
59 
/*
 * Couples a CPU signature pointer with an error code.
 *
 * NOTE(review): no user of this type is visible in this part of the file;
 * confirm that it is still needed.
 */
struct cpu_info_ctx {
	struct cpu_signature *cpu_sig;
	int err;
};
64 
65 /*
66  * Those patch levels cannot be updated to newer ones and thus should be final.
67  */
68 static u32 final_levels[] = {
69 	0x01000098,
70 	0x0100009f,
71 	0x010000af,
72 	0, /* T-101 terminator */
73 };
74 
75 /*
76  * Check the current patch level on this CPU.
77  *
78  * Returns:
79  *  - true: if update should stop
80  *  - false: otherwise
81  */
82 static bool amd_check_current_patch_level(void)
83 {
84 	u32 lvl, dummy, i;
85 	u32 *levels;
86 
87 	native_rdmsr(MSR_AMD64_PATCH_LEVEL, lvl, dummy);
88 
89 	levels = final_levels;
90 
91 	for (i = 0; levels[i]; i++) {
92 		if (lvl == levels[i])
93 			return true;
94 	}
95 	return false;
96 }
97 
98 static bool __init check_loader_disabled_bsp(void)
99 {
100 	static const char *__dis_opt_str = "dis_ucode_ldr";
101 	const char *cmdline = boot_command_line;
102 	const char *option  = __dis_opt_str;
103 
104 	/*
105 	 * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not
106 	 * completely accurate as xen pv guests don't see that CPUID bit set but
107 	 * that's good enough as they don't land on the BSP path anyway.
108 	 */
109 	if (native_cpuid_ecx(1) & BIT(31))
110 		return true;
111 
112 	if (x86_cpuid_vendor() == X86_VENDOR_AMD) {
113 		if (amd_check_current_patch_level())
114 			return true;
115 	}
116 
117 	if (cmdline_find_option_bool(cmdline, option) <= 0)
118 		dis_ucode_ldr = false;
119 
120 	return dis_ucode_ldr;
121 }
122 
123 void __init load_ucode_bsp(void)
124 {
125 	unsigned int cpuid_1_eax;
126 	bool intel = true;
127 
128 	if (!have_cpuid_p())
129 		return;
130 
131 	cpuid_1_eax = native_cpuid_eax(1);
132 
133 	switch (x86_cpuid_vendor()) {
134 	case X86_VENDOR_INTEL:
135 		if (x86_family(cpuid_1_eax) < 6)
136 			return;
137 		break;
138 
139 	case X86_VENDOR_AMD:
140 		if (x86_family(cpuid_1_eax) < 0x10)
141 			return;
142 		intel = false;
143 		break;
144 
145 	default:
146 		return;
147 	}
148 
149 	if (check_loader_disabled_bsp())
150 		return;
151 
152 	if (intel)
153 		load_ucode_intel_bsp();
154 	else
155 		load_ucode_amd_bsp(cpuid_1_eax);
156 }
157 
158 void load_ucode_ap(void)
159 {
160 	unsigned int cpuid_1_eax;
161 
162 	if (dis_ucode_ldr)
163 		return;
164 
165 	cpuid_1_eax = native_cpuid_eax(1);
166 
167 	switch (x86_cpuid_vendor()) {
168 	case X86_VENDOR_INTEL:
169 		if (x86_family(cpuid_1_eax) >= 6)
170 			load_ucode_intel_ap();
171 		break;
172 	case X86_VENDOR_AMD:
173 		if (x86_family(cpuid_1_eax) >= 0x10)
174 			load_ucode_amd_ap(cpuid_1_eax);
175 		break;
176 	default:
177 		break;
178 	}
179 }
180 
181 struct cpio_data __init find_microcode_in_initrd(const char *path)
182 {
183 #ifdef CONFIG_BLK_DEV_INITRD
184 	unsigned long start = 0;
185 	size_t size;
186 
187 #ifdef CONFIG_X86_32
188 	size = boot_params.hdr.ramdisk_size;
189 	/* Early load on BSP has a temporary mapping. */
190 	if (size)
191 		start = initrd_start_early;
192 
193 #else /* CONFIG_X86_64 */
194 	size  = (unsigned long)boot_params.ext_ramdisk_size << 32;
195 	size |= boot_params.hdr.ramdisk_size;
196 
197 	if (size) {
198 		start  = (unsigned long)boot_params.ext_ramdisk_image << 32;
199 		start |= boot_params.hdr.ramdisk_image;
200 		start += PAGE_OFFSET;
201 	}
202 #endif
203 
204 	/*
205 	 * Fixup the start address: after reserve_initrd() runs, initrd_start
206 	 * has the virtual address of the beginning of the initrd. It also
207 	 * possibly relocates the ramdisk. In either case, initrd_start contains
208 	 * the updated address so use that instead.
209 	 */
210 	if (initrd_start)
211 		start = initrd_start;
212 
213 	return find_cpio_data(path, (void *)start, size, NULL);
214 #else /* !CONFIG_BLK_DEV_INITRD */
215 	return (struct cpio_data){ NULL, 0, "" };
216 #endif
217 }
218 
219 static void reload_early_microcode(unsigned int cpu)
220 {
221 	int vendor, family;
222 
223 	vendor = x86_cpuid_vendor();
224 	family = x86_cpuid_family();
225 
226 	switch (vendor) {
227 	case X86_VENDOR_INTEL:
228 		if (family >= 6)
229 			reload_ucode_intel();
230 		break;
231 	case X86_VENDOR_AMD:
232 		if (family >= 0x10)
233 			reload_ucode_amd(cpu);
234 		break;
235 	default:
236 		break;
237 	}
238 }
239 
/*
 * Fake device for request_firmware. Registered in microcode_init() and
 * handed to the request_microcode_fw() callback on late loading.
 */
static struct platform_device *microcode_pdev;
242 
243 #ifdef CONFIG_MICROCODE_LATE_LOADING
/*
 * Late loading dance. Why the heavy-handed stop_machine() effort?
 *
 * - While one HT sibling loads microcode, the other sibling must be idle
 *   and must not execute other code, in order to avoid any negative
 *   interactions caused by the loading.
 *
 * - In addition, the microcode update on the cores must be serialized
 *   until this requirement can be relaxed in the future. Right now, this
 *   is conservative and good.
 */
enum sibling_ctrl {
	SCTRL_WAIT,	/* Spinwait with timeout */
	SCTRL_APPLY,	/* Invoke the microcode_apply() callback */
	SCTRL_DONE,	/* Proceed without invoking the microcode_apply() callback */
};
263 
264 struct microcode_ctrl {
265 	enum sibling_ctrl	ctrl;
266 	enum ucode_state	result;
267 	unsigned int		ctrl_cpu;
268 };
269 
270 static DEFINE_PER_CPU(struct microcode_ctrl, ucode_ctrl);
271 static atomic_t late_cpus_in;
272 
273 static bool wait_for_cpus(atomic_t *cnt)
274 {
275 	unsigned int timeout;
276 
277 	WARN_ON_ONCE(atomic_dec_return(cnt) < 0);
278 
279 	for (timeout = 0; timeout < USEC_PER_SEC; timeout++) {
280 		if (!atomic_read(cnt))
281 			return true;
282 
283 		udelay(1);
284 
285 		if (!(timeout % USEC_PER_MSEC))
286 			touch_nmi_watchdog();
287 	}
288 	/* Prevent the late comers from making progress and let them time out */
289 	atomic_inc(cnt);
290 	return false;
291 }
292 
293 static bool wait_for_ctrl(void)
294 {
295 	unsigned int timeout;
296 
297 	for (timeout = 0; timeout < USEC_PER_SEC; timeout++) {
298 		if (this_cpu_read(ucode_ctrl.ctrl) != SCTRL_WAIT)
299 			return true;
300 		udelay(1);
301 		if (!(timeout % 1000))
302 			touch_nmi_watchdog();
303 	}
304 	return false;
305 }
306 
307 static void load_secondary(unsigned int cpu)
308 {
309 	unsigned int ctrl_cpu = this_cpu_read(ucode_ctrl.ctrl_cpu);
310 	enum ucode_state ret;
311 
312 	/* Initial rendezvous to ensure that all CPUs have arrived */
313 	if (!wait_for_cpus(&late_cpus_in)) {
314 		pr_err_once("load: %d CPUs timed out\n", atomic_read(&late_cpus_in) - 1);
315 		this_cpu_write(ucode_ctrl.result, UCODE_TIMEOUT);
316 		return;
317 	}
318 
319 	/*
320 	 * Wait for primary threads to complete. If one of them hangs due
321 	 * to the update, there is no way out. This is non-recoverable
322 	 * because the CPU might hold locks or resources and confuse the
323 	 * scheduler, watchdogs etc. There is no way to safely evacuate the
324 	 * machine.
325 	 */
326 	if (!wait_for_ctrl())
327 		panic("Microcode load: Primary CPU %d timed out\n", ctrl_cpu);
328 
329 	/*
330 	 * If the primary succeeded then invoke the apply() callback,
331 	 * otherwise copy the state from the primary thread.
332 	 */
333 	if (this_cpu_read(ucode_ctrl.ctrl) == SCTRL_APPLY)
334 		ret = microcode_ops->apply_microcode(cpu);
335 	else
336 		ret = per_cpu(ucode_ctrl.result, ctrl_cpu);
337 
338 	this_cpu_write(ucode_ctrl.result, ret);
339 	this_cpu_write(ucode_ctrl.ctrl, SCTRL_DONE);
340 }
341 
342 static void load_primary(unsigned int cpu)
343 {
344 	struct cpumask *secondaries = topology_sibling_cpumask(cpu);
345 	enum sibling_ctrl ctrl;
346 	enum ucode_state ret;
347 	unsigned int sibling;
348 
349 	/* Initial rendezvous to ensure that all CPUs have arrived */
350 	if (!wait_for_cpus(&late_cpus_in)) {
351 		this_cpu_write(ucode_ctrl.result, UCODE_TIMEOUT);
352 		pr_err_once("load: %d CPUs timed out\n", atomic_read(&late_cpus_in) - 1);
353 		return;
354 	}
355 
356 	ret = microcode_ops->apply_microcode(cpu);
357 	this_cpu_write(ucode_ctrl.result, ret);
358 	this_cpu_write(ucode_ctrl.ctrl, SCTRL_DONE);
359 
360 	/*
361 	 * If the update was successful, let the siblings run the apply()
362 	 * callback. If not, tell them it's done. This also covers the
363 	 * case where the CPU has uniform loading at package or system
364 	 * scope implemented but does not advertise it.
365 	 */
366 	if (ret == UCODE_UPDATED || ret == UCODE_OK)
367 		ctrl = SCTRL_APPLY;
368 	else
369 		ctrl = SCTRL_DONE;
370 
371 	for_each_cpu(sibling, secondaries) {
372 		if (sibling != cpu)
373 			per_cpu(ucode_ctrl.ctrl, sibling) = ctrl;
374 	}
375 }
376 
377 static int load_cpus_stopped(void *unused)
378 {
379 	unsigned int cpu = smp_processor_id();
380 
381 	if (this_cpu_read(ucode_ctrl.ctrl_cpu) == cpu)
382 		load_primary(cpu);
383 	else
384 		load_secondary(cpu);
385 
386 	/* No point to wait here. The CPUs will all wait in stop_machine(). */
387 	return 0;
388 }
389 
390 static int load_late_stop_cpus(void)
391 {
392 	unsigned int cpu, updated = 0, failed = 0, timedout = 0, siblings = 0;
393 	int old_rev = boot_cpu_data.microcode;
394 	struct cpuinfo_x86 prev_info;
395 
396 	pr_err("Attempting late microcode loading - it is dangerous and taints the kernel.\n");
397 	pr_err("You should switch to early loading, if possible.\n");
398 
399 	atomic_set(&late_cpus_in, num_online_cpus());
400 
401 	/*
402 	 * Take a snapshot before the microcode update in order to compare and
403 	 * check whether any bits changed after an update.
404 	 */
405 	store_cpu_caps(&prev_info);
406 
407 	stop_machine_cpuslocked(load_cpus_stopped, NULL, cpu_online_mask);
408 
409 	/* Analyze the results */
410 	for_each_cpu_and(cpu, cpu_present_mask, &cpus_booted_once_mask) {
411 		switch (per_cpu(ucode_ctrl.result, cpu)) {
412 		case UCODE_UPDATED:	updated++; break;
413 		case UCODE_TIMEOUT:	timedout++; break;
414 		case UCODE_OK:		siblings++; break;
415 		default:		failed++; break;
416 		}
417 	}
418 
419 	if (microcode_ops->finalize_late_load)
420 		microcode_ops->finalize_late_load(!updated);
421 
422 	if (!updated) {
423 		/* Nothing changed. */
424 		if (!failed && !timedout)
425 			return 0;
426 		pr_err("update failed: %u CPUs failed %u CPUs timed out\n",
427 		       failed, timedout);
428 		return -EIO;
429 	}
430 
431 	add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
432 	pr_info("load: updated on %u primary CPUs with %u siblings\n", updated, siblings);
433 	if (failed || timedout) {
434 		pr_err("load incomplete. %u CPUs timed out or failed\n",
435 		       num_online_cpus() - (updated + siblings));
436 	}
437 	pr_info("revision: 0x%x -> 0x%x\n", old_rev, boot_cpu_data.microcode);
438 	microcode_check(&prev_info);
439 
440 	return updated + siblings == num_online_cpus() ? 0 : -EIO;
441 }
442 
443 /*
444  * This function does two things:
445  *
446  * 1) Ensure that all required CPUs which are present and have been booted
447  *    once are online.
448  *
449  *    To pass this check, all primary threads must be online.
450  *
451  *    If the microcode load is not safe against NMI then all SMT threads
452  *    must be online as well because they still react to NMIs when they are
453  *    soft-offlined and parked in one of the play_dead() variants. So if a
454  *    NMI hits while the primary thread updates the microcode the resulting
455  *    behaviour is undefined. The default play_dead() implementation on
456  *    modern CPUs uses MWAIT, which is also not guaranteed to be safe
457  *    against a microcode update which affects MWAIT.
458  *
459  * 2) Initialize the per CPU control structure
460  */
461 static bool setup_cpus(void)
462 {
463 	struct microcode_ctrl ctrl = { .ctrl = SCTRL_WAIT, .result = -1, };
464 	unsigned int cpu;
465 
466 	for_each_cpu_and(cpu, cpu_present_mask, &cpus_booted_once_mask) {
467 		if (!cpu_online(cpu)) {
468 			if (topology_is_primary_thread(cpu) || !microcode_ops->nmi_safe) {
469 				pr_err("CPU %u not online\n", cpu);
470 				return false;
471 			}
472 		}
473 
474 		/*
475 		 * Initialize the per CPU state. This is core scope for now,
476 		 * but prepared to take package or system scope into account.
477 		 */
478 		ctrl.ctrl_cpu = cpumask_first(topology_sibling_cpumask(cpu));
479 		per_cpu(ucode_ctrl, cpu) = ctrl;
480 	}
481 	return true;
482 }
483 
484 static int load_late_locked(void)
485 {
486 	if (!setup_cpus())
487 		return -EBUSY;
488 
489 	switch (microcode_ops->request_microcode_fw(0, &microcode_pdev->dev)) {
490 	case UCODE_NEW:
491 		return load_late_stop_cpus();
492 	case UCODE_NFOUND:
493 		return -ENOENT;
494 	default:
495 		return -EBADFD;
496 	}
497 }
498 
499 static ssize_t reload_store(struct device *dev,
500 			    struct device_attribute *attr,
501 			    const char *buf, size_t size)
502 {
503 	unsigned long val;
504 	ssize_t ret;
505 
506 	ret = kstrtoul(buf, 0, &val);
507 	if (ret || val != 1)
508 		return -EINVAL;
509 
510 	cpus_read_lock();
511 	ret = load_late_locked();
512 	cpus_read_unlock();
513 
514 	return ret ? : size;
515 }
516 
517 static DEVICE_ATTR_WO(reload);
518 #endif
519 
520 static ssize_t version_show(struct device *dev,
521 			struct device_attribute *attr, char *buf)
522 {
523 	struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
524 
525 	return sprintf(buf, "0x%x\n", uci->cpu_sig.rev);
526 }
527 
528 static ssize_t processor_flags_show(struct device *dev,
529 			struct device_attribute *attr, char *buf)
530 {
531 	struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
532 
533 	return sprintf(buf, "0x%x\n", uci->cpu_sig.pf);
534 }
535 
536 static DEVICE_ATTR_RO(version);
537 static DEVICE_ATTR_RO(processor_flags);
538 
539 static struct attribute *mc_default_attrs[] = {
540 	&dev_attr_version.attr,
541 	&dev_attr_processor_flags.attr,
542 	NULL
543 };
544 
545 static const struct attribute_group mc_attr_group = {
546 	.attrs			= mc_default_attrs,
547 	.name			= "microcode",
548 };
549 
550 static void microcode_fini_cpu(int cpu)
551 {
552 	if (microcode_ops->microcode_fini_cpu)
553 		microcode_ops->microcode_fini_cpu(cpu);
554 }
555 
556 /**
557  * microcode_bsp_resume - Update boot CPU microcode during resume.
558  */
559 void microcode_bsp_resume(void)
560 {
561 	int cpu = smp_processor_id();
562 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
563 
564 	if (uci->mc)
565 		microcode_ops->apply_microcode(cpu);
566 	else
567 		reload_early_microcode(cpu);
568 }
569 
570 static struct syscore_ops mc_syscore_ops = {
571 	.resume	= microcode_bsp_resume,
572 };
573 
574 static int mc_cpu_online(unsigned int cpu)
575 {
576 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
577 	struct device *dev = get_cpu_device(cpu);
578 
579 	memset(uci, 0, sizeof(*uci));
580 
581 	microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig);
582 	cpu_data(cpu).microcode = uci->cpu_sig.rev;
583 	if (!cpu)
584 		boot_cpu_data.microcode = uci->cpu_sig.rev;
585 
586 	if (sysfs_create_group(&dev->kobj, &mc_attr_group))
587 		pr_err("Failed to create group for CPU%d\n", cpu);
588 	return 0;
589 }
590 
591 static int mc_cpu_down_prep(unsigned int cpu)
592 {
593 	struct device *dev = get_cpu_device(cpu);
594 
595 	microcode_fini_cpu(cpu);
596 	sysfs_remove_group(&dev->kobj, &mc_attr_group);
597 	return 0;
598 }
599 
600 static struct attribute *cpu_root_microcode_attrs[] = {
601 #ifdef CONFIG_MICROCODE_LATE_LOADING
602 	&dev_attr_reload.attr,
603 #endif
604 	NULL
605 };
606 
607 static const struct attribute_group cpu_root_microcode_group = {
608 	.name  = "microcode",
609 	.attrs = cpu_root_microcode_attrs,
610 };
611 
612 static int __init microcode_init(void)
613 {
614 	struct device *dev_root;
615 	struct cpuinfo_x86 *c = &boot_cpu_data;
616 	int error;
617 
618 	if (dis_ucode_ldr)
619 		return -EINVAL;
620 
621 	if (c->x86_vendor == X86_VENDOR_INTEL)
622 		microcode_ops = init_intel_microcode();
623 	else if (c->x86_vendor == X86_VENDOR_AMD)
624 		microcode_ops = init_amd_microcode();
625 	else
626 		pr_err("no support for this CPU vendor\n");
627 
628 	if (!microcode_ops)
629 		return -ENODEV;
630 
631 	microcode_pdev = platform_device_register_simple("microcode", -1, NULL, 0);
632 	if (IS_ERR(microcode_pdev))
633 		return PTR_ERR(microcode_pdev);
634 
635 	dev_root = bus_get_dev_root(&cpu_subsys);
636 	if (dev_root) {
637 		error = sysfs_create_group(&dev_root->kobj, &cpu_root_microcode_group);
638 		put_device(dev_root);
639 		if (error) {
640 			pr_err("Error creating microcode group!\n");
641 			goto out_pdev;
642 		}
643 	}
644 
645 	register_syscore_ops(&mc_syscore_ops);
646 	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/microcode:online",
647 			  mc_cpu_online, mc_cpu_down_prep);
648 
649 	pr_info("Microcode Update Driver: v%s.", DRIVER_VERSION);
650 
651 	return 0;
652 
653  out_pdev:
654 	platform_device_unregister(microcode_pdev);
655 	return error;
656 
657 }
658 late_initcall(microcode_init);
659