xref: /openbmc/linux/arch/x86/kernel/reboot.c (revision 261cd5ed)
1 // SPDX-License-Identifier: GPL-2.0
/* Prepend the module name (KBUILD_MODNAME) and ": " to a format string. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
3 
4 #include <linux/export.h>
5 #include <linux/reboot.h>
6 #include <linux/init.h>
7 #include <linux/pm.h>
8 #include <linux/efi.h>
9 #include <linux/dmi.h>
10 #include <linux/sched.h>
11 #include <linux/tboot.h>
12 #include <linux/delay.h>
13 #include <linux/objtool.h>
14 #include <linux/pgtable.h>
15 #include <acpi/reboot.h>
16 #include <asm/io.h>
17 #include <asm/apic.h>
18 #include <asm/io_apic.h>
19 #include <asm/desc.h>
20 #include <asm/hpet.h>
21 #include <asm/proto.h>
22 #include <asm/reboot_fixups.h>
23 #include <asm/reboot.h>
24 #include <asm/pci_x86.h>
25 #include <asm/cpu.h>
26 #include <asm/nmi.h>
27 #include <asm/smp.h>
28 
29 #include <linux/ctype.h>
30 #include <linux/mc146818rtc.h>
31 #include <asm/realmode.h>
32 #include <asm/x86_init.h>
33 #include <asm/efi.h>
34 
35 /*
36  * Power off function, if any
37  */
38 void (*pm_power_off)(void);
39 EXPORT_SYMBOL(pm_power_off);
40 
41 /*
42  * This is set if we need to go through the 'emergency' path.
43  * When machine_emergency_restart() is called, we may be on
44  * an inconsistent state and won't be able to do a clean cleanup
45  */
46 static int reboot_emergency;
47 
48 /* This is set by the PCI code if either type 1 or type 2 PCI is detected */
49 bool port_cf9_safe = false;
50 
51 /*
52  * Reboot options and system auto-detection code provided by
53  * Dell Inc. so their systems "just work". :-)
54  */
55 
56 /*
57  * Some machines require the "reboot=a" commandline options
58  */
set_acpi_reboot(const struct dmi_system_id * d)59 static int __init set_acpi_reboot(const struct dmi_system_id *d)
60 {
61 	if (reboot_type != BOOT_ACPI) {
62 		reboot_type = BOOT_ACPI;
63 		pr_info("%s series board detected. Selecting %s-method for reboots.\n",
64 			d->ident, "ACPI");
65 	}
66 	return 0;
67 }
68 
69 /*
70  * Some machines require the "reboot=b" or "reboot=k"  commandline options,
71  * this quirk makes that automatic.
72  */
set_bios_reboot(const struct dmi_system_id * d)73 static int __init set_bios_reboot(const struct dmi_system_id *d)
74 {
75 	if (reboot_type != BOOT_BIOS) {
76 		reboot_type = BOOT_BIOS;
77 		pr_info("%s series board detected. Selecting %s-method for reboots.\n",
78 			d->ident, "BIOS");
79 	}
80 	return 0;
81 }
82 
83 /*
84  * Some machines don't handle the default ACPI reboot method and
85  * require the EFI reboot method:
86  */
set_efi_reboot(const struct dmi_system_id * d)87 static int __init set_efi_reboot(const struct dmi_system_id *d)
88 {
89 	if (reboot_type != BOOT_EFI && !efi_runtime_disabled()) {
90 		reboot_type = BOOT_EFI;
91 		pr_info("%s series board detected. Selecting EFI-method for reboot.\n", d->ident);
92 	}
93 	return 0;
94 }
95 
/*
 * machine_real_restart - restart the machine via the real-mode restart stub
 * @type: restart type, handed unchanged to the low-memory restart code
 *	  (the BOOT_BIOS path in this file passes MRR_BIOS)
 *
 * Disables local interrupts, flags the reboot in CMOS, switches to the
 * trampoline page table and jumps to
 * real_mode_header->machine_real_restart_asm.  Does not return.
 */
void __noreturn machine_real_restart(unsigned int type)
{
	local_irq_disable();

	/*
	 * Write zero to CMOS register number 0x0f, which the BIOS POST
	 * routine will recognize as telling it to do a proper reboot.  (Well
	 * that's what this book in front of me says -- it may only apply to
	 * the Phoenix BIOS though, it's not clear).  At the same time,
	 * disable NMIs by setting the top bit in the CMOS address register,
	 * as we're about to do peculiar things to the CPU.  I'm not sure if
	 * `outb_p' is needed instead of just `outb'.  Use it to be on the
	 * safe side.  (Yes, CMOS_WRITE does outb_p's. -  Paul G.)
	 */
	spin_lock(&rtc_lock);
	CMOS_WRITE(0x00, 0x8f);
	spin_unlock(&rtc_lock);

	/*
	 * Switch to the trampoline page table.
	 */
	load_trampoline_pgtable();

	/*
	 * Jump to the identity-mapped low memory code.  @type travels in a
	 * register: the "a" constraint (EAX) on 32-bit, the "D" constraint
	 * (EDI) on 64-bit.
	 */
#ifdef CONFIG_X86_32
	asm volatile("jmpl *%0" : :
		     "rm" (real_mode_header->machine_real_restart_asm),
		     "a" (type));
#else
	asm volatile("ljmpl *%0" : :
		     "m" (real_mode_header->machine_real_restart_asm),
		     "D" (type));
#endif
	unreachable();
}
/* Only exported when APM is built as a module. */
#ifdef CONFIG_APM_MODULE
EXPORT_SYMBOL(machine_real_restart);
#endif
STACK_FRAME_NON_STANDARD(machine_real_restart);
135 
136 /*
137  * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot
138  */
set_pci_reboot(const struct dmi_system_id * d)139 static int __init set_pci_reboot(const struct dmi_system_id *d)
140 {
141 	if (reboot_type != BOOT_CF9_FORCE) {
142 		reboot_type = BOOT_CF9_FORCE;
143 		pr_info("%s series board detected. Selecting %s-method for reboots.\n",
144 			d->ident, "PCI");
145 	}
146 	return 0;
147 }
148 
set_kbd_reboot(const struct dmi_system_id * d)149 static int __init set_kbd_reboot(const struct dmi_system_id *d)
150 {
151 	if (reboot_type != BOOT_KBD) {
152 		reboot_type = BOOT_KBD;
153 		pr_info("%s series board detected. Selecting %s-method for reboot.\n",
154 			d->ident, "KBD");
155 	}
156 	return 0;
157 }
158 
/*
 * This is a single dmi_table handling all reboot quirks.
 *
 * Each entry names the callback that selects the reboot method
 * (.callback), the board name printed by that callback (.ident) and the
 * DMI strings identifying the machine (.matches).  Entries are grouped by
 * vendor.  reboot_init() passes the table to dmi_check_system().
 */
static const struct dmi_system_id reboot_dmi_table[] __initconst = {

	/* Acer */
	{	/* Handle reboot issue on Acer Aspire one */
		.callback = set_kbd_reboot,
		.ident = "Acer Aspire One A110",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
			DMI_MATCH(DMI_PRODUCT_NAME, "AOA110"),
		},
	},
	{	/* Handle reboot issue on Acer TravelMate X514-51T */
		.callback = set_efi_reboot,
		.ident = "Acer TravelMate X514-51T",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
			DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate X514-51T"),
		},
	},

	/* Apple */
	{	/* Handle problems with rebooting on Apple MacBook5 */
		.callback = set_pci_reboot,
		.ident = "Apple MacBook5",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
			DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5"),
		},
	},
	{	/* Handle problems with rebooting on Apple MacBook6,1 */
		.callback = set_pci_reboot,
		.ident = "Apple MacBook6,1",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
			DMI_MATCH(DMI_PRODUCT_NAME, "MacBook6,1"),
		},
	},
	{	/* Handle problems with rebooting on Apple MacBookPro5 */
		.callback = set_pci_reboot,
		.ident = "Apple MacBookPro5",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
			DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5"),
		},
	},
	{	/* Handle problems with rebooting on Apple Macmini3,1 */
		.callback = set_pci_reboot,
		.ident = "Apple Macmini3,1",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
			DMI_MATCH(DMI_PRODUCT_NAME, "Macmini3,1"),
		},
	},
	{	/* Handle problems with rebooting on the iMac9,1. */
		.callback = set_pci_reboot,
		.ident = "Apple iMac9,1",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
			DMI_MATCH(DMI_PRODUCT_NAME, "iMac9,1"),
		},
	},
	{	/* Handle problems with rebooting on the iMac10,1. */
		.callback = set_pci_reboot,
		.ident = "Apple iMac10,1",
		.matches = {
		    DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
		    DMI_MATCH(DMI_PRODUCT_NAME, "iMac10,1"),
		},
	},

	/* ASRock */
	{	/* Handle problems with rebooting on ASRock Q1900DC-ITX */
		.callback = set_pci_reboot,
		.ident = "ASRock Q1900DC-ITX",
		.matches = {
			DMI_MATCH(DMI_BOARD_VENDOR, "ASRock"),
			DMI_MATCH(DMI_BOARD_NAME, "Q1900DC-ITX"),
		},
	},

	/* ASUS */
	{	/* Handle problems with rebooting on ASUS P4S800 */
		.callback = set_bios_reboot,
		.ident = "ASUS P4S800",
		.matches = {
			DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
			DMI_MATCH(DMI_BOARD_NAME, "P4S800"),
		},
	},
	{	/* Handle problems with rebooting on ASUS EeeBook X205TA */
		.callback = set_acpi_reboot,
		.ident = "ASUS EeeBook X205TA",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
			DMI_MATCH(DMI_PRODUCT_NAME, "X205TA"),
		},
	},
	{	/* Handle problems with rebooting on ASUS EeeBook X205TAW */
		.callback = set_acpi_reboot,
		.ident = "ASUS EeeBook X205TAW",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
			DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"),
		},
	},

	/* Certec */
	{       /* Handle problems with rebooting on Certec BPC600 */
		.callback = set_pci_reboot,
		.ident = "Certec BPC600",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Certec"),
			DMI_MATCH(DMI_PRODUCT_NAME, "BPC600"),
		},
	},

	/* Dell */
	{	/* Handle problems with rebooting on Dell DXP061 */
		.callback = set_bios_reboot,
		.ident = "Dell DXP061",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
			DMI_MATCH(DMI_PRODUCT_NAME, "Dell DXP061"),
		},
	},
	{	/* Handle problems with rebooting on Dell E520's */
		.callback = set_bios_reboot,
		.ident = "Dell E520",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
			DMI_MATCH(DMI_PRODUCT_NAME, "Dell DM061"),
		},
	},
	{	/* Handle problems with rebooting on the Latitude E5410. */
		.callback = set_pci_reboot,
		.ident = "Dell Latitude E5410",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
			DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E5410"),
		},
	},
	{	/* Handle problems with rebooting on the Latitude E5420. */
		.callback = set_pci_reboot,
		.ident = "Dell Latitude E5420",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
			DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E5420"),
		},
	},
	{	/* Handle problems with rebooting on the Latitude E6320. */
		.callback = set_pci_reboot,
		.ident = "Dell Latitude E6320",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
			DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6320"),
		},
	},
	{	/* Handle problems with rebooting on the Latitude E6420. */
		.callback = set_pci_reboot,
		.ident = "Dell Latitude E6420",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
			DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6420"),
		},
	},
	{	/* Handle problems with rebooting on Dell Optiplex 330 with 0KP561 */
		.callback = set_bios_reboot,
		.ident = "Dell OptiPlex 330",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
			DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 330"),
			DMI_MATCH(DMI_BOARD_NAME, "0KP561"),
		},
	},
	{	/* Handle problems with rebooting on Dell Optiplex 360 with 0T656F */
		.callback = set_bios_reboot,
		.ident = "Dell OptiPlex 360",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
			DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 360"),
			DMI_MATCH(DMI_BOARD_NAME, "0T656F"),
		},
	},
	{	/* Handle problems with rebooting on Dell Optiplex 745's SFF */
		.callback = set_bios_reboot,
		.ident = "Dell OptiPlex 745",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
			DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"),
		},
	},
	{	/* Handle problems with rebooting on Dell Optiplex 745's DFF */
		.callback = set_bios_reboot,
		.ident = "Dell OptiPlex 745",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
			DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"),
			DMI_MATCH(DMI_BOARD_NAME, "0MM599"),
		},
	},
	{	/* Handle problems with rebooting on Dell Optiplex 745 with 0KW626 */
		.callback = set_bios_reboot,
		.ident = "Dell OptiPlex 745",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
			DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"),
			DMI_MATCH(DMI_BOARD_NAME, "0KW626"),
		},
	},
	{	/* Handle problems with rebooting on Dell OptiPlex 760 with 0G919G */
		.callback = set_bios_reboot,
		.ident = "Dell OptiPlex 760",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
			DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 760"),
			DMI_MATCH(DMI_BOARD_NAME, "0G919G"),
		},
	},
	{	/* Handle problems with rebooting on the OptiPlex 990. */
		.callback = set_pci_reboot,
		.ident = "Dell OptiPlex 990 BIOS A0x",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
			DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 990"),
			DMI_MATCH(DMI_BIOS_VERSION, "A0"),
		},
	},
	{	/* Handle problems with rebooting on Dell 300's */
		.callback = set_bios_reboot,
		.ident = "Dell PowerEdge 300",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
			DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 300/"),
		},
	},
	{	/* Handle problems with rebooting on Dell 1300's */
		.callback = set_bios_reboot,
		.ident = "Dell PowerEdge 1300",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
			DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 1300/"),
		},
	},
	{	/* Handle problems with rebooting on Dell 2400's */
		.callback = set_bios_reboot,
		.ident = "Dell PowerEdge 2400",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
			DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2400"),
		},
	},
	{	/* Handle problems with rebooting on the Dell PowerEdge C6100. */
		.callback = set_pci_reboot,
		.ident = "Dell PowerEdge C6100",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Dell"),
			DMI_MATCH(DMI_PRODUCT_NAME, "C6100"),
		},
	},
	{	/* Handle problems with rebooting on the Precision M6600. */
		.callback = set_pci_reboot,
		.ident = "Dell Precision M6600",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
			DMI_MATCH(DMI_PRODUCT_NAME, "Precision M6600"),
		},
	},
	{	/* Handle problems with rebooting on Dell T5400's */
		.callback = set_bios_reboot,
		.ident = "Dell Precision T5400",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
			DMI_MATCH(DMI_PRODUCT_NAME, "Precision WorkStation T5400"),
		},
	},
	{	/* Handle problems with rebooting on Dell T7400's */
		.callback = set_bios_reboot,
		.ident = "Dell Precision T7400",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
			DMI_MATCH(DMI_PRODUCT_NAME, "Precision WorkStation T7400"),
		},
	},
	{	/* Handle problems with rebooting on Dell XPS710 */
		.callback = set_bios_reboot,
		.ident = "Dell XPS710",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
			DMI_MATCH(DMI_PRODUCT_NAME, "Dell XPS710"),
		},
	},
	{	/* Handle problems with rebooting on Dell Optiplex 7450 AIO */
		.callback = set_acpi_reboot,
		.ident = "Dell OptiPlex 7450 AIO",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
			DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 7450 AIO"),
		},
	},

	/* Hewlett-Packard */
	{	/* Handle problems with rebooting on HP laptops */
		.callback = set_bios_reboot,
		.ident = "HP Compaq Laptop",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
			DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq"),
		},
	},

	/* Zotac (the entry matches a system vendor string of "NA") */
	{	/* PCIe Wifi card isn't detected after reboot otherwise */
		.callback = set_pci_reboot,
		.ident = "Zotac ZBOX CI327 nano",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "NA"),
			DMI_MATCH(DMI_PRODUCT_NAME, "ZBOX-CI327NANO-GS-01"),
		},
	},

	/* Sony */
	{	/* Handle problems with rebooting on Sony VGN-Z540N */
		.callback = set_bios_reboot,
		.ident = "Sony VGN-Z540N",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
			DMI_MATCH(DMI_PRODUCT_NAME, "VGN-Z540N"),
		},
	},

	/* Empty entry marks the end of the table. */
	{ }
};
493 
reboot_init(void)494 static int __init reboot_init(void)
495 {
496 	int rv;
497 
498 	/*
499 	 * Only do the DMI check if reboot_type hasn't been overridden
500 	 * on the command line
501 	 */
502 	if (!reboot_default)
503 		return 0;
504 
505 	/*
506 	 * The DMI quirks table takes precedence. If no quirks entry
507 	 * matches and the ACPI Hardware Reduced bit is set and EFI
508 	 * runtime services are enabled, force EFI reboot.
509 	 */
510 	rv = dmi_check_system(reboot_dmi_table);
511 
512 	if (!rv && efi_reboot_required() && !efi_runtime_disabled())
513 		reboot_type = BOOT_EFI;
514 
515 	return 0;
516 }
517 core_initcall(reboot_init);
518 
/*
 * Poll I/O port 0x64 until bit 1 (0x02) reads back clear, pausing 2us
 * between polls.  Gives up silently after 0x10000 polls.
 */
static inline void kb_wait(void)
{
	int polls_left = 0x10000;

	while (polls_left-- > 0 && (inb(0x64) & 0x02) != 0)
		udelay(2);
}
529 
/*
 * Forward declaration: used by emergency_reboot_disable_virtualization()
 * below, defined near the end of the file (SMP and !SMP variants).
 */
static inline void nmi_shootdown_cpus_on_restart(void);
531 
532 #if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD)
/*
 * RCU-protected callback to disable virtualization prior to reboot.
 * Installed and cleared through cpu_emergency_{register,unregister}_virt_callback()
 * and invoked from cpu_emergency_disable_virtualization().
 */
static cpu_emergency_virt_cb __rcu *cpu_emergency_virt_callback;
535 
/*
 * Install @callback as the emergency "disable virtualization" hook.
 *
 * Only one callback can be installed at a time: if one is already present
 * this warns (once) and leaves the existing callback untouched.
 */
void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback)
{
	if (!WARN_ON_ONCE(rcu_access_pointer(cpu_emergency_virt_callback)))
		rcu_assign_pointer(cpu_emergency_virt_callback, callback);
}
EXPORT_SYMBOL_GPL(cpu_emergency_register_virt_callback);
544 
/*
 * Remove @callback as the emergency "disable virtualization" hook.
 *
 * Warns (once) and does nothing if @callback is not the currently
 * installed callback.  Otherwise the pointer is cleared and
 * synchronize_rcu() is called before returning.
 */
void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback)
{
	if (!WARN_ON_ONCE(rcu_access_pointer(cpu_emergency_virt_callback) != callback)) {
		rcu_assign_pointer(cpu_emergency_virt_callback, NULL);
		synchronize_rcu();
	}
}
EXPORT_SYMBOL_GPL(cpu_emergency_unregister_virt_callback);
554 
555 /*
556  * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during
557  * reboot.  VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if
558  * GIF=0, i.e. if the crash occurred between CLGI and STGI.
559  */
cpu_emergency_disable_virtualization(void)560 void cpu_emergency_disable_virtualization(void)
561 {
562 	cpu_emergency_virt_cb *callback;
563 
564 	/*
565 	 * IRQs must be disabled as KVM enables virtualization in hardware via
566 	 * function call IPIs, i.e. IRQs need to be disabled to guarantee
567 	 * virtualization stays disabled.
568 	 */
569 	lockdep_assert_irqs_disabled();
570 
571 	rcu_read_lock();
572 	callback = rcu_dereference(cpu_emergency_virt_callback);
573 	if (callback)
574 		callback();
575 	rcu_read_unlock();
576 }
577 
emergency_reboot_disable_virtualization(void)578 static void emergency_reboot_disable_virtualization(void)
579 {
580 	local_irq_disable();
581 
582 	/*
583 	 * Disable virtualization on all CPUs before rebooting to avoid hanging
584 	 * the system, as VMX and SVM block INIT when running in the host.
585 	 *
586 	 * We can't take any locks and we may be on an inconsistent state, so
587 	 * use NMIs as IPIs to tell the other CPUs to disable VMX/SVM and halt.
588 	 *
589 	 * Do the NMI shootdown even if virtualization is off on _this_ CPU, as
590 	 * other CPUs may have virtualization enabled.
591 	 */
592 	if (rcu_access_pointer(cpu_emergency_virt_callback)) {
593 		/* Safely force _this_ CPU out of VMX/SVM operation. */
594 		cpu_emergency_disable_virtualization();
595 
596 		/* Disable VMX/SVM and halt on other CPUs. */
597 		nmi_shootdown_cpus_on_restart();
598 	}
599 }
600 #else
/* Neither CONFIG_KVM_INTEL nor CONFIG_KVM_AMD is enabled: nothing to do. */
static void emergency_reboot_disable_virtualization(void) { }
602 #endif /* CONFIG_KVM_INTEL || CONFIG_KVM_AMD */
603 
/*
 * Weak, empty default for board specific reboot fixups.  Called from the
 * BOOT_KBD path of native_machine_emergency_restart().
 */
void __attribute__((weak)) mach_reboot_fixups(void)
{
}
607 
/*
 * To the best of our knowledge Windows compatible x86 hardware expects
 * the following on reboot:
 *
 * 1) If the FADT has the ACPI reboot register flag set, try it
 * 2) If still alive, write to the keyboard controller
 * 3) If still alive, write to the ACPI reboot register again
 * 4) If still alive, write to the keyboard controller again
 * 5) If still alive, call the EFI runtime service to reboot
 * 6) If no EFI runtime service, call the BIOS to do a reboot
 *
 * We default to following the same pattern. We also have
 * two other reboot methods: 'triple fault' and 'PCI', which
 * can be triggered via the reboot= kernel boot option or
 * via quirks.
 *
 * This function never returns: each method that fails to reset the
 * machine hands over to the next one inside an endless loop, so in the
 * worst case it misbehaves by not rebooting properly and hanging.
 */
static void native_machine_emergency_restart(void)
{
	int i;
	/* Set once the ACPI method has been retried after the first KBD try. */
	int attempt = 0;
	/* Method selected on entry; decides whether ACPI gets a second try. */
	int orig_reboot_type = reboot_type;
	unsigned short mode;

	/* Only set when we came in through machine_emergency_restart(). */
	if (reboot_emergency)
		emergency_reboot_disable_virtualization();

	tboot_shutdown(TB_SHUTDOWN_REBOOT);

	/* Tell the BIOS if we want cold or warm reboot */
	mode = reboot_mode == REBOOT_WARM ? 0x1234 : 0;
	*((unsigned short *)__va(0x472)) = mode;

	/*
	 * If an EFI capsule has been registered with the firmware then
	 * override the reboot= parameter.
	 */
	if (efi_capsule_pending(NULL)) {
		pr_info("EFI capsule is pending, forcing EFI reboot.\n");
		reboot_type = BOOT_EFI;
	}

	/*
	 * Each case tries one method and, if we are still running
	 * afterwards, selects the method to try on the next iteration.
	 */
	for (;;) {
		/* Could also try the reset bit in the Hammer NB */
		switch (reboot_type) {
		case BOOT_ACPI:
			acpi_reboot();
			reboot_type = BOOT_KBD;
			break;

		case BOOT_KBD:
			mach_reboot_fixups(); /* For board specific fixups */

			/* Up to ten reset pulses through port 0x64. */
			for (i = 0; i < 10; i++) {
				kb_wait();
				udelay(50);
				outb(0xfe, 0x64); /* Pulse reset low */
				udelay(50);
			}
			/*
			 * If we started with ACPI, go back to it once (steps
			 * 3 and 4 above); otherwise move on to EFI.
			 */
			if (attempt == 0 && orig_reboot_type == BOOT_ACPI) {
				attempt = 1;
				reboot_type = BOOT_ACPI;
			} else {
				reboot_type = BOOT_EFI;
			}
			break;

		case BOOT_EFI:
			efi_reboot(reboot_mode, NULL);
			reboot_type = BOOT_BIOS;
			break;

		case BOOT_BIOS:
			machine_real_restart(MRR_BIOS);

			/* We're probably dead after this, but... */
			reboot_type = BOOT_CF9_SAFE;
			break;

		case BOOT_CF9_FORCE:
			/* Forced: treat port 0xcf9 as safe regardless of detection. */
			port_cf9_safe = true;
			fallthrough;

		case BOOT_CF9_SAFE:
			if (port_cf9_safe) {
				/* 0x06 for a warm reboot, 0x0E otherwise. */
				u8 reboot_code = reboot_mode == REBOOT_WARM ?  0x06 : 0x0E;
				/* Current register value with the reboot bits cleared. */
				u8 cf9 = inb(0xcf9) & ~reboot_code;
				outb(cf9|2, 0xcf9); /* Request hard reset */
				udelay(50);
				/* Actually do the reset */
				outb(cf9|reboot_code, 0xcf9);
				udelay(50);
			}
			reboot_type = BOOT_TRIPLE;
			break;

		case BOOT_TRIPLE:
			/*
			 * Raise an exception (int3) after invalidating the
			 * IDT -- presumably this is the 'triple fault' method
			 * mentioned above.
			 */
			idt_invalidate();
			__asm__ __volatile__("int3");

			/* We're probably dead after this, but... */
			reboot_type = BOOT_KBD;
			break;
		}
	}
}
716 
/*
 * Default machine_ops.shutdown implementation: quiesce the interrupt
 * hardware and stop the other CPUs ahead of a restart, halt or power-off.
 *
 * In order: clear the IO APIC (if configured), disable local IRQs and stop
 * the other CPUs (SMP only), shut down the local APIC, restore the boot
 * IRQ mode, disable the HPET (if configured) and, on 64-bit, call the
 * platform's IOMMU shutdown hook.
 */
void native_machine_shutdown(void)
{
	/* Stop the cpus and apics */
#ifdef CONFIG_X86_IO_APIC
	/*
	 * Disabling IO APIC before local APIC is a workaround for
	 * erratum AVR31 in "Intel Atom Processor C2000 Product Family
	 * Specification Update". In this situation, interrupts that target
	 * a Logical Processor whose Local APIC is either in the process of
	 * being hardware disabled or software disabled are neither delivered
	 * nor discarded. When this erratum occurs, the processor may hang.
	 *
	 * Even without the erratum, it still makes sense to quiet IO APIC
	 * before disabling Local APIC.
	 */
	clear_IO_APIC();
#endif

#ifdef CONFIG_SMP
	/*
	 * Stop all of the others. Also disable the local irq to
	 * not receive the per-cpu timer interrupt which may trigger
	 * scheduler's load balance.
	 */
	local_irq_disable();
	stop_other_cpus();
#endif

	lapic_shutdown();
	restore_boot_irq_mode();

#ifdef CONFIG_HPET_TIMER
	hpet_disable();
#endif

#ifdef CONFIG_X86_64
	x86_platform.iommu_shutdown();
#endif
}
756 
/*
 * Record whether this restart is an emergency one (@emergency non-zero)
 * in reboot_emergency, then invoke machine_ops.emergency_restart().
 */
static void __machine_emergency_restart(int emergency)
{
	reboot_emergency = emergency;
	machine_ops.emergency_restart();
}
762 
/*
 * Default machine_ops.restart implementation.  The command string is
 * ignored.  Unless reboot_force is set, the machine is shut down in an
 * orderly way first; the restart itself is then done as a non-emergency
 * restart.
 */
static void native_machine_restart(char *__unused)
{
	pr_notice("machine restart\n");

	if (!reboot_force)
		machine_shutdown();
	__machine_emergency_restart(0);
}
771 
/*
 * Default machine_ops.halt implementation: shut the machine down, notify
 * tboot of the halt and finally stop the calling CPU.
 */
static void native_machine_halt(void)
{
	/* Stop other cpus and apics */
	machine_shutdown();

	tboot_shutdown(TB_SHUTDOWN_HALT);

	stop_this_cpu(NULL);
}
781 
/*
 * Default machine_ops.power_off implementation.  If the kernel reports
 * that it can power off, shut the machine down (skipped when reboot_force
 * is set) and call do_kernel_power_off().  If that is not possible, or the
 * call returns, fall back to a tboot halt.
 */
static void native_machine_power_off(void)
{
	if (kernel_can_power_off()) {
		if (!reboot_force)
			machine_shutdown();
		do_kernel_power_off();
	}
	/* A fallback in case there is no PM info available */
	tboot_shutdown(TB_SHUTDOWN_HALT);
}
792 
/*
 * Dispatch table behind the machine_*() entry points below, initialised
 * with the native implementations from this file.  The crash_shutdown
 * hook only exists with CONFIG_KEXEC_CORE.
 */
struct machine_ops machine_ops __ro_after_init = {
	.power_off = native_machine_power_off,
	.shutdown = native_machine_shutdown,
	.emergency_restart = native_machine_emergency_restart,
	.restart = native_machine_restart,
	.halt = native_machine_halt,
#ifdef CONFIG_KEXEC_CORE
	.crash_shutdown = native_machine_crash_shutdown,
#endif
};
803 
/* Power the machine off through machine_ops.power_off(). */
void machine_power_off(void)
{
	machine_ops.power_off();
}
808 
/* Shut the machine down through machine_ops.shutdown(). */
void machine_shutdown(void)
{
	machine_ops.shutdown();
}
813 
/*
 * Restart through the emergency path: reboot_emergency is set to 1 before
 * machine_ops.emergency_restart() is invoked.
 */
void machine_emergency_restart(void)
{
	__machine_emergency_restart(1);
}
818 
/* Restart the machine through machine_ops.restart(), passing @cmd along. */
void machine_restart(char *cmd)
{
	machine_ops.restart(cmd);
}
823 
/* Halt the machine through machine_ops.halt(). */
void machine_halt(void)
{
	machine_ops.halt();
}
828 
829 #ifdef CONFIG_KEXEC_CORE
/* Crash shutdown through machine_ops.crash_shutdown(), passing @regs along. */
void machine_crash_shutdown(struct pt_regs *regs)
{
	machine_ops.crash_shutdown(regs);
}
834 #endif
835 
/*
 * This is the CPU performing the emergency shutdown work.  Stays at -1
 * until nmi_shootdown_cpus() records the calling CPU here.
 */
int crashing_cpu = -1;
838 
839 #if defined(CONFIG_SMP)
840 
/* Optional callback run by crash_nmi_callback() on each CPU being stopped. */
static nmi_shootdown_cb shootdown_callback;

/* Number of other CPUs that have not yet gone through crash_nmi_callback(). */
static atomic_t waiting_for_crash_ipi;
/* Set to 1 once nmi_shootdown_cpus() has sent the shootdown NMI. */
static int crash_ipi_issued;
845 
crash_nmi_callback(unsigned int val,struct pt_regs * regs)846 static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
847 {
848 	int cpu;
849 
850 	cpu = raw_smp_processor_id();
851 
852 	/*
853 	 * Don't do anything if this handler is invoked on crashing cpu.
854 	 * Otherwise, system will completely hang. Crashing cpu can get
855 	 * an NMI if system was initially booted with nmi_watchdog parameter.
856 	 */
857 	if (cpu == crashing_cpu)
858 		return NMI_HANDLED;
859 	local_irq_disable();
860 
861 	if (shootdown_callback)
862 		shootdown_callback(cpu, regs);
863 
864 	/*
865 	 * Prepare the CPU for reboot _after_ invoking the callback so that the
866 	 * callback can safely use virtualization instructions, e.g. VMCLEAR.
867 	 */
868 	cpu_emergency_disable_virtualization();
869 
870 	atomic_dec(&waiting_for_crash_ipi);
871 	/* Assume hlt works */
872 	halt();
873 	for (;;)
874 		cpu_relax();
875 
876 	return NMI_HANDLED;
877 }
878 
/**
 * nmi_shootdown_cpus - Stop other CPUs via NMI
 * @callback:	Optional callback to be invoked from the NMI handler
 *
 * The NMI handler on the remote CPUs invokes @callback, if not
 * NULL, first and then disables virtualization to ensure that
 * INIT is recognized during reboot.
 *
 * nmi_shootdown_cpus() can only be invoked once. After the first
 * invocation all other CPUs are stuck in crash_nmi_callback() and
 * cannot respond to a second NMI.
 *
 * Local interrupts are disabled on entry and are left disabled on return.
 * The function waits at most one second for the other CPUs to stop.
 */
void nmi_shootdown_cpus(nmi_shootdown_cb callback)
{
	unsigned long msecs;

	local_irq_disable();

	/*
	 * Avoid certain doom if a shootdown already occurred; re-registering
	 * the NMI handler will cause list corruption, modifying the callback
	 * will do who knows what, etc...
	 */
	if (WARN_ON_ONCE(crash_ipi_issued))
		return;

	/* Make a note of crashing cpu. Will be used in NMI callback. */
	crashing_cpu = safe_smp_processor_id();

	shootdown_callback = callback;

	/* Every online CPU except this one is expected to check in. */
	atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
	/* Would it be better to replace the trap vector here? */
	if (register_nmi_handler(NMI_LOCAL, crash_nmi_callback,
				 NMI_FLAG_FIRST, "crash"))
		return;		/* Return what? */
	/*
	 * Ensure the new callback function is set before sending
	 * out the NMI
	 */
	wmb();

	apic_send_IPI_allbutself(NMI_VECTOR);

	/*
	 * Kick CPUs looping in NMI context: nmi_panic_self_stop() polls this
	 * flag through run_crash_ipi_callback().
	 */
	WRITE_ONCE(crash_ipi_issued, 1);

	msecs = 1000; /* Wait at most a second for the other cpus to stop */
	while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
		mdelay(1);
		msecs--;
	}

	/*
	 * Leave the nmi callback set, shootdown is a one-time thing.  Clearing
	 * the callback could result in a NULL pointer dereference if a CPU
	 * (finally) responds after the timeout expires.
	 */
}
938 
/*
 * Shoot down the other CPUs (without a callback) on the restart path,
 * unless a shootdown has already been issued.
 */
static inline void nmi_shootdown_cpus_on_restart(void)
{
	if (!crash_ipi_issued)
		nmi_shootdown_cpus(NULL);
}
944 
/*
 * Check if the crash dumping IPI got issued and if so, call its callback
 * directly. This function is used when we have already been in NMI handler.
 *
 * If the IPI was issued, crash_nmi_callback() parks this CPU and the call
 * does not return, except on the crashing CPU where the callback returns
 * immediately.  If no IPI was issued, this function simply returns.
 */
void run_crash_ipi_callback(struct pt_regs *regs)
{
	if (crash_ipi_issued)
		crash_nmi_callback(0, regs);
}
955 
956 /* Override the weak function in kernel/panic.c */
nmi_panic_self_stop(struct pt_regs * regs)957 void __noreturn nmi_panic_self_stop(struct pt_regs *regs)
958 {
959 	while (1) {
960 		/* If no CPU is preparing crash dump, we simply loop here. */
961 		run_crash_ipi_callback(regs);
962 		cpu_relax();
963 	}
964 }
965 
966 #else /* !CONFIG_SMP */
/* !CONFIG_SMP stub: @callback is never invoked. */
void nmi_shootdown_cpus(nmi_shootdown_cb callback)
{
	/* No other CPUs to shoot down */
}
971 
/* !CONFIG_SMP stub: there are no other CPUs to shoot down on restart. */
static inline void nmi_shootdown_cpus_on_restart(void) { }
973 
/* !CONFIG_SMP stub: does nothing; @regs is unused. */
void run_crash_ipi_callback(struct pt_regs *regs)
{
}
977 #endif
978