xref: /openbmc/linux/drivers/hv/hv_common.c (revision 44676bb9)
// SPDX-License-Identifier: GPL-2.0

/*
 * Architecture neutral utility routines for interacting with
 * Hyper-V. This file is specifically for code that must be
 * built-in to the kernel image when CONFIG_HYPERV is set
 * (vs. being in a module) because it is called from architecture
 * specific code under arch/.
 *
 * Copyright (C) 2021, Microsoft, Inc.
 *
 * Author : Michael Kelley <mikelley@microsoft.com>
 */

#include <linux/types.h>
#include <linux/acpi.h>
#include <linux/export.h>
#include <linux/bitfield.h>
#include <linux/cpumask.h>
#include <linux/sched/task_stack.h>
#include <linux/panic_notifier.h>
#include <linux/ptrace.h>
#include <linux/kdebug.h>
#include <linux/kmsg_dump.h>
#include <linux/slab.h>
#include <linux/dma-map-ops.h>
#include <linux/set_memory.h>
#include <asm/hyperv-tlfs.h>
#include <asm/mshyperv.h>

/*
 * hv_root_partition, ms_hyperv and hv_nested are defined here with other
 * Hyper-V specific globals so they are shared across all architectures and are
 * built only when CONFIG_HYPERV is defined.  But on x86,
 * ms_hyperv_init_platform() is built even when CONFIG_HYPERV is not
 * defined, and it uses these three variables.  So mark them as __weak
 * here, allowing for an overriding definition in the module containing
 * ms_hyperv_init_platform().
 */
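/*
 * Illustrative only: the strong (non-weak) definitions live alongside
 * ms_hyperv_init_platform() in the x86 arch code (assumed to be
 * arch/x86/kernel/cpu/mshyperv.c) and would look like, e.g.:
 *
 *	bool hv_root_partition;
 *	struct ms_hyperv_info ms_hyperv;
 */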
bool __weak hv_root_partition;
EXPORT_SYMBOL_GPL(hv_root_partition);

bool __weak hv_nested;
EXPORT_SYMBOL_GPL(hv_nested);

struct ms_hyperv_info __weak ms_hyperv;
EXPORT_SYMBOL_GPL(ms_hyperv);

u32 *hv_vp_index;
EXPORT_SYMBOL_GPL(hv_vp_index);

u32 hv_max_vp_index;
EXPORT_SYMBOL_GPL(hv_max_vp_index);

void * __percpu *hyperv_pcpu_input_arg;
EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg);

void * __percpu *hyperv_pcpu_output_arg;
EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg);

static void hv_kmsg_dump_unregister(void);

static struct ctl_table_header *hv_ctl_table_hdr;

/*
 * Hyper-V specific initialization and shutdown code that is
 * common across all architectures.  Called from architecture
 * specific initialization functions.
 */

void __init hv_common_free(void)
{
	unregister_sysctl_table(hv_ctl_table_hdr);
	hv_ctl_table_hdr = NULL;

	if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE)
		hv_kmsg_dump_unregister();

	kfree(hv_vp_index);
	hv_vp_index = NULL;

	free_percpu(hyperv_pcpu_output_arg);
	hyperv_pcpu_output_arg = NULL;

	free_percpu(hyperv_pcpu_input_arg);
	hyperv_pcpu_input_arg = NULL;
}

/*
 * Functions for allocating and freeing memory with size and
 * alignment HV_HYP_PAGE_SIZE. These functions are needed because
 * the guest page size may not be the same as the Hyper-V page
 * size. We depend upon kmalloc() aligning power-of-two size
 * allocations to the allocation size boundary, so that the
 * allocated memory appears to Hyper-V as a page of the size
 * it expects.
 */

void *hv_alloc_hyperv_page(void)
{
	BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE);

	if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
		return (void *)__get_free_page(GFP_KERNEL);
	else
		return kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
}
EXPORT_SYMBOL_GPL(hv_alloc_hyperv_page);

void *hv_alloc_hyperv_zeroed_page(void)
{
	if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
		return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
	else
		return kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
}
EXPORT_SYMBOL_GPL(hv_alloc_hyperv_zeroed_page);

void hv_free_hyperv_page(unsigned long addr)
{
	if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
		free_page(addr);
	else
		kfree((void *)addr);
}
EXPORT_SYMBOL_GPL(hv_free_hyperv_page);
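
/*
 * A minimal usage sketch (hypothetical caller) for the helpers above:
 * allocate a Hyper-V sized page, use it, then free it with the same
 * address that was returned.
 *
 *	void *hv_page = hv_alloc_hyperv_zeroed_page();
 *
 *	if (!hv_page)
 *		return -ENOMEM;
 *	... hand the page to Hyper-V ...
 *	hv_free_hyperv_page((unsigned long)hv_page);
 */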

static void *hv_panic_page;

/*
 * Boolean to control whether to report panic messages over Hyper-V.
 *
 * It can be set via /proc/sys/kernel/hyperv_record_panic_msg
 */
static int sysctl_record_panic_msg = 1;
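
/*
 * For example, recording of the panic message can be turned off at
 * runtime from user space (illustrative shell command):
 *
 *	echo 0 > /proc/sys/kernel/hyperv_record_panic_msg
 */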

/*
 * sysctl option to allow the user to control whether kmsg data should be
 * reported to Hyper-V on panic.
 */
static struct ctl_table hv_ctl_table[] = {
	{
		.procname	= "hyperv_record_panic_msg",
		.data		= &sysctl_record_panic_msg,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE
	},
	{}
};

static int hv_die_panic_notify_crash(struct notifier_block *self,
				     unsigned long val, void *args);

static struct notifier_block hyperv_die_report_block = {
	.notifier_call = hv_die_panic_notify_crash,
};

static struct notifier_block hyperv_panic_report_block = {
	.notifier_call = hv_die_panic_notify_crash,
};

/*
 * The following callback works both as die and panic notifier; its
 * goal is to provide panic information to the hypervisor unless the
 * kmsg dumper is used [see hv_kmsg_dump()], which provides more
 * information but isn't always available.
 *
 * Notice that both the panic/die report notifiers are registered only
 * if we have the capability HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE set.
 */
static int hv_die_panic_notify_crash(struct notifier_block *self,
				     unsigned long val, void *args)
{
	struct pt_regs *regs;
	bool is_die;

	/* Don't notify Hyper-V unless we have a die oops event or panic. */
	if (self == &hyperv_panic_report_block) {
		is_die = false;
		regs = current_pt_regs();
	} else { /* die event */
		if (val != DIE_OOPS)
			return NOTIFY_DONE;

		is_die = true;
		regs = ((struct die_args *)args)->regs;
	}

	/*
	 * Hyper-V should be notified only once about a panic/die. If
	 * hv_kmsg_dump() will be called later with kmsg data, don't do
	 * the notification here.
	 */
	if (!sysctl_record_panic_msg || !hv_panic_page)
		hyperv_report_panic(regs, val, is_die);

	return NOTIFY_DONE;
}

/*
 * Callback from kmsg_dump. Grab as much as possible from the end of the kmsg
 * buffer and call into Hyper-V to transfer the data.
 */
static void hv_kmsg_dump(struct kmsg_dumper *dumper,
			 enum kmsg_dump_reason reason)
{
	struct kmsg_dump_iter iter;
	size_t bytes_written;

	/* We are only interested in panics. */
	if (reason != KMSG_DUMP_PANIC || !sysctl_record_panic_msg)
		return;

	/*
	 * Write dump contents to the page. No need to synchronize; panic should
	 * be single-threaded.
	 */
	kmsg_dump_rewind(&iter);
	kmsg_dump_get_buffer(&iter, false, hv_panic_page, HV_HYP_PAGE_SIZE,
			     &bytes_written);
	if (!bytes_written)
		return;
	/*
	 * P3 contains the physical address of the panic page and P4
	 * contains the size of the panic data in that page. The remaining
	 * registers are ignored when the NOTIFY_MSG flag is set.
	 */
	hv_set_register(HV_REGISTER_CRASH_P0, 0);
	hv_set_register(HV_REGISTER_CRASH_P1, 0);
	hv_set_register(HV_REGISTER_CRASH_P2, 0);
	hv_set_register(HV_REGISTER_CRASH_P3, virt_to_phys(hv_panic_page));
	hv_set_register(HV_REGISTER_CRASH_P4, bytes_written);

	/*
	 * Let Hyper-V know there is crash data available along with
	 * the panic message.
	 */
	hv_set_register(HV_REGISTER_CRASH_CTL,
			(HV_CRASH_CTL_CRASH_NOTIFY |
			 HV_CRASH_CTL_CRASH_NOTIFY_MSG));
}

static struct kmsg_dumper hv_kmsg_dumper = {
	.dump = hv_kmsg_dump,
};

static void hv_kmsg_dump_unregister(void)
{
	kmsg_dump_unregister(&hv_kmsg_dumper);
	unregister_die_notifier(&hyperv_die_report_block);
	atomic_notifier_chain_unregister(&panic_notifier_list,
					 &hyperv_panic_report_block);

	hv_free_hyperv_page((unsigned long)hv_panic_page);
	hv_panic_page = NULL;
}

static void hv_kmsg_dump_register(void)
{
	int ret;

	hv_panic_page = hv_alloc_hyperv_zeroed_page();
	if (!hv_panic_page) {
		pr_err("Hyper-V: panic message page memory allocation failed\n");
		return;
	}

	ret = kmsg_dump_register(&hv_kmsg_dumper);
	if (ret) {
		pr_err("Hyper-V: kmsg dump register error 0x%x\n", ret);
		hv_free_hyperv_page((unsigned long)hv_panic_page);
		hv_panic_page = NULL;
	}
}

int __init hv_common_init(void)
{
	int i;

	if (hv_is_isolation_supported())
		sysctl_record_panic_msg = 0;

	/*
	 * Hyper-V expects to get crash register data or kmsg when the
	 * crash enlightenment is available and the system crashes. Set
	 * crash_kexec_post_notifiers to true to make sure the crash
	 * enlightenment interface is called before the kdump kernel
	 * runs.
	 */
	if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
		u64 hyperv_crash_ctl;

		crash_kexec_post_notifiers = true;
		pr_info("Hyper-V: enabling crash_kexec_post_notifiers\n");

		/*
		 * Panic message recording (sysctl_record_panic_msg)
		 * is enabled by default in non-isolated guests and
		 * disabled by default in isolated guests; the panic
		 * message recording won't be available in isolated
		 * guests should the following registration fail.
		 */
		hv_ctl_table_hdr = register_sysctl("kernel", hv_ctl_table);
		if (!hv_ctl_table_hdr)
			pr_err("Hyper-V: sysctl table register error\n");

		/*
		 * Register for panic kmsg callback only if the right
		 * capability is supported by the hypervisor.
		 */
		hyperv_crash_ctl = hv_get_register(HV_REGISTER_CRASH_CTL);
		if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG)
			hv_kmsg_dump_register();

		register_die_notifier(&hyperv_die_report_block);
		atomic_notifier_chain_register(&panic_notifier_list,
					       &hyperv_panic_report_block);
	}

	/*
	 * Allocate the per-CPU state for the hypercall input arg.
	 * If this allocation fails, we will not be able to set up the
	 * (per-CPU) hypercall input page, so this failure is fatal on
	 * Hyper-V.
	 */
	hyperv_pcpu_input_arg = alloc_percpu(void *);
	BUG_ON(!hyperv_pcpu_input_arg);

	/* Allocate the per-CPU state for output arg for root */
	if (hv_root_partition) {
		hyperv_pcpu_output_arg = alloc_percpu(void *);
		BUG_ON(!hyperv_pcpu_output_arg);
	}

	hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index),
				    GFP_KERNEL);
	if (!hv_vp_index) {
		hv_common_free();
		return -ENOMEM;
	}

	for (i = 0; i < num_possible_cpus(); i++)
		hv_vp_index[i] = VP_INVAL;

	return 0;
}

/*
 * Hyper-V specific initialization and die code for
 * individual CPUs that is common across all architectures.
 * Called by the CPU hotplug mechanism.
 */

int hv_common_cpu_init(unsigned int cpu)
{
	void **inputarg, **outputarg;
	u64 msr_vp_index;
	gfp_t flags;
	int pgcount = hv_root_partition ? 2 : 1;
	int ret;
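
	/*
	 * Note (descriptive only): the per-CPU buffer allocated below is
	 * pgcount Hyper-V pages in one contiguous kmalloc allocation. The
	 * input arg page comes first; in the root partition a second page
	 * immediately after it is used as the output arg page:
	 *
	 *	*inputarg  -> [ HV_HYP_PAGE_SIZE input page  ]
	 *	*outputarg -> [ HV_HYP_PAGE_SIZE output page ]   (root only)
	 */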

	/* hv_cpu_init() can be called with IRQs disabled from hv_resume() */
	flags = irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL;

	inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);

	/*
	 * hyperv_pcpu_input_arg and hyperv_pcpu_output_arg memory is already
	 * allocated if this CPU was previously online and then taken offline
	 */
	if (!*inputarg) {
		*inputarg = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags);
		if (!(*inputarg))
			return -ENOMEM;

		if (hv_root_partition) {
			outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
			*outputarg = (char *)(*inputarg) + HV_HYP_PAGE_SIZE;
		}

		if (hv_isolation_type_en_snp()) {
			ret = set_memory_decrypted((unsigned long)*inputarg, pgcount);
			if (ret) {
				kfree(*inputarg);
				*inputarg = NULL;
				return ret;
			}

			memset(*inputarg, 0x00, pgcount * HV_HYP_PAGE_SIZE);
		}
	}

	msr_vp_index = hv_get_register(HV_REGISTER_VP_INDEX);

	hv_vp_index[cpu] = msr_vp_index;

	if (msr_vp_index > hv_max_vp_index)
		hv_max_vp_index = msr_vp_index;

	return 0;
}

int hv_common_cpu_die(unsigned int cpu)
{
	/*
	 * The hyperv_pcpu_input_arg and hyperv_pcpu_output_arg memory
	 * is not freed when the CPU goes offline as the hyperv_pcpu_input_arg
	 * may be used by the Hyper-V vPCI driver in reassigning interrupts
	 * as part of the offlining process.  The interrupt reassignment
	 * happens *after* the CPUHP_AP_HYPERV_ONLINE state has run and
	 * called this function.
	 *
	 * If a previously offlined CPU is brought back online again, the
	 * originally allocated memory is reused in hv_common_cpu_init().
	 */

	return 0;
}

/* Bit mask of the extended capability to query: see HV_EXT_CAPABILITY_xxx */
bool hv_query_ext_cap(u64 cap_query)
{
	/*
	 * The address of the 'hv_extended_cap' variable will be used as an
	 * output parameter to the hypercall below and so it should be
	 * compatible with 'virt_to_phys', which means its address should be
	 * directly mapped. Use 'static' to keep it compatible; stack variables
	 * can be virtually mapped, making them incompatible with
	 * 'virt_to_phys'.
	 * Hypercall input/output addresses should also be 8-byte aligned.
	 */
	static u64 hv_extended_cap __aligned(8);
	static bool hv_extended_cap_queried;
	u64 status;

	/*
	 * Querying extended capabilities is an extended hypercall. Check first
	 * whether the partition supports extended hypercalls.
	 */
	if (!(ms_hyperv.priv_high & HV_ENABLE_EXTENDED_HYPERCALLS))
		return false;

	/* Extended capabilities do not change at runtime. */
	if (hv_extended_cap_queried)
		return hv_extended_cap & cap_query;

	status = hv_do_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, NULL,
				 &hv_extended_cap);

	/*
	 * The query extended capabilities hypercall should not fail under
	 * any normal circumstances. Avoid repeatedly making the hypercall on
	 * error.
	 */
	hv_extended_cap_queried = true;
	if (!hv_result_success(status)) {
		pr_err("Hyper-V: Extended query capabilities hypercall failed 0x%llx\n",
		       status);
		return false;
	}

	return hv_extended_cap & cap_query;
}
EXPORT_SYMBOL_GPL(hv_query_ext_cap);
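
/*
 * A minimal usage sketch (hypothetical caller; the capability flag named
 * below is assumed to be one of the HV_EXT_CAPABILITY_xxx values from
 * hyperv-tlfs.h):
 *
 *	if (hv_query_ext_cap(HV_EXT_CAPABILITY_MEMORY_COLD_DISCARD_HINT))
 *		pr_info("Hyper-V: cold discard hint supported\n");
 */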

void hv_setup_dma_ops(struct device *dev, bool coherent)
{
	/*
	 * Hyper-V does not offer a vIOMMU in the guest
	 * VM, so pass 0/NULL for the IOMMU settings
	 */
	arch_setup_dma_ops(dev, 0, 0, NULL, coherent);
}
EXPORT_SYMBOL_GPL(hv_setup_dma_ops);

bool hv_is_hibernation_supported(void)
{
	return !hv_root_partition && acpi_sleep_state_supported(ACPI_STATE_S4);
}
EXPORT_SYMBOL_GPL(hv_is_hibernation_supported);

/*
 * Default function to read the Hyper-V reference counter, independent
 * of whether Hyper-V enlightened clocks/timers are being used. But on
 * architectures where hyperv_timer.c is used, the Hyper-V enlightenment
 * code there may override this function.
 */
static u64 __hv_read_ref_counter(void)
{
	return hv_get_register(HV_REGISTER_TIME_REF_COUNT);
}

u64 (*hv_read_reference_counter)(void) = __hv_read_ref_counter;
EXPORT_SYMBOL_GPL(hv_read_reference_counter);
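
/*
 * Illustrative (hypothetical) caller: the reference counter counts in
 * 100ns units per the Hyper-V TLFS, so code could read it through the
 * function pointer like this:
 *
 *	u64 now_100ns = hv_read_reference_counter();
 */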

/*
 * These __weak functions provide default "no-op" behavior and
 * may be overridden by architecture specific versions. Architectures
 * for which the default "no-op" behavior is sufficient can leave
 * them unimplemented and not be cluttered with a bunch of stub
 * functions in arch-specific code.
 */

bool __weak hv_is_isolation_supported(void)
{
	return false;
}
EXPORT_SYMBOL_GPL(hv_is_isolation_supported);

bool __weak hv_isolation_type_snp(void)
{
	return false;
}
EXPORT_SYMBOL_GPL(hv_isolation_type_snp);

bool __weak hv_isolation_type_en_snp(void)
{
	return false;
}
EXPORT_SYMBOL_GPL(hv_isolation_type_en_snp);

void __weak hv_setup_vmbus_handler(void (*handler)(void))
{
}
EXPORT_SYMBOL_GPL(hv_setup_vmbus_handler);

void __weak hv_remove_vmbus_handler(void)
{
}
EXPORT_SYMBOL_GPL(hv_remove_vmbus_handler);

void __weak hv_setup_kexec_handler(void (*handler)(void))
{
}
EXPORT_SYMBOL_GPL(hv_setup_kexec_handler);

void __weak hv_remove_kexec_handler(void)
{
}
EXPORT_SYMBOL_GPL(hv_remove_kexec_handler);

void __weak hv_setup_crash_handler(void (*handler)(struct pt_regs *regs))
{
}
EXPORT_SYMBOL_GPL(hv_setup_crash_handler);

void __weak hv_remove_crash_handler(void)
{
}
EXPORT_SYMBOL_GPL(hv_remove_crash_handler);

void __weak hyperv_cleanup(void)
{
}
EXPORT_SYMBOL_GPL(hyperv_cleanup);

u64 __weak hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size)
{
	return HV_STATUS_INVALID_PARAMETER;
}
EXPORT_SYMBOL_GPL(hv_ghcb_hypercall);
564