// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/hyperv.h>
#include <linux/random.h>
#include <linux/clockchips.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <clocksource/hyperv_timer.h>
#include <asm/mshyperv.h>
#include <linux/set_memory.h>
#include "hyperv_vmbus.h"

/* The one and only */
struct hv_context hv_context;

/*
 * hv_init - Main initialization routine.
 *
 * This routine must be called before any other routines in here are called
 */
int hv_init(void)
{
	hv_context.cpu_context = alloc_percpu(struct hv_per_cpu_context);
	if (!hv_context.cpu_context)
		return -ENOMEM;
	return 0;
}
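
/*
 * Usage sketch: once hv_init() has succeeded, per-CPU SynIC state is reached
 * through the percpu pointer allocated above, either for the current CPU or
 * for a specific one:
 *
 *	struct hv_per_cpu_context *hv_cpu;
 *
 *	hv_cpu = this_cpu_ptr(hv_context.cpu_context);
 *	hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);
 *
 * Both forms are used by the functions below.
 */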

/*
 * hv_post_message - Post a message using the hypervisor message IPC.
 *
 * This involves a hypercall.
 */
int hv_post_message(union hv_connection_id connection_id,
		  enum hv_message_type message_type,
		  void *payload, size_t payload_size)
{
	struct hv_input_post_message *aligned_msg;
	unsigned long flags;
	u64 status;

	if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
		return -EMSGSIZE;

	local_irq_save(flags);

	aligned_msg = *this_cpu_ptr(hyperv_pcpu_input_arg);
	aligned_msg->connectionid = connection_id;
	aligned_msg->reserved = 0;
	aligned_msg->message_type = message_type;
	aligned_msg->payload_size = payload_size;
	memcpy((void *)aligned_msg->payload, payload, payload_size);

	if (hv_isolation_type_snp())
		status = hv_ghcb_hypercall(HVCALL_POST_MESSAGE,
				(void *)aligned_msg, NULL,
				sizeof(*aligned_msg));
	else
		status = hv_do_hypercall(HVCALL_POST_MESSAGE,
				aligned_msg, NULL);

	local_irq_restore(flags);

	return hv_result(status);
}
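
/*
 * Minimal caller sketch for hv_post_message(), illustrative only.  The real
 * VMBus caller is vmbus_post_msg() in connection.c, which also retries on
 * transient failures.  The connection ID and message type used below are
 * assumptions for the sake of the example, not values required by this file.
 */
static int __maybe_unused hv_post_message_sketch(void *buf, size_t len)
{
	union hv_connection_id conn_id;
	int ret;

	conn_id.asu32 = 0;
	conn_id.u.id = VMBUS_MESSAGE_CONNECTION_ID;

	/*
	 * hv_post_message() returns -EMSGSIZE for an oversized payload and a
	 * Hyper-V status code otherwise; HV_STATUS_SUCCESS (0) means success.
	 */
	ret = hv_post_message(conn_id, 1, buf, len);

	return (ret == HV_STATUS_SUCCESS) ? 0 : -EAGAIN;
}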

int hv_synic_alloc(void)
{
	int cpu, ret = -ENOMEM;
	struct hv_per_cpu_context *hv_cpu;

	/*
	 * First, zero all per-cpu memory areas so hv_synic_free() can
	 * detect what memory has been allocated and clean up properly
	 * after any failures.
	 */
	for_each_present_cpu(cpu) {
		hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);
		memset(hv_cpu, 0, sizeof(*hv_cpu));
	}

	hv_context.hv_numa_map = kcalloc(nr_node_ids, sizeof(struct cpumask),
					 GFP_KERNEL);
	if (hv_context.hv_numa_map == NULL) {
		pr_err("Unable to allocate NUMA map\n");
		goto err;
	}

	for_each_present_cpu(cpu) {
		hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);

		tasklet_init(&hv_cpu->msg_dpc,
			     vmbus_on_msg_dpc, (unsigned long) hv_cpu);

		/*
		 * The SynIC message and event pages are allocated by the
		 * paravisor.  Skip allocating these pages here.
		 */
		if (!hv_isolation_type_snp() && !hv_root_partition) {
			hv_cpu->synic_message_page =
				(void *)get_zeroed_page(GFP_ATOMIC);
			if (hv_cpu->synic_message_page == NULL) {
				pr_err("Unable to allocate SYNIC message page\n");
				goto err;
			}

			hv_cpu->synic_event_page =
				(void *)get_zeroed_page(GFP_ATOMIC);
			if (hv_cpu->synic_event_page == NULL) {
				pr_err("Unable to allocate SYNIC event page\n");

				free_page((unsigned long)hv_cpu->synic_message_page);
				hv_cpu->synic_message_page = NULL;
				goto err;
			}
		}

		if (!ms_hyperv.paravisor_present &&
		    (hv_isolation_type_en_snp() || hv_isolation_type_tdx())) {
			ret = set_memory_decrypted((unsigned long)
				hv_cpu->synic_message_page, 1);
			if (ret) {
				pr_err("Failed to decrypt SYNIC msg page: %d\n", ret);
				hv_cpu->synic_message_page = NULL;

				/*
				 * Free the event page here so that hv_synic_free()
				 * won't later try to re-encrypt it.
				 */
				free_page((unsigned long)hv_cpu->synic_event_page);
				hv_cpu->synic_event_page = NULL;
				goto err;
			}

			ret = set_memory_decrypted((unsigned long)
				hv_cpu->synic_event_page, 1);
			if (ret) {
				pr_err("Failed to decrypt SYNIC event page: %d\n", ret);
				hv_cpu->synic_event_page = NULL;
				goto err;
			}

			memset(hv_cpu->synic_message_page, 0, PAGE_SIZE);
			memset(hv_cpu->synic_event_page, 0, PAGE_SIZE);
		}
	}

	return 0;

err:
	/*
	 * Any memory allocations that succeeded will be freed when
	 * the caller cleans up by calling hv_synic_free()
	 */
	return ret;
}
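
/*
 * Usage sketch (illustrative, assuming the caller is vmbus_bus_init()): the
 * error-handling contract is that hv_synic_alloc() only reports failure and
 * the caller then frees whatever did get allocated by calling
 * hv_synic_free(), which is why every per-CPU area is zeroed up front:
 *
 *	ret = hv_synic_alloc();
 *	if (ret)
 *		goto err_alloc;		(the error path calls hv_synic_free())
 */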

void hv_synic_free(void)
{
	int cpu, ret;

	for_each_present_cpu(cpu) {
		struct hv_per_cpu_context *hv_cpu
			= per_cpu_ptr(hv_context.cpu_context, cpu);

		/* It's better to leak the page if the encryption fails. */
		if (!ms_hyperv.paravisor_present &&
		    (hv_isolation_type_en_snp() || hv_isolation_type_tdx())) {
			if (hv_cpu->synic_message_page) {
				ret = set_memory_encrypted((unsigned long)
					hv_cpu->synic_message_page, 1);
				if (ret) {
					pr_err("Failed to encrypt SYNIC msg page: %d\n", ret);
					hv_cpu->synic_message_page = NULL;
				}
			}

			if (hv_cpu->synic_event_page) {
				ret = set_memory_encrypted((unsigned long)
					hv_cpu->synic_event_page, 1);
				if (ret) {
					pr_err("Failed to encrypt SYNIC event page: %d\n", ret);
					hv_cpu->synic_event_page = NULL;
				}
			}
		}

		free_page((unsigned long)hv_cpu->synic_event_page);
		free_page((unsigned long)hv_cpu->synic_message_page);
	}

	kfree(hv_context.hv_numa_map);
}

/*
 * hv_synic_init - Initialize the Synthetic Interrupt Controller.
 *
 * If it is already initialized by another entity (i.e., the x2v shim), we
 * need to retrieve the initialized message and event pages.  Otherwise, we
 * create and initialize the message and event pages.
 */
void hv_synic_enable_regs(unsigned int cpu)
{
	struct hv_per_cpu_context *hv_cpu
		= per_cpu_ptr(hv_context.cpu_context, cpu);
	union hv_synic_simp simp;
	union hv_synic_siefp siefp;
	union hv_synic_sint shared_sint;
	union hv_synic_scontrol sctrl;

	/* Setup the Synic's message page */
	simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
	simp.simp_enabled = 1;

	if (hv_isolation_type_snp() || hv_root_partition) {
		/* Mask out vTOM bit. ioremap_cache() maps decrypted */
		u64 base = (simp.base_simp_gpa << HV_HYP_PAGE_SHIFT) &
				~ms_hyperv.shared_gpa_boundary;
		hv_cpu->synic_message_page
			= (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE);
		if (!hv_cpu->synic_message_page)
			pr_err("Fail to map synic message page.\n");
	} else {
		simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page)
			>> HV_HYP_PAGE_SHIFT;
	}

	hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);

	/* Setup the Synic's event page */
	siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
	siefp.siefp_enabled = 1;

	if (hv_isolation_type_snp() || hv_root_partition) {
		/* Mask out vTOM bit. ioremap_cache() maps decrypted */
		u64 base = (siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT) &
				~ms_hyperv.shared_gpa_boundary;
		hv_cpu->synic_event_page
			= (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE);
		if (!hv_cpu->synic_event_page)
			pr_err("Fail to map synic event page.\n");
	} else {
		siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page)
			>> HV_HYP_PAGE_SHIFT;
	}

	hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);

	/* Setup the shared SINT. */
	if (vmbus_irq != -1)
		enable_percpu_irq(vmbus_irq, 0);
	shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
					VMBUS_MESSAGE_SINT);

	shared_sint.vector = vmbus_interrupt;
	shared_sint.masked = false;

	/*
	 * On architectures where Hyper-V doesn't support AEOI (e.g., ARM64),
	 * it doesn't provide a recommendation flag and AEOI must be disabled.
	 */
#ifdef HV_DEPRECATING_AEOI_RECOMMENDED
	shared_sint.auto_eoi =
			!(ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED);
#else
	shared_sint.auto_eoi = 0;
#endif
	hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
				shared_sint.as_uint64);

	/* Enable the global synic bit */
	sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
	sctrl.enable = 1;

	hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
}

int hv_synic_init(unsigned int cpu)
{
	hv_synic_enable_regs(cpu);

	hv_stimer_legacy_init(cpu, VMBUS_MESSAGE_SINT);

	return 0;
}

/*
 * hv_synic_cleanup - Cleanup routine for hv_synic_init().
 */
void hv_synic_disable_regs(unsigned int cpu)
{
	struct hv_per_cpu_context *hv_cpu
		= per_cpu_ptr(hv_context.cpu_context, cpu);
	union hv_synic_sint shared_sint;
	union hv_synic_simp simp;
	union hv_synic_siefp siefp;
	union hv_synic_scontrol sctrl;

	shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
					VMBUS_MESSAGE_SINT);

	shared_sint.masked = 1;

	/* Need to correctly cleanup in the case of SMP!!! */
	/* Disable the interrupt */
	hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
				shared_sint.as_uint64);

	simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
	/*
	 * In an isolation VM, the SIMP and SIEFP pages are allocated by the
	 * paravisor.  These pages will also be used by the kdump kernel, so
	 * just reset the enable bit here and keep the page addresses.
	 */
	simp.simp_enabled = 0;
	if (hv_isolation_type_snp() || hv_root_partition) {
		iounmap(hv_cpu->synic_message_page);
		hv_cpu->synic_message_page = NULL;
	} else {
		simp.base_simp_gpa = 0;
	}

	hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);

	siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
	siefp.siefp_enabled = 0;

	if (hv_isolation_type_snp() || hv_root_partition) {
		iounmap(hv_cpu->synic_event_page);
		hv_cpu->synic_event_page = NULL;
	} else {
		siefp.base_siefp_gpa = 0;
	}

	hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);

	/* Disable the global synic bit */
	sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
	sctrl.enable = 0;
	hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);

	if (vmbus_irq != -1)
		disable_percpu_irq(vmbus_irq);
}
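
/*
 * Pairing sketch (illustrative; the exact call sites live in vmbus_drv.c,
 * e.g. the suspend/resume path, and may differ by kernel version):
 *
 *	hv_synic_disable_regs(cpu);	(quiesce the SynIC on this CPU)
 *	...
 *	hv_synic_enable_regs(cpu);	(re-enable it later)
 */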

#define HV_MAX_TRIES 3
/*
 * Scan the event flags page of 'this' CPU looking for any bit that is set.  If we find one
 * bit set, then wait for a few milliseconds.  Repeat these steps up to a maximum of
 * HV_MAX_TRIES (3) times.  Return 'true' if any bit is still set after this operation;
 * 'false' otherwise.
 *
 * If a bit is set, that means there is a pending channel interrupt.  The expectation is
 * that the normal interrupt handling mechanism will find and process the channel interrupt
 * "very soon", and in the process clear the bit.
 */
static bool hv_synic_event_pending(void)
{
	struct hv_per_cpu_context *hv_cpu = this_cpu_ptr(hv_context.cpu_context);
	union hv_synic_event_flags *event =
		(union hv_synic_event_flags *)hv_cpu->synic_event_page + VMBUS_MESSAGE_SINT;
	unsigned long *recv_int_page = event->flags; /* assumes VMBus version >= VERSION_WIN8 */
	bool pending;
	u32 relid;
	int tries = 0;

retry:
	pending = false;
	for_each_set_bit(relid, recv_int_page, HV_EVENT_FLAGS_COUNT) {
		/* Special case - VMBus channel protocol messages */
		if (relid == 0)
			continue;
		pending = true;
		break;
	}
	if (pending && tries++ < HV_MAX_TRIES) {
		usleep_range(10000, 20000);
		goto retry;
	}
	return pending;
}
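
/*
 * Worst case, hv_synic_event_pending() sleeps HV_MAX_TRIES times for
 * 10-20 ms each, i.e. roughly 30-60 ms in total, before reporting that
 * events are still pending.
 */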

int hv_synic_cleanup(unsigned int cpu)
{
	struct vmbus_channel *channel, *sc;
	bool channel_found = false;

	if (vmbus_connection.conn_state != CONNECTED)
		goto always_cleanup;

	/*
	 * Hyper-V does not provide a way to change the connect CPU once
	 * it is set; we must prevent the connect CPU from going offline
	 * while the VM is running normally. But in the panic or kexec()
	 * path where the vmbus is already disconnected, the CPU must be
	 * allowed to shut down.
	 */
	if (cpu == VMBUS_CONNECT_CPU)
		return -EBUSY;

	/*
	 * Search for channels which are bound to the CPU we're about to
	 * clean up.  In case we find one and vmbus is still connected, we
	 * fail; this will effectively prevent CPU offlining.
	 *
	 * TODO: Re-bind the channels to different CPUs.
	 */
	mutex_lock(&vmbus_connection.channel_mutex);
	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
		if (channel->target_cpu == cpu) {
			channel_found = true;
			break;
		}
		list_for_each_entry(sc, &channel->sc_list, sc_list) {
			if (sc->target_cpu == cpu) {
				channel_found = true;
				break;
			}
		}
		if (channel_found)
			break;
	}
	mutex_unlock(&vmbus_connection.channel_mutex);

	if (channel_found)
		return -EBUSY;

	/*
	 * channel_found == false means that any channels that were previously
	 * assigned to the CPU have been reassigned elsewhere with a call of
	 * vmbus_send_modifychannel().  Scan the event flags page looking for
	 * bits that are set and waiting with a timeout for vmbus_chan_sched()
	 * to process such bits.  If bits are still set after this operation
	 * and VMBus is connected, fail the CPU offlining operation.
	 */
	if (vmbus_proto_version >= VERSION_WIN10_V4_1 && hv_synic_event_pending())
		return -EBUSY;

always_cleanup:
	hv_stimer_legacy_cleanup(cpu);

	hv_synic_disable_regs(cpu);

	return 0;
}
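
/*
 * Registration sketch (illustrative; the real registration happens in the
 * VMBus driver's bus-init code and the exact helper used there may differ):
 * hv_synic_init() and hv_synic_cleanup() have the int (*)(unsigned int)
 * signature expected of CPU hotplug callbacks, e.g.:
 *
 *	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online",
 *				hv_synic_init, hv_synic_cleanup);
 *	if (ret < 0)
 *		return ret;
 */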
460