xref: /openbmc/linux/drivers/hv/hv.c (revision 6ab42a66)
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 *
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/hyperv.h>
#include <linux/version.h>
#include <linux/interrupt.h>
#include <linux/clockchips.h>
#include <asm/hyperv.h>
#include <asm/mshyperv.h>
#include "hyperv_vmbus.h"

/* The one and only */
struct hv_context hv_context = {
	.synic_initialized	= false,
};

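/*
 * The Hyper-V reference counter and synthetic timers tick in 100ns units,
 * i.e. at 10 MHz, so the clockevent frequency below is 10,000,000 and each
 * delta tick passed to set_next_event represents 100ns.
 */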
#define HV_TIMER_FREQUENCY (10 * 1000 * 1000) /* 100ns period */
#define HV_MAX_MAX_DELTA_TICKS 0xffffffff
#define HV_MIN_DELTA_TICKS 1

/*
 * query_hypervisor_info - Get version info of the Windows hypervisor
 */
unsigned int host_info_eax;
unsigned int host_info_ebx;
unsigned int host_info_ecx;
unsigned int host_info_edx;

static int query_hypervisor_info(void)
{
	unsigned int eax;
	unsigned int ebx;
	unsigned int ecx;
	unsigned int edx;
	unsigned int max_leaf;
	unsigned int op;

	/*
	 * It's assumed that this is called after confirming that Viridian
	 * is present. Query ID and revision.
	 */
	eax = 0;
	ebx = 0;
	ecx = 0;
	edx = 0;
	op = HVCPUID_VENDOR_MAXFUNCTION;
	cpuid(op, &eax, &ebx, &ecx, &edx);

	max_leaf = eax;

	if (max_leaf >= HVCPUID_VERSION) {
		eax = 0;
		ebx = 0;
		ecx = 0;
		edx = 0;
		op = HVCPUID_VERSION;
		cpuid(op, &eax, &ebx, &ecx, &edx);
		host_info_eax = eax;
		host_info_ebx = ebx;
		host_info_ecx = ecx;
		host_info_edx = edx;
	}
	return max_leaf;
}

#ifdef CONFIG_X86_64
static u64 read_hv_clock_tsc(struct clocksource *arg)
{
	u64 current_tick;
	struct ms_hyperv_tsc_page *tsc_pg = hv_context.tsc_page;

	if (tsc_pg->tsc_sequence != 0) {
		/*
		 * Use the tsc page to compute the value.
		 */
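		/*
		 * The tsc_sequence field works like a seqlock: the hypervisor
		 * bumps it whenever it rewrites the scale/offset pair, so we
		 * re-read until the sequence observed before and after the
		 * computation matches.  A sequence of 0 means the page is not
		 * valid and the TIME_REF_COUNT MSR must be used instead.
		 */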

		while (1) {
			u64 tmp;
			u32 sequence = tsc_pg->tsc_sequence;
			u64 cur_tsc;
			u64 scale = tsc_pg->tsc_scale;
			s64 offset = tsc_pg->tsc_offset;

			rdtscll(cur_tsc);
			/* current_tick = ((cur_tsc * scale) >> 64) + offset */
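			/*
			 * mulq leaves the 128-bit product of RAX and the
			 * operand in RDX:RAX, so the "=d" output below is
			 * exactly the high 64 bits, i.e. (cur_tsc * scale) >> 64.
			 */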
			asm("mulq %3"
				: "=d" (current_tick), "=a" (tmp)
				: "a" (cur_tsc), "r" (scale));

			current_tick += offset;
			if (tsc_pg->tsc_sequence == sequence)
				return current_tick;

			if (tsc_pg->tsc_sequence != 0)
				continue;
			/*
			 * Fallback using MSR method.
			 */
			break;
		}
	}
	rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
	return current_tick;
}

static struct clocksource hyperv_cs_tsc = {
		.name           = "hyperv_clocksource_tsc_page",
		.rating         = 425,
		.read           = read_hv_clock_tsc,
		.mask           = CLOCKSOURCE_MASK(64),
		.flags          = CLOCK_SOURCE_IS_CONTINUOUS,
};
#endif


/*
 * hv_init - Main initialization routine.
 *
 * This routine must be called before any other routines in here are called
 */
int hv_init(void)
{
	int max_leaf;
	union hv_x64_msr_hypercall_contents hypercall_msr;

	memset(hv_context.synic_event_page, 0, sizeof(void *) * NR_CPUS);
	memset(hv_context.synic_message_page, 0,
	       sizeof(void *) * NR_CPUS);
	memset(hv_context.post_msg_page, 0,
	       sizeof(void *) * NR_CPUS);
	memset(hv_context.vp_index, 0,
	       sizeof(int) * NR_CPUS);
	memset(hv_context.event_dpc, 0,
	       sizeof(void *) * NR_CPUS);
	memset(hv_context.msg_dpc, 0,
	       sizeof(void *) * NR_CPUS);
	memset(hv_context.clk_evt, 0,
	       sizeof(void *) * NR_CPUS);

	max_leaf = query_hypervisor_info();


	/* See if the hypercall page is already set */
	hypercall_msr.as_uint64 = 0;
	rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);

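	/*
	 * The hypercall page is expected to have been set up earlier by the
	 * architecture-specific Hyper-V init code; if it is not enabled here,
	 * hypercalls cannot be issued and VMBus cannot function.
	 */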
	if (!hypercall_msr.enable)
		return -ENOTSUPP;

#ifdef CONFIG_X86_64
	if (ms_hyperv.features & HV_X64_MSR_REFERENCE_TSC_AVAILABLE) {
		union hv_x64_msr_hypercall_contents tsc_msr;
		void *va_tsc;

		va_tsc = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
		if (!va_tsc)
			goto cleanup;
		hv_context.tsc_page = va_tsc;

		rdmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);

		tsc_msr.enable = 1;
		tsc_msr.guest_physical_address = vmalloc_to_pfn(va_tsc);

		wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
		clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);
	}
#endif
	return 0;

cleanup:
	return -ENOTSUPP;
}

/*
 * hv_cleanup - Cleanup routine.
 *
 * This routine is normally called during driver unload or exit.
 */
void hv_cleanup(bool crash)
{

#ifdef CONFIG_X86_64
	union hv_x64_msr_hypercall_contents hypercall_msr;
	/*
	 * Cleanup the TSC page based CS.
	 */
	if (ms_hyperv.features & HV_X64_MSR_REFERENCE_TSC_AVAILABLE) {
		/*
		 * Crash can happen in an interrupt context and unregistering
		 * a clocksource is impossible and redundant in this case.
		 */
		if (!oops_in_progress) {
			clocksource_change_rating(&hyperv_cs_tsc, 10);
			clocksource_unregister(&hyperv_cs_tsc);
		}

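		/*
		 * Writing 0 disables the reference TSC page in the hypervisor
		 * before the backing memory is freed below.
		 */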
		hypercall_msr.as_uint64 = 0;
		wrmsrl(HV_X64_MSR_REFERENCE_TSC, hypercall_msr.as_uint64);
		if (!crash) {
			vfree(hv_context.tsc_page);
			hv_context.tsc_page = NULL;
		}
	}
#endif
}

/*
 * hv_post_message - Post a message using the hypervisor message IPC.
 *
 * This involves a hypercall.
 */
int hv_post_message(union hv_connection_id connection_id,
		  enum hv_message_type message_type,
		  void *payload, size_t payload_size)
{

	struct hv_input_post_message *aligned_msg;
	u64 status;

	if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
		return -EMSGSIZE;

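	/*
	 * Hypercall input must be physically contiguous and must not cross a
	 * page boundary, so a pre-allocated per-cpu page holds the message.
	 * get_cpu() disables preemption so this CPU's page cannot be reused
	 * from under us before put_cpu() below.
	 */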
	aligned_msg = (struct hv_input_post_message *)
			hv_context.post_msg_page[get_cpu()];

	aligned_msg->connectionid = connection_id;
	aligned_msg->reserved = 0;
	aligned_msg->message_type = message_type;
	aligned_msg->payload_size = payload_size;
	memcpy((void *)aligned_msg->payload, payload, payload_size);

	status = hv_do_hypercall(HVCALL_POST_MESSAGE, aligned_msg, NULL);

	put_cpu();
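	/* The low 16 bits of the hypercall result carry the HV_STATUS code. */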
	return status & 0xFFFF;
}

static int hv_ce_set_next_event(unsigned long delta,
				struct clock_event_device *evt)
{
	u64 current_tick;

	WARN_ON(!clockevent_state_oneshot(evt));

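	/*
	 * delta is in 100ns ticks (HV_TIMER_FREQUENCY); STIMER0 is programmed
	 * with an absolute expiration time relative to the reference counter.
	 */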
	rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
	current_tick += delta;
	wrmsrl(HV_X64_MSR_STIMER0_COUNT, current_tick);
	return 0;
}

static int hv_ce_shutdown(struct clock_event_device *evt)
{
	wrmsrl(HV_X64_MSR_STIMER0_COUNT, 0);
	wrmsrl(HV_X64_MSR_STIMER0_CONFIG, 0);

	return 0;
}

static int hv_ce_set_oneshot(struct clock_event_device *evt)
{
	union hv_timer_config timer_cfg;

	/* Zero the whole union first so reserved bits are not stack garbage. */
	timer_cfg.as_uint64 = 0;
	timer_cfg.enable = 1;
	timer_cfg.auto_enable = 1;
	timer_cfg.sintx = VMBUS_MESSAGE_SINT;
	wrmsrl(HV_X64_MSR_STIMER0_CONFIG, timer_cfg.as_uint64);

	return 0;
}

static void hv_init_clockevent_device(struct clock_event_device *dev, int cpu)
{
	dev->name = "Hyper-V clockevent";
	dev->features = CLOCK_EVT_FEAT_ONESHOT;
	dev->cpumask = cpumask_of(cpu);
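	/*
	 * A very high rating makes this the preferred per-cpu clock event
	 * device, ahead of the local APIC timer.
	 */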
	dev->rating = 1000;
	/*
	 * dev->owner = THIS_MODULE is deliberately not set, as doing so would
	 * make clockevents_config_and_register() take additional references
	 * to the hv_vmbus module, making it impossible to unload.
	 */

	dev->set_state_shutdown = hv_ce_shutdown;
	dev->set_state_oneshot = hv_ce_set_oneshot;
	dev->set_next_event = hv_ce_set_next_event;
}


int hv_synic_alloc(void)
{
	size_t size = sizeof(struct tasklet_struct);
	size_t ced_size = sizeof(struct clock_event_device);
	int cpu;

	hv_context.hv_numa_map = kzalloc(sizeof(struct cpumask) * nr_node_ids,
					 GFP_ATOMIC);
	if (hv_context.hv_numa_map == NULL) {
		pr_err("Unable to allocate NUMA map\n");
		goto err;
	}

	for_each_present_cpu(cpu) {
		hv_context.event_dpc[cpu] = kmalloc(size, GFP_ATOMIC);
		if (hv_context.event_dpc[cpu] == NULL) {
			pr_err("Unable to allocate event dpc\n");
			goto err;
		}
		tasklet_init(hv_context.event_dpc[cpu], vmbus_on_event, cpu);

		hv_context.msg_dpc[cpu] = kmalloc(size, GFP_ATOMIC);
		if (hv_context.msg_dpc[cpu] == NULL) {
			pr_err("Unable to allocate msg dpc\n");
			goto err;
		}
		tasklet_init(hv_context.msg_dpc[cpu], vmbus_on_msg_dpc, cpu);

		hv_context.clk_evt[cpu] = kzalloc(ced_size, GFP_ATOMIC);
		if (hv_context.clk_evt[cpu] == NULL) {
			pr_err("Unable to allocate clock event device\n");
			goto err;
		}

		hv_init_clockevent_device(hv_context.clk_evt[cpu], cpu);

		hv_context.synic_message_page[cpu] =
			(void *)get_zeroed_page(GFP_ATOMIC);

		if (hv_context.synic_message_page[cpu] == NULL) {
			pr_err("Unable to allocate SYNIC message page\n");
			goto err;
		}

		hv_context.synic_event_page[cpu] =
			(void *)get_zeroed_page(GFP_ATOMIC);

		if (hv_context.synic_event_page[cpu] == NULL) {
			pr_err("Unable to allocate SYNIC event page\n");
			goto err;
		}

		hv_context.post_msg_page[cpu] =
			(void *)get_zeroed_page(GFP_ATOMIC);

		if (hv_context.post_msg_page[cpu] == NULL) {
			pr_err("Unable to allocate post msg page\n");
			goto err;
		}

		INIT_LIST_HEAD(&hv_context.percpu_list[cpu]);
	}

	return 0;
err:
	return -ENOMEM;
}

static void hv_synic_free_cpu(int cpu)
{
	kfree(hv_context.event_dpc[cpu]);
	kfree(hv_context.msg_dpc[cpu]);
	kfree(hv_context.clk_evt[cpu]);
	if (hv_context.synic_event_page[cpu])
		free_page((unsigned long)hv_context.synic_event_page[cpu]);
	if (hv_context.synic_message_page[cpu])
		free_page((unsigned long)hv_context.synic_message_page[cpu]);
	if (hv_context.post_msg_page[cpu])
		free_page((unsigned long)hv_context.post_msg_page[cpu]);
}

void hv_synic_free(void)
{
	int cpu;

	kfree(hv_context.hv_numa_map);
	for_each_present_cpu(cpu)
		hv_synic_free_cpu(cpu);
}

/*
 * hv_synic_init - Initialize the Synthetic Interrupt Controller.
 *
 * If it is already initialized by another entity (i.e. the x2v shim), we need
 * to retrieve the initialized message and event pages.  Otherwise, we create
 * and initialize the message and event pages.
 */
int hv_synic_init(unsigned int cpu)
{
	u64 version;
	union hv_synic_simp simp;
	union hv_synic_siefp siefp;
	union hv_synic_sint shared_sint;
	union hv_synic_scontrol sctrl;
	u64 vp_index;

	/* Check the version */
	rdmsrl(HV_X64_MSR_SVERSION, version);

	/* Setup the Synic's message page */
	rdmsrl(HV_X64_MSR_SIMP, simp.as_uint64);
	simp.simp_enabled = 1;
	simp.base_simp_gpa = virt_to_phys(hv_context.synic_message_page[cpu])
		>> PAGE_SHIFT;

	wrmsrl(HV_X64_MSR_SIMP, simp.as_uint64);

	/* Setup the Synic's event page */
	rdmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64);
	siefp.siefp_enabled = 1;
	siefp.base_siefp_gpa = virt_to_phys(hv_context.synic_event_page[cpu])
		>> PAGE_SHIFT;

	wrmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64);

	/* Setup the shared SINT. */
	rdmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64);

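	/*
	 * Route the VMBus SINT to the hypervisor callback vector; auto_eoi
	 * spares an explicit APIC EOI for interrupts delivered on this SINT.
	 */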
	shared_sint.as_uint64 = 0;
	shared_sint.vector = HYPERVISOR_CALLBACK_VECTOR;
	shared_sint.masked = false;
	shared_sint.auto_eoi = true;

	wrmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64);

	/* Enable the global synic bit */
	rdmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64);
	sctrl.enable = 1;

	wrmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64);

	hv_context.synic_initialized = true;

	/*
	 * Set up the mapping between Hyper-V's notion of CPU ID (the VP
	 * index) and Linux's CPU number.  This array is indexed by the
	 * Linux CPU number.
	 */
	rdmsrl(HV_X64_MSR_VP_INDEX, vp_index);
	hv_context.vp_index[cpu] = (u32)vp_index;

	/*
	 * Register the per-cpu clockevent source.
	 */
	if (ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE)
		clockevents_config_and_register(hv_context.clk_evt[cpu],
						HV_TIMER_FREQUENCY,
						HV_MIN_DELTA_TICKS,
						HV_MAX_MAX_DELTA_TICKS);
	return 0;
}

/*
 * hv_synic_clockevents_cleanup - Cleanup clockevent devices
 */
void hv_synic_clockevents_cleanup(void)
{
	int cpu;

	if (!(ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE))
		return;

	for_each_present_cpu(cpu)
		clockevents_unbind_device(hv_context.clk_evt[cpu], cpu);
}

/*
 * hv_synic_cleanup - Cleanup routine for hv_synic_init().
 */
int hv_synic_cleanup(unsigned int cpu)
{
	union hv_synic_sint shared_sint;
	union hv_synic_simp simp;
	union hv_synic_siefp siefp;
	union hv_synic_scontrol sctrl;
	struct vmbus_channel *channel, *sc;
	bool channel_found = false;
	unsigned long flags;

	if (!hv_context.synic_initialized)
		return -EFAULT;

	/*
	 * Search for channels which are bound to the CPU we're about to
	 * clean up.  If we find one and VMBus is still connected, we have to
	 * fail; this effectively prevents the CPU from being offlined.
	 * There is currently no way to re-bind a channel to a different CPU.
	 */
	mutex_lock(&vmbus_connection.channel_mutex);
	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
		if (channel->target_cpu == cpu) {
			channel_found = true;
			break;
		}
		spin_lock_irqsave(&channel->lock, flags);
		list_for_each_entry(sc, &channel->sc_list, sc_list) {
			if (sc->target_cpu == cpu) {
				channel_found = true;
				break;
			}
		}
		spin_unlock_irqrestore(&channel->lock, flags);
		if (channel_found)
			break;
	}
	mutex_unlock(&vmbus_connection.channel_mutex);

	if (channel_found && vmbus_connection.conn_state == CONNECTED)
		return -EBUSY;

	/* Turn off clockevent device */
	if (ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE) {
		clockevents_unbind_device(hv_context.clk_evt[cpu], cpu);
		hv_ce_shutdown(hv_context.clk_evt[cpu]);
	}

	rdmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64);

	shared_sint.masked = 1;

	/* Need to correctly cleanup in the case of SMP!!! */
	/* Disable the interrupt */
	wrmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64);

	rdmsrl(HV_X64_MSR_SIMP, simp.as_uint64);
	simp.simp_enabled = 0;
	simp.base_simp_gpa = 0;

	wrmsrl(HV_X64_MSR_SIMP, simp.as_uint64);

	rdmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64);
	siefp.siefp_enabled = 0;
	siefp.base_siefp_gpa = 0;

	wrmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64);

	/* Disable the global synic bit */
	rdmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64);
	sctrl.enable = 0;
	wrmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64);

	return 0;
}