/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 *
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/hyperv.h>
#include <linux/version.h>
#include <linux/interrupt.h>
#include <linux/clockchips.h>
#include <asm/hyperv.h>
#include <asm/mshyperv.h>
#include "hyperv_vmbus.h"

/* The one and only */
struct hv_context hv_context = {
	.synic_initialized	= false,
};

#define HV_TIMER_FREQUENCY (10 * 1000 * 1000) /* 100ns period */
#define HV_MAX_MAX_DELTA_TICKS 0xffffffff
#define HV_MIN_DELTA_TICKS 1

/*
 * query_hypervisor_info - Get version info of the Windows hypervisor
 */
unsigned int host_info_eax;
unsigned int host_info_ebx;
unsigned int host_info_ecx;
unsigned int host_info_edx;

static int query_hypervisor_info(void)
{
	unsigned int eax;
	unsigned int ebx;
	unsigned int ecx;
	unsigned int edx;
	unsigned int max_leaf;
	unsigned int op;

	/*
	 * It is assumed that this is called after confirming that Viridian
	 * is present. Query id and revision.
	 */
	eax = 0;
	ebx = 0;
	ecx = 0;
	edx = 0;
	op = HVCPUID_VENDOR_MAXFUNCTION;
	cpuid(op, &eax, &ebx, &ecx, &edx);

	max_leaf = eax;

	if (max_leaf >= HVCPUID_VERSION) {
		eax = 0;
		ebx = 0;
		ecx = 0;
		edx = 0;
		op = HVCPUID_VERSION;
		cpuid(op, &eax, &ebx, &ecx, &edx);
		host_info_eax = eax;
		host_info_ebx = ebx;
		host_info_ecx = ecx;
		host_info_edx = edx;
	}
	return max_leaf;
}

#ifdef CONFIG_X86_64
static u64 read_hv_clock_tsc(struct clocksource *arg)
{
	u64 current_tick;
	struct ms_hyperv_tsc_page *tsc_pg = hv_context.tsc_page;

	if (tsc_pg->tsc_sequence != 0) {
		/*
		 * Use the tsc page to compute the value.
		 */

		while (1) {
			u64 tmp;
			u32 sequence = tsc_pg->tsc_sequence;
			u64 cur_tsc;
			u64 scale = tsc_pg->tsc_scale;
			s64 offset = tsc_pg->tsc_offset;

			rdtscll(cur_tsc);
			/* current_tick = ((cur_tsc * scale) >> 64) + offset */
			asm("mulq %3"
				: "=d" (current_tick), "=a" (tmp)
				: "a" (cur_tsc), "r" (scale));

			current_tick += offset;
			if (tsc_pg->tsc_sequence == sequence)
				return current_tick;

			if (tsc_pg->tsc_sequence != 0)
				continue;
			/*
			 * Fallback using MSR method.
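			 * A sequence of zero means the reference TSC page is
			 * no longer valid, so break out and read the
			 * HV_X64_MSR_TIME_REF_COUNT MSR instead; any other
			 * sequence change simply retries the computation.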
			 */
			break;
		}
	}
	rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
	return current_tick;
}

static struct clocksource hyperv_cs_tsc = {
	.name		= "hyperv_clocksource_tsc_page",
	.rating		= 425,
	.read		= read_hv_clock_tsc,
	.mask		= CLOCKSOURCE_MASK(64),
	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
};
#endif

/*
 * hv_init - Main initialization routine.
 *
 * This routine must be called before any other routines in here are called
 */
int hv_init(void)
{
	int max_leaf;
	union hv_x64_msr_hypercall_contents hypercall_msr;

	memset(hv_context.synic_event_page, 0, sizeof(void *) * NR_CPUS);
	memset(hv_context.synic_message_page, 0,
	       sizeof(void *) * NR_CPUS);
	memset(hv_context.post_msg_page, 0,
	       sizeof(void *) * NR_CPUS);
	memset(hv_context.vp_index, 0,
	       sizeof(int) * NR_CPUS);
	memset(hv_context.event_dpc, 0,
	       sizeof(void *) * NR_CPUS);
	memset(hv_context.msg_dpc, 0,
	       sizeof(void *) * NR_CPUS);
	memset(hv_context.clk_evt, 0,
	       sizeof(void *) * NR_CPUS);

	max_leaf = query_hypervisor_info();

	/* See if the hypercall page is already set */
	hypercall_msr.as_uint64 = 0;
	rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);

	if (!hypercall_msr.enable)
		return -ENOTSUPP;

#ifdef CONFIG_X86_64
	if (ms_hyperv.features & HV_X64_MSR_REFERENCE_TSC_AVAILABLE) {
		union hv_x64_msr_hypercall_contents tsc_msr;
		void *va_tsc;

		va_tsc = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
		if (!va_tsc)
			goto cleanup;
		hv_context.tsc_page = va_tsc;

		rdmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);

		tsc_msr.enable = 1;
		tsc_msr.guest_physical_address = vmalloc_to_pfn(va_tsc);

		wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
		clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);
	}
#endif
	return 0;

cleanup:
	return -ENOTSUPP;
}

/*
 * hv_cleanup - Cleanup routine.
 *
 * This routine is called normally during driver unloading or exiting.
 */
void hv_cleanup(bool crash)
{
#ifdef CONFIG_X86_64
	union hv_x64_msr_hypercall_contents hypercall_msr;

	/*
	 * Cleanup the TSC page based CS.
	 */
	if (ms_hyperv.features & HV_X64_MSR_REFERENCE_TSC_AVAILABLE) {
		/*
		 * Crash can happen in an interrupt context and unregistering
		 * a clocksource is impossible and redundant in this case.
		 */
		if (!oops_in_progress) {
			clocksource_change_rating(&hyperv_cs_tsc, 10);
			clocksource_unregister(&hyperv_cs_tsc);
		}

		hypercall_msr.as_uint64 = 0;
		wrmsrl(HV_X64_MSR_REFERENCE_TSC, hypercall_msr.as_uint64);
		if (!crash) {
			vfree(hv_context.tsc_page);
			hv_context.tsc_page = NULL;
		}
	}
#endif
}

/*
 * hv_post_message - Post a message using the hypervisor message IPC.
 *
 * This involves a hypercall.
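 *
 * The payload is first copied into this cpu's pre-allocated post_msg_page so
 * the buffer handed to the hypervisor is properly aligned; preemption is
 * disabled via get_cpu()/put_cpu() while that page is in use.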
 */
int hv_post_message(union hv_connection_id connection_id,
		    enum hv_message_type message_type,
		    void *payload, size_t payload_size)
{
	struct hv_input_post_message *aligned_msg;
	u64 status;

	if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
		return -EMSGSIZE;

	aligned_msg = (struct hv_input_post_message *)
			hv_context.post_msg_page[get_cpu()];

	aligned_msg->connectionid = connection_id;
	aligned_msg->reserved = 0;
	aligned_msg->message_type = message_type;
	aligned_msg->payload_size = payload_size;
	memcpy((void *)aligned_msg->payload, payload, payload_size);

	status = hv_do_hypercall(HVCALL_POST_MESSAGE, aligned_msg, NULL);

	put_cpu();
	return status & 0xFFFF;
}

static int hv_ce_set_next_event(unsigned long delta,
				struct clock_event_device *evt)
{
	u64 current_tick;

	WARN_ON(!clockevent_state_oneshot(evt));

	rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
	current_tick += delta;
	wrmsrl(HV_X64_MSR_STIMER0_COUNT, current_tick);
	return 0;
}

static int hv_ce_shutdown(struct clock_event_device *evt)
{
	wrmsrl(HV_X64_MSR_STIMER0_COUNT, 0);
	wrmsrl(HV_X64_MSR_STIMER0_CONFIG, 0);

	return 0;
}

static int hv_ce_set_oneshot(struct clock_event_device *evt)
{
	union hv_timer_config timer_cfg;

	/* Start from a clean config so reserved bits are not left as stack garbage. */
	timer_cfg.as_uint64 = 0;
	timer_cfg.enable = 1;
	timer_cfg.auto_enable = 1;
	timer_cfg.sintx = VMBUS_MESSAGE_SINT;
	wrmsrl(HV_X64_MSR_STIMER0_CONFIG, timer_cfg.as_uint64);

	return 0;
}

static void hv_init_clockevent_device(struct clock_event_device *dev, int cpu)
{
	dev->name = "Hyper-V clockevent";
	dev->features = CLOCK_EVT_FEAT_ONESHOT;
	dev->cpumask = cpumask_of(cpu);
	dev->rating = 1000;
	/*
	 * Avoid setting dev->owner = THIS_MODULE deliberately as doing so will
	 * result in clockevents_config_and_register() taking additional
	 * references to the hv_vmbus module making it impossible to unload.
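	 *
	 * The device is only populated here; registration happens later,
	 * per cpu, from hv_synic_init() via clockevents_config_and_register().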
	 */

	dev->set_state_shutdown = hv_ce_shutdown;
	dev->set_state_oneshot = hv_ce_set_oneshot;
	dev->set_next_event = hv_ce_set_next_event;
}

int hv_synic_alloc(void)
{
	size_t size = sizeof(struct tasklet_struct);
	size_t ced_size = sizeof(struct clock_event_device);
	int cpu;

	hv_context.hv_numa_map = kzalloc(sizeof(struct cpumask) * nr_node_ids,
					 GFP_ATOMIC);
	if (hv_context.hv_numa_map == NULL) {
		pr_err("Unable to allocate NUMA map\n");
		goto err;
	}

	for_each_present_cpu(cpu) {
		hv_context.event_dpc[cpu] = kmalloc(size, GFP_ATOMIC);
		if (hv_context.event_dpc[cpu] == NULL) {
			pr_err("Unable to allocate event dpc\n");
			goto err;
		}
		tasklet_init(hv_context.event_dpc[cpu], vmbus_on_event, cpu);

		hv_context.msg_dpc[cpu] = kmalloc(size, GFP_ATOMIC);
		if (hv_context.msg_dpc[cpu] == NULL) {
			pr_err("Unable to allocate msg dpc\n");
			goto err;
		}
		tasklet_init(hv_context.msg_dpc[cpu], vmbus_on_msg_dpc, cpu);

		hv_context.clk_evt[cpu] = kzalloc(ced_size, GFP_ATOMIC);
		if (hv_context.clk_evt[cpu] == NULL) {
			pr_err("Unable to allocate clock event device\n");
			goto err;
		}

		hv_init_clockevent_device(hv_context.clk_evt[cpu], cpu);

		hv_context.synic_message_page[cpu] =
			(void *)get_zeroed_page(GFP_ATOMIC);

		if (hv_context.synic_message_page[cpu] == NULL) {
			pr_err("Unable to allocate SYNIC message page\n");
			goto err;
		}

		hv_context.synic_event_page[cpu] =
			(void *)get_zeroed_page(GFP_ATOMIC);

		if (hv_context.synic_event_page[cpu] == NULL) {
			pr_err("Unable to allocate SYNIC event page\n");
			goto err;
		}

		hv_context.post_msg_page[cpu] =
			(void *)get_zeroed_page(GFP_ATOMIC);

		if (hv_context.post_msg_page[cpu] == NULL) {
			pr_err("Unable to allocate post msg page\n");
			goto err;
		}

		INIT_LIST_HEAD(&hv_context.percpu_list[cpu]);
	}

	return 0;
err:
	return -ENOMEM;
}

static void hv_synic_free_cpu(int cpu)
{
	kfree(hv_context.event_dpc[cpu]);
	kfree(hv_context.msg_dpc[cpu]);
	kfree(hv_context.clk_evt[cpu]);
	if (hv_context.synic_event_page[cpu])
		free_page((unsigned long)hv_context.synic_event_page[cpu]);
	if (hv_context.synic_message_page[cpu])
		free_page((unsigned long)hv_context.synic_message_page[cpu]);
	if (hv_context.post_msg_page[cpu])
		free_page((unsigned long)hv_context.post_msg_page[cpu]);
}

void hv_synic_free(void)
{
	int cpu;

	kfree(hv_context.hv_numa_map);
	for_each_present_cpu(cpu)
		hv_synic_free_cpu(cpu);
}

/*
 * hv_synic_init - Initialize the Synthetic Interrupt Controller.
 *
 * If it is already initialized by another entity (i.e. x2v shim), we need to
 * retrieve the initialized message and event pages. Otherwise, we create and
 * initialize the message and event pages.
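 *
 * Per-cpu setup below programs the SIMP (message page) and SIEFP (event
 * page) MSRs, unmasks the VMBus SINT with the hypervisor callback vector,
 * and finally sets the global enable bit in SCONTROL.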
 */
int hv_synic_init(unsigned int cpu)
{
	u64 version;
	union hv_synic_simp simp;
	union hv_synic_siefp siefp;
	union hv_synic_sint shared_sint;
	union hv_synic_scontrol sctrl;
	u64 vp_index;

	/* Check the version */
	rdmsrl(HV_X64_MSR_SVERSION, version);

	/* Setup the Synic's message page */
	rdmsrl(HV_X64_MSR_SIMP, simp.as_uint64);
	simp.simp_enabled = 1;
	simp.base_simp_gpa = virt_to_phys(hv_context.synic_message_page[cpu])
		>> PAGE_SHIFT;

	wrmsrl(HV_X64_MSR_SIMP, simp.as_uint64);

	/* Setup the Synic's event page */
	rdmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64);
	siefp.siefp_enabled = 1;
	siefp.base_siefp_gpa = virt_to_phys(hv_context.synic_event_page[cpu])
		>> PAGE_SHIFT;

	wrmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64);

	/* Setup the shared SINT. */
	rdmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64);

	shared_sint.as_uint64 = 0;
	shared_sint.vector = HYPERVISOR_CALLBACK_VECTOR;
	shared_sint.masked = false;
	shared_sint.auto_eoi = true;

	wrmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64);

	/* Enable the global synic bit */
	rdmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64);
	sctrl.enable = 1;

	wrmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64);

	hv_context.synic_initialized = true;

	/*
	 * Setup the mapping between Hyper-V's notion
	 * of cpuid and Linux's notion of cpuid.
	 * This array will be indexed using Linux cpuid.
	 */
	rdmsrl(HV_X64_MSR_VP_INDEX, vp_index);
	hv_context.vp_index[cpu] = (u32)vp_index;

	/*
	 * Register the per-cpu clockevent source.
	 */
	if (ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE)
		clockevents_config_and_register(hv_context.clk_evt[cpu],
						HV_TIMER_FREQUENCY,
						HV_MIN_DELTA_TICKS,
						HV_MAX_MAX_DELTA_TICKS);
	return 0;
}

/*
 * hv_synic_clockevents_cleanup - Cleanup clockevent devices
 */
void hv_synic_clockevents_cleanup(void)
{
	int cpu;

	if (!(ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE))
		return;

	for_each_present_cpu(cpu)
		clockevents_unbind_device(hv_context.clk_evt[cpu], cpu);
}

/*
 * hv_synic_cleanup - Cleanup routine for hv_synic_init().
 */
int hv_synic_cleanup(unsigned int cpu)
{
	union hv_synic_sint shared_sint;
	union hv_synic_simp simp;
	union hv_synic_siefp siefp;
	union hv_synic_scontrol sctrl;
	struct vmbus_channel *channel, *sc;
	bool channel_found = false;
	unsigned long flags;

	if (!hv_context.synic_initialized)
		return -EFAULT;

	/*
	 * Search for channels which are bound to the CPU we're about to
	 * clean up. In case we find one and vmbus is still connected, we
	 * need to fail; this will effectively prevent CPU offlining. There
	 * is no way we can re-bind channels to different CPUs for now.
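	 *
	 * Both primary channels and their sub-channels (sc_list) are
	 * checked.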
	 */
	mutex_lock(&vmbus_connection.channel_mutex);
	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
		if (channel->target_cpu == cpu) {
			channel_found = true;
			break;
		}
		spin_lock_irqsave(&channel->lock, flags);
		list_for_each_entry(sc, &channel->sc_list, sc_list) {
			if (sc->target_cpu == cpu) {
				channel_found = true;
				break;
			}
		}
		spin_unlock_irqrestore(&channel->lock, flags);
		if (channel_found)
			break;
	}
	mutex_unlock(&vmbus_connection.channel_mutex);

	if (channel_found && vmbus_connection.conn_state == CONNECTED)
		return -EBUSY;

	/* Turn off clockevent device */
	if (ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE) {
		clockevents_unbind_device(hv_context.clk_evt[cpu], cpu);
		hv_ce_shutdown(hv_context.clk_evt[cpu]);
	}

	rdmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64);

	shared_sint.masked = 1;

	/* Need to correctly cleanup in the case of SMP!!! */
	/* Disable the interrupt */
	wrmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64);

	rdmsrl(HV_X64_MSR_SIMP, simp.as_uint64);
	simp.simp_enabled = 0;
	simp.base_simp_gpa = 0;

	wrmsrl(HV_X64_MSR_SIMP, simp.as_uint64);

	rdmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64);
	siefp.siefp_enabled = 0;
	siefp.base_siefp_gpa = 0;

	wrmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64);

	/* Disable the global synic bit */
	rdmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64);
	sctrl.enable = 0;
	wrmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64);

	return 0;
}