1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * X86 specific Hyper-V initialization code. 4 * 5 * Copyright (C) 2016, Microsoft, Inc. 6 * 7 * Author : K. Y. Srinivasan <kys@microsoft.com> 8 */ 9 10 #include <linux/efi.h> 11 #include <linux/types.h> 12 #include <asm/apic.h> 13 #include <asm/desc.h> 14 #include <asm/hypervisor.h> 15 #include <asm/hyperv-tlfs.h> 16 #include <asm/mshyperv.h> 17 #include <linux/version.h> 18 #include <linux/vmalloc.h> 19 #include <linux/mm.h> 20 #include <linux/clockchips.h> 21 #include <linux/hyperv.h> 22 #include <linux/slab.h> 23 #include <linux/cpuhotplug.h> 24 25 #ifdef CONFIG_HYPERV_TSCPAGE 26 27 static struct ms_hyperv_tsc_page *tsc_pg; 28 29 struct ms_hyperv_tsc_page *hv_get_tsc_page(void) 30 { 31 return tsc_pg; 32 } 33 EXPORT_SYMBOL_GPL(hv_get_tsc_page); 34 35 static u64 read_hv_clock_tsc(struct clocksource *arg) 36 { 37 u64 current_tick = hv_read_tsc_page(tsc_pg); 38 39 if (current_tick == U64_MAX) 40 rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick); 41 42 return current_tick; 43 } 44 45 static struct clocksource hyperv_cs_tsc = { 46 .name = "hyperv_clocksource_tsc_page", 47 .rating = 400, 48 .read = read_hv_clock_tsc, 49 .mask = CLOCKSOURCE_MASK(64), 50 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 51 }; 52 #endif 53 54 static u64 read_hv_clock_msr(struct clocksource *arg) 55 { 56 u64 current_tick; 57 /* 58 * Read the partition counter to get the current tick count. This count 59 * is set to 0 when the partition is created and is incremented in 60 * 100 nanosecond units. 61 */ 62 rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick); 63 return current_tick; 64 } 65 66 static struct clocksource hyperv_cs_msr = { 67 .name = "hyperv_clocksource_msr", 68 .rating = 400, 69 .read = read_hv_clock_msr, 70 .mask = CLOCKSOURCE_MASK(64), 71 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 72 }; 73 74 void *hv_hypercall_pg; 75 EXPORT_SYMBOL_GPL(hv_hypercall_pg); 76 struct clocksource *hyperv_cs; 77 EXPORT_SYMBOL_GPL(hyperv_cs); 78 79 u32 *hv_vp_index; 80 EXPORT_SYMBOL_GPL(hv_vp_index); 81 82 struct hv_vp_assist_page **hv_vp_assist_page; 83 EXPORT_SYMBOL_GPL(hv_vp_assist_page); 84 85 void __percpu **hyperv_pcpu_input_arg; 86 EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg); 87 88 u32 hv_max_vp_index; 89 EXPORT_SYMBOL_GPL(hv_max_vp_index); 90 91 static int hv_cpu_init(unsigned int cpu) 92 { 93 u64 msr_vp_index; 94 struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()]; 95 void **input_arg; 96 struct page *pg; 97 98 input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); 99 pg = alloc_page(GFP_KERNEL); 100 if (unlikely(!pg)) 101 return -ENOMEM; 102 *input_arg = page_address(pg); 103 104 hv_get_vp_index(msr_vp_index); 105 106 hv_vp_index[smp_processor_id()] = msr_vp_index; 107 108 if (msr_vp_index > hv_max_vp_index) 109 hv_max_vp_index = msr_vp_index; 110 111 if (!hv_vp_assist_page) 112 return 0; 113 114 if (!*hvp) 115 *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL); 116 117 if (*hvp) { 118 u64 val; 119 120 val = vmalloc_to_pfn(*hvp); 121 val = (val << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) | 122 HV_X64_MSR_VP_ASSIST_PAGE_ENABLE; 123 124 wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, val); 125 } 126 127 return 0; 128 } 129 130 static void (*hv_reenlightenment_cb)(void); 131 132 static void hv_reenlightenment_notify(struct work_struct *dummy) 133 { 134 struct hv_tsc_emulation_status emu_status; 135 136 rdmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status); 137 138 /* Don't issue the callback if TSC accesses are not emulated */ 139 if (hv_reenlightenment_cb && emu_status.inprogress) 140 hv_reenlightenment_cb(); 141 } 142 static DECLARE_DELAYED_WORK(hv_reenlightenment_work, hv_reenlightenment_notify); 143 144 void hyperv_stop_tsc_emulation(void) 145 { 146 u64 freq; 147 struct hv_tsc_emulation_status emu_status; 148 149 rdmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status); 150 emu_status.inprogress = 0; 151 wrmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status); 152 153 rdmsrl(HV_X64_MSR_TSC_FREQUENCY, freq); 154 tsc_khz = div64_u64(freq, 1000); 155 } 156 EXPORT_SYMBOL_GPL(hyperv_stop_tsc_emulation); 157 158 static inline bool hv_reenlightenment_available(void) 159 { 160 /* 161 * Check for required features and priviliges to make TSC frequency 162 * change notifications work. 163 */ 164 return ms_hyperv.features & HV_X64_ACCESS_FREQUENCY_MSRS && 165 ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE && 166 ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT; 167 } 168 169 __visible void __irq_entry hyperv_reenlightenment_intr(struct pt_regs *regs) 170 { 171 entering_ack_irq(); 172 173 inc_irq_stat(irq_hv_reenlightenment_count); 174 175 schedule_delayed_work(&hv_reenlightenment_work, HZ/10); 176 177 exiting_irq(); 178 } 179 180 void set_hv_tscchange_cb(void (*cb)(void)) 181 { 182 struct hv_reenlightenment_control re_ctrl = { 183 .vector = HYPERV_REENLIGHTENMENT_VECTOR, 184 .enabled = 1, 185 .target_vp = hv_vp_index[smp_processor_id()] 186 }; 187 struct hv_tsc_emulation_control emu_ctrl = {.enabled = 1}; 188 189 if (!hv_reenlightenment_available()) { 190 pr_warn("Hyper-V: reenlightenment support is unavailable\n"); 191 return; 192 } 193 194 hv_reenlightenment_cb = cb; 195 196 /* Make sure callback is registered before we write to MSRs */ 197 wmb(); 198 199 wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl)); 200 wrmsrl(HV_X64_MSR_TSC_EMULATION_CONTROL, *((u64 *)&emu_ctrl)); 201 } 202 EXPORT_SYMBOL_GPL(set_hv_tscchange_cb); 203 204 void clear_hv_tscchange_cb(void) 205 { 206 struct hv_reenlightenment_control re_ctrl; 207 208 if (!hv_reenlightenment_available()) 209 return; 210 211 rdmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *(u64 *)&re_ctrl); 212 re_ctrl.enabled = 0; 213 wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *(u64 *)&re_ctrl); 214 215 hv_reenlightenment_cb = NULL; 216 } 217 EXPORT_SYMBOL_GPL(clear_hv_tscchange_cb); 218 219 static int hv_cpu_die(unsigned int cpu) 220 { 221 struct hv_reenlightenment_control re_ctrl; 222 unsigned int new_cpu; 223 unsigned long flags; 224 void **input_arg; 225 void *input_pg = NULL; 226 227 local_irq_save(flags); 228 input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); 229 input_pg = *input_arg; 230 *input_arg = NULL; 231 local_irq_restore(flags); 232 free_page((unsigned long)input_pg); 233 234 if (hv_vp_assist_page && hv_vp_assist_page[cpu]) 235 wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, 0); 236 237 if (hv_reenlightenment_cb == NULL) 238 return 0; 239 240 rdmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl)); 241 if (re_ctrl.target_vp == hv_vp_index[cpu]) { 242 /* Reassign to some other online CPU */ 243 new_cpu = cpumask_any_but(cpu_online_mask, cpu); 244 245 re_ctrl.target_vp = hv_vp_index[new_cpu]; 246 wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl)); 247 } 248 249 return 0; 250 } 251 252 static int __init hv_pci_init(void) 253 { 254 int gen2vm = efi_enabled(EFI_BOOT); 255 256 /* 257 * For Generation-2 VM, we exit from pci_arch_init() by returning 0. 258 * The purpose is to suppress the harmless warning: 259 * "PCI: Fatal: No config space access function found" 260 */ 261 if (gen2vm) 262 return 0; 263 264 /* For Generation-1 VM, we'll proceed in pci_arch_init(). */ 265 return 1; 266 } 267 268 /* 269 * This function is to be invoked early in the boot sequence after the 270 * hypervisor has been detected. 271 * 272 * 1. Setup the hypercall page. 273 * 2. Register Hyper-V specific clocksource. 274 * 3. Setup Hyper-V specific APIC entry points. 275 */ 276 void __init hyperv_init(void) 277 { 278 u64 guest_id, required_msrs; 279 union hv_x64_msr_hypercall_contents hypercall_msr; 280 int cpuhp, i; 281 282 if (x86_hyper_type != X86_HYPER_MS_HYPERV) 283 return; 284 285 /* Absolutely required MSRs */ 286 required_msrs = HV_X64_MSR_HYPERCALL_AVAILABLE | 287 HV_X64_MSR_VP_INDEX_AVAILABLE; 288 289 if ((ms_hyperv.features & required_msrs) != required_msrs) 290 return; 291 292 /* 293 * Allocate the per-CPU state for the hypercall input arg. 294 * If this allocation fails, we will not be able to setup 295 * (per-CPU) hypercall input page and thus this failure is 296 * fatal on Hyper-V. 297 */ 298 hyperv_pcpu_input_arg = alloc_percpu(void *); 299 300 BUG_ON(hyperv_pcpu_input_arg == NULL); 301 302 /* Allocate percpu VP index */ 303 hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index), 304 GFP_KERNEL); 305 if (!hv_vp_index) 306 return; 307 308 for (i = 0; i < num_possible_cpus(); i++) 309 hv_vp_index[i] = VP_INVAL; 310 311 hv_vp_assist_page = kcalloc(num_possible_cpus(), 312 sizeof(*hv_vp_assist_page), GFP_KERNEL); 313 if (!hv_vp_assist_page) { 314 ms_hyperv.hints &= ~HV_X64_ENLIGHTENED_VMCS_RECOMMENDED; 315 goto free_vp_index; 316 } 317 318 cpuhp = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online", 319 hv_cpu_init, hv_cpu_die); 320 if (cpuhp < 0) 321 goto free_vp_assist_page; 322 323 /* 324 * Setup the hypercall page and enable hypercalls. 325 * 1. Register the guest ID 326 * 2. Enable the hypercall and register the hypercall page 327 */ 328 guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0); 329 wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id); 330 331 hv_hypercall_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_RX); 332 if (hv_hypercall_pg == NULL) { 333 wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); 334 goto remove_cpuhp_state; 335 } 336 337 rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); 338 hypercall_msr.enable = 1; 339 hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg); 340 wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); 341 342 hv_apic_init(); 343 344 x86_init.pci.arch_init = hv_pci_init; 345 346 /* 347 * Register Hyper-V specific clocksource. 348 */ 349 #ifdef CONFIG_HYPERV_TSCPAGE 350 if (ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE) { 351 union hv_x64_msr_hypercall_contents tsc_msr; 352 353 tsc_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL); 354 if (!tsc_pg) 355 goto register_msr_cs; 356 357 hyperv_cs = &hyperv_cs_tsc; 358 359 rdmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64); 360 361 tsc_msr.enable = 1; 362 tsc_msr.guest_physical_address = vmalloc_to_pfn(tsc_pg); 363 364 wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64); 365 366 hyperv_cs_tsc.archdata.vclock_mode = VCLOCK_HVCLOCK; 367 368 clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100); 369 return; 370 } 371 register_msr_cs: 372 #endif 373 /* 374 * For 32 bit guests just use the MSR based mechanism for reading 375 * the partition counter. 376 */ 377 378 hyperv_cs = &hyperv_cs_msr; 379 if (ms_hyperv.features & HV_MSR_TIME_REF_COUNT_AVAILABLE) 380 clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100); 381 382 return; 383 384 remove_cpuhp_state: 385 cpuhp_remove_state(cpuhp); 386 free_vp_assist_page: 387 kfree(hv_vp_assist_page); 388 hv_vp_assist_page = NULL; 389 free_vp_index: 390 kfree(hv_vp_index); 391 hv_vp_index = NULL; 392 } 393 394 /* 395 * This routine is called before kexec/kdump, it does the required cleanup. 396 */ 397 void hyperv_cleanup(void) 398 { 399 union hv_x64_msr_hypercall_contents hypercall_msr; 400 401 /* Reset our OS id */ 402 wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); 403 404 /* 405 * Reset hypercall page reference before reset the page, 406 * let hypercall operations fail safely rather than 407 * panic the kernel for using invalid hypercall page 408 */ 409 hv_hypercall_pg = NULL; 410 411 /* Reset the hypercall page */ 412 hypercall_msr.as_uint64 = 0; 413 wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); 414 415 /* Reset the TSC page */ 416 hypercall_msr.as_uint64 = 0; 417 wrmsrl(HV_X64_MSR_REFERENCE_TSC, hypercall_msr.as_uint64); 418 } 419 EXPORT_SYMBOL_GPL(hyperv_cleanup); 420 421 void hyperv_report_panic(struct pt_regs *regs, long err) 422 { 423 static bool panic_reported; 424 u64 guest_id; 425 426 /* 427 * We prefer to report panic on 'die' chain as we have proper 428 * registers to report, but if we miss it (e.g. on BUG()) we need 429 * to report it on 'panic'. 430 */ 431 if (panic_reported) 432 return; 433 panic_reported = true; 434 435 rdmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id); 436 437 wrmsrl(HV_X64_MSR_CRASH_P0, err); 438 wrmsrl(HV_X64_MSR_CRASH_P1, guest_id); 439 wrmsrl(HV_X64_MSR_CRASH_P2, regs->ip); 440 wrmsrl(HV_X64_MSR_CRASH_P3, regs->ax); 441 wrmsrl(HV_X64_MSR_CRASH_P4, regs->sp); 442 443 /* 444 * Let Hyper-V know there is crash data available 445 */ 446 wrmsrl(HV_X64_MSR_CRASH_CTL, HV_CRASH_CTL_CRASH_NOTIFY); 447 } 448 EXPORT_SYMBOL_GPL(hyperv_report_panic); 449 450 /** 451 * hyperv_report_panic_msg - report panic message to Hyper-V 452 * @pa: physical address of the panic page containing the message 453 * @size: size of the message in the page 454 */ 455 void hyperv_report_panic_msg(phys_addr_t pa, size_t size) 456 { 457 /* 458 * P3 to contain the physical address of the panic page & P4 to 459 * contain the size of the panic data in that page. Rest of the 460 * registers are no-op when the NOTIFY_MSG flag is set. 461 */ 462 wrmsrl(HV_X64_MSR_CRASH_P0, 0); 463 wrmsrl(HV_X64_MSR_CRASH_P1, 0); 464 wrmsrl(HV_X64_MSR_CRASH_P2, 0); 465 wrmsrl(HV_X64_MSR_CRASH_P3, pa); 466 wrmsrl(HV_X64_MSR_CRASH_P4, size); 467 468 /* 469 * Let Hyper-V know there is crash data available along with 470 * the panic message. 471 */ 472 wrmsrl(HV_X64_MSR_CRASH_CTL, 473 (HV_CRASH_CTL_CRASH_NOTIFY | HV_CRASH_CTL_CRASH_NOTIFY_MSG)); 474 } 475 EXPORT_SYMBOL_GPL(hyperv_report_panic_msg); 476 477 bool hv_is_hyperv_initialized(void) 478 { 479 union hv_x64_msr_hypercall_contents hypercall_msr; 480 481 /* 482 * Ensure that we're really on Hyper-V, and not a KVM or Xen 483 * emulation of Hyper-V 484 */ 485 if (x86_hyper_type != X86_HYPER_MS_HYPERV) 486 return false; 487 488 /* 489 * Verify that earlier initialization succeeded by checking 490 * that the hypercall page is setup 491 */ 492 hypercall_msr.as_uint64 = 0; 493 rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); 494 495 return hypercall_msr.enable; 496 } 497 EXPORT_SYMBOL_GPL(hv_is_hyperv_initialized); 498