1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * X86 specific Hyper-V initialization code. 4 * 5 * Copyright (C) 2016, Microsoft, Inc. 6 * 7 * Author : K. Y. Srinivasan <kys@microsoft.com> 8 */ 9 10 #include <linux/efi.h> 11 #include <linux/types.h> 12 #include <asm/apic.h> 13 #include <asm/desc.h> 14 #include <asm/hypervisor.h> 15 #include <asm/hyperv-tlfs.h> 16 #include <asm/mshyperv.h> 17 #include <linux/version.h> 18 #include <linux/vmalloc.h> 19 #include <linux/mm.h> 20 #include <linux/hyperv.h> 21 #include <linux/slab.h> 22 #include <linux/cpuhotplug.h> 23 #include <clocksource/hyperv_timer.h> 24 25 void *hv_hypercall_pg; 26 EXPORT_SYMBOL_GPL(hv_hypercall_pg); 27 28 u32 *hv_vp_index; 29 EXPORT_SYMBOL_GPL(hv_vp_index); 30 31 struct hv_vp_assist_page **hv_vp_assist_page; 32 EXPORT_SYMBOL_GPL(hv_vp_assist_page); 33 34 void __percpu **hyperv_pcpu_input_arg; 35 EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg); 36 37 u32 hv_max_vp_index; 38 EXPORT_SYMBOL_GPL(hv_max_vp_index); 39 40 static int hv_cpu_init(unsigned int cpu) 41 { 42 u64 msr_vp_index; 43 struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()]; 44 void **input_arg; 45 struct page *pg; 46 47 input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); 48 pg = alloc_page(GFP_KERNEL); 49 if (unlikely(!pg)) 50 return -ENOMEM; 51 *input_arg = page_address(pg); 52 53 hv_get_vp_index(msr_vp_index); 54 55 hv_vp_index[smp_processor_id()] = msr_vp_index; 56 57 if (msr_vp_index > hv_max_vp_index) 58 hv_max_vp_index = msr_vp_index; 59 60 if (!hv_vp_assist_page) 61 return 0; 62 63 /* 64 * The VP ASSIST PAGE is an "overlay" page (see Hyper-V TLFS's Section 65 * 5.2.1 "GPA Overlay Pages"). Here it must be zeroed out to make sure 66 * we always write the EOI MSR in hv_apic_eoi_write() *after* the 67 * EOI optimization is disabled in hv_cpu_die(), otherwise a CPU may 68 * not be stopped in the case of CPU offlining and the VM will hang. 69 */ 70 if (!*hvp) { 71 *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO, 72 PAGE_KERNEL); 73 } 74 75 if (*hvp) { 76 u64 val; 77 78 val = vmalloc_to_pfn(*hvp); 79 val = (val << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) | 80 HV_X64_MSR_VP_ASSIST_PAGE_ENABLE; 81 82 wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, val); 83 } 84 85 return 0; 86 } 87 88 static void (*hv_reenlightenment_cb)(void); 89 90 static void hv_reenlightenment_notify(struct work_struct *dummy) 91 { 92 struct hv_tsc_emulation_status emu_status; 93 94 rdmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status); 95 96 /* Don't issue the callback if TSC accesses are not emulated */ 97 if (hv_reenlightenment_cb && emu_status.inprogress) 98 hv_reenlightenment_cb(); 99 } 100 static DECLARE_DELAYED_WORK(hv_reenlightenment_work, hv_reenlightenment_notify); 101 102 void hyperv_stop_tsc_emulation(void) 103 { 104 u64 freq; 105 struct hv_tsc_emulation_status emu_status; 106 107 rdmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status); 108 emu_status.inprogress = 0; 109 wrmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status); 110 111 rdmsrl(HV_X64_MSR_TSC_FREQUENCY, freq); 112 tsc_khz = div64_u64(freq, 1000); 113 } 114 EXPORT_SYMBOL_GPL(hyperv_stop_tsc_emulation); 115 116 static inline bool hv_reenlightenment_available(void) 117 { 118 /* 119 * Check for required features and priviliges to make TSC frequency 120 * change notifications work. 121 */ 122 return ms_hyperv.features & HV_X64_ACCESS_FREQUENCY_MSRS && 123 ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE && 124 ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT; 125 } 126 127 __visible void __irq_entry hyperv_reenlightenment_intr(struct pt_regs *regs) 128 { 129 entering_ack_irq(); 130 131 inc_irq_stat(irq_hv_reenlightenment_count); 132 133 schedule_delayed_work(&hv_reenlightenment_work, HZ/10); 134 135 exiting_irq(); 136 } 137 138 void set_hv_tscchange_cb(void (*cb)(void)) 139 { 140 struct hv_reenlightenment_control re_ctrl = { 141 .vector = HYPERV_REENLIGHTENMENT_VECTOR, 142 .enabled = 1, 143 .target_vp = hv_vp_index[smp_processor_id()] 144 }; 145 struct hv_tsc_emulation_control emu_ctrl = {.enabled = 1}; 146 147 if (!hv_reenlightenment_available()) { 148 pr_warn("Hyper-V: reenlightenment support is unavailable\n"); 149 return; 150 } 151 152 hv_reenlightenment_cb = cb; 153 154 /* Make sure callback is registered before we write to MSRs */ 155 wmb(); 156 157 wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl)); 158 wrmsrl(HV_X64_MSR_TSC_EMULATION_CONTROL, *((u64 *)&emu_ctrl)); 159 } 160 EXPORT_SYMBOL_GPL(set_hv_tscchange_cb); 161 162 void clear_hv_tscchange_cb(void) 163 { 164 struct hv_reenlightenment_control re_ctrl; 165 166 if (!hv_reenlightenment_available()) 167 return; 168 169 rdmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *(u64 *)&re_ctrl); 170 re_ctrl.enabled = 0; 171 wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *(u64 *)&re_ctrl); 172 173 hv_reenlightenment_cb = NULL; 174 } 175 EXPORT_SYMBOL_GPL(clear_hv_tscchange_cb); 176 177 static int hv_cpu_die(unsigned int cpu) 178 { 179 struct hv_reenlightenment_control re_ctrl; 180 unsigned int new_cpu; 181 unsigned long flags; 182 void **input_arg; 183 void *input_pg = NULL; 184 185 local_irq_save(flags); 186 input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); 187 input_pg = *input_arg; 188 *input_arg = NULL; 189 local_irq_restore(flags); 190 free_page((unsigned long)input_pg); 191 192 if (hv_vp_assist_page && hv_vp_assist_page[cpu]) 193 wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, 0); 194 195 if (hv_reenlightenment_cb == NULL) 196 return 0; 197 198 rdmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl)); 199 if (re_ctrl.target_vp == hv_vp_index[cpu]) { 200 /* Reassign to some other online CPU */ 201 new_cpu = cpumask_any_but(cpu_online_mask, cpu); 202 203 re_ctrl.target_vp = hv_vp_index[new_cpu]; 204 wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl)); 205 } 206 207 return 0; 208 } 209 210 static int __init hv_pci_init(void) 211 { 212 int gen2vm = efi_enabled(EFI_BOOT); 213 214 /* 215 * For Generation-2 VM, we exit from pci_arch_init() by returning 0. 216 * The purpose is to suppress the harmless warning: 217 * "PCI: Fatal: No config space access function found" 218 */ 219 if (gen2vm) 220 return 0; 221 222 /* For Generation-1 VM, we'll proceed in pci_arch_init(). */ 223 return 1; 224 } 225 226 /* 227 * This function is to be invoked early in the boot sequence after the 228 * hypervisor has been detected. 229 * 230 * 1. Setup the hypercall page. 231 * 2. Register Hyper-V specific clocksource. 232 * 3. Setup Hyper-V specific APIC entry points. 233 */ 234 void __init hyperv_init(void) 235 { 236 u64 guest_id, required_msrs; 237 union hv_x64_msr_hypercall_contents hypercall_msr; 238 int cpuhp, i; 239 240 if (x86_hyper_type != X86_HYPER_MS_HYPERV) 241 return; 242 243 /* Absolutely required MSRs */ 244 required_msrs = HV_X64_MSR_HYPERCALL_AVAILABLE | 245 HV_X64_MSR_VP_INDEX_AVAILABLE; 246 247 if ((ms_hyperv.features & required_msrs) != required_msrs) 248 return; 249 250 /* 251 * Allocate the per-CPU state for the hypercall input arg. 252 * If this allocation fails, we will not be able to setup 253 * (per-CPU) hypercall input page and thus this failure is 254 * fatal on Hyper-V. 255 */ 256 hyperv_pcpu_input_arg = alloc_percpu(void *); 257 258 BUG_ON(hyperv_pcpu_input_arg == NULL); 259 260 /* Allocate percpu VP index */ 261 hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index), 262 GFP_KERNEL); 263 if (!hv_vp_index) 264 return; 265 266 for (i = 0; i < num_possible_cpus(); i++) 267 hv_vp_index[i] = VP_INVAL; 268 269 hv_vp_assist_page = kcalloc(num_possible_cpus(), 270 sizeof(*hv_vp_assist_page), GFP_KERNEL); 271 if (!hv_vp_assist_page) { 272 ms_hyperv.hints &= ~HV_X64_ENLIGHTENED_VMCS_RECOMMENDED; 273 goto free_vp_index; 274 } 275 276 cpuhp = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online", 277 hv_cpu_init, hv_cpu_die); 278 if (cpuhp < 0) 279 goto free_vp_assist_page; 280 281 /* 282 * Setup the hypercall page and enable hypercalls. 283 * 1. Register the guest ID 284 * 2. Enable the hypercall and register the hypercall page 285 */ 286 guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0); 287 wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id); 288 289 hv_hypercall_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_RX); 290 if (hv_hypercall_pg == NULL) { 291 wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); 292 goto remove_cpuhp_state; 293 } 294 295 rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); 296 hypercall_msr.enable = 1; 297 hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg); 298 wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); 299 300 hv_apic_init(); 301 302 x86_init.pci.arch_init = hv_pci_init; 303 304 /* Register Hyper-V specific clocksource */ 305 hv_init_clocksource(); 306 return; 307 308 remove_cpuhp_state: 309 cpuhp_remove_state(cpuhp); 310 free_vp_assist_page: 311 kfree(hv_vp_assist_page); 312 hv_vp_assist_page = NULL; 313 free_vp_index: 314 kfree(hv_vp_index); 315 hv_vp_index = NULL; 316 } 317 318 /* 319 * This routine is called before kexec/kdump, it does the required cleanup. 320 */ 321 void hyperv_cleanup(void) 322 { 323 union hv_x64_msr_hypercall_contents hypercall_msr; 324 325 /* Reset our OS id */ 326 wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); 327 328 /* 329 * Reset hypercall page reference before reset the page, 330 * let hypercall operations fail safely rather than 331 * panic the kernel for using invalid hypercall page 332 */ 333 hv_hypercall_pg = NULL; 334 335 /* Reset the hypercall page */ 336 hypercall_msr.as_uint64 = 0; 337 wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); 338 339 /* Reset the TSC page */ 340 hypercall_msr.as_uint64 = 0; 341 wrmsrl(HV_X64_MSR_REFERENCE_TSC, hypercall_msr.as_uint64); 342 } 343 EXPORT_SYMBOL_GPL(hyperv_cleanup); 344 345 void hyperv_report_panic(struct pt_regs *regs, long err) 346 { 347 static bool panic_reported; 348 u64 guest_id; 349 350 /* 351 * We prefer to report panic on 'die' chain as we have proper 352 * registers to report, but if we miss it (e.g. on BUG()) we need 353 * to report it on 'panic'. 354 */ 355 if (panic_reported) 356 return; 357 panic_reported = true; 358 359 rdmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id); 360 361 wrmsrl(HV_X64_MSR_CRASH_P0, err); 362 wrmsrl(HV_X64_MSR_CRASH_P1, guest_id); 363 wrmsrl(HV_X64_MSR_CRASH_P2, regs->ip); 364 wrmsrl(HV_X64_MSR_CRASH_P3, regs->ax); 365 wrmsrl(HV_X64_MSR_CRASH_P4, regs->sp); 366 367 /* 368 * Let Hyper-V know there is crash data available 369 */ 370 wrmsrl(HV_X64_MSR_CRASH_CTL, HV_CRASH_CTL_CRASH_NOTIFY); 371 } 372 EXPORT_SYMBOL_GPL(hyperv_report_panic); 373 374 /** 375 * hyperv_report_panic_msg - report panic message to Hyper-V 376 * @pa: physical address of the panic page containing the message 377 * @size: size of the message in the page 378 */ 379 void hyperv_report_panic_msg(phys_addr_t pa, size_t size) 380 { 381 /* 382 * P3 to contain the physical address of the panic page & P4 to 383 * contain the size of the panic data in that page. Rest of the 384 * registers are no-op when the NOTIFY_MSG flag is set. 385 */ 386 wrmsrl(HV_X64_MSR_CRASH_P0, 0); 387 wrmsrl(HV_X64_MSR_CRASH_P1, 0); 388 wrmsrl(HV_X64_MSR_CRASH_P2, 0); 389 wrmsrl(HV_X64_MSR_CRASH_P3, pa); 390 wrmsrl(HV_X64_MSR_CRASH_P4, size); 391 392 /* 393 * Let Hyper-V know there is crash data available along with 394 * the panic message. 395 */ 396 wrmsrl(HV_X64_MSR_CRASH_CTL, 397 (HV_CRASH_CTL_CRASH_NOTIFY | HV_CRASH_CTL_CRASH_NOTIFY_MSG)); 398 } 399 EXPORT_SYMBOL_GPL(hyperv_report_panic_msg); 400 401 bool hv_is_hyperv_initialized(void) 402 { 403 union hv_x64_msr_hypercall_contents hypercall_msr; 404 405 /* 406 * Ensure that we're really on Hyper-V, and not a KVM or Xen 407 * emulation of Hyper-V 408 */ 409 if (x86_hyper_type != X86_HYPER_MS_HYPERV) 410 return false; 411 412 /* 413 * Verify that earlier initialization succeeded by checking 414 * that the hypercall page is setup 415 */ 416 hypercall_msr.as_uint64 = 0; 417 rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); 418 419 return hypercall_msr.enable; 420 } 421 EXPORT_SYMBOL_GPL(hv_is_hyperv_initialized); 422