// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/hyperv.h>
#include <linux/random.h>
#include <linux/clockchips.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <clocksource/hyperv_timer.h>
#include <asm/mshyperv.h>
#include <linux/set_memory.h>
#include "hyperv_vmbus.h"

/* The one and only */
struct hv_context hv_context;

/*
 * hv_init - Main initialization routine.
 *
 * This routine must be called before any other routines in here are called.
 */
int hv_init(void)
{
	hv_context.cpu_context = alloc_percpu(struct hv_per_cpu_context);
	if (!hv_context.cpu_context)
		return -ENOMEM;
	return 0;
}

/*
 * hv_post_message - Post a message using the hypervisor message IPC.
 *
 * This involves a hypercall.
 */
int hv_post_message(union hv_connection_id connection_id,
		    enum hv_message_type message_type,
		    void *payload, size_t payload_size)
{
	struct hv_input_post_message *aligned_msg;
	unsigned long flags;
	u64 status;

	if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
		return -EMSGSIZE;

	/*
	 * The message is built in this CPU's hypercall input page; keep
	 * interrupts disabled so the page isn't reused underneath us
	 * before the hypercall completes.
	 */
	local_irq_save(flags);

	aligned_msg = *this_cpu_ptr(hyperv_pcpu_input_arg);
	aligned_msg->connectionid = connection_id;
	aligned_msg->reserved = 0;
	aligned_msg->message_type = message_type;
	aligned_msg->payload_size = payload_size;
	memcpy((void *)aligned_msg->payload, payload, payload_size);

	if (hv_isolation_type_snp())
		status = hv_ghcb_hypercall(HVCALL_POST_MESSAGE,
					   (void *)aligned_msg, NULL,
					   sizeof(*aligned_msg));
	else
		status = hv_do_hypercall(HVCALL_POST_MESSAGE,
					 aligned_msg, NULL);

	local_irq_restore(flags);

	return hv_result(status);
}

/*
 * hv_synic_alloc - Allocate the NUMA map and, when the guest owns them, the
 * per-CPU SynIC message and event pages. In fully enlightened SNP/TDX VMs
 * the pages are also marked decrypted so the hypervisor can access them.
 */
int hv_synic_alloc(void)
{
	int cpu, ret = -ENOMEM;
	struct hv_per_cpu_context *hv_cpu;

	/*
	 * First, zero all per-cpu memory areas so hv_synic_free() can
	 * detect what memory has been allocated and clean up properly
	 * after any failures.
	 */
	for_each_present_cpu(cpu) {
		hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);
		memset(hv_cpu, 0, sizeof(*hv_cpu));
	}

	hv_context.hv_numa_map = kcalloc(nr_node_ids, sizeof(struct cpumask),
					 GFP_KERNEL);
	if (hv_context.hv_numa_map == NULL) {
		pr_err("Unable to allocate NUMA map\n");
		goto err;
	}

	for_each_present_cpu(cpu) {
		hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);

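		/*
		 * msg_dpc is the tasklet (bottom half) that processes SynIC
		 * messages delivered to this CPU.
		 */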
		tasklet_init(&hv_cpu->msg_dpc,
			     vmbus_on_msg_dpc, (unsigned long) hv_cpu);

		/*
		 * The SynIC message and event pages are allocated by the
		 * paravisor; skip allocating them here.
		 */
		if (!ms_hyperv.paravisor_present && !hv_root_partition) {
			hv_cpu->synic_message_page =
				(void *)get_zeroed_page(GFP_ATOMIC);
			if (hv_cpu->synic_message_page == NULL) {
				pr_err("Unable to allocate SYNIC message page\n");
				goto err;
			}

			hv_cpu->synic_event_page =
				(void *)get_zeroed_page(GFP_ATOMIC);
			if (hv_cpu->synic_event_page == NULL) {
				pr_err("Unable to allocate SYNIC event page\n");

				free_page((unsigned long)hv_cpu->synic_message_page);
				hv_cpu->synic_message_page = NULL;
				goto err;
			}
		}

		if (!ms_hyperv.paravisor_present &&
		    (hv_isolation_type_en_snp() || hv_isolation_type_tdx())) {
			ret = set_memory_decrypted((unsigned long)
				hv_cpu->synic_message_page, 1);
			if (ret) {
				pr_err("Failed to decrypt SYNIC msg page: %d\n", ret);
				hv_cpu->synic_message_page = NULL;

				/*
				 * Free the event page here so that hv_synic_free()
				 * won't later try to re-encrypt it.
				 */
				free_page((unsigned long)hv_cpu->synic_event_page);
				hv_cpu->synic_event_page = NULL;
				goto err;
			}

			ret = set_memory_decrypted((unsigned long)
				hv_cpu->synic_event_page, 1);
			if (ret) {
				pr_err("Failed to decrypt SYNIC event page: %d\n", ret);
				hv_cpu->synic_event_page = NULL;
				goto err;
			}

			memset(hv_cpu->synic_message_page, 0, PAGE_SIZE);
			memset(hv_cpu->synic_event_page, 0, PAGE_SIZE);
		}
	}

	return 0;

err:
	/*
	 * Any memory allocations that succeeded will be freed when
	 * the caller cleans up by calling hv_synic_free().
	 */
	return ret;
}

/*
 * hv_synic_free - Counterpart of hv_synic_alloc(): re-encrypt the SynIC
 * pages where necessary, then free them along with the NUMA map.
 */
void hv_synic_free(void)
{
	int cpu, ret;

	for_each_present_cpu(cpu) {
		struct hv_per_cpu_context *hv_cpu
			= per_cpu_ptr(hv_context.cpu_context, cpu);

		/* It's better to leak the page if the re-encryption fails. */
		if (!ms_hyperv.paravisor_present &&
		    (hv_isolation_type_en_snp() || hv_isolation_type_tdx())) {
			if (hv_cpu->synic_message_page) {
				ret = set_memory_encrypted((unsigned long)
					hv_cpu->synic_message_page, 1);
				if (ret) {
					pr_err("Failed to encrypt SYNIC msg page: %d\n", ret);
					hv_cpu->synic_message_page = NULL;
				}
			}

			if (hv_cpu->synic_event_page) {
				ret = set_memory_encrypted((unsigned long)
					hv_cpu->synic_event_page, 1);
				if (ret) {
					pr_err("Failed to encrypt SYNIC event page: %d\n", ret);
					hv_cpu->synic_event_page = NULL;
				}
			}
		}

		free_page((unsigned long)hv_cpu->synic_event_page);
		free_page((unsigned long)hv_cpu->synic_message_page);
	}

	kfree(hv_context.hv_numa_map);
}

/*
 * hv_synic_init - Initialize the Synthetic Interrupt Controller.
 *
 * If it is already initialized by another entity (i.e., the x2v shim), we
 * need to retrieve the initialized message and event pages. Otherwise, we
 * create and initialize the message and event pages.
 */
void hv_synic_enable_regs(unsigned int cpu)
{
	struct hv_per_cpu_context *hv_cpu
		= per_cpu_ptr(hv_context.cpu_context, cpu);
	union hv_synic_simp simp;
	union hv_synic_siefp siefp;
	union hv_synic_sint shared_sint;
	union hv_synic_scontrol sctrl;

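	/*
	 * When a paravisor is present, or when running as the root
	 * partition, the SynIC message/event pages already exist (set up by
	 * the paravisor or the hypervisor) and only need to be mapped here;
	 * otherwise, point the hypervisor at the pages allocated in
	 * hv_synic_alloc().
	 */
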
	/* Setup the Synic's message page */
	simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
	simp.simp_enabled = 1;

	if (ms_hyperv.paravisor_present || hv_root_partition) {
		/* Mask out vTOM bit. ioremap_cache() maps decrypted */
		u64 base = (simp.base_simp_gpa << HV_HYP_PAGE_SHIFT) &
				~ms_hyperv.shared_gpa_boundary;
		hv_cpu->synic_message_page
			= (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE);
		if (!hv_cpu->synic_message_page)
			pr_err("Failed to map SynIC message page.\n");
	} else {
		simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page)
			>> HV_HYP_PAGE_SHIFT;
	}

	hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);

	/* Setup the Synic's event page */
	siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
	siefp.siefp_enabled = 1;

	if (ms_hyperv.paravisor_present || hv_root_partition) {
		/* Mask out vTOM bit. ioremap_cache() maps decrypted */
		u64 base = (siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT) &
				~ms_hyperv.shared_gpa_boundary;
		hv_cpu->synic_event_page
			= (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE);
		if (!hv_cpu->synic_event_page)
			pr_err("Failed to map SynIC event page.\n");
	} else {
		siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page)
			>> HV_HYP_PAGE_SHIFT;
	}

	hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);

	/* Setup the shared SINT. */
	if (vmbus_irq != -1)
		enable_percpu_irq(vmbus_irq, 0);
	shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
					VMBUS_MESSAGE_SINT);

	shared_sint.vector = vmbus_interrupt;
	shared_sint.masked = false;

	/*
	 * On architectures where Hyper-V doesn't support AEOI (e.g., ARM64),
	 * it doesn't provide a recommendation flag and AEOI must be disabled.
	 */
#ifdef HV_DEPRECATING_AEOI_RECOMMENDED
	shared_sint.auto_eoi =
			!(ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED);
#else
	shared_sint.auto_eoi = 0;
#endif
	hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
			shared_sint.as_uint64);

	/* Enable the global synic bit */
	sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
	sctrl.enable = 1;

	hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
}

int hv_synic_init(unsigned int cpu)
{
	hv_synic_enable_regs(cpu);

	hv_stimer_legacy_init(cpu, VMBUS_MESSAGE_SINT);

	return 0;
}

/*
 * hv_synic_cleanup - Cleanup routine for hv_synic_init().
 */
void hv_synic_disable_regs(unsigned int cpu)
{
	struct hv_per_cpu_context *hv_cpu
		= per_cpu_ptr(hv_context.cpu_context, cpu);
	union hv_synic_sint shared_sint;
	union hv_synic_simp simp;
	union hv_synic_siefp siefp;
	union hv_synic_scontrol sctrl;

	shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
					VMBUS_MESSAGE_SINT);

	shared_sint.masked = 1;

	/* Need to correctly clean up in the case of SMP!!! */
	/* Disable the interrupt */
	hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
			shared_sint.as_uint64);

	simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
	/*
	 * In an isolation VM, the SIMP and SIEFP pages are allocated by the
	 * paravisor. These pages are also used by the kdump kernel, so only
	 * reset the enable bit here and keep the page addresses.
	 */
	simp.simp_enabled = 0;
	if (ms_hyperv.paravisor_present || hv_root_partition) {
		iounmap(hv_cpu->synic_message_page);
		hv_cpu->synic_message_page = NULL;
	} else {
		simp.base_simp_gpa = 0;
	}

	hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);

	siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
	siefp.siefp_enabled = 0;

	if (ms_hyperv.paravisor_present || hv_root_partition) {
		iounmap(hv_cpu->synic_event_page);
		hv_cpu->synic_event_page = NULL;
	} else {
		siefp.base_siefp_gpa = 0;
	}

	hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);

	/* Disable the global synic bit */
	sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
	sctrl.enable = 0;
	hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);

	if (vmbus_irq != -1)
		disable_percpu_irq(vmbus_irq);
}

#define HV_MAX_TRIES 3
/*
 * Scan the event flags page of 'this' CPU looking for any bit that is set. If we find one
 * bit set, then wait for a few milliseconds. Repeat these steps for a maximum of 3 times.
 * Return 'true', if there is still any set bit after this operation; 'false', otherwise.
 *
 * If a bit is set, that means there is a pending channel interrupt. The expectation is
 * that the normal interrupt handling mechanism will find and process the channel interrupt
 * "very soon", and in the process clear the bit.
 */
static bool hv_synic_event_pending(void)
{
	struct hv_per_cpu_context *hv_cpu = this_cpu_ptr(hv_context.cpu_context);
	union hv_synic_event_flags *event =
		(union hv_synic_event_flags *)hv_cpu->synic_event_page + VMBUS_MESSAGE_SINT;
	unsigned long *recv_int_page = event->flags; /* assumes VMBus version >= VERSION_WIN8 */
	bool pending;
	u32 relid;
	int tries = 0;

retry:
	pending = false;
	for_each_set_bit(relid, recv_int_page, HV_EVENT_FLAGS_COUNT) {
		/* Special case - VMBus channel protocol messages */
		if (relid == 0)
			continue;
		pending = true;
		break;
	}
	if (pending && tries++ < HV_MAX_TRIES) {
		usleep_range(10000, 20000);
		goto retry;
	}
	return pending;
}

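/*
 * CPU offline callback: while VMBus is connected, refuse (with -EBUSY) to
 * take down a CPU that is the VMBus connect CPU, still has channels bound
 * to it, or still has pending events.
 */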
int hv_synic_cleanup(unsigned int cpu)
{
	struct vmbus_channel *channel, *sc;
	bool channel_found = false;

	if (vmbus_connection.conn_state != CONNECTED)
		goto always_cleanup;

	/*
	 * Hyper-V does not provide a way to change the connect CPU once
	 * it is set; we must prevent the connect CPU from going offline
	 * while the VM is running normally. But in the panic or kexec()
	 * path where the vmbus is already disconnected, the CPU must be
	 * allowed to shut down.
	 */
	if (cpu == VMBUS_CONNECT_CPU)
		return -EBUSY;

	/*
	 * Search for channels which are bound to the CPU we're about to
	 * clean up. If we find one while vmbus is still connected, we fail;
	 * this effectively prevents CPU offlining.
	 *
	 * TODO: Re-bind the channels to different CPUs.
	 */
	mutex_lock(&vmbus_connection.channel_mutex);
	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
		if (channel->target_cpu == cpu) {
			channel_found = true;
			break;
		}
		list_for_each_entry(sc, &channel->sc_list, sc_list) {
			if (sc->target_cpu == cpu) {
				channel_found = true;
				break;
			}
		}
		if (channel_found)
			break;
	}
	mutex_unlock(&vmbus_connection.channel_mutex);

	if (channel_found)
		return -EBUSY;

	/*
	 * channel_found == false means that any channels that were previously
	 * assigned to the CPU have been reassigned elsewhere with a call to
	 * vmbus_send_modifychannel(). Scan the event flags page looking for
	 * bits that are set and wait, with a timeout, for vmbus_chan_sched()
	 * to process such bits. If bits are still set after this operation
	 * and VMBus is connected, fail the CPU offlining operation.
	 */
	if (vmbus_proto_version >= VERSION_WIN10_V4_1 && hv_synic_event_pending())
		return -EBUSY;

always_cleanup:
	hv_stimer_legacy_cleanup(cpu);

	hv_synic_disable_regs(cpu);

	return 0;
}