// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/hyperv.h>
#include <linux/random.h>
#include <linux/clockchips.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <clocksource/hyperv_timer.h>
#include <asm/mshyperv.h>
#include "hyperv_vmbus.h"

/* The one and only */
struct hv_context hv_context;

/*
 * hv_init - Main initialization routine.
 *
 * This routine must be called before any other routines in here are called.
 */
int hv_init(void)
{
	hv_context.cpu_context = alloc_percpu(struct hv_per_cpu_context);
	if (!hv_context.cpu_context)
		return -ENOMEM;
	return 0;
}

/*
 * hv_post_message - Post a message using the hypervisor message IPC.
 *
 * This involves a hypercall.
 */
int hv_post_message(union hv_connection_id connection_id,
		    enum hv_message_type message_type,
		    void *payload, size_t payload_size)
{
	struct hv_input_post_message *aligned_msg;
	unsigned long flags;
	u64 status;

	if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
		return -EMSGSIZE;

	local_irq_save(flags);

	aligned_msg = *this_cpu_ptr(hyperv_pcpu_input_arg);
	aligned_msg->connectionid = connection_id;
	aligned_msg->reserved = 0;
	aligned_msg->message_type = message_type;
	aligned_msg->payload_size = payload_size;
	memcpy((void *)aligned_msg->payload, payload, payload_size);

	if (hv_isolation_type_snp())
		status = hv_ghcb_hypercall(HVCALL_POST_MESSAGE,
					   (void *)aligned_msg, NULL,
					   sizeof(*aligned_msg));
	else
		status = hv_do_hypercall(HVCALL_POST_MESSAGE,
					 aligned_msg, NULL);

	local_irq_restore(flags);

	return hv_result(status);
}
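
/*
 * Illustrative sketch (not part of this file): hv_post_message() is the
 * low-level primitive behind VMBus message posting.  A caller along the
 * lines of vmbus_post_msg() in connection.c builds a connection ID and
 * retries when the hypervisor is temporarily out of buffers, roughly:
 *
 *	union hv_connection_id conn_id;
 *	int ret;
 *
 *	conn_id.asu32 = 0;
 *	conn_id.u.id = VMBUS_MESSAGE_CONNECTION_ID;
 *	ret = hv_post_message(conn_id, 1, msgbuf, msglen);
 *	if (ret == HV_STATUS_INSUFFICIENT_BUFFERS)
 *		udelay(usec);	// hypothetical backoff, then retry
 *
 * The exact retry policy and connection ID live with the caller; the names
 * above are only an assumed example of typical usage.
 */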

int hv_synic_alloc(void)
{
	int cpu;
	struct hv_per_cpu_context *hv_cpu;

	/*
	 * First, zero all per-cpu memory areas so hv_synic_free() can
	 * detect what memory has been allocated and cleanup properly
	 * after any failures.
	 */
	for_each_present_cpu(cpu) {
		hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);
		memset(hv_cpu, 0, sizeof(*hv_cpu));
	}

	hv_context.hv_numa_map = kcalloc(nr_node_ids, sizeof(struct cpumask),
					 GFP_KERNEL);
	if (hv_context.hv_numa_map == NULL) {
		pr_err("Unable to allocate NUMA map\n");
		goto err;
	}

	for_each_present_cpu(cpu) {
		hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);

		tasklet_init(&hv_cpu->msg_dpc,
			     vmbus_on_msg_dpc, (unsigned long)hv_cpu);

		/*
		 * The SynIC message and event pages are allocated by the
		 * paravisor; skip allocating them here.
		 */
		if (!hv_isolation_type_snp() && !hv_root_partition) {
			hv_cpu->synic_message_page =
				(void *)get_zeroed_page(GFP_ATOMIC);
			if (hv_cpu->synic_message_page == NULL) {
				pr_err("Unable to allocate SYNIC message page\n");
				goto err;
			}

			hv_cpu->synic_event_page =
				(void *)get_zeroed_page(GFP_ATOMIC);
			if (hv_cpu->synic_event_page == NULL) {
				pr_err("Unable to allocate SYNIC event page\n");
				goto err;
			}
		}
	}

	return 0;
err:
	/*
	 * Any memory allocations that succeeded will be freed when
	 * the caller cleans up by calling hv_synic_free().
	 */
	return -ENOMEM;
}

void hv_synic_free(void)
{
	int cpu;

	for_each_present_cpu(cpu) {
		struct hv_per_cpu_context *hv_cpu
			= per_cpu_ptr(hv_context.cpu_context, cpu);

		free_page((unsigned long)hv_cpu->synic_event_page);
		free_page((unsigned long)hv_cpu->synic_message_page);
	}

	kfree(hv_context.hv_numa_map);
}

/*
 * hv_synic_init - Initialize the Synthetic Interrupt Controller.
 *
 * If it is already initialized by another entity (i.e., the x2v shim), we
 * need to retrieve the initialized message and event pages.  Otherwise, we
 * create and initialize the message and event pages.
 */
void hv_synic_enable_regs(unsigned int cpu)
{
	struct hv_per_cpu_context *hv_cpu
		= per_cpu_ptr(hv_context.cpu_context, cpu);
	union hv_synic_simp simp;
	union hv_synic_siefp siefp;
	union hv_synic_sint shared_sint;
	union hv_synic_scontrol sctrl;

	/* Setup the Synic's message page */
	simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
	simp.simp_enabled = 1;

	if (hv_isolation_type_snp() || hv_root_partition) {
		/* Mask out vTOM bit. ioremap_cache() maps decrypted */
		u64 base = (simp.base_simp_gpa << HV_HYP_PAGE_SHIFT) &
				~ms_hyperv.shared_gpa_boundary;
		hv_cpu->synic_message_page
			= (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE);
		if (!hv_cpu->synic_message_page)
			pr_err("Fail to map synic message page.\n");
	} else {
		simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page)
			>> HV_HYP_PAGE_SHIFT;
	}

	hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);

	/* Setup the Synic's event page */
	siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
	siefp.siefp_enabled = 1;

	if (hv_isolation_type_snp() || hv_root_partition) {
		/* Mask out vTOM bit. ioremap_cache() maps decrypted */
		u64 base = (siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT) &
				~ms_hyperv.shared_gpa_boundary;
		hv_cpu->synic_event_page
			= (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE);
		if (!hv_cpu->synic_event_page)
			pr_err("Fail to map synic event page.\n");
	} else {
		siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page)
			>> HV_HYP_PAGE_SHIFT;
	}

	hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);

	/* Setup the shared SINT. */
	if (vmbus_irq != -1)
		enable_percpu_irq(vmbus_irq, 0);
	shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
					VMBUS_MESSAGE_SINT);

	shared_sint.vector = vmbus_interrupt;
	shared_sint.masked = false;

	/*
	 * On architectures where Hyper-V doesn't support AEOI (e.g., ARM64),
	 * it doesn't provide a recommendation flag and AEOI must be disabled.
	 */
#ifdef HV_DEPRECATING_AEOI_RECOMMENDED
	shared_sint.auto_eoi =
			!(ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED);
#else
	shared_sint.auto_eoi = 0;
#endif
	hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
				shared_sint.as_uint64);

	/* Enable the global synic bit */
	sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
	sctrl.enable = 1;

	hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
}
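
/*
 * Worked example (hypothetical numbers, for illustration only): in an SNP
 * isolation VM the hypervisor reports the SIMP/SIEFP base as a GPA above
 * the shared (vTOM) boundary.  Assuming HV_HYP_PAGE_SHIFT is 12, if
 * ms_hyperv.shared_gpa_boundary were 0x0000400000000000 (bit 46) and the
 * SIMP register held a base_simp_gpa of 0x400001234 pages, then:
 *
 *	base = (0x400001234ULL << HV_HYP_PAGE_SHIFT) &
 *			~0x0000400000000000ULL
 *	     = 0x0000000001234000
 *
 * i.e., the vTOM bit is cleared and the page is ioremap_cache()'d at its
 * address below the boundary, which the mapping layer treats as decrypted.
 */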

int hv_synic_init(unsigned int cpu)
{
	hv_synic_enable_regs(cpu);

	hv_stimer_legacy_init(cpu, VMBUS_MESSAGE_SINT);

	return 0;
}

/*
 * hv_synic_cleanup - Cleanup routine for hv_synic_init().
 */
void hv_synic_disable_regs(unsigned int cpu)
{
	struct hv_per_cpu_context *hv_cpu
		= per_cpu_ptr(hv_context.cpu_context, cpu);
	union hv_synic_sint shared_sint;
	union hv_synic_simp simp;
	union hv_synic_siefp siefp;
	union hv_synic_scontrol sctrl;

	shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
					VMBUS_MESSAGE_SINT);

	shared_sint.masked = 1;

	/* Need to correctly cleanup in the case of SMP!!! */
	/* Disable the interrupt */
	hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
				shared_sint.as_uint64);

	simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
	/*
	 * In an isolation VM, the SIMP and SIEFP pages are allocated by the
	 * paravisor.  These pages are also used by the kdump kernel, so just
	 * clear the enable bit here and keep the page addresses.
	 */
	simp.simp_enabled = 0;
	if (hv_isolation_type_snp() || hv_root_partition) {
		iounmap(hv_cpu->synic_message_page);
		hv_cpu->synic_message_page = NULL;
	} else {
		simp.base_simp_gpa = 0;
	}

	hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);

	siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
	siefp.siefp_enabled = 0;

	if (hv_isolation_type_snp() || hv_root_partition) {
		iounmap(hv_cpu->synic_event_page);
		hv_cpu->synic_event_page = NULL;
	} else {
		siefp.base_siefp_gpa = 0;
	}

	hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);

	/* Disable the global synic bit */
	sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
	sctrl.enable = 0;
	hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);

	if (vmbus_irq != -1)
		disable_percpu_irq(vmbus_irq);
}

#define HV_MAX_TRIES 3
/*
 * Scan the event flags page of 'this' CPU looking for any bit that is set.
 * If one is found, wait for a few milliseconds and check again, for a maximum
 * of 3 attempts.  Return 'true' if any bit is still set after these attempts;
 * 'false' otherwise.
 *
 * If a bit is set, there is a pending channel interrupt.  The expectation is
 * that the normal interrupt handling mechanism will find and process the
 * channel interrupt "very soon", and in the process clear the bit.
 */
static bool hv_synic_event_pending(void)
{
	struct hv_per_cpu_context *hv_cpu = this_cpu_ptr(hv_context.cpu_context);
	union hv_synic_event_flags *event =
		(union hv_synic_event_flags *)hv_cpu->synic_event_page + VMBUS_MESSAGE_SINT;
	unsigned long *recv_int_page = event->flags; /* assumes VMBus version >= VERSION_WIN8 */
	bool pending;
	u32 relid;
	int tries = 0;

retry:
	pending = false;
	for_each_set_bit(relid, recv_int_page, HV_EVENT_FLAGS_COUNT) {
		/* Special case - VMBus channel protocol messages */
		if (relid == 0)
			continue;
		pending = true;
		break;
	}
	if (pending && tries++ < HV_MAX_TRIES) {
		usleep_range(10000, 20000);
		goto retry;
	}
	return pending;
}

int hv_synic_cleanup(unsigned int cpu)
{
	struct vmbus_channel *channel, *sc;
	bool channel_found = false;

	if (vmbus_connection.conn_state != CONNECTED)
		goto always_cleanup;

	/*
	 * Hyper-V does not provide a way to change the connect CPU once
	 * it is set; we must prevent the connect CPU from going offline
	 * while the VM is running normally. But in the panic or kexec()
	 * path where the vmbus is already disconnected, the CPU must be
	 * allowed to shut down.
	 */
	if (cpu == VMBUS_CONNECT_CPU)
		return -EBUSY;

	/*
	 * Search for channels which are bound to the CPU we're about to
	 * cleanup.  In case we find one and vmbus is still connected, we
	 * fail; this will effectively prevent CPU offlining.
	 *
	 * TODO: Re-bind the channels to different CPUs.
	 */
	mutex_lock(&vmbus_connection.channel_mutex);
	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
		if (channel->target_cpu == cpu) {
			channel_found = true;
			break;
		}
		list_for_each_entry(sc, &channel->sc_list, sc_list) {
			if (sc->target_cpu == cpu) {
				channel_found = true;
				break;
			}
		}
		if (channel_found)
			break;
	}
	mutex_unlock(&vmbus_connection.channel_mutex);

	if (channel_found)
		return -EBUSY;

	/*
	 * channel_found == false means that any channels that were previously
	 * assigned to the CPU have been reassigned elsewhere with a call of
	 * vmbus_send_modifychannel().  Scan the event flags page looking for
	 * bits that are set, and wait with a timeout for vmbus_chan_sched()
	 * to process such bits.  If bits are still set after this operation
	 * and VMBus is connected, fail the CPU offlining operation.
	 */
	if (vmbus_proto_version >= VERSION_WIN10_V4_1 && hv_synic_event_pending())
		return -EBUSY;

always_cleanup:
	hv_stimer_legacy_cleanup(cpu);

	hv_synic_disable_regs(cpu);

	return 0;
}
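
/*
 * Note on usage (a sketch of how these hooks are typically wired up, not a
 * definition made in this file): hv_synic_init() and hv_synic_cleanup() are
 * registered by the VMBus driver as CPU hotplug callbacks, roughly:
 *
 *	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
 *					"hyperv/vmbus:online",
 *					hv_synic_init, hv_synic_cleanup);
 *
 * With that wiring, returning -EBUSY from hv_synic_cleanup() vetoes an
 * attempt to offline a CPU that still hosts the VMBus connection or any
 * channel interrupts.
 */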