1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * 4 * Copyright (c) 2009, Microsoft Corporation. 5 * 6 * Authors: 7 * Haiyang Zhang <haiyangz@microsoft.com> 8 * Hank Janssen <hjanssen@microsoft.com> 9 */ 10 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 11 12 #include <linux/kernel.h> 13 #include <linux/sched.h> 14 #include <linux/wait.h> 15 #include <linux/delay.h> 16 #include <linux/mm.h> 17 #include <linux/module.h> 18 #include <linux/slab.h> 19 #include <linux/vmalloc.h> 20 #include <linux/hyperv.h> 21 #include <linux/export.h> 22 #include <linux/io.h> 23 #include <linux/set_memory.h> 24 #include <asm/mshyperv.h> 25 26 #include "hyperv_vmbus.h" 27 28 29 struct vmbus_connection vmbus_connection = { 30 .conn_state = DISCONNECTED, 31 .unload_event = COMPLETION_INITIALIZER( 32 vmbus_connection.unload_event), 33 .next_gpadl_handle = ATOMIC_INIT(0xE1E10), 34 35 .ready_for_suspend_event = COMPLETION_INITIALIZER( 36 vmbus_connection.ready_for_suspend_event), 37 .ready_for_resume_event = COMPLETION_INITIALIZER( 38 vmbus_connection.ready_for_resume_event), 39 }; 40 EXPORT_SYMBOL_GPL(vmbus_connection); 41 42 /* 43 * Negotiated protocol version with the host. 44 */ 45 __u32 vmbus_proto_version; 46 EXPORT_SYMBOL_GPL(vmbus_proto_version); 47 48 /* 49 * Table of VMBus versions listed from newest to oldest. 50 * VERSION_WIN7 and VERSION_WS2008 are no longer supported in 51 * Linux guests and are not listed. 52 */ 53 static __u32 vmbus_versions[] = { 54 VERSION_WIN10_V5_3, 55 VERSION_WIN10_V5_2, 56 VERSION_WIN10_V5_1, 57 VERSION_WIN10_V5, 58 VERSION_WIN10_V4_1, 59 VERSION_WIN10, 60 VERSION_WIN8_1, 61 VERSION_WIN8 62 }; 63 64 /* 65 * Maximal VMBus protocol version guests can negotiate. Useful to cap the 66 * VMBus version for testing and debugging purpose. 67 */ 68 static uint max_version = VERSION_WIN10_V5_3; 69 70 module_param(max_version, uint, S_IRUGO); 71 MODULE_PARM_DESC(max_version, 72 "Maximal VMBus protocol version which can be negotiated"); 73 74 int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version) 75 { 76 int ret = 0; 77 struct vmbus_channel_initiate_contact *msg; 78 unsigned long flags; 79 80 init_completion(&msginfo->waitevent); 81 82 msg = (struct vmbus_channel_initiate_contact *)msginfo->msg; 83 84 memset(msg, 0, sizeof(*msg)); 85 msg->header.msgtype = CHANNELMSG_INITIATE_CONTACT; 86 msg->vmbus_version_requested = version; 87 88 /* 89 * VMBus protocol 5.0 (VERSION_WIN10_V5) and higher require that we must 90 * use VMBUS_MESSAGE_CONNECTION_ID_4 for the Initiate Contact Message, 91 * and for subsequent messages, we must use the Message Connection ID 92 * field in the host-returned Version Response Message. And, with 93 * VERSION_WIN10_V5 and higher, we don't use msg->interrupt_page, but we 94 * tell the host explicitly that we still use VMBUS_MESSAGE_SINT(2) for 95 * compatibility. 96 * 97 * On old hosts, we should always use VMBUS_MESSAGE_CONNECTION_ID (1). 98 */ 99 if (version >= VERSION_WIN10_V5) { 100 msg->msg_sint = VMBUS_MESSAGE_SINT; 101 vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID_4; 102 } else { 103 msg->interrupt_page = virt_to_phys(vmbus_connection.int_page); 104 vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID; 105 } 106 107 msg->monitor_page1 = vmbus_connection.monitor_pages_pa[0]; 108 msg->monitor_page2 = vmbus_connection.monitor_pages_pa[1]; 109 110 msg->target_vcpu = hv_cpu_number_to_vp_number(VMBUS_CONNECT_CPU); 111 112 /* 113 * Add to list before we send the request since we may 114 * receive the response before returning from this routine 115 */ 116 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 117 list_add_tail(&msginfo->msglistentry, 118 &vmbus_connection.chn_msg_list); 119 120 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 121 122 ret = vmbus_post_msg(msg, 123 sizeof(struct vmbus_channel_initiate_contact), 124 true); 125 126 trace_vmbus_negotiate_version(msg, ret); 127 128 if (ret != 0) { 129 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 130 list_del(&msginfo->msglistentry); 131 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, 132 flags); 133 return ret; 134 } 135 136 /* Wait for the connection response */ 137 wait_for_completion(&msginfo->waitevent); 138 139 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 140 list_del(&msginfo->msglistentry); 141 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 142 143 /* Check if successful */ 144 if (msginfo->response.version_response.version_supported) { 145 vmbus_connection.conn_state = CONNECTED; 146 147 if (version >= VERSION_WIN10_V5) 148 vmbus_connection.msg_conn_id = 149 msginfo->response.version_response.msg_conn_id; 150 } else { 151 return -ECONNREFUSED; 152 } 153 154 return ret; 155 } 156 157 /* 158 * vmbus_connect - Sends a connect request on the partition service connection 159 */ 160 int vmbus_connect(void) 161 { 162 struct vmbus_channel_msginfo *msginfo = NULL; 163 int i, ret = 0; 164 __u32 version; 165 166 /* Initialize the vmbus connection */ 167 vmbus_connection.conn_state = CONNECTING; 168 vmbus_connection.work_queue = create_workqueue("hv_vmbus_con"); 169 if (!vmbus_connection.work_queue) { 170 ret = -ENOMEM; 171 goto cleanup; 172 } 173 174 vmbus_connection.handle_primary_chan_wq = 175 create_workqueue("hv_pri_chan"); 176 if (!vmbus_connection.handle_primary_chan_wq) { 177 ret = -ENOMEM; 178 goto cleanup; 179 } 180 181 vmbus_connection.handle_sub_chan_wq = 182 create_workqueue("hv_sub_chan"); 183 if (!vmbus_connection.handle_sub_chan_wq) { 184 ret = -ENOMEM; 185 goto cleanup; 186 } 187 188 INIT_LIST_HEAD(&vmbus_connection.chn_msg_list); 189 spin_lock_init(&vmbus_connection.channelmsg_lock); 190 191 INIT_LIST_HEAD(&vmbus_connection.chn_list); 192 mutex_init(&vmbus_connection.channel_mutex); 193 194 /* 195 * Setup the vmbus event connection for channel interrupt 196 * abstraction stuff 197 */ 198 vmbus_connection.int_page = 199 (void *)hv_alloc_hyperv_zeroed_page(); 200 if (vmbus_connection.int_page == NULL) { 201 ret = -ENOMEM; 202 goto cleanup; 203 } 204 205 vmbus_connection.recv_int_page = vmbus_connection.int_page; 206 vmbus_connection.send_int_page = 207 (void *)((unsigned long)vmbus_connection.int_page + 208 (HV_HYP_PAGE_SIZE >> 1)); 209 210 /* 211 * Setup the monitor notification facility. The 1st page for 212 * parent->child and the 2nd page for child->parent 213 */ 214 vmbus_connection.monitor_pages[0] = (void *)hv_alloc_hyperv_zeroed_page(); 215 vmbus_connection.monitor_pages[1] = (void *)hv_alloc_hyperv_zeroed_page(); 216 if ((vmbus_connection.monitor_pages[0] == NULL) || 217 (vmbus_connection.monitor_pages[1] == NULL)) { 218 ret = -ENOMEM; 219 goto cleanup; 220 } 221 222 vmbus_connection.monitor_pages_original[0] 223 = vmbus_connection.monitor_pages[0]; 224 vmbus_connection.monitor_pages_original[1] 225 = vmbus_connection.monitor_pages[1]; 226 vmbus_connection.monitor_pages_pa[0] 227 = virt_to_phys(vmbus_connection.monitor_pages[0]); 228 vmbus_connection.monitor_pages_pa[1] 229 = virt_to_phys(vmbus_connection.monitor_pages[1]); 230 231 if (hv_is_isolation_supported()) { 232 ret = set_memory_decrypted((unsigned long) 233 vmbus_connection.monitor_pages[0], 234 1); 235 ret |= set_memory_decrypted((unsigned long) 236 vmbus_connection.monitor_pages[1], 237 1); 238 if (ret) 239 goto cleanup; 240 241 /* 242 * Isolation VM with AMD SNP needs to access monitor page via 243 * address space above shared gpa boundary. 244 */ 245 if (hv_isolation_type_snp()) { 246 vmbus_connection.monitor_pages_pa[0] += 247 ms_hyperv.shared_gpa_boundary; 248 vmbus_connection.monitor_pages_pa[1] += 249 ms_hyperv.shared_gpa_boundary; 250 251 vmbus_connection.monitor_pages[0] 252 = memremap(vmbus_connection.monitor_pages_pa[0], 253 HV_HYP_PAGE_SIZE, 254 MEMREMAP_WB); 255 if (!vmbus_connection.monitor_pages[0]) { 256 ret = -ENOMEM; 257 goto cleanup; 258 } 259 260 vmbus_connection.monitor_pages[1] 261 = memremap(vmbus_connection.monitor_pages_pa[1], 262 HV_HYP_PAGE_SIZE, 263 MEMREMAP_WB); 264 if (!vmbus_connection.monitor_pages[1]) { 265 ret = -ENOMEM; 266 goto cleanup; 267 } 268 } 269 270 /* 271 * Set memory host visibility hvcall smears memory 272 * and so zero monitor pages here. 273 */ 274 memset(vmbus_connection.monitor_pages[0], 0x00, 275 HV_HYP_PAGE_SIZE); 276 memset(vmbus_connection.monitor_pages[1], 0x00, 277 HV_HYP_PAGE_SIZE); 278 279 } 280 281 msginfo = kzalloc(sizeof(*msginfo) + 282 sizeof(struct vmbus_channel_initiate_contact), 283 GFP_KERNEL); 284 if (msginfo == NULL) { 285 ret = -ENOMEM; 286 goto cleanup; 287 } 288 289 /* 290 * Negotiate a compatible VMBUS version number with the 291 * host. We start with the highest number we can support 292 * and work our way down until we negotiate a compatible 293 * version. 294 */ 295 296 for (i = 0; ; i++) { 297 if (i == ARRAY_SIZE(vmbus_versions)) { 298 ret = -EDOM; 299 goto cleanup; 300 } 301 302 version = vmbus_versions[i]; 303 if (version > max_version) 304 continue; 305 306 ret = vmbus_negotiate_version(msginfo, version); 307 if (ret == -ETIMEDOUT) 308 goto cleanup; 309 310 if (vmbus_connection.conn_state == CONNECTED) 311 break; 312 } 313 314 if (hv_is_isolation_supported() && version < VERSION_WIN10_V5_2) { 315 pr_err("Invalid VMBus version %d.%d (expected >= %d.%d) from the host supporting isolation\n", 316 version >> 16, version & 0xFFFF, VERSION_WIN10_V5_2 >> 16, VERSION_WIN10_V5_2 & 0xFFFF); 317 ret = -EINVAL; 318 goto cleanup; 319 } 320 321 vmbus_proto_version = version; 322 pr_info("Vmbus version:%d.%d\n", 323 version >> 16, version & 0xFFFF); 324 325 vmbus_connection.channels = kcalloc(MAX_CHANNEL_RELIDS, 326 sizeof(struct vmbus_channel *), 327 GFP_KERNEL); 328 if (vmbus_connection.channels == NULL) { 329 ret = -ENOMEM; 330 goto cleanup; 331 } 332 333 kfree(msginfo); 334 return 0; 335 336 cleanup: 337 pr_err("Unable to connect to host\n"); 338 339 vmbus_connection.conn_state = DISCONNECTED; 340 vmbus_disconnect(); 341 342 kfree(msginfo); 343 344 return ret; 345 } 346 347 void vmbus_disconnect(void) 348 { 349 /* 350 * First send the unload request to the host. 351 */ 352 vmbus_initiate_unload(false); 353 354 if (vmbus_connection.handle_sub_chan_wq) 355 destroy_workqueue(vmbus_connection.handle_sub_chan_wq); 356 357 if (vmbus_connection.handle_primary_chan_wq) 358 destroy_workqueue(vmbus_connection.handle_primary_chan_wq); 359 360 if (vmbus_connection.work_queue) 361 destroy_workqueue(vmbus_connection.work_queue); 362 363 if (vmbus_connection.int_page) { 364 hv_free_hyperv_page((unsigned long)vmbus_connection.int_page); 365 vmbus_connection.int_page = NULL; 366 } 367 368 if (hv_is_isolation_supported()) { 369 /* 370 * memunmap() checks input address is ioremap address or not 371 * inside. It doesn't unmap any thing in the non-SNP CVM and 372 * so not check CVM type here. 373 */ 374 memunmap(vmbus_connection.monitor_pages[0]); 375 memunmap(vmbus_connection.monitor_pages[1]); 376 377 set_memory_encrypted((unsigned long) 378 vmbus_connection.monitor_pages_original[0], 379 1); 380 set_memory_encrypted((unsigned long) 381 vmbus_connection.monitor_pages_original[1], 382 1); 383 } 384 385 hv_free_hyperv_page((unsigned long) 386 vmbus_connection.monitor_pages_original[0]); 387 hv_free_hyperv_page((unsigned long) 388 vmbus_connection.monitor_pages_original[1]); 389 vmbus_connection.monitor_pages_original[0] = 390 vmbus_connection.monitor_pages[0] = NULL; 391 vmbus_connection.monitor_pages_original[1] = 392 vmbus_connection.monitor_pages[1] = NULL; 393 } 394 395 /* 396 * relid2channel - Get the channel object given its 397 * child relative id (ie channel id) 398 */ 399 struct vmbus_channel *relid2channel(u32 relid) 400 { 401 if (WARN_ON(relid >= MAX_CHANNEL_RELIDS)) 402 return NULL; 403 return READ_ONCE(vmbus_connection.channels[relid]); 404 } 405 406 /* 407 * vmbus_on_event - Process a channel event notification 408 * 409 * For batched channels (default) optimize host to guest signaling 410 * by ensuring: 411 * 1. While reading the channel, we disable interrupts from host. 412 * 2. Ensure that we process all posted messages from the host 413 * before returning from this callback. 414 * 3. Once we return, enable signaling from the host. Once this 415 * state is set we check to see if additional packets are 416 * available to read. In this case we repeat the process. 417 * If this tasklet has been running for a long time 418 * then reschedule ourselves. 419 */ 420 void vmbus_on_event(unsigned long data) 421 { 422 struct vmbus_channel *channel = (void *) data; 423 unsigned long time_limit = jiffies + 2; 424 425 trace_vmbus_on_event(channel); 426 427 hv_debug_delay_test(channel, INTERRUPT_DELAY); 428 do { 429 void (*callback_fn)(void *); 430 431 /* A channel once created is persistent even when 432 * there is no driver handling the device. An 433 * unloading driver sets the onchannel_callback to NULL. 434 */ 435 callback_fn = READ_ONCE(channel->onchannel_callback); 436 if (unlikely(callback_fn == NULL)) 437 return; 438 439 (*callback_fn)(channel->channel_callback_context); 440 441 if (channel->callback_mode != HV_CALL_BATCHED) 442 return; 443 444 if (likely(hv_end_read(&channel->inbound) == 0)) 445 return; 446 447 hv_begin_read(&channel->inbound); 448 } while (likely(time_before(jiffies, time_limit))); 449 450 /* The time limit (2 jiffies) has been reached */ 451 tasklet_schedule(&channel->callback_event); 452 } 453 454 /* 455 * vmbus_post_msg - Send a msg on the vmbus's message connection 456 */ 457 int vmbus_post_msg(void *buffer, size_t buflen, bool can_sleep) 458 { 459 struct vmbus_channel_message_header *hdr; 460 union hv_connection_id conn_id; 461 int ret = 0; 462 int retries = 0; 463 u32 usec = 1; 464 465 conn_id.asu32 = 0; 466 conn_id.u.id = vmbus_connection.msg_conn_id; 467 468 /* 469 * hv_post_message() can have transient failures because of 470 * insufficient resources. Retry the operation a couple of 471 * times before giving up. 472 */ 473 while (retries < 100) { 474 ret = hv_post_message(conn_id, 1, buffer, buflen); 475 476 switch (ret) { 477 case HV_STATUS_INVALID_CONNECTION_ID: 478 /* 479 * See vmbus_negotiate_version(): VMBus protocol 5.0 480 * and higher require that we must use 481 * VMBUS_MESSAGE_CONNECTION_ID_4 for the Initiate 482 * Contact message, but on old hosts that only 483 * support VMBus protocol 4.0 or lower, here we get 484 * HV_STATUS_INVALID_CONNECTION_ID and we should 485 * return an error immediately without retrying. 486 */ 487 hdr = buffer; 488 if (hdr->msgtype == CHANNELMSG_INITIATE_CONTACT) 489 return -EINVAL; 490 /* 491 * We could get this if we send messages too 492 * frequently. 493 */ 494 ret = -EAGAIN; 495 break; 496 case HV_STATUS_INSUFFICIENT_MEMORY: 497 case HV_STATUS_INSUFFICIENT_BUFFERS: 498 ret = -ENOBUFS; 499 break; 500 case HV_STATUS_SUCCESS: 501 return ret; 502 default: 503 pr_err("hv_post_msg() failed; error code:%d\n", ret); 504 return -EINVAL; 505 } 506 507 retries++; 508 if (can_sleep && usec > 1000) 509 msleep(usec / 1000); 510 else if (usec < MAX_UDELAY_MS * 1000) 511 udelay(usec); 512 else 513 mdelay(usec / 1000); 514 515 if (retries < 22) 516 usec *= 2; 517 } 518 return ret; 519 } 520 521 /* 522 * vmbus_set_event - Send an event notification to the parent 523 */ 524 void vmbus_set_event(struct vmbus_channel *channel) 525 { 526 u32 child_relid = channel->offermsg.child_relid; 527 528 if (!channel->is_dedicated_interrupt) 529 vmbus_send_interrupt(child_relid); 530 531 ++channel->sig_events; 532 533 if (hv_isolation_type_snp()) 534 hv_ghcb_hypercall(HVCALL_SIGNAL_EVENT, &channel->sig_event, 535 NULL, sizeof(channel->sig_event)); 536 else 537 hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, channel->sig_event); 538 } 539 EXPORT_SYMBOL_GPL(vmbus_set_event); 540