1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * 4 * Copyright (c) 2009, Microsoft Corporation. 5 * 6 * Authors: 7 * Haiyang Zhang <haiyangz@microsoft.com> 8 * Hank Janssen <hjanssen@microsoft.com> 9 */ 10 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 11 12 #include <linux/kernel.h> 13 #include <linux/sched.h> 14 #include <linux/wait.h> 15 #include <linux/delay.h> 16 #include <linux/mm.h> 17 #include <linux/module.h> 18 #include <linux/slab.h> 19 #include <linux/vmalloc.h> 20 #include <linux/hyperv.h> 21 #include <linux/export.h> 22 #include <linux/io.h> 23 #include <linux/set_memory.h> 24 #include <asm/mshyperv.h> 25 26 #include "hyperv_vmbus.h" 27 28 29 struct vmbus_connection vmbus_connection = { 30 .conn_state = DISCONNECTED, 31 .unload_event = COMPLETION_INITIALIZER( 32 vmbus_connection.unload_event), 33 .next_gpadl_handle = ATOMIC_INIT(0xE1E10), 34 35 .ready_for_suspend_event = COMPLETION_INITIALIZER( 36 vmbus_connection.ready_for_suspend_event), 37 .ready_for_resume_event = COMPLETION_INITIALIZER( 38 vmbus_connection.ready_for_resume_event), 39 }; 40 EXPORT_SYMBOL_GPL(vmbus_connection); 41 42 /* 43 * Negotiated protocol version with the host. 44 */ 45 __u32 vmbus_proto_version; 46 EXPORT_SYMBOL_GPL(vmbus_proto_version); 47 48 /* 49 * Table of VMBus versions listed from newest to oldest. 50 * VERSION_WIN7 and VERSION_WS2008 are no longer supported in 51 * Linux guests and are not listed. 52 */ 53 static __u32 vmbus_versions[] = { 54 VERSION_WIN10_V5_3, 55 VERSION_WIN10_V5_2, 56 VERSION_WIN10_V5_1, 57 VERSION_WIN10_V5, 58 VERSION_WIN10_V4_1, 59 VERSION_WIN10, 60 VERSION_WIN8_1, 61 VERSION_WIN8 62 }; 63 64 /* 65 * Maximal VMBus protocol version guests can negotiate. Useful to cap the 66 * VMBus version for testing and debugging purpose. 67 */ 68 static uint max_version = VERSION_WIN10_V5_3; 69 70 module_param(max_version, uint, S_IRUGO); 71 MODULE_PARM_DESC(max_version, 72 "Maximal VMBus protocol version which can be negotiated"); 73 74 int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version) 75 { 76 int ret = 0; 77 struct vmbus_channel_initiate_contact *msg; 78 unsigned long flags; 79 80 init_completion(&msginfo->waitevent); 81 82 msg = (struct vmbus_channel_initiate_contact *)msginfo->msg; 83 84 memset(msg, 0, sizeof(*msg)); 85 msg->header.msgtype = CHANNELMSG_INITIATE_CONTACT; 86 msg->vmbus_version_requested = version; 87 88 /* 89 * VMBus protocol 5.0 (VERSION_WIN10_V5) and higher require that we must 90 * use VMBUS_MESSAGE_CONNECTION_ID_4 for the Initiate Contact Message, 91 * and for subsequent messages, we must use the Message Connection ID 92 * field in the host-returned Version Response Message. And, with 93 * VERSION_WIN10_V5 and higher, we don't use msg->interrupt_page, but we 94 * tell the host explicitly that we still use VMBUS_MESSAGE_SINT(2) for 95 * compatibility. 96 * 97 * On old hosts, we should always use VMBUS_MESSAGE_CONNECTION_ID (1). 98 */ 99 if (version >= VERSION_WIN10_V5) { 100 msg->msg_sint = VMBUS_MESSAGE_SINT; 101 vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID_4; 102 } else { 103 msg->interrupt_page = virt_to_phys(vmbus_connection.int_page); 104 vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID; 105 } 106 107 /* 108 * shared_gpa_boundary is zero in non-SNP VMs, so it's safe to always 109 * bitwise OR it 110 */ 111 msg->monitor_page1 = virt_to_phys(vmbus_connection.monitor_pages[0]) | 112 ms_hyperv.shared_gpa_boundary; 113 msg->monitor_page2 = virt_to_phys(vmbus_connection.monitor_pages[1]) | 114 ms_hyperv.shared_gpa_boundary; 115 116 msg->target_vcpu = hv_cpu_number_to_vp_number(VMBUS_CONNECT_CPU); 117 118 /* 119 * Add to list before we send the request since we may 120 * receive the response before returning from this routine 121 */ 122 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 123 list_add_tail(&msginfo->msglistentry, 124 &vmbus_connection.chn_msg_list); 125 126 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 127 128 ret = vmbus_post_msg(msg, 129 sizeof(struct vmbus_channel_initiate_contact), 130 true); 131 132 trace_vmbus_negotiate_version(msg, ret); 133 134 if (ret != 0) { 135 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 136 list_del(&msginfo->msglistentry); 137 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, 138 flags); 139 return ret; 140 } 141 142 /* Wait for the connection response */ 143 wait_for_completion(&msginfo->waitevent); 144 145 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 146 list_del(&msginfo->msglistentry); 147 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 148 149 /* Check if successful */ 150 if (msginfo->response.version_response.version_supported) { 151 vmbus_connection.conn_state = CONNECTED; 152 153 if (version >= VERSION_WIN10_V5) 154 vmbus_connection.msg_conn_id = 155 msginfo->response.version_response.msg_conn_id; 156 } else { 157 return -ECONNREFUSED; 158 } 159 160 return ret; 161 } 162 163 /* 164 * vmbus_connect - Sends a connect request on the partition service connection 165 */ 166 int vmbus_connect(void) 167 { 168 struct vmbus_channel_msginfo *msginfo = NULL; 169 int i, ret = 0; 170 __u32 version; 171 172 /* Initialize the vmbus connection */ 173 vmbus_connection.conn_state = CONNECTING; 174 vmbus_connection.work_queue = create_workqueue("hv_vmbus_con"); 175 if (!vmbus_connection.work_queue) { 176 ret = -ENOMEM; 177 goto cleanup; 178 } 179 180 vmbus_connection.rescind_work_queue = 181 create_workqueue("hv_vmbus_rescind"); 182 if (!vmbus_connection.rescind_work_queue) { 183 ret = -ENOMEM; 184 goto cleanup; 185 } 186 vmbus_connection.ignore_any_offer_msg = false; 187 188 vmbus_connection.handle_primary_chan_wq = 189 create_workqueue("hv_pri_chan"); 190 if (!vmbus_connection.handle_primary_chan_wq) { 191 ret = -ENOMEM; 192 goto cleanup; 193 } 194 195 vmbus_connection.handle_sub_chan_wq = 196 create_workqueue("hv_sub_chan"); 197 if (!vmbus_connection.handle_sub_chan_wq) { 198 ret = -ENOMEM; 199 goto cleanup; 200 } 201 202 INIT_LIST_HEAD(&vmbus_connection.chn_msg_list); 203 spin_lock_init(&vmbus_connection.channelmsg_lock); 204 205 INIT_LIST_HEAD(&vmbus_connection.chn_list); 206 mutex_init(&vmbus_connection.channel_mutex); 207 208 /* 209 * Setup the vmbus event connection for channel interrupt 210 * abstraction stuff 211 */ 212 vmbus_connection.int_page = hv_alloc_hyperv_zeroed_page(); 213 if (vmbus_connection.int_page == NULL) { 214 ret = -ENOMEM; 215 goto cleanup; 216 } 217 218 vmbus_connection.recv_int_page = vmbus_connection.int_page; 219 vmbus_connection.send_int_page = 220 (void *)((unsigned long)vmbus_connection.int_page + 221 (HV_HYP_PAGE_SIZE >> 1)); 222 223 /* 224 * Setup the monitor notification facility. The 1st page for 225 * parent->child and the 2nd page for child->parent 226 */ 227 vmbus_connection.monitor_pages[0] = hv_alloc_hyperv_page(); 228 vmbus_connection.monitor_pages[1] = hv_alloc_hyperv_page(); 229 if ((vmbus_connection.monitor_pages[0] == NULL) || 230 (vmbus_connection.monitor_pages[1] == NULL)) { 231 ret = -ENOMEM; 232 goto cleanup; 233 } 234 235 ret = set_memory_decrypted((unsigned long) 236 vmbus_connection.monitor_pages[0], 1); 237 ret |= set_memory_decrypted((unsigned long) 238 vmbus_connection.monitor_pages[1], 1); 239 if (ret) 240 goto cleanup; 241 242 /* 243 * Set_memory_decrypted() will change the memory contents if 244 * decryption occurs, so zero monitor pages here. 245 */ 246 memset(vmbus_connection.monitor_pages[0], 0x00, HV_HYP_PAGE_SIZE); 247 memset(vmbus_connection.monitor_pages[1], 0x00, HV_HYP_PAGE_SIZE); 248 249 msginfo = kzalloc(sizeof(*msginfo) + 250 sizeof(struct vmbus_channel_initiate_contact), 251 GFP_KERNEL); 252 if (msginfo == NULL) { 253 ret = -ENOMEM; 254 goto cleanup; 255 } 256 257 /* 258 * Negotiate a compatible VMBUS version number with the 259 * host. We start with the highest number we can support 260 * and work our way down until we negotiate a compatible 261 * version. 262 */ 263 264 for (i = 0; ; i++) { 265 if (i == ARRAY_SIZE(vmbus_versions)) { 266 ret = -EDOM; 267 goto cleanup; 268 } 269 270 version = vmbus_versions[i]; 271 if (version > max_version) 272 continue; 273 274 ret = vmbus_negotiate_version(msginfo, version); 275 if (ret == -ETIMEDOUT) 276 goto cleanup; 277 278 if (vmbus_connection.conn_state == CONNECTED) 279 break; 280 } 281 282 if (hv_is_isolation_supported() && version < VERSION_WIN10_V5_2) { 283 pr_err("Invalid VMBus version %d.%d (expected >= %d.%d) from the host supporting isolation\n", 284 version >> 16, version & 0xFFFF, VERSION_WIN10_V5_2 >> 16, VERSION_WIN10_V5_2 & 0xFFFF); 285 ret = -EINVAL; 286 goto cleanup; 287 } 288 289 vmbus_proto_version = version; 290 pr_info("Vmbus version:%d.%d\n", 291 version >> 16, version & 0xFFFF); 292 293 vmbus_connection.channels = kcalloc(MAX_CHANNEL_RELIDS, 294 sizeof(struct vmbus_channel *), 295 GFP_KERNEL); 296 if (vmbus_connection.channels == NULL) { 297 ret = -ENOMEM; 298 goto cleanup; 299 } 300 301 kfree(msginfo); 302 return 0; 303 304 cleanup: 305 pr_err("Unable to connect to host\n"); 306 307 vmbus_connection.conn_state = DISCONNECTED; 308 vmbus_disconnect(); 309 310 kfree(msginfo); 311 312 return ret; 313 } 314 315 void vmbus_disconnect(void) 316 { 317 /* 318 * First send the unload request to the host. 319 */ 320 vmbus_initiate_unload(false); 321 322 if (vmbus_connection.handle_sub_chan_wq) 323 destroy_workqueue(vmbus_connection.handle_sub_chan_wq); 324 325 if (vmbus_connection.handle_primary_chan_wq) 326 destroy_workqueue(vmbus_connection.handle_primary_chan_wq); 327 328 if (vmbus_connection.rescind_work_queue) 329 destroy_workqueue(vmbus_connection.rescind_work_queue); 330 331 if (vmbus_connection.work_queue) 332 destroy_workqueue(vmbus_connection.work_queue); 333 334 if (vmbus_connection.int_page) { 335 hv_free_hyperv_page(vmbus_connection.int_page); 336 vmbus_connection.int_page = NULL; 337 } 338 339 set_memory_encrypted((unsigned long)vmbus_connection.monitor_pages[0], 1); 340 set_memory_encrypted((unsigned long)vmbus_connection.monitor_pages[1], 1); 341 342 hv_free_hyperv_page(vmbus_connection.monitor_pages[0]); 343 hv_free_hyperv_page(vmbus_connection.monitor_pages[1]); 344 vmbus_connection.monitor_pages[0] = NULL; 345 vmbus_connection.monitor_pages[1] = NULL; 346 } 347 348 /* 349 * relid2channel - Get the channel object given its 350 * child relative id (ie channel id) 351 */ 352 struct vmbus_channel *relid2channel(u32 relid) 353 { 354 if (vmbus_connection.channels == NULL) { 355 pr_warn_once("relid2channel: relid=%d: No channels mapped!\n", relid); 356 return NULL; 357 } 358 if (WARN_ON(relid >= MAX_CHANNEL_RELIDS)) 359 return NULL; 360 return READ_ONCE(vmbus_connection.channels[relid]); 361 } 362 363 /* 364 * vmbus_on_event - Process a channel event notification 365 * 366 * For batched channels (default) optimize host to guest signaling 367 * by ensuring: 368 * 1. While reading the channel, we disable interrupts from host. 369 * 2. Ensure that we process all posted messages from the host 370 * before returning from this callback. 371 * 3. Once we return, enable signaling from the host. Once this 372 * state is set we check to see if additional packets are 373 * available to read. In this case we repeat the process. 374 * If this tasklet has been running for a long time 375 * then reschedule ourselves. 376 */ 377 void vmbus_on_event(unsigned long data) 378 { 379 struct vmbus_channel *channel = (void *) data; 380 void (*callback_fn)(void *context); 381 382 trace_vmbus_on_event(channel); 383 384 hv_debug_delay_test(channel, INTERRUPT_DELAY); 385 386 /* A channel once created is persistent even when 387 * there is no driver handling the device. An 388 * unloading driver sets the onchannel_callback to NULL. 389 */ 390 callback_fn = READ_ONCE(channel->onchannel_callback); 391 if (unlikely(!callback_fn)) 392 return; 393 394 (*callback_fn)(channel->channel_callback_context); 395 396 if (channel->callback_mode != HV_CALL_BATCHED) 397 return; 398 399 if (likely(hv_end_read(&channel->inbound) == 0)) 400 return; 401 402 hv_begin_read(&channel->inbound); 403 tasklet_schedule(&channel->callback_event); 404 } 405 406 /* 407 * vmbus_post_msg - Send a msg on the vmbus's message connection 408 */ 409 int vmbus_post_msg(void *buffer, size_t buflen, bool can_sleep) 410 { 411 struct vmbus_channel_message_header *hdr; 412 union hv_connection_id conn_id; 413 int ret = 0; 414 int retries = 0; 415 u32 usec = 1; 416 417 conn_id.asu32 = 0; 418 conn_id.u.id = vmbus_connection.msg_conn_id; 419 420 /* 421 * hv_post_message() can have transient failures because of 422 * insufficient resources. Retry the operation a couple of 423 * times before giving up. 424 */ 425 while (retries < 100) { 426 ret = hv_post_message(conn_id, 1, buffer, buflen); 427 428 switch (ret) { 429 case HV_STATUS_INVALID_CONNECTION_ID: 430 /* 431 * See vmbus_negotiate_version(): VMBus protocol 5.0 432 * and higher require that we must use 433 * VMBUS_MESSAGE_CONNECTION_ID_4 for the Initiate 434 * Contact message, but on old hosts that only 435 * support VMBus protocol 4.0 or lower, here we get 436 * HV_STATUS_INVALID_CONNECTION_ID and we should 437 * return an error immediately without retrying. 438 */ 439 hdr = buffer; 440 if (hdr->msgtype == CHANNELMSG_INITIATE_CONTACT) 441 return -EINVAL; 442 /* 443 * We could get this if we send messages too 444 * frequently. 445 */ 446 ret = -EAGAIN; 447 break; 448 case HV_STATUS_INSUFFICIENT_MEMORY: 449 case HV_STATUS_INSUFFICIENT_BUFFERS: 450 ret = -ENOBUFS; 451 break; 452 case HV_STATUS_SUCCESS: 453 return ret; 454 default: 455 pr_err("hv_post_msg() failed; error code:%d\n", ret); 456 return -EINVAL; 457 } 458 459 retries++; 460 if (can_sleep && usec > 1000) 461 msleep(usec / 1000); 462 else if (usec < MAX_UDELAY_MS * 1000) 463 udelay(usec); 464 else 465 mdelay(usec / 1000); 466 467 if (retries < 22) 468 usec *= 2; 469 } 470 return ret; 471 } 472 473 /* 474 * vmbus_set_event - Send an event notification to the parent 475 */ 476 void vmbus_set_event(struct vmbus_channel *channel) 477 { 478 u32 child_relid = channel->offermsg.child_relid; 479 480 if (!channel->is_dedicated_interrupt) 481 vmbus_send_interrupt(child_relid); 482 483 ++channel->sig_events; 484 485 if (hv_isolation_type_snp()) 486 hv_ghcb_hypercall(HVCALL_SIGNAL_EVENT, &channel->sig_event, 487 NULL, sizeof(channel->sig_event)); 488 else 489 hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, channel->sig_event); 490 } 491 EXPORT_SYMBOL_GPL(vmbus_set_event); 492