1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * 4 * Copyright (c) 2009, Microsoft Corporation. 5 * 6 * Authors: 7 * Haiyang Zhang <haiyangz@microsoft.com> 8 * Hank Janssen <hjanssen@microsoft.com> 9 */ 10 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 11 12 #include <linux/kernel.h> 13 #include <linux/sched.h> 14 #include <linux/wait.h> 15 #include <linux/delay.h> 16 #include <linux/mm.h> 17 #include <linux/module.h> 18 #include <linux/slab.h> 19 #include <linux/vmalloc.h> 20 #include <linux/hyperv.h> 21 #include <linux/export.h> 22 #include <asm/mshyperv.h> 23 24 #include "hyperv_vmbus.h" 25 26 27 struct vmbus_connection vmbus_connection = { 28 .conn_state = DISCONNECTED, 29 .next_gpadl_handle = ATOMIC_INIT(0xE1E10), 30 31 .ready_for_suspend_event= COMPLETION_INITIALIZER( 32 vmbus_connection.ready_for_suspend_event), 33 .ready_for_resume_event = COMPLETION_INITIALIZER( 34 vmbus_connection.ready_for_resume_event), 35 }; 36 EXPORT_SYMBOL_GPL(vmbus_connection); 37 38 /* 39 * Negotiated protocol version with the host. 40 */ 41 __u32 vmbus_proto_version; 42 EXPORT_SYMBOL_GPL(vmbus_proto_version); 43 44 /* 45 * Table of VMBus versions listed from newest to oldest. 46 */ 47 static __u32 vmbus_versions[] = { 48 VERSION_WIN10_V5_2, 49 VERSION_WIN10_V5_1, 50 VERSION_WIN10_V5, 51 VERSION_WIN10_V4_1, 52 VERSION_WIN10, 53 VERSION_WIN8_1, 54 VERSION_WIN8, 55 VERSION_WIN7, 56 VERSION_WS2008 57 }; 58 59 /* 60 * Maximal VMBus protocol version guests can negotiate. Useful to cap the 61 * VMBus version for testing and debugging purpose. 62 */ 63 static uint max_version = VERSION_WIN10_V5_2; 64 65 module_param(max_version, uint, S_IRUGO); 66 MODULE_PARM_DESC(max_version, 67 "Maximal VMBus protocol version which can be negotiated"); 68 69 int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version) 70 { 71 int ret = 0; 72 unsigned int cur_cpu; 73 struct vmbus_channel_initiate_contact *msg; 74 unsigned long flags; 75 76 init_completion(&msginfo->waitevent); 77 78 msg = (struct vmbus_channel_initiate_contact *)msginfo->msg; 79 80 memset(msg, 0, sizeof(*msg)); 81 msg->header.msgtype = CHANNELMSG_INITIATE_CONTACT; 82 msg->vmbus_version_requested = version; 83 84 /* 85 * VMBus protocol 5.0 (VERSION_WIN10_V5) and higher require that we must 86 * use VMBUS_MESSAGE_CONNECTION_ID_4 for the Initiate Contact Message, 87 * and for subsequent messages, we must use the Message Connection ID 88 * field in the host-returned Version Response Message. And, with 89 * VERSION_WIN10_V5 and higher, we don't use msg->interrupt_page, but we 90 * tell the host explicitly that we still use VMBUS_MESSAGE_SINT(2) for 91 * compatibility. 92 * 93 * On old hosts, we should always use VMBUS_MESSAGE_CONNECTION_ID (1). 94 */ 95 if (version >= VERSION_WIN10_V5) { 96 msg->msg_sint = VMBUS_MESSAGE_SINT; 97 vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID_4; 98 } else { 99 msg->interrupt_page = virt_to_phys(vmbus_connection.int_page); 100 vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID; 101 } 102 103 msg->monitor_page1 = virt_to_phys(vmbus_connection.monitor_pages[0]); 104 msg->monitor_page2 = virt_to_phys(vmbus_connection.monitor_pages[1]); 105 /* 106 * We want all channel messages to be delivered on CPU 0. 107 * This has been the behavior pre-win8. This is not 108 * perf issue and having all channel messages delivered on CPU 0 109 * would be ok. 110 * For post win8 hosts, we support receiving channel messagges on 111 * all the CPUs. This is needed for kexec to work correctly where 112 * the CPU attempting to connect may not be CPU 0. 113 */ 114 if (version >= VERSION_WIN8_1) { 115 cur_cpu = get_cpu(); 116 msg->target_vcpu = hv_cpu_number_to_vp_number(cur_cpu); 117 vmbus_connection.connect_cpu = cur_cpu; 118 put_cpu(); 119 } else { 120 msg->target_vcpu = 0; 121 vmbus_connection.connect_cpu = 0; 122 } 123 124 /* 125 * Add to list before we send the request since we may 126 * receive the response before returning from this routine 127 */ 128 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 129 list_add_tail(&msginfo->msglistentry, 130 &vmbus_connection.chn_msg_list); 131 132 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 133 134 ret = vmbus_post_msg(msg, 135 sizeof(struct vmbus_channel_initiate_contact), 136 true); 137 138 trace_vmbus_negotiate_version(msg, ret); 139 140 if (ret != 0) { 141 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 142 list_del(&msginfo->msglistentry); 143 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, 144 flags); 145 return ret; 146 } 147 148 /* Wait for the connection response */ 149 wait_for_completion(&msginfo->waitevent); 150 151 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 152 list_del(&msginfo->msglistentry); 153 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 154 155 /* Check if successful */ 156 if (msginfo->response.version_response.version_supported) { 157 vmbus_connection.conn_state = CONNECTED; 158 159 if (version >= VERSION_WIN10_V5) 160 vmbus_connection.msg_conn_id = 161 msginfo->response.version_response.msg_conn_id; 162 } else { 163 return -ECONNREFUSED; 164 } 165 166 return ret; 167 } 168 169 /* 170 * vmbus_connect - Sends a connect request on the partition service connection 171 */ 172 int vmbus_connect(void) 173 { 174 struct vmbus_channel_msginfo *msginfo = NULL; 175 int i, ret = 0; 176 __u32 version; 177 178 /* Initialize the vmbus connection */ 179 vmbus_connection.conn_state = CONNECTING; 180 vmbus_connection.work_queue = create_workqueue("hv_vmbus_con"); 181 if (!vmbus_connection.work_queue) { 182 ret = -ENOMEM; 183 goto cleanup; 184 } 185 186 vmbus_connection.handle_primary_chan_wq = 187 create_workqueue("hv_pri_chan"); 188 if (!vmbus_connection.handle_primary_chan_wq) { 189 ret = -ENOMEM; 190 goto cleanup; 191 } 192 193 vmbus_connection.handle_sub_chan_wq = 194 create_workqueue("hv_sub_chan"); 195 if (!vmbus_connection.handle_sub_chan_wq) { 196 ret = -ENOMEM; 197 goto cleanup; 198 } 199 200 INIT_LIST_HEAD(&vmbus_connection.chn_msg_list); 201 spin_lock_init(&vmbus_connection.channelmsg_lock); 202 203 INIT_LIST_HEAD(&vmbus_connection.chn_list); 204 mutex_init(&vmbus_connection.channel_mutex); 205 206 /* 207 * Setup the vmbus event connection for channel interrupt 208 * abstraction stuff 209 */ 210 vmbus_connection.int_page = 211 (void *)hv_alloc_hyperv_zeroed_page(); 212 if (vmbus_connection.int_page == NULL) { 213 ret = -ENOMEM; 214 goto cleanup; 215 } 216 217 vmbus_connection.recv_int_page = vmbus_connection.int_page; 218 vmbus_connection.send_int_page = 219 (void *)((unsigned long)vmbus_connection.int_page + 220 (HV_HYP_PAGE_SIZE >> 1)); 221 222 /* 223 * Setup the monitor notification facility. The 1st page for 224 * parent->child and the 2nd page for child->parent 225 */ 226 vmbus_connection.monitor_pages[0] = (void *)hv_alloc_hyperv_zeroed_page(); 227 vmbus_connection.monitor_pages[1] = (void *)hv_alloc_hyperv_zeroed_page(); 228 if ((vmbus_connection.monitor_pages[0] == NULL) || 229 (vmbus_connection.monitor_pages[1] == NULL)) { 230 ret = -ENOMEM; 231 goto cleanup; 232 } 233 234 msginfo = kzalloc(sizeof(*msginfo) + 235 sizeof(struct vmbus_channel_initiate_contact), 236 GFP_KERNEL); 237 if (msginfo == NULL) { 238 ret = -ENOMEM; 239 goto cleanup; 240 } 241 242 /* 243 * Negotiate a compatible VMBUS version number with the 244 * host. We start with the highest number we can support 245 * and work our way down until we negotiate a compatible 246 * version. 247 */ 248 249 for (i = 0; ; i++) { 250 if (i == ARRAY_SIZE(vmbus_versions)) 251 goto cleanup; 252 253 version = vmbus_versions[i]; 254 if (version > max_version) 255 continue; 256 257 ret = vmbus_negotiate_version(msginfo, version); 258 if (ret == -ETIMEDOUT) 259 goto cleanup; 260 261 if (vmbus_connection.conn_state == CONNECTED) 262 break; 263 } 264 265 vmbus_proto_version = version; 266 pr_info("Vmbus version:%d.%d\n", 267 version >> 16, version & 0xFFFF); 268 269 kfree(msginfo); 270 return 0; 271 272 cleanup: 273 pr_err("Unable to connect to host\n"); 274 275 vmbus_connection.conn_state = DISCONNECTED; 276 vmbus_disconnect(); 277 278 kfree(msginfo); 279 280 return ret; 281 } 282 283 void vmbus_disconnect(void) 284 { 285 /* 286 * First send the unload request to the host. 287 */ 288 vmbus_initiate_unload(false); 289 290 if (vmbus_connection.handle_sub_chan_wq) 291 destroy_workqueue(vmbus_connection.handle_sub_chan_wq); 292 293 if (vmbus_connection.handle_primary_chan_wq) 294 destroy_workqueue(vmbus_connection.handle_primary_chan_wq); 295 296 if (vmbus_connection.work_queue) 297 destroy_workqueue(vmbus_connection.work_queue); 298 299 if (vmbus_connection.int_page) { 300 hv_free_hyperv_page((unsigned long)vmbus_connection.int_page); 301 vmbus_connection.int_page = NULL; 302 } 303 304 hv_free_hyperv_page((unsigned long)vmbus_connection.monitor_pages[0]); 305 hv_free_hyperv_page((unsigned long)vmbus_connection.monitor_pages[1]); 306 vmbus_connection.monitor_pages[0] = NULL; 307 vmbus_connection.monitor_pages[1] = NULL; 308 } 309 310 /* 311 * relid2channel - Get the channel object given its 312 * child relative id (ie channel id) 313 */ 314 struct vmbus_channel *relid2channel(u32 relid) 315 { 316 struct vmbus_channel *channel; 317 struct vmbus_channel *found_channel = NULL; 318 struct list_head *cur, *tmp; 319 struct vmbus_channel *cur_sc; 320 321 BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); 322 323 list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { 324 if (channel->offermsg.child_relid == relid) { 325 found_channel = channel; 326 break; 327 } else if (!list_empty(&channel->sc_list)) { 328 /* 329 * Deal with sub-channels. 330 */ 331 list_for_each_safe(cur, tmp, &channel->sc_list) { 332 cur_sc = list_entry(cur, struct vmbus_channel, 333 sc_list); 334 if (cur_sc->offermsg.child_relid == relid) { 335 found_channel = cur_sc; 336 break; 337 } 338 } 339 } 340 } 341 342 return found_channel; 343 } 344 345 /* 346 * vmbus_on_event - Process a channel event notification 347 * 348 * For batched channels (default) optimize host to guest signaling 349 * by ensuring: 350 * 1. While reading the channel, we disable interrupts from host. 351 * 2. Ensure that we process all posted messages from the host 352 * before returning from this callback. 353 * 3. Once we return, enable signaling from the host. Once this 354 * state is set we check to see if additional packets are 355 * available to read. In this case we repeat the process. 356 * If this tasklet has been running for a long time 357 * then reschedule ourselves. 358 */ 359 void vmbus_on_event(unsigned long data) 360 { 361 struct vmbus_channel *channel = (void *) data; 362 unsigned long time_limit = jiffies + 2; 363 364 trace_vmbus_on_event(channel); 365 366 hv_debug_delay_test(channel, INTERRUPT_DELAY); 367 do { 368 void (*callback_fn)(void *); 369 370 /* A channel once created is persistent even when 371 * there is no driver handling the device. An 372 * unloading driver sets the onchannel_callback to NULL. 373 */ 374 callback_fn = READ_ONCE(channel->onchannel_callback); 375 if (unlikely(callback_fn == NULL)) 376 return; 377 378 (*callback_fn)(channel->channel_callback_context); 379 380 if (channel->callback_mode != HV_CALL_BATCHED) 381 return; 382 383 if (likely(hv_end_read(&channel->inbound) == 0)) 384 return; 385 386 hv_begin_read(&channel->inbound); 387 } while (likely(time_before(jiffies, time_limit))); 388 389 /* The time limit (2 jiffies) has been reached */ 390 tasklet_schedule(&channel->callback_event); 391 } 392 393 /* 394 * vmbus_post_msg - Send a msg on the vmbus's message connection 395 */ 396 int vmbus_post_msg(void *buffer, size_t buflen, bool can_sleep) 397 { 398 struct vmbus_channel_message_header *hdr; 399 union hv_connection_id conn_id; 400 int ret = 0; 401 int retries = 0; 402 u32 usec = 1; 403 404 conn_id.asu32 = 0; 405 conn_id.u.id = vmbus_connection.msg_conn_id; 406 407 /* 408 * hv_post_message() can have transient failures because of 409 * insufficient resources. Retry the operation a couple of 410 * times before giving up. 411 */ 412 while (retries < 100) { 413 ret = hv_post_message(conn_id, 1, buffer, buflen); 414 415 switch (ret) { 416 case HV_STATUS_INVALID_CONNECTION_ID: 417 /* 418 * See vmbus_negotiate_version(): VMBus protocol 5.0 419 * and higher require that we must use 420 * VMBUS_MESSAGE_CONNECTION_ID_4 for the Initiate 421 * Contact message, but on old hosts that only 422 * support VMBus protocol 4.0 or lower, here we get 423 * HV_STATUS_INVALID_CONNECTION_ID and we should 424 * return an error immediately without retrying. 425 */ 426 hdr = buffer; 427 if (hdr->msgtype == CHANNELMSG_INITIATE_CONTACT) 428 return -EINVAL; 429 /* 430 * We could get this if we send messages too 431 * frequently. 432 */ 433 ret = -EAGAIN; 434 break; 435 case HV_STATUS_INSUFFICIENT_MEMORY: 436 case HV_STATUS_INSUFFICIENT_BUFFERS: 437 ret = -ENOBUFS; 438 break; 439 case HV_STATUS_SUCCESS: 440 return ret; 441 default: 442 pr_err("hv_post_msg() failed; error code:%d\n", ret); 443 return -EINVAL; 444 } 445 446 retries++; 447 if (can_sleep && usec > 1000) 448 msleep(usec / 1000); 449 else if (usec < MAX_UDELAY_MS * 1000) 450 udelay(usec); 451 else 452 mdelay(usec / 1000); 453 454 if (retries < 22) 455 usec *= 2; 456 } 457 return ret; 458 } 459 460 /* 461 * vmbus_set_event - Send an event notification to the parent 462 */ 463 void vmbus_set_event(struct vmbus_channel *channel) 464 { 465 u32 child_relid = channel->offermsg.child_relid; 466 467 if (!channel->is_dedicated_interrupt) 468 vmbus_send_interrupt(child_relid); 469 470 ++channel->sig_events; 471 472 hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, channel->sig_event); 473 } 474 EXPORT_SYMBOL_GPL(vmbus_set_event); 475