1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * 4 * Copyright (c) 2009, Microsoft Corporation. 5 * 6 * Authors: 7 * Haiyang Zhang <haiyangz@microsoft.com> 8 * Hank Janssen <hjanssen@microsoft.com> 9 */ 10 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 11 12 #include <linux/kernel.h> 13 #include <linux/sched.h> 14 #include <linux/wait.h> 15 #include <linux/delay.h> 16 #include <linux/mm.h> 17 #include <linux/slab.h> 18 #include <linux/vmalloc.h> 19 #include <linux/hyperv.h> 20 #include <linux/export.h> 21 #include <asm/mshyperv.h> 22 23 #include "hyperv_vmbus.h" 24 25 26 struct vmbus_connection vmbus_connection = { 27 .conn_state = DISCONNECTED, 28 .next_gpadl_handle = ATOMIC_INIT(0xE1E10), 29 30 .ready_for_suspend_event= COMPLETION_INITIALIZER( 31 vmbus_connection.ready_for_suspend_event), 32 .ready_for_resume_event = COMPLETION_INITIALIZER( 33 vmbus_connection.ready_for_resume_event), 34 }; 35 EXPORT_SYMBOL_GPL(vmbus_connection); 36 37 /* 38 * Negotiated protocol version with the host. 39 */ 40 __u32 vmbus_proto_version; 41 EXPORT_SYMBOL_GPL(vmbus_proto_version); 42 43 static __u32 vmbus_get_next_version(__u32 current_version) 44 { 45 switch (current_version) { 46 case (VERSION_WIN7): 47 return VERSION_WS2008; 48 49 case (VERSION_WIN8): 50 return VERSION_WIN7; 51 52 case (VERSION_WIN8_1): 53 return VERSION_WIN8; 54 55 case (VERSION_WIN10): 56 return VERSION_WIN8_1; 57 58 case (VERSION_WIN10_V5): 59 return VERSION_WIN10; 60 61 case (VERSION_WS2008): 62 default: 63 return VERSION_INVAL; 64 } 65 } 66 67 int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version) 68 { 69 int ret = 0; 70 unsigned int cur_cpu; 71 struct vmbus_channel_initiate_contact *msg; 72 unsigned long flags; 73 74 init_completion(&msginfo->waitevent); 75 76 msg = (struct vmbus_channel_initiate_contact *)msginfo->msg; 77 78 memset(msg, 0, sizeof(*msg)); 79 msg->header.msgtype = CHANNELMSG_INITIATE_CONTACT; 80 msg->vmbus_version_requested = version; 81 82 /* 83 * VMBus protocol 5.0 (VERSION_WIN10_V5) requires that we must use 84 * VMBUS_MESSAGE_CONNECTION_ID_4 for the Initiate Contact Message, 85 * and for subsequent messages, we must use the Message Connection ID 86 * field in the host-returned Version Response Message. And, with 87 * VERSION_WIN10_V5, we don't use msg->interrupt_page, but we tell 88 * the host explicitly that we still use VMBUS_MESSAGE_SINT(2) for 89 * compatibility. 90 * 91 * On old hosts, we should always use VMBUS_MESSAGE_CONNECTION_ID (1). 92 */ 93 if (version >= VERSION_WIN10_V5) { 94 msg->msg_sint = VMBUS_MESSAGE_SINT; 95 vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID_4; 96 } else { 97 msg->interrupt_page = virt_to_phys(vmbus_connection.int_page); 98 vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID; 99 } 100 101 msg->monitor_page1 = virt_to_phys(vmbus_connection.monitor_pages[0]); 102 msg->monitor_page2 = virt_to_phys(vmbus_connection.monitor_pages[1]); 103 /* 104 * We want all channel messages to be delivered on CPU 0. 105 * This has been the behavior pre-win8. This is not 106 * perf issue and having all channel messages delivered on CPU 0 107 * would be ok. 108 * For post win8 hosts, we support receiving channel messagges on 109 * all the CPUs. This is needed for kexec to work correctly where 110 * the CPU attempting to connect may not be CPU 0. 111 */ 112 if (version >= VERSION_WIN8_1) { 113 cur_cpu = get_cpu(); 114 msg->target_vcpu = hv_cpu_number_to_vp_number(cur_cpu); 115 vmbus_connection.connect_cpu = cur_cpu; 116 put_cpu(); 117 } else { 118 msg->target_vcpu = 0; 119 vmbus_connection.connect_cpu = 0; 120 } 121 122 /* 123 * Add to list before we send the request since we may 124 * receive the response before returning from this routine 125 */ 126 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 127 list_add_tail(&msginfo->msglistentry, 128 &vmbus_connection.chn_msg_list); 129 130 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 131 132 ret = vmbus_post_msg(msg, 133 sizeof(struct vmbus_channel_initiate_contact), 134 true); 135 136 trace_vmbus_negotiate_version(msg, ret); 137 138 if (ret != 0) { 139 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 140 list_del(&msginfo->msglistentry); 141 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, 142 flags); 143 return ret; 144 } 145 146 /* Wait for the connection response */ 147 wait_for_completion(&msginfo->waitevent); 148 149 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 150 list_del(&msginfo->msglistentry); 151 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 152 153 /* Check if successful */ 154 if (msginfo->response.version_response.version_supported) { 155 vmbus_connection.conn_state = CONNECTED; 156 157 if (version >= VERSION_WIN10_V5) 158 vmbus_connection.msg_conn_id = 159 msginfo->response.version_response.msg_conn_id; 160 } else { 161 return -ECONNREFUSED; 162 } 163 164 return ret; 165 } 166 167 /* 168 * vmbus_connect - Sends a connect request on the partition service connection 169 */ 170 int vmbus_connect(void) 171 { 172 int ret = 0; 173 struct vmbus_channel_msginfo *msginfo = NULL; 174 __u32 version; 175 176 /* Initialize the vmbus connection */ 177 vmbus_connection.conn_state = CONNECTING; 178 vmbus_connection.work_queue = create_workqueue("hv_vmbus_con"); 179 if (!vmbus_connection.work_queue) { 180 ret = -ENOMEM; 181 goto cleanup; 182 } 183 184 vmbus_connection.handle_primary_chan_wq = 185 create_workqueue("hv_pri_chan"); 186 if (!vmbus_connection.handle_primary_chan_wq) { 187 ret = -ENOMEM; 188 goto cleanup; 189 } 190 191 vmbus_connection.handle_sub_chan_wq = 192 create_workqueue("hv_sub_chan"); 193 if (!vmbus_connection.handle_sub_chan_wq) { 194 ret = -ENOMEM; 195 goto cleanup; 196 } 197 198 INIT_LIST_HEAD(&vmbus_connection.chn_msg_list); 199 spin_lock_init(&vmbus_connection.channelmsg_lock); 200 201 INIT_LIST_HEAD(&vmbus_connection.chn_list); 202 mutex_init(&vmbus_connection.channel_mutex); 203 204 /* 205 * Setup the vmbus event connection for channel interrupt 206 * abstraction stuff 207 */ 208 vmbus_connection.int_page = 209 (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, 0); 210 if (vmbus_connection.int_page == NULL) { 211 ret = -ENOMEM; 212 goto cleanup; 213 } 214 215 vmbus_connection.recv_int_page = vmbus_connection.int_page; 216 vmbus_connection.send_int_page = 217 (void *)((unsigned long)vmbus_connection.int_page + 218 (PAGE_SIZE >> 1)); 219 220 /* 221 * Setup the monitor notification facility. The 1st page for 222 * parent->child and the 2nd page for child->parent 223 */ 224 vmbus_connection.monitor_pages[0] = (void *)__get_free_pages((GFP_KERNEL|__GFP_ZERO), 0); 225 vmbus_connection.monitor_pages[1] = (void *)__get_free_pages((GFP_KERNEL|__GFP_ZERO), 0); 226 if ((vmbus_connection.monitor_pages[0] == NULL) || 227 (vmbus_connection.monitor_pages[1] == NULL)) { 228 ret = -ENOMEM; 229 goto cleanup; 230 } 231 232 msginfo = kzalloc(sizeof(*msginfo) + 233 sizeof(struct vmbus_channel_initiate_contact), 234 GFP_KERNEL); 235 if (msginfo == NULL) { 236 ret = -ENOMEM; 237 goto cleanup; 238 } 239 240 /* 241 * Negotiate a compatible VMBUS version number with the 242 * host. We start with the highest number we can support 243 * and work our way down until we negotiate a compatible 244 * version. 245 */ 246 247 version = VERSION_CURRENT; 248 249 do { 250 ret = vmbus_negotiate_version(msginfo, version); 251 if (ret == -ETIMEDOUT) 252 goto cleanup; 253 254 if (vmbus_connection.conn_state == CONNECTED) 255 break; 256 257 version = vmbus_get_next_version(version); 258 } while (version != VERSION_INVAL); 259 260 if (version == VERSION_INVAL) 261 goto cleanup; 262 263 vmbus_proto_version = version; 264 pr_info("Vmbus version:%d.%d\n", 265 version >> 16, version & 0xFFFF); 266 267 kfree(msginfo); 268 return 0; 269 270 cleanup: 271 pr_err("Unable to connect to host\n"); 272 273 vmbus_connection.conn_state = DISCONNECTED; 274 vmbus_disconnect(); 275 276 kfree(msginfo); 277 278 return ret; 279 } 280 281 void vmbus_disconnect(void) 282 { 283 /* 284 * First send the unload request to the host. 285 */ 286 vmbus_initiate_unload(false); 287 288 if (vmbus_connection.handle_sub_chan_wq) 289 destroy_workqueue(vmbus_connection.handle_sub_chan_wq); 290 291 if (vmbus_connection.handle_primary_chan_wq) 292 destroy_workqueue(vmbus_connection.handle_primary_chan_wq); 293 294 if (vmbus_connection.work_queue) 295 destroy_workqueue(vmbus_connection.work_queue); 296 297 if (vmbus_connection.int_page) { 298 free_pages((unsigned long)vmbus_connection.int_page, 0); 299 vmbus_connection.int_page = NULL; 300 } 301 302 free_pages((unsigned long)vmbus_connection.monitor_pages[0], 0); 303 free_pages((unsigned long)vmbus_connection.monitor_pages[1], 0); 304 vmbus_connection.monitor_pages[0] = NULL; 305 vmbus_connection.monitor_pages[1] = NULL; 306 } 307 308 /* 309 * relid2channel - Get the channel object given its 310 * child relative id (ie channel id) 311 */ 312 struct vmbus_channel *relid2channel(u32 relid) 313 { 314 struct vmbus_channel *channel; 315 struct vmbus_channel *found_channel = NULL; 316 struct list_head *cur, *tmp; 317 struct vmbus_channel *cur_sc; 318 319 BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); 320 321 list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { 322 if (channel->offermsg.child_relid == relid) { 323 found_channel = channel; 324 break; 325 } else if (!list_empty(&channel->sc_list)) { 326 /* 327 * Deal with sub-channels. 328 */ 329 list_for_each_safe(cur, tmp, &channel->sc_list) { 330 cur_sc = list_entry(cur, struct vmbus_channel, 331 sc_list); 332 if (cur_sc->offermsg.child_relid == relid) { 333 found_channel = cur_sc; 334 break; 335 } 336 } 337 } 338 } 339 340 return found_channel; 341 } 342 343 /* 344 * vmbus_on_event - Process a channel event notification 345 * 346 * For batched channels (default) optimize host to guest signaling 347 * by ensuring: 348 * 1. While reading the channel, we disable interrupts from host. 349 * 2. Ensure that we process all posted messages from the host 350 * before returning from this callback. 351 * 3. Once we return, enable signaling from the host. Once this 352 * state is set we check to see if additional packets are 353 * available to read. In this case we repeat the process. 354 * If this tasklet has been running for a long time 355 * then reschedule ourselves. 356 */ 357 void vmbus_on_event(unsigned long data) 358 { 359 struct vmbus_channel *channel = (void *) data; 360 unsigned long time_limit = jiffies + 2; 361 362 trace_vmbus_on_event(channel); 363 364 do { 365 void (*callback_fn)(void *); 366 367 /* A channel once created is persistent even when 368 * there is no driver handling the device. An 369 * unloading driver sets the onchannel_callback to NULL. 370 */ 371 callback_fn = READ_ONCE(channel->onchannel_callback); 372 if (unlikely(callback_fn == NULL)) 373 return; 374 375 (*callback_fn)(channel->channel_callback_context); 376 377 if (channel->callback_mode != HV_CALL_BATCHED) 378 return; 379 380 if (likely(hv_end_read(&channel->inbound) == 0)) 381 return; 382 383 hv_begin_read(&channel->inbound); 384 } while (likely(time_before(jiffies, time_limit))); 385 386 /* The time limit (2 jiffies) has been reached */ 387 tasklet_schedule(&channel->callback_event); 388 } 389 390 /* 391 * vmbus_post_msg - Send a msg on the vmbus's message connection 392 */ 393 int vmbus_post_msg(void *buffer, size_t buflen, bool can_sleep) 394 { 395 struct vmbus_channel_message_header *hdr; 396 union hv_connection_id conn_id; 397 int ret = 0; 398 int retries = 0; 399 u32 usec = 1; 400 401 conn_id.asu32 = 0; 402 conn_id.u.id = vmbus_connection.msg_conn_id; 403 404 /* 405 * hv_post_message() can have transient failures because of 406 * insufficient resources. Retry the operation a couple of 407 * times before giving up. 408 */ 409 while (retries < 100) { 410 ret = hv_post_message(conn_id, 1, buffer, buflen); 411 412 switch (ret) { 413 case HV_STATUS_INVALID_CONNECTION_ID: 414 /* 415 * See vmbus_negotiate_version(): VMBus protocol 5.0 416 * requires that we must use 417 * VMBUS_MESSAGE_CONNECTION_ID_4 for the Initiate 418 * Contact message, but on old hosts that only 419 * support VMBus protocol 4.0 or lower, here we get 420 * HV_STATUS_INVALID_CONNECTION_ID and we should 421 * return an error immediately without retrying. 422 */ 423 hdr = buffer; 424 if (hdr->msgtype == CHANNELMSG_INITIATE_CONTACT) 425 return -EINVAL; 426 /* 427 * We could get this if we send messages too 428 * frequently. 429 */ 430 ret = -EAGAIN; 431 break; 432 case HV_STATUS_INSUFFICIENT_MEMORY: 433 case HV_STATUS_INSUFFICIENT_BUFFERS: 434 ret = -ENOBUFS; 435 break; 436 case HV_STATUS_SUCCESS: 437 return ret; 438 default: 439 pr_err("hv_post_msg() failed; error code:%d\n", ret); 440 return -EINVAL; 441 } 442 443 retries++; 444 if (can_sleep && usec > 1000) 445 msleep(usec / 1000); 446 else if (usec < MAX_UDELAY_MS * 1000) 447 udelay(usec); 448 else 449 mdelay(usec / 1000); 450 451 if (retries < 22) 452 usec *= 2; 453 } 454 return ret; 455 } 456 457 /* 458 * vmbus_set_event - Send an event notification to the parent 459 */ 460 void vmbus_set_event(struct vmbus_channel *channel) 461 { 462 u32 child_relid = channel->offermsg.child_relid; 463 464 if (!channel->is_dedicated_interrupt) 465 vmbus_send_interrupt(child_relid); 466 467 ++channel->sig_events; 468 469 hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, channel->sig_event); 470 } 471 EXPORT_SYMBOL_GPL(vmbus_set_event); 472