1 /* 2 * 3 * Copyright (c) 2009, Microsoft Corporation. 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms and conditions of the GNU General Public License, 7 * version 2, as published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 12 * more details. 13 * 14 * You should have received a copy of the GNU General Public License along with 15 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple 16 * Place - Suite 330, Boston, MA 02111-1307 USA. 17 * 18 * Authors: 19 * Haiyang Zhang <haiyangz@microsoft.com> 20 * Hank Janssen <hjanssen@microsoft.com> 21 * 22 */ 23 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 24 25 #include <linux/kernel.h> 26 #include <linux/sched.h> 27 #include <linux/wait.h> 28 #include <linux/delay.h> 29 #include <linux/mm.h> 30 #include <linux/slab.h> 31 #include <linux/vmalloc.h> 32 #include <linux/hyperv.h> 33 #include <linux/export.h> 34 #include <asm/mshyperv.h> 35 36 #include "hyperv_vmbus.h" 37 38 39 struct vmbus_connection vmbus_connection = { 40 .conn_state = DISCONNECTED, 41 .next_gpadl_handle = ATOMIC_INIT(0xE1E10), 42 }; 43 EXPORT_SYMBOL_GPL(vmbus_connection); 44 45 /* 46 * Negotiated protocol version with the host. 47 */ 48 __u32 vmbus_proto_version; 49 EXPORT_SYMBOL_GPL(vmbus_proto_version); 50 51 static __u32 vmbus_get_next_version(__u32 current_version) 52 { 53 switch (current_version) { 54 case (VERSION_WIN7): 55 return VERSION_WS2008; 56 57 case (VERSION_WIN8): 58 return VERSION_WIN7; 59 60 case (VERSION_WIN8_1): 61 return VERSION_WIN8; 62 63 case (VERSION_WIN10): 64 return VERSION_WIN8_1; 65 66 case (VERSION_WIN10_V5): 67 return VERSION_WIN10; 68 69 case (VERSION_WS2008): 70 default: 71 return VERSION_INVAL; 72 } 73 } 74 75 static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, 76 __u32 version) 77 { 78 int ret = 0; 79 unsigned int cur_cpu; 80 struct vmbus_channel_initiate_contact *msg; 81 unsigned long flags; 82 83 init_completion(&msginfo->waitevent); 84 85 msg = (struct vmbus_channel_initiate_contact *)msginfo->msg; 86 87 memset(msg, 0, sizeof(*msg)); 88 msg->header.msgtype = CHANNELMSG_INITIATE_CONTACT; 89 msg->vmbus_version_requested = version; 90 91 /* 92 * VMBus protocol 5.0 (VERSION_WIN10_V5) requires that we must use 93 * VMBUS_MESSAGE_CONNECTION_ID_4 for the Initiate Contact Message, 94 * and for subsequent messages, we must use the Message Connection ID 95 * field in the host-returned Version Response Message. And, with 96 * VERSION_WIN10_V5, we don't use msg->interrupt_page, but we tell 97 * the host explicitly that we still use VMBUS_MESSAGE_SINT(2) for 98 * compatibility. 99 * 100 * On old hosts, we should always use VMBUS_MESSAGE_CONNECTION_ID (1). 101 */ 102 if (version >= VERSION_WIN10_V5) { 103 msg->msg_sint = VMBUS_MESSAGE_SINT; 104 vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID_4; 105 } else { 106 msg->interrupt_page = virt_to_phys(vmbus_connection.int_page); 107 vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID; 108 } 109 110 msg->monitor_page1 = virt_to_phys(vmbus_connection.monitor_pages[0]); 111 msg->monitor_page2 = virt_to_phys(vmbus_connection.monitor_pages[1]); 112 /* 113 * We want all channel messages to be delivered on CPU 0. 114 * This has been the behavior pre-win8. This is not 115 * perf issue and having all channel messages delivered on CPU 0 116 * would be ok. 117 * For post win8 hosts, we support receiving channel messagges on 118 * all the CPUs. This is needed for kexec to work correctly where 119 * the CPU attempting to connect may not be CPU 0. 120 */ 121 if (version >= VERSION_WIN8_1) { 122 cur_cpu = get_cpu(); 123 msg->target_vcpu = hv_cpu_number_to_vp_number(cur_cpu); 124 vmbus_connection.connect_cpu = cur_cpu; 125 put_cpu(); 126 } else { 127 msg->target_vcpu = 0; 128 vmbus_connection.connect_cpu = 0; 129 } 130 131 /* 132 * Add to list before we send the request since we may 133 * receive the response before returning from this routine 134 */ 135 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 136 list_add_tail(&msginfo->msglistentry, 137 &vmbus_connection.chn_msg_list); 138 139 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 140 141 ret = vmbus_post_msg(msg, 142 sizeof(struct vmbus_channel_initiate_contact), 143 true); 144 145 trace_vmbus_negotiate_version(msg, ret); 146 147 if (ret != 0) { 148 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 149 list_del(&msginfo->msglistentry); 150 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, 151 flags); 152 return ret; 153 } 154 155 /* Wait for the connection response */ 156 wait_for_completion(&msginfo->waitevent); 157 158 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 159 list_del(&msginfo->msglistentry); 160 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 161 162 /* Check if successful */ 163 if (msginfo->response.version_response.version_supported) { 164 vmbus_connection.conn_state = CONNECTED; 165 166 if (version >= VERSION_WIN10_V5) 167 vmbus_connection.msg_conn_id = 168 msginfo->response.version_response.msg_conn_id; 169 } else { 170 return -ECONNREFUSED; 171 } 172 173 return ret; 174 } 175 176 /* 177 * vmbus_connect - Sends a connect request on the partition service connection 178 */ 179 int vmbus_connect(void) 180 { 181 int ret = 0; 182 struct vmbus_channel_msginfo *msginfo = NULL; 183 __u32 version; 184 185 /* Initialize the vmbus connection */ 186 vmbus_connection.conn_state = CONNECTING; 187 vmbus_connection.work_queue = create_workqueue("hv_vmbus_con"); 188 if (!vmbus_connection.work_queue) { 189 ret = -ENOMEM; 190 goto cleanup; 191 } 192 193 vmbus_connection.handle_primary_chan_wq = 194 create_workqueue("hv_pri_chan"); 195 if (!vmbus_connection.handle_primary_chan_wq) { 196 ret = -ENOMEM; 197 goto cleanup; 198 } 199 200 vmbus_connection.handle_sub_chan_wq = 201 create_workqueue("hv_sub_chan"); 202 if (!vmbus_connection.handle_sub_chan_wq) { 203 ret = -ENOMEM; 204 goto cleanup; 205 } 206 207 INIT_LIST_HEAD(&vmbus_connection.chn_msg_list); 208 spin_lock_init(&vmbus_connection.channelmsg_lock); 209 210 INIT_LIST_HEAD(&vmbus_connection.chn_list); 211 mutex_init(&vmbus_connection.channel_mutex); 212 213 /* 214 * Setup the vmbus event connection for channel interrupt 215 * abstraction stuff 216 */ 217 vmbus_connection.int_page = 218 (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, 0); 219 if (vmbus_connection.int_page == NULL) { 220 ret = -ENOMEM; 221 goto cleanup; 222 } 223 224 vmbus_connection.recv_int_page = vmbus_connection.int_page; 225 vmbus_connection.send_int_page = 226 (void *)((unsigned long)vmbus_connection.int_page + 227 (PAGE_SIZE >> 1)); 228 229 /* 230 * Setup the monitor notification facility. The 1st page for 231 * parent->child and the 2nd page for child->parent 232 */ 233 vmbus_connection.monitor_pages[0] = (void *)__get_free_pages((GFP_KERNEL|__GFP_ZERO), 0); 234 vmbus_connection.monitor_pages[1] = (void *)__get_free_pages((GFP_KERNEL|__GFP_ZERO), 0); 235 if ((vmbus_connection.monitor_pages[0] == NULL) || 236 (vmbus_connection.monitor_pages[1] == NULL)) { 237 ret = -ENOMEM; 238 goto cleanup; 239 } 240 241 msginfo = kzalloc(sizeof(*msginfo) + 242 sizeof(struct vmbus_channel_initiate_contact), 243 GFP_KERNEL); 244 if (msginfo == NULL) { 245 ret = -ENOMEM; 246 goto cleanup; 247 } 248 249 /* 250 * Negotiate a compatible VMBUS version number with the 251 * host. We start with the highest number we can support 252 * and work our way down until we negotiate a compatible 253 * version. 254 */ 255 256 version = VERSION_CURRENT; 257 258 do { 259 ret = vmbus_negotiate_version(msginfo, version); 260 if (ret == -ETIMEDOUT) 261 goto cleanup; 262 263 if (vmbus_connection.conn_state == CONNECTED) 264 break; 265 266 version = vmbus_get_next_version(version); 267 } while (version != VERSION_INVAL); 268 269 if (version == VERSION_INVAL) 270 goto cleanup; 271 272 vmbus_proto_version = version; 273 pr_info("Vmbus version:%d.%d\n", 274 version >> 16, version & 0xFFFF); 275 276 kfree(msginfo); 277 return 0; 278 279 cleanup: 280 pr_err("Unable to connect to host\n"); 281 282 vmbus_connection.conn_state = DISCONNECTED; 283 vmbus_disconnect(); 284 285 kfree(msginfo); 286 287 return ret; 288 } 289 290 void vmbus_disconnect(void) 291 { 292 /* 293 * First send the unload request to the host. 294 */ 295 vmbus_initiate_unload(false); 296 297 if (vmbus_connection.handle_sub_chan_wq) 298 destroy_workqueue(vmbus_connection.handle_sub_chan_wq); 299 300 if (vmbus_connection.handle_primary_chan_wq) 301 destroy_workqueue(vmbus_connection.handle_primary_chan_wq); 302 303 if (vmbus_connection.work_queue) 304 destroy_workqueue(vmbus_connection.work_queue); 305 306 if (vmbus_connection.int_page) { 307 free_pages((unsigned long)vmbus_connection.int_page, 0); 308 vmbus_connection.int_page = NULL; 309 } 310 311 free_pages((unsigned long)vmbus_connection.monitor_pages[0], 0); 312 free_pages((unsigned long)vmbus_connection.monitor_pages[1], 0); 313 vmbus_connection.monitor_pages[0] = NULL; 314 vmbus_connection.monitor_pages[1] = NULL; 315 } 316 317 /* 318 * relid2channel - Get the channel object given its 319 * child relative id (ie channel id) 320 */ 321 struct vmbus_channel *relid2channel(u32 relid) 322 { 323 struct vmbus_channel *channel; 324 struct vmbus_channel *found_channel = NULL; 325 struct list_head *cur, *tmp; 326 struct vmbus_channel *cur_sc; 327 328 BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); 329 330 list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { 331 if (channel->offermsg.child_relid == relid) { 332 found_channel = channel; 333 break; 334 } else if (!list_empty(&channel->sc_list)) { 335 /* 336 * Deal with sub-channels. 337 */ 338 list_for_each_safe(cur, tmp, &channel->sc_list) { 339 cur_sc = list_entry(cur, struct vmbus_channel, 340 sc_list); 341 if (cur_sc->offermsg.child_relid == relid) { 342 found_channel = cur_sc; 343 break; 344 } 345 } 346 } 347 } 348 349 return found_channel; 350 } 351 352 /* 353 * vmbus_on_event - Process a channel event notification 354 * 355 * For batched channels (default) optimize host to guest signaling 356 * by ensuring: 357 * 1. While reading the channel, we disable interrupts from host. 358 * 2. Ensure that we process all posted messages from the host 359 * before returning from this callback. 360 * 3. Once we return, enable signaling from the host. Once this 361 * state is set we check to see if additional packets are 362 * available to read. In this case we repeat the process. 363 * If this tasklet has been running for a long time 364 * then reschedule ourselves. 365 */ 366 void vmbus_on_event(unsigned long data) 367 { 368 struct vmbus_channel *channel = (void *) data; 369 unsigned long time_limit = jiffies + 2; 370 371 trace_vmbus_on_event(channel); 372 373 do { 374 void (*callback_fn)(void *); 375 376 /* A channel once created is persistent even when 377 * there is no driver handling the device. An 378 * unloading driver sets the onchannel_callback to NULL. 379 */ 380 callback_fn = READ_ONCE(channel->onchannel_callback); 381 if (unlikely(callback_fn == NULL)) 382 return; 383 384 (*callback_fn)(channel->channel_callback_context); 385 386 if (channel->callback_mode != HV_CALL_BATCHED) 387 return; 388 389 if (likely(hv_end_read(&channel->inbound) == 0)) 390 return; 391 392 hv_begin_read(&channel->inbound); 393 } while (likely(time_before(jiffies, time_limit))); 394 395 /* The time limit (2 jiffies) has been reached */ 396 tasklet_schedule(&channel->callback_event); 397 } 398 399 /* 400 * vmbus_post_msg - Send a msg on the vmbus's message connection 401 */ 402 int vmbus_post_msg(void *buffer, size_t buflen, bool can_sleep) 403 { 404 struct vmbus_channel_message_header *hdr; 405 union hv_connection_id conn_id; 406 int ret = 0; 407 int retries = 0; 408 u32 usec = 1; 409 410 conn_id.asu32 = 0; 411 conn_id.u.id = vmbus_connection.msg_conn_id; 412 413 /* 414 * hv_post_message() can have transient failures because of 415 * insufficient resources. Retry the operation a couple of 416 * times before giving up. 417 */ 418 while (retries < 100) { 419 ret = hv_post_message(conn_id, 1, buffer, buflen); 420 421 switch (ret) { 422 case HV_STATUS_INVALID_CONNECTION_ID: 423 /* 424 * See vmbus_negotiate_version(): VMBus protocol 5.0 425 * requires that we must use 426 * VMBUS_MESSAGE_CONNECTION_ID_4 for the Initiate 427 * Contact message, but on old hosts that only 428 * support VMBus protocol 4.0 or lower, here we get 429 * HV_STATUS_INVALID_CONNECTION_ID and we should 430 * return an error immediately without retrying. 431 */ 432 hdr = buffer; 433 if (hdr->msgtype == CHANNELMSG_INITIATE_CONTACT) 434 return -EINVAL; 435 /* 436 * We could get this if we send messages too 437 * frequently. 438 */ 439 ret = -EAGAIN; 440 break; 441 case HV_STATUS_INSUFFICIENT_MEMORY: 442 case HV_STATUS_INSUFFICIENT_BUFFERS: 443 ret = -ENOBUFS; 444 break; 445 case HV_STATUS_SUCCESS: 446 return ret; 447 default: 448 pr_err("hv_post_msg() failed; error code:%d\n", ret); 449 return -EINVAL; 450 } 451 452 retries++; 453 if (can_sleep && usec > 1000) 454 msleep(usec / 1000); 455 else if (usec < MAX_UDELAY_MS * 1000) 456 udelay(usec); 457 else 458 mdelay(usec / 1000); 459 460 if (retries < 22) 461 usec *= 2; 462 } 463 return ret; 464 } 465 466 /* 467 * vmbus_set_event - Send an event notification to the parent 468 */ 469 void vmbus_set_event(struct vmbus_channel *channel) 470 { 471 u32 child_relid = channel->offermsg.child_relid; 472 473 if (!channel->is_dedicated_interrupt) 474 vmbus_send_interrupt(child_relid); 475 476 ++channel->sig_events; 477 478 hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, channel->sig_event); 479 } 480 EXPORT_SYMBOL_GPL(vmbus_set_event); 481