1 /* 2 * 3 * Copyright (c) 2009, Microsoft Corporation. 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms and conditions of the GNU General Public License, 7 * version 2, as published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 12 * more details. 13 * 14 * You should have received a copy of the GNU General Public License along with 15 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple 16 * Place - Suite 330, Boston, MA 02111-1307 USA. 17 * 18 * Authors: 19 * Haiyang Zhang <haiyangz@microsoft.com> 20 * Hank Janssen <hjanssen@microsoft.com> 21 * 22 */ 23 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 24 25 #include <linux/kernel.h> 26 #include <linux/sched.h> 27 #include <linux/wait.h> 28 #include <linux/delay.h> 29 #include <linux/mm.h> 30 #include <linux/slab.h> 31 #include <linux/vmalloc.h> 32 #include <linux/hyperv.h> 33 #include <linux/export.h> 34 #include <asm/mshyperv.h> 35 36 #include "hyperv_vmbus.h" 37 38 39 struct vmbus_connection vmbus_connection = { 40 .conn_state = DISCONNECTED, 41 .next_gpadl_handle = ATOMIC_INIT(0xE1E10), 42 }; 43 EXPORT_SYMBOL_GPL(vmbus_connection); 44 45 /* 46 * Negotiated protocol version with the host. 47 */ 48 __u32 vmbus_proto_version; 49 EXPORT_SYMBOL_GPL(vmbus_proto_version); 50 51 static __u32 vmbus_get_next_version(__u32 current_version) 52 { 53 switch (current_version) { 54 case (VERSION_WIN7): 55 return VERSION_WS2008; 56 57 case (VERSION_WIN8): 58 return VERSION_WIN7; 59 60 case (VERSION_WIN8_1): 61 return VERSION_WIN8; 62 63 case (VERSION_WIN10): 64 return VERSION_WIN8_1; 65 66 case (VERSION_WIN10_V5): 67 return VERSION_WIN10; 68 69 case (VERSION_WS2008): 70 default: 71 return VERSION_INVAL; 72 } 73 } 74 75 static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, 76 __u32 version) 77 { 78 int ret = 0; 79 unsigned int cur_cpu; 80 struct vmbus_channel_initiate_contact *msg; 81 unsigned long flags; 82 83 init_completion(&msginfo->waitevent); 84 85 msg = (struct vmbus_channel_initiate_contact *)msginfo->msg; 86 87 memset(msg, 0, sizeof(*msg)); 88 msg->header.msgtype = CHANNELMSG_INITIATE_CONTACT; 89 msg->vmbus_version_requested = version; 90 91 /* 92 * VMBus protocol 5.0 (VERSION_WIN10_V5) requires that we must use 93 * VMBUS_MESSAGE_CONNECTION_ID_4 for the Initiate Contact Message, 94 * and for subsequent messages, we must use the Message Connection ID 95 * field in the host-returned Version Response Message. And, with 96 * VERSION_WIN10_V5, we don't use msg->interrupt_page, but we tell 97 * the host explicitly that we still use VMBUS_MESSAGE_SINT(2) for 98 * compatibility. 99 * 100 * On old hosts, we should always use VMBUS_MESSAGE_CONNECTION_ID (1). 101 */ 102 if (version >= VERSION_WIN10_V5) { 103 msg->msg_sint = VMBUS_MESSAGE_SINT; 104 vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID_4; 105 } else { 106 msg->interrupt_page = virt_to_phys(vmbus_connection.int_page); 107 vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID; 108 } 109 110 msg->monitor_page1 = virt_to_phys(vmbus_connection.monitor_pages[0]); 111 msg->monitor_page2 = virt_to_phys(vmbus_connection.monitor_pages[1]); 112 /* 113 * We want all channel messages to be delivered on CPU 0. 114 * This has been the behavior pre-win8. This is not 115 * perf issue and having all channel messages delivered on CPU 0 116 * would be ok. 117 * For post win8 hosts, we support receiving channel messagges on 118 * all the CPUs. This is needed for kexec to work correctly where 119 * the CPU attempting to connect may not be CPU 0. 120 */ 121 if (version >= VERSION_WIN8_1) { 122 cur_cpu = get_cpu(); 123 msg->target_vcpu = hv_cpu_number_to_vp_number(cur_cpu); 124 vmbus_connection.connect_cpu = cur_cpu; 125 put_cpu(); 126 } else { 127 msg->target_vcpu = 0; 128 vmbus_connection.connect_cpu = 0; 129 } 130 131 /* 132 * Add to list before we send the request since we may 133 * receive the response before returning from this routine 134 */ 135 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 136 list_add_tail(&msginfo->msglistentry, 137 &vmbus_connection.chn_msg_list); 138 139 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 140 141 ret = vmbus_post_msg(msg, 142 sizeof(struct vmbus_channel_initiate_contact), 143 true); 144 145 trace_vmbus_negotiate_version(msg, ret); 146 147 if (ret != 0) { 148 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 149 list_del(&msginfo->msglistentry); 150 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, 151 flags); 152 return ret; 153 } 154 155 /* Wait for the connection response */ 156 wait_for_completion(&msginfo->waitevent); 157 158 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 159 list_del(&msginfo->msglistentry); 160 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 161 162 /* Check if successful */ 163 if (msginfo->response.version_response.version_supported) { 164 vmbus_connection.conn_state = CONNECTED; 165 166 if (version >= VERSION_WIN10_V5) 167 vmbus_connection.msg_conn_id = 168 msginfo->response.version_response.msg_conn_id; 169 } else { 170 return -ECONNREFUSED; 171 } 172 173 return ret; 174 } 175 176 /* 177 * vmbus_connect - Sends a connect request on the partition service connection 178 */ 179 int vmbus_connect(void) 180 { 181 int ret = 0; 182 struct vmbus_channel_msginfo *msginfo = NULL; 183 __u32 version; 184 185 /* Initialize the vmbus connection */ 186 vmbus_connection.conn_state = CONNECTING; 187 vmbus_connection.work_queue = create_workqueue("hv_vmbus_con"); 188 if (!vmbus_connection.work_queue) { 189 ret = -ENOMEM; 190 goto cleanup; 191 } 192 193 INIT_LIST_HEAD(&vmbus_connection.chn_msg_list); 194 spin_lock_init(&vmbus_connection.channelmsg_lock); 195 196 INIT_LIST_HEAD(&vmbus_connection.chn_list); 197 mutex_init(&vmbus_connection.channel_mutex); 198 199 /* 200 * Setup the vmbus event connection for channel interrupt 201 * abstraction stuff 202 */ 203 vmbus_connection.int_page = 204 (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, 0); 205 if (vmbus_connection.int_page == NULL) { 206 ret = -ENOMEM; 207 goto cleanup; 208 } 209 210 vmbus_connection.recv_int_page = vmbus_connection.int_page; 211 vmbus_connection.send_int_page = 212 (void *)((unsigned long)vmbus_connection.int_page + 213 (PAGE_SIZE >> 1)); 214 215 /* 216 * Setup the monitor notification facility. The 1st page for 217 * parent->child and the 2nd page for child->parent 218 */ 219 vmbus_connection.monitor_pages[0] = (void *)__get_free_pages((GFP_KERNEL|__GFP_ZERO), 0); 220 vmbus_connection.monitor_pages[1] = (void *)__get_free_pages((GFP_KERNEL|__GFP_ZERO), 0); 221 if ((vmbus_connection.monitor_pages[0] == NULL) || 222 (vmbus_connection.monitor_pages[1] == NULL)) { 223 ret = -ENOMEM; 224 goto cleanup; 225 } 226 227 msginfo = kzalloc(sizeof(*msginfo) + 228 sizeof(struct vmbus_channel_initiate_contact), 229 GFP_KERNEL); 230 if (msginfo == NULL) { 231 ret = -ENOMEM; 232 goto cleanup; 233 } 234 235 /* 236 * Negotiate a compatible VMBUS version number with the 237 * host. We start with the highest number we can support 238 * and work our way down until we negotiate a compatible 239 * version. 240 */ 241 242 version = VERSION_CURRENT; 243 244 do { 245 ret = vmbus_negotiate_version(msginfo, version); 246 if (ret == -ETIMEDOUT) 247 goto cleanup; 248 249 if (vmbus_connection.conn_state == CONNECTED) 250 break; 251 252 version = vmbus_get_next_version(version); 253 } while (version != VERSION_INVAL); 254 255 if (version == VERSION_INVAL) 256 goto cleanup; 257 258 vmbus_proto_version = version; 259 pr_info("Vmbus version:%d.%d\n", 260 version >> 16, version & 0xFFFF); 261 262 kfree(msginfo); 263 return 0; 264 265 cleanup: 266 pr_err("Unable to connect to host\n"); 267 268 vmbus_connection.conn_state = DISCONNECTED; 269 vmbus_disconnect(); 270 271 kfree(msginfo); 272 273 return ret; 274 } 275 276 void vmbus_disconnect(void) 277 { 278 /* 279 * First send the unload request to the host. 280 */ 281 vmbus_initiate_unload(false); 282 283 if (vmbus_connection.work_queue) { 284 drain_workqueue(vmbus_connection.work_queue); 285 destroy_workqueue(vmbus_connection.work_queue); 286 } 287 288 if (vmbus_connection.int_page) { 289 free_pages((unsigned long)vmbus_connection.int_page, 0); 290 vmbus_connection.int_page = NULL; 291 } 292 293 free_pages((unsigned long)vmbus_connection.monitor_pages[0], 0); 294 free_pages((unsigned long)vmbus_connection.monitor_pages[1], 0); 295 vmbus_connection.monitor_pages[0] = NULL; 296 vmbus_connection.monitor_pages[1] = NULL; 297 } 298 299 /* 300 * relid2channel - Get the channel object given its 301 * child relative id (ie channel id) 302 */ 303 struct vmbus_channel *relid2channel(u32 relid) 304 { 305 struct vmbus_channel *channel; 306 struct vmbus_channel *found_channel = NULL; 307 struct list_head *cur, *tmp; 308 struct vmbus_channel *cur_sc; 309 310 BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); 311 312 list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { 313 if (channel->offermsg.child_relid == relid) { 314 found_channel = channel; 315 break; 316 } else if (!list_empty(&channel->sc_list)) { 317 /* 318 * Deal with sub-channels. 319 */ 320 list_for_each_safe(cur, tmp, &channel->sc_list) { 321 cur_sc = list_entry(cur, struct vmbus_channel, 322 sc_list); 323 if (cur_sc->offermsg.child_relid == relid) { 324 found_channel = cur_sc; 325 break; 326 } 327 } 328 } 329 } 330 331 return found_channel; 332 } 333 334 /* 335 * vmbus_on_event - Process a channel event notification 336 * 337 * For batched channels (default) optimize host to guest signaling 338 * by ensuring: 339 * 1. While reading the channel, we disable interrupts from host. 340 * 2. Ensure that we process all posted messages from the host 341 * before returning from this callback. 342 * 3. Once we return, enable signaling from the host. Once this 343 * state is set we check to see if additional packets are 344 * available to read. In this case we repeat the process. 345 * If this tasklet has been running for a long time 346 * then reschedule ourselves. 347 */ 348 void vmbus_on_event(unsigned long data) 349 { 350 struct vmbus_channel *channel = (void *) data; 351 unsigned long time_limit = jiffies + 2; 352 353 trace_vmbus_on_event(channel); 354 355 do { 356 void (*callback_fn)(void *); 357 358 /* A channel once created is persistent even when 359 * there is no driver handling the device. An 360 * unloading driver sets the onchannel_callback to NULL. 361 */ 362 callback_fn = READ_ONCE(channel->onchannel_callback); 363 if (unlikely(callback_fn == NULL)) 364 return; 365 366 (*callback_fn)(channel->channel_callback_context); 367 368 if (channel->callback_mode != HV_CALL_BATCHED) 369 return; 370 371 if (likely(hv_end_read(&channel->inbound) == 0)) 372 return; 373 374 hv_begin_read(&channel->inbound); 375 } while (likely(time_before(jiffies, time_limit))); 376 377 /* The time limit (2 jiffies) has been reached */ 378 tasklet_schedule(&channel->callback_event); 379 } 380 381 /* 382 * vmbus_post_msg - Send a msg on the vmbus's message connection 383 */ 384 int vmbus_post_msg(void *buffer, size_t buflen, bool can_sleep) 385 { 386 struct vmbus_channel_message_header *hdr; 387 union hv_connection_id conn_id; 388 int ret = 0; 389 int retries = 0; 390 u32 usec = 1; 391 392 conn_id.asu32 = 0; 393 conn_id.u.id = vmbus_connection.msg_conn_id; 394 395 /* 396 * hv_post_message() can have transient failures because of 397 * insufficient resources. Retry the operation a couple of 398 * times before giving up. 399 */ 400 while (retries < 100) { 401 ret = hv_post_message(conn_id, 1, buffer, buflen); 402 403 switch (ret) { 404 case HV_STATUS_INVALID_CONNECTION_ID: 405 /* 406 * See vmbus_negotiate_version(): VMBus protocol 5.0 407 * requires that we must use 408 * VMBUS_MESSAGE_CONNECTION_ID_4 for the Initiate 409 * Contact message, but on old hosts that only 410 * support VMBus protocol 4.0 or lower, here we get 411 * HV_STATUS_INVALID_CONNECTION_ID and we should 412 * return an error immediately without retrying. 413 */ 414 hdr = buffer; 415 if (hdr->msgtype == CHANNELMSG_INITIATE_CONTACT) 416 return -EINVAL; 417 /* 418 * We could get this if we send messages too 419 * frequently. 420 */ 421 ret = -EAGAIN; 422 break; 423 case HV_STATUS_INSUFFICIENT_MEMORY: 424 case HV_STATUS_INSUFFICIENT_BUFFERS: 425 ret = -ENOBUFS; 426 break; 427 case HV_STATUS_SUCCESS: 428 return ret; 429 default: 430 pr_err("hv_post_msg() failed; error code:%d\n", ret); 431 return -EINVAL; 432 } 433 434 retries++; 435 if (can_sleep && usec > 1000) 436 msleep(usec / 1000); 437 else if (usec < MAX_UDELAY_MS * 1000) 438 udelay(usec); 439 else 440 mdelay(usec / 1000); 441 442 if (retries < 22) 443 usec *= 2; 444 } 445 return ret; 446 } 447 448 /* 449 * vmbus_set_event - Send an event notification to the parent 450 */ 451 void vmbus_set_event(struct vmbus_channel *channel) 452 { 453 u32 child_relid = channel->offermsg.child_relid; 454 455 if (!channel->is_dedicated_interrupt) 456 vmbus_send_interrupt(child_relid); 457 458 ++channel->sig_events; 459 460 hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, channel->sig_event); 461 } 462 EXPORT_SYMBOL_GPL(vmbus_set_event); 463