// SPDX-License-Identifier: GPL-2.0-only
/*
 *
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/delay.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/hyperv.h>
#include <linux/export.h>
#include <asm/mshyperv.h>

#include "hyperv_vmbus.h"


/*
 * Global state of the connection to the host.
 *
 * The connection starts out DISCONNECTED.  The three completions are
 * initialized statically here; the remaining members used in this file
 * (work queues, lists, locks, pages, channel table) are set up by
 * vmbus_connect().
 */
struct vmbus_connection vmbus_connection = {
	.conn_state		= DISCONNECTED,
	.unload_event		= COMPLETION_INITIALIZER(
				  vmbus_connection.unload_event),
	.next_gpadl_handle	= ATOMIC_INIT(0xE1E10),

	.ready_for_suspend_event = COMPLETION_INITIALIZER(
				  vmbus_connection.ready_for_suspend_event),
	.ready_for_resume_event	= COMPLETION_INITIALIZER(
				  vmbus_connection.ready_for_resume_event),
};
EXPORT_SYMBOL_GPL(vmbus_connection);

/*
 * Negotiated protocol version with the host.
 *
 * Written by vmbus_connect() once a version has been accepted.
 */
__u32 vmbus_proto_version;
EXPORT_SYMBOL_GPL(vmbus_proto_version);

/*
 * Table of VMBus versions listed from newest to oldest.
 *
 * vmbus_connect() walks this table front to back, so the order determines
 * which version is offered to the host first.
 */
static __u32 vmbus_versions[] = {
	VERSION_WIN10_V5_3,
	VERSION_WIN10_V5_2,
	VERSION_WIN10_V5_1,
	VERSION_WIN10_V5,
	VERSION_WIN10_V4_1,
	VERSION_WIN10,
	VERSION_WIN8_1,
	VERSION_WIN8,
	VERSION_WIN7,
	VERSION_WS2008
};

/*
 * Maximal VMBus protocol version guests can negotiate.  Useful to cap the
 * VMBus version for testing and debugging purposes.
 *
 * Exposed as a module parameter with S_IRUGO permissions.  Entries of
 * vmbus_versions[] greater than this value are skipped by vmbus_connect().
 */
static uint max_version = VERSION_WIN10_V5_3;

module_param(max_version, uint, S_IRUGO);
MODULE_PARM_DESC(max_version,
		 "Maximal VMBus protocol version which can be negotiated");

/*
 * vmbus_negotiate_version - Offer one protocol version to the host
 * @msginfo: message bookkeeping structure; msginfo->msg is used as the
 *	     buffer for the struct vmbus_channel_initiate_contact request
 *	     (vmbus_connect() allocates that trailing space)
 * @version: the VMBus protocol version to request
 *
 * Builds an Initiate Contact message, queues @msginfo on
 * vmbus_connection.chn_msg_list, posts the message and then blocks in
 * wait_for_completion() (no timeout) on msginfo->waitevent.
 * NOTE(review): the code completing waitevent is not in this file;
 * presumably the channel-message handler does so when the version
 * response arrives - confirm.
 *
 * Side effects: vmbus_connection.msg_conn_id is always overwritten before
 * the message is posted.  On acceptance, vmbus_connection.conn_state is set
 * to CONNECTED and, for VERSION_WIN10_V5 and higher, msg_conn_id is replaced
 * with the value from the host's response.
 *
 * Return: 0 if the host accepted @version, the error returned by
 * vmbus_post_msg() if the request could not be posted, or -ECONNREFUSED if
 * the host answered that the version is not supported.
 */
int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version)
{
	int ret = 0;
	struct vmbus_channel_initiate_contact *msg;
	unsigned long flags;

	/* Re-arm the completion: the same msginfo is reused for every attempt. */
	init_completion(&msginfo->waitevent);

	msg = (struct vmbus_channel_initiate_contact *)msginfo->msg;

	memset(msg, 0, sizeof(*msg));
	msg->header.msgtype = CHANNELMSG_INITIATE_CONTACT;
	msg->vmbus_version_requested = version;

	/*
	 * VMBus protocol 5.0 (VERSION_WIN10_V5) and higher require that we must
	 * use VMBUS_MESSAGE_CONNECTION_ID_4 for the Initiate Contact Message,
	 * and for subsequent messages, we must use the Message Connection ID
	 * field in the host-returned Version Response Message. And, with
	 * VERSION_WIN10_V5 and higher, we don't use msg->interrupt_page, but we
	 * tell the host explicitly that we still use VMBUS_MESSAGE_SINT(2) for
	 * compatibility.
	 *
	 * On old hosts, we should always use VMBUS_MESSAGE_CONNECTION_ID (1).
	 */
	if (version >= VERSION_WIN10_V5) {
		msg->msg_sint = VMBUS_MESSAGE_SINT;
		vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID_4;
	} else {
		msg->interrupt_page = virt_to_phys(vmbus_connection.int_page);
		vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID;
	}

	/* The host is given the physical addresses of both monitor pages. */
	msg->monitor_page1 = virt_to_phys(vmbus_connection.monitor_pages[0]);
	msg->monitor_page2 = virt_to_phys(vmbus_connection.monitor_pages[1]);
	msg->target_vcpu = hv_cpu_number_to_vp_number(VMBUS_CONNECT_CPU);

	/*
	 * Add to list before we send the request since we may
	 * receive the response before returning from this routine
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
	list_add_tail(&msginfo->msglistentry,
		      &vmbus_connection.chn_msg_list);

	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);

	/* can_sleep == true: vmbus_post_msg() may msleep() between retries. */
	ret = vmbus_post_msg(msg,
			     sizeof(struct vmbus_channel_initiate_contact),
			     true);

	trace_vmbus_negotiate_version(msg, ret);

	if (ret != 0) {
		/* Nothing was sent, so no response will come: unqueue and bail. */
		spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
		list_del(&msginfo->msglistentry);
		spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock,
					flags);
		return ret;
	}

	/* Wait for the connection response */
	wait_for_completion(&msginfo->waitevent);

	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
	list_del(&msginfo->msglistentry);
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);

	/* Check if successful */
	if (msginfo->response.version_response.version_supported) {
		vmbus_connection.conn_state = CONNECTED;

		/* From now on use the connection ID chosen by the host. */
		if (version >= VERSION_WIN10_V5)
			vmbus_connection.msg_conn_id =
				msginfo->response.version_response.msg_conn_id;
	} else {
		return -ECONNREFUSED;
	}

	return ret;
}

/*
 * vmbus_connect - Sends a connect request on the partition service connection
 *
 * Creates the three work queues, initializes the message/channel lists and
 * their locks, allocates the interrupt page and the two monitor pages, and
 * then offers the versions in vmbus_versions[] (skipping those above
 * max_version) until the host accepts one.  On success the accepted version
 * is stored in vmbus_proto_version and the channel table
 * (MAX_CHANNEL_RELIDS pointers) is allocated.
 *
 * On any failure the connection state is reset to DISCONNECTED and
 * vmbus_disconnect() is called to release whatever was set up.
 *
 * Return: 0 on success; -ENOMEM if an allocation or work queue creation
 * failed; -EDOM if the version table was exhausted without the host
 * accepting a version; -EINVAL if the host supports isolation but the
 * negotiated version is below VERSION_WIN10_V5_2; -ETIMEDOUT if
 * vmbus_negotiate_version() reports it.
 */
int vmbus_connect(void)
{
	struct vmbus_channel_msginfo *msginfo = NULL;
	int i, ret = 0;
	__u32 version;

	/* Initialize the vmbus connection */
	vmbus_connection.conn_state = CONNECTING;
	vmbus_connection.work_queue = create_workqueue("hv_vmbus_con");
	if (!vmbus_connection.work_queue) {
		ret = -ENOMEM;
		goto cleanup;
	}

	vmbus_connection.handle_primary_chan_wq =
		create_workqueue("hv_pri_chan");
	if (!vmbus_connection.handle_primary_chan_wq) {
		ret = -ENOMEM;
		goto cleanup;
	}

	vmbus_connection.handle_sub_chan_wq =
		create_workqueue("hv_sub_chan");
	if (!vmbus_connection.handle_sub_chan_wq) {
		ret = -ENOMEM;
		goto cleanup;
	}

	INIT_LIST_HEAD(&vmbus_connection.chn_msg_list);
	spin_lock_init(&vmbus_connection.channelmsg_lock);

	INIT_LIST_HEAD(&vmbus_connection.chn_list);
	mutex_init(&vmbus_connection.channel_mutex);

	/*
	 * Setup the vmbus event connection for channel interrupt
	 * abstraction stuff
	 */
	vmbus_connection.int_page =
	(void *)hv_alloc_hyperv_zeroed_page();
	if (vmbus_connection.int_page == NULL) {
		ret = -ENOMEM;
		goto cleanup;
	}

	/*
	 * The single interrupt page is split in two: the first half is the
	 * receive area, the second half (offset HV_HYP_PAGE_SIZE / 2) is the
	 * send area.
	 */
	vmbus_connection.recv_int_page = vmbus_connection.int_page;
	vmbus_connection.send_int_page =
		(void *)((unsigned long)vmbus_connection.int_page +
			(HV_HYP_PAGE_SIZE >> 1));

	/*
	 * Setup the monitor notification facility. The 1st page for
	 * parent->child and the 2nd page for child->parent
	 */
	vmbus_connection.monitor_pages[0] = (void *)hv_alloc_hyperv_zeroed_page();
	vmbus_connection.monitor_pages[1] = (void *)hv_alloc_hyperv_zeroed_page();
	if ((vmbus_connection.monitor_pages[0] == NULL) ||
	    (vmbus_connection.monitor_pages[1] == NULL)) {
		ret = -ENOMEM;
		goto cleanup;
	}

	/*
	 * One msginfo, with trailing room for the Initiate Contact message,
	 * is reused for every negotiation attempt below.
	 */
	msginfo = kzalloc(sizeof(*msginfo) +
			  sizeof(struct vmbus_channel_initiate_contact),
			  GFP_KERNEL);
	if (msginfo == NULL) {
		ret = -ENOMEM;
		goto cleanup;
	}

	/*
	 * Negotiate a compatible VMBUS version number with the
	 * host. We start with the highest number we can support
	 * and work our way down until we negotiate a compatible
	 * version.
	 */

	for (i = 0; ; i++) {
		/* Ran off the end of the table: no version was accepted. */
		if (i == ARRAY_SIZE(vmbus_versions)) {
			ret = -EDOM;
			goto cleanup;
		}

		version = vmbus_versions[i];
		if (version > max_version)
			continue;

		ret = vmbus_negotiate_version(msginfo, version);
		/*
		 * NOTE(review): neither vmbus_negotiate_version() nor
		 * vmbus_post_msg() as written in this file produces
		 * -ETIMEDOUT; confirm whether this check is still reachable.
		 * Any other error simply moves on to the next (older) version.
		 */
		if (ret == -ETIMEDOUT)
			goto cleanup;

		if (vmbus_connection.conn_state == CONNECTED)
			break;
	}

	if (hv_is_isolation_supported() && version < VERSION_WIN10_V5_2) {
		pr_err("Invalid VMBus version %d.%d (expected >= %d.%d) from the host supporting isolation\n",
		       version >> 16, version & 0xFFFF, VERSION_WIN10_V5_2 >> 16, VERSION_WIN10_V5_2 & 0xFFFF);
		ret = -EINVAL;
		goto cleanup;
	}

	/* The version is encoded as major (upper 16 bits) . minor (lower 16). */
	vmbus_proto_version = version;
	pr_info("Vmbus version:%d.%d\n",
		version >> 16, version & 0xFFFF);

	/* Table of channel pointers indexed by relid; see relid2channel(). */
	vmbus_connection.channels = kcalloc(MAX_CHANNEL_RELIDS,
					    sizeof(struct vmbus_channel *),
					    GFP_KERNEL);
	if (vmbus_connection.channels == NULL) {
		ret = -ENOMEM;
		goto cleanup;
	}

	kfree(msginfo);
	return 0;

cleanup:
	pr_err("Unable to connect to host\n");

	vmbus_connection.conn_state = DISCONNECTED;
	vmbus_disconnect();

	kfree(msginfo);

	return ret;
}

/*
 * vmbus_disconnect - Tear down the connection to the host
 *
 * Sends the unload request, destroys the work queues created by
 * vmbus_connect() (sub-channel, primary-channel, then the main one) and
 * frees the interrupt page and the two monitor pages, clearing the page
 * pointers afterwards.
 *
 * Also used as the error-unwind path of vmbus_connect(), so it may run on a
 * partially initialized connection: the work queue and interrupt page
 * pointers are checked before use.  The monitor pages are freed
 * unconditionally.
 * NOTE(review): this presumes hv_free_hyperv_page() tolerates a zero
 * address - confirm against its definition.
 */
void vmbus_disconnect(void)
{
	/*
	 * First send the unload request to the host.
	 */
	vmbus_initiate_unload(false);

	if (vmbus_connection.handle_sub_chan_wq)
		destroy_workqueue(vmbus_connection.handle_sub_chan_wq);

	if (vmbus_connection.handle_primary_chan_wq)
		destroy_workqueue(vmbus_connection.handle_primary_chan_wq);

	if (vmbus_connection.work_queue)
		destroy_workqueue(vmbus_connection.work_queue);

	if (vmbus_connection.int_page) {
		hv_free_hyperv_page((unsigned long)vmbus_connection.int_page);
		vmbus_connection.int_page = NULL;
	}

	hv_free_hyperv_page((unsigned long)vmbus_connection.monitor_pages[0]);
	hv_free_hyperv_page((unsigned long)vmbus_connection.monitor_pages[1]);
	vmbus_connection.monitor_pages[0] = NULL;
	vmbus_connection.monitor_pages[1] = NULL;
}

/*
 * relid2channel - Get the channel object given its
 * child relative id (ie channel id)
 * @relid: index into vmbus_connection.channels
 *
 * Return: the pointer stored at vmbus_connection.channels[relid] (read with
 * READ_ONCE()), or NULL - after a WARN_ON() - if @relid is not below
 * MAX_CHANNEL_RELIDS.
 */
struct vmbus_channel *relid2channel(u32 relid)
{
	if (WARN_ON(relid >= MAX_CHANNEL_RELIDS))
		return NULL;
	return READ_ONCE(vmbus_connection.channels[relid]);
}

/*
 * vmbus_on_event - Process a channel event notification
 * @data: the struct vmbus_channel pointer, passed as an unsigned long
 *
 * For batched channels (default) optimize host to guest signaling
 * by ensuring:
 * 1. While reading the channel, we disable interrupts from host.
 * 2. Ensure that we process all posted messages from the host
 *    before returning from this callback.
 * 3. Once we return, enable signaling from the host. Once this
 *    state is set we check to see if additional packets are
 *    available to read. In this case we repeat the process.
 *    If this tasklet has been running for a long time
 *    then reschedule ourselves.
 */
void vmbus_on_event(unsigned long data)
{
	struct vmbus_channel *channel = (void *) data;
	unsigned long time_limit = jiffies + 2;

	trace_vmbus_on_event(channel);

	hv_debug_delay_test(channel, INTERRUPT_DELAY);
	do {
		void (*callback_fn)(void *);

		/* A channel once created is persistent even when
		 * there is no driver handling the device. An
		 * unloading driver sets the onchannel_callback to NULL.
		 */
		callback_fn = READ_ONCE(channel->onchannel_callback);
		if (unlikely(callback_fn == NULL))
			return;

		(*callback_fn)(channel->channel_callback_context);

		/* Only batched channels re-check the ring and loop. */
		if (channel->callback_mode != HV_CALL_BATCHED)
			return;

		/* A zero result from hv_end_read() ends processing here. */
		if (likely(hv_end_read(&channel->inbound) == 0))
			return;

		hv_begin_read(&channel->inbound);
	} while (likely(time_before(jiffies, time_limit)));

	/* The time limit (2 jiffies) has been reached */
	tasklet_schedule(&channel->callback_event);
}

/*
 * vmbus_post_msg - Send a msg on the vmbus's message connection
 * @buffer: the message; it is read as a struct vmbus_channel_message_header
 *	    when the host reports an invalid connection ID
 * @buflen: length of @buffer in bytes
 * @can_sleep: if true, delays longer than 1000 usec use msleep(); otherwise
 *	       every delay is a busy-wait (udelay()/mdelay())
 *
 * Posts the message on connection ID vmbus_connection.msg_conn_id and
 * retries on transient failures, up to 100 attempts.  The delay between
 * attempts starts at 1 usec and doubles after each of the first 21 failed
 * attempts, then stays constant at 2^21 usec.
 *
 * Return: the hv_post_message() result on HV_STATUS_SUCCESS; -EINVAL if an
 * Initiate Contact message was rejected with
 * HV_STATUS_INVALID_CONNECTION_ID or if an unexpected status was returned;
 * -EAGAIN or -ENOBUFS (whichever the last attempt produced) when all
 * retries were used up.
 */
int vmbus_post_msg(void *buffer, size_t buflen, bool can_sleep)
{
	struct vmbus_channel_message_header *hdr;
	union hv_connection_id conn_id;
	int ret = 0;
	int retries = 0;
	u32 usec = 1;

	conn_id.asu32 = 0;
	conn_id.u.id = vmbus_connection.msg_conn_id;

	/*
	 * hv_post_message() can have transient failures because of
	 * insufficient resources. Retry the operation a couple of
	 * times before giving up.
	 */
	while (retries < 100) {
		ret = hv_post_message(conn_id, 1, buffer, buflen);

		switch (ret) {
		case HV_STATUS_INVALID_CONNECTION_ID:
			/*
			 * See vmbus_negotiate_version(): VMBus protocol 5.0
			 * and higher require that we must use
			 * VMBUS_MESSAGE_CONNECTION_ID_4 for the Initiate
			 * Contact message, but on old hosts that only
			 * support VMBus protocol 4.0 or lower, here we get
			 * HV_STATUS_INVALID_CONNECTION_ID and we should
			 * return an error immediately without retrying.
			 */
			hdr = buffer;
			if (hdr->msgtype == CHANNELMSG_INITIATE_CONTACT)
				return -EINVAL;
			/*
			 * We could get this if we send messages too
			 * frequently.
			 */
			ret = -EAGAIN;
			break;
		case HV_STATUS_INSUFFICIENT_MEMORY:
		case HV_STATUS_INSUFFICIENT_BUFFERS:
			ret = -ENOBUFS;
			break;
		case HV_STATUS_SUCCESS:
			return ret;
		default:
			pr_err("hv_post_msg() failed; error code:%d\n", ret);
			return -EINVAL;
		}

		retries++;
		/*
		 * Back off before the next attempt: sleep when permitted and
		 * the delay exceeds 1000 usec, otherwise busy-wait using
		 * udelay() below MAX_UDELAY_MS and mdelay() at or above it.
		 */
		if (can_sleep && usec > 1000)
			msleep(usec / 1000);
		else if (usec < MAX_UDELAY_MS * 1000)
			udelay(usec);
		else
			mdelay(usec / 1000);

		/* Stop doubling once 21 doublings have been applied. */
		if (retries < 22)
			usec *= 2;
	}
	return ret;
}

/*
 * vmbus_set_event - Send an event notification to the parent
 * @channel: the channel to signal
 *
 * For a channel without a dedicated interrupt, first calls
 * vmbus_send_interrupt() with the channel's child relid.  In all cases the
 * channel's sig_events counter is incremented and the HVCALL_SIGNAL_EVENT
 * fast hypercall is issued with channel->sig_event.  The hypercall's return
 * value is not checked.
 */
void vmbus_set_event(struct vmbus_channel *channel)
{
	u32 child_relid = channel->offermsg.child_relid;

	if (!channel->is_dedicated_interrupt)
		vmbus_send_interrupt(child_relid);

	++channel->sig_events;

	hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, channel->sig_event);
}
EXPORT_SYMBOL_GPL(vmbus_set_event);