1 /* 2 * Copyright (c) 2009, Microsoft Corporation. 3 * 4 * This program is free software; you can redistribute it and/or modify it 5 * under the terms and conditions of the GNU General Public License, 6 * version 2, as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope it will be useful, but WITHOUT 9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 11 * more details. 12 * 13 * You should have received a copy of the GNU General Public License along with 14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple 15 * Place - Suite 330, Boston, MA 02111-1307 USA. 16 * 17 * Authors: 18 * Haiyang Zhang <haiyangz@microsoft.com> 19 * Hank Janssen <hjanssen@microsoft.com> 20 */ 21 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 22 23 #include <linux/kernel.h> 24 #include <linux/interrupt.h> 25 #include <linux/sched.h> 26 #include <linux/wait.h> 27 #include <linux/mm.h> 28 #include <linux/slab.h> 29 #include <linux/list.h> 30 #include <linux/module.h> 31 #include <linux/completion.h> 32 #include <linux/delay.h> 33 #include <linux/hyperv.h> 34 #include <asm/mshyperv.h> 35 36 #include "hyperv_vmbus.h" 37 38 static void init_vp_index(struct vmbus_channel *channel, u16 dev_type); 39 40 static const struct vmbus_device vmbus_devs[] = { 41 /* IDE */ 42 { .dev_type = HV_IDE, 43 HV_IDE_GUID, 44 .perf_device = true, 45 }, 46 47 /* SCSI */ 48 { .dev_type = HV_SCSI, 49 HV_SCSI_GUID, 50 .perf_device = true, 51 }, 52 53 /* Fibre Channel */ 54 { .dev_type = HV_FC, 55 HV_SYNTHFC_GUID, 56 .perf_device = true, 57 }, 58 59 /* Synthetic NIC */ 60 { .dev_type = HV_NIC, 61 HV_NIC_GUID, 62 .perf_device = true, 63 }, 64 65 /* Network Direct */ 66 { .dev_type = HV_ND, 67 HV_ND_GUID, 68 .perf_device = true, 69 }, 70 71 /* PCIE */ 72 { .dev_type = HV_PCIE, 73 HV_PCIE_GUID, 74 .perf_device = true, 75 }, 76 77 /* Synthetic Frame Buffer */ 78 { .dev_type = HV_FB, 79 HV_SYNTHVID_GUID, 80 .perf_device = false, 81 }, 82 83 /* Synthetic Keyboard */ 84 { .dev_type = HV_KBD, 85 HV_KBD_GUID, 86 .perf_device = false, 87 }, 88 89 /* Synthetic MOUSE */ 90 { .dev_type = HV_MOUSE, 91 HV_MOUSE_GUID, 92 .perf_device = false, 93 }, 94 95 /* KVP */ 96 { .dev_type = HV_KVP, 97 HV_KVP_GUID, 98 .perf_device = false, 99 }, 100 101 /* Time Synch */ 102 { .dev_type = HV_TS, 103 HV_TS_GUID, 104 .perf_device = false, 105 }, 106 107 /* Heartbeat */ 108 { .dev_type = HV_HB, 109 HV_HEART_BEAT_GUID, 110 .perf_device = false, 111 }, 112 113 /* Shutdown */ 114 { .dev_type = HV_SHUTDOWN, 115 HV_SHUTDOWN_GUID, 116 .perf_device = false, 117 }, 118 119 /* File copy */ 120 { .dev_type = HV_FCOPY, 121 HV_FCOPY_GUID, 122 .perf_device = false, 123 }, 124 125 /* Backup */ 126 { .dev_type = HV_BACKUP, 127 HV_VSS_GUID, 128 .perf_device = false, 129 }, 130 131 /* Dynamic Memory */ 132 { .dev_type = HV_DM, 133 HV_DM_GUID, 134 .perf_device = false, 135 }, 136 137 /* Unknown GUID */ 138 { .dev_type = HV_UNKNOWN, 139 .perf_device = false, 140 }, 141 }; 142 143 static const struct { 144 uuid_le guid; 145 } vmbus_unsupported_devs[] = { 146 { HV_AVMA1_GUID }, 147 { HV_AVMA2_GUID }, 148 { HV_RDV_GUID }, 149 }; 150 151 /* 152 * The rescinded channel may be blocked waiting for a response from the host; 153 * take care of that. 154 */ 155 static void vmbus_rescind_cleanup(struct vmbus_channel *channel) 156 { 157 struct vmbus_channel_msginfo *msginfo; 158 unsigned long flags; 159 160 161 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 162 163 list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, 164 msglistentry) { 165 166 if (msginfo->waiting_channel == channel) { 167 complete(&msginfo->waitevent); 168 break; 169 } 170 } 171 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 172 } 173 174 static bool is_unsupported_vmbus_devs(const uuid_le *guid) 175 { 176 int i; 177 178 for (i = 0; i < ARRAY_SIZE(vmbus_unsupported_devs); i++) 179 if (!uuid_le_cmp(*guid, vmbus_unsupported_devs[i].guid)) 180 return true; 181 return false; 182 } 183 184 static u16 hv_get_dev_type(const struct vmbus_channel *channel) 185 { 186 const uuid_le *guid = &channel->offermsg.offer.if_type; 187 u16 i; 188 189 if (is_hvsock_channel(channel) || is_unsupported_vmbus_devs(guid)) 190 return HV_UNKNOWN; 191 192 for (i = HV_IDE; i < HV_UNKNOWN; i++) { 193 if (!uuid_le_cmp(*guid, vmbus_devs[i].guid)) 194 return i; 195 } 196 pr_info("Unknown GUID: %pUl\n", guid); 197 return i; 198 } 199 200 /** 201 * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message 202 * @icmsghdrp: Pointer to msg header structure 203 * @icmsg_negotiate: Pointer to negotiate message structure 204 * @buf: Raw buffer channel data 205 * 206 * @icmsghdrp is of type &struct icmsg_hdr. 207 * @negop is of type &struct icmsg_negotiate. 208 * Set up and fill in default negotiate response message. 209 * 210 * The fw_version specifies the framework version that 211 * we can support and srv_version specifies the service 212 * version we can support. 213 * 214 * Mainly used by Hyper-V drivers. 215 */ 216 bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, 217 struct icmsg_negotiate *negop, u8 *buf, 218 int fw_version, int srv_version) 219 { 220 int icframe_major, icframe_minor; 221 int icmsg_major, icmsg_minor; 222 int fw_major, fw_minor; 223 int srv_major, srv_minor; 224 int i; 225 bool found_match = false; 226 227 icmsghdrp->icmsgsize = 0x10; 228 fw_major = (fw_version >> 16); 229 fw_minor = (fw_version & 0xFFFF); 230 231 srv_major = (srv_version >> 16); 232 srv_minor = (srv_version & 0xFFFF); 233 234 negop = (struct icmsg_negotiate *)&buf[ 235 sizeof(struct vmbuspipe_hdr) + 236 sizeof(struct icmsg_hdr)]; 237 238 icframe_major = negop->icframe_vercnt; 239 icframe_minor = 0; 240 241 icmsg_major = negop->icmsg_vercnt; 242 icmsg_minor = 0; 243 244 /* 245 * Select the framework version number we will 246 * support. 247 */ 248 249 for (i = 0; i < negop->icframe_vercnt; i++) { 250 if ((negop->icversion_data[i].major == fw_major) && 251 (negop->icversion_data[i].minor == fw_minor)) { 252 icframe_major = negop->icversion_data[i].major; 253 icframe_minor = negop->icversion_data[i].minor; 254 found_match = true; 255 } 256 } 257 258 if (!found_match) 259 goto fw_error; 260 261 found_match = false; 262 263 for (i = negop->icframe_vercnt; 264 (i < negop->icframe_vercnt + negop->icmsg_vercnt); i++) { 265 if ((negop->icversion_data[i].major == srv_major) && 266 (negop->icversion_data[i].minor == srv_minor)) { 267 icmsg_major = negop->icversion_data[i].major; 268 icmsg_minor = negop->icversion_data[i].minor; 269 found_match = true; 270 } 271 } 272 273 /* 274 * Respond with the framework and service 275 * version numbers we can support. 276 */ 277 278 fw_error: 279 if (!found_match) { 280 negop->icframe_vercnt = 0; 281 negop->icmsg_vercnt = 0; 282 } else { 283 negop->icframe_vercnt = 1; 284 negop->icmsg_vercnt = 1; 285 } 286 287 negop->icversion_data[0].major = icframe_major; 288 negop->icversion_data[0].minor = icframe_minor; 289 negop->icversion_data[1].major = icmsg_major; 290 negop->icversion_data[1].minor = icmsg_minor; 291 return found_match; 292 } 293 294 EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp); 295 296 /* 297 * alloc_channel - Allocate and initialize a vmbus channel object 298 */ 299 static struct vmbus_channel *alloc_channel(void) 300 { 301 struct vmbus_channel *channel; 302 303 channel = kzalloc(sizeof(*channel), GFP_ATOMIC); 304 if (!channel) 305 return NULL; 306 307 channel->acquire_ring_lock = true; 308 spin_lock_init(&channel->inbound_lock); 309 spin_lock_init(&channel->lock); 310 311 INIT_LIST_HEAD(&channel->sc_list); 312 INIT_LIST_HEAD(&channel->percpu_list); 313 314 return channel; 315 } 316 317 /* 318 * free_channel - Release the resources used by the vmbus channel object 319 */ 320 static void free_channel(struct vmbus_channel *channel) 321 { 322 kfree(channel); 323 } 324 325 static void percpu_channel_enq(void *arg) 326 { 327 struct vmbus_channel *channel = arg; 328 int cpu = smp_processor_id(); 329 330 list_add_tail(&channel->percpu_list, &hv_context.percpu_list[cpu]); 331 } 332 333 static void percpu_channel_deq(void *arg) 334 { 335 struct vmbus_channel *channel = arg; 336 337 list_del(&channel->percpu_list); 338 } 339 340 341 static void vmbus_release_relid(u32 relid) 342 { 343 struct vmbus_channel_relid_released msg; 344 345 memset(&msg, 0, sizeof(struct vmbus_channel_relid_released)); 346 msg.child_relid = relid; 347 msg.header.msgtype = CHANNELMSG_RELID_RELEASED; 348 vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released), 349 true); 350 } 351 352 void hv_event_tasklet_disable(struct vmbus_channel *channel) 353 { 354 struct tasklet_struct *tasklet; 355 tasklet = hv_context.event_dpc[channel->target_cpu]; 356 tasklet_disable(tasklet); 357 } 358 359 void hv_event_tasklet_enable(struct vmbus_channel *channel) 360 { 361 struct tasklet_struct *tasklet; 362 tasklet = hv_context.event_dpc[channel->target_cpu]; 363 tasklet_enable(tasklet); 364 365 /* In case there is any pending event */ 366 tasklet_schedule(tasklet); 367 } 368 369 void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) 370 { 371 unsigned long flags; 372 struct vmbus_channel *primary_channel; 373 374 BUG_ON(!channel->rescind); 375 BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); 376 377 hv_event_tasklet_disable(channel); 378 if (channel->target_cpu != get_cpu()) { 379 put_cpu(); 380 smp_call_function_single(channel->target_cpu, 381 percpu_channel_deq, channel, true); 382 } else { 383 percpu_channel_deq(channel); 384 put_cpu(); 385 } 386 hv_event_tasklet_enable(channel); 387 388 if (channel->primary_channel == NULL) { 389 list_del(&channel->listentry); 390 391 primary_channel = channel; 392 } else { 393 primary_channel = channel->primary_channel; 394 spin_lock_irqsave(&primary_channel->lock, flags); 395 list_del(&channel->sc_list); 396 primary_channel->num_sc--; 397 spin_unlock_irqrestore(&primary_channel->lock, flags); 398 } 399 400 /* 401 * We need to free the bit for init_vp_index() to work in the case 402 * of sub-channel, when we reload drivers like hv_netvsc. 403 */ 404 if (channel->affinity_policy == HV_LOCALIZED) 405 cpumask_clear_cpu(channel->target_cpu, 406 &primary_channel->alloced_cpus_in_node); 407 408 vmbus_release_relid(relid); 409 410 free_channel(channel); 411 } 412 413 void vmbus_free_channels(void) 414 { 415 struct vmbus_channel *channel, *tmp; 416 417 mutex_lock(&vmbus_connection.channel_mutex); 418 list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list, 419 listentry) { 420 /* hv_process_channel_removal() needs this */ 421 channel->rescind = true; 422 423 vmbus_device_unregister(channel->device_obj); 424 } 425 mutex_unlock(&vmbus_connection.channel_mutex); 426 } 427 428 /* 429 * vmbus_process_offer - Process the offer by creating a channel/device 430 * associated with this offer 431 */ 432 static void vmbus_process_offer(struct vmbus_channel *newchannel) 433 { 434 struct vmbus_channel *channel; 435 bool fnew = true; 436 unsigned long flags; 437 u16 dev_type; 438 int ret; 439 440 /* Make sure this is a new offer */ 441 mutex_lock(&vmbus_connection.channel_mutex); 442 443 list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { 444 if (!uuid_le_cmp(channel->offermsg.offer.if_type, 445 newchannel->offermsg.offer.if_type) && 446 !uuid_le_cmp(channel->offermsg.offer.if_instance, 447 newchannel->offermsg.offer.if_instance)) { 448 fnew = false; 449 break; 450 } 451 } 452 453 if (fnew) 454 list_add_tail(&newchannel->listentry, 455 &vmbus_connection.chn_list); 456 457 mutex_unlock(&vmbus_connection.channel_mutex); 458 459 if (!fnew) { 460 /* 461 * Check to see if this is a sub-channel. 462 */ 463 if (newchannel->offermsg.offer.sub_channel_index != 0) { 464 /* 465 * Process the sub-channel. 466 */ 467 newchannel->primary_channel = channel; 468 spin_lock_irqsave(&channel->lock, flags); 469 list_add_tail(&newchannel->sc_list, &channel->sc_list); 470 channel->num_sc++; 471 spin_unlock_irqrestore(&channel->lock, flags); 472 } else 473 goto err_free_chan; 474 } 475 476 dev_type = hv_get_dev_type(newchannel); 477 478 init_vp_index(newchannel, dev_type); 479 480 hv_event_tasklet_disable(newchannel); 481 if (newchannel->target_cpu != get_cpu()) { 482 put_cpu(); 483 smp_call_function_single(newchannel->target_cpu, 484 percpu_channel_enq, 485 newchannel, true); 486 } else { 487 percpu_channel_enq(newchannel); 488 put_cpu(); 489 } 490 hv_event_tasklet_enable(newchannel); 491 492 /* 493 * This state is used to indicate a successful open 494 * so that when we do close the channel normally, we 495 * can cleanup properly 496 */ 497 newchannel->state = CHANNEL_OPEN_STATE; 498 499 if (!fnew) { 500 if (channel->sc_creation_callback != NULL) 501 channel->sc_creation_callback(newchannel); 502 return; 503 } 504 505 /* 506 * Start the process of binding this offer to the driver 507 * We need to set the DeviceObject field before calling 508 * vmbus_child_dev_add() 509 */ 510 newchannel->device_obj = vmbus_device_create( 511 &newchannel->offermsg.offer.if_type, 512 &newchannel->offermsg.offer.if_instance, 513 newchannel); 514 if (!newchannel->device_obj) 515 goto err_deq_chan; 516 517 newchannel->device_obj->device_id = dev_type; 518 /* 519 * Add the new device to the bus. This will kick off device-driver 520 * binding which eventually invokes the device driver's AddDevice() 521 * method. 522 */ 523 mutex_lock(&vmbus_connection.channel_mutex); 524 ret = vmbus_device_register(newchannel->device_obj); 525 mutex_unlock(&vmbus_connection.channel_mutex); 526 527 if (ret != 0) { 528 pr_err("unable to add child device object (relid %d)\n", 529 newchannel->offermsg.child_relid); 530 kfree(newchannel->device_obj); 531 goto err_deq_chan; 532 } 533 return; 534 535 err_deq_chan: 536 mutex_lock(&vmbus_connection.channel_mutex); 537 list_del(&newchannel->listentry); 538 mutex_unlock(&vmbus_connection.channel_mutex); 539 540 hv_event_tasklet_disable(newchannel); 541 if (newchannel->target_cpu != get_cpu()) { 542 put_cpu(); 543 smp_call_function_single(newchannel->target_cpu, 544 percpu_channel_deq, newchannel, true); 545 } else { 546 percpu_channel_deq(newchannel); 547 put_cpu(); 548 } 549 hv_event_tasklet_enable(newchannel); 550 551 vmbus_release_relid(newchannel->offermsg.child_relid); 552 553 err_free_chan: 554 free_channel(newchannel); 555 } 556 557 /* 558 * We use this state to statically distribute the channel interrupt load. 559 */ 560 static int next_numa_node_id; 561 562 /* 563 * Starting with Win8, we can statically distribute the incoming 564 * channel interrupt load by binding a channel to VCPU. 565 * We do this in a hierarchical fashion: 566 * First distribute the primary channels across available NUMA nodes 567 * and then distribute the subchannels amongst the CPUs in the NUMA 568 * node assigned to the primary channel. 569 * 570 * For pre-win8 hosts or non-performance critical channels we assign the 571 * first CPU in the first NUMA node. 572 */ 573 static void init_vp_index(struct vmbus_channel *channel, u16 dev_type) 574 { 575 u32 cur_cpu; 576 bool perf_chn = vmbus_devs[dev_type].perf_device; 577 struct vmbus_channel *primary = channel->primary_channel; 578 int next_node; 579 struct cpumask available_mask; 580 struct cpumask *alloced_mask; 581 582 if ((vmbus_proto_version == VERSION_WS2008) || 583 (vmbus_proto_version == VERSION_WIN7) || (!perf_chn)) { 584 /* 585 * Prior to win8, all channel interrupts are 586 * delivered on cpu 0. 587 * Also if the channel is not a performance critical 588 * channel, bind it to cpu 0. 589 */ 590 channel->numa_node = 0; 591 channel->target_cpu = 0; 592 channel->target_vp = hv_context.vp_index[0]; 593 return; 594 } 595 596 /* 597 * Based on the channel affinity policy, we will assign the NUMA 598 * nodes. 599 */ 600 601 if ((channel->affinity_policy == HV_BALANCED) || (!primary)) { 602 while (true) { 603 next_node = next_numa_node_id++; 604 if (next_node == nr_node_ids) { 605 next_node = next_numa_node_id = 0; 606 continue; 607 } 608 if (cpumask_empty(cpumask_of_node(next_node))) 609 continue; 610 break; 611 } 612 channel->numa_node = next_node; 613 primary = channel; 614 } 615 alloced_mask = &hv_context.hv_numa_map[primary->numa_node]; 616 617 if (cpumask_weight(alloced_mask) == 618 cpumask_weight(cpumask_of_node(primary->numa_node))) { 619 /* 620 * We have cycled through all the CPUs in the node; 621 * reset the alloced map. 622 */ 623 cpumask_clear(alloced_mask); 624 } 625 626 cpumask_xor(&available_mask, alloced_mask, 627 cpumask_of_node(primary->numa_node)); 628 629 cur_cpu = -1; 630 631 if (primary->affinity_policy == HV_LOCALIZED) { 632 /* 633 * Normally Hyper-V host doesn't create more subchannels 634 * than there are VCPUs on the node but it is possible when not 635 * all present VCPUs on the node are initialized by guest. 636 * Clear the alloced_cpus_in_node to start over. 637 */ 638 if (cpumask_equal(&primary->alloced_cpus_in_node, 639 cpumask_of_node(primary->numa_node))) 640 cpumask_clear(&primary->alloced_cpus_in_node); 641 } 642 643 while (true) { 644 cur_cpu = cpumask_next(cur_cpu, &available_mask); 645 if (cur_cpu >= nr_cpu_ids) { 646 cur_cpu = -1; 647 cpumask_copy(&available_mask, 648 cpumask_of_node(primary->numa_node)); 649 continue; 650 } 651 652 if (primary->affinity_policy == HV_LOCALIZED) { 653 /* 654 * NOTE: in the case of sub-channel, we clear the 655 * sub-channel related bit(s) in 656 * primary->alloced_cpus_in_node in 657 * hv_process_channel_removal(), so when we 658 * reload drivers like hv_netvsc in SMP guest, here 659 * we're able to re-allocate 660 * bit from primary->alloced_cpus_in_node. 661 */ 662 if (!cpumask_test_cpu(cur_cpu, 663 &primary->alloced_cpus_in_node)) { 664 cpumask_set_cpu(cur_cpu, 665 &primary->alloced_cpus_in_node); 666 cpumask_set_cpu(cur_cpu, alloced_mask); 667 break; 668 } 669 } else { 670 cpumask_set_cpu(cur_cpu, alloced_mask); 671 break; 672 } 673 } 674 675 channel->target_cpu = cur_cpu; 676 channel->target_vp = hv_context.vp_index[cur_cpu]; 677 } 678 679 static void vmbus_wait_for_unload(void) 680 { 681 int cpu; 682 void *page_addr; 683 struct hv_message *msg; 684 struct vmbus_channel_message_header *hdr; 685 u32 message_type; 686 687 /* 688 * CHANNELMSG_UNLOAD_RESPONSE is always delivered to the CPU which was 689 * used for initial contact or to CPU0 depending on host version. When 690 * we're crashing on a different CPU let's hope that IRQ handler on 691 * the cpu which receives CHANNELMSG_UNLOAD_RESPONSE is still 692 * functional and vmbus_unload_response() will complete 693 * vmbus_connection.unload_event. If not, the last thing we can do is 694 * read message pages for all CPUs directly. 695 */ 696 while (1) { 697 if (completion_done(&vmbus_connection.unload_event)) 698 break; 699 700 for_each_online_cpu(cpu) { 701 page_addr = hv_context.synic_message_page[cpu]; 702 msg = (struct hv_message *)page_addr + 703 VMBUS_MESSAGE_SINT; 704 705 message_type = READ_ONCE(msg->header.message_type); 706 if (message_type == HVMSG_NONE) 707 continue; 708 709 hdr = (struct vmbus_channel_message_header *) 710 msg->u.payload; 711 712 if (hdr->msgtype == CHANNELMSG_UNLOAD_RESPONSE) 713 complete(&vmbus_connection.unload_event); 714 715 vmbus_signal_eom(msg, message_type); 716 } 717 718 mdelay(10); 719 } 720 721 /* 722 * We're crashing and already got the UNLOAD_RESPONSE, cleanup all 723 * maybe-pending messages on all CPUs to be able to receive new 724 * messages after we reconnect. 725 */ 726 for_each_online_cpu(cpu) { 727 page_addr = hv_context.synic_message_page[cpu]; 728 msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT; 729 msg->header.message_type = HVMSG_NONE; 730 } 731 } 732 733 /* 734 * vmbus_unload_response - Handler for the unload response. 735 */ 736 static void vmbus_unload_response(struct vmbus_channel_message_header *hdr) 737 { 738 /* 739 * This is a global event; just wakeup the waiting thread. 740 * Once we successfully unload, we can cleanup the monitor state. 741 */ 742 complete(&vmbus_connection.unload_event); 743 } 744 745 void vmbus_initiate_unload(bool crash) 746 { 747 struct vmbus_channel_message_header hdr; 748 749 /* Pre-Win2012R2 hosts don't support reconnect */ 750 if (vmbus_proto_version < VERSION_WIN8_1) 751 return; 752 753 init_completion(&vmbus_connection.unload_event); 754 memset(&hdr, 0, sizeof(struct vmbus_channel_message_header)); 755 hdr.msgtype = CHANNELMSG_UNLOAD; 756 vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header), 757 !crash); 758 759 /* 760 * vmbus_initiate_unload() is also called on crash and the crash can be 761 * happening in an interrupt context, where scheduling is impossible. 762 */ 763 if (!crash) 764 wait_for_completion(&vmbus_connection.unload_event); 765 else 766 vmbus_wait_for_unload(); 767 } 768 769 /* 770 * vmbus_onoffer - Handler for channel offers from vmbus in parent partition. 771 * 772 */ 773 static void vmbus_onoffer(struct vmbus_channel_message_header *hdr) 774 { 775 struct vmbus_channel_offer_channel *offer; 776 struct vmbus_channel *newchannel; 777 778 offer = (struct vmbus_channel_offer_channel *)hdr; 779 780 /* Allocate the channel object and save this offer. */ 781 newchannel = alloc_channel(); 782 if (!newchannel) { 783 pr_err("Unable to allocate channel object\n"); 784 return; 785 } 786 787 /* 788 * By default we setup state to enable batched 789 * reading. A specific service can choose to 790 * disable this prior to opening the channel. 791 */ 792 newchannel->batched_reading = true; 793 794 /* 795 * Setup state for signalling the host. 796 */ 797 newchannel->sig_event = (struct hv_input_signal_event *) 798 (ALIGN((unsigned long) 799 &newchannel->sig_buf, 800 HV_HYPERCALL_PARAM_ALIGN)); 801 802 newchannel->sig_event->connectionid.asu32 = 0; 803 newchannel->sig_event->connectionid.u.id = VMBUS_EVENT_CONNECTION_ID; 804 newchannel->sig_event->flag_number = 0; 805 newchannel->sig_event->rsvdz = 0; 806 807 if (vmbus_proto_version != VERSION_WS2008) { 808 newchannel->is_dedicated_interrupt = 809 (offer->is_dedicated_interrupt != 0); 810 newchannel->sig_event->connectionid.u.id = 811 offer->connection_id; 812 } 813 814 memcpy(&newchannel->offermsg, offer, 815 sizeof(struct vmbus_channel_offer_channel)); 816 newchannel->monitor_grp = (u8)offer->monitorid / 32; 817 newchannel->monitor_bit = (u8)offer->monitorid % 32; 818 819 vmbus_process_offer(newchannel); 820 } 821 822 /* 823 * vmbus_onoffer_rescind - Rescind offer handler. 824 * 825 * We queue a work item to process this offer synchronously 826 */ 827 static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) 828 { 829 struct vmbus_channel_rescind_offer *rescind; 830 struct vmbus_channel *channel; 831 unsigned long flags; 832 struct device *dev; 833 834 rescind = (struct vmbus_channel_rescind_offer *)hdr; 835 836 mutex_lock(&vmbus_connection.channel_mutex); 837 channel = relid2channel(rescind->child_relid); 838 839 if (channel == NULL) { 840 /* 841 * This is very impossible, because in 842 * vmbus_process_offer(), we have already invoked 843 * vmbus_release_relid() on error. 844 */ 845 goto out; 846 } 847 848 spin_lock_irqsave(&channel->lock, flags); 849 channel->rescind = true; 850 spin_unlock_irqrestore(&channel->lock, flags); 851 852 vmbus_rescind_cleanup(channel); 853 854 if (channel->device_obj) { 855 if (channel->chn_rescind_callback) { 856 channel->chn_rescind_callback(channel); 857 goto out; 858 } 859 /* 860 * We will have to unregister this device from the 861 * driver core. 862 */ 863 dev = get_device(&channel->device_obj->device); 864 if (dev) { 865 vmbus_device_unregister(channel->device_obj); 866 put_device(dev); 867 } 868 } else { 869 hv_process_channel_removal(channel, 870 channel->offermsg.child_relid); 871 } 872 873 out: 874 mutex_unlock(&vmbus_connection.channel_mutex); 875 } 876 877 void vmbus_hvsock_device_unregister(struct vmbus_channel *channel) 878 { 879 mutex_lock(&vmbus_connection.channel_mutex); 880 881 BUG_ON(!is_hvsock_channel(channel)); 882 883 channel->rescind = true; 884 vmbus_device_unregister(channel->device_obj); 885 886 mutex_unlock(&vmbus_connection.channel_mutex); 887 } 888 EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister); 889 890 891 /* 892 * vmbus_onoffers_delivered - 893 * This is invoked when all offers have been delivered. 894 * 895 * Nothing to do here. 896 */ 897 static void vmbus_onoffers_delivered( 898 struct vmbus_channel_message_header *hdr) 899 { 900 } 901 902 /* 903 * vmbus_onopen_result - Open result handler. 904 * 905 * This is invoked when we received a response to our channel open request. 906 * Find the matching request, copy the response and signal the requesting 907 * thread. 908 */ 909 static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr) 910 { 911 struct vmbus_channel_open_result *result; 912 struct vmbus_channel_msginfo *msginfo; 913 struct vmbus_channel_message_header *requestheader; 914 struct vmbus_channel_open_channel *openmsg; 915 unsigned long flags; 916 917 result = (struct vmbus_channel_open_result *)hdr; 918 919 /* 920 * Find the open msg, copy the result and signal/unblock the wait event 921 */ 922 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 923 924 list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, 925 msglistentry) { 926 requestheader = 927 (struct vmbus_channel_message_header *)msginfo->msg; 928 929 if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) { 930 openmsg = 931 (struct vmbus_channel_open_channel *)msginfo->msg; 932 if (openmsg->child_relid == result->child_relid && 933 openmsg->openid == result->openid) { 934 memcpy(&msginfo->response.open_result, 935 result, 936 sizeof( 937 struct vmbus_channel_open_result)); 938 complete(&msginfo->waitevent); 939 break; 940 } 941 } 942 } 943 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 944 } 945 946 /* 947 * vmbus_ongpadl_created - GPADL created handler. 948 * 949 * This is invoked when we received a response to our gpadl create request. 950 * Find the matching request, copy the response and signal the requesting 951 * thread. 952 */ 953 static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr) 954 { 955 struct vmbus_channel_gpadl_created *gpadlcreated; 956 struct vmbus_channel_msginfo *msginfo; 957 struct vmbus_channel_message_header *requestheader; 958 struct vmbus_channel_gpadl_header *gpadlheader; 959 unsigned long flags; 960 961 gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr; 962 963 /* 964 * Find the establish msg, copy the result and signal/unblock the wait 965 * event 966 */ 967 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 968 969 list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, 970 msglistentry) { 971 requestheader = 972 (struct vmbus_channel_message_header *)msginfo->msg; 973 974 if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) { 975 gpadlheader = 976 (struct vmbus_channel_gpadl_header *)requestheader; 977 978 if ((gpadlcreated->child_relid == 979 gpadlheader->child_relid) && 980 (gpadlcreated->gpadl == gpadlheader->gpadl)) { 981 memcpy(&msginfo->response.gpadl_created, 982 gpadlcreated, 983 sizeof( 984 struct vmbus_channel_gpadl_created)); 985 complete(&msginfo->waitevent); 986 break; 987 } 988 } 989 } 990 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 991 } 992 993 /* 994 * vmbus_ongpadl_torndown - GPADL torndown handler. 995 * 996 * This is invoked when we received a response to our gpadl teardown request. 997 * Find the matching request, copy the response and signal the requesting 998 * thread. 999 */ 1000 static void vmbus_ongpadl_torndown( 1001 struct vmbus_channel_message_header *hdr) 1002 { 1003 struct vmbus_channel_gpadl_torndown *gpadl_torndown; 1004 struct vmbus_channel_msginfo *msginfo; 1005 struct vmbus_channel_message_header *requestheader; 1006 struct vmbus_channel_gpadl_teardown *gpadl_teardown; 1007 unsigned long flags; 1008 1009 gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr; 1010 1011 /* 1012 * Find the open msg, copy the result and signal/unblock the wait event 1013 */ 1014 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 1015 1016 list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, 1017 msglistentry) { 1018 requestheader = 1019 (struct vmbus_channel_message_header *)msginfo->msg; 1020 1021 if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) { 1022 gpadl_teardown = 1023 (struct vmbus_channel_gpadl_teardown *)requestheader; 1024 1025 if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) { 1026 memcpy(&msginfo->response.gpadl_torndown, 1027 gpadl_torndown, 1028 sizeof( 1029 struct vmbus_channel_gpadl_torndown)); 1030 complete(&msginfo->waitevent); 1031 break; 1032 } 1033 } 1034 } 1035 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 1036 } 1037 1038 /* 1039 * vmbus_onversion_response - Version response handler 1040 * 1041 * This is invoked when we received a response to our initiate contact request. 1042 * Find the matching request, copy the response and signal the requesting 1043 * thread. 1044 */ 1045 static void vmbus_onversion_response( 1046 struct vmbus_channel_message_header *hdr) 1047 { 1048 struct vmbus_channel_msginfo *msginfo; 1049 struct vmbus_channel_message_header *requestheader; 1050 struct vmbus_channel_version_response *version_response; 1051 unsigned long flags; 1052 1053 version_response = (struct vmbus_channel_version_response *)hdr; 1054 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 1055 1056 list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, 1057 msglistentry) { 1058 requestheader = 1059 (struct vmbus_channel_message_header *)msginfo->msg; 1060 1061 if (requestheader->msgtype == 1062 CHANNELMSG_INITIATE_CONTACT) { 1063 memcpy(&msginfo->response.version_response, 1064 version_response, 1065 sizeof(struct vmbus_channel_version_response)); 1066 complete(&msginfo->waitevent); 1067 } 1068 } 1069 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); 1070 } 1071 1072 /* Channel message dispatch table */ 1073 struct vmbus_channel_message_table_entry 1074 channel_message_table[CHANNELMSG_COUNT] = { 1075 {CHANNELMSG_INVALID, 0, NULL}, 1076 {CHANNELMSG_OFFERCHANNEL, 0, vmbus_onoffer}, 1077 {CHANNELMSG_RESCIND_CHANNELOFFER, 0, vmbus_onoffer_rescind}, 1078 {CHANNELMSG_REQUESTOFFERS, 0, NULL}, 1079 {CHANNELMSG_ALLOFFERS_DELIVERED, 1, vmbus_onoffers_delivered}, 1080 {CHANNELMSG_OPENCHANNEL, 0, NULL}, 1081 {CHANNELMSG_OPENCHANNEL_RESULT, 1, vmbus_onopen_result}, 1082 {CHANNELMSG_CLOSECHANNEL, 0, NULL}, 1083 {CHANNELMSG_GPADL_HEADER, 0, NULL}, 1084 {CHANNELMSG_GPADL_BODY, 0, NULL}, 1085 {CHANNELMSG_GPADL_CREATED, 1, vmbus_ongpadl_created}, 1086 {CHANNELMSG_GPADL_TEARDOWN, 0, NULL}, 1087 {CHANNELMSG_GPADL_TORNDOWN, 1, vmbus_ongpadl_torndown}, 1088 {CHANNELMSG_RELID_RELEASED, 0, NULL}, 1089 {CHANNELMSG_INITIATE_CONTACT, 0, NULL}, 1090 {CHANNELMSG_VERSION_RESPONSE, 1, vmbus_onversion_response}, 1091 {CHANNELMSG_UNLOAD, 0, NULL}, 1092 {CHANNELMSG_UNLOAD_RESPONSE, 1, vmbus_unload_response}, 1093 {CHANNELMSG_18, 0, NULL}, 1094 {CHANNELMSG_19, 0, NULL}, 1095 {CHANNELMSG_20, 0, NULL}, 1096 {CHANNELMSG_TL_CONNECT_REQUEST, 0, NULL}, 1097 }; 1098 1099 /* 1100 * vmbus_onmessage - Handler for channel protocol messages. 1101 * 1102 * This is invoked in the vmbus worker thread context. 1103 */ 1104 void vmbus_onmessage(void *context) 1105 { 1106 struct hv_message *msg = context; 1107 struct vmbus_channel_message_header *hdr; 1108 int size; 1109 1110 hdr = (struct vmbus_channel_message_header *)msg->u.payload; 1111 size = msg->header.payload_size; 1112 1113 if (hdr->msgtype >= CHANNELMSG_COUNT) { 1114 pr_err("Received invalid channel message type %d size %d\n", 1115 hdr->msgtype, size); 1116 print_hex_dump_bytes("", DUMP_PREFIX_NONE, 1117 (unsigned char *)msg->u.payload, size); 1118 return; 1119 } 1120 1121 if (channel_message_table[hdr->msgtype].message_handler) 1122 channel_message_table[hdr->msgtype].message_handler(hdr); 1123 else 1124 pr_err("Unhandled channel message type %d\n", hdr->msgtype); 1125 } 1126 1127 /* 1128 * vmbus_request_offers - Send a request to get all our pending offers. 1129 */ 1130 int vmbus_request_offers(void) 1131 { 1132 struct vmbus_channel_message_header *msg; 1133 struct vmbus_channel_msginfo *msginfo; 1134 int ret; 1135 1136 msginfo = kmalloc(sizeof(*msginfo) + 1137 sizeof(struct vmbus_channel_message_header), 1138 GFP_KERNEL); 1139 if (!msginfo) 1140 return -ENOMEM; 1141 1142 msg = (struct vmbus_channel_message_header *)msginfo->msg; 1143 1144 msg->msgtype = CHANNELMSG_REQUESTOFFERS; 1145 1146 1147 ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_message_header), 1148 true); 1149 if (ret != 0) { 1150 pr_err("Unable to request offers - %d\n", ret); 1151 1152 goto cleanup; 1153 } 1154 1155 cleanup: 1156 kfree(msginfo); 1157 1158 return ret; 1159 } 1160 1161 /* 1162 * Retrieve the (sub) channel on which to send an outgoing request. 1163 * When a primary channel has multiple sub-channels, we try to 1164 * distribute the load equally amongst all available channels. 1165 */ 1166 struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary) 1167 { 1168 struct list_head *cur, *tmp; 1169 int cur_cpu; 1170 struct vmbus_channel *cur_channel; 1171 struct vmbus_channel *outgoing_channel = primary; 1172 int next_channel; 1173 int i = 1; 1174 1175 if (list_empty(&primary->sc_list)) 1176 return outgoing_channel; 1177 1178 next_channel = primary->next_oc++; 1179 1180 if (next_channel > (primary->num_sc)) { 1181 primary->next_oc = 0; 1182 return outgoing_channel; 1183 } 1184 1185 cur_cpu = hv_context.vp_index[get_cpu()]; 1186 put_cpu(); 1187 list_for_each_safe(cur, tmp, &primary->sc_list) { 1188 cur_channel = list_entry(cur, struct vmbus_channel, sc_list); 1189 if (cur_channel->state != CHANNEL_OPENED_STATE) 1190 continue; 1191 1192 if (cur_channel->target_vp == cur_cpu) 1193 return cur_channel; 1194 1195 if (i == next_channel) 1196 return cur_channel; 1197 1198 i++; 1199 } 1200 1201 return outgoing_channel; 1202 } 1203 EXPORT_SYMBOL_GPL(vmbus_get_outgoing_channel); 1204 1205 static void invoke_sc_cb(struct vmbus_channel *primary_channel) 1206 { 1207 struct list_head *cur, *tmp; 1208 struct vmbus_channel *cur_channel; 1209 1210 if (primary_channel->sc_creation_callback == NULL) 1211 return; 1212 1213 list_for_each_safe(cur, tmp, &primary_channel->sc_list) { 1214 cur_channel = list_entry(cur, struct vmbus_channel, sc_list); 1215 1216 primary_channel->sc_creation_callback(cur_channel); 1217 } 1218 } 1219 1220 void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel, 1221 void (*sc_cr_cb)(struct vmbus_channel *new_sc)) 1222 { 1223 primary_channel->sc_creation_callback = sc_cr_cb; 1224 } 1225 EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback); 1226 1227 bool vmbus_are_subchannels_present(struct vmbus_channel *primary) 1228 { 1229 bool ret; 1230 1231 ret = !list_empty(&primary->sc_list); 1232 1233 if (ret) { 1234 /* 1235 * Invoke the callback on sub-channel creation. 1236 * This will present a uniform interface to the 1237 * clients. 1238 */ 1239 invoke_sc_cb(primary); 1240 } 1241 1242 return ret; 1243 } 1244 EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present); 1245 1246 void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel, 1247 void (*chn_rescind_cb)(struct vmbus_channel *)) 1248 { 1249 channel->chn_rescind_callback = chn_rescind_cb; 1250 } 1251 EXPORT_SYMBOL_GPL(vmbus_set_chn_rescind_callback); 1252