// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 *   K. Y. Srinivasan <kys@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/sysctl.h>
#include <linux/slab.h>
#include <linux/acpi.h>
#include <linux/completion.h>
#include <linux/hyperv.h>
#include <linux/kernel_stat.h>
#include <linux/clockchips.h>
#include <linux/cpu.h>
#include <linux/sched/task_stack.h>

#include <linux/delay.h>
#include <linux/notifier.h>
#include <linux/ptrace.h>
#include <linux/screen_info.h>
#include <linux/kdebug.h>
#include <linux/efi.h>
#include <linux/random.h>
#include <linux/kernel.h>
#include <linux/syscore_ops.h>
#include <clocksource/hyperv_timer.h>
#include "hyperv_vmbus.h"

/* One dynamically added device ID, linked into a driver's dynids list. */
struct vmbus_dynid {
	struct list_head node;
	struct hv_vmbus_device_id id;
};

static struct acpi_device  *hv_acpi_dev;

static struct completion probe_event;

static int hyperv_cpuhp_online;

static void *hv_panic_page;

/* Values parsed from ACPI DSDT */
static int vmbus_irq;
int vmbus_interrupt;

/*
 * Boolean to control whether to report panic messages over Hyper-V.
 *
 * It can be set via /proc/sys/kernel/hyperv/record_panic_msg
 */
static int sysctl_record_panic_msg = 1;

/*
 * Returns non-zero when the panic/die notifiers below should report the
 * panic themselves: either panic-message recording is switched off
 * (sysctl_record_panic_msg == 0) or no hv_panic_page is available.
 */
static int hyperv_report_reg(void)
{
	return !sysctl_record_panic_msg || !hv_panic_page;
}

/*
 * Panic notifier callback.
 *
 * Always initiates a VMBus unload first, then reports the panic through
 * hyperv_report_panic() when the crash MSRs are available and
 * hyperv_report_reg() says the report belongs here.  Always returns
 * NOTIFY_DONE.
 */
static int hyperv_panic_event(struct notifier_block *nb, unsigned long val,
			      void *args)
{
	struct pt_regs *regs;

	vmbus_initiate_unload(true);

	/*
	 * Hyper-V should be notified only once about a panic.  If we will be
	 * doing hyperv_report_panic_msg() later with kmsg data, don't do
	 * the notification here.
	 */
	if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE
	    && hyperv_report_reg()) {
		regs = current_pt_regs();
		hyperv_report_panic(regs, val, false);
	}
	return NOTIFY_DONE;
}

/*
 * Die notifier callback.
 *
 * Only DIE_OOPS events are considered; the registers come from the
 * struct die_args passed in @args.  Always returns NOTIFY_DONE.
 */
static int hyperv_die_event(struct notifier_block *nb, unsigned long val,
			    void *args)
{
	struct die_args *die = args;
	struct pt_regs *regs = die->regs;

	/* Don't notify Hyper-V if the die event is other than oops */
	if (val != DIE_OOPS)
		return NOTIFY_DONE;

	/*
	 * Hyper-V should be notified only once about a panic.  If we will be
	 * doing hyperv_report_panic_msg() later with kmsg data, don't do
	 * the notification here.
	 */
	if (hyperv_report_reg())
		hyperv_report_panic(regs, val, true);
	return NOTIFY_DONE;
}

/*
 * Notifier blocks wrapping the two callbacks above.
 * NOTE(review): their registration is not visible in this part of the file.
 */
static struct notifier_block hyperv_die_block = {
	.notifier_call = hyperv_die_event,
};
static struct notifier_block hyperv_panic_block = {
	.notifier_call = hyperv_panic_event,
};

static const char *fb_mmio_name = "fb_range";
static struct resource *fb_mmio;
static struct resource *hyperv_mmio;
static DEFINE_MUTEX(hyperv_mmio_lock);

/* Returns 0 when hv_acpi_dev has been set, -ENODEV otherwise. */
static int vmbus_exists(void)
{
	if (hv_acpi_dev == NULL)
		return -ENODEV;

	return 0;
}

/* Monitor group index of a channel: its offer's monitorid divided by 32. */
static u8 channel_monitor_group(const struct vmbus_channel *channel)
{
	return (u8)channel->offermsg.monitorid / 32;
}

/* Offset of a channel inside its monitor group: monitorid modulo 32. */
static u8 channel_monitor_offset(const struct vmbus_channel *channel)
{
	return (u8)channel->offermsg.monitorid % 32;
}

/* Reads the 'pending' word of the channel's trigger group in @monitor_page. */
static u32 channel_pending(const struct vmbus_channel *channel,
			   const struct hv_monitor_page *monitor_page)
{
	u8 monitor_group = channel_monitor_group(channel);

	return monitor_page->trigger_group[monitor_group].pending;
}

/* Reads the latency entry for the channel's group/offset in @monitor_page. */
static u32 channel_latency(const struct vmbus_channel *channel,
			   const struct hv_monitor_page *monitor_page)
{
	u8 monitor_group = channel_monitor_group(channel);
	u8 monitor_offset = channel_monitor_offset(channel);

	return monitor_page->latency[monitor_group][monitor_offset];
}

/*
 * Reads the connection id stored in the monitor page parameter slot for the
 * channel's group/offset.
 * NOTE(review): unlike the sibling helpers above, the parameters are not
 * const-qualified although nothing is written through them.
 */
static u32 channel_conn_id(struct vmbus_channel *channel,
			   struct hv_monitor_page *monitor_page)
{
	u8 monitor_group = channel_monitor_group(channel);
	u8 monitor_offset = channel_monitor_offset(channel);
	return monitor_page->parameter[monitor_group][monitor_offset].connectionid.u.id;
}

/*
 * Per-device sysfs "show" callbacks.  Except where noted, each one returns
 * -ENODEV when the device has no channel and otherwise formats a single
 * value into @buf with sprintf().
 */

/* 'id': the channel's child_relid. */
static ssize_t id_show(struct device *dev, struct device_attribute *dev_attr,
		       char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n", hv_dev->channel->offermsg.child_relid);
}
static DEVICE_ATTR_RO(id);

/* 'state': the channel's state field. */
static ssize_t state_show(struct device *dev, struct device_attribute *dev_attr,
			  char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n", hv_dev->channel->state);
}
static DEVICE_ATTR_RO(state);

/* 'monitor_id': the monitorid from the channel's offer message. */
static ssize_t monitor_id_show(struct device *dev,
			       struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n", hv_dev->channel->offermsg.monitorid);
}
static DEVICE_ATTR_RO(monitor_id);

/* 'class_id': the offer's if_type GUID, printed with %pUl inside braces. */
static ssize_t class_id_show(struct device *dev,
			       struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "{%pUl}\n",
		       &hv_dev->channel->offermsg.offer.if_type);
}
static DEVICE_ATTR_RO(class_id);

/* 'device_id': the offer's if_instance GUID, printed like class_id. */
static ssize_t device_id_show(struct device *dev,
			      struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "{%pUl}\n",
		       &hv_dev->channel->offermsg.offer.if_instance);
}
static DEVICE_ATTR_RO(device_id);

/*
 * 'modalias': "vmbus:" followed by the UUID_SIZE bytes of dev_type as hex.
 * Does not look at the channel, so there is no -ENODEV path here.
 */
static ssize_t modalias_show(struct device *dev,
			     struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	return sprintf(buf, "vmbus:%*phN\n", UUID_SIZE, &hv_dev->dev_type);
}
static DEVICE_ATTR_RO(modalias);

#ifdef CONFIG_NUMA
/* 'numa_node': cpu_to_node() of the channel's target_cpu. */
static ssize_t numa_node_show(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;

	return sprintf(buf, "%d\n", cpu_to_node(hv_dev->channel->target_cpu));
}
static DEVICE_ATTR_RO(numa_node);
#endif

/* 'server_monitor_pending': channel_pending() on monitor_pages[0]. */
static ssize_t server_monitor_pending_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_pending(hv_dev->channel,
				       vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_pending);

/* 'client_monitor_pending': channel_pending() on monitor_pages[1]. */
static ssize_t client_monitor_pending_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_pending(hv_dev->channel,
				       vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_pending);

/* 'server_monitor_latency': channel_latency() on monitor_pages[0]. */
static ssize_t server_monitor_latency_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_latency(hv_dev->channel,
				       vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_latency);

/* 'client_monitor_latency': channel_latency() on monitor_pages[1]. */
static ssize_t client_monitor_latency_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_latency(hv_dev->channel,
				       vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_latency);

/* 'server_monitor_conn_id': channel_conn_id() on monitor_pages[0]. */
static ssize_t server_monitor_conn_id_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_conn_id(hv_dev->channel,
				       vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_conn_id);

/* 'client_monitor_conn_id': channel_conn_id() on monitor_pages[1]. */
static ssize_t client_monitor_conn_id_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_conn_id(hv_dev->channel,
				       vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_conn_id);

/*
 * Ring-buffer attributes.  Each callback below fills a local
 * hv_ring_buffer_debug_info through hv_ringbuffer_get_debuginfo(),
 * propagates a negative return unchanged and otherwise prints one field.
 */

/* 'out_intr_mask': current_interrupt_mask of the outbound ring. */
static ssize_t out_intr_mask_show(struct device *dev,
				  struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info outbound;
	int ret;

	if (!hv_dev->channel)
		return -ENODEV;

	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
					  &outbound);
	if (ret < 0)
		return ret;

	return sprintf(buf, "%d\n", outbound.current_interrupt_mask);
}
static DEVICE_ATTR_RO(out_intr_mask);

/* 'out_read_index': current_read_index of the outbound ring. */
static ssize_t out_read_index_show(struct device *dev,
				   struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info outbound;
	int ret;

	if (!hv_dev->channel)
		return -ENODEV;

	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
					  &outbound);
	if (ret < 0)
		return ret;
	return sprintf(buf, "%d\n", outbound.current_read_index);
}
static DEVICE_ATTR_RO(out_read_index);

/* 'out_write_index': current_write_index of the outbound ring. */
static ssize_t out_write_index_show(struct device *dev,
				    struct device_attribute *dev_attr,
				    char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info outbound;
	int ret;

	if (!hv_dev->channel)
		return -ENODEV;

	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
					  &outbound);
	if (ret < 0)
		return ret;
	return sprintf(buf, "%d\n", outbound.current_write_index);
}
static DEVICE_ATTR_RO(out_write_index);

/* 'out_read_bytes_avail': bytes_avail_toread of the outbound ring. */
static ssize_t out_read_bytes_avail_show(struct device *dev,
					 struct device_attribute *dev_attr,
					 char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info outbound;
	int ret;

	if (!hv_dev->channel)
		return -ENODEV;

	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
					  &outbound);
	if (ret < 0)
		return ret;
	return sprintf(buf, "%d\n", outbound.bytes_avail_toread);
}
static DEVICE_ATTR_RO(out_read_bytes_avail);

/* 'out_write_bytes_avail': bytes_avail_towrite of the outbound ring. */
static ssize_t out_write_bytes_avail_show(struct device *dev,
					  struct device_attribute *dev_attr,
					  char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info outbound;
	int ret;

	if (!hv_dev->channel)
		return -ENODEV;

	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
					  &outbound);
	if (ret < 0)
		return ret;
	return sprintf(buf, "%d\n", outbound.bytes_avail_towrite);
}
static DEVICE_ATTR_RO(out_write_bytes_avail);

/* 'in_intr_mask': current_interrupt_mask of the inbound ring. */
static ssize_t in_intr_mask_show(struct device *dev,
				 struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info inbound;
	int ret;

	if (!hv_dev->channel)
		return -ENODEV;

	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
	if (ret < 0)
		return ret;

	return sprintf(buf, "%d\n", inbound.current_interrupt_mask);
}
static DEVICE_ATTR_RO(in_intr_mask);

/* 'in_read_index': current_read_index of the inbound ring. */
static ssize_t in_read_index_show(struct device *dev,
				  struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info inbound;
	int ret;

	if (!hv_dev->channel)
		return -ENODEV;

	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
	if (ret < 0)
		return ret;

	return sprintf(buf, "%d\n", inbound.current_read_index);
}
static DEVICE_ATTR_RO(in_read_index);

/* 'in_write_index': current_write_index of the inbound ring. */
static ssize_t in_write_index_show(struct device *dev,
				   struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info inbound;
	int ret;

	if (!hv_dev->channel)
		return -ENODEV;

	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
	if (ret < 0)
		return ret;

	return sprintf(buf, "%d\n", inbound.current_write_index);
}
static DEVICE_ATTR_RO(in_write_index);

/* 'in_read_bytes_avail': bytes_avail_toread of the inbound ring. */
static ssize_t in_read_bytes_avail_show(struct device *dev,
					struct device_attribute *dev_attr,
					char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info inbound;
	int ret;

	if (!hv_dev->channel)
		return -ENODEV;

	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
	if (ret < 0)
		return ret;

	return sprintf(buf, "%d\n", inbound.bytes_avail_toread);
}
static DEVICE_ATTR_RO(in_read_bytes_avail);

/*
 * 'in_write_bytes_avail': bytes_avail_towrite of the inbound ring.
 * Returns -ENODEV without a channel and propagates a negative return from
 * hv_ringbuffer_get_debuginfo().
 */
static ssize_t in_write_bytes_avail_show(struct device *dev,
					 struct device_attribute *dev_attr,
					 char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info inbound;
	int ret;

	if (!hv_dev->channel)
		return -ENODEV;

	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
	if (ret < 0)
		return ret;

	return sprintf(buf, "%d\n", inbound.bytes_avail_towrite);
}
static DEVICE_ATTR_RO(in_write_bytes_avail);

/*
 * 'channel_vp_mapping': one "child_relid:target_cpu" line for the primary
 * channel followed by one line per entry on its sc_list.
 *
 * The list is walked under vmbus_connection.channel_mutex.  Output is capped
 * at PAGE_SIZE: the loop stops once the buffer is (nearly) full and the
 * per-entry writes use scnprintf(), which returns the bytes actually written.
 * Returns the total number of bytes written, or -ENODEV without a channel.
 */
static ssize_t channel_vp_mapping_show(struct device *dev,
				       struct device_attribute *dev_attr,
				       char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct vmbus_channel *channel = hv_dev->channel, *cur_sc;
	int buf_size = PAGE_SIZE, n_written, tot_written;
	struct list_head *cur;

	if (!channel)
		return -ENODEV;

	mutex_lock(&vmbus_connection.channel_mutex);

	tot_written = snprintf(buf, buf_size, "%u:%u\n",
		channel->offermsg.child_relid, channel->target_cpu);

	list_for_each(cur, &channel->sc_list) {
		/* No room left for another entry plus the terminating NUL. */
		if (tot_written >= buf_size - 1)
			break;

		cur_sc = list_entry(cur, struct vmbus_channel, sc_list);
		n_written = scnprintf(buf + tot_written,
				     buf_size - tot_written,
				     "%u:%u\n",
				     cur_sc->offermsg.child_relid,
				     cur_sc->target_cpu);
		tot_written += n_written;
	}

	mutex_unlock(&vmbus_connection.channel_mutex);

	return tot_written;
}
static DEVICE_ATTR_RO(channel_vp_mapping);

/* 'vendor': the device's vendor_id in hex. */
static ssize_t vendor_show(struct device *dev,
			   struct device_attribute *dev_attr,
			   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	return sprintf(buf, "0x%x\n", hv_dev->vendor_id);
}
static DEVICE_ATTR_RO(vendor);

/* 'device': the device's device_id in hex. */
static ssize_t device_show(struct device *dev,
			   struct device_attribute *dev_attr,
			   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	return sprintf(buf, "0x%x\n", hv_dev->device_id);
}
static DEVICE_ATTR_RO(device);

/*
 * 'driver_override' store: replace the device's driver_override string.
 *
 * The input is duplicated, cut at the first newline, and swapped in under
 * the device lock.  An empty string clears the override (the field becomes
 * NULL).  The previous string is freed after the lock is dropped.
 *
 * Returns @count on success, -EINVAL if the input is PAGE_SIZE - 1 bytes or
 * longer, -ENOMEM if the copy cannot be allocated.
 */
static ssize_t driver_override_store(struct device *dev,
				     struct device_attribute *attr,
				     const char *buf, size_t count)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	char *driver_override, *old, *cp;

	/* We need to keep extra room for a newline */
	if (count >= (PAGE_SIZE - 1))
		return -EINVAL;

	driver_override = kstrndup(buf, count, GFP_KERNEL);
	if (!driver_override)
		return -ENOMEM;

	cp = strchr(driver_override, '\n');
	if (cp)
		*cp = '\0';

	device_lock(dev);
	old = hv_dev->driver_override;
	if (strlen(driver_override)) {
		hv_dev->driver_override = driver_override;
	} else {
		kfree(driver_override);
		hv_dev->driver_override = NULL;
	}
	device_unlock(dev);

	kfree(old);

	return count;
}

/*
 * 'driver_override' show: print the current override followed by a newline,
 * reading the pointer under the device lock.
 * NOTE(review): when no override is set the pointer handed to "%s" is NULL;
 * the resulting text then depends on how the kernel's vsnprintf() renders a
 * NULL string.
 */
static ssize_t driver_override_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	ssize_t len;

	device_lock(dev);
	len = snprintf(buf, PAGE_SIZE, "%s\n", hv_dev->driver_override);
	device_unlock(dev);

	return len;
}
static DEVICE_ATTR_RW(driver_override);

/* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */
static struct attribute *vmbus_dev_attrs[] = {
	&dev_attr_id.attr,
	&dev_attr_state.attr,
	&dev_attr_monitor_id.attr,
	&dev_attr_class_id.attr,
	&dev_attr_device_id.attr,
	&dev_attr_modalias.attr,
#ifdef CONFIG_NUMA
	&dev_attr_numa_node.attr,
#endif
	&dev_attr_server_monitor_pending.attr,
	&dev_attr_client_monitor_pending.attr,
	&dev_attr_server_monitor_latency.attr,
	&dev_attr_client_monitor_latency.attr,
	&dev_attr_server_monitor_conn_id.attr,
	&dev_attr_client_monitor_conn_id.attr,
	&dev_attr_out_intr_mask.attr,
	&dev_attr_out_read_index.attr,
	&dev_attr_out_write_index.attr,
	&dev_attr_out_read_bytes_avail.attr,
	&dev_attr_out_write_bytes_avail.attr,
	&dev_attr_in_intr_mask.attr,
	&dev_attr_in_read_index.attr,
	&dev_attr_in_write_index.attr,
	&dev_attr_in_read_bytes_avail.attr,
	&dev_attr_in_write_bytes_avail.attr,
	&dev_attr_channel_vp_mapping.attr,
	&dev_attr_vendor.attr,
	&dev_attr_device.attr,
	&dev_attr_driver_override.attr,
	NULL,
};

/*
 * Device-level attribute_group callback function. Returns the permission for
 * each attribute, and returns 0 if an attribute is not visible.
 *
 * NOTE(review): hv_dev->channel is dereferenced here without the NULL check
 * that the individual show callbacks perform; presumably the channel is
 * always set by the time the group is created - confirm against the caller.
 */
static umode_t vmbus_dev_attr_is_visible(struct kobject *kobj,
					 struct attribute *attr, int idx)
{
	struct device *dev = kobj_to_dev(kobj);
	const struct hv_device *hv_dev = device_to_hv_device(dev);

	/* Hide the monitor attributes if the monitor mechanism is not used. */
	if (!hv_dev->channel->offermsg.monitor_allocated &&
	    (attr == &dev_attr_monitor_id.attr ||
	     attr == &dev_attr_server_monitor_pending.attr ||
	     attr == &dev_attr_client_monitor_pending.attr ||
	     attr == &dev_attr_server_monitor_latency.attr ||
	     attr == &dev_attr_client_monitor_latency.attr ||
	     attr == &dev_attr_server_monitor_conn_id.attr ||
	     attr == &dev_attr_client_monitor_conn_id.attr))
		return 0;

	return attr->mode;
}

static const struct attribute_group vmbus_dev_group = {
	.attrs = vmbus_dev_attrs,
	.is_visible = vmbus_dev_attr_is_visible
};
__ATTRIBUTE_GROUPS(vmbus_dev);

/*
 * vmbus_uevent - add uevent for our device
 *
 * This routine is invoked when a device is added or removed on the vmbus to
 * generate a uevent to udev in the userspace.
The udev will then look at its 683 * rule and the uevent generated here to load the appropriate driver 684 * 685 * The alias string will be of the form vmbus:guid where guid is the string 686 * representation of the device guid (each byte of the guid will be 687 * represented with two hex characters. 688 */ 689 static int vmbus_uevent(struct device *device, struct kobj_uevent_env *env) 690 { 691 struct hv_device *dev = device_to_hv_device(device); 692 const char *format = "MODALIAS=vmbus:%*phN"; 693 694 return add_uevent_var(env, format, UUID_SIZE, &dev->dev_type); 695 } 696 697 static const struct hv_vmbus_device_id * 698 hv_vmbus_dev_match(const struct hv_vmbus_device_id *id, const guid_t *guid) 699 { 700 if (id == NULL) 701 return NULL; /* empty device table */ 702 703 for (; !guid_is_null(&id->guid); id++) 704 if (guid_equal(&id->guid, guid)) 705 return id; 706 707 return NULL; 708 } 709 710 static const struct hv_vmbus_device_id * 711 hv_vmbus_dynid_match(struct hv_driver *drv, const guid_t *guid) 712 { 713 const struct hv_vmbus_device_id *id = NULL; 714 struct vmbus_dynid *dynid; 715 716 spin_lock(&drv->dynids.lock); 717 list_for_each_entry(dynid, &drv->dynids.list, node) { 718 if (guid_equal(&dynid->id.guid, guid)) { 719 id = &dynid->id; 720 break; 721 } 722 } 723 spin_unlock(&drv->dynids.lock); 724 725 return id; 726 } 727 728 static const struct hv_vmbus_device_id vmbus_device_null; 729 730 /* 731 * Return a matching hv_vmbus_device_id pointer. 732 * If there is no match, return NULL. 
733 */ 734 static const struct hv_vmbus_device_id *hv_vmbus_get_id(struct hv_driver *drv, 735 struct hv_device *dev) 736 { 737 const guid_t *guid = &dev->dev_type; 738 const struct hv_vmbus_device_id *id; 739 740 /* When driver_override is set, only bind to the matching driver */ 741 if (dev->driver_override && strcmp(dev->driver_override, drv->name)) 742 return NULL; 743 744 /* Look at the dynamic ids first, before the static ones */ 745 id = hv_vmbus_dynid_match(drv, guid); 746 if (!id) 747 id = hv_vmbus_dev_match(drv->id_table, guid); 748 749 /* driver_override will always match, send a dummy id */ 750 if (!id && dev->driver_override) 751 id = &vmbus_device_null; 752 753 return id; 754 } 755 756 /* vmbus_add_dynid - add a new device ID to this driver and re-probe devices */ 757 static int vmbus_add_dynid(struct hv_driver *drv, guid_t *guid) 758 { 759 struct vmbus_dynid *dynid; 760 761 dynid = kzalloc(sizeof(*dynid), GFP_KERNEL); 762 if (!dynid) 763 return -ENOMEM; 764 765 dynid->id.guid = *guid; 766 767 spin_lock(&drv->dynids.lock); 768 list_add_tail(&dynid->node, &drv->dynids.list); 769 spin_unlock(&drv->dynids.lock); 770 771 return driver_attach(&drv->driver); 772 } 773 774 static void vmbus_free_dynids(struct hv_driver *drv) 775 { 776 struct vmbus_dynid *dynid, *n; 777 778 spin_lock(&drv->dynids.lock); 779 list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) { 780 list_del(&dynid->node); 781 kfree(dynid); 782 } 783 spin_unlock(&drv->dynids.lock); 784 } 785 786 /* 787 * store_new_id - sysfs frontend to vmbus_add_dynid() 788 * 789 * Allow GUIDs to be added to an existing driver via sysfs. 
790 */ 791 static ssize_t new_id_store(struct device_driver *driver, const char *buf, 792 size_t count) 793 { 794 struct hv_driver *drv = drv_to_hv_drv(driver); 795 guid_t guid; 796 ssize_t retval; 797 798 retval = guid_parse(buf, &guid); 799 if (retval) 800 return retval; 801 802 if (hv_vmbus_dynid_match(drv, &guid)) 803 return -EEXIST; 804 805 retval = vmbus_add_dynid(drv, &guid); 806 if (retval) 807 return retval; 808 return count; 809 } 810 static DRIVER_ATTR_WO(new_id); 811 812 /* 813 * store_remove_id - remove a PCI device ID from this driver 814 * 815 * Removes a dynamic pci device ID to this driver. 816 */ 817 static ssize_t remove_id_store(struct device_driver *driver, const char *buf, 818 size_t count) 819 { 820 struct hv_driver *drv = drv_to_hv_drv(driver); 821 struct vmbus_dynid *dynid, *n; 822 guid_t guid; 823 ssize_t retval; 824 825 retval = guid_parse(buf, &guid); 826 if (retval) 827 return retval; 828 829 retval = -ENODEV; 830 spin_lock(&drv->dynids.lock); 831 list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) { 832 struct hv_vmbus_device_id *id = &dynid->id; 833 834 if (guid_equal(&id->guid, &guid)) { 835 list_del(&dynid->node); 836 kfree(dynid); 837 retval = count; 838 break; 839 } 840 } 841 spin_unlock(&drv->dynids.lock); 842 843 return retval; 844 } 845 static DRIVER_ATTR_WO(remove_id); 846 847 static struct attribute *vmbus_drv_attrs[] = { 848 &driver_attr_new_id.attr, 849 &driver_attr_remove_id.attr, 850 NULL, 851 }; 852 ATTRIBUTE_GROUPS(vmbus_drv); 853 854 855 /* 856 * vmbus_match - Attempt to match the specified device to the specified driver 857 */ 858 static int vmbus_match(struct device *device, struct device_driver *driver) 859 { 860 struct hv_driver *drv = drv_to_hv_drv(driver); 861 struct hv_device *hv_dev = device_to_hv_device(device); 862 863 /* The hv_sock driver handles all hv_sock offers. 
*/ 864 if (is_hvsock_channel(hv_dev->channel)) 865 return drv->hvsock; 866 867 if (hv_vmbus_get_id(drv, hv_dev)) 868 return 1; 869 870 return 0; 871 } 872 873 /* 874 * vmbus_probe - Add the new vmbus's child device 875 */ 876 static int vmbus_probe(struct device *child_device) 877 { 878 int ret = 0; 879 struct hv_driver *drv = 880 drv_to_hv_drv(child_device->driver); 881 struct hv_device *dev = device_to_hv_device(child_device); 882 const struct hv_vmbus_device_id *dev_id; 883 884 dev_id = hv_vmbus_get_id(drv, dev); 885 if (drv->probe) { 886 ret = drv->probe(dev, dev_id); 887 if (ret != 0) 888 pr_err("probe failed for device %s (%d)\n", 889 dev_name(child_device), ret); 890 891 } else { 892 pr_err("probe not set for driver %s\n", 893 dev_name(child_device)); 894 ret = -ENODEV; 895 } 896 return ret; 897 } 898 899 /* 900 * vmbus_remove - Remove a vmbus device 901 */ 902 static int vmbus_remove(struct device *child_device) 903 { 904 struct hv_driver *drv; 905 struct hv_device *dev = device_to_hv_device(child_device); 906 907 if (child_device->driver) { 908 drv = drv_to_hv_drv(child_device->driver); 909 if (drv->remove) 910 drv->remove(dev); 911 } 912 913 return 0; 914 } 915 916 917 /* 918 * vmbus_shutdown - Shutdown a vmbus device 919 */ 920 static void vmbus_shutdown(struct device *child_device) 921 { 922 struct hv_driver *drv; 923 struct hv_device *dev = device_to_hv_device(child_device); 924 925 926 /* The device may not be attached yet */ 927 if (!child_device->driver) 928 return; 929 930 drv = drv_to_hv_drv(child_device->driver); 931 932 if (drv->shutdown) 933 drv->shutdown(dev); 934 } 935 936 #ifdef CONFIG_PM_SLEEP 937 /* 938 * vmbus_suspend - Suspend a vmbus device 939 */ 940 static int vmbus_suspend(struct device *child_device) 941 { 942 struct hv_driver *drv; 943 struct hv_device *dev = device_to_hv_device(child_device); 944 945 /* The device may not be attached yet */ 946 if (!child_device->driver) 947 return 0; 948 949 drv = 
drv_to_hv_drv(child_device->driver); 950 if (!drv->suspend) 951 return -EOPNOTSUPP; 952 953 return drv->suspend(dev); 954 } 955 956 /* 957 * vmbus_resume - Resume a vmbus device 958 */ 959 static int vmbus_resume(struct device *child_device) 960 { 961 struct hv_driver *drv; 962 struct hv_device *dev = device_to_hv_device(child_device); 963 964 /* The device may not be attached yet */ 965 if (!child_device->driver) 966 return 0; 967 968 drv = drv_to_hv_drv(child_device->driver); 969 if (!drv->resume) 970 return -EOPNOTSUPP; 971 972 return drv->resume(dev); 973 } 974 #else 975 #define vmbus_suspend NULL 976 #define vmbus_resume NULL 977 #endif /* CONFIG_PM_SLEEP */ 978 979 /* 980 * vmbus_device_release - Final callback release of the vmbus child device 981 */ 982 static void vmbus_device_release(struct device *device) 983 { 984 struct hv_device *hv_dev = device_to_hv_device(device); 985 struct vmbus_channel *channel = hv_dev->channel; 986 987 hv_debug_rm_dev_dir(hv_dev); 988 989 mutex_lock(&vmbus_connection.channel_mutex); 990 hv_process_channel_removal(channel); 991 mutex_unlock(&vmbus_connection.channel_mutex); 992 kfree(hv_dev); 993 } 994 995 /* 996 * Note: we must use the "noirq" ops: see the comment before vmbus_bus_pm. 997 * 998 * suspend_noirq/resume_noirq are set to NULL to support Suspend-to-Idle: we 999 * shouldn't suspend the vmbus devices upon Suspend-to-Idle, otherwise there 1000 * is no way to wake up a Generation-2 VM. 1001 * 1002 * The other 4 ops are for hibernation. 
1003 */ 1004 1005 static const struct dev_pm_ops vmbus_pm = { 1006 .suspend_noirq = NULL, 1007 .resume_noirq = NULL, 1008 .freeze_noirq = vmbus_suspend, 1009 .thaw_noirq = vmbus_resume, 1010 .poweroff_noirq = vmbus_suspend, 1011 .restore_noirq = vmbus_resume, 1012 }; 1013 1014 /* The one and only one */ 1015 static struct bus_type hv_bus = { 1016 .name = "vmbus", 1017 .match = vmbus_match, 1018 .shutdown = vmbus_shutdown, 1019 .remove = vmbus_remove, 1020 .probe = vmbus_probe, 1021 .uevent = vmbus_uevent, 1022 .dev_groups = vmbus_dev_groups, 1023 .drv_groups = vmbus_drv_groups, 1024 .pm = &vmbus_pm, 1025 }; 1026 1027 struct onmessage_work_context { 1028 struct work_struct work; 1029 struct { 1030 struct hv_message_header header; 1031 u8 payload[]; 1032 } msg; 1033 }; 1034 1035 static void vmbus_onmessage_work(struct work_struct *work) 1036 { 1037 struct onmessage_work_context *ctx; 1038 1039 /* Do not process messages if we're in DISCONNECTED state */ 1040 if (vmbus_connection.conn_state == DISCONNECTED) 1041 return; 1042 1043 ctx = container_of(work, struct onmessage_work_context, 1044 work); 1045 vmbus_onmessage((struct vmbus_channel_message_header *) 1046 &ctx->msg.payload); 1047 kfree(ctx); 1048 } 1049 1050 void vmbus_on_msg_dpc(unsigned long data) 1051 { 1052 struct hv_per_cpu_context *hv_cpu = (void *)data; 1053 void *page_addr = hv_cpu->synic_message_page; 1054 struct hv_message *msg = (struct hv_message *)page_addr + 1055 VMBUS_MESSAGE_SINT; 1056 struct vmbus_channel_message_header *hdr; 1057 const struct vmbus_channel_message_table_entry *entry; 1058 struct onmessage_work_context *ctx; 1059 u32 message_type = msg->header.message_type; 1060 1061 /* 1062 * 'enum vmbus_channel_message_type' is supposed to always be 'u32' as 1063 * it is being used in 'struct vmbus_channel_message_header' definition 1064 * which is supposed to match hypervisor ABI. 
1065 */ 1066 BUILD_BUG_ON(sizeof(enum vmbus_channel_message_type) != sizeof(u32)); 1067 1068 if (message_type == HVMSG_NONE) 1069 /* no msg */ 1070 return; 1071 1072 hdr = (struct vmbus_channel_message_header *)msg->u.payload; 1073 1074 trace_vmbus_on_msg_dpc(hdr); 1075 1076 if (hdr->msgtype >= CHANNELMSG_COUNT) { 1077 WARN_ONCE(1, "unknown msgtype=%d\n", hdr->msgtype); 1078 goto msg_handled; 1079 } 1080 1081 if (msg->header.payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT) { 1082 WARN_ONCE(1, "payload size is too large (%d)\n", 1083 msg->header.payload_size); 1084 goto msg_handled; 1085 } 1086 1087 entry = &channel_message_table[hdr->msgtype]; 1088 1089 if (!entry->message_handler) 1090 goto msg_handled; 1091 1092 if (msg->header.payload_size < entry->min_payload_len) { 1093 WARN_ONCE(1, "message too short: msgtype=%d len=%d\n", 1094 hdr->msgtype, msg->header.payload_size); 1095 goto msg_handled; 1096 } 1097 1098 if (entry->handler_type == VMHT_BLOCKING) { 1099 ctx = kmalloc(sizeof(*ctx) + msg->header.payload_size, 1100 GFP_ATOMIC); 1101 if (ctx == NULL) 1102 return; 1103 1104 INIT_WORK(&ctx->work, vmbus_onmessage_work); 1105 memcpy(&ctx->msg, msg, sizeof(msg->header) + 1106 msg->header.payload_size); 1107 1108 /* 1109 * The host can generate a rescind message while we 1110 * may still be handling the original offer. We deal with 1111 * this condition by relying on the synchronization provided 1112 * by offer_in_progress and by channel_mutex. See also the 1113 * inline comments in vmbus_onoffer_rescind(). 1114 */ 1115 switch (hdr->msgtype) { 1116 case CHANNELMSG_RESCIND_CHANNELOFFER: 1117 /* 1118 * If we are handling the rescind message; 1119 * schedule the work on the global work queue. 
1120 * 1121 * The OFFER message and the RESCIND message should 1122 * not be handled by the same serialized work queue, 1123 * because the OFFER handler may call vmbus_open(), 1124 * which tries to open the channel by sending an 1125 * OPEN_CHANNEL message to the host and waits for 1126 * the host's response; however, if the host has 1127 * rescinded the channel before it receives the 1128 * OPEN_CHANNEL message, the host just silently 1129 * ignores the OPEN_CHANNEL message; as a result, 1130 * the guest's OFFER handler hangs for ever, if we 1131 * handle the RESCIND message in the same serialized 1132 * work queue: the RESCIND handler can not start to 1133 * run before the OFFER handler finishes. 1134 */ 1135 schedule_work(&ctx->work); 1136 break; 1137 1138 case CHANNELMSG_OFFERCHANNEL: 1139 /* 1140 * The host sends the offer message of a given channel 1141 * before sending the rescind message of the same 1142 * channel. These messages are sent to the guest's 1143 * connect CPU; the guest then starts processing them 1144 * in the tasklet handler on this CPU: 1145 * 1146 * VMBUS_CONNECT_CPU 1147 * 1148 * [vmbus_on_msg_dpc()] 1149 * atomic_inc() // CHANNELMSG_OFFERCHANNEL 1150 * queue_work() 1151 * ... 1152 * [vmbus_on_msg_dpc()] 1153 * schedule_work() // CHANNELMSG_RESCIND_CHANNELOFFER 1154 * 1155 * We rely on the memory-ordering properties of the 1156 * queue_work() and schedule_work() primitives, which 1157 * guarantee that the atomic increment will be visible 1158 * to the CPUs which will execute the offer & rescind 1159 * works by the time these works will start execution. 
1160 */ 1161 atomic_inc(&vmbus_connection.offer_in_progress); 1162 fallthrough; 1163 1164 default: 1165 queue_work(vmbus_connection.work_queue, &ctx->work); 1166 } 1167 } else 1168 entry->message_handler(hdr); 1169 1170 msg_handled: 1171 vmbus_signal_eom(msg, message_type); 1172 } 1173 1174 #ifdef CONFIG_PM_SLEEP 1175 /* 1176 * Fake RESCIND_CHANNEL messages to clean up hv_sock channels by force for 1177 * hibernation, because hv_sock connections can not persist across hibernation. 1178 */ 1179 static void vmbus_force_channel_rescinded(struct vmbus_channel *channel) 1180 { 1181 struct onmessage_work_context *ctx; 1182 struct vmbus_channel_rescind_offer *rescind; 1183 1184 WARN_ON(!is_hvsock_channel(channel)); 1185 1186 /* 1187 * Allocation size is small and the allocation should really not fail, 1188 * otherwise the state of the hv_sock connections ends up in limbo. 1189 */ 1190 ctx = kzalloc(sizeof(*ctx) + sizeof(*rescind), 1191 GFP_KERNEL | __GFP_NOFAIL); 1192 1193 /* 1194 * So far, these are not really used by Linux. Just set them to the 1195 * reasonable values conforming to the definitions of the fields. 1196 */ 1197 ctx->msg.header.message_type = 1; 1198 ctx->msg.header.payload_size = sizeof(*rescind); 1199 1200 /* These values are actually used by Linux. 
*/ 1201 rescind = (struct vmbus_channel_rescind_offer *)ctx->msg.payload; 1202 rescind->header.msgtype = CHANNELMSG_RESCIND_CHANNELOFFER; 1203 rescind->child_relid = channel->offermsg.child_relid; 1204 1205 INIT_WORK(&ctx->work, vmbus_onmessage_work); 1206 1207 queue_work(vmbus_connection.work_queue, &ctx->work); 1208 } 1209 #endif /* CONFIG_PM_SLEEP */ 1210 1211 /* 1212 * Schedule all channels with events pending 1213 */ 1214 static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu) 1215 { 1216 unsigned long *recv_int_page; 1217 u32 maxbits, relid; 1218 1219 if (vmbus_proto_version < VERSION_WIN8) { 1220 maxbits = MAX_NUM_CHANNELS_SUPPORTED; 1221 recv_int_page = vmbus_connection.recv_int_page; 1222 } else { 1223 /* 1224 * When the host is win8 and beyond, the event page 1225 * can be directly checked to get the id of the channel 1226 * that has the interrupt pending. 1227 */ 1228 void *page_addr = hv_cpu->synic_event_page; 1229 union hv_synic_event_flags *event 1230 = (union hv_synic_event_flags *)page_addr + 1231 VMBUS_MESSAGE_SINT; 1232 1233 maxbits = HV_EVENT_FLAGS_COUNT; 1234 recv_int_page = event->flags; 1235 } 1236 1237 if (unlikely(!recv_int_page)) 1238 return; 1239 1240 for_each_set_bit(relid, recv_int_page, maxbits) { 1241 void (*callback_fn)(void *context); 1242 struct vmbus_channel *channel; 1243 1244 if (!sync_test_and_clear_bit(relid, recv_int_page)) 1245 continue; 1246 1247 /* Special case - vmbus channel protocol msg */ 1248 if (relid == 0) 1249 continue; 1250 1251 /* 1252 * Pairs with the kfree_rcu() in vmbus_chan_release(). 1253 * Guarantees that the channel data structure doesn't 1254 * get freed while the channel pointer below is being 1255 * dereferenced. 
1256 */ 1257 rcu_read_lock(); 1258 1259 /* Find channel based on relid */ 1260 channel = relid2channel(relid); 1261 if (channel == NULL) 1262 goto sched_unlock_rcu; 1263 1264 if (channel->rescind) 1265 goto sched_unlock_rcu; 1266 1267 /* 1268 * Make sure that the ring buffer data structure doesn't get 1269 * freed while we dereference the ring buffer pointer. Test 1270 * for the channel's onchannel_callback being NULL within a 1271 * sched_lock critical section. See also the inline comments 1272 * in vmbus_reset_channel_cb(). 1273 */ 1274 spin_lock(&channel->sched_lock); 1275 1276 callback_fn = channel->onchannel_callback; 1277 if (unlikely(callback_fn == NULL)) 1278 goto sched_unlock; 1279 1280 trace_vmbus_chan_sched(channel); 1281 1282 ++channel->interrupts; 1283 1284 switch (channel->callback_mode) { 1285 case HV_CALL_ISR: 1286 (*callback_fn)(channel->channel_callback_context); 1287 break; 1288 1289 case HV_CALL_BATCHED: 1290 hv_begin_read(&channel->inbound); 1291 fallthrough; 1292 case HV_CALL_DIRECT: 1293 tasklet_schedule(&channel->callback_event); 1294 } 1295 1296 sched_unlock: 1297 spin_unlock(&channel->sched_lock); 1298 sched_unlock_rcu: 1299 rcu_read_unlock(); 1300 } 1301 } 1302 1303 static void vmbus_isr(void) 1304 { 1305 struct hv_per_cpu_context *hv_cpu 1306 = this_cpu_ptr(hv_context.cpu_context); 1307 void *page_addr = hv_cpu->synic_event_page; 1308 struct hv_message *msg; 1309 union hv_synic_event_flags *event; 1310 bool handled = false; 1311 1312 if (unlikely(page_addr == NULL)) 1313 return; 1314 1315 event = (union hv_synic_event_flags *)page_addr + 1316 VMBUS_MESSAGE_SINT; 1317 /* 1318 * Check for events before checking for messages. This is the order 1319 * in which events and messages are checked in Windows guests on 1320 * Hyper-V, and the Windows team suggested we do the same. 
1321 */ 1322 1323 if ((vmbus_proto_version == VERSION_WS2008) || 1324 (vmbus_proto_version == VERSION_WIN7)) { 1325 1326 /* Since we are a child, we only need to check bit 0 */ 1327 if (sync_test_and_clear_bit(0, event->flags)) 1328 handled = true; 1329 } else { 1330 /* 1331 * Our host is win8 or above. The signaling mechanism 1332 * has changed and we can directly look at the event page. 1333 * If bit n is set then we have an interrup on the channel 1334 * whose id is n. 1335 */ 1336 handled = true; 1337 } 1338 1339 if (handled) 1340 vmbus_chan_sched(hv_cpu); 1341 1342 page_addr = hv_cpu->synic_message_page; 1343 msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT; 1344 1345 /* Check if there are actual msgs to be processed */ 1346 if (msg->header.message_type != HVMSG_NONE) { 1347 if (msg->header.message_type == HVMSG_TIMER_EXPIRED) { 1348 hv_stimer0_isr(); 1349 vmbus_signal_eom(msg, HVMSG_TIMER_EXPIRED); 1350 } else 1351 tasklet_schedule(&hv_cpu->msg_dpc); 1352 } 1353 1354 add_interrupt_randomness(hv_get_vector(), 0); 1355 } 1356 1357 /* 1358 * Callback from kmsg_dump. Grab as much as possible from the end of the kmsg 1359 * buffer and call into Hyper-V to transfer the data. 1360 */ 1361 static void hv_kmsg_dump(struct kmsg_dumper *dumper, 1362 enum kmsg_dump_reason reason) 1363 { 1364 size_t bytes_written; 1365 phys_addr_t panic_pa; 1366 1367 /* We are only interested in panics. */ 1368 if ((reason != KMSG_DUMP_PANIC) || (!sysctl_record_panic_msg)) 1369 return; 1370 1371 panic_pa = virt_to_phys(hv_panic_page); 1372 1373 /* 1374 * Write dump contents to the page. No need to synchronize; panic should 1375 * be single-threaded. 
1376 */ 1377 kmsg_dump_get_buffer(dumper, false, hv_panic_page, HV_HYP_PAGE_SIZE, 1378 &bytes_written); 1379 if (bytes_written) 1380 hyperv_report_panic_msg(panic_pa, bytes_written); 1381 } 1382 1383 static struct kmsg_dumper hv_kmsg_dumper = { 1384 .dump = hv_kmsg_dump, 1385 }; 1386 1387 static struct ctl_table_header *hv_ctl_table_hdr; 1388 1389 /* 1390 * sysctl option to allow the user to control whether kmsg data should be 1391 * reported to Hyper-V on panic. 1392 */ 1393 static struct ctl_table hv_ctl_table[] = { 1394 { 1395 .procname = "hyperv_record_panic_msg", 1396 .data = &sysctl_record_panic_msg, 1397 .maxlen = sizeof(int), 1398 .mode = 0644, 1399 .proc_handler = proc_dointvec_minmax, 1400 .extra1 = SYSCTL_ZERO, 1401 .extra2 = SYSCTL_ONE 1402 }, 1403 {} 1404 }; 1405 1406 static struct ctl_table hv_root_table[] = { 1407 { 1408 .procname = "kernel", 1409 .mode = 0555, 1410 .child = hv_ctl_table 1411 }, 1412 {} 1413 }; 1414 1415 /* 1416 * vmbus_bus_init -Main vmbus driver initialization routine. 1417 * 1418 * Here, we 1419 * - initialize the vmbus driver context 1420 * - invoke the vmbus hv main init routine 1421 * - retrieve the channel offers 1422 */ 1423 static int vmbus_bus_init(void) 1424 { 1425 int ret; 1426 1427 ret = hv_init(); 1428 if (ret != 0) { 1429 pr_err("Unable to initialize the hypervisor - 0x%x\n", ret); 1430 return ret; 1431 } 1432 1433 ret = bus_register(&hv_bus); 1434 if (ret) 1435 return ret; 1436 1437 ret = hv_setup_vmbus_irq(vmbus_irq, vmbus_isr); 1438 if (ret) 1439 goto err_setup; 1440 1441 ret = hv_synic_alloc(); 1442 if (ret) 1443 goto err_alloc; 1444 1445 /* 1446 * Initialize the per-cpu interrupt state and stimer state. 1447 * Then connect to the host. 
1448 */ 1449 ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online", 1450 hv_synic_init, hv_synic_cleanup); 1451 if (ret < 0) 1452 goto err_cpuhp; 1453 hyperv_cpuhp_online = ret; 1454 1455 ret = vmbus_connect(); 1456 if (ret) 1457 goto err_connect; 1458 1459 /* 1460 * Only register if the crash MSRs are available 1461 */ 1462 if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { 1463 u64 hyperv_crash_ctl; 1464 /* 1465 * Sysctl registration is not fatal, since by default 1466 * reporting is enabled. 1467 */ 1468 hv_ctl_table_hdr = register_sysctl_table(hv_root_table); 1469 if (!hv_ctl_table_hdr) 1470 pr_err("Hyper-V: sysctl table register error"); 1471 1472 /* 1473 * Register for panic kmsg callback only if the right 1474 * capability is supported by the hypervisor. 1475 */ 1476 hv_get_crash_ctl(hyperv_crash_ctl); 1477 if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG) { 1478 hv_panic_page = (void *)hv_alloc_hyperv_zeroed_page(); 1479 if (hv_panic_page) { 1480 ret = kmsg_dump_register(&hv_kmsg_dumper); 1481 if (ret) { 1482 pr_err("Hyper-V: kmsg dump register " 1483 "error 0x%x\n", ret); 1484 hv_free_hyperv_page( 1485 (unsigned long)hv_panic_page); 1486 hv_panic_page = NULL; 1487 } 1488 } else 1489 pr_err("Hyper-V: panic message page memory " 1490 "allocation failed"); 1491 } 1492 1493 register_die_notifier(&hyperv_die_block); 1494 } 1495 1496 /* 1497 * Always register the panic notifier because we need to unload 1498 * the VMbus channel connection to prevent any VMbus 1499 * activity after the VM panics. 
1500 */ 1501 atomic_notifier_chain_register(&panic_notifier_list, 1502 &hyperv_panic_block); 1503 1504 vmbus_request_offers(); 1505 1506 return 0; 1507 1508 err_connect: 1509 cpuhp_remove_state(hyperv_cpuhp_online); 1510 err_cpuhp: 1511 hv_synic_free(); 1512 err_alloc: 1513 hv_remove_vmbus_irq(); 1514 err_setup: 1515 bus_unregister(&hv_bus); 1516 unregister_sysctl_table(hv_ctl_table_hdr); 1517 hv_ctl_table_hdr = NULL; 1518 return ret; 1519 } 1520 1521 /** 1522 * __vmbus_child_driver_register() - Register a vmbus's driver 1523 * @hv_driver: Pointer to driver structure you want to register 1524 * @owner: owner module of the drv 1525 * @mod_name: module name string 1526 * 1527 * Registers the given driver with Linux through the 'driver_register()' call 1528 * and sets up the hyper-v vmbus handling for this driver. 1529 * It will return the state of the 'driver_register()' call. 1530 * 1531 */ 1532 int __vmbus_driver_register(struct hv_driver *hv_driver, struct module *owner, const char *mod_name) 1533 { 1534 int ret; 1535 1536 pr_info("registering driver %s\n", hv_driver->name); 1537 1538 ret = vmbus_exists(); 1539 if (ret < 0) 1540 return ret; 1541 1542 hv_driver->driver.name = hv_driver->name; 1543 hv_driver->driver.owner = owner; 1544 hv_driver->driver.mod_name = mod_name; 1545 hv_driver->driver.bus = &hv_bus; 1546 1547 spin_lock_init(&hv_driver->dynids.lock); 1548 INIT_LIST_HEAD(&hv_driver->dynids.list); 1549 1550 ret = driver_register(&hv_driver->driver); 1551 1552 return ret; 1553 } 1554 EXPORT_SYMBOL_GPL(__vmbus_driver_register); 1555 1556 /** 1557 * vmbus_driver_unregister() - Unregister a vmbus's driver 1558 * @hv_driver: Pointer to driver structure you want to 1559 * un-register 1560 * 1561 * Un-register the given driver that was previous registered with a call to 1562 * vmbus_driver_register() 1563 */ 1564 void vmbus_driver_unregister(struct hv_driver *hv_driver) 1565 { 1566 pr_info("unregistering driver %s\n", hv_driver->name); 1567 1568 if 
(!vmbus_exists()) { 1569 driver_unregister(&hv_driver->driver); 1570 vmbus_free_dynids(hv_driver); 1571 } 1572 } 1573 EXPORT_SYMBOL_GPL(vmbus_driver_unregister); 1574 1575 1576 /* 1577 * Called when last reference to channel is gone. 1578 */ 1579 static void vmbus_chan_release(struct kobject *kobj) 1580 { 1581 struct vmbus_channel *channel 1582 = container_of(kobj, struct vmbus_channel, kobj); 1583 1584 kfree_rcu(channel, rcu); 1585 } 1586 1587 struct vmbus_chan_attribute { 1588 struct attribute attr; 1589 ssize_t (*show)(struct vmbus_channel *chan, char *buf); 1590 ssize_t (*store)(struct vmbus_channel *chan, 1591 const char *buf, size_t count); 1592 }; 1593 #define VMBUS_CHAN_ATTR(_name, _mode, _show, _store) \ 1594 struct vmbus_chan_attribute chan_attr_##_name \ 1595 = __ATTR(_name, _mode, _show, _store) 1596 #define VMBUS_CHAN_ATTR_RW(_name) \ 1597 struct vmbus_chan_attribute chan_attr_##_name = __ATTR_RW(_name) 1598 #define VMBUS_CHAN_ATTR_RO(_name) \ 1599 struct vmbus_chan_attribute chan_attr_##_name = __ATTR_RO(_name) 1600 #define VMBUS_CHAN_ATTR_WO(_name) \ 1601 struct vmbus_chan_attribute chan_attr_##_name = __ATTR_WO(_name) 1602 1603 static ssize_t vmbus_chan_attr_show(struct kobject *kobj, 1604 struct attribute *attr, char *buf) 1605 { 1606 const struct vmbus_chan_attribute *attribute 1607 = container_of(attr, struct vmbus_chan_attribute, attr); 1608 struct vmbus_channel *chan 1609 = container_of(kobj, struct vmbus_channel, kobj); 1610 1611 if (!attribute->show) 1612 return -EIO; 1613 1614 return attribute->show(chan, buf); 1615 } 1616 1617 static ssize_t vmbus_chan_attr_store(struct kobject *kobj, 1618 struct attribute *attr, const char *buf, 1619 size_t count) 1620 { 1621 const struct vmbus_chan_attribute *attribute 1622 = container_of(attr, struct vmbus_chan_attribute, attr); 1623 struct vmbus_channel *chan 1624 = container_of(kobj, struct vmbus_channel, kobj); 1625 1626 if (!attribute->store) 1627 return -EIO; 1628 1629 return attribute->store(chan, 
buf, count); 1630 } 1631 1632 static const struct sysfs_ops vmbus_chan_sysfs_ops = { 1633 .show = vmbus_chan_attr_show, 1634 .store = vmbus_chan_attr_store, 1635 }; 1636 1637 static ssize_t out_mask_show(struct vmbus_channel *channel, char *buf) 1638 { 1639 struct hv_ring_buffer_info *rbi = &channel->outbound; 1640 ssize_t ret; 1641 1642 mutex_lock(&rbi->ring_buffer_mutex); 1643 if (!rbi->ring_buffer) { 1644 mutex_unlock(&rbi->ring_buffer_mutex); 1645 return -EINVAL; 1646 } 1647 1648 ret = sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask); 1649 mutex_unlock(&rbi->ring_buffer_mutex); 1650 return ret; 1651 } 1652 static VMBUS_CHAN_ATTR_RO(out_mask); 1653 1654 static ssize_t in_mask_show(struct vmbus_channel *channel, char *buf) 1655 { 1656 struct hv_ring_buffer_info *rbi = &channel->inbound; 1657 ssize_t ret; 1658 1659 mutex_lock(&rbi->ring_buffer_mutex); 1660 if (!rbi->ring_buffer) { 1661 mutex_unlock(&rbi->ring_buffer_mutex); 1662 return -EINVAL; 1663 } 1664 1665 ret = sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask); 1666 mutex_unlock(&rbi->ring_buffer_mutex); 1667 return ret; 1668 } 1669 static VMBUS_CHAN_ATTR_RO(in_mask); 1670 1671 static ssize_t read_avail_show(struct vmbus_channel *channel, char *buf) 1672 { 1673 struct hv_ring_buffer_info *rbi = &channel->inbound; 1674 ssize_t ret; 1675 1676 mutex_lock(&rbi->ring_buffer_mutex); 1677 if (!rbi->ring_buffer) { 1678 mutex_unlock(&rbi->ring_buffer_mutex); 1679 return -EINVAL; 1680 } 1681 1682 ret = sprintf(buf, "%u\n", hv_get_bytes_to_read(rbi)); 1683 mutex_unlock(&rbi->ring_buffer_mutex); 1684 return ret; 1685 } 1686 static VMBUS_CHAN_ATTR_RO(read_avail); 1687 1688 static ssize_t write_avail_show(struct vmbus_channel *channel, char *buf) 1689 { 1690 struct hv_ring_buffer_info *rbi = &channel->outbound; 1691 ssize_t ret; 1692 1693 mutex_lock(&rbi->ring_buffer_mutex); 1694 if (!rbi->ring_buffer) { 1695 mutex_unlock(&rbi->ring_buffer_mutex); 1696 return -EINVAL; 1697 } 1698 1699 ret = sprintf(buf, "%u\n", 
hv_get_bytes_to_write(rbi)); 1700 mutex_unlock(&rbi->ring_buffer_mutex); 1701 return ret; 1702 } 1703 static VMBUS_CHAN_ATTR_RO(write_avail); 1704 1705 static ssize_t target_cpu_show(struct vmbus_channel *channel, char *buf) 1706 { 1707 return sprintf(buf, "%u\n", channel->target_cpu); 1708 } 1709 static ssize_t target_cpu_store(struct vmbus_channel *channel, 1710 const char *buf, size_t count) 1711 { 1712 u32 target_cpu, origin_cpu; 1713 ssize_t ret = count; 1714 1715 if (vmbus_proto_version < VERSION_WIN10_V4_1) 1716 return -EIO; 1717 1718 if (sscanf(buf, "%uu", &target_cpu) != 1) 1719 return -EIO; 1720 1721 /* Validate target_cpu for the cpumask_test_cpu() operation below. */ 1722 if (target_cpu >= nr_cpumask_bits) 1723 return -EINVAL; 1724 1725 /* No CPUs should come up or down during this. */ 1726 cpus_read_lock(); 1727 1728 if (!cpu_online(target_cpu)) { 1729 cpus_read_unlock(); 1730 return -EINVAL; 1731 } 1732 1733 /* 1734 * Synchronizes target_cpu_store() and channel closure: 1735 * 1736 * { Initially: state = CHANNEL_OPENED } 1737 * 1738 * CPU1 CPU2 1739 * 1740 * [target_cpu_store()] [vmbus_disconnect_ring()] 1741 * 1742 * LOCK channel_mutex LOCK channel_mutex 1743 * LOAD r1 = state LOAD r2 = state 1744 * IF (r1 == CHANNEL_OPENED) IF (r2 == CHANNEL_OPENED) 1745 * SEND MODIFYCHANNEL STORE state = CHANNEL_OPEN 1746 * [...] SEND CLOSECHANNEL 1747 * UNLOCK channel_mutex UNLOCK channel_mutex 1748 * 1749 * Forbids: r1 == r2 == CHANNEL_OPENED (i.e., CPU1's LOCK precedes 1750 * CPU2's LOCK) && CPU2's SEND precedes CPU1's SEND 1751 * 1752 * Note. The host processes the channel messages "sequentially", in 1753 * the order in which they are received on a per-partition basis. 1754 */ 1755 mutex_lock(&vmbus_connection.channel_mutex); 1756 1757 /* 1758 * Hyper-V will ignore MODIFYCHANNEL messages for "non-open" channels; 1759 * avoid sending the message and fail here for such channels. 
1760 */ 1761 if (channel->state != CHANNEL_OPENED_STATE) { 1762 ret = -EIO; 1763 goto cpu_store_unlock; 1764 } 1765 1766 origin_cpu = channel->target_cpu; 1767 if (target_cpu == origin_cpu) 1768 goto cpu_store_unlock; 1769 1770 if (vmbus_send_modifychannel(channel->offermsg.child_relid, 1771 hv_cpu_number_to_vp_number(target_cpu))) { 1772 ret = -EIO; 1773 goto cpu_store_unlock; 1774 } 1775 1776 /* 1777 * Warning. At this point, there is *no* guarantee that the host will 1778 * have successfully processed the vmbus_send_modifychannel() request. 1779 * See the header comment of vmbus_send_modifychannel() for more info. 1780 * 1781 * Lags in the processing of the above vmbus_send_modifychannel() can 1782 * result in missed interrupts if the "old" target CPU is taken offline 1783 * before Hyper-V starts sending interrupts to the "new" target CPU. 1784 * But apart from this offlining scenario, the code tolerates such 1785 * lags. It will function correctly even if a channel interrupt comes 1786 * in on a CPU that is different from the channel target_cpu value. 1787 */ 1788 1789 channel->target_cpu = target_cpu; 1790 1791 /* See init_vp_index(). */ 1792 if (hv_is_perf_channel(channel)) 1793 hv_update_alloced_cpus(origin_cpu, target_cpu); 1794 1795 /* Currently set only for storvsc channels. 
*/ 1796 if (channel->change_target_cpu_callback) { 1797 (*channel->change_target_cpu_callback)(channel, 1798 origin_cpu, target_cpu); 1799 } 1800 1801 cpu_store_unlock: 1802 mutex_unlock(&vmbus_connection.channel_mutex); 1803 cpus_read_unlock(); 1804 return ret; 1805 } 1806 static VMBUS_CHAN_ATTR(cpu, 0644, target_cpu_show, target_cpu_store); 1807 1808 static ssize_t channel_pending_show(struct vmbus_channel *channel, 1809 char *buf) 1810 { 1811 return sprintf(buf, "%d\n", 1812 channel_pending(channel, 1813 vmbus_connection.monitor_pages[1])); 1814 } 1815 static VMBUS_CHAN_ATTR(pending, S_IRUGO, channel_pending_show, NULL); 1816 1817 static ssize_t channel_latency_show(struct vmbus_channel *channel, 1818 char *buf) 1819 { 1820 return sprintf(buf, "%d\n", 1821 channel_latency(channel, 1822 vmbus_connection.monitor_pages[1])); 1823 } 1824 static VMBUS_CHAN_ATTR(latency, S_IRUGO, channel_latency_show, NULL); 1825 1826 static ssize_t channel_interrupts_show(struct vmbus_channel *channel, char *buf) 1827 { 1828 return sprintf(buf, "%llu\n", channel->interrupts); 1829 } 1830 static VMBUS_CHAN_ATTR(interrupts, S_IRUGO, channel_interrupts_show, NULL); 1831 1832 static ssize_t channel_events_show(struct vmbus_channel *channel, char *buf) 1833 { 1834 return sprintf(buf, "%llu\n", channel->sig_events); 1835 } 1836 static VMBUS_CHAN_ATTR(events, S_IRUGO, channel_events_show, NULL); 1837 1838 static ssize_t channel_intr_in_full_show(struct vmbus_channel *channel, 1839 char *buf) 1840 { 1841 return sprintf(buf, "%llu\n", 1842 (unsigned long long)channel->intr_in_full); 1843 } 1844 static VMBUS_CHAN_ATTR(intr_in_full, 0444, channel_intr_in_full_show, NULL); 1845 1846 static ssize_t channel_intr_out_empty_show(struct vmbus_channel *channel, 1847 char *buf) 1848 { 1849 return sprintf(buf, "%llu\n", 1850 (unsigned long long)channel->intr_out_empty); 1851 } 1852 static VMBUS_CHAN_ATTR(intr_out_empty, 0444, channel_intr_out_empty_show, NULL); 1853 1854 static ssize_t 
channel_out_full_first_show(struct vmbus_channel *channel, 1855 char *buf) 1856 { 1857 return sprintf(buf, "%llu\n", 1858 (unsigned long long)channel->out_full_first); 1859 } 1860 static VMBUS_CHAN_ATTR(out_full_first, 0444, channel_out_full_first_show, NULL); 1861 1862 static ssize_t channel_out_full_total_show(struct vmbus_channel *channel, 1863 char *buf) 1864 { 1865 return sprintf(buf, "%llu\n", 1866 (unsigned long long)channel->out_full_total); 1867 } 1868 static VMBUS_CHAN_ATTR(out_full_total, 0444, channel_out_full_total_show, NULL); 1869 1870 static ssize_t subchannel_monitor_id_show(struct vmbus_channel *channel, 1871 char *buf) 1872 { 1873 return sprintf(buf, "%u\n", channel->offermsg.monitorid); 1874 } 1875 static VMBUS_CHAN_ATTR(monitor_id, S_IRUGO, subchannel_monitor_id_show, NULL); 1876 1877 static ssize_t subchannel_id_show(struct vmbus_channel *channel, 1878 char *buf) 1879 { 1880 return sprintf(buf, "%u\n", 1881 channel->offermsg.offer.sub_channel_index); 1882 } 1883 static VMBUS_CHAN_ATTR_RO(subchannel_id); 1884 1885 static struct attribute *vmbus_chan_attrs[] = { 1886 &chan_attr_out_mask.attr, 1887 &chan_attr_in_mask.attr, 1888 &chan_attr_read_avail.attr, 1889 &chan_attr_write_avail.attr, 1890 &chan_attr_cpu.attr, 1891 &chan_attr_pending.attr, 1892 &chan_attr_latency.attr, 1893 &chan_attr_interrupts.attr, 1894 &chan_attr_events.attr, 1895 &chan_attr_intr_in_full.attr, 1896 &chan_attr_intr_out_empty.attr, 1897 &chan_attr_out_full_first.attr, 1898 &chan_attr_out_full_total.attr, 1899 &chan_attr_monitor_id.attr, 1900 &chan_attr_subchannel_id.attr, 1901 NULL 1902 }; 1903 1904 /* 1905 * Channel-level attribute_group callback function. Returns the permission for 1906 * each attribute, and returns 0 if an attribute is not visible. 
1907 */ 1908 static umode_t vmbus_chan_attr_is_visible(struct kobject *kobj, 1909 struct attribute *attr, int idx) 1910 { 1911 const struct vmbus_channel *channel = 1912 container_of(kobj, struct vmbus_channel, kobj); 1913 1914 /* Hide the monitor attributes if the monitor mechanism is not used. */ 1915 if (!channel->offermsg.monitor_allocated && 1916 (attr == &chan_attr_pending.attr || 1917 attr == &chan_attr_latency.attr || 1918 attr == &chan_attr_monitor_id.attr)) 1919 return 0; 1920 1921 return attr->mode; 1922 } 1923 1924 static struct attribute_group vmbus_chan_group = { 1925 .attrs = vmbus_chan_attrs, 1926 .is_visible = vmbus_chan_attr_is_visible 1927 }; 1928 1929 static struct kobj_type vmbus_chan_ktype = { 1930 .sysfs_ops = &vmbus_chan_sysfs_ops, 1931 .release = vmbus_chan_release, 1932 }; 1933 1934 /* 1935 * vmbus_add_channel_kobj - setup a sub-directory under device/channels 1936 */ 1937 int vmbus_add_channel_kobj(struct hv_device *dev, struct vmbus_channel *channel) 1938 { 1939 const struct device *device = &dev->device; 1940 struct kobject *kobj = &channel->kobj; 1941 u32 relid = channel->offermsg.child_relid; 1942 int ret; 1943 1944 kobj->kset = dev->channels_kset; 1945 ret = kobject_init_and_add(kobj, &vmbus_chan_ktype, NULL, 1946 "%u", relid); 1947 if (ret) 1948 return ret; 1949 1950 ret = sysfs_create_group(kobj, &vmbus_chan_group); 1951 1952 if (ret) { 1953 /* 1954 * The calling functions' error handling paths will cleanup the 1955 * empty channel directory. 
1956 */ 1957 dev_err(device, "Unable to set up channel sysfs files\n"); 1958 return ret; 1959 } 1960 1961 kobject_uevent(kobj, KOBJ_ADD); 1962 1963 return 0; 1964 } 1965 1966 /* 1967 * vmbus_remove_channel_attr_group - remove the channel's attribute group 1968 */ 1969 void vmbus_remove_channel_attr_group(struct vmbus_channel *channel) 1970 { 1971 sysfs_remove_group(&channel->kobj, &vmbus_chan_group); 1972 } 1973 1974 /* 1975 * vmbus_device_create - Creates and registers a new child device 1976 * on the vmbus. 1977 */ 1978 struct hv_device *vmbus_device_create(const guid_t *type, 1979 const guid_t *instance, 1980 struct vmbus_channel *channel) 1981 { 1982 struct hv_device *child_device_obj; 1983 1984 child_device_obj = kzalloc(sizeof(struct hv_device), GFP_KERNEL); 1985 if (!child_device_obj) { 1986 pr_err("Unable to allocate device object for child device\n"); 1987 return NULL; 1988 } 1989 1990 child_device_obj->channel = channel; 1991 guid_copy(&child_device_obj->dev_type, type); 1992 guid_copy(&child_device_obj->dev_instance, instance); 1993 child_device_obj->vendor_id = 0x1414; /* MSFT vendor ID */ 1994 1995 return child_device_obj; 1996 } 1997 1998 /* 1999 * vmbus_device_register - Register the child device 2000 */ 2001 int vmbus_device_register(struct hv_device *child_device_obj) 2002 { 2003 struct kobject *kobj = &child_device_obj->device.kobj; 2004 int ret; 2005 2006 dev_set_name(&child_device_obj->device, "%pUl", 2007 &child_device_obj->channel->offermsg.offer.if_instance); 2008 2009 child_device_obj->device.bus = &hv_bus; 2010 child_device_obj->device.parent = &hv_acpi_dev->dev; 2011 child_device_obj->device.release = vmbus_device_release; 2012 2013 /* 2014 * Register with the LDM. 
This will kick off the driver/device 2015 * binding...which will eventually call vmbus_match() and vmbus_probe() 2016 */ 2017 ret = device_register(&child_device_obj->device); 2018 if (ret) { 2019 pr_err("Unable to register child device\n"); 2020 return ret; 2021 } 2022 2023 child_device_obj->channels_kset = kset_create_and_add("channels", 2024 NULL, kobj); 2025 if (!child_device_obj->channels_kset) { 2026 ret = -ENOMEM; 2027 goto err_dev_unregister; 2028 } 2029 2030 ret = vmbus_add_channel_kobj(child_device_obj, 2031 child_device_obj->channel); 2032 if (ret) { 2033 pr_err("Unable to register primary channeln"); 2034 goto err_kset_unregister; 2035 } 2036 hv_debug_add_dev_dir(child_device_obj); 2037 2038 return 0; 2039 2040 err_kset_unregister: 2041 kset_unregister(child_device_obj->channels_kset); 2042 2043 err_dev_unregister: 2044 device_unregister(&child_device_obj->device); 2045 return ret; 2046 } 2047 2048 /* 2049 * vmbus_device_unregister - Remove the specified child device 2050 * from the vmbus. 2051 */ 2052 void vmbus_device_unregister(struct hv_device *device_obj) 2053 { 2054 pr_debug("child device %s unregistered\n", 2055 dev_name(&device_obj->device)); 2056 2057 kset_unregister(device_obj->channels_kset); 2058 2059 /* 2060 * Kick off the process of unregistering the device. 2061 * This will call vmbus_remove() and eventually vmbus_device_release() 2062 */ 2063 device_unregister(&device_obj->device); 2064 } 2065 2066 2067 /* 2068 * VMBUS is an acpi enumerated device. Get the information we 2069 * need from DSDT. 2070 */ 2071 #define VTPM_BASE_ADDRESS 0xfed40000 2072 static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *ctx) 2073 { 2074 resource_size_t start = 0; 2075 resource_size_t end = 0; 2076 struct resource *new_res; 2077 struct resource **old_res = &hyperv_mmio; 2078 struct resource **prev_res = NULL; 2079 struct resource r; 2080 2081 switch (res->type) { 2082 2083 /* 2084 * "Address" descriptors are for bus windows. 
Ignore 2085 * "memory" descriptors, which are for registers on 2086 * devices. 2087 */ 2088 case ACPI_RESOURCE_TYPE_ADDRESS32: 2089 start = res->data.address32.address.minimum; 2090 end = res->data.address32.address.maximum; 2091 break; 2092 2093 case ACPI_RESOURCE_TYPE_ADDRESS64: 2094 start = res->data.address64.address.minimum; 2095 end = res->data.address64.address.maximum; 2096 break; 2097 2098 /* 2099 * The IRQ information is needed only on ARM64, which Hyper-V 2100 * sets up in the extended format. IRQ information is present 2101 * on x86/x64 in the non-extended format but it is not used by 2102 * Linux. So don't bother checking for the non-extended format. 2103 */ 2104 case ACPI_RESOURCE_TYPE_EXTENDED_IRQ: 2105 if (!acpi_dev_resource_interrupt(res, 0, &r)) { 2106 pr_err("Unable to parse Hyper-V ACPI interrupt\n"); 2107 return AE_ERROR; 2108 } 2109 /* ARM64 INTID for VMbus */ 2110 vmbus_interrupt = res->data.extended_irq.interrupts[0]; 2111 /* Linux IRQ number */ 2112 vmbus_irq = r.start; 2113 return AE_OK; 2114 2115 default: 2116 /* Unused resource type */ 2117 return AE_OK; 2118 2119 } 2120 /* 2121 * Ignore ranges that are below 1MB, as they're not 2122 * necessary or useful here. 2123 */ 2124 if (end < 0x100000) 2125 return AE_OK; 2126 2127 new_res = kzalloc(sizeof(*new_res), GFP_ATOMIC); 2128 if (!new_res) 2129 return AE_NO_MEMORY; 2130 2131 /* If this range overlaps the virtual TPM, truncate it. */ 2132 if (end > VTPM_BASE_ADDRESS && start < VTPM_BASE_ADDRESS) 2133 end = VTPM_BASE_ADDRESS; 2134 2135 new_res->name = "hyperv mmio"; 2136 new_res->flags = IORESOURCE_MEM; 2137 new_res->start = start; 2138 new_res->end = end; 2139 2140 /* 2141 * If two ranges are adjacent, merge them. 
2142 */ 2143 do { 2144 if (!*old_res) { 2145 *old_res = new_res; 2146 break; 2147 } 2148 2149 if (((*old_res)->end + 1) == new_res->start) { 2150 (*old_res)->end = new_res->end; 2151 kfree(new_res); 2152 break; 2153 } 2154 2155 if ((*old_res)->start == new_res->end + 1) { 2156 (*old_res)->start = new_res->start; 2157 kfree(new_res); 2158 break; 2159 } 2160 2161 if ((*old_res)->start > new_res->end) { 2162 new_res->sibling = *old_res; 2163 if (prev_res) 2164 (*prev_res)->sibling = new_res; 2165 *old_res = new_res; 2166 break; 2167 } 2168 2169 prev_res = old_res; 2170 old_res = &(*old_res)->sibling; 2171 2172 } while (1); 2173 2174 return AE_OK; 2175 } 2176 2177 static int vmbus_acpi_remove(struct acpi_device *device) 2178 { 2179 struct resource *cur_res; 2180 struct resource *next_res; 2181 2182 if (hyperv_mmio) { 2183 if (fb_mmio) { 2184 __release_region(hyperv_mmio, fb_mmio->start, 2185 resource_size(fb_mmio)); 2186 fb_mmio = NULL; 2187 } 2188 2189 for (cur_res = hyperv_mmio; cur_res; cur_res = next_res) { 2190 next_res = cur_res->sibling; 2191 kfree(cur_res); 2192 } 2193 } 2194 2195 return 0; 2196 } 2197 2198 static void vmbus_reserve_fb(void) 2199 { 2200 int size; 2201 /* 2202 * Make a claim for the frame buffer in the resource tree under the 2203 * first node, which will be the one below 4GB. The length seems to 2204 * be underreported, particularly in a Generation 1 VM. So start out 2205 * reserving a larger area and make it smaller until it succeeds. 2206 */ 2207 2208 if (screen_info.lfb_base) { 2209 if (efi_enabled(EFI_BOOT)) 2210 size = max_t(__u32, screen_info.lfb_size, 0x800000); 2211 else 2212 size = max_t(__u32, screen_info.lfb_size, 0x4000000); 2213 2214 for (; !fb_mmio && (size >= 0x100000); size >>= 1) { 2215 fb_mmio = __request_region(hyperv_mmio, 2216 screen_info.lfb_base, size, 2217 fb_mmio_name, 0); 2218 } 2219 } 2220 } 2221 2222 /** 2223 * vmbus_allocate_mmio() - Pick a memory-mapped I/O range. 
2224 * @new: If successful, supplied a pointer to the 2225 * allocated MMIO space. 2226 * @device_obj: Identifies the caller 2227 * @min: Minimum guest physical address of the 2228 * allocation 2229 * @max: Maximum guest physical address 2230 * @size: Size of the range to be allocated 2231 * @align: Alignment of the range to be allocated 2232 * @fb_overlap_ok: Whether this allocation can be allowed 2233 * to overlap the video frame buffer. 2234 * 2235 * This function walks the resources granted to VMBus by the 2236 * _CRS object in the ACPI namespace underneath the parent 2237 * "bridge" whether that's a root PCI bus in the Generation 1 2238 * case or a Module Device in the Generation 2 case. It then 2239 * attempts to allocate from the global MMIO pool in a way that 2240 * matches the constraints supplied in these parameters and by 2241 * that _CRS. 2242 * 2243 * Return: 0 on success, -errno on failure 2244 */ 2245 int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, 2246 resource_size_t min, resource_size_t max, 2247 resource_size_t size, resource_size_t align, 2248 bool fb_overlap_ok) 2249 { 2250 struct resource *iter, *shadow; 2251 resource_size_t range_min, range_max, start; 2252 const char *dev_n = dev_name(&device_obj->device); 2253 int retval; 2254 2255 retval = -ENXIO; 2256 mutex_lock(&hyperv_mmio_lock); 2257 2258 /* 2259 * If overlaps with frame buffers are allowed, then first attempt to 2260 * make the allocation from within the reserved region. Because it 2261 * is already reserved, no shadow allocation is necessary. 
2262 */ 2263 if (fb_overlap_ok && fb_mmio && !(min > fb_mmio->end) && 2264 !(max < fb_mmio->start)) { 2265 2266 range_min = fb_mmio->start; 2267 range_max = fb_mmio->end; 2268 start = (range_min + align - 1) & ~(align - 1); 2269 for (; start + size - 1 <= range_max; start += align) { 2270 *new = request_mem_region_exclusive(start, size, dev_n); 2271 if (*new) { 2272 retval = 0; 2273 goto exit; 2274 } 2275 } 2276 } 2277 2278 for (iter = hyperv_mmio; iter; iter = iter->sibling) { 2279 if ((iter->start >= max) || (iter->end <= min)) 2280 continue; 2281 2282 range_min = iter->start; 2283 range_max = iter->end; 2284 start = (range_min + align - 1) & ~(align - 1); 2285 for (; start + size - 1 <= range_max; start += align) { 2286 shadow = __request_region(iter, start, size, NULL, 2287 IORESOURCE_BUSY); 2288 if (!shadow) 2289 continue; 2290 2291 *new = request_mem_region_exclusive(start, size, dev_n); 2292 if (*new) { 2293 shadow->name = (char *)*new; 2294 retval = 0; 2295 goto exit; 2296 } 2297 2298 __release_region(iter, start, size); 2299 } 2300 } 2301 2302 exit: 2303 mutex_unlock(&hyperv_mmio_lock); 2304 return retval; 2305 } 2306 EXPORT_SYMBOL_GPL(vmbus_allocate_mmio); 2307 2308 /** 2309 * vmbus_free_mmio() - Free a memory-mapped I/O range. 2310 * @start: Base address of region to release. 2311 * @size: Size of the range to be allocated 2312 * 2313 * This function releases anything requested by 2314 * vmbus_mmio_allocate(). 
2315 */ 2316 void vmbus_free_mmio(resource_size_t start, resource_size_t size) 2317 { 2318 struct resource *iter; 2319 2320 mutex_lock(&hyperv_mmio_lock); 2321 for (iter = hyperv_mmio; iter; iter = iter->sibling) { 2322 if ((iter->start >= start + size) || (iter->end <= start)) 2323 continue; 2324 2325 __release_region(iter, start, size); 2326 } 2327 release_mem_region(start, size); 2328 mutex_unlock(&hyperv_mmio_lock); 2329 2330 } 2331 EXPORT_SYMBOL_GPL(vmbus_free_mmio); 2332 2333 static int vmbus_acpi_add(struct acpi_device *device) 2334 { 2335 acpi_status result; 2336 int ret_val = -ENODEV; 2337 struct acpi_device *ancestor; 2338 2339 hv_acpi_dev = device; 2340 2341 result = acpi_walk_resources(device->handle, METHOD_NAME__CRS, 2342 vmbus_walk_resources, NULL); 2343 2344 if (ACPI_FAILURE(result)) 2345 goto acpi_walk_err; 2346 /* 2347 * Some ancestor of the vmbus acpi device (Gen1 or Gen2 2348 * firmware) is the VMOD that has the mmio ranges. Get that. 2349 */ 2350 for (ancestor = device->parent; ancestor; ancestor = ancestor->parent) { 2351 result = acpi_walk_resources(ancestor->handle, METHOD_NAME__CRS, 2352 vmbus_walk_resources, NULL); 2353 2354 if (ACPI_FAILURE(result)) 2355 continue; 2356 if (hyperv_mmio) { 2357 vmbus_reserve_fb(); 2358 break; 2359 } 2360 } 2361 ret_val = 0; 2362 2363 acpi_walk_err: 2364 complete(&probe_event); 2365 if (ret_val) 2366 vmbus_acpi_remove(device); 2367 return ret_val; 2368 } 2369 2370 #ifdef CONFIG_PM_SLEEP 2371 static int vmbus_bus_suspend(struct device *dev) 2372 { 2373 struct vmbus_channel *channel, *sc; 2374 2375 while (atomic_read(&vmbus_connection.offer_in_progress) != 0) { 2376 /* 2377 * We wait here until the completion of any channel 2378 * offers that are currently in progress. 
2379 */ 2380 msleep(1); 2381 } 2382 2383 mutex_lock(&vmbus_connection.channel_mutex); 2384 list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { 2385 if (!is_hvsock_channel(channel)) 2386 continue; 2387 2388 vmbus_force_channel_rescinded(channel); 2389 } 2390 mutex_unlock(&vmbus_connection.channel_mutex); 2391 2392 /* 2393 * Wait until all the sub-channels and hv_sock channels have been 2394 * cleaned up. Sub-channels should be destroyed upon suspend, otherwise 2395 * they would conflict with the new sub-channels that will be created 2396 * in the resume path. hv_sock channels should also be destroyed, but 2397 * a hv_sock channel of an established hv_sock connection can not be 2398 * really destroyed since it may still be referenced by the userspace 2399 * application, so we just force the hv_sock channel to be rescinded 2400 * by vmbus_force_channel_rescinded(), and the userspace application 2401 * will thoroughly destroy the channel after hibernation. 2402 * 2403 * Note: the counter nr_chan_close_on_suspend may never go above 0 if 2404 * the VM has no sub-channel and hv_sock channel, e.g. a 1-vCPU VM. 2405 */ 2406 if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0) 2407 wait_for_completion(&vmbus_connection.ready_for_suspend_event); 2408 2409 if (atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) != 0) { 2410 pr_err("Can not suspend due to a previous failed resuming\n"); 2411 return -EBUSY; 2412 } 2413 2414 mutex_lock(&vmbus_connection.channel_mutex); 2415 2416 list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { 2417 /* 2418 * Remove the channel from the array of channels and invalidate 2419 * the channel's relid. Upon resume, vmbus_onoffer() will fix 2420 * up the relid (and other fields, if necessary) and add the 2421 * channel back to the array. 
2422 */ 2423 vmbus_channel_unmap_relid(channel); 2424 channel->offermsg.child_relid = INVALID_RELID; 2425 2426 if (is_hvsock_channel(channel)) { 2427 if (!channel->rescind) { 2428 pr_err("hv_sock channel not rescinded!\n"); 2429 WARN_ON_ONCE(1); 2430 } 2431 continue; 2432 } 2433 2434 list_for_each_entry(sc, &channel->sc_list, sc_list) { 2435 pr_err("Sub-channel not deleted!\n"); 2436 WARN_ON_ONCE(1); 2437 } 2438 2439 atomic_inc(&vmbus_connection.nr_chan_fixup_on_resume); 2440 } 2441 2442 mutex_unlock(&vmbus_connection.channel_mutex); 2443 2444 vmbus_initiate_unload(false); 2445 2446 /* Reset the event for the next resume. */ 2447 reinit_completion(&vmbus_connection.ready_for_resume_event); 2448 2449 return 0; 2450 } 2451 2452 static int vmbus_bus_resume(struct device *dev) 2453 { 2454 struct vmbus_channel_msginfo *msginfo; 2455 size_t msgsize; 2456 int ret; 2457 2458 /* 2459 * We only use the 'vmbus_proto_version', which was in use before 2460 * hibernation, to re-negotiate with the host. 2461 */ 2462 if (!vmbus_proto_version) { 2463 pr_err("Invalid proto version = 0x%x\n", vmbus_proto_version); 2464 return -EINVAL; 2465 } 2466 2467 msgsize = sizeof(*msginfo) + 2468 sizeof(struct vmbus_channel_initiate_contact); 2469 2470 msginfo = kzalloc(msgsize, GFP_KERNEL); 2471 2472 if (msginfo == NULL) 2473 return -ENOMEM; 2474 2475 ret = vmbus_negotiate_version(msginfo, vmbus_proto_version); 2476 2477 kfree(msginfo); 2478 2479 if (ret != 0) 2480 return ret; 2481 2482 WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) == 0); 2483 2484 vmbus_request_offers(); 2485 2486 if (wait_for_completion_timeout( 2487 &vmbus_connection.ready_for_resume_event, 10 * HZ) == 0) 2488 pr_err("Some vmbus device is missing after suspending?\n"); 2489 2490 /* Reset the event for the next suspend. 
*/ 2491 reinit_completion(&vmbus_connection.ready_for_suspend_event); 2492 2493 return 0; 2494 } 2495 #else 2496 #define vmbus_bus_suspend NULL 2497 #define vmbus_bus_resume NULL 2498 #endif /* CONFIG_PM_SLEEP */ 2499 2500 static const struct acpi_device_id vmbus_acpi_device_ids[] = { 2501 {"VMBUS", 0}, 2502 {"VMBus", 0}, 2503 {"", 0}, 2504 }; 2505 MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids); 2506 2507 /* 2508 * Note: we must use the "no_irq" ops, otherwise hibernation can not work with 2509 * PCI device assignment, because "pci_dev_pm_ops" uses the "noirq" ops: in 2510 * the resume path, the pci "noirq" restore op runs before "non-noirq" op (see 2511 * resume_target_kernel() -> dpm_resume_start(), and hibernation_restore() -> 2512 * dpm_resume_end()). This means vmbus_bus_resume() and the pci-hyperv's 2513 * resume callback must also run via the "noirq" ops. 2514 * 2515 * Set suspend_noirq/resume_noirq to NULL for Suspend-to-Idle: see the comment 2516 * earlier in this file before vmbus_pm. 
2517 */ 2518 2519 static const struct dev_pm_ops vmbus_bus_pm = { 2520 .suspend_noirq = NULL, 2521 .resume_noirq = NULL, 2522 .freeze_noirq = vmbus_bus_suspend, 2523 .thaw_noirq = vmbus_bus_resume, 2524 .poweroff_noirq = vmbus_bus_suspend, 2525 .restore_noirq = vmbus_bus_resume 2526 }; 2527 2528 static struct acpi_driver vmbus_acpi_driver = { 2529 .name = "vmbus", 2530 .ids = vmbus_acpi_device_ids, 2531 .ops = { 2532 .add = vmbus_acpi_add, 2533 .remove = vmbus_acpi_remove, 2534 }, 2535 .drv.pm = &vmbus_bus_pm, 2536 }; 2537 2538 static void hv_kexec_handler(void) 2539 { 2540 hv_stimer_global_cleanup(); 2541 vmbus_initiate_unload(false); 2542 /* Make sure conn_state is set as hv_synic_cleanup checks for it */ 2543 mb(); 2544 cpuhp_remove_state(hyperv_cpuhp_online); 2545 hyperv_cleanup(); 2546 }; 2547 2548 static void hv_crash_handler(struct pt_regs *regs) 2549 { 2550 int cpu; 2551 2552 vmbus_initiate_unload(true); 2553 /* 2554 * In crash handler we can't schedule synic cleanup for all CPUs, 2555 * doing the cleanup for current CPU only. This should be sufficient 2556 * for kdump. 2557 */ 2558 cpu = smp_processor_id(); 2559 hv_stimer_cleanup(cpu); 2560 hv_synic_disable_regs(cpu); 2561 hyperv_cleanup(); 2562 }; 2563 2564 static int hv_synic_suspend(void) 2565 { 2566 /* 2567 * When we reach here, all the non-boot CPUs have been offlined. 2568 * If we're in a legacy configuration where stimer Direct Mode is 2569 * not enabled, the stimers on the non-boot CPUs have been unbound 2570 * in hv_synic_cleanup() -> hv_stimer_legacy_cleanup() -> 2571 * hv_stimer_cleanup() -> clockevents_unbind_device(). 2572 * 2573 * hv_synic_suspend() only runs on CPU0 with interrupts disabled. 
2574 * Here we do not call hv_stimer_legacy_cleanup() on CPU0 because: 2575 * 1) it's unnecessary as interrupts remain disabled between 2576 * syscore_suspend() and syscore_resume(): see create_image() and 2577 * resume_target_kernel() 2578 * 2) the stimer on CPU0 is automatically disabled later by 2579 * syscore_suspend() -> timekeeping_suspend() -> tick_suspend() -> ... 2580 * -> clockevents_shutdown() -> ... -> hv_ce_shutdown() 2581 * 3) a warning would be triggered if we call 2582 * clockevents_unbind_device(), which may sleep, in an 2583 * interrupts-disabled context. 2584 */ 2585 2586 hv_synic_disable_regs(0); 2587 2588 return 0; 2589 } 2590 2591 static void hv_synic_resume(void) 2592 { 2593 hv_synic_enable_regs(0); 2594 2595 /* 2596 * Note: we don't need to call hv_stimer_init(0), because the timer 2597 * on CPU0 is not unbound in hv_synic_suspend(), and the timer is 2598 * automatically re-enabled in timekeeping_resume(). 2599 */ 2600 } 2601 2602 /* The callbacks run only on CPU0, with irqs_disabled. */ 2603 static struct syscore_ops hv_synic_syscore_ops = { 2604 .suspend = hv_synic_suspend, 2605 .resume = hv_synic_resume, 2606 }; 2607 2608 static int __init hv_acpi_init(void) 2609 { 2610 int ret, t; 2611 2612 if (!hv_is_hyperv_initialized()) 2613 return -ENODEV; 2614 2615 init_completion(&probe_event); 2616 2617 /* 2618 * Get ACPI resources first. 
2619 */ 2620 ret = acpi_bus_register_driver(&vmbus_acpi_driver); 2621 2622 if (ret) 2623 return ret; 2624 2625 t = wait_for_completion_timeout(&probe_event, 5*HZ); 2626 if (t == 0) { 2627 ret = -ETIMEDOUT; 2628 goto cleanup; 2629 } 2630 hv_debug_init(); 2631 2632 ret = vmbus_bus_init(); 2633 if (ret) 2634 goto cleanup; 2635 2636 hv_setup_kexec_handler(hv_kexec_handler); 2637 hv_setup_crash_handler(hv_crash_handler); 2638 2639 register_syscore_ops(&hv_synic_syscore_ops); 2640 2641 return 0; 2642 2643 cleanup: 2644 acpi_bus_unregister_driver(&vmbus_acpi_driver); 2645 hv_acpi_dev = NULL; 2646 return ret; 2647 } 2648 2649 static void __exit vmbus_exit(void) 2650 { 2651 int cpu; 2652 2653 unregister_syscore_ops(&hv_synic_syscore_ops); 2654 2655 hv_remove_kexec_handler(); 2656 hv_remove_crash_handler(); 2657 vmbus_connection.conn_state = DISCONNECTED; 2658 hv_stimer_global_cleanup(); 2659 vmbus_disconnect(); 2660 hv_remove_vmbus_irq(); 2661 for_each_online_cpu(cpu) { 2662 struct hv_per_cpu_context *hv_cpu 2663 = per_cpu_ptr(hv_context.cpu_context, cpu); 2664 2665 tasklet_kill(&hv_cpu->msg_dpc); 2666 } 2667 hv_debug_rm_all_dir(); 2668 2669 vmbus_free_channels(); 2670 kfree(vmbus_connection.channels); 2671 2672 if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { 2673 kmsg_dump_unregister(&hv_kmsg_dumper); 2674 unregister_die_notifier(&hyperv_die_block); 2675 atomic_notifier_chain_unregister(&panic_notifier_list, 2676 &hyperv_panic_block); 2677 } 2678 2679 free_page((unsigned long)hv_panic_page); 2680 unregister_sysctl_table(hv_ctl_table_hdr); 2681 hv_ctl_table_hdr = NULL; 2682 bus_unregister(&hv_bus); 2683 2684 cpuhp_remove_state(hyperv_cpuhp_online); 2685 hv_synic_free(); 2686 acpi_bus_unregister_driver(&vmbus_acpi_driver); 2687 } 2688 2689 2690 MODULE_LICENSE("GPL"); 2691 MODULE_DESCRIPTION("Microsoft Hyper-V VMBus Driver"); 2692 2693 subsys_initcall(hv_acpi_init); 2694 module_exit(vmbus_exit); 2695