// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 *   K. Y. Srinivasan <kys@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/sysctl.h>
#include <linux/slab.h>
#include <linux/acpi.h>
#include <linux/completion.h>
#include <linux/hyperv.h>
#include <linux/kernel_stat.h>
#include <linux/clockchips.h>
#include <linux/cpu.h>
#include <linux/sched/task_stack.h>

#include <linux/delay.h>
#include <linux/notifier.h>
#include <linux/ptrace.h>
#include <linux/screen_info.h>
#include <linux/kdebug.h>
#include <linux/efi.h>
#include <linux/random.h>
#include <linux/kernel.h>
#include <linux/syscore_ops.h>
#include <clocksource/hyperv_timer.h>
#include "hyperv_vmbus.h"

struct vmbus_dynid {
	struct list_head node;
	struct hv_vmbus_device_id id;
};

static struct acpi_device *hv_acpi_dev;

static struct completion probe_event;

static int hyperv_cpuhp_online;

static void *hv_panic_page;

/*
 * Boolean to control whether to report panic messages over Hyper-V.
 *
 * It can be set via /proc/sys/kernel/hyperv/record_panic_msg
 */
static int sysctl_record_panic_msg = 1;

static int hyperv_report_reg(void)
{
	return !sysctl_record_panic_msg || !hv_panic_page;
}

static int hyperv_panic_event(struct notifier_block *nb, unsigned long val,
			      void *args)
{
	struct pt_regs *regs;

	vmbus_initiate_unload(true);

	/*
	 * Hyper-V should be notified only once about a panic. If we will be
	 * doing hyperv_report_panic_msg() later with kmsg data, don't do
	 * the notification here.
	 */
	if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE
	    && hyperv_report_reg()) {
		regs = current_pt_regs();
		hyperv_report_panic(regs, val, false);
	}
	return NOTIFY_DONE;
}

static int hyperv_die_event(struct notifier_block *nb, unsigned long val,
			    void *args)
{
	struct die_args *die = (struct die_args *)args;
	struct pt_regs *regs = die->regs;

	/* Don't notify Hyper-V if the die event is other than oops */
	if (val != DIE_OOPS)
		return NOTIFY_DONE;

	/*
	 * Hyper-V should be notified only once about a panic. If we will be
	 * doing hyperv_report_panic_msg() later with kmsg data, don't do
	 * the notification here.
	 */
	if (hyperv_report_reg())
		hyperv_report_panic(regs, val, true);
	return NOTIFY_DONE;
}
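/*
 * Note: the die notifier below is registered in vmbus_bus_init() only when
 * the Hyper-V crash MSRs are available; the panic notifier is always
 * registered so the VMBus connection can be unloaded on panic.
 */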
static struct notifier_block hyperv_die_block = {
	.notifier_call = hyperv_die_event,
};
static struct notifier_block hyperv_panic_block = {
	.notifier_call = hyperv_panic_event,
};

static const char *fb_mmio_name = "fb_range";
static struct resource *fb_mmio;
static struct resource *hyperv_mmio;
static DEFINE_MUTEX(hyperv_mmio_lock);

static int vmbus_exists(void)
{
	if (hv_acpi_dev == NULL)
		return -ENODEV;

	return 0;
}

static u8 channel_monitor_group(const struct vmbus_channel *channel)
{
	return (u8)channel->offermsg.monitorid / 32;
}

static u8 channel_monitor_offset(const struct vmbus_channel *channel)
{
	return (u8)channel->offermsg.monitorid % 32;
}

static u32 channel_pending(const struct vmbus_channel *channel,
			   const struct hv_monitor_page *monitor_page)
{
	u8 monitor_group = channel_monitor_group(channel);

	return monitor_page->trigger_group[monitor_group].pending;
}

static u32 channel_latency(const struct vmbus_channel *channel,
			   const struct hv_monitor_page *monitor_page)
{
	u8 monitor_group = channel_monitor_group(channel);
	u8 monitor_offset = channel_monitor_offset(channel);

	return monitor_page->latency[monitor_group][monitor_offset];
}

static u32 channel_conn_id(struct vmbus_channel *channel,
			   struct hv_monitor_page *monitor_page)
{
	u8 monitor_group = channel_monitor_group(channel);
	u8 monitor_offset = channel_monitor_offset(channel);

	return monitor_page->parameter[monitor_group][monitor_offset].connectionid.u.id;
}

static ssize_t id_show(struct device *dev, struct device_attribute *dev_attr,
		       char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n", hv_dev->channel->offermsg.child_relid);
}
static DEVICE_ATTR_RO(id);

static ssize_t state_show(struct device *dev, struct device_attribute *dev_attr,
			  char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n", hv_dev->channel->state);
}
static DEVICE_ATTR_RO(state);

static ssize_t monitor_id_show(struct device *dev,
			       struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n", hv_dev->channel->offermsg.monitorid);
}
static DEVICE_ATTR_RO(monitor_id);

static ssize_t class_id_show(struct device *dev,
			     struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "{%pUl}\n",
		       &hv_dev->channel->offermsg.offer.if_type);
}
static DEVICE_ATTR_RO(class_id);

static ssize_t device_id_show(struct device *dev,
			      struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "{%pUl}\n",
		       &hv_dev->channel->offermsg.offer.if_instance);
}
static DEVICE_ATTR_RO(device_id);
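/*
 * The modalias is "vmbus:" followed by the device-type GUID rendered as 32
 * hex characters, matching the MODALIAS uevent emitted by vmbus_uevent().
 */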
static ssize_t modalias_show(struct device *dev,
			     struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	return sprintf(buf, "vmbus:%*phN\n", UUID_SIZE, &hv_dev->dev_type);
}
static DEVICE_ATTR_RO(modalias);

#ifdef CONFIG_NUMA
static ssize_t numa_node_show(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;

	return sprintf(buf, "%d\n", cpu_to_node(hv_dev->channel->target_cpu));
}
static DEVICE_ATTR_RO(numa_node);
#endif

static ssize_t server_monitor_pending_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_pending(hv_dev->channel,
				       vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_pending);

static ssize_t client_monitor_pending_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_pending(hv_dev->channel,
				       vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_pending);

static ssize_t server_monitor_latency_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_latency(hv_dev->channel,
				       vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_latency);

static ssize_t client_monitor_latency_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_latency(hv_dev->channel,
				       vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_latency);

static ssize_t server_monitor_conn_id_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_conn_id(hv_dev->channel,
				       vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_conn_id);

static ssize_t client_monitor_conn_id_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_conn_id(hv_dev->channel,
				       vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_conn_id);

static ssize_t out_intr_mask_show(struct device *dev,
				  struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info outbound;
	int ret;

	if (!hv_dev->channel)
		return -ENODEV;

	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
					  &outbound);
	if (ret < 0)
		return ret;

	return sprintf(buf, "%d\n", outbound.current_interrupt_mask);
}
static DEVICE_ATTR_RO(out_intr_mask);

static ssize_t out_read_index_show(struct device *dev,
				   struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info outbound;
	int ret;

	if (!hv_dev->channel)
		return -ENODEV;

	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
					  &outbound);
	if (ret < 0)
		return ret;
	return sprintf(buf, "%d\n", outbound.current_read_index);
}
static DEVICE_ATTR_RO(out_read_index);

static ssize_t out_write_index_show(struct device *dev,
				    struct device_attribute *dev_attr,
				    char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info outbound;
	int ret;

	if (!hv_dev->channel)
		return -ENODEV;

	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
					  &outbound);
	if (ret < 0)
		return ret;
	return sprintf(buf, "%d\n", outbound.current_write_index);
}
static DEVICE_ATTR_RO(out_write_index);

static ssize_t out_read_bytes_avail_show(struct device *dev,
					 struct device_attribute *dev_attr,
					 char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info outbound;
	int ret;

	if (!hv_dev->channel)
		return -ENODEV;

	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
					  &outbound);
	if (ret < 0)
		return ret;
	return sprintf(buf, "%d\n", outbound.bytes_avail_toread);
}
static DEVICE_ATTR_RO(out_read_bytes_avail);

static ssize_t out_write_bytes_avail_show(struct device *dev,
					  struct device_attribute *dev_attr,
					  char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info outbound;
	int ret;

	if (!hv_dev->channel)
		return -ENODEV;

	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
					  &outbound);
	if (ret < 0)
		return ret;
	return sprintf(buf, "%d\n", outbound.bytes_avail_towrite);
}
static DEVICE_ATTR_RO(out_write_bytes_avail);

static ssize_t in_intr_mask_show(struct device *dev,
				 struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info inbound;
	int ret;

	if (!hv_dev->channel)
		return -ENODEV;

	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
	if (ret < 0)
		return ret;

	return sprintf(buf, "%d\n", inbound.current_interrupt_mask);
}
static DEVICE_ATTR_RO(in_intr_mask);

static ssize_t in_read_index_show(struct device *dev,
				  struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info inbound;
	int ret;

	if (!hv_dev->channel)
		return -ENODEV;

	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
	if (ret < 0)
		return ret;

	return sprintf(buf, "%d\n", inbound.current_read_index);
}
static DEVICE_ATTR_RO(in_read_index);

static ssize_t in_write_index_show(struct device *dev,
				   struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info inbound;
	int ret;

	if (!hv_dev->channel)
		return -ENODEV;

	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
	if (ret < 0)
		return ret;

	return sprintf(buf, "%d\n", inbound.current_write_index);
}
static DEVICE_ATTR_RO(in_write_index);

static ssize_t in_read_bytes_avail_show(struct device *dev,
					struct device_attribute *dev_attr,
					char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info inbound;
	int ret;

	if (!hv_dev->channel)
		return -ENODEV;

	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
	if (ret < 0)
		return ret;

	return sprintf(buf, "%d\n", inbound.bytes_avail_toread);
}
static DEVICE_ATTR_RO(in_read_bytes_avail);

static ssize_t in_write_bytes_avail_show(struct device *dev,
					 struct device_attribute *dev_attr,
					 char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info inbound;
	int ret;

	if (!hv_dev->channel)
		return -ENODEV;

	ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
	if (ret < 0)
		return ret;

	return sprintf(buf, "%d\n", inbound.bytes_avail_towrite);
}
static DEVICE_ATTR_RO(in_write_bytes_avail);

static ssize_t channel_vp_mapping_show(struct device *dev,
				       struct device_attribute *dev_attr,
				       char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct vmbus_channel *channel = hv_dev->channel, *cur_sc;
	int buf_size = PAGE_SIZE, n_written, tot_written;
	struct list_head *cur;

	if (!channel)
		return -ENODEV;

	mutex_lock(&vmbus_connection.channel_mutex);

	tot_written = snprintf(buf, buf_size, "%u:%u\n",
			       channel->offermsg.child_relid, channel->target_cpu);

	list_for_each(cur, &channel->sc_list) {
		if (tot_written >= buf_size - 1)
			break;

		cur_sc = list_entry(cur, struct vmbus_channel, sc_list);
		n_written = scnprintf(buf + tot_written,
				      buf_size - tot_written,
				      "%u:%u\n",
				      cur_sc->offermsg.child_relid,
				      cur_sc->target_cpu);
		tot_written += n_written;
	}

	mutex_unlock(&vmbus_connection.channel_mutex);

	return tot_written;
}
static DEVICE_ATTR_RO(channel_vp_mapping);

static ssize_t vendor_show(struct device *dev,
			   struct device_attribute *dev_attr,
			   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	return sprintf(buf, "0x%x\n", hv_dev->vendor_id);
}
static DEVICE_ATTR_RO(vendor);

static ssize_t device_show(struct device *dev,
			   struct device_attribute *dev_attr,
			   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	return sprintf(buf, "0x%x\n", hv_dev->device_id);
}
static DEVICE_ATTR_RO(device);

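/*
 * Writing a driver name here forces the device to bind only to that driver
 * (see hv_vmbus_get_id()); writing an empty string clears the override.
 */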
static ssize_t driver_override_store(struct device *dev,
				     struct device_attribute *attr,
				     const char *buf, size_t count)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	char *driver_override, *old, *cp;

	/* We need to keep extra room for a newline */
	if (count >= (PAGE_SIZE - 1))
		return -EINVAL;

	driver_override = kstrndup(buf, count, GFP_KERNEL);
	if (!driver_override)
		return -ENOMEM;

	cp = strchr(driver_override, '\n');
	if (cp)
		*cp = '\0';

	device_lock(dev);
	old = hv_dev->driver_override;
	if (strlen(driver_override)) {
		hv_dev->driver_override = driver_override;
	} else {
		kfree(driver_override);
		hv_dev->driver_override = NULL;
	}
	device_unlock(dev);

	kfree(old);

	return count;
}

static ssize_t driver_override_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	ssize_t len;

	device_lock(dev);
	len = snprintf(buf, PAGE_SIZE, "%s\n", hv_dev->driver_override);
	device_unlock(dev);

	return len;
}
static DEVICE_ATTR_RW(driver_override);

/* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */
static struct attribute *vmbus_dev_attrs[] = {
	&dev_attr_id.attr,
	&dev_attr_state.attr,
	&dev_attr_monitor_id.attr,
	&dev_attr_class_id.attr,
	&dev_attr_device_id.attr,
	&dev_attr_modalias.attr,
#ifdef CONFIG_NUMA
	&dev_attr_numa_node.attr,
#endif
	&dev_attr_server_monitor_pending.attr,
	&dev_attr_client_monitor_pending.attr,
	&dev_attr_server_monitor_latency.attr,
	&dev_attr_client_monitor_latency.attr,
	&dev_attr_server_monitor_conn_id.attr,
	&dev_attr_client_monitor_conn_id.attr,
	&dev_attr_out_intr_mask.attr,
	&dev_attr_out_read_index.attr,
	&dev_attr_out_write_index.attr,
	&dev_attr_out_read_bytes_avail.attr,
	&dev_attr_out_write_bytes_avail.attr,
	&dev_attr_in_intr_mask.attr,
	&dev_attr_in_read_index.attr,
	&dev_attr_in_write_index.attr,
	&dev_attr_in_read_bytes_avail.attr,
	&dev_attr_in_write_bytes_avail.attr,
	&dev_attr_channel_vp_mapping.attr,
	&dev_attr_vendor.attr,
	&dev_attr_device.attr,
	&dev_attr_driver_override.attr,
	NULL,
};

/*
 * Device-level attribute_group callback function. Returns the permission for
 * each attribute, and returns 0 if an attribute is not visible.
 */
static umode_t vmbus_dev_attr_is_visible(struct kobject *kobj,
					 struct attribute *attr, int idx)
{
	struct device *dev = kobj_to_dev(kobj);
	const struct hv_device *hv_dev = device_to_hv_device(dev);

	/* Hide the monitor attributes if the monitor mechanism is not used. */
	if (!hv_dev->channel->offermsg.monitor_allocated &&
	    (attr == &dev_attr_monitor_id.attr ||
	     attr == &dev_attr_server_monitor_pending.attr ||
	     attr == &dev_attr_client_monitor_pending.attr ||
	     attr == &dev_attr_server_monitor_latency.attr ||
	     attr == &dev_attr_client_monitor_latency.attr ||
	     attr == &dev_attr_server_monitor_conn_id.attr ||
	     attr == &dev_attr_client_monitor_conn_id.attr))
		return 0;

	return attr->mode;
}

static const struct attribute_group vmbus_dev_group = {
	.attrs = vmbus_dev_attrs,
	.is_visible = vmbus_dev_attr_is_visible
};
__ATTRIBUTE_GROUPS(vmbus_dev);

/*
 * vmbus_uevent - add uevent for our device
 *
 * This routine is invoked when a device is added or removed on the vmbus to
 * generate a uevent to udev in the userspace. The udev will then look at its
 * rule and the uevent generated here to load the appropriate driver
 *
 * The alias string will be of the form vmbus:guid where guid is the string
 * representation of the device guid (each byte of the guid will be
 * represented with two hex characters.
 */
static int vmbus_uevent(struct device *device, struct kobj_uevent_env *env)
{
	struct hv_device *dev = device_to_hv_device(device);
	const char *format = "MODALIAS=vmbus:%*phN";

	return add_uevent_var(env, format, UUID_SIZE, &dev->dev_type);
}

static const struct hv_vmbus_device_id *
hv_vmbus_dev_match(const struct hv_vmbus_device_id *id, const guid_t *guid)
{
	if (id == NULL)
		return NULL; /* empty device table */

	for (; !guid_is_null(&id->guid); id++)
		if (guid_equal(&id->guid, guid))
			return id;

	return NULL;
}

static const struct hv_vmbus_device_id *
hv_vmbus_dynid_match(struct hv_driver *drv, const guid_t *guid)
{
	const struct hv_vmbus_device_id *id = NULL;
	struct vmbus_dynid *dynid;

	spin_lock(&drv->dynids.lock);
	list_for_each_entry(dynid, &drv->dynids.list, node) {
		if (guid_equal(&dynid->id.guid, guid)) {
			id = &dynid->id;
			break;
		}
	}
	spin_unlock(&drv->dynids.lock);

	return id;
}

static const struct hv_vmbus_device_id vmbus_device_null;

/*
 * Return a matching hv_vmbus_device_id pointer.
 * If there is no match, return NULL.
 */
static const struct hv_vmbus_device_id *hv_vmbus_get_id(struct hv_driver *drv,
							struct hv_device *dev)
{
	const guid_t *guid = &dev->dev_type;
	const struct hv_vmbus_device_id *id;

	/* When driver_override is set, only bind to the matching driver */
	if (dev->driver_override && strcmp(dev->driver_override, drv->name))
		return NULL;

	/* Look at the dynamic ids first, before the static ones */
	id = hv_vmbus_dynid_match(drv, guid);
	if (!id)
		id = hv_vmbus_dev_match(drv->id_table, guid);

	/* driver_override will always match, send a dummy id */
	if (!id && dev->driver_override)
		id = &vmbus_device_null;

	return id;
}

/* vmbus_add_dynid - add a new device ID to this driver and re-probe devices */
static int vmbus_add_dynid(struct hv_driver *drv, guid_t *guid)
{
	struct vmbus_dynid *dynid;

	dynid = kzalloc(sizeof(*dynid), GFP_KERNEL);
	if (!dynid)
		return -ENOMEM;

	dynid->id.guid = *guid;

	spin_lock(&drv->dynids.lock);
	list_add_tail(&dynid->node, &drv->dynids.list);
	spin_unlock(&drv->dynids.lock);

	return driver_attach(&drv->driver);
}

static void vmbus_free_dynids(struct hv_driver *drv)
{
	struct vmbus_dynid *dynid, *n;

	spin_lock(&drv->dynids.lock);
	list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) {
		list_del(&dynid->node);
		kfree(dynid);
	}
	spin_unlock(&drv->dynids.lock);
}

/*
 * store_new_id - sysfs frontend to vmbus_add_dynid()
 *
 * Allow GUIDs to be added to an existing driver via sysfs.
 */
static ssize_t new_id_store(struct device_driver *driver, const char *buf,
			    size_t count)
{
	struct hv_driver *drv = drv_to_hv_drv(driver);
	guid_t guid;
	ssize_t retval;

	retval = guid_parse(buf, &guid);
	if (retval)
		return retval;

	if (hv_vmbus_dynid_match(drv, &guid))
		return -EEXIST;

	retval = vmbus_add_dynid(drv, &guid);
	if (retval)
		return retval;
	return count;
}
static DRIVER_ATTR_WO(new_id);

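/*
 * Usage sketch: echo a device GUID into
 * /sys/bus/vmbus/drivers/<driver>/new_id to bind matching devices at
 * runtime; remove_id below undoes a dynamically added ID.
 */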
/*
 * store_remove_id - remove a device ID from this driver
 *
 * Removes a dynamically added device ID from this driver.
 */
static ssize_t remove_id_store(struct device_driver *driver, const char *buf,
			       size_t count)
{
	struct hv_driver *drv = drv_to_hv_drv(driver);
	struct vmbus_dynid *dynid, *n;
	guid_t guid;
	ssize_t retval;

	retval = guid_parse(buf, &guid);
	if (retval)
		return retval;

	retval = -ENODEV;
	spin_lock(&drv->dynids.lock);
	list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) {
		struct hv_vmbus_device_id *id = &dynid->id;

		if (guid_equal(&id->guid, &guid)) {
			list_del(&dynid->node);
			kfree(dynid);
			retval = count;
			break;
		}
	}
	spin_unlock(&drv->dynids.lock);

	return retval;
}
static DRIVER_ATTR_WO(remove_id);

static struct attribute *vmbus_drv_attrs[] = {
	&driver_attr_new_id.attr,
	&driver_attr_remove_id.attr,
	NULL,
};
ATTRIBUTE_GROUPS(vmbus_drv);

/*
 * vmbus_match - Attempt to match the specified device to the specified driver
 */
static int vmbus_match(struct device *device, struct device_driver *driver)
{
	struct hv_driver *drv = drv_to_hv_drv(driver);
	struct hv_device *hv_dev = device_to_hv_device(device);

	/* The hv_sock driver handles all hv_sock offers. */
	if (is_hvsock_channel(hv_dev->channel))
		return drv->hvsock;

	if (hv_vmbus_get_id(drv, hv_dev))
		return 1;

	return 0;
}

/*
 * vmbus_probe - Add the new vmbus's child device
 */
static int vmbus_probe(struct device *child_device)
{
	int ret = 0;
	struct hv_driver *drv =
			drv_to_hv_drv(child_device->driver);
	struct hv_device *dev = device_to_hv_device(child_device);
	const struct hv_vmbus_device_id *dev_id;

	dev_id = hv_vmbus_get_id(drv, dev);
	if (drv->probe) {
		ret = drv->probe(dev, dev_id);
		if (ret != 0)
			pr_err("probe failed for device %s (%d)\n",
			       dev_name(child_device), ret);

	} else {
		pr_err("probe not set for driver %s\n",
		       dev_name(child_device));
		ret = -ENODEV;
	}
	return ret;
}

/*
 * vmbus_remove - Remove a vmbus device
 */
static int vmbus_remove(struct device *child_device)
{
	struct hv_driver *drv;
	struct hv_device *dev = device_to_hv_device(child_device);

	if (child_device->driver) {
		drv = drv_to_hv_drv(child_device->driver);
		if (drv->remove)
			drv->remove(dev);
	}

	return 0;
}

/*
 * vmbus_shutdown - Shutdown a vmbus device
 */
static void vmbus_shutdown(struct device *child_device)
{
	struct hv_driver *drv;
	struct hv_device *dev = device_to_hv_device(child_device);

	/* The device may not be attached yet */
	if (!child_device->driver)
		return;

	drv = drv_to_hv_drv(child_device->driver);

	if (drv->shutdown)
		drv->shutdown(dev);
}

#ifdef CONFIG_PM_SLEEP
/*
 * vmbus_suspend - Suspend a vmbus device
 */
static int vmbus_suspend(struct device *child_device)
{
	struct hv_driver *drv;
	struct hv_device *dev = device_to_hv_device(child_device);

	/* The device may not be attached yet */
	if (!child_device->driver)
		return 0;

	drv = drv_to_hv_drv(child_device->driver);
	if (!drv->suspend)
		return -EOPNOTSUPP;

	return drv->suspend(dev);
}

/*
 * vmbus_resume - Resume a vmbus device
 */
static int vmbus_resume(struct device *child_device)
{
	struct hv_driver *drv;
	struct hv_device *dev = device_to_hv_device(child_device);

	/* The device may not be attached yet */
	if (!child_device->driver)
		return 0;

	drv = drv_to_hv_drv(child_device->driver);
	if (!drv->resume)
		return -EOPNOTSUPP;

	return drv->resume(dev);
}
#else
#define vmbus_suspend NULL
#define vmbus_resume NULL
#endif /* CONFIG_PM_SLEEP */

/*
 * vmbus_device_release - Final callback release of the vmbus child device
 */
static void vmbus_device_release(struct device *device)
{
	struct hv_device *hv_dev = device_to_hv_device(device);
	struct vmbus_channel *channel = hv_dev->channel;

	hv_debug_rm_dev_dir(hv_dev);

	mutex_lock(&vmbus_connection.channel_mutex);
	hv_process_channel_removal(channel);
	mutex_unlock(&vmbus_connection.channel_mutex);
	kfree(hv_dev);
}

/*
 * Note: we must use the "noirq" ops: see the comment before vmbus_bus_pm.
 *
 * suspend_noirq/resume_noirq are set to NULL to support Suspend-to-Idle: we
 * shouldn't suspend the vmbus devices upon Suspend-to-Idle, otherwise there
 * is no way to wake up a Generation-2 VM.
 *
 * The other 4 ops are for hibernation.
 */

static const struct dev_pm_ops vmbus_pm = {
	.suspend_noirq = NULL,
	.resume_noirq = NULL,
	.freeze_noirq = vmbus_suspend,
	.thaw_noirq = vmbus_resume,
	.poweroff_noirq = vmbus_suspend,
	.restore_noirq = vmbus_resume,
};

/* The one and only one */
static struct bus_type hv_bus = {
	.name = "vmbus",
	.match = vmbus_match,
	.shutdown = vmbus_shutdown,
	.remove = vmbus_remove,
	.probe = vmbus_probe,
	.uevent = vmbus_uevent,
	.dev_groups = vmbus_dev_groups,
	.drv_groups = vmbus_drv_groups,
	.pm = &vmbus_pm,
};

struct onmessage_work_context {
	struct work_struct work;
	struct {
		struct hv_message_header header;
		u8 payload[];
	} msg;
};

static void vmbus_onmessage_work(struct work_struct *work)
{
	struct onmessage_work_context *ctx;

	/* Do not process messages if we're in DISCONNECTED state */
	if (vmbus_connection.conn_state == DISCONNECTED)
		return;

	ctx = container_of(work, struct onmessage_work_context,
			   work);
	vmbus_onmessage((struct vmbus_channel_message_header *)
			&ctx->msg.payload);
	kfree(ctx);
}

void vmbus_on_msg_dpc(unsigned long data)
{
	struct hv_per_cpu_context *hv_cpu = (void *)data;
	void *page_addr = hv_cpu->synic_message_page;
	struct hv_message *msg = (struct hv_message *)page_addr +
				  VMBUS_MESSAGE_SINT;
	struct vmbus_channel_message_header *hdr;
	const struct vmbus_channel_message_table_entry *entry;
	struct onmessage_work_context *ctx;
	u32 message_type = msg->header.message_type;

	/*
	 * 'enum vmbus_channel_message_type' is supposed to always be 'u32' as
	 * it is being used in 'struct vmbus_channel_message_header' definition
	 * which is supposed to match hypervisor ABI.
	 */
	BUILD_BUG_ON(sizeof(enum vmbus_channel_message_type) != sizeof(u32));

	if (message_type == HVMSG_NONE)
		/* no msg */
		return;

	hdr = (struct vmbus_channel_message_header *)msg->u.payload;

	trace_vmbus_on_msg_dpc(hdr);

	if (hdr->msgtype >= CHANNELMSG_COUNT) {
		WARN_ONCE(1, "unknown msgtype=%d\n", hdr->msgtype);
		goto msg_handled;
	}

	if (msg->header.payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT) {
		WARN_ONCE(1, "payload size is too large (%d)\n",
			  msg->header.payload_size);
		goto msg_handled;
	}

	entry = &channel_message_table[hdr->msgtype];

	if (!entry->message_handler)
		goto msg_handled;

	if (msg->header.payload_size < entry->min_payload_len) {
		WARN_ONCE(1, "message too short: msgtype=%d len=%d\n",
			  hdr->msgtype, msg->header.payload_size);
		goto msg_handled;
	}

	if (entry->handler_type == VMHT_BLOCKING) {
		ctx = kmalloc(sizeof(*ctx) + msg->header.payload_size,
			      GFP_ATOMIC);
		if (ctx == NULL)
			return;

		INIT_WORK(&ctx->work, vmbus_onmessage_work);
		memcpy(&ctx->msg, msg, sizeof(msg->header) +
		       msg->header.payload_size);

		/*
		 * The host can generate a rescind message while we
		 * may still be handling the original offer. We deal with
		 * this condition by relying on the synchronization provided
		 * by offer_in_progress and by channel_mutex. See also the
		 * inline comments in vmbus_onoffer_rescind().
		 */
		switch (hdr->msgtype) {
		case CHANNELMSG_RESCIND_CHANNELOFFER:
			/*
			 * If we are handling the rescind message,
			 * schedule the work on the global work queue.
			 *
			 * The OFFER message and the RESCIND message should
			 * not be handled by the same serialized work queue,
			 * because the OFFER handler may call vmbus_open(),
			 * which tries to open the channel by sending an
			 * OPEN_CHANNEL message to the host and waits for
			 * the host's response; however, if the host has
			 * rescinded the channel before it receives the
			 * OPEN_CHANNEL message, the host just silently
			 * ignores the OPEN_CHANNEL message; as a result,
			 * the guest's OFFER handler hangs forever, if we
			 * handle the RESCIND message in the same serialized
			 * work queue: the RESCIND handler cannot start to
			 * run before the OFFER handler finishes.
			 */
			schedule_work(&ctx->work);
			break;

		case CHANNELMSG_OFFERCHANNEL:
			/*
			 * The host sends the offer message of a given channel
			 * before sending the rescind message of the same
			 * channel. These messages are sent to the guest's
			 * connect CPU; the guest then starts processing them
			 * in the tasklet handler on this CPU:
			 *
			 * VMBUS_CONNECT_CPU
			 *
			 * [vmbus_on_msg_dpc()]
			 * atomic_inc()            // CHANNELMSG_OFFERCHANNEL
			 * queue_work()
			 * ...
			 * [vmbus_on_msg_dpc()]
			 * schedule_work()         // CHANNELMSG_RESCIND_CHANNELOFFER
			 *
			 * We rely on the memory-ordering properties of the
			 * queue_work() and schedule_work() primitives, which
			 * guarantee that the atomic increment will be visible
			 * to the CPUs which will execute the offer & rescind
			 * works by the time these works start execution.
			 */
			atomic_inc(&vmbus_connection.offer_in_progress);
			fallthrough;

		default:
			queue_work(vmbus_connection.work_queue, &ctx->work);
		}
	} else
		entry->message_handler(hdr);

msg_handled:
	vmbus_signal_eom(msg, message_type);
}

#ifdef CONFIG_PM_SLEEP
/*
 * Fake RESCIND_CHANNEL messages to clean up hv_sock channels by force for
 * hibernation, because hv_sock connections cannot persist across hibernation.
 */
static void vmbus_force_channel_rescinded(struct vmbus_channel *channel)
{
	struct onmessage_work_context *ctx;
	struct vmbus_channel_rescind_offer *rescind;

	WARN_ON(!is_hvsock_channel(channel));

	/*
	 * Allocation size is small and the allocation should really not fail,
	 * otherwise the state of the hv_sock connections ends up in limbo.
	 */
	ctx = kzalloc(sizeof(*ctx) + sizeof(*rescind),
		      GFP_KERNEL | __GFP_NOFAIL);

	/*
	 * So far, these are not really used by Linux. Just set them to the
	 * reasonable values conforming to the definitions of the fields.
	 */
	ctx->msg.header.message_type = 1;
	ctx->msg.header.payload_size = sizeof(*rescind);

	/* These values are actually used by Linux. */
	rescind = (struct vmbus_channel_rescind_offer *)ctx->msg.payload;
	rescind->header.msgtype = CHANNELMSG_RESCIND_CHANNELOFFER;
	rescind->child_relid = channel->offermsg.child_relid;

	INIT_WORK(&ctx->work, vmbus_onmessage_work);

	queue_work(vmbus_connection.work_queue, &ctx->work);
}
#endif /* CONFIG_PM_SLEEP */

/*
 * Schedule all channels with events pending
 */
static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu)
{
	unsigned long *recv_int_page;
	u32 maxbits, relid;

	if (vmbus_proto_version < VERSION_WIN8) {
		maxbits = MAX_NUM_CHANNELS_SUPPORTED;
		recv_int_page = vmbus_connection.recv_int_page;
	} else {
		/*
		 * When the host is win8 and beyond, the event page
		 * can be directly checked to get the id of the channel
		 * that has the interrupt pending.
		 */
		void *page_addr = hv_cpu->synic_event_page;
		union hv_synic_event_flags *event
			= (union hv_synic_event_flags *)page_addr +
						 VMBUS_MESSAGE_SINT;

		maxbits = HV_EVENT_FLAGS_COUNT;
		recv_int_page = event->flags;
	}

	if (unlikely(!recv_int_page))
		return;

	for_each_set_bit(relid, recv_int_page, maxbits) {
		void (*callback_fn)(void *context);
		struct vmbus_channel *channel;

		if (!sync_test_and_clear_bit(relid, recv_int_page))
			continue;

		/* Special case - vmbus channel protocol msg */
		if (relid == 0)
			continue;

		/*
		 * Pairs with the kfree_rcu() in vmbus_chan_release().
		 * Guarantees that the channel data structure doesn't
		 * get freed while the channel pointer below is being
		 * dereferenced.
		 */
		rcu_read_lock();

		/* Find channel based on relid */
		channel = relid2channel(relid);
		if (channel == NULL)
			goto sched_unlock_rcu;

		if (channel->rescind)
			goto sched_unlock_rcu;

		/*
		 * Make sure that the ring buffer data structure doesn't get
		 * freed while we dereference the ring buffer pointer. Test
		 * for the channel's onchannel_callback being NULL within a
		 * sched_lock critical section. See also the inline comments
		 * in vmbus_reset_channel_cb().
		 */
		spin_lock(&channel->sched_lock);

		callback_fn = channel->onchannel_callback;
		if (unlikely(callback_fn == NULL))
			goto sched_unlock;

		trace_vmbus_chan_sched(channel);

		++channel->interrupts;

		switch (channel->callback_mode) {
		case HV_CALL_ISR:
			(*callback_fn)(channel->channel_callback_context);
			break;

		case HV_CALL_BATCHED:
			hv_begin_read(&channel->inbound);
			fallthrough;
		case HV_CALL_DIRECT:
			tasklet_schedule(&channel->callback_event);
		}

sched_unlock:
		spin_unlock(&channel->sched_lock);
sched_unlock_rcu:
		rcu_read_unlock();
	}
}

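/*
 * Top-level VMBus interrupt handler, installed via hv_setup_vmbus_irq() in
 * vmbus_bus_init(): schedules pending channel callbacks and, for control
 * messages, the per-CPU message DPC.
 */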
static void vmbus_isr(void)
{
	struct hv_per_cpu_context *hv_cpu
		= this_cpu_ptr(hv_context.cpu_context);
	void *page_addr = hv_cpu->synic_event_page;
	struct hv_message *msg;
	union hv_synic_event_flags *event;
	bool handled = false;

	if (unlikely(page_addr == NULL))
		return;

	event = (union hv_synic_event_flags *)page_addr +
					 VMBUS_MESSAGE_SINT;
	/*
	 * Check for events before checking for messages. This is the order
	 * in which events and messages are checked in Windows guests on
	 * Hyper-V, and the Windows team suggested we do the same.
	 */

	if ((vmbus_proto_version == VERSION_WS2008) ||
	    (vmbus_proto_version == VERSION_WIN7)) {

		/* Since we are a child, we only need to check bit 0 */
		if (sync_test_and_clear_bit(0, event->flags))
			handled = true;
	} else {
		/*
		 * Our host is win8 or above. The signaling mechanism
		 * has changed and we can directly look at the event page.
		 * If bit n is set then we have an interrupt on the channel
		 * whose id is n.
		 */
		handled = true;
	}

	if (handled)
		vmbus_chan_sched(hv_cpu);

	page_addr = hv_cpu->synic_message_page;
	msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;

	/* Check if there are actual msgs to be processed */
	if (msg->header.message_type != HVMSG_NONE) {
		if (msg->header.message_type == HVMSG_TIMER_EXPIRED) {
			hv_stimer0_isr();
			vmbus_signal_eom(msg, HVMSG_TIMER_EXPIRED);
		} else
			tasklet_schedule(&hv_cpu->msg_dpc);
	}

	add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0);
}

/*
 * Callback from kmsg_dump. Grab as much as possible from the end of the kmsg
 * buffer and call into Hyper-V to transfer the data.
 */
static void hv_kmsg_dump(struct kmsg_dumper *dumper,
			 enum kmsg_dump_reason reason)
{
	size_t bytes_written;
	phys_addr_t panic_pa;

	/* We are only interested in panics. */
	if ((reason != KMSG_DUMP_PANIC) || (!sysctl_record_panic_msg))
		return;

	panic_pa = virt_to_phys(hv_panic_page);

	/*
	 * Write dump contents to the page. No need to synchronize; panic should
	 * be single-threaded.
	 */
	kmsg_dump_get_buffer(dumper, false, hv_panic_page, HV_HYP_PAGE_SIZE,
			     &bytes_written);
	if (bytes_written)
		hyperv_report_panic_msg(panic_pa, bytes_written);
}

static struct kmsg_dumper hv_kmsg_dumper = {
	.dump = hv_kmsg_dump,
};

static struct ctl_table_header *hv_ctl_table_hdr;

/*
 * sysctl option to allow the user to control whether kmsg data should be
 * reported to Hyper-V on panic.
 */
static struct ctl_table hv_ctl_table[] = {
	{
		.procname = "hyperv_record_panic_msg",
		.data = &sysctl_record_panic_msg,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE
	},
	{}
};

static struct ctl_table hv_root_table[] = {
	{
		.procname = "kernel",
		.mode = 0555,
		.child = hv_ctl_table
	},
	{}
};

/*
 * vmbus_bus_init - Main vmbus driver initialization routine.
 *
 * Here, we
 *	- initialize the vmbus driver context
 *	- invoke the vmbus hv main init routine
 *	- retrieve the channel offers
 */
static int vmbus_bus_init(void)
{
	int ret;

	ret = hv_init();
	if (ret != 0) {
		pr_err("Unable to initialize the hypervisor - 0x%x\n", ret);
		return ret;
	}

	ret = bus_register(&hv_bus);
	if (ret)
		return ret;

	hv_setup_vmbus_irq(vmbus_isr);

	ret = hv_synic_alloc();
	if (ret)
		goto err_alloc;

	/*
	 * Initialize the per-cpu interrupt state and stimer state.
	 * Then connect to the host.
	 */
	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online",
				hv_synic_init, hv_synic_cleanup);
	if (ret < 0)
		goto err_cpuhp;
	hyperv_cpuhp_online = ret;

	ret = vmbus_connect();
	if (ret)
		goto err_connect;

	/*
	 * Only register if the crash MSRs are available
	 */
	if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
		u64 hyperv_crash_ctl;
		/*
		 * Sysctl registration is not fatal, since by default
		 * reporting is enabled.
		 */
		hv_ctl_table_hdr = register_sysctl_table(hv_root_table);
		if (!hv_ctl_table_hdr)
			pr_err("Hyper-V: sysctl table register error\n");

		/*
		 * Register for panic kmsg callback only if the right
		 * capability is supported by the hypervisor.
		 */
		hv_get_crash_ctl(hyperv_crash_ctl);
		if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG) {
			hv_panic_page = (void *)hv_alloc_hyperv_zeroed_page();
			if (hv_panic_page) {
				ret = kmsg_dump_register(&hv_kmsg_dumper);
				if (ret) {
					pr_err("Hyper-V: kmsg dump register error 0x%x\n",
					       ret);
					hv_free_hyperv_page(
					    (unsigned long)hv_panic_page);
					hv_panic_page = NULL;
				}
			} else
				pr_err("Hyper-V: panic message page memory allocation failed\n");
		}

		register_die_notifier(&hyperv_die_block);
	}

	/*
	 * Always register the panic notifier because we need to unload
	 * the VMbus channel connection to prevent any VMbus
	 * activity after the VM panics.
	 */
	atomic_notifier_chain_register(&panic_notifier_list,
				       &hyperv_panic_block);

	vmbus_request_offers();

	return 0;

err_connect:
	cpuhp_remove_state(hyperv_cpuhp_online);
err_cpuhp:
	hv_synic_free();
err_alloc:
	hv_remove_vmbus_irq();

	bus_unregister(&hv_bus);
	unregister_sysctl_table(hv_ctl_table_hdr);
	hv_ctl_table_hdr = NULL;
	return ret;
}

/**
 * __vmbus_driver_register() - Register a vmbus's driver
 * @hv_driver: Pointer to driver structure you want to register
 * @owner: owner module of the drv
 * @mod_name: module name string
 *
 * Registers the given driver with Linux through the 'driver_register()' call
 * and sets up the hyper-v vmbus handling for this driver.
 * It will return the state of the 'driver_register()' call.
 *
 */
int __vmbus_driver_register(struct hv_driver *hv_driver, struct module *owner, const char *mod_name)
{
	int ret;

	pr_info("registering driver %s\n", hv_driver->name);

	ret = vmbus_exists();
	if (ret < 0)
		return ret;

	hv_driver->driver.name = hv_driver->name;
	hv_driver->driver.owner = owner;
	hv_driver->driver.mod_name = mod_name;
	hv_driver->driver.bus = &hv_bus;

	spin_lock_init(&hv_driver->dynids.lock);
	INIT_LIST_HEAD(&hv_driver->dynids.list);

	ret = driver_register(&hv_driver->driver);

	return ret;
}
EXPORT_SYMBOL_GPL(__vmbus_driver_register);

/**
 * vmbus_driver_unregister() - Unregister a vmbus's driver
 * @hv_driver: Pointer to driver structure you want to
 *             un-register
 *
 * Un-register the given driver that was previously registered with a call to
 * vmbus_driver_register()
 */
void vmbus_driver_unregister(struct hv_driver *hv_driver)
{
	pr_info("unregistering driver %s\n", hv_driver->name);

	if (!vmbus_exists()) {
		driver_unregister(&hv_driver->driver);
		vmbus_free_dynids(hv_driver);
	}
}
EXPORT_SYMBOL_GPL(vmbus_driver_unregister);


/*
 * Called when last reference to channel is gone.
 */
static void vmbus_chan_release(struct kobject *kobj)
{
	struct vmbus_channel *channel
		= container_of(kobj, struct vmbus_channel, kobj);

	kfree_rcu(channel, rcu);
}

struct vmbus_chan_attribute {
	struct attribute attr;
	ssize_t (*show)(struct vmbus_channel *chan, char *buf);
	ssize_t (*store)(struct vmbus_channel *chan,
			 const char *buf, size_t count);
};
#define VMBUS_CHAN_ATTR(_name, _mode, _show, _store) \
	struct vmbus_chan_attribute chan_attr_##_name \
		= __ATTR(_name, _mode, _show, _store)
#define VMBUS_CHAN_ATTR_RW(_name) \
	struct vmbus_chan_attribute chan_attr_##_name = __ATTR_RW(_name)
#define VMBUS_CHAN_ATTR_RO(_name) \
	struct vmbus_chan_attribute chan_attr_##_name = __ATTR_RO(_name)
#define VMBUS_CHAN_ATTR_WO(_name) \
	struct vmbus_chan_attribute chan_attr_##_name = __ATTR_WO(_name)

static ssize_t vmbus_chan_attr_show(struct kobject *kobj,
				    struct attribute *attr, char *buf)
{
	const struct vmbus_chan_attribute *attribute
		= container_of(attr, struct vmbus_chan_attribute, attr);
	struct vmbus_channel *chan
		= container_of(kobj, struct vmbus_channel, kobj);

	if (!attribute->show)
		return -EIO;

	return attribute->show(chan, buf);
}

static ssize_t vmbus_chan_attr_store(struct kobject *kobj,
				     struct attribute *attr, const char *buf,
				     size_t count)
{
	const struct vmbus_chan_attribute *attribute
		= container_of(attr, struct vmbus_chan_attribute, attr);
	struct vmbus_channel *chan
		= container_of(kobj, struct vmbus_channel, kobj);

	if (!attribute->store)
		return -EIO;

	return attribute->store(chan, buf, count);
}

static const struct sysfs_ops vmbus_chan_sysfs_ops = {
	.show = vmbus_chan_attr_show,
	.store = vmbus_chan_attr_store,
};

static ssize_t out_mask_show(struct vmbus_channel *channel, char *buf)
{
	struct hv_ring_buffer_info *rbi = &channel->outbound;
	ssize_t ret;

	mutex_lock(&rbi->ring_buffer_mutex);
	if (!rbi->ring_buffer) {
		mutex_unlock(&rbi->ring_buffer_mutex);
		return -EINVAL;
	}

	ret = sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask);
	mutex_unlock(&rbi->ring_buffer_mutex);
	return ret;
}
static VMBUS_CHAN_ATTR_RO(out_mask);

static ssize_t in_mask_show(struct vmbus_channel *channel, char *buf)
{
	struct hv_ring_buffer_info *rbi = &channel->inbound;
	ssize_t ret;

	mutex_lock(&rbi->ring_buffer_mutex);
	if (!rbi->ring_buffer) {
		mutex_unlock(&rbi->ring_buffer_mutex);
		return -EINVAL;
	}

	ret = sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask);
	mutex_unlock(&rbi->ring_buffer_mutex);
	return ret;
}
static VMBUS_CHAN_ATTR_RO(in_mask);

static ssize_t read_avail_show(struct vmbus_channel *channel, char *buf)
{
	struct hv_ring_buffer_info *rbi = &channel->inbound;
	ssize_t ret;

	mutex_lock(&rbi->ring_buffer_mutex);
	if (!rbi->ring_buffer) {
		mutex_unlock(&rbi->ring_buffer_mutex);
		return -EINVAL;
	}

	ret = sprintf(buf, "%u\n", hv_get_bytes_to_read(rbi));
	mutex_unlock(&rbi->ring_buffer_mutex);
	return ret;
}
static VMBUS_CHAN_ATTR_RO(read_avail);

static ssize_t write_avail_show(struct vmbus_channel *channel, char *buf)
{
	struct hv_ring_buffer_info *rbi = &channel->outbound;
	ssize_t ret;

	mutex_lock(&rbi->ring_buffer_mutex);
	if (!rbi->ring_buffer) {
		mutex_unlock(&rbi->ring_buffer_mutex);
		return -EINVAL;
	}

	ret = sprintf(buf, "%u\n", hv_get_bytes_to_write(rbi));
	mutex_unlock(&rbi->ring_buffer_mutex);
	return ret;
}
static VMBUS_CHAN_ATTR_RO(write_avail);

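/*
 * "cpu" shows the channel's current target CPU; writing a CPU number sends a
 * MODIFYCHANNEL request to the host (only on protocol VERSION_WIN10_V4_1 and
 * later).
 */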
static ssize_t target_cpu_show(struct vmbus_channel *channel, char *buf)
{
	return sprintf(buf, "%u\n", channel->target_cpu);
}

static ssize_t target_cpu_store(struct vmbus_channel *channel,
				const char *buf, size_t count)
{
	u32 target_cpu, origin_cpu;
	ssize_t ret = count;

	if (vmbus_proto_version < VERSION_WIN10_V4_1)
		return -EIO;

	if (sscanf(buf, "%uu", &target_cpu) != 1)
		return -EIO;

	/* Validate target_cpu for the cpumask_test_cpu() operation below. */
	if (target_cpu >= nr_cpumask_bits)
		return -EINVAL;

	/* No CPUs should come up or down during this. */
	cpus_read_lock();

	if (!cpu_online(target_cpu)) {
		cpus_read_unlock();
		return -EINVAL;
	}

	/*
	 * Synchronizes target_cpu_store() and channel closure:
	 *
	 * { Initially: state = CHANNEL_OPENED }
	 *
	 * CPU1				CPU2
	 *
	 * [target_cpu_store()]		[vmbus_disconnect_ring()]
	 *
	 * LOCK channel_mutex		LOCK channel_mutex
	 * LOAD r1 = state		LOAD r2 = state
	 * IF (r1 == CHANNEL_OPENED)	IF (r2 == CHANNEL_OPENED)
	 *   SEND MODIFYCHANNEL		  STORE state = CHANNEL_OPEN
	 *   [...]			  SEND CLOSECHANNEL
	 * UNLOCK channel_mutex		UNLOCK channel_mutex
	 *
	 * Forbids: r1 == r2 == CHANNEL_OPENED (i.e., CPU1's LOCK precedes
	 *		CPU2's LOCK) && CPU2's SEND precedes CPU1's SEND
	 *
	 * Note.  The host processes the channel messages "sequentially", in
	 * the order in which they are received on a per-partition basis.
	 */
	mutex_lock(&vmbus_connection.channel_mutex);

	/*
	 * Hyper-V will ignore MODIFYCHANNEL messages for "non-open" channels;
	 * avoid sending the message and fail here for such channels.
	 */
	if (channel->state != CHANNEL_OPENED_STATE) {
		ret = -EIO;
		goto cpu_store_unlock;
	}

	origin_cpu = channel->target_cpu;
	if (target_cpu == origin_cpu)
		goto cpu_store_unlock;

	if (vmbus_send_modifychannel(channel->offermsg.child_relid,
				     hv_cpu_number_to_vp_number(target_cpu))) {
		ret = -EIO;
		goto cpu_store_unlock;
	}

	/*
	 * Warning.  At this point, there is *no* guarantee that the host will
	 * have successfully processed the vmbus_send_modifychannel() request.
	 * See the header comment of vmbus_send_modifychannel() for more info.
	 *
	 * Lags in the processing of the above vmbus_send_modifychannel() can
	 * result in missed interrupts if the "old" target CPU is taken offline
	 * before Hyper-V starts sending interrupts to the "new" target CPU.
	 * But apart from this offlining scenario, the code tolerates such
	 * lags.  It will function correctly even if a channel interrupt comes
	 * in on a CPU that is different from the channel target_cpu value.
	 */

	channel->target_cpu = target_cpu;

	/* See init_vp_index(). */
	if (hv_is_perf_channel(channel))
		hv_update_alloced_cpus(origin_cpu, target_cpu);

	/* Currently set only for storvsc channels. */
	if (channel->change_target_cpu_callback) {
		(*channel->change_target_cpu_callback)(channel,
				origin_cpu, target_cpu);
	}

cpu_store_unlock:
	mutex_unlock(&vmbus_connection.channel_mutex);
	cpus_read_unlock();
	return ret;
}
static VMBUS_CHAN_ATTR(cpu, 0644, target_cpu_show, target_cpu_store);

static ssize_t channel_pending_show(struct vmbus_channel *channel,
				    char *buf)
{
	return sprintf(buf, "%d\n",
		       channel_pending(channel,
				       vmbus_connection.monitor_pages[1]));
}
static VMBUS_CHAN_ATTR(pending, S_IRUGO, channel_pending_show, NULL);

static ssize_t channel_latency_show(struct vmbus_channel *channel,
				    char *buf)
{
	return sprintf(buf, "%d\n",
		       channel_latency(channel,
				       vmbus_connection.monitor_pages[1]));
}
static VMBUS_CHAN_ATTR(latency, S_IRUGO, channel_latency_show, NULL);

static ssize_t channel_interrupts_show(struct vmbus_channel *channel, char *buf)
{
	return sprintf(buf, "%llu\n", channel->interrupts);
}
static VMBUS_CHAN_ATTR(interrupts, S_IRUGO, channel_interrupts_show, NULL);

static ssize_t channel_events_show(struct vmbus_channel *channel, char *buf)
{
	return sprintf(buf, "%llu\n", channel->sig_events);
}
static VMBUS_CHAN_ATTR(events, S_IRUGO, channel_events_show, NULL);

static ssize_t channel_intr_in_full_show(struct vmbus_channel *channel,
					 char *buf)
{
	return sprintf(buf, "%llu\n",
		       (unsigned long long)channel->intr_in_full);
}
static VMBUS_CHAN_ATTR(intr_in_full, 0444, channel_intr_in_full_show, NULL);

static ssize_t channel_intr_out_empty_show(struct vmbus_channel *channel,
					   char *buf)
{
	return sprintf(buf, "%llu\n",
		       (unsigned long long)channel->intr_out_empty);
}
static VMBUS_CHAN_ATTR(intr_out_empty, 0444, channel_intr_out_empty_show, NULL);

static ssize_t channel_out_full_first_show(struct vmbus_channel *channel,
					   char *buf)
{
	return sprintf(buf, "%llu\n",
		       (unsigned long long)channel->out_full_first);
}
static VMBUS_CHAN_ATTR(out_full_first, 0444, channel_out_full_first_show, NULL);

static ssize_t channel_out_full_total_show(struct vmbus_channel *channel,
					   char *buf)
{
	return sprintf(buf, "%llu\n",
		       (unsigned long long)channel->out_full_total);
}
static VMBUS_CHAN_ATTR(out_full_total, 0444, channel_out_full_total_show, NULL);

static ssize_t subchannel_monitor_id_show(struct vmbus_channel *channel,
					  char *buf)
{
	return sprintf(buf, "%u\n", channel->offermsg.monitorid);
}
static VMBUS_CHAN_ATTR(monitor_id, S_IRUGO, subchannel_monitor_id_show, NULL);

static ssize_t subchannel_id_show(struct vmbus_channel *channel,
				  char *buf)
{
	return sprintf(buf, "%u\n",
		       channel->offermsg.offer.sub_channel_index);
}
static VMBUS_CHAN_ATTR_RO(subchannel_id);

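/*
 * Per-channel sysfs attributes, exposed under the parent device's
 * "channels/<relid>" directory (see vmbus_add_channel_kobj()).
 */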
static struct attribute *vmbus_chan_attrs[] = {
	&chan_attr_out_mask.attr,
	&chan_attr_in_mask.attr,
	&chan_attr_read_avail.attr,
	&chan_attr_write_avail.attr,
	&chan_attr_cpu.attr,
	&chan_attr_pending.attr,
	&chan_attr_latency.attr,
	&chan_attr_interrupts.attr,
	&chan_attr_events.attr,
	&chan_attr_intr_in_full.attr,
	&chan_attr_intr_out_empty.attr,
	&chan_attr_out_full_first.attr,
	&chan_attr_out_full_total.attr,
	&chan_attr_monitor_id.attr,
	&chan_attr_subchannel_id.attr,
	NULL
};

/*
 * Channel-level attribute_group callback function. Returns the permission for
 * each attribute, and returns 0 if an attribute is not visible.
 */
static umode_t vmbus_chan_attr_is_visible(struct kobject *kobj,
					  struct attribute *attr, int idx)
{
	const struct vmbus_channel *channel =
		container_of(kobj, struct vmbus_channel, kobj);

	/* Hide the monitor attributes if the monitor mechanism is not used. */
	if (!channel->offermsg.monitor_allocated &&
	    (attr == &chan_attr_pending.attr ||
	     attr == &chan_attr_latency.attr ||
	     attr == &chan_attr_monitor_id.attr))
		return 0;

	return attr->mode;
}

static struct attribute_group vmbus_chan_group = {
	.attrs = vmbus_chan_attrs,
	.is_visible = vmbus_chan_attr_is_visible
};

static struct kobj_type vmbus_chan_ktype = {
	.sysfs_ops = &vmbus_chan_sysfs_ops,
	.release = vmbus_chan_release,
};

/*
 * vmbus_add_channel_kobj - setup a sub-directory under device/channels
 */
int vmbus_add_channel_kobj(struct hv_device *dev, struct vmbus_channel *channel)
{
	const struct device *device = &dev->device;
	struct kobject *kobj = &channel->kobj;
	u32 relid = channel->offermsg.child_relid;
	int ret;

	kobj->kset = dev->channels_kset;
	ret = kobject_init_and_add(kobj, &vmbus_chan_ktype, NULL,
				   "%u", relid);
	if (ret)
		return ret;

	ret = sysfs_create_group(kobj, &vmbus_chan_group);

	if (ret) {
		/*
		 * The calling functions' error handling paths will cleanup the
		 * empty channel directory.
		 */
		dev_err(device, "Unable to set up channel sysfs files\n");
		return ret;
	}

	kobject_uevent(kobj, KOBJ_ADD);

	return 0;
}

/*
 * vmbus_remove_channel_attr_group - remove the channel's attribute group
 */
void vmbus_remove_channel_attr_group(struct vmbus_channel *channel)
{
	sysfs_remove_group(&channel->kobj, &vmbus_chan_group);
}

/*
 * vmbus_device_create - Creates and registers a new child device
 * on the vmbus.
 */
struct hv_device *vmbus_device_create(const guid_t *type,
				      const guid_t *instance,
				      struct vmbus_channel *channel)
{
	struct hv_device *child_device_obj;

	child_device_obj = kzalloc(sizeof(struct hv_device), GFP_KERNEL);
	if (!child_device_obj) {
		pr_err("Unable to allocate device object for child device\n");
		return NULL;
	}

	child_device_obj->channel = channel;
	guid_copy(&child_device_obj->dev_type, type);
	guid_copy(&child_device_obj->dev_instance, instance);
	child_device_obj->vendor_id = 0x1414; /* MSFT vendor ID */

	return child_device_obj;
}

/*
 * vmbus_device_register - Register the child device
 */
int vmbus_device_register(struct hv_device *child_device_obj)
{
	struct kobject *kobj = &child_device_obj->device.kobj;
	int ret;

	dev_set_name(&child_device_obj->device, "%pUl",
		     &child_device_obj->channel->offermsg.offer.if_instance);

	child_device_obj->device.bus = &hv_bus;
	child_device_obj->device.parent = &hv_acpi_dev->dev;
	child_device_obj->device.release = vmbus_device_release;

	/*
	 * Register with the LDM. This will kick off the driver/device
This will kick off the driver/device
2009 * binding...which will eventually call vmbus_match() and vmbus_probe()
2010 */
2011 ret = device_register(&child_device_obj->device);
2012 if (ret) {
2013 pr_err("Unable to register child device\n");
2014 return ret;
2015 }
2016
2017 child_device_obj->channels_kset = kset_create_and_add("channels",
2018 NULL, kobj);
2019 if (!child_device_obj->channels_kset) {
2020 ret = -ENOMEM;
2021 goto err_dev_unregister;
2022 }
2023
2024 ret = vmbus_add_channel_kobj(child_device_obj,
2025 child_device_obj->channel);
2026 if (ret) {
2027 pr_err("Unable to register primary channel\n");
2028 goto err_kset_unregister;
2029 }
2030 hv_debug_add_dev_dir(child_device_obj);
2031
2032 return 0;
2033
2034 err_kset_unregister:
2035 kset_unregister(child_device_obj->channels_kset);
2036
2037 err_dev_unregister:
2038 device_unregister(&child_device_obj->device);
2039 return ret;
2040 }
2041
2042 /*
2043 * vmbus_device_unregister - Remove the specified child device
2044 * from the vmbus.
2045 */
2046 void vmbus_device_unregister(struct hv_device *device_obj)
2047 {
2048 pr_debug("child device %s unregistered\n",
2049 dev_name(&device_obj->device));
2050
2051 kset_unregister(device_obj->channels_kset);
2052
2053 /*
2054 * Kick off the process of unregistering the device.
2055 * This will call vmbus_remove() and eventually vmbus_device_release()
2056 */
2057 device_unregister(&device_obj->device);
2058 }
2059
2060
2061 /*
2062 * VMBus is an ACPI-enumerated device. Get the information we
2063 * need from the DSDT.
2064 */
2065 #define VTPM_BASE_ADDRESS 0xfed40000
2066 static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *ctx)
2067 {
2068 resource_size_t start = 0;
2069 resource_size_t end = 0;
2070 struct resource *new_res;
2071 struct resource **old_res = &hyperv_mmio;
2072 struct resource **prev_res = NULL;
2073
2074 switch (res->type) {
2075
2076 /*
2077 * "Address" descriptors are for bus windows. Ignore
2078 * "memory" descriptors, which are for registers on
2079 * devices.
2080 */
2081 case ACPI_RESOURCE_TYPE_ADDRESS32:
2082 start = res->data.address32.address.minimum;
2083 end = res->data.address32.address.maximum;
2084 break;
2085
2086 case ACPI_RESOURCE_TYPE_ADDRESS64:
2087 start = res->data.address64.address.minimum;
2088 end = res->data.address64.address.maximum;
2089 break;
2090
2091 default:
2092 /* Unused resource type */
2093 return AE_OK;
2094
2095 }
2096 /*
2097 * Ignore ranges that are below 1MB, as they're not
2098 * necessary or useful here.
2099 */
2100 if (end < 0x100000)
2101 return AE_OK;
2102
2103 new_res = kzalloc(sizeof(*new_res), GFP_ATOMIC);
2104 if (!new_res)
2105 return AE_NO_MEMORY;
2106
2107 /* If this range overlaps the virtual TPM, truncate it. */
2108 if (end > VTPM_BASE_ADDRESS && start < VTPM_BASE_ADDRESS)
2109 end = VTPM_BASE_ADDRESS;
2110
2111 new_res->name = "hyperv mmio";
2112 new_res->flags = IORESOURCE_MEM;
2113 new_res->start = start;
2114 new_res->end = end;
2115
2116 /*
2117 * If two ranges are adjacent, merge them.
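*
* For example, if the walk produces [0xF8000000, 0xF8FFFFFF] and then
* [0xF9000000, 0xF9FFFFFF] (these addresses are only illustrative), the
* two are collapsed into a single hyperv_mmio entry covering
* [0xF8000000, 0xF9FFFFFF]; ranges that are not adjacent are kept in the
* ->sibling list, sorted by start address.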
2118 */ 2119 do { 2120 if (!*old_res) { 2121 *old_res = new_res; 2122 break; 2123 } 2124 2125 if (((*old_res)->end + 1) == new_res->start) { 2126 (*old_res)->end = new_res->end; 2127 kfree(new_res); 2128 break; 2129 } 2130 2131 if ((*old_res)->start == new_res->end + 1) { 2132 (*old_res)->start = new_res->start; 2133 kfree(new_res); 2134 break; 2135 } 2136 2137 if ((*old_res)->start > new_res->end) { 2138 new_res->sibling = *old_res; 2139 if (prev_res) 2140 (*prev_res)->sibling = new_res; 2141 *old_res = new_res; 2142 break; 2143 } 2144 2145 prev_res = old_res; 2146 old_res = &(*old_res)->sibling; 2147 2148 } while (1); 2149 2150 return AE_OK; 2151 } 2152 2153 static int vmbus_acpi_remove(struct acpi_device *device) 2154 { 2155 struct resource *cur_res; 2156 struct resource *next_res; 2157 2158 if (hyperv_mmio) { 2159 if (fb_mmio) { 2160 __release_region(hyperv_mmio, fb_mmio->start, 2161 resource_size(fb_mmio)); 2162 fb_mmio = NULL; 2163 } 2164 2165 for (cur_res = hyperv_mmio; cur_res; cur_res = next_res) { 2166 next_res = cur_res->sibling; 2167 kfree(cur_res); 2168 } 2169 } 2170 2171 return 0; 2172 } 2173 2174 static void vmbus_reserve_fb(void) 2175 { 2176 int size; 2177 /* 2178 * Make a claim for the frame buffer in the resource tree under the 2179 * first node, which will be the one below 4GB. The length seems to 2180 * be underreported, particularly in a Generation 1 VM. So start out 2181 * reserving a larger area and make it smaller until it succeeds. 2182 */ 2183 2184 if (screen_info.lfb_base) { 2185 if (efi_enabled(EFI_BOOT)) 2186 size = max_t(__u32, screen_info.lfb_size, 0x800000); 2187 else 2188 size = max_t(__u32, screen_info.lfb_size, 0x4000000); 2189 2190 for (; !fb_mmio && (size >= 0x100000); size >>= 1) { 2191 fb_mmio = __request_region(hyperv_mmio, 2192 screen_info.lfb_base, size, 2193 fb_mmio_name, 0); 2194 } 2195 } 2196 } 2197 2198 /** 2199 * vmbus_allocate_mmio() - Pick a memory-mapped I/O range. 2200 * @new: If successful, supplied a pointer to the 2201 * allocated MMIO space. 2202 * @device_obj: Identifies the caller 2203 * @min: Minimum guest physical address of the 2204 * allocation 2205 * @max: Maximum guest physical address 2206 * @size: Size of the range to be allocated 2207 * @align: Alignment of the range to be allocated 2208 * @fb_overlap_ok: Whether this allocation can be allowed 2209 * to overlap the video frame buffer. 2210 * 2211 * This function walks the resources granted to VMBus by the 2212 * _CRS object in the ACPI namespace underneath the parent 2213 * "bridge" whether that's a root PCI bus in the Generation 1 2214 * case or a Module Device in the Generation 2 case. It then 2215 * attempts to allocate from the global MMIO pool in a way that 2216 * matches the constraints supplied in these parameters and by 2217 * that _CRS. 2218 * 2219 * Return: 0 on success, -errno on failure 2220 */ 2221 int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, 2222 resource_size_t min, resource_size_t max, 2223 resource_size_t size, resource_size_t align, 2224 bool fb_overlap_ok) 2225 { 2226 struct resource *iter, *shadow; 2227 resource_size_t range_min, range_max, start; 2228 const char *dev_n = dev_name(&device_obj->device); 2229 int retval; 2230 2231 retval = -ENXIO; 2232 mutex_lock(&hyperv_mmio_lock); 2233 2234 /* 2235 * If overlaps with frame buffers are allowed, then first attempt to 2236 * make the allocation from within the reserved region. Because it 2237 * is already reserved, no shadow allocation is necessary. 
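*
* Otherwise, walk the MMIO windows below: a "shadow" __request_region()
* against the hyperv_mmio tree marks the candidate range busy so that
* later callers skip it, while request_mem_region_exclusive() claims the
* same range in the global iomem resource tree on behalf of the caller.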
2238 */ 2239 if (fb_overlap_ok && fb_mmio && !(min > fb_mmio->end) && 2240 !(max < fb_mmio->start)) { 2241 2242 range_min = fb_mmio->start; 2243 range_max = fb_mmio->end; 2244 start = (range_min + align - 1) & ~(align - 1); 2245 for (; start + size - 1 <= range_max; start += align) { 2246 *new = request_mem_region_exclusive(start, size, dev_n); 2247 if (*new) { 2248 retval = 0; 2249 goto exit; 2250 } 2251 } 2252 } 2253 2254 for (iter = hyperv_mmio; iter; iter = iter->sibling) { 2255 if ((iter->start >= max) || (iter->end <= min)) 2256 continue; 2257 2258 range_min = iter->start; 2259 range_max = iter->end; 2260 start = (range_min + align - 1) & ~(align - 1); 2261 for (; start + size - 1 <= range_max; start += align) { 2262 shadow = __request_region(iter, start, size, NULL, 2263 IORESOURCE_BUSY); 2264 if (!shadow) 2265 continue; 2266 2267 *new = request_mem_region_exclusive(start, size, dev_n); 2268 if (*new) { 2269 shadow->name = (char *)*new; 2270 retval = 0; 2271 goto exit; 2272 } 2273 2274 __release_region(iter, start, size); 2275 } 2276 } 2277 2278 exit: 2279 mutex_unlock(&hyperv_mmio_lock); 2280 return retval; 2281 } 2282 EXPORT_SYMBOL_GPL(vmbus_allocate_mmio); 2283 2284 /** 2285 * vmbus_free_mmio() - Free a memory-mapped I/O range. 2286 * @start: Base address of region to release. 2287 * @size: Size of the range to be allocated 2288 * 2289 * This function releases anything requested by 2290 * vmbus_mmio_allocate(). 2291 */ 2292 void vmbus_free_mmio(resource_size_t start, resource_size_t size) 2293 { 2294 struct resource *iter; 2295 2296 mutex_lock(&hyperv_mmio_lock); 2297 for (iter = hyperv_mmio; iter; iter = iter->sibling) { 2298 if ((iter->start >= start + size) || (iter->end <= start)) 2299 continue; 2300 2301 __release_region(iter, start, size); 2302 } 2303 release_mem_region(start, size); 2304 mutex_unlock(&hyperv_mmio_lock); 2305 2306 } 2307 EXPORT_SYMBOL_GPL(vmbus_free_mmio); 2308 2309 static int vmbus_acpi_add(struct acpi_device *device) 2310 { 2311 acpi_status result; 2312 int ret_val = -ENODEV; 2313 struct acpi_device *ancestor; 2314 2315 hv_acpi_dev = device; 2316 2317 result = acpi_walk_resources(device->handle, METHOD_NAME__CRS, 2318 vmbus_walk_resources, NULL); 2319 2320 if (ACPI_FAILURE(result)) 2321 goto acpi_walk_err; 2322 /* 2323 * Some ancestor of the vmbus acpi device (Gen1 or Gen2 2324 * firmware) is the VMOD that has the mmio ranges. Get that. 2325 */ 2326 for (ancestor = device->parent; ancestor; ancestor = ancestor->parent) { 2327 result = acpi_walk_resources(ancestor->handle, METHOD_NAME__CRS, 2328 vmbus_walk_resources, NULL); 2329 2330 if (ACPI_FAILURE(result)) 2331 continue; 2332 if (hyperv_mmio) { 2333 vmbus_reserve_fb(); 2334 break; 2335 } 2336 } 2337 ret_val = 0; 2338 2339 acpi_walk_err: 2340 complete(&probe_event); 2341 if (ret_val) 2342 vmbus_acpi_remove(device); 2343 return ret_val; 2344 } 2345 2346 #ifdef CONFIG_PM_SLEEP 2347 static int vmbus_bus_suspend(struct device *dev) 2348 { 2349 struct vmbus_channel *channel, *sc; 2350 2351 while (atomic_read(&vmbus_connection.offer_in_progress) != 0) { 2352 /* 2353 * We wait here until the completion of any channel 2354 * offers that are currently in progress. 
2355 */ 2356 msleep(1); 2357 } 2358 2359 mutex_lock(&vmbus_connection.channel_mutex); 2360 list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { 2361 if (!is_hvsock_channel(channel)) 2362 continue; 2363 2364 vmbus_force_channel_rescinded(channel); 2365 } 2366 mutex_unlock(&vmbus_connection.channel_mutex); 2367 2368 /* 2369 * Wait until all the sub-channels and hv_sock channels have been 2370 * cleaned up. Sub-channels should be destroyed upon suspend, otherwise 2371 * they would conflict with the new sub-channels that will be created 2372 * in the resume path. hv_sock channels should also be destroyed, but 2373 * a hv_sock channel of an established hv_sock connection can not be 2374 * really destroyed since it may still be referenced by the userspace 2375 * application, so we just force the hv_sock channel to be rescinded 2376 * by vmbus_force_channel_rescinded(), and the userspace application 2377 * will thoroughly destroy the channel after hibernation. 2378 * 2379 * Note: the counter nr_chan_close_on_suspend may never go above 0 if 2380 * the VM has no sub-channel and hv_sock channel, e.g. a 1-vCPU VM. 2381 */ 2382 if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0) 2383 wait_for_completion(&vmbus_connection.ready_for_suspend_event); 2384 2385 if (atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) != 0) { 2386 pr_err("Can not suspend due to a previous failed resuming\n"); 2387 return -EBUSY; 2388 } 2389 2390 mutex_lock(&vmbus_connection.channel_mutex); 2391 2392 list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { 2393 /* 2394 * Remove the channel from the array of channels and invalidate 2395 * the channel's relid. Upon resume, vmbus_onoffer() will fix 2396 * up the relid (and other fields, if necessary) and add the 2397 * channel back to the array. 2398 */ 2399 vmbus_channel_unmap_relid(channel); 2400 channel->offermsg.child_relid = INVALID_RELID; 2401 2402 if (is_hvsock_channel(channel)) { 2403 if (!channel->rescind) { 2404 pr_err("hv_sock channel not rescinded!\n"); 2405 WARN_ON_ONCE(1); 2406 } 2407 continue; 2408 } 2409 2410 list_for_each_entry(sc, &channel->sc_list, sc_list) { 2411 pr_err("Sub-channel not deleted!\n"); 2412 WARN_ON_ONCE(1); 2413 } 2414 2415 atomic_inc(&vmbus_connection.nr_chan_fixup_on_resume); 2416 } 2417 2418 mutex_unlock(&vmbus_connection.channel_mutex); 2419 2420 vmbus_initiate_unload(false); 2421 2422 /* Reset the event for the next resume. */ 2423 reinit_completion(&vmbus_connection.ready_for_resume_event); 2424 2425 return 0; 2426 } 2427 2428 static int vmbus_bus_resume(struct device *dev) 2429 { 2430 struct vmbus_channel_msginfo *msginfo; 2431 size_t msgsize; 2432 int ret; 2433 2434 /* 2435 * We only use the 'vmbus_proto_version', which was in use before 2436 * hibernation, to re-negotiate with the host. 
2437 */ 2438 if (!vmbus_proto_version) { 2439 pr_err("Invalid proto version = 0x%x\n", vmbus_proto_version); 2440 return -EINVAL; 2441 } 2442 2443 msgsize = sizeof(*msginfo) + 2444 sizeof(struct vmbus_channel_initiate_contact); 2445 2446 msginfo = kzalloc(msgsize, GFP_KERNEL); 2447 2448 if (msginfo == NULL) 2449 return -ENOMEM; 2450 2451 ret = vmbus_negotiate_version(msginfo, vmbus_proto_version); 2452 2453 kfree(msginfo); 2454 2455 if (ret != 0) 2456 return ret; 2457 2458 WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) == 0); 2459 2460 vmbus_request_offers(); 2461 2462 if (wait_for_completion_timeout( 2463 &vmbus_connection.ready_for_resume_event, 10 * HZ) == 0) 2464 pr_err("Some vmbus device is missing after suspending?\n"); 2465 2466 /* Reset the event for the next suspend. */ 2467 reinit_completion(&vmbus_connection.ready_for_suspend_event); 2468 2469 return 0; 2470 } 2471 #else 2472 #define vmbus_bus_suspend NULL 2473 #define vmbus_bus_resume NULL 2474 #endif /* CONFIG_PM_SLEEP */ 2475 2476 static const struct acpi_device_id vmbus_acpi_device_ids[] = { 2477 {"VMBUS", 0}, 2478 {"VMBus", 0}, 2479 {"", 0}, 2480 }; 2481 MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids); 2482 2483 /* 2484 * Note: we must use the "no_irq" ops, otherwise hibernation can not work with 2485 * PCI device assignment, because "pci_dev_pm_ops" uses the "noirq" ops: in 2486 * the resume path, the pci "noirq" restore op runs before "non-noirq" op (see 2487 * resume_target_kernel() -> dpm_resume_start(), and hibernation_restore() -> 2488 * dpm_resume_end()). This means vmbus_bus_resume() and the pci-hyperv's 2489 * resume callback must also run via the "noirq" ops. 2490 * 2491 * Set suspend_noirq/resume_noirq to NULL for Suspend-to-Idle: see the comment 2492 * earlier in this file before vmbus_pm. 2493 */ 2494 2495 static const struct dev_pm_ops vmbus_bus_pm = { 2496 .suspend_noirq = NULL, 2497 .resume_noirq = NULL, 2498 .freeze_noirq = vmbus_bus_suspend, 2499 .thaw_noirq = vmbus_bus_resume, 2500 .poweroff_noirq = vmbus_bus_suspend, 2501 .restore_noirq = vmbus_bus_resume 2502 }; 2503 2504 static struct acpi_driver vmbus_acpi_driver = { 2505 .name = "vmbus", 2506 .ids = vmbus_acpi_device_ids, 2507 .ops = { 2508 .add = vmbus_acpi_add, 2509 .remove = vmbus_acpi_remove, 2510 }, 2511 .drv.pm = &vmbus_bus_pm, 2512 }; 2513 2514 static void hv_kexec_handler(void) 2515 { 2516 hv_stimer_global_cleanup(); 2517 vmbus_initiate_unload(false); 2518 /* Make sure conn_state is set as hv_synic_cleanup checks for it */ 2519 mb(); 2520 cpuhp_remove_state(hyperv_cpuhp_online); 2521 hyperv_cleanup(); 2522 }; 2523 2524 static void hv_crash_handler(struct pt_regs *regs) 2525 { 2526 int cpu; 2527 2528 vmbus_initiate_unload(true); 2529 /* 2530 * In crash handler we can't schedule synic cleanup for all CPUs, 2531 * doing the cleanup for current CPU only. This should be sufficient 2532 * for kdump. 2533 */ 2534 cpu = smp_processor_id(); 2535 hv_stimer_cleanup(cpu); 2536 hv_synic_disable_regs(cpu); 2537 hyperv_cleanup(); 2538 }; 2539 2540 static int hv_synic_suspend(void) 2541 { 2542 /* 2543 * When we reach here, all the non-boot CPUs have been offlined. 2544 * If we're in a legacy configuration where stimer Direct Mode is 2545 * not enabled, the stimers on the non-boot CPUs have been unbound 2546 * in hv_synic_cleanup() -> hv_stimer_legacy_cleanup() -> 2547 * hv_stimer_cleanup() -> clockevents_unbind_device(). 2548 * 2549 * hv_synic_suspend() only runs on CPU0 with interrupts disabled. 
2550 * Here we do not call hv_stimer_legacy_cleanup() on CPU0 because: 2551 * 1) it's unnecessary as interrupts remain disabled between 2552 * syscore_suspend() and syscore_resume(): see create_image() and 2553 * resume_target_kernel() 2554 * 2) the stimer on CPU0 is automatically disabled later by 2555 * syscore_suspend() -> timekeeping_suspend() -> tick_suspend() -> ... 2556 * -> clockevents_shutdown() -> ... -> hv_ce_shutdown() 2557 * 3) a warning would be triggered if we call 2558 * clockevents_unbind_device(), which may sleep, in an 2559 * interrupts-disabled context. 2560 */ 2561 2562 hv_synic_disable_regs(0); 2563 2564 return 0; 2565 } 2566 2567 static void hv_synic_resume(void) 2568 { 2569 hv_synic_enable_regs(0); 2570 2571 /* 2572 * Note: we don't need to call hv_stimer_init(0), because the timer 2573 * on CPU0 is not unbound in hv_synic_suspend(), and the timer is 2574 * automatically re-enabled in timekeeping_resume(). 2575 */ 2576 } 2577 2578 /* The callbacks run only on CPU0, with irqs_disabled. */ 2579 static struct syscore_ops hv_synic_syscore_ops = { 2580 .suspend = hv_synic_suspend, 2581 .resume = hv_synic_resume, 2582 }; 2583 2584 static int __init hv_acpi_init(void) 2585 { 2586 int ret, t; 2587 2588 if (!hv_is_hyperv_initialized()) 2589 return -ENODEV; 2590 2591 init_completion(&probe_event); 2592 2593 /* 2594 * Get ACPI resources first. 2595 */ 2596 ret = acpi_bus_register_driver(&vmbus_acpi_driver); 2597 2598 if (ret) 2599 return ret; 2600 2601 t = wait_for_completion_timeout(&probe_event, 5*HZ); 2602 if (t == 0) { 2603 ret = -ETIMEDOUT; 2604 goto cleanup; 2605 } 2606 hv_debug_init(); 2607 2608 ret = vmbus_bus_init(); 2609 if (ret) 2610 goto cleanup; 2611 2612 hv_setup_kexec_handler(hv_kexec_handler); 2613 hv_setup_crash_handler(hv_crash_handler); 2614 2615 register_syscore_ops(&hv_synic_syscore_ops); 2616 2617 return 0; 2618 2619 cleanup: 2620 acpi_bus_unregister_driver(&vmbus_acpi_driver); 2621 hv_acpi_dev = NULL; 2622 return ret; 2623 } 2624 2625 static void __exit vmbus_exit(void) 2626 { 2627 int cpu; 2628 2629 unregister_syscore_ops(&hv_synic_syscore_ops); 2630 2631 hv_remove_kexec_handler(); 2632 hv_remove_crash_handler(); 2633 vmbus_connection.conn_state = DISCONNECTED; 2634 hv_stimer_global_cleanup(); 2635 vmbus_disconnect(); 2636 hv_remove_vmbus_irq(); 2637 for_each_online_cpu(cpu) { 2638 struct hv_per_cpu_context *hv_cpu 2639 = per_cpu_ptr(hv_context.cpu_context, cpu); 2640 2641 tasklet_kill(&hv_cpu->msg_dpc); 2642 } 2643 hv_debug_rm_all_dir(); 2644 2645 vmbus_free_channels(); 2646 kfree(vmbus_connection.channels); 2647 2648 if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { 2649 kmsg_dump_unregister(&hv_kmsg_dumper); 2650 unregister_die_notifier(&hyperv_die_block); 2651 atomic_notifier_chain_unregister(&panic_notifier_list, 2652 &hyperv_panic_block); 2653 } 2654 2655 free_page((unsigned long)hv_panic_page); 2656 unregister_sysctl_table(hv_ctl_table_hdr); 2657 hv_ctl_table_hdr = NULL; 2658 bus_unregister(&hv_bus); 2659 2660 cpuhp_remove_state(hyperv_cpuhp_online); 2661 hv_synic_free(); 2662 acpi_bus_unregister_driver(&vmbus_acpi_driver); 2663 } 2664 2665 2666 MODULE_LICENSE("GPL"); 2667 MODULE_DESCRIPTION("Microsoft Hyper-V VMBus Driver"); 2668 2669 subsys_initcall(hv_acpi_init); 2670 module_exit(vmbus_exit); 2671
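/*
 * Illustrative usage sketch (kept out of the build): one way a VMBus
 * child driver might consume vmbus_allocate_mmio()/vmbus_free_mmio(),
 * documented earlier in this file. The helper name and the particular
 * size/alignment values are hypothetical and not part of this driver.
 */
#if 0
static int example_claim_mmio(struct hv_device *hdev)
{
	struct resource *res;
	void __iomem *base;
	int ret;

	/*
	 * Request 1 MiB anywhere within the VMBus MMIO windows described
	 * by _CRS, 1 MiB aligned, without overlapping the frame buffer.
	 */
	ret = vmbus_allocate_mmio(&res, hdev, 0, -1, 0x100000, 0x100000,
				  false);
	if (ret)
		return ret;

	base = ioremap(res->start, resource_size(res));
	if (!base) {
		vmbus_free_mmio(res->start, resource_size(res));
		return -ENOMEM;
	}

	/* ... use the mapping ... */

	iounmap(base);
	vmbus_free_mmio(res->start, resource_size(res));
	return 0;
}
#endif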