/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 *   K. Y. Srinivasan <kys@microsoft.com>
 *
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/sysctl.h>
#include <linux/slab.h>
#include <linux/acpi.h>
#include <linux/completion.h>
#include <linux/hyperv.h>
#include <linux/kernel_stat.h>
#include <linux/clockchips.h>
#include <linux/cpu.h>
#include <linux/sched/task_stack.h>

#include <asm/mshyperv.h>
#include <linux/notifier.h>
#include <linux/ptrace.h>
#include <linux/screen_info.h>
#include <linux/kdebug.h>
#include <linux/efi.h>
#include <linux/random.h>
#include "hyperv_vmbus.h"

struct vmbus_dynid {
	struct list_head node;
	struct hv_vmbus_device_id id;
};

static struct acpi_device *hv_acpi_dev;

static struct completion probe_event;

static int hyperv_cpuhp_online;

static void *hv_panic_page;

static int hyperv_panic_event(struct notifier_block *nb, unsigned long val,
			      void *args)
{
	struct pt_regs *regs;

	regs = current_pt_regs();

	hyperv_report_panic(regs, val);
	return NOTIFY_DONE;
}

static int hyperv_die_event(struct notifier_block *nb, unsigned long val,
			    void *args)
{
	struct die_args *die = (struct die_args *)args;
	struct pt_regs *regs = die->regs;

	hyperv_report_panic(regs, val);
	return NOTIFY_DONE;
}

static struct notifier_block hyperv_die_block = {
	.notifier_call = hyperv_die_event,
};
static struct notifier_block hyperv_panic_block = {
	.notifier_call = hyperv_panic_event,
};

static const char *fb_mmio_name = "fb_range";
static struct resource *fb_mmio;
static struct resource *hyperv_mmio;
static DEFINE_SEMAPHORE(hyperv_mmio_lock);

static int vmbus_exists(void)
{
	if (hv_acpi_dev == NULL)
		return -ENODEV;

	return 0;
}

#define VMBUS_ALIAS_LEN ((sizeof((struct hv_vmbus_device_id *)0)->guid) * 2)
static void print_alias_name(struct hv_device *hv_dev, char *alias_name)
{
	int i;
	for (i = 0; i < VMBUS_ALIAS_LEN; i += 2)
		sprintf(&alias_name[i], "%02x", hv_dev->dev_type.b[i/2]);
}

static u8 channel_monitor_group(const struct vmbus_channel *channel)
{
	return (u8)channel->offermsg.monitorid / 32;
}

static u8 channel_monitor_offset(const struct vmbus_channel *channel)
{
	return (u8)channel->offermsg.monitorid % 32;
}
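
/*
 * Helpers for reading per-channel state out of the monitor pages shared
 * with the host: the channel's monitor ID selects a trigger group
 * (monitorid / 32) and a slot within that group (monitorid % 32).
 */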
static u32 channel_pending(const struct vmbus_channel *channel,
			   const struct hv_monitor_page *monitor_page)
{
	u8 monitor_group = channel_monitor_group(channel);

	return monitor_page->trigger_group[monitor_group].pending;
}

static u32 channel_latency(const struct vmbus_channel *channel,
			   const struct hv_monitor_page *monitor_page)
{
	u8 monitor_group = channel_monitor_group(channel);
	u8 monitor_offset = channel_monitor_offset(channel);

	return monitor_page->latency[monitor_group][monitor_offset];
}

static u32 channel_conn_id(struct vmbus_channel *channel,
			   struct hv_monitor_page *monitor_page)
{
	u8 monitor_group = channel_monitor_group(channel);
	u8 monitor_offset = channel_monitor_offset(channel);
	return monitor_page->parameter[monitor_group][monitor_offset].connectionid.u.id;
}

static ssize_t id_show(struct device *dev, struct device_attribute *dev_attr,
		       char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n", hv_dev->channel->offermsg.child_relid);
}
static DEVICE_ATTR_RO(id);

static ssize_t state_show(struct device *dev, struct device_attribute *dev_attr,
			  char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n", hv_dev->channel->state);
}
static DEVICE_ATTR_RO(state);

static ssize_t monitor_id_show(struct device *dev,
			       struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n", hv_dev->channel->offermsg.monitorid);
}
static DEVICE_ATTR_RO(monitor_id);

static ssize_t class_id_show(struct device *dev,
			     struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "{%pUl}\n",
		       hv_dev->channel->offermsg.offer.if_type.b);
}
static DEVICE_ATTR_RO(class_id);

static ssize_t device_id_show(struct device *dev,
			      struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "{%pUl}\n",
		       hv_dev->channel->offermsg.offer.if_instance.b);
}
static DEVICE_ATTR_RO(device_id);

static ssize_t modalias_show(struct device *dev,
			     struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	char alias_name[VMBUS_ALIAS_LEN + 1];

	print_alias_name(hv_dev, alias_name);
	return sprintf(buf, "vmbus:%s\n", alias_name);
}
static DEVICE_ATTR_RO(modalias);

#ifdef CONFIG_NUMA
static ssize_t numa_node_show(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;

	return sprintf(buf, "%d\n", hv_dev->channel->numa_node);
}
static DEVICE_ATTR_RO(numa_node);
#endif

static ssize_t server_monitor_pending_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_pending(hv_dev->channel,
				       vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(server_monitor_pending);

static ssize_t client_monitor_pending_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_pending(hv_dev->channel,
				       vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_pending);

static ssize_t server_monitor_latency_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_latency(hv_dev->channel,
				       vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_latency);

static ssize_t client_monitor_latency_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_latency(hv_dev->channel,
				       vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_latency);

static ssize_t server_monitor_conn_id_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_conn_id(hv_dev->channel,
				       vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_conn_id);

static ssize_t client_monitor_conn_id_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_conn_id(hv_dev->channel,
				       vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_conn_id);

static ssize_t out_intr_mask_show(struct device *dev,
				  struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info outbound;

	if (!hv_dev->channel)
		return -ENODEV;
	if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
		return -EINVAL;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
	return sprintf(buf, "%d\n", outbound.current_interrupt_mask);
}
static DEVICE_ATTR_RO(out_intr_mask);

static ssize_t out_read_index_show(struct device *dev,
				   struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info outbound;

	if (!hv_dev->channel)
		return -ENODEV;
	if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
		return -EINVAL;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
	return sprintf(buf, "%d\n", outbound.current_read_index);
}
static DEVICE_ATTR_RO(out_read_index);

static ssize_t out_write_index_show(struct device *dev,
				    struct device_attribute *dev_attr,
				    char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info outbound;

	if (!hv_dev->channel)
		return -ENODEV;
	if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
		return -EINVAL;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
	return sprintf(buf, "%d\n", outbound.current_write_index);
}
static DEVICE_ATTR_RO(out_write_index);

static ssize_t out_read_bytes_avail_show(struct device *dev,
					 struct device_attribute *dev_attr,
					 char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info outbound;

	if (!hv_dev->channel)
		return -ENODEV;
	if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
		return -EINVAL;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
	return sprintf(buf, "%d\n", outbound.bytes_avail_toread);
}
static DEVICE_ATTR_RO(out_read_bytes_avail);

static ssize_t out_write_bytes_avail_show(struct device *dev,
					  struct device_attribute *dev_attr,
					  char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info outbound;

	if (!hv_dev->channel)
		return -ENODEV;
	if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
		return -EINVAL;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
	return sprintf(buf, "%d\n", outbound.bytes_avail_towrite);
}
static DEVICE_ATTR_RO(out_write_bytes_avail);

static ssize_t in_intr_mask_show(struct device *dev,
				 struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info inbound;

	if (!hv_dev->channel)
		return -ENODEV;
	if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
		return -EINVAL;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
	return sprintf(buf, "%d\n", inbound.current_interrupt_mask);
}
static DEVICE_ATTR_RO(in_intr_mask);

static ssize_t in_read_index_show(struct device *dev,
				  struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info inbound;

	if (!hv_dev->channel)
		return -ENODEV;
	if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
		return -EINVAL;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
	return sprintf(buf, "%d\n", inbound.current_read_index);
}
static DEVICE_ATTR_RO(in_read_index);

static ssize_t in_write_index_show(struct device *dev,
				   struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info inbound;

	if (!hv_dev->channel)
		return -ENODEV;
	if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
		return -EINVAL;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
	return sprintf(buf, "%d\n", inbound.current_write_index);
}
static DEVICE_ATTR_RO(in_write_index);

static ssize_t in_read_bytes_avail_show(struct device *dev,
					struct device_attribute *dev_attr,
					char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info inbound;

	if (!hv_dev->channel)
		return -ENODEV;
	if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
		return -EINVAL;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
	return sprintf(buf, "%d\n", inbound.bytes_avail_toread);
}
static DEVICE_ATTR_RO(in_read_bytes_avail);

static ssize_t in_write_bytes_avail_show(struct device *dev,
					 struct device_attribute *dev_attr,
					 char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info inbound;

	if (!hv_dev->channel)
		return -ENODEV;
	if (hv_dev->channel->state != CHANNEL_OPENED_STATE)
		return -EINVAL;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
	return sprintf(buf, "%d\n", inbound.bytes_avail_towrite);
}
static DEVICE_ATTR_RO(in_write_bytes_avail);

static ssize_t channel_vp_mapping_show(struct device *dev,
				       struct device_attribute *dev_attr,
				       char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct vmbus_channel *channel = hv_dev->channel, *cur_sc;
	unsigned long flags;
	int buf_size = PAGE_SIZE, n_written, tot_written;
	struct list_head *cur;

	if (!channel)
		return -ENODEV;

	tot_written = snprintf(buf, buf_size, "%u:%u\n",
		channel->offermsg.child_relid, channel->target_cpu);

	spin_lock_irqsave(&channel->lock, flags);

	list_for_each(cur, &channel->sc_list) {
		if (tot_written >= buf_size - 1)
			break;

		cur_sc = list_entry(cur, struct vmbus_channel, sc_list);
		n_written = scnprintf(buf + tot_written,
				      buf_size - tot_written,
				      "%u:%u\n",
				      cur_sc->offermsg.child_relid,
				      cur_sc->target_cpu);
		tot_written += n_written;
	}

	spin_unlock_irqrestore(&channel->lock, flags);

	return tot_written;
}
static DEVICE_ATTR_RO(channel_vp_mapping);

static ssize_t vendor_show(struct device *dev,
			   struct device_attribute *dev_attr,
			   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	return sprintf(buf, "0x%x\n", hv_dev->vendor_id);
}
static DEVICE_ATTR_RO(vendor);

static ssize_t device_show(struct device *dev,
			   struct device_attribute *dev_attr,
			   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	return sprintf(buf, "0x%x\n", hv_dev->device_id);
}
static DEVICE_ATTR_RO(device);

static ssize_t driver_override_store(struct device *dev,
				     struct device_attribute *attr,
				     const char *buf, size_t count)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	char *driver_override, *old, *cp;

	/* We need to keep extra room for a newline */
	if (count >= (PAGE_SIZE - 1))
		return -EINVAL;

	driver_override = kstrndup(buf, count, GFP_KERNEL);
	if (!driver_override)
		return -ENOMEM;

	cp = strchr(driver_override, '\n');
	if (cp)
		*cp = '\0';

	device_lock(dev);
	old = hv_dev->driver_override;
	if (strlen(driver_override)) {
		hv_dev->driver_override = driver_override;
	} else {
		kfree(driver_override);
		hv_dev->driver_override = NULL;
	}
	device_unlock(dev);

	kfree(old);

	return count;
}

static ssize_t driver_override_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	ssize_t len;

	device_lock(dev);
	len = snprintf(buf, PAGE_SIZE, "%s\n", hv_dev->driver_override);
	device_unlock(dev);

	return len;
}
static DEVICE_ATTR_RW(driver_override);

/* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */
static struct attribute *vmbus_dev_attrs[] = {
	&dev_attr_id.attr,
	&dev_attr_state.attr,
	&dev_attr_monitor_id.attr,
	&dev_attr_class_id.attr,
	&dev_attr_device_id.attr,
	&dev_attr_modalias.attr,
#ifdef CONFIG_NUMA
	&dev_attr_numa_node.attr,
#endif
	&dev_attr_server_monitor_pending.attr,
	&dev_attr_client_monitor_pending.attr,
	&dev_attr_server_monitor_latency.attr,
	&dev_attr_client_monitor_latency.attr,
	&dev_attr_server_monitor_conn_id.attr,
	&dev_attr_client_monitor_conn_id.attr,
	&dev_attr_out_intr_mask.attr,
	&dev_attr_out_read_index.attr,
	&dev_attr_out_write_index.attr,
	&dev_attr_out_read_bytes_avail.attr,
	&dev_attr_out_write_bytes_avail.attr,
	&dev_attr_in_intr_mask.attr,
	&dev_attr_in_read_index.attr,
	&dev_attr_in_write_index.attr,
	&dev_attr_in_read_bytes_avail.attr,
	&dev_attr_in_write_bytes_avail.attr,
	&dev_attr_channel_vp_mapping.attr,
	&dev_attr_vendor.attr,
	&dev_attr_device.attr,
	&dev_attr_driver_override.attr,
	NULL,
};
ATTRIBUTE_GROUPS(vmbus_dev);

/*
 * vmbus_uevent - add uevent for our device
 *
 * This routine is invoked when a device is added or removed on the vmbus to
 * generate a uevent to udev in the userspace. udev will then look at its
 * rules and the uevent generated here to load the appropriate driver.
 *
 * The alias string will be of the form vmbus:guid where guid is the string
 * representation of the device guid (each byte of the guid will be
 * represented with two hex characters).
 */
static int vmbus_uevent(struct device *device, struct kobj_uevent_env *env)
{
	struct hv_device *dev = device_to_hv_device(device);
	int ret;
	char alias_name[VMBUS_ALIAS_LEN + 1];

	print_alias_name(dev, alias_name);
	ret = add_uevent_var(env, "MODALIAS=vmbus:%s", alias_name);
	return ret;
}

static const uuid_le null_guid;

static inline bool is_null_guid(const uuid_le *guid)
{
	if (uuid_le_cmp(*guid, null_guid))
		return false;
	return true;
}

static const struct hv_vmbus_device_id *
hv_vmbus_dev_match(const struct hv_vmbus_device_id *id, const uuid_le *guid)
{
	if (id == NULL)
		return NULL; /* empty device table */

	for (; !is_null_guid(&id->guid); id++)
		if (!uuid_le_cmp(id->guid, *guid))
			return id;

	return NULL;
}

static const struct hv_vmbus_device_id *
hv_vmbus_dynid_match(struct hv_driver *drv, const uuid_le *guid)
{
	const struct hv_vmbus_device_id *id = NULL;
	struct vmbus_dynid *dynid;

	spin_lock(&drv->dynids.lock);
	list_for_each_entry(dynid, &drv->dynids.list, node) {
		if (!uuid_le_cmp(dynid->id.guid, *guid)) {
			id = &dynid->id;
			break;
		}
	}
	spin_unlock(&drv->dynids.lock);

	return id;
}

static const struct hv_vmbus_device_id vmbus_device_null = {
	.guid = NULL_UUID_LE,
};

/*
 * Return a matching hv_vmbus_device_id pointer.
 * If there is no match, return NULL.
 */
static const struct hv_vmbus_device_id *hv_vmbus_get_id(struct hv_driver *drv,
							struct hv_device *dev)
{
	const uuid_le *guid = &dev->dev_type;
	const struct hv_vmbus_device_id *id;

	/* When driver_override is set, only bind to the matching driver */
	if (dev->driver_override && strcmp(dev->driver_override, drv->name))
		return NULL;

	/* Look at the dynamic ids first, before the static ones */
	id = hv_vmbus_dynid_match(drv, guid);
	if (!id)
		id = hv_vmbus_dev_match(drv->id_table, guid);

	/* driver_override will always match, send a dummy id */
	if (!id && dev->driver_override)
		id = &vmbus_device_null;

	return id;
}

/* vmbus_add_dynid - add a new device ID to this driver and re-probe devices */
static int vmbus_add_dynid(struct hv_driver *drv, uuid_le *guid)
{
	struct vmbus_dynid *dynid;

	dynid = kzalloc(sizeof(*dynid), GFP_KERNEL);
	if (!dynid)
		return -ENOMEM;

	dynid->id.guid = *guid;

	spin_lock(&drv->dynids.lock);
	list_add_tail(&dynid->node, &drv->dynids.list);
	spin_unlock(&drv->dynids.lock);

	return driver_attach(&drv->driver);
}

static void vmbus_free_dynids(struct hv_driver *drv)
{
	struct vmbus_dynid *dynid, *n;

	spin_lock(&drv->dynids.lock);
	list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) {
		list_del(&dynid->node);
		kfree(dynid);
	}
	spin_unlock(&drv->dynids.lock);
}

/*
 * new_id_store - sysfs frontend to vmbus_add_dynid()
 *
 * Allow GUIDs to be added to an existing driver via sysfs.
 */
static ssize_t new_id_store(struct device_driver *driver, const char *buf,
			    size_t count)
{
	struct hv_driver *drv = drv_to_hv_drv(driver);
	uuid_le guid;
	ssize_t retval;

	retval = uuid_le_to_bin(buf, &guid);
	if (retval)
		return retval;

	if (hv_vmbus_dynid_match(drv, &guid))
		return -EEXIST;

	retval = vmbus_add_dynid(drv, &guid);
	if (retval)
		return retval;
	return count;
}
static DRIVER_ATTR_WO(new_id);

/*
 * remove_id_store - remove a vmbus device ID from this driver
 *
 * Removes a dynamic vmbus device ID from this driver.
 */
static ssize_t remove_id_store(struct device_driver *driver, const char *buf,
			       size_t count)
{
	struct hv_driver *drv = drv_to_hv_drv(driver);
	struct vmbus_dynid *dynid, *n;
	uuid_le guid;
	ssize_t retval;

	retval = uuid_le_to_bin(buf, &guid);
	if (retval)
		return retval;

	retval = -ENODEV;
	spin_lock(&drv->dynids.lock);
	list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) {
		struct hv_vmbus_device_id *id = &dynid->id;

		if (!uuid_le_cmp(id->guid, guid)) {
			list_del(&dynid->node);
			kfree(dynid);
			retval = count;
			break;
		}
	}
	spin_unlock(&drv->dynids.lock);

	return retval;
}
static DRIVER_ATTR_WO(remove_id);

static struct attribute *vmbus_drv_attrs[] = {
	&driver_attr_new_id.attr,
	&driver_attr_remove_id.attr,
	NULL,
};
ATTRIBUTE_GROUPS(vmbus_drv);


/*
 * vmbus_match - Attempt to match the specified device to the specified driver
 */
static int vmbus_match(struct device *device, struct device_driver *driver)
{
	struct hv_driver *drv = drv_to_hv_drv(driver);
	struct hv_device *hv_dev = device_to_hv_device(device);

	/* The hv_sock driver handles all hv_sock offers. */
	if (is_hvsock_channel(hv_dev->channel))
		return drv->hvsock;

	if (hv_vmbus_get_id(drv, hv_dev))
		return 1;

	return 0;
}

/*
 * vmbus_probe - Add the new vmbus's child device
 */
static int vmbus_probe(struct device *child_device)
{
	int ret = 0;
	struct hv_driver *drv =
			drv_to_hv_drv(child_device->driver);
	struct hv_device *dev = device_to_hv_device(child_device);
	const struct hv_vmbus_device_id *dev_id;

	dev_id = hv_vmbus_get_id(drv, dev);
	if (drv->probe) {
		ret = drv->probe(dev, dev_id);
		if (ret != 0)
			pr_err("probe failed for device %s (%d)\n",
			       dev_name(child_device), ret);

	} else {
		pr_err("probe not set for driver %s\n",
		       dev_name(child_device));
		ret = -ENODEV;
	}
	return ret;
}

/*
 * vmbus_remove - Remove a vmbus device
 */
static int vmbus_remove(struct device *child_device)
{
	struct hv_driver *drv;
	struct hv_device *dev = device_to_hv_device(child_device);

	if (child_device->driver) {
		drv = drv_to_hv_drv(child_device->driver);
		if (drv->remove)
			drv->remove(dev);
	}

	return 0;
}


/*
 * vmbus_shutdown - Shutdown a vmbus device
 */
static void vmbus_shutdown(struct device *child_device)
{
	struct hv_driver *drv;
	struct hv_device *dev = device_to_hv_device(child_device);


	/* The device may not be attached yet */
	if (!child_device->driver)
		return;

	drv = drv_to_hv_drv(child_device->driver);

	if (drv->shutdown)
		drv->shutdown(dev);
}


/*
 * vmbus_device_release - Final callback release of the vmbus child device
 */
static void vmbus_device_release(struct device *device)
{
	struct hv_device *hv_dev = device_to_hv_device(device);
	struct vmbus_channel *channel = hv_dev->channel;

	mutex_lock(&vmbus_connection.channel_mutex);
	hv_process_channel_removal(channel);
	mutex_unlock(&vmbus_connection.channel_mutex);
	kfree(hv_dev);
}

/* The one and only one */
static struct bus_type hv_bus = {
	.name =		"vmbus",
	.match =	vmbus_match,
	.shutdown =	vmbus_shutdown,
	.remove =	vmbus_remove,
	.probe =	vmbus_probe,
	.uevent =	vmbus_uevent,
	.dev_groups =	vmbus_dev_groups,
	.drv_groups =	vmbus_drv_groups,
};

struct onmessage_work_context {
	struct work_struct work;
	struct hv_message msg;
};

static void vmbus_onmessage_work(struct work_struct *work)
{
	struct onmessage_work_context *ctx;

	/* Do not process messages if we're in DISCONNECTED state */
	if (vmbus_connection.conn_state == DISCONNECTED)
		return;

	ctx = container_of(work, struct onmessage_work_context,
			   work);
	vmbus_onmessage(&ctx->msg);
	kfree(ctx);
}

static void hv_process_timer_expiration(struct hv_message *msg,
					struct hv_per_cpu_context *hv_cpu)
{
	struct clock_event_device *dev = hv_cpu->clk_evt;

	if (dev->event_handler)
		dev->event_handler(dev);

	vmbus_signal_eom(msg, HVMSG_TIMER_EXPIRED);
}
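
/*
 * Tasklet handler run when the per-cpu SYNIC message page has a pending
 * message: dispatch VMBus channel protocol messages, deferring blocking
 * handlers to a work queue.
 */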
void vmbus_on_msg_dpc(unsigned long data)
{
	struct hv_per_cpu_context *hv_cpu = (void *)data;
	void *page_addr = hv_cpu->synic_message_page;
	struct hv_message *msg = (struct hv_message *)page_addr +
				  VMBUS_MESSAGE_SINT;
	struct vmbus_channel_message_header *hdr;
	const struct vmbus_channel_message_table_entry *entry;
	struct onmessage_work_context *ctx;
	u32 message_type = msg->header.message_type;

	if (message_type == HVMSG_NONE)
		/* no msg */
		return;

	hdr = (struct vmbus_channel_message_header *)msg->u.payload;

	trace_vmbus_on_msg_dpc(hdr);

	if (hdr->msgtype >= CHANNELMSG_COUNT) {
		WARN_ONCE(1, "unknown msgtype=%d\n", hdr->msgtype);
		goto msg_handled;
	}

	entry = &channel_message_table[hdr->msgtype];
	if (entry->handler_type == VMHT_BLOCKING) {
		ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
		if (ctx == NULL)
			return;

		INIT_WORK(&ctx->work, vmbus_onmessage_work);
		memcpy(&ctx->msg, msg, sizeof(*msg));

		/*
		 * The host can generate a rescind message while we
		 * may still be handling the original offer. We deal with
		 * this condition by ensuring the processing is done on the
		 * same CPU.
		 */
		switch (hdr->msgtype) {
		case CHANNELMSG_RESCIND_CHANNELOFFER:
			/*
			 * If we are handling the rescind message;
			 * schedule the work on the global work queue.
			 */
			schedule_work_on(vmbus_connection.connect_cpu,
					 &ctx->work);
			break;

		case CHANNELMSG_OFFERCHANNEL:
			atomic_inc(&vmbus_connection.offer_in_progress);
			queue_work_on(vmbus_connection.connect_cpu,
				      vmbus_connection.work_queue,
				      &ctx->work);
			break;

		default:
			queue_work(vmbus_connection.work_queue, &ctx->work);
		}
	} else
		entry->message_handler(hdr);

msg_handled:
	vmbus_signal_eom(msg, message_type);
}


/*
 * Direct callback for channels using other deferred processing
 */
static void vmbus_channel_isr(struct vmbus_channel *channel)
{
	void (*callback_fn)(void *);

	callback_fn = READ_ONCE(channel->onchannel_callback);
	if (likely(callback_fn != NULL))
		(*callback_fn)(channel->channel_callback_context);
}

/*
 * Schedule all channels with events pending
 */
static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu)
{
	unsigned long *recv_int_page;
	u32 maxbits, relid;

	if (vmbus_proto_version < VERSION_WIN8) {
		maxbits = MAX_NUM_CHANNELS_SUPPORTED;
		recv_int_page = vmbus_connection.recv_int_page;
	} else {
		/*
		 * When the host is win8 and beyond, the event page
		 * can be directly checked to get the id of the channel
		 * that has the interrupt pending.
		 */
		void *page_addr = hv_cpu->synic_event_page;
		union hv_synic_event_flags *event
			= (union hv_synic_event_flags *)page_addr +
						 VMBUS_MESSAGE_SINT;

		maxbits = HV_EVENT_FLAGS_COUNT;
		recv_int_page = event->flags;
	}

	if (unlikely(!recv_int_page))
		return;

	for_each_set_bit(relid, recv_int_page, maxbits) {
		struct vmbus_channel *channel;

		if (!sync_test_and_clear_bit(relid, recv_int_page))
			continue;

		/* Special case - vmbus channel protocol msg */
		if (relid == 0)
			continue;

		rcu_read_lock();

		/* Find channel based on relid */
		list_for_each_entry_rcu(channel, &hv_cpu->chan_list, percpu_list) {
			if (channel->offermsg.child_relid != relid)
				continue;

			if (channel->rescind)
				continue;

			trace_vmbus_chan_sched(channel);

			++channel->interrupts;

			switch (channel->callback_mode) {
			case HV_CALL_ISR:
				vmbus_channel_isr(channel);
				break;

			case HV_CALL_BATCHED:
				hv_begin_read(&channel->inbound);
				/* fallthrough */
			case HV_CALL_DIRECT:
				tasklet_schedule(&channel->callback_event);
			}
		}

		rcu_read_unlock();
	}
}

static void vmbus_isr(void)
{
	struct hv_per_cpu_context *hv_cpu
		= this_cpu_ptr(hv_context.cpu_context);
	void *page_addr = hv_cpu->synic_event_page;
	struct hv_message *msg;
	union hv_synic_event_flags *event;
	bool handled = false;

	if (unlikely(page_addr == NULL))
		return;

	event = (union hv_synic_event_flags *)page_addr +
					 VMBUS_MESSAGE_SINT;
	/*
	 * Check for events before checking for messages. This is the order
	 * in which events and messages are checked in Windows guests on
	 * Hyper-V, and the Windows team suggested we do the same.
	 */

	if ((vmbus_proto_version == VERSION_WS2008) ||
	    (vmbus_proto_version == VERSION_WIN7)) {

		/* Since we are a child, we only need to check bit 0 */
		if (sync_test_and_clear_bit(0, event->flags))
			handled = true;
	} else {
		/*
		 * Our host is win8 or above. The signaling mechanism
		 * has changed and we can directly look at the event page.
		 * If bit n is set then we have an interrupt on the channel
		 * whose id is n.
		 */
		handled = true;
	}

	if (handled)
		vmbus_chan_sched(hv_cpu);

	page_addr = hv_cpu->synic_message_page;
	msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;

	/* Check if there are actual msgs to be processed */
	if (msg->header.message_type != HVMSG_NONE) {
		if (msg->header.message_type == HVMSG_TIMER_EXPIRED)
			hv_process_timer_expiration(msg, hv_cpu);
		else
			tasklet_schedule(&hv_cpu->msg_dpc);
	}

	add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0);
}

/*
 * Boolean to control whether to report panic messages over Hyper-V.
 *
 * It can be set via /proc/sys/kernel/hyperv_record_panic_msg
 */
static int sysctl_record_panic_msg = 1;

/*
 * Callback from kmsg_dump. Grab as much as possible from the end of the kmsg
 * buffer and call into Hyper-V to transfer the data.
 */
static void hv_kmsg_dump(struct kmsg_dumper *dumper,
			 enum kmsg_dump_reason reason)
{
	size_t bytes_written;
	phys_addr_t panic_pa;

	/* We are only interested in panics. */
	if ((reason != KMSG_DUMP_PANIC) || (!sysctl_record_panic_msg))
		return;

	panic_pa = virt_to_phys(hv_panic_page);

	/*
	 * Write dump contents to the page. No need to synchronize; panic should
	 * be single-threaded.
	 */
	kmsg_dump_get_buffer(dumper, true, hv_panic_page, PAGE_SIZE,
			     &bytes_written);
	if (bytes_written)
		hyperv_report_panic_msg(panic_pa, bytes_written);
}

static struct kmsg_dumper hv_kmsg_dumper = {
	.dump = hv_kmsg_dump,
};

static struct ctl_table_header *hv_ctl_table_hdr;
static int zero;
static int one = 1;

/*
 * sysctl option to allow the user to control whether kmsg data should be
 * reported to Hyper-V on panic.
 */
static struct ctl_table hv_ctl_table[] = {
	{
		.procname	= "hyperv_record_panic_msg",
		.data		= &sysctl_record_panic_msg,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &one
	},
	{}
};

static struct ctl_table hv_root_table[] = {
	{
		.procname	= "kernel",
		.mode		= 0555,
		.child		= hv_ctl_table
	},
	{}
};

/*
 * vmbus_bus_init - Main vmbus driver initialization routine.
 *
 * Here, we
 *	- initialize the vmbus driver context
 *	- invoke the vmbus hv main init routine
 *	- retrieve the channel offers
 */
static int vmbus_bus_init(void)
{
	int ret;

	/* Hypervisor initialization...setup hypercall page..etc */
	ret = hv_init();
	if (ret != 0) {
		pr_err("Unable to initialize the hypervisor - 0x%x\n", ret);
		return ret;
	}

	ret = bus_register(&hv_bus);
	if (ret)
		return ret;

	hv_setup_vmbus_irq(vmbus_isr);

	ret = hv_synic_alloc();
	if (ret)
		goto err_alloc;
	/*
	 * Initialize the per-cpu interrupt state and
	 * connect to the host.
	 */
	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online",
				hv_synic_init, hv_synic_cleanup);
	if (ret < 0)
		goto err_alloc;
	hyperv_cpuhp_online = ret;

	ret = vmbus_connect();
	if (ret)
		goto err_connect;

	/*
	 * Only register if the crash MSRs are available
	 */
	if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
		u64 hyperv_crash_ctl;
		/*
		 * Sysctl registration is not fatal, since by default
		 * reporting is enabled.
		 */
		hv_ctl_table_hdr = register_sysctl_table(hv_root_table);
		if (!hv_ctl_table_hdr)
			pr_err("Hyper-V: sysctl table register error");

		/*
		 * Register for panic kmsg callback only if the right
		 * capability is supported by the hypervisor.
		 */
		hv_get_crash_ctl(hyperv_crash_ctl);
		if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG) {
			hv_panic_page = (void *)get_zeroed_page(GFP_KERNEL);
			if (hv_panic_page) {
				ret = kmsg_dump_register(&hv_kmsg_dumper);
				if (ret)
					pr_err("Hyper-V: kmsg dump register error 0x%x\n",
					       ret);
			} else
				pr_err("Hyper-V: panic message page memory allocation failed");
		}

		register_die_notifier(&hyperv_die_block);
		atomic_notifier_chain_register(&panic_notifier_list,
					       &hyperv_panic_block);
	}

	vmbus_request_offers();

	return 0;

err_connect:
	cpuhp_remove_state(hyperv_cpuhp_online);
err_alloc:
	hv_synic_free();
	hv_remove_vmbus_irq();

	bus_unregister(&hv_bus);
	free_page((unsigned long)hv_panic_page);
	unregister_sysctl_table(hv_ctl_table_hdr);
	hv_ctl_table_hdr = NULL;
	return ret;
}

/**
 * __vmbus_driver_register() - Register a vmbus driver
 * @hv_driver: Pointer to driver structure you want to register
 * @owner: owner module of the drv
 * @mod_name: module name string
 *
 * Registers the given driver with Linux through the 'driver_register()' call
 * and sets up the hyper-v vmbus handling for this driver.
 * It will return the state of the 'driver_register()' call.
 *
 */
int __vmbus_driver_register(struct hv_driver *hv_driver, struct module *owner, const char *mod_name)
{
	int ret;

	pr_info("registering driver %s\n", hv_driver->name);

	ret = vmbus_exists();
	if (ret < 0)
		return ret;

	hv_driver->driver.name = hv_driver->name;
	hv_driver->driver.owner = owner;
	hv_driver->driver.mod_name = mod_name;
	hv_driver->driver.bus = &hv_bus;

	spin_lock_init(&hv_driver->dynids.lock);
	INIT_LIST_HEAD(&hv_driver->dynids.list);

	ret = driver_register(&hv_driver->driver);

	return ret;
}
EXPORT_SYMBOL_GPL(__vmbus_driver_register);

/**
 * vmbus_driver_unregister() - Unregister a vmbus driver
 * @hv_driver: Pointer to driver structure you want to
 *             un-register
 *
 * Un-register the given driver that was previously registered with a call to
 * vmbus_driver_register()
 */
void vmbus_driver_unregister(struct hv_driver *hv_driver)
{
	pr_info("unregistering driver %s\n", hv_driver->name);

	if (!vmbus_exists()) {
		driver_unregister(&hv_driver->driver);
		vmbus_free_dynids(hv_driver);
	}
}
EXPORT_SYMBOL_GPL(vmbus_driver_unregister);


/*
 * Called when last reference to channel is gone.
 */
static void vmbus_chan_release(struct kobject *kobj)
{
	struct vmbus_channel *channel
		= container_of(kobj, struct vmbus_channel, kobj);

	kfree_rcu(channel, rcu);
}
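
/*
 * Per-channel sysfs attributes, exposed for each channel under the parent
 * device's "channels/<relid>" kobject (see vmbus_add_channel_kobj() below).
 */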
struct vmbus_chan_attribute {
	struct attribute attr;
	ssize_t (*show)(const struct vmbus_channel *chan, char *buf);
	ssize_t (*store)(struct vmbus_channel *chan,
			 const char *buf, size_t count);
};
#define VMBUS_CHAN_ATTR(_name, _mode, _show, _store) \
	struct vmbus_chan_attribute chan_attr_##_name \
		= __ATTR(_name, _mode, _show, _store)
#define VMBUS_CHAN_ATTR_RW(_name) \
	struct vmbus_chan_attribute chan_attr_##_name = __ATTR_RW(_name)
#define VMBUS_CHAN_ATTR_RO(_name) \
	struct vmbus_chan_attribute chan_attr_##_name = __ATTR_RO(_name)
#define VMBUS_CHAN_ATTR_WO(_name) \
	struct vmbus_chan_attribute chan_attr_##_name = __ATTR_WO(_name)

static ssize_t vmbus_chan_attr_show(struct kobject *kobj,
				    struct attribute *attr, char *buf)
{
	const struct vmbus_chan_attribute *attribute
		= container_of(attr, struct vmbus_chan_attribute, attr);
	const struct vmbus_channel *chan
		= container_of(kobj, struct vmbus_channel, kobj);

	if (!attribute->show)
		return -EIO;

	if (chan->state != CHANNEL_OPENED_STATE)
		return -EINVAL;

	return attribute->show(chan, buf);
}

static const struct sysfs_ops vmbus_chan_sysfs_ops = {
	.show = vmbus_chan_attr_show,
};

static ssize_t out_mask_show(const struct vmbus_channel *channel, char *buf)
{
	const struct hv_ring_buffer_info *rbi = &channel->outbound;

	return sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask);
}
static VMBUS_CHAN_ATTR_RO(out_mask);

static ssize_t in_mask_show(const struct vmbus_channel *channel, char *buf)
{
	const struct hv_ring_buffer_info *rbi = &channel->inbound;

	return sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask);
}
static VMBUS_CHAN_ATTR_RO(in_mask);

static ssize_t read_avail_show(const struct vmbus_channel *channel, char *buf)
{
	const struct hv_ring_buffer_info *rbi = &channel->inbound;

	return sprintf(buf, "%u\n", hv_get_bytes_to_read(rbi));
}
static VMBUS_CHAN_ATTR_RO(read_avail);

static ssize_t write_avail_show(const struct vmbus_channel *channel, char *buf)
{
	const struct hv_ring_buffer_info *rbi = &channel->outbound;

	return sprintf(buf, "%u\n", hv_get_bytes_to_write(rbi));
}
static VMBUS_CHAN_ATTR_RO(write_avail);

static ssize_t show_target_cpu(const struct vmbus_channel *channel, char *buf)
{
	return sprintf(buf, "%u\n", channel->target_cpu);
}
static VMBUS_CHAN_ATTR(cpu, S_IRUGO, show_target_cpu, NULL);

static ssize_t channel_pending_show(const struct vmbus_channel *channel,
				    char *buf)
{
	return sprintf(buf, "%d\n",
		       channel_pending(channel,
				       vmbus_connection.monitor_pages[1]));
}
static VMBUS_CHAN_ATTR(pending, S_IRUGO, channel_pending_show, NULL);

static ssize_t channel_latency_show(const struct vmbus_channel *channel,
				    char *buf)
{
	return sprintf(buf, "%d\n",
		       channel_latency(channel,
				       vmbus_connection.monitor_pages[1]));
}
static VMBUS_CHAN_ATTR(latency, S_IRUGO, channel_latency_show, NULL);

static ssize_t channel_interrupts_show(const struct vmbus_channel *channel, char *buf)
{
	return sprintf(buf, "%llu\n", channel->interrupts);
}
static VMBUS_CHAN_ATTR(interrupts, S_IRUGO, channel_interrupts_show, NULL);

static ssize_t channel_events_show(const struct vmbus_channel *channel, char *buf)
{
	return sprintf(buf, "%llu\n", channel->sig_events);
}
static VMBUS_CHAN_ATTR(events, S_IRUGO, channel_events_show, NULL);

static ssize_t subchannel_monitor_id_show(const struct vmbus_channel *channel,
					  char *buf)
{
	return sprintf(buf, "%u\n", channel->offermsg.monitorid);
}
static VMBUS_CHAN_ATTR(monitor_id, S_IRUGO, subchannel_monitor_id_show, NULL);

static ssize_t subchannel_id_show(const struct vmbus_channel *channel,
				  char *buf)
{
	return sprintf(buf, "%u\n",
		       channel->offermsg.offer.sub_channel_index);
}
static VMBUS_CHAN_ATTR_RO(subchannel_id);

static struct attribute *vmbus_chan_attrs[] = {
	&chan_attr_out_mask.attr,
	&chan_attr_in_mask.attr,
	&chan_attr_read_avail.attr,
	&chan_attr_write_avail.attr,
	&chan_attr_cpu.attr,
	&chan_attr_pending.attr,
	&chan_attr_latency.attr,
	&chan_attr_interrupts.attr,
	&chan_attr_events.attr,
	&chan_attr_monitor_id.attr,
	&chan_attr_subchannel_id.attr,
	NULL
};

static struct kobj_type vmbus_chan_ktype = {
	.sysfs_ops = &vmbus_chan_sysfs_ops,
	.release = vmbus_chan_release,
	.default_attrs = vmbus_chan_attrs,
};

/*
 * vmbus_add_channel_kobj - setup a sub-directory under device/channels
 */
int vmbus_add_channel_kobj(struct hv_device *dev, struct vmbus_channel *channel)
{
	struct kobject *kobj = &channel->kobj;
	u32 relid = channel->offermsg.child_relid;
	int ret;

	kobj->kset = dev->channels_kset;
	ret = kobject_init_and_add(kobj, &vmbus_chan_ktype, NULL,
				   "%u", relid);
	if (ret)
		return ret;

	kobject_uevent(kobj, KOBJ_ADD);

	return 0;
}

/*
 * vmbus_device_create - Creates and registers a new child device
 * on the vmbus.
 */
struct hv_device *vmbus_device_create(const uuid_le *type,
				      const uuid_le *instance,
				      struct vmbus_channel *channel)
{
	struct hv_device *child_device_obj;

	child_device_obj = kzalloc(sizeof(struct hv_device), GFP_KERNEL);
	if (!child_device_obj) {
		pr_err("Unable to allocate device object for child device\n");
		return NULL;
	}

	child_device_obj->channel = channel;
	memcpy(&child_device_obj->dev_type, type, sizeof(uuid_le));
	memcpy(&child_device_obj->dev_instance, instance,
	       sizeof(uuid_le));
	child_device_obj->vendor_id = 0x1414; /* MSFT vendor ID */


	return child_device_obj;
}

/*
 * vmbus_device_register - Register the child device
 */
int vmbus_device_register(struct hv_device *child_device_obj)
{
	struct kobject *kobj = &child_device_obj->device.kobj;
	int ret;

	dev_set_name(&child_device_obj->device, "%pUl",
		     child_device_obj->channel->offermsg.offer.if_instance.b);

	child_device_obj->device.bus = &hv_bus;
	child_device_obj->device.parent = &hv_acpi_dev->dev;
	child_device_obj->device.release = vmbus_device_release;

	/*
	 * Register with the LDM. This will kick off the driver/device
	 * binding...which will eventually call vmbus_match() and vmbus_probe()
	 */
	ret = device_register(&child_device_obj->device);
	if (ret) {
		pr_err("Unable to register child device\n");
		return ret;
	}

	child_device_obj->channels_kset = kset_create_and_add("channels",
							      NULL, kobj);
	if (!child_device_obj->channels_kset) {
		ret = -ENOMEM;
		goto err_dev_unregister;
	}

	ret = vmbus_add_channel_kobj(child_device_obj,
				     child_device_obj->channel);
	if (ret) {
		pr_err("Unable to register primary channel\n");
		goto err_kset_unregister;
	}

	return 0;

err_kset_unregister:
	kset_unregister(child_device_obj->channels_kset);

err_dev_unregister:
	device_unregister(&child_device_obj->device);
	return ret;
}

/*
 * vmbus_device_unregister - Remove the specified child device
 * from the vmbus.
 */
void vmbus_device_unregister(struct hv_device *device_obj)
{
	pr_debug("child device %s unregistered\n",
		 dev_name(&device_obj->device));

	kset_unregister(device_obj->channels_kset);

	/*
	 * Kick off the process of unregistering the device.
	 * This will call vmbus_remove() and eventually vmbus_device_release()
	 */
	device_unregister(&device_obj->device);
}


/*
 * VMBUS is an acpi enumerated device. Get the information we
 * need from DSDT.
 */
#define VTPM_BASE_ADDRESS 0xfed40000
static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *ctx)
{
	resource_size_t start = 0;
	resource_size_t end = 0;
	struct resource *new_res;
	struct resource **old_res = &hyperv_mmio;
	struct resource **prev_res = NULL;

	switch (res->type) {

	/*
	 * "Address" descriptors are for bus windows. Ignore
	 * "memory" descriptors, which are for registers on
	 * devices.
	 */
	case ACPI_RESOURCE_TYPE_ADDRESS32:
		start = res->data.address32.address.minimum;
		end = res->data.address32.address.maximum;
		break;

	case ACPI_RESOURCE_TYPE_ADDRESS64:
		start = res->data.address64.address.minimum;
		end = res->data.address64.address.maximum;
		break;

	default:
		/* Unused resource type */
		return AE_OK;

	}
	/*
	 * Ignore ranges that are below 1MB, as they're not
	 * necessary or useful here.
	 */
	if (end < 0x100000)
		return AE_OK;

	new_res = kzalloc(sizeof(*new_res), GFP_ATOMIC);
	if (!new_res)
		return AE_NO_MEMORY;

	/* If this range overlaps the virtual TPM, truncate it. */
	if (end > VTPM_BASE_ADDRESS && start < VTPM_BASE_ADDRESS)
		end = VTPM_BASE_ADDRESS;

	new_res->name = "hyperv mmio";
	new_res->flags = IORESOURCE_MEM;
	new_res->start = start;
	new_res->end = end;

	/*
	 * If two ranges are adjacent, merge them.
	 */
	do {
		if (!*old_res) {
			*old_res = new_res;
			break;
		}

		if (((*old_res)->end + 1) == new_res->start) {
			(*old_res)->end = new_res->end;
			kfree(new_res);
			break;
		}

		if ((*old_res)->start == new_res->end + 1) {
			(*old_res)->start = new_res->start;
			kfree(new_res);
			break;
		}

		if ((*old_res)->start > new_res->end) {
			new_res->sibling = *old_res;
			if (prev_res)
				(*prev_res)->sibling = new_res;
			*old_res = new_res;
			break;
		}

		prev_res = old_res;
		old_res = &(*old_res)->sibling;

	} while (1);

	return AE_OK;
}

static int vmbus_acpi_remove(struct acpi_device *device)
{
	struct resource *cur_res;
	struct resource *next_res;

	if (hyperv_mmio) {
		if (fb_mmio) {
			__release_region(hyperv_mmio, fb_mmio->start,
					 resource_size(fb_mmio));
			fb_mmio = NULL;
		}

		for (cur_res = hyperv_mmio; cur_res; cur_res = next_res) {
			next_res = cur_res->sibling;
			kfree(cur_res);
		}
	}

	return 0;
}

static void vmbus_reserve_fb(void)
{
	int size;
	/*
	 * Make a claim for the frame buffer in the resource tree under the
	 * first node, which will be the one below 4GB.  The length seems to
	 * be underreported, particularly in a Generation 1 VM.  So start out
	 * reserving a larger area and make it smaller until it succeeds.
	 */

	if (screen_info.lfb_base) {
		if (efi_enabled(EFI_BOOT))
			size = max_t(__u32, screen_info.lfb_size, 0x800000);
		else
			size = max_t(__u32, screen_info.lfb_size, 0x4000000);

		for (; !fb_mmio && (size >= 0x100000); size >>= 1) {
			fb_mmio = __request_region(hyperv_mmio,
						   screen_info.lfb_base, size,
						   fb_mmio_name, 0);
		}
	}
}

/**
 * vmbus_allocate_mmio() - Pick a memory-mapped I/O range.
 * @new:		If successful, supplied a pointer to the
 *			allocated MMIO space.
 * @device_obj:		Identifies the caller
 * @min:		Minimum guest physical address of the
 *			allocation
 * @max:		Maximum guest physical address
 * @size:		Size of the range to be allocated
 * @align:		Alignment of the range to be allocated
 * @fb_overlap_ok:	Whether this allocation can be allowed
 *			to overlap the video frame buffer.
 *
 * This function walks the resources granted to VMBus by the
 * _CRS object in the ACPI namespace underneath the parent
 * "bridge" whether that's a root PCI bus in the Generation 1
 * case or a Module Device in the Generation 2 case.  It then
 * attempts to allocate from the global MMIO pool in a way that
 * matches the constraints supplied in these parameters and by
 * that _CRS.
 *
 * Return: 0 on success, -errno on failure
 */
int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj,
			resource_size_t min, resource_size_t max,
			resource_size_t size, resource_size_t align,
			bool fb_overlap_ok)
{
	struct resource *iter, *shadow;
	resource_size_t range_min, range_max, start;
	const char *dev_n = dev_name(&device_obj->device);
	int retval;

	retval = -ENXIO;
	down(&hyperv_mmio_lock);

	/*
	 * If overlaps with frame buffers are allowed, then first attempt to
	 * make the allocation from within the reserved region.  Because it
	 * is already reserved, no shadow allocation is necessary.
	 */
	if (fb_overlap_ok && fb_mmio && !(min > fb_mmio->end) &&
	    !(max < fb_mmio->start)) {

		range_min = fb_mmio->start;
		range_max = fb_mmio->end;
		start = (range_min + align - 1) & ~(align - 1);
		for (; start + size - 1 <= range_max; start += align) {
			*new = request_mem_region_exclusive(start, size, dev_n);
			if (*new) {
				retval = 0;
				goto exit;
			}
		}
	}

	for (iter = hyperv_mmio; iter; iter = iter->sibling) {
		if ((iter->start >= max) || (iter->end <= min))
			continue;

		range_min = iter->start;
		range_max = iter->end;
		start = (range_min + align - 1) & ~(align - 1);
		for (; start + size - 1 <= range_max; start += align) {
			shadow = __request_region(iter, start, size, NULL,
						  IORESOURCE_BUSY);
			if (!shadow)
				continue;

			*new = request_mem_region_exclusive(start, size, dev_n);
			if (*new) {
				shadow->name = (char *)*new;
				retval = 0;
				goto exit;
			}

			__release_region(iter, start, size);
		}
	}

exit:
	up(&hyperv_mmio_lock);
	return retval;
}
EXPORT_SYMBOL_GPL(vmbus_allocate_mmio);

/**
 * vmbus_free_mmio() - Free a memory-mapped I/O range.
 * @start:		Base address of region to release.
 * @size:		Size of the range to be allocated
 *
 * This function releases anything requested by
 * vmbus_allocate_mmio().
 */
void vmbus_free_mmio(resource_size_t start, resource_size_t size)
{
	struct resource *iter;

	down(&hyperv_mmio_lock);
	for (iter = hyperv_mmio; iter; iter = iter->sibling) {
		if ((iter->start >= start + size) || (iter->end <= start))
			continue;

		__release_region(iter, start, size);
	}
	release_mem_region(start, size);
	up(&hyperv_mmio_lock);

}
EXPORT_SYMBOL_GPL(vmbus_free_mmio);
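
/*
 * Illustrative usage only (not part of this file): a VMBus driver that needs
 * MMIO space, e.g. a synthetic framebuffer driver, would typically pair the
 * two helpers above along these lines, where "hdev" and "size" stand in for
 * the caller's hv_device and requested length:
 *
 *	struct resource *res;
 *	int ret;
 *
 *	ret = vmbus_allocate_mmio(&res, hdev, 0, -1, size, 0x100000, true);
 *	if (ret)
 *		return ret;
 *	... ioremap and use res->start .. res->end ...
 *	vmbus_free_mmio(res->start, resource_size(res));
 */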

static int vmbus_acpi_add(struct acpi_device *device)
{
	acpi_status result;
	int ret_val = -ENODEV;
	struct acpi_device *ancestor;

	hv_acpi_dev = device;

	result = acpi_walk_resources(device->handle, METHOD_NAME__CRS,
					vmbus_walk_resources, NULL);

	if (ACPI_FAILURE(result))
		goto acpi_walk_err;
	/*
	 * Some ancestor of the vmbus acpi device (Gen1 or Gen2
	 * firmware) is the VMOD that has the mmio ranges. Get that.
	 */
	for (ancestor = device->parent; ancestor; ancestor = ancestor->parent) {
		result = acpi_walk_resources(ancestor->handle, METHOD_NAME__CRS,
					     vmbus_walk_resources, NULL);

		if (ACPI_FAILURE(result))
			continue;
		if (hyperv_mmio) {
			vmbus_reserve_fb();
			break;
		}
	}
	ret_val = 0;

acpi_walk_err:
	complete(&probe_event);
	if (ret_val)
		vmbus_acpi_remove(device);
	return ret_val;
}

static const struct acpi_device_id vmbus_acpi_device_ids[] = {
	{"VMBUS", 0},
	{"VMBus", 0},
	{"", 0},
};
MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids);

static struct acpi_driver vmbus_acpi_driver = {
	.name = "vmbus",
	.ids = vmbus_acpi_device_ids,
	.ops = {
		.add = vmbus_acpi_add,
		.remove = vmbus_acpi_remove,
	},
};

static void hv_kexec_handler(void)
{
	hv_synic_clockevents_cleanup();
	vmbus_initiate_unload(false);
	vmbus_connection.conn_state = DISCONNECTED;
	/* Make sure conn_state is set as hv_synic_cleanup checks for it */
	mb();
	cpuhp_remove_state(hyperv_cpuhp_online);
	hyperv_cleanup();
};

static void hv_crash_handler(struct pt_regs *regs)
{
	vmbus_initiate_unload(true);
	/*
	 * In crash handler we can't schedule synic cleanup for all CPUs,
	 * doing the cleanup for current CPU only. This should be sufficient
	 * for kdump.
	 */
	vmbus_connection.conn_state = DISCONNECTED;
	hv_synic_cleanup(smp_processor_id());
	hyperv_cleanup();
};

static int __init hv_acpi_init(void)
{
	int ret, t;

	if (!hv_is_hyperv_initialized())
		return -ENODEV;

	init_completion(&probe_event);

	/*
	 * Get ACPI resources first.
	 */
	ret = acpi_bus_register_driver(&vmbus_acpi_driver);

	if (ret)
		return ret;

	t = wait_for_completion_timeout(&probe_event, 5*HZ);
	if (t == 0) {
		ret = -ETIMEDOUT;
		goto cleanup;
	}

	ret = vmbus_bus_init();
	if (ret)
		goto cleanup;

	hv_setup_kexec_handler(hv_kexec_handler);
	hv_setup_crash_handler(hv_crash_handler);

	return 0;

cleanup:
	acpi_bus_unregister_driver(&vmbus_acpi_driver);
	hv_acpi_dev = NULL;
	return ret;
}

static void __exit vmbus_exit(void)
{
	int cpu;

	hv_remove_kexec_handler();
	hv_remove_crash_handler();
	vmbus_connection.conn_state = DISCONNECTED;
	hv_synic_clockevents_cleanup();
	vmbus_disconnect();
	hv_remove_vmbus_irq();
	for_each_online_cpu(cpu) {
		struct hv_per_cpu_context *hv_cpu
			= per_cpu_ptr(hv_context.cpu_context, cpu);

		tasklet_kill(&hv_cpu->msg_dpc);
	}
	vmbus_free_channels();

	if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
		kmsg_dump_unregister(&hv_kmsg_dumper);
		unregister_die_notifier(&hyperv_die_block);
		atomic_notifier_chain_unregister(&panic_notifier_list,
						 &hyperv_panic_block);
	}

	free_page((unsigned long)hv_panic_page);
	unregister_sysctl_table(hv_ctl_table_hdr);
	hv_ctl_table_hdr = NULL;
	bus_unregister(&hv_bus);

	cpuhp_remove_state(hyperv_cpuhp_online);
	hv_synic_free();
	acpi_bus_unregister_driver(&vmbus_acpi_driver);
}


MODULE_LICENSE("GPL");

subsys_initcall(hv_acpi_init);
module_exit(vmbus_exit);