/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 *   K. Y. Srinivasan <kys@microsoft.com>
 *
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/sysctl.h>
#include <linux/slab.h>
#include <linux/acpi.h>
#include <linux/completion.h>
#include <linux/hyperv.h>
#include <linux/kernel_stat.h>
#include <linux/clockchips.h>
#include <linux/cpu.h>
#include <asm/hyperv.h>
#include <asm/hypervisor.h>
#include <asm/mshyperv.h>
#include <linux/notifier.h>
#include <linux/ptrace.h>
#include <linux/screen_info.h>
#include <linux/kdebug.h>
#include "hyperv_vmbus.h"

static struct acpi_device *hv_acpi_dev;

static struct tasklet_struct msg_dpc;
static struct completion probe_event;
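
/*
 * Hyper-V lets a guest hand five register-sized values to the host on a
 * crash: whatever is written to HV_X64_MSR_CRASH_P0..P4 becomes visible
 * to the host once HV_CRASH_CTL_CRASH_NOTIFY is written to
 * HV_X64_MSR_CRASH_CTL. hyperv_report_panic() below uses this to pass up
 * the instruction pointer and a few general-purpose registers.
 */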
static void hyperv_report_panic(struct pt_regs *regs)
{
        static bool panic_reported;

        /*
         * We prefer to report panic on 'die' chain as we have proper
         * registers to report, but if we miss it (e.g. on BUG()) we need
         * to report it on 'panic'.
         */
        if (panic_reported)
                return;
        panic_reported = true;

        wrmsrl(HV_X64_MSR_CRASH_P0, regs->ip);
        wrmsrl(HV_X64_MSR_CRASH_P1, regs->ax);
        wrmsrl(HV_X64_MSR_CRASH_P2, regs->bx);
        wrmsrl(HV_X64_MSR_CRASH_P3, regs->cx);
        wrmsrl(HV_X64_MSR_CRASH_P4, regs->dx);

        /*
         * Let Hyper-V know there is crash data available
         */
        wrmsrl(HV_X64_MSR_CRASH_CTL, HV_CRASH_CTL_CRASH_NOTIFY);
}

static int hyperv_panic_event(struct notifier_block *nb, unsigned long val,
                              void *args)
{
        struct pt_regs *regs;

        regs = current_pt_regs();

        hyperv_report_panic(regs);
        return NOTIFY_DONE;
}

static int hyperv_die_event(struct notifier_block *nb, unsigned long val,
                            void *args)
{
        struct die_args *die = (struct die_args *)args;
        struct pt_regs *regs = die->regs;

        hyperv_report_panic(regs);
        return NOTIFY_DONE;
}

static struct notifier_block hyperv_die_block = {
        .notifier_call = hyperv_die_event,
};
static struct notifier_block hyperv_panic_block = {
        .notifier_call = hyperv_panic_event,
};

struct resource *hyperv_mmio;

static int vmbus_exists(void)
{
        if (hv_acpi_dev == NULL)
                return -ENODEV;

        return 0;
}

#define VMBUS_ALIAS_LEN ((sizeof((struct hv_vmbus_device_id *)0)->guid) * 2)
static void print_alias_name(struct hv_device *hv_dev, char *alias_name)
{
        int i;
        for (i = 0; i < VMBUS_ALIAS_LEN; i += 2)
                sprintf(&alias_name[i], "%02x", hv_dev->dev_type.b[i/2]);
}
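
/*
 * Each monitor page tracks channels in groups of 32: bit i of
 * trigger_group[g].pending belongs to the channel whose monitor ID is
 * g * 32 + i. For example, monitor ID 75 maps to group 2, offset 11.
 */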
static u8 channel_monitor_group(struct vmbus_channel *channel)
{
        return (u8)channel->offermsg.monitorid / 32;
}

static u8 channel_monitor_offset(struct vmbus_channel *channel)
{
        return (u8)channel->offermsg.monitorid % 32;
}

static u32 channel_pending(struct vmbus_channel *channel,
                           struct hv_monitor_page *monitor_page)
{
        u8 monitor_group = channel_monitor_group(channel);
        return monitor_page->trigger_group[monitor_group].pending;
}

static u32 channel_latency(struct vmbus_channel *channel,
                           struct hv_monitor_page *monitor_page)
{
        u8 monitor_group = channel_monitor_group(channel);
        u8 monitor_offset = channel_monitor_offset(channel);
        return monitor_page->latency[monitor_group][monitor_offset];
}

static u32 channel_conn_id(struct vmbus_channel *channel,
                           struct hv_monitor_page *monitor_page)
{
        u8 monitor_group = channel_monitor_group(channel);
        u8 monitor_offset = channel_monitor_offset(channel);
        return monitor_page->parameter[monitor_group][monitor_offset].connectionid.u.id;
}

static ssize_t id_show(struct device *dev, struct device_attribute *dev_attr,
                       char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);

        if (!hv_dev->channel)
                return -ENODEV;
        return sprintf(buf, "%d\n", hv_dev->channel->offermsg.child_relid);
}
static DEVICE_ATTR_RO(id);

static ssize_t state_show(struct device *dev, struct device_attribute *dev_attr,
                          char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);

        if (!hv_dev->channel)
                return -ENODEV;
        return sprintf(buf, "%d\n", hv_dev->channel->state);
}
static DEVICE_ATTR_RO(state);

static ssize_t monitor_id_show(struct device *dev,
                               struct device_attribute *dev_attr, char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);

        if (!hv_dev->channel)
                return -ENODEV;
        return sprintf(buf, "%d\n", hv_dev->channel->offermsg.monitorid);
}
static DEVICE_ATTR_RO(monitor_id);

static ssize_t class_id_show(struct device *dev,
                             struct device_attribute *dev_attr, char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);

        if (!hv_dev->channel)
                return -ENODEV;
        return sprintf(buf, "{%pUl}\n",
                       hv_dev->channel->offermsg.offer.if_type.b);
}
static DEVICE_ATTR_RO(class_id);

static ssize_t device_id_show(struct device *dev,
                              struct device_attribute *dev_attr, char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);

        if (!hv_dev->channel)
                return -ENODEV;
        return sprintf(buf, "{%pUl}\n",
                       hv_dev->channel->offermsg.offer.if_instance.b);
}
static DEVICE_ATTR_RO(device_id);

static ssize_t modalias_show(struct device *dev,
                             struct device_attribute *dev_attr, char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);
        char alias_name[VMBUS_ALIAS_LEN + 1];

        print_alias_name(hv_dev, alias_name);
        return sprintf(buf, "vmbus:%s\n", alias_name);
}
static DEVICE_ATTR_RO(modalias);

static ssize_t server_monitor_pending_show(struct device *dev,
                                           struct device_attribute *dev_attr,
                                           char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);

        if (!hv_dev->channel)
                return -ENODEV;
        /*
         * Server-side state lives in monitor_pages[0]; the original code
         * read monitor_pages[1] here, which is the client page used by
         * the client_* attributes below.
         */
        return sprintf(buf, "%d\n",
                       channel_pending(hv_dev->channel,
                                       vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_pending);

static ssize_t client_monitor_pending_show(struct device *dev,
                                           struct device_attribute *dev_attr,
                                           char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);

        if (!hv_dev->channel)
                return -ENODEV;
        return sprintf(buf, "%d\n",
                       channel_pending(hv_dev->channel,
                                       vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_pending);

static ssize_t server_monitor_latency_show(struct device *dev,
                                           struct device_attribute *dev_attr,
                                           char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);

        if (!hv_dev->channel)
                return -ENODEV;
        return sprintf(buf, "%d\n",
                       channel_latency(hv_dev->channel,
                                       vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_latency);

static ssize_t client_monitor_latency_show(struct device *dev,
                                           struct device_attribute *dev_attr,
                                           char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);

        if (!hv_dev->channel)
                return -ENODEV;
        return sprintf(buf, "%d\n",
                       channel_latency(hv_dev->channel,
                                       vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_latency);

static ssize_t server_monitor_conn_id_show(struct device *dev,
                                           struct device_attribute *dev_attr,
                                           char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);

        if (!hv_dev->channel)
                return -ENODEV;
        return sprintf(buf, "%d\n",
                       channel_conn_id(hv_dev->channel,
                                       vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_conn_id);

static ssize_t client_monitor_conn_id_show(struct device *dev,
                                           struct device_attribute *dev_attr,
                                           char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);

        if (!hv_dev->channel)
                return -ENODEV;
        return sprintf(buf, "%d\n",
                       channel_conn_id(hv_dev->channel,
                                       vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_conn_id);
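
/*
 * The *_intr_mask, *_read_index, *_write_index and *_bytes_avail
 * attributes below report point-in-time snapshots of the inbound and
 * outbound ring buffers taken via hv_ringbuffer_get_debuginfo(); they
 * are debugging aids and may be stale by the time userspace reads them.
 */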
static ssize_t out_intr_mask_show(struct device *dev,
                                  struct device_attribute *dev_attr, char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);
        struct hv_ring_buffer_debug_info outbound;

        if (!hv_dev->channel)
                return -ENODEV;
        hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
        return sprintf(buf, "%d\n", outbound.current_interrupt_mask);
}
static DEVICE_ATTR_RO(out_intr_mask);

static ssize_t out_read_index_show(struct device *dev,
                                   struct device_attribute *dev_attr, char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);
        struct hv_ring_buffer_debug_info outbound;

        if (!hv_dev->channel)
                return -ENODEV;
        hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
        return sprintf(buf, "%d\n", outbound.current_read_index);
}
static DEVICE_ATTR_RO(out_read_index);

static ssize_t out_write_index_show(struct device *dev,
                                    struct device_attribute *dev_attr,
                                    char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);
        struct hv_ring_buffer_debug_info outbound;

        if (!hv_dev->channel)
                return -ENODEV;
        hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
        return sprintf(buf, "%d\n", outbound.current_write_index);
}
static DEVICE_ATTR_RO(out_write_index);

static ssize_t out_read_bytes_avail_show(struct device *dev,
                                         struct device_attribute *dev_attr,
                                         char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);
        struct hv_ring_buffer_debug_info outbound;

        if (!hv_dev->channel)
                return -ENODEV;
        hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
        return sprintf(buf, "%d\n", outbound.bytes_avail_toread);
}
static DEVICE_ATTR_RO(out_read_bytes_avail);

static ssize_t out_write_bytes_avail_show(struct device *dev,
                                          struct device_attribute *dev_attr,
                                          char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);
        struct hv_ring_buffer_debug_info outbound;

        if (!hv_dev->channel)
                return -ENODEV;
        hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
        return sprintf(buf, "%d\n", outbound.bytes_avail_towrite);
}
static DEVICE_ATTR_RO(out_write_bytes_avail);

static ssize_t in_intr_mask_show(struct device *dev,
                                 struct device_attribute *dev_attr, char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);
        struct hv_ring_buffer_debug_info inbound;

        if (!hv_dev->channel)
                return -ENODEV;
        hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
        return sprintf(buf, "%d\n", inbound.current_interrupt_mask);
}
static DEVICE_ATTR_RO(in_intr_mask);

static ssize_t in_read_index_show(struct device *dev,
                                  struct device_attribute *dev_attr, char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);
        struct hv_ring_buffer_debug_info inbound;

        if (!hv_dev->channel)
                return -ENODEV;
        hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
        return sprintf(buf, "%d\n", inbound.current_read_index);
}
static DEVICE_ATTR_RO(in_read_index);

static ssize_t in_write_index_show(struct device *dev,
                                   struct device_attribute *dev_attr, char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);
        struct hv_ring_buffer_debug_info inbound;

        if (!hv_dev->channel)
                return -ENODEV;
        hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
        return sprintf(buf, "%d\n", inbound.current_write_index);
}
static DEVICE_ATTR_RO(in_write_index);

static ssize_t in_read_bytes_avail_show(struct device *dev,
                                        struct device_attribute *dev_attr,
                                        char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);
        struct hv_ring_buffer_debug_info inbound;

        if (!hv_dev->channel)
                return -ENODEV;
        hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
        return sprintf(buf, "%d\n", inbound.bytes_avail_toread);
}
static DEVICE_ATTR_RO(in_read_bytes_avail);

static ssize_t in_write_bytes_avail_show(struct device *dev,
                                         struct device_attribute *dev_attr,
                                         char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);
        struct hv_ring_buffer_debug_info inbound;

        if (!hv_dev->channel)
                return -ENODEV;
        hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
        return sprintf(buf, "%d\n", inbound.bytes_avail_towrite);
}
static DEVICE_ATTR_RO(in_write_bytes_avail);
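
/*
 * channel_vp_mapping prints one "child_relid:target_cpu" pair per line,
 * the primary channel first and then any sub-channels, e.g. (values are
 * illustrative):
 *
 *	14:0
 *	15:1
 */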
static ssize_t channel_vp_mapping_show(struct device *dev,
                                       struct device_attribute *dev_attr,
                                       char *buf)
{
        struct hv_device *hv_dev = device_to_hv_device(dev);
        struct vmbus_channel *channel = hv_dev->channel, *cur_sc;
        unsigned long flags;
        int buf_size = PAGE_SIZE, n_written, tot_written;
        struct list_head *cur;

        if (!channel)
                return -ENODEV;

        tot_written = snprintf(buf, buf_size, "%u:%u\n",
                channel->offermsg.child_relid, channel->target_cpu);

        spin_lock_irqsave(&channel->lock, flags);

        list_for_each(cur, &channel->sc_list) {
                if (tot_written >= buf_size - 1)
                        break;

                cur_sc = list_entry(cur, struct vmbus_channel, sc_list);
                n_written = scnprintf(buf + tot_written,
                                     buf_size - tot_written,
                                     "%u:%u\n",
                                     cur_sc->offermsg.child_relid,
                                     cur_sc->target_cpu);
                tot_written += n_written;
        }

        spin_unlock_irqrestore(&channel->lock, flags);

        return tot_written;
}
static DEVICE_ATTR_RO(channel_vp_mapping);

/* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */
static struct attribute *vmbus_attrs[] = {
        &dev_attr_id.attr,
        &dev_attr_state.attr,
        &dev_attr_monitor_id.attr,
        &dev_attr_class_id.attr,
        &dev_attr_device_id.attr,
        &dev_attr_modalias.attr,
        &dev_attr_server_monitor_pending.attr,
        &dev_attr_client_monitor_pending.attr,
        &dev_attr_server_monitor_latency.attr,
        &dev_attr_client_monitor_latency.attr,
        &dev_attr_server_monitor_conn_id.attr,
        &dev_attr_client_monitor_conn_id.attr,
        &dev_attr_out_intr_mask.attr,
        &dev_attr_out_read_index.attr,
        &dev_attr_out_write_index.attr,
        &dev_attr_out_read_bytes_avail.attr,
        &dev_attr_out_write_bytes_avail.attr,
        &dev_attr_in_intr_mask.attr,
        &dev_attr_in_read_index.attr,
        &dev_attr_in_write_index.attr,
        &dev_attr_in_read_bytes_avail.attr,
        &dev_attr_in_write_bytes_avail.attr,
        &dev_attr_channel_vp_mapping.attr,
        NULL,
};
ATTRIBUTE_GROUPS(vmbus);
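
/*
 * The uevent/modalias machinery below emits "vmbus:" followed by the
 * class GUID bytes in their stored (little-endian) order. For example, a
 * device whose class GUID is f8615163-df3e-46c5-913f-f2d2f965ed0e (the
 * GUID here is illustrative) would yield
 * MODALIAS=vmbus:635161f83edfc546913ff2d2f965ed0e.
 */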
/*
 * vmbus_uevent - add uevent for our device
 *
 * This routine is invoked when a device is added or removed on the vmbus to
 * generate a uevent to udev in userspace. udev will then look at its rules
 * and the uevent generated here to load the appropriate driver.
 *
 * The alias string will be of the form vmbus:guid, where guid is the string
 * representation of the device guid (each byte of the guid is represented
 * by two hex characters).
 */
static int vmbus_uevent(struct device *device, struct kobj_uevent_env *env)
{
        struct hv_device *dev = device_to_hv_device(device);
        int ret;
        char alias_name[VMBUS_ALIAS_LEN + 1];

        print_alias_name(dev, alias_name);
        ret = add_uevent_var(env, "MODALIAS=vmbus:%s", alias_name);
        return ret;
}

static const uuid_le null_guid;

static inline bool is_null_guid(const uuid_le *guid)
{
        if (uuid_le_cmp(*guid, null_guid))
                return false;
        return true;
}

/*
 * Return a matching hv_vmbus_device_id pointer.
 * If there is no match, return NULL.
 */
static const struct hv_vmbus_device_id *hv_vmbus_get_id(
                                        const struct hv_vmbus_device_id *id,
                                        const uuid_le *guid)
{
        for (; !is_null_guid(&id->guid); id++)
                if (!uuid_le_cmp(id->guid, *guid))
                        return id;

        return NULL;
}
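
/*
 * A client driver advertises the class GUIDs it services through its
 * id_table, terminated by a zeroed entry, which hv_vmbus_get_id() walks
 * above. A minimal sketch (the GUID bytes are illustrative):
 *
 *	static const struct hv_vmbus_device_id id_table[] = {
 *		{ VMBUS_DEVICE(0x63, 0x51, 0x61, 0xf8, 0x3e, 0xdf, 0xc5, 0x46,
 *			       0x91, 0x3f, 0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e) },
 *		{ },
 *	};
 */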
/*
 * vmbus_match - Attempt to match the specified device to the specified driver
 */
static int vmbus_match(struct device *device, struct device_driver *driver)
{
        struct hv_driver *drv = drv_to_hv_drv(driver);
        struct hv_device *hv_dev = device_to_hv_device(device);

        if (hv_vmbus_get_id(drv->id_table, &hv_dev->dev_type))
                return 1;

        return 0;
}

/*
 * vmbus_probe - Add the new vmbus's child device
 */
static int vmbus_probe(struct device *child_device)
{
        int ret = 0;
        struct hv_driver *drv =
                        drv_to_hv_drv(child_device->driver);
        struct hv_device *dev = device_to_hv_device(child_device);
        const struct hv_vmbus_device_id *dev_id;

        dev_id = hv_vmbus_get_id(drv->id_table, &dev->dev_type);
        if (drv->probe) {
                ret = drv->probe(dev, dev_id);
                if (ret != 0)
                        pr_err("probe failed for device %s (%d)\n",
                               dev_name(child_device), ret);

        } else {
                pr_err("probe not set for driver %s\n",
                       dev_name(child_device));
                ret = -ENODEV;
        }
        return ret;
}

/*
 * vmbus_remove - Remove a vmbus device
 */
static int vmbus_remove(struct device *child_device)
{
        struct hv_driver *drv;
        struct hv_device *dev = device_to_hv_device(child_device);

        if (child_device->driver) {
                drv = drv_to_hv_drv(child_device->driver);
                if (drv->remove)
                        drv->remove(dev);
        }

        return 0;
}

/*
 * vmbus_shutdown - Shutdown a vmbus device
 */
static void vmbus_shutdown(struct device *child_device)
{
        struct hv_driver *drv;
        struct hv_device *dev = device_to_hv_device(child_device);

        /* The device may not be attached yet */
        if (!child_device->driver)
                return;

        drv = drv_to_hv_drv(child_device->driver);

        if (drv->shutdown)
                drv->shutdown(dev);
}

/*
 * vmbus_device_release - Final callback release of the vmbus child device
 */
static void vmbus_device_release(struct device *device)
{
        struct hv_device *hv_dev = device_to_hv_device(device);
        struct vmbus_channel *channel = hv_dev->channel;

        hv_process_channel_removal(channel,
                                   channel->offermsg.child_relid);
        kfree(hv_dev);
}

/* The one and only one */
static struct bus_type hv_bus = {
        .name =         "vmbus",
        .match =        vmbus_match,
        .shutdown =     vmbus_shutdown,
        .remove =       vmbus_remove,
        .probe =        vmbus_probe,
        .uevent =       vmbus_uevent,
        .dev_groups =   vmbus_groups,
};

struct onmessage_work_context {
        struct work_struct work;
        struct hv_message msg;
};

static void vmbus_onmessage_work(struct work_struct *work)
{
        struct onmessage_work_context *ctx;

        /* Do not process messages if we're in DISCONNECTED state */
        if (vmbus_connection.conn_state == DISCONNECTED)
                return;

        ctx = container_of(work, struct onmessage_work_context,
                           work);
        vmbus_onmessage(&ctx->msg);
        kfree(ctx);
}

static void hv_process_timer_expiration(struct hv_message *msg, int cpu)
{
        struct clock_event_device *dev = hv_context.clk_evt[cpu];

        if (dev->event_handler)
                dev->event_handler(dev);

        msg->header.message_type = HVMSG_NONE;

        /*
         * Make sure the write to MessageType (ie set to
         * HVMSG_NONE) happens before we read the
         * MessagePending and EOMing. Otherwise, the EOMing
         * will not deliver any more messages since there is
         * no empty slot
         */
        mb();

        if (msg->header.message_flags.msg_pending) {
                /*
                 * This will cause message queue rescan to
                 * possibly deliver another msg from the
                 * hypervisor
                 */
                wrmsrl(HV_X64_MSR_EOM, 0);
        }
}
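
/*
 * vmbus_on_msg_dpc() runs in tasklet context, so handlers that may
 * sleep (VMHT_BLOCKING) cannot be invoked directly: the message is
 * copied and deferred to vmbus_connection.work_queue instead, while
 * non-blocking handlers run inline.
 */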
static void vmbus_on_msg_dpc(unsigned long data)
{
        int cpu = smp_processor_id();
        void *page_addr = hv_context.synic_message_page[cpu];
        struct hv_message *msg = (struct hv_message *)page_addr +
                                  VMBUS_MESSAGE_SINT;
        struct vmbus_channel_message_header *hdr;
        struct vmbus_channel_message_table_entry *entry;
        struct onmessage_work_context *ctx;

        while (1) {
                if (msg->header.message_type == HVMSG_NONE)
                        /* no msg */
                        break;

                hdr = (struct vmbus_channel_message_header *)msg->u.payload;

                if (hdr->msgtype >= CHANNELMSG_COUNT) {
                        WARN_ONCE(1, "unknown msgtype=%d\n", hdr->msgtype);
                        goto msg_handled;
                }

                entry = &channel_message_table[hdr->msgtype];
                if (entry->handler_type == VMHT_BLOCKING) {
                        ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
                        if (ctx == NULL)
                                continue;

                        INIT_WORK(&ctx->work, vmbus_onmessage_work);
                        memcpy(&ctx->msg, msg, sizeof(*msg));

                        queue_work(vmbus_connection.work_queue, &ctx->work);
                } else
                        entry->message_handler(hdr);

msg_handled:
                msg->header.message_type = HVMSG_NONE;

                /*
                 * Make sure the write to MessageType (ie set to
                 * HVMSG_NONE) happens before we read the
                 * MessagePending and EOMing. Otherwise, the EOMing
                 * will not deliver any more messages since there is
                 * no empty slot
                 */
                mb();

                if (msg->header.message_flags.msg_pending) {
                        /*
                         * This will cause message queue rescan to
                         * possibly deliver another msg from the
                         * hypervisor
                         */
                        wrmsrl(HV_X64_MSR_EOM, 0);
                }
        }
}

static void vmbus_isr(void)
{
        int cpu = smp_processor_id();
        void *page_addr;
        struct hv_message *msg;
        union hv_synic_event_flags *event;
        bool handled = false;

        page_addr = hv_context.synic_event_page[cpu];
        if (page_addr == NULL)
                return;

        event = (union hv_synic_event_flags *)page_addr +
                                         VMBUS_MESSAGE_SINT;
        /*
         * Check for events before checking for messages. This is the order
         * in which events and messages are checked in Windows guests on
         * Hyper-V, and the Windows team suggested we do the same.
         */

        if ((vmbus_proto_version == VERSION_WS2008) ||
            (vmbus_proto_version == VERSION_WIN7)) {

                /* Since we are a child, we only need to check bit 0 */
                if (sync_test_and_clear_bit(0,
                        (unsigned long *) &event->flags32[0])) {
                        handled = true;
                }
        } else {
                /*
                 * Our host is win8 or above. The signaling mechanism
                 * has changed and we can directly look at the event page.
                 * If bit n is set then we have an interrupt on the channel
                 * whose id is n.
                 */
                handled = true;
        }

        if (handled)
                tasklet_schedule(hv_context.event_dpc[cpu]);

        page_addr = hv_context.synic_message_page[cpu];
        msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;

        /* Check if there are actual msgs to be processed */
        if (msg->header.message_type != HVMSG_NONE) {
                if (msg->header.message_type == HVMSG_TIMER_EXPIRED)
                        hv_process_timer_expiration(msg, cpu);
                else
                        tasklet_schedule(&msg_dpc);
        }
}

/*
 * vmbus_bus_init - Main vmbus driver initialization routine.
 *
 * Here, we
 *	- initialize the vmbus driver context
 *	- invoke the vmbus hv main init routine
 *	- retrieve the channel offers
 */
static int vmbus_bus_init(void)
{
        int ret;

        /* Hypervisor initialization...setup hypercall page..etc */
        ret = hv_init();
        if (ret != 0) {
                pr_err("Unable to initialize the hypervisor - 0x%x\n", ret);
                return ret;
        }

        tasklet_init(&msg_dpc, vmbus_on_msg_dpc, 0);

        ret = bus_register(&hv_bus);
        if (ret)
                goto err_cleanup;

        hv_setup_vmbus_irq(vmbus_isr);

        ret = hv_synic_alloc();
        if (ret)
                goto err_alloc;
        /*
         * Initialize the per-cpu interrupt state and
         * connect to the host.
         */
        on_each_cpu(hv_synic_init, NULL, 1);
        ret = vmbus_connect();
        if (ret)
                goto err_connect;

        if (vmbus_proto_version > VERSION_WIN7)
                cpu_hotplug_disable();

        /*
         * Only register if the crash MSRs are available
         */
        if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
                register_die_notifier(&hyperv_die_block);
                atomic_notifier_chain_register(&panic_notifier_list,
                                               &hyperv_panic_block);
        }

        vmbus_request_offers();

        return 0;

err_connect:
        on_each_cpu(hv_synic_cleanup, NULL, 1);
err_alloc:
        hv_synic_free();
        hv_remove_vmbus_irq();

        bus_unregister(&hv_bus);

err_cleanup:
        hv_cleanup();

        return ret;
}

/**
 * __vmbus_driver_register() - Register a vmbus's driver
 * @hv_driver: Pointer to driver structure you want to register
 * @owner: owner module of the drv
 * @mod_name: module name string
 *
 * Registers the given driver with Linux through the 'driver_register()' call
 * and sets up the hyper-v vmbus handling for this driver.
 * It will return the state of the 'driver_register()' call.
 *
 */
int __vmbus_driver_register(struct hv_driver *hv_driver, struct module *owner, const char *mod_name)
{
        int ret;

        pr_info("registering driver %s\n", hv_driver->name);

        ret = vmbus_exists();
        if (ret < 0)
                return ret;

        hv_driver->driver.name = hv_driver->name;
        hv_driver->driver.owner = owner;
        hv_driver->driver.mod_name = mod_name;
        hv_driver->driver.bus = &hv_bus;

        ret = driver_register(&hv_driver->driver);

        return ret;
}
EXPORT_SYMBOL_GPL(__vmbus_driver_register);
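
/*
 * Client drivers normally reach this through the vmbus_driver_register()
 * wrapper macro, which supplies THIS_MODULE and KBUILD_MODNAME. A minimal
 * sketch (names are illustrative; error handling elided):
 *
 *	static struct hv_driver my_drv = {
 *		.name     = "my_drv",
 *		.id_table = id_table,
 *		.probe    = my_probe,
 *		.remove   = my_remove,
 *	};
 *
 *	static int __init my_init(void)
 *	{
 *		return vmbus_driver_register(&my_drv);
 *	}
 */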
/**
 * vmbus_driver_unregister() - Unregister a vmbus's driver
 * @hv_driver: Pointer to driver structure you want to
 *             un-register
 *
 * Un-register the given driver that was previously registered with a call to
 * vmbus_driver_register()
 */
void vmbus_driver_unregister(struct hv_driver *hv_driver)
{
        pr_info("unregistering driver %s\n", hv_driver->name);

        if (!vmbus_exists())
                driver_unregister(&hv_driver->driver);
}
EXPORT_SYMBOL_GPL(vmbus_driver_unregister);

/*
 * vmbus_device_create - Creates and registers a new child device
 * on the vmbus.
 */
struct hv_device *vmbus_device_create(const uuid_le *type,
                                      const uuid_le *instance,
                                      struct vmbus_channel *channel)
{
        struct hv_device *child_device_obj;

        child_device_obj = kzalloc(sizeof(struct hv_device), GFP_KERNEL);
        if (!child_device_obj) {
                pr_err("Unable to allocate device object for child device\n");
                return NULL;
        }

        child_device_obj->channel = channel;
        memcpy(&child_device_obj->dev_type, type, sizeof(uuid_le));
        memcpy(&child_device_obj->dev_instance, instance,
               sizeof(uuid_le));

        return child_device_obj;
}

/*
 * vmbus_device_register - Register the child device
 */
int vmbus_device_register(struct hv_device *child_device_obj)
{
        int ret = 0;

        dev_set_name(&child_device_obj->device, "vmbus_%d",
                     child_device_obj->channel->id);

        child_device_obj->device.bus = &hv_bus;
        child_device_obj->device.parent = &hv_acpi_dev->dev;
        child_device_obj->device.release = vmbus_device_release;

        /*
         * Register with the LDM. This will kick off the driver/device
         * binding...which will eventually call vmbus_match() and vmbus_probe()
         */
        ret = device_register(&child_device_obj->device);

        if (ret)
                pr_err("Unable to register child device\n");
        else
                pr_debug("child device %s registered\n",
                         dev_name(&child_device_obj->device));

        return ret;
}

/*
 * vmbus_device_unregister - Remove the specified child device
 * from the vmbus.
 */
void vmbus_device_unregister(struct hv_device *device_obj)
{
        pr_debug("child device %s unregistered\n",
                 dev_name(&device_obj->device));

        /*
         * Kick off the process of unregistering the device.
         * This will call vmbus_remove() and eventually vmbus_device_release()
         */
        device_unregister(&device_obj->device);
}
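
/*
 * Everything from here down deals with enumerating VMBus itself through
 * ACPI and with carving up the MMIO windows described by the firmware's
 * _CRS objects.
 */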
/*
 * VMBUS is an acpi enumerated device. Get the information we
 * need from DSDT.
 */
#define VTPM_BASE_ADDRESS 0xfed40000
static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *ctx)
{
        resource_size_t start = 0;
        resource_size_t end = 0;
        struct resource *new_res;
        struct resource **old_res = &hyperv_mmio;
        struct resource **prev_res = NULL;

        switch (res->type) {

        /*
         * "Address" descriptors are for bus windows. Ignore
         * "memory" descriptors, which are for registers on
         * devices.
         */
        case ACPI_RESOURCE_TYPE_ADDRESS32:
                start = res->data.address32.address.minimum;
                end = res->data.address32.address.maximum;
                break;

        case ACPI_RESOURCE_TYPE_ADDRESS64:
                start = res->data.address64.address.minimum;
                end = res->data.address64.address.maximum;
                break;

        default:
                /* Unused resource type */
                return AE_OK;

        }
        /*
         * Ignore ranges that are below 1MB, as they're not
         * necessary or useful here.
         */
        if (end < 0x100000)
                return AE_OK;

        new_res = kzalloc(sizeof(*new_res), GFP_ATOMIC);
        if (!new_res)
                return AE_NO_MEMORY;

        /* If this range overlaps the virtual TPM, truncate it. */
        if (end > VTPM_BASE_ADDRESS && start < VTPM_BASE_ADDRESS)
                end = VTPM_BASE_ADDRESS;

        new_res->name = "hyperv mmio";
        new_res->flags = IORESOURCE_MEM;
        new_res->start = start;
        new_res->end = end;

        /*
         * Stick ranges from higher in address space at the front of the list.
         * If two ranges are adjacent, merge them.
         */
        do {
                if (!*old_res) {
                        *old_res = new_res;
                        break;
                }

                if (((*old_res)->end + 1) == new_res->start) {
                        (*old_res)->end = new_res->end;
                        kfree(new_res);
                        break;
                }

                if ((*old_res)->start == new_res->end + 1) {
                        (*old_res)->start = new_res->start;
                        kfree(new_res);
                        break;
                }

                if ((*old_res)->end < new_res->start) {
                        new_res->sibling = *old_res;
                        if (prev_res)
                                (*prev_res)->sibling = new_res;
                        *old_res = new_res;
                        break;
                }

                prev_res = old_res;
                old_res = &(*old_res)->sibling;

        } while (1);

        return AE_OK;
}
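
/*
 * Note that the list built above keeps higher address ranges at the
 * front, so vmbus_allocate_mmio() below ends up trying candidate windows
 * from the top of the address space downward.
 */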
static int vmbus_acpi_remove(struct acpi_device *device)
{
        struct resource *cur_res;
        struct resource *next_res;

        if (hyperv_mmio) {
                for (cur_res = hyperv_mmio; cur_res; cur_res = next_res) {
                        next_res = cur_res->sibling;
                        kfree(cur_res);
                }
        }

        return 0;
}

/**
 * vmbus_allocate_mmio() - Pick a memory-mapped I/O range.
 * @new:		If successful, supplies a pointer to the
 *			allocated MMIO space.
 * @device_obj:		Identifies the caller
 * @min:		Minimum guest physical address of the
 *			allocation
 * @max:		Maximum guest physical address
 * @size:		Size of the range to be allocated
 * @align:		Alignment of the range to be allocated
 * @fb_overlap_ok:	Whether this allocation can be allowed
 *			to overlap the video frame buffer.
 *
 * This function walks the resources granted to VMBus by the
 * _CRS object in the ACPI namespace underneath the parent
 * "bridge", whether that's a root PCI bus in the Generation 1
 * case or a Module Device in the Generation 2 case. It then
 * attempts to allocate from the global MMIO pool in a way that
 * matches the constraints supplied in these parameters and by
 * that _CRS.
 *
 * Return: 0 on success, -errno on failure
 */
int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj,
                        resource_size_t min, resource_size_t max,
                        resource_size_t size, resource_size_t align,
                        bool fb_overlap_ok)
{
        struct resource *iter;
        resource_size_t range_min, range_max, start, local_min, local_max;
        const char *dev_n = dev_name(&device_obj->device);
        u32 fb_end = screen_info.lfb_base + (screen_info.lfb_size << 1);
        int i;

        for (iter = hyperv_mmio; iter; iter = iter->sibling) {
                if ((iter->start >= max) || (iter->end <= min))
                        continue;

                range_min = iter->start;
                range_max = iter->end;

                /*
                 * If this range overlaps the frame buffer, split it into
                 * two tries.
                 */
                for (i = 0; i < 2; i++) {
                        local_min = range_min;
                        local_max = range_max;
                        if (fb_overlap_ok || (range_min >= fb_end) ||
                            (range_max <= screen_info.lfb_base)) {
                                i++;
                        } else {
                                if ((range_min <= screen_info.lfb_base) &&
                                    (range_max >= screen_info.lfb_base)) {
                                        /*
                                         * The frame buffer is in this window,
                                         * so trim this into the part that
                                         * precedes the frame buffer.
                                         */
                                        local_max = screen_info.lfb_base - 1;
                                        range_min = fb_end;
                                } else {
                                        range_min = fb_end;
                                        continue;
                                }
                        }

                        start = (local_min + align - 1) & ~(align - 1);
                        for (; start + size - 1 <= local_max; start += align) {
                                *new = request_mem_region_exclusive(start, size,
                                                                    dev_n);
                                if (*new)
                                        return 0;
                        }
                }
        }

        return -ENXIO;
}
EXPORT_SYMBOL_GPL(vmbus_allocate_mmio);

/**
 * vmbus_cpu_number_to_vp_number() - Map CPU to VP.
 * @cpu_number: CPU number in Linux terms
 *
 * This function returns the mapping between the Linux processor
 * number and the hypervisor's virtual processor number, useful
 * in making hypercalls and such that talk about specific
 * processors.
 *
 * Return: Virtual processor number in Hyper-V terms
 */
int vmbus_cpu_number_to_vp_number(int cpu_number)
{
        return hv_context.vp_index[cpu_number];
}
EXPORT_SYMBOL_GPL(vmbus_cpu_number_to_vp_number);
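
/*
 * vmbus_acpi_add() runs when the ACPI VMBUS/VMBus node binds. It walks
 * _CRS on the node itself and then, because the MMIO windows actually
 * live on an ancestor (the Gen2 Module Device or the Gen1 PCI bridge),
 * walks each ancestor until hyperv_mmio is populated, finally waking
 * hv_acpi_init() through probe_event.
 */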
static int vmbus_acpi_add(struct acpi_device *device)
{
        acpi_status result;
        int ret_val = -ENODEV;
        struct acpi_device *ancestor;

        hv_acpi_dev = device;

        result = acpi_walk_resources(device->handle, METHOD_NAME__CRS,
                                     vmbus_walk_resources, NULL);

        if (ACPI_FAILURE(result))
                goto acpi_walk_err;
        /*
         * Some ancestor of the vmbus acpi device (Gen1 or Gen2
         * firmware) is the VMOD that has the mmio ranges. Get that.
         */
        for (ancestor = device->parent; ancestor; ancestor = ancestor->parent) {
                result = acpi_walk_resources(ancestor->handle, METHOD_NAME__CRS,
                                             vmbus_walk_resources, NULL);

                if (ACPI_FAILURE(result))
                        continue;
                if (hyperv_mmio)
                        break;
        }
        ret_val = 0;

acpi_walk_err:
        complete(&probe_event);
        if (ret_val)
                vmbus_acpi_remove(device);
        return ret_val;
}

static const struct acpi_device_id vmbus_acpi_device_ids[] = {
        {"VMBUS", 0},
        {"VMBus", 0},
        {"", 0},
};
MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids);

static struct acpi_driver vmbus_acpi_driver = {
        .name = "vmbus",
        .ids = vmbus_acpi_device_ids,
        .ops = {
                .add = vmbus_acpi_add,
                .remove = vmbus_acpi_remove,
        },
};

static void hv_kexec_handler(void)
{
        int cpu;

        hv_synic_clockevents_cleanup();
        vmbus_initiate_unload();
        for_each_online_cpu(cpu)
                smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1);
        hv_cleanup();
}

static void hv_crash_handler(struct pt_regs *regs)
{
        vmbus_initiate_unload();
        /*
         * In crash handler we can't schedule synic cleanup for all CPUs,
         * doing the cleanup for current CPU only. This should be sufficient
         * for kdump.
         */
        hv_synic_cleanup(NULL);
        hv_cleanup();
}

static int __init hv_acpi_init(void)
{
        int ret, t;

        if (x86_hyper != &x86_hyper_ms_hyperv)
                return -ENODEV;

        init_completion(&probe_event);

        /*
         * Get ACPI resources first.
         */
        ret = acpi_bus_register_driver(&vmbus_acpi_driver);

        if (ret)
                return ret;

        t = wait_for_completion_timeout(&probe_event, 5*HZ);
        if (t == 0) {
                ret = -ETIMEDOUT;
                goto cleanup;
        }

        ret = vmbus_bus_init();
        if (ret)
                goto cleanup;

        hv_setup_kexec_handler(hv_kexec_handler);
        hv_setup_crash_handler(hv_crash_handler);

        return 0;

cleanup:
        acpi_bus_unregister_driver(&vmbus_acpi_driver);
        hv_acpi_dev = NULL;
        return ret;
}

static void __exit vmbus_exit(void)
{
        int cpu;

        hv_remove_kexec_handler();
        hv_remove_crash_handler();
        vmbus_connection.conn_state = DISCONNECTED;
        hv_synic_clockevents_cleanup();
        vmbus_disconnect();
        hv_remove_vmbus_irq();
        tasklet_kill(&msg_dpc);
        vmbus_free_channels();
        if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
                unregister_die_notifier(&hyperv_die_block);
                atomic_notifier_chain_unregister(&panic_notifier_list,
                                                 &hyperv_panic_block);
        }
        bus_unregister(&hv_bus);
        hv_cleanup();
        for_each_online_cpu(cpu) {
                tasklet_kill(hv_context.event_dpc[cpu]);
                smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1);
        }
        hv_synic_free();
        acpi_bus_unregister_driver(&vmbus_acpi_driver);
        if (vmbus_proto_version > VERSION_WIN7)
                cpu_hotplug_enable();
}

MODULE_LICENSE("GPL");

subsys_initcall(hv_acpi_init);
module_exit(vmbus_exit);