// SPDX-License-Identifier: GPL-2.0-only
/*
 * VMware VMCI Driver
 *
 * Copyright (C) 2012 VMware, Inc. All rights reserved.
 */

#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
#include <linux/moduleparam.h>
#include <linux/interrupt.h>
#include <linux/highmem.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/processor.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/smp.h>
#include <linux/io.h>
#include <linux/vmalloc.h>

#include "vmci_datagram.h"
#include "vmci_doorbell.h"
#include "vmci_context.h"
#include "vmci_driver.h"
#include "vmci_event.h"

#define PCI_DEVICE_ID_VMWARE_VMCI	0x0740

#define VMCI_UTIL_NUM_RESOURCES 1

/*
 * Datagram buffers for DMA send/receive must accommodate at least
 * a maximum sized datagram and the header.
 */
#define VMCI_DMA_DG_BUFFER_SIZE (VMCI_MAX_DG_SIZE + PAGE_SIZE)

static bool vmci_disable_msi;
module_param_named(disable_msi, vmci_disable_msi, bool, 0);
MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)");

static bool vmci_disable_msix;
module_param_named(disable_msix, vmci_disable_msix, bool, 0);
MODULE_PARM_DESC(disable_msix, "Disable MSI-X use in driver - (default=0)");

static u32 ctx_update_sub_id = VMCI_INVALID_ID;
static u32 vm_context_id = VMCI_INVALID_ID;

struct vmci_guest_device {
	struct device *dev;	/* PCI device we are attached to */
	void __iomem *iobase;		/* I/O port register window (BAR 0) */
	void __iomem *mmio_base;	/* MMIO register window (BAR 1), if present */

	bool exclusive_vectors;

	struct tasklet_struct datagram_tasklet;
	struct tasklet_struct bm_tasklet;
	struct wait_queue_head inout_wq;

	void *data_buffer;		/* Incoming datagram buffer */
	dma_addr_t data_buffer_base;
	void *tx_buffer;		/* Outgoing DMA datagram buffer */
	dma_addr_t tx_buffer_base;
	void *notification_bitmap;	/* Doorbell notification bitmap */
	dma_addr_t notification_base;
};

static bool use_ppn64;

bool vmci_use_ppn64(void)
{
	return use_ppn64;
}

/* vmci_dev singleton device and supporting data */
struct pci_dev *vmci_pdev;
static struct vmci_guest_device *vmci_dev_g;
static DEFINE_SPINLOCK(vmci_dev_spinlock);

static atomic_t vmci_num_guest_devices = ATOMIC_INIT(0);

bool vmci_guest_code_active(void)
{
	return atomic_read(&vmci_num_guest_devices) != 0;
}

u32 vmci_get_vm_context_id(void)
{
	if (vm_context_id == VMCI_INVALID_ID) {
		struct vmci_datagram get_cid_msg;
		get_cid_msg.dst =
		    vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
				     VMCI_GET_CONTEXT_ID);
		get_cid_msg.src = VMCI_ANON_SRC_HANDLE;
		get_cid_msg.payload_size = 0;
		vm_context_id = vmci_send_datagram(&get_cid_msg);
	}
	return vm_context_id;
}

static unsigned int vmci_read_reg(struct vmci_guest_device *dev, u32 reg)
{
	if (dev->mmio_base != NULL)
		return readl(dev->mmio_base + reg);
	return ioread32(dev->iobase + reg);
}

static void vmci_write_reg(struct vmci_guest_device *dev, u32 val, u32 reg)
{
	if (dev->mmio_base != NULL)
		writel(val, dev->mmio_base + reg);
	else
		iowrite32(val, dev->iobase + reg);
}
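
/*
 * When the device is operated through the MMIO/DMA interface, datagram
 * receives and sends go through coherent buffers that share a common
 * layout: the first page holds a struct vmci_data_in_out_header (opcode,
 * size, busy flag and result), followed either by struct vmci_sg_elem
 * entries that point at the data area on the following pages (receive
 * path) or by the outgoing datagram itself (send path). The driver kicks
 * the device by writing the lower 32 bits of the buffer address to
 * VMCI_DATA_IN_LOW_ADDR or VMCI_DATA_OUT_LOW_ADDR (the upper 32 bits are
 * programmed once at probe time), and the device signals completion by
 * toggling the busy flag in the header: set for a completed read,
 * cleared for a completed write.
 */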
static void vmci_read_data(struct vmci_guest_device *vmci_dev,
			   void *dest, size_t size)
{
	if (vmci_dev->mmio_base == NULL)
		ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
			    dest, size);
	else {
		/*
		 * For DMA datagrams, the data_buffer will contain the header on the
		 * first page, followed by the incoming datagram(s) on the following
		 * pages. The header uses an S/G element immediately following the
		 * header on the first page to point to the data area.
		 */
		struct vmci_data_in_out_header *buffer_header = vmci_dev->data_buffer;
		struct vmci_sg_elem *sg_array = (struct vmci_sg_elem *)(buffer_header + 1);
		size_t buffer_offset = dest - vmci_dev->data_buffer;

		buffer_header->opcode = 1;
		buffer_header->size = 1;
		buffer_header->busy = 0;
		sg_array[0].addr = vmci_dev->data_buffer_base + buffer_offset;
		sg_array[0].size = size;

		vmci_write_reg(vmci_dev, lower_32_bits(vmci_dev->data_buffer_base),
			       VMCI_DATA_IN_LOW_ADDR);

		wait_event(vmci_dev->inout_wq, buffer_header->busy == 1);
	}
}

static int vmci_write_data(struct vmci_guest_device *dev,
			   struct vmci_datagram *dg)
{
	int result;

	if (dev->mmio_base != NULL) {
		struct vmci_data_in_out_header *buffer_header = dev->tx_buffer;
		u8 *dg_out_buffer = (u8 *)(buffer_header + 1);

		if (VMCI_DG_SIZE(dg) > VMCI_MAX_DG_SIZE)
			return VMCI_ERROR_INVALID_ARGS;

		/*
		 * Initialize send buffer with outgoing datagram
		 * and set up header for inline data. Device will
		 * not access buffer asynchronously - only after
		 * the write to VMCI_DATA_OUT_LOW_ADDR.
		 */
		memcpy(dg_out_buffer, dg, VMCI_DG_SIZE(dg));
		buffer_header->opcode = 0;
		buffer_header->size = VMCI_DG_SIZE(dg);
		buffer_header->busy = 1;

		vmci_write_reg(dev, lower_32_bits(dev->tx_buffer_base),
			       VMCI_DATA_OUT_LOW_ADDR);

		/* Caller holds a spinlock, so cannot block. */
		spin_until_cond(buffer_header->busy == 0);

		result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
		if (result == VMCI_SUCCESS)
			result = (int)buffer_header->result;
	} else {
		iowrite8_rep(dev->iobase + VMCI_DATA_OUT_ADDR,
			     dg, VMCI_DG_SIZE(dg));
		result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
	}

	return result;
}

/*
 * VM to hypervisor call mechanism. We use the standard VMware naming
 * convention since shared code is calling this function as well.
 */
int vmci_send_datagram(struct vmci_datagram *dg)
{
	unsigned long flags;
	int result;

	/* Check args. */
	if (dg == NULL)
		return VMCI_ERROR_INVALID_ARGS;

	/*
	 * Need to acquire spinlock on the device because the datagram
	 * data may be spread over multiple pages and the monitor may
	 * interleave device user rpc calls from multiple
	 * VCPUs. Acquiring the spinlock precludes that
	 * possibility. Disabling interrupts to avoid incoming
	 * datagrams during a "rep out" and possibly ending up in
	 * this function.
	 */
	spin_lock_irqsave(&vmci_dev_spinlock, flags);

	if (vmci_dev_g)
		result = vmci_write_data(vmci_dev_g, dg);
	else
		result = VMCI_ERROR_UNAVAILABLE;

	spin_unlock_irqrestore(&vmci_dev_spinlock, flags);

	return result;
}
EXPORT_SYMBOL_GPL(vmci_send_datagram);
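
/*
 * The value returned by vmci_send_datagram() is whatever the hypervisor
 * reports in the result register (or, for DMA datagram sends, in the send
 * buffer header), or a negative VMCI error code if the device is not
 * available or the arguments are invalid. vmci_get_vm_context_id() relies
 * on this by using the result of its VMCI_GET_CONTEXT_ID request directly
 * as the context id.
 */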

/*
 * Gets called with the new context id when it is updated or the VM is
 * resumed.
 */
static void vmci_guest_cid_update(u32 sub_id,
				  const struct vmci_event_data *event_data,
				  void *client_data)
{
	const struct vmci_event_payld_ctx *ev_payload =
				vmci_event_data_const_payload(event_data);

	if (sub_id != ctx_update_sub_id) {
		pr_devel("Invalid subscriber (ID=0x%x)\n", sub_id);
		return;
	}

	if (!event_data || ev_payload->context_id == VMCI_INVALID_ID) {
		pr_devel("Invalid event data\n");
		return;
	}

	pr_devel("Updating context from (ID=0x%x) to (ID=0x%x) on event (type=%d)\n",
		 vm_context_id, ev_payload->context_id, event_data->event);

	vm_context_id = ev_payload->context_id;
}

/*
 * Verify that the host supports the hypercalls we need. If it does not,
 * try to find fallback hypercalls and use those instead. Returns 0 if
 * required hypercalls (or fallback hypercalls) are supported by the host,
 * an error code otherwise.
 */
static int vmci_check_host_caps(struct pci_dev *pdev)
{
	bool result;
	struct vmci_resource_query_msg *msg;
	u32 msg_size = sizeof(struct vmci_resource_query_hdr) +
				VMCI_UTIL_NUM_RESOURCES * sizeof(u32);
	struct vmci_datagram *check_msg;

	check_msg = kzalloc(msg_size, GFP_KERNEL);
	if (!check_msg) {
		dev_err(&pdev->dev, "%s: Insufficient memory\n", __func__);
		return -ENOMEM;
	}

	check_msg->dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
					  VMCI_RESOURCES_QUERY);
	check_msg->src = VMCI_ANON_SRC_HANDLE;
	check_msg->payload_size = msg_size - VMCI_DG_HEADERSIZE;
	msg = (struct vmci_resource_query_msg *)VMCI_DG_PAYLOAD(check_msg);

	msg->num_resources = VMCI_UTIL_NUM_RESOURCES;
	msg->resources[0] = VMCI_GET_CONTEXT_ID;

	/* Checks that hyper calls are supported */
	result = vmci_send_datagram(check_msg) == 0x01;
	kfree(check_msg);

	dev_dbg(&pdev->dev, "%s: Host capability check: %s\n",
		__func__, result ? "PASSED" : "FAILED");

	/* We need the vector. There are no fallbacks. */
	return result ? 0 : -ENXIO;
}

/*
 * Reads datagrams from the device and dispatches them. For IO port
 * based access to the device, we always start reading datagrams into
 * only the first page of the datagram buffer. If the datagrams don't
 * fit into one page, we use the maximum datagram buffer size for the
 * remainder of the invocation. This is a simple heuristic for not
 * penalizing small datagrams. For DMA-based datagrams, we always
 * use the maximum datagram buffer size, since there is no performance
 * penalty for doing so.
 *
 * This function assumes that it has exclusive access to the data
 * in register(s) for the duration of the call.
 */
static void vmci_dispatch_dgs(unsigned long data)
{
	struct vmci_guest_device *vmci_dev = (struct vmci_guest_device *)data;
	u8 *dg_in_buffer = vmci_dev->data_buffer;
	struct vmci_datagram *dg;
	size_t dg_in_buffer_size = VMCI_MAX_DG_SIZE;
	size_t current_dg_in_buffer_size;
	size_t remaining_bytes;
	bool is_io_port = vmci_dev->mmio_base == NULL;

	BUILD_BUG_ON(VMCI_MAX_DG_SIZE < PAGE_SIZE);

	if (!is_io_port) {
		/* For mmio, the first page is used for the header. */
		dg_in_buffer += PAGE_SIZE;

		/*
		 * For DMA-based datagram operations, there is no performance
		 * penalty for reading the maximum buffer size.
		 */
		current_dg_in_buffer_size = VMCI_MAX_DG_SIZE;
	} else {
		current_dg_in_buffer_size = PAGE_SIZE;
	}
	vmci_read_data(vmci_dev, dg_in_buffer, current_dg_in_buffer_size);
	dg = (struct vmci_datagram *)dg_in_buffer;
	remaining_bytes = current_dg_in_buffer_size;

	/*
	 * Read through the buffer until an invalid datagram header is
	 * encountered. The exit condition for datagrams read through
	 * VMCI_DATA_IN_ADDR is a bit more complicated, since a datagram
	 * can start on any page boundary in the buffer.
	 */
	while (dg->dst.resource != VMCI_INVALID_ID ||
	       (is_io_port && remaining_bytes > PAGE_SIZE)) {
		unsigned dg_in_size;

		/*
		 * If using VMCI_DATA_IN_ADDR, skip to the next page
		 * as a datagram can start on any page boundary.
		 */
		if (dg->dst.resource == VMCI_INVALID_ID) {
			dg = (struct vmci_datagram *)roundup(
				(uintptr_t)dg + 1, PAGE_SIZE);
			remaining_bytes =
				(size_t)(dg_in_buffer +
					 current_dg_in_buffer_size -
					 (u8 *)dg);
			continue;
		}

		dg_in_size = VMCI_DG_SIZE_ALIGNED(dg);

		if (dg_in_size <= dg_in_buffer_size) {
			int result;

			/*
			 * If the remaining bytes in the datagram
			 * buffer don't contain the complete
			 * datagram, we first make sure we have enough
			 * room for it and then we read the remainder
			 * of the datagram and possibly any following
			 * datagrams.
			 */
			if (dg_in_size > remaining_bytes) {
				if (remaining_bytes !=
				    current_dg_in_buffer_size) {

					/*
					 * We move the partial
					 * datagram to the front and
					 * read the remainder of the
					 * datagram and possibly
					 * following calls into the
					 * following bytes.
					 */
					memmove(dg_in_buffer, dg_in_buffer +
						current_dg_in_buffer_size -
						remaining_bytes,
						remaining_bytes);
					dg = (struct vmci_datagram *)
						dg_in_buffer;
				}

				if (current_dg_in_buffer_size !=
				    dg_in_buffer_size)
					current_dg_in_buffer_size =
						dg_in_buffer_size;

				vmci_read_data(vmci_dev,
					       dg_in_buffer +
						remaining_bytes,
					       current_dg_in_buffer_size -
						remaining_bytes);
			}

			/*
			 * We special case event datagrams from the
			 * hypervisor.
			 */
			if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID &&
			    dg->dst.resource == VMCI_EVENT_HANDLER) {
				result = vmci_event_dispatch(dg);
			} else {
				result = vmci_datagram_invoke_guest_handler(dg);
			}
			if (result < VMCI_SUCCESS)
				dev_dbg(vmci_dev->dev,
					"Datagram with resource (ID=0x%x) failed (err=%d)\n",
					dg->dst.resource, result);

			/* On to the next datagram. */
			dg = (struct vmci_datagram *)((u8 *)dg +
						      dg_in_size);
		} else {
			size_t bytes_to_skip;

			/*
			 * Datagram doesn't fit in datagram buffer of maximal
			 * size. We drop it.
			 */
			dev_dbg(vmci_dev->dev,
				"Failed to receive datagram (size=%u bytes)\n",
				dg_in_size);

			bytes_to_skip = dg_in_size - remaining_bytes;
			if (current_dg_in_buffer_size != dg_in_buffer_size)
				current_dg_in_buffer_size = dg_in_buffer_size;

			for (;;) {
				vmci_read_data(vmci_dev, dg_in_buffer,
					       current_dg_in_buffer_size);
				if (bytes_to_skip <= current_dg_in_buffer_size)
					break;

				bytes_to_skip -= current_dg_in_buffer_size;
			}
			dg = (struct vmci_datagram *)(dg_in_buffer +
						      bytes_to_skip);
		}

		remaining_bytes =
			(size_t) (dg_in_buffer + current_dg_in_buffer_size -
				  (u8 *)dg);

		if (remaining_bytes < VMCI_DG_HEADERSIZE) {
			/* Get the next batch of datagrams. */

			vmci_read_data(vmci_dev, dg_in_buffer,
				       current_dg_in_buffer_size);
			dg = (struct vmci_datagram *)dg_in_buffer;
			remaining_bytes = current_dg_in_buffer_size;
		}
	}
}

/*
 * Scans the notification bitmap for raised flags, clears them
 * and handles the notifications.
 */
static void vmci_process_bitmap(unsigned long data)
{
	struct vmci_guest_device *dev = (struct vmci_guest_device *)data;

	if (!dev->notification_bitmap) {
		dev_dbg(dev->dev, "No bitmap present in %s\n", __func__);
		return;
	}

	vmci_dbell_scan_notification_entries(dev->notification_bitmap);
}
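
/*
 * Interrupt routing: with MSI-X and exclusive vectors, vector
 * VMCI_INTR_DATAGRAM handles incoming datagrams, VMCI_INTR_NOTIFICATION
 * handles the notification bitmap and VMCI_INTR_DMA_DATAGRAM signals
 * completion of DMA datagram transfers. When all causes share one vector
 * (exclusive_vectors is false), the single handler below reads
 * VMCI_ICR_ADDR to determine which of those events fired.
 */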

/*
 * Interrupt handler for legacy or MSI interrupt, or for first MSI-X
 * interrupt (vector VMCI_INTR_DATAGRAM).
 */
static irqreturn_t vmci_interrupt(int irq, void *_dev)
{
	struct vmci_guest_device *dev = _dev;

	/*
	 * If we are using MSI-X with exclusive vectors then we simply schedule
	 * the datagram tasklet, since we know the interrupt was meant for us.
	 * Otherwise we must read the ICR to determine what to do.
	 */

	if (dev->exclusive_vectors) {
		tasklet_schedule(&dev->datagram_tasklet);
	} else {
		unsigned int icr;

		/* Acknowledge interrupt and determine what needs doing. */
		icr = vmci_read_reg(dev, VMCI_ICR_ADDR);
		if (icr == 0 || icr == ~0)
			return IRQ_NONE;

		if (icr & VMCI_ICR_DATAGRAM) {
			tasklet_schedule(&dev->datagram_tasklet);
			icr &= ~VMCI_ICR_DATAGRAM;
		}

		if (icr & VMCI_ICR_NOTIFICATION) {
			tasklet_schedule(&dev->bm_tasklet);
			icr &= ~VMCI_ICR_NOTIFICATION;
		}

		if (icr & VMCI_ICR_DMA_DATAGRAM) {
			wake_up_all(&dev->inout_wq);
			icr &= ~VMCI_ICR_DMA_DATAGRAM;
		}

		if (icr != 0)
			dev_warn(dev->dev,
				 "Ignoring unknown interrupt cause (%d)\n",
				 icr);
	}

	return IRQ_HANDLED;
}

/*
 * Interrupt handler for MSI-X interrupt vector VMCI_INTR_NOTIFICATION,
 * which is for the notification bitmap. Will only get called if we are
 * using MSI-X with exclusive vectors.
 */
static irqreturn_t vmci_interrupt_bm(int irq, void *_dev)
{
	struct vmci_guest_device *dev = _dev;

	/* For MSI-X we can just assume it was meant for us. */
	tasklet_schedule(&dev->bm_tasklet);

	return IRQ_HANDLED;
}

/*
 * Interrupt handler for MSI-X interrupt vector VMCI_INTR_DMA_DATAGRAM,
 * which is for the completion of a DMA datagram send or receive operation.
 * Will only get called if we are using MSI-X with exclusive vectors.
 */
static irqreturn_t vmci_interrupt_dma_datagram(int irq, void *_dev)
{
	struct vmci_guest_device *dev = _dev;

	wake_up_all(&dev->inout_wq);

	return IRQ_HANDLED;
}

static void vmci_free_dg_buffers(struct vmci_guest_device *vmci_dev)
{
	if (vmci_dev->mmio_base != NULL) {
		if (vmci_dev->tx_buffer != NULL)
			dma_free_coherent(vmci_dev->dev,
					  VMCI_DMA_DG_BUFFER_SIZE,
					  vmci_dev->tx_buffer,
					  vmci_dev->tx_buffer_base);
		if (vmci_dev->data_buffer != NULL)
			dma_free_coherent(vmci_dev->dev,
					  VMCI_DMA_DG_BUFFER_SIZE,
					  vmci_dev->data_buffer,
					  vmci_dev->data_buffer_base);
	} else {
		vfree(vmci_dev->data_buffer);
	}
}

/*
 * Most of the initialization at module load time is done here.
 */
static int vmci_guest_probe_device(struct pci_dev *pdev,
				   const struct pci_device_id *id)
{
	struct vmci_guest_device *vmci_dev;
	void __iomem *iobase = NULL;
	void __iomem *mmio_base = NULL;
	unsigned int num_irq_vectors;
	unsigned int capabilities;
	unsigned int caps_in_use;
	unsigned long cmd;
	int vmci_err;
	int error;

	dev_dbg(&pdev->dev, "Probing for vmci/PCI guest device\n");

	error = pcim_enable_device(pdev);
	if (error) {
		dev_err(&pdev->dev,
			"Failed to enable VMCI device: %d\n", error);
		return error;
	}

	/*
	 * The VMCI device with mmio access to registers requests 256KB
	 * for BAR1. If present, the driver will use the new VMCI device
	 * functionality for register access and datagram send/recv.
	 */

	if (pci_resource_len(pdev, 1) == VMCI_WITH_MMIO_ACCESS_BAR_SIZE) {
		dev_info(&pdev->dev, "MMIO register access is available\n");
		mmio_base = pci_iomap_range(pdev, 1, VMCI_MMIO_ACCESS_OFFSET,
					    VMCI_MMIO_ACCESS_SIZE);
		/* If the map fails, we fall back to IOIO access. */
		if (!mmio_base)
			dev_warn(&pdev->dev, "Failed to map MMIO register access\n");
	}

	if (!mmio_base) {
		if (IS_ENABLED(CONFIG_ARM64)) {
			dev_err(&pdev->dev, "MMIO base is invalid\n");
			return -ENXIO;
		}
		error = pcim_iomap_regions(pdev, BIT(0), KBUILD_MODNAME);
		if (error) {
			dev_err(&pdev->dev, "Failed to reserve/map IO regions\n");
			return error;
		}
		iobase = pcim_iomap_table(pdev)[0];
	}

	vmci_dev = devm_kzalloc(&pdev->dev, sizeof(*vmci_dev), GFP_KERNEL);
	if (!vmci_dev) {
		dev_err(&pdev->dev,
			"Can't allocate memory for VMCI device\n");
		return -ENOMEM;
	}

	vmci_dev->dev = &pdev->dev;
	vmci_dev->exclusive_vectors = false;
	vmci_dev->iobase = iobase;
	vmci_dev->mmio_base = mmio_base;

	tasklet_init(&vmci_dev->datagram_tasklet,
		     vmci_dispatch_dgs, (unsigned long)vmci_dev);
	tasklet_init(&vmci_dev->bm_tasklet,
		     vmci_process_bitmap, (unsigned long)vmci_dev);
	init_waitqueue_head(&vmci_dev->inout_wq);

	if (mmio_base != NULL) {
		vmci_dev->tx_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE,
							 &vmci_dev->tx_buffer_base,
							 GFP_KERNEL);
		if (!vmci_dev->tx_buffer) {
			dev_err(&pdev->dev,
				"Can't allocate memory for datagram tx buffer\n");
			return -ENOMEM;
		}

		vmci_dev->data_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE,
							   &vmci_dev->data_buffer_base,
							   GFP_KERNEL);
	} else {
		vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE);
	}
	if (!vmci_dev->data_buffer) {
		dev_err(&pdev->dev,
			"Can't allocate memory for datagram buffer\n");
		error = -ENOMEM;
		goto err_free_data_buffers;
	}

	pci_set_master(pdev);	/* To enable queue_pair functionality. */

	/*
	 * Verify that the VMCI Device supports the capabilities that
	 * we need. If the device is missing capabilities that we would
	 * like to use, check for fallback capabilities and use those
	 * instead (so we can run a new VM on old hosts). Fail the load if
	 * a required capability is missing and there is no fallback.
	 *
	 * Right now, we need datagrams. There are no fallbacks.
	 */
	capabilities = vmci_read_reg(vmci_dev, VMCI_CAPS_ADDR);
	if (!(capabilities & VMCI_CAPS_DATAGRAM)) {
		dev_err(&pdev->dev, "Device does not support datagrams\n");
		error = -ENXIO;
		goto err_free_data_buffers;
	}
	caps_in_use = VMCI_CAPS_DATAGRAM;

	/*
	 * Use 64-bit PPNs if the device supports them.
	 *
	 * There is no check for the return value of dma_set_mask_and_coherent
	 * since this driver can handle the default mask values if
	 * dma_set_mask_and_coherent fails.
	 */
	if (capabilities & VMCI_CAPS_PPN64) {
		dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
		use_ppn64 = true;
		caps_in_use |= VMCI_CAPS_PPN64;
	} else {
		dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(44));
		use_ppn64 = false;
	}

	/*
	 * If the hardware supports notifications, we will use that as
	 * well.
	 */
	if (capabilities & VMCI_CAPS_NOTIFICATIONS) {
		vmci_dev->notification_bitmap = dma_alloc_coherent(
			&pdev->dev, PAGE_SIZE, &vmci_dev->notification_base,
			GFP_KERNEL);
		if (!vmci_dev->notification_bitmap)
			dev_warn(&pdev->dev,
				 "Unable to allocate notification bitmap\n");
		else
			caps_in_use |= VMCI_CAPS_NOTIFICATIONS;
	}

	if (mmio_base != NULL) {
		if (capabilities & VMCI_CAPS_DMA_DATAGRAM) {
			caps_in_use |= VMCI_CAPS_DMA_DATAGRAM;
		} else {
			dev_err(&pdev->dev,
				"Missing capability: VMCI_CAPS_DMA_DATAGRAM\n");
			error = -ENXIO;
			goto err_free_notification_bitmap;
		}
	}

	dev_info(&pdev->dev, "Using capabilities 0x%x\n", caps_in_use);

	/* Let the host know which capabilities we intend to use. */
	vmci_write_reg(vmci_dev, caps_in_use, VMCI_CAPS_ADDR);

	if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
		/* Let the device know the size for pages passed down. */
		vmci_write_reg(vmci_dev, PAGE_SHIFT, VMCI_GUEST_PAGE_SHIFT);

		/* Configure the high order parts of the data in/out buffers. */
		vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->data_buffer_base),
			       VMCI_DATA_IN_HIGH_ADDR);
		vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->tx_buffer_base),
			       VMCI_DATA_OUT_HIGH_ADDR);
	}

	/* Set up global device so that we can start sending datagrams */
	spin_lock_irq(&vmci_dev_spinlock);
	vmci_dev_g = vmci_dev;
	vmci_pdev = pdev;
	spin_unlock_irq(&vmci_dev_spinlock);

	/*
	 * Register notification bitmap with device if that capability is
	 * used.
	 */
	if (caps_in_use & VMCI_CAPS_NOTIFICATIONS) {
		unsigned long bitmap_ppn =
			vmci_dev->notification_base >> PAGE_SHIFT;
		if (!vmci_dbell_register_notification_bitmap(bitmap_ppn)) {
			dev_warn(&pdev->dev,
				 "VMCI device unable to register notification bitmap with PPN 0x%lx\n",
				 bitmap_ppn);
			error = -ENXIO;
			goto err_remove_vmci_dev_g;
		}
	}

	/* Check host capabilities. */
	error = vmci_check_host_caps(pdev);
	if (error)
		goto err_remove_vmci_dev_g;

	/* Enable device. */

	/*
	 * We subscribe to the VMCI_EVENT_CTX_ID_UPDATE here so we can
	 * update the internal context id when needed.
	 */
	vmci_err = vmci_event_subscribe(VMCI_EVENT_CTX_ID_UPDATE,
					vmci_guest_cid_update, NULL,
					&ctx_update_sub_id);
	if (vmci_err < VMCI_SUCCESS)
		dev_warn(&pdev->dev,
			 "Failed to subscribe to event (type=%d): %d\n",
			 VMCI_EVENT_CTX_ID_UPDATE, vmci_err);

	/*
	 * Enable interrupts. Try MSI-X first, then MSI, and then fall back on
	 * legacy interrupts.
	 */
	if (vmci_dev->mmio_base != NULL)
		num_irq_vectors = VMCI_MAX_INTRS;
	else
		num_irq_vectors = VMCI_MAX_INTRS_NOTIFICATION;
	error = pci_alloc_irq_vectors(pdev, num_irq_vectors, num_irq_vectors,
				      PCI_IRQ_MSIX);
	if (error < 0) {
		error = pci_alloc_irq_vectors(pdev, 1, 1,
				PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY);
		if (error < 0)
			goto err_unsubscribe_event;
	} else {
		vmci_dev->exclusive_vectors = true;
	}

	/*
	 * Request IRQ for legacy or MSI interrupts, or for first
	 * MSI-X vector.
	 */
	error = request_irq(pci_irq_vector(pdev, 0), vmci_interrupt,
			    IRQF_SHARED, KBUILD_MODNAME, vmci_dev);
	if (error) {
		dev_err(&pdev->dev, "Irq %u in use: %d\n",
			pci_irq_vector(pdev, 0), error);
		goto err_disable_msi;
	}

	/*
	 * For MSI-X with exclusive vectors we need to request an
	 * interrupt for each vector so that we get a separate
	 * interrupt handler routine. This allows us to distinguish
	 * between the vectors.
	 */
	if (vmci_dev->exclusive_vectors) {
		error = request_irq(pci_irq_vector(pdev, 1),
				    vmci_interrupt_bm, 0, KBUILD_MODNAME,
				    vmci_dev);
		if (error) {
			dev_err(&pdev->dev,
				"Failed to allocate irq %u: %d\n",
				pci_irq_vector(pdev, 1), error);
			goto err_free_irq;
		}
		if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
			error = request_irq(pci_irq_vector(pdev, 2),
					    vmci_interrupt_dma_datagram,
					    0, KBUILD_MODNAME, vmci_dev);
			if (error) {
				dev_err(&pdev->dev,
					"Failed to allocate irq %u: %d\n",
					pci_irq_vector(pdev, 2), error);
				goto err_free_bm_irq;
			}
		}
	}

	dev_dbg(&pdev->dev, "Registered device\n");

	atomic_inc(&vmci_num_guest_devices);

	/* Enable specific interrupt bits. */
	cmd = VMCI_IMR_DATAGRAM;
	if (caps_in_use & VMCI_CAPS_NOTIFICATIONS)
		cmd |= VMCI_IMR_NOTIFICATION;
	if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM)
		cmd |= VMCI_IMR_DMA_DATAGRAM;
	vmci_write_reg(vmci_dev, cmd, VMCI_IMR_ADDR);

	/* Enable interrupts. */
	vmci_write_reg(vmci_dev, VMCI_CONTROL_INT_ENABLE, VMCI_CONTROL_ADDR);

	pci_set_drvdata(pdev, vmci_dev);

	vmci_call_vsock_callback(false);
	return 0;

err_free_bm_irq:
	if (vmci_dev->exclusive_vectors)
		free_irq(pci_irq_vector(pdev, 1), vmci_dev);

err_free_irq:
	free_irq(pci_irq_vector(pdev, 0), vmci_dev);
	tasklet_kill(&vmci_dev->datagram_tasklet);
	tasklet_kill(&vmci_dev->bm_tasklet);

err_disable_msi:
	pci_free_irq_vectors(pdev);

err_unsubscribe_event:
	vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
	if (vmci_err < VMCI_SUCCESS)
		dev_warn(&pdev->dev,
			 "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
			 VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);

err_remove_vmci_dev_g:
	spin_lock_irq(&vmci_dev_spinlock);
	vmci_pdev = NULL;
	vmci_dev_g = NULL;
	spin_unlock_irq(&vmci_dev_spinlock);

err_free_notification_bitmap:
	if (vmci_dev->notification_bitmap) {
		vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR);
		dma_free_coherent(&pdev->dev, PAGE_SIZE,
				  vmci_dev->notification_bitmap,
				  vmci_dev->notification_base);
	}

err_free_data_buffers:
	vmci_free_dg_buffers(vmci_dev);

	/* The rest are managed resources and will be freed by PCI core */
	return error;
}

static void vmci_guest_remove_device(struct pci_dev *pdev)
{
	struct vmci_guest_device *vmci_dev = pci_get_drvdata(pdev);
	int vmci_err;

	dev_dbg(&pdev->dev, "Removing device\n");

	atomic_dec(&vmci_num_guest_devices);

	vmci_qp_guest_endpoints_exit();

	vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
	if (vmci_err < VMCI_SUCCESS)
		dev_warn(&pdev->dev,
			 "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
			 VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);

	spin_lock_irq(&vmci_dev_spinlock);
	vmci_dev_g = NULL;
	vmci_pdev = NULL;
	spin_unlock_irq(&vmci_dev_spinlock);

	dev_dbg(&pdev->dev, "Resetting vmci device\n");
	vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR);

	/*
	 * Free IRQ and then disable MSI/MSI-X as appropriate. For
	 * MSI-X, we might have multiple vectors, each with their own
	 * IRQ, which we must free too.
	 */
	if (vmci_dev->exclusive_vectors) {
		free_irq(pci_irq_vector(pdev, 1), vmci_dev);
		if (vmci_dev->mmio_base != NULL)
			free_irq(pci_irq_vector(pdev, 2), vmci_dev);
	}
	free_irq(pci_irq_vector(pdev, 0), vmci_dev);
	pci_free_irq_vectors(pdev);

	tasklet_kill(&vmci_dev->datagram_tasklet);
	tasklet_kill(&vmci_dev->bm_tasklet);

	if (vmci_dev->notification_bitmap) {
		/*
		 * The device reset above cleared the bitmap state of the
		 * device, so we can safely free it here.
		 */

		dma_free_coherent(&pdev->dev, PAGE_SIZE,
				  vmci_dev->notification_bitmap,
				  vmci_dev->notification_base);
	}

	vmci_free_dg_buffers(vmci_dev);

	if (vmci_dev->mmio_base != NULL)
		pci_iounmap(pdev, vmci_dev->mmio_base);

	/* The rest are managed resources and will be freed by PCI core */
}

static const struct pci_device_id vmci_ids[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_VMCI), },
	{ 0 },
};
MODULE_DEVICE_TABLE(pci, vmci_ids);

static struct pci_driver vmci_guest_driver = {
	.name		= KBUILD_MODNAME,
	.id_table	= vmci_ids,
	.probe		= vmci_guest_probe_device,
	.remove		= vmci_guest_remove_device,
};

int __init vmci_guest_init(void)
{
	return pci_register_driver(&vmci_guest_driver);
}

void __exit vmci_guest_exit(void)
{
	pci_unregister_driver(&vmci_guest_driver);
}