// SPDX-License-Identifier: GPL-2.0-only
/*
 * VMware VMCI Driver
 *
 * Copyright (C) 2012 VMware, Inc. All rights reserved.
 */

#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
#include <linux/moduleparam.h>
#include <linux/interrupt.h>
#include <linux/highmem.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/processor.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/smp.h>
#include <linux/io.h>
#include <linux/vmalloc.h>

#include "vmci_datagram.h"
#include "vmci_doorbell.h"
#include "vmci_context.h"
#include "vmci_driver.h"
#include "vmci_event.h"

#define PCI_DEVICE_ID_VMWARE_VMCI	0x0740

#define VMCI_UTIL_NUM_RESOURCES 1

/*
 * Datagram buffers for DMA send/receive must accommodate at least
 * a maximum sized datagram and the header.
 */
#define VMCI_DMA_DG_BUFFER_SIZE (VMCI_MAX_DG_SIZE + PAGE_SIZE)

static bool vmci_disable_msi;
module_param_named(disable_msi, vmci_disable_msi, bool, 0);
MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)");

static bool vmci_disable_msix;
module_param_named(disable_msix, vmci_disable_msix, bool, 0);
MODULE_PARM_DESC(disable_msix, "Disable MSI-X use in driver - (default=0)");

static u32 ctx_update_sub_id = VMCI_INVALID_ID;
static u32 vm_context_id = VMCI_INVALID_ID;

struct vmci_guest_device {
	struct device *dev;	/* PCI device we are attached to */
	void __iomem *iobase;
	void __iomem *mmio_base;

	bool exclusive_vectors;

	struct wait_queue_head inout_wq;

	void *data_buffer;
	dma_addr_t data_buffer_base;
	void *tx_buffer;
	dma_addr_t tx_buffer_base;
	void *notification_bitmap;
	dma_addr_t notification_base;
};

static bool use_ppn64;

bool vmci_use_ppn64(void)
{
	return use_ppn64;
}

/* vmci_dev singleton device and supporting data */
struct pci_dev *vmci_pdev;
static struct vmci_guest_device *vmci_dev_g;
static DEFINE_SPINLOCK(vmci_dev_spinlock);

static atomic_t vmci_num_guest_devices = ATOMIC_INIT(0);

bool vmci_guest_code_active(void)
{
	return atomic_read(&vmci_num_guest_devices) != 0;
}

u32 vmci_get_vm_context_id(void)
{
	if (vm_context_id == VMCI_INVALID_ID) {
		struct vmci_datagram get_cid_msg;
		get_cid_msg.dst =
		    vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
				     VMCI_GET_CONTEXT_ID);
		get_cid_msg.src = VMCI_ANON_SRC_HANDLE;
		get_cid_msg.payload_size = 0;
		vm_context_id = vmci_send_datagram(&get_cid_msg);
	}
	return vm_context_id;
}

static unsigned int vmci_read_reg(struct vmci_guest_device *dev, u32 reg)
{
	if (dev->mmio_base != NULL)
		return readl(dev->mmio_base + reg);
	return ioread32(dev->iobase + reg);
}

static void vmci_write_reg(struct vmci_guest_device *dev, u32 val, u32 reg)
{
	if (dev->mmio_base != NULL)
		writel(val, dev->mmio_base + reg);
	else
		iowrite32(val, dev->iobase + reg);
}

static void vmci_read_data(struct vmci_guest_device *vmci_dev,
			   void *dest, size_t size)
{
	if (vmci_dev->mmio_base == NULL)
		ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
			    dest, size);
	else {
		/*
		 * For DMA datagrams, the data_buffer will contain the header on the
		 * first page, followed by the incoming datagram(s) on the following
		 * pages. The header uses an S/G element immediately following the
		 * header on the first page to point to the data area.
		 */
		struct vmci_data_in_out_header *buffer_header = vmci_dev->data_buffer;
		struct vmci_sg_elem *sg_array = (struct vmci_sg_elem *)(buffer_header + 1);
		size_t buffer_offset = dest - vmci_dev->data_buffer;

		buffer_header->opcode = 1;
		buffer_header->size = 1;
		buffer_header->busy = 0;
		sg_array[0].addr = vmci_dev->data_buffer_base + buffer_offset;
		sg_array[0].size = size;

		vmci_write_reg(vmci_dev, lower_32_bits(vmci_dev->data_buffer_base),
			       VMCI_DATA_IN_LOW_ADDR);

		wait_event(vmci_dev->inout_wq, buffer_header->busy == 1);
	}
}
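
/*
 * Send a datagram to the device. For IO port based devices the datagram
 * is written directly to the data-out port. For MMIO devices the datagram
 * is copied inline into the coherent tx buffer behind the header, the
 * device is kicked via VMCI_DATA_OUT_LOW_ADDR, and we busy-wait for the
 * device to clear the header's busy flag (the caller holds
 * vmci_dev_spinlock, so we cannot sleep). Returns a VMCI_* result code.
 */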
static int vmci_write_data(struct vmci_guest_device *dev,
			   struct vmci_datagram *dg)
{
	int result;

	if (dev->mmio_base != NULL) {
		struct vmci_data_in_out_header *buffer_header = dev->tx_buffer;
		u8 *dg_out_buffer = (u8 *)(buffer_header + 1);

		if (VMCI_DG_SIZE(dg) > VMCI_MAX_DG_SIZE)
			return VMCI_ERROR_INVALID_ARGS;

		/*
		 * Initialize send buffer with outgoing datagram
		 * and set up header for inline data. Device will
		 * not access buffer asynchronously - only after
		 * the write to VMCI_DATA_OUT_LOW_ADDR.
		 */
		memcpy(dg_out_buffer, dg, VMCI_DG_SIZE(dg));
		buffer_header->opcode = 0;
		buffer_header->size = VMCI_DG_SIZE(dg);
		buffer_header->busy = 1;

		vmci_write_reg(dev, lower_32_bits(dev->tx_buffer_base),
			       VMCI_DATA_OUT_LOW_ADDR);

		/* Caller holds a spinlock, so cannot block. */
		spin_until_cond(buffer_header->busy == 0);

		result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
		if (result == VMCI_SUCCESS)
			result = (int)buffer_header->result;
	} else {
		iowrite8_rep(dev->iobase + VMCI_DATA_OUT_ADDR,
			     dg, VMCI_DG_SIZE(dg));
		result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
	}

	return result;
}

/*
 * VM to hypervisor call mechanism. We use the standard VMware naming
 * convention since shared code is calling this function as well.
 */
int vmci_send_datagram(struct vmci_datagram *dg)
{
	unsigned long flags;
	int result;

	/* Check args. */
	if (dg == NULL)
		return VMCI_ERROR_INVALID_ARGS;

	/*
	 * Need to acquire spinlock on the device because the datagram
	 * data may be spread over multiple pages and the monitor may
	 * interleave device user rpc calls from multiple
	 * VCPUs. Acquiring the spinlock precludes that
	 * possibility. Disabling interrupts to avoid incoming
	 * datagrams during a "rep out" and possibly landing up in
	 * this function.
	 */
	spin_lock_irqsave(&vmci_dev_spinlock, flags);

	if (vmci_dev_g)
		result = vmci_write_data(vmci_dev_g, dg);
	else
		result = VMCI_ERROR_UNAVAILABLE;

	spin_unlock_irqrestore(&vmci_dev_spinlock, flags);

	return result;
}
EXPORT_SYMBOL_GPL(vmci_send_datagram);

/*
 * Gets called with the new context id if updated or resumed.
 */
static void vmci_guest_cid_update(u32 sub_id,
				  const struct vmci_event_data *event_data,
				  void *client_data)
{
	const struct vmci_event_payld_ctx *ev_payload =
				vmci_event_data_const_payload(event_data);

	if (sub_id != ctx_update_sub_id) {
		pr_devel("Invalid subscriber (ID=0x%x)\n", sub_id);
		return;
	}

	if (!event_data || ev_payload->context_id == VMCI_INVALID_ID) {
		pr_devel("Invalid event data\n");
		return;
	}

	pr_devel("Updating context from (ID=0x%x) to (ID=0x%x) on event (type=%d)\n",
		 vm_context_id, ev_payload->context_id, event_data->event);

	vm_context_id = ev_payload->context_id;
}

/*
 * Verify that the host supports the hypercalls we need. If it does not,
 * try to find fallback hypercalls and use those instead. Returns 0 if
 * required hypercalls (or fallback hypercalls) are supported by the host,
 * an error code otherwise.
 */
static int vmci_check_host_caps(struct pci_dev *pdev)
{
	bool result;
	struct vmci_resource_query_msg *msg;
	u32 msg_size = sizeof(struct vmci_resource_query_hdr) +
				VMCI_UTIL_NUM_RESOURCES * sizeof(u32);
	struct vmci_datagram *check_msg;

	check_msg = kzalloc(msg_size, GFP_KERNEL);
	if (!check_msg) {
		dev_err(&pdev->dev, "%s: Insufficient memory\n", __func__);
		return -ENOMEM;
	}

	check_msg->dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
					  VMCI_RESOURCES_QUERY);
	check_msg->src = VMCI_ANON_SRC_HANDLE;
	check_msg->payload_size = msg_size - VMCI_DG_HEADERSIZE;
	msg = (struct vmci_resource_query_msg *)VMCI_DG_PAYLOAD(check_msg);

	msg->num_resources = VMCI_UTIL_NUM_RESOURCES;
	msg->resources[0] = VMCI_GET_CONTEXT_ID;

	/* Checks that hypercalls are supported */
	result = vmci_send_datagram(check_msg) == 0x01;
	kfree(check_msg);

	dev_dbg(&pdev->dev, "%s: Host capability check: %s\n",
		__func__, result ? "PASSED" : "FAILED");

	/* We need the vector. There are no fallbacks. */
	return result ? 0 : -ENXIO;
}

/*
 * Reads datagrams from the device and dispatches them. For IO port
 * based access to the device, we always start reading datagrams into
 * only the first page of the datagram buffer. If the datagrams don't
 * fit into one page, we use the maximum datagram buffer size for the
 * remainder of the invocation. This is a simple heuristic for not
 * penalizing small datagrams. For DMA-based datagrams, we always
 * use the maximum datagram buffer size, since there is no performance
 * penalty for doing so.
 *
 * This function assumes that it has exclusive access to the data
 * in register(s) for the duration of the call.
 */
static void vmci_dispatch_dgs(struct vmci_guest_device *vmci_dev)
{
	u8 *dg_in_buffer = vmci_dev->data_buffer;
	struct vmci_datagram *dg;
	size_t dg_in_buffer_size = VMCI_MAX_DG_SIZE;
	size_t current_dg_in_buffer_size;
	size_t remaining_bytes;
	bool is_io_port = vmci_dev->mmio_base == NULL;

	BUILD_BUG_ON(VMCI_MAX_DG_SIZE < PAGE_SIZE);

	if (!is_io_port) {
		/* For mmio, the first page is used for the header. */
		dg_in_buffer += PAGE_SIZE;

		/*
		 * For DMA-based datagram operations, there is no performance
		 * penalty for reading the maximum buffer size.
		 */
		current_dg_in_buffer_size = VMCI_MAX_DG_SIZE;
	} else {
		current_dg_in_buffer_size = PAGE_SIZE;
	}
	vmci_read_data(vmci_dev, dg_in_buffer, current_dg_in_buffer_size);
	dg = (struct vmci_datagram *)dg_in_buffer;
	remaining_bytes = current_dg_in_buffer_size;

	/*
	 * Read through the buffer until an invalid datagram header is
	 * encountered. The exit condition for datagrams read through
	 * VMCI_DATA_IN_ADDR is a bit more complicated, since a datagram
	 * can start on any page boundary in the buffer.
	 */
	while (dg->dst.resource != VMCI_INVALID_ID ||
	       (is_io_port && remaining_bytes > PAGE_SIZE)) {
		unsigned dg_in_size;

		/*
		 * If using VMCI_DATA_IN_ADDR, skip to the next page
		 * as a datagram can start on any page boundary.
		 */
		if (dg->dst.resource == VMCI_INVALID_ID) {
			dg = (struct vmci_datagram *)roundup(
				(uintptr_t)dg + 1, PAGE_SIZE);
			remaining_bytes =
				(size_t)(dg_in_buffer +
					 current_dg_in_buffer_size -
					 (u8 *)dg);
			continue;
		}

		dg_in_size = VMCI_DG_SIZE_ALIGNED(dg);

		if (dg_in_size <= dg_in_buffer_size) {
			int result;

			/*
			 * If the remaining bytes in the datagram
			 * buffer don't contain the complete
			 * datagram, we first make sure we have enough
			 * room for it and then we read the remainder
			 * of the datagram and possibly any following
			 * datagrams.
			 */
			if (dg_in_size > remaining_bytes) {
				if (remaining_bytes !=
				    current_dg_in_buffer_size) {

					/*
					 * We move the partial
					 * datagram to the front and
					 * read the remainder of the
					 * datagram and possibly
					 * following calls into the
					 * following bytes.
					 */
					memmove(dg_in_buffer, dg_in_buffer +
						current_dg_in_buffer_size -
						remaining_bytes,
						remaining_bytes);
					dg = (struct vmci_datagram *)
						dg_in_buffer;
				}

				if (current_dg_in_buffer_size !=
				    dg_in_buffer_size)
					current_dg_in_buffer_size =
						dg_in_buffer_size;

				vmci_read_data(vmci_dev,
					       dg_in_buffer +
						remaining_bytes,
					       current_dg_in_buffer_size -
						remaining_bytes);
			}

			/*
			 * We special case event datagrams from the
			 * hypervisor.
			 */
			if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID &&
			    dg->dst.resource == VMCI_EVENT_HANDLER) {
				result = vmci_event_dispatch(dg);
			} else {
				result = vmci_datagram_invoke_guest_handler(dg);
			}
			if (result < VMCI_SUCCESS)
				dev_dbg(vmci_dev->dev,
					"Datagram with resource (ID=0x%x) failed (err=%d)\n",
					dg->dst.resource, result);

			/* On to the next datagram. */
			dg = (struct vmci_datagram *)((u8 *)dg +
						      dg_in_size);
		} else {
			size_t bytes_to_skip;

			/*
			 * Datagram doesn't fit in datagram buffer of maximal
			 * size. We drop it.
			 */
			dev_dbg(vmci_dev->dev,
				"Failed to receive datagram (size=%u bytes)\n",
				dg_in_size);

			bytes_to_skip = dg_in_size - remaining_bytes;
			if (current_dg_in_buffer_size != dg_in_buffer_size)
				current_dg_in_buffer_size = dg_in_buffer_size;
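
			/*
			 * Skip the oversized datagram: keep reading (and
			 * discarding) full buffers until its remaining
			 * bytes fit in the last buffer read.
			 */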
			for (;;) {
				vmci_read_data(vmci_dev, dg_in_buffer,
					       current_dg_in_buffer_size);
				if (bytes_to_skip <= current_dg_in_buffer_size)
					break;

				bytes_to_skip -= current_dg_in_buffer_size;
			}
			dg = (struct vmci_datagram *)(dg_in_buffer +
						      bytes_to_skip);
		}

		remaining_bytes =
			(size_t) (dg_in_buffer + current_dg_in_buffer_size -
				  (u8 *)dg);

		if (remaining_bytes < VMCI_DG_HEADERSIZE) {
			/* Get the next batch of datagrams. */

			vmci_read_data(vmci_dev, dg_in_buffer,
				       current_dg_in_buffer_size);
			dg = (struct vmci_datagram *)dg_in_buffer;
			remaining_bytes = current_dg_in_buffer_size;
		}
	}
}

/*
 * Scans the notification bitmap for raised flags, clears them
 * and handles the notifications.
 */
static void vmci_process_bitmap(struct vmci_guest_device *dev)
{
	if (!dev->notification_bitmap) {
		dev_dbg(dev->dev, "No bitmap present in %s\n", __func__);
		return;
	}

	vmci_dbell_scan_notification_entries(dev->notification_bitmap);
}

/*
 * Interrupt handler for legacy or MSI interrupt, or for first MSI-X
 * interrupt (vector VMCI_INTR_DATAGRAM).
 */
static irqreturn_t vmci_interrupt(int irq, void *_dev)
{
	struct vmci_guest_device *dev = _dev;

	/*
	 * If we are using MSI-X with exclusive vectors then we simply call
	 * vmci_dispatch_dgs(), since we know the interrupt was meant for us.
	 * Otherwise we must read the ICR to determine what to do.
	 */

	if (dev->exclusive_vectors) {
		vmci_dispatch_dgs(dev);
	} else {
		unsigned int icr;

		/* Acknowledge interrupt and determine what needs doing. */
		icr = vmci_read_reg(dev, VMCI_ICR_ADDR);
		if (icr == 0 || icr == ~0)
			return IRQ_NONE;

		if (icr & VMCI_ICR_DATAGRAM) {
			vmci_dispatch_dgs(dev);
			icr &= ~VMCI_ICR_DATAGRAM;
		}

		if (icr & VMCI_ICR_NOTIFICATION) {
			vmci_process_bitmap(dev);
			icr &= ~VMCI_ICR_NOTIFICATION;
		}

		if (icr & VMCI_ICR_DMA_DATAGRAM) {
			wake_up_all(&dev->inout_wq);
			icr &= ~VMCI_ICR_DMA_DATAGRAM;
		}

		if (icr != 0)
			dev_warn(dev->dev,
				 "Ignoring unknown interrupt cause (%d)\n",
				 icr);
	}

	return IRQ_HANDLED;
}

/*
 * Interrupt handler for MSI-X interrupt vector VMCI_INTR_NOTIFICATION,
 * which is for the notification bitmap. Will only get called if we are
 * using MSI-X with exclusive vectors.
 */
static irqreturn_t vmci_interrupt_bm(int irq, void *_dev)
{
	struct vmci_guest_device *dev = _dev;

	/* For MSI-X we can just assume it was meant for us. */
	vmci_process_bitmap(dev);

	return IRQ_HANDLED;
}

/*
 * Interrupt handler for MSI-X interrupt vector VMCI_INTR_DMA_DATAGRAM,
 * which is for the completion of a DMA datagram send or receive operation.
 * Will only get called if we are using MSI-X with exclusive vectors.
 */
static irqreturn_t vmci_interrupt_dma_datagram(int irq, void *_dev)
{
	struct vmci_guest_device *dev = _dev;

	wake_up_all(&dev->inout_wq);

	return IRQ_HANDLED;
}
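
/*
 * Free the datagram buffers allocated at probe time: the coherent DMA
 * send (tx) and receive buffers when MMIO register access is used, or
 * the vmalloc'ed receive buffer otherwise.
 */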
static void vmci_free_dg_buffers(struct vmci_guest_device *vmci_dev)
{
	if (vmci_dev->mmio_base != NULL) {
		if (vmci_dev->tx_buffer != NULL)
			dma_free_coherent(vmci_dev->dev,
					  VMCI_DMA_DG_BUFFER_SIZE,
					  vmci_dev->tx_buffer,
					  vmci_dev->tx_buffer_base);
		if (vmci_dev->data_buffer != NULL)
			dma_free_coherent(vmci_dev->dev,
					  VMCI_DMA_DG_BUFFER_SIZE,
					  vmci_dev->data_buffer,
					  vmci_dev->data_buffer_base);
	} else {
		vfree(vmci_dev->data_buffer);
	}
}

/*
 * Most of the initialization at module load time is done here.
 */
static int vmci_guest_probe_device(struct pci_dev *pdev,
				   const struct pci_device_id *id)
{
	struct vmci_guest_device *vmci_dev;
	void __iomem *iobase = NULL;
	void __iomem *mmio_base = NULL;
	unsigned int num_irq_vectors;
	unsigned int capabilities;
	unsigned int caps_in_use;
	unsigned long cmd;
	int vmci_err;
	int error;

	dev_dbg(&pdev->dev, "Probing for vmci/PCI guest device\n");

	error = pcim_enable_device(pdev);
	if (error) {
		dev_err(&pdev->dev,
			"Failed to enable VMCI device: %d\n", error);
		return error;
	}

	/*
	 * The VMCI device with mmio access to registers requests 256KB
	 * for BAR1. If present, driver will use new VMCI device
	 * functionality for register access and datagram send/recv.
	 */

	if (pci_resource_len(pdev, 1) == VMCI_WITH_MMIO_ACCESS_BAR_SIZE) {
		dev_info(&pdev->dev, "MMIO register access is available\n");
		mmio_base = pci_iomap_range(pdev, 1, VMCI_MMIO_ACCESS_OFFSET,
					    VMCI_MMIO_ACCESS_SIZE);
		/* If the map fails, we fall back to IOIO access. */
		if (!mmio_base)
			dev_warn(&pdev->dev, "Failed to map MMIO register access\n");
	}

	if (!mmio_base) {
		if (IS_ENABLED(CONFIG_ARM64)) {
			dev_err(&pdev->dev, "MMIO base is invalid\n");
			return -ENXIO;
		}
		error = pcim_iomap_regions(pdev, BIT(0), KBUILD_MODNAME);
		if (error) {
			dev_err(&pdev->dev, "Failed to reserve/map IO regions\n");
			return error;
		}
		iobase = pcim_iomap_table(pdev)[0];
	}

	vmci_dev = devm_kzalloc(&pdev->dev, sizeof(*vmci_dev), GFP_KERNEL);
	if (!vmci_dev) {
		dev_err(&pdev->dev,
			"Can't allocate memory for VMCI device\n");
		error = -ENOMEM;
		goto err_unmap_mmio_base;
	}

	vmci_dev->dev = &pdev->dev;
	vmci_dev->exclusive_vectors = false;
	vmci_dev->iobase = iobase;
	vmci_dev->mmio_base = mmio_base;

	init_waitqueue_head(&vmci_dev->inout_wq);
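
	/*
	 * Devices with MMIO register access transfer datagrams by DMA and
	 * need coherent send (tx) and receive buffers; IO port based
	 * devices only need a receive buffer for "rep in" transfers.
	 */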
	if (mmio_base != NULL) {
		vmci_dev->tx_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE,
							 &vmci_dev->tx_buffer_base,
							 GFP_KERNEL);
		if (!vmci_dev->tx_buffer) {
			dev_err(&pdev->dev,
				"Can't allocate memory for datagram tx buffer\n");
			error = -ENOMEM;
			goto err_unmap_mmio_base;
		}

		vmci_dev->data_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE,
							   &vmci_dev->data_buffer_base,
							   GFP_KERNEL);
	} else {
		vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE);
	}
	if (!vmci_dev->data_buffer) {
		dev_err(&pdev->dev,
			"Can't allocate memory for datagram buffer\n");
		error = -ENOMEM;
		goto err_free_data_buffers;
	}

	pci_set_master(pdev);	/* To enable queue_pair functionality. */

	/*
	 * Verify that the VMCI Device supports the capabilities that
	 * we need. If the device is missing capabilities that we would
	 * like to use, check for fallback capabilities and use those
	 * instead (so we can run a new VM on old hosts). Fail the load if
	 * a required capability is missing and there is no fallback.
	 *
	 * Right now, we need datagrams. There are no fallbacks.
	 */
	capabilities = vmci_read_reg(vmci_dev, VMCI_CAPS_ADDR);
	if (!(capabilities & VMCI_CAPS_DATAGRAM)) {
		dev_err(&pdev->dev, "Device does not support datagrams\n");
		error = -ENXIO;
		goto err_free_data_buffers;
	}
	caps_in_use = VMCI_CAPS_DATAGRAM;

	/*
	 * Use 64-bit PPNs if the device supports them.
	 *
	 * There is no check for the return value of dma_set_mask_and_coherent
	 * since this driver can handle the default mask values if
	 * dma_set_mask_and_coherent fails.
	 */
	if (capabilities & VMCI_CAPS_PPN64) {
		dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
		use_ppn64 = true;
		caps_in_use |= VMCI_CAPS_PPN64;
	} else {
		dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(44));
		use_ppn64 = false;
	}

	/*
	 * If the hardware supports notifications, we will use that as
	 * well.
	 */
	if (capabilities & VMCI_CAPS_NOTIFICATIONS) {
		vmci_dev->notification_bitmap = dma_alloc_coherent(
			&pdev->dev, PAGE_SIZE, &vmci_dev->notification_base,
			GFP_KERNEL);
		if (!vmci_dev->notification_bitmap)
			dev_warn(&pdev->dev,
				 "Unable to allocate notification bitmap\n");
		else
			caps_in_use |= VMCI_CAPS_NOTIFICATIONS;
	}

	if (mmio_base != NULL) {
		if (capabilities & VMCI_CAPS_DMA_DATAGRAM) {
			caps_in_use |= VMCI_CAPS_DMA_DATAGRAM;
		} else {
			dev_err(&pdev->dev,
				"Missing capability: VMCI_CAPS_DMA_DATAGRAM\n");
			error = -ENXIO;
			goto err_free_notification_bitmap;
		}
	}

	dev_info(&pdev->dev, "Using capabilities 0x%x\n", caps_in_use);

	/* Let the host know which capabilities we intend to use. */
	vmci_write_reg(vmci_dev, caps_in_use, VMCI_CAPS_ADDR);

	if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
		/* Let the device know the size for pages passed down. */
		vmci_write_reg(vmci_dev, PAGE_SHIFT, VMCI_GUEST_PAGE_SHIFT);

		/* Configure the high order parts of the data in/out buffers. */
		vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->data_buffer_base),
			       VMCI_DATA_IN_HIGH_ADDR);
		vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->tx_buffer_base),
			       VMCI_DATA_OUT_HIGH_ADDR);
	}

	/* Set up global device so that we can start sending datagrams */
	spin_lock_irq(&vmci_dev_spinlock);
	vmci_dev_g = vmci_dev;
	vmci_pdev = pdev;
	spin_unlock_irq(&vmci_dev_spinlock);

	/*
	 * Register notification bitmap with device if that capability is
	 * used.
	 */
	if (caps_in_use & VMCI_CAPS_NOTIFICATIONS) {
		unsigned long bitmap_ppn =
			vmci_dev->notification_base >> PAGE_SHIFT;
		if (!vmci_dbell_register_notification_bitmap(bitmap_ppn)) {
			dev_warn(&pdev->dev,
				 "VMCI device unable to register notification bitmap with PPN 0x%lx\n",
				 bitmap_ppn);
			error = -ENXIO;
			goto err_remove_vmci_dev_g;
		}
	}

	/* Check host capabilities. */
	error = vmci_check_host_caps(pdev);
	if (error)
		goto err_remove_vmci_dev_g;

	/* Enable device. */

	/*
	 * We subscribe to the VMCI_EVENT_CTX_ID_UPDATE here so we can
	 * update the internal context id when needed.
	 */
	vmci_err = vmci_event_subscribe(VMCI_EVENT_CTX_ID_UPDATE,
					vmci_guest_cid_update, NULL,
					&ctx_update_sub_id);
	if (vmci_err < VMCI_SUCCESS)
		dev_warn(&pdev->dev,
			 "Failed to subscribe to event (type=%d): %d\n",
			 VMCI_EVENT_CTX_ID_UPDATE, vmci_err);

	/*
	 * Enable interrupts. Try MSI-X first, then MSI, and then fall back on
	 * legacy interrupts.
	 */
	if (vmci_dev->mmio_base != NULL)
		num_irq_vectors = VMCI_MAX_INTRS;
	else
		num_irq_vectors = VMCI_MAX_INTRS_NOTIFICATION;
	error = pci_alloc_irq_vectors(pdev, num_irq_vectors, num_irq_vectors,
				      PCI_IRQ_MSIX);
	if (error < 0) {
		error = pci_alloc_irq_vectors(pdev, 1, 1,
				PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY);
		if (error < 0)
			goto err_unsubscribe_event;
	} else {
		vmci_dev->exclusive_vectors = true;
	}

	/*
	 * Request IRQ for legacy or MSI interrupts, or for first
	 * MSI-X vector.
	 */
	error = request_threaded_irq(pci_irq_vector(pdev, 0), NULL,
				     vmci_interrupt, IRQF_SHARED,
				     KBUILD_MODNAME, vmci_dev);
	if (error) {
		dev_err(&pdev->dev, "Irq %u in use: %d\n",
			pci_irq_vector(pdev, 0), error);
		goto err_disable_msi;
	}

	/*
	 * For MSI-X with exclusive vectors we need to request an
	 * interrupt for each vector so that we get a separate
	 * interrupt handler routine. This allows us to distinguish
	 * between the vectors.
	 */
	if (vmci_dev->exclusive_vectors) {
		error = request_threaded_irq(pci_irq_vector(pdev, 1), NULL,
					     vmci_interrupt_bm, 0,
					     KBUILD_MODNAME, vmci_dev);
		if (error) {
			dev_err(&pdev->dev,
				"Failed to allocate irq %u: %d\n",
				pci_irq_vector(pdev, 1), error);
			goto err_free_irq;
		}
		if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
			error = request_threaded_irq(pci_irq_vector(pdev, 2),
						     NULL,
						     vmci_interrupt_dma_datagram,
						     0, KBUILD_MODNAME,
						     vmci_dev);
			if (error) {
				dev_err(&pdev->dev,
					"Failed to allocate irq %u: %d\n",
					pci_irq_vector(pdev, 2), error);
				goto err_free_bm_irq;
			}
		}
	}

	dev_dbg(&pdev->dev, "Registered device\n");

	atomic_inc(&vmci_num_guest_devices);

	/* Enable specific interrupt bits. */
	cmd = VMCI_IMR_DATAGRAM;
	if (caps_in_use & VMCI_CAPS_NOTIFICATIONS)
		cmd |= VMCI_IMR_NOTIFICATION;
	if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM)
		cmd |= VMCI_IMR_DMA_DATAGRAM;
	vmci_write_reg(vmci_dev, cmd, VMCI_IMR_ADDR);

	/* Enable interrupts. */
	vmci_write_reg(vmci_dev, VMCI_CONTROL_INT_ENABLE, VMCI_CONTROL_ADDR);

	pci_set_drvdata(pdev, vmci_dev);

	vmci_call_vsock_callback(false);
	return 0;

err_free_bm_irq:
	if (vmci_dev->exclusive_vectors)
		free_irq(pci_irq_vector(pdev, 1), vmci_dev);

err_free_irq:
	free_irq(pci_irq_vector(pdev, 0), vmci_dev);

err_disable_msi:
	pci_free_irq_vectors(pdev);

err_unsubscribe_event:
	vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
	if (vmci_err < VMCI_SUCCESS)
		dev_warn(&pdev->dev,
			 "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
			 VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);

err_remove_vmci_dev_g:
	spin_lock_irq(&vmci_dev_spinlock);
	vmci_pdev = NULL;
	vmci_dev_g = NULL;
	spin_unlock_irq(&vmci_dev_spinlock);

err_free_notification_bitmap:
	if (vmci_dev->notification_bitmap) {
		vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR);
		dma_free_coherent(&pdev->dev, PAGE_SIZE,
				  vmci_dev->notification_bitmap,
				  vmci_dev->notification_base);
	}

err_free_data_buffers:
	vmci_free_dg_buffers(vmci_dev);

err_unmap_mmio_base:
	if (mmio_base != NULL)
		pci_iounmap(pdev, mmio_base);

	/* The rest are managed resources and will be freed by PCI core */
	return error;
}

static void vmci_guest_remove_device(struct pci_dev *pdev)
{
	struct vmci_guest_device *vmci_dev = pci_get_drvdata(pdev);
	int vmci_err;

	dev_dbg(&pdev->dev, "Removing device\n");

	atomic_dec(&vmci_num_guest_devices);

	vmci_qp_guest_endpoints_exit();

	vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
	if (vmci_err < VMCI_SUCCESS)
		dev_warn(&pdev->dev,
			 "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
			 VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);

	spin_lock_irq(&vmci_dev_spinlock);
	vmci_dev_g = NULL;
	vmci_pdev = NULL;
	spin_unlock_irq(&vmci_dev_spinlock);

	dev_dbg(&pdev->dev, "Resetting vmci device\n");
	vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR);

	/*
	 * Free IRQ and then disable MSI/MSI-X as appropriate. For
	 * MSI-X, we might have multiple vectors, each with their own
	 * IRQ, which we must free too.
	 */
	if (vmci_dev->exclusive_vectors) {
		free_irq(pci_irq_vector(pdev, 1), vmci_dev);
		if (vmci_dev->mmio_base != NULL)
			free_irq(pci_irq_vector(pdev, 2), vmci_dev);
	}
	free_irq(pci_irq_vector(pdev, 0), vmci_dev);
	pci_free_irq_vectors(pdev);

	if (vmci_dev->notification_bitmap) {
		/*
		 * The device reset above cleared the bitmap state of the
		 * device, so we can safely free it here.
		 */

		dma_free_coherent(&pdev->dev, PAGE_SIZE,
				  vmci_dev->notification_bitmap,
				  vmci_dev->notification_base);
	}

	vmci_free_dg_buffers(vmci_dev);

	if (vmci_dev->mmio_base != NULL)
		pci_iounmap(pdev, vmci_dev->mmio_base);

	/* The rest are managed resources and will be freed by PCI core */
}

static const struct pci_device_id vmci_ids[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_VMCI), },
	{ 0 },
};
MODULE_DEVICE_TABLE(pci, vmci_ids);

static struct pci_driver vmci_guest_driver = {
	.name		= KBUILD_MODNAME,
	.id_table	= vmci_ids,
	.probe		= vmci_guest_probe_device,
	.remove		= vmci_guest_remove_device,
};

int __init vmci_guest_init(void)
{
	return pci_register_driver(&vmci_guest_driver);
}

void __exit vmci_guest_exit(void)
{
	pci_unregister_driver(&vmci_guest_driver);
}