1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * VMware VMCI Driver 4 * 5 * Copyright (C) 2012 VMware, Inc. All rights reserved. 6 */ 7 8 #include <linux/vmw_vmci_defs.h> 9 #include <linux/vmw_vmci_api.h> 10 #include <linux/moduleparam.h> 11 #include <linux/interrupt.h> 12 #include <linux/highmem.h> 13 #include <linux/kernel.h> 14 #include <linux/mm.h> 15 #include <linux/module.h> 16 #include <linux/processor.h> 17 #include <linux/sched.h> 18 #include <linux/slab.h> 19 #include <linux/init.h> 20 #include <linux/pci.h> 21 #include <linux/smp.h> 22 #include <linux/io.h> 23 #include <linux/vmalloc.h> 24 25 #include "vmci_datagram.h" 26 #include "vmci_doorbell.h" 27 #include "vmci_context.h" 28 #include "vmci_driver.h" 29 #include "vmci_event.h" 30 31 #define PCI_DEVICE_ID_VMWARE_VMCI 0x0740 32 33 #define VMCI_UTIL_NUM_RESOURCES 1 34 35 /* 36 * Datagram buffers for DMA send/receive must accommodate at least 37 * a maximum sized datagram and the header. 38 */ 39 #define VMCI_DMA_DG_BUFFER_SIZE (VMCI_MAX_DG_SIZE + PAGE_SIZE) 40 41 static bool vmci_disable_msi; 42 module_param_named(disable_msi, vmci_disable_msi, bool, 0); 43 MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)"); 44 45 static bool vmci_disable_msix; 46 module_param_named(disable_msix, vmci_disable_msix, bool, 0); 47 MODULE_PARM_DESC(disable_msix, "Disable MSI-X use in driver - (default=0)"); 48 49 static u32 ctx_update_sub_id = VMCI_INVALID_ID; 50 static u32 vm_context_id = VMCI_INVALID_ID; 51 52 struct vmci_guest_device { 53 struct device *dev; /* PCI device we are attached to */ 54 void __iomem *iobase; 55 void __iomem *mmio_base; 56 57 bool exclusive_vectors; 58 59 struct tasklet_struct datagram_tasklet; 60 struct tasklet_struct bm_tasklet; 61 struct wait_queue_head inout_wq; 62 63 void *data_buffer; 64 dma_addr_t data_buffer_base; 65 void *tx_buffer; 66 dma_addr_t tx_buffer_base; 67 void *notification_bitmap; 68 dma_addr_t notification_base; 69 }; 70 71 static bool use_ppn64; 72 73 bool vmci_use_ppn64(void) 74 { 75 return use_ppn64; 76 } 77 78 /* vmci_dev singleton device and supporting data*/ 79 struct pci_dev *vmci_pdev; 80 static struct vmci_guest_device *vmci_dev_g; 81 static DEFINE_SPINLOCK(vmci_dev_spinlock); 82 83 static atomic_t vmci_num_guest_devices = ATOMIC_INIT(0); 84 85 bool vmci_guest_code_active(void) 86 { 87 return atomic_read(&vmci_num_guest_devices) != 0; 88 } 89 90 u32 vmci_get_vm_context_id(void) 91 { 92 if (vm_context_id == VMCI_INVALID_ID) { 93 struct vmci_datagram get_cid_msg; 94 get_cid_msg.dst = 95 vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, 96 VMCI_GET_CONTEXT_ID); 97 get_cid_msg.src = VMCI_ANON_SRC_HANDLE; 98 get_cid_msg.payload_size = 0; 99 vm_context_id = vmci_send_datagram(&get_cid_msg); 100 } 101 return vm_context_id; 102 } 103 104 static unsigned int vmci_read_reg(struct vmci_guest_device *dev, u32 reg) 105 { 106 if (dev->mmio_base != NULL) 107 return readl(dev->mmio_base + reg); 108 return ioread32(dev->iobase + reg); 109 } 110 111 static void vmci_write_reg(struct vmci_guest_device *dev, u32 val, u32 reg) 112 { 113 if (dev->mmio_base != NULL) 114 writel(val, dev->mmio_base + reg); 115 else 116 iowrite32(val, dev->iobase + reg); 117 } 118 119 static void vmci_read_data(struct vmci_guest_device *vmci_dev, 120 void *dest, size_t size) 121 { 122 if (vmci_dev->mmio_base == NULL) 123 ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR, 124 dest, size); 125 else { 126 /* 127 * For DMA datagrams, the data_buffer will contain the header on the 128 * first page, followed by the incoming datagram(s) on the following 129 * pages. The header uses an S/G element immediately following the 130 * header on the first page to point to the data area. 131 */ 132 struct vmci_data_in_out_header *buffer_header = vmci_dev->data_buffer; 133 struct vmci_sg_elem *sg_array = (struct vmci_sg_elem *)(buffer_header + 1); 134 size_t buffer_offset = dest - vmci_dev->data_buffer; 135 136 buffer_header->opcode = 1; 137 buffer_header->size = 1; 138 buffer_header->busy = 0; 139 sg_array[0].addr = vmci_dev->data_buffer_base + buffer_offset; 140 sg_array[0].size = size; 141 142 vmci_write_reg(vmci_dev, lower_32_bits(vmci_dev->data_buffer_base), 143 VMCI_DATA_IN_LOW_ADDR); 144 145 wait_event(vmci_dev->inout_wq, buffer_header->busy == 1); 146 } 147 } 148 149 static int vmci_write_data(struct vmci_guest_device *dev, 150 struct vmci_datagram *dg) 151 { 152 int result; 153 154 if (dev->mmio_base != NULL) { 155 struct vmci_data_in_out_header *buffer_header = dev->tx_buffer; 156 u8 *dg_out_buffer = (u8 *)(buffer_header + 1); 157 158 if (VMCI_DG_SIZE(dg) > VMCI_MAX_DG_SIZE) 159 return VMCI_ERROR_INVALID_ARGS; 160 161 /* 162 * Initialize send buffer with outgoing datagram 163 * and set up header for inline data. Device will 164 * not access buffer asynchronously - only after 165 * the write to VMCI_DATA_OUT_LOW_ADDR. 166 */ 167 memcpy(dg_out_buffer, dg, VMCI_DG_SIZE(dg)); 168 buffer_header->opcode = 0; 169 buffer_header->size = VMCI_DG_SIZE(dg); 170 buffer_header->busy = 1; 171 172 vmci_write_reg(dev, lower_32_bits(dev->tx_buffer_base), 173 VMCI_DATA_OUT_LOW_ADDR); 174 175 /* Caller holds a spinlock, so cannot block. */ 176 spin_until_cond(buffer_header->busy == 0); 177 178 result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR); 179 if (result == VMCI_SUCCESS) 180 result = (int)buffer_header->result; 181 } else { 182 iowrite8_rep(dev->iobase + VMCI_DATA_OUT_ADDR, 183 dg, VMCI_DG_SIZE(dg)); 184 result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR); 185 } 186 187 return result; 188 } 189 190 /* 191 * VM to hypervisor call mechanism. We use the standard VMware naming 192 * convention since shared code is calling this function as well. 193 */ 194 int vmci_send_datagram(struct vmci_datagram *dg) 195 { 196 unsigned long flags; 197 int result; 198 199 /* Check args. */ 200 if (dg == NULL) 201 return VMCI_ERROR_INVALID_ARGS; 202 203 /* 204 * Need to acquire spinlock on the device because the datagram 205 * data may be spread over multiple pages and the monitor may 206 * interleave device user rpc calls from multiple 207 * VCPUs. Acquiring the spinlock precludes that 208 * possibility. Disabling interrupts to avoid incoming 209 * datagrams during a "rep out" and possibly landing up in 210 * this function. 211 */ 212 spin_lock_irqsave(&vmci_dev_spinlock, flags); 213 214 if (vmci_dev_g) { 215 vmci_write_data(vmci_dev_g, dg); 216 result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR); 217 } else { 218 result = VMCI_ERROR_UNAVAILABLE; 219 } 220 221 spin_unlock_irqrestore(&vmci_dev_spinlock, flags); 222 223 return result; 224 } 225 EXPORT_SYMBOL_GPL(vmci_send_datagram); 226 227 /* 228 * Gets called with the new context id if updated or resumed. 229 * Context id. 230 */ 231 static void vmci_guest_cid_update(u32 sub_id, 232 const struct vmci_event_data *event_data, 233 void *client_data) 234 { 235 const struct vmci_event_payld_ctx *ev_payload = 236 vmci_event_data_const_payload(event_data); 237 238 if (sub_id != ctx_update_sub_id) { 239 pr_devel("Invalid subscriber (ID=0x%x)\n", sub_id); 240 return; 241 } 242 243 if (!event_data || ev_payload->context_id == VMCI_INVALID_ID) { 244 pr_devel("Invalid event data\n"); 245 return; 246 } 247 248 pr_devel("Updating context from (ID=0x%x) to (ID=0x%x) on event (type=%d)\n", 249 vm_context_id, ev_payload->context_id, event_data->event); 250 251 vm_context_id = ev_payload->context_id; 252 } 253 254 /* 255 * Verify that the host supports the hypercalls we need. If it does not, 256 * try to find fallback hypercalls and use those instead. Returns 257 * true if required hypercalls (or fallback hypercalls) are 258 * supported by the host, false otherwise. 259 */ 260 static int vmci_check_host_caps(struct pci_dev *pdev) 261 { 262 bool result; 263 struct vmci_resource_query_msg *msg; 264 u32 msg_size = sizeof(struct vmci_resource_query_hdr) + 265 VMCI_UTIL_NUM_RESOURCES * sizeof(u32); 266 struct vmci_datagram *check_msg; 267 268 check_msg = kzalloc(msg_size, GFP_KERNEL); 269 if (!check_msg) { 270 dev_err(&pdev->dev, "%s: Insufficient memory\n", __func__); 271 return -ENOMEM; 272 } 273 274 check_msg->dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, 275 VMCI_RESOURCES_QUERY); 276 check_msg->src = VMCI_ANON_SRC_HANDLE; 277 check_msg->payload_size = msg_size - VMCI_DG_HEADERSIZE; 278 msg = (struct vmci_resource_query_msg *)VMCI_DG_PAYLOAD(check_msg); 279 280 msg->num_resources = VMCI_UTIL_NUM_RESOURCES; 281 msg->resources[0] = VMCI_GET_CONTEXT_ID; 282 283 /* Checks that hyper calls are supported */ 284 result = vmci_send_datagram(check_msg) == 0x01; 285 kfree(check_msg); 286 287 dev_dbg(&pdev->dev, "%s: Host capability check: %s\n", 288 __func__, result ? "PASSED" : "FAILED"); 289 290 /* We need the vector. There are no fallbacks. */ 291 return result ? 0 : -ENXIO; 292 } 293 294 /* 295 * Reads datagrams from the device and dispatches them. For IO port 296 * based access to the device, we always start reading datagrams into 297 * only the first page of the datagram buffer. If the datagrams don't 298 * fit into one page, we use the maximum datagram buffer size for the 299 * remainder of the invocation. This is a simple heuristic for not 300 * penalizing small datagrams. For DMA-based datagrams, we always 301 * use the maximum datagram buffer size, since there is no performance 302 * penalty for doing so. 303 * 304 * This function assumes that it has exclusive access to the data 305 * in register(s) for the duration of the call. 306 */ 307 static void vmci_dispatch_dgs(unsigned long data) 308 { 309 struct vmci_guest_device *vmci_dev = (struct vmci_guest_device *)data; 310 u8 *dg_in_buffer = vmci_dev->data_buffer; 311 struct vmci_datagram *dg; 312 size_t dg_in_buffer_size = VMCI_MAX_DG_SIZE; 313 size_t current_dg_in_buffer_size; 314 size_t remaining_bytes; 315 bool is_io_port = vmci_dev->mmio_base == NULL; 316 317 BUILD_BUG_ON(VMCI_MAX_DG_SIZE < PAGE_SIZE); 318 319 if (!is_io_port) { 320 /* For mmio, the first page is used for the header. */ 321 dg_in_buffer += PAGE_SIZE; 322 323 /* 324 * For DMA-based datagram operations, there is no performance 325 * penalty for reading the maximum buffer size. 326 */ 327 current_dg_in_buffer_size = VMCI_MAX_DG_SIZE; 328 } else { 329 current_dg_in_buffer_size = PAGE_SIZE; 330 } 331 vmci_read_data(vmci_dev, dg_in_buffer, current_dg_in_buffer_size); 332 dg = (struct vmci_datagram *)dg_in_buffer; 333 remaining_bytes = current_dg_in_buffer_size; 334 335 /* 336 * Read through the buffer until an invalid datagram header is 337 * encountered. The exit condition for datagrams read through 338 * VMCI_DATA_IN_ADDR is a bit more complicated, since a datagram 339 * can start on any page boundary in the buffer. 340 */ 341 while (dg->dst.resource != VMCI_INVALID_ID || 342 (is_io_port && remaining_bytes > PAGE_SIZE)) { 343 unsigned dg_in_size; 344 345 /* 346 * If using VMCI_DATA_IN_ADDR, skip to the next page 347 * as a datagram can start on any page boundary. 348 */ 349 if (dg->dst.resource == VMCI_INVALID_ID) { 350 dg = (struct vmci_datagram *)roundup( 351 (uintptr_t)dg + 1, PAGE_SIZE); 352 remaining_bytes = 353 (size_t)(dg_in_buffer + 354 current_dg_in_buffer_size - 355 (u8 *)dg); 356 continue; 357 } 358 359 dg_in_size = VMCI_DG_SIZE_ALIGNED(dg); 360 361 if (dg_in_size <= dg_in_buffer_size) { 362 int result; 363 364 /* 365 * If the remaining bytes in the datagram 366 * buffer doesn't contain the complete 367 * datagram, we first make sure we have enough 368 * room for it and then we read the reminder 369 * of the datagram and possibly any following 370 * datagrams. 371 */ 372 if (dg_in_size > remaining_bytes) { 373 if (remaining_bytes != 374 current_dg_in_buffer_size) { 375 376 /* 377 * We move the partial 378 * datagram to the front and 379 * read the reminder of the 380 * datagram and possibly 381 * following calls into the 382 * following bytes. 383 */ 384 memmove(dg_in_buffer, dg_in_buffer + 385 current_dg_in_buffer_size - 386 remaining_bytes, 387 remaining_bytes); 388 dg = (struct vmci_datagram *) 389 dg_in_buffer; 390 } 391 392 if (current_dg_in_buffer_size != 393 dg_in_buffer_size) 394 current_dg_in_buffer_size = 395 dg_in_buffer_size; 396 397 vmci_read_data(vmci_dev, 398 dg_in_buffer + 399 remaining_bytes, 400 current_dg_in_buffer_size - 401 remaining_bytes); 402 } 403 404 /* 405 * We special case event datagrams from the 406 * hypervisor. 407 */ 408 if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID && 409 dg->dst.resource == VMCI_EVENT_HANDLER) { 410 result = vmci_event_dispatch(dg); 411 } else { 412 result = vmci_datagram_invoke_guest_handler(dg); 413 } 414 if (result < VMCI_SUCCESS) 415 dev_dbg(vmci_dev->dev, 416 "Datagram with resource (ID=0x%x) failed (err=%d)\n", 417 dg->dst.resource, result); 418 419 /* On to the next datagram. */ 420 dg = (struct vmci_datagram *)((u8 *)dg + 421 dg_in_size); 422 } else { 423 size_t bytes_to_skip; 424 425 /* 426 * Datagram doesn't fit in datagram buffer of maximal 427 * size. We drop it. 428 */ 429 dev_dbg(vmci_dev->dev, 430 "Failed to receive datagram (size=%u bytes)\n", 431 dg_in_size); 432 433 bytes_to_skip = dg_in_size - remaining_bytes; 434 if (current_dg_in_buffer_size != dg_in_buffer_size) 435 current_dg_in_buffer_size = dg_in_buffer_size; 436 437 for (;;) { 438 vmci_read_data(vmci_dev, dg_in_buffer, 439 current_dg_in_buffer_size); 440 if (bytes_to_skip <= current_dg_in_buffer_size) 441 break; 442 443 bytes_to_skip -= current_dg_in_buffer_size; 444 } 445 dg = (struct vmci_datagram *)(dg_in_buffer + 446 bytes_to_skip); 447 } 448 449 remaining_bytes = 450 (size_t) (dg_in_buffer + current_dg_in_buffer_size - 451 (u8 *)dg); 452 453 if (remaining_bytes < VMCI_DG_HEADERSIZE) { 454 /* Get the next batch of datagrams. */ 455 456 vmci_read_data(vmci_dev, dg_in_buffer, 457 current_dg_in_buffer_size); 458 dg = (struct vmci_datagram *)dg_in_buffer; 459 remaining_bytes = current_dg_in_buffer_size; 460 } 461 } 462 } 463 464 /* 465 * Scans the notification bitmap for raised flags, clears them 466 * and handles the notifications. 467 */ 468 static void vmci_process_bitmap(unsigned long data) 469 { 470 struct vmci_guest_device *dev = (struct vmci_guest_device *)data; 471 472 if (!dev->notification_bitmap) { 473 dev_dbg(dev->dev, "No bitmap present in %s\n", __func__); 474 return; 475 } 476 477 vmci_dbell_scan_notification_entries(dev->notification_bitmap); 478 } 479 480 /* 481 * Interrupt handler for legacy or MSI interrupt, or for first MSI-X 482 * interrupt (vector VMCI_INTR_DATAGRAM). 483 */ 484 static irqreturn_t vmci_interrupt(int irq, void *_dev) 485 { 486 struct vmci_guest_device *dev = _dev; 487 488 /* 489 * If we are using MSI-X with exclusive vectors then we simply schedule 490 * the datagram tasklet, since we know the interrupt was meant for us. 491 * Otherwise we must read the ICR to determine what to do. 492 */ 493 494 if (dev->exclusive_vectors) { 495 tasklet_schedule(&dev->datagram_tasklet); 496 } else { 497 unsigned int icr; 498 499 /* Acknowledge interrupt and determine what needs doing. */ 500 icr = vmci_read_reg(dev, VMCI_ICR_ADDR); 501 if (icr == 0 || icr == ~0) 502 return IRQ_NONE; 503 504 if (icr & VMCI_ICR_DATAGRAM) { 505 tasklet_schedule(&dev->datagram_tasklet); 506 icr &= ~VMCI_ICR_DATAGRAM; 507 } 508 509 if (icr & VMCI_ICR_NOTIFICATION) { 510 tasklet_schedule(&dev->bm_tasklet); 511 icr &= ~VMCI_ICR_NOTIFICATION; 512 } 513 514 515 if (icr & VMCI_ICR_DMA_DATAGRAM) { 516 wake_up_all(&dev->inout_wq); 517 icr &= ~VMCI_ICR_DMA_DATAGRAM; 518 } 519 520 if (icr != 0) 521 dev_warn(dev->dev, 522 "Ignoring unknown interrupt cause (%d)\n", 523 icr); 524 } 525 526 return IRQ_HANDLED; 527 } 528 529 /* 530 * Interrupt handler for MSI-X interrupt vector VMCI_INTR_NOTIFICATION, 531 * which is for the notification bitmap. Will only get called if we are 532 * using MSI-X with exclusive vectors. 533 */ 534 static irqreturn_t vmci_interrupt_bm(int irq, void *_dev) 535 { 536 struct vmci_guest_device *dev = _dev; 537 538 /* For MSI-X we can just assume it was meant for us. */ 539 tasklet_schedule(&dev->bm_tasklet); 540 541 return IRQ_HANDLED; 542 } 543 544 /* 545 * Interrupt handler for MSI-X interrupt vector VMCI_INTR_DMA_DATAGRAM, 546 * which is for the completion of a DMA datagram send or receive operation. 547 * Will only get called if we are using MSI-X with exclusive vectors. 548 */ 549 static irqreturn_t vmci_interrupt_dma_datagram(int irq, void *_dev) 550 { 551 struct vmci_guest_device *dev = _dev; 552 553 wake_up_all(&dev->inout_wq); 554 555 return IRQ_HANDLED; 556 } 557 558 static void vmci_free_dg_buffers(struct vmci_guest_device *vmci_dev) 559 { 560 if (vmci_dev->mmio_base != NULL) { 561 if (vmci_dev->tx_buffer != NULL) 562 dma_free_coherent(vmci_dev->dev, 563 VMCI_DMA_DG_BUFFER_SIZE, 564 vmci_dev->tx_buffer, 565 vmci_dev->tx_buffer_base); 566 if (vmci_dev->data_buffer != NULL) 567 dma_free_coherent(vmci_dev->dev, 568 VMCI_DMA_DG_BUFFER_SIZE, 569 vmci_dev->data_buffer, 570 vmci_dev->data_buffer_base); 571 } else { 572 vfree(vmci_dev->data_buffer); 573 } 574 } 575 576 /* 577 * Most of the initialization at module load time is done here. 578 */ 579 static int vmci_guest_probe_device(struct pci_dev *pdev, 580 const struct pci_device_id *id) 581 { 582 struct vmci_guest_device *vmci_dev; 583 void __iomem *iobase = NULL; 584 void __iomem *mmio_base = NULL; 585 unsigned int num_irq_vectors; 586 unsigned int capabilities; 587 unsigned int caps_in_use; 588 unsigned long cmd; 589 int vmci_err; 590 int error; 591 592 dev_dbg(&pdev->dev, "Probing for vmci/PCI guest device\n"); 593 594 error = pcim_enable_device(pdev); 595 if (error) { 596 dev_err(&pdev->dev, 597 "Failed to enable VMCI device: %d\n", error); 598 return error; 599 } 600 601 /* 602 * The VMCI device with mmio access to registers requests 256KB 603 * for BAR1. If present, driver will use new VMCI device 604 * functionality for register access and datagram send/recv. 605 */ 606 607 if (pci_resource_len(pdev, 1) == VMCI_WITH_MMIO_ACCESS_BAR_SIZE) { 608 dev_info(&pdev->dev, "MMIO register access is available\n"); 609 mmio_base = pci_iomap_range(pdev, 1, VMCI_MMIO_ACCESS_OFFSET, 610 VMCI_MMIO_ACCESS_SIZE); 611 /* If the map fails, we fall back to IOIO access. */ 612 if (!mmio_base) 613 dev_warn(&pdev->dev, "Failed to map MMIO register access\n"); 614 } 615 616 if (!mmio_base) { 617 error = pcim_iomap_regions(pdev, BIT(0), KBUILD_MODNAME); 618 if (error) { 619 dev_err(&pdev->dev, "Failed to reserve/map IO regions\n"); 620 return error; 621 } 622 iobase = pcim_iomap_table(pdev)[0]; 623 } 624 625 vmci_dev = devm_kzalloc(&pdev->dev, sizeof(*vmci_dev), GFP_KERNEL); 626 if (!vmci_dev) { 627 dev_err(&pdev->dev, 628 "Can't allocate memory for VMCI device\n"); 629 return -ENOMEM; 630 } 631 632 vmci_dev->dev = &pdev->dev; 633 vmci_dev->exclusive_vectors = false; 634 vmci_dev->iobase = iobase; 635 vmci_dev->mmio_base = mmio_base; 636 637 tasklet_init(&vmci_dev->datagram_tasklet, 638 vmci_dispatch_dgs, (unsigned long)vmci_dev); 639 tasklet_init(&vmci_dev->bm_tasklet, 640 vmci_process_bitmap, (unsigned long)vmci_dev); 641 init_waitqueue_head(&vmci_dev->inout_wq); 642 643 if (mmio_base != NULL) { 644 vmci_dev->tx_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE, 645 &vmci_dev->tx_buffer_base, 646 GFP_KERNEL); 647 if (!vmci_dev->tx_buffer) { 648 dev_err(&pdev->dev, 649 "Can't allocate memory for datagram tx buffer\n"); 650 return -ENOMEM; 651 } 652 653 vmci_dev->data_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE, 654 &vmci_dev->data_buffer_base, 655 GFP_KERNEL); 656 } else { 657 vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE); 658 } 659 if (!vmci_dev->data_buffer) { 660 dev_err(&pdev->dev, 661 "Can't allocate memory for datagram buffer\n"); 662 error = -ENOMEM; 663 goto err_free_data_buffers; 664 } 665 666 pci_set_master(pdev); /* To enable queue_pair functionality. */ 667 668 /* 669 * Verify that the VMCI Device supports the capabilities that 670 * we need. If the device is missing capabilities that we would 671 * like to use, check for fallback capabilities and use those 672 * instead (so we can run a new VM on old hosts). Fail the load if 673 * a required capability is missing and there is no fallback. 674 * 675 * Right now, we need datagrams. There are no fallbacks. 676 */ 677 capabilities = vmci_read_reg(vmci_dev, VMCI_CAPS_ADDR); 678 if (!(capabilities & VMCI_CAPS_DATAGRAM)) { 679 dev_err(&pdev->dev, "Device does not support datagrams\n"); 680 error = -ENXIO; 681 goto err_free_data_buffers; 682 } 683 caps_in_use = VMCI_CAPS_DATAGRAM; 684 685 /* 686 * Use 64-bit PPNs if the device supports. 687 * 688 * There is no check for the return value of dma_set_mask_and_coherent 689 * since this driver can handle the default mask values if 690 * dma_set_mask_and_coherent fails. 691 */ 692 if (capabilities & VMCI_CAPS_PPN64) { 693 dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); 694 use_ppn64 = true; 695 caps_in_use |= VMCI_CAPS_PPN64; 696 } else { 697 dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(44)); 698 use_ppn64 = false; 699 } 700 701 /* 702 * If the hardware supports notifications, we will use that as 703 * well. 704 */ 705 if (capabilities & VMCI_CAPS_NOTIFICATIONS) { 706 vmci_dev->notification_bitmap = dma_alloc_coherent( 707 &pdev->dev, PAGE_SIZE, &vmci_dev->notification_base, 708 GFP_KERNEL); 709 if (!vmci_dev->notification_bitmap) { 710 dev_warn(&pdev->dev, 711 "Unable to allocate notification bitmap\n"); 712 } else { 713 memset(vmci_dev->notification_bitmap, 0, PAGE_SIZE); 714 caps_in_use |= VMCI_CAPS_NOTIFICATIONS; 715 } 716 } 717 718 if (mmio_base != NULL) { 719 if (capabilities & VMCI_CAPS_DMA_DATAGRAM) { 720 caps_in_use |= VMCI_CAPS_DMA_DATAGRAM; 721 } else { 722 dev_err(&pdev->dev, 723 "Missing capability: VMCI_CAPS_DMA_DATAGRAM\n"); 724 error = -ENXIO; 725 goto err_free_data_buffers; 726 } 727 } 728 729 dev_info(&pdev->dev, "Using capabilities 0x%x\n", caps_in_use); 730 731 /* Let the host know which capabilities we intend to use. */ 732 vmci_write_reg(vmci_dev, caps_in_use, VMCI_CAPS_ADDR); 733 734 if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) { 735 /* Let the device know the size for pages passed down. */ 736 vmci_write_reg(vmci_dev, PAGE_SHIFT, VMCI_GUEST_PAGE_SHIFT); 737 738 /* Configure the high order parts of the data in/out buffers. */ 739 vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->data_buffer_base), 740 VMCI_DATA_IN_HIGH_ADDR); 741 vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->tx_buffer_base), 742 VMCI_DATA_OUT_HIGH_ADDR); 743 } 744 745 /* Set up global device so that we can start sending datagrams */ 746 spin_lock_irq(&vmci_dev_spinlock); 747 vmci_dev_g = vmci_dev; 748 vmci_pdev = pdev; 749 spin_unlock_irq(&vmci_dev_spinlock); 750 751 /* 752 * Register notification bitmap with device if that capability is 753 * used. 754 */ 755 if (caps_in_use & VMCI_CAPS_NOTIFICATIONS) { 756 unsigned long bitmap_ppn = 757 vmci_dev->notification_base >> PAGE_SHIFT; 758 if (!vmci_dbell_register_notification_bitmap(bitmap_ppn)) { 759 dev_warn(&pdev->dev, 760 "VMCI device unable to register notification bitmap with PPN 0x%lx\n", 761 bitmap_ppn); 762 error = -ENXIO; 763 goto err_remove_vmci_dev_g; 764 } 765 } 766 767 /* Check host capabilities. */ 768 error = vmci_check_host_caps(pdev); 769 if (error) 770 goto err_remove_bitmap; 771 772 /* Enable device. */ 773 774 /* 775 * We subscribe to the VMCI_EVENT_CTX_ID_UPDATE here so we can 776 * update the internal context id when needed. 777 */ 778 vmci_err = vmci_event_subscribe(VMCI_EVENT_CTX_ID_UPDATE, 779 vmci_guest_cid_update, NULL, 780 &ctx_update_sub_id); 781 if (vmci_err < VMCI_SUCCESS) 782 dev_warn(&pdev->dev, 783 "Failed to subscribe to event (type=%d): %d\n", 784 VMCI_EVENT_CTX_ID_UPDATE, vmci_err); 785 786 /* 787 * Enable interrupts. Try MSI-X first, then MSI, and then fallback on 788 * legacy interrupts. 789 */ 790 if (vmci_dev->mmio_base != NULL) 791 num_irq_vectors = VMCI_MAX_INTRS; 792 else 793 num_irq_vectors = VMCI_MAX_INTRS_NOTIFICATION; 794 error = pci_alloc_irq_vectors(pdev, num_irq_vectors, num_irq_vectors, 795 PCI_IRQ_MSIX); 796 if (error < 0) { 797 error = pci_alloc_irq_vectors(pdev, 1, 1, 798 PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY); 799 if (error < 0) 800 goto err_remove_bitmap; 801 } else { 802 vmci_dev->exclusive_vectors = true; 803 } 804 805 /* 806 * Request IRQ for legacy or MSI interrupts, or for first 807 * MSI-X vector. 808 */ 809 error = request_irq(pci_irq_vector(pdev, 0), vmci_interrupt, 810 IRQF_SHARED, KBUILD_MODNAME, vmci_dev); 811 if (error) { 812 dev_err(&pdev->dev, "Irq %u in use: %d\n", 813 pci_irq_vector(pdev, 0), error); 814 goto err_disable_msi; 815 } 816 817 /* 818 * For MSI-X with exclusive vectors we need to request an 819 * interrupt for each vector so that we get a separate 820 * interrupt handler routine. This allows us to distinguish 821 * between the vectors. 822 */ 823 if (vmci_dev->exclusive_vectors) { 824 error = request_irq(pci_irq_vector(pdev, 1), 825 vmci_interrupt_bm, 0, KBUILD_MODNAME, 826 vmci_dev); 827 if (error) { 828 dev_err(&pdev->dev, 829 "Failed to allocate irq %u: %d\n", 830 pci_irq_vector(pdev, 1), error); 831 goto err_free_irq; 832 } 833 if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) { 834 error = request_irq(pci_irq_vector(pdev, 2), 835 vmci_interrupt_dma_datagram, 836 0, KBUILD_MODNAME, vmci_dev); 837 if (error) { 838 dev_err(&pdev->dev, 839 "Failed to allocate irq %u: %d\n", 840 pci_irq_vector(pdev, 2), error); 841 goto err_free_bm_irq; 842 } 843 } 844 } 845 846 dev_dbg(&pdev->dev, "Registered device\n"); 847 848 atomic_inc(&vmci_num_guest_devices); 849 850 /* Enable specific interrupt bits. */ 851 cmd = VMCI_IMR_DATAGRAM; 852 if (caps_in_use & VMCI_CAPS_NOTIFICATIONS) 853 cmd |= VMCI_IMR_NOTIFICATION; 854 if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) 855 cmd |= VMCI_IMR_DMA_DATAGRAM; 856 vmci_write_reg(vmci_dev, cmd, VMCI_IMR_ADDR); 857 858 /* Enable interrupts. */ 859 vmci_write_reg(vmci_dev, VMCI_CONTROL_INT_ENABLE, VMCI_CONTROL_ADDR); 860 861 pci_set_drvdata(pdev, vmci_dev); 862 863 vmci_call_vsock_callback(false); 864 return 0; 865 866 err_free_bm_irq: 867 free_irq(pci_irq_vector(pdev, 1), vmci_dev); 868 err_free_irq: 869 free_irq(pci_irq_vector(pdev, 0), vmci_dev); 870 tasklet_kill(&vmci_dev->datagram_tasklet); 871 tasklet_kill(&vmci_dev->bm_tasklet); 872 873 err_disable_msi: 874 pci_free_irq_vectors(pdev); 875 876 vmci_err = vmci_event_unsubscribe(ctx_update_sub_id); 877 if (vmci_err < VMCI_SUCCESS) 878 dev_warn(&pdev->dev, 879 "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n", 880 VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err); 881 882 err_remove_bitmap: 883 if (vmci_dev->notification_bitmap) { 884 vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR); 885 dma_free_coherent(&pdev->dev, PAGE_SIZE, 886 vmci_dev->notification_bitmap, 887 vmci_dev->notification_base); 888 } 889 890 err_remove_vmci_dev_g: 891 spin_lock_irq(&vmci_dev_spinlock); 892 vmci_pdev = NULL; 893 vmci_dev_g = NULL; 894 spin_unlock_irq(&vmci_dev_spinlock); 895 896 err_free_data_buffers: 897 vmci_free_dg_buffers(vmci_dev); 898 899 /* The rest are managed resources and will be freed by PCI core */ 900 return error; 901 } 902 903 static void vmci_guest_remove_device(struct pci_dev *pdev) 904 { 905 struct vmci_guest_device *vmci_dev = pci_get_drvdata(pdev); 906 int vmci_err; 907 908 dev_dbg(&pdev->dev, "Removing device\n"); 909 910 atomic_dec(&vmci_num_guest_devices); 911 912 vmci_qp_guest_endpoints_exit(); 913 914 vmci_err = vmci_event_unsubscribe(ctx_update_sub_id); 915 if (vmci_err < VMCI_SUCCESS) 916 dev_warn(&pdev->dev, 917 "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n", 918 VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err); 919 920 spin_lock_irq(&vmci_dev_spinlock); 921 vmci_dev_g = NULL; 922 vmci_pdev = NULL; 923 spin_unlock_irq(&vmci_dev_spinlock); 924 925 dev_dbg(&pdev->dev, "Resetting vmci device\n"); 926 vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR); 927 928 /* 929 * Free IRQ and then disable MSI/MSI-X as appropriate. For 930 * MSI-X, we might have multiple vectors, each with their own 931 * IRQ, which we must free too. 932 */ 933 if (vmci_dev->exclusive_vectors) { 934 free_irq(pci_irq_vector(pdev, 1), vmci_dev); 935 if (vmci_dev->mmio_base != NULL) 936 free_irq(pci_irq_vector(pdev, 2), vmci_dev); 937 } 938 free_irq(pci_irq_vector(pdev, 0), vmci_dev); 939 pci_free_irq_vectors(pdev); 940 941 tasklet_kill(&vmci_dev->datagram_tasklet); 942 tasklet_kill(&vmci_dev->bm_tasklet); 943 944 if (vmci_dev->notification_bitmap) { 945 /* 946 * The device reset above cleared the bitmap state of the 947 * device, so we can safely free it here. 948 */ 949 950 dma_free_coherent(&pdev->dev, PAGE_SIZE, 951 vmci_dev->notification_bitmap, 952 vmci_dev->notification_base); 953 } 954 955 vmci_free_dg_buffers(vmci_dev); 956 957 if (vmci_dev->mmio_base != NULL) 958 pci_iounmap(pdev, vmci_dev->mmio_base); 959 960 /* The rest are managed resources and will be freed by PCI core */ 961 } 962 963 static const struct pci_device_id vmci_ids[] = { 964 { PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_VMCI), }, 965 { 0 }, 966 }; 967 MODULE_DEVICE_TABLE(pci, vmci_ids); 968 969 static struct pci_driver vmci_guest_driver = { 970 .name = KBUILD_MODNAME, 971 .id_table = vmci_ids, 972 .probe = vmci_guest_probe_device, 973 .remove = vmci_guest_remove_device, 974 }; 975 976 int __init vmci_guest_init(void) 977 { 978 return pci_register_driver(&vmci_guest_driver); 979 } 980 981 void __exit vmci_guest_exit(void) 982 { 983 pci_unregister_driver(&vmci_guest_driver); 984 } 985