1 /* 2 * Inter-VM Shared Memory PCI device. 3 * 4 * Author: 5 * Cam Macdonell <cam@cs.ualberta.ca> 6 * 7 * Based On: cirrus_vga.c 8 * Copyright (c) 2004 Fabrice Bellard 9 * Copyright (c) 2004 Makoto Suzuki (suzu) 10 * 11 * and rtl8139.c 12 * Copyright (c) 2006 Igor Kovalenko 13 * 14 * This code is licensed under the GNU GPL v2. 15 * 16 * Contributions after 2012-01-13 are licensed under the terms of the 17 * GNU GPL, version 2 or (at your option) any later version. 18 */ 19 #include "qemu/osdep.h" 20 #include "qemu/units.h" 21 #include "qapi/error.h" 22 #include "qemu/cutils.h" 23 #include "hw/hw.h" 24 #include "hw/pci/pci.h" 25 #include "hw/pci/msi.h" 26 #include "hw/pci/msix.h" 27 #include "sysemu/kvm.h" 28 #include "migration/blocker.h" 29 #include "qemu/error-report.h" 30 #include "qemu/event_notifier.h" 31 #include "qom/object_interfaces.h" 32 #include "chardev/char-fe.h" 33 #include "sysemu/hostmem.h" 34 #include "sysemu/qtest.h" 35 #include "qapi/visitor.h" 36 37 #include "hw/misc/ivshmem.h" 38 39 #define PCI_VENDOR_ID_IVSHMEM PCI_VENDOR_ID_REDHAT_QUMRANET 40 #define PCI_DEVICE_ID_IVSHMEM 0x1110 41 42 #define IVSHMEM_MAX_PEERS UINT16_MAX 43 #define IVSHMEM_IOEVENTFD 0 44 #define IVSHMEM_MSI 1 45 46 #define IVSHMEM_REG_BAR_SIZE 0x100 47 48 #define IVSHMEM_DEBUG 0 49 #define IVSHMEM_DPRINTF(fmt, ...) 
\
    do {                                             \
        if (IVSHMEM_DEBUG) {                         \
            printf("IVSHMEM: " fmt, ## __VA_ARGS__); \
        }                                            \
    } while (0)

#define TYPE_IVSHMEM_COMMON "ivshmem-common"
#define IVSHMEM_COMMON(obj) \
    OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_COMMON)

#define TYPE_IVSHMEM_PLAIN "ivshmem-plain"
#define IVSHMEM_PLAIN(obj) \
    OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_PLAIN)

#define TYPE_IVSHMEM_DOORBELL "ivshmem-doorbell"
#define IVSHMEM_DOORBELL(obj) \
    OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_DOORBELL)

#define TYPE_IVSHMEM "ivshmem"
#define IVSHMEM(obj) \
    OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM)

/* Interrupt state of one peer: one eventfd per MSI-X vector. */
typedef struct Peer {
    int nb_eventfds;         /* number of valid entries in @eventfds */
    EventNotifier *eventfds; /* one notifier per vector, fds from the server */
} Peer;

/* Per-vector bookkeeping for MSI-X / KVM irqfd routing. */
typedef struct MSIVector {
    PCIDevice *pdev; /* non-NULL while the vector is wired up */
    int virq;        /* KVM MSI route number (irqfd mode only) */
    bool unmasked;   /* true while the irqfd notifier is attached */
} MSIVector;

typedef struct IVShmemState {
    /*< private >*/
    PCIDevice parent_obj;
    /*< public >*/

    uint32_t features;

    /* exactly one of these two may be set */
    HostMemoryBackend *hostmem; /* with interrupts */
    CharBackend server_chr; /* without interrupts */

    /* registers */
    uint32_t intrmask;
    uint32_t intrstatus;
    int vm_id;      /* our own peer ID, assigned by the server */

    /* BARs */
    MemoryRegion ivshmem_mmio;  /* BAR 0 (registers) */
    MemoryRegion *ivshmem_bar2; /* BAR 2 (shared memory) */
    MemoryRegion server_bar2;   /* used with server_chr */

    /* interrupt support */
    Peer *peers;
    int nb_peers;               /* space in @peers[] */
    uint32_t vectors;
    MSIVector *msi_vectors;
    uint64_t msg_buf;           /* buffer for receiving server messages */
    int msg_buffered_bytes;     /* #bytes in @msg_buf */

    /* migration stuff */
    OnOffAuto master;
    Error *migration_blocker;
} IVShmemState;

/* registers for the Inter-VM shared memory device */
enum ivshmem_registers {
    INTRMASK = 0,
    INTRSTATUS = 4,
    IVPOSITION = 8,
    DOORBELL = 12,
};

/* Test a feature bit (IVSHMEM_IOEVENTFD or IVSHMEM_MSI) in @ivs->features. */
static inline uint32_t ivshmem_has_feature(IVShmemState *ivs,
                                           unsigned int feature) {
    return (ivs->features & (1 << feature));
}

static inline bool ivshmem_is_master(IVShmemState *s)
{
    /* realize resolves ON_OFF_AUTO_AUTO, so it must not be seen here */
    assert(s->master != ON_OFF_AUTO_AUTO);
    return s->master == ON_OFF_AUTO_ON;
}

/* Recompute the INTx line from intrstatus & intrmask (legacy INTx mode only). */
static void ivshmem_update_irq(IVShmemState *s)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t isr = s->intrstatus & s->intrmask;

    /*
     * Do nothing unless the device actually uses INTx.  Here's how
     * the device variants signal interrupts, what they put in PCI
     * config space:
     * Device variant    Interrupt  Interrupt Pin  MSI-X cap.
     * ivshmem-plain        none            0         no
     * ivshmem-doorbell    MSI-X            1        yes(1)
     * ivshmem,msi=off      INTx            1         no
     * ivshmem,msi=on      MSI-X            1(2)     yes(1)
     * (1) if guest enabled MSI-X
     * (2) the device lies
     * Leads to the condition for doing nothing:
     */
    if (ivshmem_has_feature(s, IVSHMEM_MSI)
        || !d->config[PCI_INTERRUPT_PIN]) {
        return;
    }

    /* don't print ISR resets */
    if (isr) {
        IVSHMEM_DPRINTF("Set IRQ to %d (%04x %04x)\n",
                        isr ? 1 : 0, s->intrstatus, s->intrmask);
    }

    pci_set_irq(d, isr != 0);
}

/* Guest write to the INTRMASK register. */
static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val)
{
    IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val);

    s->intrmask = val;
    ivshmem_update_irq(s);
}

/* Guest read of the INTRMASK register. */
static uint32_t ivshmem_IntrMask_read(IVShmemState *s)
{
    uint32_t ret = s->intrmask;

    IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret);
    return ret;
}

/* Guest write to the INTRSTATUS register. */
static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val)
{
    IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val);

    s->intrstatus = val;
    ivshmem_update_irq(s);
}

/* Guest read of INTRSTATUS; the read clears it (read-to-ack semantics). */
static uint32_t ivshmem_IntrStatus_read(IVShmemState *s)
{
    uint32_t ret = s->intrstatus;

    /* reading ISR clears all interrupts */
    s->intrstatus = 0;
    ivshmem_update_irq(s);
    return ret;
}

/*
 * MMIO write handler for BAR 0.  A DOORBELL write encodes the
 * destination peer ID in bits 31..16 and the vector in bits 7..0;
 * ringing the doorbell signals that peer's eventfd for the vector.
 */
static void ivshmem_io_write(void *opaque, hwaddr addr,
                             uint64_t val, unsigned size)
{
    IVShmemState *s = opaque;

    uint16_t dest = val >> 16;
    uint16_t vector = val & 0xff;

    addr &= 0xfc;

    IVSHMEM_DPRINTF("writing to addr " TARGET_FMT_plx "\n", addr);
    switch (addr)
    {
        case INTRMASK:
            ivshmem_IntrMask_write(s, val);
            break;

        case INTRSTATUS:
            ivshmem_IntrStatus_write(s, val);
            break;

        case DOORBELL:
            /* check that dest VM ID is reasonable */
            if (dest >= s->nb_peers) {
                IVSHMEM_DPRINTF("Invalid destination VM ID (%d)\n", dest);
                break;
            }

            /* check doorbell range */
            if (vector < s->peers[dest].nb_eventfds) {
                IVSHMEM_DPRINTF("Notifying VM %d on vector %d\n", dest, vector);
                event_notifier_set(&s->peers[dest].eventfds[vector]);
            } else {
                IVSHMEM_DPRINTF("Invalid destination vector %d on VM %d\n",
                                vector, dest);
            }
            break;
        default:
            IVSHMEM_DPRINTF("Unhandled write " TARGET_FMT_plx "\n", addr);
    }
}

static uint64_t
ivshmem_io_read(void *opaque, hwaddr addr,
                unsigned size)
{
    /* MMIO read handler for BAR 0 (registers). */

    IVShmemState *s = opaque;
    uint32_t ret;

    switch (addr)
    {
        case INTRMASK:
            ret = ivshmem_IntrMask_read(s);
            break;

        case INTRSTATUS:
            ret = ivshmem_IntrStatus_read(s);
            break;

        case IVPOSITION:
            /* our own peer ID, as assigned by the server */
            ret = s->vm_id;
            break;

        default:
            IVSHMEM_DPRINTF("why are we reading " TARGET_FMT_plx "\n", addr);
            ret = 0;
    }

    return ret;
}

static const MemoryRegionOps ivshmem_mmio_ops = {
    .read = ivshmem_io_read,
    .write = ivshmem_io_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};

/*
 * Main-loop callback when our own eventfd for a vector fires
 * (non-irqfd mode): forward the event to the guest via MSI-X, or via
 * the legacy interrupt status register.
 */
static void ivshmem_vector_notify(void *opaque)
{
    MSIVector *entry = opaque;
    PCIDevice *pdev = entry->pdev;
    IVShmemState *s = IVSHMEM_COMMON(pdev);
    int vector = entry - s->msi_vectors;  /* index by pointer arithmetic */
    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];

    if (!event_notifier_test_and_clear(n)) {
        return;
    }

    IVSHMEM_DPRINTF("interrupt on vector %p %d\n", pdev, vector);
    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
        if (msix_enabled(pdev)) {
            msix_notify(pdev, vector);
        }
    } else {
        ivshmem_IntrStatus_write(s, 1);
    }
}

/*
 * MSI-X vector unmask notifier (irqfd mode): refresh the KVM MSI route
 * with the guest's message, then attach the irqfd so KVM injects the
 * interrupt directly without exiting to QEMU.
 */
static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector,
                                 MSIMessage msg)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);
    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
    MSIVector *v = &s->msi_vectors[vector];
    int ret;

    IVSHMEM_DPRINTF("vector unmask %p %d\n", dev, vector);
    if (!v->pdev) {
        error_report("ivshmem: vector %d route does not exist", vector);
        return -EINVAL;
    }
    assert(!v->unmasked);

    ret = kvm_irqchip_update_msi_route(kvm_state, v->virq, msg, dev);
    if (ret < 0) {
        return ret;
    }
    /* make the updated route visible to KVM before attaching the irqfd */
    kvm_irqchip_commit_routes(kvm_state);

    ret = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq);
    if (ret < 0) {
        return ret;
    }
    v->unmasked = true;

    return 0;
}

/* MSI-X vector mask notifier (irqfd mode): detach the irqfd again. */
static void ivshmem_vector_mask(PCIDevice *dev, unsigned vector)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);
    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
    MSIVector *v = &s->msi_vectors[vector];
    int ret;

    IVSHMEM_DPRINTF("vector mask %p %d\n", dev, vector);
    if (!v->pdev) {
        error_report("ivshmem: vector %d route does not exist", vector);
        return;
    }
    assert(v->unmasked);

    ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, v->virq);
    if (ret < 0) {
        error_report("remove_irqfd_notifier_gsi failed");
        return;
    }
    v->unmasked = false;
}

/*
 * MSI-X poll notifier: for masked vectors, transfer pending eventfd
 * events into the MSI-X pending bit array so the guest sees them.
 */
static void ivshmem_vector_poll(PCIDevice *dev,
                                unsigned int vector_start,
                                unsigned int vector_end)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);
    unsigned int vector;

    IVSHMEM_DPRINTF("vector poll %p %d-%d\n", dev, vector_start, vector_end);

    vector_end = MIN(vector_end, s->vectors);

    for (vector = vector_start; vector < vector_end; vector++) {
        EventNotifier *notifier = &s->peers[s->vm_id].eventfds[vector];

        if (!msix_is_masked(dev, vector)) {
            continue;
        }

        if (event_notifier_test_and_clear(notifier)) {
            msix_set_pending(dev, vector);
        }
    }
}

/*
 * Non-irqfd mode: have the main loop watch @n and deliver its events
 * through ivshmem_vector_notify().
 */
static void watch_vector_notifier(IVShmemState *s, EventNotifier *n,
                                  int vector)
{
    int eventfd = event_notifier_get_fd(n);

    assert(!s->msi_vectors[vector].pdev);
    s->msi_vectors[vector].pdev = PCI_DEVICE(s);

    qemu_set_fd_handler(eventfd, ivshmem_vector_notify,
                        NULL, &s->msi_vectors[vector]);
}

/*
 * Register peer @posn's vector-@i eventfd as an ioeventfd on DOORBELL,
 * so a matching guest write is handled without exiting to QEMU.
 */
static void ivshmem_add_eventfd(IVShmemState *s, int posn, int i)
{
    memory_region_add_eventfd(&s->ivshmem_mmio,
                              DOORBELL,
                              4,
                              true,
                              (posn << 16) | i,
                              &s->peers[posn].eventfds[i]);
}

static void ivshmem_del_eventfd(IVShmemState *s, int posn, int i)
{
    /* Undo ivshmem_add_eventfd(). */
    memory_region_del_eventfd(&s->ivshmem_mmio,
                              DOORBELL,
                              4,
                              true,
                              (posn << 16) | i,
                              &s->peers[posn].eventfds[i]);
}

/* Tear down all eventfds of peer @posn (the peer disconnected). */
static void close_peer_eventfds(IVShmemState *s, int posn)
{
    int i, n;

    assert(posn >= 0 && posn < s->nb_peers);
    n = s->peers[posn].nb_eventfds;

    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
        /* batch the ioeventfd removals into one memory transaction */
        memory_region_transaction_begin();
        for (i = 0; i < n; i++) {
            ivshmem_del_eventfd(s, posn, i);
        }
        memory_region_transaction_commit();
    }

    for (i = 0; i < n; i++) {
        event_notifier_cleanup(&s->peers[posn].eventfds[i]);
    }

    g_free(s->peers[posn].eventfds);
    s->peers[posn].nb_eventfds = 0;
}

/* Grow @s->peers[] to @nb_peers entries; new entries start with no eventfds. */
static void resize_peers(IVShmemState *s, int nb_peers)
{
    int old_nb_peers = s->nb_peers;
    int i;

    assert(nb_peers > old_nb_peers);
    IVSHMEM_DPRINTF("bumping storage to %d peers\n", nb_peers);

    s->peers = g_realloc(s->peers, nb_peers * sizeof(Peer));
    s->nb_peers = nb_peers;

    for (i = old_nb_peers; i < nb_peers; i++) {
        s->peers[i].eventfds = g_new0(EventNotifier, s->vectors);
        s->peers[i].nb_eventfds = 0;
    }
}

/* Allocate a KVM MSI route for @vector and record it in @s->msi_vectors. */
static void ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector,
                                     Error **errp)
{
    PCIDevice *pdev = PCI_DEVICE(s);
    int ret;

    IVSHMEM_DPRINTF("ivshmem_add_kvm_msi_virq vector:%d\n", vector);
    assert(!s->msi_vectors[vector].pdev);

    ret = kvm_irqchip_add_msi_route(kvm_state, vector, pdev);
    if (ret < 0) {
        error_setg(errp, "kvm_irqchip_add_msi_route failed");
        return;
    }

    s->msi_vectors[vector].virq = ret;
    s->msi_vectors[vector].pdev = pdev;
}

/*
 * Wire up our own @vector: either watch the eventfd in the main loop,
 * or (with KVM irqfd support and MSI-X already enabled) route it
 * directly through KVM.
 */
static void setup_interrupt(IVShmemState *s, int vector, Error **errp)
{
    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
    bool with_irqfd = kvm_msi_via_irqfd_enabled() &&
        ivshmem_has_feature(s, IVSHMEM_MSI);
    PCIDevice *pdev = PCI_DEVICE(s);
    Error *err = NULL;

    IVSHMEM_DPRINTF("setting up interrupt for vector: %d\n", vector);

    if (!with_irqfd) {
        IVSHMEM_DPRINTF("with eventfd\n");
        watch_vector_notifier(s, n, vector);
    } else if (msix_enabled(pdev)) {
        IVSHMEM_DPRINTF("with irqfd\n");
        ivshmem_add_kvm_msi_virq(s, vector, &err);
        if (err) {
            error_propagate(errp, err);
            return;
        }

        if (!msix_is_masked(pdev, vector)) {
            kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL,
                                               s->msi_vectors[vector].virq);
            /* TODO handle error */
        }
    } else {
        /* it will be delayed until msix is enabled, in write_config */
        IVSHMEM_DPRINTF("with irqfd, delayed until msix enabled\n");
    }
}

/*
 * Map the shared memory file descriptor sent by the server into BAR 2.
 * Takes ownership of @fd (closed on error, kept by the memory region on
 * success).
 */
static void process_msg_shmem(IVShmemState *s, int fd, Error **errp)
{
    Error *local_err = NULL;
    struct stat buf;
    size_t size;

    if (s->ivshmem_bar2) {
        error_setg(errp, "server sent unexpected shared memory message");
        close(fd);
        return;
    }

    if (fstat(fd, &buf) < 0) {
        error_setg_errno(errp, errno,
                         "can't determine size of shared memory sent by server");
        close(fd);
        return;
    }

    size = buf.st_size;

    /* mmap the region and map into the BAR2 */
    memory_region_init_ram_from_fd(&s->server_bar2, OBJECT(s),
                                   "ivshmem.bar2", size, true, fd, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    s->ivshmem_bar2 = &s->server_bar2;
}

/* Handle a server message announcing that peer @posn has disconnected. */
static void process_msg_disconnect(IVShmemState *s, uint16_t posn,
                                   Error **errp)
{
    IVSHMEM_DPRINTF("posn %d has gone away\n", posn);
    if (posn >= s->nb_peers || posn == s->vm_id) {
        error_setg(errp, "invalid peer %d", posn);
        return;
    }
    close_peer_eventfds(s, posn);
}

/*
 * Handle a server message carrying an eventfd for peer @posn.
 * Takes ownership of @fd.
 */
static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd,
                                Error **errp)
{
    Peer *peer = &s->peers[posn];
    int vector;

    /*
     * The N-th connect message for this peer comes with the file
     * descriptor for vector N-1.  Count messages to find the vector.
     */
    if (peer->nb_eventfds >= s->vectors) {
        error_setg(errp, "Too many eventfd received, device has %d vectors",
                   s->vectors);
        close(fd);
        return;
    }
    vector = peer->nb_eventfds++;

    IVSHMEM_DPRINTF("eventfds[%d][%d] = %d\n", posn, vector, fd);
    event_notifier_init_fd(&peer->eventfds[vector], fd);
    fcntl_setfl(fd, O_NONBLOCK); /* msix/irqfd poll non block */

    if (posn == s->vm_id) {
        /* this eventfd is one of our own vectors: hook it up */
        setup_interrupt(s, vector, errp);
        /* TODO do we need to handle the error? */
    }

    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
        ivshmem_add_eventfd(s, posn, vector);
    }
}

/*
 * Dispatch one server message: @msg == -1 with an fd is the shared
 * memory; otherwise @msg is a peer ID, connecting (fd >= 0) or
 * disconnecting (fd < 0) that peer.
 */
static void process_msg(IVShmemState *s, int64_t msg, int fd, Error **errp)
{
    IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd);

    if (msg < -1 || msg > IVSHMEM_MAX_PEERS) {
        error_setg(errp, "server sent invalid message %" PRId64, msg);
        close(fd);
        return;
    }

    if (msg == -1) {
        process_msg_shmem(s, fd, errp);
        return;
    }

    if (msg >= s->nb_peers) {
        resize_peers(s, msg + 1);
    }

    if (fd >= 0) {
        process_msg_connect(s, msg, fd, errp);
    } else {
        process_msg_disconnect(s, msg, errp);
    }
}

/* Chardev callback: how many more bytes of the current message we accept. */
static int ivshmem_can_receive(void *opaque)
{
    IVShmemState *s = opaque;

    assert(s->msg_buffered_bytes < sizeof(s->msg_buf));
    return sizeof(s->msg_buf) - s->msg_buffered_bytes;
}

/* Chardev callback: buffer bytes until a full 8-byte message has arrived. */
static void ivshmem_read(void *opaque, const uint8_t *buf, int size)
{
    IVShmemState *s = opaque;
    Error *err = NULL;
    int fd;
    int64_t msg;

    assert(size >= 0 && s->msg_buffered_bytes + size <= sizeof(s->msg_buf));
    memcpy((unsigned char *)&s->msg_buf + s->msg_buffered_bytes, buf, size);
    s->msg_buffered_bytes += size;
    if (s->msg_buffered_bytes < sizeof(s->msg_buf)) {
        return;
    }
    msg = le64_to_cpu(s->msg_buf);
    s->msg_buffered_bytes = 0;

    fd = qemu_chr_fe_get_msgfd(&s->server_chr);

    process_msg(s, msg, fd, &err);
    if (err) {
        error_report_err(err);
    }
}

/*
 * Synchronously read one little-endian 64-bit message from the server,
 * retrying on EINTR, and fetch any file descriptor attached to it into
 * *@pfd.  Returns INT64_MIN on read failure (with @errp set).
 */
static int64_t ivshmem_recv_msg(IVShmemState *s, int *pfd, Error **errp)
{
    int64_t msg;
    int n, ret;

    n = 0;
    do {
        ret = qemu_chr_fe_read_all(&s->server_chr, (uint8_t *)&msg + n,
                                   sizeof(msg) - n);
        if (ret < 0) {
            if (ret == -EINTR) {
                continue;
            }
            error_setg_errno(errp, -ret, "read from server failed");
            return INT64_MIN;
        }
        n += ret;
    } while (n < sizeof(msg));

    *pfd = qemu_chr_fe_get_msgfd(&s->server_chr);
    return le64_to_cpu(msg);
}

/*
 * Receive and process the server's initial messages synchronously:
 * protocol version, our own peer ID, then peer/interrupt setup until
 * the shared memory message has been handled.
 */
static void ivshmem_recv_setup(IVShmemState *s, Error **errp)
{
    Error *err = NULL;
    int64_t msg;
    int fd;

    msg = ivshmem_recv_msg(s, &fd, &err);
    if (err) {
        error_propagate(errp, err);
        return;
    }
    if (msg != IVSHMEM_PROTOCOL_VERSION) {
        error_setg(errp, "server sent version %" PRId64 ", expecting %d",
                   msg, IVSHMEM_PROTOCOL_VERSION);
        return;
    }
    if (fd != -1) {
        error_setg(errp, "server sent invalid version message");
        return;
    }

    /*
     * ivshmem-server sends the remaining initial messages in a fixed
     * order, but the device has always accepted them in any order.
     * Stay as compatible as practical, just in case people use
     * servers that behave differently.
     */

    /*
     * ivshmem_device_spec.txt has always required the ID message
     * right here, and ivshmem-server has always complied.  However,
     * older versions of the device accepted it out of order, but
     * broke when an interrupt setup message arrived before it.
     */
    msg = ivshmem_recv_msg(s, &fd, &err);
    if (err) {
        error_propagate(errp, err);
        return;
    }
    if (fd != -1 || msg < 0 || msg > IVSHMEM_MAX_PEERS) {
        error_setg(errp, "server sent invalid ID message");
        return;
    }
    s->vm_id = msg;

    /*
     * Receive more messages until we got shared memory.
     */
    do {
        msg = ivshmem_recv_msg(s, &fd, &err);
        if (err) {
            error_propagate(errp, err);
            return;
        }
        process_msg(s, msg, fd, &err);
        if (err) {
            error_propagate(errp, err);
            return;
        }
    } while (msg != -1);

    /*
     * This function must either map the shared memory or fail.  The
     * loop above ensures that: it terminates normally only after it
     * successfully processed the server's shared memory message.
     * Assert that actually mapped the shared memory:
     */
    assert(s->ivshmem_bar2);
}

/* Select the MSI-X vectors used by device.
 * ivshmem maps events to vectors statically, so
 * we just enable all vectors on init and after reset.
 */
static void ivshmem_msix_vector_use(IVShmemState *s)
{
    PCIDevice *d = PCI_DEVICE(s);
    int i;

    for (i = 0; i < s->vectors; i++) {
        msix_vector_use(d, i);
    }
}

static void ivshmem_disable_irqfd(IVShmemState *s);

/* Device reset: drop irqfd routing and clear the interrupt registers. */
static void ivshmem_reset(DeviceState *d)
{
    IVShmemState *s = IVSHMEM_COMMON(d);

    ivshmem_disable_irqfd(s);

    s->intrstatus = 0;
    s->intrmask = 0;
    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
        ivshmem_msix_vector_use(s);
    }
}

/*
 * Allocate per-vector callback state and, with MSI, initialize MSI-X
 * in its own exclusive BAR.  Returns 0 on success, -1 on failure.
 */
static int ivshmem_setup_interrupts(IVShmemState *s, Error **errp)
{
    /* allocate QEMU callback data for receiving interrupts */
    s->msi_vectors = g_malloc0(s->vectors * sizeof(MSIVector));

    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
        if (msix_init_exclusive_bar(PCI_DEVICE(s), s->vectors, 1, errp)) {
            return -1;
        }

        IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors);
        ivshmem_msix_vector_use(s);
    }

    return 0;
}

/* Release the KVM MSI route of @vector, if one was allocated. */
static void ivshmem_remove_kvm_msi_virq(IVShmemState *s, int vector)
{
    IVSHMEM_DPRINTF("ivshmem_remove_kvm_msi_virq vector:%d\n", vector);

    if (s->msi_vectors[vector].pdev == NULL) {
        return;
    }

    /* it was cleaned when masked in the frontend. */
    kvm_irqchip_release_virq(kvm_state, s->msi_vectors[vector].virq);

    s->msi_vectors[vector].pdev = NULL;
}

/*
 * Guest enabled MSI-X: create a KVM MSI route per vector we have an
 * eventfd for and install the MSI-X mask/unmask/poll notifiers.
 * On partial failure, roll back the routes created so far.
 */
static void ivshmem_enable_irqfd(IVShmemState *s)
{
    PCIDevice *pdev = PCI_DEVICE(s);
    int i;

    for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
        Error *err = NULL;

        ivshmem_add_kvm_msi_virq(s, i, &err);
        if (err) {
            error_report_err(err);
            goto undo;
        }
    }

    if (msix_set_vector_notifiers(pdev,
                                  ivshmem_vector_unmask,
                                  ivshmem_vector_mask,
                                  ivshmem_vector_poll)) {
        error_report("ivshmem: msix_set_vector_notifiers failed");
        goto undo;
    }
    return;

undo:
    /* undo the routes allocated before the failure */
    while (--i >= 0) {
        ivshmem_remove_kvm_msi_virq(s, i);
    }
}

/* Guest disabled MSI-X (or device reset): undo ivshmem_enable_irqfd(). */
static void ivshmem_disable_irqfd(IVShmemState *s)
{
    PCIDevice *pdev = PCI_DEVICE(s);
    int i;

    /* nothing to do unless irqfd is currently active */
    if (!pdev->msix_vector_use_notifier) {
        return;
    }

    msix_unset_vector_notifiers(pdev);

    for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
        /*
         * MSI-X is already disabled here so msix_unset_vector_notifiers()
         * didn't call our release notifier.  Do it now to keep our masks and
         * unmasks balanced.
         */
        if (s->msi_vectors[i].unmasked) {
            ivshmem_vector_mask(pdev, i);
        }
        ivshmem_remove_kvm_msi_virq(s, i);
    }

}

/* Config space write: track MSI-X enable flips to switch irqfd on/off. */
static void ivshmem_write_config(PCIDevice *pdev, uint32_t address,
                                 uint32_t val, int len)
{
    IVShmemState *s = IVSHMEM_COMMON(pdev);
    int is_enabled, was_enabled = msix_enabled(pdev);

    pci_default_write_config(pdev, address, val, len);
    is_enabled = msix_enabled(pdev);

    if (kvm_msi_via_irqfd_enabled()) {
        if (!was_enabled && is_enabled) {
            ivshmem_enable_irqfd(s);
        } else if (was_enabled && !is_enabled) {
            ivshmem_disable_irqfd(s);
        }
    }
}

/* Realize code shared by ivshmem-plain, ivshmem-doorbell and ivshmem. */
static void ivshmem_common_realize(PCIDevice *dev, Error **errp)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);
    Error *err = NULL;
    uint8_t *pci_conf;
    Error *local_err = NULL;

    /* IRQFD requires MSI */
    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD) &&
        !ivshmem_has_feature(s, IVSHMEM_MSI)) {
        error_setg(errp, "ioeventfd/irqfd requires MSI");
        return;
    }

    pci_conf = dev->config;
    pci_conf[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY;

    memory_region_init_io(&s->ivshmem_mmio, OBJECT(s), &ivshmem_mmio_ops, s,
                          "ivshmem-mmio", IVSHMEM_REG_BAR_SIZE);

    /* region for registers*/
    pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY,
                     &s->ivshmem_mmio);

    if (s->hostmem != NULL) {
        IVSHMEM_DPRINTF("using hostmem\n");

        s->ivshmem_bar2 = host_memory_backend_get_memory(s->hostmem);
        host_memory_backend_set_mapped(s->hostmem, true);
    } else {
        Chardev *chr = qemu_chr_fe_get_driver(&s->server_chr);
        assert(chr);

        IVSHMEM_DPRINTF("using shared memory server (socket = %s)\n",
                        chr->filename);

        /* we allocate enough space for 16 peers and grow as needed */
        resize_peers(s, 16);

        /*
         * Receive setup messages from server synchronously.
903 * Older versions did it asynchronously, but that creates a 904 * number of entertaining race conditions. 905 */ 906 ivshmem_recv_setup(s, &err); 907 if (err) { 908 error_propagate(errp, err); 909 return; 910 } 911 912 if (s->master == ON_OFF_AUTO_ON && s->vm_id != 0) { 913 error_setg(errp, 914 "master must connect to the server before any peers"); 915 return; 916 } 917 918 qemu_chr_fe_set_handlers(&s->server_chr, ivshmem_can_receive, 919 ivshmem_read, NULL, NULL, s, NULL, true); 920 921 if (ivshmem_setup_interrupts(s, errp) < 0) { 922 error_prepend(errp, "Failed to initialize interrupts: "); 923 return; 924 } 925 } 926 927 if (s->master == ON_OFF_AUTO_AUTO) { 928 s->master = s->vm_id == 0 ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; 929 } 930 931 if (!ivshmem_is_master(s)) { 932 error_setg(&s->migration_blocker, 933 "Migration is disabled when using feature 'peer mode' in device 'ivshmem'"); 934 migrate_add_blocker(s->migration_blocker, &local_err); 935 if (local_err) { 936 error_propagate(errp, local_err); 937 error_free(s->migration_blocker); 938 return; 939 } 940 } 941 942 vmstate_register_ram(s->ivshmem_bar2, DEVICE(s)); 943 pci_register_bar(PCI_DEVICE(s), 2, 944 PCI_BASE_ADDRESS_SPACE_MEMORY | 945 PCI_BASE_ADDRESS_MEM_PREFETCH | 946 PCI_BASE_ADDRESS_MEM_TYPE_64, 947 s->ivshmem_bar2); 948 } 949 950 static void ivshmem_exit(PCIDevice *dev) 951 { 952 IVShmemState *s = IVSHMEM_COMMON(dev); 953 int i; 954 955 if (s->migration_blocker) { 956 migrate_del_blocker(s->migration_blocker); 957 error_free(s->migration_blocker); 958 } 959 960 if (memory_region_is_mapped(s->ivshmem_bar2)) { 961 if (!s->hostmem) { 962 void *addr = memory_region_get_ram_ptr(s->ivshmem_bar2); 963 int fd; 964 965 if (munmap(addr, memory_region_size(s->ivshmem_bar2) == -1)) { 966 error_report("Failed to munmap shared memory %s", 967 strerror(errno)); 968 } 969 970 fd = memory_region_get_fd(s->ivshmem_bar2); 971 close(fd); 972 } 973 974 vmstate_unregister_ram(s->ivshmem_bar2, DEVICE(dev)); 975 } 976 
977 if (s->hostmem) { 978 host_memory_backend_set_mapped(s->hostmem, false); 979 } 980 981 if (s->peers) { 982 for (i = 0; i < s->nb_peers; i++) { 983 close_peer_eventfds(s, i); 984 } 985 g_free(s->peers); 986 } 987 988 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 989 msix_uninit_exclusive_bar(dev); 990 } 991 992 g_free(s->msi_vectors); 993 } 994 995 static int ivshmem_pre_load(void *opaque) 996 { 997 IVShmemState *s = opaque; 998 999 if (!ivshmem_is_master(s)) { 1000 error_report("'peer' devices are not migratable"); 1001 return -EINVAL; 1002 } 1003 1004 return 0; 1005 } 1006 1007 static int ivshmem_post_load(void *opaque, int version_id) 1008 { 1009 IVShmemState *s = opaque; 1010 1011 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 1012 ivshmem_msix_vector_use(s); 1013 } 1014 return 0; 1015 } 1016 1017 static void ivshmem_common_class_init(ObjectClass *klass, void *data) 1018 { 1019 DeviceClass *dc = DEVICE_CLASS(klass); 1020 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); 1021 1022 k->realize = ivshmem_common_realize; 1023 k->exit = ivshmem_exit; 1024 k->config_write = ivshmem_write_config; 1025 k->vendor_id = PCI_VENDOR_ID_IVSHMEM; 1026 k->device_id = PCI_DEVICE_ID_IVSHMEM; 1027 k->class_id = PCI_CLASS_MEMORY_RAM; 1028 k->revision = 1; 1029 dc->reset = ivshmem_reset; 1030 set_bit(DEVICE_CATEGORY_MISC, dc->categories); 1031 dc->desc = "Inter-VM shared memory"; 1032 } 1033 1034 static const TypeInfo ivshmem_common_info = { 1035 .name = TYPE_IVSHMEM_COMMON, 1036 .parent = TYPE_PCI_DEVICE, 1037 .instance_size = sizeof(IVShmemState), 1038 .abstract = true, 1039 .class_init = ivshmem_common_class_init, 1040 .interfaces = (InterfaceInfo[]) { 1041 { INTERFACE_CONVENTIONAL_PCI_DEVICE }, 1042 { }, 1043 }, 1044 }; 1045 1046 static const VMStateDescription ivshmem_plain_vmsd = { 1047 .name = TYPE_IVSHMEM_PLAIN, 1048 .version_id = 0, 1049 .minimum_version_id = 0, 1050 .pre_load = ivshmem_pre_load, 1051 .post_load = ivshmem_post_load, 1052 .fields = (VMStateField[]) { 1053 
        VMSTATE_PCI_DEVICE(parent_obj, IVShmemState),
        VMSTATE_UINT32(intrstatus, IVShmemState),
        VMSTATE_UINT32(intrmask, IVShmemState),
        VMSTATE_END_OF_LIST()
    },
};

static Property ivshmem_plain_properties[] = {
    DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF),
    DEFINE_PROP_LINK("memdev", IVShmemState, hostmem, TYPE_MEMORY_BACKEND,
                     HostMemoryBackend *),
    DEFINE_PROP_END_OF_LIST(),
};

/* Realize for ivshmem-plain: require an unused memdev, then common code. */
static void ivshmem_plain_realize(PCIDevice *dev, Error **errp)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);

    if (!s->hostmem) {
        error_setg(errp, "You must specify a 'memdev'");
        return;
    } else if (host_memory_backend_is_mapped(s->hostmem)) {
        char *path = object_get_canonical_path_component(OBJECT(s->hostmem));
        error_setg(errp, "can't use already busy memdev: %s", path);
        g_free(path);
        return;
    }

    ivshmem_common_realize(dev, errp);
}

static void ivshmem_plain_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);

    k->realize = ivshmem_plain_realize;
    dc->props = ivshmem_plain_properties;
    dc->vmsd = &ivshmem_plain_vmsd;
}

static const TypeInfo ivshmem_plain_info = {
    .name = TYPE_IVSHMEM_PLAIN,
    .parent = TYPE_IVSHMEM_COMMON,
    .instance_size = sizeof(IVShmemState),
    .class_init = ivshmem_plain_class_init,
};

static const VMStateDescription ivshmem_doorbell_vmsd = {
    .name = TYPE_IVSHMEM_DOORBELL,
    .version_id = 0,
    .minimum_version_id = 0,
    .pre_load = ivshmem_pre_load,
    .post_load = ivshmem_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, IVShmemState),
        VMSTATE_MSIX(parent_obj, IVShmemState),
        VMSTATE_UINT32(intrstatus, IVShmemState),
        VMSTATE_UINT32(intrmask, IVShmemState),
        VMSTATE_END_OF_LIST()
    },
};

static Property ivshmem_doorbell_properties[] = {
    DEFINE_PROP_CHR("chardev", IVShmemState, server_chr),
    DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1),
    DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD,
                    true),
    DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF),
    DEFINE_PROP_END_OF_LIST(),
};

/* ivshmem-doorbell always has MSI; set the feature bit at instance init. */
static void ivshmem_doorbell_init(Object *obj)
{
    IVShmemState *s = IVSHMEM_DOORBELL(obj);

    s->features |= (1 << IVSHMEM_MSI);
}

/* Realize for ivshmem-doorbell: require a connected chardev, then common code. */
static void ivshmem_doorbell_realize(PCIDevice *dev, Error **errp)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);

    if (!qemu_chr_fe_backend_connected(&s->server_chr)) {
        error_setg(errp, "You must specify a 'chardev'");
        return;
    }

    ivshmem_common_realize(dev, errp);
}

static void ivshmem_doorbell_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);

    k->realize = ivshmem_doorbell_realize;
    dc->props = ivshmem_doorbell_properties;
    dc->vmsd = &ivshmem_doorbell_vmsd;
}

static const TypeInfo ivshmem_doorbell_info = {
    .name = TYPE_IVSHMEM_DOORBELL,
    .parent = TYPE_IVSHMEM_COMMON,
    .instance_size = sizeof(IVShmemState),
    .instance_init = ivshmem_doorbell_init,
    .class_init = ivshmem_doorbell_class_init,
};

static void ivshmem_register_types(void)
{
    type_register_static(&ivshmem_common_info);
    type_register_static(&ivshmem_plain_info);
    type_register_static(&ivshmem_doorbell_info);
}

type_init(ivshmem_register_types)