1 /* 2 * Inter-VM Shared Memory PCI device. 3 * 4 * Author: 5 * Cam Macdonell <cam@cs.ualberta.ca> 6 * 7 * Based On: cirrus_vga.c 8 * Copyright (c) 2004 Fabrice Bellard 9 * Copyright (c) 2004 Makoto Suzuki (suzu) 10 * 11 * and rtl8139.c 12 * Copyright (c) 2006 Igor Kovalenko 13 * 14 * This code is licensed under the GNU GPL v2. 15 * 16 * Contributions after 2012-01-13 are licensed under the terms of the 17 * GNU GPL, version 2 or (at your option) any later version. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "qemu/units.h" 22 #include "qapi/error.h" 23 #include "qemu/cutils.h" 24 #include "hw/pci/pci.h" 25 #include "hw/qdev-properties.h" 26 #include "hw/pci/msi.h" 27 #include "hw/pci/msix.h" 28 #include "sysemu/kvm.h" 29 #include "migration/blocker.h" 30 #include "migration/vmstate.h" 31 #include "qemu/error-report.h" 32 #include "qemu/event_notifier.h" 33 #include "qemu/module.h" 34 #include "qom/object_interfaces.h" 35 #include "chardev/char-fe.h" 36 #include "sysemu/hostmem.h" 37 #include "sysemu/qtest.h" 38 #include "qapi/visitor.h" 39 40 #include "hw/misc/ivshmem.h" 41 42 #define PCI_VENDOR_ID_IVSHMEM PCI_VENDOR_ID_REDHAT_QUMRANET 43 #define PCI_DEVICE_ID_IVSHMEM 0x1110 44 45 #define IVSHMEM_MAX_PEERS UINT16_MAX 46 #define IVSHMEM_IOEVENTFD 0 47 #define IVSHMEM_MSI 1 48 49 #define IVSHMEM_REG_BAR_SIZE 0x100 50 51 #define IVSHMEM_DEBUG 0 52 #define IVSHMEM_DPRINTF(fmt, ...) \ 53 do { \ 54 if (IVSHMEM_DEBUG) { \ 55 printf("IVSHMEM: " fmt, ## __VA_ARGS__); \ 56 } \ 57 } while (0) 58 59 #define TYPE_IVSHMEM_COMMON "ivshmem-common" 60 #define IVSHMEM_COMMON(obj) \ 61 OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_COMMON) 62 63 #define TYPE_IVSHMEM_PLAIN "ivshmem-plain" 64 #define IVSHMEM_PLAIN(obj) \ 65 OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_PLAIN) 66 67 #define TYPE_IVSHMEM_DOORBELL "ivshmem-doorbell" 68 #define IVSHMEM_DOORBELL(obj) \ 69 OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_DOORBELL) 70 71 #define TYPE_IVSHMEM "ivshmem" 72 #define IVSHMEM(obj) \ 73 OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM) 74 75 typedef struct Peer { 76 int nb_eventfds; 77 EventNotifier *eventfds; 78 } Peer; 79 80 typedef struct MSIVector { 81 PCIDevice *pdev; 82 int virq; 83 bool unmasked; 84 } MSIVector; 85 86 typedef struct IVShmemState { 87 /*< private >*/ 88 PCIDevice parent_obj; 89 /*< public >*/ 90 91 uint32_t features; 92 93 /* exactly one of these two may be set */ 94 HostMemoryBackend *hostmem; /* with interrupts */ 95 CharBackend server_chr; /* without interrupts */ 96 97 /* registers */ 98 uint32_t intrmask; 99 uint32_t intrstatus; 100 int vm_id; 101 102 /* BARs */ 103 MemoryRegion ivshmem_mmio; /* BAR 0 (registers) */ 104 MemoryRegion *ivshmem_bar2; /* BAR 2 (shared memory) */ 105 MemoryRegion server_bar2; /* used with server_chr */ 106 107 /* interrupt support */ 108 Peer *peers; 109 int nb_peers; /* space in @peers[] */ 110 uint32_t vectors; 111 MSIVector *msi_vectors; 112 uint64_t msg_buf; /* buffer for receiving server messages */ 113 int msg_buffered_bytes; /* #bytes in @msg_buf */ 114 115 /* migration stuff */ 116 OnOffAuto master; 117 Error *migration_blocker; 118 } IVShmemState; 119 120 /* registers for the Inter-VM shared memory device */ 121 enum ivshmem_registers { 122 INTRMASK = 0, 123 INTRSTATUS = 4, 124 IVPOSITION = 8, 125 DOORBELL = 12, 126 }; 127 128 static inline uint32_t ivshmem_has_feature(IVShmemState *ivs, 129 unsigned int feature) { 130 return (ivs->features & (1 << feature)); 131 } 132 133 static inline bool ivshmem_is_master(IVShmemState *s) 134 { 135 assert(s->master != ON_OFF_AUTO_AUTO); 136 return s->master == ON_OFF_AUTO_ON; 137 } 138 139 static void ivshmem_update_irq(IVShmemState *s) 140 { 141 PCIDevice *d = PCI_DEVICE(s); 142 uint32_t isr = s->intrstatus & s->intrmask; 143 144 /* 145 * Do nothing unless the device actually uses INTx. Here's how 146 * the device variants signal interrupts, what they put in PCI 147 * config space: 148 * Device variant Interrupt Interrupt Pin MSI-X cap. 149 * ivshmem-plain none 0 no 150 * ivshmem-doorbell MSI-X 1 yes(1) 151 * ivshmem,msi=off INTx 1 no 152 * ivshmem,msi=on MSI-X 1(2) yes(1) 153 * (1) if guest enabled MSI-X 154 * (2) the device lies 155 * Leads to the condition for doing nothing: 156 */ 157 if (ivshmem_has_feature(s, IVSHMEM_MSI) 158 || !d->config[PCI_INTERRUPT_PIN]) { 159 return; 160 } 161 162 /* don't print ISR resets */ 163 if (isr) { 164 IVSHMEM_DPRINTF("Set IRQ to %d (%04x %04x)\n", 165 isr ? 1 : 0, s->intrstatus, s->intrmask); 166 } 167 168 pci_set_irq(d, isr != 0); 169 } 170 171 static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val) 172 { 173 IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val); 174 175 s->intrmask = val; 176 ivshmem_update_irq(s); 177 } 178 179 static uint32_t ivshmem_IntrMask_read(IVShmemState *s) 180 { 181 uint32_t ret = s->intrmask; 182 183 IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret); 184 return ret; 185 } 186 187 static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val) 188 { 189 IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val); 190 191 s->intrstatus = val; 192 ivshmem_update_irq(s); 193 } 194 195 static uint32_t ivshmem_IntrStatus_read(IVShmemState *s) 196 { 197 uint32_t ret = s->intrstatus; 198 199 /* reading ISR clears all interrupts */ 200 s->intrstatus = 0; 201 ivshmem_update_irq(s); 202 return ret; 203 } 204 205 static void ivshmem_io_write(void *opaque, hwaddr addr, 206 uint64_t val, unsigned size) 207 { 208 IVShmemState *s = opaque; 209 210 uint16_t dest = val >> 16; 211 uint16_t vector = val & 0xff; 212 213 addr &= 0xfc; 214 215 IVSHMEM_DPRINTF("writing to addr " TARGET_FMT_plx "\n", addr); 216 switch (addr) 217 { 218 case INTRMASK: 219 ivshmem_IntrMask_write(s, val); 220 break; 221 222 case INTRSTATUS: 223 ivshmem_IntrStatus_write(s, val); 224 break; 225 226 case DOORBELL: 227 /* check that dest VM ID is reasonable */ 228 if (dest >= s->nb_peers) { 229 IVSHMEM_DPRINTF("Invalid destination VM ID (%d)\n", dest); 230 break; 231 } 232 233 /* check doorbell range */ 234 if (vector < s->peers[dest].nb_eventfds) { 235 IVSHMEM_DPRINTF("Notifying VM %d on vector %d\n", dest, vector); 236 event_notifier_set(&s->peers[dest].eventfds[vector]); 237 } else { 238 IVSHMEM_DPRINTF("Invalid destination vector %d on VM %d\n", 239 vector, dest); 240 } 241 break; 242 default: 243 IVSHMEM_DPRINTF("Unhandled write " TARGET_FMT_plx "\n", addr); 244 } 245 } 246 247 static uint64_t ivshmem_io_read(void *opaque, hwaddr addr, 248 unsigned size) 249 { 250 251 IVShmemState *s = opaque; 252 uint32_t ret; 253 254 switch (addr) 255 { 256 case INTRMASK: 257 ret = ivshmem_IntrMask_read(s); 258 break; 259 260 case INTRSTATUS: 261 ret = ivshmem_IntrStatus_read(s); 262 break; 263 264 case IVPOSITION: 265 ret = s->vm_id; 266 break; 267 268 default: 269 IVSHMEM_DPRINTF("why are we reading " TARGET_FMT_plx "\n", addr); 270 ret = 0; 271 } 272 273 return ret; 274 } 275 276 static const MemoryRegionOps ivshmem_mmio_ops = { 277 .read = ivshmem_io_read, 278 .write = ivshmem_io_write, 279 .endianness = DEVICE_NATIVE_ENDIAN, 280 .impl = { 281 .min_access_size = 4, 282 .max_access_size = 4, 283 }, 284 }; 285 286 static void ivshmem_vector_notify(void *opaque) 287 { 288 MSIVector *entry = opaque; 289 PCIDevice *pdev = entry->pdev; 290 IVShmemState *s = IVSHMEM_COMMON(pdev); 291 int vector = entry - s->msi_vectors; 292 EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; 293 294 if (!event_notifier_test_and_clear(n)) { 295 return; 296 } 297 298 IVSHMEM_DPRINTF("interrupt on vector %p %d\n", pdev, vector); 299 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 300 if (msix_enabled(pdev)) { 301 msix_notify(pdev, vector); 302 } 303 } else { 304 ivshmem_IntrStatus_write(s, 1); 305 } 306 } 307 308 static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector, 309 MSIMessage msg) 310 { 311 IVShmemState *s = IVSHMEM_COMMON(dev); 312 EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; 313 MSIVector *v = &s->msi_vectors[vector]; 314 int ret; 315 316 IVSHMEM_DPRINTF("vector unmask %p %d\n", dev, vector); 317 if (!v->pdev) { 318 error_report("ivshmem: vector %d route does not exist", vector); 319 return -EINVAL; 320 } 321 assert(!v->unmasked); 322 323 ret = kvm_irqchip_update_msi_route(kvm_state, v->virq, msg, dev); 324 if (ret < 0) { 325 return ret; 326 } 327 kvm_irqchip_commit_routes(kvm_state); 328 329 ret = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq); 330 if (ret < 0) { 331 return ret; 332 } 333 v->unmasked = true; 334 335 return 0; 336 } 337 338 static void ivshmem_vector_mask(PCIDevice *dev, unsigned vector) 339 { 340 IVShmemState *s = IVSHMEM_COMMON(dev); 341 EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; 342 MSIVector *v = &s->msi_vectors[vector]; 343 int ret; 344 345 IVSHMEM_DPRINTF("vector mask %p %d\n", dev, vector); 346 if (!v->pdev) { 347 error_report("ivshmem: vector %d route does not exist", vector); 348 return; 349 } 350 assert(v->unmasked); 351 352 ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, v->virq); 353 if (ret < 0) { 354 error_report("remove_irqfd_notifier_gsi failed"); 355 return; 356 } 357 v->unmasked = false; 358 } 359 360 static void ivshmem_vector_poll(PCIDevice *dev, 361 unsigned int vector_start, 362 unsigned int vector_end) 363 { 364 IVShmemState *s = IVSHMEM_COMMON(dev); 365 unsigned int vector; 366 367 IVSHMEM_DPRINTF("vector poll %p %d-%d\n", dev, vector_start, vector_end); 368 369 vector_end = MIN(vector_end, s->vectors); 370 371 for (vector = vector_start; vector < vector_end; vector++) { 372 EventNotifier *notifier = &s->peers[s->vm_id].eventfds[vector]; 373 374 if (!msix_is_masked(dev, vector)) { 375 continue; 376 } 377 378 if (event_notifier_test_and_clear(notifier)) { 379 msix_set_pending(dev, vector); 380 } 381 } 382 } 383 384 static void watch_vector_notifier(IVShmemState *s, EventNotifier *n, 385 int vector) 386 { 387 int eventfd = event_notifier_get_fd(n); 388 389 assert(!s->msi_vectors[vector].pdev); 390 s->msi_vectors[vector].pdev = PCI_DEVICE(s); 391 392 qemu_set_fd_handler(eventfd, ivshmem_vector_notify, 393 NULL, &s->msi_vectors[vector]); 394 } 395 396 static void ivshmem_add_eventfd(IVShmemState *s, int posn, int i) 397 { 398 memory_region_add_eventfd(&s->ivshmem_mmio, 399 DOORBELL, 400 4, 401 true, 402 (posn << 16) | i, 403 &s->peers[posn].eventfds[i]); 404 } 405 406 static void ivshmem_del_eventfd(IVShmemState *s, int posn, int i) 407 { 408 memory_region_del_eventfd(&s->ivshmem_mmio, 409 DOORBELL, 410 4, 411 true, 412 (posn << 16) | i, 413 &s->peers[posn].eventfds[i]); 414 } 415 416 static void close_peer_eventfds(IVShmemState *s, int posn) 417 { 418 int i, n; 419 420 assert(posn >= 0 && posn < s->nb_peers); 421 n = s->peers[posn].nb_eventfds; 422 423 if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) { 424 memory_region_transaction_begin(); 425 for (i = 0; i < n; i++) { 426 ivshmem_del_eventfd(s, posn, i); 427 } 428 memory_region_transaction_commit(); 429 } 430 431 for (i = 0; i < n; i++) { 432 event_notifier_cleanup(&s->peers[posn].eventfds[i]); 433 } 434 435 g_free(s->peers[posn].eventfds); 436 s->peers[posn].nb_eventfds = 0; 437 } 438 439 static void resize_peers(IVShmemState *s, int nb_peers) 440 { 441 int old_nb_peers = s->nb_peers; 442 int i; 443 444 assert(nb_peers > old_nb_peers); 445 IVSHMEM_DPRINTF("bumping storage to %d peers\n", nb_peers); 446 447 s->peers = g_realloc(s->peers, nb_peers * sizeof(Peer)); 448 s->nb_peers = nb_peers; 449 450 for (i = old_nb_peers; i < nb_peers; i++) { 451 s->peers[i].eventfds = g_new0(EventNotifier, s->vectors); 452 s->peers[i].nb_eventfds = 0; 453 } 454 } 455 456 static void ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector, 457 Error **errp) 458 { 459 PCIDevice *pdev = PCI_DEVICE(s); 460 int ret; 461 462 IVSHMEM_DPRINTF("ivshmem_add_kvm_msi_virq vector:%d\n", vector); 463 assert(!s->msi_vectors[vector].pdev); 464 465 ret = kvm_irqchip_add_msi_route(kvm_state, vector, pdev); 466 if (ret < 0) { 467 error_setg(errp, "kvm_irqchip_add_msi_route failed"); 468 return; 469 } 470 471 s->msi_vectors[vector].virq = ret; 472 s->msi_vectors[vector].pdev = pdev; 473 } 474 475 static void setup_interrupt(IVShmemState *s, int vector, Error **errp) 476 { 477 EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; 478 bool with_irqfd = kvm_msi_via_irqfd_enabled() && 479 ivshmem_has_feature(s, IVSHMEM_MSI); 480 PCIDevice *pdev = PCI_DEVICE(s); 481 Error *err = NULL; 482 483 IVSHMEM_DPRINTF("setting up interrupt for vector: %d\n", vector); 484 485 if (!with_irqfd) { 486 IVSHMEM_DPRINTF("with eventfd\n"); 487 watch_vector_notifier(s, n, vector); 488 } else if (msix_enabled(pdev)) { 489 IVSHMEM_DPRINTF("with irqfd\n"); 490 ivshmem_add_kvm_msi_virq(s, vector, &err); 491 if (err) { 492 error_propagate(errp, err); 493 return; 494 } 495 496 if (!msix_is_masked(pdev, vector)) { 497 kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, 498 s->msi_vectors[vector].virq); 499 /* TODO handle error */ 500 } 501 } else { 502 /* it will be delayed until msix is enabled, in write_config */ 503 IVSHMEM_DPRINTF("with irqfd, delayed until msix enabled\n"); 504 } 505 } 506 507 static void process_msg_shmem(IVShmemState *s, int fd, Error **errp) 508 { 509 Error *local_err = NULL; 510 struct stat buf; 511 size_t size; 512 513 if (s->ivshmem_bar2) { 514 error_setg(errp, "server sent unexpected shared memory message"); 515 close(fd); 516 return; 517 } 518 519 if (fstat(fd, &buf) < 0) { 520 error_setg_errno(errp, errno, 521 "can't determine size of shared memory sent by server"); 522 close(fd); 523 return; 524 } 525 526 size = buf.st_size; 527 528 /* mmap the region and map into the BAR2 */ 529 memory_region_init_ram_from_fd(&s->server_bar2, OBJECT(s), 530 "ivshmem.bar2", size, true, fd, &local_err); 531 if (local_err) { 532 error_propagate(errp, local_err); 533 return; 534 } 535 536 s->ivshmem_bar2 = &s->server_bar2; 537 } 538 539 static void process_msg_disconnect(IVShmemState *s, uint16_t posn, 540 Error **errp) 541 { 542 IVSHMEM_DPRINTF("posn %d has gone away\n", posn); 543 if (posn >= s->nb_peers || posn == s->vm_id) { 544 error_setg(errp, "invalid peer %d", posn); 545 return; 546 } 547 close_peer_eventfds(s, posn); 548 } 549 550 static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd, 551 Error **errp) 552 { 553 Peer *peer = &s->peers[posn]; 554 int vector; 555 556 /* 557 * The N-th connect message for this peer comes with the file 558 * descriptor for vector N-1. Count messages to find the vector. 559 */ 560 if (peer->nb_eventfds >= s->vectors) { 561 error_setg(errp, "Too many eventfd received, device has %d vectors", 562 s->vectors); 563 close(fd); 564 return; 565 } 566 vector = peer->nb_eventfds++; 567 568 IVSHMEM_DPRINTF("eventfds[%d][%d] = %d\n", posn, vector, fd); 569 event_notifier_init_fd(&peer->eventfds[vector], fd); 570 fcntl_setfl(fd, O_NONBLOCK); /* msix/irqfd poll non block */ 571 572 if (posn == s->vm_id) { 573 setup_interrupt(s, vector, errp); 574 /* TODO do we need to handle the error? */ 575 } 576 577 if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) { 578 ivshmem_add_eventfd(s, posn, vector); 579 } 580 } 581 582 static void process_msg(IVShmemState *s, int64_t msg, int fd, Error **errp) 583 { 584 IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd); 585 586 if (msg < -1 || msg > IVSHMEM_MAX_PEERS) { 587 error_setg(errp, "server sent invalid message %" PRId64, msg); 588 close(fd); 589 return; 590 } 591 592 if (msg == -1) { 593 process_msg_shmem(s, fd, errp); 594 return; 595 } 596 597 if (msg >= s->nb_peers) { 598 resize_peers(s, msg + 1); 599 } 600 601 if (fd >= 0) { 602 process_msg_connect(s, msg, fd, errp); 603 } else { 604 process_msg_disconnect(s, msg, errp); 605 } 606 } 607 608 static int ivshmem_can_receive(void *opaque) 609 { 610 IVShmemState *s = opaque; 611 612 assert(s->msg_buffered_bytes < sizeof(s->msg_buf)); 613 return sizeof(s->msg_buf) - s->msg_buffered_bytes; 614 } 615 616 static void ivshmem_read(void *opaque, const uint8_t *buf, int size) 617 { 618 IVShmemState *s = opaque; 619 Error *err = NULL; 620 int fd; 621 int64_t msg; 622 623 assert(size >= 0 && s->msg_buffered_bytes + size <= sizeof(s->msg_buf)); 624 memcpy((unsigned char *)&s->msg_buf + s->msg_buffered_bytes, buf, size); 625 s->msg_buffered_bytes += size; 626 if (s->msg_buffered_bytes < sizeof(s->msg_buf)) { 627 return; 628 } 629 msg = le64_to_cpu(s->msg_buf); 630 s->msg_buffered_bytes = 0; 631 632 fd = qemu_chr_fe_get_msgfd(&s->server_chr); 633 634 process_msg(s, msg, fd, &err); 635 if (err) { 636 error_report_err(err); 637 } 638 } 639 640 static int64_t ivshmem_recv_msg(IVShmemState *s, int *pfd, Error **errp) 641 { 642 int64_t msg; 643 int n, ret; 644 645 n = 0; 646 do { 647 ret = qemu_chr_fe_read_all(&s->server_chr, (uint8_t *)&msg + n, 648 sizeof(msg) - n); 649 if (ret < 0) { 650 if (ret == -EINTR) { 651 continue; 652 } 653 error_setg_errno(errp, -ret, "read from server failed"); 654 return INT64_MIN; 655 } 656 n += ret; 657 } while (n < sizeof(msg)); 658 659 *pfd = qemu_chr_fe_get_msgfd(&s->server_chr); 660 return le64_to_cpu(msg); 661 } 662 663 static void ivshmem_recv_setup(IVShmemState *s, Error **errp) 664 { 665 Error *err = NULL; 666 int64_t msg; 667 int fd; 668 669 msg = ivshmem_recv_msg(s, &fd, &err); 670 if (err) { 671 error_propagate(errp, err); 672 return; 673 } 674 if (msg != IVSHMEM_PROTOCOL_VERSION) { 675 error_setg(errp, "server sent version %" PRId64 ", expecting %d", 676 msg, IVSHMEM_PROTOCOL_VERSION); 677 return; 678 } 679 if (fd != -1) { 680 error_setg(errp, "server sent invalid version message"); 681 return; 682 } 683 684 /* 685 * ivshmem-server sends the remaining initial messages in a fixed 686 * order, but the device has always accepted them in any order. 687 * Stay as compatible as practical, just in case people use 688 * servers that behave differently. 689 */ 690 691 /* 692 * ivshmem_device_spec.txt has always required the ID message 693 * right here, and ivshmem-server has always complied. However, 694 * older versions of the device accepted it out of order, but 695 * broke when an interrupt setup message arrived before it. 696 */ 697 msg = ivshmem_recv_msg(s, &fd, &err); 698 if (err) { 699 error_propagate(errp, err); 700 return; 701 } 702 if (fd != -1 || msg < 0 || msg > IVSHMEM_MAX_PEERS) { 703 error_setg(errp, "server sent invalid ID message"); 704 return; 705 } 706 s->vm_id = msg; 707 708 /* 709 * Receive more messages until we got shared memory. 710 */ 711 do { 712 msg = ivshmem_recv_msg(s, &fd, &err); 713 if (err) { 714 error_propagate(errp, err); 715 return; 716 } 717 process_msg(s, msg, fd, &err); 718 if (err) { 719 error_propagate(errp, err); 720 return; 721 } 722 } while (msg != -1); 723 724 /* 725 * This function must either map the shared memory or fail. The 726 * loop above ensures that: it terminates normally only after it 727 * successfully processed the server's shared memory message. 728 * Assert that actually mapped the shared memory: 729 */ 730 assert(s->ivshmem_bar2); 731 } 732 733 /* Select the MSI-X vectors used by device. 734 * ivshmem maps events to vectors statically, so 735 * we just enable all vectors on init and after reset. */ 736 static void ivshmem_msix_vector_use(IVShmemState *s) 737 { 738 PCIDevice *d = PCI_DEVICE(s); 739 int i; 740 741 for (i = 0; i < s->vectors; i++) { 742 msix_vector_use(d, i); 743 } 744 } 745 746 static void ivshmem_disable_irqfd(IVShmemState *s); 747 748 static void ivshmem_reset(DeviceState *d) 749 { 750 IVShmemState *s = IVSHMEM_COMMON(d); 751 752 ivshmem_disable_irqfd(s); 753 754 s->intrstatus = 0; 755 s->intrmask = 0; 756 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 757 ivshmem_msix_vector_use(s); 758 } 759 } 760 761 static int ivshmem_setup_interrupts(IVShmemState *s, Error **errp) 762 { 763 /* allocate QEMU callback data for receiving interrupts */ 764 s->msi_vectors = g_malloc0(s->vectors * sizeof(MSIVector)); 765 766 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 767 if (msix_init_exclusive_bar(PCI_DEVICE(s), s->vectors, 1, errp)) { 768 return -1; 769 } 770 771 IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors); 772 ivshmem_msix_vector_use(s); 773 } 774 775 return 0; 776 } 777 778 static void ivshmem_remove_kvm_msi_virq(IVShmemState *s, int vector) 779 { 780 IVSHMEM_DPRINTF("ivshmem_remove_kvm_msi_virq vector:%d\n", vector); 781 782 if (s->msi_vectors[vector].pdev == NULL) { 783 return; 784 } 785 786 /* it was cleaned when masked in the frontend. */ 787 kvm_irqchip_release_virq(kvm_state, s->msi_vectors[vector].virq); 788 789 s->msi_vectors[vector].pdev = NULL; 790 } 791 792 static void ivshmem_enable_irqfd(IVShmemState *s) 793 { 794 PCIDevice *pdev = PCI_DEVICE(s); 795 int i; 796 797 for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) { 798 Error *err = NULL; 799 800 ivshmem_add_kvm_msi_virq(s, i, &err); 801 if (err) { 802 error_report_err(err); 803 goto undo; 804 } 805 } 806 807 if (msix_set_vector_notifiers(pdev, 808 ivshmem_vector_unmask, 809 ivshmem_vector_mask, 810 ivshmem_vector_poll)) { 811 error_report("ivshmem: msix_set_vector_notifiers failed"); 812 goto undo; 813 } 814 return; 815 816 undo: 817 while (--i >= 0) { 818 ivshmem_remove_kvm_msi_virq(s, i); 819 } 820 } 821 822 static void ivshmem_disable_irqfd(IVShmemState *s) 823 { 824 PCIDevice *pdev = PCI_DEVICE(s); 825 int i; 826 827 if (!pdev->msix_vector_use_notifier) { 828 return; 829 } 830 831 msix_unset_vector_notifiers(pdev); 832 833 for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) { 834 /* 835 * MSI-X is already disabled here so msix_unset_vector_notifiers() 836 * didn't call our release notifier. Do it now to keep our masks and 837 * unmasks balanced. 838 */ 839 if (s->msi_vectors[i].unmasked) { 840 ivshmem_vector_mask(pdev, i); 841 } 842 ivshmem_remove_kvm_msi_virq(s, i); 843 } 844 845 } 846 847 static void ivshmem_write_config(PCIDevice *pdev, uint32_t address, 848 uint32_t val, int len) 849 { 850 IVShmemState *s = IVSHMEM_COMMON(pdev); 851 int is_enabled, was_enabled = msix_enabled(pdev); 852 853 pci_default_write_config(pdev, address, val, len); 854 is_enabled = msix_enabled(pdev); 855 856 if (kvm_msi_via_irqfd_enabled()) { 857 if (!was_enabled && is_enabled) { 858 ivshmem_enable_irqfd(s); 859 } else if (was_enabled && !is_enabled) { 860 ivshmem_disable_irqfd(s); 861 } 862 } 863 } 864 865 static void ivshmem_common_realize(PCIDevice *dev, Error **errp) 866 { 867 IVShmemState *s = IVSHMEM_COMMON(dev); 868 Error *err = NULL; 869 uint8_t *pci_conf; 870 Error *local_err = NULL; 871 872 /* IRQFD requires MSI */ 873 if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD) && 874 !ivshmem_has_feature(s, IVSHMEM_MSI)) { 875 error_setg(errp, "ioeventfd/irqfd requires MSI"); 876 return; 877 } 878 879 pci_conf = dev->config; 880 pci_conf[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY; 881 882 memory_region_init_io(&s->ivshmem_mmio, OBJECT(s), &ivshmem_mmio_ops, s, 883 "ivshmem-mmio", IVSHMEM_REG_BAR_SIZE); 884 885 /* region for registers*/ 886 pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, 887 &s->ivshmem_mmio); 888 889 if (s->hostmem != NULL) { 890 IVSHMEM_DPRINTF("using hostmem\n"); 891 892 s->ivshmem_bar2 = host_memory_backend_get_memory(s->hostmem); 893 host_memory_backend_set_mapped(s->hostmem, true); 894 } else { 895 Chardev *chr = qemu_chr_fe_get_driver(&s->server_chr); 896 assert(chr); 897 898 IVSHMEM_DPRINTF("using shared memory server (socket = %s)\n", 899 chr->filename); 900 901 /* we allocate enough space for 16 peers and grow as needed */ 902 resize_peers(s, 16); 903 904 /* 905 * Receive setup messages from server synchronously. 906 * Older versions did it asynchronously, but that creates a 907 * number of entertaining race conditions. 908 */ 909 ivshmem_recv_setup(s, &err); 910 if (err) { 911 error_propagate(errp, err); 912 return; 913 } 914 915 if (s->master == ON_OFF_AUTO_ON && s->vm_id != 0) { 916 error_setg(errp, 917 "master must connect to the server before any peers"); 918 return; 919 } 920 921 qemu_chr_fe_set_handlers(&s->server_chr, ivshmem_can_receive, 922 ivshmem_read, NULL, NULL, s, NULL, true); 923 924 if (ivshmem_setup_interrupts(s, errp) < 0) { 925 error_prepend(errp, "Failed to initialize interrupts: "); 926 return; 927 } 928 } 929 930 if (s->master == ON_OFF_AUTO_AUTO) { 931 s->master = s->vm_id == 0 ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; 932 } 933 934 if (!ivshmem_is_master(s)) { 935 error_setg(&s->migration_blocker, 936 "Migration is disabled when using feature 'peer mode' in device 'ivshmem'"); 937 migrate_add_blocker(s->migration_blocker, &local_err); 938 if (local_err) { 939 error_propagate(errp, local_err); 940 error_free(s->migration_blocker); 941 return; 942 } 943 } 944 945 vmstate_register_ram(s->ivshmem_bar2, DEVICE(s)); 946 pci_register_bar(PCI_DEVICE(s), 2, 947 PCI_BASE_ADDRESS_SPACE_MEMORY | 948 PCI_BASE_ADDRESS_MEM_PREFETCH | 949 PCI_BASE_ADDRESS_MEM_TYPE_64, 950 s->ivshmem_bar2); 951 } 952 953 static void ivshmem_exit(PCIDevice *dev) 954 { 955 IVShmemState *s = IVSHMEM_COMMON(dev); 956 int i; 957 958 if (s->migration_blocker) { 959 migrate_del_blocker(s->migration_blocker); 960 error_free(s->migration_blocker); 961 } 962 963 if (memory_region_is_mapped(s->ivshmem_bar2)) { 964 if (!s->hostmem) { 965 void *addr = memory_region_get_ram_ptr(s->ivshmem_bar2); 966 int fd; 967 968 if (munmap(addr, memory_region_size(s->ivshmem_bar2) == -1)) { 969 error_report("Failed to munmap shared memory %s", 970 strerror(errno)); 971 } 972 973 fd = memory_region_get_fd(s->ivshmem_bar2); 974 close(fd); 975 } 976 977 vmstate_unregister_ram(s->ivshmem_bar2, DEVICE(dev)); 978 } 979 980 if (s->hostmem) { 981 host_memory_backend_set_mapped(s->hostmem, false); 982 } 983 984 if (s->peers) { 985 for (i = 0; i < s->nb_peers; i++) { 986 close_peer_eventfds(s, i); 987 } 988 g_free(s->peers); 989 } 990 991 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 992 msix_uninit_exclusive_bar(dev); 993 } 994 995 g_free(s->msi_vectors); 996 } 997 998 static int ivshmem_pre_load(void *opaque) 999 { 1000 IVShmemState *s = opaque; 1001 1002 if (!ivshmem_is_master(s)) { 1003 error_report("'peer' devices are not migratable"); 1004 return -EINVAL; 1005 } 1006 1007 return 0; 1008 } 1009 1010 static int ivshmem_post_load(void *opaque, int version_id) 1011 { 1012 IVShmemState *s = opaque; 1013 1014 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 1015 ivshmem_msix_vector_use(s); 1016 } 1017 return 0; 1018 } 1019 1020 static void ivshmem_common_class_init(ObjectClass *klass, void *data) 1021 { 1022 DeviceClass *dc = DEVICE_CLASS(klass); 1023 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); 1024 1025 k->realize = ivshmem_common_realize; 1026 k->exit = ivshmem_exit; 1027 k->config_write = ivshmem_write_config; 1028 k->vendor_id = PCI_VENDOR_ID_IVSHMEM; 1029 k->device_id = PCI_DEVICE_ID_IVSHMEM; 1030 k->class_id = PCI_CLASS_MEMORY_RAM; 1031 k->revision = 1; 1032 dc->reset = ivshmem_reset; 1033 set_bit(DEVICE_CATEGORY_MISC, dc->categories); 1034 dc->desc = "Inter-VM shared memory"; 1035 } 1036 1037 static const TypeInfo ivshmem_common_info = { 1038 .name = TYPE_IVSHMEM_COMMON, 1039 .parent = TYPE_PCI_DEVICE, 1040 .instance_size = sizeof(IVShmemState), 1041 .abstract = true, 1042 .class_init = ivshmem_common_class_init, 1043 .interfaces = (InterfaceInfo[]) { 1044 { INTERFACE_CONVENTIONAL_PCI_DEVICE }, 1045 { }, 1046 }, 1047 }; 1048 1049 static const VMStateDescription ivshmem_plain_vmsd = { 1050 .name = TYPE_IVSHMEM_PLAIN, 1051 .version_id = 0, 1052 .minimum_version_id = 0, 1053 .pre_load = ivshmem_pre_load, 1054 .post_load = ivshmem_post_load, 1055 .fields = (VMStateField[]) { 1056 VMSTATE_PCI_DEVICE(parent_obj, IVShmemState), 1057 VMSTATE_UINT32(intrstatus, IVShmemState), 1058 VMSTATE_UINT32(intrmask, IVShmemState), 1059 VMSTATE_END_OF_LIST() 1060 }, 1061 }; 1062 1063 static Property ivshmem_plain_properties[] = { 1064 DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF), 1065 DEFINE_PROP_LINK("memdev", IVShmemState, hostmem, TYPE_MEMORY_BACKEND, 1066 HostMemoryBackend *), 1067 DEFINE_PROP_END_OF_LIST(), 1068 }; 1069 1070 static void ivshmem_plain_realize(PCIDevice *dev, Error **errp) 1071 { 1072 IVShmemState *s = IVSHMEM_COMMON(dev); 1073 1074 if (!s->hostmem) { 1075 error_setg(errp, "You must specify a 'memdev'"); 1076 return; 1077 } else if (host_memory_backend_is_mapped(s->hostmem)) { 1078 char *path = object_get_canonical_path_component(OBJECT(s->hostmem)); 1079 error_setg(errp, "can't use already busy memdev: %s", path); 1080 g_free(path); 1081 return; 1082 } 1083 1084 ivshmem_common_realize(dev, errp); 1085 } 1086 1087 static void ivshmem_plain_class_init(ObjectClass *klass, void *data) 1088 { 1089 DeviceClass *dc = DEVICE_CLASS(klass); 1090 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); 1091 1092 k->realize = ivshmem_plain_realize; 1093 dc->props = ivshmem_plain_properties; 1094 dc->vmsd = &ivshmem_plain_vmsd; 1095 } 1096 1097 static const TypeInfo ivshmem_plain_info = { 1098 .name = TYPE_IVSHMEM_PLAIN, 1099 .parent = TYPE_IVSHMEM_COMMON, 1100 .instance_size = sizeof(IVShmemState), 1101 .class_init = ivshmem_plain_class_init, 1102 }; 1103 1104 static const VMStateDescription ivshmem_doorbell_vmsd = { 1105 .name = TYPE_IVSHMEM_DOORBELL, 1106 .version_id = 0, 1107 .minimum_version_id = 0, 1108 .pre_load = ivshmem_pre_load, 1109 .post_load = ivshmem_post_load, 1110 .fields = (VMStateField[]) { 1111 VMSTATE_PCI_DEVICE(parent_obj, IVShmemState), 1112 VMSTATE_MSIX(parent_obj, IVShmemState), 1113 VMSTATE_UINT32(intrstatus, IVShmemState), 1114 VMSTATE_UINT32(intrmask, IVShmemState), 1115 VMSTATE_END_OF_LIST() 1116 }, 1117 }; 1118 1119 static Property ivshmem_doorbell_properties[] = { 1120 DEFINE_PROP_CHR("chardev", IVShmemState, server_chr), 1121 DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1), 1122 DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD, 1123 true), 1124 DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF), 1125 DEFINE_PROP_END_OF_LIST(), 1126 }; 1127 1128 static void ivshmem_doorbell_init(Object *obj) 1129 { 1130 IVShmemState *s = IVSHMEM_DOORBELL(obj); 1131 1132 s->features |= (1 << IVSHMEM_MSI); 1133 } 1134 1135 static void ivshmem_doorbell_realize(PCIDevice *dev, Error **errp) 1136 { 1137 IVShmemState *s = IVSHMEM_COMMON(dev); 1138 1139 if (!qemu_chr_fe_backend_connected(&s->server_chr)) { 1140 error_setg(errp, "You must specify a 'chardev'"); 1141 return; 1142 } 1143 1144 ivshmem_common_realize(dev, errp); 1145 } 1146 1147 static void ivshmem_doorbell_class_init(ObjectClass *klass, void *data) 1148 { 1149 DeviceClass *dc = DEVICE_CLASS(klass); 1150 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); 1151 1152 k->realize = ivshmem_doorbell_realize; 1153 dc->props = ivshmem_doorbell_properties; 1154 dc->vmsd = &ivshmem_doorbell_vmsd; 1155 } 1156 1157 static const TypeInfo ivshmem_doorbell_info = { 1158 .name = TYPE_IVSHMEM_DOORBELL, 1159 .parent = TYPE_IVSHMEM_COMMON, 1160 .instance_size = sizeof(IVShmemState), 1161 .instance_init = ivshmem_doorbell_init, 1162 .class_init = ivshmem_doorbell_class_init, 1163 }; 1164 1165 static void ivshmem_register_types(void) 1166 { 1167 type_register_static(&ivshmem_common_info); 1168 type_register_static(&ivshmem_plain_info); 1169 type_register_static(&ivshmem_doorbell_info); 1170 } 1171 1172 type_init(ivshmem_register_types) 1173