1 /* 2 * Inter-VM Shared Memory PCI device. 3 * 4 * Author: 5 * Cam Macdonell <cam@cs.ualberta.ca> 6 * 7 * Based On: cirrus_vga.c 8 * Copyright (c) 2004 Fabrice Bellard 9 * Copyright (c) 2004 Makoto Suzuki (suzu) 10 * 11 * and rtl8139.c 12 * Copyright (c) 2006 Igor Kovalenko 13 * 14 * This code is licensed under the GNU GPL v2. 15 * 16 * Contributions after 2012-01-13 are licensed under the terms of the 17 * GNU GPL, version 2 or (at your option) any later version. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "qemu/units.h" 22 #include "qapi/error.h" 23 #include "qemu/cutils.h" 24 #include "hw/hw.h" 25 #include "hw/pci/pci.h" 26 #include "hw/pci/msi.h" 27 #include "hw/pci/msix.h" 28 #include "sysemu/kvm.h" 29 #include "migration/blocker.h" 30 #include "qemu/error-report.h" 31 #include "qemu/event_notifier.h" 32 #include "qemu/module.h" 33 #include "qom/object_interfaces.h" 34 #include "chardev/char-fe.h" 35 #include "sysemu/hostmem.h" 36 #include "sysemu/qtest.h" 37 #include "qapi/visitor.h" 38 39 #include "hw/misc/ivshmem.h" 40 41 #define PCI_VENDOR_ID_IVSHMEM PCI_VENDOR_ID_REDHAT_QUMRANET 42 #define PCI_DEVICE_ID_IVSHMEM 0x1110 43 44 #define IVSHMEM_MAX_PEERS UINT16_MAX 45 #define IVSHMEM_IOEVENTFD 0 46 #define IVSHMEM_MSI 1 47 48 #define IVSHMEM_REG_BAR_SIZE 0x100 49 50 #define IVSHMEM_DEBUG 0 51 #define IVSHMEM_DPRINTF(fmt, ...) \ 52 do { \ 53 if (IVSHMEM_DEBUG) { \ 54 printf("IVSHMEM: " fmt, ## __VA_ARGS__); \ 55 } \ 56 } while (0) 57 58 #define TYPE_IVSHMEM_COMMON "ivshmem-common" 59 #define IVSHMEM_COMMON(obj) \ 60 OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_COMMON) 61 62 #define TYPE_IVSHMEM_PLAIN "ivshmem-plain" 63 #define IVSHMEM_PLAIN(obj) \ 64 OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_PLAIN) 65 66 #define TYPE_IVSHMEM_DOORBELL "ivshmem-doorbell" 67 #define IVSHMEM_DOORBELL(obj) \ 68 OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_DOORBELL) 69 70 #define TYPE_IVSHMEM "ivshmem" 71 #define IVSHMEM(obj) \ 72 OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM) 73 74 typedef struct Peer { 75 int nb_eventfds; 76 EventNotifier *eventfds; 77 } Peer; 78 79 typedef struct MSIVector { 80 PCIDevice *pdev; 81 int virq; 82 bool unmasked; 83 } MSIVector; 84 85 typedef struct IVShmemState { 86 /*< private >*/ 87 PCIDevice parent_obj; 88 /*< public >*/ 89 90 uint32_t features; 91 92 /* exactly one of these two may be set */ 93 HostMemoryBackend *hostmem; /* with interrupts */ 94 CharBackend server_chr; /* without interrupts */ 95 96 /* registers */ 97 uint32_t intrmask; 98 uint32_t intrstatus; 99 int vm_id; 100 101 /* BARs */ 102 MemoryRegion ivshmem_mmio; /* BAR 0 (registers) */ 103 MemoryRegion *ivshmem_bar2; /* BAR 2 (shared memory) */ 104 MemoryRegion server_bar2; /* used with server_chr */ 105 106 /* interrupt support */ 107 Peer *peers; 108 int nb_peers; /* space in @peers[] */ 109 uint32_t vectors; 110 MSIVector *msi_vectors; 111 uint64_t msg_buf; /* buffer for receiving server messages */ 112 int msg_buffered_bytes; /* #bytes in @msg_buf */ 113 114 /* migration stuff */ 115 OnOffAuto master; 116 Error *migration_blocker; 117 } IVShmemState; 118 119 /* registers for the Inter-VM shared memory device */ 120 enum ivshmem_registers { 121 INTRMASK = 0, 122 INTRSTATUS = 4, 123 IVPOSITION = 8, 124 DOORBELL = 12, 125 }; 126 127 static inline uint32_t ivshmem_has_feature(IVShmemState *ivs, 128 unsigned int feature) { 129 return (ivs->features & (1 << feature)); 130 } 131 132 static inline bool ivshmem_is_master(IVShmemState *s) 133 { 134 assert(s->master != ON_OFF_AUTO_AUTO); 135 return s->master == ON_OFF_AUTO_ON; 136 } 137 138 static void ivshmem_update_irq(IVShmemState *s) 139 { 140 PCIDevice *d = PCI_DEVICE(s); 141 uint32_t isr = s->intrstatus & s->intrmask; 142 143 /* 144 * Do nothing unless the device actually uses INTx. Here's how 145 * the device variants signal interrupts, what they put in PCI 146 * config space: 147 * Device variant Interrupt Interrupt Pin MSI-X cap. 148 * ivshmem-plain none 0 no 149 * ivshmem-doorbell MSI-X 1 yes(1) 150 * ivshmem,msi=off INTx 1 no 151 * ivshmem,msi=on MSI-X 1(2) yes(1) 152 * (1) if guest enabled MSI-X 153 * (2) the device lies 154 * Leads to the condition for doing nothing: 155 */ 156 if (ivshmem_has_feature(s, IVSHMEM_MSI) 157 || !d->config[PCI_INTERRUPT_PIN]) { 158 return; 159 } 160 161 /* don't print ISR resets */ 162 if (isr) { 163 IVSHMEM_DPRINTF("Set IRQ to %d (%04x %04x)\n", 164 isr ? 1 : 0, s->intrstatus, s->intrmask); 165 } 166 167 pci_set_irq(d, isr != 0); 168 } 169 170 static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val) 171 { 172 IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val); 173 174 s->intrmask = val; 175 ivshmem_update_irq(s); 176 } 177 178 static uint32_t ivshmem_IntrMask_read(IVShmemState *s) 179 { 180 uint32_t ret = s->intrmask; 181 182 IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret); 183 return ret; 184 } 185 186 static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val) 187 { 188 IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val); 189 190 s->intrstatus = val; 191 ivshmem_update_irq(s); 192 } 193 194 static uint32_t ivshmem_IntrStatus_read(IVShmemState *s) 195 { 196 uint32_t ret = s->intrstatus; 197 198 /* reading ISR clears all interrupts */ 199 s->intrstatus = 0; 200 ivshmem_update_irq(s); 201 return ret; 202 } 203 204 static void ivshmem_io_write(void *opaque, hwaddr addr, 205 uint64_t val, unsigned size) 206 { 207 IVShmemState *s = opaque; 208 209 uint16_t dest = val >> 16; 210 uint16_t vector = val & 0xff; 211 212 addr &= 0xfc; 213 214 IVSHMEM_DPRINTF("writing to addr " TARGET_FMT_plx "\n", addr); 215 switch (addr) 216 { 217 case INTRMASK: 218 ivshmem_IntrMask_write(s, val); 219 break; 220 221 case INTRSTATUS: 222 ivshmem_IntrStatus_write(s, val); 223 break; 224 225 case DOORBELL: 226 /* check that dest VM ID is reasonable */ 227 if (dest >= s->nb_peers) { 228 IVSHMEM_DPRINTF("Invalid destination VM ID (%d)\n", dest); 229 break; 230 } 231 232 /* check doorbell range */ 233 if (vector < s->peers[dest].nb_eventfds) { 234 IVSHMEM_DPRINTF("Notifying VM %d on vector %d\n", dest, vector); 235 event_notifier_set(&s->peers[dest].eventfds[vector]); 236 } else { 237 IVSHMEM_DPRINTF("Invalid destination vector %d on VM %d\n", 238 vector, dest); 239 } 240 break; 241 default: 242 IVSHMEM_DPRINTF("Unhandled write " TARGET_FMT_plx "\n", addr); 243 } 244 } 245 246 static uint64_t ivshmem_io_read(void *opaque, hwaddr addr, 247 unsigned size) 248 { 249 250 IVShmemState *s = opaque; 251 uint32_t ret; 252 253 switch (addr) 254 { 255 case INTRMASK: 256 ret = ivshmem_IntrMask_read(s); 257 break; 258 259 case INTRSTATUS: 260 ret = ivshmem_IntrStatus_read(s); 261 break; 262 263 case IVPOSITION: 264 ret = s->vm_id; 265 break; 266 267 default: 268 IVSHMEM_DPRINTF("why are we reading " TARGET_FMT_plx "\n", addr); 269 ret = 0; 270 } 271 272 return ret; 273 } 274 275 static const MemoryRegionOps ivshmem_mmio_ops = { 276 .read = ivshmem_io_read, 277 .write = ivshmem_io_write, 278 .endianness = DEVICE_NATIVE_ENDIAN, 279 .impl = { 280 .min_access_size = 4, 281 .max_access_size = 4, 282 }, 283 }; 284 285 static void ivshmem_vector_notify(void *opaque) 286 { 287 MSIVector *entry = opaque; 288 PCIDevice *pdev = entry->pdev; 289 IVShmemState *s = IVSHMEM_COMMON(pdev); 290 int vector = entry - s->msi_vectors; 291 EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; 292 293 if (!event_notifier_test_and_clear(n)) { 294 return; 295 } 296 297 IVSHMEM_DPRINTF("interrupt on vector %p %d\n", pdev, vector); 298 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 299 if (msix_enabled(pdev)) { 300 msix_notify(pdev, vector); 301 } 302 } else { 303 ivshmem_IntrStatus_write(s, 1); 304 } 305 } 306 307 static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector, 308 MSIMessage msg) 309 { 310 IVShmemState *s = IVSHMEM_COMMON(dev); 311 EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; 312 MSIVector *v = &s->msi_vectors[vector]; 313 int ret; 314 315 IVSHMEM_DPRINTF("vector unmask %p %d\n", dev, vector); 316 if (!v->pdev) { 317 error_report("ivshmem: vector %d route does not exist", vector); 318 return -EINVAL; 319 } 320 assert(!v->unmasked); 321 322 ret = kvm_irqchip_update_msi_route(kvm_state, v->virq, msg, dev); 323 if (ret < 0) { 324 return ret; 325 } 326 kvm_irqchip_commit_routes(kvm_state); 327 328 ret = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq); 329 if (ret < 0) { 330 return ret; 331 } 332 v->unmasked = true; 333 334 return 0; 335 } 336 337 static void ivshmem_vector_mask(PCIDevice *dev, unsigned vector) 338 { 339 IVShmemState *s = IVSHMEM_COMMON(dev); 340 EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; 341 MSIVector *v = &s->msi_vectors[vector]; 342 int ret; 343 344 IVSHMEM_DPRINTF("vector mask %p %d\n", dev, vector); 345 if (!v->pdev) { 346 error_report("ivshmem: vector %d route does not exist", vector); 347 return; 348 } 349 assert(v->unmasked); 350 351 ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, v->virq); 352 if (ret < 0) { 353 error_report("remove_irqfd_notifier_gsi failed"); 354 return; 355 } 356 v->unmasked = false; 357 } 358 359 static void ivshmem_vector_poll(PCIDevice *dev, 360 unsigned int vector_start, 361 unsigned int vector_end) 362 { 363 IVShmemState *s = IVSHMEM_COMMON(dev); 364 unsigned int vector; 365 366 IVSHMEM_DPRINTF("vector poll %p %d-%d\n", dev, vector_start, vector_end); 367 368 vector_end = MIN(vector_end, s->vectors); 369 370 for (vector = vector_start; vector < vector_end; vector++) { 371 EventNotifier *notifier = &s->peers[s->vm_id].eventfds[vector]; 372 373 if (!msix_is_masked(dev, vector)) { 374 continue; 375 } 376 377 if (event_notifier_test_and_clear(notifier)) { 378 msix_set_pending(dev, vector); 379 } 380 } 381 } 382 383 static void watch_vector_notifier(IVShmemState *s, EventNotifier *n, 384 int vector) 385 { 386 int eventfd = event_notifier_get_fd(n); 387 388 assert(!s->msi_vectors[vector].pdev); 389 s->msi_vectors[vector].pdev = PCI_DEVICE(s); 390 391 qemu_set_fd_handler(eventfd, ivshmem_vector_notify, 392 NULL, &s->msi_vectors[vector]); 393 } 394 395 static void ivshmem_add_eventfd(IVShmemState *s, int posn, int i) 396 { 397 memory_region_add_eventfd(&s->ivshmem_mmio, 398 DOORBELL, 399 4, 400 true, 401 (posn << 16) | i, 402 &s->peers[posn].eventfds[i]); 403 } 404 405 static void ivshmem_del_eventfd(IVShmemState *s, int posn, int i) 406 { 407 memory_region_del_eventfd(&s->ivshmem_mmio, 408 DOORBELL, 409 4, 410 true, 411 (posn << 16) | i, 412 &s->peers[posn].eventfds[i]); 413 } 414 415 static void close_peer_eventfds(IVShmemState *s, int posn) 416 { 417 int i, n; 418 419 assert(posn >= 0 && posn < s->nb_peers); 420 n = s->peers[posn].nb_eventfds; 421 422 if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) { 423 memory_region_transaction_begin(); 424 for (i = 0; i < n; i++) { 425 ivshmem_del_eventfd(s, posn, i); 426 } 427 memory_region_transaction_commit(); 428 } 429 430 for (i = 0; i < n; i++) { 431 event_notifier_cleanup(&s->peers[posn].eventfds[i]); 432 } 433 434 g_free(s->peers[posn].eventfds); 435 s->peers[posn].nb_eventfds = 0; 436 } 437 438 static void resize_peers(IVShmemState *s, int nb_peers) 439 { 440 int old_nb_peers = s->nb_peers; 441 int i; 442 443 assert(nb_peers > old_nb_peers); 444 IVSHMEM_DPRINTF("bumping storage to %d peers\n", nb_peers); 445 446 s->peers = g_realloc(s->peers, nb_peers * sizeof(Peer)); 447 s->nb_peers = nb_peers; 448 449 for (i = old_nb_peers; i < nb_peers; i++) { 450 s->peers[i].eventfds = g_new0(EventNotifier, s->vectors); 451 s->peers[i].nb_eventfds = 0; 452 } 453 } 454 455 static void ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector, 456 Error **errp) 457 { 458 PCIDevice *pdev = PCI_DEVICE(s); 459 int ret; 460 461 IVSHMEM_DPRINTF("ivshmem_add_kvm_msi_virq vector:%d\n", vector); 462 assert(!s->msi_vectors[vector].pdev); 463 464 ret = kvm_irqchip_add_msi_route(kvm_state, vector, pdev); 465 if (ret < 0) { 466 error_setg(errp, "kvm_irqchip_add_msi_route failed"); 467 return; 468 } 469 470 s->msi_vectors[vector].virq = ret; 471 s->msi_vectors[vector].pdev = pdev; 472 } 473 474 static void setup_interrupt(IVShmemState *s, int vector, Error **errp) 475 { 476 EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; 477 bool with_irqfd = kvm_msi_via_irqfd_enabled() && 478 ivshmem_has_feature(s, IVSHMEM_MSI); 479 PCIDevice *pdev = PCI_DEVICE(s); 480 Error *err = NULL; 481 482 IVSHMEM_DPRINTF("setting up interrupt for vector: %d\n", vector); 483 484 if (!with_irqfd) { 485 IVSHMEM_DPRINTF("with eventfd\n"); 486 watch_vector_notifier(s, n, vector); 487 } else if (msix_enabled(pdev)) { 488 IVSHMEM_DPRINTF("with irqfd\n"); 489 ivshmem_add_kvm_msi_virq(s, vector, &err); 490 if (err) { 491 error_propagate(errp, err); 492 return; 493 } 494 495 if (!msix_is_masked(pdev, vector)) { 496 kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, 497 s->msi_vectors[vector].virq); 498 /* TODO handle error */ 499 } 500 } else { 501 /* it will be delayed until msix is enabled, in write_config */ 502 IVSHMEM_DPRINTF("with irqfd, delayed until msix enabled\n"); 503 } 504 } 505 506 static void process_msg_shmem(IVShmemState *s, int fd, Error **errp) 507 { 508 Error *local_err = NULL; 509 struct stat buf; 510 size_t size; 511 512 if (s->ivshmem_bar2) { 513 error_setg(errp, "server sent unexpected shared memory message"); 514 close(fd); 515 return; 516 } 517 518 if (fstat(fd, &buf) < 0) { 519 error_setg_errno(errp, errno, 520 "can't determine size of shared memory sent by server"); 521 close(fd); 522 return; 523 } 524 525 size = buf.st_size; 526 527 /* mmap the region and map into the BAR2 */ 528 memory_region_init_ram_from_fd(&s->server_bar2, OBJECT(s), 529 "ivshmem.bar2", size, true, fd, &local_err); 530 if (local_err) { 531 error_propagate(errp, local_err); 532 return; 533 } 534 535 s->ivshmem_bar2 = &s->server_bar2; 536 } 537 538 static void process_msg_disconnect(IVShmemState *s, uint16_t posn, 539 Error **errp) 540 { 541 IVSHMEM_DPRINTF("posn %d has gone away\n", posn); 542 if (posn >= s->nb_peers || posn == s->vm_id) { 543 error_setg(errp, "invalid peer %d", posn); 544 return; 545 } 546 close_peer_eventfds(s, posn); 547 } 548 549 static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd, 550 Error **errp) 551 { 552 Peer *peer = &s->peers[posn]; 553 int vector; 554 555 /* 556 * The N-th connect message for this peer comes with the file 557 * descriptor for vector N-1. Count messages to find the vector. 558 */ 559 if (peer->nb_eventfds >= s->vectors) { 560 error_setg(errp, "Too many eventfd received, device has %d vectors", 561 s->vectors); 562 close(fd); 563 return; 564 } 565 vector = peer->nb_eventfds++; 566 567 IVSHMEM_DPRINTF("eventfds[%d][%d] = %d\n", posn, vector, fd); 568 event_notifier_init_fd(&peer->eventfds[vector], fd); 569 fcntl_setfl(fd, O_NONBLOCK); /* msix/irqfd poll non block */ 570 571 if (posn == s->vm_id) { 572 setup_interrupt(s, vector, errp); 573 /* TODO do we need to handle the error? */ 574 } 575 576 if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) { 577 ivshmem_add_eventfd(s, posn, vector); 578 } 579 } 580 581 static void process_msg(IVShmemState *s, int64_t msg, int fd, Error **errp) 582 { 583 IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd); 584 585 if (msg < -1 || msg > IVSHMEM_MAX_PEERS) { 586 error_setg(errp, "server sent invalid message %" PRId64, msg); 587 close(fd); 588 return; 589 } 590 591 if (msg == -1) { 592 process_msg_shmem(s, fd, errp); 593 return; 594 } 595 596 if (msg >= s->nb_peers) { 597 resize_peers(s, msg + 1); 598 } 599 600 if (fd >= 0) { 601 process_msg_connect(s, msg, fd, errp); 602 } else { 603 process_msg_disconnect(s, msg, errp); 604 } 605 } 606 607 static int ivshmem_can_receive(void *opaque) 608 { 609 IVShmemState *s = opaque; 610 611 assert(s->msg_buffered_bytes < sizeof(s->msg_buf)); 612 return sizeof(s->msg_buf) - s->msg_buffered_bytes; 613 } 614 615 static void ivshmem_read(void *opaque, const uint8_t *buf, int size) 616 { 617 IVShmemState *s = opaque; 618 Error *err = NULL; 619 int fd; 620 int64_t msg; 621 622 assert(size >= 0 && s->msg_buffered_bytes + size <= sizeof(s->msg_buf)); 623 memcpy((unsigned char *)&s->msg_buf + s->msg_buffered_bytes, buf, size); 624 s->msg_buffered_bytes += size; 625 if (s->msg_buffered_bytes < sizeof(s->msg_buf)) { 626 return; 627 } 628 msg = le64_to_cpu(s->msg_buf); 629 s->msg_buffered_bytes = 0; 630 631 fd = qemu_chr_fe_get_msgfd(&s->server_chr); 632 633 process_msg(s, msg, fd, &err); 634 if (err) { 635 error_report_err(err); 636 } 637 } 638 639 static int64_t ivshmem_recv_msg(IVShmemState *s, int *pfd, Error **errp) 640 { 641 int64_t msg; 642 int n, ret; 643 644 n = 0; 645 do { 646 ret = qemu_chr_fe_read_all(&s->server_chr, (uint8_t *)&msg + n, 647 sizeof(msg) - n); 648 if (ret < 0) { 649 if (ret == -EINTR) { 650 continue; 651 } 652 error_setg_errno(errp, -ret, "read from server failed"); 653 return INT64_MIN; 654 } 655 n += ret; 656 } while (n < sizeof(msg)); 657 658 *pfd = qemu_chr_fe_get_msgfd(&s->server_chr); 659 return le64_to_cpu(msg); 660 } 661 662 static void ivshmem_recv_setup(IVShmemState *s, Error **errp) 663 { 664 Error *err = NULL; 665 int64_t msg; 666 int fd; 667 668 msg = ivshmem_recv_msg(s, &fd, &err); 669 if (err) { 670 error_propagate(errp, err); 671 return; 672 } 673 if (msg != IVSHMEM_PROTOCOL_VERSION) { 674 error_setg(errp, "server sent version %" PRId64 ", expecting %d", 675 msg, IVSHMEM_PROTOCOL_VERSION); 676 return; 677 } 678 if (fd != -1) { 679 error_setg(errp, "server sent invalid version message"); 680 return; 681 } 682 683 /* 684 * ivshmem-server sends the remaining initial messages in a fixed 685 * order, but the device has always accepted them in any order. 686 * Stay as compatible as practical, just in case people use 687 * servers that behave differently. 688 */ 689 690 /* 691 * ivshmem_device_spec.txt has always required the ID message 692 * right here, and ivshmem-server has always complied. However, 693 * older versions of the device accepted it out of order, but 694 * broke when an interrupt setup message arrived before it. 695 */ 696 msg = ivshmem_recv_msg(s, &fd, &err); 697 if (err) { 698 error_propagate(errp, err); 699 return; 700 } 701 if (fd != -1 || msg < 0 || msg > IVSHMEM_MAX_PEERS) { 702 error_setg(errp, "server sent invalid ID message"); 703 return; 704 } 705 s->vm_id = msg; 706 707 /* 708 * Receive more messages until we got shared memory. 709 */ 710 do { 711 msg = ivshmem_recv_msg(s, &fd, &err); 712 if (err) { 713 error_propagate(errp, err); 714 return; 715 } 716 process_msg(s, msg, fd, &err); 717 if (err) { 718 error_propagate(errp, err); 719 return; 720 } 721 } while (msg != -1); 722 723 /* 724 * This function must either map the shared memory or fail. The 725 * loop above ensures that: it terminates normally only after it 726 * successfully processed the server's shared memory message. 727 * Assert that actually mapped the shared memory: 728 */ 729 assert(s->ivshmem_bar2); 730 } 731 732 /* Select the MSI-X vectors used by device. 733 * ivshmem maps events to vectors statically, so 734 * we just enable all vectors on init and after reset. */ 735 static void ivshmem_msix_vector_use(IVShmemState *s) 736 { 737 PCIDevice *d = PCI_DEVICE(s); 738 int i; 739 740 for (i = 0; i < s->vectors; i++) { 741 msix_vector_use(d, i); 742 } 743 } 744 745 static void ivshmem_disable_irqfd(IVShmemState *s); 746 747 static void ivshmem_reset(DeviceState *d) 748 { 749 IVShmemState *s = IVSHMEM_COMMON(d); 750 751 ivshmem_disable_irqfd(s); 752 753 s->intrstatus = 0; 754 s->intrmask = 0; 755 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 756 ivshmem_msix_vector_use(s); 757 } 758 } 759 760 static int ivshmem_setup_interrupts(IVShmemState *s, Error **errp) 761 { 762 /* allocate QEMU callback data for receiving interrupts */ 763 s->msi_vectors = g_malloc0(s->vectors * sizeof(MSIVector)); 764 765 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 766 if (msix_init_exclusive_bar(PCI_DEVICE(s), s->vectors, 1, errp)) { 767 return -1; 768 } 769 770 IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors); 771 ivshmem_msix_vector_use(s); 772 } 773 774 return 0; 775 } 776 777 static void ivshmem_remove_kvm_msi_virq(IVShmemState *s, int vector) 778 { 779 IVSHMEM_DPRINTF("ivshmem_remove_kvm_msi_virq vector:%d\n", vector); 780 781 if (s->msi_vectors[vector].pdev == NULL) { 782 return; 783 } 784 785 /* it was cleaned when masked in the frontend. */ 786 kvm_irqchip_release_virq(kvm_state, s->msi_vectors[vector].virq); 787 788 s->msi_vectors[vector].pdev = NULL; 789 } 790 791 static void ivshmem_enable_irqfd(IVShmemState *s) 792 { 793 PCIDevice *pdev = PCI_DEVICE(s); 794 int i; 795 796 for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) { 797 Error *err = NULL; 798 799 ivshmem_add_kvm_msi_virq(s, i, &err); 800 if (err) { 801 error_report_err(err); 802 goto undo; 803 } 804 } 805 806 if (msix_set_vector_notifiers(pdev, 807 ivshmem_vector_unmask, 808 ivshmem_vector_mask, 809 ivshmem_vector_poll)) { 810 error_report("ivshmem: msix_set_vector_notifiers failed"); 811 goto undo; 812 } 813 return; 814 815 undo: 816 while (--i >= 0) { 817 ivshmem_remove_kvm_msi_virq(s, i); 818 } 819 } 820 821 static void ivshmem_disable_irqfd(IVShmemState *s) 822 { 823 PCIDevice *pdev = PCI_DEVICE(s); 824 int i; 825 826 if (!pdev->msix_vector_use_notifier) { 827 return; 828 } 829 830 msix_unset_vector_notifiers(pdev); 831 832 for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) { 833 /* 834 * MSI-X is already disabled here so msix_unset_vector_notifiers() 835 * didn't call our release notifier. Do it now to keep our masks and 836 * unmasks balanced. 837 */ 838 if (s->msi_vectors[i].unmasked) { 839 ivshmem_vector_mask(pdev, i); 840 } 841 ivshmem_remove_kvm_msi_virq(s, i); 842 } 843 844 } 845 846 static void ivshmem_write_config(PCIDevice *pdev, uint32_t address, 847 uint32_t val, int len) 848 { 849 IVShmemState *s = IVSHMEM_COMMON(pdev); 850 int is_enabled, was_enabled = msix_enabled(pdev); 851 852 pci_default_write_config(pdev, address, val, len); 853 is_enabled = msix_enabled(pdev); 854 855 if (kvm_msi_via_irqfd_enabled()) { 856 if (!was_enabled && is_enabled) { 857 ivshmem_enable_irqfd(s); 858 } else if (was_enabled && !is_enabled) { 859 ivshmem_disable_irqfd(s); 860 } 861 } 862 } 863 864 static void ivshmem_common_realize(PCIDevice *dev, Error **errp) 865 { 866 IVShmemState *s = IVSHMEM_COMMON(dev); 867 Error *err = NULL; 868 uint8_t *pci_conf; 869 Error *local_err = NULL; 870 871 /* IRQFD requires MSI */ 872 if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD) && 873 !ivshmem_has_feature(s, IVSHMEM_MSI)) { 874 error_setg(errp, "ioeventfd/irqfd requires MSI"); 875 return; 876 } 877 878 pci_conf = dev->config; 879 pci_conf[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY; 880 881 memory_region_init_io(&s->ivshmem_mmio, OBJECT(s), &ivshmem_mmio_ops, s, 882 "ivshmem-mmio", IVSHMEM_REG_BAR_SIZE); 883 884 /* region for registers*/ 885 pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, 886 &s->ivshmem_mmio); 887 888 if (s->hostmem != NULL) { 889 IVSHMEM_DPRINTF("using hostmem\n"); 890 891 s->ivshmem_bar2 = host_memory_backend_get_memory(s->hostmem); 892 host_memory_backend_set_mapped(s->hostmem, true); 893 } else { 894 Chardev *chr = qemu_chr_fe_get_driver(&s->server_chr); 895 assert(chr); 896 897 IVSHMEM_DPRINTF("using shared memory server (socket = %s)\n", 898 chr->filename); 899 900 /* we allocate enough space for 16 peers and grow as needed */ 901 resize_peers(s, 16); 902 903 /* 904 * Receive setup messages from server synchronously. 905 * Older versions did it asynchronously, but that creates a 906 * number of entertaining race conditions. 907 */ 908 ivshmem_recv_setup(s, &err); 909 if (err) { 910 error_propagate(errp, err); 911 return; 912 } 913 914 if (s->master == ON_OFF_AUTO_ON && s->vm_id != 0) { 915 error_setg(errp, 916 "master must connect to the server before any peers"); 917 return; 918 } 919 920 qemu_chr_fe_set_handlers(&s->server_chr, ivshmem_can_receive, 921 ivshmem_read, NULL, NULL, s, NULL, true); 922 923 if (ivshmem_setup_interrupts(s, errp) < 0) { 924 error_prepend(errp, "Failed to initialize interrupts: "); 925 return; 926 } 927 } 928 929 if (s->master == ON_OFF_AUTO_AUTO) { 930 s->master = s->vm_id == 0 ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; 931 } 932 933 if (!ivshmem_is_master(s)) { 934 error_setg(&s->migration_blocker, 935 "Migration is disabled when using feature 'peer mode' in device 'ivshmem'"); 936 migrate_add_blocker(s->migration_blocker, &local_err); 937 if (local_err) { 938 error_propagate(errp, local_err); 939 error_free(s->migration_blocker); 940 return; 941 } 942 } 943 944 vmstate_register_ram(s->ivshmem_bar2, DEVICE(s)); 945 pci_register_bar(PCI_DEVICE(s), 2, 946 PCI_BASE_ADDRESS_SPACE_MEMORY | 947 PCI_BASE_ADDRESS_MEM_PREFETCH | 948 PCI_BASE_ADDRESS_MEM_TYPE_64, 949 s->ivshmem_bar2); 950 } 951 952 static void ivshmem_exit(PCIDevice *dev) 953 { 954 IVShmemState *s = IVSHMEM_COMMON(dev); 955 int i; 956 957 if (s->migration_blocker) { 958 migrate_del_blocker(s->migration_blocker); 959 error_free(s->migration_blocker); 960 } 961 962 if (memory_region_is_mapped(s->ivshmem_bar2)) { 963 if (!s->hostmem) { 964 void *addr = memory_region_get_ram_ptr(s->ivshmem_bar2); 965 int fd; 966 967 if (munmap(addr, memory_region_size(s->ivshmem_bar2) == -1)) { 968 error_report("Failed to munmap shared memory %s", 969 strerror(errno)); 970 } 971 972 fd = memory_region_get_fd(s->ivshmem_bar2); 973 close(fd); 974 } 975 976 vmstate_unregister_ram(s->ivshmem_bar2, DEVICE(dev)); 977 } 978 979 if (s->hostmem) { 980 host_memory_backend_set_mapped(s->hostmem, false); 981 } 982 983 if (s->peers) { 984 for (i = 0; i < s->nb_peers; i++) { 985 close_peer_eventfds(s, i); 986 } 987 g_free(s->peers); 988 } 989 990 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 991 msix_uninit_exclusive_bar(dev); 992 } 993 994 g_free(s->msi_vectors); 995 } 996 997 static int ivshmem_pre_load(void *opaque) 998 { 999 IVShmemState *s = opaque; 1000 1001 if (!ivshmem_is_master(s)) { 1002 error_report("'peer' devices are not migratable"); 1003 return -EINVAL; 1004 } 1005 1006 return 0; 1007 } 1008 1009 static int ivshmem_post_load(void *opaque, int version_id) 1010 { 1011 IVShmemState *s = opaque; 1012 1013 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 1014 ivshmem_msix_vector_use(s); 1015 } 1016 return 0; 1017 } 1018 1019 static void ivshmem_common_class_init(ObjectClass *klass, void *data) 1020 { 1021 DeviceClass *dc = DEVICE_CLASS(klass); 1022 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); 1023 1024 k->realize = ivshmem_common_realize; 1025 k->exit = ivshmem_exit; 1026 k->config_write = ivshmem_write_config; 1027 k->vendor_id = PCI_VENDOR_ID_IVSHMEM; 1028 k->device_id = PCI_DEVICE_ID_IVSHMEM; 1029 k->class_id = PCI_CLASS_MEMORY_RAM; 1030 k->revision = 1; 1031 dc->reset = ivshmem_reset; 1032 set_bit(DEVICE_CATEGORY_MISC, dc->categories); 1033 dc->desc = "Inter-VM shared memory"; 1034 } 1035 1036 static const TypeInfo ivshmem_common_info = { 1037 .name = TYPE_IVSHMEM_COMMON, 1038 .parent = TYPE_PCI_DEVICE, 1039 .instance_size = sizeof(IVShmemState), 1040 .abstract = true, 1041 .class_init = ivshmem_common_class_init, 1042 .interfaces = (InterfaceInfo[]) { 1043 { INTERFACE_CONVENTIONAL_PCI_DEVICE }, 1044 { }, 1045 }, 1046 }; 1047 1048 static const VMStateDescription ivshmem_plain_vmsd = { 1049 .name = TYPE_IVSHMEM_PLAIN, 1050 .version_id = 0, 1051 .minimum_version_id = 0, 1052 .pre_load = ivshmem_pre_load, 1053 .post_load = ivshmem_post_load, 1054 .fields = (VMStateField[]) { 1055 VMSTATE_PCI_DEVICE(parent_obj, IVShmemState), 1056 VMSTATE_UINT32(intrstatus, IVShmemState), 1057 VMSTATE_UINT32(intrmask, IVShmemState), 1058 VMSTATE_END_OF_LIST() 1059 }, 1060 }; 1061 1062 static Property ivshmem_plain_properties[] = { 1063 DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF), 1064 DEFINE_PROP_LINK("memdev", IVShmemState, hostmem, TYPE_MEMORY_BACKEND, 1065 HostMemoryBackend *), 1066 DEFINE_PROP_END_OF_LIST(), 1067 }; 1068 1069 static void ivshmem_plain_realize(PCIDevice *dev, Error **errp) 1070 { 1071 IVShmemState *s = IVSHMEM_COMMON(dev); 1072 1073 if (!s->hostmem) { 1074 error_setg(errp, "You must specify a 'memdev'"); 1075 return; 1076 } else if (host_memory_backend_is_mapped(s->hostmem)) { 1077 char *path = object_get_canonical_path_component(OBJECT(s->hostmem)); 1078 error_setg(errp, "can't use already busy memdev: %s", path); 1079 g_free(path); 1080 return; 1081 } 1082 1083 ivshmem_common_realize(dev, errp); 1084 } 1085 1086 static void ivshmem_plain_class_init(ObjectClass *klass, void *data) 1087 { 1088 DeviceClass *dc = DEVICE_CLASS(klass); 1089 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); 1090 1091 k->realize = ivshmem_plain_realize; 1092 dc->props = ivshmem_plain_properties; 1093 dc->vmsd = &ivshmem_plain_vmsd; 1094 } 1095 1096 static const TypeInfo ivshmem_plain_info = { 1097 .name = TYPE_IVSHMEM_PLAIN, 1098 .parent = TYPE_IVSHMEM_COMMON, 1099 .instance_size = sizeof(IVShmemState), 1100 .class_init = ivshmem_plain_class_init, 1101 }; 1102 1103 static const VMStateDescription ivshmem_doorbell_vmsd = { 1104 .name = TYPE_IVSHMEM_DOORBELL, 1105 .version_id = 0, 1106 .minimum_version_id = 0, 1107 .pre_load = ivshmem_pre_load, 1108 .post_load = ivshmem_post_load, 1109 .fields = (VMStateField[]) { 1110 VMSTATE_PCI_DEVICE(parent_obj, IVShmemState), 1111 VMSTATE_MSIX(parent_obj, IVShmemState), 1112 VMSTATE_UINT32(intrstatus, IVShmemState), 1113 VMSTATE_UINT32(intrmask, IVShmemState), 1114 VMSTATE_END_OF_LIST() 1115 }, 1116 }; 1117 1118 static Property ivshmem_doorbell_properties[] = { 1119 DEFINE_PROP_CHR("chardev", IVShmemState, server_chr), 1120 DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1), 1121 DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD, 1122 true), 1123 DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF), 1124 DEFINE_PROP_END_OF_LIST(), 1125 }; 1126 1127 static void ivshmem_doorbell_init(Object *obj) 1128 { 1129 IVShmemState *s = IVSHMEM_DOORBELL(obj); 1130 1131 s->features |= (1 << IVSHMEM_MSI); 1132 } 1133 1134 static void ivshmem_doorbell_realize(PCIDevice *dev, Error **errp) 1135 { 1136 IVShmemState *s = IVSHMEM_COMMON(dev); 1137 1138 if (!qemu_chr_fe_backend_connected(&s->server_chr)) { 1139 error_setg(errp, "You must specify a 'chardev'"); 1140 return; 1141 } 1142 1143 ivshmem_common_realize(dev, errp); 1144 } 1145 1146 static void ivshmem_doorbell_class_init(ObjectClass *klass, void *data) 1147 { 1148 DeviceClass *dc = DEVICE_CLASS(klass); 1149 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); 1150 1151 k->realize = ivshmem_doorbell_realize; 1152 dc->props = ivshmem_doorbell_properties; 1153 dc->vmsd = &ivshmem_doorbell_vmsd; 1154 } 1155 1156 static const TypeInfo ivshmem_doorbell_info = { 1157 .name = TYPE_IVSHMEM_DOORBELL, 1158 .parent = TYPE_IVSHMEM_COMMON, 1159 .instance_size = sizeof(IVShmemState), 1160 .instance_init = ivshmem_doorbell_init, 1161 .class_init = ivshmem_doorbell_class_init, 1162 }; 1163 1164 static void ivshmem_register_types(void) 1165 { 1166 type_register_static(&ivshmem_common_info); 1167 type_register_static(&ivshmem_plain_info); 1168 type_register_static(&ivshmem_doorbell_info); 1169 } 1170 1171 type_init(ivshmem_register_types) 1172