1 /* 2 * Inter-VM Shared Memory PCI device. 3 * 4 * Author: 5 * Cam Macdonell <cam@cs.ualberta.ca> 6 * 7 * Based On: cirrus_vga.c 8 * Copyright (c) 2004 Fabrice Bellard 9 * Copyright (c) 2004 Makoto Suzuki (suzu) 10 * 11 * and rtl8139.c 12 * Copyright (c) 2006 Igor Kovalenko 13 * 14 * This code is licensed under the GNU GPL v2. 15 * 16 * Contributions after 2012-01-13 are licensed under the terms of the 17 * GNU GPL, version 2 or (at your option) any later version. 18 */ 19 #include "qemu/osdep.h" 20 #include "qapi/error.h" 21 #include "qemu/cutils.h" 22 #include "hw/hw.h" 23 #include "hw/i386/pc.h" 24 #include "hw/pci/pci.h" 25 #include "hw/pci/msi.h" 26 #include "hw/pci/msix.h" 27 #include "sysemu/kvm.h" 28 #include "migration/blocker.h" 29 #include "qemu/error-report.h" 30 #include "qemu/event_notifier.h" 31 #include "qom/object_interfaces.h" 32 #include "chardev/char-fe.h" 33 #include "sysemu/hostmem.h" 34 #include "sysemu/qtest.h" 35 #include "qapi/visitor.h" 36 37 #include "hw/misc/ivshmem.h" 38 39 #define PCI_VENDOR_ID_IVSHMEM PCI_VENDOR_ID_REDHAT_QUMRANET 40 #define PCI_DEVICE_ID_IVSHMEM 0x1110 41 42 #define IVSHMEM_MAX_PEERS UINT16_MAX 43 #define IVSHMEM_IOEVENTFD 0 44 #define IVSHMEM_MSI 1 45 46 #define IVSHMEM_REG_BAR_SIZE 0x100 47 48 #define IVSHMEM_DEBUG 0 49 #define IVSHMEM_DPRINTF(fmt, ...) \ 50 do { \ 51 if (IVSHMEM_DEBUG) { \ 52 printf("IVSHMEM: " fmt, ## __VA_ARGS__); \ 53 } \ 54 } while (0) 55 56 #define TYPE_IVSHMEM_COMMON "ivshmem-common" 57 #define IVSHMEM_COMMON(obj) \ 58 OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_COMMON) 59 60 #define TYPE_IVSHMEM_PLAIN "ivshmem-plain" 61 #define IVSHMEM_PLAIN(obj) \ 62 OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_PLAIN) 63 64 #define TYPE_IVSHMEM_DOORBELL "ivshmem-doorbell" 65 #define IVSHMEM_DOORBELL(obj) \ 66 OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_DOORBELL) 67 68 #define TYPE_IVSHMEM "ivshmem" 69 #define IVSHMEM(obj) \ 70 OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM) 71 72 typedef struct Peer { 73 int nb_eventfds; 74 EventNotifier *eventfds; 75 } Peer; 76 77 typedef struct MSIVector { 78 PCIDevice *pdev; 79 int virq; 80 } MSIVector; 81 82 typedef struct IVShmemState { 83 /*< private >*/ 84 PCIDevice parent_obj; 85 /*< public >*/ 86 87 uint32_t features; 88 89 /* exactly one of these two may be set */ 90 HostMemoryBackend *hostmem; /* with interrupts */ 91 CharBackend server_chr; /* without interrupts */ 92 93 /* registers */ 94 uint32_t intrmask; 95 uint32_t intrstatus; 96 int vm_id; 97 98 /* BARs */ 99 MemoryRegion ivshmem_mmio; /* BAR 0 (registers) */ 100 MemoryRegion *ivshmem_bar2; /* BAR 2 (shared memory) */ 101 MemoryRegion server_bar2; /* used with server_chr */ 102 103 /* interrupt support */ 104 Peer *peers; 105 int nb_peers; /* space in @peers[] */ 106 uint32_t vectors; 107 MSIVector *msi_vectors; 108 uint64_t msg_buf; /* buffer for receiving server messages */ 109 int msg_buffered_bytes; /* #bytes in @msg_buf */ 110 111 /* migration stuff */ 112 OnOffAuto master; 113 Error *migration_blocker; 114 115 /* legacy cruft */ 116 char *role; 117 char *shmobj; 118 char *sizearg; 119 size_t legacy_size; 120 uint32_t not_legacy_32bit; 121 } IVShmemState; 122 123 /* registers for the Inter-VM shared memory device */ 124 enum ivshmem_registers { 125 INTRMASK = 0, 126 INTRSTATUS = 4, 127 IVPOSITION = 8, 128 DOORBELL = 12, 129 }; 130 131 static inline uint32_t ivshmem_has_feature(IVShmemState *ivs, 132 unsigned int feature) { 133 return (ivs->features & (1 << feature)); 134 } 135 136 static inline bool ivshmem_is_master(IVShmemState *s) 137 { 138 assert(s->master != ON_OFF_AUTO_AUTO); 139 return s->master == ON_OFF_AUTO_ON; 140 } 141 142 static void ivshmem_update_irq(IVShmemState *s) 143 { 144 PCIDevice *d = PCI_DEVICE(s); 145 uint32_t isr = s->intrstatus & s->intrmask; 146 147 /* 148 * Do nothing unless the device actually uses INTx. Here's how 149 * the device variants signal interrupts, what they put in PCI 150 * config space: 151 * Device variant Interrupt Interrupt Pin MSI-X cap. 152 * ivshmem-plain none 0 no 153 * ivshmem-doorbell MSI-X 1 yes(1) 154 * ivshmem,msi=off INTx 1 no 155 * ivshmem,msi=on MSI-X 1(2) yes(1) 156 * (1) if guest enabled MSI-X 157 * (2) the device lies 158 * Leads to the condition for doing nothing: 159 */ 160 if (ivshmem_has_feature(s, IVSHMEM_MSI) 161 || !d->config[PCI_INTERRUPT_PIN]) { 162 return; 163 } 164 165 /* don't print ISR resets */ 166 if (isr) { 167 IVSHMEM_DPRINTF("Set IRQ to %d (%04x %04x)\n", 168 isr ? 1 : 0, s->intrstatus, s->intrmask); 169 } 170 171 pci_set_irq(d, isr != 0); 172 } 173 174 static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val) 175 { 176 IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val); 177 178 s->intrmask = val; 179 ivshmem_update_irq(s); 180 } 181 182 static uint32_t ivshmem_IntrMask_read(IVShmemState *s) 183 { 184 uint32_t ret = s->intrmask; 185 186 IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret); 187 return ret; 188 } 189 190 static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val) 191 { 192 IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val); 193 194 s->intrstatus = val; 195 ivshmem_update_irq(s); 196 } 197 198 static uint32_t ivshmem_IntrStatus_read(IVShmemState *s) 199 { 200 uint32_t ret = s->intrstatus; 201 202 /* reading ISR clears all interrupts */ 203 s->intrstatus = 0; 204 ivshmem_update_irq(s); 205 return ret; 206 } 207 208 static void ivshmem_io_write(void *opaque, hwaddr addr, 209 uint64_t val, unsigned size) 210 { 211 IVShmemState *s = opaque; 212 213 uint16_t dest = val >> 16; 214 uint16_t vector = val & 0xff; 215 216 addr &= 0xfc; 217 218 IVSHMEM_DPRINTF("writing to addr " TARGET_FMT_plx "\n", addr); 219 switch (addr) 220 { 221 case INTRMASK: 222 ivshmem_IntrMask_write(s, val); 223 break; 224 225 case INTRSTATUS: 226 ivshmem_IntrStatus_write(s, val); 227 break; 228 229 case DOORBELL: 230 /* check that dest VM ID is reasonable */ 231 if (dest >= s->nb_peers) { 232 IVSHMEM_DPRINTF("Invalid destination VM ID (%d)\n", dest); 233 break; 234 } 235 236 /* check doorbell range */ 237 if (vector < s->peers[dest].nb_eventfds) { 238 IVSHMEM_DPRINTF("Notifying VM %d on vector %d\n", dest, vector); 239 event_notifier_set(&s->peers[dest].eventfds[vector]); 240 } else { 241 IVSHMEM_DPRINTF("Invalid destination vector %d on VM %d\n", 242 vector, dest); 243 } 244 break; 245 default: 246 IVSHMEM_DPRINTF("Unhandled write " TARGET_FMT_plx "\n", addr); 247 } 248 } 249 250 static uint64_t ivshmem_io_read(void *opaque, hwaddr addr, 251 unsigned size) 252 { 253 254 IVShmemState *s = opaque; 255 uint32_t ret; 256 257 switch (addr) 258 { 259 case INTRMASK: 260 ret = ivshmem_IntrMask_read(s); 261 break; 262 263 case INTRSTATUS: 264 ret = ivshmem_IntrStatus_read(s); 265 break; 266 267 case IVPOSITION: 268 ret = s->vm_id; 269 break; 270 271 default: 272 IVSHMEM_DPRINTF("why are we reading " TARGET_FMT_plx "\n", addr); 273 ret = 0; 274 } 275 276 return ret; 277 } 278 279 static const MemoryRegionOps ivshmem_mmio_ops = { 280 .read = ivshmem_io_read, 281 .write = ivshmem_io_write, 282 .endianness = DEVICE_NATIVE_ENDIAN, 283 .impl = { 284 .min_access_size = 4, 285 .max_access_size = 4, 286 }, 287 }; 288 289 static void ivshmem_vector_notify(void *opaque) 290 { 291 MSIVector *entry = opaque; 292 PCIDevice *pdev = entry->pdev; 293 IVShmemState *s = IVSHMEM_COMMON(pdev); 294 int vector = entry - s->msi_vectors; 295 EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; 296 297 if (!event_notifier_test_and_clear(n)) { 298 return; 299 } 300 301 IVSHMEM_DPRINTF("interrupt on vector %p %d\n", pdev, vector); 302 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 303 if (msix_enabled(pdev)) { 304 msix_notify(pdev, vector); 305 } 306 } else { 307 ivshmem_IntrStatus_write(s, 1); 308 } 309 } 310 311 static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector, 312 MSIMessage msg) 313 { 314 IVShmemState *s = IVSHMEM_COMMON(dev); 315 EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; 316 MSIVector *v = &s->msi_vectors[vector]; 317 int ret; 318 319 IVSHMEM_DPRINTF("vector unmask %p %d\n", dev, vector); 320 321 ret = kvm_irqchip_update_msi_route(kvm_state, v->virq, msg, dev); 322 if (ret < 0) { 323 return ret; 324 } 325 kvm_irqchip_commit_routes(kvm_state); 326 327 return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq); 328 } 329 330 static void ivshmem_vector_mask(PCIDevice *dev, unsigned vector) 331 { 332 IVShmemState *s = IVSHMEM_COMMON(dev); 333 EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; 334 int ret; 335 336 IVSHMEM_DPRINTF("vector mask %p %d\n", dev, vector); 337 338 ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, 339 s->msi_vectors[vector].virq); 340 if (ret != 0) { 341 error_report("remove_irqfd_notifier_gsi failed"); 342 } 343 } 344 345 static void ivshmem_vector_poll(PCIDevice *dev, 346 unsigned int vector_start, 347 unsigned int vector_end) 348 { 349 IVShmemState *s = IVSHMEM_COMMON(dev); 350 unsigned int vector; 351 352 IVSHMEM_DPRINTF("vector poll %p %d-%d\n", dev, vector_start, vector_end); 353 354 vector_end = MIN(vector_end, s->vectors); 355 356 for (vector = vector_start; vector < vector_end; vector++) { 357 EventNotifier *notifier = &s->peers[s->vm_id].eventfds[vector]; 358 359 if (!msix_is_masked(dev, vector)) { 360 continue; 361 } 362 363 if (event_notifier_test_and_clear(notifier)) { 364 msix_set_pending(dev, vector); 365 } 366 } 367 } 368 369 static void watch_vector_notifier(IVShmemState *s, EventNotifier *n, 370 int vector) 371 { 372 int eventfd = event_notifier_get_fd(n); 373 374 assert(!s->msi_vectors[vector].pdev); 375 s->msi_vectors[vector].pdev = PCI_DEVICE(s); 376 377 qemu_set_fd_handler(eventfd, ivshmem_vector_notify, 378 NULL, &s->msi_vectors[vector]); 379 } 380 381 static void ivshmem_add_eventfd(IVShmemState *s, int posn, int i) 382 { 383 memory_region_add_eventfd(&s->ivshmem_mmio, 384 DOORBELL, 385 4, 386 true, 387 (posn << 16) | i, 388 &s->peers[posn].eventfds[i]); 389 } 390 391 static void ivshmem_del_eventfd(IVShmemState *s, int posn, int i) 392 { 393 memory_region_del_eventfd(&s->ivshmem_mmio, 394 DOORBELL, 395 4, 396 true, 397 (posn << 16) | i, 398 &s->peers[posn].eventfds[i]); 399 } 400 401 static void close_peer_eventfds(IVShmemState *s, int posn) 402 { 403 int i, n; 404 405 assert(posn >= 0 && posn < s->nb_peers); 406 n = s->peers[posn].nb_eventfds; 407 408 if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) { 409 memory_region_transaction_begin(); 410 for (i = 0; i < n; i++) { 411 ivshmem_del_eventfd(s, posn, i); 412 } 413 memory_region_transaction_commit(); 414 } 415 416 for (i = 0; i < n; i++) { 417 event_notifier_cleanup(&s->peers[posn].eventfds[i]); 418 } 419 420 g_free(s->peers[posn].eventfds); 421 s->peers[posn].nb_eventfds = 0; 422 } 423 424 static void resize_peers(IVShmemState *s, int nb_peers) 425 { 426 int old_nb_peers = s->nb_peers; 427 int i; 428 429 assert(nb_peers > old_nb_peers); 430 IVSHMEM_DPRINTF("bumping storage to %d peers\n", nb_peers); 431 432 s->peers = g_realloc(s->peers, nb_peers * sizeof(Peer)); 433 s->nb_peers = nb_peers; 434 435 for (i = old_nb_peers; i < nb_peers; i++) { 436 s->peers[i].eventfds = g_new0(EventNotifier, s->vectors); 437 s->peers[i].nb_eventfds = 0; 438 } 439 } 440 441 static void ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector, 442 Error **errp) 443 { 444 PCIDevice *pdev = PCI_DEVICE(s); 445 int ret; 446 447 IVSHMEM_DPRINTF("ivshmem_add_kvm_msi_virq vector:%d\n", vector); 448 assert(!s->msi_vectors[vector].pdev); 449 450 ret = kvm_irqchip_add_msi_route(kvm_state, vector, pdev); 451 if (ret < 0) { 452 error_setg(errp, "kvm_irqchip_add_msi_route failed"); 453 return; 454 } 455 456 s->msi_vectors[vector].virq = ret; 457 s->msi_vectors[vector].pdev = pdev; 458 } 459 460 static void setup_interrupt(IVShmemState *s, int vector, Error **errp) 461 { 462 EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; 463 bool with_irqfd = kvm_msi_via_irqfd_enabled() && 464 ivshmem_has_feature(s, IVSHMEM_MSI); 465 PCIDevice *pdev = PCI_DEVICE(s); 466 Error *err = NULL; 467 468 IVSHMEM_DPRINTF("setting up interrupt for vector: %d\n", vector); 469 470 if (!with_irqfd) { 471 IVSHMEM_DPRINTF("with eventfd\n"); 472 watch_vector_notifier(s, n, vector); 473 } else if (msix_enabled(pdev)) { 474 IVSHMEM_DPRINTF("with irqfd\n"); 475 ivshmem_add_kvm_msi_virq(s, vector, &err); 476 if (err) { 477 error_propagate(errp, err); 478 return; 479 } 480 481 if (!msix_is_masked(pdev, vector)) { 482 kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, 483 s->msi_vectors[vector].virq); 484 /* TODO handle error */ 485 } 486 } else { 487 /* it will be delayed until msix is enabled, in write_config */ 488 IVSHMEM_DPRINTF("with irqfd, delayed until msix enabled\n"); 489 } 490 } 491 492 static void process_msg_shmem(IVShmemState *s, int fd, Error **errp) 493 { 494 Error *local_err = NULL; 495 struct stat buf; 496 size_t size; 497 498 if (s->ivshmem_bar2) { 499 error_setg(errp, "server sent unexpected shared memory message"); 500 close(fd); 501 return; 502 } 503 504 if (fstat(fd, &buf) < 0) { 505 error_setg_errno(errp, errno, 506 "can't determine size of shared memory sent by server"); 507 close(fd); 508 return; 509 } 510 511 size = buf.st_size; 512 513 /* Legacy cruft */ 514 if (s->legacy_size != SIZE_MAX) { 515 if (size < s->legacy_size) { 516 error_setg(errp, "server sent only %zd bytes of shared memory", 517 (size_t)buf.st_size); 518 close(fd); 519 return; 520 } 521 size = s->legacy_size; 522 } 523 524 /* mmap the region and map into the BAR2 */ 525 memory_region_init_ram_from_fd(&s->server_bar2, OBJECT(s), 526 "ivshmem.bar2", size, true, fd, &local_err); 527 if (local_err) { 528 error_propagate(errp, local_err); 529 return; 530 } 531 532 s->ivshmem_bar2 = &s->server_bar2; 533 } 534 535 static void process_msg_disconnect(IVShmemState *s, uint16_t posn, 536 Error **errp) 537 { 538 IVSHMEM_DPRINTF("posn %d has gone away\n", posn); 539 if (posn >= s->nb_peers || posn == s->vm_id) { 540 error_setg(errp, "invalid peer %d", posn); 541 return; 542 } 543 close_peer_eventfds(s, posn); 544 } 545 546 static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd, 547 Error **errp) 548 { 549 Peer *peer = &s->peers[posn]; 550 int vector; 551 552 /* 553 * The N-th connect message for this peer comes with the file 554 * descriptor for vector N-1. Count messages to find the vector. 555 */ 556 if (peer->nb_eventfds >= s->vectors) { 557 error_setg(errp, "Too many eventfd received, device has %d vectors", 558 s->vectors); 559 close(fd); 560 return; 561 } 562 vector = peer->nb_eventfds++; 563 564 IVSHMEM_DPRINTF("eventfds[%d][%d] = %d\n", posn, vector, fd); 565 event_notifier_init_fd(&peer->eventfds[vector], fd); 566 fcntl_setfl(fd, O_NONBLOCK); /* msix/irqfd poll non block */ 567 568 if (posn == s->vm_id) { 569 setup_interrupt(s, vector, errp); 570 /* TODO do we need to handle the error? */ 571 } 572 573 if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) { 574 ivshmem_add_eventfd(s, posn, vector); 575 } 576 } 577 578 static void process_msg(IVShmemState *s, int64_t msg, int fd, Error **errp) 579 { 580 IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd); 581 582 if (msg < -1 || msg > IVSHMEM_MAX_PEERS) { 583 error_setg(errp, "server sent invalid message %" PRId64, msg); 584 close(fd); 585 return; 586 } 587 588 if (msg == -1) { 589 process_msg_shmem(s, fd, errp); 590 return; 591 } 592 593 if (msg >= s->nb_peers) { 594 resize_peers(s, msg + 1); 595 } 596 597 if (fd >= 0) { 598 process_msg_connect(s, msg, fd, errp); 599 } else { 600 process_msg_disconnect(s, msg, errp); 601 } 602 } 603 604 static int ivshmem_can_receive(void *opaque) 605 { 606 IVShmemState *s = opaque; 607 608 assert(s->msg_buffered_bytes < sizeof(s->msg_buf)); 609 return sizeof(s->msg_buf) - s->msg_buffered_bytes; 610 } 611 612 static void ivshmem_read(void *opaque, const uint8_t *buf, int size) 613 { 614 IVShmemState *s = opaque; 615 Error *err = NULL; 616 int fd; 617 int64_t msg; 618 619 assert(size >= 0 && s->msg_buffered_bytes + size <= sizeof(s->msg_buf)); 620 memcpy((unsigned char *)&s->msg_buf + s->msg_buffered_bytes, buf, size); 621 s->msg_buffered_bytes += size; 622 if (s->msg_buffered_bytes < sizeof(s->msg_buf)) { 623 return; 624 } 625 msg = le64_to_cpu(s->msg_buf); 626 s->msg_buffered_bytes = 0; 627 628 fd = qemu_chr_fe_get_msgfd(&s->server_chr); 629 630 process_msg(s, msg, fd, &err); 631 if (err) { 632 error_report_err(err); 633 } 634 } 635 636 static int64_t ivshmem_recv_msg(IVShmemState *s, int *pfd, Error **errp) 637 { 638 int64_t msg; 639 int n, ret; 640 641 n = 0; 642 do { 643 ret = qemu_chr_fe_read_all(&s->server_chr, (uint8_t *)&msg + n, 644 sizeof(msg) - n); 645 if (ret < 0 && ret != -EINTR) { 646 error_setg_errno(errp, -ret, "read from server failed"); 647 return INT64_MIN; 648 } 649 n += ret; 650 } while (n < sizeof(msg)); 651 652 *pfd = qemu_chr_fe_get_msgfd(&s->server_chr); 653 return msg; 654 } 655 656 static void ivshmem_recv_setup(IVShmemState *s, Error **errp) 657 { 658 Error *err = NULL; 659 int64_t msg; 660 int fd; 661 662 msg = ivshmem_recv_msg(s, &fd, &err); 663 if (err) { 664 error_propagate(errp, err); 665 return; 666 } 667 if (msg != IVSHMEM_PROTOCOL_VERSION) { 668 error_setg(errp, "server sent version %" PRId64 ", expecting %d", 669 msg, IVSHMEM_PROTOCOL_VERSION); 670 return; 671 } 672 if (fd != -1) { 673 error_setg(errp, "server sent invalid version message"); 674 return; 675 } 676 677 /* 678 * ivshmem-server sends the remaining initial messages in a fixed 679 * order, but the device has always accepted them in any order. 680 * Stay as compatible as practical, just in case people use 681 * servers that behave differently. 682 */ 683 684 /* 685 * ivshmem_device_spec.txt has always required the ID message 686 * right here, and ivshmem-server has always complied. However, 687 * older versions of the device accepted it out of order, but 688 * broke when an interrupt setup message arrived before it. 689 */ 690 msg = ivshmem_recv_msg(s, &fd, &err); 691 if (err) { 692 error_propagate(errp, err); 693 return; 694 } 695 if (fd != -1 || msg < 0 || msg > IVSHMEM_MAX_PEERS) { 696 error_setg(errp, "server sent invalid ID message"); 697 return; 698 } 699 s->vm_id = msg; 700 701 /* 702 * Receive more messages until we got shared memory. 703 */ 704 do { 705 msg = ivshmem_recv_msg(s, &fd, &err); 706 if (err) { 707 error_propagate(errp, err); 708 return; 709 } 710 process_msg(s, msg, fd, &err); 711 if (err) { 712 error_propagate(errp, err); 713 return; 714 } 715 } while (msg != -1); 716 717 /* 718 * This function must either map the shared memory or fail. The 719 * loop above ensures that: it terminates normally only after it 720 * successfully processed the server's shared memory message. 721 * Assert that actually mapped the shared memory: 722 */ 723 assert(s->ivshmem_bar2); 724 } 725 726 /* Select the MSI-X vectors used by device. 727 * ivshmem maps events to vectors statically, so 728 * we just enable all vectors on init and after reset. */ 729 static void ivshmem_msix_vector_use(IVShmemState *s) 730 { 731 PCIDevice *d = PCI_DEVICE(s); 732 int i; 733 734 for (i = 0; i < s->vectors; i++) { 735 msix_vector_use(d, i); 736 } 737 } 738 739 static void ivshmem_reset(DeviceState *d) 740 { 741 IVShmemState *s = IVSHMEM_COMMON(d); 742 743 s->intrstatus = 0; 744 s->intrmask = 0; 745 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 746 ivshmem_msix_vector_use(s); 747 } 748 } 749 750 static int ivshmem_setup_interrupts(IVShmemState *s, Error **errp) 751 { 752 /* allocate QEMU callback data for receiving interrupts */ 753 s->msi_vectors = g_malloc0(s->vectors * sizeof(MSIVector)); 754 755 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 756 if (msix_init_exclusive_bar(PCI_DEVICE(s), s->vectors, 1, errp)) { 757 return -1; 758 } 759 760 IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors); 761 ivshmem_msix_vector_use(s); 762 } 763 764 return 0; 765 } 766 767 static void ivshmem_enable_irqfd(IVShmemState *s) 768 { 769 PCIDevice *pdev = PCI_DEVICE(s); 770 int i; 771 772 for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) { 773 Error *err = NULL; 774 775 ivshmem_add_kvm_msi_virq(s, i, &err); 776 if (err) { 777 error_report_err(err); 778 /* TODO do we need to handle the error? */ 779 } 780 } 781 782 if (msix_set_vector_notifiers(pdev, 783 ivshmem_vector_unmask, 784 ivshmem_vector_mask, 785 ivshmem_vector_poll)) { 786 error_report("ivshmem: msix_set_vector_notifiers failed"); 787 } 788 } 789 790 static void ivshmem_remove_kvm_msi_virq(IVShmemState *s, int vector) 791 { 792 IVSHMEM_DPRINTF("ivshmem_remove_kvm_msi_virq vector:%d\n", vector); 793 794 if (s->msi_vectors[vector].pdev == NULL) { 795 return; 796 } 797 798 /* it was cleaned when masked in the frontend. */ 799 kvm_irqchip_release_virq(kvm_state, s->msi_vectors[vector].virq); 800 801 s->msi_vectors[vector].pdev = NULL; 802 } 803 804 static void ivshmem_disable_irqfd(IVShmemState *s) 805 { 806 PCIDevice *pdev = PCI_DEVICE(s); 807 int i; 808 809 for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) { 810 ivshmem_remove_kvm_msi_virq(s, i); 811 } 812 813 msix_unset_vector_notifiers(pdev); 814 } 815 816 static void ivshmem_write_config(PCIDevice *pdev, uint32_t address, 817 uint32_t val, int len) 818 { 819 IVShmemState *s = IVSHMEM_COMMON(pdev); 820 int is_enabled, was_enabled = msix_enabled(pdev); 821 822 pci_default_write_config(pdev, address, val, len); 823 is_enabled = msix_enabled(pdev); 824 825 if (kvm_msi_via_irqfd_enabled()) { 826 if (!was_enabled && is_enabled) { 827 ivshmem_enable_irqfd(s); 828 } else if (was_enabled && !is_enabled) { 829 ivshmem_disable_irqfd(s); 830 } 831 } 832 } 833 834 static void ivshmem_common_realize(PCIDevice *dev, Error **errp) 835 { 836 IVShmemState *s = IVSHMEM_COMMON(dev); 837 Error *err = NULL; 838 uint8_t *pci_conf; 839 uint8_t attr = PCI_BASE_ADDRESS_SPACE_MEMORY | 840 PCI_BASE_ADDRESS_MEM_PREFETCH; 841 Error *local_err = NULL; 842 843 /* IRQFD requires MSI */ 844 if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD) && 845 !ivshmem_has_feature(s, IVSHMEM_MSI)) { 846 error_setg(errp, "ioeventfd/irqfd requires MSI"); 847 return; 848 } 849 850 pci_conf = dev->config; 851 pci_conf[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY; 852 853 memory_region_init_io(&s->ivshmem_mmio, OBJECT(s), &ivshmem_mmio_ops, s, 854 "ivshmem-mmio", IVSHMEM_REG_BAR_SIZE); 855 856 /* region for registers*/ 857 pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, 858 &s->ivshmem_mmio); 859 860 if (s->not_legacy_32bit) { 861 attr |= PCI_BASE_ADDRESS_MEM_TYPE_64; 862 } 863 864 if (s->hostmem != NULL) { 865 IVSHMEM_DPRINTF("using hostmem\n"); 866 867 s->ivshmem_bar2 = host_memory_backend_get_memory(s->hostmem, 868 &error_abort); 869 } else { 870 Chardev *chr = qemu_chr_fe_get_driver(&s->server_chr); 871 assert(chr); 872 873 IVSHMEM_DPRINTF("using shared memory server (socket = %s)\n", 874 chr->filename); 875 876 /* we allocate enough space for 16 peers and grow as needed */ 877 resize_peers(s, 16); 878 879 /* 880 * Receive setup messages from server synchronously. 881 * Older versions did it asynchronously, but that creates a 882 * number of entertaining race conditions. 883 */ 884 ivshmem_recv_setup(s, &err); 885 if (err) { 886 error_propagate(errp, err); 887 return; 888 } 889 890 if (s->master == ON_OFF_AUTO_ON && s->vm_id != 0) { 891 error_setg(errp, 892 "master must connect to the server before any peers"); 893 return; 894 } 895 896 qemu_chr_fe_set_handlers(&s->server_chr, ivshmem_can_receive, 897 ivshmem_read, NULL, NULL, s, NULL, true); 898 899 if (ivshmem_setup_interrupts(s, errp) < 0) { 900 error_prepend(errp, "Failed to initialize interrupts: "); 901 return; 902 } 903 } 904 905 if (s->master == ON_OFF_AUTO_AUTO) { 906 s->master = s->vm_id == 0 ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; 907 } 908 909 if (!ivshmem_is_master(s)) { 910 error_setg(&s->migration_blocker, 911 "Migration is disabled when using feature 'peer mode' in device 'ivshmem'"); 912 migrate_add_blocker(s->migration_blocker, &local_err); 913 if (local_err) { 914 error_propagate(errp, local_err); 915 error_free(s->migration_blocker); 916 return; 917 } 918 } 919 920 vmstate_register_ram(s->ivshmem_bar2, DEVICE(s)); 921 pci_register_bar(PCI_DEVICE(s), 2, attr, s->ivshmem_bar2); 922 } 923 924 static void ivshmem_exit(PCIDevice *dev) 925 { 926 IVShmemState *s = IVSHMEM_COMMON(dev); 927 int i; 928 929 if (s->migration_blocker) { 930 migrate_del_blocker(s->migration_blocker); 931 error_free(s->migration_blocker); 932 } 933 934 if (memory_region_is_mapped(s->ivshmem_bar2)) { 935 if (!s->hostmem) { 936 void *addr = memory_region_get_ram_ptr(s->ivshmem_bar2); 937 int fd; 938 939 if (munmap(addr, memory_region_size(s->ivshmem_bar2) == -1)) { 940 error_report("Failed to munmap shared memory %s", 941 strerror(errno)); 942 } 943 944 fd = memory_region_get_fd(s->ivshmem_bar2); 945 close(fd); 946 } 947 948 vmstate_unregister_ram(s->ivshmem_bar2, DEVICE(dev)); 949 } 950 951 if (s->peers) { 952 for (i = 0; i < s->nb_peers; i++) { 953 close_peer_eventfds(s, i); 954 } 955 g_free(s->peers); 956 } 957 958 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 959 msix_uninit_exclusive_bar(dev); 960 } 961 962 g_free(s->msi_vectors); 963 } 964 965 static int ivshmem_pre_load(void *opaque) 966 { 967 IVShmemState *s = opaque; 968 969 if (!ivshmem_is_master(s)) { 970 error_report("'peer' devices are not migratable"); 971 return -EINVAL; 972 } 973 974 return 0; 975 } 976 977 static int ivshmem_post_load(void *opaque, int version_id) 978 { 979 IVShmemState *s = opaque; 980 981 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 982 ivshmem_msix_vector_use(s); 983 } 984 return 0; 985 } 986 987 static void ivshmem_common_class_init(ObjectClass *klass, void *data) 988 { 989 DeviceClass *dc = DEVICE_CLASS(klass); 990 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); 991 992 k->realize = ivshmem_common_realize; 993 k->exit = ivshmem_exit; 994 k->config_write = ivshmem_write_config; 995 k->vendor_id = PCI_VENDOR_ID_IVSHMEM; 996 k->device_id = PCI_DEVICE_ID_IVSHMEM; 997 k->class_id = PCI_CLASS_MEMORY_RAM; 998 k->revision = 1; 999 dc->reset = ivshmem_reset; 1000 set_bit(DEVICE_CATEGORY_MISC, dc->categories); 1001 dc->desc = "Inter-VM shared memory"; 1002 } 1003 1004 static const TypeInfo ivshmem_common_info = { 1005 .name = TYPE_IVSHMEM_COMMON, 1006 .parent = TYPE_PCI_DEVICE, 1007 .instance_size = sizeof(IVShmemState), 1008 .abstract = true, 1009 .class_init = ivshmem_common_class_init, 1010 }; 1011 1012 static const VMStateDescription ivshmem_plain_vmsd = { 1013 .name = TYPE_IVSHMEM_PLAIN, 1014 .version_id = 0, 1015 .minimum_version_id = 0, 1016 .pre_load = ivshmem_pre_load, 1017 .post_load = ivshmem_post_load, 1018 .fields = (VMStateField[]) { 1019 VMSTATE_PCI_DEVICE(parent_obj, IVShmemState), 1020 VMSTATE_UINT32(intrstatus, IVShmemState), 1021 VMSTATE_UINT32(intrmask, IVShmemState), 1022 VMSTATE_END_OF_LIST() 1023 }, 1024 }; 1025 1026 static Property ivshmem_plain_properties[] = { 1027 DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF), 1028 DEFINE_PROP_LINK("memdev", IVShmemState, hostmem, TYPE_MEMORY_BACKEND, 1029 HostMemoryBackend *), 1030 DEFINE_PROP_END_OF_LIST(), 1031 }; 1032 1033 static void ivshmem_plain_init(Object *obj) 1034 { 1035 IVShmemState *s = IVSHMEM_PLAIN(obj); 1036 1037 s->not_legacy_32bit = 1; 1038 } 1039 1040 static void ivshmem_plain_realize(PCIDevice *dev, Error **errp) 1041 { 1042 IVShmemState *s = IVSHMEM_COMMON(dev); 1043 1044 if (!s->hostmem) { 1045 error_setg(errp, "You must specify a 'memdev'"); 1046 return; 1047 } else if (host_memory_backend_is_mapped(s->hostmem)) { 1048 char *path = object_get_canonical_path_component(OBJECT(s->hostmem)); 1049 error_setg(errp, "can't use already busy memdev: %s", path); 1050 g_free(path); 1051 return; 1052 } 1053 1054 ivshmem_common_realize(dev, errp); 1055 host_memory_backend_set_mapped(s->hostmem, true); 1056 } 1057 1058 static void ivshmem_plain_exit(PCIDevice *pci_dev) 1059 { 1060 IVShmemState *s = IVSHMEM_COMMON(pci_dev); 1061 1062 host_memory_backend_set_mapped(s->hostmem, false); 1063 } 1064 1065 static void ivshmem_plain_class_init(ObjectClass *klass, void *data) 1066 { 1067 DeviceClass *dc = DEVICE_CLASS(klass); 1068 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); 1069 1070 k->realize = ivshmem_plain_realize; 1071 k->exit = ivshmem_plain_exit; 1072 dc->props = ivshmem_plain_properties; 1073 dc->vmsd = &ivshmem_plain_vmsd; 1074 } 1075 1076 static const TypeInfo ivshmem_plain_info = { 1077 .name = TYPE_IVSHMEM_PLAIN, 1078 .parent = TYPE_IVSHMEM_COMMON, 1079 .instance_size = sizeof(IVShmemState), 1080 .instance_init = ivshmem_plain_init, 1081 .class_init = ivshmem_plain_class_init, 1082 }; 1083 1084 static const VMStateDescription ivshmem_doorbell_vmsd = { 1085 .name = TYPE_IVSHMEM_DOORBELL, 1086 .version_id = 0, 1087 .minimum_version_id = 0, 1088 .pre_load = ivshmem_pre_load, 1089 .post_load = ivshmem_post_load, 1090 .fields = (VMStateField[]) { 1091 VMSTATE_PCI_DEVICE(parent_obj, IVShmemState), 1092 VMSTATE_MSIX(parent_obj, IVShmemState), 1093 VMSTATE_UINT32(intrstatus, IVShmemState), 1094 VMSTATE_UINT32(intrmask, IVShmemState), 1095 VMSTATE_END_OF_LIST() 1096 }, 1097 }; 1098 1099 static Property ivshmem_doorbell_properties[] = { 1100 DEFINE_PROP_CHR("chardev", IVShmemState, server_chr), 1101 DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1), 1102 DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD, 1103 true), 1104 DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF), 1105 DEFINE_PROP_END_OF_LIST(), 1106 }; 1107 1108 static void ivshmem_doorbell_init(Object *obj) 1109 { 1110 IVShmemState *s = IVSHMEM_DOORBELL(obj); 1111 1112 s->features |= (1 << IVSHMEM_MSI); 1113 s->legacy_size = SIZE_MAX; /* whatever the server sends */ 1114 s->not_legacy_32bit = 1; 1115 } 1116 1117 static void ivshmem_doorbell_realize(PCIDevice *dev, Error **errp) 1118 { 1119 IVShmemState *s = IVSHMEM_COMMON(dev); 1120 1121 if (!qemu_chr_fe_backend_connected(&s->server_chr)) { 1122 error_setg(errp, "You must specify a 'chardev'"); 1123 return; 1124 } 1125 1126 ivshmem_common_realize(dev, errp); 1127 } 1128 1129 static void ivshmem_doorbell_class_init(ObjectClass *klass, void *data) 1130 { 1131 DeviceClass *dc = DEVICE_CLASS(klass); 1132 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); 1133 1134 k->realize = ivshmem_doorbell_realize; 1135 dc->props = ivshmem_doorbell_properties; 1136 dc->vmsd = &ivshmem_doorbell_vmsd; 1137 } 1138 1139 static const TypeInfo ivshmem_doorbell_info = { 1140 .name = TYPE_IVSHMEM_DOORBELL, 1141 .parent = TYPE_IVSHMEM_COMMON, 1142 .instance_size = sizeof(IVShmemState), 1143 .instance_init = ivshmem_doorbell_init, 1144 .class_init = ivshmem_doorbell_class_init, 1145 }; 1146 1147 static int ivshmem_load_old(QEMUFile *f, void *opaque, int version_id) 1148 { 1149 IVShmemState *s = opaque; 1150 PCIDevice *pdev = PCI_DEVICE(s); 1151 int ret; 1152 1153 IVSHMEM_DPRINTF("ivshmem_load_old\n"); 1154 1155 if (version_id != 0) { 1156 return -EINVAL; 1157 } 1158 1159 ret = ivshmem_pre_load(s); 1160 if (ret) { 1161 return ret; 1162 } 1163 1164 ret = pci_device_load(pdev, f); 1165 if (ret) { 1166 return ret; 1167 } 1168 1169 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 1170 msix_load(pdev, f); 1171 ivshmem_msix_vector_use(s); 1172 } else { 1173 s->intrstatus = qemu_get_be32(f); 1174 s->intrmask = qemu_get_be32(f); 1175 } 1176 1177 return 0; 1178 } 1179 1180 static bool test_msix(void *opaque, int version_id) 1181 { 1182 IVShmemState *s = opaque; 1183 1184 return ivshmem_has_feature(s, IVSHMEM_MSI); 1185 } 1186 1187 static bool test_no_msix(void *opaque, int version_id) 1188 { 1189 return !test_msix(opaque, version_id); 1190 } 1191 1192 static const VMStateDescription ivshmem_vmsd = { 1193 .name = "ivshmem", 1194 .version_id = 1, 1195 .minimum_version_id = 1, 1196 .pre_load = ivshmem_pre_load, 1197 .post_load = ivshmem_post_load, 1198 .fields = (VMStateField[]) { 1199 VMSTATE_PCI_DEVICE(parent_obj, IVShmemState), 1200 1201 VMSTATE_MSIX_TEST(parent_obj, IVShmemState, test_msix), 1202 VMSTATE_UINT32_TEST(intrstatus, IVShmemState, test_no_msix), 1203 VMSTATE_UINT32_TEST(intrmask, IVShmemState, test_no_msix), 1204 1205 VMSTATE_END_OF_LIST() 1206 }, 1207 .load_state_old = ivshmem_load_old, 1208 .minimum_version_id_old = 0 1209 }; 1210 1211 static Property ivshmem_properties[] = { 1212 DEFINE_PROP_CHR("chardev", IVShmemState, server_chr), 1213 DEFINE_PROP_STRING("size", IVShmemState, sizearg), 1214 DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1), 1215 DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD, 1216 false), 1217 DEFINE_PROP_BIT("msi", IVShmemState, features, IVSHMEM_MSI, true), 1218 DEFINE_PROP_STRING("shm", IVShmemState, shmobj), 1219 DEFINE_PROP_STRING("role", IVShmemState, role), 1220 DEFINE_PROP_UINT32("use64", IVShmemState, not_legacy_32bit, 1), 1221 DEFINE_PROP_END_OF_LIST(), 1222 }; 1223 1224 static void desugar_shm(IVShmemState *s) 1225 { 1226 Object *obj; 1227 char *path; 1228 1229 obj = object_new("memory-backend-file"); 1230 path = g_strdup_printf("/dev/shm/%s", s->shmobj); 1231 object_property_set_str(obj, path, "mem-path", &error_abort); 1232 g_free(path); 1233 object_property_set_int(obj, s->legacy_size, "size", &error_abort); 1234 object_property_set_bool(obj, true, "share", &error_abort); 1235 object_property_add_child(OBJECT(s), "internal-shm-backend", obj, 1236 &error_abort); 1237 user_creatable_complete(obj, &error_abort); 1238 s->hostmem = MEMORY_BACKEND(obj); 1239 } 1240 1241 static void ivshmem_realize(PCIDevice *dev, Error **errp) 1242 { 1243 IVShmemState *s = IVSHMEM_COMMON(dev); 1244 1245 if (!qtest_enabled()) { 1246 error_report("ivshmem is deprecated, please use ivshmem-plain" 1247 " or ivshmem-doorbell instead"); 1248 } 1249 1250 if (qemu_chr_fe_backend_connected(&s->server_chr) + !!s->shmobj != 1) { 1251 error_setg(errp, "You must specify either 'shm' or 'chardev'"); 1252 return; 1253 } 1254 1255 if (s->sizearg == NULL) { 1256 s->legacy_size = 4 << 20; /* 4 MB default */ 1257 } else { 1258 int ret; 1259 uint64_t size; 1260 1261 ret = qemu_strtosz_MiB(s->sizearg, NULL, &size); 1262 if (ret < 0 || (size_t)size != size || !is_power_of_2(size)) { 1263 error_setg(errp, "Invalid size %s", s->sizearg); 1264 return; 1265 } 1266 s->legacy_size = size; 1267 } 1268 1269 /* check that role is reasonable */ 1270 if (s->role) { 1271 if (strncmp(s->role, "peer", 5) == 0) { 1272 s->master = ON_OFF_AUTO_OFF; 1273 } else if (strncmp(s->role, "master", 7) == 0) { 1274 s->master = ON_OFF_AUTO_ON; 1275 } else { 1276 error_setg(errp, "'role' must be 'peer' or 'master'"); 1277 return; 1278 } 1279 } else { 1280 s->master = ON_OFF_AUTO_AUTO; 1281 } 1282 1283 if (s->shmobj) { 1284 desugar_shm(s); 1285 } 1286 1287 /* 1288 * Note: we don't use INTx with IVSHMEM_MSI at all, so this is a 1289 * bald-faced lie then. But it's a backwards compatible lie. 1290 */ 1291 pci_config_set_interrupt_pin(dev->config, 1); 1292 1293 ivshmem_common_realize(dev, errp); 1294 } 1295 1296 static void ivshmem_class_init(ObjectClass *klass, void *data) 1297 { 1298 DeviceClass *dc = DEVICE_CLASS(klass); 1299 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); 1300 1301 k->realize = ivshmem_realize; 1302 k->revision = 0; 1303 dc->desc = "Inter-VM shared memory (legacy)"; 1304 dc->props = ivshmem_properties; 1305 dc->vmsd = &ivshmem_vmsd; 1306 } 1307 1308 static const TypeInfo ivshmem_info = { 1309 .name = TYPE_IVSHMEM, 1310 .parent = TYPE_IVSHMEM_COMMON, 1311 .instance_size = sizeof(IVShmemState), 1312 .class_init = ivshmem_class_init, 1313 }; 1314 1315 static void ivshmem_register_types(void) 1316 { 1317 type_register_static(&ivshmem_common_info); 1318 type_register_static(&ivshmem_plain_info); 1319 type_register_static(&ivshmem_doorbell_info); 1320 type_register_static(&ivshmem_info); 1321 } 1322 1323 type_init(ivshmem_register_types) 1324