1 /* 2 * Inter-VM Shared Memory PCI device. 3 * 4 * Author: 5 * Cam Macdonell <cam@cs.ualberta.ca> 6 * 7 * Based On: cirrus_vga.c 8 * Copyright (c) 2004 Fabrice Bellard 9 * Copyright (c) 2004 Makoto Suzuki (suzu) 10 * 11 * and rtl8139.c 12 * Copyright (c) 2006 Igor Kovalenko 13 * 14 * This code is licensed under the GNU GPL v2. 15 * 16 * Contributions after 2012-01-13 are licensed under the terms of the 17 * GNU GPL, version 2 or (at your option) any later version. 18 */ 19 #include "qemu/osdep.h" 20 #include "qapi/error.h" 21 #include "qemu/cutils.h" 22 #include "hw/hw.h" 23 #include "hw/i386/pc.h" 24 #include "hw/pci/pci.h" 25 #include "hw/pci/msi.h" 26 #include "hw/pci/msix.h" 27 #include "sysemu/kvm.h" 28 #include "migration/migration.h" 29 #include "qemu/error-report.h" 30 #include "qemu/event_notifier.h" 31 #include "qom/object_interfaces.h" 32 #include "sysemu/char.h" 33 #include "sysemu/hostmem.h" 34 #include "sysemu/qtest.h" 35 #include "qapi/visitor.h" 36 #include "exec/ram_addr.h" 37 38 #include "hw/misc/ivshmem.h" 39 40 #include <sys/mman.h> 41 42 #define PCI_VENDOR_ID_IVSHMEM PCI_VENDOR_ID_REDHAT_QUMRANET 43 #define PCI_DEVICE_ID_IVSHMEM 0x1110 44 45 #define IVSHMEM_MAX_PEERS UINT16_MAX 46 #define IVSHMEM_IOEVENTFD 0 47 #define IVSHMEM_MSI 1 48 49 #define IVSHMEM_REG_BAR_SIZE 0x100 50 51 #define IVSHMEM_DEBUG 0 52 #define IVSHMEM_DPRINTF(fmt, ...) \ 53 do { \ 54 if (IVSHMEM_DEBUG) { \ 55 printf("IVSHMEM: " fmt, ## __VA_ARGS__); \ 56 } \ 57 } while (0) 58 59 #define TYPE_IVSHMEM_COMMON "ivshmem-common" 60 #define IVSHMEM_COMMON(obj) \ 61 OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_COMMON) 62 63 #define TYPE_IVSHMEM_PLAIN "ivshmem-plain" 64 #define IVSHMEM_PLAIN(obj) \ 65 OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_PLAIN) 66 67 #define TYPE_IVSHMEM_DOORBELL "ivshmem-doorbell" 68 #define IVSHMEM_DOORBELL(obj) \ 69 OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_DOORBELL) 70 71 #define TYPE_IVSHMEM "ivshmem" 72 #define IVSHMEM(obj) \ 73 OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM) 74 75 typedef struct Peer { 76 int nb_eventfds; 77 EventNotifier *eventfds; 78 } Peer; 79 80 typedef struct MSIVector { 81 PCIDevice *pdev; 82 int virq; 83 } MSIVector; 84 85 typedef struct IVShmemState { 86 /*< private >*/ 87 PCIDevice parent_obj; 88 /*< public >*/ 89 90 uint32_t features; 91 92 /* exactly one of these two may be set */ 93 HostMemoryBackend *hostmem; /* with interrupts */ 94 CharDriverState *server_chr; /* without interrupts */ 95 96 /* registers */ 97 uint32_t intrmask; 98 uint32_t intrstatus; 99 int vm_id; 100 101 /* BARs */ 102 MemoryRegion ivshmem_mmio; /* BAR 0 (registers) */ 103 MemoryRegion *ivshmem_bar2; /* BAR 2 (shared memory) */ 104 MemoryRegion server_bar2; /* used with server_chr */ 105 106 /* interrupt support */ 107 Peer *peers; 108 int nb_peers; /* space in @peers[] */ 109 uint32_t vectors; 110 MSIVector *msi_vectors; 111 uint64_t msg_buf; /* buffer for receiving server messages */ 112 int msg_buffered_bytes; /* #bytes in @msg_buf */ 113 114 /* migration stuff */ 115 OnOffAuto master; 116 Error *migration_blocker; 117 118 /* legacy cruft */ 119 char *role; 120 char *shmobj; 121 char *sizearg; 122 size_t legacy_size; 123 uint32_t not_legacy_32bit; 124 } IVShmemState; 125 126 /* registers for the Inter-VM shared memory device */ 127 enum ivshmem_registers { 128 INTRMASK = 0, 129 INTRSTATUS = 4, 130 IVPOSITION = 8, 131 DOORBELL = 12, 132 }; 133 134 static inline uint32_t ivshmem_has_feature(IVShmemState *ivs, 135 unsigned int feature) { 136 return (ivs->features & (1 << feature)); 137 } 138 139 static inline bool ivshmem_is_master(IVShmemState *s) 140 { 141 assert(s->master != ON_OFF_AUTO_AUTO); 142 return s->master == ON_OFF_AUTO_ON; 143 } 144 145 static void ivshmem_update_irq(IVShmemState *s) 146 { 147 PCIDevice *d = PCI_DEVICE(s); 148 uint32_t isr = s->intrstatus & s->intrmask; 149 150 /* 151 * Do nothing unless the device actually uses INTx. Here's how 152 * the device variants signal interrupts, what they put in PCI 153 * config space: 154 * Device variant Interrupt Interrupt Pin MSI-X cap. 155 * ivshmem-plain none 0 no 156 * ivshmem-doorbell MSI-X 1 yes(1) 157 * ivshmem,msi=off INTx 1 no 158 * ivshmem,msi=on MSI-X 1(2) yes(1) 159 * (1) if guest enabled MSI-X 160 * (2) the device lies 161 * Leads to the condition for doing nothing: 162 */ 163 if (ivshmem_has_feature(s, IVSHMEM_MSI) 164 || !d->config[PCI_INTERRUPT_PIN]) { 165 return; 166 } 167 168 /* don't print ISR resets */ 169 if (isr) { 170 IVSHMEM_DPRINTF("Set IRQ to %d (%04x %04x)\n", 171 isr ? 1 : 0, s->intrstatus, s->intrmask); 172 } 173 174 pci_set_irq(d, isr != 0); 175 } 176 177 static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val) 178 { 179 IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val); 180 181 s->intrmask = val; 182 ivshmem_update_irq(s); 183 } 184 185 static uint32_t ivshmem_IntrMask_read(IVShmemState *s) 186 { 187 uint32_t ret = s->intrmask; 188 189 IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret); 190 return ret; 191 } 192 193 static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val) 194 { 195 IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val); 196 197 s->intrstatus = val; 198 ivshmem_update_irq(s); 199 } 200 201 static uint32_t ivshmem_IntrStatus_read(IVShmemState *s) 202 { 203 uint32_t ret = s->intrstatus; 204 205 /* reading ISR clears all interrupts */ 206 s->intrstatus = 0; 207 ivshmem_update_irq(s); 208 return ret; 209 } 210 211 static void ivshmem_io_write(void *opaque, hwaddr addr, 212 uint64_t val, unsigned size) 213 { 214 IVShmemState *s = opaque; 215 216 uint16_t dest = val >> 16; 217 uint16_t vector = val & 0xff; 218 219 addr &= 0xfc; 220 221 IVSHMEM_DPRINTF("writing to addr " TARGET_FMT_plx "\n", addr); 222 switch (addr) 223 { 224 case INTRMASK: 225 ivshmem_IntrMask_write(s, val); 226 break; 227 228 case INTRSTATUS: 229 ivshmem_IntrStatus_write(s, val); 230 break; 231 232 case DOORBELL: 233 /* check that dest VM ID is reasonable */ 234 if (dest >= s->nb_peers) { 235 IVSHMEM_DPRINTF("Invalid destination VM ID (%d)\n", dest); 236 break; 237 } 238 239 /* check doorbell range */ 240 if (vector < s->peers[dest].nb_eventfds) { 241 IVSHMEM_DPRINTF("Notifying VM %d on vector %d\n", dest, vector); 242 event_notifier_set(&s->peers[dest].eventfds[vector]); 243 } else { 244 IVSHMEM_DPRINTF("Invalid destination vector %d on VM %d\n", 245 vector, dest); 246 } 247 break; 248 default: 249 IVSHMEM_DPRINTF("Unhandled write " TARGET_FMT_plx "\n", addr); 250 } 251 } 252 253 static uint64_t ivshmem_io_read(void *opaque, hwaddr addr, 254 unsigned size) 255 { 256 257 IVShmemState *s = opaque; 258 uint32_t ret; 259 260 switch (addr) 261 { 262 case INTRMASK: 263 ret = ivshmem_IntrMask_read(s); 264 break; 265 266 case INTRSTATUS: 267 ret = ivshmem_IntrStatus_read(s); 268 break; 269 270 case IVPOSITION: 271 ret = s->vm_id; 272 break; 273 274 default: 275 IVSHMEM_DPRINTF("why are we reading " TARGET_FMT_plx "\n", addr); 276 ret = 0; 277 } 278 279 return ret; 280 } 281 282 static const MemoryRegionOps ivshmem_mmio_ops = { 283 .read = ivshmem_io_read, 284 .write = ivshmem_io_write, 285 .endianness = DEVICE_NATIVE_ENDIAN, 286 .impl = { 287 .min_access_size = 4, 288 .max_access_size = 4, 289 }, 290 }; 291 292 static void ivshmem_vector_notify(void *opaque) 293 { 294 MSIVector *entry = opaque; 295 PCIDevice *pdev = entry->pdev; 296 IVShmemState *s = IVSHMEM_COMMON(pdev); 297 int vector = entry - s->msi_vectors; 298 EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; 299 300 if (!event_notifier_test_and_clear(n)) { 301 return; 302 } 303 304 IVSHMEM_DPRINTF("interrupt on vector %p %d\n", pdev, vector); 305 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 306 if (msix_enabled(pdev)) { 307 msix_notify(pdev, vector); 308 } 309 } else { 310 ivshmem_IntrStatus_write(s, 1); 311 } 312 } 313 314 static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector, 315 MSIMessage msg) 316 { 317 IVShmemState *s = IVSHMEM_COMMON(dev); 318 EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; 319 MSIVector *v = &s->msi_vectors[vector]; 320 int ret; 321 322 IVSHMEM_DPRINTF("vector unmask %p %d\n", dev, vector); 323 324 ret = kvm_irqchip_update_msi_route(kvm_state, v->virq, msg, dev); 325 if (ret < 0) { 326 return ret; 327 } 328 329 return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq); 330 } 331 332 static void ivshmem_vector_mask(PCIDevice *dev, unsigned vector) 333 { 334 IVShmemState *s = IVSHMEM_COMMON(dev); 335 EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; 336 int ret; 337 338 IVSHMEM_DPRINTF("vector mask %p %d\n", dev, vector); 339 340 ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, 341 s->msi_vectors[vector].virq); 342 if (ret != 0) { 343 error_report("remove_irqfd_notifier_gsi failed"); 344 } 345 } 346 347 static void ivshmem_vector_poll(PCIDevice *dev, 348 unsigned int vector_start, 349 unsigned int vector_end) 350 { 351 IVShmemState *s = IVSHMEM_COMMON(dev); 352 unsigned int vector; 353 354 IVSHMEM_DPRINTF("vector poll %p %d-%d\n", dev, vector_start, vector_end); 355 356 vector_end = MIN(vector_end, s->vectors); 357 358 for (vector = vector_start; vector < vector_end; vector++) { 359 EventNotifier *notifier = &s->peers[s->vm_id].eventfds[vector]; 360 361 if (!msix_is_masked(dev, vector)) { 362 continue; 363 } 364 365 if (event_notifier_test_and_clear(notifier)) { 366 msix_set_pending(dev, vector); 367 } 368 } 369 } 370 371 static void watch_vector_notifier(IVShmemState *s, EventNotifier *n, 372 int vector) 373 { 374 int eventfd = event_notifier_get_fd(n); 375 376 assert(!s->msi_vectors[vector].pdev); 377 s->msi_vectors[vector].pdev = PCI_DEVICE(s); 378 379 qemu_set_fd_handler(eventfd, ivshmem_vector_notify, 380 NULL, &s->msi_vectors[vector]); 381 } 382 383 static void ivshmem_add_eventfd(IVShmemState *s, int posn, int i) 384 { 385 memory_region_add_eventfd(&s->ivshmem_mmio, 386 DOORBELL, 387 4, 388 true, 389 (posn << 16) | i, 390 &s->peers[posn].eventfds[i]); 391 } 392 393 static void ivshmem_del_eventfd(IVShmemState *s, int posn, int i) 394 { 395 memory_region_del_eventfd(&s->ivshmem_mmio, 396 DOORBELL, 397 4, 398 true, 399 (posn << 16) | i, 400 &s->peers[posn].eventfds[i]); 401 } 402 403 static void close_peer_eventfds(IVShmemState *s, int posn) 404 { 405 int i, n; 406 407 assert(posn >= 0 && posn < s->nb_peers); 408 n = s->peers[posn].nb_eventfds; 409 410 if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) { 411 memory_region_transaction_begin(); 412 for (i = 0; i < n; i++) { 413 ivshmem_del_eventfd(s, posn, i); 414 } 415 memory_region_transaction_commit(); 416 } 417 418 for (i = 0; i < n; i++) { 419 event_notifier_cleanup(&s->peers[posn].eventfds[i]); 420 } 421 422 g_free(s->peers[posn].eventfds); 423 s->peers[posn].nb_eventfds = 0; 424 } 425 426 static void resize_peers(IVShmemState *s, int nb_peers) 427 { 428 int old_nb_peers = s->nb_peers; 429 int i; 430 431 assert(nb_peers > old_nb_peers); 432 IVSHMEM_DPRINTF("bumping storage to %d peers\n", nb_peers); 433 434 s->peers = g_realloc(s->peers, nb_peers * sizeof(Peer)); 435 s->nb_peers = nb_peers; 436 437 for (i = old_nb_peers; i < nb_peers; i++) { 438 s->peers[i].eventfds = g_new0(EventNotifier, s->vectors); 439 s->peers[i].nb_eventfds = 0; 440 } 441 } 442 443 static void ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector, 444 Error **errp) 445 { 446 PCIDevice *pdev = PCI_DEVICE(s); 447 MSIMessage msg = msix_get_message(pdev, vector); 448 int ret; 449 450 IVSHMEM_DPRINTF("ivshmem_add_kvm_msi_virq vector:%d\n", vector); 451 assert(!s->msi_vectors[vector].pdev); 452 453 ret = kvm_irqchip_add_msi_route(kvm_state, msg, pdev); 454 if (ret < 0) { 455 error_setg(errp, "kvm_irqchip_add_msi_route failed"); 456 return; 457 } 458 459 s->msi_vectors[vector].virq = ret; 460 s->msi_vectors[vector].pdev = pdev; 461 } 462 463 static void setup_interrupt(IVShmemState *s, int vector, Error **errp) 464 { 465 EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; 466 bool with_irqfd = kvm_msi_via_irqfd_enabled() && 467 ivshmem_has_feature(s, IVSHMEM_MSI); 468 PCIDevice *pdev = PCI_DEVICE(s); 469 Error *err = NULL; 470 471 IVSHMEM_DPRINTF("setting up interrupt for vector: %d\n", vector); 472 473 if (!with_irqfd) { 474 IVSHMEM_DPRINTF("with eventfd\n"); 475 watch_vector_notifier(s, n, vector); 476 } else if (msix_enabled(pdev)) { 477 IVSHMEM_DPRINTF("with irqfd\n"); 478 ivshmem_add_kvm_msi_virq(s, vector, &err); 479 if (err) { 480 error_propagate(errp, err); 481 return; 482 } 483 484 if (!msix_is_masked(pdev, vector)) { 485 kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, 486 s->msi_vectors[vector].virq); 487 /* TODO handle error */ 488 } 489 } else { 490 /* it will be delayed until msix is enabled, in write_config */ 491 IVSHMEM_DPRINTF("with irqfd, delayed until msix enabled\n"); 492 } 493 } 494 495 static void process_msg_shmem(IVShmemState *s, int fd, Error **errp) 496 { 497 struct stat buf; 498 size_t size; 499 void *ptr; 500 501 if (s->ivshmem_bar2) { 502 error_setg(errp, "server sent unexpected shared memory message"); 503 close(fd); 504 return; 505 } 506 507 if (fstat(fd, &buf) < 0) { 508 error_setg_errno(errp, errno, 509 "can't determine size of shared memory sent by server"); 510 close(fd); 511 return; 512 } 513 514 size = buf.st_size; 515 516 /* Legacy cruft */ 517 if (s->legacy_size != SIZE_MAX) { 518 if (size < s->legacy_size) { 519 error_setg(errp, "server sent only %zd bytes of shared memory", 520 (size_t)buf.st_size); 521 close(fd); 522 return; 523 } 524 size = s->legacy_size; 525 } 526 527 /* mmap the region and map into the BAR2 */ 528 ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); 529 if (ptr == MAP_FAILED) { 530 error_setg_errno(errp, errno, "Failed to mmap shared memory"); 531 close(fd); 532 return; 533 } 534 memory_region_init_ram_ptr(&s->server_bar2, OBJECT(s), 535 "ivshmem.bar2", size, ptr); 536 qemu_set_ram_fd(memory_region_get_ram_addr(&s->server_bar2), fd); 537 s->ivshmem_bar2 = &s->server_bar2; 538 } 539 540 static void process_msg_disconnect(IVShmemState *s, uint16_t posn, 541 Error **errp) 542 { 543 IVSHMEM_DPRINTF("posn %d has gone away\n", posn); 544 if (posn >= s->nb_peers || posn == s->vm_id) { 545 error_setg(errp, "invalid peer %d", posn); 546 return; 547 } 548 close_peer_eventfds(s, posn); 549 } 550 551 static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd, 552 Error **errp) 553 { 554 Peer *peer = &s->peers[posn]; 555 int vector; 556 557 /* 558 * The N-th connect message for this peer comes with the file 559 * descriptor for vector N-1. Count messages to find the vector. 560 */ 561 if (peer->nb_eventfds >= s->vectors) { 562 error_setg(errp, "Too many eventfd received, device has %d vectors", 563 s->vectors); 564 close(fd); 565 return; 566 } 567 vector = peer->nb_eventfds++; 568 569 IVSHMEM_DPRINTF("eventfds[%d][%d] = %d\n", posn, vector, fd); 570 event_notifier_init_fd(&peer->eventfds[vector], fd); 571 fcntl_setfl(fd, O_NONBLOCK); /* msix/irqfd poll non block */ 572 573 if (posn == s->vm_id) { 574 setup_interrupt(s, vector, errp); 575 /* TODO do we need to handle the error? */ 576 } 577 578 if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) { 579 ivshmem_add_eventfd(s, posn, vector); 580 } 581 } 582 583 static void process_msg(IVShmemState *s, int64_t msg, int fd, Error **errp) 584 { 585 IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd); 586 587 if (msg < -1 || msg > IVSHMEM_MAX_PEERS) { 588 error_setg(errp, "server sent invalid message %" PRId64, msg); 589 close(fd); 590 return; 591 } 592 593 if (msg == -1) { 594 process_msg_shmem(s, fd, errp); 595 return; 596 } 597 598 if (msg >= s->nb_peers) { 599 resize_peers(s, msg + 1); 600 } 601 602 if (fd >= 0) { 603 process_msg_connect(s, msg, fd, errp); 604 } else { 605 process_msg_disconnect(s, msg, errp); 606 } 607 } 608 609 static int ivshmem_can_receive(void *opaque) 610 { 611 IVShmemState *s = opaque; 612 613 assert(s->msg_buffered_bytes < sizeof(s->msg_buf)); 614 return sizeof(s->msg_buf) - s->msg_buffered_bytes; 615 } 616 617 static void ivshmem_read(void *opaque, const uint8_t *buf, int size) 618 { 619 IVShmemState *s = opaque; 620 Error *err = NULL; 621 int fd; 622 int64_t msg; 623 624 assert(size >= 0 && s->msg_buffered_bytes + size <= sizeof(s->msg_buf)); 625 memcpy((unsigned char *)&s->msg_buf + s->msg_buffered_bytes, buf, size); 626 s->msg_buffered_bytes += size; 627 if (s->msg_buffered_bytes < sizeof(s->msg_buf)) { 628 return; 629 } 630 msg = le64_to_cpu(s->msg_buf); 631 s->msg_buffered_bytes = 0; 632 633 fd = qemu_chr_fe_get_msgfd(s->server_chr); 634 IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd); 635 636 process_msg(s, msg, fd, &err); 637 if (err) { 638 error_report_err(err); 639 } 640 } 641 642 static int64_t ivshmem_recv_msg(IVShmemState *s, int *pfd, Error **errp) 643 { 644 int64_t msg; 645 int n, ret; 646 647 n = 0; 648 do { 649 ret = qemu_chr_fe_read_all(s->server_chr, (uint8_t *)&msg + n, 650 sizeof(msg) - n); 651 if (ret < 0 && ret != -EINTR) { 652 error_setg_errno(errp, -ret, "read from server failed"); 653 return INT64_MIN; 654 } 655 n += ret; 656 } while (n < sizeof(msg)); 657 658 *pfd = qemu_chr_fe_get_msgfd(s->server_chr); 659 return msg; 660 } 661 662 static void ivshmem_recv_setup(IVShmemState *s, Error **errp) 663 { 664 Error *err = NULL; 665 int64_t msg; 666 int fd; 667 668 msg = ivshmem_recv_msg(s, &fd, &err); 669 if (err) { 670 error_propagate(errp, err); 671 return; 672 } 673 if (msg != IVSHMEM_PROTOCOL_VERSION) { 674 error_setg(errp, "server sent version %" PRId64 ", expecting %d", 675 msg, IVSHMEM_PROTOCOL_VERSION); 676 return; 677 } 678 if (fd != -1) { 679 error_setg(errp, "server sent invalid version message"); 680 return; 681 } 682 683 /* 684 * ivshmem-server sends the remaining initial messages in a fixed 685 * order, but the device has always accepted them in any order. 686 * Stay as compatible as practical, just in case people use 687 * servers that behave differently. 688 */ 689 690 /* 691 * ivshmem_device_spec.txt has always required the ID message 692 * right here, and ivshmem-server has always complied. However, 693 * older versions of the device accepted it out of order, but 694 * broke when an interrupt setup message arrived before it. 695 */ 696 msg = ivshmem_recv_msg(s, &fd, &err); 697 if (err) { 698 error_propagate(errp, err); 699 return; 700 } 701 if (fd != -1 || msg < 0 || msg > IVSHMEM_MAX_PEERS) { 702 error_setg(errp, "server sent invalid ID message"); 703 return; 704 } 705 s->vm_id = msg; 706 707 /* 708 * Receive more messages until we got shared memory. 709 */ 710 do { 711 msg = ivshmem_recv_msg(s, &fd, &err); 712 if (err) { 713 error_propagate(errp, err); 714 return; 715 } 716 process_msg(s, msg, fd, &err); 717 if (err) { 718 error_propagate(errp, err); 719 return; 720 } 721 } while (msg != -1); 722 723 /* 724 * This function must either map the shared memory or fail. The 725 * loop above ensures that: it terminates normally only after it 726 * successfully processed the server's shared memory message. 727 * Assert that actually mapped the shared memory: 728 */ 729 assert(s->ivshmem_bar2); 730 } 731 732 /* Select the MSI-X vectors used by device. 733 * ivshmem maps events to vectors statically, so 734 * we just enable all vectors on init and after reset. */ 735 static void ivshmem_msix_vector_use(IVShmemState *s) 736 { 737 PCIDevice *d = PCI_DEVICE(s); 738 int i; 739 740 for (i = 0; i < s->vectors; i++) { 741 msix_vector_use(d, i); 742 } 743 } 744 745 static void ivshmem_reset(DeviceState *d) 746 { 747 IVShmemState *s = IVSHMEM_COMMON(d); 748 749 s->intrstatus = 0; 750 s->intrmask = 0; 751 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 752 ivshmem_msix_vector_use(s); 753 } 754 } 755 756 static int ivshmem_setup_interrupts(IVShmemState *s) 757 { 758 /* allocate QEMU callback data for receiving interrupts */ 759 s->msi_vectors = g_malloc0(s->vectors * sizeof(MSIVector)); 760 761 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 762 if (msix_init_exclusive_bar(PCI_DEVICE(s), s->vectors, 1)) { 763 return -1; 764 } 765 766 IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors); 767 ivshmem_msix_vector_use(s); 768 } 769 770 return 0; 771 } 772 773 static void ivshmem_enable_irqfd(IVShmemState *s) 774 { 775 PCIDevice *pdev = PCI_DEVICE(s); 776 int i; 777 778 for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) { 779 Error *err = NULL; 780 781 ivshmem_add_kvm_msi_virq(s, i, &err); 782 if (err) { 783 error_report_err(err); 784 /* TODO do we need to handle the error? */ 785 } 786 } 787 788 if (msix_set_vector_notifiers(pdev, 789 ivshmem_vector_unmask, 790 ivshmem_vector_mask, 791 ivshmem_vector_poll)) { 792 error_report("ivshmem: msix_set_vector_notifiers failed"); 793 } 794 } 795 796 static void ivshmem_remove_kvm_msi_virq(IVShmemState *s, int vector) 797 { 798 IVSHMEM_DPRINTF("ivshmem_remove_kvm_msi_virq vector:%d\n", vector); 799 800 if (s->msi_vectors[vector].pdev == NULL) { 801 return; 802 } 803 804 /* it was cleaned when masked in the frontend. */ 805 kvm_irqchip_release_virq(kvm_state, s->msi_vectors[vector].virq); 806 807 s->msi_vectors[vector].pdev = NULL; 808 } 809 810 static void ivshmem_disable_irqfd(IVShmemState *s) 811 { 812 PCIDevice *pdev = PCI_DEVICE(s); 813 int i; 814 815 for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) { 816 ivshmem_remove_kvm_msi_virq(s, i); 817 } 818 819 msix_unset_vector_notifiers(pdev); 820 } 821 822 static void ivshmem_write_config(PCIDevice *pdev, uint32_t address, 823 uint32_t val, int len) 824 { 825 IVShmemState *s = IVSHMEM_COMMON(pdev); 826 int is_enabled, was_enabled = msix_enabled(pdev); 827 828 pci_default_write_config(pdev, address, val, len); 829 is_enabled = msix_enabled(pdev); 830 831 if (kvm_msi_via_irqfd_enabled()) { 832 if (!was_enabled && is_enabled) { 833 ivshmem_enable_irqfd(s); 834 } else if (was_enabled && !is_enabled) { 835 ivshmem_disable_irqfd(s); 836 } 837 } 838 } 839 840 static void ivshmem_common_realize(PCIDevice *dev, Error **errp) 841 { 842 IVShmemState *s = IVSHMEM_COMMON(dev); 843 Error *err = NULL; 844 uint8_t *pci_conf; 845 uint8_t attr = PCI_BASE_ADDRESS_SPACE_MEMORY | 846 PCI_BASE_ADDRESS_MEM_PREFETCH; 847 848 /* IRQFD requires MSI */ 849 if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD) && 850 !ivshmem_has_feature(s, IVSHMEM_MSI)) { 851 error_setg(errp, "ioeventfd/irqfd requires MSI"); 852 return; 853 } 854 855 pci_conf = dev->config; 856 pci_conf[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY; 857 858 memory_region_init_io(&s->ivshmem_mmio, OBJECT(s), &ivshmem_mmio_ops, s, 859 "ivshmem-mmio", IVSHMEM_REG_BAR_SIZE); 860 861 /* region for registers*/ 862 pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, 863 &s->ivshmem_mmio); 864 865 if (!s->not_legacy_32bit) { 866 attr |= PCI_BASE_ADDRESS_MEM_TYPE_64; 867 } 868 869 if (s->hostmem != NULL) { 870 IVSHMEM_DPRINTF("using hostmem\n"); 871 872 s->ivshmem_bar2 = host_memory_backend_get_memory(s->hostmem, 873 &error_abort); 874 } else { 875 IVSHMEM_DPRINTF("using shared memory server (socket = %s)\n", 876 s->server_chr->filename); 877 878 /* we allocate enough space for 16 peers and grow as needed */ 879 resize_peers(s, 16); 880 881 /* 882 * Receive setup messages from server synchronously. 883 * Older versions did it asynchronously, but that creates a 884 * number of entertaining race conditions. 885 */ 886 ivshmem_recv_setup(s, &err); 887 if (err) { 888 error_propagate(errp, err); 889 return; 890 } 891 892 if (s->master == ON_OFF_AUTO_ON && s->vm_id != 0) { 893 error_setg(errp, 894 "master must connect to the server before any peers"); 895 return; 896 } 897 898 qemu_chr_add_handlers(s->server_chr, ivshmem_can_receive, 899 ivshmem_read, NULL, s); 900 901 if (ivshmem_setup_interrupts(s) < 0) { 902 error_setg(errp, "failed to initialize interrupts"); 903 return; 904 } 905 } 906 907 vmstate_register_ram(s->ivshmem_bar2, DEVICE(s)); 908 pci_register_bar(PCI_DEVICE(s), 2, attr, s->ivshmem_bar2); 909 910 if (s->master == ON_OFF_AUTO_AUTO) { 911 s->master = s->vm_id == 0 ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; 912 } 913 914 if (!ivshmem_is_master(s)) { 915 error_setg(&s->migration_blocker, 916 "Migration is disabled when using feature 'peer mode' in device 'ivshmem'"); 917 migrate_add_blocker(s->migration_blocker); 918 } 919 } 920 921 static void ivshmem_exit(PCIDevice *dev) 922 { 923 IVShmemState *s = IVSHMEM_COMMON(dev); 924 int i; 925 926 if (s->migration_blocker) { 927 migrate_del_blocker(s->migration_blocker); 928 error_free(s->migration_blocker); 929 } 930 931 if (memory_region_is_mapped(s->ivshmem_bar2)) { 932 if (!s->hostmem) { 933 void *addr = memory_region_get_ram_ptr(s->ivshmem_bar2); 934 int fd; 935 936 if (munmap(addr, memory_region_size(s->ivshmem_bar2) == -1)) { 937 error_report("Failed to munmap shared memory %s", 938 strerror(errno)); 939 } 940 941 fd = qemu_get_ram_fd(memory_region_get_ram_addr(s->ivshmem_bar2)); 942 close(fd); 943 } 944 945 vmstate_unregister_ram(s->ivshmem_bar2, DEVICE(dev)); 946 } 947 948 if (s->peers) { 949 for (i = 0; i < s->nb_peers; i++) { 950 close_peer_eventfds(s, i); 951 } 952 g_free(s->peers); 953 } 954 955 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 956 msix_uninit_exclusive_bar(dev); 957 } 958 959 g_free(s->msi_vectors); 960 } 961 962 static int ivshmem_pre_load(void *opaque) 963 { 964 IVShmemState *s = opaque; 965 966 if (!ivshmem_is_master(s)) { 967 error_report("'peer' devices are not migratable"); 968 return -EINVAL; 969 } 970 971 return 0; 972 } 973 974 static int ivshmem_post_load(void *opaque, int version_id) 975 { 976 IVShmemState *s = opaque; 977 978 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 979 ivshmem_msix_vector_use(s); 980 } 981 return 0; 982 } 983 984 static void ivshmem_common_class_init(ObjectClass *klass, void *data) 985 { 986 DeviceClass *dc = DEVICE_CLASS(klass); 987 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); 988 989 k->realize = ivshmem_common_realize; 990 k->exit = ivshmem_exit; 991 k->config_write = ivshmem_write_config; 992 k->vendor_id = PCI_VENDOR_ID_IVSHMEM; 993 k->device_id = PCI_DEVICE_ID_IVSHMEM; 994 k->class_id = PCI_CLASS_MEMORY_RAM; 995 k->revision = 1; 996 dc->reset = ivshmem_reset; 997 set_bit(DEVICE_CATEGORY_MISC, dc->categories); 998 dc->desc = "Inter-VM shared memory"; 999 } 1000 1001 static const TypeInfo ivshmem_common_info = { 1002 .name = TYPE_IVSHMEM_COMMON, 1003 .parent = TYPE_PCI_DEVICE, 1004 .instance_size = sizeof(IVShmemState), 1005 .abstract = true, 1006 .class_init = ivshmem_common_class_init, 1007 }; 1008 1009 static void ivshmem_check_memdev_is_busy(Object *obj, const char *name, 1010 Object *val, Error **errp) 1011 { 1012 MemoryRegion *mr; 1013 1014 mr = host_memory_backend_get_memory(MEMORY_BACKEND(val), &error_abort); 1015 if (memory_region_is_mapped(mr)) { 1016 char *path = object_get_canonical_path_component(val); 1017 error_setg(errp, "can't use already busy memdev: %s", path); 1018 g_free(path); 1019 } else { 1020 qdev_prop_allow_set_link_before_realize(obj, name, val, errp); 1021 } 1022 } 1023 1024 static const VMStateDescription ivshmem_plain_vmsd = { 1025 .name = TYPE_IVSHMEM_PLAIN, 1026 .version_id = 0, 1027 .minimum_version_id = 0, 1028 .pre_load = ivshmem_pre_load, 1029 .post_load = ivshmem_post_load, 1030 .fields = (VMStateField[]) { 1031 VMSTATE_PCI_DEVICE(parent_obj, IVShmemState), 1032 VMSTATE_UINT32(intrstatus, IVShmemState), 1033 VMSTATE_UINT32(intrmask, IVShmemState), 1034 VMSTATE_END_OF_LIST() 1035 }, 1036 }; 1037 1038 static Property ivshmem_plain_properties[] = { 1039 DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF), 1040 DEFINE_PROP_END_OF_LIST(), 1041 }; 1042 1043 static void ivshmem_plain_init(Object *obj) 1044 { 1045 IVShmemState *s = IVSHMEM_PLAIN(obj); 1046 1047 object_property_add_link(obj, "memdev", TYPE_MEMORY_BACKEND, 1048 (Object **)&s->hostmem, 1049 ivshmem_check_memdev_is_busy, 1050 OBJ_PROP_LINK_UNREF_ON_RELEASE, 1051 &error_abort); 1052 } 1053 1054 static void ivshmem_plain_class_init(ObjectClass *klass, void *data) 1055 { 1056 DeviceClass *dc = DEVICE_CLASS(klass); 1057 1058 dc->props = ivshmem_plain_properties; 1059 dc->vmsd = &ivshmem_plain_vmsd; 1060 } 1061 1062 static const TypeInfo ivshmem_plain_info = { 1063 .name = TYPE_IVSHMEM_PLAIN, 1064 .parent = TYPE_IVSHMEM_COMMON, 1065 .instance_size = sizeof(IVShmemState), 1066 .instance_init = ivshmem_plain_init, 1067 .class_init = ivshmem_plain_class_init, 1068 }; 1069 1070 static const VMStateDescription ivshmem_doorbell_vmsd = { 1071 .name = TYPE_IVSHMEM_DOORBELL, 1072 .version_id = 0, 1073 .minimum_version_id = 0, 1074 .pre_load = ivshmem_pre_load, 1075 .post_load = ivshmem_post_load, 1076 .fields = (VMStateField[]) { 1077 VMSTATE_PCI_DEVICE(parent_obj, IVShmemState), 1078 VMSTATE_MSIX(parent_obj, IVShmemState), 1079 VMSTATE_UINT32(intrstatus, IVShmemState), 1080 VMSTATE_UINT32(intrmask, IVShmemState), 1081 VMSTATE_END_OF_LIST() 1082 }, 1083 }; 1084 1085 static Property ivshmem_doorbell_properties[] = { 1086 DEFINE_PROP_CHR("chardev", IVShmemState, server_chr), 1087 DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1), 1088 DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD, 1089 true), 1090 DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF), 1091 DEFINE_PROP_END_OF_LIST(), 1092 }; 1093 1094 static void ivshmem_doorbell_init(Object *obj) 1095 { 1096 IVShmemState *s = IVSHMEM_DOORBELL(obj); 1097 1098 s->features |= (1 << IVSHMEM_MSI); 1099 s->legacy_size = SIZE_MAX; /* whatever the server sends */ 1100 } 1101 1102 static void ivshmem_doorbell_class_init(ObjectClass *klass, void *data) 1103 { 1104 DeviceClass *dc = DEVICE_CLASS(klass); 1105 1106 dc->props = ivshmem_doorbell_properties; 1107 dc->vmsd = &ivshmem_doorbell_vmsd; 1108 } 1109 1110 static const TypeInfo ivshmem_doorbell_info = { 1111 .name = TYPE_IVSHMEM_DOORBELL, 1112 .parent = TYPE_IVSHMEM_COMMON, 1113 .instance_size = sizeof(IVShmemState), 1114 .instance_init = ivshmem_doorbell_init, 1115 .class_init = ivshmem_doorbell_class_init, 1116 }; 1117 1118 static int ivshmem_load_old(QEMUFile *f, void *opaque, int version_id) 1119 { 1120 IVShmemState *s = opaque; 1121 PCIDevice *pdev = PCI_DEVICE(s); 1122 int ret; 1123 1124 IVSHMEM_DPRINTF("ivshmem_load_old\n"); 1125 1126 if (version_id != 0) { 1127 return -EINVAL; 1128 } 1129 1130 ret = ivshmem_pre_load(s); 1131 if (ret) { 1132 return ret; 1133 } 1134 1135 ret = pci_device_load(pdev, f); 1136 if (ret) { 1137 return ret; 1138 } 1139 1140 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 1141 msix_load(pdev, f); 1142 ivshmem_msix_vector_use(s); 1143 } else { 1144 s->intrstatus = qemu_get_be32(f); 1145 s->intrmask = qemu_get_be32(f); 1146 } 1147 1148 return 0; 1149 } 1150 1151 static bool test_msix(void *opaque, int version_id) 1152 { 1153 IVShmemState *s = opaque; 1154 1155 return ivshmem_has_feature(s, IVSHMEM_MSI); 1156 } 1157 1158 static bool test_no_msix(void *opaque, int version_id) 1159 { 1160 return !test_msix(opaque, version_id); 1161 } 1162 1163 static const VMStateDescription ivshmem_vmsd = { 1164 .name = "ivshmem", 1165 .version_id = 1, 1166 .minimum_version_id = 1, 1167 .pre_load = ivshmem_pre_load, 1168 .post_load = ivshmem_post_load, 1169 .fields = (VMStateField[]) { 1170 VMSTATE_PCI_DEVICE(parent_obj, IVShmemState), 1171 1172 VMSTATE_MSIX_TEST(parent_obj, IVShmemState, test_msix), 1173 VMSTATE_UINT32_TEST(intrstatus, IVShmemState, test_no_msix), 1174 VMSTATE_UINT32_TEST(intrmask, IVShmemState, test_no_msix), 1175 1176 VMSTATE_END_OF_LIST() 1177 }, 1178 .load_state_old = ivshmem_load_old, 1179 .minimum_version_id_old = 0 1180 }; 1181 1182 static Property ivshmem_properties[] = { 1183 DEFINE_PROP_CHR("chardev", IVShmemState, server_chr), 1184 DEFINE_PROP_STRING("size", IVShmemState, sizearg), 1185 DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1), 1186 DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD, 1187 false), 1188 DEFINE_PROP_BIT("msi", IVShmemState, features, IVSHMEM_MSI, true), 1189 DEFINE_PROP_STRING("shm", IVShmemState, shmobj), 1190 DEFINE_PROP_STRING("role", IVShmemState, role), 1191 DEFINE_PROP_UINT32("use64", IVShmemState, not_legacy_32bit, 1), 1192 DEFINE_PROP_END_OF_LIST(), 1193 }; 1194 1195 static void desugar_shm(IVShmemState *s) 1196 { 1197 Object *obj; 1198 char *path; 1199 1200 obj = object_new("memory-backend-file"); 1201 path = g_strdup_printf("/dev/shm/%s", s->shmobj); 1202 object_property_set_str(obj, path, "mem-path", &error_abort); 1203 g_free(path); 1204 object_property_set_int(obj, s->legacy_size, "size", &error_abort); 1205 object_property_set_bool(obj, true, "share", &error_abort); 1206 object_property_add_child(OBJECT(s), "internal-shm-backend", obj, 1207 &error_abort); 1208 user_creatable_complete(obj, &error_abort); 1209 s->hostmem = MEMORY_BACKEND(obj); 1210 } 1211 1212 static void ivshmem_realize(PCIDevice *dev, Error **errp) 1213 { 1214 IVShmemState *s = IVSHMEM_COMMON(dev); 1215 1216 if (!qtest_enabled()) { 1217 error_report("ivshmem is deprecated, please use ivshmem-plain" 1218 " or ivshmem-doorbell instead"); 1219 } 1220 1221 if (!!s->server_chr + !!s->shmobj != 1) { 1222 error_setg(errp, "You must specify either 'shm' or 'chardev'"); 1223 return; 1224 } 1225 1226 if (s->sizearg == NULL) { 1227 s->legacy_size = 4 << 20; /* 4 MB default */ 1228 } else { 1229 char *end; 1230 int64_t size = qemu_strtosz(s->sizearg, &end); 1231 if (size < 0 || (size_t)size != size || *end != '\0' 1232 || !is_power_of_2(size)) { 1233 error_setg(errp, "Invalid size %s", s->sizearg); 1234 return; 1235 } 1236 s->legacy_size = size; 1237 } 1238 1239 /* check that role is reasonable */ 1240 if (s->role) { 1241 if (strncmp(s->role, "peer", 5) == 0) { 1242 s->master = ON_OFF_AUTO_OFF; 1243 } else if (strncmp(s->role, "master", 7) == 0) { 1244 s->master = ON_OFF_AUTO_ON; 1245 } else { 1246 error_setg(errp, "'role' must be 'peer' or 'master'"); 1247 return; 1248 } 1249 } else { 1250 s->master = ON_OFF_AUTO_AUTO; 1251 } 1252 1253 if (s->shmobj) { 1254 desugar_shm(s); 1255 } 1256 1257 /* 1258 * Note: we don't use INTx with IVSHMEM_MSI at all, so this is a 1259 * bald-faced lie then. But it's a backwards compatible lie. 1260 */ 1261 pci_config_set_interrupt_pin(dev->config, 1); 1262 1263 ivshmem_common_realize(dev, errp); 1264 } 1265 1266 static void ivshmem_class_init(ObjectClass *klass, void *data) 1267 { 1268 DeviceClass *dc = DEVICE_CLASS(klass); 1269 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); 1270 1271 k->realize = ivshmem_realize; 1272 k->revision = 0; 1273 dc->desc = "Inter-VM shared memory (legacy)"; 1274 dc->props = ivshmem_properties; 1275 dc->vmsd = &ivshmem_vmsd; 1276 } 1277 1278 static const TypeInfo ivshmem_info = { 1279 .name = TYPE_IVSHMEM, 1280 .parent = TYPE_IVSHMEM_COMMON, 1281 .instance_size = sizeof(IVShmemState), 1282 .class_init = ivshmem_class_init, 1283 }; 1284 1285 static void ivshmem_register_types(void) 1286 { 1287 type_register_static(&ivshmem_common_info); 1288 type_register_static(&ivshmem_plain_info); 1289 type_register_static(&ivshmem_doorbell_info); 1290 type_register_static(&ivshmem_info); 1291 } 1292 1293 type_init(ivshmem_register_types) 1294