/*
 * Inter-VM Shared Memory PCI device.
 *
 * Author:
 *      Cam Macdonell <cam@cs.ualberta.ca>
 *
 * Based On: cirrus_vga.c
 *          Copyright (c) 2004 Fabrice Bellard
 *          Copyright (c) 2004 Makoto Suzuki (suzu)
 *
 *      and rtl8139.c
 *          Copyright (c) 2006 Igor Kovalenko
 *
 * This code is licensed under the GNU GPL v2.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu/cutils.h"
#include "hw/hw.h"
#include "hw/i386/pc.h"
#include "hw/pci/pci.h"
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
#include "sysemu/kvm.h"
#include "migration/migration.h"
#include "qemu/error-report.h"
#include "qemu/event_notifier.h"
#include "qom/object_interfaces.h"
#include "sysemu/char.h"
#include "sysemu/hostmem.h"
#include "sysemu/qtest.h"
#include "qapi/visitor.h"
#include "exec/ram_addr.h"

#include "hw/misc/ivshmem.h"

#include <sys/mman.h>

#define PCI_VENDOR_ID_IVSHMEM   PCI_VENDOR_ID_REDHAT_QUMRANET
#define PCI_DEVICE_ID_IVSHMEM   0x1110

#define IVSHMEM_MAX_PEERS UINT16_MAX
#define IVSHMEM_IOEVENTFD 0
#define IVSHMEM_MSI       1

#define IVSHMEM_REG_BAR_SIZE 0x100

#define IVSHMEM_DEBUG 0
#define IVSHMEM_DPRINTF(fmt, ...)                       \
    do {                                                \
        if (IVSHMEM_DEBUG) {                            \
            printf("IVSHMEM: " fmt, ## __VA_ARGS__);    \
        }                                               \
    } while (0)

#define TYPE_IVSHMEM_COMMON "ivshmem-common"
#define IVSHMEM_COMMON(obj) \
    OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_COMMON)

#define TYPE_IVSHMEM_PLAIN "ivshmem-plain"
#define IVSHMEM_PLAIN(obj) \
    OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_PLAIN)

#define TYPE_IVSHMEM_DOORBELL "ivshmem-doorbell"
#define IVSHMEM_DOORBELL(obj) \
    OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_DOORBELL)

#define TYPE_IVSHMEM "ivshmem"
#define IVSHMEM(obj) \
    OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM)

typedef struct Peer {
    int nb_eventfds;
    EventNotifier *eventfds;
} Peer;

typedef struct MSIVector {
    PCIDevice *pdev;
    int virq;
} MSIVector;

typedef struct IVShmemState {
    /*< private >*/
    PCIDevice parent_obj;
    /*< public >*/

    uint32_t features;

    /* exactly one of these two may be set */
    HostMemoryBackend *hostmem;  /* without interrupts */
    CharDriverState *server_chr; /* with interrupts */

    /* registers */
    uint32_t intrmask;
    uint32_t intrstatus;
    int vm_id;

    /* BARs */
    MemoryRegion ivshmem_mmio;  /* BAR 0 (registers) */
    MemoryRegion *ivshmem_bar2; /* BAR 2 (shared memory) */
    MemoryRegion server_bar2;   /* used with server_chr */

    /* interrupt support */
    Peer *peers;
    int nb_peers;               /* space in @peers[] */
    uint32_t vectors;
    MSIVector *msi_vectors;
    uint64_t msg_buf;           /* buffer for receiving server messages */
    int msg_buffered_bytes;     /* #bytes in @msg_buf */

    /* migration stuff */
    OnOffAuto master;
    Error *migration_blocker;

    /* legacy cruft */
    char *role;
    char *shmobj;
    char *sizearg;
    size_t legacy_size;
    uint32_t not_legacy_32bit;
} IVShmemState;

/* registers for the Inter-VM shared memory device */
enum ivshmem_registers {
    INTRMASK = 0,
    INTRSTATUS = 4,
    IVPOSITION = 8,
    DOORBELL = 12,
};

static inline uint32_t ivshmem_has_feature(IVShmemState *ivs,
                                           unsigned int feature) {
    return (ivs->features & (1 << feature));
}

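/*
 * Illustrative only, not part of the device model: a guest rings a
 * peer's doorbell by writing (peer_id << 16) | vector to the DOORBELL
 * register defined above.  A minimal sketch of a hypothetical
 * bare-metal guest helper, assuming @bar0 holds the mapped base of
 * BAR 0:
 *
 *     static void ring_doorbell(volatile uint32_t *bar0,
 *                               uint16_t peer_id, uint16_t vector)
 *     {
 *         bar0[DOORBELL / 4] = ((uint32_t)peer_id << 16) | vector;
 *     }
 */
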
static inline bool ivshmem_is_master(IVShmemState *s)
{
    assert(s->master != ON_OFF_AUTO_AUTO);
    return s->master == ON_OFF_AUTO_ON;
}

static void ivshmem_update_irq(IVShmemState *s)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t isr = s->intrstatus & s->intrmask;

    /*
     * Do nothing unless the device actually uses INTx.  Here's how
     * the device variants signal interrupts, what they put in PCI
     * config space:
     * Device variant    Interrupt  Interrupt Pin  MSI-X cap.
     * ivshmem-plain         none            0         no
     * ivshmem-doorbell     MSI-X            1        yes(1)
     * ivshmem,msi=off       INTx            1         no
     * ivshmem,msi=on      MSI-X            1(2)      yes(1)
     * (1) if guest enabled MSI-X
     * (2) the device lies
     * Leads to the condition for doing nothing:
     */
    if (ivshmem_has_feature(s, IVSHMEM_MSI)
        || !d->config[PCI_INTERRUPT_PIN]) {
        return;
    }

    /* don't print ISR resets */
    if (isr) {
        IVSHMEM_DPRINTF("Set IRQ to %d (%04x %04x)\n",
                        isr ? 1 : 0, s->intrstatus, s->intrmask);
    }

    pci_set_irq(d, isr != 0);
}

static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val)
{
    IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val);

    s->intrmask = val;
    ivshmem_update_irq(s);
}

static uint32_t ivshmem_IntrMask_read(IVShmemState *s)
{
    uint32_t ret = s->intrmask;

    IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret);
    return ret;
}

static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val)
{
    IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val);

    s->intrstatus = val;
    ivshmem_update_irq(s);
}

static uint32_t ivshmem_IntrStatus_read(IVShmemState *s)
{
    uint32_t ret = s->intrstatus;

    /* reading ISR clears all interrupts */
    s->intrstatus = 0;
    ivshmem_update_irq(s);
    return ret;
}

static void ivshmem_io_write(void *opaque, hwaddr addr,
                             uint64_t val, unsigned size)
{
    IVShmemState *s = opaque;

    uint16_t dest = val >> 16;
    uint16_t vector = val & 0xff;

    addr &= 0xfc;

    IVSHMEM_DPRINTF("writing to addr " TARGET_FMT_plx "\n", addr);
    switch (addr)
    {
        case INTRMASK:
            ivshmem_IntrMask_write(s, val);
            break;

        case INTRSTATUS:
            ivshmem_IntrStatus_write(s, val);
            break;

        case DOORBELL:
            /* check that dest VM ID is reasonable */
            if (dest >= s->nb_peers) {
                IVSHMEM_DPRINTF("Invalid destination VM ID (%d)\n", dest);
                break;
            }

            /* check doorbell range */
            if (vector < s->peers[dest].nb_eventfds) {
                IVSHMEM_DPRINTF("Notifying VM %d on vector %d\n", dest, vector);
                event_notifier_set(&s->peers[dest].eventfds[vector]);
            } else {
                IVSHMEM_DPRINTF("Invalid destination vector %d on VM %d\n",
                                vector, dest);
            }
            break;
        default:
            IVSHMEM_DPRINTF("Unhandled write " TARGET_FMT_plx "\n", addr);
    }
}

static uint64_t ivshmem_io_read(void *opaque, hwaddr addr,
                                unsigned size)
{

    IVShmemState *s = opaque;
    uint32_t ret;

    switch (addr)
    {
        case INTRMASK:
            ret = ivshmem_IntrMask_read(s);
            break;

        case INTRSTATUS:
            ret = ivshmem_IntrStatus_read(s);
            break;

        case IVPOSITION:
            ret = s->vm_id;
            break;

        default:
            IVSHMEM_DPRINTF("why are we reading " TARGET_FMT_plx "\n", addr);
            ret = 0;
    }

    return ret;
}

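/*
 * Illustrative only: with the legacy INTx variant, a guest interrupt
 * handler would typically read INTRSTATUS, which clears it (and
 * deasserts INTx via ivshmem_update_irq()), then do its work.  A
 * rough sketch of a hypothetical guest handler, assuming @bar0 is
 * the mapped BAR 0:
 *
 *     uint32_t status = bar0[INTRSTATUS / 4];   // read-to-clear
 *     if (status) {
 *         handle_peer_notification();           // hypothetical
 *     }
 */
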
static const MemoryRegionOps ivshmem_mmio_ops = {
    .read = ivshmem_io_read,
    .write = ivshmem_io_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};

static void ivshmem_vector_notify(void *opaque)
{
    MSIVector *entry = opaque;
    PCIDevice *pdev = entry->pdev;
    IVShmemState *s = IVSHMEM_COMMON(pdev);
    int vector = entry - s->msi_vectors;
    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];

    if (!event_notifier_test_and_clear(n)) {
        return;
    }

    IVSHMEM_DPRINTF("interrupt on vector %p %d\n", pdev, vector);
    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
        if (msix_enabled(pdev)) {
            msix_notify(pdev, vector);
        }
    } else {
        ivshmem_IntrStatus_write(s, 1);
    }
}

static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector,
                                 MSIMessage msg)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);
    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
    MSIVector *v = &s->msi_vectors[vector];
    int ret;

    IVSHMEM_DPRINTF("vector unmask %p %d\n", dev, vector);

    ret = kvm_irqchip_update_msi_route(kvm_state, v->virq, msg, dev);
    if (ret < 0) {
        return ret;
    }

    return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq);
}

static void ivshmem_vector_mask(PCIDevice *dev, unsigned vector)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);
    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
    int ret;

    IVSHMEM_DPRINTF("vector mask %p %d\n", dev, vector);

    ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n,
                                                s->msi_vectors[vector].virq);
    if (ret != 0) {
        error_report("remove_irqfd_notifier_gsi failed");
    }
}

static void ivshmem_vector_poll(PCIDevice *dev,
                                unsigned int vector_start,
                                unsigned int vector_end)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);
    unsigned int vector;

    IVSHMEM_DPRINTF("vector poll %p %d-%d\n", dev, vector_start, vector_end);

    vector_end = MIN(vector_end, s->vectors);

    for (vector = vector_start; vector < vector_end; vector++) {
        EventNotifier *notifier = &s->peers[s->vm_id].eventfds[vector];

        if (!msix_is_masked(dev, vector)) {
            continue;
        }

        if (event_notifier_test_and_clear(notifier)) {
            msix_set_pending(dev, vector);
        }
    }
}

static void watch_vector_notifier(IVShmemState *s, EventNotifier *n,
                                  int vector)
{
    int eventfd = event_notifier_get_fd(n);

    assert(!s->msi_vectors[vector].pdev);
    s->msi_vectors[vector].pdev = PCI_DEVICE(s);

    qemu_set_fd_handler(eventfd, ivshmem_vector_notify,
                        NULL, &s->msi_vectors[vector]);
}

static void ivshmem_add_eventfd(IVShmemState *s, int posn, int i)
{
    memory_region_add_eventfd(&s->ivshmem_mmio,
                              DOORBELL,
                              4,
                              true,
                              (posn << 16) | i,
                              &s->peers[posn].eventfds[i]);
}

static void ivshmem_del_eventfd(IVShmemState *s, int posn, int i)
{
    memory_region_del_eventfd(&s->ivshmem_mmio,
                              DOORBELL,
                              4,
                              true,
                              (posn << 16) | i,
                              &s->peers[posn].eventfds[i]);
}

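/*
 * Illustrative note: the (posn << 16) | i match value above mirrors
 * the doorbell encoding decoded in ivshmem_io_write().  With
 * ioeventfd enabled, a 4-byte guest write of exactly that value to
 * DOORBELL is completed in the kernel by signalling the peer's
 * eventfd directly, never reaching ivshmem_io_write() in userspace.
 */
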
static void close_peer_eventfds(IVShmemState *s, int posn)
{
    int i, n;

    assert(posn >= 0 && posn < s->nb_peers);
    n = s->peers[posn].nb_eventfds;

    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
        memory_region_transaction_begin();
        for (i = 0; i < n; i++) {
            ivshmem_del_eventfd(s, posn, i);
        }
        memory_region_transaction_commit();
    }

    for (i = 0; i < n; i++) {
        event_notifier_cleanup(&s->peers[posn].eventfds[i]);
    }

    g_free(s->peers[posn].eventfds);
    s->peers[posn].nb_eventfds = 0;
}

static void resize_peers(IVShmemState *s, int nb_peers)
{
    int old_nb_peers = s->nb_peers;
    int i;

    assert(nb_peers > old_nb_peers);
    IVSHMEM_DPRINTF("bumping storage to %d peers\n", nb_peers);

    s->peers = g_realloc(s->peers, nb_peers * sizeof(Peer));
    s->nb_peers = nb_peers;

    for (i = old_nb_peers; i < nb_peers; i++) {
        s->peers[i].eventfds = g_new0(EventNotifier, s->vectors);
        s->peers[i].nb_eventfds = 0;
    }
}

static void ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector,
                                     Error **errp)
{
    PCIDevice *pdev = PCI_DEVICE(s);
    MSIMessage msg = msix_get_message(pdev, vector);
    int ret;

    IVSHMEM_DPRINTF("ivshmem_add_kvm_msi_virq vector:%d\n", vector);
    assert(!s->msi_vectors[vector].pdev);

    ret = kvm_irqchip_add_msi_route(kvm_state, msg, pdev);
    if (ret < 0) {
        error_setg(errp, "kvm_irqchip_add_msi_route failed");
        return;
    }

    s->msi_vectors[vector].virq = ret;
    s->msi_vectors[vector].pdev = pdev;
}

static void setup_interrupt(IVShmemState *s, int vector, Error **errp)
{
    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
    bool with_irqfd = kvm_msi_via_irqfd_enabled() &&
        ivshmem_has_feature(s, IVSHMEM_MSI);
    PCIDevice *pdev = PCI_DEVICE(s);
    Error *err = NULL;

    IVSHMEM_DPRINTF("setting up interrupt for vector: %d\n", vector);

    if (!with_irqfd) {
        IVSHMEM_DPRINTF("with eventfd\n");
        watch_vector_notifier(s, n, vector);
    } else if (msix_enabled(pdev)) {
        IVSHMEM_DPRINTF("with irqfd\n");
        ivshmem_add_kvm_msi_virq(s, vector, &err);
        if (err) {
            error_propagate(errp, err);
            return;
        }

        if (!msix_is_masked(pdev, vector)) {
            kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL,
                                               s->msi_vectors[vector].virq);
            /* TODO handle error */
        }
    } else {
        /* it will be delayed until msix is enabled, in write_config */
        IVSHMEM_DPRINTF("with irqfd, delayed until msix enabled\n");
    }
}

static void process_msg_shmem(IVShmemState *s, int fd, Error **errp)
{
    struct stat buf;
    size_t size;
    void *ptr;

    if (s->ivshmem_bar2) {
        error_setg(errp, "server sent unexpected shared memory message");
        close(fd);
        return;
    }

    if (fstat(fd, &buf) < 0) {
        error_setg_errno(errp, errno,
            "can't determine size of shared memory sent by server");
        close(fd);
        return;
    }

    size = buf.st_size;

    /* Legacy cruft */
    if (s->legacy_size != SIZE_MAX) {
        if (size < s->legacy_size) {
            error_setg(errp, "server sent only %zu bytes of shared memory",
                       (size_t)buf.st_size);
            close(fd);
            return;
        }
        size = s->legacy_size;
    }

    /* mmap the region and map into the BAR2 */
    ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (ptr == MAP_FAILED) {
        error_setg_errno(errp, errno, "Failed to mmap shared memory");
        close(fd);
        return;
    }
    memory_region_init_ram_ptr(&s->server_bar2, OBJECT(s),
                               "ivshmem.bar2", size, ptr);
    qemu_set_ram_fd(memory_region_get_ram_addr(&s->server_bar2), fd);
    s->ivshmem_bar2 = &s->server_bar2;
}

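/*
 * Illustrative note on the wire protocol (see ivshmem_device_spec.txt):
 * every message from the server is a little-endian int64, optionally
 * accompanied by exactly one file descriptor passed as SCM_RIGHTS
 * ancillary data over the UNIX socket.  msg == -1 with an fd is the
 * shared memory message handled above; msg >= 0 names a peer ID and,
 * when an fd is attached, carries one of that peer's interrupt
 * eventfds.
 */
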
static void process_msg_disconnect(IVShmemState *s, uint16_t posn,
                                   Error **errp)
{
    IVSHMEM_DPRINTF("posn %d has gone away\n", posn);
    if (posn >= s->nb_peers || posn == s->vm_id) {
        error_setg(errp, "invalid peer %d", posn);
        return;
    }
    close_peer_eventfds(s, posn);
}

static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd,
                                Error **errp)
{
    Peer *peer = &s->peers[posn];
    int vector;

    /*
     * The N-th connect message for this peer comes with the file
     * descriptor for vector N-1.  Count messages to find the vector.
     */
    if (peer->nb_eventfds >= s->vectors) {
        error_setg(errp, "Too many eventfds received, device has %d vectors",
                   s->vectors);
        close(fd);
        return;
    }
    vector = peer->nb_eventfds++;

    IVSHMEM_DPRINTF("eventfds[%d][%d] = %d\n", posn, vector, fd);
    event_notifier_init_fd(&peer->eventfds[vector], fd);
    fcntl_setfl(fd, O_NONBLOCK); /* msix/irqfd poll non block */

    if (posn == s->vm_id) {
        setup_interrupt(s, vector, errp);
        /* TODO do we need to handle the error? */
    }

    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
        ivshmem_add_eventfd(s, posn, vector);
    }
}

static void process_msg(IVShmemState *s, int64_t msg, int fd, Error **errp)
{
    IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd);

    if (msg < -1 || msg > IVSHMEM_MAX_PEERS) {
        error_setg(errp, "server sent invalid message %" PRId64, msg);
        close(fd);
        return;
    }

    if (msg == -1) {
        process_msg_shmem(s, fd, errp);
        return;
    }

    if (msg >= s->nb_peers) {
        resize_peers(s, msg + 1);
    }

    if (fd >= 0) {
        process_msg_connect(s, msg, fd, errp);
    } else {
        process_msg_disconnect(s, msg, errp);
    }
}

static int ivshmem_can_receive(void *opaque)
{
    IVShmemState *s = opaque;

    assert(s->msg_buffered_bytes < sizeof(s->msg_buf));
    return sizeof(s->msg_buf) - s->msg_buffered_bytes;
}

static void ivshmem_read(void *opaque, const uint8_t *buf, int size)
{
    IVShmemState *s = opaque;
    Error *err = NULL;
    int fd;
    int64_t msg;

    assert(size >= 0 && s->msg_buffered_bytes + size <= sizeof(s->msg_buf));
    memcpy((unsigned char *)&s->msg_buf + s->msg_buffered_bytes, buf, size);
    s->msg_buffered_bytes += size;
    if (s->msg_buffered_bytes < sizeof(s->msg_buf)) {
        return;
    }
    msg = le64_to_cpu(s->msg_buf);
    s->msg_buffered_bytes = 0;

    fd = qemu_chr_fe_get_msgfd(s->server_chr);
    IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd);

    process_msg(s, msg, fd, &err);
    if (err) {
        error_report_err(err);
    }
}

static int64_t ivshmem_recv_msg(IVShmemState *s, int *pfd, Error **errp)
{
    int64_t msg;
    int n, ret;

    n = 0;
    do {
        ret = qemu_chr_fe_read_all(s->server_chr, (uint8_t *)&msg + n,
                                   sizeof(msg) - n);
        if (ret < 0 && ret != -EINTR) {
            error_setg_errno(errp, -ret, "read from server failed");
            return INT64_MIN;
        }
        n += ret;
    } while (n < sizeof(msg));

    *pfd = qemu_chr_fe_get_msgfd(s->server_chr);
    return msg;
}

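/*
 * Illustrative only: qemu_chr_fe_get_msgfd() returns the file
 * descriptor (if any) that arrived with the last read as SCM_RIGHTS
 * ancillary data.  A hypothetical standalone client on a raw UNIX
 * socket @sock would obtain it roughly like this:
 *
 *     int64_t msg;
 *     struct iovec iov = { .iov_base = &msg, .iov_len = sizeof(msg) };
 *     char ctl[CMSG_SPACE(sizeof(int))];
 *     struct msghdr mh = { .msg_iov = &iov, .msg_iovlen = 1,
 *                          .msg_control = ctl,
 *                          .msg_controllen = sizeof(ctl) };
 *     recvmsg(sock, &mh, 0);
 *     struct cmsghdr *c = CMSG_FIRSTHDR(&mh);
 *     int fd = c ? *(int *)CMSG_DATA(c) : -1;   // -1: no fd attached
 */
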
static void ivshmem_recv_setup(IVShmemState *s, Error **errp)
{
    Error *err = NULL;
    int64_t msg;
    int fd;

    msg = ivshmem_recv_msg(s, &fd, &err);
    if (err) {
        error_propagate(errp, err);
        return;
    }
    if (msg != IVSHMEM_PROTOCOL_VERSION) {
        error_setg(errp, "server sent version %" PRId64 ", expecting %d",
                   msg, IVSHMEM_PROTOCOL_VERSION);
        return;
    }
    if (fd != -1) {
        error_setg(errp, "server sent invalid version message");
        return;
    }

    /*
     * ivshmem-server sends the remaining initial messages in a fixed
     * order, but the device has always accepted them in any order.
     * Stay as compatible as practical, just in case people use
     * servers that behave differently.
     */

    /*
     * ivshmem_device_spec.txt has always required the ID message
     * right here, and ivshmem-server has always complied.  However,
     * older versions of the device accepted it out of order, but
     * broke when an interrupt setup message arrived before it.
     */
    msg = ivshmem_recv_msg(s, &fd, &err);
    if (err) {
        error_propagate(errp, err);
        return;
    }
    if (fd != -1 || msg < 0 || msg > IVSHMEM_MAX_PEERS) {
        error_setg(errp, "server sent invalid ID message");
        return;
    }
    s->vm_id = msg;

    /*
     * Receive more messages until we have received the shared memory.
     */
    do {
        msg = ivshmem_recv_msg(s, &fd, &err);
        if (err) {
            error_propagate(errp, err);
            return;
        }
        process_msg(s, msg, fd, &err);
        if (err) {
            error_propagate(errp, err);
            return;
        }
    } while (msg != -1);

    /*
     * This function must either map the shared memory or fail.  The
     * loop above ensures that: it terminates normally only after it
     * successfully processed the server's shared memory message.
     * Assert that we actually mapped the shared memory:
     */
    assert(s->ivshmem_bar2);
}

/* Select the MSI-X vectors used by the device.
 * ivshmem maps events to vectors statically, so
 * we just enable all vectors on init and after reset. */
static void ivshmem_msix_vector_use(IVShmemState *s)
{
    PCIDevice *d = PCI_DEVICE(s);
    int i;

    for (i = 0; i < s->vectors; i++) {
        msix_vector_use(d, i);
    }
}

static void ivshmem_reset(DeviceState *d)
{
    IVShmemState *s = IVSHMEM_COMMON(d);

    s->intrstatus = 0;
    s->intrmask = 0;
    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
        ivshmem_msix_vector_use(s);
    }
}

static int ivshmem_setup_interrupts(IVShmemState *s)
{
    /* allocate QEMU callback data for receiving interrupts */
    s->msi_vectors = g_malloc0(s->vectors * sizeof(MSIVector));

    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
        if (msix_init_exclusive_bar(PCI_DEVICE(s), s->vectors, 1)) {
            return -1;
        }

        IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors);
        ivshmem_msix_vector_use(s);
    }

    return 0;
}

static void ivshmem_enable_irqfd(IVShmemState *s)
{
    PCIDevice *pdev = PCI_DEVICE(s);
    int i;

    for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
        Error *err = NULL;

        ivshmem_add_kvm_msi_virq(s, i, &err);
        if (err) {
            error_report_err(err);
            /* TODO do we need to handle the error? */
        }
    }

    if (msix_set_vector_notifiers(pdev,
                                  ivshmem_vector_unmask,
                                  ivshmem_vector_mask,
                                  ivshmem_vector_poll)) {
        error_report("ivshmem: msix_set_vector_notifiers failed");
    }
}

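/*
 * Illustrative summary of the three delivery paths for a peer
 * notification, slowest to fastest:
 *   1. no MSI: ivshmem_vector_notify() sets INTRSTATUS, raising INTx;
 *   2. MSI-X without irqfd: ivshmem_vector_notify() calls msix_notify();
 *   3. MSI-X with KVM irqfd: the eventfd is wired to a KVM MSI route
 *      (ivshmem_add_kvm_msi_virq()), so the interrupt is injected by
 *      the kernel without QEMU ever seeing the event.
 */
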
static void ivshmem_remove_kvm_msi_virq(IVShmemState *s, int vector)
{
    IVSHMEM_DPRINTF("ivshmem_remove_kvm_msi_virq vector:%d\n", vector);

    if (s->msi_vectors[vector].pdev == NULL) {
        return;
    }

    /* it was cleaned up when masked in the frontend */
    kvm_irqchip_release_virq(kvm_state, s->msi_vectors[vector].virq);

    s->msi_vectors[vector].pdev = NULL;
}

static void ivshmem_disable_irqfd(IVShmemState *s)
{
    PCIDevice *pdev = PCI_DEVICE(s);
    int i;

    for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
        ivshmem_remove_kvm_msi_virq(s, i);
    }

    msix_unset_vector_notifiers(pdev);
}

static void ivshmem_write_config(PCIDevice *pdev, uint32_t address,
                                 uint32_t val, int len)
{
    IVShmemState *s = IVSHMEM_COMMON(pdev);
    int is_enabled, was_enabled = msix_enabled(pdev);

    pci_default_write_config(pdev, address, val, len);
    is_enabled = msix_enabled(pdev);

    if (kvm_msi_via_irqfd_enabled()) {
        if (!was_enabled && is_enabled) {
            ivshmem_enable_irqfd(s);
        } else if (was_enabled && !is_enabled) {
            ivshmem_disable_irqfd(s);
        }
    }
}

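/*
 * Illustrative note: the realize code below lays the device out as
 * described in the spec: BAR 0 holds the registers
 * (IVSHMEM_REG_BAR_SIZE, 256 bytes), BAR 1 is the MSI-X table/PBA
 * (MSI-X variants only, see msix_init_exclusive_bar()), and BAR 2
 * maps the shared memory itself, backed either by a memdev or by the
 * fd sent by the server.
 */
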
static void ivshmem_common_realize(PCIDevice *dev, Error **errp)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);
    Error *err = NULL;
    uint8_t *pci_conf;
    uint8_t attr = PCI_BASE_ADDRESS_SPACE_MEMORY |
        PCI_BASE_ADDRESS_MEM_PREFETCH;

    /* IRQFD requires MSI */
    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD) &&
        !ivshmem_has_feature(s, IVSHMEM_MSI)) {
        error_setg(errp, "ioeventfd/irqfd requires MSI");
        return;
    }

    pci_conf = dev->config;
    pci_conf[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY;

    memory_region_init_io(&s->ivshmem_mmio, OBJECT(s), &ivshmem_mmio_ops, s,
                          "ivshmem-mmio", IVSHMEM_REG_BAR_SIZE);

    /* region for registers */
    pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY,
                     &s->ivshmem_mmio);

    if (s->not_legacy_32bit) {
        attr |= PCI_BASE_ADDRESS_MEM_TYPE_64;
    }

    if (s->hostmem != NULL) {
        IVSHMEM_DPRINTF("using hostmem\n");

        s->ivshmem_bar2 = host_memory_backend_get_memory(s->hostmem,
                                                         &error_abort);
    } else {
        assert(s->server_chr);

        IVSHMEM_DPRINTF("using shared memory server (socket = %s)\n",
                        s->server_chr->filename);

        /* we allocate enough space for 16 peers and grow as needed */
        resize_peers(s, 16);

        /*
         * Receive setup messages from server synchronously.
         * Older versions did it asynchronously, but that creates a
         * number of entertaining race conditions.
         */
        ivshmem_recv_setup(s, &err);
        if (err) {
            error_propagate(errp, err);
            return;
        }

        if (s->master == ON_OFF_AUTO_ON && s->vm_id != 0) {
            error_setg(errp,
                       "master must connect to the server before any peers");
            return;
        }

        qemu_chr_add_handlers(s->server_chr, ivshmem_can_receive,
                              ivshmem_read, NULL, s);

        if (ivshmem_setup_interrupts(s) < 0) {
            error_setg(errp, "failed to initialize interrupts");
            return;
        }
    }

    vmstate_register_ram(s->ivshmem_bar2, DEVICE(s));
    pci_register_bar(PCI_DEVICE(s), 2, attr, s->ivshmem_bar2);

    if (s->master == ON_OFF_AUTO_AUTO) {
        s->master = s->vm_id == 0 ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
    }

    if (!ivshmem_is_master(s)) {
        error_setg(&s->migration_blocker,
                   "Migration is disabled when using feature 'peer mode' in device 'ivshmem'");
        migrate_add_blocker(s->migration_blocker);
    }
}

static void ivshmem_exit(PCIDevice *dev)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);
    int i;

    if (s->migration_blocker) {
        migrate_del_blocker(s->migration_blocker);
        error_free(s->migration_blocker);
    }

    if (memory_region_is_mapped(s->ivshmem_bar2)) {
        if (!s->hostmem) {
            void *addr = memory_region_get_ram_ptr(s->ivshmem_bar2);
            int fd;

            if (munmap(addr, memory_region_size(s->ivshmem_bar2)) == -1) {
                error_report("Failed to munmap shared memory %s",
                             strerror(errno));
            }

            fd = qemu_get_ram_fd(memory_region_get_ram_addr(s->ivshmem_bar2));
            close(fd);
        }

        vmstate_unregister_ram(s->ivshmem_bar2, DEVICE(dev));
    }

    if (s->peers) {
        for (i = 0; i < s->nb_peers; i++) {
            close_peer_eventfds(s, i);
        }
        g_free(s->peers);
    }

    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
        msix_uninit_exclusive_bar(dev);
    }

    g_free(s->msi_vectors);
}

static int ivshmem_pre_load(void *opaque)
{
    IVShmemState *s = opaque;

    if (!ivshmem_is_master(s)) {
        error_report("'peer' devices are not migratable");
        return -EINVAL;
    }

    return 0;
}

static int ivshmem_post_load(void *opaque, int version_id)
{
    IVShmemState *s = opaque;

    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
        ivshmem_msix_vector_use(s);
    }
    return 0;
}

static void ivshmem_common_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);

    k->realize = ivshmem_common_realize;
    k->exit = ivshmem_exit;
    k->config_write = ivshmem_write_config;
    k->vendor_id = PCI_VENDOR_ID_IVSHMEM;
    k->device_id = PCI_DEVICE_ID_IVSHMEM;
    k->class_id = PCI_CLASS_MEMORY_RAM;
    k->revision = 1;
    dc->reset = ivshmem_reset;
    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
    dc->desc = "Inter-VM shared memory";
}

static const TypeInfo ivshmem_common_info = {
    .name          = TYPE_IVSHMEM_COMMON,
    .parent        = TYPE_PCI_DEVICE,
    .instance_size = sizeof(IVShmemState),
    .abstract      = true,
    .class_init    = ivshmem_common_class_init,
};

static void ivshmem_check_memdev_is_busy(Object *obj, const char *name,
                                         Object *val, Error **errp)
{
    MemoryRegion *mr;

    mr = host_memory_backend_get_memory(MEMORY_BACKEND(val), &error_abort);
    if (memory_region_is_mapped(mr)) {
        char *path = object_get_canonical_path_component(val);
        error_setg(errp, "can't use already busy memdev: %s", path);
        g_free(path);
    } else {
        qdev_prop_allow_set_link_before_realize(obj, name, val, errp);
    }
}

static const VMStateDescription ivshmem_plain_vmsd = {
    .name = TYPE_IVSHMEM_PLAIN,
    .version_id = 0,
    .minimum_version_id = 0,
    .pre_load = ivshmem_pre_load,
    .post_load = ivshmem_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, IVShmemState),
        VMSTATE_UINT32(intrstatus, IVShmemState),
        VMSTATE_UINT32(intrmask, IVShmemState),
        VMSTATE_END_OF_LIST()
    },
};

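/*
 * Illustrative only: typical command line for the plain variant,
 * with a hypothetical backing file name:
 *
 *   -object memory-backend-file,id=hostmem,share=on,\
 *           mem-path=/dev/shm/hostmem,size=4M
 *   -device ivshmem-plain,memdev=hostmem
 *
 * share=on matters: without it, each peer gets a private copy and no
 * memory is actually shared between VMs.
 */
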
static Property ivshmem_plain_properties[] = {
    DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF),
    DEFINE_PROP_END_OF_LIST(),
};

static void ivshmem_plain_init(Object *obj)
{
    IVShmemState *s = IVSHMEM_PLAIN(obj);

    object_property_add_link(obj, "memdev", TYPE_MEMORY_BACKEND,
                             (Object **)&s->hostmem,
                             ivshmem_check_memdev_is_busy,
                             OBJ_PROP_LINK_UNREF_ON_RELEASE,
                             &error_abort);
    s->not_legacy_32bit = 1;    /* BAR 2 is 64-bit */
}

static void ivshmem_plain_realize(PCIDevice *dev, Error **errp)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);

    if (!s->hostmem) {
        error_setg(errp, "You must specify a 'memdev'");
        return;
    }

    ivshmem_common_realize(dev, errp);
}

static void ivshmem_plain_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);

    k->realize = ivshmem_plain_realize;
    dc->props = ivshmem_plain_properties;
    dc->vmsd = &ivshmem_plain_vmsd;
}

static const TypeInfo ivshmem_plain_info = {
    .name          = TYPE_IVSHMEM_PLAIN,
    .parent        = TYPE_IVSHMEM_COMMON,
    .instance_size = sizeof(IVShmemState),
    .instance_init = ivshmem_plain_init,
    .class_init    = ivshmem_plain_class_init,
};

static const VMStateDescription ivshmem_doorbell_vmsd = {
    .name = TYPE_IVSHMEM_DOORBELL,
    .version_id = 0,
    .minimum_version_id = 0,
    .pre_load = ivshmem_pre_load,
    .post_load = ivshmem_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, IVShmemState),
        VMSTATE_MSIX(parent_obj, IVShmemState),
        VMSTATE_UINT32(intrstatus, IVShmemState),
        VMSTATE_UINT32(intrmask, IVShmemState),
        VMSTATE_END_OF_LIST()
    },
};

static Property ivshmem_doorbell_properties[] = {
    DEFINE_PROP_CHR("chardev", IVShmemState, server_chr),
    DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1),
    DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD,
                    true),
    DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF),
    DEFINE_PROP_END_OF_LIST(),
};

static void ivshmem_doorbell_init(Object *obj)
{
    IVShmemState *s = IVSHMEM_DOORBELL(obj);

    s->features |= (1 << IVSHMEM_MSI);
    s->legacy_size = SIZE_MAX;  /* whatever the server sends */
    s->not_legacy_32bit = 1;    /* BAR 2 is 64-bit */
}

static void ivshmem_doorbell_realize(PCIDevice *dev, Error **errp)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);

    if (!s->server_chr) {
        error_setg(errp, "You must specify a 'chardev'");
        return;
    }

    ivshmem_common_realize(dev, errp);
}

static void ivshmem_doorbell_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);

    k->realize = ivshmem_doorbell_realize;
    dc->props = ivshmem_doorbell_properties;
    dc->vmsd = &ivshmem_doorbell_vmsd;
}

static const TypeInfo ivshmem_doorbell_info = {
    .name          = TYPE_IVSHMEM_DOORBELL,
    .parent        = TYPE_IVSHMEM_COMMON,
    .instance_size = sizeof(IVShmemState),
    .instance_init = ivshmem_doorbell_init,
    .class_init    = ivshmem_doorbell_class_init,
};

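/*
 * Illustrative only: typical command line for the doorbell variant,
 * with a hypothetical socket path (an ivshmem-server must be
 * listening there first):
 *
 *   -chardev socket,path=/tmp/ivshmem_socket,id=ivsh
 *   -device ivshmem-doorbell,chardev=ivsh,vectors=2
 */
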
static int ivshmem_load_old(QEMUFile *f, void *opaque, int version_id)
{
    IVShmemState *s = opaque;
    PCIDevice *pdev = PCI_DEVICE(s);
    int ret;

    IVSHMEM_DPRINTF("ivshmem_load_old\n");

    if (version_id != 0) {
        return -EINVAL;
    }

    ret = ivshmem_pre_load(s);
    if (ret) {
        return ret;
    }

    ret = pci_device_load(pdev, f);
    if (ret) {
        return ret;
    }

    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
        msix_load(pdev, f);
        ivshmem_msix_vector_use(s);
    } else {
        s->intrstatus = qemu_get_be32(f);
        s->intrmask = qemu_get_be32(f);
    }

    return 0;
}

static bool test_msix(void *opaque, int version_id)
{
    IVShmemState *s = opaque;

    return ivshmem_has_feature(s, IVSHMEM_MSI);
}

static bool test_no_msix(void *opaque, int version_id)
{
    return !test_msix(opaque, version_id);
}

static const VMStateDescription ivshmem_vmsd = {
    .name = "ivshmem",
    .version_id = 1,
    .minimum_version_id = 1,
    .pre_load = ivshmem_pre_load,
    .post_load = ivshmem_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, IVShmemState),

        VMSTATE_MSIX_TEST(parent_obj, IVShmemState, test_msix),
        VMSTATE_UINT32_TEST(intrstatus, IVShmemState, test_no_msix),
        VMSTATE_UINT32_TEST(intrmask, IVShmemState, test_no_msix),

        VMSTATE_END_OF_LIST()
    },
    .load_state_old = ivshmem_load_old,
    .minimum_version_id_old = 0
};

static Property ivshmem_properties[] = {
    DEFINE_PROP_CHR("chardev", IVShmemState, server_chr),
    DEFINE_PROP_STRING("size", IVShmemState, sizearg),
    DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1),
    DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD,
                    false),
    DEFINE_PROP_BIT("msi", IVShmemState, features, IVSHMEM_MSI, true),
    DEFINE_PROP_STRING("shm", IVShmemState, shmobj),
    DEFINE_PROP_STRING("role", IVShmemState, role),
    DEFINE_PROP_UINT32("use64", IVShmemState, not_legacy_32bit, 1),
    DEFINE_PROP_END_OF_LIST(),
};

static void desugar_shm(IVShmemState *s)
{
    Object *obj;
    char *path;

    obj = object_new("memory-backend-file");
    path = g_strdup_printf("/dev/shm/%s", s->shmobj);
    object_property_set_str(obj, path, "mem-path", &error_abort);
    g_free(path);
    object_property_set_int(obj, s->legacy_size, "size", &error_abort);
    object_property_set_bool(obj, true, "share", &error_abort);
    object_property_add_child(OBJECT(s), "internal-shm-backend", obj,
                              &error_abort);
    user_creatable_complete(obj, &error_abort);
    s->hostmem = MEMORY_BACKEND(obj);
}

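/*
 * Illustrative only: desugar_shm() above makes the legacy
 *
 *   -device ivshmem,shm=foo,size=4M
 *
 * behave, as far as the shared memory is concerned, roughly like the
 * modern spelling (with a hypothetical object id):
 *
 *   -object memory-backend-file,id=x,mem-path=/dev/shm/foo,size=4M,share=on
 *   -device ivshmem-plain,memdev=x
 */
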
static void ivshmem_realize(PCIDevice *dev, Error **errp)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);

    if (!qtest_enabled()) {
        error_report("ivshmem is deprecated, please use ivshmem-plain"
                     " or ivshmem-doorbell instead");
    }

    if (!!s->server_chr + !!s->shmobj != 1) {
        error_setg(errp, "You must specify either 'shm' or 'chardev'");
        return;
    }

    if (s->sizearg == NULL) {
        s->legacy_size = 4 << 20; /* 4 MB default */
    } else {
        char *end;
        int64_t size = qemu_strtosz(s->sizearg, &end);
        if (size < 0 || (size_t)size != size || *end != '\0'
            || !is_power_of_2(size)) {
            error_setg(errp, "Invalid size %s", s->sizearg);
            return;
        }
        s->legacy_size = size;
    }

    /* check that role is reasonable */
    if (s->role) {
        if (strncmp(s->role, "peer", 5) == 0) {
            s->master = ON_OFF_AUTO_OFF;
        } else if (strncmp(s->role, "master", 7) == 0) {
            s->master = ON_OFF_AUTO_ON;
        } else {
            error_setg(errp, "'role' must be 'peer' or 'master'");
            return;
        }
    } else {
        s->master = ON_OFF_AUTO_AUTO;
    }

    if (s->shmobj) {
        desugar_shm(s);
    }

    /*
     * Note: we don't use INTx with IVSHMEM_MSI at all, so this is a
     * bald-faced lie then.  But it's a backwards compatible lie.
     */
    pci_config_set_interrupt_pin(dev->config, 1);

    ivshmem_common_realize(dev, errp);
}

static void ivshmem_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);

    k->realize = ivshmem_realize;
    k->revision = 0;
    dc->desc = "Inter-VM shared memory (legacy)";
    dc->props = ivshmem_properties;
    dc->vmsd = &ivshmem_vmsd;
}

static const TypeInfo ivshmem_info = {
    .name          = TYPE_IVSHMEM,
    .parent        = TYPE_IVSHMEM_COMMON,
    .instance_size = sizeof(IVShmemState),
    .class_init    = ivshmem_class_init,
};

static void ivshmem_register_types(void)
{
    type_register_static(&ivshmem_common_info);
    type_register_static(&ivshmem_plain_info);
    type_register_static(&ivshmem_doorbell_info);
    type_register_static(&ivshmem_info);
}

type_init(ivshmem_register_types)