1 /* 2 * Inter-VM Shared Memory PCI device. 3 * 4 * Author: 5 * Cam Macdonell <cam@cs.ualberta.ca> 6 * 7 * Based On: cirrus_vga.c 8 * Copyright (c) 2004 Fabrice Bellard 9 * Copyright (c) 2004 Makoto Suzuki (suzu) 10 * 11 * and rtl8139.c 12 * Copyright (c) 2006 Igor Kovalenko 13 * 14 * This code is licensed under the GNU GPL v2. 15 * 16 * Contributions after 2012-01-13 are licensed under the terms of the 17 * GNU GPL, version 2 or (at your option) any later version. 18 */ 19 #include "qemu/osdep.h" 20 #include "qapi/error.h" 21 #include "qemu/cutils.h" 22 #include "hw/hw.h" 23 #include "hw/i386/pc.h" 24 #include "hw/pci/pci.h" 25 #include "hw/pci/msi.h" 26 #include "hw/pci/msix.h" 27 #include "sysemu/kvm.h" 28 #include "migration/migration.h" 29 #include "qemu/error-report.h" 30 #include "qemu/event_notifier.h" 31 #include "qom/object_interfaces.h" 32 #include "sysemu/char.h" 33 #include "sysemu/hostmem.h" 34 #include "sysemu/qtest.h" 35 #include "qapi/visitor.h" 36 37 #include "hw/misc/ivshmem.h" 38 39 #include <sys/mman.h> 40 41 #define PCI_VENDOR_ID_IVSHMEM PCI_VENDOR_ID_REDHAT_QUMRANET 42 #define PCI_DEVICE_ID_IVSHMEM 0x1110 43 44 #define IVSHMEM_MAX_PEERS UINT16_MAX 45 #define IVSHMEM_IOEVENTFD 0 46 #define IVSHMEM_MSI 1 47 48 #define IVSHMEM_REG_BAR_SIZE 0x100 49 50 #define IVSHMEM_DEBUG 0 51 #define IVSHMEM_DPRINTF(fmt, ...) 
\ 52 do { \ 53 if (IVSHMEM_DEBUG) { \ 54 printf("IVSHMEM: " fmt, ## __VA_ARGS__); \ 55 } \ 56 } while (0) 57 58 #define TYPE_IVSHMEM_COMMON "ivshmem-common" 59 #define IVSHMEM_COMMON(obj) \ 60 OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_COMMON) 61 62 #define TYPE_IVSHMEM_PLAIN "ivshmem-plain" 63 #define IVSHMEM_PLAIN(obj) \ 64 OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_PLAIN) 65 66 #define TYPE_IVSHMEM_DOORBELL "ivshmem-doorbell" 67 #define IVSHMEM_DOORBELL(obj) \ 68 OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_DOORBELL) 69 70 #define TYPE_IVSHMEM "ivshmem" 71 #define IVSHMEM(obj) \ 72 OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM) 73 74 typedef struct Peer { 75 int nb_eventfds; 76 EventNotifier *eventfds; 77 } Peer; 78 79 typedef struct MSIVector { 80 PCIDevice *pdev; 81 int virq; 82 } MSIVector; 83 84 typedef struct IVShmemState { 85 /*< private >*/ 86 PCIDevice parent_obj; 87 /*< public >*/ 88 89 uint32_t features; 90 91 /* exactly one of these two may be set */ 92 HostMemoryBackend *hostmem; /* with interrupts */ 93 CharDriverState *server_chr; /* without interrupts */ 94 95 /* registers */ 96 uint32_t intrmask; 97 uint32_t intrstatus; 98 int vm_id; 99 100 /* BARs */ 101 MemoryRegion ivshmem_mmio; /* BAR 0 (registers) */ 102 MemoryRegion *ivshmem_bar2; /* BAR 2 (shared memory) */ 103 MemoryRegion server_bar2; /* used with server_chr */ 104 105 /* interrupt support */ 106 Peer *peers; 107 int nb_peers; /* space in @peers[] */ 108 uint32_t vectors; 109 MSIVector *msi_vectors; 110 uint64_t msg_buf; /* buffer for receiving server messages */ 111 int msg_buffered_bytes; /* #bytes in @msg_buf */ 112 113 /* migration stuff */ 114 OnOffAuto master; 115 Error *migration_blocker; 116 117 /* legacy cruft */ 118 char *role; 119 char *shmobj; 120 char *sizearg; 121 size_t legacy_size; 122 uint32_t not_legacy_32bit; 123 } IVShmemState; 124 125 /* registers for the Inter-VM shared memory device */ 126 enum ivshmem_registers { 127 INTRMASK = 0, 128 INTRSTATUS = 4, 
129 IVPOSITION = 8, 130 DOORBELL = 12, 131 }; 132 133 static inline uint32_t ivshmem_has_feature(IVShmemState *ivs, 134 unsigned int feature) { 135 return (ivs->features & (1 << feature)); 136 } 137 138 static inline bool ivshmem_is_master(IVShmemState *s) 139 { 140 assert(s->master != ON_OFF_AUTO_AUTO); 141 return s->master == ON_OFF_AUTO_ON; 142 } 143 144 static void ivshmem_update_irq(IVShmemState *s) 145 { 146 PCIDevice *d = PCI_DEVICE(s); 147 uint32_t isr = s->intrstatus & s->intrmask; 148 149 /* 150 * Do nothing unless the device actually uses INTx. Here's how 151 * the device variants signal interrupts, what they put in PCI 152 * config space: 153 * Device variant Interrupt Interrupt Pin MSI-X cap. 154 * ivshmem-plain none 0 no 155 * ivshmem-doorbell MSI-X 1 yes(1) 156 * ivshmem,msi=off INTx 1 no 157 * ivshmem,msi=on MSI-X 1(2) yes(1) 158 * (1) if guest enabled MSI-X 159 * (2) the device lies 160 * Leads to the condition for doing nothing: 161 */ 162 if (ivshmem_has_feature(s, IVSHMEM_MSI) 163 || !d->config[PCI_INTERRUPT_PIN]) { 164 return; 165 } 166 167 /* don't print ISR resets */ 168 if (isr) { 169 IVSHMEM_DPRINTF("Set IRQ to %d (%04x %04x)\n", 170 isr ? 
1 : 0, s->intrstatus, s->intrmask); 171 } 172 173 pci_set_irq(d, isr != 0); 174 } 175 176 static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val) 177 { 178 IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val); 179 180 s->intrmask = val; 181 ivshmem_update_irq(s); 182 } 183 184 static uint32_t ivshmem_IntrMask_read(IVShmemState *s) 185 { 186 uint32_t ret = s->intrmask; 187 188 IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret); 189 return ret; 190 } 191 192 static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val) 193 { 194 IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val); 195 196 s->intrstatus = val; 197 ivshmem_update_irq(s); 198 } 199 200 static uint32_t ivshmem_IntrStatus_read(IVShmemState *s) 201 { 202 uint32_t ret = s->intrstatus; 203 204 /* reading ISR clears all interrupts */ 205 s->intrstatus = 0; 206 ivshmem_update_irq(s); 207 return ret; 208 } 209 210 static void ivshmem_io_write(void *opaque, hwaddr addr, 211 uint64_t val, unsigned size) 212 { 213 IVShmemState *s = opaque; 214 215 uint16_t dest = val >> 16; 216 uint16_t vector = val & 0xff; 217 218 addr &= 0xfc; 219 220 IVSHMEM_DPRINTF("writing to addr " TARGET_FMT_plx "\n", addr); 221 switch (addr) 222 { 223 case INTRMASK: 224 ivshmem_IntrMask_write(s, val); 225 break; 226 227 case INTRSTATUS: 228 ivshmem_IntrStatus_write(s, val); 229 break; 230 231 case DOORBELL: 232 /* check that dest VM ID is reasonable */ 233 if (dest >= s->nb_peers) { 234 IVSHMEM_DPRINTF("Invalid destination VM ID (%d)\n", dest); 235 break; 236 } 237 238 /* check doorbell range */ 239 if (vector < s->peers[dest].nb_eventfds) { 240 IVSHMEM_DPRINTF("Notifying VM %d on vector %d\n", dest, vector); 241 event_notifier_set(&s->peers[dest].eventfds[vector]); 242 } else { 243 IVSHMEM_DPRINTF("Invalid destination vector %d on VM %d\n", 244 vector, dest); 245 } 246 break; 247 default: 248 IVSHMEM_DPRINTF("Unhandled write " TARGET_FMT_plx "\n", addr); 249 } 250 } 251 252 static uint64_t 
ivshmem_io_read(void *opaque, hwaddr addr, 253 unsigned size) 254 { 255 256 IVShmemState *s = opaque; 257 uint32_t ret; 258 259 switch (addr) 260 { 261 case INTRMASK: 262 ret = ivshmem_IntrMask_read(s); 263 break; 264 265 case INTRSTATUS: 266 ret = ivshmem_IntrStatus_read(s); 267 break; 268 269 case IVPOSITION: 270 ret = s->vm_id; 271 break; 272 273 default: 274 IVSHMEM_DPRINTF("why are we reading " TARGET_FMT_plx "\n", addr); 275 ret = 0; 276 } 277 278 return ret; 279 } 280 281 static const MemoryRegionOps ivshmem_mmio_ops = { 282 .read = ivshmem_io_read, 283 .write = ivshmem_io_write, 284 .endianness = DEVICE_NATIVE_ENDIAN, 285 .impl = { 286 .min_access_size = 4, 287 .max_access_size = 4, 288 }, 289 }; 290 291 static void ivshmem_vector_notify(void *opaque) 292 { 293 MSIVector *entry = opaque; 294 PCIDevice *pdev = entry->pdev; 295 IVShmemState *s = IVSHMEM_COMMON(pdev); 296 int vector = entry - s->msi_vectors; 297 EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; 298 299 if (!event_notifier_test_and_clear(n)) { 300 return; 301 } 302 303 IVSHMEM_DPRINTF("interrupt on vector %p %d\n", pdev, vector); 304 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 305 if (msix_enabled(pdev)) { 306 msix_notify(pdev, vector); 307 } 308 } else { 309 ivshmem_IntrStatus_write(s, 1); 310 } 311 } 312 313 static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector, 314 MSIMessage msg) 315 { 316 IVShmemState *s = IVSHMEM_COMMON(dev); 317 EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; 318 MSIVector *v = &s->msi_vectors[vector]; 319 int ret; 320 321 IVSHMEM_DPRINTF("vector unmask %p %d\n", dev, vector); 322 323 ret = kvm_irqchip_update_msi_route(kvm_state, v->virq, msg, dev); 324 if (ret < 0) { 325 return ret; 326 } 327 328 return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq); 329 } 330 331 static void ivshmem_vector_mask(PCIDevice *dev, unsigned vector) 332 { 333 IVShmemState *s = IVSHMEM_COMMON(dev); 334 EventNotifier *n = 
&s->peers[s->vm_id].eventfds[vector]; 335 int ret; 336 337 IVSHMEM_DPRINTF("vector mask %p %d\n", dev, vector); 338 339 ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, 340 s->msi_vectors[vector].virq); 341 if (ret != 0) { 342 error_report("remove_irqfd_notifier_gsi failed"); 343 } 344 } 345 346 static void ivshmem_vector_poll(PCIDevice *dev, 347 unsigned int vector_start, 348 unsigned int vector_end) 349 { 350 IVShmemState *s = IVSHMEM_COMMON(dev); 351 unsigned int vector; 352 353 IVSHMEM_DPRINTF("vector poll %p %d-%d\n", dev, vector_start, vector_end); 354 355 vector_end = MIN(vector_end, s->vectors); 356 357 for (vector = vector_start; vector < vector_end; vector++) { 358 EventNotifier *notifier = &s->peers[s->vm_id].eventfds[vector]; 359 360 if (!msix_is_masked(dev, vector)) { 361 continue; 362 } 363 364 if (event_notifier_test_and_clear(notifier)) { 365 msix_set_pending(dev, vector); 366 } 367 } 368 } 369 370 static void watch_vector_notifier(IVShmemState *s, EventNotifier *n, 371 int vector) 372 { 373 int eventfd = event_notifier_get_fd(n); 374 375 assert(!s->msi_vectors[vector].pdev); 376 s->msi_vectors[vector].pdev = PCI_DEVICE(s); 377 378 qemu_set_fd_handler(eventfd, ivshmem_vector_notify, 379 NULL, &s->msi_vectors[vector]); 380 } 381 382 static void ivshmem_add_eventfd(IVShmemState *s, int posn, int i) 383 { 384 memory_region_add_eventfd(&s->ivshmem_mmio, 385 DOORBELL, 386 4, 387 true, 388 (posn << 16) | i, 389 &s->peers[posn].eventfds[i]); 390 } 391 392 static void ivshmem_del_eventfd(IVShmemState *s, int posn, int i) 393 { 394 memory_region_del_eventfd(&s->ivshmem_mmio, 395 DOORBELL, 396 4, 397 true, 398 (posn << 16) | i, 399 &s->peers[posn].eventfds[i]); 400 } 401 402 static void close_peer_eventfds(IVShmemState *s, int posn) 403 { 404 int i, n; 405 406 assert(posn >= 0 && posn < s->nb_peers); 407 n = s->peers[posn].nb_eventfds; 408 409 if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) { 410 memory_region_transaction_begin(); 411 for (i = 0; i < 
n; i++) { 412 ivshmem_del_eventfd(s, posn, i); 413 } 414 memory_region_transaction_commit(); 415 } 416 417 for (i = 0; i < n; i++) { 418 event_notifier_cleanup(&s->peers[posn].eventfds[i]); 419 } 420 421 g_free(s->peers[posn].eventfds); 422 s->peers[posn].nb_eventfds = 0; 423 } 424 425 static void resize_peers(IVShmemState *s, int nb_peers) 426 { 427 int old_nb_peers = s->nb_peers; 428 int i; 429 430 assert(nb_peers > old_nb_peers); 431 IVSHMEM_DPRINTF("bumping storage to %d peers\n", nb_peers); 432 433 s->peers = g_realloc(s->peers, nb_peers * sizeof(Peer)); 434 s->nb_peers = nb_peers; 435 436 for (i = old_nb_peers; i < nb_peers; i++) { 437 s->peers[i].eventfds = g_new0(EventNotifier, s->vectors); 438 s->peers[i].nb_eventfds = 0; 439 } 440 } 441 442 static void ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector, 443 Error **errp) 444 { 445 PCIDevice *pdev = PCI_DEVICE(s); 446 MSIMessage msg = msix_get_message(pdev, vector); 447 int ret; 448 449 IVSHMEM_DPRINTF("ivshmem_add_kvm_msi_virq vector:%d\n", vector); 450 assert(!s->msi_vectors[vector].pdev); 451 452 ret = kvm_irqchip_add_msi_route(kvm_state, msg, pdev); 453 if (ret < 0) { 454 error_setg(errp, "kvm_irqchip_add_msi_route failed"); 455 return; 456 } 457 458 s->msi_vectors[vector].virq = ret; 459 s->msi_vectors[vector].pdev = pdev; 460 } 461 462 static void setup_interrupt(IVShmemState *s, int vector, Error **errp) 463 { 464 EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; 465 bool with_irqfd = kvm_msi_via_irqfd_enabled() && 466 ivshmem_has_feature(s, IVSHMEM_MSI); 467 PCIDevice *pdev = PCI_DEVICE(s); 468 Error *err = NULL; 469 470 IVSHMEM_DPRINTF("setting up interrupt for vector: %d\n", vector); 471 472 if (!with_irqfd) { 473 IVSHMEM_DPRINTF("with eventfd\n"); 474 watch_vector_notifier(s, n, vector); 475 } else if (msix_enabled(pdev)) { 476 IVSHMEM_DPRINTF("with irqfd\n"); 477 ivshmem_add_kvm_msi_virq(s, vector, &err); 478 if (err) { 479 error_propagate(errp, err); 480 return; 481 } 482 483 if 
(!msix_is_masked(pdev, vector)) { 484 kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, 485 s->msi_vectors[vector].virq); 486 /* TODO handle error */ 487 } 488 } else { 489 /* it will be delayed until msix is enabled, in write_config */ 490 IVSHMEM_DPRINTF("with irqfd, delayed until msix enabled\n"); 491 } 492 } 493 494 static void process_msg_shmem(IVShmemState *s, int fd, Error **errp) 495 { 496 struct stat buf; 497 size_t size; 498 void *ptr; 499 500 if (s->ivshmem_bar2) { 501 error_setg(errp, "server sent unexpected shared memory message"); 502 close(fd); 503 return; 504 } 505 506 if (fstat(fd, &buf) < 0) { 507 error_setg_errno(errp, errno, 508 "can't determine size of shared memory sent by server"); 509 close(fd); 510 return; 511 } 512 513 size = buf.st_size; 514 515 /* Legacy cruft */ 516 if (s->legacy_size != SIZE_MAX) { 517 if (size < s->legacy_size) { 518 error_setg(errp, "server sent only %zd bytes of shared memory", 519 (size_t)buf.st_size); 520 close(fd); 521 return; 522 } 523 size = s->legacy_size; 524 } 525 526 /* mmap the region and map into the BAR2 */ 527 ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); 528 if (ptr == MAP_FAILED) { 529 error_setg_errno(errp, errno, "Failed to mmap shared memory"); 530 close(fd); 531 return; 532 } 533 memory_region_init_ram_ptr(&s->server_bar2, OBJECT(s), 534 "ivshmem.bar2", size, ptr); 535 memory_region_set_fd(&s->server_bar2, fd); 536 s->ivshmem_bar2 = &s->server_bar2; 537 } 538 539 static void process_msg_disconnect(IVShmemState *s, uint16_t posn, 540 Error **errp) 541 { 542 IVSHMEM_DPRINTF("posn %d has gone away\n", posn); 543 if (posn >= s->nb_peers || posn == s->vm_id) { 544 error_setg(errp, "invalid peer %d", posn); 545 return; 546 } 547 close_peer_eventfds(s, posn); 548 } 549 550 static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd, 551 Error **errp) 552 { 553 Peer *peer = &s->peers[posn]; 554 int vector; 555 556 /* 557 * The N-th connect message for this peer comes with 
the file 558 * descriptor for vector N-1. Count messages to find the vector. 559 */ 560 if (peer->nb_eventfds >= s->vectors) { 561 error_setg(errp, "Too many eventfd received, device has %d vectors", 562 s->vectors); 563 close(fd); 564 return; 565 } 566 vector = peer->nb_eventfds++; 567 568 IVSHMEM_DPRINTF("eventfds[%d][%d] = %d\n", posn, vector, fd); 569 event_notifier_init_fd(&peer->eventfds[vector], fd); 570 fcntl_setfl(fd, O_NONBLOCK); /* msix/irqfd poll non block */ 571 572 if (posn == s->vm_id) { 573 setup_interrupt(s, vector, errp); 574 /* TODO do we need to handle the error? */ 575 } 576 577 if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) { 578 ivshmem_add_eventfd(s, posn, vector); 579 } 580 } 581 582 static void process_msg(IVShmemState *s, int64_t msg, int fd, Error **errp) 583 { 584 IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd); 585 586 if (msg < -1 || msg > IVSHMEM_MAX_PEERS) { 587 error_setg(errp, "server sent invalid message %" PRId64, msg); 588 close(fd); 589 return; 590 } 591 592 if (msg == -1) { 593 process_msg_shmem(s, fd, errp); 594 return; 595 } 596 597 if (msg >= s->nb_peers) { 598 resize_peers(s, msg + 1); 599 } 600 601 if (fd >= 0) { 602 process_msg_connect(s, msg, fd, errp); 603 } else { 604 process_msg_disconnect(s, msg, errp); 605 } 606 } 607 608 static int ivshmem_can_receive(void *opaque) 609 { 610 IVShmemState *s = opaque; 611 612 assert(s->msg_buffered_bytes < sizeof(s->msg_buf)); 613 return sizeof(s->msg_buf) - s->msg_buffered_bytes; 614 } 615 616 static void ivshmem_read(void *opaque, const uint8_t *buf, int size) 617 { 618 IVShmemState *s = opaque; 619 Error *err = NULL; 620 int fd; 621 int64_t msg; 622 623 assert(size >= 0 && s->msg_buffered_bytes + size <= sizeof(s->msg_buf)); 624 memcpy((unsigned char *)&s->msg_buf + s->msg_buffered_bytes, buf, size); 625 s->msg_buffered_bytes += size; 626 if (s->msg_buffered_bytes < sizeof(s->msg_buf)) { 627 return; 628 } 629 msg = le64_to_cpu(s->msg_buf); 630 s->msg_buffered_bytes 
= 0; 631 632 fd = qemu_chr_fe_get_msgfd(s->server_chr); 633 IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd); 634 635 process_msg(s, msg, fd, &err); 636 if (err) { 637 error_report_err(err); 638 } 639 } 640 641 static int64_t ivshmem_recv_msg(IVShmemState *s, int *pfd, Error **errp) 642 { 643 int64_t msg; 644 int n, ret; 645 646 n = 0; 647 do { 648 ret = qemu_chr_fe_read_all(s->server_chr, (uint8_t *)&msg + n, 649 sizeof(msg) - n); 650 if (ret < 0 && ret != -EINTR) { 651 error_setg_errno(errp, -ret, "read from server failed"); 652 return INT64_MIN; 653 } 654 n += ret; 655 } while (n < sizeof(msg)); 656 657 *pfd = qemu_chr_fe_get_msgfd(s->server_chr); 658 return msg; 659 } 660 661 static void ivshmem_recv_setup(IVShmemState *s, Error **errp) 662 { 663 Error *err = NULL; 664 int64_t msg; 665 int fd; 666 667 msg = ivshmem_recv_msg(s, &fd, &err); 668 if (err) { 669 error_propagate(errp, err); 670 return; 671 } 672 if (msg != IVSHMEM_PROTOCOL_VERSION) { 673 error_setg(errp, "server sent version %" PRId64 ", expecting %d", 674 msg, IVSHMEM_PROTOCOL_VERSION); 675 return; 676 } 677 if (fd != -1) { 678 error_setg(errp, "server sent invalid version message"); 679 return; 680 } 681 682 /* 683 * ivshmem-server sends the remaining initial messages in a fixed 684 * order, but the device has always accepted them in any order. 685 * Stay as compatible as practical, just in case people use 686 * servers that behave differently. 687 */ 688 689 /* 690 * ivshmem_device_spec.txt has always required the ID message 691 * right here, and ivshmem-server has always complied. However, 692 * older versions of the device accepted it out of order, but 693 * broke when an interrupt setup message arrived before it. 
694 */ 695 msg = ivshmem_recv_msg(s, &fd, &err); 696 if (err) { 697 error_propagate(errp, err); 698 return; 699 } 700 if (fd != -1 || msg < 0 || msg > IVSHMEM_MAX_PEERS) { 701 error_setg(errp, "server sent invalid ID message"); 702 return; 703 } 704 s->vm_id = msg; 705 706 /* 707 * Receive more messages until we got shared memory. 708 */ 709 do { 710 msg = ivshmem_recv_msg(s, &fd, &err); 711 if (err) { 712 error_propagate(errp, err); 713 return; 714 } 715 process_msg(s, msg, fd, &err); 716 if (err) { 717 error_propagate(errp, err); 718 return; 719 } 720 } while (msg != -1); 721 722 /* 723 * This function must either map the shared memory or fail. The 724 * loop above ensures that: it terminates normally only after it 725 * successfully processed the server's shared memory message. 726 * Assert that actually mapped the shared memory: 727 */ 728 assert(s->ivshmem_bar2); 729 } 730 731 /* Select the MSI-X vectors used by device. 732 * ivshmem maps events to vectors statically, so 733 * we just enable all vectors on init and after reset. 
 */
static void ivshmem_msix_vector_use(IVShmemState *s)
{
    PCIDevice *d = PCI_DEVICE(s);
    int i;

    /* Mark every vector in use; the event->vector mapping is static. */
    for (i = 0; i < s->vectors; i++) {
        msix_vector_use(d, i);
    }
}

/* Device reset: clear interrupt registers and re-arm all MSI-X vectors. */
static void ivshmem_reset(DeviceState *d)
{
    IVShmemState *s = IVSHMEM_COMMON(d);

    s->intrstatus = 0;
    s->intrmask = 0;
    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
        ivshmem_msix_vector_use(s);
    }
}

/*
 * Allocate per-vector callback state and, when the device uses MSI,
 * initialize the MSI-X capability in its own BAR (BAR 1).
 * Returns 0 on success, -1 if MSI-X initialization fails.
 */
static int ivshmem_setup_interrupts(IVShmemState *s)
{
    /* allocate QEMU callback data for receiving interrupts */
    s->msi_vectors = g_malloc0(s->vectors * sizeof(MSIVector));

    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
        if (msix_init_exclusive_bar(PCI_DEVICE(s), s->vectors, 1)) {
            return -1;
        }

        IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors);
        ivshmem_msix_vector_use(s);
    }

    return 0;
}

/*
 * Guest enabled MSI-X: route each of our own eventfds through a KVM
 * irqfd so peer doorbells are injected without a trip through QEMU.
 * Called from ivshmem_write_config() on the disabled->enabled edge.
 */
static void ivshmem_enable_irqfd(IVShmemState *s)
{
    PCIDevice *pdev = PCI_DEVICE(s);
    int i;

    for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
        Error *err = NULL;

        ivshmem_add_kvm_msi_virq(s, i, &err);
        if (err) {
            error_report_err(err);
            /* TODO do we need to handle the error? */
        }
    }

    /* notifiers keep the irqfd routing in sync with guest mask/unmask */
    if (msix_set_vector_notifiers(pdev,
                                  ivshmem_vector_unmask,
                                  ivshmem_vector_mask,
                                  ivshmem_vector_poll)) {
        error_report("ivshmem: msix_set_vector_notifiers failed");
    }
}

/* Release the KVM routing entry for @vector, if one was ever set up. */
static void ivshmem_remove_kvm_msi_virq(IVShmemState *s, int vector)
{
    IVSHMEM_DPRINTF("ivshmem_remove_kvm_msi_virq vector:%d\n", vector);

    /* vector was never routed (e.g. msix never enabled): nothing to do */
    if (s->msi_vectors[vector].pdev == NULL) {
        return;
    }

    /* it was cleaned when masked in the frontend.
*/ 804 kvm_irqchip_release_virq(kvm_state, s->msi_vectors[vector].virq); 805 806 s->msi_vectors[vector].pdev = NULL; 807 } 808 809 static void ivshmem_disable_irqfd(IVShmemState *s) 810 { 811 PCIDevice *pdev = PCI_DEVICE(s); 812 int i; 813 814 for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) { 815 ivshmem_remove_kvm_msi_virq(s, i); 816 } 817 818 msix_unset_vector_notifiers(pdev); 819 } 820 821 static void ivshmem_write_config(PCIDevice *pdev, uint32_t address, 822 uint32_t val, int len) 823 { 824 IVShmemState *s = IVSHMEM_COMMON(pdev); 825 int is_enabled, was_enabled = msix_enabled(pdev); 826 827 pci_default_write_config(pdev, address, val, len); 828 is_enabled = msix_enabled(pdev); 829 830 if (kvm_msi_via_irqfd_enabled()) { 831 if (!was_enabled && is_enabled) { 832 ivshmem_enable_irqfd(s); 833 } else if (was_enabled && !is_enabled) { 834 ivshmem_disable_irqfd(s); 835 } 836 } 837 } 838 839 static void ivshmem_common_realize(PCIDevice *dev, Error **errp) 840 { 841 IVShmemState *s = IVSHMEM_COMMON(dev); 842 Error *err = NULL; 843 uint8_t *pci_conf; 844 uint8_t attr = PCI_BASE_ADDRESS_SPACE_MEMORY | 845 PCI_BASE_ADDRESS_MEM_PREFETCH; 846 847 /* IRQFD requires MSI */ 848 if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD) && 849 !ivshmem_has_feature(s, IVSHMEM_MSI)) { 850 error_setg(errp, "ioeventfd/irqfd requires MSI"); 851 return; 852 } 853 854 pci_conf = dev->config; 855 pci_conf[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY; 856 857 memory_region_init_io(&s->ivshmem_mmio, OBJECT(s), &ivshmem_mmio_ops, s, 858 "ivshmem-mmio", IVSHMEM_REG_BAR_SIZE); 859 860 /* region for registers*/ 861 pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, 862 &s->ivshmem_mmio); 863 864 if (!s->not_legacy_32bit) { 865 attr |= PCI_BASE_ADDRESS_MEM_TYPE_64; 866 } 867 868 if (s->hostmem != NULL) { 869 IVSHMEM_DPRINTF("using hostmem\n"); 870 871 s->ivshmem_bar2 = host_memory_backend_get_memory(s->hostmem, 872 &error_abort); 873 } else { 874 assert(s->server_chr); 875 876 
IVSHMEM_DPRINTF("using shared memory server (socket = %s)\n", 877 s->server_chr->filename); 878 879 /* we allocate enough space for 16 peers and grow as needed */ 880 resize_peers(s, 16); 881 882 /* 883 * Receive setup messages from server synchronously. 884 * Older versions did it asynchronously, but that creates a 885 * number of entertaining race conditions. 886 */ 887 ivshmem_recv_setup(s, &err); 888 if (err) { 889 error_propagate(errp, err); 890 return; 891 } 892 893 if (s->master == ON_OFF_AUTO_ON && s->vm_id != 0) { 894 error_setg(errp, 895 "master must connect to the server before any peers"); 896 return; 897 } 898 899 qemu_chr_add_handlers(s->server_chr, ivshmem_can_receive, 900 ivshmem_read, NULL, s); 901 902 if (ivshmem_setup_interrupts(s) < 0) { 903 error_setg(errp, "failed to initialize interrupts"); 904 return; 905 } 906 } 907 908 vmstate_register_ram(s->ivshmem_bar2, DEVICE(s)); 909 pci_register_bar(PCI_DEVICE(s), 2, attr, s->ivshmem_bar2); 910 911 if (s->master == ON_OFF_AUTO_AUTO) { 912 s->master = s->vm_id == 0 ? 
ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; 913 } 914 915 if (!ivshmem_is_master(s)) { 916 error_setg(&s->migration_blocker, 917 "Migration is disabled when using feature 'peer mode' in device 'ivshmem'"); 918 migrate_add_blocker(s->migration_blocker); 919 } 920 } 921 922 static void ivshmem_exit(PCIDevice *dev) 923 { 924 IVShmemState *s = IVSHMEM_COMMON(dev); 925 int i; 926 927 if (s->migration_blocker) { 928 migrate_del_blocker(s->migration_blocker); 929 error_free(s->migration_blocker); 930 } 931 932 if (memory_region_is_mapped(s->ivshmem_bar2)) { 933 if (!s->hostmem) { 934 void *addr = memory_region_get_ram_ptr(s->ivshmem_bar2); 935 int fd; 936 937 if (munmap(addr, memory_region_size(s->ivshmem_bar2) == -1)) { 938 error_report("Failed to munmap shared memory %s", 939 strerror(errno)); 940 } 941 942 fd = memory_region_get_fd(s->ivshmem_bar2); 943 close(fd); 944 } 945 946 vmstate_unregister_ram(s->ivshmem_bar2, DEVICE(dev)); 947 } 948 949 if (s->peers) { 950 for (i = 0; i < s->nb_peers; i++) { 951 close_peer_eventfds(s, i); 952 } 953 g_free(s->peers); 954 } 955 956 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 957 msix_uninit_exclusive_bar(dev); 958 } 959 960 g_free(s->msi_vectors); 961 } 962 963 static int ivshmem_pre_load(void *opaque) 964 { 965 IVShmemState *s = opaque; 966 967 if (!ivshmem_is_master(s)) { 968 error_report("'peer' devices are not migratable"); 969 return -EINVAL; 970 } 971 972 return 0; 973 } 974 975 static int ivshmem_post_load(void *opaque, int version_id) 976 { 977 IVShmemState *s = opaque; 978 979 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 980 ivshmem_msix_vector_use(s); 981 } 982 return 0; 983 } 984 985 static void ivshmem_common_class_init(ObjectClass *klass, void *data) 986 { 987 DeviceClass *dc = DEVICE_CLASS(klass); 988 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); 989 990 k->realize = ivshmem_common_realize; 991 k->exit = ivshmem_exit; 992 k->config_write = ivshmem_write_config; 993 k->vendor_id = PCI_VENDOR_ID_IVSHMEM; 994 k->device_id = 
PCI_DEVICE_ID_IVSHMEM; 995 k->class_id = PCI_CLASS_MEMORY_RAM; 996 k->revision = 1; 997 dc->reset = ivshmem_reset; 998 set_bit(DEVICE_CATEGORY_MISC, dc->categories); 999 dc->desc = "Inter-VM shared memory"; 1000 } 1001 1002 static const TypeInfo ivshmem_common_info = { 1003 .name = TYPE_IVSHMEM_COMMON, 1004 .parent = TYPE_PCI_DEVICE, 1005 .instance_size = sizeof(IVShmemState), 1006 .abstract = true, 1007 .class_init = ivshmem_common_class_init, 1008 }; 1009 1010 static void ivshmem_check_memdev_is_busy(Object *obj, const char *name, 1011 Object *val, Error **errp) 1012 { 1013 MemoryRegion *mr; 1014 1015 mr = host_memory_backend_get_memory(MEMORY_BACKEND(val), &error_abort); 1016 if (memory_region_is_mapped(mr)) { 1017 char *path = object_get_canonical_path_component(val); 1018 error_setg(errp, "can't use already busy memdev: %s", path); 1019 g_free(path); 1020 } else { 1021 qdev_prop_allow_set_link_before_realize(obj, name, val, errp); 1022 } 1023 } 1024 1025 static const VMStateDescription ivshmem_plain_vmsd = { 1026 .name = TYPE_IVSHMEM_PLAIN, 1027 .version_id = 0, 1028 .minimum_version_id = 0, 1029 .pre_load = ivshmem_pre_load, 1030 .post_load = ivshmem_post_load, 1031 .fields = (VMStateField[]) { 1032 VMSTATE_PCI_DEVICE(parent_obj, IVShmemState), 1033 VMSTATE_UINT32(intrstatus, IVShmemState), 1034 VMSTATE_UINT32(intrmask, IVShmemState), 1035 VMSTATE_END_OF_LIST() 1036 }, 1037 }; 1038 1039 static Property ivshmem_plain_properties[] = { 1040 DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF), 1041 DEFINE_PROP_END_OF_LIST(), 1042 }; 1043 1044 static void ivshmem_plain_init(Object *obj) 1045 { 1046 IVShmemState *s = IVSHMEM_PLAIN(obj); 1047 1048 object_property_add_link(obj, "memdev", TYPE_MEMORY_BACKEND, 1049 (Object **)&s->hostmem, 1050 ivshmem_check_memdev_is_busy, 1051 OBJ_PROP_LINK_UNREF_ON_RELEASE, 1052 &error_abort); 1053 } 1054 1055 static void ivshmem_plain_realize(PCIDevice *dev, Error **errp) 1056 { 1057 IVShmemState *s = 
IVSHMEM_COMMON(dev); 1058 1059 if (!s->hostmem) { 1060 error_setg(errp, "You must specify a 'memdev'"); 1061 return; 1062 } 1063 1064 ivshmem_common_realize(dev, errp); 1065 } 1066 1067 static void ivshmem_plain_class_init(ObjectClass *klass, void *data) 1068 { 1069 DeviceClass *dc = DEVICE_CLASS(klass); 1070 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); 1071 1072 k->realize = ivshmem_plain_realize; 1073 dc->props = ivshmem_plain_properties; 1074 dc->vmsd = &ivshmem_plain_vmsd; 1075 } 1076 1077 static const TypeInfo ivshmem_plain_info = { 1078 .name = TYPE_IVSHMEM_PLAIN, 1079 .parent = TYPE_IVSHMEM_COMMON, 1080 .instance_size = sizeof(IVShmemState), 1081 .instance_init = ivshmem_plain_init, 1082 .class_init = ivshmem_plain_class_init, 1083 }; 1084 1085 static const VMStateDescription ivshmem_doorbell_vmsd = { 1086 .name = TYPE_IVSHMEM_DOORBELL, 1087 .version_id = 0, 1088 .minimum_version_id = 0, 1089 .pre_load = ivshmem_pre_load, 1090 .post_load = ivshmem_post_load, 1091 .fields = (VMStateField[]) { 1092 VMSTATE_PCI_DEVICE(parent_obj, IVShmemState), 1093 VMSTATE_MSIX(parent_obj, IVShmemState), 1094 VMSTATE_UINT32(intrstatus, IVShmemState), 1095 VMSTATE_UINT32(intrmask, IVShmemState), 1096 VMSTATE_END_OF_LIST() 1097 }, 1098 }; 1099 1100 static Property ivshmem_doorbell_properties[] = { 1101 DEFINE_PROP_CHR("chardev", IVShmemState, server_chr), 1102 DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1), 1103 DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD, 1104 true), 1105 DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF), 1106 DEFINE_PROP_END_OF_LIST(), 1107 }; 1108 1109 static void ivshmem_doorbell_init(Object *obj) 1110 { 1111 IVShmemState *s = IVSHMEM_DOORBELL(obj); 1112 1113 s->features |= (1 << IVSHMEM_MSI); 1114 s->legacy_size = SIZE_MAX; /* whatever the server sends */ 1115 } 1116 1117 static void ivshmem_doorbell_realize(PCIDevice *dev, Error **errp) 1118 { 1119 IVShmemState *s = IVSHMEM_COMMON(dev); 1120 1121 
if (!s->server_chr) { 1122 error_setg(errp, "You must specify a 'chardev'"); 1123 return; 1124 } 1125 1126 ivshmem_common_realize(dev, errp); 1127 } 1128 1129 static void ivshmem_doorbell_class_init(ObjectClass *klass, void *data) 1130 { 1131 DeviceClass *dc = DEVICE_CLASS(klass); 1132 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); 1133 1134 k->realize = ivshmem_doorbell_realize; 1135 dc->props = ivshmem_doorbell_properties; 1136 dc->vmsd = &ivshmem_doorbell_vmsd; 1137 } 1138 1139 static const TypeInfo ivshmem_doorbell_info = { 1140 .name = TYPE_IVSHMEM_DOORBELL, 1141 .parent = TYPE_IVSHMEM_COMMON, 1142 .instance_size = sizeof(IVShmemState), 1143 .instance_init = ivshmem_doorbell_init, 1144 .class_init = ivshmem_doorbell_class_init, 1145 }; 1146 1147 static int ivshmem_load_old(QEMUFile *f, void *opaque, int version_id) 1148 { 1149 IVShmemState *s = opaque; 1150 PCIDevice *pdev = PCI_DEVICE(s); 1151 int ret; 1152 1153 IVSHMEM_DPRINTF("ivshmem_load_old\n"); 1154 1155 if (version_id != 0) { 1156 return -EINVAL; 1157 } 1158 1159 ret = ivshmem_pre_load(s); 1160 if (ret) { 1161 return ret; 1162 } 1163 1164 ret = pci_device_load(pdev, f); 1165 if (ret) { 1166 return ret; 1167 } 1168 1169 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 1170 msix_load(pdev, f); 1171 ivshmem_msix_vector_use(s); 1172 } else { 1173 s->intrstatus = qemu_get_be32(f); 1174 s->intrmask = qemu_get_be32(f); 1175 } 1176 1177 return 0; 1178 } 1179 1180 static bool test_msix(void *opaque, int version_id) 1181 { 1182 IVShmemState *s = opaque; 1183 1184 return ivshmem_has_feature(s, IVSHMEM_MSI); 1185 } 1186 1187 static bool test_no_msix(void *opaque, int version_id) 1188 { 1189 return !test_msix(opaque, version_id); 1190 } 1191 1192 static const VMStateDescription ivshmem_vmsd = { 1193 .name = "ivshmem", 1194 .version_id = 1, 1195 .minimum_version_id = 1, 1196 .pre_load = ivshmem_pre_load, 1197 .post_load = ivshmem_post_load, 1198 .fields = (VMStateField[]) { 1199 VMSTATE_PCI_DEVICE(parent_obj, IVShmemState), 
1200 1201 VMSTATE_MSIX_TEST(parent_obj, IVShmemState, test_msix), 1202 VMSTATE_UINT32_TEST(intrstatus, IVShmemState, test_no_msix), 1203 VMSTATE_UINT32_TEST(intrmask, IVShmemState, test_no_msix), 1204 1205 VMSTATE_END_OF_LIST() 1206 }, 1207 .load_state_old = ivshmem_load_old, 1208 .minimum_version_id_old = 0 1209 }; 1210 1211 static Property ivshmem_properties[] = { 1212 DEFINE_PROP_CHR("chardev", IVShmemState, server_chr), 1213 DEFINE_PROP_STRING("size", IVShmemState, sizearg), 1214 DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1), 1215 DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD, 1216 false), 1217 DEFINE_PROP_BIT("msi", IVShmemState, features, IVSHMEM_MSI, true), 1218 DEFINE_PROP_STRING("shm", IVShmemState, shmobj), 1219 DEFINE_PROP_STRING("role", IVShmemState, role), 1220 DEFINE_PROP_UINT32("use64", IVShmemState, not_legacy_32bit, 1), 1221 DEFINE_PROP_END_OF_LIST(), 1222 }; 1223 1224 static void desugar_shm(IVShmemState *s) 1225 { 1226 Object *obj; 1227 char *path; 1228 1229 obj = object_new("memory-backend-file"); 1230 path = g_strdup_printf("/dev/shm/%s", s->shmobj); 1231 object_property_set_str(obj, path, "mem-path", &error_abort); 1232 g_free(path); 1233 object_property_set_int(obj, s->legacy_size, "size", &error_abort); 1234 object_property_set_bool(obj, true, "share", &error_abort); 1235 object_property_add_child(OBJECT(s), "internal-shm-backend", obj, 1236 &error_abort); 1237 user_creatable_complete(obj, &error_abort); 1238 s->hostmem = MEMORY_BACKEND(obj); 1239 } 1240 1241 static void ivshmem_realize(PCIDevice *dev, Error **errp) 1242 { 1243 IVShmemState *s = IVSHMEM_COMMON(dev); 1244 1245 if (!qtest_enabled()) { 1246 error_report("ivshmem is deprecated, please use ivshmem-plain" 1247 " or ivshmem-doorbell instead"); 1248 } 1249 1250 if (!!s->server_chr + !!s->shmobj != 1) { 1251 error_setg(errp, "You must specify either 'shm' or 'chardev'"); 1252 return; 1253 } 1254 1255 if (s->sizearg == NULL) { 1256 s->legacy_size = 4 
<< 20; /* 4 MB default */ 1257 } else { 1258 char *end; 1259 int64_t size = qemu_strtosz(s->sizearg, &end); 1260 if (size < 0 || (size_t)size != size || *end != '\0' 1261 || !is_power_of_2(size)) { 1262 error_setg(errp, "Invalid size %s", s->sizearg); 1263 return; 1264 } 1265 s->legacy_size = size; 1266 } 1267 1268 /* check that role is reasonable */ 1269 if (s->role) { 1270 if (strncmp(s->role, "peer", 5) == 0) { 1271 s->master = ON_OFF_AUTO_OFF; 1272 } else if (strncmp(s->role, "master", 7) == 0) { 1273 s->master = ON_OFF_AUTO_ON; 1274 } else { 1275 error_setg(errp, "'role' must be 'peer' or 'master'"); 1276 return; 1277 } 1278 } else { 1279 s->master = ON_OFF_AUTO_AUTO; 1280 } 1281 1282 if (s->shmobj) { 1283 desugar_shm(s); 1284 } 1285 1286 /* 1287 * Note: we don't use INTx with IVSHMEM_MSI at all, so this is a 1288 * bald-faced lie then. But it's a backwards compatible lie. 1289 */ 1290 pci_config_set_interrupt_pin(dev->config, 1); 1291 1292 ivshmem_common_realize(dev, errp); 1293 } 1294 1295 static void ivshmem_class_init(ObjectClass *klass, void *data) 1296 { 1297 DeviceClass *dc = DEVICE_CLASS(klass); 1298 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); 1299 1300 k->realize = ivshmem_realize; 1301 k->revision = 0; 1302 dc->desc = "Inter-VM shared memory (legacy)"; 1303 dc->props = ivshmem_properties; 1304 dc->vmsd = &ivshmem_vmsd; 1305 } 1306 1307 static const TypeInfo ivshmem_info = { 1308 .name = TYPE_IVSHMEM, 1309 .parent = TYPE_IVSHMEM_COMMON, 1310 .instance_size = sizeof(IVShmemState), 1311 .class_init = ivshmem_class_init, 1312 }; 1313 1314 static void ivshmem_register_types(void) 1315 { 1316 type_register_static(&ivshmem_common_info); 1317 type_register_static(&ivshmem_plain_info); 1318 type_register_static(&ivshmem_doorbell_info); 1319 type_register_static(&ivshmem_info); 1320 } 1321 1322 type_init(ivshmem_register_types) 1323