1 /* 2 * Inter-VM Shared Memory PCI device. 3 * 4 * Author: 5 * Cam Macdonell <cam@cs.ualberta.ca> 6 * 7 * Based On: cirrus_vga.c 8 * Copyright (c) 2004 Fabrice Bellard 9 * Copyright (c) 2004 Makoto Suzuki (suzu) 10 * 11 * and rtl8139.c 12 * Copyright (c) 2006 Igor Kovalenko 13 * 14 * This code is licensed under the GNU GPL v2. 15 * 16 * Contributions after 2012-01-13 are licensed under the terms of the 17 * GNU GPL, version 2 or (at your option) any later version. 18 */ 19 #include "qemu/osdep.h" 20 #include "qemu/units.h" 21 #include "qapi/error.h" 22 #include "qemu/cutils.h" 23 #include "hw/hw.h" 24 #include "hw/pci/pci.h" 25 #include "hw/pci/msi.h" 26 #include "hw/pci/msix.h" 27 #include "sysemu/kvm.h" 28 #include "migration/blocker.h" 29 #include "qemu/error-report.h" 30 #include "qemu/event_notifier.h" 31 #include "qom/object_interfaces.h" 32 #include "chardev/char-fe.h" 33 #include "sysemu/hostmem.h" 34 #include "sysemu/qtest.h" 35 #include "qapi/visitor.h" 36 37 #include "hw/misc/ivshmem.h" 38 39 #define PCI_VENDOR_ID_IVSHMEM PCI_VENDOR_ID_REDHAT_QUMRANET 40 #define PCI_DEVICE_ID_IVSHMEM 0x1110 41 42 #define IVSHMEM_MAX_PEERS UINT16_MAX 43 #define IVSHMEM_IOEVENTFD 0 44 #define IVSHMEM_MSI 1 45 46 #define IVSHMEM_REG_BAR_SIZE 0x100 47 48 #define IVSHMEM_DEBUG 0 49 #define IVSHMEM_DPRINTF(fmt, ...) \ 50 do { \ 51 if (IVSHMEM_DEBUG) { \ 52 printf("IVSHMEM: " fmt, ## __VA_ARGS__); \ 53 } \ 54 } while (0) 55 56 #define TYPE_IVSHMEM_COMMON "ivshmem-common" 57 #define IVSHMEM_COMMON(obj) \ 58 OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_COMMON) 59 60 #define TYPE_IVSHMEM_PLAIN "ivshmem-plain" 61 #define IVSHMEM_PLAIN(obj) \ 62 OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_PLAIN) 63 64 #define TYPE_IVSHMEM_DOORBELL "ivshmem-doorbell" 65 #define IVSHMEM_DOORBELL(obj) \ 66 OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_DOORBELL) 67 68 #define TYPE_IVSHMEM "ivshmem" 69 #define IVSHMEM(obj) \ 70 OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM) 71 72 typedef struct Peer { 73 int nb_eventfds; 74 EventNotifier *eventfds; 75 } Peer; 76 77 typedef struct MSIVector { 78 PCIDevice *pdev; 79 int virq; 80 bool unmasked; 81 } MSIVector; 82 83 typedef struct IVShmemState { 84 /*< private >*/ 85 PCIDevice parent_obj; 86 /*< public >*/ 87 88 uint32_t features; 89 90 /* exactly one of these two may be set */ 91 HostMemoryBackend *hostmem; /* with interrupts */ 92 CharBackend server_chr; /* without interrupts */ 93 94 /* registers */ 95 uint32_t intrmask; 96 uint32_t intrstatus; 97 int vm_id; 98 99 /* BARs */ 100 MemoryRegion ivshmem_mmio; /* BAR 0 (registers) */ 101 MemoryRegion *ivshmem_bar2; /* BAR 2 (shared memory) */ 102 MemoryRegion server_bar2; /* used with server_chr */ 103 104 /* interrupt support */ 105 Peer *peers; 106 int nb_peers; /* space in @peers[] */ 107 uint32_t vectors; 108 MSIVector *msi_vectors; 109 uint64_t msg_buf; /* buffer for receiving server messages */ 110 int msg_buffered_bytes; /* #bytes in @msg_buf */ 111 112 /* migration stuff */ 113 OnOffAuto master; 114 Error *migration_blocker; 115 116 /* legacy cruft */ 117 char *role; 118 char *shmobj; 119 char *sizearg; 120 size_t legacy_size; 121 uint32_t not_legacy_32bit; 122 } IVShmemState; 123 124 /* registers for the Inter-VM shared memory device */ 125 enum ivshmem_registers { 126 INTRMASK = 0, 127 INTRSTATUS = 4, 128 IVPOSITION = 8, 129 DOORBELL = 12, 130 }; 131 132 static inline uint32_t ivshmem_has_feature(IVShmemState *ivs, 133 unsigned int feature) { 134 return (ivs->features & (1 << feature)); 135 } 136 137 static inline bool ivshmem_is_master(IVShmemState *s) 138 { 139 assert(s->master != ON_OFF_AUTO_AUTO); 140 return s->master == ON_OFF_AUTO_ON; 141 } 142 143 static void ivshmem_update_irq(IVShmemState *s) 144 { 145 PCIDevice *d = PCI_DEVICE(s); 146 uint32_t isr = s->intrstatus & s->intrmask; 147 148 /* 149 * Do nothing unless the device actually uses INTx. Here's how 150 * the device variants signal interrupts, what they put in PCI 151 * config space: 152 * Device variant Interrupt Interrupt Pin MSI-X cap. 153 * ivshmem-plain none 0 no 154 * ivshmem-doorbell MSI-X 1 yes(1) 155 * ivshmem,msi=off INTx 1 no 156 * ivshmem,msi=on MSI-X 1(2) yes(1) 157 * (1) if guest enabled MSI-X 158 * (2) the device lies 159 * Leads to the condition for doing nothing: 160 */ 161 if (ivshmem_has_feature(s, IVSHMEM_MSI) 162 || !d->config[PCI_INTERRUPT_PIN]) { 163 return; 164 } 165 166 /* don't print ISR resets */ 167 if (isr) { 168 IVSHMEM_DPRINTF("Set IRQ to %d (%04x %04x)\n", 169 isr ? 1 : 0, s->intrstatus, s->intrmask); 170 } 171 172 pci_set_irq(d, isr != 0); 173 } 174 175 static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val) 176 { 177 IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val); 178 179 s->intrmask = val; 180 ivshmem_update_irq(s); 181 } 182 183 static uint32_t ivshmem_IntrMask_read(IVShmemState *s) 184 { 185 uint32_t ret = s->intrmask; 186 187 IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret); 188 return ret; 189 } 190 191 static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val) 192 { 193 IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val); 194 195 s->intrstatus = val; 196 ivshmem_update_irq(s); 197 } 198 199 static uint32_t ivshmem_IntrStatus_read(IVShmemState *s) 200 { 201 uint32_t ret = s->intrstatus; 202 203 /* reading ISR clears all interrupts */ 204 s->intrstatus = 0; 205 ivshmem_update_irq(s); 206 return ret; 207 } 208 209 static void ivshmem_io_write(void *opaque, hwaddr addr, 210 uint64_t val, unsigned size) 211 { 212 IVShmemState *s = opaque; 213 214 uint16_t dest = val >> 16; 215 uint16_t vector = val & 0xff; 216 217 addr &= 0xfc; 218 219 IVSHMEM_DPRINTF("writing to addr " TARGET_FMT_plx "\n", addr); 220 switch (addr) 221 { 222 case INTRMASK: 223 ivshmem_IntrMask_write(s, val); 224 break; 225 226 case INTRSTATUS: 227 ivshmem_IntrStatus_write(s, val); 228 break; 229 230 case DOORBELL: 231 /* check that dest VM ID is reasonable */ 232 if (dest >= s->nb_peers) { 233 IVSHMEM_DPRINTF("Invalid destination VM ID (%d)\n", dest); 234 break; 235 } 236 237 /* check doorbell range */ 238 if (vector < s->peers[dest].nb_eventfds) { 239 IVSHMEM_DPRINTF("Notifying VM %d on vector %d\n", dest, vector); 240 event_notifier_set(&s->peers[dest].eventfds[vector]); 241 } else { 242 IVSHMEM_DPRINTF("Invalid destination vector %d on VM %d\n", 243 vector, dest); 244 } 245 break; 246 default: 247 IVSHMEM_DPRINTF("Unhandled write " TARGET_FMT_plx "\n", addr); 248 } 249 } 250 251 static uint64_t ivshmem_io_read(void *opaque, hwaddr addr, 252 unsigned size) 253 { 254 255 IVShmemState *s = opaque; 256 uint32_t ret; 257 258 switch (addr) 259 { 260 case INTRMASK: 261 ret = ivshmem_IntrMask_read(s); 262 break; 263 264 case INTRSTATUS: 265 ret = ivshmem_IntrStatus_read(s); 266 break; 267 268 case IVPOSITION: 269 ret = s->vm_id; 270 break; 271 272 default: 273 IVSHMEM_DPRINTF("why are we reading " TARGET_FMT_plx "\n", addr); 274 ret = 0; 275 } 276 277 return ret; 278 } 279 280 static const MemoryRegionOps ivshmem_mmio_ops = { 281 .read = ivshmem_io_read, 282 .write = ivshmem_io_write, 283 .endianness = DEVICE_NATIVE_ENDIAN, 284 .impl = { 285 .min_access_size = 4, 286 .max_access_size = 4, 287 }, 288 }; 289 290 static void ivshmem_vector_notify(void *opaque) 291 { 292 MSIVector *entry = opaque; 293 PCIDevice *pdev = entry->pdev; 294 IVShmemState *s = IVSHMEM_COMMON(pdev); 295 int vector = entry - s->msi_vectors; 296 EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; 297 298 if (!event_notifier_test_and_clear(n)) { 299 return; 300 } 301 302 IVSHMEM_DPRINTF("interrupt on vector %p %d\n", pdev, vector); 303 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 304 if (msix_enabled(pdev)) { 305 msix_notify(pdev, vector); 306 } 307 } else { 308 ivshmem_IntrStatus_write(s, 1); 309 } 310 } 311 312 static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector, 313 MSIMessage msg) 314 { 315 IVShmemState *s = IVSHMEM_COMMON(dev); 316 EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; 317 MSIVector *v = &s->msi_vectors[vector]; 318 int ret; 319 320 IVSHMEM_DPRINTF("vector unmask %p %d\n", dev, vector); 321 if (!v->pdev) { 322 error_report("ivshmem: vector %d route does not exist", vector); 323 return -EINVAL; 324 } 325 assert(!v->unmasked); 326 327 ret = kvm_irqchip_update_msi_route(kvm_state, v->virq, msg, dev); 328 if (ret < 0) { 329 return ret; 330 } 331 kvm_irqchip_commit_routes(kvm_state); 332 333 ret = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq); 334 if (ret < 0) { 335 return ret; 336 } 337 v->unmasked = true; 338 339 return 0; 340 } 341 342 static void ivshmem_vector_mask(PCIDevice *dev, unsigned vector) 343 { 344 IVShmemState *s = IVSHMEM_COMMON(dev); 345 EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; 346 MSIVector *v = &s->msi_vectors[vector]; 347 int ret; 348 349 IVSHMEM_DPRINTF("vector mask %p %d\n", dev, vector); 350 if (!v->pdev) { 351 error_report("ivshmem: vector %d route does not exist", vector); 352 return; 353 } 354 assert(v->unmasked); 355 356 ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, v->virq); 357 if (ret < 0) { 358 error_report("remove_irqfd_notifier_gsi failed"); 359 return; 360 } 361 v->unmasked = false; 362 } 363 364 static void ivshmem_vector_poll(PCIDevice *dev, 365 unsigned int vector_start, 366 unsigned int vector_end) 367 { 368 IVShmemState *s = IVSHMEM_COMMON(dev); 369 unsigned int vector; 370 371 IVSHMEM_DPRINTF("vector poll %p %d-%d\n", dev, vector_start, vector_end); 372 373 vector_end = MIN(vector_end, s->vectors); 374 375 for (vector = vector_start; vector < vector_end; vector++) { 376 EventNotifier *notifier = &s->peers[s->vm_id].eventfds[vector]; 377 378 if (!msix_is_masked(dev, vector)) { 379 continue; 380 } 381 382 if (event_notifier_test_and_clear(notifier)) { 383 msix_set_pending(dev, vector); 384 } 385 } 386 } 387 388 static void watch_vector_notifier(IVShmemState *s, EventNotifier *n, 389 int vector) 390 { 391 int eventfd = event_notifier_get_fd(n); 392 393 assert(!s->msi_vectors[vector].pdev); 394 s->msi_vectors[vector].pdev = PCI_DEVICE(s); 395 396 qemu_set_fd_handler(eventfd, ivshmem_vector_notify, 397 NULL, &s->msi_vectors[vector]); 398 } 399 400 static void ivshmem_add_eventfd(IVShmemState *s, int posn, int i) 401 { 402 memory_region_add_eventfd(&s->ivshmem_mmio, 403 DOORBELL, 404 4, 405 true, 406 (posn << 16) | i, 407 &s->peers[posn].eventfds[i]); 408 } 409 410 static void ivshmem_del_eventfd(IVShmemState *s, int posn, int i) 411 { 412 memory_region_del_eventfd(&s->ivshmem_mmio, 413 DOORBELL, 414 4, 415 true, 416 (posn << 16) | i, 417 &s->peers[posn].eventfds[i]); 418 } 419 420 static void close_peer_eventfds(IVShmemState *s, int posn) 421 { 422 int i, n; 423 424 assert(posn >= 0 && posn < s->nb_peers); 425 n = s->peers[posn].nb_eventfds; 426 427 if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) { 428 memory_region_transaction_begin(); 429 for (i = 0; i < n; i++) { 430 ivshmem_del_eventfd(s, posn, i); 431 } 432 memory_region_transaction_commit(); 433 } 434 435 for (i = 0; i < n; i++) { 436 event_notifier_cleanup(&s->peers[posn].eventfds[i]); 437 } 438 439 g_free(s->peers[posn].eventfds); 440 s->peers[posn].nb_eventfds = 0; 441 } 442 443 static void resize_peers(IVShmemState *s, int nb_peers) 444 { 445 int old_nb_peers = s->nb_peers; 446 int i; 447 448 assert(nb_peers > old_nb_peers); 449 IVSHMEM_DPRINTF("bumping storage to %d peers\n", nb_peers); 450 451 s->peers = g_realloc(s->peers, nb_peers * sizeof(Peer)); 452 s->nb_peers = nb_peers; 453 454 for (i = old_nb_peers; i < nb_peers; i++) { 455 s->peers[i].eventfds = g_new0(EventNotifier, s->vectors); 456 s->peers[i].nb_eventfds = 0; 457 } 458 } 459 460 static void ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector, 461 Error **errp) 462 { 463 PCIDevice *pdev = PCI_DEVICE(s); 464 int ret; 465 466 IVSHMEM_DPRINTF("ivshmem_add_kvm_msi_virq vector:%d\n", vector); 467 assert(!s->msi_vectors[vector].pdev); 468 469 ret = kvm_irqchip_add_msi_route(kvm_state, vector, pdev); 470 if (ret < 0) { 471 error_setg(errp, "kvm_irqchip_add_msi_route failed"); 472 return; 473 } 474 475 s->msi_vectors[vector].virq = ret; 476 s->msi_vectors[vector].pdev = pdev; 477 } 478 479 static void setup_interrupt(IVShmemState *s, int vector, Error **errp) 480 { 481 EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; 482 bool with_irqfd = kvm_msi_via_irqfd_enabled() && 483 ivshmem_has_feature(s, IVSHMEM_MSI); 484 PCIDevice *pdev = PCI_DEVICE(s); 485 Error *err = NULL; 486 487 IVSHMEM_DPRINTF("setting up interrupt for vector: %d\n", vector); 488 489 if (!with_irqfd) { 490 IVSHMEM_DPRINTF("with eventfd\n"); 491 watch_vector_notifier(s, n, vector); 492 } else if (msix_enabled(pdev)) { 493 IVSHMEM_DPRINTF("with irqfd\n"); 494 ivshmem_add_kvm_msi_virq(s, vector, &err); 495 if (err) { 496 error_propagate(errp, err); 497 return; 498 } 499 500 if (!msix_is_masked(pdev, vector)) { 501 kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, 502 s->msi_vectors[vector].virq); 503 /* TODO handle error */ 504 } 505 } else { 506 /* it will be delayed until msix is enabled, in write_config */ 507 IVSHMEM_DPRINTF("with irqfd, delayed until msix enabled\n"); 508 } 509 } 510 511 static void process_msg_shmem(IVShmemState *s, int fd, Error **errp) 512 { 513 Error *local_err = NULL; 514 struct stat buf; 515 size_t size; 516 517 if (s->ivshmem_bar2) { 518 error_setg(errp, "server sent unexpected shared memory message"); 519 close(fd); 520 return; 521 } 522 523 if (fstat(fd, &buf) < 0) { 524 error_setg_errno(errp, errno, 525 "can't determine size of shared memory sent by server"); 526 close(fd); 527 return; 528 } 529 530 size = buf.st_size; 531 532 /* Legacy cruft */ 533 if (s->legacy_size != SIZE_MAX) { 534 if (size < s->legacy_size) { 535 error_setg(errp, "server sent only %zd bytes of shared memory", 536 (size_t)buf.st_size); 537 close(fd); 538 return; 539 } 540 size = s->legacy_size; 541 } 542 543 /* mmap the region and map into the BAR2 */ 544 memory_region_init_ram_from_fd(&s->server_bar2, OBJECT(s), 545 "ivshmem.bar2", size, true, fd, &local_err); 546 if (local_err) { 547 error_propagate(errp, local_err); 548 return; 549 } 550 551 s->ivshmem_bar2 = &s->server_bar2; 552 } 553 554 static void process_msg_disconnect(IVShmemState *s, uint16_t posn, 555 Error **errp) 556 { 557 IVSHMEM_DPRINTF("posn %d has gone away\n", posn); 558 if (posn >= s->nb_peers || posn == s->vm_id) { 559 error_setg(errp, "invalid peer %d", posn); 560 return; 561 } 562 close_peer_eventfds(s, posn); 563 } 564 565 static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd, 566 Error **errp) 567 { 568 Peer *peer = &s->peers[posn]; 569 int vector; 570 571 /* 572 * The N-th connect message for this peer comes with the file 573 * descriptor for vector N-1. Count messages to find the vector. 574 */ 575 if (peer->nb_eventfds >= s->vectors) { 576 error_setg(errp, "Too many eventfd received, device has %d vectors", 577 s->vectors); 578 close(fd); 579 return; 580 } 581 vector = peer->nb_eventfds++; 582 583 IVSHMEM_DPRINTF("eventfds[%d][%d] = %d\n", posn, vector, fd); 584 event_notifier_init_fd(&peer->eventfds[vector], fd); 585 fcntl_setfl(fd, O_NONBLOCK); /* msix/irqfd poll non block */ 586 587 if (posn == s->vm_id) { 588 setup_interrupt(s, vector, errp); 589 /* TODO do we need to handle the error? */ 590 } 591 592 if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) { 593 ivshmem_add_eventfd(s, posn, vector); 594 } 595 } 596 597 static void process_msg(IVShmemState *s, int64_t msg, int fd, Error **errp) 598 { 599 IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd); 600 601 if (msg < -1 || msg > IVSHMEM_MAX_PEERS) { 602 error_setg(errp, "server sent invalid message %" PRId64, msg); 603 close(fd); 604 return; 605 } 606 607 if (msg == -1) { 608 process_msg_shmem(s, fd, errp); 609 return; 610 } 611 612 if (msg >= s->nb_peers) { 613 resize_peers(s, msg + 1); 614 } 615 616 if (fd >= 0) { 617 process_msg_connect(s, msg, fd, errp); 618 } else { 619 process_msg_disconnect(s, msg, errp); 620 } 621 } 622 623 static int ivshmem_can_receive(void *opaque) 624 { 625 IVShmemState *s = opaque; 626 627 assert(s->msg_buffered_bytes < sizeof(s->msg_buf)); 628 return sizeof(s->msg_buf) - s->msg_buffered_bytes; 629 } 630 631 static void ivshmem_read(void *opaque, const uint8_t *buf, int size) 632 { 633 IVShmemState *s = opaque; 634 Error *err = NULL; 635 int fd; 636 int64_t msg; 637 638 assert(size >= 0 && s->msg_buffered_bytes + size <= sizeof(s->msg_buf)); 639 memcpy((unsigned char *)&s->msg_buf + s->msg_buffered_bytes, buf, size); 640 s->msg_buffered_bytes += size; 641 if (s->msg_buffered_bytes < sizeof(s->msg_buf)) { 642 return; 643 } 644 msg = le64_to_cpu(s->msg_buf); 645 s->msg_buffered_bytes = 0; 646 647 fd = qemu_chr_fe_get_msgfd(&s->server_chr); 648 649 process_msg(s, msg, fd, &err); 650 if (err) { 651 error_report_err(err); 652 } 653 } 654 655 static int64_t ivshmem_recv_msg(IVShmemState *s, int *pfd, Error **errp) 656 { 657 int64_t msg; 658 int n, ret; 659 660 n = 0; 661 do { 662 ret = qemu_chr_fe_read_all(&s->server_chr, (uint8_t *)&msg + n, 663 sizeof(msg) - n); 664 if (ret < 0) { 665 if (ret == -EINTR) { 666 continue; 667 } 668 error_setg_errno(errp, -ret, "read from server failed"); 669 return INT64_MIN; 670 } 671 n += ret; 672 } while (n < sizeof(msg)); 673 674 *pfd = qemu_chr_fe_get_msgfd(&s->server_chr); 675 return le64_to_cpu(msg); 676 } 677 678 static void ivshmem_recv_setup(IVShmemState *s, Error **errp) 679 { 680 Error *err = NULL; 681 int64_t msg; 682 int fd; 683 684 msg = ivshmem_recv_msg(s, &fd, &err); 685 if (err) { 686 error_propagate(errp, err); 687 return; 688 } 689 if (msg != IVSHMEM_PROTOCOL_VERSION) { 690 error_setg(errp, "server sent version %" PRId64 ", expecting %d", 691 msg, IVSHMEM_PROTOCOL_VERSION); 692 return; 693 } 694 if (fd != -1) { 695 error_setg(errp, "server sent invalid version message"); 696 return; 697 } 698 699 /* 700 * ivshmem-server sends the remaining initial messages in a fixed 701 * order, but the device has always accepted them in any order. 702 * Stay as compatible as practical, just in case people use 703 * servers that behave differently. 704 */ 705 706 /* 707 * ivshmem_device_spec.txt has always required the ID message 708 * right here, and ivshmem-server has always complied. However, 709 * older versions of the device accepted it out of order, but 710 * broke when an interrupt setup message arrived before it. 711 */ 712 msg = ivshmem_recv_msg(s, &fd, &err); 713 if (err) { 714 error_propagate(errp, err); 715 return; 716 } 717 if (fd != -1 || msg < 0 || msg > IVSHMEM_MAX_PEERS) { 718 error_setg(errp, "server sent invalid ID message"); 719 return; 720 } 721 s->vm_id = msg; 722 723 /* 724 * Receive more messages until we got shared memory. 725 */ 726 do { 727 msg = ivshmem_recv_msg(s, &fd, &err); 728 if (err) { 729 error_propagate(errp, err); 730 return; 731 } 732 process_msg(s, msg, fd, &err); 733 if (err) { 734 error_propagate(errp, err); 735 return; 736 } 737 } while (msg != -1); 738 739 /* 740 * This function must either map the shared memory or fail. The 741 * loop above ensures that: it terminates normally only after it 742 * successfully processed the server's shared memory message. 743 * Assert that actually mapped the shared memory: 744 */ 745 assert(s->ivshmem_bar2); 746 } 747 748 /* Select the MSI-X vectors used by device. 749 * ivshmem maps events to vectors statically, so 750 * we just enable all vectors on init and after reset. */ 751 static void ivshmem_msix_vector_use(IVShmemState *s) 752 { 753 PCIDevice *d = PCI_DEVICE(s); 754 int i; 755 756 for (i = 0; i < s->vectors; i++) { 757 msix_vector_use(d, i); 758 } 759 } 760 761 static void ivshmem_disable_irqfd(IVShmemState *s); 762 763 static void ivshmem_reset(DeviceState *d) 764 { 765 IVShmemState *s = IVSHMEM_COMMON(d); 766 767 ivshmem_disable_irqfd(s); 768 769 s->intrstatus = 0; 770 s->intrmask = 0; 771 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 772 ivshmem_msix_vector_use(s); 773 } 774 } 775 776 static int ivshmem_setup_interrupts(IVShmemState *s, Error **errp) 777 { 778 /* allocate QEMU callback data for receiving interrupts */ 779 s->msi_vectors = g_malloc0(s->vectors * sizeof(MSIVector)); 780 781 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 782 if (msix_init_exclusive_bar(PCI_DEVICE(s), s->vectors, 1, errp)) { 783 return -1; 784 } 785 786 IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors); 787 ivshmem_msix_vector_use(s); 788 } 789 790 return 0; 791 } 792 793 static void ivshmem_remove_kvm_msi_virq(IVShmemState *s, int vector) 794 { 795 IVSHMEM_DPRINTF("ivshmem_remove_kvm_msi_virq vector:%d\n", vector); 796 797 if (s->msi_vectors[vector].pdev == NULL) { 798 return; 799 } 800 801 /* it was cleaned when masked in the frontend. */ 802 kvm_irqchip_release_virq(kvm_state, s->msi_vectors[vector].virq); 803 804 s->msi_vectors[vector].pdev = NULL; 805 } 806 807 static void ivshmem_enable_irqfd(IVShmemState *s) 808 { 809 PCIDevice *pdev = PCI_DEVICE(s); 810 int i; 811 812 for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) { 813 Error *err = NULL; 814 815 ivshmem_add_kvm_msi_virq(s, i, &err); 816 if (err) { 817 error_report_err(err); 818 goto undo; 819 } 820 } 821 822 if (msix_set_vector_notifiers(pdev, 823 ivshmem_vector_unmask, 824 ivshmem_vector_mask, 825 ivshmem_vector_poll)) { 826 error_report("ivshmem: msix_set_vector_notifiers failed"); 827 goto undo; 828 } 829 return; 830 831 undo: 832 while (--i >= 0) { 833 ivshmem_remove_kvm_msi_virq(s, i); 834 } 835 } 836 837 static void ivshmem_disable_irqfd(IVShmemState *s) 838 { 839 PCIDevice *pdev = PCI_DEVICE(s); 840 int i; 841 842 if (!pdev->msix_vector_use_notifier) { 843 return; 844 } 845 846 msix_unset_vector_notifiers(pdev); 847 848 for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) { 849 /* 850 * MSI-X is already disabled here so msix_unset_vector_notifiers() 851 * didn't call our release notifier. Do it now to keep our masks and 852 * unmasks balanced. 853 */ 854 if (s->msi_vectors[i].unmasked) { 855 ivshmem_vector_mask(pdev, i); 856 } 857 ivshmem_remove_kvm_msi_virq(s, i); 858 } 859 860 } 861 862 static void ivshmem_write_config(PCIDevice *pdev, uint32_t address, 863 uint32_t val, int len) 864 { 865 IVShmemState *s = IVSHMEM_COMMON(pdev); 866 int is_enabled, was_enabled = msix_enabled(pdev); 867 868 pci_default_write_config(pdev, address, val, len); 869 is_enabled = msix_enabled(pdev); 870 871 if (kvm_msi_via_irqfd_enabled()) { 872 if (!was_enabled && is_enabled) { 873 ivshmem_enable_irqfd(s); 874 } else if (was_enabled && !is_enabled) { 875 ivshmem_disable_irqfd(s); 876 } 877 } 878 } 879 880 static void ivshmem_common_realize(PCIDevice *dev, Error **errp) 881 { 882 IVShmemState *s = IVSHMEM_COMMON(dev); 883 Error *err = NULL; 884 uint8_t *pci_conf; 885 uint8_t attr = PCI_BASE_ADDRESS_SPACE_MEMORY | 886 PCI_BASE_ADDRESS_MEM_PREFETCH; 887 Error *local_err = NULL; 888 889 /* IRQFD requires MSI */ 890 if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD) && 891 !ivshmem_has_feature(s, IVSHMEM_MSI)) { 892 error_setg(errp, "ioeventfd/irqfd requires MSI"); 893 return; 894 } 895 896 pci_conf = dev->config; 897 pci_conf[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY; 898 899 memory_region_init_io(&s->ivshmem_mmio, OBJECT(s), &ivshmem_mmio_ops, s, 900 "ivshmem-mmio", IVSHMEM_REG_BAR_SIZE); 901 902 /* region for registers*/ 903 pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, 904 &s->ivshmem_mmio); 905 906 if (s->not_legacy_32bit) { 907 attr |= PCI_BASE_ADDRESS_MEM_TYPE_64; 908 } 909 910 if (s->hostmem != NULL) { 911 IVSHMEM_DPRINTF("using hostmem\n"); 912 913 s->ivshmem_bar2 = host_memory_backend_get_memory(s->hostmem); 914 host_memory_backend_set_mapped(s->hostmem, true); 915 } else { 916 Chardev *chr = qemu_chr_fe_get_driver(&s->server_chr); 917 assert(chr); 918 919 IVSHMEM_DPRINTF("using shared memory server (socket = %s)\n", 920 chr->filename); 921 922 /* we allocate enough space for 16 peers and grow as needed */ 923 resize_peers(s, 16); 924 925 /* 926 * Receive setup messages from server synchronously. 927 * Older versions did it asynchronously, but that creates a 928 * number of entertaining race conditions. 929 */ 930 ivshmem_recv_setup(s, &err); 931 if (err) { 932 error_propagate(errp, err); 933 return; 934 } 935 936 if (s->master == ON_OFF_AUTO_ON && s->vm_id != 0) { 937 error_setg(errp, 938 "master must connect to the server before any peers"); 939 return; 940 } 941 942 qemu_chr_fe_set_handlers(&s->server_chr, ivshmem_can_receive, 943 ivshmem_read, NULL, NULL, s, NULL, true); 944 945 if (ivshmem_setup_interrupts(s, errp) < 0) { 946 error_prepend(errp, "Failed to initialize interrupts: "); 947 return; 948 } 949 } 950 951 if (s->master == ON_OFF_AUTO_AUTO) { 952 s->master = s->vm_id == 0 ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; 953 } 954 955 if (!ivshmem_is_master(s)) { 956 error_setg(&s->migration_blocker, 957 "Migration is disabled when using feature 'peer mode' in device 'ivshmem'"); 958 migrate_add_blocker(s->migration_blocker, &local_err); 959 if (local_err) { 960 error_propagate(errp, local_err); 961 error_free(s->migration_blocker); 962 return; 963 } 964 } 965 966 vmstate_register_ram(s->ivshmem_bar2, DEVICE(s)); 967 pci_register_bar(PCI_DEVICE(s), 2, attr, s->ivshmem_bar2); 968 } 969 970 static void ivshmem_exit(PCIDevice *dev) 971 { 972 IVShmemState *s = IVSHMEM_COMMON(dev); 973 int i; 974 975 if (s->migration_blocker) { 976 migrate_del_blocker(s->migration_blocker); 977 error_free(s->migration_blocker); 978 } 979 980 if (memory_region_is_mapped(s->ivshmem_bar2)) { 981 if (!s->hostmem) { 982 void *addr = memory_region_get_ram_ptr(s->ivshmem_bar2); 983 int fd; 984 985 if (munmap(addr, memory_region_size(s->ivshmem_bar2) == -1)) { 986 error_report("Failed to munmap shared memory %s", 987 strerror(errno)); 988 } 989 990 fd = memory_region_get_fd(s->ivshmem_bar2); 991 close(fd); 992 } 993 994 vmstate_unregister_ram(s->ivshmem_bar2, DEVICE(dev)); 995 } 996 997 if (s->hostmem) { 998 host_memory_backend_set_mapped(s->hostmem, false); 999 } 1000 1001 if (s->peers) { 1002 for (i = 0; i < s->nb_peers; i++) { 1003 close_peer_eventfds(s, i); 1004 } 1005 g_free(s->peers); 1006 } 1007 1008 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 1009 msix_uninit_exclusive_bar(dev); 1010 } 1011 1012 g_free(s->msi_vectors); 1013 } 1014 1015 static int ivshmem_pre_load(void *opaque) 1016 { 1017 IVShmemState *s = opaque; 1018 1019 if (!ivshmem_is_master(s)) { 1020 error_report("'peer' devices are not migratable"); 1021 return -EINVAL; 1022 } 1023 1024 return 0; 1025 } 1026 1027 static int ivshmem_post_load(void *opaque, int version_id) 1028 { 1029 IVShmemState *s = opaque; 1030 1031 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 1032 ivshmem_msix_vector_use(s); 1033 } 1034 return 0; 1035 } 1036 1037 static void ivshmem_common_class_init(ObjectClass *klass, void *data) 1038 { 1039 DeviceClass *dc = DEVICE_CLASS(klass); 1040 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); 1041 1042 k->realize = ivshmem_common_realize; 1043 k->exit = ivshmem_exit; 1044 k->config_write = ivshmem_write_config; 1045 k->vendor_id = PCI_VENDOR_ID_IVSHMEM; 1046 k->device_id = PCI_DEVICE_ID_IVSHMEM; 1047 k->class_id = PCI_CLASS_MEMORY_RAM; 1048 k->revision = 1; 1049 dc->reset = ivshmem_reset; 1050 set_bit(DEVICE_CATEGORY_MISC, dc->categories); 1051 dc->desc = "Inter-VM shared memory"; 1052 } 1053 1054 static const TypeInfo ivshmem_common_info = { 1055 .name = TYPE_IVSHMEM_COMMON, 1056 .parent = TYPE_PCI_DEVICE, 1057 .instance_size = sizeof(IVShmemState), 1058 .abstract = true, 1059 .class_init = ivshmem_common_class_init, 1060 .interfaces = (InterfaceInfo[]) { 1061 { INTERFACE_CONVENTIONAL_PCI_DEVICE }, 1062 { }, 1063 }, 1064 }; 1065 1066 static const VMStateDescription ivshmem_plain_vmsd = { 1067 .name = TYPE_IVSHMEM_PLAIN, 1068 .version_id = 0, 1069 .minimum_version_id = 0, 1070 .pre_load = ivshmem_pre_load, 1071 .post_load = ivshmem_post_load, 1072 .fields = (VMStateField[]) { 1073 VMSTATE_PCI_DEVICE(parent_obj, IVShmemState), 1074 VMSTATE_UINT32(intrstatus, IVShmemState), 1075 VMSTATE_UINT32(intrmask, IVShmemState), 1076 VMSTATE_END_OF_LIST() 1077 }, 1078 }; 1079 1080 static Property ivshmem_plain_properties[] = { 1081 DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF), 1082 DEFINE_PROP_LINK("memdev", IVShmemState, hostmem, TYPE_MEMORY_BACKEND, 1083 HostMemoryBackend *), 1084 DEFINE_PROP_END_OF_LIST(), 1085 }; 1086 1087 static void ivshmem_plain_init(Object *obj) 1088 { 1089 IVShmemState *s = IVSHMEM_PLAIN(obj); 1090 1091 s->not_legacy_32bit = 1; 1092 } 1093 1094 static void ivshmem_plain_realize(PCIDevice *dev, Error **errp) 1095 { 1096 IVShmemState *s = IVSHMEM_COMMON(dev); 1097 1098 if (!s->hostmem) { 1099 error_setg(errp, "You must specify a 'memdev'"); 1100 return; 1101 } else if (host_memory_backend_is_mapped(s->hostmem)) { 1102 char *path = object_get_canonical_path_component(OBJECT(s->hostmem)); 1103 error_setg(errp, "can't use already busy memdev: %s", path); 1104 g_free(path); 1105 return; 1106 } 1107 1108 ivshmem_common_realize(dev, errp); 1109 } 1110 1111 static void ivshmem_plain_class_init(ObjectClass *klass, void *data) 1112 { 1113 DeviceClass *dc = DEVICE_CLASS(klass); 1114 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); 1115 1116 k->realize = ivshmem_plain_realize; 1117 dc->props = ivshmem_plain_properties; 1118 dc->vmsd = &ivshmem_plain_vmsd; 1119 } 1120 1121 static const TypeInfo ivshmem_plain_info = { 1122 .name = TYPE_IVSHMEM_PLAIN, 1123 .parent = TYPE_IVSHMEM_COMMON, 1124 .instance_size = sizeof(IVShmemState), 1125 .instance_init = ivshmem_plain_init, 1126 .class_init = ivshmem_plain_class_init, 1127 }; 1128 1129 static const VMStateDescription ivshmem_doorbell_vmsd = { 1130 .name = TYPE_IVSHMEM_DOORBELL, 1131 .version_id = 0, 1132 .minimum_version_id = 0, 1133 .pre_load = ivshmem_pre_load, 1134 .post_load = ivshmem_post_load, 1135 .fields = (VMStateField[]) { 1136 VMSTATE_PCI_DEVICE(parent_obj, IVShmemState), 1137 VMSTATE_MSIX(parent_obj, IVShmemState), 1138 VMSTATE_UINT32(intrstatus, IVShmemState), 1139 VMSTATE_UINT32(intrmask, IVShmemState), 1140 VMSTATE_END_OF_LIST() 1141 }, 1142 }; 1143 1144 static Property ivshmem_doorbell_properties[] = { 1145 DEFINE_PROP_CHR("chardev", IVShmemState, server_chr), 1146 DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1), 1147 DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD, 1148 true), 1149 DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF), 1150 DEFINE_PROP_END_OF_LIST(), 1151 }; 1152 1153 static void ivshmem_doorbell_init(Object *obj) 1154 { 1155 IVShmemState *s = IVSHMEM_DOORBELL(obj); 1156 1157 s->features |= (1 << IVSHMEM_MSI); 1158 s->legacy_size = SIZE_MAX; /* whatever the server sends */ 1159 s->not_legacy_32bit = 1; 1160 } 1161 1162 static void ivshmem_doorbell_realize(PCIDevice *dev, Error **errp) 1163 { 1164 IVShmemState *s = IVSHMEM_COMMON(dev); 1165 1166 if (!qemu_chr_fe_backend_connected(&s->server_chr)) { 1167 error_setg(errp, "You must specify a 'chardev'"); 1168 return; 1169 } 1170 1171 ivshmem_common_realize(dev, errp); 1172 } 1173 1174 static void ivshmem_doorbell_class_init(ObjectClass *klass, void *data) 1175 { 1176 DeviceClass *dc = DEVICE_CLASS(klass); 1177 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); 1178 1179 k->realize = ivshmem_doorbell_realize; 1180 dc->props = ivshmem_doorbell_properties; 1181 dc->vmsd = &ivshmem_doorbell_vmsd; 1182 } 1183 1184 static const TypeInfo ivshmem_doorbell_info = { 1185 .name = TYPE_IVSHMEM_DOORBELL, 1186 .parent = TYPE_IVSHMEM_COMMON, 1187 .instance_size = sizeof(IVShmemState), 1188 .instance_init = ivshmem_doorbell_init, 1189 .class_init = ivshmem_doorbell_class_init, 1190 }; 1191 1192 static int ivshmem_load_old(QEMUFile *f, void *opaque, int version_id) 1193 { 1194 IVShmemState *s = opaque; 1195 PCIDevice *pdev = PCI_DEVICE(s); 1196 int ret; 1197 1198 IVSHMEM_DPRINTF("ivshmem_load_old\n"); 1199 1200 if (version_id != 0) { 1201 return -EINVAL; 1202 } 1203 1204 ret = ivshmem_pre_load(s); 1205 if (ret) { 1206 return ret; 1207 } 1208 1209 ret = pci_device_load(pdev, f); 1210 if (ret) { 1211 return ret; 1212 } 1213 1214 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 1215 msix_load(pdev, f); 1216 ivshmem_msix_vector_use(s); 1217 } else { 1218 s->intrstatus = qemu_get_be32(f); 1219 s->intrmask = qemu_get_be32(f); 1220 } 1221 1222 return 0; 1223 } 1224 1225 static bool test_msix(void *opaque, int version_id) 1226 { 1227 IVShmemState *s = opaque; 1228 1229 return ivshmem_has_feature(s, IVSHMEM_MSI); 1230 } 1231 1232 static bool test_no_msix(void *opaque, int version_id) 1233 { 1234 return !test_msix(opaque, version_id); 1235 } 1236 1237 static const VMStateDescription ivshmem_vmsd = { 1238 .name = "ivshmem", 1239 .version_id = 1, 1240 .minimum_version_id = 1, 1241 .pre_load = ivshmem_pre_load, 1242 .post_load = ivshmem_post_load, 1243 .fields = (VMStateField[]) { 1244 VMSTATE_PCI_DEVICE(parent_obj, IVShmemState), 1245 1246 VMSTATE_MSIX_TEST(parent_obj, IVShmemState, test_msix), 1247 VMSTATE_UINT32_TEST(intrstatus, IVShmemState, test_no_msix), 1248 VMSTATE_UINT32_TEST(intrmask, IVShmemState, test_no_msix), 1249 1250 VMSTATE_END_OF_LIST() 1251 }, 1252 .load_state_old = ivshmem_load_old, 1253 .minimum_version_id_old = 0 1254 }; 1255 1256 static Property ivshmem_properties[] = { 1257 DEFINE_PROP_CHR("chardev", IVShmemState, server_chr), 1258 DEFINE_PROP_STRING("size", IVShmemState, sizearg), 1259 DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1), 1260 DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD, 1261 false), 1262 DEFINE_PROP_BIT("msi", IVShmemState, features, IVSHMEM_MSI, true), 1263 DEFINE_PROP_STRING("shm", IVShmemState, shmobj), 1264 DEFINE_PROP_STRING("role", IVShmemState, role), 1265 DEFINE_PROP_UINT32("use64", IVShmemState, not_legacy_32bit, 1), 1266 DEFINE_PROP_END_OF_LIST(), 1267 }; 1268 1269 static void desugar_shm(IVShmemState *s) 1270 { 1271 Object *obj; 1272 char *path; 1273 1274 obj = object_new("memory-backend-file"); 1275 path = g_strdup_printf("/dev/shm/%s", s->shmobj); 1276 object_property_set_str(obj, path, "mem-path", &error_abort); 1277 g_free(path); 1278 object_property_set_int(obj, s->legacy_size, "size", &error_abort); 1279 object_property_set_bool(obj, true, "share", &error_abort); 1280 object_property_add_child(OBJECT(s), "internal-shm-backend", obj, 1281 &error_abort); 1282 user_creatable_complete(obj, &error_abort); 1283 s->hostmem = MEMORY_BACKEND(obj); 1284 } 1285 1286 static void ivshmem_realize(PCIDevice *dev, Error **errp) 1287 { 1288 IVShmemState *s = IVSHMEM_COMMON(dev); 1289 1290 if (!qtest_enabled()) { 1291 warn_report("ivshmem is deprecated, please use ivshmem-plain" 1292 " or ivshmem-doorbell instead"); 1293 } 1294 1295 if (qemu_chr_fe_backend_connected(&s->server_chr) + !!s->shmobj != 1) { 1296 error_setg(errp, "You must specify either 'shm' or 'chardev'"); 1297 return; 1298 } 1299 1300 if (s->sizearg == NULL) { 1301 s->legacy_size = 4 * MiB; /* 4 MB default */ 1302 } else { 1303 int ret; 1304 uint64_t size; 1305 1306 ret = qemu_strtosz_MiB(s->sizearg, NULL, &size); 1307 if (ret < 0 || (size_t)size != size || !is_power_of_2(size)) { 1308 error_setg(errp, "Invalid size %s", s->sizearg); 1309 return; 1310 } 1311 s->legacy_size = size; 1312 } 1313 1314 /* check that role is reasonable */ 1315 if (s->role) { 1316 if (strncmp(s->role, "peer", 5) == 0) { 1317 s->master = ON_OFF_AUTO_OFF; 1318 } else if (strncmp(s->role, "master", 7) == 0) { 1319 s->master = ON_OFF_AUTO_ON; 1320 } else { 1321 error_setg(errp, "'role' must be 'peer' or 'master'"); 1322 return; 1323 } 1324 } else { 1325 s->master = ON_OFF_AUTO_AUTO; 1326 } 1327 1328 if (s->shmobj) { 1329 desugar_shm(s); 1330 } 1331 1332 /* 1333 * Note: we don't use INTx with IVSHMEM_MSI at all, so this is a 1334 * bald-faced lie then. But it's a backwards compatible lie. 1335 */ 1336 pci_config_set_interrupt_pin(dev->config, 1); 1337 1338 ivshmem_common_realize(dev, errp); 1339 } 1340 1341 static void ivshmem_class_init(ObjectClass *klass, void *data) 1342 { 1343 DeviceClass *dc = DEVICE_CLASS(klass); 1344 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); 1345 1346 k->realize = ivshmem_realize; 1347 k->revision = 0; 1348 dc->desc = "Inter-VM shared memory (legacy)"; 1349 dc->props = ivshmem_properties; 1350 dc->vmsd = &ivshmem_vmsd; 1351 } 1352 1353 static const TypeInfo ivshmem_info = { 1354 .name = TYPE_IVSHMEM, 1355 .parent = TYPE_IVSHMEM_COMMON, 1356 .instance_size = sizeof(IVShmemState), 1357 .class_init = ivshmem_class_init, 1358 }; 1359 1360 static void ivshmem_register_types(void) 1361 { 1362 type_register_static(&ivshmem_common_info); 1363 type_register_static(&ivshmem_plain_info); 1364 type_register_static(&ivshmem_doorbell_info); 1365 type_register_static(&ivshmem_info); 1366 } 1367 1368 type_init(ivshmem_register_types) 1369