1 /* 2 * Inter-VM Shared Memory PCI device. 3 * 4 * Author: 5 * Cam Macdonell <cam@cs.ualberta.ca> 6 * 7 * Based On: cirrus_vga.c 8 * Copyright (c) 2004 Fabrice Bellard 9 * Copyright (c) 2004 Makoto Suzuki (suzu) 10 * 11 * and rtl8139.c 12 * Copyright (c) 2006 Igor Kovalenko 13 * 14 * This code is licensed under the GNU GPL v2. 15 * 16 * Contributions after 2012-01-13 are licensed under the terms of the 17 * GNU GPL, version 2 or (at your option) any later version. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "qemu/units.h" 22 #include "qapi/error.h" 23 #include "qemu/cutils.h" 24 #include "hw/pci/pci.h" 25 #include "hw/qdev-properties.h" 26 #include "hw/pci/msi.h" 27 #include "hw/pci/msix.h" 28 #include "sysemu/kvm.h" 29 #include "migration/blocker.h" 30 #include "migration/vmstate.h" 31 #include "qemu/error-report.h" 32 #include "qemu/event_notifier.h" 33 #include "qemu/module.h" 34 #include "qom/object_interfaces.h" 35 #include "chardev/char-fe.h" 36 #include "sysemu/hostmem.h" 37 #include "sysemu/qtest.h" 38 #include "qapi/visitor.h" 39 40 #include "hw/misc/ivshmem.h" 41 42 #define PCI_VENDOR_ID_IVSHMEM PCI_VENDOR_ID_REDHAT_QUMRANET 43 #define PCI_DEVICE_ID_IVSHMEM 0x1110 44 45 #define IVSHMEM_MAX_PEERS UINT16_MAX 46 #define IVSHMEM_IOEVENTFD 0 47 #define IVSHMEM_MSI 1 48 49 #define IVSHMEM_REG_BAR_SIZE 0x100 50 51 #define IVSHMEM_DEBUG 0 52 #define IVSHMEM_DPRINTF(fmt, ...) \ 53 do { \ 54 if (IVSHMEM_DEBUG) { \ 55 printf("IVSHMEM: " fmt, ## __VA_ARGS__); \ 56 } \ 57 } while (0) 58 59 #define TYPE_IVSHMEM_COMMON "ivshmem-common" 60 #define IVSHMEM_COMMON(obj) \ 61 OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_COMMON) 62 63 #define TYPE_IVSHMEM_PLAIN "ivshmem-plain" 64 #define IVSHMEM_PLAIN(obj) \ 65 OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_PLAIN) 66 67 #define TYPE_IVSHMEM_DOORBELL "ivshmem-doorbell" 68 #define IVSHMEM_DOORBELL(obj) \ 69 OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_DOORBELL) 70 71 #define TYPE_IVSHMEM "ivshmem" 72 #define IVSHMEM(obj) \ 73 OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM) 74 75 typedef struct Peer { 76 int nb_eventfds; 77 EventNotifier *eventfds; 78 } Peer; 79 80 typedef struct MSIVector { 81 PCIDevice *pdev; 82 int virq; 83 bool unmasked; 84 } MSIVector; 85 86 typedef struct IVShmemState { 87 /*< private >*/ 88 PCIDevice parent_obj; 89 /*< public >*/ 90 91 uint32_t features; 92 93 /* exactly one of these two may be set */ 94 HostMemoryBackend *hostmem; /* with interrupts */ 95 CharBackend server_chr; /* without interrupts */ 96 97 /* registers */ 98 uint32_t intrmask; 99 uint32_t intrstatus; 100 int vm_id; 101 102 /* BARs */ 103 MemoryRegion ivshmem_mmio; /* BAR 0 (registers) */ 104 MemoryRegion *ivshmem_bar2; /* BAR 2 (shared memory) */ 105 MemoryRegion server_bar2; /* used with server_chr */ 106 107 /* interrupt support */ 108 Peer *peers; 109 int nb_peers; /* space in @peers[] */ 110 uint32_t vectors; 111 MSIVector *msi_vectors; 112 uint64_t msg_buf; /* buffer for receiving server messages */ 113 int msg_buffered_bytes; /* #bytes in @msg_buf */ 114 115 /* migration stuff */ 116 OnOffAuto master; 117 Error *migration_blocker; 118 } IVShmemState; 119 120 /* registers for the Inter-VM shared memory device */ 121 enum ivshmem_registers { 122 INTRMASK = 0, 123 INTRSTATUS = 4, 124 IVPOSITION = 8, 125 DOORBELL = 12, 126 }; 127 128 static inline uint32_t ivshmem_has_feature(IVShmemState *ivs, 129 unsigned int feature) { 130 return (ivs->features & (1 << feature)); 131 } 132 133 static inline bool ivshmem_is_master(IVShmemState *s) 134 { 135 assert(s->master != ON_OFF_AUTO_AUTO); 136 return s->master == ON_OFF_AUTO_ON; 137 } 138 139 static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val) 140 { 141 IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val); 142 143 s->intrmask = val; 144 } 145 146 static uint32_t ivshmem_IntrMask_read(IVShmemState *s) 147 { 148 uint32_t ret = s->intrmask; 149 150 IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret); 151 return ret; 152 } 153 154 static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val) 155 { 156 IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val); 157 158 s->intrstatus = val; 159 } 160 161 static uint32_t ivshmem_IntrStatus_read(IVShmemState *s) 162 { 163 uint32_t ret = s->intrstatus; 164 165 /* reading ISR clears all interrupts */ 166 s->intrstatus = 0; 167 return ret; 168 } 169 170 static void ivshmem_io_write(void *opaque, hwaddr addr, 171 uint64_t val, unsigned size) 172 { 173 IVShmemState *s = opaque; 174 175 uint16_t dest = val >> 16; 176 uint16_t vector = val & 0xff; 177 178 addr &= 0xfc; 179 180 IVSHMEM_DPRINTF("writing to addr " TARGET_FMT_plx "\n", addr); 181 switch (addr) 182 { 183 case INTRMASK: 184 ivshmem_IntrMask_write(s, val); 185 break; 186 187 case INTRSTATUS: 188 ivshmem_IntrStatus_write(s, val); 189 break; 190 191 case DOORBELL: 192 /* check that dest VM ID is reasonable */ 193 if (dest >= s->nb_peers) { 194 IVSHMEM_DPRINTF("Invalid destination VM ID (%d)\n", dest); 195 break; 196 } 197 198 /* check doorbell range */ 199 if (vector < s->peers[dest].nb_eventfds) { 200 IVSHMEM_DPRINTF("Notifying VM %d on vector %d\n", dest, vector); 201 event_notifier_set(&s->peers[dest].eventfds[vector]); 202 } else { 203 IVSHMEM_DPRINTF("Invalid destination vector %d on VM %d\n", 204 vector, dest); 205 } 206 break; 207 default: 208 IVSHMEM_DPRINTF("Unhandled write " TARGET_FMT_plx "\n", addr); 209 } 210 } 211 212 static uint64_t ivshmem_io_read(void *opaque, hwaddr addr, 213 unsigned size) 214 { 215 216 IVShmemState *s = opaque; 217 uint32_t ret; 218 219 switch (addr) 220 { 221 case INTRMASK: 222 ret = ivshmem_IntrMask_read(s); 223 break; 224 225 case INTRSTATUS: 226 ret = ivshmem_IntrStatus_read(s); 227 break; 228 229 case IVPOSITION: 230 ret = s->vm_id; 231 break; 232 233 default: 234 IVSHMEM_DPRINTF("why are we reading " TARGET_FMT_plx "\n", addr); 235 ret = 0; 236 } 237 238 return ret; 239 } 240 241 static const MemoryRegionOps ivshmem_mmio_ops = { 242 .read = ivshmem_io_read, 243 .write = ivshmem_io_write, 244 .endianness = DEVICE_NATIVE_ENDIAN, 245 .impl = { 246 .min_access_size = 4, 247 .max_access_size = 4, 248 }, 249 }; 250 251 static void ivshmem_vector_notify(void *opaque) 252 { 253 MSIVector *entry = opaque; 254 PCIDevice *pdev = entry->pdev; 255 IVShmemState *s = IVSHMEM_COMMON(pdev); 256 int vector = entry - s->msi_vectors; 257 EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; 258 259 if (!event_notifier_test_and_clear(n)) { 260 return; 261 } 262 263 IVSHMEM_DPRINTF("interrupt on vector %p %d\n", pdev, vector); 264 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 265 if (msix_enabled(pdev)) { 266 msix_notify(pdev, vector); 267 } 268 } else { 269 ivshmem_IntrStatus_write(s, 1); 270 } 271 } 272 273 static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector, 274 MSIMessage msg) 275 { 276 IVShmemState *s = IVSHMEM_COMMON(dev); 277 EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; 278 MSIVector *v = &s->msi_vectors[vector]; 279 int ret; 280 281 IVSHMEM_DPRINTF("vector unmask %p %d\n", dev, vector); 282 if (!v->pdev) { 283 error_report("ivshmem: vector %d route does not exist", vector); 284 return -EINVAL; 285 } 286 assert(!v->unmasked); 287 288 ret = kvm_irqchip_update_msi_route(kvm_state, v->virq, msg, dev); 289 if (ret < 0) { 290 return ret; 291 } 292 kvm_irqchip_commit_routes(kvm_state); 293 294 ret = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq); 295 if (ret < 0) { 296 return ret; 297 } 298 v->unmasked = true; 299 300 return 0; 301 } 302 303 static void ivshmem_vector_mask(PCIDevice *dev, unsigned vector) 304 { 305 IVShmemState *s = IVSHMEM_COMMON(dev); 306 EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; 307 MSIVector *v = &s->msi_vectors[vector]; 308 int ret; 309 310 IVSHMEM_DPRINTF("vector mask %p %d\n", dev, vector); 311 if (!v->pdev) { 312 error_report("ivshmem: vector %d route does not exist", vector); 313 return; 314 } 315 assert(v->unmasked); 316 317 ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, v->virq); 318 if (ret < 0) { 319 error_report("remove_irqfd_notifier_gsi failed"); 320 return; 321 } 322 v->unmasked = false; 323 } 324 325 static void ivshmem_vector_poll(PCIDevice *dev, 326 unsigned int vector_start, 327 unsigned int vector_end) 328 { 329 IVShmemState *s = IVSHMEM_COMMON(dev); 330 unsigned int vector; 331 332 IVSHMEM_DPRINTF("vector poll %p %d-%d\n", dev, vector_start, vector_end); 333 334 vector_end = MIN(vector_end, s->vectors); 335 336 for (vector = vector_start; vector < vector_end; vector++) { 337 EventNotifier *notifier = &s->peers[s->vm_id].eventfds[vector]; 338 339 if (!msix_is_masked(dev, vector)) { 340 continue; 341 } 342 343 if (event_notifier_test_and_clear(notifier)) { 344 msix_set_pending(dev, vector); 345 } 346 } 347 } 348 349 static void watch_vector_notifier(IVShmemState *s, EventNotifier *n, 350 int vector) 351 { 352 int eventfd = event_notifier_get_fd(n); 353 354 assert(!s->msi_vectors[vector].pdev); 355 s->msi_vectors[vector].pdev = PCI_DEVICE(s); 356 357 qemu_set_fd_handler(eventfd, ivshmem_vector_notify, 358 NULL, &s->msi_vectors[vector]); 359 } 360 361 static void ivshmem_add_eventfd(IVShmemState *s, int posn, int i) 362 { 363 memory_region_add_eventfd(&s->ivshmem_mmio, 364 DOORBELL, 365 4, 366 true, 367 (posn << 16) | i, 368 &s->peers[posn].eventfds[i]); 369 } 370 371 static void ivshmem_del_eventfd(IVShmemState *s, int posn, int i) 372 { 373 memory_region_del_eventfd(&s->ivshmem_mmio, 374 DOORBELL, 375 4, 376 true, 377 (posn << 16) | i, 378 &s->peers[posn].eventfds[i]); 379 } 380 381 static void close_peer_eventfds(IVShmemState *s, int posn) 382 { 383 int i, n; 384 385 assert(posn >= 0 && posn < s->nb_peers); 386 n = s->peers[posn].nb_eventfds; 387 388 if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) { 389 memory_region_transaction_begin(); 390 for (i = 0; i < n; i++) { 391 ivshmem_del_eventfd(s, posn, i); 392 } 393 memory_region_transaction_commit(); 394 } 395 396 for (i = 0; i < n; i++) { 397 event_notifier_cleanup(&s->peers[posn].eventfds[i]); 398 } 399 400 g_free(s->peers[posn].eventfds); 401 s->peers[posn].nb_eventfds = 0; 402 } 403 404 static void resize_peers(IVShmemState *s, int nb_peers) 405 { 406 int old_nb_peers = s->nb_peers; 407 int i; 408 409 assert(nb_peers > old_nb_peers); 410 IVSHMEM_DPRINTF("bumping storage to %d peers\n", nb_peers); 411 412 s->peers = g_realloc(s->peers, nb_peers * sizeof(Peer)); 413 s->nb_peers = nb_peers; 414 415 for (i = old_nb_peers; i < nb_peers; i++) { 416 s->peers[i].eventfds = g_new0(EventNotifier, s->vectors); 417 s->peers[i].nb_eventfds = 0; 418 } 419 } 420 421 static void ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector, 422 Error **errp) 423 { 424 PCIDevice *pdev = PCI_DEVICE(s); 425 int ret; 426 427 IVSHMEM_DPRINTF("ivshmem_add_kvm_msi_virq vector:%d\n", vector); 428 assert(!s->msi_vectors[vector].pdev); 429 430 ret = kvm_irqchip_add_msi_route(kvm_state, vector, pdev); 431 if (ret < 0) { 432 error_setg(errp, "kvm_irqchip_add_msi_route failed"); 433 return; 434 } 435 436 s->msi_vectors[vector].virq = ret; 437 s->msi_vectors[vector].pdev = pdev; 438 } 439 440 static void setup_interrupt(IVShmemState *s, int vector, Error **errp) 441 { 442 EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; 443 bool with_irqfd = kvm_msi_via_irqfd_enabled() && 444 ivshmem_has_feature(s, IVSHMEM_MSI); 445 PCIDevice *pdev = PCI_DEVICE(s); 446 Error *err = NULL; 447 448 IVSHMEM_DPRINTF("setting up interrupt for vector: %d\n", vector); 449 450 if (!with_irqfd) { 451 IVSHMEM_DPRINTF("with eventfd\n"); 452 watch_vector_notifier(s, n, vector); 453 } else if (msix_enabled(pdev)) { 454 IVSHMEM_DPRINTF("with irqfd\n"); 455 ivshmem_add_kvm_msi_virq(s, vector, &err); 456 if (err) { 457 error_propagate(errp, err); 458 return; 459 } 460 461 if (!msix_is_masked(pdev, vector)) { 462 kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, 463 s->msi_vectors[vector].virq); 464 /* TODO handle error */ 465 } 466 } else { 467 /* it will be delayed until msix is enabled, in write_config */ 468 IVSHMEM_DPRINTF("with irqfd, delayed until msix enabled\n"); 469 } 470 } 471 472 static void process_msg_shmem(IVShmemState *s, int fd, Error **errp) 473 { 474 Error *local_err = NULL; 475 struct stat buf; 476 size_t size; 477 478 if (s->ivshmem_bar2) { 479 error_setg(errp, "server sent unexpected shared memory message"); 480 close(fd); 481 return; 482 } 483 484 if (fstat(fd, &buf) < 0) { 485 error_setg_errno(errp, errno, 486 "can't determine size of shared memory sent by server"); 487 close(fd); 488 return; 489 } 490 491 size = buf.st_size; 492 493 /* mmap the region and map into the BAR2 */ 494 memory_region_init_ram_from_fd(&s->server_bar2, OBJECT(s), 495 "ivshmem.bar2", size, true, fd, &local_err); 496 if (local_err) { 497 error_propagate(errp, local_err); 498 return; 499 } 500 501 s->ivshmem_bar2 = &s->server_bar2; 502 } 503 504 static void process_msg_disconnect(IVShmemState *s, uint16_t posn, 505 Error **errp) 506 { 507 IVSHMEM_DPRINTF("posn %d has gone away\n", posn); 508 if (posn >= s->nb_peers || posn == s->vm_id) { 509 error_setg(errp, "invalid peer %d", posn); 510 return; 511 } 512 close_peer_eventfds(s, posn); 513 } 514 515 static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd, 516 Error **errp) 517 { 518 Peer *peer = &s->peers[posn]; 519 int vector; 520 521 /* 522 * The N-th connect message for this peer comes with the file 523 * descriptor for vector N-1. Count messages to find the vector. 524 */ 525 if (peer->nb_eventfds >= s->vectors) { 526 error_setg(errp, "Too many eventfd received, device has %d vectors", 527 s->vectors); 528 close(fd); 529 return; 530 } 531 vector = peer->nb_eventfds++; 532 533 IVSHMEM_DPRINTF("eventfds[%d][%d] = %d\n", posn, vector, fd); 534 event_notifier_init_fd(&peer->eventfds[vector], fd); 535 fcntl_setfl(fd, O_NONBLOCK); /* msix/irqfd poll non block */ 536 537 if (posn == s->vm_id) { 538 setup_interrupt(s, vector, errp); 539 /* TODO do we need to handle the error? */ 540 } 541 542 if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) { 543 ivshmem_add_eventfd(s, posn, vector); 544 } 545 } 546 547 static void process_msg(IVShmemState *s, int64_t msg, int fd, Error **errp) 548 { 549 IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd); 550 551 if (msg < -1 || msg > IVSHMEM_MAX_PEERS) { 552 error_setg(errp, "server sent invalid message %" PRId64, msg); 553 close(fd); 554 return; 555 } 556 557 if (msg == -1) { 558 process_msg_shmem(s, fd, errp); 559 return; 560 } 561 562 if (msg >= s->nb_peers) { 563 resize_peers(s, msg + 1); 564 } 565 566 if (fd >= 0) { 567 process_msg_connect(s, msg, fd, errp); 568 } else { 569 process_msg_disconnect(s, msg, errp); 570 } 571 } 572 573 static int ivshmem_can_receive(void *opaque) 574 { 575 IVShmemState *s = opaque; 576 577 assert(s->msg_buffered_bytes < sizeof(s->msg_buf)); 578 return sizeof(s->msg_buf) - s->msg_buffered_bytes; 579 } 580 581 static void ivshmem_read(void *opaque, const uint8_t *buf, int size) 582 { 583 IVShmemState *s = opaque; 584 Error *err = NULL; 585 int fd; 586 int64_t msg; 587 588 assert(size >= 0 && s->msg_buffered_bytes + size <= sizeof(s->msg_buf)); 589 memcpy((unsigned char *)&s->msg_buf + s->msg_buffered_bytes, buf, size); 590 s->msg_buffered_bytes += size; 591 if (s->msg_buffered_bytes < sizeof(s->msg_buf)) { 592 return; 593 } 594 msg = le64_to_cpu(s->msg_buf); 595 s->msg_buffered_bytes = 0; 596 597 fd = qemu_chr_fe_get_msgfd(&s->server_chr); 598 599 process_msg(s, msg, fd, &err); 600 if (err) { 601 error_report_err(err); 602 } 603 } 604 605 static int64_t ivshmem_recv_msg(IVShmemState *s, int *pfd, Error **errp) 606 { 607 int64_t msg; 608 int n, ret; 609 610 n = 0; 611 do { 612 ret = qemu_chr_fe_read_all(&s->server_chr, (uint8_t *)&msg + n, 613 sizeof(msg) - n); 614 if (ret < 0) { 615 if (ret == -EINTR) { 616 continue; 617 } 618 error_setg_errno(errp, -ret, "read from server failed"); 619 return INT64_MIN; 620 } 621 n += ret; 622 } while (n < sizeof(msg)); 623 624 *pfd = qemu_chr_fe_get_msgfd(&s->server_chr); 625 return le64_to_cpu(msg); 626 } 627 628 static void ivshmem_recv_setup(IVShmemState *s, Error **errp) 629 { 630 Error *err = NULL; 631 int64_t msg; 632 int fd; 633 634 msg = ivshmem_recv_msg(s, &fd, &err); 635 if (err) { 636 error_propagate(errp, err); 637 return; 638 } 639 if (msg != IVSHMEM_PROTOCOL_VERSION) { 640 error_setg(errp, "server sent version %" PRId64 ", expecting %d", 641 msg, IVSHMEM_PROTOCOL_VERSION); 642 return; 643 } 644 if (fd != -1) { 645 error_setg(errp, "server sent invalid version message"); 646 return; 647 } 648 649 /* 650 * ivshmem-server sends the remaining initial messages in a fixed 651 * order, but the device has always accepted them in any order. 652 * Stay as compatible as practical, just in case people use 653 * servers that behave differently. 654 */ 655 656 /* 657 * ivshmem_device_spec.txt has always required the ID message 658 * right here, and ivshmem-server has always complied. However, 659 * older versions of the device accepted it out of order, but 660 * broke when an interrupt setup message arrived before it. 661 */ 662 msg = ivshmem_recv_msg(s, &fd, &err); 663 if (err) { 664 error_propagate(errp, err); 665 return; 666 } 667 if (fd != -1 || msg < 0 || msg > IVSHMEM_MAX_PEERS) { 668 error_setg(errp, "server sent invalid ID message"); 669 return; 670 } 671 s->vm_id = msg; 672 673 /* 674 * Receive more messages until we got shared memory. 675 */ 676 do { 677 msg = ivshmem_recv_msg(s, &fd, &err); 678 if (err) { 679 error_propagate(errp, err); 680 return; 681 } 682 process_msg(s, msg, fd, &err); 683 if (err) { 684 error_propagate(errp, err); 685 return; 686 } 687 } while (msg != -1); 688 689 /* 690 * This function must either map the shared memory or fail. The 691 * loop above ensures that: it terminates normally only after it 692 * successfully processed the server's shared memory message. 693 * Assert that actually mapped the shared memory: 694 */ 695 assert(s->ivshmem_bar2); 696 } 697 698 /* Select the MSI-X vectors used by device. 699 * ivshmem maps events to vectors statically, so 700 * we just enable all vectors on init and after reset. */ 701 static void ivshmem_msix_vector_use(IVShmemState *s) 702 { 703 PCIDevice *d = PCI_DEVICE(s); 704 int i; 705 706 for (i = 0; i < s->vectors; i++) { 707 msix_vector_use(d, i); 708 } 709 } 710 711 static void ivshmem_disable_irqfd(IVShmemState *s); 712 713 static void ivshmem_reset(DeviceState *d) 714 { 715 IVShmemState *s = IVSHMEM_COMMON(d); 716 717 ivshmem_disable_irqfd(s); 718 719 s->intrstatus = 0; 720 s->intrmask = 0; 721 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 722 ivshmem_msix_vector_use(s); 723 } 724 } 725 726 static int ivshmem_setup_interrupts(IVShmemState *s, Error **errp) 727 { 728 /* allocate QEMU callback data for receiving interrupts */ 729 s->msi_vectors = g_malloc0(s->vectors * sizeof(MSIVector)); 730 731 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 732 if (msix_init_exclusive_bar(PCI_DEVICE(s), s->vectors, 1, errp)) { 733 return -1; 734 } 735 736 IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors); 737 ivshmem_msix_vector_use(s); 738 } 739 740 return 0; 741 } 742 743 static void ivshmem_remove_kvm_msi_virq(IVShmemState *s, int vector) 744 { 745 IVSHMEM_DPRINTF("ivshmem_remove_kvm_msi_virq vector:%d\n", vector); 746 747 if (s->msi_vectors[vector].pdev == NULL) { 748 return; 749 } 750 751 /* it was cleaned when masked in the frontend. */ 752 kvm_irqchip_release_virq(kvm_state, s->msi_vectors[vector].virq); 753 754 s->msi_vectors[vector].pdev = NULL; 755 } 756 757 static void ivshmem_enable_irqfd(IVShmemState *s) 758 { 759 PCIDevice *pdev = PCI_DEVICE(s); 760 int i; 761 762 for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) { 763 Error *err = NULL; 764 765 ivshmem_add_kvm_msi_virq(s, i, &err); 766 if (err) { 767 error_report_err(err); 768 goto undo; 769 } 770 } 771 772 if (msix_set_vector_notifiers(pdev, 773 ivshmem_vector_unmask, 774 ivshmem_vector_mask, 775 ivshmem_vector_poll)) { 776 error_report("ivshmem: msix_set_vector_notifiers failed"); 777 goto undo; 778 } 779 return; 780 781 undo: 782 while (--i >= 0) { 783 ivshmem_remove_kvm_msi_virq(s, i); 784 } 785 } 786 787 static void ivshmem_disable_irqfd(IVShmemState *s) 788 { 789 PCIDevice *pdev = PCI_DEVICE(s); 790 int i; 791 792 if (!pdev->msix_vector_use_notifier) { 793 return; 794 } 795 796 msix_unset_vector_notifiers(pdev); 797 798 for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) { 799 /* 800 * MSI-X is already disabled here so msix_unset_vector_notifiers() 801 * didn't call our release notifier. Do it now to keep our masks and 802 * unmasks balanced. 803 */ 804 if (s->msi_vectors[i].unmasked) { 805 ivshmem_vector_mask(pdev, i); 806 } 807 ivshmem_remove_kvm_msi_virq(s, i); 808 } 809 810 } 811 812 static void ivshmem_write_config(PCIDevice *pdev, uint32_t address, 813 uint32_t val, int len) 814 { 815 IVShmemState *s = IVSHMEM_COMMON(pdev); 816 int is_enabled, was_enabled = msix_enabled(pdev); 817 818 pci_default_write_config(pdev, address, val, len); 819 is_enabled = msix_enabled(pdev); 820 821 if (kvm_msi_via_irqfd_enabled()) { 822 if (!was_enabled && is_enabled) { 823 ivshmem_enable_irqfd(s); 824 } else if (was_enabled && !is_enabled) { 825 ivshmem_disable_irqfd(s); 826 } 827 } 828 } 829 830 static void ivshmem_common_realize(PCIDevice *dev, Error **errp) 831 { 832 IVShmemState *s = IVSHMEM_COMMON(dev); 833 Error *err = NULL; 834 uint8_t *pci_conf; 835 Error *local_err = NULL; 836 837 /* IRQFD requires MSI */ 838 if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD) && 839 !ivshmem_has_feature(s, IVSHMEM_MSI)) { 840 error_setg(errp, "ioeventfd/irqfd requires MSI"); 841 return; 842 } 843 844 pci_conf = dev->config; 845 pci_conf[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY; 846 847 memory_region_init_io(&s->ivshmem_mmio, OBJECT(s), &ivshmem_mmio_ops, s, 848 "ivshmem-mmio", IVSHMEM_REG_BAR_SIZE); 849 850 /* region for registers*/ 851 pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, 852 &s->ivshmem_mmio); 853 854 if (s->hostmem != NULL) { 855 IVSHMEM_DPRINTF("using hostmem\n"); 856 857 s->ivshmem_bar2 = host_memory_backend_get_memory(s->hostmem); 858 host_memory_backend_set_mapped(s->hostmem, true); 859 } else { 860 Chardev *chr = qemu_chr_fe_get_driver(&s->server_chr); 861 assert(chr); 862 863 IVSHMEM_DPRINTF("using shared memory server (socket = %s)\n", 864 chr->filename); 865 866 /* we allocate enough space for 16 peers and grow as needed */ 867 resize_peers(s, 16); 868 869 /* 870 * Receive setup messages from server synchronously. 871 * Older versions did it asynchronously, but that creates a 872 * number of entertaining race conditions. 873 */ 874 ivshmem_recv_setup(s, &err); 875 if (err) { 876 error_propagate(errp, err); 877 return; 878 } 879 880 if (s->master == ON_OFF_AUTO_ON && s->vm_id != 0) { 881 error_setg(errp, 882 "master must connect to the server before any peers"); 883 return; 884 } 885 886 qemu_chr_fe_set_handlers(&s->server_chr, ivshmem_can_receive, 887 ivshmem_read, NULL, NULL, s, NULL, true); 888 889 if (ivshmem_setup_interrupts(s, errp) < 0) { 890 error_prepend(errp, "Failed to initialize interrupts: "); 891 return; 892 } 893 } 894 895 if (s->master == ON_OFF_AUTO_AUTO) { 896 s->master = s->vm_id == 0 ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; 897 } 898 899 if (!ivshmem_is_master(s)) { 900 error_setg(&s->migration_blocker, 901 "Migration is disabled when using feature 'peer mode' in device 'ivshmem'"); 902 migrate_add_blocker(s->migration_blocker, &local_err); 903 if (local_err) { 904 error_propagate(errp, local_err); 905 error_free(s->migration_blocker); 906 return; 907 } 908 } 909 910 vmstate_register_ram(s->ivshmem_bar2, DEVICE(s)); 911 pci_register_bar(PCI_DEVICE(s), 2, 912 PCI_BASE_ADDRESS_SPACE_MEMORY | 913 PCI_BASE_ADDRESS_MEM_PREFETCH | 914 PCI_BASE_ADDRESS_MEM_TYPE_64, 915 s->ivshmem_bar2); 916 } 917 918 static void ivshmem_exit(PCIDevice *dev) 919 { 920 IVShmemState *s = IVSHMEM_COMMON(dev); 921 int i; 922 923 if (s->migration_blocker) { 924 migrate_del_blocker(s->migration_blocker); 925 error_free(s->migration_blocker); 926 } 927 928 if (memory_region_is_mapped(s->ivshmem_bar2)) { 929 if (!s->hostmem) { 930 void *addr = memory_region_get_ram_ptr(s->ivshmem_bar2); 931 int fd; 932 933 if (munmap(addr, memory_region_size(s->ivshmem_bar2) == -1)) { 934 error_report("Failed to munmap shared memory %s", 935 strerror(errno)); 936 } 937 938 fd = memory_region_get_fd(s->ivshmem_bar2); 939 close(fd); 940 } 941 942 vmstate_unregister_ram(s->ivshmem_bar2, DEVICE(dev)); 943 } 944 945 if (s->hostmem) { 946 host_memory_backend_set_mapped(s->hostmem, false); 947 } 948 949 if (s->peers) { 950 for (i = 0; i < s->nb_peers; i++) { 951 close_peer_eventfds(s, i); 952 } 953 g_free(s->peers); 954 } 955 956 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 957 msix_uninit_exclusive_bar(dev); 958 } 959 960 g_free(s->msi_vectors); 961 } 962 963 static int ivshmem_pre_load(void *opaque) 964 { 965 IVShmemState *s = opaque; 966 967 if (!ivshmem_is_master(s)) { 968 error_report("'peer' devices are not migratable"); 969 return -EINVAL; 970 } 971 972 return 0; 973 } 974 975 static int ivshmem_post_load(void *opaque, int version_id) 976 { 977 IVShmemState *s = opaque; 978 979 if (ivshmem_has_feature(s, IVSHMEM_MSI)) { 980 ivshmem_msix_vector_use(s); 981 } 982 return 0; 983 } 984 985 static void ivshmem_common_class_init(ObjectClass *klass, void *data) 986 { 987 DeviceClass *dc = DEVICE_CLASS(klass); 988 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); 989 990 k->realize = ivshmem_common_realize; 991 k->exit = ivshmem_exit; 992 k->config_write = ivshmem_write_config; 993 k->vendor_id = PCI_VENDOR_ID_IVSHMEM; 994 k->device_id = PCI_DEVICE_ID_IVSHMEM; 995 k->class_id = PCI_CLASS_MEMORY_RAM; 996 k->revision = 1; 997 dc->reset = ivshmem_reset; 998 set_bit(DEVICE_CATEGORY_MISC, dc->categories); 999 dc->desc = "Inter-VM shared memory"; 1000 } 1001 1002 static const TypeInfo ivshmem_common_info = { 1003 .name = TYPE_IVSHMEM_COMMON, 1004 .parent = TYPE_PCI_DEVICE, 1005 .instance_size = sizeof(IVShmemState), 1006 .abstract = true, 1007 .class_init = ivshmem_common_class_init, 1008 .interfaces = (InterfaceInfo[]) { 1009 { INTERFACE_CONVENTIONAL_PCI_DEVICE }, 1010 { }, 1011 }, 1012 }; 1013 1014 static const VMStateDescription ivshmem_plain_vmsd = { 1015 .name = TYPE_IVSHMEM_PLAIN, 1016 .version_id = 0, 1017 .minimum_version_id = 0, 1018 .pre_load = ivshmem_pre_load, 1019 .post_load = ivshmem_post_load, 1020 .fields = (VMStateField[]) { 1021 VMSTATE_PCI_DEVICE(parent_obj, IVShmemState), 1022 VMSTATE_UINT32(intrstatus, IVShmemState), 1023 VMSTATE_UINT32(intrmask, IVShmemState), 1024 VMSTATE_END_OF_LIST() 1025 }, 1026 }; 1027 1028 static Property ivshmem_plain_properties[] = { 1029 DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF), 1030 DEFINE_PROP_LINK("memdev", IVShmemState, hostmem, TYPE_MEMORY_BACKEND, 1031 HostMemoryBackend *), 1032 DEFINE_PROP_END_OF_LIST(), 1033 }; 1034 1035 static void ivshmem_plain_realize(PCIDevice *dev, Error **errp) 1036 { 1037 IVShmemState *s = IVSHMEM_COMMON(dev); 1038 1039 if (!s->hostmem) { 1040 error_setg(errp, "You must specify a 'memdev'"); 1041 return; 1042 } else if (host_memory_backend_is_mapped(s->hostmem)) { 1043 char *path = object_get_canonical_path_component(OBJECT(s->hostmem)); 1044 error_setg(errp, "can't use already busy memdev: %s", path); 1045 g_free(path); 1046 return; 1047 } 1048 1049 ivshmem_common_realize(dev, errp); 1050 } 1051 1052 static void ivshmem_plain_class_init(ObjectClass *klass, void *data) 1053 { 1054 DeviceClass *dc = DEVICE_CLASS(klass); 1055 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); 1056 1057 k->realize = ivshmem_plain_realize; 1058 device_class_set_props(dc, ivshmem_plain_properties); 1059 dc->vmsd = &ivshmem_plain_vmsd; 1060 } 1061 1062 static const TypeInfo ivshmem_plain_info = { 1063 .name = TYPE_IVSHMEM_PLAIN, 1064 .parent = TYPE_IVSHMEM_COMMON, 1065 .instance_size = sizeof(IVShmemState), 1066 .class_init = ivshmem_plain_class_init, 1067 }; 1068 1069 static const VMStateDescription ivshmem_doorbell_vmsd = { 1070 .name = TYPE_IVSHMEM_DOORBELL, 1071 .version_id = 0, 1072 .minimum_version_id = 0, 1073 .pre_load = ivshmem_pre_load, 1074 .post_load = ivshmem_post_load, 1075 .fields = (VMStateField[]) { 1076 VMSTATE_PCI_DEVICE(parent_obj, IVShmemState), 1077 VMSTATE_MSIX(parent_obj, IVShmemState), 1078 VMSTATE_UINT32(intrstatus, IVShmemState), 1079 VMSTATE_UINT32(intrmask, IVShmemState), 1080 VMSTATE_END_OF_LIST() 1081 }, 1082 }; 1083 1084 static Property ivshmem_doorbell_properties[] = { 1085 DEFINE_PROP_CHR("chardev", IVShmemState, server_chr), 1086 DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1), 1087 DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD, 1088 true), 1089 DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF), 1090 DEFINE_PROP_END_OF_LIST(), 1091 }; 1092 1093 static void ivshmem_doorbell_init(Object *obj) 1094 { 1095 IVShmemState *s = IVSHMEM_DOORBELL(obj); 1096 1097 s->features |= (1 << IVSHMEM_MSI); 1098 } 1099 1100 static void ivshmem_doorbell_realize(PCIDevice *dev, Error **errp) 1101 { 1102 IVShmemState *s = IVSHMEM_COMMON(dev); 1103 1104 if (!qemu_chr_fe_backend_connected(&s->server_chr)) { 1105 error_setg(errp, "You must specify a 'chardev'"); 1106 return; 1107 } 1108 1109 ivshmem_common_realize(dev, errp); 1110 } 1111 1112 static void ivshmem_doorbell_class_init(ObjectClass *klass, void *data) 1113 { 1114 DeviceClass *dc = DEVICE_CLASS(klass); 1115 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); 1116 1117 k->realize = ivshmem_doorbell_realize; 1118 device_class_set_props(dc, ivshmem_doorbell_properties); 1119 dc->vmsd = &ivshmem_doorbell_vmsd; 1120 } 1121 1122 static const TypeInfo ivshmem_doorbell_info = { 1123 .name = TYPE_IVSHMEM_DOORBELL, 1124 .parent = TYPE_IVSHMEM_COMMON, 1125 .instance_size = sizeof(IVShmemState), 1126 .instance_init = ivshmem_doorbell_init, 1127 .class_init = ivshmem_doorbell_class_init, 1128 }; 1129 1130 static void ivshmem_register_types(void) 1131 { 1132 type_register_static(&ivshmem_common_info); 1133 type_register_static(&ivshmem_plain_info); 1134 type_register_static(&ivshmem_doorbell_info); 1135 } 1136 1137 type_init(ivshmem_register_types) 1138