/*
 * vhost support
 *
 * Copyright Red Hat, Inc. 2010
 *
 * Authors:
 *  Michael S. Tsirkin <mst@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "hw/virtio/vhost.h"
#include "hw/hw.h"
#include "qemu/atomic.h"
#include "qemu/range.h"
#include <linux/vhost.h>
#include "exec/address-spaces.h"
#include "hw/virtio/virtio-bus.h"
#include "migration/migration.h"

static struct vhost_log *vhost_log;

static void vhost_dev_sync_region(struct vhost_dev *dev,
                                  MemoryRegionSection *section,
                                  uint64_t mfirst, uint64_t mlast,
                                  uint64_t rfirst, uint64_t rlast)
{
    vhost_log_chunk_t *log = dev->log->log;

    uint64_t start = MAX(mfirst, rfirst);
    uint64_t end = MIN(mlast, rlast);
    vhost_log_chunk_t *from = log + start / VHOST_LOG_CHUNK;
    vhost_log_chunk_t *to = log + end / VHOST_LOG_CHUNK + 1;
    uint64_t addr = (start / VHOST_LOG_CHUNK) * VHOST_LOG_CHUNK;

    if (end < start) {
        return;
    }
    assert(end / VHOST_LOG_CHUNK < dev->log_size);
    assert(start / VHOST_LOG_CHUNK < dev->log_size);

    for (;from < to; ++from) {
        vhost_log_chunk_t log;
        /* We first check with non-atomic: much cheaper,
         * and we expect non-dirty to be the common case. */
        if (!*from) {
            addr += VHOST_LOG_CHUNK;
            continue;
        }
        /* Data must be read atomically. We don't really need barrier semantics
         * but it's easier to use atomic_* than roll our own. */
        log = atomic_xchg(from, 0);
        while (log) {
            int bit = ctzl(log);
            hwaddr page_addr;
            hwaddr section_offset;
            hwaddr mr_offset;
            page_addr = addr + bit * VHOST_LOG_PAGE;
            section_offset = page_addr - section->offset_within_address_space;
            mr_offset = section_offset + section->offset_within_region;
            memory_region_set_dirty(section->mr, mr_offset, VHOST_LOG_PAGE);
            log &= ~(0x1ull << bit);
        }
        addr += VHOST_LOG_CHUNK;
    }
}

static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
                                   MemoryRegionSection *section,
                                   hwaddr first,
                                   hwaddr last)
{
    int i;
    hwaddr start_addr;
    hwaddr end_addr;

    if (!dev->log_enabled || !dev->started) {
        return 0;
    }
    start_addr = section->offset_within_address_space;
    end_addr = range_get_last(start_addr, int128_get64(section->size));
    start_addr = MAX(first, start_addr);
    end_addr = MIN(last, end_addr);

    for (i = 0; i < dev->mem->nregions; ++i) {
        struct vhost_memory_region *reg = dev->mem->regions + i;
        vhost_dev_sync_region(dev, section, start_addr, end_addr,
                              reg->guest_phys_addr,
                              range_get_last(reg->guest_phys_addr,
                                             reg->memory_size));
    }
    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_virtqueue *vq = dev->vqs + i;
        vhost_dev_sync_region(dev, section, start_addr, end_addr, vq->used_phys,
                              range_get_last(vq->used_phys, vq->used_size));
    }
    return 0;
}

static void vhost_log_sync(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
                                         memory_listener);
    vhost_sync_dirty_bitmap(dev, section, 0x0, ~0x0ULL);
}

static void vhost_log_sync_range(struct vhost_dev *dev,
                                 hwaddr first, hwaddr last)
{
    int i;
    /* FIXME: this is N^2 in number of sections */
    for (i = 0; i < dev->n_mem_sections; ++i) {
        MemoryRegionSection *section = &dev->mem_sections[i];
        vhost_sync_dirty_bitmap(dev, section, first, last);
    }
}
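
/*
 * Layout of the dirty log used above (a sketch, assuming the definitions in
 * hw/virtio/vhost.h: VHOST_LOG_PAGE is a 4 KiB page and each
 * vhost_log_chunk_t is a host-long-sized bitmap with one bit per page, so a
 * chunk covers VHOST_LOG_BITS pages):
 *
 *   chunk index  = guest_phys_addr / VHOST_LOG_CHUNK
 *   page address = chunk base address + bit * VHOST_LOG_PAGE
 *
 * For example, with 64-bit chunks (VHOST_LOG_CHUNK = 256 KiB) a set bit 3 in
 * chunk 2 marks the guest page at 2 * 0x40000 + 3 * 0x1000 = 0x83000 as
 * dirty, which vhost_dev_sync_region() above forwards to
 * memory_region_set_dirty().
 */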

/* Assign/unassign. Keep an unsorted array of non-overlapping
 * memory regions in dev->mem. */
static void vhost_dev_unassign_memory(struct vhost_dev *dev,
                                      uint64_t start_addr,
                                      uint64_t size)
{
    int from, to, n = dev->mem->nregions;
    /* Track overlapping/split regions for sanity checking. */
    int overlap_start = 0, overlap_end = 0, overlap_middle = 0, split = 0;

    for (from = 0, to = 0; from < n; ++from, ++to) {
        struct vhost_memory_region *reg = dev->mem->regions + to;
        uint64_t reglast;
        uint64_t memlast;
        uint64_t change;

        /* clone old region */
        if (to != from) {
            memcpy(reg, dev->mem->regions + from, sizeof *reg);
        }

        /* No overlap is simple */
        if (!ranges_overlap(reg->guest_phys_addr, reg->memory_size,
                            start_addr, size)) {
            continue;
        }

        /* Split only happens if supplied region
         * is in the middle of an existing one. Thus it can not
         * overlap with any other existing region. */
        assert(!split);

        reglast = range_get_last(reg->guest_phys_addr, reg->memory_size);
        memlast = range_get_last(start_addr, size);

        /* Remove whole region */
        if (start_addr <= reg->guest_phys_addr && memlast >= reglast) {
            --dev->mem->nregions;
            --to;
            ++overlap_middle;
            continue;
        }

        /* Shrink region */
        if (memlast >= reglast) {
            reg->memory_size = start_addr - reg->guest_phys_addr;
            assert(reg->memory_size);
            assert(!overlap_end);
            ++overlap_end;
            continue;
        }

        /* Shift region */
        if (start_addr <= reg->guest_phys_addr) {
            change = memlast + 1 - reg->guest_phys_addr;
            reg->memory_size -= change;
            reg->guest_phys_addr += change;
            reg->userspace_addr += change;
            assert(reg->memory_size);
            assert(!overlap_start);
            ++overlap_start;
            continue;
        }

        /* This only happens if supplied region
         * is in the middle of an existing one. Thus it can not
         * overlap with any other existing region. */
        assert(!overlap_start);
        assert(!overlap_end);
        assert(!overlap_middle);
        /* Split region: shrink first part, shift second part. */
        memcpy(dev->mem->regions + n, reg, sizeof *reg);
        reg->memory_size = start_addr - reg->guest_phys_addr;
        assert(reg->memory_size);
        change = memlast + 1 - reg->guest_phys_addr;
        reg = dev->mem->regions + n;
        reg->memory_size -= change;
        assert(reg->memory_size);
        reg->guest_phys_addr += change;
        reg->userspace_addr += change;
        /* Never add more than 1 region */
        assert(dev->mem->nregions == n);
        ++dev->mem->nregions;
        ++split;
    }
}

/* Called after unassign, so no regions overlap the given range. */
static void vhost_dev_assign_memory(struct vhost_dev *dev,
                                    uint64_t start_addr,
                                    uint64_t size,
                                    uint64_t uaddr)
{
    int from, to;
    struct vhost_memory_region *merged = NULL;
    for (from = 0, to = 0; from < dev->mem->nregions; ++from, ++to) {
        struct vhost_memory_region *reg = dev->mem->regions + to;
        uint64_t prlast, urlast;
        uint64_t pmlast, umlast;
        uint64_t s, e, u;

        /* clone old region */
        if (to != from) {
            memcpy(reg, dev->mem->regions + from, sizeof *reg);
        }
        prlast = range_get_last(reg->guest_phys_addr, reg->memory_size);
        pmlast = range_get_last(start_addr, size);
        urlast = range_get_last(reg->userspace_addr, reg->memory_size);
        umlast = range_get_last(uaddr, size);

        /* check for overlapping regions: should never happen. */
        assert(prlast < start_addr || pmlast < reg->guest_phys_addr);
        /* Not an adjacent or overlapping region - do not merge. */
        if ((prlast + 1 != start_addr || urlast + 1 != uaddr) &&
            (pmlast + 1 != reg->guest_phys_addr ||
             umlast + 1 != reg->userspace_addr)) {
            continue;
        }

        if (merged) {
            --to;
            assert(to >= 0);
        } else {
            merged = reg;
        }
        u = MIN(uaddr, reg->userspace_addr);
        s = MIN(start_addr, reg->guest_phys_addr);
        e = MAX(pmlast, prlast);
        uaddr = merged->userspace_addr = u;
        start_addr = merged->guest_phys_addr = s;
        size = merged->memory_size = e - s + 1;
        assert(merged->memory_size);
    }

    if (!merged) {
        struct vhost_memory_region *reg = dev->mem->regions + to;
        memset(reg, 0, sizeof *reg);
        reg->memory_size = size;
        assert(reg->memory_size);
        reg->guest_phys_addr = start_addr;
        reg->userspace_addr = uaddr;
        ++to;
    }
    assert(to <= dev->mem->nregions + 1);
    dev->mem->nregions = to;
}
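
/*
 * The log is sized in vhost_log_chunk_t units, not bytes: it needs one chunk
 * for every VHOST_LOG_CHUNK-sized slice of guest memory up to the highest
 * address covered by a memory region or a used ring.  As a rough example
 * (assuming 4 KiB pages and 64-bit chunks, i.e. 256 KiB per chunk), a guest
 * whose highest RAM address is just below 4 GiB needs
 * 0xffffffff / VHOST_LOG_CHUNK + 1 = 16384 chunks, i.e. a 128 KiB log.
 */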
static uint64_t vhost_get_log_size(struct vhost_dev *dev)
{
    uint64_t log_size = 0;
    int i;
    for (i = 0; i < dev->mem->nregions; ++i) {
        struct vhost_memory_region *reg = dev->mem->regions + i;
        uint64_t last = range_get_last(reg->guest_phys_addr,
                                       reg->memory_size);
        log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1);
    }
    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_virtqueue *vq = dev->vqs + i;
        uint64_t last = vq->used_phys + vq->used_size - 1;
        log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1);
    }
    return log_size;
}

static struct vhost_log *vhost_log_alloc(uint64_t size)
{
    struct vhost_log *log = g_malloc0(sizeof *log + size * sizeof(*(log->log)));

    log->size = size;
    log->refcnt = 1;

    return log;
}

static struct vhost_log *vhost_log_get(uint64_t size)
{
    if (!vhost_log || vhost_log->size != size) {
        vhost_log = vhost_log_alloc(size);
    } else {
        ++vhost_log->refcnt;
    }

    return vhost_log;
}

static void vhost_log_put(struct vhost_dev *dev, bool sync)
{
    struct vhost_log *log = dev->log;

    if (!log) {
        return;
    }

    --log->refcnt;
    if (log->refcnt == 0) {
        /* Sync only the range covered by the old log */
        if (dev->log_size && sync) {
            vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK - 1);
        }
        if (vhost_log == log) {
            vhost_log = NULL;
        }
        g_free(log);
    }
}

static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size)
{
    struct vhost_log *log = vhost_log_get(size);
    uint64_t log_base = (uintptr_t)log->log;
    int r;

    r = dev->vhost_ops->vhost_call(dev, VHOST_SET_LOG_BASE, &log_base);
    assert(r >= 0);
    vhost_log_put(dev, true);
    dev->log = log;
    dev->log_size = size;
}

static int vhost_verify_ring_mappings(struct vhost_dev *dev,
                                      uint64_t start_addr,
                                      uint64_t size)
{
    int i;
    int r = 0;

    for (i = 0; !r && i < dev->nvqs; ++i) {
        struct vhost_virtqueue *vq = dev->vqs + i;
        hwaddr l;
        void *p;

        if (!ranges_overlap(start_addr, size, vq->ring_phys, vq->ring_size)) {
            continue;
        }
        l = vq->ring_size;
        p = cpu_physical_memory_map(vq->ring_phys, &l, 1);
        if (!p || l != vq->ring_size) {
            fprintf(stderr, "Unable to map ring buffer for ring %d\n", i);
            r = -ENOMEM;
        }
        if (p != vq->ring) {
            fprintf(stderr, "Ring buffer relocated for ring %d\n", i);
            r = -EBUSY;
        }
        cpu_physical_memory_unmap(p, l, 0, 0);
    }
    return r;
}

static struct vhost_memory_region *vhost_dev_find_reg(struct vhost_dev *dev,
                                                      uint64_t start_addr,
                                                      uint64_t size)
{
    int i, n = dev->mem->nregions;
    for (i = 0; i < n; ++i) {
        struct vhost_memory_region *reg = dev->mem->regions + i;
        if (ranges_overlap(reg->guest_phys_addr, reg->memory_size,
                           start_addr, size)) {
            return reg;
        }
    }
    return NULL;
}

static bool vhost_dev_cmp_memory(struct vhost_dev *dev,
                                 uint64_t start_addr,
                                 uint64_t size,
                                 uint64_t uaddr)
{
    struct vhost_memory_region *reg = vhost_dev_find_reg(dev, start_addr, size);
    uint64_t reglast;
    uint64_t memlast;

    if (!reg) {
        return true;
    }

    reglast = range_get_last(reg->guest_phys_addr, reg->memory_size);
    memlast = range_get_last(start_addr, size);

    /* Need to extend region? */
    if (start_addr < reg->guest_phys_addr || memlast > reglast) {
        return true;
    }
    /* userspace_addr changed? */
    return uaddr != reg->userspace_addr + start_addr - reg->guest_phys_addr;
}

static void vhost_set_memory(MemoryListener *listener,
                             MemoryRegionSection *section,
                             bool add)
{
    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
                                         memory_listener);
    hwaddr start_addr = section->offset_within_address_space;
    ram_addr_t size = int128_get64(section->size);
    bool log_dirty =
        memory_region_get_dirty_log_mask(section->mr) & ~(1 << DIRTY_MEMORY_MIGRATION);
    int s = offsetof(struct vhost_memory, regions) +
        (dev->mem->nregions + 1) * sizeof dev->mem->regions[0];
    void *ram;

    dev->mem = g_realloc(dev->mem, s);

    if (log_dirty) {
        add = false;
    }

    assert(size);

    /* Optimize no-change case. At least cirrus_vga does this a lot at this time. */
    ram = memory_region_get_ram_ptr(section->mr) + section->offset_within_region;
    if (add) {
        if (!vhost_dev_cmp_memory(dev, start_addr, size, (uintptr_t)ram)) {
            /* Region exists with same address. Nothing to do. */
            return;
        }
    } else {
        if (!vhost_dev_find_reg(dev, start_addr, size)) {
            /* Removing region that we don't access. Nothing to do. */
            return;
        }
    }

    vhost_dev_unassign_memory(dev, start_addr, size);
    if (add) {
        /* Add given mapping, merging adjacent regions if any */
        vhost_dev_assign_memory(dev, start_addr, size, (uintptr_t)ram);
    } else {
        /* Remove old mapping for this memory, if any. */
        vhost_dev_unassign_memory(dev, start_addr, size);
    }
    dev->mem_changed_start_addr = MIN(dev->mem_changed_start_addr, start_addr);
    dev->mem_changed_end_addr = MAX(dev->mem_changed_end_addr, start_addr + size - 1);
    dev->memory_changed = true;
}

static bool vhost_section(MemoryRegionSection *section)
{
    return memory_region_is_ram(section->mr);
}
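
/*
 * MemoryListener callbacks.  The memory core wraps each batch of
 * region_add/region_del/region_nop calls in begin ... commit:
 * vhost_begin() resets the changed-range accumulator, vhost_set_memory()
 * (called from region_add/region_del) rewrites dev->mem and widens the
 * changed range, and vhost_commit() pushes the new table to the backend
 * with VHOST_SET_MEM_TABLE, resizing the dirty log first when logging is
 * enabled.
 */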
static void vhost_begin(MemoryListener *listener)
{
    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
                                         memory_listener);
    dev->mem_changed_end_addr = 0;
    dev->mem_changed_start_addr = -1;
}

static void vhost_commit(MemoryListener *listener)
{
    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
                                         memory_listener);
    hwaddr start_addr = 0;
    ram_addr_t size = 0;
    uint64_t log_size;
    int r;

    if (!dev->memory_changed) {
        return;
    }
    if (!dev->started) {
        return;
    }
    if (dev->mem_changed_start_addr > dev->mem_changed_end_addr) {
        return;
    }

    if (dev->started) {
        start_addr = dev->mem_changed_start_addr;
        size = dev->mem_changed_end_addr - dev->mem_changed_start_addr + 1;

        r = vhost_verify_ring_mappings(dev, start_addr, size);
        assert(r >= 0);
    }

    if (!dev->log_enabled) {
        r = dev->vhost_ops->vhost_call(dev, VHOST_SET_MEM_TABLE, dev->mem);
        assert(r >= 0);
        dev->memory_changed = false;
        return;
    }
    log_size = vhost_get_log_size(dev);
    /* We allocate an extra 4K bytes to log,
     * to reduce the number of reallocations. */
#define VHOST_LOG_BUFFER (0x1000 / sizeof *dev->log)
    /* To log more, must increase log size before table update. */
    if (dev->log_size < log_size) {
        vhost_dev_log_resize(dev, log_size + VHOST_LOG_BUFFER);
    }
    r = dev->vhost_ops->vhost_call(dev, VHOST_SET_MEM_TABLE, dev->mem);
    assert(r >= 0);
    /* To log less, can only decrease log size after table update. */
    if (dev->log_size > log_size + VHOST_LOG_BUFFER) {
        vhost_dev_log_resize(dev, log_size);
    }
    dev->memory_changed = false;
}

static void vhost_region_add(MemoryListener *listener,
                             MemoryRegionSection *section)
{
    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
                                         memory_listener);

    if (!vhost_section(section)) {
        return;
    }

    ++dev->n_mem_sections;
    dev->mem_sections = g_renew(MemoryRegionSection, dev->mem_sections,
                                dev->n_mem_sections);
    dev->mem_sections[dev->n_mem_sections - 1] = *section;
    memory_region_ref(section->mr);
    vhost_set_memory(listener, section, true);
}

static void vhost_region_del(MemoryListener *listener,
                             MemoryRegionSection *section)
{
    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
                                         memory_listener);
    int i;

    if (!vhost_section(section)) {
        return;
    }

    vhost_set_memory(listener, section, false);
    memory_region_unref(section->mr);
    for (i = 0; i < dev->n_mem_sections; ++i) {
        if (dev->mem_sections[i].offset_within_address_space
            == section->offset_within_address_space) {
            --dev->n_mem_sections;
            memmove(&dev->mem_sections[i], &dev->mem_sections[i+1],
                    (dev->n_mem_sections - i) * sizeof(*dev->mem_sections));
            break;
        }
    }
}

static void vhost_region_nop(MemoryListener *listener,
                             MemoryRegionSection *section)
{
}

static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
                                    struct vhost_virtqueue *vq,
                                    unsigned idx, bool enable_log)
{
    struct vhost_vring_addr addr = {
        .index = idx,
        .desc_user_addr = (uint64_t)(unsigned long)vq->desc,
        .avail_user_addr = (uint64_t)(unsigned long)vq->avail,
        .used_user_addr = (uint64_t)(unsigned long)vq->used,
        .log_guest_addr = vq->used_phys,
        .flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0,
    };
    int r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_ADDR, &addr);
    if (r < 0) {
        return -errno;
    }
    return 0;
}

static int vhost_dev_set_features(struct vhost_dev *dev, bool enable_log)
{
    uint64_t features = dev->acked_features;
    int r;
    if (enable_log) {
        features |= 0x1ULL << VHOST_F_LOG_ALL;
    }
    r = dev->vhost_ops->vhost_call(dev, VHOST_SET_FEATURES, &features);
    return r < 0 ? -errno : 0;
}

static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log)
{
    int r, t, i;
    r = vhost_dev_set_features(dev, enable_log);
    if (r < 0) {
        goto err_features;
    }
    for (i = 0; i < dev->nvqs; ++i) {
        r = vhost_virtqueue_set_addr(dev, dev->vqs + i, i,
                                     enable_log);
        if (r < 0) {
            goto err_vq;
        }
    }
    return 0;
err_vq:
    for (; i >= 0; --i) {
        t = vhost_virtqueue_set_addr(dev, dev->vqs + i, i,
                                     dev->log_enabled);
        assert(t >= 0);
    }
    t = vhost_dev_set_features(dev, dev->log_enabled);
    assert(t >= 0);
err_features:
    return r;
}

static int vhost_migration_log(MemoryListener *listener, int enable)
{
    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
                                         memory_listener);
    int r;
    if (!!enable == dev->log_enabled) {
        return 0;
    }
    if (!dev->started) {
        dev->log_enabled = enable;
        return 0;
    }
    if (!enable) {
        r = vhost_dev_set_log(dev, false);
        if (r < 0) {
            return r;
        }
        vhost_log_put(dev, false);
        dev->log = NULL;
        dev->log_size = 0;
    } else {
        vhost_dev_log_resize(dev, vhost_get_log_size(dev));
        r = vhost_dev_set_log(dev, true);
        if (r < 0) {
            return r;
        }
    }
    dev->log_enabled = enable;
    return 0;
}

static void vhost_log_global_start(MemoryListener *listener)
{
    int r;

    r = vhost_migration_log(listener, true);
    if (r < 0) {
        abort();
    }
}

static void vhost_log_global_stop(MemoryListener *listener)
{
    int r;

    r = vhost_migration_log(listener, false);
    if (r < 0) {
        abort();
    }
}

static void vhost_log_start(MemoryListener *listener,
                            MemoryRegionSection *section,
                            int old, int new)
{
    /* FIXME: implement */
}

static void vhost_log_stop(MemoryListener *listener,
                           MemoryRegionSection *section,
                           int old, int new)
{
    /* FIXME: implement */
}

static int vhost_virtqueue_start(struct vhost_dev *dev,
                                 struct VirtIODevice *vdev,
                                 struct vhost_virtqueue *vq,
                                 unsigned idx)
{
    hwaddr s, l, a;
    int r;
    int vhost_vq_index = idx - dev->vq_index;
    struct vhost_vring_file file = {
        .index = vhost_vq_index
    };
    struct vhost_vring_state state = {
        .index = vhost_vq_index
    };
    struct VirtQueue *vvq = virtio_get_queue(vdev, idx);

    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    vq->num = state.num = virtio_queue_get_num(vdev, idx);
    r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_NUM, &state);
    if (r) {
        return -errno;
    }

    state.num = virtio_queue_get_last_avail_idx(vdev, idx);
    r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_BASE, &state);
    if (r) {
        return -errno;
    }

    s = l = virtio_queue_get_desc_size(vdev, idx);
    a = virtio_queue_get_desc_addr(vdev, idx);
    vq->desc = cpu_physical_memory_map(a, &l, 0);
    if (!vq->desc || l != s) {
        r = -ENOMEM;
        goto fail_alloc_desc;
    }
    s = l = virtio_queue_get_avail_size(vdev, idx);
    a = virtio_queue_get_avail_addr(vdev, idx);
    vq->avail = cpu_physical_memory_map(a, &l, 0);
    if (!vq->avail || l != s) {
        r = -ENOMEM;
        goto fail_alloc_avail;
    }
    vq->used_size = s = l = virtio_queue_get_used_size(vdev, idx);
    vq->used_phys = a = virtio_queue_get_used_addr(vdev, idx);
    vq->used = cpu_physical_memory_map(a, &l, 1);
    if (!vq->used || l != s) {
        r = -ENOMEM;
        goto fail_alloc_used;
    }

    vq->ring_size = s = l = virtio_queue_get_ring_size(vdev, idx);
    vq->ring_phys = a = virtio_queue_get_ring_addr(vdev, idx);
    vq->ring = cpu_physical_memory_map(a, &l, 1);
    if (!vq->ring || l != s) {
        r = -ENOMEM;
        goto fail_alloc_ring;
    }

    r = vhost_virtqueue_set_addr(dev, vq, vhost_vq_index, dev->log_enabled);
    if (r < 0) {
        r = -errno;
        goto fail_alloc;
    }

    file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq));
    r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_KICK, &file);
    if (r) {
        r = -errno;
        goto fail_kick;
    }

    /* Clear and discard previous events if any. */
    event_notifier_test_and_clear(&vq->masked_notifier);

    return 0;

fail_kick:
fail_alloc:
    cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
                              0, 0);
fail_alloc_ring:
    cpu_physical_memory_unmap(vq->used, virtio_queue_get_used_size(vdev, idx),
                              0, 0);
fail_alloc_used:
    cpu_physical_memory_unmap(vq->avail, virtio_queue_get_avail_size(vdev, idx),
                              0, 0);
fail_alloc_avail:
    cpu_physical_memory_unmap(vq->desc, virtio_queue_get_desc_size(vdev, idx),
                              0, 0);
fail_alloc_desc:
    return r;
}

static void vhost_virtqueue_stop(struct vhost_dev *dev,
                                 struct VirtIODevice *vdev,
                                 struct vhost_virtqueue *vq,
                                 unsigned idx)
{
    struct vhost_vring_state state = {
        .index = idx - dev->vq_index
    };
    int r;
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
    r = dev->vhost_ops->vhost_call(dev, VHOST_GET_VRING_BASE, &state);
    if (r < 0) {
        fprintf(stderr, "vhost VQ %d ring restore failed: %d\n", idx, r);
        fflush(stderr);
    }
    virtio_queue_set_last_avail_idx(vdev, idx, state.num);
    virtio_queue_invalidate_signalled_used(vdev, idx);
    assert(r >= 0);
    cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
                              0, virtio_queue_get_ring_size(vdev, idx));
    cpu_physical_memory_unmap(vq->used, virtio_queue_get_used_size(vdev, idx),
                              1, virtio_queue_get_used_size(vdev, idx));
    cpu_physical_memory_unmap(vq->avail, virtio_queue_get_avail_size(vdev, idx),
                              0, virtio_queue_get_avail_size(vdev, idx));
    cpu_physical_memory_unmap(vq->desc, virtio_queue_get_desc_size(vdev, idx),
                              0, virtio_queue_get_desc_size(vdev, idx));
}

static void vhost_eventfd_add(MemoryListener *listener,
                              MemoryRegionSection *section,
                              bool match_data, uint64_t data, EventNotifier *e)
{
}

static void vhost_eventfd_del(MemoryListener *listener,
                              MemoryRegionSection *section,
                              bool match_data, uint64_t data, EventNotifier *e)
{
}

static int vhost_virtqueue_init(struct vhost_dev *dev,
                                struct vhost_virtqueue *vq, int n)
{
    struct vhost_vring_file file = {
        .index = n,
    };
    int r = event_notifier_init(&vq->masked_notifier, 0);
    if (r < 0) {
        return r;
    }

    file.fd = event_notifier_get_fd(&vq->masked_notifier);
    r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_CALL, &file);
    if (r) {
        r = -errno;
        goto fail_call;
    }
    return 0;
fail_call:
    event_notifier_cleanup(&vq->masked_notifier);
    return r;
}

static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq)
{
    event_notifier_cleanup(&vq->masked_notifier);
}
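
/*
 * Rough lifecycle as seen from a backend such as vhost_net (a sketch,
 * simplified; error handling omitted):
 *
 *   vhost_dev_init(&dev, opaque, backend_type, force);
 *   ...
 *   vhost_dev_enable_notifiers(&dev, vdev);
 *   vhost_dev_start(&dev, vdev);
 *   ...                             (guest runs, vhost processes the rings)
 *   vhost_dev_stop(&dev, vdev);
 *   vhost_dev_disable_notifiers(&dev, vdev);
 *   ...
 *   vhost_dev_cleanup(&dev);
 */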
int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
                   VhostBackendType backend_type, bool force)
{
    uint64_t features;
    int i, r;

    if (vhost_set_backend_type(hdev, backend_type) < 0) {
        close((uintptr_t)opaque);
        return -1;
    }

    if (hdev->vhost_ops->vhost_backend_init(hdev, opaque) < 0) {
        close((uintptr_t)opaque);
        return -errno;
    }

    r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_OWNER, NULL);
    if (r < 0) {
        goto fail;
    }

    r = hdev->vhost_ops->vhost_call(hdev, VHOST_GET_FEATURES, &features);
    if (r < 0) {
        goto fail;
    }

    for (i = 0; i < hdev->nvqs; ++i) {
        r = vhost_virtqueue_init(hdev, hdev->vqs + i, i);
        if (r < 0) {
            goto fail_vq;
        }
    }
    hdev->features = features;

    hdev->memory_listener = (MemoryListener) {
        .begin = vhost_begin,
        .commit = vhost_commit,
        .region_add = vhost_region_add,
        .region_del = vhost_region_del,
        .region_nop = vhost_region_nop,
        .log_start = vhost_log_start,
        .log_stop = vhost_log_stop,
        .log_sync = vhost_log_sync,
        .log_global_start = vhost_log_global_start,
        .log_global_stop = vhost_log_global_stop,
        .eventfd_add = vhost_eventfd_add,
        .eventfd_del = vhost_eventfd_del,
        .priority = 10
    };
    hdev->migration_blocker = NULL;
    if (!(hdev->features & (0x1ULL << VHOST_F_LOG_ALL))) {
        error_setg(&hdev->migration_blocker,
                   "Migration disabled: vhost lacks VHOST_F_LOG_ALL feature.");
        migrate_add_blocker(hdev->migration_blocker);
    }
    hdev->mem = g_malloc0(offsetof(struct vhost_memory, regions));
    hdev->n_mem_sections = 0;
    hdev->mem_sections = NULL;
    hdev->log = NULL;
    hdev->log_size = 0;
    hdev->log_enabled = false;
    hdev->started = false;
    hdev->memory_changed = false;
    memory_listener_register(&hdev->memory_listener, &address_space_memory);
    hdev->force = force;
    return 0;
fail_vq:
    while (--i >= 0) {
        vhost_virtqueue_cleanup(hdev->vqs + i);
    }
fail:
    r = -errno;
    hdev->vhost_ops->vhost_backend_cleanup(hdev);
    return r;
}

void vhost_dev_cleanup(struct vhost_dev *hdev)
{
    int i;
    for (i = 0; i < hdev->nvqs; ++i) {
        vhost_virtqueue_cleanup(hdev->vqs + i);
    }
    memory_listener_unregister(&hdev->memory_listener);
    if (hdev->migration_blocker) {
        migrate_del_blocker(hdev->migration_blocker);
        error_free(hdev->migration_blocker);
    }
    g_free(hdev->mem);
    g_free(hdev->mem_sections);
    hdev->vhost_ops->vhost_backend_cleanup(hdev);
}

bool vhost_dev_query(struct vhost_dev *hdev, VirtIODevice *vdev)
{
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);

    return !k->query_guest_notifiers ||
           k->query_guest_notifiers(qbus->parent) ||
           hdev->force;
}

/* Stop processing guest IO notifications in qemu.
 * Start processing them in vhost in kernel.
 */
int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
{
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
    int i, r;
    if (!k->set_host_notifier) {
        fprintf(stderr, "binding does not support host notifiers\n");
        r = -ENOSYS;
        goto fail;
    }

    for (i = 0; i < hdev->nvqs; ++i) {
        r = k->set_host_notifier(qbus->parent, hdev->vq_index + i, true);
        if (r < 0) {
            fprintf(stderr, "vhost VQ %d notifier binding failed: %d\n", i, -r);
            goto fail_vq;
        }
    }

    return 0;
fail_vq:
    while (--i >= 0) {
        r = k->set_host_notifier(qbus->parent, hdev->vq_index + i, false);
        if (r < 0) {
            fprintf(stderr, "vhost VQ %d notifier cleanup error: %d\n", i, -r);
            fflush(stderr);
        }
        assert(r >= 0);
    }
fail:
    return r;
}

/* Stop processing guest IO notifications in vhost.
 * Start processing them in qemu.
 * This might actually run the qemu handlers right away,
 * so virtio in qemu must be completely setup when this is called.
 */
void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
{
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
    int i, r;

    for (i = 0; i < hdev->nvqs; ++i) {
        r = k->set_host_notifier(qbus->parent, hdev->vq_index + i, false);
        if (r < 0) {
            fprintf(stderr, "vhost VQ %d notifier cleanup failed: %d\n", i, -r);
            fflush(stderr);
        }
        assert(r >= 0);
    }
}

/* Test and clear event pending status.
 * Should be called after unmask to avoid losing events.
 */
bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n)
{
    struct vhost_virtqueue *vq = hdev->vqs + n - hdev->vq_index;
    assert(n >= hdev->vq_index && n < hdev->vq_index + hdev->nvqs);
    return event_notifier_test_and_clear(&vq->masked_notifier);
}

/* Mask/unmask events from this vq. */
void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
                          bool mask)
{
    struct VirtQueue *vvq = virtio_get_queue(vdev, n);
    int r, index = n - hdev->vq_index;

    assert(n >= hdev->vq_index && n < hdev->vq_index + hdev->nvqs);

    struct vhost_vring_file file = {
        .index = index
    };
    if (mask) {
        file.fd = event_notifier_get_fd(&hdev->vqs[index].masked_notifier);
    } else {
        file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq));
    }
    r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_VRING_CALL, &file);
    assert(r >= 0);
}
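
/*
 * Feature negotiation helpers.  vhost_get_features() filters a virtio
 * feature set down to the bits the vhost backend advertised in
 * hdev->features; vhost_ack_features() records the bits the guest acked
 * into hdev->acked_features, which vhost_dev_set_features() later pushes
 * to the backend (adding VHOST_F_LOG_ALL while dirty logging is active).
 * A hypothetical caller, with a backend-specific bit list terminated by
 * VHOST_INVALID_FEATURE_BIT, might do something like:
 *
 *   features = vhost_get_features(hdev, my_feature_bits, features);
 *   ... offer 'features' to the guest ...
 *   vhost_ack_features(hdev, my_feature_bits, guest_features);
 */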
uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits,
                            uint64_t features)
{
    const int *bit = feature_bits;
    while (*bit != VHOST_INVALID_FEATURE_BIT) {
        uint64_t bit_mask = (1ULL << *bit);
        if (!(hdev->features & bit_mask)) {
            features &= ~bit_mask;
        }
        bit++;
    }
    return features;
}

void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits,
                        uint64_t features)
{
    const int *bit = feature_bits;
    while (*bit != VHOST_INVALID_FEATURE_BIT) {
        uint64_t bit_mask = (1ULL << *bit);
        if (features & bit_mask) {
            hdev->acked_features |= bit_mask;
        }
        bit++;
    }
}

/* Host notifiers must be enabled at this point. */
int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
{
    int i, r;

    hdev->started = true;

    r = vhost_dev_set_features(hdev, hdev->log_enabled);
    if (r < 0) {
        goto fail_features;
    }
    r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_MEM_TABLE, hdev->mem);
    if (r < 0) {
        r = -errno;
        goto fail_mem;
    }
    for (i = 0; i < hdev->nvqs; ++i) {
        r = vhost_virtqueue_start(hdev,
                                  vdev,
                                  hdev->vqs + i,
                                  hdev->vq_index + i);
        if (r < 0) {
            goto fail_vq;
        }
    }

    if (hdev->log_enabled) {
        uint64_t log_base;

        hdev->log_size = vhost_get_log_size(hdev);
        hdev->log = vhost_log_get(hdev->log_size);
        log_base = (uintptr_t)hdev->log->log;
        r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_LOG_BASE,
                                        hdev->log_size ? &log_base : NULL);
        if (r < 0) {
            r = -errno;
            goto fail_log;
        }
    }

    return 0;
fail_log:
    vhost_log_put(hdev, false);
fail_vq:
    while (--i >= 0) {
        vhost_virtqueue_stop(hdev,
                             vdev,
                             hdev->vqs + i,
                             hdev->vq_index + i);
    }
    i = hdev->nvqs;
fail_mem:
fail_features:

    hdev->started = false;
    return r;
}

/* Host notifiers must be enabled at this point. */
void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
{
    int i;

    for (i = 0; i < hdev->nvqs; ++i) {
        vhost_virtqueue_stop(hdev,
                             vdev,
                             hdev->vqs + i,
                             hdev->vq_index + i);
    }

    vhost_log_put(hdev, true);
    hdev->started = false;
    hdev->log = NULL;
    hdev->log_size = 0;
}