/*
 * vhost support
 *
 * Copyright Red Hat, Inc. 2010
 *
 * Authors:
 *  Michael S. Tsirkin <mst@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "hw/virtio/vhost.h"
#include "hw/hw.h"
#include "qemu/atomic.h"
#include "qemu/range.h"
#include <linux/vhost.h>
#include "exec/address-spaces.h"
#include "hw/virtio/virtio-bus.h"

static void vhost_dev_sync_region(struct vhost_dev *dev,
                                  MemoryRegionSection *section,
                                  uint64_t mfirst, uint64_t mlast,
                                  uint64_t rfirst, uint64_t rlast)
{
    uint64_t start = MAX(mfirst, rfirst);
    uint64_t end = MIN(mlast, rlast);
    vhost_log_chunk_t *from = dev->log + start / VHOST_LOG_CHUNK;
    vhost_log_chunk_t *to = dev->log + end / VHOST_LOG_CHUNK + 1;
    uint64_t addr = (start / VHOST_LOG_CHUNK) * VHOST_LOG_CHUNK;

    if (end < start) {
        return;
    }
    assert(end / VHOST_LOG_CHUNK < dev->log_size);
    assert(start / VHOST_LOG_CHUNK < dev->log_size);

    for (; from < to; ++from) {
        vhost_log_chunk_t log;
        /* We first check with non-atomic: much cheaper,
         * and we expect non-dirty to be the common case. */
        if (!*from) {
            addr += VHOST_LOG_CHUNK;
            continue;
        }
        /* Data must be read atomically. We don't really need barrier semantics
         * but it's easier to use atomic_* than roll our own. */
        log = atomic_xchg(from, 0);
        while (log) {
            int bit = ctzl(log);
            hwaddr page_addr;
            hwaddr section_offset;
            hwaddr mr_offset;
            page_addr = addr + bit * VHOST_LOG_PAGE;
            section_offset = page_addr - section->offset_within_address_space;
            mr_offset = section_offset + section->offset_within_region;
            memory_region_set_dirty(section->mr, mr_offset, VHOST_LOG_PAGE);
            log &= ~(0x1ull << bit);
        }
        addr += VHOST_LOG_CHUNK;
    }
}

static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
                                   MemoryRegionSection *section,
                                   hwaddr first,
                                   hwaddr last)
{
    int i;
    hwaddr start_addr;
    hwaddr end_addr;

    if (!dev->log_enabled || !dev->started) {
        return 0;
    }
    start_addr = section->offset_within_address_space;
    end_addr = range_get_last(start_addr, int128_get64(section->size));
    start_addr = MAX(first, start_addr);
    end_addr = MIN(last, end_addr);

    for (i = 0; i < dev->mem->nregions; ++i) {
        struct vhost_memory_region *reg = dev->mem->regions + i;
        vhost_dev_sync_region(dev, section, start_addr, end_addr,
                              reg->guest_phys_addr,
                              range_get_last(reg->guest_phys_addr,
                                             reg->memory_size));
    }
    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_virtqueue *vq = dev->vqs + i;
        vhost_dev_sync_region(dev, section, start_addr, end_addr, vq->used_phys,
                              range_get_last(vq->used_phys, vq->used_size));
    }
    return 0;
}

static void vhost_log_sync(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
                                         memory_listener);
    vhost_sync_dirty_bitmap(dev, section, 0x0, ~0x0ULL);
}

static void vhost_log_sync_range(struct vhost_dev *dev,
                                 hwaddr first, hwaddr last)
{
    int i;
    /* FIXME: this is N^2 in number of sections */
    for (i = 0; i < dev->n_mem_sections; ++i) {
        MemoryRegionSection *section = &dev->mem_sections[i];
        vhost_sync_dirty_bitmap(dev, section, first, last);
    }
}
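
/*
 * Note on the dirty log layout handled above: the log written by the vhost
 * backend is an array of vhost_log_chunk_t bitmaps, where each chunk covers
 * VHOST_LOG_CHUNK bytes of guest physical memory and each set bit marks one
 * dirty page of VHOST_LOG_PAGE bytes.  vhost_dev_sync_region() turns a dirty
 * bit back into a MemoryRegion offset:
 *
 *   page_addr      = addr + bit * VHOST_LOG_PAGE;
 *   section_offset = page_addr - section->offset_within_address_space;
 *   mr_offset      = section_offset + section->offset_within_region;
 *
 * before handing it to memory_region_set_dirty().
 */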

/* Assign/unassign. Keep an unsorted array of non-overlapping
 * memory regions in dev->mem. */
static void vhost_dev_unassign_memory(struct vhost_dev *dev,
                                      uint64_t start_addr,
                                      uint64_t size)
{
    int from, to, n = dev->mem->nregions;
    /* Track overlapping/split regions for sanity checking. */
    int overlap_start = 0, overlap_end = 0, overlap_middle = 0, split = 0;

    for (from = 0, to = 0; from < n; ++from, ++to) {
        struct vhost_memory_region *reg = dev->mem->regions + to;
        uint64_t reglast;
        uint64_t memlast;
        uint64_t change;

        /* clone old region */
        if (to != from) {
            memcpy(reg, dev->mem->regions + from, sizeof *reg);
        }

        /* No overlap is simple */
        if (!ranges_overlap(reg->guest_phys_addr, reg->memory_size,
                            start_addr, size)) {
            continue;
        }

        /* Split only happens if supplied region
         * is in the middle of an existing one. Thus it can not
         * overlap with any other existing region. */
        assert(!split);

        reglast = range_get_last(reg->guest_phys_addr, reg->memory_size);
        memlast = range_get_last(start_addr, size);

        /* Remove whole region */
        if (start_addr <= reg->guest_phys_addr && memlast >= reglast) {
            --dev->mem->nregions;
            --to;
            ++overlap_middle;
            continue;
        }

        /* Shrink region */
        if (memlast >= reglast) {
            reg->memory_size = start_addr - reg->guest_phys_addr;
            assert(reg->memory_size);
            assert(!overlap_end);
            ++overlap_end;
            continue;
        }

        /* Shift region */
        if (start_addr <= reg->guest_phys_addr) {
            change = memlast + 1 - reg->guest_phys_addr;
            reg->memory_size -= change;
            reg->guest_phys_addr += change;
            reg->userspace_addr += change;
            assert(reg->memory_size);
            assert(!overlap_start);
            ++overlap_start;
            continue;
        }

        /* This only happens if supplied region
         * is in the middle of an existing one. Thus it can not
         * overlap with any other existing region. */
        assert(!overlap_start);
        assert(!overlap_end);
        assert(!overlap_middle);
        /* Split region: shrink first part, shift second part. */
        memcpy(dev->mem->regions + n, reg, sizeof *reg);
        reg->memory_size = start_addr - reg->guest_phys_addr;
        assert(reg->memory_size);
        change = memlast + 1 - reg->guest_phys_addr;
        reg = dev->mem->regions + n;
        reg->memory_size -= change;
        assert(reg->memory_size);
        reg->guest_phys_addr += change;
        reg->userspace_addr += change;
        /* Never add more than 1 region */
        assert(dev->mem->nregions == n);
        ++dev->mem->nregions;
        ++split;
    }
}
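
/*
 * Worked example for the split case above (addresses are illustrative):
 * unassigning [0x3000, 0x4fff] from an existing region covering
 * [0x1000, 0x8fff] leaves two regions, [0x1000, 0x2fff] (the shrunk first
 * part) and [0x5000, 0x8fff] (the shifted second part), with the second
 * part's userspace_addr advanced by the same 0x4000 bytes.
 */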

/* Called after unassign, so no regions overlap the given range. */
static void vhost_dev_assign_memory(struct vhost_dev *dev,
                                    uint64_t start_addr,
                                    uint64_t size,
                                    uint64_t uaddr)
{
    int from, to;
    struct vhost_memory_region *merged = NULL;
    for (from = 0, to = 0; from < dev->mem->nregions; ++from, ++to) {
        struct vhost_memory_region *reg = dev->mem->regions + to;
        uint64_t prlast, urlast;
        uint64_t pmlast, umlast;
        uint64_t s, e, u;

        /* clone old region */
        if (to != from) {
            memcpy(reg, dev->mem->regions + from, sizeof *reg);
        }
        prlast = range_get_last(reg->guest_phys_addr, reg->memory_size);
        pmlast = range_get_last(start_addr, size);
        urlast = range_get_last(reg->userspace_addr, reg->memory_size);
        umlast = range_get_last(uaddr, size);

        /* check for overlapping regions: should never happen. */
        assert(prlast < start_addr || pmlast < reg->guest_phys_addr);
        /* Not an adjacent or overlapping region - do not merge. */
        if ((prlast + 1 != start_addr || urlast + 1 != uaddr) &&
            (pmlast + 1 != reg->guest_phys_addr ||
             umlast + 1 != reg->userspace_addr)) {
            continue;
        }

        if (merged) {
            --to;
            assert(to >= 0);
        } else {
            merged = reg;
        }
        u = MIN(uaddr, reg->userspace_addr);
        s = MIN(start_addr, reg->guest_phys_addr);
        e = MAX(pmlast, prlast);
        uaddr = merged->userspace_addr = u;
        start_addr = merged->guest_phys_addr = s;
        size = merged->memory_size = e - s + 1;
        assert(merged->memory_size);
    }

    if (!merged) {
        struct vhost_memory_region *reg = dev->mem->regions + to;
        memset(reg, 0, sizeof *reg);
        reg->memory_size = size;
        assert(reg->memory_size);
        reg->guest_phys_addr = start_addr;
        reg->userspace_addr = uaddr;
        ++to;
    }
    assert(to <= dev->mem->nregions + 1);
    dev->mem->nregions = to;
}

static uint64_t vhost_get_log_size(struct vhost_dev *dev)
{
    uint64_t log_size = 0;
    int i;
    for (i = 0; i < dev->mem->nregions; ++i) {
        struct vhost_memory_region *reg = dev->mem->regions + i;
        uint64_t last = range_get_last(reg->guest_phys_addr,
                                       reg->memory_size);
        log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1);
    }
    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_virtqueue *vq = dev->vqs + i;
        uint64_t last = vq->used_phys + vq->used_size - 1;
        log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1);
    }
    return log_size;
}
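
/*
 * Example of the log size calculation above, assuming the usual 4 KiB
 * VHOST_LOG_PAGE and a 64-bit vhost_log_chunk_t from vhost.h (so one chunk
 * covers 64 * 4 KiB = 0x40000 bytes of guest memory): a guest whose highest
 * RAM address is just under 4 GiB needs
 *     log_size = (0xffffffff / 0x40000) + 1 = 0x4000 chunks,
 * i.e. a 128 KiB log buffer (0x4000 * sizeof(vhost_log_chunk_t)).
 */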

static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size)
{
    vhost_log_chunk_t *log;
    uint64_t log_base;
    int r;

    log = g_malloc0(size * sizeof *log);
    log_base = (uint64_t)(unsigned long)log;
    r = dev->vhost_ops->vhost_call(dev, VHOST_SET_LOG_BASE, &log_base);
    assert(r >= 0);
    /* Sync only the range covered by the old log */
    if (dev->log_size) {
        vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK - 1);
    }
    g_free(dev->log);
    dev->log = log;
    dev->log_size = size;
}

static int vhost_verify_ring_mappings(struct vhost_dev *dev,
                                      uint64_t start_addr,
                                      uint64_t size)
{
    int i;
    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_virtqueue *vq = dev->vqs + i;
        hwaddr l;
        void *p;

        if (!ranges_overlap(start_addr, size, vq->ring_phys, vq->ring_size)) {
            continue;
        }
        l = vq->ring_size;
        p = cpu_physical_memory_map(vq->ring_phys, &l, 1);
        if (!p || l != vq->ring_size) {
            fprintf(stderr, "Unable to map ring buffer for ring %d\n", i);
            return -ENOMEM;
        }
        if (p != vq->ring) {
            fprintf(stderr, "Ring buffer relocated for ring %d\n", i);
            return -EBUSY;
        }
        cpu_physical_memory_unmap(p, l, 0, 0);
    }
    return 0;
}

static struct vhost_memory_region *vhost_dev_find_reg(struct vhost_dev *dev,
                                                      uint64_t start_addr,
                                                      uint64_t size)
{
    int i, n = dev->mem->nregions;
    for (i = 0; i < n; ++i) {
        struct vhost_memory_region *reg = dev->mem->regions + i;
        if (ranges_overlap(reg->guest_phys_addr, reg->memory_size,
                           start_addr, size)) {
            return reg;
        }
    }
    return NULL;
}

static bool vhost_dev_cmp_memory(struct vhost_dev *dev,
                                 uint64_t start_addr,
                                 uint64_t size,
                                 uint64_t uaddr)
{
    struct vhost_memory_region *reg = vhost_dev_find_reg(dev, start_addr, size);
    uint64_t reglast;
    uint64_t memlast;

    if (!reg) {
        return true;
    }

    reglast = range_get_last(reg->guest_phys_addr, reg->memory_size);
    memlast = range_get_last(start_addr, size);

    /* Need to extend region? */
    if (start_addr < reg->guest_phys_addr || memlast > reglast) {
        return true;
    }
    /* userspace_addr changed? */
    return uaddr != reg->userspace_addr + start_addr - reg->guest_phys_addr;
}

static void vhost_set_memory(MemoryListener *listener,
                             MemoryRegionSection *section,
                             bool add)
{
    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
                                         memory_listener);
    hwaddr start_addr = section->offset_within_address_space;
    ram_addr_t size = int128_get64(section->size);
    bool log_dirty = memory_region_is_logging(section->mr);
    int s = offsetof(struct vhost_memory, regions) +
        (dev->mem->nregions + 1) * sizeof dev->mem->regions[0];
    void *ram;

    dev->mem = g_realloc(dev->mem, s);

    if (log_dirty) {
        add = false;
    }

    assert(size);

    /* Optimize no-change case. At least cirrus_vga does this a lot at this time. */
    ram = memory_region_get_ram_ptr(section->mr) + section->offset_within_region;
    if (add) {
        if (!vhost_dev_cmp_memory(dev, start_addr, size, (uintptr_t)ram)) {
            /* Region exists with same address. Nothing to do. */
            return;
        }
    } else {
        if (!vhost_dev_find_reg(dev, start_addr, size)) {
            /* Removing region that we don't access. Nothing to do. */
            return;
        }
    }

    vhost_dev_unassign_memory(dev, start_addr, size);
    if (add) {
        /* Add given mapping, merging adjacent regions if any */
        vhost_dev_assign_memory(dev, start_addr, size, (uintptr_t)ram);
    } else {
        /* Remove old mapping for this memory, if any. */
        vhost_dev_unassign_memory(dev, start_addr, size);
    }
    dev->mem_changed_start_addr = MIN(dev->mem_changed_start_addr, start_addr);
    dev->mem_changed_end_addr = MAX(dev->mem_changed_end_addr, start_addr + size - 1);
    dev->memory_changed = true;
}

static bool vhost_section(MemoryRegionSection *section)
{
    return memory_region_is_ram(section->mr);
}

static void vhost_begin(MemoryListener *listener)
{
    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
                                         memory_listener);
    dev->mem_changed_end_addr = 0;
    dev->mem_changed_start_addr = -1;
}

static void vhost_commit(MemoryListener *listener)
{
    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
                                         memory_listener);
    hwaddr start_addr = 0;
    ram_addr_t size = 0;
    uint64_t log_size;
    int r;

    if (!dev->memory_changed) {
        return;
    }
    if (!dev->started) {
        return;
    }
    if (dev->mem_changed_start_addr > dev->mem_changed_end_addr) {
        return;
    }

    if (dev->started) {
        start_addr = dev->mem_changed_start_addr;
        size = dev->mem_changed_end_addr - dev->mem_changed_start_addr + 1;

        r = vhost_verify_ring_mappings(dev, start_addr, size);
        assert(r >= 0);
    }

    if (!dev->log_enabled) {
        r = dev->vhost_ops->vhost_call(dev, VHOST_SET_MEM_TABLE, dev->mem);
        assert(r >= 0);
        dev->memory_changed = false;
        return;
    }
    log_size = vhost_get_log_size(dev);
    /* We allocate an extra 4K bytes to log,
     * to reduce the number of reallocations. */
#define VHOST_LOG_BUFFER (0x1000 / sizeof *dev->log)
    /* To log more, must increase log size before table update. */
    if (dev->log_size < log_size) {
        vhost_dev_log_resize(dev, log_size + VHOST_LOG_BUFFER);
    }
    r = dev->vhost_ops->vhost_call(dev, VHOST_SET_MEM_TABLE, dev->mem);
    assert(r >= 0);
    /* To log less, can only decrease log size after table update. */
    if (dev->log_size > log_size + VHOST_LOG_BUFFER) {
        vhost_dev_log_resize(dev, log_size);
    }
    dev->memory_changed = false;
}

static void vhost_region_add(MemoryListener *listener,
                             MemoryRegionSection *section)
{
    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
                                         memory_listener);

    if (!vhost_section(section)) {
        return;
    }

    ++dev->n_mem_sections;
    dev->mem_sections = g_renew(MemoryRegionSection, dev->mem_sections,
                                dev->n_mem_sections);
    dev->mem_sections[dev->n_mem_sections - 1] = *section;
    memory_region_ref(section->mr);
    vhost_set_memory(listener, section, true);
}

static void vhost_region_del(MemoryListener *listener,
                             MemoryRegionSection *section)
{
    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
                                         memory_listener);
    int i;

    if (!vhost_section(section)) {
        return;
    }

    vhost_set_memory(listener, section, false);
    memory_region_unref(section->mr);
    for (i = 0; i < dev->n_mem_sections; ++i) {
        if (dev->mem_sections[i].offset_within_address_space
            == section->offset_within_address_space) {
            --dev->n_mem_sections;
            memmove(&dev->mem_sections[i], &dev->mem_sections[i + 1],
                    (dev->n_mem_sections - i) * sizeof(*dev->mem_sections));
            break;
        }
    }
}

static void vhost_region_nop(MemoryListener *listener,
                             MemoryRegionSection *section)
{
}

static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
                                    struct vhost_virtqueue *vq,
                                    unsigned idx, bool enable_log)
{
    struct vhost_vring_addr addr = {
        .index = idx,
        .desc_user_addr = (uint64_t)(unsigned long)vq->desc,
        .avail_user_addr = (uint64_t)(unsigned long)vq->avail,
        .used_user_addr = (uint64_t)(unsigned long)vq->used,
        .log_guest_addr = vq->used_phys,
        .flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0,
    };
    int r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_ADDR, &addr);
    if (r < 0) {
        return -errno;
    }
    return 0;
}

static int vhost_dev_set_features(struct vhost_dev *dev, bool enable_log)
{
    uint64_t features = dev->acked_features;
    int r;
    if (enable_log) {
        features |= 0x1 << VHOST_F_LOG_ALL;
    }
    r = dev->vhost_ops->vhost_call(dev, VHOST_SET_FEATURES, &features);
    return r < 0 ? -errno : 0;
}

static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log)
{
    int r, t, i;
    r = vhost_dev_set_features(dev, enable_log);
    if (r < 0) {
        goto err_features;
    }
    for (i = 0; i < dev->nvqs; ++i) {
        r = vhost_virtqueue_set_addr(dev, dev->vqs + i, i,
                                     enable_log);
        if (r < 0) {
            goto err_vq;
        }
    }
    return 0;
err_vq:
    for (; i >= 0; --i) {
        t = vhost_virtqueue_set_addr(dev, dev->vqs + i, i,
                                     dev->log_enabled);
        assert(t >= 0);
    }
    t = vhost_dev_set_features(dev, dev->log_enabled);
    assert(t >= 0);
err_features:
    return r;
}

static int vhost_migration_log(MemoryListener *listener, int enable)
{
    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
                                         memory_listener);
    int r;
    if (!!enable == dev->log_enabled) {
        return 0;
    }
    if (!dev->started) {
        dev->log_enabled = enable;
        return 0;
    }
    if (!enable) {
        r = vhost_dev_set_log(dev, false);
        if (r < 0) {
            return r;
        }
        g_free(dev->log);
        dev->log = NULL;
        dev->log_size = 0;
    } else {
        vhost_dev_log_resize(dev, vhost_get_log_size(dev));
        r = vhost_dev_set_log(dev, true);
        if (r < 0) {
            return r;
        }
    }
    dev->log_enabled = enable;
    return 0;
}

static void vhost_log_global_start(MemoryListener *listener)
{
    int r;

    r = vhost_migration_log(listener, true);
    if (r < 0) {
        abort();
    }
}

static void vhost_log_global_stop(MemoryListener *listener)
{
    int r;

    r = vhost_migration_log(listener, false);
    if (r < 0) {
        abort();
    }
}
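
/*
 * vhost_log_global_start()/vhost_log_global_stop() above are invoked by the
 * memory API when global dirty logging is switched on or off (typically at
 * the start and end of migration).  Enabling allocates a log sized for the
 * current memory map and renegotiates VHOST_F_LOG_ALL plus per-ring
 * VHOST_VRING_F_LOG with the backend; disabling drops the feature and frees
 * the log.
 */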

static void vhost_log_start(MemoryListener *listener,
                            MemoryRegionSection *section)
{
    /* FIXME: implement */
}

static void vhost_log_stop(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    /* FIXME: implement */
}

static int vhost_virtqueue_start(struct vhost_dev *dev,
                                 struct VirtIODevice *vdev,
                                 struct vhost_virtqueue *vq,
                                 unsigned idx)
{
    hwaddr s, l, a;
    int r;
    int vhost_vq_index = idx - dev->vq_index;
    struct vhost_vring_file file = {
        .index = vhost_vq_index
    };
    struct vhost_vring_state state = {
        .index = vhost_vq_index
    };
    struct VirtQueue *vvq = virtio_get_queue(vdev, idx);

    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    vq->num = state.num = virtio_queue_get_num(vdev, idx);
    r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_NUM, &state);
    if (r) {
        return -errno;
    }

    state.num = virtio_queue_get_last_avail_idx(vdev, idx);
    r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_BASE, &state);
    if (r) {
        return -errno;
    }

    s = l = virtio_queue_get_desc_size(vdev, idx);
    a = virtio_queue_get_desc_addr(vdev, idx);
    vq->desc = cpu_physical_memory_map(a, &l, 0);
    if (!vq->desc || l != s) {
        r = -ENOMEM;
        goto fail_alloc_desc;
    }
    s = l = virtio_queue_get_avail_size(vdev, idx);
    a = virtio_queue_get_avail_addr(vdev, idx);
    vq->avail = cpu_physical_memory_map(a, &l, 0);
    if (!vq->avail || l != s) {
        r = -ENOMEM;
        goto fail_alloc_avail;
    }
    vq->used_size = s = l = virtio_queue_get_used_size(vdev, idx);
    vq->used_phys = a = virtio_queue_get_used_addr(vdev, idx);
    vq->used = cpu_physical_memory_map(a, &l, 1);
    if (!vq->used || l != s) {
        r = -ENOMEM;
        goto fail_alloc_used;
    }

    vq->ring_size = s = l = virtio_queue_get_ring_size(vdev, idx);
    vq->ring_phys = a = virtio_queue_get_ring_addr(vdev, idx);
    vq->ring = cpu_physical_memory_map(a, &l, 1);
    if (!vq->ring || l != s) {
        r = -ENOMEM;
        goto fail_alloc_ring;
    }

    r = vhost_virtqueue_set_addr(dev, vq, vhost_vq_index, dev->log_enabled);
    if (r < 0) {
        r = -errno;
        goto fail_alloc;
    }

    file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq));
    r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_KICK, &file);
    if (r) {
        r = -errno;
        goto fail_kick;
    }

    /* Clear and discard previous events if any. */
    event_notifier_test_and_clear(&vq->masked_notifier);

    return 0;

fail_kick:
fail_alloc:
    cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
                              0, 0);
fail_alloc_ring:
    cpu_physical_memory_unmap(vq->used, virtio_queue_get_used_size(vdev, idx),
                              0, 0);
fail_alloc_used:
    cpu_physical_memory_unmap(vq->avail, virtio_queue_get_avail_size(vdev, idx),
                              0, 0);
fail_alloc_avail:
    cpu_physical_memory_unmap(vq->desc, virtio_queue_get_desc_size(vdev, idx),
                              0, 0);
fail_alloc_desc:
    return r;
}

static void vhost_virtqueue_stop(struct vhost_dev *dev,
                                 struct VirtIODevice *vdev,
                                 struct vhost_virtqueue *vq,
                                 unsigned idx)
{
    struct vhost_vring_state state = {
        .index = idx - dev->vq_index
    };
    int r;
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
    r = dev->vhost_ops->vhost_call(dev, VHOST_GET_VRING_BASE, &state);
    if (r < 0) {
        fprintf(stderr, "vhost VQ %d ring restore failed: %d\n", idx, r);
        fflush(stderr);
    }
    virtio_queue_set_last_avail_idx(vdev, idx, state.num);
    virtio_queue_invalidate_signalled_used(vdev, idx);
    assert(r >= 0);
    cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
                              0, virtio_queue_get_ring_size(vdev, idx));
    cpu_physical_memory_unmap(vq->used, virtio_queue_get_used_size(vdev, idx),
                              1, virtio_queue_get_used_size(vdev, idx));
    cpu_physical_memory_unmap(vq->avail, virtio_queue_get_avail_size(vdev, idx),
                              0, virtio_queue_get_avail_size(vdev, idx));
    cpu_physical_memory_unmap(vq->desc, virtio_queue_get_desc_size(vdev, idx),
                              0, virtio_queue_get_desc_size(vdev, idx));
}

static void vhost_eventfd_add(MemoryListener *listener,
                              MemoryRegionSection *section,
                              bool match_data, uint64_t data, EventNotifier *e)
{
}

static void vhost_eventfd_del(MemoryListener *listener,
                              MemoryRegionSection *section,
                              bool match_data, uint64_t data, EventNotifier *e)
{
}

static int vhost_virtqueue_init(struct vhost_dev *dev,
                                struct vhost_virtqueue *vq, int n)
{
    struct vhost_vring_file file = {
        .index = n,
    };
    int r = event_notifier_init(&vq->masked_notifier, 0);
    if (r < 0) {
        return r;
    }

    file.fd = event_notifier_get_fd(&vq->masked_notifier);
    r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_CALL, &file);
    if (r) {
        r = -errno;
        goto fail_call;
    }
    return 0;
fail_call:
    event_notifier_cleanup(&vq->masked_notifier);
    return r;
}

static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq)
{
    event_notifier_cleanup(&vq->masked_notifier);
}
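
/*
 * Note on masked_notifier: vhost_virtqueue_init() points the backend's call
 * eventfd (VHOST_SET_VRING_CALL) at an internal notifier rather than at the
 * guest's notifier.  vhost_virtqueue_mask() further down switches between
 * the two, and vhost_virtqueue_pending() lets the caller poll for interrupts
 * that arrived while the queue was masked.
 */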

int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
                   VhostBackendType backend_type, bool force)
{
    uint64_t features;
    int i, r;

    if (vhost_set_backend_type(hdev, backend_type) < 0) {
        return -1;
    }

    if (hdev->vhost_ops->vhost_backend_init(hdev, opaque) < 0) {
        return -errno;
    }

    r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_OWNER, NULL);
    if (r < 0) {
        goto fail;
    }

    r = hdev->vhost_ops->vhost_call(hdev, VHOST_GET_FEATURES, &features);
    if (r < 0) {
        goto fail;
    }

    for (i = 0; i < hdev->nvqs; ++i) {
        r = vhost_virtqueue_init(hdev, hdev->vqs + i, i);
        if (r < 0) {
            goto fail_vq;
        }
    }
    hdev->features = features;

    hdev->memory_listener = (MemoryListener) {
        .begin = vhost_begin,
        .commit = vhost_commit,
        .region_add = vhost_region_add,
        .region_del = vhost_region_del,
        .region_nop = vhost_region_nop,
        .log_start = vhost_log_start,
        .log_stop = vhost_log_stop,
        .log_sync = vhost_log_sync,
        .log_global_start = vhost_log_global_start,
        .log_global_stop = vhost_log_global_stop,
        .eventfd_add = vhost_eventfd_add,
        .eventfd_del = vhost_eventfd_del,
        .priority = 10
    };
    hdev->mem = g_malloc0(offsetof(struct vhost_memory, regions));
    hdev->n_mem_sections = 0;
    hdev->mem_sections = NULL;
    hdev->log = NULL;
    hdev->log_size = 0;
    hdev->log_enabled = false;
    hdev->started = false;
    hdev->memory_changed = false;
    memory_listener_register(&hdev->memory_listener, &address_space_memory);
    hdev->force = force;
    return 0;
fail_vq:
    while (--i >= 0) {
        vhost_virtqueue_cleanup(hdev->vqs + i);
    }
fail:
    r = -errno;
    hdev->vhost_ops->vhost_backend_cleanup(hdev);
    return r;
}

void vhost_dev_cleanup(struct vhost_dev *hdev)
{
    int i;
    for (i = 0; i < hdev->nvqs; ++i) {
        vhost_virtqueue_cleanup(hdev->vqs + i);
    }
    memory_listener_unregister(&hdev->memory_listener);
    g_free(hdev->mem);
    g_free(hdev->mem_sections);
    hdev->vhost_ops->vhost_backend_cleanup(hdev);
}

bool vhost_dev_query(struct vhost_dev *hdev, VirtIODevice *vdev)
{
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);

    return !k->query_guest_notifiers ||
           k->query_guest_notifiers(qbus->parent) ||
           hdev->force;
}

/* Stop processing guest IO notifications in qemu.
 * Start processing them in vhost in kernel.
 */
int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
{
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
    int i, r;
    if (!k->set_host_notifier) {
        fprintf(stderr, "binding does not support host notifiers\n");
        r = -ENOSYS;
        goto fail;
    }

    for (i = 0; i < hdev->nvqs; ++i) {
        r = k->set_host_notifier(qbus->parent, hdev->vq_index + i, true);
        if (r < 0) {
            fprintf(stderr, "vhost VQ %d notifier binding failed: %d\n", i, -r);
            goto fail_vq;
        }
    }

    return 0;
fail_vq:
    while (--i >= 0) {
        r = k->set_host_notifier(qbus->parent, hdev->vq_index + i, false);
        if (r < 0) {
            fprintf(stderr, "vhost VQ %d notifier cleanup error: %d\n", i, -r);
            fflush(stderr);
        }
        assert(r >= 0);
    }
fail:
    return r;
}

/* Stop processing guest IO notifications in vhost.
 * Start processing them in qemu.
 * This might actually run the qemu handlers right away,
 * so virtio in qemu must be completely set up when this is called.
 */
void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
{
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
    int i, r;

    for (i = 0; i < hdev->nvqs; ++i) {
        r = k->set_host_notifier(qbus->parent, hdev->vq_index + i, false);
        if (r < 0) {
            fprintf(stderr, "vhost VQ %d notifier cleanup failed: %d\n", i, -r);
            fflush(stderr);
        }
        assert(r >= 0);
    }
}

/* Test and clear event pending status.
 * Should be called after unmask to avoid losing events.
 */
bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n)
{
    struct vhost_virtqueue *vq = hdev->vqs + n - hdev->vq_index;
    assert(hdev->started);
    assert(n >= hdev->vq_index && n < hdev->vq_index + hdev->nvqs);
    return event_notifier_test_and_clear(&vq->masked_notifier);
}

/* Mask/unmask events from this vq. */
void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
                          bool mask)
{
    struct VirtQueue *vvq = virtio_get_queue(vdev, n);
    int r, index = n - hdev->vq_index;

    assert(hdev->started);
    assert(n >= hdev->vq_index && n < hdev->vq_index + hdev->nvqs);

    struct vhost_vring_file file = {
        .index = index
    };
    if (mask) {
        file.fd = event_notifier_get_fd(&hdev->vqs[index].masked_notifier);
    } else {
        file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq));
    }
    r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_VRING_CALL, &file);
    assert(r >= 0);
}
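
/*
 * Expected usage of the two helpers above (for example from a transport such
 * as virtio-pci when the guest masks and unmasks an MSI-X vector): call
 * vhost_virtqueue_mask(hdev, vdev, n, true) to park interrupts in the masked
 * notifier, later vhost_virtqueue_mask(hdev, vdev, n, false) to reconnect the
 * guest notifier, then vhost_virtqueue_pending(hdev, n) to check whether an
 * interrupt arrived while the queue was masked so the caller can re-inject it.
 */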

unsigned vhost_get_features(struct vhost_dev *hdev, const int *feature_bits,
                            unsigned features)
{
    const int *bit = feature_bits;
    while (*bit != VHOST_INVALID_FEATURE_BIT) {
        unsigned bit_mask = (1 << *bit);
        if (!(hdev->features & bit_mask)) {
            features &= ~bit_mask;
        }
        bit++;
    }
    return features;
}

void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits,
                        unsigned features)
{
    const int *bit = feature_bits;
    while (*bit != VHOST_INVALID_FEATURE_BIT) {
        unsigned bit_mask = (1 << *bit);
        if (features & bit_mask) {
            hdev->acked_features |= bit_mask;
        }
        bit++;
    }
}

/* Host notifiers must be enabled at this point. */
int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
{
    int i, r;

    hdev->started = true;

    r = vhost_dev_set_features(hdev, hdev->log_enabled);
    if (r < 0) {
        goto fail_features;
    }
    r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_MEM_TABLE, hdev->mem);
    if (r < 0) {
        r = -errno;
        goto fail_mem;
    }
    for (i = 0; i < hdev->nvqs; ++i) {
        r = vhost_virtqueue_start(hdev,
                                  vdev,
                                  hdev->vqs + i,
                                  hdev->vq_index + i);
        if (r < 0) {
            goto fail_vq;
        }
    }

    if (hdev->log_enabled) {
        hdev->log_size = vhost_get_log_size(hdev);
        hdev->log = hdev->log_size ?
            g_malloc0(hdev->log_size * sizeof *hdev->log) : NULL;
        r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_LOG_BASE, hdev->log);
        if (r < 0) {
            r = -errno;
            goto fail_log;
        }
    }

    return 0;
fail_log:
fail_vq:
    while (--i >= 0) {
        vhost_virtqueue_stop(hdev,
                             vdev,
                             hdev->vqs + i,
                             hdev->vq_index + i);
    }
    i = hdev->nvqs;
fail_mem:
fail_features:

    hdev->started = false;
    return r;
}

/* Host notifiers must be enabled at this point. */
void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
{
    int i;

    for (i = 0; i < hdev->nvqs; ++i) {
        vhost_virtqueue_stop(hdev,
                             vdev,
                             hdev->vqs + i,
                             hdev->vq_index + i);
    }
    vhost_log_sync_range(hdev, 0, ~0x0ull);

    hdev->started = false;
    g_free(hdev->log);
    hdev->log = NULL;
    hdev->log_size = 0;
}
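
/*
 * Typical lifecycle for a caller such as vhost_net (a sketch, not enforced by
 * this file): vhost_dev_init() once at setup, then for each run
 * vhost_dev_enable_notifiers() followed by vhost_dev_start(); on teardown
 * vhost_dev_stop() followed by vhost_dev_disable_notifiers(); and finally
 * vhost_dev_cleanup().  vhost_get_features()/vhost_ack_features() are used
 * around feature negotiation with the guest.
 */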