1 /* 2 * Virtio MEM device 3 * 4 * Copyright (C) 2020 Red Hat, Inc. 5 * 6 * Authors: 7 * David Hildenbrand <david@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2. 10 * See the COPYING file in the top-level directory. 11 */ 12 13 #include "qemu/osdep.h" 14 #include "qemu/iov.h" 15 #include "qemu/cutils.h" 16 #include "qemu/error-report.h" 17 #include "qemu/units.h" 18 #include "sysemu/numa.h" 19 #include "sysemu/sysemu.h" 20 #include "sysemu/reset.h" 21 #include "hw/virtio/virtio.h" 22 #include "hw/virtio/virtio-bus.h" 23 #include "hw/virtio/virtio-access.h" 24 #include "hw/virtio/virtio-mem.h" 25 #include "qapi/error.h" 26 #include "qapi/visitor.h" 27 #include "exec/ram_addr.h" 28 #include "migration/misc.h" 29 #include "hw/boards.h" 30 #include "hw/qdev-properties.h" 31 #include CONFIG_DEVICES 32 #include "trace.h" 33 34 static const VMStateDescription vmstate_virtio_mem_device_early; 35 36 /* 37 * We only had legacy x86 guests that did not support 38 * VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE. Other targets don't have legacy guests. 39 */ 40 #if defined(TARGET_X86_64) || defined(TARGET_I386) 41 #define VIRTIO_MEM_HAS_LEGACY_GUESTS 42 #endif 43 44 /* 45 * Let's not allow blocks smaller than 1 MiB, for example, to keep the tracking 46 * bitmap small. 47 */ 48 #define VIRTIO_MEM_MIN_BLOCK_SIZE ((uint32_t)(1 * MiB)) 49 50 static uint32_t virtio_mem_default_thp_size(void) 51 { 52 uint32_t default_thp_size = VIRTIO_MEM_MIN_BLOCK_SIZE; 53 54 #if defined(__x86_64__) || defined(__arm__) || defined(__powerpc64__) 55 default_thp_size = 2 * MiB; 56 #elif defined(__aarch64__) 57 if (qemu_real_host_page_size() == 4 * KiB) { 58 default_thp_size = 2 * MiB; 59 } else if (qemu_real_host_page_size() == 16 * KiB) { 60 default_thp_size = 32 * MiB; 61 } else if (qemu_real_host_page_size() == 64 * KiB) { 62 default_thp_size = 512 * MiB; 63 } 64 #endif 65 66 return default_thp_size; 67 } 68 69 /* 70 * We want to have a reasonable default block size such that 71 * 1. We avoid splitting THPs when unplugging memory, which degrades 72 * performance. 73 * 2. We avoid placing THPs for plugged blocks that also cover unplugged 74 * blocks. 75 * 76 * The actual THP size might differ between Linux kernels, so we try to probe 77 * it. In the future (if we ever run into issues regarding 2.), we might want 78 * to disable THP in case we fail to properly probe the THP size, or if the 79 * block size is configured smaller than the THP size. 80 */ 81 static uint32_t thp_size; 82 83 #define HPAGE_PMD_SIZE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size" 84 static uint32_t virtio_mem_thp_size(void) 85 { 86 gchar *content = NULL; 87 const char *endptr; 88 uint64_t tmp; 89 90 if (thp_size) { 91 return thp_size; 92 } 93 94 /* 95 * Try to probe the actual THP size, fallback to (sane but eventually 96 * incorrect) default sizes. 97 */ 98 if (g_file_get_contents(HPAGE_PMD_SIZE_PATH, &content, NULL, NULL) && 99 !qemu_strtou64(content, &endptr, 0, &tmp) && 100 (!endptr || *endptr == '\n')) { 101 /* Sanity-check the value and fallback to something reasonable. */ 102 if (!tmp || !is_power_of_2(tmp)) { 103 warn_report("Read unsupported THP size: %" PRIx64, tmp); 104 } else { 105 thp_size = tmp; 106 } 107 } 108 109 if (!thp_size) { 110 thp_size = virtio_mem_default_thp_size(); 111 warn_report("Could not detect THP size, falling back to %" PRIx64 112 " MiB.", thp_size / MiB); 113 } 114 115 g_free(content); 116 return thp_size; 117 } 118 119 static uint64_t virtio_mem_default_block_size(RAMBlock *rb) 120 { 121 const uint64_t page_size = qemu_ram_pagesize(rb); 122 123 /* We can have hugetlbfs with a page size smaller than the THP size. */ 124 if (page_size == qemu_real_host_page_size()) { 125 return MAX(page_size, virtio_mem_thp_size()); 126 } 127 return MAX(page_size, VIRTIO_MEM_MIN_BLOCK_SIZE); 128 } 129 130 #if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS) 131 static bool virtio_mem_has_shared_zeropage(RAMBlock *rb) 132 { 133 /* 134 * We only have a guaranteed shared zeropage on ordinary MAP_PRIVATE 135 * anonymous RAM. In any other case, reading unplugged *can* populate a 136 * fresh page, consuming actual memory. 137 */ 138 return !qemu_ram_is_shared(rb) && rb->fd < 0 && 139 qemu_ram_pagesize(rb) == qemu_real_host_page_size(); 140 } 141 #endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */ 142 143 /* 144 * Size the usable region bigger than the requested size if possible. Esp. 145 * Linux guests will only add (aligned) memory blocks in case they fully 146 * fit into the usable region, but plug+online only a subset of the pages. 147 * The memory block size corresponds mostly to the section size. 148 * 149 * This allows e.g., to add 20MB with a section size of 128MB on x86_64, and 150 * a section size of 512MB on arm64 (as long as the start address is properly 151 * aligned, similar to ordinary DIMMs). 152 * 153 * We can change this at any time and maybe even make it configurable if 154 * necessary (as the section size can change). But it's more likely that the 155 * section size will rather get smaller and not bigger over time. 156 */ 157 #if defined(TARGET_X86_64) || defined(TARGET_I386) 158 #define VIRTIO_MEM_USABLE_EXTENT (2 * (128 * MiB)) 159 #elif defined(TARGET_ARM) 160 #define VIRTIO_MEM_USABLE_EXTENT (2 * (512 * MiB)) 161 #else 162 #error VIRTIO_MEM_USABLE_EXTENT not defined 163 #endif 164 165 static bool virtio_mem_is_busy(void) 166 { 167 /* 168 * Postcopy cannot handle concurrent discards and we don't want to migrate 169 * pages on-demand with stale content when plugging new blocks. 170 * 171 * For precopy, we don't want unplugged blocks in our migration stream, and 172 * when plugging new blocks, the page content might differ between source 173 * and destination (observable by the guest when not initializing pages 174 * after plugging them) until we're running on the destination (as we didn't 175 * migrate these blocks when they were unplugged). 176 */ 177 return migration_in_incoming_postcopy() || !migration_is_idle(); 178 } 179 180 typedef int (*virtio_mem_range_cb)(const VirtIOMEM *vmem, void *arg, 181 uint64_t offset, uint64_t size); 182 183 static int virtio_mem_for_each_unplugged_range(const VirtIOMEM *vmem, void *arg, 184 virtio_mem_range_cb cb) 185 { 186 unsigned long first_zero_bit, last_zero_bit; 187 uint64_t offset, size; 188 int ret = 0; 189 190 first_zero_bit = find_first_zero_bit(vmem->bitmap, vmem->bitmap_size); 191 while (first_zero_bit < vmem->bitmap_size) { 192 offset = first_zero_bit * vmem->block_size; 193 last_zero_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, 194 first_zero_bit + 1) - 1; 195 size = (last_zero_bit - first_zero_bit + 1) * vmem->block_size; 196 197 ret = cb(vmem, arg, offset, size); 198 if (ret) { 199 break; 200 } 201 first_zero_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, 202 last_zero_bit + 2); 203 } 204 return ret; 205 } 206 207 /* 208 * Adjust the memory section to cover the intersection with the given range. 209 * 210 * Returns false if the intersection is empty, otherwise returns true. 211 */ 212 static bool virtio_mem_intersect_memory_section(MemoryRegionSection *s, 213 uint64_t offset, uint64_t size) 214 { 215 uint64_t start = MAX(s->offset_within_region, offset); 216 uint64_t end = MIN(s->offset_within_region + int128_get64(s->size), 217 offset + size); 218 219 if (end <= start) { 220 return false; 221 } 222 223 s->offset_within_address_space += start - s->offset_within_region; 224 s->offset_within_region = start; 225 s->size = int128_make64(end - start); 226 return true; 227 } 228 229 typedef int (*virtio_mem_section_cb)(MemoryRegionSection *s, void *arg); 230 231 static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem, 232 MemoryRegionSection *s, 233 void *arg, 234 virtio_mem_section_cb cb) 235 { 236 unsigned long first_bit, last_bit; 237 uint64_t offset, size; 238 int ret = 0; 239 240 first_bit = s->offset_within_region / vmem->block_size; 241 first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, first_bit); 242 while (first_bit < vmem->bitmap_size) { 243 MemoryRegionSection tmp = *s; 244 245 offset = first_bit * vmem->block_size; 246 last_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, 247 first_bit + 1) - 1; 248 size = (last_bit - first_bit + 1) * vmem->block_size; 249 250 if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { 251 break; 252 } 253 ret = cb(&tmp, arg); 254 if (ret) { 255 break; 256 } 257 first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, 258 last_bit + 2); 259 } 260 return ret; 261 } 262 263 static int virtio_mem_for_each_unplugged_section(const VirtIOMEM *vmem, 264 MemoryRegionSection *s, 265 void *arg, 266 virtio_mem_section_cb cb) 267 { 268 unsigned long first_bit, last_bit; 269 uint64_t offset, size; 270 int ret = 0; 271 272 first_bit = s->offset_within_region / vmem->block_size; 273 first_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, first_bit); 274 while (first_bit < vmem->bitmap_size) { 275 MemoryRegionSection tmp = *s; 276 277 offset = first_bit * vmem->block_size; 278 last_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, 279 first_bit + 1) - 1; 280 size = (last_bit - first_bit + 1) * vmem->block_size; 281 282 if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { 283 break; 284 } 285 ret = cb(&tmp, arg); 286 if (ret) { 287 break; 288 } 289 first_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, 290 last_bit + 2); 291 } 292 return ret; 293 } 294 295 static int virtio_mem_notify_populate_cb(MemoryRegionSection *s, void *arg) 296 { 297 RamDiscardListener *rdl = arg; 298 299 return rdl->notify_populate(rdl, s); 300 } 301 302 static int virtio_mem_notify_discard_cb(MemoryRegionSection *s, void *arg) 303 { 304 RamDiscardListener *rdl = arg; 305 306 rdl->notify_discard(rdl, s); 307 return 0; 308 } 309 310 static void virtio_mem_notify_unplug(VirtIOMEM *vmem, uint64_t offset, 311 uint64_t size) 312 { 313 RamDiscardListener *rdl; 314 315 QLIST_FOREACH(rdl, &vmem->rdl_list, next) { 316 MemoryRegionSection tmp = *rdl->section; 317 318 if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { 319 continue; 320 } 321 rdl->notify_discard(rdl, &tmp); 322 } 323 } 324 325 static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset, 326 uint64_t size) 327 { 328 RamDiscardListener *rdl, *rdl2; 329 int ret = 0; 330 331 QLIST_FOREACH(rdl, &vmem->rdl_list, next) { 332 MemoryRegionSection tmp = *rdl->section; 333 334 if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { 335 continue; 336 } 337 ret = rdl->notify_populate(rdl, &tmp); 338 if (ret) { 339 break; 340 } 341 } 342 343 if (ret) { 344 /* Notify all already-notified listeners. */ 345 QLIST_FOREACH(rdl2, &vmem->rdl_list, next) { 346 MemoryRegionSection tmp = *rdl2->section; 347 348 if (rdl2 == rdl) { 349 break; 350 } 351 if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { 352 continue; 353 } 354 rdl2->notify_discard(rdl2, &tmp); 355 } 356 } 357 return ret; 358 } 359 360 static void virtio_mem_notify_unplug_all(VirtIOMEM *vmem) 361 { 362 RamDiscardListener *rdl; 363 364 if (!vmem->size) { 365 return; 366 } 367 368 QLIST_FOREACH(rdl, &vmem->rdl_list, next) { 369 if (rdl->double_discard_supported) { 370 rdl->notify_discard(rdl, rdl->section); 371 } else { 372 virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl, 373 virtio_mem_notify_discard_cb); 374 } 375 } 376 } 377 378 static bool virtio_mem_test_bitmap(const VirtIOMEM *vmem, uint64_t start_gpa, 379 uint64_t size, bool plugged) 380 { 381 const unsigned long first_bit = (start_gpa - vmem->addr) / vmem->block_size; 382 const unsigned long last_bit = first_bit + (size / vmem->block_size) - 1; 383 unsigned long found_bit; 384 385 /* We fake a shorter bitmap to avoid searching too far. */ 386 if (plugged) { 387 found_bit = find_next_zero_bit(vmem->bitmap, last_bit + 1, first_bit); 388 } else { 389 found_bit = find_next_bit(vmem->bitmap, last_bit + 1, first_bit); 390 } 391 return found_bit > last_bit; 392 } 393 394 static void virtio_mem_set_bitmap(VirtIOMEM *vmem, uint64_t start_gpa, 395 uint64_t size, bool plugged) 396 { 397 const unsigned long bit = (start_gpa - vmem->addr) / vmem->block_size; 398 const unsigned long nbits = size / vmem->block_size; 399 400 if (plugged) { 401 bitmap_set(vmem->bitmap, bit, nbits); 402 } else { 403 bitmap_clear(vmem->bitmap, bit, nbits); 404 } 405 } 406 407 static void virtio_mem_send_response(VirtIOMEM *vmem, VirtQueueElement *elem, 408 struct virtio_mem_resp *resp) 409 { 410 VirtIODevice *vdev = VIRTIO_DEVICE(vmem); 411 VirtQueue *vq = vmem->vq; 412 413 trace_virtio_mem_send_response(le16_to_cpu(resp->type)); 414 iov_from_buf(elem->in_sg, elem->in_num, 0, resp, sizeof(*resp)); 415 416 virtqueue_push(vq, elem, sizeof(*resp)); 417 virtio_notify(vdev, vq); 418 } 419 420 static void virtio_mem_send_response_simple(VirtIOMEM *vmem, 421 VirtQueueElement *elem, 422 uint16_t type) 423 { 424 struct virtio_mem_resp resp = { 425 .type = cpu_to_le16(type), 426 }; 427 428 virtio_mem_send_response(vmem, elem, &resp); 429 } 430 431 static bool virtio_mem_valid_range(const VirtIOMEM *vmem, uint64_t gpa, 432 uint64_t size) 433 { 434 if (!QEMU_IS_ALIGNED(gpa, vmem->block_size)) { 435 return false; 436 } 437 if (gpa + size < gpa || !size) { 438 return false; 439 } 440 if (gpa < vmem->addr || gpa >= vmem->addr + vmem->usable_region_size) { 441 return false; 442 } 443 if (gpa + size > vmem->addr + vmem->usable_region_size) { 444 return false; 445 } 446 return true; 447 } 448 449 static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa, 450 uint64_t size, bool plug) 451 { 452 const uint64_t offset = start_gpa - vmem->addr; 453 RAMBlock *rb = vmem->memdev->mr.ram_block; 454 455 if (virtio_mem_is_busy()) { 456 return -EBUSY; 457 } 458 459 if (!plug) { 460 if (ram_block_discard_range(rb, offset, size)) { 461 return -EBUSY; 462 } 463 virtio_mem_notify_unplug(vmem, offset, size); 464 } else { 465 int ret = 0; 466 467 if (vmem->prealloc) { 468 void *area = memory_region_get_ram_ptr(&vmem->memdev->mr) + offset; 469 int fd = memory_region_get_fd(&vmem->memdev->mr); 470 Error *local_err = NULL; 471 472 qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err); 473 if (local_err) { 474 static bool warned; 475 476 /* 477 * Warn only once, we don't want to fill the log with these 478 * warnings. 479 */ 480 if (!warned) { 481 warn_report_err(local_err); 482 warned = true; 483 } else { 484 error_free(local_err); 485 } 486 ret = -EBUSY; 487 } 488 } 489 if (!ret) { 490 ret = virtio_mem_notify_plug(vmem, offset, size); 491 } 492 493 if (ret) { 494 /* Could be preallocation or a notifier populated memory. */ 495 ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size); 496 return -EBUSY; 497 } 498 } 499 virtio_mem_set_bitmap(vmem, start_gpa, size, plug); 500 return 0; 501 } 502 503 static int virtio_mem_state_change_request(VirtIOMEM *vmem, uint64_t gpa, 504 uint16_t nb_blocks, bool plug) 505 { 506 const uint64_t size = nb_blocks * vmem->block_size; 507 int ret; 508 509 if (!virtio_mem_valid_range(vmem, gpa, size)) { 510 return VIRTIO_MEM_RESP_ERROR; 511 } 512 513 if (plug && (vmem->size + size > vmem->requested_size)) { 514 return VIRTIO_MEM_RESP_NACK; 515 } 516 517 /* test if really all blocks are in the opposite state */ 518 if (!virtio_mem_test_bitmap(vmem, gpa, size, !plug)) { 519 return VIRTIO_MEM_RESP_ERROR; 520 } 521 522 ret = virtio_mem_set_block_state(vmem, gpa, size, plug); 523 if (ret) { 524 return VIRTIO_MEM_RESP_BUSY; 525 } 526 if (plug) { 527 vmem->size += size; 528 } else { 529 vmem->size -= size; 530 } 531 notifier_list_notify(&vmem->size_change_notifiers, &vmem->size); 532 return VIRTIO_MEM_RESP_ACK; 533 } 534 535 static void virtio_mem_plug_request(VirtIOMEM *vmem, VirtQueueElement *elem, 536 struct virtio_mem_req *req) 537 { 538 const uint64_t gpa = le64_to_cpu(req->u.plug.addr); 539 const uint16_t nb_blocks = le16_to_cpu(req->u.plug.nb_blocks); 540 uint16_t type; 541 542 trace_virtio_mem_plug_request(gpa, nb_blocks); 543 type = virtio_mem_state_change_request(vmem, gpa, nb_blocks, true); 544 virtio_mem_send_response_simple(vmem, elem, type); 545 } 546 547 static void virtio_mem_unplug_request(VirtIOMEM *vmem, VirtQueueElement *elem, 548 struct virtio_mem_req *req) 549 { 550 const uint64_t gpa = le64_to_cpu(req->u.unplug.addr); 551 const uint16_t nb_blocks = le16_to_cpu(req->u.unplug.nb_blocks); 552 uint16_t type; 553 554 trace_virtio_mem_unplug_request(gpa, nb_blocks); 555 type = virtio_mem_state_change_request(vmem, gpa, nb_blocks, false); 556 virtio_mem_send_response_simple(vmem, elem, type); 557 } 558 559 static void virtio_mem_resize_usable_region(VirtIOMEM *vmem, 560 uint64_t requested_size, 561 bool can_shrink) 562 { 563 uint64_t newsize = MIN(memory_region_size(&vmem->memdev->mr), 564 requested_size + VIRTIO_MEM_USABLE_EXTENT); 565 566 /* The usable region size always has to be multiples of the block size. */ 567 newsize = QEMU_ALIGN_UP(newsize, vmem->block_size); 568 569 if (!requested_size) { 570 newsize = 0; 571 } 572 573 if (newsize < vmem->usable_region_size && !can_shrink) { 574 return; 575 } 576 577 trace_virtio_mem_resized_usable_region(vmem->usable_region_size, newsize); 578 vmem->usable_region_size = newsize; 579 } 580 581 static int virtio_mem_unplug_all(VirtIOMEM *vmem) 582 { 583 RAMBlock *rb = vmem->memdev->mr.ram_block; 584 585 if (virtio_mem_is_busy()) { 586 return -EBUSY; 587 } 588 589 if (ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb))) { 590 return -EBUSY; 591 } 592 virtio_mem_notify_unplug_all(vmem); 593 594 bitmap_clear(vmem->bitmap, 0, vmem->bitmap_size); 595 if (vmem->size) { 596 vmem->size = 0; 597 notifier_list_notify(&vmem->size_change_notifiers, &vmem->size); 598 } 599 trace_virtio_mem_unplugged_all(); 600 virtio_mem_resize_usable_region(vmem, vmem->requested_size, true); 601 return 0; 602 } 603 604 static void virtio_mem_unplug_all_request(VirtIOMEM *vmem, 605 VirtQueueElement *elem) 606 { 607 trace_virtio_mem_unplug_all_request(); 608 if (virtio_mem_unplug_all(vmem)) { 609 virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_BUSY); 610 } else { 611 virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_ACK); 612 } 613 } 614 615 static void virtio_mem_state_request(VirtIOMEM *vmem, VirtQueueElement *elem, 616 struct virtio_mem_req *req) 617 { 618 const uint16_t nb_blocks = le16_to_cpu(req->u.state.nb_blocks); 619 const uint64_t gpa = le64_to_cpu(req->u.state.addr); 620 const uint64_t size = nb_blocks * vmem->block_size; 621 struct virtio_mem_resp resp = { 622 .type = cpu_to_le16(VIRTIO_MEM_RESP_ACK), 623 }; 624 625 trace_virtio_mem_state_request(gpa, nb_blocks); 626 if (!virtio_mem_valid_range(vmem, gpa, size)) { 627 virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_ERROR); 628 return; 629 } 630 631 if (virtio_mem_test_bitmap(vmem, gpa, size, true)) { 632 resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_PLUGGED); 633 } else if (virtio_mem_test_bitmap(vmem, gpa, size, false)) { 634 resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_UNPLUGGED); 635 } else { 636 resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_MIXED); 637 } 638 trace_virtio_mem_state_response(le16_to_cpu(resp.u.state.state)); 639 virtio_mem_send_response(vmem, elem, &resp); 640 } 641 642 static void virtio_mem_handle_request(VirtIODevice *vdev, VirtQueue *vq) 643 { 644 const int len = sizeof(struct virtio_mem_req); 645 VirtIOMEM *vmem = VIRTIO_MEM(vdev); 646 VirtQueueElement *elem; 647 struct virtio_mem_req req; 648 uint16_t type; 649 650 while (true) { 651 elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); 652 if (!elem) { 653 return; 654 } 655 656 if (iov_to_buf(elem->out_sg, elem->out_num, 0, &req, len) < len) { 657 virtio_error(vdev, "virtio-mem protocol violation: invalid request" 658 " size: %d", len); 659 virtqueue_detach_element(vq, elem, 0); 660 g_free(elem); 661 return; 662 } 663 664 if (iov_size(elem->in_sg, elem->in_num) < 665 sizeof(struct virtio_mem_resp)) { 666 virtio_error(vdev, "virtio-mem protocol violation: not enough space" 667 " for response: %zu", 668 iov_size(elem->in_sg, elem->in_num)); 669 virtqueue_detach_element(vq, elem, 0); 670 g_free(elem); 671 return; 672 } 673 674 type = le16_to_cpu(req.type); 675 switch (type) { 676 case VIRTIO_MEM_REQ_PLUG: 677 virtio_mem_plug_request(vmem, elem, &req); 678 break; 679 case VIRTIO_MEM_REQ_UNPLUG: 680 virtio_mem_unplug_request(vmem, elem, &req); 681 break; 682 case VIRTIO_MEM_REQ_UNPLUG_ALL: 683 virtio_mem_unplug_all_request(vmem, elem); 684 break; 685 case VIRTIO_MEM_REQ_STATE: 686 virtio_mem_state_request(vmem, elem, &req); 687 break; 688 default: 689 virtio_error(vdev, "virtio-mem protocol violation: unknown request" 690 " type: %d", type); 691 virtqueue_detach_element(vq, elem, 0); 692 g_free(elem); 693 return; 694 } 695 696 g_free(elem); 697 } 698 } 699 700 static void virtio_mem_get_config(VirtIODevice *vdev, uint8_t *config_data) 701 { 702 VirtIOMEM *vmem = VIRTIO_MEM(vdev); 703 struct virtio_mem_config *config = (void *) config_data; 704 705 config->block_size = cpu_to_le64(vmem->block_size); 706 config->node_id = cpu_to_le16(vmem->node); 707 config->requested_size = cpu_to_le64(vmem->requested_size); 708 config->plugged_size = cpu_to_le64(vmem->size); 709 config->addr = cpu_to_le64(vmem->addr); 710 config->region_size = cpu_to_le64(memory_region_size(&vmem->memdev->mr)); 711 config->usable_region_size = cpu_to_le64(vmem->usable_region_size); 712 } 713 714 static uint64_t virtio_mem_get_features(VirtIODevice *vdev, uint64_t features, 715 Error **errp) 716 { 717 MachineState *ms = MACHINE(qdev_get_machine()); 718 VirtIOMEM *vmem = VIRTIO_MEM(vdev); 719 720 if (ms->numa_state) { 721 #if defined(CONFIG_ACPI) 722 virtio_add_feature(&features, VIRTIO_MEM_F_ACPI_PXM); 723 #endif 724 } 725 assert(vmem->unplugged_inaccessible != ON_OFF_AUTO_AUTO); 726 if (vmem->unplugged_inaccessible == ON_OFF_AUTO_ON) { 727 virtio_add_feature(&features, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE); 728 } 729 return features; 730 } 731 732 static int virtio_mem_validate_features(VirtIODevice *vdev) 733 { 734 if (virtio_host_has_feature(vdev, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE) && 735 !virtio_vdev_has_feature(vdev, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE)) { 736 return -EFAULT; 737 } 738 return 0; 739 } 740 741 static void virtio_mem_system_reset(void *opaque) 742 { 743 VirtIOMEM *vmem = VIRTIO_MEM(opaque); 744 745 /* 746 * During usual resets, we will unplug all memory and shrink the usable 747 * region size. This is, however, not possible in all scenarios. Then, 748 * the guest has to deal with this manually (VIRTIO_MEM_REQ_UNPLUG_ALL). 749 */ 750 virtio_mem_unplug_all(vmem); 751 } 752 753 static void virtio_mem_device_realize(DeviceState *dev, Error **errp) 754 { 755 MachineState *ms = MACHINE(qdev_get_machine()); 756 int nb_numa_nodes = ms->numa_state ? ms->numa_state->num_nodes : 0; 757 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 758 VirtIOMEM *vmem = VIRTIO_MEM(dev); 759 uint64_t page_size; 760 RAMBlock *rb; 761 int ret; 762 763 if (!vmem->memdev) { 764 error_setg(errp, "'%s' property is not set", VIRTIO_MEM_MEMDEV_PROP); 765 return; 766 } else if (host_memory_backend_is_mapped(vmem->memdev)) { 767 error_setg(errp, "'%s' property specifies a busy memdev: %s", 768 VIRTIO_MEM_MEMDEV_PROP, 769 object_get_canonical_path_component(OBJECT(vmem->memdev))); 770 return; 771 } else if (!memory_region_is_ram(&vmem->memdev->mr) || 772 memory_region_is_rom(&vmem->memdev->mr) || 773 !vmem->memdev->mr.ram_block) { 774 error_setg(errp, "'%s' property specifies an unsupported memdev", 775 VIRTIO_MEM_MEMDEV_PROP); 776 return; 777 } else if (vmem->memdev->prealloc) { 778 error_setg(errp, "'%s' property specifies a memdev with preallocation" 779 " enabled: %s. Instead, specify 'prealloc=on' for the" 780 " virtio-mem device. ", VIRTIO_MEM_MEMDEV_PROP, 781 object_get_canonical_path_component(OBJECT(vmem->memdev))); 782 return; 783 } 784 785 if ((nb_numa_nodes && vmem->node >= nb_numa_nodes) || 786 (!nb_numa_nodes && vmem->node)) { 787 error_setg(errp, "'%s' property has value '%" PRIu32 "', which exceeds" 788 "the number of numa nodes: %d", VIRTIO_MEM_NODE_PROP, 789 vmem->node, nb_numa_nodes ? nb_numa_nodes : 1); 790 return; 791 } 792 793 if (enable_mlock) { 794 error_setg(errp, "Incompatible with mlock"); 795 return; 796 } 797 798 rb = vmem->memdev->mr.ram_block; 799 page_size = qemu_ram_pagesize(rb); 800 801 #if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS) 802 switch (vmem->unplugged_inaccessible) { 803 case ON_OFF_AUTO_AUTO: 804 if (virtio_mem_has_shared_zeropage(rb)) { 805 vmem->unplugged_inaccessible = ON_OFF_AUTO_OFF; 806 } else { 807 vmem->unplugged_inaccessible = ON_OFF_AUTO_ON; 808 } 809 break; 810 case ON_OFF_AUTO_OFF: 811 if (!virtio_mem_has_shared_zeropage(rb)) { 812 warn_report("'%s' property set to 'off' with a memdev that does" 813 " not support the shared zeropage.", 814 VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP); 815 } 816 break; 817 default: 818 break; 819 } 820 #else /* VIRTIO_MEM_HAS_LEGACY_GUESTS */ 821 vmem->unplugged_inaccessible = ON_OFF_AUTO_ON; 822 #endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */ 823 824 /* 825 * If the block size wasn't configured by the user, use a sane default. This 826 * allows using hugetlbfs backends of any page size without manual 827 * intervention. 828 */ 829 if (!vmem->block_size) { 830 vmem->block_size = virtio_mem_default_block_size(rb); 831 } 832 833 if (vmem->block_size < page_size) { 834 error_setg(errp, "'%s' property has to be at least the page size (0x%" 835 PRIx64 ")", VIRTIO_MEM_BLOCK_SIZE_PROP, page_size); 836 return; 837 } else if (vmem->block_size < virtio_mem_default_block_size(rb)) { 838 warn_report("'%s' property is smaller than the default block size (%" 839 PRIx64 " MiB)", VIRTIO_MEM_BLOCK_SIZE_PROP, 840 virtio_mem_default_block_size(rb) / MiB); 841 } 842 if (!QEMU_IS_ALIGNED(vmem->requested_size, vmem->block_size)) { 843 error_setg(errp, "'%s' property has to be multiples of '%s' (0x%" PRIx64 844 ")", VIRTIO_MEM_REQUESTED_SIZE_PROP, 845 VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size); 846 return; 847 } else if (!QEMU_IS_ALIGNED(vmem->addr, vmem->block_size)) { 848 error_setg(errp, "'%s' property has to be multiples of '%s' (0x%" PRIx64 849 ")", VIRTIO_MEM_ADDR_PROP, VIRTIO_MEM_BLOCK_SIZE_PROP, 850 vmem->block_size); 851 return; 852 } else if (!QEMU_IS_ALIGNED(memory_region_size(&vmem->memdev->mr), 853 vmem->block_size)) { 854 error_setg(errp, "'%s' property memdev size has to be multiples of" 855 "'%s' (0x%" PRIx64 ")", VIRTIO_MEM_MEMDEV_PROP, 856 VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size); 857 return; 858 } 859 860 if (ram_block_coordinated_discard_require(true)) { 861 error_setg(errp, "Discarding RAM is disabled"); 862 return; 863 } 864 865 ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb)); 866 if (ret) { 867 error_setg_errno(errp, -ret, "Unexpected error discarding RAM"); 868 ram_block_coordinated_discard_require(false); 869 return; 870 } 871 872 virtio_mem_resize_usable_region(vmem, vmem->requested_size, true); 873 874 vmem->bitmap_size = memory_region_size(&vmem->memdev->mr) / 875 vmem->block_size; 876 vmem->bitmap = bitmap_new(vmem->bitmap_size); 877 878 virtio_init(vdev, VIRTIO_ID_MEM, sizeof(struct virtio_mem_config)); 879 vmem->vq = virtio_add_queue(vdev, 128, virtio_mem_handle_request); 880 881 host_memory_backend_set_mapped(vmem->memdev, true); 882 vmstate_register_ram(&vmem->memdev->mr, DEVICE(vmem)); 883 if (vmem->early_migration) { 884 vmstate_register(VMSTATE_IF(vmem), VMSTATE_INSTANCE_ID_ANY, 885 &vmstate_virtio_mem_device_early, vmem); 886 } 887 qemu_register_reset(virtio_mem_system_reset, vmem); 888 889 /* 890 * Set ourselves as RamDiscardManager before the plug handler maps the 891 * memory region and exposes it via an address space. 892 */ 893 memory_region_set_ram_discard_manager(&vmem->memdev->mr, 894 RAM_DISCARD_MANAGER(vmem)); 895 } 896 897 static void virtio_mem_device_unrealize(DeviceState *dev) 898 { 899 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 900 VirtIOMEM *vmem = VIRTIO_MEM(dev); 901 902 /* 903 * The unplug handler unmapped the memory region, it cannot be 904 * found via an address space anymore. Unset ourselves. 905 */ 906 memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL); 907 qemu_unregister_reset(virtio_mem_system_reset, vmem); 908 if (vmem->early_migration) { 909 vmstate_unregister(VMSTATE_IF(vmem), &vmstate_virtio_mem_device_early, 910 vmem); 911 } 912 vmstate_unregister_ram(&vmem->memdev->mr, DEVICE(vmem)); 913 host_memory_backend_set_mapped(vmem->memdev, false); 914 virtio_del_queue(vdev, 0); 915 virtio_cleanup(vdev); 916 g_free(vmem->bitmap); 917 ram_block_coordinated_discard_require(false); 918 } 919 920 static int virtio_mem_discard_range_cb(const VirtIOMEM *vmem, void *arg, 921 uint64_t offset, uint64_t size) 922 { 923 RAMBlock *rb = vmem->memdev->mr.ram_block; 924 925 return ram_block_discard_range(rb, offset, size) ? -EINVAL : 0; 926 } 927 928 static int virtio_mem_restore_unplugged(VirtIOMEM *vmem) 929 { 930 /* Make sure all memory is really discarded after migration. */ 931 return virtio_mem_for_each_unplugged_range(vmem, NULL, 932 virtio_mem_discard_range_cb); 933 } 934 935 static int virtio_mem_post_load(void *opaque, int version_id) 936 { 937 VirtIOMEM *vmem = VIRTIO_MEM(opaque); 938 RamDiscardListener *rdl; 939 int ret; 940 941 /* 942 * We started out with all memory discarded and our memory region is mapped 943 * into an address space. Replay, now that we updated the bitmap. 944 */ 945 QLIST_FOREACH(rdl, &vmem->rdl_list, next) { 946 ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl, 947 virtio_mem_notify_populate_cb); 948 if (ret) { 949 return ret; 950 } 951 } 952 953 if (migration_in_incoming_postcopy()) { 954 return 0; 955 } 956 957 return virtio_mem_restore_unplugged(vmem); 958 } 959 960 typedef struct VirtIOMEMMigSanityChecks { 961 VirtIOMEM *parent; 962 uint64_t addr; 963 uint64_t region_size; 964 uint64_t block_size; 965 uint32_t node; 966 } VirtIOMEMMigSanityChecks; 967 968 static int virtio_mem_mig_sanity_checks_pre_save(void *opaque) 969 { 970 VirtIOMEMMigSanityChecks *tmp = opaque; 971 VirtIOMEM *vmem = tmp->parent; 972 973 tmp->addr = vmem->addr; 974 tmp->region_size = memory_region_size(&vmem->memdev->mr); 975 tmp->block_size = vmem->block_size; 976 tmp->node = vmem->node; 977 return 0; 978 } 979 980 static int virtio_mem_mig_sanity_checks_post_load(void *opaque, int version_id) 981 { 982 VirtIOMEMMigSanityChecks *tmp = opaque; 983 VirtIOMEM *vmem = tmp->parent; 984 const uint64_t new_region_size = memory_region_size(&vmem->memdev->mr); 985 986 if (tmp->addr != vmem->addr) { 987 error_report("Property '%s' changed from 0x%" PRIx64 " to 0x%" PRIx64, 988 VIRTIO_MEM_ADDR_PROP, tmp->addr, vmem->addr); 989 return -EINVAL; 990 } 991 /* 992 * Note: Preparation for resizeable memory regions. The maximum size 993 * of the memory region must not change during migration. 994 */ 995 if (tmp->region_size != new_region_size) { 996 error_report("Property '%s' size changed from 0x%" PRIx64 " to 0x%" 997 PRIx64, VIRTIO_MEM_MEMDEV_PROP, tmp->region_size, 998 new_region_size); 999 return -EINVAL; 1000 } 1001 if (tmp->block_size != vmem->block_size) { 1002 error_report("Property '%s' changed from 0x%" PRIx64 " to 0x%" PRIx64, 1003 VIRTIO_MEM_BLOCK_SIZE_PROP, tmp->block_size, 1004 vmem->block_size); 1005 return -EINVAL; 1006 } 1007 if (tmp->node != vmem->node) { 1008 error_report("Property '%s' changed from %" PRIu32 " to %" PRIu32, 1009 VIRTIO_MEM_NODE_PROP, tmp->node, vmem->node); 1010 return -EINVAL; 1011 } 1012 return 0; 1013 } 1014 1015 static const VMStateDescription vmstate_virtio_mem_sanity_checks = { 1016 .name = "virtio-mem-device/sanity-checks", 1017 .pre_save = virtio_mem_mig_sanity_checks_pre_save, 1018 .post_load = virtio_mem_mig_sanity_checks_post_load, 1019 .fields = (VMStateField[]) { 1020 VMSTATE_UINT64(addr, VirtIOMEMMigSanityChecks), 1021 VMSTATE_UINT64(region_size, VirtIOMEMMigSanityChecks), 1022 VMSTATE_UINT64(block_size, VirtIOMEMMigSanityChecks), 1023 VMSTATE_UINT32(node, VirtIOMEMMigSanityChecks), 1024 VMSTATE_END_OF_LIST(), 1025 }, 1026 }; 1027 1028 static bool virtio_mem_vmstate_field_exists(void *opaque, int version_id) 1029 { 1030 const VirtIOMEM *vmem = VIRTIO_MEM(opaque); 1031 1032 /* With early migration, these fields were already migrated. */ 1033 return !vmem->early_migration; 1034 } 1035 1036 static const VMStateDescription vmstate_virtio_mem_device = { 1037 .name = "virtio-mem-device", 1038 .minimum_version_id = 1, 1039 .version_id = 1, 1040 .priority = MIG_PRI_VIRTIO_MEM, 1041 .post_load = virtio_mem_post_load, 1042 .fields = (VMStateField[]) { 1043 VMSTATE_WITH_TMP_TEST(VirtIOMEM, virtio_mem_vmstate_field_exists, 1044 VirtIOMEMMigSanityChecks, 1045 vmstate_virtio_mem_sanity_checks), 1046 VMSTATE_UINT64(usable_region_size, VirtIOMEM), 1047 VMSTATE_UINT64_TEST(size, VirtIOMEM, virtio_mem_vmstate_field_exists), 1048 VMSTATE_UINT64(requested_size, VirtIOMEM), 1049 VMSTATE_BITMAP_TEST(bitmap, VirtIOMEM, virtio_mem_vmstate_field_exists, 1050 0, bitmap_size), 1051 VMSTATE_END_OF_LIST() 1052 }, 1053 }; 1054 1055 /* 1056 * Transfer properties that are immutable while migration is active early, 1057 * such that we have have this information around before migrating any RAM 1058 * content. 1059 * 1060 * Note that virtio_mem_is_busy() makes sure these properties can no longer 1061 * change on the migration source until migration completed. 1062 * 1063 * With QEMU compat machines, we transmit these properties later, via 1064 * vmstate_virtio_mem_device instead -- see virtio_mem_vmstate_field_exists(). 1065 */ 1066 static const VMStateDescription vmstate_virtio_mem_device_early = { 1067 .name = "virtio-mem-device-early", 1068 .minimum_version_id = 1, 1069 .version_id = 1, 1070 .early_setup = true, 1071 .fields = (VMStateField[]) { 1072 VMSTATE_WITH_TMP(VirtIOMEM, VirtIOMEMMigSanityChecks, 1073 vmstate_virtio_mem_sanity_checks), 1074 VMSTATE_UINT64(size, VirtIOMEM), 1075 VMSTATE_BITMAP(bitmap, VirtIOMEM, 0, bitmap_size), 1076 VMSTATE_END_OF_LIST() 1077 }, 1078 }; 1079 1080 static const VMStateDescription vmstate_virtio_mem = { 1081 .name = "virtio-mem", 1082 .minimum_version_id = 1, 1083 .version_id = 1, 1084 .fields = (VMStateField[]) { 1085 VMSTATE_VIRTIO_DEVICE, 1086 VMSTATE_END_OF_LIST() 1087 }, 1088 }; 1089 1090 static void virtio_mem_fill_device_info(const VirtIOMEM *vmem, 1091 VirtioMEMDeviceInfo *vi) 1092 { 1093 vi->memaddr = vmem->addr; 1094 vi->node = vmem->node; 1095 vi->requested_size = vmem->requested_size; 1096 vi->size = vmem->size; 1097 vi->max_size = memory_region_size(&vmem->memdev->mr); 1098 vi->block_size = vmem->block_size; 1099 vi->memdev = object_get_canonical_path(OBJECT(vmem->memdev)); 1100 } 1101 1102 static MemoryRegion *virtio_mem_get_memory_region(VirtIOMEM *vmem, Error **errp) 1103 { 1104 if (!vmem->memdev) { 1105 error_setg(errp, "'%s' property must be set", VIRTIO_MEM_MEMDEV_PROP); 1106 return NULL; 1107 } 1108 1109 return &vmem->memdev->mr; 1110 } 1111 1112 static void virtio_mem_add_size_change_notifier(VirtIOMEM *vmem, 1113 Notifier *notifier) 1114 { 1115 notifier_list_add(&vmem->size_change_notifiers, notifier); 1116 } 1117 1118 static void virtio_mem_remove_size_change_notifier(VirtIOMEM *vmem, 1119 Notifier *notifier) 1120 { 1121 notifier_remove(notifier); 1122 } 1123 1124 static void virtio_mem_get_size(Object *obj, Visitor *v, const char *name, 1125 void *opaque, Error **errp) 1126 { 1127 const VirtIOMEM *vmem = VIRTIO_MEM(obj); 1128 uint64_t value = vmem->size; 1129 1130 visit_type_size(v, name, &value, errp); 1131 } 1132 1133 static void virtio_mem_get_requested_size(Object *obj, Visitor *v, 1134 const char *name, void *opaque, 1135 Error **errp) 1136 { 1137 const VirtIOMEM *vmem = VIRTIO_MEM(obj); 1138 uint64_t value = vmem->requested_size; 1139 1140 visit_type_size(v, name, &value, errp); 1141 } 1142 1143 static void virtio_mem_set_requested_size(Object *obj, Visitor *v, 1144 const char *name, void *opaque, 1145 Error **errp) 1146 { 1147 VirtIOMEM *vmem = VIRTIO_MEM(obj); 1148 uint64_t value; 1149 1150 if (!visit_type_size(v, name, &value, errp)) { 1151 return; 1152 } 1153 1154 /* 1155 * The block size and memory backend are not fixed until the device was 1156 * realized. realize() will verify these properties then. 1157 */ 1158 if (DEVICE(obj)->realized) { 1159 if (!QEMU_IS_ALIGNED(value, vmem->block_size)) { 1160 error_setg(errp, "'%s' has to be multiples of '%s' (0x%" PRIx64 1161 ")", name, VIRTIO_MEM_BLOCK_SIZE_PROP, 1162 vmem->block_size); 1163 return; 1164 } else if (value > memory_region_size(&vmem->memdev->mr)) { 1165 error_setg(errp, "'%s' cannot exceed the memory backend size" 1166 "(0x%" PRIx64 ")", name, 1167 memory_region_size(&vmem->memdev->mr)); 1168 return; 1169 } 1170 1171 if (value != vmem->requested_size) { 1172 virtio_mem_resize_usable_region(vmem, value, false); 1173 vmem->requested_size = value; 1174 } 1175 /* 1176 * Trigger a config update so the guest gets notified. We trigger 1177 * even if the size didn't change (especially helpful for debugging). 1178 */ 1179 virtio_notify_config(VIRTIO_DEVICE(vmem)); 1180 } else { 1181 vmem->requested_size = value; 1182 } 1183 } 1184 1185 static void virtio_mem_get_block_size(Object *obj, Visitor *v, const char *name, 1186 void *opaque, Error **errp) 1187 { 1188 const VirtIOMEM *vmem = VIRTIO_MEM(obj); 1189 uint64_t value = vmem->block_size; 1190 1191 /* 1192 * If not configured by the user (and we're not realized yet), use the 1193 * default block size we would use with the current memory backend. 1194 */ 1195 if (!value) { 1196 if (vmem->memdev && memory_region_is_ram(&vmem->memdev->mr)) { 1197 value = virtio_mem_default_block_size(vmem->memdev->mr.ram_block); 1198 } else { 1199 value = virtio_mem_thp_size(); 1200 } 1201 } 1202 1203 visit_type_size(v, name, &value, errp); 1204 } 1205 1206 static void virtio_mem_set_block_size(Object *obj, Visitor *v, const char *name, 1207 void *opaque, Error **errp) 1208 { 1209 VirtIOMEM *vmem = VIRTIO_MEM(obj); 1210 uint64_t value; 1211 1212 if (DEVICE(obj)->realized) { 1213 error_setg(errp, "'%s' cannot be changed", name); 1214 return; 1215 } 1216 1217 if (!visit_type_size(v, name, &value, errp)) { 1218 return; 1219 } 1220 1221 if (value < VIRTIO_MEM_MIN_BLOCK_SIZE) { 1222 error_setg(errp, "'%s' property has to be at least 0x%" PRIx32, name, 1223 VIRTIO_MEM_MIN_BLOCK_SIZE); 1224 return; 1225 } else if (!is_power_of_2(value)) { 1226 error_setg(errp, "'%s' property has to be a power of two", name); 1227 return; 1228 } 1229 vmem->block_size = value; 1230 } 1231 1232 static void virtio_mem_instance_init(Object *obj) 1233 { 1234 VirtIOMEM *vmem = VIRTIO_MEM(obj); 1235 1236 notifier_list_init(&vmem->size_change_notifiers); 1237 QLIST_INIT(&vmem->rdl_list); 1238 1239 object_property_add(obj, VIRTIO_MEM_SIZE_PROP, "size", virtio_mem_get_size, 1240 NULL, NULL, NULL); 1241 object_property_add(obj, VIRTIO_MEM_REQUESTED_SIZE_PROP, "size", 1242 virtio_mem_get_requested_size, 1243 virtio_mem_set_requested_size, NULL, NULL); 1244 object_property_add(obj, VIRTIO_MEM_BLOCK_SIZE_PROP, "size", 1245 virtio_mem_get_block_size, virtio_mem_set_block_size, 1246 NULL, NULL); 1247 } 1248 1249 static Property virtio_mem_properties[] = { 1250 DEFINE_PROP_UINT64(VIRTIO_MEM_ADDR_PROP, VirtIOMEM, addr, 0), 1251 DEFINE_PROP_UINT32(VIRTIO_MEM_NODE_PROP, VirtIOMEM, node, 0), 1252 DEFINE_PROP_BOOL(VIRTIO_MEM_PREALLOC_PROP, VirtIOMEM, prealloc, false), 1253 DEFINE_PROP_LINK(VIRTIO_MEM_MEMDEV_PROP, VirtIOMEM, memdev, 1254 TYPE_MEMORY_BACKEND, HostMemoryBackend *), 1255 #if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS) 1256 DEFINE_PROP_ON_OFF_AUTO(VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP, VirtIOMEM, 1257 unplugged_inaccessible, ON_OFF_AUTO_AUTO), 1258 #endif 1259 DEFINE_PROP_BOOL(VIRTIO_MEM_EARLY_MIGRATION_PROP, VirtIOMEM, 1260 early_migration, true), 1261 DEFINE_PROP_END_OF_LIST(), 1262 }; 1263 1264 static uint64_t virtio_mem_rdm_get_min_granularity(const RamDiscardManager *rdm, 1265 const MemoryRegion *mr) 1266 { 1267 const VirtIOMEM *vmem = VIRTIO_MEM(rdm); 1268 1269 g_assert(mr == &vmem->memdev->mr); 1270 return vmem->block_size; 1271 } 1272 1273 static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm, 1274 const MemoryRegionSection *s) 1275 { 1276 const VirtIOMEM *vmem = VIRTIO_MEM(rdm); 1277 uint64_t start_gpa = vmem->addr + s->offset_within_region; 1278 uint64_t end_gpa = start_gpa + int128_get64(s->size); 1279 1280 g_assert(s->mr == &vmem->memdev->mr); 1281 1282 start_gpa = QEMU_ALIGN_DOWN(start_gpa, vmem->block_size); 1283 end_gpa = QEMU_ALIGN_UP(end_gpa, vmem->block_size); 1284 1285 if (!virtio_mem_valid_range(vmem, start_gpa, end_gpa - start_gpa)) { 1286 return false; 1287 } 1288 1289 return virtio_mem_test_bitmap(vmem, start_gpa, end_gpa - start_gpa, true); 1290 } 1291 1292 struct VirtIOMEMReplayData { 1293 void *fn; 1294 void *opaque; 1295 }; 1296 1297 static int virtio_mem_rdm_replay_populated_cb(MemoryRegionSection *s, void *arg) 1298 { 1299 struct VirtIOMEMReplayData *data = arg; 1300 1301 return ((ReplayRamPopulate)data->fn)(s, data->opaque); 1302 } 1303 1304 static int virtio_mem_rdm_replay_populated(const RamDiscardManager *rdm, 1305 MemoryRegionSection *s, 1306 ReplayRamPopulate replay_fn, 1307 void *opaque) 1308 { 1309 const VirtIOMEM *vmem = VIRTIO_MEM(rdm); 1310 struct VirtIOMEMReplayData data = { 1311 .fn = replay_fn, 1312 .opaque = opaque, 1313 }; 1314 1315 g_assert(s->mr == &vmem->memdev->mr); 1316 return virtio_mem_for_each_plugged_section(vmem, s, &data, 1317 virtio_mem_rdm_replay_populated_cb); 1318 } 1319 1320 static int virtio_mem_rdm_replay_discarded_cb(MemoryRegionSection *s, 1321 void *arg) 1322 { 1323 struct VirtIOMEMReplayData *data = arg; 1324 1325 ((ReplayRamDiscard)data->fn)(s, data->opaque); 1326 return 0; 1327 } 1328 1329 static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm, 1330 MemoryRegionSection *s, 1331 ReplayRamDiscard replay_fn, 1332 void *opaque) 1333 { 1334 const VirtIOMEM *vmem = VIRTIO_MEM(rdm); 1335 struct VirtIOMEMReplayData data = { 1336 .fn = replay_fn, 1337 .opaque = opaque, 1338 }; 1339 1340 g_assert(s->mr == &vmem->memdev->mr); 1341 virtio_mem_for_each_unplugged_section(vmem, s, &data, 1342 virtio_mem_rdm_replay_discarded_cb); 1343 } 1344 1345 static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm, 1346 RamDiscardListener *rdl, 1347 MemoryRegionSection *s) 1348 { 1349 VirtIOMEM *vmem = VIRTIO_MEM(rdm); 1350 int ret; 1351 1352 g_assert(s->mr == &vmem->memdev->mr); 1353 rdl->section = memory_region_section_new_copy(s); 1354 1355 QLIST_INSERT_HEAD(&vmem->rdl_list, rdl, next); 1356 ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl, 1357 virtio_mem_notify_populate_cb); 1358 if (ret) { 1359 error_report("%s: Replaying plugged ranges failed: %s", __func__, 1360 strerror(-ret)); 1361 } 1362 } 1363 1364 static void virtio_mem_rdm_unregister_listener(RamDiscardManager *rdm, 1365 RamDiscardListener *rdl) 1366 { 1367 VirtIOMEM *vmem = VIRTIO_MEM(rdm); 1368 1369 g_assert(rdl->section->mr == &vmem->memdev->mr); 1370 if (vmem->size) { 1371 if (rdl->double_discard_supported) { 1372 rdl->notify_discard(rdl, rdl->section); 1373 } else { 1374 virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl, 1375 virtio_mem_notify_discard_cb); 1376 } 1377 } 1378 1379 memory_region_section_free_copy(rdl->section); 1380 rdl->section = NULL; 1381 QLIST_REMOVE(rdl, next); 1382 } 1383 1384 static void virtio_mem_class_init(ObjectClass *klass, void *data) 1385 { 1386 DeviceClass *dc = DEVICE_CLASS(klass); 1387 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); 1388 VirtIOMEMClass *vmc = VIRTIO_MEM_CLASS(klass); 1389 RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(klass); 1390 1391 device_class_set_props(dc, virtio_mem_properties); 1392 dc->vmsd = &vmstate_virtio_mem; 1393 1394 set_bit(DEVICE_CATEGORY_MISC, dc->categories); 1395 vdc->realize = virtio_mem_device_realize; 1396 vdc->unrealize = virtio_mem_device_unrealize; 1397 vdc->get_config = virtio_mem_get_config; 1398 vdc->get_features = virtio_mem_get_features; 1399 vdc->validate_features = virtio_mem_validate_features; 1400 vdc->vmsd = &vmstate_virtio_mem_device; 1401 1402 vmc->fill_device_info = virtio_mem_fill_device_info; 1403 vmc->get_memory_region = virtio_mem_get_memory_region; 1404 vmc->add_size_change_notifier = virtio_mem_add_size_change_notifier; 1405 vmc->remove_size_change_notifier = virtio_mem_remove_size_change_notifier; 1406 1407 rdmc->get_min_granularity = virtio_mem_rdm_get_min_granularity; 1408 rdmc->is_populated = virtio_mem_rdm_is_populated; 1409 rdmc->replay_populated = virtio_mem_rdm_replay_populated; 1410 rdmc->replay_discarded = virtio_mem_rdm_replay_discarded; 1411 rdmc->register_listener = virtio_mem_rdm_register_listener; 1412 rdmc->unregister_listener = virtio_mem_rdm_unregister_listener; 1413 } 1414 1415 static const TypeInfo virtio_mem_info = { 1416 .name = TYPE_VIRTIO_MEM, 1417 .parent = TYPE_VIRTIO_DEVICE, 1418 .instance_size = sizeof(VirtIOMEM), 1419 .instance_init = virtio_mem_instance_init, 1420 .class_init = virtio_mem_class_init, 1421 .class_size = sizeof(VirtIOMEMClass), 1422 .interfaces = (InterfaceInfo[]) { 1423 { TYPE_RAM_DISCARD_MANAGER }, 1424 { } 1425 }, 1426 }; 1427 1428 static void virtio_register_types(void) 1429 { 1430 type_register_static(&virtio_mem_info); 1431 } 1432 1433 type_init(virtio_register_types) 1434