/*
 * Virtio MEM device
 *
 * Copyright (C) 2020 Red Hat, Inc.
 *
 * Authors:
 *  David Hildenbrand <david@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/iov.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include "qemu/units.h"
#include "sysemu/numa.h"
#include "sysemu/sysemu.h"
#include "sysemu/reset.h"
#include "hw/virtio/virtio.h"
#include "hw/virtio/virtio-bus.h"
#include "hw/virtio/virtio-access.h"
#include "hw/virtio/virtio-mem.h"
#include "qapi/error.h"
#include "qapi/visitor.h"
#include "exec/ram_addr.h"
#include "migration/misc.h"
#include "hw/boards.h"
#include "hw/qdev-properties.h"
#include CONFIG_DEVICES
#include "trace.h"

/*
 * We only had legacy x86 guests that did not support
 * VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE. Other targets don't have legacy guests.
 */
#if defined(TARGET_X86_64) || defined(TARGET_I386)
#define VIRTIO_MEM_HAS_LEGACY_GUESTS
#endif

/*
 * Let's not allow blocks smaller than 1 MiB, for example, to keep the tracking
 * bitmap small.
 */
#define VIRTIO_MEM_MIN_BLOCK_SIZE ((uint32_t)(1 * MiB))

static uint32_t virtio_mem_default_thp_size(void)
{
    uint32_t default_thp_size = VIRTIO_MEM_MIN_BLOCK_SIZE;

#if defined(__x86_64__) || defined(__arm__) || defined(__powerpc64__)
    default_thp_size = 2 * MiB;
#elif defined(__aarch64__)
    if (qemu_real_host_page_size == 4 * KiB) {
        default_thp_size = 2 * MiB;
    } else if (qemu_real_host_page_size == 16 * KiB) {
        default_thp_size = 32 * MiB;
    } else if (qemu_real_host_page_size == 64 * KiB) {
        default_thp_size = 512 * MiB;
    }
#endif

    return default_thp_size;
}

/*
 * We want to have a reasonable default block size such that
 * 1. We avoid splitting THPs when unplugging memory, which degrades
 *    performance.
 * 2. We avoid placing THPs for plugged blocks that also cover unplugged
 *    blocks.
 *
 * The actual THP size might differ between Linux kernels, so we try to probe
 * it. In the future (if we ever run into issues regarding 2.), we might want
 * to disable THP in case we fail to properly probe the THP size, or if the
 * block size is configured smaller than the THP size.
 */
static uint32_t thp_size;

#define HPAGE_PMD_SIZE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
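/*
 * Probe the host's THP size once via HPAGE_PMD_SIZE_PATH and cache the
 * result; fall back to an architecture-specific default if probing fails.
 */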
static uint32_t virtio_mem_thp_size(void)
{
    gchar *content = NULL;
    const char *endptr;
    uint64_t tmp;

    if (thp_size) {
        return thp_size;
    }

    /*
     * Try to probe the actual THP size, falling back to (sane but possibly
     * incorrect) default sizes.
     */
    if (g_file_get_contents(HPAGE_PMD_SIZE_PATH, &content, NULL, NULL) &&
        !qemu_strtou64(content, &endptr, 0, &tmp) &&
        (!endptr || *endptr == '\n')) {
        /* Sanity-check the value and fall back to something reasonable. */
        if (!tmp || !is_power_of_2(tmp)) {
            warn_report("Read unsupported THP size: %" PRIx64, tmp);
        } else {
            thp_size = tmp;
        }
    }

    if (!thp_size) {
        thp_size = virtio_mem_default_thp_size();
        warn_report("Could not detect THP size, falling back to %" PRIx64
                    " MiB.", thp_size / MiB);
    }

    g_free(content);
    return thp_size;
}

static uint64_t virtio_mem_default_block_size(RAMBlock *rb)
{
    const uint64_t page_size = qemu_ram_pagesize(rb);

    /* We can have hugetlbfs with a page size smaller than the THP size. */
    if (page_size == qemu_real_host_page_size) {
        return MAX(page_size, virtio_mem_thp_size());
    }
    return MAX(page_size, VIRTIO_MEM_MIN_BLOCK_SIZE);
}

#if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
static bool virtio_mem_has_shared_zeropage(RAMBlock *rb)
{
    /*
     * We only have a guaranteed shared zeropage on ordinary MAP_PRIVATE
     * anonymous RAM. In any other case, reading unplugged *can* populate a
     * fresh page, consuming actual memory.
     */
    return !qemu_ram_is_shared(rb) && rb->fd < 0 &&
           qemu_ram_pagesize(rb) == qemu_real_host_page_size;
}
#endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */

/*
 * Size the usable region bigger than the requested size if possible.
 * Especially Linux guests will only add (aligned) memory blocks in case they
 * fully fit into the usable region, but plug+online only a subset of the
 * pages. The memory block size corresponds mostly to the section size.
 *
 * This allows e.g., to add 20MB with a section size of 128MB on x86_64, and
 * a section size of 512MB on arm64 (as long as the start address is properly
 * aligned, similar to ordinary DIMMs).
 *
 * We can change this at any time and maybe even make it configurable if
 * necessary (as the section size can change). But it's more likely that the
 * section size will rather get smaller and not bigger over time.
 */
#if defined(TARGET_X86_64) || defined(TARGET_I386)
#define VIRTIO_MEM_USABLE_EXTENT (2 * (128 * MiB))
#elif defined(TARGET_ARM)
#define VIRTIO_MEM_USABLE_EXTENT (2 * (512 * MiB))
#else
#error VIRTIO_MEM_USABLE_EXTENT not defined
#endif
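/*
 * Example: on x86-64, requested-size=20MiB results in a usable region of
 * 20MiB + 2 * 128MiB, clamped to the memdev size and aligned up to the
 * block size (see virtio_mem_resize_usable_region()).
 */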
static bool virtio_mem_is_busy(void)
{
    /*
     * Postcopy cannot handle concurrent discards and we don't want to migrate
     * pages on-demand with stale content when plugging new blocks.
     *
     * For precopy, we don't want unplugged blocks in our migration stream, and
     * when plugging new blocks, the page content might differ between source
     * and destination (observable by the guest when not initializing pages
     * after plugging them) until we're running on the destination (as we
     * didn't migrate these blocks when they were unplugged).
     */
    return migration_in_incoming_postcopy() || !migration_is_idle();
}

typedef int (*virtio_mem_range_cb)(const VirtIOMEM *vmem, void *arg,
                                   uint64_t offset, uint64_t size);

static int virtio_mem_for_each_unplugged_range(const VirtIOMEM *vmem, void *arg,
                                               virtio_mem_range_cb cb)
{
    unsigned long first_zero_bit, last_zero_bit;
    uint64_t offset, size;
    int ret = 0;

    first_zero_bit = find_first_zero_bit(vmem->bitmap, vmem->bitmap_size);
    while (first_zero_bit < vmem->bitmap_size) {
        offset = first_zero_bit * vmem->block_size;
        last_zero_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
                                      first_zero_bit + 1) - 1;
        size = (last_zero_bit - first_zero_bit + 1) * vmem->block_size;

        ret = cb(vmem, arg, offset, size);
        if (ret) {
            break;
        }
        first_zero_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
                                            last_zero_bit + 2);
    }
    return ret;
}

/*
 * Adjust the memory section to cover the intersection with the given range.
 *
 * Returns false if the intersection is empty, otherwise returns true.
 */
static bool virtio_mem_intersect_memory_section(MemoryRegionSection *s,
                                                uint64_t offset, uint64_t size)
{
    uint64_t start = MAX(s->offset_within_region, offset);
    uint64_t end = MIN(s->offset_within_region + int128_get64(s->size),
                       offset + size);

    if (end <= start) {
        return false;
    }

    s->offset_within_address_space += start - s->offset_within_region;
    s->offset_within_region = start;
    s->size = int128_make64(end - start);
    return true;
}

typedef int (*virtio_mem_section_cb)(MemoryRegionSection *s, void *arg);

static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem,
                                               MemoryRegionSection *s,
                                               void *arg,
                                               virtio_mem_section_cb cb)
{
    unsigned long first_bit, last_bit;
    uint64_t offset, size;
    int ret = 0;

    first_bit = s->offset_within_region / vmem->block_size;
    first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, first_bit);
    while (first_bit < vmem->bitmap_size) {
        MemoryRegionSection tmp = *s;

        offset = first_bit * vmem->block_size;
        last_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
                                      first_bit + 1) - 1;
        size = (last_bit - first_bit + 1) * vmem->block_size;

        if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
            break;
        }
        ret = cb(&tmp, arg);
        if (ret) {
            break;
        }
        first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
                                  last_bit + 2);
    }
    return ret;
}

static int virtio_mem_for_each_unplugged_section(const VirtIOMEM *vmem,
                                                 MemoryRegionSection *s,
                                                 void *arg,
                                                 virtio_mem_section_cb cb)
{
    unsigned long first_bit, last_bit;
    uint64_t offset, size;
    int ret = 0;

    first_bit = s->offset_within_region / vmem->block_size;
    first_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, first_bit);
    while (first_bit < vmem->bitmap_size) {
        MemoryRegionSection tmp = *s;

        offset = first_bit * vmem->block_size;
        last_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
                                 first_bit + 1) - 1;
        size = (last_bit - first_bit + 1) * vmem->block_size;

        if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
            break;
        }
        ret = cb(&tmp, arg);
        if (ret) {
            break;
        }
        first_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
                                       last_bit + 2);
    }
    return ret;
}
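/*
 * Wrappers that feed RamDiscardListener callbacks through the section
 * iterators above.
 */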
static int virtio_mem_notify_populate_cb(MemoryRegionSection *s, void *arg)
{
    RamDiscardListener *rdl = arg;

    return rdl->notify_populate(rdl, s);
}

static int virtio_mem_notify_discard_cb(MemoryRegionSection *s, void *arg)
{
    RamDiscardListener *rdl = arg;

    rdl->notify_discard(rdl, s);
    return 0;
}

static void virtio_mem_notify_unplug(VirtIOMEM *vmem, uint64_t offset,
                                     uint64_t size)
{
    RamDiscardListener *rdl;

    QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
        MemoryRegionSection tmp = *rdl->section;

        if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
            continue;
        }
        rdl->notify_discard(rdl, &tmp);
    }
}

static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset,
                                  uint64_t size)
{
    RamDiscardListener *rdl, *rdl2;
    int ret = 0;

    QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
        MemoryRegionSection tmp = *rdl->section;

        if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
            continue;
        }
        ret = rdl->notify_populate(rdl, &tmp);
        if (ret) {
            break;
        }
    }

    if (ret) {
        /* Roll back: notify all already-notified listeners of the discard. */
        QLIST_FOREACH(rdl2, &vmem->rdl_list, next) {
            MemoryRegionSection tmp = *rdl2->section;

            if (rdl2 == rdl) {
                break;
            }
            if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
                continue;
            }
            rdl2->notify_discard(rdl2, &tmp);
        }
    }
    return ret;
}

static void virtio_mem_notify_unplug_all(VirtIOMEM *vmem)
{
    RamDiscardListener *rdl;

    if (!vmem->size) {
        return;
    }

    QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
        if (rdl->double_discard_supported) {
            rdl->notify_discard(rdl, rdl->section);
        } else {
            virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
                                                virtio_mem_notify_discard_cb);
        }
    }
}
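/*
 * Test whether all blocks in the range [start_gpa, start_gpa + size) are in
 * the given plugged/unplugged state.
 */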
static bool virtio_mem_test_bitmap(const VirtIOMEM *vmem, uint64_t start_gpa,
                                   uint64_t size, bool plugged)
{
    const unsigned long first_bit = (start_gpa - vmem->addr) / vmem->block_size;
    const unsigned long last_bit = first_bit + (size / vmem->block_size) - 1;
    unsigned long found_bit;

    /* We fake a shorter bitmap to avoid searching too far. */
    if (plugged) {
        found_bit = find_next_zero_bit(vmem->bitmap, last_bit + 1, first_bit);
    } else {
        found_bit = find_next_bit(vmem->bitmap, last_bit + 1, first_bit);
    }
    return found_bit > last_bit;
}

static void virtio_mem_set_bitmap(VirtIOMEM *vmem, uint64_t start_gpa,
                                  uint64_t size, bool plugged)
{
    const unsigned long bit = (start_gpa - vmem->addr) / vmem->block_size;
    const unsigned long nbits = size / vmem->block_size;

    if (plugged) {
        bitmap_set(vmem->bitmap, bit, nbits);
    } else {
        bitmap_clear(vmem->bitmap, bit, nbits);
    }
}

static void virtio_mem_send_response(VirtIOMEM *vmem, VirtQueueElement *elem,
                                     struct virtio_mem_resp *resp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(vmem);
    VirtQueue *vq = vmem->vq;

    trace_virtio_mem_send_response(le16_to_cpu(resp->type));
    iov_from_buf(elem->in_sg, elem->in_num, 0, resp, sizeof(*resp));

    virtqueue_push(vq, elem, sizeof(*resp));
    virtio_notify(vdev, vq);
}

static void virtio_mem_send_response_simple(VirtIOMEM *vmem,
                                            VirtQueueElement *elem,
                                            uint16_t type)
{
    struct virtio_mem_resp resp = {
        .type = cpu_to_le16(type),
    };

    virtio_mem_send_response(vmem, elem, &resp);
}

static bool virtio_mem_valid_range(const VirtIOMEM *vmem, uint64_t gpa,
                                   uint64_t size)
{
    if (!QEMU_IS_ALIGNED(gpa, vmem->block_size)) {
        return false;
    }
    if (gpa + size < gpa || !size) {
        return false;
    }
    if (gpa < vmem->addr || gpa >= vmem->addr + vmem->usable_region_size) {
        return false;
    }
    if (gpa + size > vmem->addr + vmem->usable_region_size) {
        return false;
    }
    return true;
}
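/*
 * (Un)plug the requested range: discard or preallocate/populate the backing
 * memory, notify registered RamDiscardListeners, and update the bitmap on
 * success. Returns -EBUSY if the request cannot be processed right now.
 */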
static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa,
                                      uint64_t size, bool plug)
{
    const uint64_t offset = start_gpa - vmem->addr;
    RAMBlock *rb = vmem->memdev->mr.ram_block;

    if (virtio_mem_is_busy()) {
        return -EBUSY;
    }

    if (!plug) {
        if (ram_block_discard_range(rb, offset, size)) {
            return -EBUSY;
        }
        virtio_mem_notify_unplug(vmem, offset, size);
    } else {
        int ret = 0;

        if (vmem->prealloc) {
            void *area = memory_region_get_ram_ptr(&vmem->memdev->mr) + offset;
            int fd = memory_region_get_fd(&vmem->memdev->mr);
            Error *local_err = NULL;

            os_mem_prealloc(fd, area, size, 1, &local_err);
            if (local_err) {
                static bool warned;

                /*
                 * Warn only once, we don't want to fill the log with these
                 * warnings.
                 */
                if (!warned) {
                    warn_report_err(local_err);
                    warned = true;
                } else {
                    error_free(local_err);
                }
                ret = -EBUSY;
            }
        }
        if (!ret) {
            ret = virtio_mem_notify_plug(vmem, offset, size);
        }

        if (ret) {
            /* Preallocation or a notifier might have populated memory. */
            ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size);
            return -EBUSY;
        }
    }
    virtio_mem_set_bitmap(vmem, start_gpa, size, plug);
    return 0;
}

static int virtio_mem_state_change_request(VirtIOMEM *vmem, uint64_t gpa,
                                           uint16_t nb_blocks, bool plug)
{
    const uint64_t size = nb_blocks * vmem->block_size;
    int ret;

    if (!virtio_mem_valid_range(vmem, gpa, size)) {
        return VIRTIO_MEM_RESP_ERROR;
    }

    if (plug && (vmem->size + size > vmem->requested_size)) {
        return VIRTIO_MEM_RESP_NACK;
    }

    /* test if really all blocks are in the opposite state */
    if (!virtio_mem_test_bitmap(vmem, gpa, size, !plug)) {
        return VIRTIO_MEM_RESP_ERROR;
    }

    ret = virtio_mem_set_block_state(vmem, gpa, size, plug);
    if (ret) {
        return VIRTIO_MEM_RESP_BUSY;
    }
    if (plug) {
        vmem->size += size;
    } else {
        vmem->size -= size;
    }
    notifier_list_notify(&vmem->size_change_notifiers, &vmem->size);
    return VIRTIO_MEM_RESP_ACK;
}

static void virtio_mem_plug_request(VirtIOMEM *vmem, VirtQueueElement *elem,
                                    struct virtio_mem_req *req)
{
    const uint64_t gpa = le64_to_cpu(req->u.plug.addr);
    const uint16_t nb_blocks = le16_to_cpu(req->u.plug.nb_blocks);
    uint16_t type;

    trace_virtio_mem_plug_request(gpa, nb_blocks);
    type = virtio_mem_state_change_request(vmem, gpa, nb_blocks, true);
    virtio_mem_send_response_simple(vmem, elem, type);
}

static void virtio_mem_unplug_request(VirtIOMEM *vmem, VirtQueueElement *elem,
                                      struct virtio_mem_req *req)
{
    const uint64_t gpa = le64_to_cpu(req->u.unplug.addr);
    const uint16_t nb_blocks = le16_to_cpu(req->u.unplug.nb_blocks);
    uint16_t type;

    trace_virtio_mem_unplug_request(gpa, nb_blocks);
    type = virtio_mem_state_change_request(vmem, gpa, nb_blocks, false);
    virtio_mem_send_response_simple(vmem, elem, type);
}
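/*
 * Grow (and, if allowed, shrink) the usable region to cover the requested
 * size plus VIRTIO_MEM_USABLE_EXTENT, clamped to the memdev size and aligned
 * up to the block size.
 */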
static void virtio_mem_resize_usable_region(VirtIOMEM *vmem,
                                            uint64_t requested_size,
                                            bool can_shrink)
{
    uint64_t newsize = MIN(memory_region_size(&vmem->memdev->mr),
                           requested_size + VIRTIO_MEM_USABLE_EXTENT);

    /* The usable region size always has to be a multiple of the block size. */
    newsize = QEMU_ALIGN_UP(newsize, vmem->block_size);

    if (!requested_size) {
        newsize = 0;
    }

    if (newsize < vmem->usable_region_size && !can_shrink) {
        return;
    }

    trace_virtio_mem_resized_usable_region(vmem->usable_region_size, newsize);
    vmem->usable_region_size = newsize;
}

static int virtio_mem_unplug_all(VirtIOMEM *vmem)
{
    RAMBlock *rb = vmem->memdev->mr.ram_block;

    if (virtio_mem_is_busy()) {
        return -EBUSY;
    }

    if (ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb))) {
        return -EBUSY;
    }
    virtio_mem_notify_unplug_all(vmem);

    bitmap_clear(vmem->bitmap, 0, vmem->bitmap_size);
    if (vmem->size) {
        vmem->size = 0;
        notifier_list_notify(&vmem->size_change_notifiers, &vmem->size);
    }
    trace_virtio_mem_unplugged_all();
    virtio_mem_resize_usable_region(vmem, vmem->requested_size, true);
    return 0;
}

static void virtio_mem_unplug_all_request(VirtIOMEM *vmem,
                                          VirtQueueElement *elem)
{
    trace_virtio_mem_unplug_all_request();
    if (virtio_mem_unplug_all(vmem)) {
        virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_BUSY);
    } else {
        virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_ACK);
    }
}

static void virtio_mem_state_request(VirtIOMEM *vmem, VirtQueueElement *elem,
                                     struct virtio_mem_req *req)
{
    const uint16_t nb_blocks = le16_to_cpu(req->u.state.nb_blocks);
    const uint64_t gpa = le64_to_cpu(req->u.state.addr);
    const uint64_t size = nb_blocks * vmem->block_size;
    struct virtio_mem_resp resp = {
        .type = cpu_to_le16(VIRTIO_MEM_RESP_ACK),
    };

    trace_virtio_mem_state_request(gpa, nb_blocks);
    if (!virtio_mem_valid_range(vmem, gpa, size)) {
        virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_ERROR);
        return;
    }

    if (virtio_mem_test_bitmap(vmem, gpa, size, true)) {
        resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_PLUGGED);
    } else if (virtio_mem_test_bitmap(vmem, gpa, size, false)) {
        resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_UNPLUGGED);
    } else {
        resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_MIXED);
    }
    trace_virtio_mem_state_response(le16_to_cpu(resp.u.state.state));
    virtio_mem_send_response(vmem, elem, &resp);
}
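/*
 * Virtqueue handler: pop each request, validate request/response buffer
 * sizes, and dispatch based on the request type.
 */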
static void virtio_mem_handle_request(VirtIODevice *vdev, VirtQueue *vq)
{
    const int len = sizeof(struct virtio_mem_req);
    VirtIOMEM *vmem = VIRTIO_MEM(vdev);
    VirtQueueElement *elem;
    struct virtio_mem_req req;
    uint16_t type;

    while (true) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            return;
        }

        if (iov_to_buf(elem->out_sg, elem->out_num, 0, &req, len) < len) {
            virtio_error(vdev, "virtio-mem protocol violation: invalid request"
                         " size: %d", len);
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            return;
        }

        if (iov_size(elem->in_sg, elem->in_num) <
            sizeof(struct virtio_mem_resp)) {
            virtio_error(vdev, "virtio-mem protocol violation: not enough space"
                         " for response: %zu",
                         iov_size(elem->in_sg, elem->in_num));
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            return;
        }

        type = le16_to_cpu(req.type);
        switch (type) {
        case VIRTIO_MEM_REQ_PLUG:
            virtio_mem_plug_request(vmem, elem, &req);
            break;
        case VIRTIO_MEM_REQ_UNPLUG:
            virtio_mem_unplug_request(vmem, elem, &req);
            break;
        case VIRTIO_MEM_REQ_UNPLUG_ALL:
            virtio_mem_unplug_all_request(vmem, elem);
            break;
        case VIRTIO_MEM_REQ_STATE:
            virtio_mem_state_request(vmem, elem, &req);
            break;
        default:
            virtio_error(vdev, "virtio-mem protocol violation: unknown request"
                         " type: %d", type);
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            return;
        }

        g_free(elem);
    }
}

static void virtio_mem_get_config(VirtIODevice *vdev, uint8_t *config_data)
{
    VirtIOMEM *vmem = VIRTIO_MEM(vdev);
    struct virtio_mem_config *config = (void *) config_data;

    config->block_size = cpu_to_le64(vmem->block_size);
    config->node_id = cpu_to_le16(vmem->node);
    config->requested_size = cpu_to_le64(vmem->requested_size);
    config->plugged_size = cpu_to_le64(vmem->size);
    config->addr = cpu_to_le64(vmem->addr);
    config->region_size = cpu_to_le64(memory_region_size(&vmem->memdev->mr));
    config->usable_region_size = cpu_to_le64(vmem->usable_region_size);
}

static uint64_t virtio_mem_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    VirtIOMEM *vmem = VIRTIO_MEM(vdev);

    if (ms->numa_state) {
#if defined(CONFIG_ACPI)
        virtio_add_feature(&features, VIRTIO_MEM_F_ACPI_PXM);
#endif
    }
    assert(vmem->unplugged_inaccessible != ON_OFF_AUTO_AUTO);
    if (vmem->unplugged_inaccessible == ON_OFF_AUTO_ON) {
        virtio_add_feature(&features, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE);
    }
    return features;
}

static int virtio_mem_validate_features(VirtIODevice *vdev)
{
    if (virtio_host_has_feature(vdev, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE)) {
        return -EFAULT;
    }
    return 0;
}

static void virtio_mem_system_reset(void *opaque)
{
    VirtIOMEM *vmem = VIRTIO_MEM(opaque);

    /*
     * During usual resets, we will unplug all memory and shrink the usable
     * region size. This is, however, not possible in all scenarios. Then,
     * the guest has to deal with this manually (VIRTIO_MEM_REQ_UNPLUG_ALL).
     */
    virtio_mem_unplug_all(vmem);
}
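/*
 * Realize: validate the memdev and properties, discard all backing memory so
 * we start out with everything unplugged, and register the device as the
 * RamDiscardManager of the memory region.
 */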
static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    int nb_numa_nodes = ms->numa_state ? ms->numa_state->num_nodes : 0;
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOMEM *vmem = VIRTIO_MEM(dev);
    uint64_t page_size;
    RAMBlock *rb;
    int ret;

    if (!vmem->memdev) {
        error_setg(errp, "'%s' property is not set", VIRTIO_MEM_MEMDEV_PROP);
        return;
    } else if (host_memory_backend_is_mapped(vmem->memdev)) {
        error_setg(errp, "'%s' property specifies a busy memdev: %s",
                   VIRTIO_MEM_MEMDEV_PROP,
                   object_get_canonical_path_component(OBJECT(vmem->memdev)));
        return;
    } else if (!memory_region_is_ram(&vmem->memdev->mr) ||
               memory_region_is_rom(&vmem->memdev->mr) ||
               !vmem->memdev->mr.ram_block) {
        error_setg(errp, "'%s' property specifies an unsupported memdev",
                   VIRTIO_MEM_MEMDEV_PROP);
        return;
    }

    if ((nb_numa_nodes && vmem->node >= nb_numa_nodes) ||
        (!nb_numa_nodes && vmem->node)) {
        error_setg(errp, "'%s' property has value '%" PRIu32 "', which exceeds"
                   " the number of numa nodes: %d", VIRTIO_MEM_NODE_PROP,
                   vmem->node, nb_numa_nodes ? nb_numa_nodes : 1);
        return;
    }

    if (enable_mlock) {
        error_setg(errp, "Incompatible with mlock");
        return;
    }

    rb = vmem->memdev->mr.ram_block;
    page_size = qemu_ram_pagesize(rb);

#if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
    switch (vmem->unplugged_inaccessible) {
    case ON_OFF_AUTO_AUTO:
        if (virtio_mem_has_shared_zeropage(rb)) {
            vmem->unplugged_inaccessible = ON_OFF_AUTO_OFF;
        } else {
            vmem->unplugged_inaccessible = ON_OFF_AUTO_ON;
        }
        break;
    case ON_OFF_AUTO_OFF:
        if (!virtio_mem_has_shared_zeropage(rb)) {
            warn_report("'%s' property set to 'off' with a memdev that does"
                        " not support the shared zeropage.",
                        VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP);
        }
        break;
    default:
        break;
    }
#else /* VIRTIO_MEM_HAS_LEGACY_GUESTS */
    vmem->unplugged_inaccessible = ON_OFF_AUTO_ON;
#endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */

    /*
     * If the block size wasn't configured by the user, use a sane default.
     * This allows using hugetlbfs backends of any page size without manual
     * intervention.
     */
    if (!vmem->block_size) {
        vmem->block_size = virtio_mem_default_block_size(rb);
    }

    if (vmem->block_size < page_size) {
        error_setg(errp, "'%s' property has to be at least the page size (0x%"
                   PRIx64 ")", VIRTIO_MEM_BLOCK_SIZE_PROP, page_size);
        return;
    } else if (vmem->block_size < virtio_mem_default_block_size(rb)) {
        warn_report("'%s' property is smaller than the default block size (%"
                    PRIx64 " MiB)", VIRTIO_MEM_BLOCK_SIZE_PROP,
                    virtio_mem_default_block_size(rb) / MiB);
    }
    if (!QEMU_IS_ALIGNED(vmem->requested_size, vmem->block_size)) {
        error_setg(errp, "'%s' property has to be a multiple of '%s' (0x%"
                   PRIx64 ")", VIRTIO_MEM_REQUESTED_SIZE_PROP,
                   VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size);
        return;
    } else if (!QEMU_IS_ALIGNED(vmem->addr, vmem->block_size)) {
        error_setg(errp, "'%s' property has to be a multiple of '%s' (0x%"
                   PRIx64 ")", VIRTIO_MEM_ADDR_PROP,
                   VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size);
        return;
    } else if (!QEMU_IS_ALIGNED(memory_region_size(&vmem->memdev->mr),
                                vmem->block_size)) {
        error_setg(errp, "'%s' property memdev size has to be a multiple of"
                   " '%s' (0x%" PRIx64 ")", VIRTIO_MEM_MEMDEV_PROP,
                   VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size);
        return;
    }

    if (ram_block_coordinated_discard_require(true)) {
        error_setg(errp, "Discarding RAM is disabled");
        return;
    }

    ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb));
    if (ret) {
        error_setg_errno(errp, -ret, "Unexpected error discarding RAM");
        ram_block_coordinated_discard_require(false);
        return;
    }

    virtio_mem_resize_usable_region(vmem, vmem->requested_size, true);

    vmem->bitmap_size = memory_region_size(&vmem->memdev->mr) /
                        vmem->block_size;
    vmem->bitmap = bitmap_new(vmem->bitmap_size);

    virtio_init(vdev, TYPE_VIRTIO_MEM, VIRTIO_ID_MEM,
                sizeof(struct virtio_mem_config));
    vmem->vq = virtio_add_queue(vdev, 128, virtio_mem_handle_request);

    host_memory_backend_set_mapped(vmem->memdev, true);
    vmstate_register_ram(&vmem->memdev->mr, DEVICE(vmem));
    qemu_register_reset(virtio_mem_system_reset, vmem);

    /*
     * Set ourselves as RamDiscardManager before the plug handler maps the
     * memory region and exposes it via an address space.
     */
    memory_region_set_ram_discard_manager(&vmem->memdev->mr,
                                          RAM_DISCARD_MANAGER(vmem));
}
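/* Tear down what realize() set up, in reverse order. */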
static void virtio_mem_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOMEM *vmem = VIRTIO_MEM(dev);

    /*
     * The unplug handler unmapped the memory region; it cannot be
     * found via an address space anymore. Unset ourselves.
     */
    memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
    qemu_unregister_reset(virtio_mem_system_reset, vmem);
    vmstate_unregister_ram(&vmem->memdev->mr, DEVICE(vmem));
    host_memory_backend_set_mapped(vmem->memdev, false);
    virtio_del_queue(vdev, 0);
    virtio_cleanup(vdev);
    g_free(vmem->bitmap);
    ram_block_coordinated_discard_require(false);
}

static int virtio_mem_discard_range_cb(const VirtIOMEM *vmem, void *arg,
                                       uint64_t offset, uint64_t size)
{
    RAMBlock *rb = vmem->memdev->mr.ram_block;

    return ram_block_discard_range(rb, offset, size) ? -EINVAL : 0;
}

static int virtio_mem_restore_unplugged(VirtIOMEM *vmem)
{
    /* Make sure all memory is really discarded after migration. */
    return virtio_mem_for_each_unplugged_range(vmem, NULL,
                                               virtio_mem_discard_range_cb);
}

static int virtio_mem_post_load(void *opaque, int version_id)
{
    VirtIOMEM *vmem = VIRTIO_MEM(opaque);
    RamDiscardListener *rdl;
    int ret;

    /*
     * We started out with all memory discarded and our memory region is mapped
     * into an address space. Replay, now that we updated the bitmap.
     */
    QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
        ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
                                                  virtio_mem_notify_populate_cb);
        if (ret) {
            return ret;
        }
    }

    if (migration_in_incoming_postcopy()) {
        return 0;
    }

    return virtio_mem_restore_unplugged(vmem);
}

typedef struct VirtIOMEMMigSanityChecks {
    VirtIOMEM *parent;
    uint64_t addr;
    uint64_t region_size;
    uint64_t block_size;
    uint32_t node;
} VirtIOMEMMigSanityChecks;

static int virtio_mem_mig_sanity_checks_pre_save(void *opaque)
{
    VirtIOMEMMigSanityChecks *tmp = opaque;
    VirtIOMEM *vmem = tmp->parent;

    tmp->addr = vmem->addr;
    tmp->region_size = memory_region_size(&vmem->memdev->mr);
    tmp->block_size = vmem->block_size;
    tmp->node = vmem->node;
    return 0;
}
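/* Fail migration if any immutable device property changed on the destination. */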
static int virtio_mem_mig_sanity_checks_post_load(void *opaque, int version_id)
{
    VirtIOMEMMigSanityChecks *tmp = opaque;
    VirtIOMEM *vmem = tmp->parent;
    const uint64_t new_region_size = memory_region_size(&vmem->memdev->mr);

    if (tmp->addr != vmem->addr) {
        error_report("Property '%s' changed from 0x%" PRIx64 " to 0x%" PRIx64,
                     VIRTIO_MEM_ADDR_PROP, tmp->addr, vmem->addr);
        return -EINVAL;
    }
    /*
     * Note: Preparation for resizeable memory regions. The maximum size
     * of the memory region must not change during migration.
     */
    if (tmp->region_size != new_region_size) {
        error_report("Property '%s' size changed from 0x%" PRIx64 " to 0x%"
                     PRIx64, VIRTIO_MEM_MEMDEV_PROP, tmp->region_size,
                     new_region_size);
        return -EINVAL;
    }
    if (tmp->block_size != vmem->block_size) {
        error_report("Property '%s' changed from 0x%" PRIx64 " to 0x%" PRIx64,
                     VIRTIO_MEM_BLOCK_SIZE_PROP, tmp->block_size,
                     vmem->block_size);
        return -EINVAL;
    }
    if (tmp->node != vmem->node) {
        error_report("Property '%s' changed from %" PRIu32 " to %" PRIu32,
                     VIRTIO_MEM_NODE_PROP, tmp->node, vmem->node);
        return -EINVAL;
    }
    return 0;
}

static const VMStateDescription vmstate_virtio_mem_sanity_checks = {
    .name = "virtio-mem-device/sanity-checks",
    .pre_save = virtio_mem_mig_sanity_checks_pre_save,
    .post_load = virtio_mem_mig_sanity_checks_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64(addr, VirtIOMEMMigSanityChecks),
        VMSTATE_UINT64(region_size, VirtIOMEMMigSanityChecks),
        VMSTATE_UINT64(block_size, VirtIOMEMMigSanityChecks),
        VMSTATE_UINT32(node, VirtIOMEMMigSanityChecks),
        VMSTATE_END_OF_LIST(),
    },
};

static const VMStateDescription vmstate_virtio_mem_device = {
    .name = "virtio-mem-device",
    .minimum_version_id = 1,
    .version_id = 1,
    .priority = MIG_PRI_VIRTIO_MEM,
    .post_load = virtio_mem_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_WITH_TMP(VirtIOMEM, VirtIOMEMMigSanityChecks,
                         vmstate_virtio_mem_sanity_checks),
        VMSTATE_UINT64(usable_region_size, VirtIOMEM),
        VMSTATE_UINT64(size, VirtIOMEM),
        VMSTATE_UINT64(requested_size, VirtIOMEM),
        VMSTATE_BITMAP(bitmap, VirtIOMEM, 0, bitmap_size),
        VMSTATE_END_OF_LIST()
    },
};

static const VMStateDescription vmstate_virtio_mem = {
    .name = "virtio-mem",
    .minimum_version_id = 1,
    .version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
};

static void virtio_mem_fill_device_info(const VirtIOMEM *vmem,
                                        VirtioMEMDeviceInfo *vi)
{
    vi->memaddr = vmem->addr;
    vi->node = vmem->node;
    vi->requested_size = vmem->requested_size;
    vi->size = vmem->size;
    vi->max_size = memory_region_size(&vmem->memdev->mr);
    vi->block_size = vmem->block_size;
    vi->memdev = object_get_canonical_path(OBJECT(vmem->memdev));
}

static MemoryRegion *virtio_mem_get_memory_region(VirtIOMEM *vmem, Error **errp)
{
    if (!vmem->memdev) {
        error_setg(errp, "'%s' property must be set", VIRTIO_MEM_MEMDEV_PROP);
        return NULL;
    }

    return &vmem->memdev->mr;
}

static void virtio_mem_add_size_change_notifier(VirtIOMEM *vmem,
                                                Notifier *notifier)
{
    notifier_list_add(&vmem->size_change_notifiers, notifier);
}

static void virtio_mem_remove_size_change_notifier(VirtIOMEM *vmem,
                                                   Notifier *notifier)
{
    notifier_remove(notifier);
}

static void virtio_mem_get_size(Object *obj, Visitor *v, const char *name,
                                void *opaque, Error **errp)
{
    const VirtIOMEM *vmem = VIRTIO_MEM(obj);
    uint64_t value = vmem->size;

    visit_type_size(v, name, &value, errp);
}

static void virtio_mem_get_requested_size(Object *obj, Visitor *v,
                                          const char *name, void *opaque,
                                          Error **errp)
{
    const VirtIOMEM *vmem = VIRTIO_MEM(obj);
    uint64_t value = vmem->requested_size;

    visit_type_size(v, name, &value, errp);
}
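/*
 * Property setter: when the device is already realized, validate the new
 * value, resize the usable region, and notify the guest via a config update.
 */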
static void virtio_mem_set_requested_size(Object *obj, Visitor *v,
                                          const char *name, void *opaque,
                                          Error **errp)
{
    VirtIOMEM *vmem = VIRTIO_MEM(obj);
    Error *err = NULL;
    uint64_t value;

    visit_type_size(v, name, &value, &err);
    if (err) {
        error_propagate(errp, err);
        return;
    }

    /*
     * The block size and memory backend are not fixed until the device is
     * realized. realize() will verify these properties then.
     */
    if (DEVICE(obj)->realized) {
        if (!QEMU_IS_ALIGNED(value, vmem->block_size)) {
            error_setg(errp, "'%s' has to be a multiple of '%s' (0x%" PRIx64
                       ")", name, VIRTIO_MEM_BLOCK_SIZE_PROP,
                       vmem->block_size);
            return;
        } else if (value > memory_region_size(&vmem->memdev->mr)) {
            error_setg(errp, "'%s' cannot exceed the memory backend size"
                       " (0x%" PRIx64 ")", name,
                       memory_region_size(&vmem->memdev->mr));
            return;
        }

        if (value != vmem->requested_size) {
            virtio_mem_resize_usable_region(vmem, value, false);
            vmem->requested_size = value;
        }
        /*
         * Trigger a config update so the guest gets notified. We trigger
         * even if the size didn't change (especially helpful for debugging).
         */
        virtio_notify_config(VIRTIO_DEVICE(vmem));
    } else {
        vmem->requested_size = value;
    }
}

static void virtio_mem_get_block_size(Object *obj, Visitor *v, const char *name,
                                      void *opaque, Error **errp)
{
    const VirtIOMEM *vmem = VIRTIO_MEM(obj);
    uint64_t value = vmem->block_size;

    /*
     * If not configured by the user (and we're not realized yet), use the
     * default block size we would use with the current memory backend.
     */
    if (!value) {
        if (vmem->memdev && memory_region_is_ram(&vmem->memdev->mr)) {
            value = virtio_mem_default_block_size(vmem->memdev->mr.ram_block);
        } else {
            value = virtio_mem_thp_size();
        }
    }

    visit_type_size(v, name, &value, errp);
}
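/*
 * Property setter: the block size is fixed once the device is realized;
 * enforce the minimum size and that the value is a power of two.
 */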
static void virtio_mem_set_block_size(Object *obj, Visitor *v, const char *name,
                                      void *opaque, Error **errp)
{
    VirtIOMEM *vmem = VIRTIO_MEM(obj);
    Error *err = NULL;
    uint64_t value;

    if (DEVICE(obj)->realized) {
        error_setg(errp, "'%s' cannot be changed", name);
        return;
    }

    visit_type_size(v, name, &value, &err);
    if (err) {
        error_propagate(errp, err);
        return;
    }

    if (value < VIRTIO_MEM_MIN_BLOCK_SIZE) {
        error_setg(errp, "'%s' property has to be at least 0x%" PRIx32, name,
                   VIRTIO_MEM_MIN_BLOCK_SIZE);
        return;
    } else if (!is_power_of_2(value)) {
        error_setg(errp, "'%s' property has to be a power of two", name);
        return;
    }
    vmem->block_size = value;
}

static void virtio_mem_instance_init(Object *obj)
{
    VirtIOMEM *vmem = VIRTIO_MEM(obj);

    notifier_list_init(&vmem->size_change_notifiers);
    QLIST_INIT(&vmem->rdl_list);

    object_property_add(obj, VIRTIO_MEM_SIZE_PROP, "size", virtio_mem_get_size,
                        NULL, NULL, NULL);
    object_property_add(obj, VIRTIO_MEM_REQUESTED_SIZE_PROP, "size",
                        virtio_mem_get_requested_size,
                        virtio_mem_set_requested_size, NULL, NULL);
    object_property_add(obj, VIRTIO_MEM_BLOCK_SIZE_PROP, "size",
                        virtio_mem_get_block_size, virtio_mem_set_block_size,
                        NULL, NULL);
}

static Property virtio_mem_properties[] = {
    DEFINE_PROP_UINT64(VIRTIO_MEM_ADDR_PROP, VirtIOMEM, addr, 0),
    DEFINE_PROP_UINT32(VIRTIO_MEM_NODE_PROP, VirtIOMEM, node, 0),
    DEFINE_PROP_BOOL(VIRTIO_MEM_PREALLOC_PROP, VirtIOMEM, prealloc, false),
    DEFINE_PROP_LINK(VIRTIO_MEM_MEMDEV_PROP, VirtIOMEM, memdev,
                     TYPE_MEMORY_BACKEND, HostMemoryBackend *),
#if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
    DEFINE_PROP_ON_OFF_AUTO(VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP, VirtIOMEM,
                            unplugged_inaccessible, ON_OFF_AUTO_AUTO),
#endif
    DEFINE_PROP_END_OF_LIST(),
};

static uint64_t virtio_mem_rdm_get_min_granularity(const RamDiscardManager *rdm,
                                                   const MemoryRegion *mr)
{
    const VirtIOMEM *vmem = VIRTIO_MEM(rdm);

    g_assert(mr == &vmem->memdev->mr);
    return vmem->block_size;
}

static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm,
                                        const MemoryRegionSection *s)
{
    const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
    uint64_t start_gpa = vmem->addr + s->offset_within_region;
    uint64_t end_gpa = start_gpa + int128_get64(s->size);

    g_assert(s->mr == &vmem->memdev->mr);

    start_gpa = QEMU_ALIGN_DOWN(start_gpa, vmem->block_size);
    end_gpa = QEMU_ALIGN_UP(end_gpa, vmem->block_size);

    if (!virtio_mem_valid_range(vmem, start_gpa, end_gpa - start_gpa)) {
        return false;
    }

    return virtio_mem_test_bitmap(vmem, start_gpa, end_gpa - start_gpa, true);
}

struct VirtIOMEMReplayData {
    void *fn;
    void *opaque;
};

static int virtio_mem_rdm_replay_populated_cb(MemoryRegionSection *s, void *arg)
{
    struct VirtIOMEMReplayData *data = arg;

    return ((ReplayRamPopulate)data->fn)(s, data->opaque);
}
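/*
 * RamDiscardManager replay: invoke the given callback on all plugged
 * (populated) parts of the section, funneling the untyped callback through
 * VirtIOMEMReplayData and virtio_mem_for_each_plugged_section().
 */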
static int virtio_mem_rdm_replay_populated(const RamDiscardManager *rdm,
                                           MemoryRegionSection *s,
                                           ReplayRamPopulate replay_fn,
                                           void *opaque)
{
    const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
    struct VirtIOMEMReplayData data = {
        .fn = replay_fn,
        .opaque = opaque,
    };

    g_assert(s->mr == &vmem->memdev->mr);
    return virtio_mem_for_each_plugged_section(vmem, s, &data,
                                               virtio_mem_rdm_replay_populated_cb);
}

static int virtio_mem_rdm_replay_discarded_cb(MemoryRegionSection *s,
                                              void *arg)
{
    struct VirtIOMEMReplayData *data = arg;

    ((ReplayRamDiscard)data->fn)(s, data->opaque);
    return 0;
}

static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm,
                                            MemoryRegionSection *s,
                                            ReplayRamDiscard replay_fn,
                                            void *opaque)
{
    const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
    struct VirtIOMEMReplayData data = {
        .fn = replay_fn,
        .opaque = opaque,
    };

    g_assert(s->mr == &vmem->memdev->mr);
    virtio_mem_for_each_unplugged_section(vmem, s, &data,
                                          virtio_mem_rdm_replay_discarded_cb);
}

static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm,
                                             RamDiscardListener *rdl,
                                             MemoryRegionSection *s)
{
    VirtIOMEM *vmem = VIRTIO_MEM(rdm);
    int ret;

    g_assert(s->mr == &vmem->memdev->mr);
    rdl->section = memory_region_section_new_copy(s);

    QLIST_INSERT_HEAD(&vmem->rdl_list, rdl, next);
    ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
                                              virtio_mem_notify_populate_cb);
    if (ret) {
        error_report("%s: Replaying plugged ranges failed: %s", __func__,
                     strerror(-ret));
    }
}

static void virtio_mem_rdm_unregister_listener(RamDiscardManager *rdm,
                                               RamDiscardListener *rdl)
{
    VirtIOMEM *vmem = VIRTIO_MEM(rdm);

    g_assert(rdl->section->mr == &vmem->memdev->mr);
    if (vmem->size) {
        if (rdl->double_discard_supported) {
            rdl->notify_discard(rdl, rdl->section);
        } else {
            virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
                                                virtio_mem_notify_discard_cb);
        }
    }

    memory_region_section_free_copy(rdl->section);
    rdl->section = NULL;
    QLIST_REMOVE(rdl, next);
}

static void virtio_mem_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
    VirtIOMEMClass *vmc = VIRTIO_MEM_CLASS(klass);
    RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(klass);

    device_class_set_props(dc, virtio_mem_properties);
    dc->vmsd = &vmstate_virtio_mem;

    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
    vdc->realize = virtio_mem_device_realize;
    vdc->unrealize = virtio_mem_device_unrealize;
    vdc->get_config = virtio_mem_get_config;
    vdc->get_features = virtio_mem_get_features;
    vdc->validate_features = virtio_mem_validate_features;
    vdc->vmsd = &vmstate_virtio_mem_device;

    vmc->fill_device_info = virtio_mem_fill_device_info;
    vmc->get_memory_region = virtio_mem_get_memory_region;
    vmc->add_size_change_notifier = virtio_mem_add_size_change_notifier;
    vmc->remove_size_change_notifier = virtio_mem_remove_size_change_notifier;

    rdmc->get_min_granularity = virtio_mem_rdm_get_min_granularity;
    rdmc->is_populated = virtio_mem_rdm_is_populated;
    rdmc->replay_populated = virtio_mem_rdm_replay_populated;
    rdmc->replay_discarded = virtio_mem_rdm_replay_discarded;
    rdmc->register_listener = virtio_mem_rdm_register_listener;
    rdmc->unregister_listener = virtio_mem_rdm_unregister_listener;
}
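/* virtio-mem also implements the RamDiscardManager interface (see .interfaces). */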
static const TypeInfo virtio_mem_info = {
    .name = TYPE_VIRTIO_MEM,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIOMEM),
    .instance_init = virtio_mem_instance_init,
    .class_init = virtio_mem_class_init,
    .class_size = sizeof(VirtIOMEMClass),
    .interfaces = (InterfaceInfo[]) {
        { TYPE_RAM_DISCARD_MANAGER },
        { }
    },
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_mem_info);
}

type_init(virtio_register_types)