1 /* 2 * Virtio MEM device 3 * 4 * Copyright (C) 2020 Red Hat, Inc. 5 * 6 * Authors: 7 * David Hildenbrand <david@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2. 10 * See the COPYING file in the top-level directory. 11 */ 12 13 #include "qemu/osdep.h" 14 #include "qemu/iov.h" 15 #include "qemu/cutils.h" 16 #include "qemu/error-report.h" 17 #include "qemu/units.h" 18 #include "sysemu/numa.h" 19 #include "sysemu/sysemu.h" 20 #include "sysemu/reset.h" 21 #include "hw/virtio/virtio.h" 22 #include "hw/virtio/virtio-bus.h" 23 #include "hw/virtio/virtio-mem.h" 24 #include "qapi/error.h" 25 #include "qapi/visitor.h" 26 #include "exec/ram_addr.h" 27 #include "migration/misc.h" 28 #include "hw/boards.h" 29 #include "hw/qdev-properties.h" 30 #include CONFIG_DEVICES 31 #include "trace.h" 32 33 static const VMStateDescription vmstate_virtio_mem_device_early; 34 35 /* 36 * We only had legacy x86 guests that did not support 37 * VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE. Other targets don't have legacy guests. 38 */ 39 #if defined(TARGET_X86_64) || defined(TARGET_I386) 40 #define VIRTIO_MEM_HAS_LEGACY_GUESTS 41 #endif 42 43 /* 44 * Let's not allow blocks smaller than 1 MiB, for example, to keep the tracking 45 * bitmap small. 46 */ 47 #define VIRTIO_MEM_MIN_BLOCK_SIZE ((uint32_t)(1 * MiB)) 48 49 static uint32_t virtio_mem_default_thp_size(void) 50 { 51 uint32_t default_thp_size = VIRTIO_MEM_MIN_BLOCK_SIZE; 52 53 #if defined(__x86_64__) || defined(__arm__) || defined(__powerpc64__) 54 default_thp_size = 2 * MiB; 55 #elif defined(__aarch64__) 56 if (qemu_real_host_page_size() == 4 * KiB) { 57 default_thp_size = 2 * MiB; 58 } else if (qemu_real_host_page_size() == 16 * KiB) { 59 default_thp_size = 32 * MiB; 60 } else if (qemu_real_host_page_size() == 64 * KiB) { 61 default_thp_size = 512 * MiB; 62 } 63 #endif 64 65 return default_thp_size; 66 } 67 68 /* 69 * We want to have a reasonable default block size such that 70 * 1. We avoid splitting THPs when unplugging memory, which degrades 71 * performance. 72 * 2. We avoid placing THPs for plugged blocks that also cover unplugged 73 * blocks. 74 * 75 * The actual THP size might differ between Linux kernels, so we try to probe 76 * it. In the future (if we ever run into issues regarding 2.), we might want 77 * to disable THP in case we fail to properly probe the THP size, or if the 78 * block size is configured smaller than the THP size. 79 */ 80 static uint32_t thp_size; 81 82 #define HPAGE_PMD_SIZE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size" 83 static uint32_t virtio_mem_thp_size(void) 84 { 85 gchar *content = NULL; 86 const char *endptr; 87 uint64_t tmp; 88 89 if (thp_size) { 90 return thp_size; 91 } 92 93 /* 94 * Try to probe the actual THP size, fallback to (sane but eventually 95 * incorrect) default sizes. 96 */ 97 if (g_file_get_contents(HPAGE_PMD_SIZE_PATH, &content, NULL, NULL) && 98 !qemu_strtou64(content, &endptr, 0, &tmp) && 99 (!endptr || *endptr == '\n')) { 100 /* Sanity-check the value and fallback to something reasonable. */ 101 if (!tmp || !is_power_of_2(tmp)) { 102 warn_report("Read unsupported THP size: %" PRIx64, tmp); 103 } else { 104 thp_size = tmp; 105 } 106 } 107 108 if (!thp_size) { 109 thp_size = virtio_mem_default_thp_size(); 110 warn_report("Could not detect THP size, falling back to %" PRIx64 111 " MiB.", thp_size / MiB); 112 } 113 114 g_free(content); 115 return thp_size; 116 } 117 118 static uint64_t virtio_mem_default_block_size(RAMBlock *rb) 119 { 120 const uint64_t page_size = qemu_ram_pagesize(rb); 121 122 /* We can have hugetlbfs with a page size smaller than the THP size. */ 123 if (page_size == qemu_real_host_page_size()) { 124 return MAX(page_size, virtio_mem_thp_size()); 125 } 126 return MAX(page_size, VIRTIO_MEM_MIN_BLOCK_SIZE); 127 } 128 129 #if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS) 130 static bool virtio_mem_has_shared_zeropage(RAMBlock *rb) 131 { 132 /* 133 * We only have a guaranteed shared zeropage on ordinary MAP_PRIVATE 134 * anonymous RAM. In any other case, reading unplugged *can* populate a 135 * fresh page, consuming actual memory. 136 */ 137 return !qemu_ram_is_shared(rb) && qemu_ram_get_fd(rb) < 0 && 138 qemu_ram_pagesize(rb) == qemu_real_host_page_size(); 139 } 140 #endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */ 141 142 /* 143 * Size the usable region bigger than the requested size if possible. Esp. 144 * Linux guests will only add (aligned) memory blocks in case they fully 145 * fit into the usable region, but plug+online only a subset of the pages. 146 * The memory block size corresponds mostly to the section size. 147 * 148 * This allows e.g., to add 20MB with a section size of 128MB on x86_64, and 149 * a section size of 512MB on arm64 (as long as the start address is properly 150 * aligned, similar to ordinary DIMMs). 151 * 152 * We can change this at any time and maybe even make it configurable if 153 * necessary (as the section size can change). But it's more likely that the 154 * section size will rather get smaller and not bigger over time. 155 */ 156 #if defined(TARGET_X86_64) || defined(TARGET_I386) 157 #define VIRTIO_MEM_USABLE_EXTENT (2 * (128 * MiB)) 158 #elif defined(TARGET_ARM) 159 #define VIRTIO_MEM_USABLE_EXTENT (2 * (512 * MiB)) 160 #else 161 #error VIRTIO_MEM_USABLE_EXTENT not defined 162 #endif 163 164 static bool virtio_mem_is_busy(void) 165 { 166 /* 167 * Postcopy cannot handle concurrent discards and we don't want to migrate 168 * pages on-demand with stale content when plugging new blocks. 169 * 170 * For precopy, we don't want unplugged blocks in our migration stream, and 171 * when plugging new blocks, the page content might differ between source 172 * and destination (observable by the guest when not initializing pages 173 * after plugging them) until we're running on the destination (as we didn't 174 * migrate these blocks when they were unplugged). 175 */ 176 return migration_in_incoming_postcopy() || !migration_is_idle(); 177 } 178 179 typedef int (*virtio_mem_range_cb)(const VirtIOMEM *vmem, void *arg, 180 uint64_t offset, uint64_t size); 181 182 static int virtio_mem_for_each_unplugged_range(const VirtIOMEM *vmem, void *arg, 183 virtio_mem_range_cb cb) 184 { 185 unsigned long first_zero_bit, last_zero_bit; 186 uint64_t offset, size; 187 int ret = 0; 188 189 first_zero_bit = find_first_zero_bit(vmem->bitmap, vmem->bitmap_size); 190 while (first_zero_bit < vmem->bitmap_size) { 191 offset = first_zero_bit * vmem->block_size; 192 last_zero_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, 193 first_zero_bit + 1) - 1; 194 size = (last_zero_bit - first_zero_bit + 1) * vmem->block_size; 195 196 ret = cb(vmem, arg, offset, size); 197 if (ret) { 198 break; 199 } 200 first_zero_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, 201 last_zero_bit + 2); 202 } 203 return ret; 204 } 205 206 static int virtio_mem_for_each_plugged_range(const VirtIOMEM *vmem, void *arg, 207 virtio_mem_range_cb cb) 208 { 209 unsigned long first_bit, last_bit; 210 uint64_t offset, size; 211 int ret = 0; 212 213 first_bit = find_first_bit(vmem->bitmap, vmem->bitmap_size); 214 while (first_bit < vmem->bitmap_size) { 215 offset = first_bit * vmem->block_size; 216 last_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, 217 first_bit + 1) - 1; 218 size = (last_bit - first_bit + 1) * vmem->block_size; 219 220 ret = cb(vmem, arg, offset, size); 221 if (ret) { 222 break; 223 } 224 first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, 225 last_bit + 2); 226 } 227 return ret; 228 } 229 230 /* 231 * Adjust the memory section to cover the intersection with the given range. 232 * 233 * Returns false if the intersection is empty, otherwise returns true. 234 */ 235 static bool virtio_mem_intersect_memory_section(MemoryRegionSection *s, 236 uint64_t offset, uint64_t size) 237 { 238 uint64_t start = MAX(s->offset_within_region, offset); 239 uint64_t end = MIN(s->offset_within_region + int128_get64(s->size), 240 offset + size); 241 242 if (end <= start) { 243 return false; 244 } 245 246 s->offset_within_address_space += start - s->offset_within_region; 247 s->offset_within_region = start; 248 s->size = int128_make64(end - start); 249 return true; 250 } 251 252 typedef int (*virtio_mem_section_cb)(MemoryRegionSection *s, void *arg); 253 254 static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem, 255 MemoryRegionSection *s, 256 void *arg, 257 virtio_mem_section_cb cb) 258 { 259 unsigned long first_bit, last_bit; 260 uint64_t offset, size; 261 int ret = 0; 262 263 first_bit = s->offset_within_region / vmem->block_size; 264 first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, first_bit); 265 while (first_bit < vmem->bitmap_size) { 266 MemoryRegionSection tmp = *s; 267 268 offset = first_bit * vmem->block_size; 269 last_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, 270 first_bit + 1) - 1; 271 size = (last_bit - first_bit + 1) * vmem->block_size; 272 273 if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { 274 break; 275 } 276 ret = cb(&tmp, arg); 277 if (ret) { 278 break; 279 } 280 first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, 281 last_bit + 2); 282 } 283 return ret; 284 } 285 286 static int virtio_mem_for_each_unplugged_section(const VirtIOMEM *vmem, 287 MemoryRegionSection *s, 288 void *arg, 289 virtio_mem_section_cb cb) 290 { 291 unsigned long first_bit, last_bit; 292 uint64_t offset, size; 293 int ret = 0; 294 295 first_bit = s->offset_within_region / vmem->block_size; 296 first_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, first_bit); 297 while (first_bit < vmem->bitmap_size) { 298 MemoryRegionSection tmp = *s; 299 300 offset = first_bit * vmem->block_size; 301 last_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, 302 first_bit + 1) - 1; 303 size = (last_bit - first_bit + 1) * vmem->block_size; 304 305 if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { 306 break; 307 } 308 ret = cb(&tmp, arg); 309 if (ret) { 310 break; 311 } 312 first_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, 313 last_bit + 2); 314 } 315 return ret; 316 } 317 318 static int virtio_mem_notify_populate_cb(MemoryRegionSection *s, void *arg) 319 { 320 RamDiscardListener *rdl = arg; 321 322 return rdl->notify_populate(rdl, s); 323 } 324 325 static int virtio_mem_notify_discard_cb(MemoryRegionSection *s, void *arg) 326 { 327 RamDiscardListener *rdl = arg; 328 329 rdl->notify_discard(rdl, s); 330 return 0; 331 } 332 333 static void virtio_mem_notify_unplug(VirtIOMEM *vmem, uint64_t offset, 334 uint64_t size) 335 { 336 RamDiscardListener *rdl; 337 338 QLIST_FOREACH(rdl, &vmem->rdl_list, next) { 339 MemoryRegionSection tmp = *rdl->section; 340 341 if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { 342 continue; 343 } 344 rdl->notify_discard(rdl, &tmp); 345 } 346 } 347 348 static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset, 349 uint64_t size) 350 { 351 RamDiscardListener *rdl, *rdl2; 352 int ret = 0; 353 354 QLIST_FOREACH(rdl, &vmem->rdl_list, next) { 355 MemoryRegionSection tmp = *rdl->section; 356 357 if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { 358 continue; 359 } 360 ret = rdl->notify_populate(rdl, &tmp); 361 if (ret) { 362 break; 363 } 364 } 365 366 if (ret) { 367 /* Notify all already-notified listeners. */ 368 QLIST_FOREACH(rdl2, &vmem->rdl_list, next) { 369 MemoryRegionSection tmp = *rdl2->section; 370 371 if (rdl2 == rdl) { 372 break; 373 } 374 if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { 375 continue; 376 } 377 rdl2->notify_discard(rdl2, &tmp); 378 } 379 } 380 return ret; 381 } 382 383 static void virtio_mem_notify_unplug_all(VirtIOMEM *vmem) 384 { 385 RamDiscardListener *rdl; 386 387 if (!vmem->size) { 388 return; 389 } 390 391 QLIST_FOREACH(rdl, &vmem->rdl_list, next) { 392 if (rdl->double_discard_supported) { 393 rdl->notify_discard(rdl, rdl->section); 394 } else { 395 virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl, 396 virtio_mem_notify_discard_cb); 397 } 398 } 399 } 400 401 static bool virtio_mem_is_range_plugged(const VirtIOMEM *vmem, 402 uint64_t start_gpa, uint64_t size) 403 { 404 const unsigned long first_bit = (start_gpa - vmem->addr) / vmem->block_size; 405 const unsigned long last_bit = first_bit + (size / vmem->block_size) - 1; 406 unsigned long found_bit; 407 408 /* We fake a shorter bitmap to avoid searching too far. */ 409 found_bit = find_next_zero_bit(vmem->bitmap, last_bit + 1, first_bit); 410 return found_bit > last_bit; 411 } 412 413 static bool virtio_mem_is_range_unplugged(const VirtIOMEM *vmem, 414 uint64_t start_gpa, uint64_t size) 415 { 416 const unsigned long first_bit = (start_gpa - vmem->addr) / vmem->block_size; 417 const unsigned long last_bit = first_bit + (size / vmem->block_size) - 1; 418 unsigned long found_bit; 419 420 /* We fake a shorter bitmap to avoid searching too far. */ 421 found_bit = find_next_bit(vmem->bitmap, last_bit + 1, first_bit); 422 return found_bit > last_bit; 423 } 424 425 static void virtio_mem_set_range_plugged(VirtIOMEM *vmem, uint64_t start_gpa, 426 uint64_t size) 427 { 428 const unsigned long bit = (start_gpa - vmem->addr) / vmem->block_size; 429 const unsigned long nbits = size / vmem->block_size; 430 431 bitmap_set(vmem->bitmap, bit, nbits); 432 } 433 434 static void virtio_mem_set_range_unplugged(VirtIOMEM *vmem, uint64_t start_gpa, 435 uint64_t size) 436 { 437 const unsigned long bit = (start_gpa - vmem->addr) / vmem->block_size; 438 const unsigned long nbits = size / vmem->block_size; 439 440 bitmap_clear(vmem->bitmap, bit, nbits); 441 } 442 443 static void virtio_mem_send_response(VirtIOMEM *vmem, VirtQueueElement *elem, 444 struct virtio_mem_resp *resp) 445 { 446 VirtIODevice *vdev = VIRTIO_DEVICE(vmem); 447 VirtQueue *vq = vmem->vq; 448 449 trace_virtio_mem_send_response(le16_to_cpu(resp->type)); 450 iov_from_buf(elem->in_sg, elem->in_num, 0, resp, sizeof(*resp)); 451 452 virtqueue_push(vq, elem, sizeof(*resp)); 453 virtio_notify(vdev, vq); 454 } 455 456 static void virtio_mem_send_response_simple(VirtIOMEM *vmem, 457 VirtQueueElement *elem, 458 uint16_t type) 459 { 460 struct virtio_mem_resp resp = { 461 .type = cpu_to_le16(type), 462 }; 463 464 virtio_mem_send_response(vmem, elem, &resp); 465 } 466 467 static bool virtio_mem_valid_range(const VirtIOMEM *vmem, uint64_t gpa, 468 uint64_t size) 469 { 470 if (!QEMU_IS_ALIGNED(gpa, vmem->block_size)) { 471 return false; 472 } 473 if (gpa + size < gpa || !size) { 474 return false; 475 } 476 if (gpa < vmem->addr || gpa >= vmem->addr + vmem->usable_region_size) { 477 return false; 478 } 479 if (gpa + size > vmem->addr + vmem->usable_region_size) { 480 return false; 481 } 482 return true; 483 } 484 485 static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa, 486 uint64_t size, bool plug) 487 { 488 const uint64_t offset = start_gpa - vmem->addr; 489 RAMBlock *rb = vmem->memdev->mr.ram_block; 490 int ret = 0; 491 492 if (virtio_mem_is_busy()) { 493 return -EBUSY; 494 } 495 496 if (!plug) { 497 if (ram_block_discard_range(rb, offset, size)) { 498 return -EBUSY; 499 } 500 virtio_mem_notify_unplug(vmem, offset, size); 501 virtio_mem_set_range_unplugged(vmem, start_gpa, size); 502 return 0; 503 } 504 505 if (vmem->prealloc) { 506 void *area = memory_region_get_ram_ptr(&vmem->memdev->mr) + offset; 507 int fd = memory_region_get_fd(&vmem->memdev->mr); 508 Error *local_err = NULL; 509 510 qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err); 511 if (local_err) { 512 static bool warned; 513 514 /* 515 * Warn only once, we don't want to fill the log with these 516 * warnings. 517 */ 518 if (!warned) { 519 warn_report_err(local_err); 520 warned = true; 521 } else { 522 error_free(local_err); 523 } 524 ret = -EBUSY; 525 } 526 } 527 528 if (!ret) { 529 ret = virtio_mem_notify_plug(vmem, offset, size); 530 } 531 if (ret) { 532 /* Could be preallocation or a notifier populated memory. */ 533 ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size); 534 return -EBUSY; 535 } 536 537 virtio_mem_set_range_plugged(vmem, start_gpa, size); 538 return 0; 539 } 540 541 static int virtio_mem_state_change_request(VirtIOMEM *vmem, uint64_t gpa, 542 uint16_t nb_blocks, bool plug) 543 { 544 const uint64_t size = nb_blocks * vmem->block_size; 545 int ret; 546 547 if (!virtio_mem_valid_range(vmem, gpa, size)) { 548 return VIRTIO_MEM_RESP_ERROR; 549 } 550 551 if (plug && (vmem->size + size > vmem->requested_size)) { 552 return VIRTIO_MEM_RESP_NACK; 553 } 554 555 /* test if really all blocks are in the opposite state */ 556 if ((plug && !virtio_mem_is_range_unplugged(vmem, gpa, size)) || 557 (!plug && !virtio_mem_is_range_plugged(vmem, gpa, size))) { 558 return VIRTIO_MEM_RESP_ERROR; 559 } 560 561 ret = virtio_mem_set_block_state(vmem, gpa, size, plug); 562 if (ret) { 563 return VIRTIO_MEM_RESP_BUSY; 564 } 565 if (plug) { 566 vmem->size += size; 567 } else { 568 vmem->size -= size; 569 } 570 notifier_list_notify(&vmem->size_change_notifiers, &vmem->size); 571 return VIRTIO_MEM_RESP_ACK; 572 } 573 574 static void virtio_mem_plug_request(VirtIOMEM *vmem, VirtQueueElement *elem, 575 struct virtio_mem_req *req) 576 { 577 const uint64_t gpa = le64_to_cpu(req->u.plug.addr); 578 const uint16_t nb_blocks = le16_to_cpu(req->u.plug.nb_blocks); 579 uint16_t type; 580 581 trace_virtio_mem_plug_request(gpa, nb_blocks); 582 type = virtio_mem_state_change_request(vmem, gpa, nb_blocks, true); 583 virtio_mem_send_response_simple(vmem, elem, type); 584 } 585 586 static void virtio_mem_unplug_request(VirtIOMEM *vmem, VirtQueueElement *elem, 587 struct virtio_mem_req *req) 588 { 589 const uint64_t gpa = le64_to_cpu(req->u.unplug.addr); 590 const uint16_t nb_blocks = le16_to_cpu(req->u.unplug.nb_blocks); 591 uint16_t type; 592 593 trace_virtio_mem_unplug_request(gpa, nb_blocks); 594 type = virtio_mem_state_change_request(vmem, gpa, nb_blocks, false); 595 virtio_mem_send_response_simple(vmem, elem, type); 596 } 597 598 static void virtio_mem_resize_usable_region(VirtIOMEM *vmem, 599 uint64_t requested_size, 600 bool can_shrink) 601 { 602 uint64_t newsize = MIN(memory_region_size(&vmem->memdev->mr), 603 requested_size + VIRTIO_MEM_USABLE_EXTENT); 604 605 /* The usable region size always has to be multiples of the block size. */ 606 newsize = QEMU_ALIGN_UP(newsize, vmem->block_size); 607 608 if (!requested_size) { 609 newsize = 0; 610 } 611 612 if (newsize < vmem->usable_region_size && !can_shrink) { 613 return; 614 } 615 616 trace_virtio_mem_resized_usable_region(vmem->usable_region_size, newsize); 617 vmem->usable_region_size = newsize; 618 } 619 620 static int virtio_mem_unplug_all(VirtIOMEM *vmem) 621 { 622 RAMBlock *rb = vmem->memdev->mr.ram_block; 623 624 if (virtio_mem_is_busy()) { 625 return -EBUSY; 626 } 627 628 if (ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb))) { 629 return -EBUSY; 630 } 631 virtio_mem_notify_unplug_all(vmem); 632 633 bitmap_clear(vmem->bitmap, 0, vmem->bitmap_size); 634 if (vmem->size) { 635 vmem->size = 0; 636 notifier_list_notify(&vmem->size_change_notifiers, &vmem->size); 637 } 638 trace_virtio_mem_unplugged_all(); 639 virtio_mem_resize_usable_region(vmem, vmem->requested_size, true); 640 return 0; 641 } 642 643 static void virtio_mem_unplug_all_request(VirtIOMEM *vmem, 644 VirtQueueElement *elem) 645 { 646 trace_virtio_mem_unplug_all_request(); 647 if (virtio_mem_unplug_all(vmem)) { 648 virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_BUSY); 649 } else { 650 virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_ACK); 651 } 652 } 653 654 static void virtio_mem_state_request(VirtIOMEM *vmem, VirtQueueElement *elem, 655 struct virtio_mem_req *req) 656 { 657 const uint16_t nb_blocks = le16_to_cpu(req->u.state.nb_blocks); 658 const uint64_t gpa = le64_to_cpu(req->u.state.addr); 659 const uint64_t size = nb_blocks * vmem->block_size; 660 struct virtio_mem_resp resp = { 661 .type = cpu_to_le16(VIRTIO_MEM_RESP_ACK), 662 }; 663 664 trace_virtio_mem_state_request(gpa, nb_blocks); 665 if (!virtio_mem_valid_range(vmem, gpa, size)) { 666 virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_ERROR); 667 return; 668 } 669 670 if (virtio_mem_is_range_plugged(vmem, gpa, size)) { 671 resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_PLUGGED); 672 } else if (virtio_mem_is_range_unplugged(vmem, gpa, size)) { 673 resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_UNPLUGGED); 674 } else { 675 resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_MIXED); 676 } 677 trace_virtio_mem_state_response(le16_to_cpu(resp.u.state.state)); 678 virtio_mem_send_response(vmem, elem, &resp); 679 } 680 681 static void virtio_mem_handle_request(VirtIODevice *vdev, VirtQueue *vq) 682 { 683 const int len = sizeof(struct virtio_mem_req); 684 VirtIOMEM *vmem = VIRTIO_MEM(vdev); 685 VirtQueueElement *elem; 686 struct virtio_mem_req req; 687 uint16_t type; 688 689 while (true) { 690 elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); 691 if (!elem) { 692 return; 693 } 694 695 if (iov_to_buf(elem->out_sg, elem->out_num, 0, &req, len) < len) { 696 virtio_error(vdev, "virtio-mem protocol violation: invalid request" 697 " size: %d", len); 698 virtqueue_detach_element(vq, elem, 0); 699 g_free(elem); 700 return; 701 } 702 703 if (iov_size(elem->in_sg, elem->in_num) < 704 sizeof(struct virtio_mem_resp)) { 705 virtio_error(vdev, "virtio-mem protocol violation: not enough space" 706 " for response: %zu", 707 iov_size(elem->in_sg, elem->in_num)); 708 virtqueue_detach_element(vq, elem, 0); 709 g_free(elem); 710 return; 711 } 712 713 type = le16_to_cpu(req.type); 714 switch (type) { 715 case VIRTIO_MEM_REQ_PLUG: 716 virtio_mem_plug_request(vmem, elem, &req); 717 break; 718 case VIRTIO_MEM_REQ_UNPLUG: 719 virtio_mem_unplug_request(vmem, elem, &req); 720 break; 721 case VIRTIO_MEM_REQ_UNPLUG_ALL: 722 virtio_mem_unplug_all_request(vmem, elem); 723 break; 724 case VIRTIO_MEM_REQ_STATE: 725 virtio_mem_state_request(vmem, elem, &req); 726 break; 727 default: 728 virtio_error(vdev, "virtio-mem protocol violation: unknown request" 729 " type: %d", type); 730 virtqueue_detach_element(vq, elem, 0); 731 g_free(elem); 732 return; 733 } 734 735 g_free(elem); 736 } 737 } 738 739 static void virtio_mem_get_config(VirtIODevice *vdev, uint8_t *config_data) 740 { 741 VirtIOMEM *vmem = VIRTIO_MEM(vdev); 742 struct virtio_mem_config *config = (void *) config_data; 743 744 config->block_size = cpu_to_le64(vmem->block_size); 745 config->node_id = cpu_to_le16(vmem->node); 746 config->requested_size = cpu_to_le64(vmem->requested_size); 747 config->plugged_size = cpu_to_le64(vmem->size); 748 config->addr = cpu_to_le64(vmem->addr); 749 config->region_size = cpu_to_le64(memory_region_size(&vmem->memdev->mr)); 750 config->usable_region_size = cpu_to_le64(vmem->usable_region_size); 751 } 752 753 static uint64_t virtio_mem_get_features(VirtIODevice *vdev, uint64_t features, 754 Error **errp) 755 { 756 MachineState *ms = MACHINE(qdev_get_machine()); 757 VirtIOMEM *vmem = VIRTIO_MEM(vdev); 758 759 if (ms->numa_state) { 760 #if defined(CONFIG_ACPI) 761 virtio_add_feature(&features, VIRTIO_MEM_F_ACPI_PXM); 762 #endif 763 } 764 assert(vmem->unplugged_inaccessible != ON_OFF_AUTO_AUTO); 765 if (vmem->unplugged_inaccessible == ON_OFF_AUTO_ON) { 766 virtio_add_feature(&features, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE); 767 } 768 return features; 769 } 770 771 static int virtio_mem_validate_features(VirtIODevice *vdev) 772 { 773 if (virtio_host_has_feature(vdev, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE) && 774 !virtio_vdev_has_feature(vdev, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE)) { 775 return -EFAULT; 776 } 777 return 0; 778 } 779 780 static void virtio_mem_system_reset(void *opaque) 781 { 782 VirtIOMEM *vmem = VIRTIO_MEM(opaque); 783 784 /* 785 * During usual resets, we will unplug all memory and shrink the usable 786 * region size. This is, however, not possible in all scenarios. Then, 787 * the guest has to deal with this manually (VIRTIO_MEM_REQ_UNPLUG_ALL). 788 */ 789 virtio_mem_unplug_all(vmem); 790 } 791 792 static void virtio_mem_device_realize(DeviceState *dev, Error **errp) 793 { 794 MachineState *ms = MACHINE(qdev_get_machine()); 795 int nb_numa_nodes = ms->numa_state ? ms->numa_state->num_nodes : 0; 796 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 797 VirtIOMEM *vmem = VIRTIO_MEM(dev); 798 uint64_t page_size; 799 RAMBlock *rb; 800 int ret; 801 802 if (!vmem->memdev) { 803 error_setg(errp, "'%s' property is not set", VIRTIO_MEM_MEMDEV_PROP); 804 return; 805 } else if (host_memory_backend_is_mapped(vmem->memdev)) { 806 error_setg(errp, "'%s' property specifies a busy memdev: %s", 807 VIRTIO_MEM_MEMDEV_PROP, 808 object_get_canonical_path_component(OBJECT(vmem->memdev))); 809 return; 810 } else if (!memory_region_is_ram(&vmem->memdev->mr) || 811 memory_region_is_rom(&vmem->memdev->mr) || 812 !vmem->memdev->mr.ram_block) { 813 error_setg(errp, "'%s' property specifies an unsupported memdev", 814 VIRTIO_MEM_MEMDEV_PROP); 815 return; 816 } else if (vmem->memdev->prealloc) { 817 error_setg(errp, "'%s' property specifies a memdev with preallocation" 818 " enabled: %s. Instead, specify 'prealloc=on' for the" 819 " virtio-mem device. ", VIRTIO_MEM_MEMDEV_PROP, 820 object_get_canonical_path_component(OBJECT(vmem->memdev))); 821 return; 822 } 823 824 if ((nb_numa_nodes && vmem->node >= nb_numa_nodes) || 825 (!nb_numa_nodes && vmem->node)) { 826 error_setg(errp, "'%s' property has value '%" PRIu32 "', which exceeds" 827 "the number of numa nodes: %d", VIRTIO_MEM_NODE_PROP, 828 vmem->node, nb_numa_nodes ? nb_numa_nodes : 1); 829 return; 830 } 831 832 if (enable_mlock) { 833 error_setg(errp, "Incompatible with mlock"); 834 return; 835 } 836 837 rb = vmem->memdev->mr.ram_block; 838 page_size = qemu_ram_pagesize(rb); 839 840 #if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS) 841 switch (vmem->unplugged_inaccessible) { 842 case ON_OFF_AUTO_AUTO: 843 if (virtio_mem_has_shared_zeropage(rb)) { 844 vmem->unplugged_inaccessible = ON_OFF_AUTO_OFF; 845 } else { 846 vmem->unplugged_inaccessible = ON_OFF_AUTO_ON; 847 } 848 break; 849 case ON_OFF_AUTO_OFF: 850 if (!virtio_mem_has_shared_zeropage(rb)) { 851 warn_report("'%s' property set to 'off' with a memdev that does" 852 " not support the shared zeropage.", 853 VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP); 854 } 855 break; 856 default: 857 break; 858 } 859 #else /* VIRTIO_MEM_HAS_LEGACY_GUESTS */ 860 vmem->unplugged_inaccessible = ON_OFF_AUTO_ON; 861 #endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */ 862 863 /* 864 * If the block size wasn't configured by the user, use a sane default. This 865 * allows using hugetlbfs backends of any page size without manual 866 * intervention. 867 */ 868 if (!vmem->block_size) { 869 vmem->block_size = virtio_mem_default_block_size(rb); 870 } 871 872 if (vmem->block_size < page_size) { 873 error_setg(errp, "'%s' property has to be at least the page size (0x%" 874 PRIx64 ")", VIRTIO_MEM_BLOCK_SIZE_PROP, page_size); 875 return; 876 } else if (vmem->block_size < virtio_mem_default_block_size(rb)) { 877 warn_report("'%s' property is smaller than the default block size (%" 878 PRIx64 " MiB)", VIRTIO_MEM_BLOCK_SIZE_PROP, 879 virtio_mem_default_block_size(rb) / MiB); 880 } 881 if (!QEMU_IS_ALIGNED(vmem->requested_size, vmem->block_size)) { 882 error_setg(errp, "'%s' property has to be multiples of '%s' (0x%" PRIx64 883 ")", VIRTIO_MEM_REQUESTED_SIZE_PROP, 884 VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size); 885 return; 886 } else if (!QEMU_IS_ALIGNED(vmem->addr, vmem->block_size)) { 887 error_setg(errp, "'%s' property has to be multiples of '%s' (0x%" PRIx64 888 ")", VIRTIO_MEM_ADDR_PROP, VIRTIO_MEM_BLOCK_SIZE_PROP, 889 vmem->block_size); 890 return; 891 } else if (!QEMU_IS_ALIGNED(memory_region_size(&vmem->memdev->mr), 892 vmem->block_size)) { 893 error_setg(errp, "'%s' property memdev size has to be multiples of" 894 "'%s' (0x%" PRIx64 ")", VIRTIO_MEM_MEMDEV_PROP, 895 VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size); 896 return; 897 } 898 899 if (ram_block_coordinated_discard_require(true)) { 900 error_setg(errp, "Discarding RAM is disabled"); 901 return; 902 } 903 904 ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb)); 905 if (ret) { 906 error_setg_errno(errp, -ret, "Unexpected error discarding RAM"); 907 ram_block_coordinated_discard_require(false); 908 return; 909 } 910 911 virtio_mem_resize_usable_region(vmem, vmem->requested_size, true); 912 913 vmem->bitmap_size = memory_region_size(&vmem->memdev->mr) / 914 vmem->block_size; 915 vmem->bitmap = bitmap_new(vmem->bitmap_size); 916 917 virtio_init(vdev, VIRTIO_ID_MEM, sizeof(struct virtio_mem_config)); 918 vmem->vq = virtio_add_queue(vdev, 128, virtio_mem_handle_request); 919 920 host_memory_backend_set_mapped(vmem->memdev, true); 921 vmstate_register_ram(&vmem->memdev->mr, DEVICE(vmem)); 922 if (vmem->early_migration) { 923 vmstate_register(VMSTATE_IF(vmem), VMSTATE_INSTANCE_ID_ANY, 924 &vmstate_virtio_mem_device_early, vmem); 925 } 926 qemu_register_reset(virtio_mem_system_reset, vmem); 927 928 /* 929 * Set ourselves as RamDiscardManager before the plug handler maps the 930 * memory region and exposes it via an address space. 931 */ 932 memory_region_set_ram_discard_manager(&vmem->memdev->mr, 933 RAM_DISCARD_MANAGER(vmem)); 934 } 935 936 static void virtio_mem_device_unrealize(DeviceState *dev) 937 { 938 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 939 VirtIOMEM *vmem = VIRTIO_MEM(dev); 940 941 /* 942 * The unplug handler unmapped the memory region, it cannot be 943 * found via an address space anymore. Unset ourselves. 944 */ 945 memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL); 946 qemu_unregister_reset(virtio_mem_system_reset, vmem); 947 if (vmem->early_migration) { 948 vmstate_unregister(VMSTATE_IF(vmem), &vmstate_virtio_mem_device_early, 949 vmem); 950 } 951 vmstate_unregister_ram(&vmem->memdev->mr, DEVICE(vmem)); 952 host_memory_backend_set_mapped(vmem->memdev, false); 953 virtio_del_queue(vdev, 0); 954 virtio_cleanup(vdev); 955 g_free(vmem->bitmap); 956 ram_block_coordinated_discard_require(false); 957 } 958 959 static int virtio_mem_discard_range_cb(const VirtIOMEM *vmem, void *arg, 960 uint64_t offset, uint64_t size) 961 { 962 RAMBlock *rb = vmem->memdev->mr.ram_block; 963 964 return ram_block_discard_range(rb, offset, size) ? -EINVAL : 0; 965 } 966 967 static int virtio_mem_restore_unplugged(VirtIOMEM *vmem) 968 { 969 /* Make sure all memory is really discarded after migration. */ 970 return virtio_mem_for_each_unplugged_range(vmem, NULL, 971 virtio_mem_discard_range_cb); 972 } 973 974 static int virtio_mem_post_load(void *opaque, int version_id) 975 { 976 VirtIOMEM *vmem = VIRTIO_MEM(opaque); 977 RamDiscardListener *rdl; 978 int ret; 979 980 if (vmem->prealloc && !vmem->early_migration) { 981 warn_report("Proper preallocation with migration requires a newer QEMU machine"); 982 } 983 984 /* 985 * We started out with all memory discarded and our memory region is mapped 986 * into an address space. Replay, now that we updated the bitmap. 987 */ 988 QLIST_FOREACH(rdl, &vmem->rdl_list, next) { 989 ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl, 990 virtio_mem_notify_populate_cb); 991 if (ret) { 992 return ret; 993 } 994 } 995 996 if (migration_in_incoming_postcopy()) { 997 return 0; 998 } 999 1000 return virtio_mem_restore_unplugged(vmem); 1001 } 1002 1003 static int virtio_mem_prealloc_range_cb(const VirtIOMEM *vmem, void *arg, 1004 uint64_t offset, uint64_t size) 1005 { 1006 void *area = memory_region_get_ram_ptr(&vmem->memdev->mr) + offset; 1007 int fd = memory_region_get_fd(&vmem->memdev->mr); 1008 Error *local_err = NULL; 1009 1010 qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err); 1011 if (local_err) { 1012 error_report_err(local_err); 1013 return -ENOMEM; 1014 } 1015 return 0; 1016 } 1017 1018 static int virtio_mem_post_load_early(void *opaque, int version_id) 1019 { 1020 VirtIOMEM *vmem = VIRTIO_MEM(opaque); 1021 RAMBlock *rb = vmem->memdev->mr.ram_block; 1022 int ret; 1023 1024 if (!vmem->prealloc) { 1025 return 0; 1026 } 1027 1028 /* 1029 * We restored the bitmap and verified that the basic properties 1030 * match on source and destination, so we can go ahead and preallocate 1031 * memory for all plugged memory blocks, before actual RAM migration starts 1032 * touching this memory. 1033 */ 1034 ret = virtio_mem_for_each_plugged_range(vmem, NULL, 1035 virtio_mem_prealloc_range_cb); 1036 if (ret) { 1037 return ret; 1038 } 1039 1040 /* 1041 * This is tricky: postcopy wants to start with a clean slate. On 1042 * POSTCOPY_INCOMING_ADVISE, postcopy code discards all (ordinarily 1043 * preallocated) RAM such that postcopy will work as expected later. 1044 * 1045 * However, we run after POSTCOPY_INCOMING_ADVISE -- but before actual 1046 * RAM migration. So let's discard all memory again. This looks like an 1047 * expensive NOP, but actually serves a purpose: we made sure that we 1048 * were able to allocate all required backend memory once. We cannot 1049 * guarantee that the backend memory we will free will remain free 1050 * until we need it during postcopy, but at least we can catch the 1051 * obvious setup issues this way. 1052 */ 1053 if (migration_incoming_postcopy_advised()) { 1054 if (ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb))) { 1055 return -EBUSY; 1056 } 1057 } 1058 return 0; 1059 } 1060 1061 typedef struct VirtIOMEMMigSanityChecks { 1062 VirtIOMEM *parent; 1063 uint64_t addr; 1064 uint64_t region_size; 1065 uint64_t block_size; 1066 uint32_t node; 1067 } VirtIOMEMMigSanityChecks; 1068 1069 static int virtio_mem_mig_sanity_checks_pre_save(void *opaque) 1070 { 1071 VirtIOMEMMigSanityChecks *tmp = opaque; 1072 VirtIOMEM *vmem = tmp->parent; 1073 1074 tmp->addr = vmem->addr; 1075 tmp->region_size = memory_region_size(&vmem->memdev->mr); 1076 tmp->block_size = vmem->block_size; 1077 tmp->node = vmem->node; 1078 return 0; 1079 } 1080 1081 static int virtio_mem_mig_sanity_checks_post_load(void *opaque, int version_id) 1082 { 1083 VirtIOMEMMigSanityChecks *tmp = opaque; 1084 VirtIOMEM *vmem = tmp->parent; 1085 const uint64_t new_region_size = memory_region_size(&vmem->memdev->mr); 1086 1087 if (tmp->addr != vmem->addr) { 1088 error_report("Property '%s' changed from 0x%" PRIx64 " to 0x%" PRIx64, 1089 VIRTIO_MEM_ADDR_PROP, tmp->addr, vmem->addr); 1090 return -EINVAL; 1091 } 1092 /* 1093 * Note: Preparation for resizeable memory regions. The maximum size 1094 * of the memory region must not change during migration. 1095 */ 1096 if (tmp->region_size != new_region_size) { 1097 error_report("Property '%s' size changed from 0x%" PRIx64 " to 0x%" 1098 PRIx64, VIRTIO_MEM_MEMDEV_PROP, tmp->region_size, 1099 new_region_size); 1100 return -EINVAL; 1101 } 1102 if (tmp->block_size != vmem->block_size) { 1103 error_report("Property '%s' changed from 0x%" PRIx64 " to 0x%" PRIx64, 1104 VIRTIO_MEM_BLOCK_SIZE_PROP, tmp->block_size, 1105 vmem->block_size); 1106 return -EINVAL; 1107 } 1108 if (tmp->node != vmem->node) { 1109 error_report("Property '%s' changed from %" PRIu32 " to %" PRIu32, 1110 VIRTIO_MEM_NODE_PROP, tmp->node, vmem->node); 1111 return -EINVAL; 1112 } 1113 return 0; 1114 } 1115 1116 static const VMStateDescription vmstate_virtio_mem_sanity_checks = { 1117 .name = "virtio-mem-device/sanity-checks", 1118 .pre_save = virtio_mem_mig_sanity_checks_pre_save, 1119 .post_load = virtio_mem_mig_sanity_checks_post_load, 1120 .fields = (VMStateField[]) { 1121 VMSTATE_UINT64(addr, VirtIOMEMMigSanityChecks), 1122 VMSTATE_UINT64(region_size, VirtIOMEMMigSanityChecks), 1123 VMSTATE_UINT64(block_size, VirtIOMEMMigSanityChecks), 1124 VMSTATE_UINT32(node, VirtIOMEMMigSanityChecks), 1125 VMSTATE_END_OF_LIST(), 1126 }, 1127 }; 1128 1129 static bool virtio_mem_vmstate_field_exists(void *opaque, int version_id) 1130 { 1131 const VirtIOMEM *vmem = VIRTIO_MEM(opaque); 1132 1133 /* With early migration, these fields were already migrated. */ 1134 return !vmem->early_migration; 1135 } 1136 1137 static const VMStateDescription vmstate_virtio_mem_device = { 1138 .name = "virtio-mem-device", 1139 .minimum_version_id = 1, 1140 .version_id = 1, 1141 .priority = MIG_PRI_VIRTIO_MEM, 1142 .post_load = virtio_mem_post_load, 1143 .fields = (VMStateField[]) { 1144 VMSTATE_WITH_TMP_TEST(VirtIOMEM, virtio_mem_vmstate_field_exists, 1145 VirtIOMEMMigSanityChecks, 1146 vmstate_virtio_mem_sanity_checks), 1147 VMSTATE_UINT64(usable_region_size, VirtIOMEM), 1148 VMSTATE_UINT64_TEST(size, VirtIOMEM, virtio_mem_vmstate_field_exists), 1149 VMSTATE_UINT64(requested_size, VirtIOMEM), 1150 VMSTATE_BITMAP_TEST(bitmap, VirtIOMEM, virtio_mem_vmstate_field_exists, 1151 0, bitmap_size), 1152 VMSTATE_END_OF_LIST() 1153 }, 1154 }; 1155 1156 /* 1157 * Transfer properties that are immutable while migration is active early, 1158 * such that we have have this information around before migrating any RAM 1159 * content. 1160 * 1161 * Note that virtio_mem_is_busy() makes sure these properties can no longer 1162 * change on the migration source until migration completed. 1163 * 1164 * With QEMU compat machines, we transmit these properties later, via 1165 * vmstate_virtio_mem_device instead -- see virtio_mem_vmstate_field_exists(). 1166 */ 1167 static const VMStateDescription vmstate_virtio_mem_device_early = { 1168 .name = "virtio-mem-device-early", 1169 .minimum_version_id = 1, 1170 .version_id = 1, 1171 .early_setup = true, 1172 .post_load = virtio_mem_post_load_early, 1173 .fields = (VMStateField[]) { 1174 VMSTATE_WITH_TMP(VirtIOMEM, VirtIOMEMMigSanityChecks, 1175 vmstate_virtio_mem_sanity_checks), 1176 VMSTATE_UINT64(size, VirtIOMEM), 1177 VMSTATE_BITMAP(bitmap, VirtIOMEM, 0, bitmap_size), 1178 VMSTATE_END_OF_LIST() 1179 }, 1180 }; 1181 1182 static const VMStateDescription vmstate_virtio_mem = { 1183 .name = "virtio-mem", 1184 .minimum_version_id = 1, 1185 .version_id = 1, 1186 .fields = (VMStateField[]) { 1187 VMSTATE_VIRTIO_DEVICE, 1188 VMSTATE_END_OF_LIST() 1189 }, 1190 }; 1191 1192 static void virtio_mem_fill_device_info(const VirtIOMEM *vmem, 1193 VirtioMEMDeviceInfo *vi) 1194 { 1195 vi->memaddr = vmem->addr; 1196 vi->node = vmem->node; 1197 vi->requested_size = vmem->requested_size; 1198 vi->size = vmem->size; 1199 vi->max_size = memory_region_size(&vmem->memdev->mr); 1200 vi->block_size = vmem->block_size; 1201 vi->memdev = object_get_canonical_path(OBJECT(vmem->memdev)); 1202 } 1203 1204 static MemoryRegion *virtio_mem_get_memory_region(VirtIOMEM *vmem, Error **errp) 1205 { 1206 if (!vmem->memdev) { 1207 error_setg(errp, "'%s' property must be set", VIRTIO_MEM_MEMDEV_PROP); 1208 return NULL; 1209 } 1210 1211 return &vmem->memdev->mr; 1212 } 1213 1214 static void virtio_mem_add_size_change_notifier(VirtIOMEM *vmem, 1215 Notifier *notifier) 1216 { 1217 notifier_list_add(&vmem->size_change_notifiers, notifier); 1218 } 1219 1220 static void virtio_mem_remove_size_change_notifier(VirtIOMEM *vmem, 1221 Notifier *notifier) 1222 { 1223 notifier_remove(notifier); 1224 } 1225 1226 static void virtio_mem_get_size(Object *obj, Visitor *v, const char *name, 1227 void *opaque, Error **errp) 1228 { 1229 const VirtIOMEM *vmem = VIRTIO_MEM(obj); 1230 uint64_t value = vmem->size; 1231 1232 visit_type_size(v, name, &value, errp); 1233 } 1234 1235 static void virtio_mem_get_requested_size(Object *obj, Visitor *v, 1236 const char *name, void *opaque, 1237 Error **errp) 1238 { 1239 const VirtIOMEM *vmem = VIRTIO_MEM(obj); 1240 uint64_t value = vmem->requested_size; 1241 1242 visit_type_size(v, name, &value, errp); 1243 } 1244 1245 static void virtio_mem_set_requested_size(Object *obj, Visitor *v, 1246 const char *name, void *opaque, 1247 Error **errp) 1248 { 1249 VirtIOMEM *vmem = VIRTIO_MEM(obj); 1250 uint64_t value; 1251 1252 if (!visit_type_size(v, name, &value, errp)) { 1253 return; 1254 } 1255 1256 /* 1257 * The block size and memory backend are not fixed until the device was 1258 * realized. realize() will verify these properties then. 1259 */ 1260 if (DEVICE(obj)->realized) { 1261 if (!QEMU_IS_ALIGNED(value, vmem->block_size)) { 1262 error_setg(errp, "'%s' has to be multiples of '%s' (0x%" PRIx64 1263 ")", name, VIRTIO_MEM_BLOCK_SIZE_PROP, 1264 vmem->block_size); 1265 return; 1266 } else if (value > memory_region_size(&vmem->memdev->mr)) { 1267 error_setg(errp, "'%s' cannot exceed the memory backend size" 1268 "(0x%" PRIx64 ")", name, 1269 memory_region_size(&vmem->memdev->mr)); 1270 return; 1271 } 1272 1273 if (value != vmem->requested_size) { 1274 virtio_mem_resize_usable_region(vmem, value, false); 1275 vmem->requested_size = value; 1276 } 1277 /* 1278 * Trigger a config update so the guest gets notified. We trigger 1279 * even if the size didn't change (especially helpful for debugging). 1280 */ 1281 virtio_notify_config(VIRTIO_DEVICE(vmem)); 1282 } else { 1283 vmem->requested_size = value; 1284 } 1285 } 1286 1287 static void virtio_mem_get_block_size(Object *obj, Visitor *v, const char *name, 1288 void *opaque, Error **errp) 1289 { 1290 const VirtIOMEM *vmem = VIRTIO_MEM(obj); 1291 uint64_t value = vmem->block_size; 1292 1293 /* 1294 * If not configured by the user (and we're not realized yet), use the 1295 * default block size we would use with the current memory backend. 1296 */ 1297 if (!value) { 1298 if (vmem->memdev && memory_region_is_ram(&vmem->memdev->mr)) { 1299 value = virtio_mem_default_block_size(vmem->memdev->mr.ram_block); 1300 } else { 1301 value = virtio_mem_thp_size(); 1302 } 1303 } 1304 1305 visit_type_size(v, name, &value, errp); 1306 } 1307 1308 static void virtio_mem_set_block_size(Object *obj, Visitor *v, const char *name, 1309 void *opaque, Error **errp) 1310 { 1311 VirtIOMEM *vmem = VIRTIO_MEM(obj); 1312 uint64_t value; 1313 1314 if (DEVICE(obj)->realized) { 1315 error_setg(errp, "'%s' cannot be changed", name); 1316 return; 1317 } 1318 1319 if (!visit_type_size(v, name, &value, errp)) { 1320 return; 1321 } 1322 1323 if (value < VIRTIO_MEM_MIN_BLOCK_SIZE) { 1324 error_setg(errp, "'%s' property has to be at least 0x%" PRIx32, name, 1325 VIRTIO_MEM_MIN_BLOCK_SIZE); 1326 return; 1327 } else if (!is_power_of_2(value)) { 1328 error_setg(errp, "'%s' property has to be a power of two", name); 1329 return; 1330 } 1331 vmem->block_size = value; 1332 } 1333 1334 static void virtio_mem_instance_init(Object *obj) 1335 { 1336 VirtIOMEM *vmem = VIRTIO_MEM(obj); 1337 1338 notifier_list_init(&vmem->size_change_notifiers); 1339 QLIST_INIT(&vmem->rdl_list); 1340 1341 object_property_add(obj, VIRTIO_MEM_SIZE_PROP, "size", virtio_mem_get_size, 1342 NULL, NULL, NULL); 1343 object_property_add(obj, VIRTIO_MEM_REQUESTED_SIZE_PROP, "size", 1344 virtio_mem_get_requested_size, 1345 virtio_mem_set_requested_size, NULL, NULL); 1346 object_property_add(obj, VIRTIO_MEM_BLOCK_SIZE_PROP, "size", 1347 virtio_mem_get_block_size, virtio_mem_set_block_size, 1348 NULL, NULL); 1349 } 1350 1351 static Property virtio_mem_properties[] = { 1352 DEFINE_PROP_UINT64(VIRTIO_MEM_ADDR_PROP, VirtIOMEM, addr, 0), 1353 DEFINE_PROP_UINT32(VIRTIO_MEM_NODE_PROP, VirtIOMEM, node, 0), 1354 DEFINE_PROP_BOOL(VIRTIO_MEM_PREALLOC_PROP, VirtIOMEM, prealloc, false), 1355 DEFINE_PROP_LINK(VIRTIO_MEM_MEMDEV_PROP, VirtIOMEM, memdev, 1356 TYPE_MEMORY_BACKEND, HostMemoryBackend *), 1357 #if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS) 1358 DEFINE_PROP_ON_OFF_AUTO(VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP, VirtIOMEM, 1359 unplugged_inaccessible, ON_OFF_AUTO_ON), 1360 #endif 1361 DEFINE_PROP_BOOL(VIRTIO_MEM_EARLY_MIGRATION_PROP, VirtIOMEM, 1362 early_migration, true), 1363 DEFINE_PROP_END_OF_LIST(), 1364 }; 1365 1366 static uint64_t virtio_mem_rdm_get_min_granularity(const RamDiscardManager *rdm, 1367 const MemoryRegion *mr) 1368 { 1369 const VirtIOMEM *vmem = VIRTIO_MEM(rdm); 1370 1371 g_assert(mr == &vmem->memdev->mr); 1372 return vmem->block_size; 1373 } 1374 1375 static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm, 1376 const MemoryRegionSection *s) 1377 { 1378 const VirtIOMEM *vmem = VIRTIO_MEM(rdm); 1379 uint64_t start_gpa = vmem->addr + s->offset_within_region; 1380 uint64_t end_gpa = start_gpa + int128_get64(s->size); 1381 1382 g_assert(s->mr == &vmem->memdev->mr); 1383 1384 start_gpa = QEMU_ALIGN_DOWN(start_gpa, vmem->block_size); 1385 end_gpa = QEMU_ALIGN_UP(end_gpa, vmem->block_size); 1386 1387 if (!virtio_mem_valid_range(vmem, start_gpa, end_gpa - start_gpa)) { 1388 return false; 1389 } 1390 1391 return virtio_mem_is_range_plugged(vmem, start_gpa, end_gpa - start_gpa); 1392 } 1393 1394 struct VirtIOMEMReplayData { 1395 void *fn; 1396 void *opaque; 1397 }; 1398 1399 static int virtio_mem_rdm_replay_populated_cb(MemoryRegionSection *s, void *arg) 1400 { 1401 struct VirtIOMEMReplayData *data = arg; 1402 1403 return ((ReplayRamPopulate)data->fn)(s, data->opaque); 1404 } 1405 1406 static int virtio_mem_rdm_replay_populated(const RamDiscardManager *rdm, 1407 MemoryRegionSection *s, 1408 ReplayRamPopulate replay_fn, 1409 void *opaque) 1410 { 1411 const VirtIOMEM *vmem = VIRTIO_MEM(rdm); 1412 struct VirtIOMEMReplayData data = { 1413 .fn = replay_fn, 1414 .opaque = opaque, 1415 }; 1416 1417 g_assert(s->mr == &vmem->memdev->mr); 1418 return virtio_mem_for_each_plugged_section(vmem, s, &data, 1419 virtio_mem_rdm_replay_populated_cb); 1420 } 1421 1422 static int virtio_mem_rdm_replay_discarded_cb(MemoryRegionSection *s, 1423 void *arg) 1424 { 1425 struct VirtIOMEMReplayData *data = arg; 1426 1427 ((ReplayRamDiscard)data->fn)(s, data->opaque); 1428 return 0; 1429 } 1430 1431 static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm, 1432 MemoryRegionSection *s, 1433 ReplayRamDiscard replay_fn, 1434 void *opaque) 1435 { 1436 const VirtIOMEM *vmem = VIRTIO_MEM(rdm); 1437 struct VirtIOMEMReplayData data = { 1438 .fn = replay_fn, 1439 .opaque = opaque, 1440 }; 1441 1442 g_assert(s->mr == &vmem->memdev->mr); 1443 virtio_mem_for_each_unplugged_section(vmem, s, &data, 1444 virtio_mem_rdm_replay_discarded_cb); 1445 } 1446 1447 static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm, 1448 RamDiscardListener *rdl, 1449 MemoryRegionSection *s) 1450 { 1451 VirtIOMEM *vmem = VIRTIO_MEM(rdm); 1452 int ret; 1453 1454 g_assert(s->mr == &vmem->memdev->mr); 1455 rdl->section = memory_region_section_new_copy(s); 1456 1457 QLIST_INSERT_HEAD(&vmem->rdl_list, rdl, next); 1458 ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl, 1459 virtio_mem_notify_populate_cb); 1460 if (ret) { 1461 error_report("%s: Replaying plugged ranges failed: %s", __func__, 1462 strerror(-ret)); 1463 } 1464 } 1465 1466 static void virtio_mem_rdm_unregister_listener(RamDiscardManager *rdm, 1467 RamDiscardListener *rdl) 1468 { 1469 VirtIOMEM *vmem = VIRTIO_MEM(rdm); 1470 1471 g_assert(rdl->section->mr == &vmem->memdev->mr); 1472 if (vmem->size) { 1473 if (rdl->double_discard_supported) { 1474 rdl->notify_discard(rdl, rdl->section); 1475 } else { 1476 virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl, 1477 virtio_mem_notify_discard_cb); 1478 } 1479 } 1480 1481 memory_region_section_free_copy(rdl->section); 1482 rdl->section = NULL; 1483 QLIST_REMOVE(rdl, next); 1484 } 1485 1486 static void virtio_mem_class_init(ObjectClass *klass, void *data) 1487 { 1488 DeviceClass *dc = DEVICE_CLASS(klass); 1489 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); 1490 VirtIOMEMClass *vmc = VIRTIO_MEM_CLASS(klass); 1491 RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(klass); 1492 1493 device_class_set_props(dc, virtio_mem_properties); 1494 dc->vmsd = &vmstate_virtio_mem; 1495 1496 set_bit(DEVICE_CATEGORY_MISC, dc->categories); 1497 vdc->realize = virtio_mem_device_realize; 1498 vdc->unrealize = virtio_mem_device_unrealize; 1499 vdc->get_config = virtio_mem_get_config; 1500 vdc->get_features = virtio_mem_get_features; 1501 vdc->validate_features = virtio_mem_validate_features; 1502 vdc->vmsd = &vmstate_virtio_mem_device; 1503 1504 vmc->fill_device_info = virtio_mem_fill_device_info; 1505 vmc->get_memory_region = virtio_mem_get_memory_region; 1506 vmc->add_size_change_notifier = virtio_mem_add_size_change_notifier; 1507 vmc->remove_size_change_notifier = virtio_mem_remove_size_change_notifier; 1508 1509 rdmc->get_min_granularity = virtio_mem_rdm_get_min_granularity; 1510 rdmc->is_populated = virtio_mem_rdm_is_populated; 1511 rdmc->replay_populated = virtio_mem_rdm_replay_populated; 1512 rdmc->replay_discarded = virtio_mem_rdm_replay_discarded; 1513 rdmc->register_listener = virtio_mem_rdm_register_listener; 1514 rdmc->unregister_listener = virtio_mem_rdm_unregister_listener; 1515 } 1516 1517 static const TypeInfo virtio_mem_info = { 1518 .name = TYPE_VIRTIO_MEM, 1519 .parent = TYPE_VIRTIO_DEVICE, 1520 .instance_size = sizeof(VirtIOMEM), 1521 .instance_init = virtio_mem_instance_init, 1522 .class_init = virtio_mem_class_init, 1523 .class_size = sizeof(VirtIOMEMClass), 1524 .interfaces = (InterfaceInfo[]) { 1525 { TYPE_RAM_DISCARD_MANAGER }, 1526 { } 1527 }, 1528 }; 1529 1530 static void virtio_register_types(void) 1531 { 1532 type_register_static(&virtio_mem_info); 1533 } 1534 1535 type_init(virtio_register_types) 1536