1 /* 2 * Virtio MEM device 3 * 4 * Copyright (C) 2020 Red Hat, Inc. 5 * 6 * Authors: 7 * David Hildenbrand <david@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2. 10 * See the COPYING file in the top-level directory. 11 */ 12 13 #include "qemu/osdep.h" 14 #include "qemu-common.h" 15 #include "qemu/iov.h" 16 #include "qemu/cutils.h" 17 #include "qemu/error-report.h" 18 #include "qemu/units.h" 19 #include "sysemu/numa.h" 20 #include "sysemu/sysemu.h" 21 #include "sysemu/reset.h" 22 #include "hw/virtio/virtio.h" 23 #include "hw/virtio/virtio-bus.h" 24 #include "hw/virtio/virtio-access.h" 25 #include "hw/virtio/virtio-mem.h" 26 #include "qapi/error.h" 27 #include "qapi/visitor.h" 28 #include "exec/ram_addr.h" 29 #include "migration/misc.h" 30 #include "hw/boards.h" 31 #include "hw/qdev-properties.h" 32 #include "config-devices.h" 33 #include "trace.h" 34 35 /* 36 * Use QEMU_VMALLOC_ALIGN, so no THP will have to be split when unplugging 37 * memory (e.g., 2MB on x86_64). 38 */ 39 #define VIRTIO_MEM_MIN_BLOCK_SIZE QEMU_VMALLOC_ALIGN 40 /* 41 * Size the usable region bigger than the requested size if possible. Esp. 42 * Linux guests will only add (aligned) memory blocks in case they fully 43 * fit into the usable region, but plug+online only a subset of the pages. 44 * The memory block size corresponds mostly to the section size. 45 * 46 * This allows e.g., to add 20MB with a section size of 128MB on x86_64, and 47 * a section size of 1GB on arm64 (as long as the start address is properly 48 * aligned, similar to ordinary DIMMs). 49 * 50 * We can change this at any time and maybe even make it configurable if 51 * necessary (as the section size can change). But it's more likely that the 52 * section size will rather get smaller and not bigger over time. 53 */ 54 #if defined(TARGET_X86_64) || defined(TARGET_I386) 55 #define VIRTIO_MEM_USABLE_EXTENT (2 * (128 * MiB)) 56 #else 57 #error VIRTIO_MEM_USABLE_EXTENT not defined 58 #endif 59 60 static bool virtio_mem_is_busy(void) 61 { 62 /* 63 * Postcopy cannot handle concurrent discards and we don't want to migrate 64 * pages on-demand with stale content when plugging new blocks. 65 * 66 * For precopy, we don't want unplugged blocks in our migration stream, and 67 * when plugging new blocks, the page content might differ between source 68 * and destination (observable by the guest when not initializing pages 69 * after plugging them) until we're running on the destination (as we didn't 70 * migrate these blocks when they were unplugged). 71 */ 72 return migration_in_incoming_postcopy() || !migration_is_idle(); 73 } 74 75 static bool virtio_mem_test_bitmap(VirtIOMEM *vmem, uint64_t start_gpa, 76 uint64_t size, bool plugged) 77 { 78 const unsigned long first_bit = (start_gpa - vmem->addr) / vmem->block_size; 79 const unsigned long last_bit = first_bit + (size / vmem->block_size) - 1; 80 unsigned long found_bit; 81 82 /* We fake a shorter bitmap to avoid searching too far. */ 83 if (plugged) { 84 found_bit = find_next_zero_bit(vmem->bitmap, last_bit + 1, first_bit); 85 } else { 86 found_bit = find_next_bit(vmem->bitmap, last_bit + 1, first_bit); 87 } 88 return found_bit > last_bit; 89 } 90 91 static void virtio_mem_set_bitmap(VirtIOMEM *vmem, uint64_t start_gpa, 92 uint64_t size, bool plugged) 93 { 94 const unsigned long bit = (start_gpa - vmem->addr) / vmem->block_size; 95 const unsigned long nbits = size / vmem->block_size; 96 97 if (plugged) { 98 bitmap_set(vmem->bitmap, bit, nbits); 99 } else { 100 bitmap_clear(vmem->bitmap, bit, nbits); 101 } 102 } 103 104 static void virtio_mem_send_response(VirtIOMEM *vmem, VirtQueueElement *elem, 105 struct virtio_mem_resp *resp) 106 { 107 VirtIODevice *vdev = VIRTIO_DEVICE(vmem); 108 VirtQueue *vq = vmem->vq; 109 110 trace_virtio_mem_send_response(le16_to_cpu(resp->type)); 111 iov_from_buf(elem->in_sg, elem->in_num, 0, resp, sizeof(*resp)); 112 113 virtqueue_push(vq, elem, sizeof(*resp)); 114 virtio_notify(vdev, vq); 115 } 116 117 static void virtio_mem_send_response_simple(VirtIOMEM *vmem, 118 VirtQueueElement *elem, 119 uint16_t type) 120 { 121 struct virtio_mem_resp resp = { 122 .type = cpu_to_le16(type), 123 }; 124 125 virtio_mem_send_response(vmem, elem, &resp); 126 } 127 128 static bool virtio_mem_valid_range(VirtIOMEM *vmem, uint64_t gpa, uint64_t size) 129 { 130 if (!QEMU_IS_ALIGNED(gpa, vmem->block_size)) { 131 return false; 132 } 133 if (gpa + size < gpa || !size) { 134 return false; 135 } 136 if (gpa < vmem->addr || gpa >= vmem->addr + vmem->usable_region_size) { 137 return false; 138 } 139 if (gpa + size > vmem->addr + vmem->usable_region_size) { 140 return false; 141 } 142 return true; 143 } 144 145 static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa, 146 uint64_t size, bool plug) 147 { 148 const uint64_t offset = start_gpa - vmem->addr; 149 int ret; 150 151 if (virtio_mem_is_busy()) { 152 return -EBUSY; 153 } 154 155 if (!plug) { 156 ret = ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size); 157 if (ret) { 158 error_report("Unexpected error discarding RAM: %s", 159 strerror(-ret)); 160 return -EBUSY; 161 } 162 } 163 virtio_mem_set_bitmap(vmem, start_gpa, size, plug); 164 return 0; 165 } 166 167 static int virtio_mem_state_change_request(VirtIOMEM *vmem, uint64_t gpa, 168 uint16_t nb_blocks, bool plug) 169 { 170 const uint64_t size = nb_blocks * vmem->block_size; 171 int ret; 172 173 if (!virtio_mem_valid_range(vmem, gpa, size)) { 174 return VIRTIO_MEM_RESP_ERROR; 175 } 176 177 if (plug && (vmem->size + size > vmem->requested_size)) { 178 return VIRTIO_MEM_RESP_NACK; 179 } 180 181 /* test if really all blocks are in the opposite state */ 182 if (!virtio_mem_test_bitmap(vmem, gpa, size, !plug)) { 183 return VIRTIO_MEM_RESP_ERROR; 184 } 185 186 ret = virtio_mem_set_block_state(vmem, gpa, size, plug); 187 if (ret) { 188 return VIRTIO_MEM_RESP_BUSY; 189 } 190 if (plug) { 191 vmem->size += size; 192 } else { 193 vmem->size -= size; 194 } 195 notifier_list_notify(&vmem->size_change_notifiers, &vmem->size); 196 return VIRTIO_MEM_RESP_ACK; 197 } 198 199 static void virtio_mem_plug_request(VirtIOMEM *vmem, VirtQueueElement *elem, 200 struct virtio_mem_req *req) 201 { 202 const uint64_t gpa = le64_to_cpu(req->u.plug.addr); 203 const uint16_t nb_blocks = le16_to_cpu(req->u.plug.nb_blocks); 204 uint16_t type; 205 206 trace_virtio_mem_plug_request(gpa, nb_blocks); 207 type = virtio_mem_state_change_request(vmem, gpa, nb_blocks, true); 208 virtio_mem_send_response_simple(vmem, elem, type); 209 } 210 211 static void virtio_mem_unplug_request(VirtIOMEM *vmem, VirtQueueElement *elem, 212 struct virtio_mem_req *req) 213 { 214 const uint64_t gpa = le64_to_cpu(req->u.unplug.addr); 215 const uint16_t nb_blocks = le16_to_cpu(req->u.unplug.nb_blocks); 216 uint16_t type; 217 218 trace_virtio_mem_unplug_request(gpa, nb_blocks); 219 type = virtio_mem_state_change_request(vmem, gpa, nb_blocks, false); 220 virtio_mem_send_response_simple(vmem, elem, type); 221 } 222 223 static void virtio_mem_resize_usable_region(VirtIOMEM *vmem, 224 uint64_t requested_size, 225 bool can_shrink) 226 { 227 uint64_t newsize = MIN(memory_region_size(&vmem->memdev->mr), 228 requested_size + VIRTIO_MEM_USABLE_EXTENT); 229 230 if (!requested_size) { 231 newsize = 0; 232 } 233 234 if (newsize < vmem->usable_region_size && !can_shrink) { 235 return; 236 } 237 238 trace_virtio_mem_resized_usable_region(vmem->usable_region_size, newsize); 239 vmem->usable_region_size = newsize; 240 } 241 242 static int virtio_mem_unplug_all(VirtIOMEM *vmem) 243 { 244 RAMBlock *rb = vmem->memdev->mr.ram_block; 245 int ret; 246 247 if (virtio_mem_is_busy()) { 248 return -EBUSY; 249 } 250 251 ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb)); 252 if (ret) { 253 error_report("Unexpected error discarding RAM: %s", strerror(-ret)); 254 return -EBUSY; 255 } 256 bitmap_clear(vmem->bitmap, 0, vmem->bitmap_size); 257 if (vmem->size) { 258 vmem->size = 0; 259 notifier_list_notify(&vmem->size_change_notifiers, &vmem->size); 260 } 261 trace_virtio_mem_unplugged_all(); 262 virtio_mem_resize_usable_region(vmem, vmem->requested_size, true); 263 return 0; 264 } 265 266 static void virtio_mem_unplug_all_request(VirtIOMEM *vmem, 267 VirtQueueElement *elem) 268 { 269 trace_virtio_mem_unplug_all_request(); 270 if (virtio_mem_unplug_all(vmem)) { 271 virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_BUSY); 272 } else { 273 virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_ACK); 274 } 275 } 276 277 static void virtio_mem_state_request(VirtIOMEM *vmem, VirtQueueElement *elem, 278 struct virtio_mem_req *req) 279 { 280 const uint16_t nb_blocks = le16_to_cpu(req->u.state.nb_blocks); 281 const uint64_t gpa = le64_to_cpu(req->u.state.addr); 282 const uint64_t size = nb_blocks * vmem->block_size; 283 struct virtio_mem_resp resp = { 284 .type = cpu_to_le16(VIRTIO_MEM_RESP_ACK), 285 }; 286 287 trace_virtio_mem_state_request(gpa, nb_blocks); 288 if (!virtio_mem_valid_range(vmem, gpa, size)) { 289 virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_ERROR); 290 return; 291 } 292 293 if (virtio_mem_test_bitmap(vmem, gpa, size, true)) { 294 resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_PLUGGED); 295 } else if (virtio_mem_test_bitmap(vmem, gpa, size, false)) { 296 resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_UNPLUGGED); 297 } else { 298 resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_MIXED); 299 } 300 trace_virtio_mem_state_response(le16_to_cpu(resp.u.state.state)); 301 virtio_mem_send_response(vmem, elem, &resp); 302 } 303 304 static void virtio_mem_handle_request(VirtIODevice *vdev, VirtQueue *vq) 305 { 306 const int len = sizeof(struct virtio_mem_req); 307 VirtIOMEM *vmem = VIRTIO_MEM(vdev); 308 VirtQueueElement *elem; 309 struct virtio_mem_req req; 310 uint16_t type; 311 312 while (true) { 313 elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); 314 if (!elem) { 315 return; 316 } 317 318 if (iov_to_buf(elem->out_sg, elem->out_num, 0, &req, len) < len) { 319 virtio_error(vdev, "virtio-mem protocol violation: invalid request" 320 " size: %d", len); 321 g_free(elem); 322 return; 323 } 324 325 if (iov_size(elem->in_sg, elem->in_num) < 326 sizeof(struct virtio_mem_resp)) { 327 virtio_error(vdev, "virtio-mem protocol violation: not enough space" 328 " for response: %zu", 329 iov_size(elem->in_sg, elem->in_num)); 330 g_free(elem); 331 return; 332 } 333 334 type = le16_to_cpu(req.type); 335 switch (type) { 336 case VIRTIO_MEM_REQ_PLUG: 337 virtio_mem_plug_request(vmem, elem, &req); 338 break; 339 case VIRTIO_MEM_REQ_UNPLUG: 340 virtio_mem_unplug_request(vmem, elem, &req); 341 break; 342 case VIRTIO_MEM_REQ_UNPLUG_ALL: 343 virtio_mem_unplug_all_request(vmem, elem); 344 break; 345 case VIRTIO_MEM_REQ_STATE: 346 virtio_mem_state_request(vmem, elem, &req); 347 break; 348 default: 349 virtio_error(vdev, "virtio-mem protocol violation: unknown request" 350 " type: %d", type); 351 g_free(elem); 352 return; 353 } 354 355 g_free(elem); 356 } 357 } 358 359 static void virtio_mem_get_config(VirtIODevice *vdev, uint8_t *config_data) 360 { 361 VirtIOMEM *vmem = VIRTIO_MEM(vdev); 362 struct virtio_mem_config *config = (void *) config_data; 363 364 config->block_size = cpu_to_le64(vmem->block_size); 365 config->node_id = cpu_to_le16(vmem->node); 366 config->requested_size = cpu_to_le64(vmem->requested_size); 367 config->plugged_size = cpu_to_le64(vmem->size); 368 config->addr = cpu_to_le64(vmem->addr); 369 config->region_size = cpu_to_le64(memory_region_size(&vmem->memdev->mr)); 370 config->usable_region_size = cpu_to_le64(vmem->usable_region_size); 371 } 372 373 static uint64_t virtio_mem_get_features(VirtIODevice *vdev, uint64_t features, 374 Error **errp) 375 { 376 MachineState *ms = MACHINE(qdev_get_machine()); 377 378 if (ms->numa_state) { 379 #if defined(CONFIG_ACPI) 380 virtio_add_feature(&features, VIRTIO_MEM_F_ACPI_PXM); 381 #endif 382 } 383 return features; 384 } 385 386 static void virtio_mem_system_reset(void *opaque) 387 { 388 VirtIOMEM *vmem = VIRTIO_MEM(opaque); 389 390 /* 391 * During usual resets, we will unplug all memory and shrink the usable 392 * region size. This is, however, not possible in all scenarios. Then, 393 * the guest has to deal with this manually (VIRTIO_MEM_REQ_UNPLUG_ALL). 394 */ 395 virtio_mem_unplug_all(vmem); 396 } 397 398 static void virtio_mem_device_realize(DeviceState *dev, Error **errp) 399 { 400 MachineState *ms = MACHINE(qdev_get_machine()); 401 int nb_numa_nodes = ms->numa_state ? ms->numa_state->num_nodes : 0; 402 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 403 VirtIOMEM *vmem = VIRTIO_MEM(dev); 404 uint64_t page_size; 405 RAMBlock *rb; 406 int ret; 407 408 if (!vmem->memdev) { 409 error_setg(errp, "'%s' property is not set", VIRTIO_MEM_MEMDEV_PROP); 410 return; 411 } else if (host_memory_backend_is_mapped(vmem->memdev)) { 412 char *path = object_get_canonical_path_component(OBJECT(vmem->memdev)); 413 414 error_setg(errp, "'%s' property specifies a busy memdev: %s", 415 VIRTIO_MEM_MEMDEV_PROP, path); 416 g_free(path); 417 return; 418 } else if (!memory_region_is_ram(&vmem->memdev->mr) || 419 memory_region_is_rom(&vmem->memdev->mr) || 420 !vmem->memdev->mr.ram_block) { 421 error_setg(errp, "'%s' property specifies an unsupported memdev", 422 VIRTIO_MEM_MEMDEV_PROP); 423 return; 424 } 425 426 if ((nb_numa_nodes && vmem->node >= nb_numa_nodes) || 427 (!nb_numa_nodes && vmem->node)) { 428 error_setg(errp, "'%s' property has value '%" PRIu32 "', which exceeds" 429 "the number of numa nodes: %d", VIRTIO_MEM_NODE_PROP, 430 vmem->node, nb_numa_nodes ? nb_numa_nodes : 1); 431 return; 432 } 433 434 if (enable_mlock) { 435 error_setg(errp, "Incompatible with mlock"); 436 return; 437 } 438 439 rb = vmem->memdev->mr.ram_block; 440 page_size = qemu_ram_pagesize(rb); 441 442 if (vmem->block_size < page_size) { 443 error_setg(errp, "'%s' property has to be at least the page size (0x%" 444 PRIx64 ")", VIRTIO_MEM_BLOCK_SIZE_PROP, page_size); 445 return; 446 } else if (!QEMU_IS_ALIGNED(vmem->requested_size, vmem->block_size)) { 447 error_setg(errp, "'%s' property has to be multiples of '%s' (0x%" PRIx64 448 ")", VIRTIO_MEM_REQUESTED_SIZE_PROP, 449 VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size); 450 return; 451 } else if (!QEMU_IS_ALIGNED(memory_region_size(&vmem->memdev->mr), 452 vmem->block_size)) { 453 error_setg(errp, "'%s' property memdev size has to be multiples of" 454 "'%s' (0x%" PRIx64 ")", VIRTIO_MEM_MEMDEV_PROP, 455 VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size); 456 return; 457 } 458 459 if (ram_block_discard_require(true)) { 460 error_setg(errp, "Discarding RAM is disabled"); 461 return; 462 } 463 464 ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb)); 465 if (ret) { 466 error_setg_errno(errp, -ret, "Unexpected error discarding RAM"); 467 ram_block_discard_require(false); 468 return; 469 } 470 471 virtio_mem_resize_usable_region(vmem, vmem->requested_size, true); 472 473 vmem->bitmap_size = memory_region_size(&vmem->memdev->mr) / 474 vmem->block_size; 475 vmem->bitmap = bitmap_new(vmem->bitmap_size); 476 477 virtio_init(vdev, TYPE_VIRTIO_MEM, VIRTIO_ID_MEM, 478 sizeof(struct virtio_mem_config)); 479 vmem->vq = virtio_add_queue(vdev, 128, virtio_mem_handle_request); 480 481 host_memory_backend_set_mapped(vmem->memdev, true); 482 vmstate_register_ram(&vmem->memdev->mr, DEVICE(vmem)); 483 qemu_register_reset(virtio_mem_system_reset, vmem); 484 precopy_add_notifier(&vmem->precopy_notifier); 485 } 486 487 static void virtio_mem_device_unrealize(DeviceState *dev) 488 { 489 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 490 VirtIOMEM *vmem = VIRTIO_MEM(dev); 491 492 precopy_remove_notifier(&vmem->precopy_notifier); 493 qemu_unregister_reset(virtio_mem_system_reset, vmem); 494 vmstate_unregister_ram(&vmem->memdev->mr, DEVICE(vmem)); 495 host_memory_backend_set_mapped(vmem->memdev, false); 496 virtio_del_queue(vdev, 0); 497 virtio_cleanup(vdev); 498 g_free(vmem->bitmap); 499 ram_block_discard_require(false); 500 } 501 502 static int virtio_mem_restore_unplugged(VirtIOMEM *vmem) 503 { 504 RAMBlock *rb = vmem->memdev->mr.ram_block; 505 unsigned long first_zero_bit, last_zero_bit; 506 uint64_t offset, length; 507 int ret; 508 509 /* Find consecutive unplugged blocks and discard the consecutive range. */ 510 first_zero_bit = find_first_zero_bit(vmem->bitmap, vmem->bitmap_size); 511 while (first_zero_bit < vmem->bitmap_size) { 512 offset = first_zero_bit * vmem->block_size; 513 last_zero_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, 514 first_zero_bit + 1) - 1; 515 length = (last_zero_bit - first_zero_bit + 1) * vmem->block_size; 516 517 ret = ram_block_discard_range(rb, offset, length); 518 if (ret) { 519 error_report("Unexpected error discarding RAM: %s", 520 strerror(-ret)); 521 return -EINVAL; 522 } 523 first_zero_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, 524 last_zero_bit + 2); 525 } 526 return 0; 527 } 528 529 static int virtio_mem_post_load(void *opaque, int version_id) 530 { 531 if (migration_in_incoming_postcopy()) { 532 return 0; 533 } 534 535 return virtio_mem_restore_unplugged(VIRTIO_MEM(opaque)); 536 } 537 538 typedef struct VirtIOMEMMigSanityChecks { 539 VirtIOMEM *parent; 540 uint64_t addr; 541 uint64_t region_size; 542 uint64_t block_size; 543 uint32_t node; 544 } VirtIOMEMMigSanityChecks; 545 546 static int virtio_mem_mig_sanity_checks_pre_save(void *opaque) 547 { 548 VirtIOMEMMigSanityChecks *tmp = opaque; 549 VirtIOMEM *vmem = tmp->parent; 550 551 tmp->addr = vmem->addr; 552 tmp->region_size = memory_region_size(&vmem->memdev->mr); 553 tmp->block_size = vmem->block_size; 554 tmp->node = vmem->node; 555 return 0; 556 } 557 558 static int virtio_mem_mig_sanity_checks_post_load(void *opaque, int version_id) 559 { 560 VirtIOMEMMigSanityChecks *tmp = opaque; 561 VirtIOMEM *vmem = tmp->parent; 562 const uint64_t new_region_size = memory_region_size(&vmem->memdev->mr); 563 564 if (tmp->addr != vmem->addr) { 565 error_report("Property '%s' changed from 0x%" PRIx64 " to 0x%" PRIx64, 566 VIRTIO_MEM_ADDR_PROP, tmp->addr, vmem->addr); 567 return -EINVAL; 568 } 569 /* 570 * Note: Preparation for resizeable memory regions. The maximum size 571 * of the memory region must not change during migration. 572 */ 573 if (tmp->region_size != new_region_size) { 574 error_report("Property '%s' size changed from 0x%" PRIx64 " to 0x%" 575 PRIx64, VIRTIO_MEM_MEMDEV_PROP, tmp->region_size, 576 new_region_size); 577 return -EINVAL; 578 } 579 if (tmp->block_size != vmem->block_size) { 580 error_report("Property '%s' changed from 0x%" PRIx64 " to 0x%" PRIx64, 581 VIRTIO_MEM_BLOCK_SIZE_PROP, tmp->block_size, 582 vmem->block_size); 583 return -EINVAL; 584 } 585 if (tmp->node != vmem->node) { 586 error_report("Property '%s' changed from %" PRIu32 " to %" PRIu32, 587 VIRTIO_MEM_NODE_PROP, tmp->node, vmem->node); 588 return -EINVAL; 589 } 590 return 0; 591 } 592 593 static const VMStateDescription vmstate_virtio_mem_sanity_checks = { 594 .name = "virtio-mem-device/sanity-checks", 595 .pre_save = virtio_mem_mig_sanity_checks_pre_save, 596 .post_load = virtio_mem_mig_sanity_checks_post_load, 597 .fields = (VMStateField[]) { 598 VMSTATE_UINT64(addr, VirtIOMEMMigSanityChecks), 599 VMSTATE_UINT64(region_size, VirtIOMEMMigSanityChecks), 600 VMSTATE_UINT64(block_size, VirtIOMEMMigSanityChecks), 601 VMSTATE_UINT32(node, VirtIOMEMMigSanityChecks), 602 VMSTATE_END_OF_LIST(), 603 }, 604 }; 605 606 static const VMStateDescription vmstate_virtio_mem_device = { 607 .name = "virtio-mem-device", 608 .minimum_version_id = 1, 609 .version_id = 1, 610 .post_load = virtio_mem_post_load, 611 .fields = (VMStateField[]) { 612 VMSTATE_WITH_TMP(VirtIOMEM, VirtIOMEMMigSanityChecks, 613 vmstate_virtio_mem_sanity_checks), 614 VMSTATE_UINT64(usable_region_size, VirtIOMEM), 615 VMSTATE_UINT64(size, VirtIOMEM), 616 VMSTATE_UINT64(requested_size, VirtIOMEM), 617 VMSTATE_BITMAP(bitmap, VirtIOMEM, 0, bitmap_size), 618 VMSTATE_END_OF_LIST() 619 }, 620 }; 621 622 static const VMStateDescription vmstate_virtio_mem = { 623 .name = "virtio-mem", 624 .minimum_version_id = 1, 625 .version_id = 1, 626 .fields = (VMStateField[]) { 627 VMSTATE_VIRTIO_DEVICE, 628 VMSTATE_END_OF_LIST() 629 }, 630 }; 631 632 static void virtio_mem_fill_device_info(const VirtIOMEM *vmem, 633 VirtioMEMDeviceInfo *vi) 634 { 635 vi->memaddr = vmem->addr; 636 vi->node = vmem->node; 637 vi->requested_size = vmem->requested_size; 638 vi->size = vmem->size; 639 vi->max_size = memory_region_size(&vmem->memdev->mr); 640 vi->block_size = vmem->block_size; 641 vi->memdev = object_get_canonical_path(OBJECT(vmem->memdev)); 642 } 643 644 static MemoryRegion *virtio_mem_get_memory_region(VirtIOMEM *vmem, Error **errp) 645 { 646 if (!vmem->memdev) { 647 error_setg(errp, "'%s' property must be set", VIRTIO_MEM_MEMDEV_PROP); 648 return NULL; 649 } 650 651 return &vmem->memdev->mr; 652 } 653 654 static void virtio_mem_add_size_change_notifier(VirtIOMEM *vmem, 655 Notifier *notifier) 656 { 657 notifier_list_add(&vmem->size_change_notifiers, notifier); 658 } 659 660 static void virtio_mem_remove_size_change_notifier(VirtIOMEM *vmem, 661 Notifier *notifier) 662 { 663 notifier_remove(notifier); 664 } 665 666 static void virtio_mem_get_size(Object *obj, Visitor *v, const char *name, 667 void *opaque, Error **errp) 668 { 669 const VirtIOMEM *vmem = VIRTIO_MEM(obj); 670 uint64_t value = vmem->size; 671 672 visit_type_size(v, name, &value, errp); 673 } 674 675 static void virtio_mem_get_requested_size(Object *obj, Visitor *v, 676 const char *name, void *opaque, 677 Error **errp) 678 { 679 const VirtIOMEM *vmem = VIRTIO_MEM(obj); 680 uint64_t value = vmem->requested_size; 681 682 visit_type_size(v, name, &value, errp); 683 } 684 685 static void virtio_mem_set_requested_size(Object *obj, Visitor *v, 686 const char *name, void *opaque, 687 Error **errp) 688 { 689 VirtIOMEM *vmem = VIRTIO_MEM(obj); 690 Error *err = NULL; 691 uint64_t value; 692 693 visit_type_size(v, name, &value, &err); 694 if (err) { 695 error_propagate(errp, err); 696 return; 697 } 698 699 /* 700 * The block size and memory backend are not fixed until the device was 701 * realized. realize() will verify these properties then. 702 */ 703 if (DEVICE(obj)->realized) { 704 if (!QEMU_IS_ALIGNED(value, vmem->block_size)) { 705 error_setg(errp, "'%s' has to be multiples of '%s' (0x%" PRIx64 706 ")", name, VIRTIO_MEM_BLOCK_SIZE_PROP, 707 vmem->block_size); 708 return; 709 } else if (value > memory_region_size(&vmem->memdev->mr)) { 710 error_setg(errp, "'%s' cannot exceed the memory backend size" 711 "(0x%" PRIx64 ")", name, 712 memory_region_size(&vmem->memdev->mr)); 713 return; 714 } 715 716 if (value != vmem->requested_size) { 717 virtio_mem_resize_usable_region(vmem, value, false); 718 vmem->requested_size = value; 719 } 720 /* 721 * Trigger a config update so the guest gets notified. We trigger 722 * even if the size didn't change (especially helpful for debugging). 723 */ 724 virtio_notify_config(VIRTIO_DEVICE(vmem)); 725 } else { 726 vmem->requested_size = value; 727 } 728 } 729 730 static void virtio_mem_get_block_size(Object *obj, Visitor *v, const char *name, 731 void *opaque, Error **errp) 732 { 733 const VirtIOMEM *vmem = VIRTIO_MEM(obj); 734 uint64_t value = vmem->block_size; 735 736 visit_type_size(v, name, &value, errp); 737 } 738 739 static void virtio_mem_set_block_size(Object *obj, Visitor *v, const char *name, 740 void *opaque, Error **errp) 741 { 742 VirtIOMEM *vmem = VIRTIO_MEM(obj); 743 Error *err = NULL; 744 uint64_t value; 745 746 if (DEVICE(obj)->realized) { 747 error_setg(errp, "'%s' cannot be changed", name); 748 return; 749 } 750 751 visit_type_size(v, name, &value, &err); 752 if (err) { 753 error_propagate(errp, err); 754 return; 755 } 756 757 if (value < VIRTIO_MEM_MIN_BLOCK_SIZE) { 758 error_setg(errp, "'%s' property has to be at least 0x%" PRIx32, name, 759 VIRTIO_MEM_MIN_BLOCK_SIZE); 760 return; 761 } else if (!is_power_of_2(value)) { 762 error_setg(errp, "'%s' property has to be a power of two", name); 763 return; 764 } 765 vmem->block_size = value; 766 } 767 768 static void virtio_mem_precopy_exclude_unplugged(VirtIOMEM *vmem) 769 { 770 void * const host = qemu_ram_get_host_addr(vmem->memdev->mr.ram_block); 771 unsigned long first_zero_bit, last_zero_bit; 772 uint64_t offset, length; 773 774 /* 775 * Find consecutive unplugged blocks and exclude them from migration. 776 * 777 * Note: Blocks cannot get (un)plugged during precopy, no locking needed. 778 */ 779 first_zero_bit = find_first_zero_bit(vmem->bitmap, vmem->bitmap_size); 780 while (first_zero_bit < vmem->bitmap_size) { 781 offset = first_zero_bit * vmem->block_size; 782 last_zero_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, 783 first_zero_bit + 1) - 1; 784 length = (last_zero_bit - first_zero_bit + 1) * vmem->block_size; 785 786 qemu_guest_free_page_hint(host + offset, length); 787 first_zero_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, 788 last_zero_bit + 2); 789 } 790 } 791 792 static int virtio_mem_precopy_notify(NotifierWithReturn *n, void *data) 793 { 794 VirtIOMEM *vmem = container_of(n, VirtIOMEM, precopy_notifier); 795 PrecopyNotifyData *pnd = data; 796 797 switch (pnd->reason) { 798 case PRECOPY_NOTIFY_SETUP: 799 precopy_enable_free_page_optimization(); 800 break; 801 case PRECOPY_NOTIFY_AFTER_BITMAP_SYNC: 802 virtio_mem_precopy_exclude_unplugged(vmem); 803 break; 804 default: 805 break; 806 } 807 808 return 0; 809 } 810 811 static void virtio_mem_instance_init(Object *obj) 812 { 813 VirtIOMEM *vmem = VIRTIO_MEM(obj); 814 815 vmem->block_size = VIRTIO_MEM_MIN_BLOCK_SIZE; 816 notifier_list_init(&vmem->size_change_notifiers); 817 vmem->precopy_notifier.notify = virtio_mem_precopy_notify; 818 819 object_property_add(obj, VIRTIO_MEM_SIZE_PROP, "size", virtio_mem_get_size, 820 NULL, NULL, NULL); 821 object_property_add(obj, VIRTIO_MEM_REQUESTED_SIZE_PROP, "size", 822 virtio_mem_get_requested_size, 823 virtio_mem_set_requested_size, NULL, NULL); 824 object_property_add(obj, VIRTIO_MEM_BLOCK_SIZE_PROP, "size", 825 virtio_mem_get_block_size, virtio_mem_set_block_size, 826 NULL, NULL); 827 } 828 829 static Property virtio_mem_properties[] = { 830 DEFINE_PROP_UINT64(VIRTIO_MEM_ADDR_PROP, VirtIOMEM, addr, 0), 831 DEFINE_PROP_UINT32(VIRTIO_MEM_NODE_PROP, VirtIOMEM, node, 0), 832 DEFINE_PROP_LINK(VIRTIO_MEM_MEMDEV_PROP, VirtIOMEM, memdev, 833 TYPE_MEMORY_BACKEND, HostMemoryBackend *), 834 DEFINE_PROP_END_OF_LIST(), 835 }; 836 837 static void virtio_mem_class_init(ObjectClass *klass, void *data) 838 { 839 DeviceClass *dc = DEVICE_CLASS(klass); 840 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); 841 VirtIOMEMClass *vmc = VIRTIO_MEM_CLASS(klass); 842 843 device_class_set_props(dc, virtio_mem_properties); 844 dc->vmsd = &vmstate_virtio_mem; 845 846 set_bit(DEVICE_CATEGORY_MISC, dc->categories); 847 vdc->realize = virtio_mem_device_realize; 848 vdc->unrealize = virtio_mem_device_unrealize; 849 vdc->get_config = virtio_mem_get_config; 850 vdc->get_features = virtio_mem_get_features; 851 vdc->vmsd = &vmstate_virtio_mem_device; 852 853 vmc->fill_device_info = virtio_mem_fill_device_info; 854 vmc->get_memory_region = virtio_mem_get_memory_region; 855 vmc->add_size_change_notifier = virtio_mem_add_size_change_notifier; 856 vmc->remove_size_change_notifier = virtio_mem_remove_size_change_notifier; 857 } 858 859 static const TypeInfo virtio_mem_info = { 860 .name = TYPE_VIRTIO_MEM, 861 .parent = TYPE_VIRTIO_DEVICE, 862 .instance_size = sizeof(VirtIOMEM), 863 .instance_init = virtio_mem_instance_init, 864 .class_init = virtio_mem_class_init, 865 .class_size = sizeof(VirtIOMEMClass), 866 }; 867 868 static void virtio_register_types(void) 869 { 870 type_register_static(&virtio_mem_info); 871 } 872 873 type_init(virtio_register_types) 874