1 /* 2 * Virtio MEM device 3 * 4 * Copyright (C) 2020 Red Hat, Inc. 5 * 6 * Authors: 7 * David Hildenbrand <david@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2. 10 * See the COPYING file in the top-level directory. 11 */ 12 13 #include "qemu/osdep.h" 14 #include "qemu-common.h" 15 #include "qemu/iov.h" 16 #include "qemu/cutils.h" 17 #include "qemu/error-report.h" 18 #include "qemu/units.h" 19 #include "sysemu/numa.h" 20 #include "sysemu/sysemu.h" 21 #include "sysemu/reset.h" 22 #include "hw/virtio/virtio.h" 23 #include "hw/virtio/virtio-bus.h" 24 #include "hw/virtio/virtio-access.h" 25 #include "hw/virtio/virtio-mem.h" 26 #include "qapi/error.h" 27 #include "qapi/visitor.h" 28 #include "exec/ram_addr.h" 29 #include "migration/misc.h" 30 #include "hw/boards.h" 31 #include "hw/qdev-properties.h" 32 #include "config-devices.h" 33 #include "trace.h" 34 35 /* 36 * Use QEMU_VMALLOC_ALIGN, so no THP will have to be split when unplugging 37 * memory (e.g., 2MB on x86_64). 38 */ 39 #define VIRTIO_MEM_MIN_BLOCK_SIZE QEMU_VMALLOC_ALIGN 40 /* 41 * Size the usable region bigger than the requested size if possible. Esp. 42 * Linux guests will only add (aligned) memory blocks in case they fully 43 * fit into the usable region, but plug+online only a subset of the pages. 44 * The memory block size corresponds mostly to the section size. 45 * 46 * This allows e.g., to add 20MB with a section size of 128MB on x86_64, and 47 * a section size of 1GB on arm64 (as long as the start address is properly 48 * aligned, similar to ordinary DIMMs). 49 * 50 * We can change this at any time and maybe even make it configurable if 51 * necessary (as the section size can change). But it's more likely that the 52 * section size will rather get smaller and not bigger over time. 53 */ 54 #if defined(TARGET_X86_64) || defined(TARGET_I386) 55 #define VIRTIO_MEM_USABLE_EXTENT (2 * (128 * MiB)) 56 #else 57 #error VIRTIO_MEM_USABLE_EXTENT not defined 58 #endif 59 60 static bool virtio_mem_is_busy(void) 61 { 62 /* 63 * Postcopy cannot handle concurrent discards and we don't want to migrate 64 * pages on-demand with stale content when plugging new blocks. 65 * 66 * For precopy, we don't want unplugged blocks in our migration stream, and 67 * when plugging new blocks, the page content might differ between source 68 * and destination (observable by the guest when not initializing pages 69 * after plugging them) until we're running on the destination (as we didn't 70 * migrate these blocks when they were unplugged). 71 */ 72 return migration_in_incoming_postcopy() || !migration_is_idle(); 73 } 74 75 static bool virtio_mem_test_bitmap(VirtIOMEM *vmem, uint64_t start_gpa, 76 uint64_t size, bool plugged) 77 { 78 const unsigned long first_bit = (start_gpa - vmem->addr) / vmem->block_size; 79 const unsigned long last_bit = first_bit + (size / vmem->block_size) - 1; 80 unsigned long found_bit; 81 82 /* We fake a shorter bitmap to avoid searching too far. */ 83 if (plugged) { 84 found_bit = find_next_zero_bit(vmem->bitmap, last_bit + 1, first_bit); 85 } else { 86 found_bit = find_next_bit(vmem->bitmap, last_bit + 1, first_bit); 87 } 88 return found_bit > last_bit; 89 } 90 91 static void virtio_mem_set_bitmap(VirtIOMEM *vmem, uint64_t start_gpa, 92 uint64_t size, bool plugged) 93 { 94 const unsigned long bit = (start_gpa - vmem->addr) / vmem->block_size; 95 const unsigned long nbits = size / vmem->block_size; 96 97 if (plugged) { 98 bitmap_set(vmem->bitmap, bit, nbits); 99 } else { 100 bitmap_clear(vmem->bitmap, bit, nbits); 101 } 102 } 103 104 static void virtio_mem_send_response(VirtIOMEM *vmem, VirtQueueElement *elem, 105 struct virtio_mem_resp *resp) 106 { 107 VirtIODevice *vdev = VIRTIO_DEVICE(vmem); 108 VirtQueue *vq = vmem->vq; 109 110 trace_virtio_mem_send_response(le16_to_cpu(resp->type)); 111 iov_from_buf(elem->in_sg, elem->in_num, 0, resp, sizeof(*resp)); 112 113 virtqueue_push(vq, elem, sizeof(*resp)); 114 virtio_notify(vdev, vq); 115 } 116 117 static void virtio_mem_send_response_simple(VirtIOMEM *vmem, 118 VirtQueueElement *elem, 119 uint16_t type) 120 { 121 struct virtio_mem_resp resp = { 122 .type = cpu_to_le16(type), 123 }; 124 125 virtio_mem_send_response(vmem, elem, &resp); 126 } 127 128 static bool virtio_mem_valid_range(VirtIOMEM *vmem, uint64_t gpa, uint64_t size) 129 { 130 if (!QEMU_IS_ALIGNED(gpa, vmem->block_size)) { 131 return false; 132 } 133 if (gpa + size < gpa || !size) { 134 return false; 135 } 136 if (gpa < vmem->addr || gpa >= vmem->addr + vmem->usable_region_size) { 137 return false; 138 } 139 if (gpa + size > vmem->addr + vmem->usable_region_size) { 140 return false; 141 } 142 return true; 143 } 144 145 static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa, 146 uint64_t size, bool plug) 147 { 148 const uint64_t offset = start_gpa - vmem->addr; 149 int ret; 150 151 if (virtio_mem_is_busy()) { 152 return -EBUSY; 153 } 154 155 if (!plug) { 156 ret = ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size); 157 if (ret) { 158 error_report("Unexpected error discarding RAM: %s", 159 strerror(-ret)); 160 return -EBUSY; 161 } 162 } 163 virtio_mem_set_bitmap(vmem, start_gpa, size, plug); 164 return 0; 165 } 166 167 static int virtio_mem_state_change_request(VirtIOMEM *vmem, uint64_t gpa, 168 uint16_t nb_blocks, bool plug) 169 { 170 const uint64_t size = nb_blocks * vmem->block_size; 171 int ret; 172 173 if (!virtio_mem_valid_range(vmem, gpa, size)) { 174 return VIRTIO_MEM_RESP_ERROR; 175 } 176 177 if (plug && (vmem->size + size > vmem->requested_size)) { 178 return VIRTIO_MEM_RESP_NACK; 179 } 180 181 /* test if really all blocks are in the opposite state */ 182 if (!virtio_mem_test_bitmap(vmem, gpa, size, !plug)) { 183 return VIRTIO_MEM_RESP_ERROR; 184 } 185 186 ret = virtio_mem_set_block_state(vmem, gpa, size, plug); 187 if (ret) { 188 return VIRTIO_MEM_RESP_BUSY; 189 } 190 if (plug) { 191 vmem->size += size; 192 } else { 193 vmem->size -= size; 194 } 195 notifier_list_notify(&vmem->size_change_notifiers, &vmem->size); 196 return VIRTIO_MEM_RESP_ACK; 197 } 198 199 static void virtio_mem_plug_request(VirtIOMEM *vmem, VirtQueueElement *elem, 200 struct virtio_mem_req *req) 201 { 202 const uint64_t gpa = le64_to_cpu(req->u.plug.addr); 203 const uint16_t nb_blocks = le16_to_cpu(req->u.plug.nb_blocks); 204 uint16_t type; 205 206 trace_virtio_mem_plug_request(gpa, nb_blocks); 207 type = virtio_mem_state_change_request(vmem, gpa, nb_blocks, true); 208 virtio_mem_send_response_simple(vmem, elem, type); 209 } 210 211 static void virtio_mem_unplug_request(VirtIOMEM *vmem, VirtQueueElement *elem, 212 struct virtio_mem_req *req) 213 { 214 const uint64_t gpa = le64_to_cpu(req->u.unplug.addr); 215 const uint16_t nb_blocks = le16_to_cpu(req->u.unplug.nb_blocks); 216 uint16_t type; 217 218 trace_virtio_mem_unplug_request(gpa, nb_blocks); 219 type = virtio_mem_state_change_request(vmem, gpa, nb_blocks, false); 220 virtio_mem_send_response_simple(vmem, elem, type); 221 } 222 223 static void virtio_mem_resize_usable_region(VirtIOMEM *vmem, 224 uint64_t requested_size, 225 bool can_shrink) 226 { 227 uint64_t newsize = MIN(memory_region_size(&vmem->memdev->mr), 228 requested_size + VIRTIO_MEM_USABLE_EXTENT); 229 230 if (!requested_size) { 231 newsize = 0; 232 } 233 234 if (newsize < vmem->usable_region_size && !can_shrink) { 235 return; 236 } 237 238 trace_virtio_mem_resized_usable_region(vmem->usable_region_size, newsize); 239 vmem->usable_region_size = newsize; 240 } 241 242 static int virtio_mem_unplug_all(VirtIOMEM *vmem) 243 { 244 RAMBlock *rb = vmem->memdev->mr.ram_block; 245 int ret; 246 247 if (virtio_mem_is_busy()) { 248 return -EBUSY; 249 } 250 251 ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb)); 252 if (ret) { 253 error_report("Unexpected error discarding RAM: %s", strerror(-ret)); 254 return -EBUSY; 255 } 256 bitmap_clear(vmem->bitmap, 0, vmem->bitmap_size); 257 if (vmem->size) { 258 vmem->size = 0; 259 notifier_list_notify(&vmem->size_change_notifiers, &vmem->size); 260 } 261 trace_virtio_mem_unplugged_all(); 262 virtio_mem_resize_usable_region(vmem, vmem->requested_size, true); 263 return 0; 264 } 265 266 static void virtio_mem_unplug_all_request(VirtIOMEM *vmem, 267 VirtQueueElement *elem) 268 { 269 trace_virtio_mem_unplug_all_request(); 270 if (virtio_mem_unplug_all(vmem)) { 271 virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_BUSY); 272 } else { 273 virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_ACK); 274 } 275 } 276 277 static void virtio_mem_state_request(VirtIOMEM *vmem, VirtQueueElement *elem, 278 struct virtio_mem_req *req) 279 { 280 const uint16_t nb_blocks = le16_to_cpu(req->u.state.nb_blocks); 281 const uint64_t gpa = le64_to_cpu(req->u.state.addr); 282 const uint64_t size = nb_blocks * vmem->block_size; 283 struct virtio_mem_resp resp = { 284 .type = cpu_to_le16(VIRTIO_MEM_RESP_ACK), 285 }; 286 287 trace_virtio_mem_state_request(gpa, nb_blocks); 288 if (!virtio_mem_valid_range(vmem, gpa, size)) { 289 virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_ERROR); 290 return; 291 } 292 293 if (virtio_mem_test_bitmap(vmem, gpa, size, true)) { 294 resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_PLUGGED); 295 } else if (virtio_mem_test_bitmap(vmem, gpa, size, false)) { 296 resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_UNPLUGGED); 297 } else { 298 resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_MIXED); 299 } 300 trace_virtio_mem_state_response(le16_to_cpu(resp.u.state.state)); 301 virtio_mem_send_response(vmem, elem, &resp); 302 } 303 304 static void virtio_mem_handle_request(VirtIODevice *vdev, VirtQueue *vq) 305 { 306 const int len = sizeof(struct virtio_mem_req); 307 VirtIOMEM *vmem = VIRTIO_MEM(vdev); 308 VirtQueueElement *elem; 309 struct virtio_mem_req req; 310 uint16_t type; 311 312 while (true) { 313 elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); 314 if (!elem) { 315 return; 316 } 317 318 if (iov_to_buf(elem->out_sg, elem->out_num, 0, &req, len) < len) { 319 virtio_error(vdev, "virtio-mem protocol violation: invalid request" 320 " size: %d", len); 321 g_free(elem); 322 return; 323 } 324 325 if (iov_size(elem->in_sg, elem->in_num) < 326 sizeof(struct virtio_mem_resp)) { 327 virtio_error(vdev, "virtio-mem protocol violation: not enough space" 328 " for response: %zu", 329 iov_size(elem->in_sg, elem->in_num)); 330 g_free(elem); 331 return; 332 } 333 334 type = le16_to_cpu(req.type); 335 switch (type) { 336 case VIRTIO_MEM_REQ_PLUG: 337 virtio_mem_plug_request(vmem, elem, &req); 338 break; 339 case VIRTIO_MEM_REQ_UNPLUG: 340 virtio_mem_unplug_request(vmem, elem, &req); 341 break; 342 case VIRTIO_MEM_REQ_UNPLUG_ALL: 343 virtio_mem_unplug_all_request(vmem, elem); 344 break; 345 case VIRTIO_MEM_REQ_STATE: 346 virtio_mem_state_request(vmem, elem, &req); 347 break; 348 default: 349 virtio_error(vdev, "virtio-mem protocol violation: unknown request" 350 " type: %d", type); 351 g_free(elem); 352 return; 353 } 354 355 g_free(elem); 356 } 357 } 358 359 static void virtio_mem_get_config(VirtIODevice *vdev, uint8_t *config_data) 360 { 361 VirtIOMEM *vmem = VIRTIO_MEM(vdev); 362 struct virtio_mem_config *config = (void *) config_data; 363 364 config->block_size = cpu_to_le64(vmem->block_size); 365 config->node_id = cpu_to_le16(vmem->node); 366 config->requested_size = cpu_to_le64(vmem->requested_size); 367 config->plugged_size = cpu_to_le64(vmem->size); 368 config->addr = cpu_to_le64(vmem->addr); 369 config->region_size = cpu_to_le64(memory_region_size(&vmem->memdev->mr)); 370 config->usable_region_size = cpu_to_le64(vmem->usable_region_size); 371 } 372 373 static uint64_t virtio_mem_get_features(VirtIODevice *vdev, uint64_t features, 374 Error **errp) 375 { 376 MachineState *ms = MACHINE(qdev_get_machine()); 377 378 if (ms->numa_state) { 379 #if defined(CONFIG_ACPI) 380 virtio_add_feature(&features, VIRTIO_MEM_F_ACPI_PXM); 381 #endif 382 } 383 return features; 384 } 385 386 static void virtio_mem_system_reset(void *opaque) 387 { 388 VirtIOMEM *vmem = VIRTIO_MEM(opaque); 389 390 /* 391 * During usual resets, we will unplug all memory and shrink the usable 392 * region size. This is, however, not possible in all scenarios. Then, 393 * the guest has to deal with this manually (VIRTIO_MEM_REQ_UNPLUG_ALL). 394 */ 395 virtio_mem_unplug_all(vmem); 396 } 397 398 static void virtio_mem_device_realize(DeviceState *dev, Error **errp) 399 { 400 MachineState *ms = MACHINE(qdev_get_machine()); 401 int nb_numa_nodes = ms->numa_state ? ms->numa_state->num_nodes : 0; 402 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 403 VirtIOMEM *vmem = VIRTIO_MEM(dev); 404 uint64_t page_size; 405 RAMBlock *rb; 406 int ret; 407 408 if (!vmem->memdev) { 409 error_setg(errp, "'%s' property is not set", VIRTIO_MEM_MEMDEV_PROP); 410 return; 411 } else if (host_memory_backend_is_mapped(vmem->memdev)) { 412 error_setg(errp, "'%s' property specifies a busy memdev: %s", 413 VIRTIO_MEM_MEMDEV_PROP, 414 object_get_canonical_path_component(OBJECT(vmem->memdev))); 415 return; 416 } else if (!memory_region_is_ram(&vmem->memdev->mr) || 417 memory_region_is_rom(&vmem->memdev->mr) || 418 !vmem->memdev->mr.ram_block) { 419 error_setg(errp, "'%s' property specifies an unsupported memdev", 420 VIRTIO_MEM_MEMDEV_PROP); 421 return; 422 } 423 424 if ((nb_numa_nodes && vmem->node >= nb_numa_nodes) || 425 (!nb_numa_nodes && vmem->node)) { 426 error_setg(errp, "'%s' property has value '%" PRIu32 "', which exceeds" 427 "the number of numa nodes: %d", VIRTIO_MEM_NODE_PROP, 428 vmem->node, nb_numa_nodes ? nb_numa_nodes : 1); 429 return; 430 } 431 432 if (enable_mlock) { 433 error_setg(errp, "Incompatible with mlock"); 434 return; 435 } 436 437 rb = vmem->memdev->mr.ram_block; 438 page_size = qemu_ram_pagesize(rb); 439 440 if (vmem->block_size < page_size) { 441 error_setg(errp, "'%s' property has to be at least the page size (0x%" 442 PRIx64 ")", VIRTIO_MEM_BLOCK_SIZE_PROP, page_size); 443 return; 444 } else if (!QEMU_IS_ALIGNED(vmem->requested_size, vmem->block_size)) { 445 error_setg(errp, "'%s' property has to be multiples of '%s' (0x%" PRIx64 446 ")", VIRTIO_MEM_REQUESTED_SIZE_PROP, 447 VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size); 448 return; 449 } else if (!QEMU_IS_ALIGNED(memory_region_size(&vmem->memdev->mr), 450 vmem->block_size)) { 451 error_setg(errp, "'%s' property memdev size has to be multiples of" 452 "'%s' (0x%" PRIx64 ")", VIRTIO_MEM_MEMDEV_PROP, 453 VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size); 454 return; 455 } 456 457 if (ram_block_discard_require(true)) { 458 error_setg(errp, "Discarding RAM is disabled"); 459 return; 460 } 461 462 ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb)); 463 if (ret) { 464 error_setg_errno(errp, -ret, "Unexpected error discarding RAM"); 465 ram_block_discard_require(false); 466 return; 467 } 468 469 virtio_mem_resize_usable_region(vmem, vmem->requested_size, true); 470 471 vmem->bitmap_size = memory_region_size(&vmem->memdev->mr) / 472 vmem->block_size; 473 vmem->bitmap = bitmap_new(vmem->bitmap_size); 474 475 virtio_init(vdev, TYPE_VIRTIO_MEM, VIRTIO_ID_MEM, 476 sizeof(struct virtio_mem_config)); 477 vmem->vq = virtio_add_queue(vdev, 128, virtio_mem_handle_request); 478 479 host_memory_backend_set_mapped(vmem->memdev, true); 480 vmstate_register_ram(&vmem->memdev->mr, DEVICE(vmem)); 481 qemu_register_reset(virtio_mem_system_reset, vmem); 482 precopy_add_notifier(&vmem->precopy_notifier); 483 } 484 485 static void virtio_mem_device_unrealize(DeviceState *dev) 486 { 487 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 488 VirtIOMEM *vmem = VIRTIO_MEM(dev); 489 490 precopy_remove_notifier(&vmem->precopy_notifier); 491 qemu_unregister_reset(virtio_mem_system_reset, vmem); 492 vmstate_unregister_ram(&vmem->memdev->mr, DEVICE(vmem)); 493 host_memory_backend_set_mapped(vmem->memdev, false); 494 virtio_del_queue(vdev, 0); 495 virtio_cleanup(vdev); 496 g_free(vmem->bitmap); 497 ram_block_discard_require(false); 498 } 499 500 static int virtio_mem_restore_unplugged(VirtIOMEM *vmem) 501 { 502 RAMBlock *rb = vmem->memdev->mr.ram_block; 503 unsigned long first_zero_bit, last_zero_bit; 504 uint64_t offset, length; 505 int ret; 506 507 /* Find consecutive unplugged blocks and discard the consecutive range. */ 508 first_zero_bit = find_first_zero_bit(vmem->bitmap, vmem->bitmap_size); 509 while (first_zero_bit < vmem->bitmap_size) { 510 offset = first_zero_bit * vmem->block_size; 511 last_zero_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, 512 first_zero_bit + 1) - 1; 513 length = (last_zero_bit - first_zero_bit + 1) * vmem->block_size; 514 515 ret = ram_block_discard_range(rb, offset, length); 516 if (ret) { 517 error_report("Unexpected error discarding RAM: %s", 518 strerror(-ret)); 519 return -EINVAL; 520 } 521 first_zero_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, 522 last_zero_bit + 2); 523 } 524 return 0; 525 } 526 527 static int virtio_mem_post_load(void *opaque, int version_id) 528 { 529 if (migration_in_incoming_postcopy()) { 530 return 0; 531 } 532 533 return virtio_mem_restore_unplugged(VIRTIO_MEM(opaque)); 534 } 535 536 typedef struct VirtIOMEMMigSanityChecks { 537 VirtIOMEM *parent; 538 uint64_t addr; 539 uint64_t region_size; 540 uint64_t block_size; 541 uint32_t node; 542 } VirtIOMEMMigSanityChecks; 543 544 static int virtio_mem_mig_sanity_checks_pre_save(void *opaque) 545 { 546 VirtIOMEMMigSanityChecks *tmp = opaque; 547 VirtIOMEM *vmem = tmp->parent; 548 549 tmp->addr = vmem->addr; 550 tmp->region_size = memory_region_size(&vmem->memdev->mr); 551 tmp->block_size = vmem->block_size; 552 tmp->node = vmem->node; 553 return 0; 554 } 555 556 static int virtio_mem_mig_sanity_checks_post_load(void *opaque, int version_id) 557 { 558 VirtIOMEMMigSanityChecks *tmp = opaque; 559 VirtIOMEM *vmem = tmp->parent; 560 const uint64_t new_region_size = memory_region_size(&vmem->memdev->mr); 561 562 if (tmp->addr != vmem->addr) { 563 error_report("Property '%s' changed from 0x%" PRIx64 " to 0x%" PRIx64, 564 VIRTIO_MEM_ADDR_PROP, tmp->addr, vmem->addr); 565 return -EINVAL; 566 } 567 /* 568 * Note: Preparation for resizeable memory regions. The maximum size 569 * of the memory region must not change during migration. 570 */ 571 if (tmp->region_size != new_region_size) { 572 error_report("Property '%s' size changed from 0x%" PRIx64 " to 0x%" 573 PRIx64, VIRTIO_MEM_MEMDEV_PROP, tmp->region_size, 574 new_region_size); 575 return -EINVAL; 576 } 577 if (tmp->block_size != vmem->block_size) { 578 error_report("Property '%s' changed from 0x%" PRIx64 " to 0x%" PRIx64, 579 VIRTIO_MEM_BLOCK_SIZE_PROP, tmp->block_size, 580 vmem->block_size); 581 return -EINVAL; 582 } 583 if (tmp->node != vmem->node) { 584 error_report("Property '%s' changed from %" PRIu32 " to %" PRIu32, 585 VIRTIO_MEM_NODE_PROP, tmp->node, vmem->node); 586 return -EINVAL; 587 } 588 return 0; 589 } 590 591 static const VMStateDescription vmstate_virtio_mem_sanity_checks = { 592 .name = "virtio-mem-device/sanity-checks", 593 .pre_save = virtio_mem_mig_sanity_checks_pre_save, 594 .post_load = virtio_mem_mig_sanity_checks_post_load, 595 .fields = (VMStateField[]) { 596 VMSTATE_UINT64(addr, VirtIOMEMMigSanityChecks), 597 VMSTATE_UINT64(region_size, VirtIOMEMMigSanityChecks), 598 VMSTATE_UINT64(block_size, VirtIOMEMMigSanityChecks), 599 VMSTATE_UINT32(node, VirtIOMEMMigSanityChecks), 600 VMSTATE_END_OF_LIST(), 601 }, 602 }; 603 604 static const VMStateDescription vmstate_virtio_mem_device = { 605 .name = "virtio-mem-device", 606 .minimum_version_id = 1, 607 .version_id = 1, 608 .post_load = virtio_mem_post_load, 609 .fields = (VMStateField[]) { 610 VMSTATE_WITH_TMP(VirtIOMEM, VirtIOMEMMigSanityChecks, 611 vmstate_virtio_mem_sanity_checks), 612 VMSTATE_UINT64(usable_region_size, VirtIOMEM), 613 VMSTATE_UINT64(size, VirtIOMEM), 614 VMSTATE_UINT64(requested_size, VirtIOMEM), 615 VMSTATE_BITMAP(bitmap, VirtIOMEM, 0, bitmap_size), 616 VMSTATE_END_OF_LIST() 617 }, 618 }; 619 620 static const VMStateDescription vmstate_virtio_mem = { 621 .name = "virtio-mem", 622 .minimum_version_id = 1, 623 .version_id = 1, 624 .fields = (VMStateField[]) { 625 VMSTATE_VIRTIO_DEVICE, 626 VMSTATE_END_OF_LIST() 627 }, 628 }; 629 630 static void virtio_mem_fill_device_info(const VirtIOMEM *vmem, 631 VirtioMEMDeviceInfo *vi) 632 { 633 vi->memaddr = vmem->addr; 634 vi->node = vmem->node; 635 vi->requested_size = vmem->requested_size; 636 vi->size = vmem->size; 637 vi->max_size = memory_region_size(&vmem->memdev->mr); 638 vi->block_size = vmem->block_size; 639 vi->memdev = object_get_canonical_path(OBJECT(vmem->memdev)); 640 } 641 642 static MemoryRegion *virtio_mem_get_memory_region(VirtIOMEM *vmem, Error **errp) 643 { 644 if (!vmem->memdev) { 645 error_setg(errp, "'%s' property must be set", VIRTIO_MEM_MEMDEV_PROP); 646 return NULL; 647 } 648 649 return &vmem->memdev->mr; 650 } 651 652 static void virtio_mem_add_size_change_notifier(VirtIOMEM *vmem, 653 Notifier *notifier) 654 { 655 notifier_list_add(&vmem->size_change_notifiers, notifier); 656 } 657 658 static void virtio_mem_remove_size_change_notifier(VirtIOMEM *vmem, 659 Notifier *notifier) 660 { 661 notifier_remove(notifier); 662 } 663 664 static void virtio_mem_get_size(Object *obj, Visitor *v, const char *name, 665 void *opaque, Error **errp) 666 { 667 const VirtIOMEM *vmem = VIRTIO_MEM(obj); 668 uint64_t value = vmem->size; 669 670 visit_type_size(v, name, &value, errp); 671 } 672 673 static void virtio_mem_get_requested_size(Object *obj, Visitor *v, 674 const char *name, void *opaque, 675 Error **errp) 676 { 677 const VirtIOMEM *vmem = VIRTIO_MEM(obj); 678 uint64_t value = vmem->requested_size; 679 680 visit_type_size(v, name, &value, errp); 681 } 682 683 static void virtio_mem_set_requested_size(Object *obj, Visitor *v, 684 const char *name, void *opaque, 685 Error **errp) 686 { 687 VirtIOMEM *vmem = VIRTIO_MEM(obj); 688 Error *err = NULL; 689 uint64_t value; 690 691 visit_type_size(v, name, &value, &err); 692 if (err) { 693 error_propagate(errp, err); 694 return; 695 } 696 697 /* 698 * The block size and memory backend are not fixed until the device was 699 * realized. realize() will verify these properties then. 700 */ 701 if (DEVICE(obj)->realized) { 702 if (!QEMU_IS_ALIGNED(value, vmem->block_size)) { 703 error_setg(errp, "'%s' has to be multiples of '%s' (0x%" PRIx64 704 ")", name, VIRTIO_MEM_BLOCK_SIZE_PROP, 705 vmem->block_size); 706 return; 707 } else if (value > memory_region_size(&vmem->memdev->mr)) { 708 error_setg(errp, "'%s' cannot exceed the memory backend size" 709 "(0x%" PRIx64 ")", name, 710 memory_region_size(&vmem->memdev->mr)); 711 return; 712 } 713 714 if (value != vmem->requested_size) { 715 virtio_mem_resize_usable_region(vmem, value, false); 716 vmem->requested_size = value; 717 } 718 /* 719 * Trigger a config update so the guest gets notified. We trigger 720 * even if the size didn't change (especially helpful for debugging). 721 */ 722 virtio_notify_config(VIRTIO_DEVICE(vmem)); 723 } else { 724 vmem->requested_size = value; 725 } 726 } 727 728 static void virtio_mem_get_block_size(Object *obj, Visitor *v, const char *name, 729 void *opaque, Error **errp) 730 { 731 const VirtIOMEM *vmem = VIRTIO_MEM(obj); 732 uint64_t value = vmem->block_size; 733 734 visit_type_size(v, name, &value, errp); 735 } 736 737 static void virtio_mem_set_block_size(Object *obj, Visitor *v, const char *name, 738 void *opaque, Error **errp) 739 { 740 VirtIOMEM *vmem = VIRTIO_MEM(obj); 741 Error *err = NULL; 742 uint64_t value; 743 744 if (DEVICE(obj)->realized) { 745 error_setg(errp, "'%s' cannot be changed", name); 746 return; 747 } 748 749 visit_type_size(v, name, &value, &err); 750 if (err) { 751 error_propagate(errp, err); 752 return; 753 } 754 755 if (value < VIRTIO_MEM_MIN_BLOCK_SIZE) { 756 error_setg(errp, "'%s' property has to be at least 0x%" PRIx32, name, 757 VIRTIO_MEM_MIN_BLOCK_SIZE); 758 return; 759 } else if (!is_power_of_2(value)) { 760 error_setg(errp, "'%s' property has to be a power of two", name); 761 return; 762 } 763 vmem->block_size = value; 764 } 765 766 static void virtio_mem_precopy_exclude_unplugged(VirtIOMEM *vmem) 767 { 768 void * const host = qemu_ram_get_host_addr(vmem->memdev->mr.ram_block); 769 unsigned long first_zero_bit, last_zero_bit; 770 uint64_t offset, length; 771 772 /* 773 * Find consecutive unplugged blocks and exclude them from migration. 774 * 775 * Note: Blocks cannot get (un)plugged during precopy, no locking needed. 776 */ 777 first_zero_bit = find_first_zero_bit(vmem->bitmap, vmem->bitmap_size); 778 while (first_zero_bit < vmem->bitmap_size) { 779 offset = first_zero_bit * vmem->block_size; 780 last_zero_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, 781 first_zero_bit + 1) - 1; 782 length = (last_zero_bit - first_zero_bit + 1) * vmem->block_size; 783 784 qemu_guest_free_page_hint(host + offset, length); 785 first_zero_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, 786 last_zero_bit + 2); 787 } 788 } 789 790 static int virtio_mem_precopy_notify(NotifierWithReturn *n, void *data) 791 { 792 VirtIOMEM *vmem = container_of(n, VirtIOMEM, precopy_notifier); 793 PrecopyNotifyData *pnd = data; 794 795 switch (pnd->reason) { 796 case PRECOPY_NOTIFY_SETUP: 797 precopy_enable_free_page_optimization(); 798 break; 799 case PRECOPY_NOTIFY_AFTER_BITMAP_SYNC: 800 virtio_mem_precopy_exclude_unplugged(vmem); 801 break; 802 default: 803 break; 804 } 805 806 return 0; 807 } 808 809 static void virtio_mem_instance_init(Object *obj) 810 { 811 VirtIOMEM *vmem = VIRTIO_MEM(obj); 812 813 vmem->block_size = VIRTIO_MEM_MIN_BLOCK_SIZE; 814 notifier_list_init(&vmem->size_change_notifiers); 815 vmem->precopy_notifier.notify = virtio_mem_precopy_notify; 816 817 object_property_add(obj, VIRTIO_MEM_SIZE_PROP, "size", virtio_mem_get_size, 818 NULL, NULL, NULL); 819 object_property_add(obj, VIRTIO_MEM_REQUESTED_SIZE_PROP, "size", 820 virtio_mem_get_requested_size, 821 virtio_mem_set_requested_size, NULL, NULL); 822 object_property_add(obj, VIRTIO_MEM_BLOCK_SIZE_PROP, "size", 823 virtio_mem_get_block_size, virtio_mem_set_block_size, 824 NULL, NULL); 825 } 826 827 static Property virtio_mem_properties[] = { 828 DEFINE_PROP_UINT64(VIRTIO_MEM_ADDR_PROP, VirtIOMEM, addr, 0), 829 DEFINE_PROP_UINT32(VIRTIO_MEM_NODE_PROP, VirtIOMEM, node, 0), 830 DEFINE_PROP_LINK(VIRTIO_MEM_MEMDEV_PROP, VirtIOMEM, memdev, 831 TYPE_MEMORY_BACKEND, HostMemoryBackend *), 832 DEFINE_PROP_END_OF_LIST(), 833 }; 834 835 static void virtio_mem_class_init(ObjectClass *klass, void *data) 836 { 837 DeviceClass *dc = DEVICE_CLASS(klass); 838 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); 839 VirtIOMEMClass *vmc = VIRTIO_MEM_CLASS(klass); 840 841 device_class_set_props(dc, virtio_mem_properties); 842 dc->vmsd = &vmstate_virtio_mem; 843 844 set_bit(DEVICE_CATEGORY_MISC, dc->categories); 845 vdc->realize = virtio_mem_device_realize; 846 vdc->unrealize = virtio_mem_device_unrealize; 847 vdc->get_config = virtio_mem_get_config; 848 vdc->get_features = virtio_mem_get_features; 849 vdc->vmsd = &vmstate_virtio_mem_device; 850 851 vmc->fill_device_info = virtio_mem_fill_device_info; 852 vmc->get_memory_region = virtio_mem_get_memory_region; 853 vmc->add_size_change_notifier = virtio_mem_add_size_change_notifier; 854 vmc->remove_size_change_notifier = virtio_mem_remove_size_change_notifier; 855 } 856 857 static const TypeInfo virtio_mem_info = { 858 .name = TYPE_VIRTIO_MEM, 859 .parent = TYPE_VIRTIO_DEVICE, 860 .instance_size = sizeof(VirtIOMEM), 861 .instance_init = virtio_mem_instance_init, 862 .class_init = virtio_mem_class_init, 863 .class_size = sizeof(VirtIOMEMClass), 864 }; 865 866 static void virtio_register_types(void) 867 { 868 type_register_static(&virtio_mem_info); 869 } 870 871 type_init(virtio_register_types) 872