1 /* 2 * Virtio MEM device 3 * 4 * Copyright (C) 2020 Red Hat, Inc. 5 * 6 * Authors: 7 * David Hildenbrand <david@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2. 10 * See the COPYING file in the top-level directory. 11 */ 12 13 #include "qemu/osdep.h" 14 #include "qemu-common.h" 15 #include "qemu/iov.h" 16 #include "qemu/cutils.h" 17 #include "qemu/error-report.h" 18 #include "qemu/units.h" 19 #include "sysemu/numa.h" 20 #include "sysemu/sysemu.h" 21 #include "sysemu/reset.h" 22 #include "hw/virtio/virtio.h" 23 #include "hw/virtio/virtio-bus.h" 24 #include "hw/virtio/virtio-access.h" 25 #include "hw/virtio/virtio-mem.h" 26 #include "qapi/error.h" 27 #include "qapi/visitor.h" 28 #include "exec/ram_addr.h" 29 #include "migration/misc.h" 30 #include "hw/boards.h" 31 #include "hw/qdev-properties.h" 32 #include CONFIG_DEVICES 33 #include "trace.h" 34 35 /* 36 * Use QEMU_VMALLOC_ALIGN, so no THP will have to be split when unplugging 37 * memory (e.g., 2MB on x86_64). 38 */ 39 #define VIRTIO_MEM_MIN_BLOCK_SIZE ((uint32_t)QEMU_VMALLOC_ALIGN) 40 /* 41 * Size the usable region bigger than the requested size if possible. Esp. 42 * Linux guests will only add (aligned) memory blocks in case they fully 43 * fit into the usable region, but plug+online only a subset of the pages. 44 * The memory block size corresponds mostly to the section size. 45 * 46 * This allows e.g., to add 20MB with a section size of 128MB on x86_64, and 47 * a section size of 1GB on arm64 (as long as the start address is properly 48 * aligned, similar to ordinary DIMMs). 49 * 50 * We can change this at any time and maybe even make it configurable if 51 * necessary (as the section size can change). But it's more likely that the 52 * section size will rather get smaller and not bigger over time. 53 */ 54 #if defined(TARGET_X86_64) || defined(TARGET_I386) 55 #define VIRTIO_MEM_USABLE_EXTENT (2 * (128 * MiB)) 56 #else 57 #error VIRTIO_MEM_USABLE_EXTENT not defined 58 #endif 59 60 static bool virtio_mem_is_busy(void) 61 { 62 /* 63 * Postcopy cannot handle concurrent discards and we don't want to migrate 64 * pages on-demand with stale content when plugging new blocks. 65 * 66 * For precopy, we don't want unplugged blocks in our migration stream, and 67 * when plugging new blocks, the page content might differ between source 68 * and destination (observable by the guest when not initializing pages 69 * after plugging them) until we're running on the destination (as we didn't 70 * migrate these blocks when they were unplugged). 71 */ 72 return migration_in_incoming_postcopy() || !migration_is_idle(); 73 } 74 75 static bool virtio_mem_test_bitmap(VirtIOMEM *vmem, uint64_t start_gpa, 76 uint64_t size, bool plugged) 77 { 78 const unsigned long first_bit = (start_gpa - vmem->addr) / vmem->block_size; 79 const unsigned long last_bit = first_bit + (size / vmem->block_size) - 1; 80 unsigned long found_bit; 81 82 /* We fake a shorter bitmap to avoid searching too far. */ 83 if (plugged) { 84 found_bit = find_next_zero_bit(vmem->bitmap, last_bit + 1, first_bit); 85 } else { 86 found_bit = find_next_bit(vmem->bitmap, last_bit + 1, first_bit); 87 } 88 return found_bit > last_bit; 89 } 90 91 static void virtio_mem_set_bitmap(VirtIOMEM *vmem, uint64_t start_gpa, 92 uint64_t size, bool plugged) 93 { 94 const unsigned long bit = (start_gpa - vmem->addr) / vmem->block_size; 95 const unsigned long nbits = size / vmem->block_size; 96 97 if (plugged) { 98 bitmap_set(vmem->bitmap, bit, nbits); 99 } else { 100 bitmap_clear(vmem->bitmap, bit, nbits); 101 } 102 } 103 104 static void virtio_mem_send_response(VirtIOMEM *vmem, VirtQueueElement *elem, 105 struct virtio_mem_resp *resp) 106 { 107 VirtIODevice *vdev = VIRTIO_DEVICE(vmem); 108 VirtQueue *vq = vmem->vq; 109 110 trace_virtio_mem_send_response(le16_to_cpu(resp->type)); 111 iov_from_buf(elem->in_sg, elem->in_num, 0, resp, sizeof(*resp)); 112 113 virtqueue_push(vq, elem, sizeof(*resp)); 114 virtio_notify(vdev, vq); 115 } 116 117 static void virtio_mem_send_response_simple(VirtIOMEM *vmem, 118 VirtQueueElement *elem, 119 uint16_t type) 120 { 121 struct virtio_mem_resp resp = { 122 .type = cpu_to_le16(type), 123 }; 124 125 virtio_mem_send_response(vmem, elem, &resp); 126 } 127 128 static bool virtio_mem_valid_range(VirtIOMEM *vmem, uint64_t gpa, uint64_t size) 129 { 130 if (!QEMU_IS_ALIGNED(gpa, vmem->block_size)) { 131 return false; 132 } 133 if (gpa + size < gpa || !size) { 134 return false; 135 } 136 if (gpa < vmem->addr || gpa >= vmem->addr + vmem->usable_region_size) { 137 return false; 138 } 139 if (gpa + size > vmem->addr + vmem->usable_region_size) { 140 return false; 141 } 142 return true; 143 } 144 145 static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa, 146 uint64_t size, bool plug) 147 { 148 const uint64_t offset = start_gpa - vmem->addr; 149 int ret; 150 151 if (virtio_mem_is_busy()) { 152 return -EBUSY; 153 } 154 155 if (!plug) { 156 ret = ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size); 157 if (ret) { 158 error_report("Unexpected error discarding RAM: %s", 159 strerror(-ret)); 160 return -EBUSY; 161 } 162 } 163 virtio_mem_set_bitmap(vmem, start_gpa, size, plug); 164 return 0; 165 } 166 167 static int virtio_mem_state_change_request(VirtIOMEM *vmem, uint64_t gpa, 168 uint16_t nb_blocks, bool plug) 169 { 170 const uint64_t size = nb_blocks * vmem->block_size; 171 int ret; 172 173 if (!virtio_mem_valid_range(vmem, gpa, size)) { 174 return VIRTIO_MEM_RESP_ERROR; 175 } 176 177 if (plug && (vmem->size + size > vmem->requested_size)) { 178 return VIRTIO_MEM_RESP_NACK; 179 } 180 181 /* test if really all blocks are in the opposite state */ 182 if (!virtio_mem_test_bitmap(vmem, gpa, size, !plug)) { 183 return VIRTIO_MEM_RESP_ERROR; 184 } 185 186 ret = virtio_mem_set_block_state(vmem, gpa, size, plug); 187 if (ret) { 188 return VIRTIO_MEM_RESP_BUSY; 189 } 190 if (plug) { 191 vmem->size += size; 192 } else { 193 vmem->size -= size; 194 } 195 notifier_list_notify(&vmem->size_change_notifiers, &vmem->size); 196 return VIRTIO_MEM_RESP_ACK; 197 } 198 199 static void virtio_mem_plug_request(VirtIOMEM *vmem, VirtQueueElement *elem, 200 struct virtio_mem_req *req) 201 { 202 const uint64_t gpa = le64_to_cpu(req->u.plug.addr); 203 const uint16_t nb_blocks = le16_to_cpu(req->u.plug.nb_blocks); 204 uint16_t type; 205 206 trace_virtio_mem_plug_request(gpa, nb_blocks); 207 type = virtio_mem_state_change_request(vmem, gpa, nb_blocks, true); 208 virtio_mem_send_response_simple(vmem, elem, type); 209 } 210 211 static void virtio_mem_unplug_request(VirtIOMEM *vmem, VirtQueueElement *elem, 212 struct virtio_mem_req *req) 213 { 214 const uint64_t gpa = le64_to_cpu(req->u.unplug.addr); 215 const uint16_t nb_blocks = le16_to_cpu(req->u.unplug.nb_blocks); 216 uint16_t type; 217 218 trace_virtio_mem_unplug_request(gpa, nb_blocks); 219 type = virtio_mem_state_change_request(vmem, gpa, nb_blocks, false); 220 virtio_mem_send_response_simple(vmem, elem, type); 221 } 222 223 static void virtio_mem_resize_usable_region(VirtIOMEM *vmem, 224 uint64_t requested_size, 225 bool can_shrink) 226 { 227 uint64_t newsize = MIN(memory_region_size(&vmem->memdev->mr), 228 requested_size + VIRTIO_MEM_USABLE_EXTENT); 229 230 if (!requested_size) { 231 newsize = 0; 232 } 233 234 if (newsize < vmem->usable_region_size && !can_shrink) { 235 return; 236 } 237 238 trace_virtio_mem_resized_usable_region(vmem->usable_region_size, newsize); 239 vmem->usable_region_size = newsize; 240 } 241 242 static int virtio_mem_unplug_all(VirtIOMEM *vmem) 243 { 244 RAMBlock *rb = vmem->memdev->mr.ram_block; 245 int ret; 246 247 if (virtio_mem_is_busy()) { 248 return -EBUSY; 249 } 250 251 ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb)); 252 if (ret) { 253 error_report("Unexpected error discarding RAM: %s", strerror(-ret)); 254 return -EBUSY; 255 } 256 bitmap_clear(vmem->bitmap, 0, vmem->bitmap_size); 257 if (vmem->size) { 258 vmem->size = 0; 259 notifier_list_notify(&vmem->size_change_notifiers, &vmem->size); 260 } 261 trace_virtio_mem_unplugged_all(); 262 virtio_mem_resize_usable_region(vmem, vmem->requested_size, true); 263 return 0; 264 } 265 266 static void virtio_mem_unplug_all_request(VirtIOMEM *vmem, 267 VirtQueueElement *elem) 268 { 269 trace_virtio_mem_unplug_all_request(); 270 if (virtio_mem_unplug_all(vmem)) { 271 virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_BUSY); 272 } else { 273 virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_ACK); 274 } 275 } 276 277 static void virtio_mem_state_request(VirtIOMEM *vmem, VirtQueueElement *elem, 278 struct virtio_mem_req *req) 279 { 280 const uint16_t nb_blocks = le16_to_cpu(req->u.state.nb_blocks); 281 const uint64_t gpa = le64_to_cpu(req->u.state.addr); 282 const uint64_t size = nb_blocks * vmem->block_size; 283 struct virtio_mem_resp resp = { 284 .type = cpu_to_le16(VIRTIO_MEM_RESP_ACK), 285 }; 286 287 trace_virtio_mem_state_request(gpa, nb_blocks); 288 if (!virtio_mem_valid_range(vmem, gpa, size)) { 289 virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_ERROR); 290 return; 291 } 292 293 if (virtio_mem_test_bitmap(vmem, gpa, size, true)) { 294 resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_PLUGGED); 295 } else if (virtio_mem_test_bitmap(vmem, gpa, size, false)) { 296 resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_UNPLUGGED); 297 } else { 298 resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_MIXED); 299 } 300 trace_virtio_mem_state_response(le16_to_cpu(resp.u.state.state)); 301 virtio_mem_send_response(vmem, elem, &resp); 302 } 303 304 static void virtio_mem_handle_request(VirtIODevice *vdev, VirtQueue *vq) 305 { 306 const int len = sizeof(struct virtio_mem_req); 307 VirtIOMEM *vmem = VIRTIO_MEM(vdev); 308 VirtQueueElement *elem; 309 struct virtio_mem_req req; 310 uint16_t type; 311 312 while (true) { 313 elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); 314 if (!elem) { 315 return; 316 } 317 318 if (iov_to_buf(elem->out_sg, elem->out_num, 0, &req, len) < len) { 319 virtio_error(vdev, "virtio-mem protocol violation: invalid request" 320 " size: %d", len); 321 virtqueue_detach_element(vq, elem, 0); 322 g_free(elem); 323 return; 324 } 325 326 if (iov_size(elem->in_sg, elem->in_num) < 327 sizeof(struct virtio_mem_resp)) { 328 virtio_error(vdev, "virtio-mem protocol violation: not enough space" 329 " for response: %zu", 330 iov_size(elem->in_sg, elem->in_num)); 331 virtqueue_detach_element(vq, elem, 0); 332 g_free(elem); 333 return; 334 } 335 336 type = le16_to_cpu(req.type); 337 switch (type) { 338 case VIRTIO_MEM_REQ_PLUG: 339 virtio_mem_plug_request(vmem, elem, &req); 340 break; 341 case VIRTIO_MEM_REQ_UNPLUG: 342 virtio_mem_unplug_request(vmem, elem, &req); 343 break; 344 case VIRTIO_MEM_REQ_UNPLUG_ALL: 345 virtio_mem_unplug_all_request(vmem, elem); 346 break; 347 case VIRTIO_MEM_REQ_STATE: 348 virtio_mem_state_request(vmem, elem, &req); 349 break; 350 default: 351 virtio_error(vdev, "virtio-mem protocol violation: unknown request" 352 " type: %d", type); 353 virtqueue_detach_element(vq, elem, 0); 354 g_free(elem); 355 return; 356 } 357 358 g_free(elem); 359 } 360 } 361 362 static void virtio_mem_get_config(VirtIODevice *vdev, uint8_t *config_data) 363 { 364 VirtIOMEM *vmem = VIRTIO_MEM(vdev); 365 struct virtio_mem_config *config = (void *) config_data; 366 367 config->block_size = cpu_to_le64(vmem->block_size); 368 config->node_id = cpu_to_le16(vmem->node); 369 config->requested_size = cpu_to_le64(vmem->requested_size); 370 config->plugged_size = cpu_to_le64(vmem->size); 371 config->addr = cpu_to_le64(vmem->addr); 372 config->region_size = cpu_to_le64(memory_region_size(&vmem->memdev->mr)); 373 config->usable_region_size = cpu_to_le64(vmem->usable_region_size); 374 } 375 376 static uint64_t virtio_mem_get_features(VirtIODevice *vdev, uint64_t features, 377 Error **errp) 378 { 379 MachineState *ms = MACHINE(qdev_get_machine()); 380 381 if (ms->numa_state) { 382 #if defined(CONFIG_ACPI) 383 virtio_add_feature(&features, VIRTIO_MEM_F_ACPI_PXM); 384 #endif 385 } 386 return features; 387 } 388 389 static void virtio_mem_system_reset(void *opaque) 390 { 391 VirtIOMEM *vmem = VIRTIO_MEM(opaque); 392 393 /* 394 * During usual resets, we will unplug all memory and shrink the usable 395 * region size. This is, however, not possible in all scenarios. Then, 396 * the guest has to deal with this manually (VIRTIO_MEM_REQ_UNPLUG_ALL). 397 */ 398 virtio_mem_unplug_all(vmem); 399 } 400 401 static void virtio_mem_device_realize(DeviceState *dev, Error **errp) 402 { 403 MachineState *ms = MACHINE(qdev_get_machine()); 404 int nb_numa_nodes = ms->numa_state ? ms->numa_state->num_nodes : 0; 405 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 406 VirtIOMEM *vmem = VIRTIO_MEM(dev); 407 uint64_t page_size; 408 RAMBlock *rb; 409 int ret; 410 411 if (!vmem->memdev) { 412 error_setg(errp, "'%s' property is not set", VIRTIO_MEM_MEMDEV_PROP); 413 return; 414 } else if (host_memory_backend_is_mapped(vmem->memdev)) { 415 error_setg(errp, "'%s' property specifies a busy memdev: %s", 416 VIRTIO_MEM_MEMDEV_PROP, 417 object_get_canonical_path_component(OBJECT(vmem->memdev))); 418 return; 419 } else if (!memory_region_is_ram(&vmem->memdev->mr) || 420 memory_region_is_rom(&vmem->memdev->mr) || 421 !vmem->memdev->mr.ram_block) { 422 error_setg(errp, "'%s' property specifies an unsupported memdev", 423 VIRTIO_MEM_MEMDEV_PROP); 424 return; 425 } 426 427 if ((nb_numa_nodes && vmem->node >= nb_numa_nodes) || 428 (!nb_numa_nodes && vmem->node)) { 429 error_setg(errp, "'%s' property has value '%" PRIu32 "', which exceeds" 430 "the number of numa nodes: %d", VIRTIO_MEM_NODE_PROP, 431 vmem->node, nb_numa_nodes ? nb_numa_nodes : 1); 432 return; 433 } 434 435 if (enable_mlock) { 436 error_setg(errp, "Incompatible with mlock"); 437 return; 438 } 439 440 rb = vmem->memdev->mr.ram_block; 441 page_size = qemu_ram_pagesize(rb); 442 443 if (vmem->block_size < page_size) { 444 error_setg(errp, "'%s' property has to be at least the page size (0x%" 445 PRIx64 ")", VIRTIO_MEM_BLOCK_SIZE_PROP, page_size); 446 return; 447 } else if (!QEMU_IS_ALIGNED(vmem->requested_size, vmem->block_size)) { 448 error_setg(errp, "'%s' property has to be multiples of '%s' (0x%" PRIx64 449 ")", VIRTIO_MEM_REQUESTED_SIZE_PROP, 450 VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size); 451 return; 452 } else if (!QEMU_IS_ALIGNED(memory_region_size(&vmem->memdev->mr), 453 vmem->block_size)) { 454 error_setg(errp, "'%s' property memdev size has to be multiples of" 455 "'%s' (0x%" PRIx64 ")", VIRTIO_MEM_MEMDEV_PROP, 456 VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size); 457 return; 458 } 459 460 if (ram_block_discard_require(true)) { 461 error_setg(errp, "Discarding RAM is disabled"); 462 return; 463 } 464 465 ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb)); 466 if (ret) { 467 error_setg_errno(errp, -ret, "Unexpected error discarding RAM"); 468 ram_block_discard_require(false); 469 return; 470 } 471 472 virtio_mem_resize_usable_region(vmem, vmem->requested_size, true); 473 474 vmem->bitmap_size = memory_region_size(&vmem->memdev->mr) / 475 vmem->block_size; 476 vmem->bitmap = bitmap_new(vmem->bitmap_size); 477 478 virtio_init(vdev, TYPE_VIRTIO_MEM, VIRTIO_ID_MEM, 479 sizeof(struct virtio_mem_config)); 480 vmem->vq = virtio_add_queue(vdev, 128, virtio_mem_handle_request); 481 482 host_memory_backend_set_mapped(vmem->memdev, true); 483 vmstate_register_ram(&vmem->memdev->mr, DEVICE(vmem)); 484 qemu_register_reset(virtio_mem_system_reset, vmem); 485 precopy_add_notifier(&vmem->precopy_notifier); 486 } 487 488 static void virtio_mem_device_unrealize(DeviceState *dev) 489 { 490 VirtIODevice *vdev = VIRTIO_DEVICE(dev); 491 VirtIOMEM *vmem = VIRTIO_MEM(dev); 492 493 precopy_remove_notifier(&vmem->precopy_notifier); 494 qemu_unregister_reset(virtio_mem_system_reset, vmem); 495 vmstate_unregister_ram(&vmem->memdev->mr, DEVICE(vmem)); 496 host_memory_backend_set_mapped(vmem->memdev, false); 497 virtio_del_queue(vdev, 0); 498 virtio_cleanup(vdev); 499 g_free(vmem->bitmap); 500 ram_block_discard_require(false); 501 } 502 503 static int virtio_mem_restore_unplugged(VirtIOMEM *vmem) 504 { 505 RAMBlock *rb = vmem->memdev->mr.ram_block; 506 unsigned long first_zero_bit, last_zero_bit; 507 uint64_t offset, length; 508 int ret; 509 510 /* Find consecutive unplugged blocks and discard the consecutive range. */ 511 first_zero_bit = find_first_zero_bit(vmem->bitmap, vmem->bitmap_size); 512 while (first_zero_bit < vmem->bitmap_size) { 513 offset = first_zero_bit * vmem->block_size; 514 last_zero_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, 515 first_zero_bit + 1) - 1; 516 length = (last_zero_bit - first_zero_bit + 1) * vmem->block_size; 517 518 ret = ram_block_discard_range(rb, offset, length); 519 if (ret) { 520 error_report("Unexpected error discarding RAM: %s", 521 strerror(-ret)); 522 return -EINVAL; 523 } 524 first_zero_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, 525 last_zero_bit + 2); 526 } 527 return 0; 528 } 529 530 static int virtio_mem_post_load(void *opaque, int version_id) 531 { 532 if (migration_in_incoming_postcopy()) { 533 return 0; 534 } 535 536 return virtio_mem_restore_unplugged(VIRTIO_MEM(opaque)); 537 } 538 539 typedef struct VirtIOMEMMigSanityChecks { 540 VirtIOMEM *parent; 541 uint64_t addr; 542 uint64_t region_size; 543 uint64_t block_size; 544 uint32_t node; 545 } VirtIOMEMMigSanityChecks; 546 547 static int virtio_mem_mig_sanity_checks_pre_save(void *opaque) 548 { 549 VirtIOMEMMigSanityChecks *tmp = opaque; 550 VirtIOMEM *vmem = tmp->parent; 551 552 tmp->addr = vmem->addr; 553 tmp->region_size = memory_region_size(&vmem->memdev->mr); 554 tmp->block_size = vmem->block_size; 555 tmp->node = vmem->node; 556 return 0; 557 } 558 559 static int virtio_mem_mig_sanity_checks_post_load(void *opaque, int version_id) 560 { 561 VirtIOMEMMigSanityChecks *tmp = opaque; 562 VirtIOMEM *vmem = tmp->parent; 563 const uint64_t new_region_size = memory_region_size(&vmem->memdev->mr); 564 565 if (tmp->addr != vmem->addr) { 566 error_report("Property '%s' changed from 0x%" PRIx64 " to 0x%" PRIx64, 567 VIRTIO_MEM_ADDR_PROP, tmp->addr, vmem->addr); 568 return -EINVAL; 569 } 570 /* 571 * Note: Preparation for resizeable memory regions. The maximum size 572 * of the memory region must not change during migration. 573 */ 574 if (tmp->region_size != new_region_size) { 575 error_report("Property '%s' size changed from 0x%" PRIx64 " to 0x%" 576 PRIx64, VIRTIO_MEM_MEMDEV_PROP, tmp->region_size, 577 new_region_size); 578 return -EINVAL; 579 } 580 if (tmp->block_size != vmem->block_size) { 581 error_report("Property '%s' changed from 0x%" PRIx64 " to 0x%" PRIx64, 582 VIRTIO_MEM_BLOCK_SIZE_PROP, tmp->block_size, 583 vmem->block_size); 584 return -EINVAL; 585 } 586 if (tmp->node != vmem->node) { 587 error_report("Property '%s' changed from %" PRIu32 " to %" PRIu32, 588 VIRTIO_MEM_NODE_PROP, tmp->node, vmem->node); 589 return -EINVAL; 590 } 591 return 0; 592 } 593 594 static const VMStateDescription vmstate_virtio_mem_sanity_checks = { 595 .name = "virtio-mem-device/sanity-checks", 596 .pre_save = virtio_mem_mig_sanity_checks_pre_save, 597 .post_load = virtio_mem_mig_sanity_checks_post_load, 598 .fields = (VMStateField[]) { 599 VMSTATE_UINT64(addr, VirtIOMEMMigSanityChecks), 600 VMSTATE_UINT64(region_size, VirtIOMEMMigSanityChecks), 601 VMSTATE_UINT64(block_size, VirtIOMEMMigSanityChecks), 602 VMSTATE_UINT32(node, VirtIOMEMMigSanityChecks), 603 VMSTATE_END_OF_LIST(), 604 }, 605 }; 606 607 static const VMStateDescription vmstate_virtio_mem_device = { 608 .name = "virtio-mem-device", 609 .minimum_version_id = 1, 610 .version_id = 1, 611 .post_load = virtio_mem_post_load, 612 .fields = (VMStateField[]) { 613 VMSTATE_WITH_TMP(VirtIOMEM, VirtIOMEMMigSanityChecks, 614 vmstate_virtio_mem_sanity_checks), 615 VMSTATE_UINT64(usable_region_size, VirtIOMEM), 616 VMSTATE_UINT64(size, VirtIOMEM), 617 VMSTATE_UINT64(requested_size, VirtIOMEM), 618 VMSTATE_BITMAP(bitmap, VirtIOMEM, 0, bitmap_size), 619 VMSTATE_END_OF_LIST() 620 }, 621 }; 622 623 static const VMStateDescription vmstate_virtio_mem = { 624 .name = "virtio-mem", 625 .minimum_version_id = 1, 626 .version_id = 1, 627 .fields = (VMStateField[]) { 628 VMSTATE_VIRTIO_DEVICE, 629 VMSTATE_END_OF_LIST() 630 }, 631 }; 632 633 static void virtio_mem_fill_device_info(const VirtIOMEM *vmem, 634 VirtioMEMDeviceInfo *vi) 635 { 636 vi->memaddr = vmem->addr; 637 vi->node = vmem->node; 638 vi->requested_size = vmem->requested_size; 639 vi->size = vmem->size; 640 vi->max_size = memory_region_size(&vmem->memdev->mr); 641 vi->block_size = vmem->block_size; 642 vi->memdev = object_get_canonical_path(OBJECT(vmem->memdev)); 643 } 644 645 static MemoryRegion *virtio_mem_get_memory_region(VirtIOMEM *vmem, Error **errp) 646 { 647 if (!vmem->memdev) { 648 error_setg(errp, "'%s' property must be set", VIRTIO_MEM_MEMDEV_PROP); 649 return NULL; 650 } 651 652 return &vmem->memdev->mr; 653 } 654 655 static void virtio_mem_add_size_change_notifier(VirtIOMEM *vmem, 656 Notifier *notifier) 657 { 658 notifier_list_add(&vmem->size_change_notifiers, notifier); 659 } 660 661 static void virtio_mem_remove_size_change_notifier(VirtIOMEM *vmem, 662 Notifier *notifier) 663 { 664 notifier_remove(notifier); 665 } 666 667 static void virtio_mem_get_size(Object *obj, Visitor *v, const char *name, 668 void *opaque, Error **errp) 669 { 670 const VirtIOMEM *vmem = VIRTIO_MEM(obj); 671 uint64_t value = vmem->size; 672 673 visit_type_size(v, name, &value, errp); 674 } 675 676 static void virtio_mem_get_requested_size(Object *obj, Visitor *v, 677 const char *name, void *opaque, 678 Error **errp) 679 { 680 const VirtIOMEM *vmem = VIRTIO_MEM(obj); 681 uint64_t value = vmem->requested_size; 682 683 visit_type_size(v, name, &value, errp); 684 } 685 686 static void virtio_mem_set_requested_size(Object *obj, Visitor *v, 687 const char *name, void *opaque, 688 Error **errp) 689 { 690 VirtIOMEM *vmem = VIRTIO_MEM(obj); 691 Error *err = NULL; 692 uint64_t value; 693 694 visit_type_size(v, name, &value, &err); 695 if (err) { 696 error_propagate(errp, err); 697 return; 698 } 699 700 /* 701 * The block size and memory backend are not fixed until the device was 702 * realized. realize() will verify these properties then. 703 */ 704 if (DEVICE(obj)->realized) { 705 if (!QEMU_IS_ALIGNED(value, vmem->block_size)) { 706 error_setg(errp, "'%s' has to be multiples of '%s' (0x%" PRIx64 707 ")", name, VIRTIO_MEM_BLOCK_SIZE_PROP, 708 vmem->block_size); 709 return; 710 } else if (value > memory_region_size(&vmem->memdev->mr)) { 711 error_setg(errp, "'%s' cannot exceed the memory backend size" 712 "(0x%" PRIx64 ")", name, 713 memory_region_size(&vmem->memdev->mr)); 714 return; 715 } 716 717 if (value != vmem->requested_size) { 718 virtio_mem_resize_usable_region(vmem, value, false); 719 vmem->requested_size = value; 720 } 721 /* 722 * Trigger a config update so the guest gets notified. We trigger 723 * even if the size didn't change (especially helpful for debugging). 724 */ 725 virtio_notify_config(VIRTIO_DEVICE(vmem)); 726 } else { 727 vmem->requested_size = value; 728 } 729 } 730 731 static void virtio_mem_get_block_size(Object *obj, Visitor *v, const char *name, 732 void *opaque, Error **errp) 733 { 734 const VirtIOMEM *vmem = VIRTIO_MEM(obj); 735 uint64_t value = vmem->block_size; 736 737 visit_type_size(v, name, &value, errp); 738 } 739 740 static void virtio_mem_set_block_size(Object *obj, Visitor *v, const char *name, 741 void *opaque, Error **errp) 742 { 743 VirtIOMEM *vmem = VIRTIO_MEM(obj); 744 Error *err = NULL; 745 uint64_t value; 746 747 if (DEVICE(obj)->realized) { 748 error_setg(errp, "'%s' cannot be changed", name); 749 return; 750 } 751 752 visit_type_size(v, name, &value, &err); 753 if (err) { 754 error_propagate(errp, err); 755 return; 756 } 757 758 if (value < VIRTIO_MEM_MIN_BLOCK_SIZE) { 759 error_setg(errp, "'%s' property has to be at least 0x%" PRIx32, name, 760 VIRTIO_MEM_MIN_BLOCK_SIZE); 761 return; 762 } else if (!is_power_of_2(value)) { 763 error_setg(errp, "'%s' property has to be a power of two", name); 764 return; 765 } 766 vmem->block_size = value; 767 } 768 769 static void virtio_mem_precopy_exclude_unplugged(VirtIOMEM *vmem) 770 { 771 void * const host = qemu_ram_get_host_addr(vmem->memdev->mr.ram_block); 772 unsigned long first_zero_bit, last_zero_bit; 773 uint64_t offset, length; 774 775 /* 776 * Find consecutive unplugged blocks and exclude them from migration. 777 * 778 * Note: Blocks cannot get (un)plugged during precopy, no locking needed. 779 */ 780 first_zero_bit = find_first_zero_bit(vmem->bitmap, vmem->bitmap_size); 781 while (first_zero_bit < vmem->bitmap_size) { 782 offset = first_zero_bit * vmem->block_size; 783 last_zero_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, 784 first_zero_bit + 1) - 1; 785 length = (last_zero_bit - first_zero_bit + 1) * vmem->block_size; 786 787 qemu_guest_free_page_hint(host + offset, length); 788 first_zero_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, 789 last_zero_bit + 2); 790 } 791 } 792 793 static int virtio_mem_precopy_notify(NotifierWithReturn *n, void *data) 794 { 795 VirtIOMEM *vmem = container_of(n, VirtIOMEM, precopy_notifier); 796 PrecopyNotifyData *pnd = data; 797 798 switch (pnd->reason) { 799 case PRECOPY_NOTIFY_SETUP: 800 precopy_enable_free_page_optimization(); 801 break; 802 case PRECOPY_NOTIFY_AFTER_BITMAP_SYNC: 803 virtio_mem_precopy_exclude_unplugged(vmem); 804 break; 805 default: 806 break; 807 } 808 809 return 0; 810 } 811 812 static void virtio_mem_instance_init(Object *obj) 813 { 814 VirtIOMEM *vmem = VIRTIO_MEM(obj); 815 816 vmem->block_size = VIRTIO_MEM_MIN_BLOCK_SIZE; 817 notifier_list_init(&vmem->size_change_notifiers); 818 vmem->precopy_notifier.notify = virtio_mem_precopy_notify; 819 820 object_property_add(obj, VIRTIO_MEM_SIZE_PROP, "size", virtio_mem_get_size, 821 NULL, NULL, NULL); 822 object_property_add(obj, VIRTIO_MEM_REQUESTED_SIZE_PROP, "size", 823 virtio_mem_get_requested_size, 824 virtio_mem_set_requested_size, NULL, NULL); 825 object_property_add(obj, VIRTIO_MEM_BLOCK_SIZE_PROP, "size", 826 virtio_mem_get_block_size, virtio_mem_set_block_size, 827 NULL, NULL); 828 } 829 830 static Property virtio_mem_properties[] = { 831 DEFINE_PROP_UINT64(VIRTIO_MEM_ADDR_PROP, VirtIOMEM, addr, 0), 832 DEFINE_PROP_UINT32(VIRTIO_MEM_NODE_PROP, VirtIOMEM, node, 0), 833 DEFINE_PROP_LINK(VIRTIO_MEM_MEMDEV_PROP, VirtIOMEM, memdev, 834 TYPE_MEMORY_BACKEND, HostMemoryBackend *), 835 DEFINE_PROP_END_OF_LIST(), 836 }; 837 838 static void virtio_mem_class_init(ObjectClass *klass, void *data) 839 { 840 DeviceClass *dc = DEVICE_CLASS(klass); 841 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); 842 VirtIOMEMClass *vmc = VIRTIO_MEM_CLASS(klass); 843 844 device_class_set_props(dc, virtio_mem_properties); 845 dc->vmsd = &vmstate_virtio_mem; 846 847 set_bit(DEVICE_CATEGORY_MISC, dc->categories); 848 vdc->realize = virtio_mem_device_realize; 849 vdc->unrealize = virtio_mem_device_unrealize; 850 vdc->get_config = virtio_mem_get_config; 851 vdc->get_features = virtio_mem_get_features; 852 vdc->vmsd = &vmstate_virtio_mem_device; 853 854 vmc->fill_device_info = virtio_mem_fill_device_info; 855 vmc->get_memory_region = virtio_mem_get_memory_region; 856 vmc->add_size_change_notifier = virtio_mem_add_size_change_notifier; 857 vmc->remove_size_change_notifier = virtio_mem_remove_size_change_notifier; 858 } 859 860 static const TypeInfo virtio_mem_info = { 861 .name = TYPE_VIRTIO_MEM, 862 .parent = TYPE_VIRTIO_DEVICE, 863 .instance_size = sizeof(VirtIOMEM), 864 .instance_init = virtio_mem_instance_init, 865 .class_init = virtio_mem_class_init, 866 .class_size = sizeof(VirtIOMEMClass), 867 }; 868 869 static void virtio_register_types(void) 870 { 871 type_register_static(&virtio_mem_info); 872 } 873 874 type_init(virtio_register_types) 875