1 /* 2 * Migration support for VFIO devices 3 * 4 * Copyright NVIDIA, Inc. 2020 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2. See 7 * the COPYING file in the top-level directory. 8 */ 9 10 #include "qemu/osdep.h" 11 #include "qemu/main-loop.h" 12 #include "qemu/cutils.h" 13 #include <linux/vfio.h> 14 #include <sys/ioctl.h> 15 16 #include "sysemu/runstate.h" 17 #include "hw/vfio/vfio-common.h" 18 #include "cpu.h" 19 #include "migration/migration.h" 20 #include "migration/vmstate.h" 21 #include "migration/qemu-file.h" 22 #include "migration/register.h" 23 #include "migration/blocker.h" 24 #include "migration/misc.h" 25 #include "qapi/error.h" 26 #include "exec/ramlist.h" 27 #include "exec/ram_addr.h" 28 #include "pci.h" 29 #include "trace.h" 30 #include "hw/hw.h" 31 32 /* 33 * Flags to be used as unique delimiters for VFIO devices in the migration 34 * stream. These flags are composed as: 35 * 0xffffffff => MSB 32-bit all 1s 36 * 0xef10 => Magic ID, represents emulated (virtual) function IO 37 * 0x0000 => 16-bits reserved for flags 38 * 39 * The beginning of state information is marked by _DEV_CONFIG_STATE, 40 * _DEV_SETUP_STATE, or _DEV_DATA_STATE, respectively. The end of a 41 * certain state information is marked by _END_OF_STATE. 42 */ 43 #define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL) 44 #define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL) 45 #define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) 46 #define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) 47 48 static int64_t bytes_transferred; 49 50 static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count, 51 off_t off, bool iswrite) 52 { 53 int ret; 54 55 ret = iswrite ? pwrite(vbasedev->fd, val, count, off) : 56 pread(vbasedev->fd, val, count, off); 57 if (ret < count) { 58 error_report("vfio_mig_%s %d byte %s: failed at offset 0x%" 59 HWADDR_PRIx", err: %s", iswrite ? "write" : "read", count, 60 vbasedev->name, off, strerror(errno)); 61 return (ret < 0) ? ret : -EINVAL; 62 } 63 return 0; 64 } 65 66 static int vfio_mig_rw(VFIODevice *vbasedev, __u8 *buf, size_t count, 67 off_t off, bool iswrite) 68 { 69 int ret, done = 0; 70 __u8 *tbuf = buf; 71 72 while (count) { 73 int bytes = 0; 74 75 if (count >= 8 && !(off % 8)) { 76 bytes = 8; 77 } else if (count >= 4 && !(off % 4)) { 78 bytes = 4; 79 } else if (count >= 2 && !(off % 2)) { 80 bytes = 2; 81 } else { 82 bytes = 1; 83 } 84 85 ret = vfio_mig_access(vbasedev, tbuf, bytes, off, iswrite); 86 if (ret) { 87 return ret; 88 } 89 90 count -= bytes; 91 done += bytes; 92 off += bytes; 93 tbuf += bytes; 94 } 95 return done; 96 } 97 98 #define vfio_mig_read(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, false) 99 #define vfio_mig_write(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, true) 100 101 #define VFIO_MIG_STRUCT_OFFSET(f) \ 102 offsetof(struct vfio_device_migration_info, f) 103 /* 104 * Change the device_state register for device @vbasedev. Bits set in @mask 105 * are preserved, bits set in @value are set, and bits not set in either @mask 106 * or @value are cleared in device_state. If the register cannot be accessed, 107 * the resulting state would be invalid, or the device enters an error state, 108 * an error is returned. 109 */ 110 111 static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, 112 uint32_t value) 113 { 114 VFIOMigration *migration = vbasedev->migration; 115 VFIORegion *region = &migration->region; 116 off_t dev_state_off = region->fd_offset + 117 VFIO_MIG_STRUCT_OFFSET(device_state); 118 uint32_t device_state; 119 int ret; 120 121 ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), 122 dev_state_off); 123 if (ret < 0) { 124 return ret; 125 } 126 127 device_state = (device_state & mask) | value; 128 129 if (!VFIO_DEVICE_STATE_VALID(device_state)) { 130 return -EINVAL; 131 } 132 133 ret = vfio_mig_write(vbasedev, &device_state, sizeof(device_state), 134 dev_state_off); 135 if (ret < 0) { 136 int rret; 137 138 rret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), 139 dev_state_off); 140 141 if ((rret < 0) || (VFIO_DEVICE_STATE_IS_ERROR(device_state))) { 142 hw_error("%s: Device in error state 0x%x", vbasedev->name, 143 device_state); 144 return rret ? rret : -EIO; 145 } 146 return ret; 147 } 148 149 migration->device_state = device_state; 150 trace_vfio_migration_set_state(vbasedev->name, device_state); 151 return 0; 152 } 153 154 static void *get_data_section_size(VFIORegion *region, uint64_t data_offset, 155 uint64_t data_size, uint64_t *size) 156 { 157 void *ptr = NULL; 158 uint64_t limit = 0; 159 int i; 160 161 if (!region->mmaps) { 162 if (size) { 163 *size = MIN(data_size, region->size - data_offset); 164 } 165 return ptr; 166 } 167 168 for (i = 0; i < region->nr_mmaps; i++) { 169 VFIOMmap *map = region->mmaps + i; 170 171 if ((data_offset >= map->offset) && 172 (data_offset < map->offset + map->size)) { 173 174 /* check if data_offset is within sparse mmap areas */ 175 ptr = map->mmap + data_offset - map->offset; 176 if (size) { 177 *size = MIN(data_size, map->offset + map->size - data_offset); 178 } 179 break; 180 } else if ((data_offset < map->offset) && 181 (!limit || limit > map->offset)) { 182 /* 183 * data_offset is not within sparse mmap areas, find size of 184 * non-mapped area. Check through all list since region->mmaps list 185 * is not sorted. 186 */ 187 limit = map->offset; 188 } 189 } 190 191 if (!ptr && size) { 192 *size = limit ? MIN(data_size, limit - data_offset) : data_size; 193 } 194 return ptr; 195 } 196 197 static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size) 198 { 199 VFIOMigration *migration = vbasedev->migration; 200 VFIORegion *region = &migration->region; 201 uint64_t data_offset = 0, data_size = 0, sz; 202 int ret; 203 204 ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset), 205 region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset)); 206 if (ret < 0) { 207 return ret; 208 } 209 210 ret = vfio_mig_read(vbasedev, &data_size, sizeof(data_size), 211 region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size)); 212 if (ret < 0) { 213 return ret; 214 } 215 216 trace_vfio_save_buffer(vbasedev->name, data_offset, data_size, 217 migration->pending_bytes); 218 219 qemu_put_be64(f, data_size); 220 sz = data_size; 221 222 while (sz) { 223 void *buf; 224 uint64_t sec_size; 225 bool buf_allocated = false; 226 227 buf = get_data_section_size(region, data_offset, sz, &sec_size); 228 229 if (!buf) { 230 buf = g_try_malloc(sec_size); 231 if (!buf) { 232 error_report("%s: Error allocating buffer ", __func__); 233 return -ENOMEM; 234 } 235 buf_allocated = true; 236 237 ret = vfio_mig_read(vbasedev, buf, sec_size, 238 region->fd_offset + data_offset); 239 if (ret < 0) { 240 g_free(buf); 241 return ret; 242 } 243 } 244 245 qemu_put_buffer(f, buf, sec_size); 246 247 if (buf_allocated) { 248 g_free(buf); 249 } 250 sz -= sec_size; 251 data_offset += sec_size; 252 } 253 254 ret = qemu_file_get_error(f); 255 256 if (!ret && size) { 257 *size = data_size; 258 } 259 260 bytes_transferred += data_size; 261 return ret; 262 } 263 264 static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev, 265 uint64_t data_size) 266 { 267 VFIORegion *region = &vbasedev->migration->region; 268 uint64_t data_offset = 0, size, report_size; 269 int ret; 270 271 do { 272 ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset), 273 region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset)); 274 if (ret < 0) { 275 return ret; 276 } 277 278 if (data_offset + data_size > region->size) { 279 /* 280 * If data_size is greater than the data section of migration region 281 * then iterate the write buffer operation. This case can occur if 282 * size of migration region at destination is smaller than size of 283 * migration region at source. 284 */ 285 report_size = size = region->size - data_offset; 286 data_size -= size; 287 } else { 288 report_size = size = data_size; 289 data_size = 0; 290 } 291 292 trace_vfio_load_state_device_data(vbasedev->name, data_offset, size); 293 294 while (size) { 295 void *buf; 296 uint64_t sec_size; 297 bool buf_alloc = false; 298 299 buf = get_data_section_size(region, data_offset, size, &sec_size); 300 301 if (!buf) { 302 buf = g_try_malloc(sec_size); 303 if (!buf) { 304 error_report("%s: Error allocating buffer ", __func__); 305 return -ENOMEM; 306 } 307 buf_alloc = true; 308 } 309 310 qemu_get_buffer(f, buf, sec_size); 311 312 if (buf_alloc) { 313 ret = vfio_mig_write(vbasedev, buf, sec_size, 314 region->fd_offset + data_offset); 315 g_free(buf); 316 317 if (ret < 0) { 318 return ret; 319 } 320 } 321 size -= sec_size; 322 data_offset += sec_size; 323 } 324 325 ret = vfio_mig_write(vbasedev, &report_size, sizeof(report_size), 326 region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size)); 327 if (ret < 0) { 328 return ret; 329 } 330 } while (data_size); 331 332 return 0; 333 } 334 335 static int vfio_update_pending(VFIODevice *vbasedev) 336 { 337 VFIOMigration *migration = vbasedev->migration; 338 VFIORegion *region = &migration->region; 339 uint64_t pending_bytes = 0; 340 int ret; 341 342 ret = vfio_mig_read(vbasedev, &pending_bytes, sizeof(pending_bytes), 343 region->fd_offset + VFIO_MIG_STRUCT_OFFSET(pending_bytes)); 344 if (ret < 0) { 345 migration->pending_bytes = 0; 346 return ret; 347 } 348 349 migration->pending_bytes = pending_bytes; 350 trace_vfio_update_pending(vbasedev->name, pending_bytes); 351 return 0; 352 } 353 354 static int vfio_save_device_config_state(QEMUFile *f, void *opaque) 355 { 356 VFIODevice *vbasedev = opaque; 357 358 qemu_put_be64(f, VFIO_MIG_FLAG_DEV_CONFIG_STATE); 359 360 if (vbasedev->ops && vbasedev->ops->vfio_save_config) { 361 vbasedev->ops->vfio_save_config(vbasedev, f); 362 } 363 364 qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); 365 366 trace_vfio_save_device_config_state(vbasedev->name); 367 368 return qemu_file_get_error(f); 369 } 370 371 static int vfio_load_device_config_state(QEMUFile *f, void *opaque) 372 { 373 VFIODevice *vbasedev = opaque; 374 uint64_t data; 375 376 if (vbasedev->ops && vbasedev->ops->vfio_load_config) { 377 int ret; 378 379 ret = vbasedev->ops->vfio_load_config(vbasedev, f); 380 if (ret) { 381 error_report("%s: Failed to load device config space", 382 vbasedev->name); 383 return ret; 384 } 385 } 386 387 data = qemu_get_be64(f); 388 if (data != VFIO_MIG_FLAG_END_OF_STATE) { 389 error_report("%s: Failed loading device config space, " 390 "end flag incorrect 0x%"PRIx64, vbasedev->name, data); 391 return -EINVAL; 392 } 393 394 trace_vfio_load_device_config_state(vbasedev->name); 395 return qemu_file_get_error(f); 396 } 397 398 static int vfio_set_dirty_page_tracking(VFIODevice *vbasedev, bool start) 399 { 400 int ret; 401 VFIOMigration *migration = vbasedev->migration; 402 VFIOContainer *container = vbasedev->group->container; 403 struct vfio_iommu_type1_dirty_bitmap dirty = { 404 .argsz = sizeof(dirty), 405 }; 406 407 if (start) { 408 if (migration->device_state & VFIO_DEVICE_STATE_SAVING) { 409 dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START; 410 } else { 411 return -EINVAL; 412 } 413 } else { 414 dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP; 415 } 416 417 ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty); 418 if (ret) { 419 error_report("Failed to set dirty tracking flag 0x%x errno: %d", 420 dirty.flags, errno); 421 return -errno; 422 } 423 return ret; 424 } 425 426 static void vfio_migration_cleanup(VFIODevice *vbasedev) 427 { 428 VFIOMigration *migration = vbasedev->migration; 429 430 vfio_set_dirty_page_tracking(vbasedev, false); 431 432 if (migration->region.mmaps) { 433 vfio_region_unmap(&migration->region); 434 } 435 } 436 437 /* ---------------------------------------------------------------------- */ 438 439 static int vfio_save_setup(QEMUFile *f, void *opaque) 440 { 441 VFIODevice *vbasedev = opaque; 442 VFIOMigration *migration = vbasedev->migration; 443 int ret; 444 445 trace_vfio_save_setup(vbasedev->name); 446 447 qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE); 448 449 if (migration->region.mmaps) { 450 /* 451 * Calling vfio_region_mmap() from migration thread. Memory API called 452 * from this function require locking the iothread when called from 453 * outside the main loop thread. 454 */ 455 qemu_mutex_lock_iothread(); 456 ret = vfio_region_mmap(&migration->region); 457 qemu_mutex_unlock_iothread(); 458 if (ret) { 459 error_report("%s: Failed to mmap VFIO migration region: %s", 460 vbasedev->name, strerror(-ret)); 461 error_report("%s: Falling back to slow path", vbasedev->name); 462 } 463 } 464 465 ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK, 466 VFIO_DEVICE_STATE_SAVING); 467 if (ret) { 468 error_report("%s: Failed to set state SAVING", vbasedev->name); 469 return ret; 470 } 471 472 ret = vfio_set_dirty_page_tracking(vbasedev, true); 473 if (ret) { 474 return ret; 475 } 476 477 qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); 478 479 ret = qemu_file_get_error(f); 480 if (ret) { 481 return ret; 482 } 483 484 return 0; 485 } 486 487 static void vfio_save_cleanup(void *opaque) 488 { 489 VFIODevice *vbasedev = opaque; 490 491 vfio_migration_cleanup(vbasedev); 492 trace_vfio_save_cleanup(vbasedev->name); 493 } 494 495 static void vfio_save_pending(QEMUFile *f, void *opaque, 496 uint64_t threshold_size, 497 uint64_t *res_precopy_only, 498 uint64_t *res_compatible, 499 uint64_t *res_postcopy_only) 500 { 501 VFIODevice *vbasedev = opaque; 502 VFIOMigration *migration = vbasedev->migration; 503 int ret; 504 505 ret = vfio_update_pending(vbasedev); 506 if (ret) { 507 return; 508 } 509 510 *res_precopy_only += migration->pending_bytes; 511 512 trace_vfio_save_pending(vbasedev->name, *res_precopy_only, 513 *res_postcopy_only, *res_compatible); 514 } 515 516 static int vfio_save_iterate(QEMUFile *f, void *opaque) 517 { 518 VFIODevice *vbasedev = opaque; 519 VFIOMigration *migration = vbasedev->migration; 520 uint64_t data_size; 521 int ret; 522 523 qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE); 524 525 if (migration->pending_bytes == 0) { 526 ret = vfio_update_pending(vbasedev); 527 if (ret) { 528 return ret; 529 } 530 531 if (migration->pending_bytes == 0) { 532 qemu_put_be64(f, 0); 533 qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); 534 /* indicates data finished, goto complete phase */ 535 return 1; 536 } 537 } 538 539 ret = vfio_save_buffer(f, vbasedev, &data_size); 540 if (ret) { 541 error_report("%s: vfio_save_buffer failed %s", vbasedev->name, 542 strerror(errno)); 543 return ret; 544 } 545 546 qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); 547 548 ret = qemu_file_get_error(f); 549 if (ret) { 550 return ret; 551 } 552 553 /* 554 * Reset pending_bytes as .save_live_pending is not called during savevm or 555 * snapshot case, in such case vfio_update_pending() at the start of this 556 * function updates pending_bytes. 557 */ 558 migration->pending_bytes = 0; 559 trace_vfio_save_iterate(vbasedev->name, data_size); 560 return 0; 561 } 562 563 static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) 564 { 565 VFIODevice *vbasedev = opaque; 566 VFIOMigration *migration = vbasedev->migration; 567 uint64_t data_size; 568 int ret; 569 570 ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_RUNNING, 571 VFIO_DEVICE_STATE_SAVING); 572 if (ret) { 573 error_report("%s: Failed to set state STOP and SAVING", 574 vbasedev->name); 575 return ret; 576 } 577 578 ret = vfio_save_device_config_state(f, opaque); 579 if (ret) { 580 return ret; 581 } 582 583 ret = vfio_update_pending(vbasedev); 584 if (ret) { 585 return ret; 586 } 587 588 while (migration->pending_bytes > 0) { 589 qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE); 590 ret = vfio_save_buffer(f, vbasedev, &data_size); 591 if (ret < 0) { 592 error_report("%s: Failed to save buffer", vbasedev->name); 593 return ret; 594 } 595 596 if (data_size == 0) { 597 break; 598 } 599 600 ret = vfio_update_pending(vbasedev); 601 if (ret) { 602 return ret; 603 } 604 } 605 606 qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); 607 608 ret = qemu_file_get_error(f); 609 if (ret) { 610 return ret; 611 } 612 613 ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_SAVING, 0); 614 if (ret) { 615 error_report("%s: Failed to set state STOPPED", vbasedev->name); 616 return ret; 617 } 618 619 trace_vfio_save_complete_precopy(vbasedev->name); 620 return ret; 621 } 622 623 static int vfio_load_setup(QEMUFile *f, void *opaque) 624 { 625 VFIODevice *vbasedev = opaque; 626 VFIOMigration *migration = vbasedev->migration; 627 int ret = 0; 628 629 if (migration->region.mmaps) { 630 ret = vfio_region_mmap(&migration->region); 631 if (ret) { 632 error_report("%s: Failed to mmap VFIO migration region %d: %s", 633 vbasedev->name, migration->region.nr, 634 strerror(-ret)); 635 error_report("%s: Falling back to slow path", vbasedev->name); 636 } 637 } 638 639 ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_MASK, 640 VFIO_DEVICE_STATE_RESUMING); 641 if (ret) { 642 error_report("%s: Failed to set state RESUMING", vbasedev->name); 643 if (migration->region.mmaps) { 644 vfio_region_unmap(&migration->region); 645 } 646 } 647 return ret; 648 } 649 650 static int vfio_load_cleanup(void *opaque) 651 { 652 VFIODevice *vbasedev = opaque; 653 654 vfio_migration_cleanup(vbasedev); 655 trace_vfio_load_cleanup(vbasedev->name); 656 return 0; 657 } 658 659 static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) 660 { 661 VFIODevice *vbasedev = opaque; 662 int ret = 0; 663 uint64_t data; 664 665 data = qemu_get_be64(f); 666 while (data != VFIO_MIG_FLAG_END_OF_STATE) { 667 668 trace_vfio_load_state(vbasedev->name, data); 669 670 switch (data) { 671 case VFIO_MIG_FLAG_DEV_CONFIG_STATE: 672 { 673 ret = vfio_load_device_config_state(f, opaque); 674 if (ret) { 675 return ret; 676 } 677 break; 678 } 679 case VFIO_MIG_FLAG_DEV_SETUP_STATE: 680 { 681 data = qemu_get_be64(f); 682 if (data == VFIO_MIG_FLAG_END_OF_STATE) { 683 return ret; 684 } else { 685 error_report("%s: SETUP STATE: EOS not found 0x%"PRIx64, 686 vbasedev->name, data); 687 return -EINVAL; 688 } 689 break; 690 } 691 case VFIO_MIG_FLAG_DEV_DATA_STATE: 692 { 693 uint64_t data_size = qemu_get_be64(f); 694 695 if (data_size) { 696 ret = vfio_load_buffer(f, vbasedev, data_size); 697 if (ret < 0) { 698 return ret; 699 } 700 } 701 break; 702 } 703 default: 704 error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data); 705 return -EINVAL; 706 } 707 708 data = qemu_get_be64(f); 709 ret = qemu_file_get_error(f); 710 if (ret) { 711 return ret; 712 } 713 } 714 return ret; 715 } 716 717 static SaveVMHandlers savevm_vfio_handlers = { 718 .save_setup = vfio_save_setup, 719 .save_cleanup = vfio_save_cleanup, 720 .save_live_pending = vfio_save_pending, 721 .save_live_iterate = vfio_save_iterate, 722 .save_live_complete_precopy = vfio_save_complete_precopy, 723 .load_setup = vfio_load_setup, 724 .load_cleanup = vfio_load_cleanup, 725 .load_state = vfio_load_state, 726 }; 727 728 /* ---------------------------------------------------------------------- */ 729 730 static void vfio_vmstate_change(void *opaque, bool running, RunState state) 731 { 732 VFIODevice *vbasedev = opaque; 733 VFIOMigration *migration = vbasedev->migration; 734 uint32_t value, mask; 735 int ret; 736 737 if (vbasedev->migration->vm_running == running) { 738 return; 739 } 740 741 if (running) { 742 /* 743 * Here device state can have one of _SAVING, _RESUMING or _STOP bit. 744 * Transition from _SAVING to _RUNNING can happen if there is migration 745 * failure, in that case clear _SAVING bit. 746 * Transition from _RESUMING to _RUNNING occurs during resuming 747 * phase, in that case clear _RESUMING bit. 748 * In both the above cases, set _RUNNING bit. 749 */ 750 mask = ~VFIO_DEVICE_STATE_MASK; 751 value = VFIO_DEVICE_STATE_RUNNING; 752 } else { 753 /* 754 * Here device state could be either _RUNNING or _SAVING|_RUNNING. Reset 755 * _RUNNING bit 756 */ 757 mask = ~VFIO_DEVICE_STATE_RUNNING; 758 value = 0; 759 } 760 761 ret = vfio_migration_set_state(vbasedev, mask, value); 762 if (ret) { 763 /* 764 * Migration should be aborted in this case, but vm_state_notify() 765 * currently does not support reporting failures. 766 */ 767 error_report("%s: Failed to set device state 0x%x", vbasedev->name, 768 (migration->device_state & mask) | value); 769 qemu_file_set_error(migrate_get_current()->to_dst_file, ret); 770 } 771 vbasedev->migration->vm_running = running; 772 trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state), 773 (migration->device_state & mask) | value); 774 } 775 776 static void vfio_migration_state_notifier(Notifier *notifier, void *data) 777 { 778 MigrationState *s = data; 779 VFIOMigration *migration = container_of(notifier, VFIOMigration, 780 migration_state); 781 VFIODevice *vbasedev = migration->vbasedev; 782 int ret; 783 784 trace_vfio_migration_state_notifier(vbasedev->name, 785 MigrationStatus_str(s->state)); 786 787 switch (s->state) { 788 case MIGRATION_STATUS_CANCELLING: 789 case MIGRATION_STATUS_CANCELLED: 790 case MIGRATION_STATUS_FAILED: 791 bytes_transferred = 0; 792 ret = vfio_migration_set_state(vbasedev, 793 ~(VFIO_DEVICE_STATE_SAVING | VFIO_DEVICE_STATE_RESUMING), 794 VFIO_DEVICE_STATE_RUNNING); 795 if (ret) { 796 error_report("%s: Failed to set state RUNNING", vbasedev->name); 797 } 798 } 799 } 800 801 static void vfio_migration_exit(VFIODevice *vbasedev) 802 { 803 VFIOMigration *migration = vbasedev->migration; 804 805 vfio_region_exit(&migration->region); 806 vfio_region_finalize(&migration->region); 807 g_free(vbasedev->migration); 808 vbasedev->migration = NULL; 809 } 810 811 static int vfio_migration_init(VFIODevice *vbasedev, 812 struct vfio_region_info *info) 813 { 814 int ret; 815 Object *obj; 816 VFIOMigration *migration; 817 char id[256] = ""; 818 g_autofree char *path = NULL, *oid = NULL; 819 820 if (!vbasedev->ops->vfio_get_object) { 821 return -EINVAL; 822 } 823 824 obj = vbasedev->ops->vfio_get_object(vbasedev); 825 if (!obj) { 826 return -EINVAL; 827 } 828 829 vbasedev->migration = g_new0(VFIOMigration, 1); 830 831 ret = vfio_region_setup(obj, vbasedev, &vbasedev->migration->region, 832 info->index, "migration"); 833 if (ret) { 834 error_report("%s: Failed to setup VFIO migration region %d: %s", 835 vbasedev->name, info->index, strerror(-ret)); 836 goto err; 837 } 838 839 if (!vbasedev->migration->region.size) { 840 error_report("%s: Invalid zero-sized VFIO migration region %d", 841 vbasedev->name, info->index); 842 ret = -EINVAL; 843 goto err; 844 } 845 846 migration = vbasedev->migration; 847 migration->vbasedev = vbasedev; 848 849 oid = vmstate_if_get_id(VMSTATE_IF(DEVICE(obj))); 850 if (oid) { 851 path = g_strdup_printf("%s/vfio", oid); 852 } else { 853 path = g_strdup("vfio"); 854 } 855 strpadcpy(id, sizeof(id), path, '\0'); 856 857 register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers, 858 vbasedev); 859 860 migration->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change, 861 vbasedev); 862 migration->migration_state.notify = vfio_migration_state_notifier; 863 add_migration_state_change_notifier(&migration->migration_state); 864 return 0; 865 866 err: 867 vfio_migration_exit(vbasedev); 868 return ret; 869 } 870 871 /* ---------------------------------------------------------------------- */ 872 873 int64_t vfio_mig_bytes_transferred(void) 874 { 875 return bytes_transferred; 876 } 877 878 int vfio_migration_probe(VFIODevice *vbasedev, Error **errp) 879 { 880 VFIOContainer *container = vbasedev->group->container; 881 struct vfio_region_info *info = NULL; 882 Error *local_err = NULL; 883 int ret = -ENOTSUP; 884 885 if (!vbasedev->enable_migration || !container->dirty_pages_supported) { 886 goto add_blocker; 887 } 888 889 ret = vfio_get_dev_region_info(vbasedev, VFIO_REGION_TYPE_MIGRATION, 890 VFIO_REGION_SUBTYPE_MIGRATION, &info); 891 if (ret) { 892 goto add_blocker; 893 } 894 895 ret = vfio_migration_init(vbasedev, info); 896 if (ret) { 897 goto add_blocker; 898 } 899 900 trace_vfio_migration_probe(vbasedev->name, info->index); 901 g_free(info); 902 return 0; 903 904 add_blocker: 905 error_setg(&vbasedev->migration_blocker, 906 "VFIO device doesn't support migration"); 907 g_free(info); 908 909 ret = migrate_add_blocker(vbasedev->migration_blocker, &local_err); 910 if (local_err) { 911 error_propagate(errp, local_err); 912 error_free(vbasedev->migration_blocker); 913 vbasedev->migration_blocker = NULL; 914 } 915 return ret; 916 } 917 918 void vfio_migration_finalize(VFIODevice *vbasedev) 919 { 920 if (vbasedev->migration) { 921 VFIOMigration *migration = vbasedev->migration; 922 923 remove_migration_state_change_notifier(&migration->migration_state); 924 qemu_del_vm_change_state_handler(migration->vm_state); 925 vfio_migration_exit(vbasedev); 926 } 927 928 if (vbasedev->migration_blocker) { 929 migrate_del_blocker(vbasedev->migration_blocker); 930 error_free(vbasedev->migration_blocker); 931 vbasedev->migration_blocker = NULL; 932 } 933 } 934