1 /* 2 * Migration support for VFIO devices 3 * 4 * Copyright NVIDIA, Inc. 2020 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2. See 7 * the COPYING file in the top-level directory. 8 */ 9 10 #include "qemu/osdep.h" 11 #include "qemu/main-loop.h" 12 #include "qemu/cutils.h" 13 #include "qemu/units.h" 14 #include "qemu/error-report.h" 15 #include <linux/vfio.h> 16 #include <sys/ioctl.h> 17 18 #include "sysemu/runstate.h" 19 #include "hw/vfio/vfio-common.h" 20 #include "migration/misc.h" 21 #include "migration/savevm.h" 22 #include "migration/vmstate.h" 23 #include "migration/qemu-file.h" 24 #include "migration/register.h" 25 #include "migration/blocker.h" 26 #include "qapi/error.h" 27 #include "exec/ramlist.h" 28 #include "exec/ram_addr.h" 29 #include "pci.h" 30 #include "trace.h" 31 #include "hw/hw.h" 32 33 /* 34 * Flags to be used as unique delimiters for VFIO devices in the migration 35 * stream. These flags are composed as: 36 * 0xffffffff => MSB 32-bit all 1s 37 * 0xef10 => Magic ID, represents emulated (virtual) function IO 38 * 0x0000 => 16-bits reserved for flags 39 * 40 * The beginning of state information is marked by _DEV_CONFIG_STATE, 41 * _DEV_SETUP_STATE, or _DEV_DATA_STATE, respectively. The end of a 42 * certain state information is marked by _END_OF_STATE. 43 */ 44 #define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL) 45 #define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL) 46 #define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) 47 #define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) 48 #define VFIO_MIG_FLAG_DEV_INIT_DATA_SENT (0xffffffffef100005ULL) 49 50 /* 51 * This is an arbitrary size based on migration of mlx5 devices, where typically 52 * total device migration size is on the order of 100s of MB. Testing with 53 * larger values, e.g. 128MB and 1GB, did not show a performance improvement. 54 */ 55 #define VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE (1 * MiB) 56 57 static int64_t bytes_transferred; 58 59 static const char *mig_state_to_str(enum vfio_device_mig_state state) 60 { 61 switch (state) { 62 case VFIO_DEVICE_STATE_ERROR: 63 return "ERROR"; 64 case VFIO_DEVICE_STATE_STOP: 65 return "STOP"; 66 case VFIO_DEVICE_STATE_RUNNING: 67 return "RUNNING"; 68 case VFIO_DEVICE_STATE_STOP_COPY: 69 return "STOP_COPY"; 70 case VFIO_DEVICE_STATE_RESUMING: 71 return "RESUMING"; 72 case VFIO_DEVICE_STATE_RUNNING_P2P: 73 return "RUNNING_P2P"; 74 case VFIO_DEVICE_STATE_PRE_COPY: 75 return "PRE_COPY"; 76 case VFIO_DEVICE_STATE_PRE_COPY_P2P: 77 return "PRE_COPY_P2P"; 78 default: 79 return "UNKNOWN STATE"; 80 } 81 } 82 83 static int vfio_migration_set_state(VFIODevice *vbasedev, 84 enum vfio_device_mig_state new_state, 85 enum vfio_device_mig_state recover_state) 86 { 87 VFIOMigration *migration = vbasedev->migration; 88 uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) + 89 sizeof(struct vfio_device_feature_mig_state), 90 sizeof(uint64_t))] = {}; 91 struct vfio_device_feature *feature = (struct vfio_device_feature *)buf; 92 struct vfio_device_feature_mig_state *mig_state = 93 (struct vfio_device_feature_mig_state *)feature->data; 94 int ret; 95 96 feature->argsz = sizeof(buf); 97 feature->flags = 98 VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE; 99 mig_state->device_state = new_state; 100 if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) { 101 /* Try to set the device in some good state */ 102 ret = -errno; 103 104 if (recover_state == VFIO_DEVICE_STATE_ERROR) { 105 error_report("%s: Failed setting device state to %s, err: %s. " 106 "Recover state is ERROR. Resetting device", 107 vbasedev->name, mig_state_to_str(new_state), 108 strerror(errno)); 109 110 goto reset_device; 111 } 112 113 error_report( 114 "%s: Failed setting device state to %s, err: %s. Setting device in recover state %s", 115 vbasedev->name, mig_state_to_str(new_state), 116 strerror(errno), mig_state_to_str(recover_state)); 117 118 mig_state->device_state = recover_state; 119 if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) { 120 ret = -errno; 121 error_report( 122 "%s: Failed setting device in recover state, err: %s. Resetting device", 123 vbasedev->name, strerror(errno)); 124 125 goto reset_device; 126 } 127 128 migration->device_state = recover_state; 129 130 return ret; 131 } 132 133 migration->device_state = new_state; 134 if (mig_state->data_fd != -1) { 135 if (migration->data_fd != -1) { 136 /* 137 * This can happen if the device is asynchronously reset and 138 * terminates a data transfer. 139 */ 140 error_report("%s: data_fd out of sync", vbasedev->name); 141 close(mig_state->data_fd); 142 143 return -EBADF; 144 } 145 146 migration->data_fd = mig_state->data_fd; 147 } 148 149 trace_vfio_migration_set_state(vbasedev->name, mig_state_to_str(new_state)); 150 151 return 0; 152 153 reset_device: 154 if (ioctl(vbasedev->fd, VFIO_DEVICE_RESET)) { 155 hw_error("%s: Failed resetting device, err: %s", vbasedev->name, 156 strerror(errno)); 157 } 158 159 migration->device_state = VFIO_DEVICE_STATE_RUNNING; 160 161 return ret; 162 } 163 164 /* 165 * Some device state transitions require resetting the device if they fail. 166 * This function sets the device in new_state and resets the device if that 167 * fails. Reset is done by using ERROR as the recover state. 168 */ 169 static int 170 vfio_migration_set_state_or_reset(VFIODevice *vbasedev, 171 enum vfio_device_mig_state new_state) 172 { 173 return vfio_migration_set_state(vbasedev, new_state, 174 VFIO_DEVICE_STATE_ERROR); 175 } 176 177 static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev, 178 uint64_t data_size) 179 { 180 VFIOMigration *migration = vbasedev->migration; 181 int ret; 182 183 ret = qemu_file_get_to_fd(f, migration->data_fd, data_size); 184 trace_vfio_load_state_device_data(vbasedev->name, data_size, ret); 185 186 return ret; 187 } 188 189 static int vfio_save_device_config_state(QEMUFile *f, void *opaque) 190 { 191 VFIODevice *vbasedev = opaque; 192 193 qemu_put_be64(f, VFIO_MIG_FLAG_DEV_CONFIG_STATE); 194 195 if (vbasedev->ops && vbasedev->ops->vfio_save_config) { 196 vbasedev->ops->vfio_save_config(vbasedev, f); 197 } 198 199 qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); 200 201 trace_vfio_save_device_config_state(vbasedev->name); 202 203 return qemu_file_get_error(f); 204 } 205 206 static int vfio_load_device_config_state(QEMUFile *f, void *opaque) 207 { 208 VFIODevice *vbasedev = opaque; 209 uint64_t data; 210 211 if (vbasedev->ops && vbasedev->ops->vfio_load_config) { 212 int ret; 213 214 ret = vbasedev->ops->vfio_load_config(vbasedev, f); 215 if (ret) { 216 error_report("%s: Failed to load device config space", 217 vbasedev->name); 218 return ret; 219 } 220 } 221 222 data = qemu_get_be64(f); 223 if (data != VFIO_MIG_FLAG_END_OF_STATE) { 224 error_report("%s: Failed loading device config space, " 225 "end flag incorrect 0x%"PRIx64, vbasedev->name, data); 226 return -EINVAL; 227 } 228 229 trace_vfio_load_device_config_state(vbasedev->name); 230 return qemu_file_get_error(f); 231 } 232 233 static void vfio_migration_cleanup(VFIODevice *vbasedev) 234 { 235 VFIOMigration *migration = vbasedev->migration; 236 237 close(migration->data_fd); 238 migration->data_fd = -1; 239 } 240 241 static int vfio_query_stop_copy_size(VFIODevice *vbasedev, 242 uint64_t *stop_copy_size) 243 { 244 uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) + 245 sizeof(struct vfio_device_feature_mig_data_size), 246 sizeof(uint64_t))] = {}; 247 struct vfio_device_feature *feature = (struct vfio_device_feature *)buf; 248 struct vfio_device_feature_mig_data_size *mig_data_size = 249 (struct vfio_device_feature_mig_data_size *)feature->data; 250 251 feature->argsz = sizeof(buf); 252 feature->flags = 253 VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_MIG_DATA_SIZE; 254 255 if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) { 256 return -errno; 257 } 258 259 *stop_copy_size = mig_data_size->stop_copy_length; 260 261 return 0; 262 } 263 264 static int vfio_query_precopy_size(VFIOMigration *migration) 265 { 266 struct vfio_precopy_info precopy = { 267 .argsz = sizeof(precopy), 268 }; 269 270 migration->precopy_init_size = 0; 271 migration->precopy_dirty_size = 0; 272 273 if (ioctl(migration->data_fd, VFIO_MIG_GET_PRECOPY_INFO, &precopy)) { 274 return -errno; 275 } 276 277 migration->precopy_init_size = precopy.initial_bytes; 278 migration->precopy_dirty_size = precopy.dirty_bytes; 279 280 return 0; 281 } 282 283 /* Returns the size of saved data on success and -errno on error */ 284 static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration) 285 { 286 ssize_t data_size; 287 288 data_size = read(migration->data_fd, migration->data_buffer, 289 migration->data_buffer_size); 290 if (data_size < 0) { 291 /* 292 * Pre-copy emptied all the device state for now. For more information, 293 * please refer to the Linux kernel VFIO uAPI. 294 */ 295 if (errno == ENOMSG) { 296 return 0; 297 } 298 299 return -errno; 300 } 301 if (data_size == 0) { 302 return 0; 303 } 304 305 qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE); 306 qemu_put_be64(f, data_size); 307 qemu_put_buffer(f, migration->data_buffer, data_size); 308 bytes_transferred += data_size; 309 310 trace_vfio_save_block(migration->vbasedev->name, data_size); 311 312 return qemu_file_get_error(f) ?: data_size; 313 } 314 315 static void vfio_update_estimated_pending_data(VFIOMigration *migration, 316 uint64_t data_size) 317 { 318 if (!data_size) { 319 /* 320 * Pre-copy emptied all the device state for now, update estimated sizes 321 * accordingly. 322 */ 323 migration->precopy_init_size = 0; 324 migration->precopy_dirty_size = 0; 325 326 return; 327 } 328 329 if (migration->precopy_init_size) { 330 uint64_t init_size = MIN(migration->precopy_init_size, data_size); 331 332 migration->precopy_init_size -= init_size; 333 data_size -= init_size; 334 } 335 336 migration->precopy_dirty_size -= MIN(migration->precopy_dirty_size, 337 data_size); 338 } 339 340 static bool vfio_precopy_supported(VFIODevice *vbasedev) 341 { 342 VFIOMigration *migration = vbasedev->migration; 343 344 return migration->mig_flags & VFIO_MIGRATION_PRE_COPY; 345 } 346 347 /* ---------------------------------------------------------------------- */ 348 349 static int vfio_save_prepare(void *opaque, Error **errp) 350 { 351 VFIODevice *vbasedev = opaque; 352 353 /* 354 * Snapshot doesn't use postcopy nor background snapshot, so allow snapshot 355 * even if they are on. 356 */ 357 if (runstate_check(RUN_STATE_SAVE_VM)) { 358 return 0; 359 } 360 361 if (migrate_postcopy_ram()) { 362 error_setg( 363 errp, "%s: VFIO migration is not supported with postcopy migration", 364 vbasedev->name); 365 return -EOPNOTSUPP; 366 } 367 368 if (migrate_background_snapshot()) { 369 error_setg( 370 errp, 371 "%s: VFIO migration is not supported with background snapshot", 372 vbasedev->name); 373 return -EOPNOTSUPP; 374 } 375 376 return 0; 377 } 378 379 static int vfio_save_setup(QEMUFile *f, void *opaque) 380 { 381 VFIODevice *vbasedev = opaque; 382 VFIOMigration *migration = vbasedev->migration; 383 uint64_t stop_copy_size = VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE; 384 385 qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE); 386 387 vfio_query_stop_copy_size(vbasedev, &stop_copy_size); 388 migration->data_buffer_size = MIN(VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE, 389 stop_copy_size); 390 migration->data_buffer = g_try_malloc0(migration->data_buffer_size); 391 if (!migration->data_buffer) { 392 error_report("%s: Failed to allocate migration data buffer", 393 vbasedev->name); 394 return -ENOMEM; 395 } 396 397 if (vfio_precopy_supported(vbasedev)) { 398 int ret; 399 400 switch (migration->device_state) { 401 case VFIO_DEVICE_STATE_RUNNING: 402 ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_PRE_COPY, 403 VFIO_DEVICE_STATE_RUNNING); 404 if (ret) { 405 return ret; 406 } 407 408 vfio_query_precopy_size(migration); 409 410 break; 411 case VFIO_DEVICE_STATE_STOP: 412 /* vfio_save_complete_precopy() will go to STOP_COPY */ 413 break; 414 default: 415 return -EINVAL; 416 } 417 } 418 419 trace_vfio_save_setup(vbasedev->name, migration->data_buffer_size); 420 421 qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); 422 423 return qemu_file_get_error(f); 424 } 425 426 static void vfio_save_cleanup(void *opaque) 427 { 428 VFIODevice *vbasedev = opaque; 429 VFIOMigration *migration = vbasedev->migration; 430 431 /* 432 * Changing device state from STOP_COPY to STOP can take time. Do it here, 433 * after migration has completed, so it won't increase downtime. 434 */ 435 if (migration->device_state == VFIO_DEVICE_STATE_STOP_COPY) { 436 vfio_migration_set_state_or_reset(vbasedev, VFIO_DEVICE_STATE_STOP); 437 } 438 439 g_free(migration->data_buffer); 440 migration->data_buffer = NULL; 441 migration->precopy_init_size = 0; 442 migration->precopy_dirty_size = 0; 443 migration->initial_data_sent = false; 444 vfio_migration_cleanup(vbasedev); 445 trace_vfio_save_cleanup(vbasedev->name); 446 } 447 448 static void vfio_state_pending_estimate(void *opaque, uint64_t *must_precopy, 449 uint64_t *can_postcopy) 450 { 451 VFIODevice *vbasedev = opaque; 452 VFIOMigration *migration = vbasedev->migration; 453 454 if (!vfio_device_state_is_precopy(vbasedev)) { 455 return; 456 } 457 458 *must_precopy += 459 migration->precopy_init_size + migration->precopy_dirty_size; 460 461 trace_vfio_state_pending_estimate(vbasedev->name, *must_precopy, 462 *can_postcopy, 463 migration->precopy_init_size, 464 migration->precopy_dirty_size); 465 } 466 467 /* 468 * Migration size of VFIO devices can be as little as a few KBs or as big as 469 * many GBs. This value should be big enough to cover the worst case. 470 */ 471 #define VFIO_MIG_STOP_COPY_SIZE (100 * GiB) 472 473 static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy, 474 uint64_t *can_postcopy) 475 { 476 VFIODevice *vbasedev = opaque; 477 VFIOMigration *migration = vbasedev->migration; 478 uint64_t stop_copy_size = VFIO_MIG_STOP_COPY_SIZE; 479 480 /* 481 * If getting pending migration size fails, VFIO_MIG_STOP_COPY_SIZE is 482 * reported so downtime limit won't be violated. 483 */ 484 vfio_query_stop_copy_size(vbasedev, &stop_copy_size); 485 *must_precopy += stop_copy_size; 486 487 if (vfio_device_state_is_precopy(vbasedev)) { 488 vfio_query_precopy_size(migration); 489 490 *must_precopy += 491 migration->precopy_init_size + migration->precopy_dirty_size; 492 } 493 494 trace_vfio_state_pending_exact(vbasedev->name, *must_precopy, *can_postcopy, 495 stop_copy_size, migration->precopy_init_size, 496 migration->precopy_dirty_size); 497 } 498 499 static bool vfio_is_active_iterate(void *opaque) 500 { 501 VFIODevice *vbasedev = opaque; 502 503 return vfio_device_state_is_precopy(vbasedev); 504 } 505 506 /* 507 * Note about migration rate limiting: VFIO migration buffer size is currently 508 * limited to 1MB, so there is no need to check if migration rate exceeded (as 509 * in the worst case it will exceed by 1MB). However, if the buffer size is 510 * later changed to a bigger value, migration rate should be enforced here. 511 */ 512 static int vfio_save_iterate(QEMUFile *f, void *opaque) 513 { 514 VFIODevice *vbasedev = opaque; 515 VFIOMigration *migration = vbasedev->migration; 516 ssize_t data_size; 517 518 data_size = vfio_save_block(f, migration); 519 if (data_size < 0) { 520 return data_size; 521 } 522 523 vfio_update_estimated_pending_data(migration, data_size); 524 525 if (migrate_switchover_ack() && !migration->precopy_init_size && 526 !migration->initial_data_sent) { 527 qemu_put_be64(f, VFIO_MIG_FLAG_DEV_INIT_DATA_SENT); 528 migration->initial_data_sent = true; 529 } else { 530 qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); 531 } 532 533 trace_vfio_save_iterate(vbasedev->name, migration->precopy_init_size, 534 migration->precopy_dirty_size); 535 536 return !migration->precopy_init_size && !migration->precopy_dirty_size; 537 } 538 539 static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) 540 { 541 VFIODevice *vbasedev = opaque; 542 ssize_t data_size; 543 int ret; 544 545 /* We reach here with device state STOP or STOP_COPY only */ 546 ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY, 547 VFIO_DEVICE_STATE_STOP); 548 if (ret) { 549 return ret; 550 } 551 552 do { 553 data_size = vfio_save_block(f, vbasedev->migration); 554 if (data_size < 0) { 555 return data_size; 556 } 557 } while (data_size); 558 559 qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); 560 ret = qemu_file_get_error(f); 561 if (ret) { 562 return ret; 563 } 564 565 trace_vfio_save_complete_precopy(vbasedev->name, ret); 566 567 return ret; 568 } 569 570 static void vfio_save_state(QEMUFile *f, void *opaque) 571 { 572 VFIODevice *vbasedev = opaque; 573 int ret; 574 575 ret = vfio_save_device_config_state(f, opaque); 576 if (ret) { 577 error_report("%s: Failed to save device config space", 578 vbasedev->name); 579 qemu_file_set_error(f, ret); 580 } 581 } 582 583 static int vfio_load_setup(QEMUFile *f, void *opaque) 584 { 585 VFIODevice *vbasedev = opaque; 586 587 return vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RESUMING, 588 vbasedev->migration->device_state); 589 } 590 591 static int vfio_load_cleanup(void *opaque) 592 { 593 VFIODevice *vbasedev = opaque; 594 595 vfio_migration_cleanup(vbasedev); 596 trace_vfio_load_cleanup(vbasedev->name); 597 598 return 0; 599 } 600 601 static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) 602 { 603 VFIODevice *vbasedev = opaque; 604 int ret = 0; 605 uint64_t data; 606 607 data = qemu_get_be64(f); 608 while (data != VFIO_MIG_FLAG_END_OF_STATE) { 609 610 trace_vfio_load_state(vbasedev->name, data); 611 612 switch (data) { 613 case VFIO_MIG_FLAG_DEV_CONFIG_STATE: 614 { 615 return vfio_load_device_config_state(f, opaque); 616 } 617 case VFIO_MIG_FLAG_DEV_SETUP_STATE: 618 { 619 data = qemu_get_be64(f); 620 if (data == VFIO_MIG_FLAG_END_OF_STATE) { 621 return ret; 622 } else { 623 error_report("%s: SETUP STATE: EOS not found 0x%"PRIx64, 624 vbasedev->name, data); 625 return -EINVAL; 626 } 627 break; 628 } 629 case VFIO_MIG_FLAG_DEV_DATA_STATE: 630 { 631 uint64_t data_size = qemu_get_be64(f); 632 633 if (data_size) { 634 ret = vfio_load_buffer(f, vbasedev, data_size); 635 if (ret < 0) { 636 return ret; 637 } 638 } 639 break; 640 } 641 case VFIO_MIG_FLAG_DEV_INIT_DATA_SENT: 642 { 643 if (!vfio_precopy_supported(vbasedev) || 644 !migrate_switchover_ack()) { 645 error_report("%s: Received INIT_DATA_SENT but switchover ack " 646 "is not used", vbasedev->name); 647 return -EINVAL; 648 } 649 650 ret = qemu_loadvm_approve_switchover(); 651 if (ret) { 652 error_report( 653 "%s: qemu_loadvm_approve_switchover failed, err=%d (%s)", 654 vbasedev->name, ret, strerror(-ret)); 655 } 656 657 return ret; 658 } 659 default: 660 error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data); 661 return -EINVAL; 662 } 663 664 data = qemu_get_be64(f); 665 ret = qemu_file_get_error(f); 666 if (ret) { 667 return ret; 668 } 669 } 670 return ret; 671 } 672 673 static bool vfio_switchover_ack_needed(void *opaque) 674 { 675 VFIODevice *vbasedev = opaque; 676 677 return vfio_precopy_supported(vbasedev); 678 } 679 680 static const SaveVMHandlers savevm_vfio_handlers = { 681 .save_prepare = vfio_save_prepare, 682 .save_setup = vfio_save_setup, 683 .save_cleanup = vfio_save_cleanup, 684 .state_pending_estimate = vfio_state_pending_estimate, 685 .state_pending_exact = vfio_state_pending_exact, 686 .is_active_iterate = vfio_is_active_iterate, 687 .save_live_iterate = vfio_save_iterate, 688 .save_live_complete_precopy = vfio_save_complete_precopy, 689 .save_state = vfio_save_state, 690 .load_setup = vfio_load_setup, 691 .load_cleanup = vfio_load_cleanup, 692 .load_state = vfio_load_state, 693 .switchover_ack_needed = vfio_switchover_ack_needed, 694 }; 695 696 /* ---------------------------------------------------------------------- */ 697 698 static void vfio_vmstate_change_prepare(void *opaque, bool running, 699 RunState state) 700 { 701 VFIODevice *vbasedev = opaque; 702 VFIOMigration *migration = vbasedev->migration; 703 enum vfio_device_mig_state new_state; 704 int ret; 705 706 new_state = migration->device_state == VFIO_DEVICE_STATE_PRE_COPY ? 707 VFIO_DEVICE_STATE_PRE_COPY_P2P : 708 VFIO_DEVICE_STATE_RUNNING_P2P; 709 710 ret = vfio_migration_set_state_or_reset(vbasedev, new_state); 711 if (ret) { 712 /* 713 * Migration should be aborted in this case, but vm_state_notify() 714 * currently does not support reporting failures. 715 */ 716 migration_file_set_error(ret); 717 } 718 719 trace_vfio_vmstate_change_prepare(vbasedev->name, running, 720 RunState_str(state), 721 mig_state_to_str(new_state)); 722 } 723 724 static void vfio_vmstate_change(void *opaque, bool running, RunState state) 725 { 726 VFIODevice *vbasedev = opaque; 727 enum vfio_device_mig_state new_state; 728 int ret; 729 730 if (running) { 731 new_state = VFIO_DEVICE_STATE_RUNNING; 732 } else { 733 new_state = 734 (vfio_device_state_is_precopy(vbasedev) && 735 (state == RUN_STATE_FINISH_MIGRATE || state == RUN_STATE_PAUSED)) ? 736 VFIO_DEVICE_STATE_STOP_COPY : 737 VFIO_DEVICE_STATE_STOP; 738 } 739 740 ret = vfio_migration_set_state_or_reset(vbasedev, new_state); 741 if (ret) { 742 /* 743 * Migration should be aborted in this case, but vm_state_notify() 744 * currently does not support reporting failures. 745 */ 746 migration_file_set_error(ret); 747 } 748 749 trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state), 750 mig_state_to_str(new_state)); 751 } 752 753 static int vfio_migration_state_notifier(NotifierWithReturn *notifier, 754 MigrationEvent *e, Error **errp) 755 { 756 VFIOMigration *migration = container_of(notifier, VFIOMigration, 757 migration_state); 758 VFIODevice *vbasedev = migration->vbasedev; 759 760 trace_vfio_migration_state_notifier(vbasedev->name, e->type); 761 762 if (e->type == MIG_EVENT_PRECOPY_FAILED) { 763 vfio_migration_set_state_or_reset(vbasedev, VFIO_DEVICE_STATE_RUNNING); 764 } 765 return 0; 766 } 767 768 static void vfio_migration_free(VFIODevice *vbasedev) 769 { 770 g_free(vbasedev->migration); 771 vbasedev->migration = NULL; 772 } 773 774 static int vfio_migration_query_flags(VFIODevice *vbasedev, uint64_t *mig_flags) 775 { 776 uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) + 777 sizeof(struct vfio_device_feature_migration), 778 sizeof(uint64_t))] = {}; 779 struct vfio_device_feature *feature = (struct vfio_device_feature *)buf; 780 struct vfio_device_feature_migration *mig = 781 (struct vfio_device_feature_migration *)feature->data; 782 783 feature->argsz = sizeof(buf); 784 feature->flags = VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_MIGRATION; 785 if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) { 786 return -errno; 787 } 788 789 *mig_flags = mig->flags; 790 791 return 0; 792 } 793 794 static bool vfio_dma_logging_supported(VFIODevice *vbasedev) 795 { 796 uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature), 797 sizeof(uint64_t))] = {}; 798 struct vfio_device_feature *feature = (struct vfio_device_feature *)buf; 799 800 feature->argsz = sizeof(buf); 801 feature->flags = VFIO_DEVICE_FEATURE_PROBE | 802 VFIO_DEVICE_FEATURE_DMA_LOGGING_START; 803 804 return !ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature); 805 } 806 807 static int vfio_migration_init(VFIODevice *vbasedev) 808 { 809 int ret; 810 Object *obj; 811 VFIOMigration *migration; 812 char id[256] = ""; 813 g_autofree char *path = NULL, *oid = NULL; 814 uint64_t mig_flags = 0; 815 VMChangeStateHandler *prepare_cb; 816 817 if (!vbasedev->ops->vfio_get_object) { 818 return -EINVAL; 819 } 820 821 obj = vbasedev->ops->vfio_get_object(vbasedev); 822 if (!obj) { 823 return -EINVAL; 824 } 825 826 ret = vfio_migration_query_flags(vbasedev, &mig_flags); 827 if (ret) { 828 return ret; 829 } 830 831 /* Basic migration functionality must be supported */ 832 if (!(mig_flags & VFIO_MIGRATION_STOP_COPY)) { 833 return -EOPNOTSUPP; 834 } 835 836 vbasedev->migration = g_new0(VFIOMigration, 1); 837 migration = vbasedev->migration; 838 migration->vbasedev = vbasedev; 839 migration->device_state = VFIO_DEVICE_STATE_RUNNING; 840 migration->data_fd = -1; 841 migration->mig_flags = mig_flags; 842 843 vbasedev->dirty_pages_supported = vfio_dma_logging_supported(vbasedev); 844 845 oid = vmstate_if_get_id(VMSTATE_IF(DEVICE(obj))); 846 if (oid) { 847 path = g_strdup_printf("%s/vfio", oid); 848 } else { 849 path = g_strdup("vfio"); 850 } 851 strpadcpy(id, sizeof(id), path, '\0'); 852 853 register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers, 854 vbasedev); 855 856 prepare_cb = migration->mig_flags & VFIO_MIGRATION_P2P ? 857 vfio_vmstate_change_prepare : 858 NULL; 859 migration->vm_state = qdev_add_vm_change_state_handler_full( 860 vbasedev->dev, vfio_vmstate_change, prepare_cb, vbasedev); 861 migration_add_notifier(&migration->migration_state, 862 vfio_migration_state_notifier); 863 864 return 0; 865 } 866 867 static void vfio_migration_deinit(VFIODevice *vbasedev) 868 { 869 VFIOMigration *migration = vbasedev->migration; 870 871 migration_remove_notifier(&migration->migration_state); 872 qemu_del_vm_change_state_handler(migration->vm_state); 873 unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev); 874 vfio_migration_free(vbasedev); 875 vfio_unblock_multiple_devices_migration(); 876 } 877 878 static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp) 879 { 880 if (vbasedev->enable_migration == ON_OFF_AUTO_ON) { 881 error_propagate(errp, err); 882 return -EINVAL; 883 } 884 885 vbasedev->migration_blocker = error_copy(err); 886 error_free(err); 887 888 return migrate_add_blocker_normal(&vbasedev->migration_blocker, errp); 889 } 890 891 /* ---------------------------------------------------------------------- */ 892 893 int64_t vfio_mig_bytes_transferred(void) 894 { 895 return bytes_transferred; 896 } 897 898 void vfio_reset_bytes_transferred(void) 899 { 900 bytes_transferred = 0; 901 } 902 903 /* 904 * Return true when either migration initialized or blocker registered. 905 * Currently only return false when adding blocker fails which will 906 * de-register vfio device. 907 */ 908 bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp) 909 { 910 Error *err = NULL; 911 int ret; 912 913 if (vbasedev->enable_migration == ON_OFF_AUTO_OFF) { 914 error_setg(&err, "%s: Migration is disabled for VFIO device", 915 vbasedev->name); 916 return !vfio_block_migration(vbasedev, err, errp); 917 } 918 919 ret = vfio_migration_init(vbasedev); 920 if (ret) { 921 if (ret == -ENOTTY) { 922 error_setg(&err, "%s: VFIO migration is not supported in kernel", 923 vbasedev->name); 924 } else { 925 error_setg(&err, 926 "%s: Migration couldn't be initialized for VFIO device, " 927 "err: %d (%s)", 928 vbasedev->name, ret, strerror(-ret)); 929 } 930 931 return !vfio_block_migration(vbasedev, err, errp); 932 } 933 934 if (!vbasedev->dirty_pages_supported) { 935 if (vbasedev->enable_migration == ON_OFF_AUTO_AUTO) { 936 error_setg(&err, 937 "%s: VFIO device doesn't support device dirty tracking", 938 vbasedev->name); 939 goto add_blocker; 940 } 941 942 warn_report("%s: VFIO device doesn't support device dirty tracking", 943 vbasedev->name); 944 } 945 946 ret = vfio_block_multiple_devices_migration(vbasedev, errp); 947 if (ret) { 948 goto out_deinit; 949 } 950 951 if (vfio_viommu_preset(vbasedev)) { 952 error_setg(&err, "%s: Migration is currently not supported " 953 "with vIOMMU enabled", vbasedev->name); 954 goto add_blocker; 955 } 956 957 trace_vfio_migration_realize(vbasedev->name); 958 return true; 959 960 add_blocker: 961 ret = vfio_block_migration(vbasedev, err, errp); 962 out_deinit: 963 if (ret) { 964 vfio_migration_deinit(vbasedev); 965 } 966 return !ret; 967 } 968 969 void vfio_migration_exit(VFIODevice *vbasedev) 970 { 971 if (vbasedev->migration) { 972 vfio_migration_deinit(vbasedev); 973 } 974 975 migrate_del_blocker(&vbasedev->migration_blocker); 976 } 977