xref: /openbmc/qemu/hw/vfio/migration.c (revision 57b3a7d8)
1 /*
2  * Migration support for VFIO devices
3  *
4  * Copyright NVIDIA, Inc. 2020
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2. See
7  * the COPYING file in the top-level directory.
8  */
9 
10 #include "qemu/osdep.h"
11 #include "qemu/main-loop.h"
12 #include "qemu/cutils.h"
13 #include <linux/vfio.h>
14 #include <sys/ioctl.h>
15 
16 #include "sysemu/runstate.h"
17 #include "hw/vfio/vfio-common.h"
18 #include "cpu.h"
19 #include "migration/migration.h"
20 #include "migration/vmstate.h"
21 #include "migration/qemu-file.h"
22 #include "migration/register.h"
23 #include "migration/blocker.h"
24 #include "migration/misc.h"
25 #include "qapi/error.h"
26 #include "exec/ramlist.h"
27 #include "exec/ram_addr.h"
28 #include "pci.h"
29 #include "trace.h"
30 #include "hw/hw.h"
31 
/*
 * Unique 64-bit delimiters for VFIO device sections in the migration stream.
 * Every delimiter is laid out as:
 * 0xffffffff => MSB 32-bit all 1s
 * 0xef10     => Magic ID, represents emulated (virtual) function IO
 * 0x0000     => 16-bits reserved for flags
 *
 * _DEV_CONFIG_STATE, _DEV_SETUP_STATE and _DEV_DATA_STATE each mark the
 * beginning of the corresponding kind of state information; _END_OF_STATE
 * marks the end of it.
 */
#define VFIO_MIG_FLAG_PREFIX            (0xffffffffef100000ULL)

#define VFIO_MIG_FLAG_END_OF_STATE      (VFIO_MIG_FLAG_PREFIX | 0x0001ULL)
#define VFIO_MIG_FLAG_DEV_CONFIG_STATE  (VFIO_MIG_FLAG_PREFIX | 0x0002ULL)
#define VFIO_MIG_FLAG_DEV_SETUP_STATE   (VFIO_MIG_FLAG_PREFIX | 0x0003ULL)
#define VFIO_MIG_FLAG_DEV_DATA_STATE    (VFIO_MIG_FLAG_PREFIX | 0x0004ULL)
47 
/*
 * File-wide running total of the data_size values processed by
 * vfio_save_buffer(). It is reset to zero by vfio_migration_state_notifier()
 * when migration is cancelling, cancelled or failed, and is reported to
 * callers through vfio_mig_bytes_transferred().
 */
static int64_t bytes_transferred;
49 
50 static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count,
51                                   off_t off, bool iswrite)
52 {
53     int ret;
54 
55     ret = iswrite ? pwrite(vbasedev->fd, val, count, off) :
56                     pread(vbasedev->fd, val, count, off);
57     if (ret < count) {
58         error_report("vfio_mig_%s %d byte %s: failed at offset 0x%"
59                      HWADDR_PRIx", err: %s", iswrite ? "write" : "read", count,
60                      vbasedev->name, off, strerror(errno));
61         return (ret < 0) ? ret : -EINVAL;
62     }
63     return 0;
64 }
65 
66 static int vfio_mig_rw(VFIODevice *vbasedev, __u8 *buf, size_t count,
67                        off_t off, bool iswrite)
68 {
69     int ret, done = 0;
70     __u8 *tbuf = buf;
71 
72     while (count) {
73         int bytes = 0;
74 
75         if (count >= 8 && !(off % 8)) {
76             bytes = 8;
77         } else if (count >= 4 && !(off % 4)) {
78             bytes = 4;
79         } else if (count >= 2 && !(off % 2)) {
80             bytes = 2;
81         } else {
82             bytes = 1;
83         }
84 
85         ret = vfio_mig_access(vbasedev, tbuf, bytes, off, iswrite);
86         if (ret) {
87             return ret;
88         }
89 
90         count -= bytes;
91         done += bytes;
92         off += bytes;
93         tbuf += bytes;
94     }
95     return done;
96 }
97 
/*
 * Convenience wrappers around vfio_mig_rw(): move @c bytes between the object
 * pointed to by @v and file offset @o of device @f. The pointer argument is
 * parenthesized before the cast so that an expression such as "p + 1" is cast
 * as a whole.
 */
#define vfio_mig_read(f, v, c, o)       \
                            vfio_mig_rw((f), (__u8 *)(v), (c), (o), false)
#define vfio_mig_write(f, v, c, o)      \
                            vfio_mig_rw((f), (__u8 *)(v), (c), (o), true)

/* Byte offset of member @f inside struct vfio_device_migration_info. */
#define VFIO_MIG_STRUCT_OFFSET(f)       \
                                 offsetof(struct vfio_device_migration_info, f)
103 /*
104  * Change the device_state register for device @vbasedev. Bits set in @mask
105  * are preserved, bits set in @value are set, and bits not set in either @mask
106  * or @value are cleared in device_state. If the register cannot be accessed,
107  * the resulting state would be invalid, or the device enters an error state,
108  * an error is returned.
109  */
110 
111 static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask,
112                                     uint32_t value)
113 {
114     VFIOMigration *migration = vbasedev->migration;
115     VFIORegion *region = &migration->region;
116     off_t dev_state_off = region->fd_offset +
117                           VFIO_MIG_STRUCT_OFFSET(device_state);
118     uint32_t device_state;
119     int ret;
120 
121     ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state),
122                         dev_state_off);
123     if (ret < 0) {
124         return ret;
125     }
126 
127     device_state = (device_state & mask) | value;
128 
129     if (!VFIO_DEVICE_STATE_VALID(device_state)) {
130         return -EINVAL;
131     }
132 
133     ret = vfio_mig_write(vbasedev, &device_state, sizeof(device_state),
134                          dev_state_off);
135     if (ret < 0) {
136         int rret;
137 
138         rret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state),
139                              dev_state_off);
140 
141         if ((rret < 0) || (VFIO_DEVICE_STATE_IS_ERROR(device_state))) {
142             hw_error("%s: Device in error state 0x%x", vbasedev->name,
143                      device_state);
144             return rret ? rret : -EIO;
145         }
146         return ret;
147     }
148 
149     migration->device_state = device_state;
150     trace_vfio_migration_set_state(vbasedev->name, device_state);
151     return 0;
152 }
153 
154 static void *get_data_section_size(VFIORegion *region, uint64_t data_offset,
155                                    uint64_t data_size, uint64_t *size)
156 {
157     void *ptr = NULL;
158     uint64_t limit = 0;
159     int i;
160 
161     if (!region->mmaps) {
162         if (size) {
163             *size = MIN(data_size, region->size - data_offset);
164         }
165         return ptr;
166     }
167 
168     for (i = 0; i < region->nr_mmaps; i++) {
169         VFIOMmap *map = region->mmaps + i;
170 
171         if ((data_offset >= map->offset) &&
172             (data_offset < map->offset + map->size)) {
173 
174             /* check if data_offset is within sparse mmap areas */
175             ptr = map->mmap + data_offset - map->offset;
176             if (size) {
177                 *size = MIN(data_size, map->offset + map->size - data_offset);
178             }
179             break;
180         } else if ((data_offset < map->offset) &&
181                    (!limit || limit > map->offset)) {
182             /*
183              * data_offset is not within sparse mmap areas, find size of
184              * non-mapped area. Check through all list since region->mmaps list
185              * is not sorted.
186              */
187             limit = map->offset;
188         }
189     }
190 
191     if (!ptr && size) {
192         *size = limit ? MIN(data_size, limit - data_offset) : data_size;
193     }
194     return ptr;
195 }
196 
197 static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size)
198 {
199     VFIOMigration *migration = vbasedev->migration;
200     VFIORegion *region = &migration->region;
201     uint64_t data_offset = 0, data_size = 0, sz;
202     int ret;
203 
204     ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
205                       region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset));
206     if (ret < 0) {
207         return ret;
208     }
209 
210     ret = vfio_mig_read(vbasedev, &data_size, sizeof(data_size),
211                         region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size));
212     if (ret < 0) {
213         return ret;
214     }
215 
216     trace_vfio_save_buffer(vbasedev->name, data_offset, data_size,
217                            migration->pending_bytes);
218 
219     qemu_put_be64(f, data_size);
220     sz = data_size;
221 
222     while (sz) {
223         void *buf;
224         uint64_t sec_size;
225         bool buf_allocated = false;
226 
227         buf = get_data_section_size(region, data_offset, sz, &sec_size);
228 
229         if (!buf) {
230             buf = g_try_malloc(sec_size);
231             if (!buf) {
232                 error_report("%s: Error allocating buffer ", __func__);
233                 return -ENOMEM;
234             }
235             buf_allocated = true;
236 
237             ret = vfio_mig_read(vbasedev, buf, sec_size,
238                                 region->fd_offset + data_offset);
239             if (ret < 0) {
240                 g_free(buf);
241                 return ret;
242             }
243         }
244 
245         qemu_put_buffer(f, buf, sec_size);
246 
247         if (buf_allocated) {
248             g_free(buf);
249         }
250         sz -= sec_size;
251         data_offset += sec_size;
252     }
253 
254     ret = qemu_file_get_error(f);
255 
256     if (!ret && size) {
257         *size = data_size;
258     }
259 
260     bytes_transferred += data_size;
261     return ret;
262 }
263 
264 static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
265                             uint64_t data_size)
266 {
267     VFIORegion *region = &vbasedev->migration->region;
268     uint64_t data_offset = 0, size, report_size;
269     int ret;
270 
271     do {
272         ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
273                       region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset));
274         if (ret < 0) {
275             return ret;
276         }
277 
278         if (data_offset + data_size > region->size) {
279             /*
280              * If data_size is greater than the data section of migration region
281              * then iterate the write buffer operation. This case can occur if
282              * size of migration region at destination is smaller than size of
283              * migration region at source.
284              */
285             report_size = size = region->size - data_offset;
286             data_size -= size;
287         } else {
288             report_size = size = data_size;
289             data_size = 0;
290         }
291 
292         trace_vfio_load_state_device_data(vbasedev->name, data_offset, size);
293 
294         while (size) {
295             void *buf;
296             uint64_t sec_size;
297             bool buf_alloc = false;
298 
299             buf = get_data_section_size(region, data_offset, size, &sec_size);
300 
301             if (!buf) {
302                 buf = g_try_malloc(sec_size);
303                 if (!buf) {
304                     error_report("%s: Error allocating buffer ", __func__);
305                     return -ENOMEM;
306                 }
307                 buf_alloc = true;
308             }
309 
310             qemu_get_buffer(f, buf, sec_size);
311 
312             if (buf_alloc) {
313                 ret = vfio_mig_write(vbasedev, buf, sec_size,
314                         region->fd_offset + data_offset);
315                 g_free(buf);
316 
317                 if (ret < 0) {
318                     return ret;
319                 }
320             }
321             size -= sec_size;
322             data_offset += sec_size;
323         }
324 
325         ret = vfio_mig_write(vbasedev, &report_size, sizeof(report_size),
326                         region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size));
327         if (ret < 0) {
328             return ret;
329         }
330     } while (data_size);
331 
332     return 0;
333 }
334 
335 static int vfio_update_pending(VFIODevice *vbasedev)
336 {
337     VFIOMigration *migration = vbasedev->migration;
338     VFIORegion *region = &migration->region;
339     uint64_t pending_bytes = 0;
340     int ret;
341 
342     ret = vfio_mig_read(vbasedev, &pending_bytes, sizeof(pending_bytes),
343                     region->fd_offset + VFIO_MIG_STRUCT_OFFSET(pending_bytes));
344     if (ret < 0) {
345         migration->pending_bytes = 0;
346         return ret;
347     }
348 
349     migration->pending_bytes = pending_bytes;
350     trace_vfio_update_pending(vbasedev->name, pending_bytes);
351     return 0;
352 }
353 
354 static int vfio_save_device_config_state(QEMUFile *f, void *opaque)
355 {
356     VFIODevice *vbasedev = opaque;
357 
358     qemu_put_be64(f, VFIO_MIG_FLAG_DEV_CONFIG_STATE);
359 
360     if (vbasedev->ops && vbasedev->ops->vfio_save_config) {
361         vbasedev->ops->vfio_save_config(vbasedev, f);
362     }
363 
364     qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
365 
366     trace_vfio_save_device_config_state(vbasedev->name);
367 
368     return qemu_file_get_error(f);
369 }
370 
371 static int vfio_load_device_config_state(QEMUFile *f, void *opaque)
372 {
373     VFIODevice *vbasedev = opaque;
374     uint64_t data;
375 
376     if (vbasedev->ops && vbasedev->ops->vfio_load_config) {
377         int ret;
378 
379         ret = vbasedev->ops->vfio_load_config(vbasedev, f);
380         if (ret) {
381             error_report("%s: Failed to load device config space",
382                          vbasedev->name);
383             return ret;
384         }
385     }
386 
387     data = qemu_get_be64(f);
388     if (data != VFIO_MIG_FLAG_END_OF_STATE) {
389         error_report("%s: Failed loading device config space, "
390                      "end flag incorrect 0x%"PRIx64, vbasedev->name, data);
391         return -EINVAL;
392     }
393 
394     trace_vfio_load_device_config_state(vbasedev->name);
395     return qemu_file_get_error(f);
396 }
397 
398 static int vfio_set_dirty_page_tracking(VFIODevice *vbasedev, bool start)
399 {
400     int ret;
401     VFIOMigration *migration = vbasedev->migration;
402     VFIOContainer *container = vbasedev->group->container;
403     struct vfio_iommu_type1_dirty_bitmap dirty = {
404         .argsz = sizeof(dirty),
405     };
406 
407     if (start) {
408         if (migration->device_state & VFIO_DEVICE_STATE_SAVING) {
409             dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START;
410         } else {
411             return -EINVAL;
412         }
413     } else {
414             dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP;
415     }
416 
417     ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty);
418     if (ret) {
419         error_report("Failed to set dirty tracking flag 0x%x errno: %d",
420                      dirty.flags, errno);
421         return -errno;
422     }
423     return ret;
424 }
425 
426 static void vfio_migration_cleanup(VFIODevice *vbasedev)
427 {
428     VFIOMigration *migration = vbasedev->migration;
429 
430     vfio_set_dirty_page_tracking(vbasedev, false);
431 
432     if (migration->region.mmaps) {
433         vfio_region_unmap(&migration->region);
434     }
435 }
436 
437 /* ---------------------------------------------------------------------- */
438 
439 static int vfio_save_setup(QEMUFile *f, void *opaque)
440 {
441     VFIODevice *vbasedev = opaque;
442     VFIOMigration *migration = vbasedev->migration;
443     int ret;
444 
445     trace_vfio_save_setup(vbasedev->name);
446 
447     qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
448 
449     if (migration->region.mmaps) {
450         /*
451          * Calling vfio_region_mmap() from migration thread. Memory API called
452          * from this function require locking the iothread when called from
453          * outside the main loop thread.
454          */
455         qemu_mutex_lock_iothread();
456         ret = vfio_region_mmap(&migration->region);
457         qemu_mutex_unlock_iothread();
458         if (ret) {
459             error_report("%s: Failed to mmap VFIO migration region: %s",
460                          vbasedev->name, strerror(-ret));
461             error_report("%s: Falling back to slow path", vbasedev->name);
462         }
463     }
464 
465     ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK,
466                                    VFIO_DEVICE_STATE_SAVING);
467     if (ret) {
468         error_report("%s: Failed to set state SAVING", vbasedev->name);
469         return ret;
470     }
471 
472     ret = vfio_set_dirty_page_tracking(vbasedev, true);
473     if (ret) {
474         return ret;
475     }
476 
477     qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
478 
479     ret = qemu_file_get_error(f);
480     if (ret) {
481         return ret;
482     }
483 
484     return 0;
485 }
486 
487 static void vfio_save_cleanup(void *opaque)
488 {
489     VFIODevice *vbasedev = opaque;
490 
491     vfio_migration_cleanup(vbasedev);
492     trace_vfio_save_cleanup(vbasedev->name);
493 }
494 
495 static void vfio_save_pending(QEMUFile *f, void *opaque,
496                               uint64_t threshold_size,
497                               uint64_t *res_precopy_only,
498                               uint64_t *res_compatible,
499                               uint64_t *res_postcopy_only)
500 {
501     VFIODevice *vbasedev = opaque;
502     VFIOMigration *migration = vbasedev->migration;
503     int ret;
504 
505     ret = vfio_update_pending(vbasedev);
506     if (ret) {
507         return;
508     }
509 
510     *res_precopy_only += migration->pending_bytes;
511 
512     trace_vfio_save_pending(vbasedev->name, *res_precopy_only,
513                             *res_postcopy_only, *res_compatible);
514 }
515 
516 static int vfio_save_iterate(QEMUFile *f, void *opaque)
517 {
518     VFIODevice *vbasedev = opaque;
519     VFIOMigration *migration = vbasedev->migration;
520     uint64_t data_size;
521     int ret;
522 
523     qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
524 
525     if (migration->pending_bytes == 0) {
526         ret = vfio_update_pending(vbasedev);
527         if (ret) {
528             return ret;
529         }
530 
531         if (migration->pending_bytes == 0) {
532             qemu_put_be64(f, 0);
533             qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
534             /* indicates data finished, goto complete phase */
535             return 1;
536         }
537     }
538 
539     ret = vfio_save_buffer(f, vbasedev, &data_size);
540     if (ret) {
541         error_report("%s: vfio_save_buffer failed %s", vbasedev->name,
542                      strerror(errno));
543         return ret;
544     }
545 
546     qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
547 
548     ret = qemu_file_get_error(f);
549     if (ret) {
550         return ret;
551     }
552 
553     /*
554      * Reset pending_bytes as .save_live_pending is not called during savevm or
555      * snapshot case, in such case vfio_update_pending() at the start of this
556      * function updates pending_bytes.
557      */
558     migration->pending_bytes = 0;
559     trace_vfio_save_iterate(vbasedev->name, data_size);
560     return 0;
561 }
562 
563 static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
564 {
565     VFIODevice *vbasedev = opaque;
566     VFIOMigration *migration = vbasedev->migration;
567     uint64_t data_size;
568     int ret;
569 
570     ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_RUNNING,
571                                    VFIO_DEVICE_STATE_SAVING);
572     if (ret) {
573         error_report("%s: Failed to set state STOP and SAVING",
574                      vbasedev->name);
575         return ret;
576     }
577 
578     ret = vfio_save_device_config_state(f, opaque);
579     if (ret) {
580         return ret;
581     }
582 
583     ret = vfio_update_pending(vbasedev);
584     if (ret) {
585         return ret;
586     }
587 
588     while (migration->pending_bytes > 0) {
589         qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
590         ret = vfio_save_buffer(f, vbasedev, &data_size);
591         if (ret < 0) {
592             error_report("%s: Failed to save buffer", vbasedev->name);
593             return ret;
594         }
595 
596         if (data_size == 0) {
597             break;
598         }
599 
600         ret = vfio_update_pending(vbasedev);
601         if (ret) {
602             return ret;
603         }
604     }
605 
606     qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
607 
608     ret = qemu_file_get_error(f);
609     if (ret) {
610         return ret;
611     }
612 
613     ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_SAVING, 0);
614     if (ret) {
615         error_report("%s: Failed to set state STOPPED", vbasedev->name);
616         return ret;
617     }
618 
619     trace_vfio_save_complete_precopy(vbasedev->name);
620     return ret;
621 }
622 
623 static int vfio_load_setup(QEMUFile *f, void *opaque)
624 {
625     VFIODevice *vbasedev = opaque;
626     VFIOMigration *migration = vbasedev->migration;
627     int ret = 0;
628 
629     if (migration->region.mmaps) {
630         ret = vfio_region_mmap(&migration->region);
631         if (ret) {
632             error_report("%s: Failed to mmap VFIO migration region %d: %s",
633                          vbasedev->name, migration->region.nr,
634                          strerror(-ret));
635             error_report("%s: Falling back to slow path", vbasedev->name);
636         }
637     }
638 
639     ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_MASK,
640                                    VFIO_DEVICE_STATE_RESUMING);
641     if (ret) {
642         error_report("%s: Failed to set state RESUMING", vbasedev->name);
643         if (migration->region.mmaps) {
644             vfio_region_unmap(&migration->region);
645         }
646     }
647     return ret;
648 }
649 
650 static int vfio_load_cleanup(void *opaque)
651 {
652     VFIODevice *vbasedev = opaque;
653 
654     vfio_migration_cleanup(vbasedev);
655     trace_vfio_load_cleanup(vbasedev->name);
656     return 0;
657 }
658 
659 static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
660 {
661     VFIODevice *vbasedev = opaque;
662     int ret = 0;
663     uint64_t data;
664 
665     data = qemu_get_be64(f);
666     while (data != VFIO_MIG_FLAG_END_OF_STATE) {
667 
668         trace_vfio_load_state(vbasedev->name, data);
669 
670         switch (data) {
671         case VFIO_MIG_FLAG_DEV_CONFIG_STATE:
672         {
673             ret = vfio_load_device_config_state(f, opaque);
674             if (ret) {
675                 return ret;
676             }
677             break;
678         }
679         case VFIO_MIG_FLAG_DEV_SETUP_STATE:
680         {
681             data = qemu_get_be64(f);
682             if (data == VFIO_MIG_FLAG_END_OF_STATE) {
683                 return ret;
684             } else {
685                 error_report("%s: SETUP STATE: EOS not found 0x%"PRIx64,
686                              vbasedev->name, data);
687                 return -EINVAL;
688             }
689             break;
690         }
691         case VFIO_MIG_FLAG_DEV_DATA_STATE:
692         {
693             uint64_t data_size = qemu_get_be64(f);
694 
695             if (data_size) {
696                 ret = vfio_load_buffer(f, vbasedev, data_size);
697                 if (ret < 0) {
698                     return ret;
699                 }
700             }
701             break;
702         }
703         default:
704             error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data);
705             return -EINVAL;
706         }
707 
708         data = qemu_get_be64(f);
709         ret = qemu_file_get_error(f);
710         if (ret) {
711             return ret;
712         }
713     }
714     return ret;
715 }
716 
717 static SaveVMHandlers savevm_vfio_handlers = {
718     .save_setup = vfio_save_setup,
719     .save_cleanup = vfio_save_cleanup,
720     .save_live_pending = vfio_save_pending,
721     .save_live_iterate = vfio_save_iterate,
722     .save_live_complete_precopy = vfio_save_complete_precopy,
723     .load_setup = vfio_load_setup,
724     .load_cleanup = vfio_load_cleanup,
725     .load_state = vfio_load_state,
726 };
727 
728 /* ---------------------------------------------------------------------- */
729 
730 static void vfio_vmstate_change(void *opaque, int running, RunState state)
731 {
732     VFIODevice *vbasedev = opaque;
733     VFIOMigration *migration = vbasedev->migration;
734     uint32_t value, mask;
735     int ret;
736 
737     if (vbasedev->migration->vm_running == running) {
738         return;
739     }
740 
741     if (running) {
742         /*
743          * Here device state can have one of _SAVING, _RESUMING or _STOP bit.
744          * Transition from _SAVING to _RUNNING can happen if there is migration
745          * failure, in that case clear _SAVING bit.
746          * Transition from _RESUMING to _RUNNING occurs during resuming
747          * phase, in that case clear _RESUMING bit.
748          * In both the above cases, set _RUNNING bit.
749          */
750         mask = ~VFIO_DEVICE_STATE_MASK;
751         value = VFIO_DEVICE_STATE_RUNNING;
752     } else {
753         /*
754          * Here device state could be either _RUNNING or _SAVING|_RUNNING. Reset
755          * _RUNNING bit
756          */
757         mask = ~VFIO_DEVICE_STATE_RUNNING;
758         value = 0;
759     }
760 
761     ret = vfio_migration_set_state(vbasedev, mask, value);
762     if (ret) {
763         /*
764          * Migration should be aborted in this case, but vm_state_notify()
765          * currently does not support reporting failures.
766          */
767         error_report("%s: Failed to set device state 0x%x", vbasedev->name,
768                      (migration->device_state & mask) | value);
769         qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
770     }
771     vbasedev->migration->vm_running = running;
772     trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state),
773             (migration->device_state & mask) | value);
774 }
775 
776 static void vfio_migration_state_notifier(Notifier *notifier, void *data)
777 {
778     MigrationState *s = data;
779     VFIOMigration *migration = container_of(notifier, VFIOMigration,
780                                             migration_state);
781     VFIODevice *vbasedev = migration->vbasedev;
782     int ret;
783 
784     trace_vfio_migration_state_notifier(vbasedev->name,
785                                         MigrationStatus_str(s->state));
786 
787     switch (s->state) {
788     case MIGRATION_STATUS_CANCELLING:
789     case MIGRATION_STATUS_CANCELLED:
790     case MIGRATION_STATUS_FAILED:
791         bytes_transferred = 0;
792         ret = vfio_migration_set_state(vbasedev,
793                       ~(VFIO_DEVICE_STATE_SAVING | VFIO_DEVICE_STATE_RESUMING),
794                       VFIO_DEVICE_STATE_RUNNING);
795         if (ret) {
796             error_report("%s: Failed to set state RUNNING", vbasedev->name);
797         }
798     }
799 }
800 
801 static void vfio_migration_exit(VFIODevice *vbasedev)
802 {
803     VFIOMigration *migration = vbasedev->migration;
804 
805     vfio_region_exit(&migration->region);
806     vfio_region_finalize(&migration->region);
807     g_free(vbasedev->migration);
808     vbasedev->migration = NULL;
809 }
810 
811 static int vfio_migration_init(VFIODevice *vbasedev,
812                                struct vfio_region_info *info)
813 {
814     int ret;
815     Object *obj;
816     VFIOMigration *migration;
817     char id[256] = "";
818     g_autofree char *path = NULL, *oid = NULL;
819 
820     if (!vbasedev->ops->vfio_get_object) {
821         return -EINVAL;
822     }
823 
824     obj = vbasedev->ops->vfio_get_object(vbasedev);
825     if (!obj) {
826         return -EINVAL;
827     }
828 
829     vbasedev->migration = g_new0(VFIOMigration, 1);
830 
831     ret = vfio_region_setup(obj, vbasedev, &vbasedev->migration->region,
832                             info->index, "migration");
833     if (ret) {
834         error_report("%s: Failed to setup VFIO migration region %d: %s",
835                      vbasedev->name, info->index, strerror(-ret));
836         goto err;
837     }
838 
839     if (!vbasedev->migration->region.size) {
840         error_report("%s: Invalid zero-sized VFIO migration region %d",
841                      vbasedev->name, info->index);
842         ret = -EINVAL;
843         goto err;
844     }
845 
846     migration = vbasedev->migration;
847     migration->vbasedev = vbasedev;
848 
849     oid = vmstate_if_get_id(VMSTATE_IF(DEVICE(obj)));
850     if (oid) {
851         path = g_strdup_printf("%s/vfio", oid);
852     } else {
853         path = g_strdup("vfio");
854     }
855     strpadcpy(id, sizeof(id), path, '\0');
856 
857     register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers,
858                          vbasedev);
859 
860     migration->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change,
861                                                            vbasedev);
862     migration->migration_state.notify = vfio_migration_state_notifier;
863     add_migration_state_change_notifier(&migration->migration_state);
864     return 0;
865 
866 err:
867     vfio_migration_exit(vbasedev);
868     return ret;
869 }
870 
871 /* ---------------------------------------------------------------------- */
872 
873 int64_t vfio_mig_bytes_transferred(void)
874 {
875     return bytes_transferred;
876 }
877 
878 int vfio_migration_probe(VFIODevice *vbasedev, Error **errp)
879 {
880     VFIOContainer *container = vbasedev->group->container;
881     struct vfio_region_info *info = NULL;
882     Error *local_err = NULL;
883     int ret = -ENOTSUP;
884 
885     if (!container->dirty_pages_supported) {
886         goto add_blocker;
887     }
888 
889     ret = vfio_get_dev_region_info(vbasedev, VFIO_REGION_TYPE_MIGRATION,
890                                    VFIO_REGION_SUBTYPE_MIGRATION, &info);
891     if (ret) {
892         goto add_blocker;
893     }
894 
895     ret = vfio_migration_init(vbasedev, info);
896     if (ret) {
897         goto add_blocker;
898     }
899 
900     g_free(info);
901     trace_vfio_migration_probe(vbasedev->name, info->index);
902     return 0;
903 
904 add_blocker:
905     error_setg(&vbasedev->migration_blocker,
906                "VFIO device doesn't support migration");
907     g_free(info);
908 
909     ret = migrate_add_blocker(vbasedev->migration_blocker, &local_err);
910     if (local_err) {
911         error_propagate(errp, local_err);
912         error_free(vbasedev->migration_blocker);
913         vbasedev->migration_blocker = NULL;
914     }
915     return ret;
916 }
917 
918 void vfio_migration_finalize(VFIODevice *vbasedev)
919 {
920     if (vbasedev->migration) {
921         VFIOMigration *migration = vbasedev->migration;
922 
923         remove_migration_state_change_notifier(&migration->migration_state);
924         qemu_del_vm_change_state_handler(migration->vm_state);
925         vfio_migration_exit(vbasedev);
926     }
927 
928     if (vbasedev->migration_blocker) {
929         migrate_del_blocker(vbasedev->migration_blocker);
930         error_free(vbasedev->migration_blocker);
931         vbasedev->migration_blocker = NULL;
932     }
933 }
934