xref: /openbmc/qemu/hw/vfio/migration.c (revision 4011a686)
1 /*
2  * Migration support for VFIO devices
3  *
4  * Copyright NVIDIA, Inc. 2020
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2. See
7  * the COPYING file in the top-level directory.
8  */
9 
10 #include "qemu/osdep.h"
11 #include "qemu/main-loop.h"
12 #include "qemu/cutils.h"
13 #include <linux/vfio.h>
14 #include <sys/ioctl.h>
15 
16 #include "sysemu/runstate.h"
17 #include "hw/vfio/vfio-common.h"
18 #include "cpu.h"
19 #include "migration/migration.h"
20 #include "migration/vmstate.h"
21 #include "migration/qemu-file.h"
22 #include "migration/register.h"
23 #include "migration/blocker.h"
24 #include "migration/misc.h"
25 #include "qapi/error.h"
26 #include "exec/ramlist.h"
27 #include "exec/ram_addr.h"
28 #include "pci.h"
29 #include "trace.h"
30 #include "hw/hw.h"
31 
32 /*
33  * Flags to be used as unique delimiters for VFIO devices in the migration
34  * stream. These flags are composed as:
35  * 0xffffffff => MSB 32-bit all 1s
36  * 0xef10     => Magic ID, represents emulated (virtual) function IO
37  * 0x0000     => 16-bits reserved for flags
38  *
39  * The beginning of state information is marked by _DEV_CONFIG_STATE,
40  * _DEV_SETUP_STATE, or _DEV_DATA_STATE, respectively. The end of a
41  * certain state information is marked by _END_OF_STATE.
42  */
#define VFIO_MIG_FLAG_END_OF_STATE      (0xffffffffef100001ULL)
#define VFIO_MIG_FLAG_DEV_CONFIG_STATE  (0xffffffffef100002ULL)
#define VFIO_MIG_FLAG_DEV_SETUP_STATE   (0xffffffffef100003ULL)
#define VFIO_MIG_FLAG_DEV_DATA_STATE    (0xffffffffef100004ULL)

/*
 * Running total of device data bytes written to the migration stream by
 * vfio_save_buffer(). Reset to 0 by the migration state notifier when a
 * migration is cancelled or fails; exposed via vfio_mig_bytes_transferred().
 */
static int64_t bytes_transferred;
49 
50 static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count,
51                                   off_t off, bool iswrite)
52 {
53     int ret;
54 
55     ret = iswrite ? pwrite(vbasedev->fd, val, count, off) :
56                     pread(vbasedev->fd, val, count, off);
57     if (ret < count) {
58         error_report("vfio_mig_%s %d byte %s: failed at offset 0x%"
59                      HWADDR_PRIx", err: %s", iswrite ? "write" : "read", count,
60                      vbasedev->name, off, strerror(errno));
61         return (ret < 0) ? ret : -EINVAL;
62     }
63     return 0;
64 }
65 
66 static int vfio_mig_rw(VFIODevice *vbasedev, __u8 *buf, size_t count,
67                        off_t off, bool iswrite)
68 {
69     int ret, done = 0;
70     __u8 *tbuf = buf;
71 
72     while (count) {
73         int bytes = 0;
74 
75         if (count >= 8 && !(off % 8)) {
76             bytes = 8;
77         } else if (count >= 4 && !(off % 4)) {
78             bytes = 4;
79         } else if (count >= 2 && !(off % 2)) {
80             bytes = 2;
81         } else {
82             bytes = 1;
83         }
84 
85         ret = vfio_mig_access(vbasedev, tbuf, bytes, off, iswrite);
86         if (ret) {
87             return ret;
88         }
89 
90         count -= bytes;
91         done += bytes;
92         off += bytes;
93         tbuf += bytes;
94     }
95     return done;
96 }
97 
/*
 * Convenience wrappers around vfio_mig_rw(): @f is the VFIODevice, @v the
 * buffer (cast to a byte pointer), @c the byte count and @o the file offset.
 * Like vfio_mig_rw(), they evaluate to the byte count on success and to a
 * negative value on failure.
 */
#define vfio_mig_read(f, v, c, o)       vfio_mig_rw(f, (__u8 *)v, c, o, false)
#define vfio_mig_write(f, v, c, o)      vfio_mig_rw(f, (__u8 *)v, c, o, true)

/* Byte offset of field @f inside struct vfio_device_migration_info. */
#define VFIO_MIG_STRUCT_OFFSET(f)       \
                                 offsetof(struct vfio_device_migration_info, f)
103 /*
104  * Change the device_state register for device @vbasedev. Bits set in @mask
105  * are preserved, bits set in @value are set, and bits not set in either @mask
106  * or @value are cleared in device_state. If the register cannot be accessed,
107  * the resulting state would be invalid, or the device enters an error state,
108  * an error is returned.
109  */
110 
111 static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask,
112                                     uint32_t value)
113 {
114     VFIOMigration *migration = vbasedev->migration;
115     VFIORegion *region = &migration->region;
116     off_t dev_state_off = region->fd_offset +
117                           VFIO_MIG_STRUCT_OFFSET(device_state);
118     uint32_t device_state;
119     int ret;
120 
121     ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state),
122                         dev_state_off);
123     if (ret < 0) {
124         return ret;
125     }
126 
127     device_state = (device_state & mask) | value;
128 
129     if (!VFIO_DEVICE_STATE_VALID(device_state)) {
130         return -EINVAL;
131     }
132 
133     ret = vfio_mig_write(vbasedev, &device_state, sizeof(device_state),
134                          dev_state_off);
135     if (ret < 0) {
136         int rret;
137 
138         rret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state),
139                              dev_state_off);
140 
141         if ((rret < 0) || (VFIO_DEVICE_STATE_IS_ERROR(device_state))) {
142             hw_error("%s: Device in error state 0x%x", vbasedev->name,
143                      device_state);
144             return rret ? rret : -EIO;
145         }
146         return ret;
147     }
148 
149     migration->device_state = device_state;
150     trace_vfio_migration_set_state(vbasedev->name, device_state);
151     return 0;
152 }
153 
154 static void *get_data_section_size(VFIORegion *region, uint64_t data_offset,
155                                    uint64_t data_size, uint64_t *size)
156 {
157     void *ptr = NULL;
158     uint64_t limit = 0;
159     int i;
160 
161     if (!region->mmaps) {
162         if (size) {
163             *size = MIN(data_size, region->size - data_offset);
164         }
165         return ptr;
166     }
167 
168     for (i = 0; i < region->nr_mmaps; i++) {
169         VFIOMmap *map = region->mmaps + i;
170 
171         if ((data_offset >= map->offset) &&
172             (data_offset < map->offset + map->size)) {
173 
174             /* check if data_offset is within sparse mmap areas */
175             ptr = map->mmap + data_offset - map->offset;
176             if (size) {
177                 *size = MIN(data_size, map->offset + map->size - data_offset);
178             }
179             break;
180         } else if ((data_offset < map->offset) &&
181                    (!limit || limit > map->offset)) {
182             /*
183              * data_offset is not within sparse mmap areas, find size of
184              * non-mapped area. Check through all list since region->mmaps list
185              * is not sorted.
186              */
187             limit = map->offset;
188         }
189     }
190 
191     if (!ptr && size) {
192         *size = limit ? MIN(data_size, limit - data_offset) : data_size;
193     }
194     return ptr;
195 }
196 
197 static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size)
198 {
199     VFIOMigration *migration = vbasedev->migration;
200     VFIORegion *region = &migration->region;
201     uint64_t data_offset = 0, data_size = 0, sz;
202     int ret;
203 
204     ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
205                       region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset));
206     if (ret < 0) {
207         return ret;
208     }
209 
210     ret = vfio_mig_read(vbasedev, &data_size, sizeof(data_size),
211                         region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size));
212     if (ret < 0) {
213         return ret;
214     }
215 
216     trace_vfio_save_buffer(vbasedev->name, data_offset, data_size,
217                            migration->pending_bytes);
218 
219     qemu_put_be64(f, data_size);
220     sz = data_size;
221 
222     while (sz) {
223         void *buf;
224         uint64_t sec_size;
225         bool buf_allocated = false;
226 
227         buf = get_data_section_size(region, data_offset, sz, &sec_size);
228 
229         if (!buf) {
230             buf = g_try_malloc(sec_size);
231             if (!buf) {
232                 error_report("%s: Error allocating buffer ", __func__);
233                 return -ENOMEM;
234             }
235             buf_allocated = true;
236 
237             ret = vfio_mig_read(vbasedev, buf, sec_size,
238                                 region->fd_offset + data_offset);
239             if (ret < 0) {
240                 g_free(buf);
241                 return ret;
242             }
243         }
244 
245         qemu_put_buffer(f, buf, sec_size);
246 
247         if (buf_allocated) {
248             g_free(buf);
249         }
250         sz -= sec_size;
251         data_offset += sec_size;
252     }
253 
254     ret = qemu_file_get_error(f);
255 
256     if (!ret && size) {
257         *size = data_size;
258     }
259 
260     bytes_transferred += data_size;
261     return ret;
262 }
263 
264 static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
265                             uint64_t data_size)
266 {
267     VFIORegion *region = &vbasedev->migration->region;
268     uint64_t data_offset = 0, size, report_size;
269     int ret;
270 
271     do {
272         ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
273                       region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset));
274         if (ret < 0) {
275             return ret;
276         }
277 
278         if (data_offset + data_size > region->size) {
279             /*
280              * If data_size is greater than the data section of migration region
281              * then iterate the write buffer operation. This case can occur if
282              * size of migration region at destination is smaller than size of
283              * migration region at source.
284              */
285             report_size = size = region->size - data_offset;
286             data_size -= size;
287         } else {
288             report_size = size = data_size;
289             data_size = 0;
290         }
291 
292         trace_vfio_load_state_device_data(vbasedev->name, data_offset, size);
293 
294         while (size) {
295             void *buf;
296             uint64_t sec_size;
297             bool buf_alloc = false;
298 
299             buf = get_data_section_size(region, data_offset, size, &sec_size);
300 
301             if (!buf) {
302                 buf = g_try_malloc(sec_size);
303                 if (!buf) {
304                     error_report("%s: Error allocating buffer ", __func__);
305                     return -ENOMEM;
306                 }
307                 buf_alloc = true;
308             }
309 
310             qemu_get_buffer(f, buf, sec_size);
311 
312             if (buf_alloc) {
313                 ret = vfio_mig_write(vbasedev, buf, sec_size,
314                         region->fd_offset + data_offset);
315                 g_free(buf);
316 
317                 if (ret < 0) {
318                     return ret;
319                 }
320             }
321             size -= sec_size;
322             data_offset += sec_size;
323         }
324 
325         ret = vfio_mig_write(vbasedev, &report_size, sizeof(report_size),
326                         region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size));
327         if (ret < 0) {
328             return ret;
329         }
330     } while (data_size);
331 
332     return 0;
333 }
334 
335 static int vfio_update_pending(VFIODevice *vbasedev)
336 {
337     VFIOMigration *migration = vbasedev->migration;
338     VFIORegion *region = &migration->region;
339     uint64_t pending_bytes = 0;
340     int ret;
341 
342     ret = vfio_mig_read(vbasedev, &pending_bytes, sizeof(pending_bytes),
343                     region->fd_offset + VFIO_MIG_STRUCT_OFFSET(pending_bytes));
344     if (ret < 0) {
345         migration->pending_bytes = 0;
346         return ret;
347     }
348 
349     migration->pending_bytes = pending_bytes;
350     trace_vfio_update_pending(vbasedev->name, pending_bytes);
351     return 0;
352 }
353 
354 static int vfio_save_device_config_state(QEMUFile *f, void *opaque)
355 {
356     VFIODevice *vbasedev = opaque;
357 
358     qemu_put_be64(f, VFIO_MIG_FLAG_DEV_CONFIG_STATE);
359 
360     if (vbasedev->ops && vbasedev->ops->vfio_save_config) {
361         vbasedev->ops->vfio_save_config(vbasedev, f);
362     }
363 
364     qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
365 
366     trace_vfio_save_device_config_state(vbasedev->name);
367 
368     return qemu_file_get_error(f);
369 }
370 
371 static int vfio_load_device_config_state(QEMUFile *f, void *opaque)
372 {
373     VFIODevice *vbasedev = opaque;
374     uint64_t data;
375 
376     if (vbasedev->ops && vbasedev->ops->vfio_load_config) {
377         int ret;
378 
379         ret = vbasedev->ops->vfio_load_config(vbasedev, f);
380         if (ret) {
381             error_report("%s: Failed to load device config space",
382                          vbasedev->name);
383             return ret;
384         }
385     }
386 
387     data = qemu_get_be64(f);
388     if (data != VFIO_MIG_FLAG_END_OF_STATE) {
389         error_report("%s: Failed loading device config space, "
390                      "end flag incorrect 0x%"PRIx64, vbasedev->name, data);
391         return -EINVAL;
392     }
393 
394     trace_vfio_load_device_config_state(vbasedev->name);
395     return qemu_file_get_error(f);
396 }
397 
398 static void vfio_migration_cleanup(VFIODevice *vbasedev)
399 {
400     VFIOMigration *migration = vbasedev->migration;
401 
402     if (migration->region.mmaps) {
403         vfio_region_unmap(&migration->region);
404     }
405 }
406 
407 /* ---------------------------------------------------------------------- */
408 
409 static int vfio_save_setup(QEMUFile *f, void *opaque)
410 {
411     VFIODevice *vbasedev = opaque;
412     VFIOMigration *migration = vbasedev->migration;
413     int ret;
414 
415     trace_vfio_save_setup(vbasedev->name);
416 
417     qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
418 
419     if (migration->region.mmaps) {
420         /*
421          * Calling vfio_region_mmap() from migration thread. Memory API called
422          * from this function require locking the iothread when called from
423          * outside the main loop thread.
424          */
425         qemu_mutex_lock_iothread();
426         ret = vfio_region_mmap(&migration->region);
427         qemu_mutex_unlock_iothread();
428         if (ret) {
429             error_report("%s: Failed to mmap VFIO migration region: %s",
430                          vbasedev->name, strerror(-ret));
431             error_report("%s: Falling back to slow path", vbasedev->name);
432         }
433     }
434 
435     ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK,
436                                    VFIO_DEVICE_STATE_SAVING);
437     if (ret) {
438         error_report("%s: Failed to set state SAVING", vbasedev->name);
439         return ret;
440     }
441 
442     qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
443 
444     ret = qemu_file_get_error(f);
445     if (ret) {
446         return ret;
447     }
448 
449     return 0;
450 }
451 
452 static void vfio_save_cleanup(void *opaque)
453 {
454     VFIODevice *vbasedev = opaque;
455 
456     vfio_migration_cleanup(vbasedev);
457     trace_vfio_save_cleanup(vbasedev->name);
458 }
459 
460 static void vfio_save_pending(QEMUFile *f, void *opaque,
461                               uint64_t threshold_size,
462                               uint64_t *res_precopy_only,
463                               uint64_t *res_compatible,
464                               uint64_t *res_postcopy_only)
465 {
466     VFIODevice *vbasedev = opaque;
467     VFIOMigration *migration = vbasedev->migration;
468     int ret;
469 
470     ret = vfio_update_pending(vbasedev);
471     if (ret) {
472         return;
473     }
474 
475     *res_precopy_only += migration->pending_bytes;
476 
477     trace_vfio_save_pending(vbasedev->name, *res_precopy_only,
478                             *res_postcopy_only, *res_compatible);
479 }
480 
481 static int vfio_save_iterate(QEMUFile *f, void *opaque)
482 {
483     VFIODevice *vbasedev = opaque;
484     VFIOMigration *migration = vbasedev->migration;
485     uint64_t data_size;
486     int ret;
487 
488     qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
489 
490     if (migration->pending_bytes == 0) {
491         ret = vfio_update_pending(vbasedev);
492         if (ret) {
493             return ret;
494         }
495 
496         if (migration->pending_bytes == 0) {
497             qemu_put_be64(f, 0);
498             qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
499             /* indicates data finished, goto complete phase */
500             return 1;
501         }
502     }
503 
504     ret = vfio_save_buffer(f, vbasedev, &data_size);
505     if (ret) {
506         error_report("%s: vfio_save_buffer failed %s", vbasedev->name,
507                      strerror(errno));
508         return ret;
509     }
510 
511     qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
512 
513     ret = qemu_file_get_error(f);
514     if (ret) {
515         return ret;
516     }
517 
518     /*
519      * Reset pending_bytes as .save_live_pending is not called during savevm or
520      * snapshot case, in such case vfio_update_pending() at the start of this
521      * function updates pending_bytes.
522      */
523     migration->pending_bytes = 0;
524     trace_vfio_save_iterate(vbasedev->name, data_size);
525     return 0;
526 }
527 
528 static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
529 {
530     VFIODevice *vbasedev = opaque;
531     VFIOMigration *migration = vbasedev->migration;
532     uint64_t data_size;
533     int ret;
534 
535     ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_RUNNING,
536                                    VFIO_DEVICE_STATE_SAVING);
537     if (ret) {
538         error_report("%s: Failed to set state STOP and SAVING",
539                      vbasedev->name);
540         return ret;
541     }
542 
543     ret = vfio_update_pending(vbasedev);
544     if (ret) {
545         return ret;
546     }
547 
548     while (migration->pending_bytes > 0) {
549         qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
550         ret = vfio_save_buffer(f, vbasedev, &data_size);
551         if (ret < 0) {
552             error_report("%s: Failed to save buffer", vbasedev->name);
553             return ret;
554         }
555 
556         if (data_size == 0) {
557             break;
558         }
559 
560         ret = vfio_update_pending(vbasedev);
561         if (ret) {
562             return ret;
563         }
564     }
565 
566     qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
567 
568     ret = qemu_file_get_error(f);
569     if (ret) {
570         return ret;
571     }
572 
573     ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_SAVING, 0);
574     if (ret) {
575         error_report("%s: Failed to set state STOPPED", vbasedev->name);
576         return ret;
577     }
578 
579     trace_vfio_save_complete_precopy(vbasedev->name);
580     return ret;
581 }
582 
583 static void vfio_save_state(QEMUFile *f, void *opaque)
584 {
585     VFIODevice *vbasedev = opaque;
586     int ret;
587 
588     ret = vfio_save_device_config_state(f, opaque);
589     if (ret) {
590         error_report("%s: Failed to save device config space",
591                      vbasedev->name);
592         qemu_file_set_error(f, ret);
593     }
594 }
595 
596 static int vfio_load_setup(QEMUFile *f, void *opaque)
597 {
598     VFIODevice *vbasedev = opaque;
599     VFIOMigration *migration = vbasedev->migration;
600     int ret = 0;
601 
602     if (migration->region.mmaps) {
603         ret = vfio_region_mmap(&migration->region);
604         if (ret) {
605             error_report("%s: Failed to mmap VFIO migration region %d: %s",
606                          vbasedev->name, migration->region.nr,
607                          strerror(-ret));
608             error_report("%s: Falling back to slow path", vbasedev->name);
609         }
610     }
611 
612     ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_MASK,
613                                    VFIO_DEVICE_STATE_RESUMING);
614     if (ret) {
615         error_report("%s: Failed to set state RESUMING", vbasedev->name);
616         if (migration->region.mmaps) {
617             vfio_region_unmap(&migration->region);
618         }
619     }
620     return ret;
621 }
622 
623 static int vfio_load_cleanup(void *opaque)
624 {
625     VFIODevice *vbasedev = opaque;
626 
627     vfio_migration_cleanup(vbasedev);
628     trace_vfio_load_cleanup(vbasedev->name);
629     return 0;
630 }
631 
632 static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
633 {
634     VFIODevice *vbasedev = opaque;
635     int ret = 0;
636     uint64_t data;
637 
638     data = qemu_get_be64(f);
639     while (data != VFIO_MIG_FLAG_END_OF_STATE) {
640 
641         trace_vfio_load_state(vbasedev->name, data);
642 
643         switch (data) {
644         case VFIO_MIG_FLAG_DEV_CONFIG_STATE:
645         {
646             return vfio_load_device_config_state(f, opaque);
647         }
648         case VFIO_MIG_FLAG_DEV_SETUP_STATE:
649         {
650             data = qemu_get_be64(f);
651             if (data == VFIO_MIG_FLAG_END_OF_STATE) {
652                 return ret;
653             } else {
654                 error_report("%s: SETUP STATE: EOS not found 0x%"PRIx64,
655                              vbasedev->name, data);
656                 return -EINVAL;
657             }
658             break;
659         }
660         case VFIO_MIG_FLAG_DEV_DATA_STATE:
661         {
662             uint64_t data_size = qemu_get_be64(f);
663 
664             if (data_size) {
665                 ret = vfio_load_buffer(f, vbasedev, data_size);
666                 if (ret < 0) {
667                     return ret;
668                 }
669             }
670             break;
671         }
672         default:
673             error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data);
674             return -EINVAL;
675         }
676 
677         data = qemu_get_be64(f);
678         ret = qemu_file_get_error(f);
679         if (ret) {
680             return ret;
681         }
682     }
683     return ret;
684 }
685 
/*
 * Save/load callbacks for a VFIO device; registered per device through
 * register_savevm_live() in vfio_migration_init().
 */
static SaveVMHandlers savevm_vfio_handlers = {
    .save_setup = vfio_save_setup,
    .save_cleanup = vfio_save_cleanup,
    .save_live_pending = vfio_save_pending,
    .save_live_iterate = vfio_save_iterate,
    .save_live_complete_precopy = vfio_save_complete_precopy,
    .save_state = vfio_save_state,
    .load_setup = vfio_load_setup,
    .load_cleanup = vfio_load_cleanup,
    .load_state = vfio_load_state,
};
697 
698 /* ---------------------------------------------------------------------- */
699 
700 static void vfio_vmstate_change(void *opaque, bool running, RunState state)
701 {
702     VFIODevice *vbasedev = opaque;
703     VFIOMigration *migration = vbasedev->migration;
704     uint32_t value, mask;
705     int ret;
706 
707     if (vbasedev->migration->vm_running == running) {
708         return;
709     }
710 
711     if (running) {
712         /*
713          * Here device state can have one of _SAVING, _RESUMING or _STOP bit.
714          * Transition from _SAVING to _RUNNING can happen if there is migration
715          * failure, in that case clear _SAVING bit.
716          * Transition from _RESUMING to _RUNNING occurs during resuming
717          * phase, in that case clear _RESUMING bit.
718          * In both the above cases, set _RUNNING bit.
719          */
720         mask = ~VFIO_DEVICE_STATE_MASK;
721         value = VFIO_DEVICE_STATE_RUNNING;
722     } else {
723         /*
724          * Here device state could be either _RUNNING or _SAVING|_RUNNING. Reset
725          * _RUNNING bit
726          */
727         mask = ~VFIO_DEVICE_STATE_RUNNING;
728         value = 0;
729     }
730 
731     ret = vfio_migration_set_state(vbasedev, mask, value);
732     if (ret) {
733         /*
734          * Migration should be aborted in this case, but vm_state_notify()
735          * currently does not support reporting failures.
736          */
737         error_report("%s: Failed to set device state 0x%x", vbasedev->name,
738                      (migration->device_state & mask) | value);
739         qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
740     }
741     vbasedev->migration->vm_running = running;
742     trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state),
743             (migration->device_state & mask) | value);
744 }
745 
746 static void vfio_migration_state_notifier(Notifier *notifier, void *data)
747 {
748     MigrationState *s = data;
749     VFIOMigration *migration = container_of(notifier, VFIOMigration,
750                                             migration_state);
751     VFIODevice *vbasedev = migration->vbasedev;
752     int ret;
753 
754     trace_vfio_migration_state_notifier(vbasedev->name,
755                                         MigrationStatus_str(s->state));
756 
757     switch (s->state) {
758     case MIGRATION_STATUS_CANCELLING:
759     case MIGRATION_STATUS_CANCELLED:
760     case MIGRATION_STATUS_FAILED:
761         bytes_transferred = 0;
762         ret = vfio_migration_set_state(vbasedev,
763                       ~(VFIO_DEVICE_STATE_SAVING | VFIO_DEVICE_STATE_RESUMING),
764                       VFIO_DEVICE_STATE_RUNNING);
765         if (ret) {
766             error_report("%s: Failed to set state RUNNING", vbasedev->name);
767         }
768     }
769 }
770 
771 static void vfio_migration_exit(VFIODevice *vbasedev)
772 {
773     VFIOMigration *migration = vbasedev->migration;
774 
775     vfio_region_exit(&migration->region);
776     vfio_region_finalize(&migration->region);
777     g_free(vbasedev->migration);
778     vbasedev->migration = NULL;
779 }
780 
781 static int vfio_migration_init(VFIODevice *vbasedev,
782                                struct vfio_region_info *info)
783 {
784     int ret;
785     Object *obj;
786     VFIOMigration *migration;
787     char id[256] = "";
788     g_autofree char *path = NULL, *oid = NULL;
789 
790     if (!vbasedev->ops->vfio_get_object) {
791         return -EINVAL;
792     }
793 
794     obj = vbasedev->ops->vfio_get_object(vbasedev);
795     if (!obj) {
796         return -EINVAL;
797     }
798 
799     vbasedev->migration = g_new0(VFIOMigration, 1);
800 
801     ret = vfio_region_setup(obj, vbasedev, &vbasedev->migration->region,
802                             info->index, "migration");
803     if (ret) {
804         error_report("%s: Failed to setup VFIO migration region %d: %s",
805                      vbasedev->name, info->index, strerror(-ret));
806         goto err;
807     }
808 
809     if (!vbasedev->migration->region.size) {
810         error_report("%s: Invalid zero-sized VFIO migration region %d",
811                      vbasedev->name, info->index);
812         ret = -EINVAL;
813         goto err;
814     }
815 
816     migration = vbasedev->migration;
817     migration->vbasedev = vbasedev;
818 
819     oid = vmstate_if_get_id(VMSTATE_IF(DEVICE(obj)));
820     if (oid) {
821         path = g_strdup_printf("%s/vfio", oid);
822     } else {
823         path = g_strdup("vfio");
824     }
825     strpadcpy(id, sizeof(id), path, '\0');
826 
827     register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers,
828                          vbasedev);
829 
830     migration->vm_state = qdev_add_vm_change_state_handler(vbasedev->dev,
831                                                            vfio_vmstate_change,
832                                                            vbasedev);
833     migration->migration_state.notify = vfio_migration_state_notifier;
834     add_migration_state_change_notifier(&migration->migration_state);
835     return 0;
836 
837 err:
838     vfio_migration_exit(vbasedev);
839     return ret;
840 }
841 
842 /* ---------------------------------------------------------------------- */
843 
844 int64_t vfio_mig_bytes_transferred(void)
845 {
846     return bytes_transferred;
847 }
848 
849 int vfio_migration_probe(VFIODevice *vbasedev, Error **errp)
850 {
851     VFIOContainer *container = vbasedev->group->container;
852     struct vfio_region_info *info = NULL;
853     Error *local_err = NULL;
854     int ret = -ENOTSUP;
855 
856     if (!vbasedev->enable_migration || !container->dirty_pages_supported) {
857         goto add_blocker;
858     }
859 
860     ret = vfio_get_dev_region_info(vbasedev, VFIO_REGION_TYPE_MIGRATION,
861                                    VFIO_REGION_SUBTYPE_MIGRATION, &info);
862     if (ret) {
863         goto add_blocker;
864     }
865 
866     ret = vfio_migration_init(vbasedev, info);
867     if (ret) {
868         goto add_blocker;
869     }
870 
871     trace_vfio_migration_probe(vbasedev->name, info->index);
872     g_free(info);
873     return 0;
874 
875 add_blocker:
876     error_setg(&vbasedev->migration_blocker,
877                "VFIO device doesn't support migration");
878     g_free(info);
879 
880     ret = migrate_add_blocker(vbasedev->migration_blocker, &local_err);
881     if (local_err) {
882         error_propagate(errp, local_err);
883         error_free(vbasedev->migration_blocker);
884         vbasedev->migration_blocker = NULL;
885     }
886     return ret;
887 }
888 
889 void vfio_migration_finalize(VFIODevice *vbasedev)
890 {
891     if (vbasedev->migration) {
892         VFIOMigration *migration = vbasedev->migration;
893 
894         remove_migration_state_change_notifier(&migration->migration_state);
895         qemu_del_vm_change_state_handler(migration->vm_state);
896         vfio_migration_exit(vbasedev);
897     }
898 
899     if (vbasedev->migration_blocker) {
900         migrate_del_blocker(vbasedev->migration_blocker);
901         error_free(vbasedev->migration_blocker);
902         vbasedev->migration_blocker = NULL;
903     }
904 }
905