xref: /openbmc/qemu/hw/vfio/migration.c (revision 93d7620c)
1 /*
2  * Migration support for VFIO devices
3  *
4  * Copyright NVIDIA, Inc. 2020
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2. See
7  * the COPYING file in the top-level directory.
8  */
9 
10 #include "qemu/osdep.h"
11 #include "qemu/main-loop.h"
12 #include "qemu/cutils.h"
13 #include <linux/vfio.h>
14 #include <sys/ioctl.h>
15 
16 #include "sysemu/runstate.h"
17 #include "hw/vfio/vfio-common.h"
18 #include "migration/migration.h"
19 #include "migration/vmstate.h"
20 #include "migration/qemu-file.h"
21 #include "migration/register.h"
22 #include "migration/blocker.h"
23 #include "migration/misc.h"
24 #include "qapi/error.h"
25 #include "exec/ramlist.h"
26 #include "exec/ram_addr.h"
27 #include "pci.h"
28 #include "trace.h"
29 #include "hw/hw.h"
30 
31 /*
32  * Flags to be used as unique delimiters for VFIO devices in the migration
33  * stream. These flags are composed as:
34  * 0xffffffff => MSB 32-bit all 1s
35  * 0xef10     => Magic ID, represents emulated (virtual) function IO
36  * 0x0000     => 16-bits reserved for flags
37  *
38  * The beginning of state information is marked by _DEV_CONFIG_STATE,
39  * _DEV_SETUP_STATE, or _DEV_DATA_STATE, respectively. The end of a
40  * certain state information is marked by _END_OF_STATE.
41  */
/* Bits shared by every delimiter: 32 one-bits, magic 0xef10, flags cleared. */
#define VFIO_MIG_FLAG_PREFIX            (0xffffffffef100000ULL)

#define VFIO_MIG_FLAG_END_OF_STATE      (VFIO_MIG_FLAG_PREFIX | 0x0001ULL)
#define VFIO_MIG_FLAG_DEV_CONFIG_STATE  (VFIO_MIG_FLAG_PREFIX | 0x0002ULL)
#define VFIO_MIG_FLAG_DEV_SETUP_STATE   (VFIO_MIG_FLAG_PREFIX | 0x0003ULL)
#define VFIO_MIG_FLAG_DEV_DATA_STATE    (VFIO_MIG_FLAG_PREFIX | 0x0004ULL)

/*
 * Device data bytes accounted by vfio_save_buffer(); cleared again by the
 * migration state notifier when a migration is cancelled or fails.
 */
static int64_t bytes_transferred;
48 
49 static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count,
50                                   off_t off, bool iswrite)
51 {
52     int ret;
53 
54     ret = iswrite ? pwrite(vbasedev->fd, val, count, off) :
55                     pread(vbasedev->fd, val, count, off);
56     if (ret < count) {
57         error_report("vfio_mig_%s %d byte %s: failed at offset 0x%"
58                      HWADDR_PRIx", err: %s", iswrite ? "write" : "read", count,
59                      vbasedev->name, off, strerror(errno));
60         return (ret < 0) ? ret : -EINVAL;
61     }
62     return 0;
63 }
64 
65 static int vfio_mig_rw(VFIODevice *vbasedev, __u8 *buf, size_t count,
66                        off_t off, bool iswrite)
67 {
68     int ret, done = 0;
69     __u8 *tbuf = buf;
70 
71     while (count) {
72         int bytes = 0;
73 
74         if (count >= 8 && !(off % 8)) {
75             bytes = 8;
76         } else if (count >= 4 && !(off % 4)) {
77             bytes = 4;
78         } else if (count >= 2 && !(off % 2)) {
79             bytes = 2;
80         } else {
81             bytes = 1;
82         }
83 
84         ret = vfio_mig_access(vbasedev, tbuf, bytes, off, iswrite);
85         if (ret) {
86             return ret;
87         }
88 
89         count -= bytes;
90         done += bytes;
91         off += bytes;
92         tbuf += bytes;
93     }
94     return done;
95 }
96 
/*
 * Convenience wrappers around vfio_mig_rw(). Every argument is parenthesized
 * so that an expression argument (e.g. "ptr + 1") is evaluated as a whole
 * before the cast to __u8 * is applied.
 */
#define vfio_mig_read(f, v, c, o)       \
                                 vfio_mig_rw((f), (__u8 *)(v), (c), (o), false)
#define vfio_mig_write(f, v, c, o)      \
                                 vfio_mig_rw((f), (__u8 *)(v), (c), (o), true)

/* Byte offset of member @f inside struct vfio_device_migration_info. */
#define VFIO_MIG_STRUCT_OFFSET(f)       \
                                 offsetof(struct vfio_device_migration_info, f)
102 /*
103  * Change the device_state register for device @vbasedev. Bits set in @mask
104  * are preserved, bits set in @value are set, and bits not set in either @mask
105  * or @value are cleared in device_state. If the register cannot be accessed,
106  * the resulting state would be invalid, or the device enters an error state,
107  * an error is returned.
108  */
109 
110 static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask,
111                                     uint32_t value)
112 {
113     VFIOMigration *migration = vbasedev->migration;
114     VFIORegion *region = &migration->region;
115     off_t dev_state_off = region->fd_offset +
116                           VFIO_MIG_STRUCT_OFFSET(device_state);
117     uint32_t device_state;
118     int ret;
119 
120     ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state),
121                         dev_state_off);
122     if (ret < 0) {
123         return ret;
124     }
125 
126     device_state = (device_state & mask) | value;
127 
128     if (!VFIO_DEVICE_STATE_VALID(device_state)) {
129         return -EINVAL;
130     }
131 
132     ret = vfio_mig_write(vbasedev, &device_state, sizeof(device_state),
133                          dev_state_off);
134     if (ret < 0) {
135         int rret;
136 
137         rret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state),
138                              dev_state_off);
139 
140         if ((rret < 0) || (VFIO_DEVICE_STATE_IS_ERROR(device_state))) {
141             hw_error("%s: Device in error state 0x%x", vbasedev->name,
142                      device_state);
143             return rret ? rret : -EIO;
144         }
145         return ret;
146     }
147 
148     migration->device_state = device_state;
149     trace_vfio_migration_set_state(vbasedev->name, device_state);
150     return 0;
151 }
152 
153 static void *get_data_section_size(VFIORegion *region, uint64_t data_offset,
154                                    uint64_t data_size, uint64_t *size)
155 {
156     void *ptr = NULL;
157     uint64_t limit = 0;
158     int i;
159 
160     if (!region->mmaps) {
161         if (size) {
162             *size = MIN(data_size, region->size - data_offset);
163         }
164         return ptr;
165     }
166 
167     for (i = 0; i < region->nr_mmaps; i++) {
168         VFIOMmap *map = region->mmaps + i;
169 
170         if ((data_offset >= map->offset) &&
171             (data_offset < map->offset + map->size)) {
172 
173             /* check if data_offset is within sparse mmap areas */
174             ptr = map->mmap + data_offset - map->offset;
175             if (size) {
176                 *size = MIN(data_size, map->offset + map->size - data_offset);
177             }
178             break;
179         } else if ((data_offset < map->offset) &&
180                    (!limit || limit > map->offset)) {
181             /*
182              * data_offset is not within sparse mmap areas, find size of
183              * non-mapped area. Check through all list since region->mmaps list
184              * is not sorted.
185              */
186             limit = map->offset;
187         }
188     }
189 
190     if (!ptr && size) {
191         *size = limit ? MIN(data_size, limit - data_offset) : data_size;
192     }
193     return ptr;
194 }
195 
196 static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size)
197 {
198     VFIOMigration *migration = vbasedev->migration;
199     VFIORegion *region = &migration->region;
200     uint64_t data_offset = 0, data_size = 0, sz;
201     int ret;
202 
203     ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
204                       region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset));
205     if (ret < 0) {
206         return ret;
207     }
208 
209     ret = vfio_mig_read(vbasedev, &data_size, sizeof(data_size),
210                         region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size));
211     if (ret < 0) {
212         return ret;
213     }
214 
215     trace_vfio_save_buffer(vbasedev->name, data_offset, data_size,
216                            migration->pending_bytes);
217 
218     qemu_put_be64(f, data_size);
219     sz = data_size;
220 
221     while (sz) {
222         void *buf;
223         uint64_t sec_size;
224         bool buf_allocated = false;
225 
226         buf = get_data_section_size(region, data_offset, sz, &sec_size);
227 
228         if (!buf) {
229             buf = g_try_malloc(sec_size);
230             if (!buf) {
231                 error_report("%s: Error allocating buffer ", __func__);
232                 return -ENOMEM;
233             }
234             buf_allocated = true;
235 
236             ret = vfio_mig_read(vbasedev, buf, sec_size,
237                                 region->fd_offset + data_offset);
238             if (ret < 0) {
239                 g_free(buf);
240                 return ret;
241             }
242         }
243 
244         qemu_put_buffer(f, buf, sec_size);
245 
246         if (buf_allocated) {
247             g_free(buf);
248         }
249         sz -= sec_size;
250         data_offset += sec_size;
251     }
252 
253     ret = qemu_file_get_error(f);
254 
255     if (!ret && size) {
256         *size = data_size;
257     }
258 
259     bytes_transferred += data_size;
260     return ret;
261 }
262 
263 static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
264                             uint64_t data_size)
265 {
266     VFIORegion *region = &vbasedev->migration->region;
267     uint64_t data_offset = 0, size, report_size;
268     int ret;
269 
270     do {
271         ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
272                       region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset));
273         if (ret < 0) {
274             return ret;
275         }
276 
277         if (data_offset + data_size > region->size) {
278             /*
279              * If data_size is greater than the data section of migration region
280              * then iterate the write buffer operation. This case can occur if
281              * size of migration region at destination is smaller than size of
282              * migration region at source.
283              */
284             report_size = size = region->size - data_offset;
285             data_size -= size;
286         } else {
287             report_size = size = data_size;
288             data_size = 0;
289         }
290 
291         trace_vfio_load_state_device_data(vbasedev->name, data_offset, size);
292 
293         while (size) {
294             void *buf;
295             uint64_t sec_size;
296             bool buf_alloc = false;
297 
298             buf = get_data_section_size(region, data_offset, size, &sec_size);
299 
300             if (!buf) {
301                 buf = g_try_malloc(sec_size);
302                 if (!buf) {
303                     error_report("%s: Error allocating buffer ", __func__);
304                     return -ENOMEM;
305                 }
306                 buf_alloc = true;
307             }
308 
309             qemu_get_buffer(f, buf, sec_size);
310 
311             if (buf_alloc) {
312                 ret = vfio_mig_write(vbasedev, buf, sec_size,
313                         region->fd_offset + data_offset);
314                 g_free(buf);
315 
316                 if (ret < 0) {
317                     return ret;
318                 }
319             }
320             size -= sec_size;
321             data_offset += sec_size;
322         }
323 
324         ret = vfio_mig_write(vbasedev, &report_size, sizeof(report_size),
325                         region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size));
326         if (ret < 0) {
327             return ret;
328         }
329     } while (data_size);
330 
331     return 0;
332 }
333 
334 static int vfio_update_pending(VFIODevice *vbasedev)
335 {
336     VFIOMigration *migration = vbasedev->migration;
337     VFIORegion *region = &migration->region;
338     uint64_t pending_bytes = 0;
339     int ret;
340 
341     ret = vfio_mig_read(vbasedev, &pending_bytes, sizeof(pending_bytes),
342                     region->fd_offset + VFIO_MIG_STRUCT_OFFSET(pending_bytes));
343     if (ret < 0) {
344         migration->pending_bytes = 0;
345         return ret;
346     }
347 
348     migration->pending_bytes = pending_bytes;
349     trace_vfio_update_pending(vbasedev->name, pending_bytes);
350     return 0;
351 }
352 
353 static int vfio_save_device_config_state(QEMUFile *f, void *opaque)
354 {
355     VFIODevice *vbasedev = opaque;
356 
357     qemu_put_be64(f, VFIO_MIG_FLAG_DEV_CONFIG_STATE);
358 
359     if (vbasedev->ops && vbasedev->ops->vfio_save_config) {
360         vbasedev->ops->vfio_save_config(vbasedev, f);
361     }
362 
363     qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
364 
365     trace_vfio_save_device_config_state(vbasedev->name);
366 
367     return qemu_file_get_error(f);
368 }
369 
370 static int vfio_load_device_config_state(QEMUFile *f, void *opaque)
371 {
372     VFIODevice *vbasedev = opaque;
373     uint64_t data;
374 
375     if (vbasedev->ops && vbasedev->ops->vfio_load_config) {
376         int ret;
377 
378         ret = vbasedev->ops->vfio_load_config(vbasedev, f);
379         if (ret) {
380             error_report("%s: Failed to load device config space",
381                          vbasedev->name);
382             return ret;
383         }
384     }
385 
386     data = qemu_get_be64(f);
387     if (data != VFIO_MIG_FLAG_END_OF_STATE) {
388         error_report("%s: Failed loading device config space, "
389                      "end flag incorrect 0x%"PRIx64, vbasedev->name, data);
390         return -EINVAL;
391     }
392 
393     trace_vfio_load_device_config_state(vbasedev->name);
394     return qemu_file_get_error(f);
395 }
396 
397 static void vfio_migration_cleanup(VFIODevice *vbasedev)
398 {
399     VFIOMigration *migration = vbasedev->migration;
400 
401     if (migration->region.mmaps) {
402         vfio_region_unmap(&migration->region);
403     }
404 }
405 
406 /* ---------------------------------------------------------------------- */
407 
408 static int vfio_save_setup(QEMUFile *f, void *opaque)
409 {
410     VFIODevice *vbasedev = opaque;
411     VFIOMigration *migration = vbasedev->migration;
412     int ret;
413 
414     trace_vfio_save_setup(vbasedev->name);
415 
416     qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
417 
418     if (migration->region.mmaps) {
419         /*
420          * Calling vfio_region_mmap() from migration thread. Memory API called
421          * from this function require locking the iothread when called from
422          * outside the main loop thread.
423          */
424         qemu_mutex_lock_iothread();
425         ret = vfio_region_mmap(&migration->region);
426         qemu_mutex_unlock_iothread();
427         if (ret) {
428             error_report("%s: Failed to mmap VFIO migration region: %s",
429                          vbasedev->name, strerror(-ret));
430             error_report("%s: Falling back to slow path", vbasedev->name);
431         }
432     }
433 
434     ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK,
435                                    VFIO_DEVICE_STATE_V1_SAVING);
436     if (ret) {
437         error_report("%s: Failed to set state SAVING", vbasedev->name);
438         return ret;
439     }
440 
441     qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
442 
443     ret = qemu_file_get_error(f);
444     if (ret) {
445         return ret;
446     }
447 
448     return 0;
449 }
450 
451 static void vfio_save_cleanup(void *opaque)
452 {
453     VFIODevice *vbasedev = opaque;
454 
455     vfio_migration_cleanup(vbasedev);
456     trace_vfio_save_cleanup(vbasedev->name);
457 }
458 
459 static void vfio_state_pending(void *opaque, uint64_t *must_precopy,
460                                uint64_t *can_postcopy)
461 {
462     VFIODevice *vbasedev = opaque;
463     VFIOMigration *migration = vbasedev->migration;
464     int ret;
465 
466     ret = vfio_update_pending(vbasedev);
467     if (ret) {
468         return;
469     }
470 
471     *must_precopy += migration->pending_bytes;
472 
473     trace_vfio_state_pending(vbasedev->name, *must_precopy, *can_postcopy);
474 }
475 
476 static int vfio_save_iterate(QEMUFile *f, void *opaque)
477 {
478     VFIODevice *vbasedev = opaque;
479     VFIOMigration *migration = vbasedev->migration;
480     uint64_t data_size;
481     int ret;
482 
483     qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
484 
485     if (migration->pending_bytes == 0) {
486         ret = vfio_update_pending(vbasedev);
487         if (ret) {
488             return ret;
489         }
490 
491         if (migration->pending_bytes == 0) {
492             qemu_put_be64(f, 0);
493             qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
494             /* indicates data finished, goto complete phase */
495             return 1;
496         }
497     }
498 
499     ret = vfio_save_buffer(f, vbasedev, &data_size);
500     if (ret) {
501         error_report("%s: vfio_save_buffer failed %s", vbasedev->name,
502                      strerror(errno));
503         return ret;
504     }
505 
506     qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
507 
508     ret = qemu_file_get_error(f);
509     if (ret) {
510         return ret;
511     }
512 
513     /*
514      * Reset pending_bytes as state_pending* are not called during
515      * savevm or snapshot case, in such case vfio_update_pending() at
516      * the start of this function updates pending_bytes.
517      */
518     migration->pending_bytes = 0;
519     trace_vfio_save_iterate(vbasedev->name, data_size);
520     return 0;
521 }
522 
523 static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
524 {
525     VFIODevice *vbasedev = opaque;
526     VFIOMigration *migration = vbasedev->migration;
527     uint64_t data_size;
528     int ret;
529 
530     ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_V1_RUNNING,
531                                    VFIO_DEVICE_STATE_V1_SAVING);
532     if (ret) {
533         error_report("%s: Failed to set state STOP and SAVING",
534                      vbasedev->name);
535         return ret;
536     }
537 
538     ret = vfio_update_pending(vbasedev);
539     if (ret) {
540         return ret;
541     }
542 
543     while (migration->pending_bytes > 0) {
544         qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
545         ret = vfio_save_buffer(f, vbasedev, &data_size);
546         if (ret < 0) {
547             error_report("%s: Failed to save buffer", vbasedev->name);
548             return ret;
549         }
550 
551         if (data_size == 0) {
552             break;
553         }
554 
555         ret = vfio_update_pending(vbasedev);
556         if (ret) {
557             return ret;
558         }
559     }
560 
561     qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
562 
563     ret = qemu_file_get_error(f);
564     if (ret) {
565         return ret;
566     }
567 
568     ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_V1_SAVING, 0);
569     if (ret) {
570         error_report("%s: Failed to set state STOPPED", vbasedev->name);
571         return ret;
572     }
573 
574     trace_vfio_save_complete_precopy(vbasedev->name);
575     return ret;
576 }
577 
578 static void vfio_save_state(QEMUFile *f, void *opaque)
579 {
580     VFIODevice *vbasedev = opaque;
581     int ret;
582 
583     ret = vfio_save_device_config_state(f, opaque);
584     if (ret) {
585         error_report("%s: Failed to save device config space",
586                      vbasedev->name);
587         qemu_file_set_error(f, ret);
588     }
589 }
590 
591 static int vfio_load_setup(QEMUFile *f, void *opaque)
592 {
593     VFIODevice *vbasedev = opaque;
594     VFIOMigration *migration = vbasedev->migration;
595     int ret = 0;
596 
597     if (migration->region.mmaps) {
598         ret = vfio_region_mmap(&migration->region);
599         if (ret) {
600             error_report("%s: Failed to mmap VFIO migration region %d: %s",
601                          vbasedev->name, migration->region.nr,
602                          strerror(-ret));
603             error_report("%s: Falling back to slow path", vbasedev->name);
604         }
605     }
606 
607     ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_MASK,
608                                    VFIO_DEVICE_STATE_V1_RESUMING);
609     if (ret) {
610         error_report("%s: Failed to set state RESUMING", vbasedev->name);
611         if (migration->region.mmaps) {
612             vfio_region_unmap(&migration->region);
613         }
614     }
615     return ret;
616 }
617 
618 static int vfio_load_cleanup(void *opaque)
619 {
620     VFIODevice *vbasedev = opaque;
621 
622     vfio_migration_cleanup(vbasedev);
623     trace_vfio_load_cleanup(vbasedev->name);
624     return 0;
625 }
626 
627 static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
628 {
629     VFIODevice *vbasedev = opaque;
630     int ret = 0;
631     uint64_t data;
632 
633     data = qemu_get_be64(f);
634     while (data != VFIO_MIG_FLAG_END_OF_STATE) {
635 
636         trace_vfio_load_state(vbasedev->name, data);
637 
638         switch (data) {
639         case VFIO_MIG_FLAG_DEV_CONFIG_STATE:
640         {
641             return vfio_load_device_config_state(f, opaque);
642         }
643         case VFIO_MIG_FLAG_DEV_SETUP_STATE:
644         {
645             data = qemu_get_be64(f);
646             if (data == VFIO_MIG_FLAG_END_OF_STATE) {
647                 return ret;
648             } else {
649                 error_report("%s: SETUP STATE: EOS not found 0x%"PRIx64,
650                              vbasedev->name, data);
651                 return -EINVAL;
652             }
653             break;
654         }
655         case VFIO_MIG_FLAG_DEV_DATA_STATE:
656         {
657             uint64_t data_size = qemu_get_be64(f);
658 
659             if (data_size) {
660                 ret = vfio_load_buffer(f, vbasedev, data_size);
661                 if (ret < 0) {
662                     return ret;
663                 }
664             }
665             break;
666         }
667         default:
668             error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data);
669             return -EINVAL;
670         }
671 
672         data = qemu_get_be64(f);
673         ret = qemu_file_get_error(f);
674         if (ret) {
675             return ret;
676         }
677     }
678     return ret;
679 }
680 
681 static SaveVMHandlers savevm_vfio_handlers = {
682     .save_setup = vfio_save_setup,
683     .save_cleanup = vfio_save_cleanup,
684     .state_pending_exact = vfio_state_pending,
685     .state_pending_estimate = vfio_state_pending,
686     .save_live_iterate = vfio_save_iterate,
687     .save_live_complete_precopy = vfio_save_complete_precopy,
688     .save_state = vfio_save_state,
689     .load_setup = vfio_load_setup,
690     .load_cleanup = vfio_load_cleanup,
691     .load_state = vfio_load_state,
692 };
693 
694 /* ---------------------------------------------------------------------- */
695 
696 static void vfio_vmstate_change(void *opaque, bool running, RunState state)
697 {
698     VFIODevice *vbasedev = opaque;
699     VFIOMigration *migration = vbasedev->migration;
700     uint32_t value, mask;
701     int ret;
702 
703     if (vbasedev->migration->vm_running == running) {
704         return;
705     }
706 
707     if (running) {
708         /*
709          * Here device state can have one of _SAVING, _RESUMING or _STOP bit.
710          * Transition from _SAVING to _RUNNING can happen if there is migration
711          * failure, in that case clear _SAVING bit.
712          * Transition from _RESUMING to _RUNNING occurs during resuming
713          * phase, in that case clear _RESUMING bit.
714          * In both the above cases, set _RUNNING bit.
715          */
716         mask = ~VFIO_DEVICE_STATE_MASK;
717         value = VFIO_DEVICE_STATE_V1_RUNNING;
718     } else {
719         /*
720          * Here device state could be either _RUNNING or _SAVING|_RUNNING. Reset
721          * _RUNNING bit
722          */
723         mask = ~VFIO_DEVICE_STATE_V1_RUNNING;
724 
725         /*
726          * When VM state transition to stop for savevm command, device should
727          * start saving data.
728          */
729         if (state == RUN_STATE_SAVE_VM) {
730             value = VFIO_DEVICE_STATE_V1_SAVING;
731         } else {
732             value = 0;
733         }
734     }
735 
736     ret = vfio_migration_set_state(vbasedev, mask, value);
737     if (ret) {
738         /*
739          * Migration should be aborted in this case, but vm_state_notify()
740          * currently does not support reporting failures.
741          */
742         error_report("%s: Failed to set device state 0x%x", vbasedev->name,
743                      (migration->device_state & mask) | value);
744         qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
745     }
746     vbasedev->migration->vm_running = running;
747     trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state),
748             (migration->device_state & mask) | value);
749 }
750 
751 static void vfio_migration_state_notifier(Notifier *notifier, void *data)
752 {
753     MigrationState *s = data;
754     VFIOMigration *migration = container_of(notifier, VFIOMigration,
755                                             migration_state);
756     VFIODevice *vbasedev = migration->vbasedev;
757     int ret;
758 
759     trace_vfio_migration_state_notifier(vbasedev->name,
760                                         MigrationStatus_str(s->state));
761 
762     switch (s->state) {
763     case MIGRATION_STATUS_CANCELLING:
764     case MIGRATION_STATUS_CANCELLED:
765     case MIGRATION_STATUS_FAILED:
766         bytes_transferred = 0;
767         ret = vfio_migration_set_state(vbasedev,
768                                        ~(VFIO_DEVICE_STATE_V1_SAVING |
769                                          VFIO_DEVICE_STATE_V1_RESUMING),
770                                        VFIO_DEVICE_STATE_V1_RUNNING);
771         if (ret) {
772             error_report("%s: Failed to set state RUNNING", vbasedev->name);
773         }
774     }
775 }
776 
777 static void vfio_migration_exit(VFIODevice *vbasedev)
778 {
779     VFIOMigration *migration = vbasedev->migration;
780 
781     vfio_region_exit(&migration->region);
782     vfio_region_finalize(&migration->region);
783     g_free(vbasedev->migration);
784     vbasedev->migration = NULL;
785 }
786 
787 static int vfio_migration_init(VFIODevice *vbasedev,
788                                struct vfio_region_info *info)
789 {
790     int ret;
791     Object *obj;
792     VFIOMigration *migration;
793     char id[256] = "";
794     g_autofree char *path = NULL, *oid = NULL;
795 
796     if (!vbasedev->ops->vfio_get_object) {
797         return -EINVAL;
798     }
799 
800     obj = vbasedev->ops->vfio_get_object(vbasedev);
801     if (!obj) {
802         return -EINVAL;
803     }
804 
805     vbasedev->migration = g_new0(VFIOMigration, 1);
806     vbasedev->migration->device_state = VFIO_DEVICE_STATE_V1_RUNNING;
807     vbasedev->migration->vm_running = runstate_is_running();
808 
809     ret = vfio_region_setup(obj, vbasedev, &vbasedev->migration->region,
810                             info->index, "migration");
811     if (ret) {
812         error_report("%s: Failed to setup VFIO migration region %d: %s",
813                      vbasedev->name, info->index, strerror(-ret));
814         goto err;
815     }
816 
817     if (!vbasedev->migration->region.size) {
818         error_report("%s: Invalid zero-sized VFIO migration region %d",
819                      vbasedev->name, info->index);
820         ret = -EINVAL;
821         goto err;
822     }
823 
824     migration = vbasedev->migration;
825     migration->vbasedev = vbasedev;
826 
827     oid = vmstate_if_get_id(VMSTATE_IF(DEVICE(obj)));
828     if (oid) {
829         path = g_strdup_printf("%s/vfio", oid);
830     } else {
831         path = g_strdup("vfio");
832     }
833     strpadcpy(id, sizeof(id), path, '\0');
834 
835     register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers,
836                          vbasedev);
837 
838     migration->vm_state = qdev_add_vm_change_state_handler(vbasedev->dev,
839                                                            vfio_vmstate_change,
840                                                            vbasedev);
841     migration->migration_state.notify = vfio_migration_state_notifier;
842     add_migration_state_change_notifier(&migration->migration_state);
843     return 0;
844 
845 err:
846     vfio_migration_exit(vbasedev);
847     return ret;
848 }
849 
850 /* ---------------------------------------------------------------------- */
851 
852 int64_t vfio_mig_bytes_transferred(void)
853 {
854     return bytes_transferred;
855 }
856 
857 int vfio_migration_probe(VFIODevice *vbasedev, Error **errp)
858 {
859     VFIOContainer *container = vbasedev->group->container;
860     struct vfio_region_info *info = NULL;
861     int ret = -ENOTSUP;
862 
863     if (!vbasedev->enable_migration || !container->dirty_pages_supported) {
864         goto add_blocker;
865     }
866 
867     ret = vfio_get_dev_region_info(vbasedev,
868                                    VFIO_REGION_TYPE_MIGRATION_DEPRECATED,
869                                    VFIO_REGION_SUBTYPE_MIGRATION_DEPRECATED,
870                                    &info);
871     if (ret) {
872         goto add_blocker;
873     }
874 
875     ret = vfio_migration_init(vbasedev, info);
876     if (ret) {
877         goto add_blocker;
878     }
879 
880     trace_vfio_migration_probe(vbasedev->name, info->index);
881     g_free(info);
882     return 0;
883 
884 add_blocker:
885     error_setg(&vbasedev->migration_blocker,
886                "VFIO device doesn't support migration");
887     g_free(info);
888 
889     ret = migrate_add_blocker(vbasedev->migration_blocker, errp);
890     if (ret < 0) {
891         error_free(vbasedev->migration_blocker);
892         vbasedev->migration_blocker = NULL;
893     }
894     return ret;
895 }
896 
897 void vfio_migration_finalize(VFIODevice *vbasedev)
898 {
899     if (vbasedev->migration) {
900         VFIOMigration *migration = vbasedev->migration;
901 
902         remove_migration_state_change_notifier(&migration->migration_state);
903         qemu_del_vm_change_state_handler(migration->vm_state);
904         unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev);
905         vfio_migration_exit(vbasedev);
906     }
907 
908     if (vbasedev->migration_blocker) {
909         migrate_del_blocker(vbasedev->migration_blocker);
910         error_free(vbasedev->migration_blocker);
911         vbasedev->migration_blocker = NULL;
912     }
913 }
914