xref: /openbmc/qemu/hw/vfio/migration.c (revision a003dbc6)
1 /*
2  * Migration support for VFIO devices
3  *
4  * Copyright NVIDIA, Inc. 2020
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2. See
7  * the COPYING file in the top-level directory.
8  */
9 
10 #include "qemu/osdep.h"
11 #include "qemu/main-loop.h"
12 #include "qemu/cutils.h"
13 #include <linux/vfio.h>
14 #include <sys/ioctl.h>
15 
16 #include "sysemu/runstate.h"
17 #include "hw/vfio/vfio-common.h"
18 #include "migration/migration.h"
19 #include "migration/vmstate.h"
20 #include "migration/qemu-file.h"
21 #include "migration/register.h"
22 #include "migration/blocker.h"
23 #include "migration/misc.h"
24 #include "qapi/error.h"
25 #include "exec/ramlist.h"
26 #include "exec/ram_addr.h"
27 #include "pci.h"
28 #include "trace.h"
29 #include "hw/hw.h"
30 
31 /*
32  * Flags to be used as unique delimiters for VFIO devices in the migration
33  * stream. These flags are composed as:
34  * 0xffffffff => MSB 32-bit all 1s
35  * 0xef10     => Magic ID, represents emulated (virtual) function IO
36  * 0x0000     => 16-bits reserved for flags
37  *
38  * The beginning of state information is marked by _DEV_CONFIG_STATE,
39  * _DEV_SETUP_STATE, or _DEV_DATA_STATE, respectively. The end of a
40  * certain state information is marked by _END_OF_STATE.
41  */
42 #define VFIO_MIG_FLAG_END_OF_STATE      (0xffffffffef100001ULL)
43 #define VFIO_MIG_FLAG_DEV_CONFIG_STATE  (0xffffffffef100002ULL)
44 #define VFIO_MIG_FLAG_DEV_SETUP_STATE   (0xffffffffef100003ULL)
45 #define VFIO_MIG_FLAG_DEV_DATA_STATE    (0xffffffffef100004ULL)
46 
47 static int64_t bytes_transferred;
48 
49 static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count,
50                                   off_t off, bool iswrite)
51 {
52     int ret;
53 
54     ret = iswrite ? pwrite(vbasedev->fd, val, count, off) :
55                     pread(vbasedev->fd, val, count, off);
56     if (ret < count) {
57         error_report("vfio_mig_%s %d byte %s: failed at offset 0x%"
58                      HWADDR_PRIx", err: %s", iswrite ? "write" : "read", count,
59                      vbasedev->name, off, strerror(errno));
60         return (ret < 0) ? ret : -EINVAL;
61     }
62     return 0;
63 }
64 
65 static int vfio_mig_rw(VFIODevice *vbasedev, __u8 *buf, size_t count,
66                        off_t off, bool iswrite)
67 {
68     int ret, done = 0;
69     __u8 *tbuf = buf;
70 
71     while (count) {
72         int bytes = 0;
73 
74         if (count >= 8 && !(off % 8)) {
75             bytes = 8;
76         } else if (count >= 4 && !(off % 4)) {
77             bytes = 4;
78         } else if (count >= 2 && !(off % 2)) {
79             bytes = 2;
80         } else {
81             bytes = 1;
82         }
83 
84         ret = vfio_mig_access(vbasedev, tbuf, bytes, off, iswrite);
85         if (ret) {
86             return ret;
87         }
88 
89         count -= bytes;
90         done += bytes;
91         off += bytes;
92         tbuf += bytes;
93     }
94     return done;
95 }
96 
/*
 * Read/write convenience wrappers around vfio_mig_rw(). Every argument is
 * parenthesised so that expressions such as "p + 1" or "a ? b : c" keep
 * their meaning once expanded next to the cast.
 */
#define vfio_mig_read(f, v, c, o)       \
                            vfio_mig_rw((f), (__u8 *)(v), (c), (o), false)
#define vfio_mig_write(f, v, c, o)      \
                            vfio_mig_rw((f), (__u8 *)(v), (c), (o), true)

/*
 * Byte offset of member @f inside struct vfio_device_migration_info. @f is
 * a member designator for offsetof() and therefore must not be parenthesised.
 */
#define VFIO_MIG_STRUCT_OFFSET(f)       \
                                 offsetof(struct vfio_device_migration_info, f)
102 /*
103  * Change the device_state register for device @vbasedev. Bits set in @mask
104  * are preserved, bits set in @value are set, and bits not set in either @mask
105  * or @value are cleared in device_state. If the register cannot be accessed,
106  * the resulting state would be invalid, or the device enters an error state,
107  * an error is returned.
108  */
109 
110 static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask,
111                                     uint32_t value)
112 {
113     VFIOMigration *migration = vbasedev->migration;
114     VFIORegion *region = &migration->region;
115     off_t dev_state_off = region->fd_offset +
116                           VFIO_MIG_STRUCT_OFFSET(device_state);
117     uint32_t device_state;
118     int ret;
119 
120     ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state),
121                         dev_state_off);
122     if (ret < 0) {
123         return ret;
124     }
125 
126     device_state = (device_state & mask) | value;
127 
128     if (!VFIO_DEVICE_STATE_VALID(device_state)) {
129         return -EINVAL;
130     }
131 
132     ret = vfio_mig_write(vbasedev, &device_state, sizeof(device_state),
133                          dev_state_off);
134     if (ret < 0) {
135         int rret;
136 
137         rret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state),
138                              dev_state_off);
139 
140         if ((rret < 0) || (VFIO_DEVICE_STATE_IS_ERROR(device_state))) {
141             hw_error("%s: Device in error state 0x%x", vbasedev->name,
142                      device_state);
143             return rret ? rret : -EIO;
144         }
145         return ret;
146     }
147 
148     migration->device_state = device_state;
149     trace_vfio_migration_set_state(vbasedev->name, device_state);
150     return 0;
151 }
152 
153 static void *get_data_section_size(VFIORegion *region, uint64_t data_offset,
154                                    uint64_t data_size, uint64_t *size)
155 {
156     void *ptr = NULL;
157     uint64_t limit = 0;
158     int i;
159 
160     if (!region->mmaps) {
161         if (size) {
162             *size = MIN(data_size, region->size - data_offset);
163         }
164         return ptr;
165     }
166 
167     for (i = 0; i < region->nr_mmaps; i++) {
168         VFIOMmap *map = region->mmaps + i;
169 
170         if ((data_offset >= map->offset) &&
171             (data_offset < map->offset + map->size)) {
172 
173             /* check if data_offset is within sparse mmap areas */
174             ptr = map->mmap + data_offset - map->offset;
175             if (size) {
176                 *size = MIN(data_size, map->offset + map->size - data_offset);
177             }
178             break;
179         } else if ((data_offset < map->offset) &&
180                    (!limit || limit > map->offset)) {
181             /*
182              * data_offset is not within sparse mmap areas, find size of
183              * non-mapped area. Check through all list since region->mmaps list
184              * is not sorted.
185              */
186             limit = map->offset;
187         }
188     }
189 
190     if (!ptr && size) {
191         *size = limit ? MIN(data_size, limit - data_offset) : data_size;
192     }
193     return ptr;
194 }
195 
196 static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size)
197 {
198     VFIOMigration *migration = vbasedev->migration;
199     VFIORegion *region = &migration->region;
200     uint64_t data_offset = 0, data_size = 0, sz;
201     int ret;
202 
203     ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
204                       region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset));
205     if (ret < 0) {
206         return ret;
207     }
208 
209     ret = vfio_mig_read(vbasedev, &data_size, sizeof(data_size),
210                         region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size));
211     if (ret < 0) {
212         return ret;
213     }
214 
215     trace_vfio_save_buffer(vbasedev->name, data_offset, data_size,
216                            migration->pending_bytes);
217 
218     qemu_put_be64(f, data_size);
219     sz = data_size;
220 
221     while (sz) {
222         void *buf;
223         uint64_t sec_size;
224         bool buf_allocated = false;
225 
226         buf = get_data_section_size(region, data_offset, sz, &sec_size);
227 
228         if (!buf) {
229             buf = g_try_malloc(sec_size);
230             if (!buf) {
231                 error_report("%s: Error allocating buffer ", __func__);
232                 return -ENOMEM;
233             }
234             buf_allocated = true;
235 
236             ret = vfio_mig_read(vbasedev, buf, sec_size,
237                                 region->fd_offset + data_offset);
238             if (ret < 0) {
239                 g_free(buf);
240                 return ret;
241             }
242         }
243 
244         qemu_put_buffer(f, buf, sec_size);
245 
246         if (buf_allocated) {
247             g_free(buf);
248         }
249         sz -= sec_size;
250         data_offset += sec_size;
251     }
252 
253     ret = qemu_file_get_error(f);
254 
255     if (!ret && size) {
256         *size = data_size;
257     }
258 
259     bytes_transferred += data_size;
260     return ret;
261 }
262 
263 static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
264                             uint64_t data_size)
265 {
266     VFIORegion *region = &vbasedev->migration->region;
267     uint64_t data_offset = 0, size, report_size;
268     int ret;
269 
270     do {
271         ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
272                       region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset));
273         if (ret < 0) {
274             return ret;
275         }
276 
277         if (data_offset + data_size > region->size) {
278             /*
279              * If data_size is greater than the data section of migration region
280              * then iterate the write buffer operation. This case can occur if
281              * size of migration region at destination is smaller than size of
282              * migration region at source.
283              */
284             report_size = size = region->size - data_offset;
285             data_size -= size;
286         } else {
287             report_size = size = data_size;
288             data_size = 0;
289         }
290 
291         trace_vfio_load_state_device_data(vbasedev->name, data_offset, size);
292 
293         while (size) {
294             void *buf;
295             uint64_t sec_size;
296             bool buf_alloc = false;
297 
298             buf = get_data_section_size(region, data_offset, size, &sec_size);
299 
300             if (!buf) {
301                 buf = g_try_malloc(sec_size);
302                 if (!buf) {
303                     error_report("%s: Error allocating buffer ", __func__);
304                     return -ENOMEM;
305                 }
306                 buf_alloc = true;
307             }
308 
309             qemu_get_buffer(f, buf, sec_size);
310 
311             if (buf_alloc) {
312                 ret = vfio_mig_write(vbasedev, buf, sec_size,
313                         region->fd_offset + data_offset);
314                 g_free(buf);
315 
316                 if (ret < 0) {
317                     return ret;
318                 }
319             }
320             size -= sec_size;
321             data_offset += sec_size;
322         }
323 
324         ret = vfio_mig_write(vbasedev, &report_size, sizeof(report_size),
325                         region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size));
326         if (ret < 0) {
327             return ret;
328         }
329     } while (data_size);
330 
331     return 0;
332 }
333 
334 static int vfio_update_pending(VFIODevice *vbasedev)
335 {
336     VFIOMigration *migration = vbasedev->migration;
337     VFIORegion *region = &migration->region;
338     uint64_t pending_bytes = 0;
339     int ret;
340 
341     ret = vfio_mig_read(vbasedev, &pending_bytes, sizeof(pending_bytes),
342                     region->fd_offset + VFIO_MIG_STRUCT_OFFSET(pending_bytes));
343     if (ret < 0) {
344         migration->pending_bytes = 0;
345         return ret;
346     }
347 
348     migration->pending_bytes = pending_bytes;
349     trace_vfio_update_pending(vbasedev->name, pending_bytes);
350     return 0;
351 }
352 
353 static int vfio_save_device_config_state(QEMUFile *f, void *opaque)
354 {
355     VFIODevice *vbasedev = opaque;
356 
357     qemu_put_be64(f, VFIO_MIG_FLAG_DEV_CONFIG_STATE);
358 
359     if (vbasedev->ops && vbasedev->ops->vfio_save_config) {
360         vbasedev->ops->vfio_save_config(vbasedev, f);
361     }
362 
363     qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
364 
365     trace_vfio_save_device_config_state(vbasedev->name);
366 
367     return qemu_file_get_error(f);
368 }
369 
370 static int vfio_load_device_config_state(QEMUFile *f, void *opaque)
371 {
372     VFIODevice *vbasedev = opaque;
373     uint64_t data;
374 
375     if (vbasedev->ops && vbasedev->ops->vfio_load_config) {
376         int ret;
377 
378         ret = vbasedev->ops->vfio_load_config(vbasedev, f);
379         if (ret) {
380             error_report("%s: Failed to load device config space",
381                          vbasedev->name);
382             return ret;
383         }
384     }
385 
386     data = qemu_get_be64(f);
387     if (data != VFIO_MIG_FLAG_END_OF_STATE) {
388         error_report("%s: Failed loading device config space, "
389                      "end flag incorrect 0x%"PRIx64, vbasedev->name, data);
390         return -EINVAL;
391     }
392 
393     trace_vfio_load_device_config_state(vbasedev->name);
394     return qemu_file_get_error(f);
395 }
396 
397 static void vfio_migration_cleanup(VFIODevice *vbasedev)
398 {
399     VFIOMigration *migration = vbasedev->migration;
400 
401     if (migration->region.mmaps) {
402         vfio_region_unmap(&migration->region);
403     }
404 }
405 
406 /* ---------------------------------------------------------------------- */
407 
408 static int vfio_save_setup(QEMUFile *f, void *opaque)
409 {
410     VFIODevice *vbasedev = opaque;
411     VFIOMigration *migration = vbasedev->migration;
412     int ret;
413 
414     trace_vfio_save_setup(vbasedev->name);
415 
416     qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
417 
418     if (migration->region.mmaps) {
419         /*
420          * Calling vfio_region_mmap() from migration thread. Memory API called
421          * from this function require locking the iothread when called from
422          * outside the main loop thread.
423          */
424         qemu_mutex_lock_iothread();
425         ret = vfio_region_mmap(&migration->region);
426         qemu_mutex_unlock_iothread();
427         if (ret) {
428             error_report("%s: Failed to mmap VFIO migration region: %s",
429                          vbasedev->name, strerror(-ret));
430             error_report("%s: Falling back to slow path", vbasedev->name);
431         }
432     }
433 
434     ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK,
435                                    VFIO_DEVICE_STATE_SAVING);
436     if (ret) {
437         error_report("%s: Failed to set state SAVING", vbasedev->name);
438         return ret;
439     }
440 
441     qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
442 
443     ret = qemu_file_get_error(f);
444     if (ret) {
445         return ret;
446     }
447 
448     return 0;
449 }
450 
451 static void vfio_save_cleanup(void *opaque)
452 {
453     VFIODevice *vbasedev = opaque;
454 
455     vfio_migration_cleanup(vbasedev);
456     trace_vfio_save_cleanup(vbasedev->name);
457 }
458 
459 static void vfio_save_pending(QEMUFile *f, void *opaque,
460                               uint64_t threshold_size,
461                               uint64_t *res_precopy_only,
462                               uint64_t *res_compatible,
463                               uint64_t *res_postcopy_only)
464 {
465     VFIODevice *vbasedev = opaque;
466     VFIOMigration *migration = vbasedev->migration;
467     int ret;
468 
469     ret = vfio_update_pending(vbasedev);
470     if (ret) {
471         return;
472     }
473 
474     *res_precopy_only += migration->pending_bytes;
475 
476     trace_vfio_save_pending(vbasedev->name, *res_precopy_only,
477                             *res_postcopy_only, *res_compatible);
478 }
479 
480 static int vfio_save_iterate(QEMUFile *f, void *opaque)
481 {
482     VFIODevice *vbasedev = opaque;
483     VFIOMigration *migration = vbasedev->migration;
484     uint64_t data_size;
485     int ret;
486 
487     qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
488 
489     if (migration->pending_bytes == 0) {
490         ret = vfio_update_pending(vbasedev);
491         if (ret) {
492             return ret;
493         }
494 
495         if (migration->pending_bytes == 0) {
496             qemu_put_be64(f, 0);
497             qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
498             /* indicates data finished, goto complete phase */
499             return 1;
500         }
501     }
502 
503     ret = vfio_save_buffer(f, vbasedev, &data_size);
504     if (ret) {
505         error_report("%s: vfio_save_buffer failed %s", vbasedev->name,
506                      strerror(errno));
507         return ret;
508     }
509 
510     qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
511 
512     ret = qemu_file_get_error(f);
513     if (ret) {
514         return ret;
515     }
516 
517     /*
518      * Reset pending_bytes as .save_live_pending is not called during savevm or
519      * snapshot case, in such case vfio_update_pending() at the start of this
520      * function updates pending_bytes.
521      */
522     migration->pending_bytes = 0;
523     trace_vfio_save_iterate(vbasedev->name, data_size);
524     return 0;
525 }
526 
527 static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
528 {
529     VFIODevice *vbasedev = opaque;
530     VFIOMigration *migration = vbasedev->migration;
531     uint64_t data_size;
532     int ret;
533 
534     ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_RUNNING,
535                                    VFIO_DEVICE_STATE_SAVING);
536     if (ret) {
537         error_report("%s: Failed to set state STOP and SAVING",
538                      vbasedev->name);
539         return ret;
540     }
541 
542     ret = vfio_update_pending(vbasedev);
543     if (ret) {
544         return ret;
545     }
546 
547     while (migration->pending_bytes > 0) {
548         qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
549         ret = vfio_save_buffer(f, vbasedev, &data_size);
550         if (ret < 0) {
551             error_report("%s: Failed to save buffer", vbasedev->name);
552             return ret;
553         }
554 
555         if (data_size == 0) {
556             break;
557         }
558 
559         ret = vfio_update_pending(vbasedev);
560         if (ret) {
561             return ret;
562         }
563     }
564 
565     qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
566 
567     ret = qemu_file_get_error(f);
568     if (ret) {
569         return ret;
570     }
571 
572     ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_SAVING, 0);
573     if (ret) {
574         error_report("%s: Failed to set state STOPPED", vbasedev->name);
575         return ret;
576     }
577 
578     trace_vfio_save_complete_precopy(vbasedev->name);
579     return ret;
580 }
581 
582 static void vfio_save_state(QEMUFile *f, void *opaque)
583 {
584     VFIODevice *vbasedev = opaque;
585     int ret;
586 
587     ret = vfio_save_device_config_state(f, opaque);
588     if (ret) {
589         error_report("%s: Failed to save device config space",
590                      vbasedev->name);
591         qemu_file_set_error(f, ret);
592     }
593 }
594 
595 static int vfio_load_setup(QEMUFile *f, void *opaque)
596 {
597     VFIODevice *vbasedev = opaque;
598     VFIOMigration *migration = vbasedev->migration;
599     int ret = 0;
600 
601     if (migration->region.mmaps) {
602         ret = vfio_region_mmap(&migration->region);
603         if (ret) {
604             error_report("%s: Failed to mmap VFIO migration region %d: %s",
605                          vbasedev->name, migration->region.nr,
606                          strerror(-ret));
607             error_report("%s: Falling back to slow path", vbasedev->name);
608         }
609     }
610 
611     ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_MASK,
612                                    VFIO_DEVICE_STATE_RESUMING);
613     if (ret) {
614         error_report("%s: Failed to set state RESUMING", vbasedev->name);
615         if (migration->region.mmaps) {
616             vfio_region_unmap(&migration->region);
617         }
618     }
619     return ret;
620 }
621 
622 static int vfio_load_cleanup(void *opaque)
623 {
624     VFIODevice *vbasedev = opaque;
625 
626     vfio_migration_cleanup(vbasedev);
627     trace_vfio_load_cleanup(vbasedev->name);
628     return 0;
629 }
630 
631 static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
632 {
633     VFIODevice *vbasedev = opaque;
634     int ret = 0;
635     uint64_t data;
636 
637     data = qemu_get_be64(f);
638     while (data != VFIO_MIG_FLAG_END_OF_STATE) {
639 
640         trace_vfio_load_state(vbasedev->name, data);
641 
642         switch (data) {
643         case VFIO_MIG_FLAG_DEV_CONFIG_STATE:
644         {
645             return vfio_load_device_config_state(f, opaque);
646         }
647         case VFIO_MIG_FLAG_DEV_SETUP_STATE:
648         {
649             data = qemu_get_be64(f);
650             if (data == VFIO_MIG_FLAG_END_OF_STATE) {
651                 return ret;
652             } else {
653                 error_report("%s: SETUP STATE: EOS not found 0x%"PRIx64,
654                              vbasedev->name, data);
655                 return -EINVAL;
656             }
657             break;
658         }
659         case VFIO_MIG_FLAG_DEV_DATA_STATE:
660         {
661             uint64_t data_size = qemu_get_be64(f);
662 
663             if (data_size) {
664                 ret = vfio_load_buffer(f, vbasedev, data_size);
665                 if (ret < 0) {
666                     return ret;
667                 }
668             }
669             break;
670         }
671         default:
672             error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data);
673             return -EINVAL;
674         }
675 
676         data = qemu_get_be64(f);
677         ret = qemu_file_get_error(f);
678         if (ret) {
679             return ret;
680         }
681     }
682     return ret;
683 }
684 
685 static SaveVMHandlers savevm_vfio_handlers = {
686     .save_setup = vfio_save_setup,
687     .save_cleanup = vfio_save_cleanup,
688     .save_live_pending = vfio_save_pending,
689     .save_live_iterate = vfio_save_iterate,
690     .save_live_complete_precopy = vfio_save_complete_precopy,
691     .save_state = vfio_save_state,
692     .load_setup = vfio_load_setup,
693     .load_cleanup = vfio_load_cleanup,
694     .load_state = vfio_load_state,
695 };
696 
697 /* ---------------------------------------------------------------------- */
698 
699 static void vfio_vmstate_change(void *opaque, bool running, RunState state)
700 {
701     VFIODevice *vbasedev = opaque;
702     VFIOMigration *migration = vbasedev->migration;
703     uint32_t value, mask;
704     int ret;
705 
706     if (vbasedev->migration->vm_running == running) {
707         return;
708     }
709 
710     if (running) {
711         /*
712          * Here device state can have one of _SAVING, _RESUMING or _STOP bit.
713          * Transition from _SAVING to _RUNNING can happen if there is migration
714          * failure, in that case clear _SAVING bit.
715          * Transition from _RESUMING to _RUNNING occurs during resuming
716          * phase, in that case clear _RESUMING bit.
717          * In both the above cases, set _RUNNING bit.
718          */
719         mask = ~VFIO_DEVICE_STATE_MASK;
720         value = VFIO_DEVICE_STATE_RUNNING;
721     } else {
722         /*
723          * Here device state could be either _RUNNING or _SAVING|_RUNNING. Reset
724          * _RUNNING bit
725          */
726         mask = ~VFIO_DEVICE_STATE_RUNNING;
727 
728         /*
729          * When VM state transition to stop for savevm command, device should
730          * start saving data.
731          */
732         if (state == RUN_STATE_SAVE_VM) {
733             value = VFIO_DEVICE_STATE_SAVING;
734         } else {
735             value = 0;
736         }
737     }
738 
739     ret = vfio_migration_set_state(vbasedev, mask, value);
740     if (ret) {
741         /*
742          * Migration should be aborted in this case, but vm_state_notify()
743          * currently does not support reporting failures.
744          */
745         error_report("%s: Failed to set device state 0x%x", vbasedev->name,
746                      (migration->device_state & mask) | value);
747         qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
748     }
749     vbasedev->migration->vm_running = running;
750     trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state),
751             (migration->device_state & mask) | value);
752 }
753 
754 static void vfio_migration_state_notifier(Notifier *notifier, void *data)
755 {
756     MigrationState *s = data;
757     VFIOMigration *migration = container_of(notifier, VFIOMigration,
758                                             migration_state);
759     VFIODevice *vbasedev = migration->vbasedev;
760     int ret;
761 
762     trace_vfio_migration_state_notifier(vbasedev->name,
763                                         MigrationStatus_str(s->state));
764 
765     switch (s->state) {
766     case MIGRATION_STATUS_CANCELLING:
767     case MIGRATION_STATUS_CANCELLED:
768     case MIGRATION_STATUS_FAILED:
769         bytes_transferred = 0;
770         ret = vfio_migration_set_state(vbasedev,
771                       ~(VFIO_DEVICE_STATE_SAVING | VFIO_DEVICE_STATE_RESUMING),
772                       VFIO_DEVICE_STATE_RUNNING);
773         if (ret) {
774             error_report("%s: Failed to set state RUNNING", vbasedev->name);
775         }
776     }
777 }
778 
779 static void vfio_migration_exit(VFIODevice *vbasedev)
780 {
781     VFIOMigration *migration = vbasedev->migration;
782 
783     vfio_region_exit(&migration->region);
784     vfio_region_finalize(&migration->region);
785     g_free(vbasedev->migration);
786     vbasedev->migration = NULL;
787 }
788 
789 static int vfio_migration_init(VFIODevice *vbasedev,
790                                struct vfio_region_info *info)
791 {
792     int ret;
793     Object *obj;
794     VFIOMigration *migration;
795     char id[256] = "";
796     g_autofree char *path = NULL, *oid = NULL;
797 
798     if (!vbasedev->ops->vfio_get_object) {
799         return -EINVAL;
800     }
801 
802     obj = vbasedev->ops->vfio_get_object(vbasedev);
803     if (!obj) {
804         return -EINVAL;
805     }
806 
807     vbasedev->migration = g_new0(VFIOMigration, 1);
808 
809     ret = vfio_region_setup(obj, vbasedev, &vbasedev->migration->region,
810                             info->index, "migration");
811     if (ret) {
812         error_report("%s: Failed to setup VFIO migration region %d: %s",
813                      vbasedev->name, info->index, strerror(-ret));
814         goto err;
815     }
816 
817     if (!vbasedev->migration->region.size) {
818         error_report("%s: Invalid zero-sized VFIO migration region %d",
819                      vbasedev->name, info->index);
820         ret = -EINVAL;
821         goto err;
822     }
823 
824     migration = vbasedev->migration;
825     migration->vbasedev = vbasedev;
826 
827     oid = vmstate_if_get_id(VMSTATE_IF(DEVICE(obj)));
828     if (oid) {
829         path = g_strdup_printf("%s/vfio", oid);
830     } else {
831         path = g_strdup("vfio");
832     }
833     strpadcpy(id, sizeof(id), path, '\0');
834 
835     register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers,
836                          vbasedev);
837 
838     migration->vm_state = qdev_add_vm_change_state_handler(vbasedev->dev,
839                                                            vfio_vmstate_change,
840                                                            vbasedev);
841     migration->migration_state.notify = vfio_migration_state_notifier;
842     add_migration_state_change_notifier(&migration->migration_state);
843     return 0;
844 
845 err:
846     vfio_migration_exit(vbasedev);
847     return ret;
848 }
849 
850 /* ---------------------------------------------------------------------- */
851 
852 int64_t vfio_mig_bytes_transferred(void)
853 {
854     return bytes_transferred;
855 }
856 
857 int vfio_migration_probe(VFIODevice *vbasedev, Error **errp)
858 {
859     VFIOContainer *container = vbasedev->group->container;
860     struct vfio_region_info *info = NULL;
861     int ret = -ENOTSUP;
862 
863     if (!vbasedev->enable_migration || !container->dirty_pages_supported) {
864         goto add_blocker;
865     }
866 
867     ret = vfio_get_dev_region_info(vbasedev, VFIO_REGION_TYPE_MIGRATION,
868                                    VFIO_REGION_SUBTYPE_MIGRATION, &info);
869     if (ret) {
870         goto add_blocker;
871     }
872 
873     ret = vfio_migration_init(vbasedev, info);
874     if (ret) {
875         goto add_blocker;
876     }
877 
878     trace_vfio_migration_probe(vbasedev->name, info->index);
879     g_free(info);
880     return 0;
881 
882 add_blocker:
883     error_setg(&vbasedev->migration_blocker,
884                "VFIO device doesn't support migration");
885     g_free(info);
886 
887     ret = migrate_add_blocker(vbasedev->migration_blocker, errp);
888     if (ret < 0) {
889         error_free(vbasedev->migration_blocker);
890         vbasedev->migration_blocker = NULL;
891     }
892     return ret;
893 }
894 
895 void vfio_migration_finalize(VFIODevice *vbasedev)
896 {
897     if (vbasedev->migration) {
898         VFIOMigration *migration = vbasedev->migration;
899 
900         remove_migration_state_change_notifier(&migration->migration_state);
901         qemu_del_vm_change_state_handler(migration->vm_state);
902         unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev);
903         vfio_migration_exit(vbasedev);
904     }
905 
906     if (vbasedev->migration_blocker) {
907         migrate_del_blocker(vbasedev->migration_blocker);
908         error_free(vbasedev->migration_blocker);
909         vbasedev->migration_blocker = NULL;
910     }
911 }
912