xref: /openbmc/qemu/blockdev.c (revision d195325b)
1 /*
2  * QEMU host block devices
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2 or
7  * later.  See the COPYING file in the top-level directory.
8  */
9 
10 #include "blockdev.h"
11 #include "hw/block-common.h"
12 #include "monitor.h"
13 #include "qerror.h"
14 #include "qemu-option.h"
15 #include "qemu-config.h"
16 #include "qemu-objects.h"
17 #include "sysemu.h"
18 #include "block_int.h"
19 #include "qmp-commands.h"
20 #include "trace.h"
21 #include "arch_init.h"
22 
/*
 * List of all currently defined drives.  drive_init() appends entries and
 * drive_uninit() removes them; the drive_get*() helpers search it.
 */
static QTAILQ_HEAD(drivelist, DriveInfo) drives = QTAILQ_HEAD_INITIALIZER(drives);
24 
/*
 * Interface type names, indexed by BlockInterfaceType.  drive_init()
 * compares the "if" option against these strings and uses them as the
 * prefix of automatically generated drive ids.
 */
static const char *const if_name[IF_COUNT] = {
    [IF_NONE] = "none",
    [IF_IDE] = "ide",
    [IF_SCSI] = "scsi",
    [IF_FLOPPY] = "floppy",
    [IF_PFLASH] = "pflash",
    [IF_MTD] = "mtd",
    [IF_SD] = "sd",
    [IF_VIRTIO] = "virtio",
    [IF_XEN] = "xen",
};
36 
/*
 * Number of units per bus for each interface type.  Types not listed
 * here are zero-initialized; for those, drive_index_to_bus_id() always
 * yields bus 0 and drive_index_to_unit_id() yields the index itself.
 */
static const int if_max_devs[IF_COUNT] = {
    /*
     * Do not change these numbers!  They govern how drive option
     * index maps to unit and bus.  That mapping is ABI.
     *
     * All controllers used to implement if=T drives need to support
     * if_max_devs[T] units, for any T with if_max_devs[T] != 0.
     * Otherwise, some index values map to "impossible" bus, unit
     * values.
     *
     * For instance, if you change [IF_SCSI] to 255, -drive
     * if=scsi,index=12 no longer means bus=1,unit=5, but
     * bus=0,unit=12.  With an lsi53c895a controller (7 units max),
     * the drive can't be set up.  Regression.
     */
    [IF_IDE] = 2,
    [IF_SCSI] = 7,
};
55 
56 /*
57  * We automatically delete the drive when a device using it gets
58  * unplugged.  Questionable feature, but we can't just drop it.
59  * Device models call blockdev_mark_auto_del() to schedule the
60  * automatic deletion, and generic qdev code calls blockdev_auto_del()
61  * when deletion is actually safe.
62  */
63 void blockdev_mark_auto_del(BlockDriverState *bs)
64 {
65     DriveInfo *dinfo = drive_get_by_blockdev(bs);
66 
67     if (bs->job) {
68         block_job_cancel(bs->job);
69     }
70     if (dinfo) {
71         dinfo->auto_del = 1;
72     }
73 }
74 
75 void blockdev_auto_del(BlockDriverState *bs)
76 {
77     DriveInfo *dinfo = drive_get_by_blockdev(bs);
78 
79     if (dinfo && dinfo->auto_del) {
80         drive_put_ref(dinfo);
81     }
82 }
83 
84 static int drive_index_to_bus_id(BlockInterfaceType type, int index)
85 {
86     int max_devs = if_max_devs[type];
87     return max_devs ? index / max_devs : 0;
88 }
89 
90 static int drive_index_to_unit_id(BlockInterfaceType type, int index)
91 {
92     int max_devs = if_max_devs[type];
93     return max_devs ? index % max_devs : index;
94 }
95 
96 QemuOpts *drive_def(const char *optstr)
97 {
98     return qemu_opts_parse(qemu_find_opts("drive"), optstr, 0);
99 }
100 
101 QemuOpts *drive_add(BlockInterfaceType type, int index, const char *file,
102                     const char *optstr)
103 {
104     QemuOpts *opts;
105     char buf[32];
106 
107     opts = drive_def(optstr);
108     if (!opts) {
109         return NULL;
110     }
111     if (type != IF_DEFAULT) {
112         qemu_opt_set(opts, "if", if_name[type]);
113     }
114     if (index >= 0) {
115         snprintf(buf, sizeof(buf), "%d", index);
116         qemu_opt_set(opts, "index", buf);
117     }
118     if (file)
119         qemu_opt_set(opts, "file", file);
120     return opts;
121 }
122 
123 DriveInfo *drive_get(BlockInterfaceType type, int bus, int unit)
124 {
125     DriveInfo *dinfo;
126 
127     /* seek interface, bus and unit */
128 
129     QTAILQ_FOREACH(dinfo, &drives, next) {
130         if (dinfo->type == type &&
131 	    dinfo->bus == bus &&
132 	    dinfo->unit == unit)
133             return dinfo;
134     }
135 
136     return NULL;
137 }
138 
139 DriveInfo *drive_get_by_index(BlockInterfaceType type, int index)
140 {
141     return drive_get(type,
142                      drive_index_to_bus_id(type, index),
143                      drive_index_to_unit_id(type, index));
144 }
145 
146 int drive_get_max_bus(BlockInterfaceType type)
147 {
148     int max_bus;
149     DriveInfo *dinfo;
150 
151     max_bus = -1;
152     QTAILQ_FOREACH(dinfo, &drives, next) {
153         if(dinfo->type == type &&
154            dinfo->bus > max_bus)
155             max_bus = dinfo->bus;
156     }
157     return max_bus;
158 }
159 
160 /* Get a block device.  This should only be used for single-drive devices
161    (e.g. SD/Floppy/MTD).  Multi-disk devices (scsi/ide) should use the
162    appropriate bus.  */
163 DriveInfo *drive_get_next(BlockInterfaceType type)
164 {
165     static int next_block_unit[IF_COUNT];
166 
167     return drive_get(type, 0, next_block_unit[type]++);
168 }
169 
170 DriveInfo *drive_get_by_blockdev(BlockDriverState *bs)
171 {
172     DriveInfo *dinfo;
173 
174     QTAILQ_FOREACH(dinfo, &drives, next) {
175         if (dinfo->bdrv == bs) {
176             return dinfo;
177         }
178     }
179     return NULL;
180 }
181 
/*
 * bdrv_iterate_format() callback: print one format name, preceded by a
 * space, via error_printf().  @opaque is not used.
 */
static void bdrv_format_print(void *opaque, const char *name)
{
    (void)opaque;

    error_printf(" %s", name);
}
186 
/*
 * Destroy @dinfo: delete its options and block driver state, free its id,
 * unlink it from the global drive list and free the structure itself.
 * @dinfo must not be used afterwards.
 */
static void drive_uninit(DriveInfo *dinfo)
{
    qemu_opts_del(dinfo->opts);
    bdrv_delete(dinfo->bdrv);
    g_free(dinfo->id);
    QTAILQ_REMOVE(&drives, dinfo, next);
    g_free(dinfo);
}
195 
196 void drive_put_ref(DriveInfo *dinfo)
197 {
198     assert(dinfo->refcount);
199     if (--dinfo->refcount == 0) {
200         drive_uninit(dinfo);
201     }
202 }
203 
204 void drive_get_ref(DriveInfo *dinfo)
205 {
206     dinfo->refcount++;
207 }
208 
/* State carried by the bottom half that releases a drive reference. */
typedef struct {
    QEMUBH *bh;         /* the bottom half itself, deleted after it ran */
    DriveInfo *dinfo;   /* drive whose reference gets released */
} DrivePutRefBH;
213 
214 static void drive_put_ref_bh(void *opaque)
215 {
216     DrivePutRefBH *s = opaque;
217 
218     drive_put_ref(s->dinfo);
219     qemu_bh_delete(s->bh);
220     g_free(s);
221 }
222 
223 /*
224  * Release a drive reference in a BH
225  *
226  * It is not possible to use drive_put_ref() from a callback function when the
227  * callers still need the drive.  In such cases we schedule a BH to release the
228  * reference.
229  */
230 static void drive_put_ref_bh_schedule(DriveInfo *dinfo)
231 {
232     DrivePutRefBH *s;
233 
234     s = g_new(DrivePutRefBH, 1);
235     s->bh = qemu_bh_new(drive_put_ref_bh, s);
236     s->dinfo = dinfo;
237     qemu_bh_schedule(s->bh);
238 }
239 
240 static int parse_block_error_action(const char *buf, int is_read)
241 {
242     if (!strcmp(buf, "ignore")) {
243         return BLOCK_ERR_IGNORE;
244     } else if (!is_read && !strcmp(buf, "enospc")) {
245         return BLOCK_ERR_STOP_ENOSPC;
246     } else if (!strcmp(buf, "stop")) {
247         return BLOCK_ERR_STOP_ANY;
248     } else if (!strcmp(buf, "report")) {
249         return BLOCK_ERR_REPORT;
250     } else {
251         error_report("'%s' invalid %s error action",
252                      buf, is_read ? "read" : "write");
253         return -1;
254     }
255 }
256 
257 static bool do_check_io_limits(BlockIOLimit *io_limits)
258 {
259     bool bps_flag;
260     bool iops_flag;
261 
262     assert(io_limits);
263 
264     bps_flag  = (io_limits->bps[BLOCK_IO_LIMIT_TOTAL] != 0)
265                  && ((io_limits->bps[BLOCK_IO_LIMIT_READ] != 0)
266                  || (io_limits->bps[BLOCK_IO_LIMIT_WRITE] != 0));
267     iops_flag = (io_limits->iops[BLOCK_IO_LIMIT_TOTAL] != 0)
268                  && ((io_limits->iops[BLOCK_IO_LIMIT_READ] != 0)
269                  || (io_limits->iops[BLOCK_IO_LIMIT_WRITE] != 0));
270     if (bps_flag || iops_flag) {
271         return false;
272     }
273 
274     return true;
275 }
276 
/*
 * Create a drive from a set of "drive" options.
 *
 * Reads and validates the options in @opts, works out the (bus, unit)
 * slot, allocates a DriveInfo together with a new BlockDriverState,
 * appends it to the global drive list and, when a non-empty "file"
 * option is present, opens the image.
 *
 * @opts:            options describing the drive; stored in the returned
 *                   DriveInfo on success
 * @default_to_scsi: when no "if" option is given, use IF_SCSI if this is
 *                   non-zero and IF_IDE otherwise
 *
 * Returns the new DriveInfo, or NULL on failure.  Failures are reported
 * with error_report().  Note that format=? also returns NULL after
 * printing the list of supported formats.
 */
DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi)
{
    const char *buf;
    const char *file = NULL;
    const char *serial;
    const char *mediastr = "";
    BlockInterfaceType type;
    enum { MEDIA_DISK, MEDIA_CDROM } media;
    int bus_id, unit_id;
    int cyls, heads, secs, translation;
    BlockDriver *drv = NULL;
    int max_devs;
    int index;
    int ro = 0;
    int bdrv_flags = 0;
    int on_read_error, on_write_error;
    const char *devaddr;
    DriveInfo *dinfo;
    BlockIOLimit io_limits;
    int snapshot = 0;
    bool copy_on_read;
    int ret;

    translation = BIOS_ATA_TRANSLATION_AUTO;
    media = MEDIA_DISK;

    /* extract parameters */
    bus_id  = qemu_opt_get_number(opts, "bus", 0);
    unit_id = qemu_opt_get_number(opts, "unit", -1);
    index   = qemu_opt_get_number(opts, "index", -1);

    cyls  = qemu_opt_get_number(opts, "cyls", 0);
    heads = qemu_opt_get_number(opts, "heads", 0);
    secs  = qemu_opt_get_number(opts, "secs", 0);

    snapshot = qemu_opt_get_bool(opts, "snapshot", 0);
    ro = qemu_opt_get_bool(opts, "readonly", 0);
    copy_on_read = qemu_opt_get_bool(opts, "copy-on-read", false);

    file = qemu_opt_get(opts, "file");
    serial = qemu_opt_get(opts, "serial");

    /* map the "if" option to an interface type by name */
    if ((buf = qemu_opt_get(opts, "if")) != NULL) {
        for (type = 0; type < IF_COUNT && strcmp(buf, if_name[type]); type++)
            ;
        if (type == IF_COUNT) {
            error_report("unsupported bus type '%s'", buf);
            return NULL;
	}
    } else {
        type = default_to_scsi ? IF_SCSI : IF_IDE;
    }

    max_devs = if_max_devs[type];

    /* a CHS geometry must be given completely or not at all */
    if (cyls || heads || secs) {
        if (cyls < 1) {
            error_report("invalid physical cyls number");
	    return NULL;
	}
        if (heads < 1) {
            error_report("invalid physical heads number");
	    return NULL;
	}
        if (secs < 1) {
            error_report("invalid physical secs number");
	    return NULL;
	}
    }

    if ((buf = qemu_opt_get(opts, "trans")) != NULL) {
        if (!cyls) {
            error_report("'%s' trans must be used with cyls, heads and secs",
                         buf);
            return NULL;
        }
        if (!strcmp(buf, "none"))
            translation = BIOS_ATA_TRANSLATION_NONE;
        else if (!strcmp(buf, "lba"))
            translation = BIOS_ATA_TRANSLATION_LBA;
        else if (!strcmp(buf, "auto"))
            translation = BIOS_ATA_TRANSLATION_AUTO;
	else {
            error_report("'%s' invalid translation type", buf);
	    return NULL;
	}
    }

    if ((buf = qemu_opt_get(opts, "media")) != NULL) {
        if (!strcmp(buf, "disk")) {
	    media = MEDIA_DISK;
	} else if (!strcmp(buf, "cdrom")) {
            if (cyls || secs || heads) {
                error_report("CHS can't be set with media=%s", buf);
	        return NULL;
            }
	    media = MEDIA_CDROM;
	} else {
	    error_report("'%s' invalid media", buf);
	    return NULL;
	}
    }

    if ((buf = qemu_opt_get(opts, "cache")) != NULL) {
        if (bdrv_parse_cache_flags(buf, &bdrv_flags) != 0) {
            error_report("invalid cache option");
            return NULL;
        }
    }

    /* the "aio" option is only examined when built with CONFIG_LINUX_AIO */
#ifdef CONFIG_LINUX_AIO
    if ((buf = qemu_opt_get(opts, "aio")) != NULL) {
        if (!strcmp(buf, "native")) {
            bdrv_flags |= BDRV_O_NATIVE_AIO;
        } else if (!strcmp(buf, "threads")) {
            /* this is the default */
        } else {
           error_report("invalid aio option");
           return NULL;
        }
    }
#endif

    if ((buf = qemu_opt_get(opts, "format")) != NULL) {
       /* format=? lists the known formats and then fails */
       if (strcmp(buf, "?") == 0) {
           error_printf("Supported formats:");
           bdrv_iterate_format(bdrv_format_print, NULL);
           error_printf("\n");
           return NULL;
        }
        drv = bdrv_find_whitelisted_format(buf);
        if (!drv) {
            error_report("'%s' invalid format", buf);
            return NULL;
        }
    }

    /* disk I/O throttling */
    io_limits.bps[BLOCK_IO_LIMIT_TOTAL]  =
                           qemu_opt_get_number(opts, "bps", 0);
    io_limits.bps[BLOCK_IO_LIMIT_READ]   =
                           qemu_opt_get_number(opts, "bps_rd", 0);
    io_limits.bps[BLOCK_IO_LIMIT_WRITE]  =
                           qemu_opt_get_number(opts, "bps_wr", 0);
    io_limits.iops[BLOCK_IO_LIMIT_TOTAL] =
                           qemu_opt_get_number(opts, "iops", 0);
    io_limits.iops[BLOCK_IO_LIMIT_READ]  =
                           qemu_opt_get_number(opts, "iops_rd", 0);
    io_limits.iops[BLOCK_IO_LIMIT_WRITE] =
                           qemu_opt_get_number(opts, "iops_wr", 0);

    if (!do_check_io_limits(&io_limits)) {
        error_report("bps(iops) and bps_rd/bps_wr(iops_rd/iops_wr) "
                     "cannot be used at the same time");
        return NULL;
    }

    /* default write error policy: stop on ENOSPC */
    on_write_error = BLOCK_ERR_STOP_ENOSPC;
    if ((buf = qemu_opt_get(opts, "werror")) != NULL) {
        if (type != IF_IDE && type != IF_SCSI && type != IF_VIRTIO && type != IF_NONE) {
            error_report("werror is not supported by this bus type");
            return NULL;
        }

        on_write_error = parse_block_error_action(buf, 0);
        if (on_write_error < 0) {
            return NULL;
        }
    }

    /* default read error policy: report */
    on_read_error = BLOCK_ERR_REPORT;
    if ((buf = qemu_opt_get(opts, "rerror")) != NULL) {
        if (type != IF_IDE && type != IF_VIRTIO && type != IF_SCSI && type != IF_NONE) {
            error_report("rerror is not supported by this bus type");
            return NULL;
        }

        on_read_error = parse_block_error_action(buf, 1);
        if (on_read_error < 0) {
            return NULL;
        }
    }

    if ((devaddr = qemu_opt_get(opts, "addr")) != NULL) {
        if (type != IF_VIRTIO) {
            error_report("addr is not supported by this bus type");
            return NULL;
        }
    }

    /* compute bus and unit according index */

    if (index != -1) {
        if (bus_id != 0 || unit_id != -1) {
            error_report("index cannot be used with bus and unit");
            return NULL;
        }
        bus_id = drive_index_to_bus_id(type, index);
        unit_id = drive_index_to_unit_id(type, index);
    }

    /* if user doesn't specify a unit_id,
     * try to find the first free
     */

    if (unit_id == -1) {
       unit_id = 0;
       while (drive_get(type, bus_id, unit_id) != NULL) {
           unit_id++;
           /* wrap to the next bus once this one is full */
           if (max_devs && unit_id >= max_devs) {
               unit_id -= max_devs;
               bus_id++;
           }
       }
    }

    /* check unit id */

    if (max_devs && unit_id >= max_devs) {
        error_report("unit %d too big (max is %d)",
                     unit_id, max_devs - 1);
        return NULL;
    }

    /*
     * catch multiple definitions
     */

    if (drive_get(type, bus_id, unit_id) != NULL) {
        error_report("drive with bus=%d, unit=%d (index=%d) exists",
                     bus_id, unit_id, index);
        return NULL;
    }

    /* init */

    dinfo = g_malloc0(sizeof(*dinfo));
    if ((buf = qemu_opts_id(opts)) != NULL) {
        dinfo->id = g_strdup(buf);
    } else {
        /* no id supplied -> create one */
        dinfo->id = g_malloc0(32);
        if (type == IF_IDE || type == IF_SCSI)
            mediastr = (media == MEDIA_CDROM) ? "-cd" : "-hd";
        if (max_devs)
            snprintf(dinfo->id, 32, "%s%i%s%i",
                     if_name[type], bus_id, mediastr, unit_id);
        else
            snprintf(dinfo->id, 32, "%s%s%i",
                     if_name[type], mediastr, unit_id);
    }
    dinfo->bdrv = bdrv_new(dinfo->id);
    dinfo->devaddr = devaddr;
    dinfo->type = type;
    dinfo->bus = bus_id;
    dinfo->unit = unit_id;
    dinfo->cyls = cyls;
    dinfo->heads = heads;
    dinfo->secs = secs;
    dinfo->trans = translation;
    dinfo->opts = opts;
    dinfo->refcount = 1;
    dinfo->serial = serial;
    QTAILQ_INSERT_TAIL(&drives, dinfo, next);

    bdrv_set_on_error(dinfo->bdrv, on_read_error, on_write_error);

    /* disk I/O throttling */
    bdrv_set_io_limits(dinfo->bdrv, &io_limits);

    switch(type) {
    case IF_IDE:
    case IF_SCSI:
    case IF_XEN:
    case IF_NONE:
        dinfo->media_cd = media == MEDIA_CDROM;
        break;
    case IF_SD:
    case IF_FLOPPY:
    case IF_PFLASH:
    case IF_MTD:
        break;
    case IF_VIRTIO:
        /* add virtio block device */
        /* from here on, opts refers to the new "device" options */
        opts = qemu_opts_create(qemu_find_opts("device"), NULL, 0, NULL);
        if (arch_type == QEMU_ARCH_S390X) {
            qemu_opt_set(opts, "driver", "virtio-blk-s390");
        } else {
            qemu_opt_set(opts, "driver", "virtio-blk-pci");
        }
        qemu_opt_set(opts, "drive", dinfo->id);
        if (devaddr)
            qemu_opt_set(opts, "addr", devaddr);
        break;
    default:
        abort();
    }
    /* without a file name the drive is left without an opened image */
    if (!file || !*file) {
        return dinfo;
    }
    if (snapshot) {
        /* always use cache=unsafe with snapshot */
        bdrv_flags &= ~BDRV_O_CACHE_MASK;
        bdrv_flags |= (BDRV_O_SNAPSHOT|BDRV_O_CACHE_WB|BDRV_O_NO_FLUSH);
    }

    if (copy_on_read) {
        bdrv_flags |= BDRV_O_COPY_ON_READ;
    }

    if (runstate_check(RUN_STATE_INMIGRATE)) {
        bdrv_flags |= BDRV_O_INCOMING;
    }

    if (media == MEDIA_CDROM) {
        /* CDROM is fine for any interface, don't check.  */
        ro = 1;
    } else if (ro == 1) {
        if (type != IF_SCSI && type != IF_VIRTIO && type != IF_FLOPPY &&
            type != IF_NONE && type != IF_PFLASH) {
            error_report("readonly not supported by this bus type");
            goto err;
        }
    }

    bdrv_flags |= ro ? 0 : BDRV_O_RDWR;

    /*
     * NOTE(review): only a warning is printed here; BDRV_O_COPY_ON_READ
     * stays set in bdrv_flags.  Presumably bdrv_open() ignores it for
     * read-only images -- confirm.
     */
    if (ro && copy_on_read) {
        error_report("warning: disabling copy_on_read on readonly drive");
    }

    ret = bdrv_open(dinfo->bdrv, file, bdrv_flags, drv);
    if (ret < 0) {
        error_report("could not open disk image %s: %s",
                     file, strerror(-ret));
        goto err;
    }

    if (bdrv_key_required(dinfo->bdrv))
        autostart = 0;
    return dinfo;

err:
    /*
     * Undo the allocation and list insertion done above.  dinfo->opts is
     * not deleted here.
     * NOTE(review): the "device" options created for IF_VIRTIO are not
     * removed on this path either -- confirm whether that is intended.
     */
    bdrv_delete(dinfo->bdrv);
    g_free(dinfo->id);
    QTAILQ_REMOVE(&drives, dinfo, next);
    g_free(dinfo);
    return NULL;
}
626 
627 void do_commit(Monitor *mon, const QDict *qdict)
628 {
629     const char *device = qdict_get_str(qdict, "device");
630     BlockDriverState *bs;
631     int ret;
632 
633     if (!strcmp(device, "all")) {
634         ret = bdrv_commit_all();
635         if (ret == -EBUSY) {
636             qerror_report(QERR_DEVICE_IN_USE, device);
637             return;
638         }
639     } else {
640         bs = bdrv_find(device);
641         if (!bs) {
642             qerror_report(QERR_DEVICE_NOT_FOUND, device);
643             return;
644         }
645         ret = bdrv_commit(bs);
646         if (ret == -EBUSY) {
647             qerror_report(QERR_DEVICE_IN_USE, device);
648             return;
649         }
650     }
651 }
652 
653 static void blockdev_do_action(int kind, void *data, Error **errp)
654 {
655     BlockdevAction action;
656     BlockdevActionList list;
657 
658     action.kind = kind;
659     action.data = data;
660     list.value = &action;
661     list.next = NULL;
662     qmp_transaction(&list, errp);
663 }
664 
665 void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file,
666                                 bool has_format, const char *format,
667                                 bool has_mode, enum NewImageMode mode,
668                                 Error **errp)
669 {
670     BlockdevSnapshot snapshot = {
671         .device = (char *) device,
672         .snapshot_file = (char *) snapshot_file,
673         .has_format = has_format,
674         .format = (char *) format,
675         .has_mode = has_mode,
676         .mode = mode,
677     };
678     blockdev_do_action(BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC, &snapshot,
679                        errp);
680 }
681 
682 
/* New and old BlockDriverState structs for group snapshots */
typedef struct BlkTransactionStates {
    BlockDriverState *old_bs;   /* device being snapshotted */
    BlockDriverState *new_bs;   /* newly opened snapshot image; NULL until created */
    QSIMPLEQ_ENTRY(BlkTransactionStates) entry; /* link in the per-transaction queue */
} BlkTransactionStates;
689 
/*
 * 'Atomic' group snapshots.  The snapshots are taken as a set, and if any fail
 *  then we do not pivot any of the devices in the group, and abandon the
 *  snapshots
 *
 * Works in two phases.  The first loop validates every action in
 * @dev_list and creates/opens the new image for it, recording the result
 * in a per-action BlkTransactionStates entry.  Only when all actions
 * succeeded does the second loop pivot each device to its new image with
 * bdrv_append().  On any failure an error is set in @errp and every new
 * image opened so far is deleted again.
 */
void qmp_transaction(BlockdevActionList *dev_list, Error **errp)
{
    int ret = 0;
    BlockdevActionList *dev_entry = dev_list;
    BlkTransactionStates *states, *next;

    QSIMPLEQ_HEAD(snap_bdrv_states, BlkTransactionStates) snap_bdrv_states;
    QSIMPLEQ_INIT(&snap_bdrv_states);

    /* drain all i/o before any snapshots */
    bdrv_drain_all();

    /* We don't do anything in this loop that commits us to the snapshot */
    while (NULL != dev_entry) {
        BlockdevAction *dev_info = NULL;
        BlockDriver *proto_drv;
        BlockDriver *drv;
        int flags;
        enum NewImageMode mode;
        const char *new_image_file;
        const char *device;
        const char *format = "qcow2";

        dev_info = dev_entry->value;
        dev_entry = dev_entry->next;

        /*
         * Queue the entry right away so that the cleanup code at the end
         * sees it even if this action fails.
         */
        states = g_malloc0(sizeof(BlkTransactionStates));
        QSIMPLEQ_INSERT_TAIL(&snap_bdrv_states, states, entry);

        switch (dev_info->kind) {
        case BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC:
            device = dev_info->blockdev_snapshot_sync->device;
            if (!dev_info->blockdev_snapshot_sync->has_mode) {
                dev_info->blockdev_snapshot_sync->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
            }
            new_image_file = dev_info->blockdev_snapshot_sync->snapshot_file;
            if (dev_info->blockdev_snapshot_sync->has_format) {
                format = dev_info->blockdev_snapshot_sync->format;
            }
            mode = dev_info->blockdev_snapshot_sync->mode;
            break;
        default:
            abort();
        }

        drv = bdrv_find_format(format);
        if (!drv) {
            error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
            goto delete_and_fail;
        }

        states->old_bs = bdrv_find(device);
        if (!states->old_bs) {
            error_set(errp, QERR_DEVICE_NOT_FOUND, device);
            goto delete_and_fail;
        }

        if (!bdrv_is_inserted(states->old_bs)) {
            error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
            goto delete_and_fail;
        }

        if (bdrv_in_use(states->old_bs)) {
            error_set(errp, QERR_DEVICE_IN_USE, device);
            goto delete_and_fail;
        }

        if (!bdrv_is_read_only(states->old_bs)) {
            if (bdrv_flush(states->old_bs)) {
                error_set(errp, QERR_IO_ERROR);
                goto delete_and_fail;
            }
        }

        /* the new image is opened with the same flags as the old one */
        flags = states->old_bs->open_flags;

        proto_drv = bdrv_find_protocol(new_image_file);
        if (!proto_drv) {
            error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
            goto delete_and_fail;
        }

        /* create new image w/backing file */
        if (mode != NEW_IMAGE_MODE_EXISTING) {
            ret = bdrv_img_create(new_image_file, format,
                                  states->old_bs->filename,
                                  states->old_bs->drv->format_name,
                                  NULL, -1, flags);
            if (ret) {
                error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file);
                goto delete_and_fail;
            }
        }

        /* We will manually add the backing_hd field to the bs later */
        states->new_bs = bdrv_new("");
        ret = bdrv_open(states->new_bs, new_image_file,
                        flags | BDRV_O_NO_BACKING, drv);
        if (ret != 0) {
            error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file);
            goto delete_and_fail;
        }
    }


    /* Now we are going to do the actual pivot.  Everything up to this point
     * is reversible, but we are committed at this point */
    QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) {
        /* This removes our old bs from the bdrv_states, and adds the new bs */
        bdrv_append(states->new_bs, states->old_bs);
    }

    /* success */
    goto exit;

delete_and_fail:
    /*
    * failure, and it is all-or-none; abandon each new bs, and keep using
    * the original bs for all images
    */
    QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) {
        if (states->new_bs) {
             bdrv_delete(states->new_bs);
        }
    }
exit:
    /* on both paths, free the bookkeeping entries themselves */
    QSIMPLEQ_FOREACH_SAFE(states, &snap_bdrv_states, entry, next) {
        g_free(states);
    }
    return;
}
826 
827 
828 static void eject_device(BlockDriverState *bs, int force, Error **errp)
829 {
830     if (bdrv_in_use(bs)) {
831         error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
832         return;
833     }
834     if (!bdrv_dev_has_removable_media(bs)) {
835         error_set(errp, QERR_DEVICE_NOT_REMOVABLE, bdrv_get_device_name(bs));
836         return;
837     }
838 
839     if (bdrv_dev_is_medium_locked(bs) && !bdrv_dev_is_tray_open(bs)) {
840         bdrv_dev_eject_request(bs, force);
841         if (!force) {
842             error_set(errp, QERR_DEVICE_LOCKED, bdrv_get_device_name(bs));
843             return;
844         }
845     }
846 
847     bdrv_close(bs);
848 }
849 
850 void qmp_eject(const char *device, bool has_force, bool force, Error **errp)
851 {
852     BlockDriverState *bs;
853 
854     bs = bdrv_find(device);
855     if (!bs) {
856         error_set(errp, QERR_DEVICE_NOT_FOUND, device);
857         return;
858     }
859 
860     eject_device(bs, force, errp);
861 }
862 
863 void qmp_block_passwd(const char *device, const char *password, Error **errp)
864 {
865     BlockDriverState *bs;
866     int err;
867 
868     bs = bdrv_find(device);
869     if (!bs) {
870         error_set(errp, QERR_DEVICE_NOT_FOUND, device);
871         return;
872     }
873 
874     err = bdrv_set_key(bs, password);
875     if (err == -EINVAL) {
876         error_set(errp, QERR_DEVICE_NOT_ENCRYPTED, bdrv_get_device_name(bs));
877         return;
878     } else if (err < 0) {
879         error_set(errp, QERR_INVALID_PASSWORD);
880         return;
881     }
882 }
883 
884 static void qmp_bdrv_open_encrypted(BlockDriverState *bs, const char *filename,
885                                     int bdrv_flags, BlockDriver *drv,
886                                     const char *password, Error **errp)
887 {
888     if (bdrv_open(bs, filename, bdrv_flags, drv) < 0) {
889         error_set(errp, QERR_OPEN_FILE_FAILED, filename);
890         return;
891     }
892 
893     if (bdrv_key_required(bs)) {
894         if (password) {
895             if (bdrv_set_key(bs, password) < 0) {
896                 error_set(errp, QERR_INVALID_PASSWORD);
897             }
898         } else {
899             error_set(errp, QERR_DEVICE_ENCRYPTED, bdrv_get_device_name(bs),
900                       bdrv_get_encrypted_filename(bs));
901         }
902     } else if (password) {
903         error_set(errp, QERR_DEVICE_NOT_ENCRYPTED, bdrv_get_device_name(bs));
904     }
905 }
906 
907 void qmp_change_blockdev(const char *device, const char *filename,
908                          bool has_format, const char *format, Error **errp)
909 {
910     BlockDriverState *bs;
911     BlockDriver *drv = NULL;
912     int bdrv_flags;
913     Error *err = NULL;
914 
915     bs = bdrv_find(device);
916     if (!bs) {
917         error_set(errp, QERR_DEVICE_NOT_FOUND, device);
918         return;
919     }
920 
921     if (format) {
922         drv = bdrv_find_whitelisted_format(format);
923         if (!drv) {
924             error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
925             return;
926         }
927     }
928 
929     eject_device(bs, 0, &err);
930     if (error_is_set(&err)) {
931         error_propagate(errp, err);
932         return;
933     }
934 
935     bdrv_flags = bdrv_is_read_only(bs) ? 0 : BDRV_O_RDWR;
936     bdrv_flags |= bdrv_is_snapshot(bs) ? BDRV_O_SNAPSHOT : 0;
937 
938     qmp_bdrv_open_encrypted(bs, filename, bdrv_flags, drv, NULL, errp);
939 }
940 
941 /* throttling disk I/O limits */
942 void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
943                                int64_t bps_wr, int64_t iops, int64_t iops_rd,
944                                int64_t iops_wr, Error **errp)
945 {
946     BlockIOLimit io_limits;
947     BlockDriverState *bs;
948 
949     bs = bdrv_find(device);
950     if (!bs) {
951         error_set(errp, QERR_DEVICE_NOT_FOUND, device);
952         return;
953     }
954 
955     io_limits.bps[BLOCK_IO_LIMIT_TOTAL] = bps;
956     io_limits.bps[BLOCK_IO_LIMIT_READ]  = bps_rd;
957     io_limits.bps[BLOCK_IO_LIMIT_WRITE] = bps_wr;
958     io_limits.iops[BLOCK_IO_LIMIT_TOTAL]= iops;
959     io_limits.iops[BLOCK_IO_LIMIT_READ] = iops_rd;
960     io_limits.iops[BLOCK_IO_LIMIT_WRITE]= iops_wr;
961 
962     if (!do_check_io_limits(&io_limits)) {
963         error_set(errp, QERR_INVALID_PARAMETER_COMBINATION);
964         return;
965     }
966 
967     bs->io_limits = io_limits;
968     bs->slice_time = BLOCK_IO_SLICE_TIME;
969 
970     if (!bs->io_limits_enabled && bdrv_io_limits_enabled(bs)) {
971         bdrv_io_limits_enable(bs);
972     } else if (bs->io_limits_enabled && !bdrv_io_limits_enabled(bs)) {
973         bdrv_io_limits_disable(bs);
974     } else {
975         if (bs->block_timer) {
976             qemu_mod_timer(bs->block_timer, qemu_get_clock_ns(vm_clock));
977         }
978     }
979 }
980 
981 int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
982 {
983     const char *id = qdict_get_str(qdict, "id");
984     BlockDriverState *bs;
985 
986     bs = bdrv_find(id);
987     if (!bs) {
988         qerror_report(QERR_DEVICE_NOT_FOUND, id);
989         return -1;
990     }
991     if (bdrv_in_use(bs)) {
992         qerror_report(QERR_DEVICE_IN_USE, id);
993         return -1;
994     }
995 
996     /* quiesce block driver; prevent further io */
997     bdrv_drain_all();
998     bdrv_flush(bs);
999     bdrv_close(bs);
1000 
1001     /* if we have a device attached to this BlockDriverState
1002      * then we need to make the drive anonymous until the device
1003      * can be removed.  If this is a drive with no device backing
1004      * then we can just get rid of the block driver state right here.
1005      */
1006     if (bdrv_get_attached_dev(bs)) {
1007         bdrv_make_anon(bs);
1008     } else {
1009         drive_uninit(drive_get_by_blockdev(bs));
1010     }
1011 
1012     return 0;
1013 }
1014 
1015 void qmp_block_resize(const char *device, int64_t size, Error **errp)
1016 {
1017     BlockDriverState *bs;
1018 
1019     bs = bdrv_find(device);
1020     if (!bs) {
1021         error_set(errp, QERR_DEVICE_NOT_FOUND, device);
1022         return;
1023     }
1024 
1025     if (size < 0) {
1026         error_set(errp, QERR_INVALID_PARAMETER_VALUE, "size", "a >0 size");
1027         return;
1028     }
1029 
1030     switch (bdrv_truncate(bs, size)) {
1031     case 0:
1032         break;
1033     case -ENOMEDIUM:
1034         error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
1035         break;
1036     case -ENOTSUP:
1037         error_set(errp, QERR_UNSUPPORTED);
1038         break;
1039     case -EACCES:
1040         error_set(errp, QERR_DEVICE_IS_READ_ONLY, device);
1041         break;
1042     case -EBUSY:
1043         error_set(errp, QERR_DEVICE_IN_USE, device);
1044         break;
1045     default:
1046         error_set(errp, QERR_UNDEFINED_ERROR);
1047         break;
1048     }
1049 }
1050 
1051 static QObject *qobject_from_block_job(BlockJob *job)
1052 {
1053     return qobject_from_jsonf("{ 'type': %s,"
1054                               "'device': %s,"
1055                               "'len': %" PRId64 ","
1056                               "'offset': %" PRId64 ","
1057                               "'speed': %" PRId64 " }",
1058                               job->job_type->job_type,
1059                               bdrv_get_device_name(job->bs),
1060                               job->len,
1061                               job->offset,
1062                               job->speed);
1063 }
1064 
1065 static void block_stream_cb(void *opaque, int ret)
1066 {
1067     BlockDriverState *bs = opaque;
1068     QObject *obj;
1069 
1070     trace_block_stream_cb(bs, bs->job, ret);
1071 
1072     assert(bs->job);
1073     obj = qobject_from_block_job(bs->job);
1074     if (ret < 0) {
1075         QDict *dict = qobject_to_qdict(obj);
1076         qdict_put(dict, "error", qstring_from_str(strerror(-ret)));
1077     }
1078 
1079     if (block_job_is_cancelled(bs->job)) {
1080         monitor_protocol_event(QEVENT_BLOCK_JOB_CANCELLED, obj);
1081     } else {
1082         monitor_protocol_event(QEVENT_BLOCK_JOB_COMPLETED, obj);
1083     }
1084     qobject_decref(obj);
1085 
1086     drive_put_ref_bh_schedule(drive_get_by_blockdev(bs));
1087 }
1088 
1089 void qmp_block_stream(const char *device, bool has_base,
1090                       const char *base, bool has_speed,
1091                       int64_t speed, Error **errp)
1092 {
1093     BlockDriverState *bs;
1094     BlockDriverState *base_bs = NULL;
1095     Error *local_err = NULL;
1096 
1097     bs = bdrv_find(device);
1098     if (!bs) {
1099         error_set(errp, QERR_DEVICE_NOT_FOUND, device);
1100         return;
1101     }
1102 
1103     if (base) {
1104         base_bs = bdrv_find_backing_image(bs, base);
1105         if (base_bs == NULL) {
1106             error_set(errp, QERR_BASE_NOT_FOUND, base);
1107             return;
1108         }
1109     }
1110 
1111     stream_start(bs, base_bs, base, has_speed ? speed : 0,
1112                  block_stream_cb, bs, &local_err);
1113     if (error_is_set(&local_err)) {
1114         error_propagate(errp, local_err);
1115         return;
1116     }
1117 
1118     /* Grab a reference so hotplug does not delete the BlockDriverState from
1119      * underneath us.
1120      */
1121     drive_get_ref(drive_get_by_blockdev(bs));
1122 
1123     trace_qmp_block_stream(bs, bs->job);
1124 }
1125 
1126 static BlockJob *find_block_job(const char *device)
1127 {
1128     BlockDriverState *bs;
1129 
1130     bs = bdrv_find(device);
1131     if (!bs || !bs->job) {
1132         return NULL;
1133     }
1134     return bs->job;
1135 }
1136 
1137 void qmp_block_job_set_speed(const char *device, int64_t speed, Error **errp)
1138 {
1139     BlockJob *job = find_block_job(device);
1140 
1141     if (!job) {
1142         error_set(errp, QERR_DEVICE_NOT_ACTIVE, device);
1143         return;
1144     }
1145 
1146     block_job_set_speed(job, speed, errp);
1147 }
1148 
1149 void qmp_block_job_cancel(const char *device, Error **errp)
1150 {
1151     BlockJob *job = find_block_job(device);
1152 
1153     if (!job) {
1154         error_set(errp, QERR_DEVICE_NOT_ACTIVE, device);
1155         return;
1156     }
1157 
1158     trace_qmp_block_job_cancel(job);
1159     block_job_cancel(job);
1160 }
1161 
1162 static void do_qmp_query_block_jobs_one(void *opaque, BlockDriverState *bs)
1163 {
1164     BlockJobInfoList **prev = opaque;
1165     BlockJob *job = bs->job;
1166 
1167     if (job) {
1168         BlockJobInfoList *elem;
1169         BlockJobInfo *info = g_new(BlockJobInfo, 1);
1170         *info = (BlockJobInfo){
1171             .type   = g_strdup(job->job_type->job_type),
1172             .device = g_strdup(bdrv_get_device_name(bs)),
1173             .len    = job->len,
1174             .offset = job->offset,
1175             .speed  = job->speed,
1176         };
1177 
1178         elem = g_new0(BlockJobInfoList, 1);
1179         elem->value = info;
1180 
1181         (*prev)->next = elem;
1182         *prev = elem;
1183     }
1184 }
1185 
1186 BlockJobInfoList *qmp_query_block_jobs(Error **errp)
1187 {
1188     /* Dummy is a fake list element for holding the head pointer */
1189     BlockJobInfoList dummy = {};
1190     BlockJobInfoList *prev = &dummy;
1191     bdrv_iterate(do_qmp_query_block_jobs_one, &prev);
1192     return dummy.next;
1193 }
1194