xref: /openbmc/qemu/blockdev.c (revision 6b034aa1)
1 /*
2  * QEMU host block devices
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2 or
7  * later.  See the COPYING file in the top-level directory.
8  */
9 
10 #include "block.h"
11 #include "blockdev.h"
12 #include "monitor.h"
13 #include "qerror.h"
14 #include "qemu-option.h"
15 #include "qemu-config.h"
16 #include "qemu-objects.h"
17 #include "sysemu.h"
18 #include "block_int.h"
19 #include "qmp-commands.h"
20 #include "trace.h"
21 #include "arch_init.h"
22 
/*
 * List of all DriveInfo entries known to this file.  drive_init() appends
 * to it; drive_uninit() and drive_init()'s error path remove from it.
 */
static QTAILQ_HEAD(drivelist, DriveInfo) drives = QTAILQ_HEAD_INITIALIZER(drives);
24 
/*
 * Textual name of each block interface type, indexed by BlockInterfaceType.
 * drive_init() matches the "if" option against these strings, and
 * drive_add() writes them back into the "if" option.
 */
static const char *const if_name[IF_COUNT] = {
    [IF_NONE] = "none",
    [IF_IDE] = "ide",
    [IF_SCSI] = "scsi",
    [IF_FLOPPY] = "floppy",
    [IF_PFLASH] = "pflash",
    [IF_MTD] = "mtd",
    [IF_SD] = "sd",
    [IF_VIRTIO] = "virtio",
    [IF_XEN] = "xen",
};
36 
/*
 * Number of units per bus for each interface type, indexed by
 * BlockInterfaceType.  Entries not listed are zero; for those, the index
 * helpers below map a drive index to bus 0 with unit == index.
 */
static const int if_max_devs[IF_COUNT] = {
    /*
     * Do not change these numbers!  They govern how drive option
     * index maps to unit and bus.  That mapping is ABI.
     *
     * All controllers used to implement if=T drives need to support
     * if_max_devs[T] units, for any T with if_max_devs[T] != 0.
     * Otherwise, some index values map to "impossible" bus, unit
     * values.
     *
     * For instance, if you change [IF_SCSI] to 255, -drive
     * if=scsi,index=12 no longer means bus=1,unit=5, but
     * bus=0,unit=12.  With an lsi53c895a controller (7 units max),
     * the drive can't be set up.  Regression.
     */
    [IF_IDE] = 2,
    [IF_SCSI] = 7,
};
55 
56 /*
57  * We automatically delete the drive when a device using it gets
58  * unplugged.  Questionable feature, but we can't just drop it.
59  * Device models call blockdev_mark_auto_del() to schedule the
60  * automatic deletion, and generic qdev code calls blockdev_auto_del()
61  * when deletion is actually safe.
62  */
63 void blockdev_mark_auto_del(BlockDriverState *bs)
64 {
65     DriveInfo *dinfo = drive_get_by_blockdev(bs);
66 
67     if (bs->job) {
68         block_job_cancel(bs->job);
69     }
70     if (dinfo) {
71         dinfo->auto_del = 1;
72     }
73 }
74 
75 void blockdev_auto_del(BlockDriverState *bs)
76 {
77     DriveInfo *dinfo = drive_get_by_blockdev(bs);
78 
79     if (dinfo && dinfo->auto_del) {
80         drive_put_ref(dinfo);
81     }
82 }
83 
84 static int drive_index_to_bus_id(BlockInterfaceType type, int index)
85 {
86     int max_devs = if_max_devs[type];
87     return max_devs ? index / max_devs : 0;
88 }
89 
90 static int drive_index_to_unit_id(BlockInterfaceType type, int index)
91 {
92     int max_devs = if_max_devs[type];
93     return max_devs ? index % max_devs : index;
94 }
95 
96 QemuOpts *drive_def(const char *optstr)
97 {
98     return qemu_opts_parse(qemu_find_opts("drive"), optstr, 0);
99 }
100 
101 QemuOpts *drive_add(BlockInterfaceType type, int index, const char *file,
102                     const char *optstr)
103 {
104     QemuOpts *opts;
105     char buf[32];
106 
107     opts = drive_def(optstr);
108     if (!opts) {
109         return NULL;
110     }
111     if (type != IF_DEFAULT) {
112         qemu_opt_set(opts, "if", if_name[type]);
113     }
114     if (index >= 0) {
115         snprintf(buf, sizeof(buf), "%d", index);
116         qemu_opt_set(opts, "index", buf);
117     }
118     if (file)
119         qemu_opt_set(opts, "file", file);
120     return opts;
121 }
122 
123 DriveInfo *drive_get(BlockInterfaceType type, int bus, int unit)
124 {
125     DriveInfo *dinfo;
126 
127     /* seek interface, bus and unit */
128 
129     QTAILQ_FOREACH(dinfo, &drives, next) {
130         if (dinfo->type == type &&
131 	    dinfo->bus == bus &&
132 	    dinfo->unit == unit)
133             return dinfo;
134     }
135 
136     return NULL;
137 }
138 
139 DriveInfo *drive_get_by_index(BlockInterfaceType type, int index)
140 {
141     return drive_get(type,
142                      drive_index_to_bus_id(type, index),
143                      drive_index_to_unit_id(type, index));
144 }
145 
146 int drive_get_max_bus(BlockInterfaceType type)
147 {
148     int max_bus;
149     DriveInfo *dinfo;
150 
151     max_bus = -1;
152     QTAILQ_FOREACH(dinfo, &drives, next) {
153         if(dinfo->type == type &&
154            dinfo->bus > max_bus)
155             max_bus = dinfo->bus;
156     }
157     return max_bus;
158 }
159 
160 /* Get a block device.  This should only be used for single-drive devices
161    (e.g. SD/Floppy/MTD).  Multi-disk devices (scsi/ide) should use the
162    appropriate bus.  */
163 DriveInfo *drive_get_next(BlockInterfaceType type)
164 {
165     static int next_block_unit[IF_COUNT];
166 
167     return drive_get(type, 0, next_block_unit[type]++);
168 }
169 
170 DriveInfo *drive_get_by_blockdev(BlockDriverState *bs)
171 {
172     DriveInfo *dinfo;
173 
174     QTAILQ_FOREACH(dinfo, &drives, next) {
175         if (dinfo->bdrv == bs) {
176             return dinfo;
177         }
178     }
179     return NULL;
180 }
181 
/*
 * Format-iteration callback: print one format @name, preceded by a space.
 * @opaque is not used.
 */
static void bdrv_format_print(void *opaque, const char *name)
{
    (void)opaque;

    error_printf(" %s", name);
}
186 
187 static void drive_uninit(DriveInfo *dinfo)
188 {
189     qemu_opts_del(dinfo->opts);
190     bdrv_delete(dinfo->bdrv);
191     g_free(dinfo->id);
192     QTAILQ_REMOVE(&drives, dinfo, next);
193     g_free(dinfo);
194 }
195 
196 void drive_put_ref(DriveInfo *dinfo)
197 {
198     assert(dinfo->refcount);
199     if (--dinfo->refcount == 0) {
200         drive_uninit(dinfo);
201     }
202 }
203 
204 void drive_get_ref(DriveInfo *dinfo)
205 {
206     dinfo->refcount++;
207 }
208 
/*
 * State for a deferred drive_put_ref(): allocated by
 * drive_put_ref_bh_schedule() and freed by drive_put_ref_bh().
 */
typedef struct {
    QEMUBH *bh;         /* the bottom half that will run drive_put_ref_bh() */
    DriveInfo *dinfo;   /* drive whose reference is released by the BH */
} DrivePutRefBH;
213 
214 static void drive_put_ref_bh(void *opaque)
215 {
216     DrivePutRefBH *s = opaque;
217 
218     drive_put_ref(s->dinfo);
219     qemu_bh_delete(s->bh);
220     g_free(s);
221 }
222 
223 /*
224  * Release a drive reference in a BH
225  *
226  * It is not possible to use drive_put_ref() from a callback function when the
227  * callers still need the drive.  In such cases we schedule a BH to release the
228  * reference.
229  */
230 static void drive_put_ref_bh_schedule(DriveInfo *dinfo)
231 {
232     DrivePutRefBH *s;
233 
234     s = g_new(DrivePutRefBH, 1);
235     s->bh = qemu_bh_new(drive_put_ref_bh, s);
236     s->dinfo = dinfo;
237     qemu_bh_schedule(s->bh);
238 }
239 
240 static int parse_block_error_action(const char *buf, int is_read)
241 {
242     if (!strcmp(buf, "ignore")) {
243         return BLOCK_ERR_IGNORE;
244     } else if (!is_read && !strcmp(buf, "enospc")) {
245         return BLOCK_ERR_STOP_ENOSPC;
246     } else if (!strcmp(buf, "stop")) {
247         return BLOCK_ERR_STOP_ANY;
248     } else if (!strcmp(buf, "report")) {
249         return BLOCK_ERR_REPORT;
250     } else {
251         error_report("'%s' invalid %s error action",
252                      buf, is_read ? "read" : "write");
253         return -1;
254     }
255 }
256 
257 static bool do_check_io_limits(BlockIOLimit *io_limits)
258 {
259     bool bps_flag;
260     bool iops_flag;
261 
262     assert(io_limits);
263 
264     bps_flag  = (io_limits->bps[BLOCK_IO_LIMIT_TOTAL] != 0)
265                  && ((io_limits->bps[BLOCK_IO_LIMIT_READ] != 0)
266                  || (io_limits->bps[BLOCK_IO_LIMIT_WRITE] != 0));
267     iops_flag = (io_limits->iops[BLOCK_IO_LIMIT_TOTAL] != 0)
268                  && ((io_limits->iops[BLOCK_IO_LIMIT_READ] != 0)
269                  || (io_limits->iops[BLOCK_IO_LIMIT_WRITE] != 0));
270     if (bps_flag || iops_flag) {
271         return false;
272     }
273 
274     return true;
275 }
276 
/*
 * Create a drive from a parsed "drive" options entry.
 *
 * @opts:            the drive options to read.  On success the pointer is
 *                   stored in the returned DriveInfo (dinfo->opts).
 * @default_to_scsi: when no "if" option is given, non-zero selects IF_SCSI
 *                   and zero selects IF_IDE.
 *
 * The function validates the options, picks a (bus, unit) slot, allocates
 * a DriveInfo with a fresh BlockDriverState, links it into the drive list
 * and, when a non-empty "file" option is present, opens the image.
 *
 * Returns the new DriveInfo, or NULL after reporting an error.  A drive
 * without a file name is returned without opening anything.  @opts is not
 * deleted by this function on any failure path.
 */
DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi)
{
    const char *buf;
    const char *file = NULL;
    char devname[128];
    const char *serial;
    const char *mediastr = "";
    BlockInterfaceType type;
    enum { MEDIA_DISK, MEDIA_CDROM } media;
    int bus_id, unit_id;
    int cyls, heads, secs, translation;
    BlockDriver *drv = NULL;
    int max_devs;
    int index;
    int ro = 0;
    int bdrv_flags = 0;
    int on_read_error, on_write_error;
    const char *devaddr;
    DriveInfo *dinfo;
    BlockIOLimit io_limits;
    int snapshot = 0;
    bool copy_on_read;
    int ret;

    translation = BIOS_ATA_TRANSLATION_AUTO;
    media = MEDIA_DISK;

    /* extract parameters */
    /* -1 means "not specified" for unit and index */
    bus_id  = qemu_opt_get_number(opts, "bus", 0);
    unit_id = qemu_opt_get_number(opts, "unit", -1);
    index   = qemu_opt_get_number(opts, "index", -1);

    cyls  = qemu_opt_get_number(opts, "cyls", 0);
    heads = qemu_opt_get_number(opts, "heads", 0);
    secs  = qemu_opt_get_number(opts, "secs", 0);

    snapshot = qemu_opt_get_bool(opts, "snapshot", 0);
    ro = qemu_opt_get_bool(opts, "readonly", 0);
    copy_on_read = qemu_opt_get_bool(opts, "copy-on-read", false);

    file = qemu_opt_get(opts, "file");
    serial = qemu_opt_get(opts, "serial");

    /* resolve the interface type; devname is later used to build an id */
    if ((buf = qemu_opt_get(opts, "if")) != NULL) {
        pstrcpy(devname, sizeof(devname), buf);
        for (type = 0; type < IF_COUNT && strcmp(buf, if_name[type]); type++)
            ;
        if (type == IF_COUNT) {
            error_report("unsupported bus type '%s'", buf);
            return NULL;
	}
    } else {
        type = default_to_scsi ? IF_SCSI : IF_IDE;
        pstrcpy(devname, sizeof(devname), if_name[type]);
    }

    max_devs = if_max_devs[type];

    /* if any of C/H/S is given, all three must be valid */
    if (cyls || heads || secs) {
        if (cyls < 1 || (type == IF_IDE && cyls > 16383)) {
            error_report("invalid physical cyls number");
	    return NULL;
	}
        if (heads < 1 || (type == IF_IDE && heads > 16)) {
            error_report("invalid physical heads number");
	    return NULL;
	}
        if (secs < 1 || (type == IF_IDE && secs > 63)) {
            error_report("invalid physical secs number");
	    return NULL;
	}
    }

    if ((buf = qemu_opt_get(opts, "trans")) != NULL) {
        if (!cyls) {
            error_report("'%s' trans must be used with cyls, heads and secs",
                         buf);
            return NULL;
        }
        if (!strcmp(buf, "none"))
            translation = BIOS_ATA_TRANSLATION_NONE;
        else if (!strcmp(buf, "lba"))
            translation = BIOS_ATA_TRANSLATION_LBA;
        else if (!strcmp(buf, "auto"))
            translation = BIOS_ATA_TRANSLATION_AUTO;
	else {
            error_report("'%s' invalid translation type", buf);
	    return NULL;
	}
    }

    if ((buf = qemu_opt_get(opts, "media")) != NULL) {
        if (!strcmp(buf, "disk")) {
	    media = MEDIA_DISK;
	} else if (!strcmp(buf, "cdrom")) {
            if (cyls || secs || heads) {
                error_report("CHS can't be set with media=%s", buf);
	        return NULL;
            }
	    media = MEDIA_CDROM;
	} else {
	    error_report("'%s' invalid media", buf);
	    return NULL;
	}
    }

    if ((buf = qemu_opt_get(opts, "cache")) != NULL) {
        if (bdrv_parse_cache_flags(buf, &bdrv_flags) != 0) {
            error_report("invalid cache option");
            return NULL;
        }
    }

    /* the "aio" option is only examined when built with CONFIG_LINUX_AIO */
#ifdef CONFIG_LINUX_AIO
    if ((buf = qemu_opt_get(opts, "aio")) != NULL) {
        if (!strcmp(buf, "native")) {
            bdrv_flags |= BDRV_O_NATIVE_AIO;
        } else if (!strcmp(buf, "threads")) {
            /* this is the default */
        } else {
           error_report("invalid aio option");
           return NULL;
        }
    }
#endif

    if ((buf = qemu_opt_get(opts, "format")) != NULL) {
       /* format=? lists the known formats and then fails (returns NULL) */
       if (strcmp(buf, "?") == 0) {
           error_printf("Supported formats:");
           bdrv_iterate_format(bdrv_format_print, NULL);
           error_printf("\n");
           return NULL;
        }
        drv = bdrv_find_whitelisted_format(buf);
        if (!drv) {
            error_report("'%s' invalid format", buf);
            return NULL;
        }
    }

    /* disk I/O throttling */
    io_limits.bps[BLOCK_IO_LIMIT_TOTAL]  =
                           qemu_opt_get_number(opts, "bps", 0);
    io_limits.bps[BLOCK_IO_LIMIT_READ]   =
                           qemu_opt_get_number(opts, "bps_rd", 0);
    io_limits.bps[BLOCK_IO_LIMIT_WRITE]  =
                           qemu_opt_get_number(opts, "bps_wr", 0);
    io_limits.iops[BLOCK_IO_LIMIT_TOTAL] =
                           qemu_opt_get_number(opts, "iops", 0);
    io_limits.iops[BLOCK_IO_LIMIT_READ]  =
                           qemu_opt_get_number(opts, "iops_rd", 0);
    io_limits.iops[BLOCK_IO_LIMIT_WRITE] =
                           qemu_opt_get_number(opts, "iops_wr", 0);

    if (!do_check_io_limits(&io_limits)) {
        error_report("bps(iops) and bps_rd/bps_wr(iops_rd/iops_wr) "
                     "cannot be used at the same time");
        return NULL;
    }

    /* default write-error policy; "werror" may override it */
    on_write_error = BLOCK_ERR_STOP_ENOSPC;
    if ((buf = qemu_opt_get(opts, "werror")) != NULL) {
        if (type != IF_IDE && type != IF_SCSI && type != IF_VIRTIO && type != IF_NONE) {
            error_report("werror is not supported by this bus type");
            return NULL;
        }

        on_write_error = parse_block_error_action(buf, 0);
        if (on_write_error < 0) {
            return NULL;
        }
    }

    /* default read-error policy; "rerror" may override it */
    on_read_error = BLOCK_ERR_REPORT;
    if ((buf = qemu_opt_get(opts, "rerror")) != NULL) {
        if (type != IF_IDE && type != IF_VIRTIO && type != IF_SCSI && type != IF_NONE) {
            error_report("rerror is not supported by this bus type");
            return NULL;
        }

        on_read_error = parse_block_error_action(buf, 1);
        if (on_read_error < 0) {
            return NULL;
        }
    }

    if ((devaddr = qemu_opt_get(opts, "addr")) != NULL) {
        if (type != IF_VIRTIO) {
            error_report("addr is not supported by this bus type");
            return NULL;
        }
    }

    /* compute bus and unit according index */

    if (index != -1) {
        if (bus_id != 0 || unit_id != -1) {
            error_report("index cannot be used with bus and unit");
            return NULL;
        }
        bus_id = drive_index_to_bus_id(type, index);
        unit_id = drive_index_to_unit_id(type, index);
    }

    /* if user doesn't specify a unit_id,
     * try to find the first free
     */

    if (unit_id == -1) {
       unit_id = 0;
       while (drive_get(type, bus_id, unit_id) != NULL) {
           unit_id++;
           /* bus is full: continue the search on the next bus */
           if (max_devs && unit_id >= max_devs) {
               unit_id -= max_devs;
               bus_id++;
           }
       }
    }

    /* check unit id */

    if (max_devs && unit_id >= max_devs) {
        error_report("unit %d too big (max is %d)",
                     unit_id, max_devs - 1);
        return NULL;
    }

    /*
     * catch multiple definitions
     */

    if (drive_get(type, bus_id, unit_id) != NULL) {
        error_report("drive with bus=%d, unit=%d (index=%d) exists",
                     bus_id, unit_id, index);
        return NULL;
    }

    /* init */

    dinfo = g_malloc0(sizeof(*dinfo));
    if ((buf = qemu_opts_id(opts)) != NULL) {
        dinfo->id = g_strdup(buf);
    } else {
        /* no id supplied -> create one */
        /* generated ids look like <if><bus>[-hd|-cd]<unit> or <if><unit> */
        dinfo->id = g_malloc0(32);
        if (type == IF_IDE || type == IF_SCSI)
            mediastr = (media == MEDIA_CDROM) ? "-cd" : "-hd";
        if (max_devs)
            snprintf(dinfo->id, 32, "%s%i%s%i",
                     devname, bus_id, mediastr, unit_id);
        else
            snprintf(dinfo->id, 32, "%s%s%i",
                     devname, mediastr, unit_id);
    }
    dinfo->bdrv = bdrv_new(dinfo->id);
    dinfo->devaddr = devaddr;
    dinfo->type = type;
    dinfo->bus = bus_id;
    dinfo->unit = unit_id;
    /* drive_uninit() deletes these options together with the drive */
    dinfo->opts = opts;
    dinfo->refcount = 1;
    if (serial) {
        pstrcpy(dinfo->serial, sizeof(dinfo->serial), serial);
    }
    QTAILQ_INSERT_TAIL(&drives, dinfo, next);

    bdrv_set_on_error(dinfo->bdrv, on_read_error, on_write_error);

    /* disk I/O throttling */
    bdrv_set_io_limits(dinfo->bdrv, &io_limits);

    /* interface-specific setup */
    switch(type) {
    case IF_IDE:
    case IF_SCSI:
    case IF_XEN:
    case IF_NONE:
        switch(media) {
	case MEDIA_DISK:
            if (cyls != 0) {
                bdrv_set_geometry_hint(dinfo->bdrv, cyls, heads, secs);
                bdrv_set_translation_hint(dinfo->bdrv, translation);
            }
	    break;
	case MEDIA_CDROM:
            dinfo->media_cd = 1;
	    break;
	}
        break;
    case IF_SD:
    case IF_FLOPPY:
    case IF_PFLASH:
    case IF_MTD:
        break;
    case IF_VIRTIO:
        /* add virtio block device */
        /*
         * Note: from here on 'opts' refers to the new "device" options
         * entry, not to the drive options stored in dinfo->opts above.
         */
        opts = qemu_opts_create(qemu_find_opts("device"), NULL, 0);
        if (arch_type == QEMU_ARCH_S390X) {
            qemu_opt_set(opts, "driver", "virtio-blk-s390");
        } else {
            qemu_opt_set(opts, "driver", "virtio-blk-pci");
        }
        qemu_opt_set(opts, "drive", dinfo->id);
        if (devaddr)
            qemu_opt_set(opts, "addr", devaddr);
        break;
    default:
        abort();
    }
    /* no image file given: the drive is returned without opening anything */
    if (!file || !*file) {
        return dinfo;
    }
    if (snapshot) {
        /* always use cache=unsafe with snapshot */
        bdrv_flags &= ~BDRV_O_CACHE_MASK;
        bdrv_flags |= (BDRV_O_SNAPSHOT|BDRV_O_CACHE_WB|BDRV_O_NO_FLUSH);
    }

    if (copy_on_read) {
        bdrv_flags |= BDRV_O_COPY_ON_READ;
    }

    if (runstate_check(RUN_STATE_INMIGRATE)) {
        bdrv_flags |= BDRV_O_INCOMING;
    }

    if (media == MEDIA_CDROM) {
        /* CDROM is fine for any interface, don't check.  */
        ro = 1;
    } else if (ro == 1) {
        if (type != IF_SCSI && type != IF_VIRTIO && type != IF_FLOPPY &&
            type != IF_NONE && type != IF_PFLASH) {
            error_report("readonly not supported by this bus type");
            goto err;
        }
    }

    bdrv_flags |= ro ? 0 : BDRV_O_RDWR;

    ret = bdrv_open(dinfo->bdrv, file, bdrv_flags, drv);
    if (ret < 0) {
        error_report("could not open disk image %s: %s",
                     file, strerror(-ret));
        goto err;
    }

    /* an image that still needs a key clears the global autostart flag */
    if (bdrv_key_required(dinfo->bdrv))
        autostart = 0;
    return dinfo;

err:
    /* undo the allocation and list insertion done above */
    bdrv_delete(dinfo->bdrv);
    g_free(dinfo->id);
    QTAILQ_REMOVE(&drives, dinfo, next);
    g_free(dinfo);
    return NULL;
}
633 
634 void do_commit(Monitor *mon, const QDict *qdict)
635 {
636     const char *device = qdict_get_str(qdict, "device");
637     BlockDriverState *bs;
638     int ret;
639 
640     if (!strcmp(device, "all")) {
641         ret = bdrv_commit_all();
642         if (ret == -EBUSY) {
643             qerror_report(QERR_DEVICE_IN_USE, device);
644             return;
645         }
646     } else {
647         bs = bdrv_find(device);
648         if (!bs) {
649             qerror_report(QERR_DEVICE_NOT_FOUND, device);
650             return;
651         }
652         ret = bdrv_commit(bs);
653         if (ret == -EBUSY) {
654             qerror_report(QERR_DEVICE_IN_USE, device);
655             return;
656         }
657     }
658 }
659 
660 static void blockdev_do_action(int kind, void *data, Error **errp)
661 {
662     BlockdevAction action;
663     BlockdevActionList list;
664 
665     action.kind = kind;
666     action.data = data;
667     list.value = &action;
668     list.next = NULL;
669     qmp_transaction(&list, errp);
670 }
671 
672 void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file,
673                                 bool has_format, const char *format,
674                                 bool has_mode, enum NewImageMode mode,
675                                 Error **errp)
676 {
677     BlockdevSnapshot snapshot = {
678         .device = (char *) device,
679         .snapshot_file = (char *) snapshot_file,
680         .has_format = has_format,
681         .format = (char *) format,
682         .has_mode = has_mode,
683         .mode = mode,
684     };
685     blockdev_do_action(BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC, &snapshot,
686                        errp);
687 }
688 
689 
/*
 * New and old BlockDriverState structs for group snapshots.
 * One entry is queued per action while a transaction is being prepared.
 */
typedef struct BlkTransactionStates {
    BlockDriverState *old_bs;   /* existing device being snapshotted */
    BlockDriverState *new_bs;   /* newly opened snapshot image; NULL until opened */
    QSIMPLEQ_ENTRY(BlkTransactionStates) entry;
} BlkTransactionStates;
696 
697 /*
698  * 'Atomic' group snapshots.  The snapshots are taken as a set, and if any fail
699  *  then we do not pivot any of the devices in the group, and abandon the
700  *  snapshots
701  */
702 void qmp_transaction(BlockdevActionList *dev_list, Error **errp)
703 {
704     int ret = 0;
705     BlockdevActionList *dev_entry = dev_list;
706     BlkTransactionStates *states, *next;
707 
708     QSIMPLEQ_HEAD(snap_bdrv_states, BlkTransactionStates) snap_bdrv_states;
709     QSIMPLEQ_INIT(&snap_bdrv_states);
710 
711     /* drain all i/o before any snapshots */
712     bdrv_drain_all();
713 
714     /* We don't do anything in this loop that commits us to the snapshot */
715     while (NULL != dev_entry) {
716         BlockdevAction *dev_info = NULL;
717         BlockDriver *proto_drv;
718         BlockDriver *drv;
719         int flags;
720         enum NewImageMode mode;
721         const char *new_image_file;
722         const char *device;
723         const char *format = "qcow2";
724 
725         dev_info = dev_entry->value;
726         dev_entry = dev_entry->next;
727 
728         states = g_malloc0(sizeof(BlkTransactionStates));
729         QSIMPLEQ_INSERT_TAIL(&snap_bdrv_states, states, entry);
730 
731         switch (dev_info->kind) {
732         case BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC:
733             device = dev_info->blockdev_snapshot_sync->device;
734             if (!dev_info->blockdev_snapshot_sync->has_mode) {
735                 dev_info->blockdev_snapshot_sync->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
736             }
737             new_image_file = dev_info->blockdev_snapshot_sync->snapshot_file;
738             if (dev_info->blockdev_snapshot_sync->has_format) {
739                 format = dev_info->blockdev_snapshot_sync->format;
740             }
741             mode = dev_info->blockdev_snapshot_sync->mode;
742             break;
743         default:
744             abort();
745         }
746 
747         drv = bdrv_find_format(format);
748         if (!drv) {
749             error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
750             goto delete_and_fail;
751         }
752 
753         states->old_bs = bdrv_find(device);
754         if (!states->old_bs) {
755             error_set(errp, QERR_DEVICE_NOT_FOUND, device);
756             goto delete_and_fail;
757         }
758 
759         if (bdrv_in_use(states->old_bs)) {
760             error_set(errp, QERR_DEVICE_IN_USE, device);
761             goto delete_and_fail;
762         }
763 
764         if (!bdrv_is_read_only(states->old_bs) &&
765              bdrv_is_inserted(states->old_bs)) {
766 
767             if (bdrv_flush(states->old_bs)) {
768                 error_set(errp, QERR_IO_ERROR);
769                 goto delete_and_fail;
770             }
771         }
772 
773         flags = states->old_bs->open_flags;
774 
775         proto_drv = bdrv_find_protocol(new_image_file);
776         if (!proto_drv) {
777             error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
778             goto delete_and_fail;
779         }
780 
781         /* create new image w/backing file */
782         if (mode != NEW_IMAGE_MODE_EXISTING) {
783             ret = bdrv_img_create(new_image_file, format,
784                                   states->old_bs->filename,
785                                   states->old_bs->drv->format_name,
786                                   NULL, -1, flags);
787             if (ret) {
788                 error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file);
789                 goto delete_and_fail;
790             }
791         }
792 
793         /* We will manually add the backing_hd field to the bs later */
794         states->new_bs = bdrv_new("");
795         ret = bdrv_open(states->new_bs, new_image_file,
796                         flags | BDRV_O_NO_BACKING, drv);
797         if (ret != 0) {
798             error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file);
799             goto delete_and_fail;
800         }
801     }
802 
803 
804     /* Now we are going to do the actual pivot.  Everything up to this point
805      * is reversible, but we are committed at this point */
806     QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) {
807         /* This removes our old bs from the bdrv_states, and adds the new bs */
808         bdrv_append(states->new_bs, states->old_bs);
809     }
810 
811     /* success */
812     goto exit;
813 
814 delete_and_fail:
815     /*
816     * failure, and it is all-or-none; abandon each new bs, and keep using
817     * the original bs for all images
818     */
819     QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) {
820         if (states->new_bs) {
821              bdrv_delete(states->new_bs);
822         }
823     }
824 exit:
825     QSIMPLEQ_FOREACH_SAFE(states, &snap_bdrv_states, entry, next) {
826         g_free(states);
827     }
828     return;
829 }
830 
831 
832 static void eject_device(BlockDriverState *bs, int force, Error **errp)
833 {
834     if (bdrv_in_use(bs)) {
835         error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
836         return;
837     }
838     if (!bdrv_dev_has_removable_media(bs)) {
839         error_set(errp, QERR_DEVICE_NOT_REMOVABLE, bdrv_get_device_name(bs));
840         return;
841     }
842 
843     if (bdrv_dev_is_medium_locked(bs) && !bdrv_dev_is_tray_open(bs)) {
844         bdrv_dev_eject_request(bs, force);
845         if (!force) {
846             error_set(errp, QERR_DEVICE_LOCKED, bdrv_get_device_name(bs));
847             return;
848         }
849     }
850 
851     bdrv_close(bs);
852 }
853 
854 void qmp_eject(const char *device, bool has_force, bool force, Error **errp)
855 {
856     BlockDriverState *bs;
857 
858     bs = bdrv_find(device);
859     if (!bs) {
860         error_set(errp, QERR_DEVICE_NOT_FOUND, device);
861         return;
862     }
863 
864     eject_device(bs, force, errp);
865 }
866 
867 void qmp_block_passwd(const char *device, const char *password, Error **errp)
868 {
869     BlockDriverState *bs;
870     int err;
871 
872     bs = bdrv_find(device);
873     if (!bs) {
874         error_set(errp, QERR_DEVICE_NOT_FOUND, device);
875         return;
876     }
877 
878     err = bdrv_set_key(bs, password);
879     if (err == -EINVAL) {
880         error_set(errp, QERR_DEVICE_NOT_ENCRYPTED, bdrv_get_device_name(bs));
881         return;
882     } else if (err < 0) {
883         error_set(errp, QERR_INVALID_PASSWORD);
884         return;
885     }
886 }
887 
888 static void qmp_bdrv_open_encrypted(BlockDriverState *bs, const char *filename,
889                                     int bdrv_flags, BlockDriver *drv,
890                                     const char *password, Error **errp)
891 {
892     if (bdrv_open(bs, filename, bdrv_flags, drv) < 0) {
893         error_set(errp, QERR_OPEN_FILE_FAILED, filename);
894         return;
895     }
896 
897     if (bdrv_key_required(bs)) {
898         if (password) {
899             if (bdrv_set_key(bs, password) < 0) {
900                 error_set(errp, QERR_INVALID_PASSWORD);
901             }
902         } else {
903             error_set(errp, QERR_DEVICE_ENCRYPTED, bdrv_get_device_name(bs),
904                       bdrv_get_encrypted_filename(bs));
905         }
906     } else if (password) {
907         error_set(errp, QERR_DEVICE_NOT_ENCRYPTED, bdrv_get_device_name(bs));
908     }
909 }
910 
911 void qmp_change_blockdev(const char *device, const char *filename,
912                          bool has_format, const char *format, Error **errp)
913 {
914     BlockDriverState *bs;
915     BlockDriver *drv = NULL;
916     int bdrv_flags;
917     Error *err = NULL;
918 
919     bs = bdrv_find(device);
920     if (!bs) {
921         error_set(errp, QERR_DEVICE_NOT_FOUND, device);
922         return;
923     }
924 
925     if (format) {
926         drv = bdrv_find_whitelisted_format(format);
927         if (!drv) {
928             error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
929             return;
930         }
931     }
932 
933     eject_device(bs, 0, &err);
934     if (error_is_set(&err)) {
935         error_propagate(errp, err);
936         return;
937     }
938 
939     bdrv_flags = bdrv_is_read_only(bs) ? 0 : BDRV_O_RDWR;
940     bdrv_flags |= bdrv_is_snapshot(bs) ? BDRV_O_SNAPSHOT : 0;
941 
942     qmp_bdrv_open_encrypted(bs, filename, bdrv_flags, drv, NULL, errp);
943 }
944 
945 /* throttling disk I/O limits */
946 void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
947                                int64_t bps_wr, int64_t iops, int64_t iops_rd,
948                                int64_t iops_wr, Error **errp)
949 {
950     BlockIOLimit io_limits;
951     BlockDriverState *bs;
952 
953     bs = bdrv_find(device);
954     if (!bs) {
955         error_set(errp, QERR_DEVICE_NOT_FOUND, device);
956         return;
957     }
958 
959     io_limits.bps[BLOCK_IO_LIMIT_TOTAL] = bps;
960     io_limits.bps[BLOCK_IO_LIMIT_READ]  = bps_rd;
961     io_limits.bps[BLOCK_IO_LIMIT_WRITE] = bps_wr;
962     io_limits.iops[BLOCK_IO_LIMIT_TOTAL]= iops;
963     io_limits.iops[BLOCK_IO_LIMIT_READ] = iops_rd;
964     io_limits.iops[BLOCK_IO_LIMIT_WRITE]= iops_wr;
965 
966     if (!do_check_io_limits(&io_limits)) {
967         error_set(errp, QERR_INVALID_PARAMETER_COMBINATION);
968         return;
969     }
970 
971     bs->io_limits = io_limits;
972     bs->slice_time = BLOCK_IO_SLICE_TIME;
973 
974     if (!bs->io_limits_enabled && bdrv_io_limits_enabled(bs)) {
975         bdrv_io_limits_enable(bs);
976     } else if (bs->io_limits_enabled && !bdrv_io_limits_enabled(bs)) {
977         bdrv_io_limits_disable(bs);
978     } else {
979         if (bs->block_timer) {
980             qemu_mod_timer(bs->block_timer, qemu_get_clock_ns(vm_clock));
981         }
982     }
983 }
984 
985 int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
986 {
987     const char *id = qdict_get_str(qdict, "id");
988     BlockDriverState *bs;
989 
990     bs = bdrv_find(id);
991     if (!bs) {
992         qerror_report(QERR_DEVICE_NOT_FOUND, id);
993         return -1;
994     }
995     if (bdrv_in_use(bs)) {
996         qerror_report(QERR_DEVICE_IN_USE, id);
997         return -1;
998     }
999 
1000     /* quiesce block driver; prevent further io */
1001     bdrv_drain_all();
1002     bdrv_flush(bs);
1003     bdrv_close(bs);
1004 
1005     /* if we have a device attached to this BlockDriverState
1006      * then we need to make the drive anonymous until the device
1007      * can be removed.  If this is a drive with no device backing
1008      * then we can just get rid of the block driver state right here.
1009      */
1010     if (bdrv_get_attached_dev(bs)) {
1011         bdrv_make_anon(bs);
1012     } else {
1013         drive_uninit(drive_get_by_blockdev(bs));
1014     }
1015 
1016     return 0;
1017 }
1018 
1019 void qmp_block_resize(const char *device, int64_t size, Error **errp)
1020 {
1021     BlockDriverState *bs;
1022 
1023     bs = bdrv_find(device);
1024     if (!bs) {
1025         error_set(errp, QERR_DEVICE_NOT_FOUND, device);
1026         return;
1027     }
1028 
1029     if (size < 0) {
1030         error_set(errp, QERR_INVALID_PARAMETER_VALUE, "size", "a >0 size");
1031         return;
1032     }
1033 
1034     switch (bdrv_truncate(bs, size)) {
1035     case 0:
1036         break;
1037     case -ENOMEDIUM:
1038         error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
1039         break;
1040     case -ENOTSUP:
1041         error_set(errp, QERR_UNSUPPORTED);
1042         break;
1043     case -EACCES:
1044         error_set(errp, QERR_DEVICE_IS_READ_ONLY, device);
1045         break;
1046     case -EBUSY:
1047         error_set(errp, QERR_DEVICE_IN_USE, device);
1048         break;
1049     default:
1050         error_set(errp, QERR_UNDEFINED_ERROR);
1051         break;
1052     }
1053 }
1054 
1055 static QObject *qobject_from_block_job(BlockJob *job)
1056 {
1057     return qobject_from_jsonf("{ 'type': %s,"
1058                               "'device': %s,"
1059                               "'len': %" PRId64 ","
1060                               "'offset': %" PRId64 ","
1061                               "'speed': %" PRId64 " }",
1062                               job->job_type->job_type,
1063                               bdrv_get_device_name(job->bs),
1064                               job->len,
1065                               job->offset,
1066                               job->speed);
1067 }
1068 
1069 static void block_stream_cb(void *opaque, int ret)
1070 {
1071     BlockDriverState *bs = opaque;
1072     QObject *obj;
1073 
1074     trace_block_stream_cb(bs, bs->job, ret);
1075 
1076     assert(bs->job);
1077     obj = qobject_from_block_job(bs->job);
1078     if (ret < 0) {
1079         QDict *dict = qobject_to_qdict(obj);
1080         qdict_put(dict, "error", qstring_from_str(strerror(-ret)));
1081     }
1082 
1083     if (block_job_is_cancelled(bs->job)) {
1084         monitor_protocol_event(QEVENT_BLOCK_JOB_CANCELLED, obj);
1085     } else {
1086         monitor_protocol_event(QEVENT_BLOCK_JOB_COMPLETED, obj);
1087     }
1088     qobject_decref(obj);
1089 
1090     drive_put_ref_bh_schedule(drive_get_by_blockdev(bs));
1091 }
1092 
1093 void qmp_block_stream(const char *device, bool has_base,
1094                       const char *base, Error **errp)
1095 {
1096     BlockDriverState *bs;
1097     BlockDriverState *base_bs = NULL;
1098     int ret;
1099 
1100     bs = bdrv_find(device);
1101     if (!bs) {
1102         error_set(errp, QERR_DEVICE_NOT_FOUND, device);
1103         return;
1104     }
1105 
1106     if (base) {
1107         base_bs = bdrv_find_backing_image(bs, base);
1108         if (base_bs == NULL) {
1109             error_set(errp, QERR_BASE_NOT_FOUND, base);
1110             return;
1111         }
1112     }
1113 
1114     ret = stream_start(bs, base_bs, base, block_stream_cb, bs);
1115     if (ret < 0) {
1116         switch (ret) {
1117         case -EBUSY:
1118             error_set(errp, QERR_DEVICE_IN_USE, device);
1119             return;
1120         default:
1121             error_set(errp, QERR_NOT_SUPPORTED);
1122             return;
1123         }
1124     }
1125 
1126     /* Grab a reference so hotplug does not delete the BlockDriverState from
1127      * underneath us.
1128      */
1129     drive_get_ref(drive_get_by_blockdev(bs));
1130 
1131     trace_qmp_block_stream(bs, bs->job);
1132 }
1133 
1134 static BlockJob *find_block_job(const char *device)
1135 {
1136     BlockDriverState *bs;
1137 
1138     bs = bdrv_find(device);
1139     if (!bs || !bs->job) {
1140         return NULL;
1141     }
1142     return bs->job;
1143 }
1144 
1145 void qmp_block_job_set_speed(const char *device, int64_t value, Error **errp)
1146 {
1147     BlockJob *job = find_block_job(device);
1148 
1149     if (!job) {
1150         error_set(errp, QERR_DEVICE_NOT_ACTIVE, device);
1151         return;
1152     }
1153 
1154     if (block_job_set_speed(job, value) < 0) {
1155         error_set(errp, QERR_NOT_SUPPORTED);
1156     }
1157 }
1158 
1159 void qmp_block_job_cancel(const char *device, Error **errp)
1160 {
1161     BlockJob *job = find_block_job(device);
1162 
1163     if (!job) {
1164         error_set(errp, QERR_DEVICE_NOT_ACTIVE, device);
1165         return;
1166     }
1167 
1168     trace_qmp_block_job_cancel(job);
1169     block_job_cancel(job);
1170 }
1171 
1172 static void do_qmp_query_block_jobs_one(void *opaque, BlockDriverState *bs)
1173 {
1174     BlockJobInfoList **prev = opaque;
1175     BlockJob *job = bs->job;
1176 
1177     if (job) {
1178         BlockJobInfoList *elem;
1179         BlockJobInfo *info = g_new(BlockJobInfo, 1);
1180         *info = (BlockJobInfo){
1181             .type   = g_strdup(job->job_type->job_type),
1182             .device = g_strdup(bdrv_get_device_name(bs)),
1183             .len    = job->len,
1184             .offset = job->offset,
1185             .speed  = job->speed,
1186         };
1187 
1188         elem = g_new0(BlockJobInfoList, 1);
1189         elem->value = info;
1190 
1191         (*prev)->next = elem;
1192         *prev = elem;
1193     }
1194 }
1195 
1196 BlockJobInfoList *qmp_query_block_jobs(Error **errp)
1197 {
1198     /* Dummy is a fake list element for holding the head pointer */
1199     BlockJobInfoList dummy = {};
1200     BlockJobInfoList *prev = &dummy;
1201     bdrv_iterate(do_qmp_query_block_jobs_one, &prev);
1202     return dummy.next;
1203 }
1204