xref: /openbmc/qemu/block.c (revision 7267c094)
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "monitor.h"
28 #include "block_int.h"
29 #include "module.h"
30 #include "qemu-objects.h"
31 #include "qemu-coroutine.h"
32 
33 #ifdef CONFIG_BSD
34 #include <sys/types.h>
35 #include <sys/stat.h>
36 #include <sys/ioctl.h>
37 #include <sys/queue.h>
38 #ifndef __DragonFly__
39 #include <sys/disk.h>
40 #endif
41 #endif
42 
43 #ifdef _WIN32
44 #include <windows.h>
45 #endif
46 
/*
 * Forward declarations of file-local emulation helpers. bdrv_register()
 * installs several of them into drivers that do not provide a native
 * implementation of the corresponding callback.
 */

/* AIO entry points (emulation layer and no-op) */
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
        BlockDriverCompletionFunc *cb, void *opaque);
/* synchronous entry points */
static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
                        uint8_t *buf, int nb_sectors);
static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
                         const uint8_t *buf, int nb_sectors);
/* AIO entry points used for drivers that supply bdrv_co_readv */
static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
/* coroutine entry points */
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs);
74 
/* All named block device states; bdrv_new() appends to this list and
   bdrv_make_anon() removes from it */
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

/* All registered block drivers; bdrv_register() inserts at the head */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* The device to use for VM snapshots */
static BlockDriverState *bs_snapshots;

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
86 
87 #ifdef _WIN32
/* Return non-zero when filename starts with an ASCII letter immediately
   followed by ':' (a drive specification such as "c:"). */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    int is_letter = (letter >= 'a' && letter <= 'z') ||
                    (letter >= 'A' && letter <= 'Z');

    if (!is_letter) {
        return 0;
    }
    return filename[1] == ':';
}
94 
/* Return 1 when filename is exactly a drive specification ("<letter>:")
   or starts with one of the two device-path prefixes tested below;
   return 0 otherwise. */
int is_windows_drive(const char *filename)
{
    int bare_drive = is_windows_drive_prefix(filename) &&
                     filename[2] == '\0';
    int device_path = 0;

    if (!bare_drive) {
        device_path = strstart(filename, "\\\\.\\", NULL) ||
                      strstart(filename, "//./", NULL);
    }
    return (bare_drive || device_path) ? 1 : 0;
}
105 #endif
106 
/* Tell whether path contains a "<protocol>:" part, i.e. any ':' character.
   On Windows, drive specifications are never reported as protocols. */
static int path_has_protocol(const char *path)
{
    const char *colon;

#ifdef _WIN32
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    colon = strchr(path, ':');
    return colon ? 1 : 0;
}
119 
/* Return non-zero when path is absolute. Any text up to and including the
   first ':' is skipped before the leading separator is tested; on Windows
   both '/' and '\\' count as separators. */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif
    colon = strchr(path, ':');
    start = colon ? colon + 1 : path;
#ifdef _WIN32
    if (*start == '\\') {
        return 1;
    }
#endif
    return (*start == '/');
}
139 
/* Build in dest (at most dest_size bytes, always NUL-terminated when
   dest_size > 0) the path of filename interpreted relative to base_path.
   An absolute filename is copied unchanged. Otherwise the leading part of
   base_path up to its last directory separator, or up to its first ':' if
   that comes later, is kept and filename is appended. URL are supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *colon, *prefix_end, *after_sep;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* end of the "<protocol>:" part, or the start of base_path if none */
    colon = strchr(base_path, ':');
    prefix_end = colon ? colon + 1 : base_path;

    /* position just past the last directory separator, if there is one */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    if (after_sep > prefix_end) {
        prefix_end = after_sep;
    }

    /* copy the retained prefix, clamped to the destination size */
    prefix_len = prefix_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
183 
184 void bdrv_register(BlockDriver *bdrv)
185 {
186     if (bdrv->bdrv_co_readv) {
187         /* Emulate AIO by coroutines, and sync by AIO */
188         bdrv->bdrv_aio_readv = bdrv_co_aio_readv_em;
189         bdrv->bdrv_aio_writev = bdrv_co_aio_writev_em;
190         bdrv->bdrv_read = bdrv_read_em;
191         bdrv->bdrv_write = bdrv_write_em;
192      } else {
193         bdrv->bdrv_co_readv = bdrv_co_readv_em;
194         bdrv->bdrv_co_writev = bdrv_co_writev_em;
195 
196         if (!bdrv->bdrv_aio_readv) {
197             /* add AIO emulation layer */
198             bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
199             bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
200         } else if (!bdrv->bdrv_read) {
201             /* add synchronous IO emulation layer */
202             bdrv->bdrv_read = bdrv_read_em;
203             bdrv->bdrv_write = bdrv_write_em;
204         }
205     }
206 
207     if (!bdrv->bdrv_aio_flush)
208         bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
209 
210     QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
211 }
212 
213 /* create a new block device (by default it is empty) */
214 BlockDriverState *bdrv_new(const char *device_name)
215 {
216     BlockDriverState *bs;
217 
218     bs = g_malloc0(sizeof(BlockDriverState));
219     pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
220     if (device_name[0] != '\0') {
221         QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
222     }
223     return bs;
224 }
225 
226 BlockDriver *bdrv_find_format(const char *format_name)
227 {
228     BlockDriver *drv1;
229     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
230         if (!strcmp(drv1->format_name, format_name)) {
231             return drv1;
232         }
233     }
234     return NULL;
235 }
236 
237 static int bdrv_is_whitelisted(BlockDriver *drv)
238 {
239     static const char *whitelist[] = {
240         CONFIG_BDRV_WHITELIST
241     };
242     const char **p;
243 
244     if (!whitelist[0])
245         return 1;               /* no whitelist, anything goes */
246 
247     for (p = whitelist; *p; p++) {
248         if (!strcmp(drv->format_name, *p)) {
249             return 1;
250         }
251     }
252     return 0;
253 }
254 
255 BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
256 {
257     BlockDriver *drv = bdrv_find_format(format_name);
258     return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
259 }
260 
261 int bdrv_create(BlockDriver *drv, const char* filename,
262     QEMUOptionParameter *options)
263 {
264     if (!drv->bdrv_create)
265         return -ENOTSUP;
266 
267     return drv->bdrv_create(filename, options);
268 }
269 
270 int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
271 {
272     BlockDriver *drv;
273 
274     drv = bdrv_find_protocol(filename);
275     if (drv == NULL) {
276         return -ENOENT;
277     }
278 
279     return bdrv_create(drv, filename, options);
280 }
281 
282 #ifdef _WIN32
283 void get_tmp_filename(char *filename, int size)
284 {
285     char temp_dir[MAX_PATH];
286 
287     GetTempPath(MAX_PATH, temp_dir);
288     GetTempFileName(temp_dir, "qem", 0, filename);
289 }
290 #else
/*
 * Create an empty temporary file below $TMPDIR (or /tmp when TMPDIR is not
 * set) and store its name in filename.
 *
 * filename: destination buffer
 * size:     capacity of filename in bytes
 *
 * On failure (the template does not fit in size bytes, or mkstemp() fails)
 * filename is set to the empty string, so callers never go on to use a
 * predictable, un-created "vl.XXXXXX" name.
 */
void get_tmp_filename(char *filename, int size)
{
    const char *tmpdir;
    int fd, len;

    if (size <= 0) {
        return;
    }

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }

    len = snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    if (len < 0 || len >= size) {
        /* a truncated template would be rejected by mkstemp() anyway */
        filename[0] = '\0';
        return;
    }

    fd = mkstemp(filename);
    if (fd < 0) {
        /* the template contents are unspecified after a failure */
        filename[0] = '\0';
        return;
    }
    /* XXX: race condition possible: the file is reopened by name later */
    close(fd);
}
303 #endif
304 
305 /*
306  * Detect host devices. By convention, /dev/cdrom[N] is always
307  * recognized as a host CDROM.
308  */
309 static BlockDriver *find_hdev_driver(const char *filename)
310 {
311     int score_max = 0, score;
312     BlockDriver *drv = NULL, *d;
313 
314     QLIST_FOREACH(d, &bdrv_drivers, list) {
315         if (d->bdrv_probe_device) {
316             score = d->bdrv_probe_device(filename);
317             if (score > score_max) {
318                 score_max = score;
319                 drv = d;
320             }
321         }
322     }
323 
324     return drv;
325 }
326 
327 BlockDriver *bdrv_find_protocol(const char *filename)
328 {
329     BlockDriver *drv1;
330     char protocol[128];
331     int len;
332     const char *p;
333 
334     /* TODO Drivers without bdrv_file_open must be specified explicitly */
335 
336     /*
337      * XXX(hch): we really should not let host device detection
338      * override an explicit protocol specification, but moving this
339      * later breaks access to device names with colons in them.
340      * Thanks to the brain-dead persistent naming schemes on udev-
341      * based Linux systems those actually are quite common.
342      */
343     drv1 = find_hdev_driver(filename);
344     if (drv1) {
345         return drv1;
346     }
347 
348     if (!path_has_protocol(filename)) {
349         return bdrv_find_format("file");
350     }
351     p = strchr(filename, ':');
352     assert(p != NULL);
353     len = p - filename;
354     if (len > sizeof(protocol) - 1)
355         len = sizeof(protocol) - 1;
356     memcpy(protocol, filename, len);
357     protocol[len] = '\0';
358     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
359         if (drv1->protocol_name &&
360             !strcmp(drv1->protocol_name, protocol)) {
361             return drv1;
362         }
363     }
364     return NULL;
365 }
366 
367 static int find_image_format(const char *filename, BlockDriver **pdrv)
368 {
369     int ret, score, score_max;
370     BlockDriver *drv1, *drv;
371     uint8_t buf[2048];
372     BlockDriverState *bs;
373 
374     ret = bdrv_file_open(&bs, filename, 0);
375     if (ret < 0) {
376         *pdrv = NULL;
377         return ret;
378     }
379 
380     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
381     if (bs->sg || !bdrv_is_inserted(bs)) {
382         bdrv_delete(bs);
383         drv = bdrv_find_format("raw");
384         if (!drv) {
385             ret = -ENOENT;
386         }
387         *pdrv = drv;
388         return ret;
389     }
390 
391     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
392     bdrv_delete(bs);
393     if (ret < 0) {
394         *pdrv = NULL;
395         return ret;
396     }
397 
398     score_max = 0;
399     drv = NULL;
400     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
401         if (drv1->bdrv_probe) {
402             score = drv1->bdrv_probe(buf, ret, filename);
403             if (score > score_max) {
404                 score_max = score;
405                 drv = drv1;
406             }
407         }
408     }
409     if (!drv) {
410         ret = -ENOENT;
411     }
412     *pdrv = drv;
413     return ret;
414 }
415 
416 /**
417  * Set the current 'total_sectors' value
418  */
419 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
420 {
421     BlockDriver *drv = bs->drv;
422 
423     /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
424     if (bs->sg)
425         return 0;
426 
427     /* query actual device if possible, otherwise just trust the hint */
428     if (drv->bdrv_getlength) {
429         int64_t length = drv->bdrv_getlength(bs);
430         if (length < 0) {
431             return length;
432         }
433         hint = length >> BDRV_SECTOR_BITS;
434     }
435 
436     bs->total_sectors = hint;
437     return 0;
438 }
439 
440 /*
441  * Common part for opening disk images and files
442  */
443 static int bdrv_open_common(BlockDriverState *bs, const char *filename,
444     int flags, BlockDriver *drv)
445 {
446     int ret, open_flags;
447 
448     assert(drv != NULL);
449 
450     bs->file = NULL;
451     bs->total_sectors = 0;
452     bs->encrypted = 0;
453     bs->valid_key = 0;
454     bs->open_flags = flags;
455     /* buffer_alignment defaulted to 512, drivers can change this value */
456     bs->buffer_alignment = 512;
457 
458     pstrcpy(bs->filename, sizeof(bs->filename), filename);
459 
460     if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
461         return -ENOTSUP;
462     }
463 
464     bs->drv = drv;
465     bs->opaque = g_malloc0(drv->instance_size);
466 
467     if (flags & BDRV_O_CACHE_WB)
468         bs->enable_write_cache = 1;
469 
470     /*
471      * Clear flags that are internal to the block layer before opening the
472      * image.
473      */
474     open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
475 
476     /*
477      * Snapshots should be writable.
478      */
479     if (bs->is_temporary) {
480         open_flags |= BDRV_O_RDWR;
481     }
482 
483     /* Open the image, either directly or using a protocol */
484     if (drv->bdrv_file_open) {
485         ret = drv->bdrv_file_open(bs, filename, open_flags);
486     } else {
487         ret = bdrv_file_open(&bs->file, filename, open_flags);
488         if (ret >= 0) {
489             ret = drv->bdrv_open(bs, open_flags);
490         }
491     }
492 
493     if (ret < 0) {
494         goto free_and_fail;
495     }
496 
497     bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
498 
499     ret = refresh_total_sectors(bs, bs->total_sectors);
500     if (ret < 0) {
501         goto free_and_fail;
502     }
503 
504 #ifndef _WIN32
505     if (bs->is_temporary) {
506         unlink(filename);
507     }
508 #endif
509     return 0;
510 
511 free_and_fail:
512     if (bs->file) {
513         bdrv_delete(bs->file);
514         bs->file = NULL;
515     }
516     g_free(bs->opaque);
517     bs->opaque = NULL;
518     bs->drv = NULL;
519     return ret;
520 }
521 
522 /*
523  * Opens a file using a protocol (file, host_device, nbd, ...)
524  */
525 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
526 {
527     BlockDriverState *bs;
528     BlockDriver *drv;
529     int ret;
530 
531     drv = bdrv_find_protocol(filename);
532     if (!drv) {
533         return -ENOENT;
534     }
535 
536     bs = bdrv_new("");
537     ret = bdrv_open_common(bs, filename, flags, drv);
538     if (ret < 0) {
539         bdrv_delete(bs);
540         return ret;
541     }
542     bs->growable = 1;
543     *pbs = bs;
544     return 0;
545 }
546 
547 /*
548  * Opens a disk image (raw, qcow2, vmdk, ...)
549  */
550 int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
551               BlockDriver *drv)
552 {
553     int ret;
554 
555     if (flags & BDRV_O_SNAPSHOT) {
556         BlockDriverState *bs1;
557         int64_t total_size;
558         int is_protocol = 0;
559         BlockDriver *bdrv_qcow2;
560         QEMUOptionParameter *options;
561         char tmp_filename[PATH_MAX];
562         char backing_filename[PATH_MAX];
563 
564         /* if snapshot, we create a temporary backing file and open it
565            instead of opening 'filename' directly */
566 
567         /* if there is a backing file, use it */
568         bs1 = bdrv_new("");
569         ret = bdrv_open(bs1, filename, 0, drv);
570         if (ret < 0) {
571             bdrv_delete(bs1);
572             return ret;
573         }
574         total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
575 
576         if (bs1->drv && bs1->drv->protocol_name)
577             is_protocol = 1;
578 
579         bdrv_delete(bs1);
580 
581         get_tmp_filename(tmp_filename, sizeof(tmp_filename));
582 
583         /* Real path is meaningless for protocols */
584         if (is_protocol)
585             snprintf(backing_filename, sizeof(backing_filename),
586                      "%s", filename);
587         else if (!realpath(filename, backing_filename))
588             return -errno;
589 
590         bdrv_qcow2 = bdrv_find_format("qcow2");
591         options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
592 
593         set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
594         set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
595         if (drv) {
596             set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
597                 drv->format_name);
598         }
599 
600         ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
601         free_option_parameters(options);
602         if (ret < 0) {
603             return ret;
604         }
605 
606         filename = tmp_filename;
607         drv = bdrv_qcow2;
608         bs->is_temporary = 1;
609     }
610 
611     /* Find the right image format driver */
612     if (!drv) {
613         ret = find_image_format(filename, &drv);
614     }
615 
616     if (!drv) {
617         goto unlink_and_fail;
618     }
619 
620     /* Open the image */
621     ret = bdrv_open_common(bs, filename, flags, drv);
622     if (ret < 0) {
623         goto unlink_and_fail;
624     }
625 
626     /* If there is a backing file, use it */
627     if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
628         char backing_filename[PATH_MAX];
629         int back_flags;
630         BlockDriver *back_drv = NULL;
631 
632         bs->backing_hd = bdrv_new("");
633 
634         if (path_has_protocol(bs->backing_file)) {
635             pstrcpy(backing_filename, sizeof(backing_filename),
636                     bs->backing_file);
637         } else {
638             path_combine(backing_filename, sizeof(backing_filename),
639                          filename, bs->backing_file);
640         }
641 
642         if (bs->backing_format[0] != '\0') {
643             back_drv = bdrv_find_format(bs->backing_format);
644         }
645 
646         /* backing files always opened read-only */
647         back_flags =
648             flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
649 
650         ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
651         if (ret < 0) {
652             bdrv_close(bs);
653             return ret;
654         }
655         if (bs->is_temporary) {
656             bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
657         } else {
658             /* base image inherits from "parent" */
659             bs->backing_hd->keep_read_only = bs->keep_read_only;
660         }
661     }
662 
663     if (!bdrv_key_required(bs)) {
664         /* call the change callback */
665         bs->media_changed = 1;
666         if (bs->change_cb)
667             bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
668     }
669 
670     return 0;
671 
672 unlink_and_fail:
673     if (bs->is_temporary) {
674         unlink(filename);
675     }
676     return ret;
677 }
678 
679 void bdrv_close(BlockDriverState *bs)
680 {
681     if (bs->drv) {
682         if (bs == bs_snapshots) {
683             bs_snapshots = NULL;
684         }
685         if (bs->backing_hd) {
686             bdrv_delete(bs->backing_hd);
687             bs->backing_hd = NULL;
688         }
689         bs->drv->bdrv_close(bs);
690         g_free(bs->opaque);
691 #ifdef _WIN32
692         if (bs->is_temporary) {
693             unlink(bs->filename);
694         }
695 #endif
696         bs->opaque = NULL;
697         bs->drv = NULL;
698 
699         if (bs->file != NULL) {
700             bdrv_close(bs->file);
701         }
702 
703         /* call the change callback */
704         bs->media_changed = 1;
705         if (bs->change_cb)
706             bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
707     }
708 }
709 
710 void bdrv_close_all(void)
711 {
712     BlockDriverState *bs;
713 
714     QTAILQ_FOREACH(bs, &bdrv_states, list) {
715         bdrv_close(bs);
716     }
717 }
718 
719 /* make a BlockDriverState anonymous by removing from bdrv_state list.
720    Also, NULL terminate the device_name to prevent double remove */
721 void bdrv_make_anon(BlockDriverState *bs)
722 {
723     if (bs->device_name[0] != '\0') {
724         QTAILQ_REMOVE(&bdrv_states, bs, list);
725     }
726     bs->device_name[0] = '\0';
727 }
728 
729 void bdrv_delete(BlockDriverState *bs)
730 {
731     assert(!bs->peer);
732 
733     /* remove from list, if necessary */
734     bdrv_make_anon(bs);
735 
736     bdrv_close(bs);
737     if (bs->file != NULL) {
738         bdrv_delete(bs->file);
739     }
740 
741     assert(bs != bs_snapshots);
742     g_free(bs);
743 }
744 
745 int bdrv_attach(BlockDriverState *bs, DeviceState *qdev)
746 {
747     if (bs->peer) {
748         return -EBUSY;
749     }
750     bs->peer = qdev;
751     return 0;
752 }
753 
754 void bdrv_detach(BlockDriverState *bs, DeviceState *qdev)
755 {
756     assert(bs->peer == qdev);
757     bs->peer = NULL;
758     bs->change_cb = NULL;
759     bs->change_opaque = NULL;
760 }
761 
762 DeviceState *bdrv_get_attached(BlockDriverState *bs)
763 {
764     return bs->peer;
765 }
766 
767 /*
768  * Run consistency checks on an image
769  *
770  * Returns 0 if the check could be completed (it doesn't mean that the image is
771  * free of errors) or -errno when an internal error occurred. The results of the
772  * check are stored in res.
773  */
774 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
775 {
776     if (bs->drv->bdrv_check == NULL) {
777         return -ENOTSUP;
778     }
779 
780     memset(res, 0, sizeof(*res));
781     return bs->drv->bdrv_check(bs, res);
782 }
783 
784 #define COMMIT_BUF_SECTORS 2048
785 
786 /* commit COW file into the raw image */
787 int bdrv_commit(BlockDriverState *bs)
788 {
789     BlockDriver *drv = bs->drv;
790     BlockDriver *backing_drv;
791     int64_t sector, total_sectors;
792     int n, ro, open_flags;
793     int ret = 0, rw_ret = 0;
794     uint8_t *buf;
795     char filename[1024];
796     BlockDriverState *bs_rw, *bs_ro;
797 
798     if (!drv)
799         return -ENOMEDIUM;
800 
801     if (!bs->backing_hd) {
802         return -ENOTSUP;
803     }
804 
805     if (bs->backing_hd->keep_read_only) {
806         return -EACCES;
807     }
808 
809     backing_drv = bs->backing_hd->drv;
810     ro = bs->backing_hd->read_only;
811     strncpy(filename, bs->backing_hd->filename, sizeof(filename));
812     open_flags =  bs->backing_hd->open_flags;
813 
814     if (ro) {
815         /* re-open as RW */
816         bdrv_delete(bs->backing_hd);
817         bs->backing_hd = NULL;
818         bs_rw = bdrv_new("");
819         rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
820             backing_drv);
821         if (rw_ret < 0) {
822             bdrv_delete(bs_rw);
823             /* try to re-open read-only */
824             bs_ro = bdrv_new("");
825             ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
826                 backing_drv);
827             if (ret < 0) {
828                 bdrv_delete(bs_ro);
829                 /* drive not functional anymore */
830                 bs->drv = NULL;
831                 return ret;
832             }
833             bs->backing_hd = bs_ro;
834             return rw_ret;
835         }
836         bs->backing_hd = bs_rw;
837     }
838 
839     total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
840     buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
841 
842     for (sector = 0; sector < total_sectors; sector += n) {
843         if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
844 
845             if (bdrv_read(bs, sector, buf, n) != 0) {
846                 ret = -EIO;
847                 goto ro_cleanup;
848             }
849 
850             if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
851                 ret = -EIO;
852                 goto ro_cleanup;
853             }
854         }
855     }
856 
857     if (drv->bdrv_make_empty) {
858         ret = drv->bdrv_make_empty(bs);
859         bdrv_flush(bs);
860     }
861 
862     /*
863      * Make sure all data we wrote to the backing device is actually
864      * stable on disk.
865      */
866     if (bs->backing_hd)
867         bdrv_flush(bs->backing_hd);
868 
869 ro_cleanup:
870     g_free(buf);
871 
872     if (ro) {
873         /* re-open as RO */
874         bdrv_delete(bs->backing_hd);
875         bs->backing_hd = NULL;
876         bs_ro = bdrv_new("");
877         ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
878             backing_drv);
879         if (ret < 0) {
880             bdrv_delete(bs_ro);
881             /* drive not functional anymore */
882             bs->drv = NULL;
883             return ret;
884         }
885         bs->backing_hd = bs_ro;
886         bs->backing_hd->keep_read_only = 0;
887     }
888 
889     return ret;
890 }
891 
892 void bdrv_commit_all(void)
893 {
894     BlockDriverState *bs;
895 
896     QTAILQ_FOREACH(bs, &bdrv_states, list) {
897         bdrv_commit(bs);
898     }
899 }
900 
901 /*
902  * Return values:
903  * 0        - success
904  * -EINVAL  - backing format specified, but no file
905  * -ENOSPC  - can't update the backing file because no space is left in the
906  *            image file header
907  * -ENOTSUP - format driver doesn't support changing the backing file
908  */
909 int bdrv_change_backing_file(BlockDriverState *bs,
910     const char *backing_file, const char *backing_fmt)
911 {
912     BlockDriver *drv = bs->drv;
913 
914     if (drv->bdrv_change_backing_file != NULL) {
915         return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
916     } else {
917         return -ENOTSUP;
918     }
919 }
920 
921 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
922                                    size_t size)
923 {
924     int64_t len;
925 
926     if (!bdrv_is_inserted(bs))
927         return -ENOMEDIUM;
928 
929     if (bs->growable)
930         return 0;
931 
932     len = bdrv_getlength(bs);
933 
934     if (offset < 0)
935         return -EIO;
936 
937     if ((offset > len) || (len - offset < size))
938         return -EIO;
939 
940     return 0;
941 }
942 
943 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
944                               int nb_sectors)
945 {
946     return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
947                                    nb_sectors * BDRV_SECTOR_SIZE);
948 }
949 
950 static inline bool bdrv_has_async_rw(BlockDriver *drv)
951 {
952     return drv->bdrv_co_readv != bdrv_co_readv_em
953         || drv->bdrv_aio_readv != bdrv_aio_readv_em;
954 }
955 
956 static inline bool bdrv_has_async_flush(BlockDriver *drv)
957 {
958     return drv->bdrv_aio_flush != bdrv_aio_flush_em;
959 }
960 
961 /* return < 0 if error. See bdrv_write() for the return codes */
962 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
963               uint8_t *buf, int nb_sectors)
964 {
965     BlockDriver *drv = bs->drv;
966 
967     if (!drv)
968         return -ENOMEDIUM;
969 
970     if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) {
971         QEMUIOVector qiov;
972         struct iovec iov = {
973             .iov_base = (void *)buf,
974             .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
975         };
976 
977         qemu_iovec_init_external(&qiov, &iov, 1);
978         return bdrv_co_readv(bs, sector_num, nb_sectors, &qiov);
979     }
980 
981     if (bdrv_check_request(bs, sector_num, nb_sectors))
982         return -EIO;
983 
984     return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
985 }
986 
987 static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
988                              int nb_sectors, int dirty)
989 {
990     int64_t start, end;
991     unsigned long val, idx, bit;
992 
993     start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
994     end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
995 
996     for (; start <= end; start++) {
997         idx = start / (sizeof(unsigned long) * 8);
998         bit = start % (sizeof(unsigned long) * 8);
999         val = bs->dirty_bitmap[idx];
1000         if (dirty) {
1001             if (!(val & (1UL << bit))) {
1002                 bs->dirty_count++;
1003                 val |= 1UL << bit;
1004             }
1005         } else {
1006             if (val & (1UL << bit)) {
1007                 bs->dirty_count--;
1008                 val &= ~(1UL << bit);
1009             }
1010         }
1011         bs->dirty_bitmap[idx] = val;
1012     }
1013 }
1014 
1015 /* Return < 0 if error. Important errors are:
1016   -EIO         generic I/O error (may happen for all errors)
1017   -ENOMEDIUM   No media inserted.
1018   -EINVAL      Invalid sector number or nb_sectors
1019   -EACCES      Trying to write a read-only device
1020 */
1021 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
1022                const uint8_t *buf, int nb_sectors)
1023 {
1024     BlockDriver *drv = bs->drv;
1025 
1026     if (!bs->drv)
1027         return -ENOMEDIUM;
1028 
1029     if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) {
1030         QEMUIOVector qiov;
1031         struct iovec iov = {
1032             .iov_base = (void *)buf,
1033             .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1034         };
1035 
1036         qemu_iovec_init_external(&qiov, &iov, 1);
1037         return bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
1038     }
1039 
1040     if (bs->read_only)
1041         return -EACCES;
1042     if (bdrv_check_request(bs, sector_num, nb_sectors))
1043         return -EIO;
1044 
1045     if (bs->dirty_bitmap) {
1046         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1047     }
1048 
1049     if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1050         bs->wr_highest_sector = sector_num + nb_sectors - 1;
1051     }
1052 
1053     return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
1054 }
1055 
1056 int bdrv_pread(BlockDriverState *bs, int64_t offset,
1057                void *buf, int count1)
1058 {
1059     uint8_t tmp_buf[BDRV_SECTOR_SIZE];
1060     int len, nb_sectors, count;
1061     int64_t sector_num;
1062     int ret;
1063 
1064     count = count1;
1065     /* first read to align to sector start */
1066     len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
1067     if (len > count)
1068         len = count;
1069     sector_num = offset >> BDRV_SECTOR_BITS;
1070     if (len > 0) {
1071         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1072             return ret;
1073         memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
1074         count -= len;
1075         if (count == 0)
1076             return count1;
1077         sector_num++;
1078         buf += len;
1079     }
1080 
1081     /* read the sectors "in place" */
1082     nb_sectors = count >> BDRV_SECTOR_BITS;
1083     if (nb_sectors > 0) {
1084         if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1085             return ret;
1086         sector_num += nb_sectors;
1087         len = nb_sectors << BDRV_SECTOR_BITS;
1088         buf += len;
1089         count -= len;
1090     }
1091 
1092     /* add data from the last sector */
1093     if (count > 0) {
1094         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1095             return ret;
1096         memcpy(buf, tmp_buf, count);
1097     }
1098     return count1;
1099 }
1100 
1101 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1102                 const void *buf, int count1)
1103 {
1104     uint8_t tmp_buf[BDRV_SECTOR_SIZE];
1105     int len, nb_sectors, count;
1106     int64_t sector_num;
1107     int ret;
1108 
1109     count = count1;
1110     /* first write to align to sector start */
1111     len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
1112     if (len > count)
1113         len = count;
1114     sector_num = offset >> BDRV_SECTOR_BITS;
1115     if (len > 0) {
1116         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1117             return ret;
1118         memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
1119         if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1120             return ret;
1121         count -= len;
1122         if (count == 0)
1123             return count1;
1124         sector_num++;
1125         buf += len;
1126     }
1127 
1128     /* write the sectors "in place" */
1129     nb_sectors = count >> BDRV_SECTOR_BITS;
1130     if (nb_sectors > 0) {
1131         if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1132             return ret;
1133         sector_num += nb_sectors;
1134         len = nb_sectors << BDRV_SECTOR_BITS;
1135         buf += len;
1136         count -= len;
1137     }
1138 
1139     /* add data from the last sector */
1140     if (count > 0) {
1141         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1142             return ret;
1143         memcpy(tmp_buf, buf, count);
1144         if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1145             return ret;
1146     }
1147     return count1;
1148 }
1149 
1150 /*
1151  * Writes to the file and ensures that no writes are reordered across this
1152  * request (acts as a barrier)
1153  *
1154  * Returns 0 on success, -errno in error cases.
1155  */
1156 int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1157     const void *buf, int count)
1158 {
1159     int ret;
1160 
1161     ret = bdrv_pwrite(bs, offset, buf, count);
1162     if (ret < 0) {
1163         return ret;
1164     }
1165 
1166     /* No flush needed for cache=writethrough, it uses O_DSYNC */
1167     if ((bs->open_flags & BDRV_O_CACHE_MASK) != 0) {
1168         bdrv_flush(bs);
1169     }
1170 
1171     return 0;
1172 }
1173 
1174 int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
1175     int nb_sectors, QEMUIOVector *qiov)
1176 {
1177     BlockDriver *drv = bs->drv;
1178 
1179     trace_bdrv_co_readv(bs, sector_num, nb_sectors);
1180 
1181     if (!drv) {
1182         return -ENOMEDIUM;
1183     }
1184     if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1185         return -EIO;
1186     }
1187 
1188     return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1189 }
1190 
1191 int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1192     int nb_sectors, QEMUIOVector *qiov)
1193 {
1194     BlockDriver *drv = bs->drv;
1195 
1196     trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1197 
1198     if (!bs->drv) {
1199         return -ENOMEDIUM;
1200     }
1201     if (bs->read_only) {
1202         return -EACCES;
1203     }
1204     if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1205         return -EIO;
1206     }
1207 
1208     if (bs->dirty_bitmap) {
1209         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1210     }
1211 
1212     if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1213         bs->wr_highest_sector = sector_num + nb_sectors - 1;
1214     }
1215 
1216     return drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1217 }
1218 
1219 /**
1220  * Truncate file to 'offset' bytes (needed only for file protocols)
1221  */
1222 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1223 {
1224     BlockDriver *drv = bs->drv;
1225     int ret;
1226     if (!drv)
1227         return -ENOMEDIUM;
1228     if (!drv->bdrv_truncate)
1229         return -ENOTSUP;
1230     if (bs->read_only)
1231         return -EACCES;
1232     if (bdrv_in_use(bs))
1233         return -EBUSY;
1234     ret = drv->bdrv_truncate(bs, offset);
1235     if (ret == 0) {
1236         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
1237         if (bs->change_cb) {
1238             bs->change_cb(bs->change_opaque, CHANGE_SIZE);
1239         }
1240     }
1241     return ret;
1242 }
1243 
1244 /**
1245  * Length of a allocated file in bytes. Sparse files are counted by actual
1246  * allocated space. Return < 0 if error or unknown.
1247  */
1248 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1249 {
1250     BlockDriver *drv = bs->drv;
1251     if (!drv) {
1252         return -ENOMEDIUM;
1253     }
1254     if (drv->bdrv_get_allocated_file_size) {
1255         return drv->bdrv_get_allocated_file_size(bs);
1256     }
1257     if (bs->file) {
1258         return bdrv_get_allocated_file_size(bs->file);
1259     }
1260     return -ENOTSUP;
1261 }
1262 
1263 /**
1264  * Length of a file in bytes. Return < 0 if error or unknown.
1265  */
1266 int64_t bdrv_getlength(BlockDriverState *bs)
1267 {
1268     BlockDriver *drv = bs->drv;
1269     if (!drv)
1270         return -ENOMEDIUM;
1271 
1272     if (bs->growable || bs->removable) {
1273         if (drv->bdrv_getlength) {
1274             return drv->bdrv_getlength(bs);
1275         }
1276     }
1277     return bs->total_sectors * BDRV_SECTOR_SIZE;
1278 }
1279 
1280 /* return 0 as number of sectors if no device present or error */
1281 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
1282 {
1283     int64_t length;
1284     length = bdrv_getlength(bs);
1285     if (length < 0)
1286         length = 0;
1287     else
1288         length = length >> BDRV_SECTOR_BITS;
1289     *nb_sectors_ptr = length;
1290 }
1291 
/* One 16-byte entry of the MSDOS partition table (packed, no padding). */
struct partition {
    uint8_t boot_ind;       /* 0x80 - active */
    uint8_t head;           /* starting head */
    uint8_t sector;         /* starting sector */
    uint8_t cyl;            /* starting cylinder */
    uint8_t sys_ind;        /* partition type */
    uint8_t end_head;       /* end head */
    uint8_t end_sector;     /* end sector */
    uint8_t end_cyl;        /* end cylinder */
    uint32_t start_sect;    /* starting sector counting from 0 */
    uint32_t nr_sects;      /* number of sectors in partition */
} __attribute__((packed));
1304 
1305 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1306 static int guess_disk_lchs(BlockDriverState *bs,
1307                            int *pcylinders, int *pheads, int *psectors)
1308 {
1309     uint8_t buf[BDRV_SECTOR_SIZE];
1310     int ret, i, heads, sectors, cylinders;
1311     struct partition *p;
1312     uint32_t nr_sects;
1313     uint64_t nb_sectors;
1314 
1315     bdrv_get_geometry(bs, &nb_sectors);
1316 
1317     ret = bdrv_read(bs, 0, buf, 1);
1318     if (ret < 0)
1319         return -1;
1320     /* test msdos magic */
1321     if (buf[510] != 0x55 || buf[511] != 0xaa)
1322         return -1;
1323     for(i = 0; i < 4; i++) {
1324         p = ((struct partition *)(buf + 0x1be)) + i;
1325         nr_sects = le32_to_cpu(p->nr_sects);
1326         if (nr_sects && p->end_head) {
1327             /* We make the assumption that the partition terminates on
1328                a cylinder boundary */
1329             heads = p->end_head + 1;
1330             sectors = p->end_sector & 63;
1331             if (sectors == 0)
1332                 continue;
1333             cylinders = nb_sectors / (heads * sectors);
1334             if (cylinders < 1 || cylinders > 16383)
1335                 continue;
1336             *pheads = heads;
1337             *psectors = sectors;
1338             *pcylinders = cylinders;
1339 #if 0
1340             printf("guessed geometry: LCHS=%d %d %d\n",
1341                    cylinders, heads, sectors);
1342 #endif
1343             return 0;
1344         }
1345     }
1346     return -1;
1347 }
1348 
1349 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1350 {
1351     int translation, lba_detected = 0;
1352     int cylinders, heads, secs;
1353     uint64_t nb_sectors;
1354 
1355     /* if a geometry hint is available, use it */
1356     bdrv_get_geometry(bs, &nb_sectors);
1357     bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1358     translation = bdrv_get_translation_hint(bs);
1359     if (cylinders != 0) {
1360         *pcyls = cylinders;
1361         *pheads = heads;
1362         *psecs = secs;
1363     } else {
1364         if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1365             if (heads > 16) {
1366                 /* if heads > 16, it means that a BIOS LBA
1367                    translation was active, so the default
1368                    hardware geometry is OK */
1369                 lba_detected = 1;
1370                 goto default_geometry;
1371             } else {
1372                 *pcyls = cylinders;
1373                 *pheads = heads;
1374                 *psecs = secs;
1375                 /* disable any translation to be in sync with
1376                    the logical geometry */
1377                 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1378                     bdrv_set_translation_hint(bs,
1379                                               BIOS_ATA_TRANSLATION_NONE);
1380                 }
1381             }
1382         } else {
1383         default_geometry:
1384             /* if no geometry, use a standard physical disk geometry */
1385             cylinders = nb_sectors / (16 * 63);
1386 
1387             if (cylinders > 16383)
1388                 cylinders = 16383;
1389             else if (cylinders < 2)
1390                 cylinders = 2;
1391             *pcyls = cylinders;
1392             *pheads = 16;
1393             *psecs = 63;
1394             if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1395                 if ((*pcyls * *pheads) <= 131072) {
1396                     bdrv_set_translation_hint(bs,
1397                                               BIOS_ATA_TRANSLATION_LARGE);
1398                 } else {
1399                     bdrv_set_translation_hint(bs,
1400                                               BIOS_ATA_TRANSLATION_LBA);
1401                 }
1402             }
1403         }
1404         bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1405     }
1406 }
1407 
1408 void bdrv_set_geometry_hint(BlockDriverState *bs,
1409                             int cyls, int heads, int secs)
1410 {
1411     bs->cyls = cyls;
1412     bs->heads = heads;
1413     bs->secs = secs;
1414 }
1415 
1416 void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1417 {
1418     bs->translation = translation;
1419 }
1420 
1421 void bdrv_get_geometry_hint(BlockDriverState *bs,
1422                             int *pcyls, int *pheads, int *psecs)
1423 {
1424     *pcyls = bs->cyls;
1425     *pheads = bs->heads;
1426     *psecs = bs->secs;
1427 }
1428 
1429 /* Recognize floppy formats */
1430 typedef struct FDFormat {
1431     FDriveType drive;
1432     uint8_t last_sect;
1433     uint8_t max_track;
1434     uint8_t max_head;
1435 } FDFormat;
1436 
1437 static const FDFormat fd_formats[] = {
1438     /* First entry is default format */
1439     /* 1.44 MB 3"1/2 floppy disks */
1440     { FDRIVE_DRV_144, 18, 80, 1, },
1441     { FDRIVE_DRV_144, 20, 80, 1, },
1442     { FDRIVE_DRV_144, 21, 80, 1, },
1443     { FDRIVE_DRV_144, 21, 82, 1, },
1444     { FDRIVE_DRV_144, 21, 83, 1, },
1445     { FDRIVE_DRV_144, 22, 80, 1, },
1446     { FDRIVE_DRV_144, 23, 80, 1, },
1447     { FDRIVE_DRV_144, 24, 80, 1, },
1448     /* 2.88 MB 3"1/2 floppy disks */
1449     { FDRIVE_DRV_288, 36, 80, 1, },
1450     { FDRIVE_DRV_288, 39, 80, 1, },
1451     { FDRIVE_DRV_288, 40, 80, 1, },
1452     { FDRIVE_DRV_288, 44, 80, 1, },
1453     { FDRIVE_DRV_288, 48, 80, 1, },
1454     /* 720 kB 3"1/2 floppy disks */
1455     { FDRIVE_DRV_144,  9, 80, 1, },
1456     { FDRIVE_DRV_144, 10, 80, 1, },
1457     { FDRIVE_DRV_144, 10, 82, 1, },
1458     { FDRIVE_DRV_144, 10, 83, 1, },
1459     { FDRIVE_DRV_144, 13, 80, 1, },
1460     { FDRIVE_DRV_144, 14, 80, 1, },
1461     /* 1.2 MB 5"1/4 floppy disks */
1462     { FDRIVE_DRV_120, 15, 80, 1, },
1463     { FDRIVE_DRV_120, 18, 80, 1, },
1464     { FDRIVE_DRV_120, 18, 82, 1, },
1465     { FDRIVE_DRV_120, 18, 83, 1, },
1466     { FDRIVE_DRV_120, 20, 80, 1, },
1467     /* 720 kB 5"1/4 floppy disks */
1468     { FDRIVE_DRV_120,  9, 80, 1, },
1469     { FDRIVE_DRV_120, 11, 80, 1, },
1470     /* 360 kB 5"1/4 floppy disks */
1471     { FDRIVE_DRV_120,  9, 40, 1, },
1472     { FDRIVE_DRV_120,  9, 40, 0, },
1473     { FDRIVE_DRV_120, 10, 41, 1, },
1474     { FDRIVE_DRV_120, 10, 42, 1, },
1475     /* 320 kB 5"1/4 floppy disks */
1476     { FDRIVE_DRV_120,  8, 40, 1, },
1477     { FDRIVE_DRV_120,  8, 40, 0, },
1478     /* 360 kB must match 5"1/4 better than 3"1/2... */
1479     { FDRIVE_DRV_144,  9, 80, 0, },
1480     /* end */
1481     { FDRIVE_DRV_NONE, -1, -1, 0, },
1482 };
1483 
1484 void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
1485                                    int *max_track, int *last_sect,
1486                                    FDriveType drive_in, FDriveType *drive)
1487 {
1488     const FDFormat *parse;
1489     uint64_t nb_sectors, size;
1490     int i, first_match, match;
1491 
1492     bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
1493     if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
1494         /* User defined disk */
1495     } else {
1496         bdrv_get_geometry(bs, &nb_sectors);
1497         match = -1;
1498         first_match = -1;
1499         for (i = 0; ; i++) {
1500             parse = &fd_formats[i];
1501             if (parse->drive == FDRIVE_DRV_NONE) {
1502                 break;
1503             }
1504             if (drive_in == parse->drive ||
1505                 drive_in == FDRIVE_DRV_NONE) {
1506                 size = (parse->max_head + 1) * parse->max_track *
1507                     parse->last_sect;
1508                 if (nb_sectors == size) {
1509                     match = i;
1510                     break;
1511                 }
1512                 if (first_match == -1) {
1513                     first_match = i;
1514                 }
1515             }
1516         }
1517         if (match == -1) {
1518             if (first_match == -1) {
1519                 match = 1;
1520             } else {
1521                 match = first_match;
1522             }
1523             parse = &fd_formats[match];
1524         }
1525         *nb_heads = parse->max_head + 1;
1526         *max_track = parse->max_track;
1527         *last_sect = parse->last_sect;
1528         *drive = parse->drive;
1529     }
1530 }
1531 
1532 int bdrv_get_translation_hint(BlockDriverState *bs)
1533 {
1534     return bs->translation;
1535 }
1536 
1537 void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
1538                        BlockErrorAction on_write_error)
1539 {
1540     bs->on_read_error = on_read_error;
1541     bs->on_write_error = on_write_error;
1542 }
1543 
1544 BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1545 {
1546     return is_read ? bs->on_read_error : bs->on_write_error;
1547 }
1548 
1549 void bdrv_set_removable(BlockDriverState *bs, int removable)
1550 {
1551     bs->removable = removable;
1552     if (removable && bs == bs_snapshots) {
1553         bs_snapshots = NULL;
1554     }
1555 }
1556 
1557 int bdrv_is_removable(BlockDriverState *bs)
1558 {
1559     return bs->removable;
1560 }
1561 
1562 int bdrv_is_read_only(BlockDriverState *bs)
1563 {
1564     return bs->read_only;
1565 }
1566 
1567 int bdrv_is_sg(BlockDriverState *bs)
1568 {
1569     return bs->sg;
1570 }
1571 
1572 int bdrv_enable_write_cache(BlockDriverState *bs)
1573 {
1574     return bs->enable_write_cache;
1575 }
1576 
1577 /* XXX: no longer used */
1578 void bdrv_set_change_cb(BlockDriverState *bs,
1579                         void (*change_cb)(void *opaque, int reason),
1580                         void *opaque)
1581 {
1582     bs->change_cb = change_cb;
1583     bs->change_opaque = opaque;
1584 }
1585 
1586 int bdrv_is_encrypted(BlockDriverState *bs)
1587 {
1588     if (bs->backing_hd && bs->backing_hd->encrypted)
1589         return 1;
1590     return bs->encrypted;
1591 }
1592 
1593 int bdrv_key_required(BlockDriverState *bs)
1594 {
1595     BlockDriverState *backing_hd = bs->backing_hd;
1596 
1597     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1598         return 1;
1599     return (bs->encrypted && !bs->valid_key);
1600 }
1601 
1602 int bdrv_set_key(BlockDriverState *bs, const char *key)
1603 {
1604     int ret;
1605     if (bs->backing_hd && bs->backing_hd->encrypted) {
1606         ret = bdrv_set_key(bs->backing_hd, key);
1607         if (ret < 0)
1608             return ret;
1609         if (!bs->encrypted)
1610             return 0;
1611     }
1612     if (!bs->encrypted) {
1613         return -EINVAL;
1614     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
1615         return -ENOMEDIUM;
1616     }
1617     ret = bs->drv->bdrv_set_key(bs, key);
1618     if (ret < 0) {
1619         bs->valid_key = 0;
1620     } else if (!bs->valid_key) {
1621         bs->valid_key = 1;
1622         /* call the change callback now, we skipped it on open */
1623         bs->media_changed = 1;
1624         if (bs->change_cb)
1625             bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
1626     }
1627     return ret;
1628 }
1629 
1630 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1631 {
1632     if (!bs->drv) {
1633         buf[0] = '\0';
1634     } else {
1635         pstrcpy(buf, buf_size, bs->drv->format_name);
1636     }
1637 }
1638 
1639 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
1640                          void *opaque)
1641 {
1642     BlockDriver *drv;
1643 
1644     QLIST_FOREACH(drv, &bdrv_drivers, list) {
1645         it(opaque, drv->format_name);
1646     }
1647 }
1648 
1649 BlockDriverState *bdrv_find(const char *name)
1650 {
1651     BlockDriverState *bs;
1652 
1653     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1654         if (!strcmp(name, bs->device_name)) {
1655             return bs;
1656         }
1657     }
1658     return NULL;
1659 }
1660 
1661 BlockDriverState *bdrv_next(BlockDriverState *bs)
1662 {
1663     if (!bs) {
1664         return QTAILQ_FIRST(&bdrv_states);
1665     }
1666     return QTAILQ_NEXT(bs, list);
1667 }
1668 
1669 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
1670 {
1671     BlockDriverState *bs;
1672 
1673     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1674         it(opaque, bs);
1675     }
1676 }
1677 
1678 const char *bdrv_get_device_name(BlockDriverState *bs)
1679 {
1680     return bs->device_name;
1681 }
1682 
1683 int bdrv_flush(BlockDriverState *bs)
1684 {
1685     if (bs->open_flags & BDRV_O_NO_FLUSH) {
1686         return 0;
1687     }
1688 
1689     if (bs->drv && bdrv_has_async_flush(bs->drv) && qemu_in_coroutine()) {
1690         return bdrv_co_flush_em(bs);
1691     }
1692 
1693     if (bs->drv && bs->drv->bdrv_flush) {
1694         return bs->drv->bdrv_flush(bs);
1695     }
1696 
1697     /*
1698      * Some block drivers always operate in either writethrough or unsafe mode
1699      * and don't support bdrv_flush therefore. Usually qemu doesn't know how
1700      * the server works (because the behaviour is hardcoded or depends on
1701      * server-side configuration), so we can't ensure that everything is safe
1702      * on disk. Returning an error doesn't work because that would break guests
1703      * even if the server operates in writethrough mode.
1704      *
1705      * Let's hope the user knows what he's doing.
1706      */
1707     return 0;
1708 }
1709 
1710 void bdrv_flush_all(void)
1711 {
1712     BlockDriverState *bs;
1713 
1714     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1715         if (bs->drv && !bdrv_is_read_only(bs) &&
1716             (!bdrv_is_removable(bs) || bdrv_is_inserted(bs))) {
1717             bdrv_flush(bs);
1718         }
1719     }
1720 }
1721 
1722 int bdrv_has_zero_init(BlockDriverState *bs)
1723 {
1724     assert(bs->drv);
1725 
1726     if (bs->drv->bdrv_has_zero_init) {
1727         return bs->drv->bdrv_has_zero_init(bs);
1728     }
1729 
1730     return 1;
1731 }
1732 
1733 int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
1734 {
1735     if (!bs->drv) {
1736         return -ENOMEDIUM;
1737     }
1738     if (!bs->drv->bdrv_discard) {
1739         return 0;
1740     }
1741     return bs->drv->bdrv_discard(bs, sector_num, nb_sectors);
1742 }
1743 
1744 /*
1745  * Returns true iff the specified sector is present in the disk image. Drivers
1746  * not implementing the functionality are assumed to not support backing files,
1747  * hence all their sectors are reported as allocated.
1748  *
1749  * 'pnum' is set to the number of sectors (including and immediately following
1750  * the specified sector) that are known to be in the same
1751  * allocated/unallocated state.
1752  *
1753  * 'nb_sectors' is the max value 'pnum' should be set to.
1754  */
1755 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1756 	int *pnum)
1757 {
1758     int64_t n;
1759     if (!bs->drv->bdrv_is_allocated) {
1760         if (sector_num >= bs->total_sectors) {
1761             *pnum = 0;
1762             return 0;
1763         }
1764         n = bs->total_sectors - sector_num;
1765         *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1766         return 1;
1767     }
1768     return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1769 }
1770 
1771 void bdrv_mon_event(const BlockDriverState *bdrv,
1772                     BlockMonEventAction action, int is_read)
1773 {
1774     QObject *data;
1775     const char *action_str;
1776 
1777     switch (action) {
1778     case BDRV_ACTION_REPORT:
1779         action_str = "report";
1780         break;
1781     case BDRV_ACTION_IGNORE:
1782         action_str = "ignore";
1783         break;
1784     case BDRV_ACTION_STOP:
1785         action_str = "stop";
1786         break;
1787     default:
1788         abort();
1789     }
1790 
1791     data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1792                               bdrv->device_name,
1793                               action_str,
1794                               is_read ? "read" : "write");
1795     monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1796 
1797     qobject_decref(data);
1798 }
1799 
1800 static void bdrv_print_dict(QObject *obj, void *opaque)
1801 {
1802     QDict *bs_dict;
1803     Monitor *mon = opaque;
1804 
1805     bs_dict = qobject_to_qdict(obj);
1806 
1807     monitor_printf(mon, "%s: removable=%d",
1808                         qdict_get_str(bs_dict, "device"),
1809                         qdict_get_bool(bs_dict, "removable"));
1810 
1811     if (qdict_get_bool(bs_dict, "removable")) {
1812         monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
1813     }
1814 
1815     if (qdict_haskey(bs_dict, "inserted")) {
1816         QDict *qdict = qobject_to_qdict(qdict_get(bs_dict, "inserted"));
1817 
1818         monitor_printf(mon, " file=");
1819         monitor_print_filename(mon, qdict_get_str(qdict, "file"));
1820         if (qdict_haskey(qdict, "backing_file")) {
1821             monitor_printf(mon, " backing_file=");
1822             monitor_print_filename(mon, qdict_get_str(qdict, "backing_file"));
1823         }
1824         monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
1825                             qdict_get_bool(qdict, "ro"),
1826                             qdict_get_str(qdict, "drv"),
1827                             qdict_get_bool(qdict, "encrypted"));
1828     } else {
1829         monitor_printf(mon, " [not inserted]");
1830     }
1831 
1832     monitor_printf(mon, "\n");
1833 }
1834 
1835 void bdrv_info_print(Monitor *mon, const QObject *data)
1836 {
1837     qlist_iter(qobject_to_qlist(data), bdrv_print_dict, mon);
1838 }
1839 
1840 void bdrv_info(Monitor *mon, QObject **ret_data)
1841 {
1842     QList *bs_list;
1843     BlockDriverState *bs;
1844 
1845     bs_list = qlist_new();
1846 
1847     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1848         QObject *bs_obj;
1849 
1850         bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': 'unknown', "
1851                                     "'removable': %i, 'locked': %i }",
1852                                     bs->device_name, bs->removable,
1853                                     bs->locked);
1854 
1855         if (bs->drv) {
1856             QObject *obj;
1857             QDict *bs_dict = qobject_to_qdict(bs_obj);
1858 
1859             obj = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
1860                                      "'encrypted': %i }",
1861                                      bs->filename, bs->read_only,
1862                                      bs->drv->format_name,
1863                                      bdrv_is_encrypted(bs));
1864             if (bs->backing_file[0] != '\0') {
1865                 QDict *qdict = qobject_to_qdict(obj);
1866                 qdict_put(qdict, "backing_file",
1867                           qstring_from_str(bs->backing_file));
1868             }
1869 
1870             qdict_put_obj(bs_dict, "inserted", obj);
1871         }
1872         qlist_append_obj(bs_list, bs_obj);
1873     }
1874 
1875     *ret_data = QOBJECT(bs_list);
1876 }
1877 
1878 static void bdrv_stats_iter(QObject *data, void *opaque)
1879 {
1880     QDict *qdict;
1881     Monitor *mon = opaque;
1882 
1883     qdict = qobject_to_qdict(data);
1884     monitor_printf(mon, "%s:", qdict_get_str(qdict, "device"));
1885 
1886     qdict = qobject_to_qdict(qdict_get(qdict, "stats"));
1887     monitor_printf(mon, " rd_bytes=%" PRId64
1888                         " wr_bytes=%" PRId64
1889                         " rd_operations=%" PRId64
1890                         " wr_operations=%" PRId64
1891                         "\n",
1892                         qdict_get_int(qdict, "rd_bytes"),
1893                         qdict_get_int(qdict, "wr_bytes"),
1894                         qdict_get_int(qdict, "rd_operations"),
1895                         qdict_get_int(qdict, "wr_operations"));
1896 }
1897 
1898 void bdrv_stats_print(Monitor *mon, const QObject *data)
1899 {
1900     qlist_iter(qobject_to_qlist(data), bdrv_stats_iter, mon);
1901 }
1902 
1903 static QObject* bdrv_info_stats_bs(BlockDriverState *bs)
1904 {
1905     QObject *res;
1906     QDict *dict;
1907 
1908     res = qobject_from_jsonf("{ 'stats': {"
1909                              "'rd_bytes': %" PRId64 ","
1910                              "'wr_bytes': %" PRId64 ","
1911                              "'rd_operations': %" PRId64 ","
1912                              "'wr_operations': %" PRId64 ","
1913                              "'wr_highest_offset': %" PRId64
1914                              "} }",
1915                              bs->rd_bytes, bs->wr_bytes,
1916                              bs->rd_ops, bs->wr_ops,
1917                              bs->wr_highest_sector *
1918                              (uint64_t)BDRV_SECTOR_SIZE);
1919     dict  = qobject_to_qdict(res);
1920 
1921     if (*bs->device_name) {
1922         qdict_put(dict, "device", qstring_from_str(bs->device_name));
1923     }
1924 
1925     if (bs->file) {
1926         QObject *parent = bdrv_info_stats_bs(bs->file);
1927         qdict_put_obj(dict, "parent", parent);
1928     }
1929 
1930     return res;
1931 }
1932 
1933 void bdrv_info_stats(Monitor *mon, QObject **ret_data)
1934 {
1935     QObject *obj;
1936     QList *devices;
1937     BlockDriverState *bs;
1938 
1939     devices = qlist_new();
1940 
1941     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1942         obj = bdrv_info_stats_bs(bs);
1943         qlist_append_obj(devices, obj);
1944     }
1945 
1946     *ret_data = QOBJECT(devices);
1947 }
1948 
1949 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
1950 {
1951     if (bs->backing_hd && bs->backing_hd->encrypted)
1952         return bs->backing_file;
1953     else if (bs->encrypted)
1954         return bs->filename;
1955     else
1956         return NULL;
1957 }
1958 
1959 void bdrv_get_backing_filename(BlockDriverState *bs,
1960                                char *filename, int filename_size)
1961 {
1962     if (!bs->backing_file) {
1963         pstrcpy(filename, filename_size, "");
1964     } else {
1965         pstrcpy(filename, filename_size, bs->backing_file);
1966     }
1967 }
1968 
1969 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
1970                           const uint8_t *buf, int nb_sectors)
1971 {
1972     BlockDriver *drv = bs->drv;
1973     if (!drv)
1974         return -ENOMEDIUM;
1975     if (!drv->bdrv_write_compressed)
1976         return -ENOTSUP;
1977     if (bdrv_check_request(bs, sector_num, nb_sectors))
1978         return -EIO;
1979 
1980     if (bs->dirty_bitmap) {
1981         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1982     }
1983 
1984     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
1985 }
1986 
1987 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1988 {
1989     BlockDriver *drv = bs->drv;
1990     if (!drv)
1991         return -ENOMEDIUM;
1992     if (!drv->bdrv_get_info)
1993         return -ENOTSUP;
1994     memset(bdi, 0, sizeof(*bdi));
1995     return drv->bdrv_get_info(bs, bdi);
1996 }
1997 
1998 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
1999                       int64_t pos, int size)
2000 {
2001     BlockDriver *drv = bs->drv;
2002     if (!drv)
2003         return -ENOMEDIUM;
2004     if (drv->bdrv_save_vmstate)
2005         return drv->bdrv_save_vmstate(bs, buf, pos, size);
2006     if (bs->file)
2007         return bdrv_save_vmstate(bs->file, buf, pos, size);
2008     return -ENOTSUP;
2009 }
2010 
2011 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2012                       int64_t pos, int size)
2013 {
2014     BlockDriver *drv = bs->drv;
2015     if (!drv)
2016         return -ENOMEDIUM;
2017     if (drv->bdrv_load_vmstate)
2018         return drv->bdrv_load_vmstate(bs, buf, pos, size);
2019     if (bs->file)
2020         return bdrv_load_vmstate(bs->file, buf, pos, size);
2021     return -ENOTSUP;
2022 }
2023 
2024 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2025 {
2026     BlockDriver *drv = bs->drv;
2027 
2028     if (!drv || !drv->bdrv_debug_event) {
2029         return;
2030     }
2031 
2032     return drv->bdrv_debug_event(bs, event);
2033 
2034 }
2035 
2036 /**************************************************************/
2037 /* handling of snapshots */
2038 
2039 int bdrv_can_snapshot(BlockDriverState *bs)
2040 {
2041     BlockDriver *drv = bs->drv;
2042     if (!drv || bdrv_is_removable(bs) || bdrv_is_read_only(bs)) {
2043         return 0;
2044     }
2045 
2046     if (!drv->bdrv_snapshot_create) {
2047         if (bs->file != NULL) {
2048             return bdrv_can_snapshot(bs->file);
2049         }
2050         return 0;
2051     }
2052 
2053     return 1;
2054 }
2055 
2056 int bdrv_is_snapshot(BlockDriverState *bs)
2057 {
2058     return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2059 }
2060 
2061 BlockDriverState *bdrv_snapshots(void)
2062 {
2063     BlockDriverState *bs;
2064 
2065     if (bs_snapshots) {
2066         return bs_snapshots;
2067     }
2068 
2069     bs = NULL;
2070     while ((bs = bdrv_next(bs))) {
2071         if (bdrv_can_snapshot(bs)) {
2072             bs_snapshots = bs;
2073             return bs;
2074         }
2075     }
2076     return NULL;
2077 }
2078 
2079 int bdrv_snapshot_create(BlockDriverState *bs,
2080                          QEMUSnapshotInfo *sn_info)
2081 {
2082     BlockDriver *drv = bs->drv;
2083     if (!drv)
2084         return -ENOMEDIUM;
2085     if (drv->bdrv_snapshot_create)
2086         return drv->bdrv_snapshot_create(bs, sn_info);
2087     if (bs->file)
2088         return bdrv_snapshot_create(bs->file, sn_info);
2089     return -ENOTSUP;
2090 }
2091 
2092 int bdrv_snapshot_goto(BlockDriverState *bs,
2093                        const char *snapshot_id)
2094 {
2095     BlockDriver *drv = bs->drv;
2096     int ret, open_ret;
2097 
2098     if (!drv)
2099         return -ENOMEDIUM;
2100     if (drv->bdrv_snapshot_goto)
2101         return drv->bdrv_snapshot_goto(bs, snapshot_id);
2102 
2103     if (bs->file) {
2104         drv->bdrv_close(bs);
2105         ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2106         open_ret = drv->bdrv_open(bs, bs->open_flags);
2107         if (open_ret < 0) {
2108             bdrv_delete(bs->file);
2109             bs->drv = NULL;
2110             return open_ret;
2111         }
2112         return ret;
2113     }
2114 
2115     return -ENOTSUP;
2116 }
2117 
2118 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2119 {
2120     BlockDriver *drv = bs->drv;
2121     if (!drv)
2122         return -ENOMEDIUM;
2123     if (drv->bdrv_snapshot_delete)
2124         return drv->bdrv_snapshot_delete(bs, snapshot_id);
2125     if (bs->file)
2126         return bdrv_snapshot_delete(bs->file, snapshot_id);
2127     return -ENOTSUP;
2128 }
2129 
2130 int bdrv_snapshot_list(BlockDriverState *bs,
2131                        QEMUSnapshotInfo **psn_info)
2132 {
2133     BlockDriver *drv = bs->drv;
2134     if (!drv)
2135         return -ENOMEDIUM;
2136     if (drv->bdrv_snapshot_list)
2137         return drv->bdrv_snapshot_list(bs, psn_info);
2138     if (bs->file)
2139         return bdrv_snapshot_list(bs->file, psn_info);
2140     return -ENOTSUP;
2141 }
2142 
2143 int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2144         const char *snapshot_name)
2145 {
2146     BlockDriver *drv = bs->drv;
2147     if (!drv) {
2148         return -ENOMEDIUM;
2149     }
2150     if (!bs->read_only) {
2151         return -EINVAL;
2152     }
2153     if (drv->bdrv_snapshot_load_tmp) {
2154         return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2155     }
2156     return -ENOTSUP;
2157 }
2158 
#define NB_SUFFIXES 4

/*
 * Format @size into @buf (capacity @buf_size) in a compact form and
 * return @buf.
 *
 * Values up to 999 are printed as plain integers.  Larger values are
 * scaled by powers of 1024 and tagged with K, M, G or T: one decimal is
 * shown while the value is below ten units, a rounded integer otherwise.
 * Anything beyond the T range is still expressed in T.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char unit_chars[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (idx = 0; idx < NB_SUFFIXES; idx++, unit *= 1024) {
        if (size < 10 * unit) {
            /* Fewer than ten units: keep one fractional digit. */
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / unit,
                     unit_chars[idx]);
            return buf;
        }
        if (size < 1000 * unit || idx == NB_SUFFIXES - 1) {
            /* Round to the nearest whole unit. */
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     (size + (unit >> 1)) / unit,
                     unit_chars[idx]);
            return buf;
        }
    }
    return buf;
}
2188 
2189 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2190 {
2191     char buf1[128], date_buf[128], clock_buf[128];
2192 #ifdef _WIN32
2193     struct tm *ptm;
2194 #else
2195     struct tm tm;
2196 #endif
2197     time_t ti;
2198     int64_t secs;
2199 
2200     if (!sn) {
2201         snprintf(buf, buf_size,
2202                  "%-10s%-20s%7s%20s%15s",
2203                  "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2204     } else {
2205         ti = sn->date_sec;
2206 #ifdef _WIN32
2207         ptm = localtime(&ti);
2208         strftime(date_buf, sizeof(date_buf),
2209                  "%Y-%m-%d %H:%M:%S", ptm);
2210 #else
2211         localtime_r(&ti, &tm);
2212         strftime(date_buf, sizeof(date_buf),
2213                  "%Y-%m-%d %H:%M:%S", &tm);
2214 #endif
2215         secs = sn->vm_clock_nsec / 1000000000;
2216         snprintf(clock_buf, sizeof(clock_buf),
2217                  "%02d:%02d:%02d.%03d",
2218                  (int)(secs / 3600),
2219                  (int)((secs / 60) % 60),
2220                  (int)(secs % 60),
2221                  (int)((sn->vm_clock_nsec / 1000000) % 1000));
2222         snprintf(buf, buf_size,
2223                  "%-10s%-20s%7s%20s%15s",
2224                  sn->id_str, sn->name,
2225                  get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2226                  date_buf,
2227                  clock_buf);
2228     }
2229     return buf;
2230 }
2231 
2232 
2233 /**************************************************************/
2234 /* async I/Os */
2235 
2236 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
2237                                  QEMUIOVector *qiov, int nb_sectors,
2238                                  BlockDriverCompletionFunc *cb, void *opaque)
2239 {
2240     BlockDriver *drv = bs->drv;
2241     BlockDriverAIOCB *ret;
2242 
2243     trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2244 
2245     if (!drv)
2246         return NULL;
2247     if (bdrv_check_request(bs, sector_num, nb_sectors))
2248         return NULL;
2249 
2250     ret = drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
2251                               cb, opaque);
2252 
2253     if (ret) {
2254 	/* Update stats even though technically transfer has not happened. */
2255 	bs->rd_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
2256 	bs->rd_ops ++;
2257     }
2258 
2259     return ret;
2260 }
2261 
/*
 * State captured by bdrv_aio_writev() when a dirty bitmap exists, so that
 * block_complete_cb() can mark the written range dirty before handing the
 * result to the original callback.
 */
typedef struct BlockCompleteData {
    BlockDriverCompletionFunc *cb;  /* original completion callback */
    void *opaque;                   /* argument for the original callback */
    BlockDriverState *bs;           /* device the write was issued on */
    int64_t sector_num;             /* first sector of the write */
    int nb_sectors;                 /* number of sectors written */
} BlockCompleteData;
2269 
2270 static void block_complete_cb(void *opaque, int ret)
2271 {
2272     BlockCompleteData *b = opaque;
2273 
2274     if (b->bs->dirty_bitmap) {
2275         set_dirty_bitmap(b->bs, b->sector_num, b->nb_sectors, 1);
2276     }
2277     b->cb(b->opaque, ret);
2278     g_free(b);
2279 }
2280 
2281 static BlockCompleteData *blk_dirty_cb_alloc(BlockDriverState *bs,
2282                                              int64_t sector_num,
2283                                              int nb_sectors,
2284                                              BlockDriverCompletionFunc *cb,
2285                                              void *opaque)
2286 {
2287     BlockCompleteData *blkdata = g_malloc0(sizeof(BlockCompleteData));
2288 
2289     blkdata->bs = bs;
2290     blkdata->cb = cb;
2291     blkdata->opaque = opaque;
2292     blkdata->sector_num = sector_num;
2293     blkdata->nb_sectors = nb_sectors;
2294 
2295     return blkdata;
2296 }
2297 
2298 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2299                                   QEMUIOVector *qiov, int nb_sectors,
2300                                   BlockDriverCompletionFunc *cb, void *opaque)
2301 {
2302     BlockDriver *drv = bs->drv;
2303     BlockDriverAIOCB *ret;
2304     BlockCompleteData *blk_cb_data;
2305 
2306     trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2307 
2308     if (!drv)
2309         return NULL;
2310     if (bs->read_only)
2311         return NULL;
2312     if (bdrv_check_request(bs, sector_num, nb_sectors))
2313         return NULL;
2314 
2315     if (bs->dirty_bitmap) {
2316         blk_cb_data = blk_dirty_cb_alloc(bs, sector_num, nb_sectors, cb,
2317                                          opaque);
2318         cb = &block_complete_cb;
2319         opaque = blk_cb_data;
2320     }
2321 
2322     ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
2323                                cb, opaque);
2324 
2325     if (ret) {
2326         /* Update stats even though technically transfer has not happened. */
2327         bs->wr_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
2328         bs->wr_ops ++;
2329         if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
2330             bs->wr_highest_sector = sector_num + nb_sectors - 1;
2331         }
2332     }
2333 
2334     return ret;
2335 }
2336 
2337 
/*
 * Bookkeeping shared by all requests of one bdrv_aio_multiwrite() call.
 */
typedef struct MultiwriteCB {
    int error;          /* first negative result seen by multiwrite_cb() */
    int num_requests;   /* requests still outstanding; 0 means completed */
    int num_callbacks;  /* number of entries in callbacks[] */
    struct {
        BlockDriverCompletionFunc *cb;  /* caller's completion callback */
        void *opaque;                   /* argument passed to cb */
        /* merged qiov allocated by multiwrite_merge(), or NULL */
        QEMUIOVector *free_qiov;
        /* zero-fill buffer allocated by multiwrite_merge(), or NULL */
        void *free_buf;
    } callbacks[];
} MultiwriteCB;
2349 
2350 static void multiwrite_user_cb(MultiwriteCB *mcb)
2351 {
2352     int i;
2353 
2354     for (i = 0; i < mcb->num_callbacks; i++) {
2355         mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
2356         if (mcb->callbacks[i].free_qiov) {
2357             qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2358         }
2359         g_free(mcb->callbacks[i].free_qiov);
2360         qemu_vfree(mcb->callbacks[i].free_buf);
2361     }
2362 }
2363 
2364 static void multiwrite_cb(void *opaque, int ret)
2365 {
2366     MultiwriteCB *mcb = opaque;
2367 
2368     trace_multiwrite_cb(mcb, ret);
2369 
2370     if (ret < 0 && !mcb->error) {
2371         mcb->error = ret;
2372     }
2373 
2374     mcb->num_requests--;
2375     if (mcb->num_requests == 0) {
2376         multiwrite_user_cb(mcb);
2377         g_free(mcb);
2378     }
2379 }
2380 
2381 static int multiwrite_req_compare(const void *a, const void *b)
2382 {
2383     const BlockRequest *req1 = a, *req2 = b;
2384 
2385     /*
2386      * Note that we can't simply subtract req2->sector from req1->sector
2387      * here as that could overflow the return value.
2388      */
2389     if (req1->sector > req2->sector) {
2390         return 1;
2391     } else if (req1->sector < req2->sector) {
2392         return -1;
2393     } else {
2394         return 0;
2395     }
2396 }
2397 
2398 /*
2399  * Takes a bunch of requests and tries to merge them. Returns the number of
2400  * requests that remain after merging.
2401  */
2402 static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2403     int num_reqs, MultiwriteCB *mcb)
2404 {
2405     int i, outidx;
2406 
2407     // Sort requests by start sector
2408     qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2409 
2410     // Check if adjacent requests touch the same clusters. If so, combine them,
2411     // filling up gaps with zero sectors.
2412     outidx = 0;
2413     for (i = 1; i < num_reqs; i++) {
2414         int merge = 0;
2415         int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2416 
2417         // This handles the cases that are valid for all block drivers, namely
2418         // exactly sequential writes and overlapping writes.
2419         if (reqs[i].sector <= oldreq_last) {
2420             merge = 1;
2421         }
2422 
2423         // The block driver may decide that it makes sense to combine requests
2424         // even if there is a gap of some sectors between them. In this case,
2425         // the gap is filled with zeros (therefore only applicable for yet
2426         // unused space in format like qcow2).
2427         if (!merge && bs->drv->bdrv_merge_requests) {
2428             merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2429         }
2430 
2431         if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2432             merge = 0;
2433         }
2434 
2435         if (merge) {
2436             size_t size;
2437             QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
2438             qemu_iovec_init(qiov,
2439                 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2440 
2441             // Add the first request to the merged one. If the requests are
2442             // overlapping, drop the last sectors of the first request.
2443             size = (reqs[i].sector - reqs[outidx].sector) << 9;
2444             qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2445 
2446             // We might need to add some zeros between the two requests
2447             if (reqs[i].sector > oldreq_last) {
2448                 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2449                 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2450                 memset(buf, 0, zero_bytes);
2451                 qemu_iovec_add(qiov, buf, zero_bytes);
2452                 mcb->callbacks[i].free_buf = buf;
2453             }
2454 
2455             // Add the second request
2456             qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2457 
2458             reqs[outidx].nb_sectors = qiov->size >> 9;
2459             reqs[outidx].qiov = qiov;
2460 
2461             mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2462         } else {
2463             outidx++;
2464             reqs[outidx].sector     = reqs[i].sector;
2465             reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2466             reqs[outidx].qiov       = reqs[i].qiov;
2467         }
2468     }
2469 
2470     return outidx + 1;
2471 }
2472 
2473 /*
2474  * Submit multiple AIO write requests at once.
2475  *
2476  * On success, the function returns 0 and all requests in the reqs array have
2477  * been submitted. In error case this function returns -1, and any of the
2478  * requests may or may not be submitted yet. In particular, this means that the
2479  * callback will be called for some of the requests, for others it won't. The
2480  * caller must check the error field of the BlockRequest to wait for the right
2481  * callbacks (if error != 0, no callback will be called).
2482  *
2483  * The implementation may modify the contents of the reqs array, e.g. to merge
2484  * requests. However, the fields opaque and error are left unmodified as they
2485  * are used to signal failure for a single request to the caller.
2486  */
2487 int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2488 {
2489     BlockDriverAIOCB *acb;
2490     MultiwriteCB *mcb;
2491     int i;
2492 
2493     /* don't submit writes if we don't have a medium */
2494     if (bs->drv == NULL) {
2495         for (i = 0; i < num_reqs; i++) {
2496             reqs[i].error = -ENOMEDIUM;
2497         }
2498         return -1;
2499     }
2500 
2501     if (num_reqs == 0) {
2502         return 0;
2503     }
2504 
2505     // Create MultiwriteCB structure
2506     mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
2507     mcb->num_requests = 0;
2508     mcb->num_callbacks = num_reqs;
2509 
2510     for (i = 0; i < num_reqs; i++) {
2511         mcb->callbacks[i].cb = reqs[i].cb;
2512         mcb->callbacks[i].opaque = reqs[i].opaque;
2513     }
2514 
2515     // Check for mergable requests
2516     num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2517 
2518     trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2519 
2520     /*
2521      * Run the aio requests. As soon as one request can't be submitted
2522      * successfully, fail all requests that are not yet submitted (we must
2523      * return failure for all requests anyway)
2524      *
2525      * num_requests cannot be set to the right value immediately: If
2526      * bdrv_aio_writev fails for some request, num_requests would be too high
2527      * and therefore multiwrite_cb() would never recognize the multiwrite
2528      * request as completed. We also cannot use the loop variable i to set it
2529      * when the first request fails because the callback may already have been
2530      * called for previously submitted requests. Thus, num_requests must be
2531      * incremented for each request that is submitted.
2532      *
2533      * The problem that callbacks may be called early also means that we need
2534      * to take care that num_requests doesn't become 0 before all requests are
2535      * submitted - multiwrite_cb() would consider the multiwrite request
2536      * completed. A dummy request that is "completed" by a manual call to
2537      * multiwrite_cb() takes care of this.
2538      */
2539     mcb->num_requests = 1;
2540 
2541     // Run the aio requests
2542     for (i = 0; i < num_reqs; i++) {
2543         mcb->num_requests++;
2544         acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2545             reqs[i].nb_sectors, multiwrite_cb, mcb);
2546 
2547         if (acb == NULL) {
2548             // We can only fail the whole thing if no request has been
2549             // submitted yet. Otherwise we'll wait for the submitted AIOs to
2550             // complete and report the error in the callback.
2551             if (i == 0) {
2552                 trace_bdrv_aio_multiwrite_earlyfail(mcb);
2553                 goto fail;
2554             } else {
2555                 trace_bdrv_aio_multiwrite_latefail(mcb, i);
2556                 multiwrite_cb(mcb, -EIO);
2557                 break;
2558             }
2559         }
2560     }
2561 
2562     /* Complete the dummy request */
2563     multiwrite_cb(mcb, 0);
2564 
2565     return 0;
2566 
2567 fail:
2568     for (i = 0; i < mcb->num_callbacks; i++) {
2569         reqs[i].error = -EIO;
2570     }
2571     g_free(mcb);
2572     return -1;
2573 }
2574 
2575 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
2576         BlockDriverCompletionFunc *cb, void *opaque)
2577 {
2578     BlockDriver *drv = bs->drv;
2579 
2580     trace_bdrv_aio_flush(bs, opaque);
2581 
2582     if (bs->open_flags & BDRV_O_NO_FLUSH) {
2583         return bdrv_aio_noop_em(bs, cb, opaque);
2584     }
2585 
2586     if (!drv)
2587         return NULL;
2588     return drv->bdrv_aio_flush(bs, cb, opaque);
2589 }
2590 
2591 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
2592 {
2593     acb->pool->cancel(acb);
2594 }
2595 
2596 
2597 /**************************************************************/
2598 /* async block device emulation */
2599 
/*
 * AIOCB used to emulate asynchronous requests on top of the synchronous
 * interface: the I/O is done immediately and the completion callback is
 * delivered from a bottom half (see bdrv_aio_rw_vector()).
 */
typedef struct BlockDriverAIOCBSync {
    /* generic part; first member, as qemu_aio_get() hands the allocation
     * out as a BlockDriverAIOCB */
    BlockDriverAIOCB common;
    QEMUBH *bh;     /* bottom half that runs bdrv_aio_bh_cb() */
    int ret;        /* result passed to the completion callback */
    /* vector translation state */
    QEMUIOVector *qiov;     /* caller's vector, or NULL for flush/no-op */
    uint8_t *bounce;        /* linear bounce buffer, or NULL */
    int is_write;           /* non-zero: nothing to copy back on completion */
} BlockDriverAIOCBSync;
2609 
2610 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2611 {
2612     BlockDriverAIOCBSync *acb =
2613         container_of(blockacb, BlockDriverAIOCBSync, common);
2614     qemu_bh_delete(acb->bh);
2615     acb->bh = NULL;
2616     qemu_aio_release(acb);
2617 }
2618 
/*
 * Pool of BlockDriverAIOCBSync objects used by the bottom-half based
 * emulation (read/write vectors, flush and no-op requests).
 */
static AIOPool bdrv_em_aio_pool = {
    .aiocb_size         = sizeof(BlockDriverAIOCBSync),
    .cancel             = bdrv_aio_cancel_em,
};
2623 
2624 static void bdrv_aio_bh_cb(void *opaque)
2625 {
2626     BlockDriverAIOCBSync *acb = opaque;
2627 
2628     if (!acb->is_write)
2629         qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
2630     qemu_vfree(acb->bounce);
2631     acb->common.cb(acb->common.opaque, acb->ret);
2632     qemu_bh_delete(acb->bh);
2633     acb->bh = NULL;
2634     qemu_aio_release(acb);
2635 }
2636 
2637 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2638                                             int64_t sector_num,
2639                                             QEMUIOVector *qiov,
2640                                             int nb_sectors,
2641                                             BlockDriverCompletionFunc *cb,
2642                                             void *opaque,
2643                                             int is_write)
2644 
2645 {
2646     BlockDriverAIOCBSync *acb;
2647 
2648     acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2649     acb->is_write = is_write;
2650     acb->qiov = qiov;
2651     acb->bounce = qemu_blockalign(bs, qiov->size);
2652 
2653     if (!acb->bh)
2654         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2655 
2656     if (is_write) {
2657         qemu_iovec_to_buffer(acb->qiov, acb->bounce);
2658         acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
2659     } else {
2660         acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
2661     }
2662 
2663     qemu_bh_schedule(acb->bh);
2664 
2665     return &acb->common;
2666 }
2667 
2668 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2669         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2670         BlockDriverCompletionFunc *cb, void *opaque)
2671 {
2672     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2673 }
2674 
2675 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2676         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2677         BlockDriverCompletionFunc *cb, void *opaque)
2678 {
2679     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
2680 }
2681 
2682 
/*
 * AIOCB used to emulate asynchronous requests on top of the coroutine
 * interface (see bdrv_co_aio_rw_vector()).
 */
typedef struct BlockDriverAIOCBCoroutine {
    /* generic part; first member, as qemu_aio_get() hands the allocation
     * out as a BlockDriverAIOCB */
    BlockDriverAIOCB common;
    BlockRequest req;   /* sector, nb_sectors, qiov and resulting error */
    bool is_write;      /* selects bdrv_co_writev over bdrv_co_readv */
    QEMUBH* bh;         /* bottom half that delivers the completion */
} BlockDriverAIOCBCoroutine;
2689 
2690 static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
2691 {
2692     qemu_aio_flush();
2693 }
2694 
/*
 * Pool of BlockDriverAIOCBCoroutine objects used by the coroutine based
 * AIO emulation.
 */
static AIOPool bdrv_em_co_aio_pool = {
    .aiocb_size         = sizeof(BlockDriverAIOCBCoroutine),
    .cancel             = bdrv_aio_co_cancel_em,
};
2699 
2700 static void bdrv_co_rw_bh(void *opaque)
2701 {
2702     BlockDriverAIOCBCoroutine *acb = opaque;
2703 
2704     acb->common.cb(acb->common.opaque, acb->req.error);
2705     qemu_bh_delete(acb->bh);
2706     qemu_aio_release(acb);
2707 }
2708 
2709 static void coroutine_fn bdrv_co_rw(void *opaque)
2710 {
2711     BlockDriverAIOCBCoroutine *acb = opaque;
2712     BlockDriverState *bs = acb->common.bs;
2713 
2714     if (!acb->is_write) {
2715         acb->req.error = bs->drv->bdrv_co_readv(bs, acb->req.sector,
2716             acb->req.nb_sectors, acb->req.qiov);
2717     } else {
2718         acb->req.error = bs->drv->bdrv_co_writev(bs, acb->req.sector,
2719             acb->req.nb_sectors, acb->req.qiov);
2720     }
2721 
2722     acb->bh = qemu_bh_new(bdrv_co_rw_bh, acb);
2723     qemu_bh_schedule(acb->bh);
2724 }
2725 
2726 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
2727                                                int64_t sector_num,
2728                                                QEMUIOVector *qiov,
2729                                                int nb_sectors,
2730                                                BlockDriverCompletionFunc *cb,
2731                                                void *opaque,
2732                                                bool is_write)
2733 {
2734     Coroutine *co;
2735     BlockDriverAIOCBCoroutine *acb;
2736 
2737     acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2738     acb->req.sector = sector_num;
2739     acb->req.nb_sectors = nb_sectors;
2740     acb->req.qiov = qiov;
2741     acb->is_write = is_write;
2742 
2743     co = qemu_coroutine_create(bdrv_co_rw);
2744     qemu_coroutine_enter(co, acb);
2745 
2746     return &acb->common;
2747 }
2748 
2749 static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
2750         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2751         BlockDriverCompletionFunc *cb, void *opaque)
2752 {
2753     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
2754                                  false);
2755 }
2756 
2757 static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
2758         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2759         BlockDriverCompletionFunc *cb, void *opaque)
2760 {
2761     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
2762                                  true);
2763 }
2764 
2765 static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
2766         BlockDriverCompletionFunc *cb, void *opaque)
2767 {
2768     BlockDriverAIOCBSync *acb;
2769 
2770     acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2771     acb->is_write = 1; /* don't bounce in the completion hadler */
2772     acb->qiov = NULL;
2773     acb->bounce = NULL;
2774     acb->ret = 0;
2775 
2776     if (!acb->bh)
2777         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2778 
2779     bdrv_flush(bs);
2780     qemu_bh_schedule(acb->bh);
2781     return &acb->common;
2782 }
2783 
2784 static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
2785         BlockDriverCompletionFunc *cb, void *opaque)
2786 {
2787     BlockDriverAIOCBSync *acb;
2788 
2789     acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2790     acb->is_write = 1; /* don't bounce in the completion handler */
2791     acb->qiov = NULL;
2792     acb->bounce = NULL;
2793     acb->ret = 0;
2794 
2795     if (!acb->bh) {
2796         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2797     }
2798 
2799     qemu_bh_schedule(acb->bh);
2800     return &acb->common;
2801 }
2802 
2803 /**************************************************************/
2804 /* sync block device emulation */
2805 
/*
 * Completion callback for the synchronous emulation: stores @ret in the
 * int that @opaque points to.
 */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}
2810 
2811 #define NOT_DONE 0x7fffffff
2812 
2813 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
2814                         uint8_t *buf, int nb_sectors)
2815 {
2816     int async_ret;
2817     BlockDriverAIOCB *acb;
2818     struct iovec iov;
2819     QEMUIOVector qiov;
2820 
2821     async_ret = NOT_DONE;
2822     iov.iov_base = (void *)buf;
2823     iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
2824     qemu_iovec_init_external(&qiov, &iov, 1);
2825     acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
2826         bdrv_rw_em_cb, &async_ret);
2827     if (acb == NULL) {
2828         async_ret = -1;
2829         goto fail;
2830     }
2831 
2832     while (async_ret == NOT_DONE) {
2833         qemu_aio_wait();
2834     }
2835 
2836 
2837 fail:
2838     return async_ret;
2839 }
2840 
2841 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
2842                          const uint8_t *buf, int nb_sectors)
2843 {
2844     int async_ret;
2845     BlockDriverAIOCB *acb;
2846     struct iovec iov;
2847     QEMUIOVector qiov;
2848 
2849     async_ret = NOT_DONE;
2850     iov.iov_base = (void *)buf;
2851     iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
2852     qemu_iovec_init_external(&qiov, &iov, 1);
2853     acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
2854         bdrv_rw_em_cb, &async_ret);
2855     if (acb == NULL) {
2856         async_ret = -1;
2857         goto fail;
2858     }
2859     while (async_ret == NOT_DONE) {
2860         qemu_aio_wait();
2861     }
2862 
2863 fail:
2864     return async_ret;
2865 }
2866 
/* Run the module initialisers of type MODULE_INIT_BLOCK. */
void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}
2871 
/* Same as bdrv_init(), but sets use_bdrv_whitelist to 1 beforehand. */
void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}
2877 
2878 void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2879                    BlockDriverCompletionFunc *cb, void *opaque)
2880 {
2881     BlockDriverAIOCB *acb;
2882 
2883     if (pool->free_aiocb) {
2884         acb = pool->free_aiocb;
2885         pool->free_aiocb = acb->next;
2886     } else {
2887         acb = g_malloc0(pool->aiocb_size);
2888         acb->pool = pool;
2889     }
2890     acb->bs = bs;
2891     acb->cb = cb;
2892     acb->opaque = opaque;
2893     return acb;
2894 }
2895 
2896 void qemu_aio_release(void *p)
2897 {
2898     BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2899     AIOPool *pool = acb->pool;
2900     acb->next = pool->free_aiocb;
2901     pool->free_aiocb = acb;
2902 }
2903 
2904 /**************************************************************/
2905 /* Coroutine block device emulation */
2906 
/*
 * Links an asynchronous request to the coroutine waiting for it; filled in
 * and resumed by bdrv_co_io_em_complete().
 */
typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;   /* coroutine to re-enter on completion */
    int ret;                /* result delivered by the completion callback */
} CoroutineIOCompletion;
2911 
2912 static void bdrv_co_io_em_complete(void *opaque, int ret)
2913 {
2914     CoroutineIOCompletion *co = opaque;
2915 
2916     co->ret = ret;
2917     qemu_coroutine_enter(co->coroutine, NULL);
2918 }
2919 
2920 static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
2921                                       int nb_sectors, QEMUIOVector *iov,
2922                                       bool is_write)
2923 {
2924     CoroutineIOCompletion co = {
2925         .coroutine = qemu_coroutine_self(),
2926     };
2927     BlockDriverAIOCB *acb;
2928 
2929     if (is_write) {
2930         acb = bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
2931                               bdrv_co_io_em_complete, &co);
2932     } else {
2933         acb = bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
2934                              bdrv_co_io_em_complete, &co);
2935     }
2936 
2937     trace_bdrv_co_io(is_write, acb);
2938     if (!acb) {
2939         return -EIO;
2940     }
2941     qemu_coroutine_yield();
2942 
2943     return co.ret;
2944 }
2945 
2946 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
2947                                          int64_t sector_num, int nb_sectors,
2948                                          QEMUIOVector *iov)
2949 {
2950     return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
2951 }
2952 
2953 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
2954                                          int64_t sector_num, int nb_sectors,
2955                                          QEMUIOVector *iov)
2956 {
2957     return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
2958 }
2959 
2960 static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs)
2961 {
2962     CoroutineIOCompletion co = {
2963         .coroutine = qemu_coroutine_self(),
2964     };
2965     BlockDriverAIOCB *acb;
2966 
2967     acb = bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
2968     if (!acb) {
2969         return -EIO;
2970     }
2971     qemu_coroutine_yield();
2972     return co.ret;
2973 }
2974 
2975 /**************************************************************/
2976 /* removable device support */
2977 
2978 /**
2979  * Return TRUE if the media is present
2980  */
2981 int bdrv_is_inserted(BlockDriverState *bs)
2982 {
2983     BlockDriver *drv = bs->drv;
2984     int ret;
2985     if (!drv)
2986         return 0;
2987     if (!drv->bdrv_is_inserted)
2988         return !bs->tray_open;
2989     ret = drv->bdrv_is_inserted(bs);
2990     return ret;
2991 }
2992 
2993 /**
2994  * Return TRUE if the media changed since the last call to this
2995  * function. It is currently only used for floppy disks
2996  */
2997 int bdrv_media_changed(BlockDriverState *bs)
2998 {
2999     BlockDriver *drv = bs->drv;
3000     int ret;
3001 
3002     if (!drv || !drv->bdrv_media_changed)
3003         ret = -ENOTSUP;
3004     else
3005         ret = drv->bdrv_media_changed(bs);
3006     if (ret == -ENOTSUP)
3007         ret = bs->media_changed;
3008     bs->media_changed = 0;
3009     return ret;
3010 }
3011 
3012 /**
3013  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3014  */
3015 int bdrv_eject(BlockDriverState *bs, int eject_flag)
3016 {
3017     BlockDriver *drv = bs->drv;
3018 
3019     if (eject_flag && bs->locked) {
3020         return -EBUSY;
3021     }
3022 
3023     if (drv && drv->bdrv_eject) {
3024         drv->bdrv_eject(bs, eject_flag);
3025     }
3026     bs->tray_open = eject_flag;
3027     return 0;
3028 }
3029 
3030 int bdrv_is_locked(BlockDriverState *bs)
3031 {
3032     return bs->locked;
3033 }
3034 
3035 /**
3036  * Lock or unlock the media (if it is locked, the user won't be able
3037  * to eject it manually).
3038  */
3039 void bdrv_set_locked(BlockDriverState *bs, int locked)
3040 {
3041     BlockDriver *drv = bs->drv;
3042 
3043     trace_bdrv_set_locked(bs, locked);
3044 
3045     bs->locked = locked;
3046     if (drv && drv->bdrv_set_locked) {
3047         drv->bdrv_set_locked(bs, locked);
3048     }
3049 }
3050 
3051 /* needed for generic scsi interface */
3052 
3053 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3054 {
3055     BlockDriver *drv = bs->drv;
3056 
3057     if (drv && drv->bdrv_ioctl)
3058         return drv->bdrv_ioctl(bs, req, buf);
3059     return -ENOTSUP;
3060 }
3061 
3062 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3063         unsigned long int req, void *buf,
3064         BlockDriverCompletionFunc *cb, void *opaque)
3065 {
3066     BlockDriver *drv = bs->drv;
3067 
3068     if (drv && drv->bdrv_aio_ioctl)
3069         return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3070     return NULL;
3071 }
3072 
3073 
3074 
3075 void *qemu_blockalign(BlockDriverState *bs, size_t size)
3076 {
3077     return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3078 }
3079 
3080 void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3081 {
3082     int64_t bitmap_size;
3083 
3084     bs->dirty_count = 0;
3085     if (enable) {
3086         if (!bs->dirty_bitmap) {
3087             bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3088                     BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3089             bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
3090 
3091             bs->dirty_bitmap = g_malloc0(bitmap_size);
3092         }
3093     } else {
3094         if (bs->dirty_bitmap) {
3095             g_free(bs->dirty_bitmap);
3096             bs->dirty_bitmap = NULL;
3097         }
3098     }
3099 }
3100 
3101 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3102 {
3103     int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
3104 
3105     if (bs->dirty_bitmap &&
3106         (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
3107         return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3108             (1UL << (chunk % (sizeof(unsigned long) * 8))));
3109     } else {
3110         return 0;
3111     }
3112 }
3113 
3114 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3115                       int nr_sectors)
3116 {
3117     set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3118 }
3119 
3120 int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3121 {
3122     return bs->dirty_count;
3123 }
3124 
3125 void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3126 {
3127     assert(bs->in_use != in_use);
3128     bs->in_use = in_use;
3129 }
3130 
3131 int bdrv_in_use(BlockDriverState *bs)
3132 {
3133     return bs->in_use;
3134 }
3135 
3136 int bdrv_img_create(const char *filename, const char *fmt,
3137                     const char *base_filename, const char *base_fmt,
3138                     char *options, uint64_t img_size, int flags)
3139 {
3140     QEMUOptionParameter *param = NULL, *create_options = NULL;
3141     QEMUOptionParameter *backing_fmt, *backing_file, *size;
3142     BlockDriverState *bs = NULL;
3143     BlockDriver *drv, *proto_drv;
3144     BlockDriver *backing_drv = NULL;
3145     int ret = 0;
3146 
3147     /* Find driver and parse its options */
3148     drv = bdrv_find_format(fmt);
3149     if (!drv) {
3150         error_report("Unknown file format '%s'", fmt);
3151         ret = -EINVAL;
3152         goto out;
3153     }
3154 
3155     proto_drv = bdrv_find_protocol(filename);
3156     if (!proto_drv) {
3157         error_report("Unknown protocol '%s'", filename);
3158         ret = -EINVAL;
3159         goto out;
3160     }
3161 
3162     create_options = append_option_parameters(create_options,
3163                                               drv->create_options);
3164     create_options = append_option_parameters(create_options,
3165                                               proto_drv->create_options);
3166 
3167     /* Create parameter list with default values */
3168     param = parse_option_parameters("", create_options, param);
3169 
3170     set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3171 
3172     /* Parse -o options */
3173     if (options) {
3174         param = parse_option_parameters(options, create_options, param);
3175         if (param == NULL) {
3176             error_report("Invalid options for file format '%s'.", fmt);
3177             ret = -EINVAL;
3178             goto out;
3179         }
3180     }
3181 
3182     if (base_filename) {
3183         if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3184                                  base_filename)) {
3185             error_report("Backing file not supported for file format '%s'",
3186                          fmt);
3187             ret = -EINVAL;
3188             goto out;
3189         }
3190     }
3191 
3192     if (base_fmt) {
3193         if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3194             error_report("Backing file format not supported for file "
3195                          "format '%s'", fmt);
3196             ret = -EINVAL;
3197             goto out;
3198         }
3199     }
3200 
3201     backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3202     if (backing_file && backing_file->value.s) {
3203         if (!strcmp(filename, backing_file->value.s)) {
3204             error_report("Error: Trying to create an image with the "
3205                          "same filename as the backing file");
3206             ret = -EINVAL;
3207             goto out;
3208         }
3209     }
3210 
3211     backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3212     if (backing_fmt && backing_fmt->value.s) {
3213         backing_drv = bdrv_find_format(backing_fmt->value.s);
3214         if (!backing_drv) {
3215             error_report("Unknown backing file format '%s'",
3216                          backing_fmt->value.s);
3217             ret = -EINVAL;
3218             goto out;
3219         }
3220     }
3221 
3222     // The size for the image must always be specified, with one exception:
3223     // If we are using a backing file, we can obtain the size from there
3224     size = get_option_parameter(param, BLOCK_OPT_SIZE);
3225     if (size && size->value.n == -1) {
3226         if (backing_file && backing_file->value.s) {
3227             uint64_t size;
3228             char buf[32];
3229 
3230             bs = bdrv_new("");
3231 
3232             ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
3233             if (ret < 0) {
3234                 error_report("Could not open '%s'", backing_file->value.s);
3235                 goto out;
3236             }
3237             bdrv_get_geometry(bs, &size);
3238             size *= 512;
3239 
3240             snprintf(buf, sizeof(buf), "%" PRId64, size);
3241             set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3242         } else {
3243             error_report("Image creation needs a size parameter");
3244             ret = -EINVAL;
3245             goto out;
3246         }
3247     }
3248 
3249     printf("Formatting '%s', fmt=%s ", filename, fmt);
3250     print_option_parameters(param);
3251     puts("");
3252 
3253     ret = bdrv_create(drv, filename, param);
3254 
3255     if (ret < 0) {
3256         if (ret == -ENOTSUP) {
3257             error_report("Formatting or formatting option not supported for "
3258                          "file format '%s'", fmt);
3259         } else if (ret == -EFBIG) {
3260             error_report("The image size is too large for file format '%s'",
3261                          fmt);
3262         } else {
3263             error_report("%s: error while creating %s: %s", filename, fmt,
3264                          strerror(-ret));
3265         }
3266     }
3267 
3268 out:
3269     free_option_parameters(create_options);
3270     free_option_parameters(param);
3271 
3272     if (bs) {
3273         bdrv_delete(bs);
3274     }
3275 
3276     return ret;
3277 }
3278