xref: /openbmc/qemu/block.c (revision f9f05dc58c50d19ad762e6c1ce6b5def9814a4ed)
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "monitor.h"
28 #include "block_int.h"
29 #include "module.h"
30 #include "qemu-objects.h"
31 #include "qemu-coroutine.h"
32 
33 #ifdef CONFIG_BSD
34 #include <sys/types.h>
35 #include <sys/stat.h>
36 #include <sys/ioctl.h>
37 #include <sys/queue.h>
38 #ifndef __DragonFly__
39 #include <sys/disk.h>
40 #endif
41 #endif
42 
43 #ifdef _WIN32
44 #include <windows.h>
45 #endif
46 
/*
 * Forward declarations of the emulation helpers defined later in this file.
 * bdrv_register() installs most of them into a BlockDriver whose own
 * callbacks for the corresponding access style (synchronous, AIO or
 * coroutine) are missing.
 */
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
        BlockDriverCompletionFunc *cb, void *opaque);
static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
                        uint8_t *buf, int nb_sectors);
static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
                         const uint8_t *buf, int nb_sectors);
static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
73 
/* All named block devices; bdrv_new() appends, bdrv_make_anon() removes */
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

/* All registered block drivers; bdrv_register() inserts at the head */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* The device to use for VM snapshots (reset to NULL when it is closed) */
static BlockDriverState *bs_snapshots;

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
85 
86 #ifdef _WIN32
/*
 * Return 1 if filename starts with an ASCII letter immediately followed by
 * ':' (e.g. "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    int is_lower = (letter >= 'a' && letter <= 'z');
    int is_upper = (letter >= 'A' && letter <= 'Z');

    if (!is_lower && !is_upper) {
        /* filename[1] is only inspected once filename[0] is known non-NUL */
        return 0;
    }
    return filename[1] == ':';
}
93 
/*
 * Return 1 if filename names a Windows drive: either a "\\.\" / "//./"
 * prefixed path, or exactly a drive letter plus ':' with nothing following.
 * Return 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL)) {
        return 1;
    }

    /* bare "X:" only: the character after the colon must be the terminator */
    return (is_windows_drive_prefix(filename) && filename[2] == '\0') ? 1 : 0;
}
104 #endif
105 
/*
 * Tell whether path carries a "<protocol>:" prefix, i.e. contains a ':'.
 * On Windows, drive letters and drive device paths are never treated as
 * protocols even though they contain a colon.
 */
static int path_has_protocol(const char *path)
{
    const char *colon;

#ifdef _WIN32
    if (is_windows_drive_prefix(path) || is_windows_drive(path)) {
        return 0;
    }
#endif

    colon = strchr(path, ':');
    return colon ? 1 : 0;
}
118 
/*
 * Return non-zero if path is absolute. Everything up to and including the
 * first ':' (if any) is skipped, then the next character must be a
 * directory separator. On Windows a leading '/' or '\' also counts (covers
 * names like "\\.\d:"), and '\' is accepted as a separator.
 */
int path_is_absolute(const char *path)
{
    const char *colon = strchr(path, ':');
    const char *start = colon ? colon + 1 : path;

#ifdef _WIN32
    if (*path == '/' || *path == '\\') {
        return 1;
    }
    return (*start == '/' || *start == '\\');
#else
    return (*start == '/');
#endif
}
138 
/*
 * Build in dest (at most dest_size bytes, always NUL-terminated when
 * dest_size > 0) the path of filename interpreted relative to base_path.
 *
 * An absolute filename is copied unchanged. Otherwise the directory part of
 * base_path is kept - everything up to the last '/' (or '\' on Windows), or
 * up to the first ':' if that comes later - and filename is appended to it.
 * Skipping past the ':' is what makes URL-like base paths work.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_proto, *after_sep, *dir_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just past the "<protocol>:" prefix, if there is one */
    after_proto = strchr(base_path, ':');
    after_proto = after_proto ? after_proto + 1 : base_path;

    /* position just past the last directory separator, if there is one */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* the directory prefix ends at whichever of the two lies further right */
    dir_end = (after_sep > after_proto) ? after_sep : after_proto;

    prefix_len = dir_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
182 
183 void bdrv_register(BlockDriver *bdrv)
184 {
185     if (bdrv->bdrv_co_readv) {
186         /* Emulate AIO by coroutines, and sync by AIO */
187         bdrv->bdrv_aio_readv = bdrv_co_aio_readv_em;
188         bdrv->bdrv_aio_writev = bdrv_co_aio_writev_em;
189         bdrv->bdrv_read = bdrv_read_em;
190         bdrv->bdrv_write = bdrv_write_em;
191      } else {
192         bdrv->bdrv_co_readv = bdrv_co_readv_em;
193         bdrv->bdrv_co_writev = bdrv_co_writev_em;
194 
195         if (!bdrv->bdrv_aio_readv) {
196             /* add AIO emulation layer */
197             bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
198             bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
199         } else if (!bdrv->bdrv_read) {
200             /* add synchronous IO emulation layer */
201             bdrv->bdrv_read = bdrv_read_em;
202             bdrv->bdrv_write = bdrv_write_em;
203         }
204     }
205 
206     if (!bdrv->bdrv_aio_flush)
207         bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
208 
209     QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
210 }
211 
212 /* create a new block device (by default it is empty) */
213 BlockDriverState *bdrv_new(const char *device_name)
214 {
215     BlockDriverState *bs;
216 
217     bs = qemu_mallocz(sizeof(BlockDriverState));
218     pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
219     if (device_name[0] != '\0') {
220         QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
221     }
222     return bs;
223 }
224 
225 BlockDriver *bdrv_find_format(const char *format_name)
226 {
227     BlockDriver *drv1;
228     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
229         if (!strcmp(drv1->format_name, format_name)) {
230             return drv1;
231         }
232     }
233     return NULL;
234 }
235 
236 static int bdrv_is_whitelisted(BlockDriver *drv)
237 {
238     static const char *whitelist[] = {
239         CONFIG_BDRV_WHITELIST
240     };
241     const char **p;
242 
243     if (!whitelist[0])
244         return 1;               /* no whitelist, anything goes */
245 
246     for (p = whitelist; *p; p++) {
247         if (!strcmp(drv->format_name, *p)) {
248             return 1;
249         }
250     }
251     return 0;
252 }
253 
254 BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
255 {
256     BlockDriver *drv = bdrv_find_format(format_name);
257     return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
258 }
259 
260 int bdrv_create(BlockDriver *drv, const char* filename,
261     QEMUOptionParameter *options)
262 {
263     if (!drv->bdrv_create)
264         return -ENOTSUP;
265 
266     return drv->bdrv_create(filename, options);
267 }
268 
269 int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
270 {
271     BlockDriver *drv;
272 
273     drv = bdrv_find_protocol(filename);
274     if (drv == NULL) {
275         return -ENOENT;
276     }
277 
278     return bdrv_create(drv, filename, options);
279 }
280 
281 #ifdef _WIN32
282 void get_tmp_filename(char *filename, int size)
283 {
284     char temp_dir[MAX_PATH];
285 
286     GetTempPath(MAX_PATH, temp_dir);
287     GetTempFileName(temp_dir, "qem", 0, filename);
288 }
289 #else
/*
 * Create an empty temporary file in $TMPDIR (or /tmp when TMPDIR is unset)
 * and store its name in filename.
 *
 * filename: output buffer.
 * size:     size of the output buffer in bytes.
 *
 * On failure (buffer too small for the template, or mkstemp() error)
 * filename is set to the empty string, so a later open or create on it
 * fails cleanly instead of acting on a name that was never created.
 */
void get_tmp_filename(char *filename, int size)
{
    const char *tmpdir;
    int fd, len;

    if (size <= 0) {
        return;
    }

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }

    len = snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    if (len < 0 || len >= size) {
        /* a truncated template would not end in "XXXXXX" any more */
        filename[0] = '\0';
        return;
    }

    /* XXX: race condition possible - the file is closed again here and
       later reopened by name */
    fd = mkstemp(filename);
    if (fd < 0) {
        filename[0] = '\0';
        return;
    }
    close(fd);
}
302 #endif
303 
304 /*
305  * Detect host devices. By convention, /dev/cdrom[N] is always
306  * recognized as a host CDROM.
307  */
308 static BlockDriver *find_hdev_driver(const char *filename)
309 {
310     int score_max = 0, score;
311     BlockDriver *drv = NULL, *d;
312 
313     QLIST_FOREACH(d, &bdrv_drivers, list) {
314         if (d->bdrv_probe_device) {
315             score = d->bdrv_probe_device(filename);
316             if (score > score_max) {
317                 score_max = score;
318                 drv = d;
319             }
320         }
321     }
322 
323     return drv;
324 }
325 
326 BlockDriver *bdrv_find_protocol(const char *filename)
327 {
328     BlockDriver *drv1;
329     char protocol[128];
330     int len;
331     const char *p;
332 
333     /* TODO Drivers without bdrv_file_open must be specified explicitly */
334 
335     /*
336      * XXX(hch): we really should not let host device detection
337      * override an explicit protocol specification, but moving this
338      * later breaks access to device names with colons in them.
339      * Thanks to the brain-dead persistent naming schemes on udev-
340      * based Linux systems those actually are quite common.
341      */
342     drv1 = find_hdev_driver(filename);
343     if (drv1) {
344         return drv1;
345     }
346 
347     if (!path_has_protocol(filename)) {
348         return bdrv_find_format("file");
349     }
350     p = strchr(filename, ':');
351     assert(p != NULL);
352     len = p - filename;
353     if (len > sizeof(protocol) - 1)
354         len = sizeof(protocol) - 1;
355     memcpy(protocol, filename, len);
356     protocol[len] = '\0';
357     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
358         if (drv1->protocol_name &&
359             !strcmp(drv1->protocol_name, protocol)) {
360             return drv1;
361         }
362     }
363     return NULL;
364 }
365 
366 static int find_image_format(const char *filename, BlockDriver **pdrv)
367 {
368     int ret, score, score_max;
369     BlockDriver *drv1, *drv;
370     uint8_t buf[2048];
371     BlockDriverState *bs;
372 
373     ret = bdrv_file_open(&bs, filename, 0);
374     if (ret < 0) {
375         *pdrv = NULL;
376         return ret;
377     }
378 
379     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
380     if (bs->sg || !bdrv_is_inserted(bs)) {
381         bdrv_delete(bs);
382         drv = bdrv_find_format("raw");
383         if (!drv) {
384             ret = -ENOENT;
385         }
386         *pdrv = drv;
387         return ret;
388     }
389 
390     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
391     bdrv_delete(bs);
392     if (ret < 0) {
393         *pdrv = NULL;
394         return ret;
395     }
396 
397     score_max = 0;
398     drv = NULL;
399     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
400         if (drv1->bdrv_probe) {
401             score = drv1->bdrv_probe(buf, ret, filename);
402             if (score > score_max) {
403                 score_max = score;
404                 drv = drv1;
405             }
406         }
407     }
408     if (!drv) {
409         ret = -ENOENT;
410     }
411     *pdrv = drv;
412     return ret;
413 }
414 
415 /**
416  * Set the current 'total_sectors' value
417  */
418 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
419 {
420     BlockDriver *drv = bs->drv;
421 
422     /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
423     if (bs->sg)
424         return 0;
425 
426     /* query actual device if possible, otherwise just trust the hint */
427     if (drv->bdrv_getlength) {
428         int64_t length = drv->bdrv_getlength(bs);
429         if (length < 0) {
430             return length;
431         }
432         hint = length >> BDRV_SECTOR_BITS;
433     }
434 
435     bs->total_sectors = hint;
436     return 0;
437 }
438 
439 /*
440  * Common part for opening disk images and files
441  */
442 static int bdrv_open_common(BlockDriverState *bs, const char *filename,
443     int flags, BlockDriver *drv)
444 {
445     int ret, open_flags;
446 
447     assert(drv != NULL);
448 
449     bs->file = NULL;
450     bs->total_sectors = 0;
451     bs->encrypted = 0;
452     bs->valid_key = 0;
453     bs->open_flags = flags;
454     /* buffer_alignment defaulted to 512, drivers can change this value */
455     bs->buffer_alignment = 512;
456 
457     pstrcpy(bs->filename, sizeof(bs->filename), filename);
458 
459     if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
460         return -ENOTSUP;
461     }
462 
463     bs->drv = drv;
464     bs->opaque = qemu_mallocz(drv->instance_size);
465 
466     if (flags & BDRV_O_CACHE_WB)
467         bs->enable_write_cache = 1;
468 
469     /*
470      * Clear flags that are internal to the block layer before opening the
471      * image.
472      */
473     open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
474 
475     /*
476      * Snapshots should be writable.
477      */
478     if (bs->is_temporary) {
479         open_flags |= BDRV_O_RDWR;
480     }
481 
482     /* Open the image, either directly or using a protocol */
483     if (drv->bdrv_file_open) {
484         ret = drv->bdrv_file_open(bs, filename, open_flags);
485     } else {
486         ret = bdrv_file_open(&bs->file, filename, open_flags);
487         if (ret >= 0) {
488             ret = drv->bdrv_open(bs, open_flags);
489         }
490     }
491 
492     if (ret < 0) {
493         goto free_and_fail;
494     }
495 
496     bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
497 
498     ret = refresh_total_sectors(bs, bs->total_sectors);
499     if (ret < 0) {
500         goto free_and_fail;
501     }
502 
503 #ifndef _WIN32
504     if (bs->is_temporary) {
505         unlink(filename);
506     }
507 #endif
508     return 0;
509 
510 free_and_fail:
511     if (bs->file) {
512         bdrv_delete(bs->file);
513         bs->file = NULL;
514     }
515     qemu_free(bs->opaque);
516     bs->opaque = NULL;
517     bs->drv = NULL;
518     return ret;
519 }
520 
521 /*
522  * Opens a file using a protocol (file, host_device, nbd, ...)
523  */
524 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
525 {
526     BlockDriverState *bs;
527     BlockDriver *drv;
528     int ret;
529 
530     drv = bdrv_find_protocol(filename);
531     if (!drv) {
532         return -ENOENT;
533     }
534 
535     bs = bdrv_new("");
536     ret = bdrv_open_common(bs, filename, flags, drv);
537     if (ret < 0) {
538         bdrv_delete(bs);
539         return ret;
540     }
541     bs->growable = 1;
542     *pbs = bs;
543     return 0;
544 }
545 
546 /*
547  * Opens a disk image (raw, qcow2, vmdk, ...)
548  */
549 int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
550               BlockDriver *drv)
551 {
552     int ret;
553 
554     if (flags & BDRV_O_SNAPSHOT) {
555         BlockDriverState *bs1;
556         int64_t total_size;
557         int is_protocol = 0;
558         BlockDriver *bdrv_qcow2;
559         QEMUOptionParameter *options;
560         char tmp_filename[PATH_MAX];
561         char backing_filename[PATH_MAX];
562 
563         /* if snapshot, we create a temporary backing file and open it
564            instead of opening 'filename' directly */
565 
566         /* if there is a backing file, use it */
567         bs1 = bdrv_new("");
568         ret = bdrv_open(bs1, filename, 0, drv);
569         if (ret < 0) {
570             bdrv_delete(bs1);
571             return ret;
572         }
573         total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
574 
575         if (bs1->drv && bs1->drv->protocol_name)
576             is_protocol = 1;
577 
578         bdrv_delete(bs1);
579 
580         get_tmp_filename(tmp_filename, sizeof(tmp_filename));
581 
582         /* Real path is meaningless for protocols */
583         if (is_protocol)
584             snprintf(backing_filename, sizeof(backing_filename),
585                      "%s", filename);
586         else if (!realpath(filename, backing_filename))
587             return -errno;
588 
589         bdrv_qcow2 = bdrv_find_format("qcow2");
590         options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
591 
592         set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
593         set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
594         if (drv) {
595             set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
596                 drv->format_name);
597         }
598 
599         ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
600         free_option_parameters(options);
601         if (ret < 0) {
602             return ret;
603         }
604 
605         filename = tmp_filename;
606         drv = bdrv_qcow2;
607         bs->is_temporary = 1;
608     }
609 
610     /* Find the right image format driver */
611     if (!drv) {
612         ret = find_image_format(filename, &drv);
613     }
614 
615     if (!drv) {
616         goto unlink_and_fail;
617     }
618 
619     /* Open the image */
620     ret = bdrv_open_common(bs, filename, flags, drv);
621     if (ret < 0) {
622         goto unlink_and_fail;
623     }
624 
625     /* If there is a backing file, use it */
626     if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
627         char backing_filename[PATH_MAX];
628         int back_flags;
629         BlockDriver *back_drv = NULL;
630 
631         bs->backing_hd = bdrv_new("");
632 
633         if (path_has_protocol(bs->backing_file)) {
634             pstrcpy(backing_filename, sizeof(backing_filename),
635                     bs->backing_file);
636         } else {
637             path_combine(backing_filename, sizeof(backing_filename),
638                          filename, bs->backing_file);
639         }
640 
641         if (bs->backing_format[0] != '\0') {
642             back_drv = bdrv_find_format(bs->backing_format);
643         }
644 
645         /* backing files always opened read-only */
646         back_flags =
647             flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
648 
649         ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
650         if (ret < 0) {
651             bdrv_close(bs);
652             return ret;
653         }
654         if (bs->is_temporary) {
655             bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
656         } else {
657             /* base image inherits from "parent" */
658             bs->backing_hd->keep_read_only = bs->keep_read_only;
659         }
660     }
661 
662     if (!bdrv_key_required(bs)) {
663         /* call the change callback */
664         bs->media_changed = 1;
665         if (bs->change_cb)
666             bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
667     }
668 
669     return 0;
670 
671 unlink_and_fail:
672     if (bs->is_temporary) {
673         unlink(filename);
674     }
675     return ret;
676 }
677 
678 void bdrv_close(BlockDriverState *bs)
679 {
680     if (bs->drv) {
681         if (bs == bs_snapshots) {
682             bs_snapshots = NULL;
683         }
684         if (bs->backing_hd) {
685             bdrv_delete(bs->backing_hd);
686             bs->backing_hd = NULL;
687         }
688         bs->drv->bdrv_close(bs);
689         qemu_free(bs->opaque);
690 #ifdef _WIN32
691         if (bs->is_temporary) {
692             unlink(bs->filename);
693         }
694 #endif
695         bs->opaque = NULL;
696         bs->drv = NULL;
697 
698         if (bs->file != NULL) {
699             bdrv_close(bs->file);
700         }
701 
702         /* call the change callback */
703         bs->media_changed = 1;
704         if (bs->change_cb)
705             bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
706     }
707 }
708 
709 void bdrv_close_all(void)
710 {
711     BlockDriverState *bs;
712 
713     QTAILQ_FOREACH(bs, &bdrv_states, list) {
714         bdrv_close(bs);
715     }
716 }
717 
718 /* make a BlockDriverState anonymous by removing from bdrv_state list.
719    Also, NULL terminate the device_name to prevent double remove */
720 void bdrv_make_anon(BlockDriverState *bs)
721 {
722     if (bs->device_name[0] != '\0') {
723         QTAILQ_REMOVE(&bdrv_states, bs, list);
724     }
725     bs->device_name[0] = '\0';
726 }
727 
728 void bdrv_delete(BlockDriverState *bs)
729 {
730     assert(!bs->peer);
731 
732     /* remove from list, if necessary */
733     bdrv_make_anon(bs);
734 
735     bdrv_close(bs);
736     if (bs->file != NULL) {
737         bdrv_delete(bs->file);
738     }
739 
740     assert(bs != bs_snapshots);
741     qemu_free(bs);
742 }
743 
744 int bdrv_attach(BlockDriverState *bs, DeviceState *qdev)
745 {
746     if (bs->peer) {
747         return -EBUSY;
748     }
749     bs->peer = qdev;
750     return 0;
751 }
752 
753 void bdrv_detach(BlockDriverState *bs, DeviceState *qdev)
754 {
755     assert(bs->peer == qdev);
756     bs->peer = NULL;
757     bs->change_cb = NULL;
758     bs->change_opaque = NULL;
759 }
760 
761 DeviceState *bdrv_get_attached(BlockDriverState *bs)
762 {
763     return bs->peer;
764 }
765 
766 /*
767  * Run consistency checks on an image
768  *
769  * Returns 0 if the check could be completed (it doesn't mean that the image is
770  * free of errors) or -errno when an internal error occurred. The results of the
771  * check are stored in res.
772  */
773 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
774 {
775     if (bs->drv->bdrv_check == NULL) {
776         return -ENOTSUP;
777     }
778 
779     memset(res, 0, sizeof(*res));
780     return bs->drv->bdrv_check(bs, res);
781 }
782 
783 #define COMMIT_BUF_SECTORS 2048
784 
785 /* commit COW file into the raw image */
786 int bdrv_commit(BlockDriverState *bs)
787 {
788     BlockDriver *drv = bs->drv;
789     BlockDriver *backing_drv;
790     int64_t sector, total_sectors;
791     int n, ro, open_flags;
792     int ret = 0, rw_ret = 0;
793     uint8_t *buf;
794     char filename[1024];
795     BlockDriverState *bs_rw, *bs_ro;
796 
797     if (!drv)
798         return -ENOMEDIUM;
799 
800     if (!bs->backing_hd) {
801         return -ENOTSUP;
802     }
803 
804     if (bs->backing_hd->keep_read_only) {
805         return -EACCES;
806     }
807 
808     backing_drv = bs->backing_hd->drv;
809     ro = bs->backing_hd->read_only;
810     strncpy(filename, bs->backing_hd->filename, sizeof(filename));
811     open_flags =  bs->backing_hd->open_flags;
812 
813     if (ro) {
814         /* re-open as RW */
815         bdrv_delete(bs->backing_hd);
816         bs->backing_hd = NULL;
817         bs_rw = bdrv_new("");
818         rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
819             backing_drv);
820         if (rw_ret < 0) {
821             bdrv_delete(bs_rw);
822             /* try to re-open read-only */
823             bs_ro = bdrv_new("");
824             ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
825                 backing_drv);
826             if (ret < 0) {
827                 bdrv_delete(bs_ro);
828                 /* drive not functional anymore */
829                 bs->drv = NULL;
830                 return ret;
831             }
832             bs->backing_hd = bs_ro;
833             return rw_ret;
834         }
835         bs->backing_hd = bs_rw;
836     }
837 
838     total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
839     buf = qemu_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
840 
841     for (sector = 0; sector < total_sectors; sector += n) {
842         if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
843 
844             if (bdrv_read(bs, sector, buf, n) != 0) {
845                 ret = -EIO;
846                 goto ro_cleanup;
847             }
848 
849             if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
850                 ret = -EIO;
851                 goto ro_cleanup;
852             }
853         }
854     }
855 
856     if (drv->bdrv_make_empty) {
857         ret = drv->bdrv_make_empty(bs);
858         bdrv_flush(bs);
859     }
860 
861     /*
862      * Make sure all data we wrote to the backing device is actually
863      * stable on disk.
864      */
865     if (bs->backing_hd)
866         bdrv_flush(bs->backing_hd);
867 
868 ro_cleanup:
869     qemu_free(buf);
870 
871     if (ro) {
872         /* re-open as RO */
873         bdrv_delete(bs->backing_hd);
874         bs->backing_hd = NULL;
875         bs_ro = bdrv_new("");
876         ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
877             backing_drv);
878         if (ret < 0) {
879             bdrv_delete(bs_ro);
880             /* drive not functional anymore */
881             bs->drv = NULL;
882             return ret;
883         }
884         bs->backing_hd = bs_ro;
885         bs->backing_hd->keep_read_only = 0;
886     }
887 
888     return ret;
889 }
890 
891 void bdrv_commit_all(void)
892 {
893     BlockDriverState *bs;
894 
895     QTAILQ_FOREACH(bs, &bdrv_states, list) {
896         bdrv_commit(bs);
897     }
898 }
899 
900 /*
901  * Return values:
902  * 0        - success
903  * -EINVAL  - backing format specified, but no file
904  * -ENOSPC  - can't update the backing file because no space is left in the
905  *            image file header
906  * -ENOTSUP - format driver doesn't support changing the backing file
907  */
908 int bdrv_change_backing_file(BlockDriverState *bs,
909     const char *backing_file, const char *backing_fmt)
910 {
911     BlockDriver *drv = bs->drv;
912 
913     if (drv->bdrv_change_backing_file != NULL) {
914         return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
915     } else {
916         return -ENOTSUP;
917     }
918 }
919 
920 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
921                                    size_t size)
922 {
923     int64_t len;
924 
925     if (!bdrv_is_inserted(bs))
926         return -ENOMEDIUM;
927 
928     if (bs->growable)
929         return 0;
930 
931     len = bdrv_getlength(bs);
932 
933     if (offset < 0)
934         return -EIO;
935 
936     if ((offset > len) || (len - offset < size))
937         return -EIO;
938 
939     return 0;
940 }
941 
942 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
943                               int nb_sectors)
944 {
945     return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
946                                    nb_sectors * BDRV_SECTOR_SIZE);
947 }
948 
949 /* return < 0 if error. See bdrv_write() for the return codes */
950 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
951               uint8_t *buf, int nb_sectors)
952 {
953     BlockDriver *drv = bs->drv;
954 
955     if (!drv)
956         return -ENOMEDIUM;
957     if (bdrv_check_request(bs, sector_num, nb_sectors))
958         return -EIO;
959 
960     return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
961 }
962 
963 static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
964                              int nb_sectors, int dirty)
965 {
966     int64_t start, end;
967     unsigned long val, idx, bit;
968 
969     start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
970     end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
971 
972     for (; start <= end; start++) {
973         idx = start / (sizeof(unsigned long) * 8);
974         bit = start % (sizeof(unsigned long) * 8);
975         val = bs->dirty_bitmap[idx];
976         if (dirty) {
977             if (!(val & (1UL << bit))) {
978                 bs->dirty_count++;
979                 val |= 1UL << bit;
980             }
981         } else {
982             if (val & (1UL << bit)) {
983                 bs->dirty_count--;
984                 val &= ~(1UL << bit);
985             }
986         }
987         bs->dirty_bitmap[idx] = val;
988     }
989 }
990 
991 /* Return < 0 if error. Important errors are:
992   -EIO         generic I/O error (may happen for all errors)
993   -ENOMEDIUM   No media inserted.
994   -EINVAL      Invalid sector number or nb_sectors
995   -EACCES      Trying to write a read-only device
996 */
997 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
998                const uint8_t *buf, int nb_sectors)
999 {
1000     BlockDriver *drv = bs->drv;
1001     if (!bs->drv)
1002         return -ENOMEDIUM;
1003     if (bs->read_only)
1004         return -EACCES;
1005     if (bdrv_check_request(bs, sector_num, nb_sectors))
1006         return -EIO;
1007 
1008     if (bs->dirty_bitmap) {
1009         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1010     }
1011 
1012     if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1013         bs->wr_highest_sector = sector_num + nb_sectors - 1;
1014     }
1015 
1016     return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
1017 }
1018 
1019 int bdrv_pread(BlockDriverState *bs, int64_t offset,
1020                void *buf, int count1)
1021 {
1022     uint8_t tmp_buf[BDRV_SECTOR_SIZE];
1023     int len, nb_sectors, count;
1024     int64_t sector_num;
1025     int ret;
1026 
1027     count = count1;
1028     /* first read to align to sector start */
1029     len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
1030     if (len > count)
1031         len = count;
1032     sector_num = offset >> BDRV_SECTOR_BITS;
1033     if (len > 0) {
1034         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1035             return ret;
1036         memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
1037         count -= len;
1038         if (count == 0)
1039             return count1;
1040         sector_num++;
1041         buf += len;
1042     }
1043 
1044     /* read the sectors "in place" */
1045     nb_sectors = count >> BDRV_SECTOR_BITS;
1046     if (nb_sectors > 0) {
1047         if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1048             return ret;
1049         sector_num += nb_sectors;
1050         len = nb_sectors << BDRV_SECTOR_BITS;
1051         buf += len;
1052         count -= len;
1053     }
1054 
1055     /* add data from the last sector */
1056     if (count > 0) {
1057         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1058             return ret;
1059         memcpy(buf, tmp_buf, count);
1060     }
1061     return count1;
1062 }
1063 
1064 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1065                 const void *buf, int count1)
1066 {
1067     uint8_t tmp_buf[BDRV_SECTOR_SIZE];
1068     int len, nb_sectors, count;
1069     int64_t sector_num;
1070     int ret;
1071 
1072     count = count1;
1073     /* first write to align to sector start */
1074     len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
1075     if (len > count)
1076         len = count;
1077     sector_num = offset >> BDRV_SECTOR_BITS;
1078     if (len > 0) {
1079         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1080             return ret;
1081         memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
1082         if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1083             return ret;
1084         count -= len;
1085         if (count == 0)
1086             return count1;
1087         sector_num++;
1088         buf += len;
1089     }
1090 
1091     /* write the sectors "in place" */
1092     nb_sectors = count >> BDRV_SECTOR_BITS;
1093     if (nb_sectors > 0) {
1094         if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1095             return ret;
1096         sector_num += nb_sectors;
1097         len = nb_sectors << BDRV_SECTOR_BITS;
1098         buf += len;
1099         count -= len;
1100     }
1101 
1102     /* add data from the last sector */
1103     if (count > 0) {
1104         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1105             return ret;
1106         memcpy(tmp_buf, buf, count);
1107         if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1108             return ret;
1109     }
1110     return count1;
1111 }
1112 
1113 /*
1114  * Writes to the file and ensures that no writes are reordered across this
1115  * request (acts as a barrier)
1116  *
1117  * Returns 0 on success, -errno in error cases.
1118  */
1119 int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1120     const void *buf, int count)
1121 {
1122     int ret;
1123 
1124     ret = bdrv_pwrite(bs, offset, buf, count);
1125     if (ret < 0) {
1126         return ret;
1127     }
1128 
1129     /* No flush needed for cache=writethrough, it uses O_DSYNC */
1130     if ((bs->open_flags & BDRV_O_CACHE_MASK) != 0) {
1131         bdrv_flush(bs);
1132     }
1133 
1134     return 0;
1135 }
1136 
1137 int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
1138     int nb_sectors, QEMUIOVector *qiov)
1139 {
1140     BlockDriver *drv = bs->drv;
1141 
1142     trace_bdrv_co_readv(bs, sector_num, nb_sectors);
1143 
1144     if (!drv) {
1145         return -ENOMEDIUM;
1146     }
1147     if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1148         return -EIO;
1149     }
1150 
1151     return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1152 }
1153 
1154 int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1155     int nb_sectors, QEMUIOVector *qiov)
1156 {
1157     BlockDriver *drv = bs->drv;
1158 
1159     trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1160 
1161     if (!bs->drv) {
1162         return -ENOMEDIUM;
1163     }
1164     if (bs->read_only) {
1165         return -EACCES;
1166     }
1167     if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1168         return -EIO;
1169     }
1170 
1171     if (bs->dirty_bitmap) {
1172         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1173     }
1174 
1175     if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1176         bs->wr_highest_sector = sector_num + nb_sectors - 1;
1177     }
1178 
1179     return drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1180 }
1181 
1182 /**
1183  * Truncate file to 'offset' bytes (needed only for file protocols)
1184  */
1185 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1186 {
1187     BlockDriver *drv = bs->drv;
1188     int ret;
1189     if (!drv)
1190         return -ENOMEDIUM;
1191     if (!drv->bdrv_truncate)
1192         return -ENOTSUP;
1193     if (bs->read_only)
1194         return -EACCES;
1195     if (bdrv_in_use(bs))
1196         return -EBUSY;
1197     ret = drv->bdrv_truncate(bs, offset);
1198     if (ret == 0) {
1199         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
1200         if (bs->change_cb) {
1201             bs->change_cb(bs->change_opaque, CHANGE_SIZE);
1202         }
1203     }
1204     return ret;
1205 }
1206 
1207 /**
1208  * Length of a allocated file in bytes. Sparse files are counted by actual
1209  * allocated space. Return < 0 if error or unknown.
1210  */
1211 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1212 {
1213     BlockDriver *drv = bs->drv;
1214     if (!drv) {
1215         return -ENOMEDIUM;
1216     }
1217     if (drv->bdrv_get_allocated_file_size) {
1218         return drv->bdrv_get_allocated_file_size(bs);
1219     }
1220     if (bs->file) {
1221         return bdrv_get_allocated_file_size(bs->file);
1222     }
1223     return -ENOTSUP;
1224 }
1225 
1226 /**
1227  * Length of a file in bytes. Return < 0 if error or unknown.
1228  */
1229 int64_t bdrv_getlength(BlockDriverState *bs)
1230 {
1231     BlockDriver *drv = bs->drv;
1232     if (!drv)
1233         return -ENOMEDIUM;
1234 
1235     if (bs->growable || bs->removable) {
1236         if (drv->bdrv_getlength) {
1237             return drv->bdrv_getlength(bs);
1238         }
1239     }
1240     return bs->total_sectors * BDRV_SECTOR_SIZE;
1241 }
1242 
1243 /* return 0 as number of sectors if no device present or error */
1244 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
1245 {
1246     int64_t length;
1247     length = bdrv_getlength(bs);
1248     if (length < 0)
1249         length = 0;
1250     else
1251         length = length >> BDRV_SECTOR_BITS;
1252     *nb_sectors_ptr = length;
1253 }
1254 
/*
 * One entry of the MSDOS partition table, as read from the boot sector by
 * guess_disk_lchs().  Packed so that it can be overlaid on the raw sector
 * bytes; the 32-bit fields are converted with le32_to_cpu() by the reader.
 */
struct partition {
    uint8_t boot_ind;       /* 0x80 - active */
    uint8_t head;           /* starting head */
    uint8_t sector;         /* starting sector */
    uint8_t cyl;            /* starting cylinder */
    uint8_t sys_ind;        /* partition type */
    uint8_t end_head;       /* end head */
    uint8_t end_sector;     /* end sector */
    uint8_t end_cyl;        /* end cylinder */
    uint32_t start_sect;    /* starting sector counting from 0 */
    uint32_t nr_sects;      /* number of sectors in partition */
} __attribute__((packed));
1267 
1268 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1269 static int guess_disk_lchs(BlockDriverState *bs,
1270                            int *pcylinders, int *pheads, int *psectors)
1271 {
1272     uint8_t buf[BDRV_SECTOR_SIZE];
1273     int ret, i, heads, sectors, cylinders;
1274     struct partition *p;
1275     uint32_t nr_sects;
1276     uint64_t nb_sectors;
1277 
1278     bdrv_get_geometry(bs, &nb_sectors);
1279 
1280     ret = bdrv_read(bs, 0, buf, 1);
1281     if (ret < 0)
1282         return -1;
1283     /* test msdos magic */
1284     if (buf[510] != 0x55 || buf[511] != 0xaa)
1285         return -1;
1286     for(i = 0; i < 4; i++) {
1287         p = ((struct partition *)(buf + 0x1be)) + i;
1288         nr_sects = le32_to_cpu(p->nr_sects);
1289         if (nr_sects && p->end_head) {
1290             /* We make the assumption that the partition terminates on
1291                a cylinder boundary */
1292             heads = p->end_head + 1;
1293             sectors = p->end_sector & 63;
1294             if (sectors == 0)
1295                 continue;
1296             cylinders = nb_sectors / (heads * sectors);
1297             if (cylinders < 1 || cylinders > 16383)
1298                 continue;
1299             *pheads = heads;
1300             *psectors = sectors;
1301             *pcylinders = cylinders;
1302 #if 0
1303             printf("guessed geometry: LCHS=%d %d %d\n",
1304                    cylinders, heads, sectors);
1305 #endif
1306             return 0;
1307         }
1308     }
1309     return -1;
1310 }
1311 
1312 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1313 {
1314     int translation, lba_detected = 0;
1315     int cylinders, heads, secs;
1316     uint64_t nb_sectors;
1317 
1318     /* if a geometry hint is available, use it */
1319     bdrv_get_geometry(bs, &nb_sectors);
1320     bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1321     translation = bdrv_get_translation_hint(bs);
1322     if (cylinders != 0) {
1323         *pcyls = cylinders;
1324         *pheads = heads;
1325         *psecs = secs;
1326     } else {
1327         if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1328             if (heads > 16) {
1329                 /* if heads > 16, it means that a BIOS LBA
1330                    translation was active, so the default
1331                    hardware geometry is OK */
1332                 lba_detected = 1;
1333                 goto default_geometry;
1334             } else {
1335                 *pcyls = cylinders;
1336                 *pheads = heads;
1337                 *psecs = secs;
1338                 /* disable any translation to be in sync with
1339                    the logical geometry */
1340                 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1341                     bdrv_set_translation_hint(bs,
1342                                               BIOS_ATA_TRANSLATION_NONE);
1343                 }
1344             }
1345         } else {
1346         default_geometry:
1347             /* if no geometry, use a standard physical disk geometry */
1348             cylinders = nb_sectors / (16 * 63);
1349 
1350             if (cylinders > 16383)
1351                 cylinders = 16383;
1352             else if (cylinders < 2)
1353                 cylinders = 2;
1354             *pcyls = cylinders;
1355             *pheads = 16;
1356             *psecs = 63;
1357             if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1358                 if ((*pcyls * *pheads) <= 131072) {
1359                     bdrv_set_translation_hint(bs,
1360                                               BIOS_ATA_TRANSLATION_LARGE);
1361                 } else {
1362                     bdrv_set_translation_hint(bs,
1363                                               BIOS_ATA_TRANSLATION_LBA);
1364                 }
1365             }
1366         }
1367         bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1368     }
1369 }
1370 
1371 void bdrv_set_geometry_hint(BlockDriverState *bs,
1372                             int cyls, int heads, int secs)
1373 {
1374     bs->cyls = cyls;
1375     bs->heads = heads;
1376     bs->secs = secs;
1377 }
1378 
1379 void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1380 {
1381     bs->translation = translation;
1382 }
1383 
1384 void bdrv_get_geometry_hint(BlockDriverState *bs,
1385                             int *pcyls, int *pheads, int *psecs)
1386 {
1387     *pcyls = bs->cyls;
1388     *pheads = bs->heads;
1389     *psecs = bs->secs;
1390 }
1391 
1392 /* Recognize floppy formats */
1393 typedef struct FDFormat {
1394     FDriveType drive;
1395     uint8_t last_sect;
1396     uint8_t max_track;
1397     uint8_t max_head;
1398 } FDFormat;
1399 
1400 static const FDFormat fd_formats[] = {
1401     /* First entry is default format */
1402     /* 1.44 MB 3"1/2 floppy disks */
1403     { FDRIVE_DRV_144, 18, 80, 1, },
1404     { FDRIVE_DRV_144, 20, 80, 1, },
1405     { FDRIVE_DRV_144, 21, 80, 1, },
1406     { FDRIVE_DRV_144, 21, 82, 1, },
1407     { FDRIVE_DRV_144, 21, 83, 1, },
1408     { FDRIVE_DRV_144, 22, 80, 1, },
1409     { FDRIVE_DRV_144, 23, 80, 1, },
1410     { FDRIVE_DRV_144, 24, 80, 1, },
1411     /* 2.88 MB 3"1/2 floppy disks */
1412     { FDRIVE_DRV_288, 36, 80, 1, },
1413     { FDRIVE_DRV_288, 39, 80, 1, },
1414     { FDRIVE_DRV_288, 40, 80, 1, },
1415     { FDRIVE_DRV_288, 44, 80, 1, },
1416     { FDRIVE_DRV_288, 48, 80, 1, },
1417     /* 720 kB 3"1/2 floppy disks */
1418     { FDRIVE_DRV_144,  9, 80, 1, },
1419     { FDRIVE_DRV_144, 10, 80, 1, },
1420     { FDRIVE_DRV_144, 10, 82, 1, },
1421     { FDRIVE_DRV_144, 10, 83, 1, },
1422     { FDRIVE_DRV_144, 13, 80, 1, },
1423     { FDRIVE_DRV_144, 14, 80, 1, },
1424     /* 1.2 MB 5"1/4 floppy disks */
1425     { FDRIVE_DRV_120, 15, 80, 1, },
1426     { FDRIVE_DRV_120, 18, 80, 1, },
1427     { FDRIVE_DRV_120, 18, 82, 1, },
1428     { FDRIVE_DRV_120, 18, 83, 1, },
1429     { FDRIVE_DRV_120, 20, 80, 1, },
1430     /* 720 kB 5"1/4 floppy disks */
1431     { FDRIVE_DRV_120,  9, 80, 1, },
1432     { FDRIVE_DRV_120, 11, 80, 1, },
1433     /* 360 kB 5"1/4 floppy disks */
1434     { FDRIVE_DRV_120,  9, 40, 1, },
1435     { FDRIVE_DRV_120,  9, 40, 0, },
1436     { FDRIVE_DRV_120, 10, 41, 1, },
1437     { FDRIVE_DRV_120, 10, 42, 1, },
1438     /* 320 kB 5"1/4 floppy disks */
1439     { FDRIVE_DRV_120,  8, 40, 1, },
1440     { FDRIVE_DRV_120,  8, 40, 0, },
1441     /* 360 kB must match 5"1/4 better than 3"1/2... */
1442     { FDRIVE_DRV_144,  9, 80, 0, },
1443     /* end */
1444     { FDRIVE_DRV_NONE, -1, -1, 0, },
1445 };
1446 
1447 void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
1448                                    int *max_track, int *last_sect,
1449                                    FDriveType drive_in, FDriveType *drive)
1450 {
1451     const FDFormat *parse;
1452     uint64_t nb_sectors, size;
1453     int i, first_match, match;
1454 
1455     bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
1456     if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
1457         /* User defined disk */
1458     } else {
1459         bdrv_get_geometry(bs, &nb_sectors);
1460         match = -1;
1461         first_match = -1;
1462         for (i = 0; ; i++) {
1463             parse = &fd_formats[i];
1464             if (parse->drive == FDRIVE_DRV_NONE) {
1465                 break;
1466             }
1467             if (drive_in == parse->drive ||
1468                 drive_in == FDRIVE_DRV_NONE) {
1469                 size = (parse->max_head + 1) * parse->max_track *
1470                     parse->last_sect;
1471                 if (nb_sectors == size) {
1472                     match = i;
1473                     break;
1474                 }
1475                 if (first_match == -1) {
1476                     first_match = i;
1477                 }
1478             }
1479         }
1480         if (match == -1) {
1481             if (first_match == -1) {
1482                 match = 1;
1483             } else {
1484                 match = first_match;
1485             }
1486             parse = &fd_formats[match];
1487         }
1488         *nb_heads = parse->max_head + 1;
1489         *max_track = parse->max_track;
1490         *last_sect = parse->last_sect;
1491         *drive = parse->drive;
1492     }
1493 }
1494 
1495 int bdrv_get_translation_hint(BlockDriverState *bs)
1496 {
1497     return bs->translation;
1498 }
1499 
1500 void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
1501                        BlockErrorAction on_write_error)
1502 {
1503     bs->on_read_error = on_read_error;
1504     bs->on_write_error = on_write_error;
1505 }
1506 
1507 BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1508 {
1509     return is_read ? bs->on_read_error : bs->on_write_error;
1510 }
1511 
1512 void bdrv_set_removable(BlockDriverState *bs, int removable)
1513 {
1514     bs->removable = removable;
1515     if (removable && bs == bs_snapshots) {
1516         bs_snapshots = NULL;
1517     }
1518 }
1519 
1520 int bdrv_is_removable(BlockDriverState *bs)
1521 {
1522     return bs->removable;
1523 }
1524 
1525 int bdrv_is_read_only(BlockDriverState *bs)
1526 {
1527     return bs->read_only;
1528 }
1529 
1530 int bdrv_is_sg(BlockDriverState *bs)
1531 {
1532     return bs->sg;
1533 }
1534 
1535 int bdrv_enable_write_cache(BlockDriverState *bs)
1536 {
1537     return bs->enable_write_cache;
1538 }
1539 
1540 /* XXX: no longer used */
1541 void bdrv_set_change_cb(BlockDriverState *bs,
1542                         void (*change_cb)(void *opaque, int reason),
1543                         void *opaque)
1544 {
1545     bs->change_cb = change_cb;
1546     bs->change_opaque = opaque;
1547 }
1548 
1549 int bdrv_is_encrypted(BlockDriverState *bs)
1550 {
1551     if (bs->backing_hd && bs->backing_hd->encrypted)
1552         return 1;
1553     return bs->encrypted;
1554 }
1555 
1556 int bdrv_key_required(BlockDriverState *bs)
1557 {
1558     BlockDriverState *backing_hd = bs->backing_hd;
1559 
1560     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1561         return 1;
1562     return (bs->encrypted && !bs->valid_key);
1563 }
1564 
1565 int bdrv_set_key(BlockDriverState *bs, const char *key)
1566 {
1567     int ret;
1568     if (bs->backing_hd && bs->backing_hd->encrypted) {
1569         ret = bdrv_set_key(bs->backing_hd, key);
1570         if (ret < 0)
1571             return ret;
1572         if (!bs->encrypted)
1573             return 0;
1574     }
1575     if (!bs->encrypted) {
1576         return -EINVAL;
1577     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
1578         return -ENOMEDIUM;
1579     }
1580     ret = bs->drv->bdrv_set_key(bs, key);
1581     if (ret < 0) {
1582         bs->valid_key = 0;
1583     } else if (!bs->valid_key) {
1584         bs->valid_key = 1;
1585         /* call the change callback now, we skipped it on open */
1586         bs->media_changed = 1;
1587         if (bs->change_cb)
1588             bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
1589     }
1590     return ret;
1591 }
1592 
1593 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1594 {
1595     if (!bs->drv) {
1596         buf[0] = '\0';
1597     } else {
1598         pstrcpy(buf, buf_size, bs->drv->format_name);
1599     }
1600 }
1601 
1602 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
1603                          void *opaque)
1604 {
1605     BlockDriver *drv;
1606 
1607     QLIST_FOREACH(drv, &bdrv_drivers, list) {
1608         it(opaque, drv->format_name);
1609     }
1610 }
1611 
1612 BlockDriverState *bdrv_find(const char *name)
1613 {
1614     BlockDriverState *bs;
1615 
1616     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1617         if (!strcmp(name, bs->device_name)) {
1618             return bs;
1619         }
1620     }
1621     return NULL;
1622 }
1623 
1624 BlockDriverState *bdrv_next(BlockDriverState *bs)
1625 {
1626     if (!bs) {
1627         return QTAILQ_FIRST(&bdrv_states);
1628     }
1629     return QTAILQ_NEXT(bs, list);
1630 }
1631 
1632 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
1633 {
1634     BlockDriverState *bs;
1635 
1636     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1637         it(opaque, bs);
1638     }
1639 }
1640 
1641 const char *bdrv_get_device_name(BlockDriverState *bs)
1642 {
1643     return bs->device_name;
1644 }
1645 
1646 int bdrv_flush(BlockDriverState *bs)
1647 {
1648     if (bs->open_flags & BDRV_O_NO_FLUSH) {
1649         return 0;
1650     }
1651 
1652     if (bs->drv && bs->drv->bdrv_flush) {
1653         return bs->drv->bdrv_flush(bs);
1654     }
1655 
1656     /*
1657      * Some block drivers always operate in either writethrough or unsafe mode
1658      * and don't support bdrv_flush therefore. Usually qemu doesn't know how
1659      * the server works (because the behaviour is hardcoded or depends on
1660      * server-side configuration), so we can't ensure that everything is safe
1661      * on disk. Returning an error doesn't work because that would break guests
1662      * even if the server operates in writethrough mode.
1663      *
1664      * Let's hope the user knows what he's doing.
1665      */
1666     return 0;
1667 }
1668 
1669 void bdrv_flush_all(void)
1670 {
1671     BlockDriverState *bs;
1672 
1673     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1674         if (bs->drv && !bdrv_is_read_only(bs) &&
1675             (!bdrv_is_removable(bs) || bdrv_is_inserted(bs))) {
1676             bdrv_flush(bs);
1677         }
1678     }
1679 }
1680 
1681 int bdrv_has_zero_init(BlockDriverState *bs)
1682 {
1683     assert(bs->drv);
1684 
1685     if (bs->drv->bdrv_has_zero_init) {
1686         return bs->drv->bdrv_has_zero_init(bs);
1687     }
1688 
1689     return 1;
1690 }
1691 
1692 int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
1693 {
1694     if (!bs->drv) {
1695         return -ENOMEDIUM;
1696     }
1697     if (!bs->drv->bdrv_discard) {
1698         return 0;
1699     }
1700     return bs->drv->bdrv_discard(bs, sector_num, nb_sectors);
1701 }
1702 
1703 /*
1704  * Returns true iff the specified sector is present in the disk image. Drivers
1705  * not implementing the functionality are assumed to not support backing files,
1706  * hence all their sectors are reported as allocated.
1707  *
1708  * 'pnum' is set to the number of sectors (including and immediately following
1709  * the specified sector) that are known to be in the same
1710  * allocated/unallocated state.
1711  *
1712  * 'nb_sectors' is the max value 'pnum' should be set to.
1713  */
1714 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1715 	int *pnum)
1716 {
1717     int64_t n;
1718     if (!bs->drv->bdrv_is_allocated) {
1719         if (sector_num >= bs->total_sectors) {
1720             *pnum = 0;
1721             return 0;
1722         }
1723         n = bs->total_sectors - sector_num;
1724         *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1725         return 1;
1726     }
1727     return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1728 }
1729 
1730 void bdrv_mon_event(const BlockDriverState *bdrv,
1731                     BlockMonEventAction action, int is_read)
1732 {
1733     QObject *data;
1734     const char *action_str;
1735 
1736     switch (action) {
1737     case BDRV_ACTION_REPORT:
1738         action_str = "report";
1739         break;
1740     case BDRV_ACTION_IGNORE:
1741         action_str = "ignore";
1742         break;
1743     case BDRV_ACTION_STOP:
1744         action_str = "stop";
1745         break;
1746     default:
1747         abort();
1748     }
1749 
1750     data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1751                               bdrv->device_name,
1752                               action_str,
1753                               is_read ? "read" : "write");
1754     monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1755 
1756     qobject_decref(data);
1757 }
1758 
1759 static void bdrv_print_dict(QObject *obj, void *opaque)
1760 {
1761     QDict *bs_dict;
1762     Monitor *mon = opaque;
1763 
1764     bs_dict = qobject_to_qdict(obj);
1765 
1766     monitor_printf(mon, "%s: removable=%d",
1767                         qdict_get_str(bs_dict, "device"),
1768                         qdict_get_bool(bs_dict, "removable"));
1769 
1770     if (qdict_get_bool(bs_dict, "removable")) {
1771         monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
1772     }
1773 
1774     if (qdict_haskey(bs_dict, "inserted")) {
1775         QDict *qdict = qobject_to_qdict(qdict_get(bs_dict, "inserted"));
1776 
1777         monitor_printf(mon, " file=");
1778         monitor_print_filename(mon, qdict_get_str(qdict, "file"));
1779         if (qdict_haskey(qdict, "backing_file")) {
1780             monitor_printf(mon, " backing_file=");
1781             monitor_print_filename(mon, qdict_get_str(qdict, "backing_file"));
1782         }
1783         monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
1784                             qdict_get_bool(qdict, "ro"),
1785                             qdict_get_str(qdict, "drv"),
1786                             qdict_get_bool(qdict, "encrypted"));
1787     } else {
1788         monitor_printf(mon, " [not inserted]");
1789     }
1790 
1791     monitor_printf(mon, "\n");
1792 }
1793 
1794 void bdrv_info_print(Monitor *mon, const QObject *data)
1795 {
1796     qlist_iter(qobject_to_qlist(data), bdrv_print_dict, mon);
1797 }
1798 
1799 void bdrv_info(Monitor *mon, QObject **ret_data)
1800 {
1801     QList *bs_list;
1802     BlockDriverState *bs;
1803 
1804     bs_list = qlist_new();
1805 
1806     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1807         QObject *bs_obj;
1808 
1809         bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': 'unknown', "
1810                                     "'removable': %i, 'locked': %i }",
1811                                     bs->device_name, bs->removable,
1812                                     bs->locked);
1813 
1814         if (bs->drv) {
1815             QObject *obj;
1816             QDict *bs_dict = qobject_to_qdict(bs_obj);
1817 
1818             obj = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
1819                                      "'encrypted': %i }",
1820                                      bs->filename, bs->read_only,
1821                                      bs->drv->format_name,
1822                                      bdrv_is_encrypted(bs));
1823             if (bs->backing_file[0] != '\0') {
1824                 QDict *qdict = qobject_to_qdict(obj);
1825                 qdict_put(qdict, "backing_file",
1826                           qstring_from_str(bs->backing_file));
1827             }
1828 
1829             qdict_put_obj(bs_dict, "inserted", obj);
1830         }
1831         qlist_append_obj(bs_list, bs_obj);
1832     }
1833 
1834     *ret_data = QOBJECT(bs_list);
1835 }
1836 
1837 static void bdrv_stats_iter(QObject *data, void *opaque)
1838 {
1839     QDict *qdict;
1840     Monitor *mon = opaque;
1841 
1842     qdict = qobject_to_qdict(data);
1843     monitor_printf(mon, "%s:", qdict_get_str(qdict, "device"));
1844 
1845     qdict = qobject_to_qdict(qdict_get(qdict, "stats"));
1846     monitor_printf(mon, " rd_bytes=%" PRId64
1847                         " wr_bytes=%" PRId64
1848                         " rd_operations=%" PRId64
1849                         " wr_operations=%" PRId64
1850                         "\n",
1851                         qdict_get_int(qdict, "rd_bytes"),
1852                         qdict_get_int(qdict, "wr_bytes"),
1853                         qdict_get_int(qdict, "rd_operations"),
1854                         qdict_get_int(qdict, "wr_operations"));
1855 }
1856 
1857 void bdrv_stats_print(Monitor *mon, const QObject *data)
1858 {
1859     qlist_iter(qobject_to_qlist(data), bdrv_stats_iter, mon);
1860 }
1861 
1862 static QObject* bdrv_info_stats_bs(BlockDriverState *bs)
1863 {
1864     QObject *res;
1865     QDict *dict;
1866 
1867     res = qobject_from_jsonf("{ 'stats': {"
1868                              "'rd_bytes': %" PRId64 ","
1869                              "'wr_bytes': %" PRId64 ","
1870                              "'rd_operations': %" PRId64 ","
1871                              "'wr_operations': %" PRId64 ","
1872                              "'wr_highest_offset': %" PRId64
1873                              "} }",
1874                              bs->rd_bytes, bs->wr_bytes,
1875                              bs->rd_ops, bs->wr_ops,
1876                              bs->wr_highest_sector *
1877                              (uint64_t)BDRV_SECTOR_SIZE);
1878     dict  = qobject_to_qdict(res);
1879 
1880     if (*bs->device_name) {
1881         qdict_put(dict, "device", qstring_from_str(bs->device_name));
1882     }
1883 
1884     if (bs->file) {
1885         QObject *parent = bdrv_info_stats_bs(bs->file);
1886         qdict_put_obj(dict, "parent", parent);
1887     }
1888 
1889     return res;
1890 }
1891 
1892 void bdrv_info_stats(Monitor *mon, QObject **ret_data)
1893 {
1894     QObject *obj;
1895     QList *devices;
1896     BlockDriverState *bs;
1897 
1898     devices = qlist_new();
1899 
1900     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1901         obj = bdrv_info_stats_bs(bs);
1902         qlist_append_obj(devices, obj);
1903     }
1904 
1905     *ret_data = QOBJECT(devices);
1906 }
1907 
1908 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
1909 {
1910     if (bs->backing_hd && bs->backing_hd->encrypted)
1911         return bs->backing_file;
1912     else if (bs->encrypted)
1913         return bs->filename;
1914     else
1915         return NULL;
1916 }
1917 
1918 void bdrv_get_backing_filename(BlockDriverState *bs,
1919                                char *filename, int filename_size)
1920 {
1921     if (!bs->backing_file) {
1922         pstrcpy(filename, filename_size, "");
1923     } else {
1924         pstrcpy(filename, filename_size, bs->backing_file);
1925     }
1926 }
1927 
1928 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
1929                           const uint8_t *buf, int nb_sectors)
1930 {
1931     BlockDriver *drv = bs->drv;
1932     if (!drv)
1933         return -ENOMEDIUM;
1934     if (!drv->bdrv_write_compressed)
1935         return -ENOTSUP;
1936     if (bdrv_check_request(bs, sector_num, nb_sectors))
1937         return -EIO;
1938 
1939     if (bs->dirty_bitmap) {
1940         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1941     }
1942 
1943     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
1944 }
1945 
1946 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1947 {
1948     BlockDriver *drv = bs->drv;
1949     if (!drv)
1950         return -ENOMEDIUM;
1951     if (!drv->bdrv_get_info)
1952         return -ENOTSUP;
1953     memset(bdi, 0, sizeof(*bdi));
1954     return drv->bdrv_get_info(bs, bdi);
1955 }
1956 
1957 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
1958                       int64_t pos, int size)
1959 {
1960     BlockDriver *drv = bs->drv;
1961     if (!drv)
1962         return -ENOMEDIUM;
1963     if (drv->bdrv_save_vmstate)
1964         return drv->bdrv_save_vmstate(bs, buf, pos, size);
1965     if (bs->file)
1966         return bdrv_save_vmstate(bs->file, buf, pos, size);
1967     return -ENOTSUP;
1968 }
1969 
1970 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
1971                       int64_t pos, int size)
1972 {
1973     BlockDriver *drv = bs->drv;
1974     if (!drv)
1975         return -ENOMEDIUM;
1976     if (drv->bdrv_load_vmstate)
1977         return drv->bdrv_load_vmstate(bs, buf, pos, size);
1978     if (bs->file)
1979         return bdrv_load_vmstate(bs->file, buf, pos, size);
1980     return -ENOTSUP;
1981 }
1982 
1983 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
1984 {
1985     BlockDriver *drv = bs->drv;
1986 
1987     if (!drv || !drv->bdrv_debug_event) {
1988         return;
1989     }
1990 
1991     return drv->bdrv_debug_event(bs, event);
1992 
1993 }
1994 
1995 /**************************************************************/
1996 /* handling of snapshots */
1997 
1998 int bdrv_can_snapshot(BlockDriverState *bs)
1999 {
2000     BlockDriver *drv = bs->drv;
2001     if (!drv || bdrv_is_removable(bs) || bdrv_is_read_only(bs)) {
2002         return 0;
2003     }
2004 
2005     if (!drv->bdrv_snapshot_create) {
2006         if (bs->file != NULL) {
2007             return bdrv_can_snapshot(bs->file);
2008         }
2009         return 0;
2010     }
2011 
2012     return 1;
2013 }
2014 
2015 int bdrv_is_snapshot(BlockDriverState *bs)
2016 {
2017     return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2018 }
2019 
2020 BlockDriverState *bdrv_snapshots(void)
2021 {
2022     BlockDriverState *bs;
2023 
2024     if (bs_snapshots) {
2025         return bs_snapshots;
2026     }
2027 
2028     bs = NULL;
2029     while ((bs = bdrv_next(bs))) {
2030         if (bdrv_can_snapshot(bs)) {
2031             bs_snapshots = bs;
2032             return bs;
2033         }
2034     }
2035     return NULL;
2036 }
2037 
2038 int bdrv_snapshot_create(BlockDriverState *bs,
2039                          QEMUSnapshotInfo *sn_info)
2040 {
2041     BlockDriver *drv = bs->drv;
2042     if (!drv)
2043         return -ENOMEDIUM;
2044     if (drv->bdrv_snapshot_create)
2045         return drv->bdrv_snapshot_create(bs, sn_info);
2046     if (bs->file)
2047         return bdrv_snapshot_create(bs->file, sn_info);
2048     return -ENOTSUP;
2049 }
2050 
2051 int bdrv_snapshot_goto(BlockDriverState *bs,
2052                        const char *snapshot_id)
2053 {
2054     BlockDriver *drv = bs->drv;
2055     int ret, open_ret;
2056 
2057     if (!drv)
2058         return -ENOMEDIUM;
2059     if (drv->bdrv_snapshot_goto)
2060         return drv->bdrv_snapshot_goto(bs, snapshot_id);
2061 
2062     if (bs->file) {
2063         drv->bdrv_close(bs);
2064         ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2065         open_ret = drv->bdrv_open(bs, bs->open_flags);
2066         if (open_ret < 0) {
2067             bdrv_delete(bs->file);
2068             bs->drv = NULL;
2069             return open_ret;
2070         }
2071         return ret;
2072     }
2073 
2074     return -ENOTSUP;
2075 }
2076 
2077 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2078 {
2079     BlockDriver *drv = bs->drv;
2080     if (!drv)
2081         return -ENOMEDIUM;
2082     if (drv->bdrv_snapshot_delete)
2083         return drv->bdrv_snapshot_delete(bs, snapshot_id);
2084     if (bs->file)
2085         return bdrv_snapshot_delete(bs->file, snapshot_id);
2086     return -ENOTSUP;
2087 }
2088 
2089 int bdrv_snapshot_list(BlockDriverState *bs,
2090                        QEMUSnapshotInfo **psn_info)
2091 {
2092     BlockDriver *drv = bs->drv;
2093     if (!drv)
2094         return -ENOMEDIUM;
2095     if (drv->bdrv_snapshot_list)
2096         return drv->bdrv_snapshot_list(bs, psn_info);
2097     if (bs->file)
2098         return bdrv_snapshot_list(bs->file, psn_info);
2099     return -ENOTSUP;
2100 }
2101 
2102 int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2103         const char *snapshot_name)
2104 {
2105     BlockDriver *drv = bs->drv;
2106     if (!drv) {
2107         return -ENOMEDIUM;
2108     }
2109     if (!bs->read_only) {
2110         return -EINVAL;
2111     }
2112     if (drv->bdrv_snapshot_load_tmp) {
2113         return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2114     }
2115     return -ENOTSUP;
2116 }
2117 
#define NB_SUFFIXES 4

/*
 * Format @size as a short human-readable string in @buf (capacity @buf_size).
 *
 * Values up to 999 (including negative ones) are printed as plain integers.
 * Larger values are scaled by powers of 1024 and tagged K, M, G or T:
 * one decimal digit is shown while the scaled value is below 10, a rounded
 * integer otherwise.  Anything too large for the other units is expressed
 * in T.
 *
 * Returns @buf.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = { 'K', 'M', 'G', 'T' };
    int64_t unit = 1024;
    int idx;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (idx = 0; idx < NB_SUFFIXES; idx++, unit *= 1024) {
        if (size < 10 * unit) {
            /* Small multiple of the unit: keep one fractional digit. */
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / unit, suffixes[idx]);
            return buf;
        }
        if (size < 1000 * unit || idx == NB_SUFFIXES - 1) {
            /* Round to the nearest whole unit. */
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     (size + (unit >> 1)) / unit, suffixes[idx]);
            return buf;
        }
    }
    return buf;
}
2147 
2148 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2149 {
2150     char buf1[128], date_buf[128], clock_buf[128];
2151 #ifdef _WIN32
2152     struct tm *ptm;
2153 #else
2154     struct tm tm;
2155 #endif
2156     time_t ti;
2157     int64_t secs;
2158 
2159     if (!sn) {
2160         snprintf(buf, buf_size,
2161                  "%-10s%-20s%7s%20s%15s",
2162                  "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2163     } else {
2164         ti = sn->date_sec;
2165 #ifdef _WIN32
2166         ptm = localtime(&ti);
2167         strftime(date_buf, sizeof(date_buf),
2168                  "%Y-%m-%d %H:%M:%S", ptm);
2169 #else
2170         localtime_r(&ti, &tm);
2171         strftime(date_buf, sizeof(date_buf),
2172                  "%Y-%m-%d %H:%M:%S", &tm);
2173 #endif
2174         secs = sn->vm_clock_nsec / 1000000000;
2175         snprintf(clock_buf, sizeof(clock_buf),
2176                  "%02d:%02d:%02d.%03d",
2177                  (int)(secs / 3600),
2178                  (int)((secs / 60) % 60),
2179                  (int)(secs % 60),
2180                  (int)((sn->vm_clock_nsec / 1000000) % 1000));
2181         snprintf(buf, buf_size,
2182                  "%-10s%-20s%7s%20s%15s",
2183                  sn->id_str, sn->name,
2184                  get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2185                  date_buf,
2186                  clock_buf);
2187     }
2188     return buf;
2189 }
2190 
2191 
2192 /**************************************************************/
2193 /* async I/Os */
2194 
2195 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
2196                                  QEMUIOVector *qiov, int nb_sectors,
2197                                  BlockDriverCompletionFunc *cb, void *opaque)
2198 {
2199     BlockDriver *drv = bs->drv;
2200     BlockDriverAIOCB *ret;
2201 
2202     trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2203 
2204     if (!drv)
2205         return NULL;
2206     if (bdrv_check_request(bs, sector_num, nb_sectors))
2207         return NULL;
2208 
2209     ret = drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
2210                               cb, opaque);
2211 
2212     if (ret) {
2213 	/* Update stats even though technically transfer has not happened. */
2214 	bs->rd_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
2215 	bs->rd_ops ++;
2216     }
2217 
2218     return ret;
2219 }
2220 
/*
 * Context for block_complete_cb(): wraps the caller's completion callback of
 * an AIO write so the written range can be marked dirty first.
 */
typedef struct BlockCompleteData {
    BlockDriverCompletionFunc *cb;  /* caller's original completion callback */
    void *opaque;                   /* argument passed on to cb */
    BlockDriverState *bs;           /* device whose dirty bitmap is updated */
    int64_t sector_num;             /* first sector of the write */
    int nb_sectors;                 /* number of sectors in the write */
} BlockCompleteData;
2228 
2229 static void block_complete_cb(void *opaque, int ret)
2230 {
2231     BlockCompleteData *b = opaque;
2232 
2233     if (b->bs->dirty_bitmap) {
2234         set_dirty_bitmap(b->bs, b->sector_num, b->nb_sectors, 1);
2235     }
2236     b->cb(b->opaque, ret);
2237     qemu_free(b);
2238 }
2239 
2240 static BlockCompleteData *blk_dirty_cb_alloc(BlockDriverState *bs,
2241                                              int64_t sector_num,
2242                                              int nb_sectors,
2243                                              BlockDriverCompletionFunc *cb,
2244                                              void *opaque)
2245 {
2246     BlockCompleteData *blkdata = qemu_mallocz(sizeof(BlockCompleteData));
2247 
2248     blkdata->bs = bs;
2249     blkdata->cb = cb;
2250     blkdata->opaque = opaque;
2251     blkdata->sector_num = sector_num;
2252     blkdata->nb_sectors = nb_sectors;
2253 
2254     return blkdata;
2255 }
2256 
2257 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2258                                   QEMUIOVector *qiov, int nb_sectors,
2259                                   BlockDriverCompletionFunc *cb, void *opaque)
2260 {
2261     BlockDriver *drv = bs->drv;
2262     BlockDriverAIOCB *ret;
2263     BlockCompleteData *blk_cb_data;
2264 
2265     trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2266 
2267     if (!drv)
2268         return NULL;
2269     if (bs->read_only)
2270         return NULL;
2271     if (bdrv_check_request(bs, sector_num, nb_sectors))
2272         return NULL;
2273 
2274     if (bs->dirty_bitmap) {
2275         blk_cb_data = blk_dirty_cb_alloc(bs, sector_num, nb_sectors, cb,
2276                                          opaque);
2277         cb = &block_complete_cb;
2278         opaque = blk_cb_data;
2279     }
2280 
2281     ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
2282                                cb, opaque);
2283 
2284     if (ret) {
2285         /* Update stats even though technically transfer has not happened. */
2286         bs->wr_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
2287         bs->wr_ops ++;
2288         if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
2289             bs->wr_highest_sector = sector_num + nb_sectors - 1;
2290         }
2291     }
2292 
2293     return ret;
2294 }
2295 
2296 
/*
 * Shared state of one bdrv_aio_multiwrite() call; it is the opaque argument
 * of multiwrite_cb() for every request submitted on behalf of that call.
 */
typedef struct MultiwriteCB {
    int error;          /* first negative result seen by multiwrite_cb() */
    int num_requests;   /* requests not yet completed */
    int num_callbacks;  /* number of entries in callbacks[] */
    struct {
        BlockDriverCompletionFunc *cb;  /* caller's completion callback */
        void *opaque;                   /* argument passed to cb */
        QEMUIOVector *free_qiov;        /* merged qiov to destroy and free */
        void *free_buf;                 /* zero-fill buffer to free */
    } callbacks[];
} MultiwriteCB;
2308 
2309 static void multiwrite_user_cb(MultiwriteCB *mcb)
2310 {
2311     int i;
2312 
2313     for (i = 0; i < mcb->num_callbacks; i++) {
2314         mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
2315         if (mcb->callbacks[i].free_qiov) {
2316             qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2317         }
2318         qemu_free(mcb->callbacks[i].free_qiov);
2319         qemu_vfree(mcb->callbacks[i].free_buf);
2320     }
2321 }
2322 
2323 static void multiwrite_cb(void *opaque, int ret)
2324 {
2325     MultiwriteCB *mcb = opaque;
2326 
2327     trace_multiwrite_cb(mcb, ret);
2328 
2329     if (ret < 0 && !mcb->error) {
2330         mcb->error = ret;
2331     }
2332 
2333     mcb->num_requests--;
2334     if (mcb->num_requests == 0) {
2335         multiwrite_user_cb(mcb);
2336         qemu_free(mcb);
2337     }
2338 }
2339 
2340 static int multiwrite_req_compare(const void *a, const void *b)
2341 {
2342     const BlockRequest *req1 = a, *req2 = b;
2343 
2344     /*
2345      * Note that we can't simply subtract req2->sector from req1->sector
2346      * here as that could overflow the return value.
2347      */
2348     if (req1->sector > req2->sector) {
2349         return 1;
2350     } else if (req1->sector < req2->sector) {
2351         return -1;
2352     } else {
2353         return 0;
2354     }
2355 }
2356 
/*
 * Takes a bunch of requests and tries to merge them. Returns the number of
 * requests that remain after merging.
 *
 * The reqs array is sorted by start sector and then compacted in place: the
 * surviving requests end up in reqs[0 .. return value - 1]. Only the sector,
 * nb_sectors and qiov fields are rewritten. Every merged qiov and zero-fill
 * buffer allocated here is recorded in mcb->callbacks[] so that it can be
 * released once the multiwrite completes.
 */
static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
    int num_reqs, MultiwriteCB *mcb)
{
    int i, outidx;

    // Sort requests by start sector
    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);

    // Check if adjacent requests touch the same clusters. If so, combine them,
    // filling up gaps with zero sectors.
    // reqs[outidx] is the request currently being accumulated into; reqs[i]
    // is the candidate to append to it.
    outidx = 0;
    for (i = 1; i < num_reqs; i++) {
        int merge = 0;
        // First sector after the end of the accumulated request
        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;

        // This handles the cases that are valid for all block drivers, namely
        // exactly sequential writes and overlapping writes.
        if (reqs[i].sector <= oldreq_last) {
            merge = 1;
        }

        // The block driver may decide that it makes sense to combine requests
        // even if there is a gap of some sectors between them. In this case,
        // the gap is filled with zeros (therefore only applicable for yet
        // unused space in format like qcow2).
        if (!merge && bs->drv->bdrv_merge_requests) {
            merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
        }

        // Never build a vector with more than IOV_MAX entries; the +1
        // accounts for a possible zero-fill element.
        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
            merge = 0;
        }

        if (merge) {
            size_t size;
            QEMUIOVector *qiov = qemu_mallocz(sizeof(*qiov));
            qemu_iovec_init(qiov,
                reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);

            // Add the first request to the merged one. If the requests are
            // overlapping, drop the last sectors of the first request.
            // NOTE(review): if the first request extends beyond the end of
            // the second one, its tail does not appear to be re-added after
            // the second request - confirm against qemu_iovec_concat().
            size = (reqs[i].sector - reqs[outidx].sector) << 9;
            qemu_iovec_concat(qiov, reqs[outidx].qiov, size);

            // We might need to add some zeros between the two requests
            if (reqs[i].sector > oldreq_last) {
                size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
                uint8_t *buf = qemu_blockalign(bs, zero_bytes);
                memset(buf, 0, zero_bytes);
                qemu_iovec_add(qiov, buf, zero_bytes);
                // Remember the buffer so it is freed on completion
                mcb->callbacks[i].free_buf = buf;
            }

            // Add the second request
            qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);

            reqs[outidx].nb_sectors = qiov->size >> 9;
            reqs[outidx].qiov = qiov;

            // Remember the merged qiov so it is destroyed on completion
            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
        } else {
            // No merge: reqs[i] becomes the next accumulated request
            outidx++;
            reqs[outidx].sector     = reqs[i].sector;
            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
            reqs[outidx].qiov       = reqs[i].qiov;
        }
    }

    return outidx + 1;
}
2431 
2432 /*
2433  * Submit multiple AIO write requests at once.
2434  *
2435  * On success, the function returns 0 and all requests in the reqs array have
2436  * been submitted. In error case this function returns -1, and any of the
2437  * requests may or may not be submitted yet. In particular, this means that the
2438  * callback will be called for some of the requests, for others it won't. The
2439  * caller must check the error field of the BlockRequest to wait for the right
2440  * callbacks (if error != 0, no callback will be called).
2441  *
2442  * The implementation may modify the contents of the reqs array, e.g. to merge
2443  * requests. However, the fields opaque and error are left unmodified as they
2444  * are used to signal failure for a single request to the caller.
2445  */
2446 int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2447 {
2448     BlockDriverAIOCB *acb;
2449     MultiwriteCB *mcb;
2450     int i;
2451 
2452     /* don't submit writes if we don't have a medium */
2453     if (bs->drv == NULL) {
2454         for (i = 0; i < num_reqs; i++) {
2455             reqs[i].error = -ENOMEDIUM;
2456         }
2457         return -1;
2458     }
2459 
2460     if (num_reqs == 0) {
2461         return 0;
2462     }
2463 
2464     // Create MultiwriteCB structure
2465     mcb = qemu_mallocz(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
2466     mcb->num_requests = 0;
2467     mcb->num_callbacks = num_reqs;
2468 
2469     for (i = 0; i < num_reqs; i++) {
2470         mcb->callbacks[i].cb = reqs[i].cb;
2471         mcb->callbacks[i].opaque = reqs[i].opaque;
2472     }
2473 
2474     // Check for mergable requests
2475     num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2476 
2477     trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2478 
2479     /*
2480      * Run the aio requests. As soon as one request can't be submitted
2481      * successfully, fail all requests that are not yet submitted (we must
2482      * return failure for all requests anyway)
2483      *
2484      * num_requests cannot be set to the right value immediately: If
2485      * bdrv_aio_writev fails for some request, num_requests would be too high
2486      * and therefore multiwrite_cb() would never recognize the multiwrite
2487      * request as completed. We also cannot use the loop variable i to set it
2488      * when the first request fails because the callback may already have been
2489      * called for previously submitted requests. Thus, num_requests must be
2490      * incremented for each request that is submitted.
2491      *
2492      * The problem that callbacks may be called early also means that we need
2493      * to take care that num_requests doesn't become 0 before all requests are
2494      * submitted - multiwrite_cb() would consider the multiwrite request
2495      * completed. A dummy request that is "completed" by a manual call to
2496      * multiwrite_cb() takes care of this.
2497      */
2498     mcb->num_requests = 1;
2499 
2500     // Run the aio requests
2501     for (i = 0; i < num_reqs; i++) {
2502         mcb->num_requests++;
2503         acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2504             reqs[i].nb_sectors, multiwrite_cb, mcb);
2505 
2506         if (acb == NULL) {
2507             // We can only fail the whole thing if no request has been
2508             // submitted yet. Otherwise we'll wait for the submitted AIOs to
2509             // complete and report the error in the callback.
2510             if (i == 0) {
2511                 trace_bdrv_aio_multiwrite_earlyfail(mcb);
2512                 goto fail;
2513             } else {
2514                 trace_bdrv_aio_multiwrite_latefail(mcb, i);
2515                 multiwrite_cb(mcb, -EIO);
2516                 break;
2517             }
2518         }
2519     }
2520 
2521     /* Complete the dummy request */
2522     multiwrite_cb(mcb, 0);
2523 
2524     return 0;
2525 
2526 fail:
2527     for (i = 0; i < mcb->num_callbacks; i++) {
2528         reqs[i].error = -EIO;
2529     }
2530     qemu_free(mcb);
2531     return -1;
2532 }
2533 
2534 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
2535         BlockDriverCompletionFunc *cb, void *opaque)
2536 {
2537     BlockDriver *drv = bs->drv;
2538 
2539     trace_bdrv_aio_flush(bs, opaque);
2540 
2541     if (bs->open_flags & BDRV_O_NO_FLUSH) {
2542         return bdrv_aio_noop_em(bs, cb, opaque);
2543     }
2544 
2545     if (!drv)
2546         return NULL;
2547     return drv->bdrv_aio_flush(bs, cb, opaque);
2548 }
2549 
2550 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
2551 {
2552     acb->pool->cancel(acb);
2553 }
2554 
2555 
2556 /**************************************************************/
2557 /* async block device emulation */
2558 
/*
 * AIOCB used to emulate asynchronous requests on top of synchronous calls:
 * the operation is performed immediately and its result is delivered later
 * from a bottom half.
 */
typedef struct BlockDriverAIOCBSync {
    BlockDriverAIOCB common;    /* generic AIOCB part */
    QEMUBH *bh;                 /* bottom half that delivers the completion */
    int ret;                    /* result passed to the completion callback */
    /* vector translation state */
    QEMUIOVector *qiov;         /* caller's I/O vector */
    uint8_t *bounce;            /* linear bounce buffer for the request */
    int is_write;               /* non-zero: nothing is copied back to qiov */
} BlockDriverAIOCBSync;
2568 
2569 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2570 {
2571     BlockDriverAIOCBSync *acb =
2572         container_of(blockacb, BlockDriverAIOCBSync, common);
2573     qemu_bh_delete(acb->bh);
2574     acb->bh = NULL;
2575     qemu_aio_release(acb);
2576 }
2577 
/*
 * Pool of BlockDriverAIOCBSync objects used by the bottom-half based AIO
 * emulation; requests from it are cancelled with bdrv_aio_cancel_em().
 */
static AIOPool bdrv_em_aio_pool = {
    .aiocb_size         = sizeof(BlockDriverAIOCBSync),
    .cancel             = bdrv_aio_cancel_em,
};
2582 
2583 static void bdrv_aio_bh_cb(void *opaque)
2584 {
2585     BlockDriverAIOCBSync *acb = opaque;
2586 
2587     if (!acb->is_write)
2588         qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
2589     qemu_vfree(acb->bounce);
2590     acb->common.cb(acb->common.opaque, acb->ret);
2591     qemu_bh_delete(acb->bh);
2592     acb->bh = NULL;
2593     qemu_aio_release(acb);
2594 }
2595 
2596 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2597                                             int64_t sector_num,
2598                                             QEMUIOVector *qiov,
2599                                             int nb_sectors,
2600                                             BlockDriverCompletionFunc *cb,
2601                                             void *opaque,
2602                                             int is_write)
2603 
2604 {
2605     BlockDriverAIOCBSync *acb;
2606 
2607     acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2608     acb->is_write = is_write;
2609     acb->qiov = qiov;
2610     acb->bounce = qemu_blockalign(bs, qiov->size);
2611 
2612     if (!acb->bh)
2613         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2614 
2615     if (is_write) {
2616         qemu_iovec_to_buffer(acb->qiov, acb->bounce);
2617         acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
2618     } else {
2619         acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
2620     }
2621 
2622     qemu_bh_schedule(acb->bh);
2623 
2624     return &acb->common;
2625 }
2626 
2627 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2628         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2629         BlockDriverCompletionFunc *cb, void *opaque)
2630 {
2631     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2632 }
2633 
2634 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2635         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2636         BlockDriverCompletionFunc *cb, void *opaque)
2637 {
2638     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
2639 }
2640 
2641 
/*
 * AIOCB used to emulate AIO requests on top of the driver's coroutine
 * read/write callbacks.
 */
typedef struct BlockDriverAIOCBCoroutine {
    BlockDriverAIOCB common;    /* generic AIOCB part */
    BlockRequest req;           /* sector, nb_sectors, qiov and result (error) */
    bool is_write;              /* true for writes, false for reads */
    QEMUBH* bh;                 /* bottom half that delivers the completion */
} BlockDriverAIOCBCoroutine;
2648 
2649 static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
2650 {
2651     qemu_aio_flush();
2652 }
2653 
/*
 * Pool of BlockDriverAIOCBCoroutine objects used by the coroutine based AIO
 * emulation; requests from it are cancelled with bdrv_aio_co_cancel_em().
 */
static AIOPool bdrv_em_co_aio_pool = {
    .aiocb_size         = sizeof(BlockDriverAIOCBCoroutine),
    .cancel             = bdrv_aio_co_cancel_em,
};
2658 
2659 static void bdrv_co_rw_bh(void *opaque)
2660 {
2661     BlockDriverAIOCBCoroutine *acb = opaque;
2662 
2663     acb->common.cb(acb->common.opaque, acb->req.error);
2664     qemu_bh_delete(acb->bh);
2665     qemu_aio_release(acb);
2666 }
2667 
2668 static void coroutine_fn bdrv_co_rw(void *opaque)
2669 {
2670     BlockDriverAIOCBCoroutine *acb = opaque;
2671     BlockDriverState *bs = acb->common.bs;
2672 
2673     if (!acb->is_write) {
2674         acb->req.error = bs->drv->bdrv_co_readv(bs, acb->req.sector,
2675             acb->req.nb_sectors, acb->req.qiov);
2676     } else {
2677         acb->req.error = bs->drv->bdrv_co_writev(bs, acb->req.sector,
2678             acb->req.nb_sectors, acb->req.qiov);
2679     }
2680 
2681     acb->bh = qemu_bh_new(bdrv_co_rw_bh, acb);
2682     qemu_bh_schedule(acb->bh);
2683 }
2684 
2685 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
2686                                                int64_t sector_num,
2687                                                QEMUIOVector *qiov,
2688                                                int nb_sectors,
2689                                                BlockDriverCompletionFunc *cb,
2690                                                void *opaque,
2691                                                bool is_write)
2692 {
2693     Coroutine *co;
2694     BlockDriverAIOCBCoroutine *acb;
2695 
2696     acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2697     acb->req.sector = sector_num;
2698     acb->req.nb_sectors = nb_sectors;
2699     acb->req.qiov = qiov;
2700     acb->is_write = is_write;
2701 
2702     co = qemu_coroutine_create(bdrv_co_rw);
2703     qemu_coroutine_enter(co, acb);
2704 
2705     return &acb->common;
2706 }
2707 
2708 static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
2709         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2710         BlockDriverCompletionFunc *cb, void *opaque)
2711 {
2712     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
2713                                  false);
2714 }
2715 
2716 static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
2717         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2718         BlockDriverCompletionFunc *cb, void *opaque)
2719 {
2720     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
2721                                  true);
2722 }
2723 
2724 static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
2725         BlockDriverCompletionFunc *cb, void *opaque)
2726 {
2727     BlockDriverAIOCBSync *acb;
2728 
2729     acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2730     acb->is_write = 1; /* don't bounce in the completion hadler */
2731     acb->qiov = NULL;
2732     acb->bounce = NULL;
2733     acb->ret = 0;
2734 
2735     if (!acb->bh)
2736         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2737 
2738     bdrv_flush(bs);
2739     qemu_bh_schedule(acb->bh);
2740     return &acb->common;
2741 }
2742 
2743 static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
2744         BlockDriverCompletionFunc *cb, void *opaque)
2745 {
2746     BlockDriverAIOCBSync *acb;
2747 
2748     acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2749     acb->is_write = 1; /* don't bounce in the completion handler */
2750     acb->qiov = NULL;
2751     acb->bounce = NULL;
2752     acb->ret = 0;
2753 
2754     if (!acb->bh) {
2755         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2756     }
2757 
2758     qemu_bh_schedule(acb->bh);
2759     return &acb->common;
2760 }
2761 
2762 /**************************************************************/
2763 /* sync block device emulation */
2764 
/*
 * Completion callback of the synchronous emulation: @opaque points to an int
 * that receives the request's result @ret.
 */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}
2769 
2770 #define NOT_DONE 0x7fffffff
2771 
2772 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
2773                         uint8_t *buf, int nb_sectors)
2774 {
2775     int async_ret;
2776     BlockDriverAIOCB *acb;
2777     struct iovec iov;
2778     QEMUIOVector qiov;
2779 
2780     async_context_push();
2781 
2782     async_ret = NOT_DONE;
2783     iov.iov_base = (void *)buf;
2784     iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
2785     qemu_iovec_init_external(&qiov, &iov, 1);
2786     acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
2787         bdrv_rw_em_cb, &async_ret);
2788     if (acb == NULL) {
2789         async_ret = -1;
2790         goto fail;
2791     }
2792 
2793     while (async_ret == NOT_DONE) {
2794         qemu_aio_wait();
2795     }
2796 
2797 
2798 fail:
2799     async_context_pop();
2800     return async_ret;
2801 }
2802 
2803 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
2804                          const uint8_t *buf, int nb_sectors)
2805 {
2806     int async_ret;
2807     BlockDriverAIOCB *acb;
2808     struct iovec iov;
2809     QEMUIOVector qiov;
2810 
2811     async_context_push();
2812 
2813     async_ret = NOT_DONE;
2814     iov.iov_base = (void *)buf;
2815     iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
2816     qemu_iovec_init_external(&qiov, &iov, 1);
2817     acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
2818         bdrv_rw_em_cb, &async_ret);
2819     if (acb == NULL) {
2820         async_ret = -1;
2821         goto fail;
2822     }
2823     while (async_ret == NOT_DONE) {
2824         qemu_aio_wait();
2825     }
2826 
2827 fail:
2828     async_context_pop();
2829     return async_ret;
2830 }
2831 
2832 void bdrv_init(void)
2833 {
2834     module_call_init(MODULE_INIT_BLOCK);
2835 }
2836 
2837 void bdrv_init_with_whitelist(void)
2838 {
2839     use_bdrv_whitelist = 1;
2840     bdrv_init();
2841 }
2842 
2843 void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2844                    BlockDriverCompletionFunc *cb, void *opaque)
2845 {
2846     BlockDriverAIOCB *acb;
2847 
2848     if (pool->free_aiocb) {
2849         acb = pool->free_aiocb;
2850         pool->free_aiocb = acb->next;
2851     } else {
2852         acb = qemu_mallocz(pool->aiocb_size);
2853         acb->pool = pool;
2854     }
2855     acb->bs = bs;
2856     acb->cb = cb;
2857     acb->opaque = opaque;
2858     return acb;
2859 }
2860 
2861 void qemu_aio_release(void *p)
2862 {
2863     BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2864     AIOPool *pool = acb->pool;
2865     acb->next = pool->free_aiocb;
2866     pool->free_aiocb = acb;
2867 }
2868 
2869 /**************************************************************/
2870 /* Coroutine block device emulation */
2871 
/*
 * Completion state shared between bdrv_co_io_em() and its AIO completion
 * callback bdrv_co_io_em_complete().
 */
typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;   /* coroutine to re-enter when the request is done */
    int ret;                /* result of the AIO request */
} CoroutineIOCompletion;
2876 
2877 static void bdrv_co_io_em_complete(void *opaque, int ret)
2878 {
2879     CoroutineIOCompletion *co = opaque;
2880 
2881     co->ret = ret;
2882     qemu_coroutine_enter(co->coroutine, NULL);
2883 }
2884 
2885 static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
2886                                       int nb_sectors, QEMUIOVector *iov,
2887                                       bool is_write)
2888 {
2889     CoroutineIOCompletion co = {
2890         .coroutine = qemu_coroutine_self(),
2891     };
2892     BlockDriverAIOCB *acb;
2893 
2894     if (is_write) {
2895         acb = bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
2896                               bdrv_co_io_em_complete, &co);
2897     } else {
2898         acb = bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
2899                              bdrv_co_io_em_complete, &co);
2900     }
2901 
2902     trace_bdrv_co_io(is_write, acb);
2903     if (!acb) {
2904         return -EIO;
2905     }
2906     qemu_coroutine_yield();
2907 
2908     return co.ret;
2909 }
2910 
2911 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
2912                                          int64_t sector_num, int nb_sectors,
2913                                          QEMUIOVector *iov)
2914 {
2915     return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
2916 }
2917 
2918 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
2919                                          int64_t sector_num, int nb_sectors,
2920                                          QEMUIOVector *iov)
2921 {
2922     return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
2923 }
2924 
2925 /**************************************************************/
2926 /* removable device support */
2927 
2928 /**
2929  * Return TRUE if the media is present
2930  */
2931 int bdrv_is_inserted(BlockDriverState *bs)
2932 {
2933     BlockDriver *drv = bs->drv;
2934     int ret;
2935     if (!drv)
2936         return 0;
2937     if (!drv->bdrv_is_inserted)
2938         return !bs->tray_open;
2939     ret = drv->bdrv_is_inserted(bs);
2940     return ret;
2941 }
2942 
2943 /**
2944  * Return TRUE if the media changed since the last call to this
2945  * function. It is currently only used for floppy disks
2946  */
2947 int bdrv_media_changed(BlockDriverState *bs)
2948 {
2949     BlockDriver *drv = bs->drv;
2950     int ret;
2951 
2952     if (!drv || !drv->bdrv_media_changed)
2953         ret = -ENOTSUP;
2954     else
2955         ret = drv->bdrv_media_changed(bs);
2956     if (ret == -ENOTSUP)
2957         ret = bs->media_changed;
2958     bs->media_changed = 0;
2959     return ret;
2960 }
2961 
2962 /**
2963  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
2964  */
2965 int bdrv_eject(BlockDriverState *bs, int eject_flag)
2966 {
2967     BlockDriver *drv = bs->drv;
2968 
2969     if (eject_flag && bs->locked) {
2970         return -EBUSY;
2971     }
2972 
2973     if (drv && drv->bdrv_eject) {
2974         drv->bdrv_eject(bs, eject_flag);
2975     }
2976     bs->tray_open = eject_flag;
2977     return 0;
2978 }
2979 
2980 int bdrv_is_locked(BlockDriverState *bs)
2981 {
2982     return bs->locked;
2983 }
2984 
2985 /**
2986  * Lock or unlock the media (if it is locked, the user won't be able
2987  * to eject it manually).
2988  */
2989 void bdrv_set_locked(BlockDriverState *bs, int locked)
2990 {
2991     BlockDriver *drv = bs->drv;
2992 
2993     trace_bdrv_set_locked(bs, locked);
2994 
2995     bs->locked = locked;
2996     if (drv && drv->bdrv_set_locked) {
2997         drv->bdrv_set_locked(bs, locked);
2998     }
2999 }
3000 
3001 /* needed for generic scsi interface */
3002 
3003 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3004 {
3005     BlockDriver *drv = bs->drv;
3006 
3007     if (drv && drv->bdrv_ioctl)
3008         return drv->bdrv_ioctl(bs, req, buf);
3009     return -ENOTSUP;
3010 }
3011 
3012 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3013         unsigned long int req, void *buf,
3014         BlockDriverCompletionFunc *cb, void *opaque)
3015 {
3016     BlockDriver *drv = bs->drv;
3017 
3018     if (drv && drv->bdrv_aio_ioctl)
3019         return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3020     return NULL;
3021 }
3022 
3023 
3024 
3025 void *qemu_blockalign(BlockDriverState *bs, size_t size)
3026 {
3027     return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3028 }
3029 
3030 void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3031 {
3032     int64_t bitmap_size;
3033 
3034     bs->dirty_count = 0;
3035     if (enable) {
3036         if (!bs->dirty_bitmap) {
3037             bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3038                     BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3039             bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
3040 
3041             bs->dirty_bitmap = qemu_mallocz(bitmap_size);
3042         }
3043     } else {
3044         if (bs->dirty_bitmap) {
3045             qemu_free(bs->dirty_bitmap);
3046             bs->dirty_bitmap = NULL;
3047         }
3048     }
3049 }
3050 
3051 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3052 {
3053     int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
3054 
3055     if (bs->dirty_bitmap &&
3056         (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
3057         return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3058             (1UL << (chunk % (sizeof(unsigned long) * 8))));
3059     } else {
3060         return 0;
3061     }
3062 }
3063 
3064 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3065                       int nr_sectors)
3066 {
3067     set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3068 }
3069 
3070 int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3071 {
3072     return bs->dirty_count;
3073 }
3074 
3075 void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3076 {
3077     assert(bs->in_use != in_use);
3078     bs->in_use = in_use;
3079 }
3080 
3081 int bdrv_in_use(BlockDriverState *bs)
3082 {
3083     return bs->in_use;
3084 }
3085 
3086 int bdrv_img_create(const char *filename, const char *fmt,
3087                     const char *base_filename, const char *base_fmt,
3088                     char *options, uint64_t img_size, int flags)
3089 {
3090     QEMUOptionParameter *param = NULL, *create_options = NULL;
3091     QEMUOptionParameter *backing_fmt, *backing_file, *size;
3092     BlockDriverState *bs = NULL;
3093     BlockDriver *drv, *proto_drv;
3094     BlockDriver *backing_drv = NULL;
3095     int ret = 0;
3096 
3097     /* Find driver and parse its options */
3098     drv = bdrv_find_format(fmt);
3099     if (!drv) {
3100         error_report("Unknown file format '%s'", fmt);
3101         ret = -EINVAL;
3102         goto out;
3103     }
3104 
3105     proto_drv = bdrv_find_protocol(filename);
3106     if (!proto_drv) {
3107         error_report("Unknown protocol '%s'", filename);
3108         ret = -EINVAL;
3109         goto out;
3110     }
3111 
3112     create_options = append_option_parameters(create_options,
3113                                               drv->create_options);
3114     create_options = append_option_parameters(create_options,
3115                                               proto_drv->create_options);
3116 
3117     /* Create parameter list with default values */
3118     param = parse_option_parameters("", create_options, param);
3119 
3120     set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3121 
3122     /* Parse -o options */
3123     if (options) {
3124         param = parse_option_parameters(options, create_options, param);
3125         if (param == NULL) {
3126             error_report("Invalid options for file format '%s'.", fmt);
3127             ret = -EINVAL;
3128             goto out;
3129         }
3130     }
3131 
3132     if (base_filename) {
3133         if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3134                                  base_filename)) {
3135             error_report("Backing file not supported for file format '%s'",
3136                          fmt);
3137             ret = -EINVAL;
3138             goto out;
3139         }
3140     }
3141 
3142     if (base_fmt) {
3143         if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3144             error_report("Backing file format not supported for file "
3145                          "format '%s'", fmt);
3146             ret = -EINVAL;
3147             goto out;
3148         }
3149     }
3150 
3151     backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3152     if (backing_file && backing_file->value.s) {
3153         if (!strcmp(filename, backing_file->value.s)) {
3154             error_report("Error: Trying to create an image with the "
3155                          "same filename as the backing file");
3156             ret = -EINVAL;
3157             goto out;
3158         }
3159     }
3160 
3161     backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3162     if (backing_fmt && backing_fmt->value.s) {
3163         backing_drv = bdrv_find_format(backing_fmt->value.s);
3164         if (!backing_drv) {
3165             error_report("Unknown backing file format '%s'",
3166                          backing_fmt->value.s);
3167             ret = -EINVAL;
3168             goto out;
3169         }
3170     }
3171 
3172     // The size for the image must always be specified, with one exception:
3173     // If we are using a backing file, we can obtain the size from there
3174     size = get_option_parameter(param, BLOCK_OPT_SIZE);
3175     if (size && size->value.n == -1) {
3176         if (backing_file && backing_file->value.s) {
3177             uint64_t size;
3178             char buf[32];
3179 
3180             bs = bdrv_new("");
3181 
3182             ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
3183             if (ret < 0) {
3184                 error_report("Could not open '%s'", backing_file->value.s);
3185                 goto out;
3186             }
3187             bdrv_get_geometry(bs, &size);
3188             size *= 512;
3189 
3190             snprintf(buf, sizeof(buf), "%" PRId64, size);
3191             set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3192         } else {
3193             error_report("Image creation needs a size parameter");
3194             ret = -EINVAL;
3195             goto out;
3196         }
3197     }
3198 
3199     printf("Formatting '%s', fmt=%s ", filename, fmt);
3200     print_option_parameters(param);
3201     puts("");
3202 
3203     ret = bdrv_create(drv, filename, param);
3204 
3205     if (ret < 0) {
3206         if (ret == -ENOTSUP) {
3207             error_report("Formatting or formatting option not supported for "
3208                          "file format '%s'", fmt);
3209         } else if (ret == -EFBIG) {
3210             error_report("The image size is too large for file format '%s'",
3211                          fmt);
3212         } else {
3213             error_report("%s: error while creating %s: %s", filename, fmt,
3214                          strerror(-ret));
3215         }
3216     }
3217 
3218 out:
3219     free_option_parameters(create_options);
3220     free_option_parameters(param);
3221 
3222     if (bs) {
3223         bdrv_delete(bs);
3224     }
3225 
3226     return ret;
3227 }
3228