xref: /openbmc/qemu/block.c (revision 66f82ceed6781261c09e65fb440ca76842fd0500)
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "monitor.h"
27 #include "block_int.h"
28 #include "module.h"
29 #include "qemu-objects.h"
30 
31 #ifdef CONFIG_BSD
32 #include <sys/types.h>
33 #include <sys/stat.h>
34 #include <sys/ioctl.h>
35 #include <sys/queue.h>
36 #ifndef __DragonFly__
37 #include <sys/disk.h>
38 #endif
39 #endif
40 
41 #ifdef _WIN32
42 #include <windows.h>
43 #endif
44 
45 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
46         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
47         BlockDriverCompletionFunc *cb, void *opaque);
48 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
49         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
50         BlockDriverCompletionFunc *cb, void *opaque);
51 static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
52         BlockDriverCompletionFunc *cb, void *opaque);
53 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
54                         uint8_t *buf, int nb_sectors);
55 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
56                          const uint8_t *buf, int nb_sectors);
57 static BlockDriver *find_protocol(const char *filename);
58 
59 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
60     QTAILQ_HEAD_INITIALIZER(bdrv_states);
61 
62 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
63     QLIST_HEAD_INITIALIZER(bdrv_drivers);
64 
65 /* If non-zero, use only whitelisted block drivers */
66 static int use_bdrv_whitelist;
67 
/*
 * Tell whether 'path' is absolute.
 *
 * Anything up to and including the first ':' is skipped before the
 * test, so "proto:/dir/file" counts as absolute.  Returns 1 if the
 * remaining part starts with a directory separator, 0 otherwise.
 */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif
    colon = strchr(path, ':');
    start = colon ? colon + 1 : path;
#ifdef _WIN32
    if (*start == '\\') {
        return 1;
    }
#endif
    return *start == '/';
}
87 
/*
 * Build in 'dest' (at most dest_size bytes, always NUL-terminated when
 * dest_size > 0) the location of 'filename'.
 *
 * An absolute 'filename' is copied as is.  Otherwise the directory part
 * of 'base_path' (everything up to the last separator, or up to the
 * first ':' if that comes later) is used as prefix, which lets
 * "proto:..." style base paths work as well.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_proto, *after_sep;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* first character following the "proto:" prefix, if any */
    after_proto = strchr(base_path, ':');
    after_proto = after_proto ? after_proto + 1 : base_path;

    /* first character following the last directory separator */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *bslash = strrchr(base_path, '\\');
        if (!after_sep || bslash > after_sep) {
            after_sep = bslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* the prefix ends at whichever of the two lies further right */
    prefix_len = (after_sep > after_proto ? after_sep : after_proto)
                 - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
131 
132 void bdrv_register(BlockDriver *bdrv)
133 {
134     if (!bdrv->bdrv_aio_readv) {
135         /* add AIO emulation layer */
136         bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
137         bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
138     } else if (!bdrv->bdrv_read) {
139         /* add synchronous IO emulation layer */
140         bdrv->bdrv_read = bdrv_read_em;
141         bdrv->bdrv_write = bdrv_write_em;
142     }
143 
144     if (!bdrv->bdrv_aio_flush)
145         bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
146 
147     QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
148 }
149 
150 /* create a new block device (by default it is empty) */
151 BlockDriverState *bdrv_new(const char *device_name)
152 {
153     BlockDriverState *bs;
154 
155     bs = qemu_mallocz(sizeof(BlockDriverState));
156     pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
157     if (device_name[0] != '\0') {
158         QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
159     }
160     return bs;
161 }
162 
163 BlockDriver *bdrv_find_format(const char *format_name)
164 {
165     BlockDriver *drv1;
166     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
167         if (!strcmp(drv1->format_name, format_name)) {
168             return drv1;
169         }
170     }
171     return NULL;
172 }
173 
174 static int bdrv_is_whitelisted(BlockDriver *drv)
175 {
176     static const char *whitelist[] = {
177         CONFIG_BDRV_WHITELIST
178     };
179     const char **p;
180 
181     if (!whitelist[0])
182         return 1;               /* no whitelist, anything goes */
183 
184     for (p = whitelist; *p; p++) {
185         if (!strcmp(drv->format_name, *p)) {
186             return 1;
187         }
188     }
189     return 0;
190 }
191 
192 BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
193 {
194     BlockDriver *drv = bdrv_find_format(format_name);
195     return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
196 }
197 
198 int bdrv_create(BlockDriver *drv, const char* filename,
199     QEMUOptionParameter *options)
200 {
201     if (!drv->bdrv_create)
202         return -ENOTSUP;
203 
204     return drv->bdrv_create(filename, options);
205 }
206 
207 int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
208 {
209     BlockDriver *drv;
210 
211     drv = find_protocol(filename);
212     if (drv == NULL) {
213         drv = bdrv_find_format("file");
214     }
215 
216     return bdrv_create(drv, filename, options);
217 }
218 
219 #ifdef _WIN32
220 void get_tmp_filename(char *filename, int size)
221 {
222     char temp_dir[MAX_PATH];
223 
224     GetTempPath(MAX_PATH, temp_dir);
225     GetTempFileName(temp_dir, "qem", 0, filename);
226 }
227 #else
/*
 * Create an empty temporary file below $TMPDIR (or /tmp when TMPDIR is
 * unset) and store its name in 'filename' (capacity 'size' bytes).
 *
 * The file is closed again right away; only the name is handed back,
 * so another process could replace the file before the caller reopens
 * it.
 */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;
    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir)
        tmpdir = "/tmp";
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    /* mkstemp() returns -1 on failure: do not close an invalid
       descriptor (which would also clobber errno) */
    if (fd >= 0) {
        close(fd);
    }
}
240 #endif
241 
242 #ifdef _WIN32
/*
 * Return non-zero if 'filename' starts with an ASCII letter followed
 * by ':' (e.g. "c:").
 */
static int is_windows_drive_prefix(const char *filename)
{
    char letter = filename[0];
    int is_alpha = (letter >= 'a' && letter <= 'z') ||
                   (letter >= 'A' && letter <= 'Z');

    /* filename[1] is only looked at when the first character matched */
    return is_alpha && filename[1] == ':';
}
249 
/*
 * Return 1 if 'filename' is exactly a drive specification ("d:") or
 * starts with the "\\.\" or "//./" prefix, 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }
    return strstart(filename, "\\\\.\\", NULL) ||
           strstart(filename, "//./", NULL);
}
260 #endif
261 
262 /*
263  * Detect host devices. By convention, /dev/cdrom[N] is always
264  * recognized as a host CDROM.
265  */
266 static BlockDriver *find_hdev_driver(const char *filename)
267 {
268     int score_max = 0, score;
269     BlockDriver *drv = NULL, *d;
270 
271     QLIST_FOREACH(d, &bdrv_drivers, list) {
272         if (d->bdrv_probe_device) {
273             score = d->bdrv_probe_device(filename);
274             if (score > score_max) {
275                 score_max = score;
276                 drv = d;
277             }
278         }
279     }
280 
281     return drv;
282 }
283 
284 static BlockDriver *find_protocol(const char *filename)
285 {
286     BlockDriver *drv1;
287     char protocol[128];
288     int len;
289     const char *p;
290 
291     /* TODO Drivers without bdrv_file_open must be specified explicitly */
292 
293 #ifdef _WIN32
294     if (is_windows_drive(filename) ||
295         is_windows_drive_prefix(filename))
296         return bdrv_find_format("file");
297 #endif
298     p = strchr(filename, ':');
299     if (!p) {
300         drv1 = find_hdev_driver(filename);
301         if (!drv1) {
302             drv1 = bdrv_find_format("file");
303         }
304         return drv1;
305     }
306     len = p - filename;
307     if (len > sizeof(protocol) - 1)
308         len = sizeof(protocol) - 1;
309     memcpy(protocol, filename, len);
310     protocol[len] = '\0';
311     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
312         if (drv1->protocol_name &&
313             !strcmp(drv1->protocol_name, protocol)) {
314             return drv1;
315         }
316     }
317     return NULL;
318 }
319 
320 static BlockDriver *find_image_format(const char *filename)
321 {
322     int ret, score, score_max;
323     BlockDriver *drv1, *drv;
324     uint8_t buf[2048];
325     BlockDriverState *bs;
326 
327     drv = find_protocol(filename);
328     /* no need to test disk image formats for vvfat */
329     if (drv && strcmp(drv->format_name, "vvfat") == 0)
330         return drv;
331 
332     ret = bdrv_file_open(&bs, filename, 0);
333     if (ret < 0)
334         return NULL;
335     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
336     bdrv_delete(bs);
337     if (ret < 0) {
338         return NULL;
339     }
340 
341     score_max = 0;
342     drv = NULL;
343     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
344         if (drv1->bdrv_probe) {
345             score = drv1->bdrv_probe(buf, ret, filename);
346             if (score > score_max) {
347                 score_max = score;
348                 drv = drv1;
349             }
350         }
351     }
352     return drv;
353 }
354 
355 /*
356  * Common part for opening disk images and files
357  */
358 static int bdrv_open_common(BlockDriverState *bs, const char *filename,
359     int flags, BlockDriver *drv)
360 {
361     int ret, open_flags;
362 
363     assert(drv != NULL);
364 
365     bs->file = NULL;
366     bs->is_temporary = 0;
367     bs->encrypted = 0;
368     bs->valid_key = 0;
369     bs->open_flags = flags;
370     /* buffer_alignment defaulted to 512, drivers can change this value */
371     bs->buffer_alignment = 512;
372 
373     pstrcpy(bs->filename, sizeof(bs->filename), filename);
374 
375     if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
376         return -ENOTSUP;
377     }
378 
379     bs->drv = drv;
380     bs->opaque = qemu_mallocz(drv->instance_size);
381 
382     /*
383      * Yes, BDRV_O_NOCACHE aka O_DIRECT means we have to present a
384      * write cache to the guest.  We do need the fdatasync to flush
385      * out transactions for block allocations, and we maybe have a
386      * volatile write cache in our backing device to deal with.
387      */
388     if (flags & (BDRV_O_CACHE_WB|BDRV_O_NOCACHE))
389         bs->enable_write_cache = 1;
390 
391     /*
392      * Clear flags that are internal to the block layer before opening the
393      * image.
394      */
395     open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
396 
397     /*
398      * Snapshots should be writeable.
399      */
400     if (bs->is_temporary) {
401         open_flags |= BDRV_O_RDWR;
402     }
403 
404     /* Open the image, either directly or using a protocol */
405     if (drv->bdrv_file_open) {
406         ret = drv->bdrv_file_open(bs, filename, open_flags);
407     } else {
408         ret = bdrv_file_open(&bs->file, filename, open_flags);
409         if (ret >= 0) {
410             ret = drv->bdrv_open(bs, open_flags);
411         }
412     }
413 
414     if (ret < 0) {
415         goto free_and_fail;
416     }
417 
418     bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
419     if (drv->bdrv_getlength) {
420         bs->total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
421     }
422 #ifndef _WIN32
423     if (bs->is_temporary) {
424         unlink(filename);
425     }
426 #endif
427     return 0;
428 
429 free_and_fail:
430     if (bs->file) {
431         bdrv_delete(bs->file);
432         bs->file = NULL;
433     }
434     qemu_free(bs->opaque);
435     bs->opaque = NULL;
436     bs->drv = NULL;
437     return ret;
438 }
439 
440 /*
441  * Opens a file using a protocol (file, host_device, nbd, ...)
442  */
443 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
444 {
445     BlockDriverState *bs;
446     BlockDriver *drv;
447     int ret;
448 
449     drv = find_protocol(filename);
450     if (!drv) {
451         return -ENOENT;
452     }
453 
454     bs = bdrv_new("");
455     ret = bdrv_open_common(bs, filename, flags, drv);
456     if (ret < 0) {
457         bdrv_delete(bs);
458         return ret;
459     }
460     bs->growable = 1;
461     *pbs = bs;
462     return 0;
463 }
464 
465 /*
466  * Opens a disk image (raw, qcow2, vmdk, ...)
467  */
468 int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
469               BlockDriver *drv)
470 {
471     int ret;
472 
473     if (flags & BDRV_O_SNAPSHOT) {
474         BlockDriverState *bs1;
475         int64_t total_size;
476         int is_protocol = 0;
477         BlockDriver *bdrv_qcow2;
478         QEMUOptionParameter *options;
479         char tmp_filename[PATH_MAX];
480         char backing_filename[PATH_MAX];
481 
482         /* if snapshot, we create a temporary backing file and open it
483            instead of opening 'filename' directly */
484 
485         /* if there is a backing file, use it */
486         bs1 = bdrv_new("");
487         ret = bdrv_open(bs1, filename, 0, drv);
488         if (ret < 0) {
489             bdrv_delete(bs1);
490             return ret;
491         }
492         total_size = bdrv_getlength(bs1) >> BDRV_SECTOR_BITS;
493 
494         if (bs1->drv && bs1->drv->protocol_name)
495             is_protocol = 1;
496 
497         bdrv_delete(bs1);
498 
499         get_tmp_filename(tmp_filename, sizeof(tmp_filename));
500 
501         /* Real path is meaningless for protocols */
502         if (is_protocol)
503             snprintf(backing_filename, sizeof(backing_filename),
504                      "%s", filename);
505         else if (!realpath(filename, backing_filename))
506             return -errno;
507 
508         bdrv_qcow2 = bdrv_find_format("qcow2");
509         options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
510 
511         set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size * 512);
512         set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
513         if (drv) {
514             set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
515                 drv->format_name);
516         }
517 
518         ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
519         if (ret < 0) {
520             return ret;
521         }
522 
523         filename = tmp_filename;
524         drv = bdrv_qcow2;
525         bs->is_temporary = 1;
526     }
527 
528     /* Find the right image format driver */
529     if (!drv) {
530         drv = find_image_format(filename);
531     }
532 
533     if (!drv) {
534         ret = -ENOENT;
535         goto unlink_and_fail;
536     }
537 
538     /* Open the image */
539     ret = bdrv_open_common(bs, filename, flags, drv);
540     if (ret < 0) {
541         goto unlink_and_fail;
542     }
543 
544     /* If there is a backing file, use it */
545     if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
546         char backing_filename[PATH_MAX];
547         int back_flags;
548         BlockDriver *back_drv = NULL;
549 
550         bs->backing_hd = bdrv_new("");
551         path_combine(backing_filename, sizeof(backing_filename),
552                      filename, bs->backing_file);
553         if (bs->backing_format[0] != '\0')
554             back_drv = bdrv_find_format(bs->backing_format);
555 
556         /* backing files always opened read-only */
557         back_flags =
558             flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
559 
560         ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
561         if (ret < 0) {
562             bdrv_close(bs);
563             return ret;
564         }
565         if (bs->is_temporary) {
566             bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
567         } else {
568             /* base image inherits from "parent" */
569             bs->backing_hd->keep_read_only = bs->keep_read_only;
570         }
571     }
572 
573     if (!bdrv_key_required(bs)) {
574         /* call the change callback */
575         bs->media_changed = 1;
576         if (bs->change_cb)
577             bs->change_cb(bs->change_opaque);
578     }
579 
580     return 0;
581 
582 unlink_and_fail:
583     if (bs->is_temporary) {
584         unlink(filename);
585     }
586     return ret;
587 }
588 
589 void bdrv_close(BlockDriverState *bs)
590 {
591     if (bs->drv) {
592         if (bs->backing_hd)
593             bdrv_delete(bs->backing_hd);
594         bs->drv->bdrv_close(bs);
595         qemu_free(bs->opaque);
596 #ifdef _WIN32
597         if (bs->is_temporary) {
598             unlink(bs->filename);
599         }
600 #endif
601         bs->opaque = NULL;
602         bs->drv = NULL;
603 
604         if (bs->file != NULL) {
605             bdrv_close(bs->file);
606         }
607 
608         /* call the change callback */
609         bs->media_changed = 1;
610         if (bs->change_cb)
611             bs->change_cb(bs->change_opaque);
612     }
613 }
614 
615 void bdrv_delete(BlockDriverState *bs)
616 {
617     /* remove from list, if necessary */
618     if (bs->device_name[0] != '\0') {
619         QTAILQ_REMOVE(&bdrv_states, bs, list);
620     }
621 
622     bdrv_close(bs);
623     if (bs->file != NULL) {
624         bdrv_delete(bs->file);
625     }
626 
627     qemu_free(bs);
628 }
629 
630 /*
631  * Run consistency checks on an image
632  *
633  * Returns the number of errors or -errno when an internal error occurs
634  */
635 int bdrv_check(BlockDriverState *bs)
636 {
637     if (bs->drv->bdrv_check == NULL) {
638         return -ENOTSUP;
639     }
640 
641     return bs->drv->bdrv_check(bs);
642 }
643 
644 /* commit COW file into the raw image */
645 int bdrv_commit(BlockDriverState *bs)
646 {
647     BlockDriver *drv = bs->drv;
648     int64_t i, total_sectors;
649     int n, j, ro, open_flags;
650     int ret = 0, rw_ret = 0;
651     unsigned char sector[512];
652     char filename[1024];
653     BlockDriverState *bs_rw, *bs_ro;
654 
655     if (!drv)
656         return -ENOMEDIUM;
657 
658     if (!bs->backing_hd) {
659         return -ENOTSUP;
660     }
661 
662     if (bs->backing_hd->keep_read_only) {
663         return -EACCES;
664     }
665 
666     ro = bs->backing_hd->read_only;
667     strncpy(filename, bs->backing_hd->filename, sizeof(filename));
668     open_flags =  bs->backing_hd->open_flags;
669 
670     if (ro) {
671         /* re-open as RW */
672         bdrv_delete(bs->backing_hd);
673         bs->backing_hd = NULL;
674         bs_rw = bdrv_new("");
675         rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR, NULL);
676         if (rw_ret < 0) {
677             bdrv_delete(bs_rw);
678             /* try to re-open read-only */
679             bs_ro = bdrv_new("");
680             ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR, NULL);
681             if (ret < 0) {
682                 bdrv_delete(bs_ro);
683                 /* drive not functional anymore */
684                 bs->drv = NULL;
685                 return ret;
686             }
687             bs->backing_hd = bs_ro;
688             return rw_ret;
689         }
690         bs->backing_hd = bs_rw;
691     }
692 
693     total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
694     for (i = 0; i < total_sectors;) {
695         if (drv->bdrv_is_allocated(bs, i, 65536, &n)) {
696             for(j = 0; j < n; j++) {
697                 if (bdrv_read(bs, i, sector, 1) != 0) {
698                     ret = -EIO;
699                     goto ro_cleanup;
700                 }
701 
702                 if (bdrv_write(bs->backing_hd, i, sector, 1) != 0) {
703                     ret = -EIO;
704                     goto ro_cleanup;
705                 }
706                 i++;
707 	    }
708 	} else {
709             i += n;
710         }
711     }
712 
713     if (drv->bdrv_make_empty) {
714         ret = drv->bdrv_make_empty(bs);
715         bdrv_flush(bs);
716     }
717 
718     /*
719      * Make sure all data we wrote to the backing device is actually
720      * stable on disk.
721      */
722     if (bs->backing_hd)
723         bdrv_flush(bs->backing_hd);
724 
725 ro_cleanup:
726 
727     if (ro) {
728         /* re-open as RO */
729         bdrv_delete(bs->backing_hd);
730         bs->backing_hd = NULL;
731         bs_ro = bdrv_new("");
732         ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR, NULL);
733         if (ret < 0) {
734             bdrv_delete(bs_ro);
735             /* drive not functional anymore */
736             bs->drv = NULL;
737             return ret;
738         }
739         bs->backing_hd = bs_ro;
740         bs->backing_hd->keep_read_only = 0;
741     }
742 
743     return ret;
744 }
745 
746 /*
747  * Return values:
748  * 0        - success
749  * -EINVAL  - backing format specified, but no file
750  * -ENOSPC  - can't update the backing file because no space is left in the
751  *            image file header
752  * -ENOTSUP - format driver doesn't support changing the backing file
753  */
754 int bdrv_change_backing_file(BlockDriverState *bs,
755     const char *backing_file, const char *backing_fmt)
756 {
757     BlockDriver *drv = bs->drv;
758 
759     if (drv->bdrv_change_backing_file != NULL) {
760         return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
761     } else {
762         return -ENOTSUP;
763     }
764 }
765 
766 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
767                                    size_t size)
768 {
769     int64_t len;
770 
771     if (!bdrv_is_inserted(bs))
772         return -ENOMEDIUM;
773 
774     if (bs->growable)
775         return 0;
776 
777     len = bdrv_getlength(bs);
778 
779     if (offset < 0)
780         return -EIO;
781 
782     if ((offset > len) || (len - offset < size))
783         return -EIO;
784 
785     return 0;
786 }
787 
788 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
789                               int nb_sectors)
790 {
791     return bdrv_check_byte_request(bs, sector_num * 512, nb_sectors * 512);
792 }
793 
794 /* return < 0 if error. See bdrv_write() for the return codes */
795 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
796               uint8_t *buf, int nb_sectors)
797 {
798     BlockDriver *drv = bs->drv;
799 
800     if (!drv)
801         return -ENOMEDIUM;
802     if (bdrv_check_request(bs, sector_num, nb_sectors))
803         return -EIO;
804 
805     return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
806 }
807 
808 static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
809                              int nb_sectors, int dirty)
810 {
811     int64_t start, end;
812     unsigned long val, idx, bit;
813 
814     start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
815     end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
816 
817     for (; start <= end; start++) {
818         idx = start / (sizeof(unsigned long) * 8);
819         bit = start % (sizeof(unsigned long) * 8);
820         val = bs->dirty_bitmap[idx];
821         if (dirty) {
822             if (!(val & (1 << bit))) {
823                 bs->dirty_count++;
824                 val |= 1 << bit;
825             }
826         } else {
827             if (val & (1 << bit)) {
828                 bs->dirty_count--;
829                 val &= ~(1 << bit);
830             }
831         }
832         bs->dirty_bitmap[idx] = val;
833     }
834 }
835 
836 /* Return < 0 if error. Important errors are:
837   -EIO         generic I/O error (may happen for all errors)
838   -ENOMEDIUM   No media inserted.
839   -EINVAL      Invalid sector number or nb_sectors
840   -EACCES      Trying to write a read-only device
841 */
842 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
843                const uint8_t *buf, int nb_sectors)
844 {
845     BlockDriver *drv = bs->drv;
846     if (!bs->drv)
847         return -ENOMEDIUM;
848     if (bs->read_only)
849         return -EACCES;
850     if (bdrv_check_request(bs, sector_num, nb_sectors))
851         return -EIO;
852 
853     if (bs->dirty_bitmap) {
854         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
855     }
856 
857     return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
858 }
859 
860 int bdrv_pread(BlockDriverState *bs, int64_t offset,
861                void *buf, int count1)
862 {
863     uint8_t tmp_buf[BDRV_SECTOR_SIZE];
864     int len, nb_sectors, count;
865     int64_t sector_num;
866     int ret;
867 
868     count = count1;
869     /* first read to align to sector start */
870     len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
871     if (len > count)
872         len = count;
873     sector_num = offset >> BDRV_SECTOR_BITS;
874     if (len > 0) {
875         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
876             return ret;
877         memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
878         count -= len;
879         if (count == 0)
880             return count1;
881         sector_num++;
882         buf += len;
883     }
884 
885     /* read the sectors "in place" */
886     nb_sectors = count >> BDRV_SECTOR_BITS;
887     if (nb_sectors > 0) {
888         if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
889             return ret;
890         sector_num += nb_sectors;
891         len = nb_sectors << BDRV_SECTOR_BITS;
892         buf += len;
893         count -= len;
894     }
895 
896     /* add data from the last sector */
897     if (count > 0) {
898         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
899             return ret;
900         memcpy(buf, tmp_buf, count);
901     }
902     return count1;
903 }
904 
905 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
906                 const void *buf, int count1)
907 {
908     uint8_t tmp_buf[BDRV_SECTOR_SIZE];
909     int len, nb_sectors, count;
910     int64_t sector_num;
911     int ret;
912 
913     count = count1;
914     /* first write to align to sector start */
915     len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
916     if (len > count)
917         len = count;
918     sector_num = offset >> BDRV_SECTOR_BITS;
919     if (len > 0) {
920         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
921             return ret;
922         memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
923         if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
924             return ret;
925         count -= len;
926         if (count == 0)
927             return count1;
928         sector_num++;
929         buf += len;
930     }
931 
932     /* write the sectors "in place" */
933     nb_sectors = count >> BDRV_SECTOR_BITS;
934     if (nb_sectors > 0) {
935         if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
936             return ret;
937         sector_num += nb_sectors;
938         len = nb_sectors << BDRV_SECTOR_BITS;
939         buf += len;
940         count -= len;
941     }
942 
943     /* add data from the last sector */
944     if (count > 0) {
945         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
946             return ret;
947         memcpy(tmp_buf, buf, count);
948         if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
949             return ret;
950     }
951     return count1;
952 }
953 
954 /**
955  * Truncate file to 'offset' bytes (needed only for file protocols)
956  */
957 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
958 {
959     BlockDriver *drv = bs->drv;
960     if (!drv)
961         return -ENOMEDIUM;
962     if (!drv->bdrv_truncate)
963         return -ENOTSUP;
964     if (bs->read_only)
965         return -EACCES;
966     return drv->bdrv_truncate(bs, offset);
967 }
968 
969 /**
970  * Length of a file in bytes. Return < 0 if error or unknown.
971  */
972 int64_t bdrv_getlength(BlockDriverState *bs)
973 {
974     BlockDriver *drv = bs->drv;
975     if (!drv)
976         return -ENOMEDIUM;
977     if (!drv->bdrv_getlength) {
978         /* legacy mode */
979         return bs->total_sectors * BDRV_SECTOR_SIZE;
980     }
981     return drv->bdrv_getlength(bs);
982 }
983 
984 /* return 0 as number of sectors if no device present or error */
985 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
986 {
987     int64_t length;
988     length = bdrv_getlength(bs);
989     if (length < 0)
990         length = 0;
991     else
992         length = length >> BDRV_SECTOR_BITS;
993     *nb_sectors_ptr = length;
994 }
995 
/* One 16-byte entry of an MSDOS partition table */
struct partition {
        uint8_t boot_ind;           /* boot indicator, 0x80 = active */
        uint8_t head;               /* head where the partition starts */
        uint8_t sector;             /* sector where the partition starts */
        uint8_t cyl;                /* cylinder where the partition starts */
        uint8_t sys_ind;            /* partition type */
        uint8_t end_head;           /* head where the partition ends */
        uint8_t end_sector;         /* sector where the partition ends */
        uint8_t end_cyl;            /* cylinder where the partition ends */
        uint32_t start_sect;        /* first sector, counted from 0 */
        uint32_t nr_sects;          /* number of sectors in the partition */
} __attribute__((packed));
1008 
1009 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1010 static int guess_disk_lchs(BlockDriverState *bs,
1011                            int *pcylinders, int *pheads, int *psectors)
1012 {
1013     uint8_t buf[512];
1014     int ret, i, heads, sectors, cylinders;
1015     struct partition *p;
1016     uint32_t nr_sects;
1017     uint64_t nb_sectors;
1018 
1019     bdrv_get_geometry(bs, &nb_sectors);
1020 
1021     ret = bdrv_read(bs, 0, buf, 1);
1022     if (ret < 0)
1023         return -1;
1024     /* test msdos magic */
1025     if (buf[510] != 0x55 || buf[511] != 0xaa)
1026         return -1;
1027     for(i = 0; i < 4; i++) {
1028         p = ((struct partition *)(buf + 0x1be)) + i;
1029         nr_sects = le32_to_cpu(p->nr_sects);
1030         if (nr_sects && p->end_head) {
1031             /* We make the assumption that the partition terminates on
1032                a cylinder boundary */
1033             heads = p->end_head + 1;
1034             sectors = p->end_sector & 63;
1035             if (sectors == 0)
1036                 continue;
1037             cylinders = nb_sectors / (heads * sectors);
1038             if (cylinders < 1 || cylinders > 16383)
1039                 continue;
1040             *pheads = heads;
1041             *psectors = sectors;
1042             *pcylinders = cylinders;
1043 #if 0
1044             printf("guessed geometry: LCHS=%d %d %d\n",
1045                    cylinders, heads, sectors);
1046 #endif
1047             return 0;
1048         }
1049     }
1050     return -1;
1051 }
1052 
1053 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1054 {
1055     int translation, lba_detected = 0;
1056     int cylinders, heads, secs;
1057     uint64_t nb_sectors;
1058 
1059     /* if a geometry hint is available, use it */
1060     bdrv_get_geometry(bs, &nb_sectors);
1061     bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1062     translation = bdrv_get_translation_hint(bs);
1063     if (cylinders != 0) {
1064         *pcyls = cylinders;
1065         *pheads = heads;
1066         *psecs = secs;
1067     } else {
1068         if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1069             if (heads > 16) {
1070                 /* if heads > 16, it means that a BIOS LBA
1071                    translation was active, so the default
1072                    hardware geometry is OK */
1073                 lba_detected = 1;
1074                 goto default_geometry;
1075             } else {
1076                 *pcyls = cylinders;
1077                 *pheads = heads;
1078                 *psecs = secs;
1079                 /* disable any translation to be in sync with
1080                    the logical geometry */
1081                 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1082                     bdrv_set_translation_hint(bs,
1083                                               BIOS_ATA_TRANSLATION_NONE);
1084                 }
1085             }
1086         } else {
1087         default_geometry:
1088             /* if no geometry, use a standard physical disk geometry */
1089             cylinders = nb_sectors / (16 * 63);
1090 
1091             if (cylinders > 16383)
1092                 cylinders = 16383;
1093             else if (cylinders < 2)
1094                 cylinders = 2;
1095             *pcyls = cylinders;
1096             *pheads = 16;
1097             *psecs = 63;
1098             if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1099                 if ((*pcyls * *pheads) <= 131072) {
1100                     bdrv_set_translation_hint(bs,
1101                                               BIOS_ATA_TRANSLATION_LARGE);
1102                 } else {
1103                     bdrv_set_translation_hint(bs,
1104                                               BIOS_ATA_TRANSLATION_LBA);
1105                 }
1106             }
1107         }
1108         bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1109     }
1110 }
1111 
1112 void bdrv_set_geometry_hint(BlockDriverState *bs,
1113                             int cyls, int heads, int secs)
1114 {
1115     bs->cyls = cyls;
1116     bs->heads = heads;
1117     bs->secs = secs;
1118 }
1119 
1120 void bdrv_set_type_hint(BlockDriverState *bs, int type)
1121 {
1122     bs->type = type;
1123     bs->removable = ((type == BDRV_TYPE_CDROM ||
1124                       type == BDRV_TYPE_FLOPPY));
1125 }
1126 
1127 void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1128 {
1129     bs->translation = translation;
1130 }
1131 
1132 void bdrv_get_geometry_hint(BlockDriverState *bs,
1133                             int *pcyls, int *pheads, int *psecs)
1134 {
1135     *pcyls = bs->cyls;
1136     *pheads = bs->heads;
1137     *psecs = bs->secs;
1138 }
1139 
1140 int bdrv_get_type_hint(BlockDriverState *bs)
1141 {
1142     return bs->type;
1143 }
1144 
1145 int bdrv_get_translation_hint(BlockDriverState *bs)
1146 {
1147     return bs->translation;
1148 }
1149 
1150 int bdrv_is_removable(BlockDriverState *bs)
1151 {
1152     return bs->removable;
1153 }
1154 
1155 int bdrv_is_read_only(BlockDriverState *bs)
1156 {
1157     return bs->read_only;
1158 }
1159 
1160 int bdrv_is_sg(BlockDriverState *bs)
1161 {
1162     return bs->sg;
1163 }
1164 
1165 int bdrv_enable_write_cache(BlockDriverState *bs)
1166 {
1167     return bs->enable_write_cache;
1168 }
1169 
1170 /* XXX: no longer used */
1171 void bdrv_set_change_cb(BlockDriverState *bs,
1172                         void (*change_cb)(void *opaque), void *opaque)
1173 {
1174     bs->change_cb = change_cb;
1175     bs->change_opaque = opaque;
1176 }
1177 
1178 int bdrv_is_encrypted(BlockDriverState *bs)
1179 {
1180     if (bs->backing_hd && bs->backing_hd->encrypted)
1181         return 1;
1182     return bs->encrypted;
1183 }
1184 
1185 int bdrv_key_required(BlockDriverState *bs)
1186 {
1187     BlockDriverState *backing_hd = bs->backing_hd;
1188 
1189     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1190         return 1;
1191     return (bs->encrypted && !bs->valid_key);
1192 }
1193 
1194 int bdrv_set_key(BlockDriverState *bs, const char *key)
1195 {
1196     int ret;
1197     if (bs->backing_hd && bs->backing_hd->encrypted) {
1198         ret = bdrv_set_key(bs->backing_hd, key);
1199         if (ret < 0)
1200             return ret;
1201         if (!bs->encrypted)
1202             return 0;
1203     }
1204     if (!bs->encrypted) {
1205         return -EINVAL;
1206     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
1207         return -ENOMEDIUM;
1208     }
1209     ret = bs->drv->bdrv_set_key(bs, key);
1210     if (ret < 0) {
1211         bs->valid_key = 0;
1212     } else if (!bs->valid_key) {
1213         bs->valid_key = 1;
1214         /* call the change callback now, we skipped it on open */
1215         bs->media_changed = 1;
1216         if (bs->change_cb)
1217             bs->change_cb(bs->change_opaque);
1218     }
1219     return ret;
1220 }
1221 
1222 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1223 {
1224     if (!bs->drv) {
1225         buf[0] = '\0';
1226     } else {
1227         pstrcpy(buf, buf_size, bs->drv->format_name);
1228     }
1229 }
1230 
1231 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
1232                          void *opaque)
1233 {
1234     BlockDriver *drv;
1235 
1236     QLIST_FOREACH(drv, &bdrv_drivers, list) {
1237         it(opaque, drv->format_name);
1238     }
1239 }
1240 
1241 BlockDriverState *bdrv_find(const char *name)
1242 {
1243     BlockDriverState *bs;
1244 
1245     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1246         if (!strcmp(name, bs->device_name)) {
1247             return bs;
1248         }
1249     }
1250     return NULL;
1251 }
1252 
1253 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
1254 {
1255     BlockDriverState *bs;
1256 
1257     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1258         it(opaque, bs);
1259     }
1260 }
1261 
1262 const char *bdrv_get_device_name(BlockDriverState *bs)
1263 {
1264     return bs->device_name;
1265 }
1266 
1267 void bdrv_flush(BlockDriverState *bs)
1268 {
1269     if (bs->drv && bs->drv->bdrv_flush)
1270         bs->drv->bdrv_flush(bs);
1271 }
1272 
1273 void bdrv_flush_all(void)
1274 {
1275     BlockDriverState *bs;
1276 
1277     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1278         if (bs->drv && !bdrv_is_read_only(bs) &&
1279             (!bdrv_is_removable(bs) || bdrv_is_inserted(bs))) {
1280             bdrv_flush(bs);
1281         }
1282     }
1283 }
1284 
1285 /*
1286  * Returns true iff the specified sector is present in the disk image. Drivers
1287  * not implementing the functionality are assumed to not support backing files,
1288  * hence all their sectors are reported as allocated.
1289  *
1290  * 'pnum' is set to the number of sectors (including and immediately following
1291  * the specified sector) that are known to be in the same
1292  * allocated/unallocated state.
1293  *
1294  * 'nb_sectors' is the max value 'pnum' should be set to.
1295  */
1296 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1297 	int *pnum)
1298 {
1299     int64_t n;
1300     if (!bs->drv->bdrv_is_allocated) {
1301         if (sector_num >= bs->total_sectors) {
1302             *pnum = 0;
1303             return 0;
1304         }
1305         n = bs->total_sectors - sector_num;
1306         *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1307         return 1;
1308     }
1309     return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1310 }
1311 
1312 void bdrv_mon_event(const BlockDriverState *bdrv,
1313                     BlockMonEventAction action, int is_read)
1314 {
1315     QObject *data;
1316     const char *action_str;
1317 
1318     switch (action) {
1319     case BDRV_ACTION_REPORT:
1320         action_str = "report";
1321         break;
1322     case BDRV_ACTION_IGNORE:
1323         action_str = "ignore";
1324         break;
1325     case BDRV_ACTION_STOP:
1326         action_str = "stop";
1327         break;
1328     default:
1329         abort();
1330     }
1331 
1332     data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1333                               bdrv->device_name,
1334                               action_str,
1335                               is_read ? "read" : "write");
1336     monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1337 
1338     qobject_decref(data);
1339 }
1340 
1341 static void bdrv_print_dict(QObject *obj, void *opaque)
1342 {
1343     QDict *bs_dict;
1344     Monitor *mon = opaque;
1345 
1346     bs_dict = qobject_to_qdict(obj);
1347 
1348     monitor_printf(mon, "%s: type=%s removable=%d",
1349                         qdict_get_str(bs_dict, "device"),
1350                         qdict_get_str(bs_dict, "type"),
1351                         qdict_get_bool(bs_dict, "removable"));
1352 
1353     if (qdict_get_bool(bs_dict, "removable")) {
1354         monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
1355     }
1356 
1357     if (qdict_haskey(bs_dict, "inserted")) {
1358         QDict *qdict = qobject_to_qdict(qdict_get(bs_dict, "inserted"));
1359 
1360         monitor_printf(mon, " file=");
1361         monitor_print_filename(mon, qdict_get_str(qdict, "file"));
1362         if (qdict_haskey(qdict, "backing_file")) {
1363             monitor_printf(mon, " backing_file=");
1364             monitor_print_filename(mon, qdict_get_str(qdict, "backing_file"));
1365         }
1366         monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
1367                             qdict_get_bool(qdict, "ro"),
1368                             qdict_get_str(qdict, "drv"),
1369                             qdict_get_bool(qdict, "encrypted"));
1370     } else {
1371         monitor_printf(mon, " [not inserted]");
1372     }
1373 
1374     monitor_printf(mon, "\n");
1375 }
1376 
1377 void bdrv_info_print(Monitor *mon, const QObject *data)
1378 {
1379     qlist_iter(qobject_to_qlist(data), bdrv_print_dict, mon);
1380 }
1381 
1382 /**
1383  * bdrv_info(): Block devices information
1384  *
1385  * Each block device information is stored in a QDict and the
1386  * returned QObject is a QList of all devices.
1387  *
1388  * The QDict contains the following:
1389  *
1390  * - "device": device name
1391  * - "type": device type
1392  * - "removable": true if the device is removable, false otherwise
1393  * - "locked": true if the device is locked, false otherwise
1394  * - "inserted": only present if the device is inserted, it is a QDict
1395  *    containing the following:
1396  *          - "file": device file name
1397  *          - "ro": true if read-only, false otherwise
1398  *          - "drv": driver format name
1399  *          - "backing_file": backing file name if one is used
1400  *          - "encrypted": true if encrypted, false otherwise
1401  *
1402  * Example:
1403  *
1404  * [ { "device": "ide0-hd0", "type": "hd", "removable": false, "locked": false,
1405  *     "inserted": { "file": "/tmp/foobar", "ro": false, "drv": "qcow2" } },
1406  *   { "device": "floppy0", "type": "floppy", "removable": true,
1407  *     "locked": false } ]
1408  */
1409 void bdrv_info(Monitor *mon, QObject **ret_data)
1410 {
1411     QList *bs_list;
1412     BlockDriverState *bs;
1413 
1414     bs_list = qlist_new();
1415 
1416     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1417         QObject *bs_obj;
1418         const char *type = "unknown";
1419 
1420         switch(bs->type) {
1421         case BDRV_TYPE_HD:
1422             type = "hd";
1423             break;
1424         case BDRV_TYPE_CDROM:
1425             type = "cdrom";
1426             break;
1427         case BDRV_TYPE_FLOPPY:
1428             type = "floppy";
1429             break;
1430         }
1431 
1432         bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': %s, "
1433                                     "'removable': %i, 'locked': %i }",
1434                                     bs->device_name, type, bs->removable,
1435                                     bs->locked);
1436 
1437         if (bs->drv) {
1438             QObject *obj;
1439             QDict *bs_dict = qobject_to_qdict(bs_obj);
1440 
1441             obj = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
1442                                      "'encrypted': %i }",
1443                                      bs->filename, bs->read_only,
1444                                      bs->drv->format_name,
1445                                      bdrv_is_encrypted(bs));
1446             if (bs->backing_file[0] != '\0') {
1447                 QDict *qdict = qobject_to_qdict(obj);
1448                 qdict_put(qdict, "backing_file",
1449                           qstring_from_str(bs->backing_file));
1450             }
1451 
1452             qdict_put_obj(bs_dict, "inserted", obj);
1453         }
1454         qlist_append_obj(bs_list, bs_obj);
1455     }
1456 
1457     *ret_data = QOBJECT(bs_list);
1458 }
1459 
1460 static void bdrv_stats_iter(QObject *data, void *opaque)
1461 {
1462     QDict *qdict;
1463     Monitor *mon = opaque;
1464 
1465     qdict = qobject_to_qdict(data);
1466     monitor_printf(mon, "%s:", qdict_get_str(qdict, "device"));
1467 
1468     qdict = qobject_to_qdict(qdict_get(qdict, "stats"));
1469     monitor_printf(mon, " rd_bytes=%" PRId64
1470                         " wr_bytes=%" PRId64
1471                         " rd_operations=%" PRId64
1472                         " wr_operations=%" PRId64
1473                         "\n",
1474                         qdict_get_int(qdict, "rd_bytes"),
1475                         qdict_get_int(qdict, "wr_bytes"),
1476                         qdict_get_int(qdict, "rd_operations"),
1477                         qdict_get_int(qdict, "wr_operations"));
1478 }
1479 
1480 void bdrv_stats_print(Monitor *mon, const QObject *data)
1481 {
1482     qlist_iter(qobject_to_qlist(data), bdrv_stats_iter, mon);
1483 }
1484 
1485 /**
1486  * bdrv_info_stats(): show block device statistics
1487  *
1488  * Each device statistic information is stored in a QDict and
1489  * the returned QObject is a QList of all devices.
1490  *
1491  * The QDict contains the following:
1492  *
1493  * - "device": device name
1494  * - "stats": A QDict with the statistics information, it contains:
1495  *     - "rd_bytes": bytes read
1496  *     - "wr_bytes": bytes written
1497  *     - "rd_operations": read operations
1498  *     - "wr_operations": write operations
1499  *
1500  * Example:
1501  *
1502  * [ { "device": "ide0-hd0",
1503  *               "stats": { "rd_bytes": 512,
1504  *                          "wr_bytes": 0,
1505  *                          "rd_operations": 1,
1506  *                          "wr_operations": 0 } },
1507  *   { "device": "ide1-cd0",
1508  *               "stats": { "rd_bytes": 0,
1509  *                          "wr_bytes": 0,
1510  *                          "rd_operations": 0,
1511  *                          "wr_operations": 0 } } ]
1512  */
1513 void bdrv_info_stats(Monitor *mon, QObject **ret_data)
1514 {
1515     QObject *obj;
1516     QList *devices;
1517     BlockDriverState *bs;
1518 
1519     devices = qlist_new();
1520 
1521     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1522         obj = qobject_from_jsonf("{ 'device': %s, 'stats': {"
1523                                  "'rd_bytes': %" PRId64 ","
1524                                  "'wr_bytes': %" PRId64 ","
1525                                  "'rd_operations': %" PRId64 ","
1526                                  "'wr_operations': %" PRId64
1527                                  "} }",
1528                                  bs->device_name,
1529                                  bs->rd_bytes, bs->wr_bytes,
1530                                  bs->rd_ops, bs->wr_ops);
1531         qlist_append_obj(devices, obj);
1532     }
1533 
1534     *ret_data = QOBJECT(devices);
1535 }
1536 
1537 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
1538 {
1539     if (bs->backing_hd && bs->backing_hd->encrypted)
1540         return bs->backing_file;
1541     else if (bs->encrypted)
1542         return bs->filename;
1543     else
1544         return NULL;
1545 }
1546 
1547 void bdrv_get_backing_filename(BlockDriverState *bs,
1548                                char *filename, int filename_size)
1549 {
1550     if (!bs->backing_file) {
1551         pstrcpy(filename, filename_size, "");
1552     } else {
1553         pstrcpy(filename, filename_size, bs->backing_file);
1554     }
1555 }
1556 
1557 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
1558                           const uint8_t *buf, int nb_sectors)
1559 {
1560     BlockDriver *drv = bs->drv;
1561     if (!drv)
1562         return -ENOMEDIUM;
1563     if (!drv->bdrv_write_compressed)
1564         return -ENOTSUP;
1565     if (bdrv_check_request(bs, sector_num, nb_sectors))
1566         return -EIO;
1567 
1568     if (bs->dirty_bitmap) {
1569         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1570     }
1571 
1572     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
1573 }
1574 
1575 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1576 {
1577     BlockDriver *drv = bs->drv;
1578     if (!drv)
1579         return -ENOMEDIUM;
1580     if (!drv->bdrv_get_info)
1581         return -ENOTSUP;
1582     memset(bdi, 0, sizeof(*bdi));
1583     return drv->bdrv_get_info(bs, bdi);
1584 }
1585 
1586 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
1587                       int64_t pos, int size)
1588 {
1589     BlockDriver *drv = bs->drv;
1590     if (!drv)
1591         return -ENOMEDIUM;
1592     if (!drv->bdrv_save_vmstate)
1593         return -ENOTSUP;
1594     return drv->bdrv_save_vmstate(bs, buf, pos, size);
1595 }
1596 
1597 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
1598                       int64_t pos, int size)
1599 {
1600     BlockDriver *drv = bs->drv;
1601     if (!drv)
1602         return -ENOMEDIUM;
1603     if (!drv->bdrv_load_vmstate)
1604         return -ENOTSUP;
1605     return drv->bdrv_load_vmstate(bs, buf, pos, size);
1606 }
1607 
1608 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
1609 {
1610     BlockDriver *drv = bs->drv;
1611 
1612     if (!drv || !drv->bdrv_debug_event) {
1613         return;
1614     }
1615 
1616     return drv->bdrv_debug_event(bs, event);
1617 
1618 }
1619 
1620 /**************************************************************/
1621 /* handling of snapshots */
1622 
1623 int bdrv_snapshot_create(BlockDriverState *bs,
1624                          QEMUSnapshotInfo *sn_info)
1625 {
1626     BlockDriver *drv = bs->drv;
1627     if (!drv)
1628         return -ENOMEDIUM;
1629     if (!drv->bdrv_snapshot_create)
1630         return -ENOTSUP;
1631     return drv->bdrv_snapshot_create(bs, sn_info);
1632 }
1633 
1634 int bdrv_snapshot_goto(BlockDriverState *bs,
1635                        const char *snapshot_id)
1636 {
1637     BlockDriver *drv = bs->drv;
1638     if (!drv)
1639         return -ENOMEDIUM;
1640     if (!drv->bdrv_snapshot_goto)
1641         return -ENOTSUP;
1642     return drv->bdrv_snapshot_goto(bs, snapshot_id);
1643 }
1644 
1645 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
1646 {
1647     BlockDriver *drv = bs->drv;
1648     if (!drv)
1649         return -ENOMEDIUM;
1650     if (!drv->bdrv_snapshot_delete)
1651         return -ENOTSUP;
1652     return drv->bdrv_snapshot_delete(bs, snapshot_id);
1653 }
1654 
1655 int bdrv_snapshot_list(BlockDriverState *bs,
1656                        QEMUSnapshotInfo **psn_info)
1657 {
1658     BlockDriver *drv = bs->drv;
1659     if (!drv)
1660         return -ENOMEDIUM;
1661     if (!drv->bdrv_snapshot_list)
1662         return -ENOTSUP;
1663     return drv->bdrv_snapshot_list(bs, psn_info);
1664 }
1665 
#define NB_SUFFIXES 4

/*
 * Format @size into @buf (capacity @buf_size) in a short human readable
 * form and return @buf.
 *
 * Values up to 999 are printed as plain integers.  Larger values are scaled
 * by powers of 1024 and suffixed with K, M, G or T: one decimal is shown
 * while the scaled value is below 10, otherwise the value is rounded to the
 * nearest integer.  The next unit is chosen once the value reaches 1000 of
 * the current one; T is the largest unit and absorbs everything above.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char unit_letters[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx = 0;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    /* Move to the first unit in which the value drops below 1000, stopping
       at the last available suffix. */
    while (idx < NB_SUFFIXES - 1 && size >= 1000 * unit) {
        unit *= 1024;
        idx++;
    }

    if (size < 10 * unit) {
        snprintf(buf, buf_size, "%0.1f%c",
                 (double)size / unit,
                 unit_letters[idx]);
    } else {
        snprintf(buf, buf_size, "%" PRId64 "%c",
                 (size + (unit >> 1)) / unit,
                 unit_letters[idx]);
    }
    return buf;
}
1695 
1696 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
1697 {
1698     char buf1[128], date_buf[128], clock_buf[128];
1699 #ifdef _WIN32
1700     struct tm *ptm;
1701 #else
1702     struct tm tm;
1703 #endif
1704     time_t ti;
1705     int64_t secs;
1706 
1707     if (!sn) {
1708         snprintf(buf, buf_size,
1709                  "%-10s%-20s%7s%20s%15s",
1710                  "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
1711     } else {
1712         ti = sn->date_sec;
1713 #ifdef _WIN32
1714         ptm = localtime(&ti);
1715         strftime(date_buf, sizeof(date_buf),
1716                  "%Y-%m-%d %H:%M:%S", ptm);
1717 #else
1718         localtime_r(&ti, &tm);
1719         strftime(date_buf, sizeof(date_buf),
1720                  "%Y-%m-%d %H:%M:%S", &tm);
1721 #endif
1722         secs = sn->vm_clock_nsec / 1000000000;
1723         snprintf(clock_buf, sizeof(clock_buf),
1724                  "%02d:%02d:%02d.%03d",
1725                  (int)(secs / 3600),
1726                  (int)((secs / 60) % 60),
1727                  (int)(secs % 60),
1728                  (int)((sn->vm_clock_nsec / 1000000) % 1000));
1729         snprintf(buf, buf_size,
1730                  "%-10s%-20s%7s%20s%15s",
1731                  sn->id_str, sn->name,
1732                  get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
1733                  date_buf,
1734                  clock_buf);
1735     }
1736     return buf;
1737 }
1738 
1739 
1740 /**************************************************************/
1741 /* async I/Os */
1742 
1743 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
1744                                  QEMUIOVector *qiov, int nb_sectors,
1745                                  BlockDriverCompletionFunc *cb, void *opaque)
1746 {
1747     BlockDriver *drv = bs->drv;
1748     BlockDriverAIOCB *ret;
1749 
1750     if (!drv)
1751         return NULL;
1752     if (bdrv_check_request(bs, sector_num, nb_sectors))
1753         return NULL;
1754 
1755     ret = drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
1756                               cb, opaque);
1757 
1758     if (ret) {
1759 	/* Update stats even though technically transfer has not happened. */
1760 	bs->rd_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
1761 	bs->rd_ops ++;
1762     }
1763 
1764     return ret;
1765 }
1766 
1767 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
1768                                   QEMUIOVector *qiov, int nb_sectors,
1769                                   BlockDriverCompletionFunc *cb, void *opaque)
1770 {
1771     BlockDriver *drv = bs->drv;
1772     BlockDriverAIOCB *ret;
1773 
1774     if (!drv)
1775         return NULL;
1776     if (bs->read_only)
1777         return NULL;
1778     if (bdrv_check_request(bs, sector_num, nb_sectors))
1779         return NULL;
1780 
1781     if (bs->dirty_bitmap) {
1782         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1783     }
1784 
1785     ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
1786                                cb, opaque);
1787 
1788     if (ret) {
1789 	/* Update stats even though technically transfer has not happened. */
1790 	bs->wr_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
1791 	bs->wr_ops ++;
1792     }
1793 
1794     return ret;
1795 }
1796 
1797 
/*
 * Shared completion state for one bdrv_aio_multiwrite() batch.  It is
 * allocated with one callbacks[] slot per request passed in by the caller
 * and freed by multiwrite_cb() once num_requests drops to zero.
 */
typedef struct MultiwriteCB {
    /* First negative completion status seen; 0 while no request failed */
    int error;
    /* Submitted requests whose completion is still outstanding */
    int num_requests;
    /* Number of valid entries in callbacks[] */
    int num_callbacks;
    struct {
        /* Caller's completion function and its argument */
        BlockDriverCompletionFunc *cb;
        void *opaque;
        /* Merged I/O vector created by multiwrite_merge(), or NULL */
        QEMUIOVector *free_qiov;
        /* Zero-fill buffer created by multiwrite_merge(), or NULL */
        void *free_buf;
    } callbacks[];
} MultiwriteCB;
1809 
1810 static void multiwrite_user_cb(MultiwriteCB *mcb)
1811 {
1812     int i;
1813 
1814     for (i = 0; i < mcb->num_callbacks; i++) {
1815         mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
1816         if (mcb->callbacks[i].free_qiov) {
1817             qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
1818         }
1819         qemu_free(mcb->callbacks[i].free_qiov);
1820         qemu_vfree(mcb->callbacks[i].free_buf);
1821     }
1822 }
1823 
1824 static void multiwrite_cb(void *opaque, int ret)
1825 {
1826     MultiwriteCB *mcb = opaque;
1827 
1828     if (ret < 0 && !mcb->error) {
1829         mcb->error = ret;
1830         multiwrite_user_cb(mcb);
1831     }
1832 
1833     mcb->num_requests--;
1834     if (mcb->num_requests == 0) {
1835         if (mcb->error == 0) {
1836             multiwrite_user_cb(mcb);
1837         }
1838         qemu_free(mcb);
1839     }
1840 }
1841 
1842 static int multiwrite_req_compare(const void *a, const void *b)
1843 {
1844     return (((BlockRequest*) a)->sector - ((BlockRequest*) b)->sector);
1845 }
1846 
1847 /*
1848  * Takes a bunch of requests and tries to merge them. Returns the number of
1849  * requests that remain after merging.
1850  */
1851 static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
1852     int num_reqs, MultiwriteCB *mcb)
1853 {
1854     int i, outidx;
1855 
1856     // Sort requests by start sector
1857     qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
1858 
1859     // Check if adjacent requests touch the same clusters. If so, combine them,
1860     // filling up gaps with zero sectors.
1861     outidx = 0;
1862     for (i = 1; i < num_reqs; i++) {
1863         int merge = 0;
1864         int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
1865 
1866         // This handles the cases that are valid for all block drivers, namely
1867         // exactly sequential writes and overlapping writes.
1868         if (reqs[i].sector <= oldreq_last) {
1869             merge = 1;
1870         }
1871 
1872         // The block driver may decide that it makes sense to combine requests
1873         // even if there is a gap of some sectors between them. In this case,
1874         // the gap is filled with zeros (therefore only applicable for yet
1875         // unused space in format like qcow2).
1876         if (!merge && bs->drv->bdrv_merge_requests) {
1877             merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
1878         }
1879 
1880         if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
1881             merge = 0;
1882         }
1883 
1884         if (merge) {
1885             size_t size;
1886             QEMUIOVector *qiov = qemu_mallocz(sizeof(*qiov));
1887             qemu_iovec_init(qiov,
1888                 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
1889 
1890             // Add the first request to the merged one. If the requests are
1891             // overlapping, drop the last sectors of the first request.
1892             size = (reqs[i].sector - reqs[outidx].sector) << 9;
1893             qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
1894 
1895             // We might need to add some zeros between the two requests
1896             if (reqs[i].sector > oldreq_last) {
1897                 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
1898                 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
1899                 memset(buf, 0, zero_bytes);
1900                 qemu_iovec_add(qiov, buf, zero_bytes);
1901                 mcb->callbacks[i].free_buf = buf;
1902             }
1903 
1904             // Add the second request
1905             qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
1906 
1907             reqs[outidx].nb_sectors += reqs[i].nb_sectors;
1908             reqs[outidx].qiov = qiov;
1909 
1910             mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
1911         } else {
1912             outidx++;
1913             reqs[outidx].sector     = reqs[i].sector;
1914             reqs[outidx].nb_sectors = reqs[i].nb_sectors;
1915             reqs[outidx].qiov       = reqs[i].qiov;
1916         }
1917     }
1918 
1919     return outidx + 1;
1920 }
1921 
1922 /*
1923  * Submit multiple AIO write requests at once.
1924  *
1925  * On success, the function returns 0 and all requests in the reqs array have
1926  * been submitted. In error case this function returns -1, and any of the
1927  * requests may or may not be submitted yet. In particular, this means that the
1928  * callback will be called for some of the requests, for others it won't. The
1929  * caller must check the error field of the BlockRequest to wait for the right
1930  * callbacks (if error != 0, no callback will be called).
1931  *
1932  * The implementation may modify the contents of the reqs array, e.g. to merge
1933  * requests. However, the fields opaque and error are left unmodified as they
1934  * are used to signal failure for a single request to the caller.
1935  */
1936 int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
1937 {
1938     BlockDriverAIOCB *acb;
1939     MultiwriteCB *mcb;
1940     int i;
1941 
1942     if (num_reqs == 0) {
1943         return 0;
1944     }
1945 
1946     // Create MultiwriteCB structure
1947     mcb = qemu_mallocz(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
1948     mcb->num_requests = 0;
1949     mcb->num_callbacks = num_reqs;
1950 
1951     for (i = 0; i < num_reqs; i++) {
1952         mcb->callbacks[i].cb = reqs[i].cb;
1953         mcb->callbacks[i].opaque = reqs[i].opaque;
1954     }
1955 
1956     // Check for mergable requests
1957     num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
1958 
1959     // Run the aio requests
1960     for (i = 0; i < num_reqs; i++) {
1961         acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
1962             reqs[i].nb_sectors, multiwrite_cb, mcb);
1963 
1964         if (acb == NULL) {
1965             // We can only fail the whole thing if no request has been
1966             // submitted yet. Otherwise we'll wait for the submitted AIOs to
1967             // complete and report the error in the callback.
1968             if (mcb->num_requests == 0) {
1969                 reqs[i].error = -EIO;
1970                 goto fail;
1971             } else {
1972                 mcb->num_requests++;
1973                 multiwrite_cb(mcb, -EIO);
1974                 break;
1975             }
1976         } else {
1977             mcb->num_requests++;
1978         }
1979     }
1980 
1981     return 0;
1982 
1983 fail:
1984     free(mcb);
1985     return -1;
1986 }
1987 
1988 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
1989         BlockDriverCompletionFunc *cb, void *opaque)
1990 {
1991     BlockDriver *drv = bs->drv;
1992 
1993     if (!drv)
1994         return NULL;
1995     return drv->bdrv_aio_flush(bs, cb, opaque);
1996 }
1997 
1998 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
1999 {
2000     acb->pool->cancel(acb);
2001 }
2002 
2003 
2004 /**************************************************************/
2005 /* async block device emulation */
2006 
2007 typedef struct BlockDriverAIOCBSync {
2008     BlockDriverAIOCB common;
2009     QEMUBH *bh;
2010     int ret;
2011     /* vector translation state */
2012     QEMUIOVector *qiov;
2013     uint8_t *bounce;
2014     int is_write;
2015 } BlockDriverAIOCBSync;
2016 
2017 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2018 {
2019     BlockDriverAIOCBSync *acb = (BlockDriverAIOCBSync *)blockacb;
2020     qemu_bh_delete(acb->bh);
2021     acb->bh = NULL;
2022     qemu_aio_release(acb);
2023 }
2024 
2025 static AIOPool bdrv_em_aio_pool = {
2026     .aiocb_size         = sizeof(BlockDriverAIOCBSync),
2027     .cancel             = bdrv_aio_cancel_em,
2028 };
2029 
2030 static void bdrv_aio_bh_cb(void *opaque)
2031 {
2032     BlockDriverAIOCBSync *acb = opaque;
2033 
2034     if (!acb->is_write)
2035         qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
2036     qemu_vfree(acb->bounce);
2037     acb->common.cb(acb->common.opaque, acb->ret);
2038     qemu_bh_delete(acb->bh);
2039     acb->bh = NULL;
2040     qemu_aio_release(acb);
2041 }
2042 
2043 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2044                                             int64_t sector_num,
2045                                             QEMUIOVector *qiov,
2046                                             int nb_sectors,
2047                                             BlockDriverCompletionFunc *cb,
2048                                             void *opaque,
2049                                             int is_write)
2050 
2051 {
2052     BlockDriverAIOCBSync *acb;
2053 
2054     acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2055     acb->is_write = is_write;
2056     acb->qiov = qiov;
2057     acb->bounce = qemu_blockalign(bs, qiov->size);
2058 
2059     if (!acb->bh)
2060         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2061 
2062     if (is_write) {
2063         qemu_iovec_to_buffer(acb->qiov, acb->bounce);
2064         acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
2065     } else {
2066         acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
2067     }
2068 
2069     qemu_bh_schedule(acb->bh);
2070 
2071     return &acb->common;
2072 }
2073 
2074 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2075         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2076         BlockDriverCompletionFunc *cb, void *opaque)
2077 {
2078     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2079 }
2080 
2081 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2082         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2083         BlockDriverCompletionFunc *cb, void *opaque)
2084 {
2085     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
2086 }
2087 
2088 static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
2089         BlockDriverCompletionFunc *cb, void *opaque)
2090 {
2091     BlockDriverAIOCBSync *acb;
2092 
2093     acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2094     acb->is_write = 1; /* don't bounce in the completion hadler */
2095     acb->qiov = NULL;
2096     acb->bounce = NULL;
2097     acb->ret = 0;
2098 
2099     if (!acb->bh)
2100         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2101 
2102     bdrv_flush(bs);
2103     qemu_bh_schedule(acb->bh);
2104     return &acb->common;
2105 }
2106 
2107 /**************************************************************/
2108 /* sync block device emulation */
2109 
/*
 * Completion callback for the synchronous emulation: opaque points at the
 * waiter's int result slot, which receives ret.
 */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result_slot = opaque;

    *result_slot = ret;
}
2114 
2115 #define NOT_DONE 0x7fffffff
2116 
2117 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
2118                         uint8_t *buf, int nb_sectors)
2119 {
2120     int async_ret;
2121     BlockDriverAIOCB *acb;
2122     struct iovec iov;
2123     QEMUIOVector qiov;
2124 
2125     async_context_push();
2126 
2127     async_ret = NOT_DONE;
2128     iov.iov_base = (void *)buf;
2129     iov.iov_len = nb_sectors * 512;
2130     qemu_iovec_init_external(&qiov, &iov, 1);
2131     acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
2132         bdrv_rw_em_cb, &async_ret);
2133     if (acb == NULL) {
2134         async_ret = -1;
2135         goto fail;
2136     }
2137 
2138     while (async_ret == NOT_DONE) {
2139         qemu_aio_wait();
2140     }
2141 
2142 
2143 fail:
2144     async_context_pop();
2145     return async_ret;
2146 }
2147 
2148 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
2149                          const uint8_t *buf, int nb_sectors)
2150 {
2151     int async_ret;
2152     BlockDriverAIOCB *acb;
2153     struct iovec iov;
2154     QEMUIOVector qiov;
2155 
2156     async_context_push();
2157 
2158     async_ret = NOT_DONE;
2159     iov.iov_base = (void *)buf;
2160     iov.iov_len = nb_sectors * 512;
2161     qemu_iovec_init_external(&qiov, &iov, 1);
2162     acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
2163         bdrv_rw_em_cb, &async_ret);
2164     if (acb == NULL) {
2165         async_ret = -1;
2166         goto fail;
2167     }
2168     while (async_ret == NOT_DONE) {
2169         qemu_aio_wait();
2170     }
2171 
2172 fail:
2173     async_context_pop();
2174     return async_ret;
2175 }
2176 
2177 void bdrv_init(void)
2178 {
2179     module_call_init(MODULE_INIT_BLOCK);
2180 }
2181 
2182 void bdrv_init_with_whitelist(void)
2183 {
2184     use_bdrv_whitelist = 1;
2185     bdrv_init();
2186 }
2187 
2188 void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2189                    BlockDriverCompletionFunc *cb, void *opaque)
2190 {
2191     BlockDriverAIOCB *acb;
2192 
2193     if (pool->free_aiocb) {
2194         acb = pool->free_aiocb;
2195         pool->free_aiocb = acb->next;
2196     } else {
2197         acb = qemu_mallocz(pool->aiocb_size);
2198         acb->pool = pool;
2199     }
2200     acb->bs = bs;
2201     acb->cb = cb;
2202     acb->opaque = opaque;
2203     return acb;
2204 }
2205 
2206 void qemu_aio_release(void *p)
2207 {
2208     BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2209     AIOPool *pool = acb->pool;
2210     acb->next = pool->free_aiocb;
2211     pool->free_aiocb = acb;
2212 }
2213 
2214 /**************************************************************/
2215 /* removable device support */
2216 
2217 /**
2218  * Return TRUE if the media is present
2219  */
2220 int bdrv_is_inserted(BlockDriverState *bs)
2221 {
2222     BlockDriver *drv = bs->drv;
2223     int ret;
2224     if (!drv)
2225         return 0;
2226     if (!drv->bdrv_is_inserted)
2227         return 1;
2228     ret = drv->bdrv_is_inserted(bs);
2229     return ret;
2230 }
2231 
2232 /**
2233  * Return TRUE if the media changed since the last call to this
2234  * function. It is currently only used for floppy disks
2235  */
2236 int bdrv_media_changed(BlockDriverState *bs)
2237 {
2238     BlockDriver *drv = bs->drv;
2239     int ret;
2240 
2241     if (!drv || !drv->bdrv_media_changed)
2242         ret = -ENOTSUP;
2243     else
2244         ret = drv->bdrv_media_changed(bs);
2245     if (ret == -ENOTSUP)
2246         ret = bs->media_changed;
2247     bs->media_changed = 0;
2248     return ret;
2249 }
2250 
2251 /**
2252  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
2253  */
2254 int bdrv_eject(BlockDriverState *bs, int eject_flag)
2255 {
2256     BlockDriver *drv = bs->drv;
2257     int ret;
2258 
2259     if (bs->locked) {
2260         return -EBUSY;
2261     }
2262 
2263     if (!drv || !drv->bdrv_eject) {
2264         ret = -ENOTSUP;
2265     } else {
2266         ret = drv->bdrv_eject(bs, eject_flag);
2267     }
2268     if (ret == -ENOTSUP) {
2269         if (eject_flag)
2270             bdrv_close(bs);
2271         ret = 0;
2272     }
2273 
2274     return ret;
2275 }
2276 
2277 int bdrv_is_locked(BlockDriverState *bs)
2278 {
2279     return bs->locked;
2280 }
2281 
2282 /**
2283  * Lock or unlock the media (if it is locked, the user won't be able
2284  * to eject it manually).
2285  */
2286 void bdrv_set_locked(BlockDriverState *bs, int locked)
2287 {
2288     BlockDriver *drv = bs->drv;
2289 
2290     bs->locked = locked;
2291     if (drv && drv->bdrv_set_locked) {
2292         drv->bdrv_set_locked(bs, locked);
2293     }
2294 }
2295 
2296 /* needed for generic scsi interface */
2297 
2298 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
2299 {
2300     BlockDriver *drv = bs->drv;
2301 
2302     if (drv && drv->bdrv_ioctl)
2303         return drv->bdrv_ioctl(bs, req, buf);
2304     return -ENOTSUP;
2305 }
2306 
2307 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
2308         unsigned long int req, void *buf,
2309         BlockDriverCompletionFunc *cb, void *opaque)
2310 {
2311     BlockDriver *drv = bs->drv;
2312 
2313     if (drv && drv->bdrv_aio_ioctl)
2314         return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
2315     return NULL;
2316 }
2317 
2318 
2319 
2320 void *qemu_blockalign(BlockDriverState *bs, size_t size)
2321 {
2322     return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
2323 }
2324 
2325 void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
2326 {
2327     int64_t bitmap_size;
2328 
2329     bs->dirty_count = 0;
2330     if (enable) {
2331         if (!bs->dirty_bitmap) {
2332             bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
2333                     BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
2334             bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
2335 
2336             bs->dirty_bitmap = qemu_mallocz(bitmap_size);
2337         }
2338     } else {
2339         if (bs->dirty_bitmap) {
2340             qemu_free(bs->dirty_bitmap);
2341             bs->dirty_bitmap = NULL;
2342         }
2343     }
2344 }
2345 
2346 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
2347 {
2348     int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
2349 
2350     if (bs->dirty_bitmap &&
2351         (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
2352         return bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
2353             (1 << (chunk % (sizeof(unsigned long) * 8)));
2354     } else {
2355         return 0;
2356     }
2357 }
2358 
2359 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
2360                       int nr_sectors)
2361 {
2362     set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
2363 }
2364 
2365 int64_t bdrv_get_dirty_count(BlockDriverState *bs)
2366 {
2367     return bs->dirty_count;
2368 }
2369