xref: /openbmc/qemu/block.c (revision b6ce07aa83bdee3cfd2610f270a0ce304e78df95)
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "monitor.h"
27 #include "block_int.h"
28 #include "module.h"
29 #include "qemu-objects.h"
30 
31 #ifdef CONFIG_BSD
32 #include <sys/types.h>
33 #include <sys/stat.h>
34 #include <sys/ioctl.h>
35 #include <sys/queue.h>
36 #ifndef __DragonFly__
37 #include <sys/disk.h>
38 #endif
39 #endif
40 
41 #ifdef _WIN32
42 #include <windows.h>
43 #endif
44 
/*
 * Forward declarations for static helpers that are referenced in this file
 * before their definitions.
 */

/* Common open path shared by bdrv_open() and bdrv_file_open(). */
static int bdrv_open_common(BlockDriverState *bs, const char *filename,
    int flags, BlockDriver *drv);

/*
 * Emulation entry points. bdrv_register() installs these into a driver
 * that does not supply the corresponding callback itself.
 */
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
        BlockDriverCompletionFunc *cb, void *opaque);
static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
                        uint8_t *buf, int nb_sectors);
static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
                         const uint8_t *buf, int nb_sectors);

/* Picks the protocol driver responsible for a given filename. */
static BlockDriver *find_protocol(const char *filename);
61 
/*
 * Block device states that were created with a non-empty device name;
 * bdrv_new() appends to this list and bdrv_delete() removes from it.
 */
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

/* Registered block drivers; bdrv_register() inserts at the head. */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
70 
/*
 * Tell whether 'path' is absolute.
 *
 * Anything up to and including the first ':' in the string is skipped
 * before the test, so a prefixed name such as "proto:/dir/file" is judged
 * by the character that follows the colon.
 *
 * Returns 1 when the remaining path starts with '/' (or, on Windows, with
 * '\\'), 0 otherwise. On Windows a leading '/' or '\\' in the unmodified
 * string is always treated as absolute.
 */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (path[0] == '/' || path[0] == '\\') {
        return 1;
    }
#endif

    colon = strchr(path, ':');
    start = colon ? colon + 1 : path;

#ifdef _WIN32
    if (*start == '\\') {
        return 1;
    }
#endif
    return *start == '/';
}
90 
/*
 * Build the path of 'filename' as seen from 'base_path' into 'dest'
 * (at most dest_size bytes including the terminator).
 *
 * An absolute 'filename' (see path_is_absolute()) is copied unchanged.
 * Otherwise the directory part of 'base_path' -- everything up to and
 * including the last path separator, or the first ':' if that comes later --
 * is copied and 'filename' is appended to it. URL-style names are therefore
 * supported. Nothing is written when dest_size <= 0.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *colon, *sep;
    const char *after_colon, *after_sep, *dir_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just past the first ':' of the base, if there is one */
    colon = strchr(base_path, ':');
    after_colon = colon ? colon + 1 : base_path;

    /* position just past the last path separator of the base */
    sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (!sep || backslash > sep) {
            sep = backslash;
        }
    }
#endif
    after_sep = sep ? sep + 1 : base_path;

    /* the directory prefix ends at whichever of the two lies further right */
    dir_end = (after_sep > after_colon) ? after_sep : after_colon;

    prefix_len = dir_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
134 
135 void bdrv_register(BlockDriver *bdrv)
136 {
137     if (!bdrv->bdrv_aio_readv) {
138         /* add AIO emulation layer */
139         bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
140         bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
141     } else if (!bdrv->bdrv_read) {
142         /* add synchronous IO emulation layer */
143         bdrv->bdrv_read = bdrv_read_em;
144         bdrv->bdrv_write = bdrv_write_em;
145     }
146 
147     if (!bdrv->bdrv_aio_flush)
148         bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
149 
150     QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
151 }
152 
153 /* create a new block device (by default it is empty) */
154 BlockDriverState *bdrv_new(const char *device_name)
155 {
156     BlockDriverState *bs;
157 
158     bs = qemu_mallocz(sizeof(BlockDriverState));
159     pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
160     if (device_name[0] != '\0') {
161         QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
162     }
163     return bs;
164 }
165 
166 BlockDriver *bdrv_find_format(const char *format_name)
167 {
168     BlockDriver *drv1;
169     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
170         if (!strcmp(drv1->format_name, format_name)) {
171             return drv1;
172         }
173     }
174     return NULL;
175 }
176 
177 static int bdrv_is_whitelisted(BlockDriver *drv)
178 {
179     static const char *whitelist[] = {
180         CONFIG_BDRV_WHITELIST
181     };
182     const char **p;
183 
184     if (!whitelist[0])
185         return 1;               /* no whitelist, anything goes */
186 
187     for (p = whitelist; *p; p++) {
188         if (!strcmp(drv->format_name, *p)) {
189             return 1;
190         }
191     }
192     return 0;
193 }
194 
195 BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
196 {
197     BlockDriver *drv = bdrv_find_format(format_name);
198     return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
199 }
200 
201 int bdrv_create(BlockDriver *drv, const char* filename,
202     QEMUOptionParameter *options)
203 {
204     if (!drv->bdrv_create)
205         return -ENOTSUP;
206 
207     return drv->bdrv_create(filename, options);
208 }
209 
210 int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
211 {
212     BlockDriver *drv;
213 
214     drv = find_protocol(filename);
215     if (drv == NULL) {
216         drv = bdrv_find_format("file");
217     }
218 
219     return bdrv_create(drv, filename, options);
220 }
221 
#ifdef _WIN32
/*
 * Create an empty temporary file and store its name in 'filename'.
 *
 * GetTempFileName() writes up to MAX_PATH characters into its output
 * buffer regardless of 'size', so the caller's buffer must be at least
 * that large; this is enforced with an assertion.
 */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    assert(size >= MAX_PATH);
    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
#else
/*
 * Create an empty temporary file in $TMPDIR (default "/tmp") and store its
 * name in 'filename' (at most 'size' bytes).
 *
 * mkstemp() creates the file itself, so the name is reserved once this
 * returns. The descriptor is closed again; it is only closed when
 * mkstemp() actually returned one.
 */
void get_tmp_filename(char *filename, int size)
{
    const char *tmpdir;
    int fd;

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    if (fd >= 0) {
        close(fd);
    }
}
#endif
244 
#ifdef _WIN32
/* Return non-zero if 'filename' starts with an ASCII letter followed by ':'. */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_lower = (letter >= 'a' && letter <= 'z');
    const int is_upper = (letter >= 'A' && letter <= 'Z');

    return (is_lower || is_upper) && filename[1] == ':';
}

/*
 * Return 1 if 'filename' names a Windows drive: either exactly a drive
 * prefix such as "d:", or a name starting with "\\.\" or "//./".
 */
int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }
    return strstart(filename, "\\\\.\\", NULL) ||
           strstart(filename, "//./", NULL);
}
#endif
264 
265 /*
266  * Detect host devices. By convention, /dev/cdrom[N] is always
267  * recognized as a host CDROM.
268  */
269 static BlockDriver *find_hdev_driver(const char *filename)
270 {
271     int score_max = 0, score;
272     BlockDriver *drv = NULL, *d;
273 
274     QLIST_FOREACH(d, &bdrv_drivers, list) {
275         if (d->bdrv_probe_device) {
276             score = d->bdrv_probe_device(filename);
277             if (score > score_max) {
278                 score_max = score;
279                 drv = d;
280             }
281         }
282     }
283 
284     return drv;
285 }
286 
287 static BlockDriver *find_protocol(const char *filename)
288 {
289     BlockDriver *drv1;
290     char protocol[128];
291     int len;
292     const char *p;
293 
294 #ifdef _WIN32
295     if (is_windows_drive(filename) ||
296         is_windows_drive_prefix(filename))
297         return bdrv_find_format("file");
298 #endif
299     p = strchr(filename, ':');
300     if (!p) {
301         drv1 = find_hdev_driver(filename);
302         if (!drv1) {
303             drv1 = bdrv_find_format("file");
304         }
305         return drv1;
306     }
307     len = p - filename;
308     if (len > sizeof(protocol) - 1)
309         len = sizeof(protocol) - 1;
310     memcpy(protocol, filename, len);
311     protocol[len] = '\0';
312     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
313         if (drv1->protocol_name &&
314             !strcmp(drv1->protocol_name, protocol)) {
315             return drv1;
316         }
317     }
318     return NULL;
319 }
320 
321 static BlockDriver *find_image_format(const char *filename)
322 {
323     int ret, score, score_max;
324     BlockDriver *drv1, *drv;
325     uint8_t buf[2048];
326     BlockDriverState *bs;
327 
328     drv = find_protocol(filename);
329     /* no need to test disk image formats for vvfat */
330     if (drv && strcmp(drv->format_name, "vvfat") == 0)
331         return drv;
332 
333     ret = bdrv_file_open(&bs, filename, 0);
334     if (ret < 0)
335         return NULL;
336     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
337     bdrv_delete(bs);
338     if (ret < 0) {
339         return NULL;
340     }
341 
342     score_max = 0;
343     drv = NULL;
344     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
345         if (drv1->bdrv_probe) {
346             score = drv1->bdrv_probe(buf, ret, filename);
347             if (score > score_max) {
348                 score_max = score;
349                 drv = drv1;
350             }
351         }
352     }
353     return drv;
354 }
355 
356 /*
357  * Opens a file using a protocol (file, host_device, nbd, ...)
358  */
359 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
360 {
361     BlockDriverState *bs;
362     BlockDriver *drv;
363     int ret;
364 
365     drv = find_protocol(filename);
366     if (!drv) {
367         return -ENOENT;
368     }
369 
370     bs = bdrv_new("");
371     ret = bdrv_open_common(bs, filename, flags, drv);
372     if (ret < 0) {
373         bdrv_delete(bs);
374         return ret;
375     }
376     bs->growable = 1;
377     *pbs = bs;
378     return 0;
379 }
380 
381 /*
382  * Opens a disk image (raw, qcow2, vmdk, ...)
383  */
384 int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
385               BlockDriver *drv)
386 {
387     int ret;
388 
389     if (flags & BDRV_O_SNAPSHOT) {
390         BlockDriverState *bs1;
391         int64_t total_size;
392         int is_protocol = 0;
393         BlockDriver *bdrv_qcow2;
394         QEMUOptionParameter *options;
395         char tmp_filename[PATH_MAX];
396         char backing_filename[PATH_MAX];
397 
398         /* if snapshot, we create a temporary backing file and open it
399            instead of opening 'filename' directly */
400 
401         /* if there is a backing file, use it */
402         bs1 = bdrv_new("");
403         ret = bdrv_open(bs1, filename, 0, drv);
404         if (ret < 0) {
405             bdrv_delete(bs1);
406             return ret;
407         }
408         total_size = bdrv_getlength(bs1) >> BDRV_SECTOR_BITS;
409 
410         if (bs1->drv && bs1->drv->protocol_name)
411             is_protocol = 1;
412 
413         bdrv_delete(bs1);
414 
415         get_tmp_filename(tmp_filename, sizeof(tmp_filename));
416 
417         /* Real path is meaningless for protocols */
418         if (is_protocol)
419             snprintf(backing_filename, sizeof(backing_filename),
420                      "%s", filename);
421         else if (!realpath(filename, backing_filename))
422             return -errno;
423 
424         bdrv_qcow2 = bdrv_find_format("qcow2");
425         options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
426 
427         set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size * 512);
428         set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
429         if (drv) {
430             set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
431                 drv->format_name);
432         }
433 
434         ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
435         if (ret < 0) {
436             return ret;
437         }
438 
439         filename = tmp_filename;
440         drv = bdrv_qcow2;
441         bs->is_temporary = 1;
442     }
443 
444     /* Find the right image format driver */
445     if (!drv) {
446         drv = find_image_format(filename);
447     }
448 
449     if (!drv) {
450         ret = -ENOENT;
451         goto unlink_and_fail;
452     }
453 
454     /* Open the image */
455     ret = bdrv_open_common(bs, filename, flags, drv);
456     if (ret < 0) {
457         goto unlink_and_fail;
458     }
459 
460     /* If there is a backing file, use it */
461     if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
462         char backing_filename[PATH_MAX];
463         int back_flags;
464         BlockDriver *back_drv = NULL;
465 
466         bs->backing_hd = bdrv_new("");
467         path_combine(backing_filename, sizeof(backing_filename),
468                      filename, bs->backing_file);
469         if (bs->backing_format[0] != '\0')
470             back_drv = bdrv_find_format(bs->backing_format);
471 
472         /* backing files always opened read-only */
473         back_flags =
474             flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
475 
476         ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
477         if (ret < 0) {
478             bdrv_close(bs);
479             return ret;
480         }
481         if (bs->is_temporary) {
482             bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
483         } else {
484             /* base image inherits from "parent" */
485             bs->backing_hd->keep_read_only = bs->keep_read_only;
486         }
487     }
488 
489     if (!bdrv_key_required(bs)) {
490         /* call the change callback */
491         bs->media_changed = 1;
492         if (bs->change_cb)
493             bs->change_cb(bs->change_opaque);
494     }
495 
496     return 0;
497 
498 unlink_and_fail:
499     if (bs->is_temporary) {
500         unlink(filename);
501     }
502     return ret;
503 }
504 
505 /*
506  * Common part for opening disk images and files
507  */
508 static int bdrv_open_common(BlockDriverState *bs, const char *filename,
509     int flags, BlockDriver *drv)
510 {
511     int ret, open_flags;
512 
513     assert(drv != NULL);
514 
515     bs->is_temporary = 0;
516     bs->encrypted = 0;
517     bs->valid_key = 0;
518     bs->open_flags = flags;
519     /* buffer_alignment defaulted to 512, drivers can change this value */
520     bs->buffer_alignment = 512;
521 
522     pstrcpy(bs->filename, sizeof(bs->filename), filename);
523 
524     if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
525         return -ENOTSUP;
526     }
527 
528     bs->drv = drv;
529     bs->opaque = qemu_mallocz(drv->instance_size);
530 
531     /*
532      * Yes, BDRV_O_NOCACHE aka O_DIRECT means we have to present a
533      * write cache to the guest.  We do need the fdatasync to flush
534      * out transactions for block allocations, and we maybe have a
535      * volatile write cache in our backing device to deal with.
536      */
537     if (flags & (BDRV_O_CACHE_WB|BDRV_O_NOCACHE))
538         bs->enable_write_cache = 1;
539 
540     /*
541      * Clear flags that are internal to the block layer before opening the
542      * image.
543      */
544     open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
545 
546     /*
547      * Snapshots should be writeable.
548      */
549     if (bs->is_temporary) {
550         open_flags |= BDRV_O_RDWR;
551     }
552 
553     ret = drv->bdrv_open(bs, filename, open_flags);
554     if (ret < 0) {
555         goto free_and_fail;
556     }
557 
558     bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
559     if (drv->bdrv_getlength) {
560         bs->total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
561     }
562 #ifndef _WIN32
563     if (bs->is_temporary) {
564         unlink(filename);
565     }
566 #endif
567     return 0;
568 
569 free_and_fail:
570     qemu_free(bs->opaque);
571     bs->opaque = NULL;
572     bs->drv = NULL;
573     return ret;
574 }
575 
576 void bdrv_close(BlockDriverState *bs)
577 {
578     if (bs->drv) {
579         if (bs->backing_hd)
580             bdrv_delete(bs->backing_hd);
581         bs->drv->bdrv_close(bs);
582         qemu_free(bs->opaque);
583 #ifdef _WIN32
584         if (bs->is_temporary) {
585             unlink(bs->filename);
586         }
587 #endif
588         bs->opaque = NULL;
589         bs->drv = NULL;
590 
591         /* call the change callback */
592         bs->media_changed = 1;
593         if (bs->change_cb)
594             bs->change_cb(bs->change_opaque);
595     }
596 }
597 
598 void bdrv_delete(BlockDriverState *bs)
599 {
600     /* remove from list, if necessary */
601     if (bs->device_name[0] != '\0') {
602         QTAILQ_REMOVE(&bdrv_states, bs, list);
603     }
604 
605     bdrv_close(bs);
606     qemu_free(bs);
607 }
608 
609 /*
610  * Run consistency checks on an image
611  *
612  * Returns the number of errors or -errno when an internal error occurs
613  */
614 int bdrv_check(BlockDriverState *bs)
615 {
616     if (bs->drv->bdrv_check == NULL) {
617         return -ENOTSUP;
618     }
619 
620     return bs->drv->bdrv_check(bs);
621 }
622 
623 /* commit COW file into the raw image */
624 int bdrv_commit(BlockDriverState *bs)
625 {
626     BlockDriver *drv = bs->drv;
627     int64_t i, total_sectors;
628     int n, j, ro, open_flags;
629     int ret = 0, rw_ret = 0;
630     unsigned char sector[512];
631     char filename[1024];
632     BlockDriverState *bs_rw, *bs_ro;
633 
634     if (!drv)
635         return -ENOMEDIUM;
636 
637     if (!bs->backing_hd) {
638         return -ENOTSUP;
639     }
640 
641     if (bs->backing_hd->keep_read_only) {
642         return -EACCES;
643     }
644 
645     ro = bs->backing_hd->read_only;
646     strncpy(filename, bs->backing_hd->filename, sizeof(filename));
647     open_flags =  bs->backing_hd->open_flags;
648 
649     if (ro) {
650         /* re-open as RW */
651         bdrv_delete(bs->backing_hd);
652         bs->backing_hd = NULL;
653         bs_rw = bdrv_new("");
654         rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR, NULL);
655         if (rw_ret < 0) {
656             bdrv_delete(bs_rw);
657             /* try to re-open read-only */
658             bs_ro = bdrv_new("");
659             ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR, NULL);
660             if (ret < 0) {
661                 bdrv_delete(bs_ro);
662                 /* drive not functional anymore */
663                 bs->drv = NULL;
664                 return ret;
665             }
666             bs->backing_hd = bs_ro;
667             return rw_ret;
668         }
669         bs->backing_hd = bs_rw;
670     }
671 
672     total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
673     for (i = 0; i < total_sectors;) {
674         if (drv->bdrv_is_allocated(bs, i, 65536, &n)) {
675             for(j = 0; j < n; j++) {
676                 if (bdrv_read(bs, i, sector, 1) != 0) {
677                     ret = -EIO;
678                     goto ro_cleanup;
679                 }
680 
681                 if (bdrv_write(bs->backing_hd, i, sector, 1) != 0) {
682                     ret = -EIO;
683                     goto ro_cleanup;
684                 }
685                 i++;
686 	    }
687 	} else {
688             i += n;
689         }
690     }
691 
692     if (drv->bdrv_make_empty) {
693         ret = drv->bdrv_make_empty(bs);
694         bdrv_flush(bs);
695     }
696 
697     /*
698      * Make sure all data we wrote to the backing device is actually
699      * stable on disk.
700      */
701     if (bs->backing_hd)
702         bdrv_flush(bs->backing_hd);
703 
704 ro_cleanup:
705 
706     if (ro) {
707         /* re-open as RO */
708         bdrv_delete(bs->backing_hd);
709         bs->backing_hd = NULL;
710         bs_ro = bdrv_new("");
711         ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR, NULL);
712         if (ret < 0) {
713             bdrv_delete(bs_ro);
714             /* drive not functional anymore */
715             bs->drv = NULL;
716             return ret;
717         }
718         bs->backing_hd = bs_ro;
719         bs->backing_hd->keep_read_only = 0;
720     }
721 
722     return ret;
723 }
724 
725 /*
726  * Return values:
727  * 0        - success
728  * -EINVAL  - backing format specified, but no file
729  * -ENOSPC  - can't update the backing file because no space is left in the
730  *            image file header
731  * -ENOTSUP - format driver doesn't support changing the backing file
732  */
733 int bdrv_change_backing_file(BlockDriverState *bs,
734     const char *backing_file, const char *backing_fmt)
735 {
736     BlockDriver *drv = bs->drv;
737 
738     if (drv->bdrv_change_backing_file != NULL) {
739         return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
740     } else {
741         return -ENOTSUP;
742     }
743 }
744 
745 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
746                                    size_t size)
747 {
748     int64_t len;
749 
750     if (!bdrv_is_inserted(bs))
751         return -ENOMEDIUM;
752 
753     if (bs->growable)
754         return 0;
755 
756     len = bdrv_getlength(bs);
757 
758     if (offset < 0)
759         return -EIO;
760 
761     if ((offset > len) || (len - offset < size))
762         return -EIO;
763 
764     return 0;
765 }
766 
767 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
768                               int nb_sectors)
769 {
770     return bdrv_check_byte_request(bs, sector_num * 512, nb_sectors * 512);
771 }
772 
773 /* return < 0 if error. See bdrv_write() for the return codes */
774 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
775               uint8_t *buf, int nb_sectors)
776 {
777     BlockDriver *drv = bs->drv;
778 
779     if (!drv)
780         return -ENOMEDIUM;
781     if (bdrv_check_request(bs, sector_num, nb_sectors))
782         return -EIO;
783 
784     return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
785 }
786 
787 static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
788                              int nb_sectors, int dirty)
789 {
790     int64_t start, end;
791     unsigned long val, idx, bit;
792 
793     start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
794     end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
795 
796     for (; start <= end; start++) {
797         idx = start / (sizeof(unsigned long) * 8);
798         bit = start % (sizeof(unsigned long) * 8);
799         val = bs->dirty_bitmap[idx];
800         if (dirty) {
801             if (!(val & (1 << bit))) {
802                 bs->dirty_count++;
803                 val |= 1 << bit;
804             }
805         } else {
806             if (val & (1 << bit)) {
807                 bs->dirty_count--;
808                 val &= ~(1 << bit);
809             }
810         }
811         bs->dirty_bitmap[idx] = val;
812     }
813 }
814 
815 /* Return < 0 if error. Important errors are:
816   -EIO         generic I/O error (may happen for all errors)
817   -ENOMEDIUM   No media inserted.
818   -EINVAL      Invalid sector number or nb_sectors
819   -EACCES      Trying to write a read-only device
820 */
821 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
822                const uint8_t *buf, int nb_sectors)
823 {
824     BlockDriver *drv = bs->drv;
825     if (!bs->drv)
826         return -ENOMEDIUM;
827     if (bs->read_only)
828         return -EACCES;
829     if (bdrv_check_request(bs, sector_num, nb_sectors))
830         return -EIO;
831 
832     if (bs->dirty_bitmap) {
833         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
834     }
835 
836     return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
837 }
838 
839 int bdrv_pread(BlockDriverState *bs, int64_t offset,
840                void *buf, int count1)
841 {
842     uint8_t tmp_buf[BDRV_SECTOR_SIZE];
843     int len, nb_sectors, count;
844     int64_t sector_num;
845     int ret;
846 
847     count = count1;
848     /* first read to align to sector start */
849     len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
850     if (len > count)
851         len = count;
852     sector_num = offset >> BDRV_SECTOR_BITS;
853     if (len > 0) {
854         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
855             return ret;
856         memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
857         count -= len;
858         if (count == 0)
859             return count1;
860         sector_num++;
861         buf += len;
862     }
863 
864     /* read the sectors "in place" */
865     nb_sectors = count >> BDRV_SECTOR_BITS;
866     if (nb_sectors > 0) {
867         if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
868             return ret;
869         sector_num += nb_sectors;
870         len = nb_sectors << BDRV_SECTOR_BITS;
871         buf += len;
872         count -= len;
873     }
874 
875     /* add data from the last sector */
876     if (count > 0) {
877         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
878             return ret;
879         memcpy(buf, tmp_buf, count);
880     }
881     return count1;
882 }
883 
884 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
885                 const void *buf, int count1)
886 {
887     uint8_t tmp_buf[BDRV_SECTOR_SIZE];
888     int len, nb_sectors, count;
889     int64_t sector_num;
890     int ret;
891 
892     count = count1;
893     /* first write to align to sector start */
894     len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
895     if (len > count)
896         len = count;
897     sector_num = offset >> BDRV_SECTOR_BITS;
898     if (len > 0) {
899         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
900             return ret;
901         memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
902         if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
903             return ret;
904         count -= len;
905         if (count == 0)
906             return count1;
907         sector_num++;
908         buf += len;
909     }
910 
911     /* write the sectors "in place" */
912     nb_sectors = count >> BDRV_SECTOR_BITS;
913     if (nb_sectors > 0) {
914         if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
915             return ret;
916         sector_num += nb_sectors;
917         len = nb_sectors << BDRV_SECTOR_BITS;
918         buf += len;
919         count -= len;
920     }
921 
922     /* add data from the last sector */
923     if (count > 0) {
924         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
925             return ret;
926         memcpy(tmp_buf, buf, count);
927         if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
928             return ret;
929     }
930     return count1;
931 }
932 
933 /**
934  * Truncate file to 'offset' bytes (needed only for file protocols)
935  */
936 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
937 {
938     BlockDriver *drv = bs->drv;
939     if (!drv)
940         return -ENOMEDIUM;
941     if (!drv->bdrv_truncate)
942         return -ENOTSUP;
943     if (bs->read_only)
944         return -EACCES;
945     return drv->bdrv_truncate(bs, offset);
946 }
947 
948 /**
949  * Length of a file in bytes. Return < 0 if error or unknown.
950  */
951 int64_t bdrv_getlength(BlockDriverState *bs)
952 {
953     BlockDriver *drv = bs->drv;
954     if (!drv)
955         return -ENOMEDIUM;
956     if (!drv->bdrv_getlength) {
957         /* legacy mode */
958         return bs->total_sectors * BDRV_SECTOR_SIZE;
959     }
960     return drv->bdrv_getlength(bs);
961 }
962 
963 /* return 0 as number of sectors if no device present or error */
964 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
965 {
966     int64_t length;
967     length = bdrv_getlength(bs);
968     if (length < 0)
969         length = 0;
970     else
971         length = length >> BDRV_SECTOR_BITS;
972     *nb_sectors_ptr = length;
973 }
974 
/*
 * One 16-byte entry of the MSDOS partition table. guess_disk_lchs() reads
 * four of these starting at byte offset 0x1be of the first sector. The
 * struct is packed so that it can be overlaid on the raw sector bytes; the
 * 32-bit fields are converted with le32_to_cpu() by the reader.
 */
struct partition {
        uint8_t boot_ind;           /* 0x80 - active */
        uint8_t head;               /* starting head */
        uint8_t sector;             /* starting sector */
        uint8_t cyl;                /* starting cylinder */
        uint8_t sys_ind;            /* What partition type */
        uint8_t end_head;           /* end head */
        uint8_t end_sector;         /* end sector */
        uint8_t end_cyl;            /* end cylinder */
        uint32_t start_sect;        /* starting sector counting from 0 */
        uint32_t nr_sects;          /* nr of sectors in partition */
} __attribute__((packed));
987 
988 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
989 static int guess_disk_lchs(BlockDriverState *bs,
990                            int *pcylinders, int *pheads, int *psectors)
991 {
992     uint8_t buf[512];
993     int ret, i, heads, sectors, cylinders;
994     struct partition *p;
995     uint32_t nr_sects;
996     uint64_t nb_sectors;
997 
998     bdrv_get_geometry(bs, &nb_sectors);
999 
1000     ret = bdrv_read(bs, 0, buf, 1);
1001     if (ret < 0)
1002         return -1;
1003     /* test msdos magic */
1004     if (buf[510] != 0x55 || buf[511] != 0xaa)
1005         return -1;
1006     for(i = 0; i < 4; i++) {
1007         p = ((struct partition *)(buf + 0x1be)) + i;
1008         nr_sects = le32_to_cpu(p->nr_sects);
1009         if (nr_sects && p->end_head) {
1010             /* We make the assumption that the partition terminates on
1011                a cylinder boundary */
1012             heads = p->end_head + 1;
1013             sectors = p->end_sector & 63;
1014             if (sectors == 0)
1015                 continue;
1016             cylinders = nb_sectors / (heads * sectors);
1017             if (cylinders < 1 || cylinders > 16383)
1018                 continue;
1019             *pheads = heads;
1020             *psectors = sectors;
1021             *pcylinders = cylinders;
1022 #if 0
1023             printf("guessed geometry: LCHS=%d %d %d\n",
1024                    cylinders, heads, sectors);
1025 #endif
1026             return 0;
1027         }
1028     }
1029     return -1;
1030 }
1031 
1032 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1033 {
1034     int translation, lba_detected = 0;
1035     int cylinders, heads, secs;
1036     uint64_t nb_sectors;
1037 
1038     /* if a geometry hint is available, use it */
1039     bdrv_get_geometry(bs, &nb_sectors);
1040     bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1041     translation = bdrv_get_translation_hint(bs);
1042     if (cylinders != 0) {
1043         *pcyls = cylinders;
1044         *pheads = heads;
1045         *psecs = secs;
1046     } else {
1047         if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1048             if (heads > 16) {
1049                 /* if heads > 16, it means that a BIOS LBA
1050                    translation was active, so the default
1051                    hardware geometry is OK */
1052                 lba_detected = 1;
1053                 goto default_geometry;
1054             } else {
1055                 *pcyls = cylinders;
1056                 *pheads = heads;
1057                 *psecs = secs;
1058                 /* disable any translation to be in sync with
1059                    the logical geometry */
1060                 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1061                     bdrv_set_translation_hint(bs,
1062                                               BIOS_ATA_TRANSLATION_NONE);
1063                 }
1064             }
1065         } else {
1066         default_geometry:
1067             /* if no geometry, use a standard physical disk geometry */
1068             cylinders = nb_sectors / (16 * 63);
1069 
1070             if (cylinders > 16383)
1071                 cylinders = 16383;
1072             else if (cylinders < 2)
1073                 cylinders = 2;
1074             *pcyls = cylinders;
1075             *pheads = 16;
1076             *psecs = 63;
1077             if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1078                 if ((*pcyls * *pheads) <= 131072) {
1079                     bdrv_set_translation_hint(bs,
1080                                               BIOS_ATA_TRANSLATION_LARGE);
1081                 } else {
1082                     bdrv_set_translation_hint(bs,
1083                                               BIOS_ATA_TRANSLATION_LBA);
1084                 }
1085             }
1086         }
1087         bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1088     }
1089 }
1090 
1091 void bdrv_set_geometry_hint(BlockDriverState *bs,
1092                             int cyls, int heads, int secs)
1093 {
1094     bs->cyls = cyls;
1095     bs->heads = heads;
1096     bs->secs = secs;
1097 }
1098 
1099 void bdrv_set_type_hint(BlockDriverState *bs, int type)
1100 {
1101     bs->type = type;
1102     bs->removable = ((type == BDRV_TYPE_CDROM ||
1103                       type == BDRV_TYPE_FLOPPY));
1104 }
1105 
1106 void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1107 {
1108     bs->translation = translation;
1109 }
1110 
1111 void bdrv_get_geometry_hint(BlockDriverState *bs,
1112                             int *pcyls, int *pheads, int *psecs)
1113 {
1114     *pcyls = bs->cyls;
1115     *pheads = bs->heads;
1116     *psecs = bs->secs;
1117 }
1118 
1119 int bdrv_get_type_hint(BlockDriverState *bs)
1120 {
1121     return bs->type;
1122 }
1123 
1124 int bdrv_get_translation_hint(BlockDriverState *bs)
1125 {
1126     return bs->translation;
1127 }
1128 
1129 int bdrv_is_removable(BlockDriverState *bs)
1130 {
1131     return bs->removable;
1132 }
1133 
1134 int bdrv_is_read_only(BlockDriverState *bs)
1135 {
1136     return bs->read_only;
1137 }
1138 
1139 int bdrv_is_sg(BlockDriverState *bs)
1140 {
1141     return bs->sg;
1142 }
1143 
1144 int bdrv_enable_write_cache(BlockDriverState *bs)
1145 {
1146     return bs->enable_write_cache;
1147 }
1148 
1149 /* XXX: no longer used */
1150 void bdrv_set_change_cb(BlockDriverState *bs,
1151                         void (*change_cb)(void *opaque), void *opaque)
1152 {
1153     bs->change_cb = change_cb;
1154     bs->change_opaque = opaque;
1155 }
1156 
1157 int bdrv_is_encrypted(BlockDriverState *bs)
1158 {
1159     if (bs->backing_hd && bs->backing_hd->encrypted)
1160         return 1;
1161     return bs->encrypted;
1162 }
1163 
1164 int bdrv_key_required(BlockDriverState *bs)
1165 {
1166     BlockDriverState *backing_hd = bs->backing_hd;
1167 
1168     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1169         return 1;
1170     return (bs->encrypted && !bs->valid_key);
1171 }
1172 
1173 int bdrv_set_key(BlockDriverState *bs, const char *key)
1174 {
1175     int ret;
1176     if (bs->backing_hd && bs->backing_hd->encrypted) {
1177         ret = bdrv_set_key(bs->backing_hd, key);
1178         if (ret < 0)
1179             return ret;
1180         if (!bs->encrypted)
1181             return 0;
1182     }
1183     if (!bs->encrypted) {
1184         return -EINVAL;
1185     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
1186         return -ENOMEDIUM;
1187     }
1188     ret = bs->drv->bdrv_set_key(bs, key);
1189     if (ret < 0) {
1190         bs->valid_key = 0;
1191     } else if (!bs->valid_key) {
1192         bs->valid_key = 1;
1193         /* call the change callback now, we skipped it on open */
1194         bs->media_changed = 1;
1195         if (bs->change_cb)
1196             bs->change_cb(bs->change_opaque);
1197     }
1198     return ret;
1199 }
1200 
1201 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1202 {
1203     if (!bs->drv) {
1204         buf[0] = '\0';
1205     } else {
1206         pstrcpy(buf, buf_size, bs->drv->format_name);
1207     }
1208 }
1209 
1210 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
1211                          void *opaque)
1212 {
1213     BlockDriver *drv;
1214 
1215     QLIST_FOREACH(drv, &bdrv_drivers, list) {
1216         it(opaque, drv->format_name);
1217     }
1218 }
1219 
1220 BlockDriverState *bdrv_find(const char *name)
1221 {
1222     BlockDriverState *bs;
1223 
1224     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1225         if (!strcmp(name, bs->device_name)) {
1226             return bs;
1227         }
1228     }
1229     return NULL;
1230 }
1231 
1232 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
1233 {
1234     BlockDriverState *bs;
1235 
1236     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1237         it(opaque, bs);
1238     }
1239 }
1240 
1241 const char *bdrv_get_device_name(BlockDriverState *bs)
1242 {
1243     return bs->device_name;
1244 }
1245 
1246 void bdrv_flush(BlockDriverState *bs)
1247 {
1248     if (bs->drv && bs->drv->bdrv_flush)
1249         bs->drv->bdrv_flush(bs);
1250 }
1251 
1252 void bdrv_flush_all(void)
1253 {
1254     BlockDriverState *bs;
1255 
1256     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1257         if (bs->drv && !bdrv_is_read_only(bs) &&
1258             (!bdrv_is_removable(bs) || bdrv_is_inserted(bs))) {
1259             bdrv_flush(bs);
1260         }
1261     }
1262 }
1263 
1264 /*
1265  * Returns true iff the specified sector is present in the disk image. Drivers
1266  * not implementing the functionality are assumed to not support backing files,
1267  * hence all their sectors are reported as allocated.
1268  *
1269  * 'pnum' is set to the number of sectors (including and immediately following
1270  * the specified sector) that are known to be in the same
1271  * allocated/unallocated state.
1272  *
1273  * 'nb_sectors' is the max value 'pnum' should be set to.
1274  */
1275 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1276 	int *pnum)
1277 {
1278     int64_t n;
1279     if (!bs->drv->bdrv_is_allocated) {
1280         if (sector_num >= bs->total_sectors) {
1281             *pnum = 0;
1282             return 0;
1283         }
1284         n = bs->total_sectors - sector_num;
1285         *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1286         return 1;
1287     }
1288     return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1289 }
1290 
1291 void bdrv_mon_event(const BlockDriverState *bdrv,
1292                     BlockMonEventAction action, int is_read)
1293 {
1294     QObject *data;
1295     const char *action_str;
1296 
1297     switch (action) {
1298     case BDRV_ACTION_REPORT:
1299         action_str = "report";
1300         break;
1301     case BDRV_ACTION_IGNORE:
1302         action_str = "ignore";
1303         break;
1304     case BDRV_ACTION_STOP:
1305         action_str = "stop";
1306         break;
1307     default:
1308         abort();
1309     }
1310 
1311     data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1312                               bdrv->device_name,
1313                               action_str,
1314                               is_read ? "read" : "write");
1315     monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1316 
1317     qobject_decref(data);
1318 }
1319 
1320 static void bdrv_print_dict(QObject *obj, void *opaque)
1321 {
1322     QDict *bs_dict;
1323     Monitor *mon = opaque;
1324 
1325     bs_dict = qobject_to_qdict(obj);
1326 
1327     monitor_printf(mon, "%s: type=%s removable=%d",
1328                         qdict_get_str(bs_dict, "device"),
1329                         qdict_get_str(bs_dict, "type"),
1330                         qdict_get_bool(bs_dict, "removable"));
1331 
1332     if (qdict_get_bool(bs_dict, "removable")) {
1333         monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
1334     }
1335 
1336     if (qdict_haskey(bs_dict, "inserted")) {
1337         QDict *qdict = qobject_to_qdict(qdict_get(bs_dict, "inserted"));
1338 
1339         monitor_printf(mon, " file=");
1340         monitor_print_filename(mon, qdict_get_str(qdict, "file"));
1341         if (qdict_haskey(qdict, "backing_file")) {
1342             monitor_printf(mon, " backing_file=");
1343             monitor_print_filename(mon, qdict_get_str(qdict, "backing_file"));
1344         }
1345         monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
1346                             qdict_get_bool(qdict, "ro"),
1347                             qdict_get_str(qdict, "drv"),
1348                             qdict_get_bool(qdict, "encrypted"));
1349     } else {
1350         monitor_printf(mon, " [not inserted]");
1351     }
1352 
1353     monitor_printf(mon, "\n");
1354 }
1355 
1356 void bdrv_info_print(Monitor *mon, const QObject *data)
1357 {
1358     qlist_iter(qobject_to_qlist(data), bdrv_print_dict, mon);
1359 }
1360 
1361 /**
1362  * bdrv_info(): Block devices information
1363  *
1364  * Each block device information is stored in a QDict and the
1365  * returned QObject is a QList of all devices.
1366  *
1367  * The QDict contains the following:
1368  *
1369  * - "device": device name
1370  * - "type": device type
1371  * - "removable": true if the device is removable, false otherwise
1372  * - "locked": true if the device is locked, false otherwise
1373  * - "inserted": only present if the device is inserted, it is a QDict
1374  *    containing the following:
1375  *          - "file": device file name
1376  *          - "ro": true if read-only, false otherwise
1377  *          - "drv": driver format name
1378  *          - "backing_file": backing file name if one is used
1379  *          - "encrypted": true if encrypted, false otherwise
1380  *
1381  * Example:
1382  *
1383  * [ { "device": "ide0-hd0", "type": "hd", "removable": false, "locked": false,
1384  *     "inserted": { "file": "/tmp/foobar", "ro": false, "drv": "qcow2" } },
1385  *   { "device": "floppy0", "type": "floppy", "removable": true,
1386  *     "locked": false } ]
1387  */
1388 void bdrv_info(Monitor *mon, QObject **ret_data)
1389 {
1390     QList *bs_list;
1391     BlockDriverState *bs;
1392 
1393     bs_list = qlist_new();
1394 
1395     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1396         QObject *bs_obj;
1397         const char *type = "unknown";
1398 
1399         switch(bs->type) {
1400         case BDRV_TYPE_HD:
1401             type = "hd";
1402             break;
1403         case BDRV_TYPE_CDROM:
1404             type = "cdrom";
1405             break;
1406         case BDRV_TYPE_FLOPPY:
1407             type = "floppy";
1408             break;
1409         }
1410 
1411         bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': %s, "
1412                                     "'removable': %i, 'locked': %i }",
1413                                     bs->device_name, type, bs->removable,
1414                                     bs->locked);
1415 
1416         if (bs->drv) {
1417             QObject *obj;
1418             QDict *bs_dict = qobject_to_qdict(bs_obj);
1419 
1420             obj = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
1421                                      "'encrypted': %i }",
1422                                      bs->filename, bs->read_only,
1423                                      bs->drv->format_name,
1424                                      bdrv_is_encrypted(bs));
1425             if (bs->backing_file[0] != '\0') {
1426                 QDict *qdict = qobject_to_qdict(obj);
1427                 qdict_put(qdict, "backing_file",
1428                           qstring_from_str(bs->backing_file));
1429             }
1430 
1431             qdict_put_obj(bs_dict, "inserted", obj);
1432         }
1433         qlist_append_obj(bs_list, bs_obj);
1434     }
1435 
1436     *ret_data = QOBJECT(bs_list);
1437 }
1438 
1439 static void bdrv_stats_iter(QObject *data, void *opaque)
1440 {
1441     QDict *qdict;
1442     Monitor *mon = opaque;
1443 
1444     qdict = qobject_to_qdict(data);
1445     monitor_printf(mon, "%s:", qdict_get_str(qdict, "device"));
1446 
1447     qdict = qobject_to_qdict(qdict_get(qdict, "stats"));
1448     monitor_printf(mon, " rd_bytes=%" PRId64
1449                         " wr_bytes=%" PRId64
1450                         " rd_operations=%" PRId64
1451                         " wr_operations=%" PRId64
1452                         "\n",
1453                         qdict_get_int(qdict, "rd_bytes"),
1454                         qdict_get_int(qdict, "wr_bytes"),
1455                         qdict_get_int(qdict, "rd_operations"),
1456                         qdict_get_int(qdict, "wr_operations"));
1457 }
1458 
1459 void bdrv_stats_print(Monitor *mon, const QObject *data)
1460 {
1461     qlist_iter(qobject_to_qlist(data), bdrv_stats_iter, mon);
1462 }
1463 
1464 /**
1465  * bdrv_info_stats(): show block device statistics
1466  *
1467  * Each device statistic information is stored in a QDict and
1468  * the returned QObject is a QList of all devices.
1469  *
1470  * The QDict contains the following:
1471  *
1472  * - "device": device name
1473  * - "stats": A QDict with the statistics information, it contains:
1474  *     - "rd_bytes": bytes read
1475  *     - "wr_bytes": bytes written
1476  *     - "rd_operations": read operations
1477  *     - "wr_operations": write operations
1478  *
1479  * Example:
1480  *
1481  * [ { "device": "ide0-hd0",
1482  *               "stats": { "rd_bytes": 512,
1483  *                          "wr_bytes": 0,
1484  *                          "rd_operations": 1,
1485  *                          "wr_operations": 0 } },
1486  *   { "device": "ide1-cd0",
1487  *               "stats": { "rd_bytes": 0,
1488  *                          "wr_bytes": 0,
1489  *                          "rd_operations": 0,
1490  *                          "wr_operations": 0 } } ]
1491  */
1492 void bdrv_info_stats(Monitor *mon, QObject **ret_data)
1493 {
1494     QObject *obj;
1495     QList *devices;
1496     BlockDriverState *bs;
1497 
1498     devices = qlist_new();
1499 
1500     QTAILQ_FOREACH(bs, &bdrv_states, list) {
1501         obj = qobject_from_jsonf("{ 'device': %s, 'stats': {"
1502                                  "'rd_bytes': %" PRId64 ","
1503                                  "'wr_bytes': %" PRId64 ","
1504                                  "'rd_operations': %" PRId64 ","
1505                                  "'wr_operations': %" PRId64
1506                                  "} }",
1507                                  bs->device_name,
1508                                  bs->rd_bytes, bs->wr_bytes,
1509                                  bs->rd_ops, bs->wr_ops);
1510         qlist_append_obj(devices, obj);
1511     }
1512 
1513     *ret_data = QOBJECT(devices);
1514 }
1515 
1516 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
1517 {
1518     if (bs->backing_hd && bs->backing_hd->encrypted)
1519         return bs->backing_file;
1520     else if (bs->encrypted)
1521         return bs->filename;
1522     else
1523         return NULL;
1524 }
1525 
1526 void bdrv_get_backing_filename(BlockDriverState *bs,
1527                                char *filename, int filename_size)
1528 {
1529     if (!bs->backing_file) {
1530         pstrcpy(filename, filename_size, "");
1531     } else {
1532         pstrcpy(filename, filename_size, bs->backing_file);
1533     }
1534 }
1535 
1536 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
1537                           const uint8_t *buf, int nb_sectors)
1538 {
1539     BlockDriver *drv = bs->drv;
1540     if (!drv)
1541         return -ENOMEDIUM;
1542     if (!drv->bdrv_write_compressed)
1543         return -ENOTSUP;
1544     if (bdrv_check_request(bs, sector_num, nb_sectors))
1545         return -EIO;
1546 
1547     if (bs->dirty_bitmap) {
1548         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1549     }
1550 
1551     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
1552 }
1553 
1554 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1555 {
1556     BlockDriver *drv = bs->drv;
1557     if (!drv)
1558         return -ENOMEDIUM;
1559     if (!drv->bdrv_get_info)
1560         return -ENOTSUP;
1561     memset(bdi, 0, sizeof(*bdi));
1562     return drv->bdrv_get_info(bs, bdi);
1563 }
1564 
1565 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
1566                       int64_t pos, int size)
1567 {
1568     BlockDriver *drv = bs->drv;
1569     if (!drv)
1570         return -ENOMEDIUM;
1571     if (!drv->bdrv_save_vmstate)
1572         return -ENOTSUP;
1573     return drv->bdrv_save_vmstate(bs, buf, pos, size);
1574 }
1575 
1576 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
1577                       int64_t pos, int size)
1578 {
1579     BlockDriver *drv = bs->drv;
1580     if (!drv)
1581         return -ENOMEDIUM;
1582     if (!drv->bdrv_load_vmstate)
1583         return -ENOTSUP;
1584     return drv->bdrv_load_vmstate(bs, buf, pos, size);
1585 }
1586 
1587 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
1588 {
1589     BlockDriver *drv = bs->drv;
1590 
1591     if (!drv || !drv->bdrv_debug_event) {
1592         return;
1593     }
1594 
1595     return drv->bdrv_debug_event(bs, event);
1596 
1597 }
1598 
1599 /**************************************************************/
1600 /* handling of snapshots */
1601 
1602 int bdrv_snapshot_create(BlockDriverState *bs,
1603                          QEMUSnapshotInfo *sn_info)
1604 {
1605     BlockDriver *drv = bs->drv;
1606     if (!drv)
1607         return -ENOMEDIUM;
1608     if (!drv->bdrv_snapshot_create)
1609         return -ENOTSUP;
1610     return drv->bdrv_snapshot_create(bs, sn_info);
1611 }
1612 
1613 int bdrv_snapshot_goto(BlockDriverState *bs,
1614                        const char *snapshot_id)
1615 {
1616     BlockDriver *drv = bs->drv;
1617     if (!drv)
1618         return -ENOMEDIUM;
1619     if (!drv->bdrv_snapshot_goto)
1620         return -ENOTSUP;
1621     return drv->bdrv_snapshot_goto(bs, snapshot_id);
1622 }
1623 
1624 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
1625 {
1626     BlockDriver *drv = bs->drv;
1627     if (!drv)
1628         return -ENOMEDIUM;
1629     if (!drv->bdrv_snapshot_delete)
1630         return -ENOTSUP;
1631     return drv->bdrv_snapshot_delete(bs, snapshot_id);
1632 }
1633 
1634 int bdrv_snapshot_list(BlockDriverState *bs,
1635                        QEMUSnapshotInfo **psn_info)
1636 {
1637     BlockDriver *drv = bs->drv;
1638     if (!drv)
1639         return -ENOMEDIUM;
1640     if (!drv->bdrv_snapshot_list)
1641         return -ENOTSUP;
1642     return drv->bdrv_snapshot_list(bs, psn_info);
1643 }
1644 
#define NB_SUFFIXES 4

/*
 * Format @size into @buf (at most @buf_size bytes) in a short
 * human-readable form and return @buf.
 *
 * Values up to 999 are printed as plain integers. Larger values are scaled
 * by powers of 1024 and suffixed with K, M, G or T: one decimal place is
 * used while the scaled value is below 10, a rounded integer otherwise.
 * Values too large for 'T' are still printed in T.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx = 0;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    /* pick the smallest unit showing fewer than 1000 units, capped at T */
    while (idx < NB_SUFFIXES - 1 && size >= 1000 * unit) {
        unit *= 1024;
        idx++;
    }

    if (size < 10 * unit) {
        snprintf(buf, buf_size, "%0.1f%c",
                 (double)size / unit, suffixes[idx]);
    } else {
        /* adding half a unit rounds to the nearest integer */
        snprintf(buf, buf_size, "%" PRId64 "%c",
                 (size + (unit >> 1)) / unit, suffixes[idx]);
    }
    return buf;
}
1674 
1675 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
1676 {
1677     char buf1[128], date_buf[128], clock_buf[128];
1678 #ifdef _WIN32
1679     struct tm *ptm;
1680 #else
1681     struct tm tm;
1682 #endif
1683     time_t ti;
1684     int64_t secs;
1685 
1686     if (!sn) {
1687         snprintf(buf, buf_size,
1688                  "%-10s%-20s%7s%20s%15s",
1689                  "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
1690     } else {
1691         ti = sn->date_sec;
1692 #ifdef _WIN32
1693         ptm = localtime(&ti);
1694         strftime(date_buf, sizeof(date_buf),
1695                  "%Y-%m-%d %H:%M:%S", ptm);
1696 #else
1697         localtime_r(&ti, &tm);
1698         strftime(date_buf, sizeof(date_buf),
1699                  "%Y-%m-%d %H:%M:%S", &tm);
1700 #endif
1701         secs = sn->vm_clock_nsec / 1000000000;
1702         snprintf(clock_buf, sizeof(clock_buf),
1703                  "%02d:%02d:%02d.%03d",
1704                  (int)(secs / 3600),
1705                  (int)((secs / 60) % 60),
1706                  (int)(secs % 60),
1707                  (int)((sn->vm_clock_nsec / 1000000) % 1000));
1708         snprintf(buf, buf_size,
1709                  "%-10s%-20s%7s%20s%15s",
1710                  sn->id_str, sn->name,
1711                  get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
1712                  date_buf,
1713                  clock_buf);
1714     }
1715     return buf;
1716 }
1717 
1718 
1719 /**************************************************************/
1720 /* async I/Os */
1721 
1722 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
1723                                  QEMUIOVector *qiov, int nb_sectors,
1724                                  BlockDriverCompletionFunc *cb, void *opaque)
1725 {
1726     BlockDriver *drv = bs->drv;
1727     BlockDriverAIOCB *ret;
1728 
1729     if (!drv)
1730         return NULL;
1731     if (bdrv_check_request(bs, sector_num, nb_sectors))
1732         return NULL;
1733 
1734     ret = drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
1735                               cb, opaque);
1736 
1737     if (ret) {
1738 	/* Update stats even though technically transfer has not happened. */
1739 	bs->rd_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
1740 	bs->rd_ops ++;
1741     }
1742 
1743     return ret;
1744 }
1745 
1746 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
1747                                   QEMUIOVector *qiov, int nb_sectors,
1748                                   BlockDriverCompletionFunc *cb, void *opaque)
1749 {
1750     BlockDriver *drv = bs->drv;
1751     BlockDriverAIOCB *ret;
1752 
1753     if (!drv)
1754         return NULL;
1755     if (bs->read_only)
1756         return NULL;
1757     if (bdrv_check_request(bs, sector_num, nb_sectors))
1758         return NULL;
1759 
1760     if (bs->dirty_bitmap) {
1761         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1762     }
1763 
1764     ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
1765                                cb, opaque);
1766 
1767     if (ret) {
1768 	/* Update stats even though technically transfer has not happened. */
1769 	bs->wr_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
1770 	bs->wr_ops ++;
1771     }
1772 
1773     return ret;
1774 }
1775 
1776 
1777 typedef struct MultiwriteCB {
1778     int error;
1779     int num_requests;
1780     int num_callbacks;
1781     struct {
1782         BlockDriverCompletionFunc *cb;
1783         void *opaque;
1784         QEMUIOVector *free_qiov;
1785         void *free_buf;
1786     } callbacks[];
1787 } MultiwriteCB;
1788 
1789 static void multiwrite_user_cb(MultiwriteCB *mcb)
1790 {
1791     int i;
1792 
1793     for (i = 0; i < mcb->num_callbacks; i++) {
1794         mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
1795         if (mcb->callbacks[i].free_qiov) {
1796             qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
1797         }
1798         qemu_free(mcb->callbacks[i].free_qiov);
1799         qemu_vfree(mcb->callbacks[i].free_buf);
1800     }
1801 }
1802 
1803 static void multiwrite_cb(void *opaque, int ret)
1804 {
1805     MultiwriteCB *mcb = opaque;
1806 
1807     if (ret < 0 && !mcb->error) {
1808         mcb->error = ret;
1809         multiwrite_user_cb(mcb);
1810     }
1811 
1812     mcb->num_requests--;
1813     if (mcb->num_requests == 0) {
1814         if (mcb->error == 0) {
1815             multiwrite_user_cb(mcb);
1816         }
1817         qemu_free(mcb);
1818     }
1819 }
1820 
1821 static int multiwrite_req_compare(const void *a, const void *b)
1822 {
1823     return (((BlockRequest*) a)->sector - ((BlockRequest*) b)->sector);
1824 }
1825 
1826 /*
1827  * Takes a bunch of requests and tries to merge them. Returns the number of
1828  * requests that remain after merging.
1829  */
1830 static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
1831     int num_reqs, MultiwriteCB *mcb)
1832 {
1833     int i, outidx;
1834 
1835     // Sort requests by start sector
1836     qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
1837 
1838     // Check if adjacent requests touch the same clusters. If so, combine them,
1839     // filling up gaps with zero sectors.
1840     outidx = 0;
1841     for (i = 1; i < num_reqs; i++) {
1842         int merge = 0;
1843         int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
1844 
1845         // This handles the cases that are valid for all block drivers, namely
1846         // exactly sequential writes and overlapping writes.
1847         if (reqs[i].sector <= oldreq_last) {
1848             merge = 1;
1849         }
1850 
1851         // The block driver may decide that it makes sense to combine requests
1852         // even if there is a gap of some sectors between them. In this case,
1853         // the gap is filled with zeros (therefore only applicable for yet
1854         // unused space in format like qcow2).
1855         if (!merge && bs->drv->bdrv_merge_requests) {
1856             merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
1857         }
1858 
1859         if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
1860             merge = 0;
1861         }
1862 
1863         if (merge) {
1864             size_t size;
1865             QEMUIOVector *qiov = qemu_mallocz(sizeof(*qiov));
1866             qemu_iovec_init(qiov,
1867                 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
1868 
1869             // Add the first request to the merged one. If the requests are
1870             // overlapping, drop the last sectors of the first request.
1871             size = (reqs[i].sector - reqs[outidx].sector) << 9;
1872             qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
1873 
1874             // We might need to add some zeros between the two requests
1875             if (reqs[i].sector > oldreq_last) {
1876                 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
1877                 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
1878                 memset(buf, 0, zero_bytes);
1879                 qemu_iovec_add(qiov, buf, zero_bytes);
1880                 mcb->callbacks[i].free_buf = buf;
1881             }
1882 
1883             // Add the second request
1884             qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
1885 
1886             reqs[outidx].nb_sectors += reqs[i].nb_sectors;
1887             reqs[outidx].qiov = qiov;
1888 
1889             mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
1890         } else {
1891             outidx++;
1892             reqs[outidx].sector     = reqs[i].sector;
1893             reqs[outidx].nb_sectors = reqs[i].nb_sectors;
1894             reqs[outidx].qiov       = reqs[i].qiov;
1895         }
1896     }
1897 
1898     return outidx + 1;
1899 }
1900 
1901 /*
1902  * Submit multiple AIO write requests at once.
1903  *
1904  * On success, the function returns 0 and all requests in the reqs array have
1905  * been submitted. In error case this function returns -1, and any of the
1906  * requests may or may not be submitted yet. In particular, this means that the
1907  * callback will be called for some of the requests, for others it won't. The
1908  * caller must check the error field of the BlockRequest to wait for the right
1909  * callbacks (if error != 0, no callback will be called).
1910  *
1911  * The implementation may modify the contents of the reqs array, e.g. to merge
1912  * requests. However, the fields opaque and error are left unmodified as they
1913  * are used to signal failure for a single request to the caller.
1914  */
1915 int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
1916 {
1917     BlockDriverAIOCB *acb;
1918     MultiwriteCB *mcb;
1919     int i;
1920 
1921     if (num_reqs == 0) {
1922         return 0;
1923     }
1924 
1925     // Create MultiwriteCB structure
1926     mcb = qemu_mallocz(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
1927     mcb->num_requests = 0;
1928     mcb->num_callbacks = num_reqs;
1929 
1930     for (i = 0; i < num_reqs; i++) {
1931         mcb->callbacks[i].cb = reqs[i].cb;
1932         mcb->callbacks[i].opaque = reqs[i].opaque;
1933     }
1934 
1935     // Check for mergable requests
1936     num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
1937 
1938     // Run the aio requests
1939     for (i = 0; i < num_reqs; i++) {
1940         acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
1941             reqs[i].nb_sectors, multiwrite_cb, mcb);
1942 
1943         if (acb == NULL) {
1944             // We can only fail the whole thing if no request has been
1945             // submitted yet. Otherwise we'll wait for the submitted AIOs to
1946             // complete and report the error in the callback.
1947             if (mcb->num_requests == 0) {
1948                 reqs[i].error = -EIO;
1949                 goto fail;
1950             } else {
1951                 mcb->num_requests++;
1952                 multiwrite_cb(mcb, -EIO);
1953                 break;
1954             }
1955         } else {
1956             mcb->num_requests++;
1957         }
1958     }
1959 
1960     return 0;
1961 
1962 fail:
1963     free(mcb);
1964     return -1;
1965 }
1966 
1967 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
1968         BlockDriverCompletionFunc *cb, void *opaque)
1969 {
1970     BlockDriver *drv = bs->drv;
1971 
1972     if (!drv)
1973         return NULL;
1974     return drv->bdrv_aio_flush(bs, cb, opaque);
1975 }
1976 
1977 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
1978 {
1979     acb->pool->cancel(acb);
1980 }
1981 
1982 
1983 /**************************************************************/
1984 /* async block device emulation */
1985 
1986 typedef struct BlockDriverAIOCBSync {
1987     BlockDriverAIOCB common;
1988     QEMUBH *bh;
1989     int ret;
1990     /* vector translation state */
1991     QEMUIOVector *qiov;
1992     uint8_t *bounce;
1993     int is_write;
1994 } BlockDriverAIOCBSync;
1995 
1996 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
1997 {
1998     BlockDriverAIOCBSync *acb = (BlockDriverAIOCBSync *)blockacb;
1999     qemu_bh_delete(acb->bh);
2000     acb->bh = NULL;
2001     qemu_aio_release(acb);
2002 }
2003 
2004 static AIOPool bdrv_em_aio_pool = {
2005     .aiocb_size         = sizeof(BlockDriverAIOCBSync),
2006     .cancel             = bdrv_aio_cancel_em,
2007 };
2008 
2009 static void bdrv_aio_bh_cb(void *opaque)
2010 {
2011     BlockDriverAIOCBSync *acb = opaque;
2012 
2013     if (!acb->is_write)
2014         qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
2015     qemu_vfree(acb->bounce);
2016     acb->common.cb(acb->common.opaque, acb->ret);
2017     qemu_bh_delete(acb->bh);
2018     acb->bh = NULL;
2019     qemu_aio_release(acb);
2020 }
2021 
2022 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2023                                             int64_t sector_num,
2024                                             QEMUIOVector *qiov,
2025                                             int nb_sectors,
2026                                             BlockDriverCompletionFunc *cb,
2027                                             void *opaque,
2028                                             int is_write)
2029 
2030 {
2031     BlockDriverAIOCBSync *acb;
2032 
2033     acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2034     acb->is_write = is_write;
2035     acb->qiov = qiov;
2036     acb->bounce = qemu_blockalign(bs, qiov->size);
2037 
2038     if (!acb->bh)
2039         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2040 
2041     if (is_write) {
2042         qemu_iovec_to_buffer(acb->qiov, acb->bounce);
2043         acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
2044     } else {
2045         acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
2046     }
2047 
2048     qemu_bh_schedule(acb->bh);
2049 
2050     return &acb->common;
2051 }
2052 
2053 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2054         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2055         BlockDriverCompletionFunc *cb, void *opaque)
2056 {
2057     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2058 }
2059 
2060 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2061         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2062         BlockDriverCompletionFunc *cb, void *opaque)
2063 {
2064     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
2065 }
2066 
2067 static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
2068         BlockDriverCompletionFunc *cb, void *opaque)
2069 {
2070     BlockDriverAIOCBSync *acb;
2071 
2072     acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2073     acb->is_write = 1; /* don't bounce in the completion hadler */
2074     acb->qiov = NULL;
2075     acb->bounce = NULL;
2076     acb->ret = 0;
2077 
2078     if (!acb->bh)
2079         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2080 
2081     bdrv_flush(bs);
2082     qemu_bh_schedule(acb->bh);
2083     return &acb->common;
2084 }
2085 
2086 /**************************************************************/
2087 /* sync block device emulation */
2088 
/*
 * Completion callback of the synchronous emulation: stores the result of
 * the request in the int that opaque points to.
 */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}

/* Placeholder result meaning "the completion callback has not run yet". */
#define NOT_DONE 0x7fffffff
2095 
2096 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
2097                         uint8_t *buf, int nb_sectors)
2098 {
2099     int async_ret;
2100     BlockDriverAIOCB *acb;
2101     struct iovec iov;
2102     QEMUIOVector qiov;
2103 
2104     async_context_push();
2105 
2106     async_ret = NOT_DONE;
2107     iov.iov_base = (void *)buf;
2108     iov.iov_len = nb_sectors * 512;
2109     qemu_iovec_init_external(&qiov, &iov, 1);
2110     acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
2111         bdrv_rw_em_cb, &async_ret);
2112     if (acb == NULL) {
2113         async_ret = -1;
2114         goto fail;
2115     }
2116 
2117     while (async_ret == NOT_DONE) {
2118         qemu_aio_wait();
2119     }
2120 
2121 
2122 fail:
2123     async_context_pop();
2124     return async_ret;
2125 }
2126 
2127 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
2128                          const uint8_t *buf, int nb_sectors)
2129 {
2130     int async_ret;
2131     BlockDriverAIOCB *acb;
2132     struct iovec iov;
2133     QEMUIOVector qiov;
2134 
2135     async_context_push();
2136 
2137     async_ret = NOT_DONE;
2138     iov.iov_base = (void *)buf;
2139     iov.iov_len = nb_sectors * 512;
2140     qemu_iovec_init_external(&qiov, &iov, 1);
2141     acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
2142         bdrv_rw_em_cb, &async_ret);
2143     if (acb == NULL) {
2144         async_ret = -1;
2145         goto fail;
2146     }
2147     while (async_ret == NOT_DONE) {
2148         qemu_aio_wait();
2149     }
2150 
2151 fail:
2152     async_context_pop();
2153     return async_ret;
2154 }
2155 
2156 void bdrv_init(void)
2157 {
2158     module_call_init(MODULE_INIT_BLOCK);
2159 }
2160 
2161 void bdrv_init_with_whitelist(void)
2162 {
2163     use_bdrv_whitelist = 1;
2164     bdrv_init();
2165 }
2166 
2167 void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2168                    BlockDriverCompletionFunc *cb, void *opaque)
2169 {
2170     BlockDriverAIOCB *acb;
2171 
2172     if (pool->free_aiocb) {
2173         acb = pool->free_aiocb;
2174         pool->free_aiocb = acb->next;
2175     } else {
2176         acb = qemu_mallocz(pool->aiocb_size);
2177         acb->pool = pool;
2178     }
2179     acb->bs = bs;
2180     acb->cb = cb;
2181     acb->opaque = opaque;
2182     return acb;
2183 }
2184 
2185 void qemu_aio_release(void *p)
2186 {
2187     BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2188     AIOPool *pool = acb->pool;
2189     acb->next = pool->free_aiocb;
2190     pool->free_aiocb = acb;
2191 }
2192 
2193 /**************************************************************/
2194 /* removable device support */
2195 
2196 /**
2197  * Return TRUE if the media is present
2198  */
2199 int bdrv_is_inserted(BlockDriverState *bs)
2200 {
2201     BlockDriver *drv = bs->drv;
2202     int ret;
2203     if (!drv)
2204         return 0;
2205     if (!drv->bdrv_is_inserted)
2206         return 1;
2207     ret = drv->bdrv_is_inserted(bs);
2208     return ret;
2209 }
2210 
2211 /**
2212  * Return TRUE if the media changed since the last call to this
2213  * function. It is currently only used for floppy disks
2214  */
2215 int bdrv_media_changed(BlockDriverState *bs)
2216 {
2217     BlockDriver *drv = bs->drv;
2218     int ret;
2219 
2220     if (!drv || !drv->bdrv_media_changed)
2221         ret = -ENOTSUP;
2222     else
2223         ret = drv->bdrv_media_changed(bs);
2224     if (ret == -ENOTSUP)
2225         ret = bs->media_changed;
2226     bs->media_changed = 0;
2227     return ret;
2228 }
2229 
2230 /**
2231  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
2232  */
2233 int bdrv_eject(BlockDriverState *bs, int eject_flag)
2234 {
2235     BlockDriver *drv = bs->drv;
2236     int ret;
2237 
2238     if (bs->locked) {
2239         return -EBUSY;
2240     }
2241 
2242     if (!drv || !drv->bdrv_eject) {
2243         ret = -ENOTSUP;
2244     } else {
2245         ret = drv->bdrv_eject(bs, eject_flag);
2246     }
2247     if (ret == -ENOTSUP) {
2248         if (eject_flag)
2249             bdrv_close(bs);
2250         ret = 0;
2251     }
2252 
2253     return ret;
2254 }
2255 
2256 int bdrv_is_locked(BlockDriverState *bs)
2257 {
2258     return bs->locked;
2259 }
2260 
2261 /**
2262  * Lock or unlock the media (if it is locked, the user won't be able
2263  * to eject it manually).
2264  */
2265 void bdrv_set_locked(BlockDriverState *bs, int locked)
2266 {
2267     BlockDriver *drv = bs->drv;
2268 
2269     bs->locked = locked;
2270     if (drv && drv->bdrv_set_locked) {
2271         drv->bdrv_set_locked(bs, locked);
2272     }
2273 }
2274 
2275 /* needed for generic scsi interface */
2276 
2277 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
2278 {
2279     BlockDriver *drv = bs->drv;
2280 
2281     if (drv && drv->bdrv_ioctl)
2282         return drv->bdrv_ioctl(bs, req, buf);
2283     return -ENOTSUP;
2284 }
2285 
2286 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
2287         unsigned long int req, void *buf,
2288         BlockDriverCompletionFunc *cb, void *opaque)
2289 {
2290     BlockDriver *drv = bs->drv;
2291 
2292     if (drv && drv->bdrv_aio_ioctl)
2293         return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
2294     return NULL;
2295 }
2296 
2297 
2298 
2299 void *qemu_blockalign(BlockDriverState *bs, size_t size)
2300 {
2301     return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
2302 }
2303 
2304 void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
2305 {
2306     int64_t bitmap_size;
2307 
2308     bs->dirty_count = 0;
2309     if (enable) {
2310         if (!bs->dirty_bitmap) {
2311             bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
2312                     BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
2313             bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
2314 
2315             bs->dirty_bitmap = qemu_mallocz(bitmap_size);
2316         }
2317     } else {
2318         if (bs->dirty_bitmap) {
2319             qemu_free(bs->dirty_bitmap);
2320             bs->dirty_bitmap = NULL;
2321         }
2322     }
2323 }
2324 
2325 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
2326 {
2327     int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
2328 
2329     if (bs->dirty_bitmap &&
2330         (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
2331         return bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
2332             (1 << (chunk % (sizeof(unsigned long) * 8)));
2333     } else {
2334         return 0;
2335     }
2336 }
2337 
2338 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
2339                       int nr_sectors)
2340 {
2341     set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
2342 }
2343 
2344 int64_t bdrv_get_dirty_count(BlockDriverState *bs)
2345 {
2346     return bs->dirty_count;
2347 }
2348