xref: /openbmc/qemu/block.c (revision 756e6736a12a46330d9532d5f861ba15b38886d8)
1 /*
2  * QEMU System Emulator block driver
3  *
4  * Copyright (c) 2003 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "monitor.h"
27 #include "block_int.h"
28 #include "module.h"
29 #include "qemu-objects.h"
30 
31 #ifdef CONFIG_BSD
32 #include <sys/types.h>
33 #include <sys/stat.h>
34 #include <sys/ioctl.h>
35 #include <sys/queue.h>
36 #ifndef __DragonFly__
37 #include <sys/disk.h>
38 #endif
39 #endif
40 
41 #ifdef _WIN32
42 #include <windows.h>
43 #endif
44 
/*
 * Forward declarations of the emulation helpers that bdrv_register()
 * installs into drivers which leave the corresponding callbacks unset.
 */
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
        BlockDriverCompletionFunc *cb, void *opaque);
static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
                        uint8_t *buf, int nb_sectors);
static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
                         const uint8_t *buf, int nb_sectors);

/* Head of the linked list of named block devices; bdrv_new() appends to it */
BlockDriverState *bdrv_first;

/* Head of the linked list of registered drivers; bdrv_register() prepends */
static BlockDriver *first_drv;

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
64 
/*
 * Tell whether 'path' is absolute.
 *
 * Anything up to and including the first ':' is skipped before the test,
 * so "proto:/dir/file" counts as absolute while "proto:file" does not.
 * On Windows a backslash is accepted wherever a slash is, and a path that
 * starts with either separator is absolute straight away.
 *
 * Returns 1 when the path is absolute, 0 otherwise.
 */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (path[0] == '/' || path[0] == '\\') {
        return 1;
    }
#endif
    colon = strchr(path, ':');
    start = colon ? colon + 1 : path;
#ifdef _WIN32
    if (*start == '\\') {
        return 1;
    }
#endif
    return *start == '/';
}
84 
/*
 * Build in 'dest' (at most dest_size bytes, always NUL-terminated when
 * dest_size > 0) the location of 'filename' interpreted relative to
 * 'base_path'.
 *
 * An absolute 'filename' is copied unchanged.  Otherwise the directory
 * part of 'base_path' is kept: everything up to and including the last
 * path separator, or up to and including the first ':' when that comes
 * later, which is what makes URL-like base paths work.  'filename' is
 * then appended to that prefix.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_proto, *after_sep, *cut;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just past the "protocol:" part, if there is one */
    after_proto = strchr(base_path, ':');
    after_proto = after_proto ? after_proto + 1 : base_path;

    /* position just past the last directory separator, if there is one */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *bslash = strrchr(base_path, '\\');
        if (!after_sep || bslash > after_sep) {
            after_sep = bslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* keep whichever prefix is longer, clamped to the destination size */
    cut = after_sep > after_proto ? after_sep : after_proto;
    prefix_len = cut - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
128 
129 void bdrv_register(BlockDriver *bdrv)
130 {
131     if (!bdrv->bdrv_aio_readv) {
132         /* add AIO emulation layer */
133         bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
134         bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
135     } else if (!bdrv->bdrv_read) {
136         /* add synchronous IO emulation layer */
137         bdrv->bdrv_read = bdrv_read_em;
138         bdrv->bdrv_write = bdrv_write_em;
139     }
140 
141     if (!bdrv->bdrv_aio_flush)
142         bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
143 
144     bdrv->next = first_drv;
145     first_drv = bdrv;
146 }
147 
148 /* create a new block device (by default it is empty) */
149 BlockDriverState *bdrv_new(const char *device_name)
150 {
151     BlockDriverState **pbs, *bs;
152 
153     bs = qemu_mallocz(sizeof(BlockDriverState));
154     pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
155     if (device_name[0] != '\0') {
156         /* insert at the end */
157         pbs = &bdrv_first;
158         while (*pbs != NULL)
159             pbs = &(*pbs)->next;
160         *pbs = bs;
161     }
162     return bs;
163 }
164 
165 BlockDriver *bdrv_find_format(const char *format_name)
166 {
167     BlockDriver *drv1;
168     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
169         if (!strcmp(drv1->format_name, format_name))
170             return drv1;
171     }
172     return NULL;
173 }
174 
175 static int bdrv_is_whitelisted(BlockDriver *drv)
176 {
177     static const char *whitelist[] = {
178         CONFIG_BDRV_WHITELIST
179     };
180     const char **p;
181 
182     if (!whitelist[0])
183         return 1;               /* no whitelist, anything goes */
184 
185     for (p = whitelist; *p; p++) {
186         if (!strcmp(drv->format_name, *p)) {
187             return 1;
188         }
189     }
190     return 0;
191 }
192 
193 BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
194 {
195     BlockDriver *drv = bdrv_find_format(format_name);
196     return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
197 }
198 
199 int bdrv_create(BlockDriver *drv, const char* filename,
200     QEMUOptionParameter *options)
201 {
202     if (!drv->bdrv_create)
203         return -ENOTSUP;
204 
205     return drv->bdrv_create(filename, options);
206 }
207 
#ifdef _WIN32
/*
 * Store the name of a fresh temporary file in 'filename'.
 *
 * NOTE(review): 'size' is not consulted on this path; GetTempFileName()
 * writes into 'filename' directly, so callers presumably have to supply
 * a buffer of at least MAX_PATH characters -- confirm against callers.
 */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
#else
/*
 * Store the name of a fresh temporary file in 'filename' (at most 'size'
 * bytes).  The file is created under $TMPDIR, or /tmp when TMPDIR is
 * unset, and is closed again before returning.
 *
 * If the file cannot be created, 'filename' is set to the empty string so
 * that callers never go on to use a template path that nothing has
 * claimed.
 */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;
    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir)
        tmpdir = "/tmp";
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    if (fd < 0) {
        /* covers a truncated template as well: mkstemp() rejects it */
        if (size > 0)
            filename[0] = '\0';
        return;
    }
    close(fd);
}
#endif
230 
231 #ifdef _WIN32
/*
 * Return 1 when 'filename' starts with an ASCII letter followed by ':'
 * (a drive prefix such as "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    int is_letter;

    is_letter = (letter >= 'a' && letter <= 'z') ||
                (letter >= 'A' && letter <= 'Z');
    if (!is_letter) {
        return 0;
    }
    return filename[1] == ':';
}
238 
/*
 * Return 1 when 'filename' names a whole drive or device rather than a
 * file: either a bare drive prefix ("c:" and nothing else) or a name
 * starting with "\\.\" or "//./".  Return 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    /* exactly "<letter>:" */
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }

    /* device-style prefix, in either slash flavour */
    if (strstart(filename, "\\\\.\\", NULL)) {
        return 1;
    }
    if (strstart(filename, "//./", NULL)) {
        return 1;
    }
    return 0;
}
249 #endif
250 
251 static BlockDriver *find_protocol(const char *filename)
252 {
253     BlockDriver *drv1;
254     char protocol[128];
255     int len;
256     const char *p;
257 
258 #ifdef _WIN32
259     if (is_windows_drive(filename) ||
260         is_windows_drive_prefix(filename))
261         return bdrv_find_format("raw");
262 #endif
263     p = strchr(filename, ':');
264     if (!p)
265         return bdrv_find_format("raw");
266     len = p - filename;
267     if (len > sizeof(protocol) - 1)
268         len = sizeof(protocol) - 1;
269     memcpy(protocol, filename, len);
270     protocol[len] = '\0';
271     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
272         if (drv1->protocol_name &&
273             !strcmp(drv1->protocol_name, protocol))
274             return drv1;
275     }
276     return NULL;
277 }
278 
279 /*
280  * Detect host devices. By convention, /dev/cdrom[N] is always
281  * recognized as a host CDROM.
282  */
283 static BlockDriver *find_hdev_driver(const char *filename)
284 {
285     int score_max = 0, score;
286     BlockDriver *drv = NULL, *d;
287 
288     for (d = first_drv; d; d = d->next) {
289         if (d->bdrv_probe_device) {
290             score = d->bdrv_probe_device(filename);
291             if (score > score_max) {
292                 score_max = score;
293                 drv = d;
294             }
295         }
296     }
297 
298     return drv;
299 }
300 
301 static BlockDriver *find_image_format(const char *filename)
302 {
303     int ret, score, score_max;
304     BlockDriver *drv1, *drv;
305     uint8_t buf[2048];
306     BlockDriverState *bs;
307 
308     drv = find_protocol(filename);
309     /* no need to test disk image formats for vvfat */
310     if (drv && strcmp(drv->format_name, "vvfat") == 0)
311         return drv;
312 
313     ret = bdrv_file_open(&bs, filename, BDRV_O_RDONLY);
314     if (ret < 0)
315         return NULL;
316     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
317     bdrv_delete(bs);
318     if (ret < 0) {
319         return NULL;
320     }
321 
322     score_max = 0;
323     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
324         if (drv1->bdrv_probe) {
325             score = drv1->bdrv_probe(buf, ret, filename);
326             if (score > score_max) {
327                 score_max = score;
328                 drv = drv1;
329             }
330         }
331     }
332     return drv;
333 }
334 
335 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
336 {
337     BlockDriverState *bs;
338     int ret;
339 
340     bs = bdrv_new("");
341     ret = bdrv_open2(bs, filename, flags | BDRV_O_FILE, NULL);
342     if (ret < 0) {
343         bdrv_delete(bs);
344         return ret;
345     }
346     bs->growable = 1;
347     *pbs = bs;
348     return 0;
349 }
350 
351 int bdrv_open(BlockDriverState *bs, const char *filename, int flags)
352 {
353     return bdrv_open2(bs, filename, flags, NULL);
354 }
355 
356 int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
357                BlockDriver *drv)
358 {
359     int ret, open_flags, try_rw;
360     char tmp_filename[PATH_MAX];
361     char backing_filename[PATH_MAX];
362 
363     bs->is_temporary = 0;
364     bs->encrypted = 0;
365     bs->valid_key = 0;
366     /* buffer_alignment defaulted to 512, drivers can change this value */
367     bs->buffer_alignment = 512;
368 
369     if (flags & BDRV_O_SNAPSHOT) {
370         BlockDriverState *bs1;
371         int64_t total_size;
372         int is_protocol = 0;
373         BlockDriver *bdrv_qcow2;
374         QEMUOptionParameter *options;
375 
376         /* if snapshot, we create a temporary backing file and open it
377            instead of opening 'filename' directly */
378 
379         /* if there is a backing file, use it */
380         bs1 = bdrv_new("");
381         ret = bdrv_open2(bs1, filename, 0, drv);
382         if (ret < 0) {
383             bdrv_delete(bs1);
384             return ret;
385         }
386         total_size = bdrv_getlength(bs1) >> BDRV_SECTOR_BITS;
387 
388         if (bs1->drv && bs1->drv->protocol_name)
389             is_protocol = 1;
390 
391         bdrv_delete(bs1);
392 
393         get_tmp_filename(tmp_filename, sizeof(tmp_filename));
394 
395         /* Real path is meaningless for protocols */
396         if (is_protocol)
397             snprintf(backing_filename, sizeof(backing_filename),
398                      "%s", filename);
399         else if (!realpath(filename, backing_filename))
400             return -errno;
401 
402         bdrv_qcow2 = bdrv_find_format("qcow2");
403         options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
404 
405         set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size * 512);
406         set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
407         if (drv) {
408             set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
409                 drv->format_name);
410         }
411 
412         ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
413         if (ret < 0) {
414             return ret;
415         }
416 
417         filename = tmp_filename;
418         drv = bdrv_qcow2;
419         bs->is_temporary = 1;
420     }
421 
422     pstrcpy(bs->filename, sizeof(bs->filename), filename);
423     if (flags & BDRV_O_FILE) {
424         drv = find_protocol(filename);
425     } else if (!drv) {
426         drv = find_hdev_driver(filename);
427         if (!drv) {
428             drv = find_image_format(filename);
429         }
430     }
431     if (!drv) {
432         ret = -ENOENT;
433         goto unlink_and_fail;
434     }
435     bs->drv = drv;
436     bs->opaque = qemu_mallocz(drv->instance_size);
437 
438     /*
439      * Yes, BDRV_O_NOCACHE aka O_DIRECT means we have to present a
440      * write cache to the guest.  We do need the fdatasync to flush
441      * out transactions for block allocations, and we maybe have a
442      * volatile write cache in our backing device to deal with.
443      */
444     if (flags & (BDRV_O_CACHE_WB|BDRV_O_NOCACHE))
445         bs->enable_write_cache = 1;
446 
447     /* Note: for compatibility, we open disk image files as RDWR, and
448        RDONLY as fallback */
449     try_rw = !bs->read_only || bs->is_temporary;
450     if (!(flags & BDRV_O_FILE))
451         open_flags = (try_rw ? BDRV_O_RDWR : 0) |
452             (flags & (BDRV_O_CACHE_MASK|BDRV_O_NATIVE_AIO));
453     else
454         open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT);
455     if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv))
456         ret = -ENOTSUP;
457     else
458         ret = drv->bdrv_open(bs, filename, open_flags);
459     if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE)) {
460         ret = drv->bdrv_open(bs, filename, open_flags & ~BDRV_O_RDWR);
461         bs->read_only = 1;
462     }
463     if (ret < 0) {
464         qemu_free(bs->opaque);
465         bs->opaque = NULL;
466         bs->drv = NULL;
467     unlink_and_fail:
468         if (bs->is_temporary)
469             unlink(filename);
470         return ret;
471     }
472     if (drv->bdrv_getlength) {
473         bs->total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
474     }
475 #ifndef _WIN32
476     if (bs->is_temporary) {
477         unlink(filename);
478     }
479 #endif
480     if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
481         /* if there is a backing file, use it */
482         BlockDriver *back_drv = NULL;
483         bs->backing_hd = bdrv_new("");
484         /* pass on read_only property to the backing_hd */
485         bs->backing_hd->read_only = bs->read_only;
486         path_combine(backing_filename, sizeof(backing_filename),
487                      filename, bs->backing_file);
488         if (bs->backing_format[0] != '\0')
489             back_drv = bdrv_find_format(bs->backing_format);
490         ret = bdrv_open2(bs->backing_hd, backing_filename, open_flags,
491                          back_drv);
492         if (ret < 0) {
493             bdrv_close(bs);
494             return ret;
495         }
496     }
497 
498     if (!bdrv_key_required(bs)) {
499         /* call the change callback */
500         bs->media_changed = 1;
501         if (bs->change_cb)
502             bs->change_cb(bs->change_opaque);
503     }
504     return 0;
505 }
506 
507 void bdrv_close(BlockDriverState *bs)
508 {
509     if (bs->drv) {
510         if (bs->backing_hd)
511             bdrv_delete(bs->backing_hd);
512         bs->drv->bdrv_close(bs);
513         qemu_free(bs->opaque);
514 #ifdef _WIN32
515         if (bs->is_temporary) {
516             unlink(bs->filename);
517         }
518 #endif
519         bs->opaque = NULL;
520         bs->drv = NULL;
521 
522         /* call the change callback */
523         bs->media_changed = 1;
524         if (bs->change_cb)
525             bs->change_cb(bs->change_opaque);
526     }
527 }
528 
529 void bdrv_delete(BlockDriverState *bs)
530 {
531     BlockDriverState **pbs;
532 
533     pbs = &bdrv_first;
534     while (*pbs != bs && *pbs != NULL)
535         pbs = &(*pbs)->next;
536     if (*pbs == bs)
537         *pbs = bs->next;
538 
539     bdrv_close(bs);
540     qemu_free(bs);
541 }
542 
543 /*
544  * Run consistency checks on an image
545  *
546  * Returns the number of errors or -errno when an internal error occurs
547  */
548 int bdrv_check(BlockDriverState *bs)
549 {
550     if (bs->drv->bdrv_check == NULL) {
551         return -ENOTSUP;
552     }
553 
554     return bs->drv->bdrv_check(bs);
555 }
556 
557 /* commit COW file into the raw image */
558 int bdrv_commit(BlockDriverState *bs)
559 {
560     BlockDriver *drv = bs->drv;
561     int64_t i, total_sectors;
562     int n, j;
563     unsigned char sector[512];
564 
565     if (!drv)
566         return -ENOMEDIUM;
567 
568     if (bs->read_only) {
569 	return -EACCES;
570     }
571 
572     if (!bs->backing_hd) {
573 	return -ENOTSUP;
574     }
575 
576     total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
577     for (i = 0; i < total_sectors;) {
578         if (drv->bdrv_is_allocated(bs, i, 65536, &n)) {
579             for(j = 0; j < n; j++) {
580                 if (bdrv_read(bs, i, sector, 1) != 0) {
581                     return -EIO;
582                 }
583 
584                 if (bdrv_write(bs->backing_hd, i, sector, 1) != 0) {
585                     return -EIO;
586                 }
587                 i++;
588 	    }
589 	} else {
590             i += n;
591         }
592     }
593 
594     if (drv->bdrv_make_empty)
595 	return drv->bdrv_make_empty(bs);
596 
597     return 0;
598 }
599 
600 /*
601  * Return values:
602  * 0        - success
603  * -EINVAL  - backing format specified, but no file
604  * -ENOSPC  - can't update the backing file because no space is left in the
605  *            image file header
606  * -ENOTSUP - format driver doesn't support changing the backing file
607  */
608 int bdrv_change_backing_file(BlockDriverState *bs,
609     const char *backing_file, const char *backing_fmt)
610 {
611     BlockDriver *drv = bs->drv;
612 
613     if (drv->bdrv_change_backing_file != NULL) {
614         return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
615     } else {
616         return -ENOTSUP;
617     }
618 }
619 
620 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
621                                    size_t size)
622 {
623     int64_t len;
624 
625     if (!bdrv_is_inserted(bs))
626         return -ENOMEDIUM;
627 
628     if (bs->growable)
629         return 0;
630 
631     len = bdrv_getlength(bs);
632 
633     if (offset < 0)
634         return -EIO;
635 
636     if ((offset > len) || (len - offset < size))
637         return -EIO;
638 
639     return 0;
640 }
641 
642 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
643                               int nb_sectors)
644 {
645     return bdrv_check_byte_request(bs, sector_num * 512, nb_sectors * 512);
646 }
647 
648 /* return < 0 if error. See bdrv_write() for the return codes */
649 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
650               uint8_t *buf, int nb_sectors)
651 {
652     BlockDriver *drv = bs->drv;
653 
654     if (!drv)
655         return -ENOMEDIUM;
656     if (bdrv_check_request(bs, sector_num, nb_sectors))
657         return -EIO;
658 
659     return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
660 }
661 
662 static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
663                              int nb_sectors, int dirty)
664 {
665     int64_t start, end;
666     unsigned long val, idx, bit;
667 
668     start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
669     end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
670 
671     for (; start <= end; start++) {
672         idx = start / (sizeof(unsigned long) * 8);
673         bit = start % (sizeof(unsigned long) * 8);
674         val = bs->dirty_bitmap[idx];
675         if (dirty) {
676             val |= 1 << bit;
677         } else {
678             val &= ~(1 << bit);
679         }
680         bs->dirty_bitmap[idx] = val;
681     }
682 }
683 
684 /* Return < 0 if error. Important errors are:
685   -EIO         generic I/O error (may happen for all errors)
686   -ENOMEDIUM   No media inserted.
687   -EINVAL      Invalid sector number or nb_sectors
688   -EACCES      Trying to write a read-only device
689 */
690 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
691                const uint8_t *buf, int nb_sectors)
692 {
693     BlockDriver *drv = bs->drv;
694     if (!bs->drv)
695         return -ENOMEDIUM;
696     if (bs->read_only)
697         return -EACCES;
698     if (bdrv_check_request(bs, sector_num, nb_sectors))
699         return -EIO;
700 
701     if (bs->dirty_bitmap) {
702         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
703     }
704 
705     return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
706 }
707 
708 int bdrv_pread(BlockDriverState *bs, int64_t offset,
709                void *buf, int count1)
710 {
711     uint8_t tmp_buf[BDRV_SECTOR_SIZE];
712     int len, nb_sectors, count;
713     int64_t sector_num;
714 
715     count = count1;
716     /* first read to align to sector start */
717     len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
718     if (len > count)
719         len = count;
720     sector_num = offset >> BDRV_SECTOR_BITS;
721     if (len > 0) {
722         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
723             return -EIO;
724         memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
725         count -= len;
726         if (count == 0)
727             return count1;
728         sector_num++;
729         buf += len;
730     }
731 
732     /* read the sectors "in place" */
733     nb_sectors = count >> BDRV_SECTOR_BITS;
734     if (nb_sectors > 0) {
735         if (bdrv_read(bs, sector_num, buf, nb_sectors) < 0)
736             return -EIO;
737         sector_num += nb_sectors;
738         len = nb_sectors << BDRV_SECTOR_BITS;
739         buf += len;
740         count -= len;
741     }
742 
743     /* add data from the last sector */
744     if (count > 0) {
745         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
746             return -EIO;
747         memcpy(buf, tmp_buf, count);
748     }
749     return count1;
750 }
751 
752 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
753                 const void *buf, int count1)
754 {
755     uint8_t tmp_buf[BDRV_SECTOR_SIZE];
756     int len, nb_sectors, count;
757     int64_t sector_num;
758 
759     count = count1;
760     /* first write to align to sector start */
761     len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
762     if (len > count)
763         len = count;
764     sector_num = offset >> BDRV_SECTOR_BITS;
765     if (len > 0) {
766         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
767             return -EIO;
768         memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
769         if (bdrv_write(bs, sector_num, tmp_buf, 1) < 0)
770             return -EIO;
771         count -= len;
772         if (count == 0)
773             return count1;
774         sector_num++;
775         buf += len;
776     }
777 
778     /* write the sectors "in place" */
779     nb_sectors = count >> BDRV_SECTOR_BITS;
780     if (nb_sectors > 0) {
781         if (bdrv_write(bs, sector_num, buf, nb_sectors) < 0)
782             return -EIO;
783         sector_num += nb_sectors;
784         len = nb_sectors << BDRV_SECTOR_BITS;
785         buf += len;
786         count -= len;
787     }
788 
789     /* add data from the last sector */
790     if (count > 0) {
791         if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
792             return -EIO;
793         memcpy(tmp_buf, buf, count);
794         if (bdrv_write(bs, sector_num, tmp_buf, 1) < 0)
795             return -EIO;
796     }
797     return count1;
798 }
799 
800 /**
801  * Truncate file to 'offset' bytes (needed only for file protocols)
802  */
803 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
804 {
805     BlockDriver *drv = bs->drv;
806     if (!drv)
807         return -ENOMEDIUM;
808     if (!drv->bdrv_truncate)
809         return -ENOTSUP;
810     if (bs->read_only)
811         return -EACCES;
812     return drv->bdrv_truncate(bs, offset);
813 }
814 
815 /**
816  * Length of a file in bytes. Return < 0 if error or unknown.
817  */
818 int64_t bdrv_getlength(BlockDriverState *bs)
819 {
820     BlockDriver *drv = bs->drv;
821     if (!drv)
822         return -ENOMEDIUM;
823     if (!drv->bdrv_getlength) {
824         /* legacy mode */
825         return bs->total_sectors * BDRV_SECTOR_SIZE;
826     }
827     return drv->bdrv_getlength(bs);
828 }
829 
830 /* return 0 as number of sectors if no device present or error */
831 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
832 {
833     int64_t length;
834     length = bdrv_getlength(bs);
835     if (length < 0)
836         length = 0;
837     else
838         length = length >> BDRV_SECTOR_BITS;
839     *nb_sectors_ptr = length;
840 }
841 
/*
 * One entry of the MSDOS partition table, laid out exactly as on disk
 * (packed, 16 bytes).  guess_disk_lchs() overlays four of these on the
 * first sector starting at byte 0x1be and converts nr_sects from
 * little-endian before using it.
 */
struct partition {
        uint8_t boot_ind;           /* 0x80 - active */
        uint8_t head;               /* starting head */
        uint8_t sector;             /* starting sector */
        uint8_t cyl;                /* starting cylinder */
        uint8_t sys_ind;            /* What partition type */
        uint8_t end_head;           /* end head */
        uint8_t end_sector;         /* end sector */
        uint8_t end_cyl;            /* end cylinder */
        uint32_t start_sect;        /* starting sector counting from 0 */
        uint32_t nr_sects;          /* nr of sectors in partition */
} __attribute__((packed));
854 
855 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
856 static int guess_disk_lchs(BlockDriverState *bs,
857                            int *pcylinders, int *pheads, int *psectors)
858 {
859     uint8_t buf[512];
860     int ret, i, heads, sectors, cylinders;
861     struct partition *p;
862     uint32_t nr_sects;
863     uint64_t nb_sectors;
864 
865     bdrv_get_geometry(bs, &nb_sectors);
866 
867     ret = bdrv_read(bs, 0, buf, 1);
868     if (ret < 0)
869         return -1;
870     /* test msdos magic */
871     if (buf[510] != 0x55 || buf[511] != 0xaa)
872         return -1;
873     for(i = 0; i < 4; i++) {
874         p = ((struct partition *)(buf + 0x1be)) + i;
875         nr_sects = le32_to_cpu(p->nr_sects);
876         if (nr_sects && p->end_head) {
877             /* We make the assumption that the partition terminates on
878                a cylinder boundary */
879             heads = p->end_head + 1;
880             sectors = p->end_sector & 63;
881             if (sectors == 0)
882                 continue;
883             cylinders = nb_sectors / (heads * sectors);
884             if (cylinders < 1 || cylinders > 16383)
885                 continue;
886             *pheads = heads;
887             *psectors = sectors;
888             *pcylinders = cylinders;
889 #if 0
890             printf("guessed geometry: LCHS=%d %d %d\n",
891                    cylinders, heads, sectors);
892 #endif
893             return 0;
894         }
895     }
896     return -1;
897 }
898 
899 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
900 {
901     int translation, lba_detected = 0;
902     int cylinders, heads, secs;
903     uint64_t nb_sectors;
904 
905     /* if a geometry hint is available, use it */
906     bdrv_get_geometry(bs, &nb_sectors);
907     bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
908     translation = bdrv_get_translation_hint(bs);
909     if (cylinders != 0) {
910         *pcyls = cylinders;
911         *pheads = heads;
912         *psecs = secs;
913     } else {
914         if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
915             if (heads > 16) {
916                 /* if heads > 16, it means that a BIOS LBA
917                    translation was active, so the default
918                    hardware geometry is OK */
919                 lba_detected = 1;
920                 goto default_geometry;
921             } else {
922                 *pcyls = cylinders;
923                 *pheads = heads;
924                 *psecs = secs;
925                 /* disable any translation to be in sync with
926                    the logical geometry */
927                 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
928                     bdrv_set_translation_hint(bs,
929                                               BIOS_ATA_TRANSLATION_NONE);
930                 }
931             }
932         } else {
933         default_geometry:
934             /* if no geometry, use a standard physical disk geometry */
935             cylinders = nb_sectors / (16 * 63);
936 
937             if (cylinders > 16383)
938                 cylinders = 16383;
939             else if (cylinders < 2)
940                 cylinders = 2;
941             *pcyls = cylinders;
942             *pheads = 16;
943             *psecs = 63;
944             if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
945                 if ((*pcyls * *pheads) <= 131072) {
946                     bdrv_set_translation_hint(bs,
947                                               BIOS_ATA_TRANSLATION_LARGE);
948                 } else {
949                     bdrv_set_translation_hint(bs,
950                                               BIOS_ATA_TRANSLATION_LBA);
951                 }
952             }
953         }
954         bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
955     }
956 }
957 
958 void bdrv_set_geometry_hint(BlockDriverState *bs,
959                             int cyls, int heads, int secs)
960 {
961     bs->cyls = cyls;
962     bs->heads = heads;
963     bs->secs = secs;
964 }
965 
966 void bdrv_set_type_hint(BlockDriverState *bs, int type)
967 {
968     bs->type = type;
969     bs->removable = ((type == BDRV_TYPE_CDROM ||
970                       type == BDRV_TYPE_FLOPPY));
971 }
972 
973 void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
974 {
975     bs->translation = translation;
976 }
977 
978 void bdrv_get_geometry_hint(BlockDriverState *bs,
979                             int *pcyls, int *pheads, int *psecs)
980 {
981     *pcyls = bs->cyls;
982     *pheads = bs->heads;
983     *psecs = bs->secs;
984 }
985 
986 int bdrv_get_type_hint(BlockDriverState *bs)
987 {
988     return bs->type;
989 }
990 
991 int bdrv_get_translation_hint(BlockDriverState *bs)
992 {
993     return bs->translation;
994 }
995 
996 int bdrv_is_removable(BlockDriverState *bs)
997 {
998     return bs->removable;
999 }
1000 
1001 int bdrv_is_read_only(BlockDriverState *bs)
1002 {
1003     return bs->read_only;
1004 }
1005 
1006 int bdrv_set_read_only(BlockDriverState *bs, int read_only)
1007 {
1008     int ret = bs->read_only;
1009     bs->read_only = read_only;
1010     return ret;
1011 }
1012 
1013 int bdrv_is_sg(BlockDriverState *bs)
1014 {
1015     return bs->sg;
1016 }
1017 
1018 int bdrv_enable_write_cache(BlockDriverState *bs)
1019 {
1020     return bs->enable_write_cache;
1021 }
1022 
1023 /* XXX: no longer used */
1024 void bdrv_set_change_cb(BlockDriverState *bs,
1025                         void (*change_cb)(void *opaque), void *opaque)
1026 {
1027     bs->change_cb = change_cb;
1028     bs->change_opaque = opaque;
1029 }
1030 
1031 int bdrv_is_encrypted(BlockDriverState *bs)
1032 {
1033     if (bs->backing_hd && bs->backing_hd->encrypted)
1034         return 1;
1035     return bs->encrypted;
1036 }
1037 
1038 int bdrv_key_required(BlockDriverState *bs)
1039 {
1040     BlockDriverState *backing_hd = bs->backing_hd;
1041 
1042     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1043         return 1;
1044     return (bs->encrypted && !bs->valid_key);
1045 }
1046 
1047 int bdrv_set_key(BlockDriverState *bs, const char *key)
1048 {
1049     int ret;
1050     if (bs->backing_hd && bs->backing_hd->encrypted) {
1051         ret = bdrv_set_key(bs->backing_hd, key);
1052         if (ret < 0)
1053             return ret;
1054         if (!bs->encrypted)
1055             return 0;
1056     }
1057     if (!bs->encrypted || !bs->drv || !bs->drv->bdrv_set_key)
1058         return -1;
1059     ret = bs->drv->bdrv_set_key(bs, key);
1060     if (ret < 0) {
1061         bs->valid_key = 0;
1062     } else if (!bs->valid_key) {
1063         bs->valid_key = 1;
1064         /* call the change callback now, we skipped it on open */
1065         bs->media_changed = 1;
1066         if (bs->change_cb)
1067             bs->change_cb(bs->change_opaque);
1068     }
1069     return ret;
1070 }
1071 
1072 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1073 {
1074     if (!bs->drv) {
1075         buf[0] = '\0';
1076     } else {
1077         pstrcpy(buf, buf_size, bs->drv->format_name);
1078     }
1079 }
1080 
1081 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
1082                          void *opaque)
1083 {
1084     BlockDriver *drv;
1085 
1086     for (drv = first_drv; drv != NULL; drv = drv->next) {
1087         it(opaque, drv->format_name);
1088     }
1089 }
1090 
1091 BlockDriverState *bdrv_find(const char *name)
1092 {
1093     BlockDriverState *bs;
1094 
1095     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1096         if (!strcmp(name, bs->device_name))
1097             return bs;
1098     }
1099     return NULL;
1100 }
1101 
1102 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
1103 {
1104     BlockDriverState *bs;
1105 
1106     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1107         it(opaque, bs);
1108     }
1109 }
1110 
1111 const char *bdrv_get_device_name(BlockDriverState *bs)
1112 {
1113     return bs->device_name;
1114 }
1115 
1116 void bdrv_flush(BlockDriverState *bs)
1117 {
1118     if (!bs->drv)
1119         return;
1120     if (bs->drv->bdrv_flush)
1121         bs->drv->bdrv_flush(bs);
1122     if (bs->backing_hd)
1123         bdrv_flush(bs->backing_hd);
1124 }
1125 
1126 void bdrv_flush_all(void)
1127 {
1128     BlockDriverState *bs;
1129 
1130     for (bs = bdrv_first; bs != NULL; bs = bs->next)
1131         if (bs->drv && !bdrv_is_read_only(bs) &&
1132             (!bdrv_is_removable(bs) || bdrv_is_inserted(bs)))
1133             bdrv_flush(bs);
1134 }
1135 
1136 /*
1137  * Returns true iff the specified sector is present in the disk image. Drivers
1138  * not implementing the functionality are assumed to not support backing files,
1139  * hence all their sectors are reported as allocated.
1140  *
1141  * 'pnum' is set to the number of sectors (including and immediately following
1142  * the specified sector) that are known to be in the same
1143  * allocated/unallocated state.
1144  *
1145  * 'nb_sectors' is the max value 'pnum' should be set to.
1146  */
1147 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1148 	int *pnum)
1149 {
1150     int64_t n;
1151     if (!bs->drv->bdrv_is_allocated) {
1152         if (sector_num >= bs->total_sectors) {
1153             *pnum = 0;
1154             return 0;
1155         }
1156         n = bs->total_sectors - sector_num;
1157         *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1158         return 1;
1159     }
1160     return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1161 }
1162 
1163 static void bdrv_print_dict(QObject *obj, void *opaque)
1164 {
1165     QDict *bs_dict;
1166     Monitor *mon = opaque;
1167 
1168     bs_dict = qobject_to_qdict(obj);
1169 
1170     monitor_printf(mon, "%s: type=%s removable=%d",
1171                         qdict_get_str(bs_dict, "device"),
1172                         qdict_get_str(bs_dict, "type"),
1173                         qdict_get_bool(bs_dict, "removable"));
1174 
1175     if (qdict_get_bool(bs_dict, "removable")) {
1176         monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
1177     }
1178 
1179     if (qdict_haskey(bs_dict, "inserted")) {
1180         QDict *qdict = qobject_to_qdict(qdict_get(bs_dict, "inserted"));
1181 
1182         monitor_printf(mon, " file=");
1183         monitor_print_filename(mon, qdict_get_str(qdict, "file"));
1184         if (qdict_haskey(qdict, "backing_file")) {
1185             monitor_printf(mon, " backing_file=");
1186             monitor_print_filename(mon, qdict_get_str(qdict, "backing_file"));
1187         }
1188         monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
1189                             qdict_get_bool(qdict, "ro"),
1190                             qdict_get_str(qdict, "drv"),
1191                             qdict_get_bool(qdict, "encrypted"));
1192     } else {
1193         monitor_printf(mon, " [not inserted]");
1194     }
1195 
1196     monitor_printf(mon, "\n");
1197 }
1198 
1199 void bdrv_info_print(Monitor *mon, const QObject *data)
1200 {
1201     qlist_iter(qobject_to_qlist(data), bdrv_print_dict, mon);
1202 }
1203 
1204 /**
1205  * bdrv_info(): Block devices information
1206  *
1207  * Each block device information is stored in a QDict and the
1208  * returned QObject is a QList of all devices.
1209  *
1210  * The QDict contains the following:
1211  *
1212  * - "device": device name
1213  * - "type": device type
1214  * - "removable": true if the device is removable, false otherwise
1215  * - "locked": true if the device is locked, false otherwise
1216  * - "inserted": only present if the device is inserted, it is a QDict
1217  *    containing the following:
1218  *          - "file": device file name
1219  *          - "ro": true if read-only, false otherwise
1220  *          - "drv": driver format name
1221  *          - "backing_file": backing file name if one is used
1222  *          - "encrypted": true if encrypted, false otherwise
1223  *
1224  * Example:
1225  *
1226  * [ { "device": "ide0-hd0", "type": "hd", "removable": false, "locked": false,
1227  *     "inserted": { "file": "/tmp/foobar", "ro": false, "drv": "qcow2" } },
1228  *   { "device": "floppy0", "type": "floppy", "removable": true,
1229  *     "locked": false } ]
1230  */
1231 void bdrv_info(Monitor *mon, QObject **ret_data)
1232 {
1233     QList *bs_list;
1234     BlockDriverState *bs;
1235 
1236     bs_list = qlist_new();
1237 
1238     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1239         QObject *bs_obj;
1240         const char *type = "unknown";
1241 
1242         switch(bs->type) {
1243         case BDRV_TYPE_HD:
1244             type = "hd";
1245             break;
1246         case BDRV_TYPE_CDROM:
1247             type = "cdrom";
1248             break;
1249         case BDRV_TYPE_FLOPPY:
1250             type = "floppy";
1251             break;
1252         }
1253 
1254         bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': %s, "
1255                                     "'removable': %i, 'locked': %i }",
1256                                     bs->device_name, type, bs->removable,
1257                                     bs->locked);
1258         assert(bs_obj != NULL);
1259 
1260         if (bs->drv) {
1261             QObject *obj;
1262             QDict *bs_dict = qobject_to_qdict(bs_obj);
1263 
1264             obj = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
1265                                      "'encrypted': %i }",
1266                                      bs->filename, bs->read_only,
1267                                      bs->drv->format_name,
1268                                      bdrv_is_encrypted(bs));
1269             assert(obj != NULL);
1270             if (bs->backing_file[0] != '\0') {
1271                 QDict *qdict = qobject_to_qdict(obj);
1272                 qdict_put(qdict, "backing_file",
1273                           qstring_from_str(bs->backing_file));
1274             }
1275 
1276             qdict_put_obj(bs_dict, "inserted", obj);
1277         }
1278         qlist_append_obj(bs_list, bs_obj);
1279     }
1280 
1281     *ret_data = QOBJECT(bs_list);
1282 }
1283 
1284 static void bdrv_stats_iter(QObject *data, void *opaque)
1285 {
1286     QDict *qdict;
1287     Monitor *mon = opaque;
1288 
1289     qdict = qobject_to_qdict(data);
1290     monitor_printf(mon, "%s:", qdict_get_str(qdict, "device"));
1291 
1292     qdict = qobject_to_qdict(qdict_get(qdict, "stats"));
1293     monitor_printf(mon, " rd_bytes=%" PRId64
1294                         " wr_bytes=%" PRId64
1295                         " rd_operations=%" PRId64
1296                         " wr_operations=%" PRId64
1297                         "\n",
1298                         qdict_get_int(qdict, "rd_bytes"),
1299                         qdict_get_int(qdict, "wr_bytes"),
1300                         qdict_get_int(qdict, "rd_operations"),
1301                         qdict_get_int(qdict, "wr_operations"));
1302 }
1303 
1304 void bdrv_stats_print(Monitor *mon, const QObject *data)
1305 {
1306     qlist_iter(qobject_to_qlist(data), bdrv_stats_iter, mon);
1307 }
1308 
1309 /**
1310  * bdrv_info_stats(): show block device statistics
1311  *
1312  * Each device statistic information is stored in a QDict and
1313  * the returned QObject is a QList of all devices.
1314  *
1315  * The QDict contains the following:
1316  *
1317  * - "device": device name
1318  * - "stats": A QDict with the statistics information, it contains:
1319  *     - "rd_bytes": bytes read
1320  *     - "wr_bytes": bytes written
1321  *     - "rd_operations": read operations
1322  *     - "wr_operations": write operations
1323  *
1324  * Example:
1325  *
1326  * [ { "device": "ide0-hd0",
1327  *               "stats": { "rd_bytes": 512,
1328  *                          "wr_bytes": 0,
1329  *                          "rd_operations": 1,
1330  *                          "wr_operations": 0 } },
1331  *   { "device": "ide1-cd0",
1332  *               "stats": { "rd_bytes": 0,
1333  *                          "wr_bytes": 0,
1334  *                          "rd_operations": 0,
1335  *                          "wr_operations": 0 } } ]
1336  */
1337 void bdrv_info_stats(Monitor *mon, QObject **ret_data)
1338 {
1339     QObject *obj;
1340     QList *devices;
1341     BlockDriverState *bs;
1342 
1343     devices = qlist_new();
1344 
1345     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1346         obj = qobject_from_jsonf("{ 'device': %s, 'stats': {"
1347                                  "'rd_bytes': %" PRId64 ","
1348                                  "'wr_bytes': %" PRId64 ","
1349                                  "'rd_operations': %" PRId64 ","
1350                                  "'wr_operations': %" PRId64
1351                                  "} }",
1352                                  bs->device_name,
1353                                  bs->rd_bytes, bs->wr_bytes,
1354                                  bs->rd_ops, bs->wr_ops);
1355         assert(obj != NULL);
1356         qlist_append_obj(devices, obj);
1357     }
1358 
1359     *ret_data = QOBJECT(devices);
1360 }
1361 
1362 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
1363 {
1364     if (bs->backing_hd && bs->backing_hd->encrypted)
1365         return bs->backing_file;
1366     else if (bs->encrypted)
1367         return bs->filename;
1368     else
1369         return NULL;
1370 }
1371 
1372 void bdrv_get_backing_filename(BlockDriverState *bs,
1373                                char *filename, int filename_size)
1374 {
1375     if (!bs->backing_file) {
1376         pstrcpy(filename, filename_size, "");
1377     } else {
1378         pstrcpy(filename, filename_size, bs->backing_file);
1379     }
1380 }
1381 
1382 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
1383                           const uint8_t *buf, int nb_sectors)
1384 {
1385     BlockDriver *drv = bs->drv;
1386     if (!drv)
1387         return -ENOMEDIUM;
1388     if (!drv->bdrv_write_compressed)
1389         return -ENOTSUP;
1390     if (bdrv_check_request(bs, sector_num, nb_sectors))
1391         return -EIO;
1392 
1393     if (bs->dirty_bitmap) {
1394         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1395     }
1396 
1397     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
1398 }
1399 
1400 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1401 {
1402     BlockDriver *drv = bs->drv;
1403     if (!drv)
1404         return -ENOMEDIUM;
1405     if (!drv->bdrv_get_info)
1406         return -ENOTSUP;
1407     memset(bdi, 0, sizeof(*bdi));
1408     return drv->bdrv_get_info(bs, bdi);
1409 }
1410 
1411 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
1412                       int64_t pos, int size)
1413 {
1414     BlockDriver *drv = bs->drv;
1415     if (!drv)
1416         return -ENOMEDIUM;
1417     if (!drv->bdrv_save_vmstate)
1418         return -ENOTSUP;
1419     return drv->bdrv_save_vmstate(bs, buf, pos, size);
1420 }
1421 
1422 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
1423                       int64_t pos, int size)
1424 {
1425     BlockDriver *drv = bs->drv;
1426     if (!drv)
1427         return -ENOMEDIUM;
1428     if (!drv->bdrv_load_vmstate)
1429         return -ENOTSUP;
1430     return drv->bdrv_load_vmstate(bs, buf, pos, size);
1431 }
1432 
1433 /**************************************************************/
1434 /* handling of snapshots */
1435 
1436 int bdrv_snapshot_create(BlockDriverState *bs,
1437                          QEMUSnapshotInfo *sn_info)
1438 {
1439     BlockDriver *drv = bs->drv;
1440     if (!drv)
1441         return -ENOMEDIUM;
1442     if (!drv->bdrv_snapshot_create)
1443         return -ENOTSUP;
1444     return drv->bdrv_snapshot_create(bs, sn_info);
1445 }
1446 
1447 int bdrv_snapshot_goto(BlockDriverState *bs,
1448                        const char *snapshot_id)
1449 {
1450     BlockDriver *drv = bs->drv;
1451     if (!drv)
1452         return -ENOMEDIUM;
1453     if (!drv->bdrv_snapshot_goto)
1454         return -ENOTSUP;
1455     return drv->bdrv_snapshot_goto(bs, snapshot_id);
1456 }
1457 
1458 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
1459 {
1460     BlockDriver *drv = bs->drv;
1461     if (!drv)
1462         return -ENOMEDIUM;
1463     if (!drv->bdrv_snapshot_delete)
1464         return -ENOTSUP;
1465     return drv->bdrv_snapshot_delete(bs, snapshot_id);
1466 }
1467 
1468 int bdrv_snapshot_list(BlockDriverState *bs,
1469                        QEMUSnapshotInfo **psn_info)
1470 {
1471     BlockDriver *drv = bs->drv;
1472     if (!drv)
1473         return -ENOMEDIUM;
1474     if (!drv->bdrv_snapshot_list)
1475         return -ENOTSUP;
1476     return drv->bdrv_snapshot_list(bs, psn_info);
1477 }
1478 
#define NB_SUFFIXES 4

/*
 * Format 'size' into buf (at most buf_size bytes) in a compact form.
 *
 * Values up to 999 are printed as a plain integer. Larger values are scaled
 * by powers of 1024 and suffixed with K, M, G or T: one decimal digit is
 * shown while the scaled value is below 10, otherwise the value is rounded
 * to the nearest integer. Anything too large for the other units is
 * expressed in T.
 *
 * Returns buf.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (idx = 0; idx < NB_SUFFIXES; idx++, unit *= 1024) {
        if (size < (10 * unit)) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / unit,
                     suffixes[idx]);
            return buf;
        }
        /* The last suffix absorbs everything that did not fit earlier. */
        if (size < (1000 * unit) || idx == (NB_SUFFIXES - 1)) {
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     ((size + (unit >> 1)) / unit),
                     suffixes[idx]);
            return buf;
        }
    }

    return buf;
}
1508 
1509 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
1510 {
1511     char buf1[128], date_buf[128], clock_buf[128];
1512 #ifdef _WIN32
1513     struct tm *ptm;
1514 #else
1515     struct tm tm;
1516 #endif
1517     time_t ti;
1518     int64_t secs;
1519 
1520     if (!sn) {
1521         snprintf(buf, buf_size,
1522                  "%-10s%-20s%7s%20s%15s",
1523                  "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
1524     } else {
1525         ti = sn->date_sec;
1526 #ifdef _WIN32
1527         ptm = localtime(&ti);
1528         strftime(date_buf, sizeof(date_buf),
1529                  "%Y-%m-%d %H:%M:%S", ptm);
1530 #else
1531         localtime_r(&ti, &tm);
1532         strftime(date_buf, sizeof(date_buf),
1533                  "%Y-%m-%d %H:%M:%S", &tm);
1534 #endif
1535         secs = sn->vm_clock_nsec / 1000000000;
1536         snprintf(clock_buf, sizeof(clock_buf),
1537                  "%02d:%02d:%02d.%03d",
1538                  (int)(secs / 3600),
1539                  (int)((secs / 60) % 60),
1540                  (int)(secs % 60),
1541                  (int)((sn->vm_clock_nsec / 1000000) % 1000));
1542         snprintf(buf, buf_size,
1543                  "%-10s%-20s%7s%20s%15s",
1544                  sn->id_str, sn->name,
1545                  get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
1546                  date_buf,
1547                  clock_buf);
1548     }
1549     return buf;
1550 }
1551 
1552 
1553 /**************************************************************/
1554 /* async I/Os */
1555 
1556 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
1557                                  QEMUIOVector *qiov, int nb_sectors,
1558                                  BlockDriverCompletionFunc *cb, void *opaque)
1559 {
1560     BlockDriver *drv = bs->drv;
1561     BlockDriverAIOCB *ret;
1562 
1563     if (!drv)
1564         return NULL;
1565     if (bdrv_check_request(bs, sector_num, nb_sectors))
1566         return NULL;
1567 
1568     ret = drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
1569                               cb, opaque);
1570 
1571     if (ret) {
1572 	/* Update stats even though technically transfer has not happened. */
1573 	bs->rd_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
1574 	bs->rd_ops ++;
1575     }
1576 
1577     return ret;
1578 }
1579 
1580 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
1581                                   QEMUIOVector *qiov, int nb_sectors,
1582                                   BlockDriverCompletionFunc *cb, void *opaque)
1583 {
1584     BlockDriver *drv = bs->drv;
1585     BlockDriverAIOCB *ret;
1586 
1587     if (!drv)
1588         return NULL;
1589     if (bs->read_only)
1590         return NULL;
1591     if (bdrv_check_request(bs, sector_num, nb_sectors))
1592         return NULL;
1593 
1594     if (bs->dirty_bitmap) {
1595         set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1596     }
1597 
1598     ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
1599                                cb, opaque);
1600 
1601     if (ret) {
1602 	/* Update stats even though technically transfer has not happened. */
1603 	bs->wr_bytes += (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
1604 	bs->wr_ops ++;
1605     }
1606 
1607     return ret;
1608 }
1609 
1610 
1611 typedef struct MultiwriteCB {
1612     int error;
1613     int num_requests;
1614     int num_callbacks;
1615     struct {
1616         BlockDriverCompletionFunc *cb;
1617         void *opaque;
1618         QEMUIOVector *free_qiov;
1619         void *free_buf;
1620     } callbacks[];
1621 } MultiwriteCB;
1622 
1623 static void multiwrite_user_cb(MultiwriteCB *mcb)
1624 {
1625     int i;
1626 
1627     for (i = 0; i < mcb->num_callbacks; i++) {
1628         mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
1629         qemu_free(mcb->callbacks[i].free_qiov);
1630         qemu_free(mcb->callbacks[i].free_buf);
1631     }
1632 }
1633 
1634 static void multiwrite_cb(void *opaque, int ret)
1635 {
1636     MultiwriteCB *mcb = opaque;
1637 
1638     if (ret < 0) {
1639         mcb->error = ret;
1640         multiwrite_user_cb(mcb);
1641     }
1642 
1643     mcb->num_requests--;
1644     if (mcb->num_requests == 0) {
1645         if (mcb->error == 0) {
1646             multiwrite_user_cb(mcb);
1647         }
1648         qemu_free(mcb);
1649     }
1650 }
1651 
1652 static int multiwrite_req_compare(const void *a, const void *b)
1653 {
1654     return (((BlockRequest*) a)->sector - ((BlockRequest*) b)->sector);
1655 }
1656 
1657 /*
1658  * Takes a bunch of requests and tries to merge them. Returns the number of
1659  * requests that remain after merging.
1660  */
1661 static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
1662     int num_reqs, MultiwriteCB *mcb)
1663 {
1664     int i, outidx;
1665 
1666     // Sort requests by start sector
1667     qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
1668 
1669     // Check if adjacent requests touch the same clusters. If so, combine them,
1670     // filling up gaps with zero sectors.
1671     outidx = 0;
1672     for (i = 1; i < num_reqs; i++) {
1673         int merge = 0;
1674         int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
1675 
1676         // This handles the cases that are valid for all block drivers, namely
1677         // exactly sequential writes and overlapping writes.
1678         if (reqs[i].sector <= oldreq_last) {
1679             merge = 1;
1680         }
1681 
1682         // The block driver may decide that it makes sense to combine requests
1683         // even if there is a gap of some sectors between them. In this case,
1684         // the gap is filled with zeros (therefore only applicable for yet
1685         // unused space in format like qcow2).
1686         if (!merge && bs->drv->bdrv_merge_requests) {
1687             merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
1688         }
1689 
1690         if (merge) {
1691             size_t size;
1692             QEMUIOVector *qiov = qemu_mallocz(sizeof(*qiov));
1693             qemu_iovec_init(qiov,
1694                 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
1695 
1696             // Add the first request to the merged one. If the requests are
1697             // overlapping, drop the last sectors of the first request.
1698             size = (reqs[i].sector - reqs[outidx].sector) << 9;
1699             qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
1700 
1701             // We might need to add some zeros between the two requests
1702             if (reqs[i].sector > oldreq_last) {
1703                 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
1704                 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
1705                 memset(buf, 0, zero_bytes);
1706                 qemu_iovec_add(qiov, buf, zero_bytes);
1707                 mcb->callbacks[i].free_buf = buf;
1708             }
1709 
1710             // Add the second request
1711             qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
1712 
1713             reqs[outidx].nb_sectors += reqs[i].nb_sectors;
1714             reqs[outidx].qiov = qiov;
1715 
1716             mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
1717         } else {
1718             outidx++;
1719             reqs[outidx].sector     = reqs[i].sector;
1720             reqs[outidx].nb_sectors = reqs[i].nb_sectors;
1721             reqs[outidx].qiov       = reqs[i].qiov;
1722         }
1723     }
1724 
1725     return outidx + 1;
1726 }
1727 
1728 /*
1729  * Submit multiple AIO write requests at once.
1730  *
1731  * On success, the function returns 0 and all requests in the reqs array have
1732  * been submitted. In error case this function returns -1, and any of the
1733  * requests may or may not be submitted yet. In particular, this means that the
1734  * callback will be called for some of the requests, for others it won't. The
1735  * caller must check the error field of the BlockRequest to wait for the right
1736  * callbacks (if error != 0, no callback will be called).
1737  *
1738  * The implementation may modify the contents of the reqs array, e.g. to merge
1739  * requests. However, the fields opaque and error are left unmodified as they
1740  * are used to signal failure for a single request to the caller.
1741  */
1742 int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
1743 {
1744     BlockDriverAIOCB *acb;
1745     MultiwriteCB *mcb;
1746     int i;
1747 
1748     if (num_reqs == 0) {
1749         return 0;
1750     }
1751 
1752     // Create MultiwriteCB structure
1753     mcb = qemu_mallocz(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
1754     mcb->num_requests = 0;
1755     mcb->num_callbacks = num_reqs;
1756 
1757     for (i = 0; i < num_reqs; i++) {
1758         mcb->callbacks[i].cb = reqs[i].cb;
1759         mcb->callbacks[i].opaque = reqs[i].opaque;
1760     }
1761 
1762     // Check for mergable requests
1763     num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
1764 
1765     // Run the aio requests
1766     for (i = 0; i < num_reqs; i++) {
1767         acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
1768             reqs[i].nb_sectors, multiwrite_cb, mcb);
1769 
1770         if (acb == NULL) {
1771             // We can only fail the whole thing if no request has been
1772             // submitted yet. Otherwise we'll wait for the submitted AIOs to
1773             // complete and report the error in the callback.
1774             if (mcb->num_requests == 0) {
1775                 reqs[i].error = EIO;
1776                 goto fail;
1777             } else {
1778                 mcb->error = EIO;
1779                 break;
1780             }
1781         } else {
1782             mcb->num_requests++;
1783         }
1784     }
1785 
1786     return 0;
1787 
1788 fail:
1789     free(mcb);
1790     return -1;
1791 }
1792 
1793 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
1794         BlockDriverCompletionFunc *cb, void *opaque)
1795 {
1796     BlockDriver *drv = bs->drv;
1797 
1798     if (!drv)
1799         return NULL;
1800 
1801     /*
1802      * Note that unlike bdrv_flush the driver is reponsible for flushing a
1803      * backing image if it exists.
1804      */
1805     return drv->bdrv_aio_flush(bs, cb, opaque);
1806 }
1807 
1808 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
1809 {
1810     acb->pool->cancel(acb);
1811 }
1812 
1813 
1814 /**************************************************************/
1815 /* async block device emulation */
1816 
1817 typedef struct BlockDriverAIOCBSync {
1818     BlockDriverAIOCB common;
1819     QEMUBH *bh;
1820     int ret;
1821     /* vector translation state */
1822     QEMUIOVector *qiov;
1823     uint8_t *bounce;
1824     int is_write;
1825 } BlockDriverAIOCBSync;
1826 
1827 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
1828 {
1829     BlockDriverAIOCBSync *acb = (BlockDriverAIOCBSync *)blockacb;
1830     qemu_bh_delete(acb->bh);
1831     acb->bh = NULL;
1832     qemu_aio_release(acb);
1833 }
1834 
1835 static AIOPool bdrv_em_aio_pool = {
1836     .aiocb_size         = sizeof(BlockDriverAIOCBSync),
1837     .cancel             = bdrv_aio_cancel_em,
1838 };
1839 
1840 static void bdrv_aio_bh_cb(void *opaque)
1841 {
1842     BlockDriverAIOCBSync *acb = opaque;
1843 
1844     if (!acb->is_write)
1845         qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
1846     qemu_vfree(acb->bounce);
1847     acb->common.cb(acb->common.opaque, acb->ret);
1848     qemu_bh_delete(acb->bh);
1849     acb->bh = NULL;
1850     qemu_aio_release(acb);
1851 }
1852 
1853 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
1854                                             int64_t sector_num,
1855                                             QEMUIOVector *qiov,
1856                                             int nb_sectors,
1857                                             BlockDriverCompletionFunc *cb,
1858                                             void *opaque,
1859                                             int is_write)
1860 
1861 {
1862     BlockDriverAIOCBSync *acb;
1863 
1864     acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
1865     acb->is_write = is_write;
1866     acb->qiov = qiov;
1867     acb->bounce = qemu_blockalign(bs, qiov->size);
1868 
1869     if (!acb->bh)
1870         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
1871 
1872     if (is_write) {
1873         qemu_iovec_to_buffer(acb->qiov, acb->bounce);
1874         acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
1875     } else {
1876         acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
1877     }
1878 
1879     qemu_bh_schedule(acb->bh);
1880 
1881     return &acb->common;
1882 }
1883 
1884 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
1885         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
1886         BlockDriverCompletionFunc *cb, void *opaque)
1887 {
1888     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
1889 }
1890 
1891 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
1892         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
1893         BlockDriverCompletionFunc *cb, void *opaque)
1894 {
1895     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
1896 }
1897 
1898 static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
1899         BlockDriverCompletionFunc *cb, void *opaque)
1900 {
1901     BlockDriverAIOCBSync *acb;
1902 
1903     acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
1904     acb->is_write = 1; /* don't bounce in the completion hadler */
1905     acb->qiov = NULL;
1906     acb->bounce = NULL;
1907     acb->ret = 0;
1908 
1909     if (!acb->bh)
1910         acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
1911 
1912     bdrv_flush(bs);
1913     qemu_bh_schedule(acb->bh);
1914     return &acb->common;
1915 }
1916 
1917 /**************************************************************/
1918 /* sync block device emulation */
1919 
/*
 * Completion callback for the synchronous emulation: store the result in
 * the int that 'opaque' points to.
 */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}
1924 
1925 #define NOT_DONE 0x7fffffff
1926 
1927 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
1928                         uint8_t *buf, int nb_sectors)
1929 {
1930     int async_ret;
1931     BlockDriverAIOCB *acb;
1932     struct iovec iov;
1933     QEMUIOVector qiov;
1934 
1935     async_context_push();
1936 
1937     async_ret = NOT_DONE;
1938     iov.iov_base = (void *)buf;
1939     iov.iov_len = nb_sectors * 512;
1940     qemu_iovec_init_external(&qiov, &iov, 1);
1941     acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
1942         bdrv_rw_em_cb, &async_ret);
1943     if (acb == NULL) {
1944         async_ret = -1;
1945         goto fail;
1946     }
1947 
1948     while (async_ret == NOT_DONE) {
1949         qemu_aio_wait();
1950     }
1951 
1952 
1953 fail:
1954     async_context_pop();
1955     return async_ret;
1956 }
1957 
1958 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
1959                          const uint8_t *buf, int nb_sectors)
1960 {
1961     int async_ret;
1962     BlockDriverAIOCB *acb;
1963     struct iovec iov;
1964     QEMUIOVector qiov;
1965 
1966     async_context_push();
1967 
1968     async_ret = NOT_DONE;
1969     iov.iov_base = (void *)buf;
1970     iov.iov_len = nb_sectors * 512;
1971     qemu_iovec_init_external(&qiov, &iov, 1);
1972     acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
1973         bdrv_rw_em_cb, &async_ret);
1974     if (acb == NULL) {
1975         async_ret = -1;
1976         goto fail;
1977     }
1978     while (async_ret == NOT_DONE) {
1979         qemu_aio_wait();
1980     }
1981 
1982 fail:
1983     async_context_pop();
1984     return async_ret;
1985 }
1986 
1987 void bdrv_init(void)
1988 {
1989     module_call_init(MODULE_INIT_BLOCK);
1990 }
1991 
1992 void bdrv_init_with_whitelist(void)
1993 {
1994     use_bdrv_whitelist = 1;
1995     bdrv_init();
1996 }
1997 
1998 void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
1999                    BlockDriverCompletionFunc *cb, void *opaque)
2000 {
2001     BlockDriverAIOCB *acb;
2002 
2003     if (pool->free_aiocb) {
2004         acb = pool->free_aiocb;
2005         pool->free_aiocb = acb->next;
2006     } else {
2007         acb = qemu_mallocz(pool->aiocb_size);
2008         acb->pool = pool;
2009     }
2010     acb->bs = bs;
2011     acb->cb = cb;
2012     acb->opaque = opaque;
2013     return acb;
2014 }
2015 
2016 void qemu_aio_release(void *p)
2017 {
2018     BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2019     AIOPool *pool = acb->pool;
2020     acb->next = pool->free_aiocb;
2021     pool->free_aiocb = acb;
2022 }
2023 
2024 /**************************************************************/
2025 /* removable device support */
2026 
2027 /**
2028  * Return TRUE if the media is present
2029  */
2030 int bdrv_is_inserted(BlockDriverState *bs)
2031 {
2032     BlockDriver *drv = bs->drv;
2033     int ret;
2034     if (!drv)
2035         return 0;
2036     if (!drv->bdrv_is_inserted)
2037         return 1;
2038     ret = drv->bdrv_is_inserted(bs);
2039     return ret;
2040 }
2041 
2042 /**
2043  * Return TRUE if the media changed since the last call to this
2044  * function. It is currently only used for floppy disks
2045  */
2046 int bdrv_media_changed(BlockDriverState *bs)
2047 {
2048     BlockDriver *drv = bs->drv;
2049     int ret;
2050 
2051     if (!drv || !drv->bdrv_media_changed)
2052         ret = -ENOTSUP;
2053     else
2054         ret = drv->bdrv_media_changed(bs);
2055     if (ret == -ENOTSUP)
2056         ret = bs->media_changed;
2057     bs->media_changed = 0;
2058     return ret;
2059 }
2060 
2061 /**
2062  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
2063  */
2064 int bdrv_eject(BlockDriverState *bs, int eject_flag)
2065 {
2066     BlockDriver *drv = bs->drv;
2067     int ret;
2068 
2069     if (bs->locked) {
2070         return -EBUSY;
2071     }
2072 
2073     if (!drv || !drv->bdrv_eject) {
2074         ret = -ENOTSUP;
2075     } else {
2076         ret = drv->bdrv_eject(bs, eject_flag);
2077     }
2078     if (ret == -ENOTSUP) {
2079         if (eject_flag)
2080             bdrv_close(bs);
2081         ret = 0;
2082     }
2083 
2084     return ret;
2085 }
2086 
2087 int bdrv_is_locked(BlockDriverState *bs)
2088 {
2089     return bs->locked;
2090 }
2091 
2092 /**
2093  * Lock or unlock the media (if it is locked, the user won't be able
2094  * to eject it manually).
2095  */
2096 void bdrv_set_locked(BlockDriverState *bs, int locked)
2097 {
2098     BlockDriver *drv = bs->drv;
2099 
2100     bs->locked = locked;
2101     if (drv && drv->bdrv_set_locked) {
2102         drv->bdrv_set_locked(bs, locked);
2103     }
2104 }
2105 
2106 /* needed for generic scsi interface */
2107 
2108 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
2109 {
2110     BlockDriver *drv = bs->drv;
2111 
2112     if (drv && drv->bdrv_ioctl)
2113         return drv->bdrv_ioctl(bs, req, buf);
2114     return -ENOTSUP;
2115 }
2116 
2117 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
2118         unsigned long int req, void *buf,
2119         BlockDriverCompletionFunc *cb, void *opaque)
2120 {
2121     BlockDriver *drv = bs->drv;
2122 
2123     if (drv && drv->bdrv_aio_ioctl)
2124         return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
2125     return NULL;
2126 }
2127 
2128 
2129 
2130 void *qemu_blockalign(BlockDriverState *bs, size_t size)
2131 {
2132     return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
2133 }
2134 
2135 void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
2136 {
2137     int64_t bitmap_size;
2138 
2139     if (enable) {
2140         if (!bs->dirty_bitmap) {
2141             bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
2142                     BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
2143             bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
2144 
2145             bs->dirty_bitmap = qemu_mallocz(bitmap_size);
2146         }
2147     } else {
2148         if (bs->dirty_bitmap) {
2149             qemu_free(bs->dirty_bitmap);
2150             bs->dirty_bitmap = NULL;
2151         }
2152     }
2153 }
2154 
2155 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
2156 {
2157     int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
2158 
2159     if (bs->dirty_bitmap &&
2160         (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
2161         return bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
2162             (1 << (chunk % (sizeof(unsigned long) * 8)));
2163     } else {
2164         return 0;
2165     }
2166 }
2167 
2168 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
2169                       int nr_sectors)
2170 {
2171     set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
2172 }
2173