/*
 * QEMU Block backends
 *
 * Copyright (C) 2014-2016 Red Hat, Inc.
 *
 * Authors:
 *  Markus Armbruster <armbru@redhat.com>,
 *
 * This work is licensed under the terms of the GNU LGPL, version 2.1
 * or later.  See the COPYING.LIB file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "sysemu/block-backend.h"
#include "block/block_int.h"
#include "block/blockjob.h"
#include "block/throttle-groups.h"
#include "sysemu/blockdev.h"
#include "sysemu/sysemu.h"
#include "qapi-event.h"
#include "qemu/id.h"

/* Number of coroutines to reserve per attached device model */
#define COROUTINE_POOL_RESERVATION 64

#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb);

struct BlockBackend {
    char *name;
    int refcnt;
    BdrvChild *root;
    DriveInfo *legacy_dinfo;    /* null unless created by drive_new() */
    QTAILQ_ENTRY(BlockBackend) link;         /* for block_backends */
    QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */
    BlockBackendPublic public;

    void *dev;                  /* attached device model, if any */
    /* TODO change to DeviceState when all users are qdevified */
    const BlockDevOps *dev_ops;
    void *dev_opaque;

    /* the block size for which the guest device expects atomicity */
    int guest_block_size;

    /* If the BDS tree is removed, some of its options are stored here (which
     * can be used to restore those options in the new BDS on insert) */
    BlockBackendRootState root_state;

    bool enable_write_cache;

    /* I/O stats (display with "info blockstats"). */
    BlockAcctStats stats;

    BlockdevOnError on_read_error, on_write_error;
    bool iostatus_enabled;
    BlockDeviceIoStatus iostatus;

    bool allow_write_beyond_eof;

    NotifierList remove_bs_notifiers, insert_bs_notifiers;
};

typedef struct BlockBackendAIOCB {
    BlockAIOCB common;
    QEMUBH *bh;
    BlockBackend *blk;
    int ret;
} BlockBackendAIOCB;

static const AIOCBInfo block_backend_aiocb_info = {
    .get_aio_context = blk_aiocb_get_aio_context,
    .aiocb_size = sizeof(BlockBackendAIOCB),
};

static void drive_info_del(DriveInfo *dinfo);
static BlockBackend *bdrv_first_blk(BlockDriverState *bs);

/* All BlockBackends */
static QTAILQ_HEAD(, BlockBackend) block_backends =
    QTAILQ_HEAD_INITIALIZER(block_backends);

/* All BlockBackends referenced by the monitor and which are iterated through
 * by blk_next() */
static QTAILQ_HEAD(, BlockBackend) monitor_block_backends =
    QTAILQ_HEAD_INITIALIZER(monitor_block_backends);

static void blk_root_inherit_options(int *child_flags, QDict *child_options,
                                     int parent_flags, QDict *parent_options)
{
    /* We're not supposed to call this function for root nodes */
    abort();
}

static void blk_root_drained_begin(BdrvChild *child);
static void blk_root_drained_end(BdrvChild *child);

static void blk_root_change_media(BdrvChild *child, bool load);
static void blk_root_resize(BdrvChild *child);

static const char *blk_root_get_name(BdrvChild *child)
{
    return blk_name(child->opaque);
}

static const BdrvChildRole child_root = {
    .inherit_options    = blk_root_inherit_options,

    .change_media       = blk_root_change_media,
    .resize             = blk_root_resize,
    .get_name           = blk_root_get_name,

    .drained_begin      = blk_root_drained_begin,
    .drained_end        = blk_root_drained_end,
};

/*
 * Create a new BlockBackend with a reference count of one.
 * Store an error through @errp on failure, unless it's null.
 * Return the new BlockBackend on success, null on failure.
 */
BlockBackend *blk_new(Error **errp)
{
    BlockBackend *blk;

    blk = g_new0(BlockBackend, 1);
    blk->refcnt = 1;
    qemu_co_queue_init(&blk->public.throttled_reqs[0]);
    qemu_co_queue_init(&blk->public.throttled_reqs[1]);

    notifier_list_init(&blk->remove_bs_notifiers);
    notifier_list_init(&blk->insert_bs_notifiers);

    QTAILQ_INSERT_TAIL(&block_backends, blk, link);
    return blk;
}

/*
 * Create a new BlockBackend with a new BlockDriverState attached.
 * Otherwise just like blk_new(), which see.
 */
BlockBackend *blk_new_with_bs(Error **errp)
{
    BlockBackend *blk;
    BlockDriverState *bs;

    blk = blk_new(errp);
    if (!blk) {
        return NULL;
    }

    bs = bdrv_new_root();
    blk->root = bdrv_root_attach_child(bs, "root", &child_root);
    blk->root->opaque = blk;
    return blk;
}

/*
 * Calls blk_new_with_bs() and then calls bdrv_open() on the BlockDriverState.
 *
 * Just as with bdrv_open(), after having called this function the reference to
 * @options belongs to the block layer (even on failure).
 *
 * TODO: Remove @filename and @flags; it should be possible to specify a whole
 * BDS tree just by specifying the @options QDict (or @reference,
 * alternatively). At the time of adding this function, this is not possible,
 * though, so callers of this function have to be able to specify @filename and
 * @flags.
 */
BlockBackend *blk_new_open(const char *filename, const char *reference,
                           QDict *options, int flags, Error **errp)
{
    BlockBackend *blk;
    int ret;

    blk = blk_new_with_bs(errp);
    if (!blk) {
        QDECREF(options);
        return NULL;
    }

    ret = bdrv_open(&blk->root->bs, filename, reference, options, flags, errp);
    if (ret < 0) {
        blk_unref(blk);
        return NULL;
    }

    blk_set_enable_write_cache(blk, true);

    return blk;
}

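/*
 * Usage sketch (illustrative, not part of this file's API surface): open an
 * image read-write and drop the reference when done.  @options is consumed
 * even on failure, so only the Error needs cleaning up:
 *
 *     Error *local_err = NULL;
 *     BlockBackend *blk = blk_new_open("disk.qcow2", NULL, NULL,
 *                                      BDRV_O_RDWR, &local_err);
 *     if (!blk) {
 *         error_report_err(local_err);
 *         return;
 *     }
 *     ...
 *     blk_unref(blk);
 */
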
static void blk_delete(BlockBackend *blk)
{
    assert(!blk->refcnt);
    assert(!blk->name);
    assert(!blk->dev);
    if (blk->root) {
        blk_remove_bs(blk);
    }
    assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers));
    assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers));
    QTAILQ_REMOVE(&block_backends, blk, link);
    drive_info_del(blk->legacy_dinfo);
    block_acct_cleanup(&blk->stats);
    g_free(blk);
}

static void drive_info_del(DriveInfo *dinfo)
{
    if (!dinfo) {
        return;
    }
    qemu_opts_del(dinfo->opts);
    g_free(dinfo->serial);
    g_free(dinfo);
}

int blk_get_refcnt(BlockBackend *blk)
{
    return blk ? blk->refcnt : 0;
}

/*
 * Increment @blk's reference count.
 * @blk must not be null.
 */
void blk_ref(BlockBackend *blk)
{
    blk->refcnt++;
}

/*
 * Decrement @blk's reference count.
 * If this drops it to zero, destroy @blk.
 * For convenience, do nothing if @blk is null.
 */
void blk_unref(BlockBackend *blk)
{
    if (blk) {
        assert(blk->refcnt > 0);
        if (!--blk->refcnt) {
            blk_delete(blk);
        }
    }
}

/*
 * Behaves similarly to blk_next() but iterates over all BlockBackends, even the
 * ones which are hidden (i.e. are not referenced by the monitor).
 */
static BlockBackend *blk_all_next(BlockBackend *blk)
{
    return blk ? QTAILQ_NEXT(blk, link)
               : QTAILQ_FIRST(&block_backends);
}

void blk_remove_all_bs(void)
{
    BlockBackend *blk = NULL;

    while ((blk = blk_all_next(blk)) != NULL) {
        AioContext *ctx = blk_get_aio_context(blk);

        aio_context_acquire(ctx);
        if (blk->root) {
            blk_remove_bs(blk);
        }
        aio_context_release(ctx);
    }
}

/*
 * Return the monitor-owned BlockBackend after @blk.
 * If @blk is null, return the first one.
 * Else, return @blk's next sibling, which may be null.
 *
 * To iterate over all BlockBackends, do
 * for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
 *     ...
 * }
 */
BlockBackend *blk_next(BlockBackend *blk)
{
    return blk ? QTAILQ_NEXT(blk, monitor_link)
               : QTAILQ_FIRST(&monitor_block_backends);
}

struct BdrvNextIterator {
    enum {
        BDRV_NEXT_BACKEND_ROOTS,
        BDRV_NEXT_MONITOR_OWNED,
    } phase;
    BlockBackend *blk;
    BlockDriverState *bs;
};

/* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
 * the monitor or attached to a BlockBackend */
BdrvNextIterator *bdrv_next(BdrvNextIterator *it, BlockDriverState **bs)
{
    if (!it) {
        it = g_new(BdrvNextIterator, 1);
        *it = (BdrvNextIterator) {
            .phase = BDRV_NEXT_BACKEND_ROOTS,
        };
    }

    /* First, return all root nodes of BlockBackends. In order to avoid
     * returning a BDS twice when multiple BBs refer to it, we only return it
     * if the BB is the first one in the parent list of the BDS. */
    if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
        do {
            it->blk = blk_all_next(it->blk);
            *bs = it->blk ? blk_bs(it->blk) : NULL;
        } while (it->blk && (*bs == NULL || bdrv_first_blk(*bs) != it->blk));

        if (*bs) {
            return it;
        }
        it->phase = BDRV_NEXT_MONITOR_OWNED;
    }

    /* Then return the monitor-owned BDSes without a BB attached. Ignore all
     * BDSes that are attached to a BlockBackend here; they have been handled
     * by the above block already */
    do {
        it->bs = bdrv_next_monitor_owned(it->bs);
        *bs = it->bs;
    } while (*bs && bdrv_has_blk(*bs));

    return *bs ? it : NULL;
}

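/*
 * Usage sketch (illustrative): visit every top-level BDS exactly once.  The
 * iterator is allocated by the first bdrv_next() call; pass NULL to start:
 *
 *     BdrvNextIterator *it = NULL;
 *     BlockDriverState *bs;
 *
 *     while ((it = bdrv_next(it, &bs))) {
 *         ... bs is a BlockBackend root or a monitor-owned node ...
 *     }
 */
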
/*
 * Add a BlockBackend into the list of backends referenced by the monitor, with
 * the given @name acting as the handle for the monitor.
 * Strictly for use by blockdev.c.
 *
 * @name must not be null or empty.
 *
 * Returns true on success and false on failure. In the latter case, an Error
 * object is returned through @errp.
 */
bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp)
{
    assert(!blk->name);
    assert(name && name[0]);

    if (!id_wellformed(name)) {
        error_setg(errp, "Invalid device name");
        return false;
    }
    if (blk_by_name(name)) {
        error_setg(errp, "Device with id '%s' already exists", name);
        return false;
    }
    if (bdrv_find_node(name)) {
        error_setg(errp,
                   "Device name '%s' conflicts with an existing node name",
                   name);
        return false;
    }

    blk->name = g_strdup(name);
    QTAILQ_INSERT_TAIL(&monitor_block_backends, blk, monitor_link);
    return true;
}

/*
 * Remove a BlockBackend from the list of backends referenced by the monitor.
 * Strictly for use by blockdev.c.
 */
void monitor_remove_blk(BlockBackend *blk)
{
    if (!blk->name) {
        return;
    }

    QTAILQ_REMOVE(&monitor_block_backends, blk, monitor_link);
    g_free(blk->name);
    blk->name = NULL;
}

/*
 * Return @blk's name, a non-null string.
 * Returns an empty string iff @blk is not referenced by the monitor.
 */
const char *blk_name(BlockBackend *blk)
{
    return blk->name ?: "";
}

/*
 * Return the BlockBackend with name @name if it exists, else null.
 * @name must not be null.
 */
BlockBackend *blk_by_name(const char *name)
{
    BlockBackend *blk = NULL;

    assert(name);
    while ((blk = blk_next(blk)) != NULL) {
        if (!strcmp(name, blk->name)) {
            return blk;
        }
    }
    return NULL;
}

/*
 * Return the BlockDriverState attached to @blk if any, else null.
 */
BlockDriverState *blk_bs(BlockBackend *blk)
{
    return blk->root ? blk->root->bs : NULL;
}

static BlockBackend *bdrv_first_blk(BlockDriverState *bs)
{
    BdrvChild *child;
    QLIST_FOREACH(child, &bs->parents, next_parent) {
        if (child->role == &child_root) {
            return child->opaque;
        }
    }

    return NULL;
}

/*
 * Returns true if @bs has an associated BlockBackend.
 */
bool bdrv_has_blk(BlockDriverState *bs)
{
    return bdrv_first_blk(bs) != NULL;
}

/*
 * Return @blk's DriveInfo if any, else null.
 */
DriveInfo *blk_legacy_dinfo(BlockBackend *blk)
{
    return blk->legacy_dinfo;
}

/*
 * Set @blk's DriveInfo to @dinfo, and return it.
 * @blk must not have a DriveInfo set already.
 * No other BlockBackend may have the same DriveInfo set.
 */
DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo)
{
    assert(!blk->legacy_dinfo);
    return blk->legacy_dinfo = dinfo;
}

/*
 * Return the BlockBackend with DriveInfo @dinfo.
 * It must exist.
 */
BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
{
    BlockBackend *blk = NULL;

    while ((blk = blk_next(blk)) != NULL) {
        if (blk->legacy_dinfo == dinfo) {
            return blk;
        }
    }
    abort();
}

/*
 * Returns a pointer to the publicly accessible fields of @blk.
 */
BlockBackendPublic *blk_get_public(BlockBackend *blk)
{
    return &blk->public;
}

/*
 * Returns a BlockBackend given the associated @public fields.
 */
BlockBackend *blk_by_public(BlockBackendPublic *public)
{
    return container_of(public, BlockBackend, public);
}

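/*
 * The two accessors above are inverses: code that only holds the public part
 * (the throttling code, for instance) can recover its BlockBackend.  A quick
 * illustration:
 *
 *     BlockBackendPublic *pub = blk_get_public(blk);
 *     assert(blk_by_public(pub) == blk);
 */
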
/*
 * Disassociates the currently associated BlockDriverState from @blk.
 */
void blk_remove_bs(BlockBackend *blk)
{
    notifier_list_notify(&blk->remove_bs_notifiers, blk);
    if (blk->public.throttle_state) {
        throttle_timers_detach_aio_context(&blk->public.throttle_timers);
    }

    blk_update_root_state(blk);

    bdrv_root_unref_child(blk->root);
    blk->root = NULL;
}

/*
 * Associates a new BlockDriverState with @blk.
 */
void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs)
{
    bdrv_ref(bs);
    blk->root = bdrv_root_attach_child(bs, "root", &child_root);
    blk->root->opaque = blk;

    notifier_list_notify(&blk->insert_bs_notifiers, blk);
    if (blk->public.throttle_state) {
        throttle_timers_attach_aio_context(
            &blk->public.throttle_timers, bdrv_get_aio_context(bs));
    }
}

/*
 * Attach device model @dev to @blk.
 * Return 0 on success, -EBUSY when a device model is attached already.
 */
int blk_attach_dev(BlockBackend *blk, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    if (blk->dev) {
        return -EBUSY;
    }
    blk_ref(blk);
    blk->dev = dev;
    blk_iostatus_reset(blk);
    return 0;
}

/*
 * Attach device model @dev to @blk.
 * @blk must not have a device model attached already.
 * TODO qdevified devices don't use this, remove when devices are qdevified
 */
void blk_attach_dev_nofail(BlockBackend *blk, void *dev)
{
    if (blk_attach_dev(blk, dev) < 0) {
        abort();
    }
}

/*
 * Detach device model @dev from @blk.
 * @dev must be currently attached to @blk.
 */
void blk_detach_dev(BlockBackend *blk, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    assert(blk->dev == dev);
    blk->dev = NULL;
    blk->dev_ops = NULL;
    blk->dev_opaque = NULL;
    blk->guest_block_size = 512;
    blk_unref(blk);
}

/*
 * Return the device model attached to @blk if any, else null.
 */
void *blk_get_attached_dev(BlockBackend *blk)
/* TODO change to return DeviceState * when all users are qdevified */
{
    return blk->dev;
}

/*
 * Set @blk's device model callbacks to @ops.
 * @opaque is the opaque argument to pass to the callbacks.
 * This is for use by device models.
 */
void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
                     void *opaque)
{
    blk->dev_ops = ops;
    blk->dev_opaque = opaque;
}

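/*
 * Usage sketch (illustrative; all my_dev_* names are placeholders, real
 * examples live under hw/): a device model registers its callbacks once,
 * typically at realize time:
 *
 *     static const BlockDevOps my_dev_block_ops = {
 *         .change_media_cb  = my_dev_change_media_cb,
 *         .eject_request_cb = my_dev_eject_request_cb,
 *         .is_tray_open     = my_dev_is_tray_open,
 *         .resize_cb        = my_dev_resize_cb,
 *     };
 *
 *     blk_set_dev_ops(blk, &my_dev_block_ops, my_dev_state);
 */
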
/*
 * Notify @blk's attached device model of media change.
 * If @load is true, notify of media load.
 * Else, notify of media eject.
 * Also send DEVICE_TRAY_MOVED events as appropriate.
 */
void blk_dev_change_media_cb(BlockBackend *blk, bool load)
{
    if (blk->dev_ops && blk->dev_ops->change_media_cb) {
        bool tray_was_open, tray_is_open;

        tray_was_open = blk_dev_is_tray_open(blk);
        blk->dev_ops->change_media_cb(blk->dev_opaque, load);
        tray_is_open = blk_dev_is_tray_open(blk);

        if (tray_was_open != tray_is_open) {
            qapi_event_send_device_tray_moved(blk_name(blk), tray_is_open,
                                              &error_abort);
        }
    }
}

static void blk_root_change_media(BdrvChild *child, bool load)
{
    blk_dev_change_media_cb(child->opaque, load);
}

/*
 * Does @blk's attached device model have removable media?
 * %true if no device model is attached.
 */
bool blk_dev_has_removable_media(BlockBackend *blk)
{
    return !blk->dev || (blk->dev_ops && blk->dev_ops->change_media_cb);
}

/*
 * Does @blk's attached device model have a tray?
 */
bool blk_dev_has_tray(BlockBackend *blk)
{
    return blk->dev_ops && blk->dev_ops->is_tray_open;
}

/*
 * Notify @blk's attached device model of a media eject request.
 * If @force is true, the medium is about to be yanked out forcefully.
 */
void blk_dev_eject_request(BlockBackend *blk, bool force)
{
    if (blk->dev_ops && blk->dev_ops->eject_request_cb) {
        blk->dev_ops->eject_request_cb(blk->dev_opaque, force);
    }
}

/*
 * Does @blk's attached device model have a tray, and is it open?
 */
bool blk_dev_is_tray_open(BlockBackend *blk)
{
    if (blk_dev_has_tray(blk)) {
        return blk->dev_ops->is_tray_open(blk->dev_opaque);
    }
    return false;
}

/*
 * Does @blk's attached device model have the medium locked?
 * %false if the device model has no such lock.
 */
bool blk_dev_is_medium_locked(BlockBackend *blk)
{
    if (blk->dev_ops && blk->dev_ops->is_medium_locked) {
        return blk->dev_ops->is_medium_locked(blk->dev_opaque);
    }
    return false;
}

/*
 * Notify @blk's attached device model of a backend size change.
 */
static void blk_root_resize(BdrvChild *child)
{
    BlockBackend *blk = child->opaque;

    if (blk->dev_ops && blk->dev_ops->resize_cb) {
        blk->dev_ops->resize_cb(blk->dev_opaque);
    }
}

void blk_iostatus_enable(BlockBackend *blk)
{
    blk->iostatus_enabled = true;
    blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}

/* The I/O status is only enabled if the drive explicitly
 * enables it _and_ the VM is configured to stop on errors */
bool blk_iostatus_is_enabled(const BlockBackend *blk)
{
    return (blk->iostatus_enabled &&
           (blk->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
            blk->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
            blk->on_read_error == BLOCKDEV_ON_ERROR_STOP));
}

BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk)
{
    return blk->iostatus;
}

void blk_iostatus_disable(BlockBackend *blk)
{
    blk->iostatus_enabled = false;
}

void blk_iostatus_reset(BlockBackend *blk)
{
    if (blk_iostatus_is_enabled(blk)) {
        BlockDriverState *bs = blk_bs(blk);
        blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
        if (bs && bs->job) {
            block_job_iostatus_reset(bs->job);
        }
    }
}

void blk_iostatus_set_err(BlockBackend *blk, int error)
{
    assert(blk_iostatus_is_enabled(blk));
    if (blk->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
        blk->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
                                          BLOCK_DEVICE_IO_STATUS_FAILED;
    }
}

void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow)
{
    blk->allow_write_beyond_eof = allow;
}

static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
                                  size_t size)
{
    int64_t len;

    if (size > INT_MAX) {
        return -EIO;
    }

    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    if (offset < 0) {
        return -EIO;
    }

    if (!blk->allow_write_beyond_eof) {
        len = blk_getlength(blk);
        if (len < 0) {
            return len;
        }

        if (offset > len || len - offset < size) {
            return -EIO;
        }
    }

    return 0;
}

static int blk_check_request(BlockBackend *blk, int64_t sector_num,
                             int nb_sectors)
{
    if (sector_num < 0 || sector_num > INT64_MAX / BDRV_SECTOR_SIZE) {
        return -EIO;
    }

    if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
        return -EIO;
    }

    return blk_check_byte_request(blk, sector_num * BDRV_SECTOR_SIZE,
                                  nb_sectors * BDRV_SECTOR_SIZE);
}

static int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
                                      unsigned int bytes, QEMUIOVector *qiov,
                                      BdrvRequestFlags flags)
{
    int ret = blk_check_byte_request(blk, offset, bytes);
    if (ret < 0) {
        return ret;
    }

    /* throttling disk I/O */
    if (blk->public.throttle_state) {
        throttle_group_co_io_limits_intercept(blk, bytes, false);
    }

    return bdrv_co_preadv(blk_bs(blk), offset, bytes, qiov, flags);
}

static int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
                                       unsigned int bytes, QEMUIOVector *qiov,
                                       BdrvRequestFlags flags)
{
    int ret;

    ret = blk_check_byte_request(blk, offset, bytes);
    if (ret < 0) {
        return ret;
    }

    /* throttling disk I/O */
    if (blk->public.throttle_state) {
        throttle_group_co_io_limits_intercept(blk, bytes, true);
    }

    if (!blk->enable_write_cache) {
        flags |= BDRV_REQ_FUA;
    }

    return bdrv_co_pwritev(blk_bs(blk), offset, bytes, qiov, flags);
}

typedef struct BlkRwCo {
    BlockBackend *blk;
    int64_t offset;
    QEMUIOVector *qiov;
    int ret;
    BdrvRequestFlags flags;
} BlkRwCo;

static void blk_read_entry(void *opaque)
{
    BlkRwCo *rwco = opaque;

    rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, rwco->qiov->size,
                              rwco->qiov, rwco->flags);
}

static void blk_write_entry(void *opaque)
{
    BlkRwCo *rwco = opaque;

    rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, rwco->qiov->size,
                               rwco->qiov, rwco->flags);
}

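/*
 * Helper for the synchronous wrappers below (blk_pread(), blk_pwrite(),
 * blk_write_zeroes()): wrap @buf in a single-element QEMUIOVector, run
 * @co_entry in a coroutine, and poll the backend's AioContext until the
 * coroutine has replaced the NOT_DONE marker in rwco.ret with a result.
 */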
static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
                   int64_t bytes, CoroutineEntry co_entry,
                   BdrvRequestFlags flags)
{
    AioContext *aio_context;
    QEMUIOVector qiov;
    struct iovec iov;
    Coroutine *co;
    BlkRwCo rwco;

    iov = (struct iovec) {
        .iov_base = buf,
        .iov_len = bytes,
    };
    qemu_iovec_init_external(&qiov, &iov, 1);

    rwco = (BlkRwCo) {
        .blk    = blk,
        .offset = offset,
        .qiov   = &qiov,
        .flags  = flags,
        .ret    = NOT_DONE,
    };

    co = qemu_coroutine_create(co_entry);
    qemu_coroutine_enter(co, &rwco);

    aio_context = blk_get_aio_context(blk);
    while (rwco.ret == NOT_DONE) {
        aio_poll(aio_context, true);
    }

    return rwco.ret;
}

int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
                          int count)
{
    int ret;

    ret = blk_check_byte_request(blk, offset, count);
    if (ret < 0) {
        return ret;
    }

    blk_root_drained_begin(blk->root);
    ret = blk_pread(blk, offset, buf, count);
    blk_root_drained_end(blk->root);
    return ret;
}

int blk_write_zeroes(BlockBackend *blk, int64_t offset,
                     int count, BdrvRequestFlags flags)
{
    return blk_prw(blk, offset, NULL, count, blk_write_entry,
                   flags | BDRV_REQ_ZERO_WRITE);
}

static void error_callback_bh(void *opaque)
{
    struct BlockBackendAIOCB *acb = opaque;
    qemu_bh_delete(acb->bh);
    acb->common.cb(acb->common.opaque, acb->ret);
    qemu_aio_unref(acb);
}

BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
                                  BlockCompletionFunc *cb,
                                  void *opaque, int ret)
{
    struct BlockBackendAIOCB *acb;
    QEMUBH *bh;

    acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
    acb->blk = blk;
    acb->ret = ret;

    bh = aio_bh_new(blk_get_aio_context(blk), error_callback_bh, acb);
    acb->bh = bh;
    qemu_bh_schedule(bh);

    return &acb->common;
}

typedef struct BlkAioEmAIOCB {
    BlockAIOCB common;
    BlkRwCo rwco;
    int bytes;
    bool has_returned;
    QEMUBH *bh;
} BlkAioEmAIOCB;

static const AIOCBInfo blk_aio_em_aiocb_info = {
    .aiocb_size         = sizeof(BlkAioEmAIOCB),
};

static void blk_aio_complete(BlkAioEmAIOCB *acb)
{
    if (acb->bh) {
        assert(acb->has_returned);
        qemu_bh_delete(acb->bh);
    }
    if (acb->has_returned) {
        acb->common.cb(acb->common.opaque, acb->rwco.ret);
        qemu_aio_unref(acb);
    }
}

static void blk_aio_complete_bh(void *opaque)
{
    blk_aio_complete(opaque);
}

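/*
 * Common machinery behind the AIO wrappers.  Note the has_returned dance:
 * the coroutine may finish before qemu_coroutine_enter() returns, in which
 * case the completion callback must not run yet; it is deferred to a bottom
 * half so that it always fires after blk_aio_prwv() has returned its
 * BlockAIOCB to the caller.
 */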
static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
                                QEMUIOVector *qiov, CoroutineEntry co_entry,
                                BdrvRequestFlags flags,
                                BlockCompletionFunc *cb, void *opaque)
{
    BlkAioEmAIOCB *acb;
    Coroutine *co;

    acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
    acb->rwco = (BlkRwCo) {
        .blk    = blk,
        .offset = offset,
        .qiov   = qiov,
        .flags  = flags,
        .ret    = NOT_DONE,
    };
    acb->bytes = bytes;
    acb->bh = NULL;
    acb->has_returned = false;

    co = qemu_coroutine_create(co_entry);
    qemu_coroutine_enter(co, acb);

    acb->has_returned = true;
    if (acb->rwco.ret != NOT_DONE) {
        acb->bh = aio_bh_new(blk_get_aio_context(blk), blk_aio_complete_bh,
                             acb);
        qemu_bh_schedule(acb->bh);
    }

    return &acb->common;
}

static void blk_aio_read_entry(void *opaque)
{
    BlkAioEmAIOCB *acb = opaque;
    BlkRwCo *rwco = &acb->rwco;

    assert(rwco->qiov->size == acb->bytes);
    rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
                              rwco->qiov, rwco->flags);
    blk_aio_complete(acb);
}

static void blk_aio_write_entry(void *opaque)
{
    BlkAioEmAIOCB *acb = opaque;
    BlkRwCo *rwco = &acb->rwco;

    assert(!rwco->qiov || rwco->qiov->size == acb->bytes);
    rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
                               rwco->qiov, rwco->flags);
    blk_aio_complete(acb);
}

BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t offset,
                                 int count, BdrvRequestFlags flags,
                                 BlockCompletionFunc *cb, void *opaque)
{
    return blk_aio_prwv(blk, offset, count, NULL, blk_aio_write_entry,
                        flags | BDRV_REQ_ZERO_WRITE, cb, opaque);
}

int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count)
{
    int ret = blk_prw(blk, offset, buf, count, blk_read_entry, 0);
    if (ret < 0) {
        return ret;
    }
    return count;
}

int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count,
               BdrvRequestFlags flags)
{
    int ret = blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
                      flags);
    if (ret < 0) {
        return ret;
    }
    return count;
}

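/*
 * Usage sketch (illustrative): synchronous read-modify-write of the first
 * sector.  Both helpers return the byte count on success and a negative
 * errno on failure:
 *
 *     uint8_t buf[BDRV_SECTOR_SIZE];
 *     int ret;
 *
 *     ret = blk_pread(blk, 0, buf, sizeof(buf));
 *     if (ret < 0) {
 *         return ret;
 *     }
 *     buf[0] ^= 0xff;
 *     ret = blk_pwrite(blk, 0, buf, sizeof(buf), 0);
 *     if (ret < 0) {
 *         return ret;
 *     }
 */
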
int64_t blk_getlength(BlockBackend *blk)
{
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_getlength(blk_bs(blk));
}

void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr)
{
    if (!blk_bs(blk)) {
        *nb_sectors_ptr = 0;
    } else {
        bdrv_get_geometry(blk_bs(blk), nb_sectors_ptr);
    }
}

int64_t blk_nb_sectors(BlockBackend *blk)
{
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_nb_sectors(blk_bs(blk));
}

BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
                           QEMUIOVector *qiov, BdrvRequestFlags flags,
                           BlockCompletionFunc *cb, void *opaque)
{
    return blk_aio_prwv(blk, offset, qiov->size, qiov,
                        blk_aio_read_entry, flags, cb, opaque);
}

BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
                            QEMUIOVector *qiov, BdrvRequestFlags flags,
                            BlockCompletionFunc *cb, void *opaque)
{
    return blk_aio_prwv(blk, offset, qiov->size, qiov,
                        blk_aio_write_entry, flags, cb, opaque);
}

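/*
 * Usage sketch (illustrative; my_read_cb is a placeholder, not an API of
 * this file): submit an asynchronous read and let the callback run in the
 * backend's AioContext when the request completes:
 *
 *     static void my_read_cb(void *opaque, int ret)
 *     {
 *         ... ret is 0 on success, a negative errno on failure ...
 *     }
 *
 *     qemu_iovec_init_external(&qiov, &iov, 1);
 *     blk_aio_preadv(blk, offset, &qiov, 0, my_read_cb, opaque);
 */
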
BlockAIOCB *blk_aio_flush(BlockBackend *blk,
                          BlockCompletionFunc *cb, void *opaque)
{
    if (!blk_is_available(blk)) {
        return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM);
    }

    return bdrv_aio_flush(blk_bs(blk), cb, opaque);
}

BlockAIOCB *blk_aio_discard(BlockBackend *blk,
                            int64_t sector_num, int nb_sectors,
                            BlockCompletionFunc *cb, void *opaque)
{
    int ret = blk_check_request(blk, sector_num, nb_sectors);
    if (ret < 0) {
        return blk_abort_aio_request(blk, cb, opaque, ret);
    }

    return bdrv_aio_discard(blk_bs(blk), sector_num, nb_sectors, cb, opaque);
}

void blk_aio_cancel(BlockAIOCB *acb)
{
    bdrv_aio_cancel(acb);
}

void blk_aio_cancel_async(BlockAIOCB *acb)
{
    bdrv_aio_cancel_async(acb);
}

int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
{
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_ioctl(blk_bs(blk), req, buf);
}

BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
                          BlockCompletionFunc *cb, void *opaque)
{
    if (!blk_is_available(blk)) {
        return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM);
    }

    return bdrv_aio_ioctl(blk_bs(blk), req, buf, cb, opaque);
}

int blk_co_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
{
    int ret = blk_check_request(blk, sector_num, nb_sectors);
    if (ret < 0) {
        return ret;
    }

    return bdrv_co_discard(blk_bs(blk), sector_num, nb_sectors);
}

int blk_co_flush(BlockBackend *blk)
{
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_co_flush(blk_bs(blk));
}

int blk_flush(BlockBackend *blk)
{
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_flush(blk_bs(blk));
}

void blk_drain(BlockBackend *blk)
{
    if (blk_bs(blk)) {
        bdrv_drain(blk_bs(blk));
    }
}

void blk_drain_all(void)
{
    bdrv_drain_all();
}

void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
                      BlockdevOnError on_write_error)
{
    blk->on_read_error = on_read_error;
    blk->on_write_error = on_write_error;
}

BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read)
{
    return is_read ? blk->on_read_error : blk->on_write_error;
}

BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
                                      int error)
{
    BlockdevOnError on_err = blk_get_on_error(blk, is_read);

    switch (on_err) {
    case BLOCKDEV_ON_ERROR_ENOSPC:
        return (error == ENOSPC) ?
               BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
    case BLOCKDEV_ON_ERROR_STOP:
        return BLOCK_ERROR_ACTION_STOP;
    case BLOCKDEV_ON_ERROR_REPORT:
        return BLOCK_ERROR_ACTION_REPORT;
    case BLOCKDEV_ON_ERROR_IGNORE:
        return BLOCK_ERROR_ACTION_IGNORE;
    default:
        abort();
    }
}

static void send_qmp_error_event(BlockBackend *blk,
                                 BlockErrorAction action,
                                 bool is_read, int error)
{
    IoOperationType optype;

    optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
    qapi_event_send_block_io_error(blk_name(blk), optype, action,
                                   blk_iostatus_is_enabled(blk),
                                   error == ENOSPC, strerror(error),
                                   &error_abort);
}

/* This is done by device models because, while the block layer knows
 * about the error, it does not know whether an operation comes from
 * the device or the block layer (from a job, for example).
 */
void blk_error_action(BlockBackend *blk, BlockErrorAction action,
                      bool is_read, int error)
{
    assert(error >= 0);

    if (action == BLOCK_ERROR_ACTION_STOP) {
        /* First set the iostatus, so that "info block" returns an iostatus
         * that matches the events raised so far (an additional error iostatus
         * is fine, but not a lost one).
         */
        blk_iostatus_set_err(blk, error);

        /* Then raise the request to stop the VM and the event.
         * qemu_system_vmstop_request_prepare has two effects.  First,
         * it ensures that the STOP event always comes after the
         * BLOCK_IO_ERROR event.  Second, it ensures that even if management
         * can observe the STOP event and do a "cont" before the STOP
         * event is issued, the VM will not stop.  In this case, vm_start()
         * also ensures that the STOP/RESUME pair of events is emitted.
         */
        qemu_system_vmstop_request_prepare();
        send_qmp_error_event(blk, action, is_read, error);
        qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
    } else {
        send_qmp_error_event(blk, action, is_read, error);
    }
}

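/*
 * Usage sketch (illustrative): the typical pattern in a device model's
 * completion path.  @ret is the negative errno reported by the request;
 * first ask what to do, then act and emit the QMP event:
 *
 *     int error = -ret;
 *     BlockErrorAction action = blk_get_error_action(blk, is_read, error);
 *
 *     if (action == BLOCK_ERROR_ACTION_STOP) {
 *         ... park the request so it can be retried after "cont" ...
 *     }
 *     blk_error_action(blk, action, is_read, error);
 *     if (action == BLOCK_ERROR_ACTION_REPORT) {
 *         ... complete the request with an error towards the guest ...
 *     }
 */
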
int blk_is_read_only(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        return bdrv_is_read_only(bs);
    } else {
        return blk->root_state.read_only;
    }
}

int blk_is_sg(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);

    if (!bs) {
        return 0;
    }

    return bdrv_is_sg(bs);
}

int blk_enable_write_cache(BlockBackend *blk)
{
    return blk->enable_write_cache;
}

void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
{
    blk->enable_write_cache = wce;
}

void blk_invalidate_cache(BlockBackend *blk, Error **errp)
{
    BlockDriverState *bs = blk_bs(blk);

    if (!bs) {
        error_setg(errp, "Device '%s' has no medium", blk->name);
        return;
    }

    bdrv_invalidate_cache(bs, errp);
}

bool blk_is_inserted(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);

    return bs && bdrv_is_inserted(bs);
}

bool blk_is_available(BlockBackend *blk)
{
    return blk_is_inserted(blk) && !blk_dev_is_tray_open(blk);
}

void blk_lock_medium(BlockBackend *blk, bool locked)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        bdrv_lock_medium(bs, locked);
    }
}

void blk_eject(BlockBackend *blk, bool eject_flag)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        bdrv_eject(bs, eject_flag);
    }
}

int blk_get_flags(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        return bdrv_get_flags(bs);
    } else {
        return blk->root_state.open_flags;
    }
}

int blk_get_max_transfer_length(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        return bs->bl.max_transfer_length;
    } else {
        return 0;
    }
}

int blk_get_max_iov(BlockBackend *blk)
{
    return blk->root->bs->bl.max_iov;
}

void blk_set_guest_block_size(BlockBackend *blk, int align)
{
    blk->guest_block_size = align;
}

void *blk_try_blockalign(BlockBackend *blk, size_t size)
{
    return qemu_try_blockalign(blk ? blk_bs(blk) : NULL, size);
}

void *blk_blockalign(BlockBackend *blk, size_t size)
{
    return qemu_blockalign(blk ? blk_bs(blk) : NULL, size);
}

bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
{
    BlockDriverState *bs = blk_bs(blk);

    if (!bs) {
        return false;
    }

    return bdrv_op_is_blocked(bs, op, errp);
}

void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        bdrv_op_unblock(bs, op, reason);
    }
}

void blk_op_block_all(BlockBackend *blk, Error *reason)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        bdrv_op_block_all(bs, reason);
    }
}

void blk_op_unblock_all(BlockBackend *blk, Error *reason)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        bdrv_op_unblock_all(bs, reason);
    }
}

AioContext *blk_get_aio_context(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        return bdrv_get_aio_context(bs);
    } else {
        return qemu_get_aio_context();
    }
}

static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
{
    BlockBackendAIOCB *blk_acb = DO_UPCAST(BlockBackendAIOCB, common, acb);
    return blk_get_aio_context(blk_acb->blk);
}

void blk_set_aio_context(BlockBackend *blk, AioContext *new_context)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        if (blk->public.throttle_state) {
            throttle_timers_detach_aio_context(&blk->public.throttle_timers);
        }
        bdrv_set_aio_context(bs, new_context);
        if (blk->public.throttle_state) {
            throttle_timers_attach_aio_context(&blk->public.throttle_timers,
                                               new_context);
        }
    }
}

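/*
 * Usage sketch (illustrative; iothread_get_aio_context() is declared in
 * sysemu/iothread.h): moving a backend into an IOThread's context, as
 * dataplane-style devices do.  Locking expectations are the caller's
 * responsibility and are elided here:
 *
 *     blk_set_aio_context(blk, iothread_get_aio_context(iothread));
 */
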
void blk_add_aio_context_notifier(BlockBackend *blk,
        void (*attached_aio_context)(AioContext *new_context, void *opaque),
        void (*detach_aio_context)(void *opaque), void *opaque)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        bdrv_add_aio_context_notifier(bs, attached_aio_context,
                                      detach_aio_context, opaque);
    }
}

void blk_remove_aio_context_notifier(BlockBackend *blk,
                                     void (*attached_aio_context)(AioContext *,
                                                                  void *),
                                     void (*detach_aio_context)(void *),
                                     void *opaque)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        bdrv_remove_aio_context_notifier(bs, attached_aio_context,
                                         detach_aio_context, opaque);
    }
}

void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify)
{
    notifier_list_add(&blk->remove_bs_notifiers, notify);
}

void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify)
{
    notifier_list_add(&blk->insert_bs_notifiers, notify);
}

void blk_io_plug(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        bdrv_io_plug(bs);
    }
}

void blk_io_unplug(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        bdrv_io_unplug(bs);
    }
}

BlockAcctStats *blk_get_stats(BlockBackend *blk)
{
    return &blk->stats;
}

void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
                  BlockCompletionFunc *cb, void *opaque)
{
    return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque);
}

int coroutine_fn blk_co_write_zeroes(BlockBackend *blk, int64_t offset,
                                     int count, BdrvRequestFlags flags)
{
    return blk_co_pwritev(blk, offset, count, NULL,
                          flags | BDRV_REQ_ZERO_WRITE);
}

int blk_write_compressed(BlockBackend *blk, int64_t sector_num,
                         const uint8_t *buf, int nb_sectors)
{
    int ret = blk_check_request(blk, sector_num, nb_sectors);
    if (ret < 0) {
        return ret;
    }

    return bdrv_write_compressed(blk_bs(blk), sector_num, buf, nb_sectors);
}

int blk_truncate(BlockBackend *blk, int64_t offset)
{
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_truncate(blk_bs(blk), offset);
}

int blk_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
{
    int ret = blk_check_request(blk, sector_num, nb_sectors);
    if (ret < 0) {
        return ret;
    }

    return bdrv_discard(blk_bs(blk), sector_num, nb_sectors);
}

int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
                     int64_t pos, int size)
{
    int ret;

    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    ret = bdrv_save_vmstate(blk_bs(blk), buf, pos, size);
    if (ret < 0) {
        return ret;
    }

    if (ret == size && !blk->enable_write_cache) {
        ret = bdrv_flush(blk_bs(blk));
    }

    return ret < 0 ? ret : size;
}

int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
{
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_load_vmstate(blk_bs(blk), buf, pos, size);
}

int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
{
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_probe_blocksizes(blk_bs(blk), bsz);
}

int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
{
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_probe_geometry(blk_bs(blk), geo);
}

/*
 * Updates the BlockBackendRootState object with data from the currently
 * attached BlockDriverState.
 */
void blk_update_root_state(BlockBackend *blk)
{
    assert(blk->root);

    blk->root_state.open_flags    = blk->root->bs->open_flags;
    blk->root_state.read_only     = blk->root->bs->read_only;
    blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes;
}

/*
 * Applies the information in the root state to the given BlockDriverState.
 * This does not include the flags, which have to be specified for bdrv_open();
 * use blk_get_open_flags_from_root_state() to inquire them.
 */
void blk_apply_root_state(BlockBackend *blk, BlockDriverState *bs)
{
    bs->detect_zeroes = blk->root_state.detect_zeroes;
}

/*
 * Returns the flags to be used for bdrv_open() of a BlockDriverState which is
 * supposed to inherit the root state.
 */
int blk_get_open_flags_from_root_state(BlockBackend *blk)
{
    int bs_flags;

    bs_flags = blk->root_state.read_only ? 0 : BDRV_O_RDWR;
    bs_flags |= blk->root_state.open_flags & ~BDRV_O_RDWR;

    return bs_flags;
}

BlockBackendRootState *blk_get_root_state(BlockBackend *blk)
{
    return &blk->root_state;
}

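/*
 * Usage sketch (illustrative; reference handling elided, see blockdev.c for
 * the real medium-change sequence): re-creating a medium from the saved
 * root state after the old BDS tree was removed:
 *
 *     int flags = blk_get_open_flags_from_root_state(blk);
 *     BlockDriverState *bs = NULL;
 *     int ret;
 *
 *     ret = bdrv_open(&bs, filename, NULL, options, flags, errp);
 *     if (ret < 0) {
 *         return ret;
 *     }
 *     blk_insert_bs(blk, bs);
 *     blk_apply_root_state(blk, bs);
 */
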
int blk_commit_all(void)
{
    BlockBackend *blk = NULL;

    while ((blk = blk_all_next(blk)) != NULL) {
        AioContext *aio_context = blk_get_aio_context(blk);

        aio_context_acquire(aio_context);
        if (blk_is_inserted(blk) && blk->root->bs->backing) {
            int ret = bdrv_commit(blk->root->bs);
            if (ret < 0) {
                aio_context_release(aio_context);
                return ret;
            }
        }
        aio_context_release(aio_context);
    }
    return 0;
}

int blk_flush_all(void)
{
    BlockBackend *blk = NULL;
    int result = 0;

    while ((blk = blk_all_next(blk)) != NULL) {
        AioContext *aio_context = blk_get_aio_context(blk);
        int ret;

        aio_context_acquire(aio_context);
        if (blk_is_inserted(blk)) {
            ret = blk_flush(blk);
            if (ret < 0 && !result) {
                result = ret;
            }
        }
        aio_context_release(aio_context);
    }

    return result;
}


/* throttling disk I/O limits */
void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg)
{
    throttle_group_config(blk, cfg);
}

void blk_io_limits_disable(BlockBackend *blk)
{
    assert(blk->public.throttle_state);
    bdrv_drained_begin(blk_bs(blk));
    throttle_group_unregister_blk(blk);
    bdrv_drained_end(blk_bs(blk));
}

/* Should be called before blk_set_io_limits() if a limit is set */
void blk_io_limits_enable(BlockBackend *blk, const char *group)
{
    assert(!blk->public.throttle_state);
    throttle_group_register_blk(blk, group);
}

void blk_io_limits_update_group(BlockBackend *blk, const char *group)
{
    /* this BB is not part of any group */
    if (!blk->public.throttle_state) {
        return;
    }

    /* this BB is already part of the group we want */
    if (!g_strcmp0(throttle_group_get_name(blk), group)) {
        return;
    }

    /* need to change the group this BB belongs to */
    blk_io_limits_disable(blk);
    blk_io_limits_enable(blk, group);
}

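/*
 * Usage sketch (illustrative): cap a backend at roughly 10 MB/s of total
 * bandwidth.  The backend must be registered with a throttle group before
 * limits are applied:
 *
 *     ThrottleConfig cfg;
 *
 *     memset(&cfg, 0, sizeof(cfg));
 *     cfg.buckets[THROTTLE_BPS_TOTAL].avg = 10 * 1024 * 1024;
 *     blk_io_limits_enable(blk, "group0");
 *     blk_set_io_limits(blk, &cfg);
 */
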
static void blk_root_drained_begin(BdrvChild *child)
{
    BlockBackend *blk = child->opaque;

    if (blk->public.io_limits_disabled++ == 0) {
        throttle_group_restart_blk(blk);
    }
}

static void blk_root_drained_end(BdrvChild *child)
{
    BlockBackend *blk = child->opaque;

    assert(blk->public.io_limits_disabled);
    --blk->public.io_limits_disabled;
}
1719