xref: /openbmc/qemu/block/block-backend.c (revision 49d755d0)
1 /*
2  * QEMU Block backends
3  *
4  * Copyright (C) 2014-2016 Red Hat, Inc.
5  *
6  * Authors:
7  *  Markus Armbruster <armbru@redhat.com>,
8  *
9  * This work is licensed under the terms of the GNU LGPL, version 2.1
10  * or later.  See the COPYING.LIB file in the top-level directory.
11  */
12 
13 #include "qemu/osdep.h"
14 #include "sysemu/block-backend.h"
15 #include "block/block_int.h"
16 #include "block/blockjob.h"
17 #include "block/throttle-groups.h"
18 #include "sysemu/blockdev.h"
19 #include "sysemu/sysemu.h"
20 #include "qapi/error.h"
21 #include "qapi/qapi-events-block.h"
22 #include "qemu/id.h"
23 #include "qemu/option.h"
24 #include "trace.h"
25 #include "migration/misc.h"
26 
27 /* Number of coroutines to reserve per attached device model */
28 #define COROUTINE_POOL_RESERVATION 64
29 
30 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
31 
32 static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb);
33 
34 typedef struct BlockBackendAioNotifier {
35     void (*attached_aio_context)(AioContext *new_context, void *opaque);
36     void (*detach_aio_context)(void *opaque);
37     void *opaque;
38     QLIST_ENTRY(BlockBackendAioNotifier) list;
39 } BlockBackendAioNotifier;
40 
41 struct BlockBackend {
42     char *name;
43     int refcnt;
44     BdrvChild *root;
45     DriveInfo *legacy_dinfo;    /* null unless created by drive_new() */
46     QTAILQ_ENTRY(BlockBackend) link;         /* for block_backends */
47     QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */
48     BlockBackendPublic public;
49 
50     DeviceState *dev;           /* attached device model, if any */
51     const BlockDevOps *dev_ops;
52     void *dev_opaque;
53 
54     /* the block size for which the guest device expects atomicity */
55     int guest_block_size;
56 
57     /* If the BDS tree is removed, some of its options are stored here (which
58      * can be used to restore those options in the new BDS on insert) */
59     BlockBackendRootState root_state;
60 
61     bool enable_write_cache;
62 
63     /* I/O stats (display with "info blockstats"). */
64     BlockAcctStats stats;
65 
66     BlockdevOnError on_read_error, on_write_error;
67     bool iostatus_enabled;
68     BlockDeviceIoStatus iostatus;
69 
70     uint64_t perm;
71     uint64_t shared_perm;
72     bool disable_perm;
73 
74     bool allow_aio_context_change;
75     bool allow_write_beyond_eof;
76 
77     NotifierList remove_bs_notifiers, insert_bs_notifiers;
78     QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers;
79 
80     int quiesce_counter;
81     VMChangeStateEntry *vmsh;
82     bool force_allow_inactivate;
83 
84     /* Number of in-flight aio requests.  BlockDriverState also counts
85      * in-flight requests but aio requests can exist even when blk->root is
86      * NULL, so we cannot rely on its counter for that case.
87      * Accessed with atomic ops.
88      */
89     unsigned int in_flight;
90 };
91 
92 typedef struct BlockBackendAIOCB {
93     BlockAIOCB common;
94     BlockBackend *blk;
95     int ret;
96 } BlockBackendAIOCB;
97 
98 static const AIOCBInfo block_backend_aiocb_info = {
99     .get_aio_context = blk_aiocb_get_aio_context,
100     .aiocb_size = sizeof(BlockBackendAIOCB),
101 };
102 
103 static void drive_info_del(DriveInfo *dinfo);
104 static BlockBackend *bdrv_first_blk(BlockDriverState *bs);
105 
106 /* All BlockBackends */
107 static QTAILQ_HEAD(, BlockBackend) block_backends =
108     QTAILQ_HEAD_INITIALIZER(block_backends);
109 
110 /* All BlockBackends referenced by the monitor, i.e. the ones that
111  * blk_next() iterates over */
112 static QTAILQ_HEAD(, BlockBackend) monitor_block_backends =
113     QTAILQ_HEAD_INITIALIZER(monitor_block_backends);
114 
115 static void blk_root_inherit_options(int *child_flags, QDict *child_options,
116                                      int parent_flags, QDict *parent_options)
117 {
118     /* We're not supposed to call this function for root nodes */
119     abort();
120 }
121 static void blk_root_drained_begin(BdrvChild *child);
122 static bool blk_root_drained_poll(BdrvChild *child);
123 static void blk_root_drained_end(BdrvChild *child);
124 
125 static void blk_root_change_media(BdrvChild *child, bool load);
126 static void blk_root_resize(BdrvChild *child);
127 
128 static bool blk_root_can_set_aio_ctx(BdrvChild *child, AioContext *ctx,
129                                      GSList **ignore, Error **errp);
130 static void blk_root_set_aio_ctx(BdrvChild *child, AioContext *ctx,
131                                  GSList **ignore);
132 
133 static char *blk_root_get_parent_desc(BdrvChild *child)
134 {
135     BlockBackend *blk = child->opaque;
136     char *dev_id;
137 
138     if (blk->name) {
139         return g_strdup(blk->name);
140     }
141 
142     dev_id = blk_get_attached_dev_id(blk);
143     if (*dev_id) {
144         return dev_id;
145     } else {
146         /* TODO Callback into the BB owner for something more detailed */
147         g_free(dev_id);
148         return g_strdup("a block device");
149     }
150 }
151 
152 static const char *blk_root_get_name(BdrvChild *child)
153 {
154     return blk_name(child->opaque);
155 }
156 
157 static void blk_vm_state_changed(void *opaque, int running, RunState state)
158 {
159     Error *local_err = NULL;
160     BlockBackend *blk = opaque;
161 
162     if (state == RUN_STATE_INMIGRATE) {
163         return;
164     }
165 
166     qemu_del_vm_change_state_handler(blk->vmsh);
167     blk->vmsh = NULL;
168     blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err);
169     if (local_err) {
170         error_report_err(local_err);
171     }
172 }
173 
174 /*
175  * Notifies the user of the BlockBackend that migration has completed. qdev
176  * devices can tighten their permissions in response (specifically revoke
177  * shared write permissions that we needed for storage migration).
178  *
179  * If an error is returned, the VM cannot be allowed to be resumed.
180  */
181 static void blk_root_activate(BdrvChild *child, Error **errp)
182 {
183     BlockBackend *blk = child->opaque;
184     Error *local_err = NULL;
185 
186     if (!blk->disable_perm) {
187         return;
188     }
189 
190     blk->disable_perm = false;
191 
192     blk_set_perm(blk, blk->perm, BLK_PERM_ALL, &local_err);
193     if (local_err) {
194         error_propagate(errp, local_err);
195         blk->disable_perm = true;
196         return;
197     }
198 
199     if (runstate_check(RUN_STATE_INMIGRATE)) {
200         /* Activation can happen when migration process is still active, for
201          * example when nbd_server_add is called during non-shared storage
202          * migration. Defer the shared_perm update to migration completion. */
203         if (!blk->vmsh) {
204             blk->vmsh = qemu_add_vm_change_state_handler(blk_vm_state_changed,
205                                                          blk);
206         }
207         return;
208     }
209 
210     blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err);
211     if (local_err) {
212         error_propagate(errp, local_err);
213         blk->disable_perm = true;
214         return;
215     }
216 }
217 
218 void blk_set_force_allow_inactivate(BlockBackend *blk)
219 {
220     blk->force_allow_inactivate = true;
221 }
222 
223 static bool blk_can_inactivate(BlockBackend *blk)
224 {
225     /* If it is a guest device, inactivating is OK. */
226     if (blk->dev || blk_name(blk)[0]) {
227         return true;
228     }
229 
230     /* Inactivating means no more writes to the image can be done,
231      * even if those writes would be changes invisible to the
232      * guest.  For block job BBs that satisfy this, we can just allow
233      * it.  This is the case for mirror job source, which is required
234      * by libvirt non-shared block migration. */
235     if (!(blk->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED))) {
236         return true;
237     }
238 
239     return blk->force_allow_inactivate;
240 }
241 
242 static int blk_root_inactivate(BdrvChild *child)
243 {
244     BlockBackend *blk = child->opaque;
245 
246     if (blk->disable_perm) {
247         return 0;
248     }
249 
250     if (!blk_can_inactivate(blk)) {
251         return -EPERM;
252     }
253 
254     blk->disable_perm = true;
255     if (blk->root) {
256         bdrv_child_try_set_perm(blk->root, 0, BLK_PERM_ALL, &error_abort);
257     }
258 
259     return 0;
260 }
261 
262 static void blk_root_attach(BdrvChild *child)
263 {
264     BlockBackend *blk = child->opaque;
265     BlockBackendAioNotifier *notifier;
266 
267     trace_blk_root_attach(child, blk, child->bs);
268 
269     QLIST_FOREACH(notifier, &blk->aio_notifiers, list) {
270         bdrv_add_aio_context_notifier(child->bs,
271                 notifier->attached_aio_context,
272                 notifier->detach_aio_context,
273                 notifier->opaque);
274     }
275 }
276 
277 static void blk_root_detach(BdrvChild *child)
278 {
279     BlockBackend *blk = child->opaque;
280     BlockBackendAioNotifier *notifier;
281 
282     trace_blk_root_detach(child, blk, child->bs);
283 
284     QLIST_FOREACH(notifier, &blk->aio_notifiers, list) {
285         bdrv_remove_aio_context_notifier(child->bs,
286                 notifier->attached_aio_context,
287                 notifier->detach_aio_context,
288                 notifier->opaque);
289     }
290 }
291 
292 static const BdrvChildRole child_root = {
293     .inherit_options    = blk_root_inherit_options,
294 
295     .change_media       = blk_root_change_media,
296     .resize             = blk_root_resize,
297     .get_name           = blk_root_get_name,
298     .get_parent_desc    = blk_root_get_parent_desc,
299 
300     .drained_begin      = blk_root_drained_begin,
301     .drained_poll       = blk_root_drained_poll,
302     .drained_end        = blk_root_drained_end,
303 
304     .activate           = blk_root_activate,
305     .inactivate         = blk_root_inactivate,
306 
307     .attach             = blk_root_attach,
308     .detach             = blk_root_detach,
309 
310     .can_set_aio_ctx    = blk_root_can_set_aio_ctx,
311     .set_aio_ctx        = blk_root_set_aio_ctx,
312 };
313 
314 /*
315  * Create a new BlockBackend with a reference count of one.
316  *
317  * @perm is a bitmask of BLK_PERM_* constants that describes the permissions
318  * to request for a block driver node that is attached to this BlockBackend.
319  * @shared_perm is a bitmask which describes which permissions may be granted
320  * to other users of the attached node.
321  * Both sets of permissions can be changed later using blk_set_perm().
322  *
323  * Return the new BlockBackend on success, null on failure.
324  */
325 BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm)
326 {
327     BlockBackend *blk;
328 
329     blk = g_new0(BlockBackend, 1);
330     blk->refcnt = 1;
331     blk->perm = perm;
332     blk->shared_perm = shared_perm;
333     blk_set_enable_write_cache(blk, true);
334 
335     blk->on_read_error = BLOCKDEV_ON_ERROR_REPORT;
336     blk->on_write_error = BLOCKDEV_ON_ERROR_ENOSPC;
337 
338     block_acct_init(&blk->stats);
339 
340     notifier_list_init(&blk->remove_bs_notifiers);
341     notifier_list_init(&blk->insert_bs_notifiers);
342     QLIST_INIT(&blk->aio_notifiers);
343 
344     QTAILQ_INSERT_TAIL(&block_backends, blk, link);
345     return blk;
346 }
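
/*
 * Example (editor's sketch, not part of the original file): a typical user
 * creates the BlockBackend first and attaches a node afterwards. The
 * permission bits and the node @bs are assumptions made for illustration.
 *
 *     BlockBackend *blk = blk_new(BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE,
 *                                 BLK_PERM_ALL);
 *
 *     if (blk_insert_bs(blk, bs, errp) < 0) {
 *         blk_unref(blk);
 *         return -EPERM;
 *     }
 */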
347 
348 /*
349  * Creates a new BlockBackend, opens a new BlockDriverState, and connects both.
350  *
351  * Just as with bdrv_open(), after having called this function the reference to
352  * @options belongs to the block layer (even on failure).
353  *
354  * TODO: Remove @filename and @flags; it should be possible to specify a whole
355  * BDS tree just by specifying the @options QDict (or @reference,
356  * alternatively). At the time of adding this function, this is not possible,
357  * though, so callers of this function have to be able to specify @filename and
358  * @flags.
359  */
360 BlockBackend *blk_new_open(const char *filename, const char *reference,
361                            QDict *options, int flags, Error **errp)
362 {
363     BlockBackend *blk;
364     BlockDriverState *bs;
365     uint64_t perm = 0;
366 
367     /* blk_new_open() is mainly used in .bdrv_create implementations and the
368      * tools where sharing isn't a concern because the BDS stays private, so we
369      * just request permission according to the flags.
370      *
371      * The exceptions are xen_disk and blockdev_init(); in these cases, the
372      * caller of blk_new_open() doesn't make use of the permissions, but they
373      * shouldn't hurt either. We can still share everything here because the
374      * guest devices will add their own blockers if they can't share. */
375     if ((flags & BDRV_O_NO_IO) == 0) {
376         perm |= BLK_PERM_CONSISTENT_READ;
377         if (flags & BDRV_O_RDWR) {
378             perm |= BLK_PERM_WRITE;
379         }
380     }
381     if (flags & BDRV_O_RESIZE) {
382         perm |= BLK_PERM_RESIZE;
383     }
384 
385     blk = blk_new(perm, BLK_PERM_ALL);
386     bs = bdrv_open(filename, reference, options, flags, errp);
387     if (!bs) {
388         blk_unref(blk);
389         return NULL;
390     }
391 
392     blk->root = bdrv_root_attach_child(bs, "root", &child_root,
393                                        perm, BLK_PERM_ALL, blk, errp);
394     if (!blk->root) {
395         bdrv_unref(bs);
396         blk_unref(blk);
397         return NULL;
398     }
399 
400     return blk;
401 }
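
/*
 * Example (editor's sketch): how a tool or a .bdrv_create implementation
 * might use blk_new_open(). The filename and flag combination here are
 * illustrative assumptions, not taken from this file.
 *
 *     BlockBackend *blk = blk_new_open("test.qcow2", NULL, NULL,
 *                                      BDRV_O_RDWR | BDRV_O_RESIZE, errp);
 *     if (!blk) {
 *         return -EIO;
 *     }
 *     ...
 *     blk_unref(blk);
 */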
402 
403 static void blk_delete(BlockBackend *blk)
404 {
405     assert(!blk->refcnt);
406     assert(!blk->name);
407     assert(!blk->dev);
408     if (blk->public.throttle_group_member.throttle_state) {
409         blk_io_limits_disable(blk);
410     }
411     if (blk->root) {
412         blk_remove_bs(blk);
413     }
414     if (blk->vmsh) {
415         qemu_del_vm_change_state_handler(blk->vmsh);
416         blk->vmsh = NULL;
417     }
418     assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers));
419     assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers));
420     assert(QLIST_EMPTY(&blk->aio_notifiers));
421     QTAILQ_REMOVE(&block_backends, blk, link);
422     drive_info_del(blk->legacy_dinfo);
423     block_acct_cleanup(&blk->stats);
424     g_free(blk);
425 }
426 
427 static void drive_info_del(DriveInfo *dinfo)
428 {
429     if (!dinfo) {
430         return;
431     }
432     qemu_opts_del(dinfo->opts);
433     g_free(dinfo);
434 }
435 
436 int blk_get_refcnt(BlockBackend *blk)
437 {
438     return blk ? blk->refcnt : 0;
439 }
440 
441 /*
442  * Increment @blk's reference count.
443  * @blk must not be null.
444  */
445 void blk_ref(BlockBackend *blk)
446 {
447     assert(blk->refcnt > 0);
448     blk->refcnt++;
449 }
450 
451 /*
452  * Decrement @blk's reference count.
453  * If this drops it to zero, destroy @blk.
454  * For convenience, do nothing if @blk is null.
455  */
456 void blk_unref(BlockBackend *blk)
457 {
458     if (blk) {
459         assert(blk->refcnt > 0);
460         if (blk->refcnt > 1) {
461             blk->refcnt--;
462         } else {
463             blk_drain(blk);
464             /* blk_drain() cannot resurrect blk, nobody held a reference */
465             assert(blk->refcnt == 1);
466             blk->refcnt = 0;
467             blk_delete(blk);
468         }
469     }
470 }
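
/*
 * Example (editor's sketch): references are strictly paired. A user that
 * keeps a long-lived pointer takes its own reference and drops it when it
 * is done; the final blk_unref() drains outstanding I/O and destroys @blk.
 *
 *     blk_ref(blk);        refcnt goes from n to n + 1
 *     ... use blk ...
 *     blk_unref(blk);      refcnt goes back to n
 */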
471 
472 /*
473  * Behaves similarly to blk_next() but iterates over all BlockBackends, even the
474  * ones which are hidden (i.e. are not referenced by the monitor).
475  */
476 BlockBackend *blk_all_next(BlockBackend *blk)
477 {
478     return blk ? QTAILQ_NEXT(blk, link)
479                : QTAILQ_FIRST(&block_backends);
480 }
481 
482 void blk_remove_all_bs(void)
483 {
484     BlockBackend *blk = NULL;
485 
486     while ((blk = blk_all_next(blk)) != NULL) {
487         AioContext *ctx = blk_get_aio_context(blk);
488 
489         aio_context_acquire(ctx);
490         if (blk->root) {
491             blk_remove_bs(blk);
492         }
493         aio_context_release(ctx);
494     }
495 }
496 
497 /*
498  * Return the monitor-owned BlockBackend after @blk.
499  * If @blk is null, return the first one.
500  * Else, return @blk's next sibling, which may be null.
501  *
502  * To iterate over all BlockBackends, do
503  * for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
504  *     ...
505  * }
506  */
507 BlockBackend *blk_next(BlockBackend *blk)
508 {
509     return blk ? QTAILQ_NEXT(blk, monitor_link)
510                : QTAILQ_FIRST(&monitor_block_backends);
511 }
512 
513 /* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
514  * the monitor or attached to a BlockBackend */
515 BlockDriverState *bdrv_next(BdrvNextIterator *it)
516 {
517     BlockDriverState *bs, *old_bs;
518 
519     /* Must be called from the main loop */
520     assert(qemu_get_current_aio_context() == qemu_get_aio_context());
521 
522     /* First, return all root nodes of BlockBackends. In order to avoid
523      * returning a BDS twice when multiple BBs refer to it, we only return it
524      * if the BB is the first one in the parent list of the BDS. */
525     if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
526         BlockBackend *old_blk = it->blk;
527 
528         old_bs = old_blk ? blk_bs(old_blk) : NULL;
529 
530         do {
531             it->blk = blk_all_next(it->blk);
532             bs = it->blk ? blk_bs(it->blk) : NULL;
533         } while (it->blk && (bs == NULL || bdrv_first_blk(bs) != it->blk));
534 
535         if (it->blk) {
536             blk_ref(it->blk);
537         }
538         blk_unref(old_blk);
539 
540         if (bs) {
541             bdrv_ref(bs);
542             bdrv_unref(old_bs);
543             return bs;
544         }
545         it->phase = BDRV_NEXT_MONITOR_OWNED;
546     } else {
547         old_bs = it->bs;
548     }
549 
550     /* Then return the monitor-owned BDSes without a BB attached. Ignore all
551      * BDSes that are attached to a BlockBackend here; they have been handled
552      * by the above block already */
553     do {
554         it->bs = bdrv_next_monitor_owned(it->bs);
555         bs = it->bs;
556     } while (bs && bdrv_has_blk(bs));
557 
558     if (bs) {
559         bdrv_ref(bs);
560     }
561     bdrv_unref(old_bs);
562 
563     return bs;
564 }
565 
566 static void bdrv_next_reset(BdrvNextIterator *it)
567 {
568     *it = (BdrvNextIterator) {
569         .phase = BDRV_NEXT_BACKEND_ROOTS,
570     };
571 }
572 
573 BlockDriverState *bdrv_first(BdrvNextIterator *it)
574 {
575     bdrv_next_reset(it);
576     return bdrv_next(it);
577 }
578 
579 /* Must be called when aborting a bdrv_next() iteration before
580  * bdrv_next() returns NULL */
581 void bdrv_next_cleanup(BdrvNextIterator *it)
582 {
583     /* Must be called from the main loop */
584     assert(qemu_get_current_aio_context() == qemu_get_aio_context());
585 
586     if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
587         if (it->blk) {
588             bdrv_unref(blk_bs(it->blk));
589             blk_unref(it->blk);
590         }
591     } else {
592         bdrv_unref(it->bs);
593     }
594 
595     bdrv_next_reset(it);
596 }
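
/*
 * Example (editor's sketch): iterating over all top-level nodes with the
 * BdrvNextIterator. If the loop is aborted before bdrv_next() has returned
 * NULL, bdrv_next_cleanup() must be called to drop the references the
 * iterator still holds. some_condition() is a hypothetical predicate.
 *
 *     BdrvNextIterator it;
 *     BlockDriverState *bs;
 *
 *     for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
 *         if (some_condition(bs)) {
 *             bdrv_next_cleanup(&it);
 *             break;
 *         }
 *     }
 */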
597 
598 /*
599  * Add a BlockBackend into the list of backends referenced by the monitor, with
600  * the given @name acting as the handle for the monitor.
601  * Strictly for use by blockdev.c.
602  *
603  * @name must not be null or empty.
604  *
605  * Returns true on success and false on failure. In the latter case, an Error
606  * object is returned through @errp.
607  */
608 bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp)
609 {
610     assert(!blk->name);
611     assert(name && name[0]);
612 
613     if (!id_wellformed(name)) {
614         error_setg(errp, "Invalid device name");
615         return false;
616     }
617     if (blk_by_name(name)) {
618         error_setg(errp, "Device with id '%s' already exists", name);
619         return false;
620     }
621     if (bdrv_find_node(name)) {
622         error_setg(errp,
623                    "Device name '%s' conflicts with an existing node name",
624                    name);
625         return false;
626     }
627 
628     blk->name = g_strdup(name);
629     QTAILQ_INSERT_TAIL(&monitor_block_backends, blk, monitor_link);
630     return true;
631 }
632 
633 /*
634  * Remove a BlockBackend from the list of backends referenced by the monitor.
635  * Strictly for use by blockdev.c.
636  */
637 void monitor_remove_blk(BlockBackend *blk)
638 {
639     if (!blk->name) {
640         return;
641     }
642 
643     QTAILQ_REMOVE(&monitor_block_backends, blk, monitor_link);
644     g_free(blk->name);
645     blk->name = NULL;
646 }
647 
648 /*
649  * Return @blk's name, a non-null string.
650  * Returns an empty string iff @blk is not referenced by the monitor.
651  */
652 const char *blk_name(const BlockBackend *blk)
653 {
654     return blk->name ?: "";
655 }
656 
657 /*
658  * Return the BlockBackend with name @name if it exists, else null.
659  * @name must not be null.
660  */
661 BlockBackend *blk_by_name(const char *name)
662 {
663     BlockBackend *blk = NULL;
664 
665     assert(name);
666     while ((blk = blk_next(blk)) != NULL) {
667         if (!strcmp(name, blk->name)) {
668             return blk;
669         }
670     }
671     return NULL;
672 }
673 
674 /*
675  * Return the BlockDriverState attached to @blk if any, else null.
676  */
677 BlockDriverState *blk_bs(BlockBackend *blk)
678 {
679     return blk->root ? blk->root->bs : NULL;
680 }
681 
682 static BlockBackend *bdrv_first_blk(BlockDriverState *bs)
683 {
684     BdrvChild *child;
685     QLIST_FOREACH(child, &bs->parents, next_parent) {
686         if (child->role == &child_root) {
687             return child->opaque;
688         }
689     }
690 
691     return NULL;
692 }
693 
694 /*
695  * Returns true if @bs has an associated BlockBackend.
696  */
697 bool bdrv_has_blk(BlockDriverState *bs)
698 {
699     return bdrv_first_blk(bs) != NULL;
700 }
701 
702 /*
703  * Returns true if @bs has only BlockBackends as parents.
704  */
705 bool bdrv_is_root_node(BlockDriverState *bs)
706 {
707     BdrvChild *c;
708 
709     QLIST_FOREACH(c, &bs->parents, next_parent) {
710         if (c->role != &child_root) {
711             return false;
712         }
713     }
714 
715     return true;
716 }
717 
718 /*
719  * Return @blk's DriveInfo if any, else null.
720  */
721 DriveInfo *blk_legacy_dinfo(BlockBackend *blk)
722 {
723     return blk->legacy_dinfo;
724 }
725 
726 /*
727  * Set @blk's DriveInfo to @dinfo, and return it.
728  * @blk must not have a DriveInfo set already.
729  * No other BlockBackend may have the same DriveInfo set.
730  */
731 DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo)
732 {
733     assert(!blk->legacy_dinfo);
734     return blk->legacy_dinfo = dinfo;
735 }
736 
737 /*
738  * Return the BlockBackend with DriveInfo @dinfo.
739  * It must exist.
740  */
741 BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
742 {
743     BlockBackend *blk = NULL;
744 
745     while ((blk = blk_next(blk)) != NULL) {
746         if (blk->legacy_dinfo == dinfo) {
747             return blk;
748         }
749     }
750     abort();
751 }
752 
753 /*
754  * Returns a pointer to the publicly accessible fields of @blk.
755  */
756 BlockBackendPublic *blk_get_public(BlockBackend *blk)
757 {
758     return &blk->public;
759 }
760 
761 /*
762  * Returns a BlockBackend given the associated @public fields.
763  */
764 BlockBackend *blk_by_public(BlockBackendPublic *public)
765 {
766     return container_of(public, BlockBackend, public);
767 }
768 
769 /*
770  * Disassociates the currently associated BlockDriverState from @blk.
771  */
772 void blk_remove_bs(BlockBackend *blk)
773 {
774     ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
775     BlockDriverState *bs;
776 
777     notifier_list_notify(&blk->remove_bs_notifiers, blk);
778     if (tgm->throttle_state) {
779         bs = blk_bs(blk);
780         bdrv_drained_begin(bs);
781         throttle_group_detach_aio_context(tgm);
782         throttle_group_attach_aio_context(tgm, qemu_get_aio_context());
783         bdrv_drained_end(bs);
784     }
785 
786     blk_update_root_state(blk);
787 
788     /* bdrv_root_unref_child() will cause blk->root to become stale and may
789      * switch to a completion coroutine later on. Let's drain all I/O here
790      * to avoid that and a potential QEMU crash.
791      */
792     blk_drain(blk);
793     bdrv_root_unref_child(blk->root);
794     blk->root = NULL;
795 }
796 
797 /*
798  * Associates a new BlockDriverState with @blk.
799  */
800 int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
801 {
802     ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
803     blk->root = bdrv_root_attach_child(bs, "root", &child_root,
804                                        blk->perm, blk->shared_perm, blk, errp);
805     if (blk->root == NULL) {
806         return -EPERM;
807     }
808     bdrv_ref(bs);
809 
810     notifier_list_notify(&blk->insert_bs_notifiers, blk);
811     if (tgm->throttle_state) {
812         throttle_group_detach_aio_context(tgm);
813         throttle_group_attach_aio_context(tgm, bdrv_get_aio_context(bs));
814     }
815 
816     return 0;
817 }
818 
819 /*
820  * Sets the permission bitmasks that the user of the BlockBackend needs.
821  */
822 int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
823                  Error **errp)
824 {
825     int ret;
826 
827     if (blk->root && !blk->disable_perm) {
828         ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp);
829         if (ret < 0) {
830             return ret;
831         }
832     }
833 
834     blk->perm = perm;
835     blk->shared_perm = shared_perm;
836 
837     return 0;
838 }
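
/*
 * Example (editor's sketch): a device model tightening its permissions,
 * e.g. after migration completes. The permission bits are illustrative;
 * on failure the old permissions remain in effect and @errp is set.
 *
 *     if (blk_set_perm(blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE,
 *                      BLK_PERM_ALL & ~BLK_PERM_WRITE, errp) < 0) {
 *         return;
 *     }
 */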
839 
840 void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
841 {
842     *perm = blk->perm;
843     *shared_perm = blk->shared_perm;
844 }
845 
846 /*
847  * Attach device model @dev to @blk.
848  * Return 0 on success, -EBUSY when a device model is attached already.
849  */
850 int blk_attach_dev(BlockBackend *blk, DeviceState *dev)
851 {
852     if (blk->dev) {
853         return -EBUSY;
854     }
855 
856     /* While migration is still incoming, we don't need to apply the
857      * permissions of guest device BlockBackends. We might still have a block
858      * job or NBD server writing to the image for storage migration. */
859     if (runstate_check(RUN_STATE_INMIGRATE)) {
860         blk->disable_perm = true;
861     }
862 
863     blk_ref(blk);
864     blk->dev = dev;
865     blk_iostatus_reset(blk);
866 
867     return 0;
868 }
869 
870 /*
871  * Detach device model @dev from @blk.
872  * @dev must be currently attached to @blk.
873  */
874 void blk_detach_dev(BlockBackend *blk, DeviceState *dev)
875 {
876     assert(blk->dev == dev);
877     blk->dev = NULL;
878     blk->dev_ops = NULL;
879     blk->dev_opaque = NULL;
880     blk->guest_block_size = 512;
881     blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort);
882     blk_unref(blk);
883 }
884 
885 /*
886  * Return the device model attached to @blk if any, else null.
887  */
888 DeviceState *blk_get_attached_dev(BlockBackend *blk)
889 {
890     return blk->dev;
891 }
892 
893 /* Return the qdev ID, or, if no ID is assigned, the QOM path of the block
894  * device attached to the BlockBackend. */
895 char *blk_get_attached_dev_id(BlockBackend *blk)
896 {
897     DeviceState *dev = blk->dev;
898 
899     if (!dev) {
900         return g_strdup("");
901     } else if (dev->id) {
902         return g_strdup(dev->id);
903     }
904 
905     return object_get_canonical_path(OBJECT(dev)) ?: g_strdup("");
906 }
907 
908 /*
909  * Return the BlockBackend which has the device model @dev attached if it
910  * exists, else null.
911  *
912  * @dev must not be null.
913  */
914 BlockBackend *blk_by_dev(void *dev)
915 {
916     BlockBackend *blk = NULL;
917 
918     assert(dev != NULL);
919     while ((blk = blk_all_next(blk)) != NULL) {
920         if (blk->dev == dev) {
921             return blk;
922         }
923     }
924     return NULL;
925 }
926 
927 /*
928  * Set @blk's device model callbacks to @ops.
929  * @opaque is the opaque argument to pass to the callbacks.
930  * This is for use by device models.
931  */
932 void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
933                      void *opaque)
934 {
935     blk->dev_ops = ops;
936     blk->dev_opaque = opaque;
937 
938     /* Are we currently quiesced? Should we enforce this right now? */
939     if (blk->quiesce_counter && ops->drained_begin) {
940         ops->drained_begin(opaque);
941     }
942 }
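
/*
 * Example (editor's sketch): a device model wiring up its callbacks right
 * after attaching. The my_* names are hypothetical; the BlockDevOps fields
 * used are the ones this file queries further below.
 *
 *     static const BlockDevOps my_block_ops = {
 *         .change_media_cb  = my_change_media_cb,
 *         .eject_request_cb = my_eject_request_cb,
 *         .is_tray_open     = my_is_tray_open,
 *         .resize_cb        = my_resize_cb,
 *     };
 *
 *     if (blk_attach_dev(blk, DEVICE(dev)) < 0) {
 *         return;    (failed with -EBUSY: another device model is attached)
 *     }
 *     blk_set_dev_ops(blk, &my_block_ops, dev);
 */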
943 
944 /*
945  * Notify @blk's attached device model of media change.
946  *
947  * If @load is true, notify of media load. This action can fail, meaning that
948  * the medium cannot be loaded; @errp is set in that case.
949  *
950  * If @load is false, notify of media eject. This can never fail.
951  *
952  * Also send DEVICE_TRAY_MOVED events as appropriate.
953  */
954 void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp)
955 {
956     if (blk->dev_ops && blk->dev_ops->change_media_cb) {
957         bool tray_was_open, tray_is_open;
958         Error *local_err = NULL;
959 
960         tray_was_open = blk_dev_is_tray_open(blk);
961         blk->dev_ops->change_media_cb(blk->dev_opaque, load, &local_err);
962         if (local_err) {
963             assert(load == true);
964             error_propagate(errp, local_err);
965             return;
966         }
967         tray_is_open = blk_dev_is_tray_open(blk);
968 
969         if (tray_was_open != tray_is_open) {
970             char *id = blk_get_attached_dev_id(blk);
971             qapi_event_send_device_tray_moved(blk_name(blk), id, tray_is_open);
972             g_free(id);
973         }
974     }
975 }
976 
977 static void blk_root_change_media(BdrvChild *child, bool load)
978 {
979     blk_dev_change_media_cb(child->opaque, load, NULL);
980 }
981 
982 /*
983  * Does @blk's attached device model have removable media?
984  * %true if no device model is attached.
985  */
986 bool blk_dev_has_removable_media(BlockBackend *blk)
987 {
988     return !blk->dev || (blk->dev_ops && blk->dev_ops->change_media_cb);
989 }
990 
991 /*
992  * Does @blk's attached device model have a tray?
993  */
994 bool blk_dev_has_tray(BlockBackend *blk)
995 {
996     return blk->dev_ops && blk->dev_ops->is_tray_open;
997 }
998 
999 /*
1000  * Notify @blk's attached device model of a media eject request.
1001  * If @force is true, the medium is about to be yanked out forcefully.
1002  */
1003 void blk_dev_eject_request(BlockBackend *blk, bool force)
1004 {
1005     if (blk->dev_ops && blk->dev_ops->eject_request_cb) {
1006         blk->dev_ops->eject_request_cb(blk->dev_opaque, force);
1007     }
1008 }
1009 
1010 /*
1011  * Does @blk's attached device model have a tray, and is it open?
1012  */
1013 bool blk_dev_is_tray_open(BlockBackend *blk)
1014 {
1015     if (blk_dev_has_tray(blk)) {
1016         return blk->dev_ops->is_tray_open(blk->dev_opaque);
1017     }
1018     return false;
1019 }
1020 
1021 /*
1022  * Does @blk's attached device model have the medium locked?
1023  * %false if the device model has no such lock.
1024  */
1025 bool blk_dev_is_medium_locked(BlockBackend *blk)
1026 {
1027     if (blk->dev_ops && blk->dev_ops->is_medium_locked) {
1028         return blk->dev_ops->is_medium_locked(blk->dev_opaque);
1029     }
1030     return false;
1031 }
1032 
1033 /*
1034  * Notify @blk's attached device model of a backend size change.
1035  */
1036 static void blk_root_resize(BdrvChild *child)
1037 {
1038     BlockBackend *blk = child->opaque;
1039 
1040     if (blk->dev_ops && blk->dev_ops->resize_cb) {
1041         blk->dev_ops->resize_cb(blk->dev_opaque);
1042     }
1043 }
1044 
1045 void blk_iostatus_enable(BlockBackend *blk)
1046 {
1047     blk->iostatus_enabled = true;
1048     blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
1049 }
1050 
1051 /* The I/O status is only enabled if the drive explicitly
1052  * enables it _and_ the VM is configured to stop on errors */
1053 bool blk_iostatus_is_enabled(const BlockBackend *blk)
1054 {
1055     return (blk->iostatus_enabled &&
1056            (blk->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
1057             blk->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
1058             blk->on_read_error == BLOCKDEV_ON_ERROR_STOP));
1059 }
1060 
1061 BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk)
1062 {
1063     return blk->iostatus;
1064 }
1065 
1066 void blk_iostatus_disable(BlockBackend *blk)
1067 {
1068     blk->iostatus_enabled = false;
1069 }
1070 
1071 void blk_iostatus_reset(BlockBackend *blk)
1072 {
1073     if (blk_iostatus_is_enabled(blk)) {
1074         BlockDriverState *bs = blk_bs(blk);
1075         blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
1076         if (bs && bs->job) {
1077             block_job_iostatus_reset(bs->job);
1078         }
1079     }
1080 }
1081 
1082 void blk_iostatus_set_err(BlockBackend *blk, int error)
1083 {
1084     assert(blk_iostatus_is_enabled(blk));
1085     if (blk->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
1086         blk->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
1087                                           BLOCK_DEVICE_IO_STATUS_FAILED;
1088     }
1089 }
1090 
1091 void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow)
1092 {
1093     blk->allow_write_beyond_eof = allow;
1094 }
1095 
1096 void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow)
1097 {
1098     blk->allow_aio_context_change = allow;
1099 }
1100 
1101 static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
1102                                   size_t size)
1103 {
1104     int64_t len;
1105 
1106     if (size > INT_MAX) {
1107         return -EIO;
1108     }
1109 
1110     if (!blk_is_available(blk)) {
1111         return -ENOMEDIUM;
1112     }
1113 
1114     if (offset < 0) {
1115         return -EIO;
1116     }
1117 
1118     if (!blk->allow_write_beyond_eof) {
1119         len = blk_getlength(blk);
1120         if (len < 0) {
1121             return len;
1122         }
1123 
1124         if (offset > len || len - offset < size) {
1125             return -EIO;
1126         }
1127     }
1128 
1129     return 0;
1130 }
1131 
1132 int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
1133                                unsigned int bytes, QEMUIOVector *qiov,
1134                                BdrvRequestFlags flags)
1135 {
1136     int ret;
1137     BlockDriverState *bs = blk_bs(blk);
1138 
1139     trace_blk_co_preadv(blk, bs, offset, bytes, flags);
1140 
1141     ret = blk_check_byte_request(blk, offset, bytes);
1142     if (ret < 0) {
1143         return ret;
1144     }
1145 
1146     bdrv_inc_in_flight(bs);
1147 
1148     /* throttling disk I/O */
1149     if (blk->public.throttle_group_member.throttle_state) {
1150         throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member,
1151                 bytes, false);
1152     }
1153 
1154     ret = bdrv_co_preadv(blk->root, offset, bytes, qiov, flags);
1155     bdrv_dec_in_flight(bs);
1156     return ret;
1157 }
1158 
1159 int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
1160                                 unsigned int bytes, QEMUIOVector *qiov,
1161                                 BdrvRequestFlags flags)
1162 {
1163     int ret;
1164     BlockDriverState *bs = blk_bs(blk);
1165 
1166     trace_blk_co_pwritev(blk, bs, offset, bytes, flags);
1167 
1168     ret = blk_check_byte_request(blk, offset, bytes);
1169     if (ret < 0) {
1170         return ret;
1171     }
1172 
1173     bdrv_inc_in_flight(bs);
1174     /* throttling disk I/O */
1175     if (blk->public.throttle_group_member.throttle_state) {
1176         throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member,
1177                 bytes, true);
1178     }
1179 
1180     if (!blk->enable_write_cache) {
1181         flags |= BDRV_REQ_FUA;
1182     }
1183 
1184     ret = bdrv_co_pwritev(blk->root, offset, bytes, qiov, flags);
1185     bdrv_dec_in_flight(bs);
1186     return ret;
1187 }
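
/*
 * Example (editor's sketch): issuing a read from coroutine context. The
 * buffer and offset are illustrative; QEMU_IOVEC_INIT_BUF wraps a single
 * buffer in a QEMUIOVector, just as blk_prw() below does.
 *
 *     uint8_t buf[512];
 *     QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, sizeof(buf));
 *     int ret = blk_co_preadv(blk, offset, sizeof(buf), &qiov, 0);
 *     if (ret < 0) {
 *         return ret;
 *     }
 */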
1188 
1189 typedef struct BlkRwCo {
1190     BlockBackend *blk;
1191     int64_t offset;
1192     void *iobuf;
1193     int ret;
1194     BdrvRequestFlags flags;
1195 } BlkRwCo;
1196 
1197 static void blk_read_entry(void *opaque)
1198 {
1199     BlkRwCo *rwco = opaque;
1200     QEMUIOVector *qiov = rwco->iobuf;
1201 
1202     rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size,
1203                               qiov, rwco->flags);
1204     aio_wait_kick();
1205 }
1206 
1207 static void blk_write_entry(void *opaque)
1208 {
1209     BlkRwCo *rwco = opaque;
1210     QEMUIOVector *qiov = rwco->iobuf;
1211 
1212     rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size,
1213                                qiov, rwco->flags);
1214     aio_wait_kick();
1215 }
1216 
1217 static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
1218                    int64_t bytes, CoroutineEntry co_entry,
1219                    BdrvRequestFlags flags)
1220 {
1221     QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
1222     BlkRwCo rwco = {
1223         .blk    = blk,
1224         .offset = offset,
1225         .iobuf  = &qiov,
1226         .flags  = flags,
1227         .ret    = NOT_DONE,
1228     };
1229 
1230     if (qemu_in_coroutine()) {
1231         /* Fast-path if already in coroutine context */
1232         co_entry(&rwco);
1233     } else {
1234         Coroutine *co = qemu_coroutine_create(co_entry, &rwco);
1235         bdrv_coroutine_enter(blk_bs(blk), co);
1236         BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
1237     }
1238 
1239     return rwco.ret;
1240 }
1241 
1242 int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
1243                           int count)
1244 {
1245     int ret;
1246 
1247     ret = blk_check_byte_request(blk, offset, count);
1248     if (ret < 0) {
1249         return ret;
1250     }
1251 
1252     blk_root_drained_begin(blk->root);
1253     ret = blk_pread(blk, offset, buf, count);
1254     blk_root_drained_end(blk->root);
1255     return ret;
1256 }
1257 
1258 int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
1259                       int bytes, BdrvRequestFlags flags)
1260 {
1261     return blk_prw(blk, offset, NULL, bytes, blk_write_entry,
1262                    flags | BDRV_REQ_ZERO_WRITE);
1263 }
1264 
1265 int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
1266 {
1267     return bdrv_make_zero(blk->root, flags);
1268 }
1269 
1270 void blk_inc_in_flight(BlockBackend *blk)
1271 {
1272     atomic_inc(&blk->in_flight);
1273 }
1274 
1275 void blk_dec_in_flight(BlockBackend *blk)
1276 {
1277     atomic_dec(&blk->in_flight);
1278     aio_wait_kick();
1279 }
1280 
1281 static void error_callback_bh(void *opaque)
1282 {
1283     struct BlockBackendAIOCB *acb = opaque;
1284 
1285     blk_dec_in_flight(acb->blk);
1286     acb->common.cb(acb->common.opaque, acb->ret);
1287     qemu_aio_unref(acb);
1288 }
1289 
1290 BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
1291                                   BlockCompletionFunc *cb,
1292                                   void *opaque, int ret)
1293 {
1294     struct BlockBackendAIOCB *acb;
1295 
1296     blk_inc_in_flight(blk);
1297     acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
1298     acb->blk = blk;
1299     acb->ret = ret;
1300 
1301     aio_bh_schedule_oneshot(blk_get_aio_context(blk), error_callback_bh, acb);
1302     return &acb->common;
1303 }
1304 
1305 typedef struct BlkAioEmAIOCB {
1306     BlockAIOCB common;
1307     BlkRwCo rwco;
1308     int bytes;
1309     bool has_returned;
1310 } BlkAioEmAIOCB;
1311 
1312 static const AIOCBInfo blk_aio_em_aiocb_info = {
1313     .aiocb_size         = sizeof(BlkAioEmAIOCB),
1314 };
1315 
1316 static void blk_aio_complete(BlkAioEmAIOCB *acb)
1317 {
1318     if (acb->has_returned) {
1319         acb->common.cb(acb->common.opaque, acb->rwco.ret);
1320         blk_dec_in_flight(acb->rwco.blk);
1321         qemu_aio_unref(acb);
1322     }
1323 }
1324 
1325 static void blk_aio_complete_bh(void *opaque)
1326 {
1327     BlkAioEmAIOCB *acb = opaque;
1328     assert(acb->has_returned);
1329     blk_aio_complete(acb);
1330 }
1331 
1332 static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
1333                                 void *iobuf, CoroutineEntry co_entry,
1334                                 BdrvRequestFlags flags,
1335                                 BlockCompletionFunc *cb, void *opaque)
1336 {
1337     BlkAioEmAIOCB *acb;
1338     Coroutine *co;
1339 
1340     blk_inc_in_flight(blk);
1341     acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
1342     acb->rwco = (BlkRwCo) {
1343         .blk    = blk,
1344         .offset = offset,
1345         .iobuf  = iobuf,
1346         .flags  = flags,
1347         .ret    = NOT_DONE,
1348     };
1349     acb->bytes = bytes;
1350     acb->has_returned = false;
1351 
1352     co = qemu_coroutine_create(co_entry, acb);
1353     bdrv_coroutine_enter(blk_bs(blk), co);
1354 
1355     acb->has_returned = true;
1356     if (acb->rwco.ret != NOT_DONE) {
1357         aio_bh_schedule_oneshot(blk_get_aio_context(blk),
1358                                 blk_aio_complete_bh, acb);
1359     }
1360 
1361     return &acb->common;
1362 }
1363 
1364 static void blk_aio_read_entry(void *opaque)
1365 {
1366     BlkAioEmAIOCB *acb = opaque;
1367     BlkRwCo *rwco = &acb->rwco;
1368     QEMUIOVector *qiov = rwco->iobuf;
1369 
1370     assert(qiov->size == acb->bytes);
1371     rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
1372                               qiov, rwco->flags);
1373     blk_aio_complete(acb);
1374 }
1375 
1376 static void blk_aio_write_entry(void *opaque)
1377 {
1378     BlkAioEmAIOCB *acb = opaque;
1379     BlkRwCo *rwco = &acb->rwco;
1380     QEMUIOVector *qiov = rwco->iobuf;
1381 
1382     assert(!qiov || qiov->size == acb->bytes);
1383     rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
1384                                qiov, rwco->flags);
1385     blk_aio_complete(acb);
1386 }
1387 
1388 BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
1389                                   int count, BdrvRequestFlags flags,
1390                                   BlockCompletionFunc *cb, void *opaque)
1391 {
1392     return blk_aio_prwv(blk, offset, count, NULL, blk_aio_write_entry,
1393                         flags | BDRV_REQ_ZERO_WRITE, cb, opaque);
1394 }
1395 
1396 int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count)
1397 {
1398     int ret = blk_prw(blk, offset, buf, count, blk_read_entry, 0);
1399     if (ret < 0) {
1400         return ret;
1401     }
1402     return count;
1403 }
1404 
1405 int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count,
1406                BdrvRequestFlags flags)
1407 {
1408     int ret = blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
1409                       flags);
1410     if (ret < 0) {
1411         return ret;
1412     }
1413     return count;
1414 }
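
/*
 * Example (editor's sketch): the synchronous convenience wrappers. Both
 * return the byte count on success and a negative errno on failure; the
 * buffer and offsets are illustrative.
 *
 *     uint8_t sector[512];
 *
 *     if (blk_pread(blk, 0, sector, sizeof(sector)) < 0) {
 *         return -EIO;
 *     }
 *     if (blk_pwrite(blk, 4096, sector, sizeof(sector), 0) < 0) {
 *         return -EIO;
 *     }
 */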
1415 
1416 int64_t blk_getlength(BlockBackend *blk)
1417 {
1418     if (!blk_is_available(blk)) {
1419         return -ENOMEDIUM;
1420     }
1421 
1422     return bdrv_getlength(blk_bs(blk));
1423 }
1424 
1425 void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr)
1426 {
1427     if (!blk_bs(blk)) {
1428         *nb_sectors_ptr = 0;
1429     } else {
1430         bdrv_get_geometry(blk_bs(blk), nb_sectors_ptr);
1431     }
1432 }
1433 
1434 int64_t blk_nb_sectors(BlockBackend *blk)
1435 {
1436     if (!blk_is_available(blk)) {
1437         return -ENOMEDIUM;
1438     }
1439 
1440     return bdrv_nb_sectors(blk_bs(blk));
1441 }
1442 
1443 BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
1444                            QEMUIOVector *qiov, BdrvRequestFlags flags,
1445                            BlockCompletionFunc *cb, void *opaque)
1446 {
1447     return blk_aio_prwv(blk, offset, qiov->size, qiov,
1448                         blk_aio_read_entry, flags, cb, opaque);
1449 }
1450 
1451 BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
1452                             QEMUIOVector *qiov, BdrvRequestFlags flags,
1453                             BlockCompletionFunc *cb, void *opaque)
1454 {
1455     return blk_aio_prwv(blk, offset, qiov->size, qiov,
1456                         blk_aio_write_entry, flags, cb, opaque);
1457 }
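
/*
 * Example (editor's sketch): an asynchronous read with a completion
 * callback. MyRequest and my_read_cb are hypothetical; @qiov must stay
 * valid until the callback has run.
 *
 *     static void my_read_cb(void *opaque, int ret)
 *     {
 *         MyRequest *req = opaque;
 *         req->error = ret;
 *         ...
 *     }
 *
 *     blk_aio_preadv(blk, req->offset, &req->qiov, 0, my_read_cb, req);
 */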
1458 
1459 static void blk_aio_flush_entry(void *opaque)
1460 {
1461     BlkAioEmAIOCB *acb = opaque;
1462     BlkRwCo *rwco = &acb->rwco;
1463 
1464     rwco->ret = blk_co_flush(rwco->blk);
1465     blk_aio_complete(acb);
1466 }
1467 
1468 BlockAIOCB *blk_aio_flush(BlockBackend *blk,
1469                           BlockCompletionFunc *cb, void *opaque)
1470 {
1471     return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque);
1472 }
1473 
1474 static void blk_aio_pdiscard_entry(void *opaque)
1475 {
1476     BlkAioEmAIOCB *acb = opaque;
1477     BlkRwCo *rwco = &acb->rwco;
1478 
1479     rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes);
1480     blk_aio_complete(acb);
1481 }
1482 
1483 BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
1484                              int64_t offset, int bytes,
1485                              BlockCompletionFunc *cb, void *opaque)
1486 {
1487     return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0,
1488                         cb, opaque);
1489 }
1490 
1491 void blk_aio_cancel(BlockAIOCB *acb)
1492 {
1493     bdrv_aio_cancel(acb);
1494 }
1495 
1496 void blk_aio_cancel_async(BlockAIOCB *acb)
1497 {
1498     bdrv_aio_cancel_async(acb);
1499 }
1500 
1501 int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
1502 {
1503     if (!blk_is_available(blk)) {
1504         return -ENOMEDIUM;
1505     }
1506 
1507     return bdrv_co_ioctl(blk_bs(blk), req, buf);
1508 }
1509 
1510 static void blk_ioctl_entry(void *opaque)
1511 {
1512     BlkRwCo *rwco = opaque;
1513     QEMUIOVector *qiov = rwco->iobuf;
1514 
1515     rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
1516                              qiov->iov[0].iov_base);
1517     aio_wait_kick();
1518 }
1519 
1520 int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
1521 {
1522     return blk_prw(blk, req, buf, 0, blk_ioctl_entry, 0);
1523 }
1524 
1525 static void blk_aio_ioctl_entry(void *opaque)
1526 {
1527     BlkAioEmAIOCB *acb = opaque;
1528     BlkRwCo *rwco = &acb->rwco;
1529 
1530     rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, rwco->iobuf);
1531 
1532     blk_aio_complete(acb);
1533 }
1534 
1535 BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
1536                           BlockCompletionFunc *cb, void *opaque)
1537 {
1538     return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque);
1539 }
1540 
1541 int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
1542 {
1543     int ret = blk_check_byte_request(blk, offset, bytes);
1544     if (ret < 0) {
1545         return ret;
1546     }
1547 
1548     return bdrv_co_pdiscard(blk->root, offset, bytes);
1549 }
1550 
1551 int blk_co_flush(BlockBackend *blk)
1552 {
1553     if (!blk_is_available(blk)) {
1554         return -ENOMEDIUM;
1555     }
1556 
1557     return bdrv_co_flush(blk_bs(blk));
1558 }
1559 
1560 static void blk_flush_entry(void *opaque)
1561 {
1562     BlkRwCo *rwco = opaque;
1563     rwco->ret = blk_co_flush(rwco->blk);
1564     aio_wait_kick();
1565 }
1566 
1567 int blk_flush(BlockBackend *blk)
1568 {
1569     return blk_prw(blk, 0, NULL, 0, blk_flush_entry, 0);
1570 }
1571 
1572 void blk_drain(BlockBackend *blk)
1573 {
1574     BlockDriverState *bs = blk_bs(blk);
1575 
1576     if (bs) {
1577         bdrv_drained_begin(bs);
1578     }
1579 
1580     /* We may have -ENOMEDIUM completions in flight */
1581     AIO_WAIT_WHILE(blk_get_aio_context(blk),
1582                    atomic_mb_read(&blk->in_flight) > 0);
1583 
1584     if (bs) {
1585         bdrv_drained_end(bs);
1586     }
1587 }
1588 
1589 void blk_drain_all(void)
1590 {
1591     BlockBackend *blk = NULL;
1592 
1593     bdrv_drain_all_begin();
1594 
1595     while ((blk = blk_all_next(blk)) != NULL) {
1596         AioContext *ctx = blk_get_aio_context(blk);
1597 
1598         aio_context_acquire(ctx);
1599 
1600         /* We may have -ENOMEDIUM completions in flight */
1601         AIO_WAIT_WHILE(ctx, atomic_mb_read(&blk->in_flight) > 0);
1602 
1603         aio_context_release(ctx);
1604     }
1605 
1606     bdrv_drain_all_end();
1607 }
1608 
1609 void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
1610                       BlockdevOnError on_write_error)
1611 {
1612     blk->on_read_error = on_read_error;
1613     blk->on_write_error = on_write_error;
1614 }
1615 
1616 BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read)
1617 {
1618     return is_read ? blk->on_read_error : blk->on_write_error;
1619 }
1620 
1621 BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
1622                                       int error)
1623 {
1624     BlockdevOnError on_err = blk_get_on_error(blk, is_read);
1625 
1626     switch (on_err) {
1627     case BLOCKDEV_ON_ERROR_ENOSPC:
1628         return (error == ENOSPC) ?
1629                BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
1630     case BLOCKDEV_ON_ERROR_STOP:
1631         return BLOCK_ERROR_ACTION_STOP;
1632     case BLOCKDEV_ON_ERROR_REPORT:
1633         return BLOCK_ERROR_ACTION_REPORT;
1634     case BLOCKDEV_ON_ERROR_IGNORE:
1635         return BLOCK_ERROR_ACTION_IGNORE;
1636     case BLOCKDEV_ON_ERROR_AUTO:
1637     default:
1638         abort();
1639     }
1640 }
1641 
1642 static void send_qmp_error_event(BlockBackend *blk,
1643                                  BlockErrorAction action,
1644                                  bool is_read, int error)
1645 {
1646     IoOperationType optype;
1647     BlockDriverState *bs = blk_bs(blk);
1648 
1649     optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
1650     qapi_event_send_block_io_error(blk_name(blk), !!bs,
1651                                    bs ? bdrv_get_node_name(bs) : NULL, optype,
1652                                    action, blk_iostatus_is_enabled(blk),
1653                                    error == ENOSPC, strerror(error));
1654 }
1655 
1656 /* This is done by device models because, while the block layer knows
1657  * about the error, it does not know whether an operation comes from
1658  * the device or the block layer (from a job, for example).
1659  */
1660 void blk_error_action(BlockBackend *blk, BlockErrorAction action,
1661                       bool is_read, int error)
1662 {
1663     assert(error >= 0);
1664 
1665     if (action == BLOCK_ERROR_ACTION_STOP) {
1666         /* First set the iostatus, so that "info block" returns an iostatus
1667          * that matches the events raised so far (an additional error iostatus
1668          * is fine, but not a lost one).
1669          */
1670         blk_iostatus_set_err(blk, error);
1671 
1672         /* Then raise the request to stop the VM and the event.
1673          * qemu_system_vmstop_request_prepare has two effects.  First,
1674          * it ensures that the STOP event always comes after the
1675          * BLOCK_IO_ERROR event.  Second, it ensures that even if management
1676          * can observe the STOP event and do a "cont" before the STOP
1677          * event is issued, the VM will not stop.  In this case, vm_start()
1678          * also ensures that the STOP/RESUME pair of events is emitted.
1679          */
1680         qemu_system_vmstop_request_prepare();
1681         send_qmp_error_event(blk, action, is_read, error);
1682         qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
1683     } else {
1684         send_qmp_error_event(blk, action, is_read, error);
1685     }
1686 }
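
/*
 * Example (editor's sketch): the usual device-model error path, roughly as
 * IDE or virtio-blk use it. The request bookkeeping is hypothetical; note
 * that both functions take a positive errno value.
 *
 *     BlockErrorAction action = blk_get_error_action(blk, is_read, -ret);
 *
 *     if (action == BLOCK_ERROR_ACTION_STOP) {
 *         ... park the request so it can be retried on "cont" ...
 *     }
 *     blk_error_action(blk, action, is_read, -ret);
 */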
1687 
1688 bool blk_is_read_only(BlockBackend *blk)
1689 {
1690     BlockDriverState *bs = blk_bs(blk);
1691 
1692     if (bs) {
1693         return bdrv_is_read_only(bs);
1694     } else {
1695         return blk->root_state.read_only;
1696     }
1697 }
1698 
1699 bool blk_is_sg(BlockBackend *blk)
1700 {
1701     BlockDriverState *bs = blk_bs(blk);
1702 
1703     if (!bs) {
1704         return false;
1705     }
1706 
1707     return bdrv_is_sg(bs);
1708 }
1709 
1710 bool blk_enable_write_cache(BlockBackend *blk)
1711 {
1712     return blk->enable_write_cache;
1713 }
1714 
1715 void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
1716 {
1717     blk->enable_write_cache = wce;
1718 }
1719 
1720 void blk_invalidate_cache(BlockBackend *blk, Error **errp)
1721 {
1722     BlockDriverState *bs = blk_bs(blk);
1723 
1724     if (!bs) {
1725         error_setg(errp, "Device '%s' has no medium", blk->name);
1726         return;
1727     }
1728 
1729     bdrv_invalidate_cache(bs, errp);
1730 }
1731 
1732 bool blk_is_inserted(BlockBackend *blk)
1733 {
1734     BlockDriverState *bs = blk_bs(blk);
1735 
1736     return bs && bdrv_is_inserted(bs);
1737 }
1738 
1739 bool blk_is_available(BlockBackend *blk)
1740 {
1741     return blk_is_inserted(blk) && !blk_dev_is_tray_open(blk);
1742 }
1743 
1744 void blk_lock_medium(BlockBackend *blk, bool locked)
1745 {
1746     BlockDriverState *bs = blk_bs(blk);
1747 
1748     if (bs) {
1749         bdrv_lock_medium(bs, locked);
1750     }
1751 }
1752 
1753 void blk_eject(BlockBackend *blk, bool eject_flag)
1754 {
1755     BlockDriverState *bs = blk_bs(blk);
1756     char *id;
1757 
1758     if (bs) {
1759         bdrv_eject(bs, eject_flag);
1760     }
1761 
1762     /* Whether or not we ejected on the backend,
1763      * the frontend experienced a tray event. */
1764     id = blk_get_attached_dev_id(blk);
1765     qapi_event_send_device_tray_moved(blk_name(blk), id,
1766                                       eject_flag);
1767     g_free(id);
1768 }
1769 
1770 int blk_get_flags(BlockBackend *blk)
1771 {
1772     BlockDriverState *bs = blk_bs(blk);
1773 
1774     if (bs) {
1775         return bdrv_get_flags(bs);
1776     } else {
1777         return blk->root_state.open_flags;
1778     }
1779 }
1780 
1781 /* Returns the minimum request alignment, in bytes; guaranteed nonzero */
1782 uint32_t blk_get_request_alignment(BlockBackend *blk)
1783 {
1784     BlockDriverState *bs = blk_bs(blk);
1785     return bs ? bs->bl.request_alignment : BDRV_SECTOR_SIZE;
1786 }
1787 
1788 /* Returns the maximum transfer length, in bytes; guaranteed nonzero */
1789 uint32_t blk_get_max_transfer(BlockBackend *blk)
1790 {
1791     BlockDriverState *bs = blk_bs(blk);
1792     uint32_t max = 0;
1793 
1794     if (bs) {
1795         max = bs->bl.max_transfer;
1796     }
1797     return MIN_NON_ZERO(max, INT_MAX);
1798 }
1799 
1800 int blk_get_max_iov(BlockBackend *blk)
1801 {
1802     return blk->root->bs->bl.max_iov;
1803 }
1804 
1805 void blk_set_guest_block_size(BlockBackend *blk, int align)
1806 {
1807     blk->guest_block_size = align;
1808 }
1809 
1810 void *blk_try_blockalign(BlockBackend *blk, size_t size)
1811 {
1812     return qemu_try_blockalign(blk ? blk_bs(blk) : NULL, size);
1813 }
1814 
1815 void *blk_blockalign(BlockBackend *blk, size_t size)
1816 {
1817     return qemu_blockalign(blk ? blk_bs(blk) : NULL, size);
1818 }
1819 
1820 bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
1821 {
1822     BlockDriverState *bs = blk_bs(blk);
1823 
1824     if (!bs) {
1825         return false;
1826     }
1827 
1828     return bdrv_op_is_blocked(bs, op, errp);
1829 }
1830 
1831 void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason)
1832 {
1833     BlockDriverState *bs = blk_bs(blk);
1834 
1835     if (bs) {
1836         bdrv_op_unblock(bs, op, reason);
1837     }
1838 }
1839 
1840 void blk_op_block_all(BlockBackend *blk, Error *reason)
1841 {
1842     BlockDriverState *bs = blk_bs(blk);
1843 
1844     if (bs) {
1845         bdrv_op_block_all(bs, reason);
1846     }
1847 }
1848 
1849 void blk_op_unblock_all(BlockBackend *blk, Error *reason)
1850 {
1851     BlockDriverState *bs = blk_bs(blk);
1852 
1853     if (bs) {
1854         bdrv_op_unblock_all(bs, reason);
1855     }
1856 }
1857 
1858 AioContext *blk_get_aio_context(BlockBackend *blk)
1859 {
1860     return bdrv_get_aio_context(blk_bs(blk));
1861 }
1862 
1863 static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
1864 {
1865     BlockBackendAIOCB *blk_acb = DO_UPCAST(BlockBackendAIOCB, common, acb);
1866     return blk_get_aio_context(blk_acb->blk);
1867 }
1868 
1869 static void blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context,
1870                                    bool update_root_node)
1871 {
1872     BlockDriverState *bs = blk_bs(blk);
1873     ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
1874 
1875     if (bs) {
1876         if (tgm->throttle_state) {
1877             bdrv_drained_begin(bs);
1878             throttle_group_detach_aio_context(tgm);
1879             throttle_group_attach_aio_context(tgm, new_context);
1880             bdrv_drained_end(bs);
1881         }
1882         if (update_root_node) {
1883             GSList *ignore = g_slist_prepend(NULL, blk->root);
1884             bdrv_set_aio_context_ignore(bs, new_context, &ignore);
1885             g_slist_free(ignore);
1886         }
1887     }
1888 }
1889 
1890 void blk_set_aio_context(BlockBackend *blk, AioContext *new_context)
1891 {
1892     blk_do_set_aio_context(blk, new_context, true);
1893 }
1894 
1895 static bool blk_root_can_set_aio_ctx(BdrvChild *child, AioContext *ctx,
1896                                      GSList **ignore, Error **errp)
1897 {
1898     BlockBackend *blk = child->opaque;
1899 
1900     if (blk->allow_aio_context_change) {
1901         return true;
1902     }
1903 
1904     /* Only manually created BlockBackends that are not attached to anything
1905      * can change their AioContext without updating their user. */
1906     if (!blk->name || blk->dev) {
1907         /* TODO Add BB name/QOM path */
1908         error_setg(errp, "Cannot change iothread of active block backend");
1909         return false;
1910     }
1911 
1912     return true;
1913 }
1914 
1915 static void blk_root_set_aio_ctx(BdrvChild *child, AioContext *ctx,
1916                                  GSList **ignore)
1917 {
1918     BlockBackend *blk = child->opaque;
1919     blk_do_set_aio_context(blk, ctx, false);
1920 }
1921 
1922 void blk_add_aio_context_notifier(BlockBackend *blk,
1923         void (*attached_aio_context)(AioContext *new_context, void *opaque),
1924         void (*detach_aio_context)(void *opaque), void *opaque)
1925 {
1926     BlockBackendAioNotifier *notifier;
1927     BlockDriverState *bs = blk_bs(blk);
1928 
1929     notifier = g_new(BlockBackendAioNotifier, 1);
1930     notifier->attached_aio_context = attached_aio_context;
1931     notifier->detach_aio_context = detach_aio_context;
1932     notifier->opaque = opaque;
1933     QLIST_INSERT_HEAD(&blk->aio_notifiers, notifier, list);
1934 
1935     if (bs) {
1936         bdrv_add_aio_context_notifier(bs, attached_aio_context,
1937                                       detach_aio_context, opaque);
1938     }
1939 }
1940 
1941 void blk_remove_aio_context_notifier(BlockBackend *blk,
1942                                      void (*attached_aio_context)(AioContext *,
1943                                                                   void *),
1944                                      void (*detach_aio_context)(void *),
1945                                      void *opaque)
1946 {
1947     BlockBackendAioNotifier *notifier;
1948     BlockDriverState *bs = blk_bs(blk);
1949 
1950     if (bs) {
1951         bdrv_remove_aio_context_notifier(bs, attached_aio_context,
1952                                          detach_aio_context, opaque);
1953     }
1954 
1955     QLIST_FOREACH(notifier, &blk->aio_notifiers, list) {
1956         if (notifier->attached_aio_context == attached_aio_context &&
1957             notifier->detach_aio_context == detach_aio_context &&
1958             notifier->opaque == opaque) {
1959             QLIST_REMOVE(notifier, list);
1960             g_free(notifier);
1961             return;
1962         }
1963     }
1964 
1965     abort();
1966 }
1967 
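/*
 * Editor's note: an illustrative sketch, not upstream code, of the pairing
 * contract above: blk_remove_aio_context_notifier() abort()s unless it is
 * called with exactly the same callbacks and opaque pointer that were
 * passed to blk_add_aio_context_notifier().  The callback and helper names
 * below are made up for the example.
 */
static void example_attached_aio_context(AioContext *new_context, void *opaque)
{
    /* e.g. move the device's timers and bottom halves to new_context */
}

static void example_detach_aio_context(void *opaque)
{
    /* e.g. quiesce and drop references tied to the old AioContext */
}

static void example_track_aio_context(BlockBackend *blk, void *dev_state)
{
    blk_add_aio_context_notifier(blk, example_attached_aio_context,
                                 example_detach_aio_context, dev_state);
    /* ... and later, with the very same arguments: */
    blk_remove_aio_context_notifier(blk, example_attached_aio_context,
                                    example_detach_aio_context, dev_state);
}
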
1968 void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify)
1969 {
1970     notifier_list_add(&blk->remove_bs_notifiers, notify);
1971 }
1972 
1973 void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify)
1974 {
1975     notifier_list_add(&blk->insert_bs_notifiers, notify);
1976 }
1977 
1978 void blk_io_plug(BlockBackend *blk)
1979 {
1980     BlockDriverState *bs = blk_bs(blk);
1981 
1982     if (bs) {
1983         bdrv_io_plug(bs);
1984     }
1985 }
1986 
1987 void blk_io_unplug(BlockBackend *blk)
1988 {
1989     BlockDriverState *bs = blk_bs(blk);
1990 
1991     if (bs) {
1992         bdrv_io_unplug(bs);
1993     }
1994 }
1995 
1996 BlockAcctStats *blk_get_stats(BlockBackend *blk)
1997 {
1998     return &blk->stats;
1999 }
2000 
2001 void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
2002                   BlockCompletionFunc *cb, void *opaque)
2003 {
2004     return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque);
2005 }
2006 
2007 int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
2008                                       int bytes, BdrvRequestFlags flags)
2009 {
2010     return blk_co_pwritev(blk, offset, bytes, NULL,
2011                           flags | BDRV_REQ_ZERO_WRITE);
2012 }
2013 
2014 int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
2015                           int count)
2016 {
2017     return blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
2018                    BDRV_REQ_WRITE_COMPRESSED);
2019 }
2020 
2021 int blk_truncate(BlockBackend *blk, int64_t offset, PreallocMode prealloc,
2022                  Error **errp)
2023 {
2024     if (!blk_is_available(blk)) {
2025         error_setg(errp, "No medium inserted");
2026         return -ENOMEDIUM;
2027     }
2028 
2029     return bdrv_truncate(blk->root, offset, prealloc, errp);
2030 }
2031 
2032 static void blk_pdiscard_entry(void *opaque)
2033 {
2034     BlkRwCo *rwco = opaque;
2035     QEMUIOVector *qiov = rwco->iobuf;
2036 
2037     rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size);
2038     aio_wait_kick();
2039 }
2040 
2041 int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
2042 {
2043     return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0);
2044 }
2045 
2046 int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
2047                      int64_t pos, int size)
2048 {
2049     int ret;
2050 
2051     if (!blk_is_available(blk)) {
2052         return -ENOMEDIUM;
2053     }
2054 
2055     ret = bdrv_save_vmstate(blk_bs(blk), buf, pos, size);
2056     if (ret < 0) {
2057         return ret;
2058     }
2059 
2060     if (ret == size && !blk->enable_write_cache) {
2061         ret = bdrv_flush(blk_bs(blk));
2062     }
2063 
2064     return ret < 0 ? ret : size;
2065 }
2066 
2067 int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
2068 {
2069     if (!blk_is_available(blk)) {
2070         return -ENOMEDIUM;
2071     }
2072 
2073     return bdrv_load_vmstate(blk_bs(blk), buf, pos, size);
2074 }
2075 
2076 int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
2077 {
2078     if (!blk_is_available(blk)) {
2079         return -ENOMEDIUM;
2080     }
2081 
2082     return bdrv_probe_blocksizes(blk_bs(blk), bsz);
2083 }
2084 
2085 int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
2086 {
2087     if (!blk_is_available(blk)) {
2088         return -ENOMEDIUM;
2089     }
2090 
2091     return bdrv_probe_geometry(blk_bs(blk), geo);
2092 }
2093 
2094 /*
2095  * Updates the BlockBackendRootState object with data from the currently
2096  * attached BlockDriverState.
2097  */
2098 void blk_update_root_state(BlockBackend *blk)
2099 {
2100     assert(blk->root);
2101 
2102     blk->root_state.open_flags    = blk->root->bs->open_flags;
2103     blk->root_state.read_only     = blk->root->bs->read_only;
2104     blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes;
2105 }
2106 
2107 /*
2108  * Returns the detect-zeroes setting to be used for bdrv_open() of a
2109  * BlockDriverState which is supposed to inherit the root state.
2110  */
2111 bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk)
2112 {
2113     return blk->root_state.detect_zeroes;
2114 }
2115 
2116 /*
2117  * Returns the flags to be used for bdrv_open() of a BlockDriverState which is
2118  * supposed to inherit the root state.
2119  */
2120 int blk_get_open_flags_from_root_state(BlockBackend *blk)
2121 {
2122     int bs_flags;
2123 
2124     bs_flags = blk->root_state.read_only ? 0 : BDRV_O_RDWR;
2125     bs_flags |= blk->root_state.open_flags & ~BDRV_O_RDWR;
2126 
2127     return bs_flags;
2128 }
2129 
2130 BlockBackendRootState *blk_get_root_state(BlockBackend *blk)
2131 {
2132     return &blk->root_state;
2133 }
2134 
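/*
 * Commits each inserted BlockBackend's root image into its backing file,
 * if it has one.  Returns 0 on success, or the first error encountered.
 */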
2135 int blk_commit_all(void)
2136 {
2137     BlockBackend *blk = NULL;
2138 
2139     while ((blk = blk_all_next(blk)) != NULL) {
2140         AioContext *aio_context = blk_get_aio_context(blk);
2141 
2142         aio_context_acquire(aio_context);
2143         if (blk_is_inserted(blk) && blk->root->bs->backing) {
2144             int ret = bdrv_commit(blk->root->bs);
2145             if (ret < 0) {
2146                 aio_context_release(aio_context);
2147                 return ret;
2148             }
2149         }
2150         aio_context_release(aio_context);
2151     }
2152     return 0;
2153 }
2154 
2155 
2156 /* throttling disk I/O limits */
2157 void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg)
2158 {
2159     throttle_group_config(&blk->public.throttle_group_member, cfg);
2160 }
2161 
2162 void blk_io_limits_disable(BlockBackend *blk)
2163 {
2164     BlockDriverState *bs = blk_bs(blk);
2165     ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
2166     assert(tgm->throttle_state);
2167     if (bs) {
2168         bdrv_drained_begin(bs);
2169     }
2170     throttle_group_unregister_tgm(tgm);
2171     if (bs) {
2172         bdrv_drained_end(bs);
2173     }
2174 }
2175 
2176 /* should be called before blk_set_io_limits() if limits are to be set */
2177 void blk_io_limits_enable(BlockBackend *blk, const char *group)
2178 {
2179     assert(!blk->public.throttle_group_member.throttle_state);
2180     throttle_group_register_tgm(&blk->public.throttle_group_member,
2181                                 group, blk_get_aio_context(blk));
2182 }
2183 
2184 void blk_io_limits_update_group(BlockBackend *blk, const char *group)
2185 {
2186     /* this BB is not part of any group */
2187     if (!blk->public.throttle_group_member.throttle_state) {
2188         return;
2189     }
2190 
2191     /* this BB is already part of the group we want */
2192     if (!g_strcmp0(throttle_group_get_name(&blk->public.throttle_group_member),
2193                 group)) {
2194         return;
2195     }
2196 
2197     /* need to change the group this BB belongs to */
2198     blk_io_limits_disable(blk);
2199     blk_io_limits_enable(blk, group);
2200 }
2201 
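/*
 * Editor's note: an illustrative sketch, not upstream code, of the intended
 * ordering of the throttling helpers above: the BlockBackend joins a group
 * with blk_io_limits_enable() before limits are configured with
 * blk_set_io_limits(), and can later be moved to another group with
 * blk_io_limits_update_group().  The group names are made up, and
 * throttle_config_init() plus the ThrottleConfig bucket layout are assumed
 * here from qemu/throttle.h.
 */
static void example_throttle_blk(BlockBackend *blk)
{
    ThrottleConfig cfg;

    blk_io_limits_enable(blk, "example-group");

    throttle_config_init(&cfg);
    cfg.buckets[THROTTLE_BPS_TOTAL].avg = 10 * 1024 * 1024; /* ~10 MB/s */
    blk_set_io_limits(blk, &cfg);

    /* move to a different group; returns early if already a member */
    blk_io_limits_update_group(blk, "another-group");
}
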
2202 static void blk_root_drained_begin(BdrvChild *child)
2203 {
2204     BlockBackend *blk = child->opaque;
2205 
2206     if (++blk->quiesce_counter == 1) {
2207         if (blk->dev_ops && blk->dev_ops->drained_begin) {
2208             blk->dev_ops->drained_begin(blk->dev_opaque);
2209         }
2210     }
2211 
2212     /* Note that blk->root may not be accessible here yet if we are just
2213      * attaching to a BlockDriverState that is drained. Use child instead. */
2214 
2215     if (atomic_fetch_inc(&blk->public.throttle_group_member.io_limits_disabled) == 0) {
2216         throttle_group_restart_tgm(&blk->public.throttle_group_member);
2217     }
2218 }
2219 
2220 static bool blk_root_drained_poll(BdrvChild *child)
2221 {
2222     BlockBackend *blk = child->opaque;
2223     assert(blk->quiesce_counter);
2224     return !!blk->in_flight;
2225 }
2226 
2227 static void blk_root_drained_end(BdrvChild *child)
2228 {
2229     BlockBackend *blk = child->opaque;
2230     assert(blk->quiesce_counter);
2231 
2232     assert(blk->public.throttle_group_member.io_limits_disabled);
2233     atomic_dec(&blk->public.throttle_group_member.io_limits_disabled);
2234 
2235     if (--blk->quiesce_counter == 0) {
2236         if (blk->dev_ops && blk->dev_ops->drained_end) {
2237             blk->dev_ops->drained_end(blk->dev_opaque);
2238         }
2239     }
2240 }
2241 
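/*
 * Editor's note: an illustrative sketch, not upstream code.  Because of the
 * quiesce counter above, a device model's dev_ops callbacks fire only for
 * the outermost drained section: drained_begin on the 0 -> 1 transition and
 * drained_end on the 1 -> 0 transition, however deeply drains are nested.
 * The device state type and ops below are made up for the example; they
 * would be registered through the BlockBackend's dev_ops mechanism.
 */
typedef struct ExampleDevice {
    bool stop_submitting;
} ExampleDevice;

static void example_drained_begin(void *opaque)
{
    ExampleDevice *d = opaque;
    d->stop_submitting = true;   /* stop issuing new requests to the BB */
}

static void example_drained_end(void *opaque)
{
    ExampleDevice *d = opaque;
    d->stop_submitting = false;  /* safe to submit requests again */
}

static const BlockDevOps example_dev_ops = {
    .drained_begin = example_drained_begin,
    .drained_end   = example_drained_end,
};
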
2242 void blk_register_buf(BlockBackend *blk, void *host, size_t size)
2243 {
2244     bdrv_register_buf(blk_bs(blk), host, size);
2245 }
2246 
2247 void blk_unregister_buf(BlockBackend *blk, void *host)
2248 {
2249     bdrv_unregister_buf(blk_bs(blk), host);
2250 }
2251 
2252 int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
2253                                    BlockBackend *blk_out, int64_t off_out,
2254                                    int bytes, BdrvRequestFlags read_flags,
2255                                    BdrvRequestFlags write_flags)
2256 {
2257     int r;
2258     r = blk_check_byte_request(blk_in, off_in, bytes);
2259     if (r) {
2260         return r;
2261     }
2262     r = blk_check_byte_request(blk_out, off_out, bytes);
2263     if (r) {
2264         return r;
2265     }
2266     return bdrv_co_copy_range(blk_in->root, off_in,
2267                               blk_out->root, off_out,
2268                               bytes, read_flags, write_flags);
2269 }
2270 
2271 const BdrvChild *blk_root(BlockBackend *blk)
2272 {
2273     return blk->root;
2274 }
2275