/*
 * QEMU Block backends
 *
 * Copyright (C) 2014-2016 Red Hat, Inc.
 *
 * Authors:
 *  Markus Armbruster <armbru@redhat.com>,
 *
 * This work is licensed under the terms of the GNU LGPL, version 2.1
 * or later.  See the COPYING.LIB file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "sysemu/block-backend.h"
#include "block/block_int.h"
#include "block/blockjob.h"
#include "block/throttle-groups.h"
#include "sysemu/blockdev.h"
#include "sysemu/sysemu.h"
#include "qapi-event.h"
#include "qemu/id.h"
#include "trace.h"

/* Number of coroutines to reserve per attached device model */
#define COROUTINE_POOL_RESERVATION 64

#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb);

struct BlockBackend {
    char *name;
    int refcnt;
    BdrvChild *root;
    DriveInfo *legacy_dinfo;    /* null unless created by drive_new() */
    QTAILQ_ENTRY(BlockBackend) link;         /* for block_backends */
    QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */
    BlockBackendPublic public;

    void *dev;                  /* attached device model, if any */
    bool legacy_dev;            /* true if dev is not a DeviceState */
    /* TODO change to DeviceState when all users are qdevified */
    const BlockDevOps *dev_ops;
    void *dev_opaque;

    /* the block size for which the guest device expects atomicity */
    int guest_block_size;

    /* If the BDS tree is removed, some of its options are stored here (which
     * can be used to restore those options in the new BDS on insert) */
    BlockBackendRootState root_state;

    bool enable_write_cache;

    /* I/O stats (display with "info blockstats"). */
    BlockAcctStats stats;

    BlockdevOnError on_read_error, on_write_error;
    bool iostatus_enabled;
    BlockDeviceIoStatus iostatus;

    uint64_t perm;
    uint64_t shared_perm;
    bool disable_perm;

    bool allow_write_beyond_eof;

    NotifierList remove_bs_notifiers, insert_bs_notifiers;

    int quiesce_counter;
};

typedef struct BlockBackendAIOCB {
    BlockAIOCB common;
    BlockBackend *blk;
    int ret;
} BlockBackendAIOCB;

static const AIOCBInfo block_backend_aiocb_info = {
    .get_aio_context = blk_aiocb_get_aio_context,
    .aiocb_size = sizeof(BlockBackendAIOCB),
};

static void drive_info_del(DriveInfo *dinfo);
static BlockBackend *bdrv_first_blk(BlockDriverState *bs);
static char *blk_get_attached_dev_id(BlockBackend *blk);

/* All BlockBackends */
static QTAILQ_HEAD(, BlockBackend) block_backends =
    QTAILQ_HEAD_INITIALIZER(block_backends);

/* All BlockBackends referenced by the monitor, which are iterated through by
 * blk_next() */
static QTAILQ_HEAD(, BlockBackend) monitor_block_backends =
    QTAILQ_HEAD_INITIALIZER(monitor_block_backends);

static void blk_root_inherit_options(int *child_flags, QDict *child_options,
                                     int parent_flags, QDict *parent_options)
{
    /* We're not supposed to call this function for root nodes */
    abort();
}
static void blk_root_drained_begin(BdrvChild *child);
static void blk_root_drained_end(BdrvChild *child);

static void blk_root_change_media(BdrvChild *child, bool load);
static void blk_root_resize(BdrvChild *child);

static char *blk_root_get_parent_desc(BdrvChild *child)
{
    BlockBackend *blk = child->opaque;
    char *dev_id;

    if (blk->name) {
        return g_strdup(blk->name);
    }

    dev_id = blk_get_attached_dev_id(blk);
    if (*dev_id) {
        return dev_id;
    } else {
        /* TODO Callback into the BB owner for something more detailed */
        g_free(dev_id);
        return g_strdup("a block device");
    }
}

static const char *blk_root_get_name(BdrvChild *child)
{
    return blk_name(child->opaque);
}

/*
 * Notifies the user of the BlockBackend that migration has completed. qdev
 * devices can tighten their permissions in response (specifically revoke
 * shared write permissions that we needed for storage migration).
 *
 * If an error is returned, the VM cannot be allowed to resume.
 */
static void blk_root_activate(BdrvChild *child, Error **errp)
{
    BlockBackend *blk = child->opaque;
    Error *local_err = NULL;

    if (!blk->disable_perm) {
        return;
    }

    blk->disable_perm = false;

    blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        blk->disable_perm = true;
        return;
    }
}

static int blk_root_inactivate(BdrvChild *child)
{
    BlockBackend *blk = child->opaque;

    if (blk->disable_perm) {
        return 0;
    }

    /* Only inactivate BlockBackends for guest devices (which are inactive at
     * this point because the VM is stopped) and unattached monitor-owned
     * BlockBackends. If there is still any other user like a block job, then
     * we simply can't inactivate the image. */
    if (!blk->dev && !blk_name(blk)[0]) {
        return -EPERM;
    }

    blk->disable_perm = true;
    if (blk->root) {
        bdrv_child_try_set_perm(blk->root, 0, BLK_PERM_ALL, &error_abort);
    }

    return 0;
}

static const BdrvChildRole child_root = {
    .inherit_options    = blk_root_inherit_options,

    .change_media       = blk_root_change_media,
    .resize             = blk_root_resize,
    .get_name           = blk_root_get_name,
    .get_parent_desc    = blk_root_get_parent_desc,

    .drained_begin      = blk_root_drained_begin,
    .drained_end        = blk_root_drained_end,

    .activate           = blk_root_activate,
    .inactivate         = blk_root_inactivate,
};

/*
 * Create a new BlockBackend with a reference count of one.
 *
 * @perm is a bitmask of BLK_PERM_* constants which describes the permissions
 * to request for a block driver node that is attached to this BlockBackend.
 * @shared_perm is a bitmask which describes which permissions may be granted
 * to other users of the attached node.
 * Both sets of permissions can be changed later using blk_set_perm().
 *
 * Return the new BlockBackend on success, null on failure.
 */
BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm)
{
    BlockBackend *blk;

    blk = g_new0(BlockBackend, 1);
    blk->refcnt = 1;
    blk->perm = perm;
    blk->shared_perm = shared_perm;
    blk_set_enable_write_cache(blk, true);

    qemu_co_queue_init(&blk->public.throttled_reqs[0]);
    qemu_co_queue_init(&blk->public.throttled_reqs[1]);

    notifier_list_init(&blk->remove_bs_notifiers);
    notifier_list_init(&blk->insert_bs_notifiers);

    QTAILQ_INSERT_TAIL(&block_backends, blk, link);
    return blk;
}
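
/*
 * Example (illustrative sketch): a user that needs guaranteed consistent
 * reads and write access, and is willing to share everything except
 * writes with other users of the node, would do:
 *
 *     BlockBackend *blk = blk_new(BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE,
 *                                 BLK_PERM_ALL & ~BLK_PERM_WRITE);
 *
 * The permissions only take effect against a node once one is attached,
 * e.g. with blk_insert_bs().
 */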

/*
 * Creates a new BlockBackend, opens a new BlockDriverState, and connects both.
 *
 * Just as with bdrv_open(), after having called this function the reference to
 * @options belongs to the block layer (even on failure).
 *
 * TODO: Remove @filename and @flags; it should be possible to specify a whole
 * BDS tree just by specifying the @options QDict (or @reference,
 * alternatively). At the time of adding this function, this is not possible,
 * though, so callers of this function have to be able to specify @filename and
 * @flags.
 */
BlockBackend *blk_new_open(const char *filename, const char *reference,
                           QDict *options, int flags, Error **errp)
{
    BlockBackend *blk;
    BlockDriverState *bs;
    uint64_t perm;

    /* blk_new_open() is mainly used in .bdrv_create implementations and the
     * tools where sharing isn't a concern because the BDS stays private, so we
     * just request permission according to the flags.
     *
     * The exceptions are xen_disk and blockdev_init(); in these cases, the
     * caller of blk_new_open() doesn't make use of the permissions, but they
     * shouldn't hurt either. We can still share everything here because the
     * guest devices will add their own blockers if they can't share. */
    perm = BLK_PERM_CONSISTENT_READ;
    if (flags & BDRV_O_RDWR) {
        perm |= BLK_PERM_WRITE;
    }
    if (flags & BDRV_O_RESIZE) {
        perm |= BLK_PERM_RESIZE;
    }

    blk = blk_new(perm, BLK_PERM_ALL);
    bs = bdrv_open(filename, reference, options, flags, errp);
    if (!bs) {
        blk_unref(blk);
        return NULL;
    }

    blk->root = bdrv_root_attach_child(bs, "root", &child_root,
                                       perm, BLK_PERM_ALL, blk, errp);
    if (!blk->root) {
        bdrv_unref(bs);
        blk_unref(blk);
        return NULL;
    }

    return blk;
}
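
/*
 * Example (illustrative): opening an image read-write the way a tool or a
 * .bdrv_create implementation might; "test.qcow2" is a made-up path:
 *
 *     Error *local_err = NULL;
 *     BlockBackend *blk = blk_new_open("test.qcow2", NULL, NULL,
 *                                      BDRV_O_RDWR, &local_err);
 *     if (!blk) {
 *         error_report_err(local_err);
 *     }
 *
 * Passing BDRV_O_RDWR makes this request BLK_PERM_WRITE in addition to
 * BLK_PERM_CONSISTENT_READ, as described in the comment above.
 */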

static void blk_delete(BlockBackend *blk)
{
    assert(!blk->refcnt);
    assert(!blk->name);
    assert(!blk->dev);
    if (blk->public.throttle_state) {
        blk_io_limits_disable(blk);
    }
    if (blk->root) {
        blk_remove_bs(blk);
    }
    assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers));
    assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers));
    QTAILQ_REMOVE(&block_backends, blk, link);
    drive_info_del(blk->legacy_dinfo);
    block_acct_cleanup(&blk->stats);
    g_free(blk);
}

static void drive_info_del(DriveInfo *dinfo)
{
    if (!dinfo) {
        return;
    }
    qemu_opts_del(dinfo->opts);
    g_free(dinfo->serial);
    g_free(dinfo);
}

int blk_get_refcnt(BlockBackend *blk)
{
    return blk ? blk->refcnt : 0;
}

/*
 * Increment @blk's reference count.
 * @blk must not be null.
 */
void blk_ref(BlockBackend *blk)
{
    blk->refcnt++;
}

/*
 * Decrement @blk's reference count.
 * If this drops it to zero, destroy @blk.
 * For convenience, do nothing if @blk is null.
 */
void blk_unref(BlockBackend *blk)
{
    if (blk) {
        assert(blk->refcnt > 0);
        if (!--blk->refcnt) {
            blk_delete(blk);
        }
    }
}

/*
 * Behaves similarly to blk_next() but iterates over all BlockBackends, even the
 * ones which are hidden (i.e. are not referenced by the monitor).
 */
static BlockBackend *blk_all_next(BlockBackend *blk)
{
    return blk ? QTAILQ_NEXT(blk, link)
               : QTAILQ_FIRST(&block_backends);
}

void blk_remove_all_bs(void)
{
    BlockBackend *blk = NULL;

    while ((blk = blk_all_next(blk)) != NULL) {
        AioContext *ctx = blk_get_aio_context(blk);

        aio_context_acquire(ctx);
        if (blk->root) {
            blk_remove_bs(blk);
        }
        aio_context_release(ctx);
    }
}

/*
 * Return the monitor-owned BlockBackend after @blk.
 * If @blk is null, return the first one.
 * Else, return @blk's next sibling, which may be null.
 *
 * To iterate over all BlockBackends, do
 * for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
 *     ...
 * }
 */
BlockBackend *blk_next(BlockBackend *blk)
{
    return blk ? QTAILQ_NEXT(blk, monitor_link)
               : QTAILQ_FIRST(&monitor_block_backends);
}

/* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
 * the monitor or attached to a BlockBackend */
BlockDriverState *bdrv_next(BdrvNextIterator *it)
{
    BlockDriverState *bs;

    /* First, return all root nodes of BlockBackends. In order to avoid
     * returning a BDS twice when multiple BBs refer to it, we only return it
     * if the BB is the first one in the parent list of the BDS. */
    if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
        do {
            it->blk = blk_all_next(it->blk);
            bs = it->blk ? blk_bs(it->blk) : NULL;
        } while (it->blk && (bs == NULL || bdrv_first_blk(bs) != it->blk));

        if (bs) {
            return bs;
        }
        it->phase = BDRV_NEXT_MONITOR_OWNED;
    }

    /* Then return the monitor-owned BDSes without a BB attached. Ignore all
     * BDSes that are attached to a BlockBackend here; they have been handled
     * by the above block already */
    do {
        it->bs = bdrv_next_monitor_owned(it->bs);
        bs = it->bs;
    } while (bs && bdrv_has_blk(bs));

    return bs;
}

BlockDriverState *bdrv_first(BdrvNextIterator *it)
{
    *it = (BdrvNextIterator) {
        .phase = BDRV_NEXT_BACKEND_ROOTS,
    };

    return bdrv_next(it);
}
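
/*
 * Typical usage of the iterator pair above, visiting every top-level
 * BlockDriverState exactly once:
 *
 *     BdrvNextIterator it;
 *     BlockDriverState *bs;
 *
 *     for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
 *         ... bs is a BlockBackend root or a monitor-owned node ...
 *     }
 */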

/*
 * Add a BlockBackend into the list of backends referenced by the monitor, with
 * the given @name acting as the handle for the monitor.
 * Strictly for use by blockdev.c.
 *
 * @name must not be null or empty.
 *
 * Returns true on success and false on failure. In the latter case, an Error
 * object is returned through @errp.
 */
bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp)
{
    assert(!blk->name);
    assert(name && name[0]);

    if (!id_wellformed(name)) {
        error_setg(errp, "Invalid device name");
        return false;
    }
    if (blk_by_name(name)) {
        error_setg(errp, "Device with id '%s' already exists", name);
        return false;
    }
    if (bdrv_find_node(name)) {
        error_setg(errp,
                   "Device name '%s' conflicts with an existing node name",
                   name);
        return false;
    }

    blk->name = g_strdup(name);
    QTAILQ_INSERT_TAIL(&monitor_block_backends, blk, monitor_link);
    return true;
}

/*
 * Remove a BlockBackend from the list of backends referenced by the monitor.
 * Strictly for use by blockdev.c.
 */
void monitor_remove_blk(BlockBackend *blk)
{
    if (!blk->name) {
        return;
    }

    QTAILQ_REMOVE(&monitor_block_backends, blk, monitor_link);
    g_free(blk->name);
    blk->name = NULL;
}

/*
 * Return @blk's name, a non-null string.
 * Returns an empty string iff @blk is not referenced by the monitor.
 */
const char *blk_name(const BlockBackend *blk)
{
    return blk->name ?: "";
}

/*
 * Return the BlockBackend with name @name if it exists, else null.
 * @name must not be null.
 */
BlockBackend *blk_by_name(const char *name)
{
    BlockBackend *blk = NULL;

    assert(name);
    while ((blk = blk_next(blk)) != NULL) {
        if (!strcmp(name, blk->name)) {
            return blk;
        }
    }
    return NULL;
}

/*
 * Return the BlockDriverState attached to @blk if any, else null.
 */
BlockDriverState *blk_bs(BlockBackend *blk)
{
    return blk->root ? blk->root->bs : NULL;
}

static BlockBackend *bdrv_first_blk(BlockDriverState *bs)
{
    BdrvChild *child;
    QLIST_FOREACH(child, &bs->parents, next_parent) {
        if (child->role == &child_root) {
            return child->opaque;
        }
    }

    return NULL;
}

/*
 * Returns true if @bs has an associated BlockBackend.
 */
bool bdrv_has_blk(BlockDriverState *bs)
{
    return bdrv_first_blk(bs) != NULL;
}

/*
 * Returns true if @bs has only BlockBackends as parents.
 */
bool bdrv_is_root_node(BlockDriverState *bs)
{
    BdrvChild *c;

    QLIST_FOREACH(c, &bs->parents, next_parent) {
        if (c->role != &child_root) {
            return false;
        }
    }

    return true;
}

/*
 * Return @blk's DriveInfo if any, else null.
 */
DriveInfo *blk_legacy_dinfo(BlockBackend *blk)
{
    return blk->legacy_dinfo;
}

/*
 * Set @blk's DriveInfo to @dinfo, and return it.
 * @blk must not have a DriveInfo set already.
 * No other BlockBackend may have the same DriveInfo set.
 */
DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo)
{
    assert(!blk->legacy_dinfo);
    return blk->legacy_dinfo = dinfo;
}

/*
 * Return the BlockBackend with DriveInfo @dinfo.
 * It must exist.
 */
BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
{
    BlockBackend *blk = NULL;

    while ((blk = blk_next(blk)) != NULL) {
        if (blk->legacy_dinfo == dinfo) {
            return blk;
        }
    }
    abort();
}

/*
 * Returns a pointer to the publicly accessible fields of @blk.
 */
BlockBackendPublic *blk_get_public(BlockBackend *blk)
{
    return &blk->public;
}

/*
 * Returns a BlockBackend given the associated @public fields.
 */
BlockBackend *blk_by_public(BlockBackendPublic *public)
{
    return container_of(public, BlockBackend, public);
}

/*
 * Disassociates the currently associated BlockDriverState from @blk.
 */
void blk_remove_bs(BlockBackend *blk)
{
    notifier_list_notify(&blk->remove_bs_notifiers, blk);
    if (blk->public.throttle_state) {
        throttle_timers_detach_aio_context(&blk->public.throttle_timers);
    }

    blk_update_root_state(blk);

    bdrv_root_unref_child(blk->root);
    blk->root = NULL;
}

/*
 * Associates a new BlockDriverState with @blk.
 */
int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
{
    blk->root = bdrv_root_attach_child(bs, "root", &child_root,
                                       blk->perm, blk->shared_perm, blk, errp);
    if (blk->root == NULL) {
        return -EPERM;
    }
    bdrv_ref(bs);

    notifier_list_notify(&blk->insert_bs_notifiers, blk);
    if (blk->public.throttle_state) {
        throttle_timers_attach_aio_context(
            &blk->public.throttle_timers, bdrv_get_aio_context(bs));
    }

    return 0;
}

/*
 * Sets the permission bitmasks that the user of the BlockBackend needs.
 */
int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
                 Error **errp)
{
    int ret;

    if (blk->root && !blk->disable_perm) {
        ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp);
        if (ret < 0) {
            return ret;
        }
    }

    blk->perm = perm;
    blk->shared_perm = shared_perm;

    return 0;
}
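
/*
 * Example (illustrative): dropping shared write permissions while keeping
 * everything else, similar to what blk_root_activate() does above:
 *
 *     Error *local_err = NULL;
 *     uint64_t perm, shared;
 *
 *     blk_get_perm(blk, &perm, &shared);
 *     if (blk_set_perm(blk, perm, shared & ~BLK_PERM_WRITE, &local_err) < 0) {
 *         error_report_err(local_err);
 *     }
 */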

void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
{
    *perm = blk->perm;
    *shared_perm = blk->shared_perm;
}

static int blk_do_attach_dev(BlockBackend *blk, void *dev)
{
    if (blk->dev) {
        return -EBUSY;
    }

    /* While migration is still incoming, we don't need to apply the
     * permissions of guest device BlockBackends. We might still have a block
     * job or NBD server writing to the image for storage migration. */
    if (runstate_check(RUN_STATE_INMIGRATE)) {
        blk->disable_perm = true;
    }

    blk_ref(blk);
    blk->dev = dev;
    blk->legacy_dev = false;
    blk_iostatus_reset(blk);

    return 0;
}

/*
 * Attach device model @dev to @blk.
 * Return 0 on success, -EBUSY when a device model is attached already.
 */
int blk_attach_dev(BlockBackend *blk, DeviceState *dev)
{
    return blk_do_attach_dev(blk, dev);
}

/*
 * Attach device model @dev to @blk.
 * @blk must not have a device model attached already.
 * TODO qdevified devices don't use this, remove when devices are qdevified
 */
void blk_attach_dev_legacy(BlockBackend *blk, void *dev)
{
    if (blk_do_attach_dev(blk, dev) < 0) {
        abort();
    }
    blk->legacy_dev = true;
}

/*
 * Detach device model @dev from @blk.
 * @dev must be currently attached to @blk.
 */
void blk_detach_dev(BlockBackend *blk, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    assert(blk->dev == dev);
    blk->dev = NULL;
    blk->dev_ops = NULL;
    blk->dev_opaque = NULL;
    blk->guest_block_size = 512;
    blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort);
    blk_unref(blk);
}

/*
 * Return the device model attached to @blk if any, else null.
 */
void *blk_get_attached_dev(BlockBackend *blk)
/* TODO change to return DeviceState * when all users are qdevified */
{
    return blk->dev;
}

/* Return the qdev ID, or if no ID is assigned the QOM path, of the block
 * device attached to the BlockBackend. */
static char *blk_get_attached_dev_id(BlockBackend *blk)
{
    DeviceState *dev;

    assert(!blk->legacy_dev);
    dev = blk->dev;

    if (!dev) {
        return g_strdup("");
    } else if (dev->id) {
        return g_strdup(dev->id);
    }
    return object_get_canonical_path(OBJECT(dev));
}

/*
 * Return the BlockBackend which has the device model @dev attached if it
 * exists, else null.
 *
 * @dev must not be null.
 */
BlockBackend *blk_by_dev(void *dev)
{
    BlockBackend *blk = NULL;

    assert(dev != NULL);
    while ((blk = blk_all_next(blk)) != NULL) {
        if (blk->dev == dev) {
            return blk;
        }
    }
    return NULL;
}

/*
 * Set @blk's device model callbacks to @ops.
 * @opaque is the opaque argument to pass to the callbacks.
 * This is for use by device models.
 */
void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
                     void *opaque)
{
    /* All drivers that use blk_set_dev_ops() are qdevified and we want to keep
     * it that way, so we can assume blk->dev, if present, is a DeviceState if
     * blk->dev_ops is set. Non-device users may use dev_ops without a device. */
    assert(!blk->legacy_dev);

    blk->dev_ops = ops;
    blk->dev_opaque = opaque;

    /* If we are currently quiesced, let the new device model know right away */
    if (blk->quiesce_counter && ops->drained_begin) {
        ops->drained_begin(opaque);
    }
}
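
/*
 * Example (illustrative): a device model hooking into drain notifications;
 * MyDevState and the callbacks are hypothetical, only the BlockDevOps
 * fields used here are real:
 *
 *     static void my_dev_drained_begin(void *opaque)
 *     {
 *         MyDevState *s = opaque;
 *         s->stopped = true;    // stop submitting new requests
 *     }
 *
 *     static const BlockDevOps my_dev_block_ops = {
 *         .drained_begin = my_dev_drained_begin,
 *         .drained_end   = my_dev_drained_end,
 *     };
 *
 *     blk_set_dev_ops(s->blk, &my_dev_block_ops, s);
 */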

/*
 * Notify @blk's attached device model of media change.
 *
 * If @load is true, notify of media load. This action can fail, meaning that
 * the medium cannot be loaded. @errp is set then.
 *
 * If @load is false, notify of media eject. This can never fail.
 *
 * Also send DEVICE_TRAY_MOVED events as appropriate.
 */
void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp)
{
    if (blk->dev_ops && blk->dev_ops->change_media_cb) {
        bool tray_was_open, tray_is_open;
        Error *local_err = NULL;

        assert(!blk->legacy_dev);

        tray_was_open = blk_dev_is_tray_open(blk);
        blk->dev_ops->change_media_cb(blk->dev_opaque, load, &local_err);
        if (local_err) {
            assert(load == true);
            error_propagate(errp, local_err);
            return;
        }
        tray_is_open = blk_dev_is_tray_open(blk);

        if (tray_was_open != tray_is_open) {
            char *id = blk_get_attached_dev_id(blk);
            qapi_event_send_device_tray_moved(blk_name(blk), id, tray_is_open,
                                              &error_abort);
            g_free(id);
        }
    }
}

static void blk_root_change_media(BdrvChild *child, bool load)
{
    blk_dev_change_media_cb(child->opaque, load, NULL);
}

/*
 * Does @blk's attached device model have removable media?
 * %true if no device model is attached.
 */
bool blk_dev_has_removable_media(BlockBackend *blk)
{
    return !blk->dev || (blk->dev_ops && blk->dev_ops->change_media_cb);
}

/*
 * Does @blk's attached device model have a tray?
 */
bool blk_dev_has_tray(BlockBackend *blk)
{
    return blk->dev_ops && blk->dev_ops->is_tray_open;
}

/*
 * Notify @blk's attached device model of a media eject request.
 * If @force is true, the medium is about to be yanked out forcefully.
 */
void blk_dev_eject_request(BlockBackend *blk, bool force)
{
    if (blk->dev_ops && blk->dev_ops->eject_request_cb) {
        blk->dev_ops->eject_request_cb(blk->dev_opaque, force);
    }
}

/*
 * Does @blk's attached device model have a tray, and is it open?
 */
bool blk_dev_is_tray_open(BlockBackend *blk)
{
    if (blk_dev_has_tray(blk)) {
        return blk->dev_ops->is_tray_open(blk->dev_opaque);
    }
    return false;
}

/*
 * Does @blk's attached device model have the medium locked?
 * %false if the device model has no such lock.
 */
bool blk_dev_is_medium_locked(BlockBackend *blk)
{
    if (blk->dev_ops && blk->dev_ops->is_medium_locked) {
        return blk->dev_ops->is_medium_locked(blk->dev_opaque);
    }
    return false;
}

/*
 * Notify @blk's attached device model of a backend size change.
 */
static void blk_root_resize(BdrvChild *child)
{
    BlockBackend *blk = child->opaque;

    if (blk->dev_ops && blk->dev_ops->resize_cb) {
        blk->dev_ops->resize_cb(blk->dev_opaque);
    }
}

void blk_iostatus_enable(BlockBackend *blk)
{
    blk->iostatus_enabled = true;
    blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}

/* The I/O status is only enabled if the drive explicitly
 * enables it _and_ the VM is configured to stop on errors */
bool blk_iostatus_is_enabled(const BlockBackend *blk)
{
    return (blk->iostatus_enabled &&
           (blk->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
            blk->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
            blk->on_read_error == BLOCKDEV_ON_ERROR_STOP));
}

BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk)
{
    return blk->iostatus;
}

void blk_iostatus_disable(BlockBackend *blk)
{
    blk->iostatus_enabled = false;
}

void blk_iostatus_reset(BlockBackend *blk)
{
    if (blk_iostatus_is_enabled(blk)) {
        BlockDriverState *bs = blk_bs(blk);
        blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
        if (bs && bs->job) {
            block_job_iostatus_reset(bs->job);
        }
    }
}

void blk_iostatus_set_err(BlockBackend *blk, int error)
{
    assert(blk_iostatus_is_enabled(blk));
    if (blk->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
        blk->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
                                          BLOCK_DEVICE_IO_STATUS_FAILED;
    }
}

void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow)
{
    blk->allow_write_beyond_eof = allow;
}

static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
                                  size_t size)
{
    int64_t len;

    if (size > INT_MAX) {
        return -EIO;
    }

    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    if (offset < 0) {
        return -EIO;
    }

    if (!blk->allow_write_beyond_eof) {
        len = blk_getlength(blk);
        if (len < 0) {
            return len;
        }

        if (offset > len || len - offset < size) {
            return -EIO;
        }
    }

    return 0;
}

int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
                               unsigned int bytes, QEMUIOVector *qiov,
                               BdrvRequestFlags flags)
{
    int ret;
    BlockDriverState *bs = blk_bs(blk);

    trace_blk_co_preadv(blk, bs, offset, bytes, flags);

    ret = blk_check_byte_request(blk, offset, bytes);
    if (ret < 0) {
        return ret;
    }

    bdrv_inc_in_flight(bs);

    /* throttling disk I/O */
    if (blk->public.throttle_state) {
        throttle_group_co_io_limits_intercept(blk, bytes, false);
    }

    ret = bdrv_co_preadv(blk->root, offset, bytes, qiov, flags);
    bdrv_dec_in_flight(bs);
    return ret;
}

int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
                                unsigned int bytes, QEMUIOVector *qiov,
                                BdrvRequestFlags flags)
{
    int ret;
    BlockDriverState *bs = blk_bs(blk);

    trace_blk_co_pwritev(blk, bs, offset, bytes, flags);

    ret = blk_check_byte_request(blk, offset, bytes);
    if (ret < 0) {
        return ret;
    }

    bdrv_inc_in_flight(bs);

    /* throttling disk I/O */
    if (blk->public.throttle_state) {
        throttle_group_co_io_limits_intercept(blk, bytes, true);
    }

    if (!blk->enable_write_cache) {
        flags |= BDRV_REQ_FUA;
    }

    ret = bdrv_co_pwritev(blk->root, offset, bytes, qiov, flags);
    bdrv_dec_in_flight(bs);
    return ret;
}

typedef struct BlkRwCo {
    BlockBackend *blk;
    int64_t offset;
    QEMUIOVector *qiov;
    int ret;
    BdrvRequestFlags flags;
} BlkRwCo;

static void blk_read_entry(void *opaque)
{
    BlkRwCo *rwco = opaque;

    rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, rwco->qiov->size,
                              rwco->qiov, rwco->flags);
}

static void blk_write_entry(void *opaque)
{
    BlkRwCo *rwco = opaque;

    rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, rwco->qiov->size,
                               rwco->qiov, rwco->flags);
}

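/*
 * Helper behind the synchronous wrappers below (blk_pread(), blk_pwrite(),
 * blk_flush(), ...): it wraps @buf in a single-element QEMUIOVector and
 * runs @co_entry. Outside of coroutine context it spawns a coroutine and
 * polls the AioContext until the entry point has replaced the NOT_DONE
 * marker in rwco.ret with the real return value.
 */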
static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
                   int64_t bytes, CoroutineEntry co_entry,
                   BdrvRequestFlags flags)
{
    QEMUIOVector qiov;
    struct iovec iov;
    BlkRwCo rwco;

    iov = (struct iovec) {
        .iov_base = buf,
        .iov_len = bytes,
    };
    qemu_iovec_init_external(&qiov, &iov, 1);

    rwco = (BlkRwCo) {
        .blk    = blk,
        .offset = offset,
        .qiov   = &qiov,
        .flags  = flags,
        .ret    = NOT_DONE,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        co_entry(&rwco);
    } else {
        Coroutine *co = qemu_coroutine_create(co_entry, &rwco);
        bdrv_coroutine_enter(blk_bs(blk), co);
        BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
    }

    return rwco.ret;
}

int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
                          int count)
{
    int ret;

    ret = blk_check_byte_request(blk, offset, count);
    if (ret < 0) {
        return ret;
    }

    blk_root_drained_begin(blk->root);
    ret = blk_pread(blk, offset, buf, count);
    blk_root_drained_end(blk->root);
    return ret;
}

int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
                      int count, BdrvRequestFlags flags)
{
    return blk_prw(blk, offset, NULL, count, blk_write_entry,
                   flags | BDRV_REQ_ZERO_WRITE);
}

int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
{
    return bdrv_make_zero(blk->root, flags);
}

static void error_callback_bh(void *opaque)
{
    struct BlockBackendAIOCB *acb = opaque;

    bdrv_dec_in_flight(acb->common.bs);
    acb->common.cb(acb->common.opaque, acb->ret);
    qemu_aio_unref(acb);
}

BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
                                  BlockCompletionFunc *cb,
                                  void *opaque, int ret)
{
    struct BlockBackendAIOCB *acb;

    bdrv_inc_in_flight(blk_bs(blk));
    acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
    acb->blk = blk;
    acb->ret = ret;

    aio_bh_schedule_oneshot(blk_get_aio_context(blk), error_callback_bh, acb);
    return &acb->common;
}

typedef struct BlkAioEmAIOCB {
    BlockAIOCB common;
    BlkRwCo rwco;
    int bytes;
    bool has_returned;
} BlkAioEmAIOCB;

static const AIOCBInfo blk_aio_em_aiocb_info = {
    .aiocb_size         = sizeof(BlkAioEmAIOCB),
};

static void blk_aio_complete(BlkAioEmAIOCB *acb)
{
    if (acb->has_returned) {
        bdrv_dec_in_flight(acb->common.bs);
        acb->common.cb(acb->common.opaque, acb->rwco.ret);
        qemu_aio_unref(acb);
    }
}

static void blk_aio_complete_bh(void *opaque)
{
    BlkAioEmAIOCB *acb = opaque;
    assert(acb->has_returned);
    blk_aio_complete(acb);
}

static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
                                QEMUIOVector *qiov, CoroutineEntry co_entry,
                                BdrvRequestFlags flags,
                                BlockCompletionFunc *cb, void *opaque)
{
    BlkAioEmAIOCB *acb;
    Coroutine *co;

    bdrv_inc_in_flight(blk_bs(blk));
    acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
    acb->rwco = (BlkRwCo) {
        .blk    = blk,
        .offset = offset,
        .qiov   = qiov,
        .flags  = flags,
        .ret    = NOT_DONE,
    };
    acb->bytes = bytes;
    acb->has_returned = false;

    co = qemu_coroutine_create(co_entry, acb);
    bdrv_coroutine_enter(blk_bs(blk), co);

    acb->has_returned = true;
    if (acb->rwco.ret != NOT_DONE) {
        aio_bh_schedule_oneshot(blk_get_aio_context(blk),
                                blk_aio_complete_bh, acb);
    }

    return &acb->common;
}

static void blk_aio_read_entry(void *opaque)
{
    BlkAioEmAIOCB *acb = opaque;
    BlkRwCo *rwco = &acb->rwco;

    assert(rwco->qiov->size == acb->bytes);
    rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
                              rwco->qiov, rwco->flags);
    blk_aio_complete(acb);
}

static void blk_aio_write_entry(void *opaque)
{
    BlkAioEmAIOCB *acb = opaque;
    BlkRwCo *rwco = &acb->rwco;

    assert(!rwco->qiov || rwco->qiov->size == acb->bytes);
    rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
                               rwco->qiov, rwco->flags);
    blk_aio_complete(acb);
}

BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
                                  int count, BdrvRequestFlags flags,
                                  BlockCompletionFunc *cb, void *opaque)
{
    return blk_aio_prwv(blk, offset, count, NULL, blk_aio_write_entry,
                        flags | BDRV_REQ_ZERO_WRITE, cb, opaque);
}

int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count)
{
    int ret = blk_prw(blk, offset, buf, count, blk_read_entry, 0);
    if (ret < 0) {
        return ret;
    }
    return count;
}

int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count,
               BdrvRequestFlags flags)
{
    int ret = blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
                      flags);
    if (ret < 0) {
        return ret;
    }
    return count;
}
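
/*
 * Example (illustrative) of the synchronous byte-based API above; both
 * calls return the number of bytes transferred on success and a negative
 * errno on failure:
 *
 *     uint8_t buf[512];
 *
 *     if (blk_pread(blk, 0, buf, sizeof(buf)) < 0) {
 *         ... handle read error ...
 *     }
 *     if (blk_pwrite(blk, 0, buf, sizeof(buf), 0) < 0) {
 *         ... handle write error ...
 *     }
 */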

int64_t blk_getlength(BlockBackend *blk)
{
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_getlength(blk_bs(blk));
}

void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr)
{
    if (!blk_bs(blk)) {
        *nb_sectors_ptr = 0;
    } else {
        bdrv_get_geometry(blk_bs(blk), nb_sectors_ptr);
    }
}

int64_t blk_nb_sectors(BlockBackend *blk)
{
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_nb_sectors(blk_bs(blk));
}

BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
                           QEMUIOVector *qiov, BdrvRequestFlags flags,
                           BlockCompletionFunc *cb, void *opaque)
{
    return blk_aio_prwv(blk, offset, qiov->size, qiov,
                        blk_aio_read_entry, flags, cb, opaque);
}

BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
                            QEMUIOVector *qiov, BdrvRequestFlags flags,
                            BlockCompletionFunc *cb, void *opaque)
{
    return blk_aio_prwv(blk, offset, qiov->size, qiov,
                        blk_aio_write_entry, flags, cb, opaque);
}
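
/*
 * Example (illustrative): issuing an asynchronous read; the completion
 * callback and its MyRequest state are hypothetical:
 *
 *     static void my_read_done(void *opaque, int ret)
 *     {
 *         MyRequest *req = opaque;
 *         // ret is 0 on success, a negative errno on failure
 *         my_request_complete(req, ret);
 *     }
 *
 *     qemu_iovec_init(&req->qiov, 1);
 *     qemu_iovec_add(&req->qiov, buf, len);
 *     blk_aio_preadv(blk, offset, &req->qiov, 0, my_read_done, req);
 */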

static void blk_aio_flush_entry(void *opaque)
{
    BlkAioEmAIOCB *acb = opaque;
    BlkRwCo *rwco = &acb->rwco;

    rwco->ret = blk_co_flush(rwco->blk);
    blk_aio_complete(acb);
}

BlockAIOCB *blk_aio_flush(BlockBackend *blk,
                          BlockCompletionFunc *cb, void *opaque)
{
    return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque);
}

static void blk_aio_pdiscard_entry(void *opaque)
{
    BlkAioEmAIOCB *acb = opaque;
    BlkRwCo *rwco = &acb->rwco;

    rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes);
    blk_aio_complete(acb);
}

BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
                             int64_t offset, int count,
                             BlockCompletionFunc *cb, void *opaque)
{
    return blk_aio_prwv(blk, offset, count, NULL, blk_aio_pdiscard_entry, 0,
                        cb, opaque);
}

void blk_aio_cancel(BlockAIOCB *acb)
{
    bdrv_aio_cancel(acb);
}

void blk_aio_cancel_async(BlockAIOCB *acb)
{
    bdrv_aio_cancel_async(acb);
}

int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
{
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_co_ioctl(blk_bs(blk), req, buf);
}

static void blk_ioctl_entry(void *opaque)
{
    BlkRwCo *rwco = opaque;
    rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
                             rwco->qiov->iov[0].iov_base);
}

int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
{
    return blk_prw(blk, req, buf, 0, blk_ioctl_entry, 0);
}

static void blk_aio_ioctl_entry(void *opaque)
{
    BlkAioEmAIOCB *acb = opaque;
    BlkRwCo *rwco = &acb->rwco;

    rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
                             rwco->qiov->iov[0].iov_base);
    blk_aio_complete(acb);
}

BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
                          BlockCompletionFunc *cb, void *opaque)
{
    QEMUIOVector qiov;
    struct iovec iov;

    iov = (struct iovec) {
        .iov_base = buf,
        .iov_len = 0,
    };
    qemu_iovec_init_external(&qiov, &iov, 1);

    return blk_aio_prwv(blk, req, 0, &qiov, blk_aio_ioctl_entry, 0, cb, opaque);
}

int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int count)
{
    int ret = blk_check_byte_request(blk, offset, count);
    if (ret < 0) {
        return ret;
    }

    return bdrv_co_pdiscard(blk_bs(blk), offset, count);
}

int blk_co_flush(BlockBackend *blk)
{
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_co_flush(blk_bs(blk));
}

static void blk_flush_entry(void *opaque)
{
    BlkRwCo *rwco = opaque;
    rwco->ret = blk_co_flush(rwco->blk);
}

int blk_flush(BlockBackend *blk)
{
    return blk_prw(blk, 0, NULL, 0, blk_flush_entry, 0);
}

void blk_drain(BlockBackend *blk)
{
    if (blk_bs(blk)) {
        bdrv_drain(blk_bs(blk));
    }
}

void blk_drain_all(void)
{
    bdrv_drain_all();
}

void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
                      BlockdevOnError on_write_error)
{
    blk->on_read_error = on_read_error;
    blk->on_write_error = on_write_error;
}

BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read)
{
    return is_read ? blk->on_read_error : blk->on_write_error;
}

BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
                                      int error)
{
    BlockdevOnError on_err = blk_get_on_error(blk, is_read);

    switch (on_err) {
    case BLOCKDEV_ON_ERROR_ENOSPC:
        return (error == ENOSPC) ?
               BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
    case BLOCKDEV_ON_ERROR_STOP:
        return BLOCK_ERROR_ACTION_STOP;
    case BLOCKDEV_ON_ERROR_REPORT:
        return BLOCK_ERROR_ACTION_REPORT;
    case BLOCKDEV_ON_ERROR_IGNORE:
        return BLOCK_ERROR_ACTION_IGNORE;
    case BLOCKDEV_ON_ERROR_AUTO:
    default:
        abort();
    }
}

static void send_qmp_error_event(BlockBackend *blk,
                                 BlockErrorAction action,
                                 bool is_read, int error)
{
    IoOperationType optype;

    optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
    qapi_event_send_block_io_error(blk_name(blk),
                                   bdrv_get_node_name(blk_bs(blk)), optype,
                                   action, blk_iostatus_is_enabled(blk),
                                   error == ENOSPC, strerror(error),
                                   &error_abort);
}

/* This is done by device models because, while the block layer knows
 * about the error, it does not know whether an operation comes from
 * the device or the block layer (from a job, for example).
 */
void blk_error_action(BlockBackend *blk, BlockErrorAction action,
                      bool is_read, int error)
{
    assert(error >= 0);

    if (action == BLOCK_ERROR_ACTION_STOP) {
        /* First set the iostatus, so that "info block" returns an iostatus
         * that matches the events raised so far (an additional error iostatus
         * is fine, but not a lost one).
         */
        blk_iostatus_set_err(blk, error);

        /* Then raise the request to stop the VM and the event.
         * qemu_system_vmstop_request_prepare has two effects.  First,
         * it ensures that the STOP event always comes after the
         * BLOCK_IO_ERROR event.  Second, it ensures that even if management
         * can observe the STOP event and do a "cont" before the STOP
         * event is issued, the VM will not stop.  In this case, vm_start()
         * also ensures that the STOP/RESUME pair of events is emitted.
         */
        qemu_system_vmstop_request_prepare();
        send_qmp_error_event(blk, action, is_read, error);
        qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
    } else {
        send_qmp_error_event(blk, action, is_read, error);
    }
}
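
/*
 * Example (illustrative): how a device model's completion path typically
 * combines blk_get_error_action() and blk_error_action(); MyRequest and
 * the completion helper are hypothetical:
 *
 *     static void my_dev_rw_complete(MyRequest *req, int ret)
 *     {
 *         if (ret < 0) {
 *             BlockErrorAction action =
 *                 blk_get_error_action(req->blk, req->is_read, -ret);
 *             blk_error_action(req->blk, action, req->is_read, -ret);
 *             if (action == BLOCK_ERROR_ACTION_STOP) {
 *                 return;  // retry after the VM is resumed
 *             }
 *         }
 *         ... complete the request towards the guest ...
 *     }
 */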

int blk_is_read_only(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        return bdrv_is_read_only(bs);
    } else {
        return blk->root_state.read_only;
    }
}

int blk_is_sg(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);

    if (!bs) {
        return 0;
    }

    return bdrv_is_sg(bs);
}

int blk_enable_write_cache(BlockBackend *blk)
{
    return blk->enable_write_cache;
}

void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
{
    blk->enable_write_cache = wce;
}

void blk_invalidate_cache(BlockBackend *blk, Error **errp)
{
    BlockDriverState *bs = blk_bs(blk);

    if (!bs) {
        error_setg(errp, "Device '%s' has no medium", blk->name);
        return;
    }

    bdrv_invalidate_cache(bs, errp);
}

bool blk_is_inserted(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);

    return bs && bdrv_is_inserted(bs);
}

bool blk_is_available(BlockBackend *blk)
{
    return blk_is_inserted(blk) && !blk_dev_is_tray_open(blk);
}

void blk_lock_medium(BlockBackend *blk, bool locked)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        bdrv_lock_medium(bs, locked);
    }
}

void blk_eject(BlockBackend *blk, bool eject_flag)
{
    BlockDriverState *bs = blk_bs(blk);
    char *id;

    /* blk_eject is only called by qdevified devices */
    assert(!blk->legacy_dev);

    if (bs) {
        bdrv_eject(bs, eject_flag);
    }

    /* Whether or not we ejected on the backend,
     * the frontend experienced a tray event. */
    id = blk_get_attached_dev_id(blk);
    qapi_event_send_device_tray_moved(blk_name(blk), id,
                                      eject_flag, &error_abort);
    g_free(id);
}

int blk_get_flags(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        return bdrv_get_flags(bs);
    } else {
        return blk->root_state.open_flags;
    }
}

/* Returns the maximum transfer length, in bytes; guaranteed nonzero */
uint32_t blk_get_max_transfer(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);
    uint32_t max = 0;

    if (bs) {
        max = bs->bl.max_transfer;
    }
    return MIN_NON_ZERO(max, INT_MAX);
}

int blk_get_max_iov(BlockBackend *blk)
{
    return blk->root->bs->bl.max_iov;
}

void blk_set_guest_block_size(BlockBackend *blk, int align)
{
    blk->guest_block_size = align;
}

void *blk_try_blockalign(BlockBackend *blk, size_t size)
{
    return qemu_try_blockalign(blk ? blk_bs(blk) : NULL, size);
}

void *blk_blockalign(BlockBackend *blk, size_t size)
{
    return qemu_blockalign(blk ? blk_bs(blk) : NULL, size);
}

bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
{
    BlockDriverState *bs = blk_bs(blk);

    if (!bs) {
        return false;
    }

    return bdrv_op_is_blocked(bs, op, errp);
}

void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        bdrv_op_unblock(bs, op, reason);
    }
}

void blk_op_block_all(BlockBackend *blk, Error *reason)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        bdrv_op_block_all(bs, reason);
    }
}

void blk_op_unblock_all(BlockBackend *blk, Error *reason)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        bdrv_op_unblock_all(bs, reason);
    }
}

AioContext *blk_get_aio_context(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        return bdrv_get_aio_context(bs);
    } else {
        return qemu_get_aio_context();
    }
}

static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
{
    BlockBackendAIOCB *blk_acb = DO_UPCAST(BlockBackendAIOCB, common, acb);
    return blk_get_aio_context(blk_acb->blk);
}

void blk_set_aio_context(BlockBackend *blk, AioContext *new_context)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        if (blk->public.throttle_state) {
            throttle_timers_detach_aio_context(&blk->public.throttle_timers);
        }
        bdrv_set_aio_context(bs, new_context);
        if (blk->public.throttle_state) {
            throttle_timers_attach_aio_context(&blk->public.throttle_timers,
                                               new_context);
        }
    }
}

void blk_add_aio_context_notifier(BlockBackend *blk,
        void (*attached_aio_context)(AioContext *new_context, void *opaque),
        void (*detach_aio_context)(void *opaque), void *opaque)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        bdrv_add_aio_context_notifier(bs, attached_aio_context,
                                      detach_aio_context, opaque);
    }
}

void blk_remove_aio_context_notifier(BlockBackend *blk,
                                     void (*attached_aio_context)(AioContext *,
                                                                  void *),
                                     void (*detach_aio_context)(void *),
                                     void *opaque)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        bdrv_remove_aio_context_notifier(bs, attached_aio_context,
                                         detach_aio_context, opaque);
    }
}

void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify)
{
    notifier_list_add(&blk->remove_bs_notifiers, notify);
}

void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify)
{
    notifier_list_add(&blk->insert_bs_notifiers, notify);
}

void blk_io_plug(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        bdrv_io_plug(bs);
    }
}

void blk_io_unplug(BlockBackend *blk)
{
    BlockDriverState *bs = blk_bs(blk);

    if (bs) {
        bdrv_io_unplug(bs);
    }
}

BlockAcctStats *blk_get_stats(BlockBackend *blk)
{
    return &blk->stats;
}

void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
                  BlockCompletionFunc *cb, void *opaque)
{
    return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque);
}

int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
                                      int count, BdrvRequestFlags flags)
{
    return blk_co_pwritev(blk, offset, count, NULL,
                          flags | BDRV_REQ_ZERO_WRITE);
}

int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
                          int count)
{
    return blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
                   BDRV_REQ_WRITE_COMPRESSED);
}

int blk_truncate(BlockBackend *blk, int64_t offset, Error **errp)
{
    if (!blk_is_available(blk)) {
        error_setg(errp, "No medium inserted");
        return -ENOMEDIUM;
    }

    return bdrv_truncate(blk->root, offset, errp);
}

static void blk_pdiscard_entry(void *opaque)
{
    BlkRwCo *rwco = opaque;
    rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, rwco->qiov->size);
}

int blk_pdiscard(BlockBackend *blk, int64_t offset, int count)
{
    return blk_prw(blk, offset, NULL, count, blk_pdiscard_entry, 0);
}

int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
                     int64_t pos, int size)
{
    int ret;

    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    ret = bdrv_save_vmstate(blk_bs(blk), buf, pos, size);
    if (ret < 0) {
        return ret;
    }

    if (ret == size && !blk->enable_write_cache) {
        ret = bdrv_flush(blk_bs(blk));
    }

    return ret < 0 ? ret : size;
}

int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
{
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_load_vmstate(blk_bs(blk), buf, pos, size);
}

int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
{
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_probe_blocksizes(blk_bs(blk), bsz);
}

int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
{
    if (!blk_is_available(blk)) {
        return -ENOMEDIUM;
    }

    return bdrv_probe_geometry(blk_bs(blk), geo);
}

/*
 * Updates the BlockBackendRootState object with data from the currently
 * attached BlockDriverState.
 */
void blk_update_root_state(BlockBackend *blk)
{
    assert(blk->root);

    blk->root_state.open_flags    = blk->root->bs->open_flags;
    blk->root_state.read_only     = blk->root->bs->read_only;
    blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes;
}

/*
 * Returns the detect-zeroes setting to be used for bdrv_open() of a
 * BlockDriverState which is supposed to inherit the root state.
 */
bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk)
{
    return blk->root_state.detect_zeroes;
}

/*
 * Returns the flags to be used for bdrv_open() of a BlockDriverState which is
 * supposed to inherit the root state.
 */
int blk_get_open_flags_from_root_state(BlockBackend *blk)
{
    int bs_flags;

    bs_flags = blk->root_state.read_only ? 0 : BDRV_O_RDWR;
    bs_flags |= blk->root_state.open_flags & ~BDRV_O_RDWR;

    return bs_flags;
}

BlockBackendRootState *blk_get_root_state(BlockBackend *blk)
{
    return &blk->root_state;
}

int blk_commit_all(void)
{
    BlockBackend *blk = NULL;

    while ((blk = blk_all_next(blk)) != NULL) {
        AioContext *aio_context = blk_get_aio_context(blk);

        aio_context_acquire(aio_context);
        if (blk_is_inserted(blk) && blk->root->bs->backing) {
            int ret = bdrv_commit(blk->root->bs);
            if (ret < 0) {
                aio_context_release(aio_context);
                return ret;
            }
        }
        aio_context_release(aio_context);
    }
    return 0;
}


/* throttling disk I/O limits */
void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg)
{
    throttle_group_config(blk, cfg);
}

void blk_io_limits_disable(BlockBackend *blk)
{
    assert(blk->public.throttle_state);
    bdrv_drained_begin(blk_bs(blk));
    throttle_group_unregister_blk(blk);
    bdrv_drained_end(blk_bs(blk));
}

/* should be called before blk_set_io_limits if a limit is set */
void blk_io_limits_enable(BlockBackend *blk, const char *group)
{
    assert(!blk->public.throttle_state);
    throttle_group_register_blk(blk, group);
}

void blk_io_limits_update_group(BlockBackend *blk, const char *group)
{
    /* this BB is not part of any group */
    if (!blk->public.throttle_state) {
        return;
    }

    /* this BB is already part of the group we want */
    if (!g_strcmp0(throttle_group_get_name(blk), group)) {
        return;
    }

    /* need to change the group this BB belongs to */
    blk_io_limits_disable(blk);
    blk_io_limits_enable(blk, group);
}
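
/*
 * Example (illustrative): moving a backend into throttle group "group0"
 * and capping it at about 10 MB/s; the group name and the numbers are
 * made up, and throttle_config_init() is assumed from qemu/throttle.h:
 *
 *     ThrottleConfig cfg;
 *
 *     throttle_config_init(&cfg);
 *     cfg.buckets[THROTTLE_BPS_TOTAL].avg = 10 * 1024 * 1024;
 *     blk_io_limits_enable(blk, "group0");
 *     blk_set_io_limits(blk, &cfg);
 */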

static void blk_root_drained_begin(BdrvChild *child)
{
    BlockBackend *blk = child->opaque;

    if (++blk->quiesce_counter == 1) {
        if (blk->dev_ops && blk->dev_ops->drained_begin) {
            blk->dev_ops->drained_begin(blk->dev_opaque);
        }
    }

    /* Note that blk->root may not be accessible here yet if we are just
     * attaching to a BlockDriverState that is drained. Use child instead. */

    if (blk->public.io_limits_disabled++ == 0) {
        throttle_group_restart_blk(blk);
    }
}

static void blk_root_drained_end(BdrvChild *child)
{
    BlockBackend *blk = child->opaque;
    assert(blk->quiesce_counter);

    assert(blk->public.io_limits_disabled);
    --blk->public.io_limits_disabled;

    if (--blk->quiesce_counter == 0) {
        if (blk->dev_ops && blk->dev_ops->drained_end) {
            blk->dev_ops->drained_end(blk->dev_opaque);
        }
    }
}