xref: /openbmc/qemu/block.c (revision e0c47b6cb1de430fbc6f828f7acffa851c580840)
1fc01f7e7Sbellard /*
2fc01f7e7Sbellard  * QEMU System Emulator block driver
3fc01f7e7Sbellard  *
4fc01f7e7Sbellard  * Copyright (c) 2003 Fabrice Bellard
5fc01f7e7Sbellard  *
6fc01f7e7Sbellard  * Permission is hereby granted, free of charge, to any person obtaining a copy
7fc01f7e7Sbellard  * of this software and associated documentation files (the "Software"), to deal
8fc01f7e7Sbellard  * in the Software without restriction, including without limitation the rights
9fc01f7e7Sbellard  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10fc01f7e7Sbellard  * copies of the Software, and to permit persons to whom the Software is
11fc01f7e7Sbellard  * furnished to do so, subject to the following conditions:
12fc01f7e7Sbellard  *
13fc01f7e7Sbellard  * The above copyright notice and this permission notice shall be included in
14fc01f7e7Sbellard  * all copies or substantial portions of the Software.
15fc01f7e7Sbellard  *
16fc01f7e7Sbellard  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17fc01f7e7Sbellard  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18fc01f7e7Sbellard  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19fc01f7e7Sbellard  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20fc01f7e7Sbellard  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21fc01f7e7Sbellard  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22fc01f7e7Sbellard  * THE SOFTWARE.
23fc01f7e7Sbellard  */
243990d09aSblueswir1 #include "config-host.h"
25faf07963Spbrook #include "qemu-common.h"
266d519a5fSStefan Hajnoczi #include "trace.h"
27737e150eSPaolo Bonzini #include "block/block_int.h"
28737e150eSPaolo Bonzini #include "block/blockjob.h"
291de7afc9SPaolo Bonzini #include "qemu/module.h"
307b1b5d19SPaolo Bonzini #include "qapi/qmp/qjson.h"
31bfb197e0SMarkus Armbruster #include "sysemu/block-backend.h"
329c17d615SPaolo Bonzini #include "sysemu/sysemu.h"
33de50a20aSFam Zheng #include "sysemu/qtest.h"
341de7afc9SPaolo Bonzini #include "qemu/notify.h"
35737e150eSPaolo Bonzini #include "block/coroutine.h"
36c13163fbSBenoît Canet #include "block/qapi.h"
37b2023818SLuiz Capitulino #include "qmp-commands.h"
381de7afc9SPaolo Bonzini #include "qemu/timer.h"
39a5ee7bd4SWenchao Xia #include "qapi-event.h"
40fc01f7e7Sbellard 
4171e72a19SJuan Quintela #ifdef CONFIG_BSD
427674e7bfSbellard #include <sys/types.h>
437674e7bfSbellard #include <sys/stat.h>
447674e7bfSbellard #include <sys/ioctl.h>
4572cf2d4fSBlue Swirl #include <sys/queue.h>
46c5e97233Sblueswir1 #ifndef __DragonFly__
477674e7bfSbellard #include <sys/disk.h>
487674e7bfSbellard #endif
49c5e97233Sblueswir1 #endif
507674e7bfSbellard 
5149dc768dSaliguori #ifdef _WIN32
5249dc768dSaliguori #include <windows.h>
5349dc768dSaliguori #endif
5449dc768dSaliguori 
559bd2b08fSJohn Snow /**
569bd2b08fSJohn Snow  * A BdrvDirtyBitmap can be in three possible states:
579bd2b08fSJohn Snow  * (1) successor is NULL and disabled is false: full r/w mode
589bd2b08fSJohn Snow  * (2) successor is NULL and disabled is true: read only mode ("disabled")
599bd2b08fSJohn Snow  * (3) successor is set: frozen mode.
609bd2b08fSJohn Snow  *     A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
619bd2b08fSJohn Snow  *     or enabled. A frozen bitmap can only abdicate() or reclaim().
629bd2b08fSJohn Snow  */
63e4654d2dSFam Zheng struct BdrvDirtyBitmap {
64aa0c7ca5SJohn Snow     HBitmap *bitmap;            /* Dirty sector bitmap implementation */
65aa0c7ca5SJohn Snow     BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
66aa0c7ca5SJohn Snow     char *name;                 /* Optional non-empty unique ID */
67aa0c7ca5SJohn Snow     int64_t size;               /* Size of the bitmap (Number of sectors) */
68aa0c7ca5SJohn Snow     bool disabled;              /* Bitmap is read-only */
69e4654d2dSFam Zheng     QLIST_ENTRY(BdrvDirtyBitmap) list;
70e4654d2dSFam Zheng };
71e4654d2dSFam Zheng 
721c9805a3SStefan Hajnoczi #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
731c9805a3SStefan Hajnoczi 
747c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
75f141eafeSaliguori         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
76097310b5SMarkus Armbruster         BlockCompletionFunc *cb, void *opaque);
777c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
78f141eafeSaliguori         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
79097310b5SMarkus Armbruster         BlockCompletionFunc *cb, void *opaque);
80f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
81f9f05dc5SKevin Wolf                                          int64_t sector_num, int nb_sectors,
82f9f05dc5SKevin Wolf                                          QEMUIOVector *iov);
83f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
84f9f05dc5SKevin Wolf                                          int64_t sector_num, int nb_sectors,
85f9f05dc5SKevin Wolf                                          QEMUIOVector *iov);
86775aa8b6SKevin Wolf static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
87775aa8b6SKevin Wolf     int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
88470c0504SStefan Hajnoczi     BdrvRequestFlags flags);
89775aa8b6SKevin Wolf static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
90775aa8b6SKevin Wolf     int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
91f08f2ddaSStefan Hajnoczi     BdrvRequestFlags flags);
927c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
93b2a61371SStefan Hajnoczi                                          int64_t sector_num,
94b2a61371SStefan Hajnoczi                                          QEMUIOVector *qiov,
95b2a61371SStefan Hajnoczi                                          int nb_sectors,
96d20d9b7cSPaolo Bonzini                                          BdrvRequestFlags flags,
97097310b5SMarkus Armbruster                                          BlockCompletionFunc *cb,
98b2a61371SStefan Hajnoczi                                          void *opaque,
998c5873d6SStefan Hajnoczi                                          bool is_write);
100b2a61371SStefan Hajnoczi static void coroutine_fn bdrv_co_do_rw(void *opaque);
101621f0589SKevin Wolf static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
102aa7bfbffSPeter Lieven     int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
103ec530c81Sbellard 
1041b7bdbc1SStefan Hajnoczi static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
1051b7bdbc1SStefan Hajnoczi     QTAILQ_HEAD_INITIALIZER(bdrv_states);
1067ee930d0Sblueswir1 
107dc364f4cSBenoît Canet static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
108dc364f4cSBenoît Canet     QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
109dc364f4cSBenoît Canet 
1108a22f02aSStefan Hajnoczi static QLIST_HEAD(, BlockDriver) bdrv_drivers =
1118a22f02aSStefan Hajnoczi     QLIST_HEAD_INITIALIZER(bdrv_drivers);
112ea2384d3Sbellard 
113ce1ffea8SJohn Snow static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
114eb852011SMarkus Armbruster /* If non-zero, use only whitelisted block drivers */
115eb852011SMarkus Armbruster static int use_bdrv_whitelist;
116eb852011SMarkus Armbruster 
1179e0b22f4SStefan Hajnoczi #ifdef _WIN32
/*
 * Return 1 if @filename starts with an ASCII letter immediately followed
 * by ':' (a drive prefix such as "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_lower = letter >= 'a' && letter <= 'z';
    const int is_upper = letter >= 'A' && letter <= 'Z';

    if (!is_lower && !is_upper) {
        /* Do not look at filename[1]: the string may be empty */
        return 0;
    }
    return filename[1] == ':';
}
1249e0b22f4SStefan Hajnoczi 
/*
 * Return 1 if @filename names a Windows drive, 0 otherwise.
 *
 * Two forms are recognized: a bare drive specification consisting of
 * exactly "<letter>:", and any name beginning with "\\.\" or "//./".
 */
int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }

    return strstart(filename, "\\\\.\\", NULL) ||
           strstart(filename, "//./", NULL);
}
1359e0b22f4SStefan Hajnoczi #endif
1369e0b22f4SStefan Hajnoczi 
1370563e191SZhi Yong Wu /* throttling disk I/O limits */
138cc0681c4SBenoît Canet void bdrv_set_io_limits(BlockDriverState *bs,
139cc0681c4SBenoît Canet                         ThrottleConfig *cfg)
140cc0681c4SBenoît Canet {
141cc0681c4SBenoît Canet     int i;
142cc0681c4SBenoît Canet 
143cc0681c4SBenoît Canet     throttle_config(&bs->throttle_state, cfg);
144cc0681c4SBenoît Canet 
145cc0681c4SBenoît Canet     for (i = 0; i < 2; i++) {
146cc0681c4SBenoît Canet         qemu_co_enter_next(&bs->throttled_reqs[i]);
147cc0681c4SBenoît Canet     }
148cc0681c4SBenoît Canet }
149cc0681c4SBenoît Canet 
150cc0681c4SBenoît Canet /* this function drain all the throttled IOs */
151cc0681c4SBenoît Canet static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
152cc0681c4SBenoît Canet {
153cc0681c4SBenoît Canet     bool drained = false;
154cc0681c4SBenoît Canet     bool enabled = bs->io_limits_enabled;
155cc0681c4SBenoît Canet     int i;
156cc0681c4SBenoît Canet 
157cc0681c4SBenoît Canet     bs->io_limits_enabled = false;
158cc0681c4SBenoît Canet 
159cc0681c4SBenoît Canet     for (i = 0; i < 2; i++) {
160cc0681c4SBenoît Canet         while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
161cc0681c4SBenoît Canet             drained = true;
162cc0681c4SBenoît Canet         }
163cc0681c4SBenoît Canet     }
164cc0681c4SBenoît Canet 
165cc0681c4SBenoît Canet     bs->io_limits_enabled = enabled;
166cc0681c4SBenoît Canet 
167cc0681c4SBenoît Canet     return drained;
168cc0681c4SBenoît Canet }
169cc0681c4SBenoît Canet 
17098f90dbaSZhi Yong Wu void bdrv_io_limits_disable(BlockDriverState *bs)
17198f90dbaSZhi Yong Wu {
17298f90dbaSZhi Yong Wu     bs->io_limits_enabled = false;
17398f90dbaSZhi Yong Wu 
174cc0681c4SBenoît Canet     bdrv_start_throttled_reqs(bs);
17598f90dbaSZhi Yong Wu 
176cc0681c4SBenoît Canet     throttle_destroy(&bs->throttle_state);
17798f90dbaSZhi Yong Wu }
17898f90dbaSZhi Yong Wu 
179cc0681c4SBenoît Canet static void bdrv_throttle_read_timer_cb(void *opaque)
1800563e191SZhi Yong Wu {
1810563e191SZhi Yong Wu     BlockDriverState *bs = opaque;
182cc0681c4SBenoît Canet     qemu_co_enter_next(&bs->throttled_reqs[0]);
1830563e191SZhi Yong Wu }
1840563e191SZhi Yong Wu 
185cc0681c4SBenoît Canet static void bdrv_throttle_write_timer_cb(void *opaque)
186cc0681c4SBenoît Canet {
187cc0681c4SBenoît Canet     BlockDriverState *bs = opaque;
188cc0681c4SBenoît Canet     qemu_co_enter_next(&bs->throttled_reqs[1]);
189cc0681c4SBenoît Canet }
190cc0681c4SBenoît Canet 
191cc0681c4SBenoît Canet /* should be called before bdrv_set_io_limits if a limit is set */
1920563e191SZhi Yong Wu void bdrv_io_limits_enable(BlockDriverState *bs)
1930563e191SZhi Yong Wu {
194de50a20aSFam Zheng     int clock_type = QEMU_CLOCK_REALTIME;
195de50a20aSFam Zheng 
196de50a20aSFam Zheng     if (qtest_enabled()) {
197de50a20aSFam Zheng         /* For testing block IO throttling only */
198de50a20aSFam Zheng         clock_type = QEMU_CLOCK_VIRTUAL;
199de50a20aSFam Zheng     }
200cc0681c4SBenoît Canet     assert(!bs->io_limits_enabled);
201cc0681c4SBenoît Canet     throttle_init(&bs->throttle_state,
20213af91ebSStefan Hajnoczi                   bdrv_get_aio_context(bs),
203de50a20aSFam Zheng                   clock_type,
204cc0681c4SBenoît Canet                   bdrv_throttle_read_timer_cb,
205cc0681c4SBenoît Canet                   bdrv_throttle_write_timer_cb,
206cc0681c4SBenoît Canet                   bs);
2070563e191SZhi Yong Wu     bs->io_limits_enabled = true;
2080563e191SZhi Yong Wu }
2090563e191SZhi Yong Wu 
210cc0681c4SBenoît Canet /* This function makes an IO wait if needed
211cc0681c4SBenoît Canet  *
212cc0681c4SBenoît Canet  * @nb_sectors: the number of sectors of the IO
213cc0681c4SBenoît Canet  * @is_write:   is the IO a write
21498f90dbaSZhi Yong Wu  */
215cc0681c4SBenoît Canet static void bdrv_io_limits_intercept(BlockDriverState *bs,
216d5103588SKevin Wolf                                      unsigned int bytes,
217cc0681c4SBenoît Canet                                      bool is_write)
218cc0681c4SBenoît Canet {
219cc0681c4SBenoît Canet     /* does this io must wait */
220cc0681c4SBenoît Canet     bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);
22198f90dbaSZhi Yong Wu 
222cc0681c4SBenoît Canet     /* if must wait or any request of this type throttled queue the IO */
223cc0681c4SBenoît Canet     if (must_wait ||
224cc0681c4SBenoît Canet         !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
225cc0681c4SBenoît Canet         qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
22698f90dbaSZhi Yong Wu     }
22798f90dbaSZhi Yong Wu 
228cc0681c4SBenoît Canet     /* the IO will be executed, do the accounting */
229d5103588SKevin Wolf     throttle_account(&bs->throttle_state, is_write, bytes);
230d5103588SKevin Wolf 
231cc0681c4SBenoît Canet 
232cc0681c4SBenoît Canet     /* if the next request must wait -> do nothing */
233cc0681c4SBenoît Canet     if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
234cc0681c4SBenoît Canet         return;
235cc0681c4SBenoît Canet     }
236cc0681c4SBenoît Canet 
237cc0681c4SBenoît Canet     /* else queue next request for execution */
238cc0681c4SBenoît Canet     qemu_co_queue_next(&bs->throttled_reqs[is_write]);
23998f90dbaSZhi Yong Wu }
24098f90dbaSZhi Yong Wu 
241339064d5SKevin Wolf size_t bdrv_opt_mem_align(BlockDriverState *bs)
242339064d5SKevin Wolf {
243339064d5SKevin Wolf     if (!bs || !bs->drv) {
244339064d5SKevin Wolf         /* 4k should be on the safe side */
245339064d5SKevin Wolf         return 4096;
246339064d5SKevin Wolf     }
247339064d5SKevin Wolf 
248339064d5SKevin Wolf     return bs->bl.opt_mem_alignment;
249339064d5SKevin Wolf }
250339064d5SKevin Wolf 
/*
 * Check whether @path starts with "<protocol>:".
 *
 * Returns 1 if the first character out of ':' and '/' (plus '\\' on
 * Windows) found in @path is a ':', 0 otherwise. On Windows, drive names
 * and drive-letter prefixes are never considered protocols.
 */
int path_has_protocol(const char *path)
{
#ifdef _WIN32
    static const char stop_chars[] = ":/\\";

    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    static const char stop_chars[] = ":/";
#endif

    /* Index of the first stop character, or of the terminating NUL */
    return path[strcspn(path, stop_chars)] == ':';
}
2689e0b22f4SStefan Hajnoczi 
/*
 * Return 1 if @path is absolute, 0 otherwise.
 *
 * A path is absolute when it starts with '/'. On Windows a leading '\\'
 * counts as well, and so do drive names (including names like "\\.\d:")
 * and anything starting with a drive-letter prefix.
 */
int path_is_absolute(const char *path)
{
    const char first = path[0];

#ifdef _WIN32
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return first == '/' || first == '\\';
#else
    return first == '/';
#endif
}
28183f64091Sbellard 
/*
 * Store in @dest the location of @filename, interpreted relative to
 * @base_path.
 *
 * @dest:      output buffer
 * @dest_size: capacity of @dest in bytes; nothing is written if it is <= 0.
 *             The result is truncated to fit and NUL-terminated.
 * @base_path: path or URL of the file that @filename is relative to
 * @filename:  path to resolve
 *
 * If @filename is absolute it is copied to @dest unchanged. Otherwise the
 * result is the "directory part" of @base_path followed by @filename. The
 * directory part ends after the last '/' (or '\\' on Windows) but never
 * before the end of a leading "<protocol>:" prefix, so that combining
 * "proto:image" with "back" gives "proto:back".
 *
 * A ':' is only treated as the end of a protocol prefix when
 * path_has_protocol() reports one. A colon elsewhere (for example in
 * "/tmp/a:b.img" or "dir/a:b.img") is part of a file name and must not
 * leak into the result; those bases yield "/tmp/" and "dir/".
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *prefix_end;
    const char *last_sep;
    int len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Lower bound for the prefix: just past "<protocol>:" if there is one */
    prefix_end = base_path;
    if (path_has_protocol(base_path)) {
        const char *colon = strchr(base_path, ':');

        if (colon) {
            prefix_end = colon + 1;
        }
    }
#ifdef _WIN32
    /*
     * path_has_protocol() rejects drive-letter prefixes, but "c:" still
     * has to be kept when the base is drive-relative ("c:image").
     */
    if (is_windows_drive_prefix(base_path)) {
        prefix_end = base_path + 2;
    }
#endif

    /* Extend the prefix up to and including the last directory separator */
    last_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *last_backslash = strrchr(base_path, '\\');

        if (last_backslash && (!last_sep || last_backslash > last_sep)) {
            last_sep = last_backslash;
        }
    }
#endif
    if (last_sep && last_sep + 1 > prefix_end) {
        prefix_end = last_sep + 1;
    }

    len = prefix_end - base_path;
    if (len > dest_size - 1) {
        len = dest_size - 1;
    }
    memcpy(dest, base_path, len);
    dest[len] = '\0';
    pstrcat(dest, dest_size, filename);
}
32583f64091Sbellard 
3260a82855aSMax Reitz void bdrv_get_full_backing_filename_from_filename(const char *backed,
3270a82855aSMax Reitz                                                   const char *backing,
3289f07429eSMax Reitz                                                   char *dest, size_t sz,
3299f07429eSMax Reitz                                                   Error **errp)
3300a82855aSMax Reitz {
3319f07429eSMax Reitz     if (backing[0] == '\0' || path_has_protocol(backing) ||
3329f07429eSMax Reitz         path_is_absolute(backing))
3339f07429eSMax Reitz     {
3340a82855aSMax Reitz         pstrcpy(dest, sz, backing);
3359f07429eSMax Reitz     } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
3369f07429eSMax Reitz         error_setg(errp, "Cannot use relative backing file names for '%s'",
3379f07429eSMax Reitz                    backed);
3380a82855aSMax Reitz     } else {
3390a82855aSMax Reitz         path_combine(dest, sz, backed, backing);
3400a82855aSMax Reitz     }
3410a82855aSMax Reitz }
3420a82855aSMax Reitz 
3439f07429eSMax Reitz void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
3449f07429eSMax Reitz                                     Error **errp)
345dc5a1371SPaolo Bonzini {
3469f07429eSMax Reitz     char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
3479f07429eSMax Reitz 
3489f07429eSMax Reitz     bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
3499f07429eSMax Reitz                                                  dest, sz, errp);
350dc5a1371SPaolo Bonzini }
351dc5a1371SPaolo Bonzini 
3525efa9d5aSAnthony Liguori void bdrv_register(BlockDriver *bdrv)
353ea2384d3Sbellard {
3548c5873d6SStefan Hajnoczi     /* Block drivers without coroutine functions need emulation */
3558c5873d6SStefan Hajnoczi     if (!bdrv->bdrv_co_readv) {
356f9f05dc5SKevin Wolf         bdrv->bdrv_co_readv = bdrv_co_readv_em;
357f9f05dc5SKevin Wolf         bdrv->bdrv_co_writev = bdrv_co_writev_em;
358f9f05dc5SKevin Wolf 
359f8c35c1dSStefan Hajnoczi         /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
360f8c35c1dSStefan Hajnoczi          * the block driver lacks aio we need to emulate that too.
361f8c35c1dSStefan Hajnoczi          */
362f9f05dc5SKevin Wolf         if (!bdrv->bdrv_aio_readv) {
36383f64091Sbellard             /* add AIO emulation layer */
364f141eafeSaliguori             bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
365f141eafeSaliguori             bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
36683f64091Sbellard         }
367f9f05dc5SKevin Wolf     }
368b2e12bc6SChristoph Hellwig 
3698a22f02aSStefan Hajnoczi     QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
370ea2384d3Sbellard }
371b338082bSbellard 
3727f06d47eSMarkus Armbruster BlockDriverState *bdrv_new_root(void)
373fc01f7e7Sbellard {
3747f06d47eSMarkus Armbruster     BlockDriverState *bs = bdrv_new();
375e4e9986bSMarkus Armbruster 
376e4e9986bSMarkus Armbruster     QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
377e4e9986bSMarkus Armbruster     return bs;
378e4e9986bSMarkus Armbruster }
379e4e9986bSMarkus Armbruster 
380e4e9986bSMarkus Armbruster BlockDriverState *bdrv_new(void)
381e4e9986bSMarkus Armbruster {
382e4e9986bSMarkus Armbruster     BlockDriverState *bs;
383e4e9986bSMarkus Armbruster     int i;
384e4e9986bSMarkus Armbruster 
3855839e53bSMarkus Armbruster     bs = g_new0(BlockDriverState, 1);
386e4654d2dSFam Zheng     QLIST_INIT(&bs->dirty_bitmaps);
387fbe40ff7SFam Zheng     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
388fbe40ff7SFam Zheng         QLIST_INIT(&bs->op_blockers[i]);
389fbe40ff7SFam Zheng     }
39028a7282aSLuiz Capitulino     bdrv_iostatus_disable(bs);
391d7d512f6SPaolo Bonzini     notifier_list_init(&bs->close_notifiers);
392d616b224SStefan Hajnoczi     notifier_with_return_list_init(&bs->before_write_notifiers);
393cc0681c4SBenoît Canet     qemu_co_queue_init(&bs->throttled_reqs[0]);
394cc0681c4SBenoît Canet     qemu_co_queue_init(&bs->throttled_reqs[1]);
3959fcb0251SFam Zheng     bs->refcnt = 1;
396dcd04228SStefan Hajnoczi     bs->aio_context = qemu_get_aio_context();
397d7d512f6SPaolo Bonzini 
398b338082bSbellard     return bs;
399b338082bSbellard }
400b338082bSbellard 
401d7d512f6SPaolo Bonzini void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
402d7d512f6SPaolo Bonzini {
403d7d512f6SPaolo Bonzini     notifier_list_add(&bs->close_notifiers, notify);
404d7d512f6SPaolo Bonzini }
405d7d512f6SPaolo Bonzini 
406ea2384d3Sbellard BlockDriver *bdrv_find_format(const char *format_name)
407ea2384d3Sbellard {
408ea2384d3Sbellard     BlockDriver *drv1;
4098a22f02aSStefan Hajnoczi     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
4108a22f02aSStefan Hajnoczi         if (!strcmp(drv1->format_name, format_name)) {
411ea2384d3Sbellard             return drv1;
412ea2384d3Sbellard         }
4138a22f02aSStefan Hajnoczi     }
414ea2384d3Sbellard     return NULL;
415ea2384d3Sbellard }
416ea2384d3Sbellard 
417b64ec4e4SFam Zheng static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
418eb852011SMarkus Armbruster {
419b64ec4e4SFam Zheng     static const char *whitelist_rw[] = {
420b64ec4e4SFam Zheng         CONFIG_BDRV_RW_WHITELIST
421b64ec4e4SFam Zheng     };
422b64ec4e4SFam Zheng     static const char *whitelist_ro[] = {
423b64ec4e4SFam Zheng         CONFIG_BDRV_RO_WHITELIST
424eb852011SMarkus Armbruster     };
425eb852011SMarkus Armbruster     const char **p;
426eb852011SMarkus Armbruster 
427b64ec4e4SFam Zheng     if (!whitelist_rw[0] && !whitelist_ro[0]) {
428eb852011SMarkus Armbruster         return 1;               /* no whitelist, anything goes */
429b64ec4e4SFam Zheng     }
430eb852011SMarkus Armbruster 
431b64ec4e4SFam Zheng     for (p = whitelist_rw; *p; p++) {
432eb852011SMarkus Armbruster         if (!strcmp(drv->format_name, *p)) {
433eb852011SMarkus Armbruster             return 1;
434eb852011SMarkus Armbruster         }
435eb852011SMarkus Armbruster     }
436b64ec4e4SFam Zheng     if (read_only) {
437b64ec4e4SFam Zheng         for (p = whitelist_ro; *p; p++) {
438b64ec4e4SFam Zheng             if (!strcmp(drv->format_name, *p)) {
439b64ec4e4SFam Zheng                 return 1;
440b64ec4e4SFam Zheng             }
441b64ec4e4SFam Zheng         }
442b64ec4e4SFam Zheng     }
443eb852011SMarkus Armbruster     return 0;
444eb852011SMarkus Armbruster }
445eb852011SMarkus Armbruster 
446b64ec4e4SFam Zheng BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
447b64ec4e4SFam Zheng                                           bool read_only)
448eb852011SMarkus Armbruster {
449eb852011SMarkus Armbruster     BlockDriver *drv = bdrv_find_format(format_name);
450b64ec4e4SFam Zheng     return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
451eb852011SMarkus Armbruster }
452eb852011SMarkus Armbruster 
4535b7e1542SZhi Yong Wu typedef struct CreateCo {
4545b7e1542SZhi Yong Wu     BlockDriver *drv;
4555b7e1542SZhi Yong Wu     char *filename;
45683d0521aSChunyan Liu     QemuOpts *opts;
4575b7e1542SZhi Yong Wu     int ret;
458cc84d90fSMax Reitz     Error *err;
4595b7e1542SZhi Yong Wu } CreateCo;
4605b7e1542SZhi Yong Wu 
4615b7e1542SZhi Yong Wu static void coroutine_fn bdrv_create_co_entry(void *opaque)
4625b7e1542SZhi Yong Wu {
463cc84d90fSMax Reitz     Error *local_err = NULL;
464cc84d90fSMax Reitz     int ret;
465cc84d90fSMax Reitz 
4665b7e1542SZhi Yong Wu     CreateCo *cco = opaque;
4675b7e1542SZhi Yong Wu     assert(cco->drv);
4685b7e1542SZhi Yong Wu 
469c282e1fdSChunyan Liu     ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
47084d18f06SMarkus Armbruster     if (local_err) {
471cc84d90fSMax Reitz         error_propagate(&cco->err, local_err);
472cc84d90fSMax Reitz     }
473cc84d90fSMax Reitz     cco->ret = ret;
4745b7e1542SZhi Yong Wu }
4755b7e1542SZhi Yong Wu 
4760e7e1989SKevin Wolf int bdrv_create(BlockDriver *drv, const char* filename,
47783d0521aSChunyan Liu                 QemuOpts *opts, Error **errp)
478ea2384d3Sbellard {
4795b7e1542SZhi Yong Wu     int ret;
4800e7e1989SKevin Wolf 
4815b7e1542SZhi Yong Wu     Coroutine *co;
4825b7e1542SZhi Yong Wu     CreateCo cco = {
4835b7e1542SZhi Yong Wu         .drv = drv,
4845b7e1542SZhi Yong Wu         .filename = g_strdup(filename),
48583d0521aSChunyan Liu         .opts = opts,
4865b7e1542SZhi Yong Wu         .ret = NOT_DONE,
487cc84d90fSMax Reitz         .err = NULL,
4885b7e1542SZhi Yong Wu     };
4895b7e1542SZhi Yong Wu 
490c282e1fdSChunyan Liu     if (!drv->bdrv_create) {
491cc84d90fSMax Reitz         error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
49280168bffSLuiz Capitulino         ret = -ENOTSUP;
49380168bffSLuiz Capitulino         goto out;
4945b7e1542SZhi Yong Wu     }
4955b7e1542SZhi Yong Wu 
4965b7e1542SZhi Yong Wu     if (qemu_in_coroutine()) {
4975b7e1542SZhi Yong Wu         /* Fast-path if already in coroutine context */
4985b7e1542SZhi Yong Wu         bdrv_create_co_entry(&cco);
4995b7e1542SZhi Yong Wu     } else {
5005b7e1542SZhi Yong Wu         co = qemu_coroutine_create(bdrv_create_co_entry);
5015b7e1542SZhi Yong Wu         qemu_coroutine_enter(co, &cco);
5025b7e1542SZhi Yong Wu         while (cco.ret == NOT_DONE) {
503b47ec2c4SPaolo Bonzini             aio_poll(qemu_get_aio_context(), true);
5045b7e1542SZhi Yong Wu         }
5055b7e1542SZhi Yong Wu     }
5065b7e1542SZhi Yong Wu 
5075b7e1542SZhi Yong Wu     ret = cco.ret;
508cc84d90fSMax Reitz     if (ret < 0) {
50984d18f06SMarkus Armbruster         if (cco.err) {
510cc84d90fSMax Reitz             error_propagate(errp, cco.err);
511cc84d90fSMax Reitz         } else {
512cc84d90fSMax Reitz             error_setg_errno(errp, -ret, "Could not create image");
513cc84d90fSMax Reitz         }
514cc84d90fSMax Reitz     }
5155b7e1542SZhi Yong Wu 
51680168bffSLuiz Capitulino out:
51780168bffSLuiz Capitulino     g_free(cco.filename);
5185b7e1542SZhi Yong Wu     return ret;
519ea2384d3Sbellard }
520ea2384d3Sbellard 
521c282e1fdSChunyan Liu int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
52284a12e66SChristoph Hellwig {
52384a12e66SChristoph Hellwig     BlockDriver *drv;
524cc84d90fSMax Reitz     Error *local_err = NULL;
525cc84d90fSMax Reitz     int ret;
52684a12e66SChristoph Hellwig 
527b65a5e12SMax Reitz     drv = bdrv_find_protocol(filename, true, errp);
52884a12e66SChristoph Hellwig     if (drv == NULL) {
52916905d71SStefan Hajnoczi         return -ENOENT;
53084a12e66SChristoph Hellwig     }
53184a12e66SChristoph Hellwig 
532c282e1fdSChunyan Liu     ret = bdrv_create(drv, filename, opts, &local_err);
53384d18f06SMarkus Armbruster     if (local_err) {
534cc84d90fSMax Reitz         error_propagate(errp, local_err);
535cc84d90fSMax Reitz     }
536cc84d90fSMax Reitz     return ret;
53784a12e66SChristoph Hellwig }
53884a12e66SChristoph Hellwig 
5393baca891SKevin Wolf void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
540d34682cdSKevin Wolf {
541d34682cdSKevin Wolf     BlockDriver *drv = bs->drv;
5423baca891SKevin Wolf     Error *local_err = NULL;
543d34682cdSKevin Wolf 
544d34682cdSKevin Wolf     memset(&bs->bl, 0, sizeof(bs->bl));
545d34682cdSKevin Wolf 
546466ad822SKevin Wolf     if (!drv) {
5473baca891SKevin Wolf         return;
548466ad822SKevin Wolf     }
549466ad822SKevin Wolf 
550466ad822SKevin Wolf     /* Take some limits from the children as a default */
551466ad822SKevin Wolf     if (bs->file) {
5523baca891SKevin Wolf         bdrv_refresh_limits(bs->file, &local_err);
5533baca891SKevin Wolf         if (local_err) {
5543baca891SKevin Wolf             error_propagate(errp, local_err);
5553baca891SKevin Wolf             return;
5563baca891SKevin Wolf         }
557466ad822SKevin Wolf         bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
5582647fab5SPeter Lieven         bs->bl.max_transfer_length = bs->file->bl.max_transfer_length;
559339064d5SKevin Wolf         bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
560339064d5SKevin Wolf     } else {
561339064d5SKevin Wolf         bs->bl.opt_mem_alignment = 512;
562466ad822SKevin Wolf     }
563466ad822SKevin Wolf 
564466ad822SKevin Wolf     if (bs->backing_hd) {
5653baca891SKevin Wolf         bdrv_refresh_limits(bs->backing_hd, &local_err);
5663baca891SKevin Wolf         if (local_err) {
5673baca891SKevin Wolf             error_propagate(errp, local_err);
5683baca891SKevin Wolf             return;
5693baca891SKevin Wolf         }
570466ad822SKevin Wolf         bs->bl.opt_transfer_length =
571466ad822SKevin Wolf             MAX(bs->bl.opt_transfer_length,
572466ad822SKevin Wolf                 bs->backing_hd->bl.opt_transfer_length);
5732647fab5SPeter Lieven         bs->bl.max_transfer_length =
5742647fab5SPeter Lieven             MIN_NON_ZERO(bs->bl.max_transfer_length,
5752647fab5SPeter Lieven                          bs->backing_hd->bl.max_transfer_length);
576339064d5SKevin Wolf         bs->bl.opt_mem_alignment =
577339064d5SKevin Wolf             MAX(bs->bl.opt_mem_alignment,
578339064d5SKevin Wolf                 bs->backing_hd->bl.opt_mem_alignment);
579466ad822SKevin Wolf     }
580466ad822SKevin Wolf 
581466ad822SKevin Wolf     /* Then let the driver override it */
582466ad822SKevin Wolf     if (drv->bdrv_refresh_limits) {
5833baca891SKevin Wolf         drv->bdrv_refresh_limits(bs, errp);
584d34682cdSKevin Wolf     }
585d34682cdSKevin Wolf }
586d34682cdSKevin Wolf 
587892b7de8SEkaterina Tumanova /**
588892b7de8SEkaterina Tumanova  * Try to get @bs's logical and physical block size.
589892b7de8SEkaterina Tumanova  * On success, store them in @bsz struct and return 0.
590892b7de8SEkaterina Tumanova  * On failure return -errno.
591892b7de8SEkaterina Tumanova  * @bs must not be empty.
592892b7de8SEkaterina Tumanova  */
593892b7de8SEkaterina Tumanova int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
594892b7de8SEkaterina Tumanova {
595892b7de8SEkaterina Tumanova     BlockDriver *drv = bs->drv;
596892b7de8SEkaterina Tumanova 
597892b7de8SEkaterina Tumanova     if (drv && drv->bdrv_probe_blocksizes) {
598892b7de8SEkaterina Tumanova         return drv->bdrv_probe_blocksizes(bs, bsz);
599892b7de8SEkaterina Tumanova     }
600892b7de8SEkaterina Tumanova 
601892b7de8SEkaterina Tumanova     return -ENOTSUP;
602892b7de8SEkaterina Tumanova }
603892b7de8SEkaterina Tumanova 
604892b7de8SEkaterina Tumanova /**
605892b7de8SEkaterina Tumanova  * Try to get @bs's geometry (cyls, heads, sectors).
606892b7de8SEkaterina Tumanova  * On success, store them in @geo struct and return 0.
607892b7de8SEkaterina Tumanova  * On failure return -errno.
608892b7de8SEkaterina Tumanova  * @bs must not be empty.
609892b7de8SEkaterina Tumanova  */
610892b7de8SEkaterina Tumanova int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
611892b7de8SEkaterina Tumanova {
612892b7de8SEkaterina Tumanova     BlockDriver *drv = bs->drv;
613892b7de8SEkaterina Tumanova 
614892b7de8SEkaterina Tumanova     if (drv && drv->bdrv_probe_geometry) {
615892b7de8SEkaterina Tumanova         return drv->bdrv_probe_geometry(bs, geo);
616892b7de8SEkaterina Tumanova     }
617892b7de8SEkaterina Tumanova 
618892b7de8SEkaterina Tumanova     return -ENOTSUP;
619892b7de8SEkaterina Tumanova }
620892b7de8SEkaterina Tumanova 
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename receives the path of the created file and @size is the
 * capacity of that buffer in bytes.
 *
 * On POSIX hosts the file is created in $TMPDIR, or in /var/tmp when the
 * variable is not set.  -EOVERFLOW is returned if the path template does
 * not fit into @filename.
 *
 * Return 0 upon success, otherwise a negative errno value (on Windows the
 * negated GetLastError() code).
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* unlink() may overwrite errno, so remember why close() failed
         * before cleaning up the file. */
        int saved_errno = errno;

        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
656ea2384d3Sbellard 
657f3a5d3f8SChristoph Hellwig /*
658f3a5d3f8SChristoph Hellwig  * Detect host devices. By convention, /dev/cdrom[N] is always
659f3a5d3f8SChristoph Hellwig  * recognized as a host CDROM.
660f3a5d3f8SChristoph Hellwig  */
661f3a5d3f8SChristoph Hellwig static BlockDriver *find_hdev_driver(const char *filename)
662f3a5d3f8SChristoph Hellwig {
663508c7cb3SChristoph Hellwig     int score_max = 0, score;
664508c7cb3SChristoph Hellwig     BlockDriver *drv = NULL, *d;
665f3a5d3f8SChristoph Hellwig 
6668a22f02aSStefan Hajnoczi     QLIST_FOREACH(d, &bdrv_drivers, list) {
667508c7cb3SChristoph Hellwig         if (d->bdrv_probe_device) {
668508c7cb3SChristoph Hellwig             score = d->bdrv_probe_device(filename);
669508c7cb3SChristoph Hellwig             if (score > score_max) {
670508c7cb3SChristoph Hellwig                 score_max = score;
671508c7cb3SChristoph Hellwig                 drv = d;
672f3a5d3f8SChristoph Hellwig             }
673508c7cb3SChristoph Hellwig         }
674f3a5d3f8SChristoph Hellwig     }
675f3a5d3f8SChristoph Hellwig 
676508c7cb3SChristoph Hellwig     return drv;
677f3a5d3f8SChristoph Hellwig }
678f3a5d3f8SChristoph Hellwig 
67998289620SKevin Wolf BlockDriver *bdrv_find_protocol(const char *filename,
680b65a5e12SMax Reitz                                 bool allow_protocol_prefix,
681b65a5e12SMax Reitz                                 Error **errp)
68284a12e66SChristoph Hellwig {
68384a12e66SChristoph Hellwig     BlockDriver *drv1;
68484a12e66SChristoph Hellwig     char protocol[128];
68584a12e66SChristoph Hellwig     int len;
68684a12e66SChristoph Hellwig     const char *p;
68784a12e66SChristoph Hellwig 
68866f82ceeSKevin Wolf     /* TODO Drivers without bdrv_file_open must be specified explicitly */
68966f82ceeSKevin Wolf 
69039508e7aSChristoph Hellwig     /*
69139508e7aSChristoph Hellwig      * XXX(hch): we really should not let host device detection
69239508e7aSChristoph Hellwig      * override an explicit protocol specification, but moving this
69339508e7aSChristoph Hellwig      * later breaks access to device names with colons in them.
69439508e7aSChristoph Hellwig      * Thanks to the brain-dead persistent naming schemes on udev-
69539508e7aSChristoph Hellwig      * based Linux systems those actually are quite common.
69639508e7aSChristoph Hellwig      */
69784a12e66SChristoph Hellwig     drv1 = find_hdev_driver(filename);
69839508e7aSChristoph Hellwig     if (drv1) {
69984a12e66SChristoph Hellwig         return drv1;
70084a12e66SChristoph Hellwig     }
70139508e7aSChristoph Hellwig 
70298289620SKevin Wolf     if (!path_has_protocol(filename) || !allow_protocol_prefix) {
703ef810437SMax Reitz         return &bdrv_file;
70439508e7aSChristoph Hellwig     }
70598289620SKevin Wolf 
7069e0b22f4SStefan Hajnoczi     p = strchr(filename, ':');
7079e0b22f4SStefan Hajnoczi     assert(p != NULL);
70884a12e66SChristoph Hellwig     len = p - filename;
70984a12e66SChristoph Hellwig     if (len > sizeof(protocol) - 1)
71084a12e66SChristoph Hellwig         len = sizeof(protocol) - 1;
71184a12e66SChristoph Hellwig     memcpy(protocol, filename, len);
71284a12e66SChristoph Hellwig     protocol[len] = '\0';
71384a12e66SChristoph Hellwig     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
71484a12e66SChristoph Hellwig         if (drv1->protocol_name &&
71584a12e66SChristoph Hellwig             !strcmp(drv1->protocol_name, protocol)) {
71684a12e66SChristoph Hellwig             return drv1;
71784a12e66SChristoph Hellwig         }
71884a12e66SChristoph Hellwig     }
719b65a5e12SMax Reitz 
720b65a5e12SMax Reitz     error_setg(errp, "Unknown protocol '%s'", protocol);
72184a12e66SChristoph Hellwig     return NULL;
72284a12e66SChristoph Hellwig }
72384a12e66SChristoph Hellwig 
724c6684249SMarkus Armbruster /*
725c6684249SMarkus Armbruster  * Guess image format by probing its contents.
726c6684249SMarkus Armbruster  * This is not a good idea when your image is raw (CVE-2008-2004), but
727c6684249SMarkus Armbruster  * we do it anyway for backward compatibility.
728c6684249SMarkus Armbruster  *
729c6684249SMarkus Armbruster  * @buf         contains the image's first @buf_size bytes.
7307cddd372SKevin Wolf  * @buf_size    is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
7317cddd372SKevin Wolf  *              but can be smaller if the image file is smaller)
732c6684249SMarkus Armbruster  * @filename    is its filename.
733c6684249SMarkus Armbruster  *
734c6684249SMarkus Armbruster  * For all block drivers, call the bdrv_probe() method to get its
735c6684249SMarkus Armbruster  * probing score.
736c6684249SMarkus Armbruster  * Return the first block driver with the highest probing score.
737c6684249SMarkus Armbruster  */
73838f3ef57SKevin Wolf BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
739c6684249SMarkus Armbruster                             const char *filename)
740c6684249SMarkus Armbruster {
741c6684249SMarkus Armbruster     int score_max = 0, score;
742c6684249SMarkus Armbruster     BlockDriver *drv = NULL, *d;
743c6684249SMarkus Armbruster 
744c6684249SMarkus Armbruster     QLIST_FOREACH(d, &bdrv_drivers, list) {
745c6684249SMarkus Armbruster         if (d->bdrv_probe) {
746c6684249SMarkus Armbruster             score = d->bdrv_probe(buf, buf_size, filename);
747c6684249SMarkus Armbruster             if (score > score_max) {
748c6684249SMarkus Armbruster                 score_max = score;
749c6684249SMarkus Armbruster                 drv = d;
750c6684249SMarkus Armbruster             }
751c6684249SMarkus Armbruster         }
752c6684249SMarkus Armbruster     }
753c6684249SMarkus Armbruster 
754c6684249SMarkus Armbruster     return drv;
755c6684249SMarkus Armbruster }
756c6684249SMarkus Armbruster 
757f500a6d3SKevin Wolf static int find_image_format(BlockDriverState *bs, const char *filename,
75834b5d2c6SMax Reitz                              BlockDriver **pdrv, Error **errp)
759ea2384d3Sbellard {
760c6684249SMarkus Armbruster     BlockDriver *drv;
7617cddd372SKevin Wolf     uint8_t buf[BLOCK_PROBE_BUF_SIZE];
762f500a6d3SKevin Wolf     int ret = 0;
763f8ea0b00SNicholas Bellinger 
76408a00559SKevin Wolf     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
7658e895599SPaolo Bonzini     if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
766ef810437SMax Reitz         *pdrv = &bdrv_raw;
767c98ac35dSStefan Weil         return ret;
7681a396859SNicholas A. Bellinger     }
769f8ea0b00SNicholas Bellinger 
77083f64091Sbellard     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
771ea2384d3Sbellard     if (ret < 0) {
77234b5d2c6SMax Reitz         error_setg_errno(errp, -ret, "Could not read image for determining its "
77334b5d2c6SMax Reitz                          "format");
774c98ac35dSStefan Weil         *pdrv = NULL;
775c98ac35dSStefan Weil         return ret;
776ea2384d3Sbellard     }
777ea2384d3Sbellard 
778c6684249SMarkus Armbruster     drv = bdrv_probe_all(buf, ret, filename);
779c98ac35dSStefan Weil     if (!drv) {
78034b5d2c6SMax Reitz         error_setg(errp, "Could not determine image format: No compatible "
78134b5d2c6SMax Reitz                    "driver found");
782c98ac35dSStefan Weil         ret = -ENOENT;
783c98ac35dSStefan Weil     }
784c98ac35dSStefan Weil     *pdrv = drv;
785c98ac35dSStefan Weil     return ret;
786ea2384d3Sbellard }
787ea2384d3Sbellard 
78851762288SStefan Hajnoczi /**
78951762288SStefan Hajnoczi  * Set the current 'total_sectors' value
79065a9bb25SMarkus Armbruster  * Return 0 on success, -errno on error.
79151762288SStefan Hajnoczi  */
79251762288SStefan Hajnoczi static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
79351762288SStefan Hajnoczi {
79451762288SStefan Hajnoczi     BlockDriver *drv = bs->drv;
79551762288SStefan Hajnoczi 
796396759adSNicholas Bellinger     /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
797396759adSNicholas Bellinger     if (bs->sg)
798396759adSNicholas Bellinger         return 0;
799396759adSNicholas Bellinger 
80051762288SStefan Hajnoczi     /* query actual device if possible, otherwise just trust the hint */
80151762288SStefan Hajnoczi     if (drv->bdrv_getlength) {
80251762288SStefan Hajnoczi         int64_t length = drv->bdrv_getlength(bs);
80351762288SStefan Hajnoczi         if (length < 0) {
80451762288SStefan Hajnoczi             return length;
80551762288SStefan Hajnoczi         }
8067e382003SFam Zheng         hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
80751762288SStefan Hajnoczi     }
80851762288SStefan Hajnoczi 
80951762288SStefan Hajnoczi     bs->total_sectors = hint;
81051762288SStefan Hajnoczi     return 0;
81151762288SStefan Hajnoczi }
81251762288SStefan Hajnoczi 
813c3993cdcSStefan Hajnoczi /**
8149e8f1835SPaolo Bonzini  * Set open flags for a given discard mode
8159e8f1835SPaolo Bonzini  *
8169e8f1835SPaolo Bonzini  * Return 0 on success, -1 if the discard mode was invalid.
8179e8f1835SPaolo Bonzini  */
8189e8f1835SPaolo Bonzini int bdrv_parse_discard_flags(const char *mode, int *flags)
8199e8f1835SPaolo Bonzini {
8209e8f1835SPaolo Bonzini     *flags &= ~BDRV_O_UNMAP;
8219e8f1835SPaolo Bonzini 
8229e8f1835SPaolo Bonzini     if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
8239e8f1835SPaolo Bonzini         /* do nothing */
8249e8f1835SPaolo Bonzini     } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
8259e8f1835SPaolo Bonzini         *flags |= BDRV_O_UNMAP;
8269e8f1835SPaolo Bonzini     } else {
8279e8f1835SPaolo Bonzini         return -1;
8289e8f1835SPaolo Bonzini     }
8299e8f1835SPaolo Bonzini 
8309e8f1835SPaolo Bonzini     return 0;
8319e8f1835SPaolo Bonzini }
8329e8f1835SPaolo Bonzini 
8339e8f1835SPaolo Bonzini /**
834c3993cdcSStefan Hajnoczi  * Set open flags for a given cache mode
835c3993cdcSStefan Hajnoczi  *
836c3993cdcSStefan Hajnoczi  * Return 0 on success, -1 if the cache mode was invalid.
837c3993cdcSStefan Hajnoczi  */
838c3993cdcSStefan Hajnoczi int bdrv_parse_cache_flags(const char *mode, int *flags)
839c3993cdcSStefan Hajnoczi {
840c3993cdcSStefan Hajnoczi     *flags &= ~BDRV_O_CACHE_MASK;
841c3993cdcSStefan Hajnoczi 
842c3993cdcSStefan Hajnoczi     if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
843c3993cdcSStefan Hajnoczi         *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
84492196b2fSStefan Hajnoczi     } else if (!strcmp(mode, "directsync")) {
84592196b2fSStefan Hajnoczi         *flags |= BDRV_O_NOCACHE;
846c3993cdcSStefan Hajnoczi     } else if (!strcmp(mode, "writeback")) {
847c3993cdcSStefan Hajnoczi         *flags |= BDRV_O_CACHE_WB;
848c3993cdcSStefan Hajnoczi     } else if (!strcmp(mode, "unsafe")) {
849c3993cdcSStefan Hajnoczi         *flags |= BDRV_O_CACHE_WB;
850c3993cdcSStefan Hajnoczi         *flags |= BDRV_O_NO_FLUSH;
851c3993cdcSStefan Hajnoczi     } else if (!strcmp(mode, "writethrough")) {
852c3993cdcSStefan Hajnoczi         /* this is the default */
853c3993cdcSStefan Hajnoczi     } else {
854c3993cdcSStefan Hajnoczi         return -1;
855c3993cdcSStefan Hajnoczi     }
856c3993cdcSStefan Hajnoczi 
857c3993cdcSStefan Hajnoczi     return 0;
858c3993cdcSStefan Hajnoczi }
859c3993cdcSStefan Hajnoczi 
86053fec9d3SStefan Hajnoczi /**
86153fec9d3SStefan Hajnoczi  * The copy-on-read flag is actually a reference count so multiple users may
86253fec9d3SStefan Hajnoczi  * use the feature without worrying about clobbering its previous state.
86353fec9d3SStefan Hajnoczi  * Copy-on-read stays enabled until all users have called to disable it.
86453fec9d3SStefan Hajnoczi  */
86553fec9d3SStefan Hajnoczi void bdrv_enable_copy_on_read(BlockDriverState *bs)
86653fec9d3SStefan Hajnoczi {
86753fec9d3SStefan Hajnoczi     bs->copy_on_read++;
86853fec9d3SStefan Hajnoczi }
86953fec9d3SStefan Hajnoczi 
87053fec9d3SStefan Hajnoczi void bdrv_disable_copy_on_read(BlockDriverState *bs)
87153fec9d3SStefan Hajnoczi {
87253fec9d3SStefan Hajnoczi     assert(bs->copy_on_read > 0);
87353fec9d3SStefan Hajnoczi     bs->copy_on_read--;
87453fec9d3SStefan Hajnoczi }
87553fec9d3SStefan Hajnoczi 
8760b50cc88SKevin Wolf /*
877b1e6fc08SKevin Wolf  * Returns the flags that a temporary snapshot should get, based on the
878b1e6fc08SKevin Wolf  * originally requested flags (the originally requested image will have flags
879b1e6fc08SKevin Wolf  * like a backing file)
880b1e6fc08SKevin Wolf  */
881b1e6fc08SKevin Wolf static int bdrv_temp_snapshot_flags(int flags)
882b1e6fc08SKevin Wolf {
883b1e6fc08SKevin Wolf     return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
884b1e6fc08SKevin Wolf }
885b1e6fc08SKevin Wolf 
886b1e6fc08SKevin Wolf /*
8870b50cc88SKevin Wolf  * Returns the flags that bs->file should get, based on the given flags for
8880b50cc88SKevin Wolf  * the parent BDS
8890b50cc88SKevin Wolf  */
8900b50cc88SKevin Wolf static int bdrv_inherited_flags(int flags)
8910b50cc88SKevin Wolf {
8920b50cc88SKevin Wolf     /* Enable protocol handling, disable format probing for bs->file */
8930b50cc88SKevin Wolf     flags |= BDRV_O_PROTOCOL;
8940b50cc88SKevin Wolf 
8950b50cc88SKevin Wolf     /* Our block drivers take care to send flushes and respect unmap policy,
8960b50cc88SKevin Wolf      * so we can enable both unconditionally on lower layers. */
8970b50cc88SKevin Wolf     flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
8980b50cc88SKevin Wolf 
8990b50cc88SKevin Wolf     /* Clear flags that only apply to the top layer */
9005669b44dSKevin Wolf     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
9010b50cc88SKevin Wolf 
9020b50cc88SKevin Wolf     return flags;
9030b50cc88SKevin Wolf }
9040b50cc88SKevin Wolf 
905317fc44eSKevin Wolf /*
906317fc44eSKevin Wolf  * Returns the flags that bs->backing_hd should get, based on the given flags
907317fc44eSKevin Wolf  * for the parent BDS
908317fc44eSKevin Wolf  */
909317fc44eSKevin Wolf static int bdrv_backing_flags(int flags)
910317fc44eSKevin Wolf {
911317fc44eSKevin Wolf     /* backing files always opened read-only */
912317fc44eSKevin Wolf     flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
913317fc44eSKevin Wolf 
914317fc44eSKevin Wolf     /* snapshot=on is handled on the top layer */
9158bfea15dSKevin Wolf     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
916317fc44eSKevin Wolf 
917317fc44eSKevin Wolf     return flags;
918317fc44eSKevin Wolf }
919317fc44eSKevin Wolf 
9207b272452SKevin Wolf static int bdrv_open_flags(BlockDriverState *bs, int flags)
9217b272452SKevin Wolf {
9227b272452SKevin Wolf     int open_flags = flags | BDRV_O_CACHE_WB;
9237b272452SKevin Wolf 
9247b272452SKevin Wolf     /*
9257b272452SKevin Wolf      * Clear flags that are internal to the block layer before opening the
9267b272452SKevin Wolf      * image.
9277b272452SKevin Wolf      */
92820cca275SKevin Wolf     open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
9297b272452SKevin Wolf 
9307b272452SKevin Wolf     /*
9317b272452SKevin Wolf      * Snapshots should be writable.
9327b272452SKevin Wolf      */
9338bfea15dSKevin Wolf     if (flags & BDRV_O_TEMPORARY) {
9347b272452SKevin Wolf         open_flags |= BDRV_O_RDWR;
9357b272452SKevin Wolf     }
9367b272452SKevin Wolf 
9377b272452SKevin Wolf     return open_flags;
9387b272452SKevin Wolf }
9397b272452SKevin Wolf 
940636ea370SKevin Wolf static void bdrv_assign_node_name(BlockDriverState *bs,
9416913c0c2SBenoît Canet                                   const char *node_name,
9426913c0c2SBenoît Canet                                   Error **errp)
9436913c0c2SBenoît Canet {
9446913c0c2SBenoît Canet     if (!node_name) {
945636ea370SKevin Wolf         return;
9466913c0c2SBenoît Canet     }
9476913c0c2SBenoît Canet 
9489aebf3b8SKevin Wolf     /* Check for empty string or invalid characters */
949f5bebbbbSMarkus Armbruster     if (!id_wellformed(node_name)) {
9509aebf3b8SKevin Wolf         error_setg(errp, "Invalid node name");
951636ea370SKevin Wolf         return;
9526913c0c2SBenoît Canet     }
9536913c0c2SBenoît Canet 
9540c5e94eeSBenoît Canet     /* takes care of avoiding namespaces collisions */
9557f06d47eSMarkus Armbruster     if (blk_by_name(node_name)) {
9560c5e94eeSBenoît Canet         error_setg(errp, "node-name=%s is conflicting with a device id",
9570c5e94eeSBenoît Canet                    node_name);
958636ea370SKevin Wolf         return;
9590c5e94eeSBenoît Canet     }
9600c5e94eeSBenoît Canet 
9616913c0c2SBenoît Canet     /* takes care of avoiding duplicates node names */
9626913c0c2SBenoît Canet     if (bdrv_find_node(node_name)) {
9636913c0c2SBenoît Canet         error_setg(errp, "Duplicate node name");
964636ea370SKevin Wolf         return;
9656913c0c2SBenoît Canet     }
9666913c0c2SBenoît Canet 
9676913c0c2SBenoît Canet     /* copy node name into the bs and insert it into the graph list */
9686913c0c2SBenoît Canet     pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
9696913c0c2SBenoît Canet     QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
9706913c0c2SBenoît Canet }
9716913c0c2SBenoît Canet 
972b6ce07aaSKevin Wolf /*
97357915332SKevin Wolf  * Common part for opening disk images and files
974b6ad491aSKevin Wolf  *
975b6ad491aSKevin Wolf  * Removes all processed options from *options.
97657915332SKevin Wolf  */
977f500a6d3SKevin Wolf static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
97834b5d2c6SMax Reitz     QDict *options, int flags, BlockDriver *drv, Error **errp)
97957915332SKevin Wolf {
98057915332SKevin Wolf     int ret, open_flags;
981035fccdfSKevin Wolf     const char *filename;
9826913c0c2SBenoît Canet     const char *node_name = NULL;
98334b5d2c6SMax Reitz     Error *local_err = NULL;
98457915332SKevin Wolf 
98557915332SKevin Wolf     assert(drv != NULL);
9866405875cSPaolo Bonzini     assert(bs->file == NULL);
987707ff828SKevin Wolf     assert(options != NULL && bs->options != options);
98857915332SKevin Wolf 
98945673671SKevin Wolf     if (file != NULL) {
99045673671SKevin Wolf         filename = file->filename;
99145673671SKevin Wolf     } else {
99245673671SKevin Wolf         filename = qdict_get_try_str(options, "filename");
99345673671SKevin Wolf     }
99445673671SKevin Wolf 
995765003dbSKevin Wolf     if (drv->bdrv_needs_filename && !filename) {
996765003dbSKevin Wolf         error_setg(errp, "The '%s' block driver requires a file name",
997765003dbSKevin Wolf                    drv->format_name);
998765003dbSKevin Wolf         return -EINVAL;
999765003dbSKevin Wolf     }
1000765003dbSKevin Wolf 
100145673671SKevin Wolf     trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
100228dcee10SStefan Hajnoczi 
10036913c0c2SBenoît Canet     node_name = qdict_get_try_str(options, "node-name");
1004636ea370SKevin Wolf     bdrv_assign_node_name(bs, node_name, &local_err);
10050fb6395cSMarkus Armbruster     if (local_err) {
1006636ea370SKevin Wolf         error_propagate(errp, local_err);
1007636ea370SKevin Wolf         return -EINVAL;
10086913c0c2SBenoît Canet     }
10096913c0c2SBenoît Canet     qdict_del(options, "node-name");
10106913c0c2SBenoît Canet 
10115d186eb0SKevin Wolf     /* bdrv_open() with directly using a protocol as drv. This layer is already
10125d186eb0SKevin Wolf      * opened, so assign it to bs (while file becomes a closed BlockDriverState)
10135d186eb0SKevin Wolf      * and return immediately. */
10145d186eb0SKevin Wolf     if (file != NULL && drv->bdrv_file_open) {
10155d186eb0SKevin Wolf         bdrv_swap(file, bs);
10165d186eb0SKevin Wolf         return 0;
10175d186eb0SKevin Wolf     }
10185d186eb0SKevin Wolf 
101957915332SKevin Wolf     bs->open_flags = flags;
10201b7fd729SPaolo Bonzini     bs->guest_block_size = 512;
1021c25f53b0SPaolo Bonzini     bs->request_alignment = 512;
10220d51b4deSAsias He     bs->zero_beyond_eof = true;
1023b64ec4e4SFam Zheng     open_flags = bdrv_open_flags(bs, flags);
1024b64ec4e4SFam Zheng     bs->read_only = !(open_flags & BDRV_O_RDWR);
1025b64ec4e4SFam Zheng 
1026b64ec4e4SFam Zheng     if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
10278f94a6e4SKevin Wolf         error_setg(errp,
10288f94a6e4SKevin Wolf                    !bs->read_only && bdrv_is_whitelisted(drv, true)
10298f94a6e4SKevin Wolf                         ? "Driver '%s' can only be used for read-only devices"
10308f94a6e4SKevin Wolf                         : "Driver '%s' is not whitelisted",
10318f94a6e4SKevin Wolf                    drv->format_name);
1032b64ec4e4SFam Zheng         return -ENOTSUP;
1033b64ec4e4SFam Zheng     }
103457915332SKevin Wolf 
103553fec9d3SStefan Hajnoczi     assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
10360ebd24e0SKevin Wolf     if (flags & BDRV_O_COPY_ON_READ) {
10370ebd24e0SKevin Wolf         if (!bs->read_only) {
103853fec9d3SStefan Hajnoczi             bdrv_enable_copy_on_read(bs);
10390ebd24e0SKevin Wolf         } else {
10400ebd24e0SKevin Wolf             error_setg(errp, "Can't use copy-on-read on read-only device");
10410ebd24e0SKevin Wolf             return -EINVAL;
10420ebd24e0SKevin Wolf         }
104353fec9d3SStefan Hajnoczi     }
104453fec9d3SStefan Hajnoczi 
1045c2ad1b0cSKevin Wolf     if (filename != NULL) {
104657915332SKevin Wolf         pstrcpy(bs->filename, sizeof(bs->filename), filename);
1047c2ad1b0cSKevin Wolf     } else {
1048c2ad1b0cSKevin Wolf         bs->filename[0] = '\0';
1049c2ad1b0cSKevin Wolf     }
105091af7014SMax Reitz     pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
105157915332SKevin Wolf 
105257915332SKevin Wolf     bs->drv = drv;
10537267c094SAnthony Liguori     bs->opaque = g_malloc0(drv->instance_size);
105457915332SKevin Wolf 
105503f541bdSStefan Hajnoczi     bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
1056e7c63796SStefan Hajnoczi 
105766f82ceeSKevin Wolf     /* Open the image, either directly or using a protocol */
105866f82ceeSKevin Wolf     if (drv->bdrv_file_open) {
10595d186eb0SKevin Wolf         assert(file == NULL);
1060030be321SBenoît Canet         assert(!drv->bdrv_needs_filename || filename != NULL);
106134b5d2c6SMax Reitz         ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
1062f500a6d3SKevin Wolf     } else {
10632af5ef70SKevin Wolf         if (file == NULL) {
106434b5d2c6SMax Reitz             error_setg(errp, "Can't use '%s' as a block driver for the "
106534b5d2c6SMax Reitz                        "protocol level", drv->format_name);
10662af5ef70SKevin Wolf             ret = -EINVAL;
10672af5ef70SKevin Wolf             goto free_and_fail;
10682af5ef70SKevin Wolf         }
1069f500a6d3SKevin Wolf         bs->file = file;
107034b5d2c6SMax Reitz         ret = drv->bdrv_open(bs, options, open_flags, &local_err);
107166f82ceeSKevin Wolf     }
107266f82ceeSKevin Wolf 
107357915332SKevin Wolf     if (ret < 0) {
107484d18f06SMarkus Armbruster         if (local_err) {
107534b5d2c6SMax Reitz             error_propagate(errp, local_err);
10762fa9aa59SDunrong Huang         } else if (bs->filename[0]) {
10772fa9aa59SDunrong Huang             error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
107834b5d2c6SMax Reitz         } else {
107934b5d2c6SMax Reitz             error_setg_errno(errp, -ret, "Could not open image");
108034b5d2c6SMax Reitz         }
108157915332SKevin Wolf         goto free_and_fail;
108257915332SKevin Wolf     }
108357915332SKevin Wolf 
1084a1f688f4SMarkus Armbruster     if (bs->encrypted) {
1085a1f688f4SMarkus Armbruster         error_report("Encrypted images are deprecated");
1086a1f688f4SMarkus Armbruster         error_printf("Support for them will be removed in a future release.\n"
1087a1f688f4SMarkus Armbruster                      "You can use 'qemu-img convert' to convert your image"
1088a1f688f4SMarkus Armbruster                      " to an unencrypted one.\n");
1089a1f688f4SMarkus Armbruster     }
1090a1f688f4SMarkus Armbruster 
109151762288SStefan Hajnoczi     ret = refresh_total_sectors(bs, bs->total_sectors);
109251762288SStefan Hajnoczi     if (ret < 0) {
109334b5d2c6SMax Reitz         error_setg_errno(errp, -ret, "Could not refresh total sector count");
109451762288SStefan Hajnoczi         goto free_and_fail;
109557915332SKevin Wolf     }
109651762288SStefan Hajnoczi 
10973baca891SKevin Wolf     bdrv_refresh_limits(bs, &local_err);
10983baca891SKevin Wolf     if (local_err) {
10993baca891SKevin Wolf         error_propagate(errp, local_err);
11003baca891SKevin Wolf         ret = -EINVAL;
11013baca891SKevin Wolf         goto free_and_fail;
11023baca891SKevin Wolf     }
11033baca891SKevin Wolf 
1104c25f53b0SPaolo Bonzini     assert(bdrv_opt_mem_align(bs) != 0);
110547ea2de2SKevin Wolf     assert((bs->request_alignment != 0) || bs->sg);
110657915332SKevin Wolf     return 0;
110757915332SKevin Wolf 
110857915332SKevin Wolf free_and_fail:
110966f82ceeSKevin Wolf     bs->file = NULL;
11107267c094SAnthony Liguori     g_free(bs->opaque);
111157915332SKevin Wolf     bs->opaque = NULL;
111257915332SKevin Wolf     bs->drv = NULL;
111357915332SKevin Wolf     return ret;
111457915332SKevin Wolf }
111557915332SKevin Wolf 
11165e5c4f63SKevin Wolf static QDict *parse_json_filename(const char *filename, Error **errp)
11175e5c4f63SKevin Wolf {
11185e5c4f63SKevin Wolf     QObject *options_obj;
11195e5c4f63SKevin Wolf     QDict *options;
11205e5c4f63SKevin Wolf     int ret;
11215e5c4f63SKevin Wolf 
11225e5c4f63SKevin Wolf     ret = strstart(filename, "json:", &filename);
11235e5c4f63SKevin Wolf     assert(ret);
11245e5c4f63SKevin Wolf 
11255e5c4f63SKevin Wolf     options_obj = qobject_from_json(filename);
11265e5c4f63SKevin Wolf     if (!options_obj) {
11275e5c4f63SKevin Wolf         error_setg(errp, "Could not parse the JSON options");
11285e5c4f63SKevin Wolf         return NULL;
11295e5c4f63SKevin Wolf     }
11305e5c4f63SKevin Wolf 
11315e5c4f63SKevin Wolf     if (qobject_type(options_obj) != QTYPE_QDICT) {
11325e5c4f63SKevin Wolf         qobject_decref(options_obj);
11335e5c4f63SKevin Wolf         error_setg(errp, "Invalid JSON object given");
11345e5c4f63SKevin Wolf         return NULL;
11355e5c4f63SKevin Wolf     }
11365e5c4f63SKevin Wolf 
11375e5c4f63SKevin Wolf     options = qobject_to_qdict(options_obj);
11385e5c4f63SKevin Wolf     qdict_flatten(options);
11395e5c4f63SKevin Wolf 
11405e5c4f63SKevin Wolf     return options;
11415e5c4f63SKevin Wolf }
11425e5c4f63SKevin Wolf 
114357915332SKevin Wolf /*
1144f54120ffSKevin Wolf  * Fills in default options for opening images and converts the legacy
1145f54120ffSKevin Wolf  * filename/flags pair to option QDict entries.
1146f54120ffSKevin Wolf  */
11475e5c4f63SKevin Wolf static int bdrv_fill_options(QDict **options, const char **pfilename, int flags,
114817b005f1SKevin Wolf                              BlockDriver *drv, Error **errp)
1149f54120ffSKevin Wolf {
11505e5c4f63SKevin Wolf     const char *filename = *pfilename;
1151f54120ffSKevin Wolf     const char *drvname;
1152462f5bcfSKevin Wolf     bool protocol = flags & BDRV_O_PROTOCOL;
1153f54120ffSKevin Wolf     bool parse_filename = false;
1154f54120ffSKevin Wolf     Error *local_err = NULL;
1155f54120ffSKevin Wolf 
11565e5c4f63SKevin Wolf     /* Parse json: pseudo-protocol */
11575e5c4f63SKevin Wolf     if (filename && g_str_has_prefix(filename, "json:")) {
11585e5c4f63SKevin Wolf         QDict *json_options = parse_json_filename(filename, &local_err);
11595e5c4f63SKevin Wolf         if (local_err) {
11605e5c4f63SKevin Wolf             error_propagate(errp, local_err);
11615e5c4f63SKevin Wolf             return -EINVAL;
11625e5c4f63SKevin Wolf         }
11635e5c4f63SKevin Wolf 
11645e5c4f63SKevin Wolf         /* Options given in the filename have lower priority than options
11655e5c4f63SKevin Wolf          * specified directly */
11665e5c4f63SKevin Wolf         qdict_join(*options, json_options, false);
11675e5c4f63SKevin Wolf         QDECREF(json_options);
11685e5c4f63SKevin Wolf         *pfilename = filename = NULL;
11695e5c4f63SKevin Wolf     }
11705e5c4f63SKevin Wolf 
1171f54120ffSKevin Wolf     /* Fetch the file name from the options QDict if necessary */
117217b005f1SKevin Wolf     if (protocol && filename) {
1173f54120ffSKevin Wolf         if (!qdict_haskey(*options, "filename")) {
1174f54120ffSKevin Wolf             qdict_put(*options, "filename", qstring_from_str(filename));
1175f54120ffSKevin Wolf             parse_filename = true;
1176f54120ffSKevin Wolf         } else {
1177f54120ffSKevin Wolf             error_setg(errp, "Can't specify 'file' and 'filename' options at "
1178f54120ffSKevin Wolf                              "the same time");
1179f54120ffSKevin Wolf             return -EINVAL;
1180f54120ffSKevin Wolf         }
1181f54120ffSKevin Wolf     }
1182f54120ffSKevin Wolf 
1183f54120ffSKevin Wolf     /* Find the right block driver */
1184f54120ffSKevin Wolf     filename = qdict_get_try_str(*options, "filename");
1185f54120ffSKevin Wolf     drvname = qdict_get_try_str(*options, "driver");
1186f54120ffSKevin Wolf 
118717b005f1SKevin Wolf     if (drv) {
118817b005f1SKevin Wolf         if (drvname) {
118917b005f1SKevin Wolf             error_setg(errp, "Driver specified twice");
119017b005f1SKevin Wolf             return -EINVAL;
119117b005f1SKevin Wolf         }
119217b005f1SKevin Wolf         drvname = drv->format_name;
119317b005f1SKevin Wolf         qdict_put(*options, "driver", qstring_from_str(drvname));
119417b005f1SKevin Wolf     } else {
119517b005f1SKevin Wolf         if (!drvname && protocol) {
1196f54120ffSKevin Wolf             if (filename) {
1197b65a5e12SMax Reitz                 drv = bdrv_find_protocol(filename, parse_filename, errp);
1198f54120ffSKevin Wolf                 if (!drv) {
1199f54120ffSKevin Wolf                     return -EINVAL;
1200f54120ffSKevin Wolf                 }
1201f54120ffSKevin Wolf 
1202f54120ffSKevin Wolf                 drvname = drv->format_name;
1203f54120ffSKevin Wolf                 qdict_put(*options, "driver", qstring_from_str(drvname));
1204f54120ffSKevin Wolf             } else {
1205f54120ffSKevin Wolf                 error_setg(errp, "Must specify either driver or file");
1206f54120ffSKevin Wolf                 return -EINVAL;
1207f54120ffSKevin Wolf             }
120817b005f1SKevin Wolf         } else if (drvname) {
1209f54120ffSKevin Wolf             drv = bdrv_find_format(drvname);
1210f54120ffSKevin Wolf             if (!drv) {
1211f54120ffSKevin Wolf                 error_setg(errp, "Unknown driver '%s'", drvname);
1212f54120ffSKevin Wolf                 return -ENOENT;
1213f54120ffSKevin Wolf             }
121417b005f1SKevin Wolf         }
121517b005f1SKevin Wolf     }
121617b005f1SKevin Wolf 
121717b005f1SKevin Wolf     assert(drv || !protocol);
1218f54120ffSKevin Wolf 
1219f54120ffSKevin Wolf     /* Driver-specific filename parsing */
122017b005f1SKevin Wolf     if (drv && drv->bdrv_parse_filename && parse_filename) {
1221f54120ffSKevin Wolf         drv->bdrv_parse_filename(filename, *options, &local_err);
1222f54120ffSKevin Wolf         if (local_err) {
1223f54120ffSKevin Wolf             error_propagate(errp, local_err);
1224f54120ffSKevin Wolf             return -EINVAL;
1225f54120ffSKevin Wolf         }
1226f54120ffSKevin Wolf 
1227f54120ffSKevin Wolf         if (!drv->bdrv_needs_filename) {
1228f54120ffSKevin Wolf             qdict_del(*options, "filename");
1229f54120ffSKevin Wolf         }
1230f54120ffSKevin Wolf     }
1231f54120ffSKevin Wolf 
1232f54120ffSKevin Wolf     return 0;
1233f54120ffSKevin Wolf }
1234f54120ffSKevin Wolf 
12358d24cce1SFam Zheng void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
12368d24cce1SFam Zheng {
12378d24cce1SFam Zheng 
1238826b6ca0SFam Zheng     if (bs->backing_hd) {
1239826b6ca0SFam Zheng         assert(bs->backing_blocker);
1240826b6ca0SFam Zheng         bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
1241826b6ca0SFam Zheng     } else if (backing_hd) {
1242826b6ca0SFam Zheng         error_setg(&bs->backing_blocker,
124381e5f78aSAlberto Garcia                    "node is used as backing hd of '%s'",
124481e5f78aSAlberto Garcia                    bdrv_get_device_or_node_name(bs));
1245826b6ca0SFam Zheng     }
1246826b6ca0SFam Zheng 
12478d24cce1SFam Zheng     bs->backing_hd = backing_hd;
12488d24cce1SFam Zheng     if (!backing_hd) {
1249826b6ca0SFam Zheng         error_free(bs->backing_blocker);
1250826b6ca0SFam Zheng         bs->backing_blocker = NULL;
12518d24cce1SFam Zheng         goto out;
12528d24cce1SFam Zheng     }
12538d24cce1SFam Zheng     bs->open_flags &= ~BDRV_O_NO_BACKING;
12548d24cce1SFam Zheng     pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
12558d24cce1SFam Zheng     pstrcpy(bs->backing_format, sizeof(bs->backing_format),
12568d24cce1SFam Zheng             backing_hd->drv ? backing_hd->drv->format_name : "");
1257826b6ca0SFam Zheng 
1258826b6ca0SFam Zheng     bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
1259826b6ca0SFam Zheng     /* Otherwise we won't be able to commit due to check in bdrv_commit */
1260bb00021dSFam Zheng     bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1261826b6ca0SFam Zheng                     bs->backing_blocker);
12628d24cce1SFam Zheng out:
12633baca891SKevin Wolf     bdrv_refresh_limits(bs, NULL);
12648d24cce1SFam Zheng }
12658d24cce1SFam Zheng 
126631ca6d07SKevin Wolf /*
126731ca6d07SKevin Wolf  * Opens the backing file for a BlockDriverState if not yet open
126831ca6d07SKevin Wolf  *
126931ca6d07SKevin Wolf  * options is a QDict of options to pass to the block drivers, or NULL for an
127031ca6d07SKevin Wolf  * empty set of options. The reference to the QDict is transferred to this
127131ca6d07SKevin Wolf  * function (even on failure), so if the caller intends to reuse the dictionary,
127231ca6d07SKevin Wolf  * it needs to use QINCREF() before calling bdrv_file_open.
127331ca6d07SKevin Wolf  */
127434b5d2c6SMax Reitz int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
12759156df12SPaolo Bonzini {
12761ba4b6a5SBenoît Canet     char *backing_filename = g_malloc0(PATH_MAX);
1277317fc44eSKevin Wolf     int ret = 0;
12788d24cce1SFam Zheng     BlockDriverState *backing_hd;
127934b5d2c6SMax Reitz     Error *local_err = NULL;
12809156df12SPaolo Bonzini 
12819156df12SPaolo Bonzini     if (bs->backing_hd != NULL) {
128231ca6d07SKevin Wolf         QDECREF(options);
12831ba4b6a5SBenoît Canet         goto free_exit;
12849156df12SPaolo Bonzini     }
12859156df12SPaolo Bonzini 
128631ca6d07SKevin Wolf     /* NULL means an empty set of options */
128731ca6d07SKevin Wolf     if (options == NULL) {
128831ca6d07SKevin Wolf         options = qdict_new();
128931ca6d07SKevin Wolf     }
129031ca6d07SKevin Wolf 
12919156df12SPaolo Bonzini     bs->open_flags &= ~BDRV_O_NO_BACKING;
12921cb6f506SKevin Wolf     if (qdict_haskey(options, "file.filename")) {
12931cb6f506SKevin Wolf         backing_filename[0] = '\0';
12941cb6f506SKevin Wolf     } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
129531ca6d07SKevin Wolf         QDECREF(options);
12961ba4b6a5SBenoît Canet         goto free_exit;
1297dbecebddSFam Zheng     } else {
12989f07429eSMax Reitz         bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
12999f07429eSMax Reitz                                        &local_err);
13009f07429eSMax Reitz         if (local_err) {
13019f07429eSMax Reitz             ret = -EINVAL;
13029f07429eSMax Reitz             error_propagate(errp, local_err);
13039f07429eSMax Reitz             QDECREF(options);
13049f07429eSMax Reitz             goto free_exit;
13059f07429eSMax Reitz         }
13069156df12SPaolo Bonzini     }
13079156df12SPaolo Bonzini 
13088ee79e70SKevin Wolf     if (!bs->drv || !bs->drv->supports_backing) {
13098ee79e70SKevin Wolf         ret = -EINVAL;
13108ee79e70SKevin Wolf         error_setg(errp, "Driver doesn't support backing files");
13118ee79e70SKevin Wolf         QDECREF(options);
13128ee79e70SKevin Wolf         goto free_exit;
13138ee79e70SKevin Wolf     }
13148ee79e70SKevin Wolf 
1315e4e9986bSMarkus Armbruster     backing_hd = bdrv_new();
13168d24cce1SFam Zheng 
1317c5f6e493SKevin Wolf     if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1318c5f6e493SKevin Wolf         qdict_put(options, "driver", qstring_from_str(bs->backing_format));
13199156df12SPaolo Bonzini     }
13209156df12SPaolo Bonzini 
1321f67503e5SMax Reitz     assert(bs->backing_hd == NULL);
13228d24cce1SFam Zheng     ret = bdrv_open(&backing_hd,
1323ddf5636dSMax Reitz                     *backing_filename ? backing_filename : NULL, NULL, options,
1324c5f6e493SKevin Wolf                     bdrv_backing_flags(bs->open_flags), NULL, &local_err);
13259156df12SPaolo Bonzini     if (ret < 0) {
13268d24cce1SFam Zheng         bdrv_unref(backing_hd);
13278d24cce1SFam Zheng         backing_hd = NULL;
13289156df12SPaolo Bonzini         bs->open_flags |= BDRV_O_NO_BACKING;
1329b04b6b6eSFam Zheng         error_setg(errp, "Could not open backing file: %s",
1330b04b6b6eSFam Zheng                    error_get_pretty(local_err));
1331b04b6b6eSFam Zheng         error_free(local_err);
13321ba4b6a5SBenoît Canet         goto free_exit;
13339156df12SPaolo Bonzini     }
13348d24cce1SFam Zheng     bdrv_set_backing_hd(bs, backing_hd);
1335d80ac658SPeter Feiner 
13361ba4b6a5SBenoît Canet free_exit:
13371ba4b6a5SBenoît Canet     g_free(backing_filename);
13381ba4b6a5SBenoît Canet     return ret;
13399156df12SPaolo Bonzini }
13409156df12SPaolo Bonzini 
1341b6ce07aaSKevin Wolf /*
1342da557aacSMax Reitz  * Opens a disk image whose options are given as BlockdevRef in another block
1343da557aacSMax Reitz  * device's options.
1344da557aacSMax Reitz  *
1345da557aacSMax Reitz  * If allow_none is true, no image will be opened if filename is false and no
1346da557aacSMax Reitz  * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1347da557aacSMax Reitz  *
1348da557aacSMax Reitz  * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1349da557aacSMax Reitz  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1350da557aacSMax Reitz  * itself, all options starting with "${bdref_key}." are considered part of the
1351da557aacSMax Reitz  * BlockdevRef.
1352da557aacSMax Reitz  *
1353da557aacSMax Reitz  * The BlockdevRef will be removed from the options QDict.
1354f67503e5SMax Reitz  *
1355f67503e5SMax Reitz  * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
1356da557aacSMax Reitz  */
1357da557aacSMax Reitz int bdrv_open_image(BlockDriverState **pbs, const char *filename,
1358da557aacSMax Reitz                     QDict *options, const char *bdref_key, int flags,
1359f7d9fd8cSMax Reitz                     bool allow_none, Error **errp)
1360da557aacSMax Reitz {
1361da557aacSMax Reitz     QDict *image_options;
1362da557aacSMax Reitz     int ret;
1363da557aacSMax Reitz     char *bdref_key_dot;
1364da557aacSMax Reitz     const char *reference;
1365da557aacSMax Reitz 
1366f67503e5SMax Reitz     assert(pbs);
1367f67503e5SMax Reitz     assert(*pbs == NULL);
1368f67503e5SMax Reitz 
1369da557aacSMax Reitz     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1370da557aacSMax Reitz     qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1371da557aacSMax Reitz     g_free(bdref_key_dot);
1372da557aacSMax Reitz 
1373da557aacSMax Reitz     reference = qdict_get_try_str(options, bdref_key);
1374da557aacSMax Reitz     if (!filename && !reference && !qdict_size(image_options)) {
1375da557aacSMax Reitz         if (allow_none) {
1376da557aacSMax Reitz             ret = 0;
1377da557aacSMax Reitz         } else {
1378da557aacSMax Reitz             error_setg(errp, "A block device must be specified for \"%s\"",
1379da557aacSMax Reitz                        bdref_key);
1380da557aacSMax Reitz             ret = -EINVAL;
1381da557aacSMax Reitz         }
1382b20e61e0SMarkus Armbruster         QDECREF(image_options);
1383da557aacSMax Reitz         goto done;
1384da557aacSMax Reitz     }
1385da557aacSMax Reitz 
1386f7d9fd8cSMax Reitz     ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp);
1387da557aacSMax Reitz 
1388da557aacSMax Reitz done:
1389da557aacSMax Reitz     qdict_del(options, bdref_key);
1390da557aacSMax Reitz     return ret;
1391da557aacSMax Reitz }
1392da557aacSMax Reitz 
13936b8aeca5SChen Gang int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
1394b998875dSKevin Wolf {
1395b998875dSKevin Wolf     /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
13961ba4b6a5SBenoît Canet     char *tmp_filename = g_malloc0(PATH_MAX + 1);
1397b998875dSKevin Wolf     int64_t total_size;
139883d0521aSChunyan Liu     QemuOpts *opts = NULL;
1399b998875dSKevin Wolf     QDict *snapshot_options;
1400b998875dSKevin Wolf     BlockDriverState *bs_snapshot;
1401b998875dSKevin Wolf     Error *local_err;
1402b998875dSKevin Wolf     int ret;
1403b998875dSKevin Wolf 
1404b998875dSKevin Wolf     /* if snapshot, we create a temporary backing file and open it
1405b998875dSKevin Wolf        instead of opening 'filename' directly */
1406b998875dSKevin Wolf 
1407b998875dSKevin Wolf     /* Get the required size from the image */
1408f187743aSKevin Wolf     total_size = bdrv_getlength(bs);
1409f187743aSKevin Wolf     if (total_size < 0) {
14106b8aeca5SChen Gang         ret = total_size;
1411f187743aSKevin Wolf         error_setg_errno(errp, -total_size, "Could not get image size");
14121ba4b6a5SBenoît Canet         goto out;
1413f187743aSKevin Wolf     }
1414b998875dSKevin Wolf 
1415b998875dSKevin Wolf     /* Create the temporary image */
14161ba4b6a5SBenoît Canet     ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1417b998875dSKevin Wolf     if (ret < 0) {
1418b998875dSKevin Wolf         error_setg_errno(errp, -ret, "Could not get temporary filename");
14191ba4b6a5SBenoît Canet         goto out;
1420b998875dSKevin Wolf     }
1421b998875dSKevin Wolf 
1422ef810437SMax Reitz     opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1423c282e1fdSChunyan Liu                             &error_abort);
142439101f25SMarkus Armbruster     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1425ef810437SMax Reitz     ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err);
142683d0521aSChunyan Liu     qemu_opts_del(opts);
1427b998875dSKevin Wolf     if (ret < 0) {
1428b998875dSKevin Wolf         error_setg_errno(errp, -ret, "Could not create temporary overlay "
1429b998875dSKevin Wolf                          "'%s': %s", tmp_filename,
1430b998875dSKevin Wolf                          error_get_pretty(local_err));
1431b998875dSKevin Wolf         error_free(local_err);
14321ba4b6a5SBenoît Canet         goto out;
1433b998875dSKevin Wolf     }
1434b998875dSKevin Wolf 
1435b998875dSKevin Wolf     /* Prepare a new options QDict for the temporary file */
1436b998875dSKevin Wolf     snapshot_options = qdict_new();
1437b998875dSKevin Wolf     qdict_put(snapshot_options, "file.driver",
1438b998875dSKevin Wolf               qstring_from_str("file"));
1439b998875dSKevin Wolf     qdict_put(snapshot_options, "file.filename",
1440b998875dSKevin Wolf               qstring_from_str(tmp_filename));
1441b998875dSKevin Wolf 
1442e4e9986bSMarkus Armbruster     bs_snapshot = bdrv_new();
1443b998875dSKevin Wolf 
1444b998875dSKevin Wolf     ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1445ef810437SMax Reitz                     flags, &bdrv_qcow2, &local_err);
1446b998875dSKevin Wolf     if (ret < 0) {
1447b998875dSKevin Wolf         error_propagate(errp, local_err);
14481ba4b6a5SBenoît Canet         goto out;
1449b998875dSKevin Wolf     }
1450b998875dSKevin Wolf 
1451b998875dSKevin Wolf     bdrv_append(bs_snapshot, bs);
14521ba4b6a5SBenoît Canet 
14531ba4b6a5SBenoît Canet out:
14541ba4b6a5SBenoît Canet     g_free(tmp_filename);
14556b8aeca5SChen Gang     return ret;
1456b998875dSKevin Wolf }
1457b998875dSKevin Wolf 
1458da557aacSMax Reitz /*
1459b6ce07aaSKevin Wolf  * Opens a disk image (raw, qcow2, vmdk, ...)
1460de9c0cecSKevin Wolf  *
1461de9c0cecSKevin Wolf  * options is a QDict of options to pass to the block drivers, or NULL for an
1462de9c0cecSKevin Wolf  * empty set of options. The reference to the QDict belongs to the block layer
1463de9c0cecSKevin Wolf  * after the call (even on failure), so if the caller intends to reuse the
1464de9c0cecSKevin Wolf  * dictionary, it needs to use QINCREF() before calling bdrv_open.
1465f67503e5SMax Reitz  *
1466f67503e5SMax Reitz  * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1467f67503e5SMax Reitz  * If it is not NULL, the referenced BDS will be reused.
1468ddf5636dSMax Reitz  *
1469ddf5636dSMax Reitz  * The reference parameter may be used to specify an existing block device which
1470ddf5636dSMax Reitz  * should be opened. If specified, neither options nor a filename may be given,
1471ddf5636dSMax Reitz  * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1472b6ce07aaSKevin Wolf  */
1473ddf5636dSMax Reitz int bdrv_open(BlockDriverState **pbs, const char *filename,
1474ddf5636dSMax Reitz               const char *reference, QDict *options, int flags,
1475ddf5636dSMax Reitz               BlockDriver *drv, Error **errp)
1476ea2384d3Sbellard {
1477b6ce07aaSKevin Wolf     int ret;
1478f67503e5SMax Reitz     BlockDriverState *file = NULL, *bs;
147974fe54f2SKevin Wolf     const char *drvname;
148034b5d2c6SMax Reitz     Error *local_err = NULL;
1481b1e6fc08SKevin Wolf     int snapshot_flags = 0;
148233e3963eSbellard 
1483f67503e5SMax Reitz     assert(pbs);
1484f67503e5SMax Reitz 
1485ddf5636dSMax Reitz     if (reference) {
1486ddf5636dSMax Reitz         bool options_non_empty = options ? qdict_size(options) : false;
1487ddf5636dSMax Reitz         QDECREF(options);
1488ddf5636dSMax Reitz 
1489ddf5636dSMax Reitz         if (*pbs) {
1490ddf5636dSMax Reitz             error_setg(errp, "Cannot reuse an existing BDS when referencing "
1491ddf5636dSMax Reitz                        "another block device");
1492ddf5636dSMax Reitz             return -EINVAL;
1493ddf5636dSMax Reitz         }
1494ddf5636dSMax Reitz 
1495ddf5636dSMax Reitz         if (filename || options_non_empty) {
1496ddf5636dSMax Reitz             error_setg(errp, "Cannot reference an existing block device with "
1497ddf5636dSMax Reitz                        "additional options or a new filename");
1498ddf5636dSMax Reitz             return -EINVAL;
1499ddf5636dSMax Reitz         }
1500ddf5636dSMax Reitz 
1501ddf5636dSMax Reitz         bs = bdrv_lookup_bs(reference, reference, errp);
1502ddf5636dSMax Reitz         if (!bs) {
1503ddf5636dSMax Reitz             return -ENODEV;
1504ddf5636dSMax Reitz         }
1505ddf5636dSMax Reitz         bdrv_ref(bs);
1506ddf5636dSMax Reitz         *pbs = bs;
1507ddf5636dSMax Reitz         return 0;
1508ddf5636dSMax Reitz     }
1509ddf5636dSMax Reitz 
1510f67503e5SMax Reitz     if (*pbs) {
1511f67503e5SMax Reitz         bs = *pbs;
1512f67503e5SMax Reitz     } else {
1513e4e9986bSMarkus Armbruster         bs = bdrv_new();
1514f67503e5SMax Reitz     }
1515f67503e5SMax Reitz 
1516de9c0cecSKevin Wolf     /* NULL means an empty set of options */
1517de9c0cecSKevin Wolf     if (options == NULL) {
1518de9c0cecSKevin Wolf         options = qdict_new();
1519de9c0cecSKevin Wolf     }
1520de9c0cecSKevin Wolf 
152117b005f1SKevin Wolf     ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err);
1522462f5bcfSKevin Wolf     if (local_err) {
1523462f5bcfSKevin Wolf         goto fail;
1524462f5bcfSKevin Wolf     }
1525462f5bcfSKevin Wolf 
152676c591b0SKevin Wolf     /* Find the right image format driver */
152776c591b0SKevin Wolf     drv = NULL;
152876c591b0SKevin Wolf     drvname = qdict_get_try_str(options, "driver");
152976c591b0SKevin Wolf     if (drvname) {
153076c591b0SKevin Wolf         drv = bdrv_find_format(drvname);
153176c591b0SKevin Wolf         qdict_del(options, "driver");
153276c591b0SKevin Wolf         if (!drv) {
153376c591b0SKevin Wolf             error_setg(errp, "Unknown driver: '%s'", drvname);
153476c591b0SKevin Wolf             ret = -EINVAL;
153576c591b0SKevin Wolf             goto fail;
153676c591b0SKevin Wolf         }
153776c591b0SKevin Wolf     }
153876c591b0SKevin Wolf 
153976c591b0SKevin Wolf     assert(drvname || !(flags & BDRV_O_PROTOCOL));
154076c591b0SKevin Wolf     if (drv && !drv->bdrv_file_open) {
154176c591b0SKevin Wolf         /* If the user explicitly wants a format driver here, we'll need to add
154276c591b0SKevin Wolf          * another layer for the protocol in bs->file */
154376c591b0SKevin Wolf         flags &= ~BDRV_O_PROTOCOL;
154476c591b0SKevin Wolf     }
154576c591b0SKevin Wolf 
1546de9c0cecSKevin Wolf     bs->options = options;
1547b6ad491aSKevin Wolf     options = qdict_clone_shallow(options);
1548de9c0cecSKevin Wolf 
1549f500a6d3SKevin Wolf     /* Open image file without format layer */
1550f4788adcSKevin Wolf     if ((flags & BDRV_O_PROTOCOL) == 0) {
1551be028adcSJeff Cody         if (flags & BDRV_O_RDWR) {
1552be028adcSJeff Cody             flags |= BDRV_O_ALLOW_RDWR;
1553be028adcSJeff Cody         }
1554b1e6fc08SKevin Wolf         if (flags & BDRV_O_SNAPSHOT) {
1555b1e6fc08SKevin Wolf             snapshot_flags = bdrv_temp_snapshot_flags(flags);
1556b1e6fc08SKevin Wolf             flags = bdrv_backing_flags(flags);
1557b1e6fc08SKevin Wolf         }
1558be028adcSJeff Cody 
1559f67503e5SMax Reitz         assert(file == NULL);
1560054963f8SMax Reitz         ret = bdrv_open_image(&file, filename, options, "file",
15610b50cc88SKevin Wolf                               bdrv_inherited_flags(flags),
15620b50cc88SKevin Wolf                               true, &local_err);
1563f500a6d3SKevin Wolf         if (ret < 0) {
15648bfea15dSKevin Wolf             goto fail;
1565f500a6d3SKevin Wolf         }
1566f4788adcSKevin Wolf     }
1567f500a6d3SKevin Wolf 
156876c591b0SKevin Wolf     /* Image format probing */
156938f3ef57SKevin Wolf     bs->probed = !drv;
157076c591b0SKevin Wolf     if (!drv && file) {
157134b5d2c6SMax Reitz         ret = find_image_format(file, filename, &drv, &local_err);
157217b005f1SKevin Wolf         if (ret < 0) {
157317b005f1SKevin Wolf             goto fail;
157417b005f1SKevin Wolf         }
157576c591b0SKevin Wolf     } else if (!drv) {
15762a05cbe4SMax Reitz         error_setg(errp, "Must specify either driver or file");
15772a05cbe4SMax Reitz         ret = -EINVAL;
15788bfea15dSKevin Wolf         goto fail;
15792a05cbe4SMax Reitz     }
1580f500a6d3SKevin Wolf 
1581b6ce07aaSKevin Wolf     /* Open the image */
158234b5d2c6SMax Reitz     ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
1583b6ce07aaSKevin Wolf     if (ret < 0) {
15848bfea15dSKevin Wolf         goto fail;
15856987307cSChristoph Hellwig     }
15866987307cSChristoph Hellwig 
15872a05cbe4SMax Reitz     if (file && (bs->file != file)) {
15884f6fd349SFam Zheng         bdrv_unref(file);
1589f500a6d3SKevin Wolf         file = NULL;
1590f500a6d3SKevin Wolf     }
1591f500a6d3SKevin Wolf 
1592b6ce07aaSKevin Wolf     /* If there is a backing file, use it */
15939156df12SPaolo Bonzini     if ((flags & BDRV_O_NO_BACKING) == 0) {
159431ca6d07SKevin Wolf         QDict *backing_options;
159531ca6d07SKevin Wolf 
15965726d872SBenoît Canet         qdict_extract_subqdict(options, &backing_options, "backing.");
159734b5d2c6SMax Reitz         ret = bdrv_open_backing_file(bs, backing_options, &local_err);
1598b6ce07aaSKevin Wolf         if (ret < 0) {
1599b6ad491aSKevin Wolf             goto close_and_fail;
1600b6ce07aaSKevin Wolf         }
1601b6ce07aaSKevin Wolf     }
1602b6ce07aaSKevin Wolf 
160391af7014SMax Reitz     bdrv_refresh_filename(bs);
160491af7014SMax Reitz 
1605b998875dSKevin Wolf     /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1606b998875dSKevin Wolf      * temporary snapshot afterwards. */
1607b1e6fc08SKevin Wolf     if (snapshot_flags) {
16086b8aeca5SChen Gang         ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
1609b998875dSKevin Wolf         if (local_err) {
1610b998875dSKevin Wolf             goto close_and_fail;
1611b998875dSKevin Wolf         }
1612b998875dSKevin Wolf     }
1613b998875dSKevin Wolf 
1614b6ad491aSKevin Wolf     /* Check if any unknown options were used */
16155acd9d81SMax Reitz     if (options && (qdict_size(options) != 0)) {
1616b6ad491aSKevin Wolf         const QDictEntry *entry = qdict_first(options);
16175acd9d81SMax Reitz         if (flags & BDRV_O_PROTOCOL) {
16185acd9d81SMax Reitz             error_setg(errp, "Block protocol '%s' doesn't support the option "
16195acd9d81SMax Reitz                        "'%s'", drv->format_name, entry->key);
16205acd9d81SMax Reitz         } else {
162134b5d2c6SMax Reitz             error_setg(errp, "Block format '%s' used by device '%s' doesn't "
16225acd9d81SMax Reitz                        "support the option '%s'", drv->format_name,
1623bfb197e0SMarkus Armbruster                        bdrv_get_device_name(bs), entry->key);
16245acd9d81SMax Reitz         }
1625b6ad491aSKevin Wolf 
1626b6ad491aSKevin Wolf         ret = -EINVAL;
1627b6ad491aSKevin Wolf         goto close_and_fail;
1628b6ad491aSKevin Wolf     }
1629b6ad491aSKevin Wolf 
1630b6ce07aaSKevin Wolf     if (!bdrv_key_required(bs)) {
1631a7f53e26SMarkus Armbruster         if (bs->blk) {
1632a7f53e26SMarkus Armbruster             blk_dev_change_media_cb(bs->blk, true);
1633a7f53e26SMarkus Armbruster         }
1634c3adb58fSMarkus Armbruster     } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1635c3adb58fSMarkus Armbruster                && !runstate_check(RUN_STATE_INMIGRATE)
1636c3adb58fSMarkus Armbruster                && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1637c3adb58fSMarkus Armbruster         error_setg(errp,
1638c3adb58fSMarkus Armbruster                    "Guest must be stopped for opening of encrypted image");
1639c3adb58fSMarkus Armbruster         ret = -EBUSY;
1640c3adb58fSMarkus Armbruster         goto close_and_fail;
1641b6ce07aaSKevin Wolf     }
1642b6ce07aaSKevin Wolf 
1643c3adb58fSMarkus Armbruster     QDECREF(options);
1644f67503e5SMax Reitz     *pbs = bs;
1645b6ce07aaSKevin Wolf     return 0;
1646b6ce07aaSKevin Wolf 
16478bfea15dSKevin Wolf fail:
1648f500a6d3SKevin Wolf     if (file != NULL) {
16494f6fd349SFam Zheng         bdrv_unref(file);
1650f500a6d3SKevin Wolf     }
1651de9c0cecSKevin Wolf     QDECREF(bs->options);
1652b6ad491aSKevin Wolf     QDECREF(options);
1653de9c0cecSKevin Wolf     bs->options = NULL;
1654f67503e5SMax Reitz     if (!*pbs) {
1655f67503e5SMax Reitz         /* If *pbs is NULL, a new BDS has been created in this function and
1656f67503e5SMax Reitz            needs to be freed now. Otherwise, it does not need to be closed,
1657f67503e5SMax Reitz            since it has not really been opened yet. */
1658f67503e5SMax Reitz         bdrv_unref(bs);
1659f67503e5SMax Reitz     }
166084d18f06SMarkus Armbruster     if (local_err) {
166134b5d2c6SMax Reitz         error_propagate(errp, local_err);
166234b5d2c6SMax Reitz     }
1663b6ad491aSKevin Wolf     return ret;
1664de9c0cecSKevin Wolf 
1665b6ad491aSKevin Wolf close_and_fail:
1666f67503e5SMax Reitz     /* See fail path, but now the BDS has to be always closed */
1667f67503e5SMax Reitz     if (*pbs) {
1668b6ad491aSKevin Wolf         bdrv_close(bs);
1669f67503e5SMax Reitz     } else {
1670f67503e5SMax Reitz         bdrv_unref(bs);
1671f67503e5SMax Reitz     }
1672b6ad491aSKevin Wolf     QDECREF(options);
167384d18f06SMarkus Armbruster     if (local_err) {
167434b5d2c6SMax Reitz         error_propagate(errp, local_err);
167534b5d2c6SMax Reitz     }
1676b6ce07aaSKevin Wolf     return ret;
1677b6ce07aaSKevin Wolf }
1678b6ce07aaSKevin Wolf 
1679e971aa12SJeff Cody typedef struct BlockReopenQueueEntry {
1680e971aa12SJeff Cody      bool prepared;
1681e971aa12SJeff Cody      BDRVReopenState state;
1682e971aa12SJeff Cody      QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1683e971aa12SJeff Cody } BlockReopenQueueEntry;
1684e971aa12SJeff Cody 
1685e971aa12SJeff Cody /*
1686e971aa12SJeff Cody  * Adds a BlockDriverState to a simple queue for an atomic, transactional
1687e971aa12SJeff Cody  * reopen of multiple devices.
1688e971aa12SJeff Cody  *
1689e971aa12SJeff Cody  * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1690e971aa12SJeff Cody  * already performed, or alternatively may be NULL a new BlockReopenQueue will
1691e971aa12SJeff Cody  * be created and initialized. This newly created BlockReopenQueue should be
1692e971aa12SJeff Cody  * passed back in for subsequent calls that are intended to be of the same
1693e971aa12SJeff Cody  * atomic 'set'.
1694e971aa12SJeff Cody  *
1695e971aa12SJeff Cody  * bs is the BlockDriverState to add to the reopen queue.
1696e971aa12SJeff Cody  *
1697e971aa12SJeff Cody  * flags contains the open flags for the associated bs
1698e971aa12SJeff Cody  *
1699e971aa12SJeff Cody  * returns a pointer to bs_queue, which is either the newly allocated
1700e971aa12SJeff Cody  * bs_queue, or the existing bs_queue being used.
1701e971aa12SJeff Cody  *
1702e971aa12SJeff Cody  */
1703e971aa12SJeff Cody BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1704e971aa12SJeff Cody                                     BlockDriverState *bs, int flags)
1705e971aa12SJeff Cody {
1706e971aa12SJeff Cody     assert(bs != NULL);
1707e971aa12SJeff Cody 
1708e971aa12SJeff Cody     BlockReopenQueueEntry *bs_entry;
1709e971aa12SJeff Cody     if (bs_queue == NULL) {
1710e971aa12SJeff Cody         bs_queue = g_new0(BlockReopenQueue, 1);
1711e971aa12SJeff Cody         QSIMPLEQ_INIT(bs_queue);
1712e971aa12SJeff Cody     }
1713e971aa12SJeff Cody 
1714f1f25a2eSKevin Wolf     /* bdrv_open() masks this flag out */
1715f1f25a2eSKevin Wolf     flags &= ~BDRV_O_PROTOCOL;
1716f1f25a2eSKevin Wolf 
1717e971aa12SJeff Cody     if (bs->file) {
1718f1f25a2eSKevin Wolf         bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags));
1719e971aa12SJeff Cody     }
1720e971aa12SJeff Cody 
1721e971aa12SJeff Cody     bs_entry = g_new0(BlockReopenQueueEntry, 1);
1722e971aa12SJeff Cody     QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1723e971aa12SJeff Cody 
1724e971aa12SJeff Cody     bs_entry->state.bs = bs;
1725e971aa12SJeff Cody     bs_entry->state.flags = flags;
1726e971aa12SJeff Cody 
1727e971aa12SJeff Cody     return bs_queue;
1728e971aa12SJeff Cody }
1729e971aa12SJeff Cody 
1730e971aa12SJeff Cody /*
1731e971aa12SJeff Cody  * Reopen multiple BlockDriverStates atomically & transactionally.
1732e971aa12SJeff Cody  *
1733e971aa12SJeff Cody  * The queue passed in (bs_queue) must have been built up previous
1734e971aa12SJeff Cody  * via bdrv_reopen_queue().
1735e971aa12SJeff Cody  *
1736e971aa12SJeff Cody  * Reopens all BDS specified in the queue, with the appropriate
1737e971aa12SJeff Cody  * flags.  All devices are prepared for reopen, and failure of any
1738e971aa12SJeff Cody  * device will cause all device changes to be abandonded, and intermediate
1739e971aa12SJeff Cody  * data cleaned up.
1740e971aa12SJeff Cody  *
1741e971aa12SJeff Cody  * If all devices prepare successfully, then the changes are committed
1742e971aa12SJeff Cody  * to all devices.
1743e971aa12SJeff Cody  *
1744e971aa12SJeff Cody  */
1745e971aa12SJeff Cody int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1746e971aa12SJeff Cody {
1747e971aa12SJeff Cody     int ret = -1;
1748e971aa12SJeff Cody     BlockReopenQueueEntry *bs_entry, *next;
1749e971aa12SJeff Cody     Error *local_err = NULL;
1750e971aa12SJeff Cody 
1751e971aa12SJeff Cody     assert(bs_queue != NULL);
1752e971aa12SJeff Cody 
1753e971aa12SJeff Cody     bdrv_drain_all();
1754e971aa12SJeff Cody 
1755e971aa12SJeff Cody     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1756e971aa12SJeff Cody         if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1757e971aa12SJeff Cody             error_propagate(errp, local_err);
1758e971aa12SJeff Cody             goto cleanup;
1759e971aa12SJeff Cody         }
1760e971aa12SJeff Cody         bs_entry->prepared = true;
1761e971aa12SJeff Cody     }
1762e971aa12SJeff Cody 
1763e971aa12SJeff Cody     /* If we reach this point, we have success and just need to apply the
1764e971aa12SJeff Cody      * changes
1765e971aa12SJeff Cody      */
1766e971aa12SJeff Cody     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1767e971aa12SJeff Cody         bdrv_reopen_commit(&bs_entry->state);
1768e971aa12SJeff Cody     }
1769e971aa12SJeff Cody 
1770e971aa12SJeff Cody     ret = 0;
1771e971aa12SJeff Cody 
1772e971aa12SJeff Cody cleanup:
1773e971aa12SJeff Cody     QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1774e971aa12SJeff Cody         if (ret && bs_entry->prepared) {
1775e971aa12SJeff Cody             bdrv_reopen_abort(&bs_entry->state);
1776e971aa12SJeff Cody         }
1777e971aa12SJeff Cody         g_free(bs_entry);
1778e971aa12SJeff Cody     }
1779e971aa12SJeff Cody     g_free(bs_queue);
1780e971aa12SJeff Cody     return ret;
1781e971aa12SJeff Cody }
1782e971aa12SJeff Cody 
1783e971aa12SJeff Cody 
1784e971aa12SJeff Cody /* Reopen a single BlockDriverState with the specified flags. */
1785e971aa12SJeff Cody int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1786e971aa12SJeff Cody {
1787e971aa12SJeff Cody     int ret = -1;
1788e971aa12SJeff Cody     Error *local_err = NULL;
1789e971aa12SJeff Cody     BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1790e971aa12SJeff Cody 
1791e971aa12SJeff Cody     ret = bdrv_reopen_multiple(queue, &local_err);
1792e971aa12SJeff Cody     if (local_err != NULL) {
1793e971aa12SJeff Cody         error_propagate(errp, local_err);
1794e971aa12SJeff Cody     }
1795e971aa12SJeff Cody     return ret;
1796e971aa12SJeff Cody }
1797e971aa12SJeff Cody 
1798e971aa12SJeff Cody 
1799e971aa12SJeff Cody /*
1800e971aa12SJeff Cody  * Prepares a BlockDriverState for reopen. All changes are staged in the
1801e971aa12SJeff Cody  * 'opaque' field of the BDRVReopenState, which is used and allocated by
1802e971aa12SJeff Cody  * the block driver layer .bdrv_reopen_prepare()
1803e971aa12SJeff Cody  *
1804e971aa12SJeff Cody  * bs is the BlockDriverState to reopen
1805e971aa12SJeff Cody  * flags are the new open flags
1806e971aa12SJeff Cody  * queue is the reopen queue
1807e971aa12SJeff Cody  *
1808e971aa12SJeff Cody  * Returns 0 on success, non-zero on error.  On error errp will be set
1809e971aa12SJeff Cody  * as well.
1810e971aa12SJeff Cody  *
1811e971aa12SJeff Cody  * On failure, bdrv_reopen_abort() will be called to clean up any data.
1812e971aa12SJeff Cody  * It is the responsibility of the caller to then call the abort() or
1813e971aa12SJeff Cody  * commit() for any other BDS that have been left in a prepare() state
1814e971aa12SJeff Cody  *
1815e971aa12SJeff Cody  */
1816e971aa12SJeff Cody int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1817e971aa12SJeff Cody                         Error **errp)
1818e971aa12SJeff Cody {
1819e971aa12SJeff Cody     int ret = -1;
1820e971aa12SJeff Cody     Error *local_err = NULL;
1821e971aa12SJeff Cody     BlockDriver *drv;
1822e971aa12SJeff Cody 
1823e971aa12SJeff Cody     assert(reopen_state != NULL);
1824e971aa12SJeff Cody     assert(reopen_state->bs->drv != NULL);
1825e971aa12SJeff Cody     drv = reopen_state->bs->drv;
1826e971aa12SJeff Cody 
1827e971aa12SJeff Cody     /* if we are to stay read-only, do not allow permission change
1828e971aa12SJeff Cody      * to r/w */
1829e971aa12SJeff Cody     if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1830e971aa12SJeff Cody         reopen_state->flags & BDRV_O_RDWR) {
183181e5f78aSAlberto Garcia         error_setg(errp, "Node '%s' is read only",
183281e5f78aSAlberto Garcia                    bdrv_get_device_or_node_name(reopen_state->bs));
1833e971aa12SJeff Cody         goto error;
1834e971aa12SJeff Cody     }
1835e971aa12SJeff Cody 
1836e971aa12SJeff Cody 
1837e971aa12SJeff Cody     ret = bdrv_flush(reopen_state->bs);
1838e971aa12SJeff Cody     if (ret) {
1839e971aa12SJeff Cody         error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1840e971aa12SJeff Cody                   strerror(-ret));
1841e971aa12SJeff Cody         goto error;
1842e971aa12SJeff Cody     }
1843e971aa12SJeff Cody 
1844e971aa12SJeff Cody     if (drv->bdrv_reopen_prepare) {
1845e971aa12SJeff Cody         ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1846e971aa12SJeff Cody         if (ret) {
1847e971aa12SJeff Cody             if (local_err != NULL) {
1848e971aa12SJeff Cody                 error_propagate(errp, local_err);
1849e971aa12SJeff Cody             } else {
1850d8b6895fSLuiz Capitulino                 error_setg(errp, "failed while preparing to reopen image '%s'",
1851e971aa12SJeff Cody                            reopen_state->bs->filename);
1852e971aa12SJeff Cody             }
1853e971aa12SJeff Cody             goto error;
1854e971aa12SJeff Cody         }
1855e971aa12SJeff Cody     } else {
1856e971aa12SJeff Cody         /* It is currently mandatory to have a bdrv_reopen_prepare()
1857e971aa12SJeff Cody          * handler for each supported drv. */
185881e5f78aSAlberto Garcia         error_setg(errp, "Block format '%s' used by node '%s' "
185981e5f78aSAlberto Garcia                    "does not support reopening files", drv->format_name,
186081e5f78aSAlberto Garcia                    bdrv_get_device_or_node_name(reopen_state->bs));
1861e971aa12SJeff Cody         ret = -1;
1862e971aa12SJeff Cody         goto error;
1863e971aa12SJeff Cody     }
1864e971aa12SJeff Cody 
1865e971aa12SJeff Cody     ret = 0;
1866e971aa12SJeff Cody 
1867e971aa12SJeff Cody error:
1868e971aa12SJeff Cody     return ret;
1869e971aa12SJeff Cody }
1870e971aa12SJeff Cody 
1871e971aa12SJeff Cody /*
1872e971aa12SJeff Cody  * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1873e971aa12SJeff Cody  * makes them final by swapping the staging BlockDriverState contents into
1874e971aa12SJeff Cody  * the active BlockDriverState contents.
1875e971aa12SJeff Cody  */
1876e971aa12SJeff Cody void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1877e971aa12SJeff Cody {
1878e971aa12SJeff Cody     BlockDriver *drv;
1879e971aa12SJeff Cody 
1880e971aa12SJeff Cody     assert(reopen_state != NULL);
1881e971aa12SJeff Cody     drv = reopen_state->bs->drv;
1882e971aa12SJeff Cody     assert(drv != NULL);
1883e971aa12SJeff Cody 
1884e971aa12SJeff Cody     /* If there are any driver level actions to take */
1885e971aa12SJeff Cody     if (drv->bdrv_reopen_commit) {
1886e971aa12SJeff Cody         drv->bdrv_reopen_commit(reopen_state);
1887e971aa12SJeff Cody     }
1888e971aa12SJeff Cody 
1889e971aa12SJeff Cody     /* set BDS specific flags now */
1890e971aa12SJeff Cody     reopen_state->bs->open_flags         = reopen_state->flags;
1891e971aa12SJeff Cody     reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1892e971aa12SJeff Cody                                               BDRV_O_CACHE_WB);
1893e971aa12SJeff Cody     reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
1894355ef4acSKevin Wolf 
18953baca891SKevin Wolf     bdrv_refresh_limits(reopen_state->bs, NULL);
1896e971aa12SJeff Cody }
1897e971aa12SJeff Cody 
1898e971aa12SJeff Cody /*
1899e971aa12SJeff Cody  * Abort the reopen, and delete and free the staged changes in
1900e971aa12SJeff Cody  * reopen_state
1901e971aa12SJeff Cody  */
1902e971aa12SJeff Cody void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1903e971aa12SJeff Cody {
1904e971aa12SJeff Cody     BlockDriver *drv;
1905e971aa12SJeff Cody 
1906e971aa12SJeff Cody     assert(reopen_state != NULL);
1907e971aa12SJeff Cody     drv = reopen_state->bs->drv;
1908e971aa12SJeff Cody     assert(drv != NULL);
1909e971aa12SJeff Cody 
1910e971aa12SJeff Cody     if (drv->bdrv_reopen_abort) {
1911e971aa12SJeff Cody         drv->bdrv_reopen_abort(reopen_state);
1912e971aa12SJeff Cody     }
1913e971aa12SJeff Cody }
1914e971aa12SJeff Cody 
1915e971aa12SJeff Cody 
1916fc01f7e7Sbellard void bdrv_close(BlockDriverState *bs)
1917fc01f7e7Sbellard {
191833384421SMax Reitz     BdrvAioNotifier *ban, *ban_next;
191933384421SMax Reitz 
19203e914655SPaolo Bonzini     if (bs->job) {
19213e914655SPaolo Bonzini         block_job_cancel_sync(bs->job);
19223e914655SPaolo Bonzini     }
192358fda173SStefan Hajnoczi     bdrv_drain_all(); /* complete I/O */
192458fda173SStefan Hajnoczi     bdrv_flush(bs);
192558fda173SStefan Hajnoczi     bdrv_drain_all(); /* in case flush left pending I/O */
1926d7d512f6SPaolo Bonzini     notifier_list_notify(&bs->close_notifiers, bs);
19277094f12fSKevin Wolf 
19283cbc002cSPaolo Bonzini     if (bs->drv) {
1929557df6acSStefan Hajnoczi         if (bs->backing_hd) {
1930826b6ca0SFam Zheng             BlockDriverState *backing_hd = bs->backing_hd;
1931826b6ca0SFam Zheng             bdrv_set_backing_hd(bs, NULL);
1932826b6ca0SFam Zheng             bdrv_unref(backing_hd);
1933557df6acSStefan Hajnoczi         }
1934ea2384d3Sbellard         bs->drv->bdrv_close(bs);
19357267c094SAnthony Liguori         g_free(bs->opaque);
1936ea2384d3Sbellard         bs->opaque = NULL;
1937ea2384d3Sbellard         bs->drv = NULL;
193853fec9d3SStefan Hajnoczi         bs->copy_on_read = 0;
1939a275fa42SPaolo Bonzini         bs->backing_file[0] = '\0';
1940a275fa42SPaolo Bonzini         bs->backing_format[0] = '\0';
19416405875cSPaolo Bonzini         bs->total_sectors = 0;
19426405875cSPaolo Bonzini         bs->encrypted = 0;
19436405875cSPaolo Bonzini         bs->valid_key = 0;
19446405875cSPaolo Bonzini         bs->sg = 0;
19450d51b4deSAsias He         bs->zero_beyond_eof = false;
1946de9c0cecSKevin Wolf         QDECREF(bs->options);
1947de9c0cecSKevin Wolf         bs->options = NULL;
194891af7014SMax Reitz         QDECREF(bs->full_open_options);
194991af7014SMax Reitz         bs->full_open_options = NULL;
1950b338082bSbellard 
195166f82ceeSKevin Wolf         if (bs->file != NULL) {
19524f6fd349SFam Zheng             bdrv_unref(bs->file);
19530ac9377dSPaolo Bonzini             bs->file = NULL;
195466f82ceeSKevin Wolf         }
19559ca11154SPavel Hrdina     }
195666f82ceeSKevin Wolf 
1957a7f53e26SMarkus Armbruster     if (bs->blk) {
1958a7f53e26SMarkus Armbruster         blk_dev_change_media_cb(bs->blk, false);
1959a7f53e26SMarkus Armbruster     }
196098f90dbaSZhi Yong Wu 
196198f90dbaSZhi Yong Wu     /*throttling disk I/O limits*/
196298f90dbaSZhi Yong Wu     if (bs->io_limits_enabled) {
196398f90dbaSZhi Yong Wu         bdrv_io_limits_disable(bs);
196498f90dbaSZhi Yong Wu     }
196533384421SMax Reitz 
196633384421SMax Reitz     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
196733384421SMax Reitz         g_free(ban);
196833384421SMax Reitz     }
196933384421SMax Reitz     QLIST_INIT(&bs->aio_notifiers);
1970b338082bSbellard }
1971b338082bSbellard 
19722bc93fedSMORITA Kazutaka void bdrv_close_all(void)
19732bc93fedSMORITA Kazutaka {
19742bc93fedSMORITA Kazutaka     BlockDriverState *bs;
19752bc93fedSMORITA Kazutaka 
1976dc364f4cSBenoît Canet     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1977ed78cda3SStefan Hajnoczi         AioContext *aio_context = bdrv_get_aio_context(bs);
1978ed78cda3SStefan Hajnoczi 
1979ed78cda3SStefan Hajnoczi         aio_context_acquire(aio_context);
19802bc93fedSMORITA Kazutaka         bdrv_close(bs);
1981ed78cda3SStefan Hajnoczi         aio_context_release(aio_context);
19822bc93fedSMORITA Kazutaka     }
19832bc93fedSMORITA Kazutaka }
19842bc93fedSMORITA Kazutaka 
198588266f5aSStefan Hajnoczi /* Check if any requests are in-flight (including throttled requests) */
198688266f5aSStefan Hajnoczi static bool bdrv_requests_pending(BlockDriverState *bs)
198788266f5aSStefan Hajnoczi {
198888266f5aSStefan Hajnoczi     if (!QLIST_EMPTY(&bs->tracked_requests)) {
198988266f5aSStefan Hajnoczi         return true;
199088266f5aSStefan Hajnoczi     }
1991cc0681c4SBenoît Canet     if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
1992cc0681c4SBenoît Canet         return true;
1993cc0681c4SBenoît Canet     }
1994cc0681c4SBenoît Canet     if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
199588266f5aSStefan Hajnoczi         return true;
199688266f5aSStefan Hajnoczi     }
199788266f5aSStefan Hajnoczi     if (bs->file && bdrv_requests_pending(bs->file)) {
199888266f5aSStefan Hajnoczi         return true;
199988266f5aSStefan Hajnoczi     }
200088266f5aSStefan Hajnoczi     if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
200188266f5aSStefan Hajnoczi         return true;
200288266f5aSStefan Hajnoczi     }
200388266f5aSStefan Hajnoczi     return false;
200488266f5aSStefan Hajnoczi }
200588266f5aSStefan Hajnoczi 
20065b98db0aSStefan Hajnoczi static bool bdrv_drain_one(BlockDriverState *bs)
20075b98db0aSStefan Hajnoczi {
20085b98db0aSStefan Hajnoczi     bool bs_busy;
20095b98db0aSStefan Hajnoczi 
20105b98db0aSStefan Hajnoczi     bdrv_flush_io_queue(bs);
20115b98db0aSStefan Hajnoczi     bdrv_start_throttled_reqs(bs);
20125b98db0aSStefan Hajnoczi     bs_busy = bdrv_requests_pending(bs);
20135b98db0aSStefan Hajnoczi     bs_busy |= aio_poll(bdrv_get_aio_context(bs), bs_busy);
20145b98db0aSStefan Hajnoczi     return bs_busy;
20155b98db0aSStefan Hajnoczi }
20165b98db0aSStefan Hajnoczi 
20175b98db0aSStefan Hajnoczi /*
20185b98db0aSStefan Hajnoczi  * Wait for pending requests to complete on a single BlockDriverState subtree
20195b98db0aSStefan Hajnoczi  *
20205b98db0aSStefan Hajnoczi  * See the warning in bdrv_drain_all().  This function can only be called if
20215b98db0aSStefan Hajnoczi  * you are sure nothing can generate I/O because you have op blockers
20225b98db0aSStefan Hajnoczi  * installed.
20235b98db0aSStefan Hajnoczi  *
20245b98db0aSStefan Hajnoczi  * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
20255b98db0aSStefan Hajnoczi  * AioContext.
20265b98db0aSStefan Hajnoczi  */
20275b98db0aSStefan Hajnoczi void bdrv_drain(BlockDriverState *bs)
20285b98db0aSStefan Hajnoczi {
20295b98db0aSStefan Hajnoczi     while (bdrv_drain_one(bs)) {
20305b98db0aSStefan Hajnoczi         /* Keep iterating */
20315b98db0aSStefan Hajnoczi     }
20325b98db0aSStefan Hajnoczi }
20335b98db0aSStefan Hajnoczi 
2034922453bcSStefan Hajnoczi /*
2035922453bcSStefan Hajnoczi  * Wait for pending requests to complete across all BlockDriverStates
2036922453bcSStefan Hajnoczi  *
2037922453bcSStefan Hajnoczi  * This function does not flush data to disk, use bdrv_flush_all() for that
2038922453bcSStefan Hajnoczi  * after calling this function.
20394c355d53SZhi Yong Wu  *
20404c355d53SZhi Yong Wu  * Note that completion of an asynchronous I/O operation can trigger any
20414c355d53SZhi Yong Wu  * number of other I/O operations on other devices---for example a coroutine
20424c355d53SZhi Yong Wu  * can be arbitrarily complex and a constant flow of I/O can come until the
20434c355d53SZhi Yong Wu  * coroutine is complete.  Because of this, it is not possible to have a
20444c355d53SZhi Yong Wu  * function to drain a single device's I/O queue.
2045922453bcSStefan Hajnoczi  */
2046922453bcSStefan Hajnoczi void bdrv_drain_all(void)
2047922453bcSStefan Hajnoczi {
204888266f5aSStefan Hajnoczi     /* Always run first iteration so any pending completion BHs run */
204988266f5aSStefan Hajnoczi     bool busy = true;
20504f5472cbSStefan Hajnoczi     BlockDriverState *bs = NULL;
2051922453bcSStefan Hajnoczi 
20524f5472cbSStefan Hajnoczi     while ((bs = bdrv_next(bs))) {
205369da3b0bSFam Zheng         AioContext *aio_context = bdrv_get_aio_context(bs);
205469da3b0bSFam Zheng 
205569da3b0bSFam Zheng         aio_context_acquire(aio_context);
205669da3b0bSFam Zheng         if (bs->job) {
205769da3b0bSFam Zheng             block_job_pause(bs->job);
205869da3b0bSFam Zheng         }
205969da3b0bSFam Zheng         aio_context_release(aio_context);
206069da3b0bSFam Zheng     }
206169da3b0bSFam Zheng 
206288266f5aSStefan Hajnoczi     while (busy) {
20639b536adcSStefan Hajnoczi         busy = false;
20644f5472cbSStefan Hajnoczi         bs = NULL;
2065922453bcSStefan Hajnoczi 
20664f5472cbSStefan Hajnoczi         while ((bs = bdrv_next(bs))) {
20679b536adcSStefan Hajnoczi             AioContext *aio_context = bdrv_get_aio_context(bs);
20689b536adcSStefan Hajnoczi 
20699b536adcSStefan Hajnoczi             aio_context_acquire(aio_context);
20705b98db0aSStefan Hajnoczi             busy |= bdrv_drain_one(bs);
20719b536adcSStefan Hajnoczi             aio_context_release(aio_context);
20729b536adcSStefan Hajnoczi         }
2073922453bcSStefan Hajnoczi     }
207469da3b0bSFam Zheng 
20754f5472cbSStefan Hajnoczi     bs = NULL;
20764f5472cbSStefan Hajnoczi     while ((bs = bdrv_next(bs))) {
207769da3b0bSFam Zheng         AioContext *aio_context = bdrv_get_aio_context(bs);
207869da3b0bSFam Zheng 
207969da3b0bSFam Zheng         aio_context_acquire(aio_context);
208069da3b0bSFam Zheng         if (bs->job) {
208169da3b0bSFam Zheng             block_job_resume(bs->job);
208269da3b0bSFam Zheng         }
208369da3b0bSFam Zheng         aio_context_release(aio_context);
208469da3b0bSFam Zheng     }
2085922453bcSStefan Hajnoczi }
2086922453bcSStefan Hajnoczi 
2087dc364f4cSBenoît Canet /* make a BlockDriverState anonymous by removing from bdrv_state and
2088dc364f4cSBenoît Canet  * graph_bdrv_state list.
2089d22b2f41SRyan Harper    Also, NULL terminate the device_name to prevent double remove */
2090d22b2f41SRyan Harper void bdrv_make_anon(BlockDriverState *bs)
2091d22b2f41SRyan Harper {
2092bfb197e0SMarkus Armbruster     /*
2093bfb197e0SMarkus Armbruster      * Take care to remove bs from bdrv_states only when it's actually
2094bfb197e0SMarkus Armbruster      * in it.  Note that bs->device_list.tqe_prev is initially null,
2095bfb197e0SMarkus Armbruster      * and gets set to non-null by QTAILQ_INSERT_TAIL().  Establish
2096bfb197e0SMarkus Armbruster      * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
2097bfb197e0SMarkus Armbruster      * resetting it to null on remove.
2098bfb197e0SMarkus Armbruster      */
2099bfb197e0SMarkus Armbruster     if (bs->device_list.tqe_prev) {
2100dc364f4cSBenoît Canet         QTAILQ_REMOVE(&bdrv_states, bs, device_list);
2101bfb197e0SMarkus Armbruster         bs->device_list.tqe_prev = NULL;
2102d22b2f41SRyan Harper     }
2103dc364f4cSBenoît Canet     if (bs->node_name[0] != '\0') {
2104dc364f4cSBenoît Canet         QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
2105dc364f4cSBenoît Canet     }
2106dc364f4cSBenoît Canet     bs->node_name[0] = '\0';
2107d22b2f41SRyan Harper }
2108d22b2f41SRyan Harper 
2109e023b2e2SPaolo Bonzini static void bdrv_rebind(BlockDriverState *bs)
2110e023b2e2SPaolo Bonzini {
2111e023b2e2SPaolo Bonzini     if (bs->drv && bs->drv->bdrv_rebind) {
2112e023b2e2SPaolo Bonzini         bs->drv->bdrv_rebind(bs);
2113e023b2e2SPaolo Bonzini     }
2114e023b2e2SPaolo Bonzini }
2115e023b2e2SPaolo Bonzini 
21164ddc07caSPaolo Bonzini static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
21174ddc07caSPaolo Bonzini                                      BlockDriverState *bs_src)
21184ddc07caSPaolo Bonzini {
21194ddc07caSPaolo Bonzini     /* move some fields that need to stay attached to the device */
21204ddc07caSPaolo Bonzini 
21214ddc07caSPaolo Bonzini     /* dev info */
21221b7fd729SPaolo Bonzini     bs_dest->guest_block_size   = bs_src->guest_block_size;
21234ddc07caSPaolo Bonzini     bs_dest->copy_on_read       = bs_src->copy_on_read;
21244ddc07caSPaolo Bonzini 
21254ddc07caSPaolo Bonzini     bs_dest->enable_write_cache = bs_src->enable_write_cache;
21264ddc07caSPaolo Bonzini 
2127cc0681c4SBenoît Canet     /* i/o throttled req */
2128cc0681c4SBenoît Canet     memcpy(&bs_dest->throttle_state,
2129cc0681c4SBenoît Canet            &bs_src->throttle_state,
2130cc0681c4SBenoît Canet            sizeof(ThrottleState));
2131cc0681c4SBenoît Canet     bs_dest->throttled_reqs[0]  = bs_src->throttled_reqs[0];
2132cc0681c4SBenoît Canet     bs_dest->throttled_reqs[1]  = bs_src->throttled_reqs[1];
21334ddc07caSPaolo Bonzini     bs_dest->io_limits_enabled  = bs_src->io_limits_enabled;
21344ddc07caSPaolo Bonzini 
21354ddc07caSPaolo Bonzini     /* r/w error */
21364ddc07caSPaolo Bonzini     bs_dest->on_read_error      = bs_src->on_read_error;
21374ddc07caSPaolo Bonzini     bs_dest->on_write_error     = bs_src->on_write_error;
21384ddc07caSPaolo Bonzini 
21394ddc07caSPaolo Bonzini     /* i/o status */
21404ddc07caSPaolo Bonzini     bs_dest->iostatus_enabled   = bs_src->iostatus_enabled;
21414ddc07caSPaolo Bonzini     bs_dest->iostatus           = bs_src->iostatus;
21424ddc07caSPaolo Bonzini 
21434ddc07caSPaolo Bonzini     /* dirty bitmap */
2144e4654d2dSFam Zheng     bs_dest->dirty_bitmaps      = bs_src->dirty_bitmaps;
21454ddc07caSPaolo Bonzini 
21469fcb0251SFam Zheng     /* reference count */
21479fcb0251SFam Zheng     bs_dest->refcnt             = bs_src->refcnt;
21489fcb0251SFam Zheng 
21494ddc07caSPaolo Bonzini     /* job */
21504ddc07caSPaolo Bonzini     bs_dest->job                = bs_src->job;
21514ddc07caSPaolo Bonzini 
21524ddc07caSPaolo Bonzini     /* keep the same entry in bdrv_states */
2153dc364f4cSBenoît Canet     bs_dest->device_list = bs_src->device_list;
21547e7d56d9SMarkus Armbruster     bs_dest->blk = bs_src->blk;
21557e7d56d9SMarkus Armbruster 
2156fbe40ff7SFam Zheng     memcpy(bs_dest->op_blockers, bs_src->op_blockers,
2157fbe40ff7SFam Zheng            sizeof(bs_dest->op_blockers));
21584ddc07caSPaolo Bonzini }
21594ddc07caSPaolo Bonzini 
21604ddc07caSPaolo Bonzini /*
21614ddc07caSPaolo Bonzini  * Swap bs contents for two image chains while they are live,
21624ddc07caSPaolo Bonzini  * while keeping required fields on the BlockDriverState that is
21634ddc07caSPaolo Bonzini  * actually attached to a device.
21644ddc07caSPaolo Bonzini  *
21654ddc07caSPaolo Bonzini  * This will modify the BlockDriverState fields, and swap contents
21664ddc07caSPaolo Bonzini  * between bs_new and bs_old. Both bs_new and bs_old are modified.
21674ddc07caSPaolo Bonzini  *
2168bfb197e0SMarkus Armbruster  * bs_new must not be attached to a BlockBackend.
21694ddc07caSPaolo Bonzini  *
21704ddc07caSPaolo Bonzini  * This function does not create any image files.
21714ddc07caSPaolo Bonzini  */
21724ddc07caSPaolo Bonzini void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
21734ddc07caSPaolo Bonzini {
21744ddc07caSPaolo Bonzini     BlockDriverState tmp;
21754ddc07caSPaolo Bonzini 
217690ce8a06SBenoît Canet     /* The code needs to swap the node_name but simply swapping node_list won't
217790ce8a06SBenoît Canet      * work so first remove the nodes from the graph list, do the swap then
217890ce8a06SBenoît Canet      * insert them back if needed.
217990ce8a06SBenoît Canet      */
218090ce8a06SBenoît Canet     if (bs_new->node_name[0] != '\0') {
218190ce8a06SBenoît Canet         QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
218290ce8a06SBenoît Canet     }
218390ce8a06SBenoît Canet     if (bs_old->node_name[0] != '\0') {
218490ce8a06SBenoît Canet         QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
218590ce8a06SBenoît Canet     }
218690ce8a06SBenoît Canet 
2187bfb197e0SMarkus Armbruster     /* bs_new must be unattached and shouldn't have anything fancy enabled */
21887e7d56d9SMarkus Armbruster     assert(!bs_new->blk);
2189e4654d2dSFam Zheng     assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
21904ddc07caSPaolo Bonzini     assert(bs_new->job == NULL);
21914ddc07caSPaolo Bonzini     assert(bs_new->io_limits_enabled == false);
2192cc0681c4SBenoît Canet     assert(!throttle_have_timer(&bs_new->throttle_state));
21934ddc07caSPaolo Bonzini 
21944ddc07caSPaolo Bonzini     tmp = *bs_new;
21954ddc07caSPaolo Bonzini     *bs_new = *bs_old;
21964ddc07caSPaolo Bonzini     *bs_old = tmp;
21974ddc07caSPaolo Bonzini 
21984ddc07caSPaolo Bonzini     /* there are some fields that should not be swapped, move them back */
21994ddc07caSPaolo Bonzini     bdrv_move_feature_fields(&tmp, bs_old);
22004ddc07caSPaolo Bonzini     bdrv_move_feature_fields(bs_old, bs_new);
22014ddc07caSPaolo Bonzini     bdrv_move_feature_fields(bs_new, &tmp);
22024ddc07caSPaolo Bonzini 
2203bfb197e0SMarkus Armbruster     /* bs_new must remain unattached */
22047e7d56d9SMarkus Armbruster     assert(!bs_new->blk);
22054ddc07caSPaolo Bonzini 
22064ddc07caSPaolo Bonzini     /* Check a few fields that should remain attached to the device */
22074ddc07caSPaolo Bonzini     assert(bs_new->job == NULL);
22084ddc07caSPaolo Bonzini     assert(bs_new->io_limits_enabled == false);
2209cc0681c4SBenoît Canet     assert(!throttle_have_timer(&bs_new->throttle_state));
22104ddc07caSPaolo Bonzini 
221190ce8a06SBenoît Canet     /* insert the nodes back into the graph node list if needed */
221290ce8a06SBenoît Canet     if (bs_new->node_name[0] != '\0') {
221390ce8a06SBenoît Canet         QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
221490ce8a06SBenoît Canet     }
221590ce8a06SBenoît Canet     if (bs_old->node_name[0] != '\0') {
221690ce8a06SBenoît Canet         QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
221790ce8a06SBenoît Canet     }
221890ce8a06SBenoît Canet 
22194ddc07caSPaolo Bonzini     bdrv_rebind(bs_new);
22204ddc07caSPaolo Bonzini     bdrv_rebind(bs_old);
22214ddc07caSPaolo Bonzini }
22224ddc07caSPaolo Bonzini 
22238802d1fdSJeff Cody /*
22248802d1fdSJeff Cody  * Add new bs contents at the top of an image chain while the chain is
22258802d1fdSJeff Cody  * live, while keeping required fields on the top layer.
22268802d1fdSJeff Cody  *
22278802d1fdSJeff Cody  * This will modify the BlockDriverState fields, and swap contents
22288802d1fdSJeff Cody  * between bs_new and bs_top. Both bs_new and bs_top are modified.
22298802d1fdSJeff Cody  *
2230bfb197e0SMarkus Armbruster  * bs_new must not be attached to a BlockBackend.
2231f6801b83SJeff Cody  *
22328802d1fdSJeff Cody  * This function does not create any image files.
22338802d1fdSJeff Cody  */
22348802d1fdSJeff Cody void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
22358802d1fdSJeff Cody {
22364ddc07caSPaolo Bonzini     bdrv_swap(bs_new, bs_top);
22378802d1fdSJeff Cody 
22388802d1fdSJeff Cody     /* The contents of 'tmp' will become bs_top, as we are
22398802d1fdSJeff Cody      * swapping bs_new and bs_top contents. */
22408d24cce1SFam Zheng     bdrv_set_backing_hd(bs_top, bs_new);
22418802d1fdSJeff Cody }
22428802d1fdSJeff Cody 
22434f6fd349SFam Zheng static void bdrv_delete(BlockDriverState *bs)
2244b338082bSbellard {
22453e914655SPaolo Bonzini     assert(!bs->job);
22463718d8abSFam Zheng     assert(bdrv_op_blocker_is_empty(bs));
22474f6fd349SFam Zheng     assert(!bs->refcnt);
2248e4654d2dSFam Zheng     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
224918846deeSMarkus Armbruster 
2250e1b5c52eSStefan Hajnoczi     bdrv_close(bs);
2251e1b5c52eSStefan Hajnoczi 
22521b7bdbc1SStefan Hajnoczi     /* remove from list, if necessary */
2253d22b2f41SRyan Harper     bdrv_make_anon(bs);
225434c6f050Saurel32 
22557267c094SAnthony Liguori     g_free(bs);
2256fc01f7e7Sbellard }
2257fc01f7e7Sbellard 
2258e97fc193Saliguori /*
2259e97fc193Saliguori  * Run consistency checks on an image
2260e97fc193Saliguori  *
2261e076f338SKevin Wolf  * Returns 0 if the check could be completed (it doesn't mean that the image is
2262a1c7273bSStefan Weil  * free of errors) or -errno when an internal error occurred. The results of the
2263e076f338SKevin Wolf  * check are stored in res.
2264e97fc193Saliguori  */
22654534ff54SKevin Wolf int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
2266e97fc193Saliguori {
2267908bcd54SMax Reitz     if (bs->drv == NULL) {
2268908bcd54SMax Reitz         return -ENOMEDIUM;
2269908bcd54SMax Reitz     }
2270e97fc193Saliguori     if (bs->drv->bdrv_check == NULL) {
2271e97fc193Saliguori         return -ENOTSUP;
2272e97fc193Saliguori     }
2273e97fc193Saliguori 
2274e076f338SKevin Wolf     memset(res, 0, sizeof(*res));
22754534ff54SKevin Wolf     return bs->drv->bdrv_check(bs, res, fix);
2276e97fc193Saliguori }
2277e97fc193Saliguori 
22788a426614SKevin Wolf #define COMMIT_BUF_SECTORS 2048
22798a426614SKevin Wolf 
228033e3963eSbellard /* commit COW file into the raw image */
228133e3963eSbellard int bdrv_commit(BlockDriverState *bs)
228233e3963eSbellard {
228319cb3738Sbellard     BlockDriver *drv = bs->drv;
228472706ea4SJeff Cody     int64_t sector, total_sectors, length, backing_length;
22858a426614SKevin Wolf     int n, ro, open_flags;
22860bce597dSJeff Cody     int ret = 0;
228772706ea4SJeff Cody     uint8_t *buf = NULL;
228833e3963eSbellard 
228919cb3738Sbellard     if (!drv)
229019cb3738Sbellard         return -ENOMEDIUM;
229133e3963eSbellard 
22924dca4b63SNaphtali Sprei     if (!bs->backing_hd) {
22934dca4b63SNaphtali Sprei         return -ENOTSUP;
22944dca4b63SNaphtali Sprei     }
22954dca4b63SNaphtali Sprei 
2296bb00021dSFam Zheng     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
2297bb00021dSFam Zheng         bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
22982d3735d3SStefan Hajnoczi         return -EBUSY;
22992d3735d3SStefan Hajnoczi     }
23002d3735d3SStefan Hajnoczi 
23014dca4b63SNaphtali Sprei     ro = bs->backing_hd->read_only;
23024dca4b63SNaphtali Sprei     open_flags =  bs->backing_hd->open_flags;
23034dca4b63SNaphtali Sprei 
23044dca4b63SNaphtali Sprei     if (ro) {
23050bce597dSJeff Cody         if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
23060bce597dSJeff Cody             return -EACCES;
23074dca4b63SNaphtali Sprei         }
2308ea2384d3Sbellard     }
2309ea2384d3Sbellard 
231072706ea4SJeff Cody     length = bdrv_getlength(bs);
231172706ea4SJeff Cody     if (length < 0) {
231272706ea4SJeff Cody         ret = length;
231372706ea4SJeff Cody         goto ro_cleanup;
231472706ea4SJeff Cody     }
231572706ea4SJeff Cody 
231672706ea4SJeff Cody     backing_length = bdrv_getlength(bs->backing_hd);
231772706ea4SJeff Cody     if (backing_length < 0) {
231872706ea4SJeff Cody         ret = backing_length;
231972706ea4SJeff Cody         goto ro_cleanup;
232072706ea4SJeff Cody     }
232172706ea4SJeff Cody 
232272706ea4SJeff Cody     /* If our top snapshot is larger than the backing file image,
232372706ea4SJeff Cody      * grow the backing file image if possible.  If not possible,
232472706ea4SJeff Cody      * we must return an error */
232572706ea4SJeff Cody     if (length > backing_length) {
232672706ea4SJeff Cody         ret = bdrv_truncate(bs->backing_hd, length);
232772706ea4SJeff Cody         if (ret < 0) {
232872706ea4SJeff Cody             goto ro_cleanup;
232972706ea4SJeff Cody         }
233072706ea4SJeff Cody     }
233172706ea4SJeff Cody 
233272706ea4SJeff Cody     total_sectors = length >> BDRV_SECTOR_BITS;
2333857d4f46SKevin Wolf 
2334857d4f46SKevin Wolf     /* qemu_try_blockalign() for bs will choose an alignment that works for
2335857d4f46SKevin Wolf      * bs->backing_hd as well, so no need to compare the alignment manually. */
2336857d4f46SKevin Wolf     buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2337857d4f46SKevin Wolf     if (buf == NULL) {
2338857d4f46SKevin Wolf         ret = -ENOMEM;
2339857d4f46SKevin Wolf         goto ro_cleanup;
2340857d4f46SKevin Wolf     }
23418a426614SKevin Wolf 
23428a426614SKevin Wolf     for (sector = 0; sector < total_sectors; sector += n) {
2343d663640cSPaolo Bonzini         ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2344d663640cSPaolo Bonzini         if (ret < 0) {
2345d663640cSPaolo Bonzini             goto ro_cleanup;
2346d663640cSPaolo Bonzini         }
2347d663640cSPaolo Bonzini         if (ret) {
2348dabfa6ccSKevin Wolf             ret = bdrv_read(bs, sector, buf, n);
2349dabfa6ccSKevin Wolf             if (ret < 0) {
23504dca4b63SNaphtali Sprei                 goto ro_cleanup;
235133e3963eSbellard             }
235233e3963eSbellard 
2353dabfa6ccSKevin Wolf             ret = bdrv_write(bs->backing_hd, sector, buf, n);
2354dabfa6ccSKevin Wolf             if (ret < 0) {
23554dca4b63SNaphtali Sprei                 goto ro_cleanup;
235633e3963eSbellard             }
235733e3963eSbellard         }
235833e3963eSbellard     }
235995389c86Sbellard 
23601d44952fSChristoph Hellwig     if (drv->bdrv_make_empty) {
23611d44952fSChristoph Hellwig         ret = drv->bdrv_make_empty(bs);
2362dabfa6ccSKevin Wolf         if (ret < 0) {
2363dabfa6ccSKevin Wolf             goto ro_cleanup;
2364dabfa6ccSKevin Wolf         }
23651d44952fSChristoph Hellwig         bdrv_flush(bs);
23661d44952fSChristoph Hellwig     }
236795389c86Sbellard 
23683f5075aeSChristoph Hellwig     /*
23693f5075aeSChristoph Hellwig      * Make sure all data we wrote to the backing device is actually
23703f5075aeSChristoph Hellwig      * stable on disk.
23713f5075aeSChristoph Hellwig      */
2372dabfa6ccSKevin Wolf     if (bs->backing_hd) {
23733f5075aeSChristoph Hellwig         bdrv_flush(bs->backing_hd);
2374dabfa6ccSKevin Wolf     }
23754dca4b63SNaphtali Sprei 
2376dabfa6ccSKevin Wolf     ret = 0;
23774dca4b63SNaphtali Sprei ro_cleanup:
2378857d4f46SKevin Wolf     qemu_vfree(buf);
23794dca4b63SNaphtali Sprei 
23804dca4b63SNaphtali Sprei     if (ro) {
23810bce597dSJeff Cody         /* ignoring error return here */
23820bce597dSJeff Cody         bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
23834dca4b63SNaphtali Sprei     }
23844dca4b63SNaphtali Sprei 
23851d44952fSChristoph Hellwig     return ret;
238633e3963eSbellard }
238733e3963eSbellard 
2388e8877497SStefan Hajnoczi int bdrv_commit_all(void)
23896ab4b5abSMarkus Armbruster {
23906ab4b5abSMarkus Armbruster     BlockDriverState *bs;
23916ab4b5abSMarkus Armbruster 
2392dc364f4cSBenoît Canet     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2393ed78cda3SStefan Hajnoczi         AioContext *aio_context = bdrv_get_aio_context(bs);
2394ed78cda3SStefan Hajnoczi 
2395ed78cda3SStefan Hajnoczi         aio_context_acquire(aio_context);
2396272d2d8eSJeff Cody         if (bs->drv && bs->backing_hd) {
2397e8877497SStefan Hajnoczi             int ret = bdrv_commit(bs);
2398e8877497SStefan Hajnoczi             if (ret < 0) {
2399ed78cda3SStefan Hajnoczi                 aio_context_release(aio_context);
2400e8877497SStefan Hajnoczi                 return ret;
24016ab4b5abSMarkus Armbruster             }
24026ab4b5abSMarkus Armbruster         }
2403ed78cda3SStefan Hajnoczi         aio_context_release(aio_context);
2404272d2d8eSJeff Cody     }
2405e8877497SStefan Hajnoczi     return 0;
2406e8877497SStefan Hajnoczi }
24076ab4b5abSMarkus Armbruster 
2408dbffbdcfSStefan Hajnoczi /**
2409dbffbdcfSStefan Hajnoczi  * Remove an active request from the tracked requests list
2410dbffbdcfSStefan Hajnoczi  *
2411dbffbdcfSStefan Hajnoczi  * This function should be called when a tracked request is completing.
2412dbffbdcfSStefan Hajnoczi  */
2413dbffbdcfSStefan Hajnoczi static void tracked_request_end(BdrvTrackedRequest *req)
2414dbffbdcfSStefan Hajnoczi {
24152dbafdc0SKevin Wolf     if (req->serialising) {
24162dbafdc0SKevin Wolf         req->bs->serialising_in_flight--;
24172dbafdc0SKevin Wolf     }
24182dbafdc0SKevin Wolf 
2419dbffbdcfSStefan Hajnoczi     QLIST_REMOVE(req, list);
2420f4658285SStefan Hajnoczi     qemu_co_queue_restart_all(&req->wait_queue);
2421dbffbdcfSStefan Hajnoczi }
2422dbffbdcfSStefan Hajnoczi 
2423dbffbdcfSStefan Hajnoczi /**
2424dbffbdcfSStefan Hajnoczi  * Add an active request to the tracked requests list
2425dbffbdcfSStefan Hajnoczi  */
2426dbffbdcfSStefan Hajnoczi static void tracked_request_begin(BdrvTrackedRequest *req,
2427dbffbdcfSStefan Hajnoczi                                   BlockDriverState *bs,
2428793ed47aSKevin Wolf                                   int64_t offset,
2429793ed47aSKevin Wolf                                   unsigned int bytes, bool is_write)
2430dbffbdcfSStefan Hajnoczi {
2431dbffbdcfSStefan Hajnoczi     *req = (BdrvTrackedRequest){
2432dbffbdcfSStefan Hajnoczi         .bs = bs,
2433793ed47aSKevin Wolf         .offset         = offset,
2434793ed47aSKevin Wolf         .bytes          = bytes,
2435dbffbdcfSStefan Hajnoczi         .is_write       = is_write,
24365f8b6491SStefan Hajnoczi         .co             = qemu_coroutine_self(),
24372dbafdc0SKevin Wolf         .serialising    = false,
24387327145fSKevin Wolf         .overlap_offset = offset,
24397327145fSKevin Wolf         .overlap_bytes  = bytes,
2440dbffbdcfSStefan Hajnoczi     };
2441dbffbdcfSStefan Hajnoczi 
2442f4658285SStefan Hajnoczi     qemu_co_queue_init(&req->wait_queue);
2443f4658285SStefan Hajnoczi 
2444dbffbdcfSStefan Hajnoczi     QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
2445dbffbdcfSStefan Hajnoczi }
2446dbffbdcfSStefan Hajnoczi 
2447e96126ffSKevin Wolf static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
24482dbafdc0SKevin Wolf {
24497327145fSKevin Wolf     int64_t overlap_offset = req->offset & ~(align - 1);
2450e96126ffSKevin Wolf     unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
24517327145fSKevin Wolf                                - overlap_offset;
24527327145fSKevin Wolf 
24532dbafdc0SKevin Wolf     if (!req->serialising) {
24542dbafdc0SKevin Wolf         req->bs->serialising_in_flight++;
24552dbafdc0SKevin Wolf         req->serialising = true;
24562dbafdc0SKevin Wolf     }
24577327145fSKevin Wolf 
24587327145fSKevin Wolf     req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
24597327145fSKevin Wolf     req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
24602dbafdc0SKevin Wolf }
24612dbafdc0SKevin Wolf 
2462d83947acSStefan Hajnoczi /**
2463d83947acSStefan Hajnoczi  * Round a region to cluster boundaries
2464d83947acSStefan Hajnoczi  */
2465343bded4SPaolo Bonzini void bdrv_round_to_clusters(BlockDriverState *bs,
2466d83947acSStefan Hajnoczi                             int64_t sector_num, int nb_sectors,
2467d83947acSStefan Hajnoczi                             int64_t *cluster_sector_num,
2468d83947acSStefan Hajnoczi                             int *cluster_nb_sectors)
2469d83947acSStefan Hajnoczi {
2470d83947acSStefan Hajnoczi     BlockDriverInfo bdi;
2471d83947acSStefan Hajnoczi 
2472d83947acSStefan Hajnoczi     if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
2473d83947acSStefan Hajnoczi         *cluster_sector_num = sector_num;
2474d83947acSStefan Hajnoczi         *cluster_nb_sectors = nb_sectors;
2475d83947acSStefan Hajnoczi     } else {
2476d83947acSStefan Hajnoczi         int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
2477d83947acSStefan Hajnoczi         *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
2478d83947acSStefan Hajnoczi         *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
2479d83947acSStefan Hajnoczi                                             nb_sectors, c);
2480d83947acSStefan Hajnoczi     }
2481d83947acSStefan Hajnoczi }
2482d83947acSStefan Hajnoczi 
24837327145fSKevin Wolf static int bdrv_get_cluster_size(BlockDriverState *bs)
2484793ed47aSKevin Wolf {
2485793ed47aSKevin Wolf     BlockDriverInfo bdi;
24867327145fSKevin Wolf     int ret;
2487793ed47aSKevin Wolf 
24887327145fSKevin Wolf     ret = bdrv_get_info(bs, &bdi);
24897327145fSKevin Wolf     if (ret < 0 || bdi.cluster_size == 0) {
24907327145fSKevin Wolf         return bs->request_alignment;
2491793ed47aSKevin Wolf     } else {
24927327145fSKevin Wolf         return bdi.cluster_size;
2493793ed47aSKevin Wolf     }
2494793ed47aSKevin Wolf }
2495793ed47aSKevin Wolf 
2496f4658285SStefan Hajnoczi static bool tracked_request_overlaps(BdrvTrackedRequest *req,
2497793ed47aSKevin Wolf                                      int64_t offset, unsigned int bytes)
2498793ed47aSKevin Wolf {
2499d83947acSStefan Hajnoczi     /*        aaaa   bbbb */
25007327145fSKevin Wolf     if (offset >= req->overlap_offset + req->overlap_bytes) {
2501d83947acSStefan Hajnoczi         return false;
2502d83947acSStefan Hajnoczi     }
2503d83947acSStefan Hajnoczi     /* bbbb   aaaa        */
25047327145fSKevin Wolf     if (req->overlap_offset >= offset + bytes) {
2505d83947acSStefan Hajnoczi         return false;
2506d83947acSStefan Hajnoczi     }
2507d83947acSStefan Hajnoczi     return true;
2508f4658285SStefan Hajnoczi }
2509f4658285SStefan Hajnoczi 
251028de2dcdSKevin Wolf static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
2511f4658285SStefan Hajnoczi {
25122dbafdc0SKevin Wolf     BlockDriverState *bs = self->bs;
2513f4658285SStefan Hajnoczi     BdrvTrackedRequest *req;
2514f4658285SStefan Hajnoczi     bool retry;
251528de2dcdSKevin Wolf     bool waited = false;
2516f4658285SStefan Hajnoczi 
25172dbafdc0SKevin Wolf     if (!bs->serialising_in_flight) {
251828de2dcdSKevin Wolf         return false;
25192dbafdc0SKevin Wolf     }
25202dbafdc0SKevin Wolf 
2521f4658285SStefan Hajnoczi     do {
2522f4658285SStefan Hajnoczi         retry = false;
2523f4658285SStefan Hajnoczi         QLIST_FOREACH(req, &bs->tracked_requests, list) {
25242dbafdc0SKevin Wolf             if (req == self || (!req->serialising && !self->serialising)) {
252565afd211SKevin Wolf                 continue;
252665afd211SKevin Wolf             }
25277327145fSKevin Wolf             if (tracked_request_overlaps(req, self->overlap_offset,
25287327145fSKevin Wolf                                          self->overlap_bytes))
25297327145fSKevin Wolf             {
25305f8b6491SStefan Hajnoczi                 /* Hitting this means there was a reentrant request, for
25315f8b6491SStefan Hajnoczi                  * example, a block driver issuing nested requests.  This must
25325f8b6491SStefan Hajnoczi                  * never happen since it means deadlock.
25335f8b6491SStefan Hajnoczi                  */
25345f8b6491SStefan Hajnoczi                 assert(qemu_coroutine_self() != req->co);
25355f8b6491SStefan Hajnoczi 
25366460440fSKevin Wolf                 /* If the request is already (indirectly) waiting for us, or
25376460440fSKevin Wolf                  * will wait for us as soon as it wakes up, then just go on
25386460440fSKevin Wolf                  * (instead of producing a deadlock in the former case). */
25396460440fSKevin Wolf                 if (!req->waiting_for) {
25406460440fSKevin Wolf                     self->waiting_for = req;
2541f4658285SStefan Hajnoczi                     qemu_co_queue_wait(&req->wait_queue);
25426460440fSKevin Wolf                     self->waiting_for = NULL;
2543f4658285SStefan Hajnoczi                     retry = true;
254428de2dcdSKevin Wolf                     waited = true;
2545f4658285SStefan Hajnoczi                     break;
2546f4658285SStefan Hajnoczi                 }
2547f4658285SStefan Hajnoczi             }
25486460440fSKevin Wolf         }
2549f4658285SStefan Hajnoczi     } while (retry);
255028de2dcdSKevin Wolf 
255128de2dcdSKevin Wolf     return waited;
2552f4658285SStefan Hajnoczi }
2553f4658285SStefan Hajnoczi 
2554756e6736SKevin Wolf /*
2555756e6736SKevin Wolf  * Return values:
2556756e6736SKevin Wolf  * 0        - success
2557756e6736SKevin Wolf  * -EINVAL  - backing format specified, but no file
2558756e6736SKevin Wolf  * -ENOSPC  - can't update the backing file because no space is left in the
2559756e6736SKevin Wolf  *            image file header
2560756e6736SKevin Wolf  * -ENOTSUP - format driver doesn't support changing the backing file
2561756e6736SKevin Wolf  */
2562756e6736SKevin Wolf int bdrv_change_backing_file(BlockDriverState *bs,
2563756e6736SKevin Wolf     const char *backing_file, const char *backing_fmt)
2564756e6736SKevin Wolf {
2565756e6736SKevin Wolf     BlockDriver *drv = bs->drv;
2566469ef350SPaolo Bonzini     int ret;
2567756e6736SKevin Wolf 
25685f377794SPaolo Bonzini     /* Backing file format doesn't make sense without a backing file */
25695f377794SPaolo Bonzini     if (backing_fmt && !backing_file) {
25705f377794SPaolo Bonzini         return -EINVAL;
25715f377794SPaolo Bonzini     }
25725f377794SPaolo Bonzini 
2573756e6736SKevin Wolf     if (drv->bdrv_change_backing_file != NULL) {
2574469ef350SPaolo Bonzini         ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2575756e6736SKevin Wolf     } else {
2576469ef350SPaolo Bonzini         ret = -ENOTSUP;
2577756e6736SKevin Wolf     }
2578469ef350SPaolo Bonzini 
2579469ef350SPaolo Bonzini     if (ret == 0) {
2580469ef350SPaolo Bonzini         pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2581469ef350SPaolo Bonzini         pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2582469ef350SPaolo Bonzini     }
2583469ef350SPaolo Bonzini     return ret;
2584756e6736SKevin Wolf }
2585756e6736SKevin Wolf 
25866ebdcee2SJeff Cody /*
25876ebdcee2SJeff Cody  * Finds the image layer in the chain that has 'bs' as its backing file.
25886ebdcee2SJeff Cody  *
25896ebdcee2SJeff Cody  * active is the current topmost image.
25906ebdcee2SJeff Cody  *
25916ebdcee2SJeff Cody  * Returns NULL if bs is not found in active's image chain,
25926ebdcee2SJeff Cody  * or if active == bs.
25934caf0fcdSJeff Cody  *
25944caf0fcdSJeff Cody  * Returns the bottommost base image if bs == NULL.
25956ebdcee2SJeff Cody  */
25966ebdcee2SJeff Cody BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
25976ebdcee2SJeff Cody                                     BlockDriverState *bs)
25986ebdcee2SJeff Cody {
25994caf0fcdSJeff Cody     while (active && bs != active->backing_hd) {
26004caf0fcdSJeff Cody         active = active->backing_hd;
26016ebdcee2SJeff Cody     }
26026ebdcee2SJeff Cody 
26034caf0fcdSJeff Cody     return active;
26046ebdcee2SJeff Cody }
26056ebdcee2SJeff Cody 
26064caf0fcdSJeff Cody /* Given a BDS, searches for the base layer. */
26074caf0fcdSJeff Cody BlockDriverState *bdrv_find_base(BlockDriverState *bs)
26084caf0fcdSJeff Cody {
26094caf0fcdSJeff Cody     return bdrv_find_overlay(bs, NULL);
26106ebdcee2SJeff Cody }
26116ebdcee2SJeff Cody 
26126ebdcee2SJeff Cody typedef struct BlkIntermediateStates {
26136ebdcee2SJeff Cody     BlockDriverState *bs;
26146ebdcee2SJeff Cody     QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
26156ebdcee2SJeff Cody } BlkIntermediateStates;
26166ebdcee2SJeff Cody 
26176ebdcee2SJeff Cody 
26186ebdcee2SJeff Cody /*
26196ebdcee2SJeff Cody  * Drops images above 'base' up to and including 'top', and sets the image
26206ebdcee2SJeff Cody  * above 'top' to have base as its backing file.
26216ebdcee2SJeff Cody  *
26226ebdcee2SJeff Cody  * Requires that the overlay to 'top' is opened r/w, so that the backing file
26236ebdcee2SJeff Cody  * information in 'bs' can be properly updated.
26246ebdcee2SJeff Cody  *
26256ebdcee2SJeff Cody  * E.g., this will convert the following chain:
26266ebdcee2SJeff Cody  * bottom <- base <- intermediate <- top <- active
26276ebdcee2SJeff Cody  *
26286ebdcee2SJeff Cody  * to
26296ebdcee2SJeff Cody  *
26306ebdcee2SJeff Cody  * bottom <- base <- active
26316ebdcee2SJeff Cody  *
26326ebdcee2SJeff Cody  * It is allowed for bottom==base, in which case it converts:
26336ebdcee2SJeff Cody  *
26346ebdcee2SJeff Cody  * base <- intermediate <- top <- active
26356ebdcee2SJeff Cody  *
26366ebdcee2SJeff Cody  * to
26376ebdcee2SJeff Cody  *
26386ebdcee2SJeff Cody  * base <- active
26396ebdcee2SJeff Cody  *
264054e26900SJeff Cody  * If backing_file_str is non-NULL, it will be used when modifying top's
264154e26900SJeff Cody  * overlay image metadata.
264254e26900SJeff Cody  *
26436ebdcee2SJeff Cody  * Error conditions:
26446ebdcee2SJeff Cody  *  if active == top, that is considered an error
26456ebdcee2SJeff Cody  *
26466ebdcee2SJeff Cody  */
26476ebdcee2SJeff Cody int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
264854e26900SJeff Cody                            BlockDriverState *base, const char *backing_file_str)
26496ebdcee2SJeff Cody {
26506ebdcee2SJeff Cody     BlockDriverState *intermediate;
26516ebdcee2SJeff Cody     BlockDriverState *base_bs = NULL;
26526ebdcee2SJeff Cody     BlockDriverState *new_top_bs = NULL;
26536ebdcee2SJeff Cody     BlkIntermediateStates *intermediate_state, *next;
26546ebdcee2SJeff Cody     int ret = -EIO;
26556ebdcee2SJeff Cody 
26566ebdcee2SJeff Cody     QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
26576ebdcee2SJeff Cody     QSIMPLEQ_INIT(&states_to_delete);
26586ebdcee2SJeff Cody 
26596ebdcee2SJeff Cody     if (!top->drv || !base->drv) {
26606ebdcee2SJeff Cody         goto exit;
26616ebdcee2SJeff Cody     }
26626ebdcee2SJeff Cody 
26636ebdcee2SJeff Cody     new_top_bs = bdrv_find_overlay(active, top);
26646ebdcee2SJeff Cody 
26656ebdcee2SJeff Cody     if (new_top_bs == NULL) {
26666ebdcee2SJeff Cody         /* we could not find the image above 'top', this is an error */
26676ebdcee2SJeff Cody         goto exit;
26686ebdcee2SJeff Cody     }
26696ebdcee2SJeff Cody 
26706ebdcee2SJeff Cody     /* special case of new_top_bs->backing_hd already pointing to base - nothing
26716ebdcee2SJeff Cody      * to do, no intermediate images */
26726ebdcee2SJeff Cody     if (new_top_bs->backing_hd == base) {
26736ebdcee2SJeff Cody         ret = 0;
26746ebdcee2SJeff Cody         goto exit;
26756ebdcee2SJeff Cody     }
26766ebdcee2SJeff Cody 
26776ebdcee2SJeff Cody     intermediate = top;
26786ebdcee2SJeff Cody 
26796ebdcee2SJeff Cody     /* now we will go down through the list, and add each BDS we find
26806ebdcee2SJeff Cody      * into our deletion queue, until we hit the 'base'
26816ebdcee2SJeff Cody      */
26826ebdcee2SJeff Cody     while (intermediate) {
26835839e53bSMarkus Armbruster         intermediate_state = g_new0(BlkIntermediateStates, 1);
26846ebdcee2SJeff Cody         intermediate_state->bs = intermediate;
26856ebdcee2SJeff Cody         QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
26866ebdcee2SJeff Cody 
26876ebdcee2SJeff Cody         if (intermediate->backing_hd == base) {
26886ebdcee2SJeff Cody             base_bs = intermediate->backing_hd;
26896ebdcee2SJeff Cody             break;
26906ebdcee2SJeff Cody         }
26916ebdcee2SJeff Cody         intermediate = intermediate->backing_hd;
26926ebdcee2SJeff Cody     }
26936ebdcee2SJeff Cody     if (base_bs == NULL) {
26946ebdcee2SJeff Cody         /* something went wrong, we did not end at the base. safely
26956ebdcee2SJeff Cody          * unravel everything, and exit with error */
26966ebdcee2SJeff Cody         goto exit;
26976ebdcee2SJeff Cody     }
26986ebdcee2SJeff Cody 
26996ebdcee2SJeff Cody     /* success - we can delete the intermediate states, and link top->base */
270054e26900SJeff Cody     backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
270154e26900SJeff Cody     ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
27026ebdcee2SJeff Cody                                    base_bs->drv ? base_bs->drv->format_name : "");
27036ebdcee2SJeff Cody     if (ret) {
27046ebdcee2SJeff Cody         goto exit;
27056ebdcee2SJeff Cody     }
2706920beae1SFam Zheng     bdrv_set_backing_hd(new_top_bs, base_bs);
27076ebdcee2SJeff Cody 
27086ebdcee2SJeff Cody     QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
27096ebdcee2SJeff Cody         /* so that bdrv_close() does not recursively close the chain */
2710920beae1SFam Zheng         bdrv_set_backing_hd(intermediate_state->bs, NULL);
27114f6fd349SFam Zheng         bdrv_unref(intermediate_state->bs);
27126ebdcee2SJeff Cody     }
27136ebdcee2SJeff Cody     ret = 0;
27146ebdcee2SJeff Cody 
27156ebdcee2SJeff Cody exit:
27166ebdcee2SJeff Cody     QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
27176ebdcee2SJeff Cody         g_free(intermediate_state);
27186ebdcee2SJeff Cody     }
27196ebdcee2SJeff Cody     return ret;
27206ebdcee2SJeff Cody }
27216ebdcee2SJeff Cody 
27226ebdcee2SJeff Cody 
272371d0770cSaliguori static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
272471d0770cSaliguori                                    size_t size)
272571d0770cSaliguori {
272675af1f34SPeter Lieven     if (size > BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) {
27271dd3a447SKevin Wolf         return -EIO;
27281dd3a447SKevin Wolf     }
27291dd3a447SKevin Wolf 
2730c0191e76SMax Reitz     if (!bdrv_is_inserted(bs)) {
273171d0770cSaliguori         return -ENOMEDIUM;
2732c0191e76SMax Reitz     }
273371d0770cSaliguori 
2734c0191e76SMax Reitz     if (offset < 0) {
2735fbb7b4e0SKevin Wolf         return -EIO;
2736c0191e76SMax Reitz     }
273771d0770cSaliguori 
273871d0770cSaliguori     return 0;
273971d0770cSaliguori }
274071d0770cSaliguori 
274171d0770cSaliguori static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
274271d0770cSaliguori                               int nb_sectors)
274371d0770cSaliguori {
274475af1f34SPeter Lieven     if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
27458f4754edSKevin Wolf         return -EIO;
27468f4754edSKevin Wolf     }
27478f4754edSKevin Wolf 
2748eb5a3165SJes Sorensen     return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
2749eb5a3165SJes Sorensen                                    nb_sectors * BDRV_SECTOR_SIZE);
275071d0770cSaliguori }
275171d0770cSaliguori 
27521c9805a3SStefan Hajnoczi typedef struct RwCo {
27531c9805a3SStefan Hajnoczi     BlockDriverState *bs;
2754775aa8b6SKevin Wolf     int64_t offset;
27551c9805a3SStefan Hajnoczi     QEMUIOVector *qiov;
27561c9805a3SStefan Hajnoczi     bool is_write;
27571c9805a3SStefan Hajnoczi     int ret;
27584105eaaaSPeter Lieven     BdrvRequestFlags flags;
27591c9805a3SStefan Hajnoczi } RwCo;
27601c9805a3SStefan Hajnoczi 
27611c9805a3SStefan Hajnoczi static void coroutine_fn bdrv_rw_co_entry(void *opaque)
2762fc01f7e7Sbellard {
27631c9805a3SStefan Hajnoczi     RwCo *rwco = opaque;
2764fc01f7e7Sbellard 
27651c9805a3SStefan Hajnoczi     if (!rwco->is_write) {
2766775aa8b6SKevin Wolf         rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
2767775aa8b6SKevin Wolf                                       rwco->qiov->size, rwco->qiov,
27684105eaaaSPeter Lieven                                       rwco->flags);
27691c9805a3SStefan Hajnoczi     } else {
2770775aa8b6SKevin Wolf         rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
2771775aa8b6SKevin Wolf                                        rwco->qiov->size, rwco->qiov,
27724105eaaaSPeter Lieven                                        rwco->flags);
27731c9805a3SStefan Hajnoczi     }
27741c9805a3SStefan Hajnoczi }
2775e7a8a783SKevin Wolf 
27761c9805a3SStefan Hajnoczi /*
27778d3b1a2dSKevin Wolf  * Process a vectored synchronous request using coroutines
27781c9805a3SStefan Hajnoczi  */
2779775aa8b6SKevin Wolf static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
27804105eaaaSPeter Lieven                         QEMUIOVector *qiov, bool is_write,
27814105eaaaSPeter Lieven                         BdrvRequestFlags flags)
27821c9805a3SStefan Hajnoczi {
27831c9805a3SStefan Hajnoczi     Coroutine *co;
27841c9805a3SStefan Hajnoczi     RwCo rwco = {
27851c9805a3SStefan Hajnoczi         .bs = bs,
2786775aa8b6SKevin Wolf         .offset = offset,
27878d3b1a2dSKevin Wolf         .qiov = qiov,
27881c9805a3SStefan Hajnoczi         .is_write = is_write,
27891c9805a3SStefan Hajnoczi         .ret = NOT_DONE,
27904105eaaaSPeter Lieven         .flags = flags,
27911c9805a3SStefan Hajnoczi     };
27921c9805a3SStefan Hajnoczi 
2793498e386cSZhi Yong Wu     /**
2794498e386cSZhi Yong Wu      * In sync call context, when the vcpu is blocked, this throttling timer
2795498e386cSZhi Yong Wu      * will not fire; so the I/O throttling function has to be disabled here
2796498e386cSZhi Yong Wu      * if it has been enabled.
2797498e386cSZhi Yong Wu      */
2798498e386cSZhi Yong Wu     if (bs->io_limits_enabled) {
2799498e386cSZhi Yong Wu         fprintf(stderr, "Disabling I/O throttling on '%s' due "
2800498e386cSZhi Yong Wu                         "to synchronous I/O.\n", bdrv_get_device_name(bs));
2801498e386cSZhi Yong Wu         bdrv_io_limits_disable(bs);
2802498e386cSZhi Yong Wu     }
2803498e386cSZhi Yong Wu 
28041c9805a3SStefan Hajnoczi     if (qemu_in_coroutine()) {
28051c9805a3SStefan Hajnoczi         /* Fast-path if already in coroutine context */
28061c9805a3SStefan Hajnoczi         bdrv_rw_co_entry(&rwco);
28071c9805a3SStefan Hajnoczi     } else {
28082572b37aSStefan Hajnoczi         AioContext *aio_context = bdrv_get_aio_context(bs);
28092572b37aSStefan Hajnoczi 
28101c9805a3SStefan Hajnoczi         co = qemu_coroutine_create(bdrv_rw_co_entry);
28111c9805a3SStefan Hajnoczi         qemu_coroutine_enter(co, &rwco);
28121c9805a3SStefan Hajnoczi         while (rwco.ret == NOT_DONE) {
28132572b37aSStefan Hajnoczi             aio_poll(aio_context, true);
28141c9805a3SStefan Hajnoczi         }
28151c9805a3SStefan Hajnoczi     }
28161c9805a3SStefan Hajnoczi     return rwco.ret;
2817e7a8a783SKevin Wolf }
2818e7a8a783SKevin Wolf 
28198d3b1a2dSKevin Wolf /*
28208d3b1a2dSKevin Wolf  * Process a synchronous request using coroutines
28218d3b1a2dSKevin Wolf  */
28228d3b1a2dSKevin Wolf static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
28234105eaaaSPeter Lieven                       int nb_sectors, bool is_write, BdrvRequestFlags flags)
28248d3b1a2dSKevin Wolf {
28258d3b1a2dSKevin Wolf     QEMUIOVector qiov;
28268d3b1a2dSKevin Wolf     struct iovec iov = {
28278d3b1a2dSKevin Wolf         .iov_base = (void *)buf,
28288d3b1a2dSKevin Wolf         .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
28298d3b1a2dSKevin Wolf     };
28308d3b1a2dSKevin Wolf 
283175af1f34SPeter Lieven     if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
2832da15ee51SKevin Wolf         return -EINVAL;
2833da15ee51SKevin Wolf     }
2834da15ee51SKevin Wolf 
28358d3b1a2dSKevin Wolf     qemu_iovec_init_external(&qiov, &iov, 1);
2836775aa8b6SKevin Wolf     return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
2837775aa8b6SKevin Wolf                         &qiov, is_write, flags);
28388d3b1a2dSKevin Wolf }
28398d3b1a2dSKevin Wolf 
28401c9805a3SStefan Hajnoczi /* return < 0 if error. See bdrv_write() for the return codes */
28411c9805a3SStefan Hajnoczi int bdrv_read(BlockDriverState *bs, int64_t sector_num,
28421c9805a3SStefan Hajnoczi               uint8_t *buf, int nb_sectors)
28431c9805a3SStefan Hajnoczi {
28444105eaaaSPeter Lieven     return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
284583f64091Sbellard }
2846fc01f7e7Sbellard 
284707d27a44SMarkus Armbruster /* Just like bdrv_read(), but with I/O throttling temporarily disabled */
284807d27a44SMarkus Armbruster int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
284907d27a44SMarkus Armbruster                           uint8_t *buf, int nb_sectors)
285007d27a44SMarkus Armbruster {
285107d27a44SMarkus Armbruster     bool enabled;
285207d27a44SMarkus Armbruster     int ret;
285307d27a44SMarkus Armbruster 
285407d27a44SMarkus Armbruster     enabled = bs->io_limits_enabled;
285507d27a44SMarkus Armbruster     bs->io_limits_enabled = false;
28564e7395e8SPeter Lieven     ret = bdrv_read(bs, sector_num, buf, nb_sectors);
285707d27a44SMarkus Armbruster     bs->io_limits_enabled = enabled;
285807d27a44SMarkus Armbruster     return ret;
285907d27a44SMarkus Armbruster }
286007d27a44SMarkus Armbruster 
286119cb3738Sbellard /* Return < 0 if error. Important errors are:
286219cb3738Sbellard   -EIO         generic I/O error (may happen for all errors)
286319cb3738Sbellard   -ENOMEDIUM   No media inserted.
286419cb3738Sbellard   -EINVAL      Invalid sector number or nb_sectors
286519cb3738Sbellard   -EACCES      Trying to write a read-only device
286619cb3738Sbellard */
2867fc01f7e7Sbellard int bdrv_write(BlockDriverState *bs, int64_t sector_num,
2868fc01f7e7Sbellard                const uint8_t *buf, int nb_sectors)
2869fc01f7e7Sbellard {
28704105eaaaSPeter Lieven     return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
287183f64091Sbellard }
287283f64091Sbellard 
2873aa7bfbffSPeter Lieven int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
2874aa7bfbffSPeter Lieven                       int nb_sectors, BdrvRequestFlags flags)
28754105eaaaSPeter Lieven {
28764105eaaaSPeter Lieven     return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
2877aa7bfbffSPeter Lieven                       BDRV_REQ_ZERO_WRITE | flags);
28788d3b1a2dSKevin Wolf }
28798d3b1a2dSKevin Wolf 
2880d75cbb5eSPeter Lieven /*
2881d75cbb5eSPeter Lieven  * Completely zero out a block device with the help of bdrv_write_zeroes.
2882d75cbb5eSPeter Lieven  * The operation is sped up by checking the block status and only writing
2883d75cbb5eSPeter Lieven  * zeroes to the device if they currently do not return zeroes. Optional
2884d75cbb5eSPeter Lieven  * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
2885d75cbb5eSPeter Lieven  *
2886d75cbb5eSPeter Lieven  * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
2887d75cbb5eSPeter Lieven  */
2888d75cbb5eSPeter Lieven int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
2889d75cbb5eSPeter Lieven {
2890d32f7c10SMarkus Armbruster     int64_t target_sectors, ret, nb_sectors, sector_num = 0;
2891d75cbb5eSPeter Lieven     int n;
2892d75cbb5eSPeter Lieven 
2893d32f7c10SMarkus Armbruster     target_sectors = bdrv_nb_sectors(bs);
2894d32f7c10SMarkus Armbruster     if (target_sectors < 0) {
2895d32f7c10SMarkus Armbruster         return target_sectors;
28969ce10c0bSKevin Wolf     }
28979ce10c0bSKevin Wolf 
2898d75cbb5eSPeter Lieven     for (;;) {
289975af1f34SPeter Lieven         nb_sectors = MIN(target_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
2900d75cbb5eSPeter Lieven         if (nb_sectors <= 0) {
2901d75cbb5eSPeter Lieven             return 0;
2902d75cbb5eSPeter Lieven         }
2903d75cbb5eSPeter Lieven         ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
29043d94ce60SPeter Lieven         if (ret < 0) {
29053d94ce60SPeter Lieven             error_report("error getting block status at sector %" PRId64 ": %s",
29063d94ce60SPeter Lieven                          sector_num, strerror(-ret));
29073d94ce60SPeter Lieven             return ret;
29083d94ce60SPeter Lieven         }
2909d75cbb5eSPeter Lieven         if (ret & BDRV_BLOCK_ZERO) {
2910d75cbb5eSPeter Lieven             sector_num += n;
2911d75cbb5eSPeter Lieven             continue;
2912d75cbb5eSPeter Lieven         }
2913d75cbb5eSPeter Lieven         ret = bdrv_write_zeroes(bs, sector_num, n, flags);
2914d75cbb5eSPeter Lieven         if (ret < 0) {
2915d75cbb5eSPeter Lieven             error_report("error writing zeroes at sector %" PRId64 ": %s",
2916d75cbb5eSPeter Lieven                          sector_num, strerror(-ret));
2917d75cbb5eSPeter Lieven             return ret;
2918d75cbb5eSPeter Lieven         }
2919d75cbb5eSPeter Lieven         sector_num += n;
2920d75cbb5eSPeter Lieven     }
2921d75cbb5eSPeter Lieven }
2922d75cbb5eSPeter Lieven 
2923a3ef6571SKevin Wolf int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
292483f64091Sbellard {
2925a3ef6571SKevin Wolf     QEMUIOVector qiov;
2926a3ef6571SKevin Wolf     struct iovec iov = {
2927a3ef6571SKevin Wolf         .iov_base = (void *)buf,
2928a3ef6571SKevin Wolf         .iov_len = bytes,
2929a3ef6571SKevin Wolf     };
29309a8c4cceSKevin Wolf     int ret;
293183f64091Sbellard 
2932a3ef6571SKevin Wolf     if (bytes < 0) {
2933a3ef6571SKevin Wolf         return -EINVAL;
293483f64091Sbellard     }
293583f64091Sbellard 
2936a3ef6571SKevin Wolf     qemu_iovec_init_external(&qiov, &iov, 1);
2937a3ef6571SKevin Wolf     ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
2938a3ef6571SKevin Wolf     if (ret < 0) {
29399a8c4cceSKevin Wolf         return ret;
294083f64091Sbellard     }
294183f64091Sbellard 
2942a3ef6571SKevin Wolf     return bytes;
294383f64091Sbellard }
294483f64091Sbellard 
29458d3b1a2dSKevin Wolf int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
294683f64091Sbellard {
29479a8c4cceSKevin Wolf     int ret;
294883f64091Sbellard 
29498407d5d7SKevin Wolf     ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
29508d3b1a2dSKevin Wolf     if (ret < 0) {
29519a8c4cceSKevin Wolf         return ret;
29528d3b1a2dSKevin Wolf     }
29538d3b1a2dSKevin Wolf 
29548d3b1a2dSKevin Wolf     return qiov->size;
29558d3b1a2dSKevin Wolf }
29568d3b1a2dSKevin Wolf 
29578d3b1a2dSKevin Wolf int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
29588407d5d7SKevin Wolf                 const void *buf, int bytes)
29598d3b1a2dSKevin Wolf {
29608d3b1a2dSKevin Wolf     QEMUIOVector qiov;
29618d3b1a2dSKevin Wolf     struct iovec iov = {
29628d3b1a2dSKevin Wolf         .iov_base   = (void *) buf,
29638407d5d7SKevin Wolf         .iov_len    = bytes,
29648d3b1a2dSKevin Wolf     };
29658d3b1a2dSKevin Wolf 
29668407d5d7SKevin Wolf     if (bytes < 0) {
29678407d5d7SKevin Wolf         return -EINVAL;
29688407d5d7SKevin Wolf     }
29698407d5d7SKevin Wolf 
29708d3b1a2dSKevin Wolf     qemu_iovec_init_external(&qiov, &iov, 1);
29718d3b1a2dSKevin Wolf     return bdrv_pwritev(bs, offset, &qiov);
297283f64091Sbellard }
297383f64091Sbellard 
2974f08145feSKevin Wolf /*
2975f08145feSKevin Wolf  * Writes to the file and ensures that no writes are reordered across this
2976f08145feSKevin Wolf  * request (acts as a barrier)
2977f08145feSKevin Wolf  *
2978f08145feSKevin Wolf  * Returns 0 on success, -errno in error cases.
2979f08145feSKevin Wolf  */
2980f08145feSKevin Wolf int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
2981f08145feSKevin Wolf     const void *buf, int count)
2982f08145feSKevin Wolf {
2983f08145feSKevin Wolf     int ret;
2984f08145feSKevin Wolf 
2985f08145feSKevin Wolf     ret = bdrv_pwrite(bs, offset, buf, count);
2986f08145feSKevin Wolf     if (ret < 0) {
2987f08145feSKevin Wolf         return ret;
2988f08145feSKevin Wolf     }
2989f08145feSKevin Wolf 
2990f05fa4adSPaolo Bonzini     /* No flush needed for cache modes that already do it */
2991f05fa4adSPaolo Bonzini     if (bs->enable_write_cache) {
2992f08145feSKevin Wolf         bdrv_flush(bs);
2993f08145feSKevin Wolf     }
2994f08145feSKevin Wolf 
2995f08145feSKevin Wolf     return 0;
2996f08145feSKevin Wolf }
2997f08145feSKevin Wolf 
2998470c0504SStefan Hajnoczi static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
2999ab185921SStefan Hajnoczi         int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
3000ab185921SStefan Hajnoczi {
3001ab185921SStefan Hajnoczi     /* Perform I/O through a temporary buffer so that users who scribble over
3002ab185921SStefan Hajnoczi      * their read buffer while the operation is in progress do not end up
3003ab185921SStefan Hajnoczi      * modifying the image file.  This is critical for zero-copy guest I/O
3004ab185921SStefan Hajnoczi      * where anything might happen inside guest memory.
3005ab185921SStefan Hajnoczi      */
3006ab185921SStefan Hajnoczi     void *bounce_buffer;
3007ab185921SStefan Hajnoczi 
300879c053bdSStefan Hajnoczi     BlockDriver *drv = bs->drv;
3009ab185921SStefan Hajnoczi     struct iovec iov;
3010ab185921SStefan Hajnoczi     QEMUIOVector bounce_qiov;
3011ab185921SStefan Hajnoczi     int64_t cluster_sector_num;
3012ab185921SStefan Hajnoczi     int cluster_nb_sectors;
3013ab185921SStefan Hajnoczi     size_t skip_bytes;
3014ab185921SStefan Hajnoczi     int ret;
3015ab185921SStefan Hajnoczi 
3016ab185921SStefan Hajnoczi     /* Cover entire cluster so no additional backing file I/O is required when
3017ab185921SStefan Hajnoczi      * allocating cluster in the image file.
3018ab185921SStefan Hajnoczi      */
3019343bded4SPaolo Bonzini     bdrv_round_to_clusters(bs, sector_num, nb_sectors,
3020ab185921SStefan Hajnoczi                            &cluster_sector_num, &cluster_nb_sectors);
3021ab185921SStefan Hajnoczi 
3022470c0504SStefan Hajnoczi     trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
3023ab185921SStefan Hajnoczi                                    cluster_sector_num, cluster_nb_sectors);
3024ab185921SStefan Hajnoczi 
3025ab185921SStefan Hajnoczi     iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
3026857d4f46SKevin Wolf     iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
3027857d4f46SKevin Wolf     if (bounce_buffer == NULL) {
3028857d4f46SKevin Wolf         ret = -ENOMEM;
3029857d4f46SKevin Wolf         goto err;
3030857d4f46SKevin Wolf     }
3031857d4f46SKevin Wolf 
3032ab185921SStefan Hajnoczi     qemu_iovec_init_external(&bounce_qiov, &iov, 1);
3033ab185921SStefan Hajnoczi 
303479c053bdSStefan Hajnoczi     ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
3035ab185921SStefan Hajnoczi                              &bounce_qiov);
3036ab185921SStefan Hajnoczi     if (ret < 0) {
3037ab185921SStefan Hajnoczi         goto err;
3038ab185921SStefan Hajnoczi     }
3039ab185921SStefan Hajnoczi 
304079c053bdSStefan Hajnoczi     if (drv->bdrv_co_write_zeroes &&
304179c053bdSStefan Hajnoczi         buffer_is_zero(bounce_buffer, iov.iov_len)) {
3042621f0589SKevin Wolf         ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
3043aa7bfbffSPeter Lieven                                       cluster_nb_sectors, 0);
304479c053bdSStefan Hajnoczi     } else {
3045f05fa4adSPaolo Bonzini         /* This does not change the data on the disk, it is not necessary
3046f05fa4adSPaolo Bonzini          * to flush even in cache=writethrough mode.
3047f05fa4adSPaolo Bonzini          */
304879c053bdSStefan Hajnoczi         ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
3049ab185921SStefan Hajnoczi                                   &bounce_qiov);
305079c053bdSStefan Hajnoczi     }
305179c053bdSStefan Hajnoczi 
3052ab185921SStefan Hajnoczi     if (ret < 0) {
3053ab185921SStefan Hajnoczi         /* It might be okay to ignore write errors for guest requests.  If this
3054ab185921SStefan Hajnoczi          * is a deliberate copy-on-read then we don't want to ignore the error.
3055ab185921SStefan Hajnoczi          * Simply report it in all cases.
3056ab185921SStefan Hajnoczi          */
3057ab185921SStefan Hajnoczi         goto err;
3058ab185921SStefan Hajnoczi     }
3059ab185921SStefan Hajnoczi 
3060ab185921SStefan Hajnoczi     skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
306103396148SMichael Tokarev     qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
3062ab185921SStefan Hajnoczi                         nb_sectors * BDRV_SECTOR_SIZE);
3063ab185921SStefan Hajnoczi 
3064ab185921SStefan Hajnoczi err:
3065ab185921SStefan Hajnoczi     qemu_vfree(bounce_buffer);
3066ab185921SStefan Hajnoczi     return ret;
3067ab185921SStefan Hajnoczi }
3068ab185921SStefan Hajnoczi 
3069c5fbe571SStefan Hajnoczi /*
3070d0c7f642SKevin Wolf  * Forwards an already correctly aligned request to the BlockDriver. This
3071d0c7f642SKevin Wolf  * handles copy on read and zeroing after EOF; any other features must be
3072d0c7f642SKevin Wolf  * implemented by the caller.
3073c5fbe571SStefan Hajnoczi  */
3074d0c7f642SKevin Wolf static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
307565afd211SKevin Wolf     BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
3076ec746e10SKevin Wolf     int64_t align, QEMUIOVector *qiov, int flags)
3077da1fa91dSKevin Wolf {
3078da1fa91dSKevin Wolf     BlockDriver *drv = bs->drv;
3079dbffbdcfSStefan Hajnoczi     int ret;
3080da1fa91dSKevin Wolf 
3081d0c7f642SKevin Wolf     int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3082d0c7f642SKevin Wolf     unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
3083da1fa91dSKevin Wolf 
3084d0c7f642SKevin Wolf     assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3085d0c7f642SKevin Wolf     assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
30868eb029c2SKevin Wolf     assert(!qiov || bytes == qiov->size);
3087d0c7f642SKevin Wolf 
3088d0c7f642SKevin Wolf     /* Handle Copy on Read and associated serialisation */
3089470c0504SStefan Hajnoczi     if (flags & BDRV_REQ_COPY_ON_READ) {
30907327145fSKevin Wolf         /* If we touch the same cluster it counts as an overlap.  This
30917327145fSKevin Wolf          * guarantees that allocating writes will be serialized and not race
30927327145fSKevin Wolf          * with each other for the same cluster.  For example, in copy-on-read
30937327145fSKevin Wolf          * it ensures that the CoR read and write operations are atomic and
30947327145fSKevin Wolf          * guest writes cannot interleave between them. */
30957327145fSKevin Wolf         mark_request_serialising(req, bdrv_get_cluster_size(bs));
3096470c0504SStefan Hajnoczi     }
3097470c0504SStefan Hajnoczi 
30982dbafdc0SKevin Wolf     wait_serialising_requests(req);
3099f4658285SStefan Hajnoczi 
3100470c0504SStefan Hajnoczi     if (flags & BDRV_REQ_COPY_ON_READ) {
3101ab185921SStefan Hajnoczi         int pnum;
3102ab185921SStefan Hajnoczi 
3103bdad13b9SPaolo Bonzini         ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
3104ab185921SStefan Hajnoczi         if (ret < 0) {
3105ab185921SStefan Hajnoczi             goto out;
3106ab185921SStefan Hajnoczi         }
3107ab185921SStefan Hajnoczi 
3108ab185921SStefan Hajnoczi         if (!ret || pnum != nb_sectors) {
3109470c0504SStefan Hajnoczi             ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
3110ab185921SStefan Hajnoczi             goto out;
3111ab185921SStefan Hajnoczi         }
3112ab185921SStefan Hajnoczi     }
3113ab185921SStefan Hajnoczi 
3114d0c7f642SKevin Wolf     /* Forward the request to the BlockDriver */
3115c0191e76SMax Reitz     if (!bs->zero_beyond_eof) {
3116dbffbdcfSStefan Hajnoczi         ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
3117893a8f62SMORITA Kazutaka     } else {
3118c0191e76SMax Reitz         /* Read zeros after EOF */
31194049082cSMarkus Armbruster         int64_t total_sectors, max_nb_sectors;
3120893a8f62SMORITA Kazutaka 
31214049082cSMarkus Armbruster         total_sectors = bdrv_nb_sectors(bs);
31224049082cSMarkus Armbruster         if (total_sectors < 0) {
31234049082cSMarkus Armbruster             ret = total_sectors;
3124893a8f62SMORITA Kazutaka             goto out;
3125893a8f62SMORITA Kazutaka         }
3126893a8f62SMORITA Kazutaka 
31275f5bcd80SKevin Wolf         max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
31285f5bcd80SKevin Wolf                                   align >> BDRV_SECTOR_BITS);
3129e012b78cSPaolo Bonzini         if (nb_sectors < max_nb_sectors) {
3130e012b78cSPaolo Bonzini             ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
3131e012b78cSPaolo Bonzini         } else if (max_nb_sectors > 0) {
313233f461e0SKevin Wolf             QEMUIOVector local_qiov;
313333f461e0SKevin Wolf 
313433f461e0SKevin Wolf             qemu_iovec_init(&local_qiov, qiov->niov);
313533f461e0SKevin Wolf             qemu_iovec_concat(&local_qiov, qiov, 0,
3136e012b78cSPaolo Bonzini                               max_nb_sectors * BDRV_SECTOR_SIZE);
313733f461e0SKevin Wolf 
3138e012b78cSPaolo Bonzini             ret = drv->bdrv_co_readv(bs, sector_num, max_nb_sectors,
313933f461e0SKevin Wolf                                      &local_qiov);
314033f461e0SKevin Wolf 
314133f461e0SKevin Wolf             qemu_iovec_destroy(&local_qiov);
3142893a8f62SMORITA Kazutaka         } else {
3143893a8f62SMORITA Kazutaka             ret = 0;
3144893a8f62SMORITA Kazutaka         }
3145893a8f62SMORITA Kazutaka 
3146893a8f62SMORITA Kazutaka         /* Reading beyond end of file is supposed to produce zeroes */
3147893a8f62SMORITA Kazutaka         if (ret == 0 && total_sectors < sector_num + nb_sectors) {
3148893a8f62SMORITA Kazutaka             uint64_t offset = MAX(0, total_sectors - sector_num);
3149893a8f62SMORITA Kazutaka             uint64_t bytes = (sector_num + nb_sectors - offset) *
3150893a8f62SMORITA Kazutaka                               BDRV_SECTOR_SIZE;
3151893a8f62SMORITA Kazutaka             qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
3152893a8f62SMORITA Kazutaka         }
3153893a8f62SMORITA Kazutaka     }
3154ab185921SStefan Hajnoczi 
3155ab185921SStefan Hajnoczi out:
3156dbffbdcfSStefan Hajnoczi     return ret;
3157da1fa91dSKevin Wolf }
3158da1fa91dSKevin Wolf 
3159fc3959e4SFam Zheng static inline uint64_t bdrv_get_align(BlockDriverState *bs)
3160fc3959e4SFam Zheng {
3161fc3959e4SFam Zheng     /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
3162fc3959e4SFam Zheng     return MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
3163fc3959e4SFam Zheng }
3164fc3959e4SFam Zheng 
3165fc3959e4SFam Zheng static inline bool bdrv_req_is_aligned(BlockDriverState *bs,
3166fc3959e4SFam Zheng                                        int64_t offset, size_t bytes)
3167fc3959e4SFam Zheng {
3168fc3959e4SFam Zheng     int64_t align = bdrv_get_align(bs);
3169fc3959e4SFam Zheng     return !(offset & (align - 1) || (bytes & (align - 1)));
3170fc3959e4SFam Zheng }
3171fc3959e4SFam Zheng 
3172d0c7f642SKevin Wolf /*
3173d0c7f642SKevin Wolf  * Handle a read request in coroutine context
3174d0c7f642SKevin Wolf  */
31751b0288aeSKevin Wolf static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
31761b0288aeSKevin Wolf     int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
3177d0c7f642SKevin Wolf     BdrvRequestFlags flags)
3178d0c7f642SKevin Wolf {
3179d0c7f642SKevin Wolf     BlockDriver *drv = bs->drv;
318065afd211SKevin Wolf     BdrvTrackedRequest req;
318165afd211SKevin Wolf 
3182fc3959e4SFam Zheng     uint64_t align = bdrv_get_align(bs);
31831b0288aeSKevin Wolf     uint8_t *head_buf = NULL;
31841b0288aeSKevin Wolf     uint8_t *tail_buf = NULL;
31851b0288aeSKevin Wolf     QEMUIOVector local_qiov;
31861b0288aeSKevin Wolf     bool use_local_qiov = false;
3187d0c7f642SKevin Wolf     int ret;
3188d0c7f642SKevin Wolf 
3189d0c7f642SKevin Wolf     if (!drv) {
3190d0c7f642SKevin Wolf         return -ENOMEDIUM;
3191d0c7f642SKevin Wolf     }
3192b9c64947SMax Reitz 
3193b9c64947SMax Reitz     ret = bdrv_check_byte_request(bs, offset, bytes);
3194b9c64947SMax Reitz     if (ret < 0) {
3195b9c64947SMax Reitz         return ret;
3196d0c7f642SKevin Wolf     }
3197d0c7f642SKevin Wolf 
3198d0c7f642SKevin Wolf     if (bs->copy_on_read) {
3199d0c7f642SKevin Wolf         flags |= BDRV_REQ_COPY_ON_READ;
3200d0c7f642SKevin Wolf     }
3201d0c7f642SKevin Wolf 
3202d0c7f642SKevin Wolf     /* throttling disk I/O */
3203d0c7f642SKevin Wolf     if (bs->io_limits_enabled) {
3204d5103588SKevin Wolf         bdrv_io_limits_intercept(bs, bytes, false);
3205d0c7f642SKevin Wolf     }
3206d0c7f642SKevin Wolf 
32071b0288aeSKevin Wolf     /* Align read if necessary by padding qiov */
32081b0288aeSKevin Wolf     if (offset & (align - 1)) {
32091b0288aeSKevin Wolf         head_buf = qemu_blockalign(bs, align);
32101b0288aeSKevin Wolf         qemu_iovec_init(&local_qiov, qiov->niov + 2);
32111b0288aeSKevin Wolf         qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
32121b0288aeSKevin Wolf         qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
32131b0288aeSKevin Wolf         use_local_qiov = true;
32141b0288aeSKevin Wolf 
32151b0288aeSKevin Wolf         bytes += offset & (align - 1);
32161b0288aeSKevin Wolf         offset = offset & ~(align - 1);
32171b0288aeSKevin Wolf     }
32181b0288aeSKevin Wolf 
32191b0288aeSKevin Wolf     if ((offset + bytes) & (align - 1)) {
32201b0288aeSKevin Wolf         if (!use_local_qiov) {
32211b0288aeSKevin Wolf             qemu_iovec_init(&local_qiov, qiov->niov + 1);
32221b0288aeSKevin Wolf             qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
32231b0288aeSKevin Wolf             use_local_qiov = true;
32241b0288aeSKevin Wolf         }
32251b0288aeSKevin Wolf         tail_buf = qemu_blockalign(bs, align);
32261b0288aeSKevin Wolf         qemu_iovec_add(&local_qiov, tail_buf,
32271b0288aeSKevin Wolf                        align - ((offset + bytes) & (align - 1)));
32281b0288aeSKevin Wolf 
32291b0288aeSKevin Wolf         bytes = ROUND_UP(bytes, align);
32301b0288aeSKevin Wolf     }
32311b0288aeSKevin Wolf 
323265afd211SKevin Wolf     tracked_request_begin(&req, bs, offset, bytes, false);
3233ec746e10SKevin Wolf     ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
32341b0288aeSKevin Wolf                               use_local_qiov ? &local_qiov : qiov,
32351b0288aeSKevin Wolf                               flags);
323665afd211SKevin Wolf     tracked_request_end(&req);
32371b0288aeSKevin Wolf 
32381b0288aeSKevin Wolf     if (use_local_qiov) {
32391b0288aeSKevin Wolf         qemu_iovec_destroy(&local_qiov);
32401b0288aeSKevin Wolf         qemu_vfree(head_buf);
32411b0288aeSKevin Wolf         qemu_vfree(tail_buf);
32421b0288aeSKevin Wolf     }
32431b0288aeSKevin Wolf 
3244d0c7f642SKevin Wolf     return ret;
3245d0c7f642SKevin Wolf }
3246d0c7f642SKevin Wolf 
32471b0288aeSKevin Wolf static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
32481b0288aeSKevin Wolf     int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
32491b0288aeSKevin Wolf     BdrvRequestFlags flags)
32501b0288aeSKevin Wolf {
325175af1f34SPeter Lieven     if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
32521b0288aeSKevin Wolf         return -EINVAL;
32531b0288aeSKevin Wolf     }
32541b0288aeSKevin Wolf 
32551b0288aeSKevin Wolf     return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
32561b0288aeSKevin Wolf                              nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
32571b0288aeSKevin Wolf }
32581b0288aeSKevin Wolf 
3259c5fbe571SStefan Hajnoczi int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
3260da1fa91dSKevin Wolf     int nb_sectors, QEMUIOVector *qiov)
3261da1fa91dSKevin Wolf {
3262c5fbe571SStefan Hajnoczi     trace_bdrv_co_readv(bs, sector_num, nb_sectors);
3263da1fa91dSKevin Wolf 
3264470c0504SStefan Hajnoczi     return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
3265470c0504SStefan Hajnoczi }
3266470c0504SStefan Hajnoczi 
3267470c0504SStefan Hajnoczi int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
3268470c0504SStefan Hajnoczi     int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
3269470c0504SStefan Hajnoczi {
3270470c0504SStefan Hajnoczi     trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
3271470c0504SStefan Hajnoczi 
3272470c0504SStefan Hajnoczi     return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
3273470c0504SStefan Hajnoczi                             BDRV_REQ_COPY_ON_READ);
3274c5fbe571SStefan Hajnoczi }
3275c5fbe571SStefan Hajnoczi 
327698764152SPeter Lieven #define MAX_WRITE_ZEROES_BOUNCE_BUFFER 32768
3277c31cb707SPeter Lieven 
3278f08f2ddaSStefan Hajnoczi static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
3279aa7bfbffSPeter Lieven     int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
3280f08f2ddaSStefan Hajnoczi {
3281f08f2ddaSStefan Hajnoczi     BlockDriver *drv = bs->drv;
3282f08f2ddaSStefan Hajnoczi     QEMUIOVector qiov;
3283c31cb707SPeter Lieven     struct iovec iov = {0};
3284c31cb707SPeter Lieven     int ret = 0;
3285f08f2ddaSStefan Hajnoczi 
328675af1f34SPeter Lieven     int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_write_zeroes,
328775af1f34SPeter Lieven                                         BDRV_REQUEST_MAX_SECTORS);
3288621f0589SKevin Wolf 
3289c31cb707SPeter Lieven     while (nb_sectors > 0 && !ret) {
3290c31cb707SPeter Lieven         int num = nb_sectors;
3291c31cb707SPeter Lieven 
3292b8d71c09SPaolo Bonzini         /* Align request.  Block drivers can expect the "bulk" of the request
3293b8d71c09SPaolo Bonzini          * to be aligned.
3294b8d71c09SPaolo Bonzini          */
3295b8d71c09SPaolo Bonzini         if (bs->bl.write_zeroes_alignment
3296b8d71c09SPaolo Bonzini             && num > bs->bl.write_zeroes_alignment) {
3297b8d71c09SPaolo Bonzini             if (sector_num % bs->bl.write_zeroes_alignment != 0) {
3298b8d71c09SPaolo Bonzini                 /* Make a small request up to the first aligned sector.  */
3299c31cb707SPeter Lieven                 num = bs->bl.write_zeroes_alignment;
3300c31cb707SPeter Lieven                 num -= sector_num % bs->bl.write_zeroes_alignment;
3301b8d71c09SPaolo Bonzini             } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
3302b8d71c09SPaolo Bonzini                 /* Shorten the request to the last aligned sector.  num cannot
3303b8d71c09SPaolo Bonzini                  * underflow because num > bs->bl.write_zeroes_alignment.
3304b8d71c09SPaolo Bonzini                  */
3305b8d71c09SPaolo Bonzini                 num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
3306b8d71c09SPaolo Bonzini             }
3307c31cb707SPeter Lieven         }
3308c31cb707SPeter Lieven 
3309c31cb707SPeter Lieven         /* limit request size */
3310c31cb707SPeter Lieven         if (num > max_write_zeroes) {
3311c31cb707SPeter Lieven             num = max_write_zeroes;
3312c31cb707SPeter Lieven         }
3313c31cb707SPeter Lieven 
3314c31cb707SPeter Lieven         ret = -ENOTSUP;
3315f08f2ddaSStefan Hajnoczi         /* First try the efficient write zeroes operation */
3316f08f2ddaSStefan Hajnoczi         if (drv->bdrv_co_write_zeroes) {
3317c31cb707SPeter Lieven             ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
3318f08f2ddaSStefan Hajnoczi         }
3319f08f2ddaSStefan Hajnoczi 
3320c31cb707SPeter Lieven         if (ret == -ENOTSUP) {
3321f08f2ddaSStefan Hajnoczi             /* Fall back to bounce buffer if write zeroes is unsupported */
3322095e4fa4SPeter Lieven             int max_xfer_len = MIN_NON_ZERO(bs->bl.max_transfer_length,
332398764152SPeter Lieven                                             MAX_WRITE_ZEROES_BOUNCE_BUFFER);
3324095e4fa4SPeter Lieven             num = MIN(num, max_xfer_len);
3325c31cb707SPeter Lieven             iov.iov_len = num * BDRV_SECTOR_SIZE;
3326c31cb707SPeter Lieven             if (iov.iov_base == NULL) {
3327857d4f46SKevin Wolf                 iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
3328857d4f46SKevin Wolf                 if (iov.iov_base == NULL) {
3329857d4f46SKevin Wolf                     ret = -ENOMEM;
3330857d4f46SKevin Wolf                     goto fail;
3331857d4f46SKevin Wolf                 }
3332b8d71c09SPaolo Bonzini                 memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
3333c31cb707SPeter Lieven             }
3334f08f2ddaSStefan Hajnoczi             qemu_iovec_init_external(&qiov, &iov, 1);
3335f08f2ddaSStefan Hajnoczi 
3336c31cb707SPeter Lieven             ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);
3337b8d71c09SPaolo Bonzini 
3338b8d71c09SPaolo Bonzini             /* Keep bounce buffer around if it is big enough for all
3339b8d71c09SPaolo Bonzini              * all future requests.
3340b8d71c09SPaolo Bonzini              */
3341095e4fa4SPeter Lieven             if (num < max_xfer_len) {
3342b8d71c09SPaolo Bonzini                 qemu_vfree(iov.iov_base);
3343b8d71c09SPaolo Bonzini                 iov.iov_base = NULL;
3344b8d71c09SPaolo Bonzini             }
3345c31cb707SPeter Lieven         }
3346c31cb707SPeter Lieven 
3347c31cb707SPeter Lieven         sector_num += num;
3348c31cb707SPeter Lieven         nb_sectors -= num;
3349c31cb707SPeter Lieven     }
3350f08f2ddaSStefan Hajnoczi 
3351857d4f46SKevin Wolf fail:
3352f08f2ddaSStefan Hajnoczi     qemu_vfree(iov.iov_base);
3353f08f2ddaSStefan Hajnoczi     return ret;
3354f08f2ddaSStefan Hajnoczi }
3355f08f2ddaSStefan Hajnoczi 
3356c5fbe571SStefan Hajnoczi /*
3357b404f720SKevin Wolf  * Forwards an already correctly aligned write request to the BlockDriver.
3358c5fbe571SStefan Hajnoczi  */
3359b404f720SKevin Wolf static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
336065afd211SKevin Wolf     BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
336165afd211SKevin Wolf     QEMUIOVector *qiov, int flags)
3362c5fbe571SStefan Hajnoczi {
3363c5fbe571SStefan Hajnoczi     BlockDriver *drv = bs->drv;
336428de2dcdSKevin Wolf     bool waited;
33656b7cb247SStefan Hajnoczi     int ret;
3366da1fa91dSKevin Wolf 
3367b404f720SKevin Wolf     int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3368b404f720SKevin Wolf     unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
3369da1fa91dSKevin Wolf 
3370b404f720SKevin Wolf     assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3371b404f720SKevin Wolf     assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
33728eb029c2SKevin Wolf     assert(!qiov || bytes == qiov->size);
3373cc0681c4SBenoît Canet 
337428de2dcdSKevin Wolf     waited = wait_serialising_requests(req);
337528de2dcdSKevin Wolf     assert(!waited || !req->serialising);
3376af91f9a7SKevin Wolf     assert(req->overlap_offset <= offset);
3377af91f9a7SKevin Wolf     assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
3378244eadefSKevin Wolf 
337965afd211SKevin Wolf     ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
3380d616b224SStefan Hajnoczi 
3381465bee1dSPeter Lieven     if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
3382465bee1dSPeter Lieven         !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes &&
3383465bee1dSPeter Lieven         qemu_iovec_is_zero(qiov)) {
3384465bee1dSPeter Lieven         flags |= BDRV_REQ_ZERO_WRITE;
3385465bee1dSPeter Lieven         if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
3386465bee1dSPeter Lieven             flags |= BDRV_REQ_MAY_UNMAP;
3387465bee1dSPeter Lieven         }
3388465bee1dSPeter Lieven     }
3389465bee1dSPeter Lieven 
3390d616b224SStefan Hajnoczi     if (ret < 0) {
3391d616b224SStefan Hajnoczi         /* Do nothing, write notifier decided to fail this request */
3392d616b224SStefan Hajnoczi     } else if (flags & BDRV_REQ_ZERO_WRITE) {
33939e1cb96dSKevin Wolf         BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
3394aa7bfbffSPeter Lieven         ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
3395f08f2ddaSStefan Hajnoczi     } else {
33969e1cb96dSKevin Wolf         BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
33976b7cb247SStefan Hajnoczi         ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
3398f08f2ddaSStefan Hajnoczi     }
33999e1cb96dSKevin Wolf     BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);
34006b7cb247SStefan Hajnoczi 
3401f05fa4adSPaolo Bonzini     if (ret == 0 && !bs->enable_write_cache) {
3402f05fa4adSPaolo Bonzini         ret = bdrv_co_flush(bs);
3403f05fa4adSPaolo Bonzini     }
3404f05fa4adSPaolo Bonzini 
34051755da16SPaolo Bonzini     bdrv_set_dirty(bs, sector_num, nb_sectors);
3406da1fa91dSKevin Wolf 
34075366d0c8SBenoît Canet     block_acct_highest_sector(&bs->stats, sector_num, nb_sectors);
34085e5a94b6SBenoît Canet 
3409c0191e76SMax Reitz     if (ret >= 0) {
3410df2a6f29SPaolo Bonzini         bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
3411df2a6f29SPaolo Bonzini     }
3412da1fa91dSKevin Wolf 
34136b7cb247SStefan Hajnoczi     return ret;
3414da1fa91dSKevin Wolf }
3415da1fa91dSKevin Wolf 
3416b404f720SKevin Wolf /*
3417b404f720SKevin Wolf  * Handle a write request in coroutine context
3418b404f720SKevin Wolf  */
34196601553eSKevin Wolf static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
34206601553eSKevin Wolf     int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
3421b404f720SKevin Wolf     BdrvRequestFlags flags)
3422b404f720SKevin Wolf {
342365afd211SKevin Wolf     BdrvTrackedRequest req;
3424fc3959e4SFam Zheng     uint64_t align = bdrv_get_align(bs);
34253b8242e0SKevin Wolf     uint8_t *head_buf = NULL;
34263b8242e0SKevin Wolf     uint8_t *tail_buf = NULL;
34273b8242e0SKevin Wolf     QEMUIOVector local_qiov;
34283b8242e0SKevin Wolf     bool use_local_qiov = false;
3429b404f720SKevin Wolf     int ret;
3430b404f720SKevin Wolf 
3431b404f720SKevin Wolf     if (!bs->drv) {
3432b404f720SKevin Wolf         return -ENOMEDIUM;
3433b404f720SKevin Wolf     }
3434b404f720SKevin Wolf     if (bs->read_only) {
3435b404f720SKevin Wolf         return -EACCES;
3436b404f720SKevin Wolf     }
3437b9c64947SMax Reitz 
3438b9c64947SMax Reitz     ret = bdrv_check_byte_request(bs, offset, bytes);
3439b9c64947SMax Reitz     if (ret < 0) {
3440b9c64947SMax Reitz         return ret;
3441b404f720SKevin Wolf     }
3442b404f720SKevin Wolf 
3443b404f720SKevin Wolf     /* throttling disk I/O */
3444b404f720SKevin Wolf     if (bs->io_limits_enabled) {
3445d5103588SKevin Wolf         bdrv_io_limits_intercept(bs, bytes, true);
3446b404f720SKevin Wolf     }
3447b404f720SKevin Wolf 
34483b8242e0SKevin Wolf     /*
34493b8242e0SKevin Wolf      * Align write if necessary by performing a read-modify-write cycle.
34503b8242e0SKevin Wolf      * Pad qiov with the read parts and be sure to have a tracked request not
34513b8242e0SKevin Wolf      * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
34523b8242e0SKevin Wolf      */
345365afd211SKevin Wolf     tracked_request_begin(&req, bs, offset, bytes, true);
34543b8242e0SKevin Wolf 
34553b8242e0SKevin Wolf     if (offset & (align - 1)) {
34563b8242e0SKevin Wolf         QEMUIOVector head_qiov;
34573b8242e0SKevin Wolf         struct iovec head_iov;
34583b8242e0SKevin Wolf 
34593b8242e0SKevin Wolf         mark_request_serialising(&req, align);
34603b8242e0SKevin Wolf         wait_serialising_requests(&req);
34613b8242e0SKevin Wolf 
34623b8242e0SKevin Wolf         head_buf = qemu_blockalign(bs, align);
34633b8242e0SKevin Wolf         head_iov = (struct iovec) {
34643b8242e0SKevin Wolf             .iov_base   = head_buf,
34653b8242e0SKevin Wolf             .iov_len    = align,
34663b8242e0SKevin Wolf         };
34673b8242e0SKevin Wolf         qemu_iovec_init_external(&head_qiov, &head_iov, 1);
34683b8242e0SKevin Wolf 
34699e1cb96dSKevin Wolf         BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
34703b8242e0SKevin Wolf         ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
34713b8242e0SKevin Wolf                                   align, &head_qiov, 0);
34723b8242e0SKevin Wolf         if (ret < 0) {
34733b8242e0SKevin Wolf             goto fail;
34743b8242e0SKevin Wolf         }
34759e1cb96dSKevin Wolf         BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
34763b8242e0SKevin Wolf 
34773b8242e0SKevin Wolf         qemu_iovec_init(&local_qiov, qiov->niov + 2);
34783b8242e0SKevin Wolf         qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
34793b8242e0SKevin Wolf         qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
34803b8242e0SKevin Wolf         use_local_qiov = true;
34813b8242e0SKevin Wolf 
34823b8242e0SKevin Wolf         bytes += offset & (align - 1);
34833b8242e0SKevin Wolf         offset = offset & ~(align - 1);
34843b8242e0SKevin Wolf     }
34853b8242e0SKevin Wolf 
34863b8242e0SKevin Wolf     if ((offset + bytes) & (align - 1)) {
34873b8242e0SKevin Wolf         QEMUIOVector tail_qiov;
34883b8242e0SKevin Wolf         struct iovec tail_iov;
34893b8242e0SKevin Wolf         size_t tail_bytes;
349028de2dcdSKevin Wolf         bool waited;
34913b8242e0SKevin Wolf 
34923b8242e0SKevin Wolf         mark_request_serialising(&req, align);
349328de2dcdSKevin Wolf         waited = wait_serialising_requests(&req);
349428de2dcdSKevin Wolf         assert(!waited || !use_local_qiov);
34953b8242e0SKevin Wolf 
34963b8242e0SKevin Wolf         tail_buf = qemu_blockalign(bs, align);
34973b8242e0SKevin Wolf         tail_iov = (struct iovec) {
34983b8242e0SKevin Wolf             .iov_base   = tail_buf,
34993b8242e0SKevin Wolf             .iov_len    = align,
35003b8242e0SKevin Wolf         };
35013b8242e0SKevin Wolf         qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
35023b8242e0SKevin Wolf 
35039e1cb96dSKevin Wolf         BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
35043b8242e0SKevin Wolf         ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
35053b8242e0SKevin Wolf                                   align, &tail_qiov, 0);
35063b8242e0SKevin Wolf         if (ret < 0) {
35073b8242e0SKevin Wolf             goto fail;
35083b8242e0SKevin Wolf         }
35099e1cb96dSKevin Wolf         BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
35103b8242e0SKevin Wolf 
35113b8242e0SKevin Wolf         if (!use_local_qiov) {
35123b8242e0SKevin Wolf             qemu_iovec_init(&local_qiov, qiov->niov + 1);
35133b8242e0SKevin Wolf             qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
35143b8242e0SKevin Wolf             use_local_qiov = true;
35153b8242e0SKevin Wolf         }
35163b8242e0SKevin Wolf 
35173b8242e0SKevin Wolf         tail_bytes = (offset + bytes) & (align - 1);
35183b8242e0SKevin Wolf         qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
35193b8242e0SKevin Wolf 
35203b8242e0SKevin Wolf         bytes = ROUND_UP(bytes, align);
35213b8242e0SKevin Wolf     }
35223b8242e0SKevin Wolf 
3523fc3959e4SFam Zheng     if (use_local_qiov) {
3524fc3959e4SFam Zheng         /* Local buffer may have non-zero data. */
3525fc3959e4SFam Zheng         flags &= ~BDRV_REQ_ZERO_WRITE;
3526fc3959e4SFam Zheng     }
35273b8242e0SKevin Wolf     ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
35283b8242e0SKevin Wolf                                use_local_qiov ? &local_qiov : qiov,
35293b8242e0SKevin Wolf                                flags);
35303b8242e0SKevin Wolf 
35313b8242e0SKevin Wolf fail:
353265afd211SKevin Wolf     tracked_request_end(&req);
3533b404f720SKevin Wolf 
35343b8242e0SKevin Wolf     if (use_local_qiov) {
35353b8242e0SKevin Wolf         qemu_iovec_destroy(&local_qiov);
353699c4a85cSKevin Wolf     }
35373b8242e0SKevin Wolf     qemu_vfree(head_buf);
35383b8242e0SKevin Wolf     qemu_vfree(tail_buf);
35393b8242e0SKevin Wolf 
3540b404f720SKevin Wolf     return ret;
3541b404f720SKevin Wolf }
3542b404f720SKevin Wolf 
35436601553eSKevin Wolf static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
35446601553eSKevin Wolf     int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
35456601553eSKevin Wolf     BdrvRequestFlags flags)
35466601553eSKevin Wolf {
354775af1f34SPeter Lieven     if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
35486601553eSKevin Wolf         return -EINVAL;
35496601553eSKevin Wolf     }
35506601553eSKevin Wolf 
35516601553eSKevin Wolf     return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
35526601553eSKevin Wolf                               nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
35536601553eSKevin Wolf }
35546601553eSKevin Wolf 
3555c5fbe571SStefan Hajnoczi int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
3556c5fbe571SStefan Hajnoczi     int nb_sectors, QEMUIOVector *qiov)
3557c5fbe571SStefan Hajnoczi {
3558c5fbe571SStefan Hajnoczi     trace_bdrv_co_writev(bs, sector_num, nb_sectors);
3559c5fbe571SStefan Hajnoczi 
3560f08f2ddaSStefan Hajnoczi     return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
3561f08f2ddaSStefan Hajnoczi }
3562f08f2ddaSStefan Hajnoczi 
3563f08f2ddaSStefan Hajnoczi int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
3564aa7bfbffSPeter Lieven                                       int64_t sector_num, int nb_sectors,
3565aa7bfbffSPeter Lieven                                       BdrvRequestFlags flags)
3566f08f2ddaSStefan Hajnoczi {
3567fc3959e4SFam Zheng     int ret;
3568fc3959e4SFam Zheng 
356994d6ff21SPaolo Bonzini     trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
3570f08f2ddaSStefan Hajnoczi 
3571d32f35cbSPeter Lieven     if (!(bs->open_flags & BDRV_O_UNMAP)) {
3572d32f35cbSPeter Lieven         flags &= ~BDRV_REQ_MAY_UNMAP;
3573d32f35cbSPeter Lieven     }
3574fc3959e4SFam Zheng     if (bdrv_req_is_aligned(bs, sector_num << BDRV_SECTOR_BITS,
3575fc3959e4SFam Zheng                             nb_sectors << BDRV_SECTOR_BITS)) {
3576fc3959e4SFam Zheng         ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
3577aa7bfbffSPeter Lieven                                 BDRV_REQ_ZERO_WRITE | flags);
3578fc3959e4SFam Zheng     } else {
3579fc3959e4SFam Zheng         uint8_t *buf;
3580fc3959e4SFam Zheng         QEMUIOVector local_qiov;
3581fc3959e4SFam Zheng         size_t bytes = nb_sectors << BDRV_SECTOR_BITS;
3582fc3959e4SFam Zheng 
3583fc3959e4SFam Zheng         buf = qemu_memalign(bdrv_opt_mem_align(bs), bytes);
3584fc3959e4SFam Zheng         memset(buf, 0, bytes);
3585fc3959e4SFam Zheng         qemu_iovec_init(&local_qiov, 1);
3586fc3959e4SFam Zheng         qemu_iovec_add(&local_qiov, buf, bytes);
3587fc3959e4SFam Zheng 
3588fc3959e4SFam Zheng         ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, &local_qiov,
3589fc3959e4SFam Zheng                                 BDRV_REQ_ZERO_WRITE | flags);
3590fc3959e4SFam Zheng         qemu_vfree(buf);
3591fc3959e4SFam Zheng     }
3592fc3959e4SFam Zheng     return ret;
3593c5fbe571SStefan Hajnoczi }
3594c5fbe571SStefan Hajnoczi 
359583f64091Sbellard /**
359683f64091Sbellard  * Truncate file to 'offset' bytes (needed only for file protocols)
359783f64091Sbellard  */
359883f64091Sbellard int bdrv_truncate(BlockDriverState *bs, int64_t offset)
359983f64091Sbellard {
360083f64091Sbellard     BlockDriver *drv = bs->drv;
360151762288SStefan Hajnoczi     int ret;
360283f64091Sbellard     if (!drv)
360319cb3738Sbellard         return -ENOMEDIUM;
360483f64091Sbellard     if (!drv->bdrv_truncate)
360583f64091Sbellard         return -ENOTSUP;
360659f2689dSNaphtali Sprei     if (bs->read_only)
360759f2689dSNaphtali Sprei         return -EACCES;
36089c75e168SJeff Cody 
360951762288SStefan Hajnoczi     ret = drv->bdrv_truncate(bs, offset);
361051762288SStefan Hajnoczi     if (ret == 0) {
361151762288SStefan Hajnoczi         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
3612ce1ffea8SJohn Snow         bdrv_dirty_bitmap_truncate(bs);
3613a7f53e26SMarkus Armbruster         if (bs->blk) {
3614a7f53e26SMarkus Armbruster             blk_dev_resize_cb(bs->blk);
3615a7f53e26SMarkus Armbruster         }
361651762288SStefan Hajnoczi     }
361751762288SStefan Hajnoczi     return ret;
361883f64091Sbellard }
361983f64091Sbellard 
362083f64091Sbellard /**
36214a1d5e1fSFam Zheng  * Length of a allocated file in bytes. Sparse files are counted by actual
36224a1d5e1fSFam Zheng  * allocated space. Return < 0 if error or unknown.
36234a1d5e1fSFam Zheng  */
36244a1d5e1fSFam Zheng int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
36254a1d5e1fSFam Zheng {
36264a1d5e1fSFam Zheng     BlockDriver *drv = bs->drv;
36274a1d5e1fSFam Zheng     if (!drv) {
36284a1d5e1fSFam Zheng         return -ENOMEDIUM;
36294a1d5e1fSFam Zheng     }
36304a1d5e1fSFam Zheng     if (drv->bdrv_get_allocated_file_size) {
36314a1d5e1fSFam Zheng         return drv->bdrv_get_allocated_file_size(bs);
36324a1d5e1fSFam Zheng     }
36334a1d5e1fSFam Zheng     if (bs->file) {
36344a1d5e1fSFam Zheng         return bdrv_get_allocated_file_size(bs->file);
36354a1d5e1fSFam Zheng     }
36364a1d5e1fSFam Zheng     return -ENOTSUP;
36374a1d5e1fSFam Zheng }
36384a1d5e1fSFam Zheng 
36394a1d5e1fSFam Zheng /**
364065a9bb25SMarkus Armbruster  * Return number of sectors on success, -errno on error.
364183f64091Sbellard  */
364265a9bb25SMarkus Armbruster int64_t bdrv_nb_sectors(BlockDriverState *bs)
364383f64091Sbellard {
364483f64091Sbellard     BlockDriver *drv = bs->drv;
364565a9bb25SMarkus Armbruster 
364683f64091Sbellard     if (!drv)
364719cb3738Sbellard         return -ENOMEDIUM;
364851762288SStefan Hajnoczi 
3649b94a2610SKevin Wolf     if (drv->has_variable_length) {
3650b94a2610SKevin Wolf         int ret = refresh_total_sectors(bs, bs->total_sectors);
3651b94a2610SKevin Wolf         if (ret < 0) {
3652b94a2610SKevin Wolf             return ret;
3653fc01f7e7Sbellard         }
365446a4e4e6SStefan Hajnoczi     }
365565a9bb25SMarkus Armbruster     return bs->total_sectors;
365665a9bb25SMarkus Armbruster }
365765a9bb25SMarkus Armbruster 
365865a9bb25SMarkus Armbruster /**
365965a9bb25SMarkus Armbruster  * Return length in bytes on success, -errno on error.
366065a9bb25SMarkus Armbruster  * The length is always a multiple of BDRV_SECTOR_SIZE.
366165a9bb25SMarkus Armbruster  */
366265a9bb25SMarkus Armbruster int64_t bdrv_getlength(BlockDriverState *bs)
366365a9bb25SMarkus Armbruster {
366465a9bb25SMarkus Armbruster     int64_t ret = bdrv_nb_sectors(bs);
366565a9bb25SMarkus Armbruster 
366665a9bb25SMarkus Armbruster     return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
366746a4e4e6SStefan Hajnoczi }
3668fc01f7e7Sbellard 
366919cb3738Sbellard /* return 0 as number of sectors if no device present or error */
367096b8f136Sths void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
3671fc01f7e7Sbellard {
367265a9bb25SMarkus Armbruster     int64_t nb_sectors = bdrv_nb_sectors(bs);
367365a9bb25SMarkus Armbruster 
367465a9bb25SMarkus Armbruster     *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
3675fc01f7e7Sbellard }
3676cf98951bSbellard 
3677ff06f5f3SPaolo Bonzini void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
3678ff06f5f3SPaolo Bonzini                        BlockdevOnError on_write_error)
3679abd7f68dSMarkus Armbruster {
3680abd7f68dSMarkus Armbruster     bs->on_read_error = on_read_error;
3681abd7f68dSMarkus Armbruster     bs->on_write_error = on_write_error;
3682abd7f68dSMarkus Armbruster }
3683abd7f68dSMarkus Armbruster 
36841ceee0d5SPaolo Bonzini BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
3685abd7f68dSMarkus Armbruster {
3686abd7f68dSMarkus Armbruster     return is_read ? bs->on_read_error : bs->on_write_error;
3687abd7f68dSMarkus Armbruster }
3688abd7f68dSMarkus Armbruster 
36893e1caa5fSPaolo Bonzini BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
36903e1caa5fSPaolo Bonzini {
36913e1caa5fSPaolo Bonzini     BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
36923e1caa5fSPaolo Bonzini 
36933e1caa5fSPaolo Bonzini     switch (on_err) {
36943e1caa5fSPaolo Bonzini     case BLOCKDEV_ON_ERROR_ENOSPC:
3695a589569fSWenchao Xia         return (error == ENOSPC) ?
3696a589569fSWenchao Xia                BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
36973e1caa5fSPaolo Bonzini     case BLOCKDEV_ON_ERROR_STOP:
3698a589569fSWenchao Xia         return BLOCK_ERROR_ACTION_STOP;
36993e1caa5fSPaolo Bonzini     case BLOCKDEV_ON_ERROR_REPORT:
3700a589569fSWenchao Xia         return BLOCK_ERROR_ACTION_REPORT;
37013e1caa5fSPaolo Bonzini     case BLOCKDEV_ON_ERROR_IGNORE:
3702a589569fSWenchao Xia         return BLOCK_ERROR_ACTION_IGNORE;
37033e1caa5fSPaolo Bonzini     default:
37043e1caa5fSPaolo Bonzini         abort();
37053e1caa5fSPaolo Bonzini     }
37063e1caa5fSPaolo Bonzini }
37073e1caa5fSPaolo Bonzini 
3708c7c2ff0cSLuiz Capitulino static void send_qmp_error_event(BlockDriverState *bs,
3709c7c2ff0cSLuiz Capitulino                                  BlockErrorAction action,
3710c7c2ff0cSLuiz Capitulino                                  bool is_read, int error)
3711c7c2ff0cSLuiz Capitulino {
3712573742a5SPeter Maydell     IoOperationType optype;
3713c7c2ff0cSLuiz Capitulino 
3714573742a5SPeter Maydell     optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
3715573742a5SPeter Maydell     qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
3716c7c2ff0cSLuiz Capitulino                                    bdrv_iostatus_is_enabled(bs),
3717624ff573SLuiz Capitulino                                    error == ENOSPC, strerror(error),
3718624ff573SLuiz Capitulino                                    &error_abort);
3719c7c2ff0cSLuiz Capitulino }
3720c7c2ff0cSLuiz Capitulino 
37213e1caa5fSPaolo Bonzini /* This is done by device models because, while the block layer knows
37223e1caa5fSPaolo Bonzini  * about the error, it does not know whether an operation comes from
37233e1caa5fSPaolo Bonzini  * the device or the block layer (from a job, for example).
37243e1caa5fSPaolo Bonzini  */
37253e1caa5fSPaolo Bonzini void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
37263e1caa5fSPaolo Bonzini                        bool is_read, int error)
37273e1caa5fSPaolo Bonzini {
37283e1caa5fSPaolo Bonzini     assert(error >= 0);
37292bd3bce8SPaolo Bonzini 
3730a589569fSWenchao Xia     if (action == BLOCK_ERROR_ACTION_STOP) {
37312bd3bce8SPaolo Bonzini         /* First set the iostatus, so that "info block" returns an iostatus
37322bd3bce8SPaolo Bonzini          * that matches the events raised so far (an additional error iostatus
37332bd3bce8SPaolo Bonzini          * is fine, but not a lost one).
37342bd3bce8SPaolo Bonzini          */
37353e1caa5fSPaolo Bonzini         bdrv_iostatus_set_err(bs, error);
37362bd3bce8SPaolo Bonzini 
37372bd3bce8SPaolo Bonzini         /* Then raise the request to stop the VM and the event.
37382bd3bce8SPaolo Bonzini          * qemu_system_vmstop_request_prepare has two effects.  First,
37392bd3bce8SPaolo Bonzini          * it ensures that the STOP event always comes after the
37402bd3bce8SPaolo Bonzini          * BLOCK_IO_ERROR event.  Second, it ensures that even if management
37412bd3bce8SPaolo Bonzini          * can observe the STOP event and do a "cont" before the STOP
37422bd3bce8SPaolo Bonzini          * event is issued, the VM will not stop.  In this case, vm_start()
37432bd3bce8SPaolo Bonzini          * also ensures that the STOP/RESUME pair of events is emitted.
37442bd3bce8SPaolo Bonzini          */
37452bd3bce8SPaolo Bonzini         qemu_system_vmstop_request_prepare();
3746c7c2ff0cSLuiz Capitulino         send_qmp_error_event(bs, action, is_read, error);
37472bd3bce8SPaolo Bonzini         qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
37482bd3bce8SPaolo Bonzini     } else {
3749c7c2ff0cSLuiz Capitulino         send_qmp_error_event(bs, action, is_read, error);
37503e1caa5fSPaolo Bonzini     }
37513e1caa5fSPaolo Bonzini }
37523e1caa5fSPaolo Bonzini 
3753b338082bSbellard int bdrv_is_read_only(BlockDriverState *bs)
3754b338082bSbellard {
3755b338082bSbellard     return bs->read_only;
3756b338082bSbellard }
3757b338082bSbellard 
3758985a03b0Sths int bdrv_is_sg(BlockDriverState *bs)
3759985a03b0Sths {
3760985a03b0Sths     return bs->sg;
3761985a03b0Sths }
3762985a03b0Sths 
3763e900a7b7SChristoph Hellwig int bdrv_enable_write_cache(BlockDriverState *bs)
3764e900a7b7SChristoph Hellwig {
3765e900a7b7SChristoph Hellwig     return bs->enable_write_cache;
3766e900a7b7SChristoph Hellwig }
3767e900a7b7SChristoph Hellwig 
3768425b0148SPaolo Bonzini void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
3769425b0148SPaolo Bonzini {
3770425b0148SPaolo Bonzini     bs->enable_write_cache = wce;
377155b110f2SJeff Cody 
377255b110f2SJeff Cody     /* so a reopen() will preserve wce */
377355b110f2SJeff Cody     if (wce) {
377455b110f2SJeff Cody         bs->open_flags |= BDRV_O_CACHE_WB;
377555b110f2SJeff Cody     } else {
377655b110f2SJeff Cody         bs->open_flags &= ~BDRV_O_CACHE_WB;
377755b110f2SJeff Cody     }
3778425b0148SPaolo Bonzini }
3779425b0148SPaolo Bonzini 
3780ea2384d3Sbellard int bdrv_is_encrypted(BlockDriverState *bs)
3781ea2384d3Sbellard {
3782ea2384d3Sbellard     if (bs->backing_hd && bs->backing_hd->encrypted)
3783ea2384d3Sbellard         return 1;
3784ea2384d3Sbellard     return bs->encrypted;
3785ea2384d3Sbellard }
3786ea2384d3Sbellard 
3787c0f4ce77Saliguori int bdrv_key_required(BlockDriverState *bs)
3788c0f4ce77Saliguori {
3789c0f4ce77Saliguori     BlockDriverState *backing_hd = bs->backing_hd;
3790c0f4ce77Saliguori 
3791c0f4ce77Saliguori     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
3792c0f4ce77Saliguori         return 1;
3793c0f4ce77Saliguori     return (bs->encrypted && !bs->valid_key);
3794c0f4ce77Saliguori }
3795c0f4ce77Saliguori 
3796ea2384d3Sbellard int bdrv_set_key(BlockDriverState *bs, const char *key)
3797ea2384d3Sbellard {
3798ea2384d3Sbellard     int ret;
3799ea2384d3Sbellard     if (bs->backing_hd && bs->backing_hd->encrypted) {
3800ea2384d3Sbellard         ret = bdrv_set_key(bs->backing_hd, key);
3801ea2384d3Sbellard         if (ret < 0)
3802ea2384d3Sbellard             return ret;
3803ea2384d3Sbellard         if (!bs->encrypted)
3804ea2384d3Sbellard             return 0;
3805ea2384d3Sbellard     }
3806fd04a2aeSShahar Havivi     if (!bs->encrypted) {
3807fd04a2aeSShahar Havivi         return -EINVAL;
3808fd04a2aeSShahar Havivi     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
3809fd04a2aeSShahar Havivi         return -ENOMEDIUM;
3810fd04a2aeSShahar Havivi     }
3811c0f4ce77Saliguori     ret = bs->drv->bdrv_set_key(bs, key);
3812bb5fc20fSaliguori     if (ret < 0) {
3813bb5fc20fSaliguori         bs->valid_key = 0;
3814bb5fc20fSaliguori     } else if (!bs->valid_key) {
3815bb5fc20fSaliguori         bs->valid_key = 1;
3816a7f53e26SMarkus Armbruster         if (bs->blk) {
3817bb5fc20fSaliguori             /* call the change callback now, we skipped it on open */
3818a7f53e26SMarkus Armbruster             blk_dev_change_media_cb(bs->blk, true);
3819a7f53e26SMarkus Armbruster         }
3820bb5fc20fSaliguori     }
3821c0f4ce77Saliguori     return ret;
3822ea2384d3Sbellard }
3823ea2384d3Sbellard 
38244d2855a3SMarkus Armbruster /*
38254d2855a3SMarkus Armbruster  * Provide an encryption key for @bs.
38264d2855a3SMarkus Armbruster  * If @key is non-null:
38274d2855a3SMarkus Armbruster  *     If @bs is not encrypted, fail.
38284d2855a3SMarkus Armbruster  *     Else if the key is invalid, fail.
38294d2855a3SMarkus Armbruster  *     Else set @bs's key to @key, replacing the existing key, if any.
38304d2855a3SMarkus Armbruster  * If @key is null:
38314d2855a3SMarkus Armbruster  *     If @bs is encrypted and still lacks a key, fail.
38324d2855a3SMarkus Armbruster  *     Else do nothing.
38334d2855a3SMarkus Armbruster  * On failure, store an error object through @errp if non-null.
38344d2855a3SMarkus Armbruster  */
38354d2855a3SMarkus Armbruster void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
38364d2855a3SMarkus Armbruster {
38374d2855a3SMarkus Armbruster     if (key) {
38384d2855a3SMarkus Armbruster         if (!bdrv_is_encrypted(bs)) {
383981e5f78aSAlberto Garcia             error_setg(errp, "Node '%s' is not encrypted",
384081e5f78aSAlberto Garcia                       bdrv_get_device_or_node_name(bs));
38414d2855a3SMarkus Armbruster         } else if (bdrv_set_key(bs, key) < 0) {
38424d2855a3SMarkus Armbruster             error_set(errp, QERR_INVALID_PASSWORD);
38434d2855a3SMarkus Armbruster         }
38444d2855a3SMarkus Armbruster     } else {
38454d2855a3SMarkus Armbruster         if (bdrv_key_required(bs)) {
3846b1ca6391SMarkus Armbruster             error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
3847b1ca6391SMarkus Armbruster                       "'%s' (%s) is encrypted",
384881e5f78aSAlberto Garcia                       bdrv_get_device_or_node_name(bs),
38494d2855a3SMarkus Armbruster                       bdrv_get_encrypted_filename(bs));
38504d2855a3SMarkus Armbruster         }
38514d2855a3SMarkus Armbruster     }
38524d2855a3SMarkus Armbruster }
38534d2855a3SMarkus Armbruster 
3854f8d6bba1SMarkus Armbruster const char *bdrv_get_format_name(BlockDriverState *bs)
3855ea2384d3Sbellard {
3856f8d6bba1SMarkus Armbruster     return bs->drv ? bs->drv->format_name : NULL;
3857ea2384d3Sbellard }
3858ea2384d3Sbellard 
/*
 * qsort() comparator for an array of C strings (const char *).
 *
 * qsort() hands the comparator pointers to the array elements, i.e.
 * pointers to the string pointers, so each argument has to be dereferenced
 * once before the strings themselves can be compared.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
3863ada42401SStefan Hajnoczi 
3864ea2384d3Sbellard void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
3865ea2384d3Sbellard                          void *opaque)
3866ea2384d3Sbellard {
3867ea2384d3Sbellard     BlockDriver *drv;
3868e855e4fbSJeff Cody     int count = 0;
3869ada42401SStefan Hajnoczi     int i;
3870e855e4fbSJeff Cody     const char **formats = NULL;
3871ea2384d3Sbellard 
38728a22f02aSStefan Hajnoczi     QLIST_FOREACH(drv, &bdrv_drivers, list) {
3873e855e4fbSJeff Cody         if (drv->format_name) {
3874e855e4fbSJeff Cody             bool found = false;
3875e855e4fbSJeff Cody             int i = count;
3876e855e4fbSJeff Cody             while (formats && i && !found) {
3877e855e4fbSJeff Cody                 found = !strcmp(formats[--i], drv->format_name);
3878e855e4fbSJeff Cody             }
3879e855e4fbSJeff Cody 
3880e855e4fbSJeff Cody             if (!found) {
38815839e53bSMarkus Armbruster                 formats = g_renew(const char *, formats, count + 1);
3882e855e4fbSJeff Cody                 formats[count++] = drv->format_name;
3883ea2384d3Sbellard             }
3884ea2384d3Sbellard         }
3885e855e4fbSJeff Cody     }
3886ada42401SStefan Hajnoczi 
3887ada42401SStefan Hajnoczi     qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
3888ada42401SStefan Hajnoczi 
3889ada42401SStefan Hajnoczi     for (i = 0; i < count; i++) {
3890ada42401SStefan Hajnoczi         it(opaque, formats[i]);
3891ada42401SStefan Hajnoczi     }
3892ada42401SStefan Hajnoczi 
3893e855e4fbSJeff Cody     g_free(formats);
3894e855e4fbSJeff Cody }
3895ea2384d3Sbellard 
3896dc364f4cSBenoît Canet /* This function is to find a node in the bs graph */
3897dc364f4cSBenoît Canet BlockDriverState *bdrv_find_node(const char *node_name)
3898dc364f4cSBenoît Canet {
3899dc364f4cSBenoît Canet     BlockDriverState *bs;
3900dc364f4cSBenoît Canet 
3901dc364f4cSBenoît Canet     assert(node_name);
3902dc364f4cSBenoît Canet 
3903dc364f4cSBenoît Canet     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3904dc364f4cSBenoît Canet         if (!strcmp(node_name, bs->node_name)) {
3905dc364f4cSBenoît Canet             return bs;
3906dc364f4cSBenoît Canet         }
3907dc364f4cSBenoît Canet     }
3908dc364f4cSBenoît Canet     return NULL;
3909dc364f4cSBenoît Canet }
3910dc364f4cSBenoît Canet 
3911c13163fbSBenoît Canet /* Put this QMP function here so it can access the static graph_bdrv_states. */
3912d5a8ee60SAlberto Garcia BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
3913c13163fbSBenoît Canet {
3914c13163fbSBenoît Canet     BlockDeviceInfoList *list, *entry;
3915c13163fbSBenoît Canet     BlockDriverState *bs;
3916c13163fbSBenoît Canet 
3917c13163fbSBenoît Canet     list = NULL;
3918c13163fbSBenoît Canet     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3919d5a8ee60SAlberto Garcia         BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
3920d5a8ee60SAlberto Garcia         if (!info) {
3921d5a8ee60SAlberto Garcia             qapi_free_BlockDeviceInfoList(list);
3922d5a8ee60SAlberto Garcia             return NULL;
3923d5a8ee60SAlberto Garcia         }
3924c13163fbSBenoît Canet         entry = g_malloc0(sizeof(*entry));
3925d5a8ee60SAlberto Garcia         entry->value = info;
3926c13163fbSBenoît Canet         entry->next = list;
3927c13163fbSBenoît Canet         list = entry;
3928c13163fbSBenoît Canet     }
3929c13163fbSBenoît Canet 
3930c13163fbSBenoît Canet     return list;
3931c13163fbSBenoît Canet }
3932c13163fbSBenoît Canet 
393312d3ba82SBenoît Canet BlockDriverState *bdrv_lookup_bs(const char *device,
393412d3ba82SBenoît Canet                                  const char *node_name,
393512d3ba82SBenoît Canet                                  Error **errp)
393612d3ba82SBenoît Canet {
39377f06d47eSMarkus Armbruster     BlockBackend *blk;
39387f06d47eSMarkus Armbruster     BlockDriverState *bs;
393912d3ba82SBenoît Canet 
394012d3ba82SBenoît Canet     if (device) {
39417f06d47eSMarkus Armbruster         blk = blk_by_name(device);
394212d3ba82SBenoît Canet 
39437f06d47eSMarkus Armbruster         if (blk) {
39447f06d47eSMarkus Armbruster             return blk_bs(blk);
394512d3ba82SBenoît Canet         }
3946dd67fa50SBenoît Canet     }
394712d3ba82SBenoît Canet 
3948dd67fa50SBenoît Canet     if (node_name) {
394912d3ba82SBenoît Canet         bs = bdrv_find_node(node_name);
395012d3ba82SBenoît Canet 
3951dd67fa50SBenoît Canet         if (bs) {
3952dd67fa50SBenoît Canet             return bs;
3953dd67fa50SBenoît Canet         }
395412d3ba82SBenoît Canet     }
395512d3ba82SBenoît Canet 
3956dd67fa50SBenoît Canet     error_setg(errp, "Cannot find device=%s nor node_name=%s",
3957dd67fa50SBenoît Canet                      device ? device : "",
3958dd67fa50SBenoît Canet                      node_name ? node_name : "");
3959dd67fa50SBenoît Canet     return NULL;
396012d3ba82SBenoît Canet }
396112d3ba82SBenoît Canet 
39625a6684d2SJeff Cody /* If 'base' is in the same chain as 'top', return true. Otherwise,
39635a6684d2SJeff Cody  * return false.  If either argument is NULL, return false. */
39645a6684d2SJeff Cody bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
39655a6684d2SJeff Cody {
39665a6684d2SJeff Cody     while (top && top != base) {
39675a6684d2SJeff Cody         top = top->backing_hd;
39685a6684d2SJeff Cody     }
39695a6684d2SJeff Cody 
39705a6684d2SJeff Cody     return top != NULL;
39715a6684d2SJeff Cody }
39725a6684d2SJeff Cody 
397304df765aSFam Zheng BlockDriverState *bdrv_next_node(BlockDriverState *bs)
397404df765aSFam Zheng {
397504df765aSFam Zheng     if (!bs) {
397604df765aSFam Zheng         return QTAILQ_FIRST(&graph_bdrv_states);
397704df765aSFam Zheng     }
397804df765aSFam Zheng     return QTAILQ_NEXT(bs, node_list);
397904df765aSFam Zheng }
398004df765aSFam Zheng 
39812f399b0aSMarkus Armbruster BlockDriverState *bdrv_next(BlockDriverState *bs)
39822f399b0aSMarkus Armbruster {
39832f399b0aSMarkus Armbruster     if (!bs) {
39842f399b0aSMarkus Armbruster         return QTAILQ_FIRST(&bdrv_states);
39852f399b0aSMarkus Armbruster     }
3986dc364f4cSBenoît Canet     return QTAILQ_NEXT(bs, device_list);
39872f399b0aSMarkus Armbruster }
39882f399b0aSMarkus Armbruster 
398920a9e77dSFam Zheng const char *bdrv_get_node_name(const BlockDriverState *bs)
399020a9e77dSFam Zheng {
399120a9e77dSFam Zheng     return bs->node_name;
399220a9e77dSFam Zheng }
399320a9e77dSFam Zheng 
39947f06d47eSMarkus Armbruster /* TODO check what callers really want: bs->node_name or blk_name() */
3995bfb197e0SMarkus Armbruster const char *bdrv_get_device_name(const BlockDriverState *bs)
3996ea2384d3Sbellard {
3997bfb197e0SMarkus Armbruster     return bs->blk ? blk_name(bs->blk) : "";
3998ea2384d3Sbellard }
3999ea2384d3Sbellard 
40009b2aa84fSAlberto Garcia /* This can be used to identify nodes that might not have a device
40019b2aa84fSAlberto Garcia  * name associated. Since node and device names live in the same
40029b2aa84fSAlberto Garcia  * namespace, the result is unambiguous. The exception is if both are
40039b2aa84fSAlberto Garcia  * absent, then this returns an empty (non-null) string. */
40049b2aa84fSAlberto Garcia const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
40059b2aa84fSAlberto Garcia {
40069b2aa84fSAlberto Garcia     return bs->blk ? blk_name(bs->blk) : bs->node_name;
40079b2aa84fSAlberto Garcia }
40089b2aa84fSAlberto Garcia 
4009c8433287SMarkus Armbruster int bdrv_get_flags(BlockDriverState *bs)
4010c8433287SMarkus Armbruster {
4011c8433287SMarkus Armbruster     return bs->open_flags;
4012c8433287SMarkus Armbruster }
4013c8433287SMarkus Armbruster 
4014f0f0fdfeSKevin Wolf int bdrv_flush_all(void)
4015c6ca28d6Saliguori {
40164f5472cbSStefan Hajnoczi     BlockDriverState *bs = NULL;
4017f0f0fdfeSKevin Wolf     int result = 0;
4018c6ca28d6Saliguori 
40194f5472cbSStefan Hajnoczi     while ((bs = bdrv_next(bs))) {
4020ed78cda3SStefan Hajnoczi         AioContext *aio_context = bdrv_get_aio_context(bs);
4021ed78cda3SStefan Hajnoczi         int ret;
4022ed78cda3SStefan Hajnoczi 
4023ed78cda3SStefan Hajnoczi         aio_context_acquire(aio_context);
4024ed78cda3SStefan Hajnoczi         ret = bdrv_flush(bs);
4025f0f0fdfeSKevin Wolf         if (ret < 0 && !result) {
4026f0f0fdfeSKevin Wolf             result = ret;
4027c6ca28d6Saliguori         }
4028ed78cda3SStefan Hajnoczi         aio_context_release(aio_context);
40291b7bdbc1SStefan Hajnoczi     }
4030c6ca28d6Saliguori 
4031f0f0fdfeSKevin Wolf     return result;
4032f0f0fdfeSKevin Wolf }
4033f0f0fdfeSKevin Wolf 
40343ac21627SPeter Lieven int bdrv_has_zero_init_1(BlockDriverState *bs)
40353ac21627SPeter Lieven {
40363ac21627SPeter Lieven     return 1;
40373ac21627SPeter Lieven }
40383ac21627SPeter Lieven 
4039f2feebbdSKevin Wolf int bdrv_has_zero_init(BlockDriverState *bs)
4040f2feebbdSKevin Wolf {
4041f2feebbdSKevin Wolf     assert(bs->drv);
4042f2feebbdSKevin Wolf 
404311212d8fSPaolo Bonzini     /* If BS is a copy on write image, it is initialized to
404411212d8fSPaolo Bonzini        the contents of the base image, which may not be zeroes.  */
404511212d8fSPaolo Bonzini     if (bs->backing_hd) {
404611212d8fSPaolo Bonzini         return 0;
404711212d8fSPaolo Bonzini     }
4048336c1c12SKevin Wolf     if (bs->drv->bdrv_has_zero_init) {
4049336c1c12SKevin Wolf         return bs->drv->bdrv_has_zero_init(bs);
4050f2feebbdSKevin Wolf     }
4051f2feebbdSKevin Wolf 
40523ac21627SPeter Lieven     /* safe default */
40533ac21627SPeter Lieven     return 0;
4054f2feebbdSKevin Wolf }
4055f2feebbdSKevin Wolf 
40564ce78691SPeter Lieven bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
40574ce78691SPeter Lieven {
40584ce78691SPeter Lieven     BlockDriverInfo bdi;
40594ce78691SPeter Lieven 
40604ce78691SPeter Lieven     if (bs->backing_hd) {
40614ce78691SPeter Lieven         return false;
40624ce78691SPeter Lieven     }
40634ce78691SPeter Lieven 
40644ce78691SPeter Lieven     if (bdrv_get_info(bs, &bdi) == 0) {
40654ce78691SPeter Lieven         return bdi.unallocated_blocks_are_zero;
40664ce78691SPeter Lieven     }
40674ce78691SPeter Lieven 
40684ce78691SPeter Lieven     return false;
40694ce78691SPeter Lieven }
40704ce78691SPeter Lieven 
40714ce78691SPeter Lieven bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
40724ce78691SPeter Lieven {
40734ce78691SPeter Lieven     BlockDriverInfo bdi;
40744ce78691SPeter Lieven 
40754ce78691SPeter Lieven     if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
40764ce78691SPeter Lieven         return false;
40774ce78691SPeter Lieven     }
40784ce78691SPeter Lieven 
40794ce78691SPeter Lieven     if (bdrv_get_info(bs, &bdi) == 0) {
40804ce78691SPeter Lieven         return bdi.can_write_zeroes_with_unmap;
40814ce78691SPeter Lieven     }
40824ce78691SPeter Lieven 
40834ce78691SPeter Lieven     return false;
40844ce78691SPeter Lieven }
40854ce78691SPeter Lieven 
4086b6b8a333SPaolo Bonzini typedef struct BdrvCoGetBlockStatusData {
4087376ae3f1SStefan Hajnoczi     BlockDriverState *bs;
4088b35b2bbaSMiroslav Rezanina     BlockDriverState *base;
4089376ae3f1SStefan Hajnoczi     int64_t sector_num;
4090376ae3f1SStefan Hajnoczi     int nb_sectors;
4091376ae3f1SStefan Hajnoczi     int *pnum;
4092b6b8a333SPaolo Bonzini     int64_t ret;
4093376ae3f1SStefan Hajnoczi     bool done;
4094b6b8a333SPaolo Bonzini } BdrvCoGetBlockStatusData;
4095376ae3f1SStefan Hajnoczi 
4096f58c7b35Sths /*
4097705be728SFam Zheng  * Returns the allocation status of the specified sectors.
4098705be728SFam Zheng  * Drivers not implementing the functionality are assumed to not support
4099705be728SFam Zheng  * backing files, hence all their sectors are reported as allocated.
4100f58c7b35Sths  *
4101bd9533e3SStefan Hajnoczi  * If 'sector_num' is beyond the end of the disk image the return value is 0
4102bd9533e3SStefan Hajnoczi  * and 'pnum' is set to 0.
4103bd9533e3SStefan Hajnoczi  *
4104f58c7b35Sths  * 'pnum' is set to the number of sectors (including and immediately following
4105f58c7b35Sths  * the specified sector) that are known to be in the same
4106f58c7b35Sths  * allocated/unallocated state.
4107f58c7b35Sths  *
4108bd9533e3SStefan Hajnoczi  * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
4109bd9533e3SStefan Hajnoczi  * beyond the end of the disk image it will be clamped.
4110f58c7b35Sths  */
4111b6b8a333SPaolo Bonzini static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
4112bdad13b9SPaolo Bonzini                                                      int64_t sector_num,
4113060f51c9SStefan Hajnoczi                                                      int nb_sectors, int *pnum)
4114f58c7b35Sths {
411530a7f2fcSMarkus Armbruster     int64_t total_sectors;
4116f58c7b35Sths     int64_t n;
41175daa74a6SPaolo Bonzini     int64_t ret, ret2;
4118bd9533e3SStefan Hajnoczi 
411930a7f2fcSMarkus Armbruster     total_sectors = bdrv_nb_sectors(bs);
412030a7f2fcSMarkus Armbruster     if (total_sectors < 0) {
412130a7f2fcSMarkus Armbruster         return total_sectors;
4122617ccb46SPaolo Bonzini     }
4123617ccb46SPaolo Bonzini 
412430a7f2fcSMarkus Armbruster     if (sector_num >= total_sectors) {
41256aebab14SStefan Hajnoczi         *pnum = 0;
41266aebab14SStefan Hajnoczi         return 0;
41276aebab14SStefan Hajnoczi     }
4128bd9533e3SStefan Hajnoczi 
412930a7f2fcSMarkus Armbruster     n = total_sectors - sector_num;
4130bd9533e3SStefan Hajnoczi     if (n < nb_sectors) {
4131bd9533e3SStefan Hajnoczi         nb_sectors = n;
4132bd9533e3SStefan Hajnoczi     }
4133bd9533e3SStefan Hajnoczi 
4134b6b8a333SPaolo Bonzini     if (!bs->drv->bdrv_co_get_block_status) {
4135bd9533e3SStefan Hajnoczi         *pnum = nb_sectors;
4136e88ae226SKevin Wolf         ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
4137918e92d7SPaolo Bonzini         if (bs->drv->protocol_name) {
4138918e92d7SPaolo Bonzini             ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
4139918e92d7SPaolo Bonzini         }
4140918e92d7SPaolo Bonzini         return ret;
41416aebab14SStefan Hajnoczi     }
41426aebab14SStefan Hajnoczi 
4143415b5b01SPaolo Bonzini     ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
4144415b5b01SPaolo Bonzini     if (ret < 0) {
41453e0a233dSPeter Lieven         *pnum = 0;
4146415b5b01SPaolo Bonzini         return ret;
4147415b5b01SPaolo Bonzini     }
4148415b5b01SPaolo Bonzini 
414992bc50a5SPeter Lieven     if (ret & BDRV_BLOCK_RAW) {
415092bc50a5SPeter Lieven         assert(ret & BDRV_BLOCK_OFFSET_VALID);
415192bc50a5SPeter Lieven         return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
415292bc50a5SPeter Lieven                                      *pnum, pnum);
415392bc50a5SPeter Lieven     }
415492bc50a5SPeter Lieven 
4155e88ae226SKevin Wolf     if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
4156e88ae226SKevin Wolf         ret |= BDRV_BLOCK_ALLOCATED;
4157e88ae226SKevin Wolf     }
4158e88ae226SKevin Wolf 
4159c3d86884SPeter Lieven     if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
4160c3d86884SPeter Lieven         if (bdrv_unallocated_blocks_are_zero(bs)) {
4161415b5b01SPaolo Bonzini             ret |= BDRV_BLOCK_ZERO;
41621f9db224SPeter Lieven         } else if (bs->backing_hd) {
4163f0ad5712SPaolo Bonzini             BlockDriverState *bs2 = bs->backing_hd;
416430a7f2fcSMarkus Armbruster             int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
416530a7f2fcSMarkus Armbruster             if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
4166f0ad5712SPaolo Bonzini                 ret |= BDRV_BLOCK_ZERO;
4167f0ad5712SPaolo Bonzini             }
4168f0ad5712SPaolo Bonzini         }
4169415b5b01SPaolo Bonzini     }
41705daa74a6SPaolo Bonzini 
41715daa74a6SPaolo Bonzini     if (bs->file &&
41725daa74a6SPaolo Bonzini         (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
41735daa74a6SPaolo Bonzini         (ret & BDRV_BLOCK_OFFSET_VALID)) {
417459c9a95fSMax Reitz         int file_pnum;
417559c9a95fSMax Reitz 
41765daa74a6SPaolo Bonzini         ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
417759c9a95fSMax Reitz                                         *pnum, &file_pnum);
41785daa74a6SPaolo Bonzini         if (ret2 >= 0) {
41795daa74a6SPaolo Bonzini             /* Ignore errors.  This is just providing extra information, it
41805daa74a6SPaolo Bonzini              * is useful but not necessary.
41815daa74a6SPaolo Bonzini              */
418259c9a95fSMax Reitz             if (!file_pnum) {
418359c9a95fSMax Reitz                 /* !file_pnum indicates an offset at or beyond the EOF; it is
418459c9a95fSMax Reitz                  * perfectly valid for the format block driver to point to such
418559c9a95fSMax Reitz                  * offsets, so catch it and mark everything as zero */
418659c9a95fSMax Reitz                 ret |= BDRV_BLOCK_ZERO;
418759c9a95fSMax Reitz             } else {
418859c9a95fSMax Reitz                 /* Limit request to the range reported by the protocol driver */
418959c9a95fSMax Reitz                 *pnum = file_pnum;
41905daa74a6SPaolo Bonzini                 ret |= (ret2 & BDRV_BLOCK_ZERO);
41915daa74a6SPaolo Bonzini             }
41925daa74a6SPaolo Bonzini         }
419359c9a95fSMax Reitz     }
41945daa74a6SPaolo Bonzini 
4195415b5b01SPaolo Bonzini     return ret;
4196060f51c9SStefan Hajnoczi }
4197060f51c9SStefan Hajnoczi 
4198b6b8a333SPaolo Bonzini /* Coroutine wrapper for bdrv_get_block_status() */
4199b6b8a333SPaolo Bonzini static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
4200060f51c9SStefan Hajnoczi {
4201b6b8a333SPaolo Bonzini     BdrvCoGetBlockStatusData *data = opaque;
4202060f51c9SStefan Hajnoczi     BlockDriverState *bs = data->bs;
4203060f51c9SStefan Hajnoczi 
4204b6b8a333SPaolo Bonzini     data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
4205060f51c9SStefan Hajnoczi                                          data->pnum);
4206060f51c9SStefan Hajnoczi     data->done = true;
4207060f51c9SStefan Hajnoczi }
4208060f51c9SStefan Hajnoczi 
4209060f51c9SStefan Hajnoczi /*
4210b6b8a333SPaolo Bonzini  * Synchronous wrapper around bdrv_co_get_block_status().
4211060f51c9SStefan Hajnoczi  *
4212b6b8a333SPaolo Bonzini  * See bdrv_co_get_block_status() for details.
4213060f51c9SStefan Hajnoczi  */
4214b6b8a333SPaolo Bonzini int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
4215b6b8a333SPaolo Bonzini                               int nb_sectors, int *pnum)
4216060f51c9SStefan Hajnoczi {
4217376ae3f1SStefan Hajnoczi     Coroutine *co;
4218b6b8a333SPaolo Bonzini     BdrvCoGetBlockStatusData data = {
4219376ae3f1SStefan Hajnoczi         .bs = bs,
4220376ae3f1SStefan Hajnoczi         .sector_num = sector_num,
4221376ae3f1SStefan Hajnoczi         .nb_sectors = nb_sectors,
4222376ae3f1SStefan Hajnoczi         .pnum = pnum,
4223376ae3f1SStefan Hajnoczi         .done = false,
4224376ae3f1SStefan Hajnoczi     };
4225376ae3f1SStefan Hajnoczi 
4226bdad13b9SPaolo Bonzini     if (qemu_in_coroutine()) {
4227bdad13b9SPaolo Bonzini         /* Fast-path if already in coroutine context */
4228b6b8a333SPaolo Bonzini         bdrv_get_block_status_co_entry(&data);
4229bdad13b9SPaolo Bonzini     } else {
42302572b37aSStefan Hajnoczi         AioContext *aio_context = bdrv_get_aio_context(bs);
42312572b37aSStefan Hajnoczi 
4232b6b8a333SPaolo Bonzini         co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
4233376ae3f1SStefan Hajnoczi         qemu_coroutine_enter(co, &data);
4234376ae3f1SStefan Hajnoczi         while (!data.done) {
42352572b37aSStefan Hajnoczi             aio_poll(aio_context, true);
4236376ae3f1SStefan Hajnoczi         }
4237bdad13b9SPaolo Bonzini     }
4238376ae3f1SStefan Hajnoczi     return data.ret;
4239376ae3f1SStefan Hajnoczi }
4240f58c7b35Sths 
4241b6b8a333SPaolo Bonzini int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
4242b6b8a333SPaolo Bonzini                                    int nb_sectors, int *pnum)
4243b6b8a333SPaolo Bonzini {
42444333bb71SPaolo Bonzini     int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
42454333bb71SPaolo Bonzini     if (ret < 0) {
42464333bb71SPaolo Bonzini         return ret;
42474333bb71SPaolo Bonzini     }
424801fb2705SKevin Wolf     return !!(ret & BDRV_BLOCK_ALLOCATED);
4249b6b8a333SPaolo Bonzini }
4250b6b8a333SPaolo Bonzini 
4251188a7bbfSPaolo Bonzini /*
4252188a7bbfSPaolo Bonzini  * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
4253188a7bbfSPaolo Bonzini  *
4254188a7bbfSPaolo Bonzini  * Return true if the given sector is allocated in any image between
4255188a7bbfSPaolo Bonzini  * BASE and TOP (inclusive).  BASE can be NULL to check if the given
4256188a7bbfSPaolo Bonzini  * sector is allocated in any image of the chain.  Return false otherwise.
4257188a7bbfSPaolo Bonzini  *
4258188a7bbfSPaolo Bonzini  * 'pnum' is set to the number of sectors (including and immediately following
4259188a7bbfSPaolo Bonzini  *  the specified sector) that are known to be in the same
4260188a7bbfSPaolo Bonzini  *  allocated/unallocated state.
4261188a7bbfSPaolo Bonzini  *
4262188a7bbfSPaolo Bonzini  */
42634f578637SPaolo Bonzini int bdrv_is_allocated_above(BlockDriverState *top,
4264188a7bbfSPaolo Bonzini                             BlockDriverState *base,
4265188a7bbfSPaolo Bonzini                             int64_t sector_num,
4266188a7bbfSPaolo Bonzini                             int nb_sectors, int *pnum)
4267188a7bbfSPaolo Bonzini {
4268188a7bbfSPaolo Bonzini     BlockDriverState *intermediate;
4269188a7bbfSPaolo Bonzini     int ret, n = nb_sectors;
4270188a7bbfSPaolo Bonzini 
4271188a7bbfSPaolo Bonzini     intermediate = top;
4272188a7bbfSPaolo Bonzini     while (intermediate && intermediate != base) {
4273188a7bbfSPaolo Bonzini         int pnum_inter;
4274bdad13b9SPaolo Bonzini         ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
4275188a7bbfSPaolo Bonzini                                 &pnum_inter);
4276188a7bbfSPaolo Bonzini         if (ret < 0) {
4277188a7bbfSPaolo Bonzini             return ret;
4278188a7bbfSPaolo Bonzini         } else if (ret) {
4279188a7bbfSPaolo Bonzini             *pnum = pnum_inter;
4280188a7bbfSPaolo Bonzini             return 1;
4281188a7bbfSPaolo Bonzini         }
4282188a7bbfSPaolo Bonzini 
4283188a7bbfSPaolo Bonzini         /*
4284188a7bbfSPaolo Bonzini          * [sector_num, nb_sectors] is unallocated on top but intermediate
4285188a7bbfSPaolo Bonzini          * might have
4286188a7bbfSPaolo Bonzini          *
4287188a7bbfSPaolo Bonzini          * [sector_num+x, nr_sectors] allocated.
4288188a7bbfSPaolo Bonzini          */
428963ba17d3SVishvananda Ishaya         if (n > pnum_inter &&
429063ba17d3SVishvananda Ishaya             (intermediate == top ||
429163ba17d3SVishvananda Ishaya              sector_num + pnum_inter < intermediate->total_sectors)) {
4292188a7bbfSPaolo Bonzini             n = pnum_inter;
4293188a7bbfSPaolo Bonzini         }
4294188a7bbfSPaolo Bonzini 
4295188a7bbfSPaolo Bonzini         intermediate = intermediate->backing_hd;
4296188a7bbfSPaolo Bonzini     }
4297188a7bbfSPaolo Bonzini 
4298188a7bbfSPaolo Bonzini     *pnum = n;
4299188a7bbfSPaolo Bonzini     return 0;
4300188a7bbfSPaolo Bonzini }
4301188a7bbfSPaolo Bonzini 
4302045df330Saliguori const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
4303045df330Saliguori {
4304045df330Saliguori     if (bs->backing_hd && bs->backing_hd->encrypted)
4305045df330Saliguori         return bs->backing_file;
4306045df330Saliguori     else if (bs->encrypted)
4307045df330Saliguori         return bs->filename;
4308045df330Saliguori     else
4309045df330Saliguori         return NULL;
4310045df330Saliguori }
4311045df330Saliguori 
431283f64091Sbellard void bdrv_get_backing_filename(BlockDriverState *bs,
431383f64091Sbellard                                char *filename, int filename_size)
431483f64091Sbellard {
431583f64091Sbellard     pstrcpy(filename, filename_size, bs->backing_file);
431683f64091Sbellard }
431783f64091Sbellard 
4318faea38e7Sbellard int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
4319faea38e7Sbellard                           const uint8_t *buf, int nb_sectors)
4320faea38e7Sbellard {
4321faea38e7Sbellard     BlockDriver *drv = bs->drv;
4322b9c64947SMax Reitz     int ret;
4323b9c64947SMax Reitz 
4324b9c64947SMax Reitz     if (!drv) {
432519cb3738Sbellard         return -ENOMEDIUM;
4326b9c64947SMax Reitz     }
4327b9c64947SMax Reitz     if (!drv->bdrv_write_compressed) {
4328faea38e7Sbellard         return -ENOTSUP;
4329b9c64947SMax Reitz     }
4330b9c64947SMax Reitz     ret = bdrv_check_request(bs, sector_num, nb_sectors);
4331b9c64947SMax Reitz     if (ret < 0) {
4332b9c64947SMax Reitz         return ret;
4333b9c64947SMax Reitz     }
43347cd1e32aSlirans@il.ibm.com 
4335e4654d2dSFam Zheng     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
43367cd1e32aSlirans@il.ibm.com 
4337faea38e7Sbellard     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
4338faea38e7Sbellard }
4339faea38e7Sbellard 
4340faea38e7Sbellard int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
4341faea38e7Sbellard {
4342faea38e7Sbellard     BlockDriver *drv = bs->drv;
4343faea38e7Sbellard     if (!drv)
434419cb3738Sbellard         return -ENOMEDIUM;
4345faea38e7Sbellard     if (!drv->bdrv_get_info)
4346faea38e7Sbellard         return -ENOTSUP;
4347faea38e7Sbellard     memset(bdi, 0, sizeof(*bdi));
4348faea38e7Sbellard     return drv->bdrv_get_info(bs, bdi);
4349faea38e7Sbellard }
4350faea38e7Sbellard 
4351eae041feSMax Reitz ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
4352eae041feSMax Reitz {
4353eae041feSMax Reitz     BlockDriver *drv = bs->drv;
4354eae041feSMax Reitz     if (drv && drv->bdrv_get_specific_info) {
4355eae041feSMax Reitz         return drv->bdrv_get_specific_info(bs);
4356eae041feSMax Reitz     }
4357eae041feSMax Reitz     return NULL;
4358eae041feSMax Reitz }
4359eae041feSMax Reitz 
436045566e9cSChristoph Hellwig int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
436145566e9cSChristoph Hellwig                       int64_t pos, int size)
4362178e08a5Saliguori {
4363cf8074b3SKevin Wolf     QEMUIOVector qiov;
4364cf8074b3SKevin Wolf     struct iovec iov = {
4365cf8074b3SKevin Wolf         .iov_base   = (void *) buf,
4366cf8074b3SKevin Wolf         .iov_len    = size,
4367cf8074b3SKevin Wolf     };
4368cf8074b3SKevin Wolf 
4369cf8074b3SKevin Wolf     qemu_iovec_init_external(&qiov, &iov, 1);
4370cf8074b3SKevin Wolf     return bdrv_writev_vmstate(bs, &qiov, pos);
4371cf8074b3SKevin Wolf }
4372cf8074b3SKevin Wolf 
4373cf8074b3SKevin Wolf int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
4374cf8074b3SKevin Wolf {
4375178e08a5Saliguori     BlockDriver *drv = bs->drv;
4376cf8074b3SKevin Wolf 
4377cf8074b3SKevin Wolf     if (!drv) {
4378178e08a5Saliguori         return -ENOMEDIUM;
4379cf8074b3SKevin Wolf     } else if (drv->bdrv_save_vmstate) {
4380cf8074b3SKevin Wolf         return drv->bdrv_save_vmstate(bs, qiov, pos);
4381cf8074b3SKevin Wolf     } else if (bs->file) {
4382cf8074b3SKevin Wolf         return bdrv_writev_vmstate(bs->file, qiov, pos);
4383cf8074b3SKevin Wolf     }
4384cf8074b3SKevin Wolf 
43857cdb1f6dSMORITA Kazutaka     return -ENOTSUP;
4386178e08a5Saliguori }
4387178e08a5Saliguori 
438845566e9cSChristoph Hellwig int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
438945566e9cSChristoph Hellwig                       int64_t pos, int size)
4390178e08a5Saliguori {
4391178e08a5Saliguori     BlockDriver *drv = bs->drv;
4392178e08a5Saliguori     if (!drv)
4393178e08a5Saliguori         return -ENOMEDIUM;
43947cdb1f6dSMORITA Kazutaka     if (drv->bdrv_load_vmstate)
439545566e9cSChristoph Hellwig         return drv->bdrv_load_vmstate(bs, buf, pos, size);
43967cdb1f6dSMORITA Kazutaka     if (bs->file)
43977cdb1f6dSMORITA Kazutaka         return bdrv_load_vmstate(bs->file, buf, pos, size);
43987cdb1f6dSMORITA Kazutaka     return -ENOTSUP;
4399178e08a5Saliguori }
4400178e08a5Saliguori 
44018b9b0cc2SKevin Wolf void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
44028b9b0cc2SKevin Wolf {
4403bf736fe3SKevin Wolf     if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
44048b9b0cc2SKevin Wolf         return;
44058b9b0cc2SKevin Wolf     }
44068b9b0cc2SKevin Wolf 
4407bf736fe3SKevin Wolf     bs->drv->bdrv_debug_event(bs, event);
440841c695c7SKevin Wolf }
44098b9b0cc2SKevin Wolf 
441041c695c7SKevin Wolf int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
441141c695c7SKevin Wolf                           const char *tag)
441241c695c7SKevin Wolf {
441341c695c7SKevin Wolf     while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
441441c695c7SKevin Wolf         bs = bs->file;
441541c695c7SKevin Wolf     }
441641c695c7SKevin Wolf 
441741c695c7SKevin Wolf     if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
441841c695c7SKevin Wolf         return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
441941c695c7SKevin Wolf     }
442041c695c7SKevin Wolf 
442141c695c7SKevin Wolf     return -ENOTSUP;
442241c695c7SKevin Wolf }
442341c695c7SKevin Wolf 
44244cc70e93SFam Zheng int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
44254cc70e93SFam Zheng {
44264cc70e93SFam Zheng     while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
44274cc70e93SFam Zheng         bs = bs->file;
44284cc70e93SFam Zheng     }
44294cc70e93SFam Zheng 
44304cc70e93SFam Zheng     if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
44314cc70e93SFam Zheng         return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
44324cc70e93SFam Zheng     }
44334cc70e93SFam Zheng 
44344cc70e93SFam Zheng     return -ENOTSUP;
44354cc70e93SFam Zheng }
44364cc70e93SFam Zheng 
443741c695c7SKevin Wolf int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
443841c695c7SKevin Wolf {
4439938789eaSMax Reitz     while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
444041c695c7SKevin Wolf         bs = bs->file;
444141c695c7SKevin Wolf     }
444241c695c7SKevin Wolf 
444341c695c7SKevin Wolf     if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
444441c695c7SKevin Wolf         return bs->drv->bdrv_debug_resume(bs, tag);
444541c695c7SKevin Wolf     }
444641c695c7SKevin Wolf 
444741c695c7SKevin Wolf     return -ENOTSUP;
444841c695c7SKevin Wolf }
444941c695c7SKevin Wolf 
445041c695c7SKevin Wolf bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
445141c695c7SKevin Wolf {
445241c695c7SKevin Wolf     while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
445341c695c7SKevin Wolf         bs = bs->file;
445441c695c7SKevin Wolf     }
445541c695c7SKevin Wolf 
445641c695c7SKevin Wolf     if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
445741c695c7SKevin Wolf         return bs->drv->bdrv_debug_is_suspended(bs, tag);
445841c695c7SKevin Wolf     }
445941c695c7SKevin Wolf 
446041c695c7SKevin Wolf     return false;
44618b9b0cc2SKevin Wolf }
44628b9b0cc2SKevin Wolf 
4463199630b6SBlue Swirl int bdrv_is_snapshot(BlockDriverState *bs)
4464199630b6SBlue Swirl {
4465199630b6SBlue Swirl     return !!(bs->open_flags & BDRV_O_SNAPSHOT);
4466199630b6SBlue Swirl }
4467199630b6SBlue Swirl 
4468b1b1d783SJeff Cody /* backing_file can either be relative, or absolute, or a protocol.  If it is
4469b1b1d783SJeff Cody  * relative, it must be relative to the chain.  So, passing in bs->filename
4470b1b1d783SJeff Cody  * from a BDS as backing_file should not be done, as that may be relative to
4471b1b1d783SJeff Cody  * the CWD rather than the chain. */
4472e8a6bb9cSMarcelo Tosatti BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
4473e8a6bb9cSMarcelo Tosatti         const char *backing_file)
4474e8a6bb9cSMarcelo Tosatti {
4475b1b1d783SJeff Cody     char *filename_full = NULL;
4476b1b1d783SJeff Cody     char *backing_file_full = NULL;
4477b1b1d783SJeff Cody     char *filename_tmp = NULL;
4478b1b1d783SJeff Cody     int is_protocol = 0;
4479b1b1d783SJeff Cody     BlockDriverState *curr_bs = NULL;
4480b1b1d783SJeff Cody     BlockDriverState *retval = NULL;
4481b1b1d783SJeff Cody 
4482b1b1d783SJeff Cody     if (!bs || !bs->drv || !backing_file) {
4483e8a6bb9cSMarcelo Tosatti         return NULL;
4484e8a6bb9cSMarcelo Tosatti     }
4485e8a6bb9cSMarcelo Tosatti 
4486b1b1d783SJeff Cody     filename_full     = g_malloc(PATH_MAX);
4487b1b1d783SJeff Cody     backing_file_full = g_malloc(PATH_MAX);
4488b1b1d783SJeff Cody     filename_tmp      = g_malloc(PATH_MAX);
4489b1b1d783SJeff Cody 
4490b1b1d783SJeff Cody     is_protocol = path_has_protocol(backing_file);
4491b1b1d783SJeff Cody 
4492b1b1d783SJeff Cody     for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
4493b1b1d783SJeff Cody 
4494b1b1d783SJeff Cody         /* If either of the filename paths is actually a protocol, then
4495b1b1d783SJeff Cody          * compare unmodified paths; otherwise make paths relative */
4496b1b1d783SJeff Cody         if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
4497b1b1d783SJeff Cody             if (strcmp(backing_file, curr_bs->backing_file) == 0) {
4498b1b1d783SJeff Cody                 retval = curr_bs->backing_hd;
4499b1b1d783SJeff Cody                 break;
4500b1b1d783SJeff Cody             }
4501e8a6bb9cSMarcelo Tosatti         } else {
4502b1b1d783SJeff Cody             /* If not an absolute filename path, make it relative to the current
4503b1b1d783SJeff Cody              * image's filename path */
4504b1b1d783SJeff Cody             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4505b1b1d783SJeff Cody                          backing_file);
4506b1b1d783SJeff Cody 
4507b1b1d783SJeff Cody             /* We are going to compare absolute pathnames */
4508b1b1d783SJeff Cody             if (!realpath(filename_tmp, filename_full)) {
4509b1b1d783SJeff Cody                 continue;
4510b1b1d783SJeff Cody             }
4511b1b1d783SJeff Cody 
4512b1b1d783SJeff Cody             /* We need to make sure the backing filename we are comparing against
4513b1b1d783SJeff Cody              * is relative to the current image filename (or absolute) */
4514b1b1d783SJeff Cody             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4515b1b1d783SJeff Cody                          curr_bs->backing_file);
4516b1b1d783SJeff Cody 
4517b1b1d783SJeff Cody             if (!realpath(filename_tmp, backing_file_full)) {
4518b1b1d783SJeff Cody                 continue;
4519b1b1d783SJeff Cody             }
4520b1b1d783SJeff Cody 
4521b1b1d783SJeff Cody             if (strcmp(backing_file_full, filename_full) == 0) {
4522b1b1d783SJeff Cody                 retval = curr_bs->backing_hd;
4523b1b1d783SJeff Cody                 break;
4524b1b1d783SJeff Cody             }
4525e8a6bb9cSMarcelo Tosatti         }
4526e8a6bb9cSMarcelo Tosatti     }
4527e8a6bb9cSMarcelo Tosatti 
4528b1b1d783SJeff Cody     g_free(filename_full);
4529b1b1d783SJeff Cody     g_free(backing_file_full);
4530b1b1d783SJeff Cody     g_free(filename_tmp);
4531b1b1d783SJeff Cody     return retval;
4532e8a6bb9cSMarcelo Tosatti }
4533e8a6bb9cSMarcelo Tosatti 
4534f198fd1cSBenoît Canet int bdrv_get_backing_file_depth(BlockDriverState *bs)
4535f198fd1cSBenoît Canet {
4536f198fd1cSBenoît Canet     if (!bs->drv) {
4537f198fd1cSBenoît Canet         return 0;
4538f198fd1cSBenoît Canet     }
4539f198fd1cSBenoît Canet 
4540f198fd1cSBenoît Canet     if (!bs->backing_hd) {
4541f198fd1cSBenoît Canet         return 0;
4542f198fd1cSBenoît Canet     }
4543f198fd1cSBenoît Canet 
4544f198fd1cSBenoît Canet     return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
4545f198fd1cSBenoît Canet }
4546f198fd1cSBenoît Canet 
4547ea2384d3Sbellard /**************************************************************/
454883f64091Sbellard /* async I/Os */
4549ea2384d3Sbellard 
45507c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
4551f141eafeSaliguori                            QEMUIOVector *qiov, int nb_sectors,
4552097310b5SMarkus Armbruster                            BlockCompletionFunc *cb, void *opaque)
4553ea2384d3Sbellard {
4554bbf0a440SStefan Hajnoczi     trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
4555bbf0a440SStefan Hajnoczi 
4556d20d9b7cSPaolo Bonzini     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
45578c5873d6SStefan Hajnoczi                                  cb, opaque, false);
455883f64091Sbellard }
455983f64091Sbellard 
45607c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
4561f141eafeSaliguori                             QEMUIOVector *qiov, int nb_sectors,
4562097310b5SMarkus Armbruster                             BlockCompletionFunc *cb, void *opaque)
45637674e7bfSbellard {
4564bbf0a440SStefan Hajnoczi     trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
4565bbf0a440SStefan Hajnoczi 
4566d20d9b7cSPaolo Bonzini     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
45678c5873d6SStefan Hajnoczi                                  cb, opaque, true);
456883f64091Sbellard }
456983f64091Sbellard 
45707c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
4571d5ef94d4SPaolo Bonzini         int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
4572097310b5SMarkus Armbruster         BlockCompletionFunc *cb, void *opaque)
4573d5ef94d4SPaolo Bonzini {
4574d5ef94d4SPaolo Bonzini     trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);
4575d5ef94d4SPaolo Bonzini 
4576d5ef94d4SPaolo Bonzini     return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
4577d5ef94d4SPaolo Bonzini                                  BDRV_REQ_ZERO_WRITE | flags,
4578d5ef94d4SPaolo Bonzini                                  cb, opaque, true);
4579d5ef94d4SPaolo Bonzini }
4580d5ef94d4SPaolo Bonzini 
458140b4f539SKevin Wolf 
/*
 * Shared completion state for one bdrv_aio_multiwrite() batch.
 *
 * One instance is allocated per batch and released by multiwrite_cb() once
 * the last outstanding request has completed.
 */
typedef struct MultiwriteCB {
    /* First negative completion status seen by multiwrite_cb(), else 0 */
    int error;
    /* Requests still outstanding; multiwrite_cb() decrements it and runs
     * the user callbacks when it reaches zero */
    int num_requests;
    /* Number of valid entries in callbacks[] */
    int num_callbacks;
    struct {
        /* Caller's completion function and its argument */
        BlockCompletionFunc *cb;
        void *opaque;
        /* Merged I/O vector to destroy and free after the callback has run,
         * or NULL if there is none for this entry */
        QEMUIOVector *free_qiov;
    } callbacks[];
} MultiwriteCB;
459240b4f539SKevin Wolf 
459340b4f539SKevin Wolf static void multiwrite_user_cb(MultiwriteCB *mcb)
459440b4f539SKevin Wolf {
459540b4f539SKevin Wolf     int i;
459640b4f539SKevin Wolf 
459740b4f539SKevin Wolf     for (i = 0; i < mcb->num_callbacks; i++) {
459840b4f539SKevin Wolf         mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
45991e1ea48dSStefan Hajnoczi         if (mcb->callbacks[i].free_qiov) {
46001e1ea48dSStefan Hajnoczi             qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
46011e1ea48dSStefan Hajnoczi         }
46027267c094SAnthony Liguori         g_free(mcb->callbacks[i].free_qiov);
460340b4f539SKevin Wolf     }
460440b4f539SKevin Wolf }
460540b4f539SKevin Wolf 
460640b4f539SKevin Wolf static void multiwrite_cb(void *opaque, int ret)
460740b4f539SKevin Wolf {
460840b4f539SKevin Wolf     MultiwriteCB *mcb = opaque;
460940b4f539SKevin Wolf 
46106d519a5fSStefan Hajnoczi     trace_multiwrite_cb(mcb, ret);
46116d519a5fSStefan Hajnoczi 
4612cb6d3ca0SKevin Wolf     if (ret < 0 && !mcb->error) {
461340b4f539SKevin Wolf         mcb->error = ret;
461440b4f539SKevin Wolf     }
461540b4f539SKevin Wolf 
461640b4f539SKevin Wolf     mcb->num_requests--;
461740b4f539SKevin Wolf     if (mcb->num_requests == 0) {
461840b4f539SKevin Wolf         multiwrite_user_cb(mcb);
46197267c094SAnthony Liguori         g_free(mcb);
462040b4f539SKevin Wolf     }
462140b4f539SKevin Wolf }
462240b4f539SKevin Wolf 
462340b4f539SKevin Wolf static int multiwrite_req_compare(const void *a, const void *b)
462440b4f539SKevin Wolf {
462577be4366SChristoph Hellwig     const BlockRequest *req1 = a, *req2 = b;
462677be4366SChristoph Hellwig 
462777be4366SChristoph Hellwig     /*
462877be4366SChristoph Hellwig      * Note that we can't simply subtract req2->sector from req1->sector
462977be4366SChristoph Hellwig      * here as that could overflow the return value.
463077be4366SChristoph Hellwig      */
463177be4366SChristoph Hellwig     if (req1->sector > req2->sector) {
463277be4366SChristoph Hellwig         return 1;
463377be4366SChristoph Hellwig     } else if (req1->sector < req2->sector) {
463477be4366SChristoph Hellwig         return -1;
463577be4366SChristoph Hellwig     } else {
463677be4366SChristoph Hellwig         return 0;
463777be4366SChristoph Hellwig     }
463840b4f539SKevin Wolf }
463940b4f539SKevin Wolf 
464040b4f539SKevin Wolf /*
464140b4f539SKevin Wolf  * Takes a bunch of requests and tries to merge them. Returns the number of
464240b4f539SKevin Wolf  * requests that remain after merging.
464340b4f539SKevin Wolf  */
464440b4f539SKevin Wolf static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
464540b4f539SKevin Wolf     int num_reqs, MultiwriteCB *mcb)
464640b4f539SKevin Wolf {
464740b4f539SKevin Wolf     int i, outidx;
464840b4f539SKevin Wolf 
464940b4f539SKevin Wolf     // Sort requests by start sector
465040b4f539SKevin Wolf     qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
465140b4f539SKevin Wolf 
465240b4f539SKevin Wolf     // Check if adjacent requests touch the same clusters. If so, combine them,
465340b4f539SKevin Wolf     // filling up gaps with zero sectors.
465440b4f539SKevin Wolf     outidx = 0;
465540b4f539SKevin Wolf     for (i = 1; i < num_reqs; i++) {
465640b4f539SKevin Wolf         int merge = 0;
465740b4f539SKevin Wolf         int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
465840b4f539SKevin Wolf 
4659b6a127a1SPaolo Bonzini         // Handle exactly sequential writes and overlapping writes.
466040b4f539SKevin Wolf         if (reqs[i].sector <= oldreq_last) {
466140b4f539SKevin Wolf             merge = 1;
466240b4f539SKevin Wolf         }
466340b4f539SKevin Wolf 
4664e2a305fbSChristoph Hellwig         if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
4665e2a305fbSChristoph Hellwig             merge = 0;
4666e2a305fbSChristoph Hellwig         }
4667e2a305fbSChristoph Hellwig 
46686c5a42acSPeter Lieven         if (bs->bl.max_transfer_length && reqs[outidx].nb_sectors +
46696c5a42acSPeter Lieven             reqs[i].nb_sectors > bs->bl.max_transfer_length) {
46706c5a42acSPeter Lieven             merge = 0;
46716c5a42acSPeter Lieven         }
46726c5a42acSPeter Lieven 
467340b4f539SKevin Wolf         if (merge) {
467440b4f539SKevin Wolf             size_t size;
46757267c094SAnthony Liguori             QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
467640b4f539SKevin Wolf             qemu_iovec_init(qiov,
467740b4f539SKevin Wolf                 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
467840b4f539SKevin Wolf 
467940b4f539SKevin Wolf             // Add the first request to the merged one. If the requests are
468040b4f539SKevin Wolf             // overlapping, drop the last sectors of the first request.
468140b4f539SKevin Wolf             size = (reqs[i].sector - reqs[outidx].sector) << 9;
46821b093c48SMichael Tokarev             qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
468340b4f539SKevin Wolf 
4684b6a127a1SPaolo Bonzini             // We should need to add any zeros between the two requests
4685b6a127a1SPaolo Bonzini             assert (reqs[i].sector <= oldreq_last);
468640b4f539SKevin Wolf 
468740b4f539SKevin Wolf             // Add the second request
46881b093c48SMichael Tokarev             qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
468940b4f539SKevin Wolf 
4690391827ebSStefan Hajnoczi             // Add tail of first request, if necessary
4691391827ebSStefan Hajnoczi             if (qiov->size < reqs[outidx].qiov->size) {
4692391827ebSStefan Hajnoczi                 qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
4693391827ebSStefan Hajnoczi                                   reqs[outidx].qiov->size - qiov->size);
4694391827ebSStefan Hajnoczi             }
4695391827ebSStefan Hajnoczi 
4696cbf1dff2SKevin Wolf             reqs[outidx].nb_sectors = qiov->size >> 9;
469740b4f539SKevin Wolf             reqs[outidx].qiov = qiov;
469840b4f539SKevin Wolf 
469940b4f539SKevin Wolf             mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
470040b4f539SKevin Wolf         } else {
470140b4f539SKevin Wolf             outidx++;
470240b4f539SKevin Wolf             reqs[outidx].sector     = reqs[i].sector;
470340b4f539SKevin Wolf             reqs[outidx].nb_sectors = reqs[i].nb_sectors;
470440b4f539SKevin Wolf             reqs[outidx].qiov       = reqs[i].qiov;
470540b4f539SKevin Wolf         }
470640b4f539SKevin Wolf     }
470740b4f539SKevin Wolf 
4708f4564d53SPeter Lieven     block_acct_merge_done(&bs->stats, BLOCK_ACCT_WRITE, num_reqs - outidx - 1);
4709f4564d53SPeter Lieven 
471040b4f539SKevin Wolf     return outidx + 1;
471140b4f539SKevin Wolf }
471240b4f539SKevin Wolf 
471340b4f539SKevin Wolf /*
471440b4f539SKevin Wolf  * Submit multiple AIO write requests at once.
471540b4f539SKevin Wolf  *
471640b4f539SKevin Wolf  * On success, the function returns 0 and all requests in the reqs array have
471740b4f539SKevin Wolf  * been submitted. In error case this function returns -1, and any of the
471840b4f539SKevin Wolf  * requests may or may not be submitted yet. In particular, this means that the
471940b4f539SKevin Wolf  * callback will be called for some of the requests, for others it won't. The
472040b4f539SKevin Wolf  * caller must check the error field of the BlockRequest to wait for the right
472140b4f539SKevin Wolf  * callbacks (if error != 0, no callback will be called).
472240b4f539SKevin Wolf  *
472340b4f539SKevin Wolf  * The implementation may modify the contents of the reqs array, e.g. to merge
472440b4f539SKevin Wolf  * requests. However, the fields opaque and error are left unmodified as they
472540b4f539SKevin Wolf  * are used to signal failure for a single request to the caller.
472640b4f539SKevin Wolf  */
472740b4f539SKevin Wolf int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
472840b4f539SKevin Wolf {
472940b4f539SKevin Wolf     MultiwriteCB *mcb;
473040b4f539SKevin Wolf     int i;
473140b4f539SKevin Wolf 
4732301db7c2SRyan Harper     /* don't submit writes if we don't have a medium */
4733301db7c2SRyan Harper     if (bs->drv == NULL) {
4734301db7c2SRyan Harper         for (i = 0; i < num_reqs; i++) {
4735301db7c2SRyan Harper             reqs[i].error = -ENOMEDIUM;
4736301db7c2SRyan Harper         }
4737301db7c2SRyan Harper         return -1;
4738301db7c2SRyan Harper     }
4739301db7c2SRyan Harper 
474040b4f539SKevin Wolf     if (num_reqs == 0) {
474140b4f539SKevin Wolf         return 0;
474240b4f539SKevin Wolf     }
474340b4f539SKevin Wolf 
474440b4f539SKevin Wolf     // Create MultiwriteCB structure
47457267c094SAnthony Liguori     mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
474640b4f539SKevin Wolf     mcb->num_requests = 0;
474740b4f539SKevin Wolf     mcb->num_callbacks = num_reqs;
474840b4f539SKevin Wolf 
474940b4f539SKevin Wolf     for (i = 0; i < num_reqs; i++) {
475040b4f539SKevin Wolf         mcb->callbacks[i].cb = reqs[i].cb;
475140b4f539SKevin Wolf         mcb->callbacks[i].opaque = reqs[i].opaque;
475240b4f539SKevin Wolf     }
475340b4f539SKevin Wolf 
475440b4f539SKevin Wolf     // Check for mergable requests
475540b4f539SKevin Wolf     num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
475640b4f539SKevin Wolf 
47576d519a5fSStefan Hajnoczi     trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
47586d519a5fSStefan Hajnoczi 
4759df9309fbSPaolo Bonzini     /* Run the aio requests. */
4760df9309fbSPaolo Bonzini     mcb->num_requests = num_reqs;
476140b4f539SKevin Wolf     for (i = 0; i < num_reqs; i++) {
4762d20d9b7cSPaolo Bonzini         bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
4763d20d9b7cSPaolo Bonzini                               reqs[i].nb_sectors, reqs[i].flags,
4764d20d9b7cSPaolo Bonzini                               multiwrite_cb, mcb,
4765d20d9b7cSPaolo Bonzini                               true);
476640b4f539SKevin Wolf     }
476740b4f539SKevin Wolf 
476840b4f539SKevin Wolf     return 0;
476940b4f539SKevin Wolf }
477040b4f539SKevin Wolf 
47717c84b1b8SMarkus Armbruster void bdrv_aio_cancel(BlockAIOCB *acb)
477283f64091Sbellard {
477302c50efeSFam Zheng     qemu_aio_ref(acb);
477402c50efeSFam Zheng     bdrv_aio_cancel_async(acb);
477502c50efeSFam Zheng     while (acb->refcnt > 1) {
477602c50efeSFam Zheng         if (acb->aiocb_info->get_aio_context) {
477702c50efeSFam Zheng             aio_poll(acb->aiocb_info->get_aio_context(acb), true);
477802c50efeSFam Zheng         } else if (acb->bs) {
477902c50efeSFam Zheng             aio_poll(bdrv_get_aio_context(acb->bs), true);
478002c50efeSFam Zheng         } else {
478102c50efeSFam Zheng             abort();
478202c50efeSFam Zheng         }
478302c50efeSFam Zheng     }
47848007429aSFam Zheng     qemu_aio_unref(acb);
478502c50efeSFam Zheng }
478602c50efeSFam Zheng 
478702c50efeSFam Zheng /* Async version of aio cancel. The caller is not blocked if the acb implements
478802c50efeSFam Zheng  * cancel_async, otherwise we do nothing and let the request normally complete.
478902c50efeSFam Zheng  * In either case the completion callback must be called. */
47907c84b1b8SMarkus Armbruster void bdrv_aio_cancel_async(BlockAIOCB *acb)
479102c50efeSFam Zheng {
479202c50efeSFam Zheng     if (acb->aiocb_info->cancel_async) {
479302c50efeSFam Zheng         acb->aiocb_info->cancel_async(acb);
479402c50efeSFam Zheng     }
479583f64091Sbellard }
479683f64091Sbellard 
479783f64091Sbellard /**************************************************************/
479883f64091Sbellard /* async block device emulation */
479983f64091Sbellard 
48007c84b1b8SMarkus Armbruster typedef struct BlockAIOCBSync {
48017c84b1b8SMarkus Armbruster     BlockAIOCB common;
4802c16b5a2cSChristoph Hellwig     QEMUBH *bh;
4803c16b5a2cSChristoph Hellwig     int ret;
4804c16b5a2cSChristoph Hellwig     /* vector translation state */
4805c16b5a2cSChristoph Hellwig     QEMUIOVector *qiov;
4806c16b5a2cSChristoph Hellwig     uint8_t *bounce;
4807c16b5a2cSChristoph Hellwig     int is_write;
48087c84b1b8SMarkus Armbruster } BlockAIOCBSync;
4809c16b5a2cSChristoph Hellwig 
4810d7331bedSStefan Hajnoczi static const AIOCBInfo bdrv_em_aiocb_info = {
48117c84b1b8SMarkus Armbruster     .aiocb_size         = sizeof(BlockAIOCBSync),
4812c16b5a2cSChristoph Hellwig };
4813c16b5a2cSChristoph Hellwig 
481483f64091Sbellard static void bdrv_aio_bh_cb(void *opaque)
4815beac80cdSbellard {
48167c84b1b8SMarkus Armbruster     BlockAIOCBSync *acb = opaque;
4817f141eafeSaliguori 
4818857d4f46SKevin Wolf     if (!acb->is_write && acb->ret >= 0) {
481903396148SMichael Tokarev         qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
4820857d4f46SKevin Wolf     }
4821ceb42de8Saliguori     qemu_vfree(acb->bounce);
4822ce1a14dcSpbrook     acb->common.cb(acb->common.opaque, acb->ret);
48236a7ad299SDor Laor     qemu_bh_delete(acb->bh);
482436afc451SAvi Kivity     acb->bh = NULL;
48258007429aSFam Zheng     qemu_aio_unref(acb);
4826beac80cdSbellard }
4827beac80cdSbellard 
48287c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
4829f141eafeSaliguori                                       int64_t sector_num,
4830f141eafeSaliguori                                       QEMUIOVector *qiov,
4831f141eafeSaliguori                                       int nb_sectors,
4832097310b5SMarkus Armbruster                                       BlockCompletionFunc *cb,
4833f141eafeSaliguori                                       void *opaque,
4834f141eafeSaliguori                                       int is_write)
4835f141eafeSaliguori 
4836ea2384d3Sbellard {
48377c84b1b8SMarkus Armbruster     BlockAIOCBSync *acb;
483883f64091Sbellard 
4839d7331bedSStefan Hajnoczi     acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
4840f141eafeSaliguori     acb->is_write = is_write;
4841f141eafeSaliguori     acb->qiov = qiov;
4842857d4f46SKevin Wolf     acb->bounce = qemu_try_blockalign(bs, qiov->size);
48432572b37aSStefan Hajnoczi     acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);
4844f141eafeSaliguori 
4845857d4f46SKevin Wolf     if (acb->bounce == NULL) {
4846857d4f46SKevin Wolf         acb->ret = -ENOMEM;
4847857d4f46SKevin Wolf     } else if (is_write) {
4848d5e6b161SMichael Tokarev         qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
48491ed20acfSStefan Hajnoczi         acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
4850f141eafeSaliguori     } else {
48511ed20acfSStefan Hajnoczi         acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
4852f141eafeSaliguori     }
4853f141eafeSaliguori 
4854ce1a14dcSpbrook     qemu_bh_schedule(acb->bh);
4855f141eafeSaliguori 
4856ce1a14dcSpbrook     return &acb->common;
48577a6cba61Spbrook }
48587a6cba61Spbrook 
48597c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
4860f141eafeSaliguori         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
4861097310b5SMarkus Armbruster         BlockCompletionFunc *cb, void *opaque)
486283f64091Sbellard {
4863f141eafeSaliguori     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
486483f64091Sbellard }
486583f64091Sbellard 
48667c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
4867f141eafeSaliguori         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
4868097310b5SMarkus Armbruster         BlockCompletionFunc *cb, void *opaque)
4869f141eafeSaliguori {
4870f141eafeSaliguori     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
4871f141eafeSaliguori }
4872f141eafeSaliguori 
487368485420SKevin Wolf 
48747c84b1b8SMarkus Armbruster typedef struct BlockAIOCBCoroutine {
48757c84b1b8SMarkus Armbruster     BlockAIOCB common;
487668485420SKevin Wolf     BlockRequest req;
487768485420SKevin Wolf     bool is_write;
48780b5a2445SPaolo Bonzini     bool need_bh;
4879d318aea9SKevin Wolf     bool *done;
488068485420SKevin Wolf     QEMUBH* bh;
48817c84b1b8SMarkus Armbruster } BlockAIOCBCoroutine;
488268485420SKevin Wolf 
4883d7331bedSStefan Hajnoczi static const AIOCBInfo bdrv_em_co_aiocb_info = {
48847c84b1b8SMarkus Armbruster     .aiocb_size         = sizeof(BlockAIOCBCoroutine),
488568485420SKevin Wolf };
488668485420SKevin Wolf 
48870b5a2445SPaolo Bonzini static void bdrv_co_complete(BlockAIOCBCoroutine *acb)
48880b5a2445SPaolo Bonzini {
48890b5a2445SPaolo Bonzini     if (!acb->need_bh) {
48900b5a2445SPaolo Bonzini         acb->common.cb(acb->common.opaque, acb->req.error);
48910b5a2445SPaolo Bonzini         qemu_aio_unref(acb);
48920b5a2445SPaolo Bonzini     }
48930b5a2445SPaolo Bonzini }
48940b5a2445SPaolo Bonzini 
489535246a68SPaolo Bonzini static void bdrv_co_em_bh(void *opaque)
489668485420SKevin Wolf {
48977c84b1b8SMarkus Armbruster     BlockAIOCBCoroutine *acb = opaque;
489868485420SKevin Wolf 
48990b5a2445SPaolo Bonzini     assert(!acb->need_bh);
490068485420SKevin Wolf     qemu_bh_delete(acb->bh);
49010b5a2445SPaolo Bonzini     bdrv_co_complete(acb);
49020b5a2445SPaolo Bonzini }
49030b5a2445SPaolo Bonzini 
49040b5a2445SPaolo Bonzini static void bdrv_co_maybe_schedule_bh(BlockAIOCBCoroutine *acb)
49050b5a2445SPaolo Bonzini {
49060b5a2445SPaolo Bonzini     acb->need_bh = false;
49070b5a2445SPaolo Bonzini     if (acb->req.error != -EINPROGRESS) {
49080b5a2445SPaolo Bonzini         BlockDriverState *bs = acb->common.bs;
49090b5a2445SPaolo Bonzini 
49100b5a2445SPaolo Bonzini         acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
49110b5a2445SPaolo Bonzini         qemu_bh_schedule(acb->bh);
49120b5a2445SPaolo Bonzini     }
491368485420SKevin Wolf }
491468485420SKevin Wolf 
4915b2a61371SStefan Hajnoczi /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
4916b2a61371SStefan Hajnoczi static void coroutine_fn bdrv_co_do_rw(void *opaque)
4917b2a61371SStefan Hajnoczi {
49187c84b1b8SMarkus Armbruster     BlockAIOCBCoroutine *acb = opaque;
4919b2a61371SStefan Hajnoczi     BlockDriverState *bs = acb->common.bs;
4920b2a61371SStefan Hajnoczi 
4921b2a61371SStefan Hajnoczi     if (!acb->is_write) {
4922b2a61371SStefan Hajnoczi         acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
4923d20d9b7cSPaolo Bonzini             acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
4924b2a61371SStefan Hajnoczi     } else {
4925b2a61371SStefan Hajnoczi         acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
4926d20d9b7cSPaolo Bonzini             acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
4927b2a61371SStefan Hajnoczi     }
4928b2a61371SStefan Hajnoczi 
49290b5a2445SPaolo Bonzini     bdrv_co_complete(acb);
4930b2a61371SStefan Hajnoczi }
4931b2a61371SStefan Hajnoczi 
49327c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
493368485420SKevin Wolf                                          int64_t sector_num,
493468485420SKevin Wolf                                          QEMUIOVector *qiov,
493568485420SKevin Wolf                                          int nb_sectors,
4936d20d9b7cSPaolo Bonzini                                          BdrvRequestFlags flags,
4937097310b5SMarkus Armbruster                                          BlockCompletionFunc *cb,
493868485420SKevin Wolf                                          void *opaque,
49398c5873d6SStefan Hajnoczi                                          bool is_write)
494068485420SKevin Wolf {
494168485420SKevin Wolf     Coroutine *co;
49427c84b1b8SMarkus Armbruster     BlockAIOCBCoroutine *acb;
494368485420SKevin Wolf 
4944d7331bedSStefan Hajnoczi     acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
49450b5a2445SPaolo Bonzini     acb->need_bh = true;
49460b5a2445SPaolo Bonzini     acb->req.error = -EINPROGRESS;
494768485420SKevin Wolf     acb->req.sector = sector_num;
494868485420SKevin Wolf     acb->req.nb_sectors = nb_sectors;
494968485420SKevin Wolf     acb->req.qiov = qiov;
4950d20d9b7cSPaolo Bonzini     acb->req.flags = flags;
495168485420SKevin Wolf     acb->is_write = is_write;
495268485420SKevin Wolf 
49538c5873d6SStefan Hajnoczi     co = qemu_coroutine_create(bdrv_co_do_rw);
495468485420SKevin Wolf     qemu_coroutine_enter(co, acb);
495568485420SKevin Wolf 
49560b5a2445SPaolo Bonzini     bdrv_co_maybe_schedule_bh(acb);
495768485420SKevin Wolf     return &acb->common;
495868485420SKevin Wolf }
495968485420SKevin Wolf 
496007f07615SPaolo Bonzini static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
4961b2e12bc6SChristoph Hellwig {
49627c84b1b8SMarkus Armbruster     BlockAIOCBCoroutine *acb = opaque;
496307f07615SPaolo Bonzini     BlockDriverState *bs = acb->common.bs;
4964b2e12bc6SChristoph Hellwig 
496507f07615SPaolo Bonzini     acb->req.error = bdrv_co_flush(bs);
49660b5a2445SPaolo Bonzini     bdrv_co_complete(acb);
4967b2e12bc6SChristoph Hellwig }
4968b2e12bc6SChristoph Hellwig 
49697c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
4970097310b5SMarkus Armbruster         BlockCompletionFunc *cb, void *opaque)
4971016f5cf6SAlexander Graf {
497207f07615SPaolo Bonzini     trace_bdrv_aio_flush(bs, opaque);
4973016f5cf6SAlexander Graf 
497407f07615SPaolo Bonzini     Coroutine *co;
49757c84b1b8SMarkus Armbruster     BlockAIOCBCoroutine *acb;
4976016f5cf6SAlexander Graf 
4977d7331bedSStefan Hajnoczi     acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
49780b5a2445SPaolo Bonzini     acb->need_bh = true;
49790b5a2445SPaolo Bonzini     acb->req.error = -EINPROGRESS;
4980d318aea9SKevin Wolf 
498107f07615SPaolo Bonzini     co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
498207f07615SPaolo Bonzini     qemu_coroutine_enter(co, acb);
4983016f5cf6SAlexander Graf 
49840b5a2445SPaolo Bonzini     bdrv_co_maybe_schedule_bh(acb);
4985016f5cf6SAlexander Graf     return &acb->common;
4986016f5cf6SAlexander Graf }
4987016f5cf6SAlexander Graf 
49884265d620SPaolo Bonzini static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
49894265d620SPaolo Bonzini {
49907c84b1b8SMarkus Armbruster     BlockAIOCBCoroutine *acb = opaque;
49914265d620SPaolo Bonzini     BlockDriverState *bs = acb->common.bs;
49924265d620SPaolo Bonzini 
49934265d620SPaolo Bonzini     acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
49940b5a2445SPaolo Bonzini     bdrv_co_complete(acb);
49954265d620SPaolo Bonzini }
49964265d620SPaolo Bonzini 
49977c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs,
49984265d620SPaolo Bonzini         int64_t sector_num, int nb_sectors,
4999097310b5SMarkus Armbruster         BlockCompletionFunc *cb, void *opaque)
50004265d620SPaolo Bonzini {
50014265d620SPaolo Bonzini     Coroutine *co;
50027c84b1b8SMarkus Armbruster     BlockAIOCBCoroutine *acb;
50034265d620SPaolo Bonzini 
50044265d620SPaolo Bonzini     trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
50054265d620SPaolo Bonzini 
5006d7331bedSStefan Hajnoczi     acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
50070b5a2445SPaolo Bonzini     acb->need_bh = true;
50080b5a2445SPaolo Bonzini     acb->req.error = -EINPROGRESS;
50094265d620SPaolo Bonzini     acb->req.sector = sector_num;
50104265d620SPaolo Bonzini     acb->req.nb_sectors = nb_sectors;
50114265d620SPaolo Bonzini     co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
50124265d620SPaolo Bonzini     qemu_coroutine_enter(co, acb);
50134265d620SPaolo Bonzini 
50140b5a2445SPaolo Bonzini     bdrv_co_maybe_schedule_bh(acb);
50154265d620SPaolo Bonzini     return &acb->common;
50164265d620SPaolo Bonzini }
50174265d620SPaolo Bonzini 
5018ea2384d3Sbellard void bdrv_init(void)
5019ea2384d3Sbellard {
50205efa9d5aSAnthony Liguori     module_call_init(MODULE_INIT_BLOCK);
5021ea2384d3Sbellard }
5022ce1a14dcSpbrook 
5023eb852011SMarkus Armbruster void bdrv_init_with_whitelist(void)
5024eb852011SMarkus Armbruster {
5025eb852011SMarkus Armbruster     use_bdrv_whitelist = 1;
5026eb852011SMarkus Armbruster     bdrv_init();
5027eb852011SMarkus Armbruster }
5028eb852011SMarkus Armbruster 
5029d7331bedSStefan Hajnoczi void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
5030097310b5SMarkus Armbruster                    BlockCompletionFunc *cb, void *opaque)
50316bbff9a0Saliguori {
50327c84b1b8SMarkus Armbruster     BlockAIOCB *acb;
5033ce1a14dcSpbrook 
5034d7331bedSStefan Hajnoczi     acb = g_slice_alloc(aiocb_info->aiocb_size);
5035d7331bedSStefan Hajnoczi     acb->aiocb_info = aiocb_info;
5036ce1a14dcSpbrook     acb->bs = bs;
5037ce1a14dcSpbrook     acb->cb = cb;
5038ce1a14dcSpbrook     acb->opaque = opaque;
5039f197fe2bSFam Zheng     acb->refcnt = 1;
5040ce1a14dcSpbrook     return acb;
5041ce1a14dcSpbrook }
5042ce1a14dcSpbrook 
5043f197fe2bSFam Zheng void qemu_aio_ref(void *p)
5044f197fe2bSFam Zheng {
50457c84b1b8SMarkus Armbruster     BlockAIOCB *acb = p;
5046f197fe2bSFam Zheng     acb->refcnt++;
5047f197fe2bSFam Zheng }
5048f197fe2bSFam Zheng 
50498007429aSFam Zheng void qemu_aio_unref(void *p)
5050ce1a14dcSpbrook {
50517c84b1b8SMarkus Armbruster     BlockAIOCB *acb = p;
5052f197fe2bSFam Zheng     assert(acb->refcnt > 0);
5053f197fe2bSFam Zheng     if (--acb->refcnt == 0) {
5054d7331bedSStefan Hajnoczi         g_slice_free1(acb->aiocb_info->aiocb_size, acb);
5055ce1a14dcSpbrook     }
5056f197fe2bSFam Zheng }
505719cb3738Sbellard 
505819cb3738Sbellard /**************************************************************/
5059f9f05dc5SKevin Wolf /* Coroutine block device emulation */
5060f9f05dc5SKevin Wolf 
5061f9f05dc5SKevin Wolf typedef struct CoroutineIOCompletion {
5062f9f05dc5SKevin Wolf     Coroutine *coroutine;
5063f9f05dc5SKevin Wolf     int ret;
5064f9f05dc5SKevin Wolf } CoroutineIOCompletion;
5065f9f05dc5SKevin Wolf 
5066f9f05dc5SKevin Wolf static void bdrv_co_io_em_complete(void *opaque, int ret)
5067f9f05dc5SKevin Wolf {
5068f9f05dc5SKevin Wolf     CoroutineIOCompletion *co = opaque;
5069f9f05dc5SKevin Wolf 
5070f9f05dc5SKevin Wolf     co->ret = ret;
5071f9f05dc5SKevin Wolf     qemu_coroutine_enter(co->coroutine, NULL);
5072f9f05dc5SKevin Wolf }
5073f9f05dc5SKevin Wolf 
5074f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
5075f9f05dc5SKevin Wolf                                       int nb_sectors, QEMUIOVector *iov,
5076f9f05dc5SKevin Wolf                                       bool is_write)
5077f9f05dc5SKevin Wolf {
5078f9f05dc5SKevin Wolf     CoroutineIOCompletion co = {
5079f9f05dc5SKevin Wolf         .coroutine = qemu_coroutine_self(),
5080f9f05dc5SKevin Wolf     };
50817c84b1b8SMarkus Armbruster     BlockAIOCB *acb;
5082f9f05dc5SKevin Wolf 
5083f9f05dc5SKevin Wolf     if (is_write) {
5084a652d160SStefan Hajnoczi         acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
5085f9f05dc5SKevin Wolf                                        bdrv_co_io_em_complete, &co);
5086f9f05dc5SKevin Wolf     } else {
5087a652d160SStefan Hajnoczi         acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
5088f9f05dc5SKevin Wolf                                       bdrv_co_io_em_complete, &co);
5089f9f05dc5SKevin Wolf     }
5090f9f05dc5SKevin Wolf 
509159370aaaSStefan Hajnoczi     trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
5092f9f05dc5SKevin Wolf     if (!acb) {
5093f9f05dc5SKevin Wolf         return -EIO;
5094f9f05dc5SKevin Wolf     }
5095f9f05dc5SKevin Wolf     qemu_coroutine_yield();
5096f9f05dc5SKevin Wolf 
5097f9f05dc5SKevin Wolf     return co.ret;
5098f9f05dc5SKevin Wolf }
5099f9f05dc5SKevin Wolf 
5100f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
5101f9f05dc5SKevin Wolf                                          int64_t sector_num, int nb_sectors,
5102f9f05dc5SKevin Wolf                                          QEMUIOVector *iov)
5103f9f05dc5SKevin Wolf {
5104f9f05dc5SKevin Wolf     return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
5105f9f05dc5SKevin Wolf }
5106f9f05dc5SKevin Wolf 
5107f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
5108f9f05dc5SKevin Wolf                                          int64_t sector_num, int nb_sectors,
5109f9f05dc5SKevin Wolf                                          QEMUIOVector *iov)
5110f9f05dc5SKevin Wolf {
5111f9f05dc5SKevin Wolf     return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
5112f9f05dc5SKevin Wolf }
5113f9f05dc5SKevin Wolf 
511407f07615SPaolo Bonzini static void coroutine_fn bdrv_flush_co_entry(void *opaque)
5115e7a8a783SKevin Wolf {
511607f07615SPaolo Bonzini     RwCo *rwco = opaque;
511707f07615SPaolo Bonzini 
511807f07615SPaolo Bonzini     rwco->ret = bdrv_co_flush(rwco->bs);
511907f07615SPaolo Bonzini }
512007f07615SPaolo Bonzini 
512107f07615SPaolo Bonzini int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
512207f07615SPaolo Bonzini {
5123eb489bb1SKevin Wolf     int ret;
5124eb489bb1SKevin Wolf 
512529cdb251SPaolo Bonzini     if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
512607f07615SPaolo Bonzini         return 0;
5127eb489bb1SKevin Wolf     }
5128eb489bb1SKevin Wolf 
5129ca716364SKevin Wolf     /* Write back cached data to the OS even with cache=unsafe */
5130bf736fe3SKevin Wolf     BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
5131eb489bb1SKevin Wolf     if (bs->drv->bdrv_co_flush_to_os) {
5132eb489bb1SKevin Wolf         ret = bs->drv->bdrv_co_flush_to_os(bs);
5133eb489bb1SKevin Wolf         if (ret < 0) {
5134eb489bb1SKevin Wolf             return ret;
5135eb489bb1SKevin Wolf         }
5136eb489bb1SKevin Wolf     }
5137eb489bb1SKevin Wolf 
5138ca716364SKevin Wolf     /* But don't actually force it to the disk with cache=unsafe */
5139ca716364SKevin Wolf     if (bs->open_flags & BDRV_O_NO_FLUSH) {
5140d4c82329SKevin Wolf         goto flush_parent;
5141ca716364SKevin Wolf     }
5142ca716364SKevin Wolf 
5143bf736fe3SKevin Wolf     BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
5144eb489bb1SKevin Wolf     if (bs->drv->bdrv_co_flush_to_disk) {
514529cdb251SPaolo Bonzini         ret = bs->drv->bdrv_co_flush_to_disk(bs);
514607f07615SPaolo Bonzini     } else if (bs->drv->bdrv_aio_flush) {
51477c84b1b8SMarkus Armbruster         BlockAIOCB *acb;
5148e7a8a783SKevin Wolf         CoroutineIOCompletion co = {
5149e7a8a783SKevin Wolf             .coroutine = qemu_coroutine_self(),
5150e7a8a783SKevin Wolf         };
5151e7a8a783SKevin Wolf 
515207f07615SPaolo Bonzini         acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
515307f07615SPaolo Bonzini         if (acb == NULL) {
515429cdb251SPaolo Bonzini             ret = -EIO;
515507f07615SPaolo Bonzini         } else {
5156e7a8a783SKevin Wolf             qemu_coroutine_yield();
515729cdb251SPaolo Bonzini             ret = co.ret;
5158e7a8a783SKevin Wolf         }
515907f07615SPaolo Bonzini     } else {
516007f07615SPaolo Bonzini         /*
516107f07615SPaolo Bonzini          * Some block drivers always operate in either writethrough or unsafe
516207f07615SPaolo Bonzini          * mode and don't support bdrv_flush therefore. Usually qemu doesn't
516307f07615SPaolo Bonzini          * know how the server works (because the behaviour is hardcoded or
516407f07615SPaolo Bonzini          * depends on server-side configuration), so we can't ensure that
516507f07615SPaolo Bonzini          * everything is safe on disk. Returning an error doesn't work because
516607f07615SPaolo Bonzini          * that would break guests even if the server operates in writethrough
516707f07615SPaolo Bonzini          * mode.
516807f07615SPaolo Bonzini          *
516907f07615SPaolo Bonzini          * Let's hope the user knows what he's doing.
517007f07615SPaolo Bonzini          */
517129cdb251SPaolo Bonzini         ret = 0;
517207f07615SPaolo Bonzini     }
517329cdb251SPaolo Bonzini     if (ret < 0) {
517429cdb251SPaolo Bonzini         return ret;
517529cdb251SPaolo Bonzini     }
517629cdb251SPaolo Bonzini 
517729cdb251SPaolo Bonzini     /* Now flush the underlying protocol.  It will also have BDRV_O_NO_FLUSH
517829cdb251SPaolo Bonzini      * in the case of cache=unsafe, so there are no useless flushes.
517929cdb251SPaolo Bonzini      */
5180d4c82329SKevin Wolf flush_parent:
518129cdb251SPaolo Bonzini     return bdrv_co_flush(bs->file);
518207f07615SPaolo Bonzini }
518307f07615SPaolo Bonzini 
51845a8a30dbSKevin Wolf void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
51850f15423cSAnthony Liguori {
51865a8a30dbSKevin Wolf     Error *local_err = NULL;
51875a8a30dbSKevin Wolf     int ret;
51885a8a30dbSKevin Wolf 
51893456a8d1SKevin Wolf     if (!bs->drv)  {
51903456a8d1SKevin Wolf         return;
51910f15423cSAnthony Liguori     }
51923456a8d1SKevin Wolf 
51937ea2d269SAlexey Kardashevskiy     if (!(bs->open_flags & BDRV_O_INCOMING)) {
51947ea2d269SAlexey Kardashevskiy         return;
51957ea2d269SAlexey Kardashevskiy     }
51967ea2d269SAlexey Kardashevskiy     bs->open_flags &= ~BDRV_O_INCOMING;
51977ea2d269SAlexey Kardashevskiy 
51983456a8d1SKevin Wolf     if (bs->drv->bdrv_invalidate_cache) {
51995a8a30dbSKevin Wolf         bs->drv->bdrv_invalidate_cache(bs, &local_err);
52003456a8d1SKevin Wolf     } else if (bs->file) {
52015a8a30dbSKevin Wolf         bdrv_invalidate_cache(bs->file, &local_err);
52025a8a30dbSKevin Wolf     }
52035a8a30dbSKevin Wolf     if (local_err) {
52045a8a30dbSKevin Wolf         error_propagate(errp, local_err);
52055a8a30dbSKevin Wolf         return;
52063456a8d1SKevin Wolf     }
52073456a8d1SKevin Wolf 
52085a8a30dbSKevin Wolf     ret = refresh_total_sectors(bs, bs->total_sectors);
52095a8a30dbSKevin Wolf     if (ret < 0) {
52105a8a30dbSKevin Wolf         error_setg_errno(errp, -ret, "Could not refresh total sector count");
52115a8a30dbSKevin Wolf         return;
52125a8a30dbSKevin Wolf     }
52130f15423cSAnthony Liguori }
52140f15423cSAnthony Liguori 
52155a8a30dbSKevin Wolf void bdrv_invalidate_cache_all(Error **errp)
52160f15423cSAnthony Liguori {
52170f15423cSAnthony Liguori     BlockDriverState *bs;
52185a8a30dbSKevin Wolf     Error *local_err = NULL;
52190f15423cSAnthony Liguori 
5220dc364f4cSBenoît Canet     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
5221ed78cda3SStefan Hajnoczi         AioContext *aio_context = bdrv_get_aio_context(bs);
5222ed78cda3SStefan Hajnoczi 
5223ed78cda3SStefan Hajnoczi         aio_context_acquire(aio_context);
52245a8a30dbSKevin Wolf         bdrv_invalidate_cache(bs, &local_err);
5225ed78cda3SStefan Hajnoczi         aio_context_release(aio_context);
52265a8a30dbSKevin Wolf         if (local_err) {
52275a8a30dbSKevin Wolf             error_propagate(errp, local_err);
52285a8a30dbSKevin Wolf             return;
52295a8a30dbSKevin Wolf         }
52300f15423cSAnthony Liguori     }
52310f15423cSAnthony Liguori }
52320f15423cSAnthony Liguori 
523307f07615SPaolo Bonzini int bdrv_flush(BlockDriverState *bs)
523407f07615SPaolo Bonzini {
523507f07615SPaolo Bonzini     Coroutine *co;
523607f07615SPaolo Bonzini     RwCo rwco = {
523707f07615SPaolo Bonzini         .bs = bs,
523807f07615SPaolo Bonzini         .ret = NOT_DONE,
523907f07615SPaolo Bonzini     };
524007f07615SPaolo Bonzini 
524107f07615SPaolo Bonzini     if (qemu_in_coroutine()) {
524207f07615SPaolo Bonzini         /* Fast-path if already in coroutine context */
524307f07615SPaolo Bonzini         bdrv_flush_co_entry(&rwco);
524407f07615SPaolo Bonzini     } else {
52452572b37aSStefan Hajnoczi         AioContext *aio_context = bdrv_get_aio_context(bs);
52462572b37aSStefan Hajnoczi 
524707f07615SPaolo Bonzini         co = qemu_coroutine_create(bdrv_flush_co_entry);
524807f07615SPaolo Bonzini         qemu_coroutine_enter(co, &rwco);
524907f07615SPaolo Bonzini         while (rwco.ret == NOT_DONE) {
52502572b37aSStefan Hajnoczi             aio_poll(aio_context, true);
525107f07615SPaolo Bonzini         }
525207f07615SPaolo Bonzini     }
525307f07615SPaolo Bonzini 
525407f07615SPaolo Bonzini     return rwco.ret;
525507f07615SPaolo Bonzini }
5256e7a8a783SKevin Wolf 
5257775aa8b6SKevin Wolf typedef struct DiscardCo {
5258775aa8b6SKevin Wolf     BlockDriverState *bs;
5259775aa8b6SKevin Wolf     int64_t sector_num;
5260775aa8b6SKevin Wolf     int nb_sectors;
5261775aa8b6SKevin Wolf     int ret;
5262775aa8b6SKevin Wolf } DiscardCo;
52634265d620SPaolo Bonzini static void coroutine_fn bdrv_discard_co_entry(void *opaque)
52644265d620SPaolo Bonzini {
5265775aa8b6SKevin Wolf     DiscardCo *rwco = opaque;
52664265d620SPaolo Bonzini 
52674265d620SPaolo Bonzini     rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
52684265d620SPaolo Bonzini }
52694265d620SPaolo Bonzini 
52704265d620SPaolo Bonzini int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
52714265d620SPaolo Bonzini                                  int nb_sectors)
52724265d620SPaolo Bonzini {
5273b9c64947SMax Reitz     int max_discard, ret;
5274d51e9fe5SPaolo Bonzini 
52754265d620SPaolo Bonzini     if (!bs->drv) {
52764265d620SPaolo Bonzini         return -ENOMEDIUM;
5277b9c64947SMax Reitz     }
5278b9c64947SMax Reitz 
5279b9c64947SMax Reitz     ret = bdrv_check_request(bs, sector_num, nb_sectors);
5280b9c64947SMax Reitz     if (ret < 0) {
5281b9c64947SMax Reitz         return ret;
52824265d620SPaolo Bonzini     } else if (bs->read_only) {
52834265d620SPaolo Bonzini         return -EROFS;
5284df702c9bSPaolo Bonzini     }
5285df702c9bSPaolo Bonzini 
52868f0720ecSPaolo Bonzini     bdrv_reset_dirty(bs, sector_num, nb_sectors);
5287df702c9bSPaolo Bonzini 
52889e8f1835SPaolo Bonzini     /* Do nothing if disabled.  */
52899e8f1835SPaolo Bonzini     if (!(bs->open_flags & BDRV_O_UNMAP)) {
52909e8f1835SPaolo Bonzini         return 0;
52919e8f1835SPaolo Bonzini     }
52929e8f1835SPaolo Bonzini 
5293d51e9fe5SPaolo Bonzini     if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
5294d51e9fe5SPaolo Bonzini         return 0;
5295d51e9fe5SPaolo Bonzini     }
52966f14da52SPeter Lieven 
529775af1f34SPeter Lieven     max_discard = MIN_NON_ZERO(bs->bl.max_discard, BDRV_REQUEST_MAX_SECTORS);
52986f14da52SPeter Lieven     while (nb_sectors > 0) {
52996f14da52SPeter Lieven         int ret;
53006f14da52SPeter Lieven         int num = nb_sectors;
53016f14da52SPeter Lieven 
53026f14da52SPeter Lieven         /* align request */
53036f14da52SPeter Lieven         if (bs->bl.discard_alignment &&
53046f14da52SPeter Lieven             num >= bs->bl.discard_alignment &&
53056f14da52SPeter Lieven             sector_num % bs->bl.discard_alignment) {
53066f14da52SPeter Lieven             if (num > bs->bl.discard_alignment) {
53076f14da52SPeter Lieven                 num = bs->bl.discard_alignment;
53086f14da52SPeter Lieven             }
53096f14da52SPeter Lieven             num -= sector_num % bs->bl.discard_alignment;
53106f14da52SPeter Lieven         }
53116f14da52SPeter Lieven 
53126f14da52SPeter Lieven         /* limit request size */
53136f14da52SPeter Lieven         if (num > max_discard) {
53146f14da52SPeter Lieven             num = max_discard;
53156f14da52SPeter Lieven         }
53166f14da52SPeter Lieven 
5317d51e9fe5SPaolo Bonzini         if (bs->drv->bdrv_co_discard) {
53186f14da52SPeter Lieven             ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
5319d51e9fe5SPaolo Bonzini         } else {
53207c84b1b8SMarkus Armbruster             BlockAIOCB *acb;
53214265d620SPaolo Bonzini             CoroutineIOCompletion co = {
53224265d620SPaolo Bonzini                 .coroutine = qemu_coroutine_self(),
53234265d620SPaolo Bonzini             };
53244265d620SPaolo Bonzini 
53254265d620SPaolo Bonzini             acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
53264265d620SPaolo Bonzini                                             bdrv_co_io_em_complete, &co);
53274265d620SPaolo Bonzini             if (acb == NULL) {
53284265d620SPaolo Bonzini                 return -EIO;
53294265d620SPaolo Bonzini             } else {
53304265d620SPaolo Bonzini                 qemu_coroutine_yield();
5331d51e9fe5SPaolo Bonzini                 ret = co.ret;
53324265d620SPaolo Bonzini             }
5333d51e9fe5SPaolo Bonzini         }
53347ce21016SPaolo Bonzini         if (ret && ret != -ENOTSUP) {
5335d51e9fe5SPaolo Bonzini             return ret;
5336d51e9fe5SPaolo Bonzini         }
5337d51e9fe5SPaolo Bonzini 
5338d51e9fe5SPaolo Bonzini         sector_num += num;
5339d51e9fe5SPaolo Bonzini         nb_sectors -= num;
5340d51e9fe5SPaolo Bonzini     }
53414265d620SPaolo Bonzini     return 0;
53424265d620SPaolo Bonzini }
53434265d620SPaolo Bonzini 
53444265d620SPaolo Bonzini int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
53454265d620SPaolo Bonzini {
53464265d620SPaolo Bonzini     Coroutine *co;
5347775aa8b6SKevin Wolf     DiscardCo rwco = {
53484265d620SPaolo Bonzini         .bs = bs,
53494265d620SPaolo Bonzini         .sector_num = sector_num,
53504265d620SPaolo Bonzini         .nb_sectors = nb_sectors,
53514265d620SPaolo Bonzini         .ret = NOT_DONE,
53524265d620SPaolo Bonzini     };
53534265d620SPaolo Bonzini 
53544265d620SPaolo Bonzini     if (qemu_in_coroutine()) {
53554265d620SPaolo Bonzini         /* Fast-path if already in coroutine context */
53564265d620SPaolo Bonzini         bdrv_discard_co_entry(&rwco);
53574265d620SPaolo Bonzini     } else {
53582572b37aSStefan Hajnoczi         AioContext *aio_context = bdrv_get_aio_context(bs);
53592572b37aSStefan Hajnoczi 
53604265d620SPaolo Bonzini         co = qemu_coroutine_create(bdrv_discard_co_entry);
53614265d620SPaolo Bonzini         qemu_coroutine_enter(co, &rwco);
53624265d620SPaolo Bonzini         while (rwco.ret == NOT_DONE) {
53632572b37aSStefan Hajnoczi             aio_poll(aio_context, true);
53644265d620SPaolo Bonzini         }
53654265d620SPaolo Bonzini     }
53664265d620SPaolo Bonzini 
53674265d620SPaolo Bonzini     return rwco.ret;
53684265d620SPaolo Bonzini }
53694265d620SPaolo Bonzini 
5370f9f05dc5SKevin Wolf /**************************************************************/
537119cb3738Sbellard /* removable device support */
537219cb3738Sbellard 
537319cb3738Sbellard /**
537419cb3738Sbellard  * Return TRUE if the media is present
537519cb3738Sbellard  */
537619cb3738Sbellard int bdrv_is_inserted(BlockDriverState *bs)
537719cb3738Sbellard {
537819cb3738Sbellard     BlockDriver *drv = bs->drv;
5379a1aff5bfSMarkus Armbruster 
538019cb3738Sbellard     if (!drv)
538119cb3738Sbellard         return 0;
538219cb3738Sbellard     if (!drv->bdrv_is_inserted)
5383a1aff5bfSMarkus Armbruster         return 1;
5384a1aff5bfSMarkus Armbruster     return drv->bdrv_is_inserted(bs);
538519cb3738Sbellard }
538619cb3738Sbellard 
538719cb3738Sbellard /**
53888e49ca46SMarkus Armbruster  * Return whether the media changed since the last call to this
53898e49ca46SMarkus Armbruster  * function, or -ENOTSUP if we don't know.  Most drivers don't know.
539019cb3738Sbellard  */
539119cb3738Sbellard int bdrv_media_changed(BlockDriverState *bs)
539219cb3738Sbellard {
539319cb3738Sbellard     BlockDriver *drv = bs->drv;
539419cb3738Sbellard 
53958e49ca46SMarkus Armbruster     if (drv && drv->bdrv_media_changed) {
53968e49ca46SMarkus Armbruster         return drv->bdrv_media_changed(bs);
53978e49ca46SMarkus Armbruster     }
53988e49ca46SMarkus Armbruster     return -ENOTSUP;
539919cb3738Sbellard }
540019cb3738Sbellard 
540119cb3738Sbellard /**
540219cb3738Sbellard  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
540319cb3738Sbellard  */
5404f36f3949SLuiz Capitulino void bdrv_eject(BlockDriverState *bs, bool eject_flag)
540519cb3738Sbellard {
540619cb3738Sbellard     BlockDriver *drv = bs->drv;
5407bfb197e0SMarkus Armbruster     const char *device_name;
540819cb3738Sbellard 
5409822e1cd1SMarkus Armbruster     if (drv && drv->bdrv_eject) {
5410822e1cd1SMarkus Armbruster         drv->bdrv_eject(bs, eject_flag);
541119cb3738Sbellard     }
54126f382ed2SLuiz Capitulino 
5413bfb197e0SMarkus Armbruster     device_name = bdrv_get_device_name(bs);
5414bfb197e0SMarkus Armbruster     if (device_name[0] != '\0') {
5415bfb197e0SMarkus Armbruster         qapi_event_send_device_tray_moved(device_name,
5416a5ee7bd4SWenchao Xia                                           eject_flag, &error_abort);
54176f382ed2SLuiz Capitulino     }
541819cb3738Sbellard }
541919cb3738Sbellard 
542019cb3738Sbellard /**
542119cb3738Sbellard  * Lock or unlock the media (if it is locked, the user won't be able
542219cb3738Sbellard  * to eject it manually).
542319cb3738Sbellard  */
5424025e849aSMarkus Armbruster void bdrv_lock_medium(BlockDriverState *bs, bool locked)
542519cb3738Sbellard {
542619cb3738Sbellard     BlockDriver *drv = bs->drv;
542719cb3738Sbellard 
5428025e849aSMarkus Armbruster     trace_bdrv_lock_medium(bs, locked);
5429b8c6d095SStefan Hajnoczi 
5430025e849aSMarkus Armbruster     if (drv && drv->bdrv_lock_medium) {
5431025e849aSMarkus Armbruster         drv->bdrv_lock_medium(bs, locked);
543219cb3738Sbellard     }
543319cb3738Sbellard }
5434985a03b0Sths 
5435985a03b0Sths /* needed for generic scsi interface */
5436985a03b0Sths 
5437985a03b0Sths int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
5438985a03b0Sths {
5439985a03b0Sths     BlockDriver *drv = bs->drv;
5440985a03b0Sths 
5441985a03b0Sths     if (drv && drv->bdrv_ioctl)
5442985a03b0Sths         return drv->bdrv_ioctl(bs, req, buf);
5443985a03b0Sths     return -ENOTSUP;
5444985a03b0Sths }
54457d780669Saliguori 
54467c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
5447221f715dSaliguori         unsigned long int req, void *buf,
5448097310b5SMarkus Armbruster         BlockCompletionFunc *cb, void *opaque)
54497d780669Saliguori {
5450221f715dSaliguori     BlockDriver *drv = bs->drv;
54517d780669Saliguori 
5452221f715dSaliguori     if (drv && drv->bdrv_aio_ioctl)
5453221f715dSaliguori         return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
5454221f715dSaliguori     return NULL;
54557d780669Saliguori }
5456e268ca52Saliguori 
54571b7fd729SPaolo Bonzini void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
54587b6f9300SMarkus Armbruster {
54591b7fd729SPaolo Bonzini     bs->guest_block_size = align;
54607b6f9300SMarkus Armbruster }
54617cd1e32aSlirans@il.ibm.com 
5462e268ca52Saliguori void *qemu_blockalign(BlockDriverState *bs, size_t size)
5463e268ca52Saliguori {
5464339064d5SKevin Wolf     return qemu_memalign(bdrv_opt_mem_align(bs), size);
5465e268ca52Saliguori }
54667cd1e32aSlirans@il.ibm.com 
54679ebd8448SMax Reitz void *qemu_blockalign0(BlockDriverState *bs, size_t size)
54689ebd8448SMax Reitz {
54699ebd8448SMax Reitz     return memset(qemu_blockalign(bs, size), 0, size);
54709ebd8448SMax Reitz }
54719ebd8448SMax Reitz 
54727d2a35ccSKevin Wolf void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
54737d2a35ccSKevin Wolf {
54747d2a35ccSKevin Wolf     size_t align = bdrv_opt_mem_align(bs);
54757d2a35ccSKevin Wolf 
54767d2a35ccSKevin Wolf     /* Ensure that NULL is never returned on success */
54777d2a35ccSKevin Wolf     assert(align > 0);
54787d2a35ccSKevin Wolf     if (size == 0) {
54797d2a35ccSKevin Wolf         size = align;
54807d2a35ccSKevin Wolf     }
54817d2a35ccSKevin Wolf 
54827d2a35ccSKevin Wolf     return qemu_try_memalign(align, size);
54837d2a35ccSKevin Wolf }
54847d2a35ccSKevin Wolf 
54859ebd8448SMax Reitz void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
54869ebd8448SMax Reitz {
54879ebd8448SMax Reitz     void *mem = qemu_try_blockalign(bs, size);
54889ebd8448SMax Reitz 
54899ebd8448SMax Reitz     if (mem) {
54909ebd8448SMax Reitz         memset(mem, 0, size);
54919ebd8448SMax Reitz     }
54929ebd8448SMax Reitz 
54939ebd8448SMax Reitz     return mem;
54949ebd8448SMax Reitz }
54959ebd8448SMax Reitz 
5496c53b1c51SStefan Hajnoczi /*
5497c53b1c51SStefan Hajnoczi  * Check if all memory in this vector is sector aligned.
5498c53b1c51SStefan Hajnoczi  */
5499c53b1c51SStefan Hajnoczi bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
5500c53b1c51SStefan Hajnoczi {
5501c53b1c51SStefan Hajnoczi     int i;
5502339064d5SKevin Wolf     size_t alignment = bdrv_opt_mem_align(bs);
5503c53b1c51SStefan Hajnoczi 
5504c53b1c51SStefan Hajnoczi     for (i = 0; i < qiov->niov; i++) {
5505339064d5SKevin Wolf         if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
5506c53b1c51SStefan Hajnoczi             return false;
5507c53b1c51SStefan Hajnoczi         }
5508339064d5SKevin Wolf         if (qiov->iov[i].iov_len % alignment) {
55091ff735bdSKevin Wolf             return false;
55101ff735bdSKevin Wolf         }
5511c53b1c51SStefan Hajnoczi     }
5512c53b1c51SStefan Hajnoczi 
5513c53b1c51SStefan Hajnoczi     return true;
5514c53b1c51SStefan Hajnoczi }
5515c53b1c51SStefan Hajnoczi 
55160db6e54aSFam Zheng BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
55170db6e54aSFam Zheng {
55180db6e54aSFam Zheng     BdrvDirtyBitmap *bm;
55190db6e54aSFam Zheng 
55200db6e54aSFam Zheng     assert(name);
55210db6e54aSFam Zheng     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
55220db6e54aSFam Zheng         if (bm->name && !strcmp(name, bm->name)) {
55230db6e54aSFam Zheng             return bm;
55240db6e54aSFam Zheng         }
55250db6e54aSFam Zheng     }
55260db6e54aSFam Zheng     return NULL;
55270db6e54aSFam Zheng }
55280db6e54aSFam Zheng 
552920dca810SJohn Snow void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
55300db6e54aSFam Zheng {
55319bd2b08fSJohn Snow     assert(!bdrv_dirty_bitmap_frozen(bitmap));
55320db6e54aSFam Zheng     g_free(bitmap->name);
55330db6e54aSFam Zheng     bitmap->name = NULL;
55340db6e54aSFam Zheng }
55350db6e54aSFam Zheng 
55360db6e54aSFam Zheng BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
55375fba6c0eSJohn Snow                                           uint32_t granularity,
55380db6e54aSFam Zheng                                           const char *name,
5539b8afb520SFam Zheng                                           Error **errp)
55407cd1e32aSlirans@il.ibm.com {
55417cd1e32aSlirans@il.ibm.com     int64_t bitmap_size;
5542e4654d2dSFam Zheng     BdrvDirtyBitmap *bitmap;
55435fba6c0eSJohn Snow     uint32_t sector_granularity;
5544a55eb92cSJan Kiszka 
554550717e94SPaolo Bonzini     assert((granularity & (granularity - 1)) == 0);
554650717e94SPaolo Bonzini 
55470db6e54aSFam Zheng     if (name && bdrv_find_dirty_bitmap(bs, name)) {
55480db6e54aSFam Zheng         error_setg(errp, "Bitmap already exists: %s", name);
55490db6e54aSFam Zheng         return NULL;
55500db6e54aSFam Zheng     }
55515fba6c0eSJohn Snow     sector_granularity = granularity >> BDRV_SECTOR_BITS;
55525fba6c0eSJohn Snow     assert(sector_granularity);
555357322b78SMarkus Armbruster     bitmap_size = bdrv_nb_sectors(bs);
5554b8afb520SFam Zheng     if (bitmap_size < 0) {
5555b8afb520SFam Zheng         error_setg_errno(errp, -bitmap_size, "could not get length of device");
5556b8afb520SFam Zheng         errno = -bitmap_size;
5557b8afb520SFam Zheng         return NULL;
5558b8afb520SFam Zheng     }
55595839e53bSMarkus Armbruster     bitmap = g_new0(BdrvDirtyBitmap, 1);
55605fba6c0eSJohn Snow     bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
5561e74e6b78SJohn Snow     bitmap->size = bitmap_size;
55620db6e54aSFam Zheng     bitmap->name = g_strdup(name);
5563b8e6fb75SJohn Snow     bitmap->disabled = false;
5564e4654d2dSFam Zheng     QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
5565e4654d2dSFam Zheng     return bitmap;
5566e4654d2dSFam Zheng }
5567e4654d2dSFam Zheng 
55689bd2b08fSJohn Snow bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
55699bd2b08fSJohn Snow {
55709bd2b08fSJohn Snow     return bitmap->successor;
55719bd2b08fSJohn Snow }
55729bd2b08fSJohn Snow 
5573b8e6fb75SJohn Snow bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
5574b8e6fb75SJohn Snow {
55759bd2b08fSJohn Snow     return !(bitmap->disabled || bitmap->successor);
55769bd2b08fSJohn Snow }
55779bd2b08fSJohn Snow 
55789bd2b08fSJohn Snow /**
55799bd2b08fSJohn Snow  * Create a successor bitmap destined to replace this bitmap after an operation.
55809bd2b08fSJohn Snow  * Requires that the bitmap is not frozen and has no successor.
55819bd2b08fSJohn Snow  */
55829bd2b08fSJohn Snow int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
55839bd2b08fSJohn Snow                                        BdrvDirtyBitmap *bitmap, Error **errp)
55849bd2b08fSJohn Snow {
55859bd2b08fSJohn Snow     uint64_t granularity;
55869bd2b08fSJohn Snow     BdrvDirtyBitmap *child;
55879bd2b08fSJohn Snow 
55889bd2b08fSJohn Snow     if (bdrv_dirty_bitmap_frozen(bitmap)) {
55899bd2b08fSJohn Snow         error_setg(errp, "Cannot create a successor for a bitmap that is "
55909bd2b08fSJohn Snow                    "currently frozen");
55919bd2b08fSJohn Snow         return -1;
55929bd2b08fSJohn Snow     }
55939bd2b08fSJohn Snow     assert(!bitmap->successor);
55949bd2b08fSJohn Snow 
55959bd2b08fSJohn Snow     /* Create an anonymous successor */
55969bd2b08fSJohn Snow     granularity = bdrv_dirty_bitmap_granularity(bitmap);
55979bd2b08fSJohn Snow     child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
55989bd2b08fSJohn Snow     if (!child) {
55999bd2b08fSJohn Snow         return -1;
56009bd2b08fSJohn Snow     }
56019bd2b08fSJohn Snow 
56029bd2b08fSJohn Snow     /* Successor will be on or off based on our current state. */
56039bd2b08fSJohn Snow     child->disabled = bitmap->disabled;
56049bd2b08fSJohn Snow 
56059bd2b08fSJohn Snow     /* Install the successor and freeze the parent */
56069bd2b08fSJohn Snow     bitmap->successor = child;
56079bd2b08fSJohn Snow     return 0;
56089bd2b08fSJohn Snow }
56099bd2b08fSJohn Snow 
56109bd2b08fSJohn Snow /**
56119bd2b08fSJohn Snow  * For a bitmap with a successor, yield our name to the successor,
56129bd2b08fSJohn Snow  * delete the old bitmap, and return a handle to the new bitmap.
56139bd2b08fSJohn Snow  */
56149bd2b08fSJohn Snow BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
56159bd2b08fSJohn Snow                                             BdrvDirtyBitmap *bitmap,
56169bd2b08fSJohn Snow                                             Error **errp)
56179bd2b08fSJohn Snow {
56189bd2b08fSJohn Snow     char *name;
56199bd2b08fSJohn Snow     BdrvDirtyBitmap *successor = bitmap->successor;
56209bd2b08fSJohn Snow 
56219bd2b08fSJohn Snow     if (successor == NULL) {
56229bd2b08fSJohn Snow         error_setg(errp, "Cannot relinquish control if "
56239bd2b08fSJohn Snow                    "there's no successor present");
56249bd2b08fSJohn Snow         return NULL;
56259bd2b08fSJohn Snow     }
56269bd2b08fSJohn Snow 
56279bd2b08fSJohn Snow     name = bitmap->name;
56289bd2b08fSJohn Snow     bitmap->name = NULL;
56299bd2b08fSJohn Snow     successor->name = name;
56309bd2b08fSJohn Snow     bitmap->successor = NULL;
56319bd2b08fSJohn Snow     bdrv_release_dirty_bitmap(bs, bitmap);
56329bd2b08fSJohn Snow 
56339bd2b08fSJohn Snow     return successor;
56349bd2b08fSJohn Snow }
56359bd2b08fSJohn Snow 
56369bd2b08fSJohn Snow /**
56379bd2b08fSJohn Snow  * In cases of failure where we can no longer safely delete the parent,
56389bd2b08fSJohn Snow  * we may wish to re-join the parent and child/successor.
56399bd2b08fSJohn Snow  * The merged parent will be un-frozen, but not explicitly re-enabled.
56409bd2b08fSJohn Snow  */
56419bd2b08fSJohn Snow BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
56429bd2b08fSJohn Snow                                            BdrvDirtyBitmap *parent,
56439bd2b08fSJohn Snow                                            Error **errp)
56449bd2b08fSJohn Snow {
56459bd2b08fSJohn Snow     BdrvDirtyBitmap *successor = parent->successor;
56469bd2b08fSJohn Snow 
56479bd2b08fSJohn Snow     if (!successor) {
56489bd2b08fSJohn Snow         error_setg(errp, "Cannot reclaim a successor when none is present");
56499bd2b08fSJohn Snow         return NULL;
56509bd2b08fSJohn Snow     }
56519bd2b08fSJohn Snow 
56529bd2b08fSJohn Snow     if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
56539bd2b08fSJohn Snow         error_setg(errp, "Merging of parent and successor bitmap failed");
56549bd2b08fSJohn Snow         return NULL;
56559bd2b08fSJohn Snow     }
56569bd2b08fSJohn Snow     bdrv_release_dirty_bitmap(bs, successor);
56579bd2b08fSJohn Snow     parent->successor = NULL;
56589bd2b08fSJohn Snow 
56599bd2b08fSJohn Snow     return parent;
5660b8e6fb75SJohn Snow }
5661b8e6fb75SJohn Snow 
5662ce1ffea8SJohn Snow /**
5663ce1ffea8SJohn Snow  * Truncates _all_ bitmaps attached to a BDS.
5664ce1ffea8SJohn Snow  */
5665ce1ffea8SJohn Snow static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
5666ce1ffea8SJohn Snow {
5667ce1ffea8SJohn Snow     BdrvDirtyBitmap *bitmap;
5668ce1ffea8SJohn Snow     uint64_t size = bdrv_nb_sectors(bs);
5669ce1ffea8SJohn Snow 
5670ce1ffea8SJohn Snow     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5671ce1ffea8SJohn Snow         if (bdrv_dirty_bitmap_frozen(bitmap)) {
5672ce1ffea8SJohn Snow             continue;
5673ce1ffea8SJohn Snow         }
5674ce1ffea8SJohn Snow         hbitmap_truncate(bitmap->bitmap, size);
5675ce1ffea8SJohn Snow     }
5676ce1ffea8SJohn Snow }
5677ce1ffea8SJohn Snow 
5678e4654d2dSFam Zheng void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5679e4654d2dSFam Zheng {
5680e4654d2dSFam Zheng     BdrvDirtyBitmap *bm, *next;
5681e4654d2dSFam Zheng     QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
5682e4654d2dSFam Zheng         if (bm == bitmap) {
56839bd2b08fSJohn Snow             assert(!bdrv_dirty_bitmap_frozen(bm));
5684e4654d2dSFam Zheng             QLIST_REMOVE(bitmap, list);
5685e4654d2dSFam Zheng             hbitmap_free(bitmap->bitmap);
56860db6e54aSFam Zheng             g_free(bitmap->name);
5687e4654d2dSFam Zheng             g_free(bitmap);
5688e4654d2dSFam Zheng             return;
56897cd1e32aSlirans@il.ibm.com         }
56907cd1e32aSlirans@il.ibm.com     }
56917cd1e32aSlirans@il.ibm.com }
56927cd1e32aSlirans@il.ibm.com 
5693b8e6fb75SJohn Snow void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
5694b8e6fb75SJohn Snow {
56959bd2b08fSJohn Snow     assert(!bdrv_dirty_bitmap_frozen(bitmap));
5696b8e6fb75SJohn Snow     bitmap->disabled = true;
5697b8e6fb75SJohn Snow }
5698b8e6fb75SJohn Snow 
5699b8e6fb75SJohn Snow void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
5700b8e6fb75SJohn Snow {
57019bd2b08fSJohn Snow     assert(!bdrv_dirty_bitmap_frozen(bitmap));
5702b8e6fb75SJohn Snow     bitmap->disabled = false;
5703b8e6fb75SJohn Snow }
5704b8e6fb75SJohn Snow 
570521b56835SFam Zheng BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
570621b56835SFam Zheng {
570721b56835SFam Zheng     BdrvDirtyBitmap *bm;
570821b56835SFam Zheng     BlockDirtyInfoList *list = NULL;
570921b56835SFam Zheng     BlockDirtyInfoList **plist = &list;
571021b56835SFam Zheng 
571121b56835SFam Zheng     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
57125839e53bSMarkus Armbruster         BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
57135839e53bSMarkus Armbruster         BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
571420dca810SJohn Snow         info->count = bdrv_get_dirty_count(bm);
5715592fdd02SJohn Snow         info->granularity = bdrv_dirty_bitmap_granularity(bm);
57160db6e54aSFam Zheng         info->has_name = !!bm->name;
57170db6e54aSFam Zheng         info->name = g_strdup(bm->name);
5718a113534fSJohn Snow         info->frozen = bdrv_dirty_bitmap_frozen(bm);
571921b56835SFam Zheng         entry->value = info;
572021b56835SFam Zheng         *plist = entry;
572121b56835SFam Zheng         plist = &entry->next;
572221b56835SFam Zheng     }
572321b56835SFam Zheng 
572421b56835SFam Zheng     return list;
572521b56835SFam Zheng }
572621b56835SFam Zheng 
5727e4654d2dSFam Zheng int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
57287cd1e32aSlirans@il.ibm.com {
5729e4654d2dSFam Zheng     if (bitmap) {
5730e4654d2dSFam Zheng         return hbitmap_get(bitmap->bitmap, sector);
57317cd1e32aSlirans@il.ibm.com     } else {
57327cd1e32aSlirans@il.ibm.com         return 0;
57337cd1e32aSlirans@il.ibm.com     }
57347cd1e32aSlirans@il.ibm.com }
57357cd1e32aSlirans@il.ibm.com 
5736341ebc2fSJohn Snow /**
5737341ebc2fSJohn Snow  * Chooses a default granularity based on the existing cluster size,
5738341ebc2fSJohn Snow  * but clamped between [4K, 64K]. Defaults to 64K in the case that there
5739341ebc2fSJohn Snow  * is no cluster size information available.
5740341ebc2fSJohn Snow  */
5741341ebc2fSJohn Snow uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
5742341ebc2fSJohn Snow {
5743341ebc2fSJohn Snow     BlockDriverInfo bdi;
5744341ebc2fSJohn Snow     uint32_t granularity;
5745341ebc2fSJohn Snow 
5746341ebc2fSJohn Snow     if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
5747341ebc2fSJohn Snow         granularity = MAX(4096, bdi.cluster_size);
5748341ebc2fSJohn Snow         granularity = MIN(65536, granularity);
5749341ebc2fSJohn Snow     } else {
5750341ebc2fSJohn Snow         granularity = 65536;
5751341ebc2fSJohn Snow     }
5752341ebc2fSJohn Snow 
5753341ebc2fSJohn Snow     return granularity;
5754341ebc2fSJohn Snow }
5755341ebc2fSJohn Snow 
5756592fdd02SJohn Snow uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
5757592fdd02SJohn Snow {
5758592fdd02SJohn Snow     return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
5759592fdd02SJohn Snow }
5760592fdd02SJohn Snow 
576120dca810SJohn Snow void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
57621755da16SPaolo Bonzini {
5763e4654d2dSFam Zheng     hbitmap_iter_init(hbi, bitmap->bitmap, 0);
57641755da16SPaolo Bonzini }
57651755da16SPaolo Bonzini 
576620dca810SJohn Snow void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
5767c4237dfaSVladimir Sementsov-Ogievskiy                            int64_t cur_sector, int nr_sectors)
5768c4237dfaSVladimir Sementsov-Ogievskiy {
5769b8e6fb75SJohn Snow     assert(bdrv_dirty_bitmap_enabled(bitmap));
5770c4237dfaSVladimir Sementsov-Ogievskiy     hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
5771c4237dfaSVladimir Sementsov-Ogievskiy }
5772c4237dfaSVladimir Sementsov-Ogievskiy 
577320dca810SJohn Snow void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
5774c4237dfaSVladimir Sementsov-Ogievskiy                              int64_t cur_sector, int nr_sectors)
5775c4237dfaSVladimir Sementsov-Ogievskiy {
5776b8e6fb75SJohn Snow     assert(bdrv_dirty_bitmap_enabled(bitmap));
5777c4237dfaSVladimir Sementsov-Ogievskiy     hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
5778c4237dfaSVladimir Sementsov-Ogievskiy }
5779c4237dfaSVladimir Sementsov-Ogievskiy 
5780e74e6b78SJohn Snow void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap)
5781e74e6b78SJohn Snow {
5782e74e6b78SJohn Snow     assert(bdrv_dirty_bitmap_enabled(bitmap));
5783e74e6b78SJohn Snow     hbitmap_reset(bitmap->bitmap, 0, bitmap->size);
5784e74e6b78SJohn Snow }
5785e74e6b78SJohn Snow 
5786*e0c47b6cSStefan Hajnoczi void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
57871755da16SPaolo Bonzini                     int nr_sectors)
57881755da16SPaolo Bonzini {
5789e4654d2dSFam Zheng     BdrvDirtyBitmap *bitmap;
5790e4654d2dSFam Zheng     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5791b8e6fb75SJohn Snow         if (!bdrv_dirty_bitmap_enabled(bitmap)) {
5792b8e6fb75SJohn Snow             continue;
5793b8e6fb75SJohn Snow         }
5794e4654d2dSFam Zheng         hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
5795e4654d2dSFam Zheng     }
57961755da16SPaolo Bonzini }
57971755da16SPaolo Bonzini 
5798*e0c47b6cSStefan Hajnoczi void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
5799c4237dfaSVladimir Sementsov-Ogievskiy                       int nr_sectors)
58007cd1e32aSlirans@il.ibm.com {
5801e4654d2dSFam Zheng     BdrvDirtyBitmap *bitmap;
5802e4654d2dSFam Zheng     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5803b8e6fb75SJohn Snow         if (!bdrv_dirty_bitmap_enabled(bitmap)) {
5804b8e6fb75SJohn Snow             continue;
5805b8e6fb75SJohn Snow         }
5806e4654d2dSFam Zheng         hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
5807e4654d2dSFam Zheng     }
58087cd1e32aSlirans@il.ibm.com }
5809aaa0eb75SLiran Schour 
5810d58d8453SJohn Snow /**
5811d58d8453SJohn Snow  * Advance an HBitmapIter to an arbitrary offset.
5812d58d8453SJohn Snow  */
5813d58d8453SJohn Snow void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
5814d58d8453SJohn Snow {
5815d58d8453SJohn Snow     assert(hbi->hb);
5816d58d8453SJohn Snow     hbitmap_iter_init(hbi, hbi->hb, offset);
5817d58d8453SJohn Snow }
5818d58d8453SJohn Snow 
581920dca810SJohn Snow int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
5820aaa0eb75SLiran Schour {
5821e4654d2dSFam Zheng     return hbitmap_count(bitmap->bitmap);
5822aaa0eb75SLiran Schour }
5823f88e1a42SJes Sorensen 
58249fcb0251SFam Zheng /* Get a reference to bs */
58259fcb0251SFam Zheng void bdrv_ref(BlockDriverState *bs)
58269fcb0251SFam Zheng {
58279fcb0251SFam Zheng     bs->refcnt++;
58289fcb0251SFam Zheng }
58299fcb0251SFam Zheng 
58309fcb0251SFam Zheng /* Release a previously grabbed reference to bs.
58319fcb0251SFam Zheng  * If after releasing, reference count is zero, the BlockDriverState is
58329fcb0251SFam Zheng  * deleted. */
58339fcb0251SFam Zheng void bdrv_unref(BlockDriverState *bs)
58349fcb0251SFam Zheng {
58359a4d5ca6SJeff Cody     if (!bs) {
58369a4d5ca6SJeff Cody         return;
58379a4d5ca6SJeff Cody     }
58389fcb0251SFam Zheng     assert(bs->refcnt > 0);
58399fcb0251SFam Zheng     if (--bs->refcnt == 0) {
58409fcb0251SFam Zheng         bdrv_delete(bs);
58419fcb0251SFam Zheng     }
58429fcb0251SFam Zheng }
58439fcb0251SFam Zheng 
5844fbe40ff7SFam Zheng struct BdrvOpBlocker {
5845fbe40ff7SFam Zheng     Error *reason;
5846fbe40ff7SFam Zheng     QLIST_ENTRY(BdrvOpBlocker) list;
5847fbe40ff7SFam Zheng };
5848fbe40ff7SFam Zheng 
5849fbe40ff7SFam Zheng bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
5850fbe40ff7SFam Zheng {
5851fbe40ff7SFam Zheng     BdrvOpBlocker *blocker;
5852fbe40ff7SFam Zheng     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5853fbe40ff7SFam Zheng     if (!QLIST_EMPTY(&bs->op_blockers[op])) {
5854fbe40ff7SFam Zheng         blocker = QLIST_FIRST(&bs->op_blockers[op]);
5855fbe40ff7SFam Zheng         if (errp) {
585681e5f78aSAlberto Garcia             error_setg(errp, "Node '%s' is busy: %s",
585781e5f78aSAlberto Garcia                        bdrv_get_device_or_node_name(bs),
5858bfb197e0SMarkus Armbruster                        error_get_pretty(blocker->reason));
5859fbe40ff7SFam Zheng         }
5860fbe40ff7SFam Zheng         return true;
5861fbe40ff7SFam Zheng     }
5862fbe40ff7SFam Zheng     return false;
5863fbe40ff7SFam Zheng }
5864fbe40ff7SFam Zheng 
5865fbe40ff7SFam Zheng void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
5866fbe40ff7SFam Zheng {
5867fbe40ff7SFam Zheng     BdrvOpBlocker *blocker;
5868fbe40ff7SFam Zheng     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5869fbe40ff7SFam Zheng 
58705839e53bSMarkus Armbruster     blocker = g_new0(BdrvOpBlocker, 1);
5871fbe40ff7SFam Zheng     blocker->reason = reason;
5872fbe40ff7SFam Zheng     QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
5873fbe40ff7SFam Zheng }
5874fbe40ff7SFam Zheng 
5875fbe40ff7SFam Zheng void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
5876fbe40ff7SFam Zheng {
5877fbe40ff7SFam Zheng     BdrvOpBlocker *blocker, *next;
5878fbe40ff7SFam Zheng     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5879fbe40ff7SFam Zheng     QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
5880fbe40ff7SFam Zheng         if (blocker->reason == reason) {
5881fbe40ff7SFam Zheng             QLIST_REMOVE(blocker, list);
5882fbe40ff7SFam Zheng             g_free(blocker);
5883fbe40ff7SFam Zheng         }
5884fbe40ff7SFam Zheng     }
5885fbe40ff7SFam Zheng }
5886fbe40ff7SFam Zheng 
5887fbe40ff7SFam Zheng void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
5888fbe40ff7SFam Zheng {
5889fbe40ff7SFam Zheng     int i;
5890fbe40ff7SFam Zheng     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5891fbe40ff7SFam Zheng         bdrv_op_block(bs, i, reason);
5892fbe40ff7SFam Zheng     }
5893fbe40ff7SFam Zheng }
5894fbe40ff7SFam Zheng 
5895fbe40ff7SFam Zheng void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
5896fbe40ff7SFam Zheng {
5897fbe40ff7SFam Zheng     int i;
5898fbe40ff7SFam Zheng     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5899fbe40ff7SFam Zheng         bdrv_op_unblock(bs, i, reason);
5900fbe40ff7SFam Zheng     }
5901fbe40ff7SFam Zheng }
5902fbe40ff7SFam Zheng 
5903fbe40ff7SFam Zheng bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
5904fbe40ff7SFam Zheng {
5905fbe40ff7SFam Zheng     int i;
5906fbe40ff7SFam Zheng 
5907fbe40ff7SFam Zheng     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5908fbe40ff7SFam Zheng         if (!QLIST_EMPTY(&bs->op_blockers[i])) {
5909fbe40ff7SFam Zheng             return false;
5910fbe40ff7SFam Zheng         }
5911fbe40ff7SFam Zheng     }
5912fbe40ff7SFam Zheng     return true;
5913fbe40ff7SFam Zheng }
5914fbe40ff7SFam Zheng 
591528a7282aSLuiz Capitulino void bdrv_iostatus_enable(BlockDriverState *bs)
591628a7282aSLuiz Capitulino {
5917d6bf279eSLuiz Capitulino     bs->iostatus_enabled = true;
591858e21ef5SLuiz Capitulino     bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
591928a7282aSLuiz Capitulino }
592028a7282aSLuiz Capitulino 
592128a7282aSLuiz Capitulino /* The I/O status is only enabled if the drive explicitly
592228a7282aSLuiz Capitulino  * enables it _and_ the VM is configured to stop on errors */
592328a7282aSLuiz Capitulino bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
592428a7282aSLuiz Capitulino {
5925d6bf279eSLuiz Capitulino     return (bs->iostatus_enabled &&
592692aa5c6dSPaolo Bonzini            (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
592792aa5c6dSPaolo Bonzini             bs->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
592892aa5c6dSPaolo Bonzini             bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
592928a7282aSLuiz Capitulino }
593028a7282aSLuiz Capitulino 
593128a7282aSLuiz Capitulino void bdrv_iostatus_disable(BlockDriverState *bs)
593228a7282aSLuiz Capitulino {
5933d6bf279eSLuiz Capitulino     bs->iostatus_enabled = false;
593428a7282aSLuiz Capitulino }
593528a7282aSLuiz Capitulino 
593628a7282aSLuiz Capitulino void bdrv_iostatus_reset(BlockDriverState *bs)
593728a7282aSLuiz Capitulino {
593828a7282aSLuiz Capitulino     if (bdrv_iostatus_is_enabled(bs)) {
593958e21ef5SLuiz Capitulino         bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
59403bd293c3SPaolo Bonzini         if (bs->job) {
59413bd293c3SPaolo Bonzini             block_job_iostatus_reset(bs->job);
59423bd293c3SPaolo Bonzini         }
594328a7282aSLuiz Capitulino     }
594428a7282aSLuiz Capitulino }
594528a7282aSLuiz Capitulino 
594628a7282aSLuiz Capitulino void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
594728a7282aSLuiz Capitulino {
59483e1caa5fSPaolo Bonzini     assert(bdrv_iostatus_is_enabled(bs));
59493e1caa5fSPaolo Bonzini     if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
595058e21ef5SLuiz Capitulino         bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
595158e21ef5SLuiz Capitulino                                          BLOCK_DEVICE_IO_STATUS_FAILED;
595228a7282aSLuiz Capitulino     }
595328a7282aSLuiz Capitulino }
595428a7282aSLuiz Capitulino 
5955d92ada22SLuiz Capitulino void bdrv_img_create(const char *filename, const char *fmt,
5956f88e1a42SJes Sorensen                      const char *base_filename, const char *base_fmt,
5957f382d43aSMiroslav Rezanina                      char *options, uint64_t img_size, int flags,
5958f382d43aSMiroslav Rezanina                      Error **errp, bool quiet)
5959f88e1a42SJes Sorensen {
596083d0521aSChunyan Liu     QemuOptsList *create_opts = NULL;
596183d0521aSChunyan Liu     QemuOpts *opts = NULL;
596283d0521aSChunyan Liu     const char *backing_fmt, *backing_file;
596383d0521aSChunyan Liu     int64_t size;
5964f88e1a42SJes Sorensen     BlockDriver *drv, *proto_drv;
596596df67d1SStefan Hajnoczi     BlockDriver *backing_drv = NULL;
5966cc84d90fSMax Reitz     Error *local_err = NULL;
5967f88e1a42SJes Sorensen     int ret = 0;
5968f88e1a42SJes Sorensen 
5969f88e1a42SJes Sorensen     /* Find driver and parse its options */
5970f88e1a42SJes Sorensen     drv = bdrv_find_format(fmt);
5971f88e1a42SJes Sorensen     if (!drv) {
597271c79813SLuiz Capitulino         error_setg(errp, "Unknown file format '%s'", fmt);
5973d92ada22SLuiz Capitulino         return;
5974f88e1a42SJes Sorensen     }
5975f88e1a42SJes Sorensen 
5976b65a5e12SMax Reitz     proto_drv = bdrv_find_protocol(filename, true, errp);
5977f88e1a42SJes Sorensen     if (!proto_drv) {
5978d92ada22SLuiz Capitulino         return;
5979f88e1a42SJes Sorensen     }
5980f88e1a42SJes Sorensen 
5981c6149724SMax Reitz     if (!drv->create_opts) {
5982c6149724SMax Reitz         error_setg(errp, "Format driver '%s' does not support image creation",
5983c6149724SMax Reitz                    drv->format_name);
5984c6149724SMax Reitz         return;
5985c6149724SMax Reitz     }
5986c6149724SMax Reitz 
5987c6149724SMax Reitz     if (!proto_drv->create_opts) {
5988c6149724SMax Reitz         error_setg(errp, "Protocol driver '%s' does not support image creation",
5989c6149724SMax Reitz                    proto_drv->format_name);
5990c6149724SMax Reitz         return;
5991c6149724SMax Reitz     }
5992c6149724SMax Reitz 
5993c282e1fdSChunyan Liu     create_opts = qemu_opts_append(create_opts, drv->create_opts);
5994c282e1fdSChunyan Liu     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
5995f88e1a42SJes Sorensen 
5996f88e1a42SJes Sorensen     /* Create parameter list with default values */
599783d0521aSChunyan Liu     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
599839101f25SMarkus Armbruster     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
5999f88e1a42SJes Sorensen 
6000f88e1a42SJes Sorensen     /* Parse -o options */
6001f88e1a42SJes Sorensen     if (options) {
6002dc523cd3SMarkus Armbruster         qemu_opts_do_parse(opts, options, NULL, &local_err);
6003dc523cd3SMarkus Armbruster         if (local_err) {
6004dc523cd3SMarkus Armbruster             error_report_err(local_err);
6005dc523cd3SMarkus Armbruster             local_err = NULL;
600683d0521aSChunyan Liu             error_setg(errp, "Invalid options for file format '%s'", fmt);
6007f88e1a42SJes Sorensen             goto out;
6008f88e1a42SJes Sorensen         }
6009f88e1a42SJes Sorensen     }
6010f88e1a42SJes Sorensen 
6011f88e1a42SJes Sorensen     if (base_filename) {
6012f43e47dbSMarkus Armbruster         qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
60136be4194bSMarkus Armbruster         if (local_err) {
601471c79813SLuiz Capitulino             error_setg(errp, "Backing file not supported for file format '%s'",
601571c79813SLuiz Capitulino                        fmt);
6016f88e1a42SJes Sorensen             goto out;
6017f88e1a42SJes Sorensen         }
6018f88e1a42SJes Sorensen     }
6019f88e1a42SJes Sorensen 
6020f88e1a42SJes Sorensen     if (base_fmt) {
6021f43e47dbSMarkus Armbruster         qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
60226be4194bSMarkus Armbruster         if (local_err) {
602371c79813SLuiz Capitulino             error_setg(errp, "Backing file format not supported for file "
602471c79813SLuiz Capitulino                              "format '%s'", fmt);
6025f88e1a42SJes Sorensen             goto out;
6026f88e1a42SJes Sorensen         }
6027f88e1a42SJes Sorensen     }
6028f88e1a42SJes Sorensen 
602983d0521aSChunyan Liu     backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
603083d0521aSChunyan Liu     if (backing_file) {
603183d0521aSChunyan Liu         if (!strcmp(filename, backing_file)) {
603271c79813SLuiz Capitulino             error_setg(errp, "Error: Trying to create an image with the "
603371c79813SLuiz Capitulino                              "same filename as the backing file");
6034792da93aSJes Sorensen             goto out;
6035792da93aSJes Sorensen         }
6036792da93aSJes Sorensen     }
6037792da93aSJes Sorensen 
603883d0521aSChunyan Liu     backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
603983d0521aSChunyan Liu     if (backing_fmt) {
604083d0521aSChunyan Liu         backing_drv = bdrv_find_format(backing_fmt);
604196df67d1SStefan Hajnoczi         if (!backing_drv) {
604271c79813SLuiz Capitulino             error_setg(errp, "Unknown backing file format '%s'",
604383d0521aSChunyan Liu                        backing_fmt);
6044f88e1a42SJes Sorensen             goto out;
6045f88e1a42SJes Sorensen         }
6046f88e1a42SJes Sorensen     }
6047f88e1a42SJes Sorensen 
6048f88e1a42SJes Sorensen     // The size for the image must always be specified, with one exception:
6049f88e1a42SJes Sorensen     // If we are using a backing file, we can obtain the size from there
605083d0521aSChunyan Liu     size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
605183d0521aSChunyan Liu     if (size == -1) {
605283d0521aSChunyan Liu         if (backing_file) {
605366f6b814SMax Reitz             BlockDriverState *bs;
605429168018SMax Reitz             char *full_backing = g_new0(char, PATH_MAX);
605552bf1e72SMarkus Armbruster             int64_t size;
605663090dacSPaolo Bonzini             int back_flags;
605763090dacSPaolo Bonzini 
605829168018SMax Reitz             bdrv_get_full_backing_filename_from_filename(filename, backing_file,
605929168018SMax Reitz                                                          full_backing, PATH_MAX,
606029168018SMax Reitz                                                          &local_err);
606129168018SMax Reitz             if (local_err) {
606229168018SMax Reitz                 g_free(full_backing);
606329168018SMax Reitz                 goto out;
606429168018SMax Reitz             }
606529168018SMax Reitz 
606663090dacSPaolo Bonzini             /* backing files always opened read-only */
606763090dacSPaolo Bonzini             back_flags =
606863090dacSPaolo Bonzini                 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
6069f88e1a42SJes Sorensen 
6070f67503e5SMax Reitz             bs = NULL;
607129168018SMax Reitz             ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags,
6072cc84d90fSMax Reitz                             backing_drv, &local_err);
607329168018SMax Reitz             g_free(full_backing);
6074f88e1a42SJes Sorensen             if (ret < 0) {
6075f88e1a42SJes Sorensen                 goto out;
6076f88e1a42SJes Sorensen             }
607752bf1e72SMarkus Armbruster             size = bdrv_getlength(bs);
607852bf1e72SMarkus Armbruster             if (size < 0) {
607952bf1e72SMarkus Armbruster                 error_setg_errno(errp, -size, "Could not get size of '%s'",
608052bf1e72SMarkus Armbruster                                  backing_file);
608152bf1e72SMarkus Armbruster                 bdrv_unref(bs);
608252bf1e72SMarkus Armbruster                 goto out;
608352bf1e72SMarkus Armbruster             }
6084f88e1a42SJes Sorensen 
608539101f25SMarkus Armbruster             qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
608666f6b814SMax Reitz 
608766f6b814SMax Reitz             bdrv_unref(bs);
6088f88e1a42SJes Sorensen         } else {
608971c79813SLuiz Capitulino             error_setg(errp, "Image creation needs a size parameter");
6090f88e1a42SJes Sorensen             goto out;
6091f88e1a42SJes Sorensen         }
6092f88e1a42SJes Sorensen     }
6093f88e1a42SJes Sorensen 
6094f382d43aSMiroslav Rezanina     if (!quiet) {
6095f88e1a42SJes Sorensen         printf("Formatting '%s', fmt=%s", filename, fmt);
609643c5d8f8SFam Zheng         qemu_opts_print(opts, " ");
6097f88e1a42SJes Sorensen         puts("");
6098f382d43aSMiroslav Rezanina     }
609983d0521aSChunyan Liu 
6100c282e1fdSChunyan Liu     ret = bdrv_create(drv, filename, opts, &local_err);
610183d0521aSChunyan Liu 
6102cc84d90fSMax Reitz     if (ret == -EFBIG) {
6103cc84d90fSMax Reitz         /* This is generally a better message than whatever the driver would
6104cc84d90fSMax Reitz          * deliver (especially because of the cluster_size_hint), since that
6105cc84d90fSMax Reitz          * is most probably not much different from "image too large". */
6106f3f4d2c0SKevin Wolf         const char *cluster_size_hint = "";
610783d0521aSChunyan Liu         if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
6108f3f4d2c0SKevin Wolf             cluster_size_hint = " (try using a larger cluster size)";
6109f3f4d2c0SKevin Wolf         }
6110cc84d90fSMax Reitz         error_setg(errp, "The image size is too large for file format '%s'"
6111cc84d90fSMax Reitz                    "%s", fmt, cluster_size_hint);
6112cc84d90fSMax Reitz         error_free(local_err);
6113cc84d90fSMax Reitz         local_err = NULL;
6114f88e1a42SJes Sorensen     }
6115f88e1a42SJes Sorensen 
6116f88e1a42SJes Sorensen out:
611783d0521aSChunyan Liu     qemu_opts_del(opts);
611883d0521aSChunyan Liu     qemu_opts_free(create_opts);
611984d18f06SMarkus Armbruster     if (local_err) {
6120cc84d90fSMax Reitz         error_propagate(errp, local_err);
6121cc84d90fSMax Reitz     }
6122f88e1a42SJes Sorensen }
612385d126f3SStefan Hajnoczi 
612485d126f3SStefan Hajnoczi AioContext *bdrv_get_aio_context(BlockDriverState *bs)
612585d126f3SStefan Hajnoczi {
6126dcd04228SStefan Hajnoczi     return bs->aio_context;
6127dcd04228SStefan Hajnoczi }
6128dcd04228SStefan Hajnoczi 
6129dcd04228SStefan Hajnoczi void bdrv_detach_aio_context(BlockDriverState *bs)
6130dcd04228SStefan Hajnoczi {
613133384421SMax Reitz     BdrvAioNotifier *baf;
613233384421SMax Reitz 
6133dcd04228SStefan Hajnoczi     if (!bs->drv) {
6134dcd04228SStefan Hajnoczi         return;
6135dcd04228SStefan Hajnoczi     }
6136dcd04228SStefan Hajnoczi 
613733384421SMax Reitz     QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
613833384421SMax Reitz         baf->detach_aio_context(baf->opaque);
613933384421SMax Reitz     }
614033384421SMax Reitz 
614113af91ebSStefan Hajnoczi     if (bs->io_limits_enabled) {
614213af91ebSStefan Hajnoczi         throttle_detach_aio_context(&bs->throttle_state);
614313af91ebSStefan Hajnoczi     }
6144dcd04228SStefan Hajnoczi     if (bs->drv->bdrv_detach_aio_context) {
6145dcd04228SStefan Hajnoczi         bs->drv->bdrv_detach_aio_context(bs);
6146dcd04228SStefan Hajnoczi     }
6147dcd04228SStefan Hajnoczi     if (bs->file) {
6148dcd04228SStefan Hajnoczi         bdrv_detach_aio_context(bs->file);
6149dcd04228SStefan Hajnoczi     }
6150dcd04228SStefan Hajnoczi     if (bs->backing_hd) {
6151dcd04228SStefan Hajnoczi         bdrv_detach_aio_context(bs->backing_hd);
6152dcd04228SStefan Hajnoczi     }
6153dcd04228SStefan Hajnoczi 
6154dcd04228SStefan Hajnoczi     bs->aio_context = NULL;
6155dcd04228SStefan Hajnoczi }
6156dcd04228SStefan Hajnoczi 
6157dcd04228SStefan Hajnoczi void bdrv_attach_aio_context(BlockDriverState *bs,
6158dcd04228SStefan Hajnoczi                              AioContext *new_context)
6159dcd04228SStefan Hajnoczi {
616033384421SMax Reitz     BdrvAioNotifier *ban;
616133384421SMax Reitz 
6162dcd04228SStefan Hajnoczi     if (!bs->drv) {
6163dcd04228SStefan Hajnoczi         return;
6164dcd04228SStefan Hajnoczi     }
6165dcd04228SStefan Hajnoczi 
6166dcd04228SStefan Hajnoczi     bs->aio_context = new_context;
6167dcd04228SStefan Hajnoczi 
6168dcd04228SStefan Hajnoczi     if (bs->backing_hd) {
6169dcd04228SStefan Hajnoczi         bdrv_attach_aio_context(bs->backing_hd, new_context);
6170dcd04228SStefan Hajnoczi     }
6171dcd04228SStefan Hajnoczi     if (bs->file) {
6172dcd04228SStefan Hajnoczi         bdrv_attach_aio_context(bs->file, new_context);
6173dcd04228SStefan Hajnoczi     }
6174dcd04228SStefan Hajnoczi     if (bs->drv->bdrv_attach_aio_context) {
6175dcd04228SStefan Hajnoczi         bs->drv->bdrv_attach_aio_context(bs, new_context);
6176dcd04228SStefan Hajnoczi     }
617713af91ebSStefan Hajnoczi     if (bs->io_limits_enabled) {
617813af91ebSStefan Hajnoczi         throttle_attach_aio_context(&bs->throttle_state, new_context);
617913af91ebSStefan Hajnoczi     }
618033384421SMax Reitz 
618133384421SMax Reitz     QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
618233384421SMax Reitz         ban->attached_aio_context(new_context, ban->opaque);
618333384421SMax Reitz     }
6184dcd04228SStefan Hajnoczi }
6185dcd04228SStefan Hajnoczi 
6186dcd04228SStefan Hajnoczi void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
6187dcd04228SStefan Hajnoczi {
6188dcd04228SStefan Hajnoczi     bdrv_drain_all(); /* ensure there are no in-flight requests */
6189dcd04228SStefan Hajnoczi 
6190dcd04228SStefan Hajnoczi     bdrv_detach_aio_context(bs);
6191dcd04228SStefan Hajnoczi 
6192dcd04228SStefan Hajnoczi     /* This function executes in the old AioContext so acquire the new one in
6193dcd04228SStefan Hajnoczi      * case it runs in a different thread.
6194dcd04228SStefan Hajnoczi      */
6195dcd04228SStefan Hajnoczi     aio_context_acquire(new_context);
6196dcd04228SStefan Hajnoczi     bdrv_attach_aio_context(bs, new_context);
6197dcd04228SStefan Hajnoczi     aio_context_release(new_context);
619885d126f3SStefan Hajnoczi }
6199d616b224SStefan Hajnoczi 
620033384421SMax Reitz void bdrv_add_aio_context_notifier(BlockDriverState *bs,
620133384421SMax Reitz         void (*attached_aio_context)(AioContext *new_context, void *opaque),
620233384421SMax Reitz         void (*detach_aio_context)(void *opaque), void *opaque)
620333384421SMax Reitz {
620433384421SMax Reitz     BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
620533384421SMax Reitz     *ban = (BdrvAioNotifier){
620633384421SMax Reitz         .attached_aio_context = attached_aio_context,
620733384421SMax Reitz         .detach_aio_context   = detach_aio_context,
620833384421SMax Reitz         .opaque               = opaque
620933384421SMax Reitz     };
621033384421SMax Reitz 
621133384421SMax Reitz     QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
621233384421SMax Reitz }
621333384421SMax Reitz 
621433384421SMax Reitz void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
621533384421SMax Reitz                                       void (*attached_aio_context)(AioContext *,
621633384421SMax Reitz                                                                    void *),
621733384421SMax Reitz                                       void (*detach_aio_context)(void *),
621833384421SMax Reitz                                       void *opaque)
621933384421SMax Reitz {
622033384421SMax Reitz     BdrvAioNotifier *ban, *ban_next;
622133384421SMax Reitz 
622233384421SMax Reitz     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
622333384421SMax Reitz         if (ban->attached_aio_context == attached_aio_context &&
622433384421SMax Reitz             ban->detach_aio_context   == detach_aio_context   &&
622533384421SMax Reitz             ban->opaque               == opaque)
622633384421SMax Reitz         {
622733384421SMax Reitz             QLIST_REMOVE(ban, list);
622833384421SMax Reitz             g_free(ban);
622933384421SMax Reitz 
623033384421SMax Reitz             return;
623133384421SMax Reitz         }
623233384421SMax Reitz     }
623333384421SMax Reitz 
623433384421SMax Reitz     abort();
623533384421SMax Reitz }
623633384421SMax Reitz 
6237d616b224SStefan Hajnoczi void bdrv_add_before_write_notifier(BlockDriverState *bs,
6238d616b224SStefan Hajnoczi                                     NotifierWithReturn *notifier)
6239d616b224SStefan Hajnoczi {
6240d616b224SStefan Hajnoczi     notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
6241d616b224SStefan Hajnoczi }
62426f176b48SMax Reitz 
624377485434SMax Reitz int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
624477485434SMax Reitz                        BlockDriverAmendStatusCB *status_cb)
62456f176b48SMax Reitz {
6246c282e1fdSChunyan Liu     if (!bs->drv->bdrv_amend_options) {
62476f176b48SMax Reitz         return -ENOTSUP;
62486f176b48SMax Reitz     }
624977485434SMax Reitz     return bs->drv->bdrv_amend_options(bs, opts, status_cb);
62506f176b48SMax Reitz }
6251f6186f49SBenoît Canet 
6252b5042a36SBenoît Canet /* This function will be called by the bdrv_recurse_is_first_non_filter method
6253b5042a36SBenoît Canet  * of block filter and by bdrv_is_first_non_filter.
6254b5042a36SBenoît Canet  * It is used to test if the given bs is the candidate or recurse more in the
6255b5042a36SBenoît Canet  * node graph.
6256212a5a8fSBenoît Canet  */
6257212a5a8fSBenoît Canet bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
6258212a5a8fSBenoît Canet                                       BlockDriverState *candidate)
6259f6186f49SBenoît Canet {
6260b5042a36SBenoît Canet     /* return false if basic checks fails */
6261b5042a36SBenoît Canet     if (!bs || !bs->drv) {
6262b5042a36SBenoît Canet         return false;
6263b5042a36SBenoît Canet     }
6264b5042a36SBenoît Canet 
6265b5042a36SBenoît Canet     /* the code reached a non block filter driver -> check if the bs is
6266b5042a36SBenoît Canet      * the same as the candidate. It's the recursion termination condition.
6267b5042a36SBenoît Canet      */
6268b5042a36SBenoît Canet     if (!bs->drv->is_filter) {
6269b5042a36SBenoît Canet         return bs == candidate;
6270b5042a36SBenoît Canet     }
6271b5042a36SBenoît Canet     /* Down this path the driver is a block filter driver */
6272b5042a36SBenoît Canet 
6273b5042a36SBenoît Canet     /* If the block filter recursion method is defined use it to recurse down
6274b5042a36SBenoît Canet      * the node graph.
6275b5042a36SBenoît Canet      */
6276b5042a36SBenoît Canet     if (bs->drv->bdrv_recurse_is_first_non_filter) {
6277212a5a8fSBenoît Canet         return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
6278212a5a8fSBenoît Canet     }
6279212a5a8fSBenoît Canet 
6280b5042a36SBenoît Canet     /* the driver is a block filter but don't allow to recurse -> return false
6281b5042a36SBenoît Canet      */
6282b5042a36SBenoît Canet     return false;
6283212a5a8fSBenoît Canet }
6284212a5a8fSBenoît Canet 
6285212a5a8fSBenoît Canet /* This function checks if the candidate is the first non filter bs down it's
6286212a5a8fSBenoît Canet  * bs chain. Since we don't have pointers to parents it explore all bs chains
6287212a5a8fSBenoît Canet  * from the top. Some filters can choose not to pass down the recursion.
6288212a5a8fSBenoît Canet  */
6289212a5a8fSBenoît Canet bool bdrv_is_first_non_filter(BlockDriverState *candidate)
6290212a5a8fSBenoît Canet {
6291212a5a8fSBenoît Canet     BlockDriverState *bs;
6292212a5a8fSBenoît Canet 
6293212a5a8fSBenoît Canet     /* walk down the bs forest recursively */
6294212a5a8fSBenoît Canet     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
6295212a5a8fSBenoît Canet         bool perm;
6296212a5a8fSBenoît Canet 
6297b5042a36SBenoît Canet         /* try to recurse in this top level bs */
6298e6dc8a1fSKevin Wolf         perm = bdrv_recurse_is_first_non_filter(bs, candidate);
6299212a5a8fSBenoît Canet 
6300212a5a8fSBenoît Canet         /* candidate is the first non filter */
6301212a5a8fSBenoît Canet         if (perm) {
6302212a5a8fSBenoît Canet             return true;
6303212a5a8fSBenoît Canet         }
6304212a5a8fSBenoît Canet     }
6305212a5a8fSBenoît Canet 
6306212a5a8fSBenoît Canet     return false;
6307f6186f49SBenoît Canet }
630809158f00SBenoît Canet 
630909158f00SBenoît Canet BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
631009158f00SBenoît Canet {
631109158f00SBenoît Canet     BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
63125a7e7a0bSStefan Hajnoczi     AioContext *aio_context;
63135a7e7a0bSStefan Hajnoczi 
631409158f00SBenoît Canet     if (!to_replace_bs) {
631509158f00SBenoît Canet         error_setg(errp, "Node name '%s' not found", node_name);
631609158f00SBenoît Canet         return NULL;
631709158f00SBenoît Canet     }
631809158f00SBenoît Canet 
63195a7e7a0bSStefan Hajnoczi     aio_context = bdrv_get_aio_context(to_replace_bs);
63205a7e7a0bSStefan Hajnoczi     aio_context_acquire(aio_context);
63215a7e7a0bSStefan Hajnoczi 
632209158f00SBenoît Canet     if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
63235a7e7a0bSStefan Hajnoczi         to_replace_bs = NULL;
63245a7e7a0bSStefan Hajnoczi         goto out;
632509158f00SBenoît Canet     }
632609158f00SBenoît Canet 
632709158f00SBenoît Canet     /* We don't want arbitrary node of the BDS chain to be replaced only the top
632809158f00SBenoît Canet      * most non filter in order to prevent data corruption.
632909158f00SBenoît Canet      * Another benefit is that this tests exclude backing files which are
633009158f00SBenoît Canet      * blocked by the backing blockers.
633109158f00SBenoît Canet      */
633209158f00SBenoît Canet     if (!bdrv_is_first_non_filter(to_replace_bs)) {
633309158f00SBenoît Canet         error_setg(errp, "Only top most non filter can be replaced");
63345a7e7a0bSStefan Hajnoczi         to_replace_bs = NULL;
63355a7e7a0bSStefan Hajnoczi         goto out;
633609158f00SBenoît Canet     }
633709158f00SBenoît Canet 
63385a7e7a0bSStefan Hajnoczi out:
63395a7e7a0bSStefan Hajnoczi     aio_context_release(aio_context);
634009158f00SBenoît Canet     return to_replace_bs;
634109158f00SBenoît Canet }
6342448ad91dSMing Lei 
6343448ad91dSMing Lei void bdrv_io_plug(BlockDriverState *bs)
6344448ad91dSMing Lei {
6345448ad91dSMing Lei     BlockDriver *drv = bs->drv;
6346448ad91dSMing Lei     if (drv && drv->bdrv_io_plug) {
6347448ad91dSMing Lei         drv->bdrv_io_plug(bs);
6348448ad91dSMing Lei     } else if (bs->file) {
6349448ad91dSMing Lei         bdrv_io_plug(bs->file);
6350448ad91dSMing Lei     }
6351448ad91dSMing Lei }
6352448ad91dSMing Lei 
6353448ad91dSMing Lei void bdrv_io_unplug(BlockDriverState *bs)
6354448ad91dSMing Lei {
6355448ad91dSMing Lei     BlockDriver *drv = bs->drv;
6356448ad91dSMing Lei     if (drv && drv->bdrv_io_unplug) {
6357448ad91dSMing Lei         drv->bdrv_io_unplug(bs);
6358448ad91dSMing Lei     } else if (bs->file) {
6359448ad91dSMing Lei         bdrv_io_unplug(bs->file);
6360448ad91dSMing Lei     }
6361448ad91dSMing Lei }
6362448ad91dSMing Lei 
6363448ad91dSMing Lei void bdrv_flush_io_queue(BlockDriverState *bs)
6364448ad91dSMing Lei {
6365448ad91dSMing Lei     BlockDriver *drv = bs->drv;
6366448ad91dSMing Lei     if (drv && drv->bdrv_flush_io_queue) {
6367448ad91dSMing Lei         drv->bdrv_flush_io_queue(bs);
6368448ad91dSMing Lei     } else if (bs->file) {
6369448ad91dSMing Lei         bdrv_flush_io_queue(bs->file);
6370448ad91dSMing Lei     }
6371448ad91dSMing Lei }
637291af7014SMax Reitz 
637391af7014SMax Reitz static bool append_open_options(QDict *d, BlockDriverState *bs)
637491af7014SMax Reitz {
637591af7014SMax Reitz     const QDictEntry *entry;
637691af7014SMax Reitz     bool found_any = false;
637791af7014SMax Reitz 
637891af7014SMax Reitz     for (entry = qdict_first(bs->options); entry;
637991af7014SMax Reitz          entry = qdict_next(bs->options, entry))
638091af7014SMax Reitz     {
638191af7014SMax Reitz         /* Only take options for this level and exclude all non-driver-specific
638291af7014SMax Reitz          * options */
638391af7014SMax Reitz         if (!strchr(qdict_entry_key(entry), '.') &&
638491af7014SMax Reitz             strcmp(qdict_entry_key(entry), "node-name"))
638591af7014SMax Reitz         {
638691af7014SMax Reitz             qobject_incref(qdict_entry_value(entry));
638791af7014SMax Reitz             qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
638891af7014SMax Reitz             found_any = true;
638991af7014SMax Reitz         }
639091af7014SMax Reitz     }
639191af7014SMax Reitz 
639291af7014SMax Reitz     return found_any;
639391af7014SMax Reitz }
639491af7014SMax Reitz 
639591af7014SMax Reitz /* Updates the following BDS fields:
639691af7014SMax Reitz  *  - exact_filename: A filename which may be used for opening a block device
639791af7014SMax Reitz  *                    which (mostly) equals the given BDS (even without any
639891af7014SMax Reitz  *                    other options; so reading and writing must return the same
639991af7014SMax Reitz  *                    results, but caching etc. may be different)
640091af7014SMax Reitz  *  - full_open_options: Options which, when given when opening a block device
640191af7014SMax Reitz  *                       (without a filename), result in a BDS (mostly)
640291af7014SMax Reitz  *                       equalling the given one
640391af7014SMax Reitz  *  - filename: If exact_filename is set, it is copied here. Otherwise,
640491af7014SMax Reitz  *              full_open_options is converted to a JSON object, prefixed with
640591af7014SMax Reitz  *              "json:" (for use through the JSON pseudo protocol) and put here.
640691af7014SMax Reitz  */
640791af7014SMax Reitz void bdrv_refresh_filename(BlockDriverState *bs)
640891af7014SMax Reitz {
640991af7014SMax Reitz     BlockDriver *drv = bs->drv;
641091af7014SMax Reitz     QDict *opts;
641191af7014SMax Reitz 
641291af7014SMax Reitz     if (!drv) {
641391af7014SMax Reitz         return;
641491af7014SMax Reitz     }
641591af7014SMax Reitz 
641691af7014SMax Reitz     /* This BDS's file name will most probably depend on its file's name, so
641791af7014SMax Reitz      * refresh that first */
641891af7014SMax Reitz     if (bs->file) {
641991af7014SMax Reitz         bdrv_refresh_filename(bs->file);
642091af7014SMax Reitz     }
642191af7014SMax Reitz 
642291af7014SMax Reitz     if (drv->bdrv_refresh_filename) {
642391af7014SMax Reitz         /* Obsolete information is of no use here, so drop the old file name
642491af7014SMax Reitz          * information before refreshing it */
642591af7014SMax Reitz         bs->exact_filename[0] = '\0';
642691af7014SMax Reitz         if (bs->full_open_options) {
642791af7014SMax Reitz             QDECREF(bs->full_open_options);
642891af7014SMax Reitz             bs->full_open_options = NULL;
642991af7014SMax Reitz         }
643091af7014SMax Reitz 
643191af7014SMax Reitz         drv->bdrv_refresh_filename(bs);
643291af7014SMax Reitz     } else if (bs->file) {
643391af7014SMax Reitz         /* Try to reconstruct valid information from the underlying file */
643491af7014SMax Reitz         bool has_open_options;
643591af7014SMax Reitz 
643691af7014SMax Reitz         bs->exact_filename[0] = '\0';
643791af7014SMax Reitz         if (bs->full_open_options) {
643891af7014SMax Reitz             QDECREF(bs->full_open_options);
643991af7014SMax Reitz             bs->full_open_options = NULL;
644091af7014SMax Reitz         }
644191af7014SMax Reitz 
644291af7014SMax Reitz         opts = qdict_new();
644391af7014SMax Reitz         has_open_options = append_open_options(opts, bs);
644491af7014SMax Reitz 
644591af7014SMax Reitz         /* If no specific options have been given for this BDS, the filename of
644691af7014SMax Reitz          * the underlying file should suffice for this one as well */
644791af7014SMax Reitz         if (bs->file->exact_filename[0] && !has_open_options) {
644891af7014SMax Reitz             strcpy(bs->exact_filename, bs->file->exact_filename);
644991af7014SMax Reitz         }
645091af7014SMax Reitz         /* Reconstructing the full options QDict is simple for most format block
645191af7014SMax Reitz          * drivers, as long as the full options are known for the underlying
645291af7014SMax Reitz          * file BDS. The full options QDict of that file BDS should somehow
645391af7014SMax Reitz          * contain a representation of the filename, therefore the following
645491af7014SMax Reitz          * suffices without querying the (exact_)filename of this BDS. */
645591af7014SMax Reitz         if (bs->file->full_open_options) {
645691af7014SMax Reitz             qdict_put_obj(opts, "driver",
645791af7014SMax Reitz                           QOBJECT(qstring_from_str(drv->format_name)));
645891af7014SMax Reitz             QINCREF(bs->file->full_open_options);
645991af7014SMax Reitz             qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));
646091af7014SMax Reitz 
646191af7014SMax Reitz             bs->full_open_options = opts;
646291af7014SMax Reitz         } else {
646391af7014SMax Reitz             QDECREF(opts);
646491af7014SMax Reitz         }
646591af7014SMax Reitz     } else if (!bs->full_open_options && qdict_size(bs->options)) {
646691af7014SMax Reitz         /* There is no underlying file BDS (at least referenced by BDS.file),
646791af7014SMax Reitz          * so the full options QDict should be equal to the options given
646891af7014SMax Reitz          * specifically for this block device when it was opened (plus the
646991af7014SMax Reitz          * driver specification).
647091af7014SMax Reitz          * Because those options don't change, there is no need to update
647191af7014SMax Reitz          * full_open_options when it's already set. */
647291af7014SMax Reitz 
647391af7014SMax Reitz         opts = qdict_new();
647491af7014SMax Reitz         append_open_options(opts, bs);
647591af7014SMax Reitz         qdict_put_obj(opts, "driver",
647691af7014SMax Reitz                       QOBJECT(qstring_from_str(drv->format_name)));
647791af7014SMax Reitz 
647891af7014SMax Reitz         if (bs->exact_filename[0]) {
647991af7014SMax Reitz             /* This may not work for all block protocol drivers (some may
648091af7014SMax Reitz              * require this filename to be parsed), but we have to find some
648191af7014SMax Reitz              * default solution here, so just include it. If some block driver
648291af7014SMax Reitz              * does not support pure options without any filename at all or
648391af7014SMax Reitz              * needs some special format of the options QDict, it needs to
648491af7014SMax Reitz              * implement the driver-specific bdrv_refresh_filename() function.
648591af7014SMax Reitz              */
648691af7014SMax Reitz             qdict_put_obj(opts, "filename",
648791af7014SMax Reitz                           QOBJECT(qstring_from_str(bs->exact_filename)));
648891af7014SMax Reitz         }
648991af7014SMax Reitz 
649091af7014SMax Reitz         bs->full_open_options = opts;
649191af7014SMax Reitz     }
649291af7014SMax Reitz 
649391af7014SMax Reitz     if (bs->exact_filename[0]) {
649491af7014SMax Reitz         pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
649591af7014SMax Reitz     } else if (bs->full_open_options) {
649691af7014SMax Reitz         QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
649791af7014SMax Reitz         snprintf(bs->filename, sizeof(bs->filename), "json:%s",
649891af7014SMax Reitz                  qstring_get_str(json));
649991af7014SMax Reitz         QDECREF(json);
650091af7014SMax Reitz     }
650191af7014SMax Reitz }
65025366d0c8SBenoît Canet 
65035366d0c8SBenoît Canet /* This accessor function purpose is to allow the device models to access the
65045366d0c8SBenoît Canet  * BlockAcctStats structure embedded inside a BlockDriverState without being
65055366d0c8SBenoît Canet  * aware of the BlockDriverState structure layout.
65065366d0c8SBenoît Canet  * It will go away when the BlockAcctStats structure will be moved inside
65075366d0c8SBenoît Canet  * the device models.
65085366d0c8SBenoît Canet  */
65095366d0c8SBenoît Canet BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
65105366d0c8SBenoît Canet {
65115366d0c8SBenoît Canet     return &bs->stats;
65125366d0c8SBenoît Canet }
6513