xref: /openbmc/qemu/block.c (revision b8e6fb752e43b45b428487c244cab35f0ab94b10)
1fc01f7e7Sbellard /*
2fc01f7e7Sbellard  * QEMU System Emulator block driver
3fc01f7e7Sbellard  *
4fc01f7e7Sbellard  * Copyright (c) 2003 Fabrice Bellard
5fc01f7e7Sbellard  *
6fc01f7e7Sbellard  * Permission is hereby granted, free of charge, to any person obtaining a copy
7fc01f7e7Sbellard  * of this software and associated documentation files (the "Software"), to deal
8fc01f7e7Sbellard  * in the Software without restriction, including without limitation the rights
9fc01f7e7Sbellard  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10fc01f7e7Sbellard  * copies of the Software, and to permit persons to whom the Software is
11fc01f7e7Sbellard  * furnished to do so, subject to the following conditions:
12fc01f7e7Sbellard  *
13fc01f7e7Sbellard  * The above copyright notice and this permission notice shall be included in
14fc01f7e7Sbellard  * all copies or substantial portions of the Software.
15fc01f7e7Sbellard  *
16fc01f7e7Sbellard  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17fc01f7e7Sbellard  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18fc01f7e7Sbellard  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19fc01f7e7Sbellard  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20fc01f7e7Sbellard  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21fc01f7e7Sbellard  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22fc01f7e7Sbellard  * THE SOFTWARE.
23fc01f7e7Sbellard  */
243990d09aSblueswir1 #include "config-host.h"
25faf07963Spbrook #include "qemu-common.h"
266d519a5fSStefan Hajnoczi #include "trace.h"
27737e150eSPaolo Bonzini #include "block/block_int.h"
28737e150eSPaolo Bonzini #include "block/blockjob.h"
291de7afc9SPaolo Bonzini #include "qemu/module.h"
307b1b5d19SPaolo Bonzini #include "qapi/qmp/qjson.h"
31bfb197e0SMarkus Armbruster #include "sysemu/block-backend.h"
329c17d615SPaolo Bonzini #include "sysemu/sysemu.h"
33de50a20aSFam Zheng #include "sysemu/qtest.h"
341de7afc9SPaolo Bonzini #include "qemu/notify.h"
35737e150eSPaolo Bonzini #include "block/coroutine.h"
36c13163fbSBenoît Canet #include "block/qapi.h"
37b2023818SLuiz Capitulino #include "qmp-commands.h"
381de7afc9SPaolo Bonzini #include "qemu/timer.h"
39a5ee7bd4SWenchao Xia #include "qapi-event.h"
40fc01f7e7Sbellard 
4171e72a19SJuan Quintela #ifdef CONFIG_BSD
427674e7bfSbellard #include <sys/types.h>
437674e7bfSbellard #include <sys/stat.h>
447674e7bfSbellard #include <sys/ioctl.h>
4572cf2d4fSBlue Swirl #include <sys/queue.h>
46c5e97233Sblueswir1 #ifndef __DragonFly__
477674e7bfSbellard #include <sys/disk.h>
487674e7bfSbellard #endif
49c5e97233Sblueswir1 #endif
507674e7bfSbellard 
5149dc768dSaliguori #ifdef _WIN32
5249dc768dSaliguori #include <windows.h>
5349dc768dSaliguori #endif
5449dc768dSaliguori 
55e4654d2dSFam Zheng struct BdrvDirtyBitmap {
56e4654d2dSFam Zheng     HBitmap *bitmap;
570db6e54aSFam Zheng     char *name;
58*b8e6fb75SJohn Snow     bool disabled;
59e4654d2dSFam Zheng     QLIST_ENTRY(BdrvDirtyBitmap) list;
60e4654d2dSFam Zheng };
61e4654d2dSFam Zheng 
/* Placeholder result held while an emulated sync operation is in progress */
#define NOT_DONE 0x7fffffff
631c9805a3SStefan Hajnoczi 
647c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
65f141eafeSaliguori         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
66097310b5SMarkus Armbruster         BlockCompletionFunc *cb, void *opaque);
677c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
68f141eafeSaliguori         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
69097310b5SMarkus Armbruster         BlockCompletionFunc *cb, void *opaque);
70f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
71f9f05dc5SKevin Wolf                                          int64_t sector_num, int nb_sectors,
72f9f05dc5SKevin Wolf                                          QEMUIOVector *iov);
73f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
74f9f05dc5SKevin Wolf                                          int64_t sector_num, int nb_sectors,
75f9f05dc5SKevin Wolf                                          QEMUIOVector *iov);
76775aa8b6SKevin Wolf static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
77775aa8b6SKevin Wolf     int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
78470c0504SStefan Hajnoczi     BdrvRequestFlags flags);
79775aa8b6SKevin Wolf static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
80775aa8b6SKevin Wolf     int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
81f08f2ddaSStefan Hajnoczi     BdrvRequestFlags flags);
827c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
83b2a61371SStefan Hajnoczi                                          int64_t sector_num,
84b2a61371SStefan Hajnoczi                                          QEMUIOVector *qiov,
85b2a61371SStefan Hajnoczi                                          int nb_sectors,
86d20d9b7cSPaolo Bonzini                                          BdrvRequestFlags flags,
87097310b5SMarkus Armbruster                                          BlockCompletionFunc *cb,
88b2a61371SStefan Hajnoczi                                          void *opaque,
898c5873d6SStefan Hajnoczi                                          bool is_write);
90b2a61371SStefan Hajnoczi static void coroutine_fn bdrv_co_do_rw(void *opaque);
91621f0589SKevin Wolf static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
92aa7bfbffSPeter Lieven     int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
93ec530c81Sbellard 
941b7bdbc1SStefan Hajnoczi static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
951b7bdbc1SStefan Hajnoczi     QTAILQ_HEAD_INITIALIZER(bdrv_states);
967ee930d0Sblueswir1 
97dc364f4cSBenoît Canet static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
98dc364f4cSBenoît Canet     QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
99dc364f4cSBenoît Canet 
1008a22f02aSStefan Hajnoczi static QLIST_HEAD(, BlockDriver) bdrv_drivers =
1018a22f02aSStefan Hajnoczi     QLIST_HEAD_INITIALIZER(bdrv_drivers);
102ea2384d3Sbellard 
103c4237dfaSVladimir Sementsov-Ogievskiy static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
104c4237dfaSVladimir Sementsov-Ogievskiy                            int nr_sectors);
105c4237dfaSVladimir Sementsov-Ogievskiy static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
106c4237dfaSVladimir Sementsov-Ogievskiy                              int nr_sectors);
/* When non-zero, only whitelisted block drivers are to be used */
static int use_bdrv_whitelist;
109eb852011SMarkus Armbruster 
1109e0b22f4SStefan Hajnoczi #ifdef _WIN32
/*
 * Return 1 when @filename begins with an ASCII letter directly followed by
 * ':' (e.g. "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_letter = (letter >= 'a' && letter <= 'z') ||
                          (letter >= 'A' && letter <= 'Z');

    /* filename[1] is only inspected once the first character matched */
    return is_letter && filename[1] == ':';
}
1179e0b22f4SStefan Hajnoczi 
1189e0b22f4SStefan Hajnoczi int is_windows_drive(const char *filename)
1199e0b22f4SStefan Hajnoczi {
1209e0b22f4SStefan Hajnoczi     if (is_windows_drive_prefix(filename) &&
1219e0b22f4SStefan Hajnoczi         filename[2] == '\0')
1229e0b22f4SStefan Hajnoczi         return 1;
1239e0b22f4SStefan Hajnoczi     if (strstart(filename, "\\\\.\\", NULL) ||
1249e0b22f4SStefan Hajnoczi         strstart(filename, "//./", NULL))
1259e0b22f4SStefan Hajnoczi         return 1;
1269e0b22f4SStefan Hajnoczi     return 0;
1279e0b22f4SStefan Hajnoczi }
1289e0b22f4SStefan Hajnoczi #endif
1299e0b22f4SStefan Hajnoczi 
1300563e191SZhi Yong Wu /* throttling disk I/O limits */
131cc0681c4SBenoît Canet void bdrv_set_io_limits(BlockDriverState *bs,
132cc0681c4SBenoît Canet                         ThrottleConfig *cfg)
133cc0681c4SBenoît Canet {
134cc0681c4SBenoît Canet     int i;
135cc0681c4SBenoît Canet 
136cc0681c4SBenoît Canet     throttle_config(&bs->throttle_state, cfg);
137cc0681c4SBenoît Canet 
138cc0681c4SBenoît Canet     for (i = 0; i < 2; i++) {
139cc0681c4SBenoît Canet         qemu_co_enter_next(&bs->throttled_reqs[i]);
140cc0681c4SBenoît Canet     }
141cc0681c4SBenoît Canet }
142cc0681c4SBenoît Canet 
143cc0681c4SBenoît Canet /* this function drain all the throttled IOs */
144cc0681c4SBenoît Canet static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
145cc0681c4SBenoît Canet {
146cc0681c4SBenoît Canet     bool drained = false;
147cc0681c4SBenoît Canet     bool enabled = bs->io_limits_enabled;
148cc0681c4SBenoît Canet     int i;
149cc0681c4SBenoît Canet 
150cc0681c4SBenoît Canet     bs->io_limits_enabled = false;
151cc0681c4SBenoît Canet 
152cc0681c4SBenoît Canet     for (i = 0; i < 2; i++) {
153cc0681c4SBenoît Canet         while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
154cc0681c4SBenoît Canet             drained = true;
155cc0681c4SBenoît Canet         }
156cc0681c4SBenoît Canet     }
157cc0681c4SBenoît Canet 
158cc0681c4SBenoît Canet     bs->io_limits_enabled = enabled;
159cc0681c4SBenoît Canet 
160cc0681c4SBenoît Canet     return drained;
161cc0681c4SBenoît Canet }
162cc0681c4SBenoît Canet 
16398f90dbaSZhi Yong Wu void bdrv_io_limits_disable(BlockDriverState *bs)
16498f90dbaSZhi Yong Wu {
16598f90dbaSZhi Yong Wu     bs->io_limits_enabled = false;
16698f90dbaSZhi Yong Wu 
167cc0681c4SBenoît Canet     bdrv_start_throttled_reqs(bs);
16898f90dbaSZhi Yong Wu 
169cc0681c4SBenoît Canet     throttle_destroy(&bs->throttle_state);
17098f90dbaSZhi Yong Wu }
17198f90dbaSZhi Yong Wu 
172cc0681c4SBenoît Canet static void bdrv_throttle_read_timer_cb(void *opaque)
1730563e191SZhi Yong Wu {
1740563e191SZhi Yong Wu     BlockDriverState *bs = opaque;
175cc0681c4SBenoît Canet     qemu_co_enter_next(&bs->throttled_reqs[0]);
1760563e191SZhi Yong Wu }
1770563e191SZhi Yong Wu 
178cc0681c4SBenoît Canet static void bdrv_throttle_write_timer_cb(void *opaque)
179cc0681c4SBenoît Canet {
180cc0681c4SBenoît Canet     BlockDriverState *bs = opaque;
181cc0681c4SBenoît Canet     qemu_co_enter_next(&bs->throttled_reqs[1]);
182cc0681c4SBenoît Canet }
183cc0681c4SBenoît Canet 
184cc0681c4SBenoît Canet /* should be called before bdrv_set_io_limits if a limit is set */
1850563e191SZhi Yong Wu void bdrv_io_limits_enable(BlockDriverState *bs)
1860563e191SZhi Yong Wu {
187de50a20aSFam Zheng     int clock_type = QEMU_CLOCK_REALTIME;
188de50a20aSFam Zheng 
189de50a20aSFam Zheng     if (qtest_enabled()) {
190de50a20aSFam Zheng         /* For testing block IO throttling only */
191de50a20aSFam Zheng         clock_type = QEMU_CLOCK_VIRTUAL;
192de50a20aSFam Zheng     }
193cc0681c4SBenoît Canet     assert(!bs->io_limits_enabled);
194cc0681c4SBenoît Canet     throttle_init(&bs->throttle_state,
19513af91ebSStefan Hajnoczi                   bdrv_get_aio_context(bs),
196de50a20aSFam Zheng                   clock_type,
197cc0681c4SBenoît Canet                   bdrv_throttle_read_timer_cb,
198cc0681c4SBenoît Canet                   bdrv_throttle_write_timer_cb,
199cc0681c4SBenoît Canet                   bs);
2000563e191SZhi Yong Wu     bs->io_limits_enabled = true;
2010563e191SZhi Yong Wu }
2020563e191SZhi Yong Wu 
203cc0681c4SBenoît Canet /* This function makes an IO wait if needed
204cc0681c4SBenoît Canet  *
205cc0681c4SBenoît Canet  * @nb_sectors: the number of sectors of the IO
206cc0681c4SBenoît Canet  * @is_write:   is the IO a write
20798f90dbaSZhi Yong Wu  */
208cc0681c4SBenoît Canet static void bdrv_io_limits_intercept(BlockDriverState *bs,
209d5103588SKevin Wolf                                      unsigned int bytes,
210cc0681c4SBenoît Canet                                      bool is_write)
211cc0681c4SBenoît Canet {
212cc0681c4SBenoît Canet     /* does this io must wait */
213cc0681c4SBenoît Canet     bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);
21498f90dbaSZhi Yong Wu 
215cc0681c4SBenoît Canet     /* if must wait or any request of this type throttled queue the IO */
216cc0681c4SBenoît Canet     if (must_wait ||
217cc0681c4SBenoît Canet         !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
218cc0681c4SBenoît Canet         qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
21998f90dbaSZhi Yong Wu     }
22098f90dbaSZhi Yong Wu 
221cc0681c4SBenoît Canet     /* the IO will be executed, do the accounting */
222d5103588SKevin Wolf     throttle_account(&bs->throttle_state, is_write, bytes);
223d5103588SKevin Wolf 
224cc0681c4SBenoît Canet 
225cc0681c4SBenoît Canet     /* if the next request must wait -> do nothing */
226cc0681c4SBenoît Canet     if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
227cc0681c4SBenoît Canet         return;
228cc0681c4SBenoît Canet     }
229cc0681c4SBenoît Canet 
230cc0681c4SBenoît Canet     /* else queue next request for execution */
231cc0681c4SBenoît Canet     qemu_co_queue_next(&bs->throttled_reqs[is_write]);
23298f90dbaSZhi Yong Wu }
23398f90dbaSZhi Yong Wu 
234339064d5SKevin Wolf size_t bdrv_opt_mem_align(BlockDriverState *bs)
235339064d5SKevin Wolf {
236339064d5SKevin Wolf     if (!bs || !bs->drv) {
237339064d5SKevin Wolf         /* 4k should be on the safe side */
238339064d5SKevin Wolf         return 4096;
239339064d5SKevin Wolf     }
240339064d5SKevin Wolf 
241339064d5SKevin Wolf     return bs->bl.opt_mem_alignment;
242339064d5SKevin Wolf }
243339064d5SKevin Wolf 
/*
 * Return 1 if @path starts with "<protocol>:", i.e. a ':' occurs before any
 * path separator; return 0 otherwise.  On Windows, drive names and drive
 * prefixes are never reported as protocols.
 */
int path_has_protocol(const char *path)
{
#ifdef _WIN32
    const char *separators = ":/\\";

    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    const char *separators = ":/";
#endif

    /* index of the first separator character, or of the terminating NUL */
    return path[strcspn(path, separators)] == ':';
}
2619e0b22f4SStefan Hajnoczi 
/*
 * Return non-zero if @path is absolute: it starts with '/' (or, on Windows,
 * with '\\', or it is a drive name / drive-prefixed path).
 */
int path_is_absolute(const char *path)
{
    const char first = *path;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return (first == '/' || first == '\\');
#else
    return (first == '/');
#endif
}
27483f64091Sbellard 
/*
 * Build in @dest (capacity @dest_size bytes, always NUL-terminated when
 * @dest_size > 0) the path obtained by resolving @filename relative to
 * @base_path.
 *
 * If @filename is absolute it is copied to @dest unchanged.  Otherwise the
 * result is the directory part of @base_path followed by @filename, where the
 * directory part ends after the last path separator or, failing that, after
 * the "<protocol>:" prefix.  URLs are supported.
 *
 * A ':' in @base_path only counts as a prefix separator when @base_path
 * really has a protocol (see path_has_protocol()) or, on Windows, when it is
 * a drive/device name.  A colon that merely appears inside a file name
 * ("dir/a:b.img") is not mistaken for one.
 *
 * Nothing is written when @dest_size <= 0.  Output is silently truncated to
 * fit @dest_size.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int colon_is_prefix;
    int len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* decide whether the first ':' of base_path separates a prefix */
    colon_is_prefix = path_has_protocol(base_path);
#ifdef _WIN32
    /* keep honouring the colon of drive and device names such as "c:" */
    if (is_windows_drive(base_path) || is_windows_drive_prefix(base_path)) {
        colon_is_prefix = 1;
    }
#endif

    /* p: first character after the prefix, or the start of base_path */
    p = colon_is_prefix ? strchr(base_path, ':') : NULL;
    p = p ? p + 1 : base_path;

    /* p1: first character after the last path separator, if there is one */
    p1 = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *p2 = strrchr(base_path, '\\');

        /* only compare the pointers when both are valid */
        if (p2 && (!p1 || p2 > p1)) {
            p1 = p2;
        }
    }
#endif
    p1 = p1 ? p1 + 1 : base_path;

    /* keep whichever candidate preserves more of base_path */
    if (p1 > p) {
        p = p1;
    }

    len = p - base_path;
    if (len > dest_size - 1) {
        len = dest_size - 1;
    }
    memcpy(dest, base_path, len);
    dest[len] = '\0';
    pstrcat(dest, dest_size, filename);
}
31883f64091Sbellard 
3190a82855aSMax Reitz void bdrv_get_full_backing_filename_from_filename(const char *backed,
3200a82855aSMax Reitz                                                   const char *backing,
3219f07429eSMax Reitz                                                   char *dest, size_t sz,
3229f07429eSMax Reitz                                                   Error **errp)
3230a82855aSMax Reitz {
3249f07429eSMax Reitz     if (backing[0] == '\0' || path_has_protocol(backing) ||
3259f07429eSMax Reitz         path_is_absolute(backing))
3269f07429eSMax Reitz     {
3270a82855aSMax Reitz         pstrcpy(dest, sz, backing);
3289f07429eSMax Reitz     } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
3299f07429eSMax Reitz         error_setg(errp, "Cannot use relative backing file names for '%s'",
3309f07429eSMax Reitz                    backed);
3310a82855aSMax Reitz     } else {
3320a82855aSMax Reitz         path_combine(dest, sz, backed, backing);
3330a82855aSMax Reitz     }
3340a82855aSMax Reitz }
3350a82855aSMax Reitz 
3369f07429eSMax Reitz void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
3379f07429eSMax Reitz                                     Error **errp)
338dc5a1371SPaolo Bonzini {
3399f07429eSMax Reitz     char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
3409f07429eSMax Reitz 
3419f07429eSMax Reitz     bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
3429f07429eSMax Reitz                                                  dest, sz, errp);
343dc5a1371SPaolo Bonzini }
344dc5a1371SPaolo Bonzini 
3455efa9d5aSAnthony Liguori void bdrv_register(BlockDriver *bdrv)
346ea2384d3Sbellard {
3478c5873d6SStefan Hajnoczi     /* Block drivers without coroutine functions need emulation */
3488c5873d6SStefan Hajnoczi     if (!bdrv->bdrv_co_readv) {
349f9f05dc5SKevin Wolf         bdrv->bdrv_co_readv = bdrv_co_readv_em;
350f9f05dc5SKevin Wolf         bdrv->bdrv_co_writev = bdrv_co_writev_em;
351f9f05dc5SKevin Wolf 
352f8c35c1dSStefan Hajnoczi         /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
353f8c35c1dSStefan Hajnoczi          * the block driver lacks aio we need to emulate that too.
354f8c35c1dSStefan Hajnoczi          */
355f9f05dc5SKevin Wolf         if (!bdrv->bdrv_aio_readv) {
35683f64091Sbellard             /* add AIO emulation layer */
357f141eafeSaliguori             bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
358f141eafeSaliguori             bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
35983f64091Sbellard         }
360f9f05dc5SKevin Wolf     }
361b2e12bc6SChristoph Hellwig 
3628a22f02aSStefan Hajnoczi     QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
363ea2384d3Sbellard }
364b338082bSbellard 
3657f06d47eSMarkus Armbruster BlockDriverState *bdrv_new_root(void)
366fc01f7e7Sbellard {
3677f06d47eSMarkus Armbruster     BlockDriverState *bs = bdrv_new();
368e4e9986bSMarkus Armbruster 
369e4e9986bSMarkus Armbruster     QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
370e4e9986bSMarkus Armbruster     return bs;
371e4e9986bSMarkus Armbruster }
372e4e9986bSMarkus Armbruster 
373e4e9986bSMarkus Armbruster BlockDriverState *bdrv_new(void)
374e4e9986bSMarkus Armbruster {
375e4e9986bSMarkus Armbruster     BlockDriverState *bs;
376e4e9986bSMarkus Armbruster     int i;
377e4e9986bSMarkus Armbruster 
3785839e53bSMarkus Armbruster     bs = g_new0(BlockDriverState, 1);
379e4654d2dSFam Zheng     QLIST_INIT(&bs->dirty_bitmaps);
380fbe40ff7SFam Zheng     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
381fbe40ff7SFam Zheng         QLIST_INIT(&bs->op_blockers[i]);
382fbe40ff7SFam Zheng     }
38328a7282aSLuiz Capitulino     bdrv_iostatus_disable(bs);
384d7d512f6SPaolo Bonzini     notifier_list_init(&bs->close_notifiers);
385d616b224SStefan Hajnoczi     notifier_with_return_list_init(&bs->before_write_notifiers);
386cc0681c4SBenoît Canet     qemu_co_queue_init(&bs->throttled_reqs[0]);
387cc0681c4SBenoît Canet     qemu_co_queue_init(&bs->throttled_reqs[1]);
3889fcb0251SFam Zheng     bs->refcnt = 1;
389dcd04228SStefan Hajnoczi     bs->aio_context = qemu_get_aio_context();
390d7d512f6SPaolo Bonzini 
391b338082bSbellard     return bs;
392b338082bSbellard }
393b338082bSbellard 
394d7d512f6SPaolo Bonzini void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
395d7d512f6SPaolo Bonzini {
396d7d512f6SPaolo Bonzini     notifier_list_add(&bs->close_notifiers, notify);
397d7d512f6SPaolo Bonzini }
398d7d512f6SPaolo Bonzini 
399ea2384d3Sbellard BlockDriver *bdrv_find_format(const char *format_name)
400ea2384d3Sbellard {
401ea2384d3Sbellard     BlockDriver *drv1;
4028a22f02aSStefan Hajnoczi     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
4038a22f02aSStefan Hajnoczi         if (!strcmp(drv1->format_name, format_name)) {
404ea2384d3Sbellard             return drv1;
405ea2384d3Sbellard         }
4068a22f02aSStefan Hajnoczi     }
407ea2384d3Sbellard     return NULL;
408ea2384d3Sbellard }
409ea2384d3Sbellard 
410b64ec4e4SFam Zheng static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
411eb852011SMarkus Armbruster {
412b64ec4e4SFam Zheng     static const char *whitelist_rw[] = {
413b64ec4e4SFam Zheng         CONFIG_BDRV_RW_WHITELIST
414b64ec4e4SFam Zheng     };
415b64ec4e4SFam Zheng     static const char *whitelist_ro[] = {
416b64ec4e4SFam Zheng         CONFIG_BDRV_RO_WHITELIST
417eb852011SMarkus Armbruster     };
418eb852011SMarkus Armbruster     const char **p;
419eb852011SMarkus Armbruster 
420b64ec4e4SFam Zheng     if (!whitelist_rw[0] && !whitelist_ro[0]) {
421eb852011SMarkus Armbruster         return 1;               /* no whitelist, anything goes */
422b64ec4e4SFam Zheng     }
423eb852011SMarkus Armbruster 
424b64ec4e4SFam Zheng     for (p = whitelist_rw; *p; p++) {
425eb852011SMarkus Armbruster         if (!strcmp(drv->format_name, *p)) {
426eb852011SMarkus Armbruster             return 1;
427eb852011SMarkus Armbruster         }
428eb852011SMarkus Armbruster     }
429b64ec4e4SFam Zheng     if (read_only) {
430b64ec4e4SFam Zheng         for (p = whitelist_ro; *p; p++) {
431b64ec4e4SFam Zheng             if (!strcmp(drv->format_name, *p)) {
432b64ec4e4SFam Zheng                 return 1;
433b64ec4e4SFam Zheng             }
434b64ec4e4SFam Zheng         }
435b64ec4e4SFam Zheng     }
436eb852011SMarkus Armbruster     return 0;
437eb852011SMarkus Armbruster }
438eb852011SMarkus Armbruster 
439b64ec4e4SFam Zheng BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
440b64ec4e4SFam Zheng                                           bool read_only)
441eb852011SMarkus Armbruster {
442eb852011SMarkus Armbruster     BlockDriver *drv = bdrv_find_format(format_name);
443b64ec4e4SFam Zheng     return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
444eb852011SMarkus Armbruster }
445eb852011SMarkus Armbruster 
4465b7e1542SZhi Yong Wu typedef struct CreateCo {
4475b7e1542SZhi Yong Wu     BlockDriver *drv;
4485b7e1542SZhi Yong Wu     char *filename;
44983d0521aSChunyan Liu     QemuOpts *opts;
4505b7e1542SZhi Yong Wu     int ret;
451cc84d90fSMax Reitz     Error *err;
4525b7e1542SZhi Yong Wu } CreateCo;
4535b7e1542SZhi Yong Wu 
4545b7e1542SZhi Yong Wu static void coroutine_fn bdrv_create_co_entry(void *opaque)
4555b7e1542SZhi Yong Wu {
456cc84d90fSMax Reitz     Error *local_err = NULL;
457cc84d90fSMax Reitz     int ret;
458cc84d90fSMax Reitz 
4595b7e1542SZhi Yong Wu     CreateCo *cco = opaque;
4605b7e1542SZhi Yong Wu     assert(cco->drv);
4615b7e1542SZhi Yong Wu 
462c282e1fdSChunyan Liu     ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
46384d18f06SMarkus Armbruster     if (local_err) {
464cc84d90fSMax Reitz         error_propagate(&cco->err, local_err);
465cc84d90fSMax Reitz     }
466cc84d90fSMax Reitz     cco->ret = ret;
4675b7e1542SZhi Yong Wu }
4685b7e1542SZhi Yong Wu 
4690e7e1989SKevin Wolf int bdrv_create(BlockDriver *drv, const char* filename,
47083d0521aSChunyan Liu                 QemuOpts *opts, Error **errp)
471ea2384d3Sbellard {
4725b7e1542SZhi Yong Wu     int ret;
4730e7e1989SKevin Wolf 
4745b7e1542SZhi Yong Wu     Coroutine *co;
4755b7e1542SZhi Yong Wu     CreateCo cco = {
4765b7e1542SZhi Yong Wu         .drv = drv,
4775b7e1542SZhi Yong Wu         .filename = g_strdup(filename),
47883d0521aSChunyan Liu         .opts = opts,
4795b7e1542SZhi Yong Wu         .ret = NOT_DONE,
480cc84d90fSMax Reitz         .err = NULL,
4815b7e1542SZhi Yong Wu     };
4825b7e1542SZhi Yong Wu 
483c282e1fdSChunyan Liu     if (!drv->bdrv_create) {
484cc84d90fSMax Reitz         error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
48580168bffSLuiz Capitulino         ret = -ENOTSUP;
48680168bffSLuiz Capitulino         goto out;
4875b7e1542SZhi Yong Wu     }
4885b7e1542SZhi Yong Wu 
4895b7e1542SZhi Yong Wu     if (qemu_in_coroutine()) {
4905b7e1542SZhi Yong Wu         /* Fast-path if already in coroutine context */
4915b7e1542SZhi Yong Wu         bdrv_create_co_entry(&cco);
4925b7e1542SZhi Yong Wu     } else {
4935b7e1542SZhi Yong Wu         co = qemu_coroutine_create(bdrv_create_co_entry);
4945b7e1542SZhi Yong Wu         qemu_coroutine_enter(co, &cco);
4955b7e1542SZhi Yong Wu         while (cco.ret == NOT_DONE) {
496b47ec2c4SPaolo Bonzini             aio_poll(qemu_get_aio_context(), true);
4975b7e1542SZhi Yong Wu         }
4985b7e1542SZhi Yong Wu     }
4995b7e1542SZhi Yong Wu 
5005b7e1542SZhi Yong Wu     ret = cco.ret;
501cc84d90fSMax Reitz     if (ret < 0) {
50284d18f06SMarkus Armbruster         if (cco.err) {
503cc84d90fSMax Reitz             error_propagate(errp, cco.err);
504cc84d90fSMax Reitz         } else {
505cc84d90fSMax Reitz             error_setg_errno(errp, -ret, "Could not create image");
506cc84d90fSMax Reitz         }
507cc84d90fSMax Reitz     }
5085b7e1542SZhi Yong Wu 
50980168bffSLuiz Capitulino out:
51080168bffSLuiz Capitulino     g_free(cco.filename);
5115b7e1542SZhi Yong Wu     return ret;
512ea2384d3Sbellard }
513ea2384d3Sbellard 
514c282e1fdSChunyan Liu int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
51584a12e66SChristoph Hellwig {
51684a12e66SChristoph Hellwig     BlockDriver *drv;
517cc84d90fSMax Reitz     Error *local_err = NULL;
518cc84d90fSMax Reitz     int ret;
51984a12e66SChristoph Hellwig 
520b65a5e12SMax Reitz     drv = bdrv_find_protocol(filename, true, errp);
52184a12e66SChristoph Hellwig     if (drv == NULL) {
52216905d71SStefan Hajnoczi         return -ENOENT;
52384a12e66SChristoph Hellwig     }
52484a12e66SChristoph Hellwig 
525c282e1fdSChunyan Liu     ret = bdrv_create(drv, filename, opts, &local_err);
52684d18f06SMarkus Armbruster     if (local_err) {
527cc84d90fSMax Reitz         error_propagate(errp, local_err);
528cc84d90fSMax Reitz     }
529cc84d90fSMax Reitz     return ret;
53084a12e66SChristoph Hellwig }
53184a12e66SChristoph Hellwig 
5323baca891SKevin Wolf void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
533d34682cdSKevin Wolf {
534d34682cdSKevin Wolf     BlockDriver *drv = bs->drv;
5353baca891SKevin Wolf     Error *local_err = NULL;
536d34682cdSKevin Wolf 
537d34682cdSKevin Wolf     memset(&bs->bl, 0, sizeof(bs->bl));
538d34682cdSKevin Wolf 
539466ad822SKevin Wolf     if (!drv) {
5403baca891SKevin Wolf         return;
541466ad822SKevin Wolf     }
542466ad822SKevin Wolf 
543466ad822SKevin Wolf     /* Take some limits from the children as a default */
544466ad822SKevin Wolf     if (bs->file) {
5453baca891SKevin Wolf         bdrv_refresh_limits(bs->file, &local_err);
5463baca891SKevin Wolf         if (local_err) {
5473baca891SKevin Wolf             error_propagate(errp, local_err);
5483baca891SKevin Wolf             return;
5493baca891SKevin Wolf         }
550466ad822SKevin Wolf         bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
5512647fab5SPeter Lieven         bs->bl.max_transfer_length = bs->file->bl.max_transfer_length;
552339064d5SKevin Wolf         bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
553339064d5SKevin Wolf     } else {
554339064d5SKevin Wolf         bs->bl.opt_mem_alignment = 512;
555466ad822SKevin Wolf     }
556466ad822SKevin Wolf 
557466ad822SKevin Wolf     if (bs->backing_hd) {
5583baca891SKevin Wolf         bdrv_refresh_limits(bs->backing_hd, &local_err);
5593baca891SKevin Wolf         if (local_err) {
5603baca891SKevin Wolf             error_propagate(errp, local_err);
5613baca891SKevin Wolf             return;
5623baca891SKevin Wolf         }
563466ad822SKevin Wolf         bs->bl.opt_transfer_length =
564466ad822SKevin Wolf             MAX(bs->bl.opt_transfer_length,
565466ad822SKevin Wolf                 bs->backing_hd->bl.opt_transfer_length);
5662647fab5SPeter Lieven         bs->bl.max_transfer_length =
5672647fab5SPeter Lieven             MIN_NON_ZERO(bs->bl.max_transfer_length,
5682647fab5SPeter Lieven                          bs->backing_hd->bl.max_transfer_length);
569339064d5SKevin Wolf         bs->bl.opt_mem_alignment =
570339064d5SKevin Wolf             MAX(bs->bl.opt_mem_alignment,
571339064d5SKevin Wolf                 bs->backing_hd->bl.opt_mem_alignment);
572466ad822SKevin Wolf     }
573466ad822SKevin Wolf 
574466ad822SKevin Wolf     /* Then let the driver override it */
575466ad822SKevin Wolf     if (drv->bdrv_refresh_limits) {
5763baca891SKevin Wolf         drv->bdrv_refresh_limits(bs, errp);
577d34682cdSKevin Wolf     }
578d34682cdSKevin Wolf }
579d34682cdSKevin Wolf 
580892b7de8SEkaterina Tumanova /**
581892b7de8SEkaterina Tumanova  * Try to get @bs's logical and physical block size.
582892b7de8SEkaterina Tumanova  * On success, store them in @bsz struct and return 0.
583892b7de8SEkaterina Tumanova  * On failure return -errno.
584892b7de8SEkaterina Tumanova  * @bs must not be empty.
585892b7de8SEkaterina Tumanova  */
586892b7de8SEkaterina Tumanova int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
587892b7de8SEkaterina Tumanova {
588892b7de8SEkaterina Tumanova     BlockDriver *drv = bs->drv;
589892b7de8SEkaterina Tumanova 
590892b7de8SEkaterina Tumanova     if (drv && drv->bdrv_probe_blocksizes) {
591892b7de8SEkaterina Tumanova         return drv->bdrv_probe_blocksizes(bs, bsz);
592892b7de8SEkaterina Tumanova     }
593892b7de8SEkaterina Tumanova 
594892b7de8SEkaterina Tumanova     return -ENOTSUP;
595892b7de8SEkaterina Tumanova }
596892b7de8SEkaterina Tumanova 
597892b7de8SEkaterina Tumanova /**
598892b7de8SEkaterina Tumanova  * Try to get @bs's geometry (cyls, heads, sectors).
599892b7de8SEkaterina Tumanova  * On success, store them in @geo struct and return 0.
600892b7de8SEkaterina Tumanova  * On failure return -errno.
601892b7de8SEkaterina Tumanova  * @bs must not be empty.
602892b7de8SEkaterina Tumanova  */
603892b7de8SEkaterina Tumanova int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
604892b7de8SEkaterina Tumanova {
605892b7de8SEkaterina Tumanova     BlockDriver *drv = bs->drv;
606892b7de8SEkaterina Tumanova 
607892b7de8SEkaterina Tumanova     if (drv && drv->bdrv_probe_geometry) {
608892b7de8SEkaterina Tumanova         return drv->bdrv_probe_geometry(bs, geo);
609892b7de8SEkaterina Tumanova     }
610892b7de8SEkaterina Tumanova 
611892b7de8SEkaterina Tumanova     return -ENOTSUP;
612892b7de8SEkaterina Tumanova }
613892b7de8SEkaterina Tumanova 
/*
 * Create a uniquely-named empty temporary file and store its path in
 * @filename, a caller-provided buffer of @size bytes.
 *
 * On POSIX hosts the file is created in $TMPDIR, or in /var/tmp when
 * TMPDIR is not set.
 *
 * Return 0 upon success, otherwise a negative errno value (-EOVERFLOW when
 * the path does not fit into @filename).  On Windows the failure value is
 * the negated GetLastError() code instead.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    int saved_errno;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    /* snprintf() returns the untruncated length; >= size means truncation */
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* unlink() may overwrite errno, so remember why close() failed
         * before cleaning up the file we just created. */
        saved_errno = errno;
        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
649ea2384d3Sbellard 
650f3a5d3f8SChristoph Hellwig /*
651f3a5d3f8SChristoph Hellwig  * Detect host devices. By convention, /dev/cdrom[N] is always
652f3a5d3f8SChristoph Hellwig  * recognized as a host CDROM.
653f3a5d3f8SChristoph Hellwig  */
654f3a5d3f8SChristoph Hellwig static BlockDriver *find_hdev_driver(const char *filename)
655f3a5d3f8SChristoph Hellwig {
656508c7cb3SChristoph Hellwig     int score_max = 0, score;
657508c7cb3SChristoph Hellwig     BlockDriver *drv = NULL, *d;
658f3a5d3f8SChristoph Hellwig 
6598a22f02aSStefan Hajnoczi     QLIST_FOREACH(d, &bdrv_drivers, list) {
660508c7cb3SChristoph Hellwig         if (d->bdrv_probe_device) {
661508c7cb3SChristoph Hellwig             score = d->bdrv_probe_device(filename);
662508c7cb3SChristoph Hellwig             if (score > score_max) {
663508c7cb3SChristoph Hellwig                 score_max = score;
664508c7cb3SChristoph Hellwig                 drv = d;
665f3a5d3f8SChristoph Hellwig             }
666508c7cb3SChristoph Hellwig         }
667f3a5d3f8SChristoph Hellwig     }
668f3a5d3f8SChristoph Hellwig 
669508c7cb3SChristoph Hellwig     return drv;
670f3a5d3f8SChristoph Hellwig }
671f3a5d3f8SChristoph Hellwig 
67298289620SKevin Wolf BlockDriver *bdrv_find_protocol(const char *filename,
673b65a5e12SMax Reitz                                 bool allow_protocol_prefix,
674b65a5e12SMax Reitz                                 Error **errp)
67584a12e66SChristoph Hellwig {
67684a12e66SChristoph Hellwig     BlockDriver *drv1;
67784a12e66SChristoph Hellwig     char protocol[128];
67884a12e66SChristoph Hellwig     int len;
67984a12e66SChristoph Hellwig     const char *p;
68084a12e66SChristoph Hellwig 
68166f82ceeSKevin Wolf     /* TODO Drivers without bdrv_file_open must be specified explicitly */
68266f82ceeSKevin Wolf 
68339508e7aSChristoph Hellwig     /*
68439508e7aSChristoph Hellwig      * XXX(hch): we really should not let host device detection
68539508e7aSChristoph Hellwig      * override an explicit protocol specification, but moving this
68639508e7aSChristoph Hellwig      * later breaks access to device names with colons in them.
68739508e7aSChristoph Hellwig      * Thanks to the brain-dead persistent naming schemes on udev-
68839508e7aSChristoph Hellwig      * based Linux systems those actually are quite common.
68939508e7aSChristoph Hellwig      */
69084a12e66SChristoph Hellwig     drv1 = find_hdev_driver(filename);
69139508e7aSChristoph Hellwig     if (drv1) {
69284a12e66SChristoph Hellwig         return drv1;
69384a12e66SChristoph Hellwig     }
69439508e7aSChristoph Hellwig 
69598289620SKevin Wolf     if (!path_has_protocol(filename) || !allow_protocol_prefix) {
696ef810437SMax Reitz         return &bdrv_file;
69739508e7aSChristoph Hellwig     }
69898289620SKevin Wolf 
6999e0b22f4SStefan Hajnoczi     p = strchr(filename, ':');
7009e0b22f4SStefan Hajnoczi     assert(p != NULL);
70184a12e66SChristoph Hellwig     len = p - filename;
70284a12e66SChristoph Hellwig     if (len > sizeof(protocol) - 1)
70384a12e66SChristoph Hellwig         len = sizeof(protocol) - 1;
70484a12e66SChristoph Hellwig     memcpy(protocol, filename, len);
70584a12e66SChristoph Hellwig     protocol[len] = '\0';
70684a12e66SChristoph Hellwig     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
70784a12e66SChristoph Hellwig         if (drv1->protocol_name &&
70884a12e66SChristoph Hellwig             !strcmp(drv1->protocol_name, protocol)) {
70984a12e66SChristoph Hellwig             return drv1;
71084a12e66SChristoph Hellwig         }
71184a12e66SChristoph Hellwig     }
712b65a5e12SMax Reitz 
713b65a5e12SMax Reitz     error_setg(errp, "Unknown protocol '%s'", protocol);
71484a12e66SChristoph Hellwig     return NULL;
71584a12e66SChristoph Hellwig }
71684a12e66SChristoph Hellwig 
717c6684249SMarkus Armbruster /*
718c6684249SMarkus Armbruster  * Guess image format by probing its contents.
719c6684249SMarkus Armbruster  * This is not a good idea when your image is raw (CVE-2008-2004), but
720c6684249SMarkus Armbruster  * we do it anyway for backward compatibility.
721c6684249SMarkus Armbruster  *
722c6684249SMarkus Armbruster  * @buf         contains the image's first @buf_size bytes.
7237cddd372SKevin Wolf  * @buf_size    is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
7247cddd372SKevin Wolf  *              but can be smaller if the image file is smaller)
725c6684249SMarkus Armbruster  * @filename    is its filename.
726c6684249SMarkus Armbruster  *
727c6684249SMarkus Armbruster  * For all block drivers, call the bdrv_probe() method to get its
728c6684249SMarkus Armbruster  * probing score.
729c6684249SMarkus Armbruster  * Return the first block driver with the highest probing score.
730c6684249SMarkus Armbruster  */
73138f3ef57SKevin Wolf BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
732c6684249SMarkus Armbruster                             const char *filename)
733c6684249SMarkus Armbruster {
734c6684249SMarkus Armbruster     int score_max = 0, score;
735c6684249SMarkus Armbruster     BlockDriver *drv = NULL, *d;
736c6684249SMarkus Armbruster 
737c6684249SMarkus Armbruster     QLIST_FOREACH(d, &bdrv_drivers, list) {
738c6684249SMarkus Armbruster         if (d->bdrv_probe) {
739c6684249SMarkus Armbruster             score = d->bdrv_probe(buf, buf_size, filename);
740c6684249SMarkus Armbruster             if (score > score_max) {
741c6684249SMarkus Armbruster                 score_max = score;
742c6684249SMarkus Armbruster                 drv = d;
743c6684249SMarkus Armbruster             }
744c6684249SMarkus Armbruster         }
745c6684249SMarkus Armbruster     }
746c6684249SMarkus Armbruster 
747c6684249SMarkus Armbruster     return drv;
748c6684249SMarkus Armbruster }
749c6684249SMarkus Armbruster 
750f500a6d3SKevin Wolf static int find_image_format(BlockDriverState *bs, const char *filename,
75134b5d2c6SMax Reitz                              BlockDriver **pdrv, Error **errp)
752ea2384d3Sbellard {
753c6684249SMarkus Armbruster     BlockDriver *drv;
7547cddd372SKevin Wolf     uint8_t buf[BLOCK_PROBE_BUF_SIZE];
755f500a6d3SKevin Wolf     int ret = 0;
756f8ea0b00SNicholas Bellinger 
75708a00559SKevin Wolf     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
7588e895599SPaolo Bonzini     if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
759ef810437SMax Reitz         *pdrv = &bdrv_raw;
760c98ac35dSStefan Weil         return ret;
7611a396859SNicholas A. Bellinger     }
762f8ea0b00SNicholas Bellinger 
76383f64091Sbellard     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
764ea2384d3Sbellard     if (ret < 0) {
76534b5d2c6SMax Reitz         error_setg_errno(errp, -ret, "Could not read image for determining its "
76634b5d2c6SMax Reitz                          "format");
767c98ac35dSStefan Weil         *pdrv = NULL;
768c98ac35dSStefan Weil         return ret;
769ea2384d3Sbellard     }
770ea2384d3Sbellard 
771c6684249SMarkus Armbruster     drv = bdrv_probe_all(buf, ret, filename);
772c98ac35dSStefan Weil     if (!drv) {
77334b5d2c6SMax Reitz         error_setg(errp, "Could not determine image format: No compatible "
77434b5d2c6SMax Reitz                    "driver found");
775c98ac35dSStefan Weil         ret = -ENOENT;
776c98ac35dSStefan Weil     }
777c98ac35dSStefan Weil     *pdrv = drv;
778c98ac35dSStefan Weil     return ret;
779ea2384d3Sbellard }
780ea2384d3Sbellard 
78151762288SStefan Hajnoczi /**
78251762288SStefan Hajnoczi  * Set the current 'total_sectors' value
78365a9bb25SMarkus Armbruster  * Return 0 on success, -errno on error.
78451762288SStefan Hajnoczi  */
78551762288SStefan Hajnoczi static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
78651762288SStefan Hajnoczi {
78751762288SStefan Hajnoczi     BlockDriver *drv = bs->drv;
78851762288SStefan Hajnoczi 
789396759adSNicholas Bellinger     /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
790396759adSNicholas Bellinger     if (bs->sg)
791396759adSNicholas Bellinger         return 0;
792396759adSNicholas Bellinger 
79351762288SStefan Hajnoczi     /* query actual device if possible, otherwise just trust the hint */
79451762288SStefan Hajnoczi     if (drv->bdrv_getlength) {
79551762288SStefan Hajnoczi         int64_t length = drv->bdrv_getlength(bs);
79651762288SStefan Hajnoczi         if (length < 0) {
79751762288SStefan Hajnoczi             return length;
79851762288SStefan Hajnoczi         }
7997e382003SFam Zheng         hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
80051762288SStefan Hajnoczi     }
80151762288SStefan Hajnoczi 
80251762288SStefan Hajnoczi     bs->total_sectors = hint;
80351762288SStefan Hajnoczi     return 0;
80451762288SStefan Hajnoczi }
80551762288SStefan Hajnoczi 
806c3993cdcSStefan Hajnoczi /**
8079e8f1835SPaolo Bonzini  * Set open flags for a given discard mode
8089e8f1835SPaolo Bonzini  *
8099e8f1835SPaolo Bonzini  * Return 0 on success, -1 if the discard mode was invalid.
8109e8f1835SPaolo Bonzini  */
8119e8f1835SPaolo Bonzini int bdrv_parse_discard_flags(const char *mode, int *flags)
8129e8f1835SPaolo Bonzini {
8139e8f1835SPaolo Bonzini     *flags &= ~BDRV_O_UNMAP;
8149e8f1835SPaolo Bonzini 
8159e8f1835SPaolo Bonzini     if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
8169e8f1835SPaolo Bonzini         /* do nothing */
8179e8f1835SPaolo Bonzini     } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
8189e8f1835SPaolo Bonzini         *flags |= BDRV_O_UNMAP;
8199e8f1835SPaolo Bonzini     } else {
8209e8f1835SPaolo Bonzini         return -1;
8219e8f1835SPaolo Bonzini     }
8229e8f1835SPaolo Bonzini 
8239e8f1835SPaolo Bonzini     return 0;
8249e8f1835SPaolo Bonzini }
8259e8f1835SPaolo Bonzini 
8269e8f1835SPaolo Bonzini /**
827c3993cdcSStefan Hajnoczi  * Set open flags for a given cache mode
828c3993cdcSStefan Hajnoczi  *
829c3993cdcSStefan Hajnoczi  * Return 0 on success, -1 if the cache mode was invalid.
830c3993cdcSStefan Hajnoczi  */
831c3993cdcSStefan Hajnoczi int bdrv_parse_cache_flags(const char *mode, int *flags)
832c3993cdcSStefan Hajnoczi {
833c3993cdcSStefan Hajnoczi     *flags &= ~BDRV_O_CACHE_MASK;
834c3993cdcSStefan Hajnoczi 
835c3993cdcSStefan Hajnoczi     if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
836c3993cdcSStefan Hajnoczi         *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
83792196b2fSStefan Hajnoczi     } else if (!strcmp(mode, "directsync")) {
83892196b2fSStefan Hajnoczi         *flags |= BDRV_O_NOCACHE;
839c3993cdcSStefan Hajnoczi     } else if (!strcmp(mode, "writeback")) {
840c3993cdcSStefan Hajnoczi         *flags |= BDRV_O_CACHE_WB;
841c3993cdcSStefan Hajnoczi     } else if (!strcmp(mode, "unsafe")) {
842c3993cdcSStefan Hajnoczi         *flags |= BDRV_O_CACHE_WB;
843c3993cdcSStefan Hajnoczi         *flags |= BDRV_O_NO_FLUSH;
844c3993cdcSStefan Hajnoczi     } else if (!strcmp(mode, "writethrough")) {
845c3993cdcSStefan Hajnoczi         /* this is the default */
846c3993cdcSStefan Hajnoczi     } else {
847c3993cdcSStefan Hajnoczi         return -1;
848c3993cdcSStefan Hajnoczi     }
849c3993cdcSStefan Hajnoczi 
850c3993cdcSStefan Hajnoczi     return 0;
851c3993cdcSStefan Hajnoczi }
852c3993cdcSStefan Hajnoczi 
85353fec9d3SStefan Hajnoczi /**
85453fec9d3SStefan Hajnoczi  * The copy-on-read flag is actually a reference count so multiple users may
85553fec9d3SStefan Hajnoczi  * use the feature without worrying about clobbering its previous state.
85653fec9d3SStefan Hajnoczi  * Copy-on-read stays enabled until all users have called to disable it.
85753fec9d3SStefan Hajnoczi  */
85853fec9d3SStefan Hajnoczi void bdrv_enable_copy_on_read(BlockDriverState *bs)
85953fec9d3SStefan Hajnoczi {
86053fec9d3SStefan Hajnoczi     bs->copy_on_read++;
86153fec9d3SStefan Hajnoczi }
86253fec9d3SStefan Hajnoczi 
86353fec9d3SStefan Hajnoczi void bdrv_disable_copy_on_read(BlockDriverState *bs)
86453fec9d3SStefan Hajnoczi {
86553fec9d3SStefan Hajnoczi     assert(bs->copy_on_read > 0);
86653fec9d3SStefan Hajnoczi     bs->copy_on_read--;
86753fec9d3SStefan Hajnoczi }
86853fec9d3SStefan Hajnoczi 
8690b50cc88SKevin Wolf /*
870b1e6fc08SKevin Wolf  * Returns the flags that a temporary snapshot should get, based on the
871b1e6fc08SKevin Wolf  * originally requested flags (the originally requested image will have flags
872b1e6fc08SKevin Wolf  * like a backing file)
873b1e6fc08SKevin Wolf  */
874b1e6fc08SKevin Wolf static int bdrv_temp_snapshot_flags(int flags)
875b1e6fc08SKevin Wolf {
876b1e6fc08SKevin Wolf     return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
877b1e6fc08SKevin Wolf }
878b1e6fc08SKevin Wolf 
879b1e6fc08SKevin Wolf /*
8800b50cc88SKevin Wolf  * Returns the flags that bs->file should get, based on the given flags for
8810b50cc88SKevin Wolf  * the parent BDS
8820b50cc88SKevin Wolf  */
8830b50cc88SKevin Wolf static int bdrv_inherited_flags(int flags)
8840b50cc88SKevin Wolf {
8850b50cc88SKevin Wolf     /* Enable protocol handling, disable format probing for bs->file */
8860b50cc88SKevin Wolf     flags |= BDRV_O_PROTOCOL;
8870b50cc88SKevin Wolf 
8880b50cc88SKevin Wolf     /* Our block drivers take care to send flushes and respect unmap policy,
8890b50cc88SKevin Wolf      * so we can enable both unconditionally on lower layers. */
8900b50cc88SKevin Wolf     flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
8910b50cc88SKevin Wolf 
8920b50cc88SKevin Wolf     /* Clear flags that only apply to the top layer */
8935669b44dSKevin Wolf     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
8940b50cc88SKevin Wolf 
8950b50cc88SKevin Wolf     return flags;
8960b50cc88SKevin Wolf }
8970b50cc88SKevin Wolf 
898317fc44eSKevin Wolf /*
899317fc44eSKevin Wolf  * Returns the flags that bs->backing_hd should get, based on the given flags
900317fc44eSKevin Wolf  * for the parent BDS
901317fc44eSKevin Wolf  */
902317fc44eSKevin Wolf static int bdrv_backing_flags(int flags)
903317fc44eSKevin Wolf {
904317fc44eSKevin Wolf     /* backing files always opened read-only */
905317fc44eSKevin Wolf     flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
906317fc44eSKevin Wolf 
907317fc44eSKevin Wolf     /* snapshot=on is handled on the top layer */
9088bfea15dSKevin Wolf     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
909317fc44eSKevin Wolf 
910317fc44eSKevin Wolf     return flags;
911317fc44eSKevin Wolf }
912317fc44eSKevin Wolf 
9137b272452SKevin Wolf static int bdrv_open_flags(BlockDriverState *bs, int flags)
9147b272452SKevin Wolf {
9157b272452SKevin Wolf     int open_flags = flags | BDRV_O_CACHE_WB;
9167b272452SKevin Wolf 
9177b272452SKevin Wolf     /*
9187b272452SKevin Wolf      * Clear flags that are internal to the block layer before opening the
9197b272452SKevin Wolf      * image.
9207b272452SKevin Wolf      */
92120cca275SKevin Wolf     open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
9227b272452SKevin Wolf 
9237b272452SKevin Wolf     /*
9247b272452SKevin Wolf      * Snapshots should be writable.
9257b272452SKevin Wolf      */
9268bfea15dSKevin Wolf     if (flags & BDRV_O_TEMPORARY) {
9277b272452SKevin Wolf         open_flags |= BDRV_O_RDWR;
9287b272452SKevin Wolf     }
9297b272452SKevin Wolf 
9307b272452SKevin Wolf     return open_flags;
9317b272452SKevin Wolf }
9327b272452SKevin Wolf 
933636ea370SKevin Wolf static void bdrv_assign_node_name(BlockDriverState *bs,
9346913c0c2SBenoît Canet                                   const char *node_name,
9356913c0c2SBenoît Canet                                   Error **errp)
9366913c0c2SBenoît Canet {
9376913c0c2SBenoît Canet     if (!node_name) {
938636ea370SKevin Wolf         return;
9396913c0c2SBenoît Canet     }
9406913c0c2SBenoît Canet 
9419aebf3b8SKevin Wolf     /* Check for empty string or invalid characters */
942f5bebbbbSMarkus Armbruster     if (!id_wellformed(node_name)) {
9439aebf3b8SKevin Wolf         error_setg(errp, "Invalid node name");
944636ea370SKevin Wolf         return;
9456913c0c2SBenoît Canet     }
9466913c0c2SBenoît Canet 
9470c5e94eeSBenoît Canet     /* takes care of avoiding namespaces collisions */
9487f06d47eSMarkus Armbruster     if (blk_by_name(node_name)) {
9490c5e94eeSBenoît Canet         error_setg(errp, "node-name=%s is conflicting with a device id",
9500c5e94eeSBenoît Canet                    node_name);
951636ea370SKevin Wolf         return;
9520c5e94eeSBenoît Canet     }
9530c5e94eeSBenoît Canet 
9546913c0c2SBenoît Canet     /* takes care of avoiding duplicates node names */
9556913c0c2SBenoît Canet     if (bdrv_find_node(node_name)) {
9566913c0c2SBenoît Canet         error_setg(errp, "Duplicate node name");
957636ea370SKevin Wolf         return;
9586913c0c2SBenoît Canet     }
9596913c0c2SBenoît Canet 
9606913c0c2SBenoît Canet     /* copy node name into the bs and insert it into the graph list */
9616913c0c2SBenoît Canet     pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
9626913c0c2SBenoît Canet     QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
9636913c0c2SBenoît Canet }
9646913c0c2SBenoît Canet 
965b6ce07aaSKevin Wolf /*
96657915332SKevin Wolf  * Common part for opening disk images and files
967b6ad491aSKevin Wolf  *
968b6ad491aSKevin Wolf  * Removes all processed options from *options.
96957915332SKevin Wolf  */
970f500a6d3SKevin Wolf static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
97134b5d2c6SMax Reitz     QDict *options, int flags, BlockDriver *drv, Error **errp)
97257915332SKevin Wolf {
97357915332SKevin Wolf     int ret, open_flags;
974035fccdfSKevin Wolf     const char *filename;
9756913c0c2SBenoît Canet     const char *node_name = NULL;
97634b5d2c6SMax Reitz     Error *local_err = NULL;
97757915332SKevin Wolf 
97857915332SKevin Wolf     assert(drv != NULL);
9796405875cSPaolo Bonzini     assert(bs->file == NULL);
980707ff828SKevin Wolf     assert(options != NULL && bs->options != options);
98157915332SKevin Wolf 
98245673671SKevin Wolf     if (file != NULL) {
98345673671SKevin Wolf         filename = file->filename;
98445673671SKevin Wolf     } else {
98545673671SKevin Wolf         filename = qdict_get_try_str(options, "filename");
98645673671SKevin Wolf     }
98745673671SKevin Wolf 
988765003dbSKevin Wolf     if (drv->bdrv_needs_filename && !filename) {
989765003dbSKevin Wolf         error_setg(errp, "The '%s' block driver requires a file name",
990765003dbSKevin Wolf                    drv->format_name);
991765003dbSKevin Wolf         return -EINVAL;
992765003dbSKevin Wolf     }
993765003dbSKevin Wolf 
99445673671SKevin Wolf     trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
99528dcee10SStefan Hajnoczi 
9966913c0c2SBenoît Canet     node_name = qdict_get_try_str(options, "node-name");
997636ea370SKevin Wolf     bdrv_assign_node_name(bs, node_name, &local_err);
9980fb6395cSMarkus Armbruster     if (local_err) {
999636ea370SKevin Wolf         error_propagate(errp, local_err);
1000636ea370SKevin Wolf         return -EINVAL;
10016913c0c2SBenoît Canet     }
10026913c0c2SBenoît Canet     qdict_del(options, "node-name");
10036913c0c2SBenoît Canet 
10045d186eb0SKevin Wolf     /* bdrv_open() with directly using a protocol as drv. This layer is already
10055d186eb0SKevin Wolf      * opened, so assign it to bs (while file becomes a closed BlockDriverState)
10065d186eb0SKevin Wolf      * and return immediately. */
10075d186eb0SKevin Wolf     if (file != NULL && drv->bdrv_file_open) {
10085d186eb0SKevin Wolf         bdrv_swap(file, bs);
10095d186eb0SKevin Wolf         return 0;
10105d186eb0SKevin Wolf     }
10115d186eb0SKevin Wolf 
101257915332SKevin Wolf     bs->open_flags = flags;
10131b7fd729SPaolo Bonzini     bs->guest_block_size = 512;
1014c25f53b0SPaolo Bonzini     bs->request_alignment = 512;
10150d51b4deSAsias He     bs->zero_beyond_eof = true;
1016b64ec4e4SFam Zheng     open_flags = bdrv_open_flags(bs, flags);
1017b64ec4e4SFam Zheng     bs->read_only = !(open_flags & BDRV_O_RDWR);
1018b64ec4e4SFam Zheng 
1019b64ec4e4SFam Zheng     if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
10208f94a6e4SKevin Wolf         error_setg(errp,
10218f94a6e4SKevin Wolf                    !bs->read_only && bdrv_is_whitelisted(drv, true)
10228f94a6e4SKevin Wolf                         ? "Driver '%s' can only be used for read-only devices"
10238f94a6e4SKevin Wolf                         : "Driver '%s' is not whitelisted",
10248f94a6e4SKevin Wolf                    drv->format_name);
1025b64ec4e4SFam Zheng         return -ENOTSUP;
1026b64ec4e4SFam Zheng     }
102757915332SKevin Wolf 
102853fec9d3SStefan Hajnoczi     assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
10290ebd24e0SKevin Wolf     if (flags & BDRV_O_COPY_ON_READ) {
10300ebd24e0SKevin Wolf         if (!bs->read_only) {
103153fec9d3SStefan Hajnoczi             bdrv_enable_copy_on_read(bs);
10320ebd24e0SKevin Wolf         } else {
10330ebd24e0SKevin Wolf             error_setg(errp, "Can't use copy-on-read on read-only device");
10340ebd24e0SKevin Wolf             return -EINVAL;
10350ebd24e0SKevin Wolf         }
103653fec9d3SStefan Hajnoczi     }
103753fec9d3SStefan Hajnoczi 
1038c2ad1b0cSKevin Wolf     if (filename != NULL) {
103957915332SKevin Wolf         pstrcpy(bs->filename, sizeof(bs->filename), filename);
1040c2ad1b0cSKevin Wolf     } else {
1041c2ad1b0cSKevin Wolf         bs->filename[0] = '\0';
1042c2ad1b0cSKevin Wolf     }
104391af7014SMax Reitz     pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
104457915332SKevin Wolf 
104557915332SKevin Wolf     bs->drv = drv;
10467267c094SAnthony Liguori     bs->opaque = g_malloc0(drv->instance_size);
104757915332SKevin Wolf 
104803f541bdSStefan Hajnoczi     bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
1049e7c63796SStefan Hajnoczi 
105066f82ceeSKevin Wolf     /* Open the image, either directly or using a protocol */
105166f82ceeSKevin Wolf     if (drv->bdrv_file_open) {
10525d186eb0SKevin Wolf         assert(file == NULL);
1053030be321SBenoît Canet         assert(!drv->bdrv_needs_filename || filename != NULL);
105434b5d2c6SMax Reitz         ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
1055f500a6d3SKevin Wolf     } else {
10562af5ef70SKevin Wolf         if (file == NULL) {
105734b5d2c6SMax Reitz             error_setg(errp, "Can't use '%s' as a block driver for the "
105834b5d2c6SMax Reitz                        "protocol level", drv->format_name);
10592af5ef70SKevin Wolf             ret = -EINVAL;
10602af5ef70SKevin Wolf             goto free_and_fail;
10612af5ef70SKevin Wolf         }
1062f500a6d3SKevin Wolf         bs->file = file;
106334b5d2c6SMax Reitz         ret = drv->bdrv_open(bs, options, open_flags, &local_err);
106466f82ceeSKevin Wolf     }
106566f82ceeSKevin Wolf 
106657915332SKevin Wolf     if (ret < 0) {
106784d18f06SMarkus Armbruster         if (local_err) {
106834b5d2c6SMax Reitz             error_propagate(errp, local_err);
10692fa9aa59SDunrong Huang         } else if (bs->filename[0]) {
10702fa9aa59SDunrong Huang             error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
107134b5d2c6SMax Reitz         } else {
107234b5d2c6SMax Reitz             error_setg_errno(errp, -ret, "Could not open image");
107334b5d2c6SMax Reitz         }
107457915332SKevin Wolf         goto free_and_fail;
107557915332SKevin Wolf     }
107657915332SKevin Wolf 
1077a1f688f4SMarkus Armbruster     if (bs->encrypted) {
1078a1f688f4SMarkus Armbruster         error_report("Encrypted images are deprecated");
1079a1f688f4SMarkus Armbruster         error_printf("Support for them will be removed in a future release.\n"
1080a1f688f4SMarkus Armbruster                      "You can use 'qemu-img convert' to convert your image"
1081a1f688f4SMarkus Armbruster                      " to an unencrypted one.\n");
1082a1f688f4SMarkus Armbruster     }
1083a1f688f4SMarkus Armbruster 
108451762288SStefan Hajnoczi     ret = refresh_total_sectors(bs, bs->total_sectors);
108551762288SStefan Hajnoczi     if (ret < 0) {
108634b5d2c6SMax Reitz         error_setg_errno(errp, -ret, "Could not refresh total sector count");
108751762288SStefan Hajnoczi         goto free_and_fail;
108857915332SKevin Wolf     }
108951762288SStefan Hajnoczi 
10903baca891SKevin Wolf     bdrv_refresh_limits(bs, &local_err);
10913baca891SKevin Wolf     if (local_err) {
10923baca891SKevin Wolf         error_propagate(errp, local_err);
10933baca891SKevin Wolf         ret = -EINVAL;
10943baca891SKevin Wolf         goto free_and_fail;
10953baca891SKevin Wolf     }
10963baca891SKevin Wolf 
1097c25f53b0SPaolo Bonzini     assert(bdrv_opt_mem_align(bs) != 0);
109847ea2de2SKevin Wolf     assert((bs->request_alignment != 0) || bs->sg);
109957915332SKevin Wolf     return 0;
110057915332SKevin Wolf 
110157915332SKevin Wolf free_and_fail:
110266f82ceeSKevin Wolf     bs->file = NULL;
11037267c094SAnthony Liguori     g_free(bs->opaque);
110457915332SKevin Wolf     bs->opaque = NULL;
110557915332SKevin Wolf     bs->drv = NULL;
110657915332SKevin Wolf     return ret;
110757915332SKevin Wolf }
110857915332SKevin Wolf 
11095e5c4f63SKevin Wolf static QDict *parse_json_filename(const char *filename, Error **errp)
11105e5c4f63SKevin Wolf {
11115e5c4f63SKevin Wolf     QObject *options_obj;
11125e5c4f63SKevin Wolf     QDict *options;
11135e5c4f63SKevin Wolf     int ret;
11145e5c4f63SKevin Wolf 
11155e5c4f63SKevin Wolf     ret = strstart(filename, "json:", &filename);
11165e5c4f63SKevin Wolf     assert(ret);
11175e5c4f63SKevin Wolf 
11185e5c4f63SKevin Wolf     options_obj = qobject_from_json(filename);
11195e5c4f63SKevin Wolf     if (!options_obj) {
11205e5c4f63SKevin Wolf         error_setg(errp, "Could not parse the JSON options");
11215e5c4f63SKevin Wolf         return NULL;
11225e5c4f63SKevin Wolf     }
11235e5c4f63SKevin Wolf 
11245e5c4f63SKevin Wolf     if (qobject_type(options_obj) != QTYPE_QDICT) {
11255e5c4f63SKevin Wolf         qobject_decref(options_obj);
11265e5c4f63SKevin Wolf         error_setg(errp, "Invalid JSON object given");
11275e5c4f63SKevin Wolf         return NULL;
11285e5c4f63SKevin Wolf     }
11295e5c4f63SKevin Wolf 
11305e5c4f63SKevin Wolf     options = qobject_to_qdict(options_obj);
11315e5c4f63SKevin Wolf     qdict_flatten(options);
11325e5c4f63SKevin Wolf 
11335e5c4f63SKevin Wolf     return options;
11345e5c4f63SKevin Wolf }
11355e5c4f63SKevin Wolf 
113657915332SKevin Wolf /*
1137f54120ffSKevin Wolf  * Fills in default options for opening images and converts the legacy
1138f54120ffSKevin Wolf  * filename/flags pair to option QDict entries.
1139f54120ffSKevin Wolf  */
11405e5c4f63SKevin Wolf static int bdrv_fill_options(QDict **options, const char **pfilename, int flags,
114117b005f1SKevin Wolf                              BlockDriver *drv, Error **errp)
1142f54120ffSKevin Wolf {
11435e5c4f63SKevin Wolf     const char *filename = *pfilename;
1144f54120ffSKevin Wolf     const char *drvname;
1145462f5bcfSKevin Wolf     bool protocol = flags & BDRV_O_PROTOCOL;
1146f54120ffSKevin Wolf     bool parse_filename = false;
1147f54120ffSKevin Wolf     Error *local_err = NULL;
1148f54120ffSKevin Wolf 
11495e5c4f63SKevin Wolf     /* Parse json: pseudo-protocol */
11505e5c4f63SKevin Wolf     if (filename && g_str_has_prefix(filename, "json:")) {
11515e5c4f63SKevin Wolf         QDict *json_options = parse_json_filename(filename, &local_err);
11525e5c4f63SKevin Wolf         if (local_err) {
11535e5c4f63SKevin Wolf             error_propagate(errp, local_err);
11545e5c4f63SKevin Wolf             return -EINVAL;
11555e5c4f63SKevin Wolf         }
11565e5c4f63SKevin Wolf 
11575e5c4f63SKevin Wolf         /* Options given in the filename have lower priority than options
11585e5c4f63SKevin Wolf          * specified directly */
11595e5c4f63SKevin Wolf         qdict_join(*options, json_options, false);
11605e5c4f63SKevin Wolf         QDECREF(json_options);
11615e5c4f63SKevin Wolf         *pfilename = filename = NULL;
11625e5c4f63SKevin Wolf     }
11635e5c4f63SKevin Wolf 
1164f54120ffSKevin Wolf     /* Fetch the file name from the options QDict if necessary */
116517b005f1SKevin Wolf     if (protocol && filename) {
1166f54120ffSKevin Wolf         if (!qdict_haskey(*options, "filename")) {
1167f54120ffSKevin Wolf             qdict_put(*options, "filename", qstring_from_str(filename));
1168f54120ffSKevin Wolf             parse_filename = true;
1169f54120ffSKevin Wolf         } else {
1170f54120ffSKevin Wolf             error_setg(errp, "Can't specify 'file' and 'filename' options at "
1171f54120ffSKevin Wolf                              "the same time");
1172f54120ffSKevin Wolf             return -EINVAL;
1173f54120ffSKevin Wolf         }
1174f54120ffSKevin Wolf     }
1175f54120ffSKevin Wolf 
1176f54120ffSKevin Wolf     /* Find the right block driver */
1177f54120ffSKevin Wolf     filename = qdict_get_try_str(*options, "filename");
1178f54120ffSKevin Wolf     drvname = qdict_get_try_str(*options, "driver");
1179f54120ffSKevin Wolf 
118017b005f1SKevin Wolf     if (drv) {
118117b005f1SKevin Wolf         if (drvname) {
118217b005f1SKevin Wolf             error_setg(errp, "Driver specified twice");
118317b005f1SKevin Wolf             return -EINVAL;
118417b005f1SKevin Wolf         }
118517b005f1SKevin Wolf         drvname = drv->format_name;
118617b005f1SKevin Wolf         qdict_put(*options, "driver", qstring_from_str(drvname));
118717b005f1SKevin Wolf     } else {
118817b005f1SKevin Wolf         if (!drvname && protocol) {
1189f54120ffSKevin Wolf             if (filename) {
1190b65a5e12SMax Reitz                 drv = bdrv_find_protocol(filename, parse_filename, errp);
1191f54120ffSKevin Wolf                 if (!drv) {
1192f54120ffSKevin Wolf                     return -EINVAL;
1193f54120ffSKevin Wolf                 }
1194f54120ffSKevin Wolf 
1195f54120ffSKevin Wolf                 drvname = drv->format_name;
1196f54120ffSKevin Wolf                 qdict_put(*options, "driver", qstring_from_str(drvname));
1197f54120ffSKevin Wolf             } else {
1198f54120ffSKevin Wolf                 error_setg(errp, "Must specify either driver or file");
1199f54120ffSKevin Wolf                 return -EINVAL;
1200f54120ffSKevin Wolf             }
120117b005f1SKevin Wolf         } else if (drvname) {
1202f54120ffSKevin Wolf             drv = bdrv_find_format(drvname);
1203f54120ffSKevin Wolf             if (!drv) {
1204f54120ffSKevin Wolf                 error_setg(errp, "Unknown driver '%s'", drvname);
1205f54120ffSKevin Wolf                 return -ENOENT;
1206f54120ffSKevin Wolf             }
120717b005f1SKevin Wolf         }
120817b005f1SKevin Wolf     }
120917b005f1SKevin Wolf 
121017b005f1SKevin Wolf     assert(drv || !protocol);
1211f54120ffSKevin Wolf 
1212f54120ffSKevin Wolf     /* Driver-specific filename parsing */
121317b005f1SKevin Wolf     if (drv && drv->bdrv_parse_filename && parse_filename) {
1214f54120ffSKevin Wolf         drv->bdrv_parse_filename(filename, *options, &local_err);
1215f54120ffSKevin Wolf         if (local_err) {
1216f54120ffSKevin Wolf             error_propagate(errp, local_err);
1217f54120ffSKevin Wolf             return -EINVAL;
1218f54120ffSKevin Wolf         }
1219f54120ffSKevin Wolf 
1220f54120ffSKevin Wolf         if (!drv->bdrv_needs_filename) {
1221f54120ffSKevin Wolf             qdict_del(*options, "filename");
1222f54120ffSKevin Wolf         }
1223f54120ffSKevin Wolf     }
1224f54120ffSKevin Wolf 
1225f54120ffSKevin Wolf     return 0;
1226f54120ffSKevin Wolf }
1227f54120ffSKevin Wolf 
12288d24cce1SFam Zheng void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
12298d24cce1SFam Zheng {
12308d24cce1SFam Zheng 
1231826b6ca0SFam Zheng     if (bs->backing_hd) {
1232826b6ca0SFam Zheng         assert(bs->backing_blocker);
1233826b6ca0SFam Zheng         bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
1234826b6ca0SFam Zheng     } else if (backing_hd) {
1235826b6ca0SFam Zheng         error_setg(&bs->backing_blocker,
123681e5f78aSAlberto Garcia                    "node is used as backing hd of '%s'",
123781e5f78aSAlberto Garcia                    bdrv_get_device_or_node_name(bs));
1238826b6ca0SFam Zheng     }
1239826b6ca0SFam Zheng 
12408d24cce1SFam Zheng     bs->backing_hd = backing_hd;
12418d24cce1SFam Zheng     if (!backing_hd) {
1242826b6ca0SFam Zheng         error_free(bs->backing_blocker);
1243826b6ca0SFam Zheng         bs->backing_blocker = NULL;
12448d24cce1SFam Zheng         goto out;
12458d24cce1SFam Zheng     }
12468d24cce1SFam Zheng     bs->open_flags &= ~BDRV_O_NO_BACKING;
12478d24cce1SFam Zheng     pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
12488d24cce1SFam Zheng     pstrcpy(bs->backing_format, sizeof(bs->backing_format),
12498d24cce1SFam Zheng             backing_hd->drv ? backing_hd->drv->format_name : "");
1250826b6ca0SFam Zheng 
1251826b6ca0SFam Zheng     bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
1252826b6ca0SFam Zheng     /* Otherwise we won't be able to commit due to check in bdrv_commit */
1253bb00021dSFam Zheng     bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1254826b6ca0SFam Zheng                     bs->backing_blocker);
12558d24cce1SFam Zheng out:
12563baca891SKevin Wolf     bdrv_refresh_limits(bs, NULL);
12578d24cce1SFam Zheng }
12588d24cce1SFam Zheng 
125931ca6d07SKevin Wolf /*
126031ca6d07SKevin Wolf  * Opens the backing file for a BlockDriverState if not yet open
126131ca6d07SKevin Wolf  *
126231ca6d07SKevin Wolf  * options is a QDict of options to pass to the block drivers, or NULL for an
126331ca6d07SKevin Wolf  * empty set of options. The reference to the QDict is transferred to this
126431ca6d07SKevin Wolf  * function (even on failure), so if the caller intends to reuse the dictionary,
126531ca6d07SKevin Wolf  * it needs to use QINCREF() before calling bdrv_file_open.
126631ca6d07SKevin Wolf  */
126734b5d2c6SMax Reitz int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
12689156df12SPaolo Bonzini {
12691ba4b6a5SBenoît Canet     char *backing_filename = g_malloc0(PATH_MAX);
1270317fc44eSKevin Wolf     int ret = 0;
12718d24cce1SFam Zheng     BlockDriverState *backing_hd;
127234b5d2c6SMax Reitz     Error *local_err = NULL;
12739156df12SPaolo Bonzini 
12749156df12SPaolo Bonzini     if (bs->backing_hd != NULL) {
127531ca6d07SKevin Wolf         QDECREF(options);
12761ba4b6a5SBenoît Canet         goto free_exit;
12779156df12SPaolo Bonzini     }
12789156df12SPaolo Bonzini 
127931ca6d07SKevin Wolf     /* NULL means an empty set of options */
128031ca6d07SKevin Wolf     if (options == NULL) {
128131ca6d07SKevin Wolf         options = qdict_new();
128231ca6d07SKevin Wolf     }
128331ca6d07SKevin Wolf 
12849156df12SPaolo Bonzini     bs->open_flags &= ~BDRV_O_NO_BACKING;
12851cb6f506SKevin Wolf     if (qdict_haskey(options, "file.filename")) {
12861cb6f506SKevin Wolf         backing_filename[0] = '\0';
12871cb6f506SKevin Wolf     } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
128831ca6d07SKevin Wolf         QDECREF(options);
12891ba4b6a5SBenoît Canet         goto free_exit;
1290dbecebddSFam Zheng     } else {
12919f07429eSMax Reitz         bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
12929f07429eSMax Reitz                                        &local_err);
12939f07429eSMax Reitz         if (local_err) {
12949f07429eSMax Reitz             ret = -EINVAL;
12959f07429eSMax Reitz             error_propagate(errp, local_err);
12969f07429eSMax Reitz             QDECREF(options);
12979f07429eSMax Reitz             goto free_exit;
12989f07429eSMax Reitz         }
12999156df12SPaolo Bonzini     }
13009156df12SPaolo Bonzini 
13018ee79e70SKevin Wolf     if (!bs->drv || !bs->drv->supports_backing) {
13028ee79e70SKevin Wolf         ret = -EINVAL;
13038ee79e70SKevin Wolf         error_setg(errp, "Driver doesn't support backing files");
13048ee79e70SKevin Wolf         QDECREF(options);
13058ee79e70SKevin Wolf         goto free_exit;
13068ee79e70SKevin Wolf     }
13078ee79e70SKevin Wolf 
1308e4e9986bSMarkus Armbruster     backing_hd = bdrv_new();
13098d24cce1SFam Zheng 
1310c5f6e493SKevin Wolf     if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1311c5f6e493SKevin Wolf         qdict_put(options, "driver", qstring_from_str(bs->backing_format));
13129156df12SPaolo Bonzini     }
13139156df12SPaolo Bonzini 
1314f67503e5SMax Reitz     assert(bs->backing_hd == NULL);
13158d24cce1SFam Zheng     ret = bdrv_open(&backing_hd,
1316ddf5636dSMax Reitz                     *backing_filename ? backing_filename : NULL, NULL, options,
1317c5f6e493SKevin Wolf                     bdrv_backing_flags(bs->open_flags), NULL, &local_err);
13189156df12SPaolo Bonzini     if (ret < 0) {
13198d24cce1SFam Zheng         bdrv_unref(backing_hd);
13208d24cce1SFam Zheng         backing_hd = NULL;
13219156df12SPaolo Bonzini         bs->open_flags |= BDRV_O_NO_BACKING;
1322b04b6b6eSFam Zheng         error_setg(errp, "Could not open backing file: %s",
1323b04b6b6eSFam Zheng                    error_get_pretty(local_err));
1324b04b6b6eSFam Zheng         error_free(local_err);
13251ba4b6a5SBenoît Canet         goto free_exit;
13269156df12SPaolo Bonzini     }
13278d24cce1SFam Zheng     bdrv_set_backing_hd(bs, backing_hd);
1328d80ac658SPeter Feiner 
13291ba4b6a5SBenoît Canet free_exit:
13301ba4b6a5SBenoît Canet     g_free(backing_filename);
13311ba4b6a5SBenoît Canet     return ret;
13329156df12SPaolo Bonzini }
13339156df12SPaolo Bonzini 
1334b6ce07aaSKevin Wolf /*
1335da557aacSMax Reitz  * Opens a disk image whose options are given as BlockdevRef in another block
1336da557aacSMax Reitz  * device's options.
1337da557aacSMax Reitz  *
1338da557aacSMax Reitz  * If allow_none is true, no image will be opened if filename is false and no
1339da557aacSMax Reitz  * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1340da557aacSMax Reitz  *
1341da557aacSMax Reitz  * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1342da557aacSMax Reitz  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1343da557aacSMax Reitz  * itself, all options starting with "${bdref_key}." are considered part of the
1344da557aacSMax Reitz  * BlockdevRef.
1345da557aacSMax Reitz  *
1346da557aacSMax Reitz  * The BlockdevRef will be removed from the options QDict.
1347f67503e5SMax Reitz  *
1348f67503e5SMax Reitz  * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
1349da557aacSMax Reitz  */
1350da557aacSMax Reitz int bdrv_open_image(BlockDriverState **pbs, const char *filename,
1351da557aacSMax Reitz                     QDict *options, const char *bdref_key, int flags,
1352f7d9fd8cSMax Reitz                     bool allow_none, Error **errp)
1353da557aacSMax Reitz {
1354da557aacSMax Reitz     QDict *image_options;
1355da557aacSMax Reitz     int ret;
1356da557aacSMax Reitz     char *bdref_key_dot;
1357da557aacSMax Reitz     const char *reference;
1358da557aacSMax Reitz 
1359f67503e5SMax Reitz     assert(pbs);
1360f67503e5SMax Reitz     assert(*pbs == NULL);
1361f67503e5SMax Reitz 
1362da557aacSMax Reitz     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1363da557aacSMax Reitz     qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1364da557aacSMax Reitz     g_free(bdref_key_dot);
1365da557aacSMax Reitz 
1366da557aacSMax Reitz     reference = qdict_get_try_str(options, bdref_key);
1367da557aacSMax Reitz     if (!filename && !reference && !qdict_size(image_options)) {
1368da557aacSMax Reitz         if (allow_none) {
1369da557aacSMax Reitz             ret = 0;
1370da557aacSMax Reitz         } else {
1371da557aacSMax Reitz             error_setg(errp, "A block device must be specified for \"%s\"",
1372da557aacSMax Reitz                        bdref_key);
1373da557aacSMax Reitz             ret = -EINVAL;
1374da557aacSMax Reitz         }
1375b20e61e0SMarkus Armbruster         QDECREF(image_options);
1376da557aacSMax Reitz         goto done;
1377da557aacSMax Reitz     }
1378da557aacSMax Reitz 
1379f7d9fd8cSMax Reitz     ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp);
1380da557aacSMax Reitz 
1381da557aacSMax Reitz done:
1382da557aacSMax Reitz     qdict_del(options, bdref_key);
1383da557aacSMax Reitz     return ret;
1384da557aacSMax Reitz }
1385da557aacSMax Reitz 
13866b8aeca5SChen Gang int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
1387b998875dSKevin Wolf {
1388b998875dSKevin Wolf     /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
13891ba4b6a5SBenoît Canet     char *tmp_filename = g_malloc0(PATH_MAX + 1);
1390b998875dSKevin Wolf     int64_t total_size;
139183d0521aSChunyan Liu     QemuOpts *opts = NULL;
1392b998875dSKevin Wolf     QDict *snapshot_options;
1393b998875dSKevin Wolf     BlockDriverState *bs_snapshot;
1394b998875dSKevin Wolf     Error *local_err;
1395b998875dSKevin Wolf     int ret;
1396b998875dSKevin Wolf 
1397b998875dSKevin Wolf     /* if snapshot, we create a temporary backing file and open it
1398b998875dSKevin Wolf        instead of opening 'filename' directly */
1399b998875dSKevin Wolf 
1400b998875dSKevin Wolf     /* Get the required size from the image */
1401f187743aSKevin Wolf     total_size = bdrv_getlength(bs);
1402f187743aSKevin Wolf     if (total_size < 0) {
14036b8aeca5SChen Gang         ret = total_size;
1404f187743aSKevin Wolf         error_setg_errno(errp, -total_size, "Could not get image size");
14051ba4b6a5SBenoît Canet         goto out;
1406f187743aSKevin Wolf     }
1407b998875dSKevin Wolf 
1408b998875dSKevin Wolf     /* Create the temporary image */
14091ba4b6a5SBenoît Canet     ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1410b998875dSKevin Wolf     if (ret < 0) {
1411b998875dSKevin Wolf         error_setg_errno(errp, -ret, "Could not get temporary filename");
14121ba4b6a5SBenoît Canet         goto out;
1413b998875dSKevin Wolf     }
1414b998875dSKevin Wolf 
1415ef810437SMax Reitz     opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1416c282e1fdSChunyan Liu                             &error_abort);
141739101f25SMarkus Armbruster     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1418ef810437SMax Reitz     ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err);
141983d0521aSChunyan Liu     qemu_opts_del(opts);
1420b998875dSKevin Wolf     if (ret < 0) {
1421b998875dSKevin Wolf         error_setg_errno(errp, -ret, "Could not create temporary overlay "
1422b998875dSKevin Wolf                          "'%s': %s", tmp_filename,
1423b998875dSKevin Wolf                          error_get_pretty(local_err));
1424b998875dSKevin Wolf         error_free(local_err);
14251ba4b6a5SBenoît Canet         goto out;
1426b998875dSKevin Wolf     }
1427b998875dSKevin Wolf 
1428b998875dSKevin Wolf     /* Prepare a new options QDict for the temporary file */
1429b998875dSKevin Wolf     snapshot_options = qdict_new();
1430b998875dSKevin Wolf     qdict_put(snapshot_options, "file.driver",
1431b998875dSKevin Wolf               qstring_from_str("file"));
1432b998875dSKevin Wolf     qdict_put(snapshot_options, "file.filename",
1433b998875dSKevin Wolf               qstring_from_str(tmp_filename));
1434b998875dSKevin Wolf 
1435e4e9986bSMarkus Armbruster     bs_snapshot = bdrv_new();
1436b998875dSKevin Wolf 
1437b998875dSKevin Wolf     ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1438ef810437SMax Reitz                     flags, &bdrv_qcow2, &local_err);
1439b998875dSKevin Wolf     if (ret < 0) {
1440b998875dSKevin Wolf         error_propagate(errp, local_err);
14411ba4b6a5SBenoît Canet         goto out;
1442b998875dSKevin Wolf     }
1443b998875dSKevin Wolf 
1444b998875dSKevin Wolf     bdrv_append(bs_snapshot, bs);
14451ba4b6a5SBenoît Canet 
14461ba4b6a5SBenoît Canet out:
14471ba4b6a5SBenoît Canet     g_free(tmp_filename);
14486b8aeca5SChen Gang     return ret;
1449b998875dSKevin Wolf }
1450b998875dSKevin Wolf 
1451da557aacSMax Reitz /*
1452b6ce07aaSKevin Wolf  * Opens a disk image (raw, qcow2, vmdk, ...)
1453de9c0cecSKevin Wolf  *
1454de9c0cecSKevin Wolf  * options is a QDict of options to pass to the block drivers, or NULL for an
1455de9c0cecSKevin Wolf  * empty set of options. The reference to the QDict belongs to the block layer
1456de9c0cecSKevin Wolf  * after the call (even on failure), so if the caller intends to reuse the
1457de9c0cecSKevin Wolf  * dictionary, it needs to use QINCREF() before calling bdrv_open.
1458f67503e5SMax Reitz  *
1459f67503e5SMax Reitz  * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1460f67503e5SMax Reitz  * If it is not NULL, the referenced BDS will be reused.
1461ddf5636dSMax Reitz  *
1462ddf5636dSMax Reitz  * The reference parameter may be used to specify an existing block device which
1463ddf5636dSMax Reitz  * should be opened. If specified, neither options nor a filename may be given,
1464ddf5636dSMax Reitz  * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1465b6ce07aaSKevin Wolf  */
1466ddf5636dSMax Reitz int bdrv_open(BlockDriverState **pbs, const char *filename,
1467ddf5636dSMax Reitz               const char *reference, QDict *options, int flags,
1468ddf5636dSMax Reitz               BlockDriver *drv, Error **errp)
1469ea2384d3Sbellard {
1470b6ce07aaSKevin Wolf     int ret;
1471f67503e5SMax Reitz     BlockDriverState *file = NULL, *bs;
147274fe54f2SKevin Wolf     const char *drvname;
147334b5d2c6SMax Reitz     Error *local_err = NULL;
1474b1e6fc08SKevin Wolf     int snapshot_flags = 0;
147533e3963eSbellard 
1476f67503e5SMax Reitz     assert(pbs);
1477f67503e5SMax Reitz 
1478ddf5636dSMax Reitz     if (reference) {
1479ddf5636dSMax Reitz         bool options_non_empty = options ? qdict_size(options) : false;
1480ddf5636dSMax Reitz         QDECREF(options);
1481ddf5636dSMax Reitz 
1482ddf5636dSMax Reitz         if (*pbs) {
1483ddf5636dSMax Reitz             error_setg(errp, "Cannot reuse an existing BDS when referencing "
1484ddf5636dSMax Reitz                        "another block device");
1485ddf5636dSMax Reitz             return -EINVAL;
1486ddf5636dSMax Reitz         }
1487ddf5636dSMax Reitz 
1488ddf5636dSMax Reitz         if (filename || options_non_empty) {
1489ddf5636dSMax Reitz             error_setg(errp, "Cannot reference an existing block device with "
1490ddf5636dSMax Reitz                        "additional options or a new filename");
1491ddf5636dSMax Reitz             return -EINVAL;
1492ddf5636dSMax Reitz         }
1493ddf5636dSMax Reitz 
1494ddf5636dSMax Reitz         bs = bdrv_lookup_bs(reference, reference, errp);
1495ddf5636dSMax Reitz         if (!bs) {
1496ddf5636dSMax Reitz             return -ENODEV;
1497ddf5636dSMax Reitz         }
1498ddf5636dSMax Reitz         bdrv_ref(bs);
1499ddf5636dSMax Reitz         *pbs = bs;
1500ddf5636dSMax Reitz         return 0;
1501ddf5636dSMax Reitz     }
1502ddf5636dSMax Reitz 
1503f67503e5SMax Reitz     if (*pbs) {
1504f67503e5SMax Reitz         bs = *pbs;
1505f67503e5SMax Reitz     } else {
1506e4e9986bSMarkus Armbruster         bs = bdrv_new();
1507f67503e5SMax Reitz     }
1508f67503e5SMax Reitz 
1509de9c0cecSKevin Wolf     /* NULL means an empty set of options */
1510de9c0cecSKevin Wolf     if (options == NULL) {
1511de9c0cecSKevin Wolf         options = qdict_new();
1512de9c0cecSKevin Wolf     }
1513de9c0cecSKevin Wolf 
151417b005f1SKevin Wolf     ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err);
1515462f5bcfSKevin Wolf     if (local_err) {
1516462f5bcfSKevin Wolf         goto fail;
1517462f5bcfSKevin Wolf     }
1518462f5bcfSKevin Wolf 
151976c591b0SKevin Wolf     /* Find the right image format driver */
152076c591b0SKevin Wolf     drv = NULL;
152176c591b0SKevin Wolf     drvname = qdict_get_try_str(options, "driver");
152276c591b0SKevin Wolf     if (drvname) {
152376c591b0SKevin Wolf         drv = bdrv_find_format(drvname);
152476c591b0SKevin Wolf         qdict_del(options, "driver");
152576c591b0SKevin Wolf         if (!drv) {
152676c591b0SKevin Wolf             error_setg(errp, "Unknown driver: '%s'", drvname);
152776c591b0SKevin Wolf             ret = -EINVAL;
152876c591b0SKevin Wolf             goto fail;
152976c591b0SKevin Wolf         }
153076c591b0SKevin Wolf     }
153176c591b0SKevin Wolf 
153276c591b0SKevin Wolf     assert(drvname || !(flags & BDRV_O_PROTOCOL));
153376c591b0SKevin Wolf     if (drv && !drv->bdrv_file_open) {
153476c591b0SKevin Wolf         /* If the user explicitly wants a format driver here, we'll need to add
153576c591b0SKevin Wolf          * another layer for the protocol in bs->file */
153676c591b0SKevin Wolf         flags &= ~BDRV_O_PROTOCOL;
153776c591b0SKevin Wolf     }
153876c591b0SKevin Wolf 
1539de9c0cecSKevin Wolf     bs->options = options;
1540b6ad491aSKevin Wolf     options = qdict_clone_shallow(options);
1541de9c0cecSKevin Wolf 
1542f500a6d3SKevin Wolf     /* Open image file without format layer */
1543f4788adcSKevin Wolf     if ((flags & BDRV_O_PROTOCOL) == 0) {
1544be028adcSJeff Cody         if (flags & BDRV_O_RDWR) {
1545be028adcSJeff Cody             flags |= BDRV_O_ALLOW_RDWR;
1546be028adcSJeff Cody         }
1547b1e6fc08SKevin Wolf         if (flags & BDRV_O_SNAPSHOT) {
1548b1e6fc08SKevin Wolf             snapshot_flags = bdrv_temp_snapshot_flags(flags);
1549b1e6fc08SKevin Wolf             flags = bdrv_backing_flags(flags);
1550b1e6fc08SKevin Wolf         }
1551be028adcSJeff Cody 
1552f67503e5SMax Reitz         assert(file == NULL);
1553054963f8SMax Reitz         ret = bdrv_open_image(&file, filename, options, "file",
15540b50cc88SKevin Wolf                               bdrv_inherited_flags(flags),
15550b50cc88SKevin Wolf                               true, &local_err);
1556f500a6d3SKevin Wolf         if (ret < 0) {
15578bfea15dSKevin Wolf             goto fail;
1558f500a6d3SKevin Wolf         }
1559f4788adcSKevin Wolf     }
1560f500a6d3SKevin Wolf 
156176c591b0SKevin Wolf     /* Image format probing */
156238f3ef57SKevin Wolf     bs->probed = !drv;
156376c591b0SKevin Wolf     if (!drv && file) {
156434b5d2c6SMax Reitz         ret = find_image_format(file, filename, &drv, &local_err);
156517b005f1SKevin Wolf         if (ret < 0) {
156617b005f1SKevin Wolf             goto fail;
156717b005f1SKevin Wolf         }
156876c591b0SKevin Wolf     } else if (!drv) {
15692a05cbe4SMax Reitz         error_setg(errp, "Must specify either driver or file");
15702a05cbe4SMax Reitz         ret = -EINVAL;
15718bfea15dSKevin Wolf         goto fail;
15722a05cbe4SMax Reitz     }
1573f500a6d3SKevin Wolf 
1574b6ce07aaSKevin Wolf     /* Open the image */
157534b5d2c6SMax Reitz     ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
1576b6ce07aaSKevin Wolf     if (ret < 0) {
15778bfea15dSKevin Wolf         goto fail;
15786987307cSChristoph Hellwig     }
15796987307cSChristoph Hellwig 
15802a05cbe4SMax Reitz     if (file && (bs->file != file)) {
15814f6fd349SFam Zheng         bdrv_unref(file);
1582f500a6d3SKevin Wolf         file = NULL;
1583f500a6d3SKevin Wolf     }
1584f500a6d3SKevin Wolf 
1585b6ce07aaSKevin Wolf     /* If there is a backing file, use it */
15869156df12SPaolo Bonzini     if ((flags & BDRV_O_NO_BACKING) == 0) {
158731ca6d07SKevin Wolf         QDict *backing_options;
158831ca6d07SKevin Wolf 
15895726d872SBenoît Canet         qdict_extract_subqdict(options, &backing_options, "backing.");
159034b5d2c6SMax Reitz         ret = bdrv_open_backing_file(bs, backing_options, &local_err);
1591b6ce07aaSKevin Wolf         if (ret < 0) {
1592b6ad491aSKevin Wolf             goto close_and_fail;
1593b6ce07aaSKevin Wolf         }
1594b6ce07aaSKevin Wolf     }
1595b6ce07aaSKevin Wolf 
159691af7014SMax Reitz     bdrv_refresh_filename(bs);
159791af7014SMax Reitz 
1598b998875dSKevin Wolf     /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1599b998875dSKevin Wolf      * temporary snapshot afterwards. */
1600b1e6fc08SKevin Wolf     if (snapshot_flags) {
16016b8aeca5SChen Gang         ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
1602b998875dSKevin Wolf         if (local_err) {
1603b998875dSKevin Wolf             goto close_and_fail;
1604b998875dSKevin Wolf         }
1605b998875dSKevin Wolf     }
1606b998875dSKevin Wolf 
1607b6ad491aSKevin Wolf     /* Check if any unknown options were used */
16085acd9d81SMax Reitz     if (options && (qdict_size(options) != 0)) {
1609b6ad491aSKevin Wolf         const QDictEntry *entry = qdict_first(options);
16105acd9d81SMax Reitz         if (flags & BDRV_O_PROTOCOL) {
16115acd9d81SMax Reitz             error_setg(errp, "Block protocol '%s' doesn't support the option "
16125acd9d81SMax Reitz                        "'%s'", drv->format_name, entry->key);
16135acd9d81SMax Reitz         } else {
161434b5d2c6SMax Reitz             error_setg(errp, "Block format '%s' used by device '%s' doesn't "
16155acd9d81SMax Reitz                        "support the option '%s'", drv->format_name,
1616bfb197e0SMarkus Armbruster                        bdrv_get_device_name(bs), entry->key);
16175acd9d81SMax Reitz         }
1618b6ad491aSKevin Wolf 
1619b6ad491aSKevin Wolf         ret = -EINVAL;
1620b6ad491aSKevin Wolf         goto close_and_fail;
1621b6ad491aSKevin Wolf     }
1622b6ad491aSKevin Wolf 
1623b6ce07aaSKevin Wolf     if (!bdrv_key_required(bs)) {
1624a7f53e26SMarkus Armbruster         if (bs->blk) {
1625a7f53e26SMarkus Armbruster             blk_dev_change_media_cb(bs->blk, true);
1626a7f53e26SMarkus Armbruster         }
1627c3adb58fSMarkus Armbruster     } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1628c3adb58fSMarkus Armbruster                && !runstate_check(RUN_STATE_INMIGRATE)
1629c3adb58fSMarkus Armbruster                && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1630c3adb58fSMarkus Armbruster         error_setg(errp,
1631c3adb58fSMarkus Armbruster                    "Guest must be stopped for opening of encrypted image");
1632c3adb58fSMarkus Armbruster         ret = -EBUSY;
1633c3adb58fSMarkus Armbruster         goto close_and_fail;
1634b6ce07aaSKevin Wolf     }
1635b6ce07aaSKevin Wolf 
1636c3adb58fSMarkus Armbruster     QDECREF(options);
1637f67503e5SMax Reitz     *pbs = bs;
1638b6ce07aaSKevin Wolf     return 0;
1639b6ce07aaSKevin Wolf 
16408bfea15dSKevin Wolf fail:
1641f500a6d3SKevin Wolf     if (file != NULL) {
16424f6fd349SFam Zheng         bdrv_unref(file);
1643f500a6d3SKevin Wolf     }
1644de9c0cecSKevin Wolf     QDECREF(bs->options);
1645b6ad491aSKevin Wolf     QDECREF(options);
1646de9c0cecSKevin Wolf     bs->options = NULL;
1647f67503e5SMax Reitz     if (!*pbs) {
1648f67503e5SMax Reitz         /* If *pbs is NULL, a new BDS has been created in this function and
1649f67503e5SMax Reitz            needs to be freed now. Otherwise, it does not need to be closed,
1650f67503e5SMax Reitz            since it has not really been opened yet. */
1651f67503e5SMax Reitz         bdrv_unref(bs);
1652f67503e5SMax Reitz     }
165384d18f06SMarkus Armbruster     if (local_err) {
165434b5d2c6SMax Reitz         error_propagate(errp, local_err);
165534b5d2c6SMax Reitz     }
1656b6ad491aSKevin Wolf     return ret;
1657de9c0cecSKevin Wolf 
1658b6ad491aSKevin Wolf close_and_fail:
1659f67503e5SMax Reitz     /* See fail path, but now the BDS has to be always closed */
1660f67503e5SMax Reitz     if (*pbs) {
1661b6ad491aSKevin Wolf         bdrv_close(bs);
1662f67503e5SMax Reitz     } else {
1663f67503e5SMax Reitz         bdrv_unref(bs);
1664f67503e5SMax Reitz     }
1665b6ad491aSKevin Wolf     QDECREF(options);
166684d18f06SMarkus Armbruster     if (local_err) {
166734b5d2c6SMax Reitz         error_propagate(errp, local_err);
166834b5d2c6SMax Reitz     }
1669b6ce07aaSKevin Wolf     return ret;
1670b6ce07aaSKevin Wolf }
1671b6ce07aaSKevin Wolf 
1672e971aa12SJeff Cody typedef struct BlockReopenQueueEntry {
1673e971aa12SJeff Cody      bool prepared;
1674e971aa12SJeff Cody      BDRVReopenState state;
1675e971aa12SJeff Cody      QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1676e971aa12SJeff Cody } BlockReopenQueueEntry;
1677e971aa12SJeff Cody 
1678e971aa12SJeff Cody /*
1679e971aa12SJeff Cody  * Adds a BlockDriverState to a simple queue for an atomic, transactional
1680e971aa12SJeff Cody  * reopen of multiple devices.
1681e971aa12SJeff Cody  *
1682e971aa12SJeff Cody  * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1683e971aa12SJeff Cody  * already performed, or alternatively may be NULL a new BlockReopenQueue will
1684e971aa12SJeff Cody  * be created and initialized. This newly created BlockReopenQueue should be
1685e971aa12SJeff Cody  * passed back in for subsequent calls that are intended to be of the same
1686e971aa12SJeff Cody  * atomic 'set'.
1687e971aa12SJeff Cody  *
1688e971aa12SJeff Cody  * bs is the BlockDriverState to add to the reopen queue.
1689e971aa12SJeff Cody  *
1690e971aa12SJeff Cody  * flags contains the open flags for the associated bs
1691e971aa12SJeff Cody  *
1692e971aa12SJeff Cody  * returns a pointer to bs_queue, which is either the newly allocated
1693e971aa12SJeff Cody  * bs_queue, or the existing bs_queue being used.
1694e971aa12SJeff Cody  *
1695e971aa12SJeff Cody  */
1696e971aa12SJeff Cody BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1697e971aa12SJeff Cody                                     BlockDriverState *bs, int flags)
1698e971aa12SJeff Cody {
1699e971aa12SJeff Cody     assert(bs != NULL);
1700e971aa12SJeff Cody 
1701e971aa12SJeff Cody     BlockReopenQueueEntry *bs_entry;
1702e971aa12SJeff Cody     if (bs_queue == NULL) {
1703e971aa12SJeff Cody         bs_queue = g_new0(BlockReopenQueue, 1);
1704e971aa12SJeff Cody         QSIMPLEQ_INIT(bs_queue);
1705e971aa12SJeff Cody     }
1706e971aa12SJeff Cody 
1707f1f25a2eSKevin Wolf     /* bdrv_open() masks this flag out */
1708f1f25a2eSKevin Wolf     flags &= ~BDRV_O_PROTOCOL;
1709f1f25a2eSKevin Wolf 
1710e971aa12SJeff Cody     if (bs->file) {
1711f1f25a2eSKevin Wolf         bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags));
1712e971aa12SJeff Cody     }
1713e971aa12SJeff Cody 
1714e971aa12SJeff Cody     bs_entry = g_new0(BlockReopenQueueEntry, 1);
1715e971aa12SJeff Cody     QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1716e971aa12SJeff Cody 
1717e971aa12SJeff Cody     bs_entry->state.bs = bs;
1718e971aa12SJeff Cody     bs_entry->state.flags = flags;
1719e971aa12SJeff Cody 
1720e971aa12SJeff Cody     return bs_queue;
1721e971aa12SJeff Cody }
1722e971aa12SJeff Cody 
1723e971aa12SJeff Cody /*
1724e971aa12SJeff Cody  * Reopen multiple BlockDriverStates atomically & transactionally.
1725e971aa12SJeff Cody  *
1726e971aa12SJeff Cody  * The queue passed in (bs_queue) must have been built up previous
1727e971aa12SJeff Cody  * via bdrv_reopen_queue().
1728e971aa12SJeff Cody  *
1729e971aa12SJeff Cody  * Reopens all BDS specified in the queue, with the appropriate
1730e971aa12SJeff Cody  * flags.  All devices are prepared for reopen, and failure of any
1731e971aa12SJeff Cody  * device will cause all device changes to be abandonded, and intermediate
1732e971aa12SJeff Cody  * data cleaned up.
1733e971aa12SJeff Cody  *
1734e971aa12SJeff Cody  * If all devices prepare successfully, then the changes are committed
1735e971aa12SJeff Cody  * to all devices.
1736e971aa12SJeff Cody  *
1737e971aa12SJeff Cody  */
1738e971aa12SJeff Cody int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1739e971aa12SJeff Cody {
1740e971aa12SJeff Cody     int ret = -1;
1741e971aa12SJeff Cody     BlockReopenQueueEntry *bs_entry, *next;
1742e971aa12SJeff Cody     Error *local_err = NULL;
1743e971aa12SJeff Cody 
1744e971aa12SJeff Cody     assert(bs_queue != NULL);
1745e971aa12SJeff Cody 
1746e971aa12SJeff Cody     bdrv_drain_all();
1747e971aa12SJeff Cody 
1748e971aa12SJeff Cody     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1749e971aa12SJeff Cody         if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1750e971aa12SJeff Cody             error_propagate(errp, local_err);
1751e971aa12SJeff Cody             goto cleanup;
1752e971aa12SJeff Cody         }
1753e971aa12SJeff Cody         bs_entry->prepared = true;
1754e971aa12SJeff Cody     }
1755e971aa12SJeff Cody 
1756e971aa12SJeff Cody     /* If we reach this point, we have success and just need to apply the
1757e971aa12SJeff Cody      * changes
1758e971aa12SJeff Cody      */
1759e971aa12SJeff Cody     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1760e971aa12SJeff Cody         bdrv_reopen_commit(&bs_entry->state);
1761e971aa12SJeff Cody     }
1762e971aa12SJeff Cody 
1763e971aa12SJeff Cody     ret = 0;
1764e971aa12SJeff Cody 
1765e971aa12SJeff Cody cleanup:
1766e971aa12SJeff Cody     QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1767e971aa12SJeff Cody         if (ret && bs_entry->prepared) {
1768e971aa12SJeff Cody             bdrv_reopen_abort(&bs_entry->state);
1769e971aa12SJeff Cody         }
1770e971aa12SJeff Cody         g_free(bs_entry);
1771e971aa12SJeff Cody     }
1772e971aa12SJeff Cody     g_free(bs_queue);
1773e971aa12SJeff Cody     return ret;
1774e971aa12SJeff Cody }
1775e971aa12SJeff Cody 
1776e971aa12SJeff Cody 
1777e971aa12SJeff Cody /* Reopen a single BlockDriverState with the specified flags. */
1778e971aa12SJeff Cody int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1779e971aa12SJeff Cody {
1780e971aa12SJeff Cody     int ret = -1;
1781e971aa12SJeff Cody     Error *local_err = NULL;
1782e971aa12SJeff Cody     BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1783e971aa12SJeff Cody 
1784e971aa12SJeff Cody     ret = bdrv_reopen_multiple(queue, &local_err);
1785e971aa12SJeff Cody     if (local_err != NULL) {
1786e971aa12SJeff Cody         error_propagate(errp, local_err);
1787e971aa12SJeff Cody     }
1788e971aa12SJeff Cody     return ret;
1789e971aa12SJeff Cody }
1790e971aa12SJeff Cody 
1791e971aa12SJeff Cody 
1792e971aa12SJeff Cody /*
1793e971aa12SJeff Cody  * Prepares a BlockDriverState for reopen. All changes are staged in the
1794e971aa12SJeff Cody  * 'opaque' field of the BDRVReopenState, which is used and allocated by
1795e971aa12SJeff Cody  * the block driver layer .bdrv_reopen_prepare()
1796e971aa12SJeff Cody  *
1797e971aa12SJeff Cody  * bs is the BlockDriverState to reopen
1798e971aa12SJeff Cody  * flags are the new open flags
1799e971aa12SJeff Cody  * queue is the reopen queue
1800e971aa12SJeff Cody  *
1801e971aa12SJeff Cody  * Returns 0 on success, non-zero on error.  On error errp will be set
1802e971aa12SJeff Cody  * as well.
1803e971aa12SJeff Cody  *
1804e971aa12SJeff Cody  * On failure, bdrv_reopen_abort() will be called to clean up any data.
1805e971aa12SJeff Cody  * It is the responsibility of the caller to then call the abort() or
1806e971aa12SJeff Cody  * commit() for any other BDS that have been left in a prepare() state
1807e971aa12SJeff Cody  *
1808e971aa12SJeff Cody  */
1809e971aa12SJeff Cody int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1810e971aa12SJeff Cody                         Error **errp)
1811e971aa12SJeff Cody {
1812e971aa12SJeff Cody     int ret = -1;
1813e971aa12SJeff Cody     Error *local_err = NULL;
1814e971aa12SJeff Cody     BlockDriver *drv;
1815e971aa12SJeff Cody 
1816e971aa12SJeff Cody     assert(reopen_state != NULL);
1817e971aa12SJeff Cody     assert(reopen_state->bs->drv != NULL);
1818e971aa12SJeff Cody     drv = reopen_state->bs->drv;
1819e971aa12SJeff Cody 
1820e971aa12SJeff Cody     /* if we are to stay read-only, do not allow permission change
1821e971aa12SJeff Cody      * to r/w */
1822e971aa12SJeff Cody     if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1823e971aa12SJeff Cody         reopen_state->flags & BDRV_O_RDWR) {
182481e5f78aSAlberto Garcia         error_setg(errp, "Node '%s' is read only",
182581e5f78aSAlberto Garcia                    bdrv_get_device_or_node_name(reopen_state->bs));
1826e971aa12SJeff Cody         goto error;
1827e971aa12SJeff Cody     }
1828e971aa12SJeff Cody 
1829e971aa12SJeff Cody 
1830e971aa12SJeff Cody     ret = bdrv_flush(reopen_state->bs);
1831e971aa12SJeff Cody     if (ret) {
1832e971aa12SJeff Cody         error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1833e971aa12SJeff Cody                   strerror(-ret));
1834e971aa12SJeff Cody         goto error;
1835e971aa12SJeff Cody     }
1836e971aa12SJeff Cody 
1837e971aa12SJeff Cody     if (drv->bdrv_reopen_prepare) {
1838e971aa12SJeff Cody         ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1839e971aa12SJeff Cody         if (ret) {
1840e971aa12SJeff Cody             if (local_err != NULL) {
1841e971aa12SJeff Cody                 error_propagate(errp, local_err);
1842e971aa12SJeff Cody             } else {
1843d8b6895fSLuiz Capitulino                 error_setg(errp, "failed while preparing to reopen image '%s'",
1844e971aa12SJeff Cody                            reopen_state->bs->filename);
1845e971aa12SJeff Cody             }
1846e971aa12SJeff Cody             goto error;
1847e971aa12SJeff Cody         }
1848e971aa12SJeff Cody     } else {
1849e971aa12SJeff Cody         /* It is currently mandatory to have a bdrv_reopen_prepare()
1850e971aa12SJeff Cody          * handler for each supported drv. */
185181e5f78aSAlberto Garcia         error_setg(errp, "Block format '%s' used by node '%s' "
185281e5f78aSAlberto Garcia                    "does not support reopening files", drv->format_name,
185381e5f78aSAlberto Garcia                    bdrv_get_device_or_node_name(reopen_state->bs));
1854e971aa12SJeff Cody         ret = -1;
1855e971aa12SJeff Cody         goto error;
1856e971aa12SJeff Cody     }
1857e971aa12SJeff Cody 
1858e971aa12SJeff Cody     ret = 0;
1859e971aa12SJeff Cody 
1860e971aa12SJeff Cody error:
1861e971aa12SJeff Cody     return ret;
1862e971aa12SJeff Cody }
1863e971aa12SJeff Cody 
1864e971aa12SJeff Cody /*
1865e971aa12SJeff Cody  * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1866e971aa12SJeff Cody  * makes them final by swapping the staging BlockDriverState contents into
1867e971aa12SJeff Cody  * the active BlockDriverState contents.
1868e971aa12SJeff Cody  */
1869e971aa12SJeff Cody void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1870e971aa12SJeff Cody {
1871e971aa12SJeff Cody     BlockDriver *drv;
1872e971aa12SJeff Cody 
1873e971aa12SJeff Cody     assert(reopen_state != NULL);
1874e971aa12SJeff Cody     drv = reopen_state->bs->drv;
1875e971aa12SJeff Cody     assert(drv != NULL);
1876e971aa12SJeff Cody 
1877e971aa12SJeff Cody     /* If there are any driver level actions to take */
1878e971aa12SJeff Cody     if (drv->bdrv_reopen_commit) {
1879e971aa12SJeff Cody         drv->bdrv_reopen_commit(reopen_state);
1880e971aa12SJeff Cody     }
1881e971aa12SJeff Cody 
1882e971aa12SJeff Cody     /* set BDS specific flags now */
1883e971aa12SJeff Cody     reopen_state->bs->open_flags         = reopen_state->flags;
1884e971aa12SJeff Cody     reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1885e971aa12SJeff Cody                                               BDRV_O_CACHE_WB);
1886e971aa12SJeff Cody     reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
1887355ef4acSKevin Wolf 
18883baca891SKevin Wolf     bdrv_refresh_limits(reopen_state->bs, NULL);
1889e971aa12SJeff Cody }
1890e971aa12SJeff Cody 
1891e971aa12SJeff Cody /*
1892e971aa12SJeff Cody  * Abort the reopen, and delete and free the staged changes in
1893e971aa12SJeff Cody  * reopen_state
1894e971aa12SJeff Cody  */
1895e971aa12SJeff Cody void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1896e971aa12SJeff Cody {
1897e971aa12SJeff Cody     BlockDriver *drv;
1898e971aa12SJeff Cody 
1899e971aa12SJeff Cody     assert(reopen_state != NULL);
1900e971aa12SJeff Cody     drv = reopen_state->bs->drv;
1901e971aa12SJeff Cody     assert(drv != NULL);
1902e971aa12SJeff Cody 
1903e971aa12SJeff Cody     if (drv->bdrv_reopen_abort) {
1904e971aa12SJeff Cody         drv->bdrv_reopen_abort(reopen_state);
1905e971aa12SJeff Cody     }
1906e971aa12SJeff Cody }
1907e971aa12SJeff Cody 
1908e971aa12SJeff Cody 
1909fc01f7e7Sbellard void bdrv_close(BlockDriverState *bs)
1910fc01f7e7Sbellard {
191133384421SMax Reitz     BdrvAioNotifier *ban, *ban_next;
191233384421SMax Reitz 
19133e914655SPaolo Bonzini     if (bs->job) {
19143e914655SPaolo Bonzini         block_job_cancel_sync(bs->job);
19153e914655SPaolo Bonzini     }
191658fda173SStefan Hajnoczi     bdrv_drain_all(); /* complete I/O */
191758fda173SStefan Hajnoczi     bdrv_flush(bs);
191858fda173SStefan Hajnoczi     bdrv_drain_all(); /* in case flush left pending I/O */
1919d7d512f6SPaolo Bonzini     notifier_list_notify(&bs->close_notifiers, bs);
19207094f12fSKevin Wolf 
19213cbc002cSPaolo Bonzini     if (bs->drv) {
1922557df6acSStefan Hajnoczi         if (bs->backing_hd) {
1923826b6ca0SFam Zheng             BlockDriverState *backing_hd = bs->backing_hd;
1924826b6ca0SFam Zheng             bdrv_set_backing_hd(bs, NULL);
1925826b6ca0SFam Zheng             bdrv_unref(backing_hd);
1926557df6acSStefan Hajnoczi         }
1927ea2384d3Sbellard         bs->drv->bdrv_close(bs);
19287267c094SAnthony Liguori         g_free(bs->opaque);
1929ea2384d3Sbellard         bs->opaque = NULL;
1930ea2384d3Sbellard         bs->drv = NULL;
193153fec9d3SStefan Hajnoczi         bs->copy_on_read = 0;
1932a275fa42SPaolo Bonzini         bs->backing_file[0] = '\0';
1933a275fa42SPaolo Bonzini         bs->backing_format[0] = '\0';
19346405875cSPaolo Bonzini         bs->total_sectors = 0;
19356405875cSPaolo Bonzini         bs->encrypted = 0;
19366405875cSPaolo Bonzini         bs->valid_key = 0;
19376405875cSPaolo Bonzini         bs->sg = 0;
19380d51b4deSAsias He         bs->zero_beyond_eof = false;
1939de9c0cecSKevin Wolf         QDECREF(bs->options);
1940de9c0cecSKevin Wolf         bs->options = NULL;
194191af7014SMax Reitz         QDECREF(bs->full_open_options);
194291af7014SMax Reitz         bs->full_open_options = NULL;
1943b338082bSbellard 
194466f82ceeSKevin Wolf         if (bs->file != NULL) {
19454f6fd349SFam Zheng             bdrv_unref(bs->file);
19460ac9377dSPaolo Bonzini             bs->file = NULL;
194766f82ceeSKevin Wolf         }
19489ca11154SPavel Hrdina     }
194966f82ceeSKevin Wolf 
1950a7f53e26SMarkus Armbruster     if (bs->blk) {
1951a7f53e26SMarkus Armbruster         blk_dev_change_media_cb(bs->blk, false);
1952a7f53e26SMarkus Armbruster     }
195398f90dbaSZhi Yong Wu 
195498f90dbaSZhi Yong Wu     /*throttling disk I/O limits*/
195598f90dbaSZhi Yong Wu     if (bs->io_limits_enabled) {
195698f90dbaSZhi Yong Wu         bdrv_io_limits_disable(bs);
195798f90dbaSZhi Yong Wu     }
195833384421SMax Reitz 
195933384421SMax Reitz     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
196033384421SMax Reitz         g_free(ban);
196133384421SMax Reitz     }
196233384421SMax Reitz     QLIST_INIT(&bs->aio_notifiers);
1963b338082bSbellard }
1964b338082bSbellard 
19652bc93fedSMORITA Kazutaka void bdrv_close_all(void)
19662bc93fedSMORITA Kazutaka {
19672bc93fedSMORITA Kazutaka     BlockDriverState *bs;
19682bc93fedSMORITA Kazutaka 
1969dc364f4cSBenoît Canet     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1970ed78cda3SStefan Hajnoczi         AioContext *aio_context = bdrv_get_aio_context(bs);
1971ed78cda3SStefan Hajnoczi 
1972ed78cda3SStefan Hajnoczi         aio_context_acquire(aio_context);
19732bc93fedSMORITA Kazutaka         bdrv_close(bs);
1974ed78cda3SStefan Hajnoczi         aio_context_release(aio_context);
19752bc93fedSMORITA Kazutaka     }
19762bc93fedSMORITA Kazutaka }
19772bc93fedSMORITA Kazutaka 
197888266f5aSStefan Hajnoczi /* Check if any requests are in-flight (including throttled requests) */
197988266f5aSStefan Hajnoczi static bool bdrv_requests_pending(BlockDriverState *bs)
198088266f5aSStefan Hajnoczi {
198188266f5aSStefan Hajnoczi     if (!QLIST_EMPTY(&bs->tracked_requests)) {
198288266f5aSStefan Hajnoczi         return true;
198388266f5aSStefan Hajnoczi     }
1984cc0681c4SBenoît Canet     if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
1985cc0681c4SBenoît Canet         return true;
1986cc0681c4SBenoît Canet     }
1987cc0681c4SBenoît Canet     if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
198888266f5aSStefan Hajnoczi         return true;
198988266f5aSStefan Hajnoczi     }
199088266f5aSStefan Hajnoczi     if (bs->file && bdrv_requests_pending(bs->file)) {
199188266f5aSStefan Hajnoczi         return true;
199288266f5aSStefan Hajnoczi     }
199388266f5aSStefan Hajnoczi     if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
199488266f5aSStefan Hajnoczi         return true;
199588266f5aSStefan Hajnoczi     }
199688266f5aSStefan Hajnoczi     return false;
199788266f5aSStefan Hajnoczi }
199888266f5aSStefan Hajnoczi 
19995b98db0aSStefan Hajnoczi static bool bdrv_drain_one(BlockDriverState *bs)
20005b98db0aSStefan Hajnoczi {
20015b98db0aSStefan Hajnoczi     bool bs_busy;
20025b98db0aSStefan Hajnoczi 
20035b98db0aSStefan Hajnoczi     bdrv_flush_io_queue(bs);
20045b98db0aSStefan Hajnoczi     bdrv_start_throttled_reqs(bs);
20055b98db0aSStefan Hajnoczi     bs_busy = bdrv_requests_pending(bs);
20065b98db0aSStefan Hajnoczi     bs_busy |= aio_poll(bdrv_get_aio_context(bs), bs_busy);
20075b98db0aSStefan Hajnoczi     return bs_busy;
20085b98db0aSStefan Hajnoczi }
20095b98db0aSStefan Hajnoczi 
20105b98db0aSStefan Hajnoczi /*
20115b98db0aSStefan Hajnoczi  * Wait for pending requests to complete on a single BlockDriverState subtree
20125b98db0aSStefan Hajnoczi  *
20135b98db0aSStefan Hajnoczi  * See the warning in bdrv_drain_all().  This function can only be called if
20145b98db0aSStefan Hajnoczi  * you are sure nothing can generate I/O because you have op blockers
20155b98db0aSStefan Hajnoczi  * installed.
20165b98db0aSStefan Hajnoczi  *
20175b98db0aSStefan Hajnoczi  * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
20185b98db0aSStefan Hajnoczi  * AioContext.
20195b98db0aSStefan Hajnoczi  */
20205b98db0aSStefan Hajnoczi void bdrv_drain(BlockDriverState *bs)
20215b98db0aSStefan Hajnoczi {
20225b98db0aSStefan Hajnoczi     while (bdrv_drain_one(bs)) {
20235b98db0aSStefan Hajnoczi         /* Keep iterating */
20245b98db0aSStefan Hajnoczi     }
20255b98db0aSStefan Hajnoczi }
20265b98db0aSStefan Hajnoczi 
2027922453bcSStefan Hajnoczi /*
2028922453bcSStefan Hajnoczi  * Wait for pending requests to complete across all BlockDriverStates
2029922453bcSStefan Hajnoczi  *
2030922453bcSStefan Hajnoczi  * This function does not flush data to disk, use bdrv_flush_all() for that
2031922453bcSStefan Hajnoczi  * after calling this function.
20324c355d53SZhi Yong Wu  *
20334c355d53SZhi Yong Wu  * Note that completion of an asynchronous I/O operation can trigger any
20344c355d53SZhi Yong Wu  * number of other I/O operations on other devices---for example a coroutine
20354c355d53SZhi Yong Wu  * can be arbitrarily complex and a constant flow of I/O can come until the
20364c355d53SZhi Yong Wu  * coroutine is complete.  Because of this, it is not possible to have a
20374c355d53SZhi Yong Wu  * function to drain a single device's I/O queue.
2038922453bcSStefan Hajnoczi  */
2039922453bcSStefan Hajnoczi void bdrv_drain_all(void)
2040922453bcSStefan Hajnoczi {
204188266f5aSStefan Hajnoczi     /* Always run first iteration so any pending completion BHs run */
204288266f5aSStefan Hajnoczi     bool busy = true;
2043922453bcSStefan Hajnoczi     BlockDriverState *bs;
2044922453bcSStefan Hajnoczi 
204569da3b0bSFam Zheng     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
204669da3b0bSFam Zheng         AioContext *aio_context = bdrv_get_aio_context(bs);
204769da3b0bSFam Zheng 
204869da3b0bSFam Zheng         aio_context_acquire(aio_context);
204969da3b0bSFam Zheng         if (bs->job) {
205069da3b0bSFam Zheng             block_job_pause(bs->job);
205169da3b0bSFam Zheng         }
205269da3b0bSFam Zheng         aio_context_release(aio_context);
205369da3b0bSFam Zheng     }
205469da3b0bSFam Zheng 
205588266f5aSStefan Hajnoczi     while (busy) {
20569b536adcSStefan Hajnoczi         busy = false;
2057922453bcSStefan Hajnoczi 
20589b536adcSStefan Hajnoczi         QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
20599b536adcSStefan Hajnoczi             AioContext *aio_context = bdrv_get_aio_context(bs);
20609b536adcSStefan Hajnoczi 
20619b536adcSStefan Hajnoczi             aio_context_acquire(aio_context);
20625b98db0aSStefan Hajnoczi             busy |= bdrv_drain_one(bs);
20639b536adcSStefan Hajnoczi             aio_context_release(aio_context);
20649b536adcSStefan Hajnoczi         }
2065922453bcSStefan Hajnoczi     }
206669da3b0bSFam Zheng 
206769da3b0bSFam Zheng     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
206869da3b0bSFam Zheng         AioContext *aio_context = bdrv_get_aio_context(bs);
206969da3b0bSFam Zheng 
207069da3b0bSFam Zheng         aio_context_acquire(aio_context);
207169da3b0bSFam Zheng         if (bs->job) {
207269da3b0bSFam Zheng             block_job_resume(bs->job);
207369da3b0bSFam Zheng         }
207469da3b0bSFam Zheng         aio_context_release(aio_context);
207569da3b0bSFam Zheng     }
2076922453bcSStefan Hajnoczi }
2077922453bcSStefan Hajnoczi 
2078dc364f4cSBenoît Canet /* make a BlockDriverState anonymous by removing from bdrv_state and
2079dc364f4cSBenoît Canet  * graph_bdrv_state list.
2080d22b2f41SRyan Harper    Also, NULL terminate the device_name to prevent double remove */
2081d22b2f41SRyan Harper void bdrv_make_anon(BlockDriverState *bs)
2082d22b2f41SRyan Harper {
2083bfb197e0SMarkus Armbruster     /*
2084bfb197e0SMarkus Armbruster      * Take care to remove bs from bdrv_states only when it's actually
2085bfb197e0SMarkus Armbruster      * in it.  Note that bs->device_list.tqe_prev is initially null,
2086bfb197e0SMarkus Armbruster      * and gets set to non-null by QTAILQ_INSERT_TAIL().  Establish
2087bfb197e0SMarkus Armbruster      * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
2088bfb197e0SMarkus Armbruster      * resetting it to null on remove.
2089bfb197e0SMarkus Armbruster      */
2090bfb197e0SMarkus Armbruster     if (bs->device_list.tqe_prev) {
2091dc364f4cSBenoît Canet         QTAILQ_REMOVE(&bdrv_states, bs, device_list);
2092bfb197e0SMarkus Armbruster         bs->device_list.tqe_prev = NULL;
2093d22b2f41SRyan Harper     }
2094dc364f4cSBenoît Canet     if (bs->node_name[0] != '\0') {
2095dc364f4cSBenoît Canet         QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
2096dc364f4cSBenoît Canet     }
2097dc364f4cSBenoît Canet     bs->node_name[0] = '\0';
2098d22b2f41SRyan Harper }
2099d22b2f41SRyan Harper 
2100e023b2e2SPaolo Bonzini static void bdrv_rebind(BlockDriverState *bs)
2101e023b2e2SPaolo Bonzini {
2102e023b2e2SPaolo Bonzini     if (bs->drv && bs->drv->bdrv_rebind) {
2103e023b2e2SPaolo Bonzini         bs->drv->bdrv_rebind(bs);
2104e023b2e2SPaolo Bonzini     }
2105e023b2e2SPaolo Bonzini }
2106e023b2e2SPaolo Bonzini 
21074ddc07caSPaolo Bonzini static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
21084ddc07caSPaolo Bonzini                                      BlockDriverState *bs_src)
21094ddc07caSPaolo Bonzini {
21104ddc07caSPaolo Bonzini     /* move some fields that need to stay attached to the device */
21114ddc07caSPaolo Bonzini 
21124ddc07caSPaolo Bonzini     /* dev info */
21131b7fd729SPaolo Bonzini     bs_dest->guest_block_size   = bs_src->guest_block_size;
21144ddc07caSPaolo Bonzini     bs_dest->copy_on_read       = bs_src->copy_on_read;
21154ddc07caSPaolo Bonzini 
21164ddc07caSPaolo Bonzini     bs_dest->enable_write_cache = bs_src->enable_write_cache;
21174ddc07caSPaolo Bonzini 
2118cc0681c4SBenoît Canet     /* i/o throttled req */
2119cc0681c4SBenoît Canet     memcpy(&bs_dest->throttle_state,
2120cc0681c4SBenoît Canet            &bs_src->throttle_state,
2121cc0681c4SBenoît Canet            sizeof(ThrottleState));
2122cc0681c4SBenoît Canet     bs_dest->throttled_reqs[0]  = bs_src->throttled_reqs[0];
2123cc0681c4SBenoît Canet     bs_dest->throttled_reqs[1]  = bs_src->throttled_reqs[1];
21244ddc07caSPaolo Bonzini     bs_dest->io_limits_enabled  = bs_src->io_limits_enabled;
21254ddc07caSPaolo Bonzini 
21264ddc07caSPaolo Bonzini     /* r/w error */
21274ddc07caSPaolo Bonzini     bs_dest->on_read_error      = bs_src->on_read_error;
21284ddc07caSPaolo Bonzini     bs_dest->on_write_error     = bs_src->on_write_error;
21294ddc07caSPaolo Bonzini 
21304ddc07caSPaolo Bonzini     /* i/o status */
21314ddc07caSPaolo Bonzini     bs_dest->iostatus_enabled   = bs_src->iostatus_enabled;
21324ddc07caSPaolo Bonzini     bs_dest->iostatus           = bs_src->iostatus;
21334ddc07caSPaolo Bonzini 
21344ddc07caSPaolo Bonzini     /* dirty bitmap */
2135e4654d2dSFam Zheng     bs_dest->dirty_bitmaps      = bs_src->dirty_bitmaps;
21364ddc07caSPaolo Bonzini 
21379fcb0251SFam Zheng     /* reference count */
21389fcb0251SFam Zheng     bs_dest->refcnt             = bs_src->refcnt;
21399fcb0251SFam Zheng 
21404ddc07caSPaolo Bonzini     /* job */
21414ddc07caSPaolo Bonzini     bs_dest->job                = bs_src->job;
21424ddc07caSPaolo Bonzini 
21434ddc07caSPaolo Bonzini     /* keep the same entry in bdrv_states */
2144dc364f4cSBenoît Canet     bs_dest->device_list = bs_src->device_list;
21457e7d56d9SMarkus Armbruster     bs_dest->blk = bs_src->blk;
21467e7d56d9SMarkus Armbruster 
2147fbe40ff7SFam Zheng     memcpy(bs_dest->op_blockers, bs_src->op_blockers,
2148fbe40ff7SFam Zheng            sizeof(bs_dest->op_blockers));
21494ddc07caSPaolo Bonzini }
21504ddc07caSPaolo Bonzini 
21514ddc07caSPaolo Bonzini /*
21524ddc07caSPaolo Bonzini  * Swap bs contents for two image chains while they are live,
21534ddc07caSPaolo Bonzini  * while keeping required fields on the BlockDriverState that is
21544ddc07caSPaolo Bonzini  * actually attached to a device.
21554ddc07caSPaolo Bonzini  *
21564ddc07caSPaolo Bonzini  * This will modify the BlockDriverState fields, and swap contents
21574ddc07caSPaolo Bonzini  * between bs_new and bs_old. Both bs_new and bs_old are modified.
21584ddc07caSPaolo Bonzini  *
2159bfb197e0SMarkus Armbruster  * bs_new must not be attached to a BlockBackend.
21604ddc07caSPaolo Bonzini  *
21614ddc07caSPaolo Bonzini  * This function does not create any image files.
21624ddc07caSPaolo Bonzini  */
21634ddc07caSPaolo Bonzini void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
21644ddc07caSPaolo Bonzini {
21654ddc07caSPaolo Bonzini     BlockDriverState tmp;
21664ddc07caSPaolo Bonzini 
216790ce8a06SBenoît Canet     /* The code needs to swap the node_name but simply swapping node_list won't
216890ce8a06SBenoît Canet      * work so first remove the nodes from the graph list, do the swap then
216990ce8a06SBenoît Canet      * insert them back if needed.
217090ce8a06SBenoît Canet      */
217190ce8a06SBenoît Canet     if (bs_new->node_name[0] != '\0') {
217290ce8a06SBenoît Canet         QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
217390ce8a06SBenoît Canet     }
217490ce8a06SBenoît Canet     if (bs_old->node_name[0] != '\0') {
217590ce8a06SBenoît Canet         QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
217690ce8a06SBenoît Canet     }
217790ce8a06SBenoît Canet 
2178bfb197e0SMarkus Armbruster     /* bs_new must be unattached and shouldn't have anything fancy enabled */
21797e7d56d9SMarkus Armbruster     assert(!bs_new->blk);
2180e4654d2dSFam Zheng     assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
21814ddc07caSPaolo Bonzini     assert(bs_new->job == NULL);
21824ddc07caSPaolo Bonzini     assert(bs_new->io_limits_enabled == false);
2183cc0681c4SBenoît Canet     assert(!throttle_have_timer(&bs_new->throttle_state));
21844ddc07caSPaolo Bonzini 
21854ddc07caSPaolo Bonzini     tmp = *bs_new;
21864ddc07caSPaolo Bonzini     *bs_new = *bs_old;
21874ddc07caSPaolo Bonzini     *bs_old = tmp;
21884ddc07caSPaolo Bonzini 
21894ddc07caSPaolo Bonzini     /* there are some fields that should not be swapped, move them back */
21904ddc07caSPaolo Bonzini     bdrv_move_feature_fields(&tmp, bs_old);
21914ddc07caSPaolo Bonzini     bdrv_move_feature_fields(bs_old, bs_new);
21924ddc07caSPaolo Bonzini     bdrv_move_feature_fields(bs_new, &tmp);
21934ddc07caSPaolo Bonzini 
2194bfb197e0SMarkus Armbruster     /* bs_new must remain unattached */
21957e7d56d9SMarkus Armbruster     assert(!bs_new->blk);
21964ddc07caSPaolo Bonzini 
21974ddc07caSPaolo Bonzini     /* Check a few fields that should remain attached to the device */
21984ddc07caSPaolo Bonzini     assert(bs_new->job == NULL);
21994ddc07caSPaolo Bonzini     assert(bs_new->io_limits_enabled == false);
2200cc0681c4SBenoît Canet     assert(!throttle_have_timer(&bs_new->throttle_state));
22014ddc07caSPaolo Bonzini 
220290ce8a06SBenoît Canet     /* insert the nodes back into the graph node list if needed */
220390ce8a06SBenoît Canet     if (bs_new->node_name[0] != '\0') {
220490ce8a06SBenoît Canet         QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
220590ce8a06SBenoît Canet     }
220690ce8a06SBenoît Canet     if (bs_old->node_name[0] != '\0') {
220790ce8a06SBenoît Canet         QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
220890ce8a06SBenoît Canet     }
220990ce8a06SBenoît Canet 
22104ddc07caSPaolo Bonzini     bdrv_rebind(bs_new);
22114ddc07caSPaolo Bonzini     bdrv_rebind(bs_old);
22124ddc07caSPaolo Bonzini }
22134ddc07caSPaolo Bonzini 
22148802d1fdSJeff Cody /*
22158802d1fdSJeff Cody  * Add new bs contents at the top of an image chain while the chain is
22168802d1fdSJeff Cody  * live, while keeping required fields on the top layer.
22178802d1fdSJeff Cody  *
22188802d1fdSJeff Cody  * This will modify the BlockDriverState fields, and swap contents
22198802d1fdSJeff Cody  * between bs_new and bs_top. Both bs_new and bs_top are modified.
22208802d1fdSJeff Cody  *
2221bfb197e0SMarkus Armbruster  * bs_new must not be attached to a BlockBackend.
2222f6801b83SJeff Cody  *
22238802d1fdSJeff Cody  * This function does not create any image files.
22248802d1fdSJeff Cody  */
22258802d1fdSJeff Cody void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
22268802d1fdSJeff Cody {
22274ddc07caSPaolo Bonzini     bdrv_swap(bs_new, bs_top);
22288802d1fdSJeff Cody 
22298802d1fdSJeff Cody     /* The contents of 'tmp' will become bs_top, as we are
22308802d1fdSJeff Cody      * swapping bs_new and bs_top contents. */
22318d24cce1SFam Zheng     bdrv_set_backing_hd(bs_top, bs_new);
22328802d1fdSJeff Cody }
22338802d1fdSJeff Cody 
22344f6fd349SFam Zheng static void bdrv_delete(BlockDriverState *bs)
2235b338082bSbellard {
22363e914655SPaolo Bonzini     assert(!bs->job);
22373718d8abSFam Zheng     assert(bdrv_op_blocker_is_empty(bs));
22384f6fd349SFam Zheng     assert(!bs->refcnt);
2239e4654d2dSFam Zheng     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
224018846deeSMarkus Armbruster 
2241e1b5c52eSStefan Hajnoczi     bdrv_close(bs);
2242e1b5c52eSStefan Hajnoczi 
22431b7bdbc1SStefan Hajnoczi     /* remove from list, if necessary */
2244d22b2f41SRyan Harper     bdrv_make_anon(bs);
224534c6f050Saurel32 
22467267c094SAnthony Liguori     g_free(bs);
2247fc01f7e7Sbellard }
2248fc01f7e7Sbellard 
2249e97fc193Saliguori /*
2250e97fc193Saliguori  * Run consistency checks on an image
2251e97fc193Saliguori  *
2252e076f338SKevin Wolf  * Returns 0 if the check could be completed (it doesn't mean that the image is
2253a1c7273bSStefan Weil  * free of errors) or -errno when an internal error occurred. The results of the
2254e076f338SKevin Wolf  * check are stored in res.
2255e97fc193Saliguori  */
22564534ff54SKevin Wolf int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
2257e97fc193Saliguori {
2258908bcd54SMax Reitz     if (bs->drv == NULL) {
2259908bcd54SMax Reitz         return -ENOMEDIUM;
2260908bcd54SMax Reitz     }
2261e97fc193Saliguori     if (bs->drv->bdrv_check == NULL) {
2262e97fc193Saliguori         return -ENOTSUP;
2263e97fc193Saliguori     }
2264e97fc193Saliguori 
2265e076f338SKevin Wolf     memset(res, 0, sizeof(*res));
22664534ff54SKevin Wolf     return bs->drv->bdrv_check(bs, res, fix);
2267e97fc193Saliguori }
2268e97fc193Saliguori 
22698a426614SKevin Wolf #define COMMIT_BUF_SECTORS 2048
22708a426614SKevin Wolf 
227133e3963eSbellard /* commit COW file into the raw image */
227233e3963eSbellard int bdrv_commit(BlockDriverState *bs)
227333e3963eSbellard {
227419cb3738Sbellard     BlockDriver *drv = bs->drv;
227572706ea4SJeff Cody     int64_t sector, total_sectors, length, backing_length;
22768a426614SKevin Wolf     int n, ro, open_flags;
22770bce597dSJeff Cody     int ret = 0;
227872706ea4SJeff Cody     uint8_t *buf = NULL;
227933e3963eSbellard 
228019cb3738Sbellard     if (!drv)
228119cb3738Sbellard         return -ENOMEDIUM;
228233e3963eSbellard 
22834dca4b63SNaphtali Sprei     if (!bs->backing_hd) {
22844dca4b63SNaphtali Sprei         return -ENOTSUP;
22854dca4b63SNaphtali Sprei     }
22864dca4b63SNaphtali Sprei 
2287bb00021dSFam Zheng     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
2288bb00021dSFam Zheng         bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
22892d3735d3SStefan Hajnoczi         return -EBUSY;
22902d3735d3SStefan Hajnoczi     }
22912d3735d3SStefan Hajnoczi 
22924dca4b63SNaphtali Sprei     ro = bs->backing_hd->read_only;
22934dca4b63SNaphtali Sprei     open_flags =  bs->backing_hd->open_flags;
22944dca4b63SNaphtali Sprei 
22954dca4b63SNaphtali Sprei     if (ro) {
22960bce597dSJeff Cody         if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
22970bce597dSJeff Cody             return -EACCES;
22984dca4b63SNaphtali Sprei         }
2299ea2384d3Sbellard     }
2300ea2384d3Sbellard 
230172706ea4SJeff Cody     length = bdrv_getlength(bs);
230272706ea4SJeff Cody     if (length < 0) {
230372706ea4SJeff Cody         ret = length;
230472706ea4SJeff Cody         goto ro_cleanup;
230572706ea4SJeff Cody     }
230672706ea4SJeff Cody 
230772706ea4SJeff Cody     backing_length = bdrv_getlength(bs->backing_hd);
230872706ea4SJeff Cody     if (backing_length < 0) {
230972706ea4SJeff Cody         ret = backing_length;
231072706ea4SJeff Cody         goto ro_cleanup;
231172706ea4SJeff Cody     }
231272706ea4SJeff Cody 
231372706ea4SJeff Cody     /* If our top snapshot is larger than the backing file image,
231472706ea4SJeff Cody      * grow the backing file image if possible.  If not possible,
231572706ea4SJeff Cody      * we must return an error */
231672706ea4SJeff Cody     if (length > backing_length) {
231772706ea4SJeff Cody         ret = bdrv_truncate(bs->backing_hd, length);
231872706ea4SJeff Cody         if (ret < 0) {
231972706ea4SJeff Cody             goto ro_cleanup;
232072706ea4SJeff Cody         }
232172706ea4SJeff Cody     }
232272706ea4SJeff Cody 
232372706ea4SJeff Cody     total_sectors = length >> BDRV_SECTOR_BITS;
2324857d4f46SKevin Wolf 
2325857d4f46SKevin Wolf     /* qemu_try_blockalign() for bs will choose an alignment that works for
2326857d4f46SKevin Wolf      * bs->backing_hd as well, so no need to compare the alignment manually. */
2327857d4f46SKevin Wolf     buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2328857d4f46SKevin Wolf     if (buf == NULL) {
2329857d4f46SKevin Wolf         ret = -ENOMEM;
2330857d4f46SKevin Wolf         goto ro_cleanup;
2331857d4f46SKevin Wolf     }
23328a426614SKevin Wolf 
23338a426614SKevin Wolf     for (sector = 0; sector < total_sectors; sector += n) {
2334d663640cSPaolo Bonzini         ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2335d663640cSPaolo Bonzini         if (ret < 0) {
2336d663640cSPaolo Bonzini             goto ro_cleanup;
2337d663640cSPaolo Bonzini         }
2338d663640cSPaolo Bonzini         if (ret) {
2339dabfa6ccSKevin Wolf             ret = bdrv_read(bs, sector, buf, n);
2340dabfa6ccSKevin Wolf             if (ret < 0) {
23414dca4b63SNaphtali Sprei                 goto ro_cleanup;
234233e3963eSbellard             }
234333e3963eSbellard 
2344dabfa6ccSKevin Wolf             ret = bdrv_write(bs->backing_hd, sector, buf, n);
2345dabfa6ccSKevin Wolf             if (ret < 0) {
23464dca4b63SNaphtali Sprei                 goto ro_cleanup;
234733e3963eSbellard             }
234833e3963eSbellard         }
234933e3963eSbellard     }
235095389c86Sbellard 
23511d44952fSChristoph Hellwig     if (drv->bdrv_make_empty) {
23521d44952fSChristoph Hellwig         ret = drv->bdrv_make_empty(bs);
2353dabfa6ccSKevin Wolf         if (ret < 0) {
2354dabfa6ccSKevin Wolf             goto ro_cleanup;
2355dabfa6ccSKevin Wolf         }
23561d44952fSChristoph Hellwig         bdrv_flush(bs);
23571d44952fSChristoph Hellwig     }
235895389c86Sbellard 
23593f5075aeSChristoph Hellwig     /*
23603f5075aeSChristoph Hellwig      * Make sure all data we wrote to the backing device is actually
23613f5075aeSChristoph Hellwig      * stable on disk.
23623f5075aeSChristoph Hellwig      */
2363dabfa6ccSKevin Wolf     if (bs->backing_hd) {
23643f5075aeSChristoph Hellwig         bdrv_flush(bs->backing_hd);
2365dabfa6ccSKevin Wolf     }
23664dca4b63SNaphtali Sprei 
2367dabfa6ccSKevin Wolf     ret = 0;
23684dca4b63SNaphtali Sprei ro_cleanup:
2369857d4f46SKevin Wolf     qemu_vfree(buf);
23704dca4b63SNaphtali Sprei 
23714dca4b63SNaphtali Sprei     if (ro) {
23720bce597dSJeff Cody         /* ignoring error return here */
23730bce597dSJeff Cody         bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
23744dca4b63SNaphtali Sprei     }
23754dca4b63SNaphtali Sprei 
23761d44952fSChristoph Hellwig     return ret;
237733e3963eSbellard }
237833e3963eSbellard 
2379e8877497SStefan Hajnoczi int bdrv_commit_all(void)
23806ab4b5abSMarkus Armbruster {
23816ab4b5abSMarkus Armbruster     BlockDriverState *bs;
23826ab4b5abSMarkus Armbruster 
2383dc364f4cSBenoît Canet     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2384ed78cda3SStefan Hajnoczi         AioContext *aio_context = bdrv_get_aio_context(bs);
2385ed78cda3SStefan Hajnoczi 
2386ed78cda3SStefan Hajnoczi         aio_context_acquire(aio_context);
2387272d2d8eSJeff Cody         if (bs->drv && bs->backing_hd) {
2388e8877497SStefan Hajnoczi             int ret = bdrv_commit(bs);
2389e8877497SStefan Hajnoczi             if (ret < 0) {
2390ed78cda3SStefan Hajnoczi                 aio_context_release(aio_context);
2391e8877497SStefan Hajnoczi                 return ret;
23926ab4b5abSMarkus Armbruster             }
23936ab4b5abSMarkus Armbruster         }
2394ed78cda3SStefan Hajnoczi         aio_context_release(aio_context);
2395272d2d8eSJeff Cody     }
2396e8877497SStefan Hajnoczi     return 0;
2397e8877497SStefan Hajnoczi }
23986ab4b5abSMarkus Armbruster 
2399dbffbdcfSStefan Hajnoczi /**
2400dbffbdcfSStefan Hajnoczi  * Remove an active request from the tracked requests list
2401dbffbdcfSStefan Hajnoczi  *
2402dbffbdcfSStefan Hajnoczi  * This function should be called when a tracked request is completing.
2403dbffbdcfSStefan Hajnoczi  */
2404dbffbdcfSStefan Hajnoczi static void tracked_request_end(BdrvTrackedRequest *req)
2405dbffbdcfSStefan Hajnoczi {
24062dbafdc0SKevin Wolf     if (req->serialising) {
24072dbafdc0SKevin Wolf         req->bs->serialising_in_flight--;
24082dbafdc0SKevin Wolf     }
24092dbafdc0SKevin Wolf 
2410dbffbdcfSStefan Hajnoczi     QLIST_REMOVE(req, list);
2411f4658285SStefan Hajnoczi     qemu_co_queue_restart_all(&req->wait_queue);
2412dbffbdcfSStefan Hajnoczi }
2413dbffbdcfSStefan Hajnoczi 
2414dbffbdcfSStefan Hajnoczi /**
2415dbffbdcfSStefan Hajnoczi  * Add an active request to the tracked requests list
2416dbffbdcfSStefan Hajnoczi  */
2417dbffbdcfSStefan Hajnoczi static void tracked_request_begin(BdrvTrackedRequest *req,
2418dbffbdcfSStefan Hajnoczi                                   BlockDriverState *bs,
2419793ed47aSKevin Wolf                                   int64_t offset,
2420793ed47aSKevin Wolf                                   unsigned int bytes, bool is_write)
2421dbffbdcfSStefan Hajnoczi {
2422dbffbdcfSStefan Hajnoczi     *req = (BdrvTrackedRequest){
2423dbffbdcfSStefan Hajnoczi         .bs = bs,
2424793ed47aSKevin Wolf         .offset         = offset,
2425793ed47aSKevin Wolf         .bytes          = bytes,
2426dbffbdcfSStefan Hajnoczi         .is_write       = is_write,
24275f8b6491SStefan Hajnoczi         .co             = qemu_coroutine_self(),
24282dbafdc0SKevin Wolf         .serialising    = false,
24297327145fSKevin Wolf         .overlap_offset = offset,
24307327145fSKevin Wolf         .overlap_bytes  = bytes,
2431dbffbdcfSStefan Hajnoczi     };
2432dbffbdcfSStefan Hajnoczi 
2433f4658285SStefan Hajnoczi     qemu_co_queue_init(&req->wait_queue);
2434f4658285SStefan Hajnoczi 
2435dbffbdcfSStefan Hajnoczi     QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
2436dbffbdcfSStefan Hajnoczi }
2437dbffbdcfSStefan Hajnoczi 
2438e96126ffSKevin Wolf static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
24392dbafdc0SKevin Wolf {
24407327145fSKevin Wolf     int64_t overlap_offset = req->offset & ~(align - 1);
2441e96126ffSKevin Wolf     unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
24427327145fSKevin Wolf                                - overlap_offset;
24437327145fSKevin Wolf 
24442dbafdc0SKevin Wolf     if (!req->serialising) {
24452dbafdc0SKevin Wolf         req->bs->serialising_in_flight++;
24462dbafdc0SKevin Wolf         req->serialising = true;
24472dbafdc0SKevin Wolf     }
24487327145fSKevin Wolf 
24497327145fSKevin Wolf     req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
24507327145fSKevin Wolf     req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
24512dbafdc0SKevin Wolf }
24522dbafdc0SKevin Wolf 
2453d83947acSStefan Hajnoczi /**
2454d83947acSStefan Hajnoczi  * Round a region to cluster boundaries
2455d83947acSStefan Hajnoczi  */
2456343bded4SPaolo Bonzini void bdrv_round_to_clusters(BlockDriverState *bs,
2457d83947acSStefan Hajnoczi                             int64_t sector_num, int nb_sectors,
2458d83947acSStefan Hajnoczi                             int64_t *cluster_sector_num,
2459d83947acSStefan Hajnoczi                             int *cluster_nb_sectors)
2460d83947acSStefan Hajnoczi {
2461d83947acSStefan Hajnoczi     BlockDriverInfo bdi;
2462d83947acSStefan Hajnoczi 
2463d83947acSStefan Hajnoczi     if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
2464d83947acSStefan Hajnoczi         *cluster_sector_num = sector_num;
2465d83947acSStefan Hajnoczi         *cluster_nb_sectors = nb_sectors;
2466d83947acSStefan Hajnoczi     } else {
2467d83947acSStefan Hajnoczi         int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
2468d83947acSStefan Hajnoczi         *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
2469d83947acSStefan Hajnoczi         *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
2470d83947acSStefan Hajnoczi                                             nb_sectors, c);
2471d83947acSStefan Hajnoczi     }
2472d83947acSStefan Hajnoczi }
2473d83947acSStefan Hajnoczi 
24747327145fSKevin Wolf static int bdrv_get_cluster_size(BlockDriverState *bs)
2475793ed47aSKevin Wolf {
2476793ed47aSKevin Wolf     BlockDriverInfo bdi;
24777327145fSKevin Wolf     int ret;
2478793ed47aSKevin Wolf 
24797327145fSKevin Wolf     ret = bdrv_get_info(bs, &bdi);
24807327145fSKevin Wolf     if (ret < 0 || bdi.cluster_size == 0) {
24817327145fSKevin Wolf         return bs->request_alignment;
2482793ed47aSKevin Wolf     } else {
24837327145fSKevin Wolf         return bdi.cluster_size;
2484793ed47aSKevin Wolf     }
2485793ed47aSKevin Wolf }
2486793ed47aSKevin Wolf 
2487f4658285SStefan Hajnoczi static bool tracked_request_overlaps(BdrvTrackedRequest *req,
2488793ed47aSKevin Wolf                                      int64_t offset, unsigned int bytes)
2489793ed47aSKevin Wolf {
2490d83947acSStefan Hajnoczi     /*        aaaa   bbbb */
24917327145fSKevin Wolf     if (offset >= req->overlap_offset + req->overlap_bytes) {
2492d83947acSStefan Hajnoczi         return false;
2493d83947acSStefan Hajnoczi     }
2494d83947acSStefan Hajnoczi     /* bbbb   aaaa        */
24957327145fSKevin Wolf     if (req->overlap_offset >= offset + bytes) {
2496d83947acSStefan Hajnoczi         return false;
2497d83947acSStefan Hajnoczi     }
2498d83947acSStefan Hajnoczi     return true;
2499f4658285SStefan Hajnoczi }
2500f4658285SStefan Hajnoczi 
250128de2dcdSKevin Wolf static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
2502f4658285SStefan Hajnoczi {
25032dbafdc0SKevin Wolf     BlockDriverState *bs = self->bs;
2504f4658285SStefan Hajnoczi     BdrvTrackedRequest *req;
2505f4658285SStefan Hajnoczi     bool retry;
250628de2dcdSKevin Wolf     bool waited = false;
2507f4658285SStefan Hajnoczi 
25082dbafdc0SKevin Wolf     if (!bs->serialising_in_flight) {
250928de2dcdSKevin Wolf         return false;
25102dbafdc0SKevin Wolf     }
25112dbafdc0SKevin Wolf 
2512f4658285SStefan Hajnoczi     do {
2513f4658285SStefan Hajnoczi         retry = false;
2514f4658285SStefan Hajnoczi         QLIST_FOREACH(req, &bs->tracked_requests, list) {
25152dbafdc0SKevin Wolf             if (req == self || (!req->serialising && !self->serialising)) {
251665afd211SKevin Wolf                 continue;
251765afd211SKevin Wolf             }
25187327145fSKevin Wolf             if (tracked_request_overlaps(req, self->overlap_offset,
25197327145fSKevin Wolf                                          self->overlap_bytes))
25207327145fSKevin Wolf             {
25215f8b6491SStefan Hajnoczi                 /* Hitting this means there was a reentrant request, for
25225f8b6491SStefan Hajnoczi                  * example, a block driver issuing nested requests.  This must
25235f8b6491SStefan Hajnoczi                  * never happen since it means deadlock.
25245f8b6491SStefan Hajnoczi                  */
25255f8b6491SStefan Hajnoczi                 assert(qemu_coroutine_self() != req->co);
25265f8b6491SStefan Hajnoczi 
25276460440fSKevin Wolf                 /* If the request is already (indirectly) waiting for us, or
25286460440fSKevin Wolf                  * will wait for us as soon as it wakes up, then just go on
25296460440fSKevin Wolf                  * (instead of producing a deadlock in the former case). */
25306460440fSKevin Wolf                 if (!req->waiting_for) {
25316460440fSKevin Wolf                     self->waiting_for = req;
2532f4658285SStefan Hajnoczi                     qemu_co_queue_wait(&req->wait_queue);
25336460440fSKevin Wolf                     self->waiting_for = NULL;
2534f4658285SStefan Hajnoczi                     retry = true;
253528de2dcdSKevin Wolf                     waited = true;
2536f4658285SStefan Hajnoczi                     break;
2537f4658285SStefan Hajnoczi                 }
2538f4658285SStefan Hajnoczi             }
25396460440fSKevin Wolf         }
2540f4658285SStefan Hajnoczi     } while (retry);
254128de2dcdSKevin Wolf 
254228de2dcdSKevin Wolf     return waited;
2543f4658285SStefan Hajnoczi }
2544f4658285SStefan Hajnoczi 
2545756e6736SKevin Wolf /*
2546756e6736SKevin Wolf  * Return values:
2547756e6736SKevin Wolf  * 0        - success
2548756e6736SKevin Wolf  * -EINVAL  - backing format specified, but no file
2549756e6736SKevin Wolf  * -ENOSPC  - can't update the backing file because no space is left in the
2550756e6736SKevin Wolf  *            image file header
2551756e6736SKevin Wolf  * -ENOTSUP - format driver doesn't support changing the backing file
2552756e6736SKevin Wolf  */
2553756e6736SKevin Wolf int bdrv_change_backing_file(BlockDriverState *bs,
2554756e6736SKevin Wolf     const char *backing_file, const char *backing_fmt)
2555756e6736SKevin Wolf {
2556756e6736SKevin Wolf     BlockDriver *drv = bs->drv;
2557469ef350SPaolo Bonzini     int ret;
2558756e6736SKevin Wolf 
25595f377794SPaolo Bonzini     /* Backing file format doesn't make sense without a backing file */
25605f377794SPaolo Bonzini     if (backing_fmt && !backing_file) {
25615f377794SPaolo Bonzini         return -EINVAL;
25625f377794SPaolo Bonzini     }
25635f377794SPaolo Bonzini 
2564756e6736SKevin Wolf     if (drv->bdrv_change_backing_file != NULL) {
2565469ef350SPaolo Bonzini         ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2566756e6736SKevin Wolf     } else {
2567469ef350SPaolo Bonzini         ret = -ENOTSUP;
2568756e6736SKevin Wolf     }
2569469ef350SPaolo Bonzini 
2570469ef350SPaolo Bonzini     if (ret == 0) {
2571469ef350SPaolo Bonzini         pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2572469ef350SPaolo Bonzini         pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2573469ef350SPaolo Bonzini     }
2574469ef350SPaolo Bonzini     return ret;
2575756e6736SKevin Wolf }
2576756e6736SKevin Wolf 
25776ebdcee2SJeff Cody /*
25786ebdcee2SJeff Cody  * Finds the image layer in the chain that has 'bs' as its backing file.
25796ebdcee2SJeff Cody  *
25806ebdcee2SJeff Cody  * active is the current topmost image.
25816ebdcee2SJeff Cody  *
25826ebdcee2SJeff Cody  * Returns NULL if bs is not found in active's image chain,
25836ebdcee2SJeff Cody  * or if active == bs.
25844caf0fcdSJeff Cody  *
25854caf0fcdSJeff Cody  * Returns the bottommost base image if bs == NULL.
25866ebdcee2SJeff Cody  */
25876ebdcee2SJeff Cody BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
25886ebdcee2SJeff Cody                                     BlockDriverState *bs)
25896ebdcee2SJeff Cody {
25904caf0fcdSJeff Cody     while (active && bs != active->backing_hd) {
25914caf0fcdSJeff Cody         active = active->backing_hd;
25926ebdcee2SJeff Cody     }
25936ebdcee2SJeff Cody 
25944caf0fcdSJeff Cody     return active;
25956ebdcee2SJeff Cody }
25966ebdcee2SJeff Cody 
25974caf0fcdSJeff Cody /* Given a BDS, searches for the base layer. */
25984caf0fcdSJeff Cody BlockDriverState *bdrv_find_base(BlockDriverState *bs)
25994caf0fcdSJeff Cody {
26004caf0fcdSJeff Cody     return bdrv_find_overlay(bs, NULL);
26016ebdcee2SJeff Cody }
26026ebdcee2SJeff Cody 
26036ebdcee2SJeff Cody typedef struct BlkIntermediateStates {
26046ebdcee2SJeff Cody     BlockDriverState *bs;
26056ebdcee2SJeff Cody     QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
26066ebdcee2SJeff Cody } BlkIntermediateStates;
26076ebdcee2SJeff Cody 
26086ebdcee2SJeff Cody 
26096ebdcee2SJeff Cody /*
26106ebdcee2SJeff Cody  * Drops images above 'base' up to and including 'top', and sets the image
26116ebdcee2SJeff Cody  * above 'top' to have base as its backing file.
26126ebdcee2SJeff Cody  *
26136ebdcee2SJeff Cody  * Requires that the overlay to 'top' is opened r/w, so that the backing file
26146ebdcee2SJeff Cody  * information in 'bs' can be properly updated.
26156ebdcee2SJeff Cody  *
26166ebdcee2SJeff Cody  * E.g., this will convert the following chain:
26176ebdcee2SJeff Cody  * bottom <- base <- intermediate <- top <- active
26186ebdcee2SJeff Cody  *
26196ebdcee2SJeff Cody  * to
26206ebdcee2SJeff Cody  *
26216ebdcee2SJeff Cody  * bottom <- base <- active
26226ebdcee2SJeff Cody  *
26236ebdcee2SJeff Cody  * It is allowed for bottom==base, in which case it converts:
26246ebdcee2SJeff Cody  *
26256ebdcee2SJeff Cody  * base <- intermediate <- top <- active
26266ebdcee2SJeff Cody  *
26276ebdcee2SJeff Cody  * to
26286ebdcee2SJeff Cody  *
26296ebdcee2SJeff Cody  * base <- active
26306ebdcee2SJeff Cody  *
263154e26900SJeff Cody  * If backing_file_str is non-NULL, it will be used when modifying top's
263254e26900SJeff Cody  * overlay image metadata.
263354e26900SJeff Cody  *
26346ebdcee2SJeff Cody  * Error conditions:
26356ebdcee2SJeff Cody  *  if active == top, that is considered an error
26366ebdcee2SJeff Cody  *
26376ebdcee2SJeff Cody  */
26386ebdcee2SJeff Cody int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
263954e26900SJeff Cody                            BlockDriverState *base, const char *backing_file_str)
26406ebdcee2SJeff Cody {
26416ebdcee2SJeff Cody     BlockDriverState *intermediate;
26426ebdcee2SJeff Cody     BlockDriverState *base_bs = NULL;
26436ebdcee2SJeff Cody     BlockDriverState *new_top_bs = NULL;
26446ebdcee2SJeff Cody     BlkIntermediateStates *intermediate_state, *next;
26456ebdcee2SJeff Cody     int ret = -EIO;
26466ebdcee2SJeff Cody 
26476ebdcee2SJeff Cody     QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
26486ebdcee2SJeff Cody     QSIMPLEQ_INIT(&states_to_delete);
26496ebdcee2SJeff Cody 
26506ebdcee2SJeff Cody     if (!top->drv || !base->drv) {
26516ebdcee2SJeff Cody         goto exit;
26526ebdcee2SJeff Cody     }
26536ebdcee2SJeff Cody 
26546ebdcee2SJeff Cody     new_top_bs = bdrv_find_overlay(active, top);
26556ebdcee2SJeff Cody 
26566ebdcee2SJeff Cody     if (new_top_bs == NULL) {
26576ebdcee2SJeff Cody         /* we could not find the image above 'top', this is an error */
26586ebdcee2SJeff Cody         goto exit;
26596ebdcee2SJeff Cody     }
26606ebdcee2SJeff Cody 
26616ebdcee2SJeff Cody     /* special case of new_top_bs->backing_hd already pointing to base - nothing
26626ebdcee2SJeff Cody      * to do, no intermediate images */
26636ebdcee2SJeff Cody     if (new_top_bs->backing_hd == base) {
26646ebdcee2SJeff Cody         ret = 0;
26656ebdcee2SJeff Cody         goto exit;
26666ebdcee2SJeff Cody     }
26676ebdcee2SJeff Cody 
26686ebdcee2SJeff Cody     intermediate = top;
26696ebdcee2SJeff Cody 
26706ebdcee2SJeff Cody     /* now we will go down through the list, and add each BDS we find
26716ebdcee2SJeff Cody      * into our deletion queue, until we hit the 'base'
26726ebdcee2SJeff Cody      */
26736ebdcee2SJeff Cody     while (intermediate) {
26745839e53bSMarkus Armbruster         intermediate_state = g_new0(BlkIntermediateStates, 1);
26756ebdcee2SJeff Cody         intermediate_state->bs = intermediate;
26766ebdcee2SJeff Cody         QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
26776ebdcee2SJeff Cody 
26786ebdcee2SJeff Cody         if (intermediate->backing_hd == base) {
26796ebdcee2SJeff Cody             base_bs = intermediate->backing_hd;
26806ebdcee2SJeff Cody             break;
26816ebdcee2SJeff Cody         }
26826ebdcee2SJeff Cody         intermediate = intermediate->backing_hd;
26836ebdcee2SJeff Cody     }
26846ebdcee2SJeff Cody     if (base_bs == NULL) {
26856ebdcee2SJeff Cody         /* something went wrong, we did not end at the base. safely
26866ebdcee2SJeff Cody          * unravel everything, and exit with error */
26876ebdcee2SJeff Cody         goto exit;
26886ebdcee2SJeff Cody     }
26896ebdcee2SJeff Cody 
26906ebdcee2SJeff Cody     /* success - we can delete the intermediate states, and link top->base */
269154e26900SJeff Cody     backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
269254e26900SJeff Cody     ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
26936ebdcee2SJeff Cody                                    base_bs->drv ? base_bs->drv->format_name : "");
26946ebdcee2SJeff Cody     if (ret) {
26956ebdcee2SJeff Cody         goto exit;
26966ebdcee2SJeff Cody     }
2697920beae1SFam Zheng     bdrv_set_backing_hd(new_top_bs, base_bs);
26986ebdcee2SJeff Cody 
26996ebdcee2SJeff Cody     QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
27006ebdcee2SJeff Cody         /* so that bdrv_close() does not recursively close the chain */
2701920beae1SFam Zheng         bdrv_set_backing_hd(intermediate_state->bs, NULL);
27024f6fd349SFam Zheng         bdrv_unref(intermediate_state->bs);
27036ebdcee2SJeff Cody     }
27046ebdcee2SJeff Cody     ret = 0;
27056ebdcee2SJeff Cody 
27066ebdcee2SJeff Cody exit:
27076ebdcee2SJeff Cody     QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
27086ebdcee2SJeff Cody         g_free(intermediate_state);
27096ebdcee2SJeff Cody     }
27106ebdcee2SJeff Cody     return ret;
27116ebdcee2SJeff Cody }
27126ebdcee2SJeff Cody 
27136ebdcee2SJeff Cody 
271471d0770cSaliguori static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
271571d0770cSaliguori                                    size_t size)
271671d0770cSaliguori {
271775af1f34SPeter Lieven     if (size > BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) {
27181dd3a447SKevin Wolf         return -EIO;
27191dd3a447SKevin Wolf     }
27201dd3a447SKevin Wolf 
2721c0191e76SMax Reitz     if (!bdrv_is_inserted(bs)) {
272271d0770cSaliguori         return -ENOMEDIUM;
2723c0191e76SMax Reitz     }
272471d0770cSaliguori 
2725c0191e76SMax Reitz     if (offset < 0) {
2726fbb7b4e0SKevin Wolf         return -EIO;
2727c0191e76SMax Reitz     }
272871d0770cSaliguori 
272971d0770cSaliguori     return 0;
273071d0770cSaliguori }
273171d0770cSaliguori 
273271d0770cSaliguori static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
273371d0770cSaliguori                               int nb_sectors)
273471d0770cSaliguori {
273575af1f34SPeter Lieven     if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
27368f4754edSKevin Wolf         return -EIO;
27378f4754edSKevin Wolf     }
27388f4754edSKevin Wolf 
2739eb5a3165SJes Sorensen     return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
2740eb5a3165SJes Sorensen                                    nb_sectors * BDRV_SECTOR_SIZE);
274171d0770cSaliguori }
274271d0770cSaliguori 
27431c9805a3SStefan Hajnoczi typedef struct RwCo {
27441c9805a3SStefan Hajnoczi     BlockDriverState *bs;
2745775aa8b6SKevin Wolf     int64_t offset;
27461c9805a3SStefan Hajnoczi     QEMUIOVector *qiov;
27471c9805a3SStefan Hajnoczi     bool is_write;
27481c9805a3SStefan Hajnoczi     int ret;
27494105eaaaSPeter Lieven     BdrvRequestFlags flags;
27501c9805a3SStefan Hajnoczi } RwCo;
27511c9805a3SStefan Hajnoczi 
27521c9805a3SStefan Hajnoczi static void coroutine_fn bdrv_rw_co_entry(void *opaque)
2753fc01f7e7Sbellard {
27541c9805a3SStefan Hajnoczi     RwCo *rwco = opaque;
2755fc01f7e7Sbellard 
27561c9805a3SStefan Hajnoczi     if (!rwco->is_write) {
2757775aa8b6SKevin Wolf         rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
2758775aa8b6SKevin Wolf                                       rwco->qiov->size, rwco->qiov,
27594105eaaaSPeter Lieven                                       rwco->flags);
27601c9805a3SStefan Hajnoczi     } else {
2761775aa8b6SKevin Wolf         rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
2762775aa8b6SKevin Wolf                                        rwco->qiov->size, rwco->qiov,
27634105eaaaSPeter Lieven                                        rwco->flags);
27641c9805a3SStefan Hajnoczi     }
27651c9805a3SStefan Hajnoczi }
2766e7a8a783SKevin Wolf 
27671c9805a3SStefan Hajnoczi /*
27688d3b1a2dSKevin Wolf  * Process a vectored synchronous request using coroutines
27691c9805a3SStefan Hajnoczi  */
2770775aa8b6SKevin Wolf static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
27714105eaaaSPeter Lieven                         QEMUIOVector *qiov, bool is_write,
27724105eaaaSPeter Lieven                         BdrvRequestFlags flags)
27731c9805a3SStefan Hajnoczi {
27741c9805a3SStefan Hajnoczi     Coroutine *co;
27751c9805a3SStefan Hajnoczi     RwCo rwco = {
27761c9805a3SStefan Hajnoczi         .bs = bs,
2777775aa8b6SKevin Wolf         .offset = offset,
27788d3b1a2dSKevin Wolf         .qiov = qiov,
27791c9805a3SStefan Hajnoczi         .is_write = is_write,
27801c9805a3SStefan Hajnoczi         .ret = NOT_DONE,
27814105eaaaSPeter Lieven         .flags = flags,
27821c9805a3SStefan Hajnoczi     };
27831c9805a3SStefan Hajnoczi 
2784498e386cSZhi Yong Wu     /**
2785498e386cSZhi Yong Wu      * In sync call context, when the vcpu is blocked, this throttling timer
2786498e386cSZhi Yong Wu      * will not fire; so the I/O throttling function has to be disabled here
2787498e386cSZhi Yong Wu      * if it has been enabled.
2788498e386cSZhi Yong Wu      */
2789498e386cSZhi Yong Wu     if (bs->io_limits_enabled) {
2790498e386cSZhi Yong Wu         fprintf(stderr, "Disabling I/O throttling on '%s' due "
2791498e386cSZhi Yong Wu                         "to synchronous I/O.\n", bdrv_get_device_name(bs));
2792498e386cSZhi Yong Wu         bdrv_io_limits_disable(bs);
2793498e386cSZhi Yong Wu     }
2794498e386cSZhi Yong Wu 
27951c9805a3SStefan Hajnoczi     if (qemu_in_coroutine()) {
27961c9805a3SStefan Hajnoczi         /* Fast-path if already in coroutine context */
27971c9805a3SStefan Hajnoczi         bdrv_rw_co_entry(&rwco);
27981c9805a3SStefan Hajnoczi     } else {
27992572b37aSStefan Hajnoczi         AioContext *aio_context = bdrv_get_aio_context(bs);
28002572b37aSStefan Hajnoczi 
28011c9805a3SStefan Hajnoczi         co = qemu_coroutine_create(bdrv_rw_co_entry);
28021c9805a3SStefan Hajnoczi         qemu_coroutine_enter(co, &rwco);
28031c9805a3SStefan Hajnoczi         while (rwco.ret == NOT_DONE) {
28042572b37aSStefan Hajnoczi             aio_poll(aio_context, true);
28051c9805a3SStefan Hajnoczi         }
28061c9805a3SStefan Hajnoczi     }
28071c9805a3SStefan Hajnoczi     return rwco.ret;
2808e7a8a783SKevin Wolf }
2809e7a8a783SKevin Wolf 
28108d3b1a2dSKevin Wolf /*
28118d3b1a2dSKevin Wolf  * Process a synchronous request using coroutines
28128d3b1a2dSKevin Wolf  */
28138d3b1a2dSKevin Wolf static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
28144105eaaaSPeter Lieven                       int nb_sectors, bool is_write, BdrvRequestFlags flags)
28158d3b1a2dSKevin Wolf {
28168d3b1a2dSKevin Wolf     QEMUIOVector qiov;
28178d3b1a2dSKevin Wolf     struct iovec iov = {
28188d3b1a2dSKevin Wolf         .iov_base = (void *)buf,
28198d3b1a2dSKevin Wolf         .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
28208d3b1a2dSKevin Wolf     };
28218d3b1a2dSKevin Wolf 
282275af1f34SPeter Lieven     if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
2823da15ee51SKevin Wolf         return -EINVAL;
2824da15ee51SKevin Wolf     }
2825da15ee51SKevin Wolf 
28268d3b1a2dSKevin Wolf     qemu_iovec_init_external(&qiov, &iov, 1);
2827775aa8b6SKevin Wolf     return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
2828775aa8b6SKevin Wolf                         &qiov, is_write, flags);
28298d3b1a2dSKevin Wolf }
28308d3b1a2dSKevin Wolf 
28311c9805a3SStefan Hajnoczi /* return < 0 if error. See bdrv_write() for the return codes */
28321c9805a3SStefan Hajnoczi int bdrv_read(BlockDriverState *bs, int64_t sector_num,
28331c9805a3SStefan Hajnoczi               uint8_t *buf, int nb_sectors)
28341c9805a3SStefan Hajnoczi {
28354105eaaaSPeter Lieven     return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
283683f64091Sbellard }
2837fc01f7e7Sbellard 
283807d27a44SMarkus Armbruster /* Just like bdrv_read(), but with I/O throttling temporarily disabled */
283907d27a44SMarkus Armbruster int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
284007d27a44SMarkus Armbruster                           uint8_t *buf, int nb_sectors)
284107d27a44SMarkus Armbruster {
284207d27a44SMarkus Armbruster     bool enabled;
284307d27a44SMarkus Armbruster     int ret;
284407d27a44SMarkus Armbruster 
284507d27a44SMarkus Armbruster     enabled = bs->io_limits_enabled;
284607d27a44SMarkus Armbruster     bs->io_limits_enabled = false;
28474e7395e8SPeter Lieven     ret = bdrv_read(bs, sector_num, buf, nb_sectors);
284807d27a44SMarkus Armbruster     bs->io_limits_enabled = enabled;
284907d27a44SMarkus Armbruster     return ret;
285007d27a44SMarkus Armbruster }
285107d27a44SMarkus Armbruster 
285219cb3738Sbellard /* Return < 0 if error. Important errors are:
285319cb3738Sbellard   -EIO         generic I/O error (may happen for all errors)
285419cb3738Sbellard   -ENOMEDIUM   No media inserted.
285519cb3738Sbellard   -EINVAL      Invalid sector number or nb_sectors
285619cb3738Sbellard   -EACCES      Trying to write a read-only device
285719cb3738Sbellard */
2858fc01f7e7Sbellard int bdrv_write(BlockDriverState *bs, int64_t sector_num,
2859fc01f7e7Sbellard                const uint8_t *buf, int nb_sectors)
2860fc01f7e7Sbellard {
28614105eaaaSPeter Lieven     return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
286283f64091Sbellard }
286383f64091Sbellard 
2864aa7bfbffSPeter Lieven int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
2865aa7bfbffSPeter Lieven                       int nb_sectors, BdrvRequestFlags flags)
28664105eaaaSPeter Lieven {
28674105eaaaSPeter Lieven     return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
2868aa7bfbffSPeter Lieven                       BDRV_REQ_ZERO_WRITE | flags);
28698d3b1a2dSKevin Wolf }
28708d3b1a2dSKevin Wolf 
2871d75cbb5eSPeter Lieven /*
2872d75cbb5eSPeter Lieven  * Completely zero out a block device with the help of bdrv_write_zeroes.
2873d75cbb5eSPeter Lieven  * The operation is sped up by checking the block status and only writing
2874d75cbb5eSPeter Lieven  * zeroes to the device if they currently do not return zeroes. Optional
2875d75cbb5eSPeter Lieven  * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
2876d75cbb5eSPeter Lieven  *
2877d75cbb5eSPeter Lieven  * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
2878d75cbb5eSPeter Lieven  */
2879d75cbb5eSPeter Lieven int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
2880d75cbb5eSPeter Lieven {
2881d32f7c10SMarkus Armbruster     int64_t target_sectors, ret, nb_sectors, sector_num = 0;
2882d75cbb5eSPeter Lieven     int n;
2883d75cbb5eSPeter Lieven 
2884d32f7c10SMarkus Armbruster     target_sectors = bdrv_nb_sectors(bs);
2885d32f7c10SMarkus Armbruster     if (target_sectors < 0) {
2886d32f7c10SMarkus Armbruster         return target_sectors;
28879ce10c0bSKevin Wolf     }
28889ce10c0bSKevin Wolf 
2889d75cbb5eSPeter Lieven     for (;;) {
289075af1f34SPeter Lieven         nb_sectors = MIN(target_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
2891d75cbb5eSPeter Lieven         if (nb_sectors <= 0) {
2892d75cbb5eSPeter Lieven             return 0;
2893d75cbb5eSPeter Lieven         }
2894d75cbb5eSPeter Lieven         ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
28953d94ce60SPeter Lieven         if (ret < 0) {
28963d94ce60SPeter Lieven             error_report("error getting block status at sector %" PRId64 ": %s",
28973d94ce60SPeter Lieven                          sector_num, strerror(-ret));
28983d94ce60SPeter Lieven             return ret;
28993d94ce60SPeter Lieven         }
2900d75cbb5eSPeter Lieven         if (ret & BDRV_BLOCK_ZERO) {
2901d75cbb5eSPeter Lieven             sector_num += n;
2902d75cbb5eSPeter Lieven             continue;
2903d75cbb5eSPeter Lieven         }
2904d75cbb5eSPeter Lieven         ret = bdrv_write_zeroes(bs, sector_num, n, flags);
2905d75cbb5eSPeter Lieven         if (ret < 0) {
2906d75cbb5eSPeter Lieven             error_report("error writing zeroes at sector %" PRId64 ": %s",
2907d75cbb5eSPeter Lieven                          sector_num, strerror(-ret));
2908d75cbb5eSPeter Lieven             return ret;
2909d75cbb5eSPeter Lieven         }
2910d75cbb5eSPeter Lieven         sector_num += n;
2911d75cbb5eSPeter Lieven     }
2912d75cbb5eSPeter Lieven }
2913d75cbb5eSPeter Lieven 
2914a3ef6571SKevin Wolf int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
291583f64091Sbellard {
2916a3ef6571SKevin Wolf     QEMUIOVector qiov;
2917a3ef6571SKevin Wolf     struct iovec iov = {
2918a3ef6571SKevin Wolf         .iov_base = (void *)buf,
2919a3ef6571SKevin Wolf         .iov_len = bytes,
2920a3ef6571SKevin Wolf     };
29219a8c4cceSKevin Wolf     int ret;
292283f64091Sbellard 
2923a3ef6571SKevin Wolf     if (bytes < 0) {
2924a3ef6571SKevin Wolf         return -EINVAL;
292583f64091Sbellard     }
292683f64091Sbellard 
2927a3ef6571SKevin Wolf     qemu_iovec_init_external(&qiov, &iov, 1);
2928a3ef6571SKevin Wolf     ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
2929a3ef6571SKevin Wolf     if (ret < 0) {
29309a8c4cceSKevin Wolf         return ret;
293183f64091Sbellard     }
293283f64091Sbellard 
2933a3ef6571SKevin Wolf     return bytes;
293483f64091Sbellard }
293583f64091Sbellard 
29368d3b1a2dSKevin Wolf int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
293783f64091Sbellard {
29389a8c4cceSKevin Wolf     int ret;
293983f64091Sbellard 
29408407d5d7SKevin Wolf     ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
29418d3b1a2dSKevin Wolf     if (ret < 0) {
29429a8c4cceSKevin Wolf         return ret;
29438d3b1a2dSKevin Wolf     }
29448d3b1a2dSKevin Wolf 
29458d3b1a2dSKevin Wolf     return qiov->size;
29468d3b1a2dSKevin Wolf }
29478d3b1a2dSKevin Wolf 
29488d3b1a2dSKevin Wolf int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
29498407d5d7SKevin Wolf                 const void *buf, int bytes)
29508d3b1a2dSKevin Wolf {
29518d3b1a2dSKevin Wolf     QEMUIOVector qiov;
29528d3b1a2dSKevin Wolf     struct iovec iov = {
29538d3b1a2dSKevin Wolf         .iov_base   = (void *) buf,
29548407d5d7SKevin Wolf         .iov_len    = bytes,
29558d3b1a2dSKevin Wolf     };
29568d3b1a2dSKevin Wolf 
29578407d5d7SKevin Wolf     if (bytes < 0) {
29588407d5d7SKevin Wolf         return -EINVAL;
29598407d5d7SKevin Wolf     }
29608407d5d7SKevin Wolf 
29618d3b1a2dSKevin Wolf     qemu_iovec_init_external(&qiov, &iov, 1);
29628d3b1a2dSKevin Wolf     return bdrv_pwritev(bs, offset, &qiov);
296383f64091Sbellard }
296483f64091Sbellard 
2965f08145feSKevin Wolf /*
2966f08145feSKevin Wolf  * Writes to the file and ensures that no writes are reordered across this
2967f08145feSKevin Wolf  * request (acts as a barrier)
2968f08145feSKevin Wolf  *
2969f08145feSKevin Wolf  * Returns 0 on success, -errno in error cases.
2970f08145feSKevin Wolf  */
2971f08145feSKevin Wolf int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
2972f08145feSKevin Wolf     const void *buf, int count)
2973f08145feSKevin Wolf {
2974f08145feSKevin Wolf     int ret;
2975f08145feSKevin Wolf 
2976f08145feSKevin Wolf     ret = bdrv_pwrite(bs, offset, buf, count);
2977f08145feSKevin Wolf     if (ret < 0) {
2978f08145feSKevin Wolf         return ret;
2979f08145feSKevin Wolf     }
2980f08145feSKevin Wolf 
2981f05fa4adSPaolo Bonzini     /* No flush needed for cache modes that already do it */
2982f05fa4adSPaolo Bonzini     if (bs->enable_write_cache) {
2983f08145feSKevin Wolf         bdrv_flush(bs);
2984f08145feSKevin Wolf     }
2985f08145feSKevin Wolf 
2986f08145feSKevin Wolf     return 0;
2987f08145feSKevin Wolf }
2988f08145feSKevin Wolf 
2989470c0504SStefan Hajnoczi static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
2990ab185921SStefan Hajnoczi         int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
2991ab185921SStefan Hajnoczi {
2992ab185921SStefan Hajnoczi     /* Perform I/O through a temporary buffer so that users who scribble over
2993ab185921SStefan Hajnoczi      * their read buffer while the operation is in progress do not end up
2994ab185921SStefan Hajnoczi      * modifying the image file.  This is critical for zero-copy guest I/O
2995ab185921SStefan Hajnoczi      * where anything might happen inside guest memory.
2996ab185921SStefan Hajnoczi      */
2997ab185921SStefan Hajnoczi     void *bounce_buffer;
2998ab185921SStefan Hajnoczi 
299979c053bdSStefan Hajnoczi     BlockDriver *drv = bs->drv;
3000ab185921SStefan Hajnoczi     struct iovec iov;
3001ab185921SStefan Hajnoczi     QEMUIOVector bounce_qiov;
3002ab185921SStefan Hajnoczi     int64_t cluster_sector_num;
3003ab185921SStefan Hajnoczi     int cluster_nb_sectors;
3004ab185921SStefan Hajnoczi     size_t skip_bytes;
3005ab185921SStefan Hajnoczi     int ret;
3006ab185921SStefan Hajnoczi 
3007ab185921SStefan Hajnoczi     /* Cover entire cluster so no additional backing file I/O is required when
3008ab185921SStefan Hajnoczi      * allocating cluster in the image file.
3009ab185921SStefan Hajnoczi      */
3010343bded4SPaolo Bonzini     bdrv_round_to_clusters(bs, sector_num, nb_sectors,
3011ab185921SStefan Hajnoczi                            &cluster_sector_num, &cluster_nb_sectors);
3012ab185921SStefan Hajnoczi 
3013470c0504SStefan Hajnoczi     trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
3014ab185921SStefan Hajnoczi                                    cluster_sector_num, cluster_nb_sectors);
3015ab185921SStefan Hajnoczi 
3016ab185921SStefan Hajnoczi     iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
3017857d4f46SKevin Wolf     iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
3018857d4f46SKevin Wolf     if (bounce_buffer == NULL) {
3019857d4f46SKevin Wolf         ret = -ENOMEM;
3020857d4f46SKevin Wolf         goto err;
3021857d4f46SKevin Wolf     }
3022857d4f46SKevin Wolf 
3023ab185921SStefan Hajnoczi     qemu_iovec_init_external(&bounce_qiov, &iov, 1);
3024ab185921SStefan Hajnoczi 
302579c053bdSStefan Hajnoczi     ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
3026ab185921SStefan Hajnoczi                              &bounce_qiov);
3027ab185921SStefan Hajnoczi     if (ret < 0) {
3028ab185921SStefan Hajnoczi         goto err;
3029ab185921SStefan Hajnoczi     }
3030ab185921SStefan Hajnoczi 
303179c053bdSStefan Hajnoczi     if (drv->bdrv_co_write_zeroes &&
303279c053bdSStefan Hajnoczi         buffer_is_zero(bounce_buffer, iov.iov_len)) {
3033621f0589SKevin Wolf         ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
3034aa7bfbffSPeter Lieven                                       cluster_nb_sectors, 0);
303579c053bdSStefan Hajnoczi     } else {
3036f05fa4adSPaolo Bonzini         /* This does not change the data on the disk, it is not necessary
3037f05fa4adSPaolo Bonzini          * to flush even in cache=writethrough mode.
3038f05fa4adSPaolo Bonzini          */
303979c053bdSStefan Hajnoczi         ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
3040ab185921SStefan Hajnoczi                                   &bounce_qiov);
304179c053bdSStefan Hajnoczi     }
304279c053bdSStefan Hajnoczi 
3043ab185921SStefan Hajnoczi     if (ret < 0) {
3044ab185921SStefan Hajnoczi         /* It might be okay to ignore write errors for guest requests.  If this
3045ab185921SStefan Hajnoczi          * is a deliberate copy-on-read then we don't want to ignore the error.
3046ab185921SStefan Hajnoczi          * Simply report it in all cases.
3047ab185921SStefan Hajnoczi          */
3048ab185921SStefan Hajnoczi         goto err;
3049ab185921SStefan Hajnoczi     }
3050ab185921SStefan Hajnoczi 
3051ab185921SStefan Hajnoczi     skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
305203396148SMichael Tokarev     qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
3053ab185921SStefan Hajnoczi                         nb_sectors * BDRV_SECTOR_SIZE);
3054ab185921SStefan Hajnoczi 
3055ab185921SStefan Hajnoczi err:
3056ab185921SStefan Hajnoczi     qemu_vfree(bounce_buffer);
3057ab185921SStefan Hajnoczi     return ret;
3058ab185921SStefan Hajnoczi }
3059ab185921SStefan Hajnoczi 
3060c5fbe571SStefan Hajnoczi /*
3061d0c7f642SKevin Wolf  * Forwards an already correctly aligned request to the BlockDriver. This
3062d0c7f642SKevin Wolf  * handles copy on read and zeroing after EOF; any other features must be
3063d0c7f642SKevin Wolf  * implemented by the caller.
3064c5fbe571SStefan Hajnoczi  */
3065d0c7f642SKevin Wolf static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
306665afd211SKevin Wolf     BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
3067ec746e10SKevin Wolf     int64_t align, QEMUIOVector *qiov, int flags)
3068da1fa91dSKevin Wolf {
3069da1fa91dSKevin Wolf     BlockDriver *drv = bs->drv;
3070dbffbdcfSStefan Hajnoczi     int ret;
3071da1fa91dSKevin Wolf 
3072d0c7f642SKevin Wolf     int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3073d0c7f642SKevin Wolf     unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
3074da1fa91dSKevin Wolf 
3075d0c7f642SKevin Wolf     assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3076d0c7f642SKevin Wolf     assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
30778eb029c2SKevin Wolf     assert(!qiov || bytes == qiov->size);
3078d0c7f642SKevin Wolf 
3079d0c7f642SKevin Wolf     /* Handle Copy on Read and associated serialisation */
3080470c0504SStefan Hajnoczi     if (flags & BDRV_REQ_COPY_ON_READ) {
30817327145fSKevin Wolf         /* If we touch the same cluster it counts as an overlap.  This
30827327145fSKevin Wolf          * guarantees that allocating writes will be serialized and not race
30837327145fSKevin Wolf          * with each other for the same cluster.  For example, in copy-on-read
30847327145fSKevin Wolf          * it ensures that the CoR read and write operations are atomic and
30857327145fSKevin Wolf          * guest writes cannot interleave between them. */
30867327145fSKevin Wolf         mark_request_serialising(req, bdrv_get_cluster_size(bs));
3087470c0504SStefan Hajnoczi     }
3088470c0504SStefan Hajnoczi 
30892dbafdc0SKevin Wolf     wait_serialising_requests(req);
3090f4658285SStefan Hajnoczi 
3091470c0504SStefan Hajnoczi     if (flags & BDRV_REQ_COPY_ON_READ) {
3092ab185921SStefan Hajnoczi         int pnum;
3093ab185921SStefan Hajnoczi 
3094bdad13b9SPaolo Bonzini         ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
3095ab185921SStefan Hajnoczi         if (ret < 0) {
3096ab185921SStefan Hajnoczi             goto out;
3097ab185921SStefan Hajnoczi         }
3098ab185921SStefan Hajnoczi 
3099ab185921SStefan Hajnoczi         if (!ret || pnum != nb_sectors) {
3100470c0504SStefan Hajnoczi             ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
3101ab185921SStefan Hajnoczi             goto out;
3102ab185921SStefan Hajnoczi         }
3103ab185921SStefan Hajnoczi     }
3104ab185921SStefan Hajnoczi 
3105d0c7f642SKevin Wolf     /* Forward the request to the BlockDriver */
3106c0191e76SMax Reitz     if (!bs->zero_beyond_eof) {
3107dbffbdcfSStefan Hajnoczi         ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
3108893a8f62SMORITA Kazutaka     } else {
3109c0191e76SMax Reitz         /* Read zeros after EOF */
31104049082cSMarkus Armbruster         int64_t total_sectors, max_nb_sectors;
3111893a8f62SMORITA Kazutaka 
31124049082cSMarkus Armbruster         total_sectors = bdrv_nb_sectors(bs);
31134049082cSMarkus Armbruster         if (total_sectors < 0) {
31144049082cSMarkus Armbruster             ret = total_sectors;
3115893a8f62SMORITA Kazutaka             goto out;
3116893a8f62SMORITA Kazutaka         }
3117893a8f62SMORITA Kazutaka 
31185f5bcd80SKevin Wolf         max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
31195f5bcd80SKevin Wolf                                   align >> BDRV_SECTOR_BITS);
3120e012b78cSPaolo Bonzini         if (nb_sectors < max_nb_sectors) {
3121e012b78cSPaolo Bonzini             ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
3122e012b78cSPaolo Bonzini         } else if (max_nb_sectors > 0) {
312333f461e0SKevin Wolf             QEMUIOVector local_qiov;
312433f461e0SKevin Wolf 
312533f461e0SKevin Wolf             qemu_iovec_init(&local_qiov, qiov->niov);
312633f461e0SKevin Wolf             qemu_iovec_concat(&local_qiov, qiov, 0,
3127e012b78cSPaolo Bonzini                               max_nb_sectors * BDRV_SECTOR_SIZE);
312833f461e0SKevin Wolf 
3129e012b78cSPaolo Bonzini             ret = drv->bdrv_co_readv(bs, sector_num, max_nb_sectors,
313033f461e0SKevin Wolf                                      &local_qiov);
313133f461e0SKevin Wolf 
313233f461e0SKevin Wolf             qemu_iovec_destroy(&local_qiov);
3133893a8f62SMORITA Kazutaka         } else {
3134893a8f62SMORITA Kazutaka             ret = 0;
3135893a8f62SMORITA Kazutaka         }
3136893a8f62SMORITA Kazutaka 
3137893a8f62SMORITA Kazutaka         /* Reading beyond end of file is supposed to produce zeroes */
3138893a8f62SMORITA Kazutaka         if (ret == 0 && total_sectors < sector_num + nb_sectors) {
3139893a8f62SMORITA Kazutaka             uint64_t offset = MAX(0, total_sectors - sector_num);
3140893a8f62SMORITA Kazutaka             uint64_t bytes = (sector_num + nb_sectors - offset) *
3141893a8f62SMORITA Kazutaka                               BDRV_SECTOR_SIZE;
3142893a8f62SMORITA Kazutaka             qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
3143893a8f62SMORITA Kazutaka         }
3144893a8f62SMORITA Kazutaka     }
3145ab185921SStefan Hajnoczi 
3146ab185921SStefan Hajnoczi out:
3147dbffbdcfSStefan Hajnoczi     return ret;
3148da1fa91dSKevin Wolf }
3149da1fa91dSKevin Wolf 
3150fc3959e4SFam Zheng static inline uint64_t bdrv_get_align(BlockDriverState *bs)
3151fc3959e4SFam Zheng {
3152fc3959e4SFam Zheng     /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
3153fc3959e4SFam Zheng     return MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
3154fc3959e4SFam Zheng }
3155fc3959e4SFam Zheng 
3156fc3959e4SFam Zheng static inline bool bdrv_req_is_aligned(BlockDriverState *bs,
3157fc3959e4SFam Zheng                                        int64_t offset, size_t bytes)
3158fc3959e4SFam Zheng {
3159fc3959e4SFam Zheng     int64_t align = bdrv_get_align(bs);
3160fc3959e4SFam Zheng     return !(offset & (align - 1) || (bytes & (align - 1)));
3161fc3959e4SFam Zheng }
3162fc3959e4SFam Zheng 
3163d0c7f642SKevin Wolf /*
3164d0c7f642SKevin Wolf  * Handle a read request in coroutine context
3165d0c7f642SKevin Wolf  */
31661b0288aeSKevin Wolf static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
31671b0288aeSKevin Wolf     int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
3168d0c7f642SKevin Wolf     BdrvRequestFlags flags)
3169d0c7f642SKevin Wolf {
3170d0c7f642SKevin Wolf     BlockDriver *drv = bs->drv;
317165afd211SKevin Wolf     BdrvTrackedRequest req;
317265afd211SKevin Wolf 
3173fc3959e4SFam Zheng     uint64_t align = bdrv_get_align(bs);
31741b0288aeSKevin Wolf     uint8_t *head_buf = NULL;
31751b0288aeSKevin Wolf     uint8_t *tail_buf = NULL;
31761b0288aeSKevin Wolf     QEMUIOVector local_qiov;
31771b0288aeSKevin Wolf     bool use_local_qiov = false;
3178d0c7f642SKevin Wolf     int ret;
3179d0c7f642SKevin Wolf 
3180d0c7f642SKevin Wolf     if (!drv) {
3181d0c7f642SKevin Wolf         return -ENOMEDIUM;
3182d0c7f642SKevin Wolf     }
3183b9c64947SMax Reitz 
3184b9c64947SMax Reitz     ret = bdrv_check_byte_request(bs, offset, bytes);
3185b9c64947SMax Reitz     if (ret < 0) {
3186b9c64947SMax Reitz         return ret;
3187d0c7f642SKevin Wolf     }
3188d0c7f642SKevin Wolf 
3189d0c7f642SKevin Wolf     if (bs->copy_on_read) {
3190d0c7f642SKevin Wolf         flags |= BDRV_REQ_COPY_ON_READ;
3191d0c7f642SKevin Wolf     }
3192d0c7f642SKevin Wolf 
3193d0c7f642SKevin Wolf     /* throttling disk I/O */
3194d0c7f642SKevin Wolf     if (bs->io_limits_enabled) {
3195d5103588SKevin Wolf         bdrv_io_limits_intercept(bs, bytes, false);
3196d0c7f642SKevin Wolf     }
3197d0c7f642SKevin Wolf 
31981b0288aeSKevin Wolf     /* Align read if necessary by padding qiov */
31991b0288aeSKevin Wolf     if (offset & (align - 1)) {
32001b0288aeSKevin Wolf         head_buf = qemu_blockalign(bs, align);
32011b0288aeSKevin Wolf         qemu_iovec_init(&local_qiov, qiov->niov + 2);
32021b0288aeSKevin Wolf         qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
32031b0288aeSKevin Wolf         qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
32041b0288aeSKevin Wolf         use_local_qiov = true;
32051b0288aeSKevin Wolf 
32061b0288aeSKevin Wolf         bytes += offset & (align - 1);
32071b0288aeSKevin Wolf         offset = offset & ~(align - 1);
32081b0288aeSKevin Wolf     }
32091b0288aeSKevin Wolf 
32101b0288aeSKevin Wolf     if ((offset + bytes) & (align - 1)) {
32111b0288aeSKevin Wolf         if (!use_local_qiov) {
32121b0288aeSKevin Wolf             qemu_iovec_init(&local_qiov, qiov->niov + 1);
32131b0288aeSKevin Wolf             qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
32141b0288aeSKevin Wolf             use_local_qiov = true;
32151b0288aeSKevin Wolf         }
32161b0288aeSKevin Wolf         tail_buf = qemu_blockalign(bs, align);
32171b0288aeSKevin Wolf         qemu_iovec_add(&local_qiov, tail_buf,
32181b0288aeSKevin Wolf                        align - ((offset + bytes) & (align - 1)));
32191b0288aeSKevin Wolf 
32201b0288aeSKevin Wolf         bytes = ROUND_UP(bytes, align);
32211b0288aeSKevin Wolf     }
32221b0288aeSKevin Wolf 
322365afd211SKevin Wolf     tracked_request_begin(&req, bs, offset, bytes, false);
3224ec746e10SKevin Wolf     ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
32251b0288aeSKevin Wolf                               use_local_qiov ? &local_qiov : qiov,
32261b0288aeSKevin Wolf                               flags);
322765afd211SKevin Wolf     tracked_request_end(&req);
32281b0288aeSKevin Wolf 
32291b0288aeSKevin Wolf     if (use_local_qiov) {
32301b0288aeSKevin Wolf         qemu_iovec_destroy(&local_qiov);
32311b0288aeSKevin Wolf         qemu_vfree(head_buf);
32321b0288aeSKevin Wolf         qemu_vfree(tail_buf);
32331b0288aeSKevin Wolf     }
32341b0288aeSKevin Wolf 
3235d0c7f642SKevin Wolf     return ret;
3236d0c7f642SKevin Wolf }
3237d0c7f642SKevin Wolf 
32381b0288aeSKevin Wolf static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
32391b0288aeSKevin Wolf     int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
32401b0288aeSKevin Wolf     BdrvRequestFlags flags)
32411b0288aeSKevin Wolf {
324275af1f34SPeter Lieven     if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
32431b0288aeSKevin Wolf         return -EINVAL;
32441b0288aeSKevin Wolf     }
32451b0288aeSKevin Wolf 
32461b0288aeSKevin Wolf     return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
32471b0288aeSKevin Wolf                              nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
32481b0288aeSKevin Wolf }
32491b0288aeSKevin Wolf 
3250c5fbe571SStefan Hajnoczi int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
3251da1fa91dSKevin Wolf     int nb_sectors, QEMUIOVector *qiov)
3252da1fa91dSKevin Wolf {
3253c5fbe571SStefan Hajnoczi     trace_bdrv_co_readv(bs, sector_num, nb_sectors);
3254da1fa91dSKevin Wolf 
3255470c0504SStefan Hajnoczi     return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
3256470c0504SStefan Hajnoczi }
3257470c0504SStefan Hajnoczi 
3258470c0504SStefan Hajnoczi int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
3259470c0504SStefan Hajnoczi     int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
3260470c0504SStefan Hajnoczi {
3261470c0504SStefan Hajnoczi     trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
3262470c0504SStefan Hajnoczi 
3263470c0504SStefan Hajnoczi     return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
3264470c0504SStefan Hajnoczi                             BDRV_REQ_COPY_ON_READ);
3265c5fbe571SStefan Hajnoczi }
3266c5fbe571SStefan Hajnoczi 
326798764152SPeter Lieven #define MAX_WRITE_ZEROES_BOUNCE_BUFFER 32768
3268c31cb707SPeter Lieven 
3269f08f2ddaSStefan Hajnoczi static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
3270aa7bfbffSPeter Lieven     int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
3271f08f2ddaSStefan Hajnoczi {
3272f08f2ddaSStefan Hajnoczi     BlockDriver *drv = bs->drv;
3273f08f2ddaSStefan Hajnoczi     QEMUIOVector qiov;
3274c31cb707SPeter Lieven     struct iovec iov = {0};
3275c31cb707SPeter Lieven     int ret = 0;
3276f08f2ddaSStefan Hajnoczi 
327775af1f34SPeter Lieven     int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_write_zeroes,
327875af1f34SPeter Lieven                                         BDRV_REQUEST_MAX_SECTORS);
3279621f0589SKevin Wolf 
3280c31cb707SPeter Lieven     while (nb_sectors > 0 && !ret) {
3281c31cb707SPeter Lieven         int num = nb_sectors;
3282c31cb707SPeter Lieven 
3283b8d71c09SPaolo Bonzini         /* Align request.  Block drivers can expect the "bulk" of the request
3284b8d71c09SPaolo Bonzini          * to be aligned.
3285b8d71c09SPaolo Bonzini          */
3286b8d71c09SPaolo Bonzini         if (bs->bl.write_zeroes_alignment
3287b8d71c09SPaolo Bonzini             && num > bs->bl.write_zeroes_alignment) {
3288b8d71c09SPaolo Bonzini             if (sector_num % bs->bl.write_zeroes_alignment != 0) {
3289b8d71c09SPaolo Bonzini                 /* Make a small request up to the first aligned sector.  */
3290c31cb707SPeter Lieven                 num = bs->bl.write_zeroes_alignment;
3291c31cb707SPeter Lieven                 num -= sector_num % bs->bl.write_zeroes_alignment;
3292b8d71c09SPaolo Bonzini             } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
3293b8d71c09SPaolo Bonzini                 /* Shorten the request to the last aligned sector.  num cannot
3294b8d71c09SPaolo Bonzini                  * underflow because num > bs->bl.write_zeroes_alignment.
3295b8d71c09SPaolo Bonzini                  */
3296b8d71c09SPaolo Bonzini                 num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
3297b8d71c09SPaolo Bonzini             }
3298c31cb707SPeter Lieven         }
3299c31cb707SPeter Lieven 
3300c31cb707SPeter Lieven         /* limit request size */
3301c31cb707SPeter Lieven         if (num > max_write_zeroes) {
3302c31cb707SPeter Lieven             num = max_write_zeroes;
3303c31cb707SPeter Lieven         }
3304c31cb707SPeter Lieven 
3305c31cb707SPeter Lieven         ret = -ENOTSUP;
3306f08f2ddaSStefan Hajnoczi         /* First try the efficient write zeroes operation */
3307f08f2ddaSStefan Hajnoczi         if (drv->bdrv_co_write_zeroes) {
3308c31cb707SPeter Lieven             ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
3309f08f2ddaSStefan Hajnoczi         }
3310f08f2ddaSStefan Hajnoczi 
3311c31cb707SPeter Lieven         if (ret == -ENOTSUP) {
3312f08f2ddaSStefan Hajnoczi             /* Fall back to bounce buffer if write zeroes is unsupported */
3313095e4fa4SPeter Lieven             int max_xfer_len = MIN_NON_ZERO(bs->bl.max_transfer_length,
331498764152SPeter Lieven                                             MAX_WRITE_ZEROES_BOUNCE_BUFFER);
3315095e4fa4SPeter Lieven             num = MIN(num, max_xfer_len);
3316c31cb707SPeter Lieven             iov.iov_len = num * BDRV_SECTOR_SIZE;
3317c31cb707SPeter Lieven             if (iov.iov_base == NULL) {
3318857d4f46SKevin Wolf                 iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
3319857d4f46SKevin Wolf                 if (iov.iov_base == NULL) {
3320857d4f46SKevin Wolf                     ret = -ENOMEM;
3321857d4f46SKevin Wolf                     goto fail;
3322857d4f46SKevin Wolf                 }
3323b8d71c09SPaolo Bonzini                 memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
3324c31cb707SPeter Lieven             }
3325f08f2ddaSStefan Hajnoczi             qemu_iovec_init_external(&qiov, &iov, 1);
3326f08f2ddaSStefan Hajnoczi 
3327c31cb707SPeter Lieven             ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);
3328b8d71c09SPaolo Bonzini 
3329b8d71c09SPaolo Bonzini             /* Keep bounce buffer around if it is big enough for all
3330b8d71c09SPaolo Bonzini              * all future requests.
3331b8d71c09SPaolo Bonzini              */
3332095e4fa4SPeter Lieven             if (num < max_xfer_len) {
3333b8d71c09SPaolo Bonzini                 qemu_vfree(iov.iov_base);
3334b8d71c09SPaolo Bonzini                 iov.iov_base = NULL;
3335b8d71c09SPaolo Bonzini             }
3336c31cb707SPeter Lieven         }
3337c31cb707SPeter Lieven 
3338c31cb707SPeter Lieven         sector_num += num;
3339c31cb707SPeter Lieven         nb_sectors -= num;
3340c31cb707SPeter Lieven     }
3341f08f2ddaSStefan Hajnoczi 
3342857d4f46SKevin Wolf fail:
3343f08f2ddaSStefan Hajnoczi     qemu_vfree(iov.iov_base);
3344f08f2ddaSStefan Hajnoczi     return ret;
3345f08f2ddaSStefan Hajnoczi }
3346f08f2ddaSStefan Hajnoczi 
3347c5fbe571SStefan Hajnoczi /*
3348b404f720SKevin Wolf  * Forwards an already correctly aligned write request to the BlockDriver.
3349c5fbe571SStefan Hajnoczi  */
3350b404f720SKevin Wolf static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
335165afd211SKevin Wolf     BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
335265afd211SKevin Wolf     QEMUIOVector *qiov, int flags)
3353c5fbe571SStefan Hajnoczi {
3354c5fbe571SStefan Hajnoczi     BlockDriver *drv = bs->drv;
335528de2dcdSKevin Wolf     bool waited;
33566b7cb247SStefan Hajnoczi     int ret;
3357da1fa91dSKevin Wolf 
3358b404f720SKevin Wolf     int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3359b404f720SKevin Wolf     unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
3360da1fa91dSKevin Wolf 
3361b404f720SKevin Wolf     assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3362b404f720SKevin Wolf     assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
33638eb029c2SKevin Wolf     assert(!qiov || bytes == qiov->size);
3364cc0681c4SBenoît Canet 
336528de2dcdSKevin Wolf     waited = wait_serialising_requests(req);
336628de2dcdSKevin Wolf     assert(!waited || !req->serialising);
3367af91f9a7SKevin Wolf     assert(req->overlap_offset <= offset);
3368af91f9a7SKevin Wolf     assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
3369244eadefSKevin Wolf 
337065afd211SKevin Wolf     ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
3371d616b224SStefan Hajnoczi 
3372465bee1dSPeter Lieven     if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
3373465bee1dSPeter Lieven         !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes &&
3374465bee1dSPeter Lieven         qemu_iovec_is_zero(qiov)) {
3375465bee1dSPeter Lieven         flags |= BDRV_REQ_ZERO_WRITE;
3376465bee1dSPeter Lieven         if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
3377465bee1dSPeter Lieven             flags |= BDRV_REQ_MAY_UNMAP;
3378465bee1dSPeter Lieven         }
3379465bee1dSPeter Lieven     }
3380465bee1dSPeter Lieven 
3381d616b224SStefan Hajnoczi     if (ret < 0) {
3382d616b224SStefan Hajnoczi         /* Do nothing, write notifier decided to fail this request */
3383d616b224SStefan Hajnoczi     } else if (flags & BDRV_REQ_ZERO_WRITE) {
33849e1cb96dSKevin Wolf         BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
3385aa7bfbffSPeter Lieven         ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
3386f08f2ddaSStefan Hajnoczi     } else {
33879e1cb96dSKevin Wolf         BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
33886b7cb247SStefan Hajnoczi         ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
3389f08f2ddaSStefan Hajnoczi     }
33909e1cb96dSKevin Wolf     BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);
33916b7cb247SStefan Hajnoczi 
3392f05fa4adSPaolo Bonzini     if (ret == 0 && !bs->enable_write_cache) {
3393f05fa4adSPaolo Bonzini         ret = bdrv_co_flush(bs);
3394f05fa4adSPaolo Bonzini     }
3395f05fa4adSPaolo Bonzini 
33961755da16SPaolo Bonzini     bdrv_set_dirty(bs, sector_num, nb_sectors);
3397da1fa91dSKevin Wolf 
33985366d0c8SBenoît Canet     block_acct_highest_sector(&bs->stats, sector_num, nb_sectors);
33995e5a94b6SBenoît Canet 
3400c0191e76SMax Reitz     if (ret >= 0) {
3401df2a6f29SPaolo Bonzini         bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
3402df2a6f29SPaolo Bonzini     }
3403da1fa91dSKevin Wolf 
34046b7cb247SStefan Hajnoczi     return ret;
3405da1fa91dSKevin Wolf }
3406da1fa91dSKevin Wolf 
3407b404f720SKevin Wolf /*
3408b404f720SKevin Wolf  * Handle a write request in coroutine context
3409b404f720SKevin Wolf  */
34106601553eSKevin Wolf static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
34116601553eSKevin Wolf     int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
3412b404f720SKevin Wolf     BdrvRequestFlags flags)
3413b404f720SKevin Wolf {
341465afd211SKevin Wolf     BdrvTrackedRequest req;
3415fc3959e4SFam Zheng     uint64_t align = bdrv_get_align(bs);
34163b8242e0SKevin Wolf     uint8_t *head_buf = NULL;
34173b8242e0SKevin Wolf     uint8_t *tail_buf = NULL;
34183b8242e0SKevin Wolf     QEMUIOVector local_qiov;
34193b8242e0SKevin Wolf     bool use_local_qiov = false;
3420b404f720SKevin Wolf     int ret;
3421b404f720SKevin Wolf 
3422b404f720SKevin Wolf     if (!bs->drv) {
3423b404f720SKevin Wolf         return -ENOMEDIUM;
3424b404f720SKevin Wolf     }
3425b404f720SKevin Wolf     if (bs->read_only) {
3426b404f720SKevin Wolf         return -EACCES;
3427b404f720SKevin Wolf     }
3428b9c64947SMax Reitz 
3429b9c64947SMax Reitz     ret = bdrv_check_byte_request(bs, offset, bytes);
3430b9c64947SMax Reitz     if (ret < 0) {
3431b9c64947SMax Reitz         return ret;
3432b404f720SKevin Wolf     }
3433b404f720SKevin Wolf 
3434b404f720SKevin Wolf     /* throttling disk I/O */
3435b404f720SKevin Wolf     if (bs->io_limits_enabled) {
3436d5103588SKevin Wolf         bdrv_io_limits_intercept(bs, bytes, true);
3437b404f720SKevin Wolf     }
3438b404f720SKevin Wolf 
34393b8242e0SKevin Wolf     /*
34403b8242e0SKevin Wolf      * Align write if necessary by performing a read-modify-write cycle.
34413b8242e0SKevin Wolf      * Pad qiov with the read parts and be sure to have a tracked request not
34423b8242e0SKevin Wolf      * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
34433b8242e0SKevin Wolf      */
344465afd211SKevin Wolf     tracked_request_begin(&req, bs, offset, bytes, true);
34453b8242e0SKevin Wolf 
34463b8242e0SKevin Wolf     if (offset & (align - 1)) {
34473b8242e0SKevin Wolf         QEMUIOVector head_qiov;
34483b8242e0SKevin Wolf         struct iovec head_iov;
34493b8242e0SKevin Wolf 
34503b8242e0SKevin Wolf         mark_request_serialising(&req, align);
34513b8242e0SKevin Wolf         wait_serialising_requests(&req);
34523b8242e0SKevin Wolf 
34533b8242e0SKevin Wolf         head_buf = qemu_blockalign(bs, align);
34543b8242e0SKevin Wolf         head_iov = (struct iovec) {
34553b8242e0SKevin Wolf             .iov_base   = head_buf,
34563b8242e0SKevin Wolf             .iov_len    = align,
34573b8242e0SKevin Wolf         };
34583b8242e0SKevin Wolf         qemu_iovec_init_external(&head_qiov, &head_iov, 1);
34593b8242e0SKevin Wolf 
34609e1cb96dSKevin Wolf         BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
34613b8242e0SKevin Wolf         ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
34623b8242e0SKevin Wolf                                   align, &head_qiov, 0);
34633b8242e0SKevin Wolf         if (ret < 0) {
34643b8242e0SKevin Wolf             goto fail;
34653b8242e0SKevin Wolf         }
34669e1cb96dSKevin Wolf         BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
34673b8242e0SKevin Wolf 
34683b8242e0SKevin Wolf         qemu_iovec_init(&local_qiov, qiov->niov + 2);
34693b8242e0SKevin Wolf         qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
34703b8242e0SKevin Wolf         qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
34713b8242e0SKevin Wolf         use_local_qiov = true;
34723b8242e0SKevin Wolf 
34733b8242e0SKevin Wolf         bytes += offset & (align - 1);
34743b8242e0SKevin Wolf         offset = offset & ~(align - 1);
34753b8242e0SKevin Wolf     }
34763b8242e0SKevin Wolf 
34773b8242e0SKevin Wolf     if ((offset + bytes) & (align - 1)) {
34783b8242e0SKevin Wolf         QEMUIOVector tail_qiov;
34793b8242e0SKevin Wolf         struct iovec tail_iov;
34803b8242e0SKevin Wolf         size_t tail_bytes;
348128de2dcdSKevin Wolf         bool waited;
34823b8242e0SKevin Wolf 
34833b8242e0SKevin Wolf         mark_request_serialising(&req, align);
348428de2dcdSKevin Wolf         waited = wait_serialising_requests(&req);
348528de2dcdSKevin Wolf         assert(!waited || !use_local_qiov);
34863b8242e0SKevin Wolf 
34873b8242e0SKevin Wolf         tail_buf = qemu_blockalign(bs, align);
34883b8242e0SKevin Wolf         tail_iov = (struct iovec) {
34893b8242e0SKevin Wolf             .iov_base   = tail_buf,
34903b8242e0SKevin Wolf             .iov_len    = align,
34913b8242e0SKevin Wolf         };
34923b8242e0SKevin Wolf         qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
34933b8242e0SKevin Wolf 
34949e1cb96dSKevin Wolf         BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
34953b8242e0SKevin Wolf         ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
34963b8242e0SKevin Wolf                                   align, &tail_qiov, 0);
34973b8242e0SKevin Wolf         if (ret < 0) {
34983b8242e0SKevin Wolf             goto fail;
34993b8242e0SKevin Wolf         }
35009e1cb96dSKevin Wolf         BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
35013b8242e0SKevin Wolf 
35023b8242e0SKevin Wolf         if (!use_local_qiov) {
35033b8242e0SKevin Wolf             qemu_iovec_init(&local_qiov, qiov->niov + 1);
35043b8242e0SKevin Wolf             qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
35053b8242e0SKevin Wolf             use_local_qiov = true;
35063b8242e0SKevin Wolf         }
35073b8242e0SKevin Wolf 
35083b8242e0SKevin Wolf         tail_bytes = (offset + bytes) & (align - 1);
35093b8242e0SKevin Wolf         qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
35103b8242e0SKevin Wolf 
35113b8242e0SKevin Wolf         bytes = ROUND_UP(bytes, align);
35123b8242e0SKevin Wolf     }
35133b8242e0SKevin Wolf 
3514fc3959e4SFam Zheng     if (use_local_qiov) {
3515fc3959e4SFam Zheng         /* Local buffer may have non-zero data. */
3516fc3959e4SFam Zheng         flags &= ~BDRV_REQ_ZERO_WRITE;
3517fc3959e4SFam Zheng     }
35183b8242e0SKevin Wolf     ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
35193b8242e0SKevin Wolf                                use_local_qiov ? &local_qiov : qiov,
35203b8242e0SKevin Wolf                                flags);
35213b8242e0SKevin Wolf 
35223b8242e0SKevin Wolf fail:
352365afd211SKevin Wolf     tracked_request_end(&req);
3524b404f720SKevin Wolf 
35253b8242e0SKevin Wolf     if (use_local_qiov) {
35263b8242e0SKevin Wolf         qemu_iovec_destroy(&local_qiov);
352799c4a85cSKevin Wolf     }
35283b8242e0SKevin Wolf     qemu_vfree(head_buf);
35293b8242e0SKevin Wolf     qemu_vfree(tail_buf);
35303b8242e0SKevin Wolf 
3531b404f720SKevin Wolf     return ret;
3532b404f720SKevin Wolf }
3533b404f720SKevin Wolf 
35346601553eSKevin Wolf static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
35356601553eSKevin Wolf     int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
35366601553eSKevin Wolf     BdrvRequestFlags flags)
35376601553eSKevin Wolf {
353875af1f34SPeter Lieven     if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
35396601553eSKevin Wolf         return -EINVAL;
35406601553eSKevin Wolf     }
35416601553eSKevin Wolf 
35426601553eSKevin Wolf     return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
35436601553eSKevin Wolf                               nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
35446601553eSKevin Wolf }
35456601553eSKevin Wolf 
3546c5fbe571SStefan Hajnoczi int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
3547c5fbe571SStefan Hajnoczi     int nb_sectors, QEMUIOVector *qiov)
3548c5fbe571SStefan Hajnoczi {
3549c5fbe571SStefan Hajnoczi     trace_bdrv_co_writev(bs, sector_num, nb_sectors);
3550c5fbe571SStefan Hajnoczi 
3551f08f2ddaSStefan Hajnoczi     return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
3552f08f2ddaSStefan Hajnoczi }
3553f08f2ddaSStefan Hajnoczi 
3554f08f2ddaSStefan Hajnoczi int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
3555aa7bfbffSPeter Lieven                                       int64_t sector_num, int nb_sectors,
3556aa7bfbffSPeter Lieven                                       BdrvRequestFlags flags)
3557f08f2ddaSStefan Hajnoczi {
3558fc3959e4SFam Zheng     int ret;
3559fc3959e4SFam Zheng 
356094d6ff21SPaolo Bonzini     trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
3561f08f2ddaSStefan Hajnoczi 
3562d32f35cbSPeter Lieven     if (!(bs->open_flags & BDRV_O_UNMAP)) {
3563d32f35cbSPeter Lieven         flags &= ~BDRV_REQ_MAY_UNMAP;
3564d32f35cbSPeter Lieven     }
3565fc3959e4SFam Zheng     if (bdrv_req_is_aligned(bs, sector_num << BDRV_SECTOR_BITS,
3566fc3959e4SFam Zheng                             nb_sectors << BDRV_SECTOR_BITS)) {
3567fc3959e4SFam Zheng         ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
3568aa7bfbffSPeter Lieven                                 BDRV_REQ_ZERO_WRITE | flags);
3569fc3959e4SFam Zheng     } else {
3570fc3959e4SFam Zheng         uint8_t *buf;
3571fc3959e4SFam Zheng         QEMUIOVector local_qiov;
3572fc3959e4SFam Zheng         size_t bytes = nb_sectors << BDRV_SECTOR_BITS;
3573fc3959e4SFam Zheng 
3574fc3959e4SFam Zheng         buf = qemu_memalign(bdrv_opt_mem_align(bs), bytes);
3575fc3959e4SFam Zheng         memset(buf, 0, bytes);
3576fc3959e4SFam Zheng         qemu_iovec_init(&local_qiov, 1);
3577fc3959e4SFam Zheng         qemu_iovec_add(&local_qiov, buf, bytes);
3578fc3959e4SFam Zheng 
3579fc3959e4SFam Zheng         ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, &local_qiov,
3580fc3959e4SFam Zheng                                 BDRV_REQ_ZERO_WRITE | flags);
3581fc3959e4SFam Zheng         qemu_vfree(buf);
3582fc3959e4SFam Zheng     }
3583fc3959e4SFam Zheng     return ret;
3584c5fbe571SStefan Hajnoczi }
3585c5fbe571SStefan Hajnoczi 
358683f64091Sbellard /**
358783f64091Sbellard  * Truncate file to 'offset' bytes (needed only for file protocols)
358883f64091Sbellard  */
358983f64091Sbellard int bdrv_truncate(BlockDriverState *bs, int64_t offset)
359083f64091Sbellard {
359183f64091Sbellard     BlockDriver *drv = bs->drv;
359251762288SStefan Hajnoczi     int ret;
359383f64091Sbellard     if (!drv)
359419cb3738Sbellard         return -ENOMEDIUM;
359583f64091Sbellard     if (!drv->bdrv_truncate)
359683f64091Sbellard         return -ENOTSUP;
359759f2689dSNaphtali Sprei     if (bs->read_only)
359859f2689dSNaphtali Sprei         return -EACCES;
35999c75e168SJeff Cody 
360051762288SStefan Hajnoczi     ret = drv->bdrv_truncate(bs, offset);
360151762288SStefan Hajnoczi     if (ret == 0) {
360251762288SStefan Hajnoczi         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
3603a7f53e26SMarkus Armbruster         if (bs->blk) {
3604a7f53e26SMarkus Armbruster             blk_dev_resize_cb(bs->blk);
3605a7f53e26SMarkus Armbruster         }
360651762288SStefan Hajnoczi     }
360751762288SStefan Hajnoczi     return ret;
360883f64091Sbellard }
360983f64091Sbellard 
361083f64091Sbellard /**
36114a1d5e1fSFam Zheng  * Length of a allocated file in bytes. Sparse files are counted by actual
36124a1d5e1fSFam Zheng  * allocated space. Return < 0 if error or unknown.
36134a1d5e1fSFam Zheng  */
36144a1d5e1fSFam Zheng int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
36154a1d5e1fSFam Zheng {
36164a1d5e1fSFam Zheng     BlockDriver *drv = bs->drv;
36174a1d5e1fSFam Zheng     if (!drv) {
36184a1d5e1fSFam Zheng         return -ENOMEDIUM;
36194a1d5e1fSFam Zheng     }
36204a1d5e1fSFam Zheng     if (drv->bdrv_get_allocated_file_size) {
36214a1d5e1fSFam Zheng         return drv->bdrv_get_allocated_file_size(bs);
36224a1d5e1fSFam Zheng     }
36234a1d5e1fSFam Zheng     if (bs->file) {
36244a1d5e1fSFam Zheng         return bdrv_get_allocated_file_size(bs->file);
36254a1d5e1fSFam Zheng     }
36264a1d5e1fSFam Zheng     return -ENOTSUP;
36274a1d5e1fSFam Zheng }
36284a1d5e1fSFam Zheng 
36294a1d5e1fSFam Zheng /**
363065a9bb25SMarkus Armbruster  * Return number of sectors on success, -errno on error.
363183f64091Sbellard  */
363265a9bb25SMarkus Armbruster int64_t bdrv_nb_sectors(BlockDriverState *bs)
363383f64091Sbellard {
363483f64091Sbellard     BlockDriver *drv = bs->drv;
363565a9bb25SMarkus Armbruster 
363683f64091Sbellard     if (!drv)
363719cb3738Sbellard         return -ENOMEDIUM;
363851762288SStefan Hajnoczi 
3639b94a2610SKevin Wolf     if (drv->has_variable_length) {
3640b94a2610SKevin Wolf         int ret = refresh_total_sectors(bs, bs->total_sectors);
3641b94a2610SKevin Wolf         if (ret < 0) {
3642b94a2610SKevin Wolf             return ret;
3643fc01f7e7Sbellard         }
364446a4e4e6SStefan Hajnoczi     }
364565a9bb25SMarkus Armbruster     return bs->total_sectors;
364665a9bb25SMarkus Armbruster }
364765a9bb25SMarkus Armbruster 
364865a9bb25SMarkus Armbruster /**
364965a9bb25SMarkus Armbruster  * Return length in bytes on success, -errno on error.
365065a9bb25SMarkus Armbruster  * The length is always a multiple of BDRV_SECTOR_SIZE.
365165a9bb25SMarkus Armbruster  */
365265a9bb25SMarkus Armbruster int64_t bdrv_getlength(BlockDriverState *bs)
365365a9bb25SMarkus Armbruster {
365465a9bb25SMarkus Armbruster     int64_t ret = bdrv_nb_sectors(bs);
365565a9bb25SMarkus Armbruster 
365665a9bb25SMarkus Armbruster     return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
365746a4e4e6SStefan Hajnoczi }
3658fc01f7e7Sbellard 
365919cb3738Sbellard /* return 0 as number of sectors if no device present or error */
366096b8f136Sths void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
3661fc01f7e7Sbellard {
366265a9bb25SMarkus Armbruster     int64_t nb_sectors = bdrv_nb_sectors(bs);
366365a9bb25SMarkus Armbruster 
366465a9bb25SMarkus Armbruster     *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
3665fc01f7e7Sbellard }
3666cf98951bSbellard 
3667ff06f5f3SPaolo Bonzini void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
3668ff06f5f3SPaolo Bonzini                        BlockdevOnError on_write_error)
3669abd7f68dSMarkus Armbruster {
3670abd7f68dSMarkus Armbruster     bs->on_read_error = on_read_error;
3671abd7f68dSMarkus Armbruster     bs->on_write_error = on_write_error;
3672abd7f68dSMarkus Armbruster }
3673abd7f68dSMarkus Armbruster 
36741ceee0d5SPaolo Bonzini BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
3675abd7f68dSMarkus Armbruster {
3676abd7f68dSMarkus Armbruster     return is_read ? bs->on_read_error : bs->on_write_error;
3677abd7f68dSMarkus Armbruster }
3678abd7f68dSMarkus Armbruster 
36793e1caa5fSPaolo Bonzini BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
36803e1caa5fSPaolo Bonzini {
36813e1caa5fSPaolo Bonzini     BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
36823e1caa5fSPaolo Bonzini 
36833e1caa5fSPaolo Bonzini     switch (on_err) {
36843e1caa5fSPaolo Bonzini     case BLOCKDEV_ON_ERROR_ENOSPC:
3685a589569fSWenchao Xia         return (error == ENOSPC) ?
3686a589569fSWenchao Xia                BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
36873e1caa5fSPaolo Bonzini     case BLOCKDEV_ON_ERROR_STOP:
3688a589569fSWenchao Xia         return BLOCK_ERROR_ACTION_STOP;
36893e1caa5fSPaolo Bonzini     case BLOCKDEV_ON_ERROR_REPORT:
3690a589569fSWenchao Xia         return BLOCK_ERROR_ACTION_REPORT;
36913e1caa5fSPaolo Bonzini     case BLOCKDEV_ON_ERROR_IGNORE:
3692a589569fSWenchao Xia         return BLOCK_ERROR_ACTION_IGNORE;
36933e1caa5fSPaolo Bonzini     default:
36943e1caa5fSPaolo Bonzini         abort();
36953e1caa5fSPaolo Bonzini     }
36963e1caa5fSPaolo Bonzini }
36973e1caa5fSPaolo Bonzini 
3698c7c2ff0cSLuiz Capitulino static void send_qmp_error_event(BlockDriverState *bs,
3699c7c2ff0cSLuiz Capitulino                                  BlockErrorAction action,
3700c7c2ff0cSLuiz Capitulino                                  bool is_read, int error)
3701c7c2ff0cSLuiz Capitulino {
3702573742a5SPeter Maydell     IoOperationType optype;
3703c7c2ff0cSLuiz Capitulino 
3704573742a5SPeter Maydell     optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
3705573742a5SPeter Maydell     qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
3706c7c2ff0cSLuiz Capitulino                                    bdrv_iostatus_is_enabled(bs),
3707624ff573SLuiz Capitulino                                    error == ENOSPC, strerror(error),
3708624ff573SLuiz Capitulino                                    &error_abort);
3709c7c2ff0cSLuiz Capitulino }
3710c7c2ff0cSLuiz Capitulino 
37113e1caa5fSPaolo Bonzini /* This is done by device models because, while the block layer knows
37123e1caa5fSPaolo Bonzini  * about the error, it does not know whether an operation comes from
37133e1caa5fSPaolo Bonzini  * the device or the block layer (from a job, for example).
37143e1caa5fSPaolo Bonzini  */
37153e1caa5fSPaolo Bonzini void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
37163e1caa5fSPaolo Bonzini                        bool is_read, int error)
37173e1caa5fSPaolo Bonzini {
37183e1caa5fSPaolo Bonzini     assert(error >= 0);
37192bd3bce8SPaolo Bonzini 
3720a589569fSWenchao Xia     if (action == BLOCK_ERROR_ACTION_STOP) {
37212bd3bce8SPaolo Bonzini         /* First set the iostatus, so that "info block" returns an iostatus
37222bd3bce8SPaolo Bonzini          * that matches the events raised so far (an additional error iostatus
37232bd3bce8SPaolo Bonzini          * is fine, but not a lost one).
37242bd3bce8SPaolo Bonzini          */
37253e1caa5fSPaolo Bonzini         bdrv_iostatus_set_err(bs, error);
37262bd3bce8SPaolo Bonzini 
37272bd3bce8SPaolo Bonzini         /* Then raise the request to stop the VM and the event.
37282bd3bce8SPaolo Bonzini          * qemu_system_vmstop_request_prepare has two effects.  First,
37292bd3bce8SPaolo Bonzini          * it ensures that the STOP event always comes after the
37302bd3bce8SPaolo Bonzini          * BLOCK_IO_ERROR event.  Second, it ensures that even if management
37312bd3bce8SPaolo Bonzini          * can observe the STOP event and do a "cont" before the STOP
37322bd3bce8SPaolo Bonzini          * event is issued, the VM will not stop.  In this case, vm_start()
37332bd3bce8SPaolo Bonzini          * also ensures that the STOP/RESUME pair of events is emitted.
37342bd3bce8SPaolo Bonzini          */
37352bd3bce8SPaolo Bonzini         qemu_system_vmstop_request_prepare();
3736c7c2ff0cSLuiz Capitulino         send_qmp_error_event(bs, action, is_read, error);
37372bd3bce8SPaolo Bonzini         qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
37382bd3bce8SPaolo Bonzini     } else {
3739c7c2ff0cSLuiz Capitulino         send_qmp_error_event(bs, action, is_read, error);
37403e1caa5fSPaolo Bonzini     }
37413e1caa5fSPaolo Bonzini }
37423e1caa5fSPaolo Bonzini 
3743b338082bSbellard int bdrv_is_read_only(BlockDriverState *bs)
3744b338082bSbellard {
3745b338082bSbellard     return bs->read_only;
3746b338082bSbellard }
3747b338082bSbellard 
3748985a03b0Sths int bdrv_is_sg(BlockDriverState *bs)
3749985a03b0Sths {
3750985a03b0Sths     return bs->sg;
3751985a03b0Sths }
3752985a03b0Sths 
3753e900a7b7SChristoph Hellwig int bdrv_enable_write_cache(BlockDriverState *bs)
3754e900a7b7SChristoph Hellwig {
3755e900a7b7SChristoph Hellwig     return bs->enable_write_cache;
3756e900a7b7SChristoph Hellwig }
3757e900a7b7SChristoph Hellwig 
3758425b0148SPaolo Bonzini void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
3759425b0148SPaolo Bonzini {
3760425b0148SPaolo Bonzini     bs->enable_write_cache = wce;
376155b110f2SJeff Cody 
376255b110f2SJeff Cody     /* so a reopen() will preserve wce */
376355b110f2SJeff Cody     if (wce) {
376455b110f2SJeff Cody         bs->open_flags |= BDRV_O_CACHE_WB;
376555b110f2SJeff Cody     } else {
376655b110f2SJeff Cody         bs->open_flags &= ~BDRV_O_CACHE_WB;
376755b110f2SJeff Cody     }
3768425b0148SPaolo Bonzini }
3769425b0148SPaolo Bonzini 
3770ea2384d3Sbellard int bdrv_is_encrypted(BlockDriverState *bs)
3771ea2384d3Sbellard {
3772ea2384d3Sbellard     if (bs->backing_hd && bs->backing_hd->encrypted)
3773ea2384d3Sbellard         return 1;
3774ea2384d3Sbellard     return bs->encrypted;
3775ea2384d3Sbellard }
3776ea2384d3Sbellard 
3777c0f4ce77Saliguori int bdrv_key_required(BlockDriverState *bs)
3778c0f4ce77Saliguori {
3779c0f4ce77Saliguori     BlockDriverState *backing_hd = bs->backing_hd;
3780c0f4ce77Saliguori 
3781c0f4ce77Saliguori     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
3782c0f4ce77Saliguori         return 1;
3783c0f4ce77Saliguori     return (bs->encrypted && !bs->valid_key);
3784c0f4ce77Saliguori }
3785c0f4ce77Saliguori 
3786ea2384d3Sbellard int bdrv_set_key(BlockDriverState *bs, const char *key)
3787ea2384d3Sbellard {
3788ea2384d3Sbellard     int ret;
3789ea2384d3Sbellard     if (bs->backing_hd && bs->backing_hd->encrypted) {
3790ea2384d3Sbellard         ret = bdrv_set_key(bs->backing_hd, key);
3791ea2384d3Sbellard         if (ret < 0)
3792ea2384d3Sbellard             return ret;
3793ea2384d3Sbellard         if (!bs->encrypted)
3794ea2384d3Sbellard             return 0;
3795ea2384d3Sbellard     }
3796fd04a2aeSShahar Havivi     if (!bs->encrypted) {
3797fd04a2aeSShahar Havivi         return -EINVAL;
3798fd04a2aeSShahar Havivi     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
3799fd04a2aeSShahar Havivi         return -ENOMEDIUM;
3800fd04a2aeSShahar Havivi     }
3801c0f4ce77Saliguori     ret = bs->drv->bdrv_set_key(bs, key);
3802bb5fc20fSaliguori     if (ret < 0) {
3803bb5fc20fSaliguori         bs->valid_key = 0;
3804bb5fc20fSaliguori     } else if (!bs->valid_key) {
3805bb5fc20fSaliguori         bs->valid_key = 1;
3806a7f53e26SMarkus Armbruster         if (bs->blk) {
3807bb5fc20fSaliguori             /* call the change callback now, we skipped it on open */
3808a7f53e26SMarkus Armbruster             blk_dev_change_media_cb(bs->blk, true);
3809a7f53e26SMarkus Armbruster         }
3810bb5fc20fSaliguori     }
3811c0f4ce77Saliguori     return ret;
3812ea2384d3Sbellard }
3813ea2384d3Sbellard 
38144d2855a3SMarkus Armbruster /*
38154d2855a3SMarkus Armbruster  * Provide an encryption key for @bs.
38164d2855a3SMarkus Armbruster  * If @key is non-null:
38174d2855a3SMarkus Armbruster  *     If @bs is not encrypted, fail.
38184d2855a3SMarkus Armbruster  *     Else if the key is invalid, fail.
38194d2855a3SMarkus Armbruster  *     Else set @bs's key to @key, replacing the existing key, if any.
38204d2855a3SMarkus Armbruster  * If @key is null:
38214d2855a3SMarkus Armbruster  *     If @bs is encrypted and still lacks a key, fail.
38224d2855a3SMarkus Armbruster  *     Else do nothing.
38234d2855a3SMarkus Armbruster  * On failure, store an error object through @errp if non-null.
38244d2855a3SMarkus Armbruster  */
38254d2855a3SMarkus Armbruster void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
38264d2855a3SMarkus Armbruster {
38274d2855a3SMarkus Armbruster     if (key) {
38284d2855a3SMarkus Armbruster         if (!bdrv_is_encrypted(bs)) {
382981e5f78aSAlberto Garcia             error_setg(errp, "Node '%s' is not encrypted",
383081e5f78aSAlberto Garcia                       bdrv_get_device_or_node_name(bs));
38314d2855a3SMarkus Armbruster         } else if (bdrv_set_key(bs, key) < 0) {
38324d2855a3SMarkus Armbruster             error_set(errp, QERR_INVALID_PASSWORD);
38334d2855a3SMarkus Armbruster         }
38344d2855a3SMarkus Armbruster     } else {
38354d2855a3SMarkus Armbruster         if (bdrv_key_required(bs)) {
3836b1ca6391SMarkus Armbruster             error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
3837b1ca6391SMarkus Armbruster                       "'%s' (%s) is encrypted",
383881e5f78aSAlberto Garcia                       bdrv_get_device_or_node_name(bs),
38394d2855a3SMarkus Armbruster                       bdrv_get_encrypted_filename(bs));
38404d2855a3SMarkus Armbruster         }
38414d2855a3SMarkus Armbruster     }
38424d2855a3SMarkus Armbruster }
38434d2855a3SMarkus Armbruster 
3844f8d6bba1SMarkus Armbruster const char *bdrv_get_format_name(BlockDriverState *bs)
3845ea2384d3Sbellard {
3846f8d6bba1SMarkus Armbruster     return bs->drv ? bs->drv->format_name : NULL;
3847ea2384d3Sbellard }
3848ea2384d3Sbellard 
/*
 * qsort() comparator for an array of C strings (const char * elements).
 *
 * qsort() hands the comparator pointers to the array elements, so @a and @b
 * each point at a 'const char *' and must be dereferenced once before the
 * strings can be compared.  Passing them to strcmp() directly would compare
 * the bytes of the pointers themselves.
 *
 * Returns <0, 0 or >0 as strcmp() does.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
3853ada42401SStefan Hajnoczi 
3854ea2384d3Sbellard void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
3855ea2384d3Sbellard                          void *opaque)
3856ea2384d3Sbellard {
3857ea2384d3Sbellard     BlockDriver *drv;
3858e855e4fbSJeff Cody     int count = 0;
3859ada42401SStefan Hajnoczi     int i;
3860e855e4fbSJeff Cody     const char **formats = NULL;
3861ea2384d3Sbellard 
38628a22f02aSStefan Hajnoczi     QLIST_FOREACH(drv, &bdrv_drivers, list) {
3863e855e4fbSJeff Cody         if (drv->format_name) {
3864e855e4fbSJeff Cody             bool found = false;
3865e855e4fbSJeff Cody             int i = count;
3866e855e4fbSJeff Cody             while (formats && i && !found) {
3867e855e4fbSJeff Cody                 found = !strcmp(formats[--i], drv->format_name);
3868e855e4fbSJeff Cody             }
3869e855e4fbSJeff Cody 
3870e855e4fbSJeff Cody             if (!found) {
38715839e53bSMarkus Armbruster                 formats = g_renew(const char *, formats, count + 1);
3872e855e4fbSJeff Cody                 formats[count++] = drv->format_name;
3873ea2384d3Sbellard             }
3874ea2384d3Sbellard         }
3875e855e4fbSJeff Cody     }
3876ada42401SStefan Hajnoczi 
3877ada42401SStefan Hajnoczi     qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
3878ada42401SStefan Hajnoczi 
3879ada42401SStefan Hajnoczi     for (i = 0; i < count; i++) {
3880ada42401SStefan Hajnoczi         it(opaque, formats[i]);
3881ada42401SStefan Hajnoczi     }
3882ada42401SStefan Hajnoczi 
3883e855e4fbSJeff Cody     g_free(formats);
3884e855e4fbSJeff Cody }
3885ea2384d3Sbellard 
3886dc364f4cSBenoît Canet /* This function is to find a node in the bs graph */
3887dc364f4cSBenoît Canet BlockDriverState *bdrv_find_node(const char *node_name)
3888dc364f4cSBenoît Canet {
3889dc364f4cSBenoît Canet     BlockDriverState *bs;
3890dc364f4cSBenoît Canet 
3891dc364f4cSBenoît Canet     assert(node_name);
3892dc364f4cSBenoît Canet 
3893dc364f4cSBenoît Canet     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3894dc364f4cSBenoît Canet         if (!strcmp(node_name, bs->node_name)) {
3895dc364f4cSBenoît Canet             return bs;
3896dc364f4cSBenoît Canet         }
3897dc364f4cSBenoît Canet     }
3898dc364f4cSBenoît Canet     return NULL;
3899dc364f4cSBenoît Canet }
3900dc364f4cSBenoît Canet 
3901c13163fbSBenoît Canet /* Put this QMP function here so it can access the static graph_bdrv_states. */
3902d5a8ee60SAlberto Garcia BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
3903c13163fbSBenoît Canet {
3904c13163fbSBenoît Canet     BlockDeviceInfoList *list, *entry;
3905c13163fbSBenoît Canet     BlockDriverState *bs;
3906c13163fbSBenoît Canet 
3907c13163fbSBenoît Canet     list = NULL;
3908c13163fbSBenoît Canet     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3909d5a8ee60SAlberto Garcia         BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
3910d5a8ee60SAlberto Garcia         if (!info) {
3911d5a8ee60SAlberto Garcia             qapi_free_BlockDeviceInfoList(list);
3912d5a8ee60SAlberto Garcia             return NULL;
3913d5a8ee60SAlberto Garcia         }
3914c13163fbSBenoît Canet         entry = g_malloc0(sizeof(*entry));
3915d5a8ee60SAlberto Garcia         entry->value = info;
3916c13163fbSBenoît Canet         entry->next = list;
3917c13163fbSBenoît Canet         list = entry;
3918c13163fbSBenoît Canet     }
3919c13163fbSBenoît Canet 
3920c13163fbSBenoît Canet     return list;
3921c13163fbSBenoît Canet }
3922c13163fbSBenoît Canet 
392312d3ba82SBenoît Canet BlockDriverState *bdrv_lookup_bs(const char *device,
392412d3ba82SBenoît Canet                                  const char *node_name,
392512d3ba82SBenoît Canet                                  Error **errp)
392612d3ba82SBenoît Canet {
39277f06d47eSMarkus Armbruster     BlockBackend *blk;
39287f06d47eSMarkus Armbruster     BlockDriverState *bs;
392912d3ba82SBenoît Canet 
393012d3ba82SBenoît Canet     if (device) {
39317f06d47eSMarkus Armbruster         blk = blk_by_name(device);
393212d3ba82SBenoît Canet 
39337f06d47eSMarkus Armbruster         if (blk) {
39347f06d47eSMarkus Armbruster             return blk_bs(blk);
393512d3ba82SBenoît Canet         }
3936dd67fa50SBenoît Canet     }
393712d3ba82SBenoît Canet 
3938dd67fa50SBenoît Canet     if (node_name) {
393912d3ba82SBenoît Canet         bs = bdrv_find_node(node_name);
394012d3ba82SBenoît Canet 
3941dd67fa50SBenoît Canet         if (bs) {
3942dd67fa50SBenoît Canet             return bs;
3943dd67fa50SBenoît Canet         }
394412d3ba82SBenoît Canet     }
394512d3ba82SBenoît Canet 
3946dd67fa50SBenoît Canet     error_setg(errp, "Cannot find device=%s nor node_name=%s",
3947dd67fa50SBenoît Canet                      device ? device : "",
3948dd67fa50SBenoît Canet                      node_name ? node_name : "");
3949dd67fa50SBenoît Canet     return NULL;
395012d3ba82SBenoît Canet }
395112d3ba82SBenoît Canet 
39525a6684d2SJeff Cody /* If 'base' is in the same chain as 'top', return true. Otherwise,
39535a6684d2SJeff Cody  * return false.  If either argument is NULL, return false. */
39545a6684d2SJeff Cody bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
39555a6684d2SJeff Cody {
39565a6684d2SJeff Cody     while (top && top != base) {
39575a6684d2SJeff Cody         top = top->backing_hd;
39585a6684d2SJeff Cody     }
39595a6684d2SJeff Cody 
39605a6684d2SJeff Cody     return top != NULL;
39615a6684d2SJeff Cody }
39625a6684d2SJeff Cody 
396304df765aSFam Zheng BlockDriverState *bdrv_next_node(BlockDriverState *bs)
396404df765aSFam Zheng {
396504df765aSFam Zheng     if (!bs) {
396604df765aSFam Zheng         return QTAILQ_FIRST(&graph_bdrv_states);
396704df765aSFam Zheng     }
396804df765aSFam Zheng     return QTAILQ_NEXT(bs, node_list);
396904df765aSFam Zheng }
397004df765aSFam Zheng 
39712f399b0aSMarkus Armbruster BlockDriverState *bdrv_next(BlockDriverState *bs)
39722f399b0aSMarkus Armbruster {
39732f399b0aSMarkus Armbruster     if (!bs) {
39742f399b0aSMarkus Armbruster         return QTAILQ_FIRST(&bdrv_states);
39752f399b0aSMarkus Armbruster     }
3976dc364f4cSBenoît Canet     return QTAILQ_NEXT(bs, device_list);
39772f399b0aSMarkus Armbruster }
39782f399b0aSMarkus Armbruster 
397920a9e77dSFam Zheng const char *bdrv_get_node_name(const BlockDriverState *bs)
398020a9e77dSFam Zheng {
398120a9e77dSFam Zheng     return bs->node_name;
398220a9e77dSFam Zheng }
398320a9e77dSFam Zheng 
39847f06d47eSMarkus Armbruster /* TODO check what callers really want: bs->node_name or blk_name() */
3985bfb197e0SMarkus Armbruster const char *bdrv_get_device_name(const BlockDriverState *bs)
3986ea2384d3Sbellard {
3987bfb197e0SMarkus Armbruster     return bs->blk ? blk_name(bs->blk) : "";
3988ea2384d3Sbellard }
3989ea2384d3Sbellard 
39909b2aa84fSAlberto Garcia /* This can be used to identify nodes that might not have a device
39919b2aa84fSAlberto Garcia  * name associated. Since node and device names live in the same
39929b2aa84fSAlberto Garcia  * namespace, the result is unambiguous. The exception is if both are
39939b2aa84fSAlberto Garcia  * absent, then this returns an empty (non-null) string. */
39949b2aa84fSAlberto Garcia const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
39959b2aa84fSAlberto Garcia {
39969b2aa84fSAlberto Garcia     return bs->blk ? blk_name(bs->blk) : bs->node_name;
39979b2aa84fSAlberto Garcia }
39989b2aa84fSAlberto Garcia 
3999c8433287SMarkus Armbruster int bdrv_get_flags(BlockDriverState *bs)
4000c8433287SMarkus Armbruster {
4001c8433287SMarkus Armbruster     return bs->open_flags;
4002c8433287SMarkus Armbruster }
4003c8433287SMarkus Armbruster 
4004f0f0fdfeSKevin Wolf int bdrv_flush_all(void)
4005c6ca28d6Saliguori {
4006c6ca28d6Saliguori     BlockDriverState *bs;
4007f0f0fdfeSKevin Wolf     int result = 0;
4008c6ca28d6Saliguori 
4009dc364f4cSBenoît Canet     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
4010ed78cda3SStefan Hajnoczi         AioContext *aio_context = bdrv_get_aio_context(bs);
4011ed78cda3SStefan Hajnoczi         int ret;
4012ed78cda3SStefan Hajnoczi 
4013ed78cda3SStefan Hajnoczi         aio_context_acquire(aio_context);
4014ed78cda3SStefan Hajnoczi         ret = bdrv_flush(bs);
4015f0f0fdfeSKevin Wolf         if (ret < 0 && !result) {
4016f0f0fdfeSKevin Wolf             result = ret;
4017c6ca28d6Saliguori         }
4018ed78cda3SStefan Hajnoczi         aio_context_release(aio_context);
40191b7bdbc1SStefan Hajnoczi     }
4020c6ca28d6Saliguori 
4021f0f0fdfeSKevin Wolf     return result;
4022f0f0fdfeSKevin Wolf }
4023f0f0fdfeSKevin Wolf 
40243ac21627SPeter Lieven int bdrv_has_zero_init_1(BlockDriverState *bs)
40253ac21627SPeter Lieven {
40263ac21627SPeter Lieven     return 1;
40273ac21627SPeter Lieven }
40283ac21627SPeter Lieven 
4029f2feebbdSKevin Wolf int bdrv_has_zero_init(BlockDriverState *bs)
4030f2feebbdSKevin Wolf {
4031f2feebbdSKevin Wolf     assert(bs->drv);
4032f2feebbdSKevin Wolf 
403311212d8fSPaolo Bonzini     /* If BS is a copy on write image, it is initialized to
403411212d8fSPaolo Bonzini        the contents of the base image, which may not be zeroes.  */
403511212d8fSPaolo Bonzini     if (bs->backing_hd) {
403611212d8fSPaolo Bonzini         return 0;
403711212d8fSPaolo Bonzini     }
4038336c1c12SKevin Wolf     if (bs->drv->bdrv_has_zero_init) {
4039336c1c12SKevin Wolf         return bs->drv->bdrv_has_zero_init(bs);
4040f2feebbdSKevin Wolf     }
4041f2feebbdSKevin Wolf 
40423ac21627SPeter Lieven     /* safe default */
40433ac21627SPeter Lieven     return 0;
4044f2feebbdSKevin Wolf }
4045f2feebbdSKevin Wolf 
40464ce78691SPeter Lieven bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
40474ce78691SPeter Lieven {
40484ce78691SPeter Lieven     BlockDriverInfo bdi;
40494ce78691SPeter Lieven 
40504ce78691SPeter Lieven     if (bs->backing_hd) {
40514ce78691SPeter Lieven         return false;
40524ce78691SPeter Lieven     }
40534ce78691SPeter Lieven 
40544ce78691SPeter Lieven     if (bdrv_get_info(bs, &bdi) == 0) {
40554ce78691SPeter Lieven         return bdi.unallocated_blocks_are_zero;
40564ce78691SPeter Lieven     }
40574ce78691SPeter Lieven 
40584ce78691SPeter Lieven     return false;
40594ce78691SPeter Lieven }
40604ce78691SPeter Lieven 
40614ce78691SPeter Lieven bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
40624ce78691SPeter Lieven {
40634ce78691SPeter Lieven     BlockDriverInfo bdi;
40644ce78691SPeter Lieven 
40654ce78691SPeter Lieven     if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
40664ce78691SPeter Lieven         return false;
40674ce78691SPeter Lieven     }
40684ce78691SPeter Lieven 
40694ce78691SPeter Lieven     if (bdrv_get_info(bs, &bdi) == 0) {
40704ce78691SPeter Lieven         return bdi.can_write_zeroes_with_unmap;
40714ce78691SPeter Lieven     }
40724ce78691SPeter Lieven 
40734ce78691SPeter Lieven     return false;
40744ce78691SPeter Lieven }
40754ce78691SPeter Lieven 
4076b6b8a333SPaolo Bonzini typedef struct BdrvCoGetBlockStatusData {
4077376ae3f1SStefan Hajnoczi     BlockDriverState *bs;
4078b35b2bbaSMiroslav Rezanina     BlockDriverState *base;
4079376ae3f1SStefan Hajnoczi     int64_t sector_num;
4080376ae3f1SStefan Hajnoczi     int nb_sectors;
4081376ae3f1SStefan Hajnoczi     int *pnum;
4082b6b8a333SPaolo Bonzini     int64_t ret;
4083376ae3f1SStefan Hajnoczi     bool done;
4084b6b8a333SPaolo Bonzini } BdrvCoGetBlockStatusData;
4085376ae3f1SStefan Hajnoczi 
4086f58c7b35Sths /*
4087705be728SFam Zheng  * Returns the allocation status of the specified sectors.
4088705be728SFam Zheng  * Drivers not implementing the functionality are assumed to not support
4089705be728SFam Zheng  * backing files, hence all their sectors are reported as allocated.
4090f58c7b35Sths  *
4091bd9533e3SStefan Hajnoczi  * If 'sector_num' is beyond the end of the disk image the return value is 0
4092bd9533e3SStefan Hajnoczi  * and 'pnum' is set to 0.
4093bd9533e3SStefan Hajnoczi  *
4094f58c7b35Sths  * 'pnum' is set to the number of sectors (including and immediately following
4095f58c7b35Sths  * the specified sector) that are known to be in the same
4096f58c7b35Sths  * allocated/unallocated state.
4097f58c7b35Sths  *
4098bd9533e3SStefan Hajnoczi  * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
4099bd9533e3SStefan Hajnoczi  * beyond the end of the disk image it will be clamped.
4100f58c7b35Sths  */
4101b6b8a333SPaolo Bonzini static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
4102bdad13b9SPaolo Bonzini                                                      int64_t sector_num,
4103060f51c9SStefan Hajnoczi                                                      int nb_sectors, int *pnum)
4104f58c7b35Sths {
410530a7f2fcSMarkus Armbruster     int64_t total_sectors;
4106f58c7b35Sths     int64_t n;
41075daa74a6SPaolo Bonzini     int64_t ret, ret2;
4108bd9533e3SStefan Hajnoczi 
410930a7f2fcSMarkus Armbruster     total_sectors = bdrv_nb_sectors(bs);
411030a7f2fcSMarkus Armbruster     if (total_sectors < 0) {
411130a7f2fcSMarkus Armbruster         return total_sectors;
4112617ccb46SPaolo Bonzini     }
4113617ccb46SPaolo Bonzini 
411430a7f2fcSMarkus Armbruster     if (sector_num >= total_sectors) {
41156aebab14SStefan Hajnoczi         *pnum = 0;
41166aebab14SStefan Hajnoczi         return 0;
41176aebab14SStefan Hajnoczi     }
4118bd9533e3SStefan Hajnoczi 
411930a7f2fcSMarkus Armbruster     n = total_sectors - sector_num;
4120bd9533e3SStefan Hajnoczi     if (n < nb_sectors) {
4121bd9533e3SStefan Hajnoczi         nb_sectors = n;
4122bd9533e3SStefan Hajnoczi     }
4123bd9533e3SStefan Hajnoczi 
4124b6b8a333SPaolo Bonzini     if (!bs->drv->bdrv_co_get_block_status) {
4125bd9533e3SStefan Hajnoczi         *pnum = nb_sectors;
4126e88ae226SKevin Wolf         ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
4127918e92d7SPaolo Bonzini         if (bs->drv->protocol_name) {
4128918e92d7SPaolo Bonzini             ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
4129918e92d7SPaolo Bonzini         }
4130918e92d7SPaolo Bonzini         return ret;
41316aebab14SStefan Hajnoczi     }
41326aebab14SStefan Hajnoczi 
4133415b5b01SPaolo Bonzini     ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
4134415b5b01SPaolo Bonzini     if (ret < 0) {
41353e0a233dSPeter Lieven         *pnum = 0;
4136415b5b01SPaolo Bonzini         return ret;
4137415b5b01SPaolo Bonzini     }
4138415b5b01SPaolo Bonzini 
413992bc50a5SPeter Lieven     if (ret & BDRV_BLOCK_RAW) {
414092bc50a5SPeter Lieven         assert(ret & BDRV_BLOCK_OFFSET_VALID);
414192bc50a5SPeter Lieven         return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
414292bc50a5SPeter Lieven                                      *pnum, pnum);
414392bc50a5SPeter Lieven     }
414492bc50a5SPeter Lieven 
4145e88ae226SKevin Wolf     if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
4146e88ae226SKevin Wolf         ret |= BDRV_BLOCK_ALLOCATED;
4147e88ae226SKevin Wolf     }
4148e88ae226SKevin Wolf 
4149c3d86884SPeter Lieven     if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
4150c3d86884SPeter Lieven         if (bdrv_unallocated_blocks_are_zero(bs)) {
4151415b5b01SPaolo Bonzini             ret |= BDRV_BLOCK_ZERO;
41521f9db224SPeter Lieven         } else if (bs->backing_hd) {
4153f0ad5712SPaolo Bonzini             BlockDriverState *bs2 = bs->backing_hd;
415430a7f2fcSMarkus Armbruster             int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
415530a7f2fcSMarkus Armbruster             if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
4156f0ad5712SPaolo Bonzini                 ret |= BDRV_BLOCK_ZERO;
4157f0ad5712SPaolo Bonzini             }
4158f0ad5712SPaolo Bonzini         }
4159415b5b01SPaolo Bonzini     }
41605daa74a6SPaolo Bonzini 
41615daa74a6SPaolo Bonzini     if (bs->file &&
41625daa74a6SPaolo Bonzini         (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
41635daa74a6SPaolo Bonzini         (ret & BDRV_BLOCK_OFFSET_VALID)) {
416459c9a95fSMax Reitz         int file_pnum;
416559c9a95fSMax Reitz 
41665daa74a6SPaolo Bonzini         ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
416759c9a95fSMax Reitz                                         *pnum, &file_pnum);
41685daa74a6SPaolo Bonzini         if (ret2 >= 0) {
41695daa74a6SPaolo Bonzini             /* Ignore errors.  This is just providing extra information, it
41705daa74a6SPaolo Bonzini              * is useful but not necessary.
41715daa74a6SPaolo Bonzini              */
417259c9a95fSMax Reitz             if (!file_pnum) {
417359c9a95fSMax Reitz                 /* !file_pnum indicates an offset at or beyond the EOF; it is
417459c9a95fSMax Reitz                  * perfectly valid for the format block driver to point to such
417559c9a95fSMax Reitz                  * offsets, so catch it and mark everything as zero */
417659c9a95fSMax Reitz                 ret |= BDRV_BLOCK_ZERO;
417759c9a95fSMax Reitz             } else {
417859c9a95fSMax Reitz                 /* Limit request to the range reported by the protocol driver */
417959c9a95fSMax Reitz                 *pnum = file_pnum;
41805daa74a6SPaolo Bonzini                 ret |= (ret2 & BDRV_BLOCK_ZERO);
41815daa74a6SPaolo Bonzini             }
41825daa74a6SPaolo Bonzini         }
418359c9a95fSMax Reitz     }
41845daa74a6SPaolo Bonzini 
4185415b5b01SPaolo Bonzini     return ret;
4186060f51c9SStefan Hajnoczi }
4187060f51c9SStefan Hajnoczi 
4188b6b8a333SPaolo Bonzini /* Coroutine wrapper for bdrv_get_block_status() */
4189b6b8a333SPaolo Bonzini static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
4190060f51c9SStefan Hajnoczi {
4191b6b8a333SPaolo Bonzini     BdrvCoGetBlockStatusData *data = opaque;
4192060f51c9SStefan Hajnoczi     BlockDriverState *bs = data->bs;
4193060f51c9SStefan Hajnoczi 
4194b6b8a333SPaolo Bonzini     data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
4195060f51c9SStefan Hajnoczi                                          data->pnum);
4196060f51c9SStefan Hajnoczi     data->done = true;
4197060f51c9SStefan Hajnoczi }
4198060f51c9SStefan Hajnoczi 
4199060f51c9SStefan Hajnoczi /*
4200b6b8a333SPaolo Bonzini  * Synchronous wrapper around bdrv_co_get_block_status().
4201060f51c9SStefan Hajnoczi  *
4202b6b8a333SPaolo Bonzini  * See bdrv_co_get_block_status() for details.
4203060f51c9SStefan Hajnoczi  */
4204b6b8a333SPaolo Bonzini int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
4205b6b8a333SPaolo Bonzini                               int nb_sectors, int *pnum)
4206060f51c9SStefan Hajnoczi {
4207376ae3f1SStefan Hajnoczi     Coroutine *co;
4208b6b8a333SPaolo Bonzini     BdrvCoGetBlockStatusData data = {
4209376ae3f1SStefan Hajnoczi         .bs = bs,
4210376ae3f1SStefan Hajnoczi         .sector_num = sector_num,
4211376ae3f1SStefan Hajnoczi         .nb_sectors = nb_sectors,
4212376ae3f1SStefan Hajnoczi         .pnum = pnum,
4213376ae3f1SStefan Hajnoczi         .done = false,
4214376ae3f1SStefan Hajnoczi     };
4215376ae3f1SStefan Hajnoczi 
4216bdad13b9SPaolo Bonzini     if (qemu_in_coroutine()) {
4217bdad13b9SPaolo Bonzini         /* Fast-path if already in coroutine context */
4218b6b8a333SPaolo Bonzini         bdrv_get_block_status_co_entry(&data);
4219bdad13b9SPaolo Bonzini     } else {
42202572b37aSStefan Hajnoczi         AioContext *aio_context = bdrv_get_aio_context(bs);
42212572b37aSStefan Hajnoczi 
4222b6b8a333SPaolo Bonzini         co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
4223376ae3f1SStefan Hajnoczi         qemu_coroutine_enter(co, &data);
4224376ae3f1SStefan Hajnoczi         while (!data.done) {
42252572b37aSStefan Hajnoczi             aio_poll(aio_context, true);
4226376ae3f1SStefan Hajnoczi         }
4227bdad13b9SPaolo Bonzini     }
4228376ae3f1SStefan Hajnoczi     return data.ret;
4229376ae3f1SStefan Hajnoczi }
4230f58c7b35Sths 
4231b6b8a333SPaolo Bonzini int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
4232b6b8a333SPaolo Bonzini                                    int nb_sectors, int *pnum)
4233b6b8a333SPaolo Bonzini {
42344333bb71SPaolo Bonzini     int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
42354333bb71SPaolo Bonzini     if (ret < 0) {
42364333bb71SPaolo Bonzini         return ret;
42374333bb71SPaolo Bonzini     }
423801fb2705SKevin Wolf     return !!(ret & BDRV_BLOCK_ALLOCATED);
4239b6b8a333SPaolo Bonzini }
4240b6b8a333SPaolo Bonzini 
4241188a7bbfSPaolo Bonzini /*
4242188a7bbfSPaolo Bonzini  * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
4243188a7bbfSPaolo Bonzini  *
4244188a7bbfSPaolo Bonzini  * Return true if the given sector is allocated in any image between
4245188a7bbfSPaolo Bonzini  * BASE and TOP (inclusive).  BASE can be NULL to check if the given
4246188a7bbfSPaolo Bonzini  * sector is allocated in any image of the chain.  Return false otherwise.
4247188a7bbfSPaolo Bonzini  *
4248188a7bbfSPaolo Bonzini  * 'pnum' is set to the number of sectors (including and immediately following
4249188a7bbfSPaolo Bonzini  *  the specified sector) that are known to be in the same
4250188a7bbfSPaolo Bonzini  *  allocated/unallocated state.
4251188a7bbfSPaolo Bonzini  *
4252188a7bbfSPaolo Bonzini  */
42534f578637SPaolo Bonzini int bdrv_is_allocated_above(BlockDriverState *top,
4254188a7bbfSPaolo Bonzini                             BlockDriverState *base,
4255188a7bbfSPaolo Bonzini                             int64_t sector_num,
4256188a7bbfSPaolo Bonzini                             int nb_sectors, int *pnum)
4257188a7bbfSPaolo Bonzini {
4258188a7bbfSPaolo Bonzini     BlockDriverState *intermediate;
4259188a7bbfSPaolo Bonzini     int ret, n = nb_sectors;
4260188a7bbfSPaolo Bonzini 
4261188a7bbfSPaolo Bonzini     intermediate = top;
4262188a7bbfSPaolo Bonzini     while (intermediate && intermediate != base) {
4263188a7bbfSPaolo Bonzini         int pnum_inter;
4264bdad13b9SPaolo Bonzini         ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
4265188a7bbfSPaolo Bonzini                                 &pnum_inter);
4266188a7bbfSPaolo Bonzini         if (ret < 0) {
4267188a7bbfSPaolo Bonzini             return ret;
4268188a7bbfSPaolo Bonzini         } else if (ret) {
4269188a7bbfSPaolo Bonzini             *pnum = pnum_inter;
4270188a7bbfSPaolo Bonzini             return 1;
4271188a7bbfSPaolo Bonzini         }
4272188a7bbfSPaolo Bonzini 
4273188a7bbfSPaolo Bonzini         /*
4274188a7bbfSPaolo Bonzini          * [sector_num, nb_sectors] is unallocated on top but intermediate
4275188a7bbfSPaolo Bonzini          * might have
4276188a7bbfSPaolo Bonzini          *
4277188a7bbfSPaolo Bonzini          * [sector_num+x, nr_sectors] allocated.
4278188a7bbfSPaolo Bonzini          */
427963ba17d3SVishvananda Ishaya         if (n > pnum_inter &&
428063ba17d3SVishvananda Ishaya             (intermediate == top ||
428163ba17d3SVishvananda Ishaya              sector_num + pnum_inter < intermediate->total_sectors)) {
4282188a7bbfSPaolo Bonzini             n = pnum_inter;
4283188a7bbfSPaolo Bonzini         }
4284188a7bbfSPaolo Bonzini 
4285188a7bbfSPaolo Bonzini         intermediate = intermediate->backing_hd;
4286188a7bbfSPaolo Bonzini     }
4287188a7bbfSPaolo Bonzini 
4288188a7bbfSPaolo Bonzini     *pnum = n;
4289188a7bbfSPaolo Bonzini     return 0;
4290188a7bbfSPaolo Bonzini }
4291188a7bbfSPaolo Bonzini 
4292045df330Saliguori const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
4293045df330Saliguori {
4294045df330Saliguori     if (bs->backing_hd && bs->backing_hd->encrypted)
4295045df330Saliguori         return bs->backing_file;
4296045df330Saliguori     else if (bs->encrypted)
4297045df330Saliguori         return bs->filename;
4298045df330Saliguori     else
4299045df330Saliguori         return NULL;
4300045df330Saliguori }
4301045df330Saliguori 
430283f64091Sbellard void bdrv_get_backing_filename(BlockDriverState *bs,
430383f64091Sbellard                                char *filename, int filename_size)
430483f64091Sbellard {
430583f64091Sbellard     pstrcpy(filename, filename_size, bs->backing_file);
430683f64091Sbellard }
430783f64091Sbellard 
4308faea38e7Sbellard int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
4309faea38e7Sbellard                           const uint8_t *buf, int nb_sectors)
4310faea38e7Sbellard {
4311faea38e7Sbellard     BlockDriver *drv = bs->drv;
4312b9c64947SMax Reitz     int ret;
4313b9c64947SMax Reitz 
4314b9c64947SMax Reitz     if (!drv) {
431519cb3738Sbellard         return -ENOMEDIUM;
4316b9c64947SMax Reitz     }
4317b9c64947SMax Reitz     if (!drv->bdrv_write_compressed) {
4318faea38e7Sbellard         return -ENOTSUP;
4319b9c64947SMax Reitz     }
4320b9c64947SMax Reitz     ret = bdrv_check_request(bs, sector_num, nb_sectors);
4321b9c64947SMax Reitz     if (ret < 0) {
4322b9c64947SMax Reitz         return ret;
4323b9c64947SMax Reitz     }
43247cd1e32aSlirans@il.ibm.com 
4325e4654d2dSFam Zheng     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
43267cd1e32aSlirans@il.ibm.com 
4327faea38e7Sbellard     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
4328faea38e7Sbellard }
4329faea38e7Sbellard 
4330faea38e7Sbellard int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
4331faea38e7Sbellard {
4332faea38e7Sbellard     BlockDriver *drv = bs->drv;
4333faea38e7Sbellard     if (!drv)
433419cb3738Sbellard         return -ENOMEDIUM;
4335faea38e7Sbellard     if (!drv->bdrv_get_info)
4336faea38e7Sbellard         return -ENOTSUP;
4337faea38e7Sbellard     memset(bdi, 0, sizeof(*bdi));
4338faea38e7Sbellard     return drv->bdrv_get_info(bs, bdi);
4339faea38e7Sbellard }
4340faea38e7Sbellard 
4341eae041feSMax Reitz ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
4342eae041feSMax Reitz {
4343eae041feSMax Reitz     BlockDriver *drv = bs->drv;
4344eae041feSMax Reitz     if (drv && drv->bdrv_get_specific_info) {
4345eae041feSMax Reitz         return drv->bdrv_get_specific_info(bs);
4346eae041feSMax Reitz     }
4347eae041feSMax Reitz     return NULL;
4348eae041feSMax Reitz }
4349eae041feSMax Reitz 
435045566e9cSChristoph Hellwig int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
435145566e9cSChristoph Hellwig                       int64_t pos, int size)
4352178e08a5Saliguori {
4353cf8074b3SKevin Wolf     QEMUIOVector qiov;
4354cf8074b3SKevin Wolf     struct iovec iov = {
4355cf8074b3SKevin Wolf         .iov_base   = (void *) buf,
4356cf8074b3SKevin Wolf         .iov_len    = size,
4357cf8074b3SKevin Wolf     };
4358cf8074b3SKevin Wolf 
4359cf8074b3SKevin Wolf     qemu_iovec_init_external(&qiov, &iov, 1);
4360cf8074b3SKevin Wolf     return bdrv_writev_vmstate(bs, &qiov, pos);
4361cf8074b3SKevin Wolf }
4362cf8074b3SKevin Wolf 
4363cf8074b3SKevin Wolf int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
4364cf8074b3SKevin Wolf {
4365178e08a5Saliguori     BlockDriver *drv = bs->drv;
4366cf8074b3SKevin Wolf 
4367cf8074b3SKevin Wolf     if (!drv) {
4368178e08a5Saliguori         return -ENOMEDIUM;
4369cf8074b3SKevin Wolf     } else if (drv->bdrv_save_vmstate) {
4370cf8074b3SKevin Wolf         return drv->bdrv_save_vmstate(bs, qiov, pos);
4371cf8074b3SKevin Wolf     } else if (bs->file) {
4372cf8074b3SKevin Wolf         return bdrv_writev_vmstate(bs->file, qiov, pos);
4373cf8074b3SKevin Wolf     }
4374cf8074b3SKevin Wolf 
43757cdb1f6dSMORITA Kazutaka     return -ENOTSUP;
4376178e08a5Saliguori }
4377178e08a5Saliguori 
437845566e9cSChristoph Hellwig int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
437945566e9cSChristoph Hellwig                       int64_t pos, int size)
4380178e08a5Saliguori {
4381178e08a5Saliguori     BlockDriver *drv = bs->drv;
4382178e08a5Saliguori     if (!drv)
4383178e08a5Saliguori         return -ENOMEDIUM;
43847cdb1f6dSMORITA Kazutaka     if (drv->bdrv_load_vmstate)
438545566e9cSChristoph Hellwig         return drv->bdrv_load_vmstate(bs, buf, pos, size);
43867cdb1f6dSMORITA Kazutaka     if (bs->file)
43877cdb1f6dSMORITA Kazutaka         return bdrv_load_vmstate(bs->file, buf, pos, size);
43887cdb1f6dSMORITA Kazutaka     return -ENOTSUP;
4389178e08a5Saliguori }
4390178e08a5Saliguori 
43918b9b0cc2SKevin Wolf void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
43928b9b0cc2SKevin Wolf {
4393bf736fe3SKevin Wolf     if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
43948b9b0cc2SKevin Wolf         return;
43958b9b0cc2SKevin Wolf     }
43968b9b0cc2SKevin Wolf 
4397bf736fe3SKevin Wolf     bs->drv->bdrv_debug_event(bs, event);
439841c695c7SKevin Wolf }
43998b9b0cc2SKevin Wolf 
440041c695c7SKevin Wolf int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
440141c695c7SKevin Wolf                           const char *tag)
440241c695c7SKevin Wolf {
440341c695c7SKevin Wolf     while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
440441c695c7SKevin Wolf         bs = bs->file;
440541c695c7SKevin Wolf     }
440641c695c7SKevin Wolf 
440741c695c7SKevin Wolf     if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
440841c695c7SKevin Wolf         return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
440941c695c7SKevin Wolf     }
441041c695c7SKevin Wolf 
441141c695c7SKevin Wolf     return -ENOTSUP;
441241c695c7SKevin Wolf }
441341c695c7SKevin Wolf 
44144cc70e93SFam Zheng int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
44154cc70e93SFam Zheng {
44164cc70e93SFam Zheng     while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
44174cc70e93SFam Zheng         bs = bs->file;
44184cc70e93SFam Zheng     }
44194cc70e93SFam Zheng 
44204cc70e93SFam Zheng     if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
44214cc70e93SFam Zheng         return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
44224cc70e93SFam Zheng     }
44234cc70e93SFam Zheng 
44244cc70e93SFam Zheng     return -ENOTSUP;
44254cc70e93SFam Zheng }
44264cc70e93SFam Zheng 
442741c695c7SKevin Wolf int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
442841c695c7SKevin Wolf {
4429938789eaSMax Reitz     while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
443041c695c7SKevin Wolf         bs = bs->file;
443141c695c7SKevin Wolf     }
443241c695c7SKevin Wolf 
443341c695c7SKevin Wolf     if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
443441c695c7SKevin Wolf         return bs->drv->bdrv_debug_resume(bs, tag);
443541c695c7SKevin Wolf     }
443641c695c7SKevin Wolf 
443741c695c7SKevin Wolf     return -ENOTSUP;
443841c695c7SKevin Wolf }
443941c695c7SKevin Wolf 
444041c695c7SKevin Wolf bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
444141c695c7SKevin Wolf {
444241c695c7SKevin Wolf     while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
444341c695c7SKevin Wolf         bs = bs->file;
444441c695c7SKevin Wolf     }
444541c695c7SKevin Wolf 
444641c695c7SKevin Wolf     if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
444741c695c7SKevin Wolf         return bs->drv->bdrv_debug_is_suspended(bs, tag);
444841c695c7SKevin Wolf     }
444941c695c7SKevin Wolf 
445041c695c7SKevin Wolf     return false;
44518b9b0cc2SKevin Wolf }
44528b9b0cc2SKevin Wolf 
4453199630b6SBlue Swirl int bdrv_is_snapshot(BlockDriverState *bs)
4454199630b6SBlue Swirl {
4455199630b6SBlue Swirl     return !!(bs->open_flags & BDRV_O_SNAPSHOT);
4456199630b6SBlue Swirl }
4457199630b6SBlue Swirl 
4458b1b1d783SJeff Cody /* backing_file can either be relative, or absolute, or a protocol.  If it is
4459b1b1d783SJeff Cody  * relative, it must be relative to the chain.  So, passing in bs->filename
4460b1b1d783SJeff Cody  * from a BDS as backing_file should not be done, as that may be relative to
4461b1b1d783SJeff Cody  * the CWD rather than the chain. */
4462e8a6bb9cSMarcelo Tosatti BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
4463e8a6bb9cSMarcelo Tosatti         const char *backing_file)
4464e8a6bb9cSMarcelo Tosatti {
4465b1b1d783SJeff Cody     char *filename_full = NULL;
4466b1b1d783SJeff Cody     char *backing_file_full = NULL;
4467b1b1d783SJeff Cody     char *filename_tmp = NULL;
4468b1b1d783SJeff Cody     int is_protocol = 0;
4469b1b1d783SJeff Cody     BlockDriverState *curr_bs = NULL;
4470b1b1d783SJeff Cody     BlockDriverState *retval = NULL;
4471b1b1d783SJeff Cody 
4472b1b1d783SJeff Cody     if (!bs || !bs->drv || !backing_file) {
4473e8a6bb9cSMarcelo Tosatti         return NULL;
4474e8a6bb9cSMarcelo Tosatti     }
4475e8a6bb9cSMarcelo Tosatti 
4476b1b1d783SJeff Cody     filename_full     = g_malloc(PATH_MAX);
4477b1b1d783SJeff Cody     backing_file_full = g_malloc(PATH_MAX);
4478b1b1d783SJeff Cody     filename_tmp      = g_malloc(PATH_MAX);
4479b1b1d783SJeff Cody 
4480b1b1d783SJeff Cody     is_protocol = path_has_protocol(backing_file);
4481b1b1d783SJeff Cody 
4482b1b1d783SJeff Cody     for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
4483b1b1d783SJeff Cody 
4484b1b1d783SJeff Cody         /* If either of the filename paths is actually a protocol, then
4485b1b1d783SJeff Cody          * compare unmodified paths; otherwise make paths relative */
4486b1b1d783SJeff Cody         if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
4487b1b1d783SJeff Cody             if (strcmp(backing_file, curr_bs->backing_file) == 0) {
4488b1b1d783SJeff Cody                 retval = curr_bs->backing_hd;
4489b1b1d783SJeff Cody                 break;
4490b1b1d783SJeff Cody             }
4491e8a6bb9cSMarcelo Tosatti         } else {
4492b1b1d783SJeff Cody             /* If not an absolute filename path, make it relative to the current
4493b1b1d783SJeff Cody              * image's filename path */
4494b1b1d783SJeff Cody             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4495b1b1d783SJeff Cody                          backing_file);
4496b1b1d783SJeff Cody 
4497b1b1d783SJeff Cody             /* We are going to compare absolute pathnames */
4498b1b1d783SJeff Cody             if (!realpath(filename_tmp, filename_full)) {
4499b1b1d783SJeff Cody                 continue;
4500b1b1d783SJeff Cody             }
4501b1b1d783SJeff Cody 
4502b1b1d783SJeff Cody             /* We need to make sure the backing filename we are comparing against
4503b1b1d783SJeff Cody              * is relative to the current image filename (or absolute) */
4504b1b1d783SJeff Cody             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4505b1b1d783SJeff Cody                          curr_bs->backing_file);
4506b1b1d783SJeff Cody 
4507b1b1d783SJeff Cody             if (!realpath(filename_tmp, backing_file_full)) {
4508b1b1d783SJeff Cody                 continue;
4509b1b1d783SJeff Cody             }
4510b1b1d783SJeff Cody 
4511b1b1d783SJeff Cody             if (strcmp(backing_file_full, filename_full) == 0) {
4512b1b1d783SJeff Cody                 retval = curr_bs->backing_hd;
4513b1b1d783SJeff Cody                 break;
4514b1b1d783SJeff Cody             }
4515e8a6bb9cSMarcelo Tosatti         }
4516e8a6bb9cSMarcelo Tosatti     }
4517e8a6bb9cSMarcelo Tosatti 
4518b1b1d783SJeff Cody     g_free(filename_full);
4519b1b1d783SJeff Cody     g_free(backing_file_full);
4520b1b1d783SJeff Cody     g_free(filename_tmp);
4521b1b1d783SJeff Cody     return retval;
4522e8a6bb9cSMarcelo Tosatti }
4523e8a6bb9cSMarcelo Tosatti 
4524f198fd1cSBenoît Canet int bdrv_get_backing_file_depth(BlockDriverState *bs)
4525f198fd1cSBenoît Canet {
4526f198fd1cSBenoît Canet     if (!bs->drv) {
4527f198fd1cSBenoît Canet         return 0;
4528f198fd1cSBenoît Canet     }
4529f198fd1cSBenoît Canet 
4530f198fd1cSBenoît Canet     if (!bs->backing_hd) {
4531f198fd1cSBenoît Canet         return 0;
4532f198fd1cSBenoît Canet     }
4533f198fd1cSBenoît Canet 
4534f198fd1cSBenoît Canet     return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
4535f198fd1cSBenoît Canet }
4536f198fd1cSBenoît Canet 
4537ea2384d3Sbellard /**************************************************************/
453883f64091Sbellard /* async I/Os */
4539ea2384d3Sbellard 
45407c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
4541f141eafeSaliguori                            QEMUIOVector *qiov, int nb_sectors,
4542097310b5SMarkus Armbruster                            BlockCompletionFunc *cb, void *opaque)
4543ea2384d3Sbellard {
4544bbf0a440SStefan Hajnoczi     trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
4545bbf0a440SStefan Hajnoczi 
4546d20d9b7cSPaolo Bonzini     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
45478c5873d6SStefan Hajnoczi                                  cb, opaque, false);
454883f64091Sbellard }
454983f64091Sbellard 
45507c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
4551f141eafeSaliguori                             QEMUIOVector *qiov, int nb_sectors,
4552097310b5SMarkus Armbruster                             BlockCompletionFunc *cb, void *opaque)
45537674e7bfSbellard {
4554bbf0a440SStefan Hajnoczi     trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
4555bbf0a440SStefan Hajnoczi 
4556d20d9b7cSPaolo Bonzini     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
45578c5873d6SStefan Hajnoczi                                  cb, opaque, true);
455883f64091Sbellard }
455983f64091Sbellard 
45607c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
4561d5ef94d4SPaolo Bonzini         int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
4562097310b5SMarkus Armbruster         BlockCompletionFunc *cb, void *opaque)
4563d5ef94d4SPaolo Bonzini {
4564d5ef94d4SPaolo Bonzini     trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);
4565d5ef94d4SPaolo Bonzini 
4566d5ef94d4SPaolo Bonzini     return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
4567d5ef94d4SPaolo Bonzini                                  BDRV_REQ_ZERO_WRITE | flags,
4568d5ef94d4SPaolo Bonzini                                  cb, opaque, true);
4569d5ef94d4SPaolo Bonzini }
4570d5ef94d4SPaolo Bonzini 
457140b4f539SKevin Wolf 
457240b4f539SKevin Wolf typedef struct MultiwriteCB {
457340b4f539SKevin Wolf     int error;
457440b4f539SKevin Wolf     int num_requests;
457540b4f539SKevin Wolf     int num_callbacks;
457640b4f539SKevin Wolf     struct {
4577097310b5SMarkus Armbruster         BlockCompletionFunc *cb;
457840b4f539SKevin Wolf         void *opaque;
457940b4f539SKevin Wolf         QEMUIOVector *free_qiov;
458040b4f539SKevin Wolf     } callbacks[];
458140b4f539SKevin Wolf } MultiwriteCB;
458240b4f539SKevin Wolf 
458340b4f539SKevin Wolf static void multiwrite_user_cb(MultiwriteCB *mcb)
458440b4f539SKevin Wolf {
458540b4f539SKevin Wolf     int i;
458640b4f539SKevin Wolf 
458740b4f539SKevin Wolf     for (i = 0; i < mcb->num_callbacks; i++) {
458840b4f539SKevin Wolf         mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
45891e1ea48dSStefan Hajnoczi         if (mcb->callbacks[i].free_qiov) {
45901e1ea48dSStefan Hajnoczi             qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
45911e1ea48dSStefan Hajnoczi         }
45927267c094SAnthony Liguori         g_free(mcb->callbacks[i].free_qiov);
459340b4f539SKevin Wolf     }
459440b4f539SKevin Wolf }
459540b4f539SKevin Wolf 
459640b4f539SKevin Wolf static void multiwrite_cb(void *opaque, int ret)
459740b4f539SKevin Wolf {
459840b4f539SKevin Wolf     MultiwriteCB *mcb = opaque;
459940b4f539SKevin Wolf 
46006d519a5fSStefan Hajnoczi     trace_multiwrite_cb(mcb, ret);
46016d519a5fSStefan Hajnoczi 
4602cb6d3ca0SKevin Wolf     if (ret < 0 && !mcb->error) {
460340b4f539SKevin Wolf         mcb->error = ret;
460440b4f539SKevin Wolf     }
460540b4f539SKevin Wolf 
460640b4f539SKevin Wolf     mcb->num_requests--;
460740b4f539SKevin Wolf     if (mcb->num_requests == 0) {
460840b4f539SKevin Wolf         multiwrite_user_cb(mcb);
46097267c094SAnthony Liguori         g_free(mcb);
461040b4f539SKevin Wolf     }
461140b4f539SKevin Wolf }
461240b4f539SKevin Wolf 
461340b4f539SKevin Wolf static int multiwrite_req_compare(const void *a, const void *b)
461440b4f539SKevin Wolf {
461577be4366SChristoph Hellwig     const BlockRequest *req1 = a, *req2 = b;
461677be4366SChristoph Hellwig 
461777be4366SChristoph Hellwig     /*
461877be4366SChristoph Hellwig      * Note that we can't simply subtract req2->sector from req1->sector
461977be4366SChristoph Hellwig      * here as that could overflow the return value.
462077be4366SChristoph Hellwig      */
462177be4366SChristoph Hellwig     if (req1->sector > req2->sector) {
462277be4366SChristoph Hellwig         return 1;
462377be4366SChristoph Hellwig     } else if (req1->sector < req2->sector) {
462477be4366SChristoph Hellwig         return -1;
462577be4366SChristoph Hellwig     } else {
462677be4366SChristoph Hellwig         return 0;
462777be4366SChristoph Hellwig     }
462840b4f539SKevin Wolf }
462940b4f539SKevin Wolf 
463040b4f539SKevin Wolf /*
463140b4f539SKevin Wolf  * Takes a bunch of requests and tries to merge them. Returns the number of
463240b4f539SKevin Wolf  * requests that remain after merging.
463340b4f539SKevin Wolf  */
463440b4f539SKevin Wolf static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
463540b4f539SKevin Wolf     int num_reqs, MultiwriteCB *mcb)
463640b4f539SKevin Wolf {
463740b4f539SKevin Wolf     int i, outidx;
463840b4f539SKevin Wolf 
463940b4f539SKevin Wolf     // Sort requests by start sector
464040b4f539SKevin Wolf     qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
464140b4f539SKevin Wolf 
464240b4f539SKevin Wolf     // Check if adjacent requests touch the same clusters. If so, combine them,
464340b4f539SKevin Wolf     // filling up gaps with zero sectors.
464440b4f539SKevin Wolf     outidx = 0;
464540b4f539SKevin Wolf     for (i = 1; i < num_reqs; i++) {
464640b4f539SKevin Wolf         int merge = 0;
464740b4f539SKevin Wolf         int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
464840b4f539SKevin Wolf 
4649b6a127a1SPaolo Bonzini         // Handle exactly sequential writes and overlapping writes.
465040b4f539SKevin Wolf         if (reqs[i].sector <= oldreq_last) {
465140b4f539SKevin Wolf             merge = 1;
465240b4f539SKevin Wolf         }
465340b4f539SKevin Wolf 
4654e2a305fbSChristoph Hellwig         if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
4655e2a305fbSChristoph Hellwig             merge = 0;
4656e2a305fbSChristoph Hellwig         }
4657e2a305fbSChristoph Hellwig 
46586c5a42acSPeter Lieven         if (bs->bl.max_transfer_length && reqs[outidx].nb_sectors +
46596c5a42acSPeter Lieven             reqs[i].nb_sectors > bs->bl.max_transfer_length) {
46606c5a42acSPeter Lieven             merge = 0;
46616c5a42acSPeter Lieven         }
46626c5a42acSPeter Lieven 
466340b4f539SKevin Wolf         if (merge) {
466440b4f539SKevin Wolf             size_t size;
46657267c094SAnthony Liguori             QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
466640b4f539SKevin Wolf             qemu_iovec_init(qiov,
466740b4f539SKevin Wolf                 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
466840b4f539SKevin Wolf 
466940b4f539SKevin Wolf             // Add the first request to the merged one. If the requests are
467040b4f539SKevin Wolf             // overlapping, drop the last sectors of the first request.
467140b4f539SKevin Wolf             size = (reqs[i].sector - reqs[outidx].sector) << 9;
46721b093c48SMichael Tokarev             qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
467340b4f539SKevin Wolf 
4674b6a127a1SPaolo Bonzini             // We should need to add any zeros between the two requests
4675b6a127a1SPaolo Bonzini             assert (reqs[i].sector <= oldreq_last);
467640b4f539SKevin Wolf 
467740b4f539SKevin Wolf             // Add the second request
46781b093c48SMichael Tokarev             qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
467940b4f539SKevin Wolf 
4680391827ebSStefan Hajnoczi             // Add tail of first request, if necessary
4681391827ebSStefan Hajnoczi             if (qiov->size < reqs[outidx].qiov->size) {
4682391827ebSStefan Hajnoczi                 qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
4683391827ebSStefan Hajnoczi                                   reqs[outidx].qiov->size - qiov->size);
4684391827ebSStefan Hajnoczi             }
4685391827ebSStefan Hajnoczi 
4686cbf1dff2SKevin Wolf             reqs[outidx].nb_sectors = qiov->size >> 9;
468740b4f539SKevin Wolf             reqs[outidx].qiov = qiov;
468840b4f539SKevin Wolf 
468940b4f539SKevin Wolf             mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
469040b4f539SKevin Wolf         } else {
469140b4f539SKevin Wolf             outidx++;
469240b4f539SKevin Wolf             reqs[outidx].sector     = reqs[i].sector;
469340b4f539SKevin Wolf             reqs[outidx].nb_sectors = reqs[i].nb_sectors;
469440b4f539SKevin Wolf             reqs[outidx].qiov       = reqs[i].qiov;
469540b4f539SKevin Wolf         }
469640b4f539SKevin Wolf     }
469740b4f539SKevin Wolf 
4698f4564d53SPeter Lieven     block_acct_merge_done(&bs->stats, BLOCK_ACCT_WRITE, num_reqs - outidx - 1);
4699f4564d53SPeter Lieven 
470040b4f539SKevin Wolf     return outidx + 1;
470140b4f539SKevin Wolf }
470240b4f539SKevin Wolf 
470340b4f539SKevin Wolf /*
470440b4f539SKevin Wolf  * Submit multiple AIO write requests at once.
470540b4f539SKevin Wolf  *
470640b4f539SKevin Wolf  * On success, the function returns 0 and all requests in the reqs array have
470740b4f539SKevin Wolf  * been submitted. In error case this function returns -1, and any of the
470840b4f539SKevin Wolf  * requests may or may not be submitted yet. In particular, this means that the
470940b4f539SKevin Wolf  * callback will be called for some of the requests, for others it won't. The
471040b4f539SKevin Wolf  * caller must check the error field of the BlockRequest to wait for the right
471140b4f539SKevin Wolf  * callbacks (if error != 0, no callback will be called).
471240b4f539SKevin Wolf  *
471340b4f539SKevin Wolf  * The implementation may modify the contents of the reqs array, e.g. to merge
471440b4f539SKevin Wolf  * requests. However, the fields opaque and error are left unmodified as they
471540b4f539SKevin Wolf  * are used to signal failure for a single request to the caller.
471640b4f539SKevin Wolf  */
471740b4f539SKevin Wolf int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
471840b4f539SKevin Wolf {
471940b4f539SKevin Wolf     MultiwriteCB *mcb;
472040b4f539SKevin Wolf     int i;
472140b4f539SKevin Wolf 
4722301db7c2SRyan Harper     /* don't submit writes if we don't have a medium */
4723301db7c2SRyan Harper     if (bs->drv == NULL) {
4724301db7c2SRyan Harper         for (i = 0; i < num_reqs; i++) {
4725301db7c2SRyan Harper             reqs[i].error = -ENOMEDIUM;
4726301db7c2SRyan Harper         }
4727301db7c2SRyan Harper         return -1;
4728301db7c2SRyan Harper     }
4729301db7c2SRyan Harper 
473040b4f539SKevin Wolf     if (num_reqs == 0) {
473140b4f539SKevin Wolf         return 0;
473240b4f539SKevin Wolf     }
473340b4f539SKevin Wolf 
473440b4f539SKevin Wolf     // Create MultiwriteCB structure
47357267c094SAnthony Liguori     mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
473640b4f539SKevin Wolf     mcb->num_requests = 0;
473740b4f539SKevin Wolf     mcb->num_callbacks = num_reqs;
473840b4f539SKevin Wolf 
473940b4f539SKevin Wolf     for (i = 0; i < num_reqs; i++) {
474040b4f539SKevin Wolf         mcb->callbacks[i].cb = reqs[i].cb;
474140b4f539SKevin Wolf         mcb->callbacks[i].opaque = reqs[i].opaque;
474240b4f539SKevin Wolf     }
474340b4f539SKevin Wolf 
474440b4f539SKevin Wolf     // Check for mergable requests
474540b4f539SKevin Wolf     num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
474640b4f539SKevin Wolf 
47476d519a5fSStefan Hajnoczi     trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
47486d519a5fSStefan Hajnoczi 
4749df9309fbSPaolo Bonzini     /* Run the aio requests. */
4750df9309fbSPaolo Bonzini     mcb->num_requests = num_reqs;
475140b4f539SKevin Wolf     for (i = 0; i < num_reqs; i++) {
4752d20d9b7cSPaolo Bonzini         bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
4753d20d9b7cSPaolo Bonzini                               reqs[i].nb_sectors, reqs[i].flags,
4754d20d9b7cSPaolo Bonzini                               multiwrite_cb, mcb,
4755d20d9b7cSPaolo Bonzini                               true);
475640b4f539SKevin Wolf     }
475740b4f539SKevin Wolf 
475840b4f539SKevin Wolf     return 0;
475940b4f539SKevin Wolf }
476040b4f539SKevin Wolf 
47617c84b1b8SMarkus Armbruster void bdrv_aio_cancel(BlockAIOCB *acb)
476283f64091Sbellard {
476302c50efeSFam Zheng     qemu_aio_ref(acb);
476402c50efeSFam Zheng     bdrv_aio_cancel_async(acb);
476502c50efeSFam Zheng     while (acb->refcnt > 1) {
476602c50efeSFam Zheng         if (acb->aiocb_info->get_aio_context) {
476702c50efeSFam Zheng             aio_poll(acb->aiocb_info->get_aio_context(acb), true);
476802c50efeSFam Zheng         } else if (acb->bs) {
476902c50efeSFam Zheng             aio_poll(bdrv_get_aio_context(acb->bs), true);
477002c50efeSFam Zheng         } else {
477102c50efeSFam Zheng             abort();
477202c50efeSFam Zheng         }
477302c50efeSFam Zheng     }
47748007429aSFam Zheng     qemu_aio_unref(acb);
477502c50efeSFam Zheng }
477602c50efeSFam Zheng 
477702c50efeSFam Zheng /* Async version of aio cancel. The caller is not blocked if the acb implements
477802c50efeSFam Zheng  * cancel_async, otherwise we do nothing and let the request normally complete.
477902c50efeSFam Zheng  * In either case the completion callback must be called. */
47807c84b1b8SMarkus Armbruster void bdrv_aio_cancel_async(BlockAIOCB *acb)
478102c50efeSFam Zheng {
478202c50efeSFam Zheng     if (acb->aiocb_info->cancel_async) {
478302c50efeSFam Zheng         acb->aiocb_info->cancel_async(acb);
478402c50efeSFam Zheng     }
478583f64091Sbellard }
478683f64091Sbellard 
478783f64091Sbellard /**************************************************************/
478883f64091Sbellard /* async block device emulation */
478983f64091Sbellard 
47907c84b1b8SMarkus Armbruster typedef struct BlockAIOCBSync {
47917c84b1b8SMarkus Armbruster     BlockAIOCB common;
4792c16b5a2cSChristoph Hellwig     QEMUBH *bh;
4793c16b5a2cSChristoph Hellwig     int ret;
4794c16b5a2cSChristoph Hellwig     /* vector translation state */
4795c16b5a2cSChristoph Hellwig     QEMUIOVector *qiov;
4796c16b5a2cSChristoph Hellwig     uint8_t *bounce;
4797c16b5a2cSChristoph Hellwig     int is_write;
47987c84b1b8SMarkus Armbruster } BlockAIOCBSync;
4799c16b5a2cSChristoph Hellwig 
4800d7331bedSStefan Hajnoczi static const AIOCBInfo bdrv_em_aiocb_info = {
48017c84b1b8SMarkus Armbruster     .aiocb_size         = sizeof(BlockAIOCBSync),
4802c16b5a2cSChristoph Hellwig };
4803c16b5a2cSChristoph Hellwig 
480483f64091Sbellard static void bdrv_aio_bh_cb(void *opaque)
4805beac80cdSbellard {
48067c84b1b8SMarkus Armbruster     BlockAIOCBSync *acb = opaque;
4807f141eafeSaliguori 
4808857d4f46SKevin Wolf     if (!acb->is_write && acb->ret >= 0) {
480903396148SMichael Tokarev         qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
4810857d4f46SKevin Wolf     }
4811ceb42de8Saliguori     qemu_vfree(acb->bounce);
4812ce1a14dcSpbrook     acb->common.cb(acb->common.opaque, acb->ret);
48136a7ad299SDor Laor     qemu_bh_delete(acb->bh);
481436afc451SAvi Kivity     acb->bh = NULL;
48158007429aSFam Zheng     qemu_aio_unref(acb);
4816beac80cdSbellard }
4817beac80cdSbellard 
48187c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
4819f141eafeSaliguori                                       int64_t sector_num,
4820f141eafeSaliguori                                       QEMUIOVector *qiov,
4821f141eafeSaliguori                                       int nb_sectors,
4822097310b5SMarkus Armbruster                                       BlockCompletionFunc *cb,
4823f141eafeSaliguori                                       void *opaque,
4824f141eafeSaliguori                                       int is_write)
4825f141eafeSaliguori 
4826ea2384d3Sbellard {
48277c84b1b8SMarkus Armbruster     BlockAIOCBSync *acb;
482883f64091Sbellard 
4829d7331bedSStefan Hajnoczi     acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
4830f141eafeSaliguori     acb->is_write = is_write;
4831f141eafeSaliguori     acb->qiov = qiov;
4832857d4f46SKevin Wolf     acb->bounce = qemu_try_blockalign(bs, qiov->size);
48332572b37aSStefan Hajnoczi     acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);
4834f141eafeSaliguori 
4835857d4f46SKevin Wolf     if (acb->bounce == NULL) {
4836857d4f46SKevin Wolf         acb->ret = -ENOMEM;
4837857d4f46SKevin Wolf     } else if (is_write) {
4838d5e6b161SMichael Tokarev         qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
48391ed20acfSStefan Hajnoczi         acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
4840f141eafeSaliguori     } else {
48411ed20acfSStefan Hajnoczi         acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
4842f141eafeSaliguori     }
4843f141eafeSaliguori 
4844ce1a14dcSpbrook     qemu_bh_schedule(acb->bh);
4845f141eafeSaliguori 
4846ce1a14dcSpbrook     return &acb->common;
48477a6cba61Spbrook }
48487a6cba61Spbrook 
48497c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
4850f141eafeSaliguori         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
4851097310b5SMarkus Armbruster         BlockCompletionFunc *cb, void *opaque)
485283f64091Sbellard {
4853f141eafeSaliguori     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
485483f64091Sbellard }
485583f64091Sbellard 
48567c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
4857f141eafeSaliguori         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
4858097310b5SMarkus Armbruster         BlockCompletionFunc *cb, void *opaque)
4859f141eafeSaliguori {
4860f141eafeSaliguori     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
4861f141eafeSaliguori }
4862f141eafeSaliguori 
486368485420SKevin Wolf 
48647c84b1b8SMarkus Armbruster typedef struct BlockAIOCBCoroutine {
48657c84b1b8SMarkus Armbruster     BlockAIOCB common;
486668485420SKevin Wolf     BlockRequest req;
486768485420SKevin Wolf     bool is_write;
48680b5a2445SPaolo Bonzini     bool need_bh;
4869d318aea9SKevin Wolf     bool *done;
487068485420SKevin Wolf     QEMUBH* bh;
48717c84b1b8SMarkus Armbruster } BlockAIOCBCoroutine;
487268485420SKevin Wolf 
4873d7331bedSStefan Hajnoczi static const AIOCBInfo bdrv_em_co_aiocb_info = {
48747c84b1b8SMarkus Armbruster     .aiocb_size         = sizeof(BlockAIOCBCoroutine),
487568485420SKevin Wolf };
487668485420SKevin Wolf 
48770b5a2445SPaolo Bonzini static void bdrv_co_complete(BlockAIOCBCoroutine *acb)
48780b5a2445SPaolo Bonzini {
48790b5a2445SPaolo Bonzini     if (!acb->need_bh) {
48800b5a2445SPaolo Bonzini         acb->common.cb(acb->common.opaque, acb->req.error);
48810b5a2445SPaolo Bonzini         qemu_aio_unref(acb);
48820b5a2445SPaolo Bonzini     }
48830b5a2445SPaolo Bonzini }
48840b5a2445SPaolo Bonzini 
488535246a68SPaolo Bonzini static void bdrv_co_em_bh(void *opaque)
488668485420SKevin Wolf {
48877c84b1b8SMarkus Armbruster     BlockAIOCBCoroutine *acb = opaque;
488868485420SKevin Wolf 
48890b5a2445SPaolo Bonzini     assert(!acb->need_bh);
489068485420SKevin Wolf     qemu_bh_delete(acb->bh);
48910b5a2445SPaolo Bonzini     bdrv_co_complete(acb);
48920b5a2445SPaolo Bonzini }
48930b5a2445SPaolo Bonzini 
48940b5a2445SPaolo Bonzini static void bdrv_co_maybe_schedule_bh(BlockAIOCBCoroutine *acb)
48950b5a2445SPaolo Bonzini {
48960b5a2445SPaolo Bonzini     acb->need_bh = false;
48970b5a2445SPaolo Bonzini     if (acb->req.error != -EINPROGRESS) {
48980b5a2445SPaolo Bonzini         BlockDriverState *bs = acb->common.bs;
48990b5a2445SPaolo Bonzini 
49000b5a2445SPaolo Bonzini         acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
49010b5a2445SPaolo Bonzini         qemu_bh_schedule(acb->bh);
49020b5a2445SPaolo Bonzini     }
490368485420SKevin Wolf }
490468485420SKevin Wolf 
4905b2a61371SStefan Hajnoczi /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
4906b2a61371SStefan Hajnoczi static void coroutine_fn bdrv_co_do_rw(void *opaque)
4907b2a61371SStefan Hajnoczi {
49087c84b1b8SMarkus Armbruster     BlockAIOCBCoroutine *acb = opaque;
4909b2a61371SStefan Hajnoczi     BlockDriverState *bs = acb->common.bs;
4910b2a61371SStefan Hajnoczi 
4911b2a61371SStefan Hajnoczi     if (!acb->is_write) {
4912b2a61371SStefan Hajnoczi         acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
4913d20d9b7cSPaolo Bonzini             acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
4914b2a61371SStefan Hajnoczi     } else {
4915b2a61371SStefan Hajnoczi         acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
4916d20d9b7cSPaolo Bonzini             acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
4917b2a61371SStefan Hajnoczi     }
4918b2a61371SStefan Hajnoczi 
49190b5a2445SPaolo Bonzini     bdrv_co_complete(acb);
4920b2a61371SStefan Hajnoczi }
4921b2a61371SStefan Hajnoczi 
49227c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
492368485420SKevin Wolf                                          int64_t sector_num,
492468485420SKevin Wolf                                          QEMUIOVector *qiov,
492568485420SKevin Wolf                                          int nb_sectors,
4926d20d9b7cSPaolo Bonzini                                          BdrvRequestFlags flags,
4927097310b5SMarkus Armbruster                                          BlockCompletionFunc *cb,
492868485420SKevin Wolf                                          void *opaque,
49298c5873d6SStefan Hajnoczi                                          bool is_write)
493068485420SKevin Wolf {
493168485420SKevin Wolf     Coroutine *co;
49327c84b1b8SMarkus Armbruster     BlockAIOCBCoroutine *acb;
493368485420SKevin Wolf 
4934d7331bedSStefan Hajnoczi     acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
49350b5a2445SPaolo Bonzini     acb->need_bh = true;
49360b5a2445SPaolo Bonzini     acb->req.error = -EINPROGRESS;
493768485420SKevin Wolf     acb->req.sector = sector_num;
493868485420SKevin Wolf     acb->req.nb_sectors = nb_sectors;
493968485420SKevin Wolf     acb->req.qiov = qiov;
4940d20d9b7cSPaolo Bonzini     acb->req.flags = flags;
494168485420SKevin Wolf     acb->is_write = is_write;
494268485420SKevin Wolf 
49438c5873d6SStefan Hajnoczi     co = qemu_coroutine_create(bdrv_co_do_rw);
494468485420SKevin Wolf     qemu_coroutine_enter(co, acb);
494568485420SKevin Wolf 
49460b5a2445SPaolo Bonzini     bdrv_co_maybe_schedule_bh(acb);
494768485420SKevin Wolf     return &acb->common;
494868485420SKevin Wolf }
494968485420SKevin Wolf 
495007f07615SPaolo Bonzini static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
4951b2e12bc6SChristoph Hellwig {
49527c84b1b8SMarkus Armbruster     BlockAIOCBCoroutine *acb = opaque;
495307f07615SPaolo Bonzini     BlockDriverState *bs = acb->common.bs;
4954b2e12bc6SChristoph Hellwig 
495507f07615SPaolo Bonzini     acb->req.error = bdrv_co_flush(bs);
49560b5a2445SPaolo Bonzini     bdrv_co_complete(acb);
4957b2e12bc6SChristoph Hellwig }
4958b2e12bc6SChristoph Hellwig 
49597c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
4960097310b5SMarkus Armbruster         BlockCompletionFunc *cb, void *opaque)
4961016f5cf6SAlexander Graf {
496207f07615SPaolo Bonzini     trace_bdrv_aio_flush(bs, opaque);
4963016f5cf6SAlexander Graf 
496407f07615SPaolo Bonzini     Coroutine *co;
49657c84b1b8SMarkus Armbruster     BlockAIOCBCoroutine *acb;
4966016f5cf6SAlexander Graf 
4967d7331bedSStefan Hajnoczi     acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
49680b5a2445SPaolo Bonzini     acb->need_bh = true;
49690b5a2445SPaolo Bonzini     acb->req.error = -EINPROGRESS;
4970d318aea9SKevin Wolf 
497107f07615SPaolo Bonzini     co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
497207f07615SPaolo Bonzini     qemu_coroutine_enter(co, acb);
4973016f5cf6SAlexander Graf 
49740b5a2445SPaolo Bonzini     bdrv_co_maybe_schedule_bh(acb);
4975016f5cf6SAlexander Graf     return &acb->common;
4976016f5cf6SAlexander Graf }
4977016f5cf6SAlexander Graf 
49784265d620SPaolo Bonzini static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
49794265d620SPaolo Bonzini {
49807c84b1b8SMarkus Armbruster     BlockAIOCBCoroutine *acb = opaque;
49814265d620SPaolo Bonzini     BlockDriverState *bs = acb->common.bs;
49824265d620SPaolo Bonzini 
49834265d620SPaolo Bonzini     acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
49840b5a2445SPaolo Bonzini     bdrv_co_complete(acb);
49854265d620SPaolo Bonzini }
49864265d620SPaolo Bonzini 
49877c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs,
49884265d620SPaolo Bonzini         int64_t sector_num, int nb_sectors,
4989097310b5SMarkus Armbruster         BlockCompletionFunc *cb, void *opaque)
49904265d620SPaolo Bonzini {
49914265d620SPaolo Bonzini     Coroutine *co;
49927c84b1b8SMarkus Armbruster     BlockAIOCBCoroutine *acb;
49934265d620SPaolo Bonzini 
49944265d620SPaolo Bonzini     trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
49954265d620SPaolo Bonzini 
4996d7331bedSStefan Hajnoczi     acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
49970b5a2445SPaolo Bonzini     acb->need_bh = true;
49980b5a2445SPaolo Bonzini     acb->req.error = -EINPROGRESS;
49994265d620SPaolo Bonzini     acb->req.sector = sector_num;
50004265d620SPaolo Bonzini     acb->req.nb_sectors = nb_sectors;
50014265d620SPaolo Bonzini     co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
50024265d620SPaolo Bonzini     qemu_coroutine_enter(co, acb);
50034265d620SPaolo Bonzini 
50040b5a2445SPaolo Bonzini     bdrv_co_maybe_schedule_bh(acb);
50054265d620SPaolo Bonzini     return &acb->common;
50064265d620SPaolo Bonzini }
50074265d620SPaolo Bonzini 
5008ea2384d3Sbellard void bdrv_init(void)
5009ea2384d3Sbellard {
50105efa9d5aSAnthony Liguori     module_call_init(MODULE_INIT_BLOCK);
5011ea2384d3Sbellard }
5012ce1a14dcSpbrook 
5013eb852011SMarkus Armbruster void bdrv_init_with_whitelist(void)
5014eb852011SMarkus Armbruster {
5015eb852011SMarkus Armbruster     use_bdrv_whitelist = 1;
5016eb852011SMarkus Armbruster     bdrv_init();
5017eb852011SMarkus Armbruster }
5018eb852011SMarkus Armbruster 
5019d7331bedSStefan Hajnoczi void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
5020097310b5SMarkus Armbruster                    BlockCompletionFunc *cb, void *opaque)
50216bbff9a0Saliguori {
50227c84b1b8SMarkus Armbruster     BlockAIOCB *acb;
5023ce1a14dcSpbrook 
5024d7331bedSStefan Hajnoczi     acb = g_slice_alloc(aiocb_info->aiocb_size);
5025d7331bedSStefan Hajnoczi     acb->aiocb_info = aiocb_info;
5026ce1a14dcSpbrook     acb->bs = bs;
5027ce1a14dcSpbrook     acb->cb = cb;
5028ce1a14dcSpbrook     acb->opaque = opaque;
5029f197fe2bSFam Zheng     acb->refcnt = 1;
5030ce1a14dcSpbrook     return acb;
5031ce1a14dcSpbrook }
5032ce1a14dcSpbrook 
5033f197fe2bSFam Zheng void qemu_aio_ref(void *p)
5034f197fe2bSFam Zheng {
50357c84b1b8SMarkus Armbruster     BlockAIOCB *acb = p;
5036f197fe2bSFam Zheng     acb->refcnt++;
5037f197fe2bSFam Zheng }
5038f197fe2bSFam Zheng 
50398007429aSFam Zheng void qemu_aio_unref(void *p)
5040ce1a14dcSpbrook {
50417c84b1b8SMarkus Armbruster     BlockAIOCB *acb = p;
5042f197fe2bSFam Zheng     assert(acb->refcnt > 0);
5043f197fe2bSFam Zheng     if (--acb->refcnt == 0) {
5044d7331bedSStefan Hajnoczi         g_slice_free1(acb->aiocb_info->aiocb_size, acb);
5045ce1a14dcSpbrook     }
5046f197fe2bSFam Zheng }
504719cb3738Sbellard 
504819cb3738Sbellard /**************************************************************/
5049f9f05dc5SKevin Wolf /* Coroutine block device emulation */
5050f9f05dc5SKevin Wolf 
5051f9f05dc5SKevin Wolf typedef struct CoroutineIOCompletion {
5052f9f05dc5SKevin Wolf     Coroutine *coroutine;
5053f9f05dc5SKevin Wolf     int ret;
5054f9f05dc5SKevin Wolf } CoroutineIOCompletion;
5055f9f05dc5SKevin Wolf 
5056f9f05dc5SKevin Wolf static void bdrv_co_io_em_complete(void *opaque, int ret)
5057f9f05dc5SKevin Wolf {
5058f9f05dc5SKevin Wolf     CoroutineIOCompletion *co = opaque;
5059f9f05dc5SKevin Wolf 
5060f9f05dc5SKevin Wolf     co->ret = ret;
5061f9f05dc5SKevin Wolf     qemu_coroutine_enter(co->coroutine, NULL);
5062f9f05dc5SKevin Wolf }
5063f9f05dc5SKevin Wolf 
5064f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
5065f9f05dc5SKevin Wolf                                       int nb_sectors, QEMUIOVector *iov,
5066f9f05dc5SKevin Wolf                                       bool is_write)
5067f9f05dc5SKevin Wolf {
5068f9f05dc5SKevin Wolf     CoroutineIOCompletion co = {
5069f9f05dc5SKevin Wolf         .coroutine = qemu_coroutine_self(),
5070f9f05dc5SKevin Wolf     };
50717c84b1b8SMarkus Armbruster     BlockAIOCB *acb;
5072f9f05dc5SKevin Wolf 
5073f9f05dc5SKevin Wolf     if (is_write) {
5074a652d160SStefan Hajnoczi         acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
5075f9f05dc5SKevin Wolf                                        bdrv_co_io_em_complete, &co);
5076f9f05dc5SKevin Wolf     } else {
5077a652d160SStefan Hajnoczi         acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
5078f9f05dc5SKevin Wolf                                       bdrv_co_io_em_complete, &co);
5079f9f05dc5SKevin Wolf     }
5080f9f05dc5SKevin Wolf 
508159370aaaSStefan Hajnoczi     trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
5082f9f05dc5SKevin Wolf     if (!acb) {
5083f9f05dc5SKevin Wolf         return -EIO;
5084f9f05dc5SKevin Wolf     }
5085f9f05dc5SKevin Wolf     qemu_coroutine_yield();
5086f9f05dc5SKevin Wolf 
5087f9f05dc5SKevin Wolf     return co.ret;
5088f9f05dc5SKevin Wolf }
5089f9f05dc5SKevin Wolf 
5090f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
5091f9f05dc5SKevin Wolf                                          int64_t sector_num, int nb_sectors,
5092f9f05dc5SKevin Wolf                                          QEMUIOVector *iov)
5093f9f05dc5SKevin Wolf {
5094f9f05dc5SKevin Wolf     return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
5095f9f05dc5SKevin Wolf }
5096f9f05dc5SKevin Wolf 
5097f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
5098f9f05dc5SKevin Wolf                                          int64_t sector_num, int nb_sectors,
5099f9f05dc5SKevin Wolf                                          QEMUIOVector *iov)
5100f9f05dc5SKevin Wolf {
5101f9f05dc5SKevin Wolf     return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
5102f9f05dc5SKevin Wolf }
5103f9f05dc5SKevin Wolf 
510407f07615SPaolo Bonzini static void coroutine_fn bdrv_flush_co_entry(void *opaque)
5105e7a8a783SKevin Wolf {
510607f07615SPaolo Bonzini     RwCo *rwco = opaque;
510707f07615SPaolo Bonzini 
510807f07615SPaolo Bonzini     rwco->ret = bdrv_co_flush(rwco->bs);
510907f07615SPaolo Bonzini }
511007f07615SPaolo Bonzini 
511107f07615SPaolo Bonzini int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
511207f07615SPaolo Bonzini {
5113eb489bb1SKevin Wolf     int ret;
5114eb489bb1SKevin Wolf 
511529cdb251SPaolo Bonzini     if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
511607f07615SPaolo Bonzini         return 0;
5117eb489bb1SKevin Wolf     }
5118eb489bb1SKevin Wolf 
5119ca716364SKevin Wolf     /* Write back cached data to the OS even with cache=unsafe */
5120bf736fe3SKevin Wolf     BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
5121eb489bb1SKevin Wolf     if (bs->drv->bdrv_co_flush_to_os) {
5122eb489bb1SKevin Wolf         ret = bs->drv->bdrv_co_flush_to_os(bs);
5123eb489bb1SKevin Wolf         if (ret < 0) {
5124eb489bb1SKevin Wolf             return ret;
5125eb489bb1SKevin Wolf         }
5126eb489bb1SKevin Wolf     }
5127eb489bb1SKevin Wolf 
5128ca716364SKevin Wolf     /* But don't actually force it to the disk with cache=unsafe */
5129ca716364SKevin Wolf     if (bs->open_flags & BDRV_O_NO_FLUSH) {
5130d4c82329SKevin Wolf         goto flush_parent;
5131ca716364SKevin Wolf     }
5132ca716364SKevin Wolf 
5133bf736fe3SKevin Wolf     BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
5134eb489bb1SKevin Wolf     if (bs->drv->bdrv_co_flush_to_disk) {
513529cdb251SPaolo Bonzini         ret = bs->drv->bdrv_co_flush_to_disk(bs);
513607f07615SPaolo Bonzini     } else if (bs->drv->bdrv_aio_flush) {
51377c84b1b8SMarkus Armbruster         BlockAIOCB *acb;
5138e7a8a783SKevin Wolf         CoroutineIOCompletion co = {
5139e7a8a783SKevin Wolf             .coroutine = qemu_coroutine_self(),
5140e7a8a783SKevin Wolf         };
5141e7a8a783SKevin Wolf 
514207f07615SPaolo Bonzini         acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
514307f07615SPaolo Bonzini         if (acb == NULL) {
514429cdb251SPaolo Bonzini             ret = -EIO;
514507f07615SPaolo Bonzini         } else {
5146e7a8a783SKevin Wolf             qemu_coroutine_yield();
514729cdb251SPaolo Bonzini             ret = co.ret;
5148e7a8a783SKevin Wolf         }
514907f07615SPaolo Bonzini     } else {
515007f07615SPaolo Bonzini         /*
515107f07615SPaolo Bonzini          * Some block drivers always operate in either writethrough or unsafe
515207f07615SPaolo Bonzini          * mode and don't support bdrv_flush therefore. Usually qemu doesn't
515307f07615SPaolo Bonzini          * know how the server works (because the behaviour is hardcoded or
515407f07615SPaolo Bonzini          * depends on server-side configuration), so we can't ensure that
515507f07615SPaolo Bonzini          * everything is safe on disk. Returning an error doesn't work because
515607f07615SPaolo Bonzini          * that would break guests even if the server operates in writethrough
515707f07615SPaolo Bonzini          * mode.
515807f07615SPaolo Bonzini          *
515907f07615SPaolo Bonzini          * Let's hope the user knows what he's doing.
516007f07615SPaolo Bonzini          */
516129cdb251SPaolo Bonzini         ret = 0;
516207f07615SPaolo Bonzini     }
516329cdb251SPaolo Bonzini     if (ret < 0) {
516429cdb251SPaolo Bonzini         return ret;
516529cdb251SPaolo Bonzini     }
516629cdb251SPaolo Bonzini 
516729cdb251SPaolo Bonzini     /* Now flush the underlying protocol.  It will also have BDRV_O_NO_FLUSH
516829cdb251SPaolo Bonzini      * in the case of cache=unsafe, so there are no useless flushes.
516929cdb251SPaolo Bonzini      */
5170d4c82329SKevin Wolf flush_parent:
517129cdb251SPaolo Bonzini     return bdrv_co_flush(bs->file);
517207f07615SPaolo Bonzini }
517307f07615SPaolo Bonzini 
51745a8a30dbSKevin Wolf void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
51750f15423cSAnthony Liguori {
51765a8a30dbSKevin Wolf     Error *local_err = NULL;
51775a8a30dbSKevin Wolf     int ret;
51785a8a30dbSKevin Wolf 
51793456a8d1SKevin Wolf     if (!bs->drv)  {
51803456a8d1SKevin Wolf         return;
51810f15423cSAnthony Liguori     }
51823456a8d1SKevin Wolf 
51837ea2d269SAlexey Kardashevskiy     if (!(bs->open_flags & BDRV_O_INCOMING)) {
51847ea2d269SAlexey Kardashevskiy         return;
51857ea2d269SAlexey Kardashevskiy     }
51867ea2d269SAlexey Kardashevskiy     bs->open_flags &= ~BDRV_O_INCOMING;
51877ea2d269SAlexey Kardashevskiy 
51883456a8d1SKevin Wolf     if (bs->drv->bdrv_invalidate_cache) {
51895a8a30dbSKevin Wolf         bs->drv->bdrv_invalidate_cache(bs, &local_err);
51903456a8d1SKevin Wolf     } else if (bs->file) {
51915a8a30dbSKevin Wolf         bdrv_invalidate_cache(bs->file, &local_err);
51925a8a30dbSKevin Wolf     }
51935a8a30dbSKevin Wolf     if (local_err) {
51945a8a30dbSKevin Wolf         error_propagate(errp, local_err);
51955a8a30dbSKevin Wolf         return;
51963456a8d1SKevin Wolf     }
51973456a8d1SKevin Wolf 
51985a8a30dbSKevin Wolf     ret = refresh_total_sectors(bs, bs->total_sectors);
51995a8a30dbSKevin Wolf     if (ret < 0) {
52005a8a30dbSKevin Wolf         error_setg_errno(errp, -ret, "Could not refresh total sector count");
52015a8a30dbSKevin Wolf         return;
52025a8a30dbSKevin Wolf     }
52030f15423cSAnthony Liguori }
52040f15423cSAnthony Liguori 
52055a8a30dbSKevin Wolf void bdrv_invalidate_cache_all(Error **errp)
52060f15423cSAnthony Liguori {
52070f15423cSAnthony Liguori     BlockDriverState *bs;
52085a8a30dbSKevin Wolf     Error *local_err = NULL;
52090f15423cSAnthony Liguori 
5210dc364f4cSBenoît Canet     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
5211ed78cda3SStefan Hajnoczi         AioContext *aio_context = bdrv_get_aio_context(bs);
5212ed78cda3SStefan Hajnoczi 
5213ed78cda3SStefan Hajnoczi         aio_context_acquire(aio_context);
52145a8a30dbSKevin Wolf         bdrv_invalidate_cache(bs, &local_err);
5215ed78cda3SStefan Hajnoczi         aio_context_release(aio_context);
52165a8a30dbSKevin Wolf         if (local_err) {
52175a8a30dbSKevin Wolf             error_propagate(errp, local_err);
52185a8a30dbSKevin Wolf             return;
52195a8a30dbSKevin Wolf         }
52200f15423cSAnthony Liguori     }
52210f15423cSAnthony Liguori }
52220f15423cSAnthony Liguori 
522307f07615SPaolo Bonzini int bdrv_flush(BlockDriverState *bs)
522407f07615SPaolo Bonzini {
522507f07615SPaolo Bonzini     Coroutine *co;
522607f07615SPaolo Bonzini     RwCo rwco = {
522707f07615SPaolo Bonzini         .bs = bs,
522807f07615SPaolo Bonzini         .ret = NOT_DONE,
522907f07615SPaolo Bonzini     };
523007f07615SPaolo Bonzini 
523107f07615SPaolo Bonzini     if (qemu_in_coroutine()) {
523207f07615SPaolo Bonzini         /* Fast-path if already in coroutine context */
523307f07615SPaolo Bonzini         bdrv_flush_co_entry(&rwco);
523407f07615SPaolo Bonzini     } else {
52352572b37aSStefan Hajnoczi         AioContext *aio_context = bdrv_get_aio_context(bs);
52362572b37aSStefan Hajnoczi 
523707f07615SPaolo Bonzini         co = qemu_coroutine_create(bdrv_flush_co_entry);
523807f07615SPaolo Bonzini         qemu_coroutine_enter(co, &rwco);
523907f07615SPaolo Bonzini         while (rwco.ret == NOT_DONE) {
52402572b37aSStefan Hajnoczi             aio_poll(aio_context, true);
524107f07615SPaolo Bonzini         }
524207f07615SPaolo Bonzini     }
524307f07615SPaolo Bonzini 
524407f07615SPaolo Bonzini     return rwco.ret;
524507f07615SPaolo Bonzini }
5246e7a8a783SKevin Wolf 
5247775aa8b6SKevin Wolf typedef struct DiscardCo {
5248775aa8b6SKevin Wolf     BlockDriverState *bs;
5249775aa8b6SKevin Wolf     int64_t sector_num;
5250775aa8b6SKevin Wolf     int nb_sectors;
5251775aa8b6SKevin Wolf     int ret;
5252775aa8b6SKevin Wolf } DiscardCo;
52534265d620SPaolo Bonzini static void coroutine_fn bdrv_discard_co_entry(void *opaque)
52544265d620SPaolo Bonzini {
5255775aa8b6SKevin Wolf     DiscardCo *rwco = opaque;
52564265d620SPaolo Bonzini 
52574265d620SPaolo Bonzini     rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
52584265d620SPaolo Bonzini }
52594265d620SPaolo Bonzini 
52604265d620SPaolo Bonzini int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
52614265d620SPaolo Bonzini                                  int nb_sectors)
52624265d620SPaolo Bonzini {
5263b9c64947SMax Reitz     int max_discard, ret;
5264d51e9fe5SPaolo Bonzini 
52654265d620SPaolo Bonzini     if (!bs->drv) {
52664265d620SPaolo Bonzini         return -ENOMEDIUM;
5267b9c64947SMax Reitz     }
5268b9c64947SMax Reitz 
5269b9c64947SMax Reitz     ret = bdrv_check_request(bs, sector_num, nb_sectors);
5270b9c64947SMax Reitz     if (ret < 0) {
5271b9c64947SMax Reitz         return ret;
52724265d620SPaolo Bonzini     } else if (bs->read_only) {
52734265d620SPaolo Bonzini         return -EROFS;
5274df702c9bSPaolo Bonzini     }
5275df702c9bSPaolo Bonzini 
52768f0720ecSPaolo Bonzini     bdrv_reset_dirty(bs, sector_num, nb_sectors);
5277df702c9bSPaolo Bonzini 
52789e8f1835SPaolo Bonzini     /* Do nothing if disabled.  */
52799e8f1835SPaolo Bonzini     if (!(bs->open_flags & BDRV_O_UNMAP)) {
52809e8f1835SPaolo Bonzini         return 0;
52819e8f1835SPaolo Bonzini     }
52829e8f1835SPaolo Bonzini 
5283d51e9fe5SPaolo Bonzini     if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
5284d51e9fe5SPaolo Bonzini         return 0;
5285d51e9fe5SPaolo Bonzini     }
52866f14da52SPeter Lieven 
528775af1f34SPeter Lieven     max_discard = MIN_NON_ZERO(bs->bl.max_discard, BDRV_REQUEST_MAX_SECTORS);
52886f14da52SPeter Lieven     while (nb_sectors > 0) {
52896f14da52SPeter Lieven         int ret;
52906f14da52SPeter Lieven         int num = nb_sectors;
52916f14da52SPeter Lieven 
52926f14da52SPeter Lieven         /* align request */
52936f14da52SPeter Lieven         if (bs->bl.discard_alignment &&
52946f14da52SPeter Lieven             num >= bs->bl.discard_alignment &&
52956f14da52SPeter Lieven             sector_num % bs->bl.discard_alignment) {
52966f14da52SPeter Lieven             if (num > bs->bl.discard_alignment) {
52976f14da52SPeter Lieven                 num = bs->bl.discard_alignment;
52986f14da52SPeter Lieven             }
52996f14da52SPeter Lieven             num -= sector_num % bs->bl.discard_alignment;
53006f14da52SPeter Lieven         }
53016f14da52SPeter Lieven 
53026f14da52SPeter Lieven         /* limit request size */
53036f14da52SPeter Lieven         if (num > max_discard) {
53046f14da52SPeter Lieven             num = max_discard;
53056f14da52SPeter Lieven         }
53066f14da52SPeter Lieven 
5307d51e9fe5SPaolo Bonzini         if (bs->drv->bdrv_co_discard) {
53086f14da52SPeter Lieven             ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
5309d51e9fe5SPaolo Bonzini         } else {
53107c84b1b8SMarkus Armbruster             BlockAIOCB *acb;
53114265d620SPaolo Bonzini             CoroutineIOCompletion co = {
53124265d620SPaolo Bonzini                 .coroutine = qemu_coroutine_self(),
53134265d620SPaolo Bonzini             };
53144265d620SPaolo Bonzini 
53154265d620SPaolo Bonzini             acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
53164265d620SPaolo Bonzini                                             bdrv_co_io_em_complete, &co);
53174265d620SPaolo Bonzini             if (acb == NULL) {
53184265d620SPaolo Bonzini                 return -EIO;
53194265d620SPaolo Bonzini             } else {
53204265d620SPaolo Bonzini                 qemu_coroutine_yield();
5321d51e9fe5SPaolo Bonzini                 ret = co.ret;
53224265d620SPaolo Bonzini             }
5323d51e9fe5SPaolo Bonzini         }
53247ce21016SPaolo Bonzini         if (ret && ret != -ENOTSUP) {
5325d51e9fe5SPaolo Bonzini             return ret;
5326d51e9fe5SPaolo Bonzini         }
5327d51e9fe5SPaolo Bonzini 
5328d51e9fe5SPaolo Bonzini         sector_num += num;
5329d51e9fe5SPaolo Bonzini         nb_sectors -= num;
5330d51e9fe5SPaolo Bonzini     }
53314265d620SPaolo Bonzini     return 0;
53324265d620SPaolo Bonzini }
53334265d620SPaolo Bonzini 
53344265d620SPaolo Bonzini int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
53354265d620SPaolo Bonzini {
53364265d620SPaolo Bonzini     Coroutine *co;
5337775aa8b6SKevin Wolf     DiscardCo rwco = {
53384265d620SPaolo Bonzini         .bs = bs,
53394265d620SPaolo Bonzini         .sector_num = sector_num,
53404265d620SPaolo Bonzini         .nb_sectors = nb_sectors,
53414265d620SPaolo Bonzini         .ret = NOT_DONE,
53424265d620SPaolo Bonzini     };
53434265d620SPaolo Bonzini 
53444265d620SPaolo Bonzini     if (qemu_in_coroutine()) {
53454265d620SPaolo Bonzini         /* Fast-path if already in coroutine context */
53464265d620SPaolo Bonzini         bdrv_discard_co_entry(&rwco);
53474265d620SPaolo Bonzini     } else {
53482572b37aSStefan Hajnoczi         AioContext *aio_context = bdrv_get_aio_context(bs);
53492572b37aSStefan Hajnoczi 
53504265d620SPaolo Bonzini         co = qemu_coroutine_create(bdrv_discard_co_entry);
53514265d620SPaolo Bonzini         qemu_coroutine_enter(co, &rwco);
53524265d620SPaolo Bonzini         while (rwco.ret == NOT_DONE) {
53532572b37aSStefan Hajnoczi             aio_poll(aio_context, true);
53544265d620SPaolo Bonzini         }
53554265d620SPaolo Bonzini     }
53564265d620SPaolo Bonzini 
53574265d620SPaolo Bonzini     return rwco.ret;
53584265d620SPaolo Bonzini }
53594265d620SPaolo Bonzini 
5360f9f05dc5SKevin Wolf /**************************************************************/
536119cb3738Sbellard /* removable device support */
536219cb3738Sbellard 
536319cb3738Sbellard /**
536419cb3738Sbellard  * Return TRUE if the media is present
536519cb3738Sbellard  */
536619cb3738Sbellard int bdrv_is_inserted(BlockDriverState *bs)
536719cb3738Sbellard {
536819cb3738Sbellard     BlockDriver *drv = bs->drv;
5369a1aff5bfSMarkus Armbruster 
537019cb3738Sbellard     if (!drv)
537119cb3738Sbellard         return 0;
537219cb3738Sbellard     if (!drv->bdrv_is_inserted)
5373a1aff5bfSMarkus Armbruster         return 1;
5374a1aff5bfSMarkus Armbruster     return drv->bdrv_is_inserted(bs);
537519cb3738Sbellard }
537619cb3738Sbellard 
537719cb3738Sbellard /**
53788e49ca46SMarkus Armbruster  * Return whether the media changed since the last call to this
53798e49ca46SMarkus Armbruster  * function, or -ENOTSUP if we don't know.  Most drivers don't know.
538019cb3738Sbellard  */
538119cb3738Sbellard int bdrv_media_changed(BlockDriverState *bs)
538219cb3738Sbellard {
538319cb3738Sbellard     BlockDriver *drv = bs->drv;
538419cb3738Sbellard 
53858e49ca46SMarkus Armbruster     if (drv && drv->bdrv_media_changed) {
53868e49ca46SMarkus Armbruster         return drv->bdrv_media_changed(bs);
53878e49ca46SMarkus Armbruster     }
53888e49ca46SMarkus Armbruster     return -ENOTSUP;
538919cb3738Sbellard }
539019cb3738Sbellard 
539119cb3738Sbellard /**
539219cb3738Sbellard  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
539319cb3738Sbellard  */
5394f36f3949SLuiz Capitulino void bdrv_eject(BlockDriverState *bs, bool eject_flag)
539519cb3738Sbellard {
539619cb3738Sbellard     BlockDriver *drv = bs->drv;
5397bfb197e0SMarkus Armbruster     const char *device_name;
539819cb3738Sbellard 
5399822e1cd1SMarkus Armbruster     if (drv && drv->bdrv_eject) {
5400822e1cd1SMarkus Armbruster         drv->bdrv_eject(bs, eject_flag);
540119cb3738Sbellard     }
54026f382ed2SLuiz Capitulino 
5403bfb197e0SMarkus Armbruster     device_name = bdrv_get_device_name(bs);
5404bfb197e0SMarkus Armbruster     if (device_name[0] != '\0') {
5405bfb197e0SMarkus Armbruster         qapi_event_send_device_tray_moved(device_name,
5406a5ee7bd4SWenchao Xia                                           eject_flag, &error_abort);
54076f382ed2SLuiz Capitulino     }
540819cb3738Sbellard }
540919cb3738Sbellard 
541019cb3738Sbellard /**
541119cb3738Sbellard  * Lock or unlock the media (if it is locked, the user won't be able
541219cb3738Sbellard  * to eject it manually).
541319cb3738Sbellard  */
5414025e849aSMarkus Armbruster void bdrv_lock_medium(BlockDriverState *bs, bool locked)
541519cb3738Sbellard {
541619cb3738Sbellard     BlockDriver *drv = bs->drv;
541719cb3738Sbellard 
5418025e849aSMarkus Armbruster     trace_bdrv_lock_medium(bs, locked);
5419b8c6d095SStefan Hajnoczi 
5420025e849aSMarkus Armbruster     if (drv && drv->bdrv_lock_medium) {
5421025e849aSMarkus Armbruster         drv->bdrv_lock_medium(bs, locked);
542219cb3738Sbellard     }
542319cb3738Sbellard }
5424985a03b0Sths 
5425985a03b0Sths /* needed for generic scsi interface */
5426985a03b0Sths 
5427985a03b0Sths int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
5428985a03b0Sths {
5429985a03b0Sths     BlockDriver *drv = bs->drv;
5430985a03b0Sths 
5431985a03b0Sths     if (drv && drv->bdrv_ioctl)
5432985a03b0Sths         return drv->bdrv_ioctl(bs, req, buf);
5433985a03b0Sths     return -ENOTSUP;
5434985a03b0Sths }
54357d780669Saliguori 
54367c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
5437221f715dSaliguori         unsigned long int req, void *buf,
5438097310b5SMarkus Armbruster         BlockCompletionFunc *cb, void *opaque)
54397d780669Saliguori {
5440221f715dSaliguori     BlockDriver *drv = bs->drv;
54417d780669Saliguori 
5442221f715dSaliguori     if (drv && drv->bdrv_aio_ioctl)
5443221f715dSaliguori         return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
5444221f715dSaliguori     return NULL;
54457d780669Saliguori }
5446e268ca52Saliguori 
54471b7fd729SPaolo Bonzini void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
54487b6f9300SMarkus Armbruster {
54491b7fd729SPaolo Bonzini     bs->guest_block_size = align;
54507b6f9300SMarkus Armbruster }
54517cd1e32aSlirans@il.ibm.com 
5452e268ca52Saliguori void *qemu_blockalign(BlockDriverState *bs, size_t size)
5453e268ca52Saliguori {
5454339064d5SKevin Wolf     return qemu_memalign(bdrv_opt_mem_align(bs), size);
5455e268ca52Saliguori }
54567cd1e32aSlirans@il.ibm.com 
54579ebd8448SMax Reitz void *qemu_blockalign0(BlockDriverState *bs, size_t size)
54589ebd8448SMax Reitz {
54599ebd8448SMax Reitz     return memset(qemu_blockalign(bs, size), 0, size);
54609ebd8448SMax Reitz }
54619ebd8448SMax Reitz 
54627d2a35ccSKevin Wolf void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
54637d2a35ccSKevin Wolf {
54647d2a35ccSKevin Wolf     size_t align = bdrv_opt_mem_align(bs);
54657d2a35ccSKevin Wolf 
54667d2a35ccSKevin Wolf     /* Ensure that NULL is never returned on success */
54677d2a35ccSKevin Wolf     assert(align > 0);
54687d2a35ccSKevin Wolf     if (size == 0) {
54697d2a35ccSKevin Wolf         size = align;
54707d2a35ccSKevin Wolf     }
54717d2a35ccSKevin Wolf 
54727d2a35ccSKevin Wolf     return qemu_try_memalign(align, size);
54737d2a35ccSKevin Wolf }
54747d2a35ccSKevin Wolf 
54759ebd8448SMax Reitz void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
54769ebd8448SMax Reitz {
54779ebd8448SMax Reitz     void *mem = qemu_try_blockalign(bs, size);
54789ebd8448SMax Reitz 
54799ebd8448SMax Reitz     if (mem) {
54809ebd8448SMax Reitz         memset(mem, 0, size);
54819ebd8448SMax Reitz     }
54829ebd8448SMax Reitz 
54839ebd8448SMax Reitz     return mem;
54849ebd8448SMax Reitz }
54859ebd8448SMax Reitz 
5486c53b1c51SStefan Hajnoczi /*
5487c53b1c51SStefan Hajnoczi  * Check if all memory in this vector is sector aligned.
5488c53b1c51SStefan Hajnoczi  */
5489c53b1c51SStefan Hajnoczi bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
5490c53b1c51SStefan Hajnoczi {
5491c53b1c51SStefan Hajnoczi     int i;
5492339064d5SKevin Wolf     size_t alignment = bdrv_opt_mem_align(bs);
5493c53b1c51SStefan Hajnoczi 
5494c53b1c51SStefan Hajnoczi     for (i = 0; i < qiov->niov; i++) {
5495339064d5SKevin Wolf         if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
5496c53b1c51SStefan Hajnoczi             return false;
5497c53b1c51SStefan Hajnoczi         }
5498339064d5SKevin Wolf         if (qiov->iov[i].iov_len % alignment) {
54991ff735bdSKevin Wolf             return false;
55001ff735bdSKevin Wolf         }
5501c53b1c51SStefan Hajnoczi     }
5502c53b1c51SStefan Hajnoczi 
5503c53b1c51SStefan Hajnoczi     return true;
5504c53b1c51SStefan Hajnoczi }
5505c53b1c51SStefan Hajnoczi 
55060db6e54aSFam Zheng BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
55070db6e54aSFam Zheng {
55080db6e54aSFam Zheng     BdrvDirtyBitmap *bm;
55090db6e54aSFam Zheng 
55100db6e54aSFam Zheng     assert(name);
55110db6e54aSFam Zheng     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
55120db6e54aSFam Zheng         if (bm->name && !strcmp(name, bm->name)) {
55130db6e54aSFam Zheng             return bm;
55140db6e54aSFam Zheng         }
55150db6e54aSFam Zheng     }
55160db6e54aSFam Zheng     return NULL;
55170db6e54aSFam Zheng }
55180db6e54aSFam Zheng 
55190db6e54aSFam Zheng void bdrv_dirty_bitmap_make_anon(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
55200db6e54aSFam Zheng {
55210db6e54aSFam Zheng     g_free(bitmap->name);
55220db6e54aSFam Zheng     bitmap->name = NULL;
55230db6e54aSFam Zheng }
55240db6e54aSFam Zheng 
55250db6e54aSFam Zheng BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
55265fba6c0eSJohn Snow                                           uint32_t granularity,
55270db6e54aSFam Zheng                                           const char *name,
5528b8afb520SFam Zheng                                           Error **errp)
55297cd1e32aSlirans@il.ibm.com {
55307cd1e32aSlirans@il.ibm.com     int64_t bitmap_size;
5531e4654d2dSFam Zheng     BdrvDirtyBitmap *bitmap;
55325fba6c0eSJohn Snow     uint32_t sector_granularity;
5533a55eb92cSJan Kiszka 
553450717e94SPaolo Bonzini     assert((granularity & (granularity - 1)) == 0);
553550717e94SPaolo Bonzini 
55360db6e54aSFam Zheng     if (name && bdrv_find_dirty_bitmap(bs, name)) {
55370db6e54aSFam Zheng         error_setg(errp, "Bitmap already exists: %s", name);
55380db6e54aSFam Zheng         return NULL;
55390db6e54aSFam Zheng     }
55405fba6c0eSJohn Snow     sector_granularity = granularity >> BDRV_SECTOR_BITS;
55415fba6c0eSJohn Snow     assert(sector_granularity);
554257322b78SMarkus Armbruster     bitmap_size = bdrv_nb_sectors(bs);
5543b8afb520SFam Zheng     if (bitmap_size < 0) {
5544b8afb520SFam Zheng         error_setg_errno(errp, -bitmap_size, "could not get length of device");
5545b8afb520SFam Zheng         errno = -bitmap_size;
5546b8afb520SFam Zheng         return NULL;
5547b8afb520SFam Zheng     }
55485839e53bSMarkus Armbruster     bitmap = g_new0(BdrvDirtyBitmap, 1);
55495fba6c0eSJohn Snow     bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
55500db6e54aSFam Zheng     bitmap->name = g_strdup(name);
5551*b8e6fb75SJohn Snow     bitmap->disabled = false;
5552e4654d2dSFam Zheng     QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
5553e4654d2dSFam Zheng     return bitmap;
5554e4654d2dSFam Zheng }
5555e4654d2dSFam Zheng 
5556*b8e6fb75SJohn Snow bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
5557*b8e6fb75SJohn Snow {
5558*b8e6fb75SJohn Snow     return !bitmap->disabled;
5559*b8e6fb75SJohn Snow }
5560*b8e6fb75SJohn Snow 
5561e4654d2dSFam Zheng void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5562e4654d2dSFam Zheng {
5563e4654d2dSFam Zheng     BdrvDirtyBitmap *bm, *next;
5564e4654d2dSFam Zheng     QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
5565e4654d2dSFam Zheng         if (bm == bitmap) {
5566e4654d2dSFam Zheng             QLIST_REMOVE(bitmap, list);
5567e4654d2dSFam Zheng             hbitmap_free(bitmap->bitmap);
55680db6e54aSFam Zheng             g_free(bitmap->name);
5569e4654d2dSFam Zheng             g_free(bitmap);
5570e4654d2dSFam Zheng             return;
55717cd1e32aSlirans@il.ibm.com         }
55727cd1e32aSlirans@il.ibm.com     }
55737cd1e32aSlirans@il.ibm.com }
55747cd1e32aSlirans@il.ibm.com 
5575*b8e6fb75SJohn Snow void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
5576*b8e6fb75SJohn Snow {
5577*b8e6fb75SJohn Snow     bitmap->disabled = true;
5578*b8e6fb75SJohn Snow }
5579*b8e6fb75SJohn Snow 
5580*b8e6fb75SJohn Snow void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
5581*b8e6fb75SJohn Snow {
5582*b8e6fb75SJohn Snow     bitmap->disabled = false;
5583*b8e6fb75SJohn Snow }
5584*b8e6fb75SJohn Snow 
558521b56835SFam Zheng BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
558621b56835SFam Zheng {
558721b56835SFam Zheng     BdrvDirtyBitmap *bm;
558821b56835SFam Zheng     BlockDirtyInfoList *list = NULL;
558921b56835SFam Zheng     BlockDirtyInfoList **plist = &list;
559021b56835SFam Zheng 
559121b56835SFam Zheng     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
55925839e53bSMarkus Armbruster         BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
55935839e53bSMarkus Armbruster         BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
559421b56835SFam Zheng         info->count = bdrv_get_dirty_count(bs, bm);
5595592fdd02SJohn Snow         info->granularity = bdrv_dirty_bitmap_granularity(bm);
55960db6e54aSFam Zheng         info->has_name = !!bm->name;
55970db6e54aSFam Zheng         info->name = g_strdup(bm->name);
559821b56835SFam Zheng         entry->value = info;
559921b56835SFam Zheng         *plist = entry;
560021b56835SFam Zheng         plist = &entry->next;
560121b56835SFam Zheng     }
560221b56835SFam Zheng 
560321b56835SFam Zheng     return list;
560421b56835SFam Zheng }
560521b56835SFam Zheng 
5606e4654d2dSFam Zheng int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
56077cd1e32aSlirans@il.ibm.com {
5608e4654d2dSFam Zheng     if (bitmap) {
5609e4654d2dSFam Zheng         return hbitmap_get(bitmap->bitmap, sector);
56107cd1e32aSlirans@il.ibm.com     } else {
56117cd1e32aSlirans@il.ibm.com         return 0;
56127cd1e32aSlirans@il.ibm.com     }
56137cd1e32aSlirans@il.ibm.com }
56147cd1e32aSlirans@il.ibm.com 
5615341ebc2fSJohn Snow /**
5616341ebc2fSJohn Snow  * Chooses a default granularity based on the existing cluster size,
5617341ebc2fSJohn Snow  * but clamped between [4K, 64K]. Defaults to 64K in the case that there
5618341ebc2fSJohn Snow  * is no cluster size information available.
5619341ebc2fSJohn Snow  */
5620341ebc2fSJohn Snow uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
5621341ebc2fSJohn Snow {
5622341ebc2fSJohn Snow     BlockDriverInfo bdi;
5623341ebc2fSJohn Snow     uint32_t granularity;
5624341ebc2fSJohn Snow 
5625341ebc2fSJohn Snow     if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
5626341ebc2fSJohn Snow         granularity = MAX(4096, bdi.cluster_size);
5627341ebc2fSJohn Snow         granularity = MIN(65536, granularity);
5628341ebc2fSJohn Snow     } else {
5629341ebc2fSJohn Snow         granularity = 65536;
5630341ebc2fSJohn Snow     }
5631341ebc2fSJohn Snow 
5632341ebc2fSJohn Snow     return granularity;
5633341ebc2fSJohn Snow }
5634341ebc2fSJohn Snow 
5635592fdd02SJohn Snow uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
5636592fdd02SJohn Snow {
5637592fdd02SJohn Snow     return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
5638592fdd02SJohn Snow }
5639592fdd02SJohn Snow 
5640e4654d2dSFam Zheng void bdrv_dirty_iter_init(BlockDriverState *bs,
5641e4654d2dSFam Zheng                           BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
56421755da16SPaolo Bonzini {
5643e4654d2dSFam Zheng     hbitmap_iter_init(hbi, bitmap->bitmap, 0);
56441755da16SPaolo Bonzini }
56451755da16SPaolo Bonzini 
5646c4237dfaSVladimir Sementsov-Ogievskiy void bdrv_set_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
5647c4237dfaSVladimir Sementsov-Ogievskiy                            int64_t cur_sector, int nr_sectors)
5648c4237dfaSVladimir Sementsov-Ogievskiy {
5649*b8e6fb75SJohn Snow     assert(bdrv_dirty_bitmap_enabled(bitmap));
5650c4237dfaSVladimir Sementsov-Ogievskiy     hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
5651c4237dfaSVladimir Sementsov-Ogievskiy }
5652c4237dfaSVladimir Sementsov-Ogievskiy 
5653c4237dfaSVladimir Sementsov-Ogievskiy void bdrv_reset_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
5654c4237dfaSVladimir Sementsov-Ogievskiy                              int64_t cur_sector, int nr_sectors)
5655c4237dfaSVladimir Sementsov-Ogievskiy {
5656*b8e6fb75SJohn Snow     assert(bdrv_dirty_bitmap_enabled(bitmap));
5657c4237dfaSVladimir Sementsov-Ogievskiy     hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
5658c4237dfaSVladimir Sementsov-Ogievskiy }
5659c4237dfaSVladimir Sementsov-Ogievskiy 
5660c4237dfaSVladimir Sementsov-Ogievskiy static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
56611755da16SPaolo Bonzini                            int nr_sectors)
56621755da16SPaolo Bonzini {
5663e4654d2dSFam Zheng     BdrvDirtyBitmap *bitmap;
5664e4654d2dSFam Zheng     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5665*b8e6fb75SJohn Snow         if (!bdrv_dirty_bitmap_enabled(bitmap)) {
5666*b8e6fb75SJohn Snow             continue;
5667*b8e6fb75SJohn Snow         }
5668e4654d2dSFam Zheng         hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
5669e4654d2dSFam Zheng     }
56701755da16SPaolo Bonzini }
56711755da16SPaolo Bonzini 
5672c4237dfaSVladimir Sementsov-Ogievskiy static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
5673c4237dfaSVladimir Sementsov-Ogievskiy                              int nr_sectors)
56747cd1e32aSlirans@il.ibm.com {
5675e4654d2dSFam Zheng     BdrvDirtyBitmap *bitmap;
5676e4654d2dSFam Zheng     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5677*b8e6fb75SJohn Snow         if (!bdrv_dirty_bitmap_enabled(bitmap)) {
5678*b8e6fb75SJohn Snow             continue;
5679*b8e6fb75SJohn Snow         }
5680e4654d2dSFam Zheng         hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
5681e4654d2dSFam Zheng     }
56827cd1e32aSlirans@il.ibm.com }
5683aaa0eb75SLiran Schour 
5684e4654d2dSFam Zheng int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5685aaa0eb75SLiran Schour {
5686e4654d2dSFam Zheng     return hbitmap_count(bitmap->bitmap);
5687aaa0eb75SLiran Schour }
5688f88e1a42SJes Sorensen 
56899fcb0251SFam Zheng /* Get a reference to bs */
56909fcb0251SFam Zheng void bdrv_ref(BlockDriverState *bs)
56919fcb0251SFam Zheng {
56929fcb0251SFam Zheng     bs->refcnt++;
56939fcb0251SFam Zheng }
56949fcb0251SFam Zheng 
56959fcb0251SFam Zheng /* Release a previously grabbed reference to bs.
56969fcb0251SFam Zheng  * If after releasing, reference count is zero, the BlockDriverState is
56979fcb0251SFam Zheng  * deleted. */
56989fcb0251SFam Zheng void bdrv_unref(BlockDriverState *bs)
56999fcb0251SFam Zheng {
57009a4d5ca6SJeff Cody     if (!bs) {
57019a4d5ca6SJeff Cody         return;
57029a4d5ca6SJeff Cody     }
57039fcb0251SFam Zheng     assert(bs->refcnt > 0);
57049fcb0251SFam Zheng     if (--bs->refcnt == 0) {
57059fcb0251SFam Zheng         bdrv_delete(bs);
57069fcb0251SFam Zheng     }
57079fcb0251SFam Zheng }
57089fcb0251SFam Zheng 
/*
 * One entry in a per-operation blocker list of a BlockDriverState
 * (bs->op_blockers[op]).
 */
struct BdrvOpBlocker {
    /* Error supplied by the caller of bdrv_op_block(); also the key
     * that bdrv_op_unblock() compares by pointer to find this entry. */
    Error *reason;
    /* Linkage within bs->op_blockers[op]. */
    QLIST_ENTRY(BdrvOpBlocker) list;
};
5713fbe40ff7SFam Zheng 
5714fbe40ff7SFam Zheng bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
5715fbe40ff7SFam Zheng {
5716fbe40ff7SFam Zheng     BdrvOpBlocker *blocker;
5717fbe40ff7SFam Zheng     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5718fbe40ff7SFam Zheng     if (!QLIST_EMPTY(&bs->op_blockers[op])) {
5719fbe40ff7SFam Zheng         blocker = QLIST_FIRST(&bs->op_blockers[op]);
5720fbe40ff7SFam Zheng         if (errp) {
572181e5f78aSAlberto Garcia             error_setg(errp, "Node '%s' is busy: %s",
572281e5f78aSAlberto Garcia                        bdrv_get_device_or_node_name(bs),
5723bfb197e0SMarkus Armbruster                        error_get_pretty(blocker->reason));
5724fbe40ff7SFam Zheng         }
5725fbe40ff7SFam Zheng         return true;
5726fbe40ff7SFam Zheng     }
5727fbe40ff7SFam Zheng     return false;
5728fbe40ff7SFam Zheng }
5729fbe40ff7SFam Zheng 
5730fbe40ff7SFam Zheng void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
5731fbe40ff7SFam Zheng {
5732fbe40ff7SFam Zheng     BdrvOpBlocker *blocker;
5733fbe40ff7SFam Zheng     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5734fbe40ff7SFam Zheng 
57355839e53bSMarkus Armbruster     blocker = g_new0(BdrvOpBlocker, 1);
5736fbe40ff7SFam Zheng     blocker->reason = reason;
5737fbe40ff7SFam Zheng     QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
5738fbe40ff7SFam Zheng }
5739fbe40ff7SFam Zheng 
5740fbe40ff7SFam Zheng void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
5741fbe40ff7SFam Zheng {
5742fbe40ff7SFam Zheng     BdrvOpBlocker *blocker, *next;
5743fbe40ff7SFam Zheng     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5744fbe40ff7SFam Zheng     QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
5745fbe40ff7SFam Zheng         if (blocker->reason == reason) {
5746fbe40ff7SFam Zheng             QLIST_REMOVE(blocker, list);
5747fbe40ff7SFam Zheng             g_free(blocker);
5748fbe40ff7SFam Zheng         }
5749fbe40ff7SFam Zheng     }
5750fbe40ff7SFam Zheng }
5751fbe40ff7SFam Zheng 
5752fbe40ff7SFam Zheng void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
5753fbe40ff7SFam Zheng {
5754fbe40ff7SFam Zheng     int i;
5755fbe40ff7SFam Zheng     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5756fbe40ff7SFam Zheng         bdrv_op_block(bs, i, reason);
5757fbe40ff7SFam Zheng     }
5758fbe40ff7SFam Zheng }
5759fbe40ff7SFam Zheng 
5760fbe40ff7SFam Zheng void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
5761fbe40ff7SFam Zheng {
5762fbe40ff7SFam Zheng     int i;
5763fbe40ff7SFam Zheng     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5764fbe40ff7SFam Zheng         bdrv_op_unblock(bs, i, reason);
5765fbe40ff7SFam Zheng     }
5766fbe40ff7SFam Zheng }
5767fbe40ff7SFam Zheng 
5768fbe40ff7SFam Zheng bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
5769fbe40ff7SFam Zheng {
5770fbe40ff7SFam Zheng     int i;
5771fbe40ff7SFam Zheng 
5772fbe40ff7SFam Zheng     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5773fbe40ff7SFam Zheng         if (!QLIST_EMPTY(&bs->op_blockers[i])) {
5774fbe40ff7SFam Zheng             return false;
5775fbe40ff7SFam Zheng         }
5776fbe40ff7SFam Zheng     }
5777fbe40ff7SFam Zheng     return true;
5778fbe40ff7SFam Zheng }
5779fbe40ff7SFam Zheng 
578028a7282aSLuiz Capitulino void bdrv_iostatus_enable(BlockDriverState *bs)
578128a7282aSLuiz Capitulino {
5782d6bf279eSLuiz Capitulino     bs->iostatus_enabled = true;
578358e21ef5SLuiz Capitulino     bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
578428a7282aSLuiz Capitulino }
578528a7282aSLuiz Capitulino 
578628a7282aSLuiz Capitulino /* The I/O status is only enabled if the drive explicitly
578728a7282aSLuiz Capitulino  * enables it _and_ the VM is configured to stop on errors */
578828a7282aSLuiz Capitulino bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
578928a7282aSLuiz Capitulino {
5790d6bf279eSLuiz Capitulino     return (bs->iostatus_enabled &&
579192aa5c6dSPaolo Bonzini            (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
579292aa5c6dSPaolo Bonzini             bs->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
579392aa5c6dSPaolo Bonzini             bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
579428a7282aSLuiz Capitulino }
579528a7282aSLuiz Capitulino 
579628a7282aSLuiz Capitulino void bdrv_iostatus_disable(BlockDriverState *bs)
579728a7282aSLuiz Capitulino {
5798d6bf279eSLuiz Capitulino     bs->iostatus_enabled = false;
579928a7282aSLuiz Capitulino }
580028a7282aSLuiz Capitulino 
580128a7282aSLuiz Capitulino void bdrv_iostatus_reset(BlockDriverState *bs)
580228a7282aSLuiz Capitulino {
580328a7282aSLuiz Capitulino     if (bdrv_iostatus_is_enabled(bs)) {
580458e21ef5SLuiz Capitulino         bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
58053bd293c3SPaolo Bonzini         if (bs->job) {
58063bd293c3SPaolo Bonzini             block_job_iostatus_reset(bs->job);
58073bd293c3SPaolo Bonzini         }
580828a7282aSLuiz Capitulino     }
580928a7282aSLuiz Capitulino }
581028a7282aSLuiz Capitulino 
581128a7282aSLuiz Capitulino void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
581228a7282aSLuiz Capitulino {
58133e1caa5fSPaolo Bonzini     assert(bdrv_iostatus_is_enabled(bs));
58143e1caa5fSPaolo Bonzini     if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
581558e21ef5SLuiz Capitulino         bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
581658e21ef5SLuiz Capitulino                                          BLOCK_DEVICE_IO_STATUS_FAILED;
581728a7282aSLuiz Capitulino     }
581828a7282aSLuiz Capitulino }
581928a7282aSLuiz Capitulino 
5820d92ada22SLuiz Capitulino void bdrv_img_create(const char *filename, const char *fmt,
5821f88e1a42SJes Sorensen                      const char *base_filename, const char *base_fmt,
5822f382d43aSMiroslav Rezanina                      char *options, uint64_t img_size, int flags,
5823f382d43aSMiroslav Rezanina                      Error **errp, bool quiet)
5824f88e1a42SJes Sorensen {
582583d0521aSChunyan Liu     QemuOptsList *create_opts = NULL;
582683d0521aSChunyan Liu     QemuOpts *opts = NULL;
582783d0521aSChunyan Liu     const char *backing_fmt, *backing_file;
582883d0521aSChunyan Liu     int64_t size;
5829f88e1a42SJes Sorensen     BlockDriver *drv, *proto_drv;
583096df67d1SStefan Hajnoczi     BlockDriver *backing_drv = NULL;
5831cc84d90fSMax Reitz     Error *local_err = NULL;
5832f88e1a42SJes Sorensen     int ret = 0;
5833f88e1a42SJes Sorensen 
5834f88e1a42SJes Sorensen     /* Find driver and parse its options */
5835f88e1a42SJes Sorensen     drv = bdrv_find_format(fmt);
5836f88e1a42SJes Sorensen     if (!drv) {
583771c79813SLuiz Capitulino         error_setg(errp, "Unknown file format '%s'", fmt);
5838d92ada22SLuiz Capitulino         return;
5839f88e1a42SJes Sorensen     }
5840f88e1a42SJes Sorensen 
5841b65a5e12SMax Reitz     proto_drv = bdrv_find_protocol(filename, true, errp);
5842f88e1a42SJes Sorensen     if (!proto_drv) {
5843d92ada22SLuiz Capitulino         return;
5844f88e1a42SJes Sorensen     }
5845f88e1a42SJes Sorensen 
5846c6149724SMax Reitz     if (!drv->create_opts) {
5847c6149724SMax Reitz         error_setg(errp, "Format driver '%s' does not support image creation",
5848c6149724SMax Reitz                    drv->format_name);
5849c6149724SMax Reitz         return;
5850c6149724SMax Reitz     }
5851c6149724SMax Reitz 
5852c6149724SMax Reitz     if (!proto_drv->create_opts) {
5853c6149724SMax Reitz         error_setg(errp, "Protocol driver '%s' does not support image creation",
5854c6149724SMax Reitz                    proto_drv->format_name);
5855c6149724SMax Reitz         return;
5856c6149724SMax Reitz     }
5857c6149724SMax Reitz 
5858c282e1fdSChunyan Liu     create_opts = qemu_opts_append(create_opts, drv->create_opts);
5859c282e1fdSChunyan Liu     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
5860f88e1a42SJes Sorensen 
5861f88e1a42SJes Sorensen     /* Create parameter list with default values */
586283d0521aSChunyan Liu     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
586339101f25SMarkus Armbruster     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
5864f88e1a42SJes Sorensen 
5865f88e1a42SJes Sorensen     /* Parse -o options */
5866f88e1a42SJes Sorensen     if (options) {
5867dc523cd3SMarkus Armbruster         qemu_opts_do_parse(opts, options, NULL, &local_err);
5868dc523cd3SMarkus Armbruster         if (local_err) {
5869dc523cd3SMarkus Armbruster             error_report_err(local_err);
5870dc523cd3SMarkus Armbruster             local_err = NULL;
587183d0521aSChunyan Liu             error_setg(errp, "Invalid options for file format '%s'", fmt);
5872f88e1a42SJes Sorensen             goto out;
5873f88e1a42SJes Sorensen         }
5874f88e1a42SJes Sorensen     }
5875f88e1a42SJes Sorensen 
5876f88e1a42SJes Sorensen     if (base_filename) {
5877f43e47dbSMarkus Armbruster         qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
58786be4194bSMarkus Armbruster         if (local_err) {
587971c79813SLuiz Capitulino             error_setg(errp, "Backing file not supported for file format '%s'",
588071c79813SLuiz Capitulino                        fmt);
5881f88e1a42SJes Sorensen             goto out;
5882f88e1a42SJes Sorensen         }
5883f88e1a42SJes Sorensen     }
5884f88e1a42SJes Sorensen 
5885f88e1a42SJes Sorensen     if (base_fmt) {
5886f43e47dbSMarkus Armbruster         qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
58876be4194bSMarkus Armbruster         if (local_err) {
588871c79813SLuiz Capitulino             error_setg(errp, "Backing file format not supported for file "
588971c79813SLuiz Capitulino                              "format '%s'", fmt);
5890f88e1a42SJes Sorensen             goto out;
5891f88e1a42SJes Sorensen         }
5892f88e1a42SJes Sorensen     }
5893f88e1a42SJes Sorensen 
589483d0521aSChunyan Liu     backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
589583d0521aSChunyan Liu     if (backing_file) {
589683d0521aSChunyan Liu         if (!strcmp(filename, backing_file)) {
589771c79813SLuiz Capitulino             error_setg(errp, "Error: Trying to create an image with the "
589871c79813SLuiz Capitulino                              "same filename as the backing file");
5899792da93aSJes Sorensen             goto out;
5900792da93aSJes Sorensen         }
5901792da93aSJes Sorensen     }
5902792da93aSJes Sorensen 
590383d0521aSChunyan Liu     backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
590483d0521aSChunyan Liu     if (backing_fmt) {
590583d0521aSChunyan Liu         backing_drv = bdrv_find_format(backing_fmt);
590696df67d1SStefan Hajnoczi         if (!backing_drv) {
590771c79813SLuiz Capitulino             error_setg(errp, "Unknown backing file format '%s'",
590883d0521aSChunyan Liu                        backing_fmt);
5909f88e1a42SJes Sorensen             goto out;
5910f88e1a42SJes Sorensen         }
5911f88e1a42SJes Sorensen     }
5912f88e1a42SJes Sorensen 
5913f88e1a42SJes Sorensen     // The size for the image must always be specified, with one exception:
5914f88e1a42SJes Sorensen     // If we are using a backing file, we can obtain the size from there
591583d0521aSChunyan Liu     size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
591683d0521aSChunyan Liu     if (size == -1) {
591783d0521aSChunyan Liu         if (backing_file) {
591866f6b814SMax Reitz             BlockDriverState *bs;
591929168018SMax Reitz             char *full_backing = g_new0(char, PATH_MAX);
592052bf1e72SMarkus Armbruster             int64_t size;
592163090dacSPaolo Bonzini             int back_flags;
592263090dacSPaolo Bonzini 
592329168018SMax Reitz             bdrv_get_full_backing_filename_from_filename(filename, backing_file,
592429168018SMax Reitz                                                          full_backing, PATH_MAX,
592529168018SMax Reitz                                                          &local_err);
592629168018SMax Reitz             if (local_err) {
592729168018SMax Reitz                 g_free(full_backing);
592829168018SMax Reitz                 goto out;
592929168018SMax Reitz             }
593029168018SMax Reitz 
593163090dacSPaolo Bonzini             /* backing files always opened read-only */
593263090dacSPaolo Bonzini             back_flags =
593363090dacSPaolo Bonzini                 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
5934f88e1a42SJes Sorensen 
5935f67503e5SMax Reitz             bs = NULL;
593629168018SMax Reitz             ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags,
5937cc84d90fSMax Reitz                             backing_drv, &local_err);
593829168018SMax Reitz             g_free(full_backing);
5939f88e1a42SJes Sorensen             if (ret < 0) {
5940f88e1a42SJes Sorensen                 goto out;
5941f88e1a42SJes Sorensen             }
594252bf1e72SMarkus Armbruster             size = bdrv_getlength(bs);
594352bf1e72SMarkus Armbruster             if (size < 0) {
594452bf1e72SMarkus Armbruster                 error_setg_errno(errp, -size, "Could not get size of '%s'",
594552bf1e72SMarkus Armbruster                                  backing_file);
594652bf1e72SMarkus Armbruster                 bdrv_unref(bs);
594752bf1e72SMarkus Armbruster                 goto out;
594852bf1e72SMarkus Armbruster             }
5949f88e1a42SJes Sorensen 
595039101f25SMarkus Armbruster             qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
595166f6b814SMax Reitz 
595266f6b814SMax Reitz             bdrv_unref(bs);
5953f88e1a42SJes Sorensen         } else {
595471c79813SLuiz Capitulino             error_setg(errp, "Image creation needs a size parameter");
5955f88e1a42SJes Sorensen             goto out;
5956f88e1a42SJes Sorensen         }
5957f88e1a42SJes Sorensen     }
5958f88e1a42SJes Sorensen 
5959f382d43aSMiroslav Rezanina     if (!quiet) {
5960f88e1a42SJes Sorensen         printf("Formatting '%s', fmt=%s", filename, fmt);
596143c5d8f8SFam Zheng         qemu_opts_print(opts, " ");
5962f88e1a42SJes Sorensen         puts("");
5963f382d43aSMiroslav Rezanina     }
596483d0521aSChunyan Liu 
5965c282e1fdSChunyan Liu     ret = bdrv_create(drv, filename, opts, &local_err);
596683d0521aSChunyan Liu 
5967cc84d90fSMax Reitz     if (ret == -EFBIG) {
5968cc84d90fSMax Reitz         /* This is generally a better message than whatever the driver would
5969cc84d90fSMax Reitz          * deliver (especially because of the cluster_size_hint), since that
5970cc84d90fSMax Reitz          * is most probably not much different from "image too large". */
5971f3f4d2c0SKevin Wolf         const char *cluster_size_hint = "";
597283d0521aSChunyan Liu         if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
5973f3f4d2c0SKevin Wolf             cluster_size_hint = " (try using a larger cluster size)";
5974f3f4d2c0SKevin Wolf         }
5975cc84d90fSMax Reitz         error_setg(errp, "The image size is too large for file format '%s'"
5976cc84d90fSMax Reitz                    "%s", fmt, cluster_size_hint);
5977cc84d90fSMax Reitz         error_free(local_err);
5978cc84d90fSMax Reitz         local_err = NULL;
5979f88e1a42SJes Sorensen     }
5980f88e1a42SJes Sorensen 
5981f88e1a42SJes Sorensen out:
598283d0521aSChunyan Liu     qemu_opts_del(opts);
598383d0521aSChunyan Liu     qemu_opts_free(create_opts);
598484d18f06SMarkus Armbruster     if (local_err) {
5985cc84d90fSMax Reitz         error_propagate(errp, local_err);
5986cc84d90fSMax Reitz     }
5987f88e1a42SJes Sorensen }
598885d126f3SStefan Hajnoczi 
598985d126f3SStefan Hajnoczi AioContext *bdrv_get_aio_context(BlockDriverState *bs)
599085d126f3SStefan Hajnoczi {
5991dcd04228SStefan Hajnoczi     return bs->aio_context;
5992dcd04228SStefan Hajnoczi }
5993dcd04228SStefan Hajnoczi 
5994dcd04228SStefan Hajnoczi void bdrv_detach_aio_context(BlockDriverState *bs)
5995dcd04228SStefan Hajnoczi {
599633384421SMax Reitz     BdrvAioNotifier *baf;
599733384421SMax Reitz 
5998dcd04228SStefan Hajnoczi     if (!bs->drv) {
5999dcd04228SStefan Hajnoczi         return;
6000dcd04228SStefan Hajnoczi     }
6001dcd04228SStefan Hajnoczi 
600233384421SMax Reitz     QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
600333384421SMax Reitz         baf->detach_aio_context(baf->opaque);
600433384421SMax Reitz     }
600533384421SMax Reitz 
600613af91ebSStefan Hajnoczi     if (bs->io_limits_enabled) {
600713af91ebSStefan Hajnoczi         throttle_detach_aio_context(&bs->throttle_state);
600813af91ebSStefan Hajnoczi     }
6009dcd04228SStefan Hajnoczi     if (bs->drv->bdrv_detach_aio_context) {
6010dcd04228SStefan Hajnoczi         bs->drv->bdrv_detach_aio_context(bs);
6011dcd04228SStefan Hajnoczi     }
6012dcd04228SStefan Hajnoczi     if (bs->file) {
6013dcd04228SStefan Hajnoczi         bdrv_detach_aio_context(bs->file);
6014dcd04228SStefan Hajnoczi     }
6015dcd04228SStefan Hajnoczi     if (bs->backing_hd) {
6016dcd04228SStefan Hajnoczi         bdrv_detach_aio_context(bs->backing_hd);
6017dcd04228SStefan Hajnoczi     }
6018dcd04228SStefan Hajnoczi 
6019dcd04228SStefan Hajnoczi     bs->aio_context = NULL;
6020dcd04228SStefan Hajnoczi }
6021dcd04228SStefan Hajnoczi 
6022dcd04228SStefan Hajnoczi void bdrv_attach_aio_context(BlockDriverState *bs,
6023dcd04228SStefan Hajnoczi                              AioContext *new_context)
6024dcd04228SStefan Hajnoczi {
602533384421SMax Reitz     BdrvAioNotifier *ban;
602633384421SMax Reitz 
6027dcd04228SStefan Hajnoczi     if (!bs->drv) {
6028dcd04228SStefan Hajnoczi         return;
6029dcd04228SStefan Hajnoczi     }
6030dcd04228SStefan Hajnoczi 
6031dcd04228SStefan Hajnoczi     bs->aio_context = new_context;
6032dcd04228SStefan Hajnoczi 
6033dcd04228SStefan Hajnoczi     if (bs->backing_hd) {
6034dcd04228SStefan Hajnoczi         bdrv_attach_aio_context(bs->backing_hd, new_context);
6035dcd04228SStefan Hajnoczi     }
6036dcd04228SStefan Hajnoczi     if (bs->file) {
6037dcd04228SStefan Hajnoczi         bdrv_attach_aio_context(bs->file, new_context);
6038dcd04228SStefan Hajnoczi     }
6039dcd04228SStefan Hajnoczi     if (bs->drv->bdrv_attach_aio_context) {
6040dcd04228SStefan Hajnoczi         bs->drv->bdrv_attach_aio_context(bs, new_context);
6041dcd04228SStefan Hajnoczi     }
604213af91ebSStefan Hajnoczi     if (bs->io_limits_enabled) {
604313af91ebSStefan Hajnoczi         throttle_attach_aio_context(&bs->throttle_state, new_context);
604413af91ebSStefan Hajnoczi     }
604533384421SMax Reitz 
604633384421SMax Reitz     QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
604733384421SMax Reitz         ban->attached_aio_context(new_context, ban->opaque);
604833384421SMax Reitz     }
6049dcd04228SStefan Hajnoczi }
6050dcd04228SStefan Hajnoczi 
6051dcd04228SStefan Hajnoczi void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
6052dcd04228SStefan Hajnoczi {
6053dcd04228SStefan Hajnoczi     bdrv_drain_all(); /* ensure there are no in-flight requests */
6054dcd04228SStefan Hajnoczi 
6055dcd04228SStefan Hajnoczi     bdrv_detach_aio_context(bs);
6056dcd04228SStefan Hajnoczi 
6057dcd04228SStefan Hajnoczi     /* This function executes in the old AioContext so acquire the new one in
6058dcd04228SStefan Hajnoczi      * case it runs in a different thread.
6059dcd04228SStefan Hajnoczi      */
6060dcd04228SStefan Hajnoczi     aio_context_acquire(new_context);
6061dcd04228SStefan Hajnoczi     bdrv_attach_aio_context(bs, new_context);
6062dcd04228SStefan Hajnoczi     aio_context_release(new_context);
606385d126f3SStefan Hajnoczi }
6064d616b224SStefan Hajnoczi 
606533384421SMax Reitz void bdrv_add_aio_context_notifier(BlockDriverState *bs,
606633384421SMax Reitz         void (*attached_aio_context)(AioContext *new_context, void *opaque),
606733384421SMax Reitz         void (*detach_aio_context)(void *opaque), void *opaque)
606833384421SMax Reitz {
606933384421SMax Reitz     BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
607033384421SMax Reitz     *ban = (BdrvAioNotifier){
607133384421SMax Reitz         .attached_aio_context = attached_aio_context,
607233384421SMax Reitz         .detach_aio_context   = detach_aio_context,
607333384421SMax Reitz         .opaque               = opaque
607433384421SMax Reitz     };
607533384421SMax Reitz 
607633384421SMax Reitz     QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
607733384421SMax Reitz }
607833384421SMax Reitz 
607933384421SMax Reitz void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
608033384421SMax Reitz                                       void (*attached_aio_context)(AioContext *,
608133384421SMax Reitz                                                                    void *),
608233384421SMax Reitz                                       void (*detach_aio_context)(void *),
608333384421SMax Reitz                                       void *opaque)
608433384421SMax Reitz {
608533384421SMax Reitz     BdrvAioNotifier *ban, *ban_next;
608633384421SMax Reitz 
608733384421SMax Reitz     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
608833384421SMax Reitz         if (ban->attached_aio_context == attached_aio_context &&
608933384421SMax Reitz             ban->detach_aio_context   == detach_aio_context   &&
609033384421SMax Reitz             ban->opaque               == opaque)
609133384421SMax Reitz         {
609233384421SMax Reitz             QLIST_REMOVE(ban, list);
609333384421SMax Reitz             g_free(ban);
609433384421SMax Reitz 
609533384421SMax Reitz             return;
609633384421SMax Reitz         }
609733384421SMax Reitz     }
609833384421SMax Reitz 
609933384421SMax Reitz     abort();
610033384421SMax Reitz }
610133384421SMax Reitz 
6102d616b224SStefan Hajnoczi void bdrv_add_before_write_notifier(BlockDriverState *bs,
6103d616b224SStefan Hajnoczi                                     NotifierWithReturn *notifier)
6104d616b224SStefan Hajnoczi {
6105d616b224SStefan Hajnoczi     notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
6106d616b224SStefan Hajnoczi }
61076f176b48SMax Reitz 
610877485434SMax Reitz int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
610977485434SMax Reitz                        BlockDriverAmendStatusCB *status_cb)
61106f176b48SMax Reitz {
6111c282e1fdSChunyan Liu     if (!bs->drv->bdrv_amend_options) {
61126f176b48SMax Reitz         return -ENOTSUP;
61136f176b48SMax Reitz     }
611477485434SMax Reitz     return bs->drv->bdrv_amend_options(bs, opts, status_cb);
61156f176b48SMax Reitz }
6116f6186f49SBenoît Canet 
6117b5042a36SBenoît Canet /* This function will be called by the bdrv_recurse_is_first_non_filter method
6118b5042a36SBenoît Canet  * of block filter and by bdrv_is_first_non_filter.
6119b5042a36SBenoît Canet  * It is used to test if the given bs is the candidate or recurse more in the
6120b5042a36SBenoît Canet  * node graph.
6121212a5a8fSBenoît Canet  */
6122212a5a8fSBenoît Canet bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
6123212a5a8fSBenoît Canet                                       BlockDriverState *candidate)
6124f6186f49SBenoît Canet {
6125b5042a36SBenoît Canet     /* return false if basic checks fails */
6126b5042a36SBenoît Canet     if (!bs || !bs->drv) {
6127b5042a36SBenoît Canet         return false;
6128b5042a36SBenoît Canet     }
6129b5042a36SBenoît Canet 
6130b5042a36SBenoît Canet     /* the code reached a non block filter driver -> check if the bs is
6131b5042a36SBenoît Canet      * the same as the candidate. It's the recursion termination condition.
6132b5042a36SBenoît Canet      */
6133b5042a36SBenoît Canet     if (!bs->drv->is_filter) {
6134b5042a36SBenoît Canet         return bs == candidate;
6135b5042a36SBenoît Canet     }
6136b5042a36SBenoît Canet     /* Down this path the driver is a block filter driver */
6137b5042a36SBenoît Canet 
6138b5042a36SBenoît Canet     /* If the block filter recursion method is defined use it to recurse down
6139b5042a36SBenoît Canet      * the node graph.
6140b5042a36SBenoît Canet      */
6141b5042a36SBenoît Canet     if (bs->drv->bdrv_recurse_is_first_non_filter) {
6142212a5a8fSBenoît Canet         return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
6143212a5a8fSBenoît Canet     }
6144212a5a8fSBenoît Canet 
6145b5042a36SBenoît Canet     /* the driver is a block filter but don't allow to recurse -> return false
6146b5042a36SBenoît Canet      */
6147b5042a36SBenoît Canet     return false;
6148212a5a8fSBenoît Canet }
6149212a5a8fSBenoît Canet 
6150212a5a8fSBenoît Canet /* This function checks if the candidate is the first non filter bs down it's
6151212a5a8fSBenoît Canet  * bs chain. Since we don't have pointers to parents it explore all bs chains
6152212a5a8fSBenoît Canet  * from the top. Some filters can choose not to pass down the recursion.
6153212a5a8fSBenoît Canet  */
6154212a5a8fSBenoît Canet bool bdrv_is_first_non_filter(BlockDriverState *candidate)
6155212a5a8fSBenoît Canet {
6156212a5a8fSBenoît Canet     BlockDriverState *bs;
6157212a5a8fSBenoît Canet 
6158212a5a8fSBenoît Canet     /* walk down the bs forest recursively */
6159212a5a8fSBenoît Canet     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
6160212a5a8fSBenoît Canet         bool perm;
6161212a5a8fSBenoît Canet 
6162b5042a36SBenoît Canet         /* try to recurse in this top level bs */
6163e6dc8a1fSKevin Wolf         perm = bdrv_recurse_is_first_non_filter(bs, candidate);
6164212a5a8fSBenoît Canet 
6165212a5a8fSBenoît Canet         /* candidate is the first non filter */
6166212a5a8fSBenoît Canet         if (perm) {
6167212a5a8fSBenoît Canet             return true;
6168212a5a8fSBenoît Canet         }
6169212a5a8fSBenoît Canet     }
6170212a5a8fSBenoît Canet 
6171212a5a8fSBenoît Canet     return false;
6172f6186f49SBenoît Canet }
617309158f00SBenoît Canet 
617409158f00SBenoît Canet BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
617509158f00SBenoît Canet {
617609158f00SBenoît Canet     BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
61775a7e7a0bSStefan Hajnoczi     AioContext *aio_context;
61785a7e7a0bSStefan Hajnoczi 
617909158f00SBenoît Canet     if (!to_replace_bs) {
618009158f00SBenoît Canet         error_setg(errp, "Node name '%s' not found", node_name);
618109158f00SBenoît Canet         return NULL;
618209158f00SBenoît Canet     }
618309158f00SBenoît Canet 
61845a7e7a0bSStefan Hajnoczi     aio_context = bdrv_get_aio_context(to_replace_bs);
61855a7e7a0bSStefan Hajnoczi     aio_context_acquire(aio_context);
61865a7e7a0bSStefan Hajnoczi 
618709158f00SBenoît Canet     if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
61885a7e7a0bSStefan Hajnoczi         to_replace_bs = NULL;
61895a7e7a0bSStefan Hajnoczi         goto out;
619009158f00SBenoît Canet     }
619109158f00SBenoît Canet 
619209158f00SBenoît Canet     /* We don't want arbitrary node of the BDS chain to be replaced only the top
619309158f00SBenoît Canet      * most non filter in order to prevent data corruption.
619409158f00SBenoît Canet      * Another benefit is that this tests exclude backing files which are
619509158f00SBenoît Canet      * blocked by the backing blockers.
619609158f00SBenoît Canet      */
619709158f00SBenoît Canet     if (!bdrv_is_first_non_filter(to_replace_bs)) {
619809158f00SBenoît Canet         error_setg(errp, "Only top most non filter can be replaced");
61995a7e7a0bSStefan Hajnoczi         to_replace_bs = NULL;
62005a7e7a0bSStefan Hajnoczi         goto out;
620109158f00SBenoît Canet     }
620209158f00SBenoît Canet 
62035a7e7a0bSStefan Hajnoczi out:
62045a7e7a0bSStefan Hajnoczi     aio_context_release(aio_context);
620509158f00SBenoît Canet     return to_replace_bs;
620609158f00SBenoît Canet }
6207448ad91dSMing Lei 
6208448ad91dSMing Lei void bdrv_io_plug(BlockDriverState *bs)
6209448ad91dSMing Lei {
6210448ad91dSMing Lei     BlockDriver *drv = bs->drv;
6211448ad91dSMing Lei     if (drv && drv->bdrv_io_plug) {
6212448ad91dSMing Lei         drv->bdrv_io_plug(bs);
6213448ad91dSMing Lei     } else if (bs->file) {
6214448ad91dSMing Lei         bdrv_io_plug(bs->file);
6215448ad91dSMing Lei     }
6216448ad91dSMing Lei }
6217448ad91dSMing Lei 
6218448ad91dSMing Lei void bdrv_io_unplug(BlockDriverState *bs)
6219448ad91dSMing Lei {
6220448ad91dSMing Lei     BlockDriver *drv = bs->drv;
6221448ad91dSMing Lei     if (drv && drv->bdrv_io_unplug) {
6222448ad91dSMing Lei         drv->bdrv_io_unplug(bs);
6223448ad91dSMing Lei     } else if (bs->file) {
6224448ad91dSMing Lei         bdrv_io_unplug(bs->file);
6225448ad91dSMing Lei     }
6226448ad91dSMing Lei }
6227448ad91dSMing Lei 
6228448ad91dSMing Lei void bdrv_flush_io_queue(BlockDriverState *bs)
6229448ad91dSMing Lei {
6230448ad91dSMing Lei     BlockDriver *drv = bs->drv;
6231448ad91dSMing Lei     if (drv && drv->bdrv_flush_io_queue) {
6232448ad91dSMing Lei         drv->bdrv_flush_io_queue(bs);
6233448ad91dSMing Lei     } else if (bs->file) {
6234448ad91dSMing Lei         bdrv_flush_io_queue(bs->file);
6235448ad91dSMing Lei     }
6236448ad91dSMing Lei }
623791af7014SMax Reitz 
623891af7014SMax Reitz static bool append_open_options(QDict *d, BlockDriverState *bs)
623991af7014SMax Reitz {
624091af7014SMax Reitz     const QDictEntry *entry;
624191af7014SMax Reitz     bool found_any = false;
624291af7014SMax Reitz 
624391af7014SMax Reitz     for (entry = qdict_first(bs->options); entry;
624491af7014SMax Reitz          entry = qdict_next(bs->options, entry))
624591af7014SMax Reitz     {
624691af7014SMax Reitz         /* Only take options for this level and exclude all non-driver-specific
624791af7014SMax Reitz          * options */
624891af7014SMax Reitz         if (!strchr(qdict_entry_key(entry), '.') &&
624991af7014SMax Reitz             strcmp(qdict_entry_key(entry), "node-name"))
625091af7014SMax Reitz         {
625191af7014SMax Reitz             qobject_incref(qdict_entry_value(entry));
625291af7014SMax Reitz             qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
625391af7014SMax Reitz             found_any = true;
625491af7014SMax Reitz         }
625591af7014SMax Reitz     }
625691af7014SMax Reitz 
625791af7014SMax Reitz     return found_any;
625891af7014SMax Reitz }
625991af7014SMax Reitz 
626091af7014SMax Reitz /* Updates the following BDS fields:
626191af7014SMax Reitz  *  - exact_filename: A filename which may be used for opening a block device
626291af7014SMax Reitz  *                    which (mostly) equals the given BDS (even without any
626391af7014SMax Reitz  *                    other options; so reading and writing must return the same
626491af7014SMax Reitz  *                    results, but caching etc. may be different)
626591af7014SMax Reitz  *  - full_open_options: Options which, when given when opening a block device
626691af7014SMax Reitz  *                       (without a filename), result in a BDS (mostly)
626791af7014SMax Reitz  *                       equalling the given one
626891af7014SMax Reitz  *  - filename: If exact_filename is set, it is copied here. Otherwise,
626991af7014SMax Reitz  *              full_open_options is converted to a JSON object, prefixed with
627091af7014SMax Reitz  *              "json:" (for use through the JSON pseudo protocol) and put here.
627191af7014SMax Reitz  */
627291af7014SMax Reitz void bdrv_refresh_filename(BlockDriverState *bs)
627391af7014SMax Reitz {
627491af7014SMax Reitz     BlockDriver *drv = bs->drv;
627591af7014SMax Reitz     QDict *opts;
627691af7014SMax Reitz 
627791af7014SMax Reitz     if (!drv) {
627891af7014SMax Reitz         return;
627991af7014SMax Reitz     }
628091af7014SMax Reitz 
628191af7014SMax Reitz     /* This BDS's file name will most probably depend on its file's name, so
628291af7014SMax Reitz      * refresh that first */
628391af7014SMax Reitz     if (bs->file) {
628491af7014SMax Reitz         bdrv_refresh_filename(bs->file);
628591af7014SMax Reitz     }
628691af7014SMax Reitz 
628791af7014SMax Reitz     if (drv->bdrv_refresh_filename) {
628891af7014SMax Reitz         /* Obsolete information is of no use here, so drop the old file name
628991af7014SMax Reitz          * information before refreshing it */
629091af7014SMax Reitz         bs->exact_filename[0] = '\0';
629191af7014SMax Reitz         if (bs->full_open_options) {
629291af7014SMax Reitz             QDECREF(bs->full_open_options);
629391af7014SMax Reitz             bs->full_open_options = NULL;
629491af7014SMax Reitz         }
629591af7014SMax Reitz 
629691af7014SMax Reitz         drv->bdrv_refresh_filename(bs);
629791af7014SMax Reitz     } else if (bs->file) {
629891af7014SMax Reitz         /* Try to reconstruct valid information from the underlying file */
629991af7014SMax Reitz         bool has_open_options;
630091af7014SMax Reitz 
630191af7014SMax Reitz         bs->exact_filename[0] = '\0';
630291af7014SMax Reitz         if (bs->full_open_options) {
630391af7014SMax Reitz             QDECREF(bs->full_open_options);
630491af7014SMax Reitz             bs->full_open_options = NULL;
630591af7014SMax Reitz         }
630691af7014SMax Reitz 
630791af7014SMax Reitz         opts = qdict_new();
630891af7014SMax Reitz         has_open_options = append_open_options(opts, bs);
630991af7014SMax Reitz 
631091af7014SMax Reitz         /* If no specific options have been given for this BDS, the filename of
631191af7014SMax Reitz          * the underlying file should suffice for this one as well */
631291af7014SMax Reitz         if (bs->file->exact_filename[0] && !has_open_options) {
631391af7014SMax Reitz             strcpy(bs->exact_filename, bs->file->exact_filename);
631491af7014SMax Reitz         }
631591af7014SMax Reitz         /* Reconstructing the full options QDict is simple for most format block
631691af7014SMax Reitz          * drivers, as long as the full options are known for the underlying
631791af7014SMax Reitz          * file BDS. The full options QDict of that file BDS should somehow
631891af7014SMax Reitz          * contain a representation of the filename, therefore the following
631991af7014SMax Reitz          * suffices without querying the (exact_)filename of this BDS. */
632091af7014SMax Reitz         if (bs->file->full_open_options) {
632191af7014SMax Reitz             qdict_put_obj(opts, "driver",
632291af7014SMax Reitz                           QOBJECT(qstring_from_str(drv->format_name)));
632391af7014SMax Reitz             QINCREF(bs->file->full_open_options);
632491af7014SMax Reitz             qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));
632591af7014SMax Reitz 
632691af7014SMax Reitz             bs->full_open_options = opts;
632791af7014SMax Reitz         } else {
632891af7014SMax Reitz             QDECREF(opts);
632991af7014SMax Reitz         }
633091af7014SMax Reitz     } else if (!bs->full_open_options && qdict_size(bs->options)) {
633191af7014SMax Reitz         /* There is no underlying file BDS (at least referenced by BDS.file),
633291af7014SMax Reitz          * so the full options QDict should be equal to the options given
633391af7014SMax Reitz          * specifically for this block device when it was opened (plus the
633491af7014SMax Reitz          * driver specification).
633591af7014SMax Reitz          * Because those options don't change, there is no need to update
633691af7014SMax Reitz          * full_open_options when it's already set. */
633791af7014SMax Reitz 
633891af7014SMax Reitz         opts = qdict_new();
633991af7014SMax Reitz         append_open_options(opts, bs);
634091af7014SMax Reitz         qdict_put_obj(opts, "driver",
634191af7014SMax Reitz                       QOBJECT(qstring_from_str(drv->format_name)));
634291af7014SMax Reitz 
634391af7014SMax Reitz         if (bs->exact_filename[0]) {
634491af7014SMax Reitz             /* This may not work for all block protocol drivers (some may
634591af7014SMax Reitz              * require this filename to be parsed), but we have to find some
634691af7014SMax Reitz              * default solution here, so just include it. If some block driver
634791af7014SMax Reitz              * does not support pure options without any filename at all or
634891af7014SMax Reitz              * needs some special format of the options QDict, it needs to
634991af7014SMax Reitz              * implement the driver-specific bdrv_refresh_filename() function.
635091af7014SMax Reitz              */
635191af7014SMax Reitz             qdict_put_obj(opts, "filename",
635291af7014SMax Reitz                           QOBJECT(qstring_from_str(bs->exact_filename)));
635391af7014SMax Reitz         }
635491af7014SMax Reitz 
635591af7014SMax Reitz         bs->full_open_options = opts;
635691af7014SMax Reitz     }
635791af7014SMax Reitz 
635891af7014SMax Reitz     if (bs->exact_filename[0]) {
635991af7014SMax Reitz         pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
636091af7014SMax Reitz     } else if (bs->full_open_options) {
636191af7014SMax Reitz         QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
636291af7014SMax Reitz         snprintf(bs->filename, sizeof(bs->filename), "json:%s",
636391af7014SMax Reitz                  qstring_get_str(json));
636491af7014SMax Reitz         QDECREF(json);
636591af7014SMax Reitz     }
636691af7014SMax Reitz }
63675366d0c8SBenoît Canet 
63685366d0c8SBenoît Canet /* This accessor function purpose is to allow the device models to access the
63695366d0c8SBenoît Canet  * BlockAcctStats structure embedded inside a BlockDriverState without being
63705366d0c8SBenoît Canet  * aware of the BlockDriverState structure layout.
63715366d0c8SBenoît Canet  * It will go away when the BlockAcctStats structure will be moved inside
63725366d0c8SBenoît Canet  * the device models.
63735366d0c8SBenoît Canet  */
63745366d0c8SBenoît Canet BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
63755366d0c8SBenoît Canet {
63765366d0c8SBenoît Canet     return &bs->stats;
63775366d0c8SBenoît Canet }
6378