xref: /openbmc/qemu/block.c (revision 341ebc2f81b14862347e4d4c1fcb3759f815237a)
1fc01f7e7Sbellard /*
2fc01f7e7Sbellard  * QEMU System Emulator block driver
3fc01f7e7Sbellard  *
4fc01f7e7Sbellard  * Copyright (c) 2003 Fabrice Bellard
5fc01f7e7Sbellard  *
6fc01f7e7Sbellard  * Permission is hereby granted, free of charge, to any person obtaining a copy
7fc01f7e7Sbellard  * of this software and associated documentation files (the "Software"), to deal
8fc01f7e7Sbellard  * in the Software without restriction, including without limitation the rights
9fc01f7e7Sbellard  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10fc01f7e7Sbellard  * copies of the Software, and to permit persons to whom the Software is
11fc01f7e7Sbellard  * furnished to do so, subject to the following conditions:
12fc01f7e7Sbellard  *
13fc01f7e7Sbellard  * The above copyright notice and this permission notice shall be included in
14fc01f7e7Sbellard  * all copies or substantial portions of the Software.
15fc01f7e7Sbellard  *
16fc01f7e7Sbellard  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17fc01f7e7Sbellard  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18fc01f7e7Sbellard  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19fc01f7e7Sbellard  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20fc01f7e7Sbellard  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21fc01f7e7Sbellard  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22fc01f7e7Sbellard  * THE SOFTWARE.
23fc01f7e7Sbellard  */
243990d09aSblueswir1 #include "config-host.h"
25faf07963Spbrook #include "qemu-common.h"
266d519a5fSStefan Hajnoczi #include "trace.h"
27737e150eSPaolo Bonzini #include "block/block_int.h"
28737e150eSPaolo Bonzini #include "block/blockjob.h"
291de7afc9SPaolo Bonzini #include "qemu/module.h"
307b1b5d19SPaolo Bonzini #include "qapi/qmp/qjson.h"
31bfb197e0SMarkus Armbruster #include "sysemu/block-backend.h"
329c17d615SPaolo Bonzini #include "sysemu/sysemu.h"
33de50a20aSFam Zheng #include "sysemu/qtest.h"
341de7afc9SPaolo Bonzini #include "qemu/notify.h"
35737e150eSPaolo Bonzini #include "block/coroutine.h"
36c13163fbSBenoît Canet #include "block/qapi.h"
37b2023818SLuiz Capitulino #include "qmp-commands.h"
381de7afc9SPaolo Bonzini #include "qemu/timer.h"
39a5ee7bd4SWenchao Xia #include "qapi-event.h"
40fc01f7e7Sbellard 
4171e72a19SJuan Quintela #ifdef CONFIG_BSD
427674e7bfSbellard #include <sys/types.h>
437674e7bfSbellard #include <sys/stat.h>
447674e7bfSbellard #include <sys/ioctl.h>
4572cf2d4fSBlue Swirl #include <sys/queue.h>
46c5e97233Sblueswir1 #ifndef __DragonFly__
477674e7bfSbellard #include <sys/disk.h>
487674e7bfSbellard #endif
49c5e97233Sblueswir1 #endif
507674e7bfSbellard 
5149dc768dSaliguori #ifdef _WIN32
5249dc768dSaliguori #include <windows.h>
5349dc768dSaliguori #endif
5449dc768dSaliguori 
/* One named dirty bitmap; instances are chained together through @list. */
struct BdrvDirtyBitmap {
    HBitmap *bitmap;    /* underlying bitmap object */
    char *name;         /* bitmap name -- NOTE(review): ownership and whether
                         * NULL is allowed are not visible here; confirm */
    QLIST_ENTRY(BdrvDirtyBitmap) list;  /* QLIST linkage -- presumably for
                                         * bs->dirty_bitmaps; confirm */
};
60e4654d2dSFam Zheng 
/*
 * Sentinel stored in a result field while an emulated synchronous operation
 * is still in progress; the waiter keeps polling for as long as the field
 * still holds this value.
 */
#define NOT_DONE 0x7fffffff
621c9805a3SStefan Hajnoczi 
/*
 * Forward declarations of file-local helpers.
 *
 * The *_em functions are the emulation entry points that bdrv_register()
 * installs into drivers lacking the corresponding native callbacks.
 */
static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockCompletionFunc *cb, void *opaque);
static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockCompletionFunc *cb, void *opaque);
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
/* Byte-granularity vectored read/write, for use in coroutine context. */
static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
/* Shared AIO entry point; @is_write selects the direction. */
static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                         int64_t sector_num,
                                         QEMUIOVector *qiov,
                                         int nb_sectors,
                                         BdrvRequestFlags flags,
                                         BlockCompletionFunc *cb,
                                         void *opaque,
                                         bool is_write);
static void coroutine_fn bdrv_co_do_rw(void *opaque);
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
92ec530c81Sbellard 
/* Tail queue of root BlockDriverStates; bdrv_new_root() appends to it. */
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

/*
 * Second tail queue of BlockDriverStates.
 * NOTE(review): membership rules are not visible in this part of the file;
 * presumably the named nodes of the block graph -- confirm.
 */
static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

/*
 * List of all registered block drivers; bdrv_register() inserts at the head
 * and bdrv_find_format() searches it by format name.
 */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);
101ea2384d3Sbellard 
/*
 * Forward declarations of the file-local dirty-tracking helpers. Both take a
 * starting sector (@cur_sector) and a sector count (@nr_sectors).
 */
static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
                           int nr_sectors);
static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
                             int nr_sectors);
/*
 * If non-zero, use only whitelisted block drivers.
 * Starts out as zero (static storage duration, no initializer).
 */
static int use_bdrv_whitelist;
108eb852011SMarkus Armbruster 
1099e0b22f4SStefan Hajnoczi #ifdef _WIN32
/*
 * Return 1 if @filename begins with an ASCII letter immediately followed by
 * ':' (e.g. "c:" or "D:\dir"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_lower = letter >= 'a' && letter <= 'z';
    const int is_upper = letter >= 'A' && letter <= 'Z';

    /* Only look at the second character once the first one is a letter. */
    if (!is_lower && !is_upper) {
        return 0;
    }
    return filename[1] == ':';
}
1169e0b22f4SStefan Hajnoczi 
1179e0b22f4SStefan Hajnoczi int is_windows_drive(const char *filename)
1189e0b22f4SStefan Hajnoczi {
1199e0b22f4SStefan Hajnoczi     if (is_windows_drive_prefix(filename) &&
1209e0b22f4SStefan Hajnoczi         filename[2] == '\0')
1219e0b22f4SStefan Hajnoczi         return 1;
1229e0b22f4SStefan Hajnoczi     if (strstart(filename, "\\\\.\\", NULL) ||
1239e0b22f4SStefan Hajnoczi         strstart(filename, "//./", NULL))
1249e0b22f4SStefan Hajnoczi         return 1;
1259e0b22f4SStefan Hajnoczi     return 0;
1269e0b22f4SStefan Hajnoczi }
1279e0b22f4SStefan Hajnoczi #endif
1289e0b22f4SStefan Hajnoczi 
1290563e191SZhi Yong Wu /* throttling disk I/O limits */
130cc0681c4SBenoît Canet void bdrv_set_io_limits(BlockDriverState *bs,
131cc0681c4SBenoît Canet                         ThrottleConfig *cfg)
132cc0681c4SBenoît Canet {
133cc0681c4SBenoît Canet     int i;
134cc0681c4SBenoît Canet 
135cc0681c4SBenoît Canet     throttle_config(&bs->throttle_state, cfg);
136cc0681c4SBenoît Canet 
137cc0681c4SBenoît Canet     for (i = 0; i < 2; i++) {
138cc0681c4SBenoît Canet         qemu_co_enter_next(&bs->throttled_reqs[i]);
139cc0681c4SBenoît Canet     }
140cc0681c4SBenoît Canet }
141cc0681c4SBenoît Canet 
142cc0681c4SBenoît Canet /* this function drain all the throttled IOs */
143cc0681c4SBenoît Canet static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
144cc0681c4SBenoît Canet {
145cc0681c4SBenoît Canet     bool drained = false;
146cc0681c4SBenoît Canet     bool enabled = bs->io_limits_enabled;
147cc0681c4SBenoît Canet     int i;
148cc0681c4SBenoît Canet 
149cc0681c4SBenoît Canet     bs->io_limits_enabled = false;
150cc0681c4SBenoît Canet 
151cc0681c4SBenoît Canet     for (i = 0; i < 2; i++) {
152cc0681c4SBenoît Canet         while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
153cc0681c4SBenoît Canet             drained = true;
154cc0681c4SBenoît Canet         }
155cc0681c4SBenoît Canet     }
156cc0681c4SBenoît Canet 
157cc0681c4SBenoît Canet     bs->io_limits_enabled = enabled;
158cc0681c4SBenoît Canet 
159cc0681c4SBenoît Canet     return drained;
160cc0681c4SBenoît Canet }
161cc0681c4SBenoît Canet 
16298f90dbaSZhi Yong Wu void bdrv_io_limits_disable(BlockDriverState *bs)
16398f90dbaSZhi Yong Wu {
16498f90dbaSZhi Yong Wu     bs->io_limits_enabled = false;
16598f90dbaSZhi Yong Wu 
166cc0681c4SBenoît Canet     bdrv_start_throttled_reqs(bs);
16798f90dbaSZhi Yong Wu 
168cc0681c4SBenoît Canet     throttle_destroy(&bs->throttle_state);
16998f90dbaSZhi Yong Wu }
17098f90dbaSZhi Yong Wu 
171cc0681c4SBenoît Canet static void bdrv_throttle_read_timer_cb(void *opaque)
1720563e191SZhi Yong Wu {
1730563e191SZhi Yong Wu     BlockDriverState *bs = opaque;
174cc0681c4SBenoît Canet     qemu_co_enter_next(&bs->throttled_reqs[0]);
1750563e191SZhi Yong Wu }
1760563e191SZhi Yong Wu 
177cc0681c4SBenoît Canet static void bdrv_throttle_write_timer_cb(void *opaque)
178cc0681c4SBenoît Canet {
179cc0681c4SBenoît Canet     BlockDriverState *bs = opaque;
180cc0681c4SBenoît Canet     qemu_co_enter_next(&bs->throttled_reqs[1]);
181cc0681c4SBenoît Canet }
182cc0681c4SBenoît Canet 
183cc0681c4SBenoît Canet /* should be called before bdrv_set_io_limits if a limit is set */
1840563e191SZhi Yong Wu void bdrv_io_limits_enable(BlockDriverState *bs)
1850563e191SZhi Yong Wu {
186de50a20aSFam Zheng     int clock_type = QEMU_CLOCK_REALTIME;
187de50a20aSFam Zheng 
188de50a20aSFam Zheng     if (qtest_enabled()) {
189de50a20aSFam Zheng         /* For testing block IO throttling only */
190de50a20aSFam Zheng         clock_type = QEMU_CLOCK_VIRTUAL;
191de50a20aSFam Zheng     }
192cc0681c4SBenoît Canet     assert(!bs->io_limits_enabled);
193cc0681c4SBenoît Canet     throttle_init(&bs->throttle_state,
19413af91ebSStefan Hajnoczi                   bdrv_get_aio_context(bs),
195de50a20aSFam Zheng                   clock_type,
196cc0681c4SBenoît Canet                   bdrv_throttle_read_timer_cb,
197cc0681c4SBenoît Canet                   bdrv_throttle_write_timer_cb,
198cc0681c4SBenoît Canet                   bs);
1990563e191SZhi Yong Wu     bs->io_limits_enabled = true;
2000563e191SZhi Yong Wu }
2010563e191SZhi Yong Wu 
202cc0681c4SBenoît Canet /* This function makes an IO wait if needed
203cc0681c4SBenoît Canet  *
204cc0681c4SBenoît Canet  * @nb_sectors: the number of sectors of the IO
205cc0681c4SBenoît Canet  * @is_write:   is the IO a write
20698f90dbaSZhi Yong Wu  */
207cc0681c4SBenoît Canet static void bdrv_io_limits_intercept(BlockDriverState *bs,
208d5103588SKevin Wolf                                      unsigned int bytes,
209cc0681c4SBenoît Canet                                      bool is_write)
210cc0681c4SBenoît Canet {
211cc0681c4SBenoît Canet     /* does this io must wait */
212cc0681c4SBenoît Canet     bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);
21398f90dbaSZhi Yong Wu 
214cc0681c4SBenoît Canet     /* if must wait or any request of this type throttled queue the IO */
215cc0681c4SBenoît Canet     if (must_wait ||
216cc0681c4SBenoît Canet         !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
217cc0681c4SBenoît Canet         qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
21898f90dbaSZhi Yong Wu     }
21998f90dbaSZhi Yong Wu 
220cc0681c4SBenoît Canet     /* the IO will be executed, do the accounting */
221d5103588SKevin Wolf     throttle_account(&bs->throttle_state, is_write, bytes);
222d5103588SKevin Wolf 
223cc0681c4SBenoît Canet 
224cc0681c4SBenoît Canet     /* if the next request must wait -> do nothing */
225cc0681c4SBenoît Canet     if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
226cc0681c4SBenoît Canet         return;
227cc0681c4SBenoît Canet     }
228cc0681c4SBenoît Canet 
229cc0681c4SBenoît Canet     /* else queue next request for execution */
230cc0681c4SBenoît Canet     qemu_co_queue_next(&bs->throttled_reqs[is_write]);
23198f90dbaSZhi Yong Wu }
23298f90dbaSZhi Yong Wu 
233339064d5SKevin Wolf size_t bdrv_opt_mem_align(BlockDriverState *bs)
234339064d5SKevin Wolf {
235339064d5SKevin Wolf     if (!bs || !bs->drv) {
236339064d5SKevin Wolf         /* 4k should be on the safe side */
237339064d5SKevin Wolf         return 4096;
238339064d5SKevin Wolf     }
239339064d5SKevin Wolf 
240339064d5SKevin Wolf     return bs->bl.opt_mem_alignment;
241339064d5SKevin Wolf }
242339064d5SKevin Wolf 
/*
 * Return non-zero if @path starts with "<protocol>:", i.e. a ':' occurs
 * before any directory separator. On Windows, drive specifications are never
 * treated as protocols and '\' also counts as a separator.
 */
int path_has_protocol(const char *path)
{
    const char *stop_chars;

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    stop_chars = ":/\\";
#else
    stop_chars = ":/";
#endif

    /* Look at the first colon or separator, whichever comes first. */
    return path[strcspn(path, stop_chars)] == ':';
}
2609e0b22f4SStefan Hajnoczi 
/*
 * Return non-zero if @path is absolute: it starts with '/', or on Windows it
 * starts with '\' or is a drive or device specification.
 */
int path_is_absolute(const char *path)
{
    const char first = *path;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (first == '\\') {
        return 1;
    }
#endif
    return first == '/';
}
27383f64091Sbellard 
/*
 * Build the path of @filename relative to @base_path into @dest.
 *
 * If @filename is absolute it is copied to @dest unchanged. Otherwise the
 * directory part of @base_path (everything up to and including its last
 * directory separator, or its "<protocol>:" prefix if that reaches further)
 * is copied and @filename is appended. URLs are supported.
 *
 * A ':' counts as a protocol separator only when it comes before the first
 * directory separator. A colon inside a later path component, such as
 * "dir/a:b", is part of the file name and is not kept in the result.
 *
 * @dest:      output buffer, always NUL-terminated when @dest_size > 0
 * @dest_size: size of @dest in bytes; nothing is written if it is <= 0
 * The result is silently truncated to fit @dest.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *prefix_end, *last_sep;
    int len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Find the end of a leading "<protocol>:" (or drive) prefix, if any. */
#ifdef _WIN32
    prefix_end = base_path + strcspn(base_path, ":/\\");
#else
    prefix_end = base_path + strcspn(base_path, ":/");
#endif
    if (*prefix_end == ':') {
        prefix_end++;
    } else {
        prefix_end = base_path;
    }

    /* Find the character following the last directory separator. */
    last_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *last_backslash = strrchr(base_path, '\\');

        if (!last_sep || (last_backslash && last_backslash > last_sep)) {
            last_sep = last_backslash;
        }
    }
#endif
    if (last_sep) {
        last_sep++;
    } else {
        last_sep = base_path;
    }

    /* Keep whichever of the two prefixes reaches further. */
    if (last_sep > prefix_end) {
        prefix_end = last_sep;
    }

    len = prefix_end - base_path;
    if (len > dest_size - 1) {
        len = dest_size - 1;
    }
    memcpy(dest, base_path, len);
    dest[len] = '\0';
    pstrcat(dest, dest_size, filename);
}
31783f64091Sbellard 
3180a82855aSMax Reitz void bdrv_get_full_backing_filename_from_filename(const char *backed,
3190a82855aSMax Reitz                                                   const char *backing,
3209f07429eSMax Reitz                                                   char *dest, size_t sz,
3219f07429eSMax Reitz                                                   Error **errp)
3220a82855aSMax Reitz {
3239f07429eSMax Reitz     if (backing[0] == '\0' || path_has_protocol(backing) ||
3249f07429eSMax Reitz         path_is_absolute(backing))
3259f07429eSMax Reitz     {
3260a82855aSMax Reitz         pstrcpy(dest, sz, backing);
3279f07429eSMax Reitz     } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
3289f07429eSMax Reitz         error_setg(errp, "Cannot use relative backing file names for '%s'",
3299f07429eSMax Reitz                    backed);
3300a82855aSMax Reitz     } else {
3310a82855aSMax Reitz         path_combine(dest, sz, backed, backing);
3320a82855aSMax Reitz     }
3330a82855aSMax Reitz }
3340a82855aSMax Reitz 
3359f07429eSMax Reitz void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
3369f07429eSMax Reitz                                     Error **errp)
337dc5a1371SPaolo Bonzini {
3389f07429eSMax Reitz     char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
3399f07429eSMax Reitz 
3409f07429eSMax Reitz     bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
3419f07429eSMax Reitz                                                  dest, sz, errp);
342dc5a1371SPaolo Bonzini }
343dc5a1371SPaolo Bonzini 
3445efa9d5aSAnthony Liguori void bdrv_register(BlockDriver *bdrv)
345ea2384d3Sbellard {
3468c5873d6SStefan Hajnoczi     /* Block drivers without coroutine functions need emulation */
3478c5873d6SStefan Hajnoczi     if (!bdrv->bdrv_co_readv) {
348f9f05dc5SKevin Wolf         bdrv->bdrv_co_readv = bdrv_co_readv_em;
349f9f05dc5SKevin Wolf         bdrv->bdrv_co_writev = bdrv_co_writev_em;
350f9f05dc5SKevin Wolf 
351f8c35c1dSStefan Hajnoczi         /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
352f8c35c1dSStefan Hajnoczi          * the block driver lacks aio we need to emulate that too.
353f8c35c1dSStefan Hajnoczi          */
354f9f05dc5SKevin Wolf         if (!bdrv->bdrv_aio_readv) {
35583f64091Sbellard             /* add AIO emulation layer */
356f141eafeSaliguori             bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
357f141eafeSaliguori             bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
35883f64091Sbellard         }
359f9f05dc5SKevin Wolf     }
360b2e12bc6SChristoph Hellwig 
3618a22f02aSStefan Hajnoczi     QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
362ea2384d3Sbellard }
363b338082bSbellard 
3647f06d47eSMarkus Armbruster BlockDriverState *bdrv_new_root(void)
365fc01f7e7Sbellard {
3667f06d47eSMarkus Armbruster     BlockDriverState *bs = bdrv_new();
367e4e9986bSMarkus Armbruster 
368e4e9986bSMarkus Armbruster     QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
369e4e9986bSMarkus Armbruster     return bs;
370e4e9986bSMarkus Armbruster }
371e4e9986bSMarkus Armbruster 
372e4e9986bSMarkus Armbruster BlockDriverState *bdrv_new(void)
373e4e9986bSMarkus Armbruster {
374e4e9986bSMarkus Armbruster     BlockDriverState *bs;
375e4e9986bSMarkus Armbruster     int i;
376e4e9986bSMarkus Armbruster 
3775839e53bSMarkus Armbruster     bs = g_new0(BlockDriverState, 1);
378e4654d2dSFam Zheng     QLIST_INIT(&bs->dirty_bitmaps);
379fbe40ff7SFam Zheng     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
380fbe40ff7SFam Zheng         QLIST_INIT(&bs->op_blockers[i]);
381fbe40ff7SFam Zheng     }
38228a7282aSLuiz Capitulino     bdrv_iostatus_disable(bs);
383d7d512f6SPaolo Bonzini     notifier_list_init(&bs->close_notifiers);
384d616b224SStefan Hajnoczi     notifier_with_return_list_init(&bs->before_write_notifiers);
385cc0681c4SBenoît Canet     qemu_co_queue_init(&bs->throttled_reqs[0]);
386cc0681c4SBenoît Canet     qemu_co_queue_init(&bs->throttled_reqs[1]);
3879fcb0251SFam Zheng     bs->refcnt = 1;
388dcd04228SStefan Hajnoczi     bs->aio_context = qemu_get_aio_context();
389d7d512f6SPaolo Bonzini 
390b338082bSbellard     return bs;
391b338082bSbellard }
392b338082bSbellard 
393d7d512f6SPaolo Bonzini void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
394d7d512f6SPaolo Bonzini {
395d7d512f6SPaolo Bonzini     notifier_list_add(&bs->close_notifiers, notify);
396d7d512f6SPaolo Bonzini }
397d7d512f6SPaolo Bonzini 
398ea2384d3Sbellard BlockDriver *bdrv_find_format(const char *format_name)
399ea2384d3Sbellard {
400ea2384d3Sbellard     BlockDriver *drv1;
4018a22f02aSStefan Hajnoczi     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
4028a22f02aSStefan Hajnoczi         if (!strcmp(drv1->format_name, format_name)) {
403ea2384d3Sbellard             return drv1;
404ea2384d3Sbellard         }
4058a22f02aSStefan Hajnoczi     }
406ea2384d3Sbellard     return NULL;
407ea2384d3Sbellard }
408ea2384d3Sbellard 
409b64ec4e4SFam Zheng static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
410eb852011SMarkus Armbruster {
411b64ec4e4SFam Zheng     static const char *whitelist_rw[] = {
412b64ec4e4SFam Zheng         CONFIG_BDRV_RW_WHITELIST
413b64ec4e4SFam Zheng     };
414b64ec4e4SFam Zheng     static const char *whitelist_ro[] = {
415b64ec4e4SFam Zheng         CONFIG_BDRV_RO_WHITELIST
416eb852011SMarkus Armbruster     };
417eb852011SMarkus Armbruster     const char **p;
418eb852011SMarkus Armbruster 
419b64ec4e4SFam Zheng     if (!whitelist_rw[0] && !whitelist_ro[0]) {
420eb852011SMarkus Armbruster         return 1;               /* no whitelist, anything goes */
421b64ec4e4SFam Zheng     }
422eb852011SMarkus Armbruster 
423b64ec4e4SFam Zheng     for (p = whitelist_rw; *p; p++) {
424eb852011SMarkus Armbruster         if (!strcmp(drv->format_name, *p)) {
425eb852011SMarkus Armbruster             return 1;
426eb852011SMarkus Armbruster         }
427eb852011SMarkus Armbruster     }
428b64ec4e4SFam Zheng     if (read_only) {
429b64ec4e4SFam Zheng         for (p = whitelist_ro; *p; p++) {
430b64ec4e4SFam Zheng             if (!strcmp(drv->format_name, *p)) {
431b64ec4e4SFam Zheng                 return 1;
432b64ec4e4SFam Zheng             }
433b64ec4e4SFam Zheng         }
434b64ec4e4SFam Zheng     }
435eb852011SMarkus Armbruster     return 0;
436eb852011SMarkus Armbruster }
437eb852011SMarkus Armbruster 
438b64ec4e4SFam Zheng BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
439b64ec4e4SFam Zheng                                           bool read_only)
440eb852011SMarkus Armbruster {
441eb852011SMarkus Armbruster     BlockDriver *drv = bdrv_find_format(format_name);
442b64ec4e4SFam Zheng     return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
443eb852011SMarkus Armbruster }
444eb852011SMarkus Armbruster 
/* Argument and result bundle passed from bdrv_create() to
 * bdrv_create_co_entry(). */
typedef struct CreateCo {
    BlockDriver *drv;   /* driver whose bdrv_create callback is invoked */
    char *filename;     /* copy of the file name, owned by bdrv_create() */
    QemuOpts *opts;     /* creation options handed to the driver */
    int ret;            /* NOT_DONE until the callback returned, then its result */
    Error *err;         /* error reported by the driver callback, if any */
} CreateCo;
4525b7e1542SZhi Yong Wu 
4535b7e1542SZhi Yong Wu static void coroutine_fn bdrv_create_co_entry(void *opaque)
4545b7e1542SZhi Yong Wu {
455cc84d90fSMax Reitz     Error *local_err = NULL;
456cc84d90fSMax Reitz     int ret;
457cc84d90fSMax Reitz 
4585b7e1542SZhi Yong Wu     CreateCo *cco = opaque;
4595b7e1542SZhi Yong Wu     assert(cco->drv);
4605b7e1542SZhi Yong Wu 
461c282e1fdSChunyan Liu     ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
46284d18f06SMarkus Armbruster     if (local_err) {
463cc84d90fSMax Reitz         error_propagate(&cco->err, local_err);
464cc84d90fSMax Reitz     }
465cc84d90fSMax Reitz     cco->ret = ret;
4665b7e1542SZhi Yong Wu }
4675b7e1542SZhi Yong Wu 
4680e7e1989SKevin Wolf int bdrv_create(BlockDriver *drv, const char* filename,
46983d0521aSChunyan Liu                 QemuOpts *opts, Error **errp)
470ea2384d3Sbellard {
4715b7e1542SZhi Yong Wu     int ret;
4720e7e1989SKevin Wolf 
4735b7e1542SZhi Yong Wu     Coroutine *co;
4745b7e1542SZhi Yong Wu     CreateCo cco = {
4755b7e1542SZhi Yong Wu         .drv = drv,
4765b7e1542SZhi Yong Wu         .filename = g_strdup(filename),
47783d0521aSChunyan Liu         .opts = opts,
4785b7e1542SZhi Yong Wu         .ret = NOT_DONE,
479cc84d90fSMax Reitz         .err = NULL,
4805b7e1542SZhi Yong Wu     };
4815b7e1542SZhi Yong Wu 
482c282e1fdSChunyan Liu     if (!drv->bdrv_create) {
483cc84d90fSMax Reitz         error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
48480168bffSLuiz Capitulino         ret = -ENOTSUP;
48580168bffSLuiz Capitulino         goto out;
4865b7e1542SZhi Yong Wu     }
4875b7e1542SZhi Yong Wu 
4885b7e1542SZhi Yong Wu     if (qemu_in_coroutine()) {
4895b7e1542SZhi Yong Wu         /* Fast-path if already in coroutine context */
4905b7e1542SZhi Yong Wu         bdrv_create_co_entry(&cco);
4915b7e1542SZhi Yong Wu     } else {
4925b7e1542SZhi Yong Wu         co = qemu_coroutine_create(bdrv_create_co_entry);
4935b7e1542SZhi Yong Wu         qemu_coroutine_enter(co, &cco);
4945b7e1542SZhi Yong Wu         while (cco.ret == NOT_DONE) {
495b47ec2c4SPaolo Bonzini             aio_poll(qemu_get_aio_context(), true);
4965b7e1542SZhi Yong Wu         }
4975b7e1542SZhi Yong Wu     }
4985b7e1542SZhi Yong Wu 
4995b7e1542SZhi Yong Wu     ret = cco.ret;
500cc84d90fSMax Reitz     if (ret < 0) {
50184d18f06SMarkus Armbruster         if (cco.err) {
502cc84d90fSMax Reitz             error_propagate(errp, cco.err);
503cc84d90fSMax Reitz         } else {
504cc84d90fSMax Reitz             error_setg_errno(errp, -ret, "Could not create image");
505cc84d90fSMax Reitz         }
506cc84d90fSMax Reitz     }
5075b7e1542SZhi Yong Wu 
50880168bffSLuiz Capitulino out:
50980168bffSLuiz Capitulino     g_free(cco.filename);
5105b7e1542SZhi Yong Wu     return ret;
511ea2384d3Sbellard }
512ea2384d3Sbellard 
513c282e1fdSChunyan Liu int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
51484a12e66SChristoph Hellwig {
51584a12e66SChristoph Hellwig     BlockDriver *drv;
516cc84d90fSMax Reitz     Error *local_err = NULL;
517cc84d90fSMax Reitz     int ret;
51884a12e66SChristoph Hellwig 
519b65a5e12SMax Reitz     drv = bdrv_find_protocol(filename, true, errp);
52084a12e66SChristoph Hellwig     if (drv == NULL) {
52116905d71SStefan Hajnoczi         return -ENOENT;
52284a12e66SChristoph Hellwig     }
52384a12e66SChristoph Hellwig 
524c282e1fdSChunyan Liu     ret = bdrv_create(drv, filename, opts, &local_err);
52584d18f06SMarkus Armbruster     if (local_err) {
526cc84d90fSMax Reitz         error_propagate(errp, local_err);
527cc84d90fSMax Reitz     }
528cc84d90fSMax Reitz     return ret;
52984a12e66SChristoph Hellwig }
53084a12e66SChristoph Hellwig 
5313baca891SKevin Wolf void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
532d34682cdSKevin Wolf {
533d34682cdSKevin Wolf     BlockDriver *drv = bs->drv;
5343baca891SKevin Wolf     Error *local_err = NULL;
535d34682cdSKevin Wolf 
536d34682cdSKevin Wolf     memset(&bs->bl, 0, sizeof(bs->bl));
537d34682cdSKevin Wolf 
538466ad822SKevin Wolf     if (!drv) {
5393baca891SKevin Wolf         return;
540466ad822SKevin Wolf     }
541466ad822SKevin Wolf 
542466ad822SKevin Wolf     /* Take some limits from the children as a default */
543466ad822SKevin Wolf     if (bs->file) {
5443baca891SKevin Wolf         bdrv_refresh_limits(bs->file, &local_err);
5453baca891SKevin Wolf         if (local_err) {
5463baca891SKevin Wolf             error_propagate(errp, local_err);
5473baca891SKevin Wolf             return;
5483baca891SKevin Wolf         }
549466ad822SKevin Wolf         bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
5502647fab5SPeter Lieven         bs->bl.max_transfer_length = bs->file->bl.max_transfer_length;
551339064d5SKevin Wolf         bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
552339064d5SKevin Wolf     } else {
553339064d5SKevin Wolf         bs->bl.opt_mem_alignment = 512;
554466ad822SKevin Wolf     }
555466ad822SKevin Wolf 
556466ad822SKevin Wolf     if (bs->backing_hd) {
5573baca891SKevin Wolf         bdrv_refresh_limits(bs->backing_hd, &local_err);
5583baca891SKevin Wolf         if (local_err) {
5593baca891SKevin Wolf             error_propagate(errp, local_err);
5603baca891SKevin Wolf             return;
5613baca891SKevin Wolf         }
562466ad822SKevin Wolf         bs->bl.opt_transfer_length =
563466ad822SKevin Wolf             MAX(bs->bl.opt_transfer_length,
564466ad822SKevin Wolf                 bs->backing_hd->bl.opt_transfer_length);
5652647fab5SPeter Lieven         bs->bl.max_transfer_length =
5662647fab5SPeter Lieven             MIN_NON_ZERO(bs->bl.max_transfer_length,
5672647fab5SPeter Lieven                          bs->backing_hd->bl.max_transfer_length);
568339064d5SKevin Wolf         bs->bl.opt_mem_alignment =
569339064d5SKevin Wolf             MAX(bs->bl.opt_mem_alignment,
570339064d5SKevin Wolf                 bs->backing_hd->bl.opt_mem_alignment);
571466ad822SKevin Wolf     }
572466ad822SKevin Wolf 
573466ad822SKevin Wolf     /* Then let the driver override it */
574466ad822SKevin Wolf     if (drv->bdrv_refresh_limits) {
5753baca891SKevin Wolf         drv->bdrv_refresh_limits(bs, errp);
576d34682cdSKevin Wolf     }
577d34682cdSKevin Wolf }
578d34682cdSKevin Wolf 
579892b7de8SEkaterina Tumanova /**
580892b7de8SEkaterina Tumanova  * Try to get @bs's logical and physical block size.
581892b7de8SEkaterina Tumanova  * On success, store them in @bsz struct and return 0.
582892b7de8SEkaterina Tumanova  * On failure return -errno.
583892b7de8SEkaterina Tumanova  * @bs must not be empty.
584892b7de8SEkaterina Tumanova  */
585892b7de8SEkaterina Tumanova int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
586892b7de8SEkaterina Tumanova {
587892b7de8SEkaterina Tumanova     BlockDriver *drv = bs->drv;
588892b7de8SEkaterina Tumanova 
589892b7de8SEkaterina Tumanova     if (drv && drv->bdrv_probe_blocksizes) {
590892b7de8SEkaterina Tumanova         return drv->bdrv_probe_blocksizes(bs, bsz);
591892b7de8SEkaterina Tumanova     }
592892b7de8SEkaterina Tumanova 
593892b7de8SEkaterina Tumanova     return -ENOTSUP;
594892b7de8SEkaterina Tumanova }
595892b7de8SEkaterina Tumanova 
596892b7de8SEkaterina Tumanova /**
597892b7de8SEkaterina Tumanova  * Try to get @bs's geometry (cyls, heads, sectors).
598892b7de8SEkaterina Tumanova  * On success, store them in @geo struct and return 0.
599892b7de8SEkaterina Tumanova  * On failure return -errno.
600892b7de8SEkaterina Tumanova  * @bs must not be empty.
601892b7de8SEkaterina Tumanova  */
602892b7de8SEkaterina Tumanova int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
603892b7de8SEkaterina Tumanova {
604892b7de8SEkaterina Tumanova     BlockDriver *drv = bs->drv;
605892b7de8SEkaterina Tumanova 
606892b7de8SEkaterina Tumanova     if (drv && drv->bdrv_probe_geometry) {
607892b7de8SEkaterina Tumanova         return drv->bdrv_probe_geometry(bs, geo);
608892b7de8SEkaterina Tumanova     }
609892b7de8SEkaterina Tumanova 
610892b7de8SEkaterina Tumanova     return -ENOTSUP;
611892b7de8SEkaterina Tumanova }
612892b7de8SEkaterina Tumanova 
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename receives the name of the created file and @size is the
 * capacity of that buffer in bytes.
 *
 * On non-Windows hosts the file is created in $TMPDIR, or in /var/tmp when
 * TMPDIR is not set.  On Windows the directory reported by GetTempPath()
 * is used and @size must be at least MAX_PATH.
 *
 * Return 0 upon success, otherwise a negative errno value (-EOVERFLOW when
 * the generated name does not fit into @filename).  On Windows the negated
 * GetLastError() code is returned on failure.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    int len;
    const char *tmpdir;

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }

    /* A negative result means snprintf() itself failed; the buffer then
     * does not hold a usable template, so treat it like truncation. */
    len = snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    if (len < 0 || len >= size) {
        return -EOVERFLOW;
    }

    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* unlink() may overwrite errno, so latch close()'s error first */
        int saved_errno = errno;

        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
648ea2384d3Sbellard 
649f3a5d3f8SChristoph Hellwig /*
650f3a5d3f8SChristoph Hellwig  * Detect host devices. By convention, /dev/cdrom[N] is always
651f3a5d3f8SChristoph Hellwig  * recognized as a host CDROM.
652f3a5d3f8SChristoph Hellwig  */
653f3a5d3f8SChristoph Hellwig static BlockDriver *find_hdev_driver(const char *filename)
654f3a5d3f8SChristoph Hellwig {
655508c7cb3SChristoph Hellwig     int score_max = 0, score;
656508c7cb3SChristoph Hellwig     BlockDriver *drv = NULL, *d;
657f3a5d3f8SChristoph Hellwig 
6588a22f02aSStefan Hajnoczi     QLIST_FOREACH(d, &bdrv_drivers, list) {
659508c7cb3SChristoph Hellwig         if (d->bdrv_probe_device) {
660508c7cb3SChristoph Hellwig             score = d->bdrv_probe_device(filename);
661508c7cb3SChristoph Hellwig             if (score > score_max) {
662508c7cb3SChristoph Hellwig                 score_max = score;
663508c7cb3SChristoph Hellwig                 drv = d;
664f3a5d3f8SChristoph Hellwig             }
665508c7cb3SChristoph Hellwig         }
666f3a5d3f8SChristoph Hellwig     }
667f3a5d3f8SChristoph Hellwig 
668508c7cb3SChristoph Hellwig     return drv;
669f3a5d3f8SChristoph Hellwig }
670f3a5d3f8SChristoph Hellwig 
67198289620SKevin Wolf BlockDriver *bdrv_find_protocol(const char *filename,
672b65a5e12SMax Reitz                                 bool allow_protocol_prefix,
673b65a5e12SMax Reitz                                 Error **errp)
67484a12e66SChristoph Hellwig {
67584a12e66SChristoph Hellwig     BlockDriver *drv1;
67684a12e66SChristoph Hellwig     char protocol[128];
67784a12e66SChristoph Hellwig     int len;
67884a12e66SChristoph Hellwig     const char *p;
67984a12e66SChristoph Hellwig 
68066f82ceeSKevin Wolf     /* TODO Drivers without bdrv_file_open must be specified explicitly */
68166f82ceeSKevin Wolf 
68239508e7aSChristoph Hellwig     /*
68339508e7aSChristoph Hellwig      * XXX(hch): we really should not let host device detection
68439508e7aSChristoph Hellwig      * override an explicit protocol specification, but moving this
68539508e7aSChristoph Hellwig      * later breaks access to device names with colons in them.
68639508e7aSChristoph Hellwig      * Thanks to the brain-dead persistent naming schemes on udev-
68739508e7aSChristoph Hellwig      * based Linux systems those actually are quite common.
68839508e7aSChristoph Hellwig      */
68984a12e66SChristoph Hellwig     drv1 = find_hdev_driver(filename);
69039508e7aSChristoph Hellwig     if (drv1) {
69184a12e66SChristoph Hellwig         return drv1;
69284a12e66SChristoph Hellwig     }
69339508e7aSChristoph Hellwig 
69498289620SKevin Wolf     if (!path_has_protocol(filename) || !allow_protocol_prefix) {
695ef810437SMax Reitz         return &bdrv_file;
69639508e7aSChristoph Hellwig     }
69798289620SKevin Wolf 
6989e0b22f4SStefan Hajnoczi     p = strchr(filename, ':');
6999e0b22f4SStefan Hajnoczi     assert(p != NULL);
70084a12e66SChristoph Hellwig     len = p - filename;
70184a12e66SChristoph Hellwig     if (len > sizeof(protocol) - 1)
70284a12e66SChristoph Hellwig         len = sizeof(protocol) - 1;
70384a12e66SChristoph Hellwig     memcpy(protocol, filename, len);
70484a12e66SChristoph Hellwig     protocol[len] = '\0';
70584a12e66SChristoph Hellwig     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
70684a12e66SChristoph Hellwig         if (drv1->protocol_name &&
70784a12e66SChristoph Hellwig             !strcmp(drv1->protocol_name, protocol)) {
70884a12e66SChristoph Hellwig             return drv1;
70984a12e66SChristoph Hellwig         }
71084a12e66SChristoph Hellwig     }
711b65a5e12SMax Reitz 
712b65a5e12SMax Reitz     error_setg(errp, "Unknown protocol '%s'", protocol);
71384a12e66SChristoph Hellwig     return NULL;
71484a12e66SChristoph Hellwig }
71584a12e66SChristoph Hellwig 
716c6684249SMarkus Armbruster /*
717c6684249SMarkus Armbruster  * Guess image format by probing its contents.
718c6684249SMarkus Armbruster  * This is not a good idea when your image is raw (CVE-2008-2004), but
719c6684249SMarkus Armbruster  * we do it anyway for backward compatibility.
720c6684249SMarkus Armbruster  *
721c6684249SMarkus Armbruster  * @buf         contains the image's first @buf_size bytes.
7227cddd372SKevin Wolf  * @buf_size    is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
7237cddd372SKevin Wolf  *              but can be smaller if the image file is smaller)
724c6684249SMarkus Armbruster  * @filename    is its filename.
725c6684249SMarkus Armbruster  *
726c6684249SMarkus Armbruster  * For all block drivers, call the bdrv_probe() method to get its
727c6684249SMarkus Armbruster  * probing score.
728c6684249SMarkus Armbruster  * Return the first block driver with the highest probing score.
729c6684249SMarkus Armbruster  */
73038f3ef57SKevin Wolf BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
731c6684249SMarkus Armbruster                             const char *filename)
732c6684249SMarkus Armbruster {
733c6684249SMarkus Armbruster     int score_max = 0, score;
734c6684249SMarkus Armbruster     BlockDriver *drv = NULL, *d;
735c6684249SMarkus Armbruster 
736c6684249SMarkus Armbruster     QLIST_FOREACH(d, &bdrv_drivers, list) {
737c6684249SMarkus Armbruster         if (d->bdrv_probe) {
738c6684249SMarkus Armbruster             score = d->bdrv_probe(buf, buf_size, filename);
739c6684249SMarkus Armbruster             if (score > score_max) {
740c6684249SMarkus Armbruster                 score_max = score;
741c6684249SMarkus Armbruster                 drv = d;
742c6684249SMarkus Armbruster             }
743c6684249SMarkus Armbruster         }
744c6684249SMarkus Armbruster     }
745c6684249SMarkus Armbruster 
746c6684249SMarkus Armbruster     return drv;
747c6684249SMarkus Armbruster }
748c6684249SMarkus Armbruster 
749f500a6d3SKevin Wolf static int find_image_format(BlockDriverState *bs, const char *filename,
75034b5d2c6SMax Reitz                              BlockDriver **pdrv, Error **errp)
751ea2384d3Sbellard {
752c6684249SMarkus Armbruster     BlockDriver *drv;
7537cddd372SKevin Wolf     uint8_t buf[BLOCK_PROBE_BUF_SIZE];
754f500a6d3SKevin Wolf     int ret = 0;
755f8ea0b00SNicholas Bellinger 
75608a00559SKevin Wolf     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
7578e895599SPaolo Bonzini     if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
758ef810437SMax Reitz         *pdrv = &bdrv_raw;
759c98ac35dSStefan Weil         return ret;
7601a396859SNicholas A. Bellinger     }
761f8ea0b00SNicholas Bellinger 
76283f64091Sbellard     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
763ea2384d3Sbellard     if (ret < 0) {
76434b5d2c6SMax Reitz         error_setg_errno(errp, -ret, "Could not read image for determining its "
76534b5d2c6SMax Reitz                          "format");
766c98ac35dSStefan Weil         *pdrv = NULL;
767c98ac35dSStefan Weil         return ret;
768ea2384d3Sbellard     }
769ea2384d3Sbellard 
770c6684249SMarkus Armbruster     drv = bdrv_probe_all(buf, ret, filename);
771c98ac35dSStefan Weil     if (!drv) {
77234b5d2c6SMax Reitz         error_setg(errp, "Could not determine image format: No compatible "
77334b5d2c6SMax Reitz                    "driver found");
774c98ac35dSStefan Weil         ret = -ENOENT;
775c98ac35dSStefan Weil     }
776c98ac35dSStefan Weil     *pdrv = drv;
777c98ac35dSStefan Weil     return ret;
778ea2384d3Sbellard }
779ea2384d3Sbellard 
78051762288SStefan Hajnoczi /**
78151762288SStefan Hajnoczi  * Set the current 'total_sectors' value
78265a9bb25SMarkus Armbruster  * Return 0 on success, -errno on error.
78351762288SStefan Hajnoczi  */
78451762288SStefan Hajnoczi static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
78551762288SStefan Hajnoczi {
78651762288SStefan Hajnoczi     BlockDriver *drv = bs->drv;
78751762288SStefan Hajnoczi 
788396759adSNicholas Bellinger     /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
789396759adSNicholas Bellinger     if (bs->sg)
790396759adSNicholas Bellinger         return 0;
791396759adSNicholas Bellinger 
79251762288SStefan Hajnoczi     /* query actual device if possible, otherwise just trust the hint */
79351762288SStefan Hajnoczi     if (drv->bdrv_getlength) {
79451762288SStefan Hajnoczi         int64_t length = drv->bdrv_getlength(bs);
79551762288SStefan Hajnoczi         if (length < 0) {
79651762288SStefan Hajnoczi             return length;
79751762288SStefan Hajnoczi         }
7987e382003SFam Zheng         hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
79951762288SStefan Hajnoczi     }
80051762288SStefan Hajnoczi 
80151762288SStefan Hajnoczi     bs->total_sectors = hint;
80251762288SStefan Hajnoczi     return 0;
80351762288SStefan Hajnoczi }
80451762288SStefan Hajnoczi 
805c3993cdcSStefan Hajnoczi /**
8069e8f1835SPaolo Bonzini  * Set open flags for a given discard mode
8079e8f1835SPaolo Bonzini  *
8089e8f1835SPaolo Bonzini  * Return 0 on success, -1 if the discard mode was invalid.
8099e8f1835SPaolo Bonzini  */
8109e8f1835SPaolo Bonzini int bdrv_parse_discard_flags(const char *mode, int *flags)
8119e8f1835SPaolo Bonzini {
8129e8f1835SPaolo Bonzini     *flags &= ~BDRV_O_UNMAP;
8139e8f1835SPaolo Bonzini 
8149e8f1835SPaolo Bonzini     if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
8159e8f1835SPaolo Bonzini         /* do nothing */
8169e8f1835SPaolo Bonzini     } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
8179e8f1835SPaolo Bonzini         *flags |= BDRV_O_UNMAP;
8189e8f1835SPaolo Bonzini     } else {
8199e8f1835SPaolo Bonzini         return -1;
8209e8f1835SPaolo Bonzini     }
8219e8f1835SPaolo Bonzini 
8229e8f1835SPaolo Bonzini     return 0;
8239e8f1835SPaolo Bonzini }
8249e8f1835SPaolo Bonzini 
8259e8f1835SPaolo Bonzini /**
826c3993cdcSStefan Hajnoczi  * Set open flags for a given cache mode
827c3993cdcSStefan Hajnoczi  *
828c3993cdcSStefan Hajnoczi  * Return 0 on success, -1 if the cache mode was invalid.
829c3993cdcSStefan Hajnoczi  */
830c3993cdcSStefan Hajnoczi int bdrv_parse_cache_flags(const char *mode, int *flags)
831c3993cdcSStefan Hajnoczi {
832c3993cdcSStefan Hajnoczi     *flags &= ~BDRV_O_CACHE_MASK;
833c3993cdcSStefan Hajnoczi 
834c3993cdcSStefan Hajnoczi     if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
835c3993cdcSStefan Hajnoczi         *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
83692196b2fSStefan Hajnoczi     } else if (!strcmp(mode, "directsync")) {
83792196b2fSStefan Hajnoczi         *flags |= BDRV_O_NOCACHE;
838c3993cdcSStefan Hajnoczi     } else if (!strcmp(mode, "writeback")) {
839c3993cdcSStefan Hajnoczi         *flags |= BDRV_O_CACHE_WB;
840c3993cdcSStefan Hajnoczi     } else if (!strcmp(mode, "unsafe")) {
841c3993cdcSStefan Hajnoczi         *flags |= BDRV_O_CACHE_WB;
842c3993cdcSStefan Hajnoczi         *flags |= BDRV_O_NO_FLUSH;
843c3993cdcSStefan Hajnoczi     } else if (!strcmp(mode, "writethrough")) {
844c3993cdcSStefan Hajnoczi         /* this is the default */
845c3993cdcSStefan Hajnoczi     } else {
846c3993cdcSStefan Hajnoczi         return -1;
847c3993cdcSStefan Hajnoczi     }
848c3993cdcSStefan Hajnoczi 
849c3993cdcSStefan Hajnoczi     return 0;
850c3993cdcSStefan Hajnoczi }
851c3993cdcSStefan Hajnoczi 
85253fec9d3SStefan Hajnoczi /**
85353fec9d3SStefan Hajnoczi  * The copy-on-read flag is actually a reference count so multiple users may
85453fec9d3SStefan Hajnoczi  * use the feature without worrying about clobbering its previous state.
85553fec9d3SStefan Hajnoczi  * Copy-on-read stays enabled until all users have called to disable it.
85653fec9d3SStefan Hajnoczi  */
85753fec9d3SStefan Hajnoczi void bdrv_enable_copy_on_read(BlockDriverState *bs)
85853fec9d3SStefan Hajnoczi {
85953fec9d3SStefan Hajnoczi     bs->copy_on_read++;
86053fec9d3SStefan Hajnoczi }
86153fec9d3SStefan Hajnoczi 
86253fec9d3SStefan Hajnoczi void bdrv_disable_copy_on_read(BlockDriverState *bs)
86353fec9d3SStefan Hajnoczi {
86453fec9d3SStefan Hajnoczi     assert(bs->copy_on_read > 0);
86553fec9d3SStefan Hajnoczi     bs->copy_on_read--;
86653fec9d3SStefan Hajnoczi }
86753fec9d3SStefan Hajnoczi 
8680b50cc88SKevin Wolf /*
869b1e6fc08SKevin Wolf  * Returns the flags that a temporary snapshot should get, based on the
870b1e6fc08SKevin Wolf  * originally requested flags (the originally requested image will have flags
871b1e6fc08SKevin Wolf  * like a backing file)
872b1e6fc08SKevin Wolf  */
873b1e6fc08SKevin Wolf static int bdrv_temp_snapshot_flags(int flags)
874b1e6fc08SKevin Wolf {
875b1e6fc08SKevin Wolf     return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
876b1e6fc08SKevin Wolf }
877b1e6fc08SKevin Wolf 
878b1e6fc08SKevin Wolf /*
8790b50cc88SKevin Wolf  * Returns the flags that bs->file should get, based on the given flags for
8800b50cc88SKevin Wolf  * the parent BDS
8810b50cc88SKevin Wolf  */
8820b50cc88SKevin Wolf static int bdrv_inherited_flags(int flags)
8830b50cc88SKevin Wolf {
8840b50cc88SKevin Wolf     /* Enable protocol handling, disable format probing for bs->file */
8850b50cc88SKevin Wolf     flags |= BDRV_O_PROTOCOL;
8860b50cc88SKevin Wolf 
8870b50cc88SKevin Wolf     /* Our block drivers take care to send flushes and respect unmap policy,
8880b50cc88SKevin Wolf      * so we can enable both unconditionally on lower layers. */
8890b50cc88SKevin Wolf     flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
8900b50cc88SKevin Wolf 
8910b50cc88SKevin Wolf     /* Clear flags that only apply to the top layer */
8925669b44dSKevin Wolf     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
8930b50cc88SKevin Wolf 
8940b50cc88SKevin Wolf     return flags;
8950b50cc88SKevin Wolf }
8960b50cc88SKevin Wolf 
897317fc44eSKevin Wolf /*
898317fc44eSKevin Wolf  * Returns the flags that bs->backing_hd should get, based on the given flags
899317fc44eSKevin Wolf  * for the parent BDS
900317fc44eSKevin Wolf  */
901317fc44eSKevin Wolf static int bdrv_backing_flags(int flags)
902317fc44eSKevin Wolf {
903317fc44eSKevin Wolf     /* backing files always opened read-only */
904317fc44eSKevin Wolf     flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
905317fc44eSKevin Wolf 
906317fc44eSKevin Wolf     /* snapshot=on is handled on the top layer */
9078bfea15dSKevin Wolf     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
908317fc44eSKevin Wolf 
909317fc44eSKevin Wolf     return flags;
910317fc44eSKevin Wolf }
911317fc44eSKevin Wolf 
9127b272452SKevin Wolf static int bdrv_open_flags(BlockDriverState *bs, int flags)
9137b272452SKevin Wolf {
9147b272452SKevin Wolf     int open_flags = flags | BDRV_O_CACHE_WB;
9157b272452SKevin Wolf 
9167b272452SKevin Wolf     /*
9177b272452SKevin Wolf      * Clear flags that are internal to the block layer before opening the
9187b272452SKevin Wolf      * image.
9197b272452SKevin Wolf      */
92020cca275SKevin Wolf     open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
9217b272452SKevin Wolf 
9227b272452SKevin Wolf     /*
9237b272452SKevin Wolf      * Snapshots should be writable.
9247b272452SKevin Wolf      */
9258bfea15dSKevin Wolf     if (flags & BDRV_O_TEMPORARY) {
9267b272452SKevin Wolf         open_flags |= BDRV_O_RDWR;
9277b272452SKevin Wolf     }
9287b272452SKevin Wolf 
9297b272452SKevin Wolf     return open_flags;
9307b272452SKevin Wolf }
9317b272452SKevin Wolf 
932636ea370SKevin Wolf static void bdrv_assign_node_name(BlockDriverState *bs,
9336913c0c2SBenoît Canet                                   const char *node_name,
9346913c0c2SBenoît Canet                                   Error **errp)
9356913c0c2SBenoît Canet {
9366913c0c2SBenoît Canet     if (!node_name) {
937636ea370SKevin Wolf         return;
9386913c0c2SBenoît Canet     }
9396913c0c2SBenoît Canet 
9409aebf3b8SKevin Wolf     /* Check for empty string or invalid characters */
941f5bebbbbSMarkus Armbruster     if (!id_wellformed(node_name)) {
9429aebf3b8SKevin Wolf         error_setg(errp, "Invalid node name");
943636ea370SKevin Wolf         return;
9446913c0c2SBenoît Canet     }
9456913c0c2SBenoît Canet 
9460c5e94eeSBenoît Canet     /* takes care of avoiding namespaces collisions */
9477f06d47eSMarkus Armbruster     if (blk_by_name(node_name)) {
9480c5e94eeSBenoît Canet         error_setg(errp, "node-name=%s is conflicting with a device id",
9490c5e94eeSBenoît Canet                    node_name);
950636ea370SKevin Wolf         return;
9510c5e94eeSBenoît Canet     }
9520c5e94eeSBenoît Canet 
9536913c0c2SBenoît Canet     /* takes care of avoiding duplicates node names */
9546913c0c2SBenoît Canet     if (bdrv_find_node(node_name)) {
9556913c0c2SBenoît Canet         error_setg(errp, "Duplicate node name");
956636ea370SKevin Wolf         return;
9576913c0c2SBenoît Canet     }
9586913c0c2SBenoît Canet 
9596913c0c2SBenoît Canet     /* copy node name into the bs and insert it into the graph list */
9606913c0c2SBenoît Canet     pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
9616913c0c2SBenoît Canet     QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
9626913c0c2SBenoît Canet }
9636913c0c2SBenoît Canet 
964b6ce07aaSKevin Wolf /*
96557915332SKevin Wolf  * Common part for opening disk images and files
966b6ad491aSKevin Wolf  *
967b6ad491aSKevin Wolf  * Removes all processed options from *options.
96857915332SKevin Wolf  */
969f500a6d3SKevin Wolf static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
97034b5d2c6SMax Reitz     QDict *options, int flags, BlockDriver *drv, Error **errp)
97157915332SKevin Wolf {
97257915332SKevin Wolf     int ret, open_flags;
973035fccdfSKevin Wolf     const char *filename;
9746913c0c2SBenoît Canet     const char *node_name = NULL;
97534b5d2c6SMax Reitz     Error *local_err = NULL;
97657915332SKevin Wolf 
97757915332SKevin Wolf     assert(drv != NULL);
9786405875cSPaolo Bonzini     assert(bs->file == NULL);
979707ff828SKevin Wolf     assert(options != NULL && bs->options != options);
98057915332SKevin Wolf 
98145673671SKevin Wolf     if (file != NULL) {
98245673671SKevin Wolf         filename = file->filename;
98345673671SKevin Wolf     } else {
98445673671SKevin Wolf         filename = qdict_get_try_str(options, "filename");
98545673671SKevin Wolf     }
98645673671SKevin Wolf 
987765003dbSKevin Wolf     if (drv->bdrv_needs_filename && !filename) {
988765003dbSKevin Wolf         error_setg(errp, "The '%s' block driver requires a file name",
989765003dbSKevin Wolf                    drv->format_name);
990765003dbSKevin Wolf         return -EINVAL;
991765003dbSKevin Wolf     }
992765003dbSKevin Wolf 
99345673671SKevin Wolf     trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
99428dcee10SStefan Hajnoczi 
9956913c0c2SBenoît Canet     node_name = qdict_get_try_str(options, "node-name");
996636ea370SKevin Wolf     bdrv_assign_node_name(bs, node_name, &local_err);
9970fb6395cSMarkus Armbruster     if (local_err) {
998636ea370SKevin Wolf         error_propagate(errp, local_err);
999636ea370SKevin Wolf         return -EINVAL;
10006913c0c2SBenoît Canet     }
10016913c0c2SBenoît Canet     qdict_del(options, "node-name");
10026913c0c2SBenoît Canet 
10035d186eb0SKevin Wolf     /* bdrv_open() with directly using a protocol as drv. This layer is already
10045d186eb0SKevin Wolf      * opened, so assign it to bs (while file becomes a closed BlockDriverState)
10055d186eb0SKevin Wolf      * and return immediately. */
10065d186eb0SKevin Wolf     if (file != NULL && drv->bdrv_file_open) {
10075d186eb0SKevin Wolf         bdrv_swap(file, bs);
10085d186eb0SKevin Wolf         return 0;
10095d186eb0SKevin Wolf     }
10105d186eb0SKevin Wolf 
101157915332SKevin Wolf     bs->open_flags = flags;
10121b7fd729SPaolo Bonzini     bs->guest_block_size = 512;
1013c25f53b0SPaolo Bonzini     bs->request_alignment = 512;
10140d51b4deSAsias He     bs->zero_beyond_eof = true;
1015b64ec4e4SFam Zheng     open_flags = bdrv_open_flags(bs, flags);
1016b64ec4e4SFam Zheng     bs->read_only = !(open_flags & BDRV_O_RDWR);
1017b64ec4e4SFam Zheng 
1018b64ec4e4SFam Zheng     if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
10198f94a6e4SKevin Wolf         error_setg(errp,
10208f94a6e4SKevin Wolf                    !bs->read_only && bdrv_is_whitelisted(drv, true)
10218f94a6e4SKevin Wolf                         ? "Driver '%s' can only be used for read-only devices"
10228f94a6e4SKevin Wolf                         : "Driver '%s' is not whitelisted",
10238f94a6e4SKevin Wolf                    drv->format_name);
1024b64ec4e4SFam Zheng         return -ENOTSUP;
1025b64ec4e4SFam Zheng     }
102657915332SKevin Wolf 
102753fec9d3SStefan Hajnoczi     assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
10280ebd24e0SKevin Wolf     if (flags & BDRV_O_COPY_ON_READ) {
10290ebd24e0SKevin Wolf         if (!bs->read_only) {
103053fec9d3SStefan Hajnoczi             bdrv_enable_copy_on_read(bs);
10310ebd24e0SKevin Wolf         } else {
10320ebd24e0SKevin Wolf             error_setg(errp, "Can't use copy-on-read on read-only device");
10330ebd24e0SKevin Wolf             return -EINVAL;
10340ebd24e0SKevin Wolf         }
103553fec9d3SStefan Hajnoczi     }
103653fec9d3SStefan Hajnoczi 
1037c2ad1b0cSKevin Wolf     if (filename != NULL) {
103857915332SKevin Wolf         pstrcpy(bs->filename, sizeof(bs->filename), filename);
1039c2ad1b0cSKevin Wolf     } else {
1040c2ad1b0cSKevin Wolf         bs->filename[0] = '\0';
1041c2ad1b0cSKevin Wolf     }
104291af7014SMax Reitz     pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
104357915332SKevin Wolf 
104457915332SKevin Wolf     bs->drv = drv;
10457267c094SAnthony Liguori     bs->opaque = g_malloc0(drv->instance_size);
104657915332SKevin Wolf 
104703f541bdSStefan Hajnoczi     bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
1048e7c63796SStefan Hajnoczi 
104966f82ceeSKevin Wolf     /* Open the image, either directly or using a protocol */
105066f82ceeSKevin Wolf     if (drv->bdrv_file_open) {
10515d186eb0SKevin Wolf         assert(file == NULL);
1052030be321SBenoît Canet         assert(!drv->bdrv_needs_filename || filename != NULL);
105334b5d2c6SMax Reitz         ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
1054f500a6d3SKevin Wolf     } else {
10552af5ef70SKevin Wolf         if (file == NULL) {
105634b5d2c6SMax Reitz             error_setg(errp, "Can't use '%s' as a block driver for the "
105734b5d2c6SMax Reitz                        "protocol level", drv->format_name);
10582af5ef70SKevin Wolf             ret = -EINVAL;
10592af5ef70SKevin Wolf             goto free_and_fail;
10602af5ef70SKevin Wolf         }
1061f500a6d3SKevin Wolf         bs->file = file;
106234b5d2c6SMax Reitz         ret = drv->bdrv_open(bs, options, open_flags, &local_err);
106366f82ceeSKevin Wolf     }
106466f82ceeSKevin Wolf 
106557915332SKevin Wolf     if (ret < 0) {
106684d18f06SMarkus Armbruster         if (local_err) {
106734b5d2c6SMax Reitz             error_propagate(errp, local_err);
10682fa9aa59SDunrong Huang         } else if (bs->filename[0]) {
10692fa9aa59SDunrong Huang             error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
107034b5d2c6SMax Reitz         } else {
107134b5d2c6SMax Reitz             error_setg_errno(errp, -ret, "Could not open image");
107234b5d2c6SMax Reitz         }
107357915332SKevin Wolf         goto free_and_fail;
107457915332SKevin Wolf     }
107557915332SKevin Wolf 
1076a1f688f4SMarkus Armbruster     if (bs->encrypted) {
1077a1f688f4SMarkus Armbruster         error_report("Encrypted images are deprecated");
1078a1f688f4SMarkus Armbruster         error_printf("Support for them will be removed in a future release.\n"
1079a1f688f4SMarkus Armbruster                      "You can use 'qemu-img convert' to convert your image"
1080a1f688f4SMarkus Armbruster                      " to an unencrypted one.\n");
1081a1f688f4SMarkus Armbruster     }
1082a1f688f4SMarkus Armbruster 
108351762288SStefan Hajnoczi     ret = refresh_total_sectors(bs, bs->total_sectors);
108451762288SStefan Hajnoczi     if (ret < 0) {
108534b5d2c6SMax Reitz         error_setg_errno(errp, -ret, "Could not refresh total sector count");
108651762288SStefan Hajnoczi         goto free_and_fail;
108757915332SKevin Wolf     }
108851762288SStefan Hajnoczi 
10893baca891SKevin Wolf     bdrv_refresh_limits(bs, &local_err);
10903baca891SKevin Wolf     if (local_err) {
10913baca891SKevin Wolf         error_propagate(errp, local_err);
10923baca891SKevin Wolf         ret = -EINVAL;
10933baca891SKevin Wolf         goto free_and_fail;
10943baca891SKevin Wolf     }
10953baca891SKevin Wolf 
1096c25f53b0SPaolo Bonzini     assert(bdrv_opt_mem_align(bs) != 0);
109747ea2de2SKevin Wolf     assert((bs->request_alignment != 0) || bs->sg);
109857915332SKevin Wolf     return 0;
109957915332SKevin Wolf 
110057915332SKevin Wolf free_and_fail:
110166f82ceeSKevin Wolf     bs->file = NULL;
11027267c094SAnthony Liguori     g_free(bs->opaque);
110357915332SKevin Wolf     bs->opaque = NULL;
110457915332SKevin Wolf     bs->drv = NULL;
110557915332SKevin Wolf     return ret;
110657915332SKevin Wolf }
110757915332SKevin Wolf 
11085e5c4f63SKevin Wolf static QDict *parse_json_filename(const char *filename, Error **errp)
11095e5c4f63SKevin Wolf {
11105e5c4f63SKevin Wolf     QObject *options_obj;
11115e5c4f63SKevin Wolf     QDict *options;
11125e5c4f63SKevin Wolf     int ret;
11135e5c4f63SKevin Wolf 
11145e5c4f63SKevin Wolf     ret = strstart(filename, "json:", &filename);
11155e5c4f63SKevin Wolf     assert(ret);
11165e5c4f63SKevin Wolf 
11175e5c4f63SKevin Wolf     options_obj = qobject_from_json(filename);
11185e5c4f63SKevin Wolf     if (!options_obj) {
11195e5c4f63SKevin Wolf         error_setg(errp, "Could not parse the JSON options");
11205e5c4f63SKevin Wolf         return NULL;
11215e5c4f63SKevin Wolf     }
11225e5c4f63SKevin Wolf 
11235e5c4f63SKevin Wolf     if (qobject_type(options_obj) != QTYPE_QDICT) {
11245e5c4f63SKevin Wolf         qobject_decref(options_obj);
11255e5c4f63SKevin Wolf         error_setg(errp, "Invalid JSON object given");
11265e5c4f63SKevin Wolf         return NULL;
11275e5c4f63SKevin Wolf     }
11285e5c4f63SKevin Wolf 
11295e5c4f63SKevin Wolf     options = qobject_to_qdict(options_obj);
11305e5c4f63SKevin Wolf     qdict_flatten(options);
11315e5c4f63SKevin Wolf 
11325e5c4f63SKevin Wolf     return options;
11335e5c4f63SKevin Wolf }
11345e5c4f63SKevin Wolf 
113557915332SKevin Wolf /*
1136f54120ffSKevin Wolf  * Fills in default options for opening images and converts the legacy
1137f54120ffSKevin Wolf  * filename/flags pair to option QDict entries.
1138f54120ffSKevin Wolf  */
11395e5c4f63SKevin Wolf static int bdrv_fill_options(QDict **options, const char **pfilename, int flags,
114017b005f1SKevin Wolf                              BlockDriver *drv, Error **errp)
1141f54120ffSKevin Wolf {
11425e5c4f63SKevin Wolf     const char *filename = *pfilename;
1143f54120ffSKevin Wolf     const char *drvname;
1144462f5bcfSKevin Wolf     bool protocol = flags & BDRV_O_PROTOCOL;
1145f54120ffSKevin Wolf     bool parse_filename = false;
1146f54120ffSKevin Wolf     Error *local_err = NULL;
1147f54120ffSKevin Wolf 
11485e5c4f63SKevin Wolf     /* Parse json: pseudo-protocol */
11495e5c4f63SKevin Wolf     if (filename && g_str_has_prefix(filename, "json:")) {
11505e5c4f63SKevin Wolf         QDict *json_options = parse_json_filename(filename, &local_err);
11515e5c4f63SKevin Wolf         if (local_err) {
11525e5c4f63SKevin Wolf             error_propagate(errp, local_err);
11535e5c4f63SKevin Wolf             return -EINVAL;
11545e5c4f63SKevin Wolf         }
11555e5c4f63SKevin Wolf 
11565e5c4f63SKevin Wolf         /* Options given in the filename have lower priority than options
11575e5c4f63SKevin Wolf          * specified directly */
11585e5c4f63SKevin Wolf         qdict_join(*options, json_options, false);
11595e5c4f63SKevin Wolf         QDECREF(json_options);
11605e5c4f63SKevin Wolf         *pfilename = filename = NULL;
11615e5c4f63SKevin Wolf     }
11625e5c4f63SKevin Wolf 
1163f54120ffSKevin Wolf     /* Fetch the file name from the options QDict if necessary */
116417b005f1SKevin Wolf     if (protocol && filename) {
1165f54120ffSKevin Wolf         if (!qdict_haskey(*options, "filename")) {
1166f54120ffSKevin Wolf             qdict_put(*options, "filename", qstring_from_str(filename));
1167f54120ffSKevin Wolf             parse_filename = true;
1168f54120ffSKevin Wolf         } else {
1169f54120ffSKevin Wolf             error_setg(errp, "Can't specify 'file' and 'filename' options at "
1170f54120ffSKevin Wolf                              "the same time");
1171f54120ffSKevin Wolf             return -EINVAL;
1172f54120ffSKevin Wolf         }
1173f54120ffSKevin Wolf     }
1174f54120ffSKevin Wolf 
1175f54120ffSKevin Wolf     /* Find the right block driver */
1176f54120ffSKevin Wolf     filename = qdict_get_try_str(*options, "filename");
1177f54120ffSKevin Wolf     drvname = qdict_get_try_str(*options, "driver");
1178f54120ffSKevin Wolf 
117917b005f1SKevin Wolf     if (drv) {
118017b005f1SKevin Wolf         if (drvname) {
118117b005f1SKevin Wolf             error_setg(errp, "Driver specified twice");
118217b005f1SKevin Wolf             return -EINVAL;
118317b005f1SKevin Wolf         }
118417b005f1SKevin Wolf         drvname = drv->format_name;
118517b005f1SKevin Wolf         qdict_put(*options, "driver", qstring_from_str(drvname));
118617b005f1SKevin Wolf     } else {
118717b005f1SKevin Wolf         if (!drvname && protocol) {
1188f54120ffSKevin Wolf             if (filename) {
1189b65a5e12SMax Reitz                 drv = bdrv_find_protocol(filename, parse_filename, errp);
1190f54120ffSKevin Wolf                 if (!drv) {
1191f54120ffSKevin Wolf                     return -EINVAL;
1192f54120ffSKevin Wolf                 }
1193f54120ffSKevin Wolf 
1194f54120ffSKevin Wolf                 drvname = drv->format_name;
1195f54120ffSKevin Wolf                 qdict_put(*options, "driver", qstring_from_str(drvname));
1196f54120ffSKevin Wolf             } else {
1197f54120ffSKevin Wolf                 error_setg(errp, "Must specify either driver or file");
1198f54120ffSKevin Wolf                 return -EINVAL;
1199f54120ffSKevin Wolf             }
120017b005f1SKevin Wolf         } else if (drvname) {
1201f54120ffSKevin Wolf             drv = bdrv_find_format(drvname);
1202f54120ffSKevin Wolf             if (!drv) {
1203f54120ffSKevin Wolf                 error_setg(errp, "Unknown driver '%s'", drvname);
1204f54120ffSKevin Wolf                 return -ENOENT;
1205f54120ffSKevin Wolf             }
120617b005f1SKevin Wolf         }
120717b005f1SKevin Wolf     }
120817b005f1SKevin Wolf 
120917b005f1SKevin Wolf     assert(drv || !protocol);
1210f54120ffSKevin Wolf 
1211f54120ffSKevin Wolf     /* Driver-specific filename parsing */
121217b005f1SKevin Wolf     if (drv && drv->bdrv_parse_filename && parse_filename) {
1213f54120ffSKevin Wolf         drv->bdrv_parse_filename(filename, *options, &local_err);
1214f54120ffSKevin Wolf         if (local_err) {
1215f54120ffSKevin Wolf             error_propagate(errp, local_err);
1216f54120ffSKevin Wolf             return -EINVAL;
1217f54120ffSKevin Wolf         }
1218f54120ffSKevin Wolf 
1219f54120ffSKevin Wolf         if (!drv->bdrv_needs_filename) {
1220f54120ffSKevin Wolf             qdict_del(*options, "filename");
1221f54120ffSKevin Wolf         }
1222f54120ffSKevin Wolf     }
1223f54120ffSKevin Wolf 
1224f54120ffSKevin Wolf     return 0;
1225f54120ffSKevin Wolf }
1226f54120ffSKevin Wolf 
12278d24cce1SFam Zheng void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
12288d24cce1SFam Zheng {
12298d24cce1SFam Zheng 
1230826b6ca0SFam Zheng     if (bs->backing_hd) {
1231826b6ca0SFam Zheng         assert(bs->backing_blocker);
1232826b6ca0SFam Zheng         bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
1233826b6ca0SFam Zheng     } else if (backing_hd) {
1234826b6ca0SFam Zheng         error_setg(&bs->backing_blocker,
123581e5f78aSAlberto Garcia                    "node is used as backing hd of '%s'",
123681e5f78aSAlberto Garcia                    bdrv_get_device_or_node_name(bs));
1237826b6ca0SFam Zheng     }
1238826b6ca0SFam Zheng 
12398d24cce1SFam Zheng     bs->backing_hd = backing_hd;
12408d24cce1SFam Zheng     if (!backing_hd) {
1241826b6ca0SFam Zheng         error_free(bs->backing_blocker);
1242826b6ca0SFam Zheng         bs->backing_blocker = NULL;
12438d24cce1SFam Zheng         goto out;
12448d24cce1SFam Zheng     }
12458d24cce1SFam Zheng     bs->open_flags &= ~BDRV_O_NO_BACKING;
12468d24cce1SFam Zheng     pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
12478d24cce1SFam Zheng     pstrcpy(bs->backing_format, sizeof(bs->backing_format),
12488d24cce1SFam Zheng             backing_hd->drv ? backing_hd->drv->format_name : "");
1249826b6ca0SFam Zheng 
1250826b6ca0SFam Zheng     bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
1251826b6ca0SFam Zheng     /* Otherwise we won't be able to commit due to check in bdrv_commit */
1252bb00021dSFam Zheng     bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1253826b6ca0SFam Zheng                     bs->backing_blocker);
12548d24cce1SFam Zheng out:
12553baca891SKevin Wolf     bdrv_refresh_limits(bs, NULL);
12568d24cce1SFam Zheng }
12578d24cce1SFam Zheng 
125831ca6d07SKevin Wolf /*
125931ca6d07SKevin Wolf  * Opens the backing file for a BlockDriverState if not yet open
126031ca6d07SKevin Wolf  *
126131ca6d07SKevin Wolf  * options is a QDict of options to pass to the block drivers, or NULL for an
126231ca6d07SKevin Wolf  * empty set of options. The reference to the QDict is transferred to this
126331ca6d07SKevin Wolf  * function (even on failure), so if the caller intends to reuse the dictionary,
126431ca6d07SKevin Wolf  * it needs to use QINCREF() before calling bdrv_file_open.
126531ca6d07SKevin Wolf  */
126634b5d2c6SMax Reitz int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
12679156df12SPaolo Bonzini {
12681ba4b6a5SBenoît Canet     char *backing_filename = g_malloc0(PATH_MAX);
1269317fc44eSKevin Wolf     int ret = 0;
12708d24cce1SFam Zheng     BlockDriverState *backing_hd;
127134b5d2c6SMax Reitz     Error *local_err = NULL;
12729156df12SPaolo Bonzini 
12739156df12SPaolo Bonzini     if (bs->backing_hd != NULL) {
127431ca6d07SKevin Wolf         QDECREF(options);
12751ba4b6a5SBenoît Canet         goto free_exit;
12769156df12SPaolo Bonzini     }
12779156df12SPaolo Bonzini 
127831ca6d07SKevin Wolf     /* NULL means an empty set of options */
127931ca6d07SKevin Wolf     if (options == NULL) {
128031ca6d07SKevin Wolf         options = qdict_new();
128131ca6d07SKevin Wolf     }
128231ca6d07SKevin Wolf 
12839156df12SPaolo Bonzini     bs->open_flags &= ~BDRV_O_NO_BACKING;
12841cb6f506SKevin Wolf     if (qdict_haskey(options, "file.filename")) {
12851cb6f506SKevin Wolf         backing_filename[0] = '\0';
12861cb6f506SKevin Wolf     } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
128731ca6d07SKevin Wolf         QDECREF(options);
12881ba4b6a5SBenoît Canet         goto free_exit;
1289dbecebddSFam Zheng     } else {
12909f07429eSMax Reitz         bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
12919f07429eSMax Reitz                                        &local_err);
12929f07429eSMax Reitz         if (local_err) {
12939f07429eSMax Reitz             ret = -EINVAL;
12949f07429eSMax Reitz             error_propagate(errp, local_err);
12959f07429eSMax Reitz             QDECREF(options);
12969f07429eSMax Reitz             goto free_exit;
12979f07429eSMax Reitz         }
12989156df12SPaolo Bonzini     }
12999156df12SPaolo Bonzini 
13008ee79e70SKevin Wolf     if (!bs->drv || !bs->drv->supports_backing) {
13018ee79e70SKevin Wolf         ret = -EINVAL;
13028ee79e70SKevin Wolf         error_setg(errp, "Driver doesn't support backing files");
13038ee79e70SKevin Wolf         QDECREF(options);
13048ee79e70SKevin Wolf         goto free_exit;
13058ee79e70SKevin Wolf     }
13068ee79e70SKevin Wolf 
1307e4e9986bSMarkus Armbruster     backing_hd = bdrv_new();
13088d24cce1SFam Zheng 
1309c5f6e493SKevin Wolf     if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1310c5f6e493SKevin Wolf         qdict_put(options, "driver", qstring_from_str(bs->backing_format));
13119156df12SPaolo Bonzini     }
13129156df12SPaolo Bonzini 
1313f67503e5SMax Reitz     assert(bs->backing_hd == NULL);
13148d24cce1SFam Zheng     ret = bdrv_open(&backing_hd,
1315ddf5636dSMax Reitz                     *backing_filename ? backing_filename : NULL, NULL, options,
1316c5f6e493SKevin Wolf                     bdrv_backing_flags(bs->open_flags), NULL, &local_err);
13179156df12SPaolo Bonzini     if (ret < 0) {
13188d24cce1SFam Zheng         bdrv_unref(backing_hd);
13198d24cce1SFam Zheng         backing_hd = NULL;
13209156df12SPaolo Bonzini         bs->open_flags |= BDRV_O_NO_BACKING;
1321b04b6b6eSFam Zheng         error_setg(errp, "Could not open backing file: %s",
1322b04b6b6eSFam Zheng                    error_get_pretty(local_err));
1323b04b6b6eSFam Zheng         error_free(local_err);
13241ba4b6a5SBenoît Canet         goto free_exit;
13259156df12SPaolo Bonzini     }
13268d24cce1SFam Zheng     bdrv_set_backing_hd(bs, backing_hd);
1327d80ac658SPeter Feiner 
13281ba4b6a5SBenoît Canet free_exit:
13291ba4b6a5SBenoît Canet     g_free(backing_filename);
13301ba4b6a5SBenoît Canet     return ret;
13319156df12SPaolo Bonzini }
13329156df12SPaolo Bonzini 
1333b6ce07aaSKevin Wolf /*
1334da557aacSMax Reitz  * Opens a disk image whose options are given as BlockdevRef in another block
1335da557aacSMax Reitz  * device's options.
1336da557aacSMax Reitz  *
1337da557aacSMax Reitz  * If allow_none is true, no image will be opened if filename is false and no
1338da557aacSMax Reitz  * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1339da557aacSMax Reitz  *
1340da557aacSMax Reitz  * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1341da557aacSMax Reitz  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1342da557aacSMax Reitz  * itself, all options starting with "${bdref_key}." are considered part of the
1343da557aacSMax Reitz  * BlockdevRef.
1344da557aacSMax Reitz  *
1345da557aacSMax Reitz  * The BlockdevRef will be removed from the options QDict.
1346f67503e5SMax Reitz  *
1347f67503e5SMax Reitz  * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
1348da557aacSMax Reitz  */
1349da557aacSMax Reitz int bdrv_open_image(BlockDriverState **pbs, const char *filename,
1350da557aacSMax Reitz                     QDict *options, const char *bdref_key, int flags,
1351f7d9fd8cSMax Reitz                     bool allow_none, Error **errp)
1352da557aacSMax Reitz {
1353da557aacSMax Reitz     QDict *image_options;
1354da557aacSMax Reitz     int ret;
1355da557aacSMax Reitz     char *bdref_key_dot;
1356da557aacSMax Reitz     const char *reference;
1357da557aacSMax Reitz 
1358f67503e5SMax Reitz     assert(pbs);
1359f67503e5SMax Reitz     assert(*pbs == NULL);
1360f67503e5SMax Reitz 
1361da557aacSMax Reitz     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1362da557aacSMax Reitz     qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1363da557aacSMax Reitz     g_free(bdref_key_dot);
1364da557aacSMax Reitz 
1365da557aacSMax Reitz     reference = qdict_get_try_str(options, bdref_key);
1366da557aacSMax Reitz     if (!filename && !reference && !qdict_size(image_options)) {
1367da557aacSMax Reitz         if (allow_none) {
1368da557aacSMax Reitz             ret = 0;
1369da557aacSMax Reitz         } else {
1370da557aacSMax Reitz             error_setg(errp, "A block device must be specified for \"%s\"",
1371da557aacSMax Reitz                        bdref_key);
1372da557aacSMax Reitz             ret = -EINVAL;
1373da557aacSMax Reitz         }
1374b20e61e0SMarkus Armbruster         QDECREF(image_options);
1375da557aacSMax Reitz         goto done;
1376da557aacSMax Reitz     }
1377da557aacSMax Reitz 
1378f7d9fd8cSMax Reitz     ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp);
1379da557aacSMax Reitz 
1380da557aacSMax Reitz done:
1381da557aacSMax Reitz     qdict_del(options, bdref_key);
1382da557aacSMax Reitz     return ret;
1383da557aacSMax Reitz }
1384da557aacSMax Reitz 
13856b8aeca5SChen Gang int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
1386b998875dSKevin Wolf {
1387b998875dSKevin Wolf     /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
13881ba4b6a5SBenoît Canet     char *tmp_filename = g_malloc0(PATH_MAX + 1);
1389b998875dSKevin Wolf     int64_t total_size;
139083d0521aSChunyan Liu     QemuOpts *opts = NULL;
1391b998875dSKevin Wolf     QDict *snapshot_options;
1392b998875dSKevin Wolf     BlockDriverState *bs_snapshot;
1393b998875dSKevin Wolf     Error *local_err;
1394b998875dSKevin Wolf     int ret;
1395b998875dSKevin Wolf 
1396b998875dSKevin Wolf     /* if snapshot, we create a temporary backing file and open it
1397b998875dSKevin Wolf        instead of opening 'filename' directly */
1398b998875dSKevin Wolf 
1399b998875dSKevin Wolf     /* Get the required size from the image */
1400f187743aSKevin Wolf     total_size = bdrv_getlength(bs);
1401f187743aSKevin Wolf     if (total_size < 0) {
14026b8aeca5SChen Gang         ret = total_size;
1403f187743aSKevin Wolf         error_setg_errno(errp, -total_size, "Could not get image size");
14041ba4b6a5SBenoît Canet         goto out;
1405f187743aSKevin Wolf     }
1406b998875dSKevin Wolf 
1407b998875dSKevin Wolf     /* Create the temporary image */
14081ba4b6a5SBenoît Canet     ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1409b998875dSKevin Wolf     if (ret < 0) {
1410b998875dSKevin Wolf         error_setg_errno(errp, -ret, "Could not get temporary filename");
14111ba4b6a5SBenoît Canet         goto out;
1412b998875dSKevin Wolf     }
1413b998875dSKevin Wolf 
1414ef810437SMax Reitz     opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1415c282e1fdSChunyan Liu                             &error_abort);
141639101f25SMarkus Armbruster     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1417ef810437SMax Reitz     ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err);
141883d0521aSChunyan Liu     qemu_opts_del(opts);
1419b998875dSKevin Wolf     if (ret < 0) {
1420b998875dSKevin Wolf         error_setg_errno(errp, -ret, "Could not create temporary overlay "
1421b998875dSKevin Wolf                          "'%s': %s", tmp_filename,
1422b998875dSKevin Wolf                          error_get_pretty(local_err));
1423b998875dSKevin Wolf         error_free(local_err);
14241ba4b6a5SBenoît Canet         goto out;
1425b998875dSKevin Wolf     }
1426b998875dSKevin Wolf 
1427b998875dSKevin Wolf     /* Prepare a new options QDict for the temporary file */
1428b998875dSKevin Wolf     snapshot_options = qdict_new();
1429b998875dSKevin Wolf     qdict_put(snapshot_options, "file.driver",
1430b998875dSKevin Wolf               qstring_from_str("file"));
1431b998875dSKevin Wolf     qdict_put(snapshot_options, "file.filename",
1432b998875dSKevin Wolf               qstring_from_str(tmp_filename));
1433b998875dSKevin Wolf 
1434e4e9986bSMarkus Armbruster     bs_snapshot = bdrv_new();
1435b998875dSKevin Wolf 
1436b998875dSKevin Wolf     ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1437ef810437SMax Reitz                     flags, &bdrv_qcow2, &local_err);
1438b998875dSKevin Wolf     if (ret < 0) {
1439b998875dSKevin Wolf         error_propagate(errp, local_err);
14401ba4b6a5SBenoît Canet         goto out;
1441b998875dSKevin Wolf     }
1442b998875dSKevin Wolf 
1443b998875dSKevin Wolf     bdrv_append(bs_snapshot, bs);
14441ba4b6a5SBenoît Canet 
14451ba4b6a5SBenoît Canet out:
14461ba4b6a5SBenoît Canet     g_free(tmp_filename);
14476b8aeca5SChen Gang     return ret;
1448b998875dSKevin Wolf }
1449b998875dSKevin Wolf 
1450da557aacSMax Reitz /*
1451b6ce07aaSKevin Wolf  * Opens a disk image (raw, qcow2, vmdk, ...)
1452de9c0cecSKevin Wolf  *
1453de9c0cecSKevin Wolf  * options is a QDict of options to pass to the block drivers, or NULL for an
1454de9c0cecSKevin Wolf  * empty set of options. The reference to the QDict belongs to the block layer
1455de9c0cecSKevin Wolf  * after the call (even on failure), so if the caller intends to reuse the
1456de9c0cecSKevin Wolf  * dictionary, it needs to use QINCREF() before calling bdrv_open.
1457f67503e5SMax Reitz  *
1458f67503e5SMax Reitz  * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1459f67503e5SMax Reitz  * If it is not NULL, the referenced BDS will be reused.
1460ddf5636dSMax Reitz  *
1461ddf5636dSMax Reitz  * The reference parameter may be used to specify an existing block device which
1462ddf5636dSMax Reitz  * should be opened. If specified, neither options nor a filename may be given,
1463ddf5636dSMax Reitz  * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1464b6ce07aaSKevin Wolf  */
1465ddf5636dSMax Reitz int bdrv_open(BlockDriverState **pbs, const char *filename,
1466ddf5636dSMax Reitz               const char *reference, QDict *options, int flags,
1467ddf5636dSMax Reitz               BlockDriver *drv, Error **errp)
1468ea2384d3Sbellard {
1469b6ce07aaSKevin Wolf     int ret;
1470f67503e5SMax Reitz     BlockDriverState *file = NULL, *bs;
147174fe54f2SKevin Wolf     const char *drvname;
147234b5d2c6SMax Reitz     Error *local_err = NULL;
1473b1e6fc08SKevin Wolf     int snapshot_flags = 0;
147433e3963eSbellard 
1475f67503e5SMax Reitz     assert(pbs);
1476f67503e5SMax Reitz 
1477ddf5636dSMax Reitz     if (reference) {
1478ddf5636dSMax Reitz         bool options_non_empty = options ? qdict_size(options) : false;
1479ddf5636dSMax Reitz         QDECREF(options);
1480ddf5636dSMax Reitz 
1481ddf5636dSMax Reitz         if (*pbs) {
1482ddf5636dSMax Reitz             error_setg(errp, "Cannot reuse an existing BDS when referencing "
1483ddf5636dSMax Reitz                        "another block device");
1484ddf5636dSMax Reitz             return -EINVAL;
1485ddf5636dSMax Reitz         }
1486ddf5636dSMax Reitz 
1487ddf5636dSMax Reitz         if (filename || options_non_empty) {
1488ddf5636dSMax Reitz             error_setg(errp, "Cannot reference an existing block device with "
1489ddf5636dSMax Reitz                        "additional options or a new filename");
1490ddf5636dSMax Reitz             return -EINVAL;
1491ddf5636dSMax Reitz         }
1492ddf5636dSMax Reitz 
1493ddf5636dSMax Reitz         bs = bdrv_lookup_bs(reference, reference, errp);
1494ddf5636dSMax Reitz         if (!bs) {
1495ddf5636dSMax Reitz             return -ENODEV;
1496ddf5636dSMax Reitz         }
1497ddf5636dSMax Reitz         bdrv_ref(bs);
1498ddf5636dSMax Reitz         *pbs = bs;
1499ddf5636dSMax Reitz         return 0;
1500ddf5636dSMax Reitz     }
1501ddf5636dSMax Reitz 
1502f67503e5SMax Reitz     if (*pbs) {
1503f67503e5SMax Reitz         bs = *pbs;
1504f67503e5SMax Reitz     } else {
1505e4e9986bSMarkus Armbruster         bs = bdrv_new();
1506f67503e5SMax Reitz     }
1507f67503e5SMax Reitz 
1508de9c0cecSKevin Wolf     /* NULL means an empty set of options */
1509de9c0cecSKevin Wolf     if (options == NULL) {
1510de9c0cecSKevin Wolf         options = qdict_new();
1511de9c0cecSKevin Wolf     }
1512de9c0cecSKevin Wolf 
151317b005f1SKevin Wolf     ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err);
1514462f5bcfSKevin Wolf     if (local_err) {
1515462f5bcfSKevin Wolf         goto fail;
1516462f5bcfSKevin Wolf     }
1517462f5bcfSKevin Wolf 
151876c591b0SKevin Wolf     /* Find the right image format driver */
151976c591b0SKevin Wolf     drv = NULL;
152076c591b0SKevin Wolf     drvname = qdict_get_try_str(options, "driver");
152176c591b0SKevin Wolf     if (drvname) {
152276c591b0SKevin Wolf         drv = bdrv_find_format(drvname);
152376c591b0SKevin Wolf         qdict_del(options, "driver");
152476c591b0SKevin Wolf         if (!drv) {
152576c591b0SKevin Wolf             error_setg(errp, "Unknown driver: '%s'", drvname);
152676c591b0SKevin Wolf             ret = -EINVAL;
152776c591b0SKevin Wolf             goto fail;
152876c591b0SKevin Wolf         }
152976c591b0SKevin Wolf     }
153076c591b0SKevin Wolf 
153176c591b0SKevin Wolf     assert(drvname || !(flags & BDRV_O_PROTOCOL));
153276c591b0SKevin Wolf     if (drv && !drv->bdrv_file_open) {
153376c591b0SKevin Wolf         /* If the user explicitly wants a format driver here, we'll need to add
153476c591b0SKevin Wolf          * another layer for the protocol in bs->file */
153576c591b0SKevin Wolf         flags &= ~BDRV_O_PROTOCOL;
153676c591b0SKevin Wolf     }
153776c591b0SKevin Wolf 
1538de9c0cecSKevin Wolf     bs->options = options;
1539b6ad491aSKevin Wolf     options = qdict_clone_shallow(options);
1540de9c0cecSKevin Wolf 
1541f500a6d3SKevin Wolf     /* Open image file without format layer */
1542f4788adcSKevin Wolf     if ((flags & BDRV_O_PROTOCOL) == 0) {
1543be028adcSJeff Cody         if (flags & BDRV_O_RDWR) {
1544be028adcSJeff Cody             flags |= BDRV_O_ALLOW_RDWR;
1545be028adcSJeff Cody         }
1546b1e6fc08SKevin Wolf         if (flags & BDRV_O_SNAPSHOT) {
1547b1e6fc08SKevin Wolf             snapshot_flags = bdrv_temp_snapshot_flags(flags);
1548b1e6fc08SKevin Wolf             flags = bdrv_backing_flags(flags);
1549b1e6fc08SKevin Wolf         }
1550be028adcSJeff Cody 
1551f67503e5SMax Reitz         assert(file == NULL);
1552054963f8SMax Reitz         ret = bdrv_open_image(&file, filename, options, "file",
15530b50cc88SKevin Wolf                               bdrv_inherited_flags(flags),
15540b50cc88SKevin Wolf                               true, &local_err);
1555f500a6d3SKevin Wolf         if (ret < 0) {
15568bfea15dSKevin Wolf             goto fail;
1557f500a6d3SKevin Wolf         }
1558f4788adcSKevin Wolf     }
1559f500a6d3SKevin Wolf 
156076c591b0SKevin Wolf     /* Image format probing */
156138f3ef57SKevin Wolf     bs->probed = !drv;
156276c591b0SKevin Wolf     if (!drv && file) {
156334b5d2c6SMax Reitz         ret = find_image_format(file, filename, &drv, &local_err);
156417b005f1SKevin Wolf         if (ret < 0) {
156517b005f1SKevin Wolf             goto fail;
156617b005f1SKevin Wolf         }
156776c591b0SKevin Wolf     } else if (!drv) {
15682a05cbe4SMax Reitz         error_setg(errp, "Must specify either driver or file");
15692a05cbe4SMax Reitz         ret = -EINVAL;
15708bfea15dSKevin Wolf         goto fail;
15712a05cbe4SMax Reitz     }
1572f500a6d3SKevin Wolf 
1573b6ce07aaSKevin Wolf     /* Open the image */
157434b5d2c6SMax Reitz     ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
1575b6ce07aaSKevin Wolf     if (ret < 0) {
15768bfea15dSKevin Wolf         goto fail;
15776987307cSChristoph Hellwig     }
15786987307cSChristoph Hellwig 
15792a05cbe4SMax Reitz     if (file && (bs->file != file)) {
15804f6fd349SFam Zheng         bdrv_unref(file);
1581f500a6d3SKevin Wolf         file = NULL;
1582f500a6d3SKevin Wolf     }
1583f500a6d3SKevin Wolf 
1584b6ce07aaSKevin Wolf     /* If there is a backing file, use it */
15859156df12SPaolo Bonzini     if ((flags & BDRV_O_NO_BACKING) == 0) {
158631ca6d07SKevin Wolf         QDict *backing_options;
158731ca6d07SKevin Wolf 
15885726d872SBenoît Canet         qdict_extract_subqdict(options, &backing_options, "backing.");
158934b5d2c6SMax Reitz         ret = bdrv_open_backing_file(bs, backing_options, &local_err);
1590b6ce07aaSKevin Wolf         if (ret < 0) {
1591b6ad491aSKevin Wolf             goto close_and_fail;
1592b6ce07aaSKevin Wolf         }
1593b6ce07aaSKevin Wolf     }
1594b6ce07aaSKevin Wolf 
159591af7014SMax Reitz     bdrv_refresh_filename(bs);
159691af7014SMax Reitz 
1597b998875dSKevin Wolf     /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1598b998875dSKevin Wolf      * temporary snapshot afterwards. */
1599b1e6fc08SKevin Wolf     if (snapshot_flags) {
16006b8aeca5SChen Gang         ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
1601b998875dSKevin Wolf         if (local_err) {
1602b998875dSKevin Wolf             goto close_and_fail;
1603b998875dSKevin Wolf         }
1604b998875dSKevin Wolf     }
1605b998875dSKevin Wolf 
1606b6ad491aSKevin Wolf     /* Check if any unknown options were used */
16075acd9d81SMax Reitz     if (options && (qdict_size(options) != 0)) {
1608b6ad491aSKevin Wolf         const QDictEntry *entry = qdict_first(options);
16095acd9d81SMax Reitz         if (flags & BDRV_O_PROTOCOL) {
16105acd9d81SMax Reitz             error_setg(errp, "Block protocol '%s' doesn't support the option "
16115acd9d81SMax Reitz                        "'%s'", drv->format_name, entry->key);
16125acd9d81SMax Reitz         } else {
161334b5d2c6SMax Reitz             error_setg(errp, "Block format '%s' used by device '%s' doesn't "
16145acd9d81SMax Reitz                        "support the option '%s'", drv->format_name,
1615bfb197e0SMarkus Armbruster                        bdrv_get_device_name(bs), entry->key);
16165acd9d81SMax Reitz         }
1617b6ad491aSKevin Wolf 
1618b6ad491aSKevin Wolf         ret = -EINVAL;
1619b6ad491aSKevin Wolf         goto close_and_fail;
1620b6ad491aSKevin Wolf     }
1621b6ad491aSKevin Wolf 
1622b6ce07aaSKevin Wolf     if (!bdrv_key_required(bs)) {
1623a7f53e26SMarkus Armbruster         if (bs->blk) {
1624a7f53e26SMarkus Armbruster             blk_dev_change_media_cb(bs->blk, true);
1625a7f53e26SMarkus Armbruster         }
1626c3adb58fSMarkus Armbruster     } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1627c3adb58fSMarkus Armbruster                && !runstate_check(RUN_STATE_INMIGRATE)
1628c3adb58fSMarkus Armbruster                && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1629c3adb58fSMarkus Armbruster         error_setg(errp,
1630c3adb58fSMarkus Armbruster                    "Guest must be stopped for opening of encrypted image");
1631c3adb58fSMarkus Armbruster         ret = -EBUSY;
1632c3adb58fSMarkus Armbruster         goto close_and_fail;
1633b6ce07aaSKevin Wolf     }
1634b6ce07aaSKevin Wolf 
1635c3adb58fSMarkus Armbruster     QDECREF(options);
1636f67503e5SMax Reitz     *pbs = bs;
1637b6ce07aaSKevin Wolf     return 0;
1638b6ce07aaSKevin Wolf 
16398bfea15dSKevin Wolf fail:
1640f500a6d3SKevin Wolf     if (file != NULL) {
16414f6fd349SFam Zheng         bdrv_unref(file);
1642f500a6d3SKevin Wolf     }
1643de9c0cecSKevin Wolf     QDECREF(bs->options);
1644b6ad491aSKevin Wolf     QDECREF(options);
1645de9c0cecSKevin Wolf     bs->options = NULL;
1646f67503e5SMax Reitz     if (!*pbs) {
1647f67503e5SMax Reitz         /* If *pbs is NULL, a new BDS has been created in this function and
1648f67503e5SMax Reitz            needs to be freed now. Otherwise, it does not need to be closed,
1649f67503e5SMax Reitz            since it has not really been opened yet. */
1650f67503e5SMax Reitz         bdrv_unref(bs);
1651f67503e5SMax Reitz     }
165284d18f06SMarkus Armbruster     if (local_err) {
165334b5d2c6SMax Reitz         error_propagate(errp, local_err);
165434b5d2c6SMax Reitz     }
1655b6ad491aSKevin Wolf     return ret;
1656de9c0cecSKevin Wolf 
1657b6ad491aSKevin Wolf close_and_fail:
1658f67503e5SMax Reitz     /* See fail path, but now the BDS has to be always closed */
1659f67503e5SMax Reitz     if (*pbs) {
1660b6ad491aSKevin Wolf         bdrv_close(bs);
1661f67503e5SMax Reitz     } else {
1662f67503e5SMax Reitz         bdrv_unref(bs);
1663f67503e5SMax Reitz     }
1664b6ad491aSKevin Wolf     QDECREF(options);
166584d18f06SMarkus Armbruster     if (local_err) {
166634b5d2c6SMax Reitz         error_propagate(errp, local_err);
166734b5d2c6SMax Reitz     }
1668b6ce07aaSKevin Wolf     return ret;
1669b6ce07aaSKevin Wolf }
1670b6ce07aaSKevin Wolf 
1671e971aa12SJeff Cody typedef struct BlockReopenQueueEntry {
1672e971aa12SJeff Cody      bool prepared;
1673e971aa12SJeff Cody      BDRVReopenState state;
1674e971aa12SJeff Cody      QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1675e971aa12SJeff Cody } BlockReopenQueueEntry;
1676e971aa12SJeff Cody 
1677e971aa12SJeff Cody /*
1678e971aa12SJeff Cody  * Adds a BlockDriverState to a simple queue for an atomic, transactional
1679e971aa12SJeff Cody  * reopen of multiple devices.
1680e971aa12SJeff Cody  *
1681e971aa12SJeff Cody  * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1682e971aa12SJeff Cody  * already performed, or alternatively may be NULL a new BlockReopenQueue will
1683e971aa12SJeff Cody  * be created and initialized. This newly created BlockReopenQueue should be
1684e971aa12SJeff Cody  * passed back in for subsequent calls that are intended to be of the same
1685e971aa12SJeff Cody  * atomic 'set'.
1686e971aa12SJeff Cody  *
1687e971aa12SJeff Cody  * bs is the BlockDriverState to add to the reopen queue.
1688e971aa12SJeff Cody  *
1689e971aa12SJeff Cody  * flags contains the open flags for the associated bs
1690e971aa12SJeff Cody  *
1691e971aa12SJeff Cody  * returns a pointer to bs_queue, which is either the newly allocated
1692e971aa12SJeff Cody  * bs_queue, or the existing bs_queue being used.
1693e971aa12SJeff Cody  *
1694e971aa12SJeff Cody  */
1695e971aa12SJeff Cody BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1696e971aa12SJeff Cody                                     BlockDriverState *bs, int flags)
1697e971aa12SJeff Cody {
1698e971aa12SJeff Cody     assert(bs != NULL);
1699e971aa12SJeff Cody 
1700e971aa12SJeff Cody     BlockReopenQueueEntry *bs_entry;
1701e971aa12SJeff Cody     if (bs_queue == NULL) {
1702e971aa12SJeff Cody         bs_queue = g_new0(BlockReopenQueue, 1);
1703e971aa12SJeff Cody         QSIMPLEQ_INIT(bs_queue);
1704e971aa12SJeff Cody     }
1705e971aa12SJeff Cody 
1706f1f25a2eSKevin Wolf     /* bdrv_open() masks this flag out */
1707f1f25a2eSKevin Wolf     flags &= ~BDRV_O_PROTOCOL;
1708f1f25a2eSKevin Wolf 
1709e971aa12SJeff Cody     if (bs->file) {
1710f1f25a2eSKevin Wolf         bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags));
1711e971aa12SJeff Cody     }
1712e971aa12SJeff Cody 
1713e971aa12SJeff Cody     bs_entry = g_new0(BlockReopenQueueEntry, 1);
1714e971aa12SJeff Cody     QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1715e971aa12SJeff Cody 
1716e971aa12SJeff Cody     bs_entry->state.bs = bs;
1717e971aa12SJeff Cody     bs_entry->state.flags = flags;
1718e971aa12SJeff Cody 
1719e971aa12SJeff Cody     return bs_queue;
1720e971aa12SJeff Cody }
1721e971aa12SJeff Cody 
1722e971aa12SJeff Cody /*
1723e971aa12SJeff Cody  * Reopen multiple BlockDriverStates atomically & transactionally.
1724e971aa12SJeff Cody  *
1725e971aa12SJeff Cody  * The queue passed in (bs_queue) must have been built up previous
1726e971aa12SJeff Cody  * via bdrv_reopen_queue().
1727e971aa12SJeff Cody  *
1728e971aa12SJeff Cody  * Reopens all BDS specified in the queue, with the appropriate
1729e971aa12SJeff Cody  * flags.  All devices are prepared for reopen, and failure of any
1730e971aa12SJeff Cody  * device will cause all device changes to be abandonded, and intermediate
1731e971aa12SJeff Cody  * data cleaned up.
1732e971aa12SJeff Cody  *
1733e971aa12SJeff Cody  * If all devices prepare successfully, then the changes are committed
1734e971aa12SJeff Cody  * to all devices.
1735e971aa12SJeff Cody  *
1736e971aa12SJeff Cody  */
1737e971aa12SJeff Cody int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1738e971aa12SJeff Cody {
1739e971aa12SJeff Cody     int ret = -1;
1740e971aa12SJeff Cody     BlockReopenQueueEntry *bs_entry, *next;
1741e971aa12SJeff Cody     Error *local_err = NULL;
1742e971aa12SJeff Cody 
1743e971aa12SJeff Cody     assert(bs_queue != NULL);
1744e971aa12SJeff Cody 
1745e971aa12SJeff Cody     bdrv_drain_all();
1746e971aa12SJeff Cody 
1747e971aa12SJeff Cody     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1748e971aa12SJeff Cody         if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1749e971aa12SJeff Cody             error_propagate(errp, local_err);
1750e971aa12SJeff Cody             goto cleanup;
1751e971aa12SJeff Cody         }
1752e971aa12SJeff Cody         bs_entry->prepared = true;
1753e971aa12SJeff Cody     }
1754e971aa12SJeff Cody 
1755e971aa12SJeff Cody     /* If we reach this point, we have success and just need to apply the
1756e971aa12SJeff Cody      * changes
1757e971aa12SJeff Cody      */
1758e971aa12SJeff Cody     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1759e971aa12SJeff Cody         bdrv_reopen_commit(&bs_entry->state);
1760e971aa12SJeff Cody     }
1761e971aa12SJeff Cody 
1762e971aa12SJeff Cody     ret = 0;
1763e971aa12SJeff Cody 
1764e971aa12SJeff Cody cleanup:
1765e971aa12SJeff Cody     QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1766e971aa12SJeff Cody         if (ret && bs_entry->prepared) {
1767e971aa12SJeff Cody             bdrv_reopen_abort(&bs_entry->state);
1768e971aa12SJeff Cody         }
1769e971aa12SJeff Cody         g_free(bs_entry);
1770e971aa12SJeff Cody     }
1771e971aa12SJeff Cody     g_free(bs_queue);
1772e971aa12SJeff Cody     return ret;
1773e971aa12SJeff Cody }
1774e971aa12SJeff Cody 
1775e971aa12SJeff Cody 
1776e971aa12SJeff Cody /* Reopen a single BlockDriverState with the specified flags. */
1777e971aa12SJeff Cody int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1778e971aa12SJeff Cody {
1779e971aa12SJeff Cody     int ret = -1;
1780e971aa12SJeff Cody     Error *local_err = NULL;
1781e971aa12SJeff Cody     BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1782e971aa12SJeff Cody 
1783e971aa12SJeff Cody     ret = bdrv_reopen_multiple(queue, &local_err);
1784e971aa12SJeff Cody     if (local_err != NULL) {
1785e971aa12SJeff Cody         error_propagate(errp, local_err);
1786e971aa12SJeff Cody     }
1787e971aa12SJeff Cody     return ret;
1788e971aa12SJeff Cody }
1789e971aa12SJeff Cody 
1790e971aa12SJeff Cody 
1791e971aa12SJeff Cody /*
1792e971aa12SJeff Cody  * Prepares a BlockDriverState for reopen. All changes are staged in the
1793e971aa12SJeff Cody  * 'opaque' field of the BDRVReopenState, which is used and allocated by
1794e971aa12SJeff Cody  * the block driver layer .bdrv_reopen_prepare()
1795e971aa12SJeff Cody  *
1796e971aa12SJeff Cody  * bs is the BlockDriverState to reopen
1797e971aa12SJeff Cody  * flags are the new open flags
1798e971aa12SJeff Cody  * queue is the reopen queue
1799e971aa12SJeff Cody  *
1800e971aa12SJeff Cody  * Returns 0 on success, non-zero on error.  On error errp will be set
1801e971aa12SJeff Cody  * as well.
1802e971aa12SJeff Cody  *
1803e971aa12SJeff Cody  * On failure, bdrv_reopen_abort() will be called to clean up any data.
1804e971aa12SJeff Cody  * It is the responsibility of the caller to then call the abort() or
1805e971aa12SJeff Cody  * commit() for any other BDS that have been left in a prepare() state
1806e971aa12SJeff Cody  *
1807e971aa12SJeff Cody  */
1808e971aa12SJeff Cody int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1809e971aa12SJeff Cody                         Error **errp)
1810e971aa12SJeff Cody {
1811e971aa12SJeff Cody     int ret = -1;
1812e971aa12SJeff Cody     Error *local_err = NULL;
1813e971aa12SJeff Cody     BlockDriver *drv;
1814e971aa12SJeff Cody 
1815e971aa12SJeff Cody     assert(reopen_state != NULL);
1816e971aa12SJeff Cody     assert(reopen_state->bs->drv != NULL);
1817e971aa12SJeff Cody     drv = reopen_state->bs->drv;
1818e971aa12SJeff Cody 
1819e971aa12SJeff Cody     /* if we are to stay read-only, do not allow permission change
1820e971aa12SJeff Cody      * to r/w */
1821e971aa12SJeff Cody     if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1822e971aa12SJeff Cody         reopen_state->flags & BDRV_O_RDWR) {
182381e5f78aSAlberto Garcia         error_setg(errp, "Node '%s' is read only",
182481e5f78aSAlberto Garcia                    bdrv_get_device_or_node_name(reopen_state->bs));
1825e971aa12SJeff Cody         goto error;
1826e971aa12SJeff Cody     }
1827e971aa12SJeff Cody 
1828e971aa12SJeff Cody 
1829e971aa12SJeff Cody     ret = bdrv_flush(reopen_state->bs);
1830e971aa12SJeff Cody     if (ret) {
1831e971aa12SJeff Cody         error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1832e971aa12SJeff Cody                   strerror(-ret));
1833e971aa12SJeff Cody         goto error;
1834e971aa12SJeff Cody     }
1835e971aa12SJeff Cody 
1836e971aa12SJeff Cody     if (drv->bdrv_reopen_prepare) {
1837e971aa12SJeff Cody         ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1838e971aa12SJeff Cody         if (ret) {
1839e971aa12SJeff Cody             if (local_err != NULL) {
1840e971aa12SJeff Cody                 error_propagate(errp, local_err);
1841e971aa12SJeff Cody             } else {
1842d8b6895fSLuiz Capitulino                 error_setg(errp, "failed while preparing to reopen image '%s'",
1843e971aa12SJeff Cody                            reopen_state->bs->filename);
1844e971aa12SJeff Cody             }
1845e971aa12SJeff Cody             goto error;
1846e971aa12SJeff Cody         }
1847e971aa12SJeff Cody     } else {
1848e971aa12SJeff Cody         /* It is currently mandatory to have a bdrv_reopen_prepare()
1849e971aa12SJeff Cody          * handler for each supported drv. */
185081e5f78aSAlberto Garcia         error_setg(errp, "Block format '%s' used by node '%s' "
185181e5f78aSAlberto Garcia                    "does not support reopening files", drv->format_name,
185281e5f78aSAlberto Garcia                    bdrv_get_device_or_node_name(reopen_state->bs));
1853e971aa12SJeff Cody         ret = -1;
1854e971aa12SJeff Cody         goto error;
1855e971aa12SJeff Cody     }
1856e971aa12SJeff Cody 
1857e971aa12SJeff Cody     ret = 0;
1858e971aa12SJeff Cody 
1859e971aa12SJeff Cody error:
1860e971aa12SJeff Cody     return ret;
1861e971aa12SJeff Cody }
1862e971aa12SJeff Cody 
1863e971aa12SJeff Cody /*
1864e971aa12SJeff Cody  * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1865e971aa12SJeff Cody  * makes them final by swapping the staging BlockDriverState contents into
1866e971aa12SJeff Cody  * the active BlockDriverState contents.
1867e971aa12SJeff Cody  */
1868e971aa12SJeff Cody void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1869e971aa12SJeff Cody {
1870e971aa12SJeff Cody     BlockDriver *drv;
1871e971aa12SJeff Cody 
1872e971aa12SJeff Cody     assert(reopen_state != NULL);
1873e971aa12SJeff Cody     drv = reopen_state->bs->drv;
1874e971aa12SJeff Cody     assert(drv != NULL);
1875e971aa12SJeff Cody 
1876e971aa12SJeff Cody     /* If there are any driver level actions to take */
1877e971aa12SJeff Cody     if (drv->bdrv_reopen_commit) {
1878e971aa12SJeff Cody         drv->bdrv_reopen_commit(reopen_state);
1879e971aa12SJeff Cody     }
1880e971aa12SJeff Cody 
1881e971aa12SJeff Cody     /* set BDS specific flags now */
1882e971aa12SJeff Cody     reopen_state->bs->open_flags         = reopen_state->flags;
1883e971aa12SJeff Cody     reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1884e971aa12SJeff Cody                                               BDRV_O_CACHE_WB);
1885e971aa12SJeff Cody     reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
1886355ef4acSKevin Wolf 
18873baca891SKevin Wolf     bdrv_refresh_limits(reopen_state->bs, NULL);
1888e971aa12SJeff Cody }
1889e971aa12SJeff Cody 
1890e971aa12SJeff Cody /*
1891e971aa12SJeff Cody  * Abort the reopen, and delete and free the staged changes in
1892e971aa12SJeff Cody  * reopen_state
1893e971aa12SJeff Cody  */
1894e971aa12SJeff Cody void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1895e971aa12SJeff Cody {
1896e971aa12SJeff Cody     BlockDriver *drv;
1897e971aa12SJeff Cody 
1898e971aa12SJeff Cody     assert(reopen_state != NULL);
1899e971aa12SJeff Cody     drv = reopen_state->bs->drv;
1900e971aa12SJeff Cody     assert(drv != NULL);
1901e971aa12SJeff Cody 
1902e971aa12SJeff Cody     if (drv->bdrv_reopen_abort) {
1903e971aa12SJeff Cody         drv->bdrv_reopen_abort(reopen_state);
1904e971aa12SJeff Cody     }
1905e971aa12SJeff Cody }
1906e971aa12SJeff Cody 
1907e971aa12SJeff Cody 
1908fc01f7e7Sbellard void bdrv_close(BlockDriverState *bs)
1909fc01f7e7Sbellard {
191033384421SMax Reitz     BdrvAioNotifier *ban, *ban_next;
191133384421SMax Reitz 
19123e914655SPaolo Bonzini     if (bs->job) {
19133e914655SPaolo Bonzini         block_job_cancel_sync(bs->job);
19143e914655SPaolo Bonzini     }
191558fda173SStefan Hajnoczi     bdrv_drain_all(); /* complete I/O */
191658fda173SStefan Hajnoczi     bdrv_flush(bs);
191758fda173SStefan Hajnoczi     bdrv_drain_all(); /* in case flush left pending I/O */
1918d7d512f6SPaolo Bonzini     notifier_list_notify(&bs->close_notifiers, bs);
19197094f12fSKevin Wolf 
19203cbc002cSPaolo Bonzini     if (bs->drv) {
1921557df6acSStefan Hajnoczi         if (bs->backing_hd) {
1922826b6ca0SFam Zheng             BlockDriverState *backing_hd = bs->backing_hd;
1923826b6ca0SFam Zheng             bdrv_set_backing_hd(bs, NULL);
1924826b6ca0SFam Zheng             bdrv_unref(backing_hd);
1925557df6acSStefan Hajnoczi         }
1926ea2384d3Sbellard         bs->drv->bdrv_close(bs);
19277267c094SAnthony Liguori         g_free(bs->opaque);
1928ea2384d3Sbellard         bs->opaque = NULL;
1929ea2384d3Sbellard         bs->drv = NULL;
193053fec9d3SStefan Hajnoczi         bs->copy_on_read = 0;
1931a275fa42SPaolo Bonzini         bs->backing_file[0] = '\0';
1932a275fa42SPaolo Bonzini         bs->backing_format[0] = '\0';
19336405875cSPaolo Bonzini         bs->total_sectors = 0;
19346405875cSPaolo Bonzini         bs->encrypted = 0;
19356405875cSPaolo Bonzini         bs->valid_key = 0;
19366405875cSPaolo Bonzini         bs->sg = 0;
19370d51b4deSAsias He         bs->zero_beyond_eof = false;
1938de9c0cecSKevin Wolf         QDECREF(bs->options);
1939de9c0cecSKevin Wolf         bs->options = NULL;
194091af7014SMax Reitz         QDECREF(bs->full_open_options);
194191af7014SMax Reitz         bs->full_open_options = NULL;
1942b338082bSbellard 
194366f82ceeSKevin Wolf         if (bs->file != NULL) {
19444f6fd349SFam Zheng             bdrv_unref(bs->file);
19450ac9377dSPaolo Bonzini             bs->file = NULL;
194666f82ceeSKevin Wolf         }
19479ca11154SPavel Hrdina     }
194866f82ceeSKevin Wolf 
1949a7f53e26SMarkus Armbruster     if (bs->blk) {
1950a7f53e26SMarkus Armbruster         blk_dev_change_media_cb(bs->blk, false);
1951a7f53e26SMarkus Armbruster     }
195298f90dbaSZhi Yong Wu 
195398f90dbaSZhi Yong Wu     /*throttling disk I/O limits*/
195498f90dbaSZhi Yong Wu     if (bs->io_limits_enabled) {
195598f90dbaSZhi Yong Wu         bdrv_io_limits_disable(bs);
195698f90dbaSZhi Yong Wu     }
195733384421SMax Reitz 
195833384421SMax Reitz     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
195933384421SMax Reitz         g_free(ban);
196033384421SMax Reitz     }
196133384421SMax Reitz     QLIST_INIT(&bs->aio_notifiers);
1962b338082bSbellard }
1963b338082bSbellard 
19642bc93fedSMORITA Kazutaka void bdrv_close_all(void)
19652bc93fedSMORITA Kazutaka {
19662bc93fedSMORITA Kazutaka     BlockDriverState *bs;
19672bc93fedSMORITA Kazutaka 
1968dc364f4cSBenoît Canet     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1969ed78cda3SStefan Hajnoczi         AioContext *aio_context = bdrv_get_aio_context(bs);
1970ed78cda3SStefan Hajnoczi 
1971ed78cda3SStefan Hajnoczi         aio_context_acquire(aio_context);
19722bc93fedSMORITA Kazutaka         bdrv_close(bs);
1973ed78cda3SStefan Hajnoczi         aio_context_release(aio_context);
19742bc93fedSMORITA Kazutaka     }
19752bc93fedSMORITA Kazutaka }
19762bc93fedSMORITA Kazutaka 
197788266f5aSStefan Hajnoczi /* Check if any requests are in-flight (including throttled requests) */
197888266f5aSStefan Hajnoczi static bool bdrv_requests_pending(BlockDriverState *bs)
197988266f5aSStefan Hajnoczi {
198088266f5aSStefan Hajnoczi     if (!QLIST_EMPTY(&bs->tracked_requests)) {
198188266f5aSStefan Hajnoczi         return true;
198288266f5aSStefan Hajnoczi     }
1983cc0681c4SBenoît Canet     if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
1984cc0681c4SBenoît Canet         return true;
1985cc0681c4SBenoît Canet     }
1986cc0681c4SBenoît Canet     if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
198788266f5aSStefan Hajnoczi         return true;
198888266f5aSStefan Hajnoczi     }
198988266f5aSStefan Hajnoczi     if (bs->file && bdrv_requests_pending(bs->file)) {
199088266f5aSStefan Hajnoczi         return true;
199188266f5aSStefan Hajnoczi     }
199288266f5aSStefan Hajnoczi     if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
199388266f5aSStefan Hajnoczi         return true;
199488266f5aSStefan Hajnoczi     }
199588266f5aSStefan Hajnoczi     return false;
199688266f5aSStefan Hajnoczi }
199788266f5aSStefan Hajnoczi 
19985b98db0aSStefan Hajnoczi static bool bdrv_drain_one(BlockDriverState *bs)
19995b98db0aSStefan Hajnoczi {
20005b98db0aSStefan Hajnoczi     bool bs_busy;
20015b98db0aSStefan Hajnoczi 
20025b98db0aSStefan Hajnoczi     bdrv_flush_io_queue(bs);
20035b98db0aSStefan Hajnoczi     bdrv_start_throttled_reqs(bs);
20045b98db0aSStefan Hajnoczi     bs_busy = bdrv_requests_pending(bs);
20055b98db0aSStefan Hajnoczi     bs_busy |= aio_poll(bdrv_get_aio_context(bs), bs_busy);
20065b98db0aSStefan Hajnoczi     return bs_busy;
20075b98db0aSStefan Hajnoczi }
20085b98db0aSStefan Hajnoczi 
20095b98db0aSStefan Hajnoczi /*
20105b98db0aSStefan Hajnoczi  * Wait for pending requests to complete on a single BlockDriverState subtree
20115b98db0aSStefan Hajnoczi  *
20125b98db0aSStefan Hajnoczi  * See the warning in bdrv_drain_all().  This function can only be called if
20135b98db0aSStefan Hajnoczi  * you are sure nothing can generate I/O because you have op blockers
20145b98db0aSStefan Hajnoczi  * installed.
20155b98db0aSStefan Hajnoczi  *
20165b98db0aSStefan Hajnoczi  * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
20175b98db0aSStefan Hajnoczi  * AioContext.
20185b98db0aSStefan Hajnoczi  */
20195b98db0aSStefan Hajnoczi void bdrv_drain(BlockDriverState *bs)
20205b98db0aSStefan Hajnoczi {
20215b98db0aSStefan Hajnoczi     while (bdrv_drain_one(bs)) {
20225b98db0aSStefan Hajnoczi         /* Keep iterating */
20235b98db0aSStefan Hajnoczi     }
20245b98db0aSStefan Hajnoczi }
20255b98db0aSStefan Hajnoczi 
2026922453bcSStefan Hajnoczi /*
2027922453bcSStefan Hajnoczi  * Wait for pending requests to complete across all BlockDriverStates
2028922453bcSStefan Hajnoczi  *
2029922453bcSStefan Hajnoczi  * This function does not flush data to disk, use bdrv_flush_all() for that
2030922453bcSStefan Hajnoczi  * after calling this function.
20314c355d53SZhi Yong Wu  *
20324c355d53SZhi Yong Wu  * Note that completion of an asynchronous I/O operation can trigger any
20334c355d53SZhi Yong Wu  * number of other I/O operations on other devices---for example a coroutine
20344c355d53SZhi Yong Wu  * can be arbitrarily complex and a constant flow of I/O can come until the
20354c355d53SZhi Yong Wu  * coroutine is complete.  Because of this, it is not possible to have a
20364c355d53SZhi Yong Wu  * function to drain a single device's I/O queue.
2037922453bcSStefan Hajnoczi  */
2038922453bcSStefan Hajnoczi void bdrv_drain_all(void)
2039922453bcSStefan Hajnoczi {
204088266f5aSStefan Hajnoczi     /* Always run first iteration so any pending completion BHs run */
204188266f5aSStefan Hajnoczi     bool busy = true;
2042922453bcSStefan Hajnoczi     BlockDriverState *bs;
2043922453bcSStefan Hajnoczi 
204469da3b0bSFam Zheng     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
204569da3b0bSFam Zheng         AioContext *aio_context = bdrv_get_aio_context(bs);
204669da3b0bSFam Zheng 
204769da3b0bSFam Zheng         aio_context_acquire(aio_context);
204869da3b0bSFam Zheng         if (bs->job) {
204969da3b0bSFam Zheng             block_job_pause(bs->job);
205069da3b0bSFam Zheng         }
205169da3b0bSFam Zheng         aio_context_release(aio_context);
205269da3b0bSFam Zheng     }
205369da3b0bSFam Zheng 
205488266f5aSStefan Hajnoczi     while (busy) {
20559b536adcSStefan Hajnoczi         busy = false;
2056922453bcSStefan Hajnoczi 
20579b536adcSStefan Hajnoczi         QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
20589b536adcSStefan Hajnoczi             AioContext *aio_context = bdrv_get_aio_context(bs);
20599b536adcSStefan Hajnoczi 
20609b536adcSStefan Hajnoczi             aio_context_acquire(aio_context);
20615b98db0aSStefan Hajnoczi             busy |= bdrv_drain_one(bs);
20629b536adcSStefan Hajnoczi             aio_context_release(aio_context);
20639b536adcSStefan Hajnoczi         }
2064922453bcSStefan Hajnoczi     }
206569da3b0bSFam Zheng 
206669da3b0bSFam Zheng     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
206769da3b0bSFam Zheng         AioContext *aio_context = bdrv_get_aio_context(bs);
206869da3b0bSFam Zheng 
206969da3b0bSFam Zheng         aio_context_acquire(aio_context);
207069da3b0bSFam Zheng         if (bs->job) {
207169da3b0bSFam Zheng             block_job_resume(bs->job);
207269da3b0bSFam Zheng         }
207369da3b0bSFam Zheng         aio_context_release(aio_context);
207469da3b0bSFam Zheng     }
2075922453bcSStefan Hajnoczi }
2076922453bcSStefan Hajnoczi 
2077dc364f4cSBenoît Canet /* make a BlockDriverState anonymous by removing from bdrv_state and
2078dc364f4cSBenoît Canet  * graph_bdrv_state list.
2079d22b2f41SRyan Harper    Also, NULL terminate the device_name to prevent double remove */
2080d22b2f41SRyan Harper void bdrv_make_anon(BlockDriverState *bs)
2081d22b2f41SRyan Harper {
2082bfb197e0SMarkus Armbruster     /*
2083bfb197e0SMarkus Armbruster      * Take care to remove bs from bdrv_states only when it's actually
2084bfb197e0SMarkus Armbruster      * in it.  Note that bs->device_list.tqe_prev is initially null,
2085bfb197e0SMarkus Armbruster      * and gets set to non-null by QTAILQ_INSERT_TAIL().  Establish
2086bfb197e0SMarkus Armbruster      * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
2087bfb197e0SMarkus Armbruster      * resetting it to null on remove.
2088bfb197e0SMarkus Armbruster      */
2089bfb197e0SMarkus Armbruster     if (bs->device_list.tqe_prev) {
2090dc364f4cSBenoît Canet         QTAILQ_REMOVE(&bdrv_states, bs, device_list);
2091bfb197e0SMarkus Armbruster         bs->device_list.tqe_prev = NULL;
2092d22b2f41SRyan Harper     }
2093dc364f4cSBenoît Canet     if (bs->node_name[0] != '\0') {
2094dc364f4cSBenoît Canet         QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
2095dc364f4cSBenoît Canet     }
2096dc364f4cSBenoît Canet     bs->node_name[0] = '\0';
2097d22b2f41SRyan Harper }
2098d22b2f41SRyan Harper 
2099e023b2e2SPaolo Bonzini static void bdrv_rebind(BlockDriverState *bs)
2100e023b2e2SPaolo Bonzini {
2101e023b2e2SPaolo Bonzini     if (bs->drv && bs->drv->bdrv_rebind) {
2102e023b2e2SPaolo Bonzini         bs->drv->bdrv_rebind(bs);
2103e023b2e2SPaolo Bonzini     }
2104e023b2e2SPaolo Bonzini }
2105e023b2e2SPaolo Bonzini 
21064ddc07caSPaolo Bonzini static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
21074ddc07caSPaolo Bonzini                                      BlockDriverState *bs_src)
21084ddc07caSPaolo Bonzini {
21094ddc07caSPaolo Bonzini     /* move some fields that need to stay attached to the device */
21104ddc07caSPaolo Bonzini 
21114ddc07caSPaolo Bonzini     /* dev info */
21121b7fd729SPaolo Bonzini     bs_dest->guest_block_size   = bs_src->guest_block_size;
21134ddc07caSPaolo Bonzini     bs_dest->copy_on_read       = bs_src->copy_on_read;
21144ddc07caSPaolo Bonzini 
21154ddc07caSPaolo Bonzini     bs_dest->enable_write_cache = bs_src->enable_write_cache;
21164ddc07caSPaolo Bonzini 
2117cc0681c4SBenoît Canet     /* i/o throttled req */
2118cc0681c4SBenoît Canet     memcpy(&bs_dest->throttle_state,
2119cc0681c4SBenoît Canet            &bs_src->throttle_state,
2120cc0681c4SBenoît Canet            sizeof(ThrottleState));
2121cc0681c4SBenoît Canet     bs_dest->throttled_reqs[0]  = bs_src->throttled_reqs[0];
2122cc0681c4SBenoît Canet     bs_dest->throttled_reqs[1]  = bs_src->throttled_reqs[1];
21234ddc07caSPaolo Bonzini     bs_dest->io_limits_enabled  = bs_src->io_limits_enabled;
21244ddc07caSPaolo Bonzini 
21254ddc07caSPaolo Bonzini     /* r/w error */
21264ddc07caSPaolo Bonzini     bs_dest->on_read_error      = bs_src->on_read_error;
21274ddc07caSPaolo Bonzini     bs_dest->on_write_error     = bs_src->on_write_error;
21284ddc07caSPaolo Bonzini 
21294ddc07caSPaolo Bonzini     /* i/o status */
21304ddc07caSPaolo Bonzini     bs_dest->iostatus_enabled   = bs_src->iostatus_enabled;
21314ddc07caSPaolo Bonzini     bs_dest->iostatus           = bs_src->iostatus;
21324ddc07caSPaolo Bonzini 
21334ddc07caSPaolo Bonzini     /* dirty bitmap */
2134e4654d2dSFam Zheng     bs_dest->dirty_bitmaps      = bs_src->dirty_bitmaps;
21354ddc07caSPaolo Bonzini 
21369fcb0251SFam Zheng     /* reference count */
21379fcb0251SFam Zheng     bs_dest->refcnt             = bs_src->refcnt;
21389fcb0251SFam Zheng 
21394ddc07caSPaolo Bonzini     /* job */
21404ddc07caSPaolo Bonzini     bs_dest->job                = bs_src->job;
21414ddc07caSPaolo Bonzini 
21424ddc07caSPaolo Bonzini     /* keep the same entry in bdrv_states */
2143dc364f4cSBenoît Canet     bs_dest->device_list = bs_src->device_list;
21447e7d56d9SMarkus Armbruster     bs_dest->blk = bs_src->blk;
21457e7d56d9SMarkus Armbruster 
2146fbe40ff7SFam Zheng     memcpy(bs_dest->op_blockers, bs_src->op_blockers,
2147fbe40ff7SFam Zheng            sizeof(bs_dest->op_blockers));
21484ddc07caSPaolo Bonzini }
21494ddc07caSPaolo Bonzini 
21504ddc07caSPaolo Bonzini /*
21514ddc07caSPaolo Bonzini  * Swap bs contents for two image chains while they are live,
21524ddc07caSPaolo Bonzini  * while keeping required fields on the BlockDriverState that is
21534ddc07caSPaolo Bonzini  * actually attached to a device.
21544ddc07caSPaolo Bonzini  *
21554ddc07caSPaolo Bonzini  * This will modify the BlockDriverState fields, and swap contents
21564ddc07caSPaolo Bonzini  * between bs_new and bs_old. Both bs_new and bs_old are modified.
21574ddc07caSPaolo Bonzini  *
2158bfb197e0SMarkus Armbruster  * bs_new must not be attached to a BlockBackend.
21594ddc07caSPaolo Bonzini  *
21604ddc07caSPaolo Bonzini  * This function does not create any image files.
21614ddc07caSPaolo Bonzini  */
21624ddc07caSPaolo Bonzini void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
21634ddc07caSPaolo Bonzini {
21644ddc07caSPaolo Bonzini     BlockDriverState tmp;
21654ddc07caSPaolo Bonzini 
216690ce8a06SBenoît Canet     /* The code needs to swap the node_name but simply swapping node_list won't
216790ce8a06SBenoît Canet      * work so first remove the nodes from the graph list, do the swap then
216890ce8a06SBenoît Canet      * insert them back if needed.
216990ce8a06SBenoît Canet      */
217090ce8a06SBenoît Canet     if (bs_new->node_name[0] != '\0') {
217190ce8a06SBenoît Canet         QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
217290ce8a06SBenoît Canet     }
217390ce8a06SBenoît Canet     if (bs_old->node_name[0] != '\0') {
217490ce8a06SBenoît Canet         QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
217590ce8a06SBenoît Canet     }
217690ce8a06SBenoît Canet 
2177bfb197e0SMarkus Armbruster     /* bs_new must be unattached and shouldn't have anything fancy enabled */
21787e7d56d9SMarkus Armbruster     assert(!bs_new->blk);
2179e4654d2dSFam Zheng     assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
21804ddc07caSPaolo Bonzini     assert(bs_new->job == NULL);
21814ddc07caSPaolo Bonzini     assert(bs_new->io_limits_enabled == false);
2182cc0681c4SBenoît Canet     assert(!throttle_have_timer(&bs_new->throttle_state));
21834ddc07caSPaolo Bonzini 
21844ddc07caSPaolo Bonzini     tmp = *bs_new;
21854ddc07caSPaolo Bonzini     *bs_new = *bs_old;
21864ddc07caSPaolo Bonzini     *bs_old = tmp;
21874ddc07caSPaolo Bonzini 
21884ddc07caSPaolo Bonzini     /* there are some fields that should not be swapped, move them back */
21894ddc07caSPaolo Bonzini     bdrv_move_feature_fields(&tmp, bs_old);
21904ddc07caSPaolo Bonzini     bdrv_move_feature_fields(bs_old, bs_new);
21914ddc07caSPaolo Bonzini     bdrv_move_feature_fields(bs_new, &tmp);
21924ddc07caSPaolo Bonzini 
2193bfb197e0SMarkus Armbruster     /* bs_new must remain unattached */
21947e7d56d9SMarkus Armbruster     assert(!bs_new->blk);
21954ddc07caSPaolo Bonzini 
21964ddc07caSPaolo Bonzini     /* Check a few fields that should remain attached to the device */
21974ddc07caSPaolo Bonzini     assert(bs_new->job == NULL);
21984ddc07caSPaolo Bonzini     assert(bs_new->io_limits_enabled == false);
2199cc0681c4SBenoît Canet     assert(!throttle_have_timer(&bs_new->throttle_state));
22004ddc07caSPaolo Bonzini 
220190ce8a06SBenoît Canet     /* insert the nodes back into the graph node list if needed */
220290ce8a06SBenoît Canet     if (bs_new->node_name[0] != '\0') {
220390ce8a06SBenoît Canet         QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
220490ce8a06SBenoît Canet     }
220590ce8a06SBenoît Canet     if (bs_old->node_name[0] != '\0') {
220690ce8a06SBenoît Canet         QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
220790ce8a06SBenoît Canet     }
220890ce8a06SBenoît Canet 
22094ddc07caSPaolo Bonzini     bdrv_rebind(bs_new);
22104ddc07caSPaolo Bonzini     bdrv_rebind(bs_old);
22114ddc07caSPaolo Bonzini }
22124ddc07caSPaolo Bonzini 
22138802d1fdSJeff Cody /*
22148802d1fdSJeff Cody  * Add new bs contents at the top of an image chain while the chain is
22158802d1fdSJeff Cody  * live, while keeping required fields on the top layer.
22168802d1fdSJeff Cody  *
22178802d1fdSJeff Cody  * This will modify the BlockDriverState fields, and swap contents
22188802d1fdSJeff Cody  * between bs_new and bs_top. Both bs_new and bs_top are modified.
22198802d1fdSJeff Cody  *
2220bfb197e0SMarkus Armbruster  * bs_new must not be attached to a BlockBackend.
2221f6801b83SJeff Cody  *
22228802d1fdSJeff Cody  * This function does not create any image files.
22238802d1fdSJeff Cody  */
22248802d1fdSJeff Cody void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
22258802d1fdSJeff Cody {
22264ddc07caSPaolo Bonzini     bdrv_swap(bs_new, bs_top);
22278802d1fdSJeff Cody 
22288802d1fdSJeff Cody     /* The contents of 'tmp' will become bs_top, as we are
22298802d1fdSJeff Cody      * swapping bs_new and bs_top contents. */
22308d24cce1SFam Zheng     bdrv_set_backing_hd(bs_top, bs_new);
22318802d1fdSJeff Cody }
22328802d1fdSJeff Cody 
22334f6fd349SFam Zheng static void bdrv_delete(BlockDriverState *bs)
2234b338082bSbellard {
22353e914655SPaolo Bonzini     assert(!bs->job);
22363718d8abSFam Zheng     assert(bdrv_op_blocker_is_empty(bs));
22374f6fd349SFam Zheng     assert(!bs->refcnt);
2238e4654d2dSFam Zheng     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
223918846deeSMarkus Armbruster 
2240e1b5c52eSStefan Hajnoczi     bdrv_close(bs);
2241e1b5c52eSStefan Hajnoczi 
22421b7bdbc1SStefan Hajnoczi     /* remove from list, if necessary */
2243d22b2f41SRyan Harper     bdrv_make_anon(bs);
224434c6f050Saurel32 
22457267c094SAnthony Liguori     g_free(bs);
2246fc01f7e7Sbellard }
2247fc01f7e7Sbellard 
2248e97fc193Saliguori /*
2249e97fc193Saliguori  * Run consistency checks on an image
2250e97fc193Saliguori  *
2251e076f338SKevin Wolf  * Returns 0 if the check could be completed (it doesn't mean that the image is
2252a1c7273bSStefan Weil  * free of errors) or -errno when an internal error occurred. The results of the
2253e076f338SKevin Wolf  * check are stored in res.
2254e97fc193Saliguori  */
22554534ff54SKevin Wolf int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
2256e97fc193Saliguori {
2257908bcd54SMax Reitz     if (bs->drv == NULL) {
2258908bcd54SMax Reitz         return -ENOMEDIUM;
2259908bcd54SMax Reitz     }
2260e97fc193Saliguori     if (bs->drv->bdrv_check == NULL) {
2261e97fc193Saliguori         return -ENOTSUP;
2262e97fc193Saliguori     }
2263e97fc193Saliguori 
2264e076f338SKevin Wolf     memset(res, 0, sizeof(*res));
22654534ff54SKevin Wolf     return bs->drv->bdrv_check(bs, res, fix);
2266e97fc193Saliguori }
2267e97fc193Saliguori 
22688a426614SKevin Wolf #define COMMIT_BUF_SECTORS 2048
22698a426614SKevin Wolf 
227033e3963eSbellard /* commit COW file into the raw image */
227133e3963eSbellard int bdrv_commit(BlockDriverState *bs)
227233e3963eSbellard {
227319cb3738Sbellard     BlockDriver *drv = bs->drv;
227472706ea4SJeff Cody     int64_t sector, total_sectors, length, backing_length;
22758a426614SKevin Wolf     int n, ro, open_flags;
22760bce597dSJeff Cody     int ret = 0;
227772706ea4SJeff Cody     uint8_t *buf = NULL;
227833e3963eSbellard 
227919cb3738Sbellard     if (!drv)
228019cb3738Sbellard         return -ENOMEDIUM;
228133e3963eSbellard 
22824dca4b63SNaphtali Sprei     if (!bs->backing_hd) {
22834dca4b63SNaphtali Sprei         return -ENOTSUP;
22844dca4b63SNaphtali Sprei     }
22854dca4b63SNaphtali Sprei 
2286bb00021dSFam Zheng     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
2287bb00021dSFam Zheng         bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
22882d3735d3SStefan Hajnoczi         return -EBUSY;
22892d3735d3SStefan Hajnoczi     }
22902d3735d3SStefan Hajnoczi 
22914dca4b63SNaphtali Sprei     ro = bs->backing_hd->read_only;
22924dca4b63SNaphtali Sprei     open_flags =  bs->backing_hd->open_flags;
22934dca4b63SNaphtali Sprei 
22944dca4b63SNaphtali Sprei     if (ro) {
22950bce597dSJeff Cody         if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
22960bce597dSJeff Cody             return -EACCES;
22974dca4b63SNaphtali Sprei         }
2298ea2384d3Sbellard     }
2299ea2384d3Sbellard 
230072706ea4SJeff Cody     length = bdrv_getlength(bs);
230172706ea4SJeff Cody     if (length < 0) {
230272706ea4SJeff Cody         ret = length;
230372706ea4SJeff Cody         goto ro_cleanup;
230472706ea4SJeff Cody     }
230572706ea4SJeff Cody 
230672706ea4SJeff Cody     backing_length = bdrv_getlength(bs->backing_hd);
230772706ea4SJeff Cody     if (backing_length < 0) {
230872706ea4SJeff Cody         ret = backing_length;
230972706ea4SJeff Cody         goto ro_cleanup;
231072706ea4SJeff Cody     }
231172706ea4SJeff Cody 
231272706ea4SJeff Cody     /* If our top snapshot is larger than the backing file image,
231372706ea4SJeff Cody      * grow the backing file image if possible.  If not possible,
231472706ea4SJeff Cody      * we must return an error */
231572706ea4SJeff Cody     if (length > backing_length) {
231672706ea4SJeff Cody         ret = bdrv_truncate(bs->backing_hd, length);
231772706ea4SJeff Cody         if (ret < 0) {
231872706ea4SJeff Cody             goto ro_cleanup;
231972706ea4SJeff Cody         }
232072706ea4SJeff Cody     }
232172706ea4SJeff Cody 
232272706ea4SJeff Cody     total_sectors = length >> BDRV_SECTOR_BITS;
2323857d4f46SKevin Wolf 
2324857d4f46SKevin Wolf     /* qemu_try_blockalign() for bs will choose an alignment that works for
2325857d4f46SKevin Wolf      * bs->backing_hd as well, so no need to compare the alignment manually. */
2326857d4f46SKevin Wolf     buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2327857d4f46SKevin Wolf     if (buf == NULL) {
2328857d4f46SKevin Wolf         ret = -ENOMEM;
2329857d4f46SKevin Wolf         goto ro_cleanup;
2330857d4f46SKevin Wolf     }
23318a426614SKevin Wolf 
23328a426614SKevin Wolf     for (sector = 0; sector < total_sectors; sector += n) {
2333d663640cSPaolo Bonzini         ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2334d663640cSPaolo Bonzini         if (ret < 0) {
2335d663640cSPaolo Bonzini             goto ro_cleanup;
2336d663640cSPaolo Bonzini         }
2337d663640cSPaolo Bonzini         if (ret) {
2338dabfa6ccSKevin Wolf             ret = bdrv_read(bs, sector, buf, n);
2339dabfa6ccSKevin Wolf             if (ret < 0) {
23404dca4b63SNaphtali Sprei                 goto ro_cleanup;
234133e3963eSbellard             }
234233e3963eSbellard 
2343dabfa6ccSKevin Wolf             ret = bdrv_write(bs->backing_hd, sector, buf, n);
2344dabfa6ccSKevin Wolf             if (ret < 0) {
23454dca4b63SNaphtali Sprei                 goto ro_cleanup;
234633e3963eSbellard             }
234733e3963eSbellard         }
234833e3963eSbellard     }
234995389c86Sbellard 
23501d44952fSChristoph Hellwig     if (drv->bdrv_make_empty) {
23511d44952fSChristoph Hellwig         ret = drv->bdrv_make_empty(bs);
2352dabfa6ccSKevin Wolf         if (ret < 0) {
2353dabfa6ccSKevin Wolf             goto ro_cleanup;
2354dabfa6ccSKevin Wolf         }
23551d44952fSChristoph Hellwig         bdrv_flush(bs);
23561d44952fSChristoph Hellwig     }
235795389c86Sbellard 
23583f5075aeSChristoph Hellwig     /*
23593f5075aeSChristoph Hellwig      * Make sure all data we wrote to the backing device is actually
23603f5075aeSChristoph Hellwig      * stable on disk.
23613f5075aeSChristoph Hellwig      */
2362dabfa6ccSKevin Wolf     if (bs->backing_hd) {
23633f5075aeSChristoph Hellwig         bdrv_flush(bs->backing_hd);
2364dabfa6ccSKevin Wolf     }
23654dca4b63SNaphtali Sprei 
2366dabfa6ccSKevin Wolf     ret = 0;
23674dca4b63SNaphtali Sprei ro_cleanup:
2368857d4f46SKevin Wolf     qemu_vfree(buf);
23694dca4b63SNaphtali Sprei 
23704dca4b63SNaphtali Sprei     if (ro) {
23710bce597dSJeff Cody         /* ignoring error return here */
23720bce597dSJeff Cody         bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
23734dca4b63SNaphtali Sprei     }
23744dca4b63SNaphtali Sprei 
23751d44952fSChristoph Hellwig     return ret;
237633e3963eSbellard }
237733e3963eSbellard 
2378e8877497SStefan Hajnoczi int bdrv_commit_all(void)
23796ab4b5abSMarkus Armbruster {
23806ab4b5abSMarkus Armbruster     BlockDriverState *bs;
23816ab4b5abSMarkus Armbruster 
2382dc364f4cSBenoît Canet     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2383ed78cda3SStefan Hajnoczi         AioContext *aio_context = bdrv_get_aio_context(bs);
2384ed78cda3SStefan Hajnoczi 
2385ed78cda3SStefan Hajnoczi         aio_context_acquire(aio_context);
2386272d2d8eSJeff Cody         if (bs->drv && bs->backing_hd) {
2387e8877497SStefan Hajnoczi             int ret = bdrv_commit(bs);
2388e8877497SStefan Hajnoczi             if (ret < 0) {
2389ed78cda3SStefan Hajnoczi                 aio_context_release(aio_context);
2390e8877497SStefan Hajnoczi                 return ret;
23916ab4b5abSMarkus Armbruster             }
23926ab4b5abSMarkus Armbruster         }
2393ed78cda3SStefan Hajnoczi         aio_context_release(aio_context);
2394272d2d8eSJeff Cody     }
2395e8877497SStefan Hajnoczi     return 0;
2396e8877497SStefan Hajnoczi }
23976ab4b5abSMarkus Armbruster 
2398dbffbdcfSStefan Hajnoczi /**
2399dbffbdcfSStefan Hajnoczi  * Remove an active request from the tracked requests list
2400dbffbdcfSStefan Hajnoczi  *
2401dbffbdcfSStefan Hajnoczi  * This function should be called when a tracked request is completing.
2402dbffbdcfSStefan Hajnoczi  */
2403dbffbdcfSStefan Hajnoczi static void tracked_request_end(BdrvTrackedRequest *req)
2404dbffbdcfSStefan Hajnoczi {
24052dbafdc0SKevin Wolf     if (req->serialising) {
24062dbafdc0SKevin Wolf         req->bs->serialising_in_flight--;
24072dbafdc0SKevin Wolf     }
24082dbafdc0SKevin Wolf 
2409dbffbdcfSStefan Hajnoczi     QLIST_REMOVE(req, list);
2410f4658285SStefan Hajnoczi     qemu_co_queue_restart_all(&req->wait_queue);
2411dbffbdcfSStefan Hajnoczi }
2412dbffbdcfSStefan Hajnoczi 
2413dbffbdcfSStefan Hajnoczi /**
2414dbffbdcfSStefan Hajnoczi  * Add an active request to the tracked requests list
2415dbffbdcfSStefan Hajnoczi  */
2416dbffbdcfSStefan Hajnoczi static void tracked_request_begin(BdrvTrackedRequest *req,
2417dbffbdcfSStefan Hajnoczi                                   BlockDriverState *bs,
2418793ed47aSKevin Wolf                                   int64_t offset,
2419793ed47aSKevin Wolf                                   unsigned int bytes, bool is_write)
2420dbffbdcfSStefan Hajnoczi {
2421dbffbdcfSStefan Hajnoczi     *req = (BdrvTrackedRequest){
2422dbffbdcfSStefan Hajnoczi         .bs = bs,
2423793ed47aSKevin Wolf         .offset         = offset,
2424793ed47aSKevin Wolf         .bytes          = bytes,
2425dbffbdcfSStefan Hajnoczi         .is_write       = is_write,
24265f8b6491SStefan Hajnoczi         .co             = qemu_coroutine_self(),
24272dbafdc0SKevin Wolf         .serialising    = false,
24287327145fSKevin Wolf         .overlap_offset = offset,
24297327145fSKevin Wolf         .overlap_bytes  = bytes,
2430dbffbdcfSStefan Hajnoczi     };
2431dbffbdcfSStefan Hajnoczi 
2432f4658285SStefan Hajnoczi     qemu_co_queue_init(&req->wait_queue);
2433f4658285SStefan Hajnoczi 
2434dbffbdcfSStefan Hajnoczi     QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
2435dbffbdcfSStefan Hajnoczi }
2436dbffbdcfSStefan Hajnoczi 
2437e96126ffSKevin Wolf static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
24382dbafdc0SKevin Wolf {
24397327145fSKevin Wolf     int64_t overlap_offset = req->offset & ~(align - 1);
2440e96126ffSKevin Wolf     unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
24417327145fSKevin Wolf                                - overlap_offset;
24427327145fSKevin Wolf 
24432dbafdc0SKevin Wolf     if (!req->serialising) {
24442dbafdc0SKevin Wolf         req->bs->serialising_in_flight++;
24452dbafdc0SKevin Wolf         req->serialising = true;
24462dbafdc0SKevin Wolf     }
24477327145fSKevin Wolf 
24487327145fSKevin Wolf     req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
24497327145fSKevin Wolf     req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
24502dbafdc0SKevin Wolf }
24512dbafdc0SKevin Wolf 
2452d83947acSStefan Hajnoczi /**
2453d83947acSStefan Hajnoczi  * Round a region to cluster boundaries
2454d83947acSStefan Hajnoczi  */
2455343bded4SPaolo Bonzini void bdrv_round_to_clusters(BlockDriverState *bs,
2456d83947acSStefan Hajnoczi                             int64_t sector_num, int nb_sectors,
2457d83947acSStefan Hajnoczi                             int64_t *cluster_sector_num,
2458d83947acSStefan Hajnoczi                             int *cluster_nb_sectors)
2459d83947acSStefan Hajnoczi {
2460d83947acSStefan Hajnoczi     BlockDriverInfo bdi;
2461d83947acSStefan Hajnoczi 
2462d83947acSStefan Hajnoczi     if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
2463d83947acSStefan Hajnoczi         *cluster_sector_num = sector_num;
2464d83947acSStefan Hajnoczi         *cluster_nb_sectors = nb_sectors;
2465d83947acSStefan Hajnoczi     } else {
2466d83947acSStefan Hajnoczi         int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
2467d83947acSStefan Hajnoczi         *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
2468d83947acSStefan Hajnoczi         *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
2469d83947acSStefan Hajnoczi                                             nb_sectors, c);
2470d83947acSStefan Hajnoczi     }
2471d83947acSStefan Hajnoczi }
2472d83947acSStefan Hajnoczi 
24737327145fSKevin Wolf static int bdrv_get_cluster_size(BlockDriverState *bs)
2474793ed47aSKevin Wolf {
2475793ed47aSKevin Wolf     BlockDriverInfo bdi;
24767327145fSKevin Wolf     int ret;
2477793ed47aSKevin Wolf 
24787327145fSKevin Wolf     ret = bdrv_get_info(bs, &bdi);
24797327145fSKevin Wolf     if (ret < 0 || bdi.cluster_size == 0) {
24807327145fSKevin Wolf         return bs->request_alignment;
2481793ed47aSKevin Wolf     } else {
24827327145fSKevin Wolf         return bdi.cluster_size;
2483793ed47aSKevin Wolf     }
2484793ed47aSKevin Wolf }
2485793ed47aSKevin Wolf 
2486f4658285SStefan Hajnoczi static bool tracked_request_overlaps(BdrvTrackedRequest *req,
2487793ed47aSKevin Wolf                                      int64_t offset, unsigned int bytes)
2488793ed47aSKevin Wolf {
2489d83947acSStefan Hajnoczi     /*        aaaa   bbbb */
24907327145fSKevin Wolf     if (offset >= req->overlap_offset + req->overlap_bytes) {
2491d83947acSStefan Hajnoczi         return false;
2492d83947acSStefan Hajnoczi     }
2493d83947acSStefan Hajnoczi     /* bbbb   aaaa        */
24947327145fSKevin Wolf     if (req->overlap_offset >= offset + bytes) {
2495d83947acSStefan Hajnoczi         return false;
2496d83947acSStefan Hajnoczi     }
2497d83947acSStefan Hajnoczi     return true;
2498f4658285SStefan Hajnoczi }
2499f4658285SStefan Hajnoczi 
250028de2dcdSKevin Wolf static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
2501f4658285SStefan Hajnoczi {
25022dbafdc0SKevin Wolf     BlockDriverState *bs = self->bs;
2503f4658285SStefan Hajnoczi     BdrvTrackedRequest *req;
2504f4658285SStefan Hajnoczi     bool retry;
250528de2dcdSKevin Wolf     bool waited = false;
2506f4658285SStefan Hajnoczi 
25072dbafdc0SKevin Wolf     if (!bs->serialising_in_flight) {
250828de2dcdSKevin Wolf         return false;
25092dbafdc0SKevin Wolf     }
25102dbafdc0SKevin Wolf 
2511f4658285SStefan Hajnoczi     do {
2512f4658285SStefan Hajnoczi         retry = false;
2513f4658285SStefan Hajnoczi         QLIST_FOREACH(req, &bs->tracked_requests, list) {
25142dbafdc0SKevin Wolf             if (req == self || (!req->serialising && !self->serialising)) {
251565afd211SKevin Wolf                 continue;
251665afd211SKevin Wolf             }
25177327145fSKevin Wolf             if (tracked_request_overlaps(req, self->overlap_offset,
25187327145fSKevin Wolf                                          self->overlap_bytes))
25197327145fSKevin Wolf             {
25205f8b6491SStefan Hajnoczi                 /* Hitting this means there was a reentrant request, for
25215f8b6491SStefan Hajnoczi                  * example, a block driver issuing nested requests.  This must
25225f8b6491SStefan Hajnoczi                  * never happen since it means deadlock.
25235f8b6491SStefan Hajnoczi                  */
25245f8b6491SStefan Hajnoczi                 assert(qemu_coroutine_self() != req->co);
25255f8b6491SStefan Hajnoczi 
25266460440fSKevin Wolf                 /* If the request is already (indirectly) waiting for us, or
25276460440fSKevin Wolf                  * will wait for us as soon as it wakes up, then just go on
25286460440fSKevin Wolf                  * (instead of producing a deadlock in the former case). */
25296460440fSKevin Wolf                 if (!req->waiting_for) {
25306460440fSKevin Wolf                     self->waiting_for = req;
2531f4658285SStefan Hajnoczi                     qemu_co_queue_wait(&req->wait_queue);
25326460440fSKevin Wolf                     self->waiting_for = NULL;
2533f4658285SStefan Hajnoczi                     retry = true;
253428de2dcdSKevin Wolf                     waited = true;
2535f4658285SStefan Hajnoczi                     break;
2536f4658285SStefan Hajnoczi                 }
2537f4658285SStefan Hajnoczi             }
25386460440fSKevin Wolf         }
2539f4658285SStefan Hajnoczi     } while (retry);
254028de2dcdSKevin Wolf 
254128de2dcdSKevin Wolf     return waited;
2542f4658285SStefan Hajnoczi }
2543f4658285SStefan Hajnoczi 
2544756e6736SKevin Wolf /*
2545756e6736SKevin Wolf  * Return values:
2546756e6736SKevin Wolf  * 0        - success
2547756e6736SKevin Wolf  * -EINVAL  - backing format specified, but no file
2548756e6736SKevin Wolf  * -ENOSPC  - can't update the backing file because no space is left in the
2549756e6736SKevin Wolf  *            image file header
2550756e6736SKevin Wolf  * -ENOTSUP - format driver doesn't support changing the backing file
2551756e6736SKevin Wolf  */
2552756e6736SKevin Wolf int bdrv_change_backing_file(BlockDriverState *bs,
2553756e6736SKevin Wolf     const char *backing_file, const char *backing_fmt)
2554756e6736SKevin Wolf {
2555756e6736SKevin Wolf     BlockDriver *drv = bs->drv;
2556469ef350SPaolo Bonzini     int ret;
2557756e6736SKevin Wolf 
25585f377794SPaolo Bonzini     /* Backing file format doesn't make sense without a backing file */
25595f377794SPaolo Bonzini     if (backing_fmt && !backing_file) {
25605f377794SPaolo Bonzini         return -EINVAL;
25615f377794SPaolo Bonzini     }
25625f377794SPaolo Bonzini 
2563756e6736SKevin Wolf     if (drv->bdrv_change_backing_file != NULL) {
2564469ef350SPaolo Bonzini         ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2565756e6736SKevin Wolf     } else {
2566469ef350SPaolo Bonzini         ret = -ENOTSUP;
2567756e6736SKevin Wolf     }
2568469ef350SPaolo Bonzini 
2569469ef350SPaolo Bonzini     if (ret == 0) {
2570469ef350SPaolo Bonzini         pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2571469ef350SPaolo Bonzini         pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2572469ef350SPaolo Bonzini     }
2573469ef350SPaolo Bonzini     return ret;
2574756e6736SKevin Wolf }
2575756e6736SKevin Wolf 
25766ebdcee2SJeff Cody /*
25776ebdcee2SJeff Cody  * Finds the image layer in the chain that has 'bs' as its backing file.
25786ebdcee2SJeff Cody  *
25796ebdcee2SJeff Cody  * active is the current topmost image.
25806ebdcee2SJeff Cody  *
25816ebdcee2SJeff Cody  * Returns NULL if bs is not found in active's image chain,
25826ebdcee2SJeff Cody  * or if active == bs.
25834caf0fcdSJeff Cody  *
25844caf0fcdSJeff Cody  * Returns the bottommost base image if bs == NULL.
25856ebdcee2SJeff Cody  */
25866ebdcee2SJeff Cody BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
25876ebdcee2SJeff Cody                                     BlockDriverState *bs)
25886ebdcee2SJeff Cody {
25894caf0fcdSJeff Cody     while (active && bs != active->backing_hd) {
25904caf0fcdSJeff Cody         active = active->backing_hd;
25916ebdcee2SJeff Cody     }
25926ebdcee2SJeff Cody 
25934caf0fcdSJeff Cody     return active;
25946ebdcee2SJeff Cody }
25956ebdcee2SJeff Cody 
25964caf0fcdSJeff Cody /* Given a BDS, searches for the base layer. */
25974caf0fcdSJeff Cody BlockDriverState *bdrv_find_base(BlockDriverState *bs)
25984caf0fcdSJeff Cody {
25994caf0fcdSJeff Cody     return bdrv_find_overlay(bs, NULL);
26006ebdcee2SJeff Cody }
26016ebdcee2SJeff Cody 
26026ebdcee2SJeff Cody typedef struct BlkIntermediateStates {
26036ebdcee2SJeff Cody     BlockDriverState *bs;
26046ebdcee2SJeff Cody     QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
26056ebdcee2SJeff Cody } BlkIntermediateStates;
26066ebdcee2SJeff Cody 
26076ebdcee2SJeff Cody 
26086ebdcee2SJeff Cody /*
26096ebdcee2SJeff Cody  * Drops images above 'base' up to and including 'top', and sets the image
26106ebdcee2SJeff Cody  * above 'top' to have base as its backing file.
26116ebdcee2SJeff Cody  *
26126ebdcee2SJeff Cody  * Requires that the overlay to 'top' is opened r/w, so that the backing file
26136ebdcee2SJeff Cody  * information in 'bs' can be properly updated.
26146ebdcee2SJeff Cody  *
26156ebdcee2SJeff Cody  * E.g., this will convert the following chain:
26166ebdcee2SJeff Cody  * bottom <- base <- intermediate <- top <- active
26176ebdcee2SJeff Cody  *
26186ebdcee2SJeff Cody  * to
26196ebdcee2SJeff Cody  *
26206ebdcee2SJeff Cody  * bottom <- base <- active
26216ebdcee2SJeff Cody  *
26226ebdcee2SJeff Cody  * It is allowed for bottom==base, in which case it converts:
26236ebdcee2SJeff Cody  *
26246ebdcee2SJeff Cody  * base <- intermediate <- top <- active
26256ebdcee2SJeff Cody  *
26266ebdcee2SJeff Cody  * to
26276ebdcee2SJeff Cody  *
26286ebdcee2SJeff Cody  * base <- active
26296ebdcee2SJeff Cody  *
263054e26900SJeff Cody  * If backing_file_str is non-NULL, it will be used when modifying top's
263154e26900SJeff Cody  * overlay image metadata.
263254e26900SJeff Cody  *
26336ebdcee2SJeff Cody  * Error conditions:
26346ebdcee2SJeff Cody  *  if active == top, that is considered an error
26356ebdcee2SJeff Cody  *
26366ebdcee2SJeff Cody  */
26376ebdcee2SJeff Cody int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
263854e26900SJeff Cody                            BlockDriverState *base, const char *backing_file_str)
26396ebdcee2SJeff Cody {
26406ebdcee2SJeff Cody     BlockDriverState *intermediate;
26416ebdcee2SJeff Cody     BlockDriverState *base_bs = NULL;
26426ebdcee2SJeff Cody     BlockDriverState *new_top_bs = NULL;
26436ebdcee2SJeff Cody     BlkIntermediateStates *intermediate_state, *next;
26446ebdcee2SJeff Cody     int ret = -EIO;
26456ebdcee2SJeff Cody 
26466ebdcee2SJeff Cody     QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
26476ebdcee2SJeff Cody     QSIMPLEQ_INIT(&states_to_delete);
26486ebdcee2SJeff Cody 
26496ebdcee2SJeff Cody     if (!top->drv || !base->drv) {
26506ebdcee2SJeff Cody         goto exit;
26516ebdcee2SJeff Cody     }
26526ebdcee2SJeff Cody 
26536ebdcee2SJeff Cody     new_top_bs = bdrv_find_overlay(active, top);
26546ebdcee2SJeff Cody 
26556ebdcee2SJeff Cody     if (new_top_bs == NULL) {
26566ebdcee2SJeff Cody         /* we could not find the image above 'top', this is an error */
26576ebdcee2SJeff Cody         goto exit;
26586ebdcee2SJeff Cody     }
26596ebdcee2SJeff Cody 
26606ebdcee2SJeff Cody     /* special case of new_top_bs->backing_hd already pointing to base - nothing
26616ebdcee2SJeff Cody      * to do, no intermediate images */
26626ebdcee2SJeff Cody     if (new_top_bs->backing_hd == base) {
26636ebdcee2SJeff Cody         ret = 0;
26646ebdcee2SJeff Cody         goto exit;
26656ebdcee2SJeff Cody     }
26666ebdcee2SJeff Cody 
26676ebdcee2SJeff Cody     intermediate = top;
26686ebdcee2SJeff Cody 
26696ebdcee2SJeff Cody     /* now we will go down through the list, and add each BDS we find
26706ebdcee2SJeff Cody      * into our deletion queue, until we hit the 'base'
26716ebdcee2SJeff Cody      */
26726ebdcee2SJeff Cody     while (intermediate) {
26735839e53bSMarkus Armbruster         intermediate_state = g_new0(BlkIntermediateStates, 1);
26746ebdcee2SJeff Cody         intermediate_state->bs = intermediate;
26756ebdcee2SJeff Cody         QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
26766ebdcee2SJeff Cody 
26776ebdcee2SJeff Cody         if (intermediate->backing_hd == base) {
26786ebdcee2SJeff Cody             base_bs = intermediate->backing_hd;
26796ebdcee2SJeff Cody             break;
26806ebdcee2SJeff Cody         }
26816ebdcee2SJeff Cody         intermediate = intermediate->backing_hd;
26826ebdcee2SJeff Cody     }
26836ebdcee2SJeff Cody     if (base_bs == NULL) {
26846ebdcee2SJeff Cody         /* something went wrong, we did not end at the base. safely
26856ebdcee2SJeff Cody          * unravel everything, and exit with error */
26866ebdcee2SJeff Cody         goto exit;
26876ebdcee2SJeff Cody     }
26886ebdcee2SJeff Cody 
26896ebdcee2SJeff Cody     /* success - we can delete the intermediate states, and link top->base */
269054e26900SJeff Cody     backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
269154e26900SJeff Cody     ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
26926ebdcee2SJeff Cody                                    base_bs->drv ? base_bs->drv->format_name : "");
26936ebdcee2SJeff Cody     if (ret) {
26946ebdcee2SJeff Cody         goto exit;
26956ebdcee2SJeff Cody     }
2696920beae1SFam Zheng     bdrv_set_backing_hd(new_top_bs, base_bs);
26976ebdcee2SJeff Cody 
26986ebdcee2SJeff Cody     QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
26996ebdcee2SJeff Cody         /* so that bdrv_close() does not recursively close the chain */
2700920beae1SFam Zheng         bdrv_set_backing_hd(intermediate_state->bs, NULL);
27014f6fd349SFam Zheng         bdrv_unref(intermediate_state->bs);
27026ebdcee2SJeff Cody     }
27036ebdcee2SJeff Cody     ret = 0;
27046ebdcee2SJeff Cody 
27056ebdcee2SJeff Cody exit:
27066ebdcee2SJeff Cody     QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
27076ebdcee2SJeff Cody         g_free(intermediate_state);
27086ebdcee2SJeff Cody     }
27096ebdcee2SJeff Cody     return ret;
27106ebdcee2SJeff Cody }
27116ebdcee2SJeff Cody 
27126ebdcee2SJeff Cody 
271371d0770cSaliguori static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
271471d0770cSaliguori                                    size_t size)
271571d0770cSaliguori {
271675af1f34SPeter Lieven     if (size > BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) {
27171dd3a447SKevin Wolf         return -EIO;
27181dd3a447SKevin Wolf     }
27191dd3a447SKevin Wolf 
2720c0191e76SMax Reitz     if (!bdrv_is_inserted(bs)) {
272171d0770cSaliguori         return -ENOMEDIUM;
2722c0191e76SMax Reitz     }
272371d0770cSaliguori 
2724c0191e76SMax Reitz     if (offset < 0) {
2725fbb7b4e0SKevin Wolf         return -EIO;
2726c0191e76SMax Reitz     }
272771d0770cSaliguori 
272871d0770cSaliguori     return 0;
272971d0770cSaliguori }
273071d0770cSaliguori 
273171d0770cSaliguori static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
273271d0770cSaliguori                               int nb_sectors)
273371d0770cSaliguori {
273475af1f34SPeter Lieven     if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
27358f4754edSKevin Wolf         return -EIO;
27368f4754edSKevin Wolf     }
27378f4754edSKevin Wolf 
2738eb5a3165SJes Sorensen     return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
2739eb5a3165SJes Sorensen                                    nb_sectors * BDRV_SECTOR_SIZE);
274071d0770cSaliguori }
274171d0770cSaliguori 
27421c9805a3SStefan Hajnoczi typedef struct RwCo {
27431c9805a3SStefan Hajnoczi     BlockDriverState *bs;
2744775aa8b6SKevin Wolf     int64_t offset;
27451c9805a3SStefan Hajnoczi     QEMUIOVector *qiov;
27461c9805a3SStefan Hajnoczi     bool is_write;
27471c9805a3SStefan Hajnoczi     int ret;
27484105eaaaSPeter Lieven     BdrvRequestFlags flags;
27491c9805a3SStefan Hajnoczi } RwCo;
27501c9805a3SStefan Hajnoczi 
27511c9805a3SStefan Hajnoczi static void coroutine_fn bdrv_rw_co_entry(void *opaque)
2752fc01f7e7Sbellard {
27531c9805a3SStefan Hajnoczi     RwCo *rwco = opaque;
2754fc01f7e7Sbellard 
27551c9805a3SStefan Hajnoczi     if (!rwco->is_write) {
2756775aa8b6SKevin Wolf         rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
2757775aa8b6SKevin Wolf                                       rwco->qiov->size, rwco->qiov,
27584105eaaaSPeter Lieven                                       rwco->flags);
27591c9805a3SStefan Hajnoczi     } else {
2760775aa8b6SKevin Wolf         rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
2761775aa8b6SKevin Wolf                                        rwco->qiov->size, rwco->qiov,
27624105eaaaSPeter Lieven                                        rwco->flags);
27631c9805a3SStefan Hajnoczi     }
27641c9805a3SStefan Hajnoczi }
2765e7a8a783SKevin Wolf 
27661c9805a3SStefan Hajnoczi /*
27678d3b1a2dSKevin Wolf  * Process a vectored synchronous request using coroutines
27681c9805a3SStefan Hajnoczi  */
2769775aa8b6SKevin Wolf static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
27704105eaaaSPeter Lieven                         QEMUIOVector *qiov, bool is_write,
27714105eaaaSPeter Lieven                         BdrvRequestFlags flags)
27721c9805a3SStefan Hajnoczi {
27731c9805a3SStefan Hajnoczi     Coroutine *co;
27741c9805a3SStefan Hajnoczi     RwCo rwco = {
27751c9805a3SStefan Hajnoczi         .bs = bs,
2776775aa8b6SKevin Wolf         .offset = offset,
27778d3b1a2dSKevin Wolf         .qiov = qiov,
27781c9805a3SStefan Hajnoczi         .is_write = is_write,
27791c9805a3SStefan Hajnoczi         .ret = NOT_DONE,
27804105eaaaSPeter Lieven         .flags = flags,
27811c9805a3SStefan Hajnoczi     };
27821c9805a3SStefan Hajnoczi 
2783498e386cSZhi Yong Wu     /**
2784498e386cSZhi Yong Wu      * In sync call context, when the vcpu is blocked, this throttling timer
2785498e386cSZhi Yong Wu      * will not fire; so the I/O throttling function has to be disabled here
2786498e386cSZhi Yong Wu      * if it has been enabled.
2787498e386cSZhi Yong Wu      */
2788498e386cSZhi Yong Wu     if (bs->io_limits_enabled) {
2789498e386cSZhi Yong Wu         fprintf(stderr, "Disabling I/O throttling on '%s' due "
2790498e386cSZhi Yong Wu                         "to synchronous I/O.\n", bdrv_get_device_name(bs));
2791498e386cSZhi Yong Wu         bdrv_io_limits_disable(bs);
2792498e386cSZhi Yong Wu     }
2793498e386cSZhi Yong Wu 
27941c9805a3SStefan Hajnoczi     if (qemu_in_coroutine()) {
27951c9805a3SStefan Hajnoczi         /* Fast-path if already in coroutine context */
27961c9805a3SStefan Hajnoczi         bdrv_rw_co_entry(&rwco);
27971c9805a3SStefan Hajnoczi     } else {
27982572b37aSStefan Hajnoczi         AioContext *aio_context = bdrv_get_aio_context(bs);
27992572b37aSStefan Hajnoczi 
28001c9805a3SStefan Hajnoczi         co = qemu_coroutine_create(bdrv_rw_co_entry);
28011c9805a3SStefan Hajnoczi         qemu_coroutine_enter(co, &rwco);
28021c9805a3SStefan Hajnoczi         while (rwco.ret == NOT_DONE) {
28032572b37aSStefan Hajnoczi             aio_poll(aio_context, true);
28041c9805a3SStefan Hajnoczi         }
28051c9805a3SStefan Hajnoczi     }
28061c9805a3SStefan Hajnoczi     return rwco.ret;
2807e7a8a783SKevin Wolf }
2808e7a8a783SKevin Wolf 
28098d3b1a2dSKevin Wolf /*
28108d3b1a2dSKevin Wolf  * Process a synchronous request using coroutines
28118d3b1a2dSKevin Wolf  */
28128d3b1a2dSKevin Wolf static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
28134105eaaaSPeter Lieven                       int nb_sectors, bool is_write, BdrvRequestFlags flags)
28148d3b1a2dSKevin Wolf {
28158d3b1a2dSKevin Wolf     QEMUIOVector qiov;
28168d3b1a2dSKevin Wolf     struct iovec iov = {
28178d3b1a2dSKevin Wolf         .iov_base = (void *)buf,
28188d3b1a2dSKevin Wolf         .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
28198d3b1a2dSKevin Wolf     };
28208d3b1a2dSKevin Wolf 
282175af1f34SPeter Lieven     if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
2822da15ee51SKevin Wolf         return -EINVAL;
2823da15ee51SKevin Wolf     }
2824da15ee51SKevin Wolf 
28258d3b1a2dSKevin Wolf     qemu_iovec_init_external(&qiov, &iov, 1);
2826775aa8b6SKevin Wolf     return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
2827775aa8b6SKevin Wolf                         &qiov, is_write, flags);
28288d3b1a2dSKevin Wolf }
28298d3b1a2dSKevin Wolf 
28301c9805a3SStefan Hajnoczi /* return < 0 if error. See bdrv_write() for the return codes */
28311c9805a3SStefan Hajnoczi int bdrv_read(BlockDriverState *bs, int64_t sector_num,
28321c9805a3SStefan Hajnoczi               uint8_t *buf, int nb_sectors)
28331c9805a3SStefan Hajnoczi {
28344105eaaaSPeter Lieven     return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
283583f64091Sbellard }
2836fc01f7e7Sbellard 
283707d27a44SMarkus Armbruster /* Just like bdrv_read(), but with I/O throttling temporarily disabled */
283807d27a44SMarkus Armbruster int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
283907d27a44SMarkus Armbruster                           uint8_t *buf, int nb_sectors)
284007d27a44SMarkus Armbruster {
284107d27a44SMarkus Armbruster     bool enabled;
284207d27a44SMarkus Armbruster     int ret;
284307d27a44SMarkus Armbruster 
284407d27a44SMarkus Armbruster     enabled = bs->io_limits_enabled;
284507d27a44SMarkus Armbruster     bs->io_limits_enabled = false;
28464e7395e8SPeter Lieven     ret = bdrv_read(bs, sector_num, buf, nb_sectors);
284707d27a44SMarkus Armbruster     bs->io_limits_enabled = enabled;
284807d27a44SMarkus Armbruster     return ret;
284907d27a44SMarkus Armbruster }
285007d27a44SMarkus Armbruster 
285119cb3738Sbellard /* Return < 0 if error. Important errors are:
285219cb3738Sbellard   -EIO         generic I/O error (may happen for all errors)
285319cb3738Sbellard   -ENOMEDIUM   No media inserted.
285419cb3738Sbellard   -EINVAL      Invalid sector number or nb_sectors
285519cb3738Sbellard   -EACCES      Trying to write a read-only device
285619cb3738Sbellard */
2857fc01f7e7Sbellard int bdrv_write(BlockDriverState *bs, int64_t sector_num,
2858fc01f7e7Sbellard                const uint8_t *buf, int nb_sectors)
2859fc01f7e7Sbellard {
28604105eaaaSPeter Lieven     return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
286183f64091Sbellard }
286283f64091Sbellard 
2863aa7bfbffSPeter Lieven int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
2864aa7bfbffSPeter Lieven                       int nb_sectors, BdrvRequestFlags flags)
28654105eaaaSPeter Lieven {
28664105eaaaSPeter Lieven     return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
2867aa7bfbffSPeter Lieven                       BDRV_REQ_ZERO_WRITE | flags);
28688d3b1a2dSKevin Wolf }
28698d3b1a2dSKevin Wolf 
2870d75cbb5eSPeter Lieven /*
2871d75cbb5eSPeter Lieven  * Completely zero out a block device with the help of bdrv_write_zeroes.
2872d75cbb5eSPeter Lieven  * The operation is sped up by checking the block status and only writing
2873d75cbb5eSPeter Lieven  * zeroes to the device if they currently do not return zeroes. Optional
2874d75cbb5eSPeter Lieven  * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
2875d75cbb5eSPeter Lieven  *
2876d75cbb5eSPeter Lieven  * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
2877d75cbb5eSPeter Lieven  */
2878d75cbb5eSPeter Lieven int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
2879d75cbb5eSPeter Lieven {
2880d32f7c10SMarkus Armbruster     int64_t target_sectors, ret, nb_sectors, sector_num = 0;
2881d75cbb5eSPeter Lieven     int n;
2882d75cbb5eSPeter Lieven 
2883d32f7c10SMarkus Armbruster     target_sectors = bdrv_nb_sectors(bs);
2884d32f7c10SMarkus Armbruster     if (target_sectors < 0) {
2885d32f7c10SMarkus Armbruster         return target_sectors;
28869ce10c0bSKevin Wolf     }
28879ce10c0bSKevin Wolf 
2888d75cbb5eSPeter Lieven     for (;;) {
288975af1f34SPeter Lieven         nb_sectors = MIN(target_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
2890d75cbb5eSPeter Lieven         if (nb_sectors <= 0) {
2891d75cbb5eSPeter Lieven             return 0;
2892d75cbb5eSPeter Lieven         }
2893d75cbb5eSPeter Lieven         ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
28943d94ce60SPeter Lieven         if (ret < 0) {
28953d94ce60SPeter Lieven             error_report("error getting block status at sector %" PRId64 ": %s",
28963d94ce60SPeter Lieven                          sector_num, strerror(-ret));
28973d94ce60SPeter Lieven             return ret;
28983d94ce60SPeter Lieven         }
2899d75cbb5eSPeter Lieven         if (ret & BDRV_BLOCK_ZERO) {
2900d75cbb5eSPeter Lieven             sector_num += n;
2901d75cbb5eSPeter Lieven             continue;
2902d75cbb5eSPeter Lieven         }
2903d75cbb5eSPeter Lieven         ret = bdrv_write_zeroes(bs, sector_num, n, flags);
2904d75cbb5eSPeter Lieven         if (ret < 0) {
2905d75cbb5eSPeter Lieven             error_report("error writing zeroes at sector %" PRId64 ": %s",
2906d75cbb5eSPeter Lieven                          sector_num, strerror(-ret));
2907d75cbb5eSPeter Lieven             return ret;
2908d75cbb5eSPeter Lieven         }
2909d75cbb5eSPeter Lieven         sector_num += n;
2910d75cbb5eSPeter Lieven     }
2911d75cbb5eSPeter Lieven }
2912d75cbb5eSPeter Lieven 
2913a3ef6571SKevin Wolf int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
291483f64091Sbellard {
2915a3ef6571SKevin Wolf     QEMUIOVector qiov;
2916a3ef6571SKevin Wolf     struct iovec iov = {
2917a3ef6571SKevin Wolf         .iov_base = (void *)buf,
2918a3ef6571SKevin Wolf         .iov_len = bytes,
2919a3ef6571SKevin Wolf     };
29209a8c4cceSKevin Wolf     int ret;
292183f64091Sbellard 
2922a3ef6571SKevin Wolf     if (bytes < 0) {
2923a3ef6571SKevin Wolf         return -EINVAL;
292483f64091Sbellard     }
292583f64091Sbellard 
2926a3ef6571SKevin Wolf     qemu_iovec_init_external(&qiov, &iov, 1);
2927a3ef6571SKevin Wolf     ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
2928a3ef6571SKevin Wolf     if (ret < 0) {
29299a8c4cceSKevin Wolf         return ret;
293083f64091Sbellard     }
293183f64091Sbellard 
2932a3ef6571SKevin Wolf     return bytes;
293383f64091Sbellard }
293483f64091Sbellard 
29358d3b1a2dSKevin Wolf int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
293683f64091Sbellard {
29379a8c4cceSKevin Wolf     int ret;
293883f64091Sbellard 
29398407d5d7SKevin Wolf     ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
29408d3b1a2dSKevin Wolf     if (ret < 0) {
29419a8c4cceSKevin Wolf         return ret;
29428d3b1a2dSKevin Wolf     }
29438d3b1a2dSKevin Wolf 
29448d3b1a2dSKevin Wolf     return qiov->size;
29458d3b1a2dSKevin Wolf }
29468d3b1a2dSKevin Wolf 
29478d3b1a2dSKevin Wolf int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
29488407d5d7SKevin Wolf                 const void *buf, int bytes)
29498d3b1a2dSKevin Wolf {
29508d3b1a2dSKevin Wolf     QEMUIOVector qiov;
29518d3b1a2dSKevin Wolf     struct iovec iov = {
29528d3b1a2dSKevin Wolf         .iov_base   = (void *) buf,
29538407d5d7SKevin Wolf         .iov_len    = bytes,
29548d3b1a2dSKevin Wolf     };
29558d3b1a2dSKevin Wolf 
29568407d5d7SKevin Wolf     if (bytes < 0) {
29578407d5d7SKevin Wolf         return -EINVAL;
29588407d5d7SKevin Wolf     }
29598407d5d7SKevin Wolf 
29608d3b1a2dSKevin Wolf     qemu_iovec_init_external(&qiov, &iov, 1);
29618d3b1a2dSKevin Wolf     return bdrv_pwritev(bs, offset, &qiov);
296283f64091Sbellard }
296383f64091Sbellard 
2964f08145feSKevin Wolf /*
2965f08145feSKevin Wolf  * Writes to the file and ensures that no writes are reordered across this
2966f08145feSKevin Wolf  * request (acts as a barrier)
2967f08145feSKevin Wolf  *
2968f08145feSKevin Wolf  * Returns 0 on success, -errno in error cases.
2969f08145feSKevin Wolf  */
2970f08145feSKevin Wolf int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
2971f08145feSKevin Wolf     const void *buf, int count)
2972f08145feSKevin Wolf {
2973f08145feSKevin Wolf     int ret;
2974f08145feSKevin Wolf 
2975f08145feSKevin Wolf     ret = bdrv_pwrite(bs, offset, buf, count);
2976f08145feSKevin Wolf     if (ret < 0) {
2977f08145feSKevin Wolf         return ret;
2978f08145feSKevin Wolf     }
2979f08145feSKevin Wolf 
2980f05fa4adSPaolo Bonzini     /* No flush needed for cache modes that already do it */
2981f05fa4adSPaolo Bonzini     if (bs->enable_write_cache) {
2982f08145feSKevin Wolf         bdrv_flush(bs);
2983f08145feSKevin Wolf     }
2984f08145feSKevin Wolf 
2985f08145feSKevin Wolf     return 0;
2986f08145feSKevin Wolf }
2987f08145feSKevin Wolf 
2988470c0504SStefan Hajnoczi static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
2989ab185921SStefan Hajnoczi         int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
2990ab185921SStefan Hajnoczi {
2991ab185921SStefan Hajnoczi     /* Perform I/O through a temporary buffer so that users who scribble over
2992ab185921SStefan Hajnoczi      * their read buffer while the operation is in progress do not end up
2993ab185921SStefan Hajnoczi      * modifying the image file.  This is critical for zero-copy guest I/O
2994ab185921SStefan Hajnoczi      * where anything might happen inside guest memory.
2995ab185921SStefan Hajnoczi      */
2996ab185921SStefan Hajnoczi     void *bounce_buffer;
2997ab185921SStefan Hajnoczi 
299879c053bdSStefan Hajnoczi     BlockDriver *drv = bs->drv;
2999ab185921SStefan Hajnoczi     struct iovec iov;
3000ab185921SStefan Hajnoczi     QEMUIOVector bounce_qiov;
3001ab185921SStefan Hajnoczi     int64_t cluster_sector_num;
3002ab185921SStefan Hajnoczi     int cluster_nb_sectors;
3003ab185921SStefan Hajnoczi     size_t skip_bytes;
3004ab185921SStefan Hajnoczi     int ret;
3005ab185921SStefan Hajnoczi 
3006ab185921SStefan Hajnoczi     /* Cover entire cluster so no additional backing file I/O is required when
3007ab185921SStefan Hajnoczi      * allocating cluster in the image file.
3008ab185921SStefan Hajnoczi      */
3009343bded4SPaolo Bonzini     bdrv_round_to_clusters(bs, sector_num, nb_sectors,
3010ab185921SStefan Hajnoczi                            &cluster_sector_num, &cluster_nb_sectors);
3011ab185921SStefan Hajnoczi 
3012470c0504SStefan Hajnoczi     trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
3013ab185921SStefan Hajnoczi                                    cluster_sector_num, cluster_nb_sectors);
3014ab185921SStefan Hajnoczi 
3015ab185921SStefan Hajnoczi     iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
3016857d4f46SKevin Wolf     iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
3017857d4f46SKevin Wolf     if (bounce_buffer == NULL) {
3018857d4f46SKevin Wolf         ret = -ENOMEM;
3019857d4f46SKevin Wolf         goto err;
3020857d4f46SKevin Wolf     }
3021857d4f46SKevin Wolf 
3022ab185921SStefan Hajnoczi     qemu_iovec_init_external(&bounce_qiov, &iov, 1);
3023ab185921SStefan Hajnoczi 
302479c053bdSStefan Hajnoczi     ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
3025ab185921SStefan Hajnoczi                              &bounce_qiov);
3026ab185921SStefan Hajnoczi     if (ret < 0) {
3027ab185921SStefan Hajnoczi         goto err;
3028ab185921SStefan Hajnoczi     }
3029ab185921SStefan Hajnoczi 
303079c053bdSStefan Hajnoczi     if (drv->bdrv_co_write_zeroes &&
303179c053bdSStefan Hajnoczi         buffer_is_zero(bounce_buffer, iov.iov_len)) {
3032621f0589SKevin Wolf         ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
3033aa7bfbffSPeter Lieven                                       cluster_nb_sectors, 0);
303479c053bdSStefan Hajnoczi     } else {
3035f05fa4adSPaolo Bonzini         /* This does not change the data on the disk, it is not necessary
3036f05fa4adSPaolo Bonzini          * to flush even in cache=writethrough mode.
3037f05fa4adSPaolo Bonzini          */
303879c053bdSStefan Hajnoczi         ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
3039ab185921SStefan Hajnoczi                                   &bounce_qiov);
304079c053bdSStefan Hajnoczi     }
304179c053bdSStefan Hajnoczi 
3042ab185921SStefan Hajnoczi     if (ret < 0) {
3043ab185921SStefan Hajnoczi         /* It might be okay to ignore write errors for guest requests.  If this
3044ab185921SStefan Hajnoczi          * is a deliberate copy-on-read then we don't want to ignore the error.
3045ab185921SStefan Hajnoczi          * Simply report it in all cases.
3046ab185921SStefan Hajnoczi          */
3047ab185921SStefan Hajnoczi         goto err;
3048ab185921SStefan Hajnoczi     }
3049ab185921SStefan Hajnoczi 
3050ab185921SStefan Hajnoczi     skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
305103396148SMichael Tokarev     qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
3052ab185921SStefan Hajnoczi                         nb_sectors * BDRV_SECTOR_SIZE);
3053ab185921SStefan Hajnoczi 
3054ab185921SStefan Hajnoczi err:
3055ab185921SStefan Hajnoczi     qemu_vfree(bounce_buffer);
3056ab185921SStefan Hajnoczi     return ret;
3057ab185921SStefan Hajnoczi }
3058ab185921SStefan Hajnoczi 
3059c5fbe571SStefan Hajnoczi /*
3060d0c7f642SKevin Wolf  * Forwards an already correctly aligned request to the BlockDriver. This
3061d0c7f642SKevin Wolf  * handles copy on read and zeroing after EOF; any other features must be
3062d0c7f642SKevin Wolf  * implemented by the caller.
3063c5fbe571SStefan Hajnoczi  */
3064d0c7f642SKevin Wolf static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
306565afd211SKevin Wolf     BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
3066ec746e10SKevin Wolf     int64_t align, QEMUIOVector *qiov, int flags)
3067da1fa91dSKevin Wolf {
3068da1fa91dSKevin Wolf     BlockDriver *drv = bs->drv;
3069dbffbdcfSStefan Hajnoczi     int ret;
3070da1fa91dSKevin Wolf 
3071d0c7f642SKevin Wolf     int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3072d0c7f642SKevin Wolf     unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
3073da1fa91dSKevin Wolf 
3074d0c7f642SKevin Wolf     assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3075d0c7f642SKevin Wolf     assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
30768eb029c2SKevin Wolf     assert(!qiov || bytes == qiov->size);
3077d0c7f642SKevin Wolf 
3078d0c7f642SKevin Wolf     /* Handle Copy on Read and associated serialisation */
3079470c0504SStefan Hajnoczi     if (flags & BDRV_REQ_COPY_ON_READ) {
30807327145fSKevin Wolf         /* If we touch the same cluster it counts as an overlap.  This
30817327145fSKevin Wolf          * guarantees that allocating writes will be serialized and not race
30827327145fSKevin Wolf          * with each other for the same cluster.  For example, in copy-on-read
30837327145fSKevin Wolf          * it ensures that the CoR read and write operations are atomic and
30847327145fSKevin Wolf          * guest writes cannot interleave between them. */
30857327145fSKevin Wolf         mark_request_serialising(req, bdrv_get_cluster_size(bs));
3086470c0504SStefan Hajnoczi     }
3087470c0504SStefan Hajnoczi 
30882dbafdc0SKevin Wolf     wait_serialising_requests(req);
3089f4658285SStefan Hajnoczi 
3090470c0504SStefan Hajnoczi     if (flags & BDRV_REQ_COPY_ON_READ) {
3091ab185921SStefan Hajnoczi         int pnum;
3092ab185921SStefan Hajnoczi 
3093bdad13b9SPaolo Bonzini         ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
3094ab185921SStefan Hajnoczi         if (ret < 0) {
3095ab185921SStefan Hajnoczi             goto out;
3096ab185921SStefan Hajnoczi         }
3097ab185921SStefan Hajnoczi 
3098ab185921SStefan Hajnoczi         if (!ret || pnum != nb_sectors) {
3099470c0504SStefan Hajnoczi             ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
3100ab185921SStefan Hajnoczi             goto out;
3101ab185921SStefan Hajnoczi         }
3102ab185921SStefan Hajnoczi     }
3103ab185921SStefan Hajnoczi 
3104d0c7f642SKevin Wolf     /* Forward the request to the BlockDriver */
3105c0191e76SMax Reitz     if (!bs->zero_beyond_eof) {
3106dbffbdcfSStefan Hajnoczi         ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
3107893a8f62SMORITA Kazutaka     } else {
3108c0191e76SMax Reitz         /* Read zeros after EOF */
31094049082cSMarkus Armbruster         int64_t total_sectors, max_nb_sectors;
3110893a8f62SMORITA Kazutaka 
31114049082cSMarkus Armbruster         total_sectors = bdrv_nb_sectors(bs);
31124049082cSMarkus Armbruster         if (total_sectors < 0) {
31134049082cSMarkus Armbruster             ret = total_sectors;
3114893a8f62SMORITA Kazutaka             goto out;
3115893a8f62SMORITA Kazutaka         }
3116893a8f62SMORITA Kazutaka 
31175f5bcd80SKevin Wolf         max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
31185f5bcd80SKevin Wolf                                   align >> BDRV_SECTOR_BITS);
3119e012b78cSPaolo Bonzini         if (nb_sectors < max_nb_sectors) {
3120e012b78cSPaolo Bonzini             ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
3121e012b78cSPaolo Bonzini         } else if (max_nb_sectors > 0) {
312233f461e0SKevin Wolf             QEMUIOVector local_qiov;
312333f461e0SKevin Wolf 
312433f461e0SKevin Wolf             qemu_iovec_init(&local_qiov, qiov->niov);
312533f461e0SKevin Wolf             qemu_iovec_concat(&local_qiov, qiov, 0,
3126e012b78cSPaolo Bonzini                               max_nb_sectors * BDRV_SECTOR_SIZE);
312733f461e0SKevin Wolf 
3128e012b78cSPaolo Bonzini             ret = drv->bdrv_co_readv(bs, sector_num, max_nb_sectors,
312933f461e0SKevin Wolf                                      &local_qiov);
313033f461e0SKevin Wolf 
313133f461e0SKevin Wolf             qemu_iovec_destroy(&local_qiov);
3132893a8f62SMORITA Kazutaka         } else {
3133893a8f62SMORITA Kazutaka             ret = 0;
3134893a8f62SMORITA Kazutaka         }
3135893a8f62SMORITA Kazutaka 
3136893a8f62SMORITA Kazutaka         /* Reading beyond end of file is supposed to produce zeroes */
3137893a8f62SMORITA Kazutaka         if (ret == 0 && total_sectors < sector_num + nb_sectors) {
3138893a8f62SMORITA Kazutaka             uint64_t offset = MAX(0, total_sectors - sector_num);
3139893a8f62SMORITA Kazutaka             uint64_t bytes = (sector_num + nb_sectors - offset) *
3140893a8f62SMORITA Kazutaka                               BDRV_SECTOR_SIZE;
3141893a8f62SMORITA Kazutaka             qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
3142893a8f62SMORITA Kazutaka         }
3143893a8f62SMORITA Kazutaka     }
3144ab185921SStefan Hajnoczi 
3145ab185921SStefan Hajnoczi out:
3146dbffbdcfSStefan Hajnoczi     return ret;
3147da1fa91dSKevin Wolf }
3148da1fa91dSKevin Wolf 
3149fc3959e4SFam Zheng static inline uint64_t bdrv_get_align(BlockDriverState *bs)
3150fc3959e4SFam Zheng {
3151fc3959e4SFam Zheng     /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
3152fc3959e4SFam Zheng     return MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
3153fc3959e4SFam Zheng }
3154fc3959e4SFam Zheng 
3155fc3959e4SFam Zheng static inline bool bdrv_req_is_aligned(BlockDriverState *bs,
3156fc3959e4SFam Zheng                                        int64_t offset, size_t bytes)
3157fc3959e4SFam Zheng {
3158fc3959e4SFam Zheng     int64_t align = bdrv_get_align(bs);
3159fc3959e4SFam Zheng     return !(offset & (align - 1) || (bytes & (align - 1)));
3160fc3959e4SFam Zheng }
3161fc3959e4SFam Zheng 
3162d0c7f642SKevin Wolf /*
3163d0c7f642SKevin Wolf  * Handle a read request in coroutine context
3164d0c7f642SKevin Wolf  */
31651b0288aeSKevin Wolf static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
31661b0288aeSKevin Wolf     int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
3167d0c7f642SKevin Wolf     BdrvRequestFlags flags)
3168d0c7f642SKevin Wolf {
3169d0c7f642SKevin Wolf     BlockDriver *drv = bs->drv;
317065afd211SKevin Wolf     BdrvTrackedRequest req;
317165afd211SKevin Wolf 
3172fc3959e4SFam Zheng     uint64_t align = bdrv_get_align(bs);
31731b0288aeSKevin Wolf     uint8_t *head_buf = NULL;
31741b0288aeSKevin Wolf     uint8_t *tail_buf = NULL;
31751b0288aeSKevin Wolf     QEMUIOVector local_qiov;
31761b0288aeSKevin Wolf     bool use_local_qiov = false;
3177d0c7f642SKevin Wolf     int ret;
3178d0c7f642SKevin Wolf 
3179d0c7f642SKevin Wolf     if (!drv) {
3180d0c7f642SKevin Wolf         return -ENOMEDIUM;
3181d0c7f642SKevin Wolf     }
3182b9c64947SMax Reitz 
3183b9c64947SMax Reitz     ret = bdrv_check_byte_request(bs, offset, bytes);
3184b9c64947SMax Reitz     if (ret < 0) {
3185b9c64947SMax Reitz         return ret;
3186d0c7f642SKevin Wolf     }
3187d0c7f642SKevin Wolf 
3188d0c7f642SKevin Wolf     if (bs->copy_on_read) {
3189d0c7f642SKevin Wolf         flags |= BDRV_REQ_COPY_ON_READ;
3190d0c7f642SKevin Wolf     }
3191d0c7f642SKevin Wolf 
3192d0c7f642SKevin Wolf     /* throttling disk I/O */
3193d0c7f642SKevin Wolf     if (bs->io_limits_enabled) {
3194d5103588SKevin Wolf         bdrv_io_limits_intercept(bs, bytes, false);
3195d0c7f642SKevin Wolf     }
3196d0c7f642SKevin Wolf 
31971b0288aeSKevin Wolf     /* Align read if necessary by padding qiov */
31981b0288aeSKevin Wolf     if (offset & (align - 1)) {
31991b0288aeSKevin Wolf         head_buf = qemu_blockalign(bs, align);
32001b0288aeSKevin Wolf         qemu_iovec_init(&local_qiov, qiov->niov + 2);
32011b0288aeSKevin Wolf         qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
32021b0288aeSKevin Wolf         qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
32031b0288aeSKevin Wolf         use_local_qiov = true;
32041b0288aeSKevin Wolf 
32051b0288aeSKevin Wolf         bytes += offset & (align - 1);
32061b0288aeSKevin Wolf         offset = offset & ~(align - 1);
32071b0288aeSKevin Wolf     }
32081b0288aeSKevin Wolf 
32091b0288aeSKevin Wolf     if ((offset + bytes) & (align - 1)) {
32101b0288aeSKevin Wolf         if (!use_local_qiov) {
32111b0288aeSKevin Wolf             qemu_iovec_init(&local_qiov, qiov->niov + 1);
32121b0288aeSKevin Wolf             qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
32131b0288aeSKevin Wolf             use_local_qiov = true;
32141b0288aeSKevin Wolf         }
32151b0288aeSKevin Wolf         tail_buf = qemu_blockalign(bs, align);
32161b0288aeSKevin Wolf         qemu_iovec_add(&local_qiov, tail_buf,
32171b0288aeSKevin Wolf                        align - ((offset + bytes) & (align - 1)));
32181b0288aeSKevin Wolf 
32191b0288aeSKevin Wolf         bytes = ROUND_UP(bytes, align);
32201b0288aeSKevin Wolf     }
32211b0288aeSKevin Wolf 
322265afd211SKevin Wolf     tracked_request_begin(&req, bs, offset, bytes, false);
3223ec746e10SKevin Wolf     ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
32241b0288aeSKevin Wolf                               use_local_qiov ? &local_qiov : qiov,
32251b0288aeSKevin Wolf                               flags);
322665afd211SKevin Wolf     tracked_request_end(&req);
32271b0288aeSKevin Wolf 
32281b0288aeSKevin Wolf     if (use_local_qiov) {
32291b0288aeSKevin Wolf         qemu_iovec_destroy(&local_qiov);
32301b0288aeSKevin Wolf         qemu_vfree(head_buf);
32311b0288aeSKevin Wolf         qemu_vfree(tail_buf);
32321b0288aeSKevin Wolf     }
32331b0288aeSKevin Wolf 
3234d0c7f642SKevin Wolf     return ret;
3235d0c7f642SKevin Wolf }
3236d0c7f642SKevin Wolf 
32371b0288aeSKevin Wolf static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
32381b0288aeSKevin Wolf     int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
32391b0288aeSKevin Wolf     BdrvRequestFlags flags)
32401b0288aeSKevin Wolf {
324175af1f34SPeter Lieven     if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
32421b0288aeSKevin Wolf         return -EINVAL;
32431b0288aeSKevin Wolf     }
32441b0288aeSKevin Wolf 
32451b0288aeSKevin Wolf     return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
32461b0288aeSKevin Wolf                              nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
32471b0288aeSKevin Wolf }
32481b0288aeSKevin Wolf 
3249c5fbe571SStefan Hajnoczi int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
3250da1fa91dSKevin Wolf     int nb_sectors, QEMUIOVector *qiov)
3251da1fa91dSKevin Wolf {
3252c5fbe571SStefan Hajnoczi     trace_bdrv_co_readv(bs, sector_num, nb_sectors);
3253da1fa91dSKevin Wolf 
3254470c0504SStefan Hajnoczi     return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
3255470c0504SStefan Hajnoczi }
3256470c0504SStefan Hajnoczi 
3257470c0504SStefan Hajnoczi int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
3258470c0504SStefan Hajnoczi     int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
3259470c0504SStefan Hajnoczi {
3260470c0504SStefan Hajnoczi     trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
3261470c0504SStefan Hajnoczi 
3262470c0504SStefan Hajnoczi     return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
3263470c0504SStefan Hajnoczi                             BDRV_REQ_COPY_ON_READ);
3264c5fbe571SStefan Hajnoczi }
3265c5fbe571SStefan Hajnoczi 
326698764152SPeter Lieven #define MAX_WRITE_ZEROES_BOUNCE_BUFFER 32768
3267c31cb707SPeter Lieven 
3268f08f2ddaSStefan Hajnoczi static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
3269aa7bfbffSPeter Lieven     int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
3270f08f2ddaSStefan Hajnoczi {
3271f08f2ddaSStefan Hajnoczi     BlockDriver *drv = bs->drv;
3272f08f2ddaSStefan Hajnoczi     QEMUIOVector qiov;
3273c31cb707SPeter Lieven     struct iovec iov = {0};
3274c31cb707SPeter Lieven     int ret = 0;
3275f08f2ddaSStefan Hajnoczi 
327675af1f34SPeter Lieven     int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_write_zeroes,
327775af1f34SPeter Lieven                                         BDRV_REQUEST_MAX_SECTORS);
3278621f0589SKevin Wolf 
3279c31cb707SPeter Lieven     while (nb_sectors > 0 && !ret) {
3280c31cb707SPeter Lieven         int num = nb_sectors;
3281c31cb707SPeter Lieven 
3282b8d71c09SPaolo Bonzini         /* Align request.  Block drivers can expect the "bulk" of the request
3283b8d71c09SPaolo Bonzini          * to be aligned.
3284b8d71c09SPaolo Bonzini          */
3285b8d71c09SPaolo Bonzini         if (bs->bl.write_zeroes_alignment
3286b8d71c09SPaolo Bonzini             && num > bs->bl.write_zeroes_alignment) {
3287b8d71c09SPaolo Bonzini             if (sector_num % bs->bl.write_zeroes_alignment != 0) {
3288b8d71c09SPaolo Bonzini                 /* Make a small request up to the first aligned sector.  */
3289c31cb707SPeter Lieven                 num = bs->bl.write_zeroes_alignment;
3290c31cb707SPeter Lieven                 num -= sector_num % bs->bl.write_zeroes_alignment;
3291b8d71c09SPaolo Bonzini             } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
3292b8d71c09SPaolo Bonzini                 /* Shorten the request to the last aligned sector.  num cannot
3293b8d71c09SPaolo Bonzini                  * underflow because num > bs->bl.write_zeroes_alignment.
3294b8d71c09SPaolo Bonzini                  */
3295b8d71c09SPaolo Bonzini                 num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
3296b8d71c09SPaolo Bonzini             }
3297c31cb707SPeter Lieven         }
3298c31cb707SPeter Lieven 
3299c31cb707SPeter Lieven         /* limit request size */
3300c31cb707SPeter Lieven         if (num > max_write_zeroes) {
3301c31cb707SPeter Lieven             num = max_write_zeroes;
3302c31cb707SPeter Lieven         }
3303c31cb707SPeter Lieven 
3304c31cb707SPeter Lieven         ret = -ENOTSUP;
3305f08f2ddaSStefan Hajnoczi         /* First try the efficient write zeroes operation */
3306f08f2ddaSStefan Hajnoczi         if (drv->bdrv_co_write_zeroes) {
3307c31cb707SPeter Lieven             ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
3308f08f2ddaSStefan Hajnoczi         }
3309f08f2ddaSStefan Hajnoczi 
3310c31cb707SPeter Lieven         if (ret == -ENOTSUP) {
3311f08f2ddaSStefan Hajnoczi             /* Fall back to bounce buffer if write zeroes is unsupported */
3312095e4fa4SPeter Lieven             int max_xfer_len = MIN_NON_ZERO(bs->bl.max_transfer_length,
331398764152SPeter Lieven                                             MAX_WRITE_ZEROES_BOUNCE_BUFFER);
3314095e4fa4SPeter Lieven             num = MIN(num, max_xfer_len);
3315c31cb707SPeter Lieven             iov.iov_len = num * BDRV_SECTOR_SIZE;
3316c31cb707SPeter Lieven             if (iov.iov_base == NULL) {
3317857d4f46SKevin Wolf                 iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
3318857d4f46SKevin Wolf                 if (iov.iov_base == NULL) {
3319857d4f46SKevin Wolf                     ret = -ENOMEM;
3320857d4f46SKevin Wolf                     goto fail;
3321857d4f46SKevin Wolf                 }
3322b8d71c09SPaolo Bonzini                 memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
3323c31cb707SPeter Lieven             }
3324f08f2ddaSStefan Hajnoczi             qemu_iovec_init_external(&qiov, &iov, 1);
3325f08f2ddaSStefan Hajnoczi 
3326c31cb707SPeter Lieven             ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);
3327b8d71c09SPaolo Bonzini 
3328b8d71c09SPaolo Bonzini             /* Keep bounce buffer around if it is big enough for all
3329b8d71c09SPaolo Bonzini              * all future requests.
3330b8d71c09SPaolo Bonzini              */
3331095e4fa4SPeter Lieven             if (num < max_xfer_len) {
3332b8d71c09SPaolo Bonzini                 qemu_vfree(iov.iov_base);
3333b8d71c09SPaolo Bonzini                 iov.iov_base = NULL;
3334b8d71c09SPaolo Bonzini             }
3335c31cb707SPeter Lieven         }
3336c31cb707SPeter Lieven 
3337c31cb707SPeter Lieven         sector_num += num;
3338c31cb707SPeter Lieven         nb_sectors -= num;
3339c31cb707SPeter Lieven     }
3340f08f2ddaSStefan Hajnoczi 
3341857d4f46SKevin Wolf fail:
3342f08f2ddaSStefan Hajnoczi     qemu_vfree(iov.iov_base);
3343f08f2ddaSStefan Hajnoczi     return ret;
3344f08f2ddaSStefan Hajnoczi }
3345f08f2ddaSStefan Hajnoczi 
3346c5fbe571SStefan Hajnoczi /*
3347b404f720SKevin Wolf  * Forwards an already correctly aligned write request to the BlockDriver.
3348c5fbe571SStefan Hajnoczi  */
3349b404f720SKevin Wolf static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
335065afd211SKevin Wolf     BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
335165afd211SKevin Wolf     QEMUIOVector *qiov, int flags)
3352c5fbe571SStefan Hajnoczi {
3353c5fbe571SStefan Hajnoczi     BlockDriver *drv = bs->drv;
335428de2dcdSKevin Wolf     bool waited;
33556b7cb247SStefan Hajnoczi     int ret;
3356da1fa91dSKevin Wolf 
3357b404f720SKevin Wolf     int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3358b404f720SKevin Wolf     unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
3359da1fa91dSKevin Wolf 
3360b404f720SKevin Wolf     assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3361b404f720SKevin Wolf     assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
33628eb029c2SKevin Wolf     assert(!qiov || bytes == qiov->size);
3363cc0681c4SBenoît Canet 
336428de2dcdSKevin Wolf     waited = wait_serialising_requests(req);
336528de2dcdSKevin Wolf     assert(!waited || !req->serialising);
3366af91f9a7SKevin Wolf     assert(req->overlap_offset <= offset);
3367af91f9a7SKevin Wolf     assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
3368244eadefSKevin Wolf 
336965afd211SKevin Wolf     ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
3370d616b224SStefan Hajnoczi 
3371465bee1dSPeter Lieven     if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
3372465bee1dSPeter Lieven         !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes &&
3373465bee1dSPeter Lieven         qemu_iovec_is_zero(qiov)) {
3374465bee1dSPeter Lieven         flags |= BDRV_REQ_ZERO_WRITE;
3375465bee1dSPeter Lieven         if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
3376465bee1dSPeter Lieven             flags |= BDRV_REQ_MAY_UNMAP;
3377465bee1dSPeter Lieven         }
3378465bee1dSPeter Lieven     }
3379465bee1dSPeter Lieven 
3380d616b224SStefan Hajnoczi     if (ret < 0) {
3381d616b224SStefan Hajnoczi         /* Do nothing, write notifier decided to fail this request */
3382d616b224SStefan Hajnoczi     } else if (flags & BDRV_REQ_ZERO_WRITE) {
33839e1cb96dSKevin Wolf         BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
3384aa7bfbffSPeter Lieven         ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
3385f08f2ddaSStefan Hajnoczi     } else {
33869e1cb96dSKevin Wolf         BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
33876b7cb247SStefan Hajnoczi         ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
3388f08f2ddaSStefan Hajnoczi     }
33899e1cb96dSKevin Wolf     BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);
33906b7cb247SStefan Hajnoczi 
3391f05fa4adSPaolo Bonzini     if (ret == 0 && !bs->enable_write_cache) {
3392f05fa4adSPaolo Bonzini         ret = bdrv_co_flush(bs);
3393f05fa4adSPaolo Bonzini     }
3394f05fa4adSPaolo Bonzini 
33951755da16SPaolo Bonzini     bdrv_set_dirty(bs, sector_num, nb_sectors);
3396da1fa91dSKevin Wolf 
33975366d0c8SBenoît Canet     block_acct_highest_sector(&bs->stats, sector_num, nb_sectors);
33985e5a94b6SBenoît Canet 
3399c0191e76SMax Reitz     if (ret >= 0) {
3400df2a6f29SPaolo Bonzini         bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
3401df2a6f29SPaolo Bonzini     }
3402da1fa91dSKevin Wolf 
34036b7cb247SStefan Hajnoczi     return ret;
3404da1fa91dSKevin Wolf }
3405da1fa91dSKevin Wolf 
3406b404f720SKevin Wolf /*
3407b404f720SKevin Wolf  * Handle a write request in coroutine context
3408b404f720SKevin Wolf  */
34096601553eSKevin Wolf static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
34106601553eSKevin Wolf     int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
3411b404f720SKevin Wolf     BdrvRequestFlags flags)
3412b404f720SKevin Wolf {
341365afd211SKevin Wolf     BdrvTrackedRequest req;
3414fc3959e4SFam Zheng     uint64_t align = bdrv_get_align(bs);
34153b8242e0SKevin Wolf     uint8_t *head_buf = NULL;
34163b8242e0SKevin Wolf     uint8_t *tail_buf = NULL;
34173b8242e0SKevin Wolf     QEMUIOVector local_qiov;
34183b8242e0SKevin Wolf     bool use_local_qiov = false;
3419b404f720SKevin Wolf     int ret;
3420b404f720SKevin Wolf 
3421b404f720SKevin Wolf     if (!bs->drv) {
3422b404f720SKevin Wolf         return -ENOMEDIUM;
3423b404f720SKevin Wolf     }
3424b404f720SKevin Wolf     if (bs->read_only) {
3425b404f720SKevin Wolf         return -EACCES;
3426b404f720SKevin Wolf     }
3427b9c64947SMax Reitz 
3428b9c64947SMax Reitz     ret = bdrv_check_byte_request(bs, offset, bytes);
3429b9c64947SMax Reitz     if (ret < 0) {
3430b9c64947SMax Reitz         return ret;
3431b404f720SKevin Wolf     }
3432b404f720SKevin Wolf 
3433b404f720SKevin Wolf     /* throttling disk I/O */
3434b404f720SKevin Wolf     if (bs->io_limits_enabled) {
3435d5103588SKevin Wolf         bdrv_io_limits_intercept(bs, bytes, true);
3436b404f720SKevin Wolf     }
3437b404f720SKevin Wolf 
34383b8242e0SKevin Wolf     /*
34393b8242e0SKevin Wolf      * Align write if necessary by performing a read-modify-write cycle.
34403b8242e0SKevin Wolf      * Pad qiov with the read parts and be sure to have a tracked request not
34413b8242e0SKevin Wolf      * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
34423b8242e0SKevin Wolf      */
344365afd211SKevin Wolf     tracked_request_begin(&req, bs, offset, bytes, true);
34443b8242e0SKevin Wolf 
34453b8242e0SKevin Wolf     if (offset & (align - 1)) {
34463b8242e0SKevin Wolf         QEMUIOVector head_qiov;
34473b8242e0SKevin Wolf         struct iovec head_iov;
34483b8242e0SKevin Wolf 
34493b8242e0SKevin Wolf         mark_request_serialising(&req, align);
34503b8242e0SKevin Wolf         wait_serialising_requests(&req);
34513b8242e0SKevin Wolf 
34523b8242e0SKevin Wolf         head_buf = qemu_blockalign(bs, align);
34533b8242e0SKevin Wolf         head_iov = (struct iovec) {
34543b8242e0SKevin Wolf             .iov_base   = head_buf,
34553b8242e0SKevin Wolf             .iov_len    = align,
34563b8242e0SKevin Wolf         };
34573b8242e0SKevin Wolf         qemu_iovec_init_external(&head_qiov, &head_iov, 1);
34583b8242e0SKevin Wolf 
34599e1cb96dSKevin Wolf         BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
34603b8242e0SKevin Wolf         ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
34613b8242e0SKevin Wolf                                   align, &head_qiov, 0);
34623b8242e0SKevin Wolf         if (ret < 0) {
34633b8242e0SKevin Wolf             goto fail;
34643b8242e0SKevin Wolf         }
34659e1cb96dSKevin Wolf         BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
34663b8242e0SKevin Wolf 
34673b8242e0SKevin Wolf         qemu_iovec_init(&local_qiov, qiov->niov + 2);
34683b8242e0SKevin Wolf         qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
34693b8242e0SKevin Wolf         qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
34703b8242e0SKevin Wolf         use_local_qiov = true;
34713b8242e0SKevin Wolf 
34723b8242e0SKevin Wolf         bytes += offset & (align - 1);
34733b8242e0SKevin Wolf         offset = offset & ~(align - 1);
34743b8242e0SKevin Wolf     }
34753b8242e0SKevin Wolf 
34763b8242e0SKevin Wolf     if ((offset + bytes) & (align - 1)) {
34773b8242e0SKevin Wolf         QEMUIOVector tail_qiov;
34783b8242e0SKevin Wolf         struct iovec tail_iov;
34793b8242e0SKevin Wolf         size_t tail_bytes;
348028de2dcdSKevin Wolf         bool waited;
34813b8242e0SKevin Wolf 
34823b8242e0SKevin Wolf         mark_request_serialising(&req, align);
348328de2dcdSKevin Wolf         waited = wait_serialising_requests(&req);
348428de2dcdSKevin Wolf         assert(!waited || !use_local_qiov);
34853b8242e0SKevin Wolf 
34863b8242e0SKevin Wolf         tail_buf = qemu_blockalign(bs, align);
34873b8242e0SKevin Wolf         tail_iov = (struct iovec) {
34883b8242e0SKevin Wolf             .iov_base   = tail_buf,
34893b8242e0SKevin Wolf             .iov_len    = align,
34903b8242e0SKevin Wolf         };
34913b8242e0SKevin Wolf         qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
34923b8242e0SKevin Wolf 
34939e1cb96dSKevin Wolf         BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
34943b8242e0SKevin Wolf         ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
34953b8242e0SKevin Wolf                                   align, &tail_qiov, 0);
34963b8242e0SKevin Wolf         if (ret < 0) {
34973b8242e0SKevin Wolf             goto fail;
34983b8242e0SKevin Wolf         }
34999e1cb96dSKevin Wolf         BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
35003b8242e0SKevin Wolf 
35013b8242e0SKevin Wolf         if (!use_local_qiov) {
35023b8242e0SKevin Wolf             qemu_iovec_init(&local_qiov, qiov->niov + 1);
35033b8242e0SKevin Wolf             qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
35043b8242e0SKevin Wolf             use_local_qiov = true;
35053b8242e0SKevin Wolf         }
35063b8242e0SKevin Wolf 
35073b8242e0SKevin Wolf         tail_bytes = (offset + bytes) & (align - 1);
35083b8242e0SKevin Wolf         qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
35093b8242e0SKevin Wolf 
35103b8242e0SKevin Wolf         bytes = ROUND_UP(bytes, align);
35113b8242e0SKevin Wolf     }
35123b8242e0SKevin Wolf 
3513fc3959e4SFam Zheng     if (use_local_qiov) {
3514fc3959e4SFam Zheng         /* Local buffer may have non-zero data. */
3515fc3959e4SFam Zheng         flags &= ~BDRV_REQ_ZERO_WRITE;
3516fc3959e4SFam Zheng     }
35173b8242e0SKevin Wolf     ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
35183b8242e0SKevin Wolf                                use_local_qiov ? &local_qiov : qiov,
35193b8242e0SKevin Wolf                                flags);
35203b8242e0SKevin Wolf 
35213b8242e0SKevin Wolf fail:
352265afd211SKevin Wolf     tracked_request_end(&req);
3523b404f720SKevin Wolf 
35243b8242e0SKevin Wolf     if (use_local_qiov) {
35253b8242e0SKevin Wolf         qemu_iovec_destroy(&local_qiov);
352699c4a85cSKevin Wolf     }
35273b8242e0SKevin Wolf     qemu_vfree(head_buf);
35283b8242e0SKevin Wolf     qemu_vfree(tail_buf);
35293b8242e0SKevin Wolf 
3530b404f720SKevin Wolf     return ret;
3531b404f720SKevin Wolf }
3532b404f720SKevin Wolf 
35336601553eSKevin Wolf static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
35346601553eSKevin Wolf     int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
35356601553eSKevin Wolf     BdrvRequestFlags flags)
35366601553eSKevin Wolf {
353775af1f34SPeter Lieven     if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
35386601553eSKevin Wolf         return -EINVAL;
35396601553eSKevin Wolf     }
35406601553eSKevin Wolf 
35416601553eSKevin Wolf     return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
35426601553eSKevin Wolf                               nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
35436601553eSKevin Wolf }
35446601553eSKevin Wolf 
3545c5fbe571SStefan Hajnoczi int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
3546c5fbe571SStefan Hajnoczi     int nb_sectors, QEMUIOVector *qiov)
3547c5fbe571SStefan Hajnoczi {
3548c5fbe571SStefan Hajnoczi     trace_bdrv_co_writev(bs, sector_num, nb_sectors);
3549c5fbe571SStefan Hajnoczi 
3550f08f2ddaSStefan Hajnoczi     return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
3551f08f2ddaSStefan Hajnoczi }
3552f08f2ddaSStefan Hajnoczi 
3553f08f2ddaSStefan Hajnoczi int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
3554aa7bfbffSPeter Lieven                                       int64_t sector_num, int nb_sectors,
3555aa7bfbffSPeter Lieven                                       BdrvRequestFlags flags)
3556f08f2ddaSStefan Hajnoczi {
3557fc3959e4SFam Zheng     int ret;
3558fc3959e4SFam Zheng 
355994d6ff21SPaolo Bonzini     trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
3560f08f2ddaSStefan Hajnoczi 
3561d32f35cbSPeter Lieven     if (!(bs->open_flags & BDRV_O_UNMAP)) {
3562d32f35cbSPeter Lieven         flags &= ~BDRV_REQ_MAY_UNMAP;
3563d32f35cbSPeter Lieven     }
3564fc3959e4SFam Zheng     if (bdrv_req_is_aligned(bs, sector_num << BDRV_SECTOR_BITS,
3565fc3959e4SFam Zheng                             nb_sectors << BDRV_SECTOR_BITS)) {
3566fc3959e4SFam Zheng         ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
3567aa7bfbffSPeter Lieven                                 BDRV_REQ_ZERO_WRITE | flags);
3568fc3959e4SFam Zheng     } else {
3569fc3959e4SFam Zheng         uint8_t *buf;
3570fc3959e4SFam Zheng         QEMUIOVector local_qiov;
3571fc3959e4SFam Zheng         size_t bytes = nb_sectors << BDRV_SECTOR_BITS;
3572fc3959e4SFam Zheng 
3573fc3959e4SFam Zheng         buf = qemu_memalign(bdrv_opt_mem_align(bs), bytes);
3574fc3959e4SFam Zheng         memset(buf, 0, bytes);
3575fc3959e4SFam Zheng         qemu_iovec_init(&local_qiov, 1);
3576fc3959e4SFam Zheng         qemu_iovec_add(&local_qiov, buf, bytes);
3577fc3959e4SFam Zheng 
3578fc3959e4SFam Zheng         ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, &local_qiov,
3579fc3959e4SFam Zheng                                 BDRV_REQ_ZERO_WRITE | flags);
3580fc3959e4SFam Zheng         qemu_vfree(buf);
3581fc3959e4SFam Zheng     }
3582fc3959e4SFam Zheng     return ret;
3583c5fbe571SStefan Hajnoczi }
3584c5fbe571SStefan Hajnoczi 
358583f64091Sbellard /**
358683f64091Sbellard  * Truncate file to 'offset' bytes (needed only for file protocols)
358783f64091Sbellard  */
358883f64091Sbellard int bdrv_truncate(BlockDriverState *bs, int64_t offset)
358983f64091Sbellard {
359083f64091Sbellard     BlockDriver *drv = bs->drv;
359151762288SStefan Hajnoczi     int ret;
359283f64091Sbellard     if (!drv)
359319cb3738Sbellard         return -ENOMEDIUM;
359483f64091Sbellard     if (!drv->bdrv_truncate)
359583f64091Sbellard         return -ENOTSUP;
359659f2689dSNaphtali Sprei     if (bs->read_only)
359759f2689dSNaphtali Sprei         return -EACCES;
35989c75e168SJeff Cody 
359951762288SStefan Hajnoczi     ret = drv->bdrv_truncate(bs, offset);
360051762288SStefan Hajnoczi     if (ret == 0) {
360151762288SStefan Hajnoczi         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
3602a7f53e26SMarkus Armbruster         if (bs->blk) {
3603a7f53e26SMarkus Armbruster             blk_dev_resize_cb(bs->blk);
3604a7f53e26SMarkus Armbruster         }
360551762288SStefan Hajnoczi     }
360651762288SStefan Hajnoczi     return ret;
360783f64091Sbellard }
360883f64091Sbellard 
360983f64091Sbellard /**
36104a1d5e1fSFam Zheng  * Length of a allocated file in bytes. Sparse files are counted by actual
36114a1d5e1fSFam Zheng  * allocated space. Return < 0 if error or unknown.
36124a1d5e1fSFam Zheng  */
36134a1d5e1fSFam Zheng int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
36144a1d5e1fSFam Zheng {
36154a1d5e1fSFam Zheng     BlockDriver *drv = bs->drv;
36164a1d5e1fSFam Zheng     if (!drv) {
36174a1d5e1fSFam Zheng         return -ENOMEDIUM;
36184a1d5e1fSFam Zheng     }
36194a1d5e1fSFam Zheng     if (drv->bdrv_get_allocated_file_size) {
36204a1d5e1fSFam Zheng         return drv->bdrv_get_allocated_file_size(bs);
36214a1d5e1fSFam Zheng     }
36224a1d5e1fSFam Zheng     if (bs->file) {
36234a1d5e1fSFam Zheng         return bdrv_get_allocated_file_size(bs->file);
36244a1d5e1fSFam Zheng     }
36254a1d5e1fSFam Zheng     return -ENOTSUP;
36264a1d5e1fSFam Zheng }
36274a1d5e1fSFam Zheng 
36284a1d5e1fSFam Zheng /**
362965a9bb25SMarkus Armbruster  * Return number of sectors on success, -errno on error.
363083f64091Sbellard  */
363165a9bb25SMarkus Armbruster int64_t bdrv_nb_sectors(BlockDriverState *bs)
363283f64091Sbellard {
363383f64091Sbellard     BlockDriver *drv = bs->drv;
363465a9bb25SMarkus Armbruster 
363583f64091Sbellard     if (!drv)
363619cb3738Sbellard         return -ENOMEDIUM;
363751762288SStefan Hajnoczi 
3638b94a2610SKevin Wolf     if (drv->has_variable_length) {
3639b94a2610SKevin Wolf         int ret = refresh_total_sectors(bs, bs->total_sectors);
3640b94a2610SKevin Wolf         if (ret < 0) {
3641b94a2610SKevin Wolf             return ret;
3642fc01f7e7Sbellard         }
364346a4e4e6SStefan Hajnoczi     }
364465a9bb25SMarkus Armbruster     return bs->total_sectors;
364565a9bb25SMarkus Armbruster }
364665a9bb25SMarkus Armbruster 
364765a9bb25SMarkus Armbruster /**
364865a9bb25SMarkus Armbruster  * Return length in bytes on success, -errno on error.
364965a9bb25SMarkus Armbruster  * The length is always a multiple of BDRV_SECTOR_SIZE.
365065a9bb25SMarkus Armbruster  */
365165a9bb25SMarkus Armbruster int64_t bdrv_getlength(BlockDriverState *bs)
365265a9bb25SMarkus Armbruster {
365365a9bb25SMarkus Armbruster     int64_t ret = bdrv_nb_sectors(bs);
365465a9bb25SMarkus Armbruster 
365565a9bb25SMarkus Armbruster     return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
365646a4e4e6SStefan Hajnoczi }
3657fc01f7e7Sbellard 
365819cb3738Sbellard /* return 0 as number of sectors if no device present or error */
365996b8f136Sths void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
3660fc01f7e7Sbellard {
366165a9bb25SMarkus Armbruster     int64_t nb_sectors = bdrv_nb_sectors(bs);
366265a9bb25SMarkus Armbruster 
366365a9bb25SMarkus Armbruster     *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
3664fc01f7e7Sbellard }
3665cf98951bSbellard 
3666ff06f5f3SPaolo Bonzini void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
3667ff06f5f3SPaolo Bonzini                        BlockdevOnError on_write_error)
3668abd7f68dSMarkus Armbruster {
3669abd7f68dSMarkus Armbruster     bs->on_read_error = on_read_error;
3670abd7f68dSMarkus Armbruster     bs->on_write_error = on_write_error;
3671abd7f68dSMarkus Armbruster }
3672abd7f68dSMarkus Armbruster 
36731ceee0d5SPaolo Bonzini BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
3674abd7f68dSMarkus Armbruster {
3675abd7f68dSMarkus Armbruster     return is_read ? bs->on_read_error : bs->on_write_error;
3676abd7f68dSMarkus Armbruster }
3677abd7f68dSMarkus Armbruster 
36783e1caa5fSPaolo Bonzini BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
36793e1caa5fSPaolo Bonzini {
36803e1caa5fSPaolo Bonzini     BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
36813e1caa5fSPaolo Bonzini 
36823e1caa5fSPaolo Bonzini     switch (on_err) {
36833e1caa5fSPaolo Bonzini     case BLOCKDEV_ON_ERROR_ENOSPC:
3684a589569fSWenchao Xia         return (error == ENOSPC) ?
3685a589569fSWenchao Xia                BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
36863e1caa5fSPaolo Bonzini     case BLOCKDEV_ON_ERROR_STOP:
3687a589569fSWenchao Xia         return BLOCK_ERROR_ACTION_STOP;
36883e1caa5fSPaolo Bonzini     case BLOCKDEV_ON_ERROR_REPORT:
3689a589569fSWenchao Xia         return BLOCK_ERROR_ACTION_REPORT;
36903e1caa5fSPaolo Bonzini     case BLOCKDEV_ON_ERROR_IGNORE:
3691a589569fSWenchao Xia         return BLOCK_ERROR_ACTION_IGNORE;
36923e1caa5fSPaolo Bonzini     default:
36933e1caa5fSPaolo Bonzini         abort();
36943e1caa5fSPaolo Bonzini     }
36953e1caa5fSPaolo Bonzini }
36963e1caa5fSPaolo Bonzini 
3697c7c2ff0cSLuiz Capitulino static void send_qmp_error_event(BlockDriverState *bs,
3698c7c2ff0cSLuiz Capitulino                                  BlockErrorAction action,
3699c7c2ff0cSLuiz Capitulino                                  bool is_read, int error)
3700c7c2ff0cSLuiz Capitulino {
3701573742a5SPeter Maydell     IoOperationType optype;
3702c7c2ff0cSLuiz Capitulino 
3703573742a5SPeter Maydell     optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
3704573742a5SPeter Maydell     qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
3705c7c2ff0cSLuiz Capitulino                                    bdrv_iostatus_is_enabled(bs),
3706624ff573SLuiz Capitulino                                    error == ENOSPC, strerror(error),
3707624ff573SLuiz Capitulino                                    &error_abort);
3708c7c2ff0cSLuiz Capitulino }
3709c7c2ff0cSLuiz Capitulino 
37103e1caa5fSPaolo Bonzini /* This is done by device models because, while the block layer knows
37113e1caa5fSPaolo Bonzini  * about the error, it does not know whether an operation comes from
37123e1caa5fSPaolo Bonzini  * the device or the block layer (from a job, for example).
37133e1caa5fSPaolo Bonzini  */
37143e1caa5fSPaolo Bonzini void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
37153e1caa5fSPaolo Bonzini                        bool is_read, int error)
37163e1caa5fSPaolo Bonzini {
37173e1caa5fSPaolo Bonzini     assert(error >= 0);
37182bd3bce8SPaolo Bonzini 
3719a589569fSWenchao Xia     if (action == BLOCK_ERROR_ACTION_STOP) {
37202bd3bce8SPaolo Bonzini         /* First set the iostatus, so that "info block" returns an iostatus
37212bd3bce8SPaolo Bonzini          * that matches the events raised so far (an additional error iostatus
37222bd3bce8SPaolo Bonzini          * is fine, but not a lost one).
37232bd3bce8SPaolo Bonzini          */
37243e1caa5fSPaolo Bonzini         bdrv_iostatus_set_err(bs, error);
37252bd3bce8SPaolo Bonzini 
37262bd3bce8SPaolo Bonzini         /* Then raise the request to stop the VM and the event.
37272bd3bce8SPaolo Bonzini          * qemu_system_vmstop_request_prepare has two effects.  First,
37282bd3bce8SPaolo Bonzini          * it ensures that the STOP event always comes after the
37292bd3bce8SPaolo Bonzini          * BLOCK_IO_ERROR event.  Second, it ensures that even if management
37302bd3bce8SPaolo Bonzini          * can observe the STOP event and do a "cont" before the STOP
37312bd3bce8SPaolo Bonzini          * event is issued, the VM will not stop.  In this case, vm_start()
37322bd3bce8SPaolo Bonzini          * also ensures that the STOP/RESUME pair of events is emitted.
37332bd3bce8SPaolo Bonzini          */
37342bd3bce8SPaolo Bonzini         qemu_system_vmstop_request_prepare();
3735c7c2ff0cSLuiz Capitulino         send_qmp_error_event(bs, action, is_read, error);
37362bd3bce8SPaolo Bonzini         qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
37372bd3bce8SPaolo Bonzini     } else {
3738c7c2ff0cSLuiz Capitulino         send_qmp_error_event(bs, action, is_read, error);
37393e1caa5fSPaolo Bonzini     }
37403e1caa5fSPaolo Bonzini }
37413e1caa5fSPaolo Bonzini 
3742b338082bSbellard int bdrv_is_read_only(BlockDriverState *bs)
3743b338082bSbellard {
3744b338082bSbellard     return bs->read_only;
3745b338082bSbellard }
3746b338082bSbellard 
3747985a03b0Sths int bdrv_is_sg(BlockDriverState *bs)
3748985a03b0Sths {
3749985a03b0Sths     return bs->sg;
3750985a03b0Sths }
3751985a03b0Sths 
3752e900a7b7SChristoph Hellwig int bdrv_enable_write_cache(BlockDriverState *bs)
3753e900a7b7SChristoph Hellwig {
3754e900a7b7SChristoph Hellwig     return bs->enable_write_cache;
3755e900a7b7SChristoph Hellwig }
3756e900a7b7SChristoph Hellwig 
3757425b0148SPaolo Bonzini void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
3758425b0148SPaolo Bonzini {
3759425b0148SPaolo Bonzini     bs->enable_write_cache = wce;
376055b110f2SJeff Cody 
376155b110f2SJeff Cody     /* so a reopen() will preserve wce */
376255b110f2SJeff Cody     if (wce) {
376355b110f2SJeff Cody         bs->open_flags |= BDRV_O_CACHE_WB;
376455b110f2SJeff Cody     } else {
376555b110f2SJeff Cody         bs->open_flags &= ~BDRV_O_CACHE_WB;
376655b110f2SJeff Cody     }
3767425b0148SPaolo Bonzini }
3768425b0148SPaolo Bonzini 
3769ea2384d3Sbellard int bdrv_is_encrypted(BlockDriverState *bs)
3770ea2384d3Sbellard {
3771ea2384d3Sbellard     if (bs->backing_hd && bs->backing_hd->encrypted)
3772ea2384d3Sbellard         return 1;
3773ea2384d3Sbellard     return bs->encrypted;
3774ea2384d3Sbellard }
3775ea2384d3Sbellard 
3776c0f4ce77Saliguori int bdrv_key_required(BlockDriverState *bs)
3777c0f4ce77Saliguori {
3778c0f4ce77Saliguori     BlockDriverState *backing_hd = bs->backing_hd;
3779c0f4ce77Saliguori 
3780c0f4ce77Saliguori     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
3781c0f4ce77Saliguori         return 1;
3782c0f4ce77Saliguori     return (bs->encrypted && !bs->valid_key);
3783c0f4ce77Saliguori }
3784c0f4ce77Saliguori 
3785ea2384d3Sbellard int bdrv_set_key(BlockDriverState *bs, const char *key)
3786ea2384d3Sbellard {
3787ea2384d3Sbellard     int ret;
3788ea2384d3Sbellard     if (bs->backing_hd && bs->backing_hd->encrypted) {
3789ea2384d3Sbellard         ret = bdrv_set_key(bs->backing_hd, key);
3790ea2384d3Sbellard         if (ret < 0)
3791ea2384d3Sbellard             return ret;
3792ea2384d3Sbellard         if (!bs->encrypted)
3793ea2384d3Sbellard             return 0;
3794ea2384d3Sbellard     }
3795fd04a2aeSShahar Havivi     if (!bs->encrypted) {
3796fd04a2aeSShahar Havivi         return -EINVAL;
3797fd04a2aeSShahar Havivi     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
3798fd04a2aeSShahar Havivi         return -ENOMEDIUM;
3799fd04a2aeSShahar Havivi     }
3800c0f4ce77Saliguori     ret = bs->drv->bdrv_set_key(bs, key);
3801bb5fc20fSaliguori     if (ret < 0) {
3802bb5fc20fSaliguori         bs->valid_key = 0;
3803bb5fc20fSaliguori     } else if (!bs->valid_key) {
3804bb5fc20fSaliguori         bs->valid_key = 1;
3805a7f53e26SMarkus Armbruster         if (bs->blk) {
3806bb5fc20fSaliguori             /* call the change callback now, we skipped it on open */
3807a7f53e26SMarkus Armbruster             blk_dev_change_media_cb(bs->blk, true);
3808a7f53e26SMarkus Armbruster         }
3809bb5fc20fSaliguori     }
3810c0f4ce77Saliguori     return ret;
3811ea2384d3Sbellard }
3812ea2384d3Sbellard 
38134d2855a3SMarkus Armbruster /*
38144d2855a3SMarkus Armbruster  * Provide an encryption key for @bs.
38154d2855a3SMarkus Armbruster  * If @key is non-null:
38164d2855a3SMarkus Armbruster  *     If @bs is not encrypted, fail.
38174d2855a3SMarkus Armbruster  *     Else if the key is invalid, fail.
38184d2855a3SMarkus Armbruster  *     Else set @bs's key to @key, replacing the existing key, if any.
38194d2855a3SMarkus Armbruster  * If @key is null:
38204d2855a3SMarkus Armbruster  *     If @bs is encrypted and still lacks a key, fail.
38214d2855a3SMarkus Armbruster  *     Else do nothing.
38224d2855a3SMarkus Armbruster  * On failure, store an error object through @errp if non-null.
38234d2855a3SMarkus Armbruster  */
38244d2855a3SMarkus Armbruster void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
38254d2855a3SMarkus Armbruster {
38264d2855a3SMarkus Armbruster     if (key) {
38274d2855a3SMarkus Armbruster         if (!bdrv_is_encrypted(bs)) {
382881e5f78aSAlberto Garcia             error_setg(errp, "Node '%s' is not encrypted",
382981e5f78aSAlberto Garcia                       bdrv_get_device_or_node_name(bs));
38304d2855a3SMarkus Armbruster         } else if (bdrv_set_key(bs, key) < 0) {
38314d2855a3SMarkus Armbruster             error_set(errp, QERR_INVALID_PASSWORD);
38324d2855a3SMarkus Armbruster         }
38334d2855a3SMarkus Armbruster     } else {
38344d2855a3SMarkus Armbruster         if (bdrv_key_required(bs)) {
3835b1ca6391SMarkus Armbruster             error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
3836b1ca6391SMarkus Armbruster                       "'%s' (%s) is encrypted",
383781e5f78aSAlberto Garcia                       bdrv_get_device_or_node_name(bs),
38384d2855a3SMarkus Armbruster                       bdrv_get_encrypted_filename(bs));
38394d2855a3SMarkus Armbruster         }
38404d2855a3SMarkus Armbruster     }
38414d2855a3SMarkus Armbruster }
38424d2855a3SMarkus Armbruster 
3843f8d6bba1SMarkus Armbruster const char *bdrv_get_format_name(BlockDriverState *bs)
3844ea2384d3Sbellard {
3845f8d6bba1SMarkus Armbruster     return bs->drv ? bs->drv->format_name : NULL;
3846ea2384d3Sbellard }
3847ea2384d3Sbellard 
/*
 * qsort() comparator for an array of C strings.
 *
 * qsort() passes pointers to the array elements, so @a and @b each point
 * to a 'const char *'.  They must be dereferenced once before comparing;
 * passing them to strcmp() directly would compare the bytes of the
 * pointers instead of the strings.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
3852ada42401SStefan Hajnoczi 
3853ea2384d3Sbellard void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
3854ea2384d3Sbellard                          void *opaque)
3855ea2384d3Sbellard {
3856ea2384d3Sbellard     BlockDriver *drv;
3857e855e4fbSJeff Cody     int count = 0;
3858ada42401SStefan Hajnoczi     int i;
3859e855e4fbSJeff Cody     const char **formats = NULL;
3860ea2384d3Sbellard 
38618a22f02aSStefan Hajnoczi     QLIST_FOREACH(drv, &bdrv_drivers, list) {
3862e855e4fbSJeff Cody         if (drv->format_name) {
3863e855e4fbSJeff Cody             bool found = false;
3864e855e4fbSJeff Cody             int i = count;
3865e855e4fbSJeff Cody             while (formats && i && !found) {
3866e855e4fbSJeff Cody                 found = !strcmp(formats[--i], drv->format_name);
3867e855e4fbSJeff Cody             }
3868e855e4fbSJeff Cody 
3869e855e4fbSJeff Cody             if (!found) {
38705839e53bSMarkus Armbruster                 formats = g_renew(const char *, formats, count + 1);
3871e855e4fbSJeff Cody                 formats[count++] = drv->format_name;
3872ea2384d3Sbellard             }
3873ea2384d3Sbellard         }
3874e855e4fbSJeff Cody     }
3875ada42401SStefan Hajnoczi 
3876ada42401SStefan Hajnoczi     qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
3877ada42401SStefan Hajnoczi 
3878ada42401SStefan Hajnoczi     for (i = 0; i < count; i++) {
3879ada42401SStefan Hajnoczi         it(opaque, formats[i]);
3880ada42401SStefan Hajnoczi     }
3881ada42401SStefan Hajnoczi 
3882e855e4fbSJeff Cody     g_free(formats);
3883e855e4fbSJeff Cody }
3884ea2384d3Sbellard 
3885dc364f4cSBenoît Canet /* This function is to find a node in the bs graph */
3886dc364f4cSBenoît Canet BlockDriverState *bdrv_find_node(const char *node_name)
3887dc364f4cSBenoît Canet {
3888dc364f4cSBenoît Canet     BlockDriverState *bs;
3889dc364f4cSBenoît Canet 
3890dc364f4cSBenoît Canet     assert(node_name);
3891dc364f4cSBenoît Canet 
3892dc364f4cSBenoît Canet     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3893dc364f4cSBenoît Canet         if (!strcmp(node_name, bs->node_name)) {
3894dc364f4cSBenoît Canet             return bs;
3895dc364f4cSBenoît Canet         }
3896dc364f4cSBenoît Canet     }
3897dc364f4cSBenoît Canet     return NULL;
3898dc364f4cSBenoît Canet }
3899dc364f4cSBenoît Canet 
3900c13163fbSBenoît Canet /* Put this QMP function here so it can access the static graph_bdrv_states. */
3901d5a8ee60SAlberto Garcia BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
3902c13163fbSBenoît Canet {
3903c13163fbSBenoît Canet     BlockDeviceInfoList *list, *entry;
3904c13163fbSBenoît Canet     BlockDriverState *bs;
3905c13163fbSBenoît Canet 
3906c13163fbSBenoît Canet     list = NULL;
3907c13163fbSBenoît Canet     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3908d5a8ee60SAlberto Garcia         BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
3909d5a8ee60SAlberto Garcia         if (!info) {
3910d5a8ee60SAlberto Garcia             qapi_free_BlockDeviceInfoList(list);
3911d5a8ee60SAlberto Garcia             return NULL;
3912d5a8ee60SAlberto Garcia         }
3913c13163fbSBenoît Canet         entry = g_malloc0(sizeof(*entry));
3914d5a8ee60SAlberto Garcia         entry->value = info;
3915c13163fbSBenoît Canet         entry->next = list;
3916c13163fbSBenoît Canet         list = entry;
3917c13163fbSBenoît Canet     }
3918c13163fbSBenoît Canet 
3919c13163fbSBenoît Canet     return list;
3920c13163fbSBenoît Canet }
3921c13163fbSBenoît Canet 
392212d3ba82SBenoît Canet BlockDriverState *bdrv_lookup_bs(const char *device,
392312d3ba82SBenoît Canet                                  const char *node_name,
392412d3ba82SBenoît Canet                                  Error **errp)
392512d3ba82SBenoît Canet {
39267f06d47eSMarkus Armbruster     BlockBackend *blk;
39277f06d47eSMarkus Armbruster     BlockDriverState *bs;
392812d3ba82SBenoît Canet 
392912d3ba82SBenoît Canet     if (device) {
39307f06d47eSMarkus Armbruster         blk = blk_by_name(device);
393112d3ba82SBenoît Canet 
39327f06d47eSMarkus Armbruster         if (blk) {
39337f06d47eSMarkus Armbruster             return blk_bs(blk);
393412d3ba82SBenoît Canet         }
3935dd67fa50SBenoît Canet     }
393612d3ba82SBenoît Canet 
3937dd67fa50SBenoît Canet     if (node_name) {
393812d3ba82SBenoît Canet         bs = bdrv_find_node(node_name);
393912d3ba82SBenoît Canet 
3940dd67fa50SBenoît Canet         if (bs) {
3941dd67fa50SBenoît Canet             return bs;
3942dd67fa50SBenoît Canet         }
394312d3ba82SBenoît Canet     }
394412d3ba82SBenoît Canet 
3945dd67fa50SBenoît Canet     error_setg(errp, "Cannot find device=%s nor node_name=%s",
3946dd67fa50SBenoît Canet                      device ? device : "",
3947dd67fa50SBenoît Canet                      node_name ? node_name : "");
3948dd67fa50SBenoît Canet     return NULL;
394912d3ba82SBenoît Canet }
395012d3ba82SBenoît Canet 
39515a6684d2SJeff Cody /* If 'base' is in the same chain as 'top', return true. Otherwise,
39525a6684d2SJeff Cody  * return false.  If either argument is NULL, return false. */
39535a6684d2SJeff Cody bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
39545a6684d2SJeff Cody {
39555a6684d2SJeff Cody     while (top && top != base) {
39565a6684d2SJeff Cody         top = top->backing_hd;
39575a6684d2SJeff Cody     }
39585a6684d2SJeff Cody 
39595a6684d2SJeff Cody     return top != NULL;
39605a6684d2SJeff Cody }
39615a6684d2SJeff Cody 
396204df765aSFam Zheng BlockDriverState *bdrv_next_node(BlockDriverState *bs)
396304df765aSFam Zheng {
396404df765aSFam Zheng     if (!bs) {
396504df765aSFam Zheng         return QTAILQ_FIRST(&graph_bdrv_states);
396604df765aSFam Zheng     }
396704df765aSFam Zheng     return QTAILQ_NEXT(bs, node_list);
396804df765aSFam Zheng }
396904df765aSFam Zheng 
39702f399b0aSMarkus Armbruster BlockDriverState *bdrv_next(BlockDriverState *bs)
39712f399b0aSMarkus Armbruster {
39722f399b0aSMarkus Armbruster     if (!bs) {
39732f399b0aSMarkus Armbruster         return QTAILQ_FIRST(&bdrv_states);
39742f399b0aSMarkus Armbruster     }
3975dc364f4cSBenoît Canet     return QTAILQ_NEXT(bs, device_list);
39762f399b0aSMarkus Armbruster }
39772f399b0aSMarkus Armbruster 
397820a9e77dSFam Zheng const char *bdrv_get_node_name(const BlockDriverState *bs)
397920a9e77dSFam Zheng {
398020a9e77dSFam Zheng     return bs->node_name;
398120a9e77dSFam Zheng }
398220a9e77dSFam Zheng 
39837f06d47eSMarkus Armbruster /* TODO check what callers really want: bs->node_name or blk_name() */
3984bfb197e0SMarkus Armbruster const char *bdrv_get_device_name(const BlockDriverState *bs)
3985ea2384d3Sbellard {
3986bfb197e0SMarkus Armbruster     return bs->blk ? blk_name(bs->blk) : "";
3987ea2384d3Sbellard }
3988ea2384d3Sbellard 
39899b2aa84fSAlberto Garcia /* This can be used to identify nodes that might not have a device
39909b2aa84fSAlberto Garcia  * name associated. Since node and device names live in the same
39919b2aa84fSAlberto Garcia  * namespace, the result is unambiguous. The exception is if both are
39929b2aa84fSAlberto Garcia  * absent, then this returns an empty (non-null) string. */
39939b2aa84fSAlberto Garcia const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
39949b2aa84fSAlberto Garcia {
39959b2aa84fSAlberto Garcia     return bs->blk ? blk_name(bs->blk) : bs->node_name;
39969b2aa84fSAlberto Garcia }
39979b2aa84fSAlberto Garcia 
3998c8433287SMarkus Armbruster int bdrv_get_flags(BlockDriverState *bs)
3999c8433287SMarkus Armbruster {
4000c8433287SMarkus Armbruster     return bs->open_flags;
4001c8433287SMarkus Armbruster }
4002c8433287SMarkus Armbruster 
4003f0f0fdfeSKevin Wolf int bdrv_flush_all(void)
4004c6ca28d6Saliguori {
4005c6ca28d6Saliguori     BlockDriverState *bs;
4006f0f0fdfeSKevin Wolf     int result = 0;
4007c6ca28d6Saliguori 
4008dc364f4cSBenoît Canet     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
4009ed78cda3SStefan Hajnoczi         AioContext *aio_context = bdrv_get_aio_context(bs);
4010ed78cda3SStefan Hajnoczi         int ret;
4011ed78cda3SStefan Hajnoczi 
4012ed78cda3SStefan Hajnoczi         aio_context_acquire(aio_context);
4013ed78cda3SStefan Hajnoczi         ret = bdrv_flush(bs);
4014f0f0fdfeSKevin Wolf         if (ret < 0 && !result) {
4015f0f0fdfeSKevin Wolf             result = ret;
4016c6ca28d6Saliguori         }
4017ed78cda3SStefan Hajnoczi         aio_context_release(aio_context);
40181b7bdbc1SStefan Hajnoczi     }
4019c6ca28d6Saliguori 
4020f0f0fdfeSKevin Wolf     return result;
4021f0f0fdfeSKevin Wolf }
4022f0f0fdfeSKevin Wolf 
40233ac21627SPeter Lieven int bdrv_has_zero_init_1(BlockDriverState *bs)
40243ac21627SPeter Lieven {
40253ac21627SPeter Lieven     return 1;
40263ac21627SPeter Lieven }
40273ac21627SPeter Lieven 
4028f2feebbdSKevin Wolf int bdrv_has_zero_init(BlockDriverState *bs)
4029f2feebbdSKevin Wolf {
4030f2feebbdSKevin Wolf     assert(bs->drv);
4031f2feebbdSKevin Wolf 
403211212d8fSPaolo Bonzini     /* If BS is a copy on write image, it is initialized to
403311212d8fSPaolo Bonzini        the contents of the base image, which may not be zeroes.  */
403411212d8fSPaolo Bonzini     if (bs->backing_hd) {
403511212d8fSPaolo Bonzini         return 0;
403611212d8fSPaolo Bonzini     }
4037336c1c12SKevin Wolf     if (bs->drv->bdrv_has_zero_init) {
4038336c1c12SKevin Wolf         return bs->drv->bdrv_has_zero_init(bs);
4039f2feebbdSKevin Wolf     }
4040f2feebbdSKevin Wolf 
40413ac21627SPeter Lieven     /* safe default */
40423ac21627SPeter Lieven     return 0;
4043f2feebbdSKevin Wolf }
4044f2feebbdSKevin Wolf 
40454ce78691SPeter Lieven bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
40464ce78691SPeter Lieven {
40474ce78691SPeter Lieven     BlockDriverInfo bdi;
40484ce78691SPeter Lieven 
40494ce78691SPeter Lieven     if (bs->backing_hd) {
40504ce78691SPeter Lieven         return false;
40514ce78691SPeter Lieven     }
40524ce78691SPeter Lieven 
40534ce78691SPeter Lieven     if (bdrv_get_info(bs, &bdi) == 0) {
40544ce78691SPeter Lieven         return bdi.unallocated_blocks_are_zero;
40554ce78691SPeter Lieven     }
40564ce78691SPeter Lieven 
40574ce78691SPeter Lieven     return false;
40584ce78691SPeter Lieven }
40594ce78691SPeter Lieven 
40604ce78691SPeter Lieven bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
40614ce78691SPeter Lieven {
40624ce78691SPeter Lieven     BlockDriverInfo bdi;
40634ce78691SPeter Lieven 
40644ce78691SPeter Lieven     if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
40654ce78691SPeter Lieven         return false;
40664ce78691SPeter Lieven     }
40674ce78691SPeter Lieven 
40684ce78691SPeter Lieven     if (bdrv_get_info(bs, &bdi) == 0) {
40694ce78691SPeter Lieven         return bdi.can_write_zeroes_with_unmap;
40704ce78691SPeter Lieven     }
40714ce78691SPeter Lieven 
40724ce78691SPeter Lieven     return false;
40734ce78691SPeter Lieven }
40744ce78691SPeter Lieven 
4075b6b8a333SPaolo Bonzini typedef struct BdrvCoGetBlockStatusData {
4076376ae3f1SStefan Hajnoczi     BlockDriverState *bs;
4077b35b2bbaSMiroslav Rezanina     BlockDriverState *base;
4078376ae3f1SStefan Hajnoczi     int64_t sector_num;
4079376ae3f1SStefan Hajnoczi     int nb_sectors;
4080376ae3f1SStefan Hajnoczi     int *pnum;
4081b6b8a333SPaolo Bonzini     int64_t ret;
4082376ae3f1SStefan Hajnoczi     bool done;
4083b6b8a333SPaolo Bonzini } BdrvCoGetBlockStatusData;
4084376ae3f1SStefan Hajnoczi 
4085f58c7b35Sths /*
4086705be728SFam Zheng  * Returns the allocation status of the specified sectors.
4087705be728SFam Zheng  * Drivers not implementing the functionality are assumed to not support
4088705be728SFam Zheng  * backing files, hence all their sectors are reported as allocated.
4089f58c7b35Sths  *
4090bd9533e3SStefan Hajnoczi  * If 'sector_num' is beyond the end of the disk image the return value is 0
4091bd9533e3SStefan Hajnoczi  * and 'pnum' is set to 0.
4092bd9533e3SStefan Hajnoczi  *
4093f58c7b35Sths  * 'pnum' is set to the number of sectors (including and immediately following
4094f58c7b35Sths  * the specified sector) that are known to be in the same
4095f58c7b35Sths  * allocated/unallocated state.
4096f58c7b35Sths  *
4097bd9533e3SStefan Hajnoczi  * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
4098bd9533e3SStefan Hajnoczi  * beyond the end of the disk image it will be clamped.
4099f58c7b35Sths  */
4100b6b8a333SPaolo Bonzini static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
4101bdad13b9SPaolo Bonzini                                                      int64_t sector_num,
4102060f51c9SStefan Hajnoczi                                                      int nb_sectors, int *pnum)
4103f58c7b35Sths {
410430a7f2fcSMarkus Armbruster     int64_t total_sectors;
4105f58c7b35Sths     int64_t n;
41065daa74a6SPaolo Bonzini     int64_t ret, ret2;
4107bd9533e3SStefan Hajnoczi 
410830a7f2fcSMarkus Armbruster     total_sectors = bdrv_nb_sectors(bs);
410930a7f2fcSMarkus Armbruster     if (total_sectors < 0) {
411030a7f2fcSMarkus Armbruster         return total_sectors;
4111617ccb46SPaolo Bonzini     }
4112617ccb46SPaolo Bonzini 
411330a7f2fcSMarkus Armbruster     if (sector_num >= total_sectors) {
41146aebab14SStefan Hajnoczi         *pnum = 0;
41156aebab14SStefan Hajnoczi         return 0;
41166aebab14SStefan Hajnoczi     }
4117bd9533e3SStefan Hajnoczi 
411830a7f2fcSMarkus Armbruster     n = total_sectors - sector_num;
4119bd9533e3SStefan Hajnoczi     if (n < nb_sectors) {
4120bd9533e3SStefan Hajnoczi         nb_sectors = n;
4121bd9533e3SStefan Hajnoczi     }
4122bd9533e3SStefan Hajnoczi 
4123b6b8a333SPaolo Bonzini     if (!bs->drv->bdrv_co_get_block_status) {
4124bd9533e3SStefan Hajnoczi         *pnum = nb_sectors;
4125e88ae226SKevin Wolf         ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
4126918e92d7SPaolo Bonzini         if (bs->drv->protocol_name) {
4127918e92d7SPaolo Bonzini             ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
4128918e92d7SPaolo Bonzini         }
4129918e92d7SPaolo Bonzini         return ret;
41306aebab14SStefan Hajnoczi     }
41316aebab14SStefan Hajnoczi 
4132415b5b01SPaolo Bonzini     ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
4133415b5b01SPaolo Bonzini     if (ret < 0) {
41343e0a233dSPeter Lieven         *pnum = 0;
4135415b5b01SPaolo Bonzini         return ret;
4136415b5b01SPaolo Bonzini     }
4137415b5b01SPaolo Bonzini 
413892bc50a5SPeter Lieven     if (ret & BDRV_BLOCK_RAW) {
413992bc50a5SPeter Lieven         assert(ret & BDRV_BLOCK_OFFSET_VALID);
414092bc50a5SPeter Lieven         return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
414192bc50a5SPeter Lieven                                      *pnum, pnum);
414292bc50a5SPeter Lieven     }
414392bc50a5SPeter Lieven 
4144e88ae226SKevin Wolf     if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
4145e88ae226SKevin Wolf         ret |= BDRV_BLOCK_ALLOCATED;
4146e88ae226SKevin Wolf     }
4147e88ae226SKevin Wolf 
4148c3d86884SPeter Lieven     if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
4149c3d86884SPeter Lieven         if (bdrv_unallocated_blocks_are_zero(bs)) {
4150415b5b01SPaolo Bonzini             ret |= BDRV_BLOCK_ZERO;
41511f9db224SPeter Lieven         } else if (bs->backing_hd) {
4152f0ad5712SPaolo Bonzini             BlockDriverState *bs2 = bs->backing_hd;
415330a7f2fcSMarkus Armbruster             int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
415430a7f2fcSMarkus Armbruster             if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
4155f0ad5712SPaolo Bonzini                 ret |= BDRV_BLOCK_ZERO;
4156f0ad5712SPaolo Bonzini             }
4157f0ad5712SPaolo Bonzini         }
4158415b5b01SPaolo Bonzini     }
41595daa74a6SPaolo Bonzini 
41605daa74a6SPaolo Bonzini     if (bs->file &&
41615daa74a6SPaolo Bonzini         (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
41625daa74a6SPaolo Bonzini         (ret & BDRV_BLOCK_OFFSET_VALID)) {
416359c9a95fSMax Reitz         int file_pnum;
416459c9a95fSMax Reitz 
41655daa74a6SPaolo Bonzini         ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
416659c9a95fSMax Reitz                                         *pnum, &file_pnum);
41675daa74a6SPaolo Bonzini         if (ret2 >= 0) {
41685daa74a6SPaolo Bonzini             /* Ignore errors.  This is just providing extra information, it
41695daa74a6SPaolo Bonzini              * is useful but not necessary.
41705daa74a6SPaolo Bonzini              */
417159c9a95fSMax Reitz             if (!file_pnum) {
417259c9a95fSMax Reitz                 /* !file_pnum indicates an offset at or beyond the EOF; it is
417359c9a95fSMax Reitz                  * perfectly valid for the format block driver to point to such
417459c9a95fSMax Reitz                  * offsets, so catch it and mark everything as zero */
417559c9a95fSMax Reitz                 ret |= BDRV_BLOCK_ZERO;
417659c9a95fSMax Reitz             } else {
417759c9a95fSMax Reitz                 /* Limit request to the range reported by the protocol driver */
417859c9a95fSMax Reitz                 *pnum = file_pnum;
41795daa74a6SPaolo Bonzini                 ret |= (ret2 & BDRV_BLOCK_ZERO);
41805daa74a6SPaolo Bonzini             }
41815daa74a6SPaolo Bonzini         }
418259c9a95fSMax Reitz     }
41835daa74a6SPaolo Bonzini 
4184415b5b01SPaolo Bonzini     return ret;
4185060f51c9SStefan Hajnoczi }
4186060f51c9SStefan Hajnoczi 
4187b6b8a333SPaolo Bonzini /* Coroutine wrapper for bdrv_get_block_status() */
4188b6b8a333SPaolo Bonzini static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
4189060f51c9SStefan Hajnoczi {
4190b6b8a333SPaolo Bonzini     BdrvCoGetBlockStatusData *data = opaque;
4191060f51c9SStefan Hajnoczi     BlockDriverState *bs = data->bs;
4192060f51c9SStefan Hajnoczi 
4193b6b8a333SPaolo Bonzini     data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
4194060f51c9SStefan Hajnoczi                                          data->pnum);
4195060f51c9SStefan Hajnoczi     data->done = true;
4196060f51c9SStefan Hajnoczi }
4197060f51c9SStefan Hajnoczi 
4198060f51c9SStefan Hajnoczi /*
4199b6b8a333SPaolo Bonzini  * Synchronous wrapper around bdrv_co_get_block_status().
4200060f51c9SStefan Hajnoczi  *
4201b6b8a333SPaolo Bonzini  * See bdrv_co_get_block_status() for details.
4202060f51c9SStefan Hajnoczi  */
4203b6b8a333SPaolo Bonzini int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
4204b6b8a333SPaolo Bonzini                               int nb_sectors, int *pnum)
4205060f51c9SStefan Hajnoczi {
4206376ae3f1SStefan Hajnoczi     Coroutine *co;
4207b6b8a333SPaolo Bonzini     BdrvCoGetBlockStatusData data = {
4208376ae3f1SStefan Hajnoczi         .bs = bs,
4209376ae3f1SStefan Hajnoczi         .sector_num = sector_num,
4210376ae3f1SStefan Hajnoczi         .nb_sectors = nb_sectors,
4211376ae3f1SStefan Hajnoczi         .pnum = pnum,
4212376ae3f1SStefan Hajnoczi         .done = false,
4213376ae3f1SStefan Hajnoczi     };
4214376ae3f1SStefan Hajnoczi 
4215bdad13b9SPaolo Bonzini     if (qemu_in_coroutine()) {
4216bdad13b9SPaolo Bonzini         /* Fast-path if already in coroutine context */
4217b6b8a333SPaolo Bonzini         bdrv_get_block_status_co_entry(&data);
4218bdad13b9SPaolo Bonzini     } else {
42192572b37aSStefan Hajnoczi         AioContext *aio_context = bdrv_get_aio_context(bs);
42202572b37aSStefan Hajnoczi 
4221b6b8a333SPaolo Bonzini         co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
4222376ae3f1SStefan Hajnoczi         qemu_coroutine_enter(co, &data);
4223376ae3f1SStefan Hajnoczi         while (!data.done) {
42242572b37aSStefan Hajnoczi             aio_poll(aio_context, true);
4225376ae3f1SStefan Hajnoczi         }
4226bdad13b9SPaolo Bonzini     }
4227376ae3f1SStefan Hajnoczi     return data.ret;
4228376ae3f1SStefan Hajnoczi }
4229f58c7b35Sths 
4230b6b8a333SPaolo Bonzini int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
4231b6b8a333SPaolo Bonzini                                    int nb_sectors, int *pnum)
4232b6b8a333SPaolo Bonzini {
42334333bb71SPaolo Bonzini     int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
42344333bb71SPaolo Bonzini     if (ret < 0) {
42354333bb71SPaolo Bonzini         return ret;
42364333bb71SPaolo Bonzini     }
423701fb2705SKevin Wolf     return !!(ret & BDRV_BLOCK_ALLOCATED);
4238b6b8a333SPaolo Bonzini }
4239b6b8a333SPaolo Bonzini 
4240188a7bbfSPaolo Bonzini /*
4241188a7bbfSPaolo Bonzini  * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
4242188a7bbfSPaolo Bonzini  *
4243188a7bbfSPaolo Bonzini  * Return true if the given sector is allocated in any image between
4244188a7bbfSPaolo Bonzini  * BASE and TOP (inclusive).  BASE can be NULL to check if the given
4245188a7bbfSPaolo Bonzini  * sector is allocated in any image of the chain.  Return false otherwise.
4246188a7bbfSPaolo Bonzini  *
4247188a7bbfSPaolo Bonzini  * 'pnum' is set to the number of sectors (including and immediately following
4248188a7bbfSPaolo Bonzini  *  the specified sector) that are known to be in the same
4249188a7bbfSPaolo Bonzini  *  allocated/unallocated state.
4250188a7bbfSPaolo Bonzini  *
4251188a7bbfSPaolo Bonzini  */
42524f578637SPaolo Bonzini int bdrv_is_allocated_above(BlockDriverState *top,
4253188a7bbfSPaolo Bonzini                             BlockDriverState *base,
4254188a7bbfSPaolo Bonzini                             int64_t sector_num,
4255188a7bbfSPaolo Bonzini                             int nb_sectors, int *pnum)
4256188a7bbfSPaolo Bonzini {
4257188a7bbfSPaolo Bonzini     BlockDriverState *intermediate;
4258188a7bbfSPaolo Bonzini     int ret, n = nb_sectors;
4259188a7bbfSPaolo Bonzini 
4260188a7bbfSPaolo Bonzini     intermediate = top;
4261188a7bbfSPaolo Bonzini     while (intermediate && intermediate != base) {
4262188a7bbfSPaolo Bonzini         int pnum_inter;
4263bdad13b9SPaolo Bonzini         ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
4264188a7bbfSPaolo Bonzini                                 &pnum_inter);
4265188a7bbfSPaolo Bonzini         if (ret < 0) {
4266188a7bbfSPaolo Bonzini             return ret;
4267188a7bbfSPaolo Bonzini         } else if (ret) {
4268188a7bbfSPaolo Bonzini             *pnum = pnum_inter;
4269188a7bbfSPaolo Bonzini             return 1;
4270188a7bbfSPaolo Bonzini         }
4271188a7bbfSPaolo Bonzini 
4272188a7bbfSPaolo Bonzini         /*
4273188a7bbfSPaolo Bonzini          * [sector_num, nb_sectors] is unallocated on top but intermediate
4274188a7bbfSPaolo Bonzini          * might have
4275188a7bbfSPaolo Bonzini          *
4276188a7bbfSPaolo Bonzini          * [sector_num+x, nr_sectors] allocated.
4277188a7bbfSPaolo Bonzini          */
427863ba17d3SVishvananda Ishaya         if (n > pnum_inter &&
427963ba17d3SVishvananda Ishaya             (intermediate == top ||
428063ba17d3SVishvananda Ishaya              sector_num + pnum_inter < intermediate->total_sectors)) {
4281188a7bbfSPaolo Bonzini             n = pnum_inter;
4282188a7bbfSPaolo Bonzini         }
4283188a7bbfSPaolo Bonzini 
4284188a7bbfSPaolo Bonzini         intermediate = intermediate->backing_hd;
4285188a7bbfSPaolo Bonzini     }
4286188a7bbfSPaolo Bonzini 
4287188a7bbfSPaolo Bonzini     *pnum = n;
4288188a7bbfSPaolo Bonzini     return 0;
4289188a7bbfSPaolo Bonzini }
4290188a7bbfSPaolo Bonzini 
4291045df330Saliguori const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
4292045df330Saliguori {
4293045df330Saliguori     if (bs->backing_hd && bs->backing_hd->encrypted)
4294045df330Saliguori         return bs->backing_file;
4295045df330Saliguori     else if (bs->encrypted)
4296045df330Saliguori         return bs->filename;
4297045df330Saliguori     else
4298045df330Saliguori         return NULL;
4299045df330Saliguori }
4300045df330Saliguori 
430183f64091Sbellard void bdrv_get_backing_filename(BlockDriverState *bs,
430283f64091Sbellard                                char *filename, int filename_size)
430383f64091Sbellard {
430483f64091Sbellard     pstrcpy(filename, filename_size, bs->backing_file);
430583f64091Sbellard }
430683f64091Sbellard 
4307faea38e7Sbellard int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
4308faea38e7Sbellard                           const uint8_t *buf, int nb_sectors)
4309faea38e7Sbellard {
4310faea38e7Sbellard     BlockDriver *drv = bs->drv;
4311b9c64947SMax Reitz     int ret;
4312b9c64947SMax Reitz 
4313b9c64947SMax Reitz     if (!drv) {
431419cb3738Sbellard         return -ENOMEDIUM;
4315b9c64947SMax Reitz     }
4316b9c64947SMax Reitz     if (!drv->bdrv_write_compressed) {
4317faea38e7Sbellard         return -ENOTSUP;
4318b9c64947SMax Reitz     }
4319b9c64947SMax Reitz     ret = bdrv_check_request(bs, sector_num, nb_sectors);
4320b9c64947SMax Reitz     if (ret < 0) {
4321b9c64947SMax Reitz         return ret;
4322b9c64947SMax Reitz     }
43237cd1e32aSlirans@il.ibm.com 
4324e4654d2dSFam Zheng     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
43257cd1e32aSlirans@il.ibm.com 
4326faea38e7Sbellard     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
4327faea38e7Sbellard }
4328faea38e7Sbellard 
4329faea38e7Sbellard int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
4330faea38e7Sbellard {
4331faea38e7Sbellard     BlockDriver *drv = bs->drv;
4332faea38e7Sbellard     if (!drv)
433319cb3738Sbellard         return -ENOMEDIUM;
4334faea38e7Sbellard     if (!drv->bdrv_get_info)
4335faea38e7Sbellard         return -ENOTSUP;
4336faea38e7Sbellard     memset(bdi, 0, sizeof(*bdi));
4337faea38e7Sbellard     return drv->bdrv_get_info(bs, bdi);
4338faea38e7Sbellard }
4339faea38e7Sbellard 
4340eae041feSMax Reitz ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
4341eae041feSMax Reitz {
4342eae041feSMax Reitz     BlockDriver *drv = bs->drv;
4343eae041feSMax Reitz     if (drv && drv->bdrv_get_specific_info) {
4344eae041feSMax Reitz         return drv->bdrv_get_specific_info(bs);
4345eae041feSMax Reitz     }
4346eae041feSMax Reitz     return NULL;
4347eae041feSMax Reitz }
4348eae041feSMax Reitz 
434945566e9cSChristoph Hellwig int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
435045566e9cSChristoph Hellwig                       int64_t pos, int size)
4351178e08a5Saliguori {
4352cf8074b3SKevin Wolf     QEMUIOVector qiov;
4353cf8074b3SKevin Wolf     struct iovec iov = {
4354cf8074b3SKevin Wolf         .iov_base   = (void *) buf,
4355cf8074b3SKevin Wolf         .iov_len    = size,
4356cf8074b3SKevin Wolf     };
4357cf8074b3SKevin Wolf 
4358cf8074b3SKevin Wolf     qemu_iovec_init_external(&qiov, &iov, 1);
4359cf8074b3SKevin Wolf     return bdrv_writev_vmstate(bs, &qiov, pos);
4360cf8074b3SKevin Wolf }
4361cf8074b3SKevin Wolf 
4362cf8074b3SKevin Wolf int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
4363cf8074b3SKevin Wolf {
4364178e08a5Saliguori     BlockDriver *drv = bs->drv;
4365cf8074b3SKevin Wolf 
4366cf8074b3SKevin Wolf     if (!drv) {
4367178e08a5Saliguori         return -ENOMEDIUM;
4368cf8074b3SKevin Wolf     } else if (drv->bdrv_save_vmstate) {
4369cf8074b3SKevin Wolf         return drv->bdrv_save_vmstate(bs, qiov, pos);
4370cf8074b3SKevin Wolf     } else if (bs->file) {
4371cf8074b3SKevin Wolf         return bdrv_writev_vmstate(bs->file, qiov, pos);
4372cf8074b3SKevin Wolf     }
4373cf8074b3SKevin Wolf 
43747cdb1f6dSMORITA Kazutaka     return -ENOTSUP;
4375178e08a5Saliguori }
4376178e08a5Saliguori 
437745566e9cSChristoph Hellwig int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
437845566e9cSChristoph Hellwig                       int64_t pos, int size)
4379178e08a5Saliguori {
4380178e08a5Saliguori     BlockDriver *drv = bs->drv;
4381178e08a5Saliguori     if (!drv)
4382178e08a5Saliguori         return -ENOMEDIUM;
43837cdb1f6dSMORITA Kazutaka     if (drv->bdrv_load_vmstate)
438445566e9cSChristoph Hellwig         return drv->bdrv_load_vmstate(bs, buf, pos, size);
43857cdb1f6dSMORITA Kazutaka     if (bs->file)
43867cdb1f6dSMORITA Kazutaka         return bdrv_load_vmstate(bs->file, buf, pos, size);
43877cdb1f6dSMORITA Kazutaka     return -ENOTSUP;
4388178e08a5Saliguori }
4389178e08a5Saliguori 
43908b9b0cc2SKevin Wolf void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
43918b9b0cc2SKevin Wolf {
4392bf736fe3SKevin Wolf     if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
43938b9b0cc2SKevin Wolf         return;
43948b9b0cc2SKevin Wolf     }
43958b9b0cc2SKevin Wolf 
4396bf736fe3SKevin Wolf     bs->drv->bdrv_debug_event(bs, event);
439741c695c7SKevin Wolf }
43988b9b0cc2SKevin Wolf 
439941c695c7SKevin Wolf int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
440041c695c7SKevin Wolf                           const char *tag)
440141c695c7SKevin Wolf {
440241c695c7SKevin Wolf     while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
440341c695c7SKevin Wolf         bs = bs->file;
440441c695c7SKevin Wolf     }
440541c695c7SKevin Wolf 
440641c695c7SKevin Wolf     if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
440741c695c7SKevin Wolf         return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
440841c695c7SKevin Wolf     }
440941c695c7SKevin Wolf 
441041c695c7SKevin Wolf     return -ENOTSUP;
441141c695c7SKevin Wolf }
441241c695c7SKevin Wolf 
44134cc70e93SFam Zheng int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
44144cc70e93SFam Zheng {
44154cc70e93SFam Zheng     while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
44164cc70e93SFam Zheng         bs = bs->file;
44174cc70e93SFam Zheng     }
44184cc70e93SFam Zheng 
44194cc70e93SFam Zheng     if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
44204cc70e93SFam Zheng         return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
44214cc70e93SFam Zheng     }
44224cc70e93SFam Zheng 
44234cc70e93SFam Zheng     return -ENOTSUP;
44244cc70e93SFam Zheng }
44254cc70e93SFam Zheng 
442641c695c7SKevin Wolf int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
442741c695c7SKevin Wolf {
4428938789eaSMax Reitz     while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
442941c695c7SKevin Wolf         bs = bs->file;
443041c695c7SKevin Wolf     }
443141c695c7SKevin Wolf 
443241c695c7SKevin Wolf     if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
443341c695c7SKevin Wolf         return bs->drv->bdrv_debug_resume(bs, tag);
443441c695c7SKevin Wolf     }
443541c695c7SKevin Wolf 
443641c695c7SKevin Wolf     return -ENOTSUP;
443741c695c7SKevin Wolf }
443841c695c7SKevin Wolf 
443941c695c7SKevin Wolf bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
444041c695c7SKevin Wolf {
444141c695c7SKevin Wolf     while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
444241c695c7SKevin Wolf         bs = bs->file;
444341c695c7SKevin Wolf     }
444441c695c7SKevin Wolf 
444541c695c7SKevin Wolf     if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
444641c695c7SKevin Wolf         return bs->drv->bdrv_debug_is_suspended(bs, tag);
444741c695c7SKevin Wolf     }
444841c695c7SKevin Wolf 
444941c695c7SKevin Wolf     return false;
44508b9b0cc2SKevin Wolf }
44518b9b0cc2SKevin Wolf 
4452199630b6SBlue Swirl int bdrv_is_snapshot(BlockDriverState *bs)
4453199630b6SBlue Swirl {
4454199630b6SBlue Swirl     return !!(bs->open_flags & BDRV_O_SNAPSHOT);
4455199630b6SBlue Swirl }
4456199630b6SBlue Swirl 
4457b1b1d783SJeff Cody /* backing_file can either be relative, or absolute, or a protocol.  If it is
4458b1b1d783SJeff Cody  * relative, it must be relative to the chain.  So, passing in bs->filename
4459b1b1d783SJeff Cody  * from a BDS as backing_file should not be done, as that may be relative to
4460b1b1d783SJeff Cody  * the CWD rather than the chain. */
4461e8a6bb9cSMarcelo Tosatti BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
4462e8a6bb9cSMarcelo Tosatti         const char *backing_file)
4463e8a6bb9cSMarcelo Tosatti {
4464b1b1d783SJeff Cody     char *filename_full = NULL;
4465b1b1d783SJeff Cody     char *backing_file_full = NULL;
4466b1b1d783SJeff Cody     char *filename_tmp = NULL;
4467b1b1d783SJeff Cody     int is_protocol = 0;
4468b1b1d783SJeff Cody     BlockDriverState *curr_bs = NULL;
4469b1b1d783SJeff Cody     BlockDriverState *retval = NULL;
4470b1b1d783SJeff Cody 
4471b1b1d783SJeff Cody     if (!bs || !bs->drv || !backing_file) {
4472e8a6bb9cSMarcelo Tosatti         return NULL;
4473e8a6bb9cSMarcelo Tosatti     }
4474e8a6bb9cSMarcelo Tosatti 
4475b1b1d783SJeff Cody     filename_full     = g_malloc(PATH_MAX);
4476b1b1d783SJeff Cody     backing_file_full = g_malloc(PATH_MAX);
4477b1b1d783SJeff Cody     filename_tmp      = g_malloc(PATH_MAX);
4478b1b1d783SJeff Cody 
4479b1b1d783SJeff Cody     is_protocol = path_has_protocol(backing_file);
4480b1b1d783SJeff Cody 
4481b1b1d783SJeff Cody     for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
4482b1b1d783SJeff Cody 
4483b1b1d783SJeff Cody         /* If either of the filename paths is actually a protocol, then
4484b1b1d783SJeff Cody          * compare unmodified paths; otherwise make paths relative */
4485b1b1d783SJeff Cody         if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
4486b1b1d783SJeff Cody             if (strcmp(backing_file, curr_bs->backing_file) == 0) {
4487b1b1d783SJeff Cody                 retval = curr_bs->backing_hd;
4488b1b1d783SJeff Cody                 break;
4489b1b1d783SJeff Cody             }
4490e8a6bb9cSMarcelo Tosatti         } else {
4491b1b1d783SJeff Cody             /* If not an absolute filename path, make it relative to the current
4492b1b1d783SJeff Cody              * image's filename path */
4493b1b1d783SJeff Cody             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4494b1b1d783SJeff Cody                          backing_file);
4495b1b1d783SJeff Cody 
4496b1b1d783SJeff Cody             /* We are going to compare absolute pathnames */
4497b1b1d783SJeff Cody             if (!realpath(filename_tmp, filename_full)) {
4498b1b1d783SJeff Cody                 continue;
4499b1b1d783SJeff Cody             }
4500b1b1d783SJeff Cody 
4501b1b1d783SJeff Cody             /* We need to make sure the backing filename we are comparing against
4502b1b1d783SJeff Cody              * is relative to the current image filename (or absolute) */
4503b1b1d783SJeff Cody             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4504b1b1d783SJeff Cody                          curr_bs->backing_file);
4505b1b1d783SJeff Cody 
4506b1b1d783SJeff Cody             if (!realpath(filename_tmp, backing_file_full)) {
4507b1b1d783SJeff Cody                 continue;
4508b1b1d783SJeff Cody             }
4509b1b1d783SJeff Cody 
4510b1b1d783SJeff Cody             if (strcmp(backing_file_full, filename_full) == 0) {
4511b1b1d783SJeff Cody                 retval = curr_bs->backing_hd;
4512b1b1d783SJeff Cody                 break;
4513b1b1d783SJeff Cody             }
4514e8a6bb9cSMarcelo Tosatti         }
4515e8a6bb9cSMarcelo Tosatti     }
4516e8a6bb9cSMarcelo Tosatti 
4517b1b1d783SJeff Cody     g_free(filename_full);
4518b1b1d783SJeff Cody     g_free(backing_file_full);
4519b1b1d783SJeff Cody     g_free(filename_tmp);
4520b1b1d783SJeff Cody     return retval;
4521e8a6bb9cSMarcelo Tosatti }
4522e8a6bb9cSMarcelo Tosatti 
4523f198fd1cSBenoît Canet int bdrv_get_backing_file_depth(BlockDriverState *bs)
4524f198fd1cSBenoît Canet {
4525f198fd1cSBenoît Canet     if (!bs->drv) {
4526f198fd1cSBenoît Canet         return 0;
4527f198fd1cSBenoît Canet     }
4528f198fd1cSBenoît Canet 
4529f198fd1cSBenoît Canet     if (!bs->backing_hd) {
4530f198fd1cSBenoît Canet         return 0;
4531f198fd1cSBenoît Canet     }
4532f198fd1cSBenoît Canet 
4533f198fd1cSBenoît Canet     return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
4534f198fd1cSBenoît Canet }
4535f198fd1cSBenoît Canet 
4536ea2384d3Sbellard /**************************************************************/
453783f64091Sbellard /* async I/Os */
4538ea2384d3Sbellard 
45397c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
4540f141eafeSaliguori                            QEMUIOVector *qiov, int nb_sectors,
4541097310b5SMarkus Armbruster                            BlockCompletionFunc *cb, void *opaque)
4542ea2384d3Sbellard {
4543bbf0a440SStefan Hajnoczi     trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
4544bbf0a440SStefan Hajnoczi 
4545d20d9b7cSPaolo Bonzini     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
45468c5873d6SStefan Hajnoczi                                  cb, opaque, false);
454783f64091Sbellard }
454883f64091Sbellard 
45497c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
4550f141eafeSaliguori                             QEMUIOVector *qiov, int nb_sectors,
4551097310b5SMarkus Armbruster                             BlockCompletionFunc *cb, void *opaque)
45527674e7bfSbellard {
4553bbf0a440SStefan Hajnoczi     trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
4554bbf0a440SStefan Hajnoczi 
4555d20d9b7cSPaolo Bonzini     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
45568c5873d6SStefan Hajnoczi                                  cb, opaque, true);
455783f64091Sbellard }
455883f64091Sbellard 
45597c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
4560d5ef94d4SPaolo Bonzini         int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
4561097310b5SMarkus Armbruster         BlockCompletionFunc *cb, void *opaque)
4562d5ef94d4SPaolo Bonzini {
4563d5ef94d4SPaolo Bonzini     trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);
4564d5ef94d4SPaolo Bonzini 
4565d5ef94d4SPaolo Bonzini     return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
4566d5ef94d4SPaolo Bonzini                                  BDRV_REQ_ZERO_WRITE | flags,
4567d5ef94d4SPaolo Bonzini                                  cb, opaque, true);
4568d5ef94d4SPaolo Bonzini }
4569d5ef94d4SPaolo Bonzini 
457040b4f539SKevin Wolf 
457140b4f539SKevin Wolf typedef struct MultiwriteCB {
457240b4f539SKevin Wolf     int error;
457340b4f539SKevin Wolf     int num_requests;
457440b4f539SKevin Wolf     int num_callbacks;
457540b4f539SKevin Wolf     struct {
4576097310b5SMarkus Armbruster         BlockCompletionFunc *cb;
457740b4f539SKevin Wolf         void *opaque;
457840b4f539SKevin Wolf         QEMUIOVector *free_qiov;
457940b4f539SKevin Wolf     } callbacks[];
458040b4f539SKevin Wolf } MultiwriteCB;
458140b4f539SKevin Wolf 
458240b4f539SKevin Wolf static void multiwrite_user_cb(MultiwriteCB *mcb)
458340b4f539SKevin Wolf {
458440b4f539SKevin Wolf     int i;
458540b4f539SKevin Wolf 
458640b4f539SKevin Wolf     for (i = 0; i < mcb->num_callbacks; i++) {
458740b4f539SKevin Wolf         mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
45881e1ea48dSStefan Hajnoczi         if (mcb->callbacks[i].free_qiov) {
45891e1ea48dSStefan Hajnoczi             qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
45901e1ea48dSStefan Hajnoczi         }
45917267c094SAnthony Liguori         g_free(mcb->callbacks[i].free_qiov);
459240b4f539SKevin Wolf     }
459340b4f539SKevin Wolf }
459440b4f539SKevin Wolf 
459540b4f539SKevin Wolf static void multiwrite_cb(void *opaque, int ret)
459640b4f539SKevin Wolf {
459740b4f539SKevin Wolf     MultiwriteCB *mcb = opaque;
459840b4f539SKevin Wolf 
45996d519a5fSStefan Hajnoczi     trace_multiwrite_cb(mcb, ret);
46006d519a5fSStefan Hajnoczi 
4601cb6d3ca0SKevin Wolf     if (ret < 0 && !mcb->error) {
460240b4f539SKevin Wolf         mcb->error = ret;
460340b4f539SKevin Wolf     }
460440b4f539SKevin Wolf 
460540b4f539SKevin Wolf     mcb->num_requests--;
460640b4f539SKevin Wolf     if (mcb->num_requests == 0) {
460740b4f539SKevin Wolf         multiwrite_user_cb(mcb);
46087267c094SAnthony Liguori         g_free(mcb);
460940b4f539SKevin Wolf     }
461040b4f539SKevin Wolf }
461140b4f539SKevin Wolf 
461240b4f539SKevin Wolf static int multiwrite_req_compare(const void *a, const void *b)
461340b4f539SKevin Wolf {
461477be4366SChristoph Hellwig     const BlockRequest *req1 = a, *req2 = b;
461577be4366SChristoph Hellwig 
461677be4366SChristoph Hellwig     /*
461777be4366SChristoph Hellwig      * Note that we can't simply subtract req2->sector from req1->sector
461877be4366SChristoph Hellwig      * here as that could overflow the return value.
461977be4366SChristoph Hellwig      */
462077be4366SChristoph Hellwig     if (req1->sector > req2->sector) {
462177be4366SChristoph Hellwig         return 1;
462277be4366SChristoph Hellwig     } else if (req1->sector < req2->sector) {
462377be4366SChristoph Hellwig         return -1;
462477be4366SChristoph Hellwig     } else {
462577be4366SChristoph Hellwig         return 0;
462677be4366SChristoph Hellwig     }
462740b4f539SKevin Wolf }
462840b4f539SKevin Wolf 
462940b4f539SKevin Wolf /*
463040b4f539SKevin Wolf  * Takes a bunch of requests and tries to merge them. Returns the number of
463140b4f539SKevin Wolf  * requests that remain after merging.
463240b4f539SKevin Wolf  */
463340b4f539SKevin Wolf static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
463440b4f539SKevin Wolf     int num_reqs, MultiwriteCB *mcb)
463540b4f539SKevin Wolf {
463640b4f539SKevin Wolf     int i, outidx;
463740b4f539SKevin Wolf 
463840b4f539SKevin Wolf     // Sort requests by start sector
463940b4f539SKevin Wolf     qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
464040b4f539SKevin Wolf 
464140b4f539SKevin Wolf     // Check if adjacent requests touch the same clusters. If so, combine them,
464240b4f539SKevin Wolf     // filling up gaps with zero sectors.
464340b4f539SKevin Wolf     outidx = 0;
464440b4f539SKevin Wolf     for (i = 1; i < num_reqs; i++) {
464540b4f539SKevin Wolf         int merge = 0;
464640b4f539SKevin Wolf         int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
464740b4f539SKevin Wolf 
4648b6a127a1SPaolo Bonzini         // Handle exactly sequential writes and overlapping writes.
464940b4f539SKevin Wolf         if (reqs[i].sector <= oldreq_last) {
465040b4f539SKevin Wolf             merge = 1;
465140b4f539SKevin Wolf         }
465240b4f539SKevin Wolf 
4653e2a305fbSChristoph Hellwig         if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
4654e2a305fbSChristoph Hellwig             merge = 0;
4655e2a305fbSChristoph Hellwig         }
4656e2a305fbSChristoph Hellwig 
46576c5a42acSPeter Lieven         if (bs->bl.max_transfer_length && reqs[outidx].nb_sectors +
46586c5a42acSPeter Lieven             reqs[i].nb_sectors > bs->bl.max_transfer_length) {
46596c5a42acSPeter Lieven             merge = 0;
46606c5a42acSPeter Lieven         }
46616c5a42acSPeter Lieven 
466240b4f539SKevin Wolf         if (merge) {
466340b4f539SKevin Wolf             size_t size;
46647267c094SAnthony Liguori             QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
466540b4f539SKevin Wolf             qemu_iovec_init(qiov,
466640b4f539SKevin Wolf                 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
466740b4f539SKevin Wolf 
466840b4f539SKevin Wolf             // Add the first request to the merged one. If the requests are
466940b4f539SKevin Wolf             // overlapping, drop the last sectors of the first request.
467040b4f539SKevin Wolf             size = (reqs[i].sector - reqs[outidx].sector) << 9;
46711b093c48SMichael Tokarev             qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
467240b4f539SKevin Wolf 
4673b6a127a1SPaolo Bonzini             // We should need to add any zeros between the two requests
4674b6a127a1SPaolo Bonzini             assert (reqs[i].sector <= oldreq_last);
467540b4f539SKevin Wolf 
467640b4f539SKevin Wolf             // Add the second request
46771b093c48SMichael Tokarev             qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
467840b4f539SKevin Wolf 
4679391827ebSStefan Hajnoczi             // Add tail of first request, if necessary
4680391827ebSStefan Hajnoczi             if (qiov->size < reqs[outidx].qiov->size) {
4681391827ebSStefan Hajnoczi                 qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
4682391827ebSStefan Hajnoczi                                   reqs[outidx].qiov->size - qiov->size);
4683391827ebSStefan Hajnoczi             }
4684391827ebSStefan Hajnoczi 
4685cbf1dff2SKevin Wolf             reqs[outidx].nb_sectors = qiov->size >> 9;
468640b4f539SKevin Wolf             reqs[outidx].qiov = qiov;
468740b4f539SKevin Wolf 
468840b4f539SKevin Wolf             mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
468940b4f539SKevin Wolf         } else {
469040b4f539SKevin Wolf             outidx++;
469140b4f539SKevin Wolf             reqs[outidx].sector     = reqs[i].sector;
469240b4f539SKevin Wolf             reqs[outidx].nb_sectors = reqs[i].nb_sectors;
469340b4f539SKevin Wolf             reqs[outidx].qiov       = reqs[i].qiov;
469440b4f539SKevin Wolf         }
469540b4f539SKevin Wolf     }
469640b4f539SKevin Wolf 
4697f4564d53SPeter Lieven     block_acct_merge_done(&bs->stats, BLOCK_ACCT_WRITE, num_reqs - outidx - 1);
4698f4564d53SPeter Lieven 
469940b4f539SKevin Wolf     return outidx + 1;
470040b4f539SKevin Wolf }
470140b4f539SKevin Wolf 
470240b4f539SKevin Wolf /*
470340b4f539SKevin Wolf  * Submit multiple AIO write requests at once.
470440b4f539SKevin Wolf  *
470540b4f539SKevin Wolf  * On success, the function returns 0 and all requests in the reqs array have
470640b4f539SKevin Wolf  * been submitted. In error case this function returns -1, and any of the
470740b4f539SKevin Wolf  * requests may or may not be submitted yet. In particular, this means that the
470840b4f539SKevin Wolf  * callback will be called for some of the requests, for others it won't. The
470940b4f539SKevin Wolf  * caller must check the error field of the BlockRequest to wait for the right
471040b4f539SKevin Wolf  * callbacks (if error != 0, no callback will be called).
471140b4f539SKevin Wolf  *
471240b4f539SKevin Wolf  * The implementation may modify the contents of the reqs array, e.g. to merge
471340b4f539SKevin Wolf  * requests. However, the fields opaque and error are left unmodified as they
471440b4f539SKevin Wolf  * are used to signal failure for a single request to the caller.
471540b4f539SKevin Wolf  */
471640b4f539SKevin Wolf int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
471740b4f539SKevin Wolf {
471840b4f539SKevin Wolf     MultiwriteCB *mcb;
471940b4f539SKevin Wolf     int i;
472040b4f539SKevin Wolf 
4721301db7c2SRyan Harper     /* don't submit writes if we don't have a medium */
4722301db7c2SRyan Harper     if (bs->drv == NULL) {
4723301db7c2SRyan Harper         for (i = 0; i < num_reqs; i++) {
4724301db7c2SRyan Harper             reqs[i].error = -ENOMEDIUM;
4725301db7c2SRyan Harper         }
4726301db7c2SRyan Harper         return -1;
4727301db7c2SRyan Harper     }
4728301db7c2SRyan Harper 
472940b4f539SKevin Wolf     if (num_reqs == 0) {
473040b4f539SKevin Wolf         return 0;
473140b4f539SKevin Wolf     }
473240b4f539SKevin Wolf 
473340b4f539SKevin Wolf     // Create MultiwriteCB structure
47347267c094SAnthony Liguori     mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
473540b4f539SKevin Wolf     mcb->num_requests = 0;
473640b4f539SKevin Wolf     mcb->num_callbacks = num_reqs;
473740b4f539SKevin Wolf 
473840b4f539SKevin Wolf     for (i = 0; i < num_reqs; i++) {
473940b4f539SKevin Wolf         mcb->callbacks[i].cb = reqs[i].cb;
474040b4f539SKevin Wolf         mcb->callbacks[i].opaque = reqs[i].opaque;
474140b4f539SKevin Wolf     }
474240b4f539SKevin Wolf 
474340b4f539SKevin Wolf     // Check for mergable requests
474440b4f539SKevin Wolf     num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
474540b4f539SKevin Wolf 
47466d519a5fSStefan Hajnoczi     trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
47476d519a5fSStefan Hajnoczi 
4748df9309fbSPaolo Bonzini     /* Run the aio requests. */
4749df9309fbSPaolo Bonzini     mcb->num_requests = num_reqs;
475040b4f539SKevin Wolf     for (i = 0; i < num_reqs; i++) {
4751d20d9b7cSPaolo Bonzini         bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
4752d20d9b7cSPaolo Bonzini                               reqs[i].nb_sectors, reqs[i].flags,
4753d20d9b7cSPaolo Bonzini                               multiwrite_cb, mcb,
4754d20d9b7cSPaolo Bonzini                               true);
475540b4f539SKevin Wolf     }
475640b4f539SKevin Wolf 
475740b4f539SKevin Wolf     return 0;
475840b4f539SKevin Wolf }
475940b4f539SKevin Wolf 
47607c84b1b8SMarkus Armbruster void bdrv_aio_cancel(BlockAIOCB *acb)
476183f64091Sbellard {
476202c50efeSFam Zheng     qemu_aio_ref(acb);
476302c50efeSFam Zheng     bdrv_aio_cancel_async(acb);
476402c50efeSFam Zheng     while (acb->refcnt > 1) {
476502c50efeSFam Zheng         if (acb->aiocb_info->get_aio_context) {
476602c50efeSFam Zheng             aio_poll(acb->aiocb_info->get_aio_context(acb), true);
476702c50efeSFam Zheng         } else if (acb->bs) {
476802c50efeSFam Zheng             aio_poll(bdrv_get_aio_context(acb->bs), true);
476902c50efeSFam Zheng         } else {
477002c50efeSFam Zheng             abort();
477102c50efeSFam Zheng         }
477202c50efeSFam Zheng     }
47738007429aSFam Zheng     qemu_aio_unref(acb);
477402c50efeSFam Zheng }
477502c50efeSFam Zheng 
477602c50efeSFam Zheng /* Async version of aio cancel. The caller is not blocked if the acb implements
477702c50efeSFam Zheng  * cancel_async, otherwise we do nothing and let the request normally complete.
477802c50efeSFam Zheng  * In either case the completion callback must be called. */
47797c84b1b8SMarkus Armbruster void bdrv_aio_cancel_async(BlockAIOCB *acb)
478002c50efeSFam Zheng {
478102c50efeSFam Zheng     if (acb->aiocb_info->cancel_async) {
478202c50efeSFam Zheng         acb->aiocb_info->cancel_async(acb);
478302c50efeSFam Zheng     }
478483f64091Sbellard }
478583f64091Sbellard 
478683f64091Sbellard /**************************************************************/
478783f64091Sbellard /* async block device emulation */
478883f64091Sbellard 
47897c84b1b8SMarkus Armbruster typedef struct BlockAIOCBSync {
47907c84b1b8SMarkus Armbruster     BlockAIOCB common;
4791c16b5a2cSChristoph Hellwig     QEMUBH *bh;
4792c16b5a2cSChristoph Hellwig     int ret;
4793c16b5a2cSChristoph Hellwig     /* vector translation state */
4794c16b5a2cSChristoph Hellwig     QEMUIOVector *qiov;
4795c16b5a2cSChristoph Hellwig     uint8_t *bounce;
4796c16b5a2cSChristoph Hellwig     int is_write;
47977c84b1b8SMarkus Armbruster } BlockAIOCBSync;
4798c16b5a2cSChristoph Hellwig 
4799d7331bedSStefan Hajnoczi static const AIOCBInfo bdrv_em_aiocb_info = {
48007c84b1b8SMarkus Armbruster     .aiocb_size         = sizeof(BlockAIOCBSync),
4801c16b5a2cSChristoph Hellwig };
4802c16b5a2cSChristoph Hellwig 
480383f64091Sbellard static void bdrv_aio_bh_cb(void *opaque)
4804beac80cdSbellard {
48057c84b1b8SMarkus Armbruster     BlockAIOCBSync *acb = opaque;
4806f141eafeSaliguori 
4807857d4f46SKevin Wolf     if (!acb->is_write && acb->ret >= 0) {
480803396148SMichael Tokarev         qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
4809857d4f46SKevin Wolf     }
4810ceb42de8Saliguori     qemu_vfree(acb->bounce);
4811ce1a14dcSpbrook     acb->common.cb(acb->common.opaque, acb->ret);
48126a7ad299SDor Laor     qemu_bh_delete(acb->bh);
481336afc451SAvi Kivity     acb->bh = NULL;
48148007429aSFam Zheng     qemu_aio_unref(acb);
4815beac80cdSbellard }
4816beac80cdSbellard 
48177c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
4818f141eafeSaliguori                                       int64_t sector_num,
4819f141eafeSaliguori                                       QEMUIOVector *qiov,
4820f141eafeSaliguori                                       int nb_sectors,
4821097310b5SMarkus Armbruster                                       BlockCompletionFunc *cb,
4822f141eafeSaliguori                                       void *opaque,
4823f141eafeSaliguori                                       int is_write)
4824f141eafeSaliguori 
4825ea2384d3Sbellard {
48267c84b1b8SMarkus Armbruster     BlockAIOCBSync *acb;
482783f64091Sbellard 
4828d7331bedSStefan Hajnoczi     acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
4829f141eafeSaliguori     acb->is_write = is_write;
4830f141eafeSaliguori     acb->qiov = qiov;
4831857d4f46SKevin Wolf     acb->bounce = qemu_try_blockalign(bs, qiov->size);
48322572b37aSStefan Hajnoczi     acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);
4833f141eafeSaliguori 
4834857d4f46SKevin Wolf     if (acb->bounce == NULL) {
4835857d4f46SKevin Wolf         acb->ret = -ENOMEM;
4836857d4f46SKevin Wolf     } else if (is_write) {
4837d5e6b161SMichael Tokarev         qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
48381ed20acfSStefan Hajnoczi         acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
4839f141eafeSaliguori     } else {
48401ed20acfSStefan Hajnoczi         acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
4841f141eafeSaliguori     }
4842f141eafeSaliguori 
4843ce1a14dcSpbrook     qemu_bh_schedule(acb->bh);
4844f141eafeSaliguori 
4845ce1a14dcSpbrook     return &acb->common;
48467a6cba61Spbrook }
48477a6cba61Spbrook 
48487c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
4849f141eafeSaliguori         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
4850097310b5SMarkus Armbruster         BlockCompletionFunc *cb, void *opaque)
485183f64091Sbellard {
4852f141eafeSaliguori     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
485383f64091Sbellard }
485483f64091Sbellard 
48557c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
4856f141eafeSaliguori         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
4857097310b5SMarkus Armbruster         BlockCompletionFunc *cb, void *opaque)
4858f141eafeSaliguori {
4859f141eafeSaliguori     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
4860f141eafeSaliguori }
4861f141eafeSaliguori 
486268485420SKevin Wolf 
48637c84b1b8SMarkus Armbruster typedef struct BlockAIOCBCoroutine {
48647c84b1b8SMarkus Armbruster     BlockAIOCB common;
486568485420SKevin Wolf     BlockRequest req;
486668485420SKevin Wolf     bool is_write;
48670b5a2445SPaolo Bonzini     bool need_bh;
4868d318aea9SKevin Wolf     bool *done;
486968485420SKevin Wolf     QEMUBH* bh;
48707c84b1b8SMarkus Armbruster } BlockAIOCBCoroutine;
487168485420SKevin Wolf 
4872d7331bedSStefan Hajnoczi static const AIOCBInfo bdrv_em_co_aiocb_info = {
48737c84b1b8SMarkus Armbruster     .aiocb_size         = sizeof(BlockAIOCBCoroutine),
487468485420SKevin Wolf };
487568485420SKevin Wolf 
48760b5a2445SPaolo Bonzini static void bdrv_co_complete(BlockAIOCBCoroutine *acb)
48770b5a2445SPaolo Bonzini {
48780b5a2445SPaolo Bonzini     if (!acb->need_bh) {
48790b5a2445SPaolo Bonzini         acb->common.cb(acb->common.opaque, acb->req.error);
48800b5a2445SPaolo Bonzini         qemu_aio_unref(acb);
48810b5a2445SPaolo Bonzini     }
48820b5a2445SPaolo Bonzini }
48830b5a2445SPaolo Bonzini 
488435246a68SPaolo Bonzini static void bdrv_co_em_bh(void *opaque)
488568485420SKevin Wolf {
48867c84b1b8SMarkus Armbruster     BlockAIOCBCoroutine *acb = opaque;
488768485420SKevin Wolf 
48880b5a2445SPaolo Bonzini     assert(!acb->need_bh);
488968485420SKevin Wolf     qemu_bh_delete(acb->bh);
48900b5a2445SPaolo Bonzini     bdrv_co_complete(acb);
48910b5a2445SPaolo Bonzini }
48920b5a2445SPaolo Bonzini 
48930b5a2445SPaolo Bonzini static void bdrv_co_maybe_schedule_bh(BlockAIOCBCoroutine *acb)
48940b5a2445SPaolo Bonzini {
48950b5a2445SPaolo Bonzini     acb->need_bh = false;
48960b5a2445SPaolo Bonzini     if (acb->req.error != -EINPROGRESS) {
48970b5a2445SPaolo Bonzini         BlockDriverState *bs = acb->common.bs;
48980b5a2445SPaolo Bonzini 
48990b5a2445SPaolo Bonzini         acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
49000b5a2445SPaolo Bonzini         qemu_bh_schedule(acb->bh);
49010b5a2445SPaolo Bonzini     }
490268485420SKevin Wolf }
490368485420SKevin Wolf 
4904b2a61371SStefan Hajnoczi /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
4905b2a61371SStefan Hajnoczi static void coroutine_fn bdrv_co_do_rw(void *opaque)
4906b2a61371SStefan Hajnoczi {
49077c84b1b8SMarkus Armbruster     BlockAIOCBCoroutine *acb = opaque;
4908b2a61371SStefan Hajnoczi     BlockDriverState *bs = acb->common.bs;
4909b2a61371SStefan Hajnoczi 
4910b2a61371SStefan Hajnoczi     if (!acb->is_write) {
4911b2a61371SStefan Hajnoczi         acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
4912d20d9b7cSPaolo Bonzini             acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
4913b2a61371SStefan Hajnoczi     } else {
4914b2a61371SStefan Hajnoczi         acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
4915d20d9b7cSPaolo Bonzini             acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
4916b2a61371SStefan Hajnoczi     }
4917b2a61371SStefan Hajnoczi 
49180b5a2445SPaolo Bonzini     bdrv_co_complete(acb);
4919b2a61371SStefan Hajnoczi }
4920b2a61371SStefan Hajnoczi 
49217c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
492268485420SKevin Wolf                                          int64_t sector_num,
492368485420SKevin Wolf                                          QEMUIOVector *qiov,
492468485420SKevin Wolf                                          int nb_sectors,
4925d20d9b7cSPaolo Bonzini                                          BdrvRequestFlags flags,
4926097310b5SMarkus Armbruster                                          BlockCompletionFunc *cb,
492768485420SKevin Wolf                                          void *opaque,
49288c5873d6SStefan Hajnoczi                                          bool is_write)
492968485420SKevin Wolf {
493068485420SKevin Wolf     Coroutine *co;
49317c84b1b8SMarkus Armbruster     BlockAIOCBCoroutine *acb;
493268485420SKevin Wolf 
4933d7331bedSStefan Hajnoczi     acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
49340b5a2445SPaolo Bonzini     acb->need_bh = true;
49350b5a2445SPaolo Bonzini     acb->req.error = -EINPROGRESS;
493668485420SKevin Wolf     acb->req.sector = sector_num;
493768485420SKevin Wolf     acb->req.nb_sectors = nb_sectors;
493868485420SKevin Wolf     acb->req.qiov = qiov;
4939d20d9b7cSPaolo Bonzini     acb->req.flags = flags;
494068485420SKevin Wolf     acb->is_write = is_write;
494168485420SKevin Wolf 
49428c5873d6SStefan Hajnoczi     co = qemu_coroutine_create(bdrv_co_do_rw);
494368485420SKevin Wolf     qemu_coroutine_enter(co, acb);
494468485420SKevin Wolf 
49450b5a2445SPaolo Bonzini     bdrv_co_maybe_schedule_bh(acb);
494668485420SKevin Wolf     return &acb->common;
494768485420SKevin Wolf }
494868485420SKevin Wolf 
494907f07615SPaolo Bonzini static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
4950b2e12bc6SChristoph Hellwig {
49517c84b1b8SMarkus Armbruster     BlockAIOCBCoroutine *acb = opaque;
495207f07615SPaolo Bonzini     BlockDriverState *bs = acb->common.bs;
4953b2e12bc6SChristoph Hellwig 
495407f07615SPaolo Bonzini     acb->req.error = bdrv_co_flush(bs);
49550b5a2445SPaolo Bonzini     bdrv_co_complete(acb);
4956b2e12bc6SChristoph Hellwig }
4957b2e12bc6SChristoph Hellwig 
49587c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
4959097310b5SMarkus Armbruster         BlockCompletionFunc *cb, void *opaque)
4960016f5cf6SAlexander Graf {
496107f07615SPaolo Bonzini     trace_bdrv_aio_flush(bs, opaque);
4962016f5cf6SAlexander Graf 
496307f07615SPaolo Bonzini     Coroutine *co;
49647c84b1b8SMarkus Armbruster     BlockAIOCBCoroutine *acb;
4965016f5cf6SAlexander Graf 
4966d7331bedSStefan Hajnoczi     acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
49670b5a2445SPaolo Bonzini     acb->need_bh = true;
49680b5a2445SPaolo Bonzini     acb->req.error = -EINPROGRESS;
4969d318aea9SKevin Wolf 
497007f07615SPaolo Bonzini     co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
497107f07615SPaolo Bonzini     qemu_coroutine_enter(co, acb);
4972016f5cf6SAlexander Graf 
49730b5a2445SPaolo Bonzini     bdrv_co_maybe_schedule_bh(acb);
4974016f5cf6SAlexander Graf     return &acb->common;
4975016f5cf6SAlexander Graf }
4976016f5cf6SAlexander Graf 
49774265d620SPaolo Bonzini static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
49784265d620SPaolo Bonzini {
49797c84b1b8SMarkus Armbruster     BlockAIOCBCoroutine *acb = opaque;
49804265d620SPaolo Bonzini     BlockDriverState *bs = acb->common.bs;
49814265d620SPaolo Bonzini 
49824265d620SPaolo Bonzini     acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
49830b5a2445SPaolo Bonzini     bdrv_co_complete(acb);
49844265d620SPaolo Bonzini }
49854265d620SPaolo Bonzini 
49867c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs,
49874265d620SPaolo Bonzini         int64_t sector_num, int nb_sectors,
4988097310b5SMarkus Armbruster         BlockCompletionFunc *cb, void *opaque)
49894265d620SPaolo Bonzini {
49904265d620SPaolo Bonzini     Coroutine *co;
49917c84b1b8SMarkus Armbruster     BlockAIOCBCoroutine *acb;
49924265d620SPaolo Bonzini 
49934265d620SPaolo Bonzini     trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
49944265d620SPaolo Bonzini 
4995d7331bedSStefan Hajnoczi     acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
49960b5a2445SPaolo Bonzini     acb->need_bh = true;
49970b5a2445SPaolo Bonzini     acb->req.error = -EINPROGRESS;
49984265d620SPaolo Bonzini     acb->req.sector = sector_num;
49994265d620SPaolo Bonzini     acb->req.nb_sectors = nb_sectors;
50004265d620SPaolo Bonzini     co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
50014265d620SPaolo Bonzini     qemu_coroutine_enter(co, acb);
50024265d620SPaolo Bonzini 
50030b5a2445SPaolo Bonzini     bdrv_co_maybe_schedule_bh(acb);
50044265d620SPaolo Bonzini     return &acb->common;
50054265d620SPaolo Bonzini }
50064265d620SPaolo Bonzini 
5007ea2384d3Sbellard void bdrv_init(void)
5008ea2384d3Sbellard {
50095efa9d5aSAnthony Liguori     module_call_init(MODULE_INIT_BLOCK);
5010ea2384d3Sbellard }
5011ce1a14dcSpbrook 
5012eb852011SMarkus Armbruster void bdrv_init_with_whitelist(void)
5013eb852011SMarkus Armbruster {
5014eb852011SMarkus Armbruster     use_bdrv_whitelist = 1;
5015eb852011SMarkus Armbruster     bdrv_init();
5016eb852011SMarkus Armbruster }
5017eb852011SMarkus Armbruster 
5018d7331bedSStefan Hajnoczi void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
5019097310b5SMarkus Armbruster                    BlockCompletionFunc *cb, void *opaque)
50206bbff9a0Saliguori {
50217c84b1b8SMarkus Armbruster     BlockAIOCB *acb;
5022ce1a14dcSpbrook 
5023d7331bedSStefan Hajnoczi     acb = g_slice_alloc(aiocb_info->aiocb_size);
5024d7331bedSStefan Hajnoczi     acb->aiocb_info = aiocb_info;
5025ce1a14dcSpbrook     acb->bs = bs;
5026ce1a14dcSpbrook     acb->cb = cb;
5027ce1a14dcSpbrook     acb->opaque = opaque;
5028f197fe2bSFam Zheng     acb->refcnt = 1;
5029ce1a14dcSpbrook     return acb;
5030ce1a14dcSpbrook }
5031ce1a14dcSpbrook 
5032f197fe2bSFam Zheng void qemu_aio_ref(void *p)
5033f197fe2bSFam Zheng {
50347c84b1b8SMarkus Armbruster     BlockAIOCB *acb = p;
5035f197fe2bSFam Zheng     acb->refcnt++;
5036f197fe2bSFam Zheng }
5037f197fe2bSFam Zheng 
50388007429aSFam Zheng void qemu_aio_unref(void *p)
5039ce1a14dcSpbrook {
50407c84b1b8SMarkus Armbruster     BlockAIOCB *acb = p;
5041f197fe2bSFam Zheng     assert(acb->refcnt > 0);
5042f197fe2bSFam Zheng     if (--acb->refcnt == 0) {
5043d7331bedSStefan Hajnoczi         g_slice_free1(acb->aiocb_info->aiocb_size, acb);
5044ce1a14dcSpbrook     }
5045f197fe2bSFam Zheng }
504619cb3738Sbellard 
504719cb3738Sbellard /**************************************************************/
5048f9f05dc5SKevin Wolf /* Coroutine block device emulation */
5049f9f05dc5SKevin Wolf 
5050f9f05dc5SKevin Wolf typedef struct CoroutineIOCompletion {
5051f9f05dc5SKevin Wolf     Coroutine *coroutine;
5052f9f05dc5SKevin Wolf     int ret;
5053f9f05dc5SKevin Wolf } CoroutineIOCompletion;
5054f9f05dc5SKevin Wolf 
5055f9f05dc5SKevin Wolf static void bdrv_co_io_em_complete(void *opaque, int ret)
5056f9f05dc5SKevin Wolf {
5057f9f05dc5SKevin Wolf     CoroutineIOCompletion *co = opaque;
5058f9f05dc5SKevin Wolf 
5059f9f05dc5SKevin Wolf     co->ret = ret;
5060f9f05dc5SKevin Wolf     qemu_coroutine_enter(co->coroutine, NULL);
5061f9f05dc5SKevin Wolf }
5062f9f05dc5SKevin Wolf 
5063f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
5064f9f05dc5SKevin Wolf                                       int nb_sectors, QEMUIOVector *iov,
5065f9f05dc5SKevin Wolf                                       bool is_write)
5066f9f05dc5SKevin Wolf {
5067f9f05dc5SKevin Wolf     CoroutineIOCompletion co = {
5068f9f05dc5SKevin Wolf         .coroutine = qemu_coroutine_self(),
5069f9f05dc5SKevin Wolf     };
50707c84b1b8SMarkus Armbruster     BlockAIOCB *acb;
5071f9f05dc5SKevin Wolf 
5072f9f05dc5SKevin Wolf     if (is_write) {
5073a652d160SStefan Hajnoczi         acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
5074f9f05dc5SKevin Wolf                                        bdrv_co_io_em_complete, &co);
5075f9f05dc5SKevin Wolf     } else {
5076a652d160SStefan Hajnoczi         acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
5077f9f05dc5SKevin Wolf                                       bdrv_co_io_em_complete, &co);
5078f9f05dc5SKevin Wolf     }
5079f9f05dc5SKevin Wolf 
508059370aaaSStefan Hajnoczi     trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
5081f9f05dc5SKevin Wolf     if (!acb) {
5082f9f05dc5SKevin Wolf         return -EIO;
5083f9f05dc5SKevin Wolf     }
5084f9f05dc5SKevin Wolf     qemu_coroutine_yield();
5085f9f05dc5SKevin Wolf 
5086f9f05dc5SKevin Wolf     return co.ret;
5087f9f05dc5SKevin Wolf }
5088f9f05dc5SKevin Wolf 
5089f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
5090f9f05dc5SKevin Wolf                                          int64_t sector_num, int nb_sectors,
5091f9f05dc5SKevin Wolf                                          QEMUIOVector *iov)
5092f9f05dc5SKevin Wolf {
5093f9f05dc5SKevin Wolf     return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
5094f9f05dc5SKevin Wolf }
5095f9f05dc5SKevin Wolf 
5096f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
5097f9f05dc5SKevin Wolf                                          int64_t sector_num, int nb_sectors,
5098f9f05dc5SKevin Wolf                                          QEMUIOVector *iov)
5099f9f05dc5SKevin Wolf {
5100f9f05dc5SKevin Wolf     return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
5101f9f05dc5SKevin Wolf }
5102f9f05dc5SKevin Wolf 
510307f07615SPaolo Bonzini static void coroutine_fn bdrv_flush_co_entry(void *opaque)
5104e7a8a783SKevin Wolf {
510507f07615SPaolo Bonzini     RwCo *rwco = opaque;
510607f07615SPaolo Bonzini 
510707f07615SPaolo Bonzini     rwco->ret = bdrv_co_flush(rwco->bs);
510807f07615SPaolo Bonzini }
510907f07615SPaolo Bonzini 
511007f07615SPaolo Bonzini int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
511107f07615SPaolo Bonzini {
5112eb489bb1SKevin Wolf     int ret;
5113eb489bb1SKevin Wolf 
511429cdb251SPaolo Bonzini     if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
511507f07615SPaolo Bonzini         return 0;
5116eb489bb1SKevin Wolf     }
5117eb489bb1SKevin Wolf 
5118ca716364SKevin Wolf     /* Write back cached data to the OS even with cache=unsafe */
5119bf736fe3SKevin Wolf     BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
5120eb489bb1SKevin Wolf     if (bs->drv->bdrv_co_flush_to_os) {
5121eb489bb1SKevin Wolf         ret = bs->drv->bdrv_co_flush_to_os(bs);
5122eb489bb1SKevin Wolf         if (ret < 0) {
5123eb489bb1SKevin Wolf             return ret;
5124eb489bb1SKevin Wolf         }
5125eb489bb1SKevin Wolf     }
5126eb489bb1SKevin Wolf 
5127ca716364SKevin Wolf     /* But don't actually force it to the disk with cache=unsafe */
5128ca716364SKevin Wolf     if (bs->open_flags & BDRV_O_NO_FLUSH) {
5129d4c82329SKevin Wolf         goto flush_parent;
5130ca716364SKevin Wolf     }
5131ca716364SKevin Wolf 
5132bf736fe3SKevin Wolf     BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
5133eb489bb1SKevin Wolf     if (bs->drv->bdrv_co_flush_to_disk) {
513429cdb251SPaolo Bonzini         ret = bs->drv->bdrv_co_flush_to_disk(bs);
513507f07615SPaolo Bonzini     } else if (bs->drv->bdrv_aio_flush) {
51367c84b1b8SMarkus Armbruster         BlockAIOCB *acb;
5137e7a8a783SKevin Wolf         CoroutineIOCompletion co = {
5138e7a8a783SKevin Wolf             .coroutine = qemu_coroutine_self(),
5139e7a8a783SKevin Wolf         };
5140e7a8a783SKevin Wolf 
514107f07615SPaolo Bonzini         acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
514207f07615SPaolo Bonzini         if (acb == NULL) {
514329cdb251SPaolo Bonzini             ret = -EIO;
514407f07615SPaolo Bonzini         } else {
5145e7a8a783SKevin Wolf             qemu_coroutine_yield();
514629cdb251SPaolo Bonzini             ret = co.ret;
5147e7a8a783SKevin Wolf         }
514807f07615SPaolo Bonzini     } else {
514907f07615SPaolo Bonzini         /*
515007f07615SPaolo Bonzini          * Some block drivers always operate in either writethrough or unsafe
515107f07615SPaolo Bonzini          * mode and don't support bdrv_flush therefore. Usually qemu doesn't
515207f07615SPaolo Bonzini          * know how the server works (because the behaviour is hardcoded or
515307f07615SPaolo Bonzini          * depends on server-side configuration), so we can't ensure that
515407f07615SPaolo Bonzini          * everything is safe on disk. Returning an error doesn't work because
515507f07615SPaolo Bonzini          * that would break guests even if the server operates in writethrough
515607f07615SPaolo Bonzini          * mode.
515707f07615SPaolo Bonzini          *
515807f07615SPaolo Bonzini          * Let's hope the user knows what he's doing.
515907f07615SPaolo Bonzini          */
516029cdb251SPaolo Bonzini         ret = 0;
516107f07615SPaolo Bonzini     }
516229cdb251SPaolo Bonzini     if (ret < 0) {
516329cdb251SPaolo Bonzini         return ret;
516429cdb251SPaolo Bonzini     }
516529cdb251SPaolo Bonzini 
516629cdb251SPaolo Bonzini     /* Now flush the underlying protocol.  It will also have BDRV_O_NO_FLUSH
516729cdb251SPaolo Bonzini      * in the case of cache=unsafe, so there are no useless flushes.
516829cdb251SPaolo Bonzini      */
5169d4c82329SKevin Wolf flush_parent:
517029cdb251SPaolo Bonzini     return bdrv_co_flush(bs->file);
517107f07615SPaolo Bonzini }
517207f07615SPaolo Bonzini 
51735a8a30dbSKevin Wolf void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
51740f15423cSAnthony Liguori {
51755a8a30dbSKevin Wolf     Error *local_err = NULL;
51765a8a30dbSKevin Wolf     int ret;
51775a8a30dbSKevin Wolf 
51783456a8d1SKevin Wolf     if (!bs->drv)  {
51793456a8d1SKevin Wolf         return;
51800f15423cSAnthony Liguori     }
51813456a8d1SKevin Wolf 
51827ea2d269SAlexey Kardashevskiy     if (!(bs->open_flags & BDRV_O_INCOMING)) {
51837ea2d269SAlexey Kardashevskiy         return;
51847ea2d269SAlexey Kardashevskiy     }
51857ea2d269SAlexey Kardashevskiy     bs->open_flags &= ~BDRV_O_INCOMING;
51867ea2d269SAlexey Kardashevskiy 
51873456a8d1SKevin Wolf     if (bs->drv->bdrv_invalidate_cache) {
51885a8a30dbSKevin Wolf         bs->drv->bdrv_invalidate_cache(bs, &local_err);
51893456a8d1SKevin Wolf     } else if (bs->file) {
51905a8a30dbSKevin Wolf         bdrv_invalidate_cache(bs->file, &local_err);
51915a8a30dbSKevin Wolf     }
51925a8a30dbSKevin Wolf     if (local_err) {
51935a8a30dbSKevin Wolf         error_propagate(errp, local_err);
51945a8a30dbSKevin Wolf         return;
51953456a8d1SKevin Wolf     }
51963456a8d1SKevin Wolf 
51975a8a30dbSKevin Wolf     ret = refresh_total_sectors(bs, bs->total_sectors);
51985a8a30dbSKevin Wolf     if (ret < 0) {
51995a8a30dbSKevin Wolf         error_setg_errno(errp, -ret, "Could not refresh total sector count");
52005a8a30dbSKevin Wolf         return;
52015a8a30dbSKevin Wolf     }
52020f15423cSAnthony Liguori }
52030f15423cSAnthony Liguori 
52045a8a30dbSKevin Wolf void bdrv_invalidate_cache_all(Error **errp)
52050f15423cSAnthony Liguori {
52060f15423cSAnthony Liguori     BlockDriverState *bs;
52075a8a30dbSKevin Wolf     Error *local_err = NULL;
52080f15423cSAnthony Liguori 
5209dc364f4cSBenoît Canet     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
5210ed78cda3SStefan Hajnoczi         AioContext *aio_context = bdrv_get_aio_context(bs);
5211ed78cda3SStefan Hajnoczi 
5212ed78cda3SStefan Hajnoczi         aio_context_acquire(aio_context);
52135a8a30dbSKevin Wolf         bdrv_invalidate_cache(bs, &local_err);
5214ed78cda3SStefan Hajnoczi         aio_context_release(aio_context);
52155a8a30dbSKevin Wolf         if (local_err) {
52165a8a30dbSKevin Wolf             error_propagate(errp, local_err);
52175a8a30dbSKevin Wolf             return;
52185a8a30dbSKevin Wolf         }
52190f15423cSAnthony Liguori     }
52200f15423cSAnthony Liguori }
52210f15423cSAnthony Liguori 
522207f07615SPaolo Bonzini int bdrv_flush(BlockDriverState *bs)
522307f07615SPaolo Bonzini {
522407f07615SPaolo Bonzini     Coroutine *co;
522507f07615SPaolo Bonzini     RwCo rwco = {
522607f07615SPaolo Bonzini         .bs = bs,
522707f07615SPaolo Bonzini         .ret = NOT_DONE,
522807f07615SPaolo Bonzini     };
522907f07615SPaolo Bonzini 
523007f07615SPaolo Bonzini     if (qemu_in_coroutine()) {
523107f07615SPaolo Bonzini         /* Fast-path if already in coroutine context */
523207f07615SPaolo Bonzini         bdrv_flush_co_entry(&rwco);
523307f07615SPaolo Bonzini     } else {
52342572b37aSStefan Hajnoczi         AioContext *aio_context = bdrv_get_aio_context(bs);
52352572b37aSStefan Hajnoczi 
523607f07615SPaolo Bonzini         co = qemu_coroutine_create(bdrv_flush_co_entry);
523707f07615SPaolo Bonzini         qemu_coroutine_enter(co, &rwco);
523807f07615SPaolo Bonzini         while (rwco.ret == NOT_DONE) {
52392572b37aSStefan Hajnoczi             aio_poll(aio_context, true);
524007f07615SPaolo Bonzini         }
524107f07615SPaolo Bonzini     }
524207f07615SPaolo Bonzini 
524307f07615SPaolo Bonzini     return rwco.ret;
524407f07615SPaolo Bonzini }
5245e7a8a783SKevin Wolf 
5246775aa8b6SKevin Wolf typedef struct DiscardCo {
5247775aa8b6SKevin Wolf     BlockDriverState *bs;
5248775aa8b6SKevin Wolf     int64_t sector_num;
5249775aa8b6SKevin Wolf     int nb_sectors;
5250775aa8b6SKevin Wolf     int ret;
5251775aa8b6SKevin Wolf } DiscardCo;
52524265d620SPaolo Bonzini static void coroutine_fn bdrv_discard_co_entry(void *opaque)
52534265d620SPaolo Bonzini {
5254775aa8b6SKevin Wolf     DiscardCo *rwco = opaque;
52554265d620SPaolo Bonzini 
52564265d620SPaolo Bonzini     rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
52574265d620SPaolo Bonzini }
52584265d620SPaolo Bonzini 
52594265d620SPaolo Bonzini int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
52604265d620SPaolo Bonzini                                  int nb_sectors)
52614265d620SPaolo Bonzini {
5262b9c64947SMax Reitz     int max_discard, ret;
5263d51e9fe5SPaolo Bonzini 
52644265d620SPaolo Bonzini     if (!bs->drv) {
52654265d620SPaolo Bonzini         return -ENOMEDIUM;
5266b9c64947SMax Reitz     }
5267b9c64947SMax Reitz 
5268b9c64947SMax Reitz     ret = bdrv_check_request(bs, sector_num, nb_sectors);
5269b9c64947SMax Reitz     if (ret < 0) {
5270b9c64947SMax Reitz         return ret;
52714265d620SPaolo Bonzini     } else if (bs->read_only) {
52724265d620SPaolo Bonzini         return -EROFS;
5273df702c9bSPaolo Bonzini     }
5274df702c9bSPaolo Bonzini 
52758f0720ecSPaolo Bonzini     bdrv_reset_dirty(bs, sector_num, nb_sectors);
5276df702c9bSPaolo Bonzini 
52779e8f1835SPaolo Bonzini     /* Do nothing if disabled.  */
52789e8f1835SPaolo Bonzini     if (!(bs->open_flags & BDRV_O_UNMAP)) {
52799e8f1835SPaolo Bonzini         return 0;
52809e8f1835SPaolo Bonzini     }
52819e8f1835SPaolo Bonzini 
5282d51e9fe5SPaolo Bonzini     if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
5283d51e9fe5SPaolo Bonzini         return 0;
5284d51e9fe5SPaolo Bonzini     }
52856f14da52SPeter Lieven 
528675af1f34SPeter Lieven     max_discard = MIN_NON_ZERO(bs->bl.max_discard, BDRV_REQUEST_MAX_SECTORS);
52876f14da52SPeter Lieven     while (nb_sectors > 0) {
52886f14da52SPeter Lieven         int ret;
52896f14da52SPeter Lieven         int num = nb_sectors;
52906f14da52SPeter Lieven 
52916f14da52SPeter Lieven         /* align request */
52926f14da52SPeter Lieven         if (bs->bl.discard_alignment &&
52936f14da52SPeter Lieven             num >= bs->bl.discard_alignment &&
52946f14da52SPeter Lieven             sector_num % bs->bl.discard_alignment) {
52956f14da52SPeter Lieven             if (num > bs->bl.discard_alignment) {
52966f14da52SPeter Lieven                 num = bs->bl.discard_alignment;
52976f14da52SPeter Lieven             }
52986f14da52SPeter Lieven             num -= sector_num % bs->bl.discard_alignment;
52996f14da52SPeter Lieven         }
53006f14da52SPeter Lieven 
53016f14da52SPeter Lieven         /* limit request size */
53026f14da52SPeter Lieven         if (num > max_discard) {
53036f14da52SPeter Lieven             num = max_discard;
53046f14da52SPeter Lieven         }
53056f14da52SPeter Lieven 
5306d51e9fe5SPaolo Bonzini         if (bs->drv->bdrv_co_discard) {
53076f14da52SPeter Lieven             ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
5308d51e9fe5SPaolo Bonzini         } else {
53097c84b1b8SMarkus Armbruster             BlockAIOCB *acb;
53104265d620SPaolo Bonzini             CoroutineIOCompletion co = {
53114265d620SPaolo Bonzini                 .coroutine = qemu_coroutine_self(),
53124265d620SPaolo Bonzini             };
53134265d620SPaolo Bonzini 
53144265d620SPaolo Bonzini             acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
53154265d620SPaolo Bonzini                                             bdrv_co_io_em_complete, &co);
53164265d620SPaolo Bonzini             if (acb == NULL) {
53174265d620SPaolo Bonzini                 return -EIO;
53184265d620SPaolo Bonzini             } else {
53194265d620SPaolo Bonzini                 qemu_coroutine_yield();
5320d51e9fe5SPaolo Bonzini                 ret = co.ret;
53214265d620SPaolo Bonzini             }
5322d51e9fe5SPaolo Bonzini         }
53237ce21016SPaolo Bonzini         if (ret && ret != -ENOTSUP) {
5324d51e9fe5SPaolo Bonzini             return ret;
5325d51e9fe5SPaolo Bonzini         }
5326d51e9fe5SPaolo Bonzini 
5327d51e9fe5SPaolo Bonzini         sector_num += num;
5328d51e9fe5SPaolo Bonzini         nb_sectors -= num;
5329d51e9fe5SPaolo Bonzini     }
53304265d620SPaolo Bonzini     return 0;
53314265d620SPaolo Bonzini }
53324265d620SPaolo Bonzini 
53334265d620SPaolo Bonzini int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
53344265d620SPaolo Bonzini {
53354265d620SPaolo Bonzini     Coroutine *co;
5336775aa8b6SKevin Wolf     DiscardCo rwco = {
53374265d620SPaolo Bonzini         .bs = bs,
53384265d620SPaolo Bonzini         .sector_num = sector_num,
53394265d620SPaolo Bonzini         .nb_sectors = nb_sectors,
53404265d620SPaolo Bonzini         .ret = NOT_DONE,
53414265d620SPaolo Bonzini     };
53424265d620SPaolo Bonzini 
53434265d620SPaolo Bonzini     if (qemu_in_coroutine()) {
53444265d620SPaolo Bonzini         /* Fast-path if already in coroutine context */
53454265d620SPaolo Bonzini         bdrv_discard_co_entry(&rwco);
53464265d620SPaolo Bonzini     } else {
53472572b37aSStefan Hajnoczi         AioContext *aio_context = bdrv_get_aio_context(bs);
53482572b37aSStefan Hajnoczi 
53494265d620SPaolo Bonzini         co = qemu_coroutine_create(bdrv_discard_co_entry);
53504265d620SPaolo Bonzini         qemu_coroutine_enter(co, &rwco);
53514265d620SPaolo Bonzini         while (rwco.ret == NOT_DONE) {
53522572b37aSStefan Hajnoczi             aio_poll(aio_context, true);
53534265d620SPaolo Bonzini         }
53544265d620SPaolo Bonzini     }
53554265d620SPaolo Bonzini 
53564265d620SPaolo Bonzini     return rwco.ret;
53574265d620SPaolo Bonzini }
53584265d620SPaolo Bonzini 
5359f9f05dc5SKevin Wolf /**************************************************************/
536019cb3738Sbellard /* removable device support */
536119cb3738Sbellard 
536219cb3738Sbellard /**
536319cb3738Sbellard  * Return TRUE if the media is present
536419cb3738Sbellard  */
536519cb3738Sbellard int bdrv_is_inserted(BlockDriverState *bs)
536619cb3738Sbellard {
536719cb3738Sbellard     BlockDriver *drv = bs->drv;
5368a1aff5bfSMarkus Armbruster 
536919cb3738Sbellard     if (!drv)
537019cb3738Sbellard         return 0;
537119cb3738Sbellard     if (!drv->bdrv_is_inserted)
5372a1aff5bfSMarkus Armbruster         return 1;
5373a1aff5bfSMarkus Armbruster     return drv->bdrv_is_inserted(bs);
537419cb3738Sbellard }
537519cb3738Sbellard 
537619cb3738Sbellard /**
53778e49ca46SMarkus Armbruster  * Return whether the media changed since the last call to this
53788e49ca46SMarkus Armbruster  * function, or -ENOTSUP if we don't know.  Most drivers don't know.
537919cb3738Sbellard  */
538019cb3738Sbellard int bdrv_media_changed(BlockDriverState *bs)
538119cb3738Sbellard {
538219cb3738Sbellard     BlockDriver *drv = bs->drv;
538319cb3738Sbellard 
53848e49ca46SMarkus Armbruster     if (drv && drv->bdrv_media_changed) {
53858e49ca46SMarkus Armbruster         return drv->bdrv_media_changed(bs);
53868e49ca46SMarkus Armbruster     }
53878e49ca46SMarkus Armbruster     return -ENOTSUP;
538819cb3738Sbellard }
538919cb3738Sbellard 
539019cb3738Sbellard /**
539119cb3738Sbellard  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
539219cb3738Sbellard  */
5393f36f3949SLuiz Capitulino void bdrv_eject(BlockDriverState *bs, bool eject_flag)
539419cb3738Sbellard {
539519cb3738Sbellard     BlockDriver *drv = bs->drv;
5396bfb197e0SMarkus Armbruster     const char *device_name;
539719cb3738Sbellard 
5398822e1cd1SMarkus Armbruster     if (drv && drv->bdrv_eject) {
5399822e1cd1SMarkus Armbruster         drv->bdrv_eject(bs, eject_flag);
540019cb3738Sbellard     }
54016f382ed2SLuiz Capitulino 
5402bfb197e0SMarkus Armbruster     device_name = bdrv_get_device_name(bs);
5403bfb197e0SMarkus Armbruster     if (device_name[0] != '\0') {
5404bfb197e0SMarkus Armbruster         qapi_event_send_device_tray_moved(device_name,
5405a5ee7bd4SWenchao Xia                                           eject_flag, &error_abort);
54066f382ed2SLuiz Capitulino     }
540719cb3738Sbellard }
540819cb3738Sbellard 
540919cb3738Sbellard /**
541019cb3738Sbellard  * Lock or unlock the media (if it is locked, the user won't be able
541119cb3738Sbellard  * to eject it manually).
541219cb3738Sbellard  */
5413025e849aSMarkus Armbruster void bdrv_lock_medium(BlockDriverState *bs, bool locked)
541419cb3738Sbellard {
541519cb3738Sbellard     BlockDriver *drv = bs->drv;
541619cb3738Sbellard 
5417025e849aSMarkus Armbruster     trace_bdrv_lock_medium(bs, locked);
5418b8c6d095SStefan Hajnoczi 
5419025e849aSMarkus Armbruster     if (drv && drv->bdrv_lock_medium) {
5420025e849aSMarkus Armbruster         drv->bdrv_lock_medium(bs, locked);
542119cb3738Sbellard     }
542219cb3738Sbellard }
5423985a03b0Sths 
5424985a03b0Sths /* needed for generic scsi interface */
5425985a03b0Sths 
5426985a03b0Sths int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
5427985a03b0Sths {
5428985a03b0Sths     BlockDriver *drv = bs->drv;
5429985a03b0Sths 
5430985a03b0Sths     if (drv && drv->bdrv_ioctl)
5431985a03b0Sths         return drv->bdrv_ioctl(bs, req, buf);
5432985a03b0Sths     return -ENOTSUP;
5433985a03b0Sths }
54347d780669Saliguori 
54357c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
5436221f715dSaliguori         unsigned long int req, void *buf,
5437097310b5SMarkus Armbruster         BlockCompletionFunc *cb, void *opaque)
54387d780669Saliguori {
5439221f715dSaliguori     BlockDriver *drv = bs->drv;
54407d780669Saliguori 
5441221f715dSaliguori     if (drv && drv->bdrv_aio_ioctl)
5442221f715dSaliguori         return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
5443221f715dSaliguori     return NULL;
54447d780669Saliguori }
5445e268ca52Saliguori 
54461b7fd729SPaolo Bonzini void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
54477b6f9300SMarkus Armbruster {
54481b7fd729SPaolo Bonzini     bs->guest_block_size = align;
54497b6f9300SMarkus Armbruster }
54507cd1e32aSlirans@il.ibm.com 
5451e268ca52Saliguori void *qemu_blockalign(BlockDriverState *bs, size_t size)
5452e268ca52Saliguori {
5453339064d5SKevin Wolf     return qemu_memalign(bdrv_opt_mem_align(bs), size);
5454e268ca52Saliguori }
54557cd1e32aSlirans@il.ibm.com 
54569ebd8448SMax Reitz void *qemu_blockalign0(BlockDriverState *bs, size_t size)
54579ebd8448SMax Reitz {
54589ebd8448SMax Reitz     return memset(qemu_blockalign(bs, size), 0, size);
54599ebd8448SMax Reitz }
54609ebd8448SMax Reitz 
54617d2a35ccSKevin Wolf void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
54627d2a35ccSKevin Wolf {
54637d2a35ccSKevin Wolf     size_t align = bdrv_opt_mem_align(bs);
54647d2a35ccSKevin Wolf 
54657d2a35ccSKevin Wolf     /* Ensure that NULL is never returned on success */
54667d2a35ccSKevin Wolf     assert(align > 0);
54677d2a35ccSKevin Wolf     if (size == 0) {
54687d2a35ccSKevin Wolf         size = align;
54697d2a35ccSKevin Wolf     }
54707d2a35ccSKevin Wolf 
54717d2a35ccSKevin Wolf     return qemu_try_memalign(align, size);
54727d2a35ccSKevin Wolf }
54737d2a35ccSKevin Wolf 
54749ebd8448SMax Reitz void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
54759ebd8448SMax Reitz {
54769ebd8448SMax Reitz     void *mem = qemu_try_blockalign(bs, size);
54779ebd8448SMax Reitz 
54789ebd8448SMax Reitz     if (mem) {
54799ebd8448SMax Reitz         memset(mem, 0, size);
54809ebd8448SMax Reitz     }
54819ebd8448SMax Reitz 
54829ebd8448SMax Reitz     return mem;
54839ebd8448SMax Reitz }
54849ebd8448SMax Reitz 
5485c53b1c51SStefan Hajnoczi /*
5486c53b1c51SStefan Hajnoczi  * Check if all memory in this vector is sector aligned.
5487c53b1c51SStefan Hajnoczi  */
5488c53b1c51SStefan Hajnoczi bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
5489c53b1c51SStefan Hajnoczi {
5490c53b1c51SStefan Hajnoczi     int i;
5491339064d5SKevin Wolf     size_t alignment = bdrv_opt_mem_align(bs);
5492c53b1c51SStefan Hajnoczi 
5493c53b1c51SStefan Hajnoczi     for (i = 0; i < qiov->niov; i++) {
5494339064d5SKevin Wolf         if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
5495c53b1c51SStefan Hajnoczi             return false;
5496c53b1c51SStefan Hajnoczi         }
5497339064d5SKevin Wolf         if (qiov->iov[i].iov_len % alignment) {
54981ff735bdSKevin Wolf             return false;
54991ff735bdSKevin Wolf         }
5500c53b1c51SStefan Hajnoczi     }
5501c53b1c51SStefan Hajnoczi 
5502c53b1c51SStefan Hajnoczi     return true;
5503c53b1c51SStefan Hajnoczi }
5504c53b1c51SStefan Hajnoczi 
55050db6e54aSFam Zheng BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
55060db6e54aSFam Zheng {
55070db6e54aSFam Zheng     BdrvDirtyBitmap *bm;
55080db6e54aSFam Zheng 
55090db6e54aSFam Zheng     assert(name);
55100db6e54aSFam Zheng     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
55110db6e54aSFam Zheng         if (bm->name && !strcmp(name, bm->name)) {
55120db6e54aSFam Zheng             return bm;
55130db6e54aSFam Zheng         }
55140db6e54aSFam Zheng     }
55150db6e54aSFam Zheng     return NULL;
55160db6e54aSFam Zheng }
55170db6e54aSFam Zheng 
55180db6e54aSFam Zheng void bdrv_dirty_bitmap_make_anon(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
55190db6e54aSFam Zheng {
55200db6e54aSFam Zheng     g_free(bitmap->name);
55210db6e54aSFam Zheng     bitmap->name = NULL;
55220db6e54aSFam Zheng }
55230db6e54aSFam Zheng 
55240db6e54aSFam Zheng BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
55255fba6c0eSJohn Snow                                           uint32_t granularity,
55260db6e54aSFam Zheng                                           const char *name,
5527b8afb520SFam Zheng                                           Error **errp)
55287cd1e32aSlirans@il.ibm.com {
55297cd1e32aSlirans@il.ibm.com     int64_t bitmap_size;
5530e4654d2dSFam Zheng     BdrvDirtyBitmap *bitmap;
55315fba6c0eSJohn Snow     uint32_t sector_granularity;
5532a55eb92cSJan Kiszka 
553350717e94SPaolo Bonzini     assert((granularity & (granularity - 1)) == 0);
553450717e94SPaolo Bonzini 
55350db6e54aSFam Zheng     if (name && bdrv_find_dirty_bitmap(bs, name)) {
55360db6e54aSFam Zheng         error_setg(errp, "Bitmap already exists: %s", name);
55370db6e54aSFam Zheng         return NULL;
55380db6e54aSFam Zheng     }
55395fba6c0eSJohn Snow     sector_granularity = granularity >> BDRV_SECTOR_BITS;
55405fba6c0eSJohn Snow     assert(sector_granularity);
554157322b78SMarkus Armbruster     bitmap_size = bdrv_nb_sectors(bs);
5542b8afb520SFam Zheng     if (bitmap_size < 0) {
5543b8afb520SFam Zheng         error_setg_errno(errp, -bitmap_size, "could not get length of device");
5544b8afb520SFam Zheng         errno = -bitmap_size;
5545b8afb520SFam Zheng         return NULL;
5546b8afb520SFam Zheng     }
55475839e53bSMarkus Armbruster     bitmap = g_new0(BdrvDirtyBitmap, 1);
55485fba6c0eSJohn Snow     bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
55490db6e54aSFam Zheng     bitmap->name = g_strdup(name);
5550e4654d2dSFam Zheng     QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
5551e4654d2dSFam Zheng     return bitmap;
5552e4654d2dSFam Zheng }
5553e4654d2dSFam Zheng 
5554e4654d2dSFam Zheng void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5555e4654d2dSFam Zheng {
5556e4654d2dSFam Zheng     BdrvDirtyBitmap *bm, *next;
5557e4654d2dSFam Zheng     QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
5558e4654d2dSFam Zheng         if (bm == bitmap) {
5559e4654d2dSFam Zheng             QLIST_REMOVE(bitmap, list);
5560e4654d2dSFam Zheng             hbitmap_free(bitmap->bitmap);
55610db6e54aSFam Zheng             g_free(bitmap->name);
5562e4654d2dSFam Zheng             g_free(bitmap);
5563e4654d2dSFam Zheng             return;
55647cd1e32aSlirans@il.ibm.com         }
55657cd1e32aSlirans@il.ibm.com     }
55667cd1e32aSlirans@il.ibm.com }
55677cd1e32aSlirans@il.ibm.com 
556821b56835SFam Zheng BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
556921b56835SFam Zheng {
557021b56835SFam Zheng     BdrvDirtyBitmap *bm;
557121b56835SFam Zheng     BlockDirtyInfoList *list = NULL;
557221b56835SFam Zheng     BlockDirtyInfoList **plist = &list;
557321b56835SFam Zheng 
557421b56835SFam Zheng     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
55755839e53bSMarkus Armbruster         BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
55765839e53bSMarkus Armbruster         BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
557721b56835SFam Zheng         info->count = bdrv_get_dirty_count(bs, bm);
557821b56835SFam Zheng         info->granularity =
55795fba6c0eSJohn Snow             ((uint32_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bm->bitmap));
55800db6e54aSFam Zheng         info->has_name = !!bm->name;
55810db6e54aSFam Zheng         info->name = g_strdup(bm->name);
558221b56835SFam Zheng         entry->value = info;
558321b56835SFam Zheng         *plist = entry;
558421b56835SFam Zheng         plist = &entry->next;
558521b56835SFam Zheng     }
558621b56835SFam Zheng 
558721b56835SFam Zheng     return list;
558821b56835SFam Zheng }
558921b56835SFam Zheng 
5590e4654d2dSFam Zheng int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
55917cd1e32aSlirans@il.ibm.com {
5592e4654d2dSFam Zheng     if (bitmap) {
5593e4654d2dSFam Zheng         return hbitmap_get(bitmap->bitmap, sector);
55947cd1e32aSlirans@il.ibm.com     } else {
55957cd1e32aSlirans@il.ibm.com         return 0;
55967cd1e32aSlirans@il.ibm.com     }
55977cd1e32aSlirans@il.ibm.com }
55987cd1e32aSlirans@il.ibm.com 
5599*341ebc2fSJohn Snow /**
5600*341ebc2fSJohn Snow  * Chooses a default granularity based on the existing cluster size,
5601*341ebc2fSJohn Snow  * but clamped between [4K, 64K]. Defaults to 64K in the case that there
5602*341ebc2fSJohn Snow  * is no cluster size information available.
5603*341ebc2fSJohn Snow  */
5604*341ebc2fSJohn Snow uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
5605*341ebc2fSJohn Snow {
5606*341ebc2fSJohn Snow     BlockDriverInfo bdi;
5607*341ebc2fSJohn Snow     uint32_t granularity;
5608*341ebc2fSJohn Snow 
5609*341ebc2fSJohn Snow     if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
5610*341ebc2fSJohn Snow         granularity = MAX(4096, bdi.cluster_size);
5611*341ebc2fSJohn Snow         granularity = MIN(65536, granularity);
5612*341ebc2fSJohn Snow     } else {
5613*341ebc2fSJohn Snow         granularity = 65536;
5614*341ebc2fSJohn Snow     }
5615*341ebc2fSJohn Snow 
5616*341ebc2fSJohn Snow     return granularity;
5617*341ebc2fSJohn Snow }
5618*341ebc2fSJohn Snow 
5619e4654d2dSFam Zheng void bdrv_dirty_iter_init(BlockDriverState *bs,
5620e4654d2dSFam Zheng                           BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
56211755da16SPaolo Bonzini {
5622e4654d2dSFam Zheng     hbitmap_iter_init(hbi, bitmap->bitmap, 0);
56231755da16SPaolo Bonzini }
56241755da16SPaolo Bonzini 
5625c4237dfaSVladimir Sementsov-Ogievskiy void bdrv_set_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
5626c4237dfaSVladimir Sementsov-Ogievskiy                            int64_t cur_sector, int nr_sectors)
5627c4237dfaSVladimir Sementsov-Ogievskiy {
5628c4237dfaSVladimir Sementsov-Ogievskiy     hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
5629c4237dfaSVladimir Sementsov-Ogievskiy }
5630c4237dfaSVladimir Sementsov-Ogievskiy 
5631c4237dfaSVladimir Sementsov-Ogievskiy void bdrv_reset_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
5632c4237dfaSVladimir Sementsov-Ogievskiy                              int64_t cur_sector, int nr_sectors)
5633c4237dfaSVladimir Sementsov-Ogievskiy {
5634c4237dfaSVladimir Sementsov-Ogievskiy     hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
5635c4237dfaSVladimir Sementsov-Ogievskiy }
5636c4237dfaSVladimir Sementsov-Ogievskiy 
5637c4237dfaSVladimir Sementsov-Ogievskiy static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
56381755da16SPaolo Bonzini                            int nr_sectors)
56391755da16SPaolo Bonzini {
5640e4654d2dSFam Zheng     BdrvDirtyBitmap *bitmap;
5641e4654d2dSFam Zheng     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5642e4654d2dSFam Zheng         hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
5643e4654d2dSFam Zheng     }
56441755da16SPaolo Bonzini }
56451755da16SPaolo Bonzini 
5646c4237dfaSVladimir Sementsov-Ogievskiy static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
5647c4237dfaSVladimir Sementsov-Ogievskiy                              int nr_sectors)
56487cd1e32aSlirans@il.ibm.com {
5649e4654d2dSFam Zheng     BdrvDirtyBitmap *bitmap;
5650e4654d2dSFam Zheng     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5651e4654d2dSFam Zheng         hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
5652e4654d2dSFam Zheng     }
56537cd1e32aSlirans@il.ibm.com }
5654aaa0eb75SLiran Schour 
5655e4654d2dSFam Zheng int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5656aaa0eb75SLiran Schour {
5657e4654d2dSFam Zheng     return hbitmap_count(bitmap->bitmap);
5658aaa0eb75SLiran Schour }
5659f88e1a42SJes Sorensen 
56609fcb0251SFam Zheng /* Get a reference to bs */
56619fcb0251SFam Zheng void bdrv_ref(BlockDriverState *bs)
56629fcb0251SFam Zheng {
56639fcb0251SFam Zheng     bs->refcnt++;
56649fcb0251SFam Zheng }
56659fcb0251SFam Zheng 
56669fcb0251SFam Zheng /* Release a previously grabbed reference to bs.
56679fcb0251SFam Zheng  * If after releasing, reference count is zero, the BlockDriverState is
56689fcb0251SFam Zheng  * deleted. */
56699fcb0251SFam Zheng void bdrv_unref(BlockDriverState *bs)
56709fcb0251SFam Zheng {
56719a4d5ca6SJeff Cody     if (!bs) {
56729a4d5ca6SJeff Cody         return;
56739a4d5ca6SJeff Cody     }
56749fcb0251SFam Zheng     assert(bs->refcnt > 0);
56759fcb0251SFam Zheng     if (--bs->refcnt == 0) {
56769fcb0251SFam Zheng         bdrv_delete(bs);
56779fcb0251SFam Zheng     }
56789fcb0251SFam Zheng }
56799fcb0251SFam Zheng 
5680fbe40ff7SFam Zheng struct BdrvOpBlocker {
5681fbe40ff7SFam Zheng     Error *reason;
5682fbe40ff7SFam Zheng     QLIST_ENTRY(BdrvOpBlocker) list;
5683fbe40ff7SFam Zheng };
5684fbe40ff7SFam Zheng 
5685fbe40ff7SFam Zheng bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
5686fbe40ff7SFam Zheng {
5687fbe40ff7SFam Zheng     BdrvOpBlocker *blocker;
5688fbe40ff7SFam Zheng     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5689fbe40ff7SFam Zheng     if (!QLIST_EMPTY(&bs->op_blockers[op])) {
5690fbe40ff7SFam Zheng         blocker = QLIST_FIRST(&bs->op_blockers[op]);
5691fbe40ff7SFam Zheng         if (errp) {
569281e5f78aSAlberto Garcia             error_setg(errp, "Node '%s' is busy: %s",
569381e5f78aSAlberto Garcia                        bdrv_get_device_or_node_name(bs),
5694bfb197e0SMarkus Armbruster                        error_get_pretty(blocker->reason));
5695fbe40ff7SFam Zheng         }
5696fbe40ff7SFam Zheng         return true;
5697fbe40ff7SFam Zheng     }
5698fbe40ff7SFam Zheng     return false;
5699fbe40ff7SFam Zheng }
5700fbe40ff7SFam Zheng 
5701fbe40ff7SFam Zheng void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
5702fbe40ff7SFam Zheng {
5703fbe40ff7SFam Zheng     BdrvOpBlocker *blocker;
5704fbe40ff7SFam Zheng     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5705fbe40ff7SFam Zheng 
57065839e53bSMarkus Armbruster     blocker = g_new0(BdrvOpBlocker, 1);
5707fbe40ff7SFam Zheng     blocker->reason = reason;
5708fbe40ff7SFam Zheng     QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
5709fbe40ff7SFam Zheng }
5710fbe40ff7SFam Zheng 
5711fbe40ff7SFam Zheng void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
5712fbe40ff7SFam Zheng {
5713fbe40ff7SFam Zheng     BdrvOpBlocker *blocker, *next;
5714fbe40ff7SFam Zheng     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5715fbe40ff7SFam Zheng     QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
5716fbe40ff7SFam Zheng         if (blocker->reason == reason) {
5717fbe40ff7SFam Zheng             QLIST_REMOVE(blocker, list);
5718fbe40ff7SFam Zheng             g_free(blocker);
5719fbe40ff7SFam Zheng         }
5720fbe40ff7SFam Zheng     }
5721fbe40ff7SFam Zheng }
5722fbe40ff7SFam Zheng 
5723fbe40ff7SFam Zheng void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
5724fbe40ff7SFam Zheng {
5725fbe40ff7SFam Zheng     int i;
5726fbe40ff7SFam Zheng     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5727fbe40ff7SFam Zheng         bdrv_op_block(bs, i, reason);
5728fbe40ff7SFam Zheng     }
5729fbe40ff7SFam Zheng }
5730fbe40ff7SFam Zheng 
5731fbe40ff7SFam Zheng void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
5732fbe40ff7SFam Zheng {
5733fbe40ff7SFam Zheng     int i;
5734fbe40ff7SFam Zheng     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5735fbe40ff7SFam Zheng         bdrv_op_unblock(bs, i, reason);
5736fbe40ff7SFam Zheng     }
5737fbe40ff7SFam Zheng }
5738fbe40ff7SFam Zheng 
5739fbe40ff7SFam Zheng bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
5740fbe40ff7SFam Zheng {
5741fbe40ff7SFam Zheng     int i;
5742fbe40ff7SFam Zheng 
5743fbe40ff7SFam Zheng     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5744fbe40ff7SFam Zheng         if (!QLIST_EMPTY(&bs->op_blockers[i])) {
5745fbe40ff7SFam Zheng             return false;
5746fbe40ff7SFam Zheng         }
5747fbe40ff7SFam Zheng     }
5748fbe40ff7SFam Zheng     return true;
5749fbe40ff7SFam Zheng }
5750fbe40ff7SFam Zheng 
575128a7282aSLuiz Capitulino void bdrv_iostatus_enable(BlockDriverState *bs)
575228a7282aSLuiz Capitulino {
5753d6bf279eSLuiz Capitulino     bs->iostatus_enabled = true;
575458e21ef5SLuiz Capitulino     bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
575528a7282aSLuiz Capitulino }
575628a7282aSLuiz Capitulino 
575728a7282aSLuiz Capitulino /* The I/O status is only enabled if the drive explicitly
575828a7282aSLuiz Capitulino  * enables it _and_ the VM is configured to stop on errors */
575928a7282aSLuiz Capitulino bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
576028a7282aSLuiz Capitulino {
5761d6bf279eSLuiz Capitulino     return (bs->iostatus_enabled &&
576292aa5c6dSPaolo Bonzini            (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
576392aa5c6dSPaolo Bonzini             bs->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
576492aa5c6dSPaolo Bonzini             bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
576528a7282aSLuiz Capitulino }
576628a7282aSLuiz Capitulino 
576728a7282aSLuiz Capitulino void bdrv_iostatus_disable(BlockDriverState *bs)
576828a7282aSLuiz Capitulino {
5769d6bf279eSLuiz Capitulino     bs->iostatus_enabled = false;
577028a7282aSLuiz Capitulino }
577128a7282aSLuiz Capitulino 
577228a7282aSLuiz Capitulino void bdrv_iostatus_reset(BlockDriverState *bs)
577328a7282aSLuiz Capitulino {
577428a7282aSLuiz Capitulino     if (bdrv_iostatus_is_enabled(bs)) {
577558e21ef5SLuiz Capitulino         bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
57763bd293c3SPaolo Bonzini         if (bs->job) {
57773bd293c3SPaolo Bonzini             block_job_iostatus_reset(bs->job);
57783bd293c3SPaolo Bonzini         }
577928a7282aSLuiz Capitulino     }
578028a7282aSLuiz Capitulino }
578128a7282aSLuiz Capitulino 
578228a7282aSLuiz Capitulino void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
578328a7282aSLuiz Capitulino {
57843e1caa5fSPaolo Bonzini     assert(bdrv_iostatus_is_enabled(bs));
57853e1caa5fSPaolo Bonzini     if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
578658e21ef5SLuiz Capitulino         bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
578758e21ef5SLuiz Capitulino                                          BLOCK_DEVICE_IO_STATUS_FAILED;
578828a7282aSLuiz Capitulino     }
578928a7282aSLuiz Capitulino }
579028a7282aSLuiz Capitulino 
5791d92ada22SLuiz Capitulino void bdrv_img_create(const char *filename, const char *fmt,
5792f88e1a42SJes Sorensen                      const char *base_filename, const char *base_fmt,
5793f382d43aSMiroslav Rezanina                      char *options, uint64_t img_size, int flags,
5794f382d43aSMiroslav Rezanina                      Error **errp, bool quiet)
5795f88e1a42SJes Sorensen {
579683d0521aSChunyan Liu     QemuOptsList *create_opts = NULL;
579783d0521aSChunyan Liu     QemuOpts *opts = NULL;
579883d0521aSChunyan Liu     const char *backing_fmt, *backing_file;
579983d0521aSChunyan Liu     int64_t size;
5800f88e1a42SJes Sorensen     BlockDriver *drv, *proto_drv;
580196df67d1SStefan Hajnoczi     BlockDriver *backing_drv = NULL;
5802cc84d90fSMax Reitz     Error *local_err = NULL;
5803f88e1a42SJes Sorensen     int ret = 0;
5804f88e1a42SJes Sorensen 
5805f88e1a42SJes Sorensen     /* Find driver and parse its options */
5806f88e1a42SJes Sorensen     drv = bdrv_find_format(fmt);
5807f88e1a42SJes Sorensen     if (!drv) {
580871c79813SLuiz Capitulino         error_setg(errp, "Unknown file format '%s'", fmt);
5809d92ada22SLuiz Capitulino         return;
5810f88e1a42SJes Sorensen     }
5811f88e1a42SJes Sorensen 
5812b65a5e12SMax Reitz     proto_drv = bdrv_find_protocol(filename, true, errp);
5813f88e1a42SJes Sorensen     if (!proto_drv) {
5814d92ada22SLuiz Capitulino         return;
5815f88e1a42SJes Sorensen     }
5816f88e1a42SJes Sorensen 
5817c6149724SMax Reitz     if (!drv->create_opts) {
5818c6149724SMax Reitz         error_setg(errp, "Format driver '%s' does not support image creation",
5819c6149724SMax Reitz                    drv->format_name);
5820c6149724SMax Reitz         return;
5821c6149724SMax Reitz     }
5822c6149724SMax Reitz 
5823c6149724SMax Reitz     if (!proto_drv->create_opts) {
5824c6149724SMax Reitz         error_setg(errp, "Protocol driver '%s' does not support image creation",
5825c6149724SMax Reitz                    proto_drv->format_name);
5826c6149724SMax Reitz         return;
5827c6149724SMax Reitz     }
5828c6149724SMax Reitz 
5829c282e1fdSChunyan Liu     create_opts = qemu_opts_append(create_opts, drv->create_opts);
5830c282e1fdSChunyan Liu     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
5831f88e1a42SJes Sorensen 
5832f88e1a42SJes Sorensen     /* Create parameter list with default values */
583383d0521aSChunyan Liu     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
583439101f25SMarkus Armbruster     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
5835f88e1a42SJes Sorensen 
5836f88e1a42SJes Sorensen     /* Parse -o options */
5837f88e1a42SJes Sorensen     if (options) {
5838dc523cd3SMarkus Armbruster         qemu_opts_do_parse(opts, options, NULL, &local_err);
5839dc523cd3SMarkus Armbruster         if (local_err) {
5840dc523cd3SMarkus Armbruster             error_report_err(local_err);
5841dc523cd3SMarkus Armbruster             local_err = NULL;
584283d0521aSChunyan Liu             error_setg(errp, "Invalid options for file format '%s'", fmt);
5843f88e1a42SJes Sorensen             goto out;
5844f88e1a42SJes Sorensen         }
5845f88e1a42SJes Sorensen     }
5846f88e1a42SJes Sorensen 
5847f88e1a42SJes Sorensen     if (base_filename) {
5848f43e47dbSMarkus Armbruster         qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
58496be4194bSMarkus Armbruster         if (local_err) {
585071c79813SLuiz Capitulino             error_setg(errp, "Backing file not supported for file format '%s'",
585171c79813SLuiz Capitulino                        fmt);
5852f88e1a42SJes Sorensen             goto out;
5853f88e1a42SJes Sorensen         }
5854f88e1a42SJes Sorensen     }
5855f88e1a42SJes Sorensen 
5856f88e1a42SJes Sorensen     if (base_fmt) {
5857f43e47dbSMarkus Armbruster         qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
58586be4194bSMarkus Armbruster         if (local_err) {
585971c79813SLuiz Capitulino             error_setg(errp, "Backing file format not supported for file "
586071c79813SLuiz Capitulino                              "format '%s'", fmt);
5861f88e1a42SJes Sorensen             goto out;
5862f88e1a42SJes Sorensen         }
5863f88e1a42SJes Sorensen     }
5864f88e1a42SJes Sorensen 
586583d0521aSChunyan Liu     backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
586683d0521aSChunyan Liu     if (backing_file) {
586783d0521aSChunyan Liu         if (!strcmp(filename, backing_file)) {
586871c79813SLuiz Capitulino             error_setg(errp, "Error: Trying to create an image with the "
586971c79813SLuiz Capitulino                              "same filename as the backing file");
5870792da93aSJes Sorensen             goto out;
5871792da93aSJes Sorensen         }
5872792da93aSJes Sorensen     }
5873792da93aSJes Sorensen 
587483d0521aSChunyan Liu     backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
587583d0521aSChunyan Liu     if (backing_fmt) {
587683d0521aSChunyan Liu         backing_drv = bdrv_find_format(backing_fmt);
587796df67d1SStefan Hajnoczi         if (!backing_drv) {
587871c79813SLuiz Capitulino             error_setg(errp, "Unknown backing file format '%s'",
587983d0521aSChunyan Liu                        backing_fmt);
5880f88e1a42SJes Sorensen             goto out;
5881f88e1a42SJes Sorensen         }
5882f88e1a42SJes Sorensen     }
5883f88e1a42SJes Sorensen 
5884f88e1a42SJes Sorensen     // The size for the image must always be specified, with one exception:
5885f88e1a42SJes Sorensen     // If we are using a backing file, we can obtain the size from there
588683d0521aSChunyan Liu     size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
588783d0521aSChunyan Liu     if (size == -1) {
588883d0521aSChunyan Liu         if (backing_file) {
588966f6b814SMax Reitz             BlockDriverState *bs;
589029168018SMax Reitz             char *full_backing = g_new0(char, PATH_MAX);
589152bf1e72SMarkus Armbruster             int64_t size;
589263090dacSPaolo Bonzini             int back_flags;
589363090dacSPaolo Bonzini 
589429168018SMax Reitz             bdrv_get_full_backing_filename_from_filename(filename, backing_file,
589529168018SMax Reitz                                                          full_backing, PATH_MAX,
589629168018SMax Reitz                                                          &local_err);
589729168018SMax Reitz             if (local_err) {
589829168018SMax Reitz                 g_free(full_backing);
589929168018SMax Reitz                 goto out;
590029168018SMax Reitz             }
590129168018SMax Reitz 
590263090dacSPaolo Bonzini             /* backing files always opened read-only */
590363090dacSPaolo Bonzini             back_flags =
590463090dacSPaolo Bonzini                 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
5905f88e1a42SJes Sorensen 
5906f67503e5SMax Reitz             bs = NULL;
590729168018SMax Reitz             ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags,
5908cc84d90fSMax Reitz                             backing_drv, &local_err);
590929168018SMax Reitz             g_free(full_backing);
5910f88e1a42SJes Sorensen             if (ret < 0) {
5911f88e1a42SJes Sorensen                 goto out;
5912f88e1a42SJes Sorensen             }
591352bf1e72SMarkus Armbruster             size = bdrv_getlength(bs);
591452bf1e72SMarkus Armbruster             if (size < 0) {
591552bf1e72SMarkus Armbruster                 error_setg_errno(errp, -size, "Could not get size of '%s'",
591652bf1e72SMarkus Armbruster                                  backing_file);
591752bf1e72SMarkus Armbruster                 bdrv_unref(bs);
591852bf1e72SMarkus Armbruster                 goto out;
591952bf1e72SMarkus Armbruster             }
5920f88e1a42SJes Sorensen 
592139101f25SMarkus Armbruster             qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
592266f6b814SMax Reitz 
592366f6b814SMax Reitz             bdrv_unref(bs);
5924f88e1a42SJes Sorensen         } else {
592571c79813SLuiz Capitulino             error_setg(errp, "Image creation needs a size parameter");
5926f88e1a42SJes Sorensen             goto out;
5927f88e1a42SJes Sorensen         }
5928f88e1a42SJes Sorensen     }
5929f88e1a42SJes Sorensen 
5930f382d43aSMiroslav Rezanina     if (!quiet) {
5931f88e1a42SJes Sorensen         printf("Formatting '%s', fmt=%s", filename, fmt);
593243c5d8f8SFam Zheng         qemu_opts_print(opts, " ");
5933f88e1a42SJes Sorensen         puts("");
5934f382d43aSMiroslav Rezanina     }
593583d0521aSChunyan Liu 
5936c282e1fdSChunyan Liu     ret = bdrv_create(drv, filename, opts, &local_err);
593783d0521aSChunyan Liu 
5938cc84d90fSMax Reitz     if (ret == -EFBIG) {
5939cc84d90fSMax Reitz         /* This is generally a better message than whatever the driver would
5940cc84d90fSMax Reitz          * deliver (especially because of the cluster_size_hint), since that
5941cc84d90fSMax Reitz          * is most probably not much different from "image too large". */
5942f3f4d2c0SKevin Wolf         const char *cluster_size_hint = "";
594383d0521aSChunyan Liu         if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
5944f3f4d2c0SKevin Wolf             cluster_size_hint = " (try using a larger cluster size)";
5945f3f4d2c0SKevin Wolf         }
5946cc84d90fSMax Reitz         error_setg(errp, "The image size is too large for file format '%s'"
5947cc84d90fSMax Reitz                    "%s", fmt, cluster_size_hint);
5948cc84d90fSMax Reitz         error_free(local_err);
5949cc84d90fSMax Reitz         local_err = NULL;
5950f88e1a42SJes Sorensen     }
5951f88e1a42SJes Sorensen 
5952f88e1a42SJes Sorensen out:
595383d0521aSChunyan Liu     qemu_opts_del(opts);
595483d0521aSChunyan Liu     qemu_opts_free(create_opts);
595584d18f06SMarkus Armbruster     if (local_err) {
5956cc84d90fSMax Reitz         error_propagate(errp, local_err);
5957cc84d90fSMax Reitz     }
5958f88e1a42SJes Sorensen }
595985d126f3SStefan Hajnoczi 
596085d126f3SStefan Hajnoczi AioContext *bdrv_get_aio_context(BlockDriverState *bs)
596185d126f3SStefan Hajnoczi {
5962dcd04228SStefan Hajnoczi     return bs->aio_context;
5963dcd04228SStefan Hajnoczi }
5964dcd04228SStefan Hajnoczi 
5965dcd04228SStefan Hajnoczi void bdrv_detach_aio_context(BlockDriverState *bs)
5966dcd04228SStefan Hajnoczi {
596733384421SMax Reitz     BdrvAioNotifier *baf;
596833384421SMax Reitz 
5969dcd04228SStefan Hajnoczi     if (!bs->drv) {
5970dcd04228SStefan Hajnoczi         return;
5971dcd04228SStefan Hajnoczi     }
5972dcd04228SStefan Hajnoczi 
597333384421SMax Reitz     QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
597433384421SMax Reitz         baf->detach_aio_context(baf->opaque);
597533384421SMax Reitz     }
597633384421SMax Reitz 
597713af91ebSStefan Hajnoczi     if (bs->io_limits_enabled) {
597813af91ebSStefan Hajnoczi         throttle_detach_aio_context(&bs->throttle_state);
597913af91ebSStefan Hajnoczi     }
5980dcd04228SStefan Hajnoczi     if (bs->drv->bdrv_detach_aio_context) {
5981dcd04228SStefan Hajnoczi         bs->drv->bdrv_detach_aio_context(bs);
5982dcd04228SStefan Hajnoczi     }
5983dcd04228SStefan Hajnoczi     if (bs->file) {
5984dcd04228SStefan Hajnoczi         bdrv_detach_aio_context(bs->file);
5985dcd04228SStefan Hajnoczi     }
5986dcd04228SStefan Hajnoczi     if (bs->backing_hd) {
5987dcd04228SStefan Hajnoczi         bdrv_detach_aio_context(bs->backing_hd);
5988dcd04228SStefan Hajnoczi     }
5989dcd04228SStefan Hajnoczi 
5990dcd04228SStefan Hajnoczi     bs->aio_context = NULL;
5991dcd04228SStefan Hajnoczi }
5992dcd04228SStefan Hajnoczi 
5993dcd04228SStefan Hajnoczi void bdrv_attach_aio_context(BlockDriverState *bs,
5994dcd04228SStefan Hajnoczi                              AioContext *new_context)
5995dcd04228SStefan Hajnoczi {
599633384421SMax Reitz     BdrvAioNotifier *ban;
599733384421SMax Reitz 
5998dcd04228SStefan Hajnoczi     if (!bs->drv) {
5999dcd04228SStefan Hajnoczi         return;
6000dcd04228SStefan Hajnoczi     }
6001dcd04228SStefan Hajnoczi 
6002dcd04228SStefan Hajnoczi     bs->aio_context = new_context;
6003dcd04228SStefan Hajnoczi 
6004dcd04228SStefan Hajnoczi     if (bs->backing_hd) {
6005dcd04228SStefan Hajnoczi         bdrv_attach_aio_context(bs->backing_hd, new_context);
6006dcd04228SStefan Hajnoczi     }
6007dcd04228SStefan Hajnoczi     if (bs->file) {
6008dcd04228SStefan Hajnoczi         bdrv_attach_aio_context(bs->file, new_context);
6009dcd04228SStefan Hajnoczi     }
6010dcd04228SStefan Hajnoczi     if (bs->drv->bdrv_attach_aio_context) {
6011dcd04228SStefan Hajnoczi         bs->drv->bdrv_attach_aio_context(bs, new_context);
6012dcd04228SStefan Hajnoczi     }
601313af91ebSStefan Hajnoczi     if (bs->io_limits_enabled) {
601413af91ebSStefan Hajnoczi         throttle_attach_aio_context(&bs->throttle_state, new_context);
601513af91ebSStefan Hajnoczi     }
601633384421SMax Reitz 
601733384421SMax Reitz     QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
601833384421SMax Reitz         ban->attached_aio_context(new_context, ban->opaque);
601933384421SMax Reitz     }
6020dcd04228SStefan Hajnoczi }
6021dcd04228SStefan Hajnoczi 
6022dcd04228SStefan Hajnoczi void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
6023dcd04228SStefan Hajnoczi {
6024dcd04228SStefan Hajnoczi     bdrv_drain_all(); /* ensure there are no in-flight requests */
6025dcd04228SStefan Hajnoczi 
6026dcd04228SStefan Hajnoczi     bdrv_detach_aio_context(bs);
6027dcd04228SStefan Hajnoczi 
6028dcd04228SStefan Hajnoczi     /* This function executes in the old AioContext so acquire the new one in
6029dcd04228SStefan Hajnoczi      * case it runs in a different thread.
6030dcd04228SStefan Hajnoczi      */
6031dcd04228SStefan Hajnoczi     aio_context_acquire(new_context);
6032dcd04228SStefan Hajnoczi     bdrv_attach_aio_context(bs, new_context);
6033dcd04228SStefan Hajnoczi     aio_context_release(new_context);
603485d126f3SStefan Hajnoczi }
6035d616b224SStefan Hajnoczi 
603633384421SMax Reitz void bdrv_add_aio_context_notifier(BlockDriverState *bs,
603733384421SMax Reitz         void (*attached_aio_context)(AioContext *new_context, void *opaque),
603833384421SMax Reitz         void (*detach_aio_context)(void *opaque), void *opaque)
603933384421SMax Reitz {
604033384421SMax Reitz     BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
604133384421SMax Reitz     *ban = (BdrvAioNotifier){
604233384421SMax Reitz         .attached_aio_context = attached_aio_context,
604333384421SMax Reitz         .detach_aio_context   = detach_aio_context,
604433384421SMax Reitz         .opaque               = opaque
604533384421SMax Reitz     };
604633384421SMax Reitz 
604733384421SMax Reitz     QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
604833384421SMax Reitz }
604933384421SMax Reitz 
605033384421SMax Reitz void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
605133384421SMax Reitz                                       void (*attached_aio_context)(AioContext *,
605233384421SMax Reitz                                                                    void *),
605333384421SMax Reitz                                       void (*detach_aio_context)(void *),
605433384421SMax Reitz                                       void *opaque)
605533384421SMax Reitz {
605633384421SMax Reitz     BdrvAioNotifier *ban, *ban_next;
605733384421SMax Reitz 
605833384421SMax Reitz     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
605933384421SMax Reitz         if (ban->attached_aio_context == attached_aio_context &&
606033384421SMax Reitz             ban->detach_aio_context   == detach_aio_context   &&
606133384421SMax Reitz             ban->opaque               == opaque)
606233384421SMax Reitz         {
606333384421SMax Reitz             QLIST_REMOVE(ban, list);
606433384421SMax Reitz             g_free(ban);
606533384421SMax Reitz 
606633384421SMax Reitz             return;
606733384421SMax Reitz         }
606833384421SMax Reitz     }
606933384421SMax Reitz 
607033384421SMax Reitz     abort();
607133384421SMax Reitz }
607233384421SMax Reitz 
6073d616b224SStefan Hajnoczi void bdrv_add_before_write_notifier(BlockDriverState *bs,
6074d616b224SStefan Hajnoczi                                     NotifierWithReturn *notifier)
6075d616b224SStefan Hajnoczi {
6076d616b224SStefan Hajnoczi     notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
6077d616b224SStefan Hajnoczi }
60786f176b48SMax Reitz 
607977485434SMax Reitz int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
608077485434SMax Reitz                        BlockDriverAmendStatusCB *status_cb)
60816f176b48SMax Reitz {
6082c282e1fdSChunyan Liu     if (!bs->drv->bdrv_amend_options) {
60836f176b48SMax Reitz         return -ENOTSUP;
60846f176b48SMax Reitz     }
608577485434SMax Reitz     return bs->drv->bdrv_amend_options(bs, opts, status_cb);
60866f176b48SMax Reitz }
6087f6186f49SBenoît Canet 
6088b5042a36SBenoît Canet /* This function will be called by the bdrv_recurse_is_first_non_filter method
6089b5042a36SBenoît Canet  * of block filter and by bdrv_is_first_non_filter.
6090b5042a36SBenoît Canet  * It is used to test if the given bs is the candidate or recurse more in the
6091b5042a36SBenoît Canet  * node graph.
6092212a5a8fSBenoît Canet  */
6093212a5a8fSBenoît Canet bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
6094212a5a8fSBenoît Canet                                       BlockDriverState *candidate)
6095f6186f49SBenoît Canet {
6096b5042a36SBenoît Canet     /* return false if basic checks fails */
6097b5042a36SBenoît Canet     if (!bs || !bs->drv) {
6098b5042a36SBenoît Canet         return false;
6099b5042a36SBenoît Canet     }
6100b5042a36SBenoît Canet 
6101b5042a36SBenoît Canet     /* the code reached a non block filter driver -> check if the bs is
6102b5042a36SBenoît Canet      * the same as the candidate. It's the recursion termination condition.
6103b5042a36SBenoît Canet      */
6104b5042a36SBenoît Canet     if (!bs->drv->is_filter) {
6105b5042a36SBenoît Canet         return bs == candidate;
6106b5042a36SBenoît Canet     }
6107b5042a36SBenoît Canet     /* Down this path the driver is a block filter driver */
6108b5042a36SBenoît Canet 
6109b5042a36SBenoît Canet     /* If the block filter recursion method is defined use it to recurse down
6110b5042a36SBenoît Canet      * the node graph.
6111b5042a36SBenoît Canet      */
6112b5042a36SBenoît Canet     if (bs->drv->bdrv_recurse_is_first_non_filter) {
6113212a5a8fSBenoît Canet         return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
6114212a5a8fSBenoît Canet     }
6115212a5a8fSBenoît Canet 
6116b5042a36SBenoît Canet     /* the driver is a block filter but don't allow to recurse -> return false
6117b5042a36SBenoît Canet      */
6118b5042a36SBenoît Canet     return false;
6119212a5a8fSBenoît Canet }
6120212a5a8fSBenoît Canet 
6121212a5a8fSBenoît Canet /* This function checks if the candidate is the first non filter bs down it's
6122212a5a8fSBenoît Canet  * bs chain. Since we don't have pointers to parents it explore all bs chains
6123212a5a8fSBenoît Canet  * from the top. Some filters can choose not to pass down the recursion.
6124212a5a8fSBenoît Canet  */
6125212a5a8fSBenoît Canet bool bdrv_is_first_non_filter(BlockDriverState *candidate)
6126212a5a8fSBenoît Canet {
6127212a5a8fSBenoît Canet     BlockDriverState *bs;
6128212a5a8fSBenoît Canet 
6129212a5a8fSBenoît Canet     /* walk down the bs forest recursively */
6130212a5a8fSBenoît Canet     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
6131212a5a8fSBenoît Canet         bool perm;
6132212a5a8fSBenoît Canet 
6133b5042a36SBenoît Canet         /* try to recurse in this top level bs */
6134e6dc8a1fSKevin Wolf         perm = bdrv_recurse_is_first_non_filter(bs, candidate);
6135212a5a8fSBenoît Canet 
6136212a5a8fSBenoît Canet         /* candidate is the first non filter */
6137212a5a8fSBenoît Canet         if (perm) {
6138212a5a8fSBenoît Canet             return true;
6139212a5a8fSBenoît Canet         }
6140212a5a8fSBenoît Canet     }
6141212a5a8fSBenoît Canet 
6142212a5a8fSBenoît Canet     return false;
6143f6186f49SBenoît Canet }
614409158f00SBenoît Canet 
614509158f00SBenoît Canet BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
614609158f00SBenoît Canet {
614709158f00SBenoît Canet     BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
61485a7e7a0bSStefan Hajnoczi     AioContext *aio_context;
61495a7e7a0bSStefan Hajnoczi 
615009158f00SBenoît Canet     if (!to_replace_bs) {
615109158f00SBenoît Canet         error_setg(errp, "Node name '%s' not found", node_name);
615209158f00SBenoît Canet         return NULL;
615309158f00SBenoît Canet     }
615409158f00SBenoît Canet 
61555a7e7a0bSStefan Hajnoczi     aio_context = bdrv_get_aio_context(to_replace_bs);
61565a7e7a0bSStefan Hajnoczi     aio_context_acquire(aio_context);
61575a7e7a0bSStefan Hajnoczi 
615809158f00SBenoît Canet     if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
61595a7e7a0bSStefan Hajnoczi         to_replace_bs = NULL;
61605a7e7a0bSStefan Hajnoczi         goto out;
616109158f00SBenoît Canet     }
616209158f00SBenoît Canet 
616309158f00SBenoît Canet     /* We don't want arbitrary node of the BDS chain to be replaced only the top
616409158f00SBenoît Canet      * most non filter in order to prevent data corruption.
616509158f00SBenoît Canet      * Another benefit is that this tests exclude backing files which are
616609158f00SBenoît Canet      * blocked by the backing blockers.
616709158f00SBenoît Canet      */
616809158f00SBenoît Canet     if (!bdrv_is_first_non_filter(to_replace_bs)) {
616909158f00SBenoît Canet         error_setg(errp, "Only top most non filter can be replaced");
61705a7e7a0bSStefan Hajnoczi         to_replace_bs = NULL;
61715a7e7a0bSStefan Hajnoczi         goto out;
617209158f00SBenoît Canet     }
617309158f00SBenoît Canet 
61745a7e7a0bSStefan Hajnoczi out:
61755a7e7a0bSStefan Hajnoczi     aio_context_release(aio_context);
617609158f00SBenoît Canet     return to_replace_bs;
617709158f00SBenoît Canet }
6178448ad91dSMing Lei 
6179448ad91dSMing Lei void bdrv_io_plug(BlockDriverState *bs)
6180448ad91dSMing Lei {
6181448ad91dSMing Lei     BlockDriver *drv = bs->drv;
6182448ad91dSMing Lei     if (drv && drv->bdrv_io_plug) {
6183448ad91dSMing Lei         drv->bdrv_io_plug(bs);
6184448ad91dSMing Lei     } else if (bs->file) {
6185448ad91dSMing Lei         bdrv_io_plug(bs->file);
6186448ad91dSMing Lei     }
6187448ad91dSMing Lei }
6188448ad91dSMing Lei 
6189448ad91dSMing Lei void bdrv_io_unplug(BlockDriverState *bs)
6190448ad91dSMing Lei {
6191448ad91dSMing Lei     BlockDriver *drv = bs->drv;
6192448ad91dSMing Lei     if (drv && drv->bdrv_io_unplug) {
6193448ad91dSMing Lei         drv->bdrv_io_unplug(bs);
6194448ad91dSMing Lei     } else if (bs->file) {
6195448ad91dSMing Lei         bdrv_io_unplug(bs->file);
6196448ad91dSMing Lei     }
6197448ad91dSMing Lei }
6198448ad91dSMing Lei 
6199448ad91dSMing Lei void bdrv_flush_io_queue(BlockDriverState *bs)
6200448ad91dSMing Lei {
6201448ad91dSMing Lei     BlockDriver *drv = bs->drv;
6202448ad91dSMing Lei     if (drv && drv->bdrv_flush_io_queue) {
6203448ad91dSMing Lei         drv->bdrv_flush_io_queue(bs);
6204448ad91dSMing Lei     } else if (bs->file) {
6205448ad91dSMing Lei         bdrv_flush_io_queue(bs->file);
6206448ad91dSMing Lei     }
6207448ad91dSMing Lei }
620891af7014SMax Reitz 
620991af7014SMax Reitz static bool append_open_options(QDict *d, BlockDriverState *bs)
621091af7014SMax Reitz {
621191af7014SMax Reitz     const QDictEntry *entry;
621291af7014SMax Reitz     bool found_any = false;
621391af7014SMax Reitz 
621491af7014SMax Reitz     for (entry = qdict_first(bs->options); entry;
621591af7014SMax Reitz          entry = qdict_next(bs->options, entry))
621691af7014SMax Reitz     {
621791af7014SMax Reitz         /* Only take options for this level and exclude all non-driver-specific
621891af7014SMax Reitz          * options */
621991af7014SMax Reitz         if (!strchr(qdict_entry_key(entry), '.') &&
622091af7014SMax Reitz             strcmp(qdict_entry_key(entry), "node-name"))
622191af7014SMax Reitz         {
622291af7014SMax Reitz             qobject_incref(qdict_entry_value(entry));
622391af7014SMax Reitz             qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
622491af7014SMax Reitz             found_any = true;
622591af7014SMax Reitz         }
622691af7014SMax Reitz     }
622791af7014SMax Reitz 
622891af7014SMax Reitz     return found_any;
622991af7014SMax Reitz }
623091af7014SMax Reitz 
623191af7014SMax Reitz /* Updates the following BDS fields:
623291af7014SMax Reitz  *  - exact_filename: A filename which may be used for opening a block device
623391af7014SMax Reitz  *                    which (mostly) equals the given BDS (even without any
623491af7014SMax Reitz  *                    other options; so reading and writing must return the same
623591af7014SMax Reitz  *                    results, but caching etc. may be different)
623691af7014SMax Reitz  *  - full_open_options: Options which, when given when opening a block device
623791af7014SMax Reitz  *                       (without a filename), result in a BDS (mostly)
623891af7014SMax Reitz  *                       equalling the given one
623991af7014SMax Reitz  *  - filename: If exact_filename is set, it is copied here. Otherwise,
624091af7014SMax Reitz  *              full_open_options is converted to a JSON object, prefixed with
624191af7014SMax Reitz  *              "json:" (for use through the JSON pseudo protocol) and put here.
624291af7014SMax Reitz  */
624391af7014SMax Reitz void bdrv_refresh_filename(BlockDriverState *bs)
624491af7014SMax Reitz {
624591af7014SMax Reitz     BlockDriver *drv = bs->drv;
624691af7014SMax Reitz     QDict *opts;
624791af7014SMax Reitz 
624891af7014SMax Reitz     if (!drv) {
624991af7014SMax Reitz         return;
625091af7014SMax Reitz     }
625191af7014SMax Reitz 
625291af7014SMax Reitz     /* This BDS's file name will most probably depend on its file's name, so
625391af7014SMax Reitz      * refresh that first */
625491af7014SMax Reitz     if (bs->file) {
625591af7014SMax Reitz         bdrv_refresh_filename(bs->file);
625691af7014SMax Reitz     }
625791af7014SMax Reitz 
625891af7014SMax Reitz     if (drv->bdrv_refresh_filename) {
625991af7014SMax Reitz         /* Obsolete information is of no use here, so drop the old file name
626091af7014SMax Reitz          * information before refreshing it */
626191af7014SMax Reitz         bs->exact_filename[0] = '\0';
626291af7014SMax Reitz         if (bs->full_open_options) {
626391af7014SMax Reitz             QDECREF(bs->full_open_options);
626491af7014SMax Reitz             bs->full_open_options = NULL;
626591af7014SMax Reitz         }
626691af7014SMax Reitz 
626791af7014SMax Reitz         drv->bdrv_refresh_filename(bs);
626891af7014SMax Reitz     } else if (bs->file) {
626991af7014SMax Reitz         /* Try to reconstruct valid information from the underlying file */
627091af7014SMax Reitz         bool has_open_options;
627191af7014SMax Reitz 
627291af7014SMax Reitz         bs->exact_filename[0] = '\0';
627391af7014SMax Reitz         if (bs->full_open_options) {
627491af7014SMax Reitz             QDECREF(bs->full_open_options);
627591af7014SMax Reitz             bs->full_open_options = NULL;
627691af7014SMax Reitz         }
627791af7014SMax Reitz 
627891af7014SMax Reitz         opts = qdict_new();
627991af7014SMax Reitz         has_open_options = append_open_options(opts, bs);
628091af7014SMax Reitz 
628191af7014SMax Reitz         /* If no specific options have been given for this BDS, the filename of
628291af7014SMax Reitz          * the underlying file should suffice for this one as well */
628391af7014SMax Reitz         if (bs->file->exact_filename[0] && !has_open_options) {
628491af7014SMax Reitz             strcpy(bs->exact_filename, bs->file->exact_filename);
628591af7014SMax Reitz         }
628691af7014SMax Reitz         /* Reconstructing the full options QDict is simple for most format block
628791af7014SMax Reitz          * drivers, as long as the full options are known for the underlying
628891af7014SMax Reitz          * file BDS. The full options QDict of that file BDS should somehow
628991af7014SMax Reitz          * contain a representation of the filename, therefore the following
629091af7014SMax Reitz          * suffices without querying the (exact_)filename of this BDS. */
629191af7014SMax Reitz         if (bs->file->full_open_options) {
629291af7014SMax Reitz             qdict_put_obj(opts, "driver",
629391af7014SMax Reitz                           QOBJECT(qstring_from_str(drv->format_name)));
629491af7014SMax Reitz             QINCREF(bs->file->full_open_options);
629591af7014SMax Reitz             qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));
629691af7014SMax Reitz 
629791af7014SMax Reitz             bs->full_open_options = opts;
629891af7014SMax Reitz         } else {
629991af7014SMax Reitz             QDECREF(opts);
630091af7014SMax Reitz         }
630191af7014SMax Reitz     } else if (!bs->full_open_options && qdict_size(bs->options)) {
630291af7014SMax Reitz         /* There is no underlying file BDS (at least referenced by BDS.file),
630391af7014SMax Reitz          * so the full options QDict should be equal to the options given
630491af7014SMax Reitz          * specifically for this block device when it was opened (plus the
630591af7014SMax Reitz          * driver specification).
630691af7014SMax Reitz          * Because those options don't change, there is no need to update
630791af7014SMax Reitz          * full_open_options when it's already set. */
630891af7014SMax Reitz 
630991af7014SMax Reitz         opts = qdict_new();
631091af7014SMax Reitz         append_open_options(opts, bs);
631191af7014SMax Reitz         qdict_put_obj(opts, "driver",
631291af7014SMax Reitz                       QOBJECT(qstring_from_str(drv->format_name)));
631391af7014SMax Reitz 
631491af7014SMax Reitz         if (bs->exact_filename[0]) {
631591af7014SMax Reitz             /* This may not work for all block protocol drivers (some may
631691af7014SMax Reitz              * require this filename to be parsed), but we have to find some
631791af7014SMax Reitz              * default solution here, so just include it. If some block driver
631891af7014SMax Reitz              * does not support pure options without any filename at all or
631991af7014SMax Reitz              * needs some special format of the options QDict, it needs to
632091af7014SMax Reitz              * implement the driver-specific bdrv_refresh_filename() function.
632191af7014SMax Reitz              */
632291af7014SMax Reitz             qdict_put_obj(opts, "filename",
632391af7014SMax Reitz                           QOBJECT(qstring_from_str(bs->exact_filename)));
632491af7014SMax Reitz         }
632591af7014SMax Reitz 
632691af7014SMax Reitz         bs->full_open_options = opts;
632791af7014SMax Reitz     }
632891af7014SMax Reitz 
632991af7014SMax Reitz     if (bs->exact_filename[0]) {
633091af7014SMax Reitz         pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
633191af7014SMax Reitz     } else if (bs->full_open_options) {
633291af7014SMax Reitz         QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
633391af7014SMax Reitz         snprintf(bs->filename, sizeof(bs->filename), "json:%s",
633491af7014SMax Reitz                  qstring_get_str(json));
633591af7014SMax Reitz         QDECREF(json);
633691af7014SMax Reitz     }
633791af7014SMax Reitz }
63385366d0c8SBenoît Canet 
63395366d0c8SBenoît Canet /* This accessor function purpose is to allow the device models to access the
63405366d0c8SBenoît Canet  * BlockAcctStats structure embedded inside a BlockDriverState without being
63415366d0c8SBenoît Canet  * aware of the BlockDriverState structure layout.
63425366d0c8SBenoît Canet  * It will go away when the BlockAcctStats structure will be moved inside
63435366d0c8SBenoît Canet  * the device models.
63445366d0c8SBenoît Canet  */
63455366d0c8SBenoît Canet BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
63465366d0c8SBenoît Canet {
63475366d0c8SBenoît Canet     return &bs->stats;
63485366d0c8SBenoît Canet }
6349