xref: /openbmc/qemu/block.c (revision 0eb7217e49b84553bb30f97bc34380633fd846fe)
1fc01f7e7Sbellard /*
2fc01f7e7Sbellard  * QEMU System Emulator block driver
3fc01f7e7Sbellard  *
4fc01f7e7Sbellard  * Copyright (c) 2003 Fabrice Bellard
5fc01f7e7Sbellard  *
6fc01f7e7Sbellard  * Permission is hereby granted, free of charge, to any person obtaining a copy
7fc01f7e7Sbellard  * of this software and associated documentation files (the "Software"), to deal
8fc01f7e7Sbellard  * in the Software without restriction, including without limitation the rights
9fc01f7e7Sbellard  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10fc01f7e7Sbellard  * copies of the Software, and to permit persons to whom the Software is
11fc01f7e7Sbellard  * furnished to do so, subject to the following conditions:
12fc01f7e7Sbellard  *
13fc01f7e7Sbellard  * The above copyright notice and this permission notice shall be included in
14fc01f7e7Sbellard  * all copies or substantial portions of the Software.
15fc01f7e7Sbellard  *
16fc01f7e7Sbellard  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17fc01f7e7Sbellard  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18fc01f7e7Sbellard  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19fc01f7e7Sbellard  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20fc01f7e7Sbellard  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21fc01f7e7Sbellard  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22fc01f7e7Sbellard  * THE SOFTWARE.
23fc01f7e7Sbellard  */
243990d09aSblueswir1 #include "config-host.h"
25faf07963Spbrook #include "qemu-common.h"
266d519a5fSStefan Hajnoczi #include "trace.h"
27737e150eSPaolo Bonzini #include "block/block_int.h"
28737e150eSPaolo Bonzini #include "block/blockjob.h"
291de7afc9SPaolo Bonzini #include "qemu/module.h"
307b1b5d19SPaolo Bonzini #include "qapi/qmp/qjson.h"
31bfb197e0SMarkus Armbruster #include "sysemu/block-backend.h"
329c17d615SPaolo Bonzini #include "sysemu/sysemu.h"
33de50a20aSFam Zheng #include "sysemu/qtest.h"
341de7afc9SPaolo Bonzini #include "qemu/notify.h"
35737e150eSPaolo Bonzini #include "block/coroutine.h"
36c13163fbSBenoît Canet #include "block/qapi.h"
37b2023818SLuiz Capitulino #include "qmp-commands.h"
381de7afc9SPaolo Bonzini #include "qemu/timer.h"
39a5ee7bd4SWenchao Xia #include "qapi-event.h"
40fc01f7e7Sbellard 
4171e72a19SJuan Quintela #ifdef CONFIG_BSD
427674e7bfSbellard #include <sys/types.h>
437674e7bfSbellard #include <sys/stat.h>
447674e7bfSbellard #include <sys/ioctl.h>
4572cf2d4fSBlue Swirl #include <sys/queue.h>
46c5e97233Sblueswir1 #ifndef __DragonFly__
477674e7bfSbellard #include <sys/disk.h>
487674e7bfSbellard #endif
49c5e97233Sblueswir1 #endif
507674e7bfSbellard 
5149dc768dSaliguori #ifdef _WIN32
5249dc768dSaliguori #include <windows.h>
5349dc768dSaliguori #endif
5449dc768dSaliguori 
559bd2b08fSJohn Snow /**
569bd2b08fSJohn Snow  * A BdrvDirtyBitmap can be in three possible states:
579bd2b08fSJohn Snow  * (1) successor is NULL and disabled is false: full r/w mode
589bd2b08fSJohn Snow  * (2) successor is NULL and disabled is true: read only mode ("disabled")
599bd2b08fSJohn Snow  * (3) successor is set: frozen mode.
609bd2b08fSJohn Snow  *     A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
619bd2b08fSJohn Snow  *     or enabled. A frozen bitmap can only abdicate() or reclaim().
629bd2b08fSJohn Snow  */
63e4654d2dSFam Zheng struct BdrvDirtyBitmap {
64aa0c7ca5SJohn Snow     HBitmap *bitmap;            /* Dirty sector bitmap implementation */
65aa0c7ca5SJohn Snow     BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
66aa0c7ca5SJohn Snow     char *name;                 /* Optional non-empty unique ID */
67aa0c7ca5SJohn Snow     int64_t size;               /* Size of the bitmap (Number of sectors) */
68aa0c7ca5SJohn Snow     bool disabled;              /* Bitmap is read-only */
69e4654d2dSFam Zheng     QLIST_ENTRY(BdrvDirtyBitmap) list;
70e4654d2dSFam Zheng };
71e4654d2dSFam Zheng 
721c9805a3SStefan Hajnoczi #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
731c9805a3SStefan Hajnoczi 
747c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
75f141eafeSaliguori         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
76097310b5SMarkus Armbruster         BlockCompletionFunc *cb, void *opaque);
777c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
78f141eafeSaliguori         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
79097310b5SMarkus Armbruster         BlockCompletionFunc *cb, void *opaque);
80f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
81f9f05dc5SKevin Wolf                                          int64_t sector_num, int nb_sectors,
82f9f05dc5SKevin Wolf                                          QEMUIOVector *iov);
83f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
84f9f05dc5SKevin Wolf                                          int64_t sector_num, int nb_sectors,
85f9f05dc5SKevin Wolf                                          QEMUIOVector *iov);
86775aa8b6SKevin Wolf static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
87775aa8b6SKevin Wolf     int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
88470c0504SStefan Hajnoczi     BdrvRequestFlags flags);
89775aa8b6SKevin Wolf static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
90775aa8b6SKevin Wolf     int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
91f08f2ddaSStefan Hajnoczi     BdrvRequestFlags flags);
927c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
93b2a61371SStefan Hajnoczi                                          int64_t sector_num,
94b2a61371SStefan Hajnoczi                                          QEMUIOVector *qiov,
95b2a61371SStefan Hajnoczi                                          int nb_sectors,
96d20d9b7cSPaolo Bonzini                                          BdrvRequestFlags flags,
97097310b5SMarkus Armbruster                                          BlockCompletionFunc *cb,
98b2a61371SStefan Hajnoczi                                          void *opaque,
998c5873d6SStefan Hajnoczi                                          bool is_write);
100b2a61371SStefan Hajnoczi static void coroutine_fn bdrv_co_do_rw(void *opaque);
101621f0589SKevin Wolf static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
102aa7bfbffSPeter Lieven     int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
103ec530c81Sbellard 
1041b7bdbc1SStefan Hajnoczi static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
1051b7bdbc1SStefan Hajnoczi     QTAILQ_HEAD_INITIALIZER(bdrv_states);
1067ee930d0Sblueswir1 
107dc364f4cSBenoît Canet static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
108dc364f4cSBenoît Canet     QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
109dc364f4cSBenoît Canet 
1108a22f02aSStefan Hajnoczi static QLIST_HEAD(, BlockDriver) bdrv_drivers =
1118a22f02aSStefan Hajnoczi     QLIST_HEAD_INITIALIZER(bdrv_drivers);
112ea2384d3Sbellard 
113ce1ffea8SJohn Snow static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
114eb852011SMarkus Armbruster /* If non-zero, use only whitelisted block drivers */
115eb852011SMarkus Armbruster static int use_bdrv_whitelist;
116eb852011SMarkus Armbruster 
1179e0b22f4SStefan Hajnoczi #ifdef _WIN32
/*
 * Return non-zero if @filename starts with an ASCII letter immediately
 * followed by ':' (for example "c:" or "D:\dir"), zero otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char drive = filename[0];
    const int is_letter = (drive >= 'a' && drive <= 'z') ||
                          (drive >= 'A' && drive <= 'Z');

    /* filename[1] is only read once filename[0] is known to be a letter */
    return is_letter && filename[1] == ':';
}
1249e0b22f4SStefan Hajnoczi 
/*
 * Return 1 if @filename names a Windows drive or device rather than a
 * regular path, 0 otherwise. Two forms are recognised: a bare drive
 * specification ("c:") and a name starting with the device prefix,
 * written with either backslashes or forward slashes.
 */
int is_windows_drive(const char *filename)
{
    /* Drive letter, colon, and nothing after it */
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }

    /* Device prefix in either slash flavour */
    return strstart(filename, "\\\\.\\", NULL) ||
           strstart(filename, "//./", NULL);
}
1359e0b22f4SStefan Hajnoczi #endif
1369e0b22f4SStefan Hajnoczi 
1370563e191SZhi Yong Wu /* throttling disk I/O limits */
138cc0681c4SBenoît Canet void bdrv_set_io_limits(BlockDriverState *bs,
139cc0681c4SBenoît Canet                         ThrottleConfig *cfg)
140cc0681c4SBenoît Canet {
141cc0681c4SBenoît Canet     int i;
142cc0681c4SBenoît Canet 
143cc0681c4SBenoît Canet     throttle_config(&bs->throttle_state, cfg);
144cc0681c4SBenoît Canet 
145cc0681c4SBenoît Canet     for (i = 0; i < 2; i++) {
146cc0681c4SBenoît Canet         qemu_co_enter_next(&bs->throttled_reqs[i]);
147cc0681c4SBenoît Canet     }
148cc0681c4SBenoît Canet }
149cc0681c4SBenoît Canet 
150cc0681c4SBenoît Canet /* this function drain all the throttled IOs */
151cc0681c4SBenoît Canet static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
152cc0681c4SBenoît Canet {
153cc0681c4SBenoît Canet     bool drained = false;
154cc0681c4SBenoît Canet     bool enabled = bs->io_limits_enabled;
155cc0681c4SBenoît Canet     int i;
156cc0681c4SBenoît Canet 
157cc0681c4SBenoît Canet     bs->io_limits_enabled = false;
158cc0681c4SBenoît Canet 
159cc0681c4SBenoît Canet     for (i = 0; i < 2; i++) {
160cc0681c4SBenoît Canet         while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
161cc0681c4SBenoît Canet             drained = true;
162cc0681c4SBenoît Canet         }
163cc0681c4SBenoît Canet     }
164cc0681c4SBenoît Canet 
165cc0681c4SBenoît Canet     bs->io_limits_enabled = enabled;
166cc0681c4SBenoît Canet 
167cc0681c4SBenoît Canet     return drained;
168cc0681c4SBenoît Canet }
169cc0681c4SBenoît Canet 
17098f90dbaSZhi Yong Wu void bdrv_io_limits_disable(BlockDriverState *bs)
17198f90dbaSZhi Yong Wu {
17298f90dbaSZhi Yong Wu     bs->io_limits_enabled = false;
17398f90dbaSZhi Yong Wu 
174cc0681c4SBenoît Canet     bdrv_start_throttled_reqs(bs);
17598f90dbaSZhi Yong Wu 
176cc0681c4SBenoît Canet     throttle_destroy(&bs->throttle_state);
17798f90dbaSZhi Yong Wu }
17898f90dbaSZhi Yong Wu 
179cc0681c4SBenoît Canet static void bdrv_throttle_read_timer_cb(void *opaque)
1800563e191SZhi Yong Wu {
1810563e191SZhi Yong Wu     BlockDriverState *bs = opaque;
182cc0681c4SBenoît Canet     qemu_co_enter_next(&bs->throttled_reqs[0]);
1830563e191SZhi Yong Wu }
1840563e191SZhi Yong Wu 
185cc0681c4SBenoît Canet static void bdrv_throttle_write_timer_cb(void *opaque)
186cc0681c4SBenoît Canet {
187cc0681c4SBenoît Canet     BlockDriverState *bs = opaque;
188cc0681c4SBenoît Canet     qemu_co_enter_next(&bs->throttled_reqs[1]);
189cc0681c4SBenoît Canet }
190cc0681c4SBenoît Canet 
191cc0681c4SBenoît Canet /* should be called before bdrv_set_io_limits if a limit is set */
1920563e191SZhi Yong Wu void bdrv_io_limits_enable(BlockDriverState *bs)
1930563e191SZhi Yong Wu {
194de50a20aSFam Zheng     int clock_type = QEMU_CLOCK_REALTIME;
195de50a20aSFam Zheng 
196de50a20aSFam Zheng     if (qtest_enabled()) {
197de50a20aSFam Zheng         /* For testing block IO throttling only */
198de50a20aSFam Zheng         clock_type = QEMU_CLOCK_VIRTUAL;
199de50a20aSFam Zheng     }
200cc0681c4SBenoît Canet     assert(!bs->io_limits_enabled);
201cc0681c4SBenoît Canet     throttle_init(&bs->throttle_state,
20213af91ebSStefan Hajnoczi                   bdrv_get_aio_context(bs),
203de50a20aSFam Zheng                   clock_type,
204cc0681c4SBenoît Canet                   bdrv_throttle_read_timer_cb,
205cc0681c4SBenoît Canet                   bdrv_throttle_write_timer_cb,
206cc0681c4SBenoît Canet                   bs);
2070563e191SZhi Yong Wu     bs->io_limits_enabled = true;
2080563e191SZhi Yong Wu }
2090563e191SZhi Yong Wu 
210cc0681c4SBenoît Canet /* This function makes an IO wait if needed
211cc0681c4SBenoît Canet  *
212cc0681c4SBenoît Canet  * @nb_sectors: the number of sectors of the IO
213cc0681c4SBenoît Canet  * @is_write:   is the IO a write
21498f90dbaSZhi Yong Wu  */
215cc0681c4SBenoît Canet static void bdrv_io_limits_intercept(BlockDriverState *bs,
216d5103588SKevin Wolf                                      unsigned int bytes,
217cc0681c4SBenoît Canet                                      bool is_write)
218cc0681c4SBenoît Canet {
219cc0681c4SBenoît Canet     /* does this io must wait */
220cc0681c4SBenoît Canet     bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);
22198f90dbaSZhi Yong Wu 
222cc0681c4SBenoît Canet     /* if must wait or any request of this type throttled queue the IO */
223cc0681c4SBenoît Canet     if (must_wait ||
224cc0681c4SBenoît Canet         !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
225cc0681c4SBenoît Canet         qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
22698f90dbaSZhi Yong Wu     }
22798f90dbaSZhi Yong Wu 
228cc0681c4SBenoît Canet     /* the IO will be executed, do the accounting */
229d5103588SKevin Wolf     throttle_account(&bs->throttle_state, is_write, bytes);
230d5103588SKevin Wolf 
231cc0681c4SBenoît Canet 
232cc0681c4SBenoît Canet     /* if the next request must wait -> do nothing */
233cc0681c4SBenoît Canet     if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
234cc0681c4SBenoît Canet         return;
235cc0681c4SBenoît Canet     }
236cc0681c4SBenoît Canet 
237cc0681c4SBenoît Canet     /* else queue next request for execution */
238cc0681c4SBenoît Canet     qemu_co_queue_next(&bs->throttled_reqs[is_write]);
23998f90dbaSZhi Yong Wu }
24098f90dbaSZhi Yong Wu 
241339064d5SKevin Wolf size_t bdrv_opt_mem_align(BlockDriverState *bs)
242339064d5SKevin Wolf {
243339064d5SKevin Wolf     if (!bs || !bs->drv) {
244339064d5SKevin Wolf         /* 4k should be on the safe side */
245339064d5SKevin Wolf         return 4096;
246339064d5SKevin Wolf     }
247339064d5SKevin Wolf 
248339064d5SKevin Wolf     return bs->bl.opt_mem_alignment;
249339064d5SKevin Wolf }
250339064d5SKevin Wolf 
/*
 * Check if the path starts with "<protocol>:".
 *
 * The path has a protocol when the first character out of ':' and the
 * path separators is a ':'. On Windows, drive specifications are never
 * treated as a protocol and '\' also counts as a separator.
 *
 * Returns 1 if a protocol prefix is present, 0 otherwise.
 */
int path_has_protocol(const char *path)
{
#ifdef _WIN32
    static const char stop_chars[] = ":/\\";

    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    static const char stop_chars[] = ":/";
#endif

    /* strcspn() stops at the first stop character or at the terminator */
    return path[strcspn(path, stop_chars)] == ':';
}
2689e0b22f4SStefan Hajnoczi 
/*
 * Return 1 if @path is absolute, 0 otherwise.
 *
 * A path is absolute when it starts with '/'. On Windows a leading
 * backslash, a drive prefix and the device-name forms accepted by
 * is_windows_drive() count as absolute too.
 */
int path_is_absolute(const char *path)
{
    const char first = path[0];

#ifdef _WIN32
    /* specific case for device names, plus anything with a drive prefix */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (first == '\\') {
        return 1;
    }
#endif

    return first == '/';
}
28183f64091Sbellard 
/*
 * Combine @base_path and @filename into @dest (at most @dest_size bytes,
 * always NUL-terminated when @dest_size > 0).
 *
 * If @filename is absolute it is copied to @dest unchanged. Otherwise
 * @filename is taken to be relative to @base_path: everything of
 * @base_path up to and including its last path separator is kept, or,
 * when there is no separator after it, up to and including its
 * "<protocol>:" prefix. @filename is appended to that. URLs are
 * supported.
 *
 * A ':' in @base_path is treated as the end of a protocol prefix only if
 * path_has_protocol() says @base_path has one. A relative base such as
 * "./foo:bar" therefore combines to "./<filename>" and not to
 * "./foo:<filename>".
 *
 * Nothing is written when @dest_size <= 0. Over-long results are
 * truncated to fit @dest.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *prefix_end = base_path;
    const char *last_sep;
    int len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Skip "<protocol>:" only when the base path really carries one */
    if (path_has_protocol(base_path)) {
        const char *colon = strchr(base_path, ':');

        if (colon) {
            prefix_end = colon + 1;
        }
    }

    last_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        /* Use whichever of '/' and '\\' occurs last */
        const char *backslash = strrchr(base_path, '\\');

        if (backslash && (!last_sep || backslash > last_sep)) {
            last_sep = backslash;
        }
    }
#endif

    /* The directory part wins if it extends past the protocol prefix */
    if (last_sep && last_sep + 1 > prefix_end) {
        prefix_end = last_sep + 1;
    }

    len = prefix_end - base_path;
    if (len > dest_size - 1) {
        len = dest_size - 1;
    }
    memcpy(dest, base_path, len);
    dest[len] = '\0';
    pstrcat(dest, dest_size, filename);
}
32583f64091Sbellard 
3260a82855aSMax Reitz void bdrv_get_full_backing_filename_from_filename(const char *backed,
3270a82855aSMax Reitz                                                   const char *backing,
3289f07429eSMax Reitz                                                   char *dest, size_t sz,
3299f07429eSMax Reitz                                                   Error **errp)
3300a82855aSMax Reitz {
3319f07429eSMax Reitz     if (backing[0] == '\0' || path_has_protocol(backing) ||
3329f07429eSMax Reitz         path_is_absolute(backing))
3339f07429eSMax Reitz     {
3340a82855aSMax Reitz         pstrcpy(dest, sz, backing);
3359f07429eSMax Reitz     } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
3369f07429eSMax Reitz         error_setg(errp, "Cannot use relative backing file names for '%s'",
3379f07429eSMax Reitz                    backed);
3380a82855aSMax Reitz     } else {
3390a82855aSMax Reitz         path_combine(dest, sz, backed, backing);
3400a82855aSMax Reitz     }
3410a82855aSMax Reitz }
3420a82855aSMax Reitz 
3439f07429eSMax Reitz void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
3449f07429eSMax Reitz                                     Error **errp)
345dc5a1371SPaolo Bonzini {
3469f07429eSMax Reitz     char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
3479f07429eSMax Reitz 
3489f07429eSMax Reitz     bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
3499f07429eSMax Reitz                                                  dest, sz, errp);
350dc5a1371SPaolo Bonzini }
351dc5a1371SPaolo Bonzini 
352*0eb7217eSStefan Hajnoczi void bdrv_setup_io_funcs(BlockDriver *bdrv)
353ea2384d3Sbellard {
3548c5873d6SStefan Hajnoczi     /* Block drivers without coroutine functions need emulation */
3558c5873d6SStefan Hajnoczi     if (!bdrv->bdrv_co_readv) {
356f9f05dc5SKevin Wolf         bdrv->bdrv_co_readv = bdrv_co_readv_em;
357f9f05dc5SKevin Wolf         bdrv->bdrv_co_writev = bdrv_co_writev_em;
358f9f05dc5SKevin Wolf 
359f8c35c1dSStefan Hajnoczi         /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
360f8c35c1dSStefan Hajnoczi          * the block driver lacks aio we need to emulate that too.
361f8c35c1dSStefan Hajnoczi          */
362f9f05dc5SKevin Wolf         if (!bdrv->bdrv_aio_readv) {
36383f64091Sbellard             /* add AIO emulation layer */
364f141eafeSaliguori             bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
365f141eafeSaliguori             bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
36683f64091Sbellard         }
367f9f05dc5SKevin Wolf     }
368*0eb7217eSStefan Hajnoczi }
369*0eb7217eSStefan Hajnoczi 
370*0eb7217eSStefan Hajnoczi void bdrv_register(BlockDriver *bdrv)
371*0eb7217eSStefan Hajnoczi {
372*0eb7217eSStefan Hajnoczi     bdrv_setup_io_funcs(bdrv);
373b2e12bc6SChristoph Hellwig 
3748a22f02aSStefan Hajnoczi     QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
375ea2384d3Sbellard }
376b338082bSbellard 
3777f06d47eSMarkus Armbruster BlockDriverState *bdrv_new_root(void)
378fc01f7e7Sbellard {
3797f06d47eSMarkus Armbruster     BlockDriverState *bs = bdrv_new();
380e4e9986bSMarkus Armbruster 
381e4e9986bSMarkus Armbruster     QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
382e4e9986bSMarkus Armbruster     return bs;
383e4e9986bSMarkus Armbruster }
384e4e9986bSMarkus Armbruster 
385e4e9986bSMarkus Armbruster BlockDriverState *bdrv_new(void)
386e4e9986bSMarkus Armbruster {
387e4e9986bSMarkus Armbruster     BlockDriverState *bs;
388e4e9986bSMarkus Armbruster     int i;
389e4e9986bSMarkus Armbruster 
3905839e53bSMarkus Armbruster     bs = g_new0(BlockDriverState, 1);
391e4654d2dSFam Zheng     QLIST_INIT(&bs->dirty_bitmaps);
392fbe40ff7SFam Zheng     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
393fbe40ff7SFam Zheng         QLIST_INIT(&bs->op_blockers[i]);
394fbe40ff7SFam Zheng     }
39528a7282aSLuiz Capitulino     bdrv_iostatus_disable(bs);
396d7d512f6SPaolo Bonzini     notifier_list_init(&bs->close_notifiers);
397d616b224SStefan Hajnoczi     notifier_with_return_list_init(&bs->before_write_notifiers);
398cc0681c4SBenoît Canet     qemu_co_queue_init(&bs->throttled_reqs[0]);
399cc0681c4SBenoît Canet     qemu_co_queue_init(&bs->throttled_reqs[1]);
4009fcb0251SFam Zheng     bs->refcnt = 1;
401dcd04228SStefan Hajnoczi     bs->aio_context = qemu_get_aio_context();
402d7d512f6SPaolo Bonzini 
403b338082bSbellard     return bs;
404b338082bSbellard }
405b338082bSbellard 
406d7d512f6SPaolo Bonzini void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
407d7d512f6SPaolo Bonzini {
408d7d512f6SPaolo Bonzini     notifier_list_add(&bs->close_notifiers, notify);
409d7d512f6SPaolo Bonzini }
410d7d512f6SPaolo Bonzini 
411ea2384d3Sbellard BlockDriver *bdrv_find_format(const char *format_name)
412ea2384d3Sbellard {
413ea2384d3Sbellard     BlockDriver *drv1;
4148a22f02aSStefan Hajnoczi     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
4158a22f02aSStefan Hajnoczi         if (!strcmp(drv1->format_name, format_name)) {
416ea2384d3Sbellard             return drv1;
417ea2384d3Sbellard         }
4188a22f02aSStefan Hajnoczi     }
419ea2384d3Sbellard     return NULL;
420ea2384d3Sbellard }
421ea2384d3Sbellard 
422b64ec4e4SFam Zheng static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
423eb852011SMarkus Armbruster {
424b64ec4e4SFam Zheng     static const char *whitelist_rw[] = {
425b64ec4e4SFam Zheng         CONFIG_BDRV_RW_WHITELIST
426b64ec4e4SFam Zheng     };
427b64ec4e4SFam Zheng     static const char *whitelist_ro[] = {
428b64ec4e4SFam Zheng         CONFIG_BDRV_RO_WHITELIST
429eb852011SMarkus Armbruster     };
430eb852011SMarkus Armbruster     const char **p;
431eb852011SMarkus Armbruster 
432b64ec4e4SFam Zheng     if (!whitelist_rw[0] && !whitelist_ro[0]) {
433eb852011SMarkus Armbruster         return 1;               /* no whitelist, anything goes */
434b64ec4e4SFam Zheng     }
435eb852011SMarkus Armbruster 
436b64ec4e4SFam Zheng     for (p = whitelist_rw; *p; p++) {
437eb852011SMarkus Armbruster         if (!strcmp(drv->format_name, *p)) {
438eb852011SMarkus Armbruster             return 1;
439eb852011SMarkus Armbruster         }
440eb852011SMarkus Armbruster     }
441b64ec4e4SFam Zheng     if (read_only) {
442b64ec4e4SFam Zheng         for (p = whitelist_ro; *p; p++) {
443b64ec4e4SFam Zheng             if (!strcmp(drv->format_name, *p)) {
444b64ec4e4SFam Zheng                 return 1;
445b64ec4e4SFam Zheng             }
446b64ec4e4SFam Zheng         }
447b64ec4e4SFam Zheng     }
448eb852011SMarkus Armbruster     return 0;
449eb852011SMarkus Armbruster }
450eb852011SMarkus Armbruster 
451b64ec4e4SFam Zheng BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
452b64ec4e4SFam Zheng                                           bool read_only)
453eb852011SMarkus Armbruster {
454eb852011SMarkus Armbruster     BlockDriver *drv = bdrv_find_format(format_name);
455b64ec4e4SFam Zheng     return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
456eb852011SMarkus Armbruster }
457eb852011SMarkus Armbruster 
4585b7e1542SZhi Yong Wu typedef struct CreateCo {
4595b7e1542SZhi Yong Wu     BlockDriver *drv;
4605b7e1542SZhi Yong Wu     char *filename;
46183d0521aSChunyan Liu     QemuOpts *opts;
4625b7e1542SZhi Yong Wu     int ret;
463cc84d90fSMax Reitz     Error *err;
4645b7e1542SZhi Yong Wu } CreateCo;
4655b7e1542SZhi Yong Wu 
4665b7e1542SZhi Yong Wu static void coroutine_fn bdrv_create_co_entry(void *opaque)
4675b7e1542SZhi Yong Wu {
468cc84d90fSMax Reitz     Error *local_err = NULL;
469cc84d90fSMax Reitz     int ret;
470cc84d90fSMax Reitz 
4715b7e1542SZhi Yong Wu     CreateCo *cco = opaque;
4725b7e1542SZhi Yong Wu     assert(cco->drv);
4735b7e1542SZhi Yong Wu 
474c282e1fdSChunyan Liu     ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
47584d18f06SMarkus Armbruster     if (local_err) {
476cc84d90fSMax Reitz         error_propagate(&cco->err, local_err);
477cc84d90fSMax Reitz     }
478cc84d90fSMax Reitz     cco->ret = ret;
4795b7e1542SZhi Yong Wu }
4805b7e1542SZhi Yong Wu 
4810e7e1989SKevin Wolf int bdrv_create(BlockDriver *drv, const char* filename,
48283d0521aSChunyan Liu                 QemuOpts *opts, Error **errp)
483ea2384d3Sbellard {
4845b7e1542SZhi Yong Wu     int ret;
4850e7e1989SKevin Wolf 
4865b7e1542SZhi Yong Wu     Coroutine *co;
4875b7e1542SZhi Yong Wu     CreateCo cco = {
4885b7e1542SZhi Yong Wu         .drv = drv,
4895b7e1542SZhi Yong Wu         .filename = g_strdup(filename),
49083d0521aSChunyan Liu         .opts = opts,
4915b7e1542SZhi Yong Wu         .ret = NOT_DONE,
492cc84d90fSMax Reitz         .err = NULL,
4935b7e1542SZhi Yong Wu     };
4945b7e1542SZhi Yong Wu 
495c282e1fdSChunyan Liu     if (!drv->bdrv_create) {
496cc84d90fSMax Reitz         error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
49780168bffSLuiz Capitulino         ret = -ENOTSUP;
49880168bffSLuiz Capitulino         goto out;
4995b7e1542SZhi Yong Wu     }
5005b7e1542SZhi Yong Wu 
5015b7e1542SZhi Yong Wu     if (qemu_in_coroutine()) {
5025b7e1542SZhi Yong Wu         /* Fast-path if already in coroutine context */
5035b7e1542SZhi Yong Wu         bdrv_create_co_entry(&cco);
5045b7e1542SZhi Yong Wu     } else {
5055b7e1542SZhi Yong Wu         co = qemu_coroutine_create(bdrv_create_co_entry);
5065b7e1542SZhi Yong Wu         qemu_coroutine_enter(co, &cco);
5075b7e1542SZhi Yong Wu         while (cco.ret == NOT_DONE) {
508b47ec2c4SPaolo Bonzini             aio_poll(qemu_get_aio_context(), true);
5095b7e1542SZhi Yong Wu         }
5105b7e1542SZhi Yong Wu     }
5115b7e1542SZhi Yong Wu 
5125b7e1542SZhi Yong Wu     ret = cco.ret;
513cc84d90fSMax Reitz     if (ret < 0) {
51484d18f06SMarkus Armbruster         if (cco.err) {
515cc84d90fSMax Reitz             error_propagate(errp, cco.err);
516cc84d90fSMax Reitz         } else {
517cc84d90fSMax Reitz             error_setg_errno(errp, -ret, "Could not create image");
518cc84d90fSMax Reitz         }
519cc84d90fSMax Reitz     }
5205b7e1542SZhi Yong Wu 
52180168bffSLuiz Capitulino out:
52280168bffSLuiz Capitulino     g_free(cco.filename);
5235b7e1542SZhi Yong Wu     return ret;
524ea2384d3Sbellard }
525ea2384d3Sbellard 
526c282e1fdSChunyan Liu int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
52784a12e66SChristoph Hellwig {
52884a12e66SChristoph Hellwig     BlockDriver *drv;
529cc84d90fSMax Reitz     Error *local_err = NULL;
530cc84d90fSMax Reitz     int ret;
53184a12e66SChristoph Hellwig 
532b65a5e12SMax Reitz     drv = bdrv_find_protocol(filename, true, errp);
53384a12e66SChristoph Hellwig     if (drv == NULL) {
53416905d71SStefan Hajnoczi         return -ENOENT;
53584a12e66SChristoph Hellwig     }
53684a12e66SChristoph Hellwig 
537c282e1fdSChunyan Liu     ret = bdrv_create(drv, filename, opts, &local_err);
53884d18f06SMarkus Armbruster     if (local_err) {
539cc84d90fSMax Reitz         error_propagate(errp, local_err);
540cc84d90fSMax Reitz     }
541cc84d90fSMax Reitz     return ret;
54284a12e66SChristoph Hellwig }
54384a12e66SChristoph Hellwig 
5443baca891SKevin Wolf void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
545d34682cdSKevin Wolf {
546d34682cdSKevin Wolf     BlockDriver *drv = bs->drv;
5473baca891SKevin Wolf     Error *local_err = NULL;
548d34682cdSKevin Wolf 
549d34682cdSKevin Wolf     memset(&bs->bl, 0, sizeof(bs->bl));
550d34682cdSKevin Wolf 
551466ad822SKevin Wolf     if (!drv) {
5523baca891SKevin Wolf         return;
553466ad822SKevin Wolf     }
554466ad822SKevin Wolf 
555466ad822SKevin Wolf     /* Take some limits from the children as a default */
556466ad822SKevin Wolf     if (bs->file) {
5573baca891SKevin Wolf         bdrv_refresh_limits(bs->file, &local_err);
5583baca891SKevin Wolf         if (local_err) {
5593baca891SKevin Wolf             error_propagate(errp, local_err);
5603baca891SKevin Wolf             return;
5613baca891SKevin Wolf         }
562466ad822SKevin Wolf         bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
5632647fab5SPeter Lieven         bs->bl.max_transfer_length = bs->file->bl.max_transfer_length;
564339064d5SKevin Wolf         bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
565339064d5SKevin Wolf     } else {
566339064d5SKevin Wolf         bs->bl.opt_mem_alignment = 512;
567466ad822SKevin Wolf     }
568466ad822SKevin Wolf 
569466ad822SKevin Wolf     if (bs->backing_hd) {
5703baca891SKevin Wolf         bdrv_refresh_limits(bs->backing_hd, &local_err);
5713baca891SKevin Wolf         if (local_err) {
5723baca891SKevin Wolf             error_propagate(errp, local_err);
5733baca891SKevin Wolf             return;
5743baca891SKevin Wolf         }
575466ad822SKevin Wolf         bs->bl.opt_transfer_length =
576466ad822SKevin Wolf             MAX(bs->bl.opt_transfer_length,
577466ad822SKevin Wolf                 bs->backing_hd->bl.opt_transfer_length);
5782647fab5SPeter Lieven         bs->bl.max_transfer_length =
5792647fab5SPeter Lieven             MIN_NON_ZERO(bs->bl.max_transfer_length,
5802647fab5SPeter Lieven                          bs->backing_hd->bl.max_transfer_length);
581339064d5SKevin Wolf         bs->bl.opt_mem_alignment =
582339064d5SKevin Wolf             MAX(bs->bl.opt_mem_alignment,
583339064d5SKevin Wolf                 bs->backing_hd->bl.opt_mem_alignment);
584466ad822SKevin Wolf     }
585466ad822SKevin Wolf 
586466ad822SKevin Wolf     /* Then let the driver override it */
587466ad822SKevin Wolf     if (drv->bdrv_refresh_limits) {
5883baca891SKevin Wolf         drv->bdrv_refresh_limits(bs, errp);
589d34682cdSKevin Wolf     }
590d34682cdSKevin Wolf }
591d34682cdSKevin Wolf 
592892b7de8SEkaterina Tumanova /**
593892b7de8SEkaterina Tumanova  * Try to get @bs's logical and physical block size.
594892b7de8SEkaterina Tumanova  * On success, store them in @bsz struct and return 0.
595892b7de8SEkaterina Tumanova  * On failure return -errno.
596892b7de8SEkaterina Tumanova  * @bs must not be empty.
597892b7de8SEkaterina Tumanova  */
598892b7de8SEkaterina Tumanova int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
599892b7de8SEkaterina Tumanova {
600892b7de8SEkaterina Tumanova     BlockDriver *drv = bs->drv;
601892b7de8SEkaterina Tumanova 
602892b7de8SEkaterina Tumanova     if (drv && drv->bdrv_probe_blocksizes) {
603892b7de8SEkaterina Tumanova         return drv->bdrv_probe_blocksizes(bs, bsz);
604892b7de8SEkaterina Tumanova     }
605892b7de8SEkaterina Tumanova 
606892b7de8SEkaterina Tumanova     return -ENOTSUP;
607892b7de8SEkaterina Tumanova }
608892b7de8SEkaterina Tumanova 
609892b7de8SEkaterina Tumanova /**
610892b7de8SEkaterina Tumanova  * Try to get @bs's geometry (cyls, heads, sectors).
611892b7de8SEkaterina Tumanova  * On success, store them in @geo struct and return 0.
612892b7de8SEkaterina Tumanova  * On failure return -errno.
613892b7de8SEkaterina Tumanova  * @bs must not be empty.
614892b7de8SEkaterina Tumanova  */
615892b7de8SEkaterina Tumanova int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
616892b7de8SEkaterina Tumanova {
617892b7de8SEkaterina Tumanova     BlockDriver *drv = bs->drv;
618892b7de8SEkaterina Tumanova 
619892b7de8SEkaterina Tumanova     if (drv && drv->bdrv_probe_geometry) {
620892b7de8SEkaterina Tumanova         return drv->bdrv_probe_geometry(bs, geo);
621892b7de8SEkaterina Tumanova     }
622892b7de8SEkaterina Tumanova 
623892b7de8SEkaterina Tumanova     return -ENOTSUP;
624892b7de8SEkaterina Tumanova }
625892b7de8SEkaterina Tumanova 
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename receives the NUL-terminated path of the new file and @size is
 * the capacity of that buffer in bytes.
 *
 * On non-Windows hosts the file is created with mkstemp() inside $TMPDIR,
 * falling back to /var/tmp when the variable is unset.  On Windows it is
 * created by GetTempFileName() in the directory reported by GetTempPath(),
 * and @size must be at least MAX_PATH.
 *
 * Return 0 upon success, otherwise a negative errno value (on Windows, the
 * negated GetLastError() code).  -EOVERFLOW means the generated path does
 * not fit in @filename.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    int saved_errno;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /*
         * Remember why close() failed before cleaning up: unlink() may
         * itself fail and overwrite errno, and the caller must be told
         * about the close() error, not the cleanup error.
         */
        saved_errno = errno;
        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
661ea2384d3Sbellard 
662f3a5d3f8SChristoph Hellwig /*
663f3a5d3f8SChristoph Hellwig  * Detect host devices. By convention, /dev/cdrom[N] is always
664f3a5d3f8SChristoph Hellwig  * recognized as a host CDROM.
665f3a5d3f8SChristoph Hellwig  */
666f3a5d3f8SChristoph Hellwig static BlockDriver *find_hdev_driver(const char *filename)
667f3a5d3f8SChristoph Hellwig {
668508c7cb3SChristoph Hellwig     int score_max = 0, score;
669508c7cb3SChristoph Hellwig     BlockDriver *drv = NULL, *d;
670f3a5d3f8SChristoph Hellwig 
6718a22f02aSStefan Hajnoczi     QLIST_FOREACH(d, &bdrv_drivers, list) {
672508c7cb3SChristoph Hellwig         if (d->bdrv_probe_device) {
673508c7cb3SChristoph Hellwig             score = d->bdrv_probe_device(filename);
674508c7cb3SChristoph Hellwig             if (score > score_max) {
675508c7cb3SChristoph Hellwig                 score_max = score;
676508c7cb3SChristoph Hellwig                 drv = d;
677f3a5d3f8SChristoph Hellwig             }
678508c7cb3SChristoph Hellwig         }
679f3a5d3f8SChristoph Hellwig     }
680f3a5d3f8SChristoph Hellwig 
681508c7cb3SChristoph Hellwig     return drv;
682f3a5d3f8SChristoph Hellwig }
683f3a5d3f8SChristoph Hellwig 
68498289620SKevin Wolf BlockDriver *bdrv_find_protocol(const char *filename,
685b65a5e12SMax Reitz                                 bool allow_protocol_prefix,
686b65a5e12SMax Reitz                                 Error **errp)
68784a12e66SChristoph Hellwig {
68884a12e66SChristoph Hellwig     BlockDriver *drv1;
68984a12e66SChristoph Hellwig     char protocol[128];
69084a12e66SChristoph Hellwig     int len;
69184a12e66SChristoph Hellwig     const char *p;
69284a12e66SChristoph Hellwig 
69366f82ceeSKevin Wolf     /* TODO Drivers without bdrv_file_open must be specified explicitly */
69466f82ceeSKevin Wolf 
69539508e7aSChristoph Hellwig     /*
69639508e7aSChristoph Hellwig      * XXX(hch): we really should not let host device detection
69739508e7aSChristoph Hellwig      * override an explicit protocol specification, but moving this
69839508e7aSChristoph Hellwig      * later breaks access to device names with colons in them.
69939508e7aSChristoph Hellwig      * Thanks to the brain-dead persistent naming schemes on udev-
70039508e7aSChristoph Hellwig      * based Linux systems those actually are quite common.
70139508e7aSChristoph Hellwig      */
70284a12e66SChristoph Hellwig     drv1 = find_hdev_driver(filename);
70339508e7aSChristoph Hellwig     if (drv1) {
70484a12e66SChristoph Hellwig         return drv1;
70584a12e66SChristoph Hellwig     }
70639508e7aSChristoph Hellwig 
70798289620SKevin Wolf     if (!path_has_protocol(filename) || !allow_protocol_prefix) {
708ef810437SMax Reitz         return &bdrv_file;
70939508e7aSChristoph Hellwig     }
71098289620SKevin Wolf 
7119e0b22f4SStefan Hajnoczi     p = strchr(filename, ':');
7129e0b22f4SStefan Hajnoczi     assert(p != NULL);
71384a12e66SChristoph Hellwig     len = p - filename;
71484a12e66SChristoph Hellwig     if (len > sizeof(protocol) - 1)
71584a12e66SChristoph Hellwig         len = sizeof(protocol) - 1;
71684a12e66SChristoph Hellwig     memcpy(protocol, filename, len);
71784a12e66SChristoph Hellwig     protocol[len] = '\0';
71884a12e66SChristoph Hellwig     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
71984a12e66SChristoph Hellwig         if (drv1->protocol_name &&
72084a12e66SChristoph Hellwig             !strcmp(drv1->protocol_name, protocol)) {
72184a12e66SChristoph Hellwig             return drv1;
72284a12e66SChristoph Hellwig         }
72384a12e66SChristoph Hellwig     }
724b65a5e12SMax Reitz 
725b65a5e12SMax Reitz     error_setg(errp, "Unknown protocol '%s'", protocol);
72684a12e66SChristoph Hellwig     return NULL;
72784a12e66SChristoph Hellwig }
72884a12e66SChristoph Hellwig 
729c6684249SMarkus Armbruster /*
730c6684249SMarkus Armbruster  * Guess image format by probing its contents.
731c6684249SMarkus Armbruster  * This is not a good idea when your image is raw (CVE-2008-2004), but
732c6684249SMarkus Armbruster  * we do it anyway for backward compatibility.
733c6684249SMarkus Armbruster  *
734c6684249SMarkus Armbruster  * @buf         contains the image's first @buf_size bytes.
7357cddd372SKevin Wolf  * @buf_size    is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
7367cddd372SKevin Wolf  *              but can be smaller if the image file is smaller)
737c6684249SMarkus Armbruster  * @filename    is its filename.
738c6684249SMarkus Armbruster  *
739c6684249SMarkus Armbruster  * For all block drivers, call the bdrv_probe() method to get its
740c6684249SMarkus Armbruster  * probing score.
741c6684249SMarkus Armbruster  * Return the first block driver with the highest probing score.
742c6684249SMarkus Armbruster  */
74338f3ef57SKevin Wolf BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
744c6684249SMarkus Armbruster                             const char *filename)
745c6684249SMarkus Armbruster {
746c6684249SMarkus Armbruster     int score_max = 0, score;
747c6684249SMarkus Armbruster     BlockDriver *drv = NULL, *d;
748c6684249SMarkus Armbruster 
749c6684249SMarkus Armbruster     QLIST_FOREACH(d, &bdrv_drivers, list) {
750c6684249SMarkus Armbruster         if (d->bdrv_probe) {
751c6684249SMarkus Armbruster             score = d->bdrv_probe(buf, buf_size, filename);
752c6684249SMarkus Armbruster             if (score > score_max) {
753c6684249SMarkus Armbruster                 score_max = score;
754c6684249SMarkus Armbruster                 drv = d;
755c6684249SMarkus Armbruster             }
756c6684249SMarkus Armbruster         }
757c6684249SMarkus Armbruster     }
758c6684249SMarkus Armbruster 
759c6684249SMarkus Armbruster     return drv;
760c6684249SMarkus Armbruster }
761c6684249SMarkus Armbruster 
762f500a6d3SKevin Wolf static int find_image_format(BlockDriverState *bs, const char *filename,
76334b5d2c6SMax Reitz                              BlockDriver **pdrv, Error **errp)
764ea2384d3Sbellard {
765c6684249SMarkus Armbruster     BlockDriver *drv;
7667cddd372SKevin Wolf     uint8_t buf[BLOCK_PROBE_BUF_SIZE];
767f500a6d3SKevin Wolf     int ret = 0;
768f8ea0b00SNicholas Bellinger 
76908a00559SKevin Wolf     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
7708e895599SPaolo Bonzini     if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
771ef810437SMax Reitz         *pdrv = &bdrv_raw;
772c98ac35dSStefan Weil         return ret;
7731a396859SNicholas A. Bellinger     }
774f8ea0b00SNicholas Bellinger 
77583f64091Sbellard     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
776ea2384d3Sbellard     if (ret < 0) {
77734b5d2c6SMax Reitz         error_setg_errno(errp, -ret, "Could not read image for determining its "
77834b5d2c6SMax Reitz                          "format");
779c98ac35dSStefan Weil         *pdrv = NULL;
780c98ac35dSStefan Weil         return ret;
781ea2384d3Sbellard     }
782ea2384d3Sbellard 
783c6684249SMarkus Armbruster     drv = bdrv_probe_all(buf, ret, filename);
784c98ac35dSStefan Weil     if (!drv) {
78534b5d2c6SMax Reitz         error_setg(errp, "Could not determine image format: No compatible "
78634b5d2c6SMax Reitz                    "driver found");
787c98ac35dSStefan Weil         ret = -ENOENT;
788c98ac35dSStefan Weil     }
789c98ac35dSStefan Weil     *pdrv = drv;
790c98ac35dSStefan Weil     return ret;
791ea2384d3Sbellard }
792ea2384d3Sbellard 
79351762288SStefan Hajnoczi /**
79451762288SStefan Hajnoczi  * Set the current 'total_sectors' value
79565a9bb25SMarkus Armbruster  * Return 0 on success, -errno on error.
79651762288SStefan Hajnoczi  */
79751762288SStefan Hajnoczi static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
79851762288SStefan Hajnoczi {
79951762288SStefan Hajnoczi     BlockDriver *drv = bs->drv;
80051762288SStefan Hajnoczi 
801396759adSNicholas Bellinger     /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
802396759adSNicholas Bellinger     if (bs->sg)
803396759adSNicholas Bellinger         return 0;
804396759adSNicholas Bellinger 
80551762288SStefan Hajnoczi     /* query actual device if possible, otherwise just trust the hint */
80651762288SStefan Hajnoczi     if (drv->bdrv_getlength) {
80751762288SStefan Hajnoczi         int64_t length = drv->bdrv_getlength(bs);
80851762288SStefan Hajnoczi         if (length < 0) {
80951762288SStefan Hajnoczi             return length;
81051762288SStefan Hajnoczi         }
8117e382003SFam Zheng         hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
81251762288SStefan Hajnoczi     }
81351762288SStefan Hajnoczi 
81451762288SStefan Hajnoczi     bs->total_sectors = hint;
81551762288SStefan Hajnoczi     return 0;
81651762288SStefan Hajnoczi }
81751762288SStefan Hajnoczi 
818c3993cdcSStefan Hajnoczi /**
8199e8f1835SPaolo Bonzini  * Set open flags for a given discard mode
8209e8f1835SPaolo Bonzini  *
8219e8f1835SPaolo Bonzini  * Return 0 on success, -1 if the discard mode was invalid.
8229e8f1835SPaolo Bonzini  */
8239e8f1835SPaolo Bonzini int bdrv_parse_discard_flags(const char *mode, int *flags)
8249e8f1835SPaolo Bonzini {
8259e8f1835SPaolo Bonzini     *flags &= ~BDRV_O_UNMAP;
8269e8f1835SPaolo Bonzini 
8279e8f1835SPaolo Bonzini     if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
8289e8f1835SPaolo Bonzini         /* do nothing */
8299e8f1835SPaolo Bonzini     } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
8309e8f1835SPaolo Bonzini         *flags |= BDRV_O_UNMAP;
8319e8f1835SPaolo Bonzini     } else {
8329e8f1835SPaolo Bonzini         return -1;
8339e8f1835SPaolo Bonzini     }
8349e8f1835SPaolo Bonzini 
8359e8f1835SPaolo Bonzini     return 0;
8369e8f1835SPaolo Bonzini }
8379e8f1835SPaolo Bonzini 
8389e8f1835SPaolo Bonzini /**
839c3993cdcSStefan Hajnoczi  * Set open flags for a given cache mode
840c3993cdcSStefan Hajnoczi  *
841c3993cdcSStefan Hajnoczi  * Return 0 on success, -1 if the cache mode was invalid.
842c3993cdcSStefan Hajnoczi  */
843c3993cdcSStefan Hajnoczi int bdrv_parse_cache_flags(const char *mode, int *flags)
844c3993cdcSStefan Hajnoczi {
845c3993cdcSStefan Hajnoczi     *flags &= ~BDRV_O_CACHE_MASK;
846c3993cdcSStefan Hajnoczi 
847c3993cdcSStefan Hajnoczi     if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
848c3993cdcSStefan Hajnoczi         *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
84992196b2fSStefan Hajnoczi     } else if (!strcmp(mode, "directsync")) {
85092196b2fSStefan Hajnoczi         *flags |= BDRV_O_NOCACHE;
851c3993cdcSStefan Hajnoczi     } else if (!strcmp(mode, "writeback")) {
852c3993cdcSStefan Hajnoczi         *flags |= BDRV_O_CACHE_WB;
853c3993cdcSStefan Hajnoczi     } else if (!strcmp(mode, "unsafe")) {
854c3993cdcSStefan Hajnoczi         *flags |= BDRV_O_CACHE_WB;
855c3993cdcSStefan Hajnoczi         *flags |= BDRV_O_NO_FLUSH;
856c3993cdcSStefan Hajnoczi     } else if (!strcmp(mode, "writethrough")) {
857c3993cdcSStefan Hajnoczi         /* this is the default */
858c3993cdcSStefan Hajnoczi     } else {
859c3993cdcSStefan Hajnoczi         return -1;
860c3993cdcSStefan Hajnoczi     }
861c3993cdcSStefan Hajnoczi 
862c3993cdcSStefan Hajnoczi     return 0;
863c3993cdcSStefan Hajnoczi }
864c3993cdcSStefan Hajnoczi 
86553fec9d3SStefan Hajnoczi /**
86653fec9d3SStefan Hajnoczi  * The copy-on-read flag is actually a reference count so multiple users may
86753fec9d3SStefan Hajnoczi  * use the feature without worrying about clobbering its previous state.
86853fec9d3SStefan Hajnoczi  * Copy-on-read stays enabled until all users have called to disable it.
86953fec9d3SStefan Hajnoczi  */
87053fec9d3SStefan Hajnoczi void bdrv_enable_copy_on_read(BlockDriverState *bs)
87153fec9d3SStefan Hajnoczi {
87253fec9d3SStefan Hajnoczi     bs->copy_on_read++;
87353fec9d3SStefan Hajnoczi }
87453fec9d3SStefan Hajnoczi 
87553fec9d3SStefan Hajnoczi void bdrv_disable_copy_on_read(BlockDriverState *bs)
87653fec9d3SStefan Hajnoczi {
87753fec9d3SStefan Hajnoczi     assert(bs->copy_on_read > 0);
87853fec9d3SStefan Hajnoczi     bs->copy_on_read--;
87953fec9d3SStefan Hajnoczi }
88053fec9d3SStefan Hajnoczi 
8810b50cc88SKevin Wolf /*
882b1e6fc08SKevin Wolf  * Returns the flags that a temporary snapshot should get, based on the
883b1e6fc08SKevin Wolf  * originally requested flags (the originally requested image will have flags
884b1e6fc08SKevin Wolf  * like a backing file)
885b1e6fc08SKevin Wolf  */
886b1e6fc08SKevin Wolf static int bdrv_temp_snapshot_flags(int flags)
887b1e6fc08SKevin Wolf {
888b1e6fc08SKevin Wolf     return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
889b1e6fc08SKevin Wolf }
890b1e6fc08SKevin Wolf 
891b1e6fc08SKevin Wolf /*
8920b50cc88SKevin Wolf  * Returns the flags that bs->file should get, based on the given flags for
8930b50cc88SKevin Wolf  * the parent BDS
8940b50cc88SKevin Wolf  */
8950b50cc88SKevin Wolf static int bdrv_inherited_flags(int flags)
8960b50cc88SKevin Wolf {
8970b50cc88SKevin Wolf     /* Enable protocol handling, disable format probing for bs->file */
8980b50cc88SKevin Wolf     flags |= BDRV_O_PROTOCOL;
8990b50cc88SKevin Wolf 
9000b50cc88SKevin Wolf     /* Our block drivers take care to send flushes and respect unmap policy,
9010b50cc88SKevin Wolf      * so we can enable both unconditionally on lower layers. */
9020b50cc88SKevin Wolf     flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
9030b50cc88SKevin Wolf 
9040b50cc88SKevin Wolf     /* Clear flags that only apply to the top layer */
9055669b44dSKevin Wolf     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
9060b50cc88SKevin Wolf 
9070b50cc88SKevin Wolf     return flags;
9080b50cc88SKevin Wolf }
9090b50cc88SKevin Wolf 
910317fc44eSKevin Wolf /*
911317fc44eSKevin Wolf  * Returns the flags that bs->backing_hd should get, based on the given flags
912317fc44eSKevin Wolf  * for the parent BDS
913317fc44eSKevin Wolf  */
914317fc44eSKevin Wolf static int bdrv_backing_flags(int flags)
915317fc44eSKevin Wolf {
916317fc44eSKevin Wolf     /* backing files always opened read-only */
917317fc44eSKevin Wolf     flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
918317fc44eSKevin Wolf 
919317fc44eSKevin Wolf     /* snapshot=on is handled on the top layer */
9208bfea15dSKevin Wolf     flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
921317fc44eSKevin Wolf 
922317fc44eSKevin Wolf     return flags;
923317fc44eSKevin Wolf }
924317fc44eSKevin Wolf 
9257b272452SKevin Wolf static int bdrv_open_flags(BlockDriverState *bs, int flags)
9267b272452SKevin Wolf {
9277b272452SKevin Wolf     int open_flags = flags | BDRV_O_CACHE_WB;
9287b272452SKevin Wolf 
9297b272452SKevin Wolf     /*
9307b272452SKevin Wolf      * Clear flags that are internal to the block layer before opening the
9317b272452SKevin Wolf      * image.
9327b272452SKevin Wolf      */
93320cca275SKevin Wolf     open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
9347b272452SKevin Wolf 
9357b272452SKevin Wolf     /*
9367b272452SKevin Wolf      * Snapshots should be writable.
9377b272452SKevin Wolf      */
9388bfea15dSKevin Wolf     if (flags & BDRV_O_TEMPORARY) {
9397b272452SKevin Wolf         open_flags |= BDRV_O_RDWR;
9407b272452SKevin Wolf     }
9417b272452SKevin Wolf 
9427b272452SKevin Wolf     return open_flags;
9437b272452SKevin Wolf }
9447b272452SKevin Wolf 
945636ea370SKevin Wolf static void bdrv_assign_node_name(BlockDriverState *bs,
9466913c0c2SBenoît Canet                                   const char *node_name,
9476913c0c2SBenoît Canet                                   Error **errp)
9486913c0c2SBenoît Canet {
9496913c0c2SBenoît Canet     if (!node_name) {
950636ea370SKevin Wolf         return;
9516913c0c2SBenoît Canet     }
9526913c0c2SBenoît Canet 
9539aebf3b8SKevin Wolf     /* Check for empty string or invalid characters */
954f5bebbbbSMarkus Armbruster     if (!id_wellformed(node_name)) {
9559aebf3b8SKevin Wolf         error_setg(errp, "Invalid node name");
956636ea370SKevin Wolf         return;
9576913c0c2SBenoît Canet     }
9586913c0c2SBenoît Canet 
9590c5e94eeSBenoît Canet     /* takes care of avoiding namespaces collisions */
9607f06d47eSMarkus Armbruster     if (blk_by_name(node_name)) {
9610c5e94eeSBenoît Canet         error_setg(errp, "node-name=%s is conflicting with a device id",
9620c5e94eeSBenoît Canet                    node_name);
963636ea370SKevin Wolf         return;
9640c5e94eeSBenoît Canet     }
9650c5e94eeSBenoît Canet 
9666913c0c2SBenoît Canet     /* takes care of avoiding duplicates node names */
9676913c0c2SBenoît Canet     if (bdrv_find_node(node_name)) {
9686913c0c2SBenoît Canet         error_setg(errp, "Duplicate node name");
969636ea370SKevin Wolf         return;
9706913c0c2SBenoît Canet     }
9716913c0c2SBenoît Canet 
9726913c0c2SBenoît Canet     /* copy node name into the bs and insert it into the graph list */
9736913c0c2SBenoît Canet     pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
9746913c0c2SBenoît Canet     QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
9756913c0c2SBenoît Canet }
9766913c0c2SBenoît Canet 
977b6ce07aaSKevin Wolf /*
97857915332SKevin Wolf  * Common part for opening disk images and files
979b6ad491aSKevin Wolf  *
980b6ad491aSKevin Wolf  * Removes all processed options from *options.
98157915332SKevin Wolf  */
982f500a6d3SKevin Wolf static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
98334b5d2c6SMax Reitz     QDict *options, int flags, BlockDriver *drv, Error **errp)
98457915332SKevin Wolf {
98557915332SKevin Wolf     int ret, open_flags;
986035fccdfSKevin Wolf     const char *filename;
9876913c0c2SBenoît Canet     const char *node_name = NULL;
98834b5d2c6SMax Reitz     Error *local_err = NULL;
98957915332SKevin Wolf 
99057915332SKevin Wolf     assert(drv != NULL);
9916405875cSPaolo Bonzini     assert(bs->file == NULL);
992707ff828SKevin Wolf     assert(options != NULL && bs->options != options);
99357915332SKevin Wolf 
99445673671SKevin Wolf     if (file != NULL) {
99545673671SKevin Wolf         filename = file->filename;
99645673671SKevin Wolf     } else {
99745673671SKevin Wolf         filename = qdict_get_try_str(options, "filename");
99845673671SKevin Wolf     }
99945673671SKevin Wolf 
1000765003dbSKevin Wolf     if (drv->bdrv_needs_filename && !filename) {
1001765003dbSKevin Wolf         error_setg(errp, "The '%s' block driver requires a file name",
1002765003dbSKevin Wolf                    drv->format_name);
1003765003dbSKevin Wolf         return -EINVAL;
1004765003dbSKevin Wolf     }
1005765003dbSKevin Wolf 
100645673671SKevin Wolf     trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
100728dcee10SStefan Hajnoczi 
10086913c0c2SBenoît Canet     node_name = qdict_get_try_str(options, "node-name");
1009636ea370SKevin Wolf     bdrv_assign_node_name(bs, node_name, &local_err);
10100fb6395cSMarkus Armbruster     if (local_err) {
1011636ea370SKevin Wolf         error_propagate(errp, local_err);
1012636ea370SKevin Wolf         return -EINVAL;
10136913c0c2SBenoît Canet     }
10146913c0c2SBenoît Canet     qdict_del(options, "node-name");
10156913c0c2SBenoît Canet 
10165d186eb0SKevin Wolf     /* bdrv_open() with directly using a protocol as drv. This layer is already
10175d186eb0SKevin Wolf      * opened, so assign it to bs (while file becomes a closed BlockDriverState)
10185d186eb0SKevin Wolf      * and return immediately. */
10195d186eb0SKevin Wolf     if (file != NULL && drv->bdrv_file_open) {
10205d186eb0SKevin Wolf         bdrv_swap(file, bs);
10215d186eb0SKevin Wolf         return 0;
10225d186eb0SKevin Wolf     }
10235d186eb0SKevin Wolf 
102457915332SKevin Wolf     bs->open_flags = flags;
10251b7fd729SPaolo Bonzini     bs->guest_block_size = 512;
1026c25f53b0SPaolo Bonzini     bs->request_alignment = 512;
10270d51b4deSAsias He     bs->zero_beyond_eof = true;
1028b64ec4e4SFam Zheng     open_flags = bdrv_open_flags(bs, flags);
1029b64ec4e4SFam Zheng     bs->read_only = !(open_flags & BDRV_O_RDWR);
1030b64ec4e4SFam Zheng 
1031b64ec4e4SFam Zheng     if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
10328f94a6e4SKevin Wolf         error_setg(errp,
10338f94a6e4SKevin Wolf                    !bs->read_only && bdrv_is_whitelisted(drv, true)
10348f94a6e4SKevin Wolf                         ? "Driver '%s' can only be used for read-only devices"
10358f94a6e4SKevin Wolf                         : "Driver '%s' is not whitelisted",
10368f94a6e4SKevin Wolf                    drv->format_name);
1037b64ec4e4SFam Zheng         return -ENOTSUP;
1038b64ec4e4SFam Zheng     }
103957915332SKevin Wolf 
104053fec9d3SStefan Hajnoczi     assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
10410ebd24e0SKevin Wolf     if (flags & BDRV_O_COPY_ON_READ) {
10420ebd24e0SKevin Wolf         if (!bs->read_only) {
104353fec9d3SStefan Hajnoczi             bdrv_enable_copy_on_read(bs);
10440ebd24e0SKevin Wolf         } else {
10450ebd24e0SKevin Wolf             error_setg(errp, "Can't use copy-on-read on read-only device");
10460ebd24e0SKevin Wolf             return -EINVAL;
10470ebd24e0SKevin Wolf         }
104853fec9d3SStefan Hajnoczi     }
104953fec9d3SStefan Hajnoczi 
1050c2ad1b0cSKevin Wolf     if (filename != NULL) {
105157915332SKevin Wolf         pstrcpy(bs->filename, sizeof(bs->filename), filename);
1052c2ad1b0cSKevin Wolf     } else {
1053c2ad1b0cSKevin Wolf         bs->filename[0] = '\0';
1054c2ad1b0cSKevin Wolf     }
105591af7014SMax Reitz     pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
105657915332SKevin Wolf 
105757915332SKevin Wolf     bs->drv = drv;
10587267c094SAnthony Liguori     bs->opaque = g_malloc0(drv->instance_size);
105957915332SKevin Wolf 
106003f541bdSStefan Hajnoczi     bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
1061e7c63796SStefan Hajnoczi 
106266f82ceeSKevin Wolf     /* Open the image, either directly or using a protocol */
106366f82ceeSKevin Wolf     if (drv->bdrv_file_open) {
10645d186eb0SKevin Wolf         assert(file == NULL);
1065030be321SBenoît Canet         assert(!drv->bdrv_needs_filename || filename != NULL);
106634b5d2c6SMax Reitz         ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
1067f500a6d3SKevin Wolf     } else {
10682af5ef70SKevin Wolf         if (file == NULL) {
106934b5d2c6SMax Reitz             error_setg(errp, "Can't use '%s' as a block driver for the "
107034b5d2c6SMax Reitz                        "protocol level", drv->format_name);
10712af5ef70SKevin Wolf             ret = -EINVAL;
10722af5ef70SKevin Wolf             goto free_and_fail;
10732af5ef70SKevin Wolf         }
1074f500a6d3SKevin Wolf         bs->file = file;
107534b5d2c6SMax Reitz         ret = drv->bdrv_open(bs, options, open_flags, &local_err);
107666f82ceeSKevin Wolf     }
107766f82ceeSKevin Wolf 
107857915332SKevin Wolf     if (ret < 0) {
107984d18f06SMarkus Armbruster         if (local_err) {
108034b5d2c6SMax Reitz             error_propagate(errp, local_err);
10812fa9aa59SDunrong Huang         } else if (bs->filename[0]) {
10822fa9aa59SDunrong Huang             error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
108334b5d2c6SMax Reitz         } else {
108434b5d2c6SMax Reitz             error_setg_errno(errp, -ret, "Could not open image");
108534b5d2c6SMax Reitz         }
108657915332SKevin Wolf         goto free_and_fail;
108757915332SKevin Wolf     }
108857915332SKevin Wolf 
1089a1f688f4SMarkus Armbruster     if (bs->encrypted) {
1090a1f688f4SMarkus Armbruster         error_report("Encrypted images are deprecated");
1091a1f688f4SMarkus Armbruster         error_printf("Support for them will be removed in a future release.\n"
1092a1f688f4SMarkus Armbruster                      "You can use 'qemu-img convert' to convert your image"
1093a1f688f4SMarkus Armbruster                      " to an unencrypted one.\n");
1094a1f688f4SMarkus Armbruster     }
1095a1f688f4SMarkus Armbruster 
109651762288SStefan Hajnoczi     ret = refresh_total_sectors(bs, bs->total_sectors);
109751762288SStefan Hajnoczi     if (ret < 0) {
109834b5d2c6SMax Reitz         error_setg_errno(errp, -ret, "Could not refresh total sector count");
109951762288SStefan Hajnoczi         goto free_and_fail;
110057915332SKevin Wolf     }
110151762288SStefan Hajnoczi 
11023baca891SKevin Wolf     bdrv_refresh_limits(bs, &local_err);
11033baca891SKevin Wolf     if (local_err) {
11043baca891SKevin Wolf         error_propagate(errp, local_err);
11053baca891SKevin Wolf         ret = -EINVAL;
11063baca891SKevin Wolf         goto free_and_fail;
11073baca891SKevin Wolf     }
11083baca891SKevin Wolf 
1109c25f53b0SPaolo Bonzini     assert(bdrv_opt_mem_align(bs) != 0);
111047ea2de2SKevin Wolf     assert((bs->request_alignment != 0) || bs->sg);
111157915332SKevin Wolf     return 0;
111257915332SKevin Wolf 
111357915332SKevin Wolf free_and_fail:
111466f82ceeSKevin Wolf     bs->file = NULL;
11157267c094SAnthony Liguori     g_free(bs->opaque);
111657915332SKevin Wolf     bs->opaque = NULL;
111757915332SKevin Wolf     bs->drv = NULL;
111857915332SKevin Wolf     return ret;
111957915332SKevin Wolf }
112057915332SKevin Wolf 
11215e5c4f63SKevin Wolf static QDict *parse_json_filename(const char *filename, Error **errp)
11225e5c4f63SKevin Wolf {
11235e5c4f63SKevin Wolf     QObject *options_obj;
11245e5c4f63SKevin Wolf     QDict *options;
11255e5c4f63SKevin Wolf     int ret;
11265e5c4f63SKevin Wolf 
11275e5c4f63SKevin Wolf     ret = strstart(filename, "json:", &filename);
11285e5c4f63SKevin Wolf     assert(ret);
11295e5c4f63SKevin Wolf 
11305e5c4f63SKevin Wolf     options_obj = qobject_from_json(filename);
11315e5c4f63SKevin Wolf     if (!options_obj) {
11325e5c4f63SKevin Wolf         error_setg(errp, "Could not parse the JSON options");
11335e5c4f63SKevin Wolf         return NULL;
11345e5c4f63SKevin Wolf     }
11355e5c4f63SKevin Wolf 
11365e5c4f63SKevin Wolf     if (qobject_type(options_obj) != QTYPE_QDICT) {
11375e5c4f63SKevin Wolf         qobject_decref(options_obj);
11385e5c4f63SKevin Wolf         error_setg(errp, "Invalid JSON object given");
11395e5c4f63SKevin Wolf         return NULL;
11405e5c4f63SKevin Wolf     }
11415e5c4f63SKevin Wolf 
11425e5c4f63SKevin Wolf     options = qobject_to_qdict(options_obj);
11435e5c4f63SKevin Wolf     qdict_flatten(options);
11445e5c4f63SKevin Wolf 
11455e5c4f63SKevin Wolf     return options;
11465e5c4f63SKevin Wolf }
11475e5c4f63SKevin Wolf 
114857915332SKevin Wolf /*
1149f54120ffSKevin Wolf  * Fills in default options for opening images and converts the legacy
1150f54120ffSKevin Wolf  * filename/flags pair to option QDict entries.
1151f54120ffSKevin Wolf  */
11525e5c4f63SKevin Wolf static int bdrv_fill_options(QDict **options, const char **pfilename, int flags,
115317b005f1SKevin Wolf                              BlockDriver *drv, Error **errp)
1154f54120ffSKevin Wolf {
11555e5c4f63SKevin Wolf     const char *filename = *pfilename;
1156f54120ffSKevin Wolf     const char *drvname;
1157462f5bcfSKevin Wolf     bool protocol = flags & BDRV_O_PROTOCOL;
1158f54120ffSKevin Wolf     bool parse_filename = false;
1159f54120ffSKevin Wolf     Error *local_err = NULL;
1160f54120ffSKevin Wolf 
11615e5c4f63SKevin Wolf     /* Parse json: pseudo-protocol */
11625e5c4f63SKevin Wolf     if (filename && g_str_has_prefix(filename, "json:")) {
11635e5c4f63SKevin Wolf         QDict *json_options = parse_json_filename(filename, &local_err);
11645e5c4f63SKevin Wolf         if (local_err) {
11655e5c4f63SKevin Wolf             error_propagate(errp, local_err);
11665e5c4f63SKevin Wolf             return -EINVAL;
11675e5c4f63SKevin Wolf         }
11685e5c4f63SKevin Wolf 
11695e5c4f63SKevin Wolf         /* Options given in the filename have lower priority than options
11705e5c4f63SKevin Wolf          * specified directly */
11715e5c4f63SKevin Wolf         qdict_join(*options, json_options, false);
11725e5c4f63SKevin Wolf         QDECREF(json_options);
11735e5c4f63SKevin Wolf         *pfilename = filename = NULL;
11745e5c4f63SKevin Wolf     }
11755e5c4f63SKevin Wolf 
1176f54120ffSKevin Wolf     /* Fetch the file name from the options QDict if necessary */
117717b005f1SKevin Wolf     if (protocol && filename) {
1178f54120ffSKevin Wolf         if (!qdict_haskey(*options, "filename")) {
1179f54120ffSKevin Wolf             qdict_put(*options, "filename", qstring_from_str(filename));
1180f54120ffSKevin Wolf             parse_filename = true;
1181f54120ffSKevin Wolf         } else {
1182f54120ffSKevin Wolf             error_setg(errp, "Can't specify 'file' and 'filename' options at "
1183f54120ffSKevin Wolf                              "the same time");
1184f54120ffSKevin Wolf             return -EINVAL;
1185f54120ffSKevin Wolf         }
1186f54120ffSKevin Wolf     }
1187f54120ffSKevin Wolf 
1188f54120ffSKevin Wolf     /* Find the right block driver */
1189f54120ffSKevin Wolf     filename = qdict_get_try_str(*options, "filename");
1190f54120ffSKevin Wolf     drvname = qdict_get_try_str(*options, "driver");
1191f54120ffSKevin Wolf 
119217b005f1SKevin Wolf     if (drv) {
119317b005f1SKevin Wolf         if (drvname) {
119417b005f1SKevin Wolf             error_setg(errp, "Driver specified twice");
119517b005f1SKevin Wolf             return -EINVAL;
119617b005f1SKevin Wolf         }
119717b005f1SKevin Wolf         drvname = drv->format_name;
119817b005f1SKevin Wolf         qdict_put(*options, "driver", qstring_from_str(drvname));
119917b005f1SKevin Wolf     } else {
120017b005f1SKevin Wolf         if (!drvname && protocol) {
1201f54120ffSKevin Wolf             if (filename) {
1202b65a5e12SMax Reitz                 drv = bdrv_find_protocol(filename, parse_filename, errp);
1203f54120ffSKevin Wolf                 if (!drv) {
1204f54120ffSKevin Wolf                     return -EINVAL;
1205f54120ffSKevin Wolf                 }
1206f54120ffSKevin Wolf 
1207f54120ffSKevin Wolf                 drvname = drv->format_name;
1208f54120ffSKevin Wolf                 qdict_put(*options, "driver", qstring_from_str(drvname));
1209f54120ffSKevin Wolf             } else {
1210f54120ffSKevin Wolf                 error_setg(errp, "Must specify either driver or file");
1211f54120ffSKevin Wolf                 return -EINVAL;
1212f54120ffSKevin Wolf             }
121317b005f1SKevin Wolf         } else if (drvname) {
1214f54120ffSKevin Wolf             drv = bdrv_find_format(drvname);
1215f54120ffSKevin Wolf             if (!drv) {
1216f54120ffSKevin Wolf                 error_setg(errp, "Unknown driver '%s'", drvname);
1217f54120ffSKevin Wolf                 return -ENOENT;
1218f54120ffSKevin Wolf             }
121917b005f1SKevin Wolf         }
122017b005f1SKevin Wolf     }
122117b005f1SKevin Wolf 
122217b005f1SKevin Wolf     assert(drv || !protocol);
1223f54120ffSKevin Wolf 
1224f54120ffSKevin Wolf     /* Driver-specific filename parsing */
122517b005f1SKevin Wolf     if (drv && drv->bdrv_parse_filename && parse_filename) {
1226f54120ffSKevin Wolf         drv->bdrv_parse_filename(filename, *options, &local_err);
1227f54120ffSKevin Wolf         if (local_err) {
1228f54120ffSKevin Wolf             error_propagate(errp, local_err);
1229f54120ffSKevin Wolf             return -EINVAL;
1230f54120ffSKevin Wolf         }
1231f54120ffSKevin Wolf 
1232f54120ffSKevin Wolf         if (!drv->bdrv_needs_filename) {
1233f54120ffSKevin Wolf             qdict_del(*options, "filename");
1234f54120ffSKevin Wolf         }
1235f54120ffSKevin Wolf     }
1236f54120ffSKevin Wolf 
1237f54120ffSKevin Wolf     return 0;
1238f54120ffSKevin Wolf }
1239f54120ffSKevin Wolf 
12408d24cce1SFam Zheng void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
12418d24cce1SFam Zheng {
12428d24cce1SFam Zheng 
1243826b6ca0SFam Zheng     if (bs->backing_hd) {
1244826b6ca0SFam Zheng         assert(bs->backing_blocker);
1245826b6ca0SFam Zheng         bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
1246826b6ca0SFam Zheng     } else if (backing_hd) {
1247826b6ca0SFam Zheng         error_setg(&bs->backing_blocker,
124881e5f78aSAlberto Garcia                    "node is used as backing hd of '%s'",
124981e5f78aSAlberto Garcia                    bdrv_get_device_or_node_name(bs));
1250826b6ca0SFam Zheng     }
1251826b6ca0SFam Zheng 
12528d24cce1SFam Zheng     bs->backing_hd = backing_hd;
12538d24cce1SFam Zheng     if (!backing_hd) {
1254826b6ca0SFam Zheng         error_free(bs->backing_blocker);
1255826b6ca0SFam Zheng         bs->backing_blocker = NULL;
12568d24cce1SFam Zheng         goto out;
12578d24cce1SFam Zheng     }
12588d24cce1SFam Zheng     bs->open_flags &= ~BDRV_O_NO_BACKING;
12598d24cce1SFam Zheng     pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
12608d24cce1SFam Zheng     pstrcpy(bs->backing_format, sizeof(bs->backing_format),
12618d24cce1SFam Zheng             backing_hd->drv ? backing_hd->drv->format_name : "");
1262826b6ca0SFam Zheng 
1263826b6ca0SFam Zheng     bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
1264826b6ca0SFam Zheng     /* Otherwise we won't be able to commit due to check in bdrv_commit */
1265bb00021dSFam Zheng     bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1266826b6ca0SFam Zheng                     bs->backing_blocker);
12678d24cce1SFam Zheng out:
12683baca891SKevin Wolf     bdrv_refresh_limits(bs, NULL);
12698d24cce1SFam Zheng }
12708d24cce1SFam Zheng 
127131ca6d07SKevin Wolf /*
127231ca6d07SKevin Wolf  * Opens the backing file for a BlockDriverState if not yet open
127331ca6d07SKevin Wolf  *
127431ca6d07SKevin Wolf  * options is a QDict of options to pass to the block drivers, or NULL for an
127531ca6d07SKevin Wolf  * empty set of options. The reference to the QDict is transferred to this
127631ca6d07SKevin Wolf  * function (even on failure), so if the caller intends to reuse the dictionary,
127731ca6d07SKevin Wolf  * it needs to use QINCREF() before calling bdrv_file_open.
127831ca6d07SKevin Wolf  */
127934b5d2c6SMax Reitz int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
12809156df12SPaolo Bonzini {
12811ba4b6a5SBenoît Canet     char *backing_filename = g_malloc0(PATH_MAX);
1282317fc44eSKevin Wolf     int ret = 0;
12838d24cce1SFam Zheng     BlockDriverState *backing_hd;
128434b5d2c6SMax Reitz     Error *local_err = NULL;
12859156df12SPaolo Bonzini 
12869156df12SPaolo Bonzini     if (bs->backing_hd != NULL) {
128731ca6d07SKevin Wolf         QDECREF(options);
12881ba4b6a5SBenoît Canet         goto free_exit;
12899156df12SPaolo Bonzini     }
12909156df12SPaolo Bonzini 
129131ca6d07SKevin Wolf     /* NULL means an empty set of options */
129231ca6d07SKevin Wolf     if (options == NULL) {
129331ca6d07SKevin Wolf         options = qdict_new();
129431ca6d07SKevin Wolf     }
129531ca6d07SKevin Wolf 
12969156df12SPaolo Bonzini     bs->open_flags &= ~BDRV_O_NO_BACKING;
12971cb6f506SKevin Wolf     if (qdict_haskey(options, "file.filename")) {
12981cb6f506SKevin Wolf         backing_filename[0] = '\0';
12991cb6f506SKevin Wolf     } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
130031ca6d07SKevin Wolf         QDECREF(options);
13011ba4b6a5SBenoît Canet         goto free_exit;
1302dbecebddSFam Zheng     } else {
13039f07429eSMax Reitz         bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
13049f07429eSMax Reitz                                        &local_err);
13059f07429eSMax Reitz         if (local_err) {
13069f07429eSMax Reitz             ret = -EINVAL;
13079f07429eSMax Reitz             error_propagate(errp, local_err);
13089f07429eSMax Reitz             QDECREF(options);
13099f07429eSMax Reitz             goto free_exit;
13109f07429eSMax Reitz         }
13119156df12SPaolo Bonzini     }
13129156df12SPaolo Bonzini 
13138ee79e70SKevin Wolf     if (!bs->drv || !bs->drv->supports_backing) {
13148ee79e70SKevin Wolf         ret = -EINVAL;
13158ee79e70SKevin Wolf         error_setg(errp, "Driver doesn't support backing files");
13168ee79e70SKevin Wolf         QDECREF(options);
13178ee79e70SKevin Wolf         goto free_exit;
13188ee79e70SKevin Wolf     }
13198ee79e70SKevin Wolf 
1320e4e9986bSMarkus Armbruster     backing_hd = bdrv_new();
13218d24cce1SFam Zheng 
1322c5f6e493SKevin Wolf     if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1323c5f6e493SKevin Wolf         qdict_put(options, "driver", qstring_from_str(bs->backing_format));
13249156df12SPaolo Bonzini     }
13259156df12SPaolo Bonzini 
1326f67503e5SMax Reitz     assert(bs->backing_hd == NULL);
13278d24cce1SFam Zheng     ret = bdrv_open(&backing_hd,
1328ddf5636dSMax Reitz                     *backing_filename ? backing_filename : NULL, NULL, options,
1329c5f6e493SKevin Wolf                     bdrv_backing_flags(bs->open_flags), NULL, &local_err);
13309156df12SPaolo Bonzini     if (ret < 0) {
13318d24cce1SFam Zheng         bdrv_unref(backing_hd);
13328d24cce1SFam Zheng         backing_hd = NULL;
13339156df12SPaolo Bonzini         bs->open_flags |= BDRV_O_NO_BACKING;
1334b04b6b6eSFam Zheng         error_setg(errp, "Could not open backing file: %s",
1335b04b6b6eSFam Zheng                    error_get_pretty(local_err));
1336b04b6b6eSFam Zheng         error_free(local_err);
13371ba4b6a5SBenoît Canet         goto free_exit;
13389156df12SPaolo Bonzini     }
13398d24cce1SFam Zheng     bdrv_set_backing_hd(bs, backing_hd);
1340d80ac658SPeter Feiner 
13411ba4b6a5SBenoît Canet free_exit:
13421ba4b6a5SBenoît Canet     g_free(backing_filename);
13431ba4b6a5SBenoît Canet     return ret;
13449156df12SPaolo Bonzini }
13459156df12SPaolo Bonzini 
1346b6ce07aaSKevin Wolf /*
1347da557aacSMax Reitz  * Opens a disk image whose options are given as BlockdevRef in another block
1348da557aacSMax Reitz  * device's options.
1349da557aacSMax Reitz  *
1350da557aacSMax Reitz  * If allow_none is true, no image will be opened if filename is false and no
1351da557aacSMax Reitz  * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1352da557aacSMax Reitz  *
1353da557aacSMax Reitz  * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1354da557aacSMax Reitz  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1355da557aacSMax Reitz  * itself, all options starting with "${bdref_key}." are considered part of the
1356da557aacSMax Reitz  * BlockdevRef.
1357da557aacSMax Reitz  *
1358da557aacSMax Reitz  * The BlockdevRef will be removed from the options QDict.
1359f67503e5SMax Reitz  *
1360f67503e5SMax Reitz  * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
1361da557aacSMax Reitz  */
1362da557aacSMax Reitz int bdrv_open_image(BlockDriverState **pbs, const char *filename,
1363da557aacSMax Reitz                     QDict *options, const char *bdref_key, int flags,
1364f7d9fd8cSMax Reitz                     bool allow_none, Error **errp)
1365da557aacSMax Reitz {
1366da557aacSMax Reitz     QDict *image_options;
1367da557aacSMax Reitz     int ret;
1368da557aacSMax Reitz     char *bdref_key_dot;
1369da557aacSMax Reitz     const char *reference;
1370da557aacSMax Reitz 
1371f67503e5SMax Reitz     assert(pbs);
1372f67503e5SMax Reitz     assert(*pbs == NULL);
1373f67503e5SMax Reitz 
1374da557aacSMax Reitz     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1375da557aacSMax Reitz     qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1376da557aacSMax Reitz     g_free(bdref_key_dot);
1377da557aacSMax Reitz 
1378da557aacSMax Reitz     reference = qdict_get_try_str(options, bdref_key);
1379da557aacSMax Reitz     if (!filename && !reference && !qdict_size(image_options)) {
1380da557aacSMax Reitz         if (allow_none) {
1381da557aacSMax Reitz             ret = 0;
1382da557aacSMax Reitz         } else {
1383da557aacSMax Reitz             error_setg(errp, "A block device must be specified for \"%s\"",
1384da557aacSMax Reitz                        bdref_key);
1385da557aacSMax Reitz             ret = -EINVAL;
1386da557aacSMax Reitz         }
1387b20e61e0SMarkus Armbruster         QDECREF(image_options);
1388da557aacSMax Reitz         goto done;
1389da557aacSMax Reitz     }
1390da557aacSMax Reitz 
1391f7d9fd8cSMax Reitz     ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp);
1392da557aacSMax Reitz 
1393da557aacSMax Reitz done:
1394da557aacSMax Reitz     qdict_del(options, bdref_key);
1395da557aacSMax Reitz     return ret;
1396da557aacSMax Reitz }
1397da557aacSMax Reitz 
13986b8aeca5SChen Gang int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
1399b998875dSKevin Wolf {
1400b998875dSKevin Wolf     /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
14011ba4b6a5SBenoît Canet     char *tmp_filename = g_malloc0(PATH_MAX + 1);
1402b998875dSKevin Wolf     int64_t total_size;
140383d0521aSChunyan Liu     QemuOpts *opts = NULL;
1404b998875dSKevin Wolf     QDict *snapshot_options;
1405b998875dSKevin Wolf     BlockDriverState *bs_snapshot;
1406b998875dSKevin Wolf     Error *local_err;
1407b998875dSKevin Wolf     int ret;
1408b998875dSKevin Wolf 
1409b998875dSKevin Wolf     /* if snapshot, we create a temporary backing file and open it
1410b998875dSKevin Wolf        instead of opening 'filename' directly */
1411b998875dSKevin Wolf 
1412b998875dSKevin Wolf     /* Get the required size from the image */
1413f187743aSKevin Wolf     total_size = bdrv_getlength(bs);
1414f187743aSKevin Wolf     if (total_size < 0) {
14156b8aeca5SChen Gang         ret = total_size;
1416f187743aSKevin Wolf         error_setg_errno(errp, -total_size, "Could not get image size");
14171ba4b6a5SBenoît Canet         goto out;
1418f187743aSKevin Wolf     }
1419b998875dSKevin Wolf 
1420b998875dSKevin Wolf     /* Create the temporary image */
14211ba4b6a5SBenoît Canet     ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1422b998875dSKevin Wolf     if (ret < 0) {
1423b998875dSKevin Wolf         error_setg_errno(errp, -ret, "Could not get temporary filename");
14241ba4b6a5SBenoît Canet         goto out;
1425b998875dSKevin Wolf     }
1426b998875dSKevin Wolf 
1427ef810437SMax Reitz     opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1428c282e1fdSChunyan Liu                             &error_abort);
142939101f25SMarkus Armbruster     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1430ef810437SMax Reitz     ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err);
143183d0521aSChunyan Liu     qemu_opts_del(opts);
1432b998875dSKevin Wolf     if (ret < 0) {
1433b998875dSKevin Wolf         error_setg_errno(errp, -ret, "Could not create temporary overlay "
1434b998875dSKevin Wolf                          "'%s': %s", tmp_filename,
1435b998875dSKevin Wolf                          error_get_pretty(local_err));
1436b998875dSKevin Wolf         error_free(local_err);
14371ba4b6a5SBenoît Canet         goto out;
1438b998875dSKevin Wolf     }
1439b998875dSKevin Wolf 
1440b998875dSKevin Wolf     /* Prepare a new options QDict for the temporary file */
1441b998875dSKevin Wolf     snapshot_options = qdict_new();
1442b998875dSKevin Wolf     qdict_put(snapshot_options, "file.driver",
1443b998875dSKevin Wolf               qstring_from_str("file"));
1444b998875dSKevin Wolf     qdict_put(snapshot_options, "file.filename",
1445b998875dSKevin Wolf               qstring_from_str(tmp_filename));
1446b998875dSKevin Wolf 
1447e4e9986bSMarkus Armbruster     bs_snapshot = bdrv_new();
1448b998875dSKevin Wolf 
1449b998875dSKevin Wolf     ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1450ef810437SMax Reitz                     flags, &bdrv_qcow2, &local_err);
1451b998875dSKevin Wolf     if (ret < 0) {
1452b998875dSKevin Wolf         error_propagate(errp, local_err);
14531ba4b6a5SBenoît Canet         goto out;
1454b998875dSKevin Wolf     }
1455b998875dSKevin Wolf 
1456b998875dSKevin Wolf     bdrv_append(bs_snapshot, bs);
14571ba4b6a5SBenoît Canet 
14581ba4b6a5SBenoît Canet out:
14591ba4b6a5SBenoît Canet     g_free(tmp_filename);
14606b8aeca5SChen Gang     return ret;
1461b998875dSKevin Wolf }
1462b998875dSKevin Wolf 
1463da557aacSMax Reitz /*
1464b6ce07aaSKevin Wolf  * Opens a disk image (raw, qcow2, vmdk, ...)
1465de9c0cecSKevin Wolf  *
1466de9c0cecSKevin Wolf  * options is a QDict of options to pass to the block drivers, or NULL for an
1467de9c0cecSKevin Wolf  * empty set of options. The reference to the QDict belongs to the block layer
1468de9c0cecSKevin Wolf  * after the call (even on failure), so if the caller intends to reuse the
1469de9c0cecSKevin Wolf  * dictionary, it needs to use QINCREF() before calling bdrv_open.
1470f67503e5SMax Reitz  *
1471f67503e5SMax Reitz  * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1472f67503e5SMax Reitz  * If it is not NULL, the referenced BDS will be reused.
1473ddf5636dSMax Reitz  *
1474ddf5636dSMax Reitz  * The reference parameter may be used to specify an existing block device which
1475ddf5636dSMax Reitz  * should be opened. If specified, neither options nor a filename may be given,
1476ddf5636dSMax Reitz  * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1477b6ce07aaSKevin Wolf  */
1478ddf5636dSMax Reitz int bdrv_open(BlockDriverState **pbs, const char *filename,
1479ddf5636dSMax Reitz               const char *reference, QDict *options, int flags,
1480ddf5636dSMax Reitz               BlockDriver *drv, Error **errp)
1481ea2384d3Sbellard {
1482b6ce07aaSKevin Wolf     int ret;
1483f67503e5SMax Reitz     BlockDriverState *file = NULL, *bs;
148474fe54f2SKevin Wolf     const char *drvname;
148534b5d2c6SMax Reitz     Error *local_err = NULL;
1486b1e6fc08SKevin Wolf     int snapshot_flags = 0;
148733e3963eSbellard 
1488f67503e5SMax Reitz     assert(pbs);
1489f67503e5SMax Reitz 
1490ddf5636dSMax Reitz     if (reference) {
1491ddf5636dSMax Reitz         bool options_non_empty = options ? qdict_size(options) : false;
1492ddf5636dSMax Reitz         QDECREF(options);
1493ddf5636dSMax Reitz 
1494ddf5636dSMax Reitz         if (*pbs) {
1495ddf5636dSMax Reitz             error_setg(errp, "Cannot reuse an existing BDS when referencing "
1496ddf5636dSMax Reitz                        "another block device");
1497ddf5636dSMax Reitz             return -EINVAL;
1498ddf5636dSMax Reitz         }
1499ddf5636dSMax Reitz 
1500ddf5636dSMax Reitz         if (filename || options_non_empty) {
1501ddf5636dSMax Reitz             error_setg(errp, "Cannot reference an existing block device with "
1502ddf5636dSMax Reitz                        "additional options or a new filename");
1503ddf5636dSMax Reitz             return -EINVAL;
1504ddf5636dSMax Reitz         }
1505ddf5636dSMax Reitz 
1506ddf5636dSMax Reitz         bs = bdrv_lookup_bs(reference, reference, errp);
1507ddf5636dSMax Reitz         if (!bs) {
1508ddf5636dSMax Reitz             return -ENODEV;
1509ddf5636dSMax Reitz         }
1510ddf5636dSMax Reitz         bdrv_ref(bs);
1511ddf5636dSMax Reitz         *pbs = bs;
1512ddf5636dSMax Reitz         return 0;
1513ddf5636dSMax Reitz     }
1514ddf5636dSMax Reitz 
1515f67503e5SMax Reitz     if (*pbs) {
1516f67503e5SMax Reitz         bs = *pbs;
1517f67503e5SMax Reitz     } else {
1518e4e9986bSMarkus Armbruster         bs = bdrv_new();
1519f67503e5SMax Reitz     }
1520f67503e5SMax Reitz 
1521de9c0cecSKevin Wolf     /* NULL means an empty set of options */
1522de9c0cecSKevin Wolf     if (options == NULL) {
1523de9c0cecSKevin Wolf         options = qdict_new();
1524de9c0cecSKevin Wolf     }
1525de9c0cecSKevin Wolf 
152617b005f1SKevin Wolf     ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err);
1527462f5bcfSKevin Wolf     if (local_err) {
1528462f5bcfSKevin Wolf         goto fail;
1529462f5bcfSKevin Wolf     }
1530462f5bcfSKevin Wolf 
153176c591b0SKevin Wolf     /* Find the right image format driver */
153276c591b0SKevin Wolf     drv = NULL;
153376c591b0SKevin Wolf     drvname = qdict_get_try_str(options, "driver");
153476c591b0SKevin Wolf     if (drvname) {
153576c591b0SKevin Wolf         drv = bdrv_find_format(drvname);
153676c591b0SKevin Wolf         qdict_del(options, "driver");
153776c591b0SKevin Wolf         if (!drv) {
153876c591b0SKevin Wolf             error_setg(errp, "Unknown driver: '%s'", drvname);
153976c591b0SKevin Wolf             ret = -EINVAL;
154076c591b0SKevin Wolf             goto fail;
154176c591b0SKevin Wolf         }
154276c591b0SKevin Wolf     }
154376c591b0SKevin Wolf 
154476c591b0SKevin Wolf     assert(drvname || !(flags & BDRV_O_PROTOCOL));
154576c591b0SKevin Wolf     if (drv && !drv->bdrv_file_open) {
154676c591b0SKevin Wolf         /* If the user explicitly wants a format driver here, we'll need to add
154776c591b0SKevin Wolf          * another layer for the protocol in bs->file */
154876c591b0SKevin Wolf         flags &= ~BDRV_O_PROTOCOL;
154976c591b0SKevin Wolf     }
155076c591b0SKevin Wolf 
1551de9c0cecSKevin Wolf     bs->options = options;
1552b6ad491aSKevin Wolf     options = qdict_clone_shallow(options);
1553de9c0cecSKevin Wolf 
1554f500a6d3SKevin Wolf     /* Open image file without format layer */
1555f4788adcSKevin Wolf     if ((flags & BDRV_O_PROTOCOL) == 0) {
1556be028adcSJeff Cody         if (flags & BDRV_O_RDWR) {
1557be028adcSJeff Cody             flags |= BDRV_O_ALLOW_RDWR;
1558be028adcSJeff Cody         }
1559b1e6fc08SKevin Wolf         if (flags & BDRV_O_SNAPSHOT) {
1560b1e6fc08SKevin Wolf             snapshot_flags = bdrv_temp_snapshot_flags(flags);
1561b1e6fc08SKevin Wolf             flags = bdrv_backing_flags(flags);
1562b1e6fc08SKevin Wolf         }
1563be028adcSJeff Cody 
1564f67503e5SMax Reitz         assert(file == NULL);
1565054963f8SMax Reitz         ret = bdrv_open_image(&file, filename, options, "file",
15660b50cc88SKevin Wolf                               bdrv_inherited_flags(flags),
15670b50cc88SKevin Wolf                               true, &local_err);
1568f500a6d3SKevin Wolf         if (ret < 0) {
15698bfea15dSKevin Wolf             goto fail;
1570f500a6d3SKevin Wolf         }
1571f4788adcSKevin Wolf     }
1572f500a6d3SKevin Wolf 
157376c591b0SKevin Wolf     /* Image format probing */
157438f3ef57SKevin Wolf     bs->probed = !drv;
157576c591b0SKevin Wolf     if (!drv && file) {
157634b5d2c6SMax Reitz         ret = find_image_format(file, filename, &drv, &local_err);
157717b005f1SKevin Wolf         if (ret < 0) {
157817b005f1SKevin Wolf             goto fail;
157917b005f1SKevin Wolf         }
158076c591b0SKevin Wolf     } else if (!drv) {
15812a05cbe4SMax Reitz         error_setg(errp, "Must specify either driver or file");
15822a05cbe4SMax Reitz         ret = -EINVAL;
15838bfea15dSKevin Wolf         goto fail;
15842a05cbe4SMax Reitz     }
1585f500a6d3SKevin Wolf 
1586b6ce07aaSKevin Wolf     /* Open the image */
158734b5d2c6SMax Reitz     ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
1588b6ce07aaSKevin Wolf     if (ret < 0) {
15898bfea15dSKevin Wolf         goto fail;
15906987307cSChristoph Hellwig     }
15916987307cSChristoph Hellwig 
15922a05cbe4SMax Reitz     if (file && (bs->file != file)) {
15934f6fd349SFam Zheng         bdrv_unref(file);
1594f500a6d3SKevin Wolf         file = NULL;
1595f500a6d3SKevin Wolf     }
1596f500a6d3SKevin Wolf 
1597b6ce07aaSKevin Wolf     /* If there is a backing file, use it */
15989156df12SPaolo Bonzini     if ((flags & BDRV_O_NO_BACKING) == 0) {
159931ca6d07SKevin Wolf         QDict *backing_options;
160031ca6d07SKevin Wolf 
16015726d872SBenoît Canet         qdict_extract_subqdict(options, &backing_options, "backing.");
160234b5d2c6SMax Reitz         ret = bdrv_open_backing_file(bs, backing_options, &local_err);
1603b6ce07aaSKevin Wolf         if (ret < 0) {
1604b6ad491aSKevin Wolf             goto close_and_fail;
1605b6ce07aaSKevin Wolf         }
1606b6ce07aaSKevin Wolf     }
1607b6ce07aaSKevin Wolf 
160891af7014SMax Reitz     bdrv_refresh_filename(bs);
160991af7014SMax Reitz 
1610b998875dSKevin Wolf     /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1611b998875dSKevin Wolf      * temporary snapshot afterwards. */
1612b1e6fc08SKevin Wolf     if (snapshot_flags) {
16136b8aeca5SChen Gang         ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
1614b998875dSKevin Wolf         if (local_err) {
1615b998875dSKevin Wolf             goto close_and_fail;
1616b998875dSKevin Wolf         }
1617b998875dSKevin Wolf     }
1618b998875dSKevin Wolf 
1619b6ad491aSKevin Wolf     /* Check if any unknown options were used */
16205acd9d81SMax Reitz     if (options && (qdict_size(options) != 0)) {
1621b6ad491aSKevin Wolf         const QDictEntry *entry = qdict_first(options);
16225acd9d81SMax Reitz         if (flags & BDRV_O_PROTOCOL) {
16235acd9d81SMax Reitz             error_setg(errp, "Block protocol '%s' doesn't support the option "
16245acd9d81SMax Reitz                        "'%s'", drv->format_name, entry->key);
16255acd9d81SMax Reitz         } else {
162634b5d2c6SMax Reitz             error_setg(errp, "Block format '%s' used by device '%s' doesn't "
16275acd9d81SMax Reitz                        "support the option '%s'", drv->format_name,
1628bfb197e0SMarkus Armbruster                        bdrv_get_device_name(bs), entry->key);
16295acd9d81SMax Reitz         }
1630b6ad491aSKevin Wolf 
1631b6ad491aSKevin Wolf         ret = -EINVAL;
1632b6ad491aSKevin Wolf         goto close_and_fail;
1633b6ad491aSKevin Wolf     }
1634b6ad491aSKevin Wolf 
1635b6ce07aaSKevin Wolf     if (!bdrv_key_required(bs)) {
1636a7f53e26SMarkus Armbruster         if (bs->blk) {
1637a7f53e26SMarkus Armbruster             blk_dev_change_media_cb(bs->blk, true);
1638a7f53e26SMarkus Armbruster         }
1639c3adb58fSMarkus Armbruster     } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1640c3adb58fSMarkus Armbruster                && !runstate_check(RUN_STATE_INMIGRATE)
1641c3adb58fSMarkus Armbruster                && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1642c3adb58fSMarkus Armbruster         error_setg(errp,
1643c3adb58fSMarkus Armbruster                    "Guest must be stopped for opening of encrypted image");
1644c3adb58fSMarkus Armbruster         ret = -EBUSY;
1645c3adb58fSMarkus Armbruster         goto close_and_fail;
1646b6ce07aaSKevin Wolf     }
1647b6ce07aaSKevin Wolf 
1648c3adb58fSMarkus Armbruster     QDECREF(options);
1649f67503e5SMax Reitz     *pbs = bs;
1650b6ce07aaSKevin Wolf     return 0;
1651b6ce07aaSKevin Wolf 
16528bfea15dSKevin Wolf fail:
1653f500a6d3SKevin Wolf     if (file != NULL) {
16544f6fd349SFam Zheng         bdrv_unref(file);
1655f500a6d3SKevin Wolf     }
1656de9c0cecSKevin Wolf     QDECREF(bs->options);
1657b6ad491aSKevin Wolf     QDECREF(options);
1658de9c0cecSKevin Wolf     bs->options = NULL;
1659f67503e5SMax Reitz     if (!*pbs) {
1660f67503e5SMax Reitz         /* If *pbs is NULL, a new BDS has been created in this function and
1661f67503e5SMax Reitz            needs to be freed now. Otherwise, it does not need to be closed,
1662f67503e5SMax Reitz            since it has not really been opened yet. */
1663f67503e5SMax Reitz         bdrv_unref(bs);
1664f67503e5SMax Reitz     }
166584d18f06SMarkus Armbruster     if (local_err) {
166634b5d2c6SMax Reitz         error_propagate(errp, local_err);
166734b5d2c6SMax Reitz     }
1668b6ad491aSKevin Wolf     return ret;
1669de9c0cecSKevin Wolf 
1670b6ad491aSKevin Wolf close_and_fail:
1671f67503e5SMax Reitz     /* See fail path, but now the BDS has to be always closed */
1672f67503e5SMax Reitz     if (*pbs) {
1673b6ad491aSKevin Wolf         bdrv_close(bs);
1674f67503e5SMax Reitz     } else {
1675f67503e5SMax Reitz         bdrv_unref(bs);
1676f67503e5SMax Reitz     }
1677b6ad491aSKevin Wolf     QDECREF(options);
167884d18f06SMarkus Armbruster     if (local_err) {
167934b5d2c6SMax Reitz         error_propagate(errp, local_err);
168034b5d2c6SMax Reitz     }
1681b6ce07aaSKevin Wolf     return ret;
1682b6ce07aaSKevin Wolf }
1683b6ce07aaSKevin Wolf 
1684e971aa12SJeff Cody typedef struct BlockReopenQueueEntry {
1685e971aa12SJeff Cody      bool prepared;
1686e971aa12SJeff Cody      BDRVReopenState state;
1687e971aa12SJeff Cody      QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1688e971aa12SJeff Cody } BlockReopenQueueEntry;
1689e971aa12SJeff Cody 
1690e971aa12SJeff Cody /*
1691e971aa12SJeff Cody  * Adds a BlockDriverState to a simple queue for an atomic, transactional
1692e971aa12SJeff Cody  * reopen of multiple devices.
1693e971aa12SJeff Cody  *
1694e971aa12SJeff Cody  * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1695e971aa12SJeff Cody  * already performed, or alternatively may be NULL a new BlockReopenQueue will
1696e971aa12SJeff Cody  * be created and initialized. This newly created BlockReopenQueue should be
1697e971aa12SJeff Cody  * passed back in for subsequent calls that are intended to be of the same
1698e971aa12SJeff Cody  * atomic 'set'.
1699e971aa12SJeff Cody  *
1700e971aa12SJeff Cody  * bs is the BlockDriverState to add to the reopen queue.
1701e971aa12SJeff Cody  *
1702e971aa12SJeff Cody  * flags contains the open flags for the associated bs
1703e971aa12SJeff Cody  *
1704e971aa12SJeff Cody  * returns a pointer to bs_queue, which is either the newly allocated
1705e971aa12SJeff Cody  * bs_queue, or the existing bs_queue being used.
1706e971aa12SJeff Cody  *
1707e971aa12SJeff Cody  */
1708e971aa12SJeff Cody BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1709e971aa12SJeff Cody                                     BlockDriverState *bs, int flags)
1710e971aa12SJeff Cody {
1711e971aa12SJeff Cody     assert(bs != NULL);
1712e971aa12SJeff Cody 
1713e971aa12SJeff Cody     BlockReopenQueueEntry *bs_entry;
1714e971aa12SJeff Cody     if (bs_queue == NULL) {
1715e971aa12SJeff Cody         bs_queue = g_new0(BlockReopenQueue, 1);
1716e971aa12SJeff Cody         QSIMPLEQ_INIT(bs_queue);
1717e971aa12SJeff Cody     }
1718e971aa12SJeff Cody 
1719f1f25a2eSKevin Wolf     /* bdrv_open() masks this flag out */
1720f1f25a2eSKevin Wolf     flags &= ~BDRV_O_PROTOCOL;
1721f1f25a2eSKevin Wolf 
1722e971aa12SJeff Cody     if (bs->file) {
1723f1f25a2eSKevin Wolf         bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags));
1724e971aa12SJeff Cody     }
1725e971aa12SJeff Cody 
1726e971aa12SJeff Cody     bs_entry = g_new0(BlockReopenQueueEntry, 1);
1727e971aa12SJeff Cody     QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1728e971aa12SJeff Cody 
1729e971aa12SJeff Cody     bs_entry->state.bs = bs;
1730e971aa12SJeff Cody     bs_entry->state.flags = flags;
1731e971aa12SJeff Cody 
1732e971aa12SJeff Cody     return bs_queue;
1733e971aa12SJeff Cody }
1734e971aa12SJeff Cody 
1735e971aa12SJeff Cody /*
1736e971aa12SJeff Cody  * Reopen multiple BlockDriverStates atomically & transactionally.
1737e971aa12SJeff Cody  *
1738e971aa12SJeff Cody  * The queue passed in (bs_queue) must have been built up previous
1739e971aa12SJeff Cody  * via bdrv_reopen_queue().
1740e971aa12SJeff Cody  *
1741e971aa12SJeff Cody  * Reopens all BDS specified in the queue, with the appropriate
1742e971aa12SJeff Cody  * flags.  All devices are prepared for reopen, and failure of any
1743e971aa12SJeff Cody  * device will cause all device changes to be abandonded, and intermediate
1744e971aa12SJeff Cody  * data cleaned up.
1745e971aa12SJeff Cody  *
1746e971aa12SJeff Cody  * If all devices prepare successfully, then the changes are committed
1747e971aa12SJeff Cody  * to all devices.
1748e971aa12SJeff Cody  *
1749e971aa12SJeff Cody  */
1750e971aa12SJeff Cody int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1751e971aa12SJeff Cody {
1752e971aa12SJeff Cody     int ret = -1;
1753e971aa12SJeff Cody     BlockReopenQueueEntry *bs_entry, *next;
1754e971aa12SJeff Cody     Error *local_err = NULL;
1755e971aa12SJeff Cody 
1756e971aa12SJeff Cody     assert(bs_queue != NULL);
1757e971aa12SJeff Cody 
1758e971aa12SJeff Cody     bdrv_drain_all();
1759e971aa12SJeff Cody 
1760e971aa12SJeff Cody     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1761e971aa12SJeff Cody         if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1762e971aa12SJeff Cody             error_propagate(errp, local_err);
1763e971aa12SJeff Cody             goto cleanup;
1764e971aa12SJeff Cody         }
1765e971aa12SJeff Cody         bs_entry->prepared = true;
1766e971aa12SJeff Cody     }
1767e971aa12SJeff Cody 
1768e971aa12SJeff Cody     /* If we reach this point, we have success and just need to apply the
1769e971aa12SJeff Cody      * changes
1770e971aa12SJeff Cody      */
1771e971aa12SJeff Cody     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1772e971aa12SJeff Cody         bdrv_reopen_commit(&bs_entry->state);
1773e971aa12SJeff Cody     }
1774e971aa12SJeff Cody 
1775e971aa12SJeff Cody     ret = 0;
1776e971aa12SJeff Cody 
1777e971aa12SJeff Cody cleanup:
1778e971aa12SJeff Cody     QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1779e971aa12SJeff Cody         if (ret && bs_entry->prepared) {
1780e971aa12SJeff Cody             bdrv_reopen_abort(&bs_entry->state);
1781e971aa12SJeff Cody         }
1782e971aa12SJeff Cody         g_free(bs_entry);
1783e971aa12SJeff Cody     }
1784e971aa12SJeff Cody     g_free(bs_queue);
1785e971aa12SJeff Cody     return ret;
1786e971aa12SJeff Cody }
1787e971aa12SJeff Cody 
1788e971aa12SJeff Cody 
1789e971aa12SJeff Cody /* Reopen a single BlockDriverState with the specified flags. */
1790e971aa12SJeff Cody int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1791e971aa12SJeff Cody {
1792e971aa12SJeff Cody     int ret = -1;
1793e971aa12SJeff Cody     Error *local_err = NULL;
1794e971aa12SJeff Cody     BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1795e971aa12SJeff Cody 
1796e971aa12SJeff Cody     ret = bdrv_reopen_multiple(queue, &local_err);
1797e971aa12SJeff Cody     if (local_err != NULL) {
1798e971aa12SJeff Cody         error_propagate(errp, local_err);
1799e971aa12SJeff Cody     }
1800e971aa12SJeff Cody     return ret;
1801e971aa12SJeff Cody }
1802e971aa12SJeff Cody 
1803e971aa12SJeff Cody 
1804e971aa12SJeff Cody /*
1805e971aa12SJeff Cody  * Prepares a BlockDriverState for reopen. All changes are staged in the
1806e971aa12SJeff Cody  * 'opaque' field of the BDRVReopenState, which is used and allocated by
1807e971aa12SJeff Cody  * the block driver layer .bdrv_reopen_prepare()
1808e971aa12SJeff Cody  *
1809e971aa12SJeff Cody  * bs is the BlockDriverState to reopen
1810e971aa12SJeff Cody  * flags are the new open flags
1811e971aa12SJeff Cody  * queue is the reopen queue
1812e971aa12SJeff Cody  *
1813e971aa12SJeff Cody  * Returns 0 on success, non-zero on error.  On error errp will be set
1814e971aa12SJeff Cody  * as well.
1815e971aa12SJeff Cody  *
1816e971aa12SJeff Cody  * On failure, bdrv_reopen_abort() will be called to clean up any data.
1817e971aa12SJeff Cody  * It is the responsibility of the caller to then call the abort() or
1818e971aa12SJeff Cody  * commit() for any other BDS that have been left in a prepare() state
1819e971aa12SJeff Cody  *
1820e971aa12SJeff Cody  */
1821e971aa12SJeff Cody int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1822e971aa12SJeff Cody                         Error **errp)
1823e971aa12SJeff Cody {
1824e971aa12SJeff Cody     int ret = -1;
1825e971aa12SJeff Cody     Error *local_err = NULL;
1826e971aa12SJeff Cody     BlockDriver *drv;
1827e971aa12SJeff Cody 
1828e971aa12SJeff Cody     assert(reopen_state != NULL);
1829e971aa12SJeff Cody     assert(reopen_state->bs->drv != NULL);
1830e971aa12SJeff Cody     drv = reopen_state->bs->drv;
1831e971aa12SJeff Cody 
1832e971aa12SJeff Cody     /* if we are to stay read-only, do not allow permission change
1833e971aa12SJeff Cody      * to r/w */
1834e971aa12SJeff Cody     if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1835e971aa12SJeff Cody         reopen_state->flags & BDRV_O_RDWR) {
183681e5f78aSAlberto Garcia         error_setg(errp, "Node '%s' is read only",
183781e5f78aSAlberto Garcia                    bdrv_get_device_or_node_name(reopen_state->bs));
1838e971aa12SJeff Cody         goto error;
1839e971aa12SJeff Cody     }
1840e971aa12SJeff Cody 
1841e971aa12SJeff Cody 
1842e971aa12SJeff Cody     ret = bdrv_flush(reopen_state->bs);
1843e971aa12SJeff Cody     if (ret) {
1844e971aa12SJeff Cody         error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1845e971aa12SJeff Cody                   strerror(-ret));
1846e971aa12SJeff Cody         goto error;
1847e971aa12SJeff Cody     }
1848e971aa12SJeff Cody 
1849e971aa12SJeff Cody     if (drv->bdrv_reopen_prepare) {
1850e971aa12SJeff Cody         ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1851e971aa12SJeff Cody         if (ret) {
1852e971aa12SJeff Cody             if (local_err != NULL) {
1853e971aa12SJeff Cody                 error_propagate(errp, local_err);
1854e971aa12SJeff Cody             } else {
1855d8b6895fSLuiz Capitulino                 error_setg(errp, "failed while preparing to reopen image '%s'",
1856e971aa12SJeff Cody                            reopen_state->bs->filename);
1857e971aa12SJeff Cody             }
1858e971aa12SJeff Cody             goto error;
1859e971aa12SJeff Cody         }
1860e971aa12SJeff Cody     } else {
1861e971aa12SJeff Cody         /* It is currently mandatory to have a bdrv_reopen_prepare()
1862e971aa12SJeff Cody          * handler for each supported drv. */
186381e5f78aSAlberto Garcia         error_setg(errp, "Block format '%s' used by node '%s' "
186481e5f78aSAlberto Garcia                    "does not support reopening files", drv->format_name,
186581e5f78aSAlberto Garcia                    bdrv_get_device_or_node_name(reopen_state->bs));
1866e971aa12SJeff Cody         ret = -1;
1867e971aa12SJeff Cody         goto error;
1868e971aa12SJeff Cody     }
1869e971aa12SJeff Cody 
1870e971aa12SJeff Cody     ret = 0;
1871e971aa12SJeff Cody 
1872e971aa12SJeff Cody error:
1873e971aa12SJeff Cody     return ret;
1874e971aa12SJeff Cody }
1875e971aa12SJeff Cody 
1876e971aa12SJeff Cody /*
1877e971aa12SJeff Cody  * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1878e971aa12SJeff Cody  * makes them final by swapping the staging BlockDriverState contents into
1879e971aa12SJeff Cody  * the active BlockDriverState contents.
1880e971aa12SJeff Cody  */
1881e971aa12SJeff Cody void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1882e971aa12SJeff Cody {
1883e971aa12SJeff Cody     BlockDriver *drv;
1884e971aa12SJeff Cody 
1885e971aa12SJeff Cody     assert(reopen_state != NULL);
1886e971aa12SJeff Cody     drv = reopen_state->bs->drv;
1887e971aa12SJeff Cody     assert(drv != NULL);
1888e971aa12SJeff Cody 
1889e971aa12SJeff Cody     /* If there are any driver level actions to take */
1890e971aa12SJeff Cody     if (drv->bdrv_reopen_commit) {
1891e971aa12SJeff Cody         drv->bdrv_reopen_commit(reopen_state);
1892e971aa12SJeff Cody     }
1893e971aa12SJeff Cody 
1894e971aa12SJeff Cody     /* set BDS specific flags now */
1895e971aa12SJeff Cody     reopen_state->bs->open_flags         = reopen_state->flags;
1896e971aa12SJeff Cody     reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1897e971aa12SJeff Cody                                               BDRV_O_CACHE_WB);
1898e971aa12SJeff Cody     reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
1899355ef4acSKevin Wolf 
19003baca891SKevin Wolf     bdrv_refresh_limits(reopen_state->bs, NULL);
1901e971aa12SJeff Cody }
1902e971aa12SJeff Cody 
1903e971aa12SJeff Cody /*
1904e971aa12SJeff Cody  * Abort the reopen, and delete and free the staged changes in
1905e971aa12SJeff Cody  * reopen_state
1906e971aa12SJeff Cody  */
1907e971aa12SJeff Cody void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1908e971aa12SJeff Cody {
1909e971aa12SJeff Cody     BlockDriver *drv;
1910e971aa12SJeff Cody 
1911e971aa12SJeff Cody     assert(reopen_state != NULL);
1912e971aa12SJeff Cody     drv = reopen_state->bs->drv;
1913e971aa12SJeff Cody     assert(drv != NULL);
1914e971aa12SJeff Cody 
1915e971aa12SJeff Cody     if (drv->bdrv_reopen_abort) {
1916e971aa12SJeff Cody         drv->bdrv_reopen_abort(reopen_state);
1917e971aa12SJeff Cody     }
1918e971aa12SJeff Cody }
1919e971aa12SJeff Cody 
1920e971aa12SJeff Cody 
1921fc01f7e7Sbellard void bdrv_close(BlockDriverState *bs)
1922fc01f7e7Sbellard {
192333384421SMax Reitz     BdrvAioNotifier *ban, *ban_next;
192433384421SMax Reitz 
19253e914655SPaolo Bonzini     if (bs->job) {
19263e914655SPaolo Bonzini         block_job_cancel_sync(bs->job);
19273e914655SPaolo Bonzini     }
192858fda173SStefan Hajnoczi     bdrv_drain_all(); /* complete I/O */
192958fda173SStefan Hajnoczi     bdrv_flush(bs);
193058fda173SStefan Hajnoczi     bdrv_drain_all(); /* in case flush left pending I/O */
1931d7d512f6SPaolo Bonzini     notifier_list_notify(&bs->close_notifiers, bs);
19327094f12fSKevin Wolf 
19333cbc002cSPaolo Bonzini     if (bs->drv) {
1934557df6acSStefan Hajnoczi         if (bs->backing_hd) {
1935826b6ca0SFam Zheng             BlockDriverState *backing_hd = bs->backing_hd;
1936826b6ca0SFam Zheng             bdrv_set_backing_hd(bs, NULL);
1937826b6ca0SFam Zheng             bdrv_unref(backing_hd);
1938557df6acSStefan Hajnoczi         }
1939ea2384d3Sbellard         bs->drv->bdrv_close(bs);
19407267c094SAnthony Liguori         g_free(bs->opaque);
1941ea2384d3Sbellard         bs->opaque = NULL;
1942ea2384d3Sbellard         bs->drv = NULL;
194353fec9d3SStefan Hajnoczi         bs->copy_on_read = 0;
1944a275fa42SPaolo Bonzini         bs->backing_file[0] = '\0';
1945a275fa42SPaolo Bonzini         bs->backing_format[0] = '\0';
19466405875cSPaolo Bonzini         bs->total_sectors = 0;
19476405875cSPaolo Bonzini         bs->encrypted = 0;
19486405875cSPaolo Bonzini         bs->valid_key = 0;
19496405875cSPaolo Bonzini         bs->sg = 0;
19500d51b4deSAsias He         bs->zero_beyond_eof = false;
1951de9c0cecSKevin Wolf         QDECREF(bs->options);
1952de9c0cecSKevin Wolf         bs->options = NULL;
195391af7014SMax Reitz         QDECREF(bs->full_open_options);
195491af7014SMax Reitz         bs->full_open_options = NULL;
1955b338082bSbellard 
195666f82ceeSKevin Wolf         if (bs->file != NULL) {
19574f6fd349SFam Zheng             bdrv_unref(bs->file);
19580ac9377dSPaolo Bonzini             bs->file = NULL;
195966f82ceeSKevin Wolf         }
19609ca11154SPavel Hrdina     }
196166f82ceeSKevin Wolf 
1962a7f53e26SMarkus Armbruster     if (bs->blk) {
1963a7f53e26SMarkus Armbruster         blk_dev_change_media_cb(bs->blk, false);
1964a7f53e26SMarkus Armbruster     }
196598f90dbaSZhi Yong Wu 
196698f90dbaSZhi Yong Wu     /*throttling disk I/O limits*/
196798f90dbaSZhi Yong Wu     if (bs->io_limits_enabled) {
196898f90dbaSZhi Yong Wu         bdrv_io_limits_disable(bs);
196998f90dbaSZhi Yong Wu     }
197033384421SMax Reitz 
197133384421SMax Reitz     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
197233384421SMax Reitz         g_free(ban);
197333384421SMax Reitz     }
197433384421SMax Reitz     QLIST_INIT(&bs->aio_notifiers);
1975b338082bSbellard }
1976b338082bSbellard 
19772bc93fedSMORITA Kazutaka void bdrv_close_all(void)
19782bc93fedSMORITA Kazutaka {
19792bc93fedSMORITA Kazutaka     BlockDriverState *bs;
19802bc93fedSMORITA Kazutaka 
1981dc364f4cSBenoît Canet     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1982ed78cda3SStefan Hajnoczi         AioContext *aio_context = bdrv_get_aio_context(bs);
1983ed78cda3SStefan Hajnoczi 
1984ed78cda3SStefan Hajnoczi         aio_context_acquire(aio_context);
19852bc93fedSMORITA Kazutaka         bdrv_close(bs);
1986ed78cda3SStefan Hajnoczi         aio_context_release(aio_context);
19872bc93fedSMORITA Kazutaka     }
19882bc93fedSMORITA Kazutaka }
19892bc93fedSMORITA Kazutaka 
199088266f5aSStefan Hajnoczi /* Check if any requests are in-flight (including throttled requests) */
199188266f5aSStefan Hajnoczi static bool bdrv_requests_pending(BlockDriverState *bs)
199288266f5aSStefan Hajnoczi {
199388266f5aSStefan Hajnoczi     if (!QLIST_EMPTY(&bs->tracked_requests)) {
199488266f5aSStefan Hajnoczi         return true;
199588266f5aSStefan Hajnoczi     }
1996cc0681c4SBenoît Canet     if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
1997cc0681c4SBenoît Canet         return true;
1998cc0681c4SBenoît Canet     }
1999cc0681c4SBenoît Canet     if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
200088266f5aSStefan Hajnoczi         return true;
200188266f5aSStefan Hajnoczi     }
200288266f5aSStefan Hajnoczi     if (bs->file && bdrv_requests_pending(bs->file)) {
200388266f5aSStefan Hajnoczi         return true;
200488266f5aSStefan Hajnoczi     }
200588266f5aSStefan Hajnoczi     if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
200688266f5aSStefan Hajnoczi         return true;
200788266f5aSStefan Hajnoczi     }
200888266f5aSStefan Hajnoczi     return false;
200988266f5aSStefan Hajnoczi }
201088266f5aSStefan Hajnoczi 
20115b98db0aSStefan Hajnoczi static bool bdrv_drain_one(BlockDriverState *bs)
20125b98db0aSStefan Hajnoczi {
20135b98db0aSStefan Hajnoczi     bool bs_busy;
20145b98db0aSStefan Hajnoczi 
20155b98db0aSStefan Hajnoczi     bdrv_flush_io_queue(bs);
20165b98db0aSStefan Hajnoczi     bdrv_start_throttled_reqs(bs);
20175b98db0aSStefan Hajnoczi     bs_busy = bdrv_requests_pending(bs);
20185b98db0aSStefan Hajnoczi     bs_busy |= aio_poll(bdrv_get_aio_context(bs), bs_busy);
20195b98db0aSStefan Hajnoczi     return bs_busy;
20205b98db0aSStefan Hajnoczi }
20215b98db0aSStefan Hajnoczi 
20225b98db0aSStefan Hajnoczi /*
20235b98db0aSStefan Hajnoczi  * Wait for pending requests to complete on a single BlockDriverState subtree
20245b98db0aSStefan Hajnoczi  *
20255b98db0aSStefan Hajnoczi  * See the warning in bdrv_drain_all().  This function can only be called if
20265b98db0aSStefan Hajnoczi  * you are sure nothing can generate I/O because you have op blockers
20275b98db0aSStefan Hajnoczi  * installed.
20285b98db0aSStefan Hajnoczi  *
20295b98db0aSStefan Hajnoczi  * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
20305b98db0aSStefan Hajnoczi  * AioContext.
20315b98db0aSStefan Hajnoczi  */
20325b98db0aSStefan Hajnoczi void bdrv_drain(BlockDriverState *bs)
20335b98db0aSStefan Hajnoczi {
20345b98db0aSStefan Hajnoczi     while (bdrv_drain_one(bs)) {
20355b98db0aSStefan Hajnoczi         /* Keep iterating */
20365b98db0aSStefan Hajnoczi     }
20375b98db0aSStefan Hajnoczi }
20385b98db0aSStefan Hajnoczi 
2039922453bcSStefan Hajnoczi /*
2040922453bcSStefan Hajnoczi  * Wait for pending requests to complete across all BlockDriverStates
2041922453bcSStefan Hajnoczi  *
2042922453bcSStefan Hajnoczi  * This function does not flush data to disk, use bdrv_flush_all() for that
2043922453bcSStefan Hajnoczi  * after calling this function.
20444c355d53SZhi Yong Wu  *
20454c355d53SZhi Yong Wu  * Note that completion of an asynchronous I/O operation can trigger any
20464c355d53SZhi Yong Wu  * number of other I/O operations on other devices---for example a coroutine
20474c355d53SZhi Yong Wu  * can be arbitrarily complex and a constant flow of I/O can come until the
20484c355d53SZhi Yong Wu  * coroutine is complete.  Because of this, it is not possible to have a
20494c355d53SZhi Yong Wu  * function to drain a single device's I/O queue.
2050922453bcSStefan Hajnoczi  */
2051922453bcSStefan Hajnoczi void bdrv_drain_all(void)
2052922453bcSStefan Hajnoczi {
205388266f5aSStefan Hajnoczi     /* Always run first iteration so any pending completion BHs run */
205488266f5aSStefan Hajnoczi     bool busy = true;
20554f5472cbSStefan Hajnoczi     BlockDriverState *bs = NULL;
2056922453bcSStefan Hajnoczi 
20574f5472cbSStefan Hajnoczi     while ((bs = bdrv_next(bs))) {
205869da3b0bSFam Zheng         AioContext *aio_context = bdrv_get_aio_context(bs);
205969da3b0bSFam Zheng 
206069da3b0bSFam Zheng         aio_context_acquire(aio_context);
206169da3b0bSFam Zheng         if (bs->job) {
206269da3b0bSFam Zheng             block_job_pause(bs->job);
206369da3b0bSFam Zheng         }
206469da3b0bSFam Zheng         aio_context_release(aio_context);
206569da3b0bSFam Zheng     }
206669da3b0bSFam Zheng 
206788266f5aSStefan Hajnoczi     while (busy) {
20689b536adcSStefan Hajnoczi         busy = false;
20694f5472cbSStefan Hajnoczi         bs = NULL;
2070922453bcSStefan Hajnoczi 
20714f5472cbSStefan Hajnoczi         while ((bs = bdrv_next(bs))) {
20729b536adcSStefan Hajnoczi             AioContext *aio_context = bdrv_get_aio_context(bs);
20739b536adcSStefan Hajnoczi 
20749b536adcSStefan Hajnoczi             aio_context_acquire(aio_context);
20755b98db0aSStefan Hajnoczi             busy |= bdrv_drain_one(bs);
20769b536adcSStefan Hajnoczi             aio_context_release(aio_context);
20779b536adcSStefan Hajnoczi         }
2078922453bcSStefan Hajnoczi     }
207969da3b0bSFam Zheng 
20804f5472cbSStefan Hajnoczi     bs = NULL;
20814f5472cbSStefan Hajnoczi     while ((bs = bdrv_next(bs))) {
208269da3b0bSFam Zheng         AioContext *aio_context = bdrv_get_aio_context(bs);
208369da3b0bSFam Zheng 
208469da3b0bSFam Zheng         aio_context_acquire(aio_context);
208569da3b0bSFam Zheng         if (bs->job) {
208669da3b0bSFam Zheng             block_job_resume(bs->job);
208769da3b0bSFam Zheng         }
208869da3b0bSFam Zheng         aio_context_release(aio_context);
208969da3b0bSFam Zheng     }
2090922453bcSStefan Hajnoczi }
2091922453bcSStefan Hajnoczi 
2092dc364f4cSBenoît Canet /* make a BlockDriverState anonymous by removing from bdrv_state and
2093dc364f4cSBenoît Canet  * graph_bdrv_state list.
2094d22b2f41SRyan Harper    Also, NULL terminate the device_name to prevent double remove */
2095d22b2f41SRyan Harper void bdrv_make_anon(BlockDriverState *bs)
2096d22b2f41SRyan Harper {
2097bfb197e0SMarkus Armbruster     /*
2098bfb197e0SMarkus Armbruster      * Take care to remove bs from bdrv_states only when it's actually
2099bfb197e0SMarkus Armbruster      * in it.  Note that bs->device_list.tqe_prev is initially null,
2100bfb197e0SMarkus Armbruster      * and gets set to non-null by QTAILQ_INSERT_TAIL().  Establish
2101bfb197e0SMarkus Armbruster      * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
2102bfb197e0SMarkus Armbruster      * resetting it to null on remove.
2103bfb197e0SMarkus Armbruster      */
2104bfb197e0SMarkus Armbruster     if (bs->device_list.tqe_prev) {
2105dc364f4cSBenoît Canet         QTAILQ_REMOVE(&bdrv_states, bs, device_list);
2106bfb197e0SMarkus Armbruster         bs->device_list.tqe_prev = NULL;
2107d22b2f41SRyan Harper     }
2108dc364f4cSBenoît Canet     if (bs->node_name[0] != '\0') {
2109dc364f4cSBenoît Canet         QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
2110dc364f4cSBenoît Canet     }
2111dc364f4cSBenoît Canet     bs->node_name[0] = '\0';
2112d22b2f41SRyan Harper }
2113d22b2f41SRyan Harper 
2114e023b2e2SPaolo Bonzini static void bdrv_rebind(BlockDriverState *bs)
2115e023b2e2SPaolo Bonzini {
2116e023b2e2SPaolo Bonzini     if (bs->drv && bs->drv->bdrv_rebind) {
2117e023b2e2SPaolo Bonzini         bs->drv->bdrv_rebind(bs);
2118e023b2e2SPaolo Bonzini     }
2119e023b2e2SPaolo Bonzini }
2120e023b2e2SPaolo Bonzini 
21214ddc07caSPaolo Bonzini static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
21224ddc07caSPaolo Bonzini                                      BlockDriverState *bs_src)
21234ddc07caSPaolo Bonzini {
21244ddc07caSPaolo Bonzini     /* move some fields that need to stay attached to the device */
21254ddc07caSPaolo Bonzini 
21264ddc07caSPaolo Bonzini     /* dev info */
21271b7fd729SPaolo Bonzini     bs_dest->guest_block_size   = bs_src->guest_block_size;
21284ddc07caSPaolo Bonzini     bs_dest->copy_on_read       = bs_src->copy_on_read;
21294ddc07caSPaolo Bonzini 
21304ddc07caSPaolo Bonzini     bs_dest->enable_write_cache = bs_src->enable_write_cache;
21314ddc07caSPaolo Bonzini 
2132cc0681c4SBenoît Canet     /* i/o throttled req */
2133cc0681c4SBenoît Canet     memcpy(&bs_dest->throttle_state,
2134cc0681c4SBenoît Canet            &bs_src->throttle_state,
2135cc0681c4SBenoît Canet            sizeof(ThrottleState));
2136cc0681c4SBenoît Canet     bs_dest->throttled_reqs[0]  = bs_src->throttled_reqs[0];
2137cc0681c4SBenoît Canet     bs_dest->throttled_reqs[1]  = bs_src->throttled_reqs[1];
21384ddc07caSPaolo Bonzini     bs_dest->io_limits_enabled  = bs_src->io_limits_enabled;
21394ddc07caSPaolo Bonzini 
21404ddc07caSPaolo Bonzini     /* r/w error */
21414ddc07caSPaolo Bonzini     bs_dest->on_read_error      = bs_src->on_read_error;
21424ddc07caSPaolo Bonzini     bs_dest->on_write_error     = bs_src->on_write_error;
21434ddc07caSPaolo Bonzini 
21444ddc07caSPaolo Bonzini     /* i/o status */
21454ddc07caSPaolo Bonzini     bs_dest->iostatus_enabled   = bs_src->iostatus_enabled;
21464ddc07caSPaolo Bonzini     bs_dest->iostatus           = bs_src->iostatus;
21474ddc07caSPaolo Bonzini 
21484ddc07caSPaolo Bonzini     /* dirty bitmap */
2149e4654d2dSFam Zheng     bs_dest->dirty_bitmaps      = bs_src->dirty_bitmaps;
21504ddc07caSPaolo Bonzini 
21519fcb0251SFam Zheng     /* reference count */
21529fcb0251SFam Zheng     bs_dest->refcnt             = bs_src->refcnt;
21539fcb0251SFam Zheng 
21544ddc07caSPaolo Bonzini     /* job */
21554ddc07caSPaolo Bonzini     bs_dest->job                = bs_src->job;
21564ddc07caSPaolo Bonzini 
21574ddc07caSPaolo Bonzini     /* keep the same entry in bdrv_states */
2158dc364f4cSBenoît Canet     bs_dest->device_list = bs_src->device_list;
21597e7d56d9SMarkus Armbruster     bs_dest->blk = bs_src->blk;
21607e7d56d9SMarkus Armbruster 
2161fbe40ff7SFam Zheng     memcpy(bs_dest->op_blockers, bs_src->op_blockers,
2162fbe40ff7SFam Zheng            sizeof(bs_dest->op_blockers));
21634ddc07caSPaolo Bonzini }
21644ddc07caSPaolo Bonzini 
21654ddc07caSPaolo Bonzini /*
21664ddc07caSPaolo Bonzini  * Swap bs contents for two image chains while they are live,
21674ddc07caSPaolo Bonzini  * while keeping required fields on the BlockDriverState that is
21684ddc07caSPaolo Bonzini  * actually attached to a device.
21694ddc07caSPaolo Bonzini  *
21704ddc07caSPaolo Bonzini  * This will modify the BlockDriverState fields, and swap contents
21714ddc07caSPaolo Bonzini  * between bs_new and bs_old. Both bs_new and bs_old are modified.
21724ddc07caSPaolo Bonzini  *
2173bfb197e0SMarkus Armbruster  * bs_new must not be attached to a BlockBackend.
21744ddc07caSPaolo Bonzini  *
21754ddc07caSPaolo Bonzini  * This function does not create any image files.
21764ddc07caSPaolo Bonzini  */
21774ddc07caSPaolo Bonzini void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
21784ddc07caSPaolo Bonzini {
21794ddc07caSPaolo Bonzini     BlockDriverState tmp;
21804ddc07caSPaolo Bonzini 
218190ce8a06SBenoît Canet     /* The code needs to swap the node_name but simply swapping node_list won't
218290ce8a06SBenoît Canet      * work so first remove the nodes from the graph list, do the swap then
218390ce8a06SBenoît Canet      * insert them back if needed.
218490ce8a06SBenoît Canet      */
218590ce8a06SBenoît Canet     if (bs_new->node_name[0] != '\0') {
218690ce8a06SBenoît Canet         QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
218790ce8a06SBenoît Canet     }
218890ce8a06SBenoît Canet     if (bs_old->node_name[0] != '\0') {
218990ce8a06SBenoît Canet         QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
219090ce8a06SBenoît Canet     }
219190ce8a06SBenoît Canet 
2192bfb197e0SMarkus Armbruster     /* bs_new must be unattached and shouldn't have anything fancy enabled */
21937e7d56d9SMarkus Armbruster     assert(!bs_new->blk);
2194e4654d2dSFam Zheng     assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
21954ddc07caSPaolo Bonzini     assert(bs_new->job == NULL);
21964ddc07caSPaolo Bonzini     assert(bs_new->io_limits_enabled == false);
2197cc0681c4SBenoît Canet     assert(!throttle_have_timer(&bs_new->throttle_state));
21984ddc07caSPaolo Bonzini 
21994ddc07caSPaolo Bonzini     tmp = *bs_new;
22004ddc07caSPaolo Bonzini     *bs_new = *bs_old;
22014ddc07caSPaolo Bonzini     *bs_old = tmp;
22024ddc07caSPaolo Bonzini 
22034ddc07caSPaolo Bonzini     /* there are some fields that should not be swapped, move them back */
22044ddc07caSPaolo Bonzini     bdrv_move_feature_fields(&tmp, bs_old);
22054ddc07caSPaolo Bonzini     bdrv_move_feature_fields(bs_old, bs_new);
22064ddc07caSPaolo Bonzini     bdrv_move_feature_fields(bs_new, &tmp);
22074ddc07caSPaolo Bonzini 
2208bfb197e0SMarkus Armbruster     /* bs_new must remain unattached */
22097e7d56d9SMarkus Armbruster     assert(!bs_new->blk);
22104ddc07caSPaolo Bonzini 
22114ddc07caSPaolo Bonzini     /* Check a few fields that should remain attached to the device */
22124ddc07caSPaolo Bonzini     assert(bs_new->job == NULL);
22134ddc07caSPaolo Bonzini     assert(bs_new->io_limits_enabled == false);
2214cc0681c4SBenoît Canet     assert(!throttle_have_timer(&bs_new->throttle_state));
22154ddc07caSPaolo Bonzini 
221690ce8a06SBenoît Canet     /* insert the nodes back into the graph node list if needed */
221790ce8a06SBenoît Canet     if (bs_new->node_name[0] != '\0') {
221890ce8a06SBenoît Canet         QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
221990ce8a06SBenoît Canet     }
222090ce8a06SBenoît Canet     if (bs_old->node_name[0] != '\0') {
222190ce8a06SBenoît Canet         QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
222290ce8a06SBenoît Canet     }
222390ce8a06SBenoît Canet 
22244ddc07caSPaolo Bonzini     bdrv_rebind(bs_new);
22254ddc07caSPaolo Bonzini     bdrv_rebind(bs_old);
22264ddc07caSPaolo Bonzini }
22274ddc07caSPaolo Bonzini 
22288802d1fdSJeff Cody /*
22298802d1fdSJeff Cody  * Add new bs contents at the top of an image chain while the chain is
22308802d1fdSJeff Cody  * live, while keeping required fields on the top layer.
22318802d1fdSJeff Cody  *
22328802d1fdSJeff Cody  * This will modify the BlockDriverState fields, and swap contents
22338802d1fdSJeff Cody  * between bs_new and bs_top. Both bs_new and bs_top are modified.
22348802d1fdSJeff Cody  *
2235bfb197e0SMarkus Armbruster  * bs_new must not be attached to a BlockBackend.
2236f6801b83SJeff Cody  *
22378802d1fdSJeff Cody  * This function does not create any image files.
22388802d1fdSJeff Cody  */
22398802d1fdSJeff Cody void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
22408802d1fdSJeff Cody {
22414ddc07caSPaolo Bonzini     bdrv_swap(bs_new, bs_top);
22428802d1fdSJeff Cody 
22438802d1fdSJeff Cody     /* The contents of 'tmp' will become bs_top, as we are
22448802d1fdSJeff Cody      * swapping bs_new and bs_top contents. */
22458d24cce1SFam Zheng     bdrv_set_backing_hd(bs_top, bs_new);
22468802d1fdSJeff Cody }
22478802d1fdSJeff Cody 
22484f6fd349SFam Zheng static void bdrv_delete(BlockDriverState *bs)
2249b338082bSbellard {
22503e914655SPaolo Bonzini     assert(!bs->job);
22513718d8abSFam Zheng     assert(bdrv_op_blocker_is_empty(bs));
22524f6fd349SFam Zheng     assert(!bs->refcnt);
2253e4654d2dSFam Zheng     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
225418846deeSMarkus Armbruster 
2255e1b5c52eSStefan Hajnoczi     bdrv_close(bs);
2256e1b5c52eSStefan Hajnoczi 
22571b7bdbc1SStefan Hajnoczi     /* remove from list, if necessary */
2258d22b2f41SRyan Harper     bdrv_make_anon(bs);
225934c6f050Saurel32 
22607267c094SAnthony Liguori     g_free(bs);
2261fc01f7e7Sbellard }
2262fc01f7e7Sbellard 
2263e97fc193Saliguori /*
2264e97fc193Saliguori  * Run consistency checks on an image
2265e97fc193Saliguori  *
2266e076f338SKevin Wolf  * Returns 0 if the check could be completed (it doesn't mean that the image is
2267a1c7273bSStefan Weil  * free of errors) or -errno when an internal error occurred. The results of the
2268e076f338SKevin Wolf  * check are stored in res.
2269e97fc193Saliguori  */
22704534ff54SKevin Wolf int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
2271e97fc193Saliguori {
2272908bcd54SMax Reitz     if (bs->drv == NULL) {
2273908bcd54SMax Reitz         return -ENOMEDIUM;
2274908bcd54SMax Reitz     }
2275e97fc193Saliguori     if (bs->drv->bdrv_check == NULL) {
2276e97fc193Saliguori         return -ENOTSUP;
2277e97fc193Saliguori     }
2278e97fc193Saliguori 
2279e076f338SKevin Wolf     memset(res, 0, sizeof(*res));
22804534ff54SKevin Wolf     return bs->drv->bdrv_check(bs, res, fix);
2281e97fc193Saliguori }
2282e97fc193Saliguori 
22838a426614SKevin Wolf #define COMMIT_BUF_SECTORS 2048
22848a426614SKevin Wolf 
228533e3963eSbellard /* commit COW file into the raw image */
228633e3963eSbellard int bdrv_commit(BlockDriverState *bs)
228733e3963eSbellard {
228819cb3738Sbellard     BlockDriver *drv = bs->drv;
228972706ea4SJeff Cody     int64_t sector, total_sectors, length, backing_length;
22908a426614SKevin Wolf     int n, ro, open_flags;
22910bce597dSJeff Cody     int ret = 0;
229272706ea4SJeff Cody     uint8_t *buf = NULL;
229333e3963eSbellard 
229419cb3738Sbellard     if (!drv)
229519cb3738Sbellard         return -ENOMEDIUM;
229633e3963eSbellard 
22974dca4b63SNaphtali Sprei     if (!bs->backing_hd) {
22984dca4b63SNaphtali Sprei         return -ENOTSUP;
22994dca4b63SNaphtali Sprei     }
23004dca4b63SNaphtali Sprei 
2301bb00021dSFam Zheng     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
2302bb00021dSFam Zheng         bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
23032d3735d3SStefan Hajnoczi         return -EBUSY;
23042d3735d3SStefan Hajnoczi     }
23052d3735d3SStefan Hajnoczi 
23064dca4b63SNaphtali Sprei     ro = bs->backing_hd->read_only;
23074dca4b63SNaphtali Sprei     open_flags =  bs->backing_hd->open_flags;
23084dca4b63SNaphtali Sprei 
23094dca4b63SNaphtali Sprei     if (ro) {
23100bce597dSJeff Cody         if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
23110bce597dSJeff Cody             return -EACCES;
23124dca4b63SNaphtali Sprei         }
2313ea2384d3Sbellard     }
2314ea2384d3Sbellard 
231572706ea4SJeff Cody     length = bdrv_getlength(bs);
231672706ea4SJeff Cody     if (length < 0) {
231772706ea4SJeff Cody         ret = length;
231872706ea4SJeff Cody         goto ro_cleanup;
231972706ea4SJeff Cody     }
232072706ea4SJeff Cody 
232172706ea4SJeff Cody     backing_length = bdrv_getlength(bs->backing_hd);
232272706ea4SJeff Cody     if (backing_length < 0) {
232372706ea4SJeff Cody         ret = backing_length;
232472706ea4SJeff Cody         goto ro_cleanup;
232572706ea4SJeff Cody     }
232672706ea4SJeff Cody 
232772706ea4SJeff Cody     /* If our top snapshot is larger than the backing file image,
232872706ea4SJeff Cody      * grow the backing file image if possible.  If not possible,
232972706ea4SJeff Cody      * we must return an error */
233072706ea4SJeff Cody     if (length > backing_length) {
233172706ea4SJeff Cody         ret = bdrv_truncate(bs->backing_hd, length);
233272706ea4SJeff Cody         if (ret < 0) {
233372706ea4SJeff Cody             goto ro_cleanup;
233472706ea4SJeff Cody         }
233572706ea4SJeff Cody     }
233672706ea4SJeff Cody 
233772706ea4SJeff Cody     total_sectors = length >> BDRV_SECTOR_BITS;
2338857d4f46SKevin Wolf 
2339857d4f46SKevin Wolf     /* qemu_try_blockalign() for bs will choose an alignment that works for
2340857d4f46SKevin Wolf      * bs->backing_hd as well, so no need to compare the alignment manually. */
2341857d4f46SKevin Wolf     buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2342857d4f46SKevin Wolf     if (buf == NULL) {
2343857d4f46SKevin Wolf         ret = -ENOMEM;
2344857d4f46SKevin Wolf         goto ro_cleanup;
2345857d4f46SKevin Wolf     }
23468a426614SKevin Wolf 
23478a426614SKevin Wolf     for (sector = 0; sector < total_sectors; sector += n) {
2348d663640cSPaolo Bonzini         ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2349d663640cSPaolo Bonzini         if (ret < 0) {
2350d663640cSPaolo Bonzini             goto ro_cleanup;
2351d663640cSPaolo Bonzini         }
2352d663640cSPaolo Bonzini         if (ret) {
2353dabfa6ccSKevin Wolf             ret = bdrv_read(bs, sector, buf, n);
2354dabfa6ccSKevin Wolf             if (ret < 0) {
23554dca4b63SNaphtali Sprei                 goto ro_cleanup;
235633e3963eSbellard             }
235733e3963eSbellard 
2358dabfa6ccSKevin Wolf             ret = bdrv_write(bs->backing_hd, sector, buf, n);
2359dabfa6ccSKevin Wolf             if (ret < 0) {
23604dca4b63SNaphtali Sprei                 goto ro_cleanup;
236133e3963eSbellard             }
236233e3963eSbellard         }
236333e3963eSbellard     }
236495389c86Sbellard 
23651d44952fSChristoph Hellwig     if (drv->bdrv_make_empty) {
23661d44952fSChristoph Hellwig         ret = drv->bdrv_make_empty(bs);
2367dabfa6ccSKevin Wolf         if (ret < 0) {
2368dabfa6ccSKevin Wolf             goto ro_cleanup;
2369dabfa6ccSKevin Wolf         }
23701d44952fSChristoph Hellwig         bdrv_flush(bs);
23711d44952fSChristoph Hellwig     }
237295389c86Sbellard 
23733f5075aeSChristoph Hellwig     /*
23743f5075aeSChristoph Hellwig      * Make sure all data we wrote to the backing device is actually
23753f5075aeSChristoph Hellwig      * stable on disk.
23763f5075aeSChristoph Hellwig      */
2377dabfa6ccSKevin Wolf     if (bs->backing_hd) {
23783f5075aeSChristoph Hellwig         bdrv_flush(bs->backing_hd);
2379dabfa6ccSKevin Wolf     }
23804dca4b63SNaphtali Sprei 
2381dabfa6ccSKevin Wolf     ret = 0;
23824dca4b63SNaphtali Sprei ro_cleanup:
2383857d4f46SKevin Wolf     qemu_vfree(buf);
23844dca4b63SNaphtali Sprei 
23854dca4b63SNaphtali Sprei     if (ro) {
23860bce597dSJeff Cody         /* ignoring error return here */
23870bce597dSJeff Cody         bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
23884dca4b63SNaphtali Sprei     }
23894dca4b63SNaphtali Sprei 
23901d44952fSChristoph Hellwig     return ret;
239133e3963eSbellard }
239233e3963eSbellard 
2393e8877497SStefan Hajnoczi int bdrv_commit_all(void)
23946ab4b5abSMarkus Armbruster {
23956ab4b5abSMarkus Armbruster     BlockDriverState *bs;
23966ab4b5abSMarkus Armbruster 
2397dc364f4cSBenoît Canet     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2398ed78cda3SStefan Hajnoczi         AioContext *aio_context = bdrv_get_aio_context(bs);
2399ed78cda3SStefan Hajnoczi 
2400ed78cda3SStefan Hajnoczi         aio_context_acquire(aio_context);
2401272d2d8eSJeff Cody         if (bs->drv && bs->backing_hd) {
2402e8877497SStefan Hajnoczi             int ret = bdrv_commit(bs);
2403e8877497SStefan Hajnoczi             if (ret < 0) {
2404ed78cda3SStefan Hajnoczi                 aio_context_release(aio_context);
2405e8877497SStefan Hajnoczi                 return ret;
24066ab4b5abSMarkus Armbruster             }
24076ab4b5abSMarkus Armbruster         }
2408ed78cda3SStefan Hajnoczi         aio_context_release(aio_context);
2409272d2d8eSJeff Cody     }
2410e8877497SStefan Hajnoczi     return 0;
2411e8877497SStefan Hajnoczi }
24126ab4b5abSMarkus Armbruster 
2413dbffbdcfSStefan Hajnoczi /**
2414dbffbdcfSStefan Hajnoczi  * Remove an active request from the tracked requests list
2415dbffbdcfSStefan Hajnoczi  *
2416dbffbdcfSStefan Hajnoczi  * This function should be called when a tracked request is completing.
2417dbffbdcfSStefan Hajnoczi  */
2418dbffbdcfSStefan Hajnoczi static void tracked_request_end(BdrvTrackedRequest *req)
2419dbffbdcfSStefan Hajnoczi {
24202dbafdc0SKevin Wolf     if (req->serialising) {
24212dbafdc0SKevin Wolf         req->bs->serialising_in_flight--;
24222dbafdc0SKevin Wolf     }
24232dbafdc0SKevin Wolf 
2424dbffbdcfSStefan Hajnoczi     QLIST_REMOVE(req, list);
2425f4658285SStefan Hajnoczi     qemu_co_queue_restart_all(&req->wait_queue);
2426dbffbdcfSStefan Hajnoczi }
2427dbffbdcfSStefan Hajnoczi 
2428dbffbdcfSStefan Hajnoczi /**
2429dbffbdcfSStefan Hajnoczi  * Add an active request to the tracked requests list
2430dbffbdcfSStefan Hajnoczi  */
2431dbffbdcfSStefan Hajnoczi static void tracked_request_begin(BdrvTrackedRequest *req,
2432dbffbdcfSStefan Hajnoczi                                   BlockDriverState *bs,
2433793ed47aSKevin Wolf                                   int64_t offset,
2434793ed47aSKevin Wolf                                   unsigned int bytes, bool is_write)
2435dbffbdcfSStefan Hajnoczi {
2436dbffbdcfSStefan Hajnoczi     *req = (BdrvTrackedRequest){
2437dbffbdcfSStefan Hajnoczi         .bs = bs,
2438793ed47aSKevin Wolf         .offset         = offset,
2439793ed47aSKevin Wolf         .bytes          = bytes,
2440dbffbdcfSStefan Hajnoczi         .is_write       = is_write,
24415f8b6491SStefan Hajnoczi         .co             = qemu_coroutine_self(),
24422dbafdc0SKevin Wolf         .serialising    = false,
24437327145fSKevin Wolf         .overlap_offset = offset,
24447327145fSKevin Wolf         .overlap_bytes  = bytes,
2445dbffbdcfSStefan Hajnoczi     };
2446dbffbdcfSStefan Hajnoczi 
2447f4658285SStefan Hajnoczi     qemu_co_queue_init(&req->wait_queue);
2448f4658285SStefan Hajnoczi 
2449dbffbdcfSStefan Hajnoczi     QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
2450dbffbdcfSStefan Hajnoczi }
2451dbffbdcfSStefan Hajnoczi 
2452e96126ffSKevin Wolf static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
24532dbafdc0SKevin Wolf {
24547327145fSKevin Wolf     int64_t overlap_offset = req->offset & ~(align - 1);
2455e96126ffSKevin Wolf     unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
24567327145fSKevin Wolf                                - overlap_offset;
24577327145fSKevin Wolf 
24582dbafdc0SKevin Wolf     if (!req->serialising) {
24592dbafdc0SKevin Wolf         req->bs->serialising_in_flight++;
24602dbafdc0SKevin Wolf         req->serialising = true;
24612dbafdc0SKevin Wolf     }
24627327145fSKevin Wolf 
24637327145fSKevin Wolf     req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
24647327145fSKevin Wolf     req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
24652dbafdc0SKevin Wolf }
24662dbafdc0SKevin Wolf 
2467d83947acSStefan Hajnoczi /**
2468d83947acSStefan Hajnoczi  * Round a region to cluster boundaries
2469d83947acSStefan Hajnoczi  */
2470343bded4SPaolo Bonzini void bdrv_round_to_clusters(BlockDriverState *bs,
2471d83947acSStefan Hajnoczi                             int64_t sector_num, int nb_sectors,
2472d83947acSStefan Hajnoczi                             int64_t *cluster_sector_num,
2473d83947acSStefan Hajnoczi                             int *cluster_nb_sectors)
2474d83947acSStefan Hajnoczi {
2475d83947acSStefan Hajnoczi     BlockDriverInfo bdi;
2476d83947acSStefan Hajnoczi 
2477d83947acSStefan Hajnoczi     if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
2478d83947acSStefan Hajnoczi         *cluster_sector_num = sector_num;
2479d83947acSStefan Hajnoczi         *cluster_nb_sectors = nb_sectors;
2480d83947acSStefan Hajnoczi     } else {
2481d83947acSStefan Hajnoczi         int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
2482d83947acSStefan Hajnoczi         *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
2483d83947acSStefan Hajnoczi         *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
2484d83947acSStefan Hajnoczi                                             nb_sectors, c);
2485d83947acSStefan Hajnoczi     }
2486d83947acSStefan Hajnoczi }
2487d83947acSStefan Hajnoczi 
24887327145fSKevin Wolf static int bdrv_get_cluster_size(BlockDriverState *bs)
2489793ed47aSKevin Wolf {
2490793ed47aSKevin Wolf     BlockDriverInfo bdi;
24917327145fSKevin Wolf     int ret;
2492793ed47aSKevin Wolf 
24937327145fSKevin Wolf     ret = bdrv_get_info(bs, &bdi);
24947327145fSKevin Wolf     if (ret < 0 || bdi.cluster_size == 0) {
24957327145fSKevin Wolf         return bs->request_alignment;
2496793ed47aSKevin Wolf     } else {
24977327145fSKevin Wolf         return bdi.cluster_size;
2498793ed47aSKevin Wolf     }
2499793ed47aSKevin Wolf }
2500793ed47aSKevin Wolf 
2501f4658285SStefan Hajnoczi static bool tracked_request_overlaps(BdrvTrackedRequest *req,
2502793ed47aSKevin Wolf                                      int64_t offset, unsigned int bytes)
2503793ed47aSKevin Wolf {
2504d83947acSStefan Hajnoczi     /*        aaaa   bbbb */
25057327145fSKevin Wolf     if (offset >= req->overlap_offset + req->overlap_bytes) {
2506d83947acSStefan Hajnoczi         return false;
2507d83947acSStefan Hajnoczi     }
2508d83947acSStefan Hajnoczi     /* bbbb   aaaa        */
25097327145fSKevin Wolf     if (req->overlap_offset >= offset + bytes) {
2510d83947acSStefan Hajnoczi         return false;
2511d83947acSStefan Hajnoczi     }
2512d83947acSStefan Hajnoczi     return true;
2513f4658285SStefan Hajnoczi }
2514f4658285SStefan Hajnoczi 
251528de2dcdSKevin Wolf static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
2516f4658285SStefan Hajnoczi {
25172dbafdc0SKevin Wolf     BlockDriverState *bs = self->bs;
2518f4658285SStefan Hajnoczi     BdrvTrackedRequest *req;
2519f4658285SStefan Hajnoczi     bool retry;
252028de2dcdSKevin Wolf     bool waited = false;
2521f4658285SStefan Hajnoczi 
25222dbafdc0SKevin Wolf     if (!bs->serialising_in_flight) {
252328de2dcdSKevin Wolf         return false;
25242dbafdc0SKevin Wolf     }
25252dbafdc0SKevin Wolf 
2526f4658285SStefan Hajnoczi     do {
2527f4658285SStefan Hajnoczi         retry = false;
2528f4658285SStefan Hajnoczi         QLIST_FOREACH(req, &bs->tracked_requests, list) {
25292dbafdc0SKevin Wolf             if (req == self || (!req->serialising && !self->serialising)) {
253065afd211SKevin Wolf                 continue;
253165afd211SKevin Wolf             }
25327327145fSKevin Wolf             if (tracked_request_overlaps(req, self->overlap_offset,
25337327145fSKevin Wolf                                          self->overlap_bytes))
25347327145fSKevin Wolf             {
25355f8b6491SStefan Hajnoczi                 /* Hitting this means there was a reentrant request, for
25365f8b6491SStefan Hajnoczi                  * example, a block driver issuing nested requests.  This must
25375f8b6491SStefan Hajnoczi                  * never happen since it means deadlock.
25385f8b6491SStefan Hajnoczi                  */
25395f8b6491SStefan Hajnoczi                 assert(qemu_coroutine_self() != req->co);
25405f8b6491SStefan Hajnoczi 
25416460440fSKevin Wolf                 /* If the request is already (indirectly) waiting for us, or
25426460440fSKevin Wolf                  * will wait for us as soon as it wakes up, then just go on
25436460440fSKevin Wolf                  * (instead of producing a deadlock in the former case). */
25446460440fSKevin Wolf                 if (!req->waiting_for) {
25456460440fSKevin Wolf                     self->waiting_for = req;
2546f4658285SStefan Hajnoczi                     qemu_co_queue_wait(&req->wait_queue);
25476460440fSKevin Wolf                     self->waiting_for = NULL;
2548f4658285SStefan Hajnoczi                     retry = true;
254928de2dcdSKevin Wolf                     waited = true;
2550f4658285SStefan Hajnoczi                     break;
2551f4658285SStefan Hajnoczi                 }
2552f4658285SStefan Hajnoczi             }
25536460440fSKevin Wolf         }
2554f4658285SStefan Hajnoczi     } while (retry);
255528de2dcdSKevin Wolf 
255628de2dcdSKevin Wolf     return waited;
2557f4658285SStefan Hajnoczi }
2558f4658285SStefan Hajnoczi 
2559756e6736SKevin Wolf /*
2560756e6736SKevin Wolf  * Return values:
2561756e6736SKevin Wolf  * 0        - success
2562756e6736SKevin Wolf  * -EINVAL  - backing format specified, but no file
2563756e6736SKevin Wolf  * -ENOSPC  - can't update the backing file because no space is left in the
2564756e6736SKevin Wolf  *            image file header
2565756e6736SKevin Wolf  * -ENOTSUP - format driver doesn't support changing the backing file
2566756e6736SKevin Wolf  */
2567756e6736SKevin Wolf int bdrv_change_backing_file(BlockDriverState *bs,
2568756e6736SKevin Wolf     const char *backing_file, const char *backing_fmt)
2569756e6736SKevin Wolf {
2570756e6736SKevin Wolf     BlockDriver *drv = bs->drv;
2571469ef350SPaolo Bonzini     int ret;
2572756e6736SKevin Wolf 
25735f377794SPaolo Bonzini     /* Backing file format doesn't make sense without a backing file */
25745f377794SPaolo Bonzini     if (backing_fmt && !backing_file) {
25755f377794SPaolo Bonzini         return -EINVAL;
25765f377794SPaolo Bonzini     }
25775f377794SPaolo Bonzini 
2578756e6736SKevin Wolf     if (drv->bdrv_change_backing_file != NULL) {
2579469ef350SPaolo Bonzini         ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2580756e6736SKevin Wolf     } else {
2581469ef350SPaolo Bonzini         ret = -ENOTSUP;
2582756e6736SKevin Wolf     }
2583469ef350SPaolo Bonzini 
2584469ef350SPaolo Bonzini     if (ret == 0) {
2585469ef350SPaolo Bonzini         pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2586469ef350SPaolo Bonzini         pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2587469ef350SPaolo Bonzini     }
2588469ef350SPaolo Bonzini     return ret;
2589756e6736SKevin Wolf }
2590756e6736SKevin Wolf 
25916ebdcee2SJeff Cody /*
25926ebdcee2SJeff Cody  * Finds the image layer in the chain that has 'bs' as its backing file.
25936ebdcee2SJeff Cody  *
25946ebdcee2SJeff Cody  * active is the current topmost image.
25956ebdcee2SJeff Cody  *
25966ebdcee2SJeff Cody  * Returns NULL if bs is not found in active's image chain,
25976ebdcee2SJeff Cody  * or if active == bs.
25984caf0fcdSJeff Cody  *
25994caf0fcdSJeff Cody  * Returns the bottommost base image if bs == NULL.
26006ebdcee2SJeff Cody  */
26016ebdcee2SJeff Cody BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
26026ebdcee2SJeff Cody                                     BlockDriverState *bs)
26036ebdcee2SJeff Cody {
26044caf0fcdSJeff Cody     while (active && bs != active->backing_hd) {
26054caf0fcdSJeff Cody         active = active->backing_hd;
26066ebdcee2SJeff Cody     }
26076ebdcee2SJeff Cody 
26084caf0fcdSJeff Cody     return active;
26096ebdcee2SJeff Cody }
26106ebdcee2SJeff Cody 
26114caf0fcdSJeff Cody /* Given a BDS, searches for the base layer. */
26124caf0fcdSJeff Cody BlockDriverState *bdrv_find_base(BlockDriverState *bs)
26134caf0fcdSJeff Cody {
26144caf0fcdSJeff Cody     return bdrv_find_overlay(bs, NULL);
26156ebdcee2SJeff Cody }
26166ebdcee2SJeff Cody 
/*
 * Element of a simple queue of block driver states.  bdrv_drop_intermediate()
 * builds such a queue (states_to_delete) of the layers it is about to drop.
 */
26176ebdcee2SJeff Cody typedef struct BlkIntermediateStates {
    /* The image layer this element refers to */
26186ebdcee2SJeff Cody     BlockDriverState *bs;
    /* Queue linkage */
26196ebdcee2SJeff Cody     QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
26206ebdcee2SJeff Cody } BlkIntermediateStates;
26216ebdcee2SJeff Cody 
26226ebdcee2SJeff Cody 
26236ebdcee2SJeff Cody /*
26246ebdcee2SJeff Cody  * Drops images above 'base' up to and including 'top', and sets the image
26256ebdcee2SJeff Cody  * above 'top' to have base as its backing file.
26266ebdcee2SJeff Cody  *
26276ebdcee2SJeff Cody  * Requires that the overlay to 'top' is opened r/w, so that the backing file
26286ebdcee2SJeff Cody  * information in 'bs' can be properly updated.
26296ebdcee2SJeff Cody  *
26306ebdcee2SJeff Cody  * E.g., this will convert the following chain:
26316ebdcee2SJeff Cody  * bottom <- base <- intermediate <- top <- active
26326ebdcee2SJeff Cody  *
26336ebdcee2SJeff Cody  * to
26346ebdcee2SJeff Cody  *
26356ebdcee2SJeff Cody  * bottom <- base <- active
26366ebdcee2SJeff Cody  *
26376ebdcee2SJeff Cody  * It is allowed for bottom==base, in which case it converts:
26386ebdcee2SJeff Cody  *
26396ebdcee2SJeff Cody  * base <- intermediate <- top <- active
26406ebdcee2SJeff Cody  *
26416ebdcee2SJeff Cody  * to
26426ebdcee2SJeff Cody  *
26436ebdcee2SJeff Cody  * base <- active
26446ebdcee2SJeff Cody  *
264554e26900SJeff Cody  * If backing_file_str is non-NULL, it will be used when modifying top's
264654e26900SJeff Cody  * overlay image metadata.
264754e26900SJeff Cody  *
26486ebdcee2SJeff Cody  * Error conditions:
26496ebdcee2SJeff Cody  *  if active == top, that is considered an error
26506ebdcee2SJeff Cody  *
26516ebdcee2SJeff Cody  */
26526ebdcee2SJeff Cody int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
265354e26900SJeff Cody                            BlockDriverState *base, const char *backing_file_str)
26546ebdcee2SJeff Cody {
26556ebdcee2SJeff Cody     BlockDriverState *intermediate;
26566ebdcee2SJeff Cody     BlockDriverState *base_bs = NULL;
26576ebdcee2SJeff Cody     BlockDriverState *new_top_bs = NULL;
26586ebdcee2SJeff Cody     BlkIntermediateStates *intermediate_state, *next;
26596ebdcee2SJeff Cody     int ret = -EIO;
26606ebdcee2SJeff Cody 
26616ebdcee2SJeff Cody     QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
26626ebdcee2SJeff Cody     QSIMPLEQ_INIT(&states_to_delete);
26636ebdcee2SJeff Cody 
26646ebdcee2SJeff Cody     if (!top->drv || !base->drv) {
26656ebdcee2SJeff Cody         goto exit;
26666ebdcee2SJeff Cody     }
26676ebdcee2SJeff Cody 
26686ebdcee2SJeff Cody     new_top_bs = bdrv_find_overlay(active, top);
26696ebdcee2SJeff Cody 
26706ebdcee2SJeff Cody     if (new_top_bs == NULL) {
26716ebdcee2SJeff Cody         /* we could not find the image above 'top', this is an error */
26726ebdcee2SJeff Cody         goto exit;
26736ebdcee2SJeff Cody     }
26746ebdcee2SJeff Cody 
26756ebdcee2SJeff Cody     /* special case of new_top_bs->backing_hd already pointing to base - nothing
26766ebdcee2SJeff Cody      * to do, no intermediate images */
26776ebdcee2SJeff Cody     if (new_top_bs->backing_hd == base) {
26786ebdcee2SJeff Cody         ret = 0;
26796ebdcee2SJeff Cody         goto exit;
26806ebdcee2SJeff Cody     }
26816ebdcee2SJeff Cody 
26826ebdcee2SJeff Cody     intermediate = top;
26836ebdcee2SJeff Cody 
26846ebdcee2SJeff Cody     /* now we will go down through the list, and add each BDS we find
26856ebdcee2SJeff Cody      * into our deletion queue, until we hit the 'base'
26866ebdcee2SJeff Cody      */
26876ebdcee2SJeff Cody     while (intermediate) {
26885839e53bSMarkus Armbruster         intermediate_state = g_new0(BlkIntermediateStates, 1);
26896ebdcee2SJeff Cody         intermediate_state->bs = intermediate;
26906ebdcee2SJeff Cody         QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
26916ebdcee2SJeff Cody 
26926ebdcee2SJeff Cody         if (intermediate->backing_hd == base) {
26936ebdcee2SJeff Cody             base_bs = intermediate->backing_hd;
26946ebdcee2SJeff Cody             break;
26956ebdcee2SJeff Cody         }
26966ebdcee2SJeff Cody         intermediate = intermediate->backing_hd;
26976ebdcee2SJeff Cody     }
26986ebdcee2SJeff Cody     if (base_bs == NULL) {
26996ebdcee2SJeff Cody         /* something went wrong, we did not end at the base. safely
27006ebdcee2SJeff Cody          * unravel everything, and exit with error */
27016ebdcee2SJeff Cody         goto exit;
27026ebdcee2SJeff Cody     }
27036ebdcee2SJeff Cody 
27046ebdcee2SJeff Cody     /* success - we can delete the intermediate states, and link top->base */
270554e26900SJeff Cody     backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
270654e26900SJeff Cody     ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
27076ebdcee2SJeff Cody                                    base_bs->drv ? base_bs->drv->format_name : "");
27086ebdcee2SJeff Cody     if (ret) {
27096ebdcee2SJeff Cody         goto exit;
27106ebdcee2SJeff Cody     }
2711920beae1SFam Zheng     bdrv_set_backing_hd(new_top_bs, base_bs);
27126ebdcee2SJeff Cody 
27136ebdcee2SJeff Cody     QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
27146ebdcee2SJeff Cody         /* so that bdrv_close() does not recursively close the chain */
2715920beae1SFam Zheng         bdrv_set_backing_hd(intermediate_state->bs, NULL);
27164f6fd349SFam Zheng         bdrv_unref(intermediate_state->bs);
27176ebdcee2SJeff Cody     }
27186ebdcee2SJeff Cody     ret = 0;
27196ebdcee2SJeff Cody 
27206ebdcee2SJeff Cody exit:
27216ebdcee2SJeff Cody     QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
27226ebdcee2SJeff Cody         g_free(intermediate_state);
27236ebdcee2SJeff Cody     }
27246ebdcee2SJeff Cody     return ret;
27256ebdcee2SJeff Cody }
27266ebdcee2SJeff Cody 
27276ebdcee2SJeff Cody 
272871d0770cSaliguori static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
272971d0770cSaliguori                                    size_t size)
273071d0770cSaliguori {
273175af1f34SPeter Lieven     if (size > BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) {
27321dd3a447SKevin Wolf         return -EIO;
27331dd3a447SKevin Wolf     }
27341dd3a447SKevin Wolf 
2735c0191e76SMax Reitz     if (!bdrv_is_inserted(bs)) {
273671d0770cSaliguori         return -ENOMEDIUM;
2737c0191e76SMax Reitz     }
273871d0770cSaliguori 
2739c0191e76SMax Reitz     if (offset < 0) {
2740fbb7b4e0SKevin Wolf         return -EIO;
2741c0191e76SMax Reitz     }
274271d0770cSaliguori 
274371d0770cSaliguori     return 0;
274471d0770cSaliguori }
274571d0770cSaliguori 
274671d0770cSaliguori static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
274771d0770cSaliguori                               int nb_sectors)
274871d0770cSaliguori {
274975af1f34SPeter Lieven     if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
27508f4754edSKevin Wolf         return -EIO;
27518f4754edSKevin Wolf     }
27528f4754edSKevin Wolf 
2753eb5a3165SJes Sorensen     return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
2754eb5a3165SJes Sorensen                                    nb_sectors * BDRV_SECTOR_SIZE);
275571d0770cSaliguori }
275671d0770cSaliguori 
/*
 * Argument/result bundle handed to bdrv_rw_co_entry() by bdrv_prwv_co().
 */
27571c9805a3SStefan Hajnoczi typedef struct RwCo {
    /* Device the request is issued on */
27581c9805a3SStefan Hajnoczi     BlockDriverState *bs;
    /* Byte offset of the request */
2759775aa8b6SKevin Wolf     int64_t offset;
    /* I/O vector; its size field gives the request length */
27601c9805a3SStefan Hajnoczi     QEMUIOVector *qiov;
    /* true selects bdrv_co_do_pwritev(), false bdrv_co_do_preadv() */
27611c9805a3SStefan Hajnoczi     bool is_write;
    /* Set to NOT_DONE by bdrv_prwv_co(), then to the request's result */
27621c9805a3SStefan Hajnoczi     int ret;
    /* Request flags passed through to the coroutine read/write call */
27634105eaaaSPeter Lieven     BdrvRequestFlags flags;
27641c9805a3SStefan Hajnoczi } RwCo;
27651c9805a3SStefan Hajnoczi 
27661c9805a3SStefan Hajnoczi static void coroutine_fn bdrv_rw_co_entry(void *opaque)
2767fc01f7e7Sbellard {
27681c9805a3SStefan Hajnoczi     RwCo *rwco = opaque;
2769fc01f7e7Sbellard 
27701c9805a3SStefan Hajnoczi     if (!rwco->is_write) {
2771775aa8b6SKevin Wolf         rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
2772775aa8b6SKevin Wolf                                       rwco->qiov->size, rwco->qiov,
27734105eaaaSPeter Lieven                                       rwco->flags);
27741c9805a3SStefan Hajnoczi     } else {
2775775aa8b6SKevin Wolf         rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
2776775aa8b6SKevin Wolf                                        rwco->qiov->size, rwco->qiov,
27774105eaaaSPeter Lieven                                        rwco->flags);
27781c9805a3SStefan Hajnoczi     }
27791c9805a3SStefan Hajnoczi }
2780e7a8a783SKevin Wolf 
27811c9805a3SStefan Hajnoczi /*
27828d3b1a2dSKevin Wolf  * Process a vectored synchronous request using coroutines
27831c9805a3SStefan Hajnoczi  */
2784775aa8b6SKevin Wolf static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
27854105eaaaSPeter Lieven                         QEMUIOVector *qiov, bool is_write,
27864105eaaaSPeter Lieven                         BdrvRequestFlags flags)
27871c9805a3SStefan Hajnoczi {
27881c9805a3SStefan Hajnoczi     Coroutine *co;
27891c9805a3SStefan Hajnoczi     RwCo rwco = {
27901c9805a3SStefan Hajnoczi         .bs = bs,
2791775aa8b6SKevin Wolf         .offset = offset,
27928d3b1a2dSKevin Wolf         .qiov = qiov,
27931c9805a3SStefan Hajnoczi         .is_write = is_write,
27941c9805a3SStefan Hajnoczi         .ret = NOT_DONE,
27954105eaaaSPeter Lieven         .flags = flags,
27961c9805a3SStefan Hajnoczi     };
27971c9805a3SStefan Hajnoczi 
2798498e386cSZhi Yong Wu     /**
2799498e386cSZhi Yong Wu      * In sync call context, when the vcpu is blocked, this throttling timer
2800498e386cSZhi Yong Wu      * will not fire; so the I/O throttling function has to be disabled here
2801498e386cSZhi Yong Wu      * if it has been enabled.
2802498e386cSZhi Yong Wu      */
2803498e386cSZhi Yong Wu     if (bs->io_limits_enabled) {
2804498e386cSZhi Yong Wu         fprintf(stderr, "Disabling I/O throttling on '%s' due "
2805498e386cSZhi Yong Wu                         "to synchronous I/O.\n", bdrv_get_device_name(bs));
2806498e386cSZhi Yong Wu         bdrv_io_limits_disable(bs);
2807498e386cSZhi Yong Wu     }
2808498e386cSZhi Yong Wu 
28091c9805a3SStefan Hajnoczi     if (qemu_in_coroutine()) {
28101c9805a3SStefan Hajnoczi         /* Fast-path if already in coroutine context */
28111c9805a3SStefan Hajnoczi         bdrv_rw_co_entry(&rwco);
28121c9805a3SStefan Hajnoczi     } else {
28132572b37aSStefan Hajnoczi         AioContext *aio_context = bdrv_get_aio_context(bs);
28142572b37aSStefan Hajnoczi 
28151c9805a3SStefan Hajnoczi         co = qemu_coroutine_create(bdrv_rw_co_entry);
28161c9805a3SStefan Hajnoczi         qemu_coroutine_enter(co, &rwco);
28171c9805a3SStefan Hajnoczi         while (rwco.ret == NOT_DONE) {
28182572b37aSStefan Hajnoczi             aio_poll(aio_context, true);
28191c9805a3SStefan Hajnoczi         }
28201c9805a3SStefan Hajnoczi     }
28211c9805a3SStefan Hajnoczi     return rwco.ret;
2822e7a8a783SKevin Wolf }
2823e7a8a783SKevin Wolf 
28248d3b1a2dSKevin Wolf /*
28258d3b1a2dSKevin Wolf  * Process a synchronous request using coroutines
28268d3b1a2dSKevin Wolf  */
28278d3b1a2dSKevin Wolf static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
28284105eaaaSPeter Lieven                       int nb_sectors, bool is_write, BdrvRequestFlags flags)
28298d3b1a2dSKevin Wolf {
28308d3b1a2dSKevin Wolf     QEMUIOVector qiov;
28318d3b1a2dSKevin Wolf     struct iovec iov = {
28328d3b1a2dSKevin Wolf         .iov_base = (void *)buf,
28338d3b1a2dSKevin Wolf         .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
28348d3b1a2dSKevin Wolf     };
28358d3b1a2dSKevin Wolf 
283675af1f34SPeter Lieven     if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
2837da15ee51SKevin Wolf         return -EINVAL;
2838da15ee51SKevin Wolf     }
2839da15ee51SKevin Wolf 
28408d3b1a2dSKevin Wolf     qemu_iovec_init_external(&qiov, &iov, 1);
2841775aa8b6SKevin Wolf     return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
2842775aa8b6SKevin Wolf                         &qiov, is_write, flags);
28438d3b1a2dSKevin Wolf }
28448d3b1a2dSKevin Wolf 
28451c9805a3SStefan Hajnoczi /* return < 0 if error. See bdrv_write() for the return codes */
28461c9805a3SStefan Hajnoczi int bdrv_read(BlockDriverState *bs, int64_t sector_num,
28471c9805a3SStefan Hajnoczi               uint8_t *buf, int nb_sectors)
28481c9805a3SStefan Hajnoczi {
28494105eaaaSPeter Lieven     return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
285083f64091Sbellard }
2851fc01f7e7Sbellard 
285207d27a44SMarkus Armbruster /* Just like bdrv_read(), but with I/O throttling temporarily disabled */
285307d27a44SMarkus Armbruster int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
285407d27a44SMarkus Armbruster                           uint8_t *buf, int nb_sectors)
285507d27a44SMarkus Armbruster {
285607d27a44SMarkus Armbruster     bool enabled;
285707d27a44SMarkus Armbruster     int ret;
285807d27a44SMarkus Armbruster 
285907d27a44SMarkus Armbruster     enabled = bs->io_limits_enabled;
286007d27a44SMarkus Armbruster     bs->io_limits_enabled = false;
28614e7395e8SPeter Lieven     ret = bdrv_read(bs, sector_num, buf, nb_sectors);
286207d27a44SMarkus Armbruster     bs->io_limits_enabled = enabled;
286307d27a44SMarkus Armbruster     return ret;
286407d27a44SMarkus Armbruster }
286507d27a44SMarkus Armbruster 
286619cb3738Sbellard /* Return < 0 if error. Important errors are:
286719cb3738Sbellard   -EIO         generic I/O error (may happen for all errors)
286819cb3738Sbellard   -ENOMEDIUM   No media inserted.
286919cb3738Sbellard   -EINVAL      Invalid sector number or nb_sectors
287019cb3738Sbellard   -EACCES      Trying to write a read-only device
287119cb3738Sbellard */
2872fc01f7e7Sbellard int bdrv_write(BlockDriverState *bs, int64_t sector_num,
2873fc01f7e7Sbellard                const uint8_t *buf, int nb_sectors)
2874fc01f7e7Sbellard {
28754105eaaaSPeter Lieven     return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
287683f64091Sbellard }
287783f64091Sbellard 
2878aa7bfbffSPeter Lieven int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
2879aa7bfbffSPeter Lieven                       int nb_sectors, BdrvRequestFlags flags)
28804105eaaaSPeter Lieven {
28814105eaaaSPeter Lieven     return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
2882aa7bfbffSPeter Lieven                       BDRV_REQ_ZERO_WRITE | flags);
28838d3b1a2dSKevin Wolf }
28848d3b1a2dSKevin Wolf 
2885d75cbb5eSPeter Lieven /*
2886d75cbb5eSPeter Lieven  * Completely zero out a block device with the help of bdrv_write_zeroes.
2887d75cbb5eSPeter Lieven  * The operation is sped up by checking the block status and only writing
2888d75cbb5eSPeter Lieven  * zeroes to the device if they currently do not return zeroes. Optional
2889d75cbb5eSPeter Lieven  * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
2890d75cbb5eSPeter Lieven  *
2891d75cbb5eSPeter Lieven  * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
2892d75cbb5eSPeter Lieven  */
2893d75cbb5eSPeter Lieven int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
2894d75cbb5eSPeter Lieven {
2895d32f7c10SMarkus Armbruster     int64_t target_sectors, ret, nb_sectors, sector_num = 0;
2896d75cbb5eSPeter Lieven     int n;
2897d75cbb5eSPeter Lieven 
2898d32f7c10SMarkus Armbruster     target_sectors = bdrv_nb_sectors(bs);
2899d32f7c10SMarkus Armbruster     if (target_sectors < 0) {
2900d32f7c10SMarkus Armbruster         return target_sectors;
29019ce10c0bSKevin Wolf     }
29029ce10c0bSKevin Wolf 
2903d75cbb5eSPeter Lieven     for (;;) {
290475af1f34SPeter Lieven         nb_sectors = MIN(target_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
2905d75cbb5eSPeter Lieven         if (nb_sectors <= 0) {
2906d75cbb5eSPeter Lieven             return 0;
2907d75cbb5eSPeter Lieven         }
2908d75cbb5eSPeter Lieven         ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
29093d94ce60SPeter Lieven         if (ret < 0) {
29103d94ce60SPeter Lieven             error_report("error getting block status at sector %" PRId64 ": %s",
29113d94ce60SPeter Lieven                          sector_num, strerror(-ret));
29123d94ce60SPeter Lieven             return ret;
29133d94ce60SPeter Lieven         }
2914d75cbb5eSPeter Lieven         if (ret & BDRV_BLOCK_ZERO) {
2915d75cbb5eSPeter Lieven             sector_num += n;
2916d75cbb5eSPeter Lieven             continue;
2917d75cbb5eSPeter Lieven         }
2918d75cbb5eSPeter Lieven         ret = bdrv_write_zeroes(bs, sector_num, n, flags);
2919d75cbb5eSPeter Lieven         if (ret < 0) {
2920d75cbb5eSPeter Lieven             error_report("error writing zeroes at sector %" PRId64 ": %s",
2921d75cbb5eSPeter Lieven                          sector_num, strerror(-ret));
2922d75cbb5eSPeter Lieven             return ret;
2923d75cbb5eSPeter Lieven         }
2924d75cbb5eSPeter Lieven         sector_num += n;
2925d75cbb5eSPeter Lieven     }
2926d75cbb5eSPeter Lieven }
2927d75cbb5eSPeter Lieven 
2928a3ef6571SKevin Wolf int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
292983f64091Sbellard {
2930a3ef6571SKevin Wolf     QEMUIOVector qiov;
2931a3ef6571SKevin Wolf     struct iovec iov = {
2932a3ef6571SKevin Wolf         .iov_base = (void *)buf,
2933a3ef6571SKevin Wolf         .iov_len = bytes,
2934a3ef6571SKevin Wolf     };
29359a8c4cceSKevin Wolf     int ret;
293683f64091Sbellard 
2937a3ef6571SKevin Wolf     if (bytes < 0) {
2938a3ef6571SKevin Wolf         return -EINVAL;
293983f64091Sbellard     }
294083f64091Sbellard 
2941a3ef6571SKevin Wolf     qemu_iovec_init_external(&qiov, &iov, 1);
2942a3ef6571SKevin Wolf     ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
2943a3ef6571SKevin Wolf     if (ret < 0) {
29449a8c4cceSKevin Wolf         return ret;
294583f64091Sbellard     }
294683f64091Sbellard 
2947a3ef6571SKevin Wolf     return bytes;
294883f64091Sbellard }
294983f64091Sbellard 
29508d3b1a2dSKevin Wolf int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
295183f64091Sbellard {
29529a8c4cceSKevin Wolf     int ret;
295383f64091Sbellard 
29548407d5d7SKevin Wolf     ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
29558d3b1a2dSKevin Wolf     if (ret < 0) {
29569a8c4cceSKevin Wolf         return ret;
29578d3b1a2dSKevin Wolf     }
29588d3b1a2dSKevin Wolf 
29598d3b1a2dSKevin Wolf     return qiov->size;
29608d3b1a2dSKevin Wolf }
29618d3b1a2dSKevin Wolf 
29628d3b1a2dSKevin Wolf int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
29638407d5d7SKevin Wolf                 const void *buf, int bytes)
29648d3b1a2dSKevin Wolf {
29658d3b1a2dSKevin Wolf     QEMUIOVector qiov;
29668d3b1a2dSKevin Wolf     struct iovec iov = {
29678d3b1a2dSKevin Wolf         .iov_base   = (void *) buf,
29688407d5d7SKevin Wolf         .iov_len    = bytes,
29698d3b1a2dSKevin Wolf     };
29708d3b1a2dSKevin Wolf 
29718407d5d7SKevin Wolf     if (bytes < 0) {
29728407d5d7SKevin Wolf         return -EINVAL;
29738407d5d7SKevin Wolf     }
29748407d5d7SKevin Wolf 
29758d3b1a2dSKevin Wolf     qemu_iovec_init_external(&qiov, &iov, 1);
29768d3b1a2dSKevin Wolf     return bdrv_pwritev(bs, offset, &qiov);
297783f64091Sbellard }
297883f64091Sbellard 
2979f08145feSKevin Wolf /*
2980f08145feSKevin Wolf  * Writes to the file and ensures that no writes are reordered across this
2981f08145feSKevin Wolf  * request (acts as a barrier)
2982f08145feSKevin Wolf  *
2983f08145feSKevin Wolf  * Returns 0 on success, -errno in error cases.
2984f08145feSKevin Wolf  */
2985f08145feSKevin Wolf int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
2986f08145feSKevin Wolf     const void *buf, int count)
2987f08145feSKevin Wolf {
2988f08145feSKevin Wolf     int ret;
2989f08145feSKevin Wolf 
2990f08145feSKevin Wolf     ret = bdrv_pwrite(bs, offset, buf, count);
2991f08145feSKevin Wolf     if (ret < 0) {
2992f08145feSKevin Wolf         return ret;
2993f08145feSKevin Wolf     }
2994f08145feSKevin Wolf 
2995f05fa4adSPaolo Bonzini     /* No flush needed for cache modes that already do it */
2996f05fa4adSPaolo Bonzini     if (bs->enable_write_cache) {
2997f08145feSKevin Wolf         bdrv_flush(bs);
2998f08145feSKevin Wolf     }
2999f08145feSKevin Wolf 
3000f08145feSKevin Wolf     return 0;
3001f08145feSKevin Wolf }
3002f08145feSKevin Wolf 
3003470c0504SStefan Hajnoczi static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
3004ab185921SStefan Hajnoczi         int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
3005ab185921SStefan Hajnoczi {
3006ab185921SStefan Hajnoczi     /* Perform I/O through a temporary buffer so that users who scribble over
3007ab185921SStefan Hajnoczi      * their read buffer while the operation is in progress do not end up
3008ab185921SStefan Hajnoczi      * modifying the image file.  This is critical for zero-copy guest I/O
3009ab185921SStefan Hajnoczi      * where anything might happen inside guest memory.
3010ab185921SStefan Hajnoczi      */
3011ab185921SStefan Hajnoczi     void *bounce_buffer;
3012ab185921SStefan Hajnoczi 
301379c053bdSStefan Hajnoczi     BlockDriver *drv = bs->drv;
3014ab185921SStefan Hajnoczi     struct iovec iov;
3015ab185921SStefan Hajnoczi     QEMUIOVector bounce_qiov;
3016ab185921SStefan Hajnoczi     int64_t cluster_sector_num;
3017ab185921SStefan Hajnoczi     int cluster_nb_sectors;
3018ab185921SStefan Hajnoczi     size_t skip_bytes;
3019ab185921SStefan Hajnoczi     int ret;
3020ab185921SStefan Hajnoczi 
3021ab185921SStefan Hajnoczi     /* Cover entire cluster so no additional backing file I/O is required when
3022ab185921SStefan Hajnoczi      * allocating cluster in the image file.
3023ab185921SStefan Hajnoczi      */
3024343bded4SPaolo Bonzini     bdrv_round_to_clusters(bs, sector_num, nb_sectors,
3025ab185921SStefan Hajnoczi                            &cluster_sector_num, &cluster_nb_sectors);
3026ab185921SStefan Hajnoczi 
3027470c0504SStefan Hajnoczi     trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
3028ab185921SStefan Hajnoczi                                    cluster_sector_num, cluster_nb_sectors);
3029ab185921SStefan Hajnoczi 
3030ab185921SStefan Hajnoczi     iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
3031857d4f46SKevin Wolf     iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
3032857d4f46SKevin Wolf     if (bounce_buffer == NULL) {
3033857d4f46SKevin Wolf         ret = -ENOMEM;
3034857d4f46SKevin Wolf         goto err;
3035857d4f46SKevin Wolf     }
3036857d4f46SKevin Wolf 
3037ab185921SStefan Hajnoczi     qemu_iovec_init_external(&bounce_qiov, &iov, 1);
3038ab185921SStefan Hajnoczi 
303979c053bdSStefan Hajnoczi     ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
3040ab185921SStefan Hajnoczi                              &bounce_qiov);
3041ab185921SStefan Hajnoczi     if (ret < 0) {
3042ab185921SStefan Hajnoczi         goto err;
3043ab185921SStefan Hajnoczi     }
3044ab185921SStefan Hajnoczi 
304579c053bdSStefan Hajnoczi     if (drv->bdrv_co_write_zeroes &&
304679c053bdSStefan Hajnoczi         buffer_is_zero(bounce_buffer, iov.iov_len)) {
3047621f0589SKevin Wolf         ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
3048aa7bfbffSPeter Lieven                                       cluster_nb_sectors, 0);
304979c053bdSStefan Hajnoczi     } else {
3050f05fa4adSPaolo Bonzini         /* This does not change the data on the disk, it is not necessary
3051f05fa4adSPaolo Bonzini          * to flush even in cache=writethrough mode.
3052f05fa4adSPaolo Bonzini          */
305379c053bdSStefan Hajnoczi         ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
3054ab185921SStefan Hajnoczi                                   &bounce_qiov);
305579c053bdSStefan Hajnoczi     }
305679c053bdSStefan Hajnoczi 
3057ab185921SStefan Hajnoczi     if (ret < 0) {
3058ab185921SStefan Hajnoczi         /* It might be okay to ignore write errors for guest requests.  If this
3059ab185921SStefan Hajnoczi          * is a deliberate copy-on-read then we don't want to ignore the error.
3060ab185921SStefan Hajnoczi          * Simply report it in all cases.
3061ab185921SStefan Hajnoczi          */
3062ab185921SStefan Hajnoczi         goto err;
3063ab185921SStefan Hajnoczi     }
3064ab185921SStefan Hajnoczi 
3065ab185921SStefan Hajnoczi     skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
306603396148SMichael Tokarev     qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
3067ab185921SStefan Hajnoczi                         nb_sectors * BDRV_SECTOR_SIZE);
3068ab185921SStefan Hajnoczi 
3069ab185921SStefan Hajnoczi err:
3070ab185921SStefan Hajnoczi     qemu_vfree(bounce_buffer);
3071ab185921SStefan Hajnoczi     return ret;
3072ab185921SStefan Hajnoczi }
3073ab185921SStefan Hajnoczi 
3074c5fbe571SStefan Hajnoczi /*
3075d0c7f642SKevin Wolf  * Forwards an already correctly aligned request to the BlockDriver. This
3076d0c7f642SKevin Wolf  * handles copy on read and zeroing after EOF; any other features must be
3077d0c7f642SKevin Wolf  * implemented by the caller.
3078c5fbe571SStefan Hajnoczi  */
3079d0c7f642SKevin Wolf static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
308065afd211SKevin Wolf     BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
3081ec746e10SKevin Wolf     int64_t align, QEMUIOVector *qiov, int flags)
3082da1fa91dSKevin Wolf {
3083da1fa91dSKevin Wolf     BlockDriver *drv = bs->drv;
3084dbffbdcfSStefan Hajnoczi     int ret;
3085da1fa91dSKevin Wolf 
3086d0c7f642SKevin Wolf     int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3087d0c7f642SKevin Wolf     unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
3088da1fa91dSKevin Wolf 
3089d0c7f642SKevin Wolf     assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3090d0c7f642SKevin Wolf     assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
30918eb029c2SKevin Wolf     assert(!qiov || bytes == qiov->size);
3092d0c7f642SKevin Wolf 
3093d0c7f642SKevin Wolf     /* Handle Copy on Read and associated serialisation */
3094470c0504SStefan Hajnoczi     if (flags & BDRV_REQ_COPY_ON_READ) {
30957327145fSKevin Wolf         /* If we touch the same cluster it counts as an overlap.  This
30967327145fSKevin Wolf          * guarantees that allocating writes will be serialized and not race
30977327145fSKevin Wolf          * with each other for the same cluster.  For example, in copy-on-read
30987327145fSKevin Wolf          * it ensures that the CoR read and write operations are atomic and
30997327145fSKevin Wolf          * guest writes cannot interleave between them. */
31007327145fSKevin Wolf         mark_request_serialising(req, bdrv_get_cluster_size(bs));
3101470c0504SStefan Hajnoczi     }
3102470c0504SStefan Hajnoczi 
31032dbafdc0SKevin Wolf     wait_serialising_requests(req);
3104f4658285SStefan Hajnoczi 
3105470c0504SStefan Hajnoczi     if (flags & BDRV_REQ_COPY_ON_READ) {
3106ab185921SStefan Hajnoczi         int pnum;
3107ab185921SStefan Hajnoczi 
3108bdad13b9SPaolo Bonzini         ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
3109ab185921SStefan Hajnoczi         if (ret < 0) {
3110ab185921SStefan Hajnoczi             goto out;
3111ab185921SStefan Hajnoczi         }
3112ab185921SStefan Hajnoczi 
3113ab185921SStefan Hajnoczi         if (!ret || pnum != nb_sectors) {
3114470c0504SStefan Hajnoczi             ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
3115ab185921SStefan Hajnoczi             goto out;
3116ab185921SStefan Hajnoczi         }
3117ab185921SStefan Hajnoczi     }
3118ab185921SStefan Hajnoczi 
3119d0c7f642SKevin Wolf     /* Forward the request to the BlockDriver */
3120c0191e76SMax Reitz     if (!bs->zero_beyond_eof) {
3121dbffbdcfSStefan Hajnoczi         ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
3122893a8f62SMORITA Kazutaka     } else {
3123c0191e76SMax Reitz         /* Read zeros after EOF */
31244049082cSMarkus Armbruster         int64_t total_sectors, max_nb_sectors;
3125893a8f62SMORITA Kazutaka 
31264049082cSMarkus Armbruster         total_sectors = bdrv_nb_sectors(bs);
31274049082cSMarkus Armbruster         if (total_sectors < 0) {
31284049082cSMarkus Armbruster             ret = total_sectors;
3129893a8f62SMORITA Kazutaka             goto out;
3130893a8f62SMORITA Kazutaka         }
3131893a8f62SMORITA Kazutaka 
31325f5bcd80SKevin Wolf         max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
31335f5bcd80SKevin Wolf                                   align >> BDRV_SECTOR_BITS);
3134e012b78cSPaolo Bonzini         if (nb_sectors < max_nb_sectors) {
3135e012b78cSPaolo Bonzini             ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
3136e012b78cSPaolo Bonzini         } else if (max_nb_sectors > 0) {
313733f461e0SKevin Wolf             QEMUIOVector local_qiov;
313833f461e0SKevin Wolf 
313933f461e0SKevin Wolf             qemu_iovec_init(&local_qiov, qiov->niov);
314033f461e0SKevin Wolf             qemu_iovec_concat(&local_qiov, qiov, 0,
3141e012b78cSPaolo Bonzini                               max_nb_sectors * BDRV_SECTOR_SIZE);
314233f461e0SKevin Wolf 
3143e012b78cSPaolo Bonzini             ret = drv->bdrv_co_readv(bs, sector_num, max_nb_sectors,
314433f461e0SKevin Wolf                                      &local_qiov);
314533f461e0SKevin Wolf 
314633f461e0SKevin Wolf             qemu_iovec_destroy(&local_qiov);
3147893a8f62SMORITA Kazutaka         } else {
3148893a8f62SMORITA Kazutaka             ret = 0;
3149893a8f62SMORITA Kazutaka         }
3150893a8f62SMORITA Kazutaka 
3151893a8f62SMORITA Kazutaka         /* Reading beyond end of file is supposed to produce zeroes */
3152893a8f62SMORITA Kazutaka         if (ret == 0 && total_sectors < sector_num + nb_sectors) {
3153893a8f62SMORITA Kazutaka             uint64_t offset = MAX(0, total_sectors - sector_num);
3154893a8f62SMORITA Kazutaka             uint64_t bytes = (sector_num + nb_sectors - offset) *
3155893a8f62SMORITA Kazutaka                               BDRV_SECTOR_SIZE;
3156893a8f62SMORITA Kazutaka             qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
3157893a8f62SMORITA Kazutaka         }
3158893a8f62SMORITA Kazutaka     }
3159ab185921SStefan Hajnoczi 
3160ab185921SStefan Hajnoczi out:
3161dbffbdcfSStefan Hajnoczi     return ret;
3162da1fa91dSKevin Wolf }
3163da1fa91dSKevin Wolf 
3164fc3959e4SFam Zheng static inline uint64_t bdrv_get_align(BlockDriverState *bs)
3165fc3959e4SFam Zheng {
3166fc3959e4SFam Zheng     /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
3167fc3959e4SFam Zheng     return MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
3168fc3959e4SFam Zheng }
3169fc3959e4SFam Zheng 
3170fc3959e4SFam Zheng static inline bool bdrv_req_is_aligned(BlockDriverState *bs,
3171fc3959e4SFam Zheng                                        int64_t offset, size_t bytes)
3172fc3959e4SFam Zheng {
3173fc3959e4SFam Zheng     int64_t align = bdrv_get_align(bs);
3174fc3959e4SFam Zheng     return !(offset & (align - 1) || (bytes & (align - 1)));
3175fc3959e4SFam Zheng }
3176fc3959e4SFam Zheng 
3177d0c7f642SKevin Wolf /*
3178d0c7f642SKevin Wolf  * Handle a read request in coroutine context
3179d0c7f642SKevin Wolf  */
31801b0288aeSKevin Wolf static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
31811b0288aeSKevin Wolf     int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
3182d0c7f642SKevin Wolf     BdrvRequestFlags flags)
3183d0c7f642SKevin Wolf {
3184d0c7f642SKevin Wolf     BlockDriver *drv = bs->drv;
318565afd211SKevin Wolf     BdrvTrackedRequest req;
318665afd211SKevin Wolf 
3187fc3959e4SFam Zheng     uint64_t align = bdrv_get_align(bs);
31881b0288aeSKevin Wolf     uint8_t *head_buf = NULL;
31891b0288aeSKevin Wolf     uint8_t *tail_buf = NULL;
31901b0288aeSKevin Wolf     QEMUIOVector local_qiov;
31911b0288aeSKevin Wolf     bool use_local_qiov = false;
3192d0c7f642SKevin Wolf     int ret;
3193d0c7f642SKevin Wolf 
3194d0c7f642SKevin Wolf     if (!drv) {
3195d0c7f642SKevin Wolf         return -ENOMEDIUM;
3196d0c7f642SKevin Wolf     }
3197b9c64947SMax Reitz 
3198b9c64947SMax Reitz     ret = bdrv_check_byte_request(bs, offset, bytes);
3199b9c64947SMax Reitz     if (ret < 0) {
3200b9c64947SMax Reitz         return ret;
3201d0c7f642SKevin Wolf     }
3202d0c7f642SKevin Wolf 
3203d0c7f642SKevin Wolf     if (bs->copy_on_read) {
3204d0c7f642SKevin Wolf         flags |= BDRV_REQ_COPY_ON_READ;
3205d0c7f642SKevin Wolf     }
3206d0c7f642SKevin Wolf 
3207d0c7f642SKevin Wolf     /* throttling disk I/O */
3208d0c7f642SKevin Wolf     if (bs->io_limits_enabled) {
3209d5103588SKevin Wolf         bdrv_io_limits_intercept(bs, bytes, false);
3210d0c7f642SKevin Wolf     }
3211d0c7f642SKevin Wolf 
32121b0288aeSKevin Wolf     /* Align read if necessary by padding qiov */
32131b0288aeSKevin Wolf     if (offset & (align - 1)) {
32141b0288aeSKevin Wolf         head_buf = qemu_blockalign(bs, align);
32151b0288aeSKevin Wolf         qemu_iovec_init(&local_qiov, qiov->niov + 2);
32161b0288aeSKevin Wolf         qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
32171b0288aeSKevin Wolf         qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
32181b0288aeSKevin Wolf         use_local_qiov = true;
32191b0288aeSKevin Wolf 
32201b0288aeSKevin Wolf         bytes += offset & (align - 1);
32211b0288aeSKevin Wolf         offset = offset & ~(align - 1);
32221b0288aeSKevin Wolf     }
32231b0288aeSKevin Wolf 
32241b0288aeSKevin Wolf     if ((offset + bytes) & (align - 1)) {
32251b0288aeSKevin Wolf         if (!use_local_qiov) {
32261b0288aeSKevin Wolf             qemu_iovec_init(&local_qiov, qiov->niov + 1);
32271b0288aeSKevin Wolf             qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
32281b0288aeSKevin Wolf             use_local_qiov = true;
32291b0288aeSKevin Wolf         }
32301b0288aeSKevin Wolf         tail_buf = qemu_blockalign(bs, align);
32311b0288aeSKevin Wolf         qemu_iovec_add(&local_qiov, tail_buf,
32321b0288aeSKevin Wolf                        align - ((offset + bytes) & (align - 1)));
32331b0288aeSKevin Wolf 
32341b0288aeSKevin Wolf         bytes = ROUND_UP(bytes, align);
32351b0288aeSKevin Wolf     }
32361b0288aeSKevin Wolf 
323765afd211SKevin Wolf     tracked_request_begin(&req, bs, offset, bytes, false);
3238ec746e10SKevin Wolf     ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
32391b0288aeSKevin Wolf                               use_local_qiov ? &local_qiov : qiov,
32401b0288aeSKevin Wolf                               flags);
324165afd211SKevin Wolf     tracked_request_end(&req);
32421b0288aeSKevin Wolf 
32431b0288aeSKevin Wolf     if (use_local_qiov) {
32441b0288aeSKevin Wolf         qemu_iovec_destroy(&local_qiov);
32451b0288aeSKevin Wolf         qemu_vfree(head_buf);
32461b0288aeSKevin Wolf         qemu_vfree(tail_buf);
32471b0288aeSKevin Wolf     }
32481b0288aeSKevin Wolf 
3249d0c7f642SKevin Wolf     return ret;
3250d0c7f642SKevin Wolf }
3251d0c7f642SKevin Wolf 
32521b0288aeSKevin Wolf static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
32531b0288aeSKevin Wolf     int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
32541b0288aeSKevin Wolf     BdrvRequestFlags flags)
32551b0288aeSKevin Wolf {
325675af1f34SPeter Lieven     if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
32571b0288aeSKevin Wolf         return -EINVAL;
32581b0288aeSKevin Wolf     }
32591b0288aeSKevin Wolf 
32601b0288aeSKevin Wolf     return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
32611b0288aeSKevin Wolf                              nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
32621b0288aeSKevin Wolf }
32631b0288aeSKevin Wolf 
3264c5fbe571SStefan Hajnoczi int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
3265da1fa91dSKevin Wolf     int nb_sectors, QEMUIOVector *qiov)
3266da1fa91dSKevin Wolf {
3267c5fbe571SStefan Hajnoczi     trace_bdrv_co_readv(bs, sector_num, nb_sectors);
3268da1fa91dSKevin Wolf 
3269470c0504SStefan Hajnoczi     return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
3270470c0504SStefan Hajnoczi }
3271470c0504SStefan Hajnoczi 
3272470c0504SStefan Hajnoczi int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
3273470c0504SStefan Hajnoczi     int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
3274470c0504SStefan Hajnoczi {
3275470c0504SStefan Hajnoczi     trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
3276470c0504SStefan Hajnoczi 
3277470c0504SStefan Hajnoczi     return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
3278470c0504SStefan Hajnoczi                             BDRV_REQ_COPY_ON_READ);
3279c5fbe571SStefan Hajnoczi }
3280c5fbe571SStefan Hajnoczi 
328198764152SPeter Lieven #define MAX_WRITE_ZEROES_BOUNCE_BUFFER 32768
3282c31cb707SPeter Lieven 
3283f08f2ddaSStefan Hajnoczi static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
3284aa7bfbffSPeter Lieven     int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
3285f08f2ddaSStefan Hajnoczi {
3286f08f2ddaSStefan Hajnoczi     BlockDriver *drv = bs->drv;
3287f08f2ddaSStefan Hajnoczi     QEMUIOVector qiov;
3288c31cb707SPeter Lieven     struct iovec iov = {0};
3289c31cb707SPeter Lieven     int ret = 0;
3290f08f2ddaSStefan Hajnoczi 
329175af1f34SPeter Lieven     int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_write_zeroes,
329275af1f34SPeter Lieven                                         BDRV_REQUEST_MAX_SECTORS);
3293621f0589SKevin Wolf 
3294c31cb707SPeter Lieven     while (nb_sectors > 0 && !ret) {
3295c31cb707SPeter Lieven         int num = nb_sectors;
3296c31cb707SPeter Lieven 
3297b8d71c09SPaolo Bonzini         /* Align request.  Block drivers can expect the "bulk" of the request
3298b8d71c09SPaolo Bonzini          * to be aligned.
3299b8d71c09SPaolo Bonzini          */
3300b8d71c09SPaolo Bonzini         if (bs->bl.write_zeroes_alignment
3301b8d71c09SPaolo Bonzini             && num > bs->bl.write_zeroes_alignment) {
3302b8d71c09SPaolo Bonzini             if (sector_num % bs->bl.write_zeroes_alignment != 0) {
3303b8d71c09SPaolo Bonzini                 /* Make a small request up to the first aligned sector.  */
3304c31cb707SPeter Lieven                 num = bs->bl.write_zeroes_alignment;
3305c31cb707SPeter Lieven                 num -= sector_num % bs->bl.write_zeroes_alignment;
3306b8d71c09SPaolo Bonzini             } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
3307b8d71c09SPaolo Bonzini                 /* Shorten the request to the last aligned sector.  num cannot
3308b8d71c09SPaolo Bonzini                  * underflow because num > bs->bl.write_zeroes_alignment.
3309b8d71c09SPaolo Bonzini                  */
3310b8d71c09SPaolo Bonzini                 num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
3311b8d71c09SPaolo Bonzini             }
3312c31cb707SPeter Lieven         }
3313c31cb707SPeter Lieven 
3314c31cb707SPeter Lieven         /* limit request size */
3315c31cb707SPeter Lieven         if (num > max_write_zeroes) {
3316c31cb707SPeter Lieven             num = max_write_zeroes;
3317c31cb707SPeter Lieven         }
3318c31cb707SPeter Lieven 
3319c31cb707SPeter Lieven         ret = -ENOTSUP;
3320f08f2ddaSStefan Hajnoczi         /* First try the efficient write zeroes operation */
3321f08f2ddaSStefan Hajnoczi         if (drv->bdrv_co_write_zeroes) {
3322c31cb707SPeter Lieven             ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
3323f08f2ddaSStefan Hajnoczi         }
3324f08f2ddaSStefan Hajnoczi 
3325c31cb707SPeter Lieven         if (ret == -ENOTSUP) {
3326f08f2ddaSStefan Hajnoczi             /* Fall back to bounce buffer if write zeroes is unsupported */
3327095e4fa4SPeter Lieven             int max_xfer_len = MIN_NON_ZERO(bs->bl.max_transfer_length,
332898764152SPeter Lieven                                             MAX_WRITE_ZEROES_BOUNCE_BUFFER);
3329095e4fa4SPeter Lieven             num = MIN(num, max_xfer_len);
3330c31cb707SPeter Lieven             iov.iov_len = num * BDRV_SECTOR_SIZE;
3331c31cb707SPeter Lieven             if (iov.iov_base == NULL) {
3332857d4f46SKevin Wolf                 iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
3333857d4f46SKevin Wolf                 if (iov.iov_base == NULL) {
3334857d4f46SKevin Wolf                     ret = -ENOMEM;
3335857d4f46SKevin Wolf                     goto fail;
3336857d4f46SKevin Wolf                 }
3337b8d71c09SPaolo Bonzini                 memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
3338c31cb707SPeter Lieven             }
3339f08f2ddaSStefan Hajnoczi             qemu_iovec_init_external(&qiov, &iov, 1);
3340f08f2ddaSStefan Hajnoczi 
3341c31cb707SPeter Lieven             ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);
3342b8d71c09SPaolo Bonzini 
3343b8d71c09SPaolo Bonzini             /* Keep bounce buffer around if it is big enough for all
3344b8d71c09SPaolo Bonzini              * all future requests.
3345b8d71c09SPaolo Bonzini              */
3346095e4fa4SPeter Lieven             if (num < max_xfer_len) {
3347b8d71c09SPaolo Bonzini                 qemu_vfree(iov.iov_base);
3348b8d71c09SPaolo Bonzini                 iov.iov_base = NULL;
3349b8d71c09SPaolo Bonzini             }
3350c31cb707SPeter Lieven         }
3351c31cb707SPeter Lieven 
3352c31cb707SPeter Lieven         sector_num += num;
3353c31cb707SPeter Lieven         nb_sectors -= num;
3354c31cb707SPeter Lieven     }
3355f08f2ddaSStefan Hajnoczi 
3356857d4f46SKevin Wolf fail:
3357f08f2ddaSStefan Hajnoczi     qemu_vfree(iov.iov_base);
3358f08f2ddaSStefan Hajnoczi     return ret;
3359f08f2ddaSStefan Hajnoczi }
3360f08f2ddaSStefan Hajnoczi 
3361c5fbe571SStefan Hajnoczi /*
3362b404f720SKevin Wolf  * Forwards an already correctly aligned write request to the BlockDriver.
3363c5fbe571SStefan Hajnoczi  */
3364b404f720SKevin Wolf static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
336565afd211SKevin Wolf     BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
336665afd211SKevin Wolf     QEMUIOVector *qiov, int flags)
3367c5fbe571SStefan Hajnoczi {
3368c5fbe571SStefan Hajnoczi     BlockDriver *drv = bs->drv;
336928de2dcdSKevin Wolf     bool waited;
33706b7cb247SStefan Hajnoczi     int ret;
3371da1fa91dSKevin Wolf 
3372b404f720SKevin Wolf     int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3373b404f720SKevin Wolf     unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
3374da1fa91dSKevin Wolf 
3375b404f720SKevin Wolf     assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3376b404f720SKevin Wolf     assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
33778eb029c2SKevin Wolf     assert(!qiov || bytes == qiov->size);
3378cc0681c4SBenoît Canet 
337928de2dcdSKevin Wolf     waited = wait_serialising_requests(req);
338028de2dcdSKevin Wolf     assert(!waited || !req->serialising);
3381af91f9a7SKevin Wolf     assert(req->overlap_offset <= offset);
3382af91f9a7SKevin Wolf     assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
3383244eadefSKevin Wolf 
338465afd211SKevin Wolf     ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
3385d616b224SStefan Hajnoczi 
3386465bee1dSPeter Lieven     if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
3387465bee1dSPeter Lieven         !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes &&
3388465bee1dSPeter Lieven         qemu_iovec_is_zero(qiov)) {
3389465bee1dSPeter Lieven         flags |= BDRV_REQ_ZERO_WRITE;
3390465bee1dSPeter Lieven         if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
3391465bee1dSPeter Lieven             flags |= BDRV_REQ_MAY_UNMAP;
3392465bee1dSPeter Lieven         }
3393465bee1dSPeter Lieven     }
3394465bee1dSPeter Lieven 
3395d616b224SStefan Hajnoczi     if (ret < 0) {
3396d616b224SStefan Hajnoczi         /* Do nothing, write notifier decided to fail this request */
3397d616b224SStefan Hajnoczi     } else if (flags & BDRV_REQ_ZERO_WRITE) {
33989e1cb96dSKevin Wolf         BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
3399aa7bfbffSPeter Lieven         ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
3400f08f2ddaSStefan Hajnoczi     } else {
34019e1cb96dSKevin Wolf         BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
34026b7cb247SStefan Hajnoczi         ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
3403f08f2ddaSStefan Hajnoczi     }
34049e1cb96dSKevin Wolf     BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);
34056b7cb247SStefan Hajnoczi 
3406f05fa4adSPaolo Bonzini     if (ret == 0 && !bs->enable_write_cache) {
3407f05fa4adSPaolo Bonzini         ret = bdrv_co_flush(bs);
3408f05fa4adSPaolo Bonzini     }
3409f05fa4adSPaolo Bonzini 
34101755da16SPaolo Bonzini     bdrv_set_dirty(bs, sector_num, nb_sectors);
3411da1fa91dSKevin Wolf 
34125366d0c8SBenoît Canet     block_acct_highest_sector(&bs->stats, sector_num, nb_sectors);
34135e5a94b6SBenoît Canet 
3414c0191e76SMax Reitz     if (ret >= 0) {
3415df2a6f29SPaolo Bonzini         bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
3416df2a6f29SPaolo Bonzini     }
3417da1fa91dSKevin Wolf 
34186b7cb247SStefan Hajnoczi     return ret;
3419da1fa91dSKevin Wolf }
3420da1fa91dSKevin Wolf 
3421b404f720SKevin Wolf /*
3422b404f720SKevin Wolf  * Handle a write request in coroutine context
3423b404f720SKevin Wolf  */
34246601553eSKevin Wolf static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
34256601553eSKevin Wolf     int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
3426b404f720SKevin Wolf     BdrvRequestFlags flags)
3427b404f720SKevin Wolf {
342865afd211SKevin Wolf     BdrvTrackedRequest req;
3429fc3959e4SFam Zheng     uint64_t align = bdrv_get_align(bs);
34303b8242e0SKevin Wolf     uint8_t *head_buf = NULL;
34313b8242e0SKevin Wolf     uint8_t *tail_buf = NULL;
34323b8242e0SKevin Wolf     QEMUIOVector local_qiov;
34333b8242e0SKevin Wolf     bool use_local_qiov = false;
3434b404f720SKevin Wolf     int ret;
3435b404f720SKevin Wolf 
3436b404f720SKevin Wolf     if (!bs->drv) {
3437b404f720SKevin Wolf         return -ENOMEDIUM;
3438b404f720SKevin Wolf     }
3439b404f720SKevin Wolf     if (bs->read_only) {
3440b404f720SKevin Wolf         return -EACCES;
3441b404f720SKevin Wolf     }
3442b9c64947SMax Reitz 
3443b9c64947SMax Reitz     ret = bdrv_check_byte_request(bs, offset, bytes);
3444b9c64947SMax Reitz     if (ret < 0) {
3445b9c64947SMax Reitz         return ret;
3446b404f720SKevin Wolf     }
3447b404f720SKevin Wolf 
3448b404f720SKevin Wolf     /* throttling disk I/O */
3449b404f720SKevin Wolf     if (bs->io_limits_enabled) {
3450d5103588SKevin Wolf         bdrv_io_limits_intercept(bs, bytes, true);
3451b404f720SKevin Wolf     }
3452b404f720SKevin Wolf 
34533b8242e0SKevin Wolf     /*
34543b8242e0SKevin Wolf      * Align write if necessary by performing a read-modify-write cycle.
34553b8242e0SKevin Wolf      * Pad qiov with the read parts and be sure to have a tracked request not
34563b8242e0SKevin Wolf      * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
34573b8242e0SKevin Wolf      */
345865afd211SKevin Wolf     tracked_request_begin(&req, bs, offset, bytes, true);
34593b8242e0SKevin Wolf 
34603b8242e0SKevin Wolf     if (offset & (align - 1)) {
34613b8242e0SKevin Wolf         QEMUIOVector head_qiov;
34623b8242e0SKevin Wolf         struct iovec head_iov;
34633b8242e0SKevin Wolf 
34643b8242e0SKevin Wolf         mark_request_serialising(&req, align);
34653b8242e0SKevin Wolf         wait_serialising_requests(&req);
34663b8242e0SKevin Wolf 
34673b8242e0SKevin Wolf         head_buf = qemu_blockalign(bs, align);
34683b8242e0SKevin Wolf         head_iov = (struct iovec) {
34693b8242e0SKevin Wolf             .iov_base   = head_buf,
34703b8242e0SKevin Wolf             .iov_len    = align,
34713b8242e0SKevin Wolf         };
34723b8242e0SKevin Wolf         qemu_iovec_init_external(&head_qiov, &head_iov, 1);
34733b8242e0SKevin Wolf 
34749e1cb96dSKevin Wolf         BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
34753b8242e0SKevin Wolf         ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
34763b8242e0SKevin Wolf                                   align, &head_qiov, 0);
34773b8242e0SKevin Wolf         if (ret < 0) {
34783b8242e0SKevin Wolf             goto fail;
34793b8242e0SKevin Wolf         }
34809e1cb96dSKevin Wolf         BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
34813b8242e0SKevin Wolf 
34823b8242e0SKevin Wolf         qemu_iovec_init(&local_qiov, qiov->niov + 2);
34833b8242e0SKevin Wolf         qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
34843b8242e0SKevin Wolf         qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
34853b8242e0SKevin Wolf         use_local_qiov = true;
34863b8242e0SKevin Wolf 
34873b8242e0SKevin Wolf         bytes += offset & (align - 1);
34883b8242e0SKevin Wolf         offset = offset & ~(align - 1);
34893b8242e0SKevin Wolf     }
34903b8242e0SKevin Wolf 
34913b8242e0SKevin Wolf     if ((offset + bytes) & (align - 1)) {
34923b8242e0SKevin Wolf         QEMUIOVector tail_qiov;
34933b8242e0SKevin Wolf         struct iovec tail_iov;
34943b8242e0SKevin Wolf         size_t tail_bytes;
349528de2dcdSKevin Wolf         bool waited;
34963b8242e0SKevin Wolf 
34973b8242e0SKevin Wolf         mark_request_serialising(&req, align);
349828de2dcdSKevin Wolf         waited = wait_serialising_requests(&req);
349928de2dcdSKevin Wolf         assert(!waited || !use_local_qiov);
35003b8242e0SKevin Wolf 
35013b8242e0SKevin Wolf         tail_buf = qemu_blockalign(bs, align);
35023b8242e0SKevin Wolf         tail_iov = (struct iovec) {
35033b8242e0SKevin Wolf             .iov_base   = tail_buf,
35043b8242e0SKevin Wolf             .iov_len    = align,
35053b8242e0SKevin Wolf         };
35063b8242e0SKevin Wolf         qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
35073b8242e0SKevin Wolf 
35089e1cb96dSKevin Wolf         BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
35093b8242e0SKevin Wolf         ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
35103b8242e0SKevin Wolf                                   align, &tail_qiov, 0);
35113b8242e0SKevin Wolf         if (ret < 0) {
35123b8242e0SKevin Wolf             goto fail;
35133b8242e0SKevin Wolf         }
35149e1cb96dSKevin Wolf         BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
35153b8242e0SKevin Wolf 
35163b8242e0SKevin Wolf         if (!use_local_qiov) {
35173b8242e0SKevin Wolf             qemu_iovec_init(&local_qiov, qiov->niov + 1);
35183b8242e0SKevin Wolf             qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
35193b8242e0SKevin Wolf             use_local_qiov = true;
35203b8242e0SKevin Wolf         }
35213b8242e0SKevin Wolf 
35223b8242e0SKevin Wolf         tail_bytes = (offset + bytes) & (align - 1);
35233b8242e0SKevin Wolf         qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
35243b8242e0SKevin Wolf 
35253b8242e0SKevin Wolf         bytes = ROUND_UP(bytes, align);
35263b8242e0SKevin Wolf     }
35273b8242e0SKevin Wolf 
3528fc3959e4SFam Zheng     if (use_local_qiov) {
3529fc3959e4SFam Zheng         /* Local buffer may have non-zero data. */
3530fc3959e4SFam Zheng         flags &= ~BDRV_REQ_ZERO_WRITE;
3531fc3959e4SFam Zheng     }
35323b8242e0SKevin Wolf     ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
35333b8242e0SKevin Wolf                                use_local_qiov ? &local_qiov : qiov,
35343b8242e0SKevin Wolf                                flags);
35353b8242e0SKevin Wolf 
35363b8242e0SKevin Wolf fail:
353765afd211SKevin Wolf     tracked_request_end(&req);
3538b404f720SKevin Wolf 
35393b8242e0SKevin Wolf     if (use_local_qiov) {
35403b8242e0SKevin Wolf         qemu_iovec_destroy(&local_qiov);
354199c4a85cSKevin Wolf     }
35423b8242e0SKevin Wolf     qemu_vfree(head_buf);
35433b8242e0SKevin Wolf     qemu_vfree(tail_buf);
35443b8242e0SKevin Wolf 
3545b404f720SKevin Wolf     return ret;
3546b404f720SKevin Wolf }
3547b404f720SKevin Wolf 
35486601553eSKevin Wolf static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
35496601553eSKevin Wolf     int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
35506601553eSKevin Wolf     BdrvRequestFlags flags)
35516601553eSKevin Wolf {
355275af1f34SPeter Lieven     if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
35536601553eSKevin Wolf         return -EINVAL;
35546601553eSKevin Wolf     }
35556601553eSKevin Wolf 
35566601553eSKevin Wolf     return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
35576601553eSKevin Wolf                               nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
35586601553eSKevin Wolf }
35596601553eSKevin Wolf 
3560c5fbe571SStefan Hajnoczi int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
3561c5fbe571SStefan Hajnoczi     int nb_sectors, QEMUIOVector *qiov)
3562c5fbe571SStefan Hajnoczi {
3563c5fbe571SStefan Hajnoczi     trace_bdrv_co_writev(bs, sector_num, nb_sectors);
3564c5fbe571SStefan Hajnoczi 
3565f08f2ddaSStefan Hajnoczi     return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
3566f08f2ddaSStefan Hajnoczi }
3567f08f2ddaSStefan Hajnoczi 
3568f08f2ddaSStefan Hajnoczi int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
3569aa7bfbffSPeter Lieven                                       int64_t sector_num, int nb_sectors,
3570aa7bfbffSPeter Lieven                                       BdrvRequestFlags flags)
3571f08f2ddaSStefan Hajnoczi {
3572fc3959e4SFam Zheng     int ret;
3573fc3959e4SFam Zheng 
357494d6ff21SPaolo Bonzini     trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
3575f08f2ddaSStefan Hajnoczi 
3576d32f35cbSPeter Lieven     if (!(bs->open_flags & BDRV_O_UNMAP)) {
3577d32f35cbSPeter Lieven         flags &= ~BDRV_REQ_MAY_UNMAP;
3578d32f35cbSPeter Lieven     }
3579fc3959e4SFam Zheng     if (bdrv_req_is_aligned(bs, sector_num << BDRV_SECTOR_BITS,
3580fc3959e4SFam Zheng                             nb_sectors << BDRV_SECTOR_BITS)) {
3581fc3959e4SFam Zheng         ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
3582aa7bfbffSPeter Lieven                                 BDRV_REQ_ZERO_WRITE | flags);
3583fc3959e4SFam Zheng     } else {
3584fc3959e4SFam Zheng         uint8_t *buf;
3585fc3959e4SFam Zheng         QEMUIOVector local_qiov;
3586fc3959e4SFam Zheng         size_t bytes = nb_sectors << BDRV_SECTOR_BITS;
3587fc3959e4SFam Zheng 
3588fc3959e4SFam Zheng         buf = qemu_memalign(bdrv_opt_mem_align(bs), bytes);
3589fc3959e4SFam Zheng         memset(buf, 0, bytes);
3590fc3959e4SFam Zheng         qemu_iovec_init(&local_qiov, 1);
3591fc3959e4SFam Zheng         qemu_iovec_add(&local_qiov, buf, bytes);
3592fc3959e4SFam Zheng 
3593fc3959e4SFam Zheng         ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, &local_qiov,
3594fc3959e4SFam Zheng                                 BDRV_REQ_ZERO_WRITE | flags);
3595fc3959e4SFam Zheng         qemu_vfree(buf);
3596fc3959e4SFam Zheng     }
3597fc3959e4SFam Zheng     return ret;
3598c5fbe571SStefan Hajnoczi }
3599c5fbe571SStefan Hajnoczi 
360083f64091Sbellard /**
360183f64091Sbellard  * Truncate file to 'offset' bytes (needed only for file protocols)
360283f64091Sbellard  */
360383f64091Sbellard int bdrv_truncate(BlockDriverState *bs, int64_t offset)
360483f64091Sbellard {
360583f64091Sbellard     BlockDriver *drv = bs->drv;
360651762288SStefan Hajnoczi     int ret;
360783f64091Sbellard     if (!drv)
360819cb3738Sbellard         return -ENOMEDIUM;
360983f64091Sbellard     if (!drv->bdrv_truncate)
361083f64091Sbellard         return -ENOTSUP;
361159f2689dSNaphtali Sprei     if (bs->read_only)
361259f2689dSNaphtali Sprei         return -EACCES;
36139c75e168SJeff Cody 
361451762288SStefan Hajnoczi     ret = drv->bdrv_truncate(bs, offset);
361551762288SStefan Hajnoczi     if (ret == 0) {
361651762288SStefan Hajnoczi         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
3617ce1ffea8SJohn Snow         bdrv_dirty_bitmap_truncate(bs);
3618a7f53e26SMarkus Armbruster         if (bs->blk) {
3619a7f53e26SMarkus Armbruster             blk_dev_resize_cb(bs->blk);
3620a7f53e26SMarkus Armbruster         }
362151762288SStefan Hajnoczi     }
362251762288SStefan Hajnoczi     return ret;
362383f64091Sbellard }
362483f64091Sbellard 
362583f64091Sbellard /**
36264a1d5e1fSFam Zheng  * Length of a allocated file in bytes. Sparse files are counted by actual
36274a1d5e1fSFam Zheng  * allocated space. Return < 0 if error or unknown.
36284a1d5e1fSFam Zheng  */
36294a1d5e1fSFam Zheng int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
36304a1d5e1fSFam Zheng {
36314a1d5e1fSFam Zheng     BlockDriver *drv = bs->drv;
36324a1d5e1fSFam Zheng     if (!drv) {
36334a1d5e1fSFam Zheng         return -ENOMEDIUM;
36344a1d5e1fSFam Zheng     }
36354a1d5e1fSFam Zheng     if (drv->bdrv_get_allocated_file_size) {
36364a1d5e1fSFam Zheng         return drv->bdrv_get_allocated_file_size(bs);
36374a1d5e1fSFam Zheng     }
36384a1d5e1fSFam Zheng     if (bs->file) {
36394a1d5e1fSFam Zheng         return bdrv_get_allocated_file_size(bs->file);
36404a1d5e1fSFam Zheng     }
36414a1d5e1fSFam Zheng     return -ENOTSUP;
36424a1d5e1fSFam Zheng }
36434a1d5e1fSFam Zheng 
36444a1d5e1fSFam Zheng /**
364565a9bb25SMarkus Armbruster  * Return number of sectors on success, -errno on error.
364683f64091Sbellard  */
364765a9bb25SMarkus Armbruster int64_t bdrv_nb_sectors(BlockDriverState *bs)
364883f64091Sbellard {
364983f64091Sbellard     BlockDriver *drv = bs->drv;
365065a9bb25SMarkus Armbruster 
365183f64091Sbellard     if (!drv)
365219cb3738Sbellard         return -ENOMEDIUM;
365351762288SStefan Hajnoczi 
3654b94a2610SKevin Wolf     if (drv->has_variable_length) {
3655b94a2610SKevin Wolf         int ret = refresh_total_sectors(bs, bs->total_sectors);
3656b94a2610SKevin Wolf         if (ret < 0) {
3657b94a2610SKevin Wolf             return ret;
3658fc01f7e7Sbellard         }
365946a4e4e6SStefan Hajnoczi     }
366065a9bb25SMarkus Armbruster     return bs->total_sectors;
366165a9bb25SMarkus Armbruster }
366265a9bb25SMarkus Armbruster 
366365a9bb25SMarkus Armbruster /**
366465a9bb25SMarkus Armbruster  * Return length in bytes on success, -errno on error.
366565a9bb25SMarkus Armbruster  * The length is always a multiple of BDRV_SECTOR_SIZE.
366665a9bb25SMarkus Armbruster  */
366765a9bb25SMarkus Armbruster int64_t bdrv_getlength(BlockDriverState *bs)
366865a9bb25SMarkus Armbruster {
366965a9bb25SMarkus Armbruster     int64_t ret = bdrv_nb_sectors(bs);
367065a9bb25SMarkus Armbruster 
367165a9bb25SMarkus Armbruster     return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
367246a4e4e6SStefan Hajnoczi }
3673fc01f7e7Sbellard 
367419cb3738Sbellard /* return 0 as number of sectors if no device present or error */
367596b8f136Sths void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
3676fc01f7e7Sbellard {
367765a9bb25SMarkus Armbruster     int64_t nb_sectors = bdrv_nb_sectors(bs);
367865a9bb25SMarkus Armbruster 
367965a9bb25SMarkus Armbruster     *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
3680fc01f7e7Sbellard }
3681cf98951bSbellard 
3682ff06f5f3SPaolo Bonzini void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
3683ff06f5f3SPaolo Bonzini                        BlockdevOnError on_write_error)
3684abd7f68dSMarkus Armbruster {
3685abd7f68dSMarkus Armbruster     bs->on_read_error = on_read_error;
3686abd7f68dSMarkus Armbruster     bs->on_write_error = on_write_error;
3687abd7f68dSMarkus Armbruster }
3688abd7f68dSMarkus Armbruster 
36891ceee0d5SPaolo Bonzini BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
3690abd7f68dSMarkus Armbruster {
3691abd7f68dSMarkus Armbruster     return is_read ? bs->on_read_error : bs->on_write_error;
3692abd7f68dSMarkus Armbruster }
3693abd7f68dSMarkus Armbruster 
36943e1caa5fSPaolo Bonzini BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
36953e1caa5fSPaolo Bonzini {
36963e1caa5fSPaolo Bonzini     BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
36973e1caa5fSPaolo Bonzini 
36983e1caa5fSPaolo Bonzini     switch (on_err) {
36993e1caa5fSPaolo Bonzini     case BLOCKDEV_ON_ERROR_ENOSPC:
3700a589569fSWenchao Xia         return (error == ENOSPC) ?
3701a589569fSWenchao Xia                BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
37023e1caa5fSPaolo Bonzini     case BLOCKDEV_ON_ERROR_STOP:
3703a589569fSWenchao Xia         return BLOCK_ERROR_ACTION_STOP;
37043e1caa5fSPaolo Bonzini     case BLOCKDEV_ON_ERROR_REPORT:
3705a589569fSWenchao Xia         return BLOCK_ERROR_ACTION_REPORT;
37063e1caa5fSPaolo Bonzini     case BLOCKDEV_ON_ERROR_IGNORE:
3707a589569fSWenchao Xia         return BLOCK_ERROR_ACTION_IGNORE;
37083e1caa5fSPaolo Bonzini     default:
37093e1caa5fSPaolo Bonzini         abort();
37103e1caa5fSPaolo Bonzini     }
37113e1caa5fSPaolo Bonzini }
37123e1caa5fSPaolo Bonzini 
3713c7c2ff0cSLuiz Capitulino static void send_qmp_error_event(BlockDriverState *bs,
3714c7c2ff0cSLuiz Capitulino                                  BlockErrorAction action,
3715c7c2ff0cSLuiz Capitulino                                  bool is_read, int error)
3716c7c2ff0cSLuiz Capitulino {
3717573742a5SPeter Maydell     IoOperationType optype;
3718c7c2ff0cSLuiz Capitulino 
3719573742a5SPeter Maydell     optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
3720573742a5SPeter Maydell     qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
3721c7c2ff0cSLuiz Capitulino                                    bdrv_iostatus_is_enabled(bs),
3722624ff573SLuiz Capitulino                                    error == ENOSPC, strerror(error),
3723624ff573SLuiz Capitulino                                    &error_abort);
3724c7c2ff0cSLuiz Capitulino }
3725c7c2ff0cSLuiz Capitulino 
37263e1caa5fSPaolo Bonzini /* This is done by device models because, while the block layer knows
37273e1caa5fSPaolo Bonzini  * about the error, it does not know whether an operation comes from
37283e1caa5fSPaolo Bonzini  * the device or the block layer (from a job, for example).
37293e1caa5fSPaolo Bonzini  */
37303e1caa5fSPaolo Bonzini void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
37313e1caa5fSPaolo Bonzini                        bool is_read, int error)
37323e1caa5fSPaolo Bonzini {
37333e1caa5fSPaolo Bonzini     assert(error >= 0);
37342bd3bce8SPaolo Bonzini 
3735a589569fSWenchao Xia     if (action == BLOCK_ERROR_ACTION_STOP) {
37362bd3bce8SPaolo Bonzini         /* First set the iostatus, so that "info block" returns an iostatus
37372bd3bce8SPaolo Bonzini          * that matches the events raised so far (an additional error iostatus
37382bd3bce8SPaolo Bonzini          * is fine, but not a lost one).
37392bd3bce8SPaolo Bonzini          */
37403e1caa5fSPaolo Bonzini         bdrv_iostatus_set_err(bs, error);
37412bd3bce8SPaolo Bonzini 
37422bd3bce8SPaolo Bonzini         /* Then raise the request to stop the VM and the event.
37432bd3bce8SPaolo Bonzini          * qemu_system_vmstop_request_prepare has two effects.  First,
37442bd3bce8SPaolo Bonzini          * it ensures that the STOP event always comes after the
37452bd3bce8SPaolo Bonzini          * BLOCK_IO_ERROR event.  Second, it ensures that even if management
37462bd3bce8SPaolo Bonzini          * can observe the STOP event and do a "cont" before the STOP
37472bd3bce8SPaolo Bonzini          * event is issued, the VM will not stop.  In this case, vm_start()
37482bd3bce8SPaolo Bonzini          * also ensures that the STOP/RESUME pair of events is emitted.
37492bd3bce8SPaolo Bonzini          */
37502bd3bce8SPaolo Bonzini         qemu_system_vmstop_request_prepare();
3751c7c2ff0cSLuiz Capitulino         send_qmp_error_event(bs, action, is_read, error);
37522bd3bce8SPaolo Bonzini         qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
37532bd3bce8SPaolo Bonzini     } else {
3754c7c2ff0cSLuiz Capitulino         send_qmp_error_event(bs, action, is_read, error);
37553e1caa5fSPaolo Bonzini     }
37563e1caa5fSPaolo Bonzini }
37573e1caa5fSPaolo Bonzini 
3758b338082bSbellard int bdrv_is_read_only(BlockDriverState *bs)
3759b338082bSbellard {
3760b338082bSbellard     return bs->read_only;
3761b338082bSbellard }
3762b338082bSbellard 
3763985a03b0Sths int bdrv_is_sg(BlockDriverState *bs)
3764985a03b0Sths {
3765985a03b0Sths     return bs->sg;
3766985a03b0Sths }
3767985a03b0Sths 
3768e900a7b7SChristoph Hellwig int bdrv_enable_write_cache(BlockDriverState *bs)
3769e900a7b7SChristoph Hellwig {
3770e900a7b7SChristoph Hellwig     return bs->enable_write_cache;
3771e900a7b7SChristoph Hellwig }
3772e900a7b7SChristoph Hellwig 
3773425b0148SPaolo Bonzini void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
3774425b0148SPaolo Bonzini {
3775425b0148SPaolo Bonzini     bs->enable_write_cache = wce;
377655b110f2SJeff Cody 
377755b110f2SJeff Cody     /* so a reopen() will preserve wce */
377855b110f2SJeff Cody     if (wce) {
377955b110f2SJeff Cody         bs->open_flags |= BDRV_O_CACHE_WB;
378055b110f2SJeff Cody     } else {
378155b110f2SJeff Cody         bs->open_flags &= ~BDRV_O_CACHE_WB;
378255b110f2SJeff Cody     }
3783425b0148SPaolo Bonzini }
3784425b0148SPaolo Bonzini 
3785ea2384d3Sbellard int bdrv_is_encrypted(BlockDriverState *bs)
3786ea2384d3Sbellard {
3787ea2384d3Sbellard     if (bs->backing_hd && bs->backing_hd->encrypted)
3788ea2384d3Sbellard         return 1;
3789ea2384d3Sbellard     return bs->encrypted;
3790ea2384d3Sbellard }
3791ea2384d3Sbellard 
3792c0f4ce77Saliguori int bdrv_key_required(BlockDriverState *bs)
3793c0f4ce77Saliguori {
3794c0f4ce77Saliguori     BlockDriverState *backing_hd = bs->backing_hd;
3795c0f4ce77Saliguori 
3796c0f4ce77Saliguori     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
3797c0f4ce77Saliguori         return 1;
3798c0f4ce77Saliguori     return (bs->encrypted && !bs->valid_key);
3799c0f4ce77Saliguori }
3800c0f4ce77Saliguori 
3801ea2384d3Sbellard int bdrv_set_key(BlockDriverState *bs, const char *key)
3802ea2384d3Sbellard {
3803ea2384d3Sbellard     int ret;
3804ea2384d3Sbellard     if (bs->backing_hd && bs->backing_hd->encrypted) {
3805ea2384d3Sbellard         ret = bdrv_set_key(bs->backing_hd, key);
3806ea2384d3Sbellard         if (ret < 0)
3807ea2384d3Sbellard             return ret;
3808ea2384d3Sbellard         if (!bs->encrypted)
3809ea2384d3Sbellard             return 0;
3810ea2384d3Sbellard     }
3811fd04a2aeSShahar Havivi     if (!bs->encrypted) {
3812fd04a2aeSShahar Havivi         return -EINVAL;
3813fd04a2aeSShahar Havivi     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
3814fd04a2aeSShahar Havivi         return -ENOMEDIUM;
3815fd04a2aeSShahar Havivi     }
3816c0f4ce77Saliguori     ret = bs->drv->bdrv_set_key(bs, key);
3817bb5fc20fSaliguori     if (ret < 0) {
3818bb5fc20fSaliguori         bs->valid_key = 0;
3819bb5fc20fSaliguori     } else if (!bs->valid_key) {
3820bb5fc20fSaliguori         bs->valid_key = 1;
3821a7f53e26SMarkus Armbruster         if (bs->blk) {
3822bb5fc20fSaliguori             /* call the change callback now, we skipped it on open */
3823a7f53e26SMarkus Armbruster             blk_dev_change_media_cb(bs->blk, true);
3824a7f53e26SMarkus Armbruster         }
3825bb5fc20fSaliguori     }
3826c0f4ce77Saliguori     return ret;
3827ea2384d3Sbellard }
3828ea2384d3Sbellard 
38294d2855a3SMarkus Armbruster /*
38304d2855a3SMarkus Armbruster  * Provide an encryption key for @bs.
38314d2855a3SMarkus Armbruster  * If @key is non-null:
38324d2855a3SMarkus Armbruster  *     If @bs is not encrypted, fail.
38334d2855a3SMarkus Armbruster  *     Else if the key is invalid, fail.
38344d2855a3SMarkus Armbruster  *     Else set @bs's key to @key, replacing the existing key, if any.
38354d2855a3SMarkus Armbruster  * If @key is null:
38364d2855a3SMarkus Armbruster  *     If @bs is encrypted and still lacks a key, fail.
38374d2855a3SMarkus Armbruster  *     Else do nothing.
38384d2855a3SMarkus Armbruster  * On failure, store an error object through @errp if non-null.
38394d2855a3SMarkus Armbruster  */
38404d2855a3SMarkus Armbruster void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
38414d2855a3SMarkus Armbruster {
38424d2855a3SMarkus Armbruster     if (key) {
38434d2855a3SMarkus Armbruster         if (!bdrv_is_encrypted(bs)) {
384481e5f78aSAlberto Garcia             error_setg(errp, "Node '%s' is not encrypted",
384581e5f78aSAlberto Garcia                       bdrv_get_device_or_node_name(bs));
38464d2855a3SMarkus Armbruster         } else if (bdrv_set_key(bs, key) < 0) {
38474d2855a3SMarkus Armbruster             error_set(errp, QERR_INVALID_PASSWORD);
38484d2855a3SMarkus Armbruster         }
38494d2855a3SMarkus Armbruster     } else {
38504d2855a3SMarkus Armbruster         if (bdrv_key_required(bs)) {
3851b1ca6391SMarkus Armbruster             error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
3852b1ca6391SMarkus Armbruster                       "'%s' (%s) is encrypted",
385381e5f78aSAlberto Garcia                       bdrv_get_device_or_node_name(bs),
38544d2855a3SMarkus Armbruster                       bdrv_get_encrypted_filename(bs));
38554d2855a3SMarkus Armbruster         }
38564d2855a3SMarkus Armbruster     }
38574d2855a3SMarkus Armbruster }
38584d2855a3SMarkus Armbruster 
3859f8d6bba1SMarkus Armbruster const char *bdrv_get_format_name(BlockDriverState *bs)
3860ea2384d3Sbellard {
3861f8d6bba1SMarkus Armbruster     return bs->drv ? bs->drv->format_name : NULL;
3862ea2384d3Sbellard }
3863ea2384d3Sbellard 
/*
 * qsort() comparator for an array of C strings ('const char *' elements).
 *
 * qsort() passes pointers to the array elements, so @a and @b each point at
 * a 'const char *' entry and must be dereferenced once before the strings
 * can be compared.  Returns the strcmp() ordering of the two strings.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
3868ada42401SStefan Hajnoczi 
3869ea2384d3Sbellard void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
3870ea2384d3Sbellard                          void *opaque)
3871ea2384d3Sbellard {
3872ea2384d3Sbellard     BlockDriver *drv;
3873e855e4fbSJeff Cody     int count = 0;
3874ada42401SStefan Hajnoczi     int i;
3875e855e4fbSJeff Cody     const char **formats = NULL;
3876ea2384d3Sbellard 
38778a22f02aSStefan Hajnoczi     QLIST_FOREACH(drv, &bdrv_drivers, list) {
3878e855e4fbSJeff Cody         if (drv->format_name) {
3879e855e4fbSJeff Cody             bool found = false;
3880e855e4fbSJeff Cody             int i = count;
3881e855e4fbSJeff Cody             while (formats && i && !found) {
3882e855e4fbSJeff Cody                 found = !strcmp(formats[--i], drv->format_name);
3883e855e4fbSJeff Cody             }
3884e855e4fbSJeff Cody 
3885e855e4fbSJeff Cody             if (!found) {
38865839e53bSMarkus Armbruster                 formats = g_renew(const char *, formats, count + 1);
3887e855e4fbSJeff Cody                 formats[count++] = drv->format_name;
3888ea2384d3Sbellard             }
3889ea2384d3Sbellard         }
3890e855e4fbSJeff Cody     }
3891ada42401SStefan Hajnoczi 
3892ada42401SStefan Hajnoczi     qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
3893ada42401SStefan Hajnoczi 
3894ada42401SStefan Hajnoczi     for (i = 0; i < count; i++) {
3895ada42401SStefan Hajnoczi         it(opaque, formats[i]);
3896ada42401SStefan Hajnoczi     }
3897ada42401SStefan Hajnoczi 
3898e855e4fbSJeff Cody     g_free(formats);
3899e855e4fbSJeff Cody }
3900ea2384d3Sbellard 
3901dc364f4cSBenoît Canet /* This function is to find a node in the bs graph */
3902dc364f4cSBenoît Canet BlockDriverState *bdrv_find_node(const char *node_name)
3903dc364f4cSBenoît Canet {
3904dc364f4cSBenoît Canet     BlockDriverState *bs;
3905dc364f4cSBenoît Canet 
3906dc364f4cSBenoît Canet     assert(node_name);
3907dc364f4cSBenoît Canet 
3908dc364f4cSBenoît Canet     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3909dc364f4cSBenoît Canet         if (!strcmp(node_name, bs->node_name)) {
3910dc364f4cSBenoît Canet             return bs;
3911dc364f4cSBenoît Canet         }
3912dc364f4cSBenoît Canet     }
3913dc364f4cSBenoît Canet     return NULL;
3914dc364f4cSBenoît Canet }
3915dc364f4cSBenoît Canet 
3916c13163fbSBenoît Canet /* Put this QMP function here so it can access the static graph_bdrv_states. */
3917d5a8ee60SAlberto Garcia BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
3918c13163fbSBenoît Canet {
3919c13163fbSBenoît Canet     BlockDeviceInfoList *list, *entry;
3920c13163fbSBenoît Canet     BlockDriverState *bs;
3921c13163fbSBenoît Canet 
3922c13163fbSBenoît Canet     list = NULL;
3923c13163fbSBenoît Canet     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3924d5a8ee60SAlberto Garcia         BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
3925d5a8ee60SAlberto Garcia         if (!info) {
3926d5a8ee60SAlberto Garcia             qapi_free_BlockDeviceInfoList(list);
3927d5a8ee60SAlberto Garcia             return NULL;
3928d5a8ee60SAlberto Garcia         }
3929c13163fbSBenoît Canet         entry = g_malloc0(sizeof(*entry));
3930d5a8ee60SAlberto Garcia         entry->value = info;
3931c13163fbSBenoît Canet         entry->next = list;
3932c13163fbSBenoît Canet         list = entry;
3933c13163fbSBenoît Canet     }
3934c13163fbSBenoît Canet 
3935c13163fbSBenoît Canet     return list;
3936c13163fbSBenoît Canet }
3937c13163fbSBenoît Canet 
393812d3ba82SBenoît Canet BlockDriverState *bdrv_lookup_bs(const char *device,
393912d3ba82SBenoît Canet                                  const char *node_name,
394012d3ba82SBenoît Canet                                  Error **errp)
394112d3ba82SBenoît Canet {
39427f06d47eSMarkus Armbruster     BlockBackend *blk;
39437f06d47eSMarkus Armbruster     BlockDriverState *bs;
394412d3ba82SBenoît Canet 
394512d3ba82SBenoît Canet     if (device) {
39467f06d47eSMarkus Armbruster         blk = blk_by_name(device);
394712d3ba82SBenoît Canet 
39487f06d47eSMarkus Armbruster         if (blk) {
39497f06d47eSMarkus Armbruster             return blk_bs(blk);
395012d3ba82SBenoît Canet         }
3951dd67fa50SBenoît Canet     }
395212d3ba82SBenoît Canet 
3953dd67fa50SBenoît Canet     if (node_name) {
395412d3ba82SBenoît Canet         bs = bdrv_find_node(node_name);
395512d3ba82SBenoît Canet 
3956dd67fa50SBenoît Canet         if (bs) {
3957dd67fa50SBenoît Canet             return bs;
3958dd67fa50SBenoît Canet         }
395912d3ba82SBenoît Canet     }
396012d3ba82SBenoît Canet 
3961dd67fa50SBenoît Canet     error_setg(errp, "Cannot find device=%s nor node_name=%s",
3962dd67fa50SBenoît Canet                      device ? device : "",
3963dd67fa50SBenoît Canet                      node_name ? node_name : "");
3964dd67fa50SBenoît Canet     return NULL;
396512d3ba82SBenoît Canet }
396612d3ba82SBenoît Canet 
39675a6684d2SJeff Cody /* If 'base' is in the same chain as 'top', return true. Otherwise,
39685a6684d2SJeff Cody  * return false.  If either argument is NULL, return false. */
39695a6684d2SJeff Cody bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
39705a6684d2SJeff Cody {
39715a6684d2SJeff Cody     while (top && top != base) {
39725a6684d2SJeff Cody         top = top->backing_hd;
39735a6684d2SJeff Cody     }
39745a6684d2SJeff Cody 
39755a6684d2SJeff Cody     return top != NULL;
39765a6684d2SJeff Cody }
39775a6684d2SJeff Cody 
397804df765aSFam Zheng BlockDriverState *bdrv_next_node(BlockDriverState *bs)
397904df765aSFam Zheng {
398004df765aSFam Zheng     if (!bs) {
398104df765aSFam Zheng         return QTAILQ_FIRST(&graph_bdrv_states);
398204df765aSFam Zheng     }
398304df765aSFam Zheng     return QTAILQ_NEXT(bs, node_list);
398404df765aSFam Zheng }
398504df765aSFam Zheng 
39862f399b0aSMarkus Armbruster BlockDriverState *bdrv_next(BlockDriverState *bs)
39872f399b0aSMarkus Armbruster {
39882f399b0aSMarkus Armbruster     if (!bs) {
39892f399b0aSMarkus Armbruster         return QTAILQ_FIRST(&bdrv_states);
39902f399b0aSMarkus Armbruster     }
3991dc364f4cSBenoît Canet     return QTAILQ_NEXT(bs, device_list);
39922f399b0aSMarkus Armbruster }
39932f399b0aSMarkus Armbruster 
399420a9e77dSFam Zheng const char *bdrv_get_node_name(const BlockDriverState *bs)
399520a9e77dSFam Zheng {
399620a9e77dSFam Zheng     return bs->node_name;
399720a9e77dSFam Zheng }
399820a9e77dSFam Zheng 
39997f06d47eSMarkus Armbruster /* TODO check what callers really want: bs->node_name or blk_name() */
4000bfb197e0SMarkus Armbruster const char *bdrv_get_device_name(const BlockDriverState *bs)
4001ea2384d3Sbellard {
4002bfb197e0SMarkus Armbruster     return bs->blk ? blk_name(bs->blk) : "";
4003ea2384d3Sbellard }
4004ea2384d3Sbellard 
40059b2aa84fSAlberto Garcia /* This can be used to identify nodes that might not have a device
40069b2aa84fSAlberto Garcia  * name associated. Since node and device names live in the same
40079b2aa84fSAlberto Garcia  * namespace, the result is unambiguous. The exception is if both are
40089b2aa84fSAlberto Garcia  * absent, then this returns an empty (non-null) string. */
40099b2aa84fSAlberto Garcia const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
40109b2aa84fSAlberto Garcia {
40119b2aa84fSAlberto Garcia     return bs->blk ? blk_name(bs->blk) : bs->node_name;
40129b2aa84fSAlberto Garcia }
40139b2aa84fSAlberto Garcia 
4014c8433287SMarkus Armbruster int bdrv_get_flags(BlockDriverState *bs)
4015c8433287SMarkus Armbruster {
4016c8433287SMarkus Armbruster     return bs->open_flags;
4017c8433287SMarkus Armbruster }
4018c8433287SMarkus Armbruster 
4019f0f0fdfeSKevin Wolf int bdrv_flush_all(void)
4020c6ca28d6Saliguori {
40214f5472cbSStefan Hajnoczi     BlockDriverState *bs = NULL;
4022f0f0fdfeSKevin Wolf     int result = 0;
4023c6ca28d6Saliguori 
40244f5472cbSStefan Hajnoczi     while ((bs = bdrv_next(bs))) {
4025ed78cda3SStefan Hajnoczi         AioContext *aio_context = bdrv_get_aio_context(bs);
4026ed78cda3SStefan Hajnoczi         int ret;
4027ed78cda3SStefan Hajnoczi 
4028ed78cda3SStefan Hajnoczi         aio_context_acquire(aio_context);
4029ed78cda3SStefan Hajnoczi         ret = bdrv_flush(bs);
4030f0f0fdfeSKevin Wolf         if (ret < 0 && !result) {
4031f0f0fdfeSKevin Wolf             result = ret;
4032c6ca28d6Saliguori         }
4033ed78cda3SStefan Hajnoczi         aio_context_release(aio_context);
40341b7bdbc1SStefan Hajnoczi     }
4035c6ca28d6Saliguori 
4036f0f0fdfeSKevin Wolf     return result;
4037f0f0fdfeSKevin Wolf }
4038f0f0fdfeSKevin Wolf 
40393ac21627SPeter Lieven int bdrv_has_zero_init_1(BlockDriverState *bs)
40403ac21627SPeter Lieven {
40413ac21627SPeter Lieven     return 1;
40423ac21627SPeter Lieven }
40433ac21627SPeter Lieven 
4044f2feebbdSKevin Wolf int bdrv_has_zero_init(BlockDriverState *bs)
4045f2feebbdSKevin Wolf {
4046f2feebbdSKevin Wolf     assert(bs->drv);
4047f2feebbdSKevin Wolf 
404811212d8fSPaolo Bonzini     /* If BS is a copy on write image, it is initialized to
404911212d8fSPaolo Bonzini        the contents of the base image, which may not be zeroes.  */
405011212d8fSPaolo Bonzini     if (bs->backing_hd) {
405111212d8fSPaolo Bonzini         return 0;
405211212d8fSPaolo Bonzini     }
4053336c1c12SKevin Wolf     if (bs->drv->bdrv_has_zero_init) {
4054336c1c12SKevin Wolf         return bs->drv->bdrv_has_zero_init(bs);
4055f2feebbdSKevin Wolf     }
4056f2feebbdSKevin Wolf 
40573ac21627SPeter Lieven     /* safe default */
40583ac21627SPeter Lieven     return 0;
4059f2feebbdSKevin Wolf }
4060f2feebbdSKevin Wolf 
40614ce78691SPeter Lieven bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
40624ce78691SPeter Lieven {
40634ce78691SPeter Lieven     BlockDriverInfo bdi;
40644ce78691SPeter Lieven 
40654ce78691SPeter Lieven     if (bs->backing_hd) {
40664ce78691SPeter Lieven         return false;
40674ce78691SPeter Lieven     }
40684ce78691SPeter Lieven 
40694ce78691SPeter Lieven     if (bdrv_get_info(bs, &bdi) == 0) {
40704ce78691SPeter Lieven         return bdi.unallocated_blocks_are_zero;
40714ce78691SPeter Lieven     }
40724ce78691SPeter Lieven 
40734ce78691SPeter Lieven     return false;
40744ce78691SPeter Lieven }
40754ce78691SPeter Lieven 
40764ce78691SPeter Lieven bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
40774ce78691SPeter Lieven {
40784ce78691SPeter Lieven     BlockDriverInfo bdi;
40794ce78691SPeter Lieven 
40804ce78691SPeter Lieven     if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
40814ce78691SPeter Lieven         return false;
40824ce78691SPeter Lieven     }
40834ce78691SPeter Lieven 
40844ce78691SPeter Lieven     if (bdrv_get_info(bs, &bdi) == 0) {
40854ce78691SPeter Lieven         return bdi.can_write_zeroes_with_unmap;
40864ce78691SPeter Lieven     }
40874ce78691SPeter Lieven 
40884ce78691SPeter Lieven     return false;
40894ce78691SPeter Lieven }
40904ce78691SPeter Lieven 
/*
 * Argument/result bundle for a block-status query.
 *
 * NOTE(review): the code that fills and consumes this struct is not in the
 * visible part of the file; the field notes below are inferred from the
 * names shared with bdrv_co_get_block_status()'s parameters -- confirm
 * against the users.
 */
typedef struct BdrvCoGetBlockStatusData {
    BlockDriverState *bs;       /* node being queried */
    BlockDriverState *base;     /* presumably a base node bounding the query */
    int64_t sector_num;         /* first sector of the query */
    int nb_sectors;             /* number of sectors to query */
    int *pnum;                  /* out: sector count reported by the query */
    int64_t ret;                /* presumably the query's return value */
    bool done;                  /* presumably set once ret is valid */
} BdrvCoGetBlockStatusData;
4100376ae3f1SStefan Hajnoczi 
4101f58c7b35Sths /*
4102705be728SFam Zheng  * Returns the allocation status of the specified sectors.
4103705be728SFam Zheng  * Drivers not implementing the functionality are assumed to not support
4104705be728SFam Zheng  * backing files, hence all their sectors are reported as allocated.
4105f58c7b35Sths  *
4106bd9533e3SStefan Hajnoczi  * If 'sector_num' is beyond the end of the disk image the return value is 0
4107bd9533e3SStefan Hajnoczi  * and 'pnum' is set to 0.
4108bd9533e3SStefan Hajnoczi  *
4109f58c7b35Sths  * 'pnum' is set to the number of sectors (including and immediately following
4110f58c7b35Sths  * the specified sector) that are known to be in the same
4111f58c7b35Sths  * allocated/unallocated state.
4112f58c7b35Sths  *
4113bd9533e3SStefan Hajnoczi  * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
4114bd9533e3SStefan Hajnoczi  * beyond the end of the disk image it will be clamped.
4115f58c7b35Sths  */
4116b6b8a333SPaolo Bonzini static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
4117bdad13b9SPaolo Bonzini                                                      int64_t sector_num,
4118060f51c9SStefan Hajnoczi                                                      int nb_sectors, int *pnum)
4119f58c7b35Sths {
412030a7f2fcSMarkus Armbruster     int64_t total_sectors;
4121f58c7b35Sths     int64_t n;
41225daa74a6SPaolo Bonzini     int64_t ret, ret2;
4123bd9533e3SStefan Hajnoczi 
412430a7f2fcSMarkus Armbruster     total_sectors = bdrv_nb_sectors(bs);
412530a7f2fcSMarkus Armbruster     if (total_sectors < 0) {
412630a7f2fcSMarkus Armbruster         return total_sectors;
4127617ccb46SPaolo Bonzini     }
4128617ccb46SPaolo Bonzini 
412930a7f2fcSMarkus Armbruster     if (sector_num >= total_sectors) {
41306aebab14SStefan Hajnoczi         *pnum = 0;
41316aebab14SStefan Hajnoczi         return 0;
41326aebab14SStefan Hajnoczi     }
4133bd9533e3SStefan Hajnoczi 
413430a7f2fcSMarkus Armbruster     n = total_sectors - sector_num;
4135bd9533e3SStefan Hajnoczi     if (n < nb_sectors) {
4136bd9533e3SStefan Hajnoczi         nb_sectors = n;
4137bd9533e3SStefan Hajnoczi     }
4138bd9533e3SStefan Hajnoczi 
4139b6b8a333SPaolo Bonzini     if (!bs->drv->bdrv_co_get_block_status) {
4140bd9533e3SStefan Hajnoczi         *pnum = nb_sectors;
4141e88ae226SKevin Wolf         ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
4142918e92d7SPaolo Bonzini         if (bs->drv->protocol_name) {
4143918e92d7SPaolo Bonzini             ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
4144918e92d7SPaolo Bonzini         }
4145918e92d7SPaolo Bonzini         return ret;
41466aebab14SStefan Hajnoczi     }
41476aebab14SStefan Hajnoczi 
4148415b5b01SPaolo Bonzini     ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
4149415b5b01SPaolo Bonzini     if (ret < 0) {
41503e0a233dSPeter Lieven         *pnum = 0;
4151415b5b01SPaolo Bonzini         return ret;
4152415b5b01SPaolo Bonzini     }
4153415b5b01SPaolo Bonzini 
415492bc50a5SPeter Lieven     if (ret & BDRV_BLOCK_RAW) {
415592bc50a5SPeter Lieven         assert(ret & BDRV_BLOCK_OFFSET_VALID);
415692bc50a5SPeter Lieven         return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
415792bc50a5SPeter Lieven                                      *pnum, pnum);
415892bc50a5SPeter Lieven     }
415992bc50a5SPeter Lieven 
4160e88ae226SKevin Wolf     if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
4161e88ae226SKevin Wolf         ret |= BDRV_BLOCK_ALLOCATED;
4162e88ae226SKevin Wolf     }
4163e88ae226SKevin Wolf 
4164c3d86884SPeter Lieven     if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
4165c3d86884SPeter Lieven         if (bdrv_unallocated_blocks_are_zero(bs)) {
4166415b5b01SPaolo Bonzini             ret |= BDRV_BLOCK_ZERO;
41671f9db224SPeter Lieven         } else if (bs->backing_hd) {
4168f0ad5712SPaolo Bonzini             BlockDriverState *bs2 = bs->backing_hd;
416930a7f2fcSMarkus Armbruster             int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
417030a7f2fcSMarkus Armbruster             if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
4171f0ad5712SPaolo Bonzini                 ret |= BDRV_BLOCK_ZERO;
4172f0ad5712SPaolo Bonzini             }
4173f0ad5712SPaolo Bonzini         }
4174415b5b01SPaolo Bonzini     }
41755daa74a6SPaolo Bonzini 
41765daa74a6SPaolo Bonzini     if (bs->file &&
41775daa74a6SPaolo Bonzini         (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
41785daa74a6SPaolo Bonzini         (ret & BDRV_BLOCK_OFFSET_VALID)) {
417959c9a95fSMax Reitz         int file_pnum;
418059c9a95fSMax Reitz 
41815daa74a6SPaolo Bonzini         ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
418259c9a95fSMax Reitz                                         *pnum, &file_pnum);
41835daa74a6SPaolo Bonzini         if (ret2 >= 0) {
41845daa74a6SPaolo Bonzini             /* Ignore errors.  This is just providing extra information, it
41855daa74a6SPaolo Bonzini              * is useful but not necessary.
41865daa74a6SPaolo Bonzini              */
418759c9a95fSMax Reitz             if (!file_pnum) {
418859c9a95fSMax Reitz                 /* !file_pnum indicates an offset at or beyond the EOF; it is
418959c9a95fSMax Reitz                  * perfectly valid for the format block driver to point to such
419059c9a95fSMax Reitz                  * offsets, so catch it and mark everything as zero */
419159c9a95fSMax Reitz                 ret |= BDRV_BLOCK_ZERO;
419259c9a95fSMax Reitz             } else {
419359c9a95fSMax Reitz                 /* Limit request to the range reported by the protocol driver */
419459c9a95fSMax Reitz                 *pnum = file_pnum;
41955daa74a6SPaolo Bonzini                 ret |= (ret2 & BDRV_BLOCK_ZERO);
41965daa74a6SPaolo Bonzini             }
41975daa74a6SPaolo Bonzini         }
419859c9a95fSMax Reitz     }
41995daa74a6SPaolo Bonzini 
4200415b5b01SPaolo Bonzini     return ret;
4201060f51c9SStefan Hajnoczi }
4202060f51c9SStefan Hajnoczi 
4203b6b8a333SPaolo Bonzini /* Coroutine wrapper for bdrv_get_block_status() */
4204b6b8a333SPaolo Bonzini static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
4205060f51c9SStefan Hajnoczi {
4206b6b8a333SPaolo Bonzini     BdrvCoGetBlockStatusData *data = opaque;
4207060f51c9SStefan Hajnoczi     BlockDriverState *bs = data->bs;
4208060f51c9SStefan Hajnoczi 
4209b6b8a333SPaolo Bonzini     data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
4210060f51c9SStefan Hajnoczi                                          data->pnum);
4211060f51c9SStefan Hajnoczi     data->done = true;
4212060f51c9SStefan Hajnoczi }
4213060f51c9SStefan Hajnoczi 
4214060f51c9SStefan Hajnoczi /*
4215b6b8a333SPaolo Bonzini  * Synchronous wrapper around bdrv_co_get_block_status().
4216060f51c9SStefan Hajnoczi  *
4217b6b8a333SPaolo Bonzini  * See bdrv_co_get_block_status() for details.
4218060f51c9SStefan Hajnoczi  */
4219b6b8a333SPaolo Bonzini int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
4220b6b8a333SPaolo Bonzini                               int nb_sectors, int *pnum)
4221060f51c9SStefan Hajnoczi {
4222376ae3f1SStefan Hajnoczi     Coroutine *co;
4223b6b8a333SPaolo Bonzini     BdrvCoGetBlockStatusData data = {
4224376ae3f1SStefan Hajnoczi         .bs = bs,
4225376ae3f1SStefan Hajnoczi         .sector_num = sector_num,
4226376ae3f1SStefan Hajnoczi         .nb_sectors = nb_sectors,
4227376ae3f1SStefan Hajnoczi         .pnum = pnum,
4228376ae3f1SStefan Hajnoczi         .done = false,
4229376ae3f1SStefan Hajnoczi     };
4230376ae3f1SStefan Hajnoczi 
4231bdad13b9SPaolo Bonzini     if (qemu_in_coroutine()) {
4232bdad13b9SPaolo Bonzini         /* Fast-path if already in coroutine context */
4233b6b8a333SPaolo Bonzini         bdrv_get_block_status_co_entry(&data);
4234bdad13b9SPaolo Bonzini     } else {
42352572b37aSStefan Hajnoczi         AioContext *aio_context = bdrv_get_aio_context(bs);
42362572b37aSStefan Hajnoczi 
4237b6b8a333SPaolo Bonzini         co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
4238376ae3f1SStefan Hajnoczi         qemu_coroutine_enter(co, &data);
4239376ae3f1SStefan Hajnoczi         while (!data.done) {
42402572b37aSStefan Hajnoczi             aio_poll(aio_context, true);
4241376ae3f1SStefan Hajnoczi         }
4242bdad13b9SPaolo Bonzini     }
4243376ae3f1SStefan Hajnoczi     return data.ret;
4244376ae3f1SStefan Hajnoczi }
4245f58c7b35Sths 
4246b6b8a333SPaolo Bonzini int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
4247b6b8a333SPaolo Bonzini                                    int nb_sectors, int *pnum)
4248b6b8a333SPaolo Bonzini {
42494333bb71SPaolo Bonzini     int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
42504333bb71SPaolo Bonzini     if (ret < 0) {
42514333bb71SPaolo Bonzini         return ret;
42524333bb71SPaolo Bonzini     }
425301fb2705SKevin Wolf     return !!(ret & BDRV_BLOCK_ALLOCATED);
4254b6b8a333SPaolo Bonzini }
4255b6b8a333SPaolo Bonzini 
4256188a7bbfSPaolo Bonzini /*
4257188a7bbfSPaolo Bonzini  * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
4258188a7bbfSPaolo Bonzini  *
4259188a7bbfSPaolo Bonzini  * Return true if the given sector is allocated in any image between
4260188a7bbfSPaolo Bonzini  * BASE and TOP (inclusive).  BASE can be NULL to check if the given
4261188a7bbfSPaolo Bonzini  * sector is allocated in any image of the chain.  Return false otherwise.
4262188a7bbfSPaolo Bonzini  *
4263188a7bbfSPaolo Bonzini  * 'pnum' is set to the number of sectors (including and immediately following
4264188a7bbfSPaolo Bonzini  *  the specified sector) that are known to be in the same
4265188a7bbfSPaolo Bonzini  *  allocated/unallocated state.
4266188a7bbfSPaolo Bonzini  *
4267188a7bbfSPaolo Bonzini  */
42684f578637SPaolo Bonzini int bdrv_is_allocated_above(BlockDriverState *top,
4269188a7bbfSPaolo Bonzini                             BlockDriverState *base,
4270188a7bbfSPaolo Bonzini                             int64_t sector_num,
4271188a7bbfSPaolo Bonzini                             int nb_sectors, int *pnum)
4272188a7bbfSPaolo Bonzini {
4273188a7bbfSPaolo Bonzini     BlockDriverState *intermediate;
4274188a7bbfSPaolo Bonzini     int ret, n = nb_sectors;
4275188a7bbfSPaolo Bonzini 
4276188a7bbfSPaolo Bonzini     intermediate = top;
4277188a7bbfSPaolo Bonzini     while (intermediate && intermediate != base) {
4278188a7bbfSPaolo Bonzini         int pnum_inter;
4279bdad13b9SPaolo Bonzini         ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
4280188a7bbfSPaolo Bonzini                                 &pnum_inter);
4281188a7bbfSPaolo Bonzini         if (ret < 0) {
4282188a7bbfSPaolo Bonzini             return ret;
4283188a7bbfSPaolo Bonzini         } else if (ret) {
4284188a7bbfSPaolo Bonzini             *pnum = pnum_inter;
4285188a7bbfSPaolo Bonzini             return 1;
4286188a7bbfSPaolo Bonzini         }
4287188a7bbfSPaolo Bonzini 
4288188a7bbfSPaolo Bonzini         /*
4289188a7bbfSPaolo Bonzini          * [sector_num, nb_sectors] is unallocated on top but intermediate
4290188a7bbfSPaolo Bonzini          * might have
4291188a7bbfSPaolo Bonzini          *
4292188a7bbfSPaolo Bonzini          * [sector_num+x, nr_sectors] allocated.
4293188a7bbfSPaolo Bonzini          */
429463ba17d3SVishvananda Ishaya         if (n > pnum_inter &&
429563ba17d3SVishvananda Ishaya             (intermediate == top ||
429663ba17d3SVishvananda Ishaya              sector_num + pnum_inter < intermediate->total_sectors)) {
4297188a7bbfSPaolo Bonzini             n = pnum_inter;
4298188a7bbfSPaolo Bonzini         }
4299188a7bbfSPaolo Bonzini 
4300188a7bbfSPaolo Bonzini         intermediate = intermediate->backing_hd;
4301188a7bbfSPaolo Bonzini     }
4302188a7bbfSPaolo Bonzini 
4303188a7bbfSPaolo Bonzini     *pnum = n;
4304188a7bbfSPaolo Bonzini     return 0;
4305188a7bbfSPaolo Bonzini }
4306188a7bbfSPaolo Bonzini 
4307045df330Saliguori const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
4308045df330Saliguori {
4309045df330Saliguori     if (bs->backing_hd && bs->backing_hd->encrypted)
4310045df330Saliguori         return bs->backing_file;
4311045df330Saliguori     else if (bs->encrypted)
4312045df330Saliguori         return bs->filename;
4313045df330Saliguori     else
4314045df330Saliguori         return NULL;
4315045df330Saliguori }
4316045df330Saliguori 
431783f64091Sbellard void bdrv_get_backing_filename(BlockDriverState *bs,
431883f64091Sbellard                                char *filename, int filename_size)
431983f64091Sbellard {
432083f64091Sbellard     pstrcpy(filename, filename_size, bs->backing_file);
432183f64091Sbellard }
432283f64091Sbellard 
4323faea38e7Sbellard int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
4324faea38e7Sbellard                           const uint8_t *buf, int nb_sectors)
4325faea38e7Sbellard {
4326faea38e7Sbellard     BlockDriver *drv = bs->drv;
4327b9c64947SMax Reitz     int ret;
4328b9c64947SMax Reitz 
4329b9c64947SMax Reitz     if (!drv) {
433019cb3738Sbellard         return -ENOMEDIUM;
4331b9c64947SMax Reitz     }
4332b9c64947SMax Reitz     if (!drv->bdrv_write_compressed) {
4333faea38e7Sbellard         return -ENOTSUP;
4334b9c64947SMax Reitz     }
4335b9c64947SMax Reitz     ret = bdrv_check_request(bs, sector_num, nb_sectors);
4336b9c64947SMax Reitz     if (ret < 0) {
4337b9c64947SMax Reitz         return ret;
4338b9c64947SMax Reitz     }
43397cd1e32aSlirans@il.ibm.com 
4340e4654d2dSFam Zheng     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
43417cd1e32aSlirans@il.ibm.com 
4342faea38e7Sbellard     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
4343faea38e7Sbellard }
4344faea38e7Sbellard 
4345faea38e7Sbellard int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
4346faea38e7Sbellard {
4347faea38e7Sbellard     BlockDriver *drv = bs->drv;
4348faea38e7Sbellard     if (!drv)
434919cb3738Sbellard         return -ENOMEDIUM;
4350faea38e7Sbellard     if (!drv->bdrv_get_info)
4351faea38e7Sbellard         return -ENOTSUP;
4352faea38e7Sbellard     memset(bdi, 0, sizeof(*bdi));
4353faea38e7Sbellard     return drv->bdrv_get_info(bs, bdi);
4354faea38e7Sbellard }
4355faea38e7Sbellard 
4356eae041feSMax Reitz ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
4357eae041feSMax Reitz {
4358eae041feSMax Reitz     BlockDriver *drv = bs->drv;
4359eae041feSMax Reitz     if (drv && drv->bdrv_get_specific_info) {
4360eae041feSMax Reitz         return drv->bdrv_get_specific_info(bs);
4361eae041feSMax Reitz     }
4362eae041feSMax Reitz     return NULL;
4363eae041feSMax Reitz }
4364eae041feSMax Reitz 
436545566e9cSChristoph Hellwig int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
436645566e9cSChristoph Hellwig                       int64_t pos, int size)
4367178e08a5Saliguori {
4368cf8074b3SKevin Wolf     QEMUIOVector qiov;
4369cf8074b3SKevin Wolf     struct iovec iov = {
4370cf8074b3SKevin Wolf         .iov_base   = (void *) buf,
4371cf8074b3SKevin Wolf         .iov_len    = size,
4372cf8074b3SKevin Wolf     };
4373cf8074b3SKevin Wolf 
4374cf8074b3SKevin Wolf     qemu_iovec_init_external(&qiov, &iov, 1);
4375cf8074b3SKevin Wolf     return bdrv_writev_vmstate(bs, &qiov, pos);
4376cf8074b3SKevin Wolf }
4377cf8074b3SKevin Wolf 
4378cf8074b3SKevin Wolf int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
4379cf8074b3SKevin Wolf {
4380178e08a5Saliguori     BlockDriver *drv = bs->drv;
4381cf8074b3SKevin Wolf 
4382cf8074b3SKevin Wolf     if (!drv) {
4383178e08a5Saliguori         return -ENOMEDIUM;
4384cf8074b3SKevin Wolf     } else if (drv->bdrv_save_vmstate) {
4385cf8074b3SKevin Wolf         return drv->bdrv_save_vmstate(bs, qiov, pos);
4386cf8074b3SKevin Wolf     } else if (bs->file) {
4387cf8074b3SKevin Wolf         return bdrv_writev_vmstate(bs->file, qiov, pos);
4388cf8074b3SKevin Wolf     }
4389cf8074b3SKevin Wolf 
43907cdb1f6dSMORITA Kazutaka     return -ENOTSUP;
4391178e08a5Saliguori }
4392178e08a5Saliguori 
439345566e9cSChristoph Hellwig int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
439445566e9cSChristoph Hellwig                       int64_t pos, int size)
4395178e08a5Saliguori {
4396178e08a5Saliguori     BlockDriver *drv = bs->drv;
4397178e08a5Saliguori     if (!drv)
4398178e08a5Saliguori         return -ENOMEDIUM;
43997cdb1f6dSMORITA Kazutaka     if (drv->bdrv_load_vmstate)
440045566e9cSChristoph Hellwig         return drv->bdrv_load_vmstate(bs, buf, pos, size);
44017cdb1f6dSMORITA Kazutaka     if (bs->file)
44027cdb1f6dSMORITA Kazutaka         return bdrv_load_vmstate(bs->file, buf, pos, size);
44037cdb1f6dSMORITA Kazutaka     return -ENOTSUP;
4404178e08a5Saliguori }
4405178e08a5Saliguori 
44068b9b0cc2SKevin Wolf void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
44078b9b0cc2SKevin Wolf {
4408bf736fe3SKevin Wolf     if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
44098b9b0cc2SKevin Wolf         return;
44108b9b0cc2SKevin Wolf     }
44118b9b0cc2SKevin Wolf 
4412bf736fe3SKevin Wolf     bs->drv->bdrv_debug_event(bs, event);
441341c695c7SKevin Wolf }
44148b9b0cc2SKevin Wolf 
441541c695c7SKevin Wolf int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
441641c695c7SKevin Wolf                           const char *tag)
441741c695c7SKevin Wolf {
441841c695c7SKevin Wolf     while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
441941c695c7SKevin Wolf         bs = bs->file;
442041c695c7SKevin Wolf     }
442141c695c7SKevin Wolf 
442241c695c7SKevin Wolf     if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
442341c695c7SKevin Wolf         return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
442441c695c7SKevin Wolf     }
442541c695c7SKevin Wolf 
442641c695c7SKevin Wolf     return -ENOTSUP;
442741c695c7SKevin Wolf }
442841c695c7SKevin Wolf 
44294cc70e93SFam Zheng int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
44304cc70e93SFam Zheng {
44314cc70e93SFam Zheng     while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
44324cc70e93SFam Zheng         bs = bs->file;
44334cc70e93SFam Zheng     }
44344cc70e93SFam Zheng 
44354cc70e93SFam Zheng     if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
44364cc70e93SFam Zheng         return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
44374cc70e93SFam Zheng     }
44384cc70e93SFam Zheng 
44394cc70e93SFam Zheng     return -ENOTSUP;
44404cc70e93SFam Zheng }
44414cc70e93SFam Zheng 
444241c695c7SKevin Wolf int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
444341c695c7SKevin Wolf {
4444938789eaSMax Reitz     while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
444541c695c7SKevin Wolf         bs = bs->file;
444641c695c7SKevin Wolf     }
444741c695c7SKevin Wolf 
444841c695c7SKevin Wolf     if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
444941c695c7SKevin Wolf         return bs->drv->bdrv_debug_resume(bs, tag);
445041c695c7SKevin Wolf     }
445141c695c7SKevin Wolf 
445241c695c7SKevin Wolf     return -ENOTSUP;
445341c695c7SKevin Wolf }
445441c695c7SKevin Wolf 
445541c695c7SKevin Wolf bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
445641c695c7SKevin Wolf {
445741c695c7SKevin Wolf     while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
445841c695c7SKevin Wolf         bs = bs->file;
445941c695c7SKevin Wolf     }
446041c695c7SKevin Wolf 
446141c695c7SKevin Wolf     if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
446241c695c7SKevin Wolf         return bs->drv->bdrv_debug_is_suspended(bs, tag);
446341c695c7SKevin Wolf     }
446441c695c7SKevin Wolf 
446541c695c7SKevin Wolf     return false;
44668b9b0cc2SKevin Wolf }
44678b9b0cc2SKevin Wolf 
4468199630b6SBlue Swirl int bdrv_is_snapshot(BlockDriverState *bs)
4469199630b6SBlue Swirl {
4470199630b6SBlue Swirl     return !!(bs->open_flags & BDRV_O_SNAPSHOT);
4471199630b6SBlue Swirl }
4472199630b6SBlue Swirl 
4473b1b1d783SJeff Cody /* backing_file can either be relative, or absolute, or a protocol.  If it is
4474b1b1d783SJeff Cody  * relative, it must be relative to the chain.  So, passing in bs->filename
4475b1b1d783SJeff Cody  * from a BDS as backing_file should not be done, as that may be relative to
4476b1b1d783SJeff Cody  * the CWD rather than the chain. */
4477e8a6bb9cSMarcelo Tosatti BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
4478e8a6bb9cSMarcelo Tosatti         const char *backing_file)
4479e8a6bb9cSMarcelo Tosatti {
4480b1b1d783SJeff Cody     char *filename_full = NULL;
4481b1b1d783SJeff Cody     char *backing_file_full = NULL;
4482b1b1d783SJeff Cody     char *filename_tmp = NULL;
4483b1b1d783SJeff Cody     int is_protocol = 0;
4484b1b1d783SJeff Cody     BlockDriverState *curr_bs = NULL;
4485b1b1d783SJeff Cody     BlockDriverState *retval = NULL;
4486b1b1d783SJeff Cody 
4487b1b1d783SJeff Cody     if (!bs || !bs->drv || !backing_file) {
4488e8a6bb9cSMarcelo Tosatti         return NULL;
4489e8a6bb9cSMarcelo Tosatti     }
4490e8a6bb9cSMarcelo Tosatti 
4491b1b1d783SJeff Cody     filename_full     = g_malloc(PATH_MAX);
4492b1b1d783SJeff Cody     backing_file_full = g_malloc(PATH_MAX);
4493b1b1d783SJeff Cody     filename_tmp      = g_malloc(PATH_MAX);
4494b1b1d783SJeff Cody 
4495b1b1d783SJeff Cody     is_protocol = path_has_protocol(backing_file);
4496b1b1d783SJeff Cody 
4497b1b1d783SJeff Cody     for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
4498b1b1d783SJeff Cody 
4499b1b1d783SJeff Cody         /* If either of the filename paths is actually a protocol, then
4500b1b1d783SJeff Cody          * compare unmodified paths; otherwise make paths relative */
4501b1b1d783SJeff Cody         if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
4502b1b1d783SJeff Cody             if (strcmp(backing_file, curr_bs->backing_file) == 0) {
4503b1b1d783SJeff Cody                 retval = curr_bs->backing_hd;
4504b1b1d783SJeff Cody                 break;
4505b1b1d783SJeff Cody             }
4506e8a6bb9cSMarcelo Tosatti         } else {
4507b1b1d783SJeff Cody             /* If not an absolute filename path, make it relative to the current
4508b1b1d783SJeff Cody              * image's filename path */
4509b1b1d783SJeff Cody             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4510b1b1d783SJeff Cody                          backing_file);
4511b1b1d783SJeff Cody 
4512b1b1d783SJeff Cody             /* We are going to compare absolute pathnames */
4513b1b1d783SJeff Cody             if (!realpath(filename_tmp, filename_full)) {
4514b1b1d783SJeff Cody                 continue;
4515b1b1d783SJeff Cody             }
4516b1b1d783SJeff Cody 
4517b1b1d783SJeff Cody             /* We need to make sure the backing filename we are comparing against
4518b1b1d783SJeff Cody              * is relative to the current image filename (or absolute) */
4519b1b1d783SJeff Cody             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4520b1b1d783SJeff Cody                          curr_bs->backing_file);
4521b1b1d783SJeff Cody 
4522b1b1d783SJeff Cody             if (!realpath(filename_tmp, backing_file_full)) {
4523b1b1d783SJeff Cody                 continue;
4524b1b1d783SJeff Cody             }
4525b1b1d783SJeff Cody 
4526b1b1d783SJeff Cody             if (strcmp(backing_file_full, filename_full) == 0) {
4527b1b1d783SJeff Cody                 retval = curr_bs->backing_hd;
4528b1b1d783SJeff Cody                 break;
4529b1b1d783SJeff Cody             }
4530e8a6bb9cSMarcelo Tosatti         }
4531e8a6bb9cSMarcelo Tosatti     }
4532e8a6bb9cSMarcelo Tosatti 
4533b1b1d783SJeff Cody     g_free(filename_full);
4534b1b1d783SJeff Cody     g_free(backing_file_full);
4535b1b1d783SJeff Cody     g_free(filename_tmp);
4536b1b1d783SJeff Cody     return retval;
4537e8a6bb9cSMarcelo Tosatti }
4538e8a6bb9cSMarcelo Tosatti 
4539f198fd1cSBenoît Canet int bdrv_get_backing_file_depth(BlockDriverState *bs)
4540f198fd1cSBenoît Canet {
4541f198fd1cSBenoît Canet     if (!bs->drv) {
4542f198fd1cSBenoît Canet         return 0;
4543f198fd1cSBenoît Canet     }
4544f198fd1cSBenoît Canet 
4545f198fd1cSBenoît Canet     if (!bs->backing_hd) {
4546f198fd1cSBenoît Canet         return 0;
4547f198fd1cSBenoît Canet     }
4548f198fd1cSBenoît Canet 
4549f198fd1cSBenoît Canet     return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
4550f198fd1cSBenoît Canet }
4551f198fd1cSBenoît Canet 
4552ea2384d3Sbellard /**************************************************************/
455383f64091Sbellard /* async I/Os */
4554ea2384d3Sbellard 
45557c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
4556f141eafeSaliguori                            QEMUIOVector *qiov, int nb_sectors,
4557097310b5SMarkus Armbruster                            BlockCompletionFunc *cb, void *opaque)
4558ea2384d3Sbellard {
4559bbf0a440SStefan Hajnoczi     trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
4560bbf0a440SStefan Hajnoczi 
4561d20d9b7cSPaolo Bonzini     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
45628c5873d6SStefan Hajnoczi                                  cb, opaque, false);
456383f64091Sbellard }
456483f64091Sbellard 
45657c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
4566f141eafeSaliguori                             QEMUIOVector *qiov, int nb_sectors,
4567097310b5SMarkus Armbruster                             BlockCompletionFunc *cb, void *opaque)
45687674e7bfSbellard {
4569bbf0a440SStefan Hajnoczi     trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
4570bbf0a440SStefan Hajnoczi 
4571d20d9b7cSPaolo Bonzini     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
45728c5873d6SStefan Hajnoczi                                  cb, opaque, true);
457383f64091Sbellard }
457483f64091Sbellard 
45757c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
4576d5ef94d4SPaolo Bonzini         int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
4577097310b5SMarkus Armbruster         BlockCompletionFunc *cb, void *opaque)
4578d5ef94d4SPaolo Bonzini {
4579d5ef94d4SPaolo Bonzini     trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);
4580d5ef94d4SPaolo Bonzini 
4581d5ef94d4SPaolo Bonzini     return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
4582d5ef94d4SPaolo Bonzini                                  BDRV_REQ_ZERO_WRITE | flags,
4583d5ef94d4SPaolo Bonzini                                  cb, opaque, true);
4584d5ef94d4SPaolo Bonzini }
4585d5ef94d4SPaolo Bonzini 
458640b4f539SKevin Wolf 
458740b4f539SKevin Wolf typedef struct MultiwriteCB {
458840b4f539SKevin Wolf     int error;
458940b4f539SKevin Wolf     int num_requests;
459040b4f539SKevin Wolf     int num_callbacks;
459140b4f539SKevin Wolf     struct {
4592097310b5SMarkus Armbruster         BlockCompletionFunc *cb;
459340b4f539SKevin Wolf         void *opaque;
459440b4f539SKevin Wolf         QEMUIOVector *free_qiov;
459540b4f539SKevin Wolf     } callbacks[];
459640b4f539SKevin Wolf } MultiwriteCB;
459740b4f539SKevin Wolf 
459840b4f539SKevin Wolf static void multiwrite_user_cb(MultiwriteCB *mcb)
459940b4f539SKevin Wolf {
460040b4f539SKevin Wolf     int i;
460140b4f539SKevin Wolf 
460240b4f539SKevin Wolf     for (i = 0; i < mcb->num_callbacks; i++) {
460340b4f539SKevin Wolf         mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
46041e1ea48dSStefan Hajnoczi         if (mcb->callbacks[i].free_qiov) {
46051e1ea48dSStefan Hajnoczi             qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
46061e1ea48dSStefan Hajnoczi         }
46077267c094SAnthony Liguori         g_free(mcb->callbacks[i].free_qiov);
460840b4f539SKevin Wolf     }
460940b4f539SKevin Wolf }
461040b4f539SKevin Wolf 
461140b4f539SKevin Wolf static void multiwrite_cb(void *opaque, int ret)
461240b4f539SKevin Wolf {
461340b4f539SKevin Wolf     MultiwriteCB *mcb = opaque;
461440b4f539SKevin Wolf 
46156d519a5fSStefan Hajnoczi     trace_multiwrite_cb(mcb, ret);
46166d519a5fSStefan Hajnoczi 
4617cb6d3ca0SKevin Wolf     if (ret < 0 && !mcb->error) {
461840b4f539SKevin Wolf         mcb->error = ret;
461940b4f539SKevin Wolf     }
462040b4f539SKevin Wolf 
462140b4f539SKevin Wolf     mcb->num_requests--;
462240b4f539SKevin Wolf     if (mcb->num_requests == 0) {
462340b4f539SKevin Wolf         multiwrite_user_cb(mcb);
46247267c094SAnthony Liguori         g_free(mcb);
462540b4f539SKevin Wolf     }
462640b4f539SKevin Wolf }
462740b4f539SKevin Wolf 
462840b4f539SKevin Wolf static int multiwrite_req_compare(const void *a, const void *b)
462940b4f539SKevin Wolf {
463077be4366SChristoph Hellwig     const BlockRequest *req1 = a, *req2 = b;
463177be4366SChristoph Hellwig 
463277be4366SChristoph Hellwig     /*
463377be4366SChristoph Hellwig      * Note that we can't simply subtract req2->sector from req1->sector
463477be4366SChristoph Hellwig      * here as that could overflow the return value.
463577be4366SChristoph Hellwig      */
463677be4366SChristoph Hellwig     if (req1->sector > req2->sector) {
463777be4366SChristoph Hellwig         return 1;
463877be4366SChristoph Hellwig     } else if (req1->sector < req2->sector) {
463977be4366SChristoph Hellwig         return -1;
464077be4366SChristoph Hellwig     } else {
464177be4366SChristoph Hellwig         return 0;
464277be4366SChristoph Hellwig     }
464340b4f539SKevin Wolf }
464440b4f539SKevin Wolf 
464540b4f539SKevin Wolf /*
464640b4f539SKevin Wolf  * Takes a bunch of requests and tries to merge them. Returns the number of
464740b4f539SKevin Wolf  * requests that remain after merging.
464840b4f539SKevin Wolf  */
464940b4f539SKevin Wolf static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
465040b4f539SKevin Wolf     int num_reqs, MultiwriteCB *mcb)
465140b4f539SKevin Wolf {
465240b4f539SKevin Wolf     int i, outidx;
465340b4f539SKevin Wolf 
465440b4f539SKevin Wolf     // Sort requests by start sector
465540b4f539SKevin Wolf     qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
465640b4f539SKevin Wolf 
465740b4f539SKevin Wolf     // Check if adjacent requests touch the same clusters. If so, combine them,
465840b4f539SKevin Wolf     // filling up gaps with zero sectors.
465940b4f539SKevin Wolf     outidx = 0;
466040b4f539SKevin Wolf     for (i = 1; i < num_reqs; i++) {
466140b4f539SKevin Wolf         int merge = 0;
466240b4f539SKevin Wolf         int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
466340b4f539SKevin Wolf 
4664b6a127a1SPaolo Bonzini         // Handle exactly sequential writes and overlapping writes.
466540b4f539SKevin Wolf         if (reqs[i].sector <= oldreq_last) {
466640b4f539SKevin Wolf             merge = 1;
466740b4f539SKevin Wolf         }
466840b4f539SKevin Wolf 
4669e2a305fbSChristoph Hellwig         if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
4670e2a305fbSChristoph Hellwig             merge = 0;
4671e2a305fbSChristoph Hellwig         }
4672e2a305fbSChristoph Hellwig 
46736c5a42acSPeter Lieven         if (bs->bl.max_transfer_length && reqs[outidx].nb_sectors +
46746c5a42acSPeter Lieven             reqs[i].nb_sectors > bs->bl.max_transfer_length) {
46756c5a42acSPeter Lieven             merge = 0;
46766c5a42acSPeter Lieven         }
46776c5a42acSPeter Lieven 
467840b4f539SKevin Wolf         if (merge) {
467940b4f539SKevin Wolf             size_t size;
46807267c094SAnthony Liguori             QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
468140b4f539SKevin Wolf             qemu_iovec_init(qiov,
468240b4f539SKevin Wolf                 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
468340b4f539SKevin Wolf 
468440b4f539SKevin Wolf             // Add the first request to the merged one. If the requests are
468540b4f539SKevin Wolf             // overlapping, drop the last sectors of the first request.
468640b4f539SKevin Wolf             size = (reqs[i].sector - reqs[outidx].sector) << 9;
46871b093c48SMichael Tokarev             qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
468840b4f539SKevin Wolf 
4689b6a127a1SPaolo Bonzini             // We should need to add any zeros between the two requests
4690b6a127a1SPaolo Bonzini             assert (reqs[i].sector <= oldreq_last);
469140b4f539SKevin Wolf 
469240b4f539SKevin Wolf             // Add the second request
46931b093c48SMichael Tokarev             qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
469440b4f539SKevin Wolf 
4695391827ebSStefan Hajnoczi             // Add tail of first request, if necessary
4696391827ebSStefan Hajnoczi             if (qiov->size < reqs[outidx].qiov->size) {
4697391827ebSStefan Hajnoczi                 qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
4698391827ebSStefan Hajnoczi                                   reqs[outidx].qiov->size - qiov->size);
4699391827ebSStefan Hajnoczi             }
4700391827ebSStefan Hajnoczi 
4701cbf1dff2SKevin Wolf             reqs[outidx].nb_sectors = qiov->size >> 9;
470240b4f539SKevin Wolf             reqs[outidx].qiov = qiov;
470340b4f539SKevin Wolf 
470440b4f539SKevin Wolf             mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
470540b4f539SKevin Wolf         } else {
470640b4f539SKevin Wolf             outidx++;
470740b4f539SKevin Wolf             reqs[outidx].sector     = reqs[i].sector;
470840b4f539SKevin Wolf             reqs[outidx].nb_sectors = reqs[i].nb_sectors;
470940b4f539SKevin Wolf             reqs[outidx].qiov       = reqs[i].qiov;
471040b4f539SKevin Wolf         }
471140b4f539SKevin Wolf     }
471240b4f539SKevin Wolf 
4713f4564d53SPeter Lieven     block_acct_merge_done(&bs->stats, BLOCK_ACCT_WRITE, num_reqs - outidx - 1);
4714f4564d53SPeter Lieven 
471540b4f539SKevin Wolf     return outidx + 1;
471640b4f539SKevin Wolf }
471740b4f539SKevin Wolf 
471840b4f539SKevin Wolf /*
471940b4f539SKevin Wolf  * Submit multiple AIO write requests at once.
472040b4f539SKevin Wolf  *
472140b4f539SKevin Wolf  * On success, the function returns 0 and all requests in the reqs array have
472240b4f539SKevin Wolf  * been submitted. In error case this function returns -1, and any of the
472340b4f539SKevin Wolf  * requests may or may not be submitted yet. In particular, this means that the
472440b4f539SKevin Wolf  * callback will be called for some of the requests, for others it won't. The
472540b4f539SKevin Wolf  * caller must check the error field of the BlockRequest to wait for the right
472640b4f539SKevin Wolf  * callbacks (if error != 0, no callback will be called).
472740b4f539SKevin Wolf  *
472840b4f539SKevin Wolf  * The implementation may modify the contents of the reqs array, e.g. to merge
472940b4f539SKevin Wolf  * requests. However, the fields opaque and error are left unmodified as they
473040b4f539SKevin Wolf  * are used to signal failure for a single request to the caller.
473140b4f539SKevin Wolf  */
473240b4f539SKevin Wolf int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
473340b4f539SKevin Wolf {
473440b4f539SKevin Wolf     MultiwriteCB *mcb;
473540b4f539SKevin Wolf     int i;
473640b4f539SKevin Wolf 
4737301db7c2SRyan Harper     /* don't submit writes if we don't have a medium */
4738301db7c2SRyan Harper     if (bs->drv == NULL) {
4739301db7c2SRyan Harper         for (i = 0; i < num_reqs; i++) {
4740301db7c2SRyan Harper             reqs[i].error = -ENOMEDIUM;
4741301db7c2SRyan Harper         }
4742301db7c2SRyan Harper         return -1;
4743301db7c2SRyan Harper     }
4744301db7c2SRyan Harper 
474540b4f539SKevin Wolf     if (num_reqs == 0) {
474640b4f539SKevin Wolf         return 0;
474740b4f539SKevin Wolf     }
474840b4f539SKevin Wolf 
474940b4f539SKevin Wolf     // Create MultiwriteCB structure
47507267c094SAnthony Liguori     mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
475140b4f539SKevin Wolf     mcb->num_requests = 0;
475240b4f539SKevin Wolf     mcb->num_callbacks = num_reqs;
475340b4f539SKevin Wolf 
475440b4f539SKevin Wolf     for (i = 0; i < num_reqs; i++) {
475540b4f539SKevin Wolf         mcb->callbacks[i].cb = reqs[i].cb;
475640b4f539SKevin Wolf         mcb->callbacks[i].opaque = reqs[i].opaque;
475740b4f539SKevin Wolf     }
475840b4f539SKevin Wolf 
475940b4f539SKevin Wolf     // Check for mergable requests
476040b4f539SKevin Wolf     num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
476140b4f539SKevin Wolf 
47626d519a5fSStefan Hajnoczi     trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
47636d519a5fSStefan Hajnoczi 
4764df9309fbSPaolo Bonzini     /* Run the aio requests. */
4765df9309fbSPaolo Bonzini     mcb->num_requests = num_reqs;
476640b4f539SKevin Wolf     for (i = 0; i < num_reqs; i++) {
4767d20d9b7cSPaolo Bonzini         bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
4768d20d9b7cSPaolo Bonzini                               reqs[i].nb_sectors, reqs[i].flags,
4769d20d9b7cSPaolo Bonzini                               multiwrite_cb, mcb,
4770d20d9b7cSPaolo Bonzini                               true);
477140b4f539SKevin Wolf     }
477240b4f539SKevin Wolf 
477340b4f539SKevin Wolf     return 0;
477440b4f539SKevin Wolf }
477540b4f539SKevin Wolf 
47767c84b1b8SMarkus Armbruster void bdrv_aio_cancel(BlockAIOCB *acb)
477783f64091Sbellard {
477802c50efeSFam Zheng     qemu_aio_ref(acb);
477902c50efeSFam Zheng     bdrv_aio_cancel_async(acb);
478002c50efeSFam Zheng     while (acb->refcnt > 1) {
478102c50efeSFam Zheng         if (acb->aiocb_info->get_aio_context) {
478202c50efeSFam Zheng             aio_poll(acb->aiocb_info->get_aio_context(acb), true);
478302c50efeSFam Zheng         } else if (acb->bs) {
478402c50efeSFam Zheng             aio_poll(bdrv_get_aio_context(acb->bs), true);
478502c50efeSFam Zheng         } else {
478602c50efeSFam Zheng             abort();
478702c50efeSFam Zheng         }
478802c50efeSFam Zheng     }
47898007429aSFam Zheng     qemu_aio_unref(acb);
479002c50efeSFam Zheng }
479102c50efeSFam Zheng 
479202c50efeSFam Zheng /* Async version of aio cancel. The caller is not blocked if the acb implements
479302c50efeSFam Zheng  * cancel_async, otherwise we do nothing and let the request normally complete.
479402c50efeSFam Zheng  * In either case the completion callback must be called. */
47957c84b1b8SMarkus Armbruster void bdrv_aio_cancel_async(BlockAIOCB *acb)
479602c50efeSFam Zheng {
479702c50efeSFam Zheng     if (acb->aiocb_info->cancel_async) {
479802c50efeSFam Zheng         acb->aiocb_info->cancel_async(acb);
479902c50efeSFam Zheng     }
480083f64091Sbellard }
480183f64091Sbellard 
480283f64091Sbellard /**************************************************************/
480383f64091Sbellard /* async block device emulation */
480483f64091Sbellard 
48057c84b1b8SMarkus Armbruster typedef struct BlockAIOCBSync {
48067c84b1b8SMarkus Armbruster     BlockAIOCB common;
4807c16b5a2cSChristoph Hellwig     QEMUBH *bh;
4808c16b5a2cSChristoph Hellwig     int ret;
4809c16b5a2cSChristoph Hellwig     /* vector translation state */
4810c16b5a2cSChristoph Hellwig     QEMUIOVector *qiov;
4811c16b5a2cSChristoph Hellwig     uint8_t *bounce;
4812c16b5a2cSChristoph Hellwig     int is_write;
48137c84b1b8SMarkus Armbruster } BlockAIOCBSync;
4814c16b5a2cSChristoph Hellwig 
4815d7331bedSStefan Hajnoczi static const AIOCBInfo bdrv_em_aiocb_info = {
48167c84b1b8SMarkus Armbruster     .aiocb_size         = sizeof(BlockAIOCBSync),
4817c16b5a2cSChristoph Hellwig };
4818c16b5a2cSChristoph Hellwig 
481983f64091Sbellard static void bdrv_aio_bh_cb(void *opaque)
4820beac80cdSbellard {
48217c84b1b8SMarkus Armbruster     BlockAIOCBSync *acb = opaque;
4822f141eafeSaliguori 
4823857d4f46SKevin Wolf     if (!acb->is_write && acb->ret >= 0) {
482403396148SMichael Tokarev         qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
4825857d4f46SKevin Wolf     }
4826ceb42de8Saliguori     qemu_vfree(acb->bounce);
4827ce1a14dcSpbrook     acb->common.cb(acb->common.opaque, acb->ret);
48286a7ad299SDor Laor     qemu_bh_delete(acb->bh);
482936afc451SAvi Kivity     acb->bh = NULL;
48308007429aSFam Zheng     qemu_aio_unref(acb);
4831beac80cdSbellard }
4832beac80cdSbellard 
48337c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
4834f141eafeSaliguori                                       int64_t sector_num,
4835f141eafeSaliguori                                       QEMUIOVector *qiov,
4836f141eafeSaliguori                                       int nb_sectors,
4837097310b5SMarkus Armbruster                                       BlockCompletionFunc *cb,
4838f141eafeSaliguori                                       void *opaque,
4839f141eafeSaliguori                                       int is_write)
4840f141eafeSaliguori 
4841ea2384d3Sbellard {
48427c84b1b8SMarkus Armbruster     BlockAIOCBSync *acb;
484383f64091Sbellard 
4844d7331bedSStefan Hajnoczi     acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
4845f141eafeSaliguori     acb->is_write = is_write;
4846f141eafeSaliguori     acb->qiov = qiov;
4847857d4f46SKevin Wolf     acb->bounce = qemu_try_blockalign(bs, qiov->size);
48482572b37aSStefan Hajnoczi     acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);
4849f141eafeSaliguori 
4850857d4f46SKevin Wolf     if (acb->bounce == NULL) {
4851857d4f46SKevin Wolf         acb->ret = -ENOMEM;
4852857d4f46SKevin Wolf     } else if (is_write) {
4853d5e6b161SMichael Tokarev         qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
48541ed20acfSStefan Hajnoczi         acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
4855f141eafeSaliguori     } else {
48561ed20acfSStefan Hajnoczi         acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
4857f141eafeSaliguori     }
4858f141eafeSaliguori 
4859ce1a14dcSpbrook     qemu_bh_schedule(acb->bh);
4860f141eafeSaliguori 
4861ce1a14dcSpbrook     return &acb->common;
48627a6cba61Spbrook }
48637a6cba61Spbrook 
48647c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
4865f141eafeSaliguori         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
4866097310b5SMarkus Armbruster         BlockCompletionFunc *cb, void *opaque)
486783f64091Sbellard {
4868f141eafeSaliguori     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
486983f64091Sbellard }
487083f64091Sbellard 
48717c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
4872f141eafeSaliguori         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
4873097310b5SMarkus Armbruster         BlockCompletionFunc *cb, void *opaque)
4874f141eafeSaliguori {
4875f141eafeSaliguori     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
4876f141eafeSaliguori }
4877f141eafeSaliguori 
487868485420SKevin Wolf 
48797c84b1b8SMarkus Armbruster typedef struct BlockAIOCBCoroutine {
48807c84b1b8SMarkus Armbruster     BlockAIOCB common;
488168485420SKevin Wolf     BlockRequest req;
488268485420SKevin Wolf     bool is_write;
48830b5a2445SPaolo Bonzini     bool need_bh;
4884d318aea9SKevin Wolf     bool *done;
488568485420SKevin Wolf     QEMUBH* bh;
48867c84b1b8SMarkus Armbruster } BlockAIOCBCoroutine;
488768485420SKevin Wolf 
4888d7331bedSStefan Hajnoczi static const AIOCBInfo bdrv_em_co_aiocb_info = {
48897c84b1b8SMarkus Armbruster     .aiocb_size         = sizeof(BlockAIOCBCoroutine),
489068485420SKevin Wolf };
489168485420SKevin Wolf 
48920b5a2445SPaolo Bonzini static void bdrv_co_complete(BlockAIOCBCoroutine *acb)
48930b5a2445SPaolo Bonzini {
48940b5a2445SPaolo Bonzini     if (!acb->need_bh) {
48950b5a2445SPaolo Bonzini         acb->common.cb(acb->common.opaque, acb->req.error);
48960b5a2445SPaolo Bonzini         qemu_aio_unref(acb);
48970b5a2445SPaolo Bonzini     }
48980b5a2445SPaolo Bonzini }
48990b5a2445SPaolo Bonzini 
490035246a68SPaolo Bonzini static void bdrv_co_em_bh(void *opaque)
490168485420SKevin Wolf {
49027c84b1b8SMarkus Armbruster     BlockAIOCBCoroutine *acb = opaque;
490368485420SKevin Wolf 
49040b5a2445SPaolo Bonzini     assert(!acb->need_bh);
490568485420SKevin Wolf     qemu_bh_delete(acb->bh);
49060b5a2445SPaolo Bonzini     bdrv_co_complete(acb);
49070b5a2445SPaolo Bonzini }
49080b5a2445SPaolo Bonzini 
49090b5a2445SPaolo Bonzini static void bdrv_co_maybe_schedule_bh(BlockAIOCBCoroutine *acb)
49100b5a2445SPaolo Bonzini {
49110b5a2445SPaolo Bonzini     acb->need_bh = false;
49120b5a2445SPaolo Bonzini     if (acb->req.error != -EINPROGRESS) {
49130b5a2445SPaolo Bonzini         BlockDriverState *bs = acb->common.bs;
49140b5a2445SPaolo Bonzini 
49150b5a2445SPaolo Bonzini         acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
49160b5a2445SPaolo Bonzini         qemu_bh_schedule(acb->bh);
49170b5a2445SPaolo Bonzini     }
491868485420SKevin Wolf }
491968485420SKevin Wolf 
4920b2a61371SStefan Hajnoczi /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
4921b2a61371SStefan Hajnoczi static void coroutine_fn bdrv_co_do_rw(void *opaque)
4922b2a61371SStefan Hajnoczi {
49237c84b1b8SMarkus Armbruster     BlockAIOCBCoroutine *acb = opaque;
4924b2a61371SStefan Hajnoczi     BlockDriverState *bs = acb->common.bs;
4925b2a61371SStefan Hajnoczi 
4926b2a61371SStefan Hajnoczi     if (!acb->is_write) {
4927b2a61371SStefan Hajnoczi         acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
4928d20d9b7cSPaolo Bonzini             acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
4929b2a61371SStefan Hajnoczi     } else {
4930b2a61371SStefan Hajnoczi         acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
4931d20d9b7cSPaolo Bonzini             acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
4932b2a61371SStefan Hajnoczi     }
4933b2a61371SStefan Hajnoczi 
49340b5a2445SPaolo Bonzini     bdrv_co_complete(acb);
4935b2a61371SStefan Hajnoczi }
4936b2a61371SStefan Hajnoczi 
49377c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
493868485420SKevin Wolf                                          int64_t sector_num,
493968485420SKevin Wolf                                          QEMUIOVector *qiov,
494068485420SKevin Wolf                                          int nb_sectors,
4941d20d9b7cSPaolo Bonzini                                          BdrvRequestFlags flags,
4942097310b5SMarkus Armbruster                                          BlockCompletionFunc *cb,
494368485420SKevin Wolf                                          void *opaque,
49448c5873d6SStefan Hajnoczi                                          bool is_write)
494568485420SKevin Wolf {
494668485420SKevin Wolf     Coroutine *co;
49477c84b1b8SMarkus Armbruster     BlockAIOCBCoroutine *acb;
494868485420SKevin Wolf 
4949d7331bedSStefan Hajnoczi     acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
49500b5a2445SPaolo Bonzini     acb->need_bh = true;
49510b5a2445SPaolo Bonzini     acb->req.error = -EINPROGRESS;
495268485420SKevin Wolf     acb->req.sector = sector_num;
495368485420SKevin Wolf     acb->req.nb_sectors = nb_sectors;
495468485420SKevin Wolf     acb->req.qiov = qiov;
4955d20d9b7cSPaolo Bonzini     acb->req.flags = flags;
495668485420SKevin Wolf     acb->is_write = is_write;
495768485420SKevin Wolf 
49588c5873d6SStefan Hajnoczi     co = qemu_coroutine_create(bdrv_co_do_rw);
495968485420SKevin Wolf     qemu_coroutine_enter(co, acb);
496068485420SKevin Wolf 
49610b5a2445SPaolo Bonzini     bdrv_co_maybe_schedule_bh(acb);
496268485420SKevin Wolf     return &acb->common;
496368485420SKevin Wolf }
496468485420SKevin Wolf 
496507f07615SPaolo Bonzini static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
4966b2e12bc6SChristoph Hellwig {
49677c84b1b8SMarkus Armbruster     BlockAIOCBCoroutine *acb = opaque;
496807f07615SPaolo Bonzini     BlockDriverState *bs = acb->common.bs;
4969b2e12bc6SChristoph Hellwig 
497007f07615SPaolo Bonzini     acb->req.error = bdrv_co_flush(bs);
49710b5a2445SPaolo Bonzini     bdrv_co_complete(acb);
4972b2e12bc6SChristoph Hellwig }
4973b2e12bc6SChristoph Hellwig 
49747c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
4975097310b5SMarkus Armbruster         BlockCompletionFunc *cb, void *opaque)
4976016f5cf6SAlexander Graf {
497707f07615SPaolo Bonzini     trace_bdrv_aio_flush(bs, opaque);
4978016f5cf6SAlexander Graf 
497907f07615SPaolo Bonzini     Coroutine *co;
49807c84b1b8SMarkus Armbruster     BlockAIOCBCoroutine *acb;
4981016f5cf6SAlexander Graf 
4982d7331bedSStefan Hajnoczi     acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
49830b5a2445SPaolo Bonzini     acb->need_bh = true;
49840b5a2445SPaolo Bonzini     acb->req.error = -EINPROGRESS;
4985d318aea9SKevin Wolf 
498607f07615SPaolo Bonzini     co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
498707f07615SPaolo Bonzini     qemu_coroutine_enter(co, acb);
4988016f5cf6SAlexander Graf 
49890b5a2445SPaolo Bonzini     bdrv_co_maybe_schedule_bh(acb);
4990016f5cf6SAlexander Graf     return &acb->common;
4991016f5cf6SAlexander Graf }
4992016f5cf6SAlexander Graf 
49934265d620SPaolo Bonzini static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
49944265d620SPaolo Bonzini {
49957c84b1b8SMarkus Armbruster     BlockAIOCBCoroutine *acb = opaque;
49964265d620SPaolo Bonzini     BlockDriverState *bs = acb->common.bs;
49974265d620SPaolo Bonzini 
49984265d620SPaolo Bonzini     acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
49990b5a2445SPaolo Bonzini     bdrv_co_complete(acb);
50004265d620SPaolo Bonzini }
50014265d620SPaolo Bonzini 
50027c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs,
50034265d620SPaolo Bonzini         int64_t sector_num, int nb_sectors,
5004097310b5SMarkus Armbruster         BlockCompletionFunc *cb, void *opaque)
50054265d620SPaolo Bonzini {
50064265d620SPaolo Bonzini     Coroutine *co;
50077c84b1b8SMarkus Armbruster     BlockAIOCBCoroutine *acb;
50084265d620SPaolo Bonzini 
50094265d620SPaolo Bonzini     trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
50104265d620SPaolo Bonzini 
5011d7331bedSStefan Hajnoczi     acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
50120b5a2445SPaolo Bonzini     acb->need_bh = true;
50130b5a2445SPaolo Bonzini     acb->req.error = -EINPROGRESS;
50144265d620SPaolo Bonzini     acb->req.sector = sector_num;
50154265d620SPaolo Bonzini     acb->req.nb_sectors = nb_sectors;
50164265d620SPaolo Bonzini     co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
50174265d620SPaolo Bonzini     qemu_coroutine_enter(co, acb);
50184265d620SPaolo Bonzini 
50190b5a2445SPaolo Bonzini     bdrv_co_maybe_schedule_bh(acb);
50204265d620SPaolo Bonzini     return &acb->common;
50214265d620SPaolo Bonzini }
50224265d620SPaolo Bonzini 
5023ea2384d3Sbellard void bdrv_init(void)
5024ea2384d3Sbellard {
50255efa9d5aSAnthony Liguori     module_call_init(MODULE_INIT_BLOCK);
5026ea2384d3Sbellard }
5027ce1a14dcSpbrook 
5028eb852011SMarkus Armbruster void bdrv_init_with_whitelist(void)
5029eb852011SMarkus Armbruster {
5030eb852011SMarkus Armbruster     use_bdrv_whitelist = 1;
5031eb852011SMarkus Armbruster     bdrv_init();
5032eb852011SMarkus Armbruster }
5033eb852011SMarkus Armbruster 
5034d7331bedSStefan Hajnoczi void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
5035097310b5SMarkus Armbruster                    BlockCompletionFunc *cb, void *opaque)
50366bbff9a0Saliguori {
50377c84b1b8SMarkus Armbruster     BlockAIOCB *acb;
5038ce1a14dcSpbrook 
5039d7331bedSStefan Hajnoczi     acb = g_slice_alloc(aiocb_info->aiocb_size);
5040d7331bedSStefan Hajnoczi     acb->aiocb_info = aiocb_info;
5041ce1a14dcSpbrook     acb->bs = bs;
5042ce1a14dcSpbrook     acb->cb = cb;
5043ce1a14dcSpbrook     acb->opaque = opaque;
5044f197fe2bSFam Zheng     acb->refcnt = 1;
5045ce1a14dcSpbrook     return acb;
5046ce1a14dcSpbrook }
5047ce1a14dcSpbrook 
5048f197fe2bSFam Zheng void qemu_aio_ref(void *p)
5049f197fe2bSFam Zheng {
50507c84b1b8SMarkus Armbruster     BlockAIOCB *acb = p;
5051f197fe2bSFam Zheng     acb->refcnt++;
5052f197fe2bSFam Zheng }
5053f197fe2bSFam Zheng 
50548007429aSFam Zheng void qemu_aio_unref(void *p)
5055ce1a14dcSpbrook {
50567c84b1b8SMarkus Armbruster     BlockAIOCB *acb = p;
5057f197fe2bSFam Zheng     assert(acb->refcnt > 0);
5058f197fe2bSFam Zheng     if (--acb->refcnt == 0) {
5059d7331bedSStefan Hajnoczi         g_slice_free1(acb->aiocb_info->aiocb_size, acb);
5060ce1a14dcSpbrook     }
5061f197fe2bSFam Zheng }
506219cb3738Sbellard 
506319cb3738Sbellard /**************************************************************/
5064f9f05dc5SKevin Wolf /* Coroutine block device emulation */
5065f9f05dc5SKevin Wolf 
5066f9f05dc5SKevin Wolf typedef struct CoroutineIOCompletion {
5067f9f05dc5SKevin Wolf     Coroutine *coroutine;
5068f9f05dc5SKevin Wolf     int ret;
5069f9f05dc5SKevin Wolf } CoroutineIOCompletion;
5070f9f05dc5SKevin Wolf 
5071f9f05dc5SKevin Wolf static void bdrv_co_io_em_complete(void *opaque, int ret)
5072f9f05dc5SKevin Wolf {
5073f9f05dc5SKevin Wolf     CoroutineIOCompletion *co = opaque;
5074f9f05dc5SKevin Wolf 
5075f9f05dc5SKevin Wolf     co->ret = ret;
5076f9f05dc5SKevin Wolf     qemu_coroutine_enter(co->coroutine, NULL);
5077f9f05dc5SKevin Wolf }
5078f9f05dc5SKevin Wolf 
5079f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
5080f9f05dc5SKevin Wolf                                       int nb_sectors, QEMUIOVector *iov,
5081f9f05dc5SKevin Wolf                                       bool is_write)
5082f9f05dc5SKevin Wolf {
5083f9f05dc5SKevin Wolf     CoroutineIOCompletion co = {
5084f9f05dc5SKevin Wolf         .coroutine = qemu_coroutine_self(),
5085f9f05dc5SKevin Wolf     };
50867c84b1b8SMarkus Armbruster     BlockAIOCB *acb;
5087f9f05dc5SKevin Wolf 
5088f9f05dc5SKevin Wolf     if (is_write) {
5089a652d160SStefan Hajnoczi         acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
5090f9f05dc5SKevin Wolf                                        bdrv_co_io_em_complete, &co);
5091f9f05dc5SKevin Wolf     } else {
5092a652d160SStefan Hajnoczi         acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
5093f9f05dc5SKevin Wolf                                       bdrv_co_io_em_complete, &co);
5094f9f05dc5SKevin Wolf     }
5095f9f05dc5SKevin Wolf 
509659370aaaSStefan Hajnoczi     trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
5097f9f05dc5SKevin Wolf     if (!acb) {
5098f9f05dc5SKevin Wolf         return -EIO;
5099f9f05dc5SKevin Wolf     }
5100f9f05dc5SKevin Wolf     qemu_coroutine_yield();
5101f9f05dc5SKevin Wolf 
5102f9f05dc5SKevin Wolf     return co.ret;
5103f9f05dc5SKevin Wolf }
5104f9f05dc5SKevin Wolf 
5105f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
5106f9f05dc5SKevin Wolf                                          int64_t sector_num, int nb_sectors,
5107f9f05dc5SKevin Wolf                                          QEMUIOVector *iov)
5108f9f05dc5SKevin Wolf {
5109f9f05dc5SKevin Wolf     return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
5110f9f05dc5SKevin Wolf }
5111f9f05dc5SKevin Wolf 
5112f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
5113f9f05dc5SKevin Wolf                                          int64_t sector_num, int nb_sectors,
5114f9f05dc5SKevin Wolf                                          QEMUIOVector *iov)
5115f9f05dc5SKevin Wolf {
5116f9f05dc5SKevin Wolf     return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
5117f9f05dc5SKevin Wolf }
5118f9f05dc5SKevin Wolf 
511907f07615SPaolo Bonzini static void coroutine_fn bdrv_flush_co_entry(void *opaque)
5120e7a8a783SKevin Wolf {
512107f07615SPaolo Bonzini     RwCo *rwco = opaque;
512207f07615SPaolo Bonzini 
512307f07615SPaolo Bonzini     rwco->ret = bdrv_co_flush(rwco->bs);
512407f07615SPaolo Bonzini }
512507f07615SPaolo Bonzini 
512607f07615SPaolo Bonzini int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
512707f07615SPaolo Bonzini {
5128eb489bb1SKevin Wolf     int ret;
5129eb489bb1SKevin Wolf 
513029cdb251SPaolo Bonzini     if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
513107f07615SPaolo Bonzini         return 0;
5132eb489bb1SKevin Wolf     }
5133eb489bb1SKevin Wolf 
5134ca716364SKevin Wolf     /* Write back cached data to the OS even with cache=unsafe */
5135bf736fe3SKevin Wolf     BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
5136eb489bb1SKevin Wolf     if (bs->drv->bdrv_co_flush_to_os) {
5137eb489bb1SKevin Wolf         ret = bs->drv->bdrv_co_flush_to_os(bs);
5138eb489bb1SKevin Wolf         if (ret < 0) {
5139eb489bb1SKevin Wolf             return ret;
5140eb489bb1SKevin Wolf         }
5141eb489bb1SKevin Wolf     }
5142eb489bb1SKevin Wolf 
5143ca716364SKevin Wolf     /* But don't actually force it to the disk with cache=unsafe */
5144ca716364SKevin Wolf     if (bs->open_flags & BDRV_O_NO_FLUSH) {
5145d4c82329SKevin Wolf         goto flush_parent;
5146ca716364SKevin Wolf     }
5147ca716364SKevin Wolf 
5148bf736fe3SKevin Wolf     BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
5149eb489bb1SKevin Wolf     if (bs->drv->bdrv_co_flush_to_disk) {
515029cdb251SPaolo Bonzini         ret = bs->drv->bdrv_co_flush_to_disk(bs);
515107f07615SPaolo Bonzini     } else if (bs->drv->bdrv_aio_flush) {
51527c84b1b8SMarkus Armbruster         BlockAIOCB *acb;
5153e7a8a783SKevin Wolf         CoroutineIOCompletion co = {
5154e7a8a783SKevin Wolf             .coroutine = qemu_coroutine_self(),
5155e7a8a783SKevin Wolf         };
5156e7a8a783SKevin Wolf 
515707f07615SPaolo Bonzini         acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
515807f07615SPaolo Bonzini         if (acb == NULL) {
515929cdb251SPaolo Bonzini             ret = -EIO;
516007f07615SPaolo Bonzini         } else {
5161e7a8a783SKevin Wolf             qemu_coroutine_yield();
516229cdb251SPaolo Bonzini             ret = co.ret;
5163e7a8a783SKevin Wolf         }
516407f07615SPaolo Bonzini     } else {
516507f07615SPaolo Bonzini         /*
516607f07615SPaolo Bonzini          * Some block drivers always operate in either writethrough or unsafe
516707f07615SPaolo Bonzini          * mode and don't support bdrv_flush therefore. Usually qemu doesn't
516807f07615SPaolo Bonzini          * know how the server works (because the behaviour is hardcoded or
516907f07615SPaolo Bonzini          * depends on server-side configuration), so we can't ensure that
517007f07615SPaolo Bonzini          * everything is safe on disk. Returning an error doesn't work because
517107f07615SPaolo Bonzini          * that would break guests even if the server operates in writethrough
517207f07615SPaolo Bonzini          * mode.
517307f07615SPaolo Bonzini          *
517407f07615SPaolo Bonzini          * Let's hope the user knows what he's doing.
517507f07615SPaolo Bonzini          */
517629cdb251SPaolo Bonzini         ret = 0;
517707f07615SPaolo Bonzini     }
517829cdb251SPaolo Bonzini     if (ret < 0) {
517929cdb251SPaolo Bonzini         return ret;
518029cdb251SPaolo Bonzini     }
518129cdb251SPaolo Bonzini 
518229cdb251SPaolo Bonzini     /* Now flush the underlying protocol.  It will also have BDRV_O_NO_FLUSH
518329cdb251SPaolo Bonzini      * in the case of cache=unsafe, so there are no useless flushes.
518429cdb251SPaolo Bonzini      */
5185d4c82329SKevin Wolf flush_parent:
518629cdb251SPaolo Bonzini     return bdrv_co_flush(bs->file);
518707f07615SPaolo Bonzini }
518807f07615SPaolo Bonzini 
51895a8a30dbSKevin Wolf void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
51900f15423cSAnthony Liguori {
51915a8a30dbSKevin Wolf     Error *local_err = NULL;
51925a8a30dbSKevin Wolf     int ret;
51935a8a30dbSKevin Wolf 
51943456a8d1SKevin Wolf     if (!bs->drv)  {
51953456a8d1SKevin Wolf         return;
51960f15423cSAnthony Liguori     }
51973456a8d1SKevin Wolf 
51987ea2d269SAlexey Kardashevskiy     if (!(bs->open_flags & BDRV_O_INCOMING)) {
51997ea2d269SAlexey Kardashevskiy         return;
52007ea2d269SAlexey Kardashevskiy     }
52017ea2d269SAlexey Kardashevskiy     bs->open_flags &= ~BDRV_O_INCOMING;
52027ea2d269SAlexey Kardashevskiy 
52033456a8d1SKevin Wolf     if (bs->drv->bdrv_invalidate_cache) {
52045a8a30dbSKevin Wolf         bs->drv->bdrv_invalidate_cache(bs, &local_err);
52053456a8d1SKevin Wolf     } else if (bs->file) {
52065a8a30dbSKevin Wolf         bdrv_invalidate_cache(bs->file, &local_err);
52075a8a30dbSKevin Wolf     }
52085a8a30dbSKevin Wolf     if (local_err) {
52095a8a30dbSKevin Wolf         error_propagate(errp, local_err);
52105a8a30dbSKevin Wolf         return;
52113456a8d1SKevin Wolf     }
52123456a8d1SKevin Wolf 
52135a8a30dbSKevin Wolf     ret = refresh_total_sectors(bs, bs->total_sectors);
52145a8a30dbSKevin Wolf     if (ret < 0) {
52155a8a30dbSKevin Wolf         error_setg_errno(errp, -ret, "Could not refresh total sector count");
52165a8a30dbSKevin Wolf         return;
52175a8a30dbSKevin Wolf     }
52180f15423cSAnthony Liguori }
52190f15423cSAnthony Liguori 
52205a8a30dbSKevin Wolf void bdrv_invalidate_cache_all(Error **errp)
52210f15423cSAnthony Liguori {
52220f15423cSAnthony Liguori     BlockDriverState *bs;
52235a8a30dbSKevin Wolf     Error *local_err = NULL;
52240f15423cSAnthony Liguori 
5225dc364f4cSBenoît Canet     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
5226ed78cda3SStefan Hajnoczi         AioContext *aio_context = bdrv_get_aio_context(bs);
5227ed78cda3SStefan Hajnoczi 
5228ed78cda3SStefan Hajnoczi         aio_context_acquire(aio_context);
52295a8a30dbSKevin Wolf         bdrv_invalidate_cache(bs, &local_err);
5230ed78cda3SStefan Hajnoczi         aio_context_release(aio_context);
52315a8a30dbSKevin Wolf         if (local_err) {
52325a8a30dbSKevin Wolf             error_propagate(errp, local_err);
52335a8a30dbSKevin Wolf             return;
52345a8a30dbSKevin Wolf         }
52350f15423cSAnthony Liguori     }
52360f15423cSAnthony Liguori }
52370f15423cSAnthony Liguori 
523807f07615SPaolo Bonzini int bdrv_flush(BlockDriverState *bs)
523907f07615SPaolo Bonzini {
524007f07615SPaolo Bonzini     Coroutine *co;
524107f07615SPaolo Bonzini     RwCo rwco = {
524207f07615SPaolo Bonzini         .bs = bs,
524307f07615SPaolo Bonzini         .ret = NOT_DONE,
524407f07615SPaolo Bonzini     };
524507f07615SPaolo Bonzini 
524607f07615SPaolo Bonzini     if (qemu_in_coroutine()) {
524707f07615SPaolo Bonzini         /* Fast-path if already in coroutine context */
524807f07615SPaolo Bonzini         bdrv_flush_co_entry(&rwco);
524907f07615SPaolo Bonzini     } else {
52502572b37aSStefan Hajnoczi         AioContext *aio_context = bdrv_get_aio_context(bs);
52512572b37aSStefan Hajnoczi 
525207f07615SPaolo Bonzini         co = qemu_coroutine_create(bdrv_flush_co_entry);
525307f07615SPaolo Bonzini         qemu_coroutine_enter(co, &rwco);
525407f07615SPaolo Bonzini         while (rwco.ret == NOT_DONE) {
52552572b37aSStefan Hajnoczi             aio_poll(aio_context, true);
525607f07615SPaolo Bonzini         }
525707f07615SPaolo Bonzini     }
525807f07615SPaolo Bonzini 
525907f07615SPaolo Bonzini     return rwco.ret;
526007f07615SPaolo Bonzini }
5261e7a8a783SKevin Wolf 
5262775aa8b6SKevin Wolf typedef struct DiscardCo {
5263775aa8b6SKevin Wolf     BlockDriverState *bs;
5264775aa8b6SKevin Wolf     int64_t sector_num;
5265775aa8b6SKevin Wolf     int nb_sectors;
5266775aa8b6SKevin Wolf     int ret;
5267775aa8b6SKevin Wolf } DiscardCo;
52684265d620SPaolo Bonzini static void coroutine_fn bdrv_discard_co_entry(void *opaque)
52694265d620SPaolo Bonzini {
5270775aa8b6SKevin Wolf     DiscardCo *rwco = opaque;
52714265d620SPaolo Bonzini 
52724265d620SPaolo Bonzini     rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
52734265d620SPaolo Bonzini }
52744265d620SPaolo Bonzini 
52754265d620SPaolo Bonzini int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
52764265d620SPaolo Bonzini                                  int nb_sectors)
52774265d620SPaolo Bonzini {
5278b9c64947SMax Reitz     int max_discard, ret;
5279d51e9fe5SPaolo Bonzini 
52804265d620SPaolo Bonzini     if (!bs->drv) {
52814265d620SPaolo Bonzini         return -ENOMEDIUM;
5282b9c64947SMax Reitz     }
5283b9c64947SMax Reitz 
5284b9c64947SMax Reitz     ret = bdrv_check_request(bs, sector_num, nb_sectors);
5285b9c64947SMax Reitz     if (ret < 0) {
5286b9c64947SMax Reitz         return ret;
52874265d620SPaolo Bonzini     } else if (bs->read_only) {
52884265d620SPaolo Bonzini         return -EROFS;
5289df702c9bSPaolo Bonzini     }
5290df702c9bSPaolo Bonzini 
52918f0720ecSPaolo Bonzini     bdrv_reset_dirty(bs, sector_num, nb_sectors);
5292df702c9bSPaolo Bonzini 
52939e8f1835SPaolo Bonzini     /* Do nothing if disabled.  */
52949e8f1835SPaolo Bonzini     if (!(bs->open_flags & BDRV_O_UNMAP)) {
52959e8f1835SPaolo Bonzini         return 0;
52969e8f1835SPaolo Bonzini     }
52979e8f1835SPaolo Bonzini 
5298d51e9fe5SPaolo Bonzini     if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
5299d51e9fe5SPaolo Bonzini         return 0;
5300d51e9fe5SPaolo Bonzini     }
53016f14da52SPeter Lieven 
530275af1f34SPeter Lieven     max_discard = MIN_NON_ZERO(bs->bl.max_discard, BDRV_REQUEST_MAX_SECTORS);
53036f14da52SPeter Lieven     while (nb_sectors > 0) {
53046f14da52SPeter Lieven         int ret;
53056f14da52SPeter Lieven         int num = nb_sectors;
53066f14da52SPeter Lieven 
53076f14da52SPeter Lieven         /* align request */
53086f14da52SPeter Lieven         if (bs->bl.discard_alignment &&
53096f14da52SPeter Lieven             num >= bs->bl.discard_alignment &&
53106f14da52SPeter Lieven             sector_num % bs->bl.discard_alignment) {
53116f14da52SPeter Lieven             if (num > bs->bl.discard_alignment) {
53126f14da52SPeter Lieven                 num = bs->bl.discard_alignment;
53136f14da52SPeter Lieven             }
53146f14da52SPeter Lieven             num -= sector_num % bs->bl.discard_alignment;
53156f14da52SPeter Lieven         }
53166f14da52SPeter Lieven 
53176f14da52SPeter Lieven         /* limit request size */
53186f14da52SPeter Lieven         if (num > max_discard) {
53196f14da52SPeter Lieven             num = max_discard;
53206f14da52SPeter Lieven         }
53216f14da52SPeter Lieven 
5322d51e9fe5SPaolo Bonzini         if (bs->drv->bdrv_co_discard) {
53236f14da52SPeter Lieven             ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
5324d51e9fe5SPaolo Bonzini         } else {
53257c84b1b8SMarkus Armbruster             BlockAIOCB *acb;
53264265d620SPaolo Bonzini             CoroutineIOCompletion co = {
53274265d620SPaolo Bonzini                 .coroutine = qemu_coroutine_self(),
53284265d620SPaolo Bonzini             };
53294265d620SPaolo Bonzini 
53304265d620SPaolo Bonzini             acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
53314265d620SPaolo Bonzini                                             bdrv_co_io_em_complete, &co);
53324265d620SPaolo Bonzini             if (acb == NULL) {
53334265d620SPaolo Bonzini                 return -EIO;
53344265d620SPaolo Bonzini             } else {
53354265d620SPaolo Bonzini                 qemu_coroutine_yield();
5336d51e9fe5SPaolo Bonzini                 ret = co.ret;
53374265d620SPaolo Bonzini             }
5338d51e9fe5SPaolo Bonzini         }
53397ce21016SPaolo Bonzini         if (ret && ret != -ENOTSUP) {
5340d51e9fe5SPaolo Bonzini             return ret;
5341d51e9fe5SPaolo Bonzini         }
5342d51e9fe5SPaolo Bonzini 
5343d51e9fe5SPaolo Bonzini         sector_num += num;
5344d51e9fe5SPaolo Bonzini         nb_sectors -= num;
5345d51e9fe5SPaolo Bonzini     }
53464265d620SPaolo Bonzini     return 0;
53474265d620SPaolo Bonzini }
53484265d620SPaolo Bonzini 
53494265d620SPaolo Bonzini int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
53504265d620SPaolo Bonzini {
53514265d620SPaolo Bonzini     Coroutine *co;
5352775aa8b6SKevin Wolf     DiscardCo rwco = {
53534265d620SPaolo Bonzini         .bs = bs,
53544265d620SPaolo Bonzini         .sector_num = sector_num,
53554265d620SPaolo Bonzini         .nb_sectors = nb_sectors,
53564265d620SPaolo Bonzini         .ret = NOT_DONE,
53574265d620SPaolo Bonzini     };
53584265d620SPaolo Bonzini 
53594265d620SPaolo Bonzini     if (qemu_in_coroutine()) {
53604265d620SPaolo Bonzini         /* Fast-path if already in coroutine context */
53614265d620SPaolo Bonzini         bdrv_discard_co_entry(&rwco);
53624265d620SPaolo Bonzini     } else {
53632572b37aSStefan Hajnoczi         AioContext *aio_context = bdrv_get_aio_context(bs);
53642572b37aSStefan Hajnoczi 
53654265d620SPaolo Bonzini         co = qemu_coroutine_create(bdrv_discard_co_entry);
53664265d620SPaolo Bonzini         qemu_coroutine_enter(co, &rwco);
53674265d620SPaolo Bonzini         while (rwco.ret == NOT_DONE) {
53682572b37aSStefan Hajnoczi             aio_poll(aio_context, true);
53694265d620SPaolo Bonzini         }
53704265d620SPaolo Bonzini     }
53714265d620SPaolo Bonzini 
53724265d620SPaolo Bonzini     return rwco.ret;
53734265d620SPaolo Bonzini }
53744265d620SPaolo Bonzini 
5375f9f05dc5SKevin Wolf /**************************************************************/
537619cb3738Sbellard /* removable device support */
537719cb3738Sbellard 
537819cb3738Sbellard /**
537919cb3738Sbellard  * Return TRUE if the media is present
538019cb3738Sbellard  */
538119cb3738Sbellard int bdrv_is_inserted(BlockDriverState *bs)
538219cb3738Sbellard {
538319cb3738Sbellard     BlockDriver *drv = bs->drv;
5384a1aff5bfSMarkus Armbruster 
538519cb3738Sbellard     if (!drv)
538619cb3738Sbellard         return 0;
538719cb3738Sbellard     if (!drv->bdrv_is_inserted)
5388a1aff5bfSMarkus Armbruster         return 1;
5389a1aff5bfSMarkus Armbruster     return drv->bdrv_is_inserted(bs);
539019cb3738Sbellard }
539119cb3738Sbellard 
539219cb3738Sbellard /**
53938e49ca46SMarkus Armbruster  * Return whether the media changed since the last call to this
53948e49ca46SMarkus Armbruster  * function, or -ENOTSUP if we don't know.  Most drivers don't know.
539519cb3738Sbellard  */
539619cb3738Sbellard int bdrv_media_changed(BlockDriverState *bs)
539719cb3738Sbellard {
539819cb3738Sbellard     BlockDriver *drv = bs->drv;
539919cb3738Sbellard 
54008e49ca46SMarkus Armbruster     if (drv && drv->bdrv_media_changed) {
54018e49ca46SMarkus Armbruster         return drv->bdrv_media_changed(bs);
54028e49ca46SMarkus Armbruster     }
54038e49ca46SMarkus Armbruster     return -ENOTSUP;
540419cb3738Sbellard }
540519cb3738Sbellard 
540619cb3738Sbellard /**
540719cb3738Sbellard  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
540819cb3738Sbellard  */
5409f36f3949SLuiz Capitulino void bdrv_eject(BlockDriverState *bs, bool eject_flag)
541019cb3738Sbellard {
541119cb3738Sbellard     BlockDriver *drv = bs->drv;
5412bfb197e0SMarkus Armbruster     const char *device_name;
541319cb3738Sbellard 
5414822e1cd1SMarkus Armbruster     if (drv && drv->bdrv_eject) {
5415822e1cd1SMarkus Armbruster         drv->bdrv_eject(bs, eject_flag);
541619cb3738Sbellard     }
54176f382ed2SLuiz Capitulino 
5418bfb197e0SMarkus Armbruster     device_name = bdrv_get_device_name(bs);
5419bfb197e0SMarkus Armbruster     if (device_name[0] != '\0') {
5420bfb197e0SMarkus Armbruster         qapi_event_send_device_tray_moved(device_name,
5421a5ee7bd4SWenchao Xia                                           eject_flag, &error_abort);
54226f382ed2SLuiz Capitulino     }
542319cb3738Sbellard }
542419cb3738Sbellard 
542519cb3738Sbellard /**
542619cb3738Sbellard  * Lock or unlock the media (if it is locked, the user won't be able
542719cb3738Sbellard  * to eject it manually).
542819cb3738Sbellard  */
5429025e849aSMarkus Armbruster void bdrv_lock_medium(BlockDriverState *bs, bool locked)
543019cb3738Sbellard {
543119cb3738Sbellard     BlockDriver *drv = bs->drv;
543219cb3738Sbellard 
5433025e849aSMarkus Armbruster     trace_bdrv_lock_medium(bs, locked);
5434b8c6d095SStefan Hajnoczi 
5435025e849aSMarkus Armbruster     if (drv && drv->bdrv_lock_medium) {
5436025e849aSMarkus Armbruster         drv->bdrv_lock_medium(bs, locked);
543719cb3738Sbellard     }
543819cb3738Sbellard }
5439985a03b0Sths 
5440985a03b0Sths /* needed for generic scsi interface */
5441985a03b0Sths 
5442985a03b0Sths int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
5443985a03b0Sths {
5444985a03b0Sths     BlockDriver *drv = bs->drv;
5445985a03b0Sths 
5446985a03b0Sths     if (drv && drv->bdrv_ioctl)
5447985a03b0Sths         return drv->bdrv_ioctl(bs, req, buf);
5448985a03b0Sths     return -ENOTSUP;
5449985a03b0Sths }
54507d780669Saliguori 
54517c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
5452221f715dSaliguori         unsigned long int req, void *buf,
5453097310b5SMarkus Armbruster         BlockCompletionFunc *cb, void *opaque)
54547d780669Saliguori {
5455221f715dSaliguori     BlockDriver *drv = bs->drv;
54567d780669Saliguori 
5457221f715dSaliguori     if (drv && drv->bdrv_aio_ioctl)
5458221f715dSaliguori         return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
5459221f715dSaliguori     return NULL;
54607d780669Saliguori }
5461e268ca52Saliguori 
54621b7fd729SPaolo Bonzini void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
54637b6f9300SMarkus Armbruster {
54641b7fd729SPaolo Bonzini     bs->guest_block_size = align;
54657b6f9300SMarkus Armbruster }
54667cd1e32aSlirans@il.ibm.com 
5467e268ca52Saliguori void *qemu_blockalign(BlockDriverState *bs, size_t size)
5468e268ca52Saliguori {
5469339064d5SKevin Wolf     return qemu_memalign(bdrv_opt_mem_align(bs), size);
5470e268ca52Saliguori }
54717cd1e32aSlirans@il.ibm.com 
54729ebd8448SMax Reitz void *qemu_blockalign0(BlockDriverState *bs, size_t size)
54739ebd8448SMax Reitz {
54749ebd8448SMax Reitz     return memset(qemu_blockalign(bs, size), 0, size);
54759ebd8448SMax Reitz }
54769ebd8448SMax Reitz 
54777d2a35ccSKevin Wolf void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
54787d2a35ccSKevin Wolf {
54797d2a35ccSKevin Wolf     size_t align = bdrv_opt_mem_align(bs);
54807d2a35ccSKevin Wolf 
54817d2a35ccSKevin Wolf     /* Ensure that NULL is never returned on success */
54827d2a35ccSKevin Wolf     assert(align > 0);
54837d2a35ccSKevin Wolf     if (size == 0) {
54847d2a35ccSKevin Wolf         size = align;
54857d2a35ccSKevin Wolf     }
54867d2a35ccSKevin Wolf 
54877d2a35ccSKevin Wolf     return qemu_try_memalign(align, size);
54887d2a35ccSKevin Wolf }
54897d2a35ccSKevin Wolf 
54909ebd8448SMax Reitz void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
54919ebd8448SMax Reitz {
54929ebd8448SMax Reitz     void *mem = qemu_try_blockalign(bs, size);
54939ebd8448SMax Reitz 
54949ebd8448SMax Reitz     if (mem) {
54959ebd8448SMax Reitz         memset(mem, 0, size);
54969ebd8448SMax Reitz     }
54979ebd8448SMax Reitz 
54989ebd8448SMax Reitz     return mem;
54999ebd8448SMax Reitz }
55009ebd8448SMax Reitz 
5501c53b1c51SStefan Hajnoczi /*
5502c53b1c51SStefan Hajnoczi  * Check if all memory in this vector is sector aligned.
5503c53b1c51SStefan Hajnoczi  */
5504c53b1c51SStefan Hajnoczi bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
5505c53b1c51SStefan Hajnoczi {
5506c53b1c51SStefan Hajnoczi     int i;
5507339064d5SKevin Wolf     size_t alignment = bdrv_opt_mem_align(bs);
5508c53b1c51SStefan Hajnoczi 
5509c53b1c51SStefan Hajnoczi     for (i = 0; i < qiov->niov; i++) {
5510339064d5SKevin Wolf         if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
5511c53b1c51SStefan Hajnoczi             return false;
5512c53b1c51SStefan Hajnoczi         }
5513339064d5SKevin Wolf         if (qiov->iov[i].iov_len % alignment) {
55141ff735bdSKevin Wolf             return false;
55151ff735bdSKevin Wolf         }
5516c53b1c51SStefan Hajnoczi     }
5517c53b1c51SStefan Hajnoczi 
5518c53b1c51SStefan Hajnoczi     return true;
5519c53b1c51SStefan Hajnoczi }
5520c53b1c51SStefan Hajnoczi 
55210db6e54aSFam Zheng BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
55220db6e54aSFam Zheng {
55230db6e54aSFam Zheng     BdrvDirtyBitmap *bm;
55240db6e54aSFam Zheng 
55250db6e54aSFam Zheng     assert(name);
55260db6e54aSFam Zheng     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
55270db6e54aSFam Zheng         if (bm->name && !strcmp(name, bm->name)) {
55280db6e54aSFam Zheng             return bm;
55290db6e54aSFam Zheng         }
55300db6e54aSFam Zheng     }
55310db6e54aSFam Zheng     return NULL;
55320db6e54aSFam Zheng }
55330db6e54aSFam Zheng 
553420dca810SJohn Snow void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
55350db6e54aSFam Zheng {
55369bd2b08fSJohn Snow     assert(!bdrv_dirty_bitmap_frozen(bitmap));
55370db6e54aSFam Zheng     g_free(bitmap->name);
55380db6e54aSFam Zheng     bitmap->name = NULL;
55390db6e54aSFam Zheng }
55400db6e54aSFam Zheng 
55410db6e54aSFam Zheng BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
55425fba6c0eSJohn Snow                                           uint32_t granularity,
55430db6e54aSFam Zheng                                           const char *name,
5544b8afb520SFam Zheng                                           Error **errp)
55457cd1e32aSlirans@il.ibm.com {
55467cd1e32aSlirans@il.ibm.com     int64_t bitmap_size;
5547e4654d2dSFam Zheng     BdrvDirtyBitmap *bitmap;
55485fba6c0eSJohn Snow     uint32_t sector_granularity;
5549a55eb92cSJan Kiszka 
555050717e94SPaolo Bonzini     assert((granularity & (granularity - 1)) == 0);
555150717e94SPaolo Bonzini 
55520db6e54aSFam Zheng     if (name && bdrv_find_dirty_bitmap(bs, name)) {
55530db6e54aSFam Zheng         error_setg(errp, "Bitmap already exists: %s", name);
55540db6e54aSFam Zheng         return NULL;
55550db6e54aSFam Zheng     }
55565fba6c0eSJohn Snow     sector_granularity = granularity >> BDRV_SECTOR_BITS;
55575fba6c0eSJohn Snow     assert(sector_granularity);
555857322b78SMarkus Armbruster     bitmap_size = bdrv_nb_sectors(bs);
5559b8afb520SFam Zheng     if (bitmap_size < 0) {
5560b8afb520SFam Zheng         error_setg_errno(errp, -bitmap_size, "could not get length of device");
5561b8afb520SFam Zheng         errno = -bitmap_size;
5562b8afb520SFam Zheng         return NULL;
5563b8afb520SFam Zheng     }
55645839e53bSMarkus Armbruster     bitmap = g_new0(BdrvDirtyBitmap, 1);
55655fba6c0eSJohn Snow     bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
5566e74e6b78SJohn Snow     bitmap->size = bitmap_size;
55670db6e54aSFam Zheng     bitmap->name = g_strdup(name);
5568b8e6fb75SJohn Snow     bitmap->disabled = false;
5569e4654d2dSFam Zheng     QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
5570e4654d2dSFam Zheng     return bitmap;
5571e4654d2dSFam Zheng }
5572e4654d2dSFam Zheng 
55739bd2b08fSJohn Snow bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
55749bd2b08fSJohn Snow {
55759bd2b08fSJohn Snow     return bitmap->successor;
55769bd2b08fSJohn Snow }
55779bd2b08fSJohn Snow 
5578b8e6fb75SJohn Snow bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
5579b8e6fb75SJohn Snow {
55809bd2b08fSJohn Snow     return !(bitmap->disabled || bitmap->successor);
55819bd2b08fSJohn Snow }
55829bd2b08fSJohn Snow 
55839bd2b08fSJohn Snow /**
55849bd2b08fSJohn Snow  * Create a successor bitmap destined to replace this bitmap after an operation.
55859bd2b08fSJohn Snow  * Requires that the bitmap is not frozen and has no successor.
55869bd2b08fSJohn Snow  */
55879bd2b08fSJohn Snow int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
55889bd2b08fSJohn Snow                                        BdrvDirtyBitmap *bitmap, Error **errp)
55899bd2b08fSJohn Snow {
55909bd2b08fSJohn Snow     uint64_t granularity;
55919bd2b08fSJohn Snow     BdrvDirtyBitmap *child;
55929bd2b08fSJohn Snow 
55939bd2b08fSJohn Snow     if (bdrv_dirty_bitmap_frozen(bitmap)) {
55949bd2b08fSJohn Snow         error_setg(errp, "Cannot create a successor for a bitmap that is "
55959bd2b08fSJohn Snow                    "currently frozen");
55969bd2b08fSJohn Snow         return -1;
55979bd2b08fSJohn Snow     }
55989bd2b08fSJohn Snow     assert(!bitmap->successor);
55999bd2b08fSJohn Snow 
56009bd2b08fSJohn Snow     /* Create an anonymous successor */
56019bd2b08fSJohn Snow     granularity = bdrv_dirty_bitmap_granularity(bitmap);
56029bd2b08fSJohn Snow     child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
56039bd2b08fSJohn Snow     if (!child) {
56049bd2b08fSJohn Snow         return -1;
56059bd2b08fSJohn Snow     }
56069bd2b08fSJohn Snow 
56079bd2b08fSJohn Snow     /* Successor will be on or off based on our current state. */
56089bd2b08fSJohn Snow     child->disabled = bitmap->disabled;
56099bd2b08fSJohn Snow 
56109bd2b08fSJohn Snow     /* Install the successor and freeze the parent */
56119bd2b08fSJohn Snow     bitmap->successor = child;
56129bd2b08fSJohn Snow     return 0;
56139bd2b08fSJohn Snow }
56149bd2b08fSJohn Snow 
56159bd2b08fSJohn Snow /**
56169bd2b08fSJohn Snow  * For a bitmap with a successor, yield our name to the successor,
56179bd2b08fSJohn Snow  * delete the old bitmap, and return a handle to the new bitmap.
56189bd2b08fSJohn Snow  */
56199bd2b08fSJohn Snow BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
56209bd2b08fSJohn Snow                                             BdrvDirtyBitmap *bitmap,
56219bd2b08fSJohn Snow                                             Error **errp)
56229bd2b08fSJohn Snow {
56239bd2b08fSJohn Snow     char *name;
56249bd2b08fSJohn Snow     BdrvDirtyBitmap *successor = bitmap->successor;
56259bd2b08fSJohn Snow 
56269bd2b08fSJohn Snow     if (successor == NULL) {
56279bd2b08fSJohn Snow         error_setg(errp, "Cannot relinquish control if "
56289bd2b08fSJohn Snow                    "there's no successor present");
56299bd2b08fSJohn Snow         return NULL;
56309bd2b08fSJohn Snow     }
56319bd2b08fSJohn Snow 
56329bd2b08fSJohn Snow     name = bitmap->name;
56339bd2b08fSJohn Snow     bitmap->name = NULL;
56349bd2b08fSJohn Snow     successor->name = name;
56359bd2b08fSJohn Snow     bitmap->successor = NULL;
56369bd2b08fSJohn Snow     bdrv_release_dirty_bitmap(bs, bitmap);
56379bd2b08fSJohn Snow 
56389bd2b08fSJohn Snow     return successor;
56399bd2b08fSJohn Snow }
56409bd2b08fSJohn Snow 
56419bd2b08fSJohn Snow /**
56429bd2b08fSJohn Snow  * In cases of failure where we can no longer safely delete the parent,
56439bd2b08fSJohn Snow  * we may wish to re-join the parent and child/successor.
56449bd2b08fSJohn Snow  * The merged parent will be un-frozen, but not explicitly re-enabled.
56459bd2b08fSJohn Snow  */
56469bd2b08fSJohn Snow BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
56479bd2b08fSJohn Snow                                            BdrvDirtyBitmap *parent,
56489bd2b08fSJohn Snow                                            Error **errp)
56499bd2b08fSJohn Snow {
56509bd2b08fSJohn Snow     BdrvDirtyBitmap *successor = parent->successor;
56519bd2b08fSJohn Snow 
56529bd2b08fSJohn Snow     if (!successor) {
56539bd2b08fSJohn Snow         error_setg(errp, "Cannot reclaim a successor when none is present");
56549bd2b08fSJohn Snow         return NULL;
56559bd2b08fSJohn Snow     }
56569bd2b08fSJohn Snow 
56579bd2b08fSJohn Snow     if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
56589bd2b08fSJohn Snow         error_setg(errp, "Merging of parent and successor bitmap failed");
56599bd2b08fSJohn Snow         return NULL;
56609bd2b08fSJohn Snow     }
56619bd2b08fSJohn Snow     bdrv_release_dirty_bitmap(bs, successor);
56629bd2b08fSJohn Snow     parent->successor = NULL;
56639bd2b08fSJohn Snow 
56649bd2b08fSJohn Snow     return parent;
5665b8e6fb75SJohn Snow }
5666b8e6fb75SJohn Snow 
5667ce1ffea8SJohn Snow /**
5668ce1ffea8SJohn Snow  * Truncates _all_ bitmaps attached to a BDS.
5669ce1ffea8SJohn Snow  */
5670ce1ffea8SJohn Snow static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
5671ce1ffea8SJohn Snow {
5672ce1ffea8SJohn Snow     BdrvDirtyBitmap *bitmap;
5673ce1ffea8SJohn Snow     uint64_t size = bdrv_nb_sectors(bs);
5674ce1ffea8SJohn Snow 
5675ce1ffea8SJohn Snow     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5676ce1ffea8SJohn Snow         if (bdrv_dirty_bitmap_frozen(bitmap)) {
5677ce1ffea8SJohn Snow             continue;
5678ce1ffea8SJohn Snow         }
5679ce1ffea8SJohn Snow         hbitmap_truncate(bitmap->bitmap, size);
5680ce1ffea8SJohn Snow     }
5681ce1ffea8SJohn Snow }
5682ce1ffea8SJohn Snow 
5683e4654d2dSFam Zheng void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5684e4654d2dSFam Zheng {
5685e4654d2dSFam Zheng     BdrvDirtyBitmap *bm, *next;
5686e4654d2dSFam Zheng     QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
5687e4654d2dSFam Zheng         if (bm == bitmap) {
56889bd2b08fSJohn Snow             assert(!bdrv_dirty_bitmap_frozen(bm));
5689e4654d2dSFam Zheng             QLIST_REMOVE(bitmap, list);
5690e4654d2dSFam Zheng             hbitmap_free(bitmap->bitmap);
56910db6e54aSFam Zheng             g_free(bitmap->name);
5692e4654d2dSFam Zheng             g_free(bitmap);
5693e4654d2dSFam Zheng             return;
56947cd1e32aSlirans@il.ibm.com         }
56957cd1e32aSlirans@il.ibm.com     }
56967cd1e32aSlirans@il.ibm.com }
56977cd1e32aSlirans@il.ibm.com 
5698b8e6fb75SJohn Snow void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
5699b8e6fb75SJohn Snow {
57009bd2b08fSJohn Snow     assert(!bdrv_dirty_bitmap_frozen(bitmap));
5701b8e6fb75SJohn Snow     bitmap->disabled = true;
5702b8e6fb75SJohn Snow }
5703b8e6fb75SJohn Snow 
5704b8e6fb75SJohn Snow void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
5705b8e6fb75SJohn Snow {
57069bd2b08fSJohn Snow     assert(!bdrv_dirty_bitmap_frozen(bitmap));
5707b8e6fb75SJohn Snow     bitmap->disabled = false;
5708b8e6fb75SJohn Snow }
5709b8e6fb75SJohn Snow 
571021b56835SFam Zheng BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
571121b56835SFam Zheng {
571221b56835SFam Zheng     BdrvDirtyBitmap *bm;
571321b56835SFam Zheng     BlockDirtyInfoList *list = NULL;
571421b56835SFam Zheng     BlockDirtyInfoList **plist = &list;
571521b56835SFam Zheng 
571621b56835SFam Zheng     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
57175839e53bSMarkus Armbruster         BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
57185839e53bSMarkus Armbruster         BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
571920dca810SJohn Snow         info->count = bdrv_get_dirty_count(bm);
5720592fdd02SJohn Snow         info->granularity = bdrv_dirty_bitmap_granularity(bm);
57210db6e54aSFam Zheng         info->has_name = !!bm->name;
57220db6e54aSFam Zheng         info->name = g_strdup(bm->name);
5723a113534fSJohn Snow         info->frozen = bdrv_dirty_bitmap_frozen(bm);
572421b56835SFam Zheng         entry->value = info;
572521b56835SFam Zheng         *plist = entry;
572621b56835SFam Zheng         plist = &entry->next;
572721b56835SFam Zheng     }
572821b56835SFam Zheng 
572921b56835SFam Zheng     return list;
573021b56835SFam Zheng }
573121b56835SFam Zheng 
5732e4654d2dSFam Zheng int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
57337cd1e32aSlirans@il.ibm.com {
5734e4654d2dSFam Zheng     if (bitmap) {
5735e4654d2dSFam Zheng         return hbitmap_get(bitmap->bitmap, sector);
57367cd1e32aSlirans@il.ibm.com     } else {
57377cd1e32aSlirans@il.ibm.com         return 0;
57387cd1e32aSlirans@il.ibm.com     }
57397cd1e32aSlirans@il.ibm.com }
57407cd1e32aSlirans@il.ibm.com 
5741341ebc2fSJohn Snow /**
5742341ebc2fSJohn Snow  * Chooses a default granularity based on the existing cluster size,
5743341ebc2fSJohn Snow  * but clamped between [4K, 64K]. Defaults to 64K in the case that there
5744341ebc2fSJohn Snow  * is no cluster size information available.
5745341ebc2fSJohn Snow  */
5746341ebc2fSJohn Snow uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
5747341ebc2fSJohn Snow {
5748341ebc2fSJohn Snow     BlockDriverInfo bdi;
5749341ebc2fSJohn Snow     uint32_t granularity;
5750341ebc2fSJohn Snow 
5751341ebc2fSJohn Snow     if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
5752341ebc2fSJohn Snow         granularity = MAX(4096, bdi.cluster_size);
5753341ebc2fSJohn Snow         granularity = MIN(65536, granularity);
5754341ebc2fSJohn Snow     } else {
5755341ebc2fSJohn Snow         granularity = 65536;
5756341ebc2fSJohn Snow     }
5757341ebc2fSJohn Snow 
5758341ebc2fSJohn Snow     return granularity;
5759341ebc2fSJohn Snow }
5760341ebc2fSJohn Snow 
5761592fdd02SJohn Snow uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
5762592fdd02SJohn Snow {
5763592fdd02SJohn Snow     return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
5764592fdd02SJohn Snow }
5765592fdd02SJohn Snow 
576620dca810SJohn Snow void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
57671755da16SPaolo Bonzini {
5768e4654d2dSFam Zheng     hbitmap_iter_init(hbi, bitmap->bitmap, 0);
57691755da16SPaolo Bonzini }
57701755da16SPaolo Bonzini 
577120dca810SJohn Snow void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
5772c4237dfaSVladimir Sementsov-Ogievskiy                            int64_t cur_sector, int nr_sectors)
5773c4237dfaSVladimir Sementsov-Ogievskiy {
5774b8e6fb75SJohn Snow     assert(bdrv_dirty_bitmap_enabled(bitmap));
5775c4237dfaSVladimir Sementsov-Ogievskiy     hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
5776c4237dfaSVladimir Sementsov-Ogievskiy }
5777c4237dfaSVladimir Sementsov-Ogievskiy 
577820dca810SJohn Snow void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
5779c4237dfaSVladimir Sementsov-Ogievskiy                              int64_t cur_sector, int nr_sectors)
5780c4237dfaSVladimir Sementsov-Ogievskiy {
5781b8e6fb75SJohn Snow     assert(bdrv_dirty_bitmap_enabled(bitmap));
5782c4237dfaSVladimir Sementsov-Ogievskiy     hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
5783c4237dfaSVladimir Sementsov-Ogievskiy }
5784c4237dfaSVladimir Sementsov-Ogievskiy 
5785e74e6b78SJohn Snow void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap)
5786e74e6b78SJohn Snow {
5787e74e6b78SJohn Snow     assert(bdrv_dirty_bitmap_enabled(bitmap));
5788e74e6b78SJohn Snow     hbitmap_reset(bitmap->bitmap, 0, bitmap->size);
5789e74e6b78SJohn Snow }
5790e74e6b78SJohn Snow 
5791e0c47b6cSStefan Hajnoczi void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
57921755da16SPaolo Bonzini                     int nr_sectors)
57931755da16SPaolo Bonzini {
5794e4654d2dSFam Zheng     BdrvDirtyBitmap *bitmap;
5795e4654d2dSFam Zheng     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5796b8e6fb75SJohn Snow         if (!bdrv_dirty_bitmap_enabled(bitmap)) {
5797b8e6fb75SJohn Snow             continue;
5798b8e6fb75SJohn Snow         }
5799e4654d2dSFam Zheng         hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
5800e4654d2dSFam Zheng     }
58011755da16SPaolo Bonzini }
58021755da16SPaolo Bonzini 
5803e0c47b6cSStefan Hajnoczi void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
5804c4237dfaSVladimir Sementsov-Ogievskiy                       int nr_sectors)
58057cd1e32aSlirans@il.ibm.com {
5806e4654d2dSFam Zheng     BdrvDirtyBitmap *bitmap;
5807e4654d2dSFam Zheng     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5808b8e6fb75SJohn Snow         if (!bdrv_dirty_bitmap_enabled(bitmap)) {
5809b8e6fb75SJohn Snow             continue;
5810b8e6fb75SJohn Snow         }
5811e4654d2dSFam Zheng         hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
5812e4654d2dSFam Zheng     }
58137cd1e32aSlirans@il.ibm.com }
5814aaa0eb75SLiran Schour 
5815d58d8453SJohn Snow /**
5816d58d8453SJohn Snow  * Advance an HBitmapIter to an arbitrary offset.
5817d58d8453SJohn Snow  */
5818d58d8453SJohn Snow void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
5819d58d8453SJohn Snow {
5820d58d8453SJohn Snow     assert(hbi->hb);
5821d58d8453SJohn Snow     hbitmap_iter_init(hbi, hbi->hb, offset);
5822d58d8453SJohn Snow }
5823d58d8453SJohn Snow 
582420dca810SJohn Snow int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
5825aaa0eb75SLiran Schour {
5826e4654d2dSFam Zheng     return hbitmap_count(bitmap->bitmap);
5827aaa0eb75SLiran Schour }
5828f88e1a42SJes Sorensen 
58299fcb0251SFam Zheng /* Get a reference to bs */
58309fcb0251SFam Zheng void bdrv_ref(BlockDriverState *bs)
58319fcb0251SFam Zheng {
58329fcb0251SFam Zheng     bs->refcnt++;
58339fcb0251SFam Zheng }
58349fcb0251SFam Zheng 
58359fcb0251SFam Zheng /* Release a previously grabbed reference to bs.
58369fcb0251SFam Zheng  * If after releasing, reference count is zero, the BlockDriverState is
58379fcb0251SFam Zheng  * deleted. */
58389fcb0251SFam Zheng void bdrv_unref(BlockDriverState *bs)
58399fcb0251SFam Zheng {
58409a4d5ca6SJeff Cody     if (!bs) {
58419a4d5ca6SJeff Cody         return;
58429a4d5ca6SJeff Cody     }
58439fcb0251SFam Zheng     assert(bs->refcnt > 0);
58449fcb0251SFam Zheng     if (--bs->refcnt == 0) {
58459fcb0251SFam Zheng         bdrv_delete(bs);
58469fcb0251SFam Zheng     }
58479fcb0251SFam Zheng }
58489fcb0251SFam Zheng 
5849fbe40ff7SFam Zheng struct BdrvOpBlocker {
5850fbe40ff7SFam Zheng     Error *reason;
5851fbe40ff7SFam Zheng     QLIST_ENTRY(BdrvOpBlocker) list;
5852fbe40ff7SFam Zheng };
5853fbe40ff7SFam Zheng 
5854fbe40ff7SFam Zheng bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
5855fbe40ff7SFam Zheng {
5856fbe40ff7SFam Zheng     BdrvOpBlocker *blocker;
5857fbe40ff7SFam Zheng     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5858fbe40ff7SFam Zheng     if (!QLIST_EMPTY(&bs->op_blockers[op])) {
5859fbe40ff7SFam Zheng         blocker = QLIST_FIRST(&bs->op_blockers[op]);
5860fbe40ff7SFam Zheng         if (errp) {
586181e5f78aSAlberto Garcia             error_setg(errp, "Node '%s' is busy: %s",
586281e5f78aSAlberto Garcia                        bdrv_get_device_or_node_name(bs),
5863bfb197e0SMarkus Armbruster                        error_get_pretty(blocker->reason));
5864fbe40ff7SFam Zheng         }
5865fbe40ff7SFam Zheng         return true;
5866fbe40ff7SFam Zheng     }
5867fbe40ff7SFam Zheng     return false;
5868fbe40ff7SFam Zheng }
5869fbe40ff7SFam Zheng 
5870fbe40ff7SFam Zheng void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
5871fbe40ff7SFam Zheng {
5872fbe40ff7SFam Zheng     BdrvOpBlocker *blocker;
5873fbe40ff7SFam Zheng     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5874fbe40ff7SFam Zheng 
58755839e53bSMarkus Armbruster     blocker = g_new0(BdrvOpBlocker, 1);
5876fbe40ff7SFam Zheng     blocker->reason = reason;
5877fbe40ff7SFam Zheng     QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
5878fbe40ff7SFam Zheng }
5879fbe40ff7SFam Zheng 
5880fbe40ff7SFam Zheng void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
5881fbe40ff7SFam Zheng {
5882fbe40ff7SFam Zheng     BdrvOpBlocker *blocker, *next;
5883fbe40ff7SFam Zheng     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5884fbe40ff7SFam Zheng     QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
5885fbe40ff7SFam Zheng         if (blocker->reason == reason) {
5886fbe40ff7SFam Zheng             QLIST_REMOVE(blocker, list);
5887fbe40ff7SFam Zheng             g_free(blocker);
5888fbe40ff7SFam Zheng         }
5889fbe40ff7SFam Zheng     }
5890fbe40ff7SFam Zheng }
5891fbe40ff7SFam Zheng 
5892fbe40ff7SFam Zheng void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
5893fbe40ff7SFam Zheng {
5894fbe40ff7SFam Zheng     int i;
5895fbe40ff7SFam Zheng     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5896fbe40ff7SFam Zheng         bdrv_op_block(bs, i, reason);
5897fbe40ff7SFam Zheng     }
5898fbe40ff7SFam Zheng }
5899fbe40ff7SFam Zheng 
5900fbe40ff7SFam Zheng void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
5901fbe40ff7SFam Zheng {
5902fbe40ff7SFam Zheng     int i;
5903fbe40ff7SFam Zheng     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5904fbe40ff7SFam Zheng         bdrv_op_unblock(bs, i, reason);
5905fbe40ff7SFam Zheng     }
5906fbe40ff7SFam Zheng }
5907fbe40ff7SFam Zheng 
5908fbe40ff7SFam Zheng bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
5909fbe40ff7SFam Zheng {
5910fbe40ff7SFam Zheng     int i;
5911fbe40ff7SFam Zheng 
5912fbe40ff7SFam Zheng     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5913fbe40ff7SFam Zheng         if (!QLIST_EMPTY(&bs->op_blockers[i])) {
5914fbe40ff7SFam Zheng             return false;
5915fbe40ff7SFam Zheng         }
5916fbe40ff7SFam Zheng     }
5917fbe40ff7SFam Zheng     return true;
5918fbe40ff7SFam Zheng }
5919fbe40ff7SFam Zheng 
592028a7282aSLuiz Capitulino void bdrv_iostatus_enable(BlockDriverState *bs)
592128a7282aSLuiz Capitulino {
5922d6bf279eSLuiz Capitulino     bs->iostatus_enabled = true;
592358e21ef5SLuiz Capitulino     bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
592428a7282aSLuiz Capitulino }
592528a7282aSLuiz Capitulino 
592628a7282aSLuiz Capitulino /* The I/O status is only enabled if the drive explicitly
592728a7282aSLuiz Capitulino  * enables it _and_ the VM is configured to stop on errors */
592828a7282aSLuiz Capitulino bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
592928a7282aSLuiz Capitulino {
5930d6bf279eSLuiz Capitulino     return (bs->iostatus_enabled &&
593192aa5c6dSPaolo Bonzini            (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
593292aa5c6dSPaolo Bonzini             bs->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
593392aa5c6dSPaolo Bonzini             bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
593428a7282aSLuiz Capitulino }
593528a7282aSLuiz Capitulino 
593628a7282aSLuiz Capitulino void bdrv_iostatus_disable(BlockDriverState *bs)
593728a7282aSLuiz Capitulino {
5938d6bf279eSLuiz Capitulino     bs->iostatus_enabled = false;
593928a7282aSLuiz Capitulino }
594028a7282aSLuiz Capitulino 
594128a7282aSLuiz Capitulino void bdrv_iostatus_reset(BlockDriverState *bs)
594228a7282aSLuiz Capitulino {
594328a7282aSLuiz Capitulino     if (bdrv_iostatus_is_enabled(bs)) {
594458e21ef5SLuiz Capitulino         bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
59453bd293c3SPaolo Bonzini         if (bs->job) {
59463bd293c3SPaolo Bonzini             block_job_iostatus_reset(bs->job);
59473bd293c3SPaolo Bonzini         }
594828a7282aSLuiz Capitulino     }
594928a7282aSLuiz Capitulino }
595028a7282aSLuiz Capitulino 
595128a7282aSLuiz Capitulino void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
595228a7282aSLuiz Capitulino {
59533e1caa5fSPaolo Bonzini     assert(bdrv_iostatus_is_enabled(bs));
59543e1caa5fSPaolo Bonzini     if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
595558e21ef5SLuiz Capitulino         bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
595658e21ef5SLuiz Capitulino                                          BLOCK_DEVICE_IO_STATUS_FAILED;
595728a7282aSLuiz Capitulino     }
595828a7282aSLuiz Capitulino }
595928a7282aSLuiz Capitulino 
5960d92ada22SLuiz Capitulino void bdrv_img_create(const char *filename, const char *fmt,
5961f88e1a42SJes Sorensen                      const char *base_filename, const char *base_fmt,
5962f382d43aSMiroslav Rezanina                      char *options, uint64_t img_size, int flags,
5963f382d43aSMiroslav Rezanina                      Error **errp, bool quiet)
5964f88e1a42SJes Sorensen {
596583d0521aSChunyan Liu     QemuOptsList *create_opts = NULL;
596683d0521aSChunyan Liu     QemuOpts *opts = NULL;
596783d0521aSChunyan Liu     const char *backing_fmt, *backing_file;
596883d0521aSChunyan Liu     int64_t size;
5969f88e1a42SJes Sorensen     BlockDriver *drv, *proto_drv;
597096df67d1SStefan Hajnoczi     BlockDriver *backing_drv = NULL;
5971cc84d90fSMax Reitz     Error *local_err = NULL;
5972f88e1a42SJes Sorensen     int ret = 0;
5973f88e1a42SJes Sorensen 
5974f88e1a42SJes Sorensen     /* Find driver and parse its options */
5975f88e1a42SJes Sorensen     drv = bdrv_find_format(fmt);
5976f88e1a42SJes Sorensen     if (!drv) {
597771c79813SLuiz Capitulino         error_setg(errp, "Unknown file format '%s'", fmt);
5978d92ada22SLuiz Capitulino         return;
5979f88e1a42SJes Sorensen     }
5980f88e1a42SJes Sorensen 
5981b65a5e12SMax Reitz     proto_drv = bdrv_find_protocol(filename, true, errp);
5982f88e1a42SJes Sorensen     if (!proto_drv) {
5983d92ada22SLuiz Capitulino         return;
5984f88e1a42SJes Sorensen     }
5985f88e1a42SJes Sorensen 
5986c6149724SMax Reitz     if (!drv->create_opts) {
5987c6149724SMax Reitz         error_setg(errp, "Format driver '%s' does not support image creation",
5988c6149724SMax Reitz                    drv->format_name);
5989c6149724SMax Reitz         return;
5990c6149724SMax Reitz     }
5991c6149724SMax Reitz 
5992c6149724SMax Reitz     if (!proto_drv->create_opts) {
5993c6149724SMax Reitz         error_setg(errp, "Protocol driver '%s' does not support image creation",
5994c6149724SMax Reitz                    proto_drv->format_name);
5995c6149724SMax Reitz         return;
5996c6149724SMax Reitz     }
5997c6149724SMax Reitz 
5998c282e1fdSChunyan Liu     create_opts = qemu_opts_append(create_opts, drv->create_opts);
5999c282e1fdSChunyan Liu     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
6000f88e1a42SJes Sorensen 
6001f88e1a42SJes Sorensen     /* Create parameter list with default values */
600283d0521aSChunyan Liu     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
600339101f25SMarkus Armbruster     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
6004f88e1a42SJes Sorensen 
6005f88e1a42SJes Sorensen     /* Parse -o options */
6006f88e1a42SJes Sorensen     if (options) {
6007dc523cd3SMarkus Armbruster         qemu_opts_do_parse(opts, options, NULL, &local_err);
6008dc523cd3SMarkus Armbruster         if (local_err) {
6009dc523cd3SMarkus Armbruster             error_report_err(local_err);
6010dc523cd3SMarkus Armbruster             local_err = NULL;
601183d0521aSChunyan Liu             error_setg(errp, "Invalid options for file format '%s'", fmt);
6012f88e1a42SJes Sorensen             goto out;
6013f88e1a42SJes Sorensen         }
6014f88e1a42SJes Sorensen     }
6015f88e1a42SJes Sorensen 
6016f88e1a42SJes Sorensen     if (base_filename) {
6017f43e47dbSMarkus Armbruster         qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
60186be4194bSMarkus Armbruster         if (local_err) {
601971c79813SLuiz Capitulino             error_setg(errp, "Backing file not supported for file format '%s'",
602071c79813SLuiz Capitulino                        fmt);
6021f88e1a42SJes Sorensen             goto out;
6022f88e1a42SJes Sorensen         }
6023f88e1a42SJes Sorensen     }
6024f88e1a42SJes Sorensen 
6025f88e1a42SJes Sorensen     if (base_fmt) {
6026f43e47dbSMarkus Armbruster         qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
60276be4194bSMarkus Armbruster         if (local_err) {
602871c79813SLuiz Capitulino             error_setg(errp, "Backing file format not supported for file "
602971c79813SLuiz Capitulino                              "format '%s'", fmt);
6030f88e1a42SJes Sorensen             goto out;
6031f88e1a42SJes Sorensen         }
6032f88e1a42SJes Sorensen     }
6033f88e1a42SJes Sorensen 
603483d0521aSChunyan Liu     backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
603583d0521aSChunyan Liu     if (backing_file) {
603683d0521aSChunyan Liu         if (!strcmp(filename, backing_file)) {
603771c79813SLuiz Capitulino             error_setg(errp, "Error: Trying to create an image with the "
603871c79813SLuiz Capitulino                              "same filename as the backing file");
6039792da93aSJes Sorensen             goto out;
6040792da93aSJes Sorensen         }
6041792da93aSJes Sorensen     }
6042792da93aSJes Sorensen 
604383d0521aSChunyan Liu     backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
604483d0521aSChunyan Liu     if (backing_fmt) {
604583d0521aSChunyan Liu         backing_drv = bdrv_find_format(backing_fmt);
604696df67d1SStefan Hajnoczi         if (!backing_drv) {
604771c79813SLuiz Capitulino             error_setg(errp, "Unknown backing file format '%s'",
604883d0521aSChunyan Liu                        backing_fmt);
6049f88e1a42SJes Sorensen             goto out;
6050f88e1a42SJes Sorensen         }
6051f88e1a42SJes Sorensen     }
6052f88e1a42SJes Sorensen 
6053f88e1a42SJes Sorensen     // The size for the image must always be specified, with one exception:
6054f88e1a42SJes Sorensen     // If we are using a backing file, we can obtain the size from there
605583d0521aSChunyan Liu     size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
605683d0521aSChunyan Liu     if (size == -1) {
605783d0521aSChunyan Liu         if (backing_file) {
605866f6b814SMax Reitz             BlockDriverState *bs;
605929168018SMax Reitz             char *full_backing = g_new0(char, PATH_MAX);
606052bf1e72SMarkus Armbruster             int64_t size;
606163090dacSPaolo Bonzini             int back_flags;
606263090dacSPaolo Bonzini 
606329168018SMax Reitz             bdrv_get_full_backing_filename_from_filename(filename, backing_file,
606429168018SMax Reitz                                                          full_backing, PATH_MAX,
606529168018SMax Reitz                                                          &local_err);
606629168018SMax Reitz             if (local_err) {
606729168018SMax Reitz                 g_free(full_backing);
606829168018SMax Reitz                 goto out;
606929168018SMax Reitz             }
607029168018SMax Reitz 
607163090dacSPaolo Bonzini             /* backing files always opened read-only */
607263090dacSPaolo Bonzini             back_flags =
607363090dacSPaolo Bonzini                 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
6074f88e1a42SJes Sorensen 
6075f67503e5SMax Reitz             bs = NULL;
607629168018SMax Reitz             ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags,
6077cc84d90fSMax Reitz                             backing_drv, &local_err);
607829168018SMax Reitz             g_free(full_backing);
6079f88e1a42SJes Sorensen             if (ret < 0) {
6080f88e1a42SJes Sorensen                 goto out;
6081f88e1a42SJes Sorensen             }
608252bf1e72SMarkus Armbruster             size = bdrv_getlength(bs);
608352bf1e72SMarkus Armbruster             if (size < 0) {
608452bf1e72SMarkus Armbruster                 error_setg_errno(errp, -size, "Could not get size of '%s'",
608552bf1e72SMarkus Armbruster                                  backing_file);
608652bf1e72SMarkus Armbruster                 bdrv_unref(bs);
608752bf1e72SMarkus Armbruster                 goto out;
608852bf1e72SMarkus Armbruster             }
6089f88e1a42SJes Sorensen 
609039101f25SMarkus Armbruster             qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
609166f6b814SMax Reitz 
609266f6b814SMax Reitz             bdrv_unref(bs);
6093f88e1a42SJes Sorensen         } else {
609471c79813SLuiz Capitulino             error_setg(errp, "Image creation needs a size parameter");
6095f88e1a42SJes Sorensen             goto out;
6096f88e1a42SJes Sorensen         }
6097f88e1a42SJes Sorensen     }
6098f88e1a42SJes Sorensen 
6099f382d43aSMiroslav Rezanina     if (!quiet) {
6100f88e1a42SJes Sorensen         printf("Formatting '%s', fmt=%s", filename, fmt);
610143c5d8f8SFam Zheng         qemu_opts_print(opts, " ");
6102f88e1a42SJes Sorensen         puts("");
6103f382d43aSMiroslav Rezanina     }
610483d0521aSChunyan Liu 
6105c282e1fdSChunyan Liu     ret = bdrv_create(drv, filename, opts, &local_err);
610683d0521aSChunyan Liu 
6107cc84d90fSMax Reitz     if (ret == -EFBIG) {
6108cc84d90fSMax Reitz         /* This is generally a better message than whatever the driver would
6109cc84d90fSMax Reitz          * deliver (especially because of the cluster_size_hint), since that
6110cc84d90fSMax Reitz          * is most probably not much different from "image too large". */
6111f3f4d2c0SKevin Wolf         const char *cluster_size_hint = "";
611283d0521aSChunyan Liu         if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
6113f3f4d2c0SKevin Wolf             cluster_size_hint = " (try using a larger cluster size)";
6114f3f4d2c0SKevin Wolf         }
6115cc84d90fSMax Reitz         error_setg(errp, "The image size is too large for file format '%s'"
6116cc84d90fSMax Reitz                    "%s", fmt, cluster_size_hint);
6117cc84d90fSMax Reitz         error_free(local_err);
6118cc84d90fSMax Reitz         local_err = NULL;
6119f88e1a42SJes Sorensen     }
6120f88e1a42SJes Sorensen 
6121f88e1a42SJes Sorensen out:
612283d0521aSChunyan Liu     qemu_opts_del(opts);
612383d0521aSChunyan Liu     qemu_opts_free(create_opts);
612484d18f06SMarkus Armbruster     if (local_err) {
6125cc84d90fSMax Reitz         error_propagate(errp, local_err);
6126cc84d90fSMax Reitz     }
6127f88e1a42SJes Sorensen }
612885d126f3SStefan Hajnoczi 
612985d126f3SStefan Hajnoczi AioContext *bdrv_get_aio_context(BlockDriverState *bs)
613085d126f3SStefan Hajnoczi {
6131dcd04228SStefan Hajnoczi     return bs->aio_context;
6132dcd04228SStefan Hajnoczi }
6133dcd04228SStefan Hajnoczi 
6134dcd04228SStefan Hajnoczi void bdrv_detach_aio_context(BlockDriverState *bs)
6135dcd04228SStefan Hajnoczi {
613633384421SMax Reitz     BdrvAioNotifier *baf;
613733384421SMax Reitz 
6138dcd04228SStefan Hajnoczi     if (!bs->drv) {
6139dcd04228SStefan Hajnoczi         return;
6140dcd04228SStefan Hajnoczi     }
6141dcd04228SStefan Hajnoczi 
614233384421SMax Reitz     QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
614333384421SMax Reitz         baf->detach_aio_context(baf->opaque);
614433384421SMax Reitz     }
614533384421SMax Reitz 
614613af91ebSStefan Hajnoczi     if (bs->io_limits_enabled) {
614713af91ebSStefan Hajnoczi         throttle_detach_aio_context(&bs->throttle_state);
614813af91ebSStefan Hajnoczi     }
6149dcd04228SStefan Hajnoczi     if (bs->drv->bdrv_detach_aio_context) {
6150dcd04228SStefan Hajnoczi         bs->drv->bdrv_detach_aio_context(bs);
6151dcd04228SStefan Hajnoczi     }
6152dcd04228SStefan Hajnoczi     if (bs->file) {
6153dcd04228SStefan Hajnoczi         bdrv_detach_aio_context(bs->file);
6154dcd04228SStefan Hajnoczi     }
6155dcd04228SStefan Hajnoczi     if (bs->backing_hd) {
6156dcd04228SStefan Hajnoczi         bdrv_detach_aio_context(bs->backing_hd);
6157dcd04228SStefan Hajnoczi     }
6158dcd04228SStefan Hajnoczi 
6159dcd04228SStefan Hajnoczi     bs->aio_context = NULL;
6160dcd04228SStefan Hajnoczi }
6161dcd04228SStefan Hajnoczi 
6162dcd04228SStefan Hajnoczi void bdrv_attach_aio_context(BlockDriverState *bs,
6163dcd04228SStefan Hajnoczi                              AioContext *new_context)
6164dcd04228SStefan Hajnoczi {
616533384421SMax Reitz     BdrvAioNotifier *ban;
616633384421SMax Reitz 
6167dcd04228SStefan Hajnoczi     if (!bs->drv) {
6168dcd04228SStefan Hajnoczi         return;
6169dcd04228SStefan Hajnoczi     }
6170dcd04228SStefan Hajnoczi 
6171dcd04228SStefan Hajnoczi     bs->aio_context = new_context;
6172dcd04228SStefan Hajnoczi 
6173dcd04228SStefan Hajnoczi     if (bs->backing_hd) {
6174dcd04228SStefan Hajnoczi         bdrv_attach_aio_context(bs->backing_hd, new_context);
6175dcd04228SStefan Hajnoczi     }
6176dcd04228SStefan Hajnoczi     if (bs->file) {
6177dcd04228SStefan Hajnoczi         bdrv_attach_aio_context(bs->file, new_context);
6178dcd04228SStefan Hajnoczi     }
6179dcd04228SStefan Hajnoczi     if (bs->drv->bdrv_attach_aio_context) {
6180dcd04228SStefan Hajnoczi         bs->drv->bdrv_attach_aio_context(bs, new_context);
6181dcd04228SStefan Hajnoczi     }
618213af91ebSStefan Hajnoczi     if (bs->io_limits_enabled) {
618313af91ebSStefan Hajnoczi         throttle_attach_aio_context(&bs->throttle_state, new_context);
618413af91ebSStefan Hajnoczi     }
618533384421SMax Reitz 
618633384421SMax Reitz     QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
618733384421SMax Reitz         ban->attached_aio_context(new_context, ban->opaque);
618833384421SMax Reitz     }
6189dcd04228SStefan Hajnoczi }
6190dcd04228SStefan Hajnoczi 
6191dcd04228SStefan Hajnoczi void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
6192dcd04228SStefan Hajnoczi {
6193dcd04228SStefan Hajnoczi     bdrv_drain_all(); /* ensure there are no in-flight requests */
6194dcd04228SStefan Hajnoczi 
6195dcd04228SStefan Hajnoczi     bdrv_detach_aio_context(bs);
6196dcd04228SStefan Hajnoczi 
6197dcd04228SStefan Hajnoczi     /* This function executes in the old AioContext so acquire the new one in
6198dcd04228SStefan Hajnoczi      * case it runs in a different thread.
6199dcd04228SStefan Hajnoczi      */
6200dcd04228SStefan Hajnoczi     aio_context_acquire(new_context);
6201dcd04228SStefan Hajnoczi     bdrv_attach_aio_context(bs, new_context);
6202dcd04228SStefan Hajnoczi     aio_context_release(new_context);
620385d126f3SStefan Hajnoczi }
6204d616b224SStefan Hajnoczi 
620533384421SMax Reitz void bdrv_add_aio_context_notifier(BlockDriverState *bs,
620633384421SMax Reitz         void (*attached_aio_context)(AioContext *new_context, void *opaque),
620733384421SMax Reitz         void (*detach_aio_context)(void *opaque), void *opaque)
620833384421SMax Reitz {
620933384421SMax Reitz     BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
621033384421SMax Reitz     *ban = (BdrvAioNotifier){
621133384421SMax Reitz         .attached_aio_context = attached_aio_context,
621233384421SMax Reitz         .detach_aio_context   = detach_aio_context,
621333384421SMax Reitz         .opaque               = opaque
621433384421SMax Reitz     };
621533384421SMax Reitz 
621633384421SMax Reitz     QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
621733384421SMax Reitz }
621833384421SMax Reitz 
621933384421SMax Reitz void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
622033384421SMax Reitz                                       void (*attached_aio_context)(AioContext *,
622133384421SMax Reitz                                                                    void *),
622233384421SMax Reitz                                       void (*detach_aio_context)(void *),
622333384421SMax Reitz                                       void *opaque)
622433384421SMax Reitz {
622533384421SMax Reitz     BdrvAioNotifier *ban, *ban_next;
622633384421SMax Reitz 
622733384421SMax Reitz     QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
622833384421SMax Reitz         if (ban->attached_aio_context == attached_aio_context &&
622933384421SMax Reitz             ban->detach_aio_context   == detach_aio_context   &&
623033384421SMax Reitz             ban->opaque               == opaque)
623133384421SMax Reitz         {
623233384421SMax Reitz             QLIST_REMOVE(ban, list);
623333384421SMax Reitz             g_free(ban);
623433384421SMax Reitz 
623533384421SMax Reitz             return;
623633384421SMax Reitz         }
623733384421SMax Reitz     }
623833384421SMax Reitz 
623933384421SMax Reitz     abort();
624033384421SMax Reitz }
624133384421SMax Reitz 
6242d616b224SStefan Hajnoczi void bdrv_add_before_write_notifier(BlockDriverState *bs,
6243d616b224SStefan Hajnoczi                                     NotifierWithReturn *notifier)
6244d616b224SStefan Hajnoczi {
6245d616b224SStefan Hajnoczi     notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
6246d616b224SStefan Hajnoczi }
62476f176b48SMax Reitz 
624877485434SMax Reitz int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
624977485434SMax Reitz                        BlockDriverAmendStatusCB *status_cb)
62506f176b48SMax Reitz {
6251c282e1fdSChunyan Liu     if (!bs->drv->bdrv_amend_options) {
62526f176b48SMax Reitz         return -ENOTSUP;
62536f176b48SMax Reitz     }
625477485434SMax Reitz     return bs->drv->bdrv_amend_options(bs, opts, status_cb);
62556f176b48SMax Reitz }
6256f6186f49SBenoît Canet 
6257b5042a36SBenoît Canet /* This function will be called by the bdrv_recurse_is_first_non_filter method
6258b5042a36SBenoît Canet  * of block filter and by bdrv_is_first_non_filter.
6259b5042a36SBenoît Canet  * It is used to test if the given bs is the candidate or recurse more in the
6260b5042a36SBenoît Canet  * node graph.
6261212a5a8fSBenoît Canet  */
6262212a5a8fSBenoît Canet bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
6263212a5a8fSBenoît Canet                                       BlockDriverState *candidate)
6264f6186f49SBenoît Canet {
6265b5042a36SBenoît Canet     /* return false if basic checks fails */
6266b5042a36SBenoît Canet     if (!bs || !bs->drv) {
6267b5042a36SBenoît Canet         return false;
6268b5042a36SBenoît Canet     }
6269b5042a36SBenoît Canet 
6270b5042a36SBenoît Canet     /* the code reached a non block filter driver -> check if the bs is
6271b5042a36SBenoît Canet      * the same as the candidate. It's the recursion termination condition.
6272b5042a36SBenoît Canet      */
6273b5042a36SBenoît Canet     if (!bs->drv->is_filter) {
6274b5042a36SBenoît Canet         return bs == candidate;
6275b5042a36SBenoît Canet     }
6276b5042a36SBenoît Canet     /* Down this path the driver is a block filter driver */
6277b5042a36SBenoît Canet 
6278b5042a36SBenoît Canet     /* If the block filter recursion method is defined use it to recurse down
6279b5042a36SBenoît Canet      * the node graph.
6280b5042a36SBenoît Canet      */
6281b5042a36SBenoît Canet     if (bs->drv->bdrv_recurse_is_first_non_filter) {
6282212a5a8fSBenoît Canet         return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
6283212a5a8fSBenoît Canet     }
6284212a5a8fSBenoît Canet 
6285b5042a36SBenoît Canet     /* the driver is a block filter but don't allow to recurse -> return false
6286b5042a36SBenoît Canet      */
6287b5042a36SBenoît Canet     return false;
6288212a5a8fSBenoît Canet }
6289212a5a8fSBenoît Canet 
6290212a5a8fSBenoît Canet /* This function checks if the candidate is the first non filter bs down it's
6291212a5a8fSBenoît Canet  * bs chain. Since we don't have pointers to parents it explore all bs chains
6292212a5a8fSBenoît Canet  * from the top. Some filters can choose not to pass down the recursion.
6293212a5a8fSBenoît Canet  */
6294212a5a8fSBenoît Canet bool bdrv_is_first_non_filter(BlockDriverState *candidate)
6295212a5a8fSBenoît Canet {
6296212a5a8fSBenoît Canet     BlockDriverState *bs;
6297212a5a8fSBenoît Canet 
6298212a5a8fSBenoît Canet     /* walk down the bs forest recursively */
6299212a5a8fSBenoît Canet     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
6300212a5a8fSBenoît Canet         bool perm;
6301212a5a8fSBenoît Canet 
6302b5042a36SBenoît Canet         /* try to recurse in this top level bs */
6303e6dc8a1fSKevin Wolf         perm = bdrv_recurse_is_first_non_filter(bs, candidate);
6304212a5a8fSBenoît Canet 
6305212a5a8fSBenoît Canet         /* candidate is the first non filter */
6306212a5a8fSBenoît Canet         if (perm) {
6307212a5a8fSBenoît Canet             return true;
6308212a5a8fSBenoît Canet         }
6309212a5a8fSBenoît Canet     }
6310212a5a8fSBenoît Canet 
6311212a5a8fSBenoît Canet     return false;
6312f6186f49SBenoît Canet }
631309158f00SBenoît Canet 
631409158f00SBenoît Canet BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
631509158f00SBenoît Canet {
631609158f00SBenoît Canet     BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
63175a7e7a0bSStefan Hajnoczi     AioContext *aio_context;
63185a7e7a0bSStefan Hajnoczi 
631909158f00SBenoît Canet     if (!to_replace_bs) {
632009158f00SBenoît Canet         error_setg(errp, "Node name '%s' not found", node_name);
632109158f00SBenoît Canet         return NULL;
632209158f00SBenoît Canet     }
632309158f00SBenoît Canet 
63245a7e7a0bSStefan Hajnoczi     aio_context = bdrv_get_aio_context(to_replace_bs);
63255a7e7a0bSStefan Hajnoczi     aio_context_acquire(aio_context);
63265a7e7a0bSStefan Hajnoczi 
632709158f00SBenoît Canet     if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
63285a7e7a0bSStefan Hajnoczi         to_replace_bs = NULL;
63295a7e7a0bSStefan Hajnoczi         goto out;
633009158f00SBenoît Canet     }
633109158f00SBenoît Canet 
633209158f00SBenoît Canet     /* We don't want arbitrary node of the BDS chain to be replaced only the top
633309158f00SBenoît Canet      * most non filter in order to prevent data corruption.
633409158f00SBenoît Canet      * Another benefit is that this tests exclude backing files which are
633509158f00SBenoît Canet      * blocked by the backing blockers.
633609158f00SBenoît Canet      */
633709158f00SBenoît Canet     if (!bdrv_is_first_non_filter(to_replace_bs)) {
633809158f00SBenoît Canet         error_setg(errp, "Only top most non filter can be replaced");
63395a7e7a0bSStefan Hajnoczi         to_replace_bs = NULL;
63405a7e7a0bSStefan Hajnoczi         goto out;
634109158f00SBenoît Canet     }
634209158f00SBenoît Canet 
63435a7e7a0bSStefan Hajnoczi out:
63445a7e7a0bSStefan Hajnoczi     aio_context_release(aio_context);
634509158f00SBenoît Canet     return to_replace_bs;
634609158f00SBenoît Canet }
6347448ad91dSMing Lei 
6348448ad91dSMing Lei void bdrv_io_plug(BlockDriverState *bs)
6349448ad91dSMing Lei {
6350448ad91dSMing Lei     BlockDriver *drv = bs->drv;
6351448ad91dSMing Lei     if (drv && drv->bdrv_io_plug) {
6352448ad91dSMing Lei         drv->bdrv_io_plug(bs);
6353448ad91dSMing Lei     } else if (bs->file) {
6354448ad91dSMing Lei         bdrv_io_plug(bs->file);
6355448ad91dSMing Lei     }
6356448ad91dSMing Lei }
6357448ad91dSMing Lei 
6358448ad91dSMing Lei void bdrv_io_unplug(BlockDriverState *bs)
6359448ad91dSMing Lei {
6360448ad91dSMing Lei     BlockDriver *drv = bs->drv;
6361448ad91dSMing Lei     if (drv && drv->bdrv_io_unplug) {
6362448ad91dSMing Lei         drv->bdrv_io_unplug(bs);
6363448ad91dSMing Lei     } else if (bs->file) {
6364448ad91dSMing Lei         bdrv_io_unplug(bs->file);
6365448ad91dSMing Lei     }
6366448ad91dSMing Lei }
6367448ad91dSMing Lei 
6368448ad91dSMing Lei void bdrv_flush_io_queue(BlockDriverState *bs)
6369448ad91dSMing Lei {
6370448ad91dSMing Lei     BlockDriver *drv = bs->drv;
6371448ad91dSMing Lei     if (drv && drv->bdrv_flush_io_queue) {
6372448ad91dSMing Lei         drv->bdrv_flush_io_queue(bs);
6373448ad91dSMing Lei     } else if (bs->file) {
6374448ad91dSMing Lei         bdrv_flush_io_queue(bs->file);
6375448ad91dSMing Lei     }
6376448ad91dSMing Lei }
637791af7014SMax Reitz 
637891af7014SMax Reitz static bool append_open_options(QDict *d, BlockDriverState *bs)
637991af7014SMax Reitz {
638091af7014SMax Reitz     const QDictEntry *entry;
638191af7014SMax Reitz     bool found_any = false;
638291af7014SMax Reitz 
638391af7014SMax Reitz     for (entry = qdict_first(bs->options); entry;
638491af7014SMax Reitz          entry = qdict_next(bs->options, entry))
638591af7014SMax Reitz     {
638691af7014SMax Reitz         /* Only take options for this level and exclude all non-driver-specific
638791af7014SMax Reitz          * options */
638891af7014SMax Reitz         if (!strchr(qdict_entry_key(entry), '.') &&
638991af7014SMax Reitz             strcmp(qdict_entry_key(entry), "node-name"))
639091af7014SMax Reitz         {
639191af7014SMax Reitz             qobject_incref(qdict_entry_value(entry));
639291af7014SMax Reitz             qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
639391af7014SMax Reitz             found_any = true;
639491af7014SMax Reitz         }
639591af7014SMax Reitz     }
639691af7014SMax Reitz 
639791af7014SMax Reitz     return found_any;
639891af7014SMax Reitz }
639991af7014SMax Reitz 
640091af7014SMax Reitz /* Updates the following BDS fields:
640191af7014SMax Reitz  *  - exact_filename: A filename which may be used for opening a block device
640291af7014SMax Reitz  *                    which (mostly) equals the given BDS (even without any
640391af7014SMax Reitz  *                    other options; so reading and writing must return the same
640491af7014SMax Reitz  *                    results, but caching etc. may be different)
640591af7014SMax Reitz  *  - full_open_options: Options which, when given when opening a block device
640691af7014SMax Reitz  *                       (without a filename), result in a BDS (mostly)
640791af7014SMax Reitz  *                       equalling the given one
640891af7014SMax Reitz  *  - filename: If exact_filename is set, it is copied here. Otherwise,
640991af7014SMax Reitz  *              full_open_options is converted to a JSON object, prefixed with
641091af7014SMax Reitz  *              "json:" (for use through the JSON pseudo protocol) and put here.
641191af7014SMax Reitz  */
641291af7014SMax Reitz void bdrv_refresh_filename(BlockDriverState *bs)
641391af7014SMax Reitz {
641491af7014SMax Reitz     BlockDriver *drv = bs->drv;
641591af7014SMax Reitz     QDict *opts;
641691af7014SMax Reitz 
641791af7014SMax Reitz     if (!drv) {
641891af7014SMax Reitz         return;
641991af7014SMax Reitz     }
642091af7014SMax Reitz 
642191af7014SMax Reitz     /* This BDS's file name will most probably depend on its file's name, so
642291af7014SMax Reitz      * refresh that first */
642391af7014SMax Reitz     if (bs->file) {
642491af7014SMax Reitz         bdrv_refresh_filename(bs->file);
642591af7014SMax Reitz     }
642691af7014SMax Reitz 
642791af7014SMax Reitz     if (drv->bdrv_refresh_filename) {
642891af7014SMax Reitz         /* Obsolete information is of no use here, so drop the old file name
642991af7014SMax Reitz          * information before refreshing it */
643091af7014SMax Reitz         bs->exact_filename[0] = '\0';
643191af7014SMax Reitz         if (bs->full_open_options) {
643291af7014SMax Reitz             QDECREF(bs->full_open_options);
643391af7014SMax Reitz             bs->full_open_options = NULL;
643491af7014SMax Reitz         }
643591af7014SMax Reitz 
643691af7014SMax Reitz         drv->bdrv_refresh_filename(bs);
643791af7014SMax Reitz     } else if (bs->file) {
643891af7014SMax Reitz         /* Try to reconstruct valid information from the underlying file */
643991af7014SMax Reitz         bool has_open_options;
644091af7014SMax Reitz 
644191af7014SMax Reitz         bs->exact_filename[0] = '\0';
644291af7014SMax Reitz         if (bs->full_open_options) {
644391af7014SMax Reitz             QDECREF(bs->full_open_options);
644491af7014SMax Reitz             bs->full_open_options = NULL;
644591af7014SMax Reitz         }
644691af7014SMax Reitz 
644791af7014SMax Reitz         opts = qdict_new();
644891af7014SMax Reitz         has_open_options = append_open_options(opts, bs);
644991af7014SMax Reitz 
645091af7014SMax Reitz         /* If no specific options have been given for this BDS, the filename of
645191af7014SMax Reitz          * the underlying file should suffice for this one as well */
645291af7014SMax Reitz         if (bs->file->exact_filename[0] && !has_open_options) {
645391af7014SMax Reitz             strcpy(bs->exact_filename, bs->file->exact_filename);
645491af7014SMax Reitz         }
645591af7014SMax Reitz         /* Reconstructing the full options QDict is simple for most format block
645691af7014SMax Reitz          * drivers, as long as the full options are known for the underlying
645791af7014SMax Reitz          * file BDS. The full options QDict of that file BDS should somehow
645891af7014SMax Reitz          * contain a representation of the filename, therefore the following
645991af7014SMax Reitz          * suffices without querying the (exact_)filename of this BDS. */
646091af7014SMax Reitz         if (bs->file->full_open_options) {
646191af7014SMax Reitz             qdict_put_obj(opts, "driver",
646291af7014SMax Reitz                           QOBJECT(qstring_from_str(drv->format_name)));
646391af7014SMax Reitz             QINCREF(bs->file->full_open_options);
646491af7014SMax Reitz             qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));
646591af7014SMax Reitz 
646691af7014SMax Reitz             bs->full_open_options = opts;
646791af7014SMax Reitz         } else {
646891af7014SMax Reitz             QDECREF(opts);
646991af7014SMax Reitz         }
647091af7014SMax Reitz     } else if (!bs->full_open_options && qdict_size(bs->options)) {
647191af7014SMax Reitz         /* There is no underlying file BDS (at least referenced by BDS.file),
647291af7014SMax Reitz          * so the full options QDict should be equal to the options given
647391af7014SMax Reitz          * specifically for this block device when it was opened (plus the
647491af7014SMax Reitz          * driver specification).
647591af7014SMax Reitz          * Because those options don't change, there is no need to update
647691af7014SMax Reitz          * full_open_options when it's already set. */
647791af7014SMax Reitz 
647891af7014SMax Reitz         opts = qdict_new();
647991af7014SMax Reitz         append_open_options(opts, bs);
648091af7014SMax Reitz         qdict_put_obj(opts, "driver",
648191af7014SMax Reitz                       QOBJECT(qstring_from_str(drv->format_name)));
648291af7014SMax Reitz 
648391af7014SMax Reitz         if (bs->exact_filename[0]) {
648491af7014SMax Reitz             /* This may not work for all block protocol drivers (some may
648591af7014SMax Reitz              * require this filename to be parsed), but we have to find some
648691af7014SMax Reitz              * default solution here, so just include it. If some block driver
648791af7014SMax Reitz              * does not support pure options without any filename at all or
648891af7014SMax Reitz              * needs some special format of the options QDict, it needs to
648991af7014SMax Reitz              * implement the driver-specific bdrv_refresh_filename() function.
649091af7014SMax Reitz              */
649191af7014SMax Reitz             qdict_put_obj(opts, "filename",
649291af7014SMax Reitz                           QOBJECT(qstring_from_str(bs->exact_filename)));
649391af7014SMax Reitz         }
649491af7014SMax Reitz 
649591af7014SMax Reitz         bs->full_open_options = opts;
649691af7014SMax Reitz     }
649791af7014SMax Reitz 
649891af7014SMax Reitz     if (bs->exact_filename[0]) {
649991af7014SMax Reitz         pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
650091af7014SMax Reitz     } else if (bs->full_open_options) {
650191af7014SMax Reitz         QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
650291af7014SMax Reitz         snprintf(bs->filename, sizeof(bs->filename), "json:%s",
650391af7014SMax Reitz                  qstring_get_str(json));
650491af7014SMax Reitz         QDECREF(json);
650591af7014SMax Reitz     }
650691af7014SMax Reitz }
65075366d0c8SBenoît Canet 
65085366d0c8SBenoît Canet /* This accessor function purpose is to allow the device models to access the
65095366d0c8SBenoît Canet  * BlockAcctStats structure embedded inside a BlockDriverState without being
65105366d0c8SBenoît Canet  * aware of the BlockDriverState structure layout.
65115366d0c8SBenoît Canet  * It will go away when the BlockAcctStats structure will be moved inside
65125366d0c8SBenoît Canet  * the device models.
65135366d0c8SBenoît Canet  */
65145366d0c8SBenoît Canet BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
65155366d0c8SBenoît Canet {
65165366d0c8SBenoît Canet     return &bs->stats;
65175366d0c8SBenoît Canet }
6518