xref: /openbmc/qemu/block.c (revision 6913c0c2ce00c0e886b2bd20b05073090fa5308a)
1fc01f7e7Sbellard /*
2fc01f7e7Sbellard  * QEMU System Emulator block driver
3fc01f7e7Sbellard  *
4fc01f7e7Sbellard  * Copyright (c) 2003 Fabrice Bellard
5fc01f7e7Sbellard  *
6fc01f7e7Sbellard  * Permission is hereby granted, free of charge, to any person obtaining a copy
7fc01f7e7Sbellard  * of this software and associated documentation files (the "Software"), to deal
8fc01f7e7Sbellard  * in the Software without restriction, including without limitation the rights
9fc01f7e7Sbellard  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10fc01f7e7Sbellard  * copies of the Software, and to permit persons to whom the Software is
11fc01f7e7Sbellard  * furnished to do so, subject to the following conditions:
12fc01f7e7Sbellard  *
13fc01f7e7Sbellard  * The above copyright notice and this permission notice shall be included in
14fc01f7e7Sbellard  * all copies or substantial portions of the Software.
15fc01f7e7Sbellard  *
16fc01f7e7Sbellard  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17fc01f7e7Sbellard  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18fc01f7e7Sbellard  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19fc01f7e7Sbellard  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20fc01f7e7Sbellard  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21fc01f7e7Sbellard  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22fc01f7e7Sbellard  * THE SOFTWARE.
23fc01f7e7Sbellard  */
243990d09aSblueswir1 #include "config-host.h"
25faf07963Spbrook #include "qemu-common.h"
266d519a5fSStefan Hajnoczi #include "trace.h"
2783c9089eSPaolo Bonzini #include "monitor/monitor.h"
28737e150eSPaolo Bonzini #include "block/block_int.h"
29737e150eSPaolo Bonzini #include "block/blockjob.h"
301de7afc9SPaolo Bonzini #include "qemu/module.h"
317b1b5d19SPaolo Bonzini #include "qapi/qmp/qjson.h"
329c17d615SPaolo Bonzini #include "sysemu/sysemu.h"
331de7afc9SPaolo Bonzini #include "qemu/notify.h"
34737e150eSPaolo Bonzini #include "block/coroutine.h"
35b2023818SLuiz Capitulino #include "qmp-commands.h"
361de7afc9SPaolo Bonzini #include "qemu/timer.h"
37fc01f7e7Sbellard 
3871e72a19SJuan Quintela #ifdef CONFIG_BSD
397674e7bfSbellard #include <sys/types.h>
407674e7bfSbellard #include <sys/stat.h>
417674e7bfSbellard #include <sys/ioctl.h>
4272cf2d4fSBlue Swirl #include <sys/queue.h>
43c5e97233Sblueswir1 #ifndef __DragonFly__
447674e7bfSbellard #include <sys/disk.h>
457674e7bfSbellard #endif
46c5e97233Sblueswir1 #endif
477674e7bfSbellard 
4849dc768dSaliguori #ifdef _WIN32
4949dc768dSaliguori #include <windows.h>
5049dc768dSaliguori #endif
5149dc768dSaliguori 
52e4654d2dSFam Zheng struct BdrvDirtyBitmap {
53e4654d2dSFam Zheng     HBitmap *bitmap;
54e4654d2dSFam Zheng     QLIST_ENTRY(BdrvDirtyBitmap) list;
55e4654d2dSFam Zheng };
56e4654d2dSFam Zheng 
571c9805a3SStefan Hajnoczi #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
581c9805a3SStefan Hajnoczi 
597d4b4ba5SMarkus Armbruster static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
60f141eafeSaliguori static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
61f141eafeSaliguori         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
62c87c0672Saliguori         BlockDriverCompletionFunc *cb, void *opaque);
63f141eafeSaliguori static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
64f141eafeSaliguori         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
65ce1a14dcSpbrook         BlockDriverCompletionFunc *cb, void *opaque);
66f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
67f9f05dc5SKevin Wolf                                          int64_t sector_num, int nb_sectors,
68f9f05dc5SKevin Wolf                                          QEMUIOVector *iov);
69f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
70f9f05dc5SKevin Wolf                                          int64_t sector_num, int nb_sectors,
71f9f05dc5SKevin Wolf                                          QEMUIOVector *iov);
72c5fbe571SStefan Hajnoczi static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
73470c0504SStefan Hajnoczi     int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
74470c0504SStefan Hajnoczi     BdrvRequestFlags flags);
751c9805a3SStefan Hajnoczi static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
76f08f2ddaSStefan Hajnoczi     int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
77f08f2ddaSStefan Hajnoczi     BdrvRequestFlags flags);
78b2a61371SStefan Hajnoczi static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
79b2a61371SStefan Hajnoczi                                                int64_t sector_num,
80b2a61371SStefan Hajnoczi                                                QEMUIOVector *qiov,
81b2a61371SStefan Hajnoczi                                                int nb_sectors,
82d20d9b7cSPaolo Bonzini                                                BdrvRequestFlags flags,
83b2a61371SStefan Hajnoczi                                                BlockDriverCompletionFunc *cb,
84b2a61371SStefan Hajnoczi                                                void *opaque,
858c5873d6SStefan Hajnoczi                                                bool is_write);
86b2a61371SStefan Hajnoczi static void coroutine_fn bdrv_co_do_rw(void *opaque);
87621f0589SKevin Wolf static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
88aa7bfbffSPeter Lieven     int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
89ec530c81Sbellard 
901b7bdbc1SStefan Hajnoczi static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
911b7bdbc1SStefan Hajnoczi     QTAILQ_HEAD_INITIALIZER(bdrv_states);
927ee930d0Sblueswir1 
93dc364f4cSBenoît Canet static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
94dc364f4cSBenoît Canet     QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
95dc364f4cSBenoît Canet 
968a22f02aSStefan Hajnoczi static QLIST_HEAD(, BlockDriver) bdrv_drivers =
978a22f02aSStefan Hajnoczi     QLIST_HEAD_INITIALIZER(bdrv_drivers);
98ea2384d3Sbellard 
99eb852011SMarkus Armbruster /* If non-zero, use only whitelisted block drivers */
100eb852011SMarkus Armbruster static int use_bdrv_whitelist;
101eb852011SMarkus Armbruster 
1029e0b22f4SStefan Hajnoczi #ifdef _WIN32
/* Return 1 if filename begins with an ASCII letter followed by ':'
 * (e.g. "c:" or "D:\\dir"), 0 otherwise. */
static int is_windows_drive_prefix(const char *filename)
{
    const char drive = filename[0];
    const int is_lower = drive >= 'a' && drive <= 'z';
    const int is_upper = drive >= 'A' && drive <= 'Z';

    if (!is_lower && !is_upper) {
        return 0;
    }
    return filename[1] == ':';
}
1099e0b22f4SStefan Hajnoczi 
/* Return 1 if filename names a Windows drive: either a bare "X:" drive
 * specification or a path starting with "\\.\" or "//./"; 0 otherwise. */
int is_windows_drive(const char *filename)
{
    /* bare drive letter with nothing after the colon */
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }

    return (strstart(filename, "\\\\.\\", NULL) ||
            strstart(filename, "//./", NULL)) ? 1 : 0;
}
1209e0b22f4SStefan Hajnoczi #endif
1219e0b22f4SStefan Hajnoczi 
1220563e191SZhi Yong Wu /* throttling disk I/O limits */
123cc0681c4SBenoît Canet void bdrv_set_io_limits(BlockDriverState *bs,
124cc0681c4SBenoît Canet                         ThrottleConfig *cfg)
125cc0681c4SBenoît Canet {
126cc0681c4SBenoît Canet     int i;
127cc0681c4SBenoît Canet 
128cc0681c4SBenoît Canet     throttle_config(&bs->throttle_state, cfg);
129cc0681c4SBenoît Canet 
130cc0681c4SBenoît Canet     for (i = 0; i < 2; i++) {
131cc0681c4SBenoît Canet         qemu_co_enter_next(&bs->throttled_reqs[i]);
132cc0681c4SBenoît Canet     }
133cc0681c4SBenoît Canet }
134cc0681c4SBenoît Canet 
135cc0681c4SBenoît Canet /* this function drain all the throttled IOs */
136cc0681c4SBenoît Canet static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
137cc0681c4SBenoît Canet {
138cc0681c4SBenoît Canet     bool drained = false;
139cc0681c4SBenoît Canet     bool enabled = bs->io_limits_enabled;
140cc0681c4SBenoît Canet     int i;
141cc0681c4SBenoît Canet 
142cc0681c4SBenoît Canet     bs->io_limits_enabled = false;
143cc0681c4SBenoît Canet 
144cc0681c4SBenoît Canet     for (i = 0; i < 2; i++) {
145cc0681c4SBenoît Canet         while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
146cc0681c4SBenoît Canet             drained = true;
147cc0681c4SBenoît Canet         }
148cc0681c4SBenoît Canet     }
149cc0681c4SBenoît Canet 
150cc0681c4SBenoît Canet     bs->io_limits_enabled = enabled;
151cc0681c4SBenoît Canet 
152cc0681c4SBenoît Canet     return drained;
153cc0681c4SBenoît Canet }
154cc0681c4SBenoît Canet 
15598f90dbaSZhi Yong Wu void bdrv_io_limits_disable(BlockDriverState *bs)
15698f90dbaSZhi Yong Wu {
15798f90dbaSZhi Yong Wu     bs->io_limits_enabled = false;
15898f90dbaSZhi Yong Wu 
159cc0681c4SBenoît Canet     bdrv_start_throttled_reqs(bs);
16098f90dbaSZhi Yong Wu 
161cc0681c4SBenoît Canet     throttle_destroy(&bs->throttle_state);
16298f90dbaSZhi Yong Wu }
16398f90dbaSZhi Yong Wu 
164cc0681c4SBenoît Canet static void bdrv_throttle_read_timer_cb(void *opaque)
1650563e191SZhi Yong Wu {
1660563e191SZhi Yong Wu     BlockDriverState *bs = opaque;
167cc0681c4SBenoît Canet     qemu_co_enter_next(&bs->throttled_reqs[0]);
1680563e191SZhi Yong Wu }
1690563e191SZhi Yong Wu 
170cc0681c4SBenoît Canet static void bdrv_throttle_write_timer_cb(void *opaque)
171cc0681c4SBenoît Canet {
172cc0681c4SBenoît Canet     BlockDriverState *bs = opaque;
173cc0681c4SBenoît Canet     qemu_co_enter_next(&bs->throttled_reqs[1]);
174cc0681c4SBenoît Canet }
175cc0681c4SBenoît Canet 
176cc0681c4SBenoît Canet /* should be called before bdrv_set_io_limits if a limit is set */
1770563e191SZhi Yong Wu void bdrv_io_limits_enable(BlockDriverState *bs)
1780563e191SZhi Yong Wu {
179cc0681c4SBenoît Canet     assert(!bs->io_limits_enabled);
180cc0681c4SBenoît Canet     throttle_init(&bs->throttle_state,
181cc0681c4SBenoît Canet                   QEMU_CLOCK_VIRTUAL,
182cc0681c4SBenoît Canet                   bdrv_throttle_read_timer_cb,
183cc0681c4SBenoît Canet                   bdrv_throttle_write_timer_cb,
184cc0681c4SBenoît Canet                   bs);
1850563e191SZhi Yong Wu     bs->io_limits_enabled = true;
1860563e191SZhi Yong Wu }
1870563e191SZhi Yong Wu 
188cc0681c4SBenoît Canet /* This function makes an IO wait if needed
189cc0681c4SBenoît Canet  *
190cc0681c4SBenoît Canet  * @nb_sectors: the number of sectors of the IO
191cc0681c4SBenoît Canet  * @is_write:   is the IO a write
19298f90dbaSZhi Yong Wu  */
193cc0681c4SBenoît Canet static void bdrv_io_limits_intercept(BlockDriverState *bs,
194cc0681c4SBenoît Canet                                      int nb_sectors,
195cc0681c4SBenoît Canet                                      bool is_write)
196cc0681c4SBenoît Canet {
197cc0681c4SBenoît Canet     /* does this io must wait */
198cc0681c4SBenoît Canet     bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);
19998f90dbaSZhi Yong Wu 
200cc0681c4SBenoît Canet     /* if must wait or any request of this type throttled queue the IO */
201cc0681c4SBenoît Canet     if (must_wait ||
202cc0681c4SBenoît Canet         !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
203cc0681c4SBenoît Canet         qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
20498f90dbaSZhi Yong Wu     }
20598f90dbaSZhi Yong Wu 
206cc0681c4SBenoît Canet     /* the IO will be executed, do the accounting */
207cc0681c4SBenoît Canet     throttle_account(&bs->throttle_state,
208cc0681c4SBenoît Canet                      is_write,
209cc0681c4SBenoît Canet                      nb_sectors * BDRV_SECTOR_SIZE);
210cc0681c4SBenoît Canet 
211cc0681c4SBenoît Canet     /* if the next request must wait -> do nothing */
212cc0681c4SBenoît Canet     if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
213cc0681c4SBenoît Canet         return;
214cc0681c4SBenoît Canet     }
215cc0681c4SBenoît Canet 
216cc0681c4SBenoît Canet     /* else queue next request for execution */
217cc0681c4SBenoît Canet     qemu_co_queue_next(&bs->throttled_reqs[is_write]);
21898f90dbaSZhi Yong Wu }
21998f90dbaSZhi Yong Wu 
/* Check whether path starts with "<protocol>:".
 *
 * The first ':' must come before any path separator. On Windows, drive
 * specifications are never treated as protocols.
 * Returns 1 if a protocol prefix is present, 0 otherwise.
 */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    static const char delimiters[] = ":/\\";

    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    static const char delimiters[] = ":/";
#endif

    /* look at the first delimiter found (or the terminating NUL) */
    return path[strcspn(path, delimiters)] == ':';
}
2379e0b22f4SStefan Hajnoczi 
/* Return 1 if path is absolute, 0 otherwise.
 *
 * A path starting with '/' is absolute. On Windows, drive specifications
 * (including names like "\\.\d:") and paths starting with '\' are absolute
 * as well.
 */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (*path == '\\') {
        return 1;
    }
#endif
    return *path == '/';
}
25083f64091Sbellard 
/* Build the path of filename in dest (at most dest_size bytes).
 *
 * If filename is absolute it is copied unchanged. Otherwise it is appended
 * to the directory part of base_path, i.e. everything up to and including
 * the last path separator or, failing that, the first ':' (so URL-like
 * base paths are supported). Nothing is written when dest_size <= 0.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep, *dir_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just past the first ':' (or the start when there is none) */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* position just past the last path separator (or the start) */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* the directory prefix ends at whichever position is further right */
    dir_end = (after_sep > after_colon) ? after_sep : after_colon;

    prefix_len = dir_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
29483f64091Sbellard 
295dc5a1371SPaolo Bonzini void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
296dc5a1371SPaolo Bonzini {
297dc5a1371SPaolo Bonzini     if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) {
298dc5a1371SPaolo Bonzini         pstrcpy(dest, sz, bs->backing_file);
299dc5a1371SPaolo Bonzini     } else {
300dc5a1371SPaolo Bonzini         path_combine(dest, sz, bs->filename, bs->backing_file);
301dc5a1371SPaolo Bonzini     }
302dc5a1371SPaolo Bonzini }
303dc5a1371SPaolo Bonzini 
3045efa9d5aSAnthony Liguori void bdrv_register(BlockDriver *bdrv)
305ea2384d3Sbellard {
3068c5873d6SStefan Hajnoczi     /* Block drivers without coroutine functions need emulation */
3078c5873d6SStefan Hajnoczi     if (!bdrv->bdrv_co_readv) {
308f9f05dc5SKevin Wolf         bdrv->bdrv_co_readv = bdrv_co_readv_em;
309f9f05dc5SKevin Wolf         bdrv->bdrv_co_writev = bdrv_co_writev_em;
310f9f05dc5SKevin Wolf 
311f8c35c1dSStefan Hajnoczi         /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
312f8c35c1dSStefan Hajnoczi          * the block driver lacks aio we need to emulate that too.
313f8c35c1dSStefan Hajnoczi          */
314f9f05dc5SKevin Wolf         if (!bdrv->bdrv_aio_readv) {
31583f64091Sbellard             /* add AIO emulation layer */
316f141eafeSaliguori             bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
317f141eafeSaliguori             bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
31883f64091Sbellard         }
319f9f05dc5SKevin Wolf     }
320b2e12bc6SChristoph Hellwig 
3218a22f02aSStefan Hajnoczi     QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
322ea2384d3Sbellard }
323b338082bSbellard 
324b338082bSbellard /* create a new block device (by default it is empty) */
325b338082bSbellard BlockDriverState *bdrv_new(const char *device_name)
326fc01f7e7Sbellard {
3271b7bdbc1SStefan Hajnoczi     BlockDriverState *bs;
328b338082bSbellard 
3297267c094SAnthony Liguori     bs = g_malloc0(sizeof(BlockDriverState));
330e4654d2dSFam Zheng     QLIST_INIT(&bs->dirty_bitmaps);
331b338082bSbellard     pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
332ea2384d3Sbellard     if (device_name[0] != '\0') {
333dc364f4cSBenoît Canet         QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
334ea2384d3Sbellard     }
33528a7282aSLuiz Capitulino     bdrv_iostatus_disable(bs);
336d7d512f6SPaolo Bonzini     notifier_list_init(&bs->close_notifiers);
337d616b224SStefan Hajnoczi     notifier_with_return_list_init(&bs->before_write_notifiers);
338cc0681c4SBenoît Canet     qemu_co_queue_init(&bs->throttled_reqs[0]);
339cc0681c4SBenoît Canet     qemu_co_queue_init(&bs->throttled_reqs[1]);
3409fcb0251SFam Zheng     bs->refcnt = 1;
341d7d512f6SPaolo Bonzini 
342b338082bSbellard     return bs;
343b338082bSbellard }
344b338082bSbellard 
345d7d512f6SPaolo Bonzini void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
346d7d512f6SPaolo Bonzini {
347d7d512f6SPaolo Bonzini     notifier_list_add(&bs->close_notifiers, notify);
348d7d512f6SPaolo Bonzini }
349d7d512f6SPaolo Bonzini 
350ea2384d3Sbellard BlockDriver *bdrv_find_format(const char *format_name)
351ea2384d3Sbellard {
352ea2384d3Sbellard     BlockDriver *drv1;
3538a22f02aSStefan Hajnoczi     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
3548a22f02aSStefan Hajnoczi         if (!strcmp(drv1->format_name, format_name)) {
355ea2384d3Sbellard             return drv1;
356ea2384d3Sbellard         }
3578a22f02aSStefan Hajnoczi     }
358ea2384d3Sbellard     return NULL;
359ea2384d3Sbellard }
360ea2384d3Sbellard 
361b64ec4e4SFam Zheng static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
362eb852011SMarkus Armbruster {
363b64ec4e4SFam Zheng     static const char *whitelist_rw[] = {
364b64ec4e4SFam Zheng         CONFIG_BDRV_RW_WHITELIST
365b64ec4e4SFam Zheng     };
366b64ec4e4SFam Zheng     static const char *whitelist_ro[] = {
367b64ec4e4SFam Zheng         CONFIG_BDRV_RO_WHITELIST
368eb852011SMarkus Armbruster     };
369eb852011SMarkus Armbruster     const char **p;
370eb852011SMarkus Armbruster 
371b64ec4e4SFam Zheng     if (!whitelist_rw[0] && !whitelist_ro[0]) {
372eb852011SMarkus Armbruster         return 1;               /* no whitelist, anything goes */
373b64ec4e4SFam Zheng     }
374eb852011SMarkus Armbruster 
375b64ec4e4SFam Zheng     for (p = whitelist_rw; *p; p++) {
376eb852011SMarkus Armbruster         if (!strcmp(drv->format_name, *p)) {
377eb852011SMarkus Armbruster             return 1;
378eb852011SMarkus Armbruster         }
379eb852011SMarkus Armbruster     }
380b64ec4e4SFam Zheng     if (read_only) {
381b64ec4e4SFam Zheng         for (p = whitelist_ro; *p; p++) {
382b64ec4e4SFam Zheng             if (!strcmp(drv->format_name, *p)) {
383b64ec4e4SFam Zheng                 return 1;
384b64ec4e4SFam Zheng             }
385b64ec4e4SFam Zheng         }
386b64ec4e4SFam Zheng     }
387eb852011SMarkus Armbruster     return 0;
388eb852011SMarkus Armbruster }
389eb852011SMarkus Armbruster 
390b64ec4e4SFam Zheng BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
391b64ec4e4SFam Zheng                                           bool read_only)
392eb852011SMarkus Armbruster {
393eb852011SMarkus Armbruster     BlockDriver *drv = bdrv_find_format(format_name);
394b64ec4e4SFam Zheng     return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
395eb852011SMarkus Armbruster }
396eb852011SMarkus Armbruster 
3975b7e1542SZhi Yong Wu typedef struct CreateCo {
3985b7e1542SZhi Yong Wu     BlockDriver *drv;
3995b7e1542SZhi Yong Wu     char *filename;
4005b7e1542SZhi Yong Wu     QEMUOptionParameter *options;
4015b7e1542SZhi Yong Wu     int ret;
402cc84d90fSMax Reitz     Error *err;
4035b7e1542SZhi Yong Wu } CreateCo;
4045b7e1542SZhi Yong Wu 
4055b7e1542SZhi Yong Wu static void coroutine_fn bdrv_create_co_entry(void *opaque)
4065b7e1542SZhi Yong Wu {
407cc84d90fSMax Reitz     Error *local_err = NULL;
408cc84d90fSMax Reitz     int ret;
409cc84d90fSMax Reitz 
4105b7e1542SZhi Yong Wu     CreateCo *cco = opaque;
4115b7e1542SZhi Yong Wu     assert(cco->drv);
4125b7e1542SZhi Yong Wu 
413cc84d90fSMax Reitz     ret = cco->drv->bdrv_create(cco->filename, cco->options, &local_err);
414cc84d90fSMax Reitz     if (error_is_set(&local_err)) {
415cc84d90fSMax Reitz         error_propagate(&cco->err, local_err);
416cc84d90fSMax Reitz     }
417cc84d90fSMax Reitz     cco->ret = ret;
4185b7e1542SZhi Yong Wu }
4195b7e1542SZhi Yong Wu 
4200e7e1989SKevin Wolf int bdrv_create(BlockDriver *drv, const char* filename,
421cc84d90fSMax Reitz     QEMUOptionParameter *options, Error **errp)
422ea2384d3Sbellard {
4235b7e1542SZhi Yong Wu     int ret;
4240e7e1989SKevin Wolf 
4255b7e1542SZhi Yong Wu     Coroutine *co;
4265b7e1542SZhi Yong Wu     CreateCo cco = {
4275b7e1542SZhi Yong Wu         .drv = drv,
4285b7e1542SZhi Yong Wu         .filename = g_strdup(filename),
4295b7e1542SZhi Yong Wu         .options = options,
4305b7e1542SZhi Yong Wu         .ret = NOT_DONE,
431cc84d90fSMax Reitz         .err = NULL,
4325b7e1542SZhi Yong Wu     };
4335b7e1542SZhi Yong Wu 
4345b7e1542SZhi Yong Wu     if (!drv->bdrv_create) {
435cc84d90fSMax Reitz         error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
43680168bffSLuiz Capitulino         ret = -ENOTSUP;
43780168bffSLuiz Capitulino         goto out;
4385b7e1542SZhi Yong Wu     }
4395b7e1542SZhi Yong Wu 
4405b7e1542SZhi Yong Wu     if (qemu_in_coroutine()) {
4415b7e1542SZhi Yong Wu         /* Fast-path if already in coroutine context */
4425b7e1542SZhi Yong Wu         bdrv_create_co_entry(&cco);
4435b7e1542SZhi Yong Wu     } else {
4445b7e1542SZhi Yong Wu         co = qemu_coroutine_create(bdrv_create_co_entry);
4455b7e1542SZhi Yong Wu         qemu_coroutine_enter(co, &cco);
4465b7e1542SZhi Yong Wu         while (cco.ret == NOT_DONE) {
4475b7e1542SZhi Yong Wu             qemu_aio_wait();
4485b7e1542SZhi Yong Wu         }
4495b7e1542SZhi Yong Wu     }
4505b7e1542SZhi Yong Wu 
4515b7e1542SZhi Yong Wu     ret = cco.ret;
452cc84d90fSMax Reitz     if (ret < 0) {
453cc84d90fSMax Reitz         if (error_is_set(&cco.err)) {
454cc84d90fSMax Reitz             error_propagate(errp, cco.err);
455cc84d90fSMax Reitz         } else {
456cc84d90fSMax Reitz             error_setg_errno(errp, -ret, "Could not create image");
457cc84d90fSMax Reitz         }
458cc84d90fSMax Reitz     }
4595b7e1542SZhi Yong Wu 
46080168bffSLuiz Capitulino out:
46180168bffSLuiz Capitulino     g_free(cco.filename);
4625b7e1542SZhi Yong Wu     return ret;
463ea2384d3Sbellard }
464ea2384d3Sbellard 
465cc84d90fSMax Reitz int bdrv_create_file(const char* filename, QEMUOptionParameter *options,
466cc84d90fSMax Reitz                      Error **errp)
46784a12e66SChristoph Hellwig {
46884a12e66SChristoph Hellwig     BlockDriver *drv;
469cc84d90fSMax Reitz     Error *local_err = NULL;
470cc84d90fSMax Reitz     int ret;
47184a12e66SChristoph Hellwig 
47298289620SKevin Wolf     drv = bdrv_find_protocol(filename, true);
47384a12e66SChristoph Hellwig     if (drv == NULL) {
474cc84d90fSMax Reitz         error_setg(errp, "Could not find protocol for file '%s'", filename);
47516905d71SStefan Hajnoczi         return -ENOENT;
47684a12e66SChristoph Hellwig     }
47784a12e66SChristoph Hellwig 
478cc84d90fSMax Reitz     ret = bdrv_create(drv, filename, options, &local_err);
479cc84d90fSMax Reitz     if (error_is_set(&local_err)) {
480cc84d90fSMax Reitz         error_propagate(errp, local_err);
481cc84d90fSMax Reitz     }
482cc84d90fSMax Reitz     return ret;
48384a12e66SChristoph Hellwig }
48484a12e66SChristoph Hellwig 
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename: output buffer receiving the name of the created file
 * @size:     size of @filename in bytes (must be at least MAX_PATH on
 *            Windows, which is asserted)
 *
 * On POSIX hosts the file is created as "$TMPDIR/vl.XXXXXX", falling back
 * to "/tmp" when TMPDIR is unset.
 *
 * Return 0 upon success, otherwise a negative errno value (-EOVERFLOW when
 * the name does not fit in @filename).
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* Save the close() error first: unlink() may overwrite errno. */
        int close_errno = errno;

        unlink(filename);
        return -close_errno;
    }
    return 0;
#endif
}
519ea2384d3Sbellard 
520f3a5d3f8SChristoph Hellwig /*
521f3a5d3f8SChristoph Hellwig  * Detect host devices. By convention, /dev/cdrom[N] is always
522f3a5d3f8SChristoph Hellwig  * recognized as a host CDROM.
523f3a5d3f8SChristoph Hellwig  */
524f3a5d3f8SChristoph Hellwig static BlockDriver *find_hdev_driver(const char *filename)
525f3a5d3f8SChristoph Hellwig {
526508c7cb3SChristoph Hellwig     int score_max = 0, score;
527508c7cb3SChristoph Hellwig     BlockDriver *drv = NULL, *d;
528f3a5d3f8SChristoph Hellwig 
5298a22f02aSStefan Hajnoczi     QLIST_FOREACH(d, &bdrv_drivers, list) {
530508c7cb3SChristoph Hellwig         if (d->bdrv_probe_device) {
531508c7cb3SChristoph Hellwig             score = d->bdrv_probe_device(filename);
532508c7cb3SChristoph Hellwig             if (score > score_max) {
533508c7cb3SChristoph Hellwig                 score_max = score;
534508c7cb3SChristoph Hellwig                 drv = d;
535f3a5d3f8SChristoph Hellwig             }
536508c7cb3SChristoph Hellwig         }
537f3a5d3f8SChristoph Hellwig     }
538f3a5d3f8SChristoph Hellwig 
539508c7cb3SChristoph Hellwig     return drv;
540f3a5d3f8SChristoph Hellwig }
541f3a5d3f8SChristoph Hellwig 
54298289620SKevin Wolf BlockDriver *bdrv_find_protocol(const char *filename,
54398289620SKevin Wolf                                 bool allow_protocol_prefix)
54484a12e66SChristoph Hellwig {
54584a12e66SChristoph Hellwig     BlockDriver *drv1;
54684a12e66SChristoph Hellwig     char protocol[128];
54784a12e66SChristoph Hellwig     int len;
54884a12e66SChristoph Hellwig     const char *p;
54984a12e66SChristoph Hellwig 
55066f82ceeSKevin Wolf     /* TODO Drivers without bdrv_file_open must be specified explicitly */
55166f82ceeSKevin Wolf 
55239508e7aSChristoph Hellwig     /*
55339508e7aSChristoph Hellwig      * XXX(hch): we really should not let host device detection
55439508e7aSChristoph Hellwig      * override an explicit protocol specification, but moving this
55539508e7aSChristoph Hellwig      * later breaks access to device names with colons in them.
55639508e7aSChristoph Hellwig      * Thanks to the brain-dead persistent naming schemes on udev-
55739508e7aSChristoph Hellwig      * based Linux systems those actually are quite common.
55839508e7aSChristoph Hellwig      */
55984a12e66SChristoph Hellwig     drv1 = find_hdev_driver(filename);
56039508e7aSChristoph Hellwig     if (drv1) {
56184a12e66SChristoph Hellwig         return drv1;
56284a12e66SChristoph Hellwig     }
56339508e7aSChristoph Hellwig 
56498289620SKevin Wolf     if (!path_has_protocol(filename) || !allow_protocol_prefix) {
56539508e7aSChristoph Hellwig         return bdrv_find_format("file");
56639508e7aSChristoph Hellwig     }
56798289620SKevin Wolf 
5689e0b22f4SStefan Hajnoczi     p = strchr(filename, ':');
5699e0b22f4SStefan Hajnoczi     assert(p != NULL);
57084a12e66SChristoph Hellwig     len = p - filename;
57184a12e66SChristoph Hellwig     if (len > sizeof(protocol) - 1)
57284a12e66SChristoph Hellwig         len = sizeof(protocol) - 1;
57384a12e66SChristoph Hellwig     memcpy(protocol, filename, len);
57484a12e66SChristoph Hellwig     protocol[len] = '\0';
57584a12e66SChristoph Hellwig     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
57684a12e66SChristoph Hellwig         if (drv1->protocol_name &&
57784a12e66SChristoph Hellwig             !strcmp(drv1->protocol_name, protocol)) {
57884a12e66SChristoph Hellwig             return drv1;
57984a12e66SChristoph Hellwig         }
58084a12e66SChristoph Hellwig     }
58184a12e66SChristoph Hellwig     return NULL;
58284a12e66SChristoph Hellwig }
58384a12e66SChristoph Hellwig 
584f500a6d3SKevin Wolf static int find_image_format(BlockDriverState *bs, const char *filename,
58534b5d2c6SMax Reitz                              BlockDriver **pdrv, Error **errp)
586ea2384d3Sbellard {
587f500a6d3SKevin Wolf     int score, score_max;
588ea2384d3Sbellard     BlockDriver *drv1, *drv;
58983f64091Sbellard     uint8_t buf[2048];
590f500a6d3SKevin Wolf     int ret = 0;
591f8ea0b00SNicholas Bellinger 
59208a00559SKevin Wolf     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
5938e895599SPaolo Bonzini     if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
594c98ac35dSStefan Weil         drv = bdrv_find_format("raw");
595c98ac35dSStefan Weil         if (!drv) {
59634b5d2c6SMax Reitz             error_setg(errp, "Could not find raw image format");
597c98ac35dSStefan Weil             ret = -ENOENT;
598c98ac35dSStefan Weil         }
599c98ac35dSStefan Weil         *pdrv = drv;
600c98ac35dSStefan Weil         return ret;
6011a396859SNicholas A. Bellinger     }
602f8ea0b00SNicholas Bellinger 
60383f64091Sbellard     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
604ea2384d3Sbellard     if (ret < 0) {
60534b5d2c6SMax Reitz         error_setg_errno(errp, -ret, "Could not read image for determining its "
60634b5d2c6SMax Reitz                          "format");
607c98ac35dSStefan Weil         *pdrv = NULL;
608c98ac35dSStefan Weil         return ret;
609ea2384d3Sbellard     }
610ea2384d3Sbellard 
611ea2384d3Sbellard     score_max = 0;
61284a12e66SChristoph Hellwig     drv = NULL;
6138a22f02aSStefan Hajnoczi     QLIST_FOREACH(drv1, &bdrv_drivers, list) {
61483f64091Sbellard         if (drv1->bdrv_probe) {
615ea2384d3Sbellard             score = drv1->bdrv_probe(buf, ret, filename);
616ea2384d3Sbellard             if (score > score_max) {
617ea2384d3Sbellard                 score_max = score;
618ea2384d3Sbellard                 drv = drv1;
619ea2384d3Sbellard             }
620ea2384d3Sbellard         }
62183f64091Sbellard     }
622c98ac35dSStefan Weil     if (!drv) {
62334b5d2c6SMax Reitz         error_setg(errp, "Could not determine image format: No compatible "
62434b5d2c6SMax Reitz                    "driver found");
625c98ac35dSStefan Weil         ret = -ENOENT;
626c98ac35dSStefan Weil     }
627c98ac35dSStefan Weil     *pdrv = drv;
628c98ac35dSStefan Weil     return ret;
629ea2384d3Sbellard }
630ea2384d3Sbellard 
63151762288SStefan Hajnoczi /**
63251762288SStefan Hajnoczi  * Set the current 'total_sectors' value
63351762288SStefan Hajnoczi  */
63451762288SStefan Hajnoczi static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
63551762288SStefan Hajnoczi {
63651762288SStefan Hajnoczi     BlockDriver *drv = bs->drv;
63751762288SStefan Hajnoczi 
638396759adSNicholas Bellinger     /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
639396759adSNicholas Bellinger     if (bs->sg)
640396759adSNicholas Bellinger         return 0;
641396759adSNicholas Bellinger 
64251762288SStefan Hajnoczi     /* query actual device if possible, otherwise just trust the hint */
64351762288SStefan Hajnoczi     if (drv->bdrv_getlength) {
64451762288SStefan Hajnoczi         int64_t length = drv->bdrv_getlength(bs);
64551762288SStefan Hajnoczi         if (length < 0) {
64651762288SStefan Hajnoczi             return length;
64751762288SStefan Hajnoczi         }
6487e382003SFam Zheng         hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
64951762288SStefan Hajnoczi     }
65051762288SStefan Hajnoczi 
65151762288SStefan Hajnoczi     bs->total_sectors = hint;
65251762288SStefan Hajnoczi     return 0;
65351762288SStefan Hajnoczi }
65451762288SStefan Hajnoczi 
655c3993cdcSStefan Hajnoczi /**
6569e8f1835SPaolo Bonzini  * Set open flags for a given discard mode
6579e8f1835SPaolo Bonzini  *
6589e8f1835SPaolo Bonzini  * Return 0 on success, -1 if the discard mode was invalid.
6599e8f1835SPaolo Bonzini  */
6609e8f1835SPaolo Bonzini int bdrv_parse_discard_flags(const char *mode, int *flags)
6619e8f1835SPaolo Bonzini {
6629e8f1835SPaolo Bonzini     *flags &= ~BDRV_O_UNMAP;
6639e8f1835SPaolo Bonzini 
6649e8f1835SPaolo Bonzini     if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
6659e8f1835SPaolo Bonzini         /* do nothing */
6669e8f1835SPaolo Bonzini     } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
6679e8f1835SPaolo Bonzini         *flags |= BDRV_O_UNMAP;
6689e8f1835SPaolo Bonzini     } else {
6699e8f1835SPaolo Bonzini         return -1;
6709e8f1835SPaolo Bonzini     }
6719e8f1835SPaolo Bonzini 
6729e8f1835SPaolo Bonzini     return 0;
6739e8f1835SPaolo Bonzini }
6749e8f1835SPaolo Bonzini 
6759e8f1835SPaolo Bonzini /**
676c3993cdcSStefan Hajnoczi  * Set open flags for a given cache mode
677c3993cdcSStefan Hajnoczi  *
678c3993cdcSStefan Hajnoczi  * Return 0 on success, -1 if the cache mode was invalid.
679c3993cdcSStefan Hajnoczi  */
680c3993cdcSStefan Hajnoczi int bdrv_parse_cache_flags(const char *mode, int *flags)
681c3993cdcSStefan Hajnoczi {
682c3993cdcSStefan Hajnoczi     *flags &= ~BDRV_O_CACHE_MASK;
683c3993cdcSStefan Hajnoczi 
684c3993cdcSStefan Hajnoczi     if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
685c3993cdcSStefan Hajnoczi         *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
68692196b2fSStefan Hajnoczi     } else if (!strcmp(mode, "directsync")) {
68792196b2fSStefan Hajnoczi         *flags |= BDRV_O_NOCACHE;
688c3993cdcSStefan Hajnoczi     } else if (!strcmp(mode, "writeback")) {
689c3993cdcSStefan Hajnoczi         *flags |= BDRV_O_CACHE_WB;
690c3993cdcSStefan Hajnoczi     } else if (!strcmp(mode, "unsafe")) {
691c3993cdcSStefan Hajnoczi         *flags |= BDRV_O_CACHE_WB;
692c3993cdcSStefan Hajnoczi         *flags |= BDRV_O_NO_FLUSH;
693c3993cdcSStefan Hajnoczi     } else if (!strcmp(mode, "writethrough")) {
694c3993cdcSStefan Hajnoczi         /* this is the default */
695c3993cdcSStefan Hajnoczi     } else {
696c3993cdcSStefan Hajnoczi         return -1;
697c3993cdcSStefan Hajnoczi     }
698c3993cdcSStefan Hajnoczi 
699c3993cdcSStefan Hajnoczi     return 0;
700c3993cdcSStefan Hajnoczi }
701c3993cdcSStefan Hajnoczi 
70253fec9d3SStefan Hajnoczi /**
70353fec9d3SStefan Hajnoczi  * The copy-on-read flag is actually a reference count so multiple users may
70453fec9d3SStefan Hajnoczi  * use the feature without worrying about clobbering its previous state.
70553fec9d3SStefan Hajnoczi  * Copy-on-read stays enabled until all users have called to disable it.
70653fec9d3SStefan Hajnoczi  */
70753fec9d3SStefan Hajnoczi void bdrv_enable_copy_on_read(BlockDriverState *bs)
70853fec9d3SStefan Hajnoczi {
70953fec9d3SStefan Hajnoczi     bs->copy_on_read++;
71053fec9d3SStefan Hajnoczi }
71153fec9d3SStefan Hajnoczi 
71253fec9d3SStefan Hajnoczi void bdrv_disable_copy_on_read(BlockDriverState *bs)
71353fec9d3SStefan Hajnoczi {
71453fec9d3SStefan Hajnoczi     assert(bs->copy_on_read > 0);
71553fec9d3SStefan Hajnoczi     bs->copy_on_read--;
71653fec9d3SStefan Hajnoczi }
71753fec9d3SStefan Hajnoczi 
7187b272452SKevin Wolf static int bdrv_open_flags(BlockDriverState *bs, int flags)
7197b272452SKevin Wolf {
7207b272452SKevin Wolf     int open_flags = flags | BDRV_O_CACHE_WB;
7217b272452SKevin Wolf 
7227b272452SKevin Wolf     /*
7237b272452SKevin Wolf      * Clear flags that are internal to the block layer before opening the
7247b272452SKevin Wolf      * image.
7257b272452SKevin Wolf      */
7267b272452SKevin Wolf     open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
7277b272452SKevin Wolf 
7287b272452SKevin Wolf     /*
7297b272452SKevin Wolf      * Snapshots should be writable.
7307b272452SKevin Wolf      */
7317b272452SKevin Wolf     if (bs->is_temporary) {
7327b272452SKevin Wolf         open_flags |= BDRV_O_RDWR;
7337b272452SKevin Wolf     }
7347b272452SKevin Wolf 
7357b272452SKevin Wolf     return open_flags;
7367b272452SKevin Wolf }
7377b272452SKevin Wolf 
738*6913c0c2SBenoît Canet static int bdrv_assign_node_name(BlockDriverState *bs,
739*6913c0c2SBenoît Canet                                  const char *node_name,
740*6913c0c2SBenoît Canet                                  Error **errp)
741*6913c0c2SBenoît Canet {
742*6913c0c2SBenoît Canet     if (!node_name) {
743*6913c0c2SBenoît Canet         return 0;
744*6913c0c2SBenoît Canet     }
745*6913c0c2SBenoît Canet 
746*6913c0c2SBenoît Canet     /* empty string node name is invalid */
747*6913c0c2SBenoît Canet     if (node_name[0] == '\0') {
748*6913c0c2SBenoît Canet         error_setg(errp, "Empty node name");
749*6913c0c2SBenoît Canet         return -EINVAL;
750*6913c0c2SBenoît Canet     }
751*6913c0c2SBenoît Canet 
752*6913c0c2SBenoît Canet     /* takes care of avoiding duplicates node names */
753*6913c0c2SBenoît Canet     if (bdrv_find_node(node_name)) {
754*6913c0c2SBenoît Canet         error_setg(errp, "Duplicate node name");
755*6913c0c2SBenoît Canet         return -EINVAL;
756*6913c0c2SBenoît Canet     }
757*6913c0c2SBenoît Canet 
758*6913c0c2SBenoît Canet     /* copy node name into the bs and insert it into the graph list */
759*6913c0c2SBenoît Canet     pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
760*6913c0c2SBenoît Canet     QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
761*6913c0c2SBenoît Canet 
762*6913c0c2SBenoît Canet     return 0;
763*6913c0c2SBenoît Canet }
764*6913c0c2SBenoît Canet 
765b6ce07aaSKevin Wolf /*
76657915332SKevin Wolf  * Common part for opening disk images and files
767b6ad491aSKevin Wolf  *
768b6ad491aSKevin Wolf  * Removes all processed options from *options.
76957915332SKevin Wolf  */
770f500a6d3SKevin Wolf static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
77134b5d2c6SMax Reitz     QDict *options, int flags, BlockDriver *drv, Error **errp)
77257915332SKevin Wolf {
77357915332SKevin Wolf     int ret, open_flags;
774035fccdfSKevin Wolf     const char *filename;
775*6913c0c2SBenoît Canet     const char *node_name = NULL;
77634b5d2c6SMax Reitz     Error *local_err = NULL;
77757915332SKevin Wolf 
77857915332SKevin Wolf     assert(drv != NULL);
7796405875cSPaolo Bonzini     assert(bs->file == NULL);
780707ff828SKevin Wolf     assert(options != NULL && bs->options != options);
78157915332SKevin Wolf 
78245673671SKevin Wolf     if (file != NULL) {
78345673671SKevin Wolf         filename = file->filename;
78445673671SKevin Wolf     } else {
78545673671SKevin Wolf         filename = qdict_get_try_str(options, "filename");
78645673671SKevin Wolf     }
78745673671SKevin Wolf 
78845673671SKevin Wolf     trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
78928dcee10SStefan Hajnoczi 
790*6913c0c2SBenoît Canet     node_name = qdict_get_try_str(options, "node-name");
791*6913c0c2SBenoît Canet     ret = bdrv_assign_node_name(bs, node_name, errp);
792*6913c0c2SBenoît Canet     if (ret < 0) {
793*6913c0c2SBenoît Canet         return ret;
794*6913c0c2SBenoît Canet     }
795*6913c0c2SBenoît Canet     qdict_del(options, "node-name");
796*6913c0c2SBenoît Canet 
7975d186eb0SKevin Wolf     /* bdrv_open() with directly using a protocol as drv. This layer is already
7985d186eb0SKevin Wolf      * opened, so assign it to bs (while file becomes a closed BlockDriverState)
7995d186eb0SKevin Wolf      * and return immediately. */
8005d186eb0SKevin Wolf     if (file != NULL && drv->bdrv_file_open) {
8015d186eb0SKevin Wolf         bdrv_swap(file, bs);
8025d186eb0SKevin Wolf         return 0;
8035d186eb0SKevin Wolf     }
8045d186eb0SKevin Wolf 
80557915332SKevin Wolf     bs->open_flags = flags;
80657915332SKevin Wolf     bs->buffer_alignment = 512;
8070d51b4deSAsias He     bs->zero_beyond_eof = true;
808b64ec4e4SFam Zheng     open_flags = bdrv_open_flags(bs, flags);
809b64ec4e4SFam Zheng     bs->read_only = !(open_flags & BDRV_O_RDWR);
810b64ec4e4SFam Zheng 
811b64ec4e4SFam Zheng     if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
8128f94a6e4SKevin Wolf         error_setg(errp,
8138f94a6e4SKevin Wolf                    !bs->read_only && bdrv_is_whitelisted(drv, true)
8148f94a6e4SKevin Wolf                         ? "Driver '%s' can only be used for read-only devices"
8158f94a6e4SKevin Wolf                         : "Driver '%s' is not whitelisted",
8168f94a6e4SKevin Wolf                    drv->format_name);
817b64ec4e4SFam Zheng         return -ENOTSUP;
818b64ec4e4SFam Zheng     }
81957915332SKevin Wolf 
82053fec9d3SStefan Hajnoczi     assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
8210ebd24e0SKevin Wolf     if (flags & BDRV_O_COPY_ON_READ) {
8220ebd24e0SKevin Wolf         if (!bs->read_only) {
82353fec9d3SStefan Hajnoczi             bdrv_enable_copy_on_read(bs);
8240ebd24e0SKevin Wolf         } else {
8250ebd24e0SKevin Wolf             error_setg(errp, "Can't use copy-on-read on read-only device");
8260ebd24e0SKevin Wolf             return -EINVAL;
8270ebd24e0SKevin Wolf         }
82853fec9d3SStefan Hajnoczi     }
82953fec9d3SStefan Hajnoczi 
830c2ad1b0cSKevin Wolf     if (filename != NULL) {
83157915332SKevin Wolf         pstrcpy(bs->filename, sizeof(bs->filename), filename);
832c2ad1b0cSKevin Wolf     } else {
833c2ad1b0cSKevin Wolf         bs->filename[0] = '\0';
834c2ad1b0cSKevin Wolf     }
83557915332SKevin Wolf 
83657915332SKevin Wolf     bs->drv = drv;
8377267c094SAnthony Liguori     bs->opaque = g_malloc0(drv->instance_size);
83857915332SKevin Wolf 
83903f541bdSStefan Hajnoczi     bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
840e7c63796SStefan Hajnoczi 
84166f82ceeSKevin Wolf     /* Open the image, either directly or using a protocol */
84266f82ceeSKevin Wolf     if (drv->bdrv_file_open) {
8435d186eb0SKevin Wolf         assert(file == NULL);
844030be321SBenoît Canet         assert(!drv->bdrv_needs_filename || filename != NULL);
84534b5d2c6SMax Reitz         ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
846f500a6d3SKevin Wolf     } else {
8472af5ef70SKevin Wolf         if (file == NULL) {
84834b5d2c6SMax Reitz             error_setg(errp, "Can't use '%s' as a block driver for the "
84934b5d2c6SMax Reitz                        "protocol level", drv->format_name);
8502af5ef70SKevin Wolf             ret = -EINVAL;
8512af5ef70SKevin Wolf             goto free_and_fail;
8522af5ef70SKevin Wolf         }
853f500a6d3SKevin Wolf         bs->file = file;
85434b5d2c6SMax Reitz         ret = drv->bdrv_open(bs, options, open_flags, &local_err);
85566f82ceeSKevin Wolf     }
85666f82ceeSKevin Wolf 
85757915332SKevin Wolf     if (ret < 0) {
85834b5d2c6SMax Reitz         if (error_is_set(&local_err)) {
85934b5d2c6SMax Reitz             error_propagate(errp, local_err);
8602fa9aa59SDunrong Huang         } else if (bs->filename[0]) {
8612fa9aa59SDunrong Huang             error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
86234b5d2c6SMax Reitz         } else {
86334b5d2c6SMax Reitz             error_setg_errno(errp, -ret, "Could not open image");
86434b5d2c6SMax Reitz         }
86557915332SKevin Wolf         goto free_and_fail;
86657915332SKevin Wolf     }
86757915332SKevin Wolf 
86851762288SStefan Hajnoczi     ret = refresh_total_sectors(bs, bs->total_sectors);
86951762288SStefan Hajnoczi     if (ret < 0) {
87034b5d2c6SMax Reitz         error_setg_errno(errp, -ret, "Could not refresh total sector count");
87151762288SStefan Hajnoczi         goto free_and_fail;
87257915332SKevin Wolf     }
87351762288SStefan Hajnoczi 
87457915332SKevin Wolf #ifndef _WIN32
87557915332SKevin Wolf     if (bs->is_temporary) {
876d4cea8dfSDunrong Huang         assert(bs->filename[0] != '\0');
877d4cea8dfSDunrong Huang         unlink(bs->filename);
87857915332SKevin Wolf     }
87957915332SKevin Wolf #endif
88057915332SKevin Wolf     return 0;
88157915332SKevin Wolf 
88257915332SKevin Wolf free_and_fail:
88366f82ceeSKevin Wolf     bs->file = NULL;
8847267c094SAnthony Liguori     g_free(bs->opaque);
88557915332SKevin Wolf     bs->opaque = NULL;
88657915332SKevin Wolf     bs->drv = NULL;
88757915332SKevin Wolf     return ret;
88857915332SKevin Wolf }
88957915332SKevin Wolf 
89057915332SKevin Wolf /*
891b6ce07aaSKevin Wolf  * Opens a file using a protocol (file, host_device, nbd, ...)
892787e4a85SKevin Wolf  *
893787e4a85SKevin Wolf  * options is a QDict of options to pass to the block drivers, or NULL for an
894787e4a85SKevin Wolf  * empty set of options. The reference to the QDict belongs to the block layer
895787e4a85SKevin Wolf  * after the call (even on failure), so if the caller intends to reuse the
896787e4a85SKevin Wolf  * dictionary, it needs to use QINCREF() before calling bdrv_file_open.
897b6ce07aaSKevin Wolf  */
898787e4a85SKevin Wolf int bdrv_file_open(BlockDriverState **pbs, const char *filename,
89972daa72eSMax Reitz                    const char *reference, QDict *options, int flags,
90072daa72eSMax Reitz                    Error **errp)
901b338082bSbellard {
90272daa72eSMax Reitz     BlockDriverState *bs = NULL;
9036db95603SChristoph Hellwig     BlockDriver *drv;
904c2ad1b0cSKevin Wolf     const char *drvname;
90598289620SKevin Wolf     bool allow_protocol_prefix = false;
90634b5d2c6SMax Reitz     Error *local_err = NULL;
90783f64091Sbellard     int ret;
9083b0d4f61Sbellard 
909707ff828SKevin Wolf     /* NULL means an empty set of options */
910707ff828SKevin Wolf     if (options == NULL) {
911707ff828SKevin Wolf         options = qdict_new();
9123b0d4f61Sbellard     }
913707ff828SKevin Wolf 
91472daa72eSMax Reitz     if (reference) {
91572daa72eSMax Reitz         if (filename || qdict_size(options)) {
91672daa72eSMax Reitz             error_setg(errp, "Cannot reference an existing block device with "
91772daa72eSMax Reitz                        "additional options or a new filename");
91872daa72eSMax Reitz             return -EINVAL;
91972daa72eSMax Reitz         }
92072daa72eSMax Reitz         QDECREF(options);
92172daa72eSMax Reitz 
92272daa72eSMax Reitz         bs = bdrv_find(reference);
92372daa72eSMax Reitz         if (!bs) {
92472daa72eSMax Reitz             error_setg(errp, "Cannot find block device '%s'", reference);
92572daa72eSMax Reitz             return -ENODEV;
92672daa72eSMax Reitz         }
92772daa72eSMax Reitz         bdrv_ref(bs);
92872daa72eSMax Reitz         *pbs = bs;
92972daa72eSMax Reitz         return 0;
93072daa72eSMax Reitz     }
93172daa72eSMax Reitz 
932707ff828SKevin Wolf     bs = bdrv_new("");
933707ff828SKevin Wolf     bs->options = options;
934707ff828SKevin Wolf     options = qdict_clone_shallow(options);
935707ff828SKevin Wolf 
936035fccdfSKevin Wolf     /* Fetch the file name from the options QDict if necessary */
937035fccdfSKevin Wolf     if (!filename) {
938035fccdfSKevin Wolf         filename = qdict_get_try_str(options, "filename");
939035fccdfSKevin Wolf     } else if (filename && !qdict_haskey(options, "filename")) {
940035fccdfSKevin Wolf         qdict_put(options, "filename", qstring_from_str(filename));
94198289620SKevin Wolf         allow_protocol_prefix = true;
942035fccdfSKevin Wolf     } else {
94334b5d2c6SMax Reitz         error_setg(errp, "Can't specify 'file' and 'filename' options at the "
94434b5d2c6SMax Reitz                    "same time");
945035fccdfSKevin Wolf         ret = -EINVAL;
946035fccdfSKevin Wolf         goto fail;
947035fccdfSKevin Wolf     }
948035fccdfSKevin Wolf 
949c2ad1b0cSKevin Wolf     /* Find the right block driver */
950c2ad1b0cSKevin Wolf     drvname = qdict_get_try_str(options, "driver");
951c2ad1b0cSKevin Wolf     if (drvname) {
9528f94a6e4SKevin Wolf         drv = bdrv_find_format(drvname);
95334b5d2c6SMax Reitz         if (!drv) {
95434b5d2c6SMax Reitz             error_setg(errp, "Unknown driver '%s'", drvname);
95534b5d2c6SMax Reitz         }
956c2ad1b0cSKevin Wolf         qdict_del(options, "driver");
957c2ad1b0cSKevin Wolf     } else if (filename) {
95898289620SKevin Wolf         drv = bdrv_find_protocol(filename, allow_protocol_prefix);
95998289620SKevin Wolf         if (!drv) {
96034b5d2c6SMax Reitz             error_setg(errp, "Unknown protocol");
96198289620SKevin Wolf         }
962c2ad1b0cSKevin Wolf     } else {
96334b5d2c6SMax Reitz         error_setg(errp, "Must specify either driver or file");
964c2ad1b0cSKevin Wolf         drv = NULL;
965c2ad1b0cSKevin Wolf     }
966c2ad1b0cSKevin Wolf 
967c2ad1b0cSKevin Wolf     if (!drv) {
96834b5d2c6SMax Reitz         /* errp has been set already */
969c2ad1b0cSKevin Wolf         ret = -ENOENT;
970c2ad1b0cSKevin Wolf         goto fail;
971c2ad1b0cSKevin Wolf     }
972c2ad1b0cSKevin Wolf 
973c2ad1b0cSKevin Wolf     /* Parse the filename and open it */
974c2ad1b0cSKevin Wolf     if (drv->bdrv_parse_filename && filename) {
9756963a30dSKevin Wolf         drv->bdrv_parse_filename(filename, options, &local_err);
9766963a30dSKevin Wolf         if (error_is_set(&local_err)) {
97734b5d2c6SMax Reitz             error_propagate(errp, local_err);
9786963a30dSKevin Wolf             ret = -EINVAL;
9796963a30dSKevin Wolf             goto fail;
9806963a30dSKevin Wolf         }
98156d1b4d2SKevin Wolf         qdict_del(options, "filename");
982030be321SBenoît Canet     } else if (drv->bdrv_needs_filename && !filename) {
98334b5d2c6SMax Reitz         error_setg(errp, "The '%s' block driver requires a file name",
984c2ad1b0cSKevin Wolf                    drv->format_name);
985c2ad1b0cSKevin Wolf         ret = -EINVAL;
986c2ad1b0cSKevin Wolf         goto fail;
9876963a30dSKevin Wolf     }
9886963a30dSKevin Wolf 
989505d7583SMax Reitz     if (!drv->bdrv_file_open) {
990505d7583SMax Reitz         ret = bdrv_open(bs, filename, options, flags, drv, &local_err);
991505d7583SMax Reitz         options = NULL;
992505d7583SMax Reitz     } else {
99334b5d2c6SMax Reitz         ret = bdrv_open_common(bs, NULL, options, flags, drv, &local_err);
994505d7583SMax Reitz     }
995707ff828SKevin Wolf     if (ret < 0) {
99634b5d2c6SMax Reitz         error_propagate(errp, local_err);
997707ff828SKevin Wolf         goto fail;
998707ff828SKevin Wolf     }
999707ff828SKevin Wolf 
1000707ff828SKevin Wolf     /* Check if any unknown options were used */
1001505d7583SMax Reitz     if (options && (qdict_size(options) != 0)) {
1002707ff828SKevin Wolf         const QDictEntry *entry = qdict_first(options);
100334b5d2c6SMax Reitz         error_setg(errp, "Block protocol '%s' doesn't support the option '%s'",
1004707ff828SKevin Wolf                    drv->format_name, entry->key);
1005707ff828SKevin Wolf         ret = -EINVAL;
1006707ff828SKevin Wolf         goto fail;
1007707ff828SKevin Wolf     }
1008707ff828SKevin Wolf     QDECREF(options);
1009707ff828SKevin Wolf 
101071d0770cSaliguori     bs->growable = 1;
101183f64091Sbellard     *pbs = bs;
101283f64091Sbellard     return 0;
1013707ff828SKevin Wolf 
1014707ff828SKevin Wolf fail:
1015707ff828SKevin Wolf     QDECREF(options);
1016707ff828SKevin Wolf     if (!bs->drv) {
1017707ff828SKevin Wolf         QDECREF(bs->options);
1018707ff828SKevin Wolf     }
10194f6fd349SFam Zheng     bdrv_unref(bs);
1020707ff828SKevin Wolf     return ret;
10213b0d4f61Sbellard }
10223b0d4f61Sbellard 
102331ca6d07SKevin Wolf /*
102431ca6d07SKevin Wolf  * Opens the backing file for a BlockDriverState if not yet open
102531ca6d07SKevin Wolf  *
102631ca6d07SKevin Wolf  * options is a QDict of options to pass to the block drivers, or NULL for an
102731ca6d07SKevin Wolf  * empty set of options. The reference to the QDict is transferred to this
102831ca6d07SKevin Wolf  * function (even on failure), so if the caller intends to reuse the dictionary,
102931ca6d07SKevin Wolf  * it needs to use QINCREF() before calling bdrv_file_open.
103031ca6d07SKevin Wolf  */
103134b5d2c6SMax Reitz int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
10329156df12SPaolo Bonzini {
10339156df12SPaolo Bonzini     char backing_filename[PATH_MAX];
10349156df12SPaolo Bonzini     int back_flags, ret;
10359156df12SPaolo Bonzini     BlockDriver *back_drv = NULL;
103634b5d2c6SMax Reitz     Error *local_err = NULL;
10379156df12SPaolo Bonzini 
10389156df12SPaolo Bonzini     if (bs->backing_hd != NULL) {
103931ca6d07SKevin Wolf         QDECREF(options);
10409156df12SPaolo Bonzini         return 0;
10419156df12SPaolo Bonzini     }
10429156df12SPaolo Bonzini 
104331ca6d07SKevin Wolf     /* NULL means an empty set of options */
104431ca6d07SKevin Wolf     if (options == NULL) {
104531ca6d07SKevin Wolf         options = qdict_new();
104631ca6d07SKevin Wolf     }
104731ca6d07SKevin Wolf 
10489156df12SPaolo Bonzini     bs->open_flags &= ~BDRV_O_NO_BACKING;
10491cb6f506SKevin Wolf     if (qdict_haskey(options, "file.filename")) {
10501cb6f506SKevin Wolf         backing_filename[0] = '\0';
10511cb6f506SKevin Wolf     } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
105231ca6d07SKevin Wolf         QDECREF(options);
10539156df12SPaolo Bonzini         return 0;
1054dbecebddSFam Zheng     } else {
1055dbecebddSFam Zheng         bdrv_get_full_backing_filename(bs, backing_filename,
1056dbecebddSFam Zheng                                        sizeof(backing_filename));
10579156df12SPaolo Bonzini     }
10589156df12SPaolo Bonzini 
10599156df12SPaolo Bonzini     bs->backing_hd = bdrv_new("");
10609156df12SPaolo Bonzini 
10619156df12SPaolo Bonzini     if (bs->backing_format[0] != '\0') {
10629156df12SPaolo Bonzini         back_drv = bdrv_find_format(bs->backing_format);
10639156df12SPaolo Bonzini     }
10649156df12SPaolo Bonzini 
10659156df12SPaolo Bonzini     /* backing files always opened read-only */
106687a5debdSThibaut LAURENT     back_flags = bs->open_flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT |
106787a5debdSThibaut LAURENT                                     BDRV_O_COPY_ON_READ);
10689156df12SPaolo Bonzini 
106931ca6d07SKevin Wolf     ret = bdrv_open(bs->backing_hd,
107031ca6d07SKevin Wolf                     *backing_filename ? backing_filename : NULL, options,
107134b5d2c6SMax Reitz                     back_flags, back_drv, &local_err);
10729156df12SPaolo Bonzini     if (ret < 0) {
10734f6fd349SFam Zheng         bdrv_unref(bs->backing_hd);
10749156df12SPaolo Bonzini         bs->backing_hd = NULL;
10759156df12SPaolo Bonzini         bs->open_flags |= BDRV_O_NO_BACKING;
1076b04b6b6eSFam Zheng         error_setg(errp, "Could not open backing file: %s",
1077b04b6b6eSFam Zheng                    error_get_pretty(local_err));
1078b04b6b6eSFam Zheng         error_free(local_err);
10799156df12SPaolo Bonzini         return ret;
10809156df12SPaolo Bonzini     }
1081d80ac658SPeter Feiner 
1082d80ac658SPeter Feiner     if (bs->backing_hd->file) {
108361ed2684SMax Reitz         pstrcpy(bs->backing_file, sizeof(bs->backing_file),
108461ed2684SMax Reitz                 bs->backing_hd->file->filename);
1085d80ac658SPeter Feiner     }
1086d80ac658SPeter Feiner 
10879156df12SPaolo Bonzini     return 0;
10889156df12SPaolo Bonzini }
10899156df12SPaolo Bonzini 
1090b6ce07aaSKevin Wolf /*
1091da557aacSMax Reitz  * Opens a disk image whose options are given as BlockdevRef in another block
1092da557aacSMax Reitz  * device's options.
1093da557aacSMax Reitz  *
1094da557aacSMax Reitz  * If force_raw is true, bdrv_file_open() will be used, thereby preventing any
1095da557aacSMax Reitz  * image format auto-detection. If it is false and a filename is given,
1096da557aacSMax Reitz  * bdrv_open() will be used for auto-detection.
1097da557aacSMax Reitz  *
1098da557aacSMax Reitz  * If allow_none is true, no image will be opened if filename is false and no
1099da557aacSMax Reitz  * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1100da557aacSMax Reitz  *
1101da557aacSMax Reitz  * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1102da557aacSMax Reitz  * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1103da557aacSMax Reitz  * itself, all options starting with "${bdref_key}." are considered part of the
1104da557aacSMax Reitz  * BlockdevRef.
1105da557aacSMax Reitz  *
1106da557aacSMax Reitz  * The BlockdevRef will be removed from the options QDict.
1107da557aacSMax Reitz  */
1108da557aacSMax Reitz int bdrv_open_image(BlockDriverState **pbs, const char *filename,
1109da557aacSMax Reitz                     QDict *options, const char *bdref_key, int flags,
1110da557aacSMax Reitz                     bool force_raw, bool allow_none, Error **errp)
1111da557aacSMax Reitz {
1112da557aacSMax Reitz     QDict *image_options;
1113da557aacSMax Reitz     int ret;
1114da557aacSMax Reitz     char *bdref_key_dot;
1115da557aacSMax Reitz     const char *reference;
1116da557aacSMax Reitz 
1117da557aacSMax Reitz     bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1118da557aacSMax Reitz     qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1119da557aacSMax Reitz     g_free(bdref_key_dot);
1120da557aacSMax Reitz 
1121da557aacSMax Reitz     reference = qdict_get_try_str(options, bdref_key);
1122da557aacSMax Reitz     if (!filename && !reference && !qdict_size(image_options)) {
1123da557aacSMax Reitz         if (allow_none) {
1124da557aacSMax Reitz             ret = 0;
1125da557aacSMax Reitz         } else {
1126da557aacSMax Reitz             error_setg(errp, "A block device must be specified for \"%s\"",
1127da557aacSMax Reitz                        bdref_key);
1128da557aacSMax Reitz             ret = -EINVAL;
1129da557aacSMax Reitz         }
1130da557aacSMax Reitz         goto done;
1131da557aacSMax Reitz     }
1132da557aacSMax Reitz 
1133da557aacSMax Reitz     if (filename && !force_raw) {
1134da557aacSMax Reitz         /* If a filename is given and the block driver should be detected
1135da557aacSMax Reitz            automatically (instead of using none), use bdrv_open() in order to do
1136da557aacSMax Reitz            that auto-detection. */
1137da557aacSMax Reitz         BlockDriverState *bs;
1138da557aacSMax Reitz 
1139da557aacSMax Reitz         if (reference) {
1140da557aacSMax Reitz             error_setg(errp, "Cannot reference an existing block device while "
1141da557aacSMax Reitz                        "giving a filename");
1142da557aacSMax Reitz             ret = -EINVAL;
1143da557aacSMax Reitz             goto done;
1144da557aacSMax Reitz         }
1145da557aacSMax Reitz 
1146da557aacSMax Reitz         bs = bdrv_new("");
1147da557aacSMax Reitz         ret = bdrv_open(bs, filename, image_options, flags, NULL, errp);
1148da557aacSMax Reitz         if (ret < 0) {
1149da557aacSMax Reitz             bdrv_unref(bs);
1150da557aacSMax Reitz         } else {
1151da557aacSMax Reitz             *pbs = bs;
1152da557aacSMax Reitz         }
1153da557aacSMax Reitz     } else {
1154da557aacSMax Reitz         ret = bdrv_file_open(pbs, filename, reference, image_options, flags,
1155da557aacSMax Reitz                              errp);
1156da557aacSMax Reitz     }
1157da557aacSMax Reitz 
1158da557aacSMax Reitz done:
1159da557aacSMax Reitz     qdict_del(options, bdref_key);
1160da557aacSMax Reitz     return ret;
1161da557aacSMax Reitz }
1162da557aacSMax Reitz 
1163da557aacSMax Reitz /*
1164b6ce07aaSKevin Wolf  * Opens a disk image (raw, qcow2, vmdk, ...)
1165de9c0cecSKevin Wolf  *
1166de9c0cecSKevin Wolf  * options is a QDict of options to pass to the block drivers, or NULL for an
1167de9c0cecSKevin Wolf  * empty set of options. The reference to the QDict belongs to the block layer
1168de9c0cecSKevin Wolf  * after the call (even on failure), so if the caller intends to reuse the
1169de9c0cecSKevin Wolf  * dictionary, it needs to use QINCREF() before calling bdrv_open.
1170b6ce07aaSKevin Wolf  */
1171de9c0cecSKevin Wolf int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
117234b5d2c6SMax Reitz               int flags, BlockDriver *drv, Error **errp)
1173ea2384d3Sbellard {
1174b6ce07aaSKevin Wolf     int ret;
117589c9bc3dSStefan Weil     /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
117689c9bc3dSStefan Weil     char tmp_filename[PATH_MAX + 1];
1177f500a6d3SKevin Wolf     BlockDriverState *file = NULL;
117874fe54f2SKevin Wolf     const char *drvname;
117934b5d2c6SMax Reitz     Error *local_err = NULL;
118033e3963eSbellard 
1181de9c0cecSKevin Wolf     /* NULL means an empty set of options */
1182de9c0cecSKevin Wolf     if (options == NULL) {
1183de9c0cecSKevin Wolf         options = qdict_new();
1184de9c0cecSKevin Wolf     }
1185de9c0cecSKevin Wolf 
1186de9c0cecSKevin Wolf     bs->options = options;
1187b6ad491aSKevin Wolf     options = qdict_clone_shallow(options);
1188de9c0cecSKevin Wolf 
1189de9c0cecSKevin Wolf     /* For snapshot=on, create a temporary qcow2 overlay */
119083f64091Sbellard     if (flags & BDRV_O_SNAPSHOT) {
1191ea2384d3Sbellard         BlockDriverState *bs1;
1192ea2384d3Sbellard         int64_t total_size;
119391a073a9SKevin Wolf         BlockDriver *bdrv_qcow2;
119408b392e1SKevin Wolf         QEMUOptionParameter *create_options;
11959fd3171aSKevin Wolf         QDict *snapshot_options;
1196c2ad1b0cSKevin Wolf 
1197ea2384d3Sbellard         /* if snapshot, we create a temporary backing file and open it
1198ea2384d3Sbellard            instead of opening 'filename' directly */
1199ea2384d3Sbellard 
12009fd3171aSKevin Wolf         /* Get the required size from the image */
1201ea2384d3Sbellard         bs1 = bdrv_new("");
12029fd3171aSKevin Wolf         QINCREF(options);
1203c9fbb99dSKevin Wolf         ret = bdrv_open(bs1, filename, options, BDRV_O_NO_BACKING,
1204c9fbb99dSKevin Wolf                         drv, &local_err);
120551d7c00cSaliguori         if (ret < 0) {
12064f6fd349SFam Zheng             bdrv_unref(bs1);
1207de9c0cecSKevin Wolf             goto fail;
1208ea2384d3Sbellard         }
12093e82990bSJes Sorensen         total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
12107c96d46eSaliguori 
12114f6fd349SFam Zheng         bdrv_unref(bs1);
1212ea2384d3Sbellard 
12139fd3171aSKevin Wolf         /* Create the temporary image */
1214eba25057SJim Meyering         ret = get_tmp_filename(tmp_filename, sizeof(tmp_filename));
1215eba25057SJim Meyering         if (ret < 0) {
121634b5d2c6SMax Reitz             error_setg_errno(errp, -ret, "Could not get temporary filename");
1217de9c0cecSKevin Wolf             goto fail;
1218eba25057SJim Meyering         }
12197c96d46eSaliguori 
122091a073a9SKevin Wolf         bdrv_qcow2 = bdrv_find_format("qcow2");
122108b392e1SKevin Wolf         create_options = parse_option_parameters("", bdrv_qcow2->create_options,
122208b392e1SKevin Wolf                                                  NULL);
122391a073a9SKevin Wolf 
122408b392e1SKevin Wolf         set_option_parameter_int(create_options, BLOCK_OPT_SIZE, total_size);
122591a073a9SKevin Wolf 
1226cc84d90fSMax Reitz         ret = bdrv_create(bdrv_qcow2, tmp_filename, create_options, &local_err);
122708b392e1SKevin Wolf         free_option_parameters(create_options);
122851d7c00cSaliguori         if (ret < 0) {
122934b5d2c6SMax Reitz             error_setg_errno(errp, -ret, "Could not create temporary overlay "
1230cc84d90fSMax Reitz                              "'%s': %s", tmp_filename,
1231cc84d90fSMax Reitz                              error_get_pretty(local_err));
1232cc84d90fSMax Reitz             error_free(local_err);
1233cc84d90fSMax Reitz             local_err = NULL;
1234de9c0cecSKevin Wolf             goto fail;
1235ea2384d3Sbellard         }
123691a073a9SKevin Wolf 
12379fd3171aSKevin Wolf         /* Prepare a new options QDict for the temporary file, where user
12389fd3171aSKevin Wolf          * options refer to the backing file */
12399fd3171aSKevin Wolf         if (filename) {
12409fd3171aSKevin Wolf             qdict_put(options, "file.filename", qstring_from_str(filename));
12419fd3171aSKevin Wolf         }
12429fd3171aSKevin Wolf         if (drv) {
12439fd3171aSKevin Wolf             qdict_put(options, "driver", qstring_from_str(drv->format_name));
12449fd3171aSKevin Wolf         }
12459fd3171aSKevin Wolf 
12469fd3171aSKevin Wolf         snapshot_options = qdict_new();
12479fd3171aSKevin Wolf         qdict_put(snapshot_options, "backing", options);
12489fd3171aSKevin Wolf         qdict_flatten(snapshot_options);
12499fd3171aSKevin Wolf 
12509fd3171aSKevin Wolf         bs->options = snapshot_options;
12519fd3171aSKevin Wolf         options = qdict_clone_shallow(bs->options);
12529fd3171aSKevin Wolf 
1253ea2384d3Sbellard         filename = tmp_filename;
125491a073a9SKevin Wolf         drv = bdrv_qcow2;
1255ea2384d3Sbellard         bs->is_temporary = 1;
1256ea2384d3Sbellard     }
1257ea2384d3Sbellard 
1258f500a6d3SKevin Wolf     /* Open image file without format layer */
1259be028adcSJeff Cody     if (flags & BDRV_O_RDWR) {
1260be028adcSJeff Cody         flags |= BDRV_O_ALLOW_RDWR;
1261be028adcSJeff Cody     }
1262be028adcSJeff Cody 
1263054963f8SMax Reitz     ret = bdrv_open_image(&file, filename, options, "file",
1264054963f8SMax Reitz                           bdrv_open_flags(bs, flags | BDRV_O_UNMAP), true, true,
12652a05cbe4SMax Reitz                           &local_err);
1266f500a6d3SKevin Wolf     if (ret < 0) {
1267de9c0cecSKevin Wolf         goto fail;
1268f500a6d3SKevin Wolf     }
1269f500a6d3SKevin Wolf 
1270f500a6d3SKevin Wolf     /* Find the right image format driver */
127174fe54f2SKevin Wolf     drvname = qdict_get_try_str(options, "driver");
127274fe54f2SKevin Wolf     if (drvname) {
12738f94a6e4SKevin Wolf         drv = bdrv_find_format(drvname);
127474fe54f2SKevin Wolf         qdict_del(options, "driver");
127506d22aa3SKevin Wolf         if (!drv) {
127606d22aa3SKevin Wolf             error_setg(errp, "Invalid driver: '%s'", drvname);
127706d22aa3SKevin Wolf             ret = -EINVAL;
127806d22aa3SKevin Wolf             goto unlink_and_fail;
127906d22aa3SKevin Wolf         }
128074fe54f2SKevin Wolf     }
128174fe54f2SKevin Wolf 
1282f500a6d3SKevin Wolf     if (!drv) {
12832a05cbe4SMax Reitz         if (file) {
128434b5d2c6SMax Reitz             ret = find_image_format(file, filename, &drv, &local_err);
12852a05cbe4SMax Reitz         } else {
12862a05cbe4SMax Reitz             error_setg(errp, "Must specify either driver or file");
12872a05cbe4SMax Reitz             ret = -EINVAL;
12882a05cbe4SMax Reitz             goto unlink_and_fail;
12892a05cbe4SMax Reitz         }
1290f500a6d3SKevin Wolf     }
1291f500a6d3SKevin Wolf 
1292f500a6d3SKevin Wolf     if (!drv) {
1293f500a6d3SKevin Wolf         goto unlink_and_fail;
1294f500a6d3SKevin Wolf     }
1295f500a6d3SKevin Wolf 
1296b6ce07aaSKevin Wolf     /* Open the image */
129734b5d2c6SMax Reitz     ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
1298b6ce07aaSKevin Wolf     if (ret < 0) {
12996987307cSChristoph Hellwig         goto unlink_and_fail;
13006987307cSChristoph Hellwig     }
13016987307cSChristoph Hellwig 
13022a05cbe4SMax Reitz     if (file && (bs->file != file)) {
13034f6fd349SFam Zheng         bdrv_unref(file);
1304f500a6d3SKevin Wolf         file = NULL;
1305f500a6d3SKevin Wolf     }
1306f500a6d3SKevin Wolf 
1307b6ce07aaSKevin Wolf     /* If there is a backing file, use it */
13089156df12SPaolo Bonzini     if ((flags & BDRV_O_NO_BACKING) == 0) {
130931ca6d07SKevin Wolf         QDict *backing_options;
131031ca6d07SKevin Wolf 
13115726d872SBenoît Canet         qdict_extract_subqdict(options, &backing_options, "backing.");
131234b5d2c6SMax Reitz         ret = bdrv_open_backing_file(bs, backing_options, &local_err);
1313b6ce07aaSKevin Wolf         if (ret < 0) {
1314b6ad491aSKevin Wolf             goto close_and_fail;
1315b6ce07aaSKevin Wolf         }
1316b6ce07aaSKevin Wolf     }
1317b6ce07aaSKevin Wolf 
1318b6ad491aSKevin Wolf     /* Check if any unknown options were used */
1319b6ad491aSKevin Wolf     if (qdict_size(options) != 0) {
1320b6ad491aSKevin Wolf         const QDictEntry *entry = qdict_first(options);
132134b5d2c6SMax Reitz         error_setg(errp, "Block format '%s' used by device '%s' doesn't "
132234b5d2c6SMax Reitz                    "support the option '%s'", drv->format_name, bs->device_name,
132334b5d2c6SMax Reitz                    entry->key);
1324b6ad491aSKevin Wolf 
1325b6ad491aSKevin Wolf         ret = -EINVAL;
1326b6ad491aSKevin Wolf         goto close_and_fail;
1327b6ad491aSKevin Wolf     }
1328b6ad491aSKevin Wolf     QDECREF(options);
1329b6ad491aSKevin Wolf 
1330b6ce07aaSKevin Wolf     if (!bdrv_key_required(bs)) {
13317d4b4ba5SMarkus Armbruster         bdrv_dev_change_media_cb(bs, true);
1332b6ce07aaSKevin Wolf     }
1333b6ce07aaSKevin Wolf 
1334b6ce07aaSKevin Wolf     return 0;
1335b6ce07aaSKevin Wolf 
1336b6ce07aaSKevin Wolf unlink_and_fail:
1337f500a6d3SKevin Wolf     if (file != NULL) {
13384f6fd349SFam Zheng         bdrv_unref(file);
1339f500a6d3SKevin Wolf     }
1340b6ce07aaSKevin Wolf     if (bs->is_temporary) {
1341b6ce07aaSKevin Wolf         unlink(filename);
1342b6ce07aaSKevin Wolf     }
1343de9c0cecSKevin Wolf fail:
1344de9c0cecSKevin Wolf     QDECREF(bs->options);
1345b6ad491aSKevin Wolf     QDECREF(options);
1346de9c0cecSKevin Wolf     bs->options = NULL;
134734b5d2c6SMax Reitz     if (error_is_set(&local_err)) {
134834b5d2c6SMax Reitz         error_propagate(errp, local_err);
134934b5d2c6SMax Reitz     }
1350b6ad491aSKevin Wolf     return ret;
1351de9c0cecSKevin Wolf 
1352b6ad491aSKevin Wolf close_and_fail:
1353b6ad491aSKevin Wolf     bdrv_close(bs);
1354b6ad491aSKevin Wolf     QDECREF(options);
135534b5d2c6SMax Reitz     if (error_is_set(&local_err)) {
135634b5d2c6SMax Reitz         error_propagate(errp, local_err);
135734b5d2c6SMax Reitz     }
1358b6ce07aaSKevin Wolf     return ret;
1359b6ce07aaSKevin Wolf }
1360b6ce07aaSKevin Wolf 
1361e971aa12SJeff Cody typedef struct BlockReopenQueueEntry {
1362e971aa12SJeff Cody      bool prepared;
1363e971aa12SJeff Cody      BDRVReopenState state;
1364e971aa12SJeff Cody      QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1365e971aa12SJeff Cody } BlockReopenQueueEntry;
1366e971aa12SJeff Cody 
1367e971aa12SJeff Cody /*
1368e971aa12SJeff Cody  * Adds a BlockDriverState to a simple queue for an atomic, transactional
1369e971aa12SJeff Cody  * reopen of multiple devices.
1370e971aa12SJeff Cody  *
1371e971aa12SJeff Cody  * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1372e971aa12SJeff Cody  * already performed, or alternatively may be NULL a new BlockReopenQueue will
1373e971aa12SJeff Cody  * be created and initialized. This newly created BlockReopenQueue should be
1374e971aa12SJeff Cody  * passed back in for subsequent calls that are intended to be of the same
1375e971aa12SJeff Cody  * atomic 'set'.
1376e971aa12SJeff Cody  *
1377e971aa12SJeff Cody  * bs is the BlockDriverState to add to the reopen queue.
1378e971aa12SJeff Cody  *
1379e971aa12SJeff Cody  * flags contains the open flags for the associated bs
1380e971aa12SJeff Cody  *
1381e971aa12SJeff Cody  * returns a pointer to bs_queue, which is either the newly allocated
1382e971aa12SJeff Cody  * bs_queue, or the existing bs_queue being used.
1383e971aa12SJeff Cody  *
1384e971aa12SJeff Cody  */
1385e971aa12SJeff Cody BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1386e971aa12SJeff Cody                                     BlockDriverState *bs, int flags)
1387e971aa12SJeff Cody {
1388e971aa12SJeff Cody     assert(bs != NULL);
1389e971aa12SJeff Cody 
1390e971aa12SJeff Cody     BlockReopenQueueEntry *bs_entry;
1391e971aa12SJeff Cody     if (bs_queue == NULL) {
1392e971aa12SJeff Cody         bs_queue = g_new0(BlockReopenQueue, 1);
1393e971aa12SJeff Cody         QSIMPLEQ_INIT(bs_queue);
1394e971aa12SJeff Cody     }
1395e971aa12SJeff Cody 
1396e971aa12SJeff Cody     if (bs->file) {
1397e971aa12SJeff Cody         bdrv_reopen_queue(bs_queue, bs->file, flags);
1398e971aa12SJeff Cody     }
1399e971aa12SJeff Cody 
1400e971aa12SJeff Cody     bs_entry = g_new0(BlockReopenQueueEntry, 1);
1401e971aa12SJeff Cody     QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1402e971aa12SJeff Cody 
1403e971aa12SJeff Cody     bs_entry->state.bs = bs;
1404e971aa12SJeff Cody     bs_entry->state.flags = flags;
1405e971aa12SJeff Cody 
1406e971aa12SJeff Cody     return bs_queue;
1407e971aa12SJeff Cody }
1408e971aa12SJeff Cody 
1409e971aa12SJeff Cody /*
1410e971aa12SJeff Cody  * Reopen multiple BlockDriverStates atomically & transactionally.
1411e971aa12SJeff Cody  *
1412e971aa12SJeff Cody  * The queue passed in (bs_queue) must have been built up previous
1413e971aa12SJeff Cody  * via bdrv_reopen_queue().
1414e971aa12SJeff Cody  *
1415e971aa12SJeff Cody  * Reopens all BDS specified in the queue, with the appropriate
1416e971aa12SJeff Cody  * flags.  All devices are prepared for reopen, and failure of any
1417e971aa12SJeff Cody  * device will cause all device changes to be abandonded, and intermediate
1418e971aa12SJeff Cody  * data cleaned up.
1419e971aa12SJeff Cody  *
1420e971aa12SJeff Cody  * If all devices prepare successfully, then the changes are committed
1421e971aa12SJeff Cody  * to all devices.
1422e971aa12SJeff Cody  *
1423e971aa12SJeff Cody  */
1424e971aa12SJeff Cody int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1425e971aa12SJeff Cody {
1426e971aa12SJeff Cody     int ret = -1;
1427e971aa12SJeff Cody     BlockReopenQueueEntry *bs_entry, *next;
1428e971aa12SJeff Cody     Error *local_err = NULL;
1429e971aa12SJeff Cody 
1430e971aa12SJeff Cody     assert(bs_queue != NULL);
1431e971aa12SJeff Cody 
1432e971aa12SJeff Cody     bdrv_drain_all();
1433e971aa12SJeff Cody 
1434e971aa12SJeff Cody     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1435e971aa12SJeff Cody         if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1436e971aa12SJeff Cody             error_propagate(errp, local_err);
1437e971aa12SJeff Cody             goto cleanup;
1438e971aa12SJeff Cody         }
1439e971aa12SJeff Cody         bs_entry->prepared = true;
1440e971aa12SJeff Cody     }
1441e971aa12SJeff Cody 
1442e971aa12SJeff Cody     /* If we reach this point, we have success and just need to apply the
1443e971aa12SJeff Cody      * changes
1444e971aa12SJeff Cody      */
1445e971aa12SJeff Cody     QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1446e971aa12SJeff Cody         bdrv_reopen_commit(&bs_entry->state);
1447e971aa12SJeff Cody     }
1448e971aa12SJeff Cody 
1449e971aa12SJeff Cody     ret = 0;
1450e971aa12SJeff Cody 
1451e971aa12SJeff Cody cleanup:
1452e971aa12SJeff Cody     QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1453e971aa12SJeff Cody         if (ret && bs_entry->prepared) {
1454e971aa12SJeff Cody             bdrv_reopen_abort(&bs_entry->state);
1455e971aa12SJeff Cody         }
1456e971aa12SJeff Cody         g_free(bs_entry);
1457e971aa12SJeff Cody     }
1458e971aa12SJeff Cody     g_free(bs_queue);
1459e971aa12SJeff Cody     return ret;
1460e971aa12SJeff Cody }
1461e971aa12SJeff Cody 
1462e971aa12SJeff Cody 
1463e971aa12SJeff Cody /* Reopen a single BlockDriverState with the specified flags. */
1464e971aa12SJeff Cody int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1465e971aa12SJeff Cody {
1466e971aa12SJeff Cody     int ret = -1;
1467e971aa12SJeff Cody     Error *local_err = NULL;
1468e971aa12SJeff Cody     BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1469e971aa12SJeff Cody 
1470e971aa12SJeff Cody     ret = bdrv_reopen_multiple(queue, &local_err);
1471e971aa12SJeff Cody     if (local_err != NULL) {
1472e971aa12SJeff Cody         error_propagate(errp, local_err);
1473e971aa12SJeff Cody     }
1474e971aa12SJeff Cody     return ret;
1475e971aa12SJeff Cody }
1476e971aa12SJeff Cody 
1477e971aa12SJeff Cody 
1478e971aa12SJeff Cody /*
1479e971aa12SJeff Cody  * Prepares a BlockDriverState for reopen. All changes are staged in the
1480e971aa12SJeff Cody  * 'opaque' field of the BDRVReopenState, which is used and allocated by
1481e971aa12SJeff Cody  * the block driver layer .bdrv_reopen_prepare()
1482e971aa12SJeff Cody  *
1483e971aa12SJeff Cody  * bs is the BlockDriverState to reopen
1484e971aa12SJeff Cody  * flags are the new open flags
1485e971aa12SJeff Cody  * queue is the reopen queue
1486e971aa12SJeff Cody  *
1487e971aa12SJeff Cody  * Returns 0 on success, non-zero on error.  On error errp will be set
1488e971aa12SJeff Cody  * as well.
1489e971aa12SJeff Cody  *
1490e971aa12SJeff Cody  * On failure, bdrv_reopen_abort() will be called to clean up any data.
1491e971aa12SJeff Cody  * It is the responsibility of the caller to then call the abort() or
1492e971aa12SJeff Cody  * commit() for any other BDS that have been left in a prepare() state
1493e971aa12SJeff Cody  *
1494e971aa12SJeff Cody  */
1495e971aa12SJeff Cody int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1496e971aa12SJeff Cody                         Error **errp)
1497e971aa12SJeff Cody {
1498e971aa12SJeff Cody     int ret = -1;
1499e971aa12SJeff Cody     Error *local_err = NULL;
1500e971aa12SJeff Cody     BlockDriver *drv;
1501e971aa12SJeff Cody 
1502e971aa12SJeff Cody     assert(reopen_state != NULL);
1503e971aa12SJeff Cody     assert(reopen_state->bs->drv != NULL);
1504e971aa12SJeff Cody     drv = reopen_state->bs->drv;
1505e971aa12SJeff Cody 
1506e971aa12SJeff Cody     /* if we are to stay read-only, do not allow permission change
1507e971aa12SJeff Cody      * to r/w */
1508e971aa12SJeff Cody     if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1509e971aa12SJeff Cody         reopen_state->flags & BDRV_O_RDWR) {
1510e971aa12SJeff Cody         error_set(errp, QERR_DEVICE_IS_READ_ONLY,
1511e971aa12SJeff Cody                   reopen_state->bs->device_name);
1512e971aa12SJeff Cody         goto error;
1513e971aa12SJeff Cody     }
1514e971aa12SJeff Cody 
1515e971aa12SJeff Cody 
1516e971aa12SJeff Cody     ret = bdrv_flush(reopen_state->bs);
1517e971aa12SJeff Cody     if (ret) {
1518e971aa12SJeff Cody         error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1519e971aa12SJeff Cody                   strerror(-ret));
1520e971aa12SJeff Cody         goto error;
1521e971aa12SJeff Cody     }
1522e971aa12SJeff Cody 
1523e971aa12SJeff Cody     if (drv->bdrv_reopen_prepare) {
1524e971aa12SJeff Cody         ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1525e971aa12SJeff Cody         if (ret) {
1526e971aa12SJeff Cody             if (local_err != NULL) {
1527e971aa12SJeff Cody                 error_propagate(errp, local_err);
1528e971aa12SJeff Cody             } else {
1529d8b6895fSLuiz Capitulino                 error_setg(errp, "failed while preparing to reopen image '%s'",
1530e971aa12SJeff Cody                            reopen_state->bs->filename);
1531e971aa12SJeff Cody             }
1532e971aa12SJeff Cody             goto error;
1533e971aa12SJeff Cody         }
1534e971aa12SJeff Cody     } else {
1535e971aa12SJeff Cody         /* It is currently mandatory to have a bdrv_reopen_prepare()
1536e971aa12SJeff Cody          * handler for each supported drv. */
1537e971aa12SJeff Cody         error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
1538e971aa12SJeff Cody                   drv->format_name, reopen_state->bs->device_name,
1539e971aa12SJeff Cody                  "reopening of file");
1540e971aa12SJeff Cody         ret = -1;
1541e971aa12SJeff Cody         goto error;
1542e971aa12SJeff Cody     }
1543e971aa12SJeff Cody 
1544e971aa12SJeff Cody     ret = 0;
1545e971aa12SJeff Cody 
1546e971aa12SJeff Cody error:
1547e971aa12SJeff Cody     return ret;
1548e971aa12SJeff Cody }
1549e971aa12SJeff Cody 
1550e971aa12SJeff Cody /*
1551e971aa12SJeff Cody  * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1552e971aa12SJeff Cody  * makes them final by swapping the staging BlockDriverState contents into
1553e971aa12SJeff Cody  * the active BlockDriverState contents.
1554e971aa12SJeff Cody  */
1555e971aa12SJeff Cody void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1556e971aa12SJeff Cody {
1557e971aa12SJeff Cody     BlockDriver *drv;
1558e971aa12SJeff Cody 
1559e971aa12SJeff Cody     assert(reopen_state != NULL);
1560e971aa12SJeff Cody     drv = reopen_state->bs->drv;
1561e971aa12SJeff Cody     assert(drv != NULL);
1562e971aa12SJeff Cody 
1563e971aa12SJeff Cody     /* If there are any driver level actions to take */
1564e971aa12SJeff Cody     if (drv->bdrv_reopen_commit) {
1565e971aa12SJeff Cody         drv->bdrv_reopen_commit(reopen_state);
1566e971aa12SJeff Cody     }
1567e971aa12SJeff Cody 
1568e971aa12SJeff Cody     /* set BDS specific flags now */
1569e971aa12SJeff Cody     reopen_state->bs->open_flags         = reopen_state->flags;
1570e971aa12SJeff Cody     reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1571e971aa12SJeff Cody                                               BDRV_O_CACHE_WB);
1572e971aa12SJeff Cody     reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
1573e971aa12SJeff Cody }
1574e971aa12SJeff Cody 
1575e971aa12SJeff Cody /*
1576e971aa12SJeff Cody  * Abort the reopen, and delete and free the staged changes in
1577e971aa12SJeff Cody  * reopen_state
1578e971aa12SJeff Cody  */
1579e971aa12SJeff Cody void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1580e971aa12SJeff Cody {
1581e971aa12SJeff Cody     BlockDriver *drv;
1582e971aa12SJeff Cody 
1583e971aa12SJeff Cody     assert(reopen_state != NULL);
1584e971aa12SJeff Cody     drv = reopen_state->bs->drv;
1585e971aa12SJeff Cody     assert(drv != NULL);
1586e971aa12SJeff Cody 
1587e971aa12SJeff Cody     if (drv->bdrv_reopen_abort) {
1588e971aa12SJeff Cody         drv->bdrv_reopen_abort(reopen_state);
1589e971aa12SJeff Cody     }
1590e971aa12SJeff Cody }
1591e971aa12SJeff Cody 
1592e971aa12SJeff Cody 
1593fc01f7e7Sbellard void bdrv_close(BlockDriverState *bs)
1594fc01f7e7Sbellard {
15953e914655SPaolo Bonzini     if (bs->job) {
15963e914655SPaolo Bonzini         block_job_cancel_sync(bs->job);
15973e914655SPaolo Bonzini     }
159858fda173SStefan Hajnoczi     bdrv_drain_all(); /* complete I/O */
159958fda173SStefan Hajnoczi     bdrv_flush(bs);
160058fda173SStefan Hajnoczi     bdrv_drain_all(); /* in case flush left pending I/O */
1601d7d512f6SPaolo Bonzini     notifier_list_notify(&bs->close_notifiers, bs);
16027094f12fSKevin Wolf 
16033cbc002cSPaolo Bonzini     if (bs->drv) {
1604557df6acSStefan Hajnoczi         if (bs->backing_hd) {
16054f6fd349SFam Zheng             bdrv_unref(bs->backing_hd);
1606557df6acSStefan Hajnoczi             bs->backing_hd = NULL;
1607557df6acSStefan Hajnoczi         }
1608ea2384d3Sbellard         bs->drv->bdrv_close(bs);
16097267c094SAnthony Liguori         g_free(bs->opaque);
1610ea2384d3Sbellard #ifdef _WIN32
1611ea2384d3Sbellard         if (bs->is_temporary) {
1612ea2384d3Sbellard             unlink(bs->filename);
1613ea2384d3Sbellard         }
161467b915a5Sbellard #endif
1615ea2384d3Sbellard         bs->opaque = NULL;
1616ea2384d3Sbellard         bs->drv = NULL;
161753fec9d3SStefan Hajnoczi         bs->copy_on_read = 0;
1618a275fa42SPaolo Bonzini         bs->backing_file[0] = '\0';
1619a275fa42SPaolo Bonzini         bs->backing_format[0] = '\0';
16206405875cSPaolo Bonzini         bs->total_sectors = 0;
16216405875cSPaolo Bonzini         bs->encrypted = 0;
16226405875cSPaolo Bonzini         bs->valid_key = 0;
16236405875cSPaolo Bonzini         bs->sg = 0;
16246405875cSPaolo Bonzini         bs->growable = 0;
16250d51b4deSAsias He         bs->zero_beyond_eof = false;
1626de9c0cecSKevin Wolf         QDECREF(bs->options);
1627de9c0cecSKevin Wolf         bs->options = NULL;
1628b338082bSbellard 
162966f82ceeSKevin Wolf         if (bs->file != NULL) {
16304f6fd349SFam Zheng             bdrv_unref(bs->file);
16310ac9377dSPaolo Bonzini             bs->file = NULL;
163266f82ceeSKevin Wolf         }
16339ca11154SPavel Hrdina     }
163466f82ceeSKevin Wolf 
16357d4b4ba5SMarkus Armbruster     bdrv_dev_change_media_cb(bs, false);
163698f90dbaSZhi Yong Wu 
163798f90dbaSZhi Yong Wu     /*throttling disk I/O limits*/
163898f90dbaSZhi Yong Wu     if (bs->io_limits_enabled) {
163998f90dbaSZhi Yong Wu         bdrv_io_limits_disable(bs);
164098f90dbaSZhi Yong Wu     }
1641b338082bSbellard }
1642b338082bSbellard 
16432bc93fedSMORITA Kazutaka void bdrv_close_all(void)
16442bc93fedSMORITA Kazutaka {
16452bc93fedSMORITA Kazutaka     BlockDriverState *bs;
16462bc93fedSMORITA Kazutaka 
1647dc364f4cSBenoît Canet     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
16482bc93fedSMORITA Kazutaka         bdrv_close(bs);
16492bc93fedSMORITA Kazutaka     }
16502bc93fedSMORITA Kazutaka }
16512bc93fedSMORITA Kazutaka 
165288266f5aSStefan Hajnoczi /* Check if any requests are in-flight (including throttled requests) */
165388266f5aSStefan Hajnoczi static bool bdrv_requests_pending(BlockDriverState *bs)
165488266f5aSStefan Hajnoczi {
165588266f5aSStefan Hajnoczi     if (!QLIST_EMPTY(&bs->tracked_requests)) {
165688266f5aSStefan Hajnoczi         return true;
165788266f5aSStefan Hajnoczi     }
1658cc0681c4SBenoît Canet     if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
1659cc0681c4SBenoît Canet         return true;
1660cc0681c4SBenoît Canet     }
1661cc0681c4SBenoît Canet     if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
166288266f5aSStefan Hajnoczi         return true;
166388266f5aSStefan Hajnoczi     }
166488266f5aSStefan Hajnoczi     if (bs->file && bdrv_requests_pending(bs->file)) {
166588266f5aSStefan Hajnoczi         return true;
166688266f5aSStefan Hajnoczi     }
166788266f5aSStefan Hajnoczi     if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
166888266f5aSStefan Hajnoczi         return true;
166988266f5aSStefan Hajnoczi     }
167088266f5aSStefan Hajnoczi     return false;
167188266f5aSStefan Hajnoczi }
167288266f5aSStefan Hajnoczi 
167388266f5aSStefan Hajnoczi static bool bdrv_requests_pending_all(void)
167488266f5aSStefan Hajnoczi {
167588266f5aSStefan Hajnoczi     BlockDriverState *bs;
1676dc364f4cSBenoît Canet     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
167788266f5aSStefan Hajnoczi         if (bdrv_requests_pending(bs)) {
167888266f5aSStefan Hajnoczi             return true;
167988266f5aSStefan Hajnoczi         }
168088266f5aSStefan Hajnoczi     }
168188266f5aSStefan Hajnoczi     return false;
168288266f5aSStefan Hajnoczi }
168388266f5aSStefan Hajnoczi 
1684922453bcSStefan Hajnoczi /*
1685922453bcSStefan Hajnoczi  * Wait for pending requests to complete across all BlockDriverStates
1686922453bcSStefan Hajnoczi  *
1687922453bcSStefan Hajnoczi  * This function does not flush data to disk, use bdrv_flush_all() for that
1688922453bcSStefan Hajnoczi  * after calling this function.
16894c355d53SZhi Yong Wu  *
16904c355d53SZhi Yong Wu  * Note that completion of an asynchronous I/O operation can trigger any
16914c355d53SZhi Yong Wu  * number of other I/O operations on other devices---for example a coroutine
16924c355d53SZhi Yong Wu  * can be arbitrarily complex and a constant flow of I/O can come until the
16934c355d53SZhi Yong Wu  * coroutine is complete.  Because of this, it is not possible to have a
16944c355d53SZhi Yong Wu  * function to drain a single device's I/O queue.
1695922453bcSStefan Hajnoczi  */
1696922453bcSStefan Hajnoczi void bdrv_drain_all(void)
1697922453bcSStefan Hajnoczi {
169888266f5aSStefan Hajnoczi     /* Always run first iteration so any pending completion BHs run */
169988266f5aSStefan Hajnoczi     bool busy = true;
1700922453bcSStefan Hajnoczi     BlockDriverState *bs;
1701922453bcSStefan Hajnoczi 
170288266f5aSStefan Hajnoczi     while (busy) {
1703dc364f4cSBenoît Canet         QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
17040b06ef3bSStefan Hajnoczi             bdrv_start_throttled_reqs(bs);
17054c355d53SZhi Yong Wu         }
1706922453bcSStefan Hajnoczi 
170788266f5aSStefan Hajnoczi         busy = bdrv_requests_pending_all();
170888266f5aSStefan Hajnoczi         busy |= aio_poll(qemu_get_aio_context(), busy);
1709922453bcSStefan Hajnoczi     }
1710922453bcSStefan Hajnoczi }
1711922453bcSStefan Hajnoczi 
1712dc364f4cSBenoît Canet /* make a BlockDriverState anonymous by removing from bdrv_state and
1713dc364f4cSBenoît Canet  * graph_bdrv_state list.
1714d22b2f41SRyan Harper    Also, NULL terminate the device_name to prevent double remove */
1715d22b2f41SRyan Harper void bdrv_make_anon(BlockDriverState *bs)
1716d22b2f41SRyan Harper {
1717d22b2f41SRyan Harper     if (bs->device_name[0] != '\0') {
1718dc364f4cSBenoît Canet         QTAILQ_REMOVE(&bdrv_states, bs, device_list);
1719d22b2f41SRyan Harper     }
1720d22b2f41SRyan Harper     bs->device_name[0] = '\0';
1721dc364f4cSBenoît Canet     if (bs->node_name[0] != '\0') {
1722dc364f4cSBenoît Canet         QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
1723dc364f4cSBenoît Canet     }
1724dc364f4cSBenoît Canet     bs->node_name[0] = '\0';
1725d22b2f41SRyan Harper }
1726d22b2f41SRyan Harper 
1727e023b2e2SPaolo Bonzini static void bdrv_rebind(BlockDriverState *bs)
1728e023b2e2SPaolo Bonzini {
1729e023b2e2SPaolo Bonzini     if (bs->drv && bs->drv->bdrv_rebind) {
1730e023b2e2SPaolo Bonzini         bs->drv->bdrv_rebind(bs);
1731e023b2e2SPaolo Bonzini     }
1732e023b2e2SPaolo Bonzini }
1733e023b2e2SPaolo Bonzini 
17344ddc07caSPaolo Bonzini static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
17354ddc07caSPaolo Bonzini                                      BlockDriverState *bs_src)
17364ddc07caSPaolo Bonzini {
17374ddc07caSPaolo Bonzini     /* move some fields that need to stay attached to the device */
17384ddc07caSPaolo Bonzini     bs_dest->open_flags         = bs_src->open_flags;
17394ddc07caSPaolo Bonzini 
17404ddc07caSPaolo Bonzini     /* dev info */
17414ddc07caSPaolo Bonzini     bs_dest->dev_ops            = bs_src->dev_ops;
17424ddc07caSPaolo Bonzini     bs_dest->dev_opaque         = bs_src->dev_opaque;
17434ddc07caSPaolo Bonzini     bs_dest->dev                = bs_src->dev;
17444ddc07caSPaolo Bonzini     bs_dest->buffer_alignment   = bs_src->buffer_alignment;
17454ddc07caSPaolo Bonzini     bs_dest->copy_on_read       = bs_src->copy_on_read;
17464ddc07caSPaolo Bonzini 
17474ddc07caSPaolo Bonzini     bs_dest->enable_write_cache = bs_src->enable_write_cache;
17484ddc07caSPaolo Bonzini 
1749cc0681c4SBenoît Canet     /* i/o throttled req */
1750cc0681c4SBenoît Canet     memcpy(&bs_dest->throttle_state,
1751cc0681c4SBenoît Canet            &bs_src->throttle_state,
1752cc0681c4SBenoît Canet            sizeof(ThrottleState));
1753cc0681c4SBenoît Canet     bs_dest->throttled_reqs[0]  = bs_src->throttled_reqs[0];
1754cc0681c4SBenoît Canet     bs_dest->throttled_reqs[1]  = bs_src->throttled_reqs[1];
17554ddc07caSPaolo Bonzini     bs_dest->io_limits_enabled  = bs_src->io_limits_enabled;
17564ddc07caSPaolo Bonzini 
17574ddc07caSPaolo Bonzini     /* r/w error */
17584ddc07caSPaolo Bonzini     bs_dest->on_read_error      = bs_src->on_read_error;
17594ddc07caSPaolo Bonzini     bs_dest->on_write_error     = bs_src->on_write_error;
17604ddc07caSPaolo Bonzini 
17614ddc07caSPaolo Bonzini     /* i/o status */
17624ddc07caSPaolo Bonzini     bs_dest->iostatus_enabled   = bs_src->iostatus_enabled;
17634ddc07caSPaolo Bonzini     bs_dest->iostatus           = bs_src->iostatus;
17644ddc07caSPaolo Bonzini 
17654ddc07caSPaolo Bonzini     /* dirty bitmap */
1766e4654d2dSFam Zheng     bs_dest->dirty_bitmaps      = bs_src->dirty_bitmaps;
17674ddc07caSPaolo Bonzini 
17689fcb0251SFam Zheng     /* reference count */
17699fcb0251SFam Zheng     bs_dest->refcnt             = bs_src->refcnt;
17709fcb0251SFam Zheng 
17714ddc07caSPaolo Bonzini     /* job */
17724ddc07caSPaolo Bonzini     bs_dest->in_use             = bs_src->in_use;
17734ddc07caSPaolo Bonzini     bs_dest->job                = bs_src->job;
17744ddc07caSPaolo Bonzini 
17754ddc07caSPaolo Bonzini     /* keep the same entry in bdrv_states */
17764ddc07caSPaolo Bonzini     pstrcpy(bs_dest->device_name, sizeof(bs_dest->device_name),
17774ddc07caSPaolo Bonzini             bs_src->device_name);
1778dc364f4cSBenoît Canet     bs_dest->device_list = bs_src->device_list;
1779dc364f4cSBenoît Canet 
1780dc364f4cSBenoît Canet     /* keep the same entry in graph_bdrv_states
1781dc364f4cSBenoît Canet      * We do want to swap name but don't want to swap linked list entries
1782dc364f4cSBenoît Canet      */
1783dc364f4cSBenoît Canet     bs_dest->node_list   = bs_src->node_list;
17844ddc07caSPaolo Bonzini }
17854ddc07caSPaolo Bonzini 
/*
 * Swap bs contents for two image chains while they are live,
 * while keeping required fields on the BlockDriverState that is
 * actually attached to a device.
 *
 * This will modify the BlockDriverState fields, and swap contents
 * between bs_new and bs_old. Both bs_new and bs_old are modified.
 *
 * bs_new is required to be anonymous.
 *
 * This function does not create any image files.
 *
 * The preconditions on bs_new (empty device name, no dirty bitmaps, no job,
 * no attached device, not in use, I/O limits disabled, no throttle timer)
 * are enforced with assert(), so violating them aborts.
 */
void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
{
    BlockDriverState tmp;

    /* bs_new must be anonymous and shouldn't have anything fancy enabled */
    assert(bs_new->device_name[0] == '\0');
    assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
    assert(bs_new->job == NULL);
    assert(bs_new->dev == NULL);
    assert(bs_new->in_use == 0);
    assert(bs_new->io_limits_enabled == false);
    assert(!throttle_have_timer(&bs_new->throttle_state));

    /* exchange the complete structures through a stack copy */
    tmp = *bs_new;
    *bs_new = *bs_old;
    *bs_old = tmp;

    /* there are some fields that should not be swapped, move them back */
    /* three-step rotation through tmp; the intent is that each object ends
     * up with the feature fields it had before the struct swap above */
    bdrv_move_feature_fields(&tmp, bs_old);
    bdrv_move_feature_fields(bs_old, bs_new);
    bdrv_move_feature_fields(bs_new, &tmp);

    /* bs_new shouldn't be in bdrv_states even after the swap!  */
    assert(bs_new->device_name[0] == '\0');

    /* Check a few fields that should remain attached to the device */
    assert(bs_new->dev == NULL);
    assert(bs_new->job == NULL);
    assert(bs_new->in_use == 0);
    assert(bs_new->io_limits_enabled == false);
    assert(!throttle_have_timer(&bs_new->throttle_state));

    bdrv_rebind(bs_new);
    bdrv_rebind(bs_old);
}
18334ddc07caSPaolo Bonzini 
18348802d1fdSJeff Cody /*
18358802d1fdSJeff Cody  * Add new bs contents at the top of an image chain while the chain is
18368802d1fdSJeff Cody  * live, while keeping required fields on the top layer.
18378802d1fdSJeff Cody  *
18388802d1fdSJeff Cody  * This will modify the BlockDriverState fields, and swap contents
18398802d1fdSJeff Cody  * between bs_new and bs_top. Both bs_new and bs_top are modified.
18408802d1fdSJeff Cody  *
1841f6801b83SJeff Cody  * bs_new is required to be anonymous.
1842f6801b83SJeff Cody  *
18438802d1fdSJeff Cody  * This function does not create any image files.
18448802d1fdSJeff Cody  */
18458802d1fdSJeff Cody void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
18468802d1fdSJeff Cody {
18474ddc07caSPaolo Bonzini     bdrv_swap(bs_new, bs_top);
18488802d1fdSJeff Cody 
18498802d1fdSJeff Cody     /* The contents of 'tmp' will become bs_top, as we are
18508802d1fdSJeff Cody      * swapping bs_new and bs_top contents. */
18514ddc07caSPaolo Bonzini     bs_top->backing_hd = bs_new;
18524ddc07caSPaolo Bonzini     bs_top->open_flags &= ~BDRV_O_NO_BACKING;
18534ddc07caSPaolo Bonzini     pstrcpy(bs_top->backing_file, sizeof(bs_top->backing_file),
18544ddc07caSPaolo Bonzini             bs_new->filename);
18554ddc07caSPaolo Bonzini     pstrcpy(bs_top->backing_format, sizeof(bs_top->backing_format),
18564ddc07caSPaolo Bonzini             bs_new->drv ? bs_new->drv->format_name : "");
18578802d1fdSJeff Cody }
18588802d1fdSJeff Cody 
/*
 * Close and free a BlockDriverState.
 *
 * The state must be idle: no attached device, no job, not in use, a zero
 * reference count and no dirty bitmaps.  Each of these is enforced with
 * assert().  bs must not be used after this call because it is freed.
 */
static void bdrv_delete(BlockDriverState *bs)
{
    assert(!bs->dev);
    assert(!bs->job);
    assert(!bs->in_use);
    assert(!bs->refcnt);
    assert(QLIST_EMPTY(&bs->dirty_bitmaps));

    bdrv_close(bs);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    g_free(bs);
}
1874fc01f7e7Sbellard 
1875fa879d62SMarkus Armbruster int bdrv_attach_dev(BlockDriverState *bs, void *dev)
1876fa879d62SMarkus Armbruster /* TODO change to DeviceState *dev when all users are qdevified */
187718846deeSMarkus Armbruster {
1878fa879d62SMarkus Armbruster     if (bs->dev) {
187918846deeSMarkus Armbruster         return -EBUSY;
188018846deeSMarkus Armbruster     }
1881fa879d62SMarkus Armbruster     bs->dev = dev;
188228a7282aSLuiz Capitulino     bdrv_iostatus_reset(bs);
188318846deeSMarkus Armbruster     return 0;
188418846deeSMarkus Armbruster }
188518846deeSMarkus Armbruster 
1886fa879d62SMarkus Armbruster /* TODO qdevified devices don't use this, remove when devices are qdevified */
1887fa879d62SMarkus Armbruster void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
188818846deeSMarkus Armbruster {
1889fa879d62SMarkus Armbruster     if (bdrv_attach_dev(bs, dev) < 0) {
1890fa879d62SMarkus Armbruster         abort();
1891fa879d62SMarkus Armbruster     }
1892fa879d62SMarkus Armbruster }
1893fa879d62SMarkus Armbruster 
1894fa879d62SMarkus Armbruster void bdrv_detach_dev(BlockDriverState *bs, void *dev)
1895fa879d62SMarkus Armbruster /* TODO change to DeviceState *dev when all users are qdevified */
1896fa879d62SMarkus Armbruster {
1897fa879d62SMarkus Armbruster     assert(bs->dev == dev);
1898fa879d62SMarkus Armbruster     bs->dev = NULL;
18990e49de52SMarkus Armbruster     bs->dev_ops = NULL;
19000e49de52SMarkus Armbruster     bs->dev_opaque = NULL;
190129e05f20SMarkus Armbruster     bs->buffer_alignment = 512;
190218846deeSMarkus Armbruster }
190318846deeSMarkus Armbruster 
1904fa879d62SMarkus Armbruster /* TODO change to return DeviceState * when all users are qdevified */
1905fa879d62SMarkus Armbruster void *bdrv_get_attached_dev(BlockDriverState *bs)
190618846deeSMarkus Armbruster {
1907fa879d62SMarkus Armbruster     return bs->dev;
190818846deeSMarkus Armbruster }
190918846deeSMarkus Armbruster 
19100e49de52SMarkus Armbruster void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
19110e49de52SMarkus Armbruster                       void *opaque)
19120e49de52SMarkus Armbruster {
19130e49de52SMarkus Armbruster     bs->dev_ops = ops;
19140e49de52SMarkus Armbruster     bs->dev_opaque = opaque;
19150e49de52SMarkus Armbruster }
19160e49de52SMarkus Armbruster 
191732c81a4aSPaolo Bonzini void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
191832c81a4aSPaolo Bonzini                                enum MonitorEvent ev,
19191ceee0d5SPaolo Bonzini                                BlockErrorAction action, bool is_read)
1920329c0a48SLuiz Capitulino {
1921329c0a48SLuiz Capitulino     QObject *data;
1922329c0a48SLuiz Capitulino     const char *action_str;
1923329c0a48SLuiz Capitulino 
1924329c0a48SLuiz Capitulino     switch (action) {
1925329c0a48SLuiz Capitulino     case BDRV_ACTION_REPORT:
1926329c0a48SLuiz Capitulino         action_str = "report";
1927329c0a48SLuiz Capitulino         break;
1928329c0a48SLuiz Capitulino     case BDRV_ACTION_IGNORE:
1929329c0a48SLuiz Capitulino         action_str = "ignore";
1930329c0a48SLuiz Capitulino         break;
1931329c0a48SLuiz Capitulino     case BDRV_ACTION_STOP:
1932329c0a48SLuiz Capitulino         action_str = "stop";
1933329c0a48SLuiz Capitulino         break;
1934329c0a48SLuiz Capitulino     default:
1935329c0a48SLuiz Capitulino         abort();
1936329c0a48SLuiz Capitulino     }
1937329c0a48SLuiz Capitulino 
1938329c0a48SLuiz Capitulino     data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1939329c0a48SLuiz Capitulino                               bdrv->device_name,
1940329c0a48SLuiz Capitulino                               action_str,
1941329c0a48SLuiz Capitulino                               is_read ? "read" : "write");
194232c81a4aSPaolo Bonzini     monitor_protocol_event(ev, data);
1943329c0a48SLuiz Capitulino 
1944329c0a48SLuiz Capitulino     qobject_decref(data);
1945329c0a48SLuiz Capitulino }
1946329c0a48SLuiz Capitulino 
19476f382ed2SLuiz Capitulino static void bdrv_emit_qmp_eject_event(BlockDriverState *bs, bool ejected)
19486f382ed2SLuiz Capitulino {
19496f382ed2SLuiz Capitulino     QObject *data;
19506f382ed2SLuiz Capitulino 
19516f382ed2SLuiz Capitulino     data = qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
19526f382ed2SLuiz Capitulino                               bdrv_get_device_name(bs), ejected);
19536f382ed2SLuiz Capitulino     monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED, data);
19546f382ed2SLuiz Capitulino 
19556f382ed2SLuiz Capitulino     qobject_decref(data);
19566f382ed2SLuiz Capitulino }
19576f382ed2SLuiz Capitulino 
19587d4b4ba5SMarkus Armbruster static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
19590e49de52SMarkus Armbruster {
1960145feb17SMarkus Armbruster     if (bs->dev_ops && bs->dev_ops->change_media_cb) {
19616f382ed2SLuiz Capitulino         bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
19627d4b4ba5SMarkus Armbruster         bs->dev_ops->change_media_cb(bs->dev_opaque, load);
19636f382ed2SLuiz Capitulino         if (tray_was_closed) {
19646f382ed2SLuiz Capitulino             /* tray open */
19656f382ed2SLuiz Capitulino             bdrv_emit_qmp_eject_event(bs, true);
19666f382ed2SLuiz Capitulino         }
19676f382ed2SLuiz Capitulino         if (load) {
19686f382ed2SLuiz Capitulino             /* tray close */
19696f382ed2SLuiz Capitulino             bdrv_emit_qmp_eject_event(bs, false);
19706f382ed2SLuiz Capitulino         }
1971145feb17SMarkus Armbruster     }
1972145feb17SMarkus Armbruster }
1973145feb17SMarkus Armbruster 
19742c6942faSMarkus Armbruster bool bdrv_dev_has_removable_media(BlockDriverState *bs)
19752c6942faSMarkus Armbruster {
19762c6942faSMarkus Armbruster     return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
19772c6942faSMarkus Armbruster }
19782c6942faSMarkus Armbruster 
1979025ccaa7SPaolo Bonzini void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
1980025ccaa7SPaolo Bonzini {
1981025ccaa7SPaolo Bonzini     if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
1982025ccaa7SPaolo Bonzini         bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
1983025ccaa7SPaolo Bonzini     }
1984025ccaa7SPaolo Bonzini }
1985025ccaa7SPaolo Bonzini 
1986e4def80bSMarkus Armbruster bool bdrv_dev_is_tray_open(BlockDriverState *bs)
1987e4def80bSMarkus Armbruster {
1988e4def80bSMarkus Armbruster     if (bs->dev_ops && bs->dev_ops->is_tray_open) {
1989e4def80bSMarkus Armbruster         return bs->dev_ops->is_tray_open(bs->dev_opaque);
1990e4def80bSMarkus Armbruster     }
1991e4def80bSMarkus Armbruster     return false;
1992e4def80bSMarkus Armbruster }
1993e4def80bSMarkus Armbruster 
1994145feb17SMarkus Armbruster static void bdrv_dev_resize_cb(BlockDriverState *bs)
1995145feb17SMarkus Armbruster {
1996145feb17SMarkus Armbruster     if (bs->dev_ops && bs->dev_ops->resize_cb) {
1997145feb17SMarkus Armbruster         bs->dev_ops->resize_cb(bs->dev_opaque);
19980e49de52SMarkus Armbruster     }
19990e49de52SMarkus Armbruster }
20000e49de52SMarkus Armbruster 
2001f107639aSMarkus Armbruster bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
2002f107639aSMarkus Armbruster {
2003f107639aSMarkus Armbruster     if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
2004f107639aSMarkus Armbruster         return bs->dev_ops->is_medium_locked(bs->dev_opaque);
2005f107639aSMarkus Armbruster     }
2006f107639aSMarkus Armbruster     return false;
2007f107639aSMarkus Armbruster }
2008f107639aSMarkus Armbruster 
2009e97fc193Saliguori /*
2010e97fc193Saliguori  * Run consistency checks on an image
2011e97fc193Saliguori  *
2012e076f338SKevin Wolf  * Returns 0 if the check could be completed (it doesn't mean that the image is
2013a1c7273bSStefan Weil  * free of errors) or -errno when an internal error occurred. The results of the
2014e076f338SKevin Wolf  * check are stored in res.
2015e97fc193Saliguori  */
20164534ff54SKevin Wolf int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
2017e97fc193Saliguori {
2018e97fc193Saliguori     if (bs->drv->bdrv_check == NULL) {
2019e97fc193Saliguori         return -ENOTSUP;
2020e97fc193Saliguori     }
2021e97fc193Saliguori 
2022e076f338SKevin Wolf     memset(res, 0, sizeof(*res));
20234534ff54SKevin Wolf     return bs->drv->bdrv_check(bs, res, fix);
2024e97fc193Saliguori }
2025e97fc193Saliguori 
20268a426614SKevin Wolf #define COMMIT_BUF_SECTORS 2048
20278a426614SKevin Wolf 
202833e3963eSbellard /* commit COW file into the raw image */
202933e3963eSbellard int bdrv_commit(BlockDriverState *bs)
203033e3963eSbellard {
203119cb3738Sbellard     BlockDriver *drv = bs->drv;
20328a426614SKevin Wolf     int64_t sector, total_sectors;
20338a426614SKevin Wolf     int n, ro, open_flags;
20340bce597dSJeff Cody     int ret = 0;
20358a426614SKevin Wolf     uint8_t *buf;
2036c2cba3d9SJim Meyering     char filename[PATH_MAX];
203733e3963eSbellard 
203819cb3738Sbellard     if (!drv)
203919cb3738Sbellard         return -ENOMEDIUM;
204033e3963eSbellard 
20414dca4b63SNaphtali Sprei     if (!bs->backing_hd) {
20424dca4b63SNaphtali Sprei         return -ENOTSUP;
20434dca4b63SNaphtali Sprei     }
20444dca4b63SNaphtali Sprei 
20452d3735d3SStefan Hajnoczi     if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
20462d3735d3SStefan Hajnoczi         return -EBUSY;
20472d3735d3SStefan Hajnoczi     }
20482d3735d3SStefan Hajnoczi 
20494dca4b63SNaphtali Sprei     ro = bs->backing_hd->read_only;
2050c2cba3d9SJim Meyering     /* Use pstrcpy (not strncpy): filename must be NUL-terminated. */
2051c2cba3d9SJim Meyering     pstrcpy(filename, sizeof(filename), bs->backing_hd->filename);
20524dca4b63SNaphtali Sprei     open_flags =  bs->backing_hd->open_flags;
20534dca4b63SNaphtali Sprei 
20544dca4b63SNaphtali Sprei     if (ro) {
20550bce597dSJeff Cody         if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
20560bce597dSJeff Cody             return -EACCES;
20574dca4b63SNaphtali Sprei         }
2058ea2384d3Sbellard     }
2059ea2384d3Sbellard 
20606ea44308SJan Kiszka     total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
20617267c094SAnthony Liguori     buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
20628a426614SKevin Wolf 
20638a426614SKevin Wolf     for (sector = 0; sector < total_sectors; sector += n) {
2064d663640cSPaolo Bonzini         ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2065d663640cSPaolo Bonzini         if (ret < 0) {
2066d663640cSPaolo Bonzini             goto ro_cleanup;
2067d663640cSPaolo Bonzini         }
2068d663640cSPaolo Bonzini         if (ret) {
20698a426614SKevin Wolf             if (bdrv_read(bs, sector, buf, n) != 0) {
20704dca4b63SNaphtali Sprei                 ret = -EIO;
20714dca4b63SNaphtali Sprei                 goto ro_cleanup;
207233e3963eSbellard             }
207333e3963eSbellard 
20748a426614SKevin Wolf             if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
20754dca4b63SNaphtali Sprei                 ret = -EIO;
20764dca4b63SNaphtali Sprei                 goto ro_cleanup;
207733e3963eSbellard             }
207833e3963eSbellard         }
207933e3963eSbellard     }
208095389c86Sbellard 
20811d44952fSChristoph Hellwig     if (drv->bdrv_make_empty) {
20821d44952fSChristoph Hellwig         ret = drv->bdrv_make_empty(bs);
20831d44952fSChristoph Hellwig         bdrv_flush(bs);
20841d44952fSChristoph Hellwig     }
208595389c86Sbellard 
20863f5075aeSChristoph Hellwig     /*
20873f5075aeSChristoph Hellwig      * Make sure all data we wrote to the backing device is actually
20883f5075aeSChristoph Hellwig      * stable on disk.
20893f5075aeSChristoph Hellwig      */
20903f5075aeSChristoph Hellwig     if (bs->backing_hd)
20913f5075aeSChristoph Hellwig         bdrv_flush(bs->backing_hd);
20924dca4b63SNaphtali Sprei 
20934dca4b63SNaphtali Sprei ro_cleanup:
20947267c094SAnthony Liguori     g_free(buf);
20954dca4b63SNaphtali Sprei 
20964dca4b63SNaphtali Sprei     if (ro) {
20970bce597dSJeff Cody         /* ignoring error return here */
20980bce597dSJeff Cody         bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
20994dca4b63SNaphtali Sprei     }
21004dca4b63SNaphtali Sprei 
21011d44952fSChristoph Hellwig     return ret;
210233e3963eSbellard }
210333e3963eSbellard 
2104e8877497SStefan Hajnoczi int bdrv_commit_all(void)
21056ab4b5abSMarkus Armbruster {
21066ab4b5abSMarkus Armbruster     BlockDriverState *bs;
21076ab4b5abSMarkus Armbruster 
2108dc364f4cSBenoît Canet     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2109272d2d8eSJeff Cody         if (bs->drv && bs->backing_hd) {
2110e8877497SStefan Hajnoczi             int ret = bdrv_commit(bs);
2111e8877497SStefan Hajnoczi             if (ret < 0) {
2112e8877497SStefan Hajnoczi                 return ret;
21136ab4b5abSMarkus Armbruster             }
21146ab4b5abSMarkus Armbruster         }
2115272d2d8eSJeff Cody     }
2116e8877497SStefan Hajnoczi     return 0;
2117e8877497SStefan Hajnoczi }
21186ab4b5abSMarkus Armbruster 
/**
 * Remove an active request from the tracked requests list
 *
 * This function should be called when a tracked request is completing.
 * After unlinking the request, everything queued on the request's
 * wait_queue is restarted.
 */
static void tracked_request_end(BdrvTrackedRequest *req)
{
    QLIST_REMOVE(req, list);
    qemu_co_queue_restart_all(&req->wait_queue);
}
2129dbffbdcfSStefan Hajnoczi 
2130dbffbdcfSStefan Hajnoczi /**
2131dbffbdcfSStefan Hajnoczi  * Add an active request to the tracked requests list
2132dbffbdcfSStefan Hajnoczi  */
2133dbffbdcfSStefan Hajnoczi static void tracked_request_begin(BdrvTrackedRequest *req,
2134dbffbdcfSStefan Hajnoczi                                   BlockDriverState *bs,
2135dbffbdcfSStefan Hajnoczi                                   int64_t sector_num,
2136dbffbdcfSStefan Hajnoczi                                   int nb_sectors, bool is_write)
2137dbffbdcfSStefan Hajnoczi {
2138dbffbdcfSStefan Hajnoczi     *req = (BdrvTrackedRequest){
2139dbffbdcfSStefan Hajnoczi         .bs = bs,
2140dbffbdcfSStefan Hajnoczi         .sector_num = sector_num,
2141dbffbdcfSStefan Hajnoczi         .nb_sectors = nb_sectors,
2142dbffbdcfSStefan Hajnoczi         .is_write = is_write,
21435f8b6491SStefan Hajnoczi         .co = qemu_coroutine_self(),
2144dbffbdcfSStefan Hajnoczi     };
2145dbffbdcfSStefan Hajnoczi 
2146f4658285SStefan Hajnoczi     qemu_co_queue_init(&req->wait_queue);
2147f4658285SStefan Hajnoczi 
2148dbffbdcfSStefan Hajnoczi     QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
2149dbffbdcfSStefan Hajnoczi }
2150dbffbdcfSStefan Hajnoczi 
2151d83947acSStefan Hajnoczi /**
2152d83947acSStefan Hajnoczi  * Round a region to cluster boundaries
2153d83947acSStefan Hajnoczi  */
2154343bded4SPaolo Bonzini void bdrv_round_to_clusters(BlockDriverState *bs,
2155d83947acSStefan Hajnoczi                             int64_t sector_num, int nb_sectors,
2156d83947acSStefan Hajnoczi                             int64_t *cluster_sector_num,
2157d83947acSStefan Hajnoczi                             int *cluster_nb_sectors)
2158d83947acSStefan Hajnoczi {
2159d83947acSStefan Hajnoczi     BlockDriverInfo bdi;
2160d83947acSStefan Hajnoczi 
2161d83947acSStefan Hajnoczi     if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
2162d83947acSStefan Hajnoczi         *cluster_sector_num = sector_num;
2163d83947acSStefan Hajnoczi         *cluster_nb_sectors = nb_sectors;
2164d83947acSStefan Hajnoczi     } else {
2165d83947acSStefan Hajnoczi         int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
2166d83947acSStefan Hajnoczi         *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
2167d83947acSStefan Hajnoczi         *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
2168d83947acSStefan Hajnoczi                                             nb_sectors, c);
2169d83947acSStefan Hajnoczi     }
2170d83947acSStefan Hajnoczi }
2171d83947acSStefan Hajnoczi 
2172f4658285SStefan Hajnoczi static bool tracked_request_overlaps(BdrvTrackedRequest *req,
2173f4658285SStefan Hajnoczi                                      int64_t sector_num, int nb_sectors) {
2174d83947acSStefan Hajnoczi     /*        aaaa   bbbb */
2175d83947acSStefan Hajnoczi     if (sector_num >= req->sector_num + req->nb_sectors) {
2176d83947acSStefan Hajnoczi         return false;
2177d83947acSStefan Hajnoczi     }
2178d83947acSStefan Hajnoczi     /* bbbb   aaaa        */
2179d83947acSStefan Hajnoczi     if (req->sector_num >= sector_num + nb_sectors) {
2180d83947acSStefan Hajnoczi         return false;
2181d83947acSStefan Hajnoczi     }
2182d83947acSStefan Hajnoczi     return true;
2183f4658285SStefan Hajnoczi }
2184f4658285SStefan Hajnoczi 
2185f4658285SStefan Hajnoczi static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
2186f4658285SStefan Hajnoczi         int64_t sector_num, int nb_sectors)
2187f4658285SStefan Hajnoczi {
2188f4658285SStefan Hajnoczi     BdrvTrackedRequest *req;
2189d83947acSStefan Hajnoczi     int64_t cluster_sector_num;
2190d83947acSStefan Hajnoczi     int cluster_nb_sectors;
2191f4658285SStefan Hajnoczi     bool retry;
2192f4658285SStefan Hajnoczi 
2193d83947acSStefan Hajnoczi     /* If we touch the same cluster it counts as an overlap.  This guarantees
2194d83947acSStefan Hajnoczi      * that allocating writes will be serialized and not race with each other
2195d83947acSStefan Hajnoczi      * for the same cluster.  For example, in copy-on-read it ensures that the
2196d83947acSStefan Hajnoczi      * CoR read and write operations are atomic and guest writes cannot
2197d83947acSStefan Hajnoczi      * interleave between them.
2198d83947acSStefan Hajnoczi      */
2199343bded4SPaolo Bonzini     bdrv_round_to_clusters(bs, sector_num, nb_sectors,
2200d83947acSStefan Hajnoczi                            &cluster_sector_num, &cluster_nb_sectors);
2201d83947acSStefan Hajnoczi 
2202f4658285SStefan Hajnoczi     do {
2203f4658285SStefan Hajnoczi         retry = false;
2204f4658285SStefan Hajnoczi         QLIST_FOREACH(req, &bs->tracked_requests, list) {
2205d83947acSStefan Hajnoczi             if (tracked_request_overlaps(req, cluster_sector_num,
2206d83947acSStefan Hajnoczi                                          cluster_nb_sectors)) {
22075f8b6491SStefan Hajnoczi                 /* Hitting this means there was a reentrant request, for
22085f8b6491SStefan Hajnoczi                  * example, a block driver issuing nested requests.  This must
22095f8b6491SStefan Hajnoczi                  * never happen since it means deadlock.
22105f8b6491SStefan Hajnoczi                  */
22115f8b6491SStefan Hajnoczi                 assert(qemu_coroutine_self() != req->co);
22125f8b6491SStefan Hajnoczi 
2213f4658285SStefan Hajnoczi                 qemu_co_queue_wait(&req->wait_queue);
2214f4658285SStefan Hajnoczi                 retry = true;
2215f4658285SStefan Hajnoczi                 break;
2216f4658285SStefan Hajnoczi             }
2217f4658285SStefan Hajnoczi         }
2218f4658285SStefan Hajnoczi     } while (retry);
2219f4658285SStefan Hajnoczi }
2220f4658285SStefan Hajnoczi 
2221756e6736SKevin Wolf /*
2222756e6736SKevin Wolf  * Return values:
2223756e6736SKevin Wolf  * 0        - success
2224756e6736SKevin Wolf  * -EINVAL  - backing format specified, but no file
2225756e6736SKevin Wolf  * -ENOSPC  - can't update the backing file because no space is left in the
2226756e6736SKevin Wolf  *            image file header
2227756e6736SKevin Wolf  * -ENOTSUP - format driver doesn't support changing the backing file
2228756e6736SKevin Wolf  */
2229756e6736SKevin Wolf int bdrv_change_backing_file(BlockDriverState *bs,
2230756e6736SKevin Wolf     const char *backing_file, const char *backing_fmt)
2231756e6736SKevin Wolf {
2232756e6736SKevin Wolf     BlockDriver *drv = bs->drv;
2233469ef350SPaolo Bonzini     int ret;
2234756e6736SKevin Wolf 
22355f377794SPaolo Bonzini     /* Backing file format doesn't make sense without a backing file */
22365f377794SPaolo Bonzini     if (backing_fmt && !backing_file) {
22375f377794SPaolo Bonzini         return -EINVAL;
22385f377794SPaolo Bonzini     }
22395f377794SPaolo Bonzini 
2240756e6736SKevin Wolf     if (drv->bdrv_change_backing_file != NULL) {
2241469ef350SPaolo Bonzini         ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2242756e6736SKevin Wolf     } else {
2243469ef350SPaolo Bonzini         ret = -ENOTSUP;
2244756e6736SKevin Wolf     }
2245469ef350SPaolo Bonzini 
2246469ef350SPaolo Bonzini     if (ret == 0) {
2247469ef350SPaolo Bonzini         pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2248469ef350SPaolo Bonzini         pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2249469ef350SPaolo Bonzini     }
2250469ef350SPaolo Bonzini     return ret;
2251756e6736SKevin Wolf }
2252756e6736SKevin Wolf 
22536ebdcee2SJeff Cody /*
22546ebdcee2SJeff Cody  * Finds the image layer in the chain that has 'bs' as its backing file.
22556ebdcee2SJeff Cody  *
22566ebdcee2SJeff Cody  * active is the current topmost image.
22576ebdcee2SJeff Cody  *
22586ebdcee2SJeff Cody  * Returns NULL if bs is not found in active's image chain,
22596ebdcee2SJeff Cody  * or if active == bs.
22606ebdcee2SJeff Cody  */
22616ebdcee2SJeff Cody BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
22626ebdcee2SJeff Cody                                     BlockDriverState *bs)
22636ebdcee2SJeff Cody {
22646ebdcee2SJeff Cody     BlockDriverState *overlay = NULL;
22656ebdcee2SJeff Cody     BlockDriverState *intermediate;
22666ebdcee2SJeff Cody 
22676ebdcee2SJeff Cody     assert(active != NULL);
22686ebdcee2SJeff Cody     assert(bs != NULL);
22696ebdcee2SJeff Cody 
22706ebdcee2SJeff Cody     /* if bs is the same as active, then by definition it has no overlay
22716ebdcee2SJeff Cody      */
22726ebdcee2SJeff Cody     if (active == bs) {
22736ebdcee2SJeff Cody         return NULL;
22746ebdcee2SJeff Cody     }
22756ebdcee2SJeff Cody 
22766ebdcee2SJeff Cody     intermediate = active;
22776ebdcee2SJeff Cody     while (intermediate->backing_hd) {
22786ebdcee2SJeff Cody         if (intermediate->backing_hd == bs) {
22796ebdcee2SJeff Cody             overlay = intermediate;
22806ebdcee2SJeff Cody             break;
22816ebdcee2SJeff Cody         }
22826ebdcee2SJeff Cody         intermediate = intermediate->backing_hd;
22836ebdcee2SJeff Cody     }
22846ebdcee2SJeff Cody 
22856ebdcee2SJeff Cody     return overlay;
22866ebdcee2SJeff Cody }
22876ebdcee2SJeff Cody 
/* Node of the queue of images that bdrv_drop_intermediate() will drop */
typedef struct BlkIntermediateStates {
    BlockDriverState *bs;                        /* image queued for removal */
    QSIMPLEQ_ENTRY(BlkIntermediateStates) entry; /* queue linkage */
} BlkIntermediateStates;
22926ebdcee2SJeff Cody 
22936ebdcee2SJeff Cody 
/*
 * Drops images above 'base' up to and including 'top', and sets the image
 * above 'top' to have base as its backing file.
 *
 * Requires that the overlay to 'top' is opened r/w, so that the backing file
 * information in that overlay can be properly updated.
 *
 * E.g., this will convert the following chain:
 * bottom <- base <- intermediate <- top <- active
 *
 * to
 *
 * bottom <- base <- active
 *
 * It is allowed for bottom==base, in which case it converts:
 *
 * base <- intermediate <- top <- active
 *
 * to
 *
 * base <- active
 *
 * Error conditions:
 *  if active == top, that is considered an error
 *
 * Returns 0 on success (including when the overlay of 'top' already has
 * 'base' as its backing image).  Returns -EIO when 'top' or 'base' has no
 * driver, when no overlay of 'top' is found below 'active', or when 'base'
 * is not reached while walking down from 'top'.  A non-zero result of
 * bdrv_change_backing_file() is returned as is.
 */
int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
                           BlockDriverState *base)
{
    BlockDriverState *intermediate;
    BlockDriverState *base_bs = NULL;
    BlockDriverState *new_top_bs = NULL;
    BlkIntermediateStates *intermediate_state, *next;
    int ret = -EIO;

    QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
    QSIMPLEQ_INIT(&states_to_delete);

    if (!top->drv || !base->drv) {
        goto exit;
    }

    /* the image directly above 'top'; it will end up directly above 'base' */
    new_top_bs = bdrv_find_overlay(active, top);

    if (new_top_bs == NULL) {
        /* we could not find the image above 'top', this is an error */
        goto exit;
    }

    /* special case of new_top_bs->backing_hd already pointing to base - nothing
     * to do, no intermediate images */
    if (new_top_bs->backing_hd == base) {
        ret = 0;
        goto exit;
    }

    intermediate = top;

    /* now we will go down through the list, and add each BDS we find
     * into our deletion queue, until we hit the 'base'
     */
    while (intermediate) {
        intermediate_state = g_malloc0(sizeof(BlkIntermediateStates));
        intermediate_state->bs = intermediate;
        QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);

        if (intermediate->backing_hd == base) {
            base_bs = intermediate->backing_hd;
            break;
        }
        intermediate = intermediate->backing_hd;
    }
    if (base_bs == NULL) {
        /* something went wrong, we did not end at the base. safely
         * unravel everything, and exit with error */
        goto exit;
    }

    /* success - we can delete the intermediate states, and link top->base */
    ret = bdrv_change_backing_file(new_top_bs, base_bs->filename,
                                   base_bs->drv ? base_bs->drv->format_name : "");
    if (ret) {
        /* nothing has been relinked or unreferenced at this point */
        goto exit;
    }
    new_top_bs->backing_hd = base_bs;


    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
        /* so that bdrv_close() does not recursively close the chain */
        intermediate_state->bs->backing_hd = NULL;
        bdrv_unref(intermediate_state->bs);
    }
    ret = 0;

exit:
    /* reached on success and on failure: free the queue nodes themselves */
    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
        g_free(intermediate_state);
    }
    return ret;
}
23946ebdcee2SJeff Cody 
23956ebdcee2SJeff Cody 
239671d0770cSaliguori static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
239771d0770cSaliguori                                    size_t size)
239871d0770cSaliguori {
239971d0770cSaliguori     int64_t len;
240071d0770cSaliguori 
240171d0770cSaliguori     if (!bdrv_is_inserted(bs))
240271d0770cSaliguori         return -ENOMEDIUM;
240371d0770cSaliguori 
240471d0770cSaliguori     if (bs->growable)
240571d0770cSaliguori         return 0;
240671d0770cSaliguori 
240771d0770cSaliguori     len = bdrv_getlength(bs);
240871d0770cSaliguori 
2409fbb7b4e0SKevin Wolf     if (offset < 0)
2410fbb7b4e0SKevin Wolf         return -EIO;
2411fbb7b4e0SKevin Wolf 
2412fbb7b4e0SKevin Wolf     if ((offset > len) || (len - offset < size))
241371d0770cSaliguori         return -EIO;
241471d0770cSaliguori 
241571d0770cSaliguori     return 0;
241671d0770cSaliguori }
241771d0770cSaliguori 
241871d0770cSaliguori static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
241971d0770cSaliguori                               int nb_sectors)
242071d0770cSaliguori {
2421eb5a3165SJes Sorensen     return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
2422eb5a3165SJes Sorensen                                    nb_sectors * BDRV_SECTOR_SIZE);
242371d0770cSaliguori }
242471d0770cSaliguori 
24251c9805a3SStefan Hajnoczi typedef struct RwCo {
24261c9805a3SStefan Hajnoczi     BlockDriverState *bs;
24271c9805a3SStefan Hajnoczi     int64_t sector_num;
24281c9805a3SStefan Hajnoczi     int nb_sectors;
24291c9805a3SStefan Hajnoczi     QEMUIOVector *qiov;
24301c9805a3SStefan Hajnoczi     bool is_write;
24311c9805a3SStefan Hajnoczi     int ret;
24324105eaaaSPeter Lieven     BdrvRequestFlags flags;
24331c9805a3SStefan Hajnoczi } RwCo;
24341c9805a3SStefan Hajnoczi 
24351c9805a3SStefan Hajnoczi static void coroutine_fn bdrv_rw_co_entry(void *opaque)
2436fc01f7e7Sbellard {
24371c9805a3SStefan Hajnoczi     RwCo *rwco = opaque;
2438fc01f7e7Sbellard 
24391c9805a3SStefan Hajnoczi     if (!rwco->is_write) {
24401c9805a3SStefan Hajnoczi         rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
24414105eaaaSPeter Lieven                                      rwco->nb_sectors, rwco->qiov,
24424105eaaaSPeter Lieven                                      rwco->flags);
24431c9805a3SStefan Hajnoczi     } else {
24441c9805a3SStefan Hajnoczi         rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
24454105eaaaSPeter Lieven                                       rwco->nb_sectors, rwco->qiov,
24464105eaaaSPeter Lieven                                       rwco->flags);
24471c9805a3SStefan Hajnoczi     }
24481c9805a3SStefan Hajnoczi }
2449e7a8a783SKevin Wolf 
24501c9805a3SStefan Hajnoczi /*
24518d3b1a2dSKevin Wolf  * Process a vectored synchronous request using coroutines
24521c9805a3SStefan Hajnoczi  */
24538d3b1a2dSKevin Wolf static int bdrv_rwv_co(BlockDriverState *bs, int64_t sector_num,
24544105eaaaSPeter Lieven                        QEMUIOVector *qiov, bool is_write,
24554105eaaaSPeter Lieven                        BdrvRequestFlags flags)
24561c9805a3SStefan Hajnoczi {
24571c9805a3SStefan Hajnoczi     Coroutine *co;
24581c9805a3SStefan Hajnoczi     RwCo rwco = {
24591c9805a3SStefan Hajnoczi         .bs = bs,
24601c9805a3SStefan Hajnoczi         .sector_num = sector_num,
24618d3b1a2dSKevin Wolf         .nb_sectors = qiov->size >> BDRV_SECTOR_BITS,
24628d3b1a2dSKevin Wolf         .qiov = qiov,
24631c9805a3SStefan Hajnoczi         .is_write = is_write,
24641c9805a3SStefan Hajnoczi         .ret = NOT_DONE,
24654105eaaaSPeter Lieven         .flags = flags,
24661c9805a3SStefan Hajnoczi     };
24678d3b1a2dSKevin Wolf     assert((qiov->size & (BDRV_SECTOR_SIZE - 1)) == 0);
24681c9805a3SStefan Hajnoczi 
2469498e386cSZhi Yong Wu     /**
2470498e386cSZhi Yong Wu      * In sync call context, when the vcpu is blocked, this throttling timer
2471498e386cSZhi Yong Wu      * will not fire; so the I/O throttling function has to be disabled here
2472498e386cSZhi Yong Wu      * if it has been enabled.
2473498e386cSZhi Yong Wu      */
2474498e386cSZhi Yong Wu     if (bs->io_limits_enabled) {
2475498e386cSZhi Yong Wu         fprintf(stderr, "Disabling I/O throttling on '%s' due "
2476498e386cSZhi Yong Wu                         "to synchronous I/O.\n", bdrv_get_device_name(bs));
2477498e386cSZhi Yong Wu         bdrv_io_limits_disable(bs);
2478498e386cSZhi Yong Wu     }
2479498e386cSZhi Yong Wu 
24801c9805a3SStefan Hajnoczi     if (qemu_in_coroutine()) {
24811c9805a3SStefan Hajnoczi         /* Fast-path if already in coroutine context */
24821c9805a3SStefan Hajnoczi         bdrv_rw_co_entry(&rwco);
24831c9805a3SStefan Hajnoczi     } else {
24841c9805a3SStefan Hajnoczi         co = qemu_coroutine_create(bdrv_rw_co_entry);
24851c9805a3SStefan Hajnoczi         qemu_coroutine_enter(co, &rwco);
24861c9805a3SStefan Hajnoczi         while (rwco.ret == NOT_DONE) {
24871c9805a3SStefan Hajnoczi             qemu_aio_wait();
24881c9805a3SStefan Hajnoczi         }
24891c9805a3SStefan Hajnoczi     }
24901c9805a3SStefan Hajnoczi     return rwco.ret;
2491e7a8a783SKevin Wolf }
2492e7a8a783SKevin Wolf 
24938d3b1a2dSKevin Wolf /*
24948d3b1a2dSKevin Wolf  * Process a synchronous request using coroutines
24958d3b1a2dSKevin Wolf  */
24968d3b1a2dSKevin Wolf static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
24974105eaaaSPeter Lieven                       int nb_sectors, bool is_write, BdrvRequestFlags flags)
24988d3b1a2dSKevin Wolf {
24998d3b1a2dSKevin Wolf     QEMUIOVector qiov;
25008d3b1a2dSKevin Wolf     struct iovec iov = {
25018d3b1a2dSKevin Wolf         .iov_base = (void *)buf,
25028d3b1a2dSKevin Wolf         .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
25038d3b1a2dSKevin Wolf     };
25048d3b1a2dSKevin Wolf 
25058d3b1a2dSKevin Wolf     qemu_iovec_init_external(&qiov, &iov, 1);
25064105eaaaSPeter Lieven     return bdrv_rwv_co(bs, sector_num, &qiov, is_write, flags);
25078d3b1a2dSKevin Wolf }
25088d3b1a2dSKevin Wolf 
25091c9805a3SStefan Hajnoczi /* return < 0 if error. See bdrv_write() for the return codes */
25101c9805a3SStefan Hajnoczi int bdrv_read(BlockDriverState *bs, int64_t sector_num,
25111c9805a3SStefan Hajnoczi               uint8_t *buf, int nb_sectors)
25121c9805a3SStefan Hajnoczi {
25134105eaaaSPeter Lieven     return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
251483f64091Sbellard }
2515fc01f7e7Sbellard 
251607d27a44SMarkus Armbruster /* Just like bdrv_read(), but with I/O throttling temporarily disabled */
251707d27a44SMarkus Armbruster int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
251807d27a44SMarkus Armbruster                           uint8_t *buf, int nb_sectors)
251907d27a44SMarkus Armbruster {
252007d27a44SMarkus Armbruster     bool enabled;
252107d27a44SMarkus Armbruster     int ret;
252207d27a44SMarkus Armbruster 
252307d27a44SMarkus Armbruster     enabled = bs->io_limits_enabled;
252407d27a44SMarkus Armbruster     bs->io_limits_enabled = false;
25254e7395e8SPeter Lieven     ret = bdrv_read(bs, sector_num, buf, nb_sectors);
252607d27a44SMarkus Armbruster     bs->io_limits_enabled = enabled;
252707d27a44SMarkus Armbruster     return ret;
252807d27a44SMarkus Armbruster }
252907d27a44SMarkus Armbruster 
253019cb3738Sbellard /* Return < 0 if error. Important errors are:
253119cb3738Sbellard   -EIO         generic I/O error (may happen for all errors)
253219cb3738Sbellard   -ENOMEDIUM   No media inserted.
253319cb3738Sbellard   -EINVAL      Invalid sector number or nb_sectors
253419cb3738Sbellard   -EACCES      Trying to write a read-only device
253519cb3738Sbellard */
2536fc01f7e7Sbellard int bdrv_write(BlockDriverState *bs, int64_t sector_num,
2537fc01f7e7Sbellard                const uint8_t *buf, int nb_sectors)
2538fc01f7e7Sbellard {
25394105eaaaSPeter Lieven     return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
254083f64091Sbellard }
254183f64091Sbellard 
25428d3b1a2dSKevin Wolf int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov)
25438d3b1a2dSKevin Wolf {
25444105eaaaSPeter Lieven     return bdrv_rwv_co(bs, sector_num, qiov, true, 0);
25454105eaaaSPeter Lieven }
25464105eaaaSPeter Lieven 
2547aa7bfbffSPeter Lieven int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
2548aa7bfbffSPeter Lieven                       int nb_sectors, BdrvRequestFlags flags)
25494105eaaaSPeter Lieven {
25504105eaaaSPeter Lieven     return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
2551aa7bfbffSPeter Lieven                       BDRV_REQ_ZERO_WRITE | flags);
25528d3b1a2dSKevin Wolf }
25538d3b1a2dSKevin Wolf 
2554d75cbb5eSPeter Lieven /*
2555d75cbb5eSPeter Lieven  * Completely zero out a block device with the help of bdrv_write_zeroes.
2556d75cbb5eSPeter Lieven  * The operation is sped up by checking the block status and only writing
2557d75cbb5eSPeter Lieven  * zeroes to the device if they currently do not return zeroes. Optional
2558d75cbb5eSPeter Lieven  * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
2559d75cbb5eSPeter Lieven  *
2560d75cbb5eSPeter Lieven  * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
2561d75cbb5eSPeter Lieven  */
2562d75cbb5eSPeter Lieven int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
2563d75cbb5eSPeter Lieven {
2564d75cbb5eSPeter Lieven     int64_t target_size = bdrv_getlength(bs) / BDRV_SECTOR_SIZE;
2565d75cbb5eSPeter Lieven     int64_t ret, nb_sectors, sector_num = 0;
2566d75cbb5eSPeter Lieven     int n;
2567d75cbb5eSPeter Lieven 
2568d75cbb5eSPeter Lieven     for (;;) {
2569d75cbb5eSPeter Lieven         nb_sectors = target_size - sector_num;
2570d75cbb5eSPeter Lieven         if (nb_sectors <= 0) {
2571d75cbb5eSPeter Lieven             return 0;
2572d75cbb5eSPeter Lieven         }
2573d75cbb5eSPeter Lieven         if (nb_sectors > INT_MAX) {
2574d75cbb5eSPeter Lieven             nb_sectors = INT_MAX;
2575d75cbb5eSPeter Lieven         }
2576d75cbb5eSPeter Lieven         ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
25773d94ce60SPeter Lieven         if (ret < 0) {
25783d94ce60SPeter Lieven             error_report("error getting block status at sector %" PRId64 ": %s",
25793d94ce60SPeter Lieven                          sector_num, strerror(-ret));
25803d94ce60SPeter Lieven             return ret;
25813d94ce60SPeter Lieven         }
2582d75cbb5eSPeter Lieven         if (ret & BDRV_BLOCK_ZERO) {
2583d75cbb5eSPeter Lieven             sector_num += n;
2584d75cbb5eSPeter Lieven             continue;
2585d75cbb5eSPeter Lieven         }
2586d75cbb5eSPeter Lieven         ret = bdrv_write_zeroes(bs, sector_num, n, flags);
2587d75cbb5eSPeter Lieven         if (ret < 0) {
2588d75cbb5eSPeter Lieven             error_report("error writing zeroes at sector %" PRId64 ": %s",
2589d75cbb5eSPeter Lieven                          sector_num, strerror(-ret));
2590d75cbb5eSPeter Lieven             return ret;
2591d75cbb5eSPeter Lieven         }
2592d75cbb5eSPeter Lieven         sector_num += n;
2593d75cbb5eSPeter Lieven     }
2594d75cbb5eSPeter Lieven }
2595d75cbb5eSPeter Lieven 
2596eda578e5Saliguori int bdrv_pread(BlockDriverState *bs, int64_t offset,
2597eda578e5Saliguori                void *buf, int count1)
259883f64091Sbellard {
25996ea44308SJan Kiszka     uint8_t tmp_buf[BDRV_SECTOR_SIZE];
260083f64091Sbellard     int len, nb_sectors, count;
260183f64091Sbellard     int64_t sector_num;
26029a8c4cceSKevin Wolf     int ret;
260383f64091Sbellard 
260483f64091Sbellard     count = count1;
260583f64091Sbellard     /* first read to align to sector start */
26066ea44308SJan Kiszka     len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
260783f64091Sbellard     if (len > count)
260883f64091Sbellard         len = count;
26096ea44308SJan Kiszka     sector_num = offset >> BDRV_SECTOR_BITS;
261083f64091Sbellard     if (len > 0) {
26119a8c4cceSKevin Wolf         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
26129a8c4cceSKevin Wolf             return ret;
26136ea44308SJan Kiszka         memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
261483f64091Sbellard         count -= len;
261583f64091Sbellard         if (count == 0)
261683f64091Sbellard             return count1;
261783f64091Sbellard         sector_num++;
261883f64091Sbellard         buf += len;
261983f64091Sbellard     }
262083f64091Sbellard 
262183f64091Sbellard     /* read the sectors "in place" */
26226ea44308SJan Kiszka     nb_sectors = count >> BDRV_SECTOR_BITS;
262383f64091Sbellard     if (nb_sectors > 0) {
26249a8c4cceSKevin Wolf         if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
26259a8c4cceSKevin Wolf             return ret;
262683f64091Sbellard         sector_num += nb_sectors;
26276ea44308SJan Kiszka         len = nb_sectors << BDRV_SECTOR_BITS;
262883f64091Sbellard         buf += len;
262983f64091Sbellard         count -= len;
263083f64091Sbellard     }
263183f64091Sbellard 
263283f64091Sbellard     /* add data from the last sector */
263383f64091Sbellard     if (count > 0) {
26349a8c4cceSKevin Wolf         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
26359a8c4cceSKevin Wolf             return ret;
263683f64091Sbellard         memcpy(buf, tmp_buf, count);
263783f64091Sbellard     }
263883f64091Sbellard     return count1;
263983f64091Sbellard }
264083f64091Sbellard 
26418d3b1a2dSKevin Wolf int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
264283f64091Sbellard {
26436ea44308SJan Kiszka     uint8_t tmp_buf[BDRV_SECTOR_SIZE];
264483f64091Sbellard     int len, nb_sectors, count;
264583f64091Sbellard     int64_t sector_num;
26469a8c4cceSKevin Wolf     int ret;
264783f64091Sbellard 
26488d3b1a2dSKevin Wolf     count = qiov->size;
26498d3b1a2dSKevin Wolf 
265083f64091Sbellard     /* first write to align to sector start */
26516ea44308SJan Kiszka     len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
265283f64091Sbellard     if (len > count)
265383f64091Sbellard         len = count;
26546ea44308SJan Kiszka     sector_num = offset >> BDRV_SECTOR_BITS;
265583f64091Sbellard     if (len > 0) {
26569a8c4cceSKevin Wolf         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
26579a8c4cceSKevin Wolf             return ret;
26588d3b1a2dSKevin Wolf         qemu_iovec_to_buf(qiov, 0, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)),
26598d3b1a2dSKevin Wolf                           len);
26609a8c4cceSKevin Wolf         if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
26619a8c4cceSKevin Wolf             return ret;
266283f64091Sbellard         count -= len;
266383f64091Sbellard         if (count == 0)
26648d3b1a2dSKevin Wolf             return qiov->size;
266583f64091Sbellard         sector_num++;
266683f64091Sbellard     }
266783f64091Sbellard 
266883f64091Sbellard     /* write the sectors "in place" */
26696ea44308SJan Kiszka     nb_sectors = count >> BDRV_SECTOR_BITS;
267083f64091Sbellard     if (nb_sectors > 0) {
26718d3b1a2dSKevin Wolf         QEMUIOVector qiov_inplace;
26728d3b1a2dSKevin Wolf 
26738d3b1a2dSKevin Wolf         qemu_iovec_init(&qiov_inplace, qiov->niov);
26748d3b1a2dSKevin Wolf         qemu_iovec_concat(&qiov_inplace, qiov, len,
26758d3b1a2dSKevin Wolf                           nb_sectors << BDRV_SECTOR_BITS);
26768d3b1a2dSKevin Wolf         ret = bdrv_writev(bs, sector_num, &qiov_inplace);
26778d3b1a2dSKevin Wolf         qemu_iovec_destroy(&qiov_inplace);
26788d3b1a2dSKevin Wolf         if (ret < 0) {
26799a8c4cceSKevin Wolf             return ret;
26808d3b1a2dSKevin Wolf         }
26818d3b1a2dSKevin Wolf 
268283f64091Sbellard         sector_num += nb_sectors;
26836ea44308SJan Kiszka         len = nb_sectors << BDRV_SECTOR_BITS;
268483f64091Sbellard         count -= len;
268583f64091Sbellard     }
268683f64091Sbellard 
268783f64091Sbellard     /* add data from the last sector */
268883f64091Sbellard     if (count > 0) {
26899a8c4cceSKevin Wolf         if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
26909a8c4cceSKevin Wolf             return ret;
26918d3b1a2dSKevin Wolf         qemu_iovec_to_buf(qiov, qiov->size - count, tmp_buf, count);
26929a8c4cceSKevin Wolf         if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
26939a8c4cceSKevin Wolf             return ret;
269483f64091Sbellard     }
26958d3b1a2dSKevin Wolf     return qiov->size;
26968d3b1a2dSKevin Wolf }
26978d3b1a2dSKevin Wolf 
26988d3b1a2dSKevin Wolf int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
26998d3b1a2dSKevin Wolf                 const void *buf, int count1)
27008d3b1a2dSKevin Wolf {
27018d3b1a2dSKevin Wolf     QEMUIOVector qiov;
27028d3b1a2dSKevin Wolf     struct iovec iov = {
27038d3b1a2dSKevin Wolf         .iov_base   = (void *) buf,
27048d3b1a2dSKevin Wolf         .iov_len    = count1,
27058d3b1a2dSKevin Wolf     };
27068d3b1a2dSKevin Wolf 
27078d3b1a2dSKevin Wolf     qemu_iovec_init_external(&qiov, &iov, 1);
27088d3b1a2dSKevin Wolf     return bdrv_pwritev(bs, offset, &qiov);
270983f64091Sbellard }
271083f64091Sbellard 
2711f08145feSKevin Wolf /*
2712f08145feSKevin Wolf  * Writes to the file and ensures that no writes are reordered across this
2713f08145feSKevin Wolf  * request (acts as a barrier)
2714f08145feSKevin Wolf  *
2715f08145feSKevin Wolf  * Returns 0 on success, -errno in error cases.
2716f08145feSKevin Wolf  */
2717f08145feSKevin Wolf int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
2718f08145feSKevin Wolf     const void *buf, int count)
2719f08145feSKevin Wolf {
2720f08145feSKevin Wolf     int ret;
2721f08145feSKevin Wolf 
2722f08145feSKevin Wolf     ret = bdrv_pwrite(bs, offset, buf, count);
2723f08145feSKevin Wolf     if (ret < 0) {
2724f08145feSKevin Wolf         return ret;
2725f08145feSKevin Wolf     }
2726f08145feSKevin Wolf 
2727f05fa4adSPaolo Bonzini     /* No flush needed for cache modes that already do it */
2728f05fa4adSPaolo Bonzini     if (bs->enable_write_cache) {
2729f08145feSKevin Wolf         bdrv_flush(bs);
2730f08145feSKevin Wolf     }
2731f08145feSKevin Wolf 
2732f08145feSKevin Wolf     return 0;
2733f08145feSKevin Wolf }
2734f08145feSKevin Wolf 
2735470c0504SStefan Hajnoczi static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
2736ab185921SStefan Hajnoczi         int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
2737ab185921SStefan Hajnoczi {
2738ab185921SStefan Hajnoczi     /* Perform I/O through a temporary buffer so that users who scribble over
2739ab185921SStefan Hajnoczi      * their read buffer while the operation is in progress do not end up
2740ab185921SStefan Hajnoczi      * modifying the image file.  This is critical for zero-copy guest I/O
2741ab185921SStefan Hajnoczi      * where anything might happen inside guest memory.
2742ab185921SStefan Hajnoczi      */
2743ab185921SStefan Hajnoczi     void *bounce_buffer;
2744ab185921SStefan Hajnoczi 
274579c053bdSStefan Hajnoczi     BlockDriver *drv = bs->drv;
2746ab185921SStefan Hajnoczi     struct iovec iov;
2747ab185921SStefan Hajnoczi     QEMUIOVector bounce_qiov;
2748ab185921SStefan Hajnoczi     int64_t cluster_sector_num;
2749ab185921SStefan Hajnoczi     int cluster_nb_sectors;
2750ab185921SStefan Hajnoczi     size_t skip_bytes;
2751ab185921SStefan Hajnoczi     int ret;
2752ab185921SStefan Hajnoczi 
2753ab185921SStefan Hajnoczi     /* Cover entire cluster so no additional backing file I/O is required when
2754ab185921SStefan Hajnoczi      * allocating cluster in the image file.
2755ab185921SStefan Hajnoczi      */
2756343bded4SPaolo Bonzini     bdrv_round_to_clusters(bs, sector_num, nb_sectors,
2757ab185921SStefan Hajnoczi                            &cluster_sector_num, &cluster_nb_sectors);
2758ab185921SStefan Hajnoczi 
2759470c0504SStefan Hajnoczi     trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
2760ab185921SStefan Hajnoczi                                    cluster_sector_num, cluster_nb_sectors);
2761ab185921SStefan Hajnoczi 
2762ab185921SStefan Hajnoczi     iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
2763ab185921SStefan Hajnoczi     iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
2764ab185921SStefan Hajnoczi     qemu_iovec_init_external(&bounce_qiov, &iov, 1);
2765ab185921SStefan Hajnoczi 
276679c053bdSStefan Hajnoczi     ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
2767ab185921SStefan Hajnoczi                              &bounce_qiov);
2768ab185921SStefan Hajnoczi     if (ret < 0) {
2769ab185921SStefan Hajnoczi         goto err;
2770ab185921SStefan Hajnoczi     }
2771ab185921SStefan Hajnoczi 
277279c053bdSStefan Hajnoczi     if (drv->bdrv_co_write_zeroes &&
277379c053bdSStefan Hajnoczi         buffer_is_zero(bounce_buffer, iov.iov_len)) {
2774621f0589SKevin Wolf         ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
2775aa7bfbffSPeter Lieven                                       cluster_nb_sectors, 0);
277679c053bdSStefan Hajnoczi     } else {
2777f05fa4adSPaolo Bonzini         /* This does not change the data on the disk, it is not necessary
2778f05fa4adSPaolo Bonzini          * to flush even in cache=writethrough mode.
2779f05fa4adSPaolo Bonzini          */
278079c053bdSStefan Hajnoczi         ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
2781ab185921SStefan Hajnoczi                                   &bounce_qiov);
278279c053bdSStefan Hajnoczi     }
278379c053bdSStefan Hajnoczi 
2784ab185921SStefan Hajnoczi     if (ret < 0) {
2785ab185921SStefan Hajnoczi         /* It might be okay to ignore write errors for guest requests.  If this
2786ab185921SStefan Hajnoczi          * is a deliberate copy-on-read then we don't want to ignore the error.
2787ab185921SStefan Hajnoczi          * Simply report it in all cases.
2788ab185921SStefan Hajnoczi          */
2789ab185921SStefan Hajnoczi         goto err;
2790ab185921SStefan Hajnoczi     }
2791ab185921SStefan Hajnoczi 
2792ab185921SStefan Hajnoczi     skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
279303396148SMichael Tokarev     qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
2794ab185921SStefan Hajnoczi                         nb_sectors * BDRV_SECTOR_SIZE);
2795ab185921SStefan Hajnoczi 
2796ab185921SStefan Hajnoczi err:
2797ab185921SStefan Hajnoczi     qemu_vfree(bounce_buffer);
2798ab185921SStefan Hajnoczi     return ret;
2799ab185921SStefan Hajnoczi }
2800ab185921SStefan Hajnoczi 
2801c5fbe571SStefan Hajnoczi /*
2802c5fbe571SStefan Hajnoczi  * Handle a read request in coroutine context
2803c5fbe571SStefan Hajnoczi  */
2804c5fbe571SStefan Hajnoczi static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
2805470c0504SStefan Hajnoczi     int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
2806470c0504SStefan Hajnoczi     BdrvRequestFlags flags)
2807da1fa91dSKevin Wolf {
2808da1fa91dSKevin Wolf     BlockDriver *drv = bs->drv;
2809dbffbdcfSStefan Hajnoczi     BdrvTrackedRequest req;
2810dbffbdcfSStefan Hajnoczi     int ret;
2811da1fa91dSKevin Wolf 
2812da1fa91dSKevin Wolf     if (!drv) {
2813da1fa91dSKevin Wolf         return -ENOMEDIUM;
2814da1fa91dSKevin Wolf     }
2815da1fa91dSKevin Wolf     if (bdrv_check_request(bs, sector_num, nb_sectors)) {
2816da1fa91dSKevin Wolf         return -EIO;
2817da1fa91dSKevin Wolf     }
2818da1fa91dSKevin Wolf 
2819f4658285SStefan Hajnoczi     if (bs->copy_on_read) {
2820470c0504SStefan Hajnoczi         flags |= BDRV_REQ_COPY_ON_READ;
2821470c0504SStefan Hajnoczi     }
2822470c0504SStefan Hajnoczi     if (flags & BDRV_REQ_COPY_ON_READ) {
2823470c0504SStefan Hajnoczi         bs->copy_on_read_in_flight++;
2824470c0504SStefan Hajnoczi     }
2825470c0504SStefan Hajnoczi 
2826470c0504SStefan Hajnoczi     if (bs->copy_on_read_in_flight) {
2827f4658285SStefan Hajnoczi         wait_for_overlapping_requests(bs, sector_num, nb_sectors);
2828f4658285SStefan Hajnoczi     }
2829f4658285SStefan Hajnoczi 
2830cc0681c4SBenoît Canet     /* throttling disk I/O */
2831cc0681c4SBenoît Canet     if (bs->io_limits_enabled) {
2832cc0681c4SBenoît Canet         bdrv_io_limits_intercept(bs, nb_sectors, false);
2833cc0681c4SBenoît Canet     }
2834cc0681c4SBenoît Canet 
2835dbffbdcfSStefan Hajnoczi     tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
2836ab185921SStefan Hajnoczi 
2837470c0504SStefan Hajnoczi     if (flags & BDRV_REQ_COPY_ON_READ) {
2838ab185921SStefan Hajnoczi         int pnum;
2839ab185921SStefan Hajnoczi 
2840bdad13b9SPaolo Bonzini         ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
2841ab185921SStefan Hajnoczi         if (ret < 0) {
2842ab185921SStefan Hajnoczi             goto out;
2843ab185921SStefan Hajnoczi         }
2844ab185921SStefan Hajnoczi 
2845ab185921SStefan Hajnoczi         if (!ret || pnum != nb_sectors) {
2846470c0504SStefan Hajnoczi             ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
2847ab185921SStefan Hajnoczi             goto out;
2848ab185921SStefan Hajnoczi         }
2849ab185921SStefan Hajnoczi     }
2850ab185921SStefan Hajnoczi 
2851893a8f62SMORITA Kazutaka     if (!(bs->zero_beyond_eof && bs->growable)) {
2852dbffbdcfSStefan Hajnoczi         ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
2853893a8f62SMORITA Kazutaka     } else {
2854893a8f62SMORITA Kazutaka         /* Read zeros after EOF of growable BDSes */
2855893a8f62SMORITA Kazutaka         int64_t len, total_sectors, max_nb_sectors;
2856893a8f62SMORITA Kazutaka 
2857893a8f62SMORITA Kazutaka         len = bdrv_getlength(bs);
2858893a8f62SMORITA Kazutaka         if (len < 0) {
2859893a8f62SMORITA Kazutaka             ret = len;
2860893a8f62SMORITA Kazutaka             goto out;
2861893a8f62SMORITA Kazutaka         }
2862893a8f62SMORITA Kazutaka 
2863d055a1feSFam Zheng         total_sectors = DIV_ROUND_UP(len, BDRV_SECTOR_SIZE);
2864893a8f62SMORITA Kazutaka         max_nb_sectors = MAX(0, total_sectors - sector_num);
2865893a8f62SMORITA Kazutaka         if (max_nb_sectors > 0) {
2866893a8f62SMORITA Kazutaka             ret = drv->bdrv_co_readv(bs, sector_num,
2867893a8f62SMORITA Kazutaka                                      MIN(nb_sectors, max_nb_sectors), qiov);
2868893a8f62SMORITA Kazutaka         } else {
2869893a8f62SMORITA Kazutaka             ret = 0;
2870893a8f62SMORITA Kazutaka         }
2871893a8f62SMORITA Kazutaka 
2872893a8f62SMORITA Kazutaka         /* Reading beyond end of file is supposed to produce zeroes */
2873893a8f62SMORITA Kazutaka         if (ret == 0 && total_sectors < sector_num + nb_sectors) {
2874893a8f62SMORITA Kazutaka             uint64_t offset = MAX(0, total_sectors - sector_num);
2875893a8f62SMORITA Kazutaka             uint64_t bytes = (sector_num + nb_sectors - offset) *
2876893a8f62SMORITA Kazutaka                               BDRV_SECTOR_SIZE;
2877893a8f62SMORITA Kazutaka             qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
2878893a8f62SMORITA Kazutaka         }
2879893a8f62SMORITA Kazutaka     }
2880ab185921SStefan Hajnoczi 
2881ab185921SStefan Hajnoczi out:
2882dbffbdcfSStefan Hajnoczi     tracked_request_end(&req);
2883470c0504SStefan Hajnoczi 
2884470c0504SStefan Hajnoczi     if (flags & BDRV_REQ_COPY_ON_READ) {
2885470c0504SStefan Hajnoczi         bs->copy_on_read_in_flight--;
2886470c0504SStefan Hajnoczi     }
2887470c0504SStefan Hajnoczi 
2888dbffbdcfSStefan Hajnoczi     return ret;
2889da1fa91dSKevin Wolf }
2890da1fa91dSKevin Wolf 
2891c5fbe571SStefan Hajnoczi int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
2892da1fa91dSKevin Wolf     int nb_sectors, QEMUIOVector *qiov)
2893da1fa91dSKevin Wolf {
2894c5fbe571SStefan Hajnoczi     trace_bdrv_co_readv(bs, sector_num, nb_sectors);
2895da1fa91dSKevin Wolf 
2896470c0504SStefan Hajnoczi     return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
2897470c0504SStefan Hajnoczi }
2898470c0504SStefan Hajnoczi 
2899470c0504SStefan Hajnoczi int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
2900470c0504SStefan Hajnoczi     int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
2901470c0504SStefan Hajnoczi {
2902470c0504SStefan Hajnoczi     trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
2903470c0504SStefan Hajnoczi 
2904470c0504SStefan Hajnoczi     return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
2905470c0504SStefan Hajnoczi                             BDRV_REQ_COPY_ON_READ);
2906c5fbe571SStefan Hajnoczi }
2907c5fbe571SStefan Hajnoczi 
2908c31cb707SPeter Lieven /* if no limit is specified in the BlockLimits use a default
2909c31cb707SPeter Lieven  * of 32768 512-byte sectors (16 MiB) per request.
2910c31cb707SPeter Lieven  */
2911c31cb707SPeter Lieven #define MAX_WRITE_ZEROES_DEFAULT 32768
2912c31cb707SPeter Lieven 
2913f08f2ddaSStefan Hajnoczi static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
2914aa7bfbffSPeter Lieven     int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
2915f08f2ddaSStefan Hajnoczi {
2916f08f2ddaSStefan Hajnoczi     BlockDriver *drv = bs->drv;
2917f08f2ddaSStefan Hajnoczi     QEMUIOVector qiov;
2918c31cb707SPeter Lieven     struct iovec iov = {0};
2919c31cb707SPeter Lieven     int ret = 0;
2920f08f2ddaSStefan Hajnoczi 
2921c31cb707SPeter Lieven     int max_write_zeroes = bs->bl.max_write_zeroes ?
2922c31cb707SPeter Lieven                            bs->bl.max_write_zeroes : MAX_WRITE_ZEROES_DEFAULT;
2923621f0589SKevin Wolf 
2924c31cb707SPeter Lieven     while (nb_sectors > 0 && !ret) {
2925c31cb707SPeter Lieven         int num = nb_sectors;
2926c31cb707SPeter Lieven 
2927b8d71c09SPaolo Bonzini         /* Align request.  Block drivers can expect the "bulk" of the request
2928b8d71c09SPaolo Bonzini          * to be aligned.
2929b8d71c09SPaolo Bonzini          */
2930b8d71c09SPaolo Bonzini         if (bs->bl.write_zeroes_alignment
2931b8d71c09SPaolo Bonzini             && num > bs->bl.write_zeroes_alignment) {
2932b8d71c09SPaolo Bonzini             if (sector_num % bs->bl.write_zeroes_alignment != 0) {
2933b8d71c09SPaolo Bonzini                 /* Make a small request up to the first aligned sector.  */
2934c31cb707SPeter Lieven                 num = bs->bl.write_zeroes_alignment;
2935c31cb707SPeter Lieven                 num -= sector_num % bs->bl.write_zeroes_alignment;
2936b8d71c09SPaolo Bonzini             } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
2937b8d71c09SPaolo Bonzini                 /* Shorten the request to the last aligned sector.  num cannot
2938b8d71c09SPaolo Bonzini                  * underflow because num > bs->bl.write_zeroes_alignment.
2939b8d71c09SPaolo Bonzini                  */
2940b8d71c09SPaolo Bonzini                 num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
2941b8d71c09SPaolo Bonzini             }
2942c31cb707SPeter Lieven         }
2943c31cb707SPeter Lieven 
2944c31cb707SPeter Lieven         /* limit request size */
2945c31cb707SPeter Lieven         if (num > max_write_zeroes) {
2946c31cb707SPeter Lieven             num = max_write_zeroes;
2947c31cb707SPeter Lieven         }
2948c31cb707SPeter Lieven 
2949c31cb707SPeter Lieven         ret = -ENOTSUP;
2950f08f2ddaSStefan Hajnoczi         /* First try the efficient write zeroes operation */
2951f08f2ddaSStefan Hajnoczi         if (drv->bdrv_co_write_zeroes) {
2952c31cb707SPeter Lieven             ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
2953f08f2ddaSStefan Hajnoczi         }
2954f08f2ddaSStefan Hajnoczi 
2955c31cb707SPeter Lieven         if (ret == -ENOTSUP) {
2956f08f2ddaSStefan Hajnoczi             /* Fall back to bounce buffer if write zeroes is unsupported */
2957c31cb707SPeter Lieven             iov.iov_len = num * BDRV_SECTOR_SIZE;
2958c31cb707SPeter Lieven             if (iov.iov_base == NULL) {
2959b8d71c09SPaolo Bonzini                 iov.iov_base = qemu_blockalign(bs, num * BDRV_SECTOR_SIZE);
2960b8d71c09SPaolo Bonzini                 memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
2961c31cb707SPeter Lieven             }
2962f08f2ddaSStefan Hajnoczi             qemu_iovec_init_external(&qiov, &iov, 1);
2963f08f2ddaSStefan Hajnoczi 
2964c31cb707SPeter Lieven             ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);
2965b8d71c09SPaolo Bonzini 
2966b8d71c09SPaolo Bonzini             /* Keep bounce buffer around if it is big enough for all
2967b8d71c09SPaolo Bonzini              * all future requests.
2968b8d71c09SPaolo Bonzini              */
2969b8d71c09SPaolo Bonzini             if (num < max_write_zeroes) {
2970b8d71c09SPaolo Bonzini                 qemu_vfree(iov.iov_base);
2971b8d71c09SPaolo Bonzini                 iov.iov_base = NULL;
2972b8d71c09SPaolo Bonzini             }
2973c31cb707SPeter Lieven         }
2974c31cb707SPeter Lieven 
2975c31cb707SPeter Lieven         sector_num += num;
2976c31cb707SPeter Lieven         nb_sectors -= num;
2977c31cb707SPeter Lieven     }
2978f08f2ddaSStefan Hajnoczi 
2979f08f2ddaSStefan Hajnoczi     qemu_vfree(iov.iov_base);
2980f08f2ddaSStefan Hajnoczi     return ret;
2981f08f2ddaSStefan Hajnoczi }
2982f08f2ddaSStefan Hajnoczi 
2983c5fbe571SStefan Hajnoczi /*
2984c5fbe571SStefan Hajnoczi  * Handle a write request in coroutine context
2985c5fbe571SStefan Hajnoczi  */
2986c5fbe571SStefan Hajnoczi static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
2987f08f2ddaSStefan Hajnoczi     int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
2988f08f2ddaSStefan Hajnoczi     BdrvRequestFlags flags)
2989c5fbe571SStefan Hajnoczi {
2990c5fbe571SStefan Hajnoczi     BlockDriver *drv = bs->drv;
2991dbffbdcfSStefan Hajnoczi     BdrvTrackedRequest req;
29926b7cb247SStefan Hajnoczi     int ret;
2993da1fa91dSKevin Wolf 
2994da1fa91dSKevin Wolf     if (!bs->drv) {
2995da1fa91dSKevin Wolf         return -ENOMEDIUM;
2996da1fa91dSKevin Wolf     }
2997da1fa91dSKevin Wolf     if (bs->read_only) {
2998da1fa91dSKevin Wolf         return -EACCES;
2999da1fa91dSKevin Wolf     }
3000da1fa91dSKevin Wolf     if (bdrv_check_request(bs, sector_num, nb_sectors)) {
3001da1fa91dSKevin Wolf         return -EIO;
3002da1fa91dSKevin Wolf     }
3003da1fa91dSKevin Wolf 
3004470c0504SStefan Hajnoczi     if (bs->copy_on_read_in_flight) {
3005f4658285SStefan Hajnoczi         wait_for_overlapping_requests(bs, sector_num, nb_sectors);
3006f4658285SStefan Hajnoczi     }
3007f4658285SStefan Hajnoczi 
3008cc0681c4SBenoît Canet     /* throttling disk I/O */
3009cc0681c4SBenoît Canet     if (bs->io_limits_enabled) {
3010cc0681c4SBenoît Canet         bdrv_io_limits_intercept(bs, nb_sectors, true);
3011cc0681c4SBenoît Canet     }
3012cc0681c4SBenoît Canet 
3013dbffbdcfSStefan Hajnoczi     tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
3014dbffbdcfSStefan Hajnoczi 
3015d616b224SStefan Hajnoczi     ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req);
3016d616b224SStefan Hajnoczi 
3017d616b224SStefan Hajnoczi     if (ret < 0) {
3018d616b224SStefan Hajnoczi         /* Do nothing, write notifier decided to fail this request */
3019d616b224SStefan Hajnoczi     } else if (flags & BDRV_REQ_ZERO_WRITE) {
3020aa7bfbffSPeter Lieven         ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
3021f08f2ddaSStefan Hajnoczi     } else {
30226b7cb247SStefan Hajnoczi         ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
3023f08f2ddaSStefan Hajnoczi     }
30246b7cb247SStefan Hajnoczi 
3025f05fa4adSPaolo Bonzini     if (ret == 0 && !bs->enable_write_cache) {
3026f05fa4adSPaolo Bonzini         ret = bdrv_co_flush(bs);
3027f05fa4adSPaolo Bonzini     }
3028f05fa4adSPaolo Bonzini 
30291755da16SPaolo Bonzini     bdrv_set_dirty(bs, sector_num, nb_sectors);
3030da1fa91dSKevin Wolf 
3031da1fa91dSKevin Wolf     if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
3032da1fa91dSKevin Wolf         bs->wr_highest_sector = sector_num + nb_sectors - 1;
3033da1fa91dSKevin Wolf     }
3034df2a6f29SPaolo Bonzini     if (bs->growable && ret >= 0) {
3035df2a6f29SPaolo Bonzini         bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
3036df2a6f29SPaolo Bonzini     }
3037da1fa91dSKevin Wolf 
3038dbffbdcfSStefan Hajnoczi     tracked_request_end(&req);
3039dbffbdcfSStefan Hajnoczi 
30406b7cb247SStefan Hajnoczi     return ret;
3041da1fa91dSKevin Wolf }
3042da1fa91dSKevin Wolf 
3043c5fbe571SStefan Hajnoczi int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
3044c5fbe571SStefan Hajnoczi     int nb_sectors, QEMUIOVector *qiov)
3045c5fbe571SStefan Hajnoczi {
3046c5fbe571SStefan Hajnoczi     trace_bdrv_co_writev(bs, sector_num, nb_sectors);
3047c5fbe571SStefan Hajnoczi 
3048f08f2ddaSStefan Hajnoczi     return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
3049f08f2ddaSStefan Hajnoczi }
3050f08f2ddaSStefan Hajnoczi 
3051f08f2ddaSStefan Hajnoczi int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
3052aa7bfbffSPeter Lieven                                       int64_t sector_num, int nb_sectors,
3053aa7bfbffSPeter Lieven                                       BdrvRequestFlags flags)
3054f08f2ddaSStefan Hajnoczi {
305594d6ff21SPaolo Bonzini     trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
3056f08f2ddaSStefan Hajnoczi 
3057d32f35cbSPeter Lieven     if (!(bs->open_flags & BDRV_O_UNMAP)) {
3058d32f35cbSPeter Lieven         flags &= ~BDRV_REQ_MAY_UNMAP;
3059d32f35cbSPeter Lieven     }
3060d32f35cbSPeter Lieven 
3061f08f2ddaSStefan Hajnoczi     return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
3062aa7bfbffSPeter Lieven                              BDRV_REQ_ZERO_WRITE | flags);
3063c5fbe571SStefan Hajnoczi }
3064c5fbe571SStefan Hajnoczi 
306583f64091Sbellard /**
306683f64091Sbellard  * Truncate file to 'offset' bytes (needed only for file protocols)
306783f64091Sbellard  */
306883f64091Sbellard int bdrv_truncate(BlockDriverState *bs, int64_t offset)
306983f64091Sbellard {
307083f64091Sbellard     BlockDriver *drv = bs->drv;
307151762288SStefan Hajnoczi     int ret;
307283f64091Sbellard     if (!drv)
307319cb3738Sbellard         return -ENOMEDIUM;
307483f64091Sbellard     if (!drv->bdrv_truncate)
307583f64091Sbellard         return -ENOTSUP;
307659f2689dSNaphtali Sprei     if (bs->read_only)
307759f2689dSNaphtali Sprei         return -EACCES;
30788591675fSMarcelo Tosatti     if (bdrv_in_use(bs))
30798591675fSMarcelo Tosatti         return -EBUSY;
308051762288SStefan Hajnoczi     ret = drv->bdrv_truncate(bs, offset);
308151762288SStefan Hajnoczi     if (ret == 0) {
308251762288SStefan Hajnoczi         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
3083145feb17SMarkus Armbruster         bdrv_dev_resize_cb(bs);
308451762288SStefan Hajnoczi     }
308551762288SStefan Hajnoczi     return ret;
308683f64091Sbellard }
308783f64091Sbellard 
308883f64091Sbellard /**
30894a1d5e1fSFam Zheng  * Length of a allocated file in bytes. Sparse files are counted by actual
30904a1d5e1fSFam Zheng  * allocated space. Return < 0 if error or unknown.
30914a1d5e1fSFam Zheng  */
30924a1d5e1fSFam Zheng int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
30934a1d5e1fSFam Zheng {
30944a1d5e1fSFam Zheng     BlockDriver *drv = bs->drv;
30954a1d5e1fSFam Zheng     if (!drv) {
30964a1d5e1fSFam Zheng         return -ENOMEDIUM;
30974a1d5e1fSFam Zheng     }
30984a1d5e1fSFam Zheng     if (drv->bdrv_get_allocated_file_size) {
30994a1d5e1fSFam Zheng         return drv->bdrv_get_allocated_file_size(bs);
31004a1d5e1fSFam Zheng     }
31014a1d5e1fSFam Zheng     if (bs->file) {
31024a1d5e1fSFam Zheng         return bdrv_get_allocated_file_size(bs->file);
31034a1d5e1fSFam Zheng     }
31044a1d5e1fSFam Zheng     return -ENOTSUP;
31054a1d5e1fSFam Zheng }
31064a1d5e1fSFam Zheng 
31074a1d5e1fSFam Zheng /**
310883f64091Sbellard  * Length of a file in bytes. Return < 0 if error or unknown.
310983f64091Sbellard  */
311083f64091Sbellard int64_t bdrv_getlength(BlockDriverState *bs)
311183f64091Sbellard {
311283f64091Sbellard     BlockDriver *drv = bs->drv;
311383f64091Sbellard     if (!drv)
311419cb3738Sbellard         return -ENOMEDIUM;
311551762288SStefan Hajnoczi 
3116b94a2610SKevin Wolf     if (drv->has_variable_length) {
3117b94a2610SKevin Wolf         int ret = refresh_total_sectors(bs, bs->total_sectors);
3118b94a2610SKevin Wolf         if (ret < 0) {
3119b94a2610SKevin Wolf             return ret;
3120fc01f7e7Sbellard         }
312146a4e4e6SStefan Hajnoczi     }
312246a4e4e6SStefan Hajnoczi     return bs->total_sectors * BDRV_SECTOR_SIZE;
312346a4e4e6SStefan Hajnoczi }
3124fc01f7e7Sbellard 
312519cb3738Sbellard /* return 0 as number of sectors if no device present or error */
312696b8f136Sths void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
3127fc01f7e7Sbellard {
312819cb3738Sbellard     int64_t length;
312919cb3738Sbellard     length = bdrv_getlength(bs);
313019cb3738Sbellard     if (length < 0)
313119cb3738Sbellard         length = 0;
313219cb3738Sbellard     else
31336ea44308SJan Kiszka         length = length >> BDRV_SECTOR_BITS;
313419cb3738Sbellard     *nb_sectors_ptr = length;
3135fc01f7e7Sbellard }
3136cf98951bSbellard 
3137ff06f5f3SPaolo Bonzini void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
3138ff06f5f3SPaolo Bonzini                        BlockdevOnError on_write_error)
3139abd7f68dSMarkus Armbruster {
3140abd7f68dSMarkus Armbruster     bs->on_read_error = on_read_error;
3141abd7f68dSMarkus Armbruster     bs->on_write_error = on_write_error;
3142abd7f68dSMarkus Armbruster }
3143abd7f68dSMarkus Armbruster 
31441ceee0d5SPaolo Bonzini BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
3145abd7f68dSMarkus Armbruster {
3146abd7f68dSMarkus Armbruster     return is_read ? bs->on_read_error : bs->on_write_error;
3147abd7f68dSMarkus Armbruster }
3148abd7f68dSMarkus Armbruster 
31493e1caa5fSPaolo Bonzini BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
31503e1caa5fSPaolo Bonzini {
31513e1caa5fSPaolo Bonzini     BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
31523e1caa5fSPaolo Bonzini 
31533e1caa5fSPaolo Bonzini     switch (on_err) {
31543e1caa5fSPaolo Bonzini     case BLOCKDEV_ON_ERROR_ENOSPC:
31553e1caa5fSPaolo Bonzini         return (error == ENOSPC) ? BDRV_ACTION_STOP : BDRV_ACTION_REPORT;
31563e1caa5fSPaolo Bonzini     case BLOCKDEV_ON_ERROR_STOP:
31573e1caa5fSPaolo Bonzini         return BDRV_ACTION_STOP;
31583e1caa5fSPaolo Bonzini     case BLOCKDEV_ON_ERROR_REPORT:
31593e1caa5fSPaolo Bonzini         return BDRV_ACTION_REPORT;
31603e1caa5fSPaolo Bonzini     case BLOCKDEV_ON_ERROR_IGNORE:
31613e1caa5fSPaolo Bonzini         return BDRV_ACTION_IGNORE;
31623e1caa5fSPaolo Bonzini     default:
31633e1caa5fSPaolo Bonzini         abort();
31643e1caa5fSPaolo Bonzini     }
31653e1caa5fSPaolo Bonzini }
31663e1caa5fSPaolo Bonzini 
31673e1caa5fSPaolo Bonzini /* This is done by device models because, while the block layer knows
31683e1caa5fSPaolo Bonzini  * about the error, it does not know whether an operation comes from
31693e1caa5fSPaolo Bonzini  * the device or the block layer (from a job, for example).
31703e1caa5fSPaolo Bonzini  */
31713e1caa5fSPaolo Bonzini void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
31723e1caa5fSPaolo Bonzini                        bool is_read, int error)
31733e1caa5fSPaolo Bonzini {
31743e1caa5fSPaolo Bonzini     assert(error >= 0);
317532c81a4aSPaolo Bonzini     bdrv_emit_qmp_error_event(bs, QEVENT_BLOCK_IO_ERROR, action, is_read);
31763e1caa5fSPaolo Bonzini     if (action == BDRV_ACTION_STOP) {
31773e1caa5fSPaolo Bonzini         vm_stop(RUN_STATE_IO_ERROR);
31783e1caa5fSPaolo Bonzini         bdrv_iostatus_set_err(bs, error);
31793e1caa5fSPaolo Bonzini     }
31803e1caa5fSPaolo Bonzini }
31813e1caa5fSPaolo Bonzini 
3182b338082bSbellard int bdrv_is_read_only(BlockDriverState *bs)
3183b338082bSbellard {
3184b338082bSbellard     return bs->read_only;
3185b338082bSbellard }
3186b338082bSbellard 
3187985a03b0Sths int bdrv_is_sg(BlockDriverState *bs)
3188985a03b0Sths {
3189985a03b0Sths     return bs->sg;
3190985a03b0Sths }
3191985a03b0Sths 
3192e900a7b7SChristoph Hellwig int bdrv_enable_write_cache(BlockDriverState *bs)
3193e900a7b7SChristoph Hellwig {
3194e900a7b7SChristoph Hellwig     return bs->enable_write_cache;
3195e900a7b7SChristoph Hellwig }
3196e900a7b7SChristoph Hellwig 
3197425b0148SPaolo Bonzini void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
3198425b0148SPaolo Bonzini {
3199425b0148SPaolo Bonzini     bs->enable_write_cache = wce;
320055b110f2SJeff Cody 
320155b110f2SJeff Cody     /* so a reopen() will preserve wce */
320255b110f2SJeff Cody     if (wce) {
320355b110f2SJeff Cody         bs->open_flags |= BDRV_O_CACHE_WB;
320455b110f2SJeff Cody     } else {
320555b110f2SJeff Cody         bs->open_flags &= ~BDRV_O_CACHE_WB;
320655b110f2SJeff Cody     }
3207425b0148SPaolo Bonzini }
3208425b0148SPaolo Bonzini 
3209ea2384d3Sbellard int bdrv_is_encrypted(BlockDriverState *bs)
3210ea2384d3Sbellard {
3211ea2384d3Sbellard     if (bs->backing_hd && bs->backing_hd->encrypted)
3212ea2384d3Sbellard         return 1;
3213ea2384d3Sbellard     return bs->encrypted;
3214ea2384d3Sbellard }
3215ea2384d3Sbellard 
3216c0f4ce77Saliguori int bdrv_key_required(BlockDriverState *bs)
3217c0f4ce77Saliguori {
3218c0f4ce77Saliguori     BlockDriverState *backing_hd = bs->backing_hd;
3219c0f4ce77Saliguori 
3220c0f4ce77Saliguori     if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
3221c0f4ce77Saliguori         return 1;
3222c0f4ce77Saliguori     return (bs->encrypted && !bs->valid_key);
3223c0f4ce77Saliguori }
3224c0f4ce77Saliguori 
3225ea2384d3Sbellard int bdrv_set_key(BlockDriverState *bs, const char *key)
3226ea2384d3Sbellard {
3227ea2384d3Sbellard     int ret;
3228ea2384d3Sbellard     if (bs->backing_hd && bs->backing_hd->encrypted) {
3229ea2384d3Sbellard         ret = bdrv_set_key(bs->backing_hd, key);
3230ea2384d3Sbellard         if (ret < 0)
3231ea2384d3Sbellard             return ret;
3232ea2384d3Sbellard         if (!bs->encrypted)
3233ea2384d3Sbellard             return 0;
3234ea2384d3Sbellard     }
3235fd04a2aeSShahar Havivi     if (!bs->encrypted) {
3236fd04a2aeSShahar Havivi         return -EINVAL;
3237fd04a2aeSShahar Havivi     } else if (!bs->drv || !bs->drv->bdrv_set_key) {
3238fd04a2aeSShahar Havivi         return -ENOMEDIUM;
3239fd04a2aeSShahar Havivi     }
3240c0f4ce77Saliguori     ret = bs->drv->bdrv_set_key(bs, key);
3241bb5fc20fSaliguori     if (ret < 0) {
3242bb5fc20fSaliguori         bs->valid_key = 0;
3243bb5fc20fSaliguori     } else if (!bs->valid_key) {
3244bb5fc20fSaliguori         bs->valid_key = 1;
3245bb5fc20fSaliguori         /* call the change callback now, we skipped it on open */
32467d4b4ba5SMarkus Armbruster         bdrv_dev_change_media_cb(bs, true);
3247bb5fc20fSaliguori     }
3248c0f4ce77Saliguori     return ret;
3249ea2384d3Sbellard }
3250ea2384d3Sbellard 
3251f8d6bba1SMarkus Armbruster const char *bdrv_get_format_name(BlockDriverState *bs)
3252ea2384d3Sbellard {
3253f8d6bba1SMarkus Armbruster     return bs->drv ? bs->drv->format_name : NULL;
3254ea2384d3Sbellard }
3255ea2384d3Sbellard 
3256ea2384d3Sbellard void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
3257ea2384d3Sbellard                          void *opaque)
3258ea2384d3Sbellard {
3259ea2384d3Sbellard     BlockDriver *drv;
3260ea2384d3Sbellard 
32618a22f02aSStefan Hajnoczi     QLIST_FOREACH(drv, &bdrv_drivers, list) {
3262ea2384d3Sbellard         it(opaque, drv->format_name);
3263ea2384d3Sbellard     }
3264ea2384d3Sbellard }
3265ea2384d3Sbellard 
3266dc364f4cSBenoît Canet /* This function is to find block backend bs */
3267b338082bSbellard BlockDriverState *bdrv_find(const char *name)
3268b338082bSbellard {
3269b338082bSbellard     BlockDriverState *bs;
3270b338082bSbellard 
3271dc364f4cSBenoît Canet     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
32721b7bdbc1SStefan Hajnoczi         if (!strcmp(name, bs->device_name)) {
3273b338082bSbellard             return bs;
3274b338082bSbellard         }
32751b7bdbc1SStefan Hajnoczi     }
3276b338082bSbellard     return NULL;
3277b338082bSbellard }
3278b338082bSbellard 
3279dc364f4cSBenoît Canet /* This function is to find a node in the bs graph */
3280dc364f4cSBenoît Canet BlockDriverState *bdrv_find_node(const char *node_name)
3281dc364f4cSBenoît Canet {
3282dc364f4cSBenoît Canet     BlockDriverState *bs;
3283dc364f4cSBenoît Canet 
3284dc364f4cSBenoît Canet     assert(node_name);
3285dc364f4cSBenoît Canet 
3286dc364f4cSBenoît Canet     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3287dc364f4cSBenoît Canet         if (!strcmp(node_name, bs->node_name)) {
3288dc364f4cSBenoît Canet             return bs;
3289dc364f4cSBenoît Canet         }
3290dc364f4cSBenoît Canet     }
3291dc364f4cSBenoît Canet     return NULL;
3292dc364f4cSBenoît Canet }
3293dc364f4cSBenoît Canet 
32942f399b0aSMarkus Armbruster BlockDriverState *bdrv_next(BlockDriverState *bs)
32952f399b0aSMarkus Armbruster {
32962f399b0aSMarkus Armbruster     if (!bs) {
32972f399b0aSMarkus Armbruster         return QTAILQ_FIRST(&bdrv_states);
32982f399b0aSMarkus Armbruster     }
3299dc364f4cSBenoît Canet     return QTAILQ_NEXT(bs, device_list);
33002f399b0aSMarkus Armbruster }
33012f399b0aSMarkus Armbruster 
330251de9760Saliguori void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
330381d0912dSbellard {
330481d0912dSbellard     BlockDriverState *bs;
330581d0912dSbellard 
3306dc364f4cSBenoît Canet     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
330751de9760Saliguori         it(opaque, bs);
330881d0912dSbellard     }
330981d0912dSbellard }
331081d0912dSbellard 
3311ea2384d3Sbellard const char *bdrv_get_device_name(BlockDriverState *bs)
3312ea2384d3Sbellard {
3313ea2384d3Sbellard     return bs->device_name;
3314ea2384d3Sbellard }
3315ea2384d3Sbellard 
3316c8433287SMarkus Armbruster int bdrv_get_flags(BlockDriverState *bs)
3317c8433287SMarkus Armbruster {
3318c8433287SMarkus Armbruster     return bs->open_flags;
3319c8433287SMarkus Armbruster }
3320c8433287SMarkus Armbruster 
3321f0f0fdfeSKevin Wolf int bdrv_flush_all(void)
3322c6ca28d6Saliguori {
3323c6ca28d6Saliguori     BlockDriverState *bs;
3324f0f0fdfeSKevin Wolf     int result = 0;
3325c6ca28d6Saliguori 
3326dc364f4cSBenoît Canet     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3327f0f0fdfeSKevin Wolf         int ret = bdrv_flush(bs);
3328f0f0fdfeSKevin Wolf         if (ret < 0 && !result) {
3329f0f0fdfeSKevin Wolf             result = ret;
3330c6ca28d6Saliguori         }
33311b7bdbc1SStefan Hajnoczi     }
3332c6ca28d6Saliguori 
3333f0f0fdfeSKevin Wolf     return result;
3334f0f0fdfeSKevin Wolf }
3335f0f0fdfeSKevin Wolf 
33363ac21627SPeter Lieven int bdrv_has_zero_init_1(BlockDriverState *bs)
33373ac21627SPeter Lieven {
33383ac21627SPeter Lieven     return 1;
33393ac21627SPeter Lieven }
33403ac21627SPeter Lieven 
3341f2feebbdSKevin Wolf int bdrv_has_zero_init(BlockDriverState *bs)
3342f2feebbdSKevin Wolf {
3343f2feebbdSKevin Wolf     assert(bs->drv);
3344f2feebbdSKevin Wolf 
334511212d8fSPaolo Bonzini     /* If BS is a copy on write image, it is initialized to
334611212d8fSPaolo Bonzini        the contents of the base image, which may not be zeroes.  */
334711212d8fSPaolo Bonzini     if (bs->backing_hd) {
334811212d8fSPaolo Bonzini         return 0;
334911212d8fSPaolo Bonzini     }
3350336c1c12SKevin Wolf     if (bs->drv->bdrv_has_zero_init) {
3351336c1c12SKevin Wolf         return bs->drv->bdrv_has_zero_init(bs);
3352f2feebbdSKevin Wolf     }
3353f2feebbdSKevin Wolf 
33543ac21627SPeter Lieven     /* safe default */
33553ac21627SPeter Lieven     return 0;
3356f2feebbdSKevin Wolf }
3357f2feebbdSKevin Wolf 
33584ce78691SPeter Lieven bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
33594ce78691SPeter Lieven {
33604ce78691SPeter Lieven     BlockDriverInfo bdi;
33614ce78691SPeter Lieven 
33624ce78691SPeter Lieven     if (bs->backing_hd) {
33634ce78691SPeter Lieven         return false;
33644ce78691SPeter Lieven     }
33654ce78691SPeter Lieven 
33664ce78691SPeter Lieven     if (bdrv_get_info(bs, &bdi) == 0) {
33674ce78691SPeter Lieven         return bdi.unallocated_blocks_are_zero;
33684ce78691SPeter Lieven     }
33694ce78691SPeter Lieven 
33704ce78691SPeter Lieven     return false;
33714ce78691SPeter Lieven }
33724ce78691SPeter Lieven 
33734ce78691SPeter Lieven bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
33744ce78691SPeter Lieven {
33754ce78691SPeter Lieven     BlockDriverInfo bdi;
33764ce78691SPeter Lieven 
33774ce78691SPeter Lieven     if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
33784ce78691SPeter Lieven         return false;
33794ce78691SPeter Lieven     }
33804ce78691SPeter Lieven 
33814ce78691SPeter Lieven     if (bdrv_get_info(bs, &bdi) == 0) {
33824ce78691SPeter Lieven         return bdi.can_write_zeroes_with_unmap;
33834ce78691SPeter Lieven     }
33844ce78691SPeter Lieven 
33854ce78691SPeter Lieven     return false;
33864ce78691SPeter Lieven }
33874ce78691SPeter Lieven 
3388b6b8a333SPaolo Bonzini typedef struct BdrvCoGetBlockStatusData {
3389376ae3f1SStefan Hajnoczi     BlockDriverState *bs;
3390b35b2bbaSMiroslav Rezanina     BlockDriverState *base;
3391376ae3f1SStefan Hajnoczi     int64_t sector_num;
3392376ae3f1SStefan Hajnoczi     int nb_sectors;
3393376ae3f1SStefan Hajnoczi     int *pnum;
3394b6b8a333SPaolo Bonzini     int64_t ret;
3395376ae3f1SStefan Hajnoczi     bool done;
3396b6b8a333SPaolo Bonzini } BdrvCoGetBlockStatusData;
3397376ae3f1SStefan Hajnoczi 
3398f58c7b35Sths /*
3399f58c7b35Sths  * Returns true iff the specified sector is present in the disk image. Drivers
3400f58c7b35Sths  * not implementing the functionality are assumed to not support backing files,
3401f58c7b35Sths  * hence all their sectors are reported as allocated.
3402f58c7b35Sths  *
3403bd9533e3SStefan Hajnoczi  * If 'sector_num' is beyond the end of the disk image the return value is 0
3404bd9533e3SStefan Hajnoczi  * and 'pnum' is set to 0.
3405bd9533e3SStefan Hajnoczi  *
3406f58c7b35Sths  * 'pnum' is set to the number of sectors (including and immediately following
3407f58c7b35Sths  * the specified sector) that are known to be in the same
3408f58c7b35Sths  * allocated/unallocated state.
3409f58c7b35Sths  *
3410bd9533e3SStefan Hajnoczi  * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
3411bd9533e3SStefan Hajnoczi  * beyond the end of the disk image it will be clamped.
3412f58c7b35Sths  */
3413b6b8a333SPaolo Bonzini static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
3414bdad13b9SPaolo Bonzini                                                      int64_t sector_num,
3415060f51c9SStefan Hajnoczi                                                      int nb_sectors, int *pnum)
3416f58c7b35Sths {
3417617ccb46SPaolo Bonzini     int64_t length;
3418f58c7b35Sths     int64_t n;
34195daa74a6SPaolo Bonzini     int64_t ret, ret2;
3420bd9533e3SStefan Hajnoczi 
3421617ccb46SPaolo Bonzini     length = bdrv_getlength(bs);
3422617ccb46SPaolo Bonzini     if (length < 0) {
3423617ccb46SPaolo Bonzini         return length;
3424617ccb46SPaolo Bonzini     }
3425617ccb46SPaolo Bonzini 
3426617ccb46SPaolo Bonzini     if (sector_num >= (length >> BDRV_SECTOR_BITS)) {
34276aebab14SStefan Hajnoczi         *pnum = 0;
34286aebab14SStefan Hajnoczi         return 0;
34296aebab14SStefan Hajnoczi     }
3430bd9533e3SStefan Hajnoczi 
34316aebab14SStefan Hajnoczi     n = bs->total_sectors - sector_num;
3432bd9533e3SStefan Hajnoczi     if (n < nb_sectors) {
3433bd9533e3SStefan Hajnoczi         nb_sectors = n;
3434bd9533e3SStefan Hajnoczi     }
3435bd9533e3SStefan Hajnoczi 
3436b6b8a333SPaolo Bonzini     if (!bs->drv->bdrv_co_get_block_status) {
3437bd9533e3SStefan Hajnoczi         *pnum = nb_sectors;
3438918e92d7SPaolo Bonzini         ret = BDRV_BLOCK_DATA;
3439918e92d7SPaolo Bonzini         if (bs->drv->protocol_name) {
3440918e92d7SPaolo Bonzini             ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
3441918e92d7SPaolo Bonzini         }
3442918e92d7SPaolo Bonzini         return ret;
34436aebab14SStefan Hajnoczi     }
34446aebab14SStefan Hajnoczi 
3445415b5b01SPaolo Bonzini     ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
3446415b5b01SPaolo Bonzini     if (ret < 0) {
34473e0a233dSPeter Lieven         *pnum = 0;
3448415b5b01SPaolo Bonzini         return ret;
3449415b5b01SPaolo Bonzini     }
3450415b5b01SPaolo Bonzini 
345192bc50a5SPeter Lieven     if (ret & BDRV_BLOCK_RAW) {
345292bc50a5SPeter Lieven         assert(ret & BDRV_BLOCK_OFFSET_VALID);
345392bc50a5SPeter Lieven         return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
345492bc50a5SPeter Lieven                                      *pnum, pnum);
345592bc50a5SPeter Lieven     }
345692bc50a5SPeter Lieven 
3457c3d86884SPeter Lieven     if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
3458c3d86884SPeter Lieven         if (bdrv_unallocated_blocks_are_zero(bs)) {
3459415b5b01SPaolo Bonzini             ret |= BDRV_BLOCK_ZERO;
34601f9db224SPeter Lieven         } else if (bs->backing_hd) {
3461f0ad5712SPaolo Bonzini             BlockDriverState *bs2 = bs->backing_hd;
3462f0ad5712SPaolo Bonzini             int64_t length2 = bdrv_getlength(bs2);
3463f0ad5712SPaolo Bonzini             if (length2 >= 0 && sector_num >= (length2 >> BDRV_SECTOR_BITS)) {
3464f0ad5712SPaolo Bonzini                 ret |= BDRV_BLOCK_ZERO;
3465f0ad5712SPaolo Bonzini             }
3466f0ad5712SPaolo Bonzini         }
3467415b5b01SPaolo Bonzini     }
34685daa74a6SPaolo Bonzini 
34695daa74a6SPaolo Bonzini     if (bs->file &&
34705daa74a6SPaolo Bonzini         (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
34715daa74a6SPaolo Bonzini         (ret & BDRV_BLOCK_OFFSET_VALID)) {
34725daa74a6SPaolo Bonzini         ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
34735daa74a6SPaolo Bonzini                                         *pnum, pnum);
34745daa74a6SPaolo Bonzini         if (ret2 >= 0) {
34755daa74a6SPaolo Bonzini             /* Ignore errors.  This is just providing extra information, it
34765daa74a6SPaolo Bonzini              * is useful but not necessary.
34775daa74a6SPaolo Bonzini              */
34785daa74a6SPaolo Bonzini             ret |= (ret2 & BDRV_BLOCK_ZERO);
34795daa74a6SPaolo Bonzini         }
34805daa74a6SPaolo Bonzini     }
34815daa74a6SPaolo Bonzini 
3482415b5b01SPaolo Bonzini     return ret;
3483060f51c9SStefan Hajnoczi }
3484060f51c9SStefan Hajnoczi 
3485b6b8a333SPaolo Bonzini /* Coroutine wrapper for bdrv_get_block_status() */
3486b6b8a333SPaolo Bonzini static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
3487060f51c9SStefan Hajnoczi {
3488b6b8a333SPaolo Bonzini     BdrvCoGetBlockStatusData *data = opaque;
3489060f51c9SStefan Hajnoczi     BlockDriverState *bs = data->bs;
3490060f51c9SStefan Hajnoczi 
3491b6b8a333SPaolo Bonzini     data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
3492060f51c9SStefan Hajnoczi                                          data->pnum);
3493060f51c9SStefan Hajnoczi     data->done = true;
3494060f51c9SStefan Hajnoczi }
3495060f51c9SStefan Hajnoczi 
3496060f51c9SStefan Hajnoczi /*
3497b6b8a333SPaolo Bonzini  * Synchronous wrapper around bdrv_co_get_block_status().
3498060f51c9SStefan Hajnoczi  *
3499b6b8a333SPaolo Bonzini  * See bdrv_co_get_block_status() for details.
3500060f51c9SStefan Hajnoczi  */
3501b6b8a333SPaolo Bonzini int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
3502b6b8a333SPaolo Bonzini                               int nb_sectors, int *pnum)
3503060f51c9SStefan Hajnoczi {
3504376ae3f1SStefan Hajnoczi     Coroutine *co;
3505b6b8a333SPaolo Bonzini     BdrvCoGetBlockStatusData data = {
3506376ae3f1SStefan Hajnoczi         .bs = bs,
3507376ae3f1SStefan Hajnoczi         .sector_num = sector_num,
3508376ae3f1SStefan Hajnoczi         .nb_sectors = nb_sectors,
3509376ae3f1SStefan Hajnoczi         .pnum = pnum,
3510376ae3f1SStefan Hajnoczi         .done = false,
3511376ae3f1SStefan Hajnoczi     };
3512376ae3f1SStefan Hajnoczi 
3513bdad13b9SPaolo Bonzini     if (qemu_in_coroutine()) {
3514bdad13b9SPaolo Bonzini         /* Fast-path if already in coroutine context */
3515b6b8a333SPaolo Bonzini         bdrv_get_block_status_co_entry(&data);
3516bdad13b9SPaolo Bonzini     } else {
3517b6b8a333SPaolo Bonzini         co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
3518376ae3f1SStefan Hajnoczi         qemu_coroutine_enter(co, &data);
3519376ae3f1SStefan Hajnoczi         while (!data.done) {
3520376ae3f1SStefan Hajnoczi             qemu_aio_wait();
3521376ae3f1SStefan Hajnoczi         }
3522bdad13b9SPaolo Bonzini     }
3523376ae3f1SStefan Hajnoczi     return data.ret;
3524376ae3f1SStefan Hajnoczi }
3525f58c7b35Sths 
3526b6b8a333SPaolo Bonzini int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
3527b6b8a333SPaolo Bonzini                                    int nb_sectors, int *pnum)
3528b6b8a333SPaolo Bonzini {
35294333bb71SPaolo Bonzini     int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
35304333bb71SPaolo Bonzini     if (ret < 0) {
35314333bb71SPaolo Bonzini         return ret;
35324333bb71SPaolo Bonzini     }
35334333bb71SPaolo Bonzini     return
35344333bb71SPaolo Bonzini         (ret & BDRV_BLOCK_DATA) ||
35354333bb71SPaolo Bonzini         ((ret & BDRV_BLOCK_ZERO) && !bdrv_has_zero_init(bs));
3536b6b8a333SPaolo Bonzini }
3537b6b8a333SPaolo Bonzini 
3538188a7bbfSPaolo Bonzini /*
3539188a7bbfSPaolo Bonzini  * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
3540188a7bbfSPaolo Bonzini  *
3541188a7bbfSPaolo Bonzini  * Return true if the given sector is allocated in any image between
3542188a7bbfSPaolo Bonzini  * BASE and TOP (inclusive).  BASE can be NULL to check if the given
3543188a7bbfSPaolo Bonzini  * sector is allocated in any image of the chain.  Return false otherwise.
3544188a7bbfSPaolo Bonzini  *
3545188a7bbfSPaolo Bonzini  * 'pnum' is set to the number of sectors (including and immediately following
3546188a7bbfSPaolo Bonzini  *  the specified sector) that are known to be in the same
3547188a7bbfSPaolo Bonzini  *  allocated/unallocated state.
3548188a7bbfSPaolo Bonzini  *
3549188a7bbfSPaolo Bonzini  */
35504f578637SPaolo Bonzini int bdrv_is_allocated_above(BlockDriverState *top,
3551188a7bbfSPaolo Bonzini                             BlockDriverState *base,
3552188a7bbfSPaolo Bonzini                             int64_t sector_num,
3553188a7bbfSPaolo Bonzini                             int nb_sectors, int *pnum)
3554188a7bbfSPaolo Bonzini {
3555188a7bbfSPaolo Bonzini     BlockDriverState *intermediate;
3556188a7bbfSPaolo Bonzini     int ret, n = nb_sectors;
3557188a7bbfSPaolo Bonzini 
3558188a7bbfSPaolo Bonzini     intermediate = top;
3559188a7bbfSPaolo Bonzini     while (intermediate && intermediate != base) {
3560188a7bbfSPaolo Bonzini         int pnum_inter;
3561bdad13b9SPaolo Bonzini         ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
3562188a7bbfSPaolo Bonzini                                 &pnum_inter);
3563188a7bbfSPaolo Bonzini         if (ret < 0) {
3564188a7bbfSPaolo Bonzini             return ret;
3565188a7bbfSPaolo Bonzini         } else if (ret) {
3566188a7bbfSPaolo Bonzini             *pnum = pnum_inter;
3567188a7bbfSPaolo Bonzini             return 1;
3568188a7bbfSPaolo Bonzini         }
3569188a7bbfSPaolo Bonzini 
3570188a7bbfSPaolo Bonzini         /*
3571188a7bbfSPaolo Bonzini          * [sector_num, nb_sectors] is unallocated on top but intermediate
3572188a7bbfSPaolo Bonzini          * might have
3573188a7bbfSPaolo Bonzini          *
3574188a7bbfSPaolo Bonzini          * [sector_num+x, nr_sectors] allocated.
3575188a7bbfSPaolo Bonzini          */
357663ba17d3SVishvananda Ishaya         if (n > pnum_inter &&
357763ba17d3SVishvananda Ishaya             (intermediate == top ||
357863ba17d3SVishvananda Ishaya              sector_num + pnum_inter < intermediate->total_sectors)) {
3579188a7bbfSPaolo Bonzini             n = pnum_inter;
3580188a7bbfSPaolo Bonzini         }
3581188a7bbfSPaolo Bonzini 
3582188a7bbfSPaolo Bonzini         intermediate = intermediate->backing_hd;
3583188a7bbfSPaolo Bonzini     }
3584188a7bbfSPaolo Bonzini 
3585188a7bbfSPaolo Bonzini     *pnum = n;
3586188a7bbfSPaolo Bonzini     return 0;
3587188a7bbfSPaolo Bonzini }
3588188a7bbfSPaolo Bonzini 
3589045df330Saliguori const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
3590045df330Saliguori {
3591045df330Saliguori     if (bs->backing_hd && bs->backing_hd->encrypted)
3592045df330Saliguori         return bs->backing_file;
3593045df330Saliguori     else if (bs->encrypted)
3594045df330Saliguori         return bs->filename;
3595045df330Saliguori     else
3596045df330Saliguori         return NULL;
3597045df330Saliguori }
3598045df330Saliguori 
359983f64091Sbellard void bdrv_get_backing_filename(BlockDriverState *bs,
360083f64091Sbellard                                char *filename, int filename_size)
360183f64091Sbellard {
360283f64091Sbellard     pstrcpy(filename, filename_size, bs->backing_file);
360383f64091Sbellard }
360483f64091Sbellard 
3605faea38e7Sbellard int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
3606faea38e7Sbellard                           const uint8_t *buf, int nb_sectors)
3607faea38e7Sbellard {
3608faea38e7Sbellard     BlockDriver *drv = bs->drv;
3609faea38e7Sbellard     if (!drv)
361019cb3738Sbellard         return -ENOMEDIUM;
3611faea38e7Sbellard     if (!drv->bdrv_write_compressed)
3612faea38e7Sbellard         return -ENOTSUP;
3613fbb7b4e0SKevin Wolf     if (bdrv_check_request(bs, sector_num, nb_sectors))
3614fbb7b4e0SKevin Wolf         return -EIO;
36157cd1e32aSlirans@il.ibm.com 
3616e4654d2dSFam Zheng     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
36177cd1e32aSlirans@il.ibm.com 
3618faea38e7Sbellard     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
3619faea38e7Sbellard }
3620faea38e7Sbellard 
3621faea38e7Sbellard int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
3622faea38e7Sbellard {
3623faea38e7Sbellard     BlockDriver *drv = bs->drv;
3624faea38e7Sbellard     if (!drv)
362519cb3738Sbellard         return -ENOMEDIUM;
3626faea38e7Sbellard     if (!drv->bdrv_get_info)
3627faea38e7Sbellard         return -ENOTSUP;
3628faea38e7Sbellard     memset(bdi, 0, sizeof(*bdi));
3629faea38e7Sbellard     return drv->bdrv_get_info(bs, bdi);
3630faea38e7Sbellard }
3631faea38e7Sbellard 
3632eae041feSMax Reitz ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
3633eae041feSMax Reitz {
3634eae041feSMax Reitz     BlockDriver *drv = bs->drv;
3635eae041feSMax Reitz     if (drv && drv->bdrv_get_specific_info) {
3636eae041feSMax Reitz         return drv->bdrv_get_specific_info(bs);
3637eae041feSMax Reitz     }
3638eae041feSMax Reitz     return NULL;
3639eae041feSMax Reitz }
3640eae041feSMax Reitz 
364145566e9cSChristoph Hellwig int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
364245566e9cSChristoph Hellwig                       int64_t pos, int size)
3643178e08a5Saliguori {
3644cf8074b3SKevin Wolf     QEMUIOVector qiov;
3645cf8074b3SKevin Wolf     struct iovec iov = {
3646cf8074b3SKevin Wolf         .iov_base   = (void *) buf,
3647cf8074b3SKevin Wolf         .iov_len    = size,
3648cf8074b3SKevin Wolf     };
3649cf8074b3SKevin Wolf 
3650cf8074b3SKevin Wolf     qemu_iovec_init_external(&qiov, &iov, 1);
3651cf8074b3SKevin Wolf     return bdrv_writev_vmstate(bs, &qiov, pos);
3652cf8074b3SKevin Wolf }
3653cf8074b3SKevin Wolf 
3654cf8074b3SKevin Wolf int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
3655cf8074b3SKevin Wolf {
3656178e08a5Saliguori     BlockDriver *drv = bs->drv;
3657cf8074b3SKevin Wolf 
3658cf8074b3SKevin Wolf     if (!drv) {
3659178e08a5Saliguori         return -ENOMEDIUM;
3660cf8074b3SKevin Wolf     } else if (drv->bdrv_save_vmstate) {
3661cf8074b3SKevin Wolf         return drv->bdrv_save_vmstate(bs, qiov, pos);
3662cf8074b3SKevin Wolf     } else if (bs->file) {
3663cf8074b3SKevin Wolf         return bdrv_writev_vmstate(bs->file, qiov, pos);
3664cf8074b3SKevin Wolf     }
3665cf8074b3SKevin Wolf 
36667cdb1f6dSMORITA Kazutaka     return -ENOTSUP;
3667178e08a5Saliguori }
3668178e08a5Saliguori 
366945566e9cSChristoph Hellwig int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
367045566e9cSChristoph Hellwig                       int64_t pos, int size)
3671178e08a5Saliguori {
3672178e08a5Saliguori     BlockDriver *drv = bs->drv;
3673178e08a5Saliguori     if (!drv)
3674178e08a5Saliguori         return -ENOMEDIUM;
36757cdb1f6dSMORITA Kazutaka     if (drv->bdrv_load_vmstate)
367645566e9cSChristoph Hellwig         return drv->bdrv_load_vmstate(bs, buf, pos, size);
36777cdb1f6dSMORITA Kazutaka     if (bs->file)
36787cdb1f6dSMORITA Kazutaka         return bdrv_load_vmstate(bs->file, buf, pos, size);
36797cdb1f6dSMORITA Kazutaka     return -ENOTSUP;
3680178e08a5Saliguori }
3681178e08a5Saliguori 
36828b9b0cc2SKevin Wolf void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
36838b9b0cc2SKevin Wolf {
3684bf736fe3SKevin Wolf     if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
36858b9b0cc2SKevin Wolf         return;
36868b9b0cc2SKevin Wolf     }
36878b9b0cc2SKevin Wolf 
3688bf736fe3SKevin Wolf     bs->drv->bdrv_debug_event(bs, event);
368941c695c7SKevin Wolf }
36908b9b0cc2SKevin Wolf 
369141c695c7SKevin Wolf int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
369241c695c7SKevin Wolf                           const char *tag)
369341c695c7SKevin Wolf {
369441c695c7SKevin Wolf     while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
369541c695c7SKevin Wolf         bs = bs->file;
369641c695c7SKevin Wolf     }
369741c695c7SKevin Wolf 
369841c695c7SKevin Wolf     if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
369941c695c7SKevin Wolf         return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
370041c695c7SKevin Wolf     }
370141c695c7SKevin Wolf 
370241c695c7SKevin Wolf     return -ENOTSUP;
370341c695c7SKevin Wolf }
370441c695c7SKevin Wolf 
37054cc70e93SFam Zheng int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
37064cc70e93SFam Zheng {
37074cc70e93SFam Zheng     while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
37084cc70e93SFam Zheng         bs = bs->file;
37094cc70e93SFam Zheng     }
37104cc70e93SFam Zheng 
37114cc70e93SFam Zheng     if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
37124cc70e93SFam Zheng         return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
37134cc70e93SFam Zheng     }
37144cc70e93SFam Zheng 
37154cc70e93SFam Zheng     return -ENOTSUP;
37164cc70e93SFam Zheng }
37174cc70e93SFam Zheng 
371841c695c7SKevin Wolf int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
371941c695c7SKevin Wolf {
372041c695c7SKevin Wolf     while (bs && bs->drv && !bs->drv->bdrv_debug_resume) {
372141c695c7SKevin Wolf         bs = bs->file;
372241c695c7SKevin Wolf     }
372341c695c7SKevin Wolf 
372441c695c7SKevin Wolf     if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
372541c695c7SKevin Wolf         return bs->drv->bdrv_debug_resume(bs, tag);
372641c695c7SKevin Wolf     }
372741c695c7SKevin Wolf 
372841c695c7SKevin Wolf     return -ENOTSUP;
372941c695c7SKevin Wolf }
373041c695c7SKevin Wolf 
373141c695c7SKevin Wolf bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
373241c695c7SKevin Wolf {
373341c695c7SKevin Wolf     while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
373441c695c7SKevin Wolf         bs = bs->file;
373541c695c7SKevin Wolf     }
373641c695c7SKevin Wolf 
373741c695c7SKevin Wolf     if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
373841c695c7SKevin Wolf         return bs->drv->bdrv_debug_is_suspended(bs, tag);
373941c695c7SKevin Wolf     }
374041c695c7SKevin Wolf 
374141c695c7SKevin Wolf     return false;
37428b9b0cc2SKevin Wolf }
37438b9b0cc2SKevin Wolf 
3744199630b6SBlue Swirl int bdrv_is_snapshot(BlockDriverState *bs)
3745199630b6SBlue Swirl {
3746199630b6SBlue Swirl     return !!(bs->open_flags & BDRV_O_SNAPSHOT);
3747199630b6SBlue Swirl }
3748199630b6SBlue Swirl 
3749b1b1d783SJeff Cody /* backing_file can either be relative, or absolute, or a protocol.  If it is
3750b1b1d783SJeff Cody  * relative, it must be relative to the chain.  So, passing in bs->filename
3751b1b1d783SJeff Cody  * from a BDS as backing_file should not be done, as that may be relative to
3752b1b1d783SJeff Cody  * the CWD rather than the chain. */
3753e8a6bb9cSMarcelo Tosatti BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
3754e8a6bb9cSMarcelo Tosatti         const char *backing_file)
3755e8a6bb9cSMarcelo Tosatti {
3756b1b1d783SJeff Cody     char *filename_full = NULL;
3757b1b1d783SJeff Cody     char *backing_file_full = NULL;
3758b1b1d783SJeff Cody     char *filename_tmp = NULL;
3759b1b1d783SJeff Cody     int is_protocol = 0;
3760b1b1d783SJeff Cody     BlockDriverState *curr_bs = NULL;
3761b1b1d783SJeff Cody     BlockDriverState *retval = NULL;
3762b1b1d783SJeff Cody 
3763b1b1d783SJeff Cody     if (!bs || !bs->drv || !backing_file) {
3764e8a6bb9cSMarcelo Tosatti         return NULL;
3765e8a6bb9cSMarcelo Tosatti     }
3766e8a6bb9cSMarcelo Tosatti 
3767b1b1d783SJeff Cody     filename_full     = g_malloc(PATH_MAX);
3768b1b1d783SJeff Cody     backing_file_full = g_malloc(PATH_MAX);
3769b1b1d783SJeff Cody     filename_tmp      = g_malloc(PATH_MAX);
3770b1b1d783SJeff Cody 
3771b1b1d783SJeff Cody     is_protocol = path_has_protocol(backing_file);
3772b1b1d783SJeff Cody 
3773b1b1d783SJeff Cody     for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
3774b1b1d783SJeff Cody 
3775b1b1d783SJeff Cody         /* If either of the filename paths is actually a protocol, then
3776b1b1d783SJeff Cody          * compare unmodified paths; otherwise make paths relative */
3777b1b1d783SJeff Cody         if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
3778b1b1d783SJeff Cody             if (strcmp(backing_file, curr_bs->backing_file) == 0) {
3779b1b1d783SJeff Cody                 retval = curr_bs->backing_hd;
3780b1b1d783SJeff Cody                 break;
3781b1b1d783SJeff Cody             }
3782e8a6bb9cSMarcelo Tosatti         } else {
3783b1b1d783SJeff Cody             /* If not an absolute filename path, make it relative to the current
3784b1b1d783SJeff Cody              * image's filename path */
3785b1b1d783SJeff Cody             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3786b1b1d783SJeff Cody                          backing_file);
3787b1b1d783SJeff Cody 
3788b1b1d783SJeff Cody             /* We are going to compare absolute pathnames */
3789b1b1d783SJeff Cody             if (!realpath(filename_tmp, filename_full)) {
3790b1b1d783SJeff Cody                 continue;
3791b1b1d783SJeff Cody             }
3792b1b1d783SJeff Cody 
3793b1b1d783SJeff Cody             /* We need to make sure the backing filename we are comparing against
3794b1b1d783SJeff Cody              * is relative to the current image filename (or absolute) */
3795b1b1d783SJeff Cody             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3796b1b1d783SJeff Cody                          curr_bs->backing_file);
3797b1b1d783SJeff Cody 
3798b1b1d783SJeff Cody             if (!realpath(filename_tmp, backing_file_full)) {
3799b1b1d783SJeff Cody                 continue;
3800b1b1d783SJeff Cody             }
3801b1b1d783SJeff Cody 
3802b1b1d783SJeff Cody             if (strcmp(backing_file_full, filename_full) == 0) {
3803b1b1d783SJeff Cody                 retval = curr_bs->backing_hd;
3804b1b1d783SJeff Cody                 break;
3805b1b1d783SJeff Cody             }
3806e8a6bb9cSMarcelo Tosatti         }
3807e8a6bb9cSMarcelo Tosatti     }
3808e8a6bb9cSMarcelo Tosatti 
3809b1b1d783SJeff Cody     g_free(filename_full);
3810b1b1d783SJeff Cody     g_free(backing_file_full);
3811b1b1d783SJeff Cody     g_free(filename_tmp);
3812b1b1d783SJeff Cody     return retval;
3813e8a6bb9cSMarcelo Tosatti }
3814e8a6bb9cSMarcelo Tosatti 
3815f198fd1cSBenoît Canet int bdrv_get_backing_file_depth(BlockDriverState *bs)
3816f198fd1cSBenoît Canet {
3817f198fd1cSBenoît Canet     if (!bs->drv) {
3818f198fd1cSBenoît Canet         return 0;
3819f198fd1cSBenoît Canet     }
3820f198fd1cSBenoît Canet 
3821f198fd1cSBenoît Canet     if (!bs->backing_hd) {
3822f198fd1cSBenoît Canet         return 0;
3823f198fd1cSBenoît Canet     }
3824f198fd1cSBenoît Canet 
3825f198fd1cSBenoît Canet     return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
3826f198fd1cSBenoît Canet }
3827f198fd1cSBenoît Canet 
382879fac568SJeff Cody BlockDriverState *bdrv_find_base(BlockDriverState *bs)
382979fac568SJeff Cody {
383079fac568SJeff Cody     BlockDriverState *curr_bs = NULL;
383179fac568SJeff Cody 
383279fac568SJeff Cody     if (!bs) {
383379fac568SJeff Cody         return NULL;
383479fac568SJeff Cody     }
383579fac568SJeff Cody 
383679fac568SJeff Cody     curr_bs = bs;
383779fac568SJeff Cody 
383879fac568SJeff Cody     while (curr_bs->backing_hd) {
383979fac568SJeff Cody         curr_bs = curr_bs->backing_hd;
384079fac568SJeff Cody     }
384179fac568SJeff Cody     return curr_bs;
384279fac568SJeff Cody }
384379fac568SJeff Cody 
3844ea2384d3Sbellard /**************************************************************/
384583f64091Sbellard /* async I/Os */
3846ea2384d3Sbellard 
38473b69e4b9Saliguori BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
3848f141eafeSaliguori                                  QEMUIOVector *qiov, int nb_sectors,
384983f64091Sbellard                                  BlockDriverCompletionFunc *cb, void *opaque)
3850ea2384d3Sbellard {
3851bbf0a440SStefan Hajnoczi     trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
3852bbf0a440SStefan Hajnoczi 
3853d20d9b7cSPaolo Bonzini     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
38548c5873d6SStefan Hajnoczi                                  cb, opaque, false);
385583f64091Sbellard }
385683f64091Sbellard 
3857f141eafeSaliguori BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
3858f141eafeSaliguori                                   QEMUIOVector *qiov, int nb_sectors,
385983f64091Sbellard                                   BlockDriverCompletionFunc *cb, void *opaque)
38607674e7bfSbellard {
3861bbf0a440SStefan Hajnoczi     trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
3862bbf0a440SStefan Hajnoczi 
3863d20d9b7cSPaolo Bonzini     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
38648c5873d6SStefan Hajnoczi                                  cb, opaque, true);
386583f64091Sbellard }
386683f64091Sbellard 
3867d5ef94d4SPaolo Bonzini BlockDriverAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
3868d5ef94d4SPaolo Bonzini         int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
3869d5ef94d4SPaolo Bonzini         BlockDriverCompletionFunc *cb, void *opaque)
3870d5ef94d4SPaolo Bonzini {
3871d5ef94d4SPaolo Bonzini     trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);
3872d5ef94d4SPaolo Bonzini 
3873d5ef94d4SPaolo Bonzini     return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
3874d5ef94d4SPaolo Bonzini                                  BDRV_REQ_ZERO_WRITE | flags,
3875d5ef94d4SPaolo Bonzini                                  cb, opaque, true);
3876d5ef94d4SPaolo Bonzini }
3877d5ef94d4SPaolo Bonzini 
387840b4f539SKevin Wolf 
/*
 * Shared completion state for one bdrv_aio_multiwrite() call.
 *
 * error         - first negative result seen by multiwrite_cb(), else 0
 * num_requests  - submitted (possibly merged) requests still outstanding;
 *                 multiwrite_cb() decrements it and finishes at zero
 * num_callbacks - number of entries in callbacks[] (the caller's original
 *                 request count)
 * callbacks[]   - per original request: the caller's completion function and
 *                 opaque pointer, plus an optional merged I/O vector that
 *                 multiwrite_user_cb() destroys and frees after the callback
 */
387940b4f539SKevin Wolf typedef struct MultiwriteCB {
388040b4f539SKevin Wolf     int error;
388140b4f539SKevin Wolf     int num_requests;
388240b4f539SKevin Wolf     int num_callbacks;
388340b4f539SKevin Wolf     struct {
388440b4f539SKevin Wolf         BlockDriverCompletionFunc *cb;
388540b4f539SKevin Wolf         void *opaque;
388640b4f539SKevin Wolf         QEMUIOVector *free_qiov;
388740b4f539SKevin Wolf     } callbacks[];
388840b4f539SKevin Wolf } MultiwriteCB;
388940b4f539SKevin Wolf 
389040b4f539SKevin Wolf static void multiwrite_user_cb(MultiwriteCB *mcb)
389140b4f539SKevin Wolf {
389240b4f539SKevin Wolf     int i;
389340b4f539SKevin Wolf 
389440b4f539SKevin Wolf     for (i = 0; i < mcb->num_callbacks; i++) {
389540b4f539SKevin Wolf         mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
38961e1ea48dSStefan Hajnoczi         if (mcb->callbacks[i].free_qiov) {
38971e1ea48dSStefan Hajnoczi             qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
38981e1ea48dSStefan Hajnoczi         }
38997267c094SAnthony Liguori         g_free(mcb->callbacks[i].free_qiov);
390040b4f539SKevin Wolf     }
390140b4f539SKevin Wolf }
390240b4f539SKevin Wolf 
390340b4f539SKevin Wolf static void multiwrite_cb(void *opaque, int ret)
390440b4f539SKevin Wolf {
390540b4f539SKevin Wolf     MultiwriteCB *mcb = opaque;
390640b4f539SKevin Wolf 
39076d519a5fSStefan Hajnoczi     trace_multiwrite_cb(mcb, ret);
39086d519a5fSStefan Hajnoczi 
3909cb6d3ca0SKevin Wolf     if (ret < 0 && !mcb->error) {
391040b4f539SKevin Wolf         mcb->error = ret;
391140b4f539SKevin Wolf     }
391240b4f539SKevin Wolf 
391340b4f539SKevin Wolf     mcb->num_requests--;
391440b4f539SKevin Wolf     if (mcb->num_requests == 0) {
391540b4f539SKevin Wolf         multiwrite_user_cb(mcb);
39167267c094SAnthony Liguori         g_free(mcb);
391740b4f539SKevin Wolf     }
391840b4f539SKevin Wolf }
391940b4f539SKevin Wolf 
392040b4f539SKevin Wolf static int multiwrite_req_compare(const void *a, const void *b)
392140b4f539SKevin Wolf {
392277be4366SChristoph Hellwig     const BlockRequest *req1 = a, *req2 = b;
392377be4366SChristoph Hellwig 
392477be4366SChristoph Hellwig     /*
392577be4366SChristoph Hellwig      * Note that we can't simply subtract req2->sector from req1->sector
392677be4366SChristoph Hellwig      * here as that could overflow the return value.
392777be4366SChristoph Hellwig      */
392877be4366SChristoph Hellwig     if (req1->sector > req2->sector) {
392977be4366SChristoph Hellwig         return 1;
393077be4366SChristoph Hellwig     } else if (req1->sector < req2->sector) {
393177be4366SChristoph Hellwig         return -1;
393277be4366SChristoph Hellwig     } else {
393377be4366SChristoph Hellwig         return 0;
393477be4366SChristoph Hellwig     }
393540b4f539SKevin Wolf }
393640b4f539SKevin Wolf 
393740b4f539SKevin Wolf /*
393840b4f539SKevin Wolf  * Takes a bunch of requests and tries to merge them. Returns the number of
393940b4f539SKevin Wolf  * requests that remain after merging.
394040b4f539SKevin Wolf  */
394140b4f539SKevin Wolf static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
394240b4f539SKevin Wolf     int num_reqs, MultiwriteCB *mcb)
394340b4f539SKevin Wolf {
394440b4f539SKevin Wolf     int i, outidx;
394540b4f539SKevin Wolf 
394640b4f539SKevin Wolf     // Sort requests by start sector
394740b4f539SKevin Wolf     qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
394840b4f539SKevin Wolf 
394940b4f539SKevin Wolf     // Check if adjacent requests touch the same clusters. If so, combine them,
395040b4f539SKevin Wolf     // filling up gaps with zero sectors.
395140b4f539SKevin Wolf     outidx = 0;
395240b4f539SKevin Wolf     for (i = 1; i < num_reqs; i++) {
395340b4f539SKevin Wolf         int merge = 0;
395440b4f539SKevin Wolf         int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
395540b4f539SKevin Wolf 
3956b6a127a1SPaolo Bonzini         // Handle exactly sequential writes and overlapping writes.
395740b4f539SKevin Wolf         if (reqs[i].sector <= oldreq_last) {
395840b4f539SKevin Wolf             merge = 1;
395940b4f539SKevin Wolf         }
396040b4f539SKevin Wolf 
3961e2a305fbSChristoph Hellwig         if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
3962e2a305fbSChristoph Hellwig             merge = 0;
3963e2a305fbSChristoph Hellwig         }
3964e2a305fbSChristoph Hellwig 
396540b4f539SKevin Wolf         if (merge) {
396640b4f539SKevin Wolf             size_t size;
39677267c094SAnthony Liguori             QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
396840b4f539SKevin Wolf             qemu_iovec_init(qiov,
396940b4f539SKevin Wolf                 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
397040b4f539SKevin Wolf 
397140b4f539SKevin Wolf             // Add the first request to the merged one. If the requests are
397240b4f539SKevin Wolf             // overlapping, drop the last sectors of the first request.
397340b4f539SKevin Wolf             size = (reqs[i].sector - reqs[outidx].sector) << 9;
39741b093c48SMichael Tokarev             qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
397540b4f539SKevin Wolf 
3976b6a127a1SPaolo Bonzini             // We should need to add any zeros between the two requests
3977b6a127a1SPaolo Bonzini             assert (reqs[i].sector <= oldreq_last);
397840b4f539SKevin Wolf 
397940b4f539SKevin Wolf             // Add the second request
39801b093c48SMichael Tokarev             qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
398140b4f539SKevin Wolf 
3982cbf1dff2SKevin Wolf             reqs[outidx].nb_sectors = qiov->size >> 9;
398340b4f539SKevin Wolf             reqs[outidx].qiov = qiov;
398440b4f539SKevin Wolf 
398540b4f539SKevin Wolf             mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
398640b4f539SKevin Wolf         } else {
398740b4f539SKevin Wolf             outidx++;
398840b4f539SKevin Wolf             reqs[outidx].sector     = reqs[i].sector;
398940b4f539SKevin Wolf             reqs[outidx].nb_sectors = reqs[i].nb_sectors;
399040b4f539SKevin Wolf             reqs[outidx].qiov       = reqs[i].qiov;
399140b4f539SKevin Wolf         }
399240b4f539SKevin Wolf     }
399340b4f539SKevin Wolf 
399440b4f539SKevin Wolf     return outidx + 1;
399540b4f539SKevin Wolf }
399640b4f539SKevin Wolf 
399740b4f539SKevin Wolf /*
399840b4f539SKevin Wolf  * Submit multiple AIO write requests at once.
399940b4f539SKevin Wolf  *
400040b4f539SKevin Wolf  * On success, the function returns 0 and all requests in the reqs array have
400140b4f539SKevin Wolf  * been submitted. In error case this function returns -1, and any of the
400240b4f539SKevin Wolf  * requests may or may not be submitted yet. In particular, this means that the
400340b4f539SKevin Wolf  * callback will be called for some of the requests, for others it won't. The
400440b4f539SKevin Wolf  * caller must check the error field of the BlockRequest to wait for the right
400540b4f539SKevin Wolf  * callbacks (if error != 0, no callback will be called).
400640b4f539SKevin Wolf  *
400740b4f539SKevin Wolf  * The implementation may modify the contents of the reqs array, e.g. to merge
400840b4f539SKevin Wolf  * requests. However, the fields opaque and error are left unmodified as they
400940b4f539SKevin Wolf  * are used to signal failure for a single request to the caller.
401040b4f539SKevin Wolf  */
401140b4f539SKevin Wolf int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
401240b4f539SKevin Wolf {
401340b4f539SKevin Wolf     MultiwriteCB *mcb;
401440b4f539SKevin Wolf     int i;
401540b4f539SKevin Wolf 
4016301db7c2SRyan Harper     /* don't submit writes if we don't have a medium */
4017301db7c2SRyan Harper     if (bs->drv == NULL) {
4018301db7c2SRyan Harper         for (i = 0; i < num_reqs; i++) {
4019301db7c2SRyan Harper             reqs[i].error = -ENOMEDIUM;
4020301db7c2SRyan Harper         }
4021301db7c2SRyan Harper         return -1;
4022301db7c2SRyan Harper     }
4023301db7c2SRyan Harper 
402440b4f539SKevin Wolf     if (num_reqs == 0) {
402540b4f539SKevin Wolf         return 0;
402640b4f539SKevin Wolf     }
402740b4f539SKevin Wolf 
402840b4f539SKevin Wolf     // Create MultiwriteCB structure
40297267c094SAnthony Liguori     mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
403040b4f539SKevin Wolf     mcb->num_requests = 0;
403140b4f539SKevin Wolf     mcb->num_callbacks = num_reqs;
403240b4f539SKevin Wolf 
403340b4f539SKevin Wolf     for (i = 0; i < num_reqs; i++) {
403440b4f539SKevin Wolf         mcb->callbacks[i].cb = reqs[i].cb;
403540b4f539SKevin Wolf         mcb->callbacks[i].opaque = reqs[i].opaque;
403640b4f539SKevin Wolf     }
403740b4f539SKevin Wolf 
403840b4f539SKevin Wolf     // Check for mergable requests
403940b4f539SKevin Wolf     num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
404040b4f539SKevin Wolf 
40416d519a5fSStefan Hajnoczi     trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
40426d519a5fSStefan Hajnoczi 
4043df9309fbSPaolo Bonzini     /* Run the aio requests. */
4044df9309fbSPaolo Bonzini     mcb->num_requests = num_reqs;
404540b4f539SKevin Wolf     for (i = 0; i < num_reqs; i++) {
4046d20d9b7cSPaolo Bonzini         bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
4047d20d9b7cSPaolo Bonzini                               reqs[i].nb_sectors, reqs[i].flags,
4048d20d9b7cSPaolo Bonzini                               multiwrite_cb, mcb,
4049d20d9b7cSPaolo Bonzini                               true);
405040b4f539SKevin Wolf     }
405140b4f539SKevin Wolf 
405240b4f539SKevin Wolf     return 0;
405340b4f539SKevin Wolf }
405440b4f539SKevin Wolf 
405583f64091Sbellard void bdrv_aio_cancel(BlockDriverAIOCB *acb)
405683f64091Sbellard {
4057d7331bedSStefan Hajnoczi     acb->aiocb_info->cancel(acb);
405883f64091Sbellard }
405983f64091Sbellard 
406083f64091Sbellard /**************************************************************/
406183f64091Sbellard /* async block device emulation */
406283f64091Sbellard 
/*
 * AIOCB used by bdrv_aio_rw_vector(), which runs the driver's synchronous
 * bdrv_read/bdrv_write and reports completion from a bottom half.
 *
 * common   - generic AIOCB header (callback, opaque, ...)
 * bh       - bottom half that runs bdrv_aio_bh_cb()
 * ret      - return value of the synchronous driver call
 * qiov     - the caller's I/O vector
 * bounce   - linear buffer obtained from qemu_blockalign() that the driver
 *            reads into / writes from
 * is_write - non-zero for a write request
 */
4063c16b5a2cSChristoph Hellwig typedef struct BlockDriverAIOCBSync {
4064c16b5a2cSChristoph Hellwig     BlockDriverAIOCB common;
4065c16b5a2cSChristoph Hellwig     QEMUBH *bh;
4066c16b5a2cSChristoph Hellwig     int ret;
4067c16b5a2cSChristoph Hellwig     /* vector translation state */
4068c16b5a2cSChristoph Hellwig     QEMUIOVector *qiov;
4069c16b5a2cSChristoph Hellwig     uint8_t *bounce;
4070c16b5a2cSChristoph Hellwig     int is_write;
4071c16b5a2cSChristoph Hellwig } BlockDriverAIOCBSync;
4072c16b5a2cSChristoph Hellwig 
4073c16b5a2cSChristoph Hellwig static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
4074c16b5a2cSChristoph Hellwig {
4075b666d239SKevin Wolf     BlockDriverAIOCBSync *acb =
4076b666d239SKevin Wolf         container_of(blockacb, BlockDriverAIOCBSync, common);
40776a7ad299SDor Laor     qemu_bh_delete(acb->bh);
407836afc451SAvi Kivity     acb->bh = NULL;
4079c16b5a2cSChristoph Hellwig     qemu_aio_release(acb);
4080c16b5a2cSChristoph Hellwig }
4081c16b5a2cSChristoph Hellwig 
4082d7331bedSStefan Hajnoczi static const AIOCBInfo bdrv_em_aiocb_info = {
4083c16b5a2cSChristoph Hellwig     .aiocb_size         = sizeof(BlockDriverAIOCBSync),
4084c16b5a2cSChristoph Hellwig     .cancel             = bdrv_aio_cancel_em,
4085c16b5a2cSChristoph Hellwig };
4086c16b5a2cSChristoph Hellwig 
408783f64091Sbellard static void bdrv_aio_bh_cb(void *opaque)
4088beac80cdSbellard {
4089ce1a14dcSpbrook     BlockDriverAIOCBSync *acb = opaque;
4090f141eafeSaliguori 
4091f141eafeSaliguori     if (!acb->is_write)
409203396148SMichael Tokarev         qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
4093ceb42de8Saliguori     qemu_vfree(acb->bounce);
4094ce1a14dcSpbrook     acb->common.cb(acb->common.opaque, acb->ret);
40956a7ad299SDor Laor     qemu_bh_delete(acb->bh);
409636afc451SAvi Kivity     acb->bh = NULL;
4097ce1a14dcSpbrook     qemu_aio_release(acb);
4098beac80cdSbellard }
4099beac80cdSbellard 
4100f141eafeSaliguori static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
4101f141eafeSaliguori                                             int64_t sector_num,
4102f141eafeSaliguori                                             QEMUIOVector *qiov,
4103f141eafeSaliguori                                             int nb_sectors,
4104f141eafeSaliguori                                             BlockDriverCompletionFunc *cb,
4105f141eafeSaliguori                                             void *opaque,
4106f141eafeSaliguori                                             int is_write)
4107f141eafeSaliguori 
4108ea2384d3Sbellard {
4109ce1a14dcSpbrook     BlockDriverAIOCBSync *acb;
411083f64091Sbellard 
4111d7331bedSStefan Hajnoczi     acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
4112f141eafeSaliguori     acb->is_write = is_write;
4113f141eafeSaliguori     acb->qiov = qiov;
4114e268ca52Saliguori     acb->bounce = qemu_blockalign(bs, qiov->size);
4115ce1a14dcSpbrook     acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
4116f141eafeSaliguori 
4117f141eafeSaliguori     if (is_write) {
4118d5e6b161SMichael Tokarev         qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
41191ed20acfSStefan Hajnoczi         acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
4120f141eafeSaliguori     } else {
41211ed20acfSStefan Hajnoczi         acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
4122f141eafeSaliguori     }
4123f141eafeSaliguori 
4124ce1a14dcSpbrook     qemu_bh_schedule(acb->bh);
4125f141eafeSaliguori 
4126ce1a14dcSpbrook     return &acb->common;
41277a6cba61Spbrook }
41287a6cba61Spbrook 
4129f141eafeSaliguori static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
4130f141eafeSaliguori         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
4131ce1a14dcSpbrook         BlockDriverCompletionFunc *cb, void *opaque)
413283f64091Sbellard {
4133f141eafeSaliguori     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
413483f64091Sbellard }
413583f64091Sbellard 
4136f141eafeSaliguori static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
4137f141eafeSaliguori         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
4138f141eafeSaliguori         BlockDriverCompletionFunc *cb, void *opaque)
4139f141eafeSaliguori {
4140f141eafeSaliguori     return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
4141f141eafeSaliguori }
4142f141eafeSaliguori 
414368485420SKevin Wolf 
/*
 * AIOCB for requests that are carried out inside a coroutine.
 *
 * common   - generic AIOCB header (callback, opaque, bs, ...)
 * req      - request parameters; req.error receives the result
 * is_write - selects the write path in bdrv_co_do_rw()
 * done     - when non-NULL, points at a flag that bdrv_co_em_bh() sets to
 *            true after the completion callback ran; installed by
 *            bdrv_aio_co_cancel_em() while it waits
 * bh       - bottom half that runs bdrv_co_em_bh()
 */
414468485420SKevin Wolf typedef struct BlockDriverAIOCBCoroutine {
414568485420SKevin Wolf     BlockDriverAIOCB common;
414668485420SKevin Wolf     BlockRequest req;
414768485420SKevin Wolf     bool is_write;
4148d318aea9SKevin Wolf     bool *done;
414968485420SKevin Wolf     QEMUBH* bh;
415068485420SKevin Wolf } BlockDriverAIOCBCoroutine;
415168485420SKevin Wolf 
415268485420SKevin Wolf static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
415368485420SKevin Wolf {
4154d318aea9SKevin Wolf     BlockDriverAIOCBCoroutine *acb =
4155d318aea9SKevin Wolf         container_of(blockacb, BlockDriverAIOCBCoroutine, common);
4156d318aea9SKevin Wolf     bool done = false;
4157d318aea9SKevin Wolf 
4158d318aea9SKevin Wolf     acb->done = &done;
4159d318aea9SKevin Wolf     while (!done) {
4160d318aea9SKevin Wolf         qemu_aio_wait();
4161d318aea9SKevin Wolf     }
416268485420SKevin Wolf }
416368485420SKevin Wolf 
4164d7331bedSStefan Hajnoczi static const AIOCBInfo bdrv_em_co_aiocb_info = {
416568485420SKevin Wolf     .aiocb_size         = sizeof(BlockDriverAIOCBCoroutine),
416668485420SKevin Wolf     .cancel             = bdrv_aio_co_cancel_em,
416768485420SKevin Wolf };
416868485420SKevin Wolf 
416935246a68SPaolo Bonzini static void bdrv_co_em_bh(void *opaque)
417068485420SKevin Wolf {
417168485420SKevin Wolf     BlockDriverAIOCBCoroutine *acb = opaque;
417268485420SKevin Wolf 
417368485420SKevin Wolf     acb->common.cb(acb->common.opaque, acb->req.error);
4174d318aea9SKevin Wolf 
4175d318aea9SKevin Wolf     if (acb->done) {
4176d318aea9SKevin Wolf         *acb->done = true;
4177d318aea9SKevin Wolf     }
4178d318aea9SKevin Wolf 
417968485420SKevin Wolf     qemu_bh_delete(acb->bh);
418068485420SKevin Wolf     qemu_aio_release(acb);
418168485420SKevin Wolf }
418268485420SKevin Wolf 
4183b2a61371SStefan Hajnoczi /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
4184b2a61371SStefan Hajnoczi static void coroutine_fn bdrv_co_do_rw(void *opaque)
4185b2a61371SStefan Hajnoczi {
4186b2a61371SStefan Hajnoczi     BlockDriverAIOCBCoroutine *acb = opaque;
4187b2a61371SStefan Hajnoczi     BlockDriverState *bs = acb->common.bs;
4188b2a61371SStefan Hajnoczi 
4189b2a61371SStefan Hajnoczi     if (!acb->is_write) {
4190b2a61371SStefan Hajnoczi         acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
4191d20d9b7cSPaolo Bonzini             acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
4192b2a61371SStefan Hajnoczi     } else {
4193b2a61371SStefan Hajnoczi         acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
4194d20d9b7cSPaolo Bonzini             acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
4195b2a61371SStefan Hajnoczi     }
4196b2a61371SStefan Hajnoczi 
419735246a68SPaolo Bonzini     acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
4198b2a61371SStefan Hajnoczi     qemu_bh_schedule(acb->bh);
4199b2a61371SStefan Hajnoczi }
4200b2a61371SStefan Hajnoczi 
420168485420SKevin Wolf static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
420268485420SKevin Wolf                                                int64_t sector_num,
420368485420SKevin Wolf                                                QEMUIOVector *qiov,
420468485420SKevin Wolf                                                int nb_sectors,
4205d20d9b7cSPaolo Bonzini                                                BdrvRequestFlags flags,
420668485420SKevin Wolf                                                BlockDriverCompletionFunc *cb,
420768485420SKevin Wolf                                                void *opaque,
42088c5873d6SStefan Hajnoczi                                                bool is_write)
420968485420SKevin Wolf {
421068485420SKevin Wolf     Coroutine *co;
421168485420SKevin Wolf     BlockDriverAIOCBCoroutine *acb;
421268485420SKevin Wolf 
4213d7331bedSStefan Hajnoczi     acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
421468485420SKevin Wolf     acb->req.sector = sector_num;
421568485420SKevin Wolf     acb->req.nb_sectors = nb_sectors;
421668485420SKevin Wolf     acb->req.qiov = qiov;
4217d20d9b7cSPaolo Bonzini     acb->req.flags = flags;
421868485420SKevin Wolf     acb->is_write = is_write;
4219d318aea9SKevin Wolf     acb->done = NULL;
422068485420SKevin Wolf 
42218c5873d6SStefan Hajnoczi     co = qemu_coroutine_create(bdrv_co_do_rw);
422268485420SKevin Wolf     qemu_coroutine_enter(co, acb);
422368485420SKevin Wolf 
422468485420SKevin Wolf     return &acb->common;
422568485420SKevin Wolf }
422668485420SKevin Wolf 
422707f07615SPaolo Bonzini static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
4228b2e12bc6SChristoph Hellwig {
422907f07615SPaolo Bonzini     BlockDriverAIOCBCoroutine *acb = opaque;
423007f07615SPaolo Bonzini     BlockDriverState *bs = acb->common.bs;
4231b2e12bc6SChristoph Hellwig 
423207f07615SPaolo Bonzini     acb->req.error = bdrv_co_flush(bs);
423307f07615SPaolo Bonzini     acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
4234b2e12bc6SChristoph Hellwig     qemu_bh_schedule(acb->bh);
4235b2e12bc6SChristoph Hellwig }
4236b2e12bc6SChristoph Hellwig 
423707f07615SPaolo Bonzini BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
4238016f5cf6SAlexander Graf         BlockDriverCompletionFunc *cb, void *opaque)
4239016f5cf6SAlexander Graf {
424007f07615SPaolo Bonzini     trace_bdrv_aio_flush(bs, opaque);
4241016f5cf6SAlexander Graf 
424207f07615SPaolo Bonzini     Coroutine *co;
424307f07615SPaolo Bonzini     BlockDriverAIOCBCoroutine *acb;
4244016f5cf6SAlexander Graf 
4245d7331bedSStefan Hajnoczi     acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
4246d318aea9SKevin Wolf     acb->done = NULL;
4247d318aea9SKevin Wolf 
424807f07615SPaolo Bonzini     co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
424907f07615SPaolo Bonzini     qemu_coroutine_enter(co, acb);
4250016f5cf6SAlexander Graf 
4251016f5cf6SAlexander Graf     return &acb->common;
4252016f5cf6SAlexander Graf }
4253016f5cf6SAlexander Graf 
42544265d620SPaolo Bonzini static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
42554265d620SPaolo Bonzini {
42564265d620SPaolo Bonzini     BlockDriverAIOCBCoroutine *acb = opaque;
42574265d620SPaolo Bonzini     BlockDriverState *bs = acb->common.bs;
42584265d620SPaolo Bonzini 
42594265d620SPaolo Bonzini     acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
42604265d620SPaolo Bonzini     acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
42614265d620SPaolo Bonzini     qemu_bh_schedule(acb->bh);
42624265d620SPaolo Bonzini }
42634265d620SPaolo Bonzini 
42644265d620SPaolo Bonzini BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
42654265d620SPaolo Bonzini         int64_t sector_num, int nb_sectors,
42664265d620SPaolo Bonzini         BlockDriverCompletionFunc *cb, void *opaque)
42674265d620SPaolo Bonzini {
42684265d620SPaolo Bonzini     Coroutine *co;
42694265d620SPaolo Bonzini     BlockDriverAIOCBCoroutine *acb;
42704265d620SPaolo Bonzini 
42714265d620SPaolo Bonzini     trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
42724265d620SPaolo Bonzini 
4273d7331bedSStefan Hajnoczi     acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
42744265d620SPaolo Bonzini     acb->req.sector = sector_num;
42754265d620SPaolo Bonzini     acb->req.nb_sectors = nb_sectors;
4276d318aea9SKevin Wolf     acb->done = NULL;
42774265d620SPaolo Bonzini     co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
42784265d620SPaolo Bonzini     qemu_coroutine_enter(co, acb);
42794265d620SPaolo Bonzini 
42804265d620SPaolo Bonzini     return &acb->common;
42814265d620SPaolo Bonzini }
42824265d620SPaolo Bonzini 
4283ea2384d3Sbellard void bdrv_init(void)
4284ea2384d3Sbellard {
42855efa9d5aSAnthony Liguori     module_call_init(MODULE_INIT_BLOCK);
4286ea2384d3Sbellard }
4287ce1a14dcSpbrook 
4288eb852011SMarkus Armbruster void bdrv_init_with_whitelist(void)
4289eb852011SMarkus Armbruster {
4290eb852011SMarkus Armbruster     use_bdrv_whitelist = 1;
4291eb852011SMarkus Armbruster     bdrv_init();
4292eb852011SMarkus Armbruster }
4293eb852011SMarkus Armbruster 
4294d7331bedSStefan Hajnoczi void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
42956bbff9a0Saliguori                    BlockDriverCompletionFunc *cb, void *opaque)
42966bbff9a0Saliguori {
4297ce1a14dcSpbrook     BlockDriverAIOCB *acb;
4298ce1a14dcSpbrook 
4299d7331bedSStefan Hajnoczi     acb = g_slice_alloc(aiocb_info->aiocb_size);
4300d7331bedSStefan Hajnoczi     acb->aiocb_info = aiocb_info;
4301ce1a14dcSpbrook     acb->bs = bs;
4302ce1a14dcSpbrook     acb->cb = cb;
4303ce1a14dcSpbrook     acb->opaque = opaque;
4304ce1a14dcSpbrook     return acb;
4305ce1a14dcSpbrook }
4306ce1a14dcSpbrook 
4307ce1a14dcSpbrook void qemu_aio_release(void *p)
4308ce1a14dcSpbrook {
4309d37c975fSStefan Hajnoczi     BlockDriverAIOCB *acb = p;
4310d7331bedSStefan Hajnoczi     g_slice_free1(acb->aiocb_info->aiocb_size, acb);
4311ce1a14dcSpbrook }
431219cb3738Sbellard 
431319cb3738Sbellard /**************************************************************/
4314f9f05dc5SKevin Wolf /* Coroutine block device emulation */
4315f9f05dc5SKevin Wolf 
4316f9f05dc5SKevin Wolf typedef struct CoroutineIOCompletion {
4317f9f05dc5SKevin Wolf     Coroutine *coroutine;
4318f9f05dc5SKevin Wolf     int ret;
4319f9f05dc5SKevin Wolf } CoroutineIOCompletion;
4320f9f05dc5SKevin Wolf 
4321f9f05dc5SKevin Wolf static void bdrv_co_io_em_complete(void *opaque, int ret)
4322f9f05dc5SKevin Wolf {
4323f9f05dc5SKevin Wolf     CoroutineIOCompletion *co = opaque;
4324f9f05dc5SKevin Wolf 
4325f9f05dc5SKevin Wolf     co->ret = ret;
4326f9f05dc5SKevin Wolf     qemu_coroutine_enter(co->coroutine, NULL);
4327f9f05dc5SKevin Wolf }
4328f9f05dc5SKevin Wolf 
4329f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
4330f9f05dc5SKevin Wolf                                       int nb_sectors, QEMUIOVector *iov,
4331f9f05dc5SKevin Wolf                                       bool is_write)
4332f9f05dc5SKevin Wolf {
4333f9f05dc5SKevin Wolf     CoroutineIOCompletion co = {
4334f9f05dc5SKevin Wolf         .coroutine = qemu_coroutine_self(),
4335f9f05dc5SKevin Wolf     };
4336f9f05dc5SKevin Wolf     BlockDriverAIOCB *acb;
4337f9f05dc5SKevin Wolf 
4338f9f05dc5SKevin Wolf     if (is_write) {
4339a652d160SStefan Hajnoczi         acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
4340f9f05dc5SKevin Wolf                                        bdrv_co_io_em_complete, &co);
4341f9f05dc5SKevin Wolf     } else {
4342a652d160SStefan Hajnoczi         acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
4343f9f05dc5SKevin Wolf                                       bdrv_co_io_em_complete, &co);
4344f9f05dc5SKevin Wolf     }
4345f9f05dc5SKevin Wolf 
434659370aaaSStefan Hajnoczi     trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
4347f9f05dc5SKevin Wolf     if (!acb) {
4348f9f05dc5SKevin Wolf         return -EIO;
4349f9f05dc5SKevin Wolf     }
4350f9f05dc5SKevin Wolf     qemu_coroutine_yield();
4351f9f05dc5SKevin Wolf 
4352f9f05dc5SKevin Wolf     return co.ret;
4353f9f05dc5SKevin Wolf }
4354f9f05dc5SKevin Wolf 
4355f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
4356f9f05dc5SKevin Wolf                                          int64_t sector_num, int nb_sectors,
4357f9f05dc5SKevin Wolf                                          QEMUIOVector *iov)
4358f9f05dc5SKevin Wolf {
4359f9f05dc5SKevin Wolf     return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
4360f9f05dc5SKevin Wolf }
4361f9f05dc5SKevin Wolf 
4362f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
4363f9f05dc5SKevin Wolf                                          int64_t sector_num, int nb_sectors,
4364f9f05dc5SKevin Wolf                                          QEMUIOVector *iov)
4365f9f05dc5SKevin Wolf {
4366f9f05dc5SKevin Wolf     return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
4367f9f05dc5SKevin Wolf }
4368f9f05dc5SKevin Wolf 
436907f07615SPaolo Bonzini static void coroutine_fn bdrv_flush_co_entry(void *opaque)
4370e7a8a783SKevin Wolf {
437107f07615SPaolo Bonzini     RwCo *rwco = opaque;
437207f07615SPaolo Bonzini 
437307f07615SPaolo Bonzini     rwco->ret = bdrv_co_flush(rwco->bs);
437407f07615SPaolo Bonzini }
437507f07615SPaolo Bonzini 
437607f07615SPaolo Bonzini int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
437707f07615SPaolo Bonzini {
4378eb489bb1SKevin Wolf     int ret;
4379eb489bb1SKevin Wolf 
438029cdb251SPaolo Bonzini     if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
438107f07615SPaolo Bonzini         return 0;
4382eb489bb1SKevin Wolf     }
4383eb489bb1SKevin Wolf 
4384ca716364SKevin Wolf     /* Write back cached data to the OS even with cache=unsafe */
4385bf736fe3SKevin Wolf     BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
4386eb489bb1SKevin Wolf     if (bs->drv->bdrv_co_flush_to_os) {
4387eb489bb1SKevin Wolf         ret = bs->drv->bdrv_co_flush_to_os(bs);
4388eb489bb1SKevin Wolf         if (ret < 0) {
4389eb489bb1SKevin Wolf             return ret;
4390eb489bb1SKevin Wolf         }
4391eb489bb1SKevin Wolf     }
4392eb489bb1SKevin Wolf 
4393ca716364SKevin Wolf     /* But don't actually force it to the disk with cache=unsafe */
4394ca716364SKevin Wolf     if (bs->open_flags & BDRV_O_NO_FLUSH) {
4395d4c82329SKevin Wolf         goto flush_parent;
4396ca716364SKevin Wolf     }
4397ca716364SKevin Wolf 
4398bf736fe3SKevin Wolf     BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
4399eb489bb1SKevin Wolf     if (bs->drv->bdrv_co_flush_to_disk) {
440029cdb251SPaolo Bonzini         ret = bs->drv->bdrv_co_flush_to_disk(bs);
440107f07615SPaolo Bonzini     } else if (bs->drv->bdrv_aio_flush) {
440207f07615SPaolo Bonzini         BlockDriverAIOCB *acb;
4403e7a8a783SKevin Wolf         CoroutineIOCompletion co = {
4404e7a8a783SKevin Wolf             .coroutine = qemu_coroutine_self(),
4405e7a8a783SKevin Wolf         };
4406e7a8a783SKevin Wolf 
440707f07615SPaolo Bonzini         acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
440807f07615SPaolo Bonzini         if (acb == NULL) {
440929cdb251SPaolo Bonzini             ret = -EIO;
441007f07615SPaolo Bonzini         } else {
4411e7a8a783SKevin Wolf             qemu_coroutine_yield();
441229cdb251SPaolo Bonzini             ret = co.ret;
4413e7a8a783SKevin Wolf         }
441407f07615SPaolo Bonzini     } else {
441507f07615SPaolo Bonzini         /*
441607f07615SPaolo Bonzini          * Some block drivers always operate in either writethrough or unsafe
441707f07615SPaolo Bonzini          * mode and don't support bdrv_flush therefore. Usually qemu doesn't
441807f07615SPaolo Bonzini          * know how the server works (because the behaviour is hardcoded or
441907f07615SPaolo Bonzini          * depends on server-side configuration), so we can't ensure that
442007f07615SPaolo Bonzini          * everything is safe on disk. Returning an error doesn't work because
442107f07615SPaolo Bonzini          * that would break guests even if the server operates in writethrough
442207f07615SPaolo Bonzini          * mode.
442307f07615SPaolo Bonzini          *
442407f07615SPaolo Bonzini          * Let's hope the user knows what he's doing.
442507f07615SPaolo Bonzini          */
442629cdb251SPaolo Bonzini         ret = 0;
442707f07615SPaolo Bonzini     }
442829cdb251SPaolo Bonzini     if (ret < 0) {
442929cdb251SPaolo Bonzini         return ret;
443029cdb251SPaolo Bonzini     }
443129cdb251SPaolo Bonzini 
443229cdb251SPaolo Bonzini     /* Now flush the underlying protocol.  It will also have BDRV_O_NO_FLUSH
443329cdb251SPaolo Bonzini      * in the case of cache=unsafe, so there are no useless flushes.
443429cdb251SPaolo Bonzini      */
4435d4c82329SKevin Wolf flush_parent:
443629cdb251SPaolo Bonzini     return bdrv_co_flush(bs->file);
443707f07615SPaolo Bonzini }
443807f07615SPaolo Bonzini 
44390f15423cSAnthony Liguori void bdrv_invalidate_cache(BlockDriverState *bs)
44400f15423cSAnthony Liguori {
44410f15423cSAnthony Liguori     if (bs->drv && bs->drv->bdrv_invalidate_cache) {
44420f15423cSAnthony Liguori         bs->drv->bdrv_invalidate_cache(bs);
44430f15423cSAnthony Liguori     }
44440f15423cSAnthony Liguori }
44450f15423cSAnthony Liguori 
44460f15423cSAnthony Liguori void bdrv_invalidate_cache_all(void)
44470f15423cSAnthony Liguori {
44480f15423cSAnthony Liguori     BlockDriverState *bs;
44490f15423cSAnthony Liguori 
4450dc364f4cSBenoît Canet     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
44510f15423cSAnthony Liguori         bdrv_invalidate_cache(bs);
44520f15423cSAnthony Liguori     }
44530f15423cSAnthony Liguori }
44540f15423cSAnthony Liguori 
445507789269SBenoît Canet void bdrv_clear_incoming_migration_all(void)
445607789269SBenoît Canet {
445707789269SBenoît Canet     BlockDriverState *bs;
445807789269SBenoît Canet 
4459dc364f4cSBenoît Canet     QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
446007789269SBenoît Canet         bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
446107789269SBenoît Canet     }
446207789269SBenoît Canet }
446307789269SBenoît Canet 
446407f07615SPaolo Bonzini int bdrv_flush(BlockDriverState *bs)
446507f07615SPaolo Bonzini {
446607f07615SPaolo Bonzini     Coroutine *co;
446707f07615SPaolo Bonzini     RwCo rwco = {
446807f07615SPaolo Bonzini         .bs = bs,
446907f07615SPaolo Bonzini         .ret = NOT_DONE,
447007f07615SPaolo Bonzini     };
447107f07615SPaolo Bonzini 
447207f07615SPaolo Bonzini     if (qemu_in_coroutine()) {
447307f07615SPaolo Bonzini         /* Fast-path if already in coroutine context */
447407f07615SPaolo Bonzini         bdrv_flush_co_entry(&rwco);
447507f07615SPaolo Bonzini     } else {
447607f07615SPaolo Bonzini         co = qemu_coroutine_create(bdrv_flush_co_entry);
447707f07615SPaolo Bonzini         qemu_coroutine_enter(co, &rwco);
447807f07615SPaolo Bonzini         while (rwco.ret == NOT_DONE) {
447907f07615SPaolo Bonzini             qemu_aio_wait();
448007f07615SPaolo Bonzini         }
448107f07615SPaolo Bonzini     }
448207f07615SPaolo Bonzini 
448307f07615SPaolo Bonzini     return rwco.ret;
448407f07615SPaolo Bonzini }
4485e7a8a783SKevin Wolf 
44864265d620SPaolo Bonzini static void coroutine_fn bdrv_discard_co_entry(void *opaque)
44874265d620SPaolo Bonzini {
44884265d620SPaolo Bonzini     RwCo *rwco = opaque;
44894265d620SPaolo Bonzini 
44904265d620SPaolo Bonzini     rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
44914265d620SPaolo Bonzini }
44924265d620SPaolo Bonzini 
44936f14da52SPeter Lieven /* if no limit is specified in the BlockLimits use a default
44946f14da52SPeter Lieven  * of 32768 512-byte sectors (16 MiB) per request.
44956f14da52SPeter Lieven  */
44966f14da52SPeter Lieven #define MAX_DISCARD_DEFAULT 32768
44976f14da52SPeter Lieven 
44984265d620SPaolo Bonzini int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
44994265d620SPaolo Bonzini                                  int nb_sectors)
45004265d620SPaolo Bonzini {
4501d51e9fe5SPaolo Bonzini     int max_discard;
4502d51e9fe5SPaolo Bonzini 
45034265d620SPaolo Bonzini     if (!bs->drv) {
45044265d620SPaolo Bonzini         return -ENOMEDIUM;
45054265d620SPaolo Bonzini     } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
45064265d620SPaolo Bonzini         return -EIO;
45074265d620SPaolo Bonzini     } else if (bs->read_only) {
45084265d620SPaolo Bonzini         return -EROFS;
4509df702c9bSPaolo Bonzini     }
4510df702c9bSPaolo Bonzini 
45118f0720ecSPaolo Bonzini     bdrv_reset_dirty(bs, sector_num, nb_sectors);
4512df702c9bSPaolo Bonzini 
45139e8f1835SPaolo Bonzini     /* Do nothing if disabled.  */
45149e8f1835SPaolo Bonzini     if (!(bs->open_flags & BDRV_O_UNMAP)) {
45159e8f1835SPaolo Bonzini         return 0;
45169e8f1835SPaolo Bonzini     }
45179e8f1835SPaolo Bonzini 
4518d51e9fe5SPaolo Bonzini     if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
4519d51e9fe5SPaolo Bonzini         return 0;
4520d51e9fe5SPaolo Bonzini     }
45216f14da52SPeter Lieven 
4522d51e9fe5SPaolo Bonzini     max_discard = bs->bl.max_discard ?  bs->bl.max_discard : MAX_DISCARD_DEFAULT;
45236f14da52SPeter Lieven     while (nb_sectors > 0) {
45246f14da52SPeter Lieven         int ret;
45256f14da52SPeter Lieven         int num = nb_sectors;
45266f14da52SPeter Lieven 
45276f14da52SPeter Lieven         /* align request */
45286f14da52SPeter Lieven         if (bs->bl.discard_alignment &&
45296f14da52SPeter Lieven             num >= bs->bl.discard_alignment &&
45306f14da52SPeter Lieven             sector_num % bs->bl.discard_alignment) {
45316f14da52SPeter Lieven             if (num > bs->bl.discard_alignment) {
45326f14da52SPeter Lieven                 num = bs->bl.discard_alignment;
45336f14da52SPeter Lieven             }
45346f14da52SPeter Lieven             num -= sector_num % bs->bl.discard_alignment;
45356f14da52SPeter Lieven         }
45366f14da52SPeter Lieven 
45376f14da52SPeter Lieven         /* limit request size */
45386f14da52SPeter Lieven         if (num > max_discard) {
45396f14da52SPeter Lieven             num = max_discard;
45406f14da52SPeter Lieven         }
45416f14da52SPeter Lieven 
4542d51e9fe5SPaolo Bonzini         if (bs->drv->bdrv_co_discard) {
45436f14da52SPeter Lieven             ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
4544d51e9fe5SPaolo Bonzini         } else {
45454265d620SPaolo Bonzini             BlockDriverAIOCB *acb;
45464265d620SPaolo Bonzini             CoroutineIOCompletion co = {
45474265d620SPaolo Bonzini                 .coroutine = qemu_coroutine_self(),
45484265d620SPaolo Bonzini             };
45494265d620SPaolo Bonzini 
45504265d620SPaolo Bonzini             acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
45514265d620SPaolo Bonzini                                             bdrv_co_io_em_complete, &co);
45524265d620SPaolo Bonzini             if (acb == NULL) {
45534265d620SPaolo Bonzini                 return -EIO;
45544265d620SPaolo Bonzini             } else {
45554265d620SPaolo Bonzini                 qemu_coroutine_yield();
4556d51e9fe5SPaolo Bonzini                 ret = co.ret;
45574265d620SPaolo Bonzini             }
4558d51e9fe5SPaolo Bonzini         }
45597ce21016SPaolo Bonzini         if (ret && ret != -ENOTSUP) {
4560d51e9fe5SPaolo Bonzini             return ret;
4561d51e9fe5SPaolo Bonzini         }
4562d51e9fe5SPaolo Bonzini 
4563d51e9fe5SPaolo Bonzini         sector_num += num;
4564d51e9fe5SPaolo Bonzini         nb_sectors -= num;
4565d51e9fe5SPaolo Bonzini     }
45664265d620SPaolo Bonzini     return 0;
45674265d620SPaolo Bonzini }
45684265d620SPaolo Bonzini 
45694265d620SPaolo Bonzini int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
45704265d620SPaolo Bonzini {
45714265d620SPaolo Bonzini     Coroutine *co;
45724265d620SPaolo Bonzini     RwCo rwco = {
45734265d620SPaolo Bonzini         .bs = bs,
45744265d620SPaolo Bonzini         .sector_num = sector_num,
45754265d620SPaolo Bonzini         .nb_sectors = nb_sectors,
45764265d620SPaolo Bonzini         .ret = NOT_DONE,
45774265d620SPaolo Bonzini     };
45784265d620SPaolo Bonzini 
45794265d620SPaolo Bonzini     if (qemu_in_coroutine()) {
45804265d620SPaolo Bonzini         /* Fast-path if already in coroutine context */
45814265d620SPaolo Bonzini         bdrv_discard_co_entry(&rwco);
45824265d620SPaolo Bonzini     } else {
45834265d620SPaolo Bonzini         co = qemu_coroutine_create(bdrv_discard_co_entry);
45844265d620SPaolo Bonzini         qemu_coroutine_enter(co, &rwco);
45854265d620SPaolo Bonzini         while (rwco.ret == NOT_DONE) {
45864265d620SPaolo Bonzini             qemu_aio_wait();
45874265d620SPaolo Bonzini         }
45884265d620SPaolo Bonzini     }
45894265d620SPaolo Bonzini 
45904265d620SPaolo Bonzini     return rwco.ret;
45914265d620SPaolo Bonzini }
45924265d620SPaolo Bonzini 
4593f9f05dc5SKevin Wolf /**************************************************************/
459419cb3738Sbellard /* removable device support */
459519cb3738Sbellard 
459619cb3738Sbellard /**
459719cb3738Sbellard  * Return TRUE if the media is present
459819cb3738Sbellard  */
459919cb3738Sbellard int bdrv_is_inserted(BlockDriverState *bs)
460019cb3738Sbellard {
460119cb3738Sbellard     BlockDriver *drv = bs->drv;
4602a1aff5bfSMarkus Armbruster 
460319cb3738Sbellard     if (!drv)
460419cb3738Sbellard         return 0;
460519cb3738Sbellard     if (!drv->bdrv_is_inserted)
4606a1aff5bfSMarkus Armbruster         return 1;
4607a1aff5bfSMarkus Armbruster     return drv->bdrv_is_inserted(bs);
460819cb3738Sbellard }
460919cb3738Sbellard 
461019cb3738Sbellard /**
46118e49ca46SMarkus Armbruster  * Return whether the media changed since the last call to this
46128e49ca46SMarkus Armbruster  * function, or -ENOTSUP if we don't know.  Most drivers don't know.
461319cb3738Sbellard  */
461419cb3738Sbellard int bdrv_media_changed(BlockDriverState *bs)
461519cb3738Sbellard {
461619cb3738Sbellard     BlockDriver *drv = bs->drv;
461719cb3738Sbellard 
46188e49ca46SMarkus Armbruster     if (drv && drv->bdrv_media_changed) {
46198e49ca46SMarkus Armbruster         return drv->bdrv_media_changed(bs);
46208e49ca46SMarkus Armbruster     }
46218e49ca46SMarkus Armbruster     return -ENOTSUP;
462219cb3738Sbellard }
462319cb3738Sbellard 
462419cb3738Sbellard /**
462519cb3738Sbellard  * If eject_flag is TRUE, eject the media. Otherwise, close the tray
462619cb3738Sbellard  */
4627f36f3949SLuiz Capitulino void bdrv_eject(BlockDriverState *bs, bool eject_flag)
462819cb3738Sbellard {
462919cb3738Sbellard     BlockDriver *drv = bs->drv;
463019cb3738Sbellard 
4631822e1cd1SMarkus Armbruster     if (drv && drv->bdrv_eject) {
4632822e1cd1SMarkus Armbruster         drv->bdrv_eject(bs, eject_flag);
463319cb3738Sbellard     }
46346f382ed2SLuiz Capitulino 
46356f382ed2SLuiz Capitulino     if (bs->device_name[0] != '\0') {
46366f382ed2SLuiz Capitulino         bdrv_emit_qmp_eject_event(bs, eject_flag);
46376f382ed2SLuiz Capitulino     }
463819cb3738Sbellard }
463919cb3738Sbellard 
464019cb3738Sbellard /**
464119cb3738Sbellard  * Lock or unlock the media (if it is locked, the user won't be able
464219cb3738Sbellard  * to eject it manually).
464319cb3738Sbellard  */
4644025e849aSMarkus Armbruster void bdrv_lock_medium(BlockDriverState *bs, bool locked)
464519cb3738Sbellard {
464619cb3738Sbellard     BlockDriver *drv = bs->drv;
464719cb3738Sbellard 
4648025e849aSMarkus Armbruster     trace_bdrv_lock_medium(bs, locked);
4649b8c6d095SStefan Hajnoczi 
4650025e849aSMarkus Armbruster     if (drv && drv->bdrv_lock_medium) {
4651025e849aSMarkus Armbruster         drv->bdrv_lock_medium(bs, locked);
465219cb3738Sbellard     }
465319cb3738Sbellard }
4654985a03b0Sths 
4655985a03b0Sths /* needed for generic scsi interface */
4656985a03b0Sths 
4657985a03b0Sths int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
4658985a03b0Sths {
4659985a03b0Sths     BlockDriver *drv = bs->drv;
4660985a03b0Sths 
4661985a03b0Sths     if (drv && drv->bdrv_ioctl)
4662985a03b0Sths         return drv->bdrv_ioctl(bs, req, buf);
4663985a03b0Sths     return -ENOTSUP;
4664985a03b0Sths }
46657d780669Saliguori 
4666221f715dSaliguori BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
4667221f715dSaliguori         unsigned long int req, void *buf,
46687d780669Saliguori         BlockDriverCompletionFunc *cb, void *opaque)
46697d780669Saliguori {
4670221f715dSaliguori     BlockDriver *drv = bs->drv;
46717d780669Saliguori 
4672221f715dSaliguori     if (drv && drv->bdrv_aio_ioctl)
4673221f715dSaliguori         return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
4674221f715dSaliguori     return NULL;
46757d780669Saliguori }
4676e268ca52Saliguori 
46777b6f9300SMarkus Armbruster void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
46787b6f9300SMarkus Armbruster {
46797b6f9300SMarkus Armbruster     bs->buffer_alignment = align;
46807b6f9300SMarkus Armbruster }
46817cd1e32aSlirans@il.ibm.com 
4682e268ca52Saliguori void *qemu_blockalign(BlockDriverState *bs, size_t size)
4683e268ca52Saliguori {
4684e268ca52Saliguori     return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
4685e268ca52Saliguori }
46867cd1e32aSlirans@il.ibm.com 
4687c53b1c51SStefan Hajnoczi /*
4688c53b1c51SStefan Hajnoczi  * Check if all memory in this vector is sector aligned.
4689c53b1c51SStefan Hajnoczi  */
4690c53b1c51SStefan Hajnoczi bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
4691c53b1c51SStefan Hajnoczi {
4692c53b1c51SStefan Hajnoczi     int i;
4693c53b1c51SStefan Hajnoczi 
4694c53b1c51SStefan Hajnoczi     for (i = 0; i < qiov->niov; i++) {
4695c53b1c51SStefan Hajnoczi         if ((uintptr_t) qiov->iov[i].iov_base % bs->buffer_alignment) {
4696c53b1c51SStefan Hajnoczi             return false;
4697c53b1c51SStefan Hajnoczi         }
4698c53b1c51SStefan Hajnoczi     }
4699c53b1c51SStefan Hajnoczi 
4700c53b1c51SStefan Hajnoczi     return true;
4701c53b1c51SStefan Hajnoczi }
4702c53b1c51SStefan Hajnoczi 
4703e4654d2dSFam Zheng BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity)
47047cd1e32aSlirans@il.ibm.com {
47057cd1e32aSlirans@il.ibm.com     int64_t bitmap_size;
4706e4654d2dSFam Zheng     BdrvDirtyBitmap *bitmap;
4707a55eb92cSJan Kiszka 
470850717e94SPaolo Bonzini     assert((granularity & (granularity - 1)) == 0);
470950717e94SPaolo Bonzini 
471050717e94SPaolo Bonzini     granularity >>= BDRV_SECTOR_BITS;
4711e4654d2dSFam Zheng     assert(granularity);
47128f0720ecSPaolo Bonzini     bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
4713e4654d2dSFam Zheng     bitmap = g_malloc0(sizeof(BdrvDirtyBitmap));
4714e4654d2dSFam Zheng     bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
4715e4654d2dSFam Zheng     QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
4716e4654d2dSFam Zheng     return bitmap;
4717e4654d2dSFam Zheng }
4718e4654d2dSFam Zheng 
4719e4654d2dSFam Zheng void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
4720e4654d2dSFam Zheng {
4721e4654d2dSFam Zheng     BdrvDirtyBitmap *bm, *next;
4722e4654d2dSFam Zheng     QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
4723e4654d2dSFam Zheng         if (bm == bitmap) {
4724e4654d2dSFam Zheng             QLIST_REMOVE(bitmap, list);
4725e4654d2dSFam Zheng             hbitmap_free(bitmap->bitmap);
4726e4654d2dSFam Zheng             g_free(bitmap);
4727e4654d2dSFam Zheng             return;
47287cd1e32aSlirans@il.ibm.com         }
47297cd1e32aSlirans@il.ibm.com     }
47307cd1e32aSlirans@il.ibm.com }
47317cd1e32aSlirans@il.ibm.com 
473221b56835SFam Zheng BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
473321b56835SFam Zheng {
473421b56835SFam Zheng     BdrvDirtyBitmap *bm;
473521b56835SFam Zheng     BlockDirtyInfoList *list = NULL;
473621b56835SFam Zheng     BlockDirtyInfoList **plist = &list;
473721b56835SFam Zheng 
473821b56835SFam Zheng     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
473921b56835SFam Zheng         BlockDirtyInfo *info = g_malloc0(sizeof(BlockDirtyInfo));
474021b56835SFam Zheng         BlockDirtyInfoList *entry = g_malloc0(sizeof(BlockDirtyInfoList));
474121b56835SFam Zheng         info->count = bdrv_get_dirty_count(bs, bm);
474221b56835SFam Zheng         info->granularity =
474321b56835SFam Zheng             ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bm->bitmap));
474421b56835SFam Zheng         entry->value = info;
474521b56835SFam Zheng         *plist = entry;
474621b56835SFam Zheng         plist = &entry->next;
474721b56835SFam Zheng     }
474821b56835SFam Zheng 
474921b56835SFam Zheng     return list;
475021b56835SFam Zheng }
475121b56835SFam Zheng 
4752e4654d2dSFam Zheng int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
47537cd1e32aSlirans@il.ibm.com {
4754e4654d2dSFam Zheng     if (bitmap) {
4755e4654d2dSFam Zheng         return hbitmap_get(bitmap->bitmap, sector);
47567cd1e32aSlirans@il.ibm.com     } else {
47577cd1e32aSlirans@il.ibm.com         return 0;
47587cd1e32aSlirans@il.ibm.com     }
47597cd1e32aSlirans@il.ibm.com }
47607cd1e32aSlirans@il.ibm.com 
4761e4654d2dSFam Zheng void bdrv_dirty_iter_init(BlockDriverState *bs,
4762e4654d2dSFam Zheng                           BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
47631755da16SPaolo Bonzini {
4764e4654d2dSFam Zheng     hbitmap_iter_init(hbi, bitmap->bitmap, 0);
47651755da16SPaolo Bonzini }
47661755da16SPaolo Bonzini 
47671755da16SPaolo Bonzini void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
47681755da16SPaolo Bonzini                     int nr_sectors)
47691755da16SPaolo Bonzini {
4770e4654d2dSFam Zheng     BdrvDirtyBitmap *bitmap;
4771e4654d2dSFam Zheng     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
4772e4654d2dSFam Zheng         hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
4773e4654d2dSFam Zheng     }
47741755da16SPaolo Bonzini }
47751755da16SPaolo Bonzini 
4776e4654d2dSFam Zheng void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors)
47777cd1e32aSlirans@il.ibm.com {
4778e4654d2dSFam Zheng     BdrvDirtyBitmap *bitmap;
4779e4654d2dSFam Zheng     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
4780e4654d2dSFam Zheng         hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
4781e4654d2dSFam Zheng     }
47827cd1e32aSlirans@il.ibm.com }
4783aaa0eb75SLiran Schour 
4784e4654d2dSFam Zheng int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
4785aaa0eb75SLiran Schour {
4786e4654d2dSFam Zheng     return hbitmap_count(bitmap->bitmap);
4787aaa0eb75SLiran Schour }
4788f88e1a42SJes Sorensen 
47899fcb0251SFam Zheng /* Get a reference to bs */
47909fcb0251SFam Zheng void bdrv_ref(BlockDriverState *bs)
47919fcb0251SFam Zheng {
47929fcb0251SFam Zheng     bs->refcnt++;
47939fcb0251SFam Zheng }
47949fcb0251SFam Zheng 
47959fcb0251SFam Zheng /* Release a previously grabbed reference to bs.
47969fcb0251SFam Zheng  * If after releasing, reference count is zero, the BlockDriverState is
47979fcb0251SFam Zheng  * deleted. */
47989fcb0251SFam Zheng void bdrv_unref(BlockDriverState *bs)
47999fcb0251SFam Zheng {
48009fcb0251SFam Zheng     assert(bs->refcnt > 0);
48019fcb0251SFam Zheng     if (--bs->refcnt == 0) {
48029fcb0251SFam Zheng         bdrv_delete(bs);
48039fcb0251SFam Zheng     }
48049fcb0251SFam Zheng }
48059fcb0251SFam Zheng 
4806db593f25SMarcelo Tosatti void bdrv_set_in_use(BlockDriverState *bs, int in_use)
4807db593f25SMarcelo Tosatti {
4808db593f25SMarcelo Tosatti     assert(bs->in_use != in_use);
4809db593f25SMarcelo Tosatti     bs->in_use = in_use;
4810db593f25SMarcelo Tosatti }
4811db593f25SMarcelo Tosatti 
4812db593f25SMarcelo Tosatti int bdrv_in_use(BlockDriverState *bs)
4813db593f25SMarcelo Tosatti {
4814db593f25SMarcelo Tosatti     return bs->in_use;
4815db593f25SMarcelo Tosatti }
4816db593f25SMarcelo Tosatti 
481728a7282aSLuiz Capitulino void bdrv_iostatus_enable(BlockDriverState *bs)
481828a7282aSLuiz Capitulino {
4819d6bf279eSLuiz Capitulino     bs->iostatus_enabled = true;
482058e21ef5SLuiz Capitulino     bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
482128a7282aSLuiz Capitulino }
482228a7282aSLuiz Capitulino 
482328a7282aSLuiz Capitulino /* The I/O status is only enabled if the drive explicitly
482428a7282aSLuiz Capitulino  * enables it _and_ the VM is configured to stop on errors */
482528a7282aSLuiz Capitulino bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
482628a7282aSLuiz Capitulino {
4827d6bf279eSLuiz Capitulino     return (bs->iostatus_enabled &&
482892aa5c6dSPaolo Bonzini            (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
482992aa5c6dSPaolo Bonzini             bs->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
483092aa5c6dSPaolo Bonzini             bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
483128a7282aSLuiz Capitulino }
483228a7282aSLuiz Capitulino 
483328a7282aSLuiz Capitulino void bdrv_iostatus_disable(BlockDriverState *bs)
483428a7282aSLuiz Capitulino {
4835d6bf279eSLuiz Capitulino     bs->iostatus_enabled = false;
483628a7282aSLuiz Capitulino }
483728a7282aSLuiz Capitulino 
483828a7282aSLuiz Capitulino void bdrv_iostatus_reset(BlockDriverState *bs)
483928a7282aSLuiz Capitulino {
484028a7282aSLuiz Capitulino     if (bdrv_iostatus_is_enabled(bs)) {
484158e21ef5SLuiz Capitulino         bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
48423bd293c3SPaolo Bonzini         if (bs->job) {
48433bd293c3SPaolo Bonzini             block_job_iostatus_reset(bs->job);
48443bd293c3SPaolo Bonzini         }
484528a7282aSLuiz Capitulino     }
484628a7282aSLuiz Capitulino }
484728a7282aSLuiz Capitulino 
484828a7282aSLuiz Capitulino void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
484928a7282aSLuiz Capitulino {
48503e1caa5fSPaolo Bonzini     assert(bdrv_iostatus_is_enabled(bs));
48513e1caa5fSPaolo Bonzini     if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
485258e21ef5SLuiz Capitulino         bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
485358e21ef5SLuiz Capitulino                                          BLOCK_DEVICE_IO_STATUS_FAILED;
485428a7282aSLuiz Capitulino     }
485528a7282aSLuiz Capitulino }
485628a7282aSLuiz Capitulino 
4857a597e79cSChristoph Hellwig void
4858a597e79cSChristoph Hellwig bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
4859a597e79cSChristoph Hellwig         enum BlockAcctType type)
4860a597e79cSChristoph Hellwig {
4861a597e79cSChristoph Hellwig     assert(type < BDRV_MAX_IOTYPE);
4862a597e79cSChristoph Hellwig 
4863a597e79cSChristoph Hellwig     cookie->bytes = bytes;
4864c488c7f6SChristoph Hellwig     cookie->start_time_ns = get_clock();
4865a597e79cSChristoph Hellwig     cookie->type = type;
4866a597e79cSChristoph Hellwig }
4867a597e79cSChristoph Hellwig 
4868a597e79cSChristoph Hellwig void
4869a597e79cSChristoph Hellwig bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
4870a597e79cSChristoph Hellwig {
4871a597e79cSChristoph Hellwig     assert(cookie->type < BDRV_MAX_IOTYPE);
4872a597e79cSChristoph Hellwig 
4873a597e79cSChristoph Hellwig     bs->nr_bytes[cookie->type] += cookie->bytes;
4874a597e79cSChristoph Hellwig     bs->nr_ops[cookie->type]++;
4875c488c7f6SChristoph Hellwig     bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
4876a597e79cSChristoph Hellwig }
4877a597e79cSChristoph Hellwig 
4878d92ada22SLuiz Capitulino void bdrv_img_create(const char *filename, const char *fmt,
4879f88e1a42SJes Sorensen                      const char *base_filename, const char *base_fmt,
4880f382d43aSMiroslav Rezanina                      char *options, uint64_t img_size, int flags,
4881f382d43aSMiroslav Rezanina                      Error **errp, bool quiet)
4882f88e1a42SJes Sorensen {
4883f88e1a42SJes Sorensen     QEMUOptionParameter *param = NULL, *create_options = NULL;
4884d220894eSKevin Wolf     QEMUOptionParameter *backing_fmt, *backing_file, *size;
4885f88e1a42SJes Sorensen     BlockDriver *drv, *proto_drv;
488696df67d1SStefan Hajnoczi     BlockDriver *backing_drv = NULL;
4887cc84d90fSMax Reitz     Error *local_err = NULL;
4888f88e1a42SJes Sorensen     int ret = 0;
4889f88e1a42SJes Sorensen 
4890f88e1a42SJes Sorensen     /* Find driver and parse its options */
4891f88e1a42SJes Sorensen     drv = bdrv_find_format(fmt);
4892f88e1a42SJes Sorensen     if (!drv) {
489371c79813SLuiz Capitulino         error_setg(errp, "Unknown file format '%s'", fmt);
4894d92ada22SLuiz Capitulino         return;
4895f88e1a42SJes Sorensen     }
4896f88e1a42SJes Sorensen 
489798289620SKevin Wolf     proto_drv = bdrv_find_protocol(filename, true);
4898f88e1a42SJes Sorensen     if (!proto_drv) {
489971c79813SLuiz Capitulino         error_setg(errp, "Unknown protocol '%s'", filename);
4900d92ada22SLuiz Capitulino         return;
4901f88e1a42SJes Sorensen     }
4902f88e1a42SJes Sorensen 
4903f88e1a42SJes Sorensen     create_options = append_option_parameters(create_options,
4904f88e1a42SJes Sorensen                                               drv->create_options);
4905f88e1a42SJes Sorensen     create_options = append_option_parameters(create_options,
4906f88e1a42SJes Sorensen                                               proto_drv->create_options);
4907f88e1a42SJes Sorensen 
4908f88e1a42SJes Sorensen     /* Create parameter list with default values */
4909f88e1a42SJes Sorensen     param = parse_option_parameters("", create_options, param);
4910f88e1a42SJes Sorensen 
4911f88e1a42SJes Sorensen     set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
4912f88e1a42SJes Sorensen 
4913f88e1a42SJes Sorensen     /* Parse -o options */
4914f88e1a42SJes Sorensen     if (options) {
4915f88e1a42SJes Sorensen         param = parse_option_parameters(options, create_options, param);
4916f88e1a42SJes Sorensen         if (param == NULL) {
491771c79813SLuiz Capitulino             error_setg(errp, "Invalid options for file format '%s'.", fmt);
4918f88e1a42SJes Sorensen             goto out;
4919f88e1a42SJes Sorensen         }
4920f88e1a42SJes Sorensen     }
4921f88e1a42SJes Sorensen 
4922f88e1a42SJes Sorensen     if (base_filename) {
4923f88e1a42SJes Sorensen         if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
4924f88e1a42SJes Sorensen                                  base_filename)) {
492571c79813SLuiz Capitulino             error_setg(errp, "Backing file not supported for file format '%s'",
492671c79813SLuiz Capitulino                        fmt);
4927f88e1a42SJes Sorensen             goto out;
4928f88e1a42SJes Sorensen         }
4929f88e1a42SJes Sorensen     }
4930f88e1a42SJes Sorensen 
4931f88e1a42SJes Sorensen     if (base_fmt) {
4932f88e1a42SJes Sorensen         if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
493371c79813SLuiz Capitulino             error_setg(errp, "Backing file format not supported for file "
493471c79813SLuiz Capitulino                              "format '%s'", fmt);
4935f88e1a42SJes Sorensen             goto out;
4936f88e1a42SJes Sorensen         }
4937f88e1a42SJes Sorensen     }
4938f88e1a42SJes Sorensen 
4939792da93aSJes Sorensen     backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
4940792da93aSJes Sorensen     if (backing_file && backing_file->value.s) {
4941792da93aSJes Sorensen         if (!strcmp(filename, backing_file->value.s)) {
494271c79813SLuiz Capitulino             error_setg(errp, "Error: Trying to create an image with the "
494371c79813SLuiz Capitulino                              "same filename as the backing file");
4944792da93aSJes Sorensen             goto out;
4945792da93aSJes Sorensen         }
4946792da93aSJes Sorensen     }
4947792da93aSJes Sorensen 
4948f88e1a42SJes Sorensen     backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
4949f88e1a42SJes Sorensen     if (backing_fmt && backing_fmt->value.s) {
495096df67d1SStefan Hajnoczi         backing_drv = bdrv_find_format(backing_fmt->value.s);
495196df67d1SStefan Hajnoczi         if (!backing_drv) {
495271c79813SLuiz Capitulino             error_setg(errp, "Unknown backing file format '%s'",
495371c79813SLuiz Capitulino                        backing_fmt->value.s);
4954f88e1a42SJes Sorensen             goto out;
4955f88e1a42SJes Sorensen         }
4956f88e1a42SJes Sorensen     }
4957f88e1a42SJes Sorensen 
4958f88e1a42SJes Sorensen     // The size for the image must always be specified, with one exception:
4959f88e1a42SJes Sorensen     // If we are using a backing file, we can obtain the size from there
4960d220894eSKevin Wolf     size = get_option_parameter(param, BLOCK_OPT_SIZE);
4961d220894eSKevin Wolf     if (size && size->value.n == -1) {
4962f88e1a42SJes Sorensen         if (backing_file && backing_file->value.s) {
496366f6b814SMax Reitz             BlockDriverState *bs;
4964f88e1a42SJes Sorensen             uint64_t size;
4965f88e1a42SJes Sorensen             char buf[32];
496663090dacSPaolo Bonzini             int back_flags;
496763090dacSPaolo Bonzini 
496863090dacSPaolo Bonzini             /* backing files always opened read-only */
496963090dacSPaolo Bonzini             back_flags =
497063090dacSPaolo Bonzini                 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
4971f88e1a42SJes Sorensen 
4972f88e1a42SJes Sorensen             bs = bdrv_new("");
4973f88e1a42SJes Sorensen 
4974de9c0cecSKevin Wolf             ret = bdrv_open(bs, backing_file->value.s, NULL, back_flags,
4975cc84d90fSMax Reitz                             backing_drv, &local_err);
4976f88e1a42SJes Sorensen             if (ret < 0) {
4977cc84d90fSMax Reitz                 error_setg_errno(errp, -ret, "Could not open '%s': %s",
4978cc84d90fSMax Reitz                                  backing_file->value.s,
4979cc84d90fSMax Reitz                                  error_get_pretty(local_err));
4980cc84d90fSMax Reitz                 error_free(local_err);
4981cc84d90fSMax Reitz                 local_err = NULL;
498266f6b814SMax Reitz                 bdrv_unref(bs);
4983f88e1a42SJes Sorensen                 goto out;
4984f88e1a42SJes Sorensen             }
4985f88e1a42SJes Sorensen             bdrv_get_geometry(bs, &size);
4986f88e1a42SJes Sorensen             size *= 512;
4987f88e1a42SJes Sorensen 
4988f88e1a42SJes Sorensen             snprintf(buf, sizeof(buf), "%" PRId64, size);
4989f88e1a42SJes Sorensen             set_option_parameter(param, BLOCK_OPT_SIZE, buf);
499066f6b814SMax Reitz 
499166f6b814SMax Reitz             bdrv_unref(bs);
4992f88e1a42SJes Sorensen         } else {
499371c79813SLuiz Capitulino             error_setg(errp, "Image creation needs a size parameter");
4994f88e1a42SJes Sorensen             goto out;
4995f88e1a42SJes Sorensen         }
4996f88e1a42SJes Sorensen     }
4997f88e1a42SJes Sorensen 
4998f382d43aSMiroslav Rezanina     if (!quiet) {
4999f88e1a42SJes Sorensen         printf("Formatting '%s', fmt=%s ", filename, fmt);
5000f88e1a42SJes Sorensen         print_option_parameters(param);
5001f88e1a42SJes Sorensen         puts("");
5002f382d43aSMiroslav Rezanina     }
5003cc84d90fSMax Reitz     ret = bdrv_create(drv, filename, param, &local_err);
5004cc84d90fSMax Reitz     if (ret == -EFBIG) {
5005cc84d90fSMax Reitz         /* This is generally a better message than whatever the driver would
5006cc84d90fSMax Reitz          * deliver (especially because of the cluster_size_hint), since that
5007cc84d90fSMax Reitz          * is most probably not much different from "image too large". */
5008f3f4d2c0SKevin Wolf         const char *cluster_size_hint = "";
5009f3f4d2c0SKevin Wolf         if (get_option_parameter(create_options, BLOCK_OPT_CLUSTER_SIZE)) {
5010f3f4d2c0SKevin Wolf             cluster_size_hint = " (try using a larger cluster size)";
5011f3f4d2c0SKevin Wolf         }
5012cc84d90fSMax Reitz         error_setg(errp, "The image size is too large for file format '%s'"
5013cc84d90fSMax Reitz                    "%s", fmt, cluster_size_hint);
5014cc84d90fSMax Reitz         error_free(local_err);
5015cc84d90fSMax Reitz         local_err = NULL;
5016f88e1a42SJes Sorensen     }
5017f88e1a42SJes Sorensen 
5018f88e1a42SJes Sorensen out:
5019f88e1a42SJes Sorensen     free_option_parameters(create_options);
5020f88e1a42SJes Sorensen     free_option_parameters(param);
5021f88e1a42SJes Sorensen 
5022cc84d90fSMax Reitz     if (error_is_set(&local_err)) {
5023cc84d90fSMax Reitz         error_propagate(errp, local_err);
5024cc84d90fSMax Reitz     }
5025f88e1a42SJes Sorensen }
502685d126f3SStefan Hajnoczi 
502785d126f3SStefan Hajnoczi AioContext *bdrv_get_aio_context(BlockDriverState *bs)
502885d126f3SStefan Hajnoczi {
502985d126f3SStefan Hajnoczi     /* Currently BlockDriverState always uses the main loop AioContext */
503085d126f3SStefan Hajnoczi     return qemu_get_aio_context();
503185d126f3SStefan Hajnoczi }
5032d616b224SStefan Hajnoczi 
5033d616b224SStefan Hajnoczi void bdrv_add_before_write_notifier(BlockDriverState *bs,
5034d616b224SStefan Hajnoczi                                     NotifierWithReturn *notifier)
5035d616b224SStefan Hajnoczi {
5036d616b224SStefan Hajnoczi     notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
5037d616b224SStefan Hajnoczi }
50386f176b48SMax Reitz 
50396f176b48SMax Reitz int bdrv_amend_options(BlockDriverState *bs, QEMUOptionParameter *options)
50406f176b48SMax Reitz {
50416f176b48SMax Reitz     if (bs->drv->bdrv_amend_options == NULL) {
50426f176b48SMax Reitz         return -ENOTSUP;
50436f176b48SMax Reitz     }
50446f176b48SMax Reitz     return bs->drv->bdrv_amend_options(bs, options);
50456f176b48SMax Reitz }
5046f6186f49SBenoît Canet 
5047f6186f49SBenoît Canet ExtSnapshotPerm bdrv_check_ext_snapshot(BlockDriverState *bs)
5048f6186f49SBenoît Canet {
5049f6186f49SBenoît Canet     if (bs->drv->bdrv_check_ext_snapshot) {
5050f6186f49SBenoît Canet         return bs->drv->bdrv_check_ext_snapshot(bs);
5051f6186f49SBenoît Canet     }
5052f6186f49SBenoît Canet 
5053f6186f49SBenoît Canet     if (bs->file && bs->file->drv && bs->file->drv->bdrv_check_ext_snapshot) {
5054f6186f49SBenoît Canet         return bs->file->drv->bdrv_check_ext_snapshot(bs);
5055f6186f49SBenoît Canet     }
5056f6186f49SBenoît Canet 
5057f6186f49SBenoît Canet     /* external snapshots are allowed by default */
5058f6186f49SBenoît Canet     return EXT_SNAPSHOT_ALLOWED;
5059f6186f49SBenoît Canet }
5060f6186f49SBenoît Canet 
5061f6186f49SBenoît Canet ExtSnapshotPerm bdrv_check_ext_snapshot_forbidden(BlockDriverState *bs)
5062f6186f49SBenoît Canet {
5063f6186f49SBenoît Canet     return EXT_SNAPSHOT_FORBIDDEN;
5064f6186f49SBenoît Canet }
5065