1fc01f7e7Sbellard /* 2fc01f7e7Sbellard * QEMU System Emulator block driver 3fc01f7e7Sbellard * 4fc01f7e7Sbellard * Copyright (c) 2003 Fabrice Bellard 5fc01f7e7Sbellard * 6fc01f7e7Sbellard * Permission is hereby granted, free of charge, to any person obtaining a copy 7fc01f7e7Sbellard * of this software and associated documentation files (the "Software"), to deal 8fc01f7e7Sbellard * in the Software without restriction, including without limitation the rights 9fc01f7e7Sbellard * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10fc01f7e7Sbellard * copies of the Software, and to permit persons to whom the Software is 11fc01f7e7Sbellard * furnished to do so, subject to the following conditions: 12fc01f7e7Sbellard * 13fc01f7e7Sbellard * The above copyright notice and this permission notice shall be included in 14fc01f7e7Sbellard * all copies or substantial portions of the Software. 15fc01f7e7Sbellard * 16fc01f7e7Sbellard * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17fc01f7e7Sbellard * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18fc01f7e7Sbellard * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19fc01f7e7Sbellard * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20fc01f7e7Sbellard * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21fc01f7e7Sbellard * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22fc01f7e7Sbellard * THE SOFTWARE. 
23fc01f7e7Sbellard */ 243990d09aSblueswir1 #include "config-host.h" 25faf07963Spbrook #include "qemu-common.h" 266d519a5fSStefan Hajnoczi #include "trace.h" 27737e150eSPaolo Bonzini #include "block/block_int.h" 28737e150eSPaolo Bonzini #include "block/blockjob.h" 291de7afc9SPaolo Bonzini #include "qemu/module.h" 307b1b5d19SPaolo Bonzini #include "qapi/qmp/qjson.h" 31bfb197e0SMarkus Armbruster #include "sysemu/block-backend.h" 329c17d615SPaolo Bonzini #include "sysemu/sysemu.h" 33de50a20aSFam Zheng #include "sysemu/qtest.h" 341de7afc9SPaolo Bonzini #include "qemu/notify.h" 35737e150eSPaolo Bonzini #include "block/coroutine.h" 36c13163fbSBenoît Canet #include "block/qapi.h" 37b2023818SLuiz Capitulino #include "qmp-commands.h" 381de7afc9SPaolo Bonzini #include "qemu/timer.h" 39a5ee7bd4SWenchao Xia #include "qapi-event.h" 40fc01f7e7Sbellard 4171e72a19SJuan Quintela #ifdef CONFIG_BSD 427674e7bfSbellard #include <sys/types.h> 437674e7bfSbellard #include <sys/stat.h> 447674e7bfSbellard #include <sys/ioctl.h> 4572cf2d4fSBlue Swirl #include <sys/queue.h> 46c5e97233Sblueswir1 #ifndef __DragonFly__ 477674e7bfSbellard #include <sys/disk.h> 487674e7bfSbellard #endif 49c5e97233Sblueswir1 #endif 507674e7bfSbellard 5149dc768dSaliguori #ifdef _WIN32 5249dc768dSaliguori #include <windows.h> 5349dc768dSaliguori #endif 5449dc768dSaliguori 55*9bd2b08fSJohn Snow /** 56*9bd2b08fSJohn Snow * A BdrvDirtyBitmap can be in three possible states: 57*9bd2b08fSJohn Snow * (1) successor is NULL and disabled is false: full r/w mode 58*9bd2b08fSJohn Snow * (2) successor is NULL and disabled is true: read only mode ("disabled") 59*9bd2b08fSJohn Snow * (3) successor is set: frozen mode. 60*9bd2b08fSJohn Snow * A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set, 61*9bd2b08fSJohn Snow * or enabled. A frozen bitmap can only abdicate() or reclaim(). 
62*9bd2b08fSJohn Snow */ 63e4654d2dSFam Zheng struct BdrvDirtyBitmap { 64e4654d2dSFam Zheng HBitmap *bitmap; 65*9bd2b08fSJohn Snow BdrvDirtyBitmap *successor; 660db6e54aSFam Zheng char *name; 67b8e6fb75SJohn Snow bool disabled; 68e4654d2dSFam Zheng QLIST_ENTRY(BdrvDirtyBitmap) list; 69e4654d2dSFam Zheng }; 70e4654d2dSFam Zheng 711c9805a3SStefan Hajnoczi #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ 721c9805a3SStefan Hajnoczi 737c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs, 74f141eafeSaliguori int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 75097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque); 767c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs, 77f141eafeSaliguori int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 78097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque); 79f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs, 80f9f05dc5SKevin Wolf int64_t sector_num, int nb_sectors, 81f9f05dc5SKevin Wolf QEMUIOVector *iov); 82f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs, 83f9f05dc5SKevin Wolf int64_t sector_num, int nb_sectors, 84f9f05dc5SKevin Wolf QEMUIOVector *iov); 85775aa8b6SKevin Wolf static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs, 86775aa8b6SKevin Wolf int64_t offset, unsigned int bytes, QEMUIOVector *qiov, 87470c0504SStefan Hajnoczi BdrvRequestFlags flags); 88775aa8b6SKevin Wolf static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, 89775aa8b6SKevin Wolf int64_t offset, unsigned int bytes, QEMUIOVector *qiov, 90f08f2ddaSStefan Hajnoczi BdrvRequestFlags flags); 917c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, 92b2a61371SStefan Hajnoczi int64_t sector_num, 93b2a61371SStefan Hajnoczi QEMUIOVector *qiov, 94b2a61371SStefan Hajnoczi int nb_sectors, 95d20d9b7cSPaolo Bonzini 
BdrvRequestFlags flags, 96097310b5SMarkus Armbruster BlockCompletionFunc *cb, 97b2a61371SStefan Hajnoczi void *opaque, 988c5873d6SStefan Hajnoczi bool is_write); 99b2a61371SStefan Hajnoczi static void coroutine_fn bdrv_co_do_rw(void *opaque); 100621f0589SKevin Wolf static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, 101aa7bfbffSPeter Lieven int64_t sector_num, int nb_sectors, BdrvRequestFlags flags); 102ec530c81Sbellard 1031b7bdbc1SStefan Hajnoczi static QTAILQ_HEAD(, BlockDriverState) bdrv_states = 1041b7bdbc1SStefan Hajnoczi QTAILQ_HEAD_INITIALIZER(bdrv_states); 1057ee930d0Sblueswir1 106dc364f4cSBenoît Canet static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states = 107dc364f4cSBenoît Canet QTAILQ_HEAD_INITIALIZER(graph_bdrv_states); 108dc364f4cSBenoît Canet 1098a22f02aSStefan Hajnoczi static QLIST_HEAD(, BlockDriver) bdrv_drivers = 1108a22f02aSStefan Hajnoczi QLIST_HEAD_INITIALIZER(bdrv_drivers); 111ea2384d3Sbellard 112c4237dfaSVladimir Sementsov-Ogievskiy static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, 113c4237dfaSVladimir Sementsov-Ogievskiy int nr_sectors); 114c4237dfaSVladimir Sementsov-Ogievskiy static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, 115c4237dfaSVladimir Sementsov-Ogievskiy int nr_sectors); 116eb852011SMarkus Armbruster /* If non-zero, use only whitelisted block drivers */ 117eb852011SMarkus Armbruster static int use_bdrv_whitelist; 118eb852011SMarkus Armbruster 1199e0b22f4SStefan Hajnoczi #ifdef _WIN32 1209e0b22f4SStefan Hajnoczi static int is_windows_drive_prefix(const char *filename) 1219e0b22f4SStefan Hajnoczi { 1229e0b22f4SStefan Hajnoczi return (((filename[0] >= 'a' && filename[0] <= 'z') || 1239e0b22f4SStefan Hajnoczi (filename[0] >= 'A' && filename[0] <= 'Z')) && 1249e0b22f4SStefan Hajnoczi filename[1] == ':'); 1259e0b22f4SStefan Hajnoczi } 1269e0b22f4SStefan Hajnoczi 1279e0b22f4SStefan Hajnoczi int is_windows_drive(const char *filename) 1289e0b22f4SStefan Hajnoczi { 
1299e0b22f4SStefan Hajnoczi if (is_windows_drive_prefix(filename) && 1309e0b22f4SStefan Hajnoczi filename[2] == '\0') 1319e0b22f4SStefan Hajnoczi return 1; 1329e0b22f4SStefan Hajnoczi if (strstart(filename, "\\\\.\\", NULL) || 1339e0b22f4SStefan Hajnoczi strstart(filename, "//./", NULL)) 1349e0b22f4SStefan Hajnoczi return 1; 1359e0b22f4SStefan Hajnoczi return 0; 1369e0b22f4SStefan Hajnoczi } 1379e0b22f4SStefan Hajnoczi #endif 1389e0b22f4SStefan Hajnoczi 1390563e191SZhi Yong Wu /* throttling disk I/O limits */ 140cc0681c4SBenoît Canet void bdrv_set_io_limits(BlockDriverState *bs, 141cc0681c4SBenoît Canet ThrottleConfig *cfg) 142cc0681c4SBenoît Canet { 143cc0681c4SBenoît Canet int i; 144cc0681c4SBenoît Canet 145cc0681c4SBenoît Canet throttle_config(&bs->throttle_state, cfg); 146cc0681c4SBenoît Canet 147cc0681c4SBenoît Canet for (i = 0; i < 2; i++) { 148cc0681c4SBenoît Canet qemu_co_enter_next(&bs->throttled_reqs[i]); 149cc0681c4SBenoît Canet } 150cc0681c4SBenoît Canet } 151cc0681c4SBenoît Canet 152cc0681c4SBenoît Canet /* this function drain all the throttled IOs */ 153cc0681c4SBenoît Canet static bool bdrv_start_throttled_reqs(BlockDriverState *bs) 154cc0681c4SBenoît Canet { 155cc0681c4SBenoît Canet bool drained = false; 156cc0681c4SBenoît Canet bool enabled = bs->io_limits_enabled; 157cc0681c4SBenoît Canet int i; 158cc0681c4SBenoît Canet 159cc0681c4SBenoît Canet bs->io_limits_enabled = false; 160cc0681c4SBenoît Canet 161cc0681c4SBenoît Canet for (i = 0; i < 2; i++) { 162cc0681c4SBenoît Canet while (qemu_co_enter_next(&bs->throttled_reqs[i])) { 163cc0681c4SBenoît Canet drained = true; 164cc0681c4SBenoît Canet } 165cc0681c4SBenoît Canet } 166cc0681c4SBenoît Canet 167cc0681c4SBenoît Canet bs->io_limits_enabled = enabled; 168cc0681c4SBenoît Canet 169cc0681c4SBenoît Canet return drained; 170cc0681c4SBenoît Canet } 171cc0681c4SBenoît Canet 17298f90dbaSZhi Yong Wu void bdrv_io_limits_disable(BlockDriverState *bs) 17398f90dbaSZhi Yong Wu { 17498f90dbaSZhi Yong Wu 
bs->io_limits_enabled = false; 17598f90dbaSZhi Yong Wu 176cc0681c4SBenoît Canet bdrv_start_throttled_reqs(bs); 17798f90dbaSZhi Yong Wu 178cc0681c4SBenoît Canet throttle_destroy(&bs->throttle_state); 17998f90dbaSZhi Yong Wu } 18098f90dbaSZhi Yong Wu 181cc0681c4SBenoît Canet static void bdrv_throttle_read_timer_cb(void *opaque) 1820563e191SZhi Yong Wu { 1830563e191SZhi Yong Wu BlockDriverState *bs = opaque; 184cc0681c4SBenoît Canet qemu_co_enter_next(&bs->throttled_reqs[0]); 1850563e191SZhi Yong Wu } 1860563e191SZhi Yong Wu 187cc0681c4SBenoît Canet static void bdrv_throttle_write_timer_cb(void *opaque) 188cc0681c4SBenoît Canet { 189cc0681c4SBenoît Canet BlockDriverState *bs = opaque; 190cc0681c4SBenoît Canet qemu_co_enter_next(&bs->throttled_reqs[1]); 191cc0681c4SBenoît Canet } 192cc0681c4SBenoît Canet 193cc0681c4SBenoît Canet /* should be called before bdrv_set_io_limits if a limit is set */ 1940563e191SZhi Yong Wu void bdrv_io_limits_enable(BlockDriverState *bs) 1950563e191SZhi Yong Wu { 196de50a20aSFam Zheng int clock_type = QEMU_CLOCK_REALTIME; 197de50a20aSFam Zheng 198de50a20aSFam Zheng if (qtest_enabled()) { 199de50a20aSFam Zheng /* For testing block IO throttling only */ 200de50a20aSFam Zheng clock_type = QEMU_CLOCK_VIRTUAL; 201de50a20aSFam Zheng } 202cc0681c4SBenoît Canet assert(!bs->io_limits_enabled); 203cc0681c4SBenoît Canet throttle_init(&bs->throttle_state, 20413af91ebSStefan Hajnoczi bdrv_get_aio_context(bs), 205de50a20aSFam Zheng clock_type, 206cc0681c4SBenoît Canet bdrv_throttle_read_timer_cb, 207cc0681c4SBenoît Canet bdrv_throttle_write_timer_cb, 208cc0681c4SBenoît Canet bs); 2090563e191SZhi Yong Wu bs->io_limits_enabled = true; 2100563e191SZhi Yong Wu } 2110563e191SZhi Yong Wu 212cc0681c4SBenoît Canet /* This function makes an IO wait if needed 213cc0681c4SBenoît Canet * 214cc0681c4SBenoît Canet * @nb_sectors: the number of sectors of the IO 215cc0681c4SBenoît Canet * @is_write: is the IO a write 21698f90dbaSZhi Yong Wu */ 217cc0681c4SBenoît Canet 
static void bdrv_io_limits_intercept(BlockDriverState *bs, 218d5103588SKevin Wolf unsigned int bytes, 219cc0681c4SBenoît Canet bool is_write) 220cc0681c4SBenoît Canet { 221cc0681c4SBenoît Canet /* does this io must wait */ 222cc0681c4SBenoît Canet bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write); 22398f90dbaSZhi Yong Wu 224cc0681c4SBenoît Canet /* if must wait or any request of this type throttled queue the IO */ 225cc0681c4SBenoît Canet if (must_wait || 226cc0681c4SBenoît Canet !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) { 227cc0681c4SBenoît Canet qemu_co_queue_wait(&bs->throttled_reqs[is_write]); 22898f90dbaSZhi Yong Wu } 22998f90dbaSZhi Yong Wu 230cc0681c4SBenoît Canet /* the IO will be executed, do the accounting */ 231d5103588SKevin Wolf throttle_account(&bs->throttle_state, is_write, bytes); 232d5103588SKevin Wolf 233cc0681c4SBenoît Canet 234cc0681c4SBenoît Canet /* if the next request must wait -> do nothing */ 235cc0681c4SBenoît Canet if (throttle_schedule_timer(&bs->throttle_state, is_write)) { 236cc0681c4SBenoît Canet return; 237cc0681c4SBenoît Canet } 238cc0681c4SBenoît Canet 239cc0681c4SBenoît Canet /* else queue next request for execution */ 240cc0681c4SBenoît Canet qemu_co_queue_next(&bs->throttled_reqs[is_write]); 24198f90dbaSZhi Yong Wu } 24298f90dbaSZhi Yong Wu 243339064d5SKevin Wolf size_t bdrv_opt_mem_align(BlockDriverState *bs) 244339064d5SKevin Wolf { 245339064d5SKevin Wolf if (!bs || !bs->drv) { 246339064d5SKevin Wolf /* 4k should be on the safe side */ 247339064d5SKevin Wolf return 4096; 248339064d5SKevin Wolf } 249339064d5SKevin Wolf 250339064d5SKevin Wolf return bs->bl.opt_mem_alignment; 251339064d5SKevin Wolf } 252339064d5SKevin Wolf 2539e0b22f4SStefan Hajnoczi /* check if the path starts with "<protocol>:" */ 2545c98415bSMax Reitz int path_has_protocol(const char *path) 2559e0b22f4SStefan Hajnoczi { 256947995c0SPaolo Bonzini const char *p; 257947995c0SPaolo Bonzini 2589e0b22f4SStefan Hajnoczi #ifdef _WIN32 
2599e0b22f4SStefan Hajnoczi if (is_windows_drive(path) || 2609e0b22f4SStefan Hajnoczi is_windows_drive_prefix(path)) { 2619e0b22f4SStefan Hajnoczi return 0; 2629e0b22f4SStefan Hajnoczi } 263947995c0SPaolo Bonzini p = path + strcspn(path, ":/\\"); 264947995c0SPaolo Bonzini #else 265947995c0SPaolo Bonzini p = path + strcspn(path, ":/"); 2669e0b22f4SStefan Hajnoczi #endif 2679e0b22f4SStefan Hajnoczi 268947995c0SPaolo Bonzini return *p == ':'; 2699e0b22f4SStefan Hajnoczi } 2709e0b22f4SStefan Hajnoczi 27183f64091Sbellard int path_is_absolute(const char *path) 27283f64091Sbellard { 27321664424Sbellard #ifdef _WIN32 27421664424Sbellard /* specific case for names like: "\\.\d:" */ 275f53f4da9SPaolo Bonzini if (is_windows_drive(path) || is_windows_drive_prefix(path)) { 27621664424Sbellard return 1; 277f53f4da9SPaolo Bonzini } 278f53f4da9SPaolo Bonzini return (*path == '/' || *path == '\\'); 2793b9f94e1Sbellard #else 280f53f4da9SPaolo Bonzini return (*path == '/'); 2813b9f94e1Sbellard #endif 28283f64091Sbellard } 28383f64091Sbellard 28483f64091Sbellard /* if filename is absolute, just copy it to dest. Otherwise, build a 28583f64091Sbellard path to it by considering it is relative to base_path. URL are 28683f64091Sbellard supported. 
*/ 28783f64091Sbellard void path_combine(char *dest, int dest_size, 28883f64091Sbellard const char *base_path, 28983f64091Sbellard const char *filename) 29083f64091Sbellard { 29183f64091Sbellard const char *p, *p1; 29283f64091Sbellard int len; 29383f64091Sbellard 29483f64091Sbellard if (dest_size <= 0) 29583f64091Sbellard return; 29683f64091Sbellard if (path_is_absolute(filename)) { 29783f64091Sbellard pstrcpy(dest, dest_size, filename); 29883f64091Sbellard } else { 29983f64091Sbellard p = strchr(base_path, ':'); 30083f64091Sbellard if (p) 30183f64091Sbellard p++; 30283f64091Sbellard else 30383f64091Sbellard p = base_path; 3043b9f94e1Sbellard p1 = strrchr(base_path, '/'); 3053b9f94e1Sbellard #ifdef _WIN32 3063b9f94e1Sbellard { 3073b9f94e1Sbellard const char *p2; 3083b9f94e1Sbellard p2 = strrchr(base_path, '\\'); 3093b9f94e1Sbellard if (!p1 || p2 > p1) 3103b9f94e1Sbellard p1 = p2; 3113b9f94e1Sbellard } 3123b9f94e1Sbellard #endif 31383f64091Sbellard if (p1) 31483f64091Sbellard p1++; 31583f64091Sbellard else 31683f64091Sbellard p1 = base_path; 31783f64091Sbellard if (p1 > p) 31883f64091Sbellard p = p1; 31983f64091Sbellard len = p - base_path; 32083f64091Sbellard if (len > dest_size - 1) 32183f64091Sbellard len = dest_size - 1; 32283f64091Sbellard memcpy(dest, base_path, len); 32383f64091Sbellard dest[len] = '\0'; 32483f64091Sbellard pstrcat(dest, dest_size, filename); 32583f64091Sbellard } 32683f64091Sbellard } 32783f64091Sbellard 3280a82855aSMax Reitz void bdrv_get_full_backing_filename_from_filename(const char *backed, 3290a82855aSMax Reitz const char *backing, 3309f07429eSMax Reitz char *dest, size_t sz, 3319f07429eSMax Reitz Error **errp) 3320a82855aSMax Reitz { 3339f07429eSMax Reitz if (backing[0] == '\0' || path_has_protocol(backing) || 3349f07429eSMax Reitz path_is_absolute(backing)) 3359f07429eSMax Reitz { 3360a82855aSMax Reitz pstrcpy(dest, sz, backing); 3379f07429eSMax Reitz } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) { 3389f07429eSMax 
Reitz error_setg(errp, "Cannot use relative backing file names for '%s'", 3399f07429eSMax Reitz backed); 3400a82855aSMax Reitz } else { 3410a82855aSMax Reitz path_combine(dest, sz, backed, backing); 3420a82855aSMax Reitz } 3430a82855aSMax Reitz } 3440a82855aSMax Reitz 3459f07429eSMax Reitz void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz, 3469f07429eSMax Reitz Error **errp) 347dc5a1371SPaolo Bonzini { 3489f07429eSMax Reitz char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename; 3499f07429eSMax Reitz 3509f07429eSMax Reitz bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file, 3519f07429eSMax Reitz dest, sz, errp); 352dc5a1371SPaolo Bonzini } 353dc5a1371SPaolo Bonzini 3545efa9d5aSAnthony Liguori void bdrv_register(BlockDriver *bdrv) 355ea2384d3Sbellard { 3568c5873d6SStefan Hajnoczi /* Block drivers without coroutine functions need emulation */ 3578c5873d6SStefan Hajnoczi if (!bdrv->bdrv_co_readv) { 358f9f05dc5SKevin Wolf bdrv->bdrv_co_readv = bdrv_co_readv_em; 359f9f05dc5SKevin Wolf bdrv->bdrv_co_writev = bdrv_co_writev_em; 360f9f05dc5SKevin Wolf 361f8c35c1dSStefan Hajnoczi /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if 362f8c35c1dSStefan Hajnoczi * the block driver lacks aio we need to emulate that too. 
363f8c35c1dSStefan Hajnoczi */ 364f9f05dc5SKevin Wolf if (!bdrv->bdrv_aio_readv) { 36583f64091Sbellard /* add AIO emulation layer */ 366f141eafeSaliguori bdrv->bdrv_aio_readv = bdrv_aio_readv_em; 367f141eafeSaliguori bdrv->bdrv_aio_writev = bdrv_aio_writev_em; 36883f64091Sbellard } 369f9f05dc5SKevin Wolf } 370b2e12bc6SChristoph Hellwig 3718a22f02aSStefan Hajnoczi QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list); 372ea2384d3Sbellard } 373b338082bSbellard 3747f06d47eSMarkus Armbruster BlockDriverState *bdrv_new_root(void) 375fc01f7e7Sbellard { 3767f06d47eSMarkus Armbruster BlockDriverState *bs = bdrv_new(); 377e4e9986bSMarkus Armbruster 378e4e9986bSMarkus Armbruster QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list); 379e4e9986bSMarkus Armbruster return bs; 380e4e9986bSMarkus Armbruster } 381e4e9986bSMarkus Armbruster 382e4e9986bSMarkus Armbruster BlockDriverState *bdrv_new(void) 383e4e9986bSMarkus Armbruster { 384e4e9986bSMarkus Armbruster BlockDriverState *bs; 385e4e9986bSMarkus Armbruster int i; 386e4e9986bSMarkus Armbruster 3875839e53bSMarkus Armbruster bs = g_new0(BlockDriverState, 1); 388e4654d2dSFam Zheng QLIST_INIT(&bs->dirty_bitmaps); 389fbe40ff7SFam Zheng for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 390fbe40ff7SFam Zheng QLIST_INIT(&bs->op_blockers[i]); 391fbe40ff7SFam Zheng } 39228a7282aSLuiz Capitulino bdrv_iostatus_disable(bs); 393d7d512f6SPaolo Bonzini notifier_list_init(&bs->close_notifiers); 394d616b224SStefan Hajnoczi notifier_with_return_list_init(&bs->before_write_notifiers); 395cc0681c4SBenoît Canet qemu_co_queue_init(&bs->throttled_reqs[0]); 396cc0681c4SBenoît Canet qemu_co_queue_init(&bs->throttled_reqs[1]); 3979fcb0251SFam Zheng bs->refcnt = 1; 398dcd04228SStefan Hajnoczi bs->aio_context = qemu_get_aio_context(); 399d7d512f6SPaolo Bonzini 400b338082bSbellard return bs; 401b338082bSbellard } 402b338082bSbellard 403d7d512f6SPaolo Bonzini void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify) 404d7d512f6SPaolo Bonzini { 405d7d512f6SPaolo 
Bonzini notifier_list_add(&bs->close_notifiers, notify); 406d7d512f6SPaolo Bonzini } 407d7d512f6SPaolo Bonzini 408ea2384d3Sbellard BlockDriver *bdrv_find_format(const char *format_name) 409ea2384d3Sbellard { 410ea2384d3Sbellard BlockDriver *drv1; 4118a22f02aSStefan Hajnoczi QLIST_FOREACH(drv1, &bdrv_drivers, list) { 4128a22f02aSStefan Hajnoczi if (!strcmp(drv1->format_name, format_name)) { 413ea2384d3Sbellard return drv1; 414ea2384d3Sbellard } 4158a22f02aSStefan Hajnoczi } 416ea2384d3Sbellard return NULL; 417ea2384d3Sbellard } 418ea2384d3Sbellard 419b64ec4e4SFam Zheng static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only) 420eb852011SMarkus Armbruster { 421b64ec4e4SFam Zheng static const char *whitelist_rw[] = { 422b64ec4e4SFam Zheng CONFIG_BDRV_RW_WHITELIST 423b64ec4e4SFam Zheng }; 424b64ec4e4SFam Zheng static const char *whitelist_ro[] = { 425b64ec4e4SFam Zheng CONFIG_BDRV_RO_WHITELIST 426eb852011SMarkus Armbruster }; 427eb852011SMarkus Armbruster const char **p; 428eb852011SMarkus Armbruster 429b64ec4e4SFam Zheng if (!whitelist_rw[0] && !whitelist_ro[0]) { 430eb852011SMarkus Armbruster return 1; /* no whitelist, anything goes */ 431b64ec4e4SFam Zheng } 432eb852011SMarkus Armbruster 433b64ec4e4SFam Zheng for (p = whitelist_rw; *p; p++) { 434eb852011SMarkus Armbruster if (!strcmp(drv->format_name, *p)) { 435eb852011SMarkus Armbruster return 1; 436eb852011SMarkus Armbruster } 437eb852011SMarkus Armbruster } 438b64ec4e4SFam Zheng if (read_only) { 439b64ec4e4SFam Zheng for (p = whitelist_ro; *p; p++) { 440b64ec4e4SFam Zheng if (!strcmp(drv->format_name, *p)) { 441b64ec4e4SFam Zheng return 1; 442b64ec4e4SFam Zheng } 443b64ec4e4SFam Zheng } 444b64ec4e4SFam Zheng } 445eb852011SMarkus Armbruster return 0; 446eb852011SMarkus Armbruster } 447eb852011SMarkus Armbruster 448b64ec4e4SFam Zheng BlockDriver *bdrv_find_whitelisted_format(const char *format_name, 449b64ec4e4SFam Zheng bool read_only) 450eb852011SMarkus Armbruster { 451eb852011SMarkus Armbruster 
BlockDriver *drv = bdrv_find_format(format_name); 452b64ec4e4SFam Zheng return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL; 453eb852011SMarkus Armbruster } 454eb852011SMarkus Armbruster 4555b7e1542SZhi Yong Wu typedef struct CreateCo { 4565b7e1542SZhi Yong Wu BlockDriver *drv; 4575b7e1542SZhi Yong Wu char *filename; 45883d0521aSChunyan Liu QemuOpts *opts; 4595b7e1542SZhi Yong Wu int ret; 460cc84d90fSMax Reitz Error *err; 4615b7e1542SZhi Yong Wu } CreateCo; 4625b7e1542SZhi Yong Wu 4635b7e1542SZhi Yong Wu static void coroutine_fn bdrv_create_co_entry(void *opaque) 4645b7e1542SZhi Yong Wu { 465cc84d90fSMax Reitz Error *local_err = NULL; 466cc84d90fSMax Reitz int ret; 467cc84d90fSMax Reitz 4685b7e1542SZhi Yong Wu CreateCo *cco = opaque; 4695b7e1542SZhi Yong Wu assert(cco->drv); 4705b7e1542SZhi Yong Wu 471c282e1fdSChunyan Liu ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err); 47284d18f06SMarkus Armbruster if (local_err) { 473cc84d90fSMax Reitz error_propagate(&cco->err, local_err); 474cc84d90fSMax Reitz } 475cc84d90fSMax Reitz cco->ret = ret; 4765b7e1542SZhi Yong Wu } 4775b7e1542SZhi Yong Wu 4780e7e1989SKevin Wolf int bdrv_create(BlockDriver *drv, const char* filename, 47983d0521aSChunyan Liu QemuOpts *opts, Error **errp) 480ea2384d3Sbellard { 4815b7e1542SZhi Yong Wu int ret; 4820e7e1989SKevin Wolf 4835b7e1542SZhi Yong Wu Coroutine *co; 4845b7e1542SZhi Yong Wu CreateCo cco = { 4855b7e1542SZhi Yong Wu .drv = drv, 4865b7e1542SZhi Yong Wu .filename = g_strdup(filename), 48783d0521aSChunyan Liu .opts = opts, 4885b7e1542SZhi Yong Wu .ret = NOT_DONE, 489cc84d90fSMax Reitz .err = NULL, 4905b7e1542SZhi Yong Wu }; 4915b7e1542SZhi Yong Wu 492c282e1fdSChunyan Liu if (!drv->bdrv_create) { 493cc84d90fSMax Reitz error_setg(errp, "Driver '%s' does not support image creation", drv->format_name); 49480168bffSLuiz Capitulino ret = -ENOTSUP; 49580168bffSLuiz Capitulino goto out; 4965b7e1542SZhi Yong Wu } 4975b7e1542SZhi Yong Wu 4985b7e1542SZhi Yong Wu if 
(qemu_in_coroutine()) { 4995b7e1542SZhi Yong Wu /* Fast-path if already in coroutine context */ 5005b7e1542SZhi Yong Wu bdrv_create_co_entry(&cco); 5015b7e1542SZhi Yong Wu } else { 5025b7e1542SZhi Yong Wu co = qemu_coroutine_create(bdrv_create_co_entry); 5035b7e1542SZhi Yong Wu qemu_coroutine_enter(co, &cco); 5045b7e1542SZhi Yong Wu while (cco.ret == NOT_DONE) { 505b47ec2c4SPaolo Bonzini aio_poll(qemu_get_aio_context(), true); 5065b7e1542SZhi Yong Wu } 5075b7e1542SZhi Yong Wu } 5085b7e1542SZhi Yong Wu 5095b7e1542SZhi Yong Wu ret = cco.ret; 510cc84d90fSMax Reitz if (ret < 0) { 51184d18f06SMarkus Armbruster if (cco.err) { 512cc84d90fSMax Reitz error_propagate(errp, cco.err); 513cc84d90fSMax Reitz } else { 514cc84d90fSMax Reitz error_setg_errno(errp, -ret, "Could not create image"); 515cc84d90fSMax Reitz } 516cc84d90fSMax Reitz } 5175b7e1542SZhi Yong Wu 51880168bffSLuiz Capitulino out: 51980168bffSLuiz Capitulino g_free(cco.filename); 5205b7e1542SZhi Yong Wu return ret; 521ea2384d3Sbellard } 522ea2384d3Sbellard 523c282e1fdSChunyan Liu int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) 52484a12e66SChristoph Hellwig { 52584a12e66SChristoph Hellwig BlockDriver *drv; 526cc84d90fSMax Reitz Error *local_err = NULL; 527cc84d90fSMax Reitz int ret; 52884a12e66SChristoph Hellwig 529b65a5e12SMax Reitz drv = bdrv_find_protocol(filename, true, errp); 53084a12e66SChristoph Hellwig if (drv == NULL) { 53116905d71SStefan Hajnoczi return -ENOENT; 53284a12e66SChristoph Hellwig } 53384a12e66SChristoph Hellwig 534c282e1fdSChunyan Liu ret = bdrv_create(drv, filename, opts, &local_err); 53584d18f06SMarkus Armbruster if (local_err) { 536cc84d90fSMax Reitz error_propagate(errp, local_err); 537cc84d90fSMax Reitz } 538cc84d90fSMax Reitz return ret; 53984a12e66SChristoph Hellwig } 54084a12e66SChristoph Hellwig 5413baca891SKevin Wolf void bdrv_refresh_limits(BlockDriverState *bs, Error **errp) 542d34682cdSKevin Wolf { 543d34682cdSKevin Wolf BlockDriver *drv = bs->drv; 
5443baca891SKevin Wolf Error *local_err = NULL; 545d34682cdSKevin Wolf 546d34682cdSKevin Wolf memset(&bs->bl, 0, sizeof(bs->bl)); 547d34682cdSKevin Wolf 548466ad822SKevin Wolf if (!drv) { 5493baca891SKevin Wolf return; 550466ad822SKevin Wolf } 551466ad822SKevin Wolf 552466ad822SKevin Wolf /* Take some limits from the children as a default */ 553466ad822SKevin Wolf if (bs->file) { 5543baca891SKevin Wolf bdrv_refresh_limits(bs->file, &local_err); 5553baca891SKevin Wolf if (local_err) { 5563baca891SKevin Wolf error_propagate(errp, local_err); 5573baca891SKevin Wolf return; 5583baca891SKevin Wolf } 559466ad822SKevin Wolf bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length; 5602647fab5SPeter Lieven bs->bl.max_transfer_length = bs->file->bl.max_transfer_length; 561339064d5SKevin Wolf bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment; 562339064d5SKevin Wolf } else { 563339064d5SKevin Wolf bs->bl.opt_mem_alignment = 512; 564466ad822SKevin Wolf } 565466ad822SKevin Wolf 566466ad822SKevin Wolf if (bs->backing_hd) { 5673baca891SKevin Wolf bdrv_refresh_limits(bs->backing_hd, &local_err); 5683baca891SKevin Wolf if (local_err) { 5693baca891SKevin Wolf error_propagate(errp, local_err); 5703baca891SKevin Wolf return; 5713baca891SKevin Wolf } 572466ad822SKevin Wolf bs->bl.opt_transfer_length = 573466ad822SKevin Wolf MAX(bs->bl.opt_transfer_length, 574466ad822SKevin Wolf bs->backing_hd->bl.opt_transfer_length); 5752647fab5SPeter Lieven bs->bl.max_transfer_length = 5762647fab5SPeter Lieven MIN_NON_ZERO(bs->bl.max_transfer_length, 5772647fab5SPeter Lieven bs->backing_hd->bl.max_transfer_length); 578339064d5SKevin Wolf bs->bl.opt_mem_alignment = 579339064d5SKevin Wolf MAX(bs->bl.opt_mem_alignment, 580339064d5SKevin Wolf bs->backing_hd->bl.opt_mem_alignment); 581466ad822SKevin Wolf } 582466ad822SKevin Wolf 583466ad822SKevin Wolf /* Then let the driver override it */ 584466ad822SKevin Wolf if (drv->bdrv_refresh_limits) { 5853baca891SKevin Wolf 
drv->bdrv_refresh_limits(bs, errp); 586d34682cdSKevin Wolf } 587d34682cdSKevin Wolf } 588d34682cdSKevin Wolf 589892b7de8SEkaterina Tumanova /** 590892b7de8SEkaterina Tumanova * Try to get @bs's logical and physical block size. 591892b7de8SEkaterina Tumanova * On success, store them in @bsz struct and return 0. 592892b7de8SEkaterina Tumanova * On failure return -errno. 593892b7de8SEkaterina Tumanova * @bs must not be empty. 594892b7de8SEkaterina Tumanova */ 595892b7de8SEkaterina Tumanova int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) 596892b7de8SEkaterina Tumanova { 597892b7de8SEkaterina Tumanova BlockDriver *drv = bs->drv; 598892b7de8SEkaterina Tumanova 599892b7de8SEkaterina Tumanova if (drv && drv->bdrv_probe_blocksizes) { 600892b7de8SEkaterina Tumanova return drv->bdrv_probe_blocksizes(bs, bsz); 601892b7de8SEkaterina Tumanova } 602892b7de8SEkaterina Tumanova 603892b7de8SEkaterina Tumanova return -ENOTSUP; 604892b7de8SEkaterina Tumanova } 605892b7de8SEkaterina Tumanova 606892b7de8SEkaterina Tumanova /** 607892b7de8SEkaterina Tumanova * Try to get @bs's geometry (cyls, heads, sectors). 608892b7de8SEkaterina Tumanova * On success, store them in @geo struct and return 0. 609892b7de8SEkaterina Tumanova * On failure return -errno. 610892b7de8SEkaterina Tumanova * @bs must not be empty. 611892b7de8SEkaterina Tumanova */ 612892b7de8SEkaterina Tumanova int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo) 613892b7de8SEkaterina Tumanova { 614892b7de8SEkaterina Tumanova BlockDriver *drv = bs->drv; 615892b7de8SEkaterina Tumanova 616892b7de8SEkaterina Tumanova if (drv && drv->bdrv_probe_geometry) { 617892b7de8SEkaterina Tumanova return drv->bdrv_probe_geometry(bs, geo); 618892b7de8SEkaterina Tumanova } 619892b7de8SEkaterina Tumanova 620892b7de8SEkaterina Tumanova return -ENOTSUP; 621892b7de8SEkaterina Tumanova } 622892b7de8SEkaterina Tumanova 623eba25057SJim Meyering /* 624eba25057SJim Meyering * Create a uniquely-named empty temporary file. 
625eba25057SJim Meyering * Return 0 upon success, otherwise a negative errno value. 626eba25057SJim Meyering */ 627eba25057SJim Meyering int get_tmp_filename(char *filename, int size) 628eba25057SJim Meyering { 629d5249393Sbellard #ifdef _WIN32 6303b9f94e1Sbellard char temp_dir[MAX_PATH]; 631eba25057SJim Meyering /* GetTempFileName requires that its output buffer (4th param) 632eba25057SJim Meyering have length MAX_PATH or greater. */ 633eba25057SJim Meyering assert(size >= MAX_PATH); 634eba25057SJim Meyering return (GetTempPath(MAX_PATH, temp_dir) 635eba25057SJim Meyering && GetTempFileName(temp_dir, "qem", 0, filename) 636eba25057SJim Meyering ? 0 : -GetLastError()); 637d5249393Sbellard #else 638ea2384d3Sbellard int fd; 6397ccfb2ebSblueswir1 const char *tmpdir; 6400badc1eeSaurel32 tmpdir = getenv("TMPDIR"); 64169bef793SAmit Shah if (!tmpdir) { 64269bef793SAmit Shah tmpdir = "/var/tmp"; 64369bef793SAmit Shah } 644eba25057SJim Meyering if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) { 645eba25057SJim Meyering return -EOVERFLOW; 646ea2384d3Sbellard } 647eba25057SJim Meyering fd = mkstemp(filename); 648fe235a06SDunrong Huang if (fd < 0) { 649fe235a06SDunrong Huang return -errno; 650fe235a06SDunrong Huang } 651fe235a06SDunrong Huang if (close(fd) != 0) { 652fe235a06SDunrong Huang unlink(filename); 653eba25057SJim Meyering return -errno; 654eba25057SJim Meyering } 655eba25057SJim Meyering return 0; 656d5249393Sbellard #endif 657eba25057SJim Meyering } 658ea2384d3Sbellard 659f3a5d3f8SChristoph Hellwig /* 660f3a5d3f8SChristoph Hellwig * Detect host devices. By convention, /dev/cdrom[N] is always 661f3a5d3f8SChristoph Hellwig * recognized as a host CDROM. 
662f3a5d3f8SChristoph Hellwig */ 663f3a5d3f8SChristoph Hellwig static BlockDriver *find_hdev_driver(const char *filename) 664f3a5d3f8SChristoph Hellwig { 665508c7cb3SChristoph Hellwig int score_max = 0, score; 666508c7cb3SChristoph Hellwig BlockDriver *drv = NULL, *d; 667f3a5d3f8SChristoph Hellwig 6688a22f02aSStefan Hajnoczi QLIST_FOREACH(d, &bdrv_drivers, list) { 669508c7cb3SChristoph Hellwig if (d->bdrv_probe_device) { 670508c7cb3SChristoph Hellwig score = d->bdrv_probe_device(filename); 671508c7cb3SChristoph Hellwig if (score > score_max) { 672508c7cb3SChristoph Hellwig score_max = score; 673508c7cb3SChristoph Hellwig drv = d; 674f3a5d3f8SChristoph Hellwig } 675508c7cb3SChristoph Hellwig } 676f3a5d3f8SChristoph Hellwig } 677f3a5d3f8SChristoph Hellwig 678508c7cb3SChristoph Hellwig return drv; 679f3a5d3f8SChristoph Hellwig } 680f3a5d3f8SChristoph Hellwig 68198289620SKevin Wolf BlockDriver *bdrv_find_protocol(const char *filename, 682b65a5e12SMax Reitz bool allow_protocol_prefix, 683b65a5e12SMax Reitz Error **errp) 68484a12e66SChristoph Hellwig { 68584a12e66SChristoph Hellwig BlockDriver *drv1; 68684a12e66SChristoph Hellwig char protocol[128]; 68784a12e66SChristoph Hellwig int len; 68884a12e66SChristoph Hellwig const char *p; 68984a12e66SChristoph Hellwig 69066f82ceeSKevin Wolf /* TODO Drivers without bdrv_file_open must be specified explicitly */ 69166f82ceeSKevin Wolf 69239508e7aSChristoph Hellwig /* 69339508e7aSChristoph Hellwig * XXX(hch): we really should not let host device detection 69439508e7aSChristoph Hellwig * override an explicit protocol specification, but moving this 69539508e7aSChristoph Hellwig * later breaks access to device names with colons in them. 69639508e7aSChristoph Hellwig * Thanks to the brain-dead persistent naming schemes on udev- 69739508e7aSChristoph Hellwig * based Linux systems those actually are quite common. 
69839508e7aSChristoph Hellwig */ 69984a12e66SChristoph Hellwig drv1 = find_hdev_driver(filename); 70039508e7aSChristoph Hellwig if (drv1) { 70184a12e66SChristoph Hellwig return drv1; 70284a12e66SChristoph Hellwig } 70339508e7aSChristoph Hellwig 70498289620SKevin Wolf if (!path_has_protocol(filename) || !allow_protocol_prefix) { 705ef810437SMax Reitz return &bdrv_file; 70639508e7aSChristoph Hellwig } 70798289620SKevin Wolf 7089e0b22f4SStefan Hajnoczi p = strchr(filename, ':'); 7099e0b22f4SStefan Hajnoczi assert(p != NULL); 71084a12e66SChristoph Hellwig len = p - filename; 71184a12e66SChristoph Hellwig if (len > sizeof(protocol) - 1) 71284a12e66SChristoph Hellwig len = sizeof(protocol) - 1; 71384a12e66SChristoph Hellwig memcpy(protocol, filename, len); 71484a12e66SChristoph Hellwig protocol[len] = '\0'; 71584a12e66SChristoph Hellwig QLIST_FOREACH(drv1, &bdrv_drivers, list) { 71684a12e66SChristoph Hellwig if (drv1->protocol_name && 71784a12e66SChristoph Hellwig !strcmp(drv1->protocol_name, protocol)) { 71884a12e66SChristoph Hellwig return drv1; 71984a12e66SChristoph Hellwig } 72084a12e66SChristoph Hellwig } 721b65a5e12SMax Reitz 722b65a5e12SMax Reitz error_setg(errp, "Unknown protocol '%s'", protocol); 72384a12e66SChristoph Hellwig return NULL; 72484a12e66SChristoph Hellwig } 72584a12e66SChristoph Hellwig 726c6684249SMarkus Armbruster /* 727c6684249SMarkus Armbruster * Guess image format by probing its contents. 728c6684249SMarkus Armbruster * This is not a good idea when your image is raw (CVE-2008-2004), but 729c6684249SMarkus Armbruster * we do it anyway for backward compatibility. 730c6684249SMarkus Armbruster * 731c6684249SMarkus Armbruster * @buf contains the image's first @buf_size bytes. 7327cddd372SKevin Wolf * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE, 7337cddd372SKevin Wolf * but can be smaller if the image file is smaller) 734c6684249SMarkus Armbruster * @filename is its filename. 
735c6684249SMarkus Armbruster * 736c6684249SMarkus Armbruster * For all block drivers, call the bdrv_probe() method to get its 737c6684249SMarkus Armbruster * probing score. 738c6684249SMarkus Armbruster * Return the first block driver with the highest probing score. 739c6684249SMarkus Armbruster */ 74038f3ef57SKevin Wolf BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, 741c6684249SMarkus Armbruster const char *filename) 742c6684249SMarkus Armbruster { 743c6684249SMarkus Armbruster int score_max = 0, score; 744c6684249SMarkus Armbruster BlockDriver *drv = NULL, *d; 745c6684249SMarkus Armbruster 746c6684249SMarkus Armbruster QLIST_FOREACH(d, &bdrv_drivers, list) { 747c6684249SMarkus Armbruster if (d->bdrv_probe) { 748c6684249SMarkus Armbruster score = d->bdrv_probe(buf, buf_size, filename); 749c6684249SMarkus Armbruster if (score > score_max) { 750c6684249SMarkus Armbruster score_max = score; 751c6684249SMarkus Armbruster drv = d; 752c6684249SMarkus Armbruster } 753c6684249SMarkus Armbruster } 754c6684249SMarkus Armbruster } 755c6684249SMarkus Armbruster 756c6684249SMarkus Armbruster return drv; 757c6684249SMarkus Armbruster } 758c6684249SMarkus Armbruster 759f500a6d3SKevin Wolf static int find_image_format(BlockDriverState *bs, const char *filename, 76034b5d2c6SMax Reitz BlockDriver **pdrv, Error **errp) 761ea2384d3Sbellard { 762c6684249SMarkus Armbruster BlockDriver *drv; 7637cddd372SKevin Wolf uint8_t buf[BLOCK_PROBE_BUF_SIZE]; 764f500a6d3SKevin Wolf int ret = 0; 765f8ea0b00SNicholas Bellinger 76608a00559SKevin Wolf /* Return the raw BlockDriver * to scsi-generic devices or empty drives */ 7678e895599SPaolo Bonzini if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) { 768ef810437SMax Reitz *pdrv = &bdrv_raw; 769c98ac35dSStefan Weil return ret; 7701a396859SNicholas A. 
Bellinger } 771f8ea0b00SNicholas Bellinger 77283f64091Sbellard ret = bdrv_pread(bs, 0, buf, sizeof(buf)); 773ea2384d3Sbellard if (ret < 0) { 77434b5d2c6SMax Reitz error_setg_errno(errp, -ret, "Could not read image for determining its " 77534b5d2c6SMax Reitz "format"); 776c98ac35dSStefan Weil *pdrv = NULL; 777c98ac35dSStefan Weil return ret; 778ea2384d3Sbellard } 779ea2384d3Sbellard 780c6684249SMarkus Armbruster drv = bdrv_probe_all(buf, ret, filename); 781c98ac35dSStefan Weil if (!drv) { 78234b5d2c6SMax Reitz error_setg(errp, "Could not determine image format: No compatible " 78334b5d2c6SMax Reitz "driver found"); 784c98ac35dSStefan Weil ret = -ENOENT; 785c98ac35dSStefan Weil } 786c98ac35dSStefan Weil *pdrv = drv; 787c98ac35dSStefan Weil return ret; 788ea2384d3Sbellard } 789ea2384d3Sbellard 79051762288SStefan Hajnoczi /** 79151762288SStefan Hajnoczi * Set the current 'total_sectors' value 79265a9bb25SMarkus Armbruster * Return 0 on success, -errno on error. 79351762288SStefan Hajnoczi */ 79451762288SStefan Hajnoczi static int refresh_total_sectors(BlockDriverState *bs, int64_t hint) 79551762288SStefan Hajnoczi { 79651762288SStefan Hajnoczi BlockDriver *drv = bs->drv; 79751762288SStefan Hajnoczi 798396759adSNicholas Bellinger /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */ 799396759adSNicholas Bellinger if (bs->sg) 800396759adSNicholas Bellinger return 0; 801396759adSNicholas Bellinger 80251762288SStefan Hajnoczi /* query actual device if possible, otherwise just trust the hint */ 80351762288SStefan Hajnoczi if (drv->bdrv_getlength) { 80451762288SStefan Hajnoczi int64_t length = drv->bdrv_getlength(bs); 80551762288SStefan Hajnoczi if (length < 0) { 80651762288SStefan Hajnoczi return length; 80751762288SStefan Hajnoczi } 8087e382003SFam Zheng hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE); 80951762288SStefan Hajnoczi } 81051762288SStefan Hajnoczi 81151762288SStefan Hajnoczi bs->total_sectors = hint; 81251762288SStefan Hajnoczi return 0; 
81351762288SStefan Hajnoczi } 81451762288SStefan Hajnoczi 815c3993cdcSStefan Hajnoczi /** 8169e8f1835SPaolo Bonzini * Set open flags for a given discard mode 8179e8f1835SPaolo Bonzini * 8189e8f1835SPaolo Bonzini * Return 0 on success, -1 if the discard mode was invalid. 8199e8f1835SPaolo Bonzini */ 8209e8f1835SPaolo Bonzini int bdrv_parse_discard_flags(const char *mode, int *flags) 8219e8f1835SPaolo Bonzini { 8229e8f1835SPaolo Bonzini *flags &= ~BDRV_O_UNMAP; 8239e8f1835SPaolo Bonzini 8249e8f1835SPaolo Bonzini if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) { 8259e8f1835SPaolo Bonzini /* do nothing */ 8269e8f1835SPaolo Bonzini } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) { 8279e8f1835SPaolo Bonzini *flags |= BDRV_O_UNMAP; 8289e8f1835SPaolo Bonzini } else { 8299e8f1835SPaolo Bonzini return -1; 8309e8f1835SPaolo Bonzini } 8319e8f1835SPaolo Bonzini 8329e8f1835SPaolo Bonzini return 0; 8339e8f1835SPaolo Bonzini } 8349e8f1835SPaolo Bonzini 8359e8f1835SPaolo Bonzini /** 836c3993cdcSStefan Hajnoczi * Set open flags for a given cache mode 837c3993cdcSStefan Hajnoczi * 838c3993cdcSStefan Hajnoczi * Return 0 on success, -1 if the cache mode was invalid. 
839c3993cdcSStefan Hajnoczi */ 840c3993cdcSStefan Hajnoczi int bdrv_parse_cache_flags(const char *mode, int *flags) 841c3993cdcSStefan Hajnoczi { 842c3993cdcSStefan Hajnoczi *flags &= ~BDRV_O_CACHE_MASK; 843c3993cdcSStefan Hajnoczi 844c3993cdcSStefan Hajnoczi if (!strcmp(mode, "off") || !strcmp(mode, "none")) { 845c3993cdcSStefan Hajnoczi *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB; 84692196b2fSStefan Hajnoczi } else if (!strcmp(mode, "directsync")) { 84792196b2fSStefan Hajnoczi *flags |= BDRV_O_NOCACHE; 848c3993cdcSStefan Hajnoczi } else if (!strcmp(mode, "writeback")) { 849c3993cdcSStefan Hajnoczi *flags |= BDRV_O_CACHE_WB; 850c3993cdcSStefan Hajnoczi } else if (!strcmp(mode, "unsafe")) { 851c3993cdcSStefan Hajnoczi *flags |= BDRV_O_CACHE_WB; 852c3993cdcSStefan Hajnoczi *flags |= BDRV_O_NO_FLUSH; 853c3993cdcSStefan Hajnoczi } else if (!strcmp(mode, "writethrough")) { 854c3993cdcSStefan Hajnoczi /* this is the default */ 855c3993cdcSStefan Hajnoczi } else { 856c3993cdcSStefan Hajnoczi return -1; 857c3993cdcSStefan Hajnoczi } 858c3993cdcSStefan Hajnoczi 859c3993cdcSStefan Hajnoczi return 0; 860c3993cdcSStefan Hajnoczi } 861c3993cdcSStefan Hajnoczi 86253fec9d3SStefan Hajnoczi /** 86353fec9d3SStefan Hajnoczi * The copy-on-read flag is actually a reference count so multiple users may 86453fec9d3SStefan Hajnoczi * use the feature without worrying about clobbering its previous state. 86553fec9d3SStefan Hajnoczi * Copy-on-read stays enabled until all users have called to disable it. 
86653fec9d3SStefan Hajnoczi */ 86753fec9d3SStefan Hajnoczi void bdrv_enable_copy_on_read(BlockDriverState *bs) 86853fec9d3SStefan Hajnoczi { 86953fec9d3SStefan Hajnoczi bs->copy_on_read++; 87053fec9d3SStefan Hajnoczi } 87153fec9d3SStefan Hajnoczi 87253fec9d3SStefan Hajnoczi void bdrv_disable_copy_on_read(BlockDriverState *bs) 87353fec9d3SStefan Hajnoczi { 87453fec9d3SStefan Hajnoczi assert(bs->copy_on_read > 0); 87553fec9d3SStefan Hajnoczi bs->copy_on_read--; 87653fec9d3SStefan Hajnoczi } 87753fec9d3SStefan Hajnoczi 8780b50cc88SKevin Wolf /* 879b1e6fc08SKevin Wolf * Returns the flags that a temporary snapshot should get, based on the 880b1e6fc08SKevin Wolf * originally requested flags (the originally requested image will have flags 881b1e6fc08SKevin Wolf * like a backing file) 882b1e6fc08SKevin Wolf */ 883b1e6fc08SKevin Wolf static int bdrv_temp_snapshot_flags(int flags) 884b1e6fc08SKevin Wolf { 885b1e6fc08SKevin Wolf return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY; 886b1e6fc08SKevin Wolf } 887b1e6fc08SKevin Wolf 888b1e6fc08SKevin Wolf /* 8890b50cc88SKevin Wolf * Returns the flags that bs->file should get, based on the given flags for 8900b50cc88SKevin Wolf * the parent BDS 8910b50cc88SKevin Wolf */ 8920b50cc88SKevin Wolf static int bdrv_inherited_flags(int flags) 8930b50cc88SKevin Wolf { 8940b50cc88SKevin Wolf /* Enable protocol handling, disable format probing for bs->file */ 8950b50cc88SKevin Wolf flags |= BDRV_O_PROTOCOL; 8960b50cc88SKevin Wolf 8970b50cc88SKevin Wolf /* Our block drivers take care to send flushes and respect unmap policy, 8980b50cc88SKevin Wolf * so we can enable both unconditionally on lower layers. 
*/ 8990b50cc88SKevin Wolf flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP; 9000b50cc88SKevin Wolf 9010b50cc88SKevin Wolf /* Clear flags that only apply to the top layer */ 9025669b44dSKevin Wolf flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ); 9030b50cc88SKevin Wolf 9040b50cc88SKevin Wolf return flags; 9050b50cc88SKevin Wolf } 9060b50cc88SKevin Wolf 907317fc44eSKevin Wolf /* 908317fc44eSKevin Wolf * Returns the flags that bs->backing_hd should get, based on the given flags 909317fc44eSKevin Wolf * for the parent BDS 910317fc44eSKevin Wolf */ 911317fc44eSKevin Wolf static int bdrv_backing_flags(int flags) 912317fc44eSKevin Wolf { 913317fc44eSKevin Wolf /* backing files always opened read-only */ 914317fc44eSKevin Wolf flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ); 915317fc44eSKevin Wolf 916317fc44eSKevin Wolf /* snapshot=on is handled on the top layer */ 9178bfea15dSKevin Wolf flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY); 918317fc44eSKevin Wolf 919317fc44eSKevin Wolf return flags; 920317fc44eSKevin Wolf } 921317fc44eSKevin Wolf 9227b272452SKevin Wolf static int bdrv_open_flags(BlockDriverState *bs, int flags) 9237b272452SKevin Wolf { 9247b272452SKevin Wolf int open_flags = flags | BDRV_O_CACHE_WB; 9257b272452SKevin Wolf 9267b272452SKevin Wolf /* 9277b272452SKevin Wolf * Clear flags that are internal to the block layer before opening the 9287b272452SKevin Wolf * image. 9297b272452SKevin Wolf */ 93020cca275SKevin Wolf open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL); 9317b272452SKevin Wolf 9327b272452SKevin Wolf /* 9337b272452SKevin Wolf * Snapshots should be writable. 
9347b272452SKevin Wolf */ 9358bfea15dSKevin Wolf if (flags & BDRV_O_TEMPORARY) { 9367b272452SKevin Wolf open_flags |= BDRV_O_RDWR; 9377b272452SKevin Wolf } 9387b272452SKevin Wolf 9397b272452SKevin Wolf return open_flags; 9407b272452SKevin Wolf } 9417b272452SKevin Wolf 942636ea370SKevin Wolf static void bdrv_assign_node_name(BlockDriverState *bs, 9436913c0c2SBenoît Canet const char *node_name, 9446913c0c2SBenoît Canet Error **errp) 9456913c0c2SBenoît Canet { 9466913c0c2SBenoît Canet if (!node_name) { 947636ea370SKevin Wolf return; 9486913c0c2SBenoît Canet } 9496913c0c2SBenoît Canet 9509aebf3b8SKevin Wolf /* Check for empty string or invalid characters */ 951f5bebbbbSMarkus Armbruster if (!id_wellformed(node_name)) { 9529aebf3b8SKevin Wolf error_setg(errp, "Invalid node name"); 953636ea370SKevin Wolf return; 9546913c0c2SBenoît Canet } 9556913c0c2SBenoît Canet 9560c5e94eeSBenoît Canet /* takes care of avoiding namespaces collisions */ 9577f06d47eSMarkus Armbruster if (blk_by_name(node_name)) { 9580c5e94eeSBenoît Canet error_setg(errp, "node-name=%s is conflicting with a device id", 9590c5e94eeSBenoît Canet node_name); 960636ea370SKevin Wolf return; 9610c5e94eeSBenoît Canet } 9620c5e94eeSBenoît Canet 9636913c0c2SBenoît Canet /* takes care of avoiding duplicates node names */ 9646913c0c2SBenoît Canet if (bdrv_find_node(node_name)) { 9656913c0c2SBenoît Canet error_setg(errp, "Duplicate node name"); 966636ea370SKevin Wolf return; 9676913c0c2SBenoît Canet } 9686913c0c2SBenoît Canet 9696913c0c2SBenoît Canet /* copy node name into the bs and insert it into the graph list */ 9706913c0c2SBenoît Canet pstrcpy(bs->node_name, sizeof(bs->node_name), node_name); 9716913c0c2SBenoît Canet QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list); 9726913c0c2SBenoît Canet } 9736913c0c2SBenoît Canet 974b6ce07aaSKevin Wolf /* 97557915332SKevin Wolf * Common part for opening disk images and files 976b6ad491aSKevin Wolf * 977b6ad491aSKevin Wolf * Removes all processed options from *options. 
97857915332SKevin Wolf */ 979f500a6d3SKevin Wolf static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, 98034b5d2c6SMax Reitz QDict *options, int flags, BlockDriver *drv, Error **errp) 98157915332SKevin Wolf { 98257915332SKevin Wolf int ret, open_flags; 983035fccdfSKevin Wolf const char *filename; 9846913c0c2SBenoît Canet const char *node_name = NULL; 98534b5d2c6SMax Reitz Error *local_err = NULL; 98657915332SKevin Wolf 98757915332SKevin Wolf assert(drv != NULL); 9886405875cSPaolo Bonzini assert(bs->file == NULL); 989707ff828SKevin Wolf assert(options != NULL && bs->options != options); 99057915332SKevin Wolf 99145673671SKevin Wolf if (file != NULL) { 99245673671SKevin Wolf filename = file->filename; 99345673671SKevin Wolf } else { 99445673671SKevin Wolf filename = qdict_get_try_str(options, "filename"); 99545673671SKevin Wolf } 99645673671SKevin Wolf 997765003dbSKevin Wolf if (drv->bdrv_needs_filename && !filename) { 998765003dbSKevin Wolf error_setg(errp, "The '%s' block driver requires a file name", 999765003dbSKevin Wolf drv->format_name); 1000765003dbSKevin Wolf return -EINVAL; 1001765003dbSKevin Wolf } 1002765003dbSKevin Wolf 100345673671SKevin Wolf trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name); 100428dcee10SStefan Hajnoczi 10056913c0c2SBenoît Canet node_name = qdict_get_try_str(options, "node-name"); 1006636ea370SKevin Wolf bdrv_assign_node_name(bs, node_name, &local_err); 10070fb6395cSMarkus Armbruster if (local_err) { 1008636ea370SKevin Wolf error_propagate(errp, local_err); 1009636ea370SKevin Wolf return -EINVAL; 10106913c0c2SBenoît Canet } 10116913c0c2SBenoît Canet qdict_del(options, "node-name"); 10126913c0c2SBenoît Canet 10135d186eb0SKevin Wolf /* bdrv_open() with directly using a protocol as drv. This layer is already 10145d186eb0SKevin Wolf * opened, so assign it to bs (while file becomes a closed BlockDriverState) 10155d186eb0SKevin Wolf * and return immediately. 
*/ 10165d186eb0SKevin Wolf if (file != NULL && drv->bdrv_file_open) { 10175d186eb0SKevin Wolf bdrv_swap(file, bs); 10185d186eb0SKevin Wolf return 0; 10195d186eb0SKevin Wolf } 10205d186eb0SKevin Wolf 102157915332SKevin Wolf bs->open_flags = flags; 10221b7fd729SPaolo Bonzini bs->guest_block_size = 512; 1023c25f53b0SPaolo Bonzini bs->request_alignment = 512; 10240d51b4deSAsias He bs->zero_beyond_eof = true; 1025b64ec4e4SFam Zheng open_flags = bdrv_open_flags(bs, flags); 1026b64ec4e4SFam Zheng bs->read_only = !(open_flags & BDRV_O_RDWR); 1027b64ec4e4SFam Zheng 1028b64ec4e4SFam Zheng if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) { 10298f94a6e4SKevin Wolf error_setg(errp, 10308f94a6e4SKevin Wolf !bs->read_only && bdrv_is_whitelisted(drv, true) 10318f94a6e4SKevin Wolf ? "Driver '%s' can only be used for read-only devices" 10328f94a6e4SKevin Wolf : "Driver '%s' is not whitelisted", 10338f94a6e4SKevin Wolf drv->format_name); 1034b64ec4e4SFam Zheng return -ENOTSUP; 1035b64ec4e4SFam Zheng } 103657915332SKevin Wolf 103753fec9d3SStefan Hajnoczi assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */ 10380ebd24e0SKevin Wolf if (flags & BDRV_O_COPY_ON_READ) { 10390ebd24e0SKevin Wolf if (!bs->read_only) { 104053fec9d3SStefan Hajnoczi bdrv_enable_copy_on_read(bs); 10410ebd24e0SKevin Wolf } else { 10420ebd24e0SKevin Wolf error_setg(errp, "Can't use copy-on-read on read-only device"); 10430ebd24e0SKevin Wolf return -EINVAL; 10440ebd24e0SKevin Wolf } 104553fec9d3SStefan Hajnoczi } 104653fec9d3SStefan Hajnoczi 1047c2ad1b0cSKevin Wolf if (filename != NULL) { 104857915332SKevin Wolf pstrcpy(bs->filename, sizeof(bs->filename), filename); 1049c2ad1b0cSKevin Wolf } else { 1050c2ad1b0cSKevin Wolf bs->filename[0] = '\0'; 1051c2ad1b0cSKevin Wolf } 105291af7014SMax Reitz pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename); 105357915332SKevin Wolf 105457915332SKevin Wolf bs->drv = drv; 10557267c094SAnthony Liguori bs->opaque = 
g_malloc0(drv->instance_size); 105657915332SKevin Wolf 105703f541bdSStefan Hajnoczi bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB); 1058e7c63796SStefan Hajnoczi 105966f82ceeSKevin Wolf /* Open the image, either directly or using a protocol */ 106066f82ceeSKevin Wolf if (drv->bdrv_file_open) { 10615d186eb0SKevin Wolf assert(file == NULL); 1062030be321SBenoît Canet assert(!drv->bdrv_needs_filename || filename != NULL); 106334b5d2c6SMax Reitz ret = drv->bdrv_file_open(bs, options, open_flags, &local_err); 1064f500a6d3SKevin Wolf } else { 10652af5ef70SKevin Wolf if (file == NULL) { 106634b5d2c6SMax Reitz error_setg(errp, "Can't use '%s' as a block driver for the " 106734b5d2c6SMax Reitz "protocol level", drv->format_name); 10682af5ef70SKevin Wolf ret = -EINVAL; 10692af5ef70SKevin Wolf goto free_and_fail; 10702af5ef70SKevin Wolf } 1071f500a6d3SKevin Wolf bs->file = file; 107234b5d2c6SMax Reitz ret = drv->bdrv_open(bs, options, open_flags, &local_err); 107366f82ceeSKevin Wolf } 107466f82ceeSKevin Wolf 107557915332SKevin Wolf if (ret < 0) { 107684d18f06SMarkus Armbruster if (local_err) { 107734b5d2c6SMax Reitz error_propagate(errp, local_err); 10782fa9aa59SDunrong Huang } else if (bs->filename[0]) { 10792fa9aa59SDunrong Huang error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename); 108034b5d2c6SMax Reitz } else { 108134b5d2c6SMax Reitz error_setg_errno(errp, -ret, "Could not open image"); 108234b5d2c6SMax Reitz } 108357915332SKevin Wolf goto free_and_fail; 108457915332SKevin Wolf } 108557915332SKevin Wolf 1086a1f688f4SMarkus Armbruster if (bs->encrypted) { 1087a1f688f4SMarkus Armbruster error_report("Encrypted images are deprecated"); 1088a1f688f4SMarkus Armbruster error_printf("Support for them will be removed in a future release.\n" 1089a1f688f4SMarkus Armbruster "You can use 'qemu-img convert' to convert your image" 1090a1f688f4SMarkus Armbruster " to an unencrypted one.\n"); 1091a1f688f4SMarkus Armbruster } 1092a1f688f4SMarkus Armbruster 
109351762288SStefan Hajnoczi ret = refresh_total_sectors(bs, bs->total_sectors); 109451762288SStefan Hajnoczi if (ret < 0) { 109534b5d2c6SMax Reitz error_setg_errno(errp, -ret, "Could not refresh total sector count"); 109651762288SStefan Hajnoczi goto free_and_fail; 109757915332SKevin Wolf } 109851762288SStefan Hajnoczi 10993baca891SKevin Wolf bdrv_refresh_limits(bs, &local_err); 11003baca891SKevin Wolf if (local_err) { 11013baca891SKevin Wolf error_propagate(errp, local_err); 11023baca891SKevin Wolf ret = -EINVAL; 11033baca891SKevin Wolf goto free_and_fail; 11043baca891SKevin Wolf } 11053baca891SKevin Wolf 1106c25f53b0SPaolo Bonzini assert(bdrv_opt_mem_align(bs) != 0); 110747ea2de2SKevin Wolf assert((bs->request_alignment != 0) || bs->sg); 110857915332SKevin Wolf return 0; 110957915332SKevin Wolf 111057915332SKevin Wolf free_and_fail: 111166f82ceeSKevin Wolf bs->file = NULL; 11127267c094SAnthony Liguori g_free(bs->opaque); 111357915332SKevin Wolf bs->opaque = NULL; 111457915332SKevin Wolf bs->drv = NULL; 111557915332SKevin Wolf return ret; 111657915332SKevin Wolf } 111757915332SKevin Wolf 11185e5c4f63SKevin Wolf static QDict *parse_json_filename(const char *filename, Error **errp) 11195e5c4f63SKevin Wolf { 11205e5c4f63SKevin Wolf QObject *options_obj; 11215e5c4f63SKevin Wolf QDict *options; 11225e5c4f63SKevin Wolf int ret; 11235e5c4f63SKevin Wolf 11245e5c4f63SKevin Wolf ret = strstart(filename, "json:", &filename); 11255e5c4f63SKevin Wolf assert(ret); 11265e5c4f63SKevin Wolf 11275e5c4f63SKevin Wolf options_obj = qobject_from_json(filename); 11285e5c4f63SKevin Wolf if (!options_obj) { 11295e5c4f63SKevin Wolf error_setg(errp, "Could not parse the JSON options"); 11305e5c4f63SKevin Wolf return NULL; 11315e5c4f63SKevin Wolf } 11325e5c4f63SKevin Wolf 11335e5c4f63SKevin Wolf if (qobject_type(options_obj) != QTYPE_QDICT) { 11345e5c4f63SKevin Wolf qobject_decref(options_obj); 11355e5c4f63SKevin Wolf error_setg(errp, "Invalid JSON object given"); 11365e5c4f63SKevin Wolf 
return NULL; 11375e5c4f63SKevin Wolf } 11385e5c4f63SKevin Wolf 11395e5c4f63SKevin Wolf options = qobject_to_qdict(options_obj); 11405e5c4f63SKevin Wolf qdict_flatten(options); 11415e5c4f63SKevin Wolf 11425e5c4f63SKevin Wolf return options; 11435e5c4f63SKevin Wolf } 11445e5c4f63SKevin Wolf 114557915332SKevin Wolf /* 1146f54120ffSKevin Wolf * Fills in default options for opening images and converts the legacy 1147f54120ffSKevin Wolf * filename/flags pair to option QDict entries. 1148f54120ffSKevin Wolf */ 11495e5c4f63SKevin Wolf static int bdrv_fill_options(QDict **options, const char **pfilename, int flags, 115017b005f1SKevin Wolf BlockDriver *drv, Error **errp) 1151f54120ffSKevin Wolf { 11525e5c4f63SKevin Wolf const char *filename = *pfilename; 1153f54120ffSKevin Wolf const char *drvname; 1154462f5bcfSKevin Wolf bool protocol = flags & BDRV_O_PROTOCOL; 1155f54120ffSKevin Wolf bool parse_filename = false; 1156f54120ffSKevin Wolf Error *local_err = NULL; 1157f54120ffSKevin Wolf 11585e5c4f63SKevin Wolf /* Parse json: pseudo-protocol */ 11595e5c4f63SKevin Wolf if (filename && g_str_has_prefix(filename, "json:")) { 11605e5c4f63SKevin Wolf QDict *json_options = parse_json_filename(filename, &local_err); 11615e5c4f63SKevin Wolf if (local_err) { 11625e5c4f63SKevin Wolf error_propagate(errp, local_err); 11635e5c4f63SKevin Wolf return -EINVAL; 11645e5c4f63SKevin Wolf } 11655e5c4f63SKevin Wolf 11665e5c4f63SKevin Wolf /* Options given in the filename have lower priority than options 11675e5c4f63SKevin Wolf * specified directly */ 11685e5c4f63SKevin Wolf qdict_join(*options, json_options, false); 11695e5c4f63SKevin Wolf QDECREF(json_options); 11705e5c4f63SKevin Wolf *pfilename = filename = NULL; 11715e5c4f63SKevin Wolf } 11725e5c4f63SKevin Wolf 1173f54120ffSKevin Wolf /* Fetch the file name from the options QDict if necessary */ 117417b005f1SKevin Wolf if (protocol && filename) { 1175f54120ffSKevin Wolf if (!qdict_haskey(*options, "filename")) { 1176f54120ffSKevin Wolf 
qdict_put(*options, "filename", qstring_from_str(filename)); 1177f54120ffSKevin Wolf parse_filename = true; 1178f54120ffSKevin Wolf } else { 1179f54120ffSKevin Wolf error_setg(errp, "Can't specify 'file' and 'filename' options at " 1180f54120ffSKevin Wolf "the same time"); 1181f54120ffSKevin Wolf return -EINVAL; 1182f54120ffSKevin Wolf } 1183f54120ffSKevin Wolf } 1184f54120ffSKevin Wolf 1185f54120ffSKevin Wolf /* Find the right block driver */ 1186f54120ffSKevin Wolf filename = qdict_get_try_str(*options, "filename"); 1187f54120ffSKevin Wolf drvname = qdict_get_try_str(*options, "driver"); 1188f54120ffSKevin Wolf 118917b005f1SKevin Wolf if (drv) { 119017b005f1SKevin Wolf if (drvname) { 119117b005f1SKevin Wolf error_setg(errp, "Driver specified twice"); 119217b005f1SKevin Wolf return -EINVAL; 119317b005f1SKevin Wolf } 119417b005f1SKevin Wolf drvname = drv->format_name; 119517b005f1SKevin Wolf qdict_put(*options, "driver", qstring_from_str(drvname)); 119617b005f1SKevin Wolf } else { 119717b005f1SKevin Wolf if (!drvname && protocol) { 1198f54120ffSKevin Wolf if (filename) { 1199b65a5e12SMax Reitz drv = bdrv_find_protocol(filename, parse_filename, errp); 1200f54120ffSKevin Wolf if (!drv) { 1201f54120ffSKevin Wolf return -EINVAL; 1202f54120ffSKevin Wolf } 1203f54120ffSKevin Wolf 1204f54120ffSKevin Wolf drvname = drv->format_name; 1205f54120ffSKevin Wolf qdict_put(*options, "driver", qstring_from_str(drvname)); 1206f54120ffSKevin Wolf } else { 1207f54120ffSKevin Wolf error_setg(errp, "Must specify either driver or file"); 1208f54120ffSKevin Wolf return -EINVAL; 1209f54120ffSKevin Wolf } 121017b005f1SKevin Wolf } else if (drvname) { 1211f54120ffSKevin Wolf drv = bdrv_find_format(drvname); 1212f54120ffSKevin Wolf if (!drv) { 1213f54120ffSKevin Wolf error_setg(errp, "Unknown driver '%s'", drvname); 1214f54120ffSKevin Wolf return -ENOENT; 1215f54120ffSKevin Wolf } 121617b005f1SKevin Wolf } 121717b005f1SKevin Wolf } 121817b005f1SKevin Wolf 121917b005f1SKevin Wolf assert(drv 
|| !protocol); 1220f54120ffSKevin Wolf 1221f54120ffSKevin Wolf /* Driver-specific filename parsing */ 122217b005f1SKevin Wolf if (drv && drv->bdrv_parse_filename && parse_filename) { 1223f54120ffSKevin Wolf drv->bdrv_parse_filename(filename, *options, &local_err); 1224f54120ffSKevin Wolf if (local_err) { 1225f54120ffSKevin Wolf error_propagate(errp, local_err); 1226f54120ffSKevin Wolf return -EINVAL; 1227f54120ffSKevin Wolf } 1228f54120ffSKevin Wolf 1229f54120ffSKevin Wolf if (!drv->bdrv_needs_filename) { 1230f54120ffSKevin Wolf qdict_del(*options, "filename"); 1231f54120ffSKevin Wolf } 1232f54120ffSKevin Wolf } 1233f54120ffSKevin Wolf 1234f54120ffSKevin Wolf return 0; 1235f54120ffSKevin Wolf } 1236f54120ffSKevin Wolf 12378d24cce1SFam Zheng void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) 12388d24cce1SFam Zheng { 12398d24cce1SFam Zheng 1240826b6ca0SFam Zheng if (bs->backing_hd) { 1241826b6ca0SFam Zheng assert(bs->backing_blocker); 1242826b6ca0SFam Zheng bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker); 1243826b6ca0SFam Zheng } else if (backing_hd) { 1244826b6ca0SFam Zheng error_setg(&bs->backing_blocker, 124581e5f78aSAlberto Garcia "node is used as backing hd of '%s'", 124681e5f78aSAlberto Garcia bdrv_get_device_or_node_name(bs)); 1247826b6ca0SFam Zheng } 1248826b6ca0SFam Zheng 12498d24cce1SFam Zheng bs->backing_hd = backing_hd; 12508d24cce1SFam Zheng if (!backing_hd) { 1251826b6ca0SFam Zheng error_free(bs->backing_blocker); 1252826b6ca0SFam Zheng bs->backing_blocker = NULL; 12538d24cce1SFam Zheng goto out; 12548d24cce1SFam Zheng } 12558d24cce1SFam Zheng bs->open_flags &= ~BDRV_O_NO_BACKING; 12568d24cce1SFam Zheng pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename); 12578d24cce1SFam Zheng pstrcpy(bs->backing_format, sizeof(bs->backing_format), 12588d24cce1SFam Zheng backing_hd->drv ? 
backing_hd->drv->format_name : ""); 1259826b6ca0SFam Zheng 1260826b6ca0SFam Zheng bdrv_op_block_all(bs->backing_hd, bs->backing_blocker); 1261826b6ca0SFam Zheng /* Otherwise we won't be able to commit due to check in bdrv_commit */ 1262bb00021dSFam Zheng bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, 1263826b6ca0SFam Zheng bs->backing_blocker); 12648d24cce1SFam Zheng out: 12653baca891SKevin Wolf bdrv_refresh_limits(bs, NULL); 12668d24cce1SFam Zheng } 12678d24cce1SFam Zheng 126831ca6d07SKevin Wolf /* 126931ca6d07SKevin Wolf * Opens the backing file for a BlockDriverState if not yet open 127031ca6d07SKevin Wolf * 127131ca6d07SKevin Wolf * options is a QDict of options to pass to the block drivers, or NULL for an 127231ca6d07SKevin Wolf * empty set of options. The reference to the QDict is transferred to this 127331ca6d07SKevin Wolf * function (even on failure), so if the caller intends to reuse the dictionary, 127431ca6d07SKevin Wolf * it needs to use QINCREF() before calling bdrv_file_open. 
127531ca6d07SKevin Wolf */ 127634b5d2c6SMax Reitz int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) 12779156df12SPaolo Bonzini { 12781ba4b6a5SBenoît Canet char *backing_filename = g_malloc0(PATH_MAX); 1279317fc44eSKevin Wolf int ret = 0; 12808d24cce1SFam Zheng BlockDriverState *backing_hd; 128134b5d2c6SMax Reitz Error *local_err = NULL; 12829156df12SPaolo Bonzini 12839156df12SPaolo Bonzini if (bs->backing_hd != NULL) { 128431ca6d07SKevin Wolf QDECREF(options); 12851ba4b6a5SBenoît Canet goto free_exit; 12869156df12SPaolo Bonzini } 12879156df12SPaolo Bonzini 128831ca6d07SKevin Wolf /* NULL means an empty set of options */ 128931ca6d07SKevin Wolf if (options == NULL) { 129031ca6d07SKevin Wolf options = qdict_new(); 129131ca6d07SKevin Wolf } 129231ca6d07SKevin Wolf 12939156df12SPaolo Bonzini bs->open_flags &= ~BDRV_O_NO_BACKING; 12941cb6f506SKevin Wolf if (qdict_haskey(options, "file.filename")) { 12951cb6f506SKevin Wolf backing_filename[0] = '\0'; 12961cb6f506SKevin Wolf } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) { 129731ca6d07SKevin Wolf QDECREF(options); 12981ba4b6a5SBenoît Canet goto free_exit; 1299dbecebddSFam Zheng } else { 13009f07429eSMax Reitz bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX, 13019f07429eSMax Reitz &local_err); 13029f07429eSMax Reitz if (local_err) { 13039f07429eSMax Reitz ret = -EINVAL; 13049f07429eSMax Reitz error_propagate(errp, local_err); 13059f07429eSMax Reitz QDECREF(options); 13069f07429eSMax Reitz goto free_exit; 13079f07429eSMax Reitz } 13089156df12SPaolo Bonzini } 13099156df12SPaolo Bonzini 13108ee79e70SKevin Wolf if (!bs->drv || !bs->drv->supports_backing) { 13118ee79e70SKevin Wolf ret = -EINVAL; 13128ee79e70SKevin Wolf error_setg(errp, "Driver doesn't support backing files"); 13138ee79e70SKevin Wolf QDECREF(options); 13148ee79e70SKevin Wolf goto free_exit; 13158ee79e70SKevin Wolf } 13168ee79e70SKevin Wolf 1317e4e9986bSMarkus Armbruster backing_hd = bdrv_new(); 
13188d24cce1SFam Zheng 1319c5f6e493SKevin Wolf if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) { 1320c5f6e493SKevin Wolf qdict_put(options, "driver", qstring_from_str(bs->backing_format)); 13219156df12SPaolo Bonzini } 13229156df12SPaolo Bonzini 1323f67503e5SMax Reitz assert(bs->backing_hd == NULL); 13248d24cce1SFam Zheng ret = bdrv_open(&backing_hd, 1325ddf5636dSMax Reitz *backing_filename ? backing_filename : NULL, NULL, options, 1326c5f6e493SKevin Wolf bdrv_backing_flags(bs->open_flags), NULL, &local_err); 13279156df12SPaolo Bonzini if (ret < 0) { 13288d24cce1SFam Zheng bdrv_unref(backing_hd); 13298d24cce1SFam Zheng backing_hd = NULL; 13309156df12SPaolo Bonzini bs->open_flags |= BDRV_O_NO_BACKING; 1331b04b6b6eSFam Zheng error_setg(errp, "Could not open backing file: %s", 1332b04b6b6eSFam Zheng error_get_pretty(local_err)); 1333b04b6b6eSFam Zheng error_free(local_err); 13341ba4b6a5SBenoît Canet goto free_exit; 13359156df12SPaolo Bonzini } 13368d24cce1SFam Zheng bdrv_set_backing_hd(bs, backing_hd); 1337d80ac658SPeter Feiner 13381ba4b6a5SBenoît Canet free_exit: 13391ba4b6a5SBenoît Canet g_free(backing_filename); 13401ba4b6a5SBenoît Canet return ret; 13419156df12SPaolo Bonzini } 13429156df12SPaolo Bonzini 1343b6ce07aaSKevin Wolf /* 1344da557aacSMax Reitz * Opens a disk image whose options are given as BlockdevRef in another block 1345da557aacSMax Reitz * device's options. 1346da557aacSMax Reitz * 1347da557aacSMax Reitz * If allow_none is true, no image will be opened if filename is false and no 1348da557aacSMax Reitz * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned. 1349da557aacSMax Reitz * 1350da557aacSMax Reitz * bdrev_key specifies the key for the image's BlockdevRef in the options QDict. 1351da557aacSMax Reitz * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict 1352da557aacSMax Reitz * itself, all options starting with "${bdref_key}." 
are considered part of the 1353da557aacSMax Reitz * BlockdevRef. 1354da557aacSMax Reitz * 1355da557aacSMax Reitz * The BlockdevRef will be removed from the options QDict. 1356f67503e5SMax Reitz * 1357f67503e5SMax Reitz * To conform with the behavior of bdrv_open(), *pbs has to be NULL. 1358da557aacSMax Reitz */ 1359da557aacSMax Reitz int bdrv_open_image(BlockDriverState **pbs, const char *filename, 1360da557aacSMax Reitz QDict *options, const char *bdref_key, int flags, 1361f7d9fd8cSMax Reitz bool allow_none, Error **errp) 1362da557aacSMax Reitz { 1363da557aacSMax Reitz QDict *image_options; 1364da557aacSMax Reitz int ret; 1365da557aacSMax Reitz char *bdref_key_dot; 1366da557aacSMax Reitz const char *reference; 1367da557aacSMax Reitz 1368f67503e5SMax Reitz assert(pbs); 1369f67503e5SMax Reitz assert(*pbs == NULL); 1370f67503e5SMax Reitz 1371da557aacSMax Reitz bdref_key_dot = g_strdup_printf("%s.", bdref_key); 1372da557aacSMax Reitz qdict_extract_subqdict(options, &image_options, bdref_key_dot); 1373da557aacSMax Reitz g_free(bdref_key_dot); 1374da557aacSMax Reitz 1375da557aacSMax Reitz reference = qdict_get_try_str(options, bdref_key); 1376da557aacSMax Reitz if (!filename && !reference && !qdict_size(image_options)) { 1377da557aacSMax Reitz if (allow_none) { 1378da557aacSMax Reitz ret = 0; 1379da557aacSMax Reitz } else { 1380da557aacSMax Reitz error_setg(errp, "A block device must be specified for \"%s\"", 1381da557aacSMax Reitz bdref_key); 1382da557aacSMax Reitz ret = -EINVAL; 1383da557aacSMax Reitz } 1384b20e61e0SMarkus Armbruster QDECREF(image_options); 1385da557aacSMax Reitz goto done; 1386da557aacSMax Reitz } 1387da557aacSMax Reitz 1388f7d9fd8cSMax Reitz ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp); 1389da557aacSMax Reitz 1390da557aacSMax Reitz done: 1391da557aacSMax Reitz qdict_del(options, bdref_key); 1392da557aacSMax Reitz return ret; 1393da557aacSMax Reitz } 1394da557aacSMax Reitz 13956b8aeca5SChen Gang int 
bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp) 1396b998875dSKevin Wolf { 1397b998875dSKevin Wolf /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */ 13981ba4b6a5SBenoît Canet char *tmp_filename = g_malloc0(PATH_MAX + 1); 1399b998875dSKevin Wolf int64_t total_size; 140083d0521aSChunyan Liu QemuOpts *opts = NULL; 1401b998875dSKevin Wolf QDict *snapshot_options; 1402b998875dSKevin Wolf BlockDriverState *bs_snapshot; 1403b998875dSKevin Wolf Error *local_err; 1404b998875dSKevin Wolf int ret; 1405b998875dSKevin Wolf 1406b998875dSKevin Wolf /* if snapshot, we create a temporary backing file and open it 1407b998875dSKevin Wolf instead of opening 'filename' directly */ 1408b998875dSKevin Wolf 1409b998875dSKevin Wolf /* Get the required size from the image */ 1410f187743aSKevin Wolf total_size = bdrv_getlength(bs); 1411f187743aSKevin Wolf if (total_size < 0) { 14126b8aeca5SChen Gang ret = total_size; 1413f187743aSKevin Wolf error_setg_errno(errp, -total_size, "Could not get image size"); 14141ba4b6a5SBenoît Canet goto out; 1415f187743aSKevin Wolf } 1416b998875dSKevin Wolf 1417b998875dSKevin Wolf /* Create the temporary image */ 14181ba4b6a5SBenoît Canet ret = get_tmp_filename(tmp_filename, PATH_MAX + 1); 1419b998875dSKevin Wolf if (ret < 0) { 1420b998875dSKevin Wolf error_setg_errno(errp, -ret, "Could not get temporary filename"); 14211ba4b6a5SBenoît Canet goto out; 1422b998875dSKevin Wolf } 1423b998875dSKevin Wolf 1424ef810437SMax Reitz opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0, 1425c282e1fdSChunyan Liu &error_abort); 142639101f25SMarkus Armbruster qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort); 1427ef810437SMax Reitz ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err); 142883d0521aSChunyan Liu qemu_opts_del(opts); 1429b998875dSKevin Wolf if (ret < 0) { 1430b998875dSKevin Wolf error_setg_errno(errp, -ret, "Could not create temporary overlay " 1431b998875dSKevin Wolf "'%s': %s", 
tmp_filename, 1432b998875dSKevin Wolf error_get_pretty(local_err)); 1433b998875dSKevin Wolf error_free(local_err); 14341ba4b6a5SBenoît Canet goto out; 1435b998875dSKevin Wolf } 1436b998875dSKevin Wolf 1437b998875dSKevin Wolf /* Prepare a new options QDict for the temporary file */ 1438b998875dSKevin Wolf snapshot_options = qdict_new(); 1439b998875dSKevin Wolf qdict_put(snapshot_options, "file.driver", 1440b998875dSKevin Wolf qstring_from_str("file")); 1441b998875dSKevin Wolf qdict_put(snapshot_options, "file.filename", 1442b998875dSKevin Wolf qstring_from_str(tmp_filename)); 1443b998875dSKevin Wolf 1444e4e9986bSMarkus Armbruster bs_snapshot = bdrv_new(); 1445b998875dSKevin Wolf 1446b998875dSKevin Wolf ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options, 1447ef810437SMax Reitz flags, &bdrv_qcow2, &local_err); 1448b998875dSKevin Wolf if (ret < 0) { 1449b998875dSKevin Wolf error_propagate(errp, local_err); 14501ba4b6a5SBenoît Canet goto out; 1451b998875dSKevin Wolf } 1452b998875dSKevin Wolf 1453b998875dSKevin Wolf bdrv_append(bs_snapshot, bs); 14541ba4b6a5SBenoît Canet 14551ba4b6a5SBenoît Canet out: 14561ba4b6a5SBenoît Canet g_free(tmp_filename); 14576b8aeca5SChen Gang return ret; 1458b998875dSKevin Wolf } 1459b998875dSKevin Wolf 1460da557aacSMax Reitz /* 1461b6ce07aaSKevin Wolf * Opens a disk image (raw, qcow2, vmdk, ...) 1462de9c0cecSKevin Wolf * 1463de9c0cecSKevin Wolf * options is a QDict of options to pass to the block drivers, or NULL for an 1464de9c0cecSKevin Wolf * empty set of options. The reference to the QDict belongs to the block layer 1465de9c0cecSKevin Wolf * after the call (even on failure), so if the caller intends to reuse the 1466de9c0cecSKevin Wolf * dictionary, it needs to use QINCREF() before calling bdrv_open. 1467f67503e5SMax Reitz * 1468f67503e5SMax Reitz * If *pbs is NULL, a new BDS will be created with a pointer to it stored there. 1469f67503e5SMax Reitz * If it is not NULL, the referenced BDS will be reused. 
1470ddf5636dSMax Reitz * 1471ddf5636dSMax Reitz * The reference parameter may be used to specify an existing block device which 1472ddf5636dSMax Reitz * should be opened. If specified, neither options nor a filename may be given, 1473ddf5636dSMax Reitz * nor can an existing BDS be reused (that is, *pbs has to be NULL). 1474b6ce07aaSKevin Wolf */ 1475ddf5636dSMax Reitz int bdrv_open(BlockDriverState **pbs, const char *filename, 1476ddf5636dSMax Reitz const char *reference, QDict *options, int flags, 1477ddf5636dSMax Reitz BlockDriver *drv, Error **errp) 1478ea2384d3Sbellard { 1479b6ce07aaSKevin Wolf int ret; 1480f67503e5SMax Reitz BlockDriverState *file = NULL, *bs; 148174fe54f2SKevin Wolf const char *drvname; 148234b5d2c6SMax Reitz Error *local_err = NULL; 1483b1e6fc08SKevin Wolf int snapshot_flags = 0; 148433e3963eSbellard 1485f67503e5SMax Reitz assert(pbs); 1486f67503e5SMax Reitz 1487ddf5636dSMax Reitz if (reference) { 1488ddf5636dSMax Reitz bool options_non_empty = options ? qdict_size(options) : false; 1489ddf5636dSMax Reitz QDECREF(options); 1490ddf5636dSMax Reitz 1491ddf5636dSMax Reitz if (*pbs) { 1492ddf5636dSMax Reitz error_setg(errp, "Cannot reuse an existing BDS when referencing " 1493ddf5636dSMax Reitz "another block device"); 1494ddf5636dSMax Reitz return -EINVAL; 1495ddf5636dSMax Reitz } 1496ddf5636dSMax Reitz 1497ddf5636dSMax Reitz if (filename || options_non_empty) { 1498ddf5636dSMax Reitz error_setg(errp, "Cannot reference an existing block device with " 1499ddf5636dSMax Reitz "additional options or a new filename"); 1500ddf5636dSMax Reitz return -EINVAL; 1501ddf5636dSMax Reitz } 1502ddf5636dSMax Reitz 1503ddf5636dSMax Reitz bs = bdrv_lookup_bs(reference, reference, errp); 1504ddf5636dSMax Reitz if (!bs) { 1505ddf5636dSMax Reitz return -ENODEV; 1506ddf5636dSMax Reitz } 1507ddf5636dSMax Reitz bdrv_ref(bs); 1508ddf5636dSMax Reitz *pbs = bs; 1509ddf5636dSMax Reitz return 0; 1510ddf5636dSMax Reitz } 1511ddf5636dSMax Reitz 1512f67503e5SMax Reitz if 
(*pbs) { 1513f67503e5SMax Reitz bs = *pbs; 1514f67503e5SMax Reitz } else { 1515e4e9986bSMarkus Armbruster bs = bdrv_new(); 1516f67503e5SMax Reitz } 1517f67503e5SMax Reitz 1518de9c0cecSKevin Wolf /* NULL means an empty set of options */ 1519de9c0cecSKevin Wolf if (options == NULL) { 1520de9c0cecSKevin Wolf options = qdict_new(); 1521de9c0cecSKevin Wolf } 1522de9c0cecSKevin Wolf 152317b005f1SKevin Wolf ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err); 1524462f5bcfSKevin Wolf if (local_err) { 1525462f5bcfSKevin Wolf goto fail; 1526462f5bcfSKevin Wolf } 1527462f5bcfSKevin Wolf 152876c591b0SKevin Wolf /* Find the right image format driver */ 152976c591b0SKevin Wolf drv = NULL; 153076c591b0SKevin Wolf drvname = qdict_get_try_str(options, "driver"); 153176c591b0SKevin Wolf if (drvname) { 153276c591b0SKevin Wolf drv = bdrv_find_format(drvname); 153376c591b0SKevin Wolf qdict_del(options, "driver"); 153476c591b0SKevin Wolf if (!drv) { 153576c591b0SKevin Wolf error_setg(errp, "Unknown driver: '%s'", drvname); 153676c591b0SKevin Wolf ret = -EINVAL; 153776c591b0SKevin Wolf goto fail; 153876c591b0SKevin Wolf } 153976c591b0SKevin Wolf } 154076c591b0SKevin Wolf 154176c591b0SKevin Wolf assert(drvname || !(flags & BDRV_O_PROTOCOL)); 154276c591b0SKevin Wolf if (drv && !drv->bdrv_file_open) { 154376c591b0SKevin Wolf /* If the user explicitly wants a format driver here, we'll need to add 154476c591b0SKevin Wolf * another layer for the protocol in bs->file */ 154576c591b0SKevin Wolf flags &= ~BDRV_O_PROTOCOL; 154676c591b0SKevin Wolf } 154776c591b0SKevin Wolf 1548de9c0cecSKevin Wolf bs->options = options; 1549b6ad491aSKevin Wolf options = qdict_clone_shallow(options); 1550de9c0cecSKevin Wolf 1551f500a6d3SKevin Wolf /* Open image file without format layer */ 1552f4788adcSKevin Wolf if ((flags & BDRV_O_PROTOCOL) == 0) { 1553be028adcSJeff Cody if (flags & BDRV_O_RDWR) { 1554be028adcSJeff Cody flags |= BDRV_O_ALLOW_RDWR; 1555be028adcSJeff Cody } 1556b1e6fc08SKevin Wolf if 
(flags & BDRV_O_SNAPSHOT) { 1557b1e6fc08SKevin Wolf snapshot_flags = bdrv_temp_snapshot_flags(flags); 1558b1e6fc08SKevin Wolf flags = bdrv_backing_flags(flags); 1559b1e6fc08SKevin Wolf } 1560be028adcSJeff Cody 1561f67503e5SMax Reitz assert(file == NULL); 1562054963f8SMax Reitz ret = bdrv_open_image(&file, filename, options, "file", 15630b50cc88SKevin Wolf bdrv_inherited_flags(flags), 15640b50cc88SKevin Wolf true, &local_err); 1565f500a6d3SKevin Wolf if (ret < 0) { 15668bfea15dSKevin Wolf goto fail; 1567f500a6d3SKevin Wolf } 1568f4788adcSKevin Wolf } 1569f500a6d3SKevin Wolf 157076c591b0SKevin Wolf /* Image format probing */ 157138f3ef57SKevin Wolf bs->probed = !drv; 157276c591b0SKevin Wolf if (!drv && file) { 157334b5d2c6SMax Reitz ret = find_image_format(file, filename, &drv, &local_err); 157417b005f1SKevin Wolf if (ret < 0) { 157517b005f1SKevin Wolf goto fail; 157617b005f1SKevin Wolf } 157776c591b0SKevin Wolf } else if (!drv) { 15782a05cbe4SMax Reitz error_setg(errp, "Must specify either driver or file"); 15792a05cbe4SMax Reitz ret = -EINVAL; 15808bfea15dSKevin Wolf goto fail; 15812a05cbe4SMax Reitz } 1582f500a6d3SKevin Wolf 1583b6ce07aaSKevin Wolf /* Open the image */ 158434b5d2c6SMax Reitz ret = bdrv_open_common(bs, file, options, flags, drv, &local_err); 1585b6ce07aaSKevin Wolf if (ret < 0) { 15868bfea15dSKevin Wolf goto fail; 15876987307cSChristoph Hellwig } 15886987307cSChristoph Hellwig 15892a05cbe4SMax Reitz if (file && (bs->file != file)) { 15904f6fd349SFam Zheng bdrv_unref(file); 1591f500a6d3SKevin Wolf file = NULL; 1592f500a6d3SKevin Wolf } 1593f500a6d3SKevin Wolf 1594b6ce07aaSKevin Wolf /* If there is a backing file, use it */ 15959156df12SPaolo Bonzini if ((flags & BDRV_O_NO_BACKING) == 0) { 159631ca6d07SKevin Wolf QDict *backing_options; 159731ca6d07SKevin Wolf 15985726d872SBenoît Canet qdict_extract_subqdict(options, &backing_options, "backing."); 159934b5d2c6SMax Reitz ret = bdrv_open_backing_file(bs, backing_options, &local_err); 1600b6ce07aaSKevin 
Wolf if (ret < 0) { 1601b6ad491aSKevin Wolf goto close_and_fail; 1602b6ce07aaSKevin Wolf } 1603b6ce07aaSKevin Wolf } 1604b6ce07aaSKevin Wolf 160591af7014SMax Reitz bdrv_refresh_filename(bs); 160691af7014SMax Reitz 1607b998875dSKevin Wolf /* For snapshot=on, create a temporary qcow2 overlay. bs points to the 1608b998875dSKevin Wolf * temporary snapshot afterwards. */ 1609b1e6fc08SKevin Wolf if (snapshot_flags) { 16106b8aeca5SChen Gang ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err); 1611b998875dSKevin Wolf if (local_err) { 1612b998875dSKevin Wolf goto close_and_fail; 1613b998875dSKevin Wolf } 1614b998875dSKevin Wolf } 1615b998875dSKevin Wolf 1616b6ad491aSKevin Wolf /* Check if any unknown options were used */ 16175acd9d81SMax Reitz if (options && (qdict_size(options) != 0)) { 1618b6ad491aSKevin Wolf const QDictEntry *entry = qdict_first(options); 16195acd9d81SMax Reitz if (flags & BDRV_O_PROTOCOL) { 16205acd9d81SMax Reitz error_setg(errp, "Block protocol '%s' doesn't support the option " 16215acd9d81SMax Reitz "'%s'", drv->format_name, entry->key); 16225acd9d81SMax Reitz } else { 162334b5d2c6SMax Reitz error_setg(errp, "Block format '%s' used by device '%s' doesn't " 16245acd9d81SMax Reitz "support the option '%s'", drv->format_name, 1625bfb197e0SMarkus Armbruster bdrv_get_device_name(bs), entry->key); 16265acd9d81SMax Reitz } 1627b6ad491aSKevin Wolf 1628b6ad491aSKevin Wolf ret = -EINVAL; 1629b6ad491aSKevin Wolf goto close_and_fail; 1630b6ad491aSKevin Wolf } 1631b6ad491aSKevin Wolf 1632b6ce07aaSKevin Wolf if (!bdrv_key_required(bs)) { 1633a7f53e26SMarkus Armbruster if (bs->blk) { 1634a7f53e26SMarkus Armbruster blk_dev_change_media_cb(bs->blk, true); 1635a7f53e26SMarkus Armbruster } 1636c3adb58fSMarkus Armbruster } else if (!runstate_check(RUN_STATE_PRELAUNCH) 1637c3adb58fSMarkus Armbruster && !runstate_check(RUN_STATE_INMIGRATE) 1638c3adb58fSMarkus Armbruster && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */ 1639c3adb58fSMarkus Armbruster 
error_setg(errp, 1640c3adb58fSMarkus Armbruster "Guest must be stopped for opening of encrypted image"); 1641c3adb58fSMarkus Armbruster ret = -EBUSY; 1642c3adb58fSMarkus Armbruster goto close_and_fail; 1643b6ce07aaSKevin Wolf } 1644b6ce07aaSKevin Wolf 1645c3adb58fSMarkus Armbruster QDECREF(options); 1646f67503e5SMax Reitz *pbs = bs; 1647b6ce07aaSKevin Wolf return 0; 1648b6ce07aaSKevin Wolf 16498bfea15dSKevin Wolf fail: 1650f500a6d3SKevin Wolf if (file != NULL) { 16514f6fd349SFam Zheng bdrv_unref(file); 1652f500a6d3SKevin Wolf } 1653de9c0cecSKevin Wolf QDECREF(bs->options); 1654b6ad491aSKevin Wolf QDECREF(options); 1655de9c0cecSKevin Wolf bs->options = NULL; 1656f67503e5SMax Reitz if (!*pbs) { 1657f67503e5SMax Reitz /* If *pbs is NULL, a new BDS has been created in this function and 1658f67503e5SMax Reitz needs to be freed now. Otherwise, it does not need to be closed, 1659f67503e5SMax Reitz since it has not really been opened yet. */ 1660f67503e5SMax Reitz bdrv_unref(bs); 1661f67503e5SMax Reitz } 166284d18f06SMarkus Armbruster if (local_err) { 166334b5d2c6SMax Reitz error_propagate(errp, local_err); 166434b5d2c6SMax Reitz } 1665b6ad491aSKevin Wolf return ret; 1666de9c0cecSKevin Wolf 1667b6ad491aSKevin Wolf close_and_fail: 1668f67503e5SMax Reitz /* See fail path, but now the BDS has to be always closed */ 1669f67503e5SMax Reitz if (*pbs) { 1670b6ad491aSKevin Wolf bdrv_close(bs); 1671f67503e5SMax Reitz } else { 1672f67503e5SMax Reitz bdrv_unref(bs); 1673f67503e5SMax Reitz } 1674b6ad491aSKevin Wolf QDECREF(options); 167584d18f06SMarkus Armbruster if (local_err) { 167634b5d2c6SMax Reitz error_propagate(errp, local_err); 167734b5d2c6SMax Reitz } 1678b6ce07aaSKevin Wolf return ret; 1679b6ce07aaSKevin Wolf } 1680b6ce07aaSKevin Wolf 1681e971aa12SJeff Cody typedef struct BlockReopenQueueEntry { 1682e971aa12SJeff Cody bool prepared; 1683e971aa12SJeff Cody BDRVReopenState state; 1684e971aa12SJeff Cody QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry; 1685e971aa12SJeff Cody } 
BlockReopenQueueEntry; 1686e971aa12SJeff Cody 1687e971aa12SJeff Cody /* 1688e971aa12SJeff Cody * Adds a BlockDriverState to a simple queue for an atomic, transactional 1689e971aa12SJeff Cody * reopen of multiple devices. 1690e971aa12SJeff Cody * 1691e971aa12SJeff Cody * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT 1692e971aa12SJeff Cody * already performed, or alternatively may be NULL a new BlockReopenQueue will 1693e971aa12SJeff Cody * be created and initialized. This newly created BlockReopenQueue should be 1694e971aa12SJeff Cody * passed back in for subsequent calls that are intended to be of the same 1695e971aa12SJeff Cody * atomic 'set'. 1696e971aa12SJeff Cody * 1697e971aa12SJeff Cody * bs is the BlockDriverState to add to the reopen queue. 1698e971aa12SJeff Cody * 1699e971aa12SJeff Cody * flags contains the open flags for the associated bs 1700e971aa12SJeff Cody * 1701e971aa12SJeff Cody * returns a pointer to bs_queue, which is either the newly allocated 1702e971aa12SJeff Cody * bs_queue, or the existing bs_queue being used. 
1703e971aa12SJeff Cody * 1704e971aa12SJeff Cody */ 1705e971aa12SJeff Cody BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, 1706e971aa12SJeff Cody BlockDriverState *bs, int flags) 1707e971aa12SJeff Cody { 1708e971aa12SJeff Cody assert(bs != NULL); 1709e971aa12SJeff Cody 1710e971aa12SJeff Cody BlockReopenQueueEntry *bs_entry; 1711e971aa12SJeff Cody if (bs_queue == NULL) { 1712e971aa12SJeff Cody bs_queue = g_new0(BlockReopenQueue, 1); 1713e971aa12SJeff Cody QSIMPLEQ_INIT(bs_queue); 1714e971aa12SJeff Cody } 1715e971aa12SJeff Cody 1716f1f25a2eSKevin Wolf /* bdrv_open() masks this flag out */ 1717f1f25a2eSKevin Wolf flags &= ~BDRV_O_PROTOCOL; 1718f1f25a2eSKevin Wolf 1719e971aa12SJeff Cody if (bs->file) { 1720f1f25a2eSKevin Wolf bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags)); 1721e971aa12SJeff Cody } 1722e971aa12SJeff Cody 1723e971aa12SJeff Cody bs_entry = g_new0(BlockReopenQueueEntry, 1); 1724e971aa12SJeff Cody QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry); 1725e971aa12SJeff Cody 1726e971aa12SJeff Cody bs_entry->state.bs = bs; 1727e971aa12SJeff Cody bs_entry->state.flags = flags; 1728e971aa12SJeff Cody 1729e971aa12SJeff Cody return bs_queue; 1730e971aa12SJeff Cody } 1731e971aa12SJeff Cody 1732e971aa12SJeff Cody /* 1733e971aa12SJeff Cody * Reopen multiple BlockDriverStates atomically & transactionally. 1734e971aa12SJeff Cody * 1735e971aa12SJeff Cody * The queue passed in (bs_queue) must have been built up previous 1736e971aa12SJeff Cody * via bdrv_reopen_queue(). 1737e971aa12SJeff Cody * 1738e971aa12SJeff Cody * Reopens all BDS specified in the queue, with the appropriate 1739e971aa12SJeff Cody * flags. All devices are prepared for reopen, and failure of any 1740e971aa12SJeff Cody * device will cause all device changes to be abandonded, and intermediate 1741e971aa12SJeff Cody * data cleaned up. 
1742e971aa12SJeff Cody * 1743e971aa12SJeff Cody * If all devices prepare successfully, then the changes are committed 1744e971aa12SJeff Cody * to all devices. 1745e971aa12SJeff Cody * 1746e971aa12SJeff Cody */ 1747e971aa12SJeff Cody int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) 1748e971aa12SJeff Cody { 1749e971aa12SJeff Cody int ret = -1; 1750e971aa12SJeff Cody BlockReopenQueueEntry *bs_entry, *next; 1751e971aa12SJeff Cody Error *local_err = NULL; 1752e971aa12SJeff Cody 1753e971aa12SJeff Cody assert(bs_queue != NULL); 1754e971aa12SJeff Cody 1755e971aa12SJeff Cody bdrv_drain_all(); 1756e971aa12SJeff Cody 1757e971aa12SJeff Cody QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { 1758e971aa12SJeff Cody if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) { 1759e971aa12SJeff Cody error_propagate(errp, local_err); 1760e971aa12SJeff Cody goto cleanup; 1761e971aa12SJeff Cody } 1762e971aa12SJeff Cody bs_entry->prepared = true; 1763e971aa12SJeff Cody } 1764e971aa12SJeff Cody 1765e971aa12SJeff Cody /* If we reach this point, we have success and just need to apply the 1766e971aa12SJeff Cody * changes 1767e971aa12SJeff Cody */ 1768e971aa12SJeff Cody QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { 1769e971aa12SJeff Cody bdrv_reopen_commit(&bs_entry->state); 1770e971aa12SJeff Cody } 1771e971aa12SJeff Cody 1772e971aa12SJeff Cody ret = 0; 1773e971aa12SJeff Cody 1774e971aa12SJeff Cody cleanup: 1775e971aa12SJeff Cody QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { 1776e971aa12SJeff Cody if (ret && bs_entry->prepared) { 1777e971aa12SJeff Cody bdrv_reopen_abort(&bs_entry->state); 1778e971aa12SJeff Cody } 1779e971aa12SJeff Cody g_free(bs_entry); 1780e971aa12SJeff Cody } 1781e971aa12SJeff Cody g_free(bs_queue); 1782e971aa12SJeff Cody return ret; 1783e971aa12SJeff Cody } 1784e971aa12SJeff Cody 1785e971aa12SJeff Cody 1786e971aa12SJeff Cody /* Reopen a single BlockDriverState with the specified flags. 
*/ 1787e971aa12SJeff Cody int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp) 1788e971aa12SJeff Cody { 1789e971aa12SJeff Cody int ret = -1; 1790e971aa12SJeff Cody Error *local_err = NULL; 1791e971aa12SJeff Cody BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags); 1792e971aa12SJeff Cody 1793e971aa12SJeff Cody ret = bdrv_reopen_multiple(queue, &local_err); 1794e971aa12SJeff Cody if (local_err != NULL) { 1795e971aa12SJeff Cody error_propagate(errp, local_err); 1796e971aa12SJeff Cody } 1797e971aa12SJeff Cody return ret; 1798e971aa12SJeff Cody } 1799e971aa12SJeff Cody 1800e971aa12SJeff Cody 1801e971aa12SJeff Cody /* 1802e971aa12SJeff Cody * Prepares a BlockDriverState for reopen. All changes are staged in the 1803e971aa12SJeff Cody * 'opaque' field of the BDRVReopenState, which is used and allocated by 1804e971aa12SJeff Cody * the block driver layer .bdrv_reopen_prepare() 1805e971aa12SJeff Cody * 1806e971aa12SJeff Cody * bs is the BlockDriverState to reopen 1807e971aa12SJeff Cody * flags are the new open flags 1808e971aa12SJeff Cody * queue is the reopen queue 1809e971aa12SJeff Cody * 1810e971aa12SJeff Cody * Returns 0 on success, non-zero on error. On error errp will be set 1811e971aa12SJeff Cody * as well. 1812e971aa12SJeff Cody * 1813e971aa12SJeff Cody * On failure, bdrv_reopen_abort() will be called to clean up any data. 
1814e971aa12SJeff Cody * It is the responsibility of the caller to then call the abort() or 1815e971aa12SJeff Cody * commit() for any other BDS that have been left in a prepare() state 1816e971aa12SJeff Cody * 1817e971aa12SJeff Cody */ 1818e971aa12SJeff Cody int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, 1819e971aa12SJeff Cody Error **errp) 1820e971aa12SJeff Cody { 1821e971aa12SJeff Cody int ret = -1; 1822e971aa12SJeff Cody Error *local_err = NULL; 1823e971aa12SJeff Cody BlockDriver *drv; 1824e971aa12SJeff Cody 1825e971aa12SJeff Cody assert(reopen_state != NULL); 1826e971aa12SJeff Cody assert(reopen_state->bs->drv != NULL); 1827e971aa12SJeff Cody drv = reopen_state->bs->drv; 1828e971aa12SJeff Cody 1829e971aa12SJeff Cody /* if we are to stay read-only, do not allow permission change 1830e971aa12SJeff Cody * to r/w */ 1831e971aa12SJeff Cody if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) && 1832e971aa12SJeff Cody reopen_state->flags & BDRV_O_RDWR) { 183381e5f78aSAlberto Garcia error_setg(errp, "Node '%s' is read only", 183481e5f78aSAlberto Garcia bdrv_get_device_or_node_name(reopen_state->bs)); 1835e971aa12SJeff Cody goto error; 1836e971aa12SJeff Cody } 1837e971aa12SJeff Cody 1838e971aa12SJeff Cody 1839e971aa12SJeff Cody ret = bdrv_flush(reopen_state->bs); 1840e971aa12SJeff Cody if (ret) { 1841e971aa12SJeff Cody error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive", 1842e971aa12SJeff Cody strerror(-ret)); 1843e971aa12SJeff Cody goto error; 1844e971aa12SJeff Cody } 1845e971aa12SJeff Cody 1846e971aa12SJeff Cody if (drv->bdrv_reopen_prepare) { 1847e971aa12SJeff Cody ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err); 1848e971aa12SJeff Cody if (ret) { 1849e971aa12SJeff Cody if (local_err != NULL) { 1850e971aa12SJeff Cody error_propagate(errp, local_err); 1851e971aa12SJeff Cody } else { 1852d8b6895fSLuiz Capitulino error_setg(errp, "failed while preparing to reopen image '%s'", 1853e971aa12SJeff Cody 
reopen_state->bs->filename); 1854e971aa12SJeff Cody } 1855e971aa12SJeff Cody goto error; 1856e971aa12SJeff Cody } 1857e971aa12SJeff Cody } else { 1858e971aa12SJeff Cody /* It is currently mandatory to have a bdrv_reopen_prepare() 1859e971aa12SJeff Cody * handler for each supported drv. */ 186081e5f78aSAlberto Garcia error_setg(errp, "Block format '%s' used by node '%s' " 186181e5f78aSAlberto Garcia "does not support reopening files", drv->format_name, 186281e5f78aSAlberto Garcia bdrv_get_device_or_node_name(reopen_state->bs)); 1863e971aa12SJeff Cody ret = -1; 1864e971aa12SJeff Cody goto error; 1865e971aa12SJeff Cody } 1866e971aa12SJeff Cody 1867e971aa12SJeff Cody ret = 0; 1868e971aa12SJeff Cody 1869e971aa12SJeff Cody error: 1870e971aa12SJeff Cody return ret; 1871e971aa12SJeff Cody } 1872e971aa12SJeff Cody 1873e971aa12SJeff Cody /* 1874e971aa12SJeff Cody * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and 1875e971aa12SJeff Cody * makes them final by swapping the staging BlockDriverState contents into 1876e971aa12SJeff Cody * the active BlockDriverState contents. 
1877e971aa12SJeff Cody */ 1878e971aa12SJeff Cody void bdrv_reopen_commit(BDRVReopenState *reopen_state) 1879e971aa12SJeff Cody { 1880e971aa12SJeff Cody BlockDriver *drv; 1881e971aa12SJeff Cody 1882e971aa12SJeff Cody assert(reopen_state != NULL); 1883e971aa12SJeff Cody drv = reopen_state->bs->drv; 1884e971aa12SJeff Cody assert(drv != NULL); 1885e971aa12SJeff Cody 1886e971aa12SJeff Cody /* If there are any driver level actions to take */ 1887e971aa12SJeff Cody if (drv->bdrv_reopen_commit) { 1888e971aa12SJeff Cody drv->bdrv_reopen_commit(reopen_state); 1889e971aa12SJeff Cody } 1890e971aa12SJeff Cody 1891e971aa12SJeff Cody /* set BDS specific flags now */ 1892e971aa12SJeff Cody reopen_state->bs->open_flags = reopen_state->flags; 1893e971aa12SJeff Cody reopen_state->bs->enable_write_cache = !!(reopen_state->flags & 1894e971aa12SJeff Cody BDRV_O_CACHE_WB); 1895e971aa12SJeff Cody reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR); 1896355ef4acSKevin Wolf 18973baca891SKevin Wolf bdrv_refresh_limits(reopen_state->bs, NULL); 1898e971aa12SJeff Cody } 1899e971aa12SJeff Cody 1900e971aa12SJeff Cody /* 1901e971aa12SJeff Cody * Abort the reopen, and delete and free the staged changes in 1902e971aa12SJeff Cody * reopen_state 1903e971aa12SJeff Cody */ 1904e971aa12SJeff Cody void bdrv_reopen_abort(BDRVReopenState *reopen_state) 1905e971aa12SJeff Cody { 1906e971aa12SJeff Cody BlockDriver *drv; 1907e971aa12SJeff Cody 1908e971aa12SJeff Cody assert(reopen_state != NULL); 1909e971aa12SJeff Cody drv = reopen_state->bs->drv; 1910e971aa12SJeff Cody assert(drv != NULL); 1911e971aa12SJeff Cody 1912e971aa12SJeff Cody if (drv->bdrv_reopen_abort) { 1913e971aa12SJeff Cody drv->bdrv_reopen_abort(reopen_state); 1914e971aa12SJeff Cody } 1915e971aa12SJeff Cody } 1916e971aa12SJeff Cody 1917e971aa12SJeff Cody 1918fc01f7e7Sbellard void bdrv_close(BlockDriverState *bs) 1919fc01f7e7Sbellard { 192033384421SMax Reitz BdrvAioNotifier *ban, *ban_next; 192133384421SMax Reitz 19223e914655SPaolo 
Bonzini if (bs->job) { 19233e914655SPaolo Bonzini block_job_cancel_sync(bs->job); 19243e914655SPaolo Bonzini } 192558fda173SStefan Hajnoczi bdrv_drain_all(); /* complete I/O */ 192658fda173SStefan Hajnoczi bdrv_flush(bs); 192758fda173SStefan Hajnoczi bdrv_drain_all(); /* in case flush left pending I/O */ 1928d7d512f6SPaolo Bonzini notifier_list_notify(&bs->close_notifiers, bs); 19297094f12fSKevin Wolf 19303cbc002cSPaolo Bonzini if (bs->drv) { 1931557df6acSStefan Hajnoczi if (bs->backing_hd) { 1932826b6ca0SFam Zheng BlockDriverState *backing_hd = bs->backing_hd; 1933826b6ca0SFam Zheng bdrv_set_backing_hd(bs, NULL); 1934826b6ca0SFam Zheng bdrv_unref(backing_hd); 1935557df6acSStefan Hajnoczi } 1936ea2384d3Sbellard bs->drv->bdrv_close(bs); 19377267c094SAnthony Liguori g_free(bs->opaque); 1938ea2384d3Sbellard bs->opaque = NULL; 1939ea2384d3Sbellard bs->drv = NULL; 194053fec9d3SStefan Hajnoczi bs->copy_on_read = 0; 1941a275fa42SPaolo Bonzini bs->backing_file[0] = '\0'; 1942a275fa42SPaolo Bonzini bs->backing_format[0] = '\0'; 19436405875cSPaolo Bonzini bs->total_sectors = 0; 19446405875cSPaolo Bonzini bs->encrypted = 0; 19456405875cSPaolo Bonzini bs->valid_key = 0; 19466405875cSPaolo Bonzini bs->sg = 0; 19470d51b4deSAsias He bs->zero_beyond_eof = false; 1948de9c0cecSKevin Wolf QDECREF(bs->options); 1949de9c0cecSKevin Wolf bs->options = NULL; 195091af7014SMax Reitz QDECREF(bs->full_open_options); 195191af7014SMax Reitz bs->full_open_options = NULL; 1952b338082bSbellard 195366f82ceeSKevin Wolf if (bs->file != NULL) { 19544f6fd349SFam Zheng bdrv_unref(bs->file); 19550ac9377dSPaolo Bonzini bs->file = NULL; 195666f82ceeSKevin Wolf } 19579ca11154SPavel Hrdina } 195866f82ceeSKevin Wolf 1959a7f53e26SMarkus Armbruster if (bs->blk) { 1960a7f53e26SMarkus Armbruster blk_dev_change_media_cb(bs->blk, false); 1961a7f53e26SMarkus Armbruster } 196298f90dbaSZhi Yong Wu 196398f90dbaSZhi Yong Wu /*throttling disk I/O limits*/ 196498f90dbaSZhi Yong Wu if (bs->io_limits_enabled) { 
196598f90dbaSZhi Yong Wu bdrv_io_limits_disable(bs); 196698f90dbaSZhi Yong Wu } 196733384421SMax Reitz 196833384421SMax Reitz QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { 196933384421SMax Reitz g_free(ban); 197033384421SMax Reitz } 197133384421SMax Reitz QLIST_INIT(&bs->aio_notifiers); 1972b338082bSbellard } 1973b338082bSbellard 19742bc93fedSMORITA Kazutaka void bdrv_close_all(void) 19752bc93fedSMORITA Kazutaka { 19762bc93fedSMORITA Kazutaka BlockDriverState *bs; 19772bc93fedSMORITA Kazutaka 1978dc364f4cSBenoît Canet QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 1979ed78cda3SStefan Hajnoczi AioContext *aio_context = bdrv_get_aio_context(bs); 1980ed78cda3SStefan Hajnoczi 1981ed78cda3SStefan Hajnoczi aio_context_acquire(aio_context); 19822bc93fedSMORITA Kazutaka bdrv_close(bs); 1983ed78cda3SStefan Hajnoczi aio_context_release(aio_context); 19842bc93fedSMORITA Kazutaka } 19852bc93fedSMORITA Kazutaka } 19862bc93fedSMORITA Kazutaka 198788266f5aSStefan Hajnoczi /* Check if any requests are in-flight (including throttled requests) */ 198888266f5aSStefan Hajnoczi static bool bdrv_requests_pending(BlockDriverState *bs) 198988266f5aSStefan Hajnoczi { 199088266f5aSStefan Hajnoczi if (!QLIST_EMPTY(&bs->tracked_requests)) { 199188266f5aSStefan Hajnoczi return true; 199288266f5aSStefan Hajnoczi } 1993cc0681c4SBenoît Canet if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) { 1994cc0681c4SBenoît Canet return true; 1995cc0681c4SBenoît Canet } 1996cc0681c4SBenoît Canet if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) { 199788266f5aSStefan Hajnoczi return true; 199888266f5aSStefan Hajnoczi } 199988266f5aSStefan Hajnoczi if (bs->file && bdrv_requests_pending(bs->file)) { 200088266f5aSStefan Hajnoczi return true; 200188266f5aSStefan Hajnoczi } 200288266f5aSStefan Hajnoczi if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) { 200388266f5aSStefan Hajnoczi return true; 200488266f5aSStefan Hajnoczi } 200588266f5aSStefan Hajnoczi return false; 200688266f5aSStefan 
Hajnoczi } 200788266f5aSStefan Hajnoczi 20085b98db0aSStefan Hajnoczi static bool bdrv_drain_one(BlockDriverState *bs) 20095b98db0aSStefan Hajnoczi { 20105b98db0aSStefan Hajnoczi bool bs_busy; 20115b98db0aSStefan Hajnoczi 20125b98db0aSStefan Hajnoczi bdrv_flush_io_queue(bs); 20135b98db0aSStefan Hajnoczi bdrv_start_throttled_reqs(bs); 20145b98db0aSStefan Hajnoczi bs_busy = bdrv_requests_pending(bs); 20155b98db0aSStefan Hajnoczi bs_busy |= aio_poll(bdrv_get_aio_context(bs), bs_busy); 20165b98db0aSStefan Hajnoczi return bs_busy; 20175b98db0aSStefan Hajnoczi } 20185b98db0aSStefan Hajnoczi 20195b98db0aSStefan Hajnoczi /* 20205b98db0aSStefan Hajnoczi * Wait for pending requests to complete on a single BlockDriverState subtree 20215b98db0aSStefan Hajnoczi * 20225b98db0aSStefan Hajnoczi * See the warning in bdrv_drain_all(). This function can only be called if 20235b98db0aSStefan Hajnoczi * you are sure nothing can generate I/O because you have op blockers 20245b98db0aSStefan Hajnoczi * installed. 20255b98db0aSStefan Hajnoczi * 20265b98db0aSStefan Hajnoczi * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState 20275b98db0aSStefan Hajnoczi * AioContext. 20285b98db0aSStefan Hajnoczi */ 20295b98db0aSStefan Hajnoczi void bdrv_drain(BlockDriverState *bs) 20305b98db0aSStefan Hajnoczi { 20315b98db0aSStefan Hajnoczi while (bdrv_drain_one(bs)) { 20325b98db0aSStefan Hajnoczi /* Keep iterating */ 20335b98db0aSStefan Hajnoczi } 20345b98db0aSStefan Hajnoczi } 20355b98db0aSStefan Hajnoczi 2036922453bcSStefan Hajnoczi /* 2037922453bcSStefan Hajnoczi * Wait for pending requests to complete across all BlockDriverStates 2038922453bcSStefan Hajnoczi * 2039922453bcSStefan Hajnoczi * This function does not flush data to disk, use bdrv_flush_all() for that 2040922453bcSStefan Hajnoczi * after calling this function. 
20414c355d53SZhi Yong Wu * 20424c355d53SZhi Yong Wu * Note that completion of an asynchronous I/O operation can trigger any 20434c355d53SZhi Yong Wu * number of other I/O operations on other devices---for example a coroutine 20444c355d53SZhi Yong Wu * can be arbitrarily complex and a constant flow of I/O can come until the 20454c355d53SZhi Yong Wu * coroutine is complete. Because of this, it is not possible to have a 20464c355d53SZhi Yong Wu * function to drain a single device's I/O queue. 2047922453bcSStefan Hajnoczi */ 2048922453bcSStefan Hajnoczi void bdrv_drain_all(void) 2049922453bcSStefan Hajnoczi { 205088266f5aSStefan Hajnoczi /* Always run first iteration so any pending completion BHs run */ 205188266f5aSStefan Hajnoczi bool busy = true; 2052922453bcSStefan Hajnoczi BlockDriverState *bs; 2053922453bcSStefan Hajnoczi 205469da3b0bSFam Zheng QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 205569da3b0bSFam Zheng AioContext *aio_context = bdrv_get_aio_context(bs); 205669da3b0bSFam Zheng 205769da3b0bSFam Zheng aio_context_acquire(aio_context); 205869da3b0bSFam Zheng if (bs->job) { 205969da3b0bSFam Zheng block_job_pause(bs->job); 206069da3b0bSFam Zheng } 206169da3b0bSFam Zheng aio_context_release(aio_context); 206269da3b0bSFam Zheng } 206369da3b0bSFam Zheng 206488266f5aSStefan Hajnoczi while (busy) { 20659b536adcSStefan Hajnoczi busy = false; 2066922453bcSStefan Hajnoczi 20679b536adcSStefan Hajnoczi QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 20689b536adcSStefan Hajnoczi AioContext *aio_context = bdrv_get_aio_context(bs); 20699b536adcSStefan Hajnoczi 20709b536adcSStefan Hajnoczi aio_context_acquire(aio_context); 20715b98db0aSStefan Hajnoczi busy |= bdrv_drain_one(bs); 20729b536adcSStefan Hajnoczi aio_context_release(aio_context); 20739b536adcSStefan Hajnoczi } 2074922453bcSStefan Hajnoczi } 207569da3b0bSFam Zheng 207669da3b0bSFam Zheng QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 207769da3b0bSFam Zheng AioContext *aio_context = bdrv_get_aio_context(bs); 
207869da3b0bSFam Zheng 207969da3b0bSFam Zheng aio_context_acquire(aio_context); 208069da3b0bSFam Zheng if (bs->job) { 208169da3b0bSFam Zheng block_job_resume(bs->job); 208269da3b0bSFam Zheng } 208369da3b0bSFam Zheng aio_context_release(aio_context); 208469da3b0bSFam Zheng } 2085922453bcSStefan Hajnoczi } 2086922453bcSStefan Hajnoczi 2087dc364f4cSBenoît Canet /* make a BlockDriverState anonymous by removing from bdrv_state and 2088dc364f4cSBenoît Canet * graph_bdrv_state list. 2089d22b2f41SRyan Harper Also, NULL terminate the device_name to prevent double remove */ 2090d22b2f41SRyan Harper void bdrv_make_anon(BlockDriverState *bs) 2091d22b2f41SRyan Harper { 2092bfb197e0SMarkus Armbruster /* 2093bfb197e0SMarkus Armbruster * Take care to remove bs from bdrv_states only when it's actually 2094bfb197e0SMarkus Armbruster * in it. Note that bs->device_list.tqe_prev is initially null, 2095bfb197e0SMarkus Armbruster * and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish 2096bfb197e0SMarkus Armbruster * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by 2097bfb197e0SMarkus Armbruster * resetting it to null on remove. 
2098bfb197e0SMarkus Armbruster */ 2099bfb197e0SMarkus Armbruster if (bs->device_list.tqe_prev) { 2100dc364f4cSBenoît Canet QTAILQ_REMOVE(&bdrv_states, bs, device_list); 2101bfb197e0SMarkus Armbruster bs->device_list.tqe_prev = NULL; 2102d22b2f41SRyan Harper } 2103dc364f4cSBenoît Canet if (bs->node_name[0] != '\0') { 2104dc364f4cSBenoît Canet QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list); 2105dc364f4cSBenoît Canet } 2106dc364f4cSBenoît Canet bs->node_name[0] = '\0'; 2107d22b2f41SRyan Harper } 2108d22b2f41SRyan Harper 2109e023b2e2SPaolo Bonzini static void bdrv_rebind(BlockDriverState *bs) 2110e023b2e2SPaolo Bonzini { 2111e023b2e2SPaolo Bonzini if (bs->drv && bs->drv->bdrv_rebind) { 2112e023b2e2SPaolo Bonzini bs->drv->bdrv_rebind(bs); 2113e023b2e2SPaolo Bonzini } 2114e023b2e2SPaolo Bonzini } 2115e023b2e2SPaolo Bonzini 21164ddc07caSPaolo Bonzini static void bdrv_move_feature_fields(BlockDriverState *bs_dest, 21174ddc07caSPaolo Bonzini BlockDriverState *bs_src) 21184ddc07caSPaolo Bonzini { 21194ddc07caSPaolo Bonzini /* move some fields that need to stay attached to the device */ 21204ddc07caSPaolo Bonzini 21214ddc07caSPaolo Bonzini /* dev info */ 21221b7fd729SPaolo Bonzini bs_dest->guest_block_size = bs_src->guest_block_size; 21234ddc07caSPaolo Bonzini bs_dest->copy_on_read = bs_src->copy_on_read; 21244ddc07caSPaolo Bonzini 21254ddc07caSPaolo Bonzini bs_dest->enable_write_cache = bs_src->enable_write_cache; 21264ddc07caSPaolo Bonzini 2127cc0681c4SBenoît Canet /* i/o throttled req */ 2128cc0681c4SBenoît Canet memcpy(&bs_dest->throttle_state, 2129cc0681c4SBenoît Canet &bs_src->throttle_state, 2130cc0681c4SBenoît Canet sizeof(ThrottleState)); 2131cc0681c4SBenoît Canet bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0]; 2132cc0681c4SBenoît Canet bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1]; 21334ddc07caSPaolo Bonzini bs_dest->io_limits_enabled = bs_src->io_limits_enabled; 21344ddc07caSPaolo Bonzini 21354ddc07caSPaolo Bonzini /* r/w error */ 
21364ddc07caSPaolo Bonzini bs_dest->on_read_error = bs_src->on_read_error; 21374ddc07caSPaolo Bonzini bs_dest->on_write_error = bs_src->on_write_error; 21384ddc07caSPaolo Bonzini 21394ddc07caSPaolo Bonzini /* i/o status */ 21404ddc07caSPaolo Bonzini bs_dest->iostatus_enabled = bs_src->iostatus_enabled; 21414ddc07caSPaolo Bonzini bs_dest->iostatus = bs_src->iostatus; 21424ddc07caSPaolo Bonzini 21434ddc07caSPaolo Bonzini /* dirty bitmap */ 2144e4654d2dSFam Zheng bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps; 21454ddc07caSPaolo Bonzini 21469fcb0251SFam Zheng /* reference count */ 21479fcb0251SFam Zheng bs_dest->refcnt = bs_src->refcnt; 21489fcb0251SFam Zheng 21494ddc07caSPaolo Bonzini /* job */ 21504ddc07caSPaolo Bonzini bs_dest->job = bs_src->job; 21514ddc07caSPaolo Bonzini 21524ddc07caSPaolo Bonzini /* keep the same entry in bdrv_states */ 2153dc364f4cSBenoît Canet bs_dest->device_list = bs_src->device_list; 21547e7d56d9SMarkus Armbruster bs_dest->blk = bs_src->blk; 21557e7d56d9SMarkus Armbruster 2156fbe40ff7SFam Zheng memcpy(bs_dest->op_blockers, bs_src->op_blockers, 2157fbe40ff7SFam Zheng sizeof(bs_dest->op_blockers)); 21584ddc07caSPaolo Bonzini } 21594ddc07caSPaolo Bonzini 21604ddc07caSPaolo Bonzini /* 21614ddc07caSPaolo Bonzini * Swap bs contents for two image chains while they are live, 21624ddc07caSPaolo Bonzini * while keeping required fields on the BlockDriverState that is 21634ddc07caSPaolo Bonzini * actually attached to a device. 21644ddc07caSPaolo Bonzini * 21654ddc07caSPaolo Bonzini * This will modify the BlockDriverState fields, and swap contents 21664ddc07caSPaolo Bonzini * between bs_new and bs_old. Both bs_new and bs_old are modified. 21674ddc07caSPaolo Bonzini * 2168bfb197e0SMarkus Armbruster * bs_new must not be attached to a BlockBackend. 21694ddc07caSPaolo Bonzini * 21704ddc07caSPaolo Bonzini * This function does not create any image files. 
21714ddc07caSPaolo Bonzini */ 21724ddc07caSPaolo Bonzini void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) 21734ddc07caSPaolo Bonzini { 21744ddc07caSPaolo Bonzini BlockDriverState tmp; 21754ddc07caSPaolo Bonzini 217690ce8a06SBenoît Canet /* The code needs to swap the node_name but simply swapping node_list won't 217790ce8a06SBenoît Canet * work so first remove the nodes from the graph list, do the swap then 217890ce8a06SBenoît Canet * insert them back if needed. 217990ce8a06SBenoît Canet */ 218090ce8a06SBenoît Canet if (bs_new->node_name[0] != '\0') { 218190ce8a06SBenoît Canet QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list); 218290ce8a06SBenoît Canet } 218390ce8a06SBenoît Canet if (bs_old->node_name[0] != '\0') { 218490ce8a06SBenoît Canet QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list); 218590ce8a06SBenoît Canet } 218690ce8a06SBenoît Canet 2187bfb197e0SMarkus Armbruster /* bs_new must be unattached and shouldn't have anything fancy enabled */ 21887e7d56d9SMarkus Armbruster assert(!bs_new->blk); 2189e4654d2dSFam Zheng assert(QLIST_EMPTY(&bs_new->dirty_bitmaps)); 21904ddc07caSPaolo Bonzini assert(bs_new->job == NULL); 21914ddc07caSPaolo Bonzini assert(bs_new->io_limits_enabled == false); 2192cc0681c4SBenoît Canet assert(!throttle_have_timer(&bs_new->throttle_state)); 21934ddc07caSPaolo Bonzini 21944ddc07caSPaolo Bonzini tmp = *bs_new; 21954ddc07caSPaolo Bonzini *bs_new = *bs_old; 21964ddc07caSPaolo Bonzini *bs_old = tmp; 21974ddc07caSPaolo Bonzini 21984ddc07caSPaolo Bonzini /* there are some fields that should not be swapped, move them back */ 21994ddc07caSPaolo Bonzini bdrv_move_feature_fields(&tmp, bs_old); 22004ddc07caSPaolo Bonzini bdrv_move_feature_fields(bs_old, bs_new); 22014ddc07caSPaolo Bonzini bdrv_move_feature_fields(bs_new, &tmp); 22024ddc07caSPaolo Bonzini 2203bfb197e0SMarkus Armbruster /* bs_new must remain unattached */ 22047e7d56d9SMarkus Armbruster assert(!bs_new->blk); 22054ddc07caSPaolo Bonzini 22064ddc07caSPaolo Bonzini 
/* Check a few fields that should remain attached to the device */ 22074ddc07caSPaolo Bonzini assert(bs_new->job == NULL); 22084ddc07caSPaolo Bonzini assert(bs_new->io_limits_enabled == false); 2209cc0681c4SBenoît Canet assert(!throttle_have_timer(&bs_new->throttle_state)); 22104ddc07caSPaolo Bonzini 221190ce8a06SBenoît Canet /* insert the nodes back into the graph node list if needed */ 221290ce8a06SBenoît Canet if (bs_new->node_name[0] != '\0') { 221390ce8a06SBenoît Canet QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list); 221490ce8a06SBenoît Canet } 221590ce8a06SBenoît Canet if (bs_old->node_name[0] != '\0') { 221690ce8a06SBenoît Canet QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list); 221790ce8a06SBenoît Canet } 221890ce8a06SBenoît Canet 22194ddc07caSPaolo Bonzini bdrv_rebind(bs_new); 22204ddc07caSPaolo Bonzini bdrv_rebind(bs_old); 22214ddc07caSPaolo Bonzini } 22224ddc07caSPaolo Bonzini 22238802d1fdSJeff Cody /* 22248802d1fdSJeff Cody * Add new bs contents at the top of an image chain while the chain is 22258802d1fdSJeff Cody * live, while keeping required fields on the top layer. 22268802d1fdSJeff Cody * 22278802d1fdSJeff Cody * This will modify the BlockDriverState fields, and swap contents 22288802d1fdSJeff Cody * between bs_new and bs_top. Both bs_new and bs_top are modified. 22298802d1fdSJeff Cody * 2230bfb197e0SMarkus Armbruster * bs_new must not be attached to a BlockBackend. 2231f6801b83SJeff Cody * 22328802d1fdSJeff Cody * This function does not create any image files. 22338802d1fdSJeff Cody */ 22348802d1fdSJeff Cody void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top) 22358802d1fdSJeff Cody { 22364ddc07caSPaolo Bonzini bdrv_swap(bs_new, bs_top); 22378802d1fdSJeff Cody 22388802d1fdSJeff Cody /* The contents of 'tmp' will become bs_top, as we are 22398802d1fdSJeff Cody * swapping bs_new and bs_top contents. 
*/ 22408d24cce1SFam Zheng bdrv_set_backing_hd(bs_top, bs_new); 22418802d1fdSJeff Cody } 22428802d1fdSJeff Cody 22434f6fd349SFam Zheng static void bdrv_delete(BlockDriverState *bs) 2244b338082bSbellard { 22453e914655SPaolo Bonzini assert(!bs->job); 22463718d8abSFam Zheng assert(bdrv_op_blocker_is_empty(bs)); 22474f6fd349SFam Zheng assert(!bs->refcnt); 2248e4654d2dSFam Zheng assert(QLIST_EMPTY(&bs->dirty_bitmaps)); 224918846deeSMarkus Armbruster 2250e1b5c52eSStefan Hajnoczi bdrv_close(bs); 2251e1b5c52eSStefan Hajnoczi 22521b7bdbc1SStefan Hajnoczi /* remove from list, if necessary */ 2253d22b2f41SRyan Harper bdrv_make_anon(bs); 225434c6f050Saurel32 22557267c094SAnthony Liguori g_free(bs); 2256fc01f7e7Sbellard } 2257fc01f7e7Sbellard 2258e97fc193Saliguori /* 2259e97fc193Saliguori * Run consistency checks on an image 2260e97fc193Saliguori * 2261e076f338SKevin Wolf * Returns 0 if the check could be completed (it doesn't mean that the image is 2262a1c7273bSStefan Weil * free of errors) or -errno when an internal error occurred. The results of the 2263e076f338SKevin Wolf * check are stored in res. 
2264e97fc193Saliguori */ 22654534ff54SKevin Wolf int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) 2266e97fc193Saliguori { 2267908bcd54SMax Reitz if (bs->drv == NULL) { 2268908bcd54SMax Reitz return -ENOMEDIUM; 2269908bcd54SMax Reitz } 2270e97fc193Saliguori if (bs->drv->bdrv_check == NULL) { 2271e97fc193Saliguori return -ENOTSUP; 2272e97fc193Saliguori } 2273e97fc193Saliguori 2274e076f338SKevin Wolf memset(res, 0, sizeof(*res)); 22754534ff54SKevin Wolf return bs->drv->bdrv_check(bs, res, fix); 2276e97fc193Saliguori } 2277e97fc193Saliguori 22788a426614SKevin Wolf #define COMMIT_BUF_SECTORS 2048 22798a426614SKevin Wolf 228033e3963eSbellard /* commit COW file into the raw image */ 228133e3963eSbellard int bdrv_commit(BlockDriverState *bs) 228233e3963eSbellard { 228319cb3738Sbellard BlockDriver *drv = bs->drv; 228472706ea4SJeff Cody int64_t sector, total_sectors, length, backing_length; 22858a426614SKevin Wolf int n, ro, open_flags; 22860bce597dSJeff Cody int ret = 0; 228772706ea4SJeff Cody uint8_t *buf = NULL; 228833e3963eSbellard 228919cb3738Sbellard if (!drv) 229019cb3738Sbellard return -ENOMEDIUM; 229133e3963eSbellard 22924dca4b63SNaphtali Sprei if (!bs->backing_hd) { 22934dca4b63SNaphtali Sprei return -ENOTSUP; 22944dca4b63SNaphtali Sprei } 22954dca4b63SNaphtali Sprei 2296bb00021dSFam Zheng if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) || 2297bb00021dSFam Zheng bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) { 22982d3735d3SStefan Hajnoczi return -EBUSY; 22992d3735d3SStefan Hajnoczi } 23002d3735d3SStefan Hajnoczi 23014dca4b63SNaphtali Sprei ro = bs->backing_hd->read_only; 23024dca4b63SNaphtali Sprei open_flags = bs->backing_hd->open_flags; 23034dca4b63SNaphtali Sprei 23044dca4b63SNaphtali Sprei if (ro) { 23050bce597dSJeff Cody if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) { 23060bce597dSJeff Cody return -EACCES; 23074dca4b63SNaphtali Sprei } 2308ea2384d3Sbellard } 
2309ea2384d3Sbellard 231072706ea4SJeff Cody length = bdrv_getlength(bs); 231172706ea4SJeff Cody if (length < 0) { 231272706ea4SJeff Cody ret = length; 231372706ea4SJeff Cody goto ro_cleanup; 231472706ea4SJeff Cody } 231572706ea4SJeff Cody 231672706ea4SJeff Cody backing_length = bdrv_getlength(bs->backing_hd); 231772706ea4SJeff Cody if (backing_length < 0) { 231872706ea4SJeff Cody ret = backing_length; 231972706ea4SJeff Cody goto ro_cleanup; 232072706ea4SJeff Cody } 232172706ea4SJeff Cody 232272706ea4SJeff Cody /* If our top snapshot is larger than the backing file image, 232372706ea4SJeff Cody * grow the backing file image if possible. If not possible, 232472706ea4SJeff Cody * we must return an error */ 232572706ea4SJeff Cody if (length > backing_length) { 232672706ea4SJeff Cody ret = bdrv_truncate(bs->backing_hd, length); 232772706ea4SJeff Cody if (ret < 0) { 232872706ea4SJeff Cody goto ro_cleanup; 232972706ea4SJeff Cody } 233072706ea4SJeff Cody } 233172706ea4SJeff Cody 233272706ea4SJeff Cody total_sectors = length >> BDRV_SECTOR_BITS; 2333857d4f46SKevin Wolf 2334857d4f46SKevin Wolf /* qemu_try_blockalign() for bs will choose an alignment that works for 2335857d4f46SKevin Wolf * bs->backing_hd as well, so no need to compare the alignment manually. 
*/ 2336857d4f46SKevin Wolf buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE); 2337857d4f46SKevin Wolf if (buf == NULL) { 2338857d4f46SKevin Wolf ret = -ENOMEM; 2339857d4f46SKevin Wolf goto ro_cleanup; 2340857d4f46SKevin Wolf } 23418a426614SKevin Wolf 23428a426614SKevin Wolf for (sector = 0; sector < total_sectors; sector += n) { 2343d663640cSPaolo Bonzini ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n); 2344d663640cSPaolo Bonzini if (ret < 0) { 2345d663640cSPaolo Bonzini goto ro_cleanup; 2346d663640cSPaolo Bonzini } 2347d663640cSPaolo Bonzini if (ret) { 2348dabfa6ccSKevin Wolf ret = bdrv_read(bs, sector, buf, n); 2349dabfa6ccSKevin Wolf if (ret < 0) { 23504dca4b63SNaphtali Sprei goto ro_cleanup; 235133e3963eSbellard } 235233e3963eSbellard 2353dabfa6ccSKevin Wolf ret = bdrv_write(bs->backing_hd, sector, buf, n); 2354dabfa6ccSKevin Wolf if (ret < 0) { 23554dca4b63SNaphtali Sprei goto ro_cleanup; 235633e3963eSbellard } 235733e3963eSbellard } 235833e3963eSbellard } 235995389c86Sbellard 23601d44952fSChristoph Hellwig if (drv->bdrv_make_empty) { 23611d44952fSChristoph Hellwig ret = drv->bdrv_make_empty(bs); 2362dabfa6ccSKevin Wolf if (ret < 0) { 2363dabfa6ccSKevin Wolf goto ro_cleanup; 2364dabfa6ccSKevin Wolf } 23651d44952fSChristoph Hellwig bdrv_flush(bs); 23661d44952fSChristoph Hellwig } 236795389c86Sbellard 23683f5075aeSChristoph Hellwig /* 23693f5075aeSChristoph Hellwig * Make sure all data we wrote to the backing device is actually 23703f5075aeSChristoph Hellwig * stable on disk. 
23713f5075aeSChristoph Hellwig */ 2372dabfa6ccSKevin Wolf if (bs->backing_hd) { 23733f5075aeSChristoph Hellwig bdrv_flush(bs->backing_hd); 2374dabfa6ccSKevin Wolf } 23754dca4b63SNaphtali Sprei 2376dabfa6ccSKevin Wolf ret = 0; 23774dca4b63SNaphtali Sprei ro_cleanup: 2378857d4f46SKevin Wolf qemu_vfree(buf); 23794dca4b63SNaphtali Sprei 23804dca4b63SNaphtali Sprei if (ro) { 23810bce597dSJeff Cody /* ignoring error return here */ 23820bce597dSJeff Cody bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL); 23834dca4b63SNaphtali Sprei } 23844dca4b63SNaphtali Sprei 23851d44952fSChristoph Hellwig return ret; 238633e3963eSbellard } 238733e3963eSbellard 2388e8877497SStefan Hajnoczi int bdrv_commit_all(void) 23896ab4b5abSMarkus Armbruster { 23906ab4b5abSMarkus Armbruster BlockDriverState *bs; 23916ab4b5abSMarkus Armbruster 2392dc364f4cSBenoît Canet QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 2393ed78cda3SStefan Hajnoczi AioContext *aio_context = bdrv_get_aio_context(bs); 2394ed78cda3SStefan Hajnoczi 2395ed78cda3SStefan Hajnoczi aio_context_acquire(aio_context); 2396272d2d8eSJeff Cody if (bs->drv && bs->backing_hd) { 2397e8877497SStefan Hajnoczi int ret = bdrv_commit(bs); 2398e8877497SStefan Hajnoczi if (ret < 0) { 2399ed78cda3SStefan Hajnoczi aio_context_release(aio_context); 2400e8877497SStefan Hajnoczi return ret; 24016ab4b5abSMarkus Armbruster } 24026ab4b5abSMarkus Armbruster } 2403ed78cda3SStefan Hajnoczi aio_context_release(aio_context); 2404272d2d8eSJeff Cody } 2405e8877497SStefan Hajnoczi return 0; 2406e8877497SStefan Hajnoczi } 24076ab4b5abSMarkus Armbruster 2408dbffbdcfSStefan Hajnoczi /** 2409dbffbdcfSStefan Hajnoczi * Remove an active request from the tracked requests list 2410dbffbdcfSStefan Hajnoczi * 2411dbffbdcfSStefan Hajnoczi * This function should be called when a tracked request is completing. 
2412dbffbdcfSStefan Hajnoczi */ 2413dbffbdcfSStefan Hajnoczi static void tracked_request_end(BdrvTrackedRequest *req) 2414dbffbdcfSStefan Hajnoczi { 24152dbafdc0SKevin Wolf if (req->serialising) { 24162dbafdc0SKevin Wolf req->bs->serialising_in_flight--; 24172dbafdc0SKevin Wolf } 24182dbafdc0SKevin Wolf 2419dbffbdcfSStefan Hajnoczi QLIST_REMOVE(req, list); 2420f4658285SStefan Hajnoczi qemu_co_queue_restart_all(&req->wait_queue); 2421dbffbdcfSStefan Hajnoczi } 2422dbffbdcfSStefan Hajnoczi 2423dbffbdcfSStefan Hajnoczi /** 2424dbffbdcfSStefan Hajnoczi * Add an active request to the tracked requests list 2425dbffbdcfSStefan Hajnoczi */ 2426dbffbdcfSStefan Hajnoczi static void tracked_request_begin(BdrvTrackedRequest *req, 2427dbffbdcfSStefan Hajnoczi BlockDriverState *bs, 2428793ed47aSKevin Wolf int64_t offset, 2429793ed47aSKevin Wolf unsigned int bytes, bool is_write) 2430dbffbdcfSStefan Hajnoczi { 2431dbffbdcfSStefan Hajnoczi *req = (BdrvTrackedRequest){ 2432dbffbdcfSStefan Hajnoczi .bs = bs, 2433793ed47aSKevin Wolf .offset = offset, 2434793ed47aSKevin Wolf .bytes = bytes, 2435dbffbdcfSStefan Hajnoczi .is_write = is_write, 24365f8b6491SStefan Hajnoczi .co = qemu_coroutine_self(), 24372dbafdc0SKevin Wolf .serialising = false, 24387327145fSKevin Wolf .overlap_offset = offset, 24397327145fSKevin Wolf .overlap_bytes = bytes, 2440dbffbdcfSStefan Hajnoczi }; 2441dbffbdcfSStefan Hajnoczi 2442f4658285SStefan Hajnoczi qemu_co_queue_init(&req->wait_queue); 2443f4658285SStefan Hajnoczi 2444dbffbdcfSStefan Hajnoczi QLIST_INSERT_HEAD(&bs->tracked_requests, req, list); 2445dbffbdcfSStefan Hajnoczi } 2446dbffbdcfSStefan Hajnoczi 2447e96126ffSKevin Wolf static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align) 24482dbafdc0SKevin Wolf { 24497327145fSKevin Wolf int64_t overlap_offset = req->offset & ~(align - 1); 2450e96126ffSKevin Wolf unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align) 24517327145fSKevin Wolf - overlap_offset; 
24527327145fSKevin Wolf 24532dbafdc0SKevin Wolf if (!req->serialising) { 24542dbafdc0SKevin Wolf req->bs->serialising_in_flight++; 24552dbafdc0SKevin Wolf req->serialising = true; 24562dbafdc0SKevin Wolf } 24577327145fSKevin Wolf 24587327145fSKevin Wolf req->overlap_offset = MIN(req->overlap_offset, overlap_offset); 24597327145fSKevin Wolf req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes); 24602dbafdc0SKevin Wolf } 24612dbafdc0SKevin Wolf 2462d83947acSStefan Hajnoczi /** 2463d83947acSStefan Hajnoczi * Round a region to cluster boundaries 2464d83947acSStefan Hajnoczi */ 2465343bded4SPaolo Bonzini void bdrv_round_to_clusters(BlockDriverState *bs, 2466d83947acSStefan Hajnoczi int64_t sector_num, int nb_sectors, 2467d83947acSStefan Hajnoczi int64_t *cluster_sector_num, 2468d83947acSStefan Hajnoczi int *cluster_nb_sectors) 2469d83947acSStefan Hajnoczi { 2470d83947acSStefan Hajnoczi BlockDriverInfo bdi; 2471d83947acSStefan Hajnoczi 2472d83947acSStefan Hajnoczi if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) { 2473d83947acSStefan Hajnoczi *cluster_sector_num = sector_num; 2474d83947acSStefan Hajnoczi *cluster_nb_sectors = nb_sectors; 2475d83947acSStefan Hajnoczi } else { 2476d83947acSStefan Hajnoczi int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE; 2477d83947acSStefan Hajnoczi *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c); 2478d83947acSStefan Hajnoczi *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num + 2479d83947acSStefan Hajnoczi nb_sectors, c); 2480d83947acSStefan Hajnoczi } 2481d83947acSStefan Hajnoczi } 2482d83947acSStefan Hajnoczi 24837327145fSKevin Wolf static int bdrv_get_cluster_size(BlockDriverState *bs) 2484793ed47aSKevin Wolf { 2485793ed47aSKevin Wolf BlockDriverInfo bdi; 24867327145fSKevin Wolf int ret; 2487793ed47aSKevin Wolf 24887327145fSKevin Wolf ret = bdrv_get_info(bs, &bdi); 24897327145fSKevin Wolf if (ret < 0 || bdi.cluster_size == 0) { 24907327145fSKevin Wolf return bs->request_alignment; 2491793ed47aSKevin 
Wolf } else { 24927327145fSKevin Wolf return bdi.cluster_size; 2493793ed47aSKevin Wolf } 2494793ed47aSKevin Wolf } 2495793ed47aSKevin Wolf 2496f4658285SStefan Hajnoczi static bool tracked_request_overlaps(BdrvTrackedRequest *req, 2497793ed47aSKevin Wolf int64_t offset, unsigned int bytes) 2498793ed47aSKevin Wolf { 2499d83947acSStefan Hajnoczi /* aaaa bbbb */ 25007327145fSKevin Wolf if (offset >= req->overlap_offset + req->overlap_bytes) { 2501d83947acSStefan Hajnoczi return false; 2502d83947acSStefan Hajnoczi } 2503d83947acSStefan Hajnoczi /* bbbb aaaa */ 25047327145fSKevin Wolf if (req->overlap_offset >= offset + bytes) { 2505d83947acSStefan Hajnoczi return false; 2506d83947acSStefan Hajnoczi } 2507d83947acSStefan Hajnoczi return true; 2508f4658285SStefan Hajnoczi } 2509f4658285SStefan Hajnoczi 251028de2dcdSKevin Wolf static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self) 2511f4658285SStefan Hajnoczi { 25122dbafdc0SKevin Wolf BlockDriverState *bs = self->bs; 2513f4658285SStefan Hajnoczi BdrvTrackedRequest *req; 2514f4658285SStefan Hajnoczi bool retry; 251528de2dcdSKevin Wolf bool waited = false; 2516f4658285SStefan Hajnoczi 25172dbafdc0SKevin Wolf if (!bs->serialising_in_flight) { 251828de2dcdSKevin Wolf return false; 25192dbafdc0SKevin Wolf } 25202dbafdc0SKevin Wolf 2521f4658285SStefan Hajnoczi do { 2522f4658285SStefan Hajnoczi retry = false; 2523f4658285SStefan Hajnoczi QLIST_FOREACH(req, &bs->tracked_requests, list) { 25242dbafdc0SKevin Wolf if (req == self || (!req->serialising && !self->serialising)) { 252565afd211SKevin Wolf continue; 252665afd211SKevin Wolf } 25277327145fSKevin Wolf if (tracked_request_overlaps(req, self->overlap_offset, 25287327145fSKevin Wolf self->overlap_bytes)) 25297327145fSKevin Wolf { 25305f8b6491SStefan Hajnoczi /* Hitting this means there was a reentrant request, for 25315f8b6491SStefan Hajnoczi * example, a block driver issuing nested requests. 
This must 25325f8b6491SStefan Hajnoczi * never happen since it means deadlock. 25335f8b6491SStefan Hajnoczi */ 25345f8b6491SStefan Hajnoczi assert(qemu_coroutine_self() != req->co); 25355f8b6491SStefan Hajnoczi 25366460440fSKevin Wolf /* If the request is already (indirectly) waiting for us, or 25376460440fSKevin Wolf * will wait for us as soon as it wakes up, then just go on 25386460440fSKevin Wolf * (instead of producing a deadlock in the former case). */ 25396460440fSKevin Wolf if (!req->waiting_for) { 25406460440fSKevin Wolf self->waiting_for = req; 2541f4658285SStefan Hajnoczi qemu_co_queue_wait(&req->wait_queue); 25426460440fSKevin Wolf self->waiting_for = NULL; 2543f4658285SStefan Hajnoczi retry = true; 254428de2dcdSKevin Wolf waited = true; 2545f4658285SStefan Hajnoczi break; 2546f4658285SStefan Hajnoczi } 2547f4658285SStefan Hajnoczi } 25486460440fSKevin Wolf } 2549f4658285SStefan Hajnoczi } while (retry); 255028de2dcdSKevin Wolf 255128de2dcdSKevin Wolf return waited; 2552f4658285SStefan Hajnoczi } 2553f4658285SStefan Hajnoczi 2554756e6736SKevin Wolf /* 2555756e6736SKevin Wolf * Return values: 2556756e6736SKevin Wolf * 0 - success 2557756e6736SKevin Wolf * -EINVAL - backing format specified, but no file 2558756e6736SKevin Wolf * -ENOSPC - can't update the backing file because no space is left in the 2559756e6736SKevin Wolf * image file header 2560756e6736SKevin Wolf * -ENOTSUP - format driver doesn't support changing the backing file 2561756e6736SKevin Wolf */ 2562756e6736SKevin Wolf int bdrv_change_backing_file(BlockDriverState *bs, 2563756e6736SKevin Wolf const char *backing_file, const char *backing_fmt) 2564756e6736SKevin Wolf { 2565756e6736SKevin Wolf BlockDriver *drv = bs->drv; 2566469ef350SPaolo Bonzini int ret; 2567756e6736SKevin Wolf 25685f377794SPaolo Bonzini /* Backing file format doesn't make sense without a backing file */ 25695f377794SPaolo Bonzini if (backing_fmt && !backing_file) { 25705f377794SPaolo Bonzini return -EINVAL; 
25715f377794SPaolo Bonzini } 25725f377794SPaolo Bonzini 2573756e6736SKevin Wolf if (drv->bdrv_change_backing_file != NULL) { 2574469ef350SPaolo Bonzini ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt); 2575756e6736SKevin Wolf } else { 2576469ef350SPaolo Bonzini ret = -ENOTSUP; 2577756e6736SKevin Wolf } 2578469ef350SPaolo Bonzini 2579469ef350SPaolo Bonzini if (ret == 0) { 2580469ef350SPaolo Bonzini pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); 2581469ef350SPaolo Bonzini pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); 2582469ef350SPaolo Bonzini } 2583469ef350SPaolo Bonzini return ret; 2584756e6736SKevin Wolf } 2585756e6736SKevin Wolf 25866ebdcee2SJeff Cody /* 25876ebdcee2SJeff Cody * Finds the image layer in the chain that has 'bs' as its backing file. 25886ebdcee2SJeff Cody * 25896ebdcee2SJeff Cody * active is the current topmost image. 25906ebdcee2SJeff Cody * 25916ebdcee2SJeff Cody * Returns NULL if bs is not found in active's image chain, 25926ebdcee2SJeff Cody * or if active == bs. 25934caf0fcdSJeff Cody * 25944caf0fcdSJeff Cody * Returns the bottommost base image if bs == NULL. 25956ebdcee2SJeff Cody */ 25966ebdcee2SJeff Cody BlockDriverState *bdrv_find_overlay(BlockDriverState *active, 25976ebdcee2SJeff Cody BlockDriverState *bs) 25986ebdcee2SJeff Cody { 25994caf0fcdSJeff Cody while (active && bs != active->backing_hd) { 26004caf0fcdSJeff Cody active = active->backing_hd; 26016ebdcee2SJeff Cody } 26026ebdcee2SJeff Cody 26034caf0fcdSJeff Cody return active; 26046ebdcee2SJeff Cody } 26056ebdcee2SJeff Cody 26064caf0fcdSJeff Cody /* Given a BDS, searches for the base layer. 
*/ 26074caf0fcdSJeff Cody BlockDriverState *bdrv_find_base(BlockDriverState *bs) 26084caf0fcdSJeff Cody { 26094caf0fcdSJeff Cody return bdrv_find_overlay(bs, NULL); 26106ebdcee2SJeff Cody } 26116ebdcee2SJeff Cody 26126ebdcee2SJeff Cody typedef struct BlkIntermediateStates { 26136ebdcee2SJeff Cody BlockDriverState *bs; 26146ebdcee2SJeff Cody QSIMPLEQ_ENTRY(BlkIntermediateStates) entry; 26156ebdcee2SJeff Cody } BlkIntermediateStates; 26166ebdcee2SJeff Cody 26176ebdcee2SJeff Cody 26186ebdcee2SJeff Cody /* 26196ebdcee2SJeff Cody * Drops images above 'base' up to and including 'top', and sets the image 26206ebdcee2SJeff Cody * above 'top' to have base as its backing file. 26216ebdcee2SJeff Cody * 26226ebdcee2SJeff Cody * Requires that the overlay to 'top' is opened r/w, so that the backing file 26236ebdcee2SJeff Cody * information in 'bs' can be properly updated. 26246ebdcee2SJeff Cody * 26256ebdcee2SJeff Cody * E.g., this will convert the following chain: 26266ebdcee2SJeff Cody * bottom <- base <- intermediate <- top <- active 26276ebdcee2SJeff Cody * 26286ebdcee2SJeff Cody * to 26296ebdcee2SJeff Cody * 26306ebdcee2SJeff Cody * bottom <- base <- active 26316ebdcee2SJeff Cody * 26326ebdcee2SJeff Cody * It is allowed for bottom==base, in which case it converts: 26336ebdcee2SJeff Cody * 26346ebdcee2SJeff Cody * base <- intermediate <- top <- active 26356ebdcee2SJeff Cody * 26366ebdcee2SJeff Cody * to 26376ebdcee2SJeff Cody * 26386ebdcee2SJeff Cody * base <- active 26396ebdcee2SJeff Cody * 264054e26900SJeff Cody * If backing_file_str is non-NULL, it will be used when modifying top's 264154e26900SJeff Cody * overlay image metadata. 
 *
 * Error conditions:
 *  if active == top, that is considered an error
 *
 */
int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
                           BlockDriverState *base, const char *backing_file_str)
{
    BlockDriverState *intermediate;
    BlockDriverState *base_bs = NULL;
    BlockDriverState *new_top_bs = NULL;
    BlkIntermediateStates *intermediate_state, *next;
    /* Every early "goto exit" below reports -EIO unless ret is reassigned */
    int ret = -EIO;

    QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
    QSIMPLEQ_INIT(&states_to_delete);

    if (!top->drv || !base->drv) {
        goto exit;
    }

    new_top_bs = bdrv_find_overlay(active, top);

    if (new_top_bs == NULL) {
        /* we could not find the image above 'top', this is an error */
        goto exit;
    }

    /* special case of new_top_bs->backing_hd already pointing to base - nothing
     * to do, no intermediate images */
    if (new_top_bs->backing_hd == base) {
        ret = 0;
        goto exit;
    }

    intermediate = top;

    /* now we will go down through the list, and add each BDS we find
     * into our deletion queue, until we hit the 'base'
     */
    while (intermediate) {
        intermediate_state = g_new0(BlkIntermediateStates, 1);
        intermediate_state->bs = intermediate;
        QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);

        if (intermediate->backing_hd == base) {
            base_bs = intermediate->backing_hd;
            break;
        }
        intermediate = intermediate->backing_hd;
    }
    if (base_bs == NULL) {
        /* something went wrong, we did not end at the base. safely
         * unravel everything, and exit with error */
        goto exit;
    }

    /* success - we can delete the intermediate states, and link top->base */
    /* A NULL backing_file_str falls back to the base image's own filename */
    backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
    ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
                                   base_bs->drv ? base_bs->drv->format_name : "");
    if (ret) {
        goto exit;
    }
    bdrv_set_backing_hd(new_top_bs, base_bs);

    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
        /* so that bdrv_close() does not recursively close the chain */
        bdrv_set_backing_hd(intermediate_state->bs, NULL);
        bdrv_unref(intermediate_state->bs);
    }
    ret = 0;

exit:
    /* Reached on success and on failure: only the queue entries are freed
     * here, the BlockDriverStates they point to are not touched. */
    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
        g_free(intermediate_state);
    }
    return ret;
}


/*
 * Validate a byte-granularity request against @bs.
 *
 * Returns 0 if the request may proceed, -EIO if @size exceeds
 * BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS or if @offset is negative,
 * and -ENOMEDIUM if bdrv_is_inserted() reports that no medium is present.
 */
static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
                                   size_t size)
{
    if (size > BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) {
        return -EIO;
    }

    if (!bdrv_is_inserted(bs)) {
        return -ENOMEDIUM;
    }

    if (offset < 0) {
        return -EIO;
    }

    return 0;
}

/*
 * Sector-granularity variant of bdrv_check_byte_request().
 *
 * Returns -EIO if @nb_sectors is negative or larger than
 * BDRV_REQUEST_MAX_SECTORS; otherwise converts both values to bytes and
 * returns the result of bdrv_check_byte_request().
 */
static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
                              int nb_sectors)
{
    if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
        return -EIO;
    }

    return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
                                   nb_sectors * BDRV_SECTOR_SIZE);
}

/*
 * Argument/result bundle handed to bdrv_rw_co_entry().  @ret is initialised
 * to NOT_DONE by bdrv_prwv_co() and overwritten with the request's result.
 */
typedef struct RwCo {
    BlockDriverState *bs;
    int64_t offset;
    QEMUIOVector *qiov;
    bool is_write;
    int ret;
    BdrvRequestFlags flags;
} RwCo;

/*
 * Coroutine entry point: dispatch the RwCo in @opaque to
 * bdrv_co_do_preadv() or bdrv_co_do_pwritev() depending on is_write, using
 * qiov->size as the byte count, and store the result in rwco->ret.
 */
static void coroutine_fn bdrv_rw_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    if (!rwco->is_write) {
        rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
                                      rwco->qiov->size, rwco->qiov,
                                      rwco->flags);
    } else {
        rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
                                       rwco->qiov->size, rwco->qiov,
                                       rwco->flags);
    }
}

/*
 * Process a vectored synchronous request using coroutines
 *
 * Returns the value bdrv_rw_co_entry() stored in rwco.ret.
 */
static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
                        QEMUIOVector *qiov, bool is_write,
                        BdrvRequestFlags flags)
{
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .offset = offset,
        .qiov = qiov,
        .is_write = is_write,
        .ret = NOT_DONE,
        .flags = flags,
    };

    /**
     * In sync call context, when the vcpu is blocked, this throttling timer
     * will not fire; so the I/O throttling function has to be disabled here
     * if it has been enabled.
     */
    if (bs->io_limits_enabled) {
        fprintf(stderr, "Disabling I/O throttling on '%s' due "
                        "to synchronous I/O.\n", bdrv_get_device_name(bs));
        bdrv_io_limits_disable(bs);
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_rw_co_entry(&rwco);
    } else {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        co = qemu_coroutine_create(bdrv_rw_co_entry);
        qemu_coroutine_enter(co, &rwco);
        /* Poll the BDS's AioContext until the coroutine has replaced the
         * NOT_DONE sentinel with a real result */
        while (rwco.ret == NOT_DONE) {
            aio_poll(aio_context, true);
        }
    }
    return rwco.ret;
}

/*
 * Process a synchronous request using coroutines
 *
 * Wraps @buf in a single-element QEMUIOVector and forwards to
 * bdrv_prwv_co().  Returns -EINVAL if @nb_sectors is negative or larger
 * than BDRV_REQUEST_MAX_SECTORS.
 */
static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
                      int nb_sectors, bool is_write, BdrvRequestFlags flags)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
    };

    if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
        return -EINVAL;
    }

    qemu_iovec_init_external(&qiov, &iov, 1);
    return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
                        &qiov, is_write, flags);
}

/* return < 0 if error. See bdrv_write() for the return codes */
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
              uint8_t *buf, int nb_sectors)
{
    return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
}

/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
                          uint8_t *buf, int nb_sectors)
{
    bool enabled;
    int ret;

    /* Clear the flag around the read and restore the saved value afterwards */
    enabled = bs->io_limits_enabled;
    bs->io_limits_enabled = false;
    ret = bdrv_read(bs, sector_num, buf, nb_sectors);
    bs->io_limits_enabled = enabled;
    return ret;
}

/* Return < 0 if error. Important errors are:
  -EIO         generic I/O error (may happen for all errors)
  -ENOMEDIUM   No media inserted.
  -EINVAL      Invalid sector number or nb_sectors
  -EACCES      Trying to write a read-only device
*/
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
               const uint8_t *buf, int nb_sectors)
{
    /* bdrv_rw_co() takes a non-const buffer for both directions, hence the
     * cast */
    return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
}

/*
 * Synchronous write-zeroes request: issues a write of @nb_sectors sectors at
 * @sector_num through bdrv_rw_co() with a NULL data buffer and
 * BDRV_REQ_ZERO_WRITE ORed into @flags.  Returns what bdrv_rw_co() returns.
 */
int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
                      int nb_sectors, BdrvRequestFlags flags)
{
    return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
                      BDRV_REQ_ZERO_WRITE | flags);
}

/*
 * Completely zero out a block device with the help of bdrv_write_zeroes.
 * The operation is sped up by checking the block status and only writing
 * zeroes to the device if they currently do not return zeroes. Optional
 * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
 *
 * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
 */
int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
{
    int64_t target_sectors, ret, nb_sectors, sector_num = 0;
    int n;

    target_sectors = bdrv_nb_sectors(bs);
    if (target_sectors < 0) {
        return target_sectors;
    }

    for (;;) {
        /* Query at most BDRV_REQUEST_MAX_SECTORS per iteration; the loop
         * ends once nothing is left between sector_num and the device end */
        nb_sectors = MIN(target_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
        if (nb_sectors <= 0) {
            return 0;
        }
        /* n receives the sector count that the returned status applies to */
        ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
        if (ret < 0) {
            error_report("error getting block status at sector %" PRId64 ": %s",
                         sector_num, strerror(-ret));
            return ret;
        }
        if (ret & BDRV_BLOCK_ZERO) {
            /* Already reads as zeroes: skip without writing */
            sector_num += n;
            continue;
        }
        ret = bdrv_write_zeroes(bs, sector_num, n, flags);
        if (ret < 0) {
            error_report("error writing zeroes at sector %" PRId64 ": %s",
                         sector_num, strerror(-ret));
            return ret;
        }
        sector_num += n;
    }
}

/*
 * Synchronous byte-granularity read of @bytes bytes at @offset into @buf.
 *
 * Returns @bytes on success, -EINVAL if @bytes is negative, or the negative
 * value returned by bdrv_prwv_co().
 */
int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len = bytes,
    };
    int ret;

    if (bytes < 0) {
        return -EINVAL;
    }

    qemu_iovec_init_external(&qiov, &iov, 1);
    ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
    if (ret < 0) {
        return ret;
    }

    return bytes;
}

/*
 * Synchronous vectored write of @qiov at byte @offset.
 *
 * Returns qiov->size on success or the negative value returned by
 * bdrv_prwv_co().
 */
int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
{
    int ret;

    ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
    if (ret < 0) {
        return ret;
    }

    return qiov->size;
}

/*
 * Synchronous byte-granularity write of @bytes bytes from @buf at @offset.
 *
 * Returns -EINVAL if @bytes is negative; otherwise wraps @buf in a
 * single-element QEMUIOVector and returns the result of bdrv_pwritev().
 */
int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
                const void *buf, int bytes)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base   = (void *) buf,
        .iov_len    = bytes,
    };

    if (bytes < 0) {
        return -EINVAL;
    }

    qemu_iovec_init_external(&qiov, &iov, 1);
    return bdrv_pwritev(bs, offset, &qiov);
}

/*
 * Writes to the file and ensures that no writes are reordered across this
 * request (acts as a barrier)
 *
 * Returns 0 on success,
-errno in error cases. 2979f08145feSKevin Wolf */ 2980f08145feSKevin Wolf int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset, 2981f08145feSKevin Wolf const void *buf, int count) 2982f08145feSKevin Wolf { 2983f08145feSKevin Wolf int ret; 2984f08145feSKevin Wolf 2985f08145feSKevin Wolf ret = bdrv_pwrite(bs, offset, buf, count); 2986f08145feSKevin Wolf if (ret < 0) { 2987f08145feSKevin Wolf return ret; 2988f08145feSKevin Wolf } 2989f08145feSKevin Wolf 2990f05fa4adSPaolo Bonzini /* No flush needed for cache modes that already do it */ 2991f05fa4adSPaolo Bonzini if (bs->enable_write_cache) { 2992f08145feSKevin Wolf bdrv_flush(bs); 2993f08145feSKevin Wolf } 2994f08145feSKevin Wolf 2995f08145feSKevin Wolf return 0; 2996f08145feSKevin Wolf } 2997f08145feSKevin Wolf 2998470c0504SStefan Hajnoczi static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, 2999ab185921SStefan Hajnoczi int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) 3000ab185921SStefan Hajnoczi { 3001ab185921SStefan Hajnoczi /* Perform I/O through a temporary buffer so that users who scribble over 3002ab185921SStefan Hajnoczi * their read buffer while the operation is in progress do not end up 3003ab185921SStefan Hajnoczi * modifying the image file. This is critical for zero-copy guest I/O 3004ab185921SStefan Hajnoczi * where anything might happen inside guest memory. 
3005ab185921SStefan Hajnoczi */ 3006ab185921SStefan Hajnoczi void *bounce_buffer; 3007ab185921SStefan Hajnoczi 300879c053bdSStefan Hajnoczi BlockDriver *drv = bs->drv; 3009ab185921SStefan Hajnoczi struct iovec iov; 3010ab185921SStefan Hajnoczi QEMUIOVector bounce_qiov; 3011ab185921SStefan Hajnoczi int64_t cluster_sector_num; 3012ab185921SStefan Hajnoczi int cluster_nb_sectors; 3013ab185921SStefan Hajnoczi size_t skip_bytes; 3014ab185921SStefan Hajnoczi int ret; 3015ab185921SStefan Hajnoczi 3016ab185921SStefan Hajnoczi /* Cover entire cluster so no additional backing file I/O is required when 3017ab185921SStefan Hajnoczi * allocating cluster in the image file. 3018ab185921SStefan Hajnoczi */ 3019343bded4SPaolo Bonzini bdrv_round_to_clusters(bs, sector_num, nb_sectors, 3020ab185921SStefan Hajnoczi &cluster_sector_num, &cluster_nb_sectors); 3021ab185921SStefan Hajnoczi 3022470c0504SStefan Hajnoczi trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, 3023ab185921SStefan Hajnoczi cluster_sector_num, cluster_nb_sectors); 3024ab185921SStefan Hajnoczi 3025ab185921SStefan Hajnoczi iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE; 3026857d4f46SKevin Wolf iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len); 3027857d4f46SKevin Wolf if (bounce_buffer == NULL) { 3028857d4f46SKevin Wolf ret = -ENOMEM; 3029857d4f46SKevin Wolf goto err; 3030857d4f46SKevin Wolf } 3031857d4f46SKevin Wolf 3032ab185921SStefan Hajnoczi qemu_iovec_init_external(&bounce_qiov, &iov, 1); 3033ab185921SStefan Hajnoczi 303479c053bdSStefan Hajnoczi ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors, 3035ab185921SStefan Hajnoczi &bounce_qiov); 3036ab185921SStefan Hajnoczi if (ret < 0) { 3037ab185921SStefan Hajnoczi goto err; 3038ab185921SStefan Hajnoczi } 3039ab185921SStefan Hajnoczi 304079c053bdSStefan Hajnoczi if (drv->bdrv_co_write_zeroes && 304179c053bdSStefan Hajnoczi buffer_is_zero(bounce_buffer, iov.iov_len)) { 3042621f0589SKevin Wolf ret = 
bdrv_co_do_write_zeroes(bs, cluster_sector_num, 3043aa7bfbffSPeter Lieven cluster_nb_sectors, 0); 304479c053bdSStefan Hajnoczi } else { 3045f05fa4adSPaolo Bonzini /* This does not change the data on the disk, it is not necessary 3046f05fa4adSPaolo Bonzini * to flush even in cache=writethrough mode. 3047f05fa4adSPaolo Bonzini */ 304879c053bdSStefan Hajnoczi ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors, 3049ab185921SStefan Hajnoczi &bounce_qiov); 305079c053bdSStefan Hajnoczi } 305179c053bdSStefan Hajnoczi 3052ab185921SStefan Hajnoczi if (ret < 0) { 3053ab185921SStefan Hajnoczi /* It might be okay to ignore write errors for guest requests. If this 3054ab185921SStefan Hajnoczi * is a deliberate copy-on-read then we don't want to ignore the error. 3055ab185921SStefan Hajnoczi * Simply report it in all cases. 3056ab185921SStefan Hajnoczi */ 3057ab185921SStefan Hajnoczi goto err; 3058ab185921SStefan Hajnoczi } 3059ab185921SStefan Hajnoczi 3060ab185921SStefan Hajnoczi skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE; 306103396148SMichael Tokarev qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes, 3062ab185921SStefan Hajnoczi nb_sectors * BDRV_SECTOR_SIZE); 3063ab185921SStefan Hajnoczi 3064ab185921SStefan Hajnoczi err: 3065ab185921SStefan Hajnoczi qemu_vfree(bounce_buffer); 3066ab185921SStefan Hajnoczi return ret; 3067ab185921SStefan Hajnoczi } 3068ab185921SStefan Hajnoczi 3069c5fbe571SStefan Hajnoczi /* 3070d0c7f642SKevin Wolf * Forwards an already correctly aligned request to the BlockDriver. This 3071d0c7f642SKevin Wolf * handles copy on read and zeroing after EOF; any other features must be 3072d0c7f642SKevin Wolf * implemented by the caller. 
 */
static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
    int64_t align, QEMUIOVector *qiov, int flags)
{
    BlockDriver *drv = bs->drv;
    int ret;

    int64_t sector_num = offset >> BDRV_SECTOR_BITS;
    unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;

    /* Callers must pass sector-aligned offset/bytes that match qiov->size */
    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert(!qiov || bytes == qiov->size);

    /* Handle Copy on Read and associated serialisation */
    if (flags & BDRV_REQ_COPY_ON_READ) {
        /* If we touch the same cluster it counts as an overlap.  This
         * guarantees that allocating writes will be serialized and not race
         * with each other for the same cluster.  For example, in copy-on-read
         * it ensures that the CoR read and write operations are atomic and
         * guest writes cannot interleave between them. */
        mark_request_serialising(req, bdrv_get_cluster_size(bs));
    }

    wait_serialising_requests(req);

    if (flags & BDRV_REQ_COPY_ON_READ) {
        int pnum;

        ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
        if (ret < 0) {
            goto out;
        }

        /* Take the copy-on-read path unless the whole range is reported as
         * allocated */
        if (!ret || pnum != nb_sectors) {
            ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
            goto out;
        }
    }

    /* Forward the request to the BlockDriver */
    if (!bs->zero_beyond_eof) {
        ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
    } else {
        /* Read zeros after EOF */
        int64_t total_sectors, max_nb_sectors;

        total_sectors = bdrv_nb_sectors(bs);
        if (total_sectors < 0) {
            ret = total_sectors;
            goto out;
        }

        /* Sectors left before EOF from sector_num, rounded up to the
         * request alignment (expressed in sectors) */
        max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
                                  align >> BDRV_SECTOR_BITS);
        if (nb_sectors < max_nb_sectors) {
            ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
        } else if (max_nb_sectors > 0) {
            /* Only the first max_nb_sectors are read from the driver, via a
             * temporary qiov covering that prefix of the caller's qiov */
            QEMUIOVector local_qiov;

            qemu_iovec_init(&local_qiov, qiov->niov);
            qemu_iovec_concat(&local_qiov, qiov, 0,
                              max_nb_sectors * BDRV_SECTOR_SIZE);

            ret = drv->bdrv_co_readv(bs, sector_num, max_nb_sectors,
                                     &local_qiov);

            qemu_iovec_destroy(&local_qiov);
        } else {
            ret = 0;
        }

        /* Reading beyond end of file is supposed to produce zeroes */
        if (ret == 0 && total_sectors < sector_num + nb_sectors) {
            /* These locals shadow the offset/bytes parameters; offset is in
             * sectors, relative to the start of qiov.
             * NOTE(review): bytes subtracts that relative offset from the
             * absolute end sector, so it looks larger than the remaining
             * qiov span whenever sector_num > 0 - confirm that
             * qemu_iovec_memset() clamps to the end of the vector. */
            uint64_t offset = MAX(0, total_sectors - sector_num);
            uint64_t bytes = (sector_num + nb_sectors - offset) *
                              BDRV_SECTOR_SIZE;
            qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
        }
    }

out:
    return ret;
}

/*
 * Return the alignment applied to requests on @bs: the larger of
 * BDRV_SECTOR_SIZE and bs->request_alignment.
 */
static inline uint64_t bdrv_get_align(BlockDriverState *bs)
{
    /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
    return MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
}

/*
 * Return true if both @offset and @bytes are multiples of bdrv_get_align().
 * The mask test presumably relies on the alignment being a power of two -
 * TODO confirm.
 */
static inline bool bdrv_req_is_aligned(BlockDriverState *bs,
                                       int64_t offset, size_t bytes)
{
    int64_t align = bdrv_get_align(bs);
    return !(offset & (align - 1) || (bytes & (align - 1)));
}

/*
 * Handle a read request in coroutine
context
 *
 * Validates the request, applies throttling when enabled, pads an unaligned
 * request at head and/or tail with scratch buffers so that the range passed
 * to bdrv_aligned_preadv() is a multiple of bdrv_get_align(), and tracks the
 * request for the duration of the call.
 *
 * Returns -ENOMEDIUM if bs->drv is NULL, a negative value from
 * bdrv_check_byte_request(), or the result of bdrv_aligned_preadv().
 */
static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;

    uint64_t align = bdrv_get_align(bs);
    uint8_t *head_buf = NULL;
    uint8_t *tail_buf = NULL;
    QEMUIOVector local_qiov;
    bool use_local_qiov = false;
    int ret;

    if (!drv) {
        return -ENOMEDIUM;
    }

    ret = bdrv_check_byte_request(bs, offset, bytes);
    if (ret < 0) {
        return ret;
    }

    if (bs->copy_on_read) {
        flags |= BDRV_REQ_COPY_ON_READ;
    }

    /* throttling disk I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, bytes, false);
    }

    /* Align read if necessary by padding qiov */
    if (offset & (align - 1)) {
        /* Unaligned start: prepend a scratch buffer that absorbs the bytes
         * between the aligned start and the requested offset.  niov + 2
         * leaves room for a tail element as well. */
        head_buf = qemu_blockalign(bs, align);
        qemu_iovec_init(&local_qiov, qiov->niov + 2);
        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
        use_local_qiov = true;

        bytes += offset & (align - 1);
        offset = offset & ~(align - 1);
    }

    if ((offset + bytes) & (align - 1)) {
        /* Unaligned end: append a scratch buffer that absorbs the bytes up
         * to the next alignment boundary */
        if (!use_local_qiov) {
            qemu_iovec_init(&local_qiov, qiov->niov + 1);
            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
            use_local_qiov = true;
        }
        tail_buf = qemu_blockalign(bs, align);
        qemu_iovec_add(&local_qiov, tail_buf,
                       align - ((offset + bytes) & (align - 1)));

        bytes = ROUND_UP(bytes, align);
    }

    tracked_request_begin(&req, bs, offset, bytes, false);
    ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
                              use_local_qiov ? &local_qiov : qiov,
                              flags);
    tracked_request_end(&req);

    if (use_local_qiov) {
        /* head_buf/tail_buf are NULL when the corresponding side needed no
         * padding */
        qemu_iovec_destroy(&local_qiov);
        qemu_vfree(head_buf);
        qemu_vfree(tail_buf);
    }

    return ret;
}

/*
 * Sector-granularity front end for bdrv_co_do_preadv().
 *
 * Returns -EINVAL if @nb_sectors is negative or larger than
 * BDRV_REQUEST_MAX_SECTORS; otherwise converts to bytes and forwards.
 */
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
        return -EINVAL;
    }

    return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
                             nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
}

/*
 * Coroutine read of @nb_sectors sectors at @sector_num into @qiov, with no
 * extra request flags.  Emits a trace event and forwards to
 * bdrv_co_do_readv().
 */
int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_readv(bs, sector_num, nb_sectors);

    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
}

/*
 * Same as bdrv_co_readv(), but passes BDRV_REQ_COPY_ON_READ so the request
 * takes the copy-on-read path.
 */
int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);

    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
                            BDRV_REQ_COPY_ON_READ);
}

/* Upper bound, in sectors, on the bounce buffer used by the write-zeroes
 * fallback below (it is combined with bs->bl.max_transfer_length there) */
#define MAX_WRITE_ZEROES_BOUNCE_BUFFER 32768

/*
 * Zero @nb_sectors sectors starting at @sector_num.
 *
 * The range is processed in chunks limited by bs->bl.max_write_zeroes and
 * shaped by bs->bl.write_zeroes_alignment.  Each chunk is first offered to
 * drv->bdrv_co_write_zeroes; if the driver has no such callback or it
 * returns -ENOTSUP, the chunk is written from a zero-filled bounce buffer
 * with drv->bdrv_co_writev instead.
 *
 * Returns 0 on success, -ENOMEM if the bounce buffer cannot be allocated,
 * or the first non-zero value returned by the driver.
 */
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    QEMUIOVector qiov;
    struct iovec iov = {0};
    int ret = 0;

    int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_write_zeroes,
                                        BDRV_REQUEST_MAX_SECTORS);

    while (nb_sectors > 0 && !ret) {
        int num = nb_sectors;

        /* Align request.  Block drivers can expect the "bulk" of the request
         * to be aligned.
         */
        if (bs->bl.write_zeroes_alignment
            && num > bs->bl.write_zeroes_alignment) {
            if (sector_num % bs->bl.write_zeroes_alignment != 0) {
                /* Make a small request up to the first aligned sector.  */
                num = bs->bl.write_zeroes_alignment;
                num -= sector_num % bs->bl.write_zeroes_alignment;
            } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
                /* Shorten the request to the last aligned sector.  num cannot
                 * underflow because num > bs->bl.write_zeroes_alignment.
                 */
                num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
            }
        }

        /* limit request size */
        if (num > max_write_zeroes) {
            num = max_write_zeroes;
        }

        /* Stays -ENOTSUP when the driver has no write_zeroes callback, which
         * selects the bounce-buffer fallback below */
        ret = -ENOTSUP;
        /* First try the efficient write zeroes operation */
        if (drv->bdrv_co_write_zeroes) {
            ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
        }

        if (ret == -ENOTSUP) {
            /* Fall back to bounce buffer if write zeroes is unsupported */
            int max_xfer_len = MIN_NON_ZERO(bs->bl.max_transfer_length,
                                            MAX_WRITE_ZEROES_BOUNCE_BUFFER);
            num = MIN(num, max_xfer_len);
            iov.iov_len = num * BDRV_SECTOR_SIZE;
            if (iov.iov_base == NULL) {
                iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
                if (iov.iov_base == NULL) {
                    ret = -ENOMEM;
                    goto fail;
                }
                memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
            }
            qemu_iovec_init_external(&qiov, &iov, 1);

            ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);

            /* Keep bounce buffer around if it is big enough for all
             * future requests.
             */
            if (num < max_xfer_len) {
                qemu_vfree(iov.iov_base);
                iov.iov_base = NULL;
            }
        }

        sector_num += num;
        nb_sectors -= num;
    }

fail:
    /* iov.iov_base is NULL here unless a full-size buffer was kept */
    qemu_vfree(iov.iov_base);
    return ret;
}

/*
 * Forwards an already correctly aligned write request to the BlockDriver.
3358c5fbe571SStefan Hajnoczi */ 3359b404f720SKevin Wolf static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, 336065afd211SKevin Wolf BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, 336165afd211SKevin Wolf QEMUIOVector *qiov, int flags) 3362c5fbe571SStefan Hajnoczi { 3363c5fbe571SStefan Hajnoczi BlockDriver *drv = bs->drv; 336428de2dcdSKevin Wolf bool waited; 33656b7cb247SStefan Hajnoczi int ret; 3366da1fa91dSKevin Wolf 3367b404f720SKevin Wolf int64_t sector_num = offset >> BDRV_SECTOR_BITS; 3368b404f720SKevin Wolf unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS; 3369da1fa91dSKevin Wolf 3370b404f720SKevin Wolf assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); 3371b404f720SKevin Wolf assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); 33728eb029c2SKevin Wolf assert(!qiov || bytes == qiov->size); 3373cc0681c4SBenoît Canet 337428de2dcdSKevin Wolf waited = wait_serialising_requests(req); 337528de2dcdSKevin Wolf assert(!waited || !req->serialising); 3376af91f9a7SKevin Wolf assert(req->overlap_offset <= offset); 3377af91f9a7SKevin Wolf assert(offset + bytes <= req->overlap_offset + req->overlap_bytes); 3378244eadefSKevin Wolf 337965afd211SKevin Wolf ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req); 3380d616b224SStefan Hajnoczi 3381465bee1dSPeter Lieven if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF && 3382465bee1dSPeter Lieven !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes && 3383465bee1dSPeter Lieven qemu_iovec_is_zero(qiov)) { 3384465bee1dSPeter Lieven flags |= BDRV_REQ_ZERO_WRITE; 3385465bee1dSPeter Lieven if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) { 3386465bee1dSPeter Lieven flags |= BDRV_REQ_MAY_UNMAP; 3387465bee1dSPeter Lieven } 3388465bee1dSPeter Lieven } 3389465bee1dSPeter Lieven 3390d616b224SStefan Hajnoczi if (ret < 0) { 3391d616b224SStefan Hajnoczi /* Do nothing, write notifier decided to fail this request */ 3392d616b224SStefan Hajnoczi } else if (flags & 
BDRV_REQ_ZERO_WRITE) { 33939e1cb96dSKevin Wolf BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO); 3394aa7bfbffSPeter Lieven ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags); 3395f08f2ddaSStefan Hajnoczi } else { 33969e1cb96dSKevin Wolf BLKDBG_EVENT(bs, BLKDBG_PWRITEV); 33976b7cb247SStefan Hajnoczi ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov); 3398f08f2ddaSStefan Hajnoczi } 33999e1cb96dSKevin Wolf BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE); 34006b7cb247SStefan Hajnoczi 3401f05fa4adSPaolo Bonzini if (ret == 0 && !bs->enable_write_cache) { 3402f05fa4adSPaolo Bonzini ret = bdrv_co_flush(bs); 3403f05fa4adSPaolo Bonzini } 3404f05fa4adSPaolo Bonzini 34051755da16SPaolo Bonzini bdrv_set_dirty(bs, sector_num, nb_sectors); 3406da1fa91dSKevin Wolf 34075366d0c8SBenoît Canet block_acct_highest_sector(&bs->stats, sector_num, nb_sectors); 34085e5a94b6SBenoît Canet 3409c0191e76SMax Reitz if (ret >= 0) { 3410df2a6f29SPaolo Bonzini bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors); 3411df2a6f29SPaolo Bonzini } 3412da1fa91dSKevin Wolf 34136b7cb247SStefan Hajnoczi return ret; 3414da1fa91dSKevin Wolf } 3415da1fa91dSKevin Wolf 3416b404f720SKevin Wolf /* 3417b404f720SKevin Wolf * Handle a write request in coroutine context 3418b404f720SKevin Wolf */ 34196601553eSKevin Wolf static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, 34206601553eSKevin Wolf int64_t offset, unsigned int bytes, QEMUIOVector *qiov, 3421b404f720SKevin Wolf BdrvRequestFlags flags) 3422b404f720SKevin Wolf { 342365afd211SKevin Wolf BdrvTrackedRequest req; 3424fc3959e4SFam Zheng uint64_t align = bdrv_get_align(bs); 34253b8242e0SKevin Wolf uint8_t *head_buf = NULL; 34263b8242e0SKevin Wolf uint8_t *tail_buf = NULL; 34273b8242e0SKevin Wolf QEMUIOVector local_qiov; 34283b8242e0SKevin Wolf bool use_local_qiov = false; 3429b404f720SKevin Wolf int ret; 3430b404f720SKevin Wolf 3431b404f720SKevin Wolf if (!bs->drv) { 3432b404f720SKevin Wolf return -ENOMEDIUM; 3433b404f720SKevin Wolf } 
3434b404f720SKevin Wolf if (bs->read_only) { 3435b404f720SKevin Wolf return -EACCES; 3436b404f720SKevin Wolf } 3437b9c64947SMax Reitz 3438b9c64947SMax Reitz ret = bdrv_check_byte_request(bs, offset, bytes); 3439b9c64947SMax Reitz if (ret < 0) { 3440b9c64947SMax Reitz return ret; 3441b404f720SKevin Wolf } 3442b404f720SKevin Wolf 3443b404f720SKevin Wolf /* throttling disk I/O */ 3444b404f720SKevin Wolf if (bs->io_limits_enabled) { 3445d5103588SKevin Wolf bdrv_io_limits_intercept(bs, bytes, true); 3446b404f720SKevin Wolf } 3447b404f720SKevin Wolf 34483b8242e0SKevin Wolf /* 34493b8242e0SKevin Wolf * Align write if necessary by performing a read-modify-write cycle. 34503b8242e0SKevin Wolf * Pad qiov with the read parts and be sure to have a tracked request not 34513b8242e0SKevin Wolf * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle. 34523b8242e0SKevin Wolf */ 345365afd211SKevin Wolf tracked_request_begin(&req, bs, offset, bytes, true); 34543b8242e0SKevin Wolf 34553b8242e0SKevin Wolf if (offset & (align - 1)) { 34563b8242e0SKevin Wolf QEMUIOVector head_qiov; 34573b8242e0SKevin Wolf struct iovec head_iov; 34583b8242e0SKevin Wolf 34593b8242e0SKevin Wolf mark_request_serialising(&req, align); 34603b8242e0SKevin Wolf wait_serialising_requests(&req); 34613b8242e0SKevin Wolf 34623b8242e0SKevin Wolf head_buf = qemu_blockalign(bs, align); 34633b8242e0SKevin Wolf head_iov = (struct iovec) { 34643b8242e0SKevin Wolf .iov_base = head_buf, 34653b8242e0SKevin Wolf .iov_len = align, 34663b8242e0SKevin Wolf }; 34673b8242e0SKevin Wolf qemu_iovec_init_external(&head_qiov, &head_iov, 1); 34683b8242e0SKevin Wolf 34699e1cb96dSKevin Wolf BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD); 34703b8242e0SKevin Wolf ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align, 34713b8242e0SKevin Wolf align, &head_qiov, 0); 34723b8242e0SKevin Wolf if (ret < 0) { 34733b8242e0SKevin Wolf goto fail; 34743b8242e0SKevin Wolf } 34759e1cb96dSKevin Wolf BLKDBG_EVENT(bs, 
BLKDBG_PWRITEV_RMW_AFTER_HEAD); 34763b8242e0SKevin Wolf 34773b8242e0SKevin Wolf qemu_iovec_init(&local_qiov, qiov->niov + 2); 34783b8242e0SKevin Wolf qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1)); 34793b8242e0SKevin Wolf qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); 34803b8242e0SKevin Wolf use_local_qiov = true; 34813b8242e0SKevin Wolf 34823b8242e0SKevin Wolf bytes += offset & (align - 1); 34833b8242e0SKevin Wolf offset = offset & ~(align - 1); 34843b8242e0SKevin Wolf } 34853b8242e0SKevin Wolf 34863b8242e0SKevin Wolf if ((offset + bytes) & (align - 1)) { 34873b8242e0SKevin Wolf QEMUIOVector tail_qiov; 34883b8242e0SKevin Wolf struct iovec tail_iov; 34893b8242e0SKevin Wolf size_t tail_bytes; 349028de2dcdSKevin Wolf bool waited; 34913b8242e0SKevin Wolf 34923b8242e0SKevin Wolf mark_request_serialising(&req, align); 349328de2dcdSKevin Wolf waited = wait_serialising_requests(&req); 349428de2dcdSKevin Wolf assert(!waited || !use_local_qiov); 34953b8242e0SKevin Wolf 34963b8242e0SKevin Wolf tail_buf = qemu_blockalign(bs, align); 34973b8242e0SKevin Wolf tail_iov = (struct iovec) { 34983b8242e0SKevin Wolf .iov_base = tail_buf, 34993b8242e0SKevin Wolf .iov_len = align, 35003b8242e0SKevin Wolf }; 35013b8242e0SKevin Wolf qemu_iovec_init_external(&tail_qiov, &tail_iov, 1); 35023b8242e0SKevin Wolf 35039e1cb96dSKevin Wolf BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL); 35043b8242e0SKevin Wolf ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align, 35053b8242e0SKevin Wolf align, &tail_qiov, 0); 35063b8242e0SKevin Wolf if (ret < 0) { 35073b8242e0SKevin Wolf goto fail; 35083b8242e0SKevin Wolf } 35099e1cb96dSKevin Wolf BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); 35103b8242e0SKevin Wolf 35113b8242e0SKevin Wolf if (!use_local_qiov) { 35123b8242e0SKevin Wolf qemu_iovec_init(&local_qiov, qiov->niov + 1); 35133b8242e0SKevin Wolf qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); 35143b8242e0SKevin Wolf use_local_qiov = true; 35153b8242e0SKevin Wolf 
} 35163b8242e0SKevin Wolf 35173b8242e0SKevin Wolf tail_bytes = (offset + bytes) & (align - 1); 35183b8242e0SKevin Wolf qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes); 35193b8242e0SKevin Wolf 35203b8242e0SKevin Wolf bytes = ROUND_UP(bytes, align); 35213b8242e0SKevin Wolf } 35223b8242e0SKevin Wolf 3523fc3959e4SFam Zheng if (use_local_qiov) { 3524fc3959e4SFam Zheng /* Local buffer may have non-zero data. */ 3525fc3959e4SFam Zheng flags &= ~BDRV_REQ_ZERO_WRITE; 3526fc3959e4SFam Zheng } 35273b8242e0SKevin Wolf ret = bdrv_aligned_pwritev(bs, &req, offset, bytes, 35283b8242e0SKevin Wolf use_local_qiov ? &local_qiov : qiov, 35293b8242e0SKevin Wolf flags); 35303b8242e0SKevin Wolf 35313b8242e0SKevin Wolf fail: 353265afd211SKevin Wolf tracked_request_end(&req); 3533b404f720SKevin Wolf 35343b8242e0SKevin Wolf if (use_local_qiov) { 35353b8242e0SKevin Wolf qemu_iovec_destroy(&local_qiov); 353699c4a85cSKevin Wolf } 35373b8242e0SKevin Wolf qemu_vfree(head_buf); 35383b8242e0SKevin Wolf qemu_vfree(tail_buf); 35393b8242e0SKevin Wolf 3540b404f720SKevin Wolf return ret; 3541b404f720SKevin Wolf } 3542b404f720SKevin Wolf 35436601553eSKevin Wolf static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, 35446601553eSKevin Wolf int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, 35456601553eSKevin Wolf BdrvRequestFlags flags) 35466601553eSKevin Wolf { 354775af1f34SPeter Lieven if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { 35486601553eSKevin Wolf return -EINVAL; 35496601553eSKevin Wolf } 35506601553eSKevin Wolf 35516601553eSKevin Wolf return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS, 35526601553eSKevin Wolf nb_sectors << BDRV_SECTOR_BITS, qiov, flags); 35536601553eSKevin Wolf } 35546601553eSKevin Wolf 3555c5fbe571SStefan Hajnoczi int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num, 3556c5fbe571SStefan Hajnoczi int nb_sectors, QEMUIOVector *qiov) 3557c5fbe571SStefan Hajnoczi { 3558c5fbe571SStefan Hajnoczi 
trace_bdrv_co_writev(bs, sector_num, nb_sectors); 3559c5fbe571SStefan Hajnoczi 3560f08f2ddaSStefan Hajnoczi return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0); 3561f08f2ddaSStefan Hajnoczi } 3562f08f2ddaSStefan Hajnoczi 3563f08f2ddaSStefan Hajnoczi int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, 3564aa7bfbffSPeter Lieven int64_t sector_num, int nb_sectors, 3565aa7bfbffSPeter Lieven BdrvRequestFlags flags) 3566f08f2ddaSStefan Hajnoczi { 3567fc3959e4SFam Zheng int ret; 3568fc3959e4SFam Zheng 356994d6ff21SPaolo Bonzini trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags); 3570f08f2ddaSStefan Hajnoczi 3571d32f35cbSPeter Lieven if (!(bs->open_flags & BDRV_O_UNMAP)) { 3572d32f35cbSPeter Lieven flags &= ~BDRV_REQ_MAY_UNMAP; 3573d32f35cbSPeter Lieven } 3574fc3959e4SFam Zheng if (bdrv_req_is_aligned(bs, sector_num << BDRV_SECTOR_BITS, 3575fc3959e4SFam Zheng nb_sectors << BDRV_SECTOR_BITS)) { 3576fc3959e4SFam Zheng ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL, 3577aa7bfbffSPeter Lieven BDRV_REQ_ZERO_WRITE | flags); 3578fc3959e4SFam Zheng } else { 3579fc3959e4SFam Zheng uint8_t *buf; 3580fc3959e4SFam Zheng QEMUIOVector local_qiov; 3581fc3959e4SFam Zheng size_t bytes = nb_sectors << BDRV_SECTOR_BITS; 3582fc3959e4SFam Zheng 3583fc3959e4SFam Zheng buf = qemu_memalign(bdrv_opt_mem_align(bs), bytes); 3584fc3959e4SFam Zheng memset(buf, 0, bytes); 3585fc3959e4SFam Zheng qemu_iovec_init(&local_qiov, 1); 3586fc3959e4SFam Zheng qemu_iovec_add(&local_qiov, buf, bytes); 3587fc3959e4SFam Zheng 3588fc3959e4SFam Zheng ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, &local_qiov, 3589fc3959e4SFam Zheng BDRV_REQ_ZERO_WRITE | flags); 3590fc3959e4SFam Zheng qemu_vfree(buf); 3591fc3959e4SFam Zheng } 3592fc3959e4SFam Zheng return ret; 3593c5fbe571SStefan Hajnoczi } 3594c5fbe571SStefan Hajnoczi 359583f64091Sbellard /** 359683f64091Sbellard * Truncate file to 'offset' bytes (needed only for file protocols) 359783f64091Sbellard */ 359883f64091Sbellard 
int bdrv_truncate(BlockDriverState *bs, int64_t offset) 359983f64091Sbellard { 360083f64091Sbellard BlockDriver *drv = bs->drv; 360151762288SStefan Hajnoczi int ret; 360283f64091Sbellard if (!drv) 360319cb3738Sbellard return -ENOMEDIUM; 360483f64091Sbellard if (!drv->bdrv_truncate) 360583f64091Sbellard return -ENOTSUP; 360659f2689dSNaphtali Sprei if (bs->read_only) 360759f2689dSNaphtali Sprei return -EACCES; 36089c75e168SJeff Cody 360951762288SStefan Hajnoczi ret = drv->bdrv_truncate(bs, offset); 361051762288SStefan Hajnoczi if (ret == 0) { 361151762288SStefan Hajnoczi ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS); 3612a7f53e26SMarkus Armbruster if (bs->blk) { 3613a7f53e26SMarkus Armbruster blk_dev_resize_cb(bs->blk); 3614a7f53e26SMarkus Armbruster } 361551762288SStefan Hajnoczi } 361651762288SStefan Hajnoczi return ret; 361783f64091Sbellard } 361883f64091Sbellard 361983f64091Sbellard /** 36204a1d5e1fSFam Zheng * Length of a allocated file in bytes. Sparse files are counted by actual 36214a1d5e1fSFam Zheng * allocated space. Return < 0 if error or unknown. 36224a1d5e1fSFam Zheng */ 36234a1d5e1fSFam Zheng int64_t bdrv_get_allocated_file_size(BlockDriverState *bs) 36244a1d5e1fSFam Zheng { 36254a1d5e1fSFam Zheng BlockDriver *drv = bs->drv; 36264a1d5e1fSFam Zheng if (!drv) { 36274a1d5e1fSFam Zheng return -ENOMEDIUM; 36284a1d5e1fSFam Zheng } 36294a1d5e1fSFam Zheng if (drv->bdrv_get_allocated_file_size) { 36304a1d5e1fSFam Zheng return drv->bdrv_get_allocated_file_size(bs); 36314a1d5e1fSFam Zheng } 36324a1d5e1fSFam Zheng if (bs->file) { 36334a1d5e1fSFam Zheng return bdrv_get_allocated_file_size(bs->file); 36344a1d5e1fSFam Zheng } 36354a1d5e1fSFam Zheng return -ENOTSUP; 36364a1d5e1fSFam Zheng } 36374a1d5e1fSFam Zheng 36384a1d5e1fSFam Zheng /** 363965a9bb25SMarkus Armbruster * Return number of sectors on success, -errno on error. 
364083f64091Sbellard */ 364165a9bb25SMarkus Armbruster int64_t bdrv_nb_sectors(BlockDriverState *bs) 364283f64091Sbellard { 364383f64091Sbellard BlockDriver *drv = bs->drv; 364465a9bb25SMarkus Armbruster 364583f64091Sbellard if (!drv) 364619cb3738Sbellard return -ENOMEDIUM; 364751762288SStefan Hajnoczi 3648b94a2610SKevin Wolf if (drv->has_variable_length) { 3649b94a2610SKevin Wolf int ret = refresh_total_sectors(bs, bs->total_sectors); 3650b94a2610SKevin Wolf if (ret < 0) { 3651b94a2610SKevin Wolf return ret; 3652fc01f7e7Sbellard } 365346a4e4e6SStefan Hajnoczi } 365465a9bb25SMarkus Armbruster return bs->total_sectors; 365565a9bb25SMarkus Armbruster } 365665a9bb25SMarkus Armbruster 365765a9bb25SMarkus Armbruster /** 365865a9bb25SMarkus Armbruster * Return length in bytes on success, -errno on error. 365965a9bb25SMarkus Armbruster * The length is always a multiple of BDRV_SECTOR_SIZE. 366065a9bb25SMarkus Armbruster */ 366165a9bb25SMarkus Armbruster int64_t bdrv_getlength(BlockDriverState *bs) 366265a9bb25SMarkus Armbruster { 366365a9bb25SMarkus Armbruster int64_t ret = bdrv_nb_sectors(bs); 366465a9bb25SMarkus Armbruster 366565a9bb25SMarkus Armbruster return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE; 366646a4e4e6SStefan Hajnoczi } 3667fc01f7e7Sbellard 366819cb3738Sbellard /* return 0 as number of sectors if no device present or error */ 366996b8f136Sths void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr) 3670fc01f7e7Sbellard { 367165a9bb25SMarkus Armbruster int64_t nb_sectors = bdrv_nb_sectors(bs); 367265a9bb25SMarkus Armbruster 367365a9bb25SMarkus Armbruster *nb_sectors_ptr = nb_sectors < 0 ? 
0 : nb_sectors; 3674fc01f7e7Sbellard } 3675cf98951bSbellard 3676ff06f5f3SPaolo Bonzini void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error, 3677ff06f5f3SPaolo Bonzini BlockdevOnError on_write_error) 3678abd7f68dSMarkus Armbruster { 3679abd7f68dSMarkus Armbruster bs->on_read_error = on_read_error; 3680abd7f68dSMarkus Armbruster bs->on_write_error = on_write_error; 3681abd7f68dSMarkus Armbruster } 3682abd7f68dSMarkus Armbruster 36831ceee0d5SPaolo Bonzini BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read) 3684abd7f68dSMarkus Armbruster { 3685abd7f68dSMarkus Armbruster return is_read ? bs->on_read_error : bs->on_write_error; 3686abd7f68dSMarkus Armbruster } 3687abd7f68dSMarkus Armbruster 36883e1caa5fSPaolo Bonzini BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error) 36893e1caa5fSPaolo Bonzini { 36903e1caa5fSPaolo Bonzini BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error; 36913e1caa5fSPaolo Bonzini 36923e1caa5fSPaolo Bonzini switch (on_err) { 36933e1caa5fSPaolo Bonzini case BLOCKDEV_ON_ERROR_ENOSPC: 3694a589569fSWenchao Xia return (error == ENOSPC) ? 
3695a589569fSWenchao Xia BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT; 36963e1caa5fSPaolo Bonzini case BLOCKDEV_ON_ERROR_STOP: 3697a589569fSWenchao Xia return BLOCK_ERROR_ACTION_STOP; 36983e1caa5fSPaolo Bonzini case BLOCKDEV_ON_ERROR_REPORT: 3699a589569fSWenchao Xia return BLOCK_ERROR_ACTION_REPORT; 37003e1caa5fSPaolo Bonzini case BLOCKDEV_ON_ERROR_IGNORE: 3701a589569fSWenchao Xia return BLOCK_ERROR_ACTION_IGNORE; 37023e1caa5fSPaolo Bonzini default: 37033e1caa5fSPaolo Bonzini abort(); 37043e1caa5fSPaolo Bonzini } 37053e1caa5fSPaolo Bonzini } 37063e1caa5fSPaolo Bonzini 3707c7c2ff0cSLuiz Capitulino static void send_qmp_error_event(BlockDriverState *bs, 3708c7c2ff0cSLuiz Capitulino BlockErrorAction action, 3709c7c2ff0cSLuiz Capitulino bool is_read, int error) 3710c7c2ff0cSLuiz Capitulino { 3711573742a5SPeter Maydell IoOperationType optype; 3712c7c2ff0cSLuiz Capitulino 3713573742a5SPeter Maydell optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE; 3714573742a5SPeter Maydell qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action, 3715c7c2ff0cSLuiz Capitulino bdrv_iostatus_is_enabled(bs), 3716624ff573SLuiz Capitulino error == ENOSPC, strerror(error), 3717624ff573SLuiz Capitulino &error_abort); 3718c7c2ff0cSLuiz Capitulino } 3719c7c2ff0cSLuiz Capitulino 37203e1caa5fSPaolo Bonzini /* This is done by device models because, while the block layer knows 37213e1caa5fSPaolo Bonzini * about the error, it does not know whether an operation comes from 37223e1caa5fSPaolo Bonzini * the device or the block layer (from a job, for example). 
37233e1caa5fSPaolo Bonzini */ 37243e1caa5fSPaolo Bonzini void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action, 37253e1caa5fSPaolo Bonzini bool is_read, int error) 37263e1caa5fSPaolo Bonzini { 37273e1caa5fSPaolo Bonzini assert(error >= 0); 37282bd3bce8SPaolo Bonzini 3729a589569fSWenchao Xia if (action == BLOCK_ERROR_ACTION_STOP) { 37302bd3bce8SPaolo Bonzini /* First set the iostatus, so that "info block" returns an iostatus 37312bd3bce8SPaolo Bonzini * that matches the events raised so far (an additional error iostatus 37322bd3bce8SPaolo Bonzini * is fine, but not a lost one). 37332bd3bce8SPaolo Bonzini */ 37343e1caa5fSPaolo Bonzini bdrv_iostatus_set_err(bs, error); 37352bd3bce8SPaolo Bonzini 37362bd3bce8SPaolo Bonzini /* Then raise the request to stop the VM and the event. 37372bd3bce8SPaolo Bonzini * qemu_system_vmstop_request_prepare has two effects. First, 37382bd3bce8SPaolo Bonzini * it ensures that the STOP event always comes after the 37392bd3bce8SPaolo Bonzini * BLOCK_IO_ERROR event. Second, it ensures that even if management 37402bd3bce8SPaolo Bonzini * can observe the STOP event and do a "cont" before the STOP 37412bd3bce8SPaolo Bonzini * event is issued, the VM will not stop. In this case, vm_start() 37422bd3bce8SPaolo Bonzini * also ensures that the STOP/RESUME pair of events is emitted. 
37432bd3bce8SPaolo Bonzini */ 37442bd3bce8SPaolo Bonzini qemu_system_vmstop_request_prepare(); 3745c7c2ff0cSLuiz Capitulino send_qmp_error_event(bs, action, is_read, error); 37462bd3bce8SPaolo Bonzini qemu_system_vmstop_request(RUN_STATE_IO_ERROR); 37472bd3bce8SPaolo Bonzini } else { 3748c7c2ff0cSLuiz Capitulino send_qmp_error_event(bs, action, is_read, error); 37493e1caa5fSPaolo Bonzini } 37503e1caa5fSPaolo Bonzini } 37513e1caa5fSPaolo Bonzini 3752b338082bSbellard int bdrv_is_read_only(BlockDriverState *bs) 3753b338082bSbellard { 3754b338082bSbellard return bs->read_only; 3755b338082bSbellard } 3756b338082bSbellard 3757985a03b0Sths int bdrv_is_sg(BlockDriverState *bs) 3758985a03b0Sths { 3759985a03b0Sths return bs->sg; 3760985a03b0Sths } 3761985a03b0Sths 3762e900a7b7SChristoph Hellwig int bdrv_enable_write_cache(BlockDriverState *bs) 3763e900a7b7SChristoph Hellwig { 3764e900a7b7SChristoph Hellwig return bs->enable_write_cache; 3765e900a7b7SChristoph Hellwig } 3766e900a7b7SChristoph Hellwig 3767425b0148SPaolo Bonzini void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce) 3768425b0148SPaolo Bonzini { 3769425b0148SPaolo Bonzini bs->enable_write_cache = wce; 377055b110f2SJeff Cody 377155b110f2SJeff Cody /* so a reopen() will preserve wce */ 377255b110f2SJeff Cody if (wce) { 377355b110f2SJeff Cody bs->open_flags |= BDRV_O_CACHE_WB; 377455b110f2SJeff Cody } else { 377555b110f2SJeff Cody bs->open_flags &= ~BDRV_O_CACHE_WB; 377655b110f2SJeff Cody } 3777425b0148SPaolo Bonzini } 3778425b0148SPaolo Bonzini 3779ea2384d3Sbellard int bdrv_is_encrypted(BlockDriverState *bs) 3780ea2384d3Sbellard { 3781ea2384d3Sbellard if (bs->backing_hd && bs->backing_hd->encrypted) 3782ea2384d3Sbellard return 1; 3783ea2384d3Sbellard return bs->encrypted; 3784ea2384d3Sbellard } 3785ea2384d3Sbellard 3786c0f4ce77Saliguori int bdrv_key_required(BlockDriverState *bs) 3787c0f4ce77Saliguori { 3788c0f4ce77Saliguori BlockDriverState *backing_hd = bs->backing_hd; 3789c0f4ce77Saliguori 
3790c0f4ce77Saliguori if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key) 3791c0f4ce77Saliguori return 1; 3792c0f4ce77Saliguori return (bs->encrypted && !bs->valid_key); 3793c0f4ce77Saliguori } 3794c0f4ce77Saliguori 3795ea2384d3Sbellard int bdrv_set_key(BlockDriverState *bs, const char *key) 3796ea2384d3Sbellard { 3797ea2384d3Sbellard int ret; 3798ea2384d3Sbellard if (bs->backing_hd && bs->backing_hd->encrypted) { 3799ea2384d3Sbellard ret = bdrv_set_key(bs->backing_hd, key); 3800ea2384d3Sbellard if (ret < 0) 3801ea2384d3Sbellard return ret; 3802ea2384d3Sbellard if (!bs->encrypted) 3803ea2384d3Sbellard return 0; 3804ea2384d3Sbellard } 3805fd04a2aeSShahar Havivi if (!bs->encrypted) { 3806fd04a2aeSShahar Havivi return -EINVAL; 3807fd04a2aeSShahar Havivi } else if (!bs->drv || !bs->drv->bdrv_set_key) { 3808fd04a2aeSShahar Havivi return -ENOMEDIUM; 3809fd04a2aeSShahar Havivi } 3810c0f4ce77Saliguori ret = bs->drv->bdrv_set_key(bs, key); 3811bb5fc20fSaliguori if (ret < 0) { 3812bb5fc20fSaliguori bs->valid_key = 0; 3813bb5fc20fSaliguori } else if (!bs->valid_key) { 3814bb5fc20fSaliguori bs->valid_key = 1; 3815a7f53e26SMarkus Armbruster if (bs->blk) { 3816bb5fc20fSaliguori /* call the change callback now, we skipped it on open */ 3817a7f53e26SMarkus Armbruster blk_dev_change_media_cb(bs->blk, true); 3818a7f53e26SMarkus Armbruster } 3819bb5fc20fSaliguori } 3820c0f4ce77Saliguori return ret; 3821ea2384d3Sbellard } 3822ea2384d3Sbellard 38234d2855a3SMarkus Armbruster /* 38244d2855a3SMarkus Armbruster * Provide an encryption key for @bs. 38254d2855a3SMarkus Armbruster * If @key is non-null: 38264d2855a3SMarkus Armbruster * If @bs is not encrypted, fail. 38274d2855a3SMarkus Armbruster * Else if the key is invalid, fail. 38284d2855a3SMarkus Armbruster * Else set @bs's key to @key, replacing the existing key, if any. 38294d2855a3SMarkus Armbruster * If @key is null: 38304d2855a3SMarkus Armbruster * If @bs is encrypted and still lacks a key, fail. 
38314d2855a3SMarkus Armbruster * Else do nothing. 38324d2855a3SMarkus Armbruster * On failure, store an error object through @errp if non-null. 38334d2855a3SMarkus Armbruster */ 38344d2855a3SMarkus Armbruster void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp) 38354d2855a3SMarkus Armbruster { 38364d2855a3SMarkus Armbruster if (key) { 38374d2855a3SMarkus Armbruster if (!bdrv_is_encrypted(bs)) { 383881e5f78aSAlberto Garcia error_setg(errp, "Node '%s' is not encrypted", 383981e5f78aSAlberto Garcia bdrv_get_device_or_node_name(bs)); 38404d2855a3SMarkus Armbruster } else if (bdrv_set_key(bs, key) < 0) { 38414d2855a3SMarkus Armbruster error_set(errp, QERR_INVALID_PASSWORD); 38424d2855a3SMarkus Armbruster } 38434d2855a3SMarkus Armbruster } else { 38444d2855a3SMarkus Armbruster if (bdrv_key_required(bs)) { 3845b1ca6391SMarkus Armbruster error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED, 3846b1ca6391SMarkus Armbruster "'%s' (%s) is encrypted", 384781e5f78aSAlberto Garcia bdrv_get_device_or_node_name(bs), 38484d2855a3SMarkus Armbruster bdrv_get_encrypted_filename(bs)); 38494d2855a3SMarkus Armbruster } 38504d2855a3SMarkus Armbruster } 38514d2855a3SMarkus Armbruster } 38524d2855a3SMarkus Armbruster 3853f8d6bba1SMarkus Armbruster const char *bdrv_get_format_name(BlockDriverState *bs) 3854ea2384d3Sbellard { 3855f8d6bba1SMarkus Armbruster return bs->drv ? 
bs->drv->format_name : NULL; 3856ea2384d3Sbellard } 3857ea2384d3Sbellard 3858ada42401SStefan Hajnoczi static int qsort_strcmp(const void *a, const void *b) 3859ada42401SStefan Hajnoczi { 3860ada42401SStefan Hajnoczi return strcmp(a, b); 3861ada42401SStefan Hajnoczi } 3862ada42401SStefan Hajnoczi 3863ea2384d3Sbellard void bdrv_iterate_format(void (*it)(void *opaque, const char *name), 3864ea2384d3Sbellard void *opaque) 3865ea2384d3Sbellard { 3866ea2384d3Sbellard BlockDriver *drv; 3867e855e4fbSJeff Cody int count = 0; 3868ada42401SStefan Hajnoczi int i; 3869e855e4fbSJeff Cody const char **formats = NULL; 3870ea2384d3Sbellard 38718a22f02aSStefan Hajnoczi QLIST_FOREACH(drv, &bdrv_drivers, list) { 3872e855e4fbSJeff Cody if (drv->format_name) { 3873e855e4fbSJeff Cody bool found = false; 3874e855e4fbSJeff Cody int i = count; 3875e855e4fbSJeff Cody while (formats && i && !found) { 3876e855e4fbSJeff Cody found = !strcmp(formats[--i], drv->format_name); 3877e855e4fbSJeff Cody } 3878e855e4fbSJeff Cody 3879e855e4fbSJeff Cody if (!found) { 38805839e53bSMarkus Armbruster formats = g_renew(const char *, formats, count + 1); 3881e855e4fbSJeff Cody formats[count++] = drv->format_name; 3882ea2384d3Sbellard } 3883ea2384d3Sbellard } 3884e855e4fbSJeff Cody } 3885ada42401SStefan Hajnoczi 3886ada42401SStefan Hajnoczi qsort(formats, count, sizeof(formats[0]), qsort_strcmp); 3887ada42401SStefan Hajnoczi 3888ada42401SStefan Hajnoczi for (i = 0; i < count; i++) { 3889ada42401SStefan Hajnoczi it(opaque, formats[i]); 3890ada42401SStefan Hajnoczi } 3891ada42401SStefan Hajnoczi 3892e855e4fbSJeff Cody g_free(formats); 3893e855e4fbSJeff Cody } 3894ea2384d3Sbellard 3895dc364f4cSBenoît Canet /* This function is to find a node in the bs graph */ 3896dc364f4cSBenoît Canet BlockDriverState *bdrv_find_node(const char *node_name) 3897dc364f4cSBenoît Canet { 3898dc364f4cSBenoît Canet BlockDriverState *bs; 3899dc364f4cSBenoît Canet 3900dc364f4cSBenoît Canet assert(node_name); 3901dc364f4cSBenoît Canet 
3902dc364f4cSBenoît Canet QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 3903dc364f4cSBenoît Canet if (!strcmp(node_name, bs->node_name)) { 3904dc364f4cSBenoît Canet return bs; 3905dc364f4cSBenoît Canet } 3906dc364f4cSBenoît Canet } 3907dc364f4cSBenoît Canet return NULL; 3908dc364f4cSBenoît Canet } 3909dc364f4cSBenoît Canet 3910c13163fbSBenoît Canet /* Put this QMP function here so it can access the static graph_bdrv_states. */ 3911d5a8ee60SAlberto Garcia BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp) 3912c13163fbSBenoît Canet { 3913c13163fbSBenoît Canet BlockDeviceInfoList *list, *entry; 3914c13163fbSBenoît Canet BlockDriverState *bs; 3915c13163fbSBenoît Canet 3916c13163fbSBenoît Canet list = NULL; 3917c13163fbSBenoît Canet QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 3918d5a8ee60SAlberto Garcia BlockDeviceInfo *info = bdrv_block_device_info(bs, errp); 3919d5a8ee60SAlberto Garcia if (!info) { 3920d5a8ee60SAlberto Garcia qapi_free_BlockDeviceInfoList(list); 3921d5a8ee60SAlberto Garcia return NULL; 3922d5a8ee60SAlberto Garcia } 3923c13163fbSBenoît Canet entry = g_malloc0(sizeof(*entry)); 3924d5a8ee60SAlberto Garcia entry->value = info; 3925c13163fbSBenoît Canet entry->next = list; 3926c13163fbSBenoît Canet list = entry; 3927c13163fbSBenoît Canet } 3928c13163fbSBenoît Canet 3929c13163fbSBenoît Canet return list; 3930c13163fbSBenoît Canet } 3931c13163fbSBenoît Canet 393212d3ba82SBenoît Canet BlockDriverState *bdrv_lookup_bs(const char *device, 393312d3ba82SBenoît Canet const char *node_name, 393412d3ba82SBenoît Canet Error **errp) 393512d3ba82SBenoît Canet { 39367f06d47eSMarkus Armbruster BlockBackend *blk; 39377f06d47eSMarkus Armbruster BlockDriverState *bs; 393812d3ba82SBenoît Canet 393912d3ba82SBenoît Canet if (device) { 39407f06d47eSMarkus Armbruster blk = blk_by_name(device); 394112d3ba82SBenoît Canet 39427f06d47eSMarkus Armbruster if (blk) { 39437f06d47eSMarkus Armbruster return blk_bs(blk); 394412d3ba82SBenoît Canet } 3945dd67fa50SBenoît 
Canet } 394612d3ba82SBenoît Canet 3947dd67fa50SBenoît Canet if (node_name) { 394812d3ba82SBenoît Canet bs = bdrv_find_node(node_name); 394912d3ba82SBenoît Canet 3950dd67fa50SBenoît Canet if (bs) { 3951dd67fa50SBenoît Canet return bs; 3952dd67fa50SBenoît Canet } 395312d3ba82SBenoît Canet } 395412d3ba82SBenoît Canet 3955dd67fa50SBenoît Canet error_setg(errp, "Cannot find device=%s nor node_name=%s", 3956dd67fa50SBenoît Canet device ? device : "", 3957dd67fa50SBenoît Canet node_name ? node_name : ""); 3958dd67fa50SBenoît Canet return NULL; 395912d3ba82SBenoît Canet } 396012d3ba82SBenoît Canet 39615a6684d2SJeff Cody /* If 'base' is in the same chain as 'top', return true. Otherwise, 39625a6684d2SJeff Cody * return false. If either argument is NULL, return false. */ 39635a6684d2SJeff Cody bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base) 39645a6684d2SJeff Cody { 39655a6684d2SJeff Cody while (top && top != base) { 39665a6684d2SJeff Cody top = top->backing_hd; 39675a6684d2SJeff Cody } 39685a6684d2SJeff Cody 39695a6684d2SJeff Cody return top != NULL; 39705a6684d2SJeff Cody } 39715a6684d2SJeff Cody 397204df765aSFam Zheng BlockDriverState *bdrv_next_node(BlockDriverState *bs) 397304df765aSFam Zheng { 397404df765aSFam Zheng if (!bs) { 397504df765aSFam Zheng return QTAILQ_FIRST(&graph_bdrv_states); 397604df765aSFam Zheng } 397704df765aSFam Zheng return QTAILQ_NEXT(bs, node_list); 397804df765aSFam Zheng } 397904df765aSFam Zheng 39802f399b0aSMarkus Armbruster BlockDriverState *bdrv_next(BlockDriverState *bs) 39812f399b0aSMarkus Armbruster { 39822f399b0aSMarkus Armbruster if (!bs) { 39832f399b0aSMarkus Armbruster return QTAILQ_FIRST(&bdrv_states); 39842f399b0aSMarkus Armbruster } 3985dc364f4cSBenoît Canet return QTAILQ_NEXT(bs, device_list); 39862f399b0aSMarkus Armbruster } 39872f399b0aSMarkus Armbruster 398820a9e77dSFam Zheng const char *bdrv_get_node_name(const BlockDriverState *bs) 398920a9e77dSFam Zheng { 399020a9e77dSFam Zheng return bs->node_name; 
399120a9e77dSFam Zheng } 399220a9e77dSFam Zheng 39937f06d47eSMarkus Armbruster /* TODO check what callers really want: bs->node_name or blk_name() */ 3994bfb197e0SMarkus Armbruster const char *bdrv_get_device_name(const BlockDriverState *bs) 3995ea2384d3Sbellard { 3996bfb197e0SMarkus Armbruster return bs->blk ? blk_name(bs->blk) : ""; 3997ea2384d3Sbellard } 3998ea2384d3Sbellard 39999b2aa84fSAlberto Garcia /* This can be used to identify nodes that might not have a device 40009b2aa84fSAlberto Garcia * name associated. Since node and device names live in the same 40019b2aa84fSAlberto Garcia * namespace, the result is unambiguous. The exception is if both are 40029b2aa84fSAlberto Garcia * absent, then this returns an empty (non-null) string. */ 40039b2aa84fSAlberto Garcia const char *bdrv_get_device_or_node_name(const BlockDriverState *bs) 40049b2aa84fSAlberto Garcia { 40059b2aa84fSAlberto Garcia return bs->blk ? blk_name(bs->blk) : bs->node_name; 40069b2aa84fSAlberto Garcia } 40079b2aa84fSAlberto Garcia 4008c8433287SMarkus Armbruster int bdrv_get_flags(BlockDriverState *bs) 4009c8433287SMarkus Armbruster { 4010c8433287SMarkus Armbruster return bs->open_flags; 4011c8433287SMarkus Armbruster } 4012c8433287SMarkus Armbruster 4013f0f0fdfeSKevin Wolf int bdrv_flush_all(void) 4014c6ca28d6Saliguori { 4015c6ca28d6Saliguori BlockDriverState *bs; 4016f0f0fdfeSKevin Wolf int result = 0; 4017c6ca28d6Saliguori 4018dc364f4cSBenoît Canet QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 4019ed78cda3SStefan Hajnoczi AioContext *aio_context = bdrv_get_aio_context(bs); 4020ed78cda3SStefan Hajnoczi int ret; 4021ed78cda3SStefan Hajnoczi 4022ed78cda3SStefan Hajnoczi aio_context_acquire(aio_context); 4023ed78cda3SStefan Hajnoczi ret = bdrv_flush(bs); 4024f0f0fdfeSKevin Wolf if (ret < 0 && !result) { 4025f0f0fdfeSKevin Wolf result = ret; 4026c6ca28d6Saliguori } 4027ed78cda3SStefan Hajnoczi aio_context_release(aio_context); 40281b7bdbc1SStefan Hajnoczi } 4029c6ca28d6Saliguori 
4030f0f0fdfeSKevin Wolf return result; 4031f0f0fdfeSKevin Wolf } 4032f0f0fdfeSKevin Wolf 40333ac21627SPeter Lieven int bdrv_has_zero_init_1(BlockDriverState *bs) 40343ac21627SPeter Lieven { 40353ac21627SPeter Lieven return 1; 40363ac21627SPeter Lieven } 40373ac21627SPeter Lieven 4038f2feebbdSKevin Wolf int bdrv_has_zero_init(BlockDriverState *bs) 4039f2feebbdSKevin Wolf { 4040f2feebbdSKevin Wolf assert(bs->drv); 4041f2feebbdSKevin Wolf 404211212d8fSPaolo Bonzini /* If BS is a copy on write image, it is initialized to 404311212d8fSPaolo Bonzini the contents of the base image, which may not be zeroes. */ 404411212d8fSPaolo Bonzini if (bs->backing_hd) { 404511212d8fSPaolo Bonzini return 0; 404611212d8fSPaolo Bonzini } 4047336c1c12SKevin Wolf if (bs->drv->bdrv_has_zero_init) { 4048336c1c12SKevin Wolf return bs->drv->bdrv_has_zero_init(bs); 4049f2feebbdSKevin Wolf } 4050f2feebbdSKevin Wolf 40513ac21627SPeter Lieven /* safe default */ 40523ac21627SPeter Lieven return 0; 4053f2feebbdSKevin Wolf } 4054f2feebbdSKevin Wolf 40554ce78691SPeter Lieven bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs) 40564ce78691SPeter Lieven { 40574ce78691SPeter Lieven BlockDriverInfo bdi; 40584ce78691SPeter Lieven 40594ce78691SPeter Lieven if (bs->backing_hd) { 40604ce78691SPeter Lieven return false; 40614ce78691SPeter Lieven } 40624ce78691SPeter Lieven 40634ce78691SPeter Lieven if (bdrv_get_info(bs, &bdi) == 0) { 40644ce78691SPeter Lieven return bdi.unallocated_blocks_are_zero; 40654ce78691SPeter Lieven } 40664ce78691SPeter Lieven 40674ce78691SPeter Lieven return false; 40684ce78691SPeter Lieven } 40694ce78691SPeter Lieven 40704ce78691SPeter Lieven bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs) 40714ce78691SPeter Lieven { 40724ce78691SPeter Lieven BlockDriverInfo bdi; 40734ce78691SPeter Lieven 40744ce78691SPeter Lieven if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) { 40754ce78691SPeter Lieven return false; 40764ce78691SPeter Lieven } 40774ce78691SPeter Lieven 
40784ce78691SPeter Lieven if (bdrv_get_info(bs, &bdi) == 0) { 40794ce78691SPeter Lieven return bdi.can_write_zeroes_with_unmap; 40804ce78691SPeter Lieven } 40814ce78691SPeter Lieven 40824ce78691SPeter Lieven return false; 40834ce78691SPeter Lieven } 40844ce78691SPeter Lieven 4085b6b8a333SPaolo Bonzini typedef struct BdrvCoGetBlockStatusData { 4086376ae3f1SStefan Hajnoczi BlockDriverState *bs; 4087b35b2bbaSMiroslav Rezanina BlockDriverState *base; 4088376ae3f1SStefan Hajnoczi int64_t sector_num; 4089376ae3f1SStefan Hajnoczi int nb_sectors; 4090376ae3f1SStefan Hajnoczi int *pnum; 4091b6b8a333SPaolo Bonzini int64_t ret; 4092376ae3f1SStefan Hajnoczi bool done; 4093b6b8a333SPaolo Bonzini } BdrvCoGetBlockStatusData; 4094376ae3f1SStefan Hajnoczi 4095f58c7b35Sths /* 4096705be728SFam Zheng * Returns the allocation status of the specified sectors. 4097705be728SFam Zheng * Drivers not implementing the functionality are assumed to not support 4098705be728SFam Zheng * backing files, hence all their sectors are reported as allocated. 4099f58c7b35Sths * 4100bd9533e3SStefan Hajnoczi * If 'sector_num' is beyond the end of the disk image the return value is 0 4101bd9533e3SStefan Hajnoczi * and 'pnum' is set to 0. 4102bd9533e3SStefan Hajnoczi * 4103f58c7b35Sths * 'pnum' is set to the number of sectors (including and immediately following 4104f58c7b35Sths * the specified sector) that are known to be in the same 4105f58c7b35Sths * allocated/unallocated state. 4106f58c7b35Sths * 4107bd9533e3SStefan Hajnoczi * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes 4108bd9533e3SStefan Hajnoczi * beyond the end of the disk image it will be clamped. 
4109f58c7b35Sths */ 4110b6b8a333SPaolo Bonzini static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, 4111bdad13b9SPaolo Bonzini int64_t sector_num, 4112060f51c9SStefan Hajnoczi int nb_sectors, int *pnum) 4113f58c7b35Sths { 411430a7f2fcSMarkus Armbruster int64_t total_sectors; 4115f58c7b35Sths int64_t n; 41165daa74a6SPaolo Bonzini int64_t ret, ret2; 4117bd9533e3SStefan Hajnoczi 411830a7f2fcSMarkus Armbruster total_sectors = bdrv_nb_sectors(bs); 411930a7f2fcSMarkus Armbruster if (total_sectors < 0) { 412030a7f2fcSMarkus Armbruster return total_sectors; 4121617ccb46SPaolo Bonzini } 4122617ccb46SPaolo Bonzini 412330a7f2fcSMarkus Armbruster if (sector_num >= total_sectors) { 41246aebab14SStefan Hajnoczi *pnum = 0; 41256aebab14SStefan Hajnoczi return 0; 41266aebab14SStefan Hajnoczi } 4127bd9533e3SStefan Hajnoczi 412830a7f2fcSMarkus Armbruster n = total_sectors - sector_num; 4129bd9533e3SStefan Hajnoczi if (n < nb_sectors) { 4130bd9533e3SStefan Hajnoczi nb_sectors = n; 4131bd9533e3SStefan Hajnoczi } 4132bd9533e3SStefan Hajnoczi 4133b6b8a333SPaolo Bonzini if (!bs->drv->bdrv_co_get_block_status) { 4134bd9533e3SStefan Hajnoczi *pnum = nb_sectors; 4135e88ae226SKevin Wolf ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED; 4136918e92d7SPaolo Bonzini if (bs->drv->protocol_name) { 4137918e92d7SPaolo Bonzini ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE); 4138918e92d7SPaolo Bonzini } 4139918e92d7SPaolo Bonzini return ret; 41406aebab14SStefan Hajnoczi } 41416aebab14SStefan Hajnoczi 4142415b5b01SPaolo Bonzini ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum); 4143415b5b01SPaolo Bonzini if (ret < 0) { 41443e0a233dSPeter Lieven *pnum = 0; 4145415b5b01SPaolo Bonzini return ret; 4146415b5b01SPaolo Bonzini } 4147415b5b01SPaolo Bonzini 414892bc50a5SPeter Lieven if (ret & BDRV_BLOCK_RAW) { 414992bc50a5SPeter Lieven assert(ret & BDRV_BLOCK_OFFSET_VALID); 415092bc50a5SPeter Lieven return bdrv_get_block_status(bs->file, ret >> 
BDRV_SECTOR_BITS, 415192bc50a5SPeter Lieven *pnum, pnum); 415292bc50a5SPeter Lieven } 415392bc50a5SPeter Lieven 4154e88ae226SKevin Wolf if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) { 4155e88ae226SKevin Wolf ret |= BDRV_BLOCK_ALLOCATED; 4156e88ae226SKevin Wolf } 4157e88ae226SKevin Wolf 4158c3d86884SPeter Lieven if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) { 4159c3d86884SPeter Lieven if (bdrv_unallocated_blocks_are_zero(bs)) { 4160415b5b01SPaolo Bonzini ret |= BDRV_BLOCK_ZERO; 41611f9db224SPeter Lieven } else if (bs->backing_hd) { 4162f0ad5712SPaolo Bonzini BlockDriverState *bs2 = bs->backing_hd; 416330a7f2fcSMarkus Armbruster int64_t nb_sectors2 = bdrv_nb_sectors(bs2); 416430a7f2fcSMarkus Armbruster if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) { 4165f0ad5712SPaolo Bonzini ret |= BDRV_BLOCK_ZERO; 4166f0ad5712SPaolo Bonzini } 4167f0ad5712SPaolo Bonzini } 4168415b5b01SPaolo Bonzini } 41695daa74a6SPaolo Bonzini 41705daa74a6SPaolo Bonzini if (bs->file && 41715daa74a6SPaolo Bonzini (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) && 41725daa74a6SPaolo Bonzini (ret & BDRV_BLOCK_OFFSET_VALID)) { 417359c9a95fSMax Reitz int file_pnum; 417459c9a95fSMax Reitz 41755daa74a6SPaolo Bonzini ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS, 417659c9a95fSMax Reitz *pnum, &file_pnum); 41775daa74a6SPaolo Bonzini if (ret2 >= 0) { 41785daa74a6SPaolo Bonzini /* Ignore errors. This is just providing extra information, it 41795daa74a6SPaolo Bonzini * is useful but not necessary. 
41805daa74a6SPaolo Bonzini */ 418159c9a95fSMax Reitz if (!file_pnum) { 418259c9a95fSMax Reitz /* !file_pnum indicates an offset at or beyond the EOF; it is 418359c9a95fSMax Reitz * perfectly valid for the format block driver to point to such 418459c9a95fSMax Reitz * offsets, so catch it and mark everything as zero */ 418559c9a95fSMax Reitz ret |= BDRV_BLOCK_ZERO; 418659c9a95fSMax Reitz } else { 418759c9a95fSMax Reitz /* Limit request to the range reported by the protocol driver */ 418859c9a95fSMax Reitz *pnum = file_pnum; 41895daa74a6SPaolo Bonzini ret |= (ret2 & BDRV_BLOCK_ZERO); 41905daa74a6SPaolo Bonzini } 41915daa74a6SPaolo Bonzini } 419259c9a95fSMax Reitz } 41935daa74a6SPaolo Bonzini 4194415b5b01SPaolo Bonzini return ret; 4195060f51c9SStefan Hajnoczi } 4196060f51c9SStefan Hajnoczi 4197b6b8a333SPaolo Bonzini /* Coroutine wrapper for bdrv_get_block_status() */ 4198b6b8a333SPaolo Bonzini static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque) 4199060f51c9SStefan Hajnoczi { 4200b6b8a333SPaolo Bonzini BdrvCoGetBlockStatusData *data = opaque; 4201060f51c9SStefan Hajnoczi BlockDriverState *bs = data->bs; 4202060f51c9SStefan Hajnoczi 4203b6b8a333SPaolo Bonzini data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors, 4204060f51c9SStefan Hajnoczi data->pnum); 4205060f51c9SStefan Hajnoczi data->done = true; 4206060f51c9SStefan Hajnoczi } 4207060f51c9SStefan Hajnoczi 4208060f51c9SStefan Hajnoczi /* 4209b6b8a333SPaolo Bonzini * Synchronous wrapper around bdrv_co_get_block_status(). 4210060f51c9SStefan Hajnoczi * 4211b6b8a333SPaolo Bonzini * See bdrv_co_get_block_status() for details. 
4212060f51c9SStefan Hajnoczi */ 4213b6b8a333SPaolo Bonzini int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num, 4214b6b8a333SPaolo Bonzini int nb_sectors, int *pnum) 4215060f51c9SStefan Hajnoczi { 4216376ae3f1SStefan Hajnoczi Coroutine *co; 4217b6b8a333SPaolo Bonzini BdrvCoGetBlockStatusData data = { 4218376ae3f1SStefan Hajnoczi .bs = bs, 4219376ae3f1SStefan Hajnoczi .sector_num = sector_num, 4220376ae3f1SStefan Hajnoczi .nb_sectors = nb_sectors, 4221376ae3f1SStefan Hajnoczi .pnum = pnum, 4222376ae3f1SStefan Hajnoczi .done = false, 4223376ae3f1SStefan Hajnoczi }; 4224376ae3f1SStefan Hajnoczi 4225bdad13b9SPaolo Bonzini if (qemu_in_coroutine()) { 4226bdad13b9SPaolo Bonzini /* Fast-path if already in coroutine context */ 4227b6b8a333SPaolo Bonzini bdrv_get_block_status_co_entry(&data); 4228bdad13b9SPaolo Bonzini } else { 42292572b37aSStefan Hajnoczi AioContext *aio_context = bdrv_get_aio_context(bs); 42302572b37aSStefan Hajnoczi 4231b6b8a333SPaolo Bonzini co = qemu_coroutine_create(bdrv_get_block_status_co_entry); 4232376ae3f1SStefan Hajnoczi qemu_coroutine_enter(co, &data); 4233376ae3f1SStefan Hajnoczi while (!data.done) { 42342572b37aSStefan Hajnoczi aio_poll(aio_context, true); 4235376ae3f1SStefan Hajnoczi } 4236bdad13b9SPaolo Bonzini } 4237376ae3f1SStefan Hajnoczi return data.ret; 4238376ae3f1SStefan Hajnoczi } 4239f58c7b35Sths 4240b6b8a333SPaolo Bonzini int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, 4241b6b8a333SPaolo Bonzini int nb_sectors, int *pnum) 4242b6b8a333SPaolo Bonzini { 42434333bb71SPaolo Bonzini int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum); 42444333bb71SPaolo Bonzini if (ret < 0) { 42454333bb71SPaolo Bonzini return ret; 42464333bb71SPaolo Bonzini } 424701fb2705SKevin Wolf return !!(ret & BDRV_BLOCK_ALLOCATED); 4248b6b8a333SPaolo Bonzini } 4249b6b8a333SPaolo Bonzini 4250188a7bbfSPaolo Bonzini /* 4251188a7bbfSPaolo Bonzini * Given an image chain: ... 
-> [BASE] -> [INTER1] -> [INTER2] -> [TOP] 4252188a7bbfSPaolo Bonzini * 4253188a7bbfSPaolo Bonzini * Return true if the given sector is allocated in any image between 4254188a7bbfSPaolo Bonzini * BASE and TOP (inclusive). BASE can be NULL to check if the given 4255188a7bbfSPaolo Bonzini * sector is allocated in any image of the chain. Return false otherwise. 4256188a7bbfSPaolo Bonzini * 4257188a7bbfSPaolo Bonzini * 'pnum' is set to the number of sectors (including and immediately following 4258188a7bbfSPaolo Bonzini * the specified sector) that are known to be in the same 4259188a7bbfSPaolo Bonzini * allocated/unallocated state. 4260188a7bbfSPaolo Bonzini * 4261188a7bbfSPaolo Bonzini */ 42624f578637SPaolo Bonzini int bdrv_is_allocated_above(BlockDriverState *top, 4263188a7bbfSPaolo Bonzini BlockDriverState *base, 4264188a7bbfSPaolo Bonzini int64_t sector_num, 4265188a7bbfSPaolo Bonzini int nb_sectors, int *pnum) 4266188a7bbfSPaolo Bonzini { 4267188a7bbfSPaolo Bonzini BlockDriverState *intermediate; 4268188a7bbfSPaolo Bonzini int ret, n = nb_sectors; 4269188a7bbfSPaolo Bonzini 4270188a7bbfSPaolo Bonzini intermediate = top; 4271188a7bbfSPaolo Bonzini while (intermediate && intermediate != base) { 4272188a7bbfSPaolo Bonzini int pnum_inter; 4273bdad13b9SPaolo Bonzini ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors, 4274188a7bbfSPaolo Bonzini &pnum_inter); 4275188a7bbfSPaolo Bonzini if (ret < 0) { 4276188a7bbfSPaolo Bonzini return ret; 4277188a7bbfSPaolo Bonzini } else if (ret) { 4278188a7bbfSPaolo Bonzini *pnum = pnum_inter; 4279188a7bbfSPaolo Bonzini return 1; 4280188a7bbfSPaolo Bonzini } 4281188a7bbfSPaolo Bonzini 4282188a7bbfSPaolo Bonzini /* 4283188a7bbfSPaolo Bonzini * [sector_num, nb_sectors] is unallocated on top but intermediate 4284188a7bbfSPaolo Bonzini * might have 4285188a7bbfSPaolo Bonzini * 4286188a7bbfSPaolo Bonzini * [sector_num+x, nr_sectors] allocated. 
4287188a7bbfSPaolo Bonzini */ 428863ba17d3SVishvananda Ishaya if (n > pnum_inter && 428963ba17d3SVishvananda Ishaya (intermediate == top || 429063ba17d3SVishvananda Ishaya sector_num + pnum_inter < intermediate->total_sectors)) { 4291188a7bbfSPaolo Bonzini n = pnum_inter; 4292188a7bbfSPaolo Bonzini } 4293188a7bbfSPaolo Bonzini 4294188a7bbfSPaolo Bonzini intermediate = intermediate->backing_hd; 4295188a7bbfSPaolo Bonzini } 4296188a7bbfSPaolo Bonzini 4297188a7bbfSPaolo Bonzini *pnum = n; 4298188a7bbfSPaolo Bonzini return 0; 4299188a7bbfSPaolo Bonzini } 4300188a7bbfSPaolo Bonzini 4301045df330Saliguori const char *bdrv_get_encrypted_filename(BlockDriverState *bs) 4302045df330Saliguori { 4303045df330Saliguori if (bs->backing_hd && bs->backing_hd->encrypted) 4304045df330Saliguori return bs->backing_file; 4305045df330Saliguori else if (bs->encrypted) 4306045df330Saliguori return bs->filename; 4307045df330Saliguori else 4308045df330Saliguori return NULL; 4309045df330Saliguori } 4310045df330Saliguori 431183f64091Sbellard void bdrv_get_backing_filename(BlockDriverState *bs, 431283f64091Sbellard char *filename, int filename_size) 431383f64091Sbellard { 431483f64091Sbellard pstrcpy(filename, filename_size, bs->backing_file); 431583f64091Sbellard } 431683f64091Sbellard 4317faea38e7Sbellard int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num, 4318faea38e7Sbellard const uint8_t *buf, int nb_sectors) 4319faea38e7Sbellard { 4320faea38e7Sbellard BlockDriver *drv = bs->drv; 4321b9c64947SMax Reitz int ret; 4322b9c64947SMax Reitz 4323b9c64947SMax Reitz if (!drv) { 432419cb3738Sbellard return -ENOMEDIUM; 4325b9c64947SMax Reitz } 4326b9c64947SMax Reitz if (!drv->bdrv_write_compressed) { 4327faea38e7Sbellard return -ENOTSUP; 4328b9c64947SMax Reitz } 4329b9c64947SMax Reitz ret = bdrv_check_request(bs, sector_num, nb_sectors); 4330b9c64947SMax Reitz if (ret < 0) { 4331b9c64947SMax Reitz return ret; 4332b9c64947SMax Reitz } 43337cd1e32aSlirans@il.ibm.com 4334e4654d2dSFam Zheng 
assert(QLIST_EMPTY(&bs->dirty_bitmaps)); 43357cd1e32aSlirans@il.ibm.com 4336faea38e7Sbellard return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors); 4337faea38e7Sbellard } 4338faea38e7Sbellard 4339faea38e7Sbellard int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 4340faea38e7Sbellard { 4341faea38e7Sbellard BlockDriver *drv = bs->drv; 4342faea38e7Sbellard if (!drv) 434319cb3738Sbellard return -ENOMEDIUM; 4344faea38e7Sbellard if (!drv->bdrv_get_info) 4345faea38e7Sbellard return -ENOTSUP; 4346faea38e7Sbellard memset(bdi, 0, sizeof(*bdi)); 4347faea38e7Sbellard return drv->bdrv_get_info(bs, bdi); 4348faea38e7Sbellard } 4349faea38e7Sbellard 4350eae041feSMax Reitz ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs) 4351eae041feSMax Reitz { 4352eae041feSMax Reitz BlockDriver *drv = bs->drv; 4353eae041feSMax Reitz if (drv && drv->bdrv_get_specific_info) { 4354eae041feSMax Reitz return drv->bdrv_get_specific_info(bs); 4355eae041feSMax Reitz } 4356eae041feSMax Reitz return NULL; 4357eae041feSMax Reitz } 4358eae041feSMax Reitz 435945566e9cSChristoph Hellwig int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, 436045566e9cSChristoph Hellwig int64_t pos, int size) 4361178e08a5Saliguori { 4362cf8074b3SKevin Wolf QEMUIOVector qiov; 4363cf8074b3SKevin Wolf struct iovec iov = { 4364cf8074b3SKevin Wolf .iov_base = (void *) buf, 4365cf8074b3SKevin Wolf .iov_len = size, 4366cf8074b3SKevin Wolf }; 4367cf8074b3SKevin Wolf 4368cf8074b3SKevin Wolf qemu_iovec_init_external(&qiov, &iov, 1); 4369cf8074b3SKevin Wolf return bdrv_writev_vmstate(bs, &qiov, pos); 4370cf8074b3SKevin Wolf } 4371cf8074b3SKevin Wolf 4372cf8074b3SKevin Wolf int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) 4373cf8074b3SKevin Wolf { 4374178e08a5Saliguori BlockDriver *drv = bs->drv; 4375cf8074b3SKevin Wolf 4376cf8074b3SKevin Wolf if (!drv) { 4377178e08a5Saliguori return -ENOMEDIUM; 4378cf8074b3SKevin Wolf } else if (drv->bdrv_save_vmstate) { 
4379cf8074b3SKevin Wolf return drv->bdrv_save_vmstate(bs, qiov, pos); 4380cf8074b3SKevin Wolf } else if (bs->file) { 4381cf8074b3SKevin Wolf return bdrv_writev_vmstate(bs->file, qiov, pos); 4382cf8074b3SKevin Wolf } 4383cf8074b3SKevin Wolf 43847cdb1f6dSMORITA Kazutaka return -ENOTSUP; 4385178e08a5Saliguori } 4386178e08a5Saliguori 438745566e9cSChristoph Hellwig int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, 438845566e9cSChristoph Hellwig int64_t pos, int size) 4389178e08a5Saliguori { 4390178e08a5Saliguori BlockDriver *drv = bs->drv; 4391178e08a5Saliguori if (!drv) 4392178e08a5Saliguori return -ENOMEDIUM; 43937cdb1f6dSMORITA Kazutaka if (drv->bdrv_load_vmstate) 439445566e9cSChristoph Hellwig return drv->bdrv_load_vmstate(bs, buf, pos, size); 43957cdb1f6dSMORITA Kazutaka if (bs->file) 43967cdb1f6dSMORITA Kazutaka return bdrv_load_vmstate(bs->file, buf, pos, size); 43977cdb1f6dSMORITA Kazutaka return -ENOTSUP; 4398178e08a5Saliguori } 4399178e08a5Saliguori 44008b9b0cc2SKevin Wolf void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event) 44018b9b0cc2SKevin Wolf { 4402bf736fe3SKevin Wolf if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) { 44038b9b0cc2SKevin Wolf return; 44048b9b0cc2SKevin Wolf } 44058b9b0cc2SKevin Wolf 4406bf736fe3SKevin Wolf bs->drv->bdrv_debug_event(bs, event); 440741c695c7SKevin Wolf } 44088b9b0cc2SKevin Wolf 440941c695c7SKevin Wolf int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event, 441041c695c7SKevin Wolf const char *tag) 441141c695c7SKevin Wolf { 441241c695c7SKevin Wolf while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) { 441341c695c7SKevin Wolf bs = bs->file; 441441c695c7SKevin Wolf } 441541c695c7SKevin Wolf 441641c695c7SKevin Wolf if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) { 441741c695c7SKevin Wolf return bs->drv->bdrv_debug_breakpoint(bs, event, tag); 441841c695c7SKevin Wolf } 441941c695c7SKevin Wolf 442041c695c7SKevin Wolf return -ENOTSUP; 442141c695c7SKevin Wolf } 442241c695c7SKevin Wolf 
44234cc70e93SFam Zheng int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag) 44244cc70e93SFam Zheng { 44254cc70e93SFam Zheng while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) { 44264cc70e93SFam Zheng bs = bs->file; 44274cc70e93SFam Zheng } 44284cc70e93SFam Zheng 44294cc70e93SFam Zheng if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) { 44304cc70e93SFam Zheng return bs->drv->bdrv_debug_remove_breakpoint(bs, tag); 44314cc70e93SFam Zheng } 44324cc70e93SFam Zheng 44334cc70e93SFam Zheng return -ENOTSUP; 44344cc70e93SFam Zheng } 44354cc70e93SFam Zheng 443641c695c7SKevin Wolf int bdrv_debug_resume(BlockDriverState *bs, const char *tag) 443741c695c7SKevin Wolf { 4438938789eaSMax Reitz while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) { 443941c695c7SKevin Wolf bs = bs->file; 444041c695c7SKevin Wolf } 444141c695c7SKevin Wolf 444241c695c7SKevin Wolf if (bs && bs->drv && bs->drv->bdrv_debug_resume) { 444341c695c7SKevin Wolf return bs->drv->bdrv_debug_resume(bs, tag); 444441c695c7SKevin Wolf } 444541c695c7SKevin Wolf 444641c695c7SKevin Wolf return -ENOTSUP; 444741c695c7SKevin Wolf } 444841c695c7SKevin Wolf 444941c695c7SKevin Wolf bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag) 445041c695c7SKevin Wolf { 445141c695c7SKevin Wolf while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) { 445241c695c7SKevin Wolf bs = bs->file; 445341c695c7SKevin Wolf } 445441c695c7SKevin Wolf 445541c695c7SKevin Wolf if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) { 445641c695c7SKevin Wolf return bs->drv->bdrv_debug_is_suspended(bs, tag); 445741c695c7SKevin Wolf } 445841c695c7SKevin Wolf 445941c695c7SKevin Wolf return false; 44608b9b0cc2SKevin Wolf } 44618b9b0cc2SKevin Wolf 4462199630b6SBlue Swirl int bdrv_is_snapshot(BlockDriverState *bs) 4463199630b6SBlue Swirl { 4464199630b6SBlue Swirl return !!(bs->open_flags & BDRV_O_SNAPSHOT); 4465199630b6SBlue Swirl } 4466199630b6SBlue Swirl 4467b1b1d783SJeff Cody /* backing_file can 
either be relative, or absolute, or a protocol. If it is 4468b1b1d783SJeff Cody * relative, it must be relative to the chain. So, passing in bs->filename 4469b1b1d783SJeff Cody * from a BDS as backing_file should not be done, as that may be relative to 4470b1b1d783SJeff Cody * the CWD rather than the chain. */ 4471e8a6bb9cSMarcelo Tosatti BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, 4472e8a6bb9cSMarcelo Tosatti const char *backing_file) 4473e8a6bb9cSMarcelo Tosatti { 4474b1b1d783SJeff Cody char *filename_full = NULL; 4475b1b1d783SJeff Cody char *backing_file_full = NULL; 4476b1b1d783SJeff Cody char *filename_tmp = NULL; 4477b1b1d783SJeff Cody int is_protocol = 0; 4478b1b1d783SJeff Cody BlockDriverState *curr_bs = NULL; 4479b1b1d783SJeff Cody BlockDriverState *retval = NULL; 4480b1b1d783SJeff Cody 4481b1b1d783SJeff Cody if (!bs || !bs->drv || !backing_file) { 4482e8a6bb9cSMarcelo Tosatti return NULL; 4483e8a6bb9cSMarcelo Tosatti } 4484e8a6bb9cSMarcelo Tosatti 4485b1b1d783SJeff Cody filename_full = g_malloc(PATH_MAX); 4486b1b1d783SJeff Cody backing_file_full = g_malloc(PATH_MAX); 4487b1b1d783SJeff Cody filename_tmp = g_malloc(PATH_MAX); 4488b1b1d783SJeff Cody 4489b1b1d783SJeff Cody is_protocol = path_has_protocol(backing_file); 4490b1b1d783SJeff Cody 4491b1b1d783SJeff Cody for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) { 4492b1b1d783SJeff Cody 4493b1b1d783SJeff Cody /* If either of the filename paths is actually a protocol, then 4494b1b1d783SJeff Cody * compare unmodified paths; otherwise make paths relative */ 4495b1b1d783SJeff Cody if (is_protocol || path_has_protocol(curr_bs->backing_file)) { 4496b1b1d783SJeff Cody if (strcmp(backing_file, curr_bs->backing_file) == 0) { 4497b1b1d783SJeff Cody retval = curr_bs->backing_hd; 4498b1b1d783SJeff Cody break; 4499b1b1d783SJeff Cody } 4500e8a6bb9cSMarcelo Tosatti } else { 4501b1b1d783SJeff Cody /* If not an absolute filename path, make it relative to the current 4502b1b1d783SJeff 
Cody * image's filename path */ 4503b1b1d783SJeff Cody path_combine(filename_tmp, PATH_MAX, curr_bs->filename, 4504b1b1d783SJeff Cody backing_file); 4505b1b1d783SJeff Cody 4506b1b1d783SJeff Cody /* We are going to compare absolute pathnames */ 4507b1b1d783SJeff Cody if (!realpath(filename_tmp, filename_full)) { 4508b1b1d783SJeff Cody continue; 4509b1b1d783SJeff Cody } 4510b1b1d783SJeff Cody 4511b1b1d783SJeff Cody /* We need to make sure the backing filename we are comparing against 4512b1b1d783SJeff Cody * is relative to the current image filename (or absolute) */ 4513b1b1d783SJeff Cody path_combine(filename_tmp, PATH_MAX, curr_bs->filename, 4514b1b1d783SJeff Cody curr_bs->backing_file); 4515b1b1d783SJeff Cody 4516b1b1d783SJeff Cody if (!realpath(filename_tmp, backing_file_full)) { 4517b1b1d783SJeff Cody continue; 4518b1b1d783SJeff Cody } 4519b1b1d783SJeff Cody 4520b1b1d783SJeff Cody if (strcmp(backing_file_full, filename_full) == 0) { 4521b1b1d783SJeff Cody retval = curr_bs->backing_hd; 4522b1b1d783SJeff Cody break; 4523b1b1d783SJeff Cody } 4524e8a6bb9cSMarcelo Tosatti } 4525e8a6bb9cSMarcelo Tosatti } 4526e8a6bb9cSMarcelo Tosatti 4527b1b1d783SJeff Cody g_free(filename_full); 4528b1b1d783SJeff Cody g_free(backing_file_full); 4529b1b1d783SJeff Cody g_free(filename_tmp); 4530b1b1d783SJeff Cody return retval; 4531e8a6bb9cSMarcelo Tosatti } 4532e8a6bb9cSMarcelo Tosatti 4533f198fd1cSBenoît Canet int bdrv_get_backing_file_depth(BlockDriverState *bs) 4534f198fd1cSBenoît Canet { 4535f198fd1cSBenoît Canet if (!bs->drv) { 4536f198fd1cSBenoît Canet return 0; 4537f198fd1cSBenoît Canet } 4538f198fd1cSBenoît Canet 4539f198fd1cSBenoît Canet if (!bs->backing_hd) { 4540f198fd1cSBenoît Canet return 0; 4541f198fd1cSBenoît Canet } 4542f198fd1cSBenoît Canet 4543f198fd1cSBenoît Canet return 1 + bdrv_get_backing_file_depth(bs->backing_hd); 4544f198fd1cSBenoît Canet } 4545f198fd1cSBenoît Canet 4546ea2384d3Sbellard /**************************************************************/ 
454783f64091Sbellard /* async I/Os */ 4548ea2384d3Sbellard 45497c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num, 4550f141eafeSaliguori QEMUIOVector *qiov, int nb_sectors, 4551097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 4552ea2384d3Sbellard { 4553bbf0a440SStefan Hajnoczi trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque); 4554bbf0a440SStefan Hajnoczi 4555d20d9b7cSPaolo Bonzini return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0, 45568c5873d6SStefan Hajnoczi cb, opaque, false); 455783f64091Sbellard } 455883f64091Sbellard 45597c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num, 4560f141eafeSaliguori QEMUIOVector *qiov, int nb_sectors, 4561097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 45627674e7bfSbellard { 4563bbf0a440SStefan Hajnoczi trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque); 4564bbf0a440SStefan Hajnoczi 4565d20d9b7cSPaolo Bonzini return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0, 45668c5873d6SStefan Hajnoczi cb, opaque, true); 456783f64091Sbellard } 456883f64091Sbellard 45697c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs, 4570d5ef94d4SPaolo Bonzini int64_t sector_num, int nb_sectors, BdrvRequestFlags flags, 4571097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 4572d5ef94d4SPaolo Bonzini { 4573d5ef94d4SPaolo Bonzini trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque); 4574d5ef94d4SPaolo Bonzini 4575d5ef94d4SPaolo Bonzini return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors, 4576d5ef94d4SPaolo Bonzini BDRV_REQ_ZERO_WRITE | flags, 4577d5ef94d4SPaolo Bonzini cb, opaque, true); 4578d5ef94d4SPaolo Bonzini } 4579d5ef94d4SPaolo Bonzini 458040b4f539SKevin Wolf 458140b4f539SKevin Wolf typedef struct MultiwriteCB { 458240b4f539SKevin Wolf int error; 458340b4f539SKevin Wolf int num_requests; 458440b4f539SKevin Wolf int 
num_callbacks; 458540b4f539SKevin Wolf struct { 4586097310b5SMarkus Armbruster BlockCompletionFunc *cb; 458740b4f539SKevin Wolf void *opaque; 458840b4f539SKevin Wolf QEMUIOVector *free_qiov; 458940b4f539SKevin Wolf } callbacks[]; 459040b4f539SKevin Wolf } MultiwriteCB; 459140b4f539SKevin Wolf 459240b4f539SKevin Wolf static void multiwrite_user_cb(MultiwriteCB *mcb) 459340b4f539SKevin Wolf { 459440b4f539SKevin Wolf int i; 459540b4f539SKevin Wolf 459640b4f539SKevin Wolf for (i = 0; i < mcb->num_callbacks; i++) { 459740b4f539SKevin Wolf mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error); 45981e1ea48dSStefan Hajnoczi if (mcb->callbacks[i].free_qiov) { 45991e1ea48dSStefan Hajnoczi qemu_iovec_destroy(mcb->callbacks[i].free_qiov); 46001e1ea48dSStefan Hajnoczi } 46017267c094SAnthony Liguori g_free(mcb->callbacks[i].free_qiov); 460240b4f539SKevin Wolf } 460340b4f539SKevin Wolf } 460440b4f539SKevin Wolf 460540b4f539SKevin Wolf static void multiwrite_cb(void *opaque, int ret) 460640b4f539SKevin Wolf { 460740b4f539SKevin Wolf MultiwriteCB *mcb = opaque; 460840b4f539SKevin Wolf 46096d519a5fSStefan Hajnoczi trace_multiwrite_cb(mcb, ret); 46106d519a5fSStefan Hajnoczi 4611cb6d3ca0SKevin Wolf if (ret < 0 && !mcb->error) { 461240b4f539SKevin Wolf mcb->error = ret; 461340b4f539SKevin Wolf } 461440b4f539SKevin Wolf 461540b4f539SKevin Wolf mcb->num_requests--; 461640b4f539SKevin Wolf if (mcb->num_requests == 0) { 461740b4f539SKevin Wolf multiwrite_user_cb(mcb); 46187267c094SAnthony Liguori g_free(mcb); 461940b4f539SKevin Wolf } 462040b4f539SKevin Wolf } 462140b4f539SKevin Wolf 462240b4f539SKevin Wolf static int multiwrite_req_compare(const void *a, const void *b) 462340b4f539SKevin Wolf { 462477be4366SChristoph Hellwig const BlockRequest *req1 = a, *req2 = b; 462577be4366SChristoph Hellwig 462677be4366SChristoph Hellwig /* 462777be4366SChristoph Hellwig * Note that we can't simply subtract req2->sector from req1->sector 462877be4366SChristoph Hellwig * here as that could 
overflow the return value. 462977be4366SChristoph Hellwig */ 463077be4366SChristoph Hellwig if (req1->sector > req2->sector) { 463177be4366SChristoph Hellwig return 1; 463277be4366SChristoph Hellwig } else if (req1->sector < req2->sector) { 463377be4366SChristoph Hellwig return -1; 463477be4366SChristoph Hellwig } else { 463577be4366SChristoph Hellwig return 0; 463677be4366SChristoph Hellwig } 463740b4f539SKevin Wolf } 463840b4f539SKevin Wolf 463940b4f539SKevin Wolf /* 464040b4f539SKevin Wolf * Takes a bunch of requests and tries to merge them. Returns the number of 464140b4f539SKevin Wolf * requests that remain after merging. 464240b4f539SKevin Wolf */ 464340b4f539SKevin Wolf static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs, 464440b4f539SKevin Wolf int num_reqs, MultiwriteCB *mcb) 464540b4f539SKevin Wolf { 464640b4f539SKevin Wolf int i, outidx; 464740b4f539SKevin Wolf 464840b4f539SKevin Wolf // Sort requests by start sector 464940b4f539SKevin Wolf qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare); 465040b4f539SKevin Wolf 465140b4f539SKevin Wolf // Check if adjacent requests touch the same clusters. If so, combine them, 465240b4f539SKevin Wolf // filling up gaps with zero sectors. 465340b4f539SKevin Wolf outidx = 0; 465440b4f539SKevin Wolf for (i = 1; i < num_reqs; i++) { 465540b4f539SKevin Wolf int merge = 0; 465640b4f539SKevin Wolf int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors; 465740b4f539SKevin Wolf 4658b6a127a1SPaolo Bonzini // Handle exactly sequential writes and overlapping writes. 
465940b4f539SKevin Wolf if (reqs[i].sector <= oldreq_last) { 466040b4f539SKevin Wolf merge = 1; 466140b4f539SKevin Wolf } 466240b4f539SKevin Wolf 4663e2a305fbSChristoph Hellwig if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) { 4664e2a305fbSChristoph Hellwig merge = 0; 4665e2a305fbSChristoph Hellwig } 4666e2a305fbSChristoph Hellwig 46676c5a42acSPeter Lieven if (bs->bl.max_transfer_length && reqs[outidx].nb_sectors + 46686c5a42acSPeter Lieven reqs[i].nb_sectors > bs->bl.max_transfer_length) { 46696c5a42acSPeter Lieven merge = 0; 46706c5a42acSPeter Lieven } 46716c5a42acSPeter Lieven 467240b4f539SKevin Wolf if (merge) { 467340b4f539SKevin Wolf size_t size; 46747267c094SAnthony Liguori QEMUIOVector *qiov = g_malloc0(sizeof(*qiov)); 467540b4f539SKevin Wolf qemu_iovec_init(qiov, 467640b4f539SKevin Wolf reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1); 467740b4f539SKevin Wolf 467840b4f539SKevin Wolf // Add the first request to the merged one. If the requests are 467940b4f539SKevin Wolf // overlapping, drop the last sectors of the first request. 
468040b4f539SKevin Wolf size = (reqs[i].sector - reqs[outidx].sector) << 9; 46811b093c48SMichael Tokarev qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size); 468240b4f539SKevin Wolf 4683b6a127a1SPaolo Bonzini // We should need to add any zeros between the two requests 4684b6a127a1SPaolo Bonzini assert (reqs[i].sector <= oldreq_last); 468540b4f539SKevin Wolf 468640b4f539SKevin Wolf // Add the second request 46871b093c48SMichael Tokarev qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size); 468840b4f539SKevin Wolf 4689391827ebSStefan Hajnoczi // Add tail of first request, if necessary 4690391827ebSStefan Hajnoczi if (qiov->size < reqs[outidx].qiov->size) { 4691391827ebSStefan Hajnoczi qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size, 4692391827ebSStefan Hajnoczi reqs[outidx].qiov->size - qiov->size); 4693391827ebSStefan Hajnoczi } 4694391827ebSStefan Hajnoczi 4695cbf1dff2SKevin Wolf reqs[outidx].nb_sectors = qiov->size >> 9; 469640b4f539SKevin Wolf reqs[outidx].qiov = qiov; 469740b4f539SKevin Wolf 469840b4f539SKevin Wolf mcb->callbacks[i].free_qiov = reqs[outidx].qiov; 469940b4f539SKevin Wolf } else { 470040b4f539SKevin Wolf outidx++; 470140b4f539SKevin Wolf reqs[outidx].sector = reqs[i].sector; 470240b4f539SKevin Wolf reqs[outidx].nb_sectors = reqs[i].nb_sectors; 470340b4f539SKevin Wolf reqs[outidx].qiov = reqs[i].qiov; 470440b4f539SKevin Wolf } 470540b4f539SKevin Wolf } 470640b4f539SKevin Wolf 4707f4564d53SPeter Lieven block_acct_merge_done(&bs->stats, BLOCK_ACCT_WRITE, num_reqs - outidx - 1); 4708f4564d53SPeter Lieven 470940b4f539SKevin Wolf return outidx + 1; 471040b4f539SKevin Wolf } 471140b4f539SKevin Wolf 471240b4f539SKevin Wolf /* 471340b4f539SKevin Wolf * Submit multiple AIO write requests at once. 471440b4f539SKevin Wolf * 471540b4f539SKevin Wolf * On success, the function returns 0 and all requests in the reqs array have 471640b4f539SKevin Wolf * been submitted. 
In error case this function returns -1, and any of the 471740b4f539SKevin Wolf * requests may or may not be submitted yet. In particular, this means that the 471840b4f539SKevin Wolf * callback will be called for some of the requests, for others it won't. The 471940b4f539SKevin Wolf * caller must check the error field of the BlockRequest to wait for the right 472040b4f539SKevin Wolf * callbacks (if error != 0, no callback will be called). 472140b4f539SKevin Wolf * 472240b4f539SKevin Wolf * The implementation may modify the contents of the reqs array, e.g. to merge 472340b4f539SKevin Wolf * requests. However, the fields opaque and error are left unmodified as they 472440b4f539SKevin Wolf * are used to signal failure for a single request to the caller. 472540b4f539SKevin Wolf */ 472640b4f539SKevin Wolf int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs) 472740b4f539SKevin Wolf { 472840b4f539SKevin Wolf MultiwriteCB *mcb; 472940b4f539SKevin Wolf int i; 473040b4f539SKevin Wolf 4731301db7c2SRyan Harper /* don't submit writes if we don't have a medium */ 4732301db7c2SRyan Harper if (bs->drv == NULL) { 4733301db7c2SRyan Harper for (i = 0; i < num_reqs; i++) { 4734301db7c2SRyan Harper reqs[i].error = -ENOMEDIUM; 4735301db7c2SRyan Harper } 4736301db7c2SRyan Harper return -1; 4737301db7c2SRyan Harper } 4738301db7c2SRyan Harper 473940b4f539SKevin Wolf if (num_reqs == 0) { 474040b4f539SKevin Wolf return 0; 474140b4f539SKevin Wolf } 474240b4f539SKevin Wolf 474340b4f539SKevin Wolf // Create MultiwriteCB structure 47447267c094SAnthony Liguori mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks)); 474540b4f539SKevin Wolf mcb->num_requests = 0; 474640b4f539SKevin Wolf mcb->num_callbacks = num_reqs; 474740b4f539SKevin Wolf 474840b4f539SKevin Wolf for (i = 0; i < num_reqs; i++) { 474940b4f539SKevin Wolf mcb->callbacks[i].cb = reqs[i].cb; 475040b4f539SKevin Wolf mcb->callbacks[i].opaque = reqs[i].opaque; 475140b4f539SKevin Wolf } 
475240b4f539SKevin Wolf 475340b4f539SKevin Wolf // Check for mergable requests 475440b4f539SKevin Wolf num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb); 475540b4f539SKevin Wolf 47566d519a5fSStefan Hajnoczi trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs); 47576d519a5fSStefan Hajnoczi 4758df9309fbSPaolo Bonzini /* Run the aio requests. */ 4759df9309fbSPaolo Bonzini mcb->num_requests = num_reqs; 476040b4f539SKevin Wolf for (i = 0; i < num_reqs; i++) { 4761d20d9b7cSPaolo Bonzini bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov, 4762d20d9b7cSPaolo Bonzini reqs[i].nb_sectors, reqs[i].flags, 4763d20d9b7cSPaolo Bonzini multiwrite_cb, mcb, 4764d20d9b7cSPaolo Bonzini true); 476540b4f539SKevin Wolf } 476640b4f539SKevin Wolf 476740b4f539SKevin Wolf return 0; 476840b4f539SKevin Wolf } 476940b4f539SKevin Wolf 47707c84b1b8SMarkus Armbruster void bdrv_aio_cancel(BlockAIOCB *acb) 477183f64091Sbellard { 477202c50efeSFam Zheng qemu_aio_ref(acb); 477302c50efeSFam Zheng bdrv_aio_cancel_async(acb); 477402c50efeSFam Zheng while (acb->refcnt > 1) { 477502c50efeSFam Zheng if (acb->aiocb_info->get_aio_context) { 477602c50efeSFam Zheng aio_poll(acb->aiocb_info->get_aio_context(acb), true); 477702c50efeSFam Zheng } else if (acb->bs) { 477802c50efeSFam Zheng aio_poll(bdrv_get_aio_context(acb->bs), true); 477902c50efeSFam Zheng } else { 478002c50efeSFam Zheng abort(); 478102c50efeSFam Zheng } 478202c50efeSFam Zheng } 47838007429aSFam Zheng qemu_aio_unref(acb); 478402c50efeSFam Zheng } 478502c50efeSFam Zheng 478602c50efeSFam Zheng /* Async version of aio cancel. The caller is not blocked if the acb implements 478702c50efeSFam Zheng * cancel_async, otherwise we do nothing and let the request normally complete. 478802c50efeSFam Zheng * In either case the completion callback must be called. 
*/ 47897c84b1b8SMarkus Armbruster void bdrv_aio_cancel_async(BlockAIOCB *acb) 479002c50efeSFam Zheng { 479102c50efeSFam Zheng if (acb->aiocb_info->cancel_async) { 479202c50efeSFam Zheng acb->aiocb_info->cancel_async(acb); 479302c50efeSFam Zheng } 479483f64091Sbellard } 479583f64091Sbellard 479683f64091Sbellard /**************************************************************/ 479783f64091Sbellard /* async block device emulation */ 479883f64091Sbellard 47997c84b1b8SMarkus Armbruster typedef struct BlockAIOCBSync { 48007c84b1b8SMarkus Armbruster BlockAIOCB common; 4801c16b5a2cSChristoph Hellwig QEMUBH *bh; 4802c16b5a2cSChristoph Hellwig int ret; 4803c16b5a2cSChristoph Hellwig /* vector translation state */ 4804c16b5a2cSChristoph Hellwig QEMUIOVector *qiov; 4805c16b5a2cSChristoph Hellwig uint8_t *bounce; 4806c16b5a2cSChristoph Hellwig int is_write; 48077c84b1b8SMarkus Armbruster } BlockAIOCBSync; 4808c16b5a2cSChristoph Hellwig 4809d7331bedSStefan Hajnoczi static const AIOCBInfo bdrv_em_aiocb_info = { 48107c84b1b8SMarkus Armbruster .aiocb_size = sizeof(BlockAIOCBSync), 4811c16b5a2cSChristoph Hellwig }; 4812c16b5a2cSChristoph Hellwig 481383f64091Sbellard static void bdrv_aio_bh_cb(void *opaque) 4814beac80cdSbellard { 48157c84b1b8SMarkus Armbruster BlockAIOCBSync *acb = opaque; 4816f141eafeSaliguori 4817857d4f46SKevin Wolf if (!acb->is_write && acb->ret >= 0) { 481803396148SMichael Tokarev qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size); 4819857d4f46SKevin Wolf } 4820ceb42de8Saliguori qemu_vfree(acb->bounce); 4821ce1a14dcSpbrook acb->common.cb(acb->common.opaque, acb->ret); 48226a7ad299SDor Laor qemu_bh_delete(acb->bh); 482336afc451SAvi Kivity acb->bh = NULL; 48248007429aSFam Zheng qemu_aio_unref(acb); 4825beac80cdSbellard } 4826beac80cdSbellard 48277c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs, 4828f141eafeSaliguori int64_t sector_num, 4829f141eafeSaliguori QEMUIOVector *qiov, 4830f141eafeSaliguori int nb_sectors, 
4831097310b5SMarkus Armbruster BlockCompletionFunc *cb, 4832f141eafeSaliguori void *opaque, 4833f141eafeSaliguori int is_write) 4834f141eafeSaliguori 4835ea2384d3Sbellard { 48367c84b1b8SMarkus Armbruster BlockAIOCBSync *acb; 483783f64091Sbellard 4838d7331bedSStefan Hajnoczi acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque); 4839f141eafeSaliguori acb->is_write = is_write; 4840f141eafeSaliguori acb->qiov = qiov; 4841857d4f46SKevin Wolf acb->bounce = qemu_try_blockalign(bs, qiov->size); 48422572b37aSStefan Hajnoczi acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb); 4843f141eafeSaliguori 4844857d4f46SKevin Wolf if (acb->bounce == NULL) { 4845857d4f46SKevin Wolf acb->ret = -ENOMEM; 4846857d4f46SKevin Wolf } else if (is_write) { 4847d5e6b161SMichael Tokarev qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size); 48481ed20acfSStefan Hajnoczi acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors); 4849f141eafeSaliguori } else { 48501ed20acfSStefan Hajnoczi acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors); 4851f141eafeSaliguori } 4852f141eafeSaliguori 4853ce1a14dcSpbrook qemu_bh_schedule(acb->bh); 4854f141eafeSaliguori 4855ce1a14dcSpbrook return &acb->common; 48567a6cba61Spbrook } 48577a6cba61Spbrook 48587c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs, 4859f141eafeSaliguori int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 4860097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 486183f64091Sbellard { 4862f141eafeSaliguori return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); 486383f64091Sbellard } 486483f64091Sbellard 48657c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs, 4866f141eafeSaliguori int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 4867097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 4868f141eafeSaliguori { 4869f141eafeSaliguori return bdrv_aio_rw_vector(bs, 
sector_num, qiov, nb_sectors, cb, opaque, 1); 4870f141eafeSaliguori } 4871f141eafeSaliguori 487268485420SKevin Wolf 48737c84b1b8SMarkus Armbruster typedef struct BlockAIOCBCoroutine { 48747c84b1b8SMarkus Armbruster BlockAIOCB common; 487568485420SKevin Wolf BlockRequest req; 487668485420SKevin Wolf bool is_write; 48770b5a2445SPaolo Bonzini bool need_bh; 4878d318aea9SKevin Wolf bool *done; 487968485420SKevin Wolf QEMUBH* bh; 48807c84b1b8SMarkus Armbruster } BlockAIOCBCoroutine; 488168485420SKevin Wolf 4882d7331bedSStefan Hajnoczi static const AIOCBInfo bdrv_em_co_aiocb_info = { 48837c84b1b8SMarkus Armbruster .aiocb_size = sizeof(BlockAIOCBCoroutine), 488468485420SKevin Wolf }; 488568485420SKevin Wolf 48860b5a2445SPaolo Bonzini static void bdrv_co_complete(BlockAIOCBCoroutine *acb) 48870b5a2445SPaolo Bonzini { 48880b5a2445SPaolo Bonzini if (!acb->need_bh) { 48890b5a2445SPaolo Bonzini acb->common.cb(acb->common.opaque, acb->req.error); 48900b5a2445SPaolo Bonzini qemu_aio_unref(acb); 48910b5a2445SPaolo Bonzini } 48920b5a2445SPaolo Bonzini } 48930b5a2445SPaolo Bonzini 489435246a68SPaolo Bonzini static void bdrv_co_em_bh(void *opaque) 489568485420SKevin Wolf { 48967c84b1b8SMarkus Armbruster BlockAIOCBCoroutine *acb = opaque; 489768485420SKevin Wolf 48980b5a2445SPaolo Bonzini assert(!acb->need_bh); 489968485420SKevin Wolf qemu_bh_delete(acb->bh); 49000b5a2445SPaolo Bonzini bdrv_co_complete(acb); 49010b5a2445SPaolo Bonzini } 49020b5a2445SPaolo Bonzini 49030b5a2445SPaolo Bonzini static void bdrv_co_maybe_schedule_bh(BlockAIOCBCoroutine *acb) 49040b5a2445SPaolo Bonzini { 49050b5a2445SPaolo Bonzini acb->need_bh = false; 49060b5a2445SPaolo Bonzini if (acb->req.error != -EINPROGRESS) { 49070b5a2445SPaolo Bonzini BlockDriverState *bs = acb->common.bs; 49080b5a2445SPaolo Bonzini 49090b5a2445SPaolo Bonzini acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb); 49100b5a2445SPaolo Bonzini qemu_bh_schedule(acb->bh); 49110b5a2445SPaolo Bonzini } 491268485420SKevin Wolf } 
491368485420SKevin Wolf 4914b2a61371SStefan Hajnoczi /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */ 4915b2a61371SStefan Hajnoczi static void coroutine_fn bdrv_co_do_rw(void *opaque) 4916b2a61371SStefan Hajnoczi { 49177c84b1b8SMarkus Armbruster BlockAIOCBCoroutine *acb = opaque; 4918b2a61371SStefan Hajnoczi BlockDriverState *bs = acb->common.bs; 4919b2a61371SStefan Hajnoczi 4920b2a61371SStefan Hajnoczi if (!acb->is_write) { 4921b2a61371SStefan Hajnoczi acb->req.error = bdrv_co_do_readv(bs, acb->req.sector, 4922d20d9b7cSPaolo Bonzini acb->req.nb_sectors, acb->req.qiov, acb->req.flags); 4923b2a61371SStefan Hajnoczi } else { 4924b2a61371SStefan Hajnoczi acb->req.error = bdrv_co_do_writev(bs, acb->req.sector, 4925d20d9b7cSPaolo Bonzini acb->req.nb_sectors, acb->req.qiov, acb->req.flags); 4926b2a61371SStefan Hajnoczi } 4927b2a61371SStefan Hajnoczi 49280b5a2445SPaolo Bonzini bdrv_co_complete(acb); 4929b2a61371SStefan Hajnoczi } 4930b2a61371SStefan Hajnoczi 49317c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, 493268485420SKevin Wolf int64_t sector_num, 493368485420SKevin Wolf QEMUIOVector *qiov, 493468485420SKevin Wolf int nb_sectors, 4935d20d9b7cSPaolo Bonzini BdrvRequestFlags flags, 4936097310b5SMarkus Armbruster BlockCompletionFunc *cb, 493768485420SKevin Wolf void *opaque, 49388c5873d6SStefan Hajnoczi bool is_write) 493968485420SKevin Wolf { 494068485420SKevin Wolf Coroutine *co; 49417c84b1b8SMarkus Armbruster BlockAIOCBCoroutine *acb; 494268485420SKevin Wolf 4943d7331bedSStefan Hajnoczi acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); 49440b5a2445SPaolo Bonzini acb->need_bh = true; 49450b5a2445SPaolo Bonzini acb->req.error = -EINPROGRESS; 494668485420SKevin Wolf acb->req.sector = sector_num; 494768485420SKevin Wolf acb->req.nb_sectors = nb_sectors; 494868485420SKevin Wolf acb->req.qiov = qiov; 4949d20d9b7cSPaolo Bonzini acb->req.flags = flags; 495068485420SKevin Wolf acb->is_write = is_write; 495168485420SKevin Wolf 
49528c5873d6SStefan Hajnoczi co = qemu_coroutine_create(bdrv_co_do_rw); 495368485420SKevin Wolf qemu_coroutine_enter(co, acb); 495468485420SKevin Wolf 49550b5a2445SPaolo Bonzini bdrv_co_maybe_schedule_bh(acb); 495668485420SKevin Wolf return &acb->common; 495768485420SKevin Wolf } 495868485420SKevin Wolf 495907f07615SPaolo Bonzini static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque) 4960b2e12bc6SChristoph Hellwig { 49617c84b1b8SMarkus Armbruster BlockAIOCBCoroutine *acb = opaque; 496207f07615SPaolo Bonzini BlockDriverState *bs = acb->common.bs; 4963b2e12bc6SChristoph Hellwig 496407f07615SPaolo Bonzini acb->req.error = bdrv_co_flush(bs); 49650b5a2445SPaolo Bonzini bdrv_co_complete(acb); 4966b2e12bc6SChristoph Hellwig } 4967b2e12bc6SChristoph Hellwig 49687c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs, 4969097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 4970016f5cf6SAlexander Graf { 497107f07615SPaolo Bonzini trace_bdrv_aio_flush(bs, opaque); 4972016f5cf6SAlexander Graf 497307f07615SPaolo Bonzini Coroutine *co; 49747c84b1b8SMarkus Armbruster BlockAIOCBCoroutine *acb; 4975016f5cf6SAlexander Graf 4976d7331bedSStefan Hajnoczi acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); 49770b5a2445SPaolo Bonzini acb->need_bh = true; 49780b5a2445SPaolo Bonzini acb->req.error = -EINPROGRESS; 4979d318aea9SKevin Wolf 498007f07615SPaolo Bonzini co = qemu_coroutine_create(bdrv_aio_flush_co_entry); 498107f07615SPaolo Bonzini qemu_coroutine_enter(co, acb); 4982016f5cf6SAlexander Graf 49830b5a2445SPaolo Bonzini bdrv_co_maybe_schedule_bh(acb); 4984016f5cf6SAlexander Graf return &acb->common; 4985016f5cf6SAlexander Graf } 4986016f5cf6SAlexander Graf 49874265d620SPaolo Bonzini static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque) 49884265d620SPaolo Bonzini { 49897c84b1b8SMarkus Armbruster BlockAIOCBCoroutine *acb = opaque; 49904265d620SPaolo Bonzini BlockDriverState *bs = acb->common.bs; 49914265d620SPaolo Bonzini 
49924265d620SPaolo Bonzini acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors); 49930b5a2445SPaolo Bonzini bdrv_co_complete(acb); 49944265d620SPaolo Bonzini } 49954265d620SPaolo Bonzini 49967c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs, 49974265d620SPaolo Bonzini int64_t sector_num, int nb_sectors, 4998097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 49994265d620SPaolo Bonzini { 50004265d620SPaolo Bonzini Coroutine *co; 50017c84b1b8SMarkus Armbruster BlockAIOCBCoroutine *acb; 50024265d620SPaolo Bonzini 50034265d620SPaolo Bonzini trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque); 50044265d620SPaolo Bonzini 5005d7331bedSStefan Hajnoczi acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); 50060b5a2445SPaolo Bonzini acb->need_bh = true; 50070b5a2445SPaolo Bonzini acb->req.error = -EINPROGRESS; 50084265d620SPaolo Bonzini acb->req.sector = sector_num; 50094265d620SPaolo Bonzini acb->req.nb_sectors = nb_sectors; 50104265d620SPaolo Bonzini co = qemu_coroutine_create(bdrv_aio_discard_co_entry); 50114265d620SPaolo Bonzini qemu_coroutine_enter(co, acb); 50124265d620SPaolo Bonzini 50130b5a2445SPaolo Bonzini bdrv_co_maybe_schedule_bh(acb); 50144265d620SPaolo Bonzini return &acb->common; 50154265d620SPaolo Bonzini } 50164265d620SPaolo Bonzini 5017ea2384d3Sbellard void bdrv_init(void) 5018ea2384d3Sbellard { 50195efa9d5aSAnthony Liguori module_call_init(MODULE_INIT_BLOCK); 5020ea2384d3Sbellard } 5021ce1a14dcSpbrook 5022eb852011SMarkus Armbruster void bdrv_init_with_whitelist(void) 5023eb852011SMarkus Armbruster { 5024eb852011SMarkus Armbruster use_bdrv_whitelist = 1; 5025eb852011SMarkus Armbruster bdrv_init(); 5026eb852011SMarkus Armbruster } 5027eb852011SMarkus Armbruster 5028d7331bedSStefan Hajnoczi void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs, 5029097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 50306bbff9a0Saliguori { 50317c84b1b8SMarkus Armbruster 
BlockAIOCB *acb; 5032ce1a14dcSpbrook 5033d7331bedSStefan Hajnoczi acb = g_slice_alloc(aiocb_info->aiocb_size); 5034d7331bedSStefan Hajnoczi acb->aiocb_info = aiocb_info; 5035ce1a14dcSpbrook acb->bs = bs; 5036ce1a14dcSpbrook acb->cb = cb; 5037ce1a14dcSpbrook acb->opaque = opaque; 5038f197fe2bSFam Zheng acb->refcnt = 1; 5039ce1a14dcSpbrook return acb; 5040ce1a14dcSpbrook } 5041ce1a14dcSpbrook 5042f197fe2bSFam Zheng void qemu_aio_ref(void *p) 5043f197fe2bSFam Zheng { 50447c84b1b8SMarkus Armbruster BlockAIOCB *acb = p; 5045f197fe2bSFam Zheng acb->refcnt++; 5046f197fe2bSFam Zheng } 5047f197fe2bSFam Zheng 50488007429aSFam Zheng void qemu_aio_unref(void *p) 5049ce1a14dcSpbrook { 50507c84b1b8SMarkus Armbruster BlockAIOCB *acb = p; 5051f197fe2bSFam Zheng assert(acb->refcnt > 0); 5052f197fe2bSFam Zheng if (--acb->refcnt == 0) { 5053d7331bedSStefan Hajnoczi g_slice_free1(acb->aiocb_info->aiocb_size, acb); 5054ce1a14dcSpbrook } 5055f197fe2bSFam Zheng } 505619cb3738Sbellard 505719cb3738Sbellard /**************************************************************/ 5058f9f05dc5SKevin Wolf /* Coroutine block device emulation */ 5059f9f05dc5SKevin Wolf 5060f9f05dc5SKevin Wolf typedef struct CoroutineIOCompletion { 5061f9f05dc5SKevin Wolf Coroutine *coroutine; 5062f9f05dc5SKevin Wolf int ret; 5063f9f05dc5SKevin Wolf } CoroutineIOCompletion; 5064f9f05dc5SKevin Wolf 5065f9f05dc5SKevin Wolf static void bdrv_co_io_em_complete(void *opaque, int ret) 5066f9f05dc5SKevin Wolf { 5067f9f05dc5SKevin Wolf CoroutineIOCompletion *co = opaque; 5068f9f05dc5SKevin Wolf 5069f9f05dc5SKevin Wolf co->ret = ret; 5070f9f05dc5SKevin Wolf qemu_coroutine_enter(co->coroutine, NULL); 5071f9f05dc5SKevin Wolf } 5072f9f05dc5SKevin Wolf 5073f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num, 5074f9f05dc5SKevin Wolf int nb_sectors, QEMUIOVector *iov, 5075f9f05dc5SKevin Wolf bool is_write) 5076f9f05dc5SKevin Wolf { 5077f9f05dc5SKevin Wolf CoroutineIOCompletion co = { 
5078f9f05dc5SKevin Wolf .coroutine = qemu_coroutine_self(), 5079f9f05dc5SKevin Wolf }; 50807c84b1b8SMarkus Armbruster BlockAIOCB *acb; 5081f9f05dc5SKevin Wolf 5082f9f05dc5SKevin Wolf if (is_write) { 5083a652d160SStefan Hajnoczi acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors, 5084f9f05dc5SKevin Wolf bdrv_co_io_em_complete, &co); 5085f9f05dc5SKevin Wolf } else { 5086a652d160SStefan Hajnoczi acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors, 5087f9f05dc5SKevin Wolf bdrv_co_io_em_complete, &co); 5088f9f05dc5SKevin Wolf } 5089f9f05dc5SKevin Wolf 509059370aaaSStefan Hajnoczi trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb); 5091f9f05dc5SKevin Wolf if (!acb) { 5092f9f05dc5SKevin Wolf return -EIO; 5093f9f05dc5SKevin Wolf } 5094f9f05dc5SKevin Wolf qemu_coroutine_yield(); 5095f9f05dc5SKevin Wolf 5096f9f05dc5SKevin Wolf return co.ret; 5097f9f05dc5SKevin Wolf } 5098f9f05dc5SKevin Wolf 5099f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs, 5100f9f05dc5SKevin Wolf int64_t sector_num, int nb_sectors, 5101f9f05dc5SKevin Wolf QEMUIOVector *iov) 5102f9f05dc5SKevin Wolf { 5103f9f05dc5SKevin Wolf return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false); 5104f9f05dc5SKevin Wolf } 5105f9f05dc5SKevin Wolf 5106f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs, 5107f9f05dc5SKevin Wolf int64_t sector_num, int nb_sectors, 5108f9f05dc5SKevin Wolf QEMUIOVector *iov) 5109f9f05dc5SKevin Wolf { 5110f9f05dc5SKevin Wolf return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true); 5111f9f05dc5SKevin Wolf } 5112f9f05dc5SKevin Wolf 511307f07615SPaolo Bonzini static void coroutine_fn bdrv_flush_co_entry(void *opaque) 5114e7a8a783SKevin Wolf { 511507f07615SPaolo Bonzini RwCo *rwco = opaque; 511607f07615SPaolo Bonzini 511707f07615SPaolo Bonzini rwco->ret = bdrv_co_flush(rwco->bs); 511807f07615SPaolo Bonzini } 511907f07615SPaolo Bonzini 512007f07615SPaolo Bonzini int coroutine_fn 
bdrv_co_flush(BlockDriverState *bs) 512107f07615SPaolo Bonzini { 5122eb489bb1SKevin Wolf int ret; 5123eb489bb1SKevin Wolf 512429cdb251SPaolo Bonzini if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) { 512507f07615SPaolo Bonzini return 0; 5126eb489bb1SKevin Wolf } 5127eb489bb1SKevin Wolf 5128ca716364SKevin Wolf /* Write back cached data to the OS even with cache=unsafe */ 5129bf736fe3SKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS); 5130eb489bb1SKevin Wolf if (bs->drv->bdrv_co_flush_to_os) { 5131eb489bb1SKevin Wolf ret = bs->drv->bdrv_co_flush_to_os(bs); 5132eb489bb1SKevin Wolf if (ret < 0) { 5133eb489bb1SKevin Wolf return ret; 5134eb489bb1SKevin Wolf } 5135eb489bb1SKevin Wolf } 5136eb489bb1SKevin Wolf 5137ca716364SKevin Wolf /* But don't actually force it to the disk with cache=unsafe */ 5138ca716364SKevin Wolf if (bs->open_flags & BDRV_O_NO_FLUSH) { 5139d4c82329SKevin Wolf goto flush_parent; 5140ca716364SKevin Wolf } 5141ca716364SKevin Wolf 5142bf736fe3SKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK); 5143eb489bb1SKevin Wolf if (bs->drv->bdrv_co_flush_to_disk) { 514429cdb251SPaolo Bonzini ret = bs->drv->bdrv_co_flush_to_disk(bs); 514507f07615SPaolo Bonzini } else if (bs->drv->bdrv_aio_flush) { 51467c84b1b8SMarkus Armbruster BlockAIOCB *acb; 5147e7a8a783SKevin Wolf CoroutineIOCompletion co = { 5148e7a8a783SKevin Wolf .coroutine = qemu_coroutine_self(), 5149e7a8a783SKevin Wolf }; 5150e7a8a783SKevin Wolf 515107f07615SPaolo Bonzini acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co); 515207f07615SPaolo Bonzini if (acb == NULL) { 515329cdb251SPaolo Bonzini ret = -EIO; 515407f07615SPaolo Bonzini } else { 5155e7a8a783SKevin Wolf qemu_coroutine_yield(); 515629cdb251SPaolo Bonzini ret = co.ret; 5157e7a8a783SKevin Wolf } 515807f07615SPaolo Bonzini } else { 515907f07615SPaolo Bonzini /* 516007f07615SPaolo Bonzini * Some block drivers always operate in either writethrough or unsafe 516107f07615SPaolo Bonzini * mode and don't support 
bdrv_flush therefore. Usually qemu doesn't 516207f07615SPaolo Bonzini * know how the server works (because the behaviour is hardcoded or 516307f07615SPaolo Bonzini * depends on server-side configuration), so we can't ensure that 516407f07615SPaolo Bonzini * everything is safe on disk. Returning an error doesn't work because 516507f07615SPaolo Bonzini * that would break guests even if the server operates in writethrough 516607f07615SPaolo Bonzini * mode. 516707f07615SPaolo Bonzini * 516807f07615SPaolo Bonzini * Let's hope the user knows what he's doing. 516907f07615SPaolo Bonzini */ 517029cdb251SPaolo Bonzini ret = 0; 517107f07615SPaolo Bonzini } 517229cdb251SPaolo Bonzini if (ret < 0) { 517329cdb251SPaolo Bonzini return ret; 517429cdb251SPaolo Bonzini } 517529cdb251SPaolo Bonzini 517629cdb251SPaolo Bonzini /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH 517729cdb251SPaolo Bonzini * in the case of cache=unsafe, so there are no useless flushes. 517829cdb251SPaolo Bonzini */ 5179d4c82329SKevin Wolf flush_parent: 518029cdb251SPaolo Bonzini return bdrv_co_flush(bs->file); 518107f07615SPaolo Bonzini } 518207f07615SPaolo Bonzini 51835a8a30dbSKevin Wolf void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp) 51840f15423cSAnthony Liguori { 51855a8a30dbSKevin Wolf Error *local_err = NULL; 51865a8a30dbSKevin Wolf int ret; 51875a8a30dbSKevin Wolf 51883456a8d1SKevin Wolf if (!bs->drv) { 51893456a8d1SKevin Wolf return; 51900f15423cSAnthony Liguori } 51913456a8d1SKevin Wolf 51927ea2d269SAlexey Kardashevskiy if (!(bs->open_flags & BDRV_O_INCOMING)) { 51937ea2d269SAlexey Kardashevskiy return; 51947ea2d269SAlexey Kardashevskiy } 51957ea2d269SAlexey Kardashevskiy bs->open_flags &= ~BDRV_O_INCOMING; 51967ea2d269SAlexey Kardashevskiy 51973456a8d1SKevin Wolf if (bs->drv->bdrv_invalidate_cache) { 51985a8a30dbSKevin Wolf bs->drv->bdrv_invalidate_cache(bs, &local_err); 51993456a8d1SKevin Wolf } else if (bs->file) { 52005a8a30dbSKevin Wolf 
bdrv_invalidate_cache(bs->file, &local_err); 52015a8a30dbSKevin Wolf } 52025a8a30dbSKevin Wolf if (local_err) { 52035a8a30dbSKevin Wolf error_propagate(errp, local_err); 52045a8a30dbSKevin Wolf return; 52053456a8d1SKevin Wolf } 52063456a8d1SKevin Wolf 52075a8a30dbSKevin Wolf ret = refresh_total_sectors(bs, bs->total_sectors); 52085a8a30dbSKevin Wolf if (ret < 0) { 52095a8a30dbSKevin Wolf error_setg_errno(errp, -ret, "Could not refresh total sector count"); 52105a8a30dbSKevin Wolf return; 52115a8a30dbSKevin Wolf } 52120f15423cSAnthony Liguori } 52130f15423cSAnthony Liguori 52145a8a30dbSKevin Wolf void bdrv_invalidate_cache_all(Error **errp) 52150f15423cSAnthony Liguori { 52160f15423cSAnthony Liguori BlockDriverState *bs; 52175a8a30dbSKevin Wolf Error *local_err = NULL; 52180f15423cSAnthony Liguori 5219dc364f4cSBenoît Canet QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 5220ed78cda3SStefan Hajnoczi AioContext *aio_context = bdrv_get_aio_context(bs); 5221ed78cda3SStefan Hajnoczi 5222ed78cda3SStefan Hajnoczi aio_context_acquire(aio_context); 52235a8a30dbSKevin Wolf bdrv_invalidate_cache(bs, &local_err); 5224ed78cda3SStefan Hajnoczi aio_context_release(aio_context); 52255a8a30dbSKevin Wolf if (local_err) { 52265a8a30dbSKevin Wolf error_propagate(errp, local_err); 52275a8a30dbSKevin Wolf return; 52285a8a30dbSKevin Wolf } 52290f15423cSAnthony Liguori } 52300f15423cSAnthony Liguori } 52310f15423cSAnthony Liguori 523207f07615SPaolo Bonzini int bdrv_flush(BlockDriverState *bs) 523307f07615SPaolo Bonzini { 523407f07615SPaolo Bonzini Coroutine *co; 523507f07615SPaolo Bonzini RwCo rwco = { 523607f07615SPaolo Bonzini .bs = bs, 523707f07615SPaolo Bonzini .ret = NOT_DONE, 523807f07615SPaolo Bonzini }; 523907f07615SPaolo Bonzini 524007f07615SPaolo Bonzini if (qemu_in_coroutine()) { 524107f07615SPaolo Bonzini /* Fast-path if already in coroutine context */ 524207f07615SPaolo Bonzini bdrv_flush_co_entry(&rwco); 524307f07615SPaolo Bonzini } else { 52442572b37aSStefan Hajnoczi 
AioContext *aio_context = bdrv_get_aio_context(bs); 52452572b37aSStefan Hajnoczi 524607f07615SPaolo Bonzini co = qemu_coroutine_create(bdrv_flush_co_entry); 524707f07615SPaolo Bonzini qemu_coroutine_enter(co, &rwco); 524807f07615SPaolo Bonzini while (rwco.ret == NOT_DONE) { 52492572b37aSStefan Hajnoczi aio_poll(aio_context, true); 525007f07615SPaolo Bonzini } 525107f07615SPaolo Bonzini } 525207f07615SPaolo Bonzini 525307f07615SPaolo Bonzini return rwco.ret; 525407f07615SPaolo Bonzini } 5255e7a8a783SKevin Wolf 5256775aa8b6SKevin Wolf typedef struct DiscardCo { 5257775aa8b6SKevin Wolf BlockDriverState *bs; 5258775aa8b6SKevin Wolf int64_t sector_num; 5259775aa8b6SKevin Wolf int nb_sectors; 5260775aa8b6SKevin Wolf int ret; 5261775aa8b6SKevin Wolf } DiscardCo; 52624265d620SPaolo Bonzini static void coroutine_fn bdrv_discard_co_entry(void *opaque) 52634265d620SPaolo Bonzini { 5264775aa8b6SKevin Wolf DiscardCo *rwco = opaque; 52654265d620SPaolo Bonzini 52664265d620SPaolo Bonzini rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors); 52674265d620SPaolo Bonzini } 52684265d620SPaolo Bonzini 52694265d620SPaolo Bonzini int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, 52704265d620SPaolo Bonzini int nb_sectors) 52714265d620SPaolo Bonzini { 5272b9c64947SMax Reitz int max_discard, ret; 5273d51e9fe5SPaolo Bonzini 52744265d620SPaolo Bonzini if (!bs->drv) { 52754265d620SPaolo Bonzini return -ENOMEDIUM; 5276b9c64947SMax Reitz } 5277b9c64947SMax Reitz 5278b9c64947SMax Reitz ret = bdrv_check_request(bs, sector_num, nb_sectors); 5279b9c64947SMax Reitz if (ret < 0) { 5280b9c64947SMax Reitz return ret; 52814265d620SPaolo Bonzini } else if (bs->read_only) { 52824265d620SPaolo Bonzini return -EROFS; 5283df702c9bSPaolo Bonzini } 5284df702c9bSPaolo Bonzini 52858f0720ecSPaolo Bonzini bdrv_reset_dirty(bs, sector_num, nb_sectors); 5286df702c9bSPaolo Bonzini 52879e8f1835SPaolo Bonzini /* Do nothing if disabled. 
*/ 52889e8f1835SPaolo Bonzini if (!(bs->open_flags & BDRV_O_UNMAP)) { 52899e8f1835SPaolo Bonzini return 0; 52909e8f1835SPaolo Bonzini } 52919e8f1835SPaolo Bonzini 5292d51e9fe5SPaolo Bonzini if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) { 5293d51e9fe5SPaolo Bonzini return 0; 5294d51e9fe5SPaolo Bonzini } 52956f14da52SPeter Lieven 529675af1f34SPeter Lieven max_discard = MIN_NON_ZERO(bs->bl.max_discard, BDRV_REQUEST_MAX_SECTORS); 52976f14da52SPeter Lieven while (nb_sectors > 0) { 52986f14da52SPeter Lieven int ret; 52996f14da52SPeter Lieven int num = nb_sectors; 53006f14da52SPeter Lieven 53016f14da52SPeter Lieven /* align request */ 53026f14da52SPeter Lieven if (bs->bl.discard_alignment && 53036f14da52SPeter Lieven num >= bs->bl.discard_alignment && 53046f14da52SPeter Lieven sector_num % bs->bl.discard_alignment) { 53056f14da52SPeter Lieven if (num > bs->bl.discard_alignment) { 53066f14da52SPeter Lieven num = bs->bl.discard_alignment; 53076f14da52SPeter Lieven } 53086f14da52SPeter Lieven num -= sector_num % bs->bl.discard_alignment; 53096f14da52SPeter Lieven } 53106f14da52SPeter Lieven 53116f14da52SPeter Lieven /* limit request size */ 53126f14da52SPeter Lieven if (num > max_discard) { 53136f14da52SPeter Lieven num = max_discard; 53146f14da52SPeter Lieven } 53156f14da52SPeter Lieven 5316d51e9fe5SPaolo Bonzini if (bs->drv->bdrv_co_discard) { 53176f14da52SPeter Lieven ret = bs->drv->bdrv_co_discard(bs, sector_num, num); 5318d51e9fe5SPaolo Bonzini } else { 53197c84b1b8SMarkus Armbruster BlockAIOCB *acb; 53204265d620SPaolo Bonzini CoroutineIOCompletion co = { 53214265d620SPaolo Bonzini .coroutine = qemu_coroutine_self(), 53224265d620SPaolo Bonzini }; 53234265d620SPaolo Bonzini 53244265d620SPaolo Bonzini acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors, 53254265d620SPaolo Bonzini bdrv_co_io_em_complete, &co); 53264265d620SPaolo Bonzini if (acb == NULL) { 53274265d620SPaolo Bonzini return -EIO; 53284265d620SPaolo Bonzini } else { 53294265d620SPaolo 
Bonzini qemu_coroutine_yield(); 5330d51e9fe5SPaolo Bonzini ret = co.ret; 53314265d620SPaolo Bonzini } 5332d51e9fe5SPaolo Bonzini } 53337ce21016SPaolo Bonzini if (ret && ret != -ENOTSUP) { 5334d51e9fe5SPaolo Bonzini return ret; 5335d51e9fe5SPaolo Bonzini } 5336d51e9fe5SPaolo Bonzini 5337d51e9fe5SPaolo Bonzini sector_num += num; 5338d51e9fe5SPaolo Bonzini nb_sectors -= num; 5339d51e9fe5SPaolo Bonzini } 53404265d620SPaolo Bonzini return 0; 53414265d620SPaolo Bonzini } 53424265d620SPaolo Bonzini 53434265d620SPaolo Bonzini int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) 53444265d620SPaolo Bonzini { 53454265d620SPaolo Bonzini Coroutine *co; 5346775aa8b6SKevin Wolf DiscardCo rwco = { 53474265d620SPaolo Bonzini .bs = bs, 53484265d620SPaolo Bonzini .sector_num = sector_num, 53494265d620SPaolo Bonzini .nb_sectors = nb_sectors, 53504265d620SPaolo Bonzini .ret = NOT_DONE, 53514265d620SPaolo Bonzini }; 53524265d620SPaolo Bonzini 53534265d620SPaolo Bonzini if (qemu_in_coroutine()) { 53544265d620SPaolo Bonzini /* Fast-path if already in coroutine context */ 53554265d620SPaolo Bonzini bdrv_discard_co_entry(&rwco); 53564265d620SPaolo Bonzini } else { 53572572b37aSStefan Hajnoczi AioContext *aio_context = bdrv_get_aio_context(bs); 53582572b37aSStefan Hajnoczi 53594265d620SPaolo Bonzini co = qemu_coroutine_create(bdrv_discard_co_entry); 53604265d620SPaolo Bonzini qemu_coroutine_enter(co, &rwco); 53614265d620SPaolo Bonzini while (rwco.ret == NOT_DONE) { 53622572b37aSStefan Hajnoczi aio_poll(aio_context, true); 53634265d620SPaolo Bonzini } 53644265d620SPaolo Bonzini } 53654265d620SPaolo Bonzini 53664265d620SPaolo Bonzini return rwco.ret; 53674265d620SPaolo Bonzini } 53684265d620SPaolo Bonzini 5369f9f05dc5SKevin Wolf /**************************************************************/ 537019cb3738Sbellard /* removable device support */ 537119cb3738Sbellard 537219cb3738Sbellard /** 537319cb3738Sbellard * Return TRUE if the media is present 537419cb3738Sbellard */ 
537519cb3738Sbellard int bdrv_is_inserted(BlockDriverState *bs) 537619cb3738Sbellard { 537719cb3738Sbellard BlockDriver *drv = bs->drv; 5378a1aff5bfSMarkus Armbruster 537919cb3738Sbellard if (!drv) 538019cb3738Sbellard return 0; 538119cb3738Sbellard if (!drv->bdrv_is_inserted) 5382a1aff5bfSMarkus Armbruster return 1; 5383a1aff5bfSMarkus Armbruster return drv->bdrv_is_inserted(bs); 538419cb3738Sbellard } 538519cb3738Sbellard 538619cb3738Sbellard /** 53878e49ca46SMarkus Armbruster * Return whether the media changed since the last call to this 53888e49ca46SMarkus Armbruster * function, or -ENOTSUP if we don't know. Most drivers don't know. 538919cb3738Sbellard */ 539019cb3738Sbellard int bdrv_media_changed(BlockDriverState *bs) 539119cb3738Sbellard { 539219cb3738Sbellard BlockDriver *drv = bs->drv; 539319cb3738Sbellard 53948e49ca46SMarkus Armbruster if (drv && drv->bdrv_media_changed) { 53958e49ca46SMarkus Armbruster return drv->bdrv_media_changed(bs); 53968e49ca46SMarkus Armbruster } 53978e49ca46SMarkus Armbruster return -ENOTSUP; 539819cb3738Sbellard } 539919cb3738Sbellard 540019cb3738Sbellard /** 540119cb3738Sbellard * If eject_flag is TRUE, eject the media. 
Otherwise, close the tray 540219cb3738Sbellard */ 5403f36f3949SLuiz Capitulino void bdrv_eject(BlockDriverState *bs, bool eject_flag) 540419cb3738Sbellard { 540519cb3738Sbellard BlockDriver *drv = bs->drv; 5406bfb197e0SMarkus Armbruster const char *device_name; 540719cb3738Sbellard 5408822e1cd1SMarkus Armbruster if (drv && drv->bdrv_eject) { 5409822e1cd1SMarkus Armbruster drv->bdrv_eject(bs, eject_flag); 541019cb3738Sbellard } 54116f382ed2SLuiz Capitulino 5412bfb197e0SMarkus Armbruster device_name = bdrv_get_device_name(bs); 5413bfb197e0SMarkus Armbruster if (device_name[0] != '\0') { 5414bfb197e0SMarkus Armbruster qapi_event_send_device_tray_moved(device_name, 5415a5ee7bd4SWenchao Xia eject_flag, &error_abort); 54166f382ed2SLuiz Capitulino } 541719cb3738Sbellard } 541819cb3738Sbellard 541919cb3738Sbellard /** 542019cb3738Sbellard * Lock or unlock the media (if it is locked, the user won't be able 542119cb3738Sbellard * to eject it manually). 542219cb3738Sbellard */ 5423025e849aSMarkus Armbruster void bdrv_lock_medium(BlockDriverState *bs, bool locked) 542419cb3738Sbellard { 542519cb3738Sbellard BlockDriver *drv = bs->drv; 542619cb3738Sbellard 5427025e849aSMarkus Armbruster trace_bdrv_lock_medium(bs, locked); 5428b8c6d095SStefan Hajnoczi 5429025e849aSMarkus Armbruster if (drv && drv->bdrv_lock_medium) { 5430025e849aSMarkus Armbruster drv->bdrv_lock_medium(bs, locked); 543119cb3738Sbellard } 543219cb3738Sbellard } 5433985a03b0Sths 5434985a03b0Sths /* needed for generic scsi interface */ 5435985a03b0Sths 5436985a03b0Sths int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) 5437985a03b0Sths { 5438985a03b0Sths BlockDriver *drv = bs->drv; 5439985a03b0Sths 5440985a03b0Sths if (drv && drv->bdrv_ioctl) 5441985a03b0Sths return drv->bdrv_ioctl(bs, req, buf); 5442985a03b0Sths return -ENOTSUP; 5443985a03b0Sths } 54447d780669Saliguori 54457c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs, 5446221f715dSaliguori unsigned long int req, 
void *buf, 5447097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 54487d780669Saliguori { 5449221f715dSaliguori BlockDriver *drv = bs->drv; 54507d780669Saliguori 5451221f715dSaliguori if (drv && drv->bdrv_aio_ioctl) 5452221f715dSaliguori return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque); 5453221f715dSaliguori return NULL; 54547d780669Saliguori } 5455e268ca52Saliguori 54561b7fd729SPaolo Bonzini void bdrv_set_guest_block_size(BlockDriverState *bs, int align) 54577b6f9300SMarkus Armbruster { 54581b7fd729SPaolo Bonzini bs->guest_block_size = align; 54597b6f9300SMarkus Armbruster } 54607cd1e32aSlirans@il.ibm.com 5461e268ca52Saliguori void *qemu_blockalign(BlockDriverState *bs, size_t size) 5462e268ca52Saliguori { 5463339064d5SKevin Wolf return qemu_memalign(bdrv_opt_mem_align(bs), size); 5464e268ca52Saliguori } 54657cd1e32aSlirans@il.ibm.com 54669ebd8448SMax Reitz void *qemu_blockalign0(BlockDriverState *bs, size_t size) 54679ebd8448SMax Reitz { 54689ebd8448SMax Reitz return memset(qemu_blockalign(bs, size), 0, size); 54699ebd8448SMax Reitz } 54709ebd8448SMax Reitz 54717d2a35ccSKevin Wolf void *qemu_try_blockalign(BlockDriverState *bs, size_t size) 54727d2a35ccSKevin Wolf { 54737d2a35ccSKevin Wolf size_t align = bdrv_opt_mem_align(bs); 54747d2a35ccSKevin Wolf 54757d2a35ccSKevin Wolf /* Ensure that NULL is never returned on success */ 54767d2a35ccSKevin Wolf assert(align > 0); 54777d2a35ccSKevin Wolf if (size == 0) { 54787d2a35ccSKevin Wolf size = align; 54797d2a35ccSKevin Wolf } 54807d2a35ccSKevin Wolf 54817d2a35ccSKevin Wolf return qemu_try_memalign(align, size); 54827d2a35ccSKevin Wolf } 54837d2a35ccSKevin Wolf 54849ebd8448SMax Reitz void *qemu_try_blockalign0(BlockDriverState *bs, size_t size) 54859ebd8448SMax Reitz { 54869ebd8448SMax Reitz void *mem = qemu_try_blockalign(bs, size); 54879ebd8448SMax Reitz 54889ebd8448SMax Reitz if (mem) { 54899ebd8448SMax Reitz memset(mem, 0, size); 54909ebd8448SMax Reitz } 54919ebd8448SMax Reitz 54929ebd8448SMax 
Reitz return mem; 54939ebd8448SMax Reitz } 54949ebd8448SMax Reitz 5495c53b1c51SStefan Hajnoczi /* 5496c53b1c51SStefan Hajnoczi * Check if all memory in this vector is sector aligned. 5497c53b1c51SStefan Hajnoczi */ 5498c53b1c51SStefan Hajnoczi bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) 5499c53b1c51SStefan Hajnoczi { 5500c53b1c51SStefan Hajnoczi int i; 5501339064d5SKevin Wolf size_t alignment = bdrv_opt_mem_align(bs); 5502c53b1c51SStefan Hajnoczi 5503c53b1c51SStefan Hajnoczi for (i = 0; i < qiov->niov; i++) { 5504339064d5SKevin Wolf if ((uintptr_t) qiov->iov[i].iov_base % alignment) { 5505c53b1c51SStefan Hajnoczi return false; 5506c53b1c51SStefan Hajnoczi } 5507339064d5SKevin Wolf if (qiov->iov[i].iov_len % alignment) { 55081ff735bdSKevin Wolf return false; 55091ff735bdSKevin Wolf } 5510c53b1c51SStefan Hajnoczi } 5511c53b1c51SStefan Hajnoczi 5512c53b1c51SStefan Hajnoczi return true; 5513c53b1c51SStefan Hajnoczi } 5514c53b1c51SStefan Hajnoczi 55150db6e54aSFam Zheng BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name) 55160db6e54aSFam Zheng { 55170db6e54aSFam Zheng BdrvDirtyBitmap *bm; 55180db6e54aSFam Zheng 55190db6e54aSFam Zheng assert(name); 55200db6e54aSFam Zheng QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { 55210db6e54aSFam Zheng if (bm->name && !strcmp(name, bm->name)) { 55220db6e54aSFam Zheng return bm; 55230db6e54aSFam Zheng } 55240db6e54aSFam Zheng } 55250db6e54aSFam Zheng return NULL; 55260db6e54aSFam Zheng } 55270db6e54aSFam Zheng 55280db6e54aSFam Zheng void bdrv_dirty_bitmap_make_anon(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) 55290db6e54aSFam Zheng { 5530*9bd2b08fSJohn Snow assert(!bdrv_dirty_bitmap_frozen(bitmap)); 55310db6e54aSFam Zheng g_free(bitmap->name); 55320db6e54aSFam Zheng bitmap->name = NULL; 55330db6e54aSFam Zheng } 55340db6e54aSFam Zheng 55350db6e54aSFam Zheng BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, 55365fba6c0eSJohn Snow uint32_t granularity, 55370db6e54aSFam 
Zheng const char *name, 5538b8afb520SFam Zheng Error **errp) 55397cd1e32aSlirans@il.ibm.com { 55407cd1e32aSlirans@il.ibm.com int64_t bitmap_size; 5541e4654d2dSFam Zheng BdrvDirtyBitmap *bitmap; 55425fba6c0eSJohn Snow uint32_t sector_granularity; 5543a55eb92cSJan Kiszka 554450717e94SPaolo Bonzini assert((granularity & (granularity - 1)) == 0); 554550717e94SPaolo Bonzini 55460db6e54aSFam Zheng if (name && bdrv_find_dirty_bitmap(bs, name)) { 55470db6e54aSFam Zheng error_setg(errp, "Bitmap already exists: %s", name); 55480db6e54aSFam Zheng return NULL; 55490db6e54aSFam Zheng } 55505fba6c0eSJohn Snow sector_granularity = granularity >> BDRV_SECTOR_BITS; 55515fba6c0eSJohn Snow assert(sector_granularity); 555257322b78SMarkus Armbruster bitmap_size = bdrv_nb_sectors(bs); 5553b8afb520SFam Zheng if (bitmap_size < 0) { 5554b8afb520SFam Zheng error_setg_errno(errp, -bitmap_size, "could not get length of device"); 5555b8afb520SFam Zheng errno = -bitmap_size; 5556b8afb520SFam Zheng return NULL; 5557b8afb520SFam Zheng } 55585839e53bSMarkus Armbruster bitmap = g_new0(BdrvDirtyBitmap, 1); 55595fba6c0eSJohn Snow bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity)); 55600db6e54aSFam Zheng bitmap->name = g_strdup(name); 5561b8e6fb75SJohn Snow bitmap->disabled = false; 5562e4654d2dSFam Zheng QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list); 5563e4654d2dSFam Zheng return bitmap; 5564e4654d2dSFam Zheng } 5565e4654d2dSFam Zheng 5566*9bd2b08fSJohn Snow bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap) 5567*9bd2b08fSJohn Snow { 5568*9bd2b08fSJohn Snow return bitmap->successor; 5569*9bd2b08fSJohn Snow } 5570*9bd2b08fSJohn Snow 5571b8e6fb75SJohn Snow bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap) 5572b8e6fb75SJohn Snow { 5573*9bd2b08fSJohn Snow return !(bitmap->disabled || bitmap->successor); 5574*9bd2b08fSJohn Snow } 5575*9bd2b08fSJohn Snow 5576*9bd2b08fSJohn Snow /** 5577*9bd2b08fSJohn Snow * Create a successor bitmap destined to replace this bitmap after 
an operation. 5578*9bd2b08fSJohn Snow * Requires that the bitmap is not frozen and has no successor. 5579*9bd2b08fSJohn Snow */ 5580*9bd2b08fSJohn Snow int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs, 5581*9bd2b08fSJohn Snow BdrvDirtyBitmap *bitmap, Error **errp) 5582*9bd2b08fSJohn Snow { 5583*9bd2b08fSJohn Snow uint64_t granularity; 5584*9bd2b08fSJohn Snow BdrvDirtyBitmap *child; 5585*9bd2b08fSJohn Snow 5586*9bd2b08fSJohn Snow if (bdrv_dirty_bitmap_frozen(bitmap)) { 5587*9bd2b08fSJohn Snow error_setg(errp, "Cannot create a successor for a bitmap that is " 5588*9bd2b08fSJohn Snow "currently frozen"); 5589*9bd2b08fSJohn Snow return -1; 5590*9bd2b08fSJohn Snow } 5591*9bd2b08fSJohn Snow assert(!bitmap->successor); 5592*9bd2b08fSJohn Snow 5593*9bd2b08fSJohn Snow /* Create an anonymous successor */ 5594*9bd2b08fSJohn Snow granularity = bdrv_dirty_bitmap_granularity(bitmap); 5595*9bd2b08fSJohn Snow child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp); 5596*9bd2b08fSJohn Snow if (!child) { 5597*9bd2b08fSJohn Snow return -1; 5598*9bd2b08fSJohn Snow } 5599*9bd2b08fSJohn Snow 5600*9bd2b08fSJohn Snow /* Successor will be on or off based on our current state. */ 5601*9bd2b08fSJohn Snow child->disabled = bitmap->disabled; 5602*9bd2b08fSJohn Snow 5603*9bd2b08fSJohn Snow /* Install the successor and freeze the parent */ 5604*9bd2b08fSJohn Snow bitmap->successor = child; 5605*9bd2b08fSJohn Snow return 0; 5606*9bd2b08fSJohn Snow } 5607*9bd2b08fSJohn Snow 5608*9bd2b08fSJohn Snow /** 5609*9bd2b08fSJohn Snow * For a bitmap with a successor, yield our name to the successor, 5610*9bd2b08fSJohn Snow * delete the old bitmap, and return a handle to the new bitmap. 
5611*9bd2b08fSJohn Snow */ 5612*9bd2b08fSJohn Snow BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs, 5613*9bd2b08fSJohn Snow BdrvDirtyBitmap *bitmap, 5614*9bd2b08fSJohn Snow Error **errp) 5615*9bd2b08fSJohn Snow { 5616*9bd2b08fSJohn Snow char *name; 5617*9bd2b08fSJohn Snow BdrvDirtyBitmap *successor = bitmap->successor; 5618*9bd2b08fSJohn Snow 5619*9bd2b08fSJohn Snow if (successor == NULL) { 5620*9bd2b08fSJohn Snow error_setg(errp, "Cannot relinquish control if " 5621*9bd2b08fSJohn Snow "there's no successor present"); 5622*9bd2b08fSJohn Snow return NULL; 5623*9bd2b08fSJohn Snow } 5624*9bd2b08fSJohn Snow 5625*9bd2b08fSJohn Snow name = bitmap->name; 5626*9bd2b08fSJohn Snow bitmap->name = NULL; 5627*9bd2b08fSJohn Snow successor->name = name; 5628*9bd2b08fSJohn Snow bitmap->successor = NULL; 5629*9bd2b08fSJohn Snow bdrv_release_dirty_bitmap(bs, bitmap); 5630*9bd2b08fSJohn Snow 5631*9bd2b08fSJohn Snow return successor; 5632*9bd2b08fSJohn Snow } 5633*9bd2b08fSJohn Snow 5634*9bd2b08fSJohn Snow /** 5635*9bd2b08fSJohn Snow * In cases of failure where we can no longer safely delete the parent, 5636*9bd2b08fSJohn Snow * we may wish to re-join the parent and child/successor. 5637*9bd2b08fSJohn Snow * The merged parent will be un-frozen, but not explicitly re-enabled. 
5638*9bd2b08fSJohn Snow */ 5639*9bd2b08fSJohn Snow BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs, 5640*9bd2b08fSJohn Snow BdrvDirtyBitmap *parent, 5641*9bd2b08fSJohn Snow Error **errp) 5642*9bd2b08fSJohn Snow { 5643*9bd2b08fSJohn Snow BdrvDirtyBitmap *successor = parent->successor; 5644*9bd2b08fSJohn Snow 5645*9bd2b08fSJohn Snow if (!successor) { 5646*9bd2b08fSJohn Snow error_setg(errp, "Cannot reclaim a successor when none is present"); 5647*9bd2b08fSJohn Snow return NULL; 5648*9bd2b08fSJohn Snow } 5649*9bd2b08fSJohn Snow 5650*9bd2b08fSJohn Snow if (!hbitmap_merge(parent->bitmap, successor->bitmap)) { 5651*9bd2b08fSJohn Snow error_setg(errp, "Merging of parent and successor bitmap failed"); 5652*9bd2b08fSJohn Snow return NULL; 5653*9bd2b08fSJohn Snow } 5654*9bd2b08fSJohn Snow bdrv_release_dirty_bitmap(bs, successor); 5655*9bd2b08fSJohn Snow parent->successor = NULL; 5656*9bd2b08fSJohn Snow 5657*9bd2b08fSJohn Snow return parent; 5658b8e6fb75SJohn Snow } 5659b8e6fb75SJohn Snow 5660e4654d2dSFam Zheng void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) 5661e4654d2dSFam Zheng { 5662e4654d2dSFam Zheng BdrvDirtyBitmap *bm, *next; 5663e4654d2dSFam Zheng QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) { 5664e4654d2dSFam Zheng if (bm == bitmap) { 5665*9bd2b08fSJohn Snow assert(!bdrv_dirty_bitmap_frozen(bm)); 5666e4654d2dSFam Zheng QLIST_REMOVE(bitmap, list); 5667e4654d2dSFam Zheng hbitmap_free(bitmap->bitmap); 56680db6e54aSFam Zheng g_free(bitmap->name); 5669e4654d2dSFam Zheng g_free(bitmap); 5670e4654d2dSFam Zheng return; 56717cd1e32aSlirans@il.ibm.com } 56727cd1e32aSlirans@il.ibm.com } 56737cd1e32aSlirans@il.ibm.com } 56747cd1e32aSlirans@il.ibm.com 5675b8e6fb75SJohn Snow void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap) 5676b8e6fb75SJohn Snow { 5677*9bd2b08fSJohn Snow assert(!bdrv_dirty_bitmap_frozen(bitmap)); 5678b8e6fb75SJohn Snow bitmap->disabled = true; 5679b8e6fb75SJohn Snow } 5680b8e6fb75SJohn Snow 
5681b8e6fb75SJohn Snow void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap) 5682b8e6fb75SJohn Snow { 5683*9bd2b08fSJohn Snow assert(!bdrv_dirty_bitmap_frozen(bitmap)); 5684b8e6fb75SJohn Snow bitmap->disabled = false; 5685b8e6fb75SJohn Snow } 5686b8e6fb75SJohn Snow 568721b56835SFam Zheng BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs) 568821b56835SFam Zheng { 568921b56835SFam Zheng BdrvDirtyBitmap *bm; 569021b56835SFam Zheng BlockDirtyInfoList *list = NULL; 569121b56835SFam Zheng BlockDirtyInfoList **plist = &list; 569221b56835SFam Zheng 569321b56835SFam Zheng QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { 56945839e53bSMarkus Armbruster BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1); 56955839e53bSMarkus Armbruster BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1); 569621b56835SFam Zheng info->count = bdrv_get_dirty_count(bs, bm); 5697592fdd02SJohn Snow info->granularity = bdrv_dirty_bitmap_granularity(bm); 56980db6e54aSFam Zheng info->has_name = !!bm->name; 56990db6e54aSFam Zheng info->name = g_strdup(bm->name); 570021b56835SFam Zheng entry->value = info; 570121b56835SFam Zheng *plist = entry; 570221b56835SFam Zheng plist = &entry->next; 570321b56835SFam Zheng } 570421b56835SFam Zheng 570521b56835SFam Zheng return list; 570621b56835SFam Zheng } 570721b56835SFam Zheng 5708e4654d2dSFam Zheng int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector) 57097cd1e32aSlirans@il.ibm.com { 5710e4654d2dSFam Zheng if (bitmap) { 5711e4654d2dSFam Zheng return hbitmap_get(bitmap->bitmap, sector); 57127cd1e32aSlirans@il.ibm.com } else { 57137cd1e32aSlirans@il.ibm.com return 0; 57147cd1e32aSlirans@il.ibm.com } 57157cd1e32aSlirans@il.ibm.com } 57167cd1e32aSlirans@il.ibm.com 5717341ebc2fSJohn Snow /** 5718341ebc2fSJohn Snow * Chooses a default granularity based on the existing cluster size, 5719341ebc2fSJohn Snow * but clamped between [4K, 64K]. 
Defaults to 64K in the case that there 5720341ebc2fSJohn Snow * is no cluster size information available. 5721341ebc2fSJohn Snow */ 5722341ebc2fSJohn Snow uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs) 5723341ebc2fSJohn Snow { 5724341ebc2fSJohn Snow BlockDriverInfo bdi; 5725341ebc2fSJohn Snow uint32_t granularity; 5726341ebc2fSJohn Snow 5727341ebc2fSJohn Snow if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) { 5728341ebc2fSJohn Snow granularity = MAX(4096, bdi.cluster_size); 5729341ebc2fSJohn Snow granularity = MIN(65536, granularity); 5730341ebc2fSJohn Snow } else { 5731341ebc2fSJohn Snow granularity = 65536; 5732341ebc2fSJohn Snow } 5733341ebc2fSJohn Snow 5734341ebc2fSJohn Snow return granularity; 5735341ebc2fSJohn Snow } 5736341ebc2fSJohn Snow 5737592fdd02SJohn Snow uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap) 5738592fdd02SJohn Snow { 5739592fdd02SJohn Snow return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap); 5740592fdd02SJohn Snow } 5741592fdd02SJohn Snow 5742e4654d2dSFam Zheng void bdrv_dirty_iter_init(BlockDriverState *bs, 5743e4654d2dSFam Zheng BdrvDirtyBitmap *bitmap, HBitmapIter *hbi) 57441755da16SPaolo Bonzini { 5745e4654d2dSFam Zheng hbitmap_iter_init(hbi, bitmap->bitmap, 0); 57461755da16SPaolo Bonzini } 57471755da16SPaolo Bonzini 5748c4237dfaSVladimir Sementsov-Ogievskiy void bdrv_set_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, 5749c4237dfaSVladimir Sementsov-Ogievskiy int64_t cur_sector, int nr_sectors) 5750c4237dfaSVladimir Sementsov-Ogievskiy { 5751b8e6fb75SJohn Snow assert(bdrv_dirty_bitmap_enabled(bitmap)); 5752c4237dfaSVladimir Sementsov-Ogievskiy hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); 5753c4237dfaSVladimir Sementsov-Ogievskiy } 5754c4237dfaSVladimir Sementsov-Ogievskiy 5755c4237dfaSVladimir Sementsov-Ogievskiy void bdrv_reset_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, 5756c4237dfaSVladimir Sementsov-Ogievskiy int64_t cur_sector, int 
nr_sectors) 5757c4237dfaSVladimir Sementsov-Ogievskiy { 5758b8e6fb75SJohn Snow assert(bdrv_dirty_bitmap_enabled(bitmap)); 5759c4237dfaSVladimir Sementsov-Ogievskiy hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); 5760c4237dfaSVladimir Sementsov-Ogievskiy } 5761c4237dfaSVladimir Sementsov-Ogievskiy 5762c4237dfaSVladimir Sementsov-Ogievskiy static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, 57631755da16SPaolo Bonzini int nr_sectors) 57641755da16SPaolo Bonzini { 5765e4654d2dSFam Zheng BdrvDirtyBitmap *bitmap; 5766e4654d2dSFam Zheng QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { 5767b8e6fb75SJohn Snow if (!bdrv_dirty_bitmap_enabled(bitmap)) { 5768b8e6fb75SJohn Snow continue; 5769b8e6fb75SJohn Snow } 5770e4654d2dSFam Zheng hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); 5771e4654d2dSFam Zheng } 57721755da16SPaolo Bonzini } 57731755da16SPaolo Bonzini 5774c4237dfaSVladimir Sementsov-Ogievskiy static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, 5775c4237dfaSVladimir Sementsov-Ogievskiy int nr_sectors) 57767cd1e32aSlirans@il.ibm.com { 5777e4654d2dSFam Zheng BdrvDirtyBitmap *bitmap; 5778e4654d2dSFam Zheng QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { 5779b8e6fb75SJohn Snow if (!bdrv_dirty_bitmap_enabled(bitmap)) { 5780b8e6fb75SJohn Snow continue; 5781b8e6fb75SJohn Snow } 5782e4654d2dSFam Zheng hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); 5783e4654d2dSFam Zheng } 57847cd1e32aSlirans@il.ibm.com } 5785aaa0eb75SLiran Schour 5786e4654d2dSFam Zheng int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) 5787aaa0eb75SLiran Schour { 5788e4654d2dSFam Zheng return hbitmap_count(bitmap->bitmap); 5789aaa0eb75SLiran Schour } 5790f88e1a42SJes Sorensen 57919fcb0251SFam Zheng /* Get a reference to bs */ 57929fcb0251SFam Zheng void bdrv_ref(BlockDriverState *bs) 57939fcb0251SFam Zheng { 57949fcb0251SFam Zheng bs->refcnt++; 57959fcb0251SFam Zheng } 57969fcb0251SFam Zheng 57979fcb0251SFam Zheng /* Release a 
previously grabbed reference to bs. 57989fcb0251SFam Zheng * If after releasing, reference count is zero, the BlockDriverState is 57999fcb0251SFam Zheng * deleted. */ 58009fcb0251SFam Zheng void bdrv_unref(BlockDriverState *bs) 58019fcb0251SFam Zheng { 58029a4d5ca6SJeff Cody if (!bs) { 58039a4d5ca6SJeff Cody return; 58049a4d5ca6SJeff Cody } 58059fcb0251SFam Zheng assert(bs->refcnt > 0); 58069fcb0251SFam Zheng if (--bs->refcnt == 0) { 58079fcb0251SFam Zheng bdrv_delete(bs); 58089fcb0251SFam Zheng } 58099fcb0251SFam Zheng } 58109fcb0251SFam Zheng 5811fbe40ff7SFam Zheng struct BdrvOpBlocker { 5812fbe40ff7SFam Zheng Error *reason; 5813fbe40ff7SFam Zheng QLIST_ENTRY(BdrvOpBlocker) list; 5814fbe40ff7SFam Zheng }; 5815fbe40ff7SFam Zheng 5816fbe40ff7SFam Zheng bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp) 5817fbe40ff7SFam Zheng { 5818fbe40ff7SFam Zheng BdrvOpBlocker *blocker; 5819fbe40ff7SFam Zheng assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 5820fbe40ff7SFam Zheng if (!QLIST_EMPTY(&bs->op_blockers[op])) { 5821fbe40ff7SFam Zheng blocker = QLIST_FIRST(&bs->op_blockers[op]); 5822fbe40ff7SFam Zheng if (errp) { 582381e5f78aSAlberto Garcia error_setg(errp, "Node '%s' is busy: %s", 582481e5f78aSAlberto Garcia bdrv_get_device_or_node_name(bs), 5825bfb197e0SMarkus Armbruster error_get_pretty(blocker->reason)); 5826fbe40ff7SFam Zheng } 5827fbe40ff7SFam Zheng return true; 5828fbe40ff7SFam Zheng } 5829fbe40ff7SFam Zheng return false; 5830fbe40ff7SFam Zheng } 5831fbe40ff7SFam Zheng 5832fbe40ff7SFam Zheng void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason) 5833fbe40ff7SFam Zheng { 5834fbe40ff7SFam Zheng BdrvOpBlocker *blocker; 5835fbe40ff7SFam Zheng assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 5836fbe40ff7SFam Zheng 58375839e53bSMarkus Armbruster blocker = g_new0(BdrvOpBlocker, 1); 5838fbe40ff7SFam Zheng blocker->reason = reason; 5839fbe40ff7SFam Zheng QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list); 5840fbe40ff7SFam 
Zheng } 5841fbe40ff7SFam Zheng 5842fbe40ff7SFam Zheng void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason) 5843fbe40ff7SFam Zheng { 5844fbe40ff7SFam Zheng BdrvOpBlocker *blocker, *next; 5845fbe40ff7SFam Zheng assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 5846fbe40ff7SFam Zheng QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) { 5847fbe40ff7SFam Zheng if (blocker->reason == reason) { 5848fbe40ff7SFam Zheng QLIST_REMOVE(blocker, list); 5849fbe40ff7SFam Zheng g_free(blocker); 5850fbe40ff7SFam Zheng } 5851fbe40ff7SFam Zheng } 5852fbe40ff7SFam Zheng } 5853fbe40ff7SFam Zheng 5854fbe40ff7SFam Zheng void bdrv_op_block_all(BlockDriverState *bs, Error *reason) 5855fbe40ff7SFam Zheng { 5856fbe40ff7SFam Zheng int i; 5857fbe40ff7SFam Zheng for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 5858fbe40ff7SFam Zheng bdrv_op_block(bs, i, reason); 5859fbe40ff7SFam Zheng } 5860fbe40ff7SFam Zheng } 5861fbe40ff7SFam Zheng 5862fbe40ff7SFam Zheng void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason) 5863fbe40ff7SFam Zheng { 5864fbe40ff7SFam Zheng int i; 5865fbe40ff7SFam Zheng for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 5866fbe40ff7SFam Zheng bdrv_op_unblock(bs, i, reason); 5867fbe40ff7SFam Zheng } 5868fbe40ff7SFam Zheng } 5869fbe40ff7SFam Zheng 5870fbe40ff7SFam Zheng bool bdrv_op_blocker_is_empty(BlockDriverState *bs) 5871fbe40ff7SFam Zheng { 5872fbe40ff7SFam Zheng int i; 5873fbe40ff7SFam Zheng 5874fbe40ff7SFam Zheng for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 5875fbe40ff7SFam Zheng if (!QLIST_EMPTY(&bs->op_blockers[i])) { 5876fbe40ff7SFam Zheng return false; 5877fbe40ff7SFam Zheng } 5878fbe40ff7SFam Zheng } 5879fbe40ff7SFam Zheng return true; 5880fbe40ff7SFam Zheng } 5881fbe40ff7SFam Zheng 588228a7282aSLuiz Capitulino void bdrv_iostatus_enable(BlockDriverState *bs) 588328a7282aSLuiz Capitulino { 5884d6bf279eSLuiz Capitulino bs->iostatus_enabled = true; 588558e21ef5SLuiz Capitulino bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK; 588628a7282aSLuiz Capitulino } 
588728a7282aSLuiz Capitulino 588828a7282aSLuiz Capitulino /* The I/O status is only enabled if the drive explicitly 588928a7282aSLuiz Capitulino * enables it _and_ the VM is configured to stop on errors */ 589028a7282aSLuiz Capitulino bool bdrv_iostatus_is_enabled(const BlockDriverState *bs) 589128a7282aSLuiz Capitulino { 5892d6bf279eSLuiz Capitulino return (bs->iostatus_enabled && 589392aa5c6dSPaolo Bonzini (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC || 589492aa5c6dSPaolo Bonzini bs->on_write_error == BLOCKDEV_ON_ERROR_STOP || 589592aa5c6dSPaolo Bonzini bs->on_read_error == BLOCKDEV_ON_ERROR_STOP)); 589628a7282aSLuiz Capitulino } 589728a7282aSLuiz Capitulino 589828a7282aSLuiz Capitulino void bdrv_iostatus_disable(BlockDriverState *bs) 589928a7282aSLuiz Capitulino { 5900d6bf279eSLuiz Capitulino bs->iostatus_enabled = false; 590128a7282aSLuiz Capitulino } 590228a7282aSLuiz Capitulino 590328a7282aSLuiz Capitulino void bdrv_iostatus_reset(BlockDriverState *bs) 590428a7282aSLuiz Capitulino { 590528a7282aSLuiz Capitulino if (bdrv_iostatus_is_enabled(bs)) { 590658e21ef5SLuiz Capitulino bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK; 59073bd293c3SPaolo Bonzini if (bs->job) { 59083bd293c3SPaolo Bonzini block_job_iostatus_reset(bs->job); 59093bd293c3SPaolo Bonzini } 591028a7282aSLuiz Capitulino } 591128a7282aSLuiz Capitulino } 591228a7282aSLuiz Capitulino 591328a7282aSLuiz Capitulino void bdrv_iostatus_set_err(BlockDriverState *bs, int error) 591428a7282aSLuiz Capitulino { 59153e1caa5fSPaolo Bonzini assert(bdrv_iostatus_is_enabled(bs)); 59163e1caa5fSPaolo Bonzini if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { 591758e21ef5SLuiz Capitulino bs->iostatus = error == ENOSPC ? 
BLOCK_DEVICE_IO_STATUS_NOSPACE : 591858e21ef5SLuiz Capitulino BLOCK_DEVICE_IO_STATUS_FAILED; 591928a7282aSLuiz Capitulino } 592028a7282aSLuiz Capitulino } 592128a7282aSLuiz Capitulino 5922d92ada22SLuiz Capitulino void bdrv_img_create(const char *filename, const char *fmt, 5923f88e1a42SJes Sorensen const char *base_filename, const char *base_fmt, 5924f382d43aSMiroslav Rezanina char *options, uint64_t img_size, int flags, 5925f382d43aSMiroslav Rezanina Error **errp, bool quiet) 5926f88e1a42SJes Sorensen { 592783d0521aSChunyan Liu QemuOptsList *create_opts = NULL; 592883d0521aSChunyan Liu QemuOpts *opts = NULL; 592983d0521aSChunyan Liu const char *backing_fmt, *backing_file; 593083d0521aSChunyan Liu int64_t size; 5931f88e1a42SJes Sorensen BlockDriver *drv, *proto_drv; 593296df67d1SStefan Hajnoczi BlockDriver *backing_drv = NULL; 5933cc84d90fSMax Reitz Error *local_err = NULL; 5934f88e1a42SJes Sorensen int ret = 0; 5935f88e1a42SJes Sorensen 5936f88e1a42SJes Sorensen /* Find driver and parse its options */ 5937f88e1a42SJes Sorensen drv = bdrv_find_format(fmt); 5938f88e1a42SJes Sorensen if (!drv) { 593971c79813SLuiz Capitulino error_setg(errp, "Unknown file format '%s'", fmt); 5940d92ada22SLuiz Capitulino return; 5941f88e1a42SJes Sorensen } 5942f88e1a42SJes Sorensen 5943b65a5e12SMax Reitz proto_drv = bdrv_find_protocol(filename, true, errp); 5944f88e1a42SJes Sorensen if (!proto_drv) { 5945d92ada22SLuiz Capitulino return; 5946f88e1a42SJes Sorensen } 5947f88e1a42SJes Sorensen 5948c6149724SMax Reitz if (!drv->create_opts) { 5949c6149724SMax Reitz error_setg(errp, "Format driver '%s' does not support image creation", 5950c6149724SMax Reitz drv->format_name); 5951c6149724SMax Reitz return; 5952c6149724SMax Reitz } 5953c6149724SMax Reitz 5954c6149724SMax Reitz if (!proto_drv->create_opts) { 5955c6149724SMax Reitz error_setg(errp, "Protocol driver '%s' does not support image creation", 5956c6149724SMax Reitz proto_drv->format_name); 5957c6149724SMax Reitz return; 
5958c6149724SMax Reitz } 5959c6149724SMax Reitz 5960c282e1fdSChunyan Liu create_opts = qemu_opts_append(create_opts, drv->create_opts); 5961c282e1fdSChunyan Liu create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); 5962f88e1a42SJes Sorensen 5963f88e1a42SJes Sorensen /* Create parameter list with default values */ 596483d0521aSChunyan Liu opts = qemu_opts_create(create_opts, NULL, 0, &error_abort); 596539101f25SMarkus Armbruster qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort); 5966f88e1a42SJes Sorensen 5967f88e1a42SJes Sorensen /* Parse -o options */ 5968f88e1a42SJes Sorensen if (options) { 5969dc523cd3SMarkus Armbruster qemu_opts_do_parse(opts, options, NULL, &local_err); 5970dc523cd3SMarkus Armbruster if (local_err) { 5971dc523cd3SMarkus Armbruster error_report_err(local_err); 5972dc523cd3SMarkus Armbruster local_err = NULL; 597383d0521aSChunyan Liu error_setg(errp, "Invalid options for file format '%s'", fmt); 5974f88e1a42SJes Sorensen goto out; 5975f88e1a42SJes Sorensen } 5976f88e1a42SJes Sorensen } 5977f88e1a42SJes Sorensen 5978f88e1a42SJes Sorensen if (base_filename) { 5979f43e47dbSMarkus Armbruster qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err); 59806be4194bSMarkus Armbruster if (local_err) { 598171c79813SLuiz Capitulino error_setg(errp, "Backing file not supported for file format '%s'", 598271c79813SLuiz Capitulino fmt); 5983f88e1a42SJes Sorensen goto out; 5984f88e1a42SJes Sorensen } 5985f88e1a42SJes Sorensen } 5986f88e1a42SJes Sorensen 5987f88e1a42SJes Sorensen if (base_fmt) { 5988f43e47dbSMarkus Armbruster qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err); 59896be4194bSMarkus Armbruster if (local_err) { 599071c79813SLuiz Capitulino error_setg(errp, "Backing file format not supported for file " 599171c79813SLuiz Capitulino "format '%s'", fmt); 5992f88e1a42SJes Sorensen goto out; 5993f88e1a42SJes Sorensen } 5994f88e1a42SJes Sorensen } 5995f88e1a42SJes Sorensen 599683d0521aSChunyan Liu 
backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE); 599783d0521aSChunyan Liu if (backing_file) { 599883d0521aSChunyan Liu if (!strcmp(filename, backing_file)) { 599971c79813SLuiz Capitulino error_setg(errp, "Error: Trying to create an image with the " 600071c79813SLuiz Capitulino "same filename as the backing file"); 6001792da93aSJes Sorensen goto out; 6002792da93aSJes Sorensen } 6003792da93aSJes Sorensen } 6004792da93aSJes Sorensen 600583d0521aSChunyan Liu backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT); 600683d0521aSChunyan Liu if (backing_fmt) { 600783d0521aSChunyan Liu backing_drv = bdrv_find_format(backing_fmt); 600896df67d1SStefan Hajnoczi if (!backing_drv) { 600971c79813SLuiz Capitulino error_setg(errp, "Unknown backing file format '%s'", 601083d0521aSChunyan Liu backing_fmt); 6011f88e1a42SJes Sorensen goto out; 6012f88e1a42SJes Sorensen } 6013f88e1a42SJes Sorensen } 6014f88e1a42SJes Sorensen 6015f88e1a42SJes Sorensen // The size for the image must always be specified, with one exception: 6016f88e1a42SJes Sorensen // If we are using a backing file, we can obtain the size from there 601783d0521aSChunyan Liu size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0); 601883d0521aSChunyan Liu if (size == -1) { 601983d0521aSChunyan Liu if (backing_file) { 602066f6b814SMax Reitz BlockDriverState *bs; 602129168018SMax Reitz char *full_backing = g_new0(char, PATH_MAX); 602252bf1e72SMarkus Armbruster int64_t size; 602363090dacSPaolo Bonzini int back_flags; 602463090dacSPaolo Bonzini 602529168018SMax Reitz bdrv_get_full_backing_filename_from_filename(filename, backing_file, 602629168018SMax Reitz full_backing, PATH_MAX, 602729168018SMax Reitz &local_err); 602829168018SMax Reitz if (local_err) { 602929168018SMax Reitz g_free(full_backing); 603029168018SMax Reitz goto out; 603129168018SMax Reitz } 603229168018SMax Reitz 603363090dacSPaolo Bonzini /* backing files always opened read-only */ 603463090dacSPaolo Bonzini back_flags = 603563090dacSPaolo Bonzini flags & 
~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); 6036f88e1a42SJes Sorensen 6037f67503e5SMax Reitz bs = NULL; 603829168018SMax Reitz ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags, 6039cc84d90fSMax Reitz backing_drv, &local_err); 604029168018SMax Reitz g_free(full_backing); 6041f88e1a42SJes Sorensen if (ret < 0) { 6042f88e1a42SJes Sorensen goto out; 6043f88e1a42SJes Sorensen } 604452bf1e72SMarkus Armbruster size = bdrv_getlength(bs); 604552bf1e72SMarkus Armbruster if (size < 0) { 604652bf1e72SMarkus Armbruster error_setg_errno(errp, -size, "Could not get size of '%s'", 604752bf1e72SMarkus Armbruster backing_file); 604852bf1e72SMarkus Armbruster bdrv_unref(bs); 604952bf1e72SMarkus Armbruster goto out; 605052bf1e72SMarkus Armbruster } 6051f88e1a42SJes Sorensen 605239101f25SMarkus Armbruster qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort); 605366f6b814SMax Reitz 605466f6b814SMax Reitz bdrv_unref(bs); 6055f88e1a42SJes Sorensen } else { 605671c79813SLuiz Capitulino error_setg(errp, "Image creation needs a size parameter"); 6057f88e1a42SJes Sorensen goto out; 6058f88e1a42SJes Sorensen } 6059f88e1a42SJes Sorensen } 6060f88e1a42SJes Sorensen 6061f382d43aSMiroslav Rezanina if (!quiet) { 6062f88e1a42SJes Sorensen printf("Formatting '%s', fmt=%s", filename, fmt); 606343c5d8f8SFam Zheng qemu_opts_print(opts, " "); 6064f88e1a42SJes Sorensen puts(""); 6065f382d43aSMiroslav Rezanina } 606683d0521aSChunyan Liu 6067c282e1fdSChunyan Liu ret = bdrv_create(drv, filename, opts, &local_err); 606883d0521aSChunyan Liu 6069cc84d90fSMax Reitz if (ret == -EFBIG) { 6070cc84d90fSMax Reitz /* This is generally a better message than whatever the driver would 6071cc84d90fSMax Reitz * deliver (especially because of the cluster_size_hint), since that 6072cc84d90fSMax Reitz * is most probably not much different from "image too large". 
*/ 6073f3f4d2c0SKevin Wolf const char *cluster_size_hint = ""; 607483d0521aSChunyan Liu if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) { 6075f3f4d2c0SKevin Wolf cluster_size_hint = " (try using a larger cluster size)"; 6076f3f4d2c0SKevin Wolf } 6077cc84d90fSMax Reitz error_setg(errp, "The image size is too large for file format '%s'" 6078cc84d90fSMax Reitz "%s", fmt, cluster_size_hint); 6079cc84d90fSMax Reitz error_free(local_err); 6080cc84d90fSMax Reitz local_err = NULL; 6081f88e1a42SJes Sorensen } 6082f88e1a42SJes Sorensen 6083f88e1a42SJes Sorensen out: 608483d0521aSChunyan Liu qemu_opts_del(opts); 608583d0521aSChunyan Liu qemu_opts_free(create_opts); 608684d18f06SMarkus Armbruster if (local_err) { 6087cc84d90fSMax Reitz error_propagate(errp, local_err); 6088cc84d90fSMax Reitz } 6089f88e1a42SJes Sorensen } 609085d126f3SStefan Hajnoczi 609185d126f3SStefan Hajnoczi AioContext *bdrv_get_aio_context(BlockDriverState *bs) 609285d126f3SStefan Hajnoczi { 6093dcd04228SStefan Hajnoczi return bs->aio_context; 6094dcd04228SStefan Hajnoczi } 6095dcd04228SStefan Hajnoczi 6096dcd04228SStefan Hajnoczi void bdrv_detach_aio_context(BlockDriverState *bs) 6097dcd04228SStefan Hajnoczi { 609833384421SMax Reitz BdrvAioNotifier *baf; 609933384421SMax Reitz 6100dcd04228SStefan Hajnoczi if (!bs->drv) { 6101dcd04228SStefan Hajnoczi return; 6102dcd04228SStefan Hajnoczi } 6103dcd04228SStefan Hajnoczi 610433384421SMax Reitz QLIST_FOREACH(baf, &bs->aio_notifiers, list) { 610533384421SMax Reitz baf->detach_aio_context(baf->opaque); 610633384421SMax Reitz } 610733384421SMax Reitz 610813af91ebSStefan Hajnoczi if (bs->io_limits_enabled) { 610913af91ebSStefan Hajnoczi throttle_detach_aio_context(&bs->throttle_state); 611013af91ebSStefan Hajnoczi } 6111dcd04228SStefan Hajnoczi if (bs->drv->bdrv_detach_aio_context) { 6112dcd04228SStefan Hajnoczi bs->drv->bdrv_detach_aio_context(bs); 6113dcd04228SStefan Hajnoczi } 6114dcd04228SStefan Hajnoczi if (bs->file) { 6115dcd04228SStefan Hajnoczi 
bdrv_detach_aio_context(bs->file); 6116dcd04228SStefan Hajnoczi } 6117dcd04228SStefan Hajnoczi if (bs->backing_hd) { 6118dcd04228SStefan Hajnoczi bdrv_detach_aio_context(bs->backing_hd); 6119dcd04228SStefan Hajnoczi } 6120dcd04228SStefan Hajnoczi 6121dcd04228SStefan Hajnoczi bs->aio_context = NULL; 6122dcd04228SStefan Hajnoczi } 6123dcd04228SStefan Hajnoczi 6124dcd04228SStefan Hajnoczi void bdrv_attach_aio_context(BlockDriverState *bs, 6125dcd04228SStefan Hajnoczi AioContext *new_context) 6126dcd04228SStefan Hajnoczi { 612733384421SMax Reitz BdrvAioNotifier *ban; 612833384421SMax Reitz 6129dcd04228SStefan Hajnoczi if (!bs->drv) { 6130dcd04228SStefan Hajnoczi return; 6131dcd04228SStefan Hajnoczi } 6132dcd04228SStefan Hajnoczi 6133dcd04228SStefan Hajnoczi bs->aio_context = new_context; 6134dcd04228SStefan Hajnoczi 6135dcd04228SStefan Hajnoczi if (bs->backing_hd) { 6136dcd04228SStefan Hajnoczi bdrv_attach_aio_context(bs->backing_hd, new_context); 6137dcd04228SStefan Hajnoczi } 6138dcd04228SStefan Hajnoczi if (bs->file) { 6139dcd04228SStefan Hajnoczi bdrv_attach_aio_context(bs->file, new_context); 6140dcd04228SStefan Hajnoczi } 6141dcd04228SStefan Hajnoczi if (bs->drv->bdrv_attach_aio_context) { 6142dcd04228SStefan Hajnoczi bs->drv->bdrv_attach_aio_context(bs, new_context); 6143dcd04228SStefan Hajnoczi } 614413af91ebSStefan Hajnoczi if (bs->io_limits_enabled) { 614513af91ebSStefan Hajnoczi throttle_attach_aio_context(&bs->throttle_state, new_context); 614613af91ebSStefan Hajnoczi } 614733384421SMax Reitz 614833384421SMax Reitz QLIST_FOREACH(ban, &bs->aio_notifiers, list) { 614933384421SMax Reitz ban->attached_aio_context(new_context, ban->opaque); 615033384421SMax Reitz } 6151dcd04228SStefan Hajnoczi } 6152dcd04228SStefan Hajnoczi 6153dcd04228SStefan Hajnoczi void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) 6154dcd04228SStefan Hajnoczi { 6155dcd04228SStefan Hajnoczi bdrv_drain_all(); /* ensure there are no in-flight requests */ 
6156dcd04228SStefan Hajnoczi 6157dcd04228SStefan Hajnoczi bdrv_detach_aio_context(bs); 6158dcd04228SStefan Hajnoczi 6159dcd04228SStefan Hajnoczi /* This function executes in the old AioContext so acquire the new one in 6160dcd04228SStefan Hajnoczi * case it runs in a different thread. 6161dcd04228SStefan Hajnoczi */ 6162dcd04228SStefan Hajnoczi aio_context_acquire(new_context); 6163dcd04228SStefan Hajnoczi bdrv_attach_aio_context(bs, new_context); 6164dcd04228SStefan Hajnoczi aio_context_release(new_context); 616585d126f3SStefan Hajnoczi } 6166d616b224SStefan Hajnoczi 616733384421SMax Reitz void bdrv_add_aio_context_notifier(BlockDriverState *bs, 616833384421SMax Reitz void (*attached_aio_context)(AioContext *new_context, void *opaque), 616933384421SMax Reitz void (*detach_aio_context)(void *opaque), void *opaque) 617033384421SMax Reitz { 617133384421SMax Reitz BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1); 617233384421SMax Reitz *ban = (BdrvAioNotifier){ 617333384421SMax Reitz .attached_aio_context = attached_aio_context, 617433384421SMax Reitz .detach_aio_context = detach_aio_context, 617533384421SMax Reitz .opaque = opaque 617633384421SMax Reitz }; 617733384421SMax Reitz 617833384421SMax Reitz QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list); 617933384421SMax Reitz } 618033384421SMax Reitz 618133384421SMax Reitz void bdrv_remove_aio_context_notifier(BlockDriverState *bs, 618233384421SMax Reitz void (*attached_aio_context)(AioContext *, 618333384421SMax Reitz void *), 618433384421SMax Reitz void (*detach_aio_context)(void *), 618533384421SMax Reitz void *opaque) 618633384421SMax Reitz { 618733384421SMax Reitz BdrvAioNotifier *ban, *ban_next; 618833384421SMax Reitz 618933384421SMax Reitz QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { 619033384421SMax Reitz if (ban->attached_aio_context == attached_aio_context && 619133384421SMax Reitz ban->detach_aio_context == detach_aio_context && 619233384421SMax Reitz ban->opaque == opaque) 619333384421SMax 
Reitz { 619433384421SMax Reitz QLIST_REMOVE(ban, list); 619533384421SMax Reitz g_free(ban); 619633384421SMax Reitz 619733384421SMax Reitz return; 619833384421SMax Reitz } 619933384421SMax Reitz } 620033384421SMax Reitz 620133384421SMax Reitz abort(); 620233384421SMax Reitz } 620333384421SMax Reitz 6204d616b224SStefan Hajnoczi void bdrv_add_before_write_notifier(BlockDriverState *bs, 6205d616b224SStefan Hajnoczi NotifierWithReturn *notifier) 6206d616b224SStefan Hajnoczi { 6207d616b224SStefan Hajnoczi notifier_with_return_list_add(&bs->before_write_notifiers, notifier); 6208d616b224SStefan Hajnoczi } 62096f176b48SMax Reitz 621077485434SMax Reitz int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts, 621177485434SMax Reitz BlockDriverAmendStatusCB *status_cb) 62126f176b48SMax Reitz { 6213c282e1fdSChunyan Liu if (!bs->drv->bdrv_amend_options) { 62146f176b48SMax Reitz return -ENOTSUP; 62156f176b48SMax Reitz } 621677485434SMax Reitz return bs->drv->bdrv_amend_options(bs, opts, status_cb); 62176f176b48SMax Reitz } 6218f6186f49SBenoît Canet 6219b5042a36SBenoît Canet /* This function will be called by the bdrv_recurse_is_first_non_filter method 6220b5042a36SBenoît Canet * of block filter and by bdrv_is_first_non_filter. 6221b5042a36SBenoît Canet * It is used to test if the given bs is the candidate or recurse more in the 6222b5042a36SBenoît Canet * node graph. 6223212a5a8fSBenoît Canet */ 6224212a5a8fSBenoît Canet bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs, 6225212a5a8fSBenoît Canet BlockDriverState *candidate) 6226f6186f49SBenoît Canet { 6227b5042a36SBenoît Canet /* return false if basic checks fails */ 6228b5042a36SBenoît Canet if (!bs || !bs->drv) { 6229b5042a36SBenoît Canet return false; 6230b5042a36SBenoît Canet } 6231b5042a36SBenoît Canet 6232b5042a36SBenoît Canet /* the code reached a non block filter driver -> check if the bs is 6233b5042a36SBenoît Canet * the same as the candidate. It's the recursion termination condition. 
6234b5042a36SBenoît Canet */ 6235b5042a36SBenoît Canet if (!bs->drv->is_filter) { 6236b5042a36SBenoît Canet return bs == candidate; 6237b5042a36SBenoît Canet } 6238b5042a36SBenoît Canet /* Down this path the driver is a block filter driver */ 6239b5042a36SBenoît Canet 6240b5042a36SBenoît Canet /* If the block filter recursion method is defined use it to recurse down 6241b5042a36SBenoît Canet * the node graph. 6242b5042a36SBenoît Canet */ 6243b5042a36SBenoît Canet if (bs->drv->bdrv_recurse_is_first_non_filter) { 6244212a5a8fSBenoît Canet return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate); 6245212a5a8fSBenoît Canet } 6246212a5a8fSBenoît Canet 6247b5042a36SBenoît Canet /* the driver is a block filter but don't allow to recurse -> return false 6248b5042a36SBenoît Canet */ 6249b5042a36SBenoît Canet return false; 6250212a5a8fSBenoît Canet } 6251212a5a8fSBenoît Canet 6252212a5a8fSBenoît Canet /* This function checks if the candidate is the first non filter bs down it's 6253212a5a8fSBenoît Canet * bs chain. Since we don't have pointers to parents it explore all bs chains 6254212a5a8fSBenoît Canet * from the top. Some filters can choose not to pass down the recursion. 
6255212a5a8fSBenoît Canet */ 6256212a5a8fSBenoît Canet bool bdrv_is_first_non_filter(BlockDriverState *candidate) 6257212a5a8fSBenoît Canet { 6258212a5a8fSBenoît Canet BlockDriverState *bs; 6259212a5a8fSBenoît Canet 6260212a5a8fSBenoît Canet /* walk down the bs forest recursively */ 6261212a5a8fSBenoît Canet QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 6262212a5a8fSBenoît Canet bool perm; 6263212a5a8fSBenoît Canet 6264b5042a36SBenoît Canet /* try to recurse in this top level bs */ 6265e6dc8a1fSKevin Wolf perm = bdrv_recurse_is_first_non_filter(bs, candidate); 6266212a5a8fSBenoît Canet 6267212a5a8fSBenoît Canet /* candidate is the first non filter */ 6268212a5a8fSBenoît Canet if (perm) { 6269212a5a8fSBenoît Canet return true; 6270212a5a8fSBenoît Canet } 6271212a5a8fSBenoît Canet } 6272212a5a8fSBenoît Canet 6273212a5a8fSBenoît Canet return false; 6274f6186f49SBenoît Canet } 627509158f00SBenoît Canet 627609158f00SBenoît Canet BlockDriverState *check_to_replace_node(const char *node_name, Error **errp) 627709158f00SBenoît Canet { 627809158f00SBenoît Canet BlockDriverState *to_replace_bs = bdrv_find_node(node_name); 62795a7e7a0bSStefan Hajnoczi AioContext *aio_context; 62805a7e7a0bSStefan Hajnoczi 628109158f00SBenoît Canet if (!to_replace_bs) { 628209158f00SBenoît Canet error_setg(errp, "Node name '%s' not found", node_name); 628309158f00SBenoît Canet return NULL; 628409158f00SBenoît Canet } 628509158f00SBenoît Canet 62865a7e7a0bSStefan Hajnoczi aio_context = bdrv_get_aio_context(to_replace_bs); 62875a7e7a0bSStefan Hajnoczi aio_context_acquire(aio_context); 62885a7e7a0bSStefan Hajnoczi 628909158f00SBenoît Canet if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) { 62905a7e7a0bSStefan Hajnoczi to_replace_bs = NULL; 62915a7e7a0bSStefan Hajnoczi goto out; 629209158f00SBenoît Canet } 629309158f00SBenoît Canet 629409158f00SBenoît Canet /* We don't want arbitrary node of the BDS chain to be replaced only the top 629509158f00SBenoît Canet * most non filter 
in order to prevent data corruption. 629609158f00SBenoît Canet * Another benefit is that this tests exclude backing files which are 629709158f00SBenoît Canet * blocked by the backing blockers. 629809158f00SBenoît Canet */ 629909158f00SBenoît Canet if (!bdrv_is_first_non_filter(to_replace_bs)) { 630009158f00SBenoît Canet error_setg(errp, "Only top most non filter can be replaced"); 63015a7e7a0bSStefan Hajnoczi to_replace_bs = NULL; 63025a7e7a0bSStefan Hajnoczi goto out; 630309158f00SBenoît Canet } 630409158f00SBenoît Canet 63055a7e7a0bSStefan Hajnoczi out: 63065a7e7a0bSStefan Hajnoczi aio_context_release(aio_context); 630709158f00SBenoît Canet return to_replace_bs; 630809158f00SBenoît Canet } 6309448ad91dSMing Lei 6310448ad91dSMing Lei void bdrv_io_plug(BlockDriverState *bs) 6311448ad91dSMing Lei { 6312448ad91dSMing Lei BlockDriver *drv = bs->drv; 6313448ad91dSMing Lei if (drv && drv->bdrv_io_plug) { 6314448ad91dSMing Lei drv->bdrv_io_plug(bs); 6315448ad91dSMing Lei } else if (bs->file) { 6316448ad91dSMing Lei bdrv_io_plug(bs->file); 6317448ad91dSMing Lei } 6318448ad91dSMing Lei } 6319448ad91dSMing Lei 6320448ad91dSMing Lei void bdrv_io_unplug(BlockDriverState *bs) 6321448ad91dSMing Lei { 6322448ad91dSMing Lei BlockDriver *drv = bs->drv; 6323448ad91dSMing Lei if (drv && drv->bdrv_io_unplug) { 6324448ad91dSMing Lei drv->bdrv_io_unplug(bs); 6325448ad91dSMing Lei } else if (bs->file) { 6326448ad91dSMing Lei bdrv_io_unplug(bs->file); 6327448ad91dSMing Lei } 6328448ad91dSMing Lei } 6329448ad91dSMing Lei 6330448ad91dSMing Lei void bdrv_flush_io_queue(BlockDriverState *bs) 6331448ad91dSMing Lei { 6332448ad91dSMing Lei BlockDriver *drv = bs->drv; 6333448ad91dSMing Lei if (drv && drv->bdrv_flush_io_queue) { 6334448ad91dSMing Lei drv->bdrv_flush_io_queue(bs); 6335448ad91dSMing Lei } else if (bs->file) { 6336448ad91dSMing Lei bdrv_flush_io_queue(bs->file); 6337448ad91dSMing Lei } 6338448ad91dSMing Lei } 633991af7014SMax Reitz 634091af7014SMax Reitz static bool 
append_open_options(QDict *d, BlockDriverState *bs) 634191af7014SMax Reitz { 634291af7014SMax Reitz const QDictEntry *entry; 634391af7014SMax Reitz bool found_any = false; 634491af7014SMax Reitz 634591af7014SMax Reitz for (entry = qdict_first(bs->options); entry; 634691af7014SMax Reitz entry = qdict_next(bs->options, entry)) 634791af7014SMax Reitz { 634891af7014SMax Reitz /* Only take options for this level and exclude all non-driver-specific 634991af7014SMax Reitz * options */ 635091af7014SMax Reitz if (!strchr(qdict_entry_key(entry), '.') && 635191af7014SMax Reitz strcmp(qdict_entry_key(entry), "node-name")) 635291af7014SMax Reitz { 635391af7014SMax Reitz qobject_incref(qdict_entry_value(entry)); 635491af7014SMax Reitz qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry)); 635591af7014SMax Reitz found_any = true; 635691af7014SMax Reitz } 635791af7014SMax Reitz } 635891af7014SMax Reitz 635991af7014SMax Reitz return found_any; 636091af7014SMax Reitz } 636191af7014SMax Reitz 636291af7014SMax Reitz /* Updates the following BDS fields: 636391af7014SMax Reitz * - exact_filename: A filename which may be used for opening a block device 636491af7014SMax Reitz * which (mostly) equals the given BDS (even without any 636591af7014SMax Reitz * other options; so reading and writing must return the same 636691af7014SMax Reitz * results, but caching etc. may be different) 636791af7014SMax Reitz * - full_open_options: Options which, when given when opening a block device 636891af7014SMax Reitz * (without a filename), result in a BDS (mostly) 636991af7014SMax Reitz * equalling the given one 637091af7014SMax Reitz * - filename: If exact_filename is set, it is copied here. Otherwise, 637191af7014SMax Reitz * full_open_options is converted to a JSON object, prefixed with 637291af7014SMax Reitz * "json:" (for use through the JSON pseudo protocol) and put here. 
637391af7014SMax Reitz */ 637491af7014SMax Reitz void bdrv_refresh_filename(BlockDriverState *bs) 637591af7014SMax Reitz { 637691af7014SMax Reitz BlockDriver *drv = bs->drv; 637791af7014SMax Reitz QDict *opts; 637891af7014SMax Reitz 637991af7014SMax Reitz if (!drv) { 638091af7014SMax Reitz return; 638191af7014SMax Reitz } 638291af7014SMax Reitz 638391af7014SMax Reitz /* This BDS's file name will most probably depend on its file's name, so 638491af7014SMax Reitz * refresh that first */ 638591af7014SMax Reitz if (bs->file) { 638691af7014SMax Reitz bdrv_refresh_filename(bs->file); 638791af7014SMax Reitz } 638891af7014SMax Reitz 638991af7014SMax Reitz if (drv->bdrv_refresh_filename) { 639091af7014SMax Reitz /* Obsolete information is of no use here, so drop the old file name 639191af7014SMax Reitz * information before refreshing it */ 639291af7014SMax Reitz bs->exact_filename[0] = '\0'; 639391af7014SMax Reitz if (bs->full_open_options) { 639491af7014SMax Reitz QDECREF(bs->full_open_options); 639591af7014SMax Reitz bs->full_open_options = NULL; 639691af7014SMax Reitz } 639791af7014SMax Reitz 639891af7014SMax Reitz drv->bdrv_refresh_filename(bs); 639991af7014SMax Reitz } else if (bs->file) { 640091af7014SMax Reitz /* Try to reconstruct valid information from the underlying file */ 640191af7014SMax Reitz bool has_open_options; 640291af7014SMax Reitz 640391af7014SMax Reitz bs->exact_filename[0] = '\0'; 640491af7014SMax Reitz if (bs->full_open_options) { 640591af7014SMax Reitz QDECREF(bs->full_open_options); 640691af7014SMax Reitz bs->full_open_options = NULL; 640791af7014SMax Reitz } 640891af7014SMax Reitz 640991af7014SMax Reitz opts = qdict_new(); 641091af7014SMax Reitz has_open_options = append_open_options(opts, bs); 641191af7014SMax Reitz 641291af7014SMax Reitz /* If no specific options have been given for this BDS, the filename of 641391af7014SMax Reitz * the underlying file should suffice for this one as well */ 641491af7014SMax Reitz if (bs->file->exact_filename[0] 
&& !has_open_options) { 641591af7014SMax Reitz strcpy(bs->exact_filename, bs->file->exact_filename); 641691af7014SMax Reitz } 641791af7014SMax Reitz /* Reconstructing the full options QDict is simple for most format block 641891af7014SMax Reitz * drivers, as long as the full options are known for the underlying 641991af7014SMax Reitz * file BDS. The full options QDict of that file BDS should somehow 642091af7014SMax Reitz * contain a representation of the filename, therefore the following 642191af7014SMax Reitz * suffices without querying the (exact_)filename of this BDS. */ 642291af7014SMax Reitz if (bs->file->full_open_options) { 642391af7014SMax Reitz qdict_put_obj(opts, "driver", 642491af7014SMax Reitz QOBJECT(qstring_from_str(drv->format_name))); 642591af7014SMax Reitz QINCREF(bs->file->full_open_options); 642691af7014SMax Reitz qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options)); 642791af7014SMax Reitz 642891af7014SMax Reitz bs->full_open_options = opts; 642991af7014SMax Reitz } else { 643091af7014SMax Reitz QDECREF(opts); 643191af7014SMax Reitz } 643291af7014SMax Reitz } else if (!bs->full_open_options && qdict_size(bs->options)) { 643391af7014SMax Reitz /* There is no underlying file BDS (at least referenced by BDS.file), 643491af7014SMax Reitz * so the full options QDict should be equal to the options given 643591af7014SMax Reitz * specifically for this block device when it was opened (plus the 643691af7014SMax Reitz * driver specification). 643791af7014SMax Reitz * Because those options don't change, there is no need to update 643891af7014SMax Reitz * full_open_options when it's already set. 
*/ 643991af7014SMax Reitz 644091af7014SMax Reitz opts = qdict_new(); 644191af7014SMax Reitz append_open_options(opts, bs); 644291af7014SMax Reitz qdict_put_obj(opts, "driver", 644391af7014SMax Reitz QOBJECT(qstring_from_str(drv->format_name))); 644491af7014SMax Reitz 644591af7014SMax Reitz if (bs->exact_filename[0]) { 644691af7014SMax Reitz /* This may not work for all block protocol drivers (some may 644791af7014SMax Reitz * require this filename to be parsed), but we have to find some 644891af7014SMax Reitz * default solution here, so just include it. If some block driver 644991af7014SMax Reitz * does not support pure options without any filename at all or 645091af7014SMax Reitz * needs some special format of the options QDict, it needs to 645191af7014SMax Reitz * implement the driver-specific bdrv_refresh_filename() function. 645291af7014SMax Reitz */ 645391af7014SMax Reitz qdict_put_obj(opts, "filename", 645491af7014SMax Reitz QOBJECT(qstring_from_str(bs->exact_filename))); 645591af7014SMax Reitz } 645691af7014SMax Reitz 645791af7014SMax Reitz bs->full_open_options = opts; 645891af7014SMax Reitz } 645991af7014SMax Reitz 646091af7014SMax Reitz if (bs->exact_filename[0]) { 646191af7014SMax Reitz pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename); 646291af7014SMax Reitz } else if (bs->full_open_options) { 646391af7014SMax Reitz QString *json = qobject_to_json(QOBJECT(bs->full_open_options)); 646491af7014SMax Reitz snprintf(bs->filename, sizeof(bs->filename), "json:%s", 646591af7014SMax Reitz qstring_get_str(json)); 646691af7014SMax Reitz QDECREF(json); 646791af7014SMax Reitz } 646891af7014SMax Reitz } 64695366d0c8SBenoît Canet 64705366d0c8SBenoît Canet /* This accessor function purpose is to allow the device models to access the 64715366d0c8SBenoît Canet * BlockAcctStats structure embedded inside a BlockDriverState without being 64725366d0c8SBenoît Canet * aware of the BlockDriverState structure layout. 
64735366d0c8SBenoît Canet * It will go away when the BlockAcctStats structure will be moved inside 64745366d0c8SBenoît Canet * the device models. 64755366d0c8SBenoît Canet */ 64765366d0c8SBenoît Canet BlockAcctStats *bdrv_get_stats(BlockDriverState *bs) 64775366d0c8SBenoît Canet { 64785366d0c8SBenoît Canet return &bs->stats; 64795366d0c8SBenoît Canet } 6480