1fc01f7e7Sbellard /* 2fc01f7e7Sbellard * QEMU System Emulator block driver 3fc01f7e7Sbellard * 4fc01f7e7Sbellard * Copyright (c) 2003 Fabrice Bellard 5fc01f7e7Sbellard * 6fc01f7e7Sbellard * Permission is hereby granted, free of charge, to any person obtaining a copy 7fc01f7e7Sbellard * of this software and associated documentation files (the "Software"), to deal 8fc01f7e7Sbellard * in the Software without restriction, including without limitation the rights 9fc01f7e7Sbellard * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10fc01f7e7Sbellard * copies of the Software, and to permit persons to whom the Software is 11fc01f7e7Sbellard * furnished to do so, subject to the following conditions: 12fc01f7e7Sbellard * 13fc01f7e7Sbellard * The above copyright notice and this permission notice shall be included in 14fc01f7e7Sbellard * all copies or substantial portions of the Software. 15fc01f7e7Sbellard * 16fc01f7e7Sbellard * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17fc01f7e7Sbellard * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18fc01f7e7Sbellard * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19fc01f7e7Sbellard * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20fc01f7e7Sbellard * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21fc01f7e7Sbellard * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22fc01f7e7Sbellard * THE SOFTWARE. 
23fc01f7e7Sbellard */ 243990d09aSblueswir1 #include "config-host.h" 25faf07963Spbrook #include "qemu-common.h" 266d519a5fSStefan Hajnoczi #include "trace.h" 27737e150eSPaolo Bonzini #include "block/block_int.h" 28737e150eSPaolo Bonzini #include "block/blockjob.h" 291de7afc9SPaolo Bonzini #include "qemu/module.h" 307b1b5d19SPaolo Bonzini #include "qapi/qmp/qjson.h" 31bfb197e0SMarkus Armbruster #include "sysemu/block-backend.h" 329c17d615SPaolo Bonzini #include "sysemu/sysemu.h" 33de50a20aSFam Zheng #include "sysemu/qtest.h" 341de7afc9SPaolo Bonzini #include "qemu/notify.h" 35737e150eSPaolo Bonzini #include "block/coroutine.h" 36c13163fbSBenoît Canet #include "block/qapi.h" 37b2023818SLuiz Capitulino #include "qmp-commands.h" 381de7afc9SPaolo Bonzini #include "qemu/timer.h" 39a5ee7bd4SWenchao Xia #include "qapi-event.h" 40fc01f7e7Sbellard 4171e72a19SJuan Quintela #ifdef CONFIG_BSD 427674e7bfSbellard #include <sys/types.h> 437674e7bfSbellard #include <sys/stat.h> 447674e7bfSbellard #include <sys/ioctl.h> 4572cf2d4fSBlue Swirl #include <sys/queue.h> 46c5e97233Sblueswir1 #ifndef __DragonFly__ 477674e7bfSbellard #include <sys/disk.h> 487674e7bfSbellard #endif 49c5e97233Sblueswir1 #endif 507674e7bfSbellard 5149dc768dSaliguori #ifdef _WIN32 5249dc768dSaliguori #include <windows.h> 5349dc768dSaliguori #endif 5449dc768dSaliguori 55e4654d2dSFam Zheng struct BdrvDirtyBitmap { 56e4654d2dSFam Zheng HBitmap *bitmap; 570db6e54aSFam Zheng char *name; 58*b8e6fb75SJohn Snow bool disabled; 59e4654d2dSFam Zheng QLIST_ENTRY(BdrvDirtyBitmap) list; 60e4654d2dSFam Zheng }; 61e4654d2dSFam Zheng 621c9805a3SStefan Hajnoczi #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ 631c9805a3SStefan Hajnoczi 647c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs, 65f141eafeSaliguori int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 66097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque); 677c84b1b8SMarkus Armbruster static 
BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs, 68f141eafeSaliguori int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 69097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque); 70f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs, 71f9f05dc5SKevin Wolf int64_t sector_num, int nb_sectors, 72f9f05dc5SKevin Wolf QEMUIOVector *iov); 73f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs, 74f9f05dc5SKevin Wolf int64_t sector_num, int nb_sectors, 75f9f05dc5SKevin Wolf QEMUIOVector *iov); 76775aa8b6SKevin Wolf static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs, 77775aa8b6SKevin Wolf int64_t offset, unsigned int bytes, QEMUIOVector *qiov, 78470c0504SStefan Hajnoczi BdrvRequestFlags flags); 79775aa8b6SKevin Wolf static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, 80775aa8b6SKevin Wolf int64_t offset, unsigned int bytes, QEMUIOVector *qiov, 81f08f2ddaSStefan Hajnoczi BdrvRequestFlags flags); 827c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, 83b2a61371SStefan Hajnoczi int64_t sector_num, 84b2a61371SStefan Hajnoczi QEMUIOVector *qiov, 85b2a61371SStefan Hajnoczi int nb_sectors, 86d20d9b7cSPaolo Bonzini BdrvRequestFlags flags, 87097310b5SMarkus Armbruster BlockCompletionFunc *cb, 88b2a61371SStefan Hajnoczi void *opaque, 898c5873d6SStefan Hajnoczi bool is_write); 90b2a61371SStefan Hajnoczi static void coroutine_fn bdrv_co_do_rw(void *opaque); 91621f0589SKevin Wolf static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, 92aa7bfbffSPeter Lieven int64_t sector_num, int nb_sectors, BdrvRequestFlags flags); 93ec530c81Sbellard 941b7bdbc1SStefan Hajnoczi static QTAILQ_HEAD(, BlockDriverState) bdrv_states = 951b7bdbc1SStefan Hajnoczi QTAILQ_HEAD_INITIALIZER(bdrv_states); 967ee930d0Sblueswir1 97dc364f4cSBenoît Canet static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states = 98dc364f4cSBenoît Canet 
QTAILQ_HEAD_INITIALIZER(graph_bdrv_states); 99dc364f4cSBenoît Canet 1008a22f02aSStefan Hajnoczi static QLIST_HEAD(, BlockDriver) bdrv_drivers = 1018a22f02aSStefan Hajnoczi QLIST_HEAD_INITIALIZER(bdrv_drivers); 102ea2384d3Sbellard 103c4237dfaSVladimir Sementsov-Ogievskiy static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, 104c4237dfaSVladimir Sementsov-Ogievskiy int nr_sectors); 105c4237dfaSVladimir Sementsov-Ogievskiy static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, 106c4237dfaSVladimir Sementsov-Ogievskiy int nr_sectors); 107eb852011SMarkus Armbruster /* If non-zero, use only whitelisted block drivers */ 108eb852011SMarkus Armbruster static int use_bdrv_whitelist; 109eb852011SMarkus Armbruster 1109e0b22f4SStefan Hajnoczi #ifdef _WIN32 1119e0b22f4SStefan Hajnoczi static int is_windows_drive_prefix(const char *filename) 1129e0b22f4SStefan Hajnoczi { 1139e0b22f4SStefan Hajnoczi return (((filename[0] >= 'a' && filename[0] <= 'z') || 1149e0b22f4SStefan Hajnoczi (filename[0] >= 'A' && filename[0] <= 'Z')) && 1159e0b22f4SStefan Hajnoczi filename[1] == ':'); 1169e0b22f4SStefan Hajnoczi } 1179e0b22f4SStefan Hajnoczi 1189e0b22f4SStefan Hajnoczi int is_windows_drive(const char *filename) 1199e0b22f4SStefan Hajnoczi { 1209e0b22f4SStefan Hajnoczi if (is_windows_drive_prefix(filename) && 1219e0b22f4SStefan Hajnoczi filename[2] == '\0') 1229e0b22f4SStefan Hajnoczi return 1; 1239e0b22f4SStefan Hajnoczi if (strstart(filename, "\\\\.\\", NULL) || 1249e0b22f4SStefan Hajnoczi strstart(filename, "//./", NULL)) 1259e0b22f4SStefan Hajnoczi return 1; 1269e0b22f4SStefan Hajnoczi return 0; 1279e0b22f4SStefan Hajnoczi } 1289e0b22f4SStefan Hajnoczi #endif 1299e0b22f4SStefan Hajnoczi 1300563e191SZhi Yong Wu /* throttling disk I/O limits */ 131cc0681c4SBenoît Canet void bdrv_set_io_limits(BlockDriverState *bs, 132cc0681c4SBenoît Canet ThrottleConfig *cfg) 133cc0681c4SBenoît Canet { 134cc0681c4SBenoît Canet int i; 135cc0681c4SBenoît Canet 
136cc0681c4SBenoît Canet throttle_config(&bs->throttle_state, cfg); 137cc0681c4SBenoît Canet 138cc0681c4SBenoît Canet for (i = 0; i < 2; i++) { 139cc0681c4SBenoît Canet qemu_co_enter_next(&bs->throttled_reqs[i]); 140cc0681c4SBenoît Canet } 141cc0681c4SBenoît Canet } 142cc0681c4SBenoît Canet 143cc0681c4SBenoît Canet /* this function drain all the throttled IOs */ 144cc0681c4SBenoît Canet static bool bdrv_start_throttled_reqs(BlockDriverState *bs) 145cc0681c4SBenoît Canet { 146cc0681c4SBenoît Canet bool drained = false; 147cc0681c4SBenoît Canet bool enabled = bs->io_limits_enabled; 148cc0681c4SBenoît Canet int i; 149cc0681c4SBenoît Canet 150cc0681c4SBenoît Canet bs->io_limits_enabled = false; 151cc0681c4SBenoît Canet 152cc0681c4SBenoît Canet for (i = 0; i < 2; i++) { 153cc0681c4SBenoît Canet while (qemu_co_enter_next(&bs->throttled_reqs[i])) { 154cc0681c4SBenoît Canet drained = true; 155cc0681c4SBenoît Canet } 156cc0681c4SBenoît Canet } 157cc0681c4SBenoît Canet 158cc0681c4SBenoît Canet bs->io_limits_enabled = enabled; 159cc0681c4SBenoît Canet 160cc0681c4SBenoît Canet return drained; 161cc0681c4SBenoît Canet } 162cc0681c4SBenoît Canet 16398f90dbaSZhi Yong Wu void bdrv_io_limits_disable(BlockDriverState *bs) 16498f90dbaSZhi Yong Wu { 16598f90dbaSZhi Yong Wu bs->io_limits_enabled = false; 16698f90dbaSZhi Yong Wu 167cc0681c4SBenoît Canet bdrv_start_throttled_reqs(bs); 16898f90dbaSZhi Yong Wu 169cc0681c4SBenoît Canet throttle_destroy(&bs->throttle_state); 17098f90dbaSZhi Yong Wu } 17198f90dbaSZhi Yong Wu 172cc0681c4SBenoît Canet static void bdrv_throttle_read_timer_cb(void *opaque) 1730563e191SZhi Yong Wu { 1740563e191SZhi Yong Wu BlockDriverState *bs = opaque; 175cc0681c4SBenoît Canet qemu_co_enter_next(&bs->throttled_reqs[0]); 1760563e191SZhi Yong Wu } 1770563e191SZhi Yong Wu 178cc0681c4SBenoît Canet static void bdrv_throttle_write_timer_cb(void *opaque) 179cc0681c4SBenoît Canet { 180cc0681c4SBenoît Canet BlockDriverState *bs = opaque; 181cc0681c4SBenoît Canet 
qemu_co_enter_next(&bs->throttled_reqs[1]); 182cc0681c4SBenoît Canet } 183cc0681c4SBenoît Canet 184cc0681c4SBenoît Canet /* should be called before bdrv_set_io_limits if a limit is set */ 1850563e191SZhi Yong Wu void bdrv_io_limits_enable(BlockDriverState *bs) 1860563e191SZhi Yong Wu { 187de50a20aSFam Zheng int clock_type = QEMU_CLOCK_REALTIME; 188de50a20aSFam Zheng 189de50a20aSFam Zheng if (qtest_enabled()) { 190de50a20aSFam Zheng /* For testing block IO throttling only */ 191de50a20aSFam Zheng clock_type = QEMU_CLOCK_VIRTUAL; 192de50a20aSFam Zheng } 193cc0681c4SBenoît Canet assert(!bs->io_limits_enabled); 194cc0681c4SBenoît Canet throttle_init(&bs->throttle_state, 19513af91ebSStefan Hajnoczi bdrv_get_aio_context(bs), 196de50a20aSFam Zheng clock_type, 197cc0681c4SBenoît Canet bdrv_throttle_read_timer_cb, 198cc0681c4SBenoît Canet bdrv_throttle_write_timer_cb, 199cc0681c4SBenoît Canet bs); 2000563e191SZhi Yong Wu bs->io_limits_enabled = true; 2010563e191SZhi Yong Wu } 2020563e191SZhi Yong Wu 203cc0681c4SBenoît Canet /* This function makes an IO wait if needed 204cc0681c4SBenoît Canet * 205cc0681c4SBenoît Canet * @nb_sectors: the number of sectors of the IO 206cc0681c4SBenoît Canet * @is_write: is the IO a write 20798f90dbaSZhi Yong Wu */ 208cc0681c4SBenoît Canet static void bdrv_io_limits_intercept(BlockDriverState *bs, 209d5103588SKevin Wolf unsigned int bytes, 210cc0681c4SBenoît Canet bool is_write) 211cc0681c4SBenoît Canet { 212cc0681c4SBenoît Canet /* does this io must wait */ 213cc0681c4SBenoît Canet bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write); 21498f90dbaSZhi Yong Wu 215cc0681c4SBenoît Canet /* if must wait or any request of this type throttled queue the IO */ 216cc0681c4SBenoît Canet if (must_wait || 217cc0681c4SBenoît Canet !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) { 218cc0681c4SBenoît Canet qemu_co_queue_wait(&bs->throttled_reqs[is_write]); 21998f90dbaSZhi Yong Wu } 22098f90dbaSZhi Yong Wu 221cc0681c4SBenoît Canet /* 
the IO will be executed, do the accounting */ 222d5103588SKevin Wolf throttle_account(&bs->throttle_state, is_write, bytes); 223d5103588SKevin Wolf 224cc0681c4SBenoît Canet 225cc0681c4SBenoît Canet /* if the next request must wait -> do nothing */ 226cc0681c4SBenoît Canet if (throttle_schedule_timer(&bs->throttle_state, is_write)) { 227cc0681c4SBenoît Canet return; 228cc0681c4SBenoît Canet } 229cc0681c4SBenoît Canet 230cc0681c4SBenoît Canet /* else queue next request for execution */ 231cc0681c4SBenoît Canet qemu_co_queue_next(&bs->throttled_reqs[is_write]); 23298f90dbaSZhi Yong Wu } 23398f90dbaSZhi Yong Wu 234339064d5SKevin Wolf size_t bdrv_opt_mem_align(BlockDriverState *bs) 235339064d5SKevin Wolf { 236339064d5SKevin Wolf if (!bs || !bs->drv) { 237339064d5SKevin Wolf /* 4k should be on the safe side */ 238339064d5SKevin Wolf return 4096; 239339064d5SKevin Wolf } 240339064d5SKevin Wolf 241339064d5SKevin Wolf return bs->bl.opt_mem_alignment; 242339064d5SKevin Wolf } 243339064d5SKevin Wolf 2449e0b22f4SStefan Hajnoczi /* check if the path starts with "<protocol>:" */ 2455c98415bSMax Reitz int path_has_protocol(const char *path) 2469e0b22f4SStefan Hajnoczi { 247947995c0SPaolo Bonzini const char *p; 248947995c0SPaolo Bonzini 2499e0b22f4SStefan Hajnoczi #ifdef _WIN32 2509e0b22f4SStefan Hajnoczi if (is_windows_drive(path) || 2519e0b22f4SStefan Hajnoczi is_windows_drive_prefix(path)) { 2529e0b22f4SStefan Hajnoczi return 0; 2539e0b22f4SStefan Hajnoczi } 254947995c0SPaolo Bonzini p = path + strcspn(path, ":/\\"); 255947995c0SPaolo Bonzini #else 256947995c0SPaolo Bonzini p = path + strcspn(path, ":/"); 2579e0b22f4SStefan Hajnoczi #endif 2589e0b22f4SStefan Hajnoczi 259947995c0SPaolo Bonzini return *p == ':'; 2609e0b22f4SStefan Hajnoczi } 2619e0b22f4SStefan Hajnoczi 26283f64091Sbellard int path_is_absolute(const char *path) 26383f64091Sbellard { 26421664424Sbellard #ifdef _WIN32 26521664424Sbellard /* specific case for names like: "\\.\d:" */ 266f53f4da9SPaolo Bonzini if 
(is_windows_drive(path) || is_windows_drive_prefix(path)) { 26721664424Sbellard return 1; 268f53f4da9SPaolo Bonzini } 269f53f4da9SPaolo Bonzini return (*path == '/' || *path == '\\'); 2703b9f94e1Sbellard #else 271f53f4da9SPaolo Bonzini return (*path == '/'); 2723b9f94e1Sbellard #endif 27383f64091Sbellard } 27483f64091Sbellard 27583f64091Sbellard /* if filename is absolute, just copy it to dest. Otherwise, build a 27683f64091Sbellard path to it by considering it is relative to base_path. URL are 27783f64091Sbellard supported. */ 27883f64091Sbellard void path_combine(char *dest, int dest_size, 27983f64091Sbellard const char *base_path, 28083f64091Sbellard const char *filename) 28183f64091Sbellard { 28283f64091Sbellard const char *p, *p1; 28383f64091Sbellard int len; 28483f64091Sbellard 28583f64091Sbellard if (dest_size <= 0) 28683f64091Sbellard return; 28783f64091Sbellard if (path_is_absolute(filename)) { 28883f64091Sbellard pstrcpy(dest, dest_size, filename); 28983f64091Sbellard } else { 29083f64091Sbellard p = strchr(base_path, ':'); 29183f64091Sbellard if (p) 29283f64091Sbellard p++; 29383f64091Sbellard else 29483f64091Sbellard p = base_path; 2953b9f94e1Sbellard p1 = strrchr(base_path, '/'); 2963b9f94e1Sbellard #ifdef _WIN32 2973b9f94e1Sbellard { 2983b9f94e1Sbellard const char *p2; 2993b9f94e1Sbellard p2 = strrchr(base_path, '\\'); 3003b9f94e1Sbellard if (!p1 || p2 > p1) 3013b9f94e1Sbellard p1 = p2; 3023b9f94e1Sbellard } 3033b9f94e1Sbellard #endif 30483f64091Sbellard if (p1) 30583f64091Sbellard p1++; 30683f64091Sbellard else 30783f64091Sbellard p1 = base_path; 30883f64091Sbellard if (p1 > p) 30983f64091Sbellard p = p1; 31083f64091Sbellard len = p - base_path; 31183f64091Sbellard if (len > dest_size - 1) 31283f64091Sbellard len = dest_size - 1; 31383f64091Sbellard memcpy(dest, base_path, len); 31483f64091Sbellard dest[len] = '\0'; 31583f64091Sbellard pstrcat(dest, dest_size, filename); 31683f64091Sbellard } 31783f64091Sbellard } 31883f64091Sbellard 3190a82855aSMax 
Reitz void bdrv_get_full_backing_filename_from_filename(const char *backed, 3200a82855aSMax Reitz const char *backing, 3219f07429eSMax Reitz char *dest, size_t sz, 3229f07429eSMax Reitz Error **errp) 3230a82855aSMax Reitz { 3249f07429eSMax Reitz if (backing[0] == '\0' || path_has_protocol(backing) || 3259f07429eSMax Reitz path_is_absolute(backing)) 3269f07429eSMax Reitz { 3270a82855aSMax Reitz pstrcpy(dest, sz, backing); 3289f07429eSMax Reitz } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) { 3299f07429eSMax Reitz error_setg(errp, "Cannot use relative backing file names for '%s'", 3309f07429eSMax Reitz backed); 3310a82855aSMax Reitz } else { 3320a82855aSMax Reitz path_combine(dest, sz, backed, backing); 3330a82855aSMax Reitz } 3340a82855aSMax Reitz } 3350a82855aSMax Reitz 3369f07429eSMax Reitz void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz, 3379f07429eSMax Reitz Error **errp) 338dc5a1371SPaolo Bonzini { 3399f07429eSMax Reitz char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename; 3409f07429eSMax Reitz 3419f07429eSMax Reitz bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file, 3429f07429eSMax Reitz dest, sz, errp); 343dc5a1371SPaolo Bonzini } 344dc5a1371SPaolo Bonzini 3455efa9d5aSAnthony Liguori void bdrv_register(BlockDriver *bdrv) 346ea2384d3Sbellard { 3478c5873d6SStefan Hajnoczi /* Block drivers without coroutine functions need emulation */ 3488c5873d6SStefan Hajnoczi if (!bdrv->bdrv_co_readv) { 349f9f05dc5SKevin Wolf bdrv->bdrv_co_readv = bdrv_co_readv_em; 350f9f05dc5SKevin Wolf bdrv->bdrv_co_writev = bdrv_co_writev_em; 351f9f05dc5SKevin Wolf 352f8c35c1dSStefan Hajnoczi /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if 353f8c35c1dSStefan Hajnoczi * the block driver lacks aio we need to emulate that too. 
354f8c35c1dSStefan Hajnoczi */ 355f9f05dc5SKevin Wolf if (!bdrv->bdrv_aio_readv) { 35683f64091Sbellard /* add AIO emulation layer */ 357f141eafeSaliguori bdrv->bdrv_aio_readv = bdrv_aio_readv_em; 358f141eafeSaliguori bdrv->bdrv_aio_writev = bdrv_aio_writev_em; 35983f64091Sbellard } 360f9f05dc5SKevin Wolf } 361b2e12bc6SChristoph Hellwig 3628a22f02aSStefan Hajnoczi QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list); 363ea2384d3Sbellard } 364b338082bSbellard 3657f06d47eSMarkus Armbruster BlockDriverState *bdrv_new_root(void) 366fc01f7e7Sbellard { 3677f06d47eSMarkus Armbruster BlockDriverState *bs = bdrv_new(); 368e4e9986bSMarkus Armbruster 369e4e9986bSMarkus Armbruster QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list); 370e4e9986bSMarkus Armbruster return bs; 371e4e9986bSMarkus Armbruster } 372e4e9986bSMarkus Armbruster 373e4e9986bSMarkus Armbruster BlockDriverState *bdrv_new(void) 374e4e9986bSMarkus Armbruster { 375e4e9986bSMarkus Armbruster BlockDriverState *bs; 376e4e9986bSMarkus Armbruster int i; 377e4e9986bSMarkus Armbruster 3785839e53bSMarkus Armbruster bs = g_new0(BlockDriverState, 1); 379e4654d2dSFam Zheng QLIST_INIT(&bs->dirty_bitmaps); 380fbe40ff7SFam Zheng for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 381fbe40ff7SFam Zheng QLIST_INIT(&bs->op_blockers[i]); 382fbe40ff7SFam Zheng } 38328a7282aSLuiz Capitulino bdrv_iostatus_disable(bs); 384d7d512f6SPaolo Bonzini notifier_list_init(&bs->close_notifiers); 385d616b224SStefan Hajnoczi notifier_with_return_list_init(&bs->before_write_notifiers); 386cc0681c4SBenoît Canet qemu_co_queue_init(&bs->throttled_reqs[0]); 387cc0681c4SBenoît Canet qemu_co_queue_init(&bs->throttled_reqs[1]); 3889fcb0251SFam Zheng bs->refcnt = 1; 389dcd04228SStefan Hajnoczi bs->aio_context = qemu_get_aio_context(); 390d7d512f6SPaolo Bonzini 391b338082bSbellard return bs; 392b338082bSbellard } 393b338082bSbellard 394d7d512f6SPaolo Bonzini void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify) 395d7d512f6SPaolo Bonzini { 396d7d512f6SPaolo 
Bonzini notifier_list_add(&bs->close_notifiers, notify); 397d7d512f6SPaolo Bonzini } 398d7d512f6SPaolo Bonzini 399ea2384d3Sbellard BlockDriver *bdrv_find_format(const char *format_name) 400ea2384d3Sbellard { 401ea2384d3Sbellard BlockDriver *drv1; 4028a22f02aSStefan Hajnoczi QLIST_FOREACH(drv1, &bdrv_drivers, list) { 4038a22f02aSStefan Hajnoczi if (!strcmp(drv1->format_name, format_name)) { 404ea2384d3Sbellard return drv1; 405ea2384d3Sbellard } 4068a22f02aSStefan Hajnoczi } 407ea2384d3Sbellard return NULL; 408ea2384d3Sbellard } 409ea2384d3Sbellard 410b64ec4e4SFam Zheng static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only) 411eb852011SMarkus Armbruster { 412b64ec4e4SFam Zheng static const char *whitelist_rw[] = { 413b64ec4e4SFam Zheng CONFIG_BDRV_RW_WHITELIST 414b64ec4e4SFam Zheng }; 415b64ec4e4SFam Zheng static const char *whitelist_ro[] = { 416b64ec4e4SFam Zheng CONFIG_BDRV_RO_WHITELIST 417eb852011SMarkus Armbruster }; 418eb852011SMarkus Armbruster const char **p; 419eb852011SMarkus Armbruster 420b64ec4e4SFam Zheng if (!whitelist_rw[0] && !whitelist_ro[0]) { 421eb852011SMarkus Armbruster return 1; /* no whitelist, anything goes */ 422b64ec4e4SFam Zheng } 423eb852011SMarkus Armbruster 424b64ec4e4SFam Zheng for (p = whitelist_rw; *p; p++) { 425eb852011SMarkus Armbruster if (!strcmp(drv->format_name, *p)) { 426eb852011SMarkus Armbruster return 1; 427eb852011SMarkus Armbruster } 428eb852011SMarkus Armbruster } 429b64ec4e4SFam Zheng if (read_only) { 430b64ec4e4SFam Zheng for (p = whitelist_ro; *p; p++) { 431b64ec4e4SFam Zheng if (!strcmp(drv->format_name, *p)) { 432b64ec4e4SFam Zheng return 1; 433b64ec4e4SFam Zheng } 434b64ec4e4SFam Zheng } 435b64ec4e4SFam Zheng } 436eb852011SMarkus Armbruster return 0; 437eb852011SMarkus Armbruster } 438eb852011SMarkus Armbruster 439b64ec4e4SFam Zheng BlockDriver *bdrv_find_whitelisted_format(const char *format_name, 440b64ec4e4SFam Zheng bool read_only) 441eb852011SMarkus Armbruster { 442eb852011SMarkus Armbruster 
BlockDriver *drv = bdrv_find_format(format_name); 443b64ec4e4SFam Zheng return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL; 444eb852011SMarkus Armbruster } 445eb852011SMarkus Armbruster 4465b7e1542SZhi Yong Wu typedef struct CreateCo { 4475b7e1542SZhi Yong Wu BlockDriver *drv; 4485b7e1542SZhi Yong Wu char *filename; 44983d0521aSChunyan Liu QemuOpts *opts; 4505b7e1542SZhi Yong Wu int ret; 451cc84d90fSMax Reitz Error *err; 4525b7e1542SZhi Yong Wu } CreateCo; 4535b7e1542SZhi Yong Wu 4545b7e1542SZhi Yong Wu static void coroutine_fn bdrv_create_co_entry(void *opaque) 4555b7e1542SZhi Yong Wu { 456cc84d90fSMax Reitz Error *local_err = NULL; 457cc84d90fSMax Reitz int ret; 458cc84d90fSMax Reitz 4595b7e1542SZhi Yong Wu CreateCo *cco = opaque; 4605b7e1542SZhi Yong Wu assert(cco->drv); 4615b7e1542SZhi Yong Wu 462c282e1fdSChunyan Liu ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err); 46384d18f06SMarkus Armbruster if (local_err) { 464cc84d90fSMax Reitz error_propagate(&cco->err, local_err); 465cc84d90fSMax Reitz } 466cc84d90fSMax Reitz cco->ret = ret; 4675b7e1542SZhi Yong Wu } 4685b7e1542SZhi Yong Wu 4690e7e1989SKevin Wolf int bdrv_create(BlockDriver *drv, const char* filename, 47083d0521aSChunyan Liu QemuOpts *opts, Error **errp) 471ea2384d3Sbellard { 4725b7e1542SZhi Yong Wu int ret; 4730e7e1989SKevin Wolf 4745b7e1542SZhi Yong Wu Coroutine *co; 4755b7e1542SZhi Yong Wu CreateCo cco = { 4765b7e1542SZhi Yong Wu .drv = drv, 4775b7e1542SZhi Yong Wu .filename = g_strdup(filename), 47883d0521aSChunyan Liu .opts = opts, 4795b7e1542SZhi Yong Wu .ret = NOT_DONE, 480cc84d90fSMax Reitz .err = NULL, 4815b7e1542SZhi Yong Wu }; 4825b7e1542SZhi Yong Wu 483c282e1fdSChunyan Liu if (!drv->bdrv_create) { 484cc84d90fSMax Reitz error_setg(errp, "Driver '%s' does not support image creation", drv->format_name); 48580168bffSLuiz Capitulino ret = -ENOTSUP; 48680168bffSLuiz Capitulino goto out; 4875b7e1542SZhi Yong Wu } 4885b7e1542SZhi Yong Wu 4895b7e1542SZhi Yong Wu if 
(qemu_in_coroutine()) { 4905b7e1542SZhi Yong Wu /* Fast-path if already in coroutine context */ 4915b7e1542SZhi Yong Wu bdrv_create_co_entry(&cco); 4925b7e1542SZhi Yong Wu } else { 4935b7e1542SZhi Yong Wu co = qemu_coroutine_create(bdrv_create_co_entry); 4945b7e1542SZhi Yong Wu qemu_coroutine_enter(co, &cco); 4955b7e1542SZhi Yong Wu while (cco.ret == NOT_DONE) { 496b47ec2c4SPaolo Bonzini aio_poll(qemu_get_aio_context(), true); 4975b7e1542SZhi Yong Wu } 4985b7e1542SZhi Yong Wu } 4995b7e1542SZhi Yong Wu 5005b7e1542SZhi Yong Wu ret = cco.ret; 501cc84d90fSMax Reitz if (ret < 0) { 50284d18f06SMarkus Armbruster if (cco.err) { 503cc84d90fSMax Reitz error_propagate(errp, cco.err); 504cc84d90fSMax Reitz } else { 505cc84d90fSMax Reitz error_setg_errno(errp, -ret, "Could not create image"); 506cc84d90fSMax Reitz } 507cc84d90fSMax Reitz } 5085b7e1542SZhi Yong Wu 50980168bffSLuiz Capitulino out: 51080168bffSLuiz Capitulino g_free(cco.filename); 5115b7e1542SZhi Yong Wu return ret; 512ea2384d3Sbellard } 513ea2384d3Sbellard 514c282e1fdSChunyan Liu int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) 51584a12e66SChristoph Hellwig { 51684a12e66SChristoph Hellwig BlockDriver *drv; 517cc84d90fSMax Reitz Error *local_err = NULL; 518cc84d90fSMax Reitz int ret; 51984a12e66SChristoph Hellwig 520b65a5e12SMax Reitz drv = bdrv_find_protocol(filename, true, errp); 52184a12e66SChristoph Hellwig if (drv == NULL) { 52216905d71SStefan Hajnoczi return -ENOENT; 52384a12e66SChristoph Hellwig } 52484a12e66SChristoph Hellwig 525c282e1fdSChunyan Liu ret = bdrv_create(drv, filename, opts, &local_err); 52684d18f06SMarkus Armbruster if (local_err) { 527cc84d90fSMax Reitz error_propagate(errp, local_err); 528cc84d90fSMax Reitz } 529cc84d90fSMax Reitz return ret; 53084a12e66SChristoph Hellwig } 53184a12e66SChristoph Hellwig 5323baca891SKevin Wolf void bdrv_refresh_limits(BlockDriverState *bs, Error **errp) 533d34682cdSKevin Wolf { 534d34682cdSKevin Wolf BlockDriver *drv = bs->drv; 
5353baca891SKevin Wolf Error *local_err = NULL; 536d34682cdSKevin Wolf 537d34682cdSKevin Wolf memset(&bs->bl, 0, sizeof(bs->bl)); 538d34682cdSKevin Wolf 539466ad822SKevin Wolf if (!drv) { 5403baca891SKevin Wolf return; 541466ad822SKevin Wolf } 542466ad822SKevin Wolf 543466ad822SKevin Wolf /* Take some limits from the children as a default */ 544466ad822SKevin Wolf if (bs->file) { 5453baca891SKevin Wolf bdrv_refresh_limits(bs->file, &local_err); 5463baca891SKevin Wolf if (local_err) { 5473baca891SKevin Wolf error_propagate(errp, local_err); 5483baca891SKevin Wolf return; 5493baca891SKevin Wolf } 550466ad822SKevin Wolf bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length; 5512647fab5SPeter Lieven bs->bl.max_transfer_length = bs->file->bl.max_transfer_length; 552339064d5SKevin Wolf bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment; 553339064d5SKevin Wolf } else { 554339064d5SKevin Wolf bs->bl.opt_mem_alignment = 512; 555466ad822SKevin Wolf } 556466ad822SKevin Wolf 557466ad822SKevin Wolf if (bs->backing_hd) { 5583baca891SKevin Wolf bdrv_refresh_limits(bs->backing_hd, &local_err); 5593baca891SKevin Wolf if (local_err) { 5603baca891SKevin Wolf error_propagate(errp, local_err); 5613baca891SKevin Wolf return; 5623baca891SKevin Wolf } 563466ad822SKevin Wolf bs->bl.opt_transfer_length = 564466ad822SKevin Wolf MAX(bs->bl.opt_transfer_length, 565466ad822SKevin Wolf bs->backing_hd->bl.opt_transfer_length); 5662647fab5SPeter Lieven bs->bl.max_transfer_length = 5672647fab5SPeter Lieven MIN_NON_ZERO(bs->bl.max_transfer_length, 5682647fab5SPeter Lieven bs->backing_hd->bl.max_transfer_length); 569339064d5SKevin Wolf bs->bl.opt_mem_alignment = 570339064d5SKevin Wolf MAX(bs->bl.opt_mem_alignment, 571339064d5SKevin Wolf bs->backing_hd->bl.opt_mem_alignment); 572466ad822SKevin Wolf } 573466ad822SKevin Wolf 574466ad822SKevin Wolf /* Then let the driver override it */ 575466ad822SKevin Wolf if (drv->bdrv_refresh_limits) { 5763baca891SKevin Wolf 
drv->bdrv_refresh_limits(bs, errp); 577d34682cdSKevin Wolf } 578d34682cdSKevin Wolf } 579d34682cdSKevin Wolf 580892b7de8SEkaterina Tumanova /** 581892b7de8SEkaterina Tumanova * Try to get @bs's logical and physical block size. 582892b7de8SEkaterina Tumanova * On success, store them in @bsz struct and return 0. 583892b7de8SEkaterina Tumanova * On failure return -errno. 584892b7de8SEkaterina Tumanova * @bs must not be empty. 585892b7de8SEkaterina Tumanova */ 586892b7de8SEkaterina Tumanova int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) 587892b7de8SEkaterina Tumanova { 588892b7de8SEkaterina Tumanova BlockDriver *drv = bs->drv; 589892b7de8SEkaterina Tumanova 590892b7de8SEkaterina Tumanova if (drv && drv->bdrv_probe_blocksizes) { 591892b7de8SEkaterina Tumanova return drv->bdrv_probe_blocksizes(bs, bsz); 592892b7de8SEkaterina Tumanova } 593892b7de8SEkaterina Tumanova 594892b7de8SEkaterina Tumanova return -ENOTSUP; 595892b7de8SEkaterina Tumanova } 596892b7de8SEkaterina Tumanova 597892b7de8SEkaterina Tumanova /** 598892b7de8SEkaterina Tumanova * Try to get @bs's geometry (cyls, heads, sectors). 599892b7de8SEkaterina Tumanova * On success, store them in @geo struct and return 0. 600892b7de8SEkaterina Tumanova * On failure return -errno. 601892b7de8SEkaterina Tumanova * @bs must not be empty. 602892b7de8SEkaterina Tumanova */ 603892b7de8SEkaterina Tumanova int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo) 604892b7de8SEkaterina Tumanova { 605892b7de8SEkaterina Tumanova BlockDriver *drv = bs->drv; 606892b7de8SEkaterina Tumanova 607892b7de8SEkaterina Tumanova if (drv && drv->bdrv_probe_geometry) { 608892b7de8SEkaterina Tumanova return drv->bdrv_probe_geometry(bs, geo); 609892b7de8SEkaterina Tumanova } 610892b7de8SEkaterina Tumanova 611892b7de8SEkaterina Tumanova return -ENOTSUP; 612892b7de8SEkaterina Tumanova } 613892b7de8SEkaterina Tumanova 614eba25057SJim Meyering /* 615eba25057SJim Meyering * Create a uniquely-named empty temporary file. 
616eba25057SJim Meyering * Return 0 upon success, otherwise a negative errno value. 617eba25057SJim Meyering */ 618eba25057SJim Meyering int get_tmp_filename(char *filename, int size) 619eba25057SJim Meyering { 620d5249393Sbellard #ifdef _WIN32 6213b9f94e1Sbellard char temp_dir[MAX_PATH]; 622eba25057SJim Meyering /* GetTempFileName requires that its output buffer (4th param) 623eba25057SJim Meyering have length MAX_PATH or greater. */ 624eba25057SJim Meyering assert(size >= MAX_PATH); 625eba25057SJim Meyering return (GetTempPath(MAX_PATH, temp_dir) 626eba25057SJim Meyering && GetTempFileName(temp_dir, "qem", 0, filename) 627eba25057SJim Meyering ? 0 : -GetLastError()); 628d5249393Sbellard #else 629ea2384d3Sbellard int fd; 6307ccfb2ebSblueswir1 const char *tmpdir; 6310badc1eeSaurel32 tmpdir = getenv("TMPDIR"); 63269bef793SAmit Shah if (!tmpdir) { 63369bef793SAmit Shah tmpdir = "/var/tmp"; 63469bef793SAmit Shah } 635eba25057SJim Meyering if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) { 636eba25057SJim Meyering return -EOVERFLOW; 637ea2384d3Sbellard } 638eba25057SJim Meyering fd = mkstemp(filename); 639fe235a06SDunrong Huang if (fd < 0) { 640fe235a06SDunrong Huang return -errno; 641fe235a06SDunrong Huang } 642fe235a06SDunrong Huang if (close(fd) != 0) { 643fe235a06SDunrong Huang unlink(filename); 644eba25057SJim Meyering return -errno; 645eba25057SJim Meyering } 646eba25057SJim Meyering return 0; 647d5249393Sbellard #endif 648eba25057SJim Meyering } 649ea2384d3Sbellard 650f3a5d3f8SChristoph Hellwig /* 651f3a5d3f8SChristoph Hellwig * Detect host devices. By convention, /dev/cdrom[N] is always 652f3a5d3f8SChristoph Hellwig * recognized as a host CDROM. 
653f3a5d3f8SChristoph Hellwig */ 654f3a5d3f8SChristoph Hellwig static BlockDriver *find_hdev_driver(const char *filename) 655f3a5d3f8SChristoph Hellwig { 656508c7cb3SChristoph Hellwig int score_max = 0, score; 657508c7cb3SChristoph Hellwig BlockDriver *drv = NULL, *d; 658f3a5d3f8SChristoph Hellwig 6598a22f02aSStefan Hajnoczi QLIST_FOREACH(d, &bdrv_drivers, list) { 660508c7cb3SChristoph Hellwig if (d->bdrv_probe_device) { 661508c7cb3SChristoph Hellwig score = d->bdrv_probe_device(filename); 662508c7cb3SChristoph Hellwig if (score > score_max) { 663508c7cb3SChristoph Hellwig score_max = score; 664508c7cb3SChristoph Hellwig drv = d; 665f3a5d3f8SChristoph Hellwig } 666508c7cb3SChristoph Hellwig } 667f3a5d3f8SChristoph Hellwig } 668f3a5d3f8SChristoph Hellwig 669508c7cb3SChristoph Hellwig return drv; 670f3a5d3f8SChristoph Hellwig } 671f3a5d3f8SChristoph Hellwig 67298289620SKevin Wolf BlockDriver *bdrv_find_protocol(const char *filename, 673b65a5e12SMax Reitz bool allow_protocol_prefix, 674b65a5e12SMax Reitz Error **errp) 67584a12e66SChristoph Hellwig { 67684a12e66SChristoph Hellwig BlockDriver *drv1; 67784a12e66SChristoph Hellwig char protocol[128]; 67884a12e66SChristoph Hellwig int len; 67984a12e66SChristoph Hellwig const char *p; 68084a12e66SChristoph Hellwig 68166f82ceeSKevin Wolf /* TODO Drivers without bdrv_file_open must be specified explicitly */ 68266f82ceeSKevin Wolf 68339508e7aSChristoph Hellwig /* 68439508e7aSChristoph Hellwig * XXX(hch): we really should not let host device detection 68539508e7aSChristoph Hellwig * override an explicit protocol specification, but moving this 68639508e7aSChristoph Hellwig * later breaks access to device names with colons in them. 68739508e7aSChristoph Hellwig * Thanks to the brain-dead persistent naming schemes on udev- 68839508e7aSChristoph Hellwig * based Linux systems those actually are quite common. 
68939508e7aSChristoph Hellwig */ 69084a12e66SChristoph Hellwig drv1 = find_hdev_driver(filename); 69139508e7aSChristoph Hellwig if (drv1) { 69284a12e66SChristoph Hellwig return drv1; 69384a12e66SChristoph Hellwig } 69439508e7aSChristoph Hellwig 69598289620SKevin Wolf if (!path_has_protocol(filename) || !allow_protocol_prefix) { 696ef810437SMax Reitz return &bdrv_file; 69739508e7aSChristoph Hellwig } 69898289620SKevin Wolf 6999e0b22f4SStefan Hajnoczi p = strchr(filename, ':'); 7009e0b22f4SStefan Hajnoczi assert(p != NULL); 70184a12e66SChristoph Hellwig len = p - filename; 70284a12e66SChristoph Hellwig if (len > sizeof(protocol) - 1) 70384a12e66SChristoph Hellwig len = sizeof(protocol) - 1; 70484a12e66SChristoph Hellwig memcpy(protocol, filename, len); 70584a12e66SChristoph Hellwig protocol[len] = '\0'; 70684a12e66SChristoph Hellwig QLIST_FOREACH(drv1, &bdrv_drivers, list) { 70784a12e66SChristoph Hellwig if (drv1->protocol_name && 70884a12e66SChristoph Hellwig !strcmp(drv1->protocol_name, protocol)) { 70984a12e66SChristoph Hellwig return drv1; 71084a12e66SChristoph Hellwig } 71184a12e66SChristoph Hellwig } 712b65a5e12SMax Reitz 713b65a5e12SMax Reitz error_setg(errp, "Unknown protocol '%s'", protocol); 71484a12e66SChristoph Hellwig return NULL; 71584a12e66SChristoph Hellwig } 71684a12e66SChristoph Hellwig 717c6684249SMarkus Armbruster /* 718c6684249SMarkus Armbruster * Guess image format by probing its contents. 719c6684249SMarkus Armbruster * This is not a good idea when your image is raw (CVE-2008-2004), but 720c6684249SMarkus Armbruster * we do it anyway for backward compatibility. 721c6684249SMarkus Armbruster * 722c6684249SMarkus Armbruster * @buf contains the image's first @buf_size bytes. 7237cddd372SKevin Wolf * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE, 7247cddd372SKevin Wolf * but can be smaller if the image file is smaller) 725c6684249SMarkus Armbruster * @filename is its filename. 
726c6684249SMarkus Armbruster * 727c6684249SMarkus Armbruster * For all block drivers, call the bdrv_probe() method to get its 728c6684249SMarkus Armbruster * probing score. 729c6684249SMarkus Armbruster * Return the first block driver with the highest probing score. 730c6684249SMarkus Armbruster */ 73138f3ef57SKevin Wolf BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, 732c6684249SMarkus Armbruster const char *filename) 733c6684249SMarkus Armbruster { 734c6684249SMarkus Armbruster int score_max = 0, score; 735c6684249SMarkus Armbruster BlockDriver *drv = NULL, *d; 736c6684249SMarkus Armbruster 737c6684249SMarkus Armbruster QLIST_FOREACH(d, &bdrv_drivers, list) { 738c6684249SMarkus Armbruster if (d->bdrv_probe) { 739c6684249SMarkus Armbruster score = d->bdrv_probe(buf, buf_size, filename); 740c6684249SMarkus Armbruster if (score > score_max) { 741c6684249SMarkus Armbruster score_max = score; 742c6684249SMarkus Armbruster drv = d; 743c6684249SMarkus Armbruster } 744c6684249SMarkus Armbruster } 745c6684249SMarkus Armbruster } 746c6684249SMarkus Armbruster 747c6684249SMarkus Armbruster return drv; 748c6684249SMarkus Armbruster } 749c6684249SMarkus Armbruster 750f500a6d3SKevin Wolf static int find_image_format(BlockDriverState *bs, const char *filename, 75134b5d2c6SMax Reitz BlockDriver **pdrv, Error **errp) 752ea2384d3Sbellard { 753c6684249SMarkus Armbruster BlockDriver *drv; 7547cddd372SKevin Wolf uint8_t buf[BLOCK_PROBE_BUF_SIZE]; 755f500a6d3SKevin Wolf int ret = 0; 756f8ea0b00SNicholas Bellinger 75708a00559SKevin Wolf /* Return the raw BlockDriver * to scsi-generic devices or empty drives */ 7588e895599SPaolo Bonzini if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) { 759ef810437SMax Reitz *pdrv = &bdrv_raw; 760c98ac35dSStefan Weil return ret; 7611a396859SNicholas A. 
Bellinger } 762f8ea0b00SNicholas Bellinger 76383f64091Sbellard ret = bdrv_pread(bs, 0, buf, sizeof(buf)); 764ea2384d3Sbellard if (ret < 0) { 76534b5d2c6SMax Reitz error_setg_errno(errp, -ret, "Could not read image for determining its " 76634b5d2c6SMax Reitz "format"); 767c98ac35dSStefan Weil *pdrv = NULL; 768c98ac35dSStefan Weil return ret; 769ea2384d3Sbellard } 770ea2384d3Sbellard 771c6684249SMarkus Armbruster drv = bdrv_probe_all(buf, ret, filename); 772c98ac35dSStefan Weil if (!drv) { 77334b5d2c6SMax Reitz error_setg(errp, "Could not determine image format: No compatible " 77434b5d2c6SMax Reitz "driver found"); 775c98ac35dSStefan Weil ret = -ENOENT; 776c98ac35dSStefan Weil } 777c98ac35dSStefan Weil *pdrv = drv; 778c98ac35dSStefan Weil return ret; 779ea2384d3Sbellard } 780ea2384d3Sbellard 78151762288SStefan Hajnoczi /** 78251762288SStefan Hajnoczi * Set the current 'total_sectors' value 78365a9bb25SMarkus Armbruster * Return 0 on success, -errno on error. 78451762288SStefan Hajnoczi */ 78551762288SStefan Hajnoczi static int refresh_total_sectors(BlockDriverState *bs, int64_t hint) 78651762288SStefan Hajnoczi { 78751762288SStefan Hajnoczi BlockDriver *drv = bs->drv; 78851762288SStefan Hajnoczi 789396759adSNicholas Bellinger /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */ 790396759adSNicholas Bellinger if (bs->sg) 791396759adSNicholas Bellinger return 0; 792396759adSNicholas Bellinger 79351762288SStefan Hajnoczi /* query actual device if possible, otherwise just trust the hint */ 79451762288SStefan Hajnoczi if (drv->bdrv_getlength) { 79551762288SStefan Hajnoczi int64_t length = drv->bdrv_getlength(bs); 79651762288SStefan Hajnoczi if (length < 0) { 79751762288SStefan Hajnoczi return length; 79851762288SStefan Hajnoczi } 7997e382003SFam Zheng hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE); 80051762288SStefan Hajnoczi } 80151762288SStefan Hajnoczi 80251762288SStefan Hajnoczi bs->total_sectors = hint; 80351762288SStefan Hajnoczi return 0; 
80451762288SStefan Hajnoczi } 80551762288SStefan Hajnoczi 806c3993cdcSStefan Hajnoczi /** 8079e8f1835SPaolo Bonzini * Set open flags for a given discard mode 8089e8f1835SPaolo Bonzini * 8099e8f1835SPaolo Bonzini * Return 0 on success, -1 if the discard mode was invalid. 8109e8f1835SPaolo Bonzini */ 8119e8f1835SPaolo Bonzini int bdrv_parse_discard_flags(const char *mode, int *flags) 8129e8f1835SPaolo Bonzini { 8139e8f1835SPaolo Bonzini *flags &= ~BDRV_O_UNMAP; 8149e8f1835SPaolo Bonzini 8159e8f1835SPaolo Bonzini if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) { 8169e8f1835SPaolo Bonzini /* do nothing */ 8179e8f1835SPaolo Bonzini } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) { 8189e8f1835SPaolo Bonzini *flags |= BDRV_O_UNMAP; 8199e8f1835SPaolo Bonzini } else { 8209e8f1835SPaolo Bonzini return -1; 8219e8f1835SPaolo Bonzini } 8229e8f1835SPaolo Bonzini 8239e8f1835SPaolo Bonzini return 0; 8249e8f1835SPaolo Bonzini } 8259e8f1835SPaolo Bonzini 8269e8f1835SPaolo Bonzini /** 827c3993cdcSStefan Hajnoczi * Set open flags for a given cache mode 828c3993cdcSStefan Hajnoczi * 829c3993cdcSStefan Hajnoczi * Return 0 on success, -1 if the cache mode was invalid. 
830c3993cdcSStefan Hajnoczi */ 831c3993cdcSStefan Hajnoczi int bdrv_parse_cache_flags(const char *mode, int *flags) 832c3993cdcSStefan Hajnoczi { 833c3993cdcSStefan Hajnoczi *flags &= ~BDRV_O_CACHE_MASK; 834c3993cdcSStefan Hajnoczi 835c3993cdcSStefan Hajnoczi if (!strcmp(mode, "off") || !strcmp(mode, "none")) { 836c3993cdcSStefan Hajnoczi *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB; 83792196b2fSStefan Hajnoczi } else if (!strcmp(mode, "directsync")) { 83892196b2fSStefan Hajnoczi *flags |= BDRV_O_NOCACHE; 839c3993cdcSStefan Hajnoczi } else if (!strcmp(mode, "writeback")) { 840c3993cdcSStefan Hajnoczi *flags |= BDRV_O_CACHE_WB; 841c3993cdcSStefan Hajnoczi } else if (!strcmp(mode, "unsafe")) { 842c3993cdcSStefan Hajnoczi *flags |= BDRV_O_CACHE_WB; 843c3993cdcSStefan Hajnoczi *flags |= BDRV_O_NO_FLUSH; 844c3993cdcSStefan Hajnoczi } else if (!strcmp(mode, "writethrough")) { 845c3993cdcSStefan Hajnoczi /* this is the default */ 846c3993cdcSStefan Hajnoczi } else { 847c3993cdcSStefan Hajnoczi return -1; 848c3993cdcSStefan Hajnoczi } 849c3993cdcSStefan Hajnoczi 850c3993cdcSStefan Hajnoczi return 0; 851c3993cdcSStefan Hajnoczi } 852c3993cdcSStefan Hajnoczi 85353fec9d3SStefan Hajnoczi /** 85453fec9d3SStefan Hajnoczi * The copy-on-read flag is actually a reference count so multiple users may 85553fec9d3SStefan Hajnoczi * use the feature without worrying about clobbering its previous state. 85653fec9d3SStefan Hajnoczi * Copy-on-read stays enabled until all users have called to disable it. 
85753fec9d3SStefan Hajnoczi */ 85853fec9d3SStefan Hajnoczi void bdrv_enable_copy_on_read(BlockDriverState *bs) 85953fec9d3SStefan Hajnoczi { 86053fec9d3SStefan Hajnoczi bs->copy_on_read++; 86153fec9d3SStefan Hajnoczi } 86253fec9d3SStefan Hajnoczi 86353fec9d3SStefan Hajnoczi void bdrv_disable_copy_on_read(BlockDriverState *bs) 86453fec9d3SStefan Hajnoczi { 86553fec9d3SStefan Hajnoczi assert(bs->copy_on_read > 0); 86653fec9d3SStefan Hajnoczi bs->copy_on_read--; 86753fec9d3SStefan Hajnoczi } 86853fec9d3SStefan Hajnoczi 8690b50cc88SKevin Wolf /* 870b1e6fc08SKevin Wolf * Returns the flags that a temporary snapshot should get, based on the 871b1e6fc08SKevin Wolf * originally requested flags (the originally requested image will have flags 872b1e6fc08SKevin Wolf * like a backing file) 873b1e6fc08SKevin Wolf */ 874b1e6fc08SKevin Wolf static int bdrv_temp_snapshot_flags(int flags) 875b1e6fc08SKevin Wolf { 876b1e6fc08SKevin Wolf return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY; 877b1e6fc08SKevin Wolf } 878b1e6fc08SKevin Wolf 879b1e6fc08SKevin Wolf /* 8800b50cc88SKevin Wolf * Returns the flags that bs->file should get, based on the given flags for 8810b50cc88SKevin Wolf * the parent BDS 8820b50cc88SKevin Wolf */ 8830b50cc88SKevin Wolf static int bdrv_inherited_flags(int flags) 8840b50cc88SKevin Wolf { 8850b50cc88SKevin Wolf /* Enable protocol handling, disable format probing for bs->file */ 8860b50cc88SKevin Wolf flags |= BDRV_O_PROTOCOL; 8870b50cc88SKevin Wolf 8880b50cc88SKevin Wolf /* Our block drivers take care to send flushes and respect unmap policy, 8890b50cc88SKevin Wolf * so we can enable both unconditionally on lower layers. 
*/ 8900b50cc88SKevin Wolf flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP; 8910b50cc88SKevin Wolf 8920b50cc88SKevin Wolf /* Clear flags that only apply to the top layer */ 8935669b44dSKevin Wolf flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ); 8940b50cc88SKevin Wolf 8950b50cc88SKevin Wolf return flags; 8960b50cc88SKevin Wolf } 8970b50cc88SKevin Wolf 898317fc44eSKevin Wolf /* 899317fc44eSKevin Wolf * Returns the flags that bs->backing_hd should get, based on the given flags 900317fc44eSKevin Wolf * for the parent BDS 901317fc44eSKevin Wolf */ 902317fc44eSKevin Wolf static int bdrv_backing_flags(int flags) 903317fc44eSKevin Wolf { 904317fc44eSKevin Wolf /* backing files always opened read-only */ 905317fc44eSKevin Wolf flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ); 906317fc44eSKevin Wolf 907317fc44eSKevin Wolf /* snapshot=on is handled on the top layer */ 9088bfea15dSKevin Wolf flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY); 909317fc44eSKevin Wolf 910317fc44eSKevin Wolf return flags; 911317fc44eSKevin Wolf } 912317fc44eSKevin Wolf 9137b272452SKevin Wolf static int bdrv_open_flags(BlockDriverState *bs, int flags) 9147b272452SKevin Wolf { 9157b272452SKevin Wolf int open_flags = flags | BDRV_O_CACHE_WB; 9167b272452SKevin Wolf 9177b272452SKevin Wolf /* 9187b272452SKevin Wolf * Clear flags that are internal to the block layer before opening the 9197b272452SKevin Wolf * image. 9207b272452SKevin Wolf */ 92120cca275SKevin Wolf open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL); 9227b272452SKevin Wolf 9237b272452SKevin Wolf /* 9247b272452SKevin Wolf * Snapshots should be writable. 
9257b272452SKevin Wolf */ 9268bfea15dSKevin Wolf if (flags & BDRV_O_TEMPORARY) { 9277b272452SKevin Wolf open_flags |= BDRV_O_RDWR; 9287b272452SKevin Wolf } 9297b272452SKevin Wolf 9307b272452SKevin Wolf return open_flags; 9317b272452SKevin Wolf } 9327b272452SKevin Wolf 933636ea370SKevin Wolf static void bdrv_assign_node_name(BlockDriverState *bs, 9346913c0c2SBenoît Canet const char *node_name, 9356913c0c2SBenoît Canet Error **errp) 9366913c0c2SBenoît Canet { 9376913c0c2SBenoît Canet if (!node_name) { 938636ea370SKevin Wolf return; 9396913c0c2SBenoît Canet } 9406913c0c2SBenoît Canet 9419aebf3b8SKevin Wolf /* Check for empty string or invalid characters */ 942f5bebbbbSMarkus Armbruster if (!id_wellformed(node_name)) { 9439aebf3b8SKevin Wolf error_setg(errp, "Invalid node name"); 944636ea370SKevin Wolf return; 9456913c0c2SBenoît Canet } 9466913c0c2SBenoît Canet 9470c5e94eeSBenoît Canet /* takes care of avoiding namespaces collisions */ 9487f06d47eSMarkus Armbruster if (blk_by_name(node_name)) { 9490c5e94eeSBenoît Canet error_setg(errp, "node-name=%s is conflicting with a device id", 9500c5e94eeSBenoît Canet node_name); 951636ea370SKevin Wolf return; 9520c5e94eeSBenoît Canet } 9530c5e94eeSBenoît Canet 9546913c0c2SBenoît Canet /* takes care of avoiding duplicates node names */ 9556913c0c2SBenoît Canet if (bdrv_find_node(node_name)) { 9566913c0c2SBenoît Canet error_setg(errp, "Duplicate node name"); 957636ea370SKevin Wolf return; 9586913c0c2SBenoît Canet } 9596913c0c2SBenoît Canet 9606913c0c2SBenoît Canet /* copy node name into the bs and insert it into the graph list */ 9616913c0c2SBenoît Canet pstrcpy(bs->node_name, sizeof(bs->node_name), node_name); 9626913c0c2SBenoît Canet QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list); 9636913c0c2SBenoît Canet } 9646913c0c2SBenoît Canet 965b6ce07aaSKevin Wolf /* 96657915332SKevin Wolf * Common part for opening disk images and files 967b6ad491aSKevin Wolf * 968b6ad491aSKevin Wolf * Removes all processed options from *options. 
 */
/*
 * @bs:      state being opened; bs->file must still be NULL
 * @file:    already opened protocol layer, or NULL
 * @options: non-NULL option dictionary, distinct from bs->options
 * @flags:   BDRV_O_* flags as requested for @bs
 * @drv:     non-NULL driver to open @bs with
 *
 * Returns 0 on success, a negative errno value with @errp set on failure.
 */
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
    QDict *options, int flags, BlockDriver *drv, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *node_name = NULL;
    Error *local_err = NULL;

    assert(drv != NULL);
    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    /* The file name comes from the protocol layer if there is one,
     * otherwise from the "filename" option (which may be absent). */
    if (file != NULL) {
        filename = file->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        return -EINVAL;
    }

    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);

    /* "node-name" is consumed here: assigned to bs, then dropped from
     * the options so the driver never sees it. */
    node_name = qdict_get_try_str(options, "node-name");
    bdrv_assign_node_name(bs, node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return -EINVAL;
    }
    qdict_del(options, "node-name");

    /* bdrv_open() with directly using a protocol as drv. This layer is already
     * opened, so assign it to bs (while file becomes a closed BlockDriverState)
     * and return immediately. */
    if (file != NULL && drv->bdrv_file_open) {
        bdrv_swap(file, bs);
        return 0;
    }

    /* Defaults that the driver's open callback may override */
    bs->open_flags = flags;
    bs->guest_block_size = 512;
    bs->request_alignment = 512;
    bs->zero_beyond_eof = true;
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);

    /* The error text distinguishes "allowed read-only only" from
     * "not whitelisted at all". */
    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                        ? "Driver '%s' can only be used for read-only devices"
                        : "Driver '%s' is not whitelisted",
                   drv->format_name);
        return -ENOTSUP;
    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if (flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            return -EINVAL;
        }
    }

    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);

    /* From here on failures must leave through free_and_fail, which
     * releases bs->opaque and resets bs->drv / bs->file. */
    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(!drv->bdrv_needs_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else {
        if (file == NULL) {
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    }

    if (ret < 0) {
        /* Prefer the driver's own error; otherwise synthesize one */
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto free_and_fail;
    }

    if (bs->encrypted) {
        error_report("Encrypted images are deprecated");
        error_printf("Support for them will be removed in a future release.\n"
                     "You can use 'qemu-img convert' to convert your image"
                     " to an unencrypted one.\n");
    }

    /* NOTE(review): the two failure paths below run after the driver's open
     * callback succeeded, yet free_and_fail does not call any driver close
     * callback -- confirm whether callers clean that up. */
    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        goto free_and_fail;
    }

    bdrv_refresh_limits(bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto free_and_fail;
    }

    assert(bdrv_opt_mem_align(bs) != 0);
    assert((bs->request_alignment != 0) || bs->sg);
    return 0;

free_and_fail:
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}

/*
 * Parses a "json:{...}" pseudo-filename into a flattened options QDict.
 *
 * @filename must start with "json:" (asserted).  Returns the new QDict, or
 * NULL with @errp set if the text is not valid JSON or is not a JSON object.
 */
static QDict *parse_json_filename(const char *filename, Error **errp)
{
    QObject *options_obj;
    QDict *options;
    int ret;

    /* strstart() advances filename past the "json:" prefix */
    ret = strstart(filename, "json:", &filename);
    assert(ret);

    options_obj = qobject_from_json(filename);
    if (!options_obj) {
        error_setg(errp, "Could not parse the JSON options");
        return NULL;
    }

    if (qobject_type(options_obj) != QTYPE_QDICT) {
        qobject_decref(options_obj);
        error_setg(errp, "Invalid JSON object given");
        return NULL;
    }

    options = qobject_to_qdict(options_obj);
    qdict_flatten(options);

    return options;
}

/*
 * Fills in default options for opening images and converts the legacy
 * filename/flags pair to option QDict entries.
 *
 * *@pfilename is reset to NULL when it was a "json:" pseudo-filename whose
 * contents have been merged into *@options.
 *
 * Returns 0 on success, a negative errno value with @errp set on failure.
 */
static int bdrv_fill_options(QDict **options, const char **pfilename, int flags,
                             BlockDriver *drv, Error **errp)
{
    const char *filename = *pfilename;
    const char *drvname;
    bool protocol = flags & BDRV_O_PROTOCOL;
    bool parse_filename = false;
    Error *local_err = NULL;

    /* Parse json: pseudo-protocol */
    if (filename && g_str_has_prefix(filename, "json:")) {
        QDict *json_options = parse_json_filename(filename, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        /* Options given in the filename have lower priority than options
         * specified directly */
        qdict_join(*options, json_options, false);
        QDECREF(json_options);
        *pfilename = filename = NULL;
    }

    /* Fetch the file name from the options QDict if necessary */
    if (protocol && filename) {
        if (!qdict_haskey(*options, "filename")) {
            qdict_put(*options, "filename", qstring_from_str(filename));
            /* Only a legacy file name (not an explicit option) is
             * subject to protocol-prefix / driver-specific parsing. */
            parse_filename = true;
        } else {
            error_setg(errp, "Can't specify 'file' and 'filename' options at "
                             "the same time");
            return -EINVAL;
        }
    }

    /* Find the right block driver */
    filename = qdict_get_try_str(*options, "filename");
    drvname = qdict_get_try_str(*options, "driver");

    if (drv) {
        /* Caller forced a driver; it must not also appear in the options */
        if (drvname) {
            error_setg(errp, "Driver specified twice");
            return -EINVAL;
        }
        drvname = drv->format_name;
        qdict_put(*options, "driver", qstring_from_str(drvname));
    } else {
        if (!drvname && protocol) {
            if (filename) {
                drv = bdrv_find_protocol(filename, parse_filename, errp);
                if (!drv) {
                    return -EINVAL;
                }

                drvname = drv->format_name;
                qdict_put(*options, "driver", qstring_from_str(drvname));
            } else {
                error_setg(errp, "Must specify either driver or file");
                return -EINVAL;
            }
        } else if (drvname) {
            drv = bdrv_find_format(drvname);
            if (!drv) {
                error_setg(errp, "Unknown driver '%s'", drvname);
                return -ENOENT;
            }
        }
    }

    /* At the protocol level a driver is always known by now */
    assert(drv || !protocol);

    /* Driver-specific filename parsing */
    if (drv && drv->bdrv_parse_filename && parse_filename) {
        drv->bdrv_parse_filename(filename, *options, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        if (!drv->bdrv_needs_filename) {
            qdict_del(*options, "filename");
        }
    }

    return 0;
}

/*
 * Makes @backing_hd the backing file of @bs, or detaches the current one
 * when @backing_hd is NULL.
 *
 * The new backing node gets every operation blocked via bs->backing_blocker
 * except BLOCK_OP_TYPE_COMMIT_TARGET; a previously attached backing node has
 * that blocker removed again.
 */
void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
{

    if (bs->backing_hd) {
        /* Release the old backing node; the blocker object is reused */
        assert(bs->backing_blocker);
        bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
    } else if (backing_hd) {
        /* First backing node: create the blocker describing the user */
        error_setg(&bs->backing_blocker,
                   "node is used as backing hd of '%s'",
                   bdrv_get_device_or_node_name(bs));
    }

    bs->backing_hd = backing_hd;
    if (!backing_hd) {
        error_free(bs->backing_blocker);
        bs->backing_blocker = NULL;
        goto out;
    }
    bs->open_flags &= ~BDRV_O_NO_BACKING;
    pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
    pstrcpy(bs->backing_format, sizeof(bs->backing_format),
            backing_hd->drv ? backing_hd->drv->format_name : "");

    bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
    /* Otherwise we won't be able to commit due to check in bdrv_commit */
    bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
                    bs->backing_blocker);
out:
    bdrv_refresh_limits(bs, NULL);
}

/*
 * Opens the backing file for a BlockDriverState if not yet open
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict is transferred to this
 * function (even on failure), so if the caller intends to reuse the dictionary,
 * it needs to use QINCREF() before calling bdrv_open_backing_file().
126631ca6d07SKevin Wolf */ 126734b5d2c6SMax Reitz int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) 12689156df12SPaolo Bonzini { 12691ba4b6a5SBenoît Canet char *backing_filename = g_malloc0(PATH_MAX); 1270317fc44eSKevin Wolf int ret = 0; 12718d24cce1SFam Zheng BlockDriverState *backing_hd; 127234b5d2c6SMax Reitz Error *local_err = NULL; 12739156df12SPaolo Bonzini 12749156df12SPaolo Bonzini if (bs->backing_hd != NULL) { 127531ca6d07SKevin Wolf QDECREF(options); 12761ba4b6a5SBenoît Canet goto free_exit; 12779156df12SPaolo Bonzini } 12789156df12SPaolo Bonzini 127931ca6d07SKevin Wolf /* NULL means an empty set of options */ 128031ca6d07SKevin Wolf if (options == NULL) { 128131ca6d07SKevin Wolf options = qdict_new(); 128231ca6d07SKevin Wolf } 128331ca6d07SKevin Wolf 12849156df12SPaolo Bonzini bs->open_flags &= ~BDRV_O_NO_BACKING; 12851cb6f506SKevin Wolf if (qdict_haskey(options, "file.filename")) { 12861cb6f506SKevin Wolf backing_filename[0] = '\0'; 12871cb6f506SKevin Wolf } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) { 128831ca6d07SKevin Wolf QDECREF(options); 12891ba4b6a5SBenoît Canet goto free_exit; 1290dbecebddSFam Zheng } else { 12919f07429eSMax Reitz bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX, 12929f07429eSMax Reitz &local_err); 12939f07429eSMax Reitz if (local_err) { 12949f07429eSMax Reitz ret = -EINVAL; 12959f07429eSMax Reitz error_propagate(errp, local_err); 12969f07429eSMax Reitz QDECREF(options); 12979f07429eSMax Reitz goto free_exit; 12989f07429eSMax Reitz } 12999156df12SPaolo Bonzini } 13009156df12SPaolo Bonzini 13018ee79e70SKevin Wolf if (!bs->drv || !bs->drv->supports_backing) { 13028ee79e70SKevin Wolf ret = -EINVAL; 13038ee79e70SKevin Wolf error_setg(errp, "Driver doesn't support backing files"); 13048ee79e70SKevin Wolf QDECREF(options); 13058ee79e70SKevin Wolf goto free_exit; 13068ee79e70SKevin Wolf } 13078ee79e70SKevin Wolf 1308e4e9986bSMarkus Armbruster backing_hd = bdrv_new(); 
13098d24cce1SFam Zheng 1310c5f6e493SKevin Wolf if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) { 1311c5f6e493SKevin Wolf qdict_put(options, "driver", qstring_from_str(bs->backing_format)); 13129156df12SPaolo Bonzini } 13139156df12SPaolo Bonzini 1314f67503e5SMax Reitz assert(bs->backing_hd == NULL); 13158d24cce1SFam Zheng ret = bdrv_open(&backing_hd, 1316ddf5636dSMax Reitz *backing_filename ? backing_filename : NULL, NULL, options, 1317c5f6e493SKevin Wolf bdrv_backing_flags(bs->open_flags), NULL, &local_err); 13189156df12SPaolo Bonzini if (ret < 0) { 13198d24cce1SFam Zheng bdrv_unref(backing_hd); 13208d24cce1SFam Zheng backing_hd = NULL; 13219156df12SPaolo Bonzini bs->open_flags |= BDRV_O_NO_BACKING; 1322b04b6b6eSFam Zheng error_setg(errp, "Could not open backing file: %s", 1323b04b6b6eSFam Zheng error_get_pretty(local_err)); 1324b04b6b6eSFam Zheng error_free(local_err); 13251ba4b6a5SBenoît Canet goto free_exit; 13269156df12SPaolo Bonzini } 13278d24cce1SFam Zheng bdrv_set_backing_hd(bs, backing_hd); 1328d80ac658SPeter Feiner 13291ba4b6a5SBenoît Canet free_exit: 13301ba4b6a5SBenoît Canet g_free(backing_filename); 13311ba4b6a5SBenoît Canet return ret; 13329156df12SPaolo Bonzini } 13339156df12SPaolo Bonzini 1334b6ce07aaSKevin Wolf /* 1335da557aacSMax Reitz * Opens a disk image whose options are given as BlockdevRef in another block 1336da557aacSMax Reitz * device's options. 1337da557aacSMax Reitz * 1338da557aacSMax Reitz * If allow_none is true, no image will be opened if filename is false and no 1339da557aacSMax Reitz * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned. 1340da557aacSMax Reitz * 1341da557aacSMax Reitz * bdrev_key specifies the key for the image's BlockdevRef in the options QDict. 1342da557aacSMax Reitz * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict 1343da557aacSMax Reitz * itself, all options starting with "${bdref_key}." 
are considered part of the 1344da557aacSMax Reitz * BlockdevRef. 1345da557aacSMax Reitz * 1346da557aacSMax Reitz * The BlockdevRef will be removed from the options QDict. 1347f67503e5SMax Reitz * 1348f67503e5SMax Reitz * To conform with the behavior of bdrv_open(), *pbs has to be NULL. 1349da557aacSMax Reitz */ 1350da557aacSMax Reitz int bdrv_open_image(BlockDriverState **pbs, const char *filename, 1351da557aacSMax Reitz QDict *options, const char *bdref_key, int flags, 1352f7d9fd8cSMax Reitz bool allow_none, Error **errp) 1353da557aacSMax Reitz { 1354da557aacSMax Reitz QDict *image_options; 1355da557aacSMax Reitz int ret; 1356da557aacSMax Reitz char *bdref_key_dot; 1357da557aacSMax Reitz const char *reference; 1358da557aacSMax Reitz 1359f67503e5SMax Reitz assert(pbs); 1360f67503e5SMax Reitz assert(*pbs == NULL); 1361f67503e5SMax Reitz 1362da557aacSMax Reitz bdref_key_dot = g_strdup_printf("%s.", bdref_key); 1363da557aacSMax Reitz qdict_extract_subqdict(options, &image_options, bdref_key_dot); 1364da557aacSMax Reitz g_free(bdref_key_dot); 1365da557aacSMax Reitz 1366da557aacSMax Reitz reference = qdict_get_try_str(options, bdref_key); 1367da557aacSMax Reitz if (!filename && !reference && !qdict_size(image_options)) { 1368da557aacSMax Reitz if (allow_none) { 1369da557aacSMax Reitz ret = 0; 1370da557aacSMax Reitz } else { 1371da557aacSMax Reitz error_setg(errp, "A block device must be specified for \"%s\"", 1372da557aacSMax Reitz bdref_key); 1373da557aacSMax Reitz ret = -EINVAL; 1374da557aacSMax Reitz } 1375b20e61e0SMarkus Armbruster QDECREF(image_options); 1376da557aacSMax Reitz goto done; 1377da557aacSMax Reitz } 1378da557aacSMax Reitz 1379f7d9fd8cSMax Reitz ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp); 1380da557aacSMax Reitz 1381da557aacSMax Reitz done: 1382da557aacSMax Reitz qdict_del(options, bdref_key); 1383da557aacSMax Reitz return ret; 1384da557aacSMax Reitz } 1385da557aacSMax Reitz 13866b8aeca5SChen Gang int 
bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp) 1387b998875dSKevin Wolf { 1388b998875dSKevin Wolf /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */ 13891ba4b6a5SBenoît Canet char *tmp_filename = g_malloc0(PATH_MAX + 1); 1390b998875dSKevin Wolf int64_t total_size; 139183d0521aSChunyan Liu QemuOpts *opts = NULL; 1392b998875dSKevin Wolf QDict *snapshot_options; 1393b998875dSKevin Wolf BlockDriverState *bs_snapshot; 1394b998875dSKevin Wolf Error *local_err; 1395b998875dSKevin Wolf int ret; 1396b998875dSKevin Wolf 1397b998875dSKevin Wolf /* if snapshot, we create a temporary backing file and open it 1398b998875dSKevin Wolf instead of opening 'filename' directly */ 1399b998875dSKevin Wolf 1400b998875dSKevin Wolf /* Get the required size from the image */ 1401f187743aSKevin Wolf total_size = bdrv_getlength(bs); 1402f187743aSKevin Wolf if (total_size < 0) { 14036b8aeca5SChen Gang ret = total_size; 1404f187743aSKevin Wolf error_setg_errno(errp, -total_size, "Could not get image size"); 14051ba4b6a5SBenoît Canet goto out; 1406f187743aSKevin Wolf } 1407b998875dSKevin Wolf 1408b998875dSKevin Wolf /* Create the temporary image */ 14091ba4b6a5SBenoît Canet ret = get_tmp_filename(tmp_filename, PATH_MAX + 1); 1410b998875dSKevin Wolf if (ret < 0) { 1411b998875dSKevin Wolf error_setg_errno(errp, -ret, "Could not get temporary filename"); 14121ba4b6a5SBenoît Canet goto out; 1413b998875dSKevin Wolf } 1414b998875dSKevin Wolf 1415ef810437SMax Reitz opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0, 1416c282e1fdSChunyan Liu &error_abort); 141739101f25SMarkus Armbruster qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort); 1418ef810437SMax Reitz ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err); 141983d0521aSChunyan Liu qemu_opts_del(opts); 1420b998875dSKevin Wolf if (ret < 0) { 1421b998875dSKevin Wolf error_setg_errno(errp, -ret, "Could not create temporary overlay " 1422b998875dSKevin Wolf "'%s': %s", 
tmp_filename, 1423b998875dSKevin Wolf error_get_pretty(local_err)); 1424b998875dSKevin Wolf error_free(local_err); 14251ba4b6a5SBenoît Canet goto out; 1426b998875dSKevin Wolf } 1427b998875dSKevin Wolf 1428b998875dSKevin Wolf /* Prepare a new options QDict for the temporary file */ 1429b998875dSKevin Wolf snapshot_options = qdict_new(); 1430b998875dSKevin Wolf qdict_put(snapshot_options, "file.driver", 1431b998875dSKevin Wolf qstring_from_str("file")); 1432b998875dSKevin Wolf qdict_put(snapshot_options, "file.filename", 1433b998875dSKevin Wolf qstring_from_str(tmp_filename)); 1434b998875dSKevin Wolf 1435e4e9986bSMarkus Armbruster bs_snapshot = bdrv_new(); 1436b998875dSKevin Wolf 1437b998875dSKevin Wolf ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options, 1438ef810437SMax Reitz flags, &bdrv_qcow2, &local_err); 1439b998875dSKevin Wolf if (ret < 0) { 1440b998875dSKevin Wolf error_propagate(errp, local_err); 14411ba4b6a5SBenoît Canet goto out; 1442b998875dSKevin Wolf } 1443b998875dSKevin Wolf 1444b998875dSKevin Wolf bdrv_append(bs_snapshot, bs); 14451ba4b6a5SBenoît Canet 14461ba4b6a5SBenoît Canet out: 14471ba4b6a5SBenoît Canet g_free(tmp_filename); 14486b8aeca5SChen Gang return ret; 1449b998875dSKevin Wolf } 1450b998875dSKevin Wolf 1451da557aacSMax Reitz /* 1452b6ce07aaSKevin Wolf * Opens a disk image (raw, qcow2, vmdk, ...) 1453de9c0cecSKevin Wolf * 1454de9c0cecSKevin Wolf * options is a QDict of options to pass to the block drivers, or NULL for an 1455de9c0cecSKevin Wolf * empty set of options. The reference to the QDict belongs to the block layer 1456de9c0cecSKevin Wolf * after the call (even on failure), so if the caller intends to reuse the 1457de9c0cecSKevin Wolf * dictionary, it needs to use QINCREF() before calling bdrv_open. 1458f67503e5SMax Reitz * 1459f67503e5SMax Reitz * If *pbs is NULL, a new BDS will be created with a pointer to it stored there. 1460f67503e5SMax Reitz * If it is not NULL, the referenced BDS will be reused. 
1461ddf5636dSMax Reitz * 1462ddf5636dSMax Reitz * The reference parameter may be used to specify an existing block device which 1463ddf5636dSMax Reitz * should be opened. If specified, neither options nor a filename may be given, 1464ddf5636dSMax Reitz * nor can an existing BDS be reused (that is, *pbs has to be NULL). 1465b6ce07aaSKevin Wolf */ 1466ddf5636dSMax Reitz int bdrv_open(BlockDriverState **pbs, const char *filename, 1467ddf5636dSMax Reitz const char *reference, QDict *options, int flags, 1468ddf5636dSMax Reitz BlockDriver *drv, Error **errp) 1469ea2384d3Sbellard { 1470b6ce07aaSKevin Wolf int ret; 1471f67503e5SMax Reitz BlockDriverState *file = NULL, *bs; 147274fe54f2SKevin Wolf const char *drvname; 147334b5d2c6SMax Reitz Error *local_err = NULL; 1474b1e6fc08SKevin Wolf int snapshot_flags = 0; 147533e3963eSbellard 1476f67503e5SMax Reitz assert(pbs); 1477f67503e5SMax Reitz 1478ddf5636dSMax Reitz if (reference) { 1479ddf5636dSMax Reitz bool options_non_empty = options ? qdict_size(options) : false; 1480ddf5636dSMax Reitz QDECREF(options); 1481ddf5636dSMax Reitz 1482ddf5636dSMax Reitz if (*pbs) { 1483ddf5636dSMax Reitz error_setg(errp, "Cannot reuse an existing BDS when referencing " 1484ddf5636dSMax Reitz "another block device"); 1485ddf5636dSMax Reitz return -EINVAL; 1486ddf5636dSMax Reitz } 1487ddf5636dSMax Reitz 1488ddf5636dSMax Reitz if (filename || options_non_empty) { 1489ddf5636dSMax Reitz error_setg(errp, "Cannot reference an existing block device with " 1490ddf5636dSMax Reitz "additional options or a new filename"); 1491ddf5636dSMax Reitz return -EINVAL; 1492ddf5636dSMax Reitz } 1493ddf5636dSMax Reitz 1494ddf5636dSMax Reitz bs = bdrv_lookup_bs(reference, reference, errp); 1495ddf5636dSMax Reitz if (!bs) { 1496ddf5636dSMax Reitz return -ENODEV; 1497ddf5636dSMax Reitz } 1498ddf5636dSMax Reitz bdrv_ref(bs); 1499ddf5636dSMax Reitz *pbs = bs; 1500ddf5636dSMax Reitz return 0; 1501ddf5636dSMax Reitz } 1502ddf5636dSMax Reitz 1503f67503e5SMax Reitz if 
(*pbs) { 1504f67503e5SMax Reitz bs = *pbs; 1505f67503e5SMax Reitz } else { 1506e4e9986bSMarkus Armbruster bs = bdrv_new(); 1507f67503e5SMax Reitz } 1508f67503e5SMax Reitz 1509de9c0cecSKevin Wolf /* NULL means an empty set of options */ 1510de9c0cecSKevin Wolf if (options == NULL) { 1511de9c0cecSKevin Wolf options = qdict_new(); 1512de9c0cecSKevin Wolf } 1513de9c0cecSKevin Wolf 151417b005f1SKevin Wolf ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err); 1515462f5bcfSKevin Wolf if (local_err) { 1516462f5bcfSKevin Wolf goto fail; 1517462f5bcfSKevin Wolf } 1518462f5bcfSKevin Wolf 151976c591b0SKevin Wolf /* Find the right image format driver */ 152076c591b0SKevin Wolf drv = NULL; 152176c591b0SKevin Wolf drvname = qdict_get_try_str(options, "driver"); 152276c591b0SKevin Wolf if (drvname) { 152376c591b0SKevin Wolf drv = bdrv_find_format(drvname); 152476c591b0SKevin Wolf qdict_del(options, "driver"); 152576c591b0SKevin Wolf if (!drv) { 152676c591b0SKevin Wolf error_setg(errp, "Unknown driver: '%s'", drvname); 152776c591b0SKevin Wolf ret = -EINVAL; 152876c591b0SKevin Wolf goto fail; 152976c591b0SKevin Wolf } 153076c591b0SKevin Wolf } 153176c591b0SKevin Wolf 153276c591b0SKevin Wolf assert(drvname || !(flags & BDRV_O_PROTOCOL)); 153376c591b0SKevin Wolf if (drv && !drv->bdrv_file_open) { 153476c591b0SKevin Wolf /* If the user explicitly wants a format driver here, we'll need to add 153576c591b0SKevin Wolf * another layer for the protocol in bs->file */ 153676c591b0SKevin Wolf flags &= ~BDRV_O_PROTOCOL; 153776c591b0SKevin Wolf } 153876c591b0SKevin Wolf 1539de9c0cecSKevin Wolf bs->options = options; 1540b6ad491aSKevin Wolf options = qdict_clone_shallow(options); 1541de9c0cecSKevin Wolf 1542f500a6d3SKevin Wolf /* Open image file without format layer */ 1543f4788adcSKevin Wolf if ((flags & BDRV_O_PROTOCOL) == 0) { 1544be028adcSJeff Cody if (flags & BDRV_O_RDWR) { 1545be028adcSJeff Cody flags |= BDRV_O_ALLOW_RDWR; 1546be028adcSJeff Cody } 1547b1e6fc08SKevin Wolf if 
(flags & BDRV_O_SNAPSHOT) { 1548b1e6fc08SKevin Wolf snapshot_flags = bdrv_temp_snapshot_flags(flags); 1549b1e6fc08SKevin Wolf flags = bdrv_backing_flags(flags); 1550b1e6fc08SKevin Wolf } 1551be028adcSJeff Cody 1552f67503e5SMax Reitz assert(file == NULL); 1553054963f8SMax Reitz ret = bdrv_open_image(&file, filename, options, "file", 15540b50cc88SKevin Wolf bdrv_inherited_flags(flags), 15550b50cc88SKevin Wolf true, &local_err); 1556f500a6d3SKevin Wolf if (ret < 0) { 15578bfea15dSKevin Wolf goto fail; 1558f500a6d3SKevin Wolf } 1559f4788adcSKevin Wolf } 1560f500a6d3SKevin Wolf 156176c591b0SKevin Wolf /* Image format probing */ 156238f3ef57SKevin Wolf bs->probed = !drv; 156376c591b0SKevin Wolf if (!drv && file) { 156434b5d2c6SMax Reitz ret = find_image_format(file, filename, &drv, &local_err); 156517b005f1SKevin Wolf if (ret < 0) { 156617b005f1SKevin Wolf goto fail; 156717b005f1SKevin Wolf } 156876c591b0SKevin Wolf } else if (!drv) { 15692a05cbe4SMax Reitz error_setg(errp, "Must specify either driver or file"); 15702a05cbe4SMax Reitz ret = -EINVAL; 15718bfea15dSKevin Wolf goto fail; 15722a05cbe4SMax Reitz } 1573f500a6d3SKevin Wolf 1574b6ce07aaSKevin Wolf /* Open the image */ 157534b5d2c6SMax Reitz ret = bdrv_open_common(bs, file, options, flags, drv, &local_err); 1576b6ce07aaSKevin Wolf if (ret < 0) { 15778bfea15dSKevin Wolf goto fail; 15786987307cSChristoph Hellwig } 15796987307cSChristoph Hellwig 15802a05cbe4SMax Reitz if (file && (bs->file != file)) { 15814f6fd349SFam Zheng bdrv_unref(file); 1582f500a6d3SKevin Wolf file = NULL; 1583f500a6d3SKevin Wolf } 1584f500a6d3SKevin Wolf 1585b6ce07aaSKevin Wolf /* If there is a backing file, use it */ 15869156df12SPaolo Bonzini if ((flags & BDRV_O_NO_BACKING) == 0) { 158731ca6d07SKevin Wolf QDict *backing_options; 158831ca6d07SKevin Wolf 15895726d872SBenoît Canet qdict_extract_subqdict(options, &backing_options, "backing."); 159034b5d2c6SMax Reitz ret = bdrv_open_backing_file(bs, backing_options, &local_err); 1591b6ce07aaSKevin 
Wolf if (ret < 0) { 1592b6ad491aSKevin Wolf goto close_and_fail; 1593b6ce07aaSKevin Wolf } 1594b6ce07aaSKevin Wolf } 1595b6ce07aaSKevin Wolf 159691af7014SMax Reitz bdrv_refresh_filename(bs); 159791af7014SMax Reitz 1598b998875dSKevin Wolf /* For snapshot=on, create a temporary qcow2 overlay. bs points to the 1599b998875dSKevin Wolf * temporary snapshot afterwards. */ 1600b1e6fc08SKevin Wolf if (snapshot_flags) { 16016b8aeca5SChen Gang ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err); 1602b998875dSKevin Wolf if (local_err) { 1603b998875dSKevin Wolf goto close_and_fail; 1604b998875dSKevin Wolf } 1605b998875dSKevin Wolf } 1606b998875dSKevin Wolf 1607b6ad491aSKevin Wolf /* Check if any unknown options were used */ 16085acd9d81SMax Reitz if (options && (qdict_size(options) != 0)) { 1609b6ad491aSKevin Wolf const QDictEntry *entry = qdict_first(options); 16105acd9d81SMax Reitz if (flags & BDRV_O_PROTOCOL) { 16115acd9d81SMax Reitz error_setg(errp, "Block protocol '%s' doesn't support the option " 16125acd9d81SMax Reitz "'%s'", drv->format_name, entry->key); 16135acd9d81SMax Reitz } else { 161434b5d2c6SMax Reitz error_setg(errp, "Block format '%s' used by device '%s' doesn't " 16155acd9d81SMax Reitz "support the option '%s'", drv->format_name, 1616bfb197e0SMarkus Armbruster bdrv_get_device_name(bs), entry->key); 16175acd9d81SMax Reitz } 1618b6ad491aSKevin Wolf 1619b6ad491aSKevin Wolf ret = -EINVAL; 1620b6ad491aSKevin Wolf goto close_and_fail; 1621b6ad491aSKevin Wolf } 1622b6ad491aSKevin Wolf 1623b6ce07aaSKevin Wolf if (!bdrv_key_required(bs)) { 1624a7f53e26SMarkus Armbruster if (bs->blk) { 1625a7f53e26SMarkus Armbruster blk_dev_change_media_cb(bs->blk, true); 1626a7f53e26SMarkus Armbruster } 1627c3adb58fSMarkus Armbruster } else if (!runstate_check(RUN_STATE_PRELAUNCH) 1628c3adb58fSMarkus Armbruster && !runstate_check(RUN_STATE_INMIGRATE) 1629c3adb58fSMarkus Armbruster && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */ 1630c3adb58fSMarkus Armbruster 
error_setg(errp, 1631c3adb58fSMarkus Armbruster "Guest must be stopped for opening of encrypted image"); 1632c3adb58fSMarkus Armbruster ret = -EBUSY; 1633c3adb58fSMarkus Armbruster goto close_and_fail; 1634b6ce07aaSKevin Wolf } 1635b6ce07aaSKevin Wolf 1636c3adb58fSMarkus Armbruster QDECREF(options); 1637f67503e5SMax Reitz *pbs = bs; 1638b6ce07aaSKevin Wolf return 0; 1639b6ce07aaSKevin Wolf 16408bfea15dSKevin Wolf fail: 1641f500a6d3SKevin Wolf if (file != NULL) { 16424f6fd349SFam Zheng bdrv_unref(file); 1643f500a6d3SKevin Wolf } 1644de9c0cecSKevin Wolf QDECREF(bs->options); 1645b6ad491aSKevin Wolf QDECREF(options); 1646de9c0cecSKevin Wolf bs->options = NULL; 1647f67503e5SMax Reitz if (!*pbs) { 1648f67503e5SMax Reitz /* If *pbs is NULL, a new BDS has been created in this function and 1649f67503e5SMax Reitz needs to be freed now. Otherwise, it does not need to be closed, 1650f67503e5SMax Reitz since it has not really been opened yet. */ 1651f67503e5SMax Reitz bdrv_unref(bs); 1652f67503e5SMax Reitz } 165384d18f06SMarkus Armbruster if (local_err) { 165434b5d2c6SMax Reitz error_propagate(errp, local_err); 165534b5d2c6SMax Reitz } 1656b6ad491aSKevin Wolf return ret; 1657de9c0cecSKevin Wolf 1658b6ad491aSKevin Wolf close_and_fail: 1659f67503e5SMax Reitz /* See fail path, but now the BDS has to be always closed */ 1660f67503e5SMax Reitz if (*pbs) { 1661b6ad491aSKevin Wolf bdrv_close(bs); 1662f67503e5SMax Reitz } else { 1663f67503e5SMax Reitz bdrv_unref(bs); 1664f67503e5SMax Reitz } 1665b6ad491aSKevin Wolf QDECREF(options); 166684d18f06SMarkus Armbruster if (local_err) { 166734b5d2c6SMax Reitz error_propagate(errp, local_err); 166834b5d2c6SMax Reitz } 1669b6ce07aaSKevin Wolf return ret; 1670b6ce07aaSKevin Wolf } 1671b6ce07aaSKevin Wolf 1672e971aa12SJeff Cody typedef struct BlockReopenQueueEntry { 1673e971aa12SJeff Cody bool prepared; 1674e971aa12SJeff Cody BDRVReopenState state; 1675e971aa12SJeff Cody QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry; 1676e971aa12SJeff Cody } 
BlockReopenQueueEntry; 1677e971aa12SJeff Cody 1678e971aa12SJeff Cody /* 1679e971aa12SJeff Cody * Adds a BlockDriverState to a simple queue for an atomic, transactional 1680e971aa12SJeff Cody * reopen of multiple devices. 1681e971aa12SJeff Cody * 1682e971aa12SJeff Cody * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT 1683e971aa12SJeff Cody * already performed, or alternatively may be NULL a new BlockReopenQueue will 1684e971aa12SJeff Cody * be created and initialized. This newly created BlockReopenQueue should be 1685e971aa12SJeff Cody * passed back in for subsequent calls that are intended to be of the same 1686e971aa12SJeff Cody * atomic 'set'. 1687e971aa12SJeff Cody * 1688e971aa12SJeff Cody * bs is the BlockDriverState to add to the reopen queue. 1689e971aa12SJeff Cody * 1690e971aa12SJeff Cody * flags contains the open flags for the associated bs 1691e971aa12SJeff Cody * 1692e971aa12SJeff Cody * returns a pointer to bs_queue, which is either the newly allocated 1693e971aa12SJeff Cody * bs_queue, or the existing bs_queue being used. 
1694e971aa12SJeff Cody * 1695e971aa12SJeff Cody */ 1696e971aa12SJeff Cody BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, 1697e971aa12SJeff Cody BlockDriverState *bs, int flags) 1698e971aa12SJeff Cody { 1699e971aa12SJeff Cody assert(bs != NULL); 1700e971aa12SJeff Cody 1701e971aa12SJeff Cody BlockReopenQueueEntry *bs_entry; 1702e971aa12SJeff Cody if (bs_queue == NULL) { 1703e971aa12SJeff Cody bs_queue = g_new0(BlockReopenQueue, 1); 1704e971aa12SJeff Cody QSIMPLEQ_INIT(bs_queue); 1705e971aa12SJeff Cody } 1706e971aa12SJeff Cody 1707f1f25a2eSKevin Wolf /* bdrv_open() masks this flag out */ 1708f1f25a2eSKevin Wolf flags &= ~BDRV_O_PROTOCOL; 1709f1f25a2eSKevin Wolf 1710e971aa12SJeff Cody if (bs->file) { 1711f1f25a2eSKevin Wolf bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags)); 1712e971aa12SJeff Cody } 1713e971aa12SJeff Cody 1714e971aa12SJeff Cody bs_entry = g_new0(BlockReopenQueueEntry, 1); 1715e971aa12SJeff Cody QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry); 1716e971aa12SJeff Cody 1717e971aa12SJeff Cody bs_entry->state.bs = bs; 1718e971aa12SJeff Cody bs_entry->state.flags = flags; 1719e971aa12SJeff Cody 1720e971aa12SJeff Cody return bs_queue; 1721e971aa12SJeff Cody } 1722e971aa12SJeff Cody 1723e971aa12SJeff Cody /* 1724e971aa12SJeff Cody * Reopen multiple BlockDriverStates atomically & transactionally. 1725e971aa12SJeff Cody * 1726e971aa12SJeff Cody * The queue passed in (bs_queue) must have been built up previous 1727e971aa12SJeff Cody * via bdrv_reopen_queue(). 1728e971aa12SJeff Cody * 1729e971aa12SJeff Cody * Reopens all BDS specified in the queue, with the appropriate 1730e971aa12SJeff Cody * flags. All devices are prepared for reopen, and failure of any 1731e971aa12SJeff Cody * device will cause all device changes to be abandonded, and intermediate 1732e971aa12SJeff Cody * data cleaned up. 
1733e971aa12SJeff Cody * 1734e971aa12SJeff Cody * If all devices prepare successfully, then the changes are committed 1735e971aa12SJeff Cody * to all devices. 1736e971aa12SJeff Cody * 1737e971aa12SJeff Cody */ 1738e971aa12SJeff Cody int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) 1739e971aa12SJeff Cody { 1740e971aa12SJeff Cody int ret = -1; 1741e971aa12SJeff Cody BlockReopenQueueEntry *bs_entry, *next; 1742e971aa12SJeff Cody Error *local_err = NULL; 1743e971aa12SJeff Cody 1744e971aa12SJeff Cody assert(bs_queue != NULL); 1745e971aa12SJeff Cody 1746e971aa12SJeff Cody bdrv_drain_all(); 1747e971aa12SJeff Cody 1748e971aa12SJeff Cody QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { 1749e971aa12SJeff Cody if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) { 1750e971aa12SJeff Cody error_propagate(errp, local_err); 1751e971aa12SJeff Cody goto cleanup; 1752e971aa12SJeff Cody } 1753e971aa12SJeff Cody bs_entry->prepared = true; 1754e971aa12SJeff Cody } 1755e971aa12SJeff Cody 1756e971aa12SJeff Cody /* If we reach this point, we have success and just need to apply the 1757e971aa12SJeff Cody * changes 1758e971aa12SJeff Cody */ 1759e971aa12SJeff Cody QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { 1760e971aa12SJeff Cody bdrv_reopen_commit(&bs_entry->state); 1761e971aa12SJeff Cody } 1762e971aa12SJeff Cody 1763e971aa12SJeff Cody ret = 0; 1764e971aa12SJeff Cody 1765e971aa12SJeff Cody cleanup: 1766e971aa12SJeff Cody QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { 1767e971aa12SJeff Cody if (ret && bs_entry->prepared) { 1768e971aa12SJeff Cody bdrv_reopen_abort(&bs_entry->state); 1769e971aa12SJeff Cody } 1770e971aa12SJeff Cody g_free(bs_entry); 1771e971aa12SJeff Cody } 1772e971aa12SJeff Cody g_free(bs_queue); 1773e971aa12SJeff Cody return ret; 1774e971aa12SJeff Cody } 1775e971aa12SJeff Cody 1776e971aa12SJeff Cody 1777e971aa12SJeff Cody /* Reopen a single BlockDriverState with the specified flags. 
*/ 1778e971aa12SJeff Cody int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp) 1779e971aa12SJeff Cody { 1780e971aa12SJeff Cody int ret = -1; 1781e971aa12SJeff Cody Error *local_err = NULL; 1782e971aa12SJeff Cody BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags); 1783e971aa12SJeff Cody 1784e971aa12SJeff Cody ret = bdrv_reopen_multiple(queue, &local_err); 1785e971aa12SJeff Cody if (local_err != NULL) { 1786e971aa12SJeff Cody error_propagate(errp, local_err); 1787e971aa12SJeff Cody } 1788e971aa12SJeff Cody return ret; 1789e971aa12SJeff Cody } 1790e971aa12SJeff Cody 1791e971aa12SJeff Cody 1792e971aa12SJeff Cody /* 1793e971aa12SJeff Cody * Prepares a BlockDriverState for reopen. All changes are staged in the 1794e971aa12SJeff Cody * 'opaque' field of the BDRVReopenState, which is used and allocated by 1795e971aa12SJeff Cody * the block driver layer .bdrv_reopen_prepare() 1796e971aa12SJeff Cody * 1797e971aa12SJeff Cody * bs is the BlockDriverState to reopen 1798e971aa12SJeff Cody * flags are the new open flags 1799e971aa12SJeff Cody * queue is the reopen queue 1800e971aa12SJeff Cody * 1801e971aa12SJeff Cody * Returns 0 on success, non-zero on error. On error errp will be set 1802e971aa12SJeff Cody * as well. 1803e971aa12SJeff Cody * 1804e971aa12SJeff Cody * On failure, bdrv_reopen_abort() will be called to clean up any data. 
1805e971aa12SJeff Cody * It is the responsibility of the caller to then call the abort() or 1806e971aa12SJeff Cody * commit() for any other BDS that have been left in a prepare() state 1807e971aa12SJeff Cody * 1808e971aa12SJeff Cody */ 1809e971aa12SJeff Cody int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, 1810e971aa12SJeff Cody Error **errp) 1811e971aa12SJeff Cody { 1812e971aa12SJeff Cody int ret = -1; 1813e971aa12SJeff Cody Error *local_err = NULL; 1814e971aa12SJeff Cody BlockDriver *drv; 1815e971aa12SJeff Cody 1816e971aa12SJeff Cody assert(reopen_state != NULL); 1817e971aa12SJeff Cody assert(reopen_state->bs->drv != NULL); 1818e971aa12SJeff Cody drv = reopen_state->bs->drv; 1819e971aa12SJeff Cody 1820e971aa12SJeff Cody /* if we are to stay read-only, do not allow permission change 1821e971aa12SJeff Cody * to r/w */ 1822e971aa12SJeff Cody if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) && 1823e971aa12SJeff Cody reopen_state->flags & BDRV_O_RDWR) { 182481e5f78aSAlberto Garcia error_setg(errp, "Node '%s' is read only", 182581e5f78aSAlberto Garcia bdrv_get_device_or_node_name(reopen_state->bs)); 1826e971aa12SJeff Cody goto error; 1827e971aa12SJeff Cody } 1828e971aa12SJeff Cody 1829e971aa12SJeff Cody 1830e971aa12SJeff Cody ret = bdrv_flush(reopen_state->bs); 1831e971aa12SJeff Cody if (ret) { 1832e971aa12SJeff Cody error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive", 1833e971aa12SJeff Cody strerror(-ret)); 1834e971aa12SJeff Cody goto error; 1835e971aa12SJeff Cody } 1836e971aa12SJeff Cody 1837e971aa12SJeff Cody if (drv->bdrv_reopen_prepare) { 1838e971aa12SJeff Cody ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err); 1839e971aa12SJeff Cody if (ret) { 1840e971aa12SJeff Cody if (local_err != NULL) { 1841e971aa12SJeff Cody error_propagate(errp, local_err); 1842e971aa12SJeff Cody } else { 1843d8b6895fSLuiz Capitulino error_setg(errp, "failed while preparing to reopen image '%s'", 1844e971aa12SJeff Cody 
reopen_state->bs->filename); 1845e971aa12SJeff Cody } 1846e971aa12SJeff Cody goto error; 1847e971aa12SJeff Cody } 1848e971aa12SJeff Cody } else { 1849e971aa12SJeff Cody /* It is currently mandatory to have a bdrv_reopen_prepare() 1850e971aa12SJeff Cody * handler for each supported drv. */ 185181e5f78aSAlberto Garcia error_setg(errp, "Block format '%s' used by node '%s' " 185281e5f78aSAlberto Garcia "does not support reopening files", drv->format_name, 185381e5f78aSAlberto Garcia bdrv_get_device_or_node_name(reopen_state->bs)); 1854e971aa12SJeff Cody ret = -1; 1855e971aa12SJeff Cody goto error; 1856e971aa12SJeff Cody } 1857e971aa12SJeff Cody 1858e971aa12SJeff Cody ret = 0; 1859e971aa12SJeff Cody 1860e971aa12SJeff Cody error: 1861e971aa12SJeff Cody return ret; 1862e971aa12SJeff Cody } 1863e971aa12SJeff Cody 1864e971aa12SJeff Cody /* 1865e971aa12SJeff Cody * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and 1866e971aa12SJeff Cody * makes them final by swapping the staging BlockDriverState contents into 1867e971aa12SJeff Cody * the active BlockDriverState contents. 
1868e971aa12SJeff Cody */ 1869e971aa12SJeff Cody void bdrv_reopen_commit(BDRVReopenState *reopen_state) 1870e971aa12SJeff Cody { 1871e971aa12SJeff Cody BlockDriver *drv; 1872e971aa12SJeff Cody 1873e971aa12SJeff Cody assert(reopen_state != NULL); 1874e971aa12SJeff Cody drv = reopen_state->bs->drv; 1875e971aa12SJeff Cody assert(drv != NULL); 1876e971aa12SJeff Cody 1877e971aa12SJeff Cody /* If there are any driver level actions to take */ 1878e971aa12SJeff Cody if (drv->bdrv_reopen_commit) { 1879e971aa12SJeff Cody drv->bdrv_reopen_commit(reopen_state); 1880e971aa12SJeff Cody } 1881e971aa12SJeff Cody 1882e971aa12SJeff Cody /* set BDS specific flags now */ 1883e971aa12SJeff Cody reopen_state->bs->open_flags = reopen_state->flags; 1884e971aa12SJeff Cody reopen_state->bs->enable_write_cache = !!(reopen_state->flags & 1885e971aa12SJeff Cody BDRV_O_CACHE_WB); 1886e971aa12SJeff Cody reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR); 1887355ef4acSKevin Wolf 18883baca891SKevin Wolf bdrv_refresh_limits(reopen_state->bs, NULL); 1889e971aa12SJeff Cody } 1890e971aa12SJeff Cody 1891e971aa12SJeff Cody /* 1892e971aa12SJeff Cody * Abort the reopen, and delete and free the staged changes in 1893e971aa12SJeff Cody * reopen_state 1894e971aa12SJeff Cody */ 1895e971aa12SJeff Cody void bdrv_reopen_abort(BDRVReopenState *reopen_state) 1896e971aa12SJeff Cody { 1897e971aa12SJeff Cody BlockDriver *drv; 1898e971aa12SJeff Cody 1899e971aa12SJeff Cody assert(reopen_state != NULL); 1900e971aa12SJeff Cody drv = reopen_state->bs->drv; 1901e971aa12SJeff Cody assert(drv != NULL); 1902e971aa12SJeff Cody 1903e971aa12SJeff Cody if (drv->bdrv_reopen_abort) { 1904e971aa12SJeff Cody drv->bdrv_reopen_abort(reopen_state); 1905e971aa12SJeff Cody } 1906e971aa12SJeff Cody } 1907e971aa12SJeff Cody 1908e971aa12SJeff Cody 1909fc01f7e7Sbellard void bdrv_close(BlockDriverState *bs) 1910fc01f7e7Sbellard { 191133384421SMax Reitz BdrvAioNotifier *ban, *ban_next; 191233384421SMax Reitz 19133e914655SPaolo 
Bonzini if (bs->job) { 19143e914655SPaolo Bonzini block_job_cancel_sync(bs->job); 19153e914655SPaolo Bonzini } 191658fda173SStefan Hajnoczi bdrv_drain_all(); /* complete I/O */ 191758fda173SStefan Hajnoczi bdrv_flush(bs); 191858fda173SStefan Hajnoczi bdrv_drain_all(); /* in case flush left pending I/O */ 1919d7d512f6SPaolo Bonzini notifier_list_notify(&bs->close_notifiers, bs); 19207094f12fSKevin Wolf 19213cbc002cSPaolo Bonzini if (bs->drv) { 1922557df6acSStefan Hajnoczi if (bs->backing_hd) { 1923826b6ca0SFam Zheng BlockDriverState *backing_hd = bs->backing_hd; 1924826b6ca0SFam Zheng bdrv_set_backing_hd(bs, NULL); 1925826b6ca0SFam Zheng bdrv_unref(backing_hd); 1926557df6acSStefan Hajnoczi } 1927ea2384d3Sbellard bs->drv->bdrv_close(bs); 19287267c094SAnthony Liguori g_free(bs->opaque); 1929ea2384d3Sbellard bs->opaque = NULL; 1930ea2384d3Sbellard bs->drv = NULL; 193153fec9d3SStefan Hajnoczi bs->copy_on_read = 0; 1932a275fa42SPaolo Bonzini bs->backing_file[0] = '\0'; 1933a275fa42SPaolo Bonzini bs->backing_format[0] = '\0'; 19346405875cSPaolo Bonzini bs->total_sectors = 0; 19356405875cSPaolo Bonzini bs->encrypted = 0; 19366405875cSPaolo Bonzini bs->valid_key = 0; 19376405875cSPaolo Bonzini bs->sg = 0; 19380d51b4deSAsias He bs->zero_beyond_eof = false; 1939de9c0cecSKevin Wolf QDECREF(bs->options); 1940de9c0cecSKevin Wolf bs->options = NULL; 194191af7014SMax Reitz QDECREF(bs->full_open_options); 194291af7014SMax Reitz bs->full_open_options = NULL; 1943b338082bSbellard 194466f82ceeSKevin Wolf if (bs->file != NULL) { 19454f6fd349SFam Zheng bdrv_unref(bs->file); 19460ac9377dSPaolo Bonzini bs->file = NULL; 194766f82ceeSKevin Wolf } 19489ca11154SPavel Hrdina } 194966f82ceeSKevin Wolf 1950a7f53e26SMarkus Armbruster if (bs->blk) { 1951a7f53e26SMarkus Armbruster blk_dev_change_media_cb(bs->blk, false); 1952a7f53e26SMarkus Armbruster } 195398f90dbaSZhi Yong Wu 195498f90dbaSZhi Yong Wu /*throttling disk I/O limits*/ 195598f90dbaSZhi Yong Wu if (bs->io_limits_enabled) { 
195698f90dbaSZhi Yong Wu bdrv_io_limits_disable(bs); 195798f90dbaSZhi Yong Wu } 195833384421SMax Reitz 195933384421SMax Reitz QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { 196033384421SMax Reitz g_free(ban); 196133384421SMax Reitz } 196233384421SMax Reitz QLIST_INIT(&bs->aio_notifiers); 1963b338082bSbellard } 1964b338082bSbellard 19652bc93fedSMORITA Kazutaka void bdrv_close_all(void) 19662bc93fedSMORITA Kazutaka { 19672bc93fedSMORITA Kazutaka BlockDriverState *bs; 19682bc93fedSMORITA Kazutaka 1969dc364f4cSBenoît Canet QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 1970ed78cda3SStefan Hajnoczi AioContext *aio_context = bdrv_get_aio_context(bs); 1971ed78cda3SStefan Hajnoczi 1972ed78cda3SStefan Hajnoczi aio_context_acquire(aio_context); 19732bc93fedSMORITA Kazutaka bdrv_close(bs); 1974ed78cda3SStefan Hajnoczi aio_context_release(aio_context); 19752bc93fedSMORITA Kazutaka } 19762bc93fedSMORITA Kazutaka } 19772bc93fedSMORITA Kazutaka 197888266f5aSStefan Hajnoczi /* Check if any requests are in-flight (including throttled requests) */ 197988266f5aSStefan Hajnoczi static bool bdrv_requests_pending(BlockDriverState *bs) 198088266f5aSStefan Hajnoczi { 198188266f5aSStefan Hajnoczi if (!QLIST_EMPTY(&bs->tracked_requests)) { 198288266f5aSStefan Hajnoczi return true; 198388266f5aSStefan Hajnoczi } 1984cc0681c4SBenoît Canet if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) { 1985cc0681c4SBenoît Canet return true; 1986cc0681c4SBenoît Canet } 1987cc0681c4SBenoît Canet if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) { 198888266f5aSStefan Hajnoczi return true; 198988266f5aSStefan Hajnoczi } 199088266f5aSStefan Hajnoczi if (bs->file && bdrv_requests_pending(bs->file)) { 199188266f5aSStefan Hajnoczi return true; 199288266f5aSStefan Hajnoczi } 199388266f5aSStefan Hajnoczi if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) { 199488266f5aSStefan Hajnoczi return true; 199588266f5aSStefan Hajnoczi } 199688266f5aSStefan Hajnoczi return false; 199788266f5aSStefan 
Hajnoczi } 199888266f5aSStefan Hajnoczi 19995b98db0aSStefan Hajnoczi static bool bdrv_drain_one(BlockDriverState *bs) 20005b98db0aSStefan Hajnoczi { 20015b98db0aSStefan Hajnoczi bool bs_busy; 20025b98db0aSStefan Hajnoczi 20035b98db0aSStefan Hajnoczi bdrv_flush_io_queue(bs); 20045b98db0aSStefan Hajnoczi bdrv_start_throttled_reqs(bs); 20055b98db0aSStefan Hajnoczi bs_busy = bdrv_requests_pending(bs); 20065b98db0aSStefan Hajnoczi bs_busy |= aio_poll(bdrv_get_aio_context(bs), bs_busy); 20075b98db0aSStefan Hajnoczi return bs_busy; 20085b98db0aSStefan Hajnoczi } 20095b98db0aSStefan Hajnoczi 20105b98db0aSStefan Hajnoczi /* 20115b98db0aSStefan Hajnoczi * Wait for pending requests to complete on a single BlockDriverState subtree 20125b98db0aSStefan Hajnoczi * 20135b98db0aSStefan Hajnoczi * See the warning in bdrv_drain_all(). This function can only be called if 20145b98db0aSStefan Hajnoczi * you are sure nothing can generate I/O because you have op blockers 20155b98db0aSStefan Hajnoczi * installed. 20165b98db0aSStefan Hajnoczi * 20175b98db0aSStefan Hajnoczi * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState 20185b98db0aSStefan Hajnoczi * AioContext. 20195b98db0aSStefan Hajnoczi */ 20205b98db0aSStefan Hajnoczi void bdrv_drain(BlockDriverState *bs) 20215b98db0aSStefan Hajnoczi { 20225b98db0aSStefan Hajnoczi while (bdrv_drain_one(bs)) { 20235b98db0aSStefan Hajnoczi /* Keep iterating */ 20245b98db0aSStefan Hajnoczi } 20255b98db0aSStefan Hajnoczi } 20265b98db0aSStefan Hajnoczi 2027922453bcSStefan Hajnoczi /* 2028922453bcSStefan Hajnoczi * Wait for pending requests to complete across all BlockDriverStates 2029922453bcSStefan Hajnoczi * 2030922453bcSStefan Hajnoczi * This function does not flush data to disk, use bdrv_flush_all() for that 2031922453bcSStefan Hajnoczi * after calling this function. 
20324c355d53SZhi Yong Wu * 20334c355d53SZhi Yong Wu * Note that completion of an asynchronous I/O operation can trigger any 20344c355d53SZhi Yong Wu * number of other I/O operations on other devices---for example a coroutine 20354c355d53SZhi Yong Wu * can be arbitrarily complex and a constant flow of I/O can come until the 20364c355d53SZhi Yong Wu * coroutine is complete. Because of this, it is not possible to have a 20374c355d53SZhi Yong Wu * function to drain a single device's I/O queue. 2038922453bcSStefan Hajnoczi */ 2039922453bcSStefan Hajnoczi void bdrv_drain_all(void) 2040922453bcSStefan Hajnoczi { 204188266f5aSStefan Hajnoczi /* Always run first iteration so any pending completion BHs run */ 204288266f5aSStefan Hajnoczi bool busy = true; 2043922453bcSStefan Hajnoczi BlockDriverState *bs; 2044922453bcSStefan Hajnoczi 204569da3b0bSFam Zheng QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 204669da3b0bSFam Zheng AioContext *aio_context = bdrv_get_aio_context(bs); 204769da3b0bSFam Zheng 204869da3b0bSFam Zheng aio_context_acquire(aio_context); 204969da3b0bSFam Zheng if (bs->job) { 205069da3b0bSFam Zheng block_job_pause(bs->job); 205169da3b0bSFam Zheng } 205269da3b0bSFam Zheng aio_context_release(aio_context); 205369da3b0bSFam Zheng } 205469da3b0bSFam Zheng 205588266f5aSStefan Hajnoczi while (busy) { 20569b536adcSStefan Hajnoczi busy = false; 2057922453bcSStefan Hajnoczi 20589b536adcSStefan Hajnoczi QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 20599b536adcSStefan Hajnoczi AioContext *aio_context = bdrv_get_aio_context(bs); 20609b536adcSStefan Hajnoczi 20619b536adcSStefan Hajnoczi aio_context_acquire(aio_context); 20625b98db0aSStefan Hajnoczi busy |= bdrv_drain_one(bs); 20639b536adcSStefan Hajnoczi aio_context_release(aio_context); 20649b536adcSStefan Hajnoczi } 2065922453bcSStefan Hajnoczi } 206669da3b0bSFam Zheng 206769da3b0bSFam Zheng QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 206869da3b0bSFam Zheng AioContext *aio_context = bdrv_get_aio_context(bs); 
206969da3b0bSFam Zheng 207069da3b0bSFam Zheng aio_context_acquire(aio_context); 207169da3b0bSFam Zheng if (bs->job) { 207269da3b0bSFam Zheng block_job_resume(bs->job); 207369da3b0bSFam Zheng } 207469da3b0bSFam Zheng aio_context_release(aio_context); 207569da3b0bSFam Zheng } 2076922453bcSStefan Hajnoczi } 2077922453bcSStefan Hajnoczi 2078dc364f4cSBenoît Canet /* make a BlockDriverState anonymous by removing from bdrv_state and 2079dc364f4cSBenoît Canet * graph_bdrv_state list. 2080d22b2f41SRyan Harper Also, NULL terminate the device_name to prevent double remove */ 2081d22b2f41SRyan Harper void bdrv_make_anon(BlockDriverState *bs) 2082d22b2f41SRyan Harper { 2083bfb197e0SMarkus Armbruster /* 2084bfb197e0SMarkus Armbruster * Take care to remove bs from bdrv_states only when it's actually 2085bfb197e0SMarkus Armbruster * in it. Note that bs->device_list.tqe_prev is initially null, 2086bfb197e0SMarkus Armbruster * and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish 2087bfb197e0SMarkus Armbruster * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by 2088bfb197e0SMarkus Armbruster * resetting it to null on remove. 
2089bfb197e0SMarkus Armbruster */ 2090bfb197e0SMarkus Armbruster if (bs->device_list.tqe_prev) { 2091dc364f4cSBenoît Canet QTAILQ_REMOVE(&bdrv_states, bs, device_list); 2092bfb197e0SMarkus Armbruster bs->device_list.tqe_prev = NULL; 2093d22b2f41SRyan Harper } 2094dc364f4cSBenoît Canet if (bs->node_name[0] != '\0') { 2095dc364f4cSBenoît Canet QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list); 2096dc364f4cSBenoît Canet } 2097dc364f4cSBenoît Canet bs->node_name[0] = '\0'; 2098d22b2f41SRyan Harper } 2099d22b2f41SRyan Harper 2100e023b2e2SPaolo Bonzini static void bdrv_rebind(BlockDriverState *bs) 2101e023b2e2SPaolo Bonzini { 2102e023b2e2SPaolo Bonzini if (bs->drv && bs->drv->bdrv_rebind) { 2103e023b2e2SPaolo Bonzini bs->drv->bdrv_rebind(bs); 2104e023b2e2SPaolo Bonzini } 2105e023b2e2SPaolo Bonzini } 2106e023b2e2SPaolo Bonzini 21074ddc07caSPaolo Bonzini static void bdrv_move_feature_fields(BlockDriverState *bs_dest, 21084ddc07caSPaolo Bonzini BlockDriverState *bs_src) 21094ddc07caSPaolo Bonzini { 21104ddc07caSPaolo Bonzini /* move some fields that need to stay attached to the device */ 21114ddc07caSPaolo Bonzini 21124ddc07caSPaolo Bonzini /* dev info */ 21131b7fd729SPaolo Bonzini bs_dest->guest_block_size = bs_src->guest_block_size; 21144ddc07caSPaolo Bonzini bs_dest->copy_on_read = bs_src->copy_on_read; 21154ddc07caSPaolo Bonzini 21164ddc07caSPaolo Bonzini bs_dest->enable_write_cache = bs_src->enable_write_cache; 21174ddc07caSPaolo Bonzini 2118cc0681c4SBenoît Canet /* i/o throttled req */ 2119cc0681c4SBenoît Canet memcpy(&bs_dest->throttle_state, 2120cc0681c4SBenoît Canet &bs_src->throttle_state, 2121cc0681c4SBenoît Canet sizeof(ThrottleState)); 2122cc0681c4SBenoît Canet bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0]; 2123cc0681c4SBenoît Canet bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1]; 21244ddc07caSPaolo Bonzini bs_dest->io_limits_enabled = bs_src->io_limits_enabled; 21254ddc07caSPaolo Bonzini 21264ddc07caSPaolo Bonzini /* r/w error */ 
21274ddc07caSPaolo Bonzini bs_dest->on_read_error = bs_src->on_read_error; 21284ddc07caSPaolo Bonzini bs_dest->on_write_error = bs_src->on_write_error; 21294ddc07caSPaolo Bonzini 21304ddc07caSPaolo Bonzini /* i/o status */ 21314ddc07caSPaolo Bonzini bs_dest->iostatus_enabled = bs_src->iostatus_enabled; 21324ddc07caSPaolo Bonzini bs_dest->iostatus = bs_src->iostatus; 21334ddc07caSPaolo Bonzini 21344ddc07caSPaolo Bonzini /* dirty bitmap */ 2135e4654d2dSFam Zheng bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps; 21364ddc07caSPaolo Bonzini 21379fcb0251SFam Zheng /* reference count */ 21389fcb0251SFam Zheng bs_dest->refcnt = bs_src->refcnt; 21399fcb0251SFam Zheng 21404ddc07caSPaolo Bonzini /* job */ 21414ddc07caSPaolo Bonzini bs_dest->job = bs_src->job; 21424ddc07caSPaolo Bonzini 21434ddc07caSPaolo Bonzini /* keep the same entry in bdrv_states */ 2144dc364f4cSBenoît Canet bs_dest->device_list = bs_src->device_list; 21457e7d56d9SMarkus Armbruster bs_dest->blk = bs_src->blk; 21467e7d56d9SMarkus Armbruster 2147fbe40ff7SFam Zheng memcpy(bs_dest->op_blockers, bs_src->op_blockers, 2148fbe40ff7SFam Zheng sizeof(bs_dest->op_blockers)); 21494ddc07caSPaolo Bonzini } 21504ddc07caSPaolo Bonzini 21514ddc07caSPaolo Bonzini /* 21524ddc07caSPaolo Bonzini * Swap bs contents for two image chains while they are live, 21534ddc07caSPaolo Bonzini * while keeping required fields on the BlockDriverState that is 21544ddc07caSPaolo Bonzini * actually attached to a device. 21554ddc07caSPaolo Bonzini * 21564ddc07caSPaolo Bonzini * This will modify the BlockDriverState fields, and swap contents 21574ddc07caSPaolo Bonzini * between bs_new and bs_old. Both bs_new and bs_old are modified. 21584ddc07caSPaolo Bonzini * 2159bfb197e0SMarkus Armbruster * bs_new must not be attached to a BlockBackend. 21604ddc07caSPaolo Bonzini * 21614ddc07caSPaolo Bonzini * This function does not create any image files. 
21624ddc07caSPaolo Bonzini */ 21634ddc07caSPaolo Bonzini void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) 21644ddc07caSPaolo Bonzini { 21654ddc07caSPaolo Bonzini BlockDriverState tmp; 21664ddc07caSPaolo Bonzini 216790ce8a06SBenoît Canet /* The code needs to swap the node_name but simply swapping node_list won't 216890ce8a06SBenoît Canet * work so first remove the nodes from the graph list, do the swap then 216990ce8a06SBenoît Canet * insert them back if needed. 217090ce8a06SBenoît Canet */ 217190ce8a06SBenoît Canet if (bs_new->node_name[0] != '\0') { 217290ce8a06SBenoît Canet QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list); 217390ce8a06SBenoît Canet } 217490ce8a06SBenoît Canet if (bs_old->node_name[0] != '\0') { 217590ce8a06SBenoît Canet QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list); 217690ce8a06SBenoît Canet } 217790ce8a06SBenoît Canet 2178bfb197e0SMarkus Armbruster /* bs_new must be unattached and shouldn't have anything fancy enabled */ 21797e7d56d9SMarkus Armbruster assert(!bs_new->blk); 2180e4654d2dSFam Zheng assert(QLIST_EMPTY(&bs_new->dirty_bitmaps)); 21814ddc07caSPaolo Bonzini assert(bs_new->job == NULL); 21824ddc07caSPaolo Bonzini assert(bs_new->io_limits_enabled == false); 2183cc0681c4SBenoît Canet assert(!throttle_have_timer(&bs_new->throttle_state)); 21844ddc07caSPaolo Bonzini 21854ddc07caSPaolo Bonzini tmp = *bs_new; 21864ddc07caSPaolo Bonzini *bs_new = *bs_old; 21874ddc07caSPaolo Bonzini *bs_old = tmp; 21884ddc07caSPaolo Bonzini 21894ddc07caSPaolo Bonzini /* there are some fields that should not be swapped, move them back */ 21904ddc07caSPaolo Bonzini bdrv_move_feature_fields(&tmp, bs_old); 21914ddc07caSPaolo Bonzini bdrv_move_feature_fields(bs_old, bs_new); 21924ddc07caSPaolo Bonzini bdrv_move_feature_fields(bs_new, &tmp); 21934ddc07caSPaolo Bonzini 2194bfb197e0SMarkus Armbruster /* bs_new must remain unattached */ 21957e7d56d9SMarkus Armbruster assert(!bs_new->blk); 21964ddc07caSPaolo Bonzini 21974ddc07caSPaolo Bonzini 
/* Check a few fields that should remain attached to the device */ 21984ddc07caSPaolo Bonzini assert(bs_new->job == NULL); 21994ddc07caSPaolo Bonzini assert(bs_new->io_limits_enabled == false); 2200cc0681c4SBenoît Canet assert(!throttle_have_timer(&bs_new->throttle_state)); 22014ddc07caSPaolo Bonzini 220290ce8a06SBenoît Canet /* insert the nodes back into the graph node list if needed */ 220390ce8a06SBenoît Canet if (bs_new->node_name[0] != '\0') { 220490ce8a06SBenoît Canet QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list); 220590ce8a06SBenoît Canet } 220690ce8a06SBenoît Canet if (bs_old->node_name[0] != '\0') { 220790ce8a06SBenoît Canet QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list); 220890ce8a06SBenoît Canet } 220990ce8a06SBenoît Canet 22104ddc07caSPaolo Bonzini bdrv_rebind(bs_new); 22114ddc07caSPaolo Bonzini bdrv_rebind(bs_old); 22124ddc07caSPaolo Bonzini } 22134ddc07caSPaolo Bonzini 22148802d1fdSJeff Cody /* 22158802d1fdSJeff Cody * Add new bs contents at the top of an image chain while the chain is 22168802d1fdSJeff Cody * live, while keeping required fields on the top layer. 22178802d1fdSJeff Cody * 22188802d1fdSJeff Cody * This will modify the BlockDriverState fields, and swap contents 22198802d1fdSJeff Cody * between bs_new and bs_top. Both bs_new and bs_top are modified. 22208802d1fdSJeff Cody * 2221bfb197e0SMarkus Armbruster * bs_new must not be attached to a BlockBackend. 2222f6801b83SJeff Cody * 22238802d1fdSJeff Cody * This function does not create any image files. 22248802d1fdSJeff Cody */ 22258802d1fdSJeff Cody void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top) 22268802d1fdSJeff Cody { 22274ddc07caSPaolo Bonzini bdrv_swap(bs_new, bs_top); 22288802d1fdSJeff Cody 22298802d1fdSJeff Cody /* The contents of 'tmp' will become bs_top, as we are 22308802d1fdSJeff Cody * swapping bs_new and bs_top contents. 
*/ 22318d24cce1SFam Zheng bdrv_set_backing_hd(bs_top, bs_new); 22328802d1fdSJeff Cody } 22338802d1fdSJeff Cody 22344f6fd349SFam Zheng static void bdrv_delete(BlockDriverState *bs) 2235b338082bSbellard { 22363e914655SPaolo Bonzini assert(!bs->job); 22373718d8abSFam Zheng assert(bdrv_op_blocker_is_empty(bs)); 22384f6fd349SFam Zheng assert(!bs->refcnt); 2239e4654d2dSFam Zheng assert(QLIST_EMPTY(&bs->dirty_bitmaps)); 224018846deeSMarkus Armbruster 2241e1b5c52eSStefan Hajnoczi bdrv_close(bs); 2242e1b5c52eSStefan Hajnoczi 22431b7bdbc1SStefan Hajnoczi /* remove from list, if necessary */ 2244d22b2f41SRyan Harper bdrv_make_anon(bs); 224534c6f050Saurel32 22467267c094SAnthony Liguori g_free(bs); 2247fc01f7e7Sbellard } 2248fc01f7e7Sbellard 2249e97fc193Saliguori /* 2250e97fc193Saliguori * Run consistency checks on an image 2251e97fc193Saliguori * 2252e076f338SKevin Wolf * Returns 0 if the check could be completed (it doesn't mean that the image is 2253a1c7273bSStefan Weil * free of errors) or -errno when an internal error occurred. The results of the 2254e076f338SKevin Wolf * check are stored in res. 
2255e97fc193Saliguori */ 22564534ff54SKevin Wolf int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) 2257e97fc193Saliguori { 2258908bcd54SMax Reitz if (bs->drv == NULL) { 2259908bcd54SMax Reitz return -ENOMEDIUM; 2260908bcd54SMax Reitz } 2261e97fc193Saliguori if (bs->drv->bdrv_check == NULL) { 2262e97fc193Saliguori return -ENOTSUP; 2263e97fc193Saliguori } 2264e97fc193Saliguori 2265e076f338SKevin Wolf memset(res, 0, sizeof(*res)); 22664534ff54SKevin Wolf return bs->drv->bdrv_check(bs, res, fix); 2267e97fc193Saliguori } 2268e97fc193Saliguori 22698a426614SKevin Wolf #define COMMIT_BUF_SECTORS 2048 22708a426614SKevin Wolf 227133e3963eSbellard /* commit COW file into the raw image */ 227233e3963eSbellard int bdrv_commit(BlockDriverState *bs) 227333e3963eSbellard { 227419cb3738Sbellard BlockDriver *drv = bs->drv; 227572706ea4SJeff Cody int64_t sector, total_sectors, length, backing_length; 22768a426614SKevin Wolf int n, ro, open_flags; 22770bce597dSJeff Cody int ret = 0; 227872706ea4SJeff Cody uint8_t *buf = NULL; 227933e3963eSbellard 228019cb3738Sbellard if (!drv) 228119cb3738Sbellard return -ENOMEDIUM; 228233e3963eSbellard 22834dca4b63SNaphtali Sprei if (!bs->backing_hd) { 22844dca4b63SNaphtali Sprei return -ENOTSUP; 22854dca4b63SNaphtali Sprei } 22864dca4b63SNaphtali Sprei 2287bb00021dSFam Zheng if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) || 2288bb00021dSFam Zheng bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) { 22892d3735d3SStefan Hajnoczi return -EBUSY; 22902d3735d3SStefan Hajnoczi } 22912d3735d3SStefan Hajnoczi 22924dca4b63SNaphtali Sprei ro = bs->backing_hd->read_only; 22934dca4b63SNaphtali Sprei open_flags = bs->backing_hd->open_flags; 22944dca4b63SNaphtali Sprei 22954dca4b63SNaphtali Sprei if (ro) { 22960bce597dSJeff Cody if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) { 22970bce597dSJeff Cody return -EACCES; 22984dca4b63SNaphtali Sprei } 2299ea2384d3Sbellard } 
2300ea2384d3Sbellard 230172706ea4SJeff Cody length = bdrv_getlength(bs); 230272706ea4SJeff Cody if (length < 0) { 230372706ea4SJeff Cody ret = length; 230472706ea4SJeff Cody goto ro_cleanup; 230572706ea4SJeff Cody } 230672706ea4SJeff Cody 230772706ea4SJeff Cody backing_length = bdrv_getlength(bs->backing_hd); 230872706ea4SJeff Cody if (backing_length < 0) { 230972706ea4SJeff Cody ret = backing_length; 231072706ea4SJeff Cody goto ro_cleanup; 231172706ea4SJeff Cody } 231272706ea4SJeff Cody 231372706ea4SJeff Cody /* If our top snapshot is larger than the backing file image, 231472706ea4SJeff Cody * grow the backing file image if possible. If not possible, 231572706ea4SJeff Cody * we must return an error */ 231672706ea4SJeff Cody if (length > backing_length) { 231772706ea4SJeff Cody ret = bdrv_truncate(bs->backing_hd, length); 231872706ea4SJeff Cody if (ret < 0) { 231972706ea4SJeff Cody goto ro_cleanup; 232072706ea4SJeff Cody } 232172706ea4SJeff Cody } 232272706ea4SJeff Cody 232372706ea4SJeff Cody total_sectors = length >> BDRV_SECTOR_BITS; 2324857d4f46SKevin Wolf 2325857d4f46SKevin Wolf /* qemu_try_blockalign() for bs will choose an alignment that works for 2326857d4f46SKevin Wolf * bs->backing_hd as well, so no need to compare the alignment manually. 
*/ 2327857d4f46SKevin Wolf buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE); 2328857d4f46SKevin Wolf if (buf == NULL) { 2329857d4f46SKevin Wolf ret = -ENOMEM; 2330857d4f46SKevin Wolf goto ro_cleanup; 2331857d4f46SKevin Wolf } 23328a426614SKevin Wolf 23338a426614SKevin Wolf for (sector = 0; sector < total_sectors; sector += n) { 2334d663640cSPaolo Bonzini ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n); 2335d663640cSPaolo Bonzini if (ret < 0) { 2336d663640cSPaolo Bonzini goto ro_cleanup; 2337d663640cSPaolo Bonzini } 2338d663640cSPaolo Bonzini if (ret) { 2339dabfa6ccSKevin Wolf ret = bdrv_read(bs, sector, buf, n); 2340dabfa6ccSKevin Wolf if (ret < 0) { 23414dca4b63SNaphtali Sprei goto ro_cleanup; 234233e3963eSbellard } 234333e3963eSbellard 2344dabfa6ccSKevin Wolf ret = bdrv_write(bs->backing_hd, sector, buf, n); 2345dabfa6ccSKevin Wolf if (ret < 0) { 23464dca4b63SNaphtali Sprei goto ro_cleanup; 234733e3963eSbellard } 234833e3963eSbellard } 234933e3963eSbellard } 235095389c86Sbellard 23511d44952fSChristoph Hellwig if (drv->bdrv_make_empty) { 23521d44952fSChristoph Hellwig ret = drv->bdrv_make_empty(bs); 2353dabfa6ccSKevin Wolf if (ret < 0) { 2354dabfa6ccSKevin Wolf goto ro_cleanup; 2355dabfa6ccSKevin Wolf } 23561d44952fSChristoph Hellwig bdrv_flush(bs); 23571d44952fSChristoph Hellwig } 235895389c86Sbellard 23593f5075aeSChristoph Hellwig /* 23603f5075aeSChristoph Hellwig * Make sure all data we wrote to the backing device is actually 23613f5075aeSChristoph Hellwig * stable on disk. 
23623f5075aeSChristoph Hellwig */ 2363dabfa6ccSKevin Wolf if (bs->backing_hd) { 23643f5075aeSChristoph Hellwig bdrv_flush(bs->backing_hd); 2365dabfa6ccSKevin Wolf } 23664dca4b63SNaphtali Sprei 2367dabfa6ccSKevin Wolf ret = 0; 23684dca4b63SNaphtali Sprei ro_cleanup: 2369857d4f46SKevin Wolf qemu_vfree(buf); 23704dca4b63SNaphtali Sprei 23714dca4b63SNaphtali Sprei if (ro) { 23720bce597dSJeff Cody /* ignoring error return here */ 23730bce597dSJeff Cody bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL); 23744dca4b63SNaphtali Sprei } 23754dca4b63SNaphtali Sprei 23761d44952fSChristoph Hellwig return ret; 237733e3963eSbellard } 237833e3963eSbellard 2379e8877497SStefan Hajnoczi int bdrv_commit_all(void) 23806ab4b5abSMarkus Armbruster { 23816ab4b5abSMarkus Armbruster BlockDriverState *bs; 23826ab4b5abSMarkus Armbruster 2383dc364f4cSBenoît Canet QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 2384ed78cda3SStefan Hajnoczi AioContext *aio_context = bdrv_get_aio_context(bs); 2385ed78cda3SStefan Hajnoczi 2386ed78cda3SStefan Hajnoczi aio_context_acquire(aio_context); 2387272d2d8eSJeff Cody if (bs->drv && bs->backing_hd) { 2388e8877497SStefan Hajnoczi int ret = bdrv_commit(bs); 2389e8877497SStefan Hajnoczi if (ret < 0) { 2390ed78cda3SStefan Hajnoczi aio_context_release(aio_context); 2391e8877497SStefan Hajnoczi return ret; 23926ab4b5abSMarkus Armbruster } 23936ab4b5abSMarkus Armbruster } 2394ed78cda3SStefan Hajnoczi aio_context_release(aio_context); 2395272d2d8eSJeff Cody } 2396e8877497SStefan Hajnoczi return 0; 2397e8877497SStefan Hajnoczi } 23986ab4b5abSMarkus Armbruster 2399dbffbdcfSStefan Hajnoczi /** 2400dbffbdcfSStefan Hajnoczi * Remove an active request from the tracked requests list 2401dbffbdcfSStefan Hajnoczi * 2402dbffbdcfSStefan Hajnoczi * This function should be called when a tracked request is completing. 
2403dbffbdcfSStefan Hajnoczi */ 2404dbffbdcfSStefan Hajnoczi static void tracked_request_end(BdrvTrackedRequest *req) 2405dbffbdcfSStefan Hajnoczi { 24062dbafdc0SKevin Wolf if (req->serialising) { 24072dbafdc0SKevin Wolf req->bs->serialising_in_flight--; 24082dbafdc0SKevin Wolf } 24092dbafdc0SKevin Wolf 2410dbffbdcfSStefan Hajnoczi QLIST_REMOVE(req, list); 2411f4658285SStefan Hajnoczi qemu_co_queue_restart_all(&req->wait_queue); 2412dbffbdcfSStefan Hajnoczi } 2413dbffbdcfSStefan Hajnoczi 2414dbffbdcfSStefan Hajnoczi /** 2415dbffbdcfSStefan Hajnoczi * Add an active request to the tracked requests list 2416dbffbdcfSStefan Hajnoczi */ 2417dbffbdcfSStefan Hajnoczi static void tracked_request_begin(BdrvTrackedRequest *req, 2418dbffbdcfSStefan Hajnoczi BlockDriverState *bs, 2419793ed47aSKevin Wolf int64_t offset, 2420793ed47aSKevin Wolf unsigned int bytes, bool is_write) 2421dbffbdcfSStefan Hajnoczi { 2422dbffbdcfSStefan Hajnoczi *req = (BdrvTrackedRequest){ 2423dbffbdcfSStefan Hajnoczi .bs = bs, 2424793ed47aSKevin Wolf .offset = offset, 2425793ed47aSKevin Wolf .bytes = bytes, 2426dbffbdcfSStefan Hajnoczi .is_write = is_write, 24275f8b6491SStefan Hajnoczi .co = qemu_coroutine_self(), 24282dbafdc0SKevin Wolf .serialising = false, 24297327145fSKevin Wolf .overlap_offset = offset, 24307327145fSKevin Wolf .overlap_bytes = bytes, 2431dbffbdcfSStefan Hajnoczi }; 2432dbffbdcfSStefan Hajnoczi 2433f4658285SStefan Hajnoczi qemu_co_queue_init(&req->wait_queue); 2434f4658285SStefan Hajnoczi 2435dbffbdcfSStefan Hajnoczi QLIST_INSERT_HEAD(&bs->tracked_requests, req, list); 2436dbffbdcfSStefan Hajnoczi } 2437dbffbdcfSStefan Hajnoczi 2438e96126ffSKevin Wolf static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align) 24392dbafdc0SKevin Wolf { 24407327145fSKevin Wolf int64_t overlap_offset = req->offset & ~(align - 1); 2441e96126ffSKevin Wolf unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align) 24427327145fSKevin Wolf - overlap_offset; 
24437327145fSKevin Wolf 24442dbafdc0SKevin Wolf if (!req->serialising) { 24452dbafdc0SKevin Wolf req->bs->serialising_in_flight++; 24462dbafdc0SKevin Wolf req->serialising = true; 24472dbafdc0SKevin Wolf } 24487327145fSKevin Wolf 24497327145fSKevin Wolf req->overlap_offset = MIN(req->overlap_offset, overlap_offset); 24507327145fSKevin Wolf req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes); 24512dbafdc0SKevin Wolf } 24522dbafdc0SKevin Wolf 2453d83947acSStefan Hajnoczi /** 2454d83947acSStefan Hajnoczi * Round a region to cluster boundaries 2455d83947acSStefan Hajnoczi */ 2456343bded4SPaolo Bonzini void bdrv_round_to_clusters(BlockDriverState *bs, 2457d83947acSStefan Hajnoczi int64_t sector_num, int nb_sectors, 2458d83947acSStefan Hajnoczi int64_t *cluster_sector_num, 2459d83947acSStefan Hajnoczi int *cluster_nb_sectors) 2460d83947acSStefan Hajnoczi { 2461d83947acSStefan Hajnoczi BlockDriverInfo bdi; 2462d83947acSStefan Hajnoczi 2463d83947acSStefan Hajnoczi if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) { 2464d83947acSStefan Hajnoczi *cluster_sector_num = sector_num; 2465d83947acSStefan Hajnoczi *cluster_nb_sectors = nb_sectors; 2466d83947acSStefan Hajnoczi } else { 2467d83947acSStefan Hajnoczi int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE; 2468d83947acSStefan Hajnoczi *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c); 2469d83947acSStefan Hajnoczi *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num + 2470d83947acSStefan Hajnoczi nb_sectors, c); 2471d83947acSStefan Hajnoczi } 2472d83947acSStefan Hajnoczi } 2473d83947acSStefan Hajnoczi 24747327145fSKevin Wolf static int bdrv_get_cluster_size(BlockDriverState *bs) 2475793ed47aSKevin Wolf { 2476793ed47aSKevin Wolf BlockDriverInfo bdi; 24777327145fSKevin Wolf int ret; 2478793ed47aSKevin Wolf 24797327145fSKevin Wolf ret = bdrv_get_info(bs, &bdi); 24807327145fSKevin Wolf if (ret < 0 || bdi.cluster_size == 0) { 24817327145fSKevin Wolf return bs->request_alignment; 2482793ed47aSKevin 
Wolf } else { 24837327145fSKevin Wolf return bdi.cluster_size; 2484793ed47aSKevin Wolf } 2485793ed47aSKevin Wolf } 2486793ed47aSKevin Wolf 2487f4658285SStefan Hajnoczi static bool tracked_request_overlaps(BdrvTrackedRequest *req, 2488793ed47aSKevin Wolf int64_t offset, unsigned int bytes) 2489793ed47aSKevin Wolf { 2490d83947acSStefan Hajnoczi /* aaaa bbbb */ 24917327145fSKevin Wolf if (offset >= req->overlap_offset + req->overlap_bytes) { 2492d83947acSStefan Hajnoczi return false; 2493d83947acSStefan Hajnoczi } 2494d83947acSStefan Hajnoczi /* bbbb aaaa */ 24957327145fSKevin Wolf if (req->overlap_offset >= offset + bytes) { 2496d83947acSStefan Hajnoczi return false; 2497d83947acSStefan Hajnoczi } 2498d83947acSStefan Hajnoczi return true; 2499f4658285SStefan Hajnoczi } 2500f4658285SStefan Hajnoczi 250128de2dcdSKevin Wolf static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self) 2502f4658285SStefan Hajnoczi { 25032dbafdc0SKevin Wolf BlockDriverState *bs = self->bs; 2504f4658285SStefan Hajnoczi BdrvTrackedRequest *req; 2505f4658285SStefan Hajnoczi bool retry; 250628de2dcdSKevin Wolf bool waited = false; 2507f4658285SStefan Hajnoczi 25082dbafdc0SKevin Wolf if (!bs->serialising_in_flight) { 250928de2dcdSKevin Wolf return false; 25102dbafdc0SKevin Wolf } 25112dbafdc0SKevin Wolf 2512f4658285SStefan Hajnoczi do { 2513f4658285SStefan Hajnoczi retry = false; 2514f4658285SStefan Hajnoczi QLIST_FOREACH(req, &bs->tracked_requests, list) { 25152dbafdc0SKevin Wolf if (req == self || (!req->serialising && !self->serialising)) { 251665afd211SKevin Wolf continue; 251765afd211SKevin Wolf } 25187327145fSKevin Wolf if (tracked_request_overlaps(req, self->overlap_offset, 25197327145fSKevin Wolf self->overlap_bytes)) 25207327145fSKevin Wolf { 25215f8b6491SStefan Hajnoczi /* Hitting this means there was a reentrant request, for 25225f8b6491SStefan Hajnoczi * example, a block driver issuing nested requests. 
This must 25235f8b6491SStefan Hajnoczi * never happen since it means deadlock. 25245f8b6491SStefan Hajnoczi */ 25255f8b6491SStefan Hajnoczi assert(qemu_coroutine_self() != req->co); 25265f8b6491SStefan Hajnoczi 25276460440fSKevin Wolf /* If the request is already (indirectly) waiting for us, or 25286460440fSKevin Wolf * will wait for us as soon as it wakes up, then just go on 25296460440fSKevin Wolf * (instead of producing a deadlock in the former case). */ 25306460440fSKevin Wolf if (!req->waiting_for) { 25316460440fSKevin Wolf self->waiting_for = req; 2532f4658285SStefan Hajnoczi qemu_co_queue_wait(&req->wait_queue); 25336460440fSKevin Wolf self->waiting_for = NULL; 2534f4658285SStefan Hajnoczi retry = true; 253528de2dcdSKevin Wolf waited = true; 2536f4658285SStefan Hajnoczi break; 2537f4658285SStefan Hajnoczi } 2538f4658285SStefan Hajnoczi } 25396460440fSKevin Wolf } 2540f4658285SStefan Hajnoczi } while (retry); 254128de2dcdSKevin Wolf 254228de2dcdSKevin Wolf return waited; 2543f4658285SStefan Hajnoczi } 2544f4658285SStefan Hajnoczi 2545756e6736SKevin Wolf /* 2546756e6736SKevin Wolf * Return values: 2547756e6736SKevin Wolf * 0 - success 2548756e6736SKevin Wolf * -EINVAL - backing format specified, but no file 2549756e6736SKevin Wolf * -ENOSPC - can't update the backing file because no space is left in the 2550756e6736SKevin Wolf * image file header 2551756e6736SKevin Wolf * -ENOTSUP - format driver doesn't support changing the backing file 2552756e6736SKevin Wolf */ 2553756e6736SKevin Wolf int bdrv_change_backing_file(BlockDriverState *bs, 2554756e6736SKevin Wolf const char *backing_file, const char *backing_fmt) 2555756e6736SKevin Wolf { 2556756e6736SKevin Wolf BlockDriver *drv = bs->drv; 2557469ef350SPaolo Bonzini int ret; 2558756e6736SKevin Wolf 25595f377794SPaolo Bonzini /* Backing file format doesn't make sense without a backing file */ 25605f377794SPaolo Bonzini if (backing_fmt && !backing_file) { 25615f377794SPaolo Bonzini return -EINVAL; 
25625f377794SPaolo Bonzini } 25635f377794SPaolo Bonzini 2564756e6736SKevin Wolf if (drv->bdrv_change_backing_file != NULL) { 2565469ef350SPaolo Bonzini ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt); 2566756e6736SKevin Wolf } else { 2567469ef350SPaolo Bonzini ret = -ENOTSUP; 2568756e6736SKevin Wolf } 2569469ef350SPaolo Bonzini 2570469ef350SPaolo Bonzini if (ret == 0) { 2571469ef350SPaolo Bonzini pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); 2572469ef350SPaolo Bonzini pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); 2573469ef350SPaolo Bonzini } 2574469ef350SPaolo Bonzini return ret; 2575756e6736SKevin Wolf } 2576756e6736SKevin Wolf 25776ebdcee2SJeff Cody /* 25786ebdcee2SJeff Cody * Finds the image layer in the chain that has 'bs' as its backing file. 25796ebdcee2SJeff Cody * 25806ebdcee2SJeff Cody * active is the current topmost image. 25816ebdcee2SJeff Cody * 25826ebdcee2SJeff Cody * Returns NULL if bs is not found in active's image chain, 25836ebdcee2SJeff Cody * or if active == bs. 25844caf0fcdSJeff Cody * 25854caf0fcdSJeff Cody * Returns the bottommost base image if bs == NULL. 25866ebdcee2SJeff Cody */ 25876ebdcee2SJeff Cody BlockDriverState *bdrv_find_overlay(BlockDriverState *active, 25886ebdcee2SJeff Cody BlockDriverState *bs) 25896ebdcee2SJeff Cody { 25904caf0fcdSJeff Cody while (active && bs != active->backing_hd) { 25914caf0fcdSJeff Cody active = active->backing_hd; 25926ebdcee2SJeff Cody } 25936ebdcee2SJeff Cody 25944caf0fcdSJeff Cody return active; 25956ebdcee2SJeff Cody } 25966ebdcee2SJeff Cody 25974caf0fcdSJeff Cody /* Given a BDS, searches for the base layer. 
*/ 25984caf0fcdSJeff Cody BlockDriverState *bdrv_find_base(BlockDriverState *bs) 25994caf0fcdSJeff Cody { 26004caf0fcdSJeff Cody return bdrv_find_overlay(bs, NULL); 26016ebdcee2SJeff Cody } 26026ebdcee2SJeff Cody 26036ebdcee2SJeff Cody typedef struct BlkIntermediateStates { 26046ebdcee2SJeff Cody BlockDriverState *bs; 26056ebdcee2SJeff Cody QSIMPLEQ_ENTRY(BlkIntermediateStates) entry; 26066ebdcee2SJeff Cody } BlkIntermediateStates; 26076ebdcee2SJeff Cody 26086ebdcee2SJeff Cody 26096ebdcee2SJeff Cody /* 26106ebdcee2SJeff Cody * Drops images above 'base' up to and including 'top', and sets the image 26116ebdcee2SJeff Cody * above 'top' to have base as its backing file. 26126ebdcee2SJeff Cody * 26136ebdcee2SJeff Cody * Requires that the overlay to 'top' is opened r/w, so that the backing file 26146ebdcee2SJeff Cody * information in 'bs' can be properly updated. 26156ebdcee2SJeff Cody * 26166ebdcee2SJeff Cody * E.g., this will convert the following chain: 26176ebdcee2SJeff Cody * bottom <- base <- intermediate <- top <- active 26186ebdcee2SJeff Cody * 26196ebdcee2SJeff Cody * to 26206ebdcee2SJeff Cody * 26216ebdcee2SJeff Cody * bottom <- base <- active 26226ebdcee2SJeff Cody * 26236ebdcee2SJeff Cody * It is allowed for bottom==base, in which case it converts: 26246ebdcee2SJeff Cody * 26256ebdcee2SJeff Cody * base <- intermediate <- top <- active 26266ebdcee2SJeff Cody * 26276ebdcee2SJeff Cody * to 26286ebdcee2SJeff Cody * 26296ebdcee2SJeff Cody * base <- active 26306ebdcee2SJeff Cody * 263154e26900SJeff Cody * If backing_file_str is non-NULL, it will be used when modifying top's 263254e26900SJeff Cody * overlay image metadata. 
263354e26900SJeff Cody * 26346ebdcee2SJeff Cody * Error conditions: 26356ebdcee2SJeff Cody * if active == top, that is considered an error 26366ebdcee2SJeff Cody * 26376ebdcee2SJeff Cody */ 26386ebdcee2SJeff Cody int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, 263954e26900SJeff Cody BlockDriverState *base, const char *backing_file_str) 26406ebdcee2SJeff Cody { 26416ebdcee2SJeff Cody BlockDriverState *intermediate; 26426ebdcee2SJeff Cody BlockDriverState *base_bs = NULL; 26436ebdcee2SJeff Cody BlockDriverState *new_top_bs = NULL; 26446ebdcee2SJeff Cody BlkIntermediateStates *intermediate_state, *next; 26456ebdcee2SJeff Cody int ret = -EIO; 26466ebdcee2SJeff Cody 26476ebdcee2SJeff Cody QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete; 26486ebdcee2SJeff Cody QSIMPLEQ_INIT(&states_to_delete); 26496ebdcee2SJeff Cody 26506ebdcee2SJeff Cody if (!top->drv || !base->drv) { 26516ebdcee2SJeff Cody goto exit; 26526ebdcee2SJeff Cody } 26536ebdcee2SJeff Cody 26546ebdcee2SJeff Cody new_top_bs = bdrv_find_overlay(active, top); 26556ebdcee2SJeff Cody 26566ebdcee2SJeff Cody if (new_top_bs == NULL) { 26576ebdcee2SJeff Cody /* we could not find the image above 'top', this is an error */ 26586ebdcee2SJeff Cody goto exit; 26596ebdcee2SJeff Cody } 26606ebdcee2SJeff Cody 26616ebdcee2SJeff Cody /* special case of new_top_bs->backing_hd already pointing to base - nothing 26626ebdcee2SJeff Cody * to do, no intermediate images */ 26636ebdcee2SJeff Cody if (new_top_bs->backing_hd == base) { 26646ebdcee2SJeff Cody ret = 0; 26656ebdcee2SJeff Cody goto exit; 26666ebdcee2SJeff Cody } 26676ebdcee2SJeff Cody 26686ebdcee2SJeff Cody intermediate = top; 26696ebdcee2SJeff Cody 26706ebdcee2SJeff Cody /* now we will go down through the list, and add each BDS we find 26716ebdcee2SJeff Cody * into our deletion queue, until we hit the 'base' 26726ebdcee2SJeff Cody */ 26736ebdcee2SJeff Cody while (intermediate) { 26745839e53bSMarkus Armbruster 
intermediate_state = g_new0(BlkIntermediateStates, 1); 26756ebdcee2SJeff Cody intermediate_state->bs = intermediate; 26766ebdcee2SJeff Cody QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry); 26776ebdcee2SJeff Cody 26786ebdcee2SJeff Cody if (intermediate->backing_hd == base) { 26796ebdcee2SJeff Cody base_bs = intermediate->backing_hd; 26806ebdcee2SJeff Cody break; 26816ebdcee2SJeff Cody } 26826ebdcee2SJeff Cody intermediate = intermediate->backing_hd; 26836ebdcee2SJeff Cody } 26846ebdcee2SJeff Cody if (base_bs == NULL) { 26856ebdcee2SJeff Cody /* something went wrong, we did not end at the base. safely 26866ebdcee2SJeff Cody * unravel everything, and exit with error */ 26876ebdcee2SJeff Cody goto exit; 26886ebdcee2SJeff Cody } 26896ebdcee2SJeff Cody 26906ebdcee2SJeff Cody /* success - we can delete the intermediate states, and link top->base */ 269154e26900SJeff Cody backing_file_str = backing_file_str ? backing_file_str : base_bs->filename; 269254e26900SJeff Cody ret = bdrv_change_backing_file(new_top_bs, backing_file_str, 26936ebdcee2SJeff Cody base_bs->drv ? 
base_bs->drv->format_name : ""); 26946ebdcee2SJeff Cody if (ret) { 26956ebdcee2SJeff Cody goto exit; 26966ebdcee2SJeff Cody } 2697920beae1SFam Zheng bdrv_set_backing_hd(new_top_bs, base_bs); 26986ebdcee2SJeff Cody 26996ebdcee2SJeff Cody QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) { 27006ebdcee2SJeff Cody /* so that bdrv_close() does not recursively close the chain */ 2701920beae1SFam Zheng bdrv_set_backing_hd(intermediate_state->bs, NULL); 27024f6fd349SFam Zheng bdrv_unref(intermediate_state->bs); 27036ebdcee2SJeff Cody } 27046ebdcee2SJeff Cody ret = 0; 27056ebdcee2SJeff Cody 27066ebdcee2SJeff Cody exit: 27076ebdcee2SJeff Cody QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) { 27086ebdcee2SJeff Cody g_free(intermediate_state); 27096ebdcee2SJeff Cody } 27106ebdcee2SJeff Cody return ret; 27116ebdcee2SJeff Cody } 27126ebdcee2SJeff Cody 27136ebdcee2SJeff Cody 271471d0770cSaliguori static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset, 271571d0770cSaliguori size_t size) 271671d0770cSaliguori { 271775af1f34SPeter Lieven if (size > BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) { 27181dd3a447SKevin Wolf return -EIO; 27191dd3a447SKevin Wolf } 27201dd3a447SKevin Wolf 2721c0191e76SMax Reitz if (!bdrv_is_inserted(bs)) { 272271d0770cSaliguori return -ENOMEDIUM; 2723c0191e76SMax Reitz } 272471d0770cSaliguori 2725c0191e76SMax Reitz if (offset < 0) { 2726fbb7b4e0SKevin Wolf return -EIO; 2727c0191e76SMax Reitz } 272871d0770cSaliguori 272971d0770cSaliguori return 0; 273071d0770cSaliguori } 273171d0770cSaliguori 273271d0770cSaliguori static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num, 273371d0770cSaliguori int nb_sectors) 273471d0770cSaliguori { 273575af1f34SPeter Lieven if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { 27368f4754edSKevin Wolf return -EIO; 27378f4754edSKevin Wolf } 27388f4754edSKevin Wolf 2739eb5a3165SJes Sorensen return bdrv_check_byte_request(bs, sector_num * 
BDRV_SECTOR_SIZE, 2740eb5a3165SJes Sorensen nb_sectors * BDRV_SECTOR_SIZE); 274171d0770cSaliguori } 274271d0770cSaliguori 27431c9805a3SStefan Hajnoczi typedef struct RwCo { 27441c9805a3SStefan Hajnoczi BlockDriverState *bs; 2745775aa8b6SKevin Wolf int64_t offset; 27461c9805a3SStefan Hajnoczi QEMUIOVector *qiov; 27471c9805a3SStefan Hajnoczi bool is_write; 27481c9805a3SStefan Hajnoczi int ret; 27494105eaaaSPeter Lieven BdrvRequestFlags flags; 27501c9805a3SStefan Hajnoczi } RwCo; 27511c9805a3SStefan Hajnoczi 27521c9805a3SStefan Hajnoczi static void coroutine_fn bdrv_rw_co_entry(void *opaque) 2753fc01f7e7Sbellard { 27541c9805a3SStefan Hajnoczi RwCo *rwco = opaque; 2755fc01f7e7Sbellard 27561c9805a3SStefan Hajnoczi if (!rwco->is_write) { 2757775aa8b6SKevin Wolf rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset, 2758775aa8b6SKevin Wolf rwco->qiov->size, rwco->qiov, 27594105eaaaSPeter Lieven rwco->flags); 27601c9805a3SStefan Hajnoczi } else { 2761775aa8b6SKevin Wolf rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset, 2762775aa8b6SKevin Wolf rwco->qiov->size, rwco->qiov, 27634105eaaaSPeter Lieven rwco->flags); 27641c9805a3SStefan Hajnoczi } 27651c9805a3SStefan Hajnoczi } 2766e7a8a783SKevin Wolf 27671c9805a3SStefan Hajnoczi /* 27688d3b1a2dSKevin Wolf * Process a vectored synchronous request using coroutines 27691c9805a3SStefan Hajnoczi */ 2770775aa8b6SKevin Wolf static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset, 27714105eaaaSPeter Lieven QEMUIOVector *qiov, bool is_write, 27724105eaaaSPeter Lieven BdrvRequestFlags flags) 27731c9805a3SStefan Hajnoczi { 27741c9805a3SStefan Hajnoczi Coroutine *co; 27751c9805a3SStefan Hajnoczi RwCo rwco = { 27761c9805a3SStefan Hajnoczi .bs = bs, 2777775aa8b6SKevin Wolf .offset = offset, 27788d3b1a2dSKevin Wolf .qiov = qiov, 27791c9805a3SStefan Hajnoczi .is_write = is_write, 27801c9805a3SStefan Hajnoczi .ret = NOT_DONE, 27814105eaaaSPeter Lieven .flags = flags, 27821c9805a3SStefan Hajnoczi }; 27831c9805a3SStefan Hajnoczi 
2784498e386cSZhi Yong Wu /** 2785498e386cSZhi Yong Wu * In sync call context, when the vcpu is blocked, this throttling timer 2786498e386cSZhi Yong Wu * will not fire; so the I/O throttling function has to be disabled here 2787498e386cSZhi Yong Wu * if it has been enabled. 2788498e386cSZhi Yong Wu */ 2789498e386cSZhi Yong Wu if (bs->io_limits_enabled) { 2790498e386cSZhi Yong Wu fprintf(stderr, "Disabling I/O throttling on '%s' due " 2791498e386cSZhi Yong Wu "to synchronous I/O.\n", bdrv_get_device_name(bs)); 2792498e386cSZhi Yong Wu bdrv_io_limits_disable(bs); 2793498e386cSZhi Yong Wu } 2794498e386cSZhi Yong Wu 27951c9805a3SStefan Hajnoczi if (qemu_in_coroutine()) { 27961c9805a3SStefan Hajnoczi /* Fast-path if already in coroutine context */ 27971c9805a3SStefan Hajnoczi bdrv_rw_co_entry(&rwco); 27981c9805a3SStefan Hajnoczi } else { 27992572b37aSStefan Hajnoczi AioContext *aio_context = bdrv_get_aio_context(bs); 28002572b37aSStefan Hajnoczi 28011c9805a3SStefan Hajnoczi co = qemu_coroutine_create(bdrv_rw_co_entry); 28021c9805a3SStefan Hajnoczi qemu_coroutine_enter(co, &rwco); 28031c9805a3SStefan Hajnoczi while (rwco.ret == NOT_DONE) { 28042572b37aSStefan Hajnoczi aio_poll(aio_context, true); 28051c9805a3SStefan Hajnoczi } 28061c9805a3SStefan Hajnoczi } 28071c9805a3SStefan Hajnoczi return rwco.ret; 2808e7a8a783SKevin Wolf } 2809e7a8a783SKevin Wolf 28108d3b1a2dSKevin Wolf /* 28118d3b1a2dSKevin Wolf * Process a synchronous request using coroutines 28128d3b1a2dSKevin Wolf */ 28138d3b1a2dSKevin Wolf static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, 28144105eaaaSPeter Lieven int nb_sectors, bool is_write, BdrvRequestFlags flags) 28158d3b1a2dSKevin Wolf { 28168d3b1a2dSKevin Wolf QEMUIOVector qiov; 28178d3b1a2dSKevin Wolf struct iovec iov = { 28188d3b1a2dSKevin Wolf .iov_base = (void *)buf, 28198d3b1a2dSKevin Wolf .iov_len = nb_sectors * BDRV_SECTOR_SIZE, 28208d3b1a2dSKevin Wolf }; 28218d3b1a2dSKevin Wolf 282275af1f34SPeter Lieven if (nb_sectors < 
0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { 2823da15ee51SKevin Wolf return -EINVAL; 2824da15ee51SKevin Wolf } 2825da15ee51SKevin Wolf 28268d3b1a2dSKevin Wolf qemu_iovec_init_external(&qiov, &iov, 1); 2827775aa8b6SKevin Wolf return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS, 2828775aa8b6SKevin Wolf &qiov, is_write, flags); 28298d3b1a2dSKevin Wolf } 28308d3b1a2dSKevin Wolf 28311c9805a3SStefan Hajnoczi /* return < 0 if error. See bdrv_write() for the return codes */ 28321c9805a3SStefan Hajnoczi int bdrv_read(BlockDriverState *bs, int64_t sector_num, 28331c9805a3SStefan Hajnoczi uint8_t *buf, int nb_sectors) 28341c9805a3SStefan Hajnoczi { 28354105eaaaSPeter Lieven return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0); 283683f64091Sbellard } 2837fc01f7e7Sbellard 283807d27a44SMarkus Armbruster /* Just like bdrv_read(), but with I/O throttling temporarily disabled */ 283907d27a44SMarkus Armbruster int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num, 284007d27a44SMarkus Armbruster uint8_t *buf, int nb_sectors) 284107d27a44SMarkus Armbruster { 284207d27a44SMarkus Armbruster bool enabled; 284307d27a44SMarkus Armbruster int ret; 284407d27a44SMarkus Armbruster 284507d27a44SMarkus Armbruster enabled = bs->io_limits_enabled; 284607d27a44SMarkus Armbruster bs->io_limits_enabled = false; 28474e7395e8SPeter Lieven ret = bdrv_read(bs, sector_num, buf, nb_sectors); 284807d27a44SMarkus Armbruster bs->io_limits_enabled = enabled; 284907d27a44SMarkus Armbruster return ret; 285007d27a44SMarkus Armbruster } 285107d27a44SMarkus Armbruster 285219cb3738Sbellard /* Return < 0 if error. Important errors are: 285319cb3738Sbellard -EIO generic I/O error (may happen for all errors) 285419cb3738Sbellard -ENOMEDIUM No media inserted. 
285519cb3738Sbellard -EINVAL Invalid sector number or nb_sectors 285619cb3738Sbellard -EACCES Trying to write a read-only device 285719cb3738Sbellard */ 2858fc01f7e7Sbellard int bdrv_write(BlockDriverState *bs, int64_t sector_num, 2859fc01f7e7Sbellard const uint8_t *buf, int nb_sectors) 2860fc01f7e7Sbellard { 28614105eaaaSPeter Lieven return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0); 286283f64091Sbellard } 286383f64091Sbellard 2864aa7bfbffSPeter Lieven int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num, 2865aa7bfbffSPeter Lieven int nb_sectors, BdrvRequestFlags flags) 28664105eaaaSPeter Lieven { 28674105eaaaSPeter Lieven return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true, 2868aa7bfbffSPeter Lieven BDRV_REQ_ZERO_WRITE | flags); 28698d3b1a2dSKevin Wolf } 28708d3b1a2dSKevin Wolf 2871d75cbb5eSPeter Lieven /* 2872d75cbb5eSPeter Lieven * Completely zero out a block device with the help of bdrv_write_zeroes. 2873d75cbb5eSPeter Lieven * The operation is sped up by checking the block status and only writing 2874d75cbb5eSPeter Lieven * zeroes to the device if they currently do not return zeroes. Optional 2875d75cbb5eSPeter Lieven * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP). 2876d75cbb5eSPeter Lieven * 2877d75cbb5eSPeter Lieven * Returns < 0 on error, 0 on success. For error codes see bdrv_write(). 
2878d75cbb5eSPeter Lieven */ 2879d75cbb5eSPeter Lieven int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags) 2880d75cbb5eSPeter Lieven { 2881d32f7c10SMarkus Armbruster int64_t target_sectors, ret, nb_sectors, sector_num = 0; 2882d75cbb5eSPeter Lieven int n; 2883d75cbb5eSPeter Lieven 2884d32f7c10SMarkus Armbruster target_sectors = bdrv_nb_sectors(bs); 2885d32f7c10SMarkus Armbruster if (target_sectors < 0) { 2886d32f7c10SMarkus Armbruster return target_sectors; 28879ce10c0bSKevin Wolf } 28889ce10c0bSKevin Wolf 2889d75cbb5eSPeter Lieven for (;;) { 289075af1f34SPeter Lieven nb_sectors = MIN(target_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS); 2891d75cbb5eSPeter Lieven if (nb_sectors <= 0) { 2892d75cbb5eSPeter Lieven return 0; 2893d75cbb5eSPeter Lieven } 2894d75cbb5eSPeter Lieven ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n); 28953d94ce60SPeter Lieven if (ret < 0) { 28963d94ce60SPeter Lieven error_report("error getting block status at sector %" PRId64 ": %s", 28973d94ce60SPeter Lieven sector_num, strerror(-ret)); 28983d94ce60SPeter Lieven return ret; 28993d94ce60SPeter Lieven } 2900d75cbb5eSPeter Lieven if (ret & BDRV_BLOCK_ZERO) { 2901d75cbb5eSPeter Lieven sector_num += n; 2902d75cbb5eSPeter Lieven continue; 2903d75cbb5eSPeter Lieven } 2904d75cbb5eSPeter Lieven ret = bdrv_write_zeroes(bs, sector_num, n, flags); 2905d75cbb5eSPeter Lieven if (ret < 0) { 2906d75cbb5eSPeter Lieven error_report("error writing zeroes at sector %" PRId64 ": %s", 2907d75cbb5eSPeter Lieven sector_num, strerror(-ret)); 2908d75cbb5eSPeter Lieven return ret; 2909d75cbb5eSPeter Lieven } 2910d75cbb5eSPeter Lieven sector_num += n; 2911d75cbb5eSPeter Lieven } 2912d75cbb5eSPeter Lieven } 2913d75cbb5eSPeter Lieven 2914a3ef6571SKevin Wolf int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes) 291583f64091Sbellard { 2916a3ef6571SKevin Wolf QEMUIOVector qiov; 2917a3ef6571SKevin Wolf struct iovec iov = { 2918a3ef6571SKevin Wolf .iov_base = (void *)buf, 
2919a3ef6571SKevin Wolf .iov_len = bytes, 2920a3ef6571SKevin Wolf }; 29219a8c4cceSKevin Wolf int ret; 292283f64091Sbellard 2923a3ef6571SKevin Wolf if (bytes < 0) { 2924a3ef6571SKevin Wolf return -EINVAL; 292583f64091Sbellard } 292683f64091Sbellard 2927a3ef6571SKevin Wolf qemu_iovec_init_external(&qiov, &iov, 1); 2928a3ef6571SKevin Wolf ret = bdrv_prwv_co(bs, offset, &qiov, false, 0); 2929a3ef6571SKevin Wolf if (ret < 0) { 29309a8c4cceSKevin Wolf return ret; 293183f64091Sbellard } 293283f64091Sbellard 2933a3ef6571SKevin Wolf return bytes; 293483f64091Sbellard } 293583f64091Sbellard 29368d3b1a2dSKevin Wolf int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov) 293783f64091Sbellard { 29389a8c4cceSKevin Wolf int ret; 293983f64091Sbellard 29408407d5d7SKevin Wolf ret = bdrv_prwv_co(bs, offset, qiov, true, 0); 29418d3b1a2dSKevin Wolf if (ret < 0) { 29429a8c4cceSKevin Wolf return ret; 29438d3b1a2dSKevin Wolf } 29448d3b1a2dSKevin Wolf 29458d3b1a2dSKevin Wolf return qiov->size; 29468d3b1a2dSKevin Wolf } 29478d3b1a2dSKevin Wolf 29488d3b1a2dSKevin Wolf int bdrv_pwrite(BlockDriverState *bs, int64_t offset, 29498407d5d7SKevin Wolf const void *buf, int bytes) 29508d3b1a2dSKevin Wolf { 29518d3b1a2dSKevin Wolf QEMUIOVector qiov; 29528d3b1a2dSKevin Wolf struct iovec iov = { 29538d3b1a2dSKevin Wolf .iov_base = (void *) buf, 29548407d5d7SKevin Wolf .iov_len = bytes, 29558d3b1a2dSKevin Wolf }; 29568d3b1a2dSKevin Wolf 29578407d5d7SKevin Wolf if (bytes < 0) { 29588407d5d7SKevin Wolf return -EINVAL; 29598407d5d7SKevin Wolf } 29608407d5d7SKevin Wolf 29618d3b1a2dSKevin Wolf qemu_iovec_init_external(&qiov, &iov, 1); 29628d3b1a2dSKevin Wolf return bdrv_pwritev(bs, offset, &qiov); 296383f64091Sbellard } 296483f64091Sbellard 2965f08145feSKevin Wolf /* 2966f08145feSKevin Wolf * Writes to the file and ensures that no writes are reordered across this 2967f08145feSKevin Wolf * request (acts as a barrier) 2968f08145feSKevin Wolf * 2969f08145feSKevin Wolf * Returns 0 on success, 
-errno in error cases. 2970f08145feSKevin Wolf */ 2971f08145feSKevin Wolf int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset, 2972f08145feSKevin Wolf const void *buf, int count) 2973f08145feSKevin Wolf { 2974f08145feSKevin Wolf int ret; 2975f08145feSKevin Wolf 2976f08145feSKevin Wolf ret = bdrv_pwrite(bs, offset, buf, count); 2977f08145feSKevin Wolf if (ret < 0) { 2978f08145feSKevin Wolf return ret; 2979f08145feSKevin Wolf } 2980f08145feSKevin Wolf 2981f05fa4adSPaolo Bonzini /* No flush needed for cache modes that already do it */ 2982f05fa4adSPaolo Bonzini if (bs->enable_write_cache) { 2983f08145feSKevin Wolf bdrv_flush(bs); 2984f08145feSKevin Wolf } 2985f08145feSKevin Wolf 2986f08145feSKevin Wolf return 0; 2987f08145feSKevin Wolf } 2988f08145feSKevin Wolf 2989470c0504SStefan Hajnoczi static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, 2990ab185921SStefan Hajnoczi int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) 2991ab185921SStefan Hajnoczi { 2992ab185921SStefan Hajnoczi /* Perform I/O through a temporary buffer so that users who scribble over 2993ab185921SStefan Hajnoczi * their read buffer while the operation is in progress do not end up 2994ab185921SStefan Hajnoczi * modifying the image file. This is critical for zero-copy guest I/O 2995ab185921SStefan Hajnoczi * where anything might happen inside guest memory. 
2996ab185921SStefan Hajnoczi */ 2997ab185921SStefan Hajnoczi void *bounce_buffer; 2998ab185921SStefan Hajnoczi 299979c053bdSStefan Hajnoczi BlockDriver *drv = bs->drv; 3000ab185921SStefan Hajnoczi struct iovec iov; 3001ab185921SStefan Hajnoczi QEMUIOVector bounce_qiov; 3002ab185921SStefan Hajnoczi int64_t cluster_sector_num; 3003ab185921SStefan Hajnoczi int cluster_nb_sectors; 3004ab185921SStefan Hajnoczi size_t skip_bytes; 3005ab185921SStefan Hajnoczi int ret; 3006ab185921SStefan Hajnoczi 3007ab185921SStefan Hajnoczi /* Cover entire cluster so no additional backing file I/O is required when 3008ab185921SStefan Hajnoczi * allocating cluster in the image file. 3009ab185921SStefan Hajnoczi */ 3010343bded4SPaolo Bonzini bdrv_round_to_clusters(bs, sector_num, nb_sectors, 3011ab185921SStefan Hajnoczi &cluster_sector_num, &cluster_nb_sectors); 3012ab185921SStefan Hajnoczi 3013470c0504SStefan Hajnoczi trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, 3014ab185921SStefan Hajnoczi cluster_sector_num, cluster_nb_sectors); 3015ab185921SStefan Hajnoczi 3016ab185921SStefan Hajnoczi iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE; 3017857d4f46SKevin Wolf iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len); 3018857d4f46SKevin Wolf if (bounce_buffer == NULL) { 3019857d4f46SKevin Wolf ret = -ENOMEM; 3020857d4f46SKevin Wolf goto err; 3021857d4f46SKevin Wolf } 3022857d4f46SKevin Wolf 3023ab185921SStefan Hajnoczi qemu_iovec_init_external(&bounce_qiov, &iov, 1); 3024ab185921SStefan Hajnoczi 302579c053bdSStefan Hajnoczi ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors, 3026ab185921SStefan Hajnoczi &bounce_qiov); 3027ab185921SStefan Hajnoczi if (ret < 0) { 3028ab185921SStefan Hajnoczi goto err; 3029ab185921SStefan Hajnoczi } 3030ab185921SStefan Hajnoczi 303179c053bdSStefan Hajnoczi if (drv->bdrv_co_write_zeroes && 303279c053bdSStefan Hajnoczi buffer_is_zero(bounce_buffer, iov.iov_len)) { 3033621f0589SKevin Wolf ret = 
bdrv_co_do_write_zeroes(bs, cluster_sector_num, 3034aa7bfbffSPeter Lieven cluster_nb_sectors, 0); 303579c053bdSStefan Hajnoczi } else { 3036f05fa4adSPaolo Bonzini /* This does not change the data on the disk, it is not necessary 3037f05fa4adSPaolo Bonzini * to flush even in cache=writethrough mode. 3038f05fa4adSPaolo Bonzini */ 303979c053bdSStefan Hajnoczi ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors, 3040ab185921SStefan Hajnoczi &bounce_qiov); 304179c053bdSStefan Hajnoczi } 304279c053bdSStefan Hajnoczi 3043ab185921SStefan Hajnoczi if (ret < 0) { 3044ab185921SStefan Hajnoczi /* It might be okay to ignore write errors for guest requests. If this 3045ab185921SStefan Hajnoczi * is a deliberate copy-on-read then we don't want to ignore the error. 3046ab185921SStefan Hajnoczi * Simply report it in all cases. 3047ab185921SStefan Hajnoczi */ 3048ab185921SStefan Hajnoczi goto err; 3049ab185921SStefan Hajnoczi } 3050ab185921SStefan Hajnoczi 3051ab185921SStefan Hajnoczi skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE; 305203396148SMichael Tokarev qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes, 3053ab185921SStefan Hajnoczi nb_sectors * BDRV_SECTOR_SIZE); 3054ab185921SStefan Hajnoczi 3055ab185921SStefan Hajnoczi err: 3056ab185921SStefan Hajnoczi qemu_vfree(bounce_buffer); 3057ab185921SStefan Hajnoczi return ret; 3058ab185921SStefan Hajnoczi } 3059ab185921SStefan Hajnoczi 3060c5fbe571SStefan Hajnoczi /* 3061d0c7f642SKevin Wolf * Forwards an already correctly aligned request to the BlockDriver. This 3062d0c7f642SKevin Wolf * handles copy on read and zeroing after EOF; any other features must be 3063d0c7f642SKevin Wolf * implemented by the caller. 
3064c5fbe571SStefan Hajnoczi */ 3065d0c7f642SKevin Wolf static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, 306665afd211SKevin Wolf BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, 3067ec746e10SKevin Wolf int64_t align, QEMUIOVector *qiov, int flags) 3068da1fa91dSKevin Wolf { 3069da1fa91dSKevin Wolf BlockDriver *drv = bs->drv; 3070dbffbdcfSStefan Hajnoczi int ret; 3071da1fa91dSKevin Wolf 3072d0c7f642SKevin Wolf int64_t sector_num = offset >> BDRV_SECTOR_BITS; 3073d0c7f642SKevin Wolf unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS; 3074da1fa91dSKevin Wolf 3075d0c7f642SKevin Wolf assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); 3076d0c7f642SKevin Wolf assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); 30778eb029c2SKevin Wolf assert(!qiov || bytes == qiov->size); 3078d0c7f642SKevin Wolf 3079d0c7f642SKevin Wolf /* Handle Copy on Read and associated serialisation */ 3080470c0504SStefan Hajnoczi if (flags & BDRV_REQ_COPY_ON_READ) { 30817327145fSKevin Wolf /* If we touch the same cluster it counts as an overlap. This 30827327145fSKevin Wolf * guarantees that allocating writes will be serialized and not race 30837327145fSKevin Wolf * with each other for the same cluster. For example, in copy-on-read 30847327145fSKevin Wolf * it ensures that the CoR read and write operations are atomic and 30857327145fSKevin Wolf * guest writes cannot interleave between them. 
*/ 30867327145fSKevin Wolf mark_request_serialising(req, bdrv_get_cluster_size(bs)); 3087470c0504SStefan Hajnoczi } 3088470c0504SStefan Hajnoczi 30892dbafdc0SKevin Wolf wait_serialising_requests(req); 3090f4658285SStefan Hajnoczi 3091470c0504SStefan Hajnoczi if (flags & BDRV_REQ_COPY_ON_READ) { 3092ab185921SStefan Hajnoczi int pnum; 3093ab185921SStefan Hajnoczi 3094bdad13b9SPaolo Bonzini ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum); 3095ab185921SStefan Hajnoczi if (ret < 0) { 3096ab185921SStefan Hajnoczi goto out; 3097ab185921SStefan Hajnoczi } 3098ab185921SStefan Hajnoczi 3099ab185921SStefan Hajnoczi if (!ret || pnum != nb_sectors) { 3100470c0504SStefan Hajnoczi ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov); 3101ab185921SStefan Hajnoczi goto out; 3102ab185921SStefan Hajnoczi } 3103ab185921SStefan Hajnoczi } 3104ab185921SStefan Hajnoczi 3105d0c7f642SKevin Wolf /* Forward the request to the BlockDriver */ 3106c0191e76SMax Reitz if (!bs->zero_beyond_eof) { 3107dbffbdcfSStefan Hajnoczi ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov); 3108893a8f62SMORITA Kazutaka } else { 3109c0191e76SMax Reitz /* Read zeros after EOF */ 31104049082cSMarkus Armbruster int64_t total_sectors, max_nb_sectors; 3111893a8f62SMORITA Kazutaka 31124049082cSMarkus Armbruster total_sectors = bdrv_nb_sectors(bs); 31134049082cSMarkus Armbruster if (total_sectors < 0) { 31144049082cSMarkus Armbruster ret = total_sectors; 3115893a8f62SMORITA Kazutaka goto out; 3116893a8f62SMORITA Kazutaka } 3117893a8f62SMORITA Kazutaka 31185f5bcd80SKevin Wolf max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num), 31195f5bcd80SKevin Wolf align >> BDRV_SECTOR_BITS); 3120e012b78cSPaolo Bonzini if (nb_sectors < max_nb_sectors) { 3121e012b78cSPaolo Bonzini ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov); 3122e012b78cSPaolo Bonzini } else if (max_nb_sectors > 0) { 312333f461e0SKevin Wolf QEMUIOVector local_qiov; 312433f461e0SKevin Wolf 312533f461e0SKevin Wolf 
qemu_iovec_init(&local_qiov, qiov->niov); 312633f461e0SKevin Wolf qemu_iovec_concat(&local_qiov, qiov, 0, 3127e012b78cSPaolo Bonzini max_nb_sectors * BDRV_SECTOR_SIZE); 312833f461e0SKevin Wolf 3129e012b78cSPaolo Bonzini ret = drv->bdrv_co_readv(bs, sector_num, max_nb_sectors, 313033f461e0SKevin Wolf &local_qiov); 313133f461e0SKevin Wolf 313233f461e0SKevin Wolf qemu_iovec_destroy(&local_qiov); 3133893a8f62SMORITA Kazutaka } else { 3134893a8f62SMORITA Kazutaka ret = 0; 3135893a8f62SMORITA Kazutaka } 3136893a8f62SMORITA Kazutaka 3137893a8f62SMORITA Kazutaka /* Reading beyond end of file is supposed to produce zeroes */ 3138893a8f62SMORITA Kazutaka if (ret == 0 && total_sectors < sector_num + nb_sectors) { 3139893a8f62SMORITA Kazutaka uint64_t offset = MAX(0, total_sectors - sector_num); 3140893a8f62SMORITA Kazutaka uint64_t bytes = (sector_num + nb_sectors - offset) * 3141893a8f62SMORITA Kazutaka BDRV_SECTOR_SIZE; 3142893a8f62SMORITA Kazutaka qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes); 3143893a8f62SMORITA Kazutaka } 3144893a8f62SMORITA Kazutaka } 3145ab185921SStefan Hajnoczi 3146ab185921SStefan Hajnoczi out: 3147dbffbdcfSStefan Hajnoczi return ret; 3148da1fa91dSKevin Wolf } 3149da1fa91dSKevin Wolf 3150fc3959e4SFam Zheng static inline uint64_t bdrv_get_align(BlockDriverState *bs) 3151fc3959e4SFam Zheng { 3152fc3959e4SFam Zheng /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */ 3153fc3959e4SFam Zheng return MAX(BDRV_SECTOR_SIZE, bs->request_alignment); 3154fc3959e4SFam Zheng } 3155fc3959e4SFam Zheng 3156fc3959e4SFam Zheng static inline bool bdrv_req_is_aligned(BlockDriverState *bs, 3157fc3959e4SFam Zheng int64_t offset, size_t bytes) 3158fc3959e4SFam Zheng { 3159fc3959e4SFam Zheng int64_t align = bdrv_get_align(bs); 3160fc3959e4SFam Zheng return !(offset & (align - 1) || (bytes & (align - 1))); 3161fc3959e4SFam Zheng } 3162fc3959e4SFam Zheng 3163d0c7f642SKevin Wolf /* 3164d0c7f642SKevin Wolf * Handle a read request in coroutine 
context 3165d0c7f642SKevin Wolf */ 31661b0288aeSKevin Wolf static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs, 31671b0288aeSKevin Wolf int64_t offset, unsigned int bytes, QEMUIOVector *qiov, 3168d0c7f642SKevin Wolf BdrvRequestFlags flags) 3169d0c7f642SKevin Wolf { 3170d0c7f642SKevin Wolf BlockDriver *drv = bs->drv; 317165afd211SKevin Wolf BdrvTrackedRequest req; 317265afd211SKevin Wolf 3173fc3959e4SFam Zheng uint64_t align = bdrv_get_align(bs); 31741b0288aeSKevin Wolf uint8_t *head_buf = NULL; 31751b0288aeSKevin Wolf uint8_t *tail_buf = NULL; 31761b0288aeSKevin Wolf QEMUIOVector local_qiov; 31771b0288aeSKevin Wolf bool use_local_qiov = false; 3178d0c7f642SKevin Wolf int ret; 3179d0c7f642SKevin Wolf 3180d0c7f642SKevin Wolf if (!drv) { 3181d0c7f642SKevin Wolf return -ENOMEDIUM; 3182d0c7f642SKevin Wolf } 3183b9c64947SMax Reitz 3184b9c64947SMax Reitz ret = bdrv_check_byte_request(bs, offset, bytes); 3185b9c64947SMax Reitz if (ret < 0) { 3186b9c64947SMax Reitz return ret; 3187d0c7f642SKevin Wolf } 3188d0c7f642SKevin Wolf 3189d0c7f642SKevin Wolf if (bs->copy_on_read) { 3190d0c7f642SKevin Wolf flags |= BDRV_REQ_COPY_ON_READ; 3191d0c7f642SKevin Wolf } 3192d0c7f642SKevin Wolf 3193d0c7f642SKevin Wolf /* throttling disk I/O */ 3194d0c7f642SKevin Wolf if (bs->io_limits_enabled) { 3195d5103588SKevin Wolf bdrv_io_limits_intercept(bs, bytes, false); 3196d0c7f642SKevin Wolf } 3197d0c7f642SKevin Wolf 31981b0288aeSKevin Wolf /* Align read if necessary by padding qiov */ 31991b0288aeSKevin Wolf if (offset & (align - 1)) { 32001b0288aeSKevin Wolf head_buf = qemu_blockalign(bs, align); 32011b0288aeSKevin Wolf qemu_iovec_init(&local_qiov, qiov->niov + 2); 32021b0288aeSKevin Wolf qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1)); 32031b0288aeSKevin Wolf qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); 32041b0288aeSKevin Wolf use_local_qiov = true; 32051b0288aeSKevin Wolf 32061b0288aeSKevin Wolf bytes += offset & (align - 1); 32071b0288aeSKevin Wolf offset = 
offset & ~(align - 1); 32081b0288aeSKevin Wolf } 32091b0288aeSKevin Wolf 32101b0288aeSKevin Wolf if ((offset + bytes) & (align - 1)) { 32111b0288aeSKevin Wolf if (!use_local_qiov) { 32121b0288aeSKevin Wolf qemu_iovec_init(&local_qiov, qiov->niov + 1); 32131b0288aeSKevin Wolf qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); 32141b0288aeSKevin Wolf use_local_qiov = true; 32151b0288aeSKevin Wolf } 32161b0288aeSKevin Wolf tail_buf = qemu_blockalign(bs, align); 32171b0288aeSKevin Wolf qemu_iovec_add(&local_qiov, tail_buf, 32181b0288aeSKevin Wolf align - ((offset + bytes) & (align - 1))); 32191b0288aeSKevin Wolf 32201b0288aeSKevin Wolf bytes = ROUND_UP(bytes, align); 32211b0288aeSKevin Wolf } 32221b0288aeSKevin Wolf 322365afd211SKevin Wolf tracked_request_begin(&req, bs, offset, bytes, false); 3224ec746e10SKevin Wolf ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align, 32251b0288aeSKevin Wolf use_local_qiov ? &local_qiov : qiov, 32261b0288aeSKevin Wolf flags); 322765afd211SKevin Wolf tracked_request_end(&req); 32281b0288aeSKevin Wolf 32291b0288aeSKevin Wolf if (use_local_qiov) { 32301b0288aeSKevin Wolf qemu_iovec_destroy(&local_qiov); 32311b0288aeSKevin Wolf qemu_vfree(head_buf); 32321b0288aeSKevin Wolf qemu_vfree(tail_buf); 32331b0288aeSKevin Wolf } 32341b0288aeSKevin Wolf 3235d0c7f642SKevin Wolf return ret; 3236d0c7f642SKevin Wolf } 3237d0c7f642SKevin Wolf 32381b0288aeSKevin Wolf static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, 32391b0288aeSKevin Wolf int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, 32401b0288aeSKevin Wolf BdrvRequestFlags flags) 32411b0288aeSKevin Wolf { 324275af1f34SPeter Lieven if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { 32431b0288aeSKevin Wolf return -EINVAL; 32441b0288aeSKevin Wolf } 32451b0288aeSKevin Wolf 32461b0288aeSKevin Wolf return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS, 32471b0288aeSKevin Wolf nb_sectors << BDRV_SECTOR_BITS, qiov, flags); 32481b0288aeSKevin Wolf } 
32491b0288aeSKevin Wolf 3250c5fbe571SStefan Hajnoczi int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num, 3251da1fa91dSKevin Wolf int nb_sectors, QEMUIOVector *qiov) 3252da1fa91dSKevin Wolf { 3253c5fbe571SStefan Hajnoczi trace_bdrv_co_readv(bs, sector_num, nb_sectors); 3254da1fa91dSKevin Wolf 3255470c0504SStefan Hajnoczi return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0); 3256470c0504SStefan Hajnoczi } 3257470c0504SStefan Hajnoczi 3258470c0504SStefan Hajnoczi int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs, 3259470c0504SStefan Hajnoczi int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) 3260470c0504SStefan Hajnoczi { 3261470c0504SStefan Hajnoczi trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors); 3262470c0504SStefan Hajnoczi 3263470c0504SStefan Hajnoczi return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 3264470c0504SStefan Hajnoczi BDRV_REQ_COPY_ON_READ); 3265c5fbe571SStefan Hajnoczi } 3266c5fbe571SStefan Hajnoczi 326798764152SPeter Lieven #define MAX_WRITE_ZEROES_BOUNCE_BUFFER 32768 3268c31cb707SPeter Lieven 3269f08f2ddaSStefan Hajnoczi static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, 3270aa7bfbffSPeter Lieven int64_t sector_num, int nb_sectors, BdrvRequestFlags flags) 3271f08f2ddaSStefan Hajnoczi { 3272f08f2ddaSStefan Hajnoczi BlockDriver *drv = bs->drv; 3273f08f2ddaSStefan Hajnoczi QEMUIOVector qiov; 3274c31cb707SPeter Lieven struct iovec iov = {0}; 3275c31cb707SPeter Lieven int ret = 0; 3276f08f2ddaSStefan Hajnoczi 327775af1f34SPeter Lieven int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_write_zeroes, 327875af1f34SPeter Lieven BDRV_REQUEST_MAX_SECTORS); 3279621f0589SKevin Wolf 3280c31cb707SPeter Lieven while (nb_sectors > 0 && !ret) { 3281c31cb707SPeter Lieven int num = nb_sectors; 3282c31cb707SPeter Lieven 3283b8d71c09SPaolo Bonzini /* Align request. Block drivers can expect the "bulk" of the request 3284b8d71c09SPaolo Bonzini * to be aligned. 
3285b8d71c09SPaolo Bonzini */ 3286b8d71c09SPaolo Bonzini if (bs->bl.write_zeroes_alignment 3287b8d71c09SPaolo Bonzini && num > bs->bl.write_zeroes_alignment) { 3288b8d71c09SPaolo Bonzini if (sector_num % bs->bl.write_zeroes_alignment != 0) { 3289b8d71c09SPaolo Bonzini /* Make a small request up to the first aligned sector. */ 3290c31cb707SPeter Lieven num = bs->bl.write_zeroes_alignment; 3291c31cb707SPeter Lieven num -= sector_num % bs->bl.write_zeroes_alignment; 3292b8d71c09SPaolo Bonzini } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) { 3293b8d71c09SPaolo Bonzini /* Shorten the request to the last aligned sector. num cannot 3294b8d71c09SPaolo Bonzini * underflow because num > bs->bl.write_zeroes_alignment. 3295b8d71c09SPaolo Bonzini */ 3296b8d71c09SPaolo Bonzini num -= (sector_num + num) % bs->bl.write_zeroes_alignment; 3297b8d71c09SPaolo Bonzini } 3298c31cb707SPeter Lieven } 3299c31cb707SPeter Lieven 3300c31cb707SPeter Lieven /* limit request size */ 3301c31cb707SPeter Lieven if (num > max_write_zeroes) { 3302c31cb707SPeter Lieven num = max_write_zeroes; 3303c31cb707SPeter Lieven } 3304c31cb707SPeter Lieven 3305c31cb707SPeter Lieven ret = -ENOTSUP; 3306f08f2ddaSStefan Hajnoczi /* First try the efficient write zeroes operation */ 3307f08f2ddaSStefan Hajnoczi if (drv->bdrv_co_write_zeroes) { 3308c31cb707SPeter Lieven ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags); 3309f08f2ddaSStefan Hajnoczi } 3310f08f2ddaSStefan Hajnoczi 3311c31cb707SPeter Lieven if (ret == -ENOTSUP) { 3312f08f2ddaSStefan Hajnoczi /* Fall back to bounce buffer if write zeroes is unsupported */ 3313095e4fa4SPeter Lieven int max_xfer_len = MIN_NON_ZERO(bs->bl.max_transfer_length, 331498764152SPeter Lieven MAX_WRITE_ZEROES_BOUNCE_BUFFER); 3315095e4fa4SPeter Lieven num = MIN(num, max_xfer_len); 3316c31cb707SPeter Lieven iov.iov_len = num * BDRV_SECTOR_SIZE; 3317c31cb707SPeter Lieven if (iov.iov_base == NULL) { 3318857d4f46SKevin Wolf iov.iov_base = 
qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE); 3319857d4f46SKevin Wolf if (iov.iov_base == NULL) { 3320857d4f46SKevin Wolf ret = -ENOMEM; 3321857d4f46SKevin Wolf goto fail; 3322857d4f46SKevin Wolf } 3323b8d71c09SPaolo Bonzini memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE); 3324c31cb707SPeter Lieven } 3325f08f2ddaSStefan Hajnoczi qemu_iovec_init_external(&qiov, &iov, 1); 3326f08f2ddaSStefan Hajnoczi 3327c31cb707SPeter Lieven ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov); 3328b8d71c09SPaolo Bonzini 3329b8d71c09SPaolo Bonzini /* Keep bounce buffer around if it is big enough for all 3330b8d71c09SPaolo Bonzini * all future requests. 3331b8d71c09SPaolo Bonzini */ 3332095e4fa4SPeter Lieven if (num < max_xfer_len) { 3333b8d71c09SPaolo Bonzini qemu_vfree(iov.iov_base); 3334b8d71c09SPaolo Bonzini iov.iov_base = NULL; 3335b8d71c09SPaolo Bonzini } 3336c31cb707SPeter Lieven } 3337c31cb707SPeter Lieven 3338c31cb707SPeter Lieven sector_num += num; 3339c31cb707SPeter Lieven nb_sectors -= num; 3340c31cb707SPeter Lieven } 3341f08f2ddaSStefan Hajnoczi 3342857d4f46SKevin Wolf fail: 3343f08f2ddaSStefan Hajnoczi qemu_vfree(iov.iov_base); 3344f08f2ddaSStefan Hajnoczi return ret; 3345f08f2ddaSStefan Hajnoczi } 3346f08f2ddaSStefan Hajnoczi 3347c5fbe571SStefan Hajnoczi /* 3348b404f720SKevin Wolf * Forwards an already correctly aligned write request to the BlockDriver. 
3349c5fbe571SStefan Hajnoczi */ 3350b404f720SKevin Wolf static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, 335165afd211SKevin Wolf BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, 335265afd211SKevin Wolf QEMUIOVector *qiov, int flags) 3353c5fbe571SStefan Hajnoczi { 3354c5fbe571SStefan Hajnoczi BlockDriver *drv = bs->drv; 335528de2dcdSKevin Wolf bool waited; 33566b7cb247SStefan Hajnoczi int ret; 3357da1fa91dSKevin Wolf 3358b404f720SKevin Wolf int64_t sector_num = offset >> BDRV_SECTOR_BITS; 3359b404f720SKevin Wolf unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS; 3360da1fa91dSKevin Wolf 3361b404f720SKevin Wolf assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); 3362b404f720SKevin Wolf assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); 33638eb029c2SKevin Wolf assert(!qiov || bytes == qiov->size); 3364cc0681c4SBenoît Canet 336528de2dcdSKevin Wolf waited = wait_serialising_requests(req); 336628de2dcdSKevin Wolf assert(!waited || !req->serialising); 3367af91f9a7SKevin Wolf assert(req->overlap_offset <= offset); 3368af91f9a7SKevin Wolf assert(offset + bytes <= req->overlap_offset + req->overlap_bytes); 3369244eadefSKevin Wolf 337065afd211SKevin Wolf ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req); 3371d616b224SStefan Hajnoczi 3372465bee1dSPeter Lieven if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF && 3373465bee1dSPeter Lieven !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes && 3374465bee1dSPeter Lieven qemu_iovec_is_zero(qiov)) { 3375465bee1dSPeter Lieven flags |= BDRV_REQ_ZERO_WRITE; 3376465bee1dSPeter Lieven if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) { 3377465bee1dSPeter Lieven flags |= BDRV_REQ_MAY_UNMAP; 3378465bee1dSPeter Lieven } 3379465bee1dSPeter Lieven } 3380465bee1dSPeter Lieven 3381d616b224SStefan Hajnoczi if (ret < 0) { 3382d616b224SStefan Hajnoczi /* Do nothing, write notifier decided to fail this request */ 3383d616b224SStefan Hajnoczi } else if (flags & 
BDRV_REQ_ZERO_WRITE) { 33849e1cb96dSKevin Wolf BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO); 3385aa7bfbffSPeter Lieven ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags); 3386f08f2ddaSStefan Hajnoczi } else { 33879e1cb96dSKevin Wolf BLKDBG_EVENT(bs, BLKDBG_PWRITEV); 33886b7cb247SStefan Hajnoczi ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov); 3389f08f2ddaSStefan Hajnoczi } 33909e1cb96dSKevin Wolf BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE); 33916b7cb247SStefan Hajnoczi 3392f05fa4adSPaolo Bonzini if (ret == 0 && !bs->enable_write_cache) { 3393f05fa4adSPaolo Bonzini ret = bdrv_co_flush(bs); 3394f05fa4adSPaolo Bonzini } 3395f05fa4adSPaolo Bonzini 33961755da16SPaolo Bonzini bdrv_set_dirty(bs, sector_num, nb_sectors); 3397da1fa91dSKevin Wolf 33985366d0c8SBenoît Canet block_acct_highest_sector(&bs->stats, sector_num, nb_sectors); 33995e5a94b6SBenoît Canet 3400c0191e76SMax Reitz if (ret >= 0) { 3401df2a6f29SPaolo Bonzini bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors); 3402df2a6f29SPaolo Bonzini } 3403da1fa91dSKevin Wolf 34046b7cb247SStefan Hajnoczi return ret; 3405da1fa91dSKevin Wolf } 3406da1fa91dSKevin Wolf 3407b404f720SKevin Wolf /* 3408b404f720SKevin Wolf * Handle a write request in coroutine context 3409b404f720SKevin Wolf */ 34106601553eSKevin Wolf static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, 34116601553eSKevin Wolf int64_t offset, unsigned int bytes, QEMUIOVector *qiov, 3412b404f720SKevin Wolf BdrvRequestFlags flags) 3413b404f720SKevin Wolf { 341465afd211SKevin Wolf BdrvTrackedRequest req; 3415fc3959e4SFam Zheng uint64_t align = bdrv_get_align(bs); 34163b8242e0SKevin Wolf uint8_t *head_buf = NULL; 34173b8242e0SKevin Wolf uint8_t *tail_buf = NULL; 34183b8242e0SKevin Wolf QEMUIOVector local_qiov; 34193b8242e0SKevin Wolf bool use_local_qiov = false; 3420b404f720SKevin Wolf int ret; 3421b404f720SKevin Wolf 3422b404f720SKevin Wolf if (!bs->drv) { 3423b404f720SKevin Wolf return -ENOMEDIUM; 3424b404f720SKevin Wolf } 
3425b404f720SKevin Wolf if (bs->read_only) { 3426b404f720SKevin Wolf return -EACCES; 3427b404f720SKevin Wolf } 3428b9c64947SMax Reitz 3429b9c64947SMax Reitz ret = bdrv_check_byte_request(bs, offset, bytes); 3430b9c64947SMax Reitz if (ret < 0) { 3431b9c64947SMax Reitz return ret; 3432b404f720SKevin Wolf } 3433b404f720SKevin Wolf 3434b404f720SKevin Wolf /* throttling disk I/O */ 3435b404f720SKevin Wolf if (bs->io_limits_enabled) { 3436d5103588SKevin Wolf bdrv_io_limits_intercept(bs, bytes, true); 3437b404f720SKevin Wolf } 3438b404f720SKevin Wolf 34393b8242e0SKevin Wolf /* 34403b8242e0SKevin Wolf * Align write if necessary by performing a read-modify-write cycle. 34413b8242e0SKevin Wolf * Pad qiov with the read parts and be sure to have a tracked request not 34423b8242e0SKevin Wolf * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle. 34433b8242e0SKevin Wolf */ 344465afd211SKevin Wolf tracked_request_begin(&req, bs, offset, bytes, true); 34453b8242e0SKevin Wolf 34463b8242e0SKevin Wolf if (offset & (align - 1)) { 34473b8242e0SKevin Wolf QEMUIOVector head_qiov; 34483b8242e0SKevin Wolf struct iovec head_iov; 34493b8242e0SKevin Wolf 34503b8242e0SKevin Wolf mark_request_serialising(&req, align); 34513b8242e0SKevin Wolf wait_serialising_requests(&req); 34523b8242e0SKevin Wolf 34533b8242e0SKevin Wolf head_buf = qemu_blockalign(bs, align); 34543b8242e0SKevin Wolf head_iov = (struct iovec) { 34553b8242e0SKevin Wolf .iov_base = head_buf, 34563b8242e0SKevin Wolf .iov_len = align, 34573b8242e0SKevin Wolf }; 34583b8242e0SKevin Wolf qemu_iovec_init_external(&head_qiov, &head_iov, 1); 34593b8242e0SKevin Wolf 34609e1cb96dSKevin Wolf BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD); 34613b8242e0SKevin Wolf ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align, 34623b8242e0SKevin Wolf align, &head_qiov, 0); 34633b8242e0SKevin Wolf if (ret < 0) { 34643b8242e0SKevin Wolf goto fail; 34653b8242e0SKevin Wolf } 34669e1cb96dSKevin Wolf BLKDBG_EVENT(bs, 
BLKDBG_PWRITEV_RMW_AFTER_HEAD); 34673b8242e0SKevin Wolf 34683b8242e0SKevin Wolf qemu_iovec_init(&local_qiov, qiov->niov + 2); 34693b8242e0SKevin Wolf qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1)); 34703b8242e0SKevin Wolf qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); 34713b8242e0SKevin Wolf use_local_qiov = true; 34723b8242e0SKevin Wolf 34733b8242e0SKevin Wolf bytes += offset & (align - 1); 34743b8242e0SKevin Wolf offset = offset & ~(align - 1); 34753b8242e0SKevin Wolf } 34763b8242e0SKevin Wolf 34773b8242e0SKevin Wolf if ((offset + bytes) & (align - 1)) { 34783b8242e0SKevin Wolf QEMUIOVector tail_qiov; 34793b8242e0SKevin Wolf struct iovec tail_iov; 34803b8242e0SKevin Wolf size_t tail_bytes; 348128de2dcdSKevin Wolf bool waited; 34823b8242e0SKevin Wolf 34833b8242e0SKevin Wolf mark_request_serialising(&req, align); 348428de2dcdSKevin Wolf waited = wait_serialising_requests(&req); 348528de2dcdSKevin Wolf assert(!waited || !use_local_qiov); 34863b8242e0SKevin Wolf 34873b8242e0SKevin Wolf tail_buf = qemu_blockalign(bs, align); 34883b8242e0SKevin Wolf tail_iov = (struct iovec) { 34893b8242e0SKevin Wolf .iov_base = tail_buf, 34903b8242e0SKevin Wolf .iov_len = align, 34913b8242e0SKevin Wolf }; 34923b8242e0SKevin Wolf qemu_iovec_init_external(&tail_qiov, &tail_iov, 1); 34933b8242e0SKevin Wolf 34949e1cb96dSKevin Wolf BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL); 34953b8242e0SKevin Wolf ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align, 34963b8242e0SKevin Wolf align, &tail_qiov, 0); 34973b8242e0SKevin Wolf if (ret < 0) { 34983b8242e0SKevin Wolf goto fail; 34993b8242e0SKevin Wolf } 35009e1cb96dSKevin Wolf BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); 35013b8242e0SKevin Wolf 35023b8242e0SKevin Wolf if (!use_local_qiov) { 35033b8242e0SKevin Wolf qemu_iovec_init(&local_qiov, qiov->niov + 1); 35043b8242e0SKevin Wolf qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); 35053b8242e0SKevin Wolf use_local_qiov = true; 35063b8242e0SKevin Wolf 
} 35073b8242e0SKevin Wolf 35083b8242e0SKevin Wolf tail_bytes = (offset + bytes) & (align - 1); 35093b8242e0SKevin Wolf qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes); 35103b8242e0SKevin Wolf 35113b8242e0SKevin Wolf bytes = ROUND_UP(bytes, align); 35123b8242e0SKevin Wolf } 35133b8242e0SKevin Wolf 3514fc3959e4SFam Zheng if (use_local_qiov) { 3515fc3959e4SFam Zheng /* Local buffer may have non-zero data. */ 3516fc3959e4SFam Zheng flags &= ~BDRV_REQ_ZERO_WRITE; 3517fc3959e4SFam Zheng } 35183b8242e0SKevin Wolf ret = bdrv_aligned_pwritev(bs, &req, offset, bytes, 35193b8242e0SKevin Wolf use_local_qiov ? &local_qiov : qiov, 35203b8242e0SKevin Wolf flags); 35213b8242e0SKevin Wolf 35223b8242e0SKevin Wolf fail: 352365afd211SKevin Wolf tracked_request_end(&req); 3524b404f720SKevin Wolf 35253b8242e0SKevin Wolf if (use_local_qiov) { 35263b8242e0SKevin Wolf qemu_iovec_destroy(&local_qiov); 352799c4a85cSKevin Wolf } 35283b8242e0SKevin Wolf qemu_vfree(head_buf); 35293b8242e0SKevin Wolf qemu_vfree(tail_buf); 35303b8242e0SKevin Wolf 3531b404f720SKevin Wolf return ret; 3532b404f720SKevin Wolf } 3533b404f720SKevin Wolf 35346601553eSKevin Wolf static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, 35356601553eSKevin Wolf int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, 35366601553eSKevin Wolf BdrvRequestFlags flags) 35376601553eSKevin Wolf { 353875af1f34SPeter Lieven if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { 35396601553eSKevin Wolf return -EINVAL; 35406601553eSKevin Wolf } 35416601553eSKevin Wolf 35426601553eSKevin Wolf return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS, 35436601553eSKevin Wolf nb_sectors << BDRV_SECTOR_BITS, qiov, flags); 35446601553eSKevin Wolf } 35456601553eSKevin Wolf 3546c5fbe571SStefan Hajnoczi int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num, 3547c5fbe571SStefan Hajnoczi int nb_sectors, QEMUIOVector *qiov) 3548c5fbe571SStefan Hajnoczi { 3549c5fbe571SStefan Hajnoczi 
trace_bdrv_co_writev(bs, sector_num, nb_sectors); 3550c5fbe571SStefan Hajnoczi 3551f08f2ddaSStefan Hajnoczi return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0); 3552f08f2ddaSStefan Hajnoczi } 3553f08f2ddaSStefan Hajnoczi 3554f08f2ddaSStefan Hajnoczi int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, 3555aa7bfbffSPeter Lieven int64_t sector_num, int nb_sectors, 3556aa7bfbffSPeter Lieven BdrvRequestFlags flags) 3557f08f2ddaSStefan Hajnoczi { 3558fc3959e4SFam Zheng int ret; 3559fc3959e4SFam Zheng 356094d6ff21SPaolo Bonzini trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags); 3561f08f2ddaSStefan Hajnoczi 3562d32f35cbSPeter Lieven if (!(bs->open_flags & BDRV_O_UNMAP)) { 3563d32f35cbSPeter Lieven flags &= ~BDRV_REQ_MAY_UNMAP; 3564d32f35cbSPeter Lieven } 3565fc3959e4SFam Zheng if (bdrv_req_is_aligned(bs, sector_num << BDRV_SECTOR_BITS, 3566fc3959e4SFam Zheng nb_sectors << BDRV_SECTOR_BITS)) { 3567fc3959e4SFam Zheng ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL, 3568aa7bfbffSPeter Lieven BDRV_REQ_ZERO_WRITE | flags); 3569fc3959e4SFam Zheng } else { 3570fc3959e4SFam Zheng uint8_t *buf; 3571fc3959e4SFam Zheng QEMUIOVector local_qiov; 3572fc3959e4SFam Zheng size_t bytes = nb_sectors << BDRV_SECTOR_BITS; 3573fc3959e4SFam Zheng 3574fc3959e4SFam Zheng buf = qemu_memalign(bdrv_opt_mem_align(bs), bytes); 3575fc3959e4SFam Zheng memset(buf, 0, bytes); 3576fc3959e4SFam Zheng qemu_iovec_init(&local_qiov, 1); 3577fc3959e4SFam Zheng qemu_iovec_add(&local_qiov, buf, bytes); 3578fc3959e4SFam Zheng 3579fc3959e4SFam Zheng ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, &local_qiov, 3580fc3959e4SFam Zheng BDRV_REQ_ZERO_WRITE | flags); 3581fc3959e4SFam Zheng qemu_vfree(buf); 3582fc3959e4SFam Zheng } 3583fc3959e4SFam Zheng return ret; 3584c5fbe571SStefan Hajnoczi } 3585c5fbe571SStefan Hajnoczi 358683f64091Sbellard /** 358783f64091Sbellard * Truncate file to 'offset' bytes (needed only for file protocols) 358883f64091Sbellard */ 358983f64091Sbellard 
int bdrv_truncate(BlockDriverState *bs, int64_t offset) 359083f64091Sbellard { 359183f64091Sbellard BlockDriver *drv = bs->drv; 359251762288SStefan Hajnoczi int ret; 359383f64091Sbellard if (!drv) 359419cb3738Sbellard return -ENOMEDIUM; 359583f64091Sbellard if (!drv->bdrv_truncate) 359683f64091Sbellard return -ENOTSUP; 359759f2689dSNaphtali Sprei if (bs->read_only) 359859f2689dSNaphtali Sprei return -EACCES; 35999c75e168SJeff Cody 360051762288SStefan Hajnoczi ret = drv->bdrv_truncate(bs, offset); 360151762288SStefan Hajnoczi if (ret == 0) { 360251762288SStefan Hajnoczi ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS); 3603a7f53e26SMarkus Armbruster if (bs->blk) { 3604a7f53e26SMarkus Armbruster blk_dev_resize_cb(bs->blk); 3605a7f53e26SMarkus Armbruster } 360651762288SStefan Hajnoczi } 360751762288SStefan Hajnoczi return ret; 360883f64091Sbellard } 360983f64091Sbellard 361083f64091Sbellard /** 36114a1d5e1fSFam Zheng * Length of a allocated file in bytes. Sparse files are counted by actual 36124a1d5e1fSFam Zheng * allocated space. Return < 0 if error or unknown. 36134a1d5e1fSFam Zheng */ 36144a1d5e1fSFam Zheng int64_t bdrv_get_allocated_file_size(BlockDriverState *bs) 36154a1d5e1fSFam Zheng { 36164a1d5e1fSFam Zheng BlockDriver *drv = bs->drv; 36174a1d5e1fSFam Zheng if (!drv) { 36184a1d5e1fSFam Zheng return -ENOMEDIUM; 36194a1d5e1fSFam Zheng } 36204a1d5e1fSFam Zheng if (drv->bdrv_get_allocated_file_size) { 36214a1d5e1fSFam Zheng return drv->bdrv_get_allocated_file_size(bs); 36224a1d5e1fSFam Zheng } 36234a1d5e1fSFam Zheng if (bs->file) { 36244a1d5e1fSFam Zheng return bdrv_get_allocated_file_size(bs->file); 36254a1d5e1fSFam Zheng } 36264a1d5e1fSFam Zheng return -ENOTSUP; 36274a1d5e1fSFam Zheng } 36284a1d5e1fSFam Zheng 36294a1d5e1fSFam Zheng /** 363065a9bb25SMarkus Armbruster * Return number of sectors on success, -errno on error. 
363183f64091Sbellard */ 363265a9bb25SMarkus Armbruster int64_t bdrv_nb_sectors(BlockDriverState *bs) 363383f64091Sbellard { 363483f64091Sbellard BlockDriver *drv = bs->drv; 363565a9bb25SMarkus Armbruster 363683f64091Sbellard if (!drv) 363719cb3738Sbellard return -ENOMEDIUM; 363851762288SStefan Hajnoczi 3639b94a2610SKevin Wolf if (drv->has_variable_length) { 3640b94a2610SKevin Wolf int ret = refresh_total_sectors(bs, bs->total_sectors); 3641b94a2610SKevin Wolf if (ret < 0) { 3642b94a2610SKevin Wolf return ret; 3643fc01f7e7Sbellard } 364446a4e4e6SStefan Hajnoczi } 364565a9bb25SMarkus Armbruster return bs->total_sectors; 364665a9bb25SMarkus Armbruster } 364765a9bb25SMarkus Armbruster 364865a9bb25SMarkus Armbruster /** 364965a9bb25SMarkus Armbruster * Return length in bytes on success, -errno on error. 365065a9bb25SMarkus Armbruster * The length is always a multiple of BDRV_SECTOR_SIZE. 365165a9bb25SMarkus Armbruster */ 365265a9bb25SMarkus Armbruster int64_t bdrv_getlength(BlockDriverState *bs) 365365a9bb25SMarkus Armbruster { 365465a9bb25SMarkus Armbruster int64_t ret = bdrv_nb_sectors(bs); 365565a9bb25SMarkus Armbruster 365665a9bb25SMarkus Armbruster return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE; 365746a4e4e6SStefan Hajnoczi } 3658fc01f7e7Sbellard 365919cb3738Sbellard /* return 0 as number of sectors if no device present or error */ 366096b8f136Sths void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr) 3661fc01f7e7Sbellard { 366265a9bb25SMarkus Armbruster int64_t nb_sectors = bdrv_nb_sectors(bs); 366365a9bb25SMarkus Armbruster 366465a9bb25SMarkus Armbruster *nb_sectors_ptr = nb_sectors < 0 ? 
0 : nb_sectors; 3665fc01f7e7Sbellard } 3666cf98951bSbellard 3667ff06f5f3SPaolo Bonzini void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error, 3668ff06f5f3SPaolo Bonzini BlockdevOnError on_write_error) 3669abd7f68dSMarkus Armbruster { 3670abd7f68dSMarkus Armbruster bs->on_read_error = on_read_error; 3671abd7f68dSMarkus Armbruster bs->on_write_error = on_write_error; 3672abd7f68dSMarkus Armbruster } 3673abd7f68dSMarkus Armbruster 36741ceee0d5SPaolo Bonzini BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read) 3675abd7f68dSMarkus Armbruster { 3676abd7f68dSMarkus Armbruster return is_read ? bs->on_read_error : bs->on_write_error; 3677abd7f68dSMarkus Armbruster } 3678abd7f68dSMarkus Armbruster 36793e1caa5fSPaolo Bonzini BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error) 36803e1caa5fSPaolo Bonzini { 36813e1caa5fSPaolo Bonzini BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error; 36823e1caa5fSPaolo Bonzini 36833e1caa5fSPaolo Bonzini switch (on_err) { 36843e1caa5fSPaolo Bonzini case BLOCKDEV_ON_ERROR_ENOSPC: 3685a589569fSWenchao Xia return (error == ENOSPC) ? 
3686a589569fSWenchao Xia BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT; 36873e1caa5fSPaolo Bonzini case BLOCKDEV_ON_ERROR_STOP: 3688a589569fSWenchao Xia return BLOCK_ERROR_ACTION_STOP; 36893e1caa5fSPaolo Bonzini case BLOCKDEV_ON_ERROR_REPORT: 3690a589569fSWenchao Xia return BLOCK_ERROR_ACTION_REPORT; 36913e1caa5fSPaolo Bonzini case BLOCKDEV_ON_ERROR_IGNORE: 3692a589569fSWenchao Xia return BLOCK_ERROR_ACTION_IGNORE; 36933e1caa5fSPaolo Bonzini default: 36943e1caa5fSPaolo Bonzini abort(); 36953e1caa5fSPaolo Bonzini } 36963e1caa5fSPaolo Bonzini } 36973e1caa5fSPaolo Bonzini 3698c7c2ff0cSLuiz Capitulino static void send_qmp_error_event(BlockDriverState *bs, 3699c7c2ff0cSLuiz Capitulino BlockErrorAction action, 3700c7c2ff0cSLuiz Capitulino bool is_read, int error) 3701c7c2ff0cSLuiz Capitulino { 3702573742a5SPeter Maydell IoOperationType optype; 3703c7c2ff0cSLuiz Capitulino 3704573742a5SPeter Maydell optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE; 3705573742a5SPeter Maydell qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action, 3706c7c2ff0cSLuiz Capitulino bdrv_iostatus_is_enabled(bs), 3707624ff573SLuiz Capitulino error == ENOSPC, strerror(error), 3708624ff573SLuiz Capitulino &error_abort); 3709c7c2ff0cSLuiz Capitulino } 3710c7c2ff0cSLuiz Capitulino 37113e1caa5fSPaolo Bonzini /* This is done by device models because, while the block layer knows 37123e1caa5fSPaolo Bonzini * about the error, it does not know whether an operation comes from 37133e1caa5fSPaolo Bonzini * the device or the block layer (from a job, for example). 
37143e1caa5fSPaolo Bonzini */ 37153e1caa5fSPaolo Bonzini void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action, 37163e1caa5fSPaolo Bonzini bool is_read, int error) 37173e1caa5fSPaolo Bonzini { 37183e1caa5fSPaolo Bonzini assert(error >= 0); 37192bd3bce8SPaolo Bonzini 3720a589569fSWenchao Xia if (action == BLOCK_ERROR_ACTION_STOP) { 37212bd3bce8SPaolo Bonzini /* First set the iostatus, so that "info block" returns an iostatus 37222bd3bce8SPaolo Bonzini * that matches the events raised so far (an additional error iostatus 37232bd3bce8SPaolo Bonzini * is fine, but not a lost one). 37242bd3bce8SPaolo Bonzini */ 37253e1caa5fSPaolo Bonzini bdrv_iostatus_set_err(bs, error); 37262bd3bce8SPaolo Bonzini 37272bd3bce8SPaolo Bonzini /* Then raise the request to stop the VM and the event. 37282bd3bce8SPaolo Bonzini * qemu_system_vmstop_request_prepare has two effects. First, 37292bd3bce8SPaolo Bonzini * it ensures that the STOP event always comes after the 37302bd3bce8SPaolo Bonzini * BLOCK_IO_ERROR event. Second, it ensures that even if management 37312bd3bce8SPaolo Bonzini * can observe the STOP event and do a "cont" before the STOP 37322bd3bce8SPaolo Bonzini * event is issued, the VM will not stop. In this case, vm_start() 37332bd3bce8SPaolo Bonzini * also ensures that the STOP/RESUME pair of events is emitted. 
37342bd3bce8SPaolo Bonzini */ 37352bd3bce8SPaolo Bonzini qemu_system_vmstop_request_prepare(); 3736c7c2ff0cSLuiz Capitulino send_qmp_error_event(bs, action, is_read, error); 37372bd3bce8SPaolo Bonzini qemu_system_vmstop_request(RUN_STATE_IO_ERROR); 37382bd3bce8SPaolo Bonzini } else { 3739c7c2ff0cSLuiz Capitulino send_qmp_error_event(bs, action, is_read, error); 37403e1caa5fSPaolo Bonzini } 37413e1caa5fSPaolo Bonzini } 37423e1caa5fSPaolo Bonzini 3743b338082bSbellard int bdrv_is_read_only(BlockDriverState *bs) 3744b338082bSbellard { 3745b338082bSbellard return bs->read_only; 3746b338082bSbellard } 3747b338082bSbellard 3748985a03b0Sths int bdrv_is_sg(BlockDriverState *bs) 3749985a03b0Sths { 3750985a03b0Sths return bs->sg; 3751985a03b0Sths } 3752985a03b0Sths 3753e900a7b7SChristoph Hellwig int bdrv_enable_write_cache(BlockDriverState *bs) 3754e900a7b7SChristoph Hellwig { 3755e900a7b7SChristoph Hellwig return bs->enable_write_cache; 3756e900a7b7SChristoph Hellwig } 3757e900a7b7SChristoph Hellwig 3758425b0148SPaolo Bonzini void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce) 3759425b0148SPaolo Bonzini { 3760425b0148SPaolo Bonzini bs->enable_write_cache = wce; 376155b110f2SJeff Cody 376255b110f2SJeff Cody /* so a reopen() will preserve wce */ 376355b110f2SJeff Cody if (wce) { 376455b110f2SJeff Cody bs->open_flags |= BDRV_O_CACHE_WB; 376555b110f2SJeff Cody } else { 376655b110f2SJeff Cody bs->open_flags &= ~BDRV_O_CACHE_WB; 376755b110f2SJeff Cody } 3768425b0148SPaolo Bonzini } 3769425b0148SPaolo Bonzini 3770ea2384d3Sbellard int bdrv_is_encrypted(BlockDriverState *bs) 3771ea2384d3Sbellard { 3772ea2384d3Sbellard if (bs->backing_hd && bs->backing_hd->encrypted) 3773ea2384d3Sbellard return 1; 3774ea2384d3Sbellard return bs->encrypted; 3775ea2384d3Sbellard } 3776ea2384d3Sbellard 3777c0f4ce77Saliguori int bdrv_key_required(BlockDriverState *bs) 3778c0f4ce77Saliguori { 3779c0f4ce77Saliguori BlockDriverState *backing_hd = bs->backing_hd; 3780c0f4ce77Saliguori 
3781c0f4ce77Saliguori if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key) 3782c0f4ce77Saliguori return 1; 3783c0f4ce77Saliguori return (bs->encrypted && !bs->valid_key); 3784c0f4ce77Saliguori } 3785c0f4ce77Saliguori 3786ea2384d3Sbellard int bdrv_set_key(BlockDriverState *bs, const char *key) 3787ea2384d3Sbellard { 3788ea2384d3Sbellard int ret; 3789ea2384d3Sbellard if (bs->backing_hd && bs->backing_hd->encrypted) { 3790ea2384d3Sbellard ret = bdrv_set_key(bs->backing_hd, key); 3791ea2384d3Sbellard if (ret < 0) 3792ea2384d3Sbellard return ret; 3793ea2384d3Sbellard if (!bs->encrypted) 3794ea2384d3Sbellard return 0; 3795ea2384d3Sbellard } 3796fd04a2aeSShahar Havivi if (!bs->encrypted) { 3797fd04a2aeSShahar Havivi return -EINVAL; 3798fd04a2aeSShahar Havivi } else if (!bs->drv || !bs->drv->bdrv_set_key) { 3799fd04a2aeSShahar Havivi return -ENOMEDIUM; 3800fd04a2aeSShahar Havivi } 3801c0f4ce77Saliguori ret = bs->drv->bdrv_set_key(bs, key); 3802bb5fc20fSaliguori if (ret < 0) { 3803bb5fc20fSaliguori bs->valid_key = 0; 3804bb5fc20fSaliguori } else if (!bs->valid_key) { 3805bb5fc20fSaliguori bs->valid_key = 1; 3806a7f53e26SMarkus Armbruster if (bs->blk) { 3807bb5fc20fSaliguori /* call the change callback now, we skipped it on open */ 3808a7f53e26SMarkus Armbruster blk_dev_change_media_cb(bs->blk, true); 3809a7f53e26SMarkus Armbruster } 3810bb5fc20fSaliguori } 3811c0f4ce77Saliguori return ret; 3812ea2384d3Sbellard } 3813ea2384d3Sbellard 38144d2855a3SMarkus Armbruster /* 38154d2855a3SMarkus Armbruster * Provide an encryption key for @bs. 38164d2855a3SMarkus Armbruster * If @key is non-null: 38174d2855a3SMarkus Armbruster * If @bs is not encrypted, fail. 38184d2855a3SMarkus Armbruster * Else if the key is invalid, fail. 38194d2855a3SMarkus Armbruster * Else set @bs's key to @key, replacing the existing key, if any. 38204d2855a3SMarkus Armbruster * If @key is null: 38214d2855a3SMarkus Armbruster * If @bs is encrypted and still lacks a key, fail. 
38224d2855a3SMarkus Armbruster * Else do nothing. 38234d2855a3SMarkus Armbruster * On failure, store an error object through @errp if non-null. 38244d2855a3SMarkus Armbruster */ 38254d2855a3SMarkus Armbruster void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp) 38264d2855a3SMarkus Armbruster { 38274d2855a3SMarkus Armbruster if (key) { 38284d2855a3SMarkus Armbruster if (!bdrv_is_encrypted(bs)) { 382981e5f78aSAlberto Garcia error_setg(errp, "Node '%s' is not encrypted", 383081e5f78aSAlberto Garcia bdrv_get_device_or_node_name(bs)); 38314d2855a3SMarkus Armbruster } else if (bdrv_set_key(bs, key) < 0) { 38324d2855a3SMarkus Armbruster error_set(errp, QERR_INVALID_PASSWORD); 38334d2855a3SMarkus Armbruster } 38344d2855a3SMarkus Armbruster } else { 38354d2855a3SMarkus Armbruster if (bdrv_key_required(bs)) { 3836b1ca6391SMarkus Armbruster error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED, 3837b1ca6391SMarkus Armbruster "'%s' (%s) is encrypted", 383881e5f78aSAlberto Garcia bdrv_get_device_or_node_name(bs), 38394d2855a3SMarkus Armbruster bdrv_get_encrypted_filename(bs)); 38404d2855a3SMarkus Armbruster } 38414d2855a3SMarkus Armbruster } 38424d2855a3SMarkus Armbruster } 38434d2855a3SMarkus Armbruster 3844f8d6bba1SMarkus Armbruster const char *bdrv_get_format_name(BlockDriverState *bs) 3845ea2384d3Sbellard { 3846f8d6bba1SMarkus Armbruster return bs->drv ? 
bs->drv->format_name : NULL; 3847ea2384d3Sbellard } 3848ea2384d3Sbellard 3849ada42401SStefan Hajnoczi static int qsort_strcmp(const void *a, const void *b) 3850ada42401SStefan Hajnoczi { 3851ada42401SStefan Hajnoczi return strcmp(a, b); 3852ada42401SStefan Hajnoczi } 3853ada42401SStefan Hajnoczi 3854ea2384d3Sbellard void bdrv_iterate_format(void (*it)(void *opaque, const char *name), 3855ea2384d3Sbellard void *opaque) 3856ea2384d3Sbellard { 3857ea2384d3Sbellard BlockDriver *drv; 3858e855e4fbSJeff Cody int count = 0; 3859ada42401SStefan Hajnoczi int i; 3860e855e4fbSJeff Cody const char **formats = NULL; 3861ea2384d3Sbellard 38628a22f02aSStefan Hajnoczi QLIST_FOREACH(drv, &bdrv_drivers, list) { 3863e855e4fbSJeff Cody if (drv->format_name) { 3864e855e4fbSJeff Cody bool found = false; 3865e855e4fbSJeff Cody int i = count; 3866e855e4fbSJeff Cody while (formats && i && !found) { 3867e855e4fbSJeff Cody found = !strcmp(formats[--i], drv->format_name); 3868e855e4fbSJeff Cody } 3869e855e4fbSJeff Cody 3870e855e4fbSJeff Cody if (!found) { 38715839e53bSMarkus Armbruster formats = g_renew(const char *, formats, count + 1); 3872e855e4fbSJeff Cody formats[count++] = drv->format_name; 3873ea2384d3Sbellard } 3874ea2384d3Sbellard } 3875e855e4fbSJeff Cody } 3876ada42401SStefan Hajnoczi 3877ada42401SStefan Hajnoczi qsort(formats, count, sizeof(formats[0]), qsort_strcmp); 3878ada42401SStefan Hajnoczi 3879ada42401SStefan Hajnoczi for (i = 0; i < count; i++) { 3880ada42401SStefan Hajnoczi it(opaque, formats[i]); 3881ada42401SStefan Hajnoczi } 3882ada42401SStefan Hajnoczi 3883e855e4fbSJeff Cody g_free(formats); 3884e855e4fbSJeff Cody } 3885ea2384d3Sbellard 3886dc364f4cSBenoît Canet /* This function is to find a node in the bs graph */ 3887dc364f4cSBenoît Canet BlockDriverState *bdrv_find_node(const char *node_name) 3888dc364f4cSBenoît Canet { 3889dc364f4cSBenoît Canet BlockDriverState *bs; 3890dc364f4cSBenoît Canet 3891dc364f4cSBenoît Canet assert(node_name); 3892dc364f4cSBenoît Canet 
3893dc364f4cSBenoît Canet QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 3894dc364f4cSBenoît Canet if (!strcmp(node_name, bs->node_name)) { 3895dc364f4cSBenoît Canet return bs; 3896dc364f4cSBenoît Canet } 3897dc364f4cSBenoît Canet } 3898dc364f4cSBenoît Canet return NULL; 3899dc364f4cSBenoît Canet } 3900dc364f4cSBenoît Canet 3901c13163fbSBenoît Canet /* Put this QMP function here so it can access the static graph_bdrv_states. */ 3902d5a8ee60SAlberto Garcia BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp) 3903c13163fbSBenoît Canet { 3904c13163fbSBenoît Canet BlockDeviceInfoList *list, *entry; 3905c13163fbSBenoît Canet BlockDriverState *bs; 3906c13163fbSBenoît Canet 3907c13163fbSBenoît Canet list = NULL; 3908c13163fbSBenoît Canet QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 3909d5a8ee60SAlberto Garcia BlockDeviceInfo *info = bdrv_block_device_info(bs, errp); 3910d5a8ee60SAlberto Garcia if (!info) { 3911d5a8ee60SAlberto Garcia qapi_free_BlockDeviceInfoList(list); 3912d5a8ee60SAlberto Garcia return NULL; 3913d5a8ee60SAlberto Garcia } 3914c13163fbSBenoît Canet entry = g_malloc0(sizeof(*entry)); 3915d5a8ee60SAlberto Garcia entry->value = info; 3916c13163fbSBenoît Canet entry->next = list; 3917c13163fbSBenoît Canet list = entry; 3918c13163fbSBenoît Canet } 3919c13163fbSBenoît Canet 3920c13163fbSBenoît Canet return list; 3921c13163fbSBenoît Canet } 3922c13163fbSBenoît Canet 392312d3ba82SBenoît Canet BlockDriverState *bdrv_lookup_bs(const char *device, 392412d3ba82SBenoît Canet const char *node_name, 392512d3ba82SBenoît Canet Error **errp) 392612d3ba82SBenoît Canet { 39277f06d47eSMarkus Armbruster BlockBackend *blk; 39287f06d47eSMarkus Armbruster BlockDriverState *bs; 392912d3ba82SBenoît Canet 393012d3ba82SBenoît Canet if (device) { 39317f06d47eSMarkus Armbruster blk = blk_by_name(device); 393212d3ba82SBenoît Canet 39337f06d47eSMarkus Armbruster if (blk) { 39347f06d47eSMarkus Armbruster return blk_bs(blk); 393512d3ba82SBenoît Canet } 3936dd67fa50SBenoît 
Canet } 393712d3ba82SBenoît Canet 3938dd67fa50SBenoît Canet if (node_name) { 393912d3ba82SBenoît Canet bs = bdrv_find_node(node_name); 394012d3ba82SBenoît Canet 3941dd67fa50SBenoît Canet if (bs) { 3942dd67fa50SBenoît Canet return bs; 3943dd67fa50SBenoît Canet } 394412d3ba82SBenoît Canet } 394512d3ba82SBenoît Canet 3946dd67fa50SBenoît Canet error_setg(errp, "Cannot find device=%s nor node_name=%s", 3947dd67fa50SBenoît Canet device ? device : "", 3948dd67fa50SBenoît Canet node_name ? node_name : ""); 3949dd67fa50SBenoît Canet return NULL; 395012d3ba82SBenoît Canet } 395112d3ba82SBenoît Canet 39525a6684d2SJeff Cody /* If 'base' is in the same chain as 'top', return true. Otherwise, 39535a6684d2SJeff Cody * return false. If either argument is NULL, return false. */ 39545a6684d2SJeff Cody bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base) 39555a6684d2SJeff Cody { 39565a6684d2SJeff Cody while (top && top != base) { 39575a6684d2SJeff Cody top = top->backing_hd; 39585a6684d2SJeff Cody } 39595a6684d2SJeff Cody 39605a6684d2SJeff Cody return top != NULL; 39615a6684d2SJeff Cody } 39625a6684d2SJeff Cody 396304df765aSFam Zheng BlockDriverState *bdrv_next_node(BlockDriverState *bs) 396404df765aSFam Zheng { 396504df765aSFam Zheng if (!bs) { 396604df765aSFam Zheng return QTAILQ_FIRST(&graph_bdrv_states); 396704df765aSFam Zheng } 396804df765aSFam Zheng return QTAILQ_NEXT(bs, node_list); 396904df765aSFam Zheng } 397004df765aSFam Zheng 39712f399b0aSMarkus Armbruster BlockDriverState *bdrv_next(BlockDriverState *bs) 39722f399b0aSMarkus Armbruster { 39732f399b0aSMarkus Armbruster if (!bs) { 39742f399b0aSMarkus Armbruster return QTAILQ_FIRST(&bdrv_states); 39752f399b0aSMarkus Armbruster } 3976dc364f4cSBenoît Canet return QTAILQ_NEXT(bs, device_list); 39772f399b0aSMarkus Armbruster } 39782f399b0aSMarkus Armbruster 397920a9e77dSFam Zheng const char *bdrv_get_node_name(const BlockDriverState *bs) 398020a9e77dSFam Zheng { 398120a9e77dSFam Zheng return bs->node_name; 
398220a9e77dSFam Zheng } 398320a9e77dSFam Zheng 39847f06d47eSMarkus Armbruster /* TODO check what callers really want: bs->node_name or blk_name() */ 3985bfb197e0SMarkus Armbruster const char *bdrv_get_device_name(const BlockDriverState *bs) 3986ea2384d3Sbellard { 3987bfb197e0SMarkus Armbruster return bs->blk ? blk_name(bs->blk) : ""; 3988ea2384d3Sbellard } 3989ea2384d3Sbellard 39909b2aa84fSAlberto Garcia /* This can be used to identify nodes that might not have a device 39919b2aa84fSAlberto Garcia * name associated. Since node and device names live in the same 39929b2aa84fSAlberto Garcia * namespace, the result is unambiguous. The exception is if both are 39939b2aa84fSAlberto Garcia * absent, then this returns an empty (non-null) string. */ 39949b2aa84fSAlberto Garcia const char *bdrv_get_device_or_node_name(const BlockDriverState *bs) 39959b2aa84fSAlberto Garcia { 39969b2aa84fSAlberto Garcia return bs->blk ? blk_name(bs->blk) : bs->node_name; 39979b2aa84fSAlberto Garcia } 39989b2aa84fSAlberto Garcia 3999c8433287SMarkus Armbruster int bdrv_get_flags(BlockDriverState *bs) 4000c8433287SMarkus Armbruster { 4001c8433287SMarkus Armbruster return bs->open_flags; 4002c8433287SMarkus Armbruster } 4003c8433287SMarkus Armbruster 4004f0f0fdfeSKevin Wolf int bdrv_flush_all(void) 4005c6ca28d6Saliguori { 4006c6ca28d6Saliguori BlockDriverState *bs; 4007f0f0fdfeSKevin Wolf int result = 0; 4008c6ca28d6Saliguori 4009dc364f4cSBenoît Canet QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 4010ed78cda3SStefan Hajnoczi AioContext *aio_context = bdrv_get_aio_context(bs); 4011ed78cda3SStefan Hajnoczi int ret; 4012ed78cda3SStefan Hajnoczi 4013ed78cda3SStefan Hajnoczi aio_context_acquire(aio_context); 4014ed78cda3SStefan Hajnoczi ret = bdrv_flush(bs); 4015f0f0fdfeSKevin Wolf if (ret < 0 && !result) { 4016f0f0fdfeSKevin Wolf result = ret; 4017c6ca28d6Saliguori } 4018ed78cda3SStefan Hajnoczi aio_context_release(aio_context); 40191b7bdbc1SStefan Hajnoczi } 4020c6ca28d6Saliguori 
4021f0f0fdfeSKevin Wolf return result; 4022f0f0fdfeSKevin Wolf } 4023f0f0fdfeSKevin Wolf 40243ac21627SPeter Lieven int bdrv_has_zero_init_1(BlockDriverState *bs) 40253ac21627SPeter Lieven { 40263ac21627SPeter Lieven return 1; 40273ac21627SPeter Lieven } 40283ac21627SPeter Lieven 4029f2feebbdSKevin Wolf int bdrv_has_zero_init(BlockDriverState *bs) 4030f2feebbdSKevin Wolf { 4031f2feebbdSKevin Wolf assert(bs->drv); 4032f2feebbdSKevin Wolf 403311212d8fSPaolo Bonzini /* If BS is a copy on write image, it is initialized to 403411212d8fSPaolo Bonzini the contents of the base image, which may not be zeroes. */ 403511212d8fSPaolo Bonzini if (bs->backing_hd) { 403611212d8fSPaolo Bonzini return 0; 403711212d8fSPaolo Bonzini } 4038336c1c12SKevin Wolf if (bs->drv->bdrv_has_zero_init) { 4039336c1c12SKevin Wolf return bs->drv->bdrv_has_zero_init(bs); 4040f2feebbdSKevin Wolf } 4041f2feebbdSKevin Wolf 40423ac21627SPeter Lieven /* safe default */ 40433ac21627SPeter Lieven return 0; 4044f2feebbdSKevin Wolf } 4045f2feebbdSKevin Wolf 40464ce78691SPeter Lieven bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs) 40474ce78691SPeter Lieven { 40484ce78691SPeter Lieven BlockDriverInfo bdi; 40494ce78691SPeter Lieven 40504ce78691SPeter Lieven if (bs->backing_hd) { 40514ce78691SPeter Lieven return false; 40524ce78691SPeter Lieven } 40534ce78691SPeter Lieven 40544ce78691SPeter Lieven if (bdrv_get_info(bs, &bdi) == 0) { 40554ce78691SPeter Lieven return bdi.unallocated_blocks_are_zero; 40564ce78691SPeter Lieven } 40574ce78691SPeter Lieven 40584ce78691SPeter Lieven return false; 40594ce78691SPeter Lieven } 40604ce78691SPeter Lieven 40614ce78691SPeter Lieven bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs) 40624ce78691SPeter Lieven { 40634ce78691SPeter Lieven BlockDriverInfo bdi; 40644ce78691SPeter Lieven 40654ce78691SPeter Lieven if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) { 40664ce78691SPeter Lieven return false; 40674ce78691SPeter Lieven } 40684ce78691SPeter Lieven 
40694ce78691SPeter Lieven if (bdrv_get_info(bs, &bdi) == 0) { 40704ce78691SPeter Lieven return bdi.can_write_zeroes_with_unmap; 40714ce78691SPeter Lieven } 40724ce78691SPeter Lieven 40734ce78691SPeter Lieven return false; 40744ce78691SPeter Lieven } 40754ce78691SPeter Lieven 4076b6b8a333SPaolo Bonzini typedef struct BdrvCoGetBlockStatusData { 4077376ae3f1SStefan Hajnoczi BlockDriverState *bs; 4078b35b2bbaSMiroslav Rezanina BlockDriverState *base; 4079376ae3f1SStefan Hajnoczi int64_t sector_num; 4080376ae3f1SStefan Hajnoczi int nb_sectors; 4081376ae3f1SStefan Hajnoczi int *pnum; 4082b6b8a333SPaolo Bonzini int64_t ret; 4083376ae3f1SStefan Hajnoczi bool done; 4084b6b8a333SPaolo Bonzini } BdrvCoGetBlockStatusData; 4085376ae3f1SStefan Hajnoczi 4086f58c7b35Sths /* 4087705be728SFam Zheng * Returns the allocation status of the specified sectors. 4088705be728SFam Zheng * Drivers not implementing the functionality are assumed to not support 4089705be728SFam Zheng * backing files, hence all their sectors are reported as allocated. 4090f58c7b35Sths * 4091bd9533e3SStefan Hajnoczi * If 'sector_num' is beyond the end of the disk image the return value is 0 4092bd9533e3SStefan Hajnoczi * and 'pnum' is set to 0. 4093bd9533e3SStefan Hajnoczi * 4094f58c7b35Sths * 'pnum' is set to the number of sectors (including and immediately following 4095f58c7b35Sths * the specified sector) that are known to be in the same 4096f58c7b35Sths * allocated/unallocated state. 4097f58c7b35Sths * 4098bd9533e3SStefan Hajnoczi * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes 4099bd9533e3SStefan Hajnoczi * beyond the end of the disk image it will be clamped. 
4100f58c7b35Sths */ 4101b6b8a333SPaolo Bonzini static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, 4102bdad13b9SPaolo Bonzini int64_t sector_num, 4103060f51c9SStefan Hajnoczi int nb_sectors, int *pnum) 4104f58c7b35Sths { 410530a7f2fcSMarkus Armbruster int64_t total_sectors; 4106f58c7b35Sths int64_t n; 41075daa74a6SPaolo Bonzini int64_t ret, ret2; 4108bd9533e3SStefan Hajnoczi 410930a7f2fcSMarkus Armbruster total_sectors = bdrv_nb_sectors(bs); 411030a7f2fcSMarkus Armbruster if (total_sectors < 0) { 411130a7f2fcSMarkus Armbruster return total_sectors; 4112617ccb46SPaolo Bonzini } 4113617ccb46SPaolo Bonzini 411430a7f2fcSMarkus Armbruster if (sector_num >= total_sectors) { 41156aebab14SStefan Hajnoczi *pnum = 0; 41166aebab14SStefan Hajnoczi return 0; 41176aebab14SStefan Hajnoczi } 4118bd9533e3SStefan Hajnoczi 411930a7f2fcSMarkus Armbruster n = total_sectors - sector_num; 4120bd9533e3SStefan Hajnoczi if (n < nb_sectors) { 4121bd9533e3SStefan Hajnoczi nb_sectors = n; 4122bd9533e3SStefan Hajnoczi } 4123bd9533e3SStefan Hajnoczi 4124b6b8a333SPaolo Bonzini if (!bs->drv->bdrv_co_get_block_status) { 4125bd9533e3SStefan Hajnoczi *pnum = nb_sectors; 4126e88ae226SKevin Wolf ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED; 4127918e92d7SPaolo Bonzini if (bs->drv->protocol_name) { 4128918e92d7SPaolo Bonzini ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE); 4129918e92d7SPaolo Bonzini } 4130918e92d7SPaolo Bonzini return ret; 41316aebab14SStefan Hajnoczi } 41326aebab14SStefan Hajnoczi 4133415b5b01SPaolo Bonzini ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum); 4134415b5b01SPaolo Bonzini if (ret < 0) { 41353e0a233dSPeter Lieven *pnum = 0; 4136415b5b01SPaolo Bonzini return ret; 4137415b5b01SPaolo Bonzini } 4138415b5b01SPaolo Bonzini 413992bc50a5SPeter Lieven if (ret & BDRV_BLOCK_RAW) { 414092bc50a5SPeter Lieven assert(ret & BDRV_BLOCK_OFFSET_VALID); 414192bc50a5SPeter Lieven return bdrv_get_block_status(bs->file, ret >> 
BDRV_SECTOR_BITS, 414292bc50a5SPeter Lieven *pnum, pnum); 414392bc50a5SPeter Lieven } 414492bc50a5SPeter Lieven 4145e88ae226SKevin Wolf if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) { 4146e88ae226SKevin Wolf ret |= BDRV_BLOCK_ALLOCATED; 4147e88ae226SKevin Wolf } 4148e88ae226SKevin Wolf 4149c3d86884SPeter Lieven if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) { 4150c3d86884SPeter Lieven if (bdrv_unallocated_blocks_are_zero(bs)) { 4151415b5b01SPaolo Bonzini ret |= BDRV_BLOCK_ZERO; 41521f9db224SPeter Lieven } else if (bs->backing_hd) { 4153f0ad5712SPaolo Bonzini BlockDriverState *bs2 = bs->backing_hd; 415430a7f2fcSMarkus Armbruster int64_t nb_sectors2 = bdrv_nb_sectors(bs2); 415530a7f2fcSMarkus Armbruster if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) { 4156f0ad5712SPaolo Bonzini ret |= BDRV_BLOCK_ZERO; 4157f0ad5712SPaolo Bonzini } 4158f0ad5712SPaolo Bonzini } 4159415b5b01SPaolo Bonzini } 41605daa74a6SPaolo Bonzini 41615daa74a6SPaolo Bonzini if (bs->file && 41625daa74a6SPaolo Bonzini (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) && 41635daa74a6SPaolo Bonzini (ret & BDRV_BLOCK_OFFSET_VALID)) { 416459c9a95fSMax Reitz int file_pnum; 416559c9a95fSMax Reitz 41665daa74a6SPaolo Bonzini ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS, 416759c9a95fSMax Reitz *pnum, &file_pnum); 41685daa74a6SPaolo Bonzini if (ret2 >= 0) { 41695daa74a6SPaolo Bonzini /* Ignore errors. This is just providing extra information, it 41705daa74a6SPaolo Bonzini * is useful but not necessary. 
41715daa74a6SPaolo Bonzini */ 417259c9a95fSMax Reitz if (!file_pnum) { 417359c9a95fSMax Reitz /* !file_pnum indicates an offset at or beyond the EOF; it is 417459c9a95fSMax Reitz * perfectly valid for the format block driver to point to such 417559c9a95fSMax Reitz * offsets, so catch it and mark everything as zero */ 417659c9a95fSMax Reitz ret |= BDRV_BLOCK_ZERO; 417759c9a95fSMax Reitz } else { 417859c9a95fSMax Reitz /* Limit request to the range reported by the protocol driver */ 417959c9a95fSMax Reitz *pnum = file_pnum; 41805daa74a6SPaolo Bonzini ret |= (ret2 & BDRV_BLOCK_ZERO); 41815daa74a6SPaolo Bonzini } 41825daa74a6SPaolo Bonzini } 418359c9a95fSMax Reitz } 41845daa74a6SPaolo Bonzini 4185415b5b01SPaolo Bonzini return ret; 4186060f51c9SStefan Hajnoczi } 4187060f51c9SStefan Hajnoczi 4188b6b8a333SPaolo Bonzini /* Coroutine wrapper for bdrv_get_block_status() */ 4189b6b8a333SPaolo Bonzini static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque) 4190060f51c9SStefan Hajnoczi { 4191b6b8a333SPaolo Bonzini BdrvCoGetBlockStatusData *data = opaque; 4192060f51c9SStefan Hajnoczi BlockDriverState *bs = data->bs; 4193060f51c9SStefan Hajnoczi 4194b6b8a333SPaolo Bonzini data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors, 4195060f51c9SStefan Hajnoczi data->pnum); 4196060f51c9SStefan Hajnoczi data->done = true; 4197060f51c9SStefan Hajnoczi } 4198060f51c9SStefan Hajnoczi 4199060f51c9SStefan Hajnoczi /* 4200b6b8a333SPaolo Bonzini * Synchronous wrapper around bdrv_co_get_block_status(). 4201060f51c9SStefan Hajnoczi * 4202b6b8a333SPaolo Bonzini * See bdrv_co_get_block_status() for details. 
4203060f51c9SStefan Hajnoczi */ 4204b6b8a333SPaolo Bonzini int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num, 4205b6b8a333SPaolo Bonzini int nb_sectors, int *pnum) 4206060f51c9SStefan Hajnoczi { 4207376ae3f1SStefan Hajnoczi Coroutine *co; 4208b6b8a333SPaolo Bonzini BdrvCoGetBlockStatusData data = { 4209376ae3f1SStefan Hajnoczi .bs = bs, 4210376ae3f1SStefan Hajnoczi .sector_num = sector_num, 4211376ae3f1SStefan Hajnoczi .nb_sectors = nb_sectors, 4212376ae3f1SStefan Hajnoczi .pnum = pnum, 4213376ae3f1SStefan Hajnoczi .done = false, 4214376ae3f1SStefan Hajnoczi }; 4215376ae3f1SStefan Hajnoczi 4216bdad13b9SPaolo Bonzini if (qemu_in_coroutine()) { 4217bdad13b9SPaolo Bonzini /* Fast-path if already in coroutine context */ 4218b6b8a333SPaolo Bonzini bdrv_get_block_status_co_entry(&data); 4219bdad13b9SPaolo Bonzini } else { 42202572b37aSStefan Hajnoczi AioContext *aio_context = bdrv_get_aio_context(bs); 42212572b37aSStefan Hajnoczi 4222b6b8a333SPaolo Bonzini co = qemu_coroutine_create(bdrv_get_block_status_co_entry); 4223376ae3f1SStefan Hajnoczi qemu_coroutine_enter(co, &data); 4224376ae3f1SStefan Hajnoczi while (!data.done) { 42252572b37aSStefan Hajnoczi aio_poll(aio_context, true); 4226376ae3f1SStefan Hajnoczi } 4227bdad13b9SPaolo Bonzini } 4228376ae3f1SStefan Hajnoczi return data.ret; 4229376ae3f1SStefan Hajnoczi } 4230f58c7b35Sths 4231b6b8a333SPaolo Bonzini int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, 4232b6b8a333SPaolo Bonzini int nb_sectors, int *pnum) 4233b6b8a333SPaolo Bonzini { 42344333bb71SPaolo Bonzini int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum); 42354333bb71SPaolo Bonzini if (ret < 0) { 42364333bb71SPaolo Bonzini return ret; 42374333bb71SPaolo Bonzini } 423801fb2705SKevin Wolf return !!(ret & BDRV_BLOCK_ALLOCATED); 4239b6b8a333SPaolo Bonzini } 4240b6b8a333SPaolo Bonzini 4241188a7bbfSPaolo Bonzini /* 4242188a7bbfSPaolo Bonzini * Given an image chain: ... 
-> [BASE] -> [INTER1] -> [INTER2] -> [TOP] 4243188a7bbfSPaolo Bonzini * 4244188a7bbfSPaolo Bonzini * Return true if the given sector is allocated in any image between 4245188a7bbfSPaolo Bonzini * BASE and TOP (inclusive). BASE can be NULL to check if the given 4246188a7bbfSPaolo Bonzini * sector is allocated in any image of the chain. Return false otherwise. 4247188a7bbfSPaolo Bonzini * 4248188a7bbfSPaolo Bonzini * 'pnum' is set to the number of sectors (including and immediately following 4249188a7bbfSPaolo Bonzini * the specified sector) that are known to be in the same 4250188a7bbfSPaolo Bonzini * allocated/unallocated state. 4251188a7bbfSPaolo Bonzini * 4252188a7bbfSPaolo Bonzini */ 42534f578637SPaolo Bonzini int bdrv_is_allocated_above(BlockDriverState *top, 4254188a7bbfSPaolo Bonzini BlockDriverState *base, 4255188a7bbfSPaolo Bonzini int64_t sector_num, 4256188a7bbfSPaolo Bonzini int nb_sectors, int *pnum) 4257188a7bbfSPaolo Bonzini { 4258188a7bbfSPaolo Bonzini BlockDriverState *intermediate; 4259188a7bbfSPaolo Bonzini int ret, n = nb_sectors; 4260188a7bbfSPaolo Bonzini 4261188a7bbfSPaolo Bonzini intermediate = top; 4262188a7bbfSPaolo Bonzini while (intermediate && intermediate != base) { 4263188a7bbfSPaolo Bonzini int pnum_inter; 4264bdad13b9SPaolo Bonzini ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors, 4265188a7bbfSPaolo Bonzini &pnum_inter); 4266188a7bbfSPaolo Bonzini if (ret < 0) { 4267188a7bbfSPaolo Bonzini return ret; 4268188a7bbfSPaolo Bonzini } else if (ret) { 4269188a7bbfSPaolo Bonzini *pnum = pnum_inter; 4270188a7bbfSPaolo Bonzini return 1; 4271188a7bbfSPaolo Bonzini } 4272188a7bbfSPaolo Bonzini 4273188a7bbfSPaolo Bonzini /* 4274188a7bbfSPaolo Bonzini * [sector_num, nb_sectors] is unallocated on top but intermediate 4275188a7bbfSPaolo Bonzini * might have 4276188a7bbfSPaolo Bonzini * 4277188a7bbfSPaolo Bonzini * [sector_num+x, nr_sectors] allocated. 
4278188a7bbfSPaolo Bonzini */ 427963ba17d3SVishvananda Ishaya if (n > pnum_inter && 428063ba17d3SVishvananda Ishaya (intermediate == top || 428163ba17d3SVishvananda Ishaya sector_num + pnum_inter < intermediate->total_sectors)) { 4282188a7bbfSPaolo Bonzini n = pnum_inter; 4283188a7bbfSPaolo Bonzini } 4284188a7bbfSPaolo Bonzini 4285188a7bbfSPaolo Bonzini intermediate = intermediate->backing_hd; 4286188a7bbfSPaolo Bonzini } 4287188a7bbfSPaolo Bonzini 4288188a7bbfSPaolo Bonzini *pnum = n; 4289188a7bbfSPaolo Bonzini return 0; 4290188a7bbfSPaolo Bonzini } 4291188a7bbfSPaolo Bonzini 4292045df330Saliguori const char *bdrv_get_encrypted_filename(BlockDriverState *bs) 4293045df330Saliguori { 4294045df330Saliguori if (bs->backing_hd && bs->backing_hd->encrypted) 4295045df330Saliguori return bs->backing_file; 4296045df330Saliguori else if (bs->encrypted) 4297045df330Saliguori return bs->filename; 4298045df330Saliguori else 4299045df330Saliguori return NULL; 4300045df330Saliguori } 4301045df330Saliguori 430283f64091Sbellard void bdrv_get_backing_filename(BlockDriverState *bs, 430383f64091Sbellard char *filename, int filename_size) 430483f64091Sbellard { 430583f64091Sbellard pstrcpy(filename, filename_size, bs->backing_file); 430683f64091Sbellard } 430783f64091Sbellard 4308faea38e7Sbellard int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num, 4309faea38e7Sbellard const uint8_t *buf, int nb_sectors) 4310faea38e7Sbellard { 4311faea38e7Sbellard BlockDriver *drv = bs->drv; 4312b9c64947SMax Reitz int ret; 4313b9c64947SMax Reitz 4314b9c64947SMax Reitz if (!drv) { 431519cb3738Sbellard return -ENOMEDIUM; 4316b9c64947SMax Reitz } 4317b9c64947SMax Reitz if (!drv->bdrv_write_compressed) { 4318faea38e7Sbellard return -ENOTSUP; 4319b9c64947SMax Reitz } 4320b9c64947SMax Reitz ret = bdrv_check_request(bs, sector_num, nb_sectors); 4321b9c64947SMax Reitz if (ret < 0) { 4322b9c64947SMax Reitz return ret; 4323b9c64947SMax Reitz } 43247cd1e32aSlirans@il.ibm.com 4325e4654d2dSFam Zheng 
assert(QLIST_EMPTY(&bs->dirty_bitmaps)); 43267cd1e32aSlirans@il.ibm.com 4327faea38e7Sbellard return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors); 4328faea38e7Sbellard } 4329faea38e7Sbellard 4330faea38e7Sbellard int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 4331faea38e7Sbellard { 4332faea38e7Sbellard BlockDriver *drv = bs->drv; 4333faea38e7Sbellard if (!drv) 433419cb3738Sbellard return -ENOMEDIUM; 4335faea38e7Sbellard if (!drv->bdrv_get_info) 4336faea38e7Sbellard return -ENOTSUP; 4337faea38e7Sbellard memset(bdi, 0, sizeof(*bdi)); 4338faea38e7Sbellard return drv->bdrv_get_info(bs, bdi); 4339faea38e7Sbellard } 4340faea38e7Sbellard 4341eae041feSMax Reitz ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs) 4342eae041feSMax Reitz { 4343eae041feSMax Reitz BlockDriver *drv = bs->drv; 4344eae041feSMax Reitz if (drv && drv->bdrv_get_specific_info) { 4345eae041feSMax Reitz return drv->bdrv_get_specific_info(bs); 4346eae041feSMax Reitz } 4347eae041feSMax Reitz return NULL; 4348eae041feSMax Reitz } 4349eae041feSMax Reitz 435045566e9cSChristoph Hellwig int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, 435145566e9cSChristoph Hellwig int64_t pos, int size) 4352178e08a5Saliguori { 4353cf8074b3SKevin Wolf QEMUIOVector qiov; 4354cf8074b3SKevin Wolf struct iovec iov = { 4355cf8074b3SKevin Wolf .iov_base = (void *) buf, 4356cf8074b3SKevin Wolf .iov_len = size, 4357cf8074b3SKevin Wolf }; 4358cf8074b3SKevin Wolf 4359cf8074b3SKevin Wolf qemu_iovec_init_external(&qiov, &iov, 1); 4360cf8074b3SKevin Wolf return bdrv_writev_vmstate(bs, &qiov, pos); 4361cf8074b3SKevin Wolf } 4362cf8074b3SKevin Wolf 4363cf8074b3SKevin Wolf int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) 4364cf8074b3SKevin Wolf { 4365178e08a5Saliguori BlockDriver *drv = bs->drv; 4366cf8074b3SKevin Wolf 4367cf8074b3SKevin Wolf if (!drv) { 4368178e08a5Saliguori return -ENOMEDIUM; 4369cf8074b3SKevin Wolf } else if (drv->bdrv_save_vmstate) { 
4370cf8074b3SKevin Wolf return drv->bdrv_save_vmstate(bs, qiov, pos); 4371cf8074b3SKevin Wolf } else if (bs->file) { 4372cf8074b3SKevin Wolf return bdrv_writev_vmstate(bs->file, qiov, pos); 4373cf8074b3SKevin Wolf } 4374cf8074b3SKevin Wolf 43757cdb1f6dSMORITA Kazutaka return -ENOTSUP; 4376178e08a5Saliguori } 4377178e08a5Saliguori 437845566e9cSChristoph Hellwig int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, 437945566e9cSChristoph Hellwig int64_t pos, int size) 4380178e08a5Saliguori { 4381178e08a5Saliguori BlockDriver *drv = bs->drv; 4382178e08a5Saliguori if (!drv) 4383178e08a5Saliguori return -ENOMEDIUM; 43847cdb1f6dSMORITA Kazutaka if (drv->bdrv_load_vmstate) 438545566e9cSChristoph Hellwig return drv->bdrv_load_vmstate(bs, buf, pos, size); 43867cdb1f6dSMORITA Kazutaka if (bs->file) 43877cdb1f6dSMORITA Kazutaka return bdrv_load_vmstate(bs->file, buf, pos, size); 43887cdb1f6dSMORITA Kazutaka return -ENOTSUP; 4389178e08a5Saliguori } 4390178e08a5Saliguori 43918b9b0cc2SKevin Wolf void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event) 43928b9b0cc2SKevin Wolf { 4393bf736fe3SKevin Wolf if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) { 43948b9b0cc2SKevin Wolf return; 43958b9b0cc2SKevin Wolf } 43968b9b0cc2SKevin Wolf 4397bf736fe3SKevin Wolf bs->drv->bdrv_debug_event(bs, event); 439841c695c7SKevin Wolf } 43998b9b0cc2SKevin Wolf 440041c695c7SKevin Wolf int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event, 440141c695c7SKevin Wolf const char *tag) 440241c695c7SKevin Wolf { 440341c695c7SKevin Wolf while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) { 440441c695c7SKevin Wolf bs = bs->file; 440541c695c7SKevin Wolf } 440641c695c7SKevin Wolf 440741c695c7SKevin Wolf if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) { 440841c695c7SKevin Wolf return bs->drv->bdrv_debug_breakpoint(bs, event, tag); 440941c695c7SKevin Wolf } 441041c695c7SKevin Wolf 441141c695c7SKevin Wolf return -ENOTSUP; 441241c695c7SKevin Wolf } 441341c695c7SKevin Wolf 
44144cc70e93SFam Zheng int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag) 44154cc70e93SFam Zheng { 44164cc70e93SFam Zheng while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) { 44174cc70e93SFam Zheng bs = bs->file; 44184cc70e93SFam Zheng } 44194cc70e93SFam Zheng 44204cc70e93SFam Zheng if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) { 44214cc70e93SFam Zheng return bs->drv->bdrv_debug_remove_breakpoint(bs, tag); 44224cc70e93SFam Zheng } 44234cc70e93SFam Zheng 44244cc70e93SFam Zheng return -ENOTSUP; 44254cc70e93SFam Zheng } 44264cc70e93SFam Zheng 442741c695c7SKevin Wolf int bdrv_debug_resume(BlockDriverState *bs, const char *tag) 442841c695c7SKevin Wolf { 4429938789eaSMax Reitz while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) { 443041c695c7SKevin Wolf bs = bs->file; 443141c695c7SKevin Wolf } 443241c695c7SKevin Wolf 443341c695c7SKevin Wolf if (bs && bs->drv && bs->drv->bdrv_debug_resume) { 443441c695c7SKevin Wolf return bs->drv->bdrv_debug_resume(bs, tag); 443541c695c7SKevin Wolf } 443641c695c7SKevin Wolf 443741c695c7SKevin Wolf return -ENOTSUP; 443841c695c7SKevin Wolf } 443941c695c7SKevin Wolf 444041c695c7SKevin Wolf bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag) 444141c695c7SKevin Wolf { 444241c695c7SKevin Wolf while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) { 444341c695c7SKevin Wolf bs = bs->file; 444441c695c7SKevin Wolf } 444541c695c7SKevin Wolf 444641c695c7SKevin Wolf if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) { 444741c695c7SKevin Wolf return bs->drv->bdrv_debug_is_suspended(bs, tag); 444841c695c7SKevin Wolf } 444941c695c7SKevin Wolf 445041c695c7SKevin Wolf return false; 44518b9b0cc2SKevin Wolf } 44528b9b0cc2SKevin Wolf 4453199630b6SBlue Swirl int bdrv_is_snapshot(BlockDriverState *bs) 4454199630b6SBlue Swirl { 4455199630b6SBlue Swirl return !!(bs->open_flags & BDRV_O_SNAPSHOT); 4456199630b6SBlue Swirl } 4457199630b6SBlue Swirl 4458b1b1d783SJeff Cody /* backing_file can 
either be relative, or absolute, or a protocol. If it is 4459b1b1d783SJeff Cody * relative, it must be relative to the chain. So, passing in bs->filename 4460b1b1d783SJeff Cody * from a BDS as backing_file should not be done, as that may be relative to 4461b1b1d783SJeff Cody * the CWD rather than the chain. */ 4462e8a6bb9cSMarcelo Tosatti BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, 4463e8a6bb9cSMarcelo Tosatti const char *backing_file) 4464e8a6bb9cSMarcelo Tosatti { 4465b1b1d783SJeff Cody char *filename_full = NULL; 4466b1b1d783SJeff Cody char *backing_file_full = NULL; 4467b1b1d783SJeff Cody char *filename_tmp = NULL; 4468b1b1d783SJeff Cody int is_protocol = 0; 4469b1b1d783SJeff Cody BlockDriverState *curr_bs = NULL; 4470b1b1d783SJeff Cody BlockDriverState *retval = NULL; 4471b1b1d783SJeff Cody 4472b1b1d783SJeff Cody if (!bs || !bs->drv || !backing_file) { 4473e8a6bb9cSMarcelo Tosatti return NULL; 4474e8a6bb9cSMarcelo Tosatti } 4475e8a6bb9cSMarcelo Tosatti 4476b1b1d783SJeff Cody filename_full = g_malloc(PATH_MAX); 4477b1b1d783SJeff Cody backing_file_full = g_malloc(PATH_MAX); 4478b1b1d783SJeff Cody filename_tmp = g_malloc(PATH_MAX); 4479b1b1d783SJeff Cody 4480b1b1d783SJeff Cody is_protocol = path_has_protocol(backing_file); 4481b1b1d783SJeff Cody 4482b1b1d783SJeff Cody for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) { 4483b1b1d783SJeff Cody 4484b1b1d783SJeff Cody /* If either of the filename paths is actually a protocol, then 4485b1b1d783SJeff Cody * compare unmodified paths; otherwise make paths relative */ 4486b1b1d783SJeff Cody if (is_protocol || path_has_protocol(curr_bs->backing_file)) { 4487b1b1d783SJeff Cody if (strcmp(backing_file, curr_bs->backing_file) == 0) { 4488b1b1d783SJeff Cody retval = curr_bs->backing_hd; 4489b1b1d783SJeff Cody break; 4490b1b1d783SJeff Cody } 4491e8a6bb9cSMarcelo Tosatti } else { 4492b1b1d783SJeff Cody /* If not an absolute filename path, make it relative to the current 4493b1b1d783SJeff 
Cody * image's filename path */ 4494b1b1d783SJeff Cody path_combine(filename_tmp, PATH_MAX, curr_bs->filename, 4495b1b1d783SJeff Cody backing_file); 4496b1b1d783SJeff Cody 4497b1b1d783SJeff Cody /* We are going to compare absolute pathnames */ 4498b1b1d783SJeff Cody if (!realpath(filename_tmp, filename_full)) { 4499b1b1d783SJeff Cody continue; 4500b1b1d783SJeff Cody } 4501b1b1d783SJeff Cody 4502b1b1d783SJeff Cody /* We need to make sure the backing filename we are comparing against 4503b1b1d783SJeff Cody * is relative to the current image filename (or absolute) */ 4504b1b1d783SJeff Cody path_combine(filename_tmp, PATH_MAX, curr_bs->filename, 4505b1b1d783SJeff Cody curr_bs->backing_file); 4506b1b1d783SJeff Cody 4507b1b1d783SJeff Cody if (!realpath(filename_tmp, backing_file_full)) { 4508b1b1d783SJeff Cody continue; 4509b1b1d783SJeff Cody } 4510b1b1d783SJeff Cody 4511b1b1d783SJeff Cody if (strcmp(backing_file_full, filename_full) == 0) { 4512b1b1d783SJeff Cody retval = curr_bs->backing_hd; 4513b1b1d783SJeff Cody break; 4514b1b1d783SJeff Cody } 4515e8a6bb9cSMarcelo Tosatti } 4516e8a6bb9cSMarcelo Tosatti } 4517e8a6bb9cSMarcelo Tosatti 4518b1b1d783SJeff Cody g_free(filename_full); 4519b1b1d783SJeff Cody g_free(backing_file_full); 4520b1b1d783SJeff Cody g_free(filename_tmp); 4521b1b1d783SJeff Cody return retval; 4522e8a6bb9cSMarcelo Tosatti } 4523e8a6bb9cSMarcelo Tosatti 4524f198fd1cSBenoît Canet int bdrv_get_backing_file_depth(BlockDriverState *bs) 4525f198fd1cSBenoît Canet { 4526f198fd1cSBenoît Canet if (!bs->drv) { 4527f198fd1cSBenoît Canet return 0; 4528f198fd1cSBenoît Canet } 4529f198fd1cSBenoît Canet 4530f198fd1cSBenoît Canet if (!bs->backing_hd) { 4531f198fd1cSBenoît Canet return 0; 4532f198fd1cSBenoît Canet } 4533f198fd1cSBenoît Canet 4534f198fd1cSBenoît Canet return 1 + bdrv_get_backing_file_depth(bs->backing_hd); 4535f198fd1cSBenoît Canet } 4536f198fd1cSBenoît Canet 4537ea2384d3Sbellard /**************************************************************/ 
453883f64091Sbellard /* async I/Os */ 4539ea2384d3Sbellard 45407c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num, 4541f141eafeSaliguori QEMUIOVector *qiov, int nb_sectors, 4542097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 4543ea2384d3Sbellard { 4544bbf0a440SStefan Hajnoczi trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque); 4545bbf0a440SStefan Hajnoczi 4546d20d9b7cSPaolo Bonzini return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0, 45478c5873d6SStefan Hajnoczi cb, opaque, false); 454883f64091Sbellard } 454983f64091Sbellard 45507c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num, 4551f141eafeSaliguori QEMUIOVector *qiov, int nb_sectors, 4552097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 45537674e7bfSbellard { 4554bbf0a440SStefan Hajnoczi trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque); 4555bbf0a440SStefan Hajnoczi 4556d20d9b7cSPaolo Bonzini return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0, 45578c5873d6SStefan Hajnoczi cb, opaque, true); 455883f64091Sbellard } 455983f64091Sbellard 45607c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs, 4561d5ef94d4SPaolo Bonzini int64_t sector_num, int nb_sectors, BdrvRequestFlags flags, 4562097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 4563d5ef94d4SPaolo Bonzini { 4564d5ef94d4SPaolo Bonzini trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque); 4565d5ef94d4SPaolo Bonzini 4566d5ef94d4SPaolo Bonzini return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors, 4567d5ef94d4SPaolo Bonzini BDRV_REQ_ZERO_WRITE | flags, 4568d5ef94d4SPaolo Bonzini cb, opaque, true); 4569d5ef94d4SPaolo Bonzini } 4570d5ef94d4SPaolo Bonzini 457140b4f539SKevin Wolf 457240b4f539SKevin Wolf typedef struct MultiwriteCB { 457340b4f539SKevin Wolf int error; 457440b4f539SKevin Wolf int num_requests; 457540b4f539SKevin Wolf int 
num_callbacks; 457640b4f539SKevin Wolf struct { 4577097310b5SMarkus Armbruster BlockCompletionFunc *cb; 457840b4f539SKevin Wolf void *opaque; 457940b4f539SKevin Wolf QEMUIOVector *free_qiov; 458040b4f539SKevin Wolf } callbacks[]; 458140b4f539SKevin Wolf } MultiwriteCB; 458240b4f539SKevin Wolf 458340b4f539SKevin Wolf static void multiwrite_user_cb(MultiwriteCB *mcb) 458440b4f539SKevin Wolf { 458540b4f539SKevin Wolf int i; 458640b4f539SKevin Wolf 458740b4f539SKevin Wolf for (i = 0; i < mcb->num_callbacks; i++) { 458840b4f539SKevin Wolf mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error); 45891e1ea48dSStefan Hajnoczi if (mcb->callbacks[i].free_qiov) { 45901e1ea48dSStefan Hajnoczi qemu_iovec_destroy(mcb->callbacks[i].free_qiov); 45911e1ea48dSStefan Hajnoczi } 45927267c094SAnthony Liguori g_free(mcb->callbacks[i].free_qiov); 459340b4f539SKevin Wolf } 459440b4f539SKevin Wolf } 459540b4f539SKevin Wolf 459640b4f539SKevin Wolf static void multiwrite_cb(void *opaque, int ret) 459740b4f539SKevin Wolf { 459840b4f539SKevin Wolf MultiwriteCB *mcb = opaque; 459940b4f539SKevin Wolf 46006d519a5fSStefan Hajnoczi trace_multiwrite_cb(mcb, ret); 46016d519a5fSStefan Hajnoczi 4602cb6d3ca0SKevin Wolf if (ret < 0 && !mcb->error) { 460340b4f539SKevin Wolf mcb->error = ret; 460440b4f539SKevin Wolf } 460540b4f539SKevin Wolf 460640b4f539SKevin Wolf mcb->num_requests--; 460740b4f539SKevin Wolf if (mcb->num_requests == 0) { 460840b4f539SKevin Wolf multiwrite_user_cb(mcb); 46097267c094SAnthony Liguori g_free(mcb); 461040b4f539SKevin Wolf } 461140b4f539SKevin Wolf } 461240b4f539SKevin Wolf 461340b4f539SKevin Wolf static int multiwrite_req_compare(const void *a, const void *b) 461440b4f539SKevin Wolf { 461577be4366SChristoph Hellwig const BlockRequest *req1 = a, *req2 = b; 461677be4366SChristoph Hellwig 461777be4366SChristoph Hellwig /* 461877be4366SChristoph Hellwig * Note that we can't simply subtract req2->sector from req1->sector 461977be4366SChristoph Hellwig * here as that could 
overflow the return value. 462077be4366SChristoph Hellwig */ 462177be4366SChristoph Hellwig if (req1->sector > req2->sector) { 462277be4366SChristoph Hellwig return 1; 462377be4366SChristoph Hellwig } else if (req1->sector < req2->sector) { 462477be4366SChristoph Hellwig return -1; 462577be4366SChristoph Hellwig } else { 462677be4366SChristoph Hellwig return 0; 462777be4366SChristoph Hellwig } 462840b4f539SKevin Wolf } 462940b4f539SKevin Wolf 463040b4f539SKevin Wolf /* 463140b4f539SKevin Wolf * Takes a bunch of requests and tries to merge them. Returns the number of 463240b4f539SKevin Wolf * requests that remain after merging. 463340b4f539SKevin Wolf */ 463440b4f539SKevin Wolf static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs, 463540b4f539SKevin Wolf int num_reqs, MultiwriteCB *mcb) 463640b4f539SKevin Wolf { 463740b4f539SKevin Wolf int i, outidx; 463840b4f539SKevin Wolf 463940b4f539SKevin Wolf // Sort requests by start sector 464040b4f539SKevin Wolf qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare); 464140b4f539SKevin Wolf 464240b4f539SKevin Wolf // Check if adjacent requests touch the same clusters. If so, combine them, 464340b4f539SKevin Wolf // filling up gaps with zero sectors. 464440b4f539SKevin Wolf outidx = 0; 464540b4f539SKevin Wolf for (i = 1; i < num_reqs; i++) { 464640b4f539SKevin Wolf int merge = 0; 464740b4f539SKevin Wolf int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors; 464840b4f539SKevin Wolf 4649b6a127a1SPaolo Bonzini // Handle exactly sequential writes and overlapping writes. 
465040b4f539SKevin Wolf if (reqs[i].sector <= oldreq_last) { 465140b4f539SKevin Wolf merge = 1; 465240b4f539SKevin Wolf } 465340b4f539SKevin Wolf 4654e2a305fbSChristoph Hellwig if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) { 4655e2a305fbSChristoph Hellwig merge = 0; 4656e2a305fbSChristoph Hellwig } 4657e2a305fbSChristoph Hellwig 46586c5a42acSPeter Lieven if (bs->bl.max_transfer_length && reqs[outidx].nb_sectors + 46596c5a42acSPeter Lieven reqs[i].nb_sectors > bs->bl.max_transfer_length) { 46606c5a42acSPeter Lieven merge = 0; 46616c5a42acSPeter Lieven } 46626c5a42acSPeter Lieven 466340b4f539SKevin Wolf if (merge) { 466440b4f539SKevin Wolf size_t size; 46657267c094SAnthony Liguori QEMUIOVector *qiov = g_malloc0(sizeof(*qiov)); 466640b4f539SKevin Wolf qemu_iovec_init(qiov, 466740b4f539SKevin Wolf reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1); 466840b4f539SKevin Wolf 466940b4f539SKevin Wolf // Add the first request to the merged one. If the requests are 467040b4f539SKevin Wolf // overlapping, drop the last sectors of the first request. 
467140b4f539SKevin Wolf size = (reqs[i].sector - reqs[outidx].sector) << 9; 46721b093c48SMichael Tokarev qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size); 467340b4f539SKevin Wolf 4674b6a127a1SPaolo Bonzini // We should need to add any zeros between the two requests 4675b6a127a1SPaolo Bonzini assert (reqs[i].sector <= oldreq_last); 467640b4f539SKevin Wolf 467740b4f539SKevin Wolf // Add the second request 46781b093c48SMichael Tokarev qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size); 467940b4f539SKevin Wolf 4680391827ebSStefan Hajnoczi // Add tail of first request, if necessary 4681391827ebSStefan Hajnoczi if (qiov->size < reqs[outidx].qiov->size) { 4682391827ebSStefan Hajnoczi qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size, 4683391827ebSStefan Hajnoczi reqs[outidx].qiov->size - qiov->size); 4684391827ebSStefan Hajnoczi } 4685391827ebSStefan Hajnoczi 4686cbf1dff2SKevin Wolf reqs[outidx].nb_sectors = qiov->size >> 9; 468740b4f539SKevin Wolf reqs[outidx].qiov = qiov; 468840b4f539SKevin Wolf 468940b4f539SKevin Wolf mcb->callbacks[i].free_qiov = reqs[outidx].qiov; 469040b4f539SKevin Wolf } else { 469140b4f539SKevin Wolf outidx++; 469240b4f539SKevin Wolf reqs[outidx].sector = reqs[i].sector; 469340b4f539SKevin Wolf reqs[outidx].nb_sectors = reqs[i].nb_sectors; 469440b4f539SKevin Wolf reqs[outidx].qiov = reqs[i].qiov; 469540b4f539SKevin Wolf } 469640b4f539SKevin Wolf } 469740b4f539SKevin Wolf 4698f4564d53SPeter Lieven block_acct_merge_done(&bs->stats, BLOCK_ACCT_WRITE, num_reqs - outidx - 1); 4699f4564d53SPeter Lieven 470040b4f539SKevin Wolf return outidx + 1; 470140b4f539SKevin Wolf } 470240b4f539SKevin Wolf 470340b4f539SKevin Wolf /* 470440b4f539SKevin Wolf * Submit multiple AIO write requests at once. 470540b4f539SKevin Wolf * 470640b4f539SKevin Wolf * On success, the function returns 0 and all requests in the reqs array have 470740b4f539SKevin Wolf * been submitted. 
In error case this function returns -1, and any of the 470840b4f539SKevin Wolf * requests may or may not be submitted yet. In particular, this means that the 470940b4f539SKevin Wolf * callback will be called for some of the requests, for others it won't. The 471040b4f539SKevin Wolf * caller must check the error field of the BlockRequest to wait for the right 471140b4f539SKevin Wolf * callbacks (if error != 0, no callback will be called). 471240b4f539SKevin Wolf * 471340b4f539SKevin Wolf * The implementation may modify the contents of the reqs array, e.g. to merge 471440b4f539SKevin Wolf * requests. However, the fields opaque and error are left unmodified as they 471540b4f539SKevin Wolf * are used to signal failure for a single request to the caller. 471640b4f539SKevin Wolf */ 471740b4f539SKevin Wolf int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs) 471840b4f539SKevin Wolf { 471940b4f539SKevin Wolf MultiwriteCB *mcb; 472040b4f539SKevin Wolf int i; 472140b4f539SKevin Wolf 4722301db7c2SRyan Harper /* don't submit writes if we don't have a medium */ 4723301db7c2SRyan Harper if (bs->drv == NULL) { 4724301db7c2SRyan Harper for (i = 0; i < num_reqs; i++) { 4725301db7c2SRyan Harper reqs[i].error = -ENOMEDIUM; 4726301db7c2SRyan Harper } 4727301db7c2SRyan Harper return -1; 4728301db7c2SRyan Harper } 4729301db7c2SRyan Harper 473040b4f539SKevin Wolf if (num_reqs == 0) { 473140b4f539SKevin Wolf return 0; 473240b4f539SKevin Wolf } 473340b4f539SKevin Wolf 473440b4f539SKevin Wolf // Create MultiwriteCB structure 47357267c094SAnthony Liguori mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks)); 473640b4f539SKevin Wolf mcb->num_requests = 0; 473740b4f539SKevin Wolf mcb->num_callbacks = num_reqs; 473840b4f539SKevin Wolf 473940b4f539SKevin Wolf for (i = 0; i < num_reqs; i++) { 474040b4f539SKevin Wolf mcb->callbacks[i].cb = reqs[i].cb; 474140b4f539SKevin Wolf mcb->callbacks[i].opaque = reqs[i].opaque; 474240b4f539SKevin Wolf } 
474340b4f539SKevin Wolf 474440b4f539SKevin Wolf // Check for mergable requests 474540b4f539SKevin Wolf num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb); 474640b4f539SKevin Wolf 47476d519a5fSStefan Hajnoczi trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs); 47486d519a5fSStefan Hajnoczi 4749df9309fbSPaolo Bonzini /* Run the aio requests. */ 4750df9309fbSPaolo Bonzini mcb->num_requests = num_reqs; 475140b4f539SKevin Wolf for (i = 0; i < num_reqs; i++) { 4752d20d9b7cSPaolo Bonzini bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov, 4753d20d9b7cSPaolo Bonzini reqs[i].nb_sectors, reqs[i].flags, 4754d20d9b7cSPaolo Bonzini multiwrite_cb, mcb, 4755d20d9b7cSPaolo Bonzini true); 475640b4f539SKevin Wolf } 475740b4f539SKevin Wolf 475840b4f539SKevin Wolf return 0; 475940b4f539SKevin Wolf } 476040b4f539SKevin Wolf 47617c84b1b8SMarkus Armbruster void bdrv_aio_cancel(BlockAIOCB *acb) 476283f64091Sbellard { 476302c50efeSFam Zheng qemu_aio_ref(acb); 476402c50efeSFam Zheng bdrv_aio_cancel_async(acb); 476502c50efeSFam Zheng while (acb->refcnt > 1) { 476602c50efeSFam Zheng if (acb->aiocb_info->get_aio_context) { 476702c50efeSFam Zheng aio_poll(acb->aiocb_info->get_aio_context(acb), true); 476802c50efeSFam Zheng } else if (acb->bs) { 476902c50efeSFam Zheng aio_poll(bdrv_get_aio_context(acb->bs), true); 477002c50efeSFam Zheng } else { 477102c50efeSFam Zheng abort(); 477202c50efeSFam Zheng } 477302c50efeSFam Zheng } 47748007429aSFam Zheng qemu_aio_unref(acb); 477502c50efeSFam Zheng } 477602c50efeSFam Zheng 477702c50efeSFam Zheng /* Async version of aio cancel. The caller is not blocked if the acb implements 477802c50efeSFam Zheng * cancel_async, otherwise we do nothing and let the request normally complete. 477902c50efeSFam Zheng * In either case the completion callback must be called. 
*/ 47807c84b1b8SMarkus Armbruster void bdrv_aio_cancel_async(BlockAIOCB *acb) 478102c50efeSFam Zheng { 478202c50efeSFam Zheng if (acb->aiocb_info->cancel_async) { 478302c50efeSFam Zheng acb->aiocb_info->cancel_async(acb); 478402c50efeSFam Zheng } 478583f64091Sbellard } 478683f64091Sbellard 478783f64091Sbellard /**************************************************************/ 478883f64091Sbellard /* async block device emulation */ 478983f64091Sbellard 47907c84b1b8SMarkus Armbruster typedef struct BlockAIOCBSync { 47917c84b1b8SMarkus Armbruster BlockAIOCB common; 4792c16b5a2cSChristoph Hellwig QEMUBH *bh; 4793c16b5a2cSChristoph Hellwig int ret; 4794c16b5a2cSChristoph Hellwig /* vector translation state */ 4795c16b5a2cSChristoph Hellwig QEMUIOVector *qiov; 4796c16b5a2cSChristoph Hellwig uint8_t *bounce; 4797c16b5a2cSChristoph Hellwig int is_write; 47987c84b1b8SMarkus Armbruster } BlockAIOCBSync; 4799c16b5a2cSChristoph Hellwig 4800d7331bedSStefan Hajnoczi static const AIOCBInfo bdrv_em_aiocb_info = { 48017c84b1b8SMarkus Armbruster .aiocb_size = sizeof(BlockAIOCBSync), 4802c16b5a2cSChristoph Hellwig }; 4803c16b5a2cSChristoph Hellwig 480483f64091Sbellard static void bdrv_aio_bh_cb(void *opaque) 4805beac80cdSbellard { 48067c84b1b8SMarkus Armbruster BlockAIOCBSync *acb = opaque; 4807f141eafeSaliguori 4808857d4f46SKevin Wolf if (!acb->is_write && acb->ret >= 0) { 480903396148SMichael Tokarev qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size); 4810857d4f46SKevin Wolf } 4811ceb42de8Saliguori qemu_vfree(acb->bounce); 4812ce1a14dcSpbrook acb->common.cb(acb->common.opaque, acb->ret); 48136a7ad299SDor Laor qemu_bh_delete(acb->bh); 481436afc451SAvi Kivity acb->bh = NULL; 48158007429aSFam Zheng qemu_aio_unref(acb); 4816beac80cdSbellard } 4817beac80cdSbellard 48187c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs, 4819f141eafeSaliguori int64_t sector_num, 4820f141eafeSaliguori QEMUIOVector *qiov, 4821f141eafeSaliguori int nb_sectors, 
4822097310b5SMarkus Armbruster BlockCompletionFunc *cb, 4823f141eafeSaliguori void *opaque, 4824f141eafeSaliguori int is_write) 4825f141eafeSaliguori 4826ea2384d3Sbellard { 48277c84b1b8SMarkus Armbruster BlockAIOCBSync *acb; 482883f64091Sbellard 4829d7331bedSStefan Hajnoczi acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque); 4830f141eafeSaliguori acb->is_write = is_write; 4831f141eafeSaliguori acb->qiov = qiov; 4832857d4f46SKevin Wolf acb->bounce = qemu_try_blockalign(bs, qiov->size); 48332572b37aSStefan Hajnoczi acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb); 4834f141eafeSaliguori 4835857d4f46SKevin Wolf if (acb->bounce == NULL) { 4836857d4f46SKevin Wolf acb->ret = -ENOMEM; 4837857d4f46SKevin Wolf } else if (is_write) { 4838d5e6b161SMichael Tokarev qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size); 48391ed20acfSStefan Hajnoczi acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors); 4840f141eafeSaliguori } else { 48411ed20acfSStefan Hajnoczi acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors); 4842f141eafeSaliguori } 4843f141eafeSaliguori 4844ce1a14dcSpbrook qemu_bh_schedule(acb->bh); 4845f141eafeSaliguori 4846ce1a14dcSpbrook return &acb->common; 48477a6cba61Spbrook } 48487a6cba61Spbrook 48497c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs, 4850f141eafeSaliguori int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 4851097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 485283f64091Sbellard { 4853f141eafeSaliguori return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); 485483f64091Sbellard } 485583f64091Sbellard 48567c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs, 4857f141eafeSaliguori int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 4858097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 4859f141eafeSaliguori { 4860f141eafeSaliguori return bdrv_aio_rw_vector(bs, 
sector_num, qiov, nb_sectors, cb, opaque, 1); 4861f141eafeSaliguori } 4862f141eafeSaliguori 486368485420SKevin Wolf 48647c84b1b8SMarkus Armbruster typedef struct BlockAIOCBCoroutine { 48657c84b1b8SMarkus Armbruster BlockAIOCB common; 486668485420SKevin Wolf BlockRequest req; 486768485420SKevin Wolf bool is_write; 48680b5a2445SPaolo Bonzini bool need_bh; 4869d318aea9SKevin Wolf bool *done; 487068485420SKevin Wolf QEMUBH* bh; 48717c84b1b8SMarkus Armbruster } BlockAIOCBCoroutine; 487268485420SKevin Wolf 4873d7331bedSStefan Hajnoczi static const AIOCBInfo bdrv_em_co_aiocb_info = { 48747c84b1b8SMarkus Armbruster .aiocb_size = sizeof(BlockAIOCBCoroutine), 487568485420SKevin Wolf }; 487668485420SKevin Wolf 48770b5a2445SPaolo Bonzini static void bdrv_co_complete(BlockAIOCBCoroutine *acb) 48780b5a2445SPaolo Bonzini { 48790b5a2445SPaolo Bonzini if (!acb->need_bh) { 48800b5a2445SPaolo Bonzini acb->common.cb(acb->common.opaque, acb->req.error); 48810b5a2445SPaolo Bonzini qemu_aio_unref(acb); 48820b5a2445SPaolo Bonzini } 48830b5a2445SPaolo Bonzini } 48840b5a2445SPaolo Bonzini 488535246a68SPaolo Bonzini static void bdrv_co_em_bh(void *opaque) 488668485420SKevin Wolf { 48877c84b1b8SMarkus Armbruster BlockAIOCBCoroutine *acb = opaque; 488868485420SKevin Wolf 48890b5a2445SPaolo Bonzini assert(!acb->need_bh); 489068485420SKevin Wolf qemu_bh_delete(acb->bh); 48910b5a2445SPaolo Bonzini bdrv_co_complete(acb); 48920b5a2445SPaolo Bonzini } 48930b5a2445SPaolo Bonzini 48940b5a2445SPaolo Bonzini static void bdrv_co_maybe_schedule_bh(BlockAIOCBCoroutine *acb) 48950b5a2445SPaolo Bonzini { 48960b5a2445SPaolo Bonzini acb->need_bh = false; 48970b5a2445SPaolo Bonzini if (acb->req.error != -EINPROGRESS) { 48980b5a2445SPaolo Bonzini BlockDriverState *bs = acb->common.bs; 48990b5a2445SPaolo Bonzini 49000b5a2445SPaolo Bonzini acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb); 49010b5a2445SPaolo Bonzini qemu_bh_schedule(acb->bh); 49020b5a2445SPaolo Bonzini } 490368485420SKevin Wolf } 
490468485420SKevin Wolf 4905b2a61371SStefan Hajnoczi /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */ 4906b2a61371SStefan Hajnoczi static void coroutine_fn bdrv_co_do_rw(void *opaque) 4907b2a61371SStefan Hajnoczi { 49087c84b1b8SMarkus Armbruster BlockAIOCBCoroutine *acb = opaque; 4909b2a61371SStefan Hajnoczi BlockDriverState *bs = acb->common.bs; 4910b2a61371SStefan Hajnoczi 4911b2a61371SStefan Hajnoczi if (!acb->is_write) { 4912b2a61371SStefan Hajnoczi acb->req.error = bdrv_co_do_readv(bs, acb->req.sector, 4913d20d9b7cSPaolo Bonzini acb->req.nb_sectors, acb->req.qiov, acb->req.flags); 4914b2a61371SStefan Hajnoczi } else { 4915b2a61371SStefan Hajnoczi acb->req.error = bdrv_co_do_writev(bs, acb->req.sector, 4916d20d9b7cSPaolo Bonzini acb->req.nb_sectors, acb->req.qiov, acb->req.flags); 4917b2a61371SStefan Hajnoczi } 4918b2a61371SStefan Hajnoczi 49190b5a2445SPaolo Bonzini bdrv_co_complete(acb); 4920b2a61371SStefan Hajnoczi } 4921b2a61371SStefan Hajnoczi 49227c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, 492368485420SKevin Wolf int64_t sector_num, 492468485420SKevin Wolf QEMUIOVector *qiov, 492568485420SKevin Wolf int nb_sectors, 4926d20d9b7cSPaolo Bonzini BdrvRequestFlags flags, 4927097310b5SMarkus Armbruster BlockCompletionFunc *cb, 492868485420SKevin Wolf void *opaque, 49298c5873d6SStefan Hajnoczi bool is_write) 493068485420SKevin Wolf { 493168485420SKevin Wolf Coroutine *co; 49327c84b1b8SMarkus Armbruster BlockAIOCBCoroutine *acb; 493368485420SKevin Wolf 4934d7331bedSStefan Hajnoczi acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); 49350b5a2445SPaolo Bonzini acb->need_bh = true; 49360b5a2445SPaolo Bonzini acb->req.error = -EINPROGRESS; 493768485420SKevin Wolf acb->req.sector = sector_num; 493868485420SKevin Wolf acb->req.nb_sectors = nb_sectors; 493968485420SKevin Wolf acb->req.qiov = qiov; 4940d20d9b7cSPaolo Bonzini acb->req.flags = flags; 494168485420SKevin Wolf acb->is_write = is_write; 494268485420SKevin Wolf 
49438c5873d6SStefan Hajnoczi co = qemu_coroutine_create(bdrv_co_do_rw); 494468485420SKevin Wolf qemu_coroutine_enter(co, acb); 494568485420SKevin Wolf 49460b5a2445SPaolo Bonzini bdrv_co_maybe_schedule_bh(acb); 494768485420SKevin Wolf return &acb->common; 494868485420SKevin Wolf } 494968485420SKevin Wolf 495007f07615SPaolo Bonzini static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque) 4951b2e12bc6SChristoph Hellwig { 49527c84b1b8SMarkus Armbruster BlockAIOCBCoroutine *acb = opaque; 495307f07615SPaolo Bonzini BlockDriverState *bs = acb->common.bs; 4954b2e12bc6SChristoph Hellwig 495507f07615SPaolo Bonzini acb->req.error = bdrv_co_flush(bs); 49560b5a2445SPaolo Bonzini bdrv_co_complete(acb); 4957b2e12bc6SChristoph Hellwig } 4958b2e12bc6SChristoph Hellwig 49597c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs, 4960097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 4961016f5cf6SAlexander Graf { 496207f07615SPaolo Bonzini trace_bdrv_aio_flush(bs, opaque); 4963016f5cf6SAlexander Graf 496407f07615SPaolo Bonzini Coroutine *co; 49657c84b1b8SMarkus Armbruster BlockAIOCBCoroutine *acb; 4966016f5cf6SAlexander Graf 4967d7331bedSStefan Hajnoczi acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); 49680b5a2445SPaolo Bonzini acb->need_bh = true; 49690b5a2445SPaolo Bonzini acb->req.error = -EINPROGRESS; 4970d318aea9SKevin Wolf 497107f07615SPaolo Bonzini co = qemu_coroutine_create(bdrv_aio_flush_co_entry); 497207f07615SPaolo Bonzini qemu_coroutine_enter(co, acb); 4973016f5cf6SAlexander Graf 49740b5a2445SPaolo Bonzini bdrv_co_maybe_schedule_bh(acb); 4975016f5cf6SAlexander Graf return &acb->common; 4976016f5cf6SAlexander Graf } 4977016f5cf6SAlexander Graf 49784265d620SPaolo Bonzini static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque) 49794265d620SPaolo Bonzini { 49807c84b1b8SMarkus Armbruster BlockAIOCBCoroutine *acb = opaque; 49814265d620SPaolo Bonzini BlockDriverState *bs = acb->common.bs; 49824265d620SPaolo Bonzini 
49834265d620SPaolo Bonzini acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors); 49840b5a2445SPaolo Bonzini bdrv_co_complete(acb); 49854265d620SPaolo Bonzini } 49864265d620SPaolo Bonzini 49877c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs, 49884265d620SPaolo Bonzini int64_t sector_num, int nb_sectors, 4989097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 49904265d620SPaolo Bonzini { 49914265d620SPaolo Bonzini Coroutine *co; 49927c84b1b8SMarkus Armbruster BlockAIOCBCoroutine *acb; 49934265d620SPaolo Bonzini 49944265d620SPaolo Bonzini trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque); 49954265d620SPaolo Bonzini 4996d7331bedSStefan Hajnoczi acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); 49970b5a2445SPaolo Bonzini acb->need_bh = true; 49980b5a2445SPaolo Bonzini acb->req.error = -EINPROGRESS; 49994265d620SPaolo Bonzini acb->req.sector = sector_num; 50004265d620SPaolo Bonzini acb->req.nb_sectors = nb_sectors; 50014265d620SPaolo Bonzini co = qemu_coroutine_create(bdrv_aio_discard_co_entry); 50024265d620SPaolo Bonzini qemu_coroutine_enter(co, acb); 50034265d620SPaolo Bonzini 50040b5a2445SPaolo Bonzini bdrv_co_maybe_schedule_bh(acb); 50054265d620SPaolo Bonzini return &acb->common; 50064265d620SPaolo Bonzini } 50074265d620SPaolo Bonzini 5008ea2384d3Sbellard void bdrv_init(void) 5009ea2384d3Sbellard { 50105efa9d5aSAnthony Liguori module_call_init(MODULE_INIT_BLOCK); 5011ea2384d3Sbellard } 5012ce1a14dcSpbrook 5013eb852011SMarkus Armbruster void bdrv_init_with_whitelist(void) 5014eb852011SMarkus Armbruster { 5015eb852011SMarkus Armbruster use_bdrv_whitelist = 1; 5016eb852011SMarkus Armbruster bdrv_init(); 5017eb852011SMarkus Armbruster } 5018eb852011SMarkus Armbruster 5019d7331bedSStefan Hajnoczi void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs, 5020097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 50216bbff9a0Saliguori { 50227c84b1b8SMarkus Armbruster 
BlockAIOCB *acb; 5023ce1a14dcSpbrook 5024d7331bedSStefan Hajnoczi acb = g_slice_alloc(aiocb_info->aiocb_size); 5025d7331bedSStefan Hajnoczi acb->aiocb_info = aiocb_info; 5026ce1a14dcSpbrook acb->bs = bs; 5027ce1a14dcSpbrook acb->cb = cb; 5028ce1a14dcSpbrook acb->opaque = opaque; 5029f197fe2bSFam Zheng acb->refcnt = 1; 5030ce1a14dcSpbrook return acb; 5031ce1a14dcSpbrook } 5032ce1a14dcSpbrook 5033f197fe2bSFam Zheng void qemu_aio_ref(void *p) 5034f197fe2bSFam Zheng { 50357c84b1b8SMarkus Armbruster BlockAIOCB *acb = p; 5036f197fe2bSFam Zheng acb->refcnt++; 5037f197fe2bSFam Zheng } 5038f197fe2bSFam Zheng 50398007429aSFam Zheng void qemu_aio_unref(void *p) 5040ce1a14dcSpbrook { 50417c84b1b8SMarkus Armbruster BlockAIOCB *acb = p; 5042f197fe2bSFam Zheng assert(acb->refcnt > 0); 5043f197fe2bSFam Zheng if (--acb->refcnt == 0) { 5044d7331bedSStefan Hajnoczi g_slice_free1(acb->aiocb_info->aiocb_size, acb); 5045ce1a14dcSpbrook } 5046f197fe2bSFam Zheng } 504719cb3738Sbellard 504819cb3738Sbellard /**************************************************************/ 5049f9f05dc5SKevin Wolf /* Coroutine block device emulation */ 5050f9f05dc5SKevin Wolf 5051f9f05dc5SKevin Wolf typedef struct CoroutineIOCompletion { 5052f9f05dc5SKevin Wolf Coroutine *coroutine; 5053f9f05dc5SKevin Wolf int ret; 5054f9f05dc5SKevin Wolf } CoroutineIOCompletion; 5055f9f05dc5SKevin Wolf 5056f9f05dc5SKevin Wolf static void bdrv_co_io_em_complete(void *opaque, int ret) 5057f9f05dc5SKevin Wolf { 5058f9f05dc5SKevin Wolf CoroutineIOCompletion *co = opaque; 5059f9f05dc5SKevin Wolf 5060f9f05dc5SKevin Wolf co->ret = ret; 5061f9f05dc5SKevin Wolf qemu_coroutine_enter(co->coroutine, NULL); 5062f9f05dc5SKevin Wolf } 5063f9f05dc5SKevin Wolf 5064f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num, 5065f9f05dc5SKevin Wolf int nb_sectors, QEMUIOVector *iov, 5066f9f05dc5SKevin Wolf bool is_write) 5067f9f05dc5SKevin Wolf { 5068f9f05dc5SKevin Wolf CoroutineIOCompletion co = { 
5069f9f05dc5SKevin Wolf .coroutine = qemu_coroutine_self(), 5070f9f05dc5SKevin Wolf }; 50717c84b1b8SMarkus Armbruster BlockAIOCB *acb; 5072f9f05dc5SKevin Wolf 5073f9f05dc5SKevin Wolf if (is_write) { 5074a652d160SStefan Hajnoczi acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors, 5075f9f05dc5SKevin Wolf bdrv_co_io_em_complete, &co); 5076f9f05dc5SKevin Wolf } else { 5077a652d160SStefan Hajnoczi acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors, 5078f9f05dc5SKevin Wolf bdrv_co_io_em_complete, &co); 5079f9f05dc5SKevin Wolf } 5080f9f05dc5SKevin Wolf 508159370aaaSStefan Hajnoczi trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb); 5082f9f05dc5SKevin Wolf if (!acb) { 5083f9f05dc5SKevin Wolf return -EIO; 5084f9f05dc5SKevin Wolf } 5085f9f05dc5SKevin Wolf qemu_coroutine_yield(); 5086f9f05dc5SKevin Wolf 5087f9f05dc5SKevin Wolf return co.ret; 5088f9f05dc5SKevin Wolf } 5089f9f05dc5SKevin Wolf 5090f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs, 5091f9f05dc5SKevin Wolf int64_t sector_num, int nb_sectors, 5092f9f05dc5SKevin Wolf QEMUIOVector *iov) 5093f9f05dc5SKevin Wolf { 5094f9f05dc5SKevin Wolf return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false); 5095f9f05dc5SKevin Wolf } 5096f9f05dc5SKevin Wolf 5097f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs, 5098f9f05dc5SKevin Wolf int64_t sector_num, int nb_sectors, 5099f9f05dc5SKevin Wolf QEMUIOVector *iov) 5100f9f05dc5SKevin Wolf { 5101f9f05dc5SKevin Wolf return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true); 5102f9f05dc5SKevin Wolf } 5103f9f05dc5SKevin Wolf 510407f07615SPaolo Bonzini static void coroutine_fn bdrv_flush_co_entry(void *opaque) 5105e7a8a783SKevin Wolf { 510607f07615SPaolo Bonzini RwCo *rwco = opaque; 510707f07615SPaolo Bonzini 510807f07615SPaolo Bonzini rwco->ret = bdrv_co_flush(rwco->bs); 510907f07615SPaolo Bonzini } 511007f07615SPaolo Bonzini 511107f07615SPaolo Bonzini int coroutine_fn 
bdrv_co_flush(BlockDriverState *bs) 511207f07615SPaolo Bonzini { 5113eb489bb1SKevin Wolf int ret; 5114eb489bb1SKevin Wolf 511529cdb251SPaolo Bonzini if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) { 511607f07615SPaolo Bonzini return 0; 5117eb489bb1SKevin Wolf } 5118eb489bb1SKevin Wolf 5119ca716364SKevin Wolf /* Write back cached data to the OS even with cache=unsafe */ 5120bf736fe3SKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS); 5121eb489bb1SKevin Wolf if (bs->drv->bdrv_co_flush_to_os) { 5122eb489bb1SKevin Wolf ret = bs->drv->bdrv_co_flush_to_os(bs); 5123eb489bb1SKevin Wolf if (ret < 0) { 5124eb489bb1SKevin Wolf return ret; 5125eb489bb1SKevin Wolf } 5126eb489bb1SKevin Wolf } 5127eb489bb1SKevin Wolf 5128ca716364SKevin Wolf /* But don't actually force it to the disk with cache=unsafe */ 5129ca716364SKevin Wolf if (bs->open_flags & BDRV_O_NO_FLUSH) { 5130d4c82329SKevin Wolf goto flush_parent; 5131ca716364SKevin Wolf } 5132ca716364SKevin Wolf 5133bf736fe3SKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK); 5134eb489bb1SKevin Wolf if (bs->drv->bdrv_co_flush_to_disk) { 513529cdb251SPaolo Bonzini ret = bs->drv->bdrv_co_flush_to_disk(bs); 513607f07615SPaolo Bonzini } else if (bs->drv->bdrv_aio_flush) { 51377c84b1b8SMarkus Armbruster BlockAIOCB *acb; 5138e7a8a783SKevin Wolf CoroutineIOCompletion co = { 5139e7a8a783SKevin Wolf .coroutine = qemu_coroutine_self(), 5140e7a8a783SKevin Wolf }; 5141e7a8a783SKevin Wolf 514207f07615SPaolo Bonzini acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co); 514307f07615SPaolo Bonzini if (acb == NULL) { 514429cdb251SPaolo Bonzini ret = -EIO; 514507f07615SPaolo Bonzini } else { 5146e7a8a783SKevin Wolf qemu_coroutine_yield(); 514729cdb251SPaolo Bonzini ret = co.ret; 5148e7a8a783SKevin Wolf } 514907f07615SPaolo Bonzini } else { 515007f07615SPaolo Bonzini /* 515107f07615SPaolo Bonzini * Some block drivers always operate in either writethrough or unsafe 515207f07615SPaolo Bonzini * mode and don't support 
bdrv_flush therefore. Usually qemu doesn't 515307f07615SPaolo Bonzini * know how the server works (because the behaviour is hardcoded or 515407f07615SPaolo Bonzini * depends on server-side configuration), so we can't ensure that 515507f07615SPaolo Bonzini * everything is safe on disk. Returning an error doesn't work because 515607f07615SPaolo Bonzini * that would break guests even if the server operates in writethrough 515707f07615SPaolo Bonzini * mode. 515807f07615SPaolo Bonzini * 515907f07615SPaolo Bonzini * Let's hope the user knows what he's doing. 516007f07615SPaolo Bonzini */ 516129cdb251SPaolo Bonzini ret = 0; 516207f07615SPaolo Bonzini } 516329cdb251SPaolo Bonzini if (ret < 0) { 516429cdb251SPaolo Bonzini return ret; 516529cdb251SPaolo Bonzini } 516629cdb251SPaolo Bonzini 516729cdb251SPaolo Bonzini /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH 516829cdb251SPaolo Bonzini * in the case of cache=unsafe, so there are no useless flushes. 516929cdb251SPaolo Bonzini */ 5170d4c82329SKevin Wolf flush_parent: 517129cdb251SPaolo Bonzini return bdrv_co_flush(bs->file); 517207f07615SPaolo Bonzini } 517307f07615SPaolo Bonzini 51745a8a30dbSKevin Wolf void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp) 51750f15423cSAnthony Liguori { 51765a8a30dbSKevin Wolf Error *local_err = NULL; 51775a8a30dbSKevin Wolf int ret; 51785a8a30dbSKevin Wolf 51793456a8d1SKevin Wolf if (!bs->drv) { 51803456a8d1SKevin Wolf return; 51810f15423cSAnthony Liguori } 51823456a8d1SKevin Wolf 51837ea2d269SAlexey Kardashevskiy if (!(bs->open_flags & BDRV_O_INCOMING)) { 51847ea2d269SAlexey Kardashevskiy return; 51857ea2d269SAlexey Kardashevskiy } 51867ea2d269SAlexey Kardashevskiy bs->open_flags &= ~BDRV_O_INCOMING; 51877ea2d269SAlexey Kardashevskiy 51883456a8d1SKevin Wolf if (bs->drv->bdrv_invalidate_cache) { 51895a8a30dbSKevin Wolf bs->drv->bdrv_invalidate_cache(bs, &local_err); 51903456a8d1SKevin Wolf } else if (bs->file) { 51915a8a30dbSKevin Wolf 
bdrv_invalidate_cache(bs->file, &local_err); 51925a8a30dbSKevin Wolf } 51935a8a30dbSKevin Wolf if (local_err) { 51945a8a30dbSKevin Wolf error_propagate(errp, local_err); 51955a8a30dbSKevin Wolf return; 51963456a8d1SKevin Wolf } 51973456a8d1SKevin Wolf 51985a8a30dbSKevin Wolf ret = refresh_total_sectors(bs, bs->total_sectors); 51995a8a30dbSKevin Wolf if (ret < 0) { 52005a8a30dbSKevin Wolf error_setg_errno(errp, -ret, "Could not refresh total sector count"); 52015a8a30dbSKevin Wolf return; 52025a8a30dbSKevin Wolf } 52030f15423cSAnthony Liguori } 52040f15423cSAnthony Liguori 52055a8a30dbSKevin Wolf void bdrv_invalidate_cache_all(Error **errp) 52060f15423cSAnthony Liguori { 52070f15423cSAnthony Liguori BlockDriverState *bs; 52085a8a30dbSKevin Wolf Error *local_err = NULL; 52090f15423cSAnthony Liguori 5210dc364f4cSBenoît Canet QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 5211ed78cda3SStefan Hajnoczi AioContext *aio_context = bdrv_get_aio_context(bs); 5212ed78cda3SStefan Hajnoczi 5213ed78cda3SStefan Hajnoczi aio_context_acquire(aio_context); 52145a8a30dbSKevin Wolf bdrv_invalidate_cache(bs, &local_err); 5215ed78cda3SStefan Hajnoczi aio_context_release(aio_context); 52165a8a30dbSKevin Wolf if (local_err) { 52175a8a30dbSKevin Wolf error_propagate(errp, local_err); 52185a8a30dbSKevin Wolf return; 52195a8a30dbSKevin Wolf } 52200f15423cSAnthony Liguori } 52210f15423cSAnthony Liguori } 52220f15423cSAnthony Liguori 522307f07615SPaolo Bonzini int bdrv_flush(BlockDriverState *bs) 522407f07615SPaolo Bonzini { 522507f07615SPaolo Bonzini Coroutine *co; 522607f07615SPaolo Bonzini RwCo rwco = { 522707f07615SPaolo Bonzini .bs = bs, 522807f07615SPaolo Bonzini .ret = NOT_DONE, 522907f07615SPaolo Bonzini }; 523007f07615SPaolo Bonzini 523107f07615SPaolo Bonzini if (qemu_in_coroutine()) { 523207f07615SPaolo Bonzini /* Fast-path if already in coroutine context */ 523307f07615SPaolo Bonzini bdrv_flush_co_entry(&rwco); 523407f07615SPaolo Bonzini } else { 52352572b37aSStefan Hajnoczi 
AioContext *aio_context = bdrv_get_aio_context(bs); 52362572b37aSStefan Hajnoczi 523707f07615SPaolo Bonzini co = qemu_coroutine_create(bdrv_flush_co_entry); 523807f07615SPaolo Bonzini qemu_coroutine_enter(co, &rwco); 523907f07615SPaolo Bonzini while (rwco.ret == NOT_DONE) { 52402572b37aSStefan Hajnoczi aio_poll(aio_context, true); 524107f07615SPaolo Bonzini } 524207f07615SPaolo Bonzini } 524307f07615SPaolo Bonzini 524407f07615SPaolo Bonzini return rwco.ret; 524507f07615SPaolo Bonzini } 5246e7a8a783SKevin Wolf 5247775aa8b6SKevin Wolf typedef struct DiscardCo { 5248775aa8b6SKevin Wolf BlockDriverState *bs; 5249775aa8b6SKevin Wolf int64_t sector_num; 5250775aa8b6SKevin Wolf int nb_sectors; 5251775aa8b6SKevin Wolf int ret; 5252775aa8b6SKevin Wolf } DiscardCo; 52534265d620SPaolo Bonzini static void coroutine_fn bdrv_discard_co_entry(void *opaque) 52544265d620SPaolo Bonzini { 5255775aa8b6SKevin Wolf DiscardCo *rwco = opaque; 52564265d620SPaolo Bonzini 52574265d620SPaolo Bonzini rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors); 52584265d620SPaolo Bonzini } 52594265d620SPaolo Bonzini 52604265d620SPaolo Bonzini int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, 52614265d620SPaolo Bonzini int nb_sectors) 52624265d620SPaolo Bonzini { 5263b9c64947SMax Reitz int max_discard, ret; 5264d51e9fe5SPaolo Bonzini 52654265d620SPaolo Bonzini if (!bs->drv) { 52664265d620SPaolo Bonzini return -ENOMEDIUM; 5267b9c64947SMax Reitz } 5268b9c64947SMax Reitz 5269b9c64947SMax Reitz ret = bdrv_check_request(bs, sector_num, nb_sectors); 5270b9c64947SMax Reitz if (ret < 0) { 5271b9c64947SMax Reitz return ret; 52724265d620SPaolo Bonzini } else if (bs->read_only) { 52734265d620SPaolo Bonzini return -EROFS; 5274df702c9bSPaolo Bonzini } 5275df702c9bSPaolo Bonzini 52768f0720ecSPaolo Bonzini bdrv_reset_dirty(bs, sector_num, nb_sectors); 5277df702c9bSPaolo Bonzini 52789e8f1835SPaolo Bonzini /* Do nothing if disabled. 
*/ 52799e8f1835SPaolo Bonzini if (!(bs->open_flags & BDRV_O_UNMAP)) { 52809e8f1835SPaolo Bonzini return 0; 52819e8f1835SPaolo Bonzini } 52829e8f1835SPaolo Bonzini 5283d51e9fe5SPaolo Bonzini if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) { 5284d51e9fe5SPaolo Bonzini return 0; 5285d51e9fe5SPaolo Bonzini } 52866f14da52SPeter Lieven 528775af1f34SPeter Lieven max_discard = MIN_NON_ZERO(bs->bl.max_discard, BDRV_REQUEST_MAX_SECTORS); 52886f14da52SPeter Lieven while (nb_sectors > 0) { 52896f14da52SPeter Lieven int ret; 52906f14da52SPeter Lieven int num = nb_sectors; 52916f14da52SPeter Lieven 52926f14da52SPeter Lieven /* align request */ 52936f14da52SPeter Lieven if (bs->bl.discard_alignment && 52946f14da52SPeter Lieven num >= bs->bl.discard_alignment && 52956f14da52SPeter Lieven sector_num % bs->bl.discard_alignment) { 52966f14da52SPeter Lieven if (num > bs->bl.discard_alignment) { 52976f14da52SPeter Lieven num = bs->bl.discard_alignment; 52986f14da52SPeter Lieven } 52996f14da52SPeter Lieven num -= sector_num % bs->bl.discard_alignment; 53006f14da52SPeter Lieven } 53016f14da52SPeter Lieven 53026f14da52SPeter Lieven /* limit request size */ 53036f14da52SPeter Lieven if (num > max_discard) { 53046f14da52SPeter Lieven num = max_discard; 53056f14da52SPeter Lieven } 53066f14da52SPeter Lieven 5307d51e9fe5SPaolo Bonzini if (bs->drv->bdrv_co_discard) { 53086f14da52SPeter Lieven ret = bs->drv->bdrv_co_discard(bs, sector_num, num); 5309d51e9fe5SPaolo Bonzini } else { 53107c84b1b8SMarkus Armbruster BlockAIOCB *acb; 53114265d620SPaolo Bonzini CoroutineIOCompletion co = { 53124265d620SPaolo Bonzini .coroutine = qemu_coroutine_self(), 53134265d620SPaolo Bonzini }; 53144265d620SPaolo Bonzini 53154265d620SPaolo Bonzini acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors, 53164265d620SPaolo Bonzini bdrv_co_io_em_complete, &co); 53174265d620SPaolo Bonzini if (acb == NULL) { 53184265d620SPaolo Bonzini return -EIO; 53194265d620SPaolo Bonzini } else { 53204265d620SPaolo 
Bonzini qemu_coroutine_yield(); 5321d51e9fe5SPaolo Bonzini ret = co.ret; 53224265d620SPaolo Bonzini } 5323d51e9fe5SPaolo Bonzini } 53247ce21016SPaolo Bonzini if (ret && ret != -ENOTSUP) { 5325d51e9fe5SPaolo Bonzini return ret; 5326d51e9fe5SPaolo Bonzini } 5327d51e9fe5SPaolo Bonzini 5328d51e9fe5SPaolo Bonzini sector_num += num; 5329d51e9fe5SPaolo Bonzini nb_sectors -= num; 5330d51e9fe5SPaolo Bonzini } 53314265d620SPaolo Bonzini return 0; 53324265d620SPaolo Bonzini } 53334265d620SPaolo Bonzini 53344265d620SPaolo Bonzini int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) 53354265d620SPaolo Bonzini { 53364265d620SPaolo Bonzini Coroutine *co; 5337775aa8b6SKevin Wolf DiscardCo rwco = { 53384265d620SPaolo Bonzini .bs = bs, 53394265d620SPaolo Bonzini .sector_num = sector_num, 53404265d620SPaolo Bonzini .nb_sectors = nb_sectors, 53414265d620SPaolo Bonzini .ret = NOT_DONE, 53424265d620SPaolo Bonzini }; 53434265d620SPaolo Bonzini 53444265d620SPaolo Bonzini if (qemu_in_coroutine()) { 53454265d620SPaolo Bonzini /* Fast-path if already in coroutine context */ 53464265d620SPaolo Bonzini bdrv_discard_co_entry(&rwco); 53474265d620SPaolo Bonzini } else { 53482572b37aSStefan Hajnoczi AioContext *aio_context = bdrv_get_aio_context(bs); 53492572b37aSStefan Hajnoczi 53504265d620SPaolo Bonzini co = qemu_coroutine_create(bdrv_discard_co_entry); 53514265d620SPaolo Bonzini qemu_coroutine_enter(co, &rwco); 53524265d620SPaolo Bonzini while (rwco.ret == NOT_DONE) { 53532572b37aSStefan Hajnoczi aio_poll(aio_context, true); 53544265d620SPaolo Bonzini } 53554265d620SPaolo Bonzini } 53564265d620SPaolo Bonzini 53574265d620SPaolo Bonzini return rwco.ret; 53584265d620SPaolo Bonzini } 53594265d620SPaolo Bonzini 5360f9f05dc5SKevin Wolf /**************************************************************/ 536119cb3738Sbellard /* removable device support */ 536219cb3738Sbellard 536319cb3738Sbellard /** 536419cb3738Sbellard * Return TRUE if the media is present 536519cb3738Sbellard */ 
536619cb3738Sbellard int bdrv_is_inserted(BlockDriverState *bs) 536719cb3738Sbellard { 536819cb3738Sbellard BlockDriver *drv = bs->drv; 5369a1aff5bfSMarkus Armbruster 537019cb3738Sbellard if (!drv) 537119cb3738Sbellard return 0; 537219cb3738Sbellard if (!drv->bdrv_is_inserted) 5373a1aff5bfSMarkus Armbruster return 1; 5374a1aff5bfSMarkus Armbruster return drv->bdrv_is_inserted(bs); 537519cb3738Sbellard } 537619cb3738Sbellard 537719cb3738Sbellard /** 53788e49ca46SMarkus Armbruster * Return whether the media changed since the last call to this 53798e49ca46SMarkus Armbruster * function, or -ENOTSUP if we don't know. Most drivers don't know. 538019cb3738Sbellard */ 538119cb3738Sbellard int bdrv_media_changed(BlockDriverState *bs) 538219cb3738Sbellard { 538319cb3738Sbellard BlockDriver *drv = bs->drv; 538419cb3738Sbellard 53858e49ca46SMarkus Armbruster if (drv && drv->bdrv_media_changed) { 53868e49ca46SMarkus Armbruster return drv->bdrv_media_changed(bs); 53878e49ca46SMarkus Armbruster } 53888e49ca46SMarkus Armbruster return -ENOTSUP; 538919cb3738Sbellard } 539019cb3738Sbellard 539119cb3738Sbellard /** 539219cb3738Sbellard * If eject_flag is TRUE, eject the media. 
Otherwise, close the tray 539319cb3738Sbellard */ 5394f36f3949SLuiz Capitulino void bdrv_eject(BlockDriverState *bs, bool eject_flag) 539519cb3738Sbellard { 539619cb3738Sbellard BlockDriver *drv = bs->drv; 5397bfb197e0SMarkus Armbruster const char *device_name; 539819cb3738Sbellard 5399822e1cd1SMarkus Armbruster if (drv && drv->bdrv_eject) { 5400822e1cd1SMarkus Armbruster drv->bdrv_eject(bs, eject_flag); 540119cb3738Sbellard } 54026f382ed2SLuiz Capitulino 5403bfb197e0SMarkus Armbruster device_name = bdrv_get_device_name(bs); 5404bfb197e0SMarkus Armbruster if (device_name[0] != '\0') { 5405bfb197e0SMarkus Armbruster qapi_event_send_device_tray_moved(device_name, 5406a5ee7bd4SWenchao Xia eject_flag, &error_abort); 54076f382ed2SLuiz Capitulino } 540819cb3738Sbellard } 540919cb3738Sbellard 541019cb3738Sbellard /** 541119cb3738Sbellard * Lock or unlock the media (if it is locked, the user won't be able 541219cb3738Sbellard * to eject it manually). 541319cb3738Sbellard */ 5414025e849aSMarkus Armbruster void bdrv_lock_medium(BlockDriverState *bs, bool locked) 541519cb3738Sbellard { 541619cb3738Sbellard BlockDriver *drv = bs->drv; 541719cb3738Sbellard 5418025e849aSMarkus Armbruster trace_bdrv_lock_medium(bs, locked); 5419b8c6d095SStefan Hajnoczi 5420025e849aSMarkus Armbruster if (drv && drv->bdrv_lock_medium) { 5421025e849aSMarkus Armbruster drv->bdrv_lock_medium(bs, locked); 542219cb3738Sbellard } 542319cb3738Sbellard } 5424985a03b0Sths 5425985a03b0Sths /* needed for generic scsi interface */ 5426985a03b0Sths 5427985a03b0Sths int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) 5428985a03b0Sths { 5429985a03b0Sths BlockDriver *drv = bs->drv; 5430985a03b0Sths 5431985a03b0Sths if (drv && drv->bdrv_ioctl) 5432985a03b0Sths return drv->bdrv_ioctl(bs, req, buf); 5433985a03b0Sths return -ENOTSUP; 5434985a03b0Sths } 54357d780669Saliguori 54367c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs, 5437221f715dSaliguori unsigned long int req, 
void *buf, 5438097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 54397d780669Saliguori { 5440221f715dSaliguori BlockDriver *drv = bs->drv; 54417d780669Saliguori 5442221f715dSaliguori if (drv && drv->bdrv_aio_ioctl) 5443221f715dSaliguori return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque); 5444221f715dSaliguori return NULL; 54457d780669Saliguori } 5446e268ca52Saliguori 54471b7fd729SPaolo Bonzini void bdrv_set_guest_block_size(BlockDriverState *bs, int align) 54487b6f9300SMarkus Armbruster { 54491b7fd729SPaolo Bonzini bs->guest_block_size = align; 54507b6f9300SMarkus Armbruster } 54517cd1e32aSlirans@il.ibm.com 5452e268ca52Saliguori void *qemu_blockalign(BlockDriverState *bs, size_t size) 5453e268ca52Saliguori { 5454339064d5SKevin Wolf return qemu_memalign(bdrv_opt_mem_align(bs), size); 5455e268ca52Saliguori } 54567cd1e32aSlirans@il.ibm.com 54579ebd8448SMax Reitz void *qemu_blockalign0(BlockDriverState *bs, size_t size) 54589ebd8448SMax Reitz { 54599ebd8448SMax Reitz return memset(qemu_blockalign(bs, size), 0, size); 54609ebd8448SMax Reitz } 54619ebd8448SMax Reitz 54627d2a35ccSKevin Wolf void *qemu_try_blockalign(BlockDriverState *bs, size_t size) 54637d2a35ccSKevin Wolf { 54647d2a35ccSKevin Wolf size_t align = bdrv_opt_mem_align(bs); 54657d2a35ccSKevin Wolf 54667d2a35ccSKevin Wolf /* Ensure that NULL is never returned on success */ 54677d2a35ccSKevin Wolf assert(align > 0); 54687d2a35ccSKevin Wolf if (size == 0) { 54697d2a35ccSKevin Wolf size = align; 54707d2a35ccSKevin Wolf } 54717d2a35ccSKevin Wolf 54727d2a35ccSKevin Wolf return qemu_try_memalign(align, size); 54737d2a35ccSKevin Wolf } 54747d2a35ccSKevin Wolf 54759ebd8448SMax Reitz void *qemu_try_blockalign0(BlockDriverState *bs, size_t size) 54769ebd8448SMax Reitz { 54779ebd8448SMax Reitz void *mem = qemu_try_blockalign(bs, size); 54789ebd8448SMax Reitz 54799ebd8448SMax Reitz if (mem) { 54809ebd8448SMax Reitz memset(mem, 0, size); 54819ebd8448SMax Reitz } 54829ebd8448SMax Reitz 54839ebd8448SMax 
Reitz return mem; 54849ebd8448SMax Reitz } 54859ebd8448SMax Reitz 5486c53b1c51SStefan Hajnoczi /* 5487c53b1c51SStefan Hajnoczi * Check if all memory in this vector is sector aligned. 5488c53b1c51SStefan Hajnoczi */ 5489c53b1c51SStefan Hajnoczi bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) 5490c53b1c51SStefan Hajnoczi { 5491c53b1c51SStefan Hajnoczi int i; 5492339064d5SKevin Wolf size_t alignment = bdrv_opt_mem_align(bs); 5493c53b1c51SStefan Hajnoczi 5494c53b1c51SStefan Hajnoczi for (i = 0; i < qiov->niov; i++) { 5495339064d5SKevin Wolf if ((uintptr_t) qiov->iov[i].iov_base % alignment) { 5496c53b1c51SStefan Hajnoczi return false; 5497c53b1c51SStefan Hajnoczi } 5498339064d5SKevin Wolf if (qiov->iov[i].iov_len % alignment) { 54991ff735bdSKevin Wolf return false; 55001ff735bdSKevin Wolf } 5501c53b1c51SStefan Hajnoczi } 5502c53b1c51SStefan Hajnoczi 5503c53b1c51SStefan Hajnoczi return true; 5504c53b1c51SStefan Hajnoczi } 5505c53b1c51SStefan Hajnoczi 55060db6e54aSFam Zheng BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name) 55070db6e54aSFam Zheng { 55080db6e54aSFam Zheng BdrvDirtyBitmap *bm; 55090db6e54aSFam Zheng 55100db6e54aSFam Zheng assert(name); 55110db6e54aSFam Zheng QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { 55120db6e54aSFam Zheng if (bm->name && !strcmp(name, bm->name)) { 55130db6e54aSFam Zheng return bm; 55140db6e54aSFam Zheng } 55150db6e54aSFam Zheng } 55160db6e54aSFam Zheng return NULL; 55170db6e54aSFam Zheng } 55180db6e54aSFam Zheng 55190db6e54aSFam Zheng void bdrv_dirty_bitmap_make_anon(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) 55200db6e54aSFam Zheng { 55210db6e54aSFam Zheng g_free(bitmap->name); 55220db6e54aSFam Zheng bitmap->name = NULL; 55230db6e54aSFam Zheng } 55240db6e54aSFam Zheng 55250db6e54aSFam Zheng BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, 55265fba6c0eSJohn Snow uint32_t granularity, 55270db6e54aSFam Zheng const char *name, 5528b8afb520SFam Zheng Error **errp) 
55297cd1e32aSlirans@il.ibm.com { 55307cd1e32aSlirans@il.ibm.com int64_t bitmap_size; 5531e4654d2dSFam Zheng BdrvDirtyBitmap *bitmap; 55325fba6c0eSJohn Snow uint32_t sector_granularity; 5533a55eb92cSJan Kiszka 553450717e94SPaolo Bonzini assert((granularity & (granularity - 1)) == 0); 553550717e94SPaolo Bonzini 55360db6e54aSFam Zheng if (name && bdrv_find_dirty_bitmap(bs, name)) { 55370db6e54aSFam Zheng error_setg(errp, "Bitmap already exists: %s", name); 55380db6e54aSFam Zheng return NULL; 55390db6e54aSFam Zheng } 55405fba6c0eSJohn Snow sector_granularity = granularity >> BDRV_SECTOR_BITS; 55415fba6c0eSJohn Snow assert(sector_granularity); 554257322b78SMarkus Armbruster bitmap_size = bdrv_nb_sectors(bs); 5543b8afb520SFam Zheng if (bitmap_size < 0) { 5544b8afb520SFam Zheng error_setg_errno(errp, -bitmap_size, "could not get length of device"); 5545b8afb520SFam Zheng errno = -bitmap_size; 5546b8afb520SFam Zheng return NULL; 5547b8afb520SFam Zheng } 55485839e53bSMarkus Armbruster bitmap = g_new0(BdrvDirtyBitmap, 1); 55495fba6c0eSJohn Snow bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity)); 55500db6e54aSFam Zheng bitmap->name = g_strdup(name); 5551*b8e6fb75SJohn Snow bitmap->disabled = false; 5552e4654d2dSFam Zheng QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list); 5553e4654d2dSFam Zheng return bitmap; 5554e4654d2dSFam Zheng } 5555e4654d2dSFam Zheng 5556*b8e6fb75SJohn Snow bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap) 5557*b8e6fb75SJohn Snow { 5558*b8e6fb75SJohn Snow return !bitmap->disabled; 5559*b8e6fb75SJohn Snow } 5560*b8e6fb75SJohn Snow 5561e4654d2dSFam Zheng void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) 5562e4654d2dSFam Zheng { 5563e4654d2dSFam Zheng BdrvDirtyBitmap *bm, *next; 5564e4654d2dSFam Zheng QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) { 5565e4654d2dSFam Zheng if (bm == bitmap) { 5566e4654d2dSFam Zheng QLIST_REMOVE(bitmap, list); 5567e4654d2dSFam Zheng hbitmap_free(bitmap->bitmap); 
55680db6e54aSFam Zheng g_free(bitmap->name); 5569e4654d2dSFam Zheng g_free(bitmap); 5570e4654d2dSFam Zheng return; 55717cd1e32aSlirans@il.ibm.com } 55727cd1e32aSlirans@il.ibm.com } 55737cd1e32aSlirans@il.ibm.com } 55747cd1e32aSlirans@il.ibm.com 5575*b8e6fb75SJohn Snow void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap) 5576*b8e6fb75SJohn Snow { 5577*b8e6fb75SJohn Snow bitmap->disabled = true; 5578*b8e6fb75SJohn Snow } 5579*b8e6fb75SJohn Snow 5580*b8e6fb75SJohn Snow void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap) 5581*b8e6fb75SJohn Snow { 5582*b8e6fb75SJohn Snow bitmap->disabled = false; 5583*b8e6fb75SJohn Snow } 5584*b8e6fb75SJohn Snow 558521b56835SFam Zheng BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs) 558621b56835SFam Zheng { 558721b56835SFam Zheng BdrvDirtyBitmap *bm; 558821b56835SFam Zheng BlockDirtyInfoList *list = NULL; 558921b56835SFam Zheng BlockDirtyInfoList **plist = &list; 559021b56835SFam Zheng 559121b56835SFam Zheng QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { 55925839e53bSMarkus Armbruster BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1); 55935839e53bSMarkus Armbruster BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1); 559421b56835SFam Zheng info->count = bdrv_get_dirty_count(bs, bm); 5595592fdd02SJohn Snow info->granularity = bdrv_dirty_bitmap_granularity(bm); 55960db6e54aSFam Zheng info->has_name = !!bm->name; 55970db6e54aSFam Zheng info->name = g_strdup(bm->name); 559821b56835SFam Zheng entry->value = info; 559921b56835SFam Zheng *plist = entry; 560021b56835SFam Zheng plist = &entry->next; 560121b56835SFam Zheng } 560221b56835SFam Zheng 560321b56835SFam Zheng return list; 560421b56835SFam Zheng } 560521b56835SFam Zheng 5606e4654d2dSFam Zheng int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector) 56077cd1e32aSlirans@il.ibm.com { 5608e4654d2dSFam Zheng if (bitmap) { 5609e4654d2dSFam Zheng return hbitmap_get(bitmap->bitmap, sector); 56107cd1e32aSlirans@il.ibm.com } else { 
56117cd1e32aSlirans@il.ibm.com return 0; 56127cd1e32aSlirans@il.ibm.com } 56137cd1e32aSlirans@il.ibm.com } 56147cd1e32aSlirans@il.ibm.com 5615341ebc2fSJohn Snow /** 5616341ebc2fSJohn Snow * Chooses a default granularity based on the existing cluster size, 5617341ebc2fSJohn Snow * but clamped between [4K, 64K]. Defaults to 64K in the case that there 5618341ebc2fSJohn Snow * is no cluster size information available. 5619341ebc2fSJohn Snow */ 5620341ebc2fSJohn Snow uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs) 5621341ebc2fSJohn Snow { 5622341ebc2fSJohn Snow BlockDriverInfo bdi; 5623341ebc2fSJohn Snow uint32_t granularity; 5624341ebc2fSJohn Snow 5625341ebc2fSJohn Snow if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) { 5626341ebc2fSJohn Snow granularity = MAX(4096, bdi.cluster_size); 5627341ebc2fSJohn Snow granularity = MIN(65536, granularity); 5628341ebc2fSJohn Snow } else { 5629341ebc2fSJohn Snow granularity = 65536; 5630341ebc2fSJohn Snow } 5631341ebc2fSJohn Snow 5632341ebc2fSJohn Snow return granularity; 5633341ebc2fSJohn Snow } 5634341ebc2fSJohn Snow 5635592fdd02SJohn Snow uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap) 5636592fdd02SJohn Snow { 5637592fdd02SJohn Snow return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap); 5638592fdd02SJohn Snow } 5639592fdd02SJohn Snow 5640e4654d2dSFam Zheng void bdrv_dirty_iter_init(BlockDriverState *bs, 5641e4654d2dSFam Zheng BdrvDirtyBitmap *bitmap, HBitmapIter *hbi) 56421755da16SPaolo Bonzini { 5643e4654d2dSFam Zheng hbitmap_iter_init(hbi, bitmap->bitmap, 0); 56441755da16SPaolo Bonzini } 56451755da16SPaolo Bonzini 5646c4237dfaSVladimir Sementsov-Ogievskiy void bdrv_set_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, 5647c4237dfaSVladimir Sementsov-Ogievskiy int64_t cur_sector, int nr_sectors) 5648c4237dfaSVladimir Sementsov-Ogievskiy { 5649*b8e6fb75SJohn Snow assert(bdrv_dirty_bitmap_enabled(bitmap)); 5650c4237dfaSVladimir Sementsov-Ogievskiy 
hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); 5651c4237dfaSVladimir Sementsov-Ogievskiy } 5652c4237dfaSVladimir Sementsov-Ogievskiy 5653c4237dfaSVladimir Sementsov-Ogievskiy void bdrv_reset_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, 5654c4237dfaSVladimir Sementsov-Ogievskiy int64_t cur_sector, int nr_sectors) 5655c4237dfaSVladimir Sementsov-Ogievskiy { 5656*b8e6fb75SJohn Snow assert(bdrv_dirty_bitmap_enabled(bitmap)); 5657c4237dfaSVladimir Sementsov-Ogievskiy hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); 5658c4237dfaSVladimir Sementsov-Ogievskiy } 5659c4237dfaSVladimir Sementsov-Ogievskiy 5660c4237dfaSVladimir Sementsov-Ogievskiy static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, 56611755da16SPaolo Bonzini int nr_sectors) 56621755da16SPaolo Bonzini { 5663e4654d2dSFam Zheng BdrvDirtyBitmap *bitmap; 5664e4654d2dSFam Zheng QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { 5665*b8e6fb75SJohn Snow if (!bdrv_dirty_bitmap_enabled(bitmap)) { 5666*b8e6fb75SJohn Snow continue; 5667*b8e6fb75SJohn Snow } 5668e4654d2dSFam Zheng hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); 5669e4654d2dSFam Zheng } 56701755da16SPaolo Bonzini } 56711755da16SPaolo Bonzini 5672c4237dfaSVladimir Sementsov-Ogievskiy static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, 5673c4237dfaSVladimir Sementsov-Ogievskiy int nr_sectors) 56747cd1e32aSlirans@il.ibm.com { 5675e4654d2dSFam Zheng BdrvDirtyBitmap *bitmap; 5676e4654d2dSFam Zheng QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { 5677*b8e6fb75SJohn Snow if (!bdrv_dirty_bitmap_enabled(bitmap)) { 5678*b8e6fb75SJohn Snow continue; 5679*b8e6fb75SJohn Snow } 5680e4654d2dSFam Zheng hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); 5681e4654d2dSFam Zheng } 56827cd1e32aSlirans@il.ibm.com } 5683aaa0eb75SLiran Schour 5684e4654d2dSFam Zheng int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) 5685aaa0eb75SLiran Schour { 5686e4654d2dSFam Zheng return 
hbitmap_count(bitmap->bitmap); 5687aaa0eb75SLiran Schour } 5688f88e1a42SJes Sorensen 56899fcb0251SFam Zheng /* Get a reference to bs */ 56909fcb0251SFam Zheng void bdrv_ref(BlockDriverState *bs) 56919fcb0251SFam Zheng { 56929fcb0251SFam Zheng bs->refcnt++; 56939fcb0251SFam Zheng } 56949fcb0251SFam Zheng 56959fcb0251SFam Zheng /* Release a previously grabbed reference to bs. 56969fcb0251SFam Zheng * If after releasing, reference count is zero, the BlockDriverState is 56979fcb0251SFam Zheng * deleted. */ 56989fcb0251SFam Zheng void bdrv_unref(BlockDriverState *bs) 56999fcb0251SFam Zheng { 57009a4d5ca6SJeff Cody if (!bs) { 57019a4d5ca6SJeff Cody return; 57029a4d5ca6SJeff Cody } 57039fcb0251SFam Zheng assert(bs->refcnt > 0); 57049fcb0251SFam Zheng if (--bs->refcnt == 0) { 57059fcb0251SFam Zheng bdrv_delete(bs); 57069fcb0251SFam Zheng } 57079fcb0251SFam Zheng } 57089fcb0251SFam Zheng 5709fbe40ff7SFam Zheng struct BdrvOpBlocker { 5710fbe40ff7SFam Zheng Error *reason; 5711fbe40ff7SFam Zheng QLIST_ENTRY(BdrvOpBlocker) list; 5712fbe40ff7SFam Zheng }; 5713fbe40ff7SFam Zheng 5714fbe40ff7SFam Zheng bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp) 5715fbe40ff7SFam Zheng { 5716fbe40ff7SFam Zheng BdrvOpBlocker *blocker; 5717fbe40ff7SFam Zheng assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 5718fbe40ff7SFam Zheng if (!QLIST_EMPTY(&bs->op_blockers[op])) { 5719fbe40ff7SFam Zheng blocker = QLIST_FIRST(&bs->op_blockers[op]); 5720fbe40ff7SFam Zheng if (errp) { 572181e5f78aSAlberto Garcia error_setg(errp, "Node '%s' is busy: %s", 572281e5f78aSAlberto Garcia bdrv_get_device_or_node_name(bs), 5723bfb197e0SMarkus Armbruster error_get_pretty(blocker->reason)); 5724fbe40ff7SFam Zheng } 5725fbe40ff7SFam Zheng return true; 5726fbe40ff7SFam Zheng } 5727fbe40ff7SFam Zheng return false; 5728fbe40ff7SFam Zheng } 5729fbe40ff7SFam Zheng 5730fbe40ff7SFam Zheng void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason) 5731fbe40ff7SFam Zheng { 5732fbe40ff7SFam 
Zheng BdrvOpBlocker *blocker; 5733fbe40ff7SFam Zheng assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 5734fbe40ff7SFam Zheng 57355839e53bSMarkus Armbruster blocker = g_new0(BdrvOpBlocker, 1); 5736fbe40ff7SFam Zheng blocker->reason = reason; 5737fbe40ff7SFam Zheng QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list); 5738fbe40ff7SFam Zheng } 5739fbe40ff7SFam Zheng 5740fbe40ff7SFam Zheng void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason) 5741fbe40ff7SFam Zheng { 5742fbe40ff7SFam Zheng BdrvOpBlocker *blocker, *next; 5743fbe40ff7SFam Zheng assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 5744fbe40ff7SFam Zheng QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) { 5745fbe40ff7SFam Zheng if (blocker->reason == reason) { 5746fbe40ff7SFam Zheng QLIST_REMOVE(blocker, list); 5747fbe40ff7SFam Zheng g_free(blocker); 5748fbe40ff7SFam Zheng } 5749fbe40ff7SFam Zheng } 5750fbe40ff7SFam Zheng } 5751fbe40ff7SFam Zheng 5752fbe40ff7SFam Zheng void bdrv_op_block_all(BlockDriverState *bs, Error *reason) 5753fbe40ff7SFam Zheng { 5754fbe40ff7SFam Zheng int i; 5755fbe40ff7SFam Zheng for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 5756fbe40ff7SFam Zheng bdrv_op_block(bs, i, reason); 5757fbe40ff7SFam Zheng } 5758fbe40ff7SFam Zheng } 5759fbe40ff7SFam Zheng 5760fbe40ff7SFam Zheng void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason) 5761fbe40ff7SFam Zheng { 5762fbe40ff7SFam Zheng int i; 5763fbe40ff7SFam Zheng for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 5764fbe40ff7SFam Zheng bdrv_op_unblock(bs, i, reason); 5765fbe40ff7SFam Zheng } 5766fbe40ff7SFam Zheng } 5767fbe40ff7SFam Zheng 5768fbe40ff7SFam Zheng bool bdrv_op_blocker_is_empty(BlockDriverState *bs) 5769fbe40ff7SFam Zheng { 5770fbe40ff7SFam Zheng int i; 5771fbe40ff7SFam Zheng 5772fbe40ff7SFam Zheng for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 5773fbe40ff7SFam Zheng if (!QLIST_EMPTY(&bs->op_blockers[i])) { 5774fbe40ff7SFam Zheng return false; 5775fbe40ff7SFam Zheng } 5776fbe40ff7SFam Zheng } 5777fbe40ff7SFam 
Zheng return true; 5778fbe40ff7SFam Zheng } 5779fbe40ff7SFam Zheng 578028a7282aSLuiz Capitulino void bdrv_iostatus_enable(BlockDriverState *bs) 578128a7282aSLuiz Capitulino { 5782d6bf279eSLuiz Capitulino bs->iostatus_enabled = true; 578358e21ef5SLuiz Capitulino bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK; 578428a7282aSLuiz Capitulino } 578528a7282aSLuiz Capitulino 578628a7282aSLuiz Capitulino /* The I/O status is only enabled if the drive explicitly 578728a7282aSLuiz Capitulino * enables it _and_ the VM is configured to stop on errors */ 578828a7282aSLuiz Capitulino bool bdrv_iostatus_is_enabled(const BlockDriverState *bs) 578928a7282aSLuiz Capitulino { 5790d6bf279eSLuiz Capitulino return (bs->iostatus_enabled && 579192aa5c6dSPaolo Bonzini (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC || 579292aa5c6dSPaolo Bonzini bs->on_write_error == BLOCKDEV_ON_ERROR_STOP || 579392aa5c6dSPaolo Bonzini bs->on_read_error == BLOCKDEV_ON_ERROR_STOP)); 579428a7282aSLuiz Capitulino } 579528a7282aSLuiz Capitulino 579628a7282aSLuiz Capitulino void bdrv_iostatus_disable(BlockDriverState *bs) 579728a7282aSLuiz Capitulino { 5798d6bf279eSLuiz Capitulino bs->iostatus_enabled = false; 579928a7282aSLuiz Capitulino } 580028a7282aSLuiz Capitulino 580128a7282aSLuiz Capitulino void bdrv_iostatus_reset(BlockDriverState *bs) 580228a7282aSLuiz Capitulino { 580328a7282aSLuiz Capitulino if (bdrv_iostatus_is_enabled(bs)) { 580458e21ef5SLuiz Capitulino bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK; 58053bd293c3SPaolo Bonzini if (bs->job) { 58063bd293c3SPaolo Bonzini block_job_iostatus_reset(bs->job); 58073bd293c3SPaolo Bonzini } 580828a7282aSLuiz Capitulino } 580928a7282aSLuiz Capitulino } 581028a7282aSLuiz Capitulino 581128a7282aSLuiz Capitulino void bdrv_iostatus_set_err(BlockDriverState *bs, int error) 581228a7282aSLuiz Capitulino { 58133e1caa5fSPaolo Bonzini assert(bdrv_iostatus_is_enabled(bs)); 58143e1caa5fSPaolo Bonzini if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { 581558e21ef5SLuiz Capitulino 
bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE : 581658e21ef5SLuiz Capitulino BLOCK_DEVICE_IO_STATUS_FAILED; 581728a7282aSLuiz Capitulino } 581828a7282aSLuiz Capitulino } 581928a7282aSLuiz Capitulino 5820d92ada22SLuiz Capitulino void bdrv_img_create(const char *filename, const char *fmt, 5821f88e1a42SJes Sorensen const char *base_filename, const char *base_fmt, 5822f382d43aSMiroslav Rezanina char *options, uint64_t img_size, int flags, 5823f382d43aSMiroslav Rezanina Error **errp, bool quiet) 5824f88e1a42SJes Sorensen { 582583d0521aSChunyan Liu QemuOptsList *create_opts = NULL; 582683d0521aSChunyan Liu QemuOpts *opts = NULL; 582783d0521aSChunyan Liu const char *backing_fmt, *backing_file; 582883d0521aSChunyan Liu int64_t size; 5829f88e1a42SJes Sorensen BlockDriver *drv, *proto_drv; 583096df67d1SStefan Hajnoczi BlockDriver *backing_drv = NULL; 5831cc84d90fSMax Reitz Error *local_err = NULL; 5832f88e1a42SJes Sorensen int ret = 0; 5833f88e1a42SJes Sorensen 5834f88e1a42SJes Sorensen /* Find driver and parse its options */ 5835f88e1a42SJes Sorensen drv = bdrv_find_format(fmt); 5836f88e1a42SJes Sorensen if (!drv) { 583771c79813SLuiz Capitulino error_setg(errp, "Unknown file format '%s'", fmt); 5838d92ada22SLuiz Capitulino return; 5839f88e1a42SJes Sorensen } 5840f88e1a42SJes Sorensen 5841b65a5e12SMax Reitz proto_drv = bdrv_find_protocol(filename, true, errp); 5842f88e1a42SJes Sorensen if (!proto_drv) { 5843d92ada22SLuiz Capitulino return; 5844f88e1a42SJes Sorensen } 5845f88e1a42SJes Sorensen 5846c6149724SMax Reitz if (!drv->create_opts) { 5847c6149724SMax Reitz error_setg(errp, "Format driver '%s' does not support image creation", 5848c6149724SMax Reitz drv->format_name); 5849c6149724SMax Reitz return; 5850c6149724SMax Reitz } 5851c6149724SMax Reitz 5852c6149724SMax Reitz if (!proto_drv->create_opts) { 5853c6149724SMax Reitz error_setg(errp, "Protocol driver '%s' does not support image creation", 5854c6149724SMax Reitz proto_drv->format_name); 
5855c6149724SMax Reitz return; 5856c6149724SMax Reitz } 5857c6149724SMax Reitz 5858c282e1fdSChunyan Liu create_opts = qemu_opts_append(create_opts, drv->create_opts); 5859c282e1fdSChunyan Liu create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); 5860f88e1a42SJes Sorensen 5861f88e1a42SJes Sorensen /* Create parameter list with default values */ 586283d0521aSChunyan Liu opts = qemu_opts_create(create_opts, NULL, 0, &error_abort); 586339101f25SMarkus Armbruster qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort); 5864f88e1a42SJes Sorensen 5865f88e1a42SJes Sorensen /* Parse -o options */ 5866f88e1a42SJes Sorensen if (options) { 5867dc523cd3SMarkus Armbruster qemu_opts_do_parse(opts, options, NULL, &local_err); 5868dc523cd3SMarkus Armbruster if (local_err) { 5869dc523cd3SMarkus Armbruster error_report_err(local_err); 5870dc523cd3SMarkus Armbruster local_err = NULL; 587183d0521aSChunyan Liu error_setg(errp, "Invalid options for file format '%s'", fmt); 5872f88e1a42SJes Sorensen goto out; 5873f88e1a42SJes Sorensen } 5874f88e1a42SJes Sorensen } 5875f88e1a42SJes Sorensen 5876f88e1a42SJes Sorensen if (base_filename) { 5877f43e47dbSMarkus Armbruster qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err); 58786be4194bSMarkus Armbruster if (local_err) { 587971c79813SLuiz Capitulino error_setg(errp, "Backing file not supported for file format '%s'", 588071c79813SLuiz Capitulino fmt); 5881f88e1a42SJes Sorensen goto out; 5882f88e1a42SJes Sorensen } 5883f88e1a42SJes Sorensen } 5884f88e1a42SJes Sorensen 5885f88e1a42SJes Sorensen if (base_fmt) { 5886f43e47dbSMarkus Armbruster qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err); 58876be4194bSMarkus Armbruster if (local_err) { 588871c79813SLuiz Capitulino error_setg(errp, "Backing file format not supported for file " 588971c79813SLuiz Capitulino "format '%s'", fmt); 5890f88e1a42SJes Sorensen goto out; 5891f88e1a42SJes Sorensen } 5892f88e1a42SJes Sorensen } 5893f88e1a42SJes 
Sorensen 589483d0521aSChunyan Liu backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE); 589583d0521aSChunyan Liu if (backing_file) { 589683d0521aSChunyan Liu if (!strcmp(filename, backing_file)) { 589771c79813SLuiz Capitulino error_setg(errp, "Error: Trying to create an image with the " 589871c79813SLuiz Capitulino "same filename as the backing file"); 5899792da93aSJes Sorensen goto out; 5900792da93aSJes Sorensen } 5901792da93aSJes Sorensen } 5902792da93aSJes Sorensen 590383d0521aSChunyan Liu backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT); 590483d0521aSChunyan Liu if (backing_fmt) { 590583d0521aSChunyan Liu backing_drv = bdrv_find_format(backing_fmt); 590696df67d1SStefan Hajnoczi if (!backing_drv) { 590771c79813SLuiz Capitulino error_setg(errp, "Unknown backing file format '%s'", 590883d0521aSChunyan Liu backing_fmt); 5909f88e1a42SJes Sorensen goto out; 5910f88e1a42SJes Sorensen } 5911f88e1a42SJes Sorensen } 5912f88e1a42SJes Sorensen 5913f88e1a42SJes Sorensen // The size for the image must always be specified, with one exception: 5914f88e1a42SJes Sorensen // If we are using a backing file, we can obtain the size from there 591583d0521aSChunyan Liu size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0); 591683d0521aSChunyan Liu if (size == -1) { 591783d0521aSChunyan Liu if (backing_file) { 591866f6b814SMax Reitz BlockDriverState *bs; 591929168018SMax Reitz char *full_backing = g_new0(char, PATH_MAX); 592052bf1e72SMarkus Armbruster int64_t size; 592163090dacSPaolo Bonzini int back_flags; 592263090dacSPaolo Bonzini 592329168018SMax Reitz bdrv_get_full_backing_filename_from_filename(filename, backing_file, 592429168018SMax Reitz full_backing, PATH_MAX, 592529168018SMax Reitz &local_err); 592629168018SMax Reitz if (local_err) { 592729168018SMax Reitz g_free(full_backing); 592829168018SMax Reitz goto out; 592929168018SMax Reitz } 593029168018SMax Reitz 593163090dacSPaolo Bonzini /* backing files always opened read-only */ 593263090dacSPaolo Bonzini back_flags = 
593363090dacSPaolo Bonzini flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); 5934f88e1a42SJes Sorensen 5935f67503e5SMax Reitz bs = NULL; 593629168018SMax Reitz ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags, 5937cc84d90fSMax Reitz backing_drv, &local_err); 593829168018SMax Reitz g_free(full_backing); 5939f88e1a42SJes Sorensen if (ret < 0) { 5940f88e1a42SJes Sorensen goto out; 5941f88e1a42SJes Sorensen } 594252bf1e72SMarkus Armbruster size = bdrv_getlength(bs); 594352bf1e72SMarkus Armbruster if (size < 0) { 594452bf1e72SMarkus Armbruster error_setg_errno(errp, -size, "Could not get size of '%s'", 594552bf1e72SMarkus Armbruster backing_file); 594652bf1e72SMarkus Armbruster bdrv_unref(bs); 594752bf1e72SMarkus Armbruster goto out; 594852bf1e72SMarkus Armbruster } 5949f88e1a42SJes Sorensen 595039101f25SMarkus Armbruster qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort); 595166f6b814SMax Reitz 595266f6b814SMax Reitz bdrv_unref(bs); 5953f88e1a42SJes Sorensen } else { 595471c79813SLuiz Capitulino error_setg(errp, "Image creation needs a size parameter"); 5955f88e1a42SJes Sorensen goto out; 5956f88e1a42SJes Sorensen } 5957f88e1a42SJes Sorensen } 5958f88e1a42SJes Sorensen 5959f382d43aSMiroslav Rezanina if (!quiet) { 5960f88e1a42SJes Sorensen printf("Formatting '%s', fmt=%s", filename, fmt); 596143c5d8f8SFam Zheng qemu_opts_print(opts, " "); 5962f88e1a42SJes Sorensen puts(""); 5963f382d43aSMiroslav Rezanina } 596483d0521aSChunyan Liu 5965c282e1fdSChunyan Liu ret = bdrv_create(drv, filename, opts, &local_err); 596683d0521aSChunyan Liu 5967cc84d90fSMax Reitz if (ret == -EFBIG) { 5968cc84d90fSMax Reitz /* This is generally a better message than whatever the driver would 5969cc84d90fSMax Reitz * deliver (especially because of the cluster_size_hint), since that 5970cc84d90fSMax Reitz * is most probably not much different from "image too large". 
*/ 5971f3f4d2c0SKevin Wolf const char *cluster_size_hint = ""; 597283d0521aSChunyan Liu if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) { 5973f3f4d2c0SKevin Wolf cluster_size_hint = " (try using a larger cluster size)"; 5974f3f4d2c0SKevin Wolf } 5975cc84d90fSMax Reitz error_setg(errp, "The image size is too large for file format '%s'" 5976cc84d90fSMax Reitz "%s", fmt, cluster_size_hint); 5977cc84d90fSMax Reitz error_free(local_err); 5978cc84d90fSMax Reitz local_err = NULL; 5979f88e1a42SJes Sorensen } 5980f88e1a42SJes Sorensen 5981f88e1a42SJes Sorensen out: 598283d0521aSChunyan Liu qemu_opts_del(opts); 598383d0521aSChunyan Liu qemu_opts_free(create_opts); 598484d18f06SMarkus Armbruster if (local_err) { 5985cc84d90fSMax Reitz error_propagate(errp, local_err); 5986cc84d90fSMax Reitz } 5987f88e1a42SJes Sorensen } 598885d126f3SStefan Hajnoczi 598985d126f3SStefan Hajnoczi AioContext *bdrv_get_aio_context(BlockDriverState *bs) 599085d126f3SStefan Hajnoczi { 5991dcd04228SStefan Hajnoczi return bs->aio_context; 5992dcd04228SStefan Hajnoczi } 5993dcd04228SStefan Hajnoczi 5994dcd04228SStefan Hajnoczi void bdrv_detach_aio_context(BlockDriverState *bs) 5995dcd04228SStefan Hajnoczi { 599633384421SMax Reitz BdrvAioNotifier *baf; 599733384421SMax Reitz 5998dcd04228SStefan Hajnoczi if (!bs->drv) { 5999dcd04228SStefan Hajnoczi return; 6000dcd04228SStefan Hajnoczi } 6001dcd04228SStefan Hajnoczi 600233384421SMax Reitz QLIST_FOREACH(baf, &bs->aio_notifiers, list) { 600333384421SMax Reitz baf->detach_aio_context(baf->opaque); 600433384421SMax Reitz } 600533384421SMax Reitz 600613af91ebSStefan Hajnoczi if (bs->io_limits_enabled) { 600713af91ebSStefan Hajnoczi throttle_detach_aio_context(&bs->throttle_state); 600813af91ebSStefan Hajnoczi } 6009dcd04228SStefan Hajnoczi if (bs->drv->bdrv_detach_aio_context) { 6010dcd04228SStefan Hajnoczi bs->drv->bdrv_detach_aio_context(bs); 6011dcd04228SStefan Hajnoczi } 6012dcd04228SStefan Hajnoczi if (bs->file) { 6013dcd04228SStefan Hajnoczi 
bdrv_detach_aio_context(bs->file); 6014dcd04228SStefan Hajnoczi } 6015dcd04228SStefan Hajnoczi if (bs->backing_hd) { 6016dcd04228SStefan Hajnoczi bdrv_detach_aio_context(bs->backing_hd); 6017dcd04228SStefan Hajnoczi } 6018dcd04228SStefan Hajnoczi 6019dcd04228SStefan Hajnoczi bs->aio_context = NULL; 6020dcd04228SStefan Hajnoczi } 6021dcd04228SStefan Hajnoczi 6022dcd04228SStefan Hajnoczi void bdrv_attach_aio_context(BlockDriverState *bs, 6023dcd04228SStefan Hajnoczi AioContext *new_context) 6024dcd04228SStefan Hajnoczi { 602533384421SMax Reitz BdrvAioNotifier *ban; 602633384421SMax Reitz 6027dcd04228SStefan Hajnoczi if (!bs->drv) { 6028dcd04228SStefan Hajnoczi return; 6029dcd04228SStefan Hajnoczi } 6030dcd04228SStefan Hajnoczi 6031dcd04228SStefan Hajnoczi bs->aio_context = new_context; 6032dcd04228SStefan Hajnoczi 6033dcd04228SStefan Hajnoczi if (bs->backing_hd) { 6034dcd04228SStefan Hajnoczi bdrv_attach_aio_context(bs->backing_hd, new_context); 6035dcd04228SStefan Hajnoczi } 6036dcd04228SStefan Hajnoczi if (bs->file) { 6037dcd04228SStefan Hajnoczi bdrv_attach_aio_context(bs->file, new_context); 6038dcd04228SStefan Hajnoczi } 6039dcd04228SStefan Hajnoczi if (bs->drv->bdrv_attach_aio_context) { 6040dcd04228SStefan Hajnoczi bs->drv->bdrv_attach_aio_context(bs, new_context); 6041dcd04228SStefan Hajnoczi } 604213af91ebSStefan Hajnoczi if (bs->io_limits_enabled) { 604313af91ebSStefan Hajnoczi throttle_attach_aio_context(&bs->throttle_state, new_context); 604413af91ebSStefan Hajnoczi } 604533384421SMax Reitz 604633384421SMax Reitz QLIST_FOREACH(ban, &bs->aio_notifiers, list) { 604733384421SMax Reitz ban->attached_aio_context(new_context, ban->opaque); 604833384421SMax Reitz } 6049dcd04228SStefan Hajnoczi } 6050dcd04228SStefan Hajnoczi 6051dcd04228SStefan Hajnoczi void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) 6052dcd04228SStefan Hajnoczi { 6053dcd04228SStefan Hajnoczi bdrv_drain_all(); /* ensure there are no in-flight requests */ 
6054dcd04228SStefan Hajnoczi 6055dcd04228SStefan Hajnoczi bdrv_detach_aio_context(bs); 6056dcd04228SStefan Hajnoczi 6057dcd04228SStefan Hajnoczi /* This function executes in the old AioContext so acquire the new one in 6058dcd04228SStefan Hajnoczi * case it runs in a different thread. 6059dcd04228SStefan Hajnoczi */ 6060dcd04228SStefan Hajnoczi aio_context_acquire(new_context); 6061dcd04228SStefan Hajnoczi bdrv_attach_aio_context(bs, new_context); 6062dcd04228SStefan Hajnoczi aio_context_release(new_context); 606385d126f3SStefan Hajnoczi } 6064d616b224SStefan Hajnoczi 606533384421SMax Reitz void bdrv_add_aio_context_notifier(BlockDriverState *bs, 606633384421SMax Reitz void (*attached_aio_context)(AioContext *new_context, void *opaque), 606733384421SMax Reitz void (*detach_aio_context)(void *opaque), void *opaque) 606833384421SMax Reitz { 606933384421SMax Reitz BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1); 607033384421SMax Reitz *ban = (BdrvAioNotifier){ 607133384421SMax Reitz .attached_aio_context = attached_aio_context, 607233384421SMax Reitz .detach_aio_context = detach_aio_context, 607333384421SMax Reitz .opaque = opaque 607433384421SMax Reitz }; 607533384421SMax Reitz 607633384421SMax Reitz QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list); 607733384421SMax Reitz } 607833384421SMax Reitz 607933384421SMax Reitz void bdrv_remove_aio_context_notifier(BlockDriverState *bs, 608033384421SMax Reitz void (*attached_aio_context)(AioContext *, 608133384421SMax Reitz void *), 608233384421SMax Reitz void (*detach_aio_context)(void *), 608333384421SMax Reitz void *opaque) 608433384421SMax Reitz { 608533384421SMax Reitz BdrvAioNotifier *ban, *ban_next; 608633384421SMax Reitz 608733384421SMax Reitz QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { 608833384421SMax Reitz if (ban->attached_aio_context == attached_aio_context && 608933384421SMax Reitz ban->detach_aio_context == detach_aio_context && 609033384421SMax Reitz ban->opaque == opaque) 609133384421SMax 
Reitz { 609233384421SMax Reitz QLIST_REMOVE(ban, list); 609333384421SMax Reitz g_free(ban); 609433384421SMax Reitz 609533384421SMax Reitz return; 609633384421SMax Reitz } 609733384421SMax Reitz } 609833384421SMax Reitz 609933384421SMax Reitz abort(); 610033384421SMax Reitz } 610133384421SMax Reitz 6102d616b224SStefan Hajnoczi void bdrv_add_before_write_notifier(BlockDriverState *bs, 6103d616b224SStefan Hajnoczi NotifierWithReturn *notifier) 6104d616b224SStefan Hajnoczi { 6105d616b224SStefan Hajnoczi notifier_with_return_list_add(&bs->before_write_notifiers, notifier); 6106d616b224SStefan Hajnoczi } 61076f176b48SMax Reitz 610877485434SMax Reitz int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts, 610977485434SMax Reitz BlockDriverAmendStatusCB *status_cb) 61106f176b48SMax Reitz { 6111c282e1fdSChunyan Liu if (!bs->drv->bdrv_amend_options) { 61126f176b48SMax Reitz return -ENOTSUP; 61136f176b48SMax Reitz } 611477485434SMax Reitz return bs->drv->bdrv_amend_options(bs, opts, status_cb); 61156f176b48SMax Reitz } 6116f6186f49SBenoît Canet 6117b5042a36SBenoît Canet /* This function will be called by the bdrv_recurse_is_first_non_filter method 6118b5042a36SBenoît Canet * of block filter and by bdrv_is_first_non_filter. 6119b5042a36SBenoît Canet * It is used to test if the given bs is the candidate or recurse more in the 6120b5042a36SBenoît Canet * node graph. 6121212a5a8fSBenoît Canet */ 6122212a5a8fSBenoît Canet bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs, 6123212a5a8fSBenoît Canet BlockDriverState *candidate) 6124f6186f49SBenoît Canet { 6125b5042a36SBenoît Canet /* return false if basic checks fails */ 6126b5042a36SBenoît Canet if (!bs || !bs->drv) { 6127b5042a36SBenoît Canet return false; 6128b5042a36SBenoît Canet } 6129b5042a36SBenoît Canet 6130b5042a36SBenoît Canet /* the code reached a non block filter driver -> check if the bs is 6131b5042a36SBenoît Canet * the same as the candidate. It's the recursion termination condition. 
6132b5042a36SBenoît Canet */ 6133b5042a36SBenoît Canet if (!bs->drv->is_filter) { 6134b5042a36SBenoît Canet return bs == candidate; 6135b5042a36SBenoît Canet } 6136b5042a36SBenoît Canet /* Down this path the driver is a block filter driver */ 6137b5042a36SBenoît Canet 6138b5042a36SBenoît Canet /* If the block filter recursion method is defined use it to recurse down 6139b5042a36SBenoît Canet * the node graph. 6140b5042a36SBenoît Canet */ 6141b5042a36SBenoît Canet if (bs->drv->bdrv_recurse_is_first_non_filter) { 6142212a5a8fSBenoît Canet return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate); 6143212a5a8fSBenoît Canet } 6144212a5a8fSBenoît Canet 6145b5042a36SBenoît Canet /* the driver is a block filter but don't allow to recurse -> return false 6146b5042a36SBenoît Canet */ 6147b5042a36SBenoît Canet return false; 6148212a5a8fSBenoît Canet } 6149212a5a8fSBenoît Canet 6150212a5a8fSBenoît Canet /* This function checks if the candidate is the first non filter bs down it's 6151212a5a8fSBenoît Canet * bs chain. Since we don't have pointers to parents it explore all bs chains 6152212a5a8fSBenoît Canet * from the top. Some filters can choose not to pass down the recursion. 
6153212a5a8fSBenoît Canet */ 6154212a5a8fSBenoît Canet bool bdrv_is_first_non_filter(BlockDriverState *candidate) 6155212a5a8fSBenoît Canet { 6156212a5a8fSBenoît Canet BlockDriverState *bs; 6157212a5a8fSBenoît Canet 6158212a5a8fSBenoît Canet /* walk down the bs forest recursively */ 6159212a5a8fSBenoît Canet QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 6160212a5a8fSBenoît Canet bool perm; 6161212a5a8fSBenoît Canet 6162b5042a36SBenoît Canet /* try to recurse in this top level bs */ 6163e6dc8a1fSKevin Wolf perm = bdrv_recurse_is_first_non_filter(bs, candidate); 6164212a5a8fSBenoît Canet 6165212a5a8fSBenoît Canet /* candidate is the first non filter */ 6166212a5a8fSBenoît Canet if (perm) { 6167212a5a8fSBenoît Canet return true; 6168212a5a8fSBenoît Canet } 6169212a5a8fSBenoît Canet } 6170212a5a8fSBenoît Canet 6171212a5a8fSBenoît Canet return false; 6172f6186f49SBenoît Canet } 617309158f00SBenoît Canet 617409158f00SBenoît Canet BlockDriverState *check_to_replace_node(const char *node_name, Error **errp) 617509158f00SBenoît Canet { 617609158f00SBenoît Canet BlockDriverState *to_replace_bs = bdrv_find_node(node_name); 61775a7e7a0bSStefan Hajnoczi AioContext *aio_context; 61785a7e7a0bSStefan Hajnoczi 617909158f00SBenoît Canet if (!to_replace_bs) { 618009158f00SBenoît Canet error_setg(errp, "Node name '%s' not found", node_name); 618109158f00SBenoît Canet return NULL; 618209158f00SBenoît Canet } 618309158f00SBenoît Canet 61845a7e7a0bSStefan Hajnoczi aio_context = bdrv_get_aio_context(to_replace_bs); 61855a7e7a0bSStefan Hajnoczi aio_context_acquire(aio_context); 61865a7e7a0bSStefan Hajnoczi 618709158f00SBenoît Canet if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) { 61885a7e7a0bSStefan Hajnoczi to_replace_bs = NULL; 61895a7e7a0bSStefan Hajnoczi goto out; 619009158f00SBenoît Canet } 619109158f00SBenoît Canet 619209158f00SBenoît Canet /* We don't want arbitrary node of the BDS chain to be replaced only the top 619309158f00SBenoît Canet * most non filter 
in order to prevent data corruption. 619409158f00SBenoît Canet * Another benefit is that this tests exclude backing files which are 619509158f00SBenoît Canet * blocked by the backing blockers. 619609158f00SBenoît Canet */ 619709158f00SBenoît Canet if (!bdrv_is_first_non_filter(to_replace_bs)) { 619809158f00SBenoît Canet error_setg(errp, "Only top most non filter can be replaced"); 61995a7e7a0bSStefan Hajnoczi to_replace_bs = NULL; 62005a7e7a0bSStefan Hajnoczi goto out; 620109158f00SBenoît Canet } 620209158f00SBenoît Canet 62035a7e7a0bSStefan Hajnoczi out: 62045a7e7a0bSStefan Hajnoczi aio_context_release(aio_context); 620509158f00SBenoît Canet return to_replace_bs; 620609158f00SBenoît Canet } 6207448ad91dSMing Lei 6208448ad91dSMing Lei void bdrv_io_plug(BlockDriverState *bs) 6209448ad91dSMing Lei { 6210448ad91dSMing Lei BlockDriver *drv = bs->drv; 6211448ad91dSMing Lei if (drv && drv->bdrv_io_plug) { 6212448ad91dSMing Lei drv->bdrv_io_plug(bs); 6213448ad91dSMing Lei } else if (bs->file) { 6214448ad91dSMing Lei bdrv_io_plug(bs->file); 6215448ad91dSMing Lei } 6216448ad91dSMing Lei } 6217448ad91dSMing Lei 6218448ad91dSMing Lei void bdrv_io_unplug(BlockDriverState *bs) 6219448ad91dSMing Lei { 6220448ad91dSMing Lei BlockDriver *drv = bs->drv; 6221448ad91dSMing Lei if (drv && drv->bdrv_io_unplug) { 6222448ad91dSMing Lei drv->bdrv_io_unplug(bs); 6223448ad91dSMing Lei } else if (bs->file) { 6224448ad91dSMing Lei bdrv_io_unplug(bs->file); 6225448ad91dSMing Lei } 6226448ad91dSMing Lei } 6227448ad91dSMing Lei 6228448ad91dSMing Lei void bdrv_flush_io_queue(BlockDriverState *bs) 6229448ad91dSMing Lei { 6230448ad91dSMing Lei BlockDriver *drv = bs->drv; 6231448ad91dSMing Lei if (drv && drv->bdrv_flush_io_queue) { 6232448ad91dSMing Lei drv->bdrv_flush_io_queue(bs); 6233448ad91dSMing Lei } else if (bs->file) { 6234448ad91dSMing Lei bdrv_flush_io_queue(bs->file); 6235448ad91dSMing Lei } 6236448ad91dSMing Lei } 623791af7014SMax Reitz 623891af7014SMax Reitz static bool 
append_open_options(QDict *d, BlockDriverState *bs) 623991af7014SMax Reitz { 624091af7014SMax Reitz const QDictEntry *entry; 624191af7014SMax Reitz bool found_any = false; 624291af7014SMax Reitz 624391af7014SMax Reitz for (entry = qdict_first(bs->options); entry; 624491af7014SMax Reitz entry = qdict_next(bs->options, entry)) 624591af7014SMax Reitz { 624691af7014SMax Reitz /* Only take options for this level and exclude all non-driver-specific 624791af7014SMax Reitz * options */ 624891af7014SMax Reitz if (!strchr(qdict_entry_key(entry), '.') && 624991af7014SMax Reitz strcmp(qdict_entry_key(entry), "node-name")) 625091af7014SMax Reitz { 625191af7014SMax Reitz qobject_incref(qdict_entry_value(entry)); 625291af7014SMax Reitz qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry)); 625391af7014SMax Reitz found_any = true; 625491af7014SMax Reitz } 625591af7014SMax Reitz } 625691af7014SMax Reitz 625791af7014SMax Reitz return found_any; 625891af7014SMax Reitz } 625991af7014SMax Reitz 626091af7014SMax Reitz /* Updates the following BDS fields: 626191af7014SMax Reitz * - exact_filename: A filename which may be used for opening a block device 626291af7014SMax Reitz * which (mostly) equals the given BDS (even without any 626391af7014SMax Reitz * other options; so reading and writing must return the same 626491af7014SMax Reitz * results, but caching etc. may be different) 626591af7014SMax Reitz * - full_open_options: Options which, when given when opening a block device 626691af7014SMax Reitz * (without a filename), result in a BDS (mostly) 626791af7014SMax Reitz * equalling the given one 626891af7014SMax Reitz * - filename: If exact_filename is set, it is copied here. Otherwise, 626991af7014SMax Reitz * full_open_options is converted to a JSON object, prefixed with 627091af7014SMax Reitz * "json:" (for use through the JSON pseudo protocol) and put here. 
627191af7014SMax Reitz */ 627291af7014SMax Reitz void bdrv_refresh_filename(BlockDriverState *bs) 627391af7014SMax Reitz { 627491af7014SMax Reitz BlockDriver *drv = bs->drv; 627591af7014SMax Reitz QDict *opts; 627691af7014SMax Reitz 627791af7014SMax Reitz if (!drv) { 627891af7014SMax Reitz return; 627991af7014SMax Reitz } 628091af7014SMax Reitz 628191af7014SMax Reitz /* This BDS's file name will most probably depend on its file's name, so 628291af7014SMax Reitz * refresh that first */ 628391af7014SMax Reitz if (bs->file) { 628491af7014SMax Reitz bdrv_refresh_filename(bs->file); 628591af7014SMax Reitz } 628691af7014SMax Reitz 628791af7014SMax Reitz if (drv->bdrv_refresh_filename) { 628891af7014SMax Reitz /* Obsolete information is of no use here, so drop the old file name 628991af7014SMax Reitz * information before refreshing it */ 629091af7014SMax Reitz bs->exact_filename[0] = '\0'; 629191af7014SMax Reitz if (bs->full_open_options) { 629291af7014SMax Reitz QDECREF(bs->full_open_options); 629391af7014SMax Reitz bs->full_open_options = NULL; 629491af7014SMax Reitz } 629591af7014SMax Reitz 629691af7014SMax Reitz drv->bdrv_refresh_filename(bs); 629791af7014SMax Reitz } else if (bs->file) { 629891af7014SMax Reitz /* Try to reconstruct valid information from the underlying file */ 629991af7014SMax Reitz bool has_open_options; 630091af7014SMax Reitz 630191af7014SMax Reitz bs->exact_filename[0] = '\0'; 630291af7014SMax Reitz if (bs->full_open_options) { 630391af7014SMax Reitz QDECREF(bs->full_open_options); 630491af7014SMax Reitz bs->full_open_options = NULL; 630591af7014SMax Reitz } 630691af7014SMax Reitz 630791af7014SMax Reitz opts = qdict_new(); 630891af7014SMax Reitz has_open_options = append_open_options(opts, bs); 630991af7014SMax Reitz 631091af7014SMax Reitz /* If no specific options have been given for this BDS, the filename of 631191af7014SMax Reitz * the underlying file should suffice for this one as well */ 631291af7014SMax Reitz if (bs->file->exact_filename[0] 
&& !has_open_options) { 631391af7014SMax Reitz strcpy(bs->exact_filename, bs->file->exact_filename); 631491af7014SMax Reitz } 631591af7014SMax Reitz /* Reconstructing the full options QDict is simple for most format block 631691af7014SMax Reitz * drivers, as long as the full options are known for the underlying 631791af7014SMax Reitz * file BDS. The full options QDict of that file BDS should somehow 631891af7014SMax Reitz * contain a representation of the filename, therefore the following 631991af7014SMax Reitz * suffices without querying the (exact_)filename of this BDS. */ 632091af7014SMax Reitz if (bs->file->full_open_options) { 632191af7014SMax Reitz qdict_put_obj(opts, "driver", 632291af7014SMax Reitz QOBJECT(qstring_from_str(drv->format_name))); 632391af7014SMax Reitz QINCREF(bs->file->full_open_options); 632491af7014SMax Reitz qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options)); 632591af7014SMax Reitz 632691af7014SMax Reitz bs->full_open_options = opts; 632791af7014SMax Reitz } else { 632891af7014SMax Reitz QDECREF(opts); 632991af7014SMax Reitz } 633091af7014SMax Reitz } else if (!bs->full_open_options && qdict_size(bs->options)) { 633191af7014SMax Reitz /* There is no underlying file BDS (at least referenced by BDS.file), 633291af7014SMax Reitz * so the full options QDict should be equal to the options given 633391af7014SMax Reitz * specifically for this block device when it was opened (plus the 633491af7014SMax Reitz * driver specification). 633591af7014SMax Reitz * Because those options don't change, there is no need to update 633691af7014SMax Reitz * full_open_options when it's already set. 
*/ 633791af7014SMax Reitz 633891af7014SMax Reitz opts = qdict_new(); 633991af7014SMax Reitz append_open_options(opts, bs); 634091af7014SMax Reitz qdict_put_obj(opts, "driver", 634191af7014SMax Reitz QOBJECT(qstring_from_str(drv->format_name))); 634291af7014SMax Reitz 634391af7014SMax Reitz if (bs->exact_filename[0]) { 634491af7014SMax Reitz /* This may not work for all block protocol drivers (some may 634591af7014SMax Reitz * require this filename to be parsed), but we have to find some 634691af7014SMax Reitz * default solution here, so just include it. If some block driver 634791af7014SMax Reitz * does not support pure options without any filename at all or 634891af7014SMax Reitz * needs some special format of the options QDict, it needs to 634991af7014SMax Reitz * implement the driver-specific bdrv_refresh_filename() function. 635091af7014SMax Reitz */ 635191af7014SMax Reitz qdict_put_obj(opts, "filename", 635291af7014SMax Reitz QOBJECT(qstring_from_str(bs->exact_filename))); 635391af7014SMax Reitz } 635491af7014SMax Reitz 635591af7014SMax Reitz bs->full_open_options = opts; 635691af7014SMax Reitz } 635791af7014SMax Reitz 635891af7014SMax Reitz if (bs->exact_filename[0]) { 635991af7014SMax Reitz pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename); 636091af7014SMax Reitz } else if (bs->full_open_options) { 636191af7014SMax Reitz QString *json = qobject_to_json(QOBJECT(bs->full_open_options)); 636291af7014SMax Reitz snprintf(bs->filename, sizeof(bs->filename), "json:%s", 636391af7014SMax Reitz qstring_get_str(json)); 636491af7014SMax Reitz QDECREF(json); 636591af7014SMax Reitz } 636691af7014SMax Reitz } 63675366d0c8SBenoît Canet 63685366d0c8SBenoît Canet /* This accessor function purpose is to allow the device models to access the 63695366d0c8SBenoît Canet * BlockAcctStats structure embedded inside a BlockDriverState without being 63705366d0c8SBenoît Canet * aware of the BlockDriverState structure layout. 
63715366d0c8SBenoît Canet * It will go away when the BlockAcctStats structure will be moved inside 63725366d0c8SBenoît Canet * the device models. 63735366d0c8SBenoît Canet */ 63745366d0c8SBenoît Canet BlockAcctStats *bdrv_get_stats(BlockDriverState *bs) 63755366d0c8SBenoît Canet { 63765366d0c8SBenoît Canet return &bs->stats; 63775366d0c8SBenoît Canet } 6378