1fc01f7e7Sbellard /* 2fc01f7e7Sbellard * QEMU System Emulator block driver 3fc01f7e7Sbellard * 4fc01f7e7Sbellard * Copyright (c) 2003 Fabrice Bellard 5fc01f7e7Sbellard * 6fc01f7e7Sbellard * Permission is hereby granted, free of charge, to any person obtaining a copy 7fc01f7e7Sbellard * of this software and associated documentation files (the "Software"), to deal 8fc01f7e7Sbellard * in the Software without restriction, including without limitation the rights 9fc01f7e7Sbellard * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10fc01f7e7Sbellard * copies of the Software, and to permit persons to whom the Software is 11fc01f7e7Sbellard * furnished to do so, subject to the following conditions: 12fc01f7e7Sbellard * 13fc01f7e7Sbellard * The above copyright notice and this permission notice shall be included in 14fc01f7e7Sbellard * all copies or substantial portions of the Software. 15fc01f7e7Sbellard * 16fc01f7e7Sbellard * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17fc01f7e7Sbellard * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18fc01f7e7Sbellard * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19fc01f7e7Sbellard * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20fc01f7e7Sbellard * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21fc01f7e7Sbellard * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22fc01f7e7Sbellard * THE SOFTWARE. 23fc01f7e7Sbellard */ 243990d09aSblueswir1 #include "config-host.h" 25faf07963Spbrook #include "qemu-common.h" 266d519a5fSStefan Hajnoczi #include "trace.h" 27737e150eSPaolo Bonzini #include "block/block_int.h" 28737e150eSPaolo Bonzini #include "block/blockjob.h" 291de7afc9SPaolo Bonzini #include "qemu/module.h" 307b1b5d19SPaolo Bonzini #include "qapi/qmp/qjson.h" 31bfb197e0SMarkus Armbruster #include "sysemu/block-backend.h" 329c17d615SPaolo Bonzini #include "sysemu/sysemu.h" 33de50a20aSFam Zheng #include "sysemu/qtest.h" 341de7afc9SPaolo Bonzini #include "qemu/notify.h" 35737e150eSPaolo Bonzini #include "block/coroutine.h" 36c13163fbSBenoît Canet #include "block/qapi.h" 37b2023818SLuiz Capitulino #include "qmp-commands.h" 381de7afc9SPaolo Bonzini #include "qemu/timer.h" 39a5ee7bd4SWenchao Xia #include "qapi-event.h" 40fc01f7e7Sbellard 4171e72a19SJuan Quintela #ifdef CONFIG_BSD 427674e7bfSbellard #include <sys/types.h> 437674e7bfSbellard #include <sys/stat.h> 447674e7bfSbellard #include <sys/ioctl.h> 4572cf2d4fSBlue Swirl #include <sys/queue.h> 46c5e97233Sblueswir1 #ifndef __DragonFly__ 477674e7bfSbellard #include <sys/disk.h> 487674e7bfSbellard #endif 49c5e97233Sblueswir1 #endif 507674e7bfSbellard 5149dc768dSaliguori #ifdef _WIN32 5249dc768dSaliguori #include <windows.h> 5349dc768dSaliguori #endif 5449dc768dSaliguori 55e4654d2dSFam Zheng struct BdrvDirtyBitmap { 56e4654d2dSFam Zheng HBitmap *bitmap; 570db6e54aSFam Zheng char *name; 58e4654d2dSFam Zheng QLIST_ENTRY(BdrvDirtyBitmap) list; 59e4654d2dSFam Zheng }; 60e4654d2dSFam Zheng 611c9805a3SStefan Hajnoczi #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ 621c9805a3SStefan Hajnoczi 637c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs, 64f141eafeSaliguori int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 65097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque); 667c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs, 67f141eafeSaliguori int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 68097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque); 69f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs, 70f9f05dc5SKevin Wolf int64_t sector_num, int nb_sectors, 71f9f05dc5SKevin Wolf QEMUIOVector *iov); 72f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs, 73f9f05dc5SKevin Wolf int64_t sector_num, int nb_sectors, 74f9f05dc5SKevin Wolf QEMUIOVector *iov); 75775aa8b6SKevin Wolf static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs, 76775aa8b6SKevin Wolf int64_t offset, unsigned int bytes, QEMUIOVector *qiov, 77470c0504SStefan Hajnoczi BdrvRequestFlags flags); 78775aa8b6SKevin Wolf static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, 79775aa8b6SKevin Wolf int64_t offset, unsigned int bytes, QEMUIOVector *qiov, 80f08f2ddaSStefan Hajnoczi BdrvRequestFlags flags); 817c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, 82b2a61371SStefan Hajnoczi int64_t sector_num, 83b2a61371SStefan Hajnoczi QEMUIOVector *qiov, 84b2a61371SStefan Hajnoczi int nb_sectors, 85d20d9b7cSPaolo Bonzini BdrvRequestFlags flags, 86097310b5SMarkus Armbruster BlockCompletionFunc *cb, 87b2a61371SStefan Hajnoczi void *opaque, 888c5873d6SStefan Hajnoczi bool is_write); 89b2a61371SStefan Hajnoczi static void coroutine_fn bdrv_co_do_rw(void *opaque); 90621f0589SKevin Wolf static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, 91aa7bfbffSPeter Lieven int64_t sector_num, int nb_sectors, BdrvRequestFlags flags); 92ec530c81Sbellard 931b7bdbc1SStefan Hajnoczi static QTAILQ_HEAD(, BlockDriverState) bdrv_states = 941b7bdbc1SStefan Hajnoczi QTAILQ_HEAD_INITIALIZER(bdrv_states); 957ee930d0Sblueswir1 96dc364f4cSBenoît Canet static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states = 97dc364f4cSBenoît Canet QTAILQ_HEAD_INITIALIZER(graph_bdrv_states); 98dc364f4cSBenoît Canet 998a22f02aSStefan Hajnoczi static QLIST_HEAD(, BlockDriver) bdrv_drivers = 1008a22f02aSStefan Hajnoczi QLIST_HEAD_INITIALIZER(bdrv_drivers); 101ea2384d3Sbellard 102c4237dfaSVladimir Sementsov-Ogievskiy static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, 103c4237dfaSVladimir Sementsov-Ogievskiy int nr_sectors); 104c4237dfaSVladimir Sementsov-Ogievskiy static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, 105c4237dfaSVladimir Sementsov-Ogievskiy int nr_sectors); 106eb852011SMarkus Armbruster /* If non-zero, use only whitelisted block drivers */ 107eb852011SMarkus Armbruster static int use_bdrv_whitelist; 108eb852011SMarkus Armbruster 1099e0b22f4SStefan Hajnoczi #ifdef _WIN32 1109e0b22f4SStefan Hajnoczi static int is_windows_drive_prefix(const char *filename) 1119e0b22f4SStefan Hajnoczi { 1129e0b22f4SStefan Hajnoczi return (((filename[0] >= 'a' && filename[0] <= 'z') || 1139e0b22f4SStefan Hajnoczi (filename[0] >= 'A' && filename[0] <= 'Z')) && 1149e0b22f4SStefan Hajnoczi filename[1] == ':'); 1159e0b22f4SStefan Hajnoczi } 1169e0b22f4SStefan Hajnoczi 1179e0b22f4SStefan Hajnoczi int is_windows_drive(const char *filename) 1189e0b22f4SStefan Hajnoczi { 1199e0b22f4SStefan Hajnoczi if (is_windows_drive_prefix(filename) && 1209e0b22f4SStefan Hajnoczi filename[2] == '\0') 1219e0b22f4SStefan Hajnoczi return 1; 1229e0b22f4SStefan Hajnoczi if (strstart(filename, "\\\\.\\", NULL) || 1239e0b22f4SStefan Hajnoczi strstart(filename, "//./", NULL)) 1249e0b22f4SStefan Hajnoczi return 1; 1259e0b22f4SStefan Hajnoczi return 0; 1269e0b22f4SStefan Hajnoczi } 1279e0b22f4SStefan Hajnoczi #endif 1289e0b22f4SStefan Hajnoczi 1290563e191SZhi Yong Wu /* throttling disk I/O limits */ 130cc0681c4SBenoît Canet void bdrv_set_io_limits(BlockDriverState *bs, 131cc0681c4SBenoît Canet ThrottleConfig *cfg) 132cc0681c4SBenoît Canet { 133cc0681c4SBenoît Canet int i; 134cc0681c4SBenoît Canet 135cc0681c4SBenoît Canet throttle_config(&bs->throttle_state, cfg); 136cc0681c4SBenoît Canet 137cc0681c4SBenoît Canet for (i = 0; i < 2; i++) { 138cc0681c4SBenoît Canet qemu_co_enter_next(&bs->throttled_reqs[i]); 139cc0681c4SBenoît Canet } 140cc0681c4SBenoît Canet } 141cc0681c4SBenoît Canet 142cc0681c4SBenoît Canet /* this function drain all the throttled IOs */ 143cc0681c4SBenoît Canet static bool bdrv_start_throttled_reqs(BlockDriverState *bs) 144cc0681c4SBenoît Canet { 145cc0681c4SBenoît Canet bool drained = false; 146cc0681c4SBenoît Canet bool enabled = bs->io_limits_enabled; 147cc0681c4SBenoît Canet int i; 148cc0681c4SBenoît Canet 149cc0681c4SBenoît Canet bs->io_limits_enabled = false; 150cc0681c4SBenoît Canet 151cc0681c4SBenoît Canet for (i = 0; i < 2; i++) { 152cc0681c4SBenoît Canet while (qemu_co_enter_next(&bs->throttled_reqs[i])) { 153cc0681c4SBenoît Canet drained = true; 154cc0681c4SBenoît Canet } 155cc0681c4SBenoît Canet } 156cc0681c4SBenoît Canet 157cc0681c4SBenoît Canet bs->io_limits_enabled = enabled; 158cc0681c4SBenoît Canet 159cc0681c4SBenoît Canet return drained; 160cc0681c4SBenoît Canet } 161cc0681c4SBenoît Canet 16298f90dbaSZhi Yong Wu void bdrv_io_limits_disable(BlockDriverState *bs) 16398f90dbaSZhi Yong Wu { 16498f90dbaSZhi Yong Wu bs->io_limits_enabled = false; 16598f90dbaSZhi Yong Wu 166cc0681c4SBenoît Canet bdrv_start_throttled_reqs(bs); 16798f90dbaSZhi Yong Wu 168cc0681c4SBenoît Canet throttle_destroy(&bs->throttle_state); 16998f90dbaSZhi Yong Wu } 17098f90dbaSZhi Yong Wu 171cc0681c4SBenoît Canet static void bdrv_throttle_read_timer_cb(void *opaque) 1720563e191SZhi Yong Wu { 1730563e191SZhi Yong Wu BlockDriverState *bs = opaque; 174cc0681c4SBenoît Canet qemu_co_enter_next(&bs->throttled_reqs[0]); 1750563e191SZhi Yong Wu } 1760563e191SZhi Yong Wu 177cc0681c4SBenoît Canet static void bdrv_throttle_write_timer_cb(void *opaque) 178cc0681c4SBenoît Canet { 179cc0681c4SBenoît Canet BlockDriverState *bs = opaque; 180cc0681c4SBenoît Canet qemu_co_enter_next(&bs->throttled_reqs[1]); 181cc0681c4SBenoît Canet } 182cc0681c4SBenoît Canet 183cc0681c4SBenoît Canet /* should be called before bdrv_set_io_limits if a limit is set */ 1840563e191SZhi Yong Wu void bdrv_io_limits_enable(BlockDriverState *bs) 1850563e191SZhi Yong Wu { 186de50a20aSFam Zheng int clock_type = QEMU_CLOCK_REALTIME; 187de50a20aSFam Zheng 188de50a20aSFam Zheng if (qtest_enabled()) { 189de50a20aSFam Zheng /* For testing block IO throttling only */ 190de50a20aSFam Zheng clock_type = QEMU_CLOCK_VIRTUAL; 191de50a20aSFam Zheng } 192cc0681c4SBenoît Canet assert(!bs->io_limits_enabled); 193cc0681c4SBenoît Canet throttle_init(&bs->throttle_state, 19413af91ebSStefan Hajnoczi bdrv_get_aio_context(bs), 195de50a20aSFam Zheng clock_type, 196cc0681c4SBenoît Canet bdrv_throttle_read_timer_cb, 197cc0681c4SBenoît Canet bdrv_throttle_write_timer_cb, 198cc0681c4SBenoît Canet bs); 1990563e191SZhi Yong Wu bs->io_limits_enabled = true; 2000563e191SZhi Yong Wu } 2010563e191SZhi Yong Wu 202cc0681c4SBenoît Canet /* This function makes an IO wait if needed 203cc0681c4SBenoît Canet * 204cc0681c4SBenoît Canet * @nb_sectors: the number of sectors of the IO 205cc0681c4SBenoît Canet * @is_write: is the IO a write 20698f90dbaSZhi Yong Wu */ 207cc0681c4SBenoît Canet static void bdrv_io_limits_intercept(BlockDriverState *bs, 208d5103588SKevin Wolf unsigned int bytes, 209cc0681c4SBenoît Canet bool is_write) 210cc0681c4SBenoît Canet { 211cc0681c4SBenoît Canet /* does this io must wait */ 212cc0681c4SBenoît Canet bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write); 21398f90dbaSZhi Yong Wu 214cc0681c4SBenoît Canet /* if must wait or any request of this type throttled queue the IO */ 215cc0681c4SBenoît Canet if (must_wait || 216cc0681c4SBenoît Canet !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) { 217cc0681c4SBenoît Canet qemu_co_queue_wait(&bs->throttled_reqs[is_write]); 21898f90dbaSZhi Yong Wu } 21998f90dbaSZhi Yong Wu 220cc0681c4SBenoît Canet /* the IO will be executed, do the accounting */ 221d5103588SKevin Wolf throttle_account(&bs->throttle_state, is_write, bytes); 222d5103588SKevin Wolf 223cc0681c4SBenoît Canet 224cc0681c4SBenoît Canet /* if the next request must wait -> do nothing */ 225cc0681c4SBenoît Canet if (throttle_schedule_timer(&bs->throttle_state, is_write)) { 226cc0681c4SBenoît Canet return; 227cc0681c4SBenoît Canet } 228cc0681c4SBenoît Canet 229cc0681c4SBenoît Canet /* else queue next request for execution */ 230cc0681c4SBenoît Canet qemu_co_queue_next(&bs->throttled_reqs[is_write]); 23198f90dbaSZhi Yong Wu } 23298f90dbaSZhi Yong Wu 233339064d5SKevin Wolf size_t bdrv_opt_mem_align(BlockDriverState *bs) 234339064d5SKevin Wolf { 235339064d5SKevin Wolf if (!bs || !bs->drv) { 236339064d5SKevin Wolf /* 4k should be on the safe side */ 237339064d5SKevin Wolf return 4096; 238339064d5SKevin Wolf } 239339064d5SKevin Wolf 240339064d5SKevin Wolf return bs->bl.opt_mem_alignment; 241339064d5SKevin Wolf } 242339064d5SKevin Wolf 2439e0b22f4SStefan Hajnoczi /* check if the path starts with "<protocol>:" */ 2445c98415bSMax Reitz int path_has_protocol(const char *path) 2459e0b22f4SStefan Hajnoczi { 246947995c0SPaolo Bonzini const char *p; 247947995c0SPaolo Bonzini 2489e0b22f4SStefan Hajnoczi #ifdef _WIN32 2499e0b22f4SStefan Hajnoczi if (is_windows_drive(path) || 2509e0b22f4SStefan Hajnoczi is_windows_drive_prefix(path)) { 2519e0b22f4SStefan Hajnoczi return 0; 2529e0b22f4SStefan Hajnoczi } 253947995c0SPaolo Bonzini p = path + strcspn(path, ":/\\"); 254947995c0SPaolo Bonzini #else 255947995c0SPaolo Bonzini p = path + strcspn(path, ":/"); 2569e0b22f4SStefan Hajnoczi #endif 2579e0b22f4SStefan Hajnoczi 258947995c0SPaolo Bonzini return *p == ':'; 2599e0b22f4SStefan Hajnoczi } 2609e0b22f4SStefan Hajnoczi 26183f64091Sbellard int path_is_absolute(const char *path) 26283f64091Sbellard { 26321664424Sbellard #ifdef _WIN32 26421664424Sbellard /* specific case for names like: "\\.\d:" */ 265f53f4da9SPaolo Bonzini if (is_windows_drive(path) || is_windows_drive_prefix(path)) { 26621664424Sbellard return 1; 267f53f4da9SPaolo Bonzini } 268f53f4da9SPaolo Bonzini return (*path == '/' || *path == '\\'); 2693b9f94e1Sbellard #else 270f53f4da9SPaolo Bonzini return (*path == '/'); 2713b9f94e1Sbellard #endif 27283f64091Sbellard } 27383f64091Sbellard 27483f64091Sbellard /* if filename is absolute, just copy it to dest. Otherwise, build a 27583f64091Sbellard path to it by considering it is relative to base_path. URL are 27683f64091Sbellard supported. */ 27783f64091Sbellard void path_combine(char *dest, int dest_size, 27883f64091Sbellard const char *base_path, 27983f64091Sbellard const char *filename) 28083f64091Sbellard { 28183f64091Sbellard const char *p, *p1; 28283f64091Sbellard int len; 28383f64091Sbellard 28483f64091Sbellard if (dest_size <= 0) 28583f64091Sbellard return; 28683f64091Sbellard if (path_is_absolute(filename)) { 28783f64091Sbellard pstrcpy(dest, dest_size, filename); 28883f64091Sbellard } else { 28983f64091Sbellard p = strchr(base_path, ':'); 29083f64091Sbellard if (p) 29183f64091Sbellard p++; 29283f64091Sbellard else 29383f64091Sbellard p = base_path; 2943b9f94e1Sbellard p1 = strrchr(base_path, '/'); 2953b9f94e1Sbellard #ifdef _WIN32 2963b9f94e1Sbellard { 2973b9f94e1Sbellard const char *p2; 2983b9f94e1Sbellard p2 = strrchr(base_path, '\\'); 2993b9f94e1Sbellard if (!p1 || p2 > p1) 3003b9f94e1Sbellard p1 = p2; 3013b9f94e1Sbellard } 3023b9f94e1Sbellard #endif 30383f64091Sbellard if (p1) 30483f64091Sbellard p1++; 30583f64091Sbellard else 30683f64091Sbellard p1 = base_path; 30783f64091Sbellard if (p1 > p) 30883f64091Sbellard p = p1; 30983f64091Sbellard len = p - base_path; 31083f64091Sbellard if (len > dest_size - 1) 31183f64091Sbellard len = dest_size - 1; 31283f64091Sbellard memcpy(dest, base_path, len); 31383f64091Sbellard dest[len] = '\0'; 31483f64091Sbellard pstrcat(dest, dest_size, filename); 31583f64091Sbellard } 31683f64091Sbellard } 31783f64091Sbellard 3180a82855aSMax Reitz void bdrv_get_full_backing_filename_from_filename(const char *backed, 3190a82855aSMax Reitz const char *backing, 3209f07429eSMax Reitz char *dest, size_t sz, 3219f07429eSMax Reitz Error **errp) 3220a82855aSMax Reitz { 3239f07429eSMax Reitz if (backing[0] == '\0' || path_has_protocol(backing) || 3249f07429eSMax Reitz path_is_absolute(backing)) 3259f07429eSMax Reitz { 3260a82855aSMax Reitz pstrcpy(dest, sz, backing); 3279f07429eSMax Reitz } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) { 3289f07429eSMax Reitz error_setg(errp, "Cannot use relative backing file names for '%s'", 3299f07429eSMax Reitz backed); 3300a82855aSMax Reitz } else { 3310a82855aSMax Reitz path_combine(dest, sz, backed, backing); 3320a82855aSMax Reitz } 3330a82855aSMax Reitz } 3340a82855aSMax Reitz 3359f07429eSMax Reitz void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz, 3369f07429eSMax Reitz Error **errp) 337dc5a1371SPaolo Bonzini { 3389f07429eSMax Reitz char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename; 3399f07429eSMax Reitz 3409f07429eSMax Reitz bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file, 3419f07429eSMax Reitz dest, sz, errp); 342dc5a1371SPaolo Bonzini } 343dc5a1371SPaolo Bonzini 3445efa9d5aSAnthony Liguori void bdrv_register(BlockDriver *bdrv) 345ea2384d3Sbellard { 3468c5873d6SStefan Hajnoczi /* Block drivers without coroutine functions need emulation */ 3478c5873d6SStefan Hajnoczi if (!bdrv->bdrv_co_readv) { 348f9f05dc5SKevin Wolf bdrv->bdrv_co_readv = bdrv_co_readv_em; 349f9f05dc5SKevin Wolf bdrv->bdrv_co_writev = bdrv_co_writev_em; 350f9f05dc5SKevin Wolf 351f8c35c1dSStefan Hajnoczi /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if 352f8c35c1dSStefan Hajnoczi * the block driver lacks aio we need to emulate that too. 353f8c35c1dSStefan Hajnoczi */ 354f9f05dc5SKevin Wolf if (!bdrv->bdrv_aio_readv) { 35583f64091Sbellard /* add AIO emulation layer */ 356f141eafeSaliguori bdrv->bdrv_aio_readv = bdrv_aio_readv_em; 357f141eafeSaliguori bdrv->bdrv_aio_writev = bdrv_aio_writev_em; 35883f64091Sbellard } 359f9f05dc5SKevin Wolf } 360b2e12bc6SChristoph Hellwig 3618a22f02aSStefan Hajnoczi QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list); 362ea2384d3Sbellard } 363b338082bSbellard 3647f06d47eSMarkus Armbruster BlockDriverState *bdrv_new_root(void) 365fc01f7e7Sbellard { 3667f06d47eSMarkus Armbruster BlockDriverState *bs = bdrv_new(); 367e4e9986bSMarkus Armbruster 368e4e9986bSMarkus Armbruster QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list); 369e4e9986bSMarkus Armbruster return bs; 370e4e9986bSMarkus Armbruster } 371e4e9986bSMarkus Armbruster 372e4e9986bSMarkus Armbruster BlockDriverState *bdrv_new(void) 373e4e9986bSMarkus Armbruster { 374e4e9986bSMarkus Armbruster BlockDriverState *bs; 375e4e9986bSMarkus Armbruster int i; 376e4e9986bSMarkus Armbruster 3775839e53bSMarkus Armbruster bs = g_new0(BlockDriverState, 1); 378e4654d2dSFam Zheng QLIST_INIT(&bs->dirty_bitmaps); 379fbe40ff7SFam Zheng for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 380fbe40ff7SFam Zheng QLIST_INIT(&bs->op_blockers[i]); 381fbe40ff7SFam Zheng } 38228a7282aSLuiz Capitulino bdrv_iostatus_disable(bs); 383d7d512f6SPaolo Bonzini notifier_list_init(&bs->close_notifiers); 384d616b224SStefan Hajnoczi notifier_with_return_list_init(&bs->before_write_notifiers); 385cc0681c4SBenoît Canet qemu_co_queue_init(&bs->throttled_reqs[0]); 386cc0681c4SBenoît Canet qemu_co_queue_init(&bs->throttled_reqs[1]); 3879fcb0251SFam Zheng bs->refcnt = 1; 388dcd04228SStefan Hajnoczi bs->aio_context = qemu_get_aio_context(); 389d7d512f6SPaolo Bonzini 390b338082bSbellard return bs; 391b338082bSbellard } 392b338082bSbellard 393d7d512f6SPaolo Bonzini void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify) 394d7d512f6SPaolo Bonzini { 395d7d512f6SPaolo Bonzini notifier_list_add(&bs->close_notifiers, notify); 396d7d512f6SPaolo Bonzini } 397d7d512f6SPaolo Bonzini 398ea2384d3Sbellard BlockDriver *bdrv_find_format(const char *format_name) 399ea2384d3Sbellard { 400ea2384d3Sbellard BlockDriver *drv1; 4018a22f02aSStefan Hajnoczi QLIST_FOREACH(drv1, &bdrv_drivers, list) { 4028a22f02aSStefan Hajnoczi if (!strcmp(drv1->format_name, format_name)) { 403ea2384d3Sbellard return drv1; 404ea2384d3Sbellard } 4058a22f02aSStefan Hajnoczi } 406ea2384d3Sbellard return NULL; 407ea2384d3Sbellard } 408ea2384d3Sbellard 409b64ec4e4SFam Zheng static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only) 410eb852011SMarkus Armbruster { 411b64ec4e4SFam Zheng static const char *whitelist_rw[] = { 412b64ec4e4SFam Zheng CONFIG_BDRV_RW_WHITELIST 413b64ec4e4SFam Zheng }; 414b64ec4e4SFam Zheng static const char *whitelist_ro[] = { 415b64ec4e4SFam Zheng CONFIG_BDRV_RO_WHITELIST 416eb852011SMarkus Armbruster }; 417eb852011SMarkus Armbruster const char **p; 418eb852011SMarkus Armbruster 419b64ec4e4SFam Zheng if (!whitelist_rw[0] && !whitelist_ro[0]) { 420eb852011SMarkus Armbruster return 1; /* no whitelist, anything goes */ 421b64ec4e4SFam Zheng } 422eb852011SMarkus Armbruster 423b64ec4e4SFam Zheng for (p = whitelist_rw; *p; p++) { 424eb852011SMarkus Armbruster if (!strcmp(drv->format_name, *p)) { 425eb852011SMarkus Armbruster return 1; 426eb852011SMarkus Armbruster } 427eb852011SMarkus Armbruster } 428b64ec4e4SFam Zheng if (read_only) { 429b64ec4e4SFam Zheng for (p = whitelist_ro; *p; p++) { 430b64ec4e4SFam Zheng if (!strcmp(drv->format_name, *p)) { 431b64ec4e4SFam Zheng return 1; 432b64ec4e4SFam Zheng } 433b64ec4e4SFam Zheng } 434b64ec4e4SFam Zheng } 435eb852011SMarkus Armbruster return 0; 436eb852011SMarkus Armbruster } 437eb852011SMarkus Armbruster 438b64ec4e4SFam Zheng BlockDriver *bdrv_find_whitelisted_format(const char *format_name, 439b64ec4e4SFam Zheng bool read_only) 440eb852011SMarkus Armbruster { 441eb852011SMarkus Armbruster BlockDriver *drv = bdrv_find_format(format_name); 442b64ec4e4SFam Zheng return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL; 443eb852011SMarkus Armbruster } 444eb852011SMarkus Armbruster 4455b7e1542SZhi Yong Wu typedef struct CreateCo { 4465b7e1542SZhi Yong Wu BlockDriver *drv; 4475b7e1542SZhi Yong Wu char *filename; 44883d0521aSChunyan Liu QemuOpts *opts; 4495b7e1542SZhi Yong Wu int ret; 450cc84d90fSMax Reitz Error *err; 4515b7e1542SZhi Yong Wu } CreateCo; 4525b7e1542SZhi Yong Wu 4535b7e1542SZhi Yong Wu static void coroutine_fn bdrv_create_co_entry(void *opaque) 4545b7e1542SZhi Yong Wu { 455cc84d90fSMax Reitz Error *local_err = NULL; 456cc84d90fSMax Reitz int ret; 457cc84d90fSMax Reitz 4585b7e1542SZhi Yong Wu CreateCo *cco = opaque; 4595b7e1542SZhi Yong Wu assert(cco->drv); 4605b7e1542SZhi Yong Wu 461c282e1fdSChunyan Liu ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err); 46284d18f06SMarkus Armbruster if (local_err) { 463cc84d90fSMax Reitz error_propagate(&cco->err, local_err); 464cc84d90fSMax Reitz } 465cc84d90fSMax Reitz cco->ret = ret; 4665b7e1542SZhi Yong Wu } 4675b7e1542SZhi Yong Wu 4680e7e1989SKevin Wolf int bdrv_create(BlockDriver *drv, const char* filename, 46983d0521aSChunyan Liu QemuOpts *opts, Error **errp) 470ea2384d3Sbellard { 4715b7e1542SZhi Yong Wu int ret; 4720e7e1989SKevin Wolf 4735b7e1542SZhi Yong Wu Coroutine *co; 4745b7e1542SZhi Yong Wu CreateCo cco = { 4755b7e1542SZhi Yong Wu .drv = drv, 4765b7e1542SZhi Yong Wu .filename = g_strdup(filename), 47783d0521aSChunyan Liu .opts = opts, 4785b7e1542SZhi Yong Wu .ret = NOT_DONE, 479cc84d90fSMax Reitz .err = NULL, 4805b7e1542SZhi Yong Wu }; 4815b7e1542SZhi Yong Wu 482c282e1fdSChunyan Liu if (!drv->bdrv_create) { 483cc84d90fSMax Reitz error_setg(errp, "Driver '%s' does not support image creation", drv->format_name); 48480168bffSLuiz Capitulino ret = -ENOTSUP; 48580168bffSLuiz Capitulino goto out; 4865b7e1542SZhi Yong Wu } 4875b7e1542SZhi Yong Wu 4885b7e1542SZhi Yong Wu if (qemu_in_coroutine()) { 4895b7e1542SZhi Yong Wu /* Fast-path if already in coroutine context */ 4905b7e1542SZhi Yong Wu bdrv_create_co_entry(&cco); 4915b7e1542SZhi Yong Wu } else { 4925b7e1542SZhi Yong Wu co = qemu_coroutine_create(bdrv_create_co_entry); 4935b7e1542SZhi Yong Wu qemu_coroutine_enter(co, &cco); 4945b7e1542SZhi Yong Wu while (cco.ret == NOT_DONE) { 495b47ec2c4SPaolo Bonzini aio_poll(qemu_get_aio_context(), true); 4965b7e1542SZhi Yong Wu } 4975b7e1542SZhi Yong Wu } 4985b7e1542SZhi Yong Wu 4995b7e1542SZhi Yong Wu ret = cco.ret; 500cc84d90fSMax Reitz if (ret < 0) { 50184d18f06SMarkus Armbruster if (cco.err) { 502cc84d90fSMax Reitz error_propagate(errp, cco.err); 503cc84d90fSMax Reitz } else { 504cc84d90fSMax Reitz error_setg_errno(errp, -ret, "Could not create image"); 505cc84d90fSMax Reitz } 506cc84d90fSMax Reitz } 5075b7e1542SZhi Yong Wu 50880168bffSLuiz Capitulino out: 50980168bffSLuiz Capitulino g_free(cco.filename); 5105b7e1542SZhi Yong Wu return ret; 511ea2384d3Sbellard } 512ea2384d3Sbellard 513c282e1fdSChunyan Liu int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) 51484a12e66SChristoph Hellwig { 51584a12e66SChristoph Hellwig BlockDriver *drv; 516cc84d90fSMax Reitz Error *local_err = NULL; 517cc84d90fSMax Reitz int ret; 51884a12e66SChristoph Hellwig 519b65a5e12SMax Reitz drv = bdrv_find_protocol(filename, true, errp); 52084a12e66SChristoph Hellwig if (drv == NULL) { 52116905d71SStefan Hajnoczi return -ENOENT; 52284a12e66SChristoph Hellwig } 52384a12e66SChristoph Hellwig 524c282e1fdSChunyan Liu ret = bdrv_create(drv, filename, opts, &local_err); 52584d18f06SMarkus Armbruster if (local_err) { 526cc84d90fSMax Reitz error_propagate(errp, local_err); 527cc84d90fSMax Reitz } 528cc84d90fSMax Reitz return ret; 52984a12e66SChristoph Hellwig } 53084a12e66SChristoph Hellwig 5313baca891SKevin Wolf void bdrv_refresh_limits(BlockDriverState *bs, Error **errp) 532d34682cdSKevin Wolf { 533d34682cdSKevin Wolf BlockDriver *drv = bs->drv; 5343baca891SKevin Wolf Error *local_err = NULL; 535d34682cdSKevin Wolf 536d34682cdSKevin Wolf memset(&bs->bl, 0, sizeof(bs->bl)); 537d34682cdSKevin Wolf 538466ad822SKevin Wolf if (!drv) { 5393baca891SKevin Wolf return; 540466ad822SKevin Wolf } 541466ad822SKevin Wolf 542466ad822SKevin Wolf /* Take some limits from the children as a default */ 543466ad822SKevin Wolf if (bs->file) { 5443baca891SKevin Wolf bdrv_refresh_limits(bs->file, &local_err); 5453baca891SKevin Wolf if (local_err) { 5463baca891SKevin Wolf error_propagate(errp, local_err); 5473baca891SKevin Wolf return; 5483baca891SKevin Wolf } 549466ad822SKevin Wolf bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length; 5502647fab5SPeter Lieven bs->bl.max_transfer_length = bs->file->bl.max_transfer_length; 551339064d5SKevin Wolf bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment; 552339064d5SKevin Wolf } else { 553339064d5SKevin Wolf bs->bl.opt_mem_alignment = 512; 554466ad822SKevin Wolf } 555466ad822SKevin Wolf 556466ad822SKevin Wolf if (bs->backing_hd) { 5573baca891SKevin Wolf bdrv_refresh_limits(bs->backing_hd, &local_err); 5583baca891SKevin Wolf if (local_err) { 5593baca891SKevin Wolf error_propagate(errp, local_err); 5603baca891SKevin Wolf return; 5613baca891SKevin Wolf } 562466ad822SKevin Wolf bs->bl.opt_transfer_length = 563466ad822SKevin Wolf MAX(bs->bl.opt_transfer_length, 564466ad822SKevin Wolf bs->backing_hd->bl.opt_transfer_length); 5652647fab5SPeter Lieven bs->bl.max_transfer_length = 5662647fab5SPeter Lieven MIN_NON_ZERO(bs->bl.max_transfer_length, 5672647fab5SPeter Lieven bs->backing_hd->bl.max_transfer_length); 568339064d5SKevin Wolf bs->bl.opt_mem_alignment = 569339064d5SKevin Wolf MAX(bs->bl.opt_mem_alignment, 570339064d5SKevin Wolf bs->backing_hd->bl.opt_mem_alignment); 571466ad822SKevin Wolf } 572466ad822SKevin Wolf 573466ad822SKevin Wolf /* Then let the driver override it */ 574466ad822SKevin Wolf if (drv->bdrv_refresh_limits) { 5753baca891SKevin Wolf drv->bdrv_refresh_limits(bs, errp); 576d34682cdSKevin Wolf } 577d34682cdSKevin Wolf } 578d34682cdSKevin Wolf 579892b7de8SEkaterina Tumanova /** 580892b7de8SEkaterina Tumanova * Try to get @bs's logical and physical block size. 581892b7de8SEkaterina Tumanova * On success, store them in @bsz struct and return 0. 582892b7de8SEkaterina Tumanova * On failure return -errno. 583892b7de8SEkaterina Tumanova * @bs must not be empty. 584892b7de8SEkaterina Tumanova */ 585892b7de8SEkaterina Tumanova int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) 586892b7de8SEkaterina Tumanova { 587892b7de8SEkaterina Tumanova BlockDriver *drv = bs->drv; 588892b7de8SEkaterina Tumanova 589892b7de8SEkaterina Tumanova if (drv && drv->bdrv_probe_blocksizes) { 590892b7de8SEkaterina Tumanova return drv->bdrv_probe_blocksizes(bs, bsz); 591892b7de8SEkaterina Tumanova } 592892b7de8SEkaterina Tumanova 593892b7de8SEkaterina Tumanova return -ENOTSUP; 594892b7de8SEkaterina Tumanova } 595892b7de8SEkaterina Tumanova 596892b7de8SEkaterina Tumanova /** 597892b7de8SEkaterina Tumanova * Try to get @bs's geometry (cyls, heads, sectors). 598892b7de8SEkaterina Tumanova * On success, store them in @geo struct and return 0. 599892b7de8SEkaterina Tumanova * On failure return -errno. 600892b7de8SEkaterina Tumanova * @bs must not be empty. 601892b7de8SEkaterina Tumanova */ 602892b7de8SEkaterina Tumanova int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo) 603892b7de8SEkaterina Tumanova { 604892b7de8SEkaterina Tumanova BlockDriver *drv = bs->drv; 605892b7de8SEkaterina Tumanova 606892b7de8SEkaterina Tumanova if (drv && drv->bdrv_probe_geometry) { 607892b7de8SEkaterina Tumanova return drv->bdrv_probe_geometry(bs, geo); 608892b7de8SEkaterina Tumanova } 609892b7de8SEkaterina Tumanova 610892b7de8SEkaterina Tumanova return -ENOTSUP; 611892b7de8SEkaterina Tumanova } 612892b7de8SEkaterina Tumanova 613eba25057SJim Meyering /* 614eba25057SJim Meyering * Create a uniquely-named empty temporary file. 615eba25057SJim Meyering * Return 0 upon success, otherwise a negative errno value. 616eba25057SJim Meyering */ 617eba25057SJim Meyering int get_tmp_filename(char *filename, int size) 618eba25057SJim Meyering { 619d5249393Sbellard #ifdef _WIN32 6203b9f94e1Sbellard char temp_dir[MAX_PATH]; 621eba25057SJim Meyering /* GetTempFileName requires that its output buffer (4th param) 622eba25057SJim Meyering have length MAX_PATH or greater. */ 623eba25057SJim Meyering assert(size >= MAX_PATH); 624eba25057SJim Meyering return (GetTempPath(MAX_PATH, temp_dir) 625eba25057SJim Meyering && GetTempFileName(temp_dir, "qem", 0, filename) 626eba25057SJim Meyering ? 0 : -GetLastError()); 627d5249393Sbellard #else 628ea2384d3Sbellard int fd; 6297ccfb2ebSblueswir1 const char *tmpdir; 6300badc1eeSaurel32 tmpdir = getenv("TMPDIR"); 63169bef793SAmit Shah if (!tmpdir) { 63269bef793SAmit Shah tmpdir = "/var/tmp"; 63369bef793SAmit Shah } 634eba25057SJim Meyering if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) { 635eba25057SJim Meyering return -EOVERFLOW; 636ea2384d3Sbellard } 637eba25057SJim Meyering fd = mkstemp(filename); 638fe235a06SDunrong Huang if (fd < 0) { 639fe235a06SDunrong Huang return -errno; 640fe235a06SDunrong Huang } 641fe235a06SDunrong Huang if (close(fd) != 0) { 642fe235a06SDunrong Huang unlink(filename); 643eba25057SJim Meyering return -errno; 644eba25057SJim Meyering } 645eba25057SJim Meyering return 0; 646d5249393Sbellard #endif 647eba25057SJim Meyering } 648ea2384d3Sbellard 649f3a5d3f8SChristoph Hellwig /* 650f3a5d3f8SChristoph Hellwig * Detect host devices. By convention, /dev/cdrom[N] is always 651f3a5d3f8SChristoph Hellwig * recognized as a host CDROM. 652f3a5d3f8SChristoph Hellwig */ 653f3a5d3f8SChristoph Hellwig static BlockDriver *find_hdev_driver(const char *filename) 654f3a5d3f8SChristoph Hellwig { 655508c7cb3SChristoph Hellwig int score_max = 0, score; 656508c7cb3SChristoph Hellwig BlockDriver *drv = NULL, *d; 657f3a5d3f8SChristoph Hellwig 6588a22f02aSStefan Hajnoczi QLIST_FOREACH(d, &bdrv_drivers, list) { 659508c7cb3SChristoph Hellwig if (d->bdrv_probe_device) { 660508c7cb3SChristoph Hellwig score = d->bdrv_probe_device(filename); 661508c7cb3SChristoph Hellwig if (score > score_max) { 662508c7cb3SChristoph Hellwig score_max = score; 663508c7cb3SChristoph Hellwig drv = d; 664f3a5d3f8SChristoph Hellwig } 665508c7cb3SChristoph Hellwig } 666f3a5d3f8SChristoph Hellwig } 667f3a5d3f8SChristoph Hellwig 668508c7cb3SChristoph Hellwig return drv; 669f3a5d3f8SChristoph Hellwig } 670f3a5d3f8SChristoph Hellwig 67198289620SKevin Wolf BlockDriver *bdrv_find_protocol(const char *filename, 672b65a5e12SMax Reitz bool allow_protocol_prefix, 673b65a5e12SMax Reitz Error **errp) 67484a12e66SChristoph Hellwig { 67584a12e66SChristoph Hellwig BlockDriver *drv1; 67684a12e66SChristoph Hellwig char protocol[128]; 67784a12e66SChristoph Hellwig int len; 67884a12e66SChristoph Hellwig const char *p; 67984a12e66SChristoph Hellwig 68066f82ceeSKevin Wolf /* TODO Drivers without bdrv_file_open must be specified explicitly */ 68166f82ceeSKevin Wolf 68239508e7aSChristoph Hellwig /* 68339508e7aSChristoph Hellwig * XXX(hch): we really should not let host device detection 68439508e7aSChristoph Hellwig * override an explicit protocol specification, but moving this 68539508e7aSChristoph Hellwig * later breaks access to device names with colons in them. 68639508e7aSChristoph Hellwig * Thanks to the brain-dead persistent naming schemes on udev- 68739508e7aSChristoph Hellwig * based Linux systems those actually are quite common. 68839508e7aSChristoph Hellwig */ 68984a12e66SChristoph Hellwig drv1 = find_hdev_driver(filename); 69039508e7aSChristoph Hellwig if (drv1) { 69184a12e66SChristoph Hellwig return drv1; 69284a12e66SChristoph Hellwig } 69339508e7aSChristoph Hellwig 69498289620SKevin Wolf if (!path_has_protocol(filename) || !allow_protocol_prefix) { 695ef810437SMax Reitz return &bdrv_file; 69639508e7aSChristoph Hellwig } 69798289620SKevin Wolf 6989e0b22f4SStefan Hajnoczi p = strchr(filename, ':'); 6999e0b22f4SStefan Hajnoczi assert(p != NULL); 70084a12e66SChristoph Hellwig len = p - filename; 70184a12e66SChristoph Hellwig if (len > sizeof(protocol) - 1) 70284a12e66SChristoph Hellwig len = sizeof(protocol) - 1; 70384a12e66SChristoph Hellwig memcpy(protocol, filename, len); 70484a12e66SChristoph Hellwig protocol[len] = '\0'; 70584a12e66SChristoph Hellwig QLIST_FOREACH(drv1, &bdrv_drivers, list) { 70684a12e66SChristoph Hellwig if (drv1->protocol_name && 70784a12e66SChristoph Hellwig !strcmp(drv1->protocol_name, protocol)) { 70884a12e66SChristoph Hellwig return drv1; 70984a12e66SChristoph Hellwig } 71084a12e66SChristoph Hellwig } 711b65a5e12SMax Reitz 712b65a5e12SMax Reitz error_setg(errp, "Unknown protocol '%s'", protocol); 71384a12e66SChristoph Hellwig return NULL; 71484a12e66SChristoph Hellwig } 71584a12e66SChristoph Hellwig 716c6684249SMarkus Armbruster /* 717c6684249SMarkus Armbruster * Guess image format by probing its contents. 718c6684249SMarkus Armbruster * This is not a good idea when your image is raw (CVE-2008-2004), but 719c6684249SMarkus Armbruster * we do it anyway for backward compatibility. 720c6684249SMarkus Armbruster * 721c6684249SMarkus Armbruster * @buf contains the image's first @buf_size bytes. 7227cddd372SKevin Wolf * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE, 7237cddd372SKevin Wolf * but can be smaller if the image file is smaller) 724c6684249SMarkus Armbruster * @filename is its filename. 725c6684249SMarkus Armbruster * 726c6684249SMarkus Armbruster * For all block drivers, call the bdrv_probe() method to get its 727c6684249SMarkus Armbruster * probing score. 728c6684249SMarkus Armbruster * Return the first block driver with the highest probing score. 729c6684249SMarkus Armbruster */ 73038f3ef57SKevin Wolf BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, 731c6684249SMarkus Armbruster const char *filename) 732c6684249SMarkus Armbruster { 733c6684249SMarkus Armbruster int score_max = 0, score; 734c6684249SMarkus Armbruster BlockDriver *drv = NULL, *d; 735c6684249SMarkus Armbruster 736c6684249SMarkus Armbruster QLIST_FOREACH(d, &bdrv_drivers, list) { 737c6684249SMarkus Armbruster if (d->bdrv_probe) { 738c6684249SMarkus Armbruster score = d->bdrv_probe(buf, buf_size, filename); 739c6684249SMarkus Armbruster if (score > score_max) { 740c6684249SMarkus Armbruster score_max = score; 741c6684249SMarkus Armbruster drv = d; 742c6684249SMarkus Armbruster } 743c6684249SMarkus Armbruster } 744c6684249SMarkus Armbruster } 745c6684249SMarkus Armbruster 746c6684249SMarkus Armbruster return drv; 747c6684249SMarkus Armbruster } 748c6684249SMarkus Armbruster 749f500a6d3SKevin Wolf static int find_image_format(BlockDriverState *bs, const char *filename, 75034b5d2c6SMax Reitz BlockDriver **pdrv, Error **errp) 751ea2384d3Sbellard { 752c6684249SMarkus Armbruster BlockDriver *drv; 7537cddd372SKevin Wolf uint8_t buf[BLOCK_PROBE_BUF_SIZE]; 754f500a6d3SKevin Wolf int ret = 0; 755f8ea0b00SNicholas Bellinger 75608a00559SKevin Wolf /* Return the raw BlockDriver * to scsi-generic devices or empty drives */ 7578e895599SPaolo Bonzini if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) { 758ef810437SMax Reitz *pdrv = &bdrv_raw; 759c98ac35dSStefan Weil return ret; 7601a396859SNicholas A. Bellinger } 761f8ea0b00SNicholas Bellinger 76283f64091Sbellard ret = bdrv_pread(bs, 0, buf, sizeof(buf)); 763ea2384d3Sbellard if (ret < 0) { 76434b5d2c6SMax Reitz error_setg_errno(errp, -ret, "Could not read image for determining its " 76534b5d2c6SMax Reitz "format"); 766c98ac35dSStefan Weil *pdrv = NULL; 767c98ac35dSStefan Weil return ret; 768ea2384d3Sbellard } 769ea2384d3Sbellard 770c6684249SMarkus Armbruster drv = bdrv_probe_all(buf, ret, filename); 771c98ac35dSStefan Weil if (!drv) { 77234b5d2c6SMax Reitz error_setg(errp, "Could not determine image format: No compatible " 77334b5d2c6SMax Reitz "driver found"); 774c98ac35dSStefan Weil ret = -ENOENT; 775c98ac35dSStefan Weil } 776c98ac35dSStefan Weil *pdrv = drv; 777c98ac35dSStefan Weil return ret; 778ea2384d3Sbellard } 779ea2384d3Sbellard 78051762288SStefan Hajnoczi /** 78151762288SStefan Hajnoczi * Set the current 'total_sectors' value 78265a9bb25SMarkus Armbruster * Return 0 on success, -errno on error. 78351762288SStefan Hajnoczi */ 78451762288SStefan Hajnoczi static int refresh_total_sectors(BlockDriverState *bs, int64_t hint) 78551762288SStefan Hajnoczi { 78651762288SStefan Hajnoczi BlockDriver *drv = bs->drv; 78751762288SStefan Hajnoczi 788396759adSNicholas Bellinger /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */ 789396759adSNicholas Bellinger if (bs->sg) 790396759adSNicholas Bellinger return 0; 791396759adSNicholas Bellinger 79251762288SStefan Hajnoczi /* query actual device if possible, otherwise just trust the hint */ 79351762288SStefan Hajnoczi if (drv->bdrv_getlength) { 79451762288SStefan Hajnoczi int64_t length = drv->bdrv_getlength(bs); 79551762288SStefan Hajnoczi if (length < 0) { 79651762288SStefan Hajnoczi return length; 79751762288SStefan Hajnoczi } 7987e382003SFam Zheng hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE); 79951762288SStefan Hajnoczi } 80051762288SStefan Hajnoczi 80151762288SStefan Hajnoczi bs->total_sectors = hint; 80251762288SStefan Hajnoczi return 0; 80351762288SStefan Hajnoczi } 80451762288SStefan Hajnoczi 805c3993cdcSStefan Hajnoczi /** 8069e8f1835SPaolo Bonzini * Set open flags for a given discard mode 8079e8f1835SPaolo Bonzini * 8089e8f1835SPaolo Bonzini * Return 0 on success, -1 if the discard mode was invalid. 8099e8f1835SPaolo Bonzini */ 8109e8f1835SPaolo Bonzini int bdrv_parse_discard_flags(const char *mode, int *flags) 8119e8f1835SPaolo Bonzini { 8129e8f1835SPaolo Bonzini *flags &= ~BDRV_O_UNMAP; 8139e8f1835SPaolo Bonzini 8149e8f1835SPaolo Bonzini if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) { 8159e8f1835SPaolo Bonzini /* do nothing */ 8169e8f1835SPaolo Bonzini } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) { 8179e8f1835SPaolo Bonzini *flags |= BDRV_O_UNMAP; 8189e8f1835SPaolo Bonzini } else { 8199e8f1835SPaolo Bonzini return -1; 8209e8f1835SPaolo Bonzini } 8219e8f1835SPaolo Bonzini 8229e8f1835SPaolo Bonzini return 0; 8239e8f1835SPaolo Bonzini } 8249e8f1835SPaolo Bonzini 8259e8f1835SPaolo Bonzini /** 826c3993cdcSStefan Hajnoczi * Set open flags for a given cache mode 827c3993cdcSStefan Hajnoczi * 828c3993cdcSStefan Hajnoczi * Return 0 on success, -1 if the cache mode was invalid. 829c3993cdcSStefan Hajnoczi */ 830c3993cdcSStefan Hajnoczi int bdrv_parse_cache_flags(const char *mode, int *flags) 831c3993cdcSStefan Hajnoczi { 832c3993cdcSStefan Hajnoczi *flags &= ~BDRV_O_CACHE_MASK; 833c3993cdcSStefan Hajnoczi 834c3993cdcSStefan Hajnoczi if (!strcmp(mode, "off") || !strcmp(mode, "none")) { 835c3993cdcSStefan Hajnoczi *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB; 83692196b2fSStefan Hajnoczi } else if (!strcmp(mode, "directsync")) { 83792196b2fSStefan Hajnoczi *flags |= BDRV_O_NOCACHE; 838c3993cdcSStefan Hajnoczi } else if (!strcmp(mode, "writeback")) { 839c3993cdcSStefan Hajnoczi *flags |= BDRV_O_CACHE_WB; 840c3993cdcSStefan Hajnoczi } else if (!strcmp(mode, "unsafe")) { 841c3993cdcSStefan Hajnoczi *flags |= BDRV_O_CACHE_WB; 842c3993cdcSStefan Hajnoczi *flags |= BDRV_O_NO_FLUSH; 843c3993cdcSStefan Hajnoczi } else if (!strcmp(mode, "writethrough")) { 844c3993cdcSStefan Hajnoczi /* this is the default */ 845c3993cdcSStefan Hajnoczi } else { 846c3993cdcSStefan Hajnoczi return -1; 847c3993cdcSStefan Hajnoczi } 848c3993cdcSStefan Hajnoczi 849c3993cdcSStefan Hajnoczi return 0; 850c3993cdcSStefan Hajnoczi } 851c3993cdcSStefan Hajnoczi 85253fec9d3SStefan Hajnoczi /** 85353fec9d3SStefan Hajnoczi * The copy-on-read flag is actually a reference count so multiple users may 85453fec9d3SStefan Hajnoczi * use the feature without worrying about clobbering its previous state. 85553fec9d3SStefan Hajnoczi * Copy-on-read stays enabled until all users have called to disable it. 85653fec9d3SStefan Hajnoczi */ 85753fec9d3SStefan Hajnoczi void bdrv_enable_copy_on_read(BlockDriverState *bs) 85853fec9d3SStefan Hajnoczi { 85953fec9d3SStefan Hajnoczi bs->copy_on_read++; 86053fec9d3SStefan Hajnoczi } 86153fec9d3SStefan Hajnoczi 86253fec9d3SStefan Hajnoczi void bdrv_disable_copy_on_read(BlockDriverState *bs) 86353fec9d3SStefan Hajnoczi { 86453fec9d3SStefan Hajnoczi assert(bs->copy_on_read > 0); 86553fec9d3SStefan Hajnoczi bs->copy_on_read--; 86653fec9d3SStefan Hajnoczi } 86753fec9d3SStefan Hajnoczi 8680b50cc88SKevin Wolf /* 869b1e6fc08SKevin Wolf * Returns the flags that a temporary snapshot should get, based on the 870b1e6fc08SKevin Wolf * originally requested flags (the originally requested image will have flags 871b1e6fc08SKevin Wolf * like a backing file) 872b1e6fc08SKevin Wolf */ 873b1e6fc08SKevin Wolf static int bdrv_temp_snapshot_flags(int flags) 874b1e6fc08SKevin Wolf { 875b1e6fc08SKevin Wolf return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY; 876b1e6fc08SKevin Wolf } 877b1e6fc08SKevin Wolf 878b1e6fc08SKevin Wolf /* 8790b50cc88SKevin Wolf * Returns the flags that bs->file should get, based on the given flags for 8800b50cc88SKevin Wolf * the parent BDS 8810b50cc88SKevin Wolf */ 8820b50cc88SKevin Wolf static int bdrv_inherited_flags(int flags) 8830b50cc88SKevin Wolf { 8840b50cc88SKevin Wolf /* Enable protocol handling, disable format probing for bs->file */ 8850b50cc88SKevin Wolf flags |= BDRV_O_PROTOCOL; 8860b50cc88SKevin Wolf 8870b50cc88SKevin Wolf /* Our block drivers take care to send flushes and respect unmap policy, 8880b50cc88SKevin Wolf * so we can enable both unconditionally on lower layers. */ 8890b50cc88SKevin Wolf flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP; 8900b50cc88SKevin Wolf 8910b50cc88SKevin Wolf /* Clear flags that only apply to the top layer */ 8925669b44dSKevin Wolf flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ); 8930b50cc88SKevin Wolf 8940b50cc88SKevin Wolf return flags; 8950b50cc88SKevin Wolf } 8960b50cc88SKevin Wolf 897317fc44eSKevin Wolf /* 898317fc44eSKevin Wolf * Returns the flags that bs->backing_hd should get, based on the given flags 899317fc44eSKevin Wolf * for the parent BDS 900317fc44eSKevin Wolf */ 901317fc44eSKevin Wolf static int bdrv_backing_flags(int flags) 902317fc44eSKevin Wolf { 903317fc44eSKevin Wolf /* backing files always opened read-only */ 904317fc44eSKevin Wolf flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ); 905317fc44eSKevin Wolf 906317fc44eSKevin Wolf /* snapshot=on is handled on the top layer */ 9078bfea15dSKevin Wolf flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY); 908317fc44eSKevin Wolf 909317fc44eSKevin Wolf return flags; 910317fc44eSKevin Wolf } 911317fc44eSKevin Wolf 9127b272452SKevin Wolf static int bdrv_open_flags(BlockDriverState *bs, int flags) 9137b272452SKevin Wolf { 9147b272452SKevin Wolf int open_flags = flags | BDRV_O_CACHE_WB; 9157b272452SKevin Wolf 9167b272452SKevin Wolf /* 9177b272452SKevin Wolf * Clear flags that are internal to the block layer before opening the 9187b272452SKevin Wolf * image. 9197b272452SKevin Wolf */ 92020cca275SKevin Wolf open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL); 9217b272452SKevin Wolf 9227b272452SKevin Wolf /* 9237b272452SKevin Wolf * Snapshots should be writable. 9247b272452SKevin Wolf */ 9258bfea15dSKevin Wolf if (flags & BDRV_O_TEMPORARY) { 9267b272452SKevin Wolf open_flags |= BDRV_O_RDWR; 9277b272452SKevin Wolf } 9287b272452SKevin Wolf 9297b272452SKevin Wolf return open_flags; 9307b272452SKevin Wolf } 9317b272452SKevin Wolf 932636ea370SKevin Wolf static void bdrv_assign_node_name(BlockDriverState *bs, 9336913c0c2SBenoît Canet const char *node_name, 9346913c0c2SBenoît Canet Error **errp) 9356913c0c2SBenoît Canet { 9366913c0c2SBenoît Canet if (!node_name) { 937636ea370SKevin Wolf return; 9386913c0c2SBenoît Canet } 9396913c0c2SBenoît Canet 9409aebf3b8SKevin Wolf /* Check for empty string or invalid characters */ 941f5bebbbbSMarkus Armbruster if (!id_wellformed(node_name)) { 9429aebf3b8SKevin Wolf error_setg(errp, "Invalid node name"); 943636ea370SKevin Wolf return; 9446913c0c2SBenoît Canet } 9456913c0c2SBenoît Canet 9460c5e94eeSBenoît Canet /* takes care of avoiding namespaces collisions */ 9477f06d47eSMarkus Armbruster if (blk_by_name(node_name)) { 9480c5e94eeSBenoît Canet error_setg(errp, "node-name=%s is conflicting with a device id", 9490c5e94eeSBenoît Canet node_name); 950636ea370SKevin Wolf return; 9510c5e94eeSBenoît Canet } 9520c5e94eeSBenoît Canet 9536913c0c2SBenoît Canet /* takes care of avoiding duplicates node names */ 9546913c0c2SBenoît Canet if (bdrv_find_node(node_name)) { 9556913c0c2SBenoît Canet error_setg(errp, "Duplicate node name"); 956636ea370SKevin Wolf return; 9576913c0c2SBenoît Canet } 9586913c0c2SBenoît Canet 9596913c0c2SBenoît Canet /* copy node name into the bs and insert it into the graph list */ 9606913c0c2SBenoît Canet pstrcpy(bs->node_name, sizeof(bs->node_name), node_name); 9616913c0c2SBenoît Canet QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list); 9626913c0c2SBenoît Canet } 9636913c0c2SBenoît Canet 964b6ce07aaSKevin Wolf /* 96557915332SKevin Wolf * Common part for opening disk images and files 966b6ad491aSKevin Wolf * 967b6ad491aSKevin Wolf * Removes all processed options from *options. 96857915332SKevin Wolf */ 969f500a6d3SKevin Wolf static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, 97034b5d2c6SMax Reitz QDict *options, int flags, BlockDriver *drv, Error **errp) 97157915332SKevin Wolf { 97257915332SKevin Wolf int ret, open_flags; 973035fccdfSKevin Wolf const char *filename; 9746913c0c2SBenoît Canet const char *node_name = NULL; 97534b5d2c6SMax Reitz Error *local_err = NULL; 97657915332SKevin Wolf 97757915332SKevin Wolf assert(drv != NULL); 9786405875cSPaolo Bonzini assert(bs->file == NULL); 979707ff828SKevin Wolf assert(options != NULL && bs->options != options); 98057915332SKevin Wolf 98145673671SKevin Wolf if (file != NULL) { 98245673671SKevin Wolf filename = file->filename; 98345673671SKevin Wolf } else { 98445673671SKevin Wolf filename = qdict_get_try_str(options, "filename"); 98545673671SKevin Wolf } 98645673671SKevin Wolf 987765003dbSKevin Wolf if (drv->bdrv_needs_filename && !filename) { 988765003dbSKevin Wolf error_setg(errp, "The '%s' block driver requires a file name", 989765003dbSKevin Wolf drv->format_name); 990765003dbSKevin Wolf return -EINVAL; 991765003dbSKevin Wolf } 992765003dbSKevin Wolf 99345673671SKevin Wolf trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name); 99428dcee10SStefan Hajnoczi 9956913c0c2SBenoît Canet node_name = qdict_get_try_str(options, "node-name"); 996636ea370SKevin Wolf bdrv_assign_node_name(bs, node_name, &local_err); 9970fb6395cSMarkus Armbruster if (local_err) { 998636ea370SKevin Wolf error_propagate(errp, local_err); 999636ea370SKevin Wolf return -EINVAL; 10006913c0c2SBenoît Canet } 10016913c0c2SBenoît Canet qdict_del(options, "node-name"); 10026913c0c2SBenoît Canet 10035d186eb0SKevin Wolf /* bdrv_open() with directly using a protocol as drv. This layer is already 10045d186eb0SKevin Wolf * opened, so assign it to bs (while file becomes a closed BlockDriverState) 10055d186eb0SKevin Wolf * and return immediately. */ 10065d186eb0SKevin Wolf if (file != NULL && drv->bdrv_file_open) { 10075d186eb0SKevin Wolf bdrv_swap(file, bs); 10085d186eb0SKevin Wolf return 0; 10095d186eb0SKevin Wolf } 10105d186eb0SKevin Wolf 101157915332SKevin Wolf bs->open_flags = flags; 10121b7fd729SPaolo Bonzini bs->guest_block_size = 512; 1013c25f53b0SPaolo Bonzini bs->request_alignment = 512; 10140d51b4deSAsias He bs->zero_beyond_eof = true; 1015b64ec4e4SFam Zheng open_flags = bdrv_open_flags(bs, flags); 1016b64ec4e4SFam Zheng bs->read_only = !(open_flags & BDRV_O_RDWR); 1017b64ec4e4SFam Zheng 1018b64ec4e4SFam Zheng if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) { 10198f94a6e4SKevin Wolf error_setg(errp, 10208f94a6e4SKevin Wolf !bs->read_only && bdrv_is_whitelisted(drv, true) 10218f94a6e4SKevin Wolf ? "Driver '%s' can only be used for read-only devices" 10228f94a6e4SKevin Wolf : "Driver '%s' is not whitelisted", 10238f94a6e4SKevin Wolf drv->format_name); 1024b64ec4e4SFam Zheng return -ENOTSUP; 1025b64ec4e4SFam Zheng } 102657915332SKevin Wolf 102753fec9d3SStefan Hajnoczi assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */ 10280ebd24e0SKevin Wolf if (flags & BDRV_O_COPY_ON_READ) { 10290ebd24e0SKevin Wolf if (!bs->read_only) { 103053fec9d3SStefan Hajnoczi bdrv_enable_copy_on_read(bs); 10310ebd24e0SKevin Wolf } else { 10320ebd24e0SKevin Wolf error_setg(errp, "Can't use copy-on-read on read-only device"); 10330ebd24e0SKevin Wolf return -EINVAL; 10340ebd24e0SKevin Wolf } 103553fec9d3SStefan Hajnoczi } 103653fec9d3SStefan Hajnoczi 1037c2ad1b0cSKevin Wolf if (filename != NULL) { 103857915332SKevin Wolf pstrcpy(bs->filename, sizeof(bs->filename), filename); 1039c2ad1b0cSKevin Wolf } else { 1040c2ad1b0cSKevin Wolf bs->filename[0] = '\0'; 1041c2ad1b0cSKevin Wolf } 104291af7014SMax Reitz pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename); 104357915332SKevin Wolf 104457915332SKevin Wolf bs->drv = drv; 10457267c094SAnthony Liguori bs->opaque = g_malloc0(drv->instance_size); 104657915332SKevin Wolf 104703f541bdSStefan Hajnoczi bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB); 1048e7c63796SStefan Hajnoczi 104966f82ceeSKevin Wolf /* Open the image, either directly or using a protocol */ 105066f82ceeSKevin Wolf if (drv->bdrv_file_open) { 10515d186eb0SKevin Wolf assert(file == NULL); 1052030be321SBenoît Canet assert(!drv->bdrv_needs_filename || filename != NULL); 105334b5d2c6SMax Reitz ret = drv->bdrv_file_open(bs, options, open_flags, &local_err); 1054f500a6d3SKevin Wolf } else { 10552af5ef70SKevin Wolf if (file == NULL) { 105634b5d2c6SMax Reitz error_setg(errp, "Can't use '%s' as a block driver for the " 105734b5d2c6SMax Reitz "protocol level", drv->format_name); 10582af5ef70SKevin Wolf ret = -EINVAL; 10592af5ef70SKevin Wolf goto free_and_fail; 10602af5ef70SKevin Wolf } 1061f500a6d3SKevin Wolf bs->file = file; 106234b5d2c6SMax Reitz ret = drv->bdrv_open(bs, options, open_flags, &local_err); 106366f82ceeSKevin Wolf } 106466f82ceeSKevin Wolf 106557915332SKevin Wolf if (ret < 0) { 106684d18f06SMarkus Armbruster if (local_err) { 106734b5d2c6SMax Reitz error_propagate(errp, local_err); 10682fa9aa59SDunrong Huang } else if (bs->filename[0]) { 10692fa9aa59SDunrong Huang error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename); 107034b5d2c6SMax Reitz } else { 107134b5d2c6SMax Reitz error_setg_errno(errp, -ret, "Could not open image"); 107234b5d2c6SMax Reitz } 107357915332SKevin Wolf goto free_and_fail; 107457915332SKevin Wolf } 107557915332SKevin Wolf 1076a1f688f4SMarkus Armbruster if (bs->encrypted) { 1077a1f688f4SMarkus Armbruster error_report("Encrypted images are deprecated"); 1078a1f688f4SMarkus Armbruster error_printf("Support for them will be removed in a future release.\n" 1079a1f688f4SMarkus Armbruster "You can use 'qemu-img convert' to convert your image" 1080a1f688f4SMarkus Armbruster " to an unencrypted one.\n"); 1081a1f688f4SMarkus Armbruster } 1082a1f688f4SMarkus Armbruster 108351762288SStefan Hajnoczi ret = refresh_total_sectors(bs, bs->total_sectors); 108451762288SStefan Hajnoczi if (ret < 0) { 108534b5d2c6SMax Reitz error_setg_errno(errp, -ret, "Could not refresh total sector count"); 108651762288SStefan Hajnoczi goto free_and_fail; 108757915332SKevin Wolf } 108851762288SStefan Hajnoczi 10893baca891SKevin Wolf bdrv_refresh_limits(bs, &local_err); 10903baca891SKevin Wolf if (local_err) { 10913baca891SKevin Wolf error_propagate(errp, local_err); 10923baca891SKevin Wolf ret = -EINVAL; 10933baca891SKevin Wolf goto free_and_fail; 10943baca891SKevin Wolf } 10953baca891SKevin Wolf 1096c25f53b0SPaolo Bonzini assert(bdrv_opt_mem_align(bs) != 0); 109747ea2de2SKevin Wolf assert((bs->request_alignment != 0) || bs->sg); 109857915332SKevin Wolf return 0; 109957915332SKevin Wolf 110057915332SKevin Wolf free_and_fail: 110166f82ceeSKevin Wolf bs->file = NULL; 11027267c094SAnthony Liguori g_free(bs->opaque); 110357915332SKevin Wolf bs->opaque = NULL; 110457915332SKevin Wolf bs->drv = NULL; 110557915332SKevin Wolf return ret; 110657915332SKevin Wolf } 110757915332SKevin Wolf 11085e5c4f63SKevin Wolf static QDict *parse_json_filename(const char *filename, Error **errp) 11095e5c4f63SKevin Wolf { 11105e5c4f63SKevin Wolf QObject *options_obj; 11115e5c4f63SKevin Wolf QDict *options; 11125e5c4f63SKevin Wolf int ret; 11135e5c4f63SKevin Wolf 11145e5c4f63SKevin Wolf ret = strstart(filename, "json:", &filename); 11155e5c4f63SKevin Wolf assert(ret); 11165e5c4f63SKevin Wolf 11175e5c4f63SKevin Wolf options_obj = qobject_from_json(filename); 11185e5c4f63SKevin Wolf if (!options_obj) { 11195e5c4f63SKevin Wolf error_setg(errp, "Could not parse the JSON options"); 11205e5c4f63SKevin Wolf return NULL; 11215e5c4f63SKevin Wolf } 11225e5c4f63SKevin Wolf 11235e5c4f63SKevin Wolf if (qobject_type(options_obj) != QTYPE_QDICT) { 11245e5c4f63SKevin Wolf qobject_decref(options_obj); 11255e5c4f63SKevin Wolf error_setg(errp, "Invalid JSON object given"); 11265e5c4f63SKevin Wolf return NULL; 11275e5c4f63SKevin Wolf } 11285e5c4f63SKevin Wolf 11295e5c4f63SKevin Wolf options = qobject_to_qdict(options_obj); 11305e5c4f63SKevin Wolf qdict_flatten(options); 11315e5c4f63SKevin Wolf 11325e5c4f63SKevin Wolf return options; 11335e5c4f63SKevin Wolf } 11345e5c4f63SKevin Wolf 113557915332SKevin Wolf /* 1136f54120ffSKevin Wolf * Fills in default options for opening images and converts the legacy 1137f54120ffSKevin Wolf * filename/flags pair to option QDict entries. 1138f54120ffSKevin Wolf */ 11395e5c4f63SKevin Wolf static int bdrv_fill_options(QDict **options, const char **pfilename, int flags, 114017b005f1SKevin Wolf BlockDriver *drv, Error **errp) 1141f54120ffSKevin Wolf { 11425e5c4f63SKevin Wolf const char *filename = *pfilename; 1143f54120ffSKevin Wolf const char *drvname; 1144462f5bcfSKevin Wolf bool protocol = flags & BDRV_O_PROTOCOL; 1145f54120ffSKevin Wolf bool parse_filename = false; 1146f54120ffSKevin Wolf Error *local_err = NULL; 1147f54120ffSKevin Wolf 11485e5c4f63SKevin Wolf /* Parse json: pseudo-protocol */ 11495e5c4f63SKevin Wolf if (filename && g_str_has_prefix(filename, "json:")) { 11505e5c4f63SKevin Wolf QDict *json_options = parse_json_filename(filename, &local_err); 11515e5c4f63SKevin Wolf if (local_err) { 11525e5c4f63SKevin Wolf error_propagate(errp, local_err); 11535e5c4f63SKevin Wolf return -EINVAL; 11545e5c4f63SKevin Wolf } 11555e5c4f63SKevin Wolf 11565e5c4f63SKevin Wolf /* Options given in the filename have lower priority than options 11575e5c4f63SKevin Wolf * specified directly */ 11585e5c4f63SKevin Wolf qdict_join(*options, json_options, false); 11595e5c4f63SKevin Wolf QDECREF(json_options); 11605e5c4f63SKevin Wolf *pfilename = filename = NULL; 11615e5c4f63SKevin Wolf } 11625e5c4f63SKevin Wolf 1163f54120ffSKevin Wolf /* Fetch the file name from the options QDict if necessary */ 116417b005f1SKevin Wolf if (protocol && filename) { 1165f54120ffSKevin Wolf if (!qdict_haskey(*options, "filename")) { 1166f54120ffSKevin Wolf qdict_put(*options, "filename", qstring_from_str(filename)); 1167f54120ffSKevin Wolf parse_filename = true; 1168f54120ffSKevin Wolf } else { 1169f54120ffSKevin Wolf error_setg(errp, "Can't specify 'file' and 'filename' options at " 1170f54120ffSKevin Wolf "the same time"); 1171f54120ffSKevin Wolf return -EINVAL; 1172f54120ffSKevin Wolf } 1173f54120ffSKevin Wolf } 1174f54120ffSKevin Wolf 1175f54120ffSKevin Wolf /* Find the right block driver */ 1176f54120ffSKevin Wolf filename = qdict_get_try_str(*options, "filename"); 1177f54120ffSKevin Wolf drvname = qdict_get_try_str(*options, "driver"); 1178f54120ffSKevin Wolf 117917b005f1SKevin Wolf if (drv) { 118017b005f1SKevin Wolf if (drvname) { 118117b005f1SKevin Wolf error_setg(errp, "Driver specified twice"); 118217b005f1SKevin Wolf return -EINVAL; 118317b005f1SKevin Wolf } 118417b005f1SKevin Wolf drvname = drv->format_name; 118517b005f1SKevin Wolf qdict_put(*options, "driver", qstring_from_str(drvname)); 118617b005f1SKevin Wolf } else { 118717b005f1SKevin Wolf if (!drvname && protocol) { 1188f54120ffSKevin Wolf if (filename) { 1189b65a5e12SMax Reitz drv = bdrv_find_protocol(filename, parse_filename, errp); 1190f54120ffSKevin Wolf if (!drv) { 1191f54120ffSKevin Wolf return -EINVAL; 1192f54120ffSKevin Wolf } 1193f54120ffSKevin Wolf 1194f54120ffSKevin Wolf drvname = drv->format_name; 1195f54120ffSKevin Wolf qdict_put(*options, "driver", qstring_from_str(drvname)); 1196f54120ffSKevin Wolf } else { 1197f54120ffSKevin Wolf error_setg(errp, "Must specify either driver or file"); 1198f54120ffSKevin Wolf return -EINVAL; 1199f54120ffSKevin Wolf } 120017b005f1SKevin Wolf } else if (drvname) { 1201f54120ffSKevin Wolf drv = bdrv_find_format(drvname); 1202f54120ffSKevin Wolf if (!drv) { 1203f54120ffSKevin Wolf error_setg(errp, "Unknown driver '%s'", drvname); 1204f54120ffSKevin Wolf return -ENOENT; 1205f54120ffSKevin Wolf } 120617b005f1SKevin Wolf } 120717b005f1SKevin Wolf } 120817b005f1SKevin Wolf 120917b005f1SKevin Wolf assert(drv || !protocol); 1210f54120ffSKevin Wolf 1211f54120ffSKevin Wolf /* Driver-specific filename parsing */ 121217b005f1SKevin Wolf if (drv && drv->bdrv_parse_filename && parse_filename) { 1213f54120ffSKevin Wolf drv->bdrv_parse_filename(filename, *options, &local_err); 1214f54120ffSKevin Wolf if (local_err) { 1215f54120ffSKevin Wolf error_propagate(errp, local_err); 1216f54120ffSKevin Wolf return -EINVAL; 1217f54120ffSKevin Wolf } 1218f54120ffSKevin Wolf 1219f54120ffSKevin Wolf if (!drv->bdrv_needs_filename) { 1220f54120ffSKevin Wolf qdict_del(*options, "filename"); 1221f54120ffSKevin Wolf } 1222f54120ffSKevin Wolf } 1223f54120ffSKevin Wolf 1224f54120ffSKevin Wolf return 0; 1225f54120ffSKevin Wolf } 1226f54120ffSKevin Wolf 12278d24cce1SFam Zheng void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) 12288d24cce1SFam Zheng { 12298d24cce1SFam Zheng 1230826b6ca0SFam Zheng if (bs->backing_hd) { 1231826b6ca0SFam Zheng assert(bs->backing_blocker); 1232826b6ca0SFam Zheng bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker); 1233826b6ca0SFam Zheng } else if (backing_hd) { 1234826b6ca0SFam Zheng error_setg(&bs->backing_blocker, 123581e5f78aSAlberto Garcia "node is used as backing hd of '%s'", 123681e5f78aSAlberto Garcia bdrv_get_device_or_node_name(bs)); 1237826b6ca0SFam Zheng } 1238826b6ca0SFam Zheng 12398d24cce1SFam Zheng bs->backing_hd = backing_hd; 12408d24cce1SFam Zheng if (!backing_hd) { 1241826b6ca0SFam Zheng error_free(bs->backing_blocker); 1242826b6ca0SFam Zheng bs->backing_blocker = NULL; 12438d24cce1SFam Zheng goto out; 12448d24cce1SFam Zheng } 12458d24cce1SFam Zheng bs->open_flags &= ~BDRV_O_NO_BACKING; 12468d24cce1SFam Zheng pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename); 12478d24cce1SFam Zheng pstrcpy(bs->backing_format, sizeof(bs->backing_format), 12488d24cce1SFam Zheng backing_hd->drv ? backing_hd->drv->format_name : ""); 1249826b6ca0SFam Zheng 1250826b6ca0SFam Zheng bdrv_op_block_all(bs->backing_hd, bs->backing_blocker); 1251826b6ca0SFam Zheng /* Otherwise we won't be able to commit due to check in bdrv_commit */ 1252bb00021dSFam Zheng bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, 1253826b6ca0SFam Zheng bs->backing_blocker); 12548d24cce1SFam Zheng out: 12553baca891SKevin Wolf bdrv_refresh_limits(bs, NULL); 12568d24cce1SFam Zheng } 12578d24cce1SFam Zheng 125831ca6d07SKevin Wolf /* 125931ca6d07SKevin Wolf * Opens the backing file for a BlockDriverState if not yet open 126031ca6d07SKevin Wolf * 126131ca6d07SKevin Wolf * options is a QDict of options to pass to the block drivers, or NULL for an 126231ca6d07SKevin Wolf * empty set of options. The reference to the QDict is transferred to this 126331ca6d07SKevin Wolf * function (even on failure), so if the caller intends to reuse the dictionary, 126431ca6d07SKevin Wolf * it needs to use QINCREF() before calling bdrv_file_open. 126531ca6d07SKevin Wolf */ 126634b5d2c6SMax Reitz int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) 12679156df12SPaolo Bonzini { 12681ba4b6a5SBenoît Canet char *backing_filename = g_malloc0(PATH_MAX); 1269317fc44eSKevin Wolf int ret = 0; 12708d24cce1SFam Zheng BlockDriverState *backing_hd; 127134b5d2c6SMax Reitz Error *local_err = NULL; 12729156df12SPaolo Bonzini 12739156df12SPaolo Bonzini if (bs->backing_hd != NULL) { 127431ca6d07SKevin Wolf QDECREF(options); 12751ba4b6a5SBenoît Canet goto free_exit; 12769156df12SPaolo Bonzini } 12779156df12SPaolo Bonzini 127831ca6d07SKevin Wolf /* NULL means an empty set of options */ 127931ca6d07SKevin Wolf if (options == NULL) { 128031ca6d07SKevin Wolf options = qdict_new(); 128131ca6d07SKevin Wolf } 128231ca6d07SKevin Wolf 12839156df12SPaolo Bonzini bs->open_flags &= ~BDRV_O_NO_BACKING; 12841cb6f506SKevin Wolf if (qdict_haskey(options, "file.filename")) { 12851cb6f506SKevin Wolf backing_filename[0] = '\0'; 12861cb6f506SKevin Wolf } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) { 128731ca6d07SKevin Wolf QDECREF(options); 12881ba4b6a5SBenoît Canet goto free_exit; 1289dbecebddSFam Zheng } else { 12909f07429eSMax Reitz bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX, 12919f07429eSMax Reitz &local_err); 12929f07429eSMax Reitz if (local_err) { 12939f07429eSMax Reitz ret = -EINVAL; 12949f07429eSMax Reitz error_propagate(errp, local_err); 12959f07429eSMax Reitz QDECREF(options); 12969f07429eSMax Reitz goto free_exit; 12979f07429eSMax Reitz } 12989156df12SPaolo Bonzini } 12999156df12SPaolo Bonzini 13008ee79e70SKevin Wolf if (!bs->drv || !bs->drv->supports_backing) { 13018ee79e70SKevin Wolf ret = -EINVAL; 13028ee79e70SKevin Wolf error_setg(errp, "Driver doesn't support backing files"); 13038ee79e70SKevin Wolf QDECREF(options); 13048ee79e70SKevin Wolf goto free_exit; 13058ee79e70SKevin Wolf } 13068ee79e70SKevin Wolf 1307e4e9986bSMarkus Armbruster backing_hd = bdrv_new(); 13088d24cce1SFam Zheng 1309c5f6e493SKevin Wolf if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) { 1310c5f6e493SKevin Wolf qdict_put(options, "driver", qstring_from_str(bs->backing_format)); 13119156df12SPaolo Bonzini } 13129156df12SPaolo Bonzini 1313f67503e5SMax Reitz assert(bs->backing_hd == NULL); 13148d24cce1SFam Zheng ret = bdrv_open(&backing_hd, 1315ddf5636dSMax Reitz *backing_filename ? backing_filename : NULL, NULL, options, 1316c5f6e493SKevin Wolf bdrv_backing_flags(bs->open_flags), NULL, &local_err); 13179156df12SPaolo Bonzini if (ret < 0) { 13188d24cce1SFam Zheng bdrv_unref(backing_hd); 13198d24cce1SFam Zheng backing_hd = NULL; 13209156df12SPaolo Bonzini bs->open_flags |= BDRV_O_NO_BACKING; 1321b04b6b6eSFam Zheng error_setg(errp, "Could not open backing file: %s", 1322b04b6b6eSFam Zheng error_get_pretty(local_err)); 1323b04b6b6eSFam Zheng error_free(local_err); 13241ba4b6a5SBenoît Canet goto free_exit; 13259156df12SPaolo Bonzini } 13268d24cce1SFam Zheng bdrv_set_backing_hd(bs, backing_hd); 1327d80ac658SPeter Feiner 13281ba4b6a5SBenoît Canet free_exit: 13291ba4b6a5SBenoît Canet g_free(backing_filename); 13301ba4b6a5SBenoît Canet return ret; 13319156df12SPaolo Bonzini } 13329156df12SPaolo Bonzini 1333b6ce07aaSKevin Wolf /* 1334da557aacSMax Reitz * Opens a disk image whose options are given as BlockdevRef in another block 1335da557aacSMax Reitz * device's options. 1336da557aacSMax Reitz * 1337da557aacSMax Reitz * If allow_none is true, no image will be opened if filename is false and no 1338da557aacSMax Reitz * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned. 1339da557aacSMax Reitz * 1340da557aacSMax Reitz * bdrev_key specifies the key for the image's BlockdevRef in the options QDict. 1341da557aacSMax Reitz * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict 1342da557aacSMax Reitz * itself, all options starting with "${bdref_key}." are considered part of the 1343da557aacSMax Reitz * BlockdevRef. 1344da557aacSMax Reitz * 1345da557aacSMax Reitz * The BlockdevRef will be removed from the options QDict. 1346f67503e5SMax Reitz * 1347f67503e5SMax Reitz * To conform with the behavior of bdrv_open(), *pbs has to be NULL. 1348da557aacSMax Reitz */ 1349da557aacSMax Reitz int bdrv_open_image(BlockDriverState **pbs, const char *filename, 1350da557aacSMax Reitz QDict *options, const char *bdref_key, int flags, 1351f7d9fd8cSMax Reitz bool allow_none, Error **errp) 1352da557aacSMax Reitz { 1353da557aacSMax Reitz QDict *image_options; 1354da557aacSMax Reitz int ret; 1355da557aacSMax Reitz char *bdref_key_dot; 1356da557aacSMax Reitz const char *reference; 1357da557aacSMax Reitz 1358f67503e5SMax Reitz assert(pbs); 1359f67503e5SMax Reitz assert(*pbs == NULL); 1360f67503e5SMax Reitz 1361da557aacSMax Reitz bdref_key_dot = g_strdup_printf("%s.", bdref_key); 1362da557aacSMax Reitz qdict_extract_subqdict(options, &image_options, bdref_key_dot); 1363da557aacSMax Reitz g_free(bdref_key_dot); 1364da557aacSMax Reitz 1365da557aacSMax Reitz reference = qdict_get_try_str(options, bdref_key); 1366da557aacSMax Reitz if (!filename && !reference && !qdict_size(image_options)) { 1367da557aacSMax Reitz if (allow_none) { 1368da557aacSMax Reitz ret = 0; 1369da557aacSMax Reitz } else { 1370da557aacSMax Reitz error_setg(errp, "A block device must be specified for \"%s\"", 1371da557aacSMax Reitz bdref_key); 1372da557aacSMax Reitz ret = -EINVAL; 1373da557aacSMax Reitz } 1374b20e61e0SMarkus Armbruster QDECREF(image_options); 1375da557aacSMax Reitz goto done; 1376da557aacSMax Reitz } 1377da557aacSMax Reitz 1378f7d9fd8cSMax Reitz ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp); 1379da557aacSMax Reitz 1380da557aacSMax Reitz done: 1381da557aacSMax Reitz qdict_del(options, bdref_key); 1382da557aacSMax Reitz return ret; 1383da557aacSMax Reitz } 1384da557aacSMax Reitz 13856b8aeca5SChen Gang int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp) 1386b998875dSKevin Wolf { 1387b998875dSKevin Wolf /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */ 13881ba4b6a5SBenoît Canet char *tmp_filename = g_malloc0(PATH_MAX + 1); 1389b998875dSKevin Wolf int64_t total_size; 139083d0521aSChunyan Liu QemuOpts *opts = NULL; 1391b998875dSKevin Wolf QDict *snapshot_options; 1392b998875dSKevin Wolf BlockDriverState *bs_snapshot; 1393b998875dSKevin Wolf Error *local_err; 1394b998875dSKevin Wolf int ret; 1395b998875dSKevin Wolf 1396b998875dSKevin Wolf /* if snapshot, we create a temporary backing file and open it 1397b998875dSKevin Wolf instead of opening 'filename' directly */ 1398b998875dSKevin Wolf 1399b998875dSKevin Wolf /* Get the required size from the image */ 1400f187743aSKevin Wolf total_size = bdrv_getlength(bs); 1401f187743aSKevin Wolf if (total_size < 0) { 14026b8aeca5SChen Gang ret = total_size; 1403f187743aSKevin Wolf error_setg_errno(errp, -total_size, "Could not get image size"); 14041ba4b6a5SBenoît Canet goto out; 1405f187743aSKevin Wolf } 1406b998875dSKevin Wolf 1407b998875dSKevin Wolf /* Create the temporary image */ 14081ba4b6a5SBenoît Canet ret = get_tmp_filename(tmp_filename, PATH_MAX + 1); 1409b998875dSKevin Wolf if (ret < 0) { 1410b998875dSKevin Wolf error_setg_errno(errp, -ret, "Could not get temporary filename"); 14111ba4b6a5SBenoît Canet goto out; 1412b998875dSKevin Wolf } 1413b998875dSKevin Wolf 1414ef810437SMax Reitz opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0, 1415c282e1fdSChunyan Liu &error_abort); 141639101f25SMarkus Armbruster qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort); 1417ef810437SMax Reitz ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err); 141883d0521aSChunyan Liu qemu_opts_del(opts); 1419b998875dSKevin Wolf if (ret < 0) { 1420b998875dSKevin Wolf error_setg_errno(errp, -ret, "Could not create temporary overlay " 1421b998875dSKevin Wolf "'%s': %s", tmp_filename, 1422b998875dSKevin Wolf error_get_pretty(local_err)); 1423b998875dSKevin Wolf error_free(local_err); 14241ba4b6a5SBenoît Canet goto out; 1425b998875dSKevin Wolf } 1426b998875dSKevin Wolf 1427b998875dSKevin Wolf /* Prepare a new options QDict for the temporary file */ 1428b998875dSKevin Wolf snapshot_options = qdict_new(); 1429b998875dSKevin Wolf qdict_put(snapshot_options, "file.driver", 1430b998875dSKevin Wolf qstring_from_str("file")); 1431b998875dSKevin Wolf qdict_put(snapshot_options, "file.filename", 1432b998875dSKevin Wolf qstring_from_str(tmp_filename)); 1433b998875dSKevin Wolf 1434e4e9986bSMarkus Armbruster bs_snapshot = bdrv_new(); 1435b998875dSKevin Wolf 1436b998875dSKevin Wolf ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options, 1437ef810437SMax Reitz flags, &bdrv_qcow2, &local_err); 1438b998875dSKevin Wolf if (ret < 0) { 1439b998875dSKevin Wolf error_propagate(errp, local_err); 14401ba4b6a5SBenoît Canet goto out; 1441b998875dSKevin Wolf } 1442b998875dSKevin Wolf 1443b998875dSKevin Wolf bdrv_append(bs_snapshot, bs); 14441ba4b6a5SBenoît Canet 14451ba4b6a5SBenoît Canet out: 14461ba4b6a5SBenoît Canet g_free(tmp_filename); 14476b8aeca5SChen Gang return ret; 1448b998875dSKevin Wolf } 1449b998875dSKevin Wolf 1450da557aacSMax Reitz /* 1451b6ce07aaSKevin Wolf * Opens a disk image (raw, qcow2, vmdk, ...) 1452de9c0cecSKevin Wolf * 1453de9c0cecSKevin Wolf * options is a QDict of options to pass to the block drivers, or NULL for an 1454de9c0cecSKevin Wolf * empty set of options. The reference to the QDict belongs to the block layer 1455de9c0cecSKevin Wolf * after the call (even on failure), so if the caller intends to reuse the 1456de9c0cecSKevin Wolf * dictionary, it needs to use QINCREF() before calling bdrv_open. 1457f67503e5SMax Reitz * 1458f67503e5SMax Reitz * If *pbs is NULL, a new BDS will be created with a pointer to it stored there. 1459f67503e5SMax Reitz * If it is not NULL, the referenced BDS will be reused. 1460ddf5636dSMax Reitz * 1461ddf5636dSMax Reitz * The reference parameter may be used to specify an existing block device which 1462ddf5636dSMax Reitz * should be opened. If specified, neither options nor a filename may be given, 1463ddf5636dSMax Reitz * nor can an existing BDS be reused (that is, *pbs has to be NULL). 1464b6ce07aaSKevin Wolf */ 1465ddf5636dSMax Reitz int bdrv_open(BlockDriverState **pbs, const char *filename, 1466ddf5636dSMax Reitz const char *reference, QDict *options, int flags, 1467ddf5636dSMax Reitz BlockDriver *drv, Error **errp) 1468ea2384d3Sbellard { 1469b6ce07aaSKevin Wolf int ret; 1470f67503e5SMax Reitz BlockDriverState *file = NULL, *bs; 147174fe54f2SKevin Wolf const char *drvname; 147234b5d2c6SMax Reitz Error *local_err = NULL; 1473b1e6fc08SKevin Wolf int snapshot_flags = 0; 147433e3963eSbellard 1475f67503e5SMax Reitz assert(pbs); 1476f67503e5SMax Reitz 1477ddf5636dSMax Reitz if (reference) { 1478ddf5636dSMax Reitz bool options_non_empty = options ? qdict_size(options) : false; 1479ddf5636dSMax Reitz QDECREF(options); 1480ddf5636dSMax Reitz 1481ddf5636dSMax Reitz if (*pbs) { 1482ddf5636dSMax Reitz error_setg(errp, "Cannot reuse an existing BDS when referencing " 1483ddf5636dSMax Reitz "another block device"); 1484ddf5636dSMax Reitz return -EINVAL; 1485ddf5636dSMax Reitz } 1486ddf5636dSMax Reitz 1487ddf5636dSMax Reitz if (filename || options_non_empty) { 1488ddf5636dSMax Reitz error_setg(errp, "Cannot reference an existing block device with " 1489ddf5636dSMax Reitz "additional options or a new filename"); 1490ddf5636dSMax Reitz return -EINVAL; 1491ddf5636dSMax Reitz } 1492ddf5636dSMax Reitz 1493ddf5636dSMax Reitz bs = bdrv_lookup_bs(reference, reference, errp); 1494ddf5636dSMax Reitz if (!bs) { 1495ddf5636dSMax Reitz return -ENODEV; 1496ddf5636dSMax Reitz } 1497ddf5636dSMax Reitz bdrv_ref(bs); 1498ddf5636dSMax Reitz *pbs = bs; 1499ddf5636dSMax Reitz return 0; 1500ddf5636dSMax Reitz } 1501ddf5636dSMax Reitz 1502f67503e5SMax Reitz if (*pbs) { 1503f67503e5SMax Reitz bs = *pbs; 1504f67503e5SMax Reitz } else { 1505e4e9986bSMarkus Armbruster bs = bdrv_new(); 1506f67503e5SMax Reitz } 1507f67503e5SMax Reitz 1508de9c0cecSKevin Wolf /* NULL means an empty set of options */ 1509de9c0cecSKevin Wolf if (options == NULL) { 1510de9c0cecSKevin Wolf options = qdict_new(); 1511de9c0cecSKevin Wolf } 1512de9c0cecSKevin Wolf 151317b005f1SKevin Wolf ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err); 1514462f5bcfSKevin Wolf if (local_err) { 1515462f5bcfSKevin Wolf goto fail; 1516462f5bcfSKevin Wolf } 1517462f5bcfSKevin Wolf 151876c591b0SKevin Wolf /* Find the right image format driver */ 151976c591b0SKevin Wolf drv = NULL; 152076c591b0SKevin Wolf drvname = qdict_get_try_str(options, "driver"); 152176c591b0SKevin Wolf if (drvname) { 152276c591b0SKevin Wolf drv = bdrv_find_format(drvname); 152376c591b0SKevin Wolf qdict_del(options, "driver"); 152476c591b0SKevin Wolf if (!drv) { 152576c591b0SKevin Wolf error_setg(errp, "Unknown driver: '%s'", drvname); 152676c591b0SKevin Wolf ret = -EINVAL; 152776c591b0SKevin Wolf goto fail; 152876c591b0SKevin Wolf } 152976c591b0SKevin Wolf } 153076c591b0SKevin Wolf 153176c591b0SKevin Wolf assert(drvname || !(flags & BDRV_O_PROTOCOL)); 153276c591b0SKevin Wolf if (drv && !drv->bdrv_file_open) { 153376c591b0SKevin Wolf /* If the user explicitly wants a format driver here, we'll need to add 153476c591b0SKevin Wolf * another layer for the protocol in bs->file */ 153576c591b0SKevin Wolf flags &= ~BDRV_O_PROTOCOL; 153676c591b0SKevin Wolf } 153776c591b0SKevin Wolf 1538de9c0cecSKevin Wolf bs->options = options; 1539b6ad491aSKevin Wolf options = qdict_clone_shallow(options); 1540de9c0cecSKevin Wolf 1541f500a6d3SKevin Wolf /* Open image file without format layer */ 1542f4788adcSKevin Wolf if ((flags & BDRV_O_PROTOCOL) == 0) { 1543be028adcSJeff Cody if (flags & BDRV_O_RDWR) { 1544be028adcSJeff Cody flags |= BDRV_O_ALLOW_RDWR; 1545be028adcSJeff Cody } 1546b1e6fc08SKevin Wolf if (flags & BDRV_O_SNAPSHOT) { 1547b1e6fc08SKevin Wolf snapshot_flags = bdrv_temp_snapshot_flags(flags); 1548b1e6fc08SKevin Wolf flags = bdrv_backing_flags(flags); 1549b1e6fc08SKevin Wolf } 1550be028adcSJeff Cody 1551f67503e5SMax Reitz assert(file == NULL); 1552054963f8SMax Reitz ret = bdrv_open_image(&file, filename, options, "file", 15530b50cc88SKevin Wolf bdrv_inherited_flags(flags), 15540b50cc88SKevin Wolf true, &local_err); 1555f500a6d3SKevin Wolf if (ret < 0) { 15568bfea15dSKevin Wolf goto fail; 1557f500a6d3SKevin Wolf } 1558f4788adcSKevin Wolf } 1559f500a6d3SKevin Wolf 156076c591b0SKevin Wolf /* Image format probing */ 156138f3ef57SKevin Wolf bs->probed = !drv; 156276c591b0SKevin Wolf if (!drv && file) { 156334b5d2c6SMax Reitz ret = find_image_format(file, filename, &drv, &local_err); 156417b005f1SKevin Wolf if (ret < 0) { 156517b005f1SKevin Wolf goto fail; 156617b005f1SKevin Wolf } 156776c591b0SKevin Wolf } else if (!drv) { 15682a05cbe4SMax Reitz error_setg(errp, "Must specify either driver or file"); 15692a05cbe4SMax Reitz ret = -EINVAL; 15708bfea15dSKevin Wolf goto fail; 15712a05cbe4SMax Reitz } 1572f500a6d3SKevin Wolf 1573b6ce07aaSKevin Wolf /* Open the image */ 157434b5d2c6SMax Reitz ret = bdrv_open_common(bs, file, options, flags, drv, &local_err); 1575b6ce07aaSKevin Wolf if (ret < 0) { 15768bfea15dSKevin Wolf goto fail; 15776987307cSChristoph Hellwig } 15786987307cSChristoph Hellwig 15792a05cbe4SMax Reitz if (file && (bs->file != file)) { 15804f6fd349SFam Zheng bdrv_unref(file); 1581f500a6d3SKevin Wolf file = NULL; 1582f500a6d3SKevin Wolf } 1583f500a6d3SKevin Wolf 1584b6ce07aaSKevin Wolf /* If there is a backing file, use it */ 15859156df12SPaolo Bonzini if ((flags & BDRV_O_NO_BACKING) == 0) { 158631ca6d07SKevin Wolf QDict *backing_options; 158731ca6d07SKevin Wolf 15885726d872SBenoît Canet qdict_extract_subqdict(options, &backing_options, "backing."); 158934b5d2c6SMax Reitz ret = bdrv_open_backing_file(bs, backing_options, &local_err); 1590b6ce07aaSKevin Wolf if (ret < 0) { 1591b6ad491aSKevin Wolf goto close_and_fail; 1592b6ce07aaSKevin Wolf } 1593b6ce07aaSKevin Wolf } 1594b6ce07aaSKevin Wolf 159591af7014SMax Reitz bdrv_refresh_filename(bs); 159691af7014SMax Reitz 1597b998875dSKevin Wolf /* For snapshot=on, create a temporary qcow2 overlay. bs points to the 1598b998875dSKevin Wolf * temporary snapshot afterwards. */ 1599b1e6fc08SKevin Wolf if (snapshot_flags) { 16006b8aeca5SChen Gang ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err); 1601b998875dSKevin Wolf if (local_err) { 1602b998875dSKevin Wolf goto close_and_fail; 1603b998875dSKevin Wolf } 1604b998875dSKevin Wolf } 1605b998875dSKevin Wolf 1606b6ad491aSKevin Wolf /* Check if any unknown options were used */ 16075acd9d81SMax Reitz if (options && (qdict_size(options) != 0)) { 1608b6ad491aSKevin Wolf const QDictEntry *entry = qdict_first(options); 16095acd9d81SMax Reitz if (flags & BDRV_O_PROTOCOL) { 16105acd9d81SMax Reitz error_setg(errp, "Block protocol '%s' doesn't support the option " 16115acd9d81SMax Reitz "'%s'", drv->format_name, entry->key); 16125acd9d81SMax Reitz } else { 161334b5d2c6SMax Reitz error_setg(errp, "Block format '%s' used by device '%s' doesn't " 16145acd9d81SMax Reitz "support the option '%s'", drv->format_name, 1615bfb197e0SMarkus Armbruster bdrv_get_device_name(bs), entry->key); 16165acd9d81SMax Reitz } 1617b6ad491aSKevin Wolf 1618b6ad491aSKevin Wolf ret = -EINVAL; 1619b6ad491aSKevin Wolf goto close_and_fail; 1620b6ad491aSKevin Wolf } 1621b6ad491aSKevin Wolf 1622b6ce07aaSKevin Wolf if (!bdrv_key_required(bs)) { 1623a7f53e26SMarkus Armbruster if (bs->blk) { 1624a7f53e26SMarkus Armbruster blk_dev_change_media_cb(bs->blk, true); 1625a7f53e26SMarkus Armbruster } 1626c3adb58fSMarkus Armbruster } else if (!runstate_check(RUN_STATE_PRELAUNCH) 1627c3adb58fSMarkus Armbruster && !runstate_check(RUN_STATE_INMIGRATE) 1628c3adb58fSMarkus Armbruster && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */ 1629c3adb58fSMarkus Armbruster error_setg(errp, 1630c3adb58fSMarkus Armbruster "Guest must be stopped for opening of encrypted image"); 1631c3adb58fSMarkus Armbruster ret = -EBUSY; 1632c3adb58fSMarkus Armbruster goto close_and_fail; 1633b6ce07aaSKevin Wolf } 1634b6ce07aaSKevin Wolf 1635c3adb58fSMarkus Armbruster QDECREF(options); 1636f67503e5SMax Reitz *pbs = bs; 1637b6ce07aaSKevin Wolf return 0; 1638b6ce07aaSKevin Wolf 16398bfea15dSKevin Wolf fail: 1640f500a6d3SKevin Wolf if (file != NULL) { 16414f6fd349SFam Zheng bdrv_unref(file); 1642f500a6d3SKevin Wolf } 1643de9c0cecSKevin Wolf QDECREF(bs->options); 1644b6ad491aSKevin Wolf QDECREF(options); 1645de9c0cecSKevin Wolf bs->options = NULL; 1646f67503e5SMax Reitz if (!*pbs) { 1647f67503e5SMax Reitz /* If *pbs is NULL, a new BDS has been created in this function and 1648f67503e5SMax Reitz needs to be freed now. Otherwise, it does not need to be closed, 1649f67503e5SMax Reitz since it has not really been opened yet. */ 1650f67503e5SMax Reitz bdrv_unref(bs); 1651f67503e5SMax Reitz } 165284d18f06SMarkus Armbruster if (local_err) { 165334b5d2c6SMax Reitz error_propagate(errp, local_err); 165434b5d2c6SMax Reitz } 1655b6ad491aSKevin Wolf return ret; 1656de9c0cecSKevin Wolf 1657b6ad491aSKevin Wolf close_and_fail: 1658f67503e5SMax Reitz /* See fail path, but now the BDS has to be always closed */ 1659f67503e5SMax Reitz if (*pbs) { 1660b6ad491aSKevin Wolf bdrv_close(bs); 1661f67503e5SMax Reitz } else { 1662f67503e5SMax Reitz bdrv_unref(bs); 1663f67503e5SMax Reitz } 1664b6ad491aSKevin Wolf QDECREF(options); 166584d18f06SMarkus Armbruster if (local_err) { 166634b5d2c6SMax Reitz error_propagate(errp, local_err); 166734b5d2c6SMax Reitz } 1668b6ce07aaSKevin Wolf return ret; 1669b6ce07aaSKevin Wolf } 1670b6ce07aaSKevin Wolf 1671e971aa12SJeff Cody typedef struct BlockReopenQueueEntry { 1672e971aa12SJeff Cody bool prepared; 1673e971aa12SJeff Cody BDRVReopenState state; 1674e971aa12SJeff Cody QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry; 1675e971aa12SJeff Cody } BlockReopenQueueEntry; 1676e971aa12SJeff Cody 1677e971aa12SJeff Cody /* 1678e971aa12SJeff Cody * Adds a BlockDriverState to a simple queue for an atomic, transactional 1679e971aa12SJeff Cody * reopen of multiple devices. 1680e971aa12SJeff Cody * 1681e971aa12SJeff Cody * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT 1682e971aa12SJeff Cody * already performed, or alternatively may be NULL a new BlockReopenQueue will 1683e971aa12SJeff Cody * be created and initialized. This newly created BlockReopenQueue should be 1684e971aa12SJeff Cody * passed back in for subsequent calls that are intended to be of the same 1685e971aa12SJeff Cody * atomic 'set'. 1686e971aa12SJeff Cody * 1687e971aa12SJeff Cody * bs is the BlockDriverState to add to the reopen queue. 1688e971aa12SJeff Cody * 1689e971aa12SJeff Cody * flags contains the open flags for the associated bs 1690e971aa12SJeff Cody * 1691e971aa12SJeff Cody * returns a pointer to bs_queue, which is either the newly allocated 1692e971aa12SJeff Cody * bs_queue, or the existing bs_queue being used. 1693e971aa12SJeff Cody * 1694e971aa12SJeff Cody */ 1695e971aa12SJeff Cody BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, 1696e971aa12SJeff Cody BlockDriverState *bs, int flags) 1697e971aa12SJeff Cody { 1698e971aa12SJeff Cody assert(bs != NULL); 1699e971aa12SJeff Cody 1700e971aa12SJeff Cody BlockReopenQueueEntry *bs_entry; 1701e971aa12SJeff Cody if (bs_queue == NULL) { 1702e971aa12SJeff Cody bs_queue = g_new0(BlockReopenQueue, 1); 1703e971aa12SJeff Cody QSIMPLEQ_INIT(bs_queue); 1704e971aa12SJeff Cody } 1705e971aa12SJeff Cody 1706f1f25a2eSKevin Wolf /* bdrv_open() masks this flag out */ 1707f1f25a2eSKevin Wolf flags &= ~BDRV_O_PROTOCOL; 1708f1f25a2eSKevin Wolf 1709e971aa12SJeff Cody if (bs->file) { 1710f1f25a2eSKevin Wolf bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags)); 1711e971aa12SJeff Cody } 1712e971aa12SJeff Cody 1713e971aa12SJeff Cody bs_entry = g_new0(BlockReopenQueueEntry, 1); 1714e971aa12SJeff Cody QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry); 1715e971aa12SJeff Cody 1716e971aa12SJeff Cody bs_entry->state.bs = bs; 1717e971aa12SJeff Cody bs_entry->state.flags = flags; 1718e971aa12SJeff Cody 1719e971aa12SJeff Cody return bs_queue; 1720e971aa12SJeff Cody } 1721e971aa12SJeff Cody 1722e971aa12SJeff Cody /* 1723e971aa12SJeff Cody * Reopen multiple BlockDriverStates atomically & transactionally. 1724e971aa12SJeff Cody * 1725e971aa12SJeff Cody * The queue passed in (bs_queue) must have been built up previous 1726e971aa12SJeff Cody * via bdrv_reopen_queue(). 1727e971aa12SJeff Cody * 1728e971aa12SJeff Cody * Reopens all BDS specified in the queue, with the appropriate 1729e971aa12SJeff Cody * flags. All devices are prepared for reopen, and failure of any 1730e971aa12SJeff Cody * device will cause all device changes to be abandonded, and intermediate 1731e971aa12SJeff Cody * data cleaned up. 1732e971aa12SJeff Cody * 1733e971aa12SJeff Cody * If all devices prepare successfully, then the changes are committed 1734e971aa12SJeff Cody * to all devices. 1735e971aa12SJeff Cody * 1736e971aa12SJeff Cody */ 1737e971aa12SJeff Cody int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) 1738e971aa12SJeff Cody { 1739e971aa12SJeff Cody int ret = -1; 1740e971aa12SJeff Cody BlockReopenQueueEntry *bs_entry, *next; 1741e971aa12SJeff Cody Error *local_err = NULL; 1742e971aa12SJeff Cody 1743e971aa12SJeff Cody assert(bs_queue != NULL); 1744e971aa12SJeff Cody 1745e971aa12SJeff Cody bdrv_drain_all(); 1746e971aa12SJeff Cody 1747e971aa12SJeff Cody QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { 1748e971aa12SJeff Cody if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) { 1749e971aa12SJeff Cody error_propagate(errp, local_err); 1750e971aa12SJeff Cody goto cleanup; 1751e971aa12SJeff Cody } 1752e971aa12SJeff Cody bs_entry->prepared = true; 1753e971aa12SJeff Cody } 1754e971aa12SJeff Cody 1755e971aa12SJeff Cody /* If we reach this point, we have success and just need to apply the 1756e971aa12SJeff Cody * changes 1757e971aa12SJeff Cody */ 1758e971aa12SJeff Cody QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { 1759e971aa12SJeff Cody bdrv_reopen_commit(&bs_entry->state); 1760e971aa12SJeff Cody } 1761e971aa12SJeff Cody 1762e971aa12SJeff Cody ret = 0; 1763e971aa12SJeff Cody 1764e971aa12SJeff Cody cleanup: 1765e971aa12SJeff Cody QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { 1766e971aa12SJeff Cody if (ret && bs_entry->prepared) { 1767e971aa12SJeff Cody bdrv_reopen_abort(&bs_entry->state); 1768e971aa12SJeff Cody } 1769e971aa12SJeff Cody g_free(bs_entry); 1770e971aa12SJeff Cody } 1771e971aa12SJeff Cody g_free(bs_queue); 1772e971aa12SJeff Cody return ret; 1773e971aa12SJeff Cody } 1774e971aa12SJeff Cody 1775e971aa12SJeff Cody 1776e971aa12SJeff Cody /* Reopen a single BlockDriverState with the specified flags. */ 1777e971aa12SJeff Cody int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp) 1778e971aa12SJeff Cody { 1779e971aa12SJeff Cody int ret = -1; 1780e971aa12SJeff Cody Error *local_err = NULL; 1781e971aa12SJeff Cody BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags); 1782e971aa12SJeff Cody 1783e971aa12SJeff Cody ret = bdrv_reopen_multiple(queue, &local_err); 1784e971aa12SJeff Cody if (local_err != NULL) { 1785e971aa12SJeff Cody error_propagate(errp, local_err); 1786e971aa12SJeff Cody } 1787e971aa12SJeff Cody return ret; 1788e971aa12SJeff Cody } 1789e971aa12SJeff Cody 1790e971aa12SJeff Cody 1791e971aa12SJeff Cody /* 1792e971aa12SJeff Cody * Prepares a BlockDriverState for reopen. All changes are staged in the 1793e971aa12SJeff Cody * 'opaque' field of the BDRVReopenState, which is used and allocated by 1794e971aa12SJeff Cody * the block driver layer .bdrv_reopen_prepare() 1795e971aa12SJeff Cody * 1796e971aa12SJeff Cody * bs is the BlockDriverState to reopen 1797e971aa12SJeff Cody * flags are the new open flags 1798e971aa12SJeff Cody * queue is the reopen queue 1799e971aa12SJeff Cody * 1800e971aa12SJeff Cody * Returns 0 on success, non-zero on error. On error errp will be set 1801e971aa12SJeff Cody * as well. 1802e971aa12SJeff Cody * 1803e971aa12SJeff Cody * On failure, bdrv_reopen_abort() will be called to clean up any data. 1804e971aa12SJeff Cody * It is the responsibility of the caller to then call the abort() or 1805e971aa12SJeff Cody * commit() for any other BDS that have been left in a prepare() state 1806e971aa12SJeff Cody * 1807e971aa12SJeff Cody */ 1808e971aa12SJeff Cody int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, 1809e971aa12SJeff Cody Error **errp) 1810e971aa12SJeff Cody { 1811e971aa12SJeff Cody int ret = -1; 1812e971aa12SJeff Cody Error *local_err = NULL; 1813e971aa12SJeff Cody BlockDriver *drv; 1814e971aa12SJeff Cody 1815e971aa12SJeff Cody assert(reopen_state != NULL); 1816e971aa12SJeff Cody assert(reopen_state->bs->drv != NULL); 1817e971aa12SJeff Cody drv = reopen_state->bs->drv; 1818e971aa12SJeff Cody 1819e971aa12SJeff Cody /* if we are to stay read-only, do not allow permission change 1820e971aa12SJeff Cody * to r/w */ 1821e971aa12SJeff Cody if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) && 1822e971aa12SJeff Cody reopen_state->flags & BDRV_O_RDWR) { 182381e5f78aSAlberto Garcia error_setg(errp, "Node '%s' is read only", 182481e5f78aSAlberto Garcia bdrv_get_device_or_node_name(reopen_state->bs)); 1825e971aa12SJeff Cody goto error; 1826e971aa12SJeff Cody } 1827e971aa12SJeff Cody 1828e971aa12SJeff Cody 1829e971aa12SJeff Cody ret = bdrv_flush(reopen_state->bs); 1830e971aa12SJeff Cody if (ret) { 1831e971aa12SJeff Cody error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive", 1832e971aa12SJeff Cody strerror(-ret)); 1833e971aa12SJeff Cody goto error; 1834e971aa12SJeff Cody } 1835e971aa12SJeff Cody 1836e971aa12SJeff Cody if (drv->bdrv_reopen_prepare) { 1837e971aa12SJeff Cody ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err); 1838e971aa12SJeff Cody if (ret) { 1839e971aa12SJeff Cody if (local_err != NULL) { 1840e971aa12SJeff Cody error_propagate(errp, local_err); 1841e971aa12SJeff Cody } else { 1842d8b6895fSLuiz Capitulino error_setg(errp, "failed while preparing to reopen image '%s'", 1843e971aa12SJeff Cody reopen_state->bs->filename); 1844e971aa12SJeff Cody } 1845e971aa12SJeff Cody goto error; 1846e971aa12SJeff Cody } 1847e971aa12SJeff Cody } else { 1848e971aa12SJeff Cody /* It is currently mandatory to have a bdrv_reopen_prepare() 1849e971aa12SJeff Cody * handler for each supported drv. */ 185081e5f78aSAlberto Garcia error_setg(errp, "Block format '%s' used by node '%s' " 185181e5f78aSAlberto Garcia "does not support reopening files", drv->format_name, 185281e5f78aSAlberto Garcia bdrv_get_device_or_node_name(reopen_state->bs)); 1853e971aa12SJeff Cody ret = -1; 1854e971aa12SJeff Cody goto error; 1855e971aa12SJeff Cody } 1856e971aa12SJeff Cody 1857e971aa12SJeff Cody ret = 0; 1858e971aa12SJeff Cody 1859e971aa12SJeff Cody error: 1860e971aa12SJeff Cody return ret; 1861e971aa12SJeff Cody } 1862e971aa12SJeff Cody 1863e971aa12SJeff Cody /* 1864e971aa12SJeff Cody * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and 1865e971aa12SJeff Cody * makes them final by swapping the staging BlockDriverState contents into 1866e971aa12SJeff Cody * the active BlockDriverState contents. 1867e971aa12SJeff Cody */ 1868e971aa12SJeff Cody void bdrv_reopen_commit(BDRVReopenState *reopen_state) 1869e971aa12SJeff Cody { 1870e971aa12SJeff Cody BlockDriver *drv; 1871e971aa12SJeff Cody 1872e971aa12SJeff Cody assert(reopen_state != NULL); 1873e971aa12SJeff Cody drv = reopen_state->bs->drv; 1874e971aa12SJeff Cody assert(drv != NULL); 1875e971aa12SJeff Cody 1876e971aa12SJeff Cody /* If there are any driver level actions to take */ 1877e971aa12SJeff Cody if (drv->bdrv_reopen_commit) { 1878e971aa12SJeff Cody drv->bdrv_reopen_commit(reopen_state); 1879e971aa12SJeff Cody } 1880e971aa12SJeff Cody 1881e971aa12SJeff Cody /* set BDS specific flags now */ 1882e971aa12SJeff Cody reopen_state->bs->open_flags = reopen_state->flags; 1883e971aa12SJeff Cody reopen_state->bs->enable_write_cache = !!(reopen_state->flags & 1884e971aa12SJeff Cody BDRV_O_CACHE_WB); 1885e971aa12SJeff Cody reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR); 1886355ef4acSKevin Wolf 18873baca891SKevin Wolf bdrv_refresh_limits(reopen_state->bs, NULL); 1888e971aa12SJeff Cody } 1889e971aa12SJeff Cody 1890e971aa12SJeff Cody /* 1891e971aa12SJeff Cody * Abort the reopen, and delete and free the staged changes in 1892e971aa12SJeff Cody * reopen_state 1893e971aa12SJeff Cody */ 1894e971aa12SJeff Cody void bdrv_reopen_abort(BDRVReopenState *reopen_state) 1895e971aa12SJeff Cody { 1896e971aa12SJeff Cody BlockDriver *drv; 1897e971aa12SJeff Cody 1898e971aa12SJeff Cody assert(reopen_state != NULL); 1899e971aa12SJeff Cody drv = reopen_state->bs->drv; 1900e971aa12SJeff Cody assert(drv != NULL); 1901e971aa12SJeff Cody 1902e971aa12SJeff Cody if (drv->bdrv_reopen_abort) { 1903e971aa12SJeff Cody drv->bdrv_reopen_abort(reopen_state); 1904e971aa12SJeff Cody } 1905e971aa12SJeff Cody } 1906e971aa12SJeff Cody 1907e971aa12SJeff Cody 1908fc01f7e7Sbellard void bdrv_close(BlockDriverState *bs) 1909fc01f7e7Sbellard { 191033384421SMax Reitz BdrvAioNotifier *ban, *ban_next; 191133384421SMax Reitz 19123e914655SPaolo Bonzini if (bs->job) { 19133e914655SPaolo Bonzini block_job_cancel_sync(bs->job); 19143e914655SPaolo Bonzini } 191558fda173SStefan Hajnoczi bdrv_drain_all(); /* complete I/O */ 191658fda173SStefan Hajnoczi bdrv_flush(bs); 191758fda173SStefan Hajnoczi bdrv_drain_all(); /* in case flush left pending I/O */ 1918d7d512f6SPaolo Bonzini notifier_list_notify(&bs->close_notifiers, bs); 19197094f12fSKevin Wolf 19203cbc002cSPaolo Bonzini if (bs->drv) { 1921557df6acSStefan Hajnoczi if (bs->backing_hd) { 1922826b6ca0SFam Zheng BlockDriverState *backing_hd = bs->backing_hd; 1923826b6ca0SFam Zheng bdrv_set_backing_hd(bs, NULL); 1924826b6ca0SFam Zheng bdrv_unref(backing_hd); 1925557df6acSStefan Hajnoczi } 1926ea2384d3Sbellard bs->drv->bdrv_close(bs); 19277267c094SAnthony Liguori g_free(bs->opaque); 1928ea2384d3Sbellard bs->opaque = NULL; 1929ea2384d3Sbellard bs->drv = NULL; 193053fec9d3SStefan Hajnoczi bs->copy_on_read = 0; 1931a275fa42SPaolo Bonzini bs->backing_file[0] = '\0'; 1932a275fa42SPaolo Bonzini bs->backing_format[0] = '\0'; 19336405875cSPaolo Bonzini bs->total_sectors = 0; 19346405875cSPaolo Bonzini bs->encrypted = 0; 19356405875cSPaolo Bonzini bs->valid_key = 0; 19366405875cSPaolo Bonzini bs->sg = 0; 19370d51b4deSAsias He bs->zero_beyond_eof = false; 1938de9c0cecSKevin Wolf QDECREF(bs->options); 1939de9c0cecSKevin Wolf bs->options = NULL; 194091af7014SMax Reitz QDECREF(bs->full_open_options); 194191af7014SMax Reitz bs->full_open_options = NULL; 1942b338082bSbellard 194366f82ceeSKevin Wolf if (bs->file != NULL) { 19444f6fd349SFam Zheng bdrv_unref(bs->file); 19450ac9377dSPaolo Bonzini bs->file = NULL; 194666f82ceeSKevin Wolf } 19479ca11154SPavel Hrdina } 194866f82ceeSKevin Wolf 1949a7f53e26SMarkus Armbruster if (bs->blk) { 1950a7f53e26SMarkus Armbruster blk_dev_change_media_cb(bs->blk, false); 1951a7f53e26SMarkus Armbruster } 195298f90dbaSZhi Yong Wu 195398f90dbaSZhi Yong Wu /*throttling disk I/O limits*/ 195498f90dbaSZhi Yong Wu if (bs->io_limits_enabled) { 195598f90dbaSZhi Yong Wu bdrv_io_limits_disable(bs); 195698f90dbaSZhi Yong Wu } 195733384421SMax Reitz 195833384421SMax Reitz QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { 195933384421SMax Reitz g_free(ban); 196033384421SMax Reitz } 196133384421SMax Reitz QLIST_INIT(&bs->aio_notifiers); 1962b338082bSbellard } 1963b338082bSbellard 19642bc93fedSMORITA Kazutaka void bdrv_close_all(void) 19652bc93fedSMORITA Kazutaka { 19662bc93fedSMORITA Kazutaka BlockDriverState *bs; 19672bc93fedSMORITA Kazutaka 1968dc364f4cSBenoît Canet QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 1969ed78cda3SStefan Hajnoczi AioContext *aio_context = bdrv_get_aio_context(bs); 1970ed78cda3SStefan Hajnoczi 1971ed78cda3SStefan Hajnoczi aio_context_acquire(aio_context); 19722bc93fedSMORITA Kazutaka bdrv_close(bs); 1973ed78cda3SStefan Hajnoczi aio_context_release(aio_context); 19742bc93fedSMORITA Kazutaka } 19752bc93fedSMORITA Kazutaka } 19762bc93fedSMORITA Kazutaka 197788266f5aSStefan Hajnoczi /* Check if any requests are in-flight (including throttled requests) */ 197888266f5aSStefan Hajnoczi static bool bdrv_requests_pending(BlockDriverState *bs) 197988266f5aSStefan Hajnoczi { 198088266f5aSStefan Hajnoczi if (!QLIST_EMPTY(&bs->tracked_requests)) { 198188266f5aSStefan Hajnoczi return true; 198288266f5aSStefan Hajnoczi } 1983cc0681c4SBenoît Canet if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) { 1984cc0681c4SBenoît Canet return true; 1985cc0681c4SBenoît Canet } 1986cc0681c4SBenoît Canet if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) { 198788266f5aSStefan Hajnoczi return true; 198888266f5aSStefan Hajnoczi } 198988266f5aSStefan Hajnoczi if (bs->file && bdrv_requests_pending(bs->file)) { 199088266f5aSStefan Hajnoczi return true; 199188266f5aSStefan Hajnoczi } 199288266f5aSStefan Hajnoczi if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) { 199388266f5aSStefan Hajnoczi return true; 199488266f5aSStefan Hajnoczi } 199588266f5aSStefan Hajnoczi return false; 199688266f5aSStefan Hajnoczi } 199788266f5aSStefan Hajnoczi 19985b98db0aSStefan Hajnoczi static bool bdrv_drain_one(BlockDriverState *bs) 19995b98db0aSStefan Hajnoczi { 20005b98db0aSStefan Hajnoczi bool bs_busy; 20015b98db0aSStefan Hajnoczi 20025b98db0aSStefan Hajnoczi bdrv_flush_io_queue(bs); 20035b98db0aSStefan Hajnoczi bdrv_start_throttled_reqs(bs); 20045b98db0aSStefan Hajnoczi bs_busy = bdrv_requests_pending(bs); 20055b98db0aSStefan Hajnoczi bs_busy |= aio_poll(bdrv_get_aio_context(bs), bs_busy); 20065b98db0aSStefan Hajnoczi return bs_busy; 20075b98db0aSStefan Hajnoczi } 20085b98db0aSStefan Hajnoczi 20095b98db0aSStefan Hajnoczi /* 20105b98db0aSStefan Hajnoczi * Wait for pending requests to complete on a single BlockDriverState subtree 20115b98db0aSStefan Hajnoczi * 20125b98db0aSStefan Hajnoczi * See the warning in bdrv_drain_all(). This function can only be called if 20135b98db0aSStefan Hajnoczi * you are sure nothing can generate I/O because you have op blockers 20145b98db0aSStefan Hajnoczi * installed. 20155b98db0aSStefan Hajnoczi * 20165b98db0aSStefan Hajnoczi * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState 20175b98db0aSStefan Hajnoczi * AioContext. 20185b98db0aSStefan Hajnoczi */ 20195b98db0aSStefan Hajnoczi void bdrv_drain(BlockDriverState *bs) 20205b98db0aSStefan Hajnoczi { 20215b98db0aSStefan Hajnoczi while (bdrv_drain_one(bs)) { 20225b98db0aSStefan Hajnoczi /* Keep iterating */ 20235b98db0aSStefan Hajnoczi } 20245b98db0aSStefan Hajnoczi } 20255b98db0aSStefan Hajnoczi 2026922453bcSStefan Hajnoczi /* 2027922453bcSStefan Hajnoczi * Wait for pending requests to complete across all BlockDriverStates 2028922453bcSStefan Hajnoczi * 2029922453bcSStefan Hajnoczi * This function does not flush data to disk, use bdrv_flush_all() for that 2030922453bcSStefan Hajnoczi * after calling this function. 20314c355d53SZhi Yong Wu * 20324c355d53SZhi Yong Wu * Note that completion of an asynchronous I/O operation can trigger any 20334c355d53SZhi Yong Wu * number of other I/O operations on other devices---for example a coroutine 20344c355d53SZhi Yong Wu * can be arbitrarily complex and a constant flow of I/O can come until the 20354c355d53SZhi Yong Wu * coroutine is complete. Because of this, it is not possible to have a 20364c355d53SZhi Yong Wu * function to drain a single device's I/O queue. 2037922453bcSStefan Hajnoczi */ 2038922453bcSStefan Hajnoczi void bdrv_drain_all(void) 2039922453bcSStefan Hajnoczi { 204088266f5aSStefan Hajnoczi /* Always run first iteration so any pending completion BHs run */ 204188266f5aSStefan Hajnoczi bool busy = true; 2042922453bcSStefan Hajnoczi BlockDriverState *bs; 2043922453bcSStefan Hajnoczi 204469da3b0bSFam Zheng QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 204569da3b0bSFam Zheng AioContext *aio_context = bdrv_get_aio_context(bs); 204669da3b0bSFam Zheng 204769da3b0bSFam Zheng aio_context_acquire(aio_context); 204869da3b0bSFam Zheng if (bs->job) { 204969da3b0bSFam Zheng block_job_pause(bs->job); 205069da3b0bSFam Zheng } 205169da3b0bSFam Zheng aio_context_release(aio_context); 205269da3b0bSFam Zheng } 205369da3b0bSFam Zheng 205488266f5aSStefan Hajnoczi while (busy) { 20559b536adcSStefan Hajnoczi busy = false; 2056922453bcSStefan Hajnoczi 20579b536adcSStefan Hajnoczi QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 20589b536adcSStefan Hajnoczi AioContext *aio_context = bdrv_get_aio_context(bs); 20599b536adcSStefan Hajnoczi 20609b536adcSStefan Hajnoczi aio_context_acquire(aio_context); 20615b98db0aSStefan Hajnoczi busy |= bdrv_drain_one(bs); 20629b536adcSStefan Hajnoczi aio_context_release(aio_context); 20639b536adcSStefan Hajnoczi } 2064922453bcSStefan Hajnoczi } 206569da3b0bSFam Zheng 206669da3b0bSFam Zheng QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 206769da3b0bSFam Zheng AioContext *aio_context = bdrv_get_aio_context(bs); 206869da3b0bSFam Zheng 206969da3b0bSFam Zheng aio_context_acquire(aio_context); 207069da3b0bSFam Zheng if (bs->job) { 207169da3b0bSFam Zheng block_job_resume(bs->job); 207269da3b0bSFam Zheng } 207369da3b0bSFam Zheng aio_context_release(aio_context); 207469da3b0bSFam Zheng } 2075922453bcSStefan Hajnoczi } 2076922453bcSStefan Hajnoczi 2077dc364f4cSBenoît Canet /* make a BlockDriverState anonymous by removing from bdrv_state and 2078dc364f4cSBenoît Canet * graph_bdrv_state list. 2079d22b2f41SRyan Harper Also, NULL terminate the device_name to prevent double remove */ 2080d22b2f41SRyan Harper void bdrv_make_anon(BlockDriverState *bs) 2081d22b2f41SRyan Harper { 2082bfb197e0SMarkus Armbruster /* 2083bfb197e0SMarkus Armbruster * Take care to remove bs from bdrv_states only when it's actually 2084bfb197e0SMarkus Armbruster * in it. Note that bs->device_list.tqe_prev is initially null, 2085bfb197e0SMarkus Armbruster * and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish 2086bfb197e0SMarkus Armbruster * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by 2087bfb197e0SMarkus Armbruster * resetting it to null on remove. 2088bfb197e0SMarkus Armbruster */ 2089bfb197e0SMarkus Armbruster if (bs->device_list.tqe_prev) { 2090dc364f4cSBenoît Canet QTAILQ_REMOVE(&bdrv_states, bs, device_list); 2091bfb197e0SMarkus Armbruster bs->device_list.tqe_prev = NULL; 2092d22b2f41SRyan Harper } 2093dc364f4cSBenoît Canet if (bs->node_name[0] != '\0') { 2094dc364f4cSBenoît Canet QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list); 2095dc364f4cSBenoît Canet } 2096dc364f4cSBenoît Canet bs->node_name[0] = '\0'; 2097d22b2f41SRyan Harper } 2098d22b2f41SRyan Harper 2099e023b2e2SPaolo Bonzini static void bdrv_rebind(BlockDriverState *bs) 2100e023b2e2SPaolo Bonzini { 2101e023b2e2SPaolo Bonzini if (bs->drv && bs->drv->bdrv_rebind) { 2102e023b2e2SPaolo Bonzini bs->drv->bdrv_rebind(bs); 2103e023b2e2SPaolo Bonzini } 2104e023b2e2SPaolo Bonzini } 2105e023b2e2SPaolo Bonzini 21064ddc07caSPaolo Bonzini static void bdrv_move_feature_fields(BlockDriverState *bs_dest, 21074ddc07caSPaolo Bonzini BlockDriverState *bs_src) 21084ddc07caSPaolo Bonzini { 21094ddc07caSPaolo Bonzini /* move some fields that need to stay attached to the device */ 21104ddc07caSPaolo Bonzini 21114ddc07caSPaolo Bonzini /* dev info */ 21121b7fd729SPaolo Bonzini bs_dest->guest_block_size = bs_src->guest_block_size; 21134ddc07caSPaolo Bonzini bs_dest->copy_on_read = bs_src->copy_on_read; 21144ddc07caSPaolo Bonzini 21154ddc07caSPaolo Bonzini bs_dest->enable_write_cache = bs_src->enable_write_cache; 21164ddc07caSPaolo Bonzini 2117cc0681c4SBenoît Canet /* i/o throttled req */ 2118cc0681c4SBenoît Canet memcpy(&bs_dest->throttle_state, 2119cc0681c4SBenoît Canet &bs_src->throttle_state, 2120cc0681c4SBenoît Canet sizeof(ThrottleState)); 2121cc0681c4SBenoît Canet bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0]; 2122cc0681c4SBenoît Canet bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1]; 21234ddc07caSPaolo Bonzini bs_dest->io_limits_enabled = bs_src->io_limits_enabled; 21244ddc07caSPaolo Bonzini 21254ddc07caSPaolo Bonzini /* r/w error */ 21264ddc07caSPaolo Bonzini bs_dest->on_read_error = bs_src->on_read_error; 21274ddc07caSPaolo Bonzini bs_dest->on_write_error = bs_src->on_write_error; 21284ddc07caSPaolo Bonzini 21294ddc07caSPaolo Bonzini /* i/o status */ 21304ddc07caSPaolo Bonzini bs_dest->iostatus_enabled = bs_src->iostatus_enabled; 21314ddc07caSPaolo Bonzini bs_dest->iostatus = bs_src->iostatus; 21324ddc07caSPaolo Bonzini 21334ddc07caSPaolo Bonzini /* dirty bitmap */ 2134e4654d2dSFam Zheng bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps; 21354ddc07caSPaolo Bonzini 21369fcb0251SFam Zheng /* reference count */ 21379fcb0251SFam Zheng bs_dest->refcnt = bs_src->refcnt; 21389fcb0251SFam Zheng 21394ddc07caSPaolo Bonzini /* job */ 21404ddc07caSPaolo Bonzini bs_dest->job = bs_src->job; 21414ddc07caSPaolo Bonzini 21424ddc07caSPaolo Bonzini /* keep the same entry in bdrv_states */ 2143dc364f4cSBenoît Canet bs_dest->device_list = bs_src->device_list; 21447e7d56d9SMarkus Armbruster bs_dest->blk = bs_src->blk; 21457e7d56d9SMarkus Armbruster 2146fbe40ff7SFam Zheng memcpy(bs_dest->op_blockers, bs_src->op_blockers, 2147fbe40ff7SFam Zheng sizeof(bs_dest->op_blockers)); 21484ddc07caSPaolo Bonzini } 21494ddc07caSPaolo Bonzini 21504ddc07caSPaolo Bonzini /* 21514ddc07caSPaolo Bonzini * Swap bs contents for two image chains while they are live, 21524ddc07caSPaolo Bonzini * while keeping required fields on the BlockDriverState that is 21534ddc07caSPaolo Bonzini * actually attached to a device. 21544ddc07caSPaolo Bonzini * 21554ddc07caSPaolo Bonzini * This will modify the BlockDriverState fields, and swap contents 21564ddc07caSPaolo Bonzini * between bs_new and bs_old. Both bs_new and bs_old are modified. 21574ddc07caSPaolo Bonzini * 2158bfb197e0SMarkus Armbruster * bs_new must not be attached to a BlockBackend. 21594ddc07caSPaolo Bonzini * 21604ddc07caSPaolo Bonzini * This function does not create any image files. 21614ddc07caSPaolo Bonzini */ 21624ddc07caSPaolo Bonzini void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) 21634ddc07caSPaolo Bonzini { 21644ddc07caSPaolo Bonzini BlockDriverState tmp; 21654ddc07caSPaolo Bonzini 216690ce8a06SBenoît Canet /* The code needs to swap the node_name but simply swapping node_list won't 216790ce8a06SBenoît Canet * work so first remove the nodes from the graph list, do the swap then 216890ce8a06SBenoît Canet * insert them back if needed. 216990ce8a06SBenoît Canet */ 217090ce8a06SBenoît Canet if (bs_new->node_name[0] != '\0') { 217190ce8a06SBenoît Canet QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list); 217290ce8a06SBenoît Canet } 217390ce8a06SBenoît Canet if (bs_old->node_name[0] != '\0') { 217490ce8a06SBenoît Canet QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list); 217590ce8a06SBenoît Canet } 217690ce8a06SBenoît Canet 2177bfb197e0SMarkus Armbruster /* bs_new must be unattached and shouldn't have anything fancy enabled */ 21787e7d56d9SMarkus Armbruster assert(!bs_new->blk); 2179e4654d2dSFam Zheng assert(QLIST_EMPTY(&bs_new->dirty_bitmaps)); 21804ddc07caSPaolo Bonzini assert(bs_new->job == NULL); 21814ddc07caSPaolo Bonzini assert(bs_new->io_limits_enabled == false); 2182cc0681c4SBenoît Canet assert(!throttle_have_timer(&bs_new->throttle_state)); 21834ddc07caSPaolo Bonzini 21844ddc07caSPaolo Bonzini tmp = *bs_new; 21854ddc07caSPaolo Bonzini *bs_new = *bs_old; 21864ddc07caSPaolo Bonzini *bs_old = tmp; 21874ddc07caSPaolo Bonzini 21884ddc07caSPaolo Bonzini /* there are some fields that should not be swapped, move them back */ 21894ddc07caSPaolo Bonzini bdrv_move_feature_fields(&tmp, bs_old); 21904ddc07caSPaolo Bonzini bdrv_move_feature_fields(bs_old, bs_new); 21914ddc07caSPaolo Bonzini bdrv_move_feature_fields(bs_new, &tmp); 21924ddc07caSPaolo Bonzini 2193bfb197e0SMarkus Armbruster /* bs_new must remain unattached */ 21947e7d56d9SMarkus Armbruster assert(!bs_new->blk); 21954ddc07caSPaolo Bonzini 21964ddc07caSPaolo Bonzini /* Check a few fields that should remain attached to the device */ 21974ddc07caSPaolo Bonzini assert(bs_new->job == NULL); 21984ddc07caSPaolo Bonzini assert(bs_new->io_limits_enabled == false); 2199cc0681c4SBenoît Canet assert(!throttle_have_timer(&bs_new->throttle_state)); 22004ddc07caSPaolo Bonzini 220190ce8a06SBenoît Canet /* insert the nodes back into the graph node list if needed */ 220290ce8a06SBenoît Canet if (bs_new->node_name[0] != '\0') { 220390ce8a06SBenoît Canet QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list); 220490ce8a06SBenoît Canet } 220590ce8a06SBenoît Canet if (bs_old->node_name[0] != '\0') { 220690ce8a06SBenoît Canet QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list); 220790ce8a06SBenoît Canet } 220890ce8a06SBenoît Canet 22094ddc07caSPaolo Bonzini bdrv_rebind(bs_new); 22104ddc07caSPaolo Bonzini bdrv_rebind(bs_old); 22114ddc07caSPaolo Bonzini } 22124ddc07caSPaolo Bonzini 22138802d1fdSJeff Cody /* 22148802d1fdSJeff Cody * Add new bs contents at the top of an image chain while the chain is 22158802d1fdSJeff Cody * live, while keeping required fields on the top layer. 22168802d1fdSJeff Cody * 22178802d1fdSJeff Cody * This will modify the BlockDriverState fields, and swap contents 22188802d1fdSJeff Cody * between bs_new and bs_top. Both bs_new and bs_top are modified. 22198802d1fdSJeff Cody * 2220bfb197e0SMarkus Armbruster * bs_new must not be attached to a BlockBackend. 2221f6801b83SJeff Cody * 22228802d1fdSJeff Cody * This function does not create any image files. 22238802d1fdSJeff Cody */ 22248802d1fdSJeff Cody void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top) 22258802d1fdSJeff Cody { 22264ddc07caSPaolo Bonzini bdrv_swap(bs_new, bs_top); 22278802d1fdSJeff Cody 22288802d1fdSJeff Cody /* The contents of 'tmp' will become bs_top, as we are 22298802d1fdSJeff Cody * swapping bs_new and bs_top contents. */ 22308d24cce1SFam Zheng bdrv_set_backing_hd(bs_top, bs_new); 22318802d1fdSJeff Cody } 22328802d1fdSJeff Cody 22334f6fd349SFam Zheng static void bdrv_delete(BlockDriverState *bs) 2234b338082bSbellard { 22353e914655SPaolo Bonzini assert(!bs->job); 22363718d8abSFam Zheng assert(bdrv_op_blocker_is_empty(bs)); 22374f6fd349SFam Zheng assert(!bs->refcnt); 2238e4654d2dSFam Zheng assert(QLIST_EMPTY(&bs->dirty_bitmaps)); 223918846deeSMarkus Armbruster 2240e1b5c52eSStefan Hajnoczi bdrv_close(bs); 2241e1b5c52eSStefan Hajnoczi 22421b7bdbc1SStefan Hajnoczi /* remove from list, if necessary */ 2243d22b2f41SRyan Harper bdrv_make_anon(bs); 224434c6f050Saurel32 22457267c094SAnthony Liguori g_free(bs); 2246fc01f7e7Sbellard } 2247fc01f7e7Sbellard 2248e97fc193Saliguori /* 2249e97fc193Saliguori * Run consistency checks on an image 2250e97fc193Saliguori * 2251e076f338SKevin Wolf * Returns 0 if the check could be completed (it doesn't mean that the image is 2252a1c7273bSStefan Weil * free of errors) or -errno when an internal error occurred. The results of the 2253e076f338SKevin Wolf * check are stored in res. 2254e97fc193Saliguori */ 22554534ff54SKevin Wolf int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) 2256e97fc193Saliguori { 2257908bcd54SMax Reitz if (bs->drv == NULL) { 2258908bcd54SMax Reitz return -ENOMEDIUM; 2259908bcd54SMax Reitz } 2260e97fc193Saliguori if (bs->drv->bdrv_check == NULL) { 2261e97fc193Saliguori return -ENOTSUP; 2262e97fc193Saliguori } 2263e97fc193Saliguori 2264e076f338SKevin Wolf memset(res, 0, sizeof(*res)); 22654534ff54SKevin Wolf return bs->drv->bdrv_check(bs, res, fix); 2266e97fc193Saliguori } 2267e97fc193Saliguori 22688a426614SKevin Wolf #define COMMIT_BUF_SECTORS 2048 22698a426614SKevin Wolf 227033e3963eSbellard /* commit COW file into the raw image */ 227133e3963eSbellard int bdrv_commit(BlockDriverState *bs) 227233e3963eSbellard { 227319cb3738Sbellard BlockDriver *drv = bs->drv; 227472706ea4SJeff Cody int64_t sector, total_sectors, length, backing_length; 22758a426614SKevin Wolf int n, ro, open_flags; 22760bce597dSJeff Cody int ret = 0; 227772706ea4SJeff Cody uint8_t *buf = NULL; 227833e3963eSbellard 227919cb3738Sbellard if (!drv) 228019cb3738Sbellard return -ENOMEDIUM; 228133e3963eSbellard 22824dca4b63SNaphtali Sprei if (!bs->backing_hd) { 22834dca4b63SNaphtali Sprei return -ENOTSUP; 22844dca4b63SNaphtali Sprei } 22854dca4b63SNaphtali Sprei 2286bb00021dSFam Zheng if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) || 2287bb00021dSFam Zheng bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) { 22882d3735d3SStefan Hajnoczi return -EBUSY; 22892d3735d3SStefan Hajnoczi } 22902d3735d3SStefan Hajnoczi 22914dca4b63SNaphtali Sprei ro = bs->backing_hd->read_only; 22924dca4b63SNaphtali Sprei open_flags = bs->backing_hd->open_flags; 22934dca4b63SNaphtali Sprei 22944dca4b63SNaphtali Sprei if (ro) { 22950bce597dSJeff Cody if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) { 22960bce597dSJeff Cody return -EACCES; 22974dca4b63SNaphtali Sprei } 2298ea2384d3Sbellard } 2299ea2384d3Sbellard 230072706ea4SJeff Cody length = bdrv_getlength(bs); 230172706ea4SJeff Cody if (length < 0) { 230272706ea4SJeff Cody ret = length; 230372706ea4SJeff Cody goto ro_cleanup; 230472706ea4SJeff Cody } 230572706ea4SJeff Cody 230672706ea4SJeff Cody backing_length = bdrv_getlength(bs->backing_hd); 230772706ea4SJeff Cody if (backing_length < 0) { 230872706ea4SJeff Cody ret = backing_length; 230972706ea4SJeff Cody goto ro_cleanup; 231072706ea4SJeff Cody } 231172706ea4SJeff Cody 231272706ea4SJeff Cody /* If our top snapshot is larger than the backing file image, 231372706ea4SJeff Cody * grow the backing file image if possible. If not possible, 231472706ea4SJeff Cody * we must return an error */ 231572706ea4SJeff Cody if (length > backing_length) { 231672706ea4SJeff Cody ret = bdrv_truncate(bs->backing_hd, length); 231772706ea4SJeff Cody if (ret < 0) { 231872706ea4SJeff Cody goto ro_cleanup; 231972706ea4SJeff Cody } 232072706ea4SJeff Cody } 232172706ea4SJeff Cody 232272706ea4SJeff Cody total_sectors = length >> BDRV_SECTOR_BITS; 2323857d4f46SKevin Wolf 2324857d4f46SKevin Wolf /* qemu_try_blockalign() for bs will choose an alignment that works for 2325857d4f46SKevin Wolf * bs->backing_hd as well, so no need to compare the alignment manually. */ 2326857d4f46SKevin Wolf buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE); 2327857d4f46SKevin Wolf if (buf == NULL) { 2328857d4f46SKevin Wolf ret = -ENOMEM; 2329857d4f46SKevin Wolf goto ro_cleanup; 2330857d4f46SKevin Wolf } 23318a426614SKevin Wolf 23328a426614SKevin Wolf for (sector = 0; sector < total_sectors; sector += n) { 2333d663640cSPaolo Bonzini ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n); 2334d663640cSPaolo Bonzini if (ret < 0) { 2335d663640cSPaolo Bonzini goto ro_cleanup; 2336d663640cSPaolo Bonzini } 2337d663640cSPaolo Bonzini if (ret) { 2338dabfa6ccSKevin Wolf ret = bdrv_read(bs, sector, buf, n); 2339dabfa6ccSKevin Wolf if (ret < 0) { 23404dca4b63SNaphtali Sprei goto ro_cleanup; 234133e3963eSbellard } 234233e3963eSbellard 2343dabfa6ccSKevin Wolf ret = bdrv_write(bs->backing_hd, sector, buf, n); 2344dabfa6ccSKevin Wolf if (ret < 0) { 23454dca4b63SNaphtali Sprei goto ro_cleanup; 234633e3963eSbellard } 234733e3963eSbellard } 234833e3963eSbellard } 234995389c86Sbellard 23501d44952fSChristoph Hellwig if (drv->bdrv_make_empty) { 23511d44952fSChristoph Hellwig ret = drv->bdrv_make_empty(bs); 2352dabfa6ccSKevin Wolf if (ret < 0) { 2353dabfa6ccSKevin Wolf goto ro_cleanup; 2354dabfa6ccSKevin Wolf } 23551d44952fSChristoph Hellwig bdrv_flush(bs); 23561d44952fSChristoph Hellwig } 235795389c86Sbellard 23583f5075aeSChristoph Hellwig /* 23593f5075aeSChristoph Hellwig * Make sure all data we wrote to the backing device is actually 23603f5075aeSChristoph Hellwig * stable on disk. 23613f5075aeSChristoph Hellwig */ 2362dabfa6ccSKevin Wolf if (bs->backing_hd) { 23633f5075aeSChristoph Hellwig bdrv_flush(bs->backing_hd); 2364dabfa6ccSKevin Wolf } 23654dca4b63SNaphtali Sprei 2366dabfa6ccSKevin Wolf ret = 0; 23674dca4b63SNaphtali Sprei ro_cleanup: 2368857d4f46SKevin Wolf qemu_vfree(buf); 23694dca4b63SNaphtali Sprei 23704dca4b63SNaphtali Sprei if (ro) { 23710bce597dSJeff Cody /* ignoring error return here */ 23720bce597dSJeff Cody bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL); 23734dca4b63SNaphtali Sprei } 23744dca4b63SNaphtali Sprei 23751d44952fSChristoph Hellwig return ret; 237633e3963eSbellard } 237733e3963eSbellard 2378e8877497SStefan Hajnoczi int bdrv_commit_all(void) 23796ab4b5abSMarkus Armbruster { 23806ab4b5abSMarkus Armbruster BlockDriverState *bs; 23816ab4b5abSMarkus Armbruster 2382dc364f4cSBenoît Canet QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 2383ed78cda3SStefan Hajnoczi AioContext *aio_context = bdrv_get_aio_context(bs); 2384ed78cda3SStefan Hajnoczi 2385ed78cda3SStefan Hajnoczi aio_context_acquire(aio_context); 2386272d2d8eSJeff Cody if (bs->drv && bs->backing_hd) { 2387e8877497SStefan Hajnoczi int ret = bdrv_commit(bs); 2388e8877497SStefan Hajnoczi if (ret < 0) { 2389ed78cda3SStefan Hajnoczi aio_context_release(aio_context); 2390e8877497SStefan Hajnoczi return ret; 23916ab4b5abSMarkus Armbruster } 23926ab4b5abSMarkus Armbruster } 2393ed78cda3SStefan Hajnoczi aio_context_release(aio_context); 2394272d2d8eSJeff Cody } 2395e8877497SStefan Hajnoczi return 0; 2396e8877497SStefan Hajnoczi } 23976ab4b5abSMarkus Armbruster 2398dbffbdcfSStefan Hajnoczi /** 2399dbffbdcfSStefan Hajnoczi * Remove an active request from the tracked requests list 2400dbffbdcfSStefan Hajnoczi * 2401dbffbdcfSStefan Hajnoczi * This function should be called when a tracked request is completing. 2402dbffbdcfSStefan Hajnoczi */ 2403dbffbdcfSStefan Hajnoczi static void tracked_request_end(BdrvTrackedRequest *req) 2404dbffbdcfSStefan Hajnoczi { 24052dbafdc0SKevin Wolf if (req->serialising) { 24062dbafdc0SKevin Wolf req->bs->serialising_in_flight--; 24072dbafdc0SKevin Wolf } 24082dbafdc0SKevin Wolf 2409dbffbdcfSStefan Hajnoczi QLIST_REMOVE(req, list); 2410f4658285SStefan Hajnoczi qemu_co_queue_restart_all(&req->wait_queue); 2411dbffbdcfSStefan Hajnoczi } 2412dbffbdcfSStefan Hajnoczi 2413dbffbdcfSStefan Hajnoczi /** 2414dbffbdcfSStefan Hajnoczi * Add an active request to the tracked requests list 2415dbffbdcfSStefan Hajnoczi */ 2416dbffbdcfSStefan Hajnoczi static void tracked_request_begin(BdrvTrackedRequest *req, 2417dbffbdcfSStefan Hajnoczi BlockDriverState *bs, 2418793ed47aSKevin Wolf int64_t offset, 2419793ed47aSKevin Wolf unsigned int bytes, bool is_write) 2420dbffbdcfSStefan Hajnoczi { 2421dbffbdcfSStefan Hajnoczi *req = (BdrvTrackedRequest){ 2422dbffbdcfSStefan Hajnoczi .bs = bs, 2423793ed47aSKevin Wolf .offset = offset, 2424793ed47aSKevin Wolf .bytes = bytes, 2425dbffbdcfSStefan Hajnoczi .is_write = is_write, 24265f8b6491SStefan Hajnoczi .co = qemu_coroutine_self(), 24272dbafdc0SKevin Wolf .serialising = false, 24287327145fSKevin Wolf .overlap_offset = offset, 24297327145fSKevin Wolf .overlap_bytes = bytes, 2430dbffbdcfSStefan Hajnoczi }; 2431dbffbdcfSStefan Hajnoczi 2432f4658285SStefan Hajnoczi qemu_co_queue_init(&req->wait_queue); 2433f4658285SStefan Hajnoczi 2434dbffbdcfSStefan Hajnoczi QLIST_INSERT_HEAD(&bs->tracked_requests, req, list); 2435dbffbdcfSStefan Hajnoczi } 2436dbffbdcfSStefan Hajnoczi 2437e96126ffSKevin Wolf static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align) 24382dbafdc0SKevin Wolf { 24397327145fSKevin Wolf int64_t overlap_offset = req->offset & ~(align - 1); 2440e96126ffSKevin Wolf unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align) 24417327145fSKevin Wolf - overlap_offset; 24427327145fSKevin Wolf 24432dbafdc0SKevin Wolf if (!req->serialising) { 24442dbafdc0SKevin Wolf req->bs->serialising_in_flight++; 24452dbafdc0SKevin Wolf req->serialising = true; 24462dbafdc0SKevin Wolf } 24477327145fSKevin Wolf 24487327145fSKevin Wolf req->overlap_offset = MIN(req->overlap_offset, overlap_offset); 24497327145fSKevin Wolf req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes); 24502dbafdc0SKevin Wolf } 24512dbafdc0SKevin Wolf 2452d83947acSStefan Hajnoczi /** 2453d83947acSStefan Hajnoczi * Round a region to cluster boundaries 2454d83947acSStefan Hajnoczi */ 2455343bded4SPaolo Bonzini void bdrv_round_to_clusters(BlockDriverState *bs, 2456d83947acSStefan Hajnoczi int64_t sector_num, int nb_sectors, 2457d83947acSStefan Hajnoczi int64_t *cluster_sector_num, 2458d83947acSStefan Hajnoczi int *cluster_nb_sectors) 2459d83947acSStefan Hajnoczi { 2460d83947acSStefan Hajnoczi BlockDriverInfo bdi; 2461d83947acSStefan Hajnoczi 2462d83947acSStefan Hajnoczi if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) { 2463d83947acSStefan Hajnoczi *cluster_sector_num = sector_num; 2464d83947acSStefan Hajnoczi *cluster_nb_sectors = nb_sectors; 2465d83947acSStefan Hajnoczi } else { 2466d83947acSStefan Hajnoczi int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE; 2467d83947acSStefan Hajnoczi *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c); 2468d83947acSStefan Hajnoczi *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num + 2469d83947acSStefan Hajnoczi nb_sectors, c); 2470d83947acSStefan Hajnoczi } 2471d83947acSStefan Hajnoczi } 2472d83947acSStefan Hajnoczi 24737327145fSKevin Wolf static int bdrv_get_cluster_size(BlockDriverState *bs) 2474793ed47aSKevin Wolf { 2475793ed47aSKevin Wolf BlockDriverInfo bdi; 24767327145fSKevin Wolf int ret; 2477793ed47aSKevin Wolf 24787327145fSKevin Wolf ret = bdrv_get_info(bs, &bdi); 24797327145fSKevin Wolf if (ret < 0 || bdi.cluster_size == 0) { 24807327145fSKevin Wolf return bs->request_alignment; 2481793ed47aSKevin Wolf } else { 24827327145fSKevin Wolf return bdi.cluster_size; 2483793ed47aSKevin Wolf } 2484793ed47aSKevin Wolf } 2485793ed47aSKevin Wolf 2486f4658285SStefan Hajnoczi static bool tracked_request_overlaps(BdrvTrackedRequest *req, 2487793ed47aSKevin Wolf int64_t offset, unsigned int bytes) 2488793ed47aSKevin Wolf { 2489d83947acSStefan Hajnoczi /* aaaa bbbb */ 24907327145fSKevin Wolf if (offset >= req->overlap_offset + req->overlap_bytes) { 2491d83947acSStefan Hajnoczi return false; 2492d83947acSStefan Hajnoczi } 2493d83947acSStefan Hajnoczi /* bbbb aaaa */ 24947327145fSKevin Wolf if (req->overlap_offset >= offset + bytes) { 2495d83947acSStefan Hajnoczi return false; 2496d83947acSStefan Hajnoczi } 2497d83947acSStefan Hajnoczi return true; 2498f4658285SStefan Hajnoczi } 2499f4658285SStefan Hajnoczi 250028de2dcdSKevin Wolf static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self) 2501f4658285SStefan Hajnoczi { 25022dbafdc0SKevin Wolf BlockDriverState *bs = self->bs; 2503f4658285SStefan Hajnoczi BdrvTrackedRequest *req; 2504f4658285SStefan Hajnoczi bool retry; 250528de2dcdSKevin Wolf bool waited = false; 2506f4658285SStefan Hajnoczi 25072dbafdc0SKevin Wolf if (!bs->serialising_in_flight) { 250828de2dcdSKevin Wolf return false; 25092dbafdc0SKevin Wolf } 25102dbafdc0SKevin Wolf 2511f4658285SStefan Hajnoczi do { 2512f4658285SStefan Hajnoczi retry = false; 2513f4658285SStefan Hajnoczi QLIST_FOREACH(req, &bs->tracked_requests, list) { 25142dbafdc0SKevin Wolf if (req == self || (!req->serialising && !self->serialising)) { 251565afd211SKevin Wolf continue; 251665afd211SKevin Wolf } 25177327145fSKevin Wolf if (tracked_request_overlaps(req, self->overlap_offset, 25187327145fSKevin Wolf self->overlap_bytes)) 25197327145fSKevin Wolf { 25205f8b6491SStefan Hajnoczi /* Hitting this means there was a reentrant request, for 25215f8b6491SStefan Hajnoczi * example, a block driver issuing nested requests. This must 25225f8b6491SStefan Hajnoczi * never happen since it means deadlock. 25235f8b6491SStefan Hajnoczi */ 25245f8b6491SStefan Hajnoczi assert(qemu_coroutine_self() != req->co); 25255f8b6491SStefan Hajnoczi 25266460440fSKevin Wolf /* If the request is already (indirectly) waiting for us, or 25276460440fSKevin Wolf * will wait for us as soon as it wakes up, then just go on 25286460440fSKevin Wolf * (instead of producing a deadlock in the former case). */ 25296460440fSKevin Wolf if (!req->waiting_for) { 25306460440fSKevin Wolf self->waiting_for = req; 2531f4658285SStefan Hajnoczi qemu_co_queue_wait(&req->wait_queue); 25326460440fSKevin Wolf self->waiting_for = NULL; 2533f4658285SStefan Hajnoczi retry = true; 253428de2dcdSKevin Wolf waited = true; 2535f4658285SStefan Hajnoczi break; 2536f4658285SStefan Hajnoczi } 2537f4658285SStefan Hajnoczi } 25386460440fSKevin Wolf } 2539f4658285SStefan Hajnoczi } while (retry); 254028de2dcdSKevin Wolf 254128de2dcdSKevin Wolf return waited; 2542f4658285SStefan Hajnoczi } 2543f4658285SStefan Hajnoczi 2544756e6736SKevin Wolf /* 2545756e6736SKevin Wolf * Return values: 2546756e6736SKevin Wolf * 0 - success 2547756e6736SKevin Wolf * -EINVAL - backing format specified, but no file 2548756e6736SKevin Wolf * -ENOSPC - can't update the backing file because no space is left in the 2549756e6736SKevin Wolf * image file header 2550756e6736SKevin Wolf * -ENOTSUP - format driver doesn't support changing the backing file 2551756e6736SKevin Wolf */ 2552756e6736SKevin Wolf int bdrv_change_backing_file(BlockDriverState *bs, 2553756e6736SKevin Wolf const char *backing_file, const char *backing_fmt) 2554756e6736SKevin Wolf { 2555756e6736SKevin Wolf BlockDriver *drv = bs->drv; 2556469ef350SPaolo Bonzini int ret; 2557756e6736SKevin Wolf 25585f377794SPaolo Bonzini /* Backing file format doesn't make sense without a backing file */ 25595f377794SPaolo Bonzini if (backing_fmt && !backing_file) { 25605f377794SPaolo Bonzini return -EINVAL; 25615f377794SPaolo Bonzini } 25625f377794SPaolo Bonzini 2563756e6736SKevin Wolf if (drv->bdrv_change_backing_file != NULL) { 2564469ef350SPaolo Bonzini ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt); 2565756e6736SKevin Wolf } else { 2566469ef350SPaolo Bonzini ret = -ENOTSUP; 2567756e6736SKevin Wolf } 2568469ef350SPaolo Bonzini 2569469ef350SPaolo Bonzini if (ret == 0) { 2570469ef350SPaolo Bonzini pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); 2571469ef350SPaolo Bonzini pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); 2572469ef350SPaolo Bonzini } 2573469ef350SPaolo Bonzini return ret; 2574756e6736SKevin Wolf } 2575756e6736SKevin Wolf 25766ebdcee2SJeff Cody /* 25776ebdcee2SJeff Cody * Finds the image layer in the chain that has 'bs' as its backing file. 25786ebdcee2SJeff Cody * 25796ebdcee2SJeff Cody * active is the current topmost image. 25806ebdcee2SJeff Cody * 25816ebdcee2SJeff Cody * Returns NULL if bs is not found in active's image chain, 25826ebdcee2SJeff Cody * or if active == bs. 25834caf0fcdSJeff Cody * 25844caf0fcdSJeff Cody * Returns the bottommost base image if bs == NULL. 25856ebdcee2SJeff Cody */ 25866ebdcee2SJeff Cody BlockDriverState *bdrv_find_overlay(BlockDriverState *active, 25876ebdcee2SJeff Cody BlockDriverState *bs) 25886ebdcee2SJeff Cody { 25894caf0fcdSJeff Cody while (active && bs != active->backing_hd) { 25904caf0fcdSJeff Cody active = active->backing_hd; 25916ebdcee2SJeff Cody } 25926ebdcee2SJeff Cody 25934caf0fcdSJeff Cody return active; 25946ebdcee2SJeff Cody } 25956ebdcee2SJeff Cody 25964caf0fcdSJeff Cody /* Given a BDS, searches for the base layer. */ 25974caf0fcdSJeff Cody BlockDriverState *bdrv_find_base(BlockDriverState *bs) 25984caf0fcdSJeff Cody { 25994caf0fcdSJeff Cody return bdrv_find_overlay(bs, NULL); 26006ebdcee2SJeff Cody } 26016ebdcee2SJeff Cody 26026ebdcee2SJeff Cody typedef struct BlkIntermediateStates { 26036ebdcee2SJeff Cody BlockDriverState *bs; 26046ebdcee2SJeff Cody QSIMPLEQ_ENTRY(BlkIntermediateStates) entry; 26056ebdcee2SJeff Cody } BlkIntermediateStates; 26066ebdcee2SJeff Cody 26076ebdcee2SJeff Cody 26086ebdcee2SJeff Cody /* 26096ebdcee2SJeff Cody * Drops images above 'base' up to and including 'top', and sets the image 26106ebdcee2SJeff Cody * above 'top' to have base as its backing file. 26116ebdcee2SJeff Cody * 26126ebdcee2SJeff Cody * Requires that the overlay to 'top' is opened r/w, so that the backing file 26136ebdcee2SJeff Cody * information in 'bs' can be properly updated. 26146ebdcee2SJeff Cody * 26156ebdcee2SJeff Cody * E.g., this will convert the following chain: 26166ebdcee2SJeff Cody * bottom <- base <- intermediate <- top <- active 26176ebdcee2SJeff Cody * 26186ebdcee2SJeff Cody * to 26196ebdcee2SJeff Cody * 26206ebdcee2SJeff Cody * bottom <- base <- active 26216ebdcee2SJeff Cody * 26226ebdcee2SJeff Cody * It is allowed for bottom==base, in which case it converts: 26236ebdcee2SJeff Cody * 26246ebdcee2SJeff Cody * base <- intermediate <- top <- active 26256ebdcee2SJeff Cody * 26266ebdcee2SJeff Cody * to 26276ebdcee2SJeff Cody * 26286ebdcee2SJeff Cody * base <- active 26296ebdcee2SJeff Cody * 263054e26900SJeff Cody * If backing_file_str is non-NULL, it will be used when modifying top's 263154e26900SJeff Cody * overlay image metadata. 263254e26900SJeff Cody * 26336ebdcee2SJeff Cody * Error conditions: 26346ebdcee2SJeff Cody * if active == top, that is considered an error 26356ebdcee2SJeff Cody * 26366ebdcee2SJeff Cody */ 26376ebdcee2SJeff Cody int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, 263854e26900SJeff Cody BlockDriverState *base, const char *backing_file_str) 26396ebdcee2SJeff Cody { 26406ebdcee2SJeff Cody BlockDriverState *intermediate; 26416ebdcee2SJeff Cody BlockDriverState *base_bs = NULL; 26426ebdcee2SJeff Cody BlockDriverState *new_top_bs = NULL; 26436ebdcee2SJeff Cody BlkIntermediateStates *intermediate_state, *next; 26446ebdcee2SJeff Cody int ret = -EIO; 26456ebdcee2SJeff Cody 26466ebdcee2SJeff Cody QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete; 26476ebdcee2SJeff Cody QSIMPLEQ_INIT(&states_to_delete); 26486ebdcee2SJeff Cody 26496ebdcee2SJeff Cody if (!top->drv || !base->drv) { 26506ebdcee2SJeff Cody goto exit; 26516ebdcee2SJeff Cody } 26526ebdcee2SJeff Cody 26536ebdcee2SJeff Cody new_top_bs = bdrv_find_overlay(active, top); 26546ebdcee2SJeff Cody 26556ebdcee2SJeff Cody if (new_top_bs == NULL) { 26566ebdcee2SJeff Cody /* we could not find the image above 'top', this is an error */ 26576ebdcee2SJeff Cody goto exit; 26586ebdcee2SJeff Cody } 26596ebdcee2SJeff Cody 26606ebdcee2SJeff Cody /* special case of new_top_bs->backing_hd already pointing to base - nothing 26616ebdcee2SJeff Cody * to do, no intermediate images */ 26626ebdcee2SJeff Cody if (new_top_bs->backing_hd == base) { 26636ebdcee2SJeff Cody ret = 0; 26646ebdcee2SJeff Cody goto exit; 26656ebdcee2SJeff Cody } 26666ebdcee2SJeff Cody 26676ebdcee2SJeff Cody intermediate = top; 26686ebdcee2SJeff Cody 26696ebdcee2SJeff Cody /* now we will go down through the list, and add each BDS we find 26706ebdcee2SJeff Cody * into our deletion queue, until we hit the 'base' 26716ebdcee2SJeff Cody */ 26726ebdcee2SJeff Cody while (intermediate) { 26735839e53bSMarkus Armbruster intermediate_state = g_new0(BlkIntermediateStates, 1); 26746ebdcee2SJeff Cody intermediate_state->bs = intermediate; 26756ebdcee2SJeff Cody QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry); 26766ebdcee2SJeff Cody 26776ebdcee2SJeff Cody if (intermediate->backing_hd == base) { 26786ebdcee2SJeff Cody base_bs = intermediate->backing_hd; 26796ebdcee2SJeff Cody break; 26806ebdcee2SJeff Cody } 26816ebdcee2SJeff Cody intermediate = intermediate->backing_hd; 26826ebdcee2SJeff Cody } 26836ebdcee2SJeff Cody if (base_bs == NULL) { 26846ebdcee2SJeff Cody /* something went wrong, we did not end at the base. safely 26856ebdcee2SJeff Cody * unravel everything, and exit with error */ 26866ebdcee2SJeff Cody goto exit; 26876ebdcee2SJeff Cody } 26886ebdcee2SJeff Cody 26896ebdcee2SJeff Cody /* success - we can delete the intermediate states, and link top->base */ 269054e26900SJeff Cody backing_file_str = backing_file_str ? backing_file_str : base_bs->filename; 269154e26900SJeff Cody ret = bdrv_change_backing_file(new_top_bs, backing_file_str, 26926ebdcee2SJeff Cody base_bs->drv ? base_bs->drv->format_name : ""); 26936ebdcee2SJeff Cody if (ret) { 26946ebdcee2SJeff Cody goto exit; 26956ebdcee2SJeff Cody } 2696920beae1SFam Zheng bdrv_set_backing_hd(new_top_bs, base_bs); 26976ebdcee2SJeff Cody 26986ebdcee2SJeff Cody QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) { 26996ebdcee2SJeff Cody /* so that bdrv_close() does not recursively close the chain */ 2700920beae1SFam Zheng bdrv_set_backing_hd(intermediate_state->bs, NULL); 27014f6fd349SFam Zheng bdrv_unref(intermediate_state->bs); 27026ebdcee2SJeff Cody } 27036ebdcee2SJeff Cody ret = 0; 27046ebdcee2SJeff Cody 27056ebdcee2SJeff Cody exit: 27066ebdcee2SJeff Cody QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) { 27076ebdcee2SJeff Cody g_free(intermediate_state); 27086ebdcee2SJeff Cody } 27096ebdcee2SJeff Cody return ret; 27106ebdcee2SJeff Cody } 27116ebdcee2SJeff Cody 27126ebdcee2SJeff Cody 271371d0770cSaliguori static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset, 271471d0770cSaliguori size_t size) 271571d0770cSaliguori { 271675af1f34SPeter Lieven if (size > BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) { 27171dd3a447SKevin Wolf return -EIO; 27181dd3a447SKevin Wolf } 27191dd3a447SKevin Wolf 2720c0191e76SMax Reitz if (!bdrv_is_inserted(bs)) { 272171d0770cSaliguori return -ENOMEDIUM; 2722c0191e76SMax Reitz } 272371d0770cSaliguori 2724c0191e76SMax Reitz if (offset < 0) { 2725fbb7b4e0SKevin Wolf return -EIO; 2726c0191e76SMax Reitz } 272771d0770cSaliguori 272871d0770cSaliguori return 0; 272971d0770cSaliguori } 273071d0770cSaliguori 273171d0770cSaliguori static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num, 273271d0770cSaliguori int nb_sectors) 273371d0770cSaliguori { 273475af1f34SPeter Lieven if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { 27358f4754edSKevin Wolf return -EIO; 27368f4754edSKevin Wolf } 27378f4754edSKevin Wolf 2738eb5a3165SJes Sorensen return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE, 2739eb5a3165SJes Sorensen nb_sectors * BDRV_SECTOR_SIZE); 274071d0770cSaliguori } 274171d0770cSaliguori 27421c9805a3SStefan Hajnoczi typedef struct RwCo { 27431c9805a3SStefan Hajnoczi BlockDriverState *bs; 2744775aa8b6SKevin Wolf int64_t offset; 27451c9805a3SStefan Hajnoczi QEMUIOVector *qiov; 27461c9805a3SStefan Hajnoczi bool is_write; 27471c9805a3SStefan Hajnoczi int ret; 27484105eaaaSPeter Lieven BdrvRequestFlags flags; 27491c9805a3SStefan Hajnoczi } RwCo; 27501c9805a3SStefan Hajnoczi 27511c9805a3SStefan Hajnoczi static void coroutine_fn bdrv_rw_co_entry(void *opaque) 2752fc01f7e7Sbellard { 27531c9805a3SStefan Hajnoczi RwCo *rwco = opaque; 2754fc01f7e7Sbellard 27551c9805a3SStefan Hajnoczi if (!rwco->is_write) { 2756775aa8b6SKevin Wolf rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset, 2757775aa8b6SKevin Wolf rwco->qiov->size, rwco->qiov, 27584105eaaaSPeter Lieven rwco->flags); 27591c9805a3SStefan Hajnoczi } else { 2760775aa8b6SKevin Wolf rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset, 2761775aa8b6SKevin Wolf rwco->qiov->size, rwco->qiov, 27624105eaaaSPeter Lieven rwco->flags); 27631c9805a3SStefan Hajnoczi } 27641c9805a3SStefan Hajnoczi } 2765e7a8a783SKevin Wolf 27661c9805a3SStefan Hajnoczi /* 27678d3b1a2dSKevin Wolf * Process a vectored synchronous request using coroutines 27681c9805a3SStefan Hajnoczi */ 2769775aa8b6SKevin Wolf static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset, 27704105eaaaSPeter Lieven QEMUIOVector *qiov, bool is_write, 27714105eaaaSPeter Lieven BdrvRequestFlags flags) 27721c9805a3SStefan Hajnoczi { 27731c9805a3SStefan Hajnoczi Coroutine *co; 27741c9805a3SStefan Hajnoczi RwCo rwco = { 27751c9805a3SStefan Hajnoczi .bs = bs, 2776775aa8b6SKevin Wolf .offset = offset, 27778d3b1a2dSKevin Wolf .qiov = qiov, 27781c9805a3SStefan Hajnoczi .is_write = is_write, 27791c9805a3SStefan Hajnoczi .ret = NOT_DONE, 27804105eaaaSPeter Lieven .flags = flags, 27811c9805a3SStefan Hajnoczi }; 27821c9805a3SStefan Hajnoczi 2783498e386cSZhi Yong Wu /** 2784498e386cSZhi Yong Wu * In sync call context, when the vcpu is blocked, this throttling timer 2785498e386cSZhi Yong Wu * will not fire; so the I/O throttling function has to be disabled here 2786498e386cSZhi Yong Wu * if it has been enabled. 2787498e386cSZhi Yong Wu */ 2788498e386cSZhi Yong Wu if (bs->io_limits_enabled) { 2789498e386cSZhi Yong Wu fprintf(stderr, "Disabling I/O throttling on '%s' due " 2790498e386cSZhi Yong Wu "to synchronous I/O.\n", bdrv_get_device_name(bs)); 2791498e386cSZhi Yong Wu bdrv_io_limits_disable(bs); 2792498e386cSZhi Yong Wu } 2793498e386cSZhi Yong Wu 27941c9805a3SStefan Hajnoczi if (qemu_in_coroutine()) { 27951c9805a3SStefan Hajnoczi /* Fast-path if already in coroutine context */ 27961c9805a3SStefan Hajnoczi bdrv_rw_co_entry(&rwco); 27971c9805a3SStefan Hajnoczi } else { 27982572b37aSStefan Hajnoczi AioContext *aio_context = bdrv_get_aio_context(bs); 27992572b37aSStefan Hajnoczi 28001c9805a3SStefan Hajnoczi co = qemu_coroutine_create(bdrv_rw_co_entry); 28011c9805a3SStefan Hajnoczi qemu_coroutine_enter(co, &rwco); 28021c9805a3SStefan Hajnoczi while (rwco.ret == NOT_DONE) { 28032572b37aSStefan Hajnoczi aio_poll(aio_context, true); 28041c9805a3SStefan Hajnoczi } 28051c9805a3SStefan Hajnoczi } 28061c9805a3SStefan Hajnoczi return rwco.ret; 2807e7a8a783SKevin Wolf } 2808e7a8a783SKevin Wolf 28098d3b1a2dSKevin Wolf /* 28108d3b1a2dSKevin Wolf * Process a synchronous request using coroutines 28118d3b1a2dSKevin Wolf */ 28128d3b1a2dSKevin Wolf static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, 28134105eaaaSPeter Lieven int nb_sectors, bool is_write, BdrvRequestFlags flags) 28148d3b1a2dSKevin Wolf { 28158d3b1a2dSKevin Wolf QEMUIOVector qiov; 28168d3b1a2dSKevin Wolf struct iovec iov = { 28178d3b1a2dSKevin Wolf .iov_base = (void *)buf, 28188d3b1a2dSKevin Wolf .iov_len = nb_sectors * BDRV_SECTOR_SIZE, 28198d3b1a2dSKevin Wolf }; 28208d3b1a2dSKevin Wolf 282175af1f34SPeter Lieven if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { 2822da15ee51SKevin Wolf return -EINVAL; 2823da15ee51SKevin Wolf } 2824da15ee51SKevin Wolf 28258d3b1a2dSKevin Wolf qemu_iovec_init_external(&qiov, &iov, 1); 2826775aa8b6SKevin Wolf return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS, 2827775aa8b6SKevin Wolf &qiov, is_write, flags); 28288d3b1a2dSKevin Wolf } 28298d3b1a2dSKevin Wolf 28301c9805a3SStefan Hajnoczi /* return < 0 if error. See bdrv_write() for the return codes */ 28311c9805a3SStefan Hajnoczi int bdrv_read(BlockDriverState *bs, int64_t sector_num, 28321c9805a3SStefan Hajnoczi uint8_t *buf, int nb_sectors) 28331c9805a3SStefan Hajnoczi { 28344105eaaaSPeter Lieven return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0); 283583f64091Sbellard } 2836fc01f7e7Sbellard 283707d27a44SMarkus Armbruster /* Just like bdrv_read(), but with I/O throttling temporarily disabled */ 283807d27a44SMarkus Armbruster int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num, 283907d27a44SMarkus Armbruster uint8_t *buf, int nb_sectors) 284007d27a44SMarkus Armbruster { 284107d27a44SMarkus Armbruster bool enabled; 284207d27a44SMarkus Armbruster int ret; 284307d27a44SMarkus Armbruster 284407d27a44SMarkus Armbruster enabled = bs->io_limits_enabled; 284507d27a44SMarkus Armbruster bs->io_limits_enabled = false; 28464e7395e8SPeter Lieven ret = bdrv_read(bs, sector_num, buf, nb_sectors); 284707d27a44SMarkus Armbruster bs->io_limits_enabled = enabled; 284807d27a44SMarkus Armbruster return ret; 284907d27a44SMarkus Armbruster } 285007d27a44SMarkus Armbruster 285119cb3738Sbellard /* Return < 0 if error. Important errors are: 285219cb3738Sbellard -EIO generic I/O error (may happen for all errors) 285319cb3738Sbellard -ENOMEDIUM No media inserted. 285419cb3738Sbellard -EINVAL Invalid sector number or nb_sectors 285519cb3738Sbellard -EACCES Trying to write a read-only device 285619cb3738Sbellard */ 2857fc01f7e7Sbellard int bdrv_write(BlockDriverState *bs, int64_t sector_num, 2858fc01f7e7Sbellard const uint8_t *buf, int nb_sectors) 2859fc01f7e7Sbellard { 28604105eaaaSPeter Lieven return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0); 286183f64091Sbellard } 286283f64091Sbellard 2863aa7bfbffSPeter Lieven int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num, 2864aa7bfbffSPeter Lieven int nb_sectors, BdrvRequestFlags flags) 28654105eaaaSPeter Lieven { 28664105eaaaSPeter Lieven return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true, 2867aa7bfbffSPeter Lieven BDRV_REQ_ZERO_WRITE | flags); 28688d3b1a2dSKevin Wolf } 28698d3b1a2dSKevin Wolf 2870d75cbb5eSPeter Lieven /* 2871d75cbb5eSPeter Lieven * Completely zero out a block device with the help of bdrv_write_zeroes. 2872d75cbb5eSPeter Lieven * The operation is sped up by checking the block status and only writing 2873d75cbb5eSPeter Lieven * zeroes to the device if they currently do not return zeroes. Optional 2874d75cbb5eSPeter Lieven * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP). 2875d75cbb5eSPeter Lieven * 2876d75cbb5eSPeter Lieven * Returns < 0 on error, 0 on success. For error codes see bdrv_write(). 2877d75cbb5eSPeter Lieven */ 2878d75cbb5eSPeter Lieven int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags) 2879d75cbb5eSPeter Lieven { 2880d32f7c10SMarkus Armbruster int64_t target_sectors, ret, nb_sectors, sector_num = 0; 2881d75cbb5eSPeter Lieven int n; 2882d75cbb5eSPeter Lieven 2883d32f7c10SMarkus Armbruster target_sectors = bdrv_nb_sectors(bs); 2884d32f7c10SMarkus Armbruster if (target_sectors < 0) { 2885d32f7c10SMarkus Armbruster return target_sectors; 28869ce10c0bSKevin Wolf } 28879ce10c0bSKevin Wolf 2888d75cbb5eSPeter Lieven for (;;) { 288975af1f34SPeter Lieven nb_sectors = MIN(target_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS); 2890d75cbb5eSPeter Lieven if (nb_sectors <= 0) { 2891d75cbb5eSPeter Lieven return 0; 2892d75cbb5eSPeter Lieven } 2893d75cbb5eSPeter Lieven ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n); 28943d94ce60SPeter Lieven if (ret < 0) { 28953d94ce60SPeter Lieven error_report("error getting block status at sector %" PRId64 ": %s", 28963d94ce60SPeter Lieven sector_num, strerror(-ret)); 28973d94ce60SPeter Lieven return ret; 28983d94ce60SPeter Lieven } 2899d75cbb5eSPeter Lieven if (ret & BDRV_BLOCK_ZERO) { 2900d75cbb5eSPeter Lieven sector_num += n; 2901d75cbb5eSPeter Lieven continue; 2902d75cbb5eSPeter Lieven } 2903d75cbb5eSPeter Lieven ret = bdrv_write_zeroes(bs, sector_num, n, flags); 2904d75cbb5eSPeter Lieven if (ret < 0) { 2905d75cbb5eSPeter Lieven error_report("error writing zeroes at sector %" PRId64 ": %s", 2906d75cbb5eSPeter Lieven sector_num, strerror(-ret)); 2907d75cbb5eSPeter Lieven return ret; 2908d75cbb5eSPeter Lieven } 2909d75cbb5eSPeter Lieven sector_num += n; 2910d75cbb5eSPeter Lieven } 2911d75cbb5eSPeter Lieven } 2912d75cbb5eSPeter Lieven 2913a3ef6571SKevin Wolf int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes) 291483f64091Sbellard { 2915a3ef6571SKevin Wolf QEMUIOVector qiov; 2916a3ef6571SKevin Wolf struct iovec iov = { 2917a3ef6571SKevin Wolf .iov_base = (void *)buf, 2918a3ef6571SKevin Wolf .iov_len = bytes, 2919a3ef6571SKevin Wolf }; 29209a8c4cceSKevin Wolf int ret; 292183f64091Sbellard 2922a3ef6571SKevin Wolf if (bytes < 0) { 2923a3ef6571SKevin Wolf return -EINVAL; 292483f64091Sbellard } 292583f64091Sbellard 2926a3ef6571SKevin Wolf qemu_iovec_init_external(&qiov, &iov, 1); 2927a3ef6571SKevin Wolf ret = bdrv_prwv_co(bs, offset, &qiov, false, 0); 2928a3ef6571SKevin Wolf if (ret < 0) { 29299a8c4cceSKevin Wolf return ret; 293083f64091Sbellard } 293183f64091Sbellard 2932a3ef6571SKevin Wolf return bytes; 293383f64091Sbellard } 293483f64091Sbellard 29358d3b1a2dSKevin Wolf int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov) 293683f64091Sbellard { 29379a8c4cceSKevin Wolf int ret; 293883f64091Sbellard 29398407d5d7SKevin Wolf ret = bdrv_prwv_co(bs, offset, qiov, true, 0); 29408d3b1a2dSKevin Wolf if (ret < 0) { 29419a8c4cceSKevin Wolf return ret; 29428d3b1a2dSKevin Wolf } 29438d3b1a2dSKevin Wolf 29448d3b1a2dSKevin Wolf return qiov->size; 29458d3b1a2dSKevin Wolf } 29468d3b1a2dSKevin Wolf 29478d3b1a2dSKevin Wolf int bdrv_pwrite(BlockDriverState *bs, int64_t offset, 29488407d5d7SKevin Wolf const void *buf, int bytes) 29498d3b1a2dSKevin Wolf { 29508d3b1a2dSKevin Wolf QEMUIOVector qiov; 29518d3b1a2dSKevin Wolf struct iovec iov = { 29528d3b1a2dSKevin Wolf .iov_base = (void *) buf, 29538407d5d7SKevin Wolf .iov_len = bytes, 29548d3b1a2dSKevin Wolf }; 29558d3b1a2dSKevin Wolf 29568407d5d7SKevin Wolf if (bytes < 0) { 29578407d5d7SKevin Wolf return -EINVAL; 29588407d5d7SKevin Wolf } 29598407d5d7SKevin Wolf 29608d3b1a2dSKevin Wolf qemu_iovec_init_external(&qiov, &iov, 1); 29618d3b1a2dSKevin Wolf return bdrv_pwritev(bs, offset, &qiov); 296283f64091Sbellard } 296383f64091Sbellard 2964f08145feSKevin Wolf /* 2965f08145feSKevin Wolf * Writes to the file and ensures that no writes are reordered across this 2966f08145feSKevin Wolf * request (acts as a barrier) 2967f08145feSKevin Wolf * 2968f08145feSKevin Wolf * Returns 0 on success, -errno in error cases. 2969f08145feSKevin Wolf */ 2970f08145feSKevin Wolf int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset, 2971f08145feSKevin Wolf const void *buf, int count) 2972f08145feSKevin Wolf { 2973f08145feSKevin Wolf int ret; 2974f08145feSKevin Wolf 2975f08145feSKevin Wolf ret = bdrv_pwrite(bs, offset, buf, count); 2976f08145feSKevin Wolf if (ret < 0) { 2977f08145feSKevin Wolf return ret; 2978f08145feSKevin Wolf } 2979f08145feSKevin Wolf 2980f05fa4adSPaolo Bonzini /* No flush needed for cache modes that already do it */ 2981f05fa4adSPaolo Bonzini if (bs->enable_write_cache) { 2982f08145feSKevin Wolf bdrv_flush(bs); 2983f08145feSKevin Wolf } 2984f08145feSKevin Wolf 2985f08145feSKevin Wolf return 0; 2986f08145feSKevin Wolf } 2987f08145feSKevin Wolf 2988470c0504SStefan Hajnoczi static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, 2989ab185921SStefan Hajnoczi int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) 2990ab185921SStefan Hajnoczi { 2991ab185921SStefan Hajnoczi /* Perform I/O through a temporary buffer so that users who scribble over 2992ab185921SStefan Hajnoczi * their read buffer while the operation is in progress do not end up 2993ab185921SStefan Hajnoczi * modifying the image file. This is critical for zero-copy guest I/O 2994ab185921SStefan Hajnoczi * where anything might happen inside guest memory. 2995ab185921SStefan Hajnoczi */ 2996ab185921SStefan Hajnoczi void *bounce_buffer; 2997ab185921SStefan Hajnoczi 299879c053bdSStefan Hajnoczi BlockDriver *drv = bs->drv; 2999ab185921SStefan Hajnoczi struct iovec iov; 3000ab185921SStefan Hajnoczi QEMUIOVector bounce_qiov; 3001ab185921SStefan Hajnoczi int64_t cluster_sector_num; 3002ab185921SStefan Hajnoczi int cluster_nb_sectors; 3003ab185921SStefan Hajnoczi size_t skip_bytes; 3004ab185921SStefan Hajnoczi int ret; 3005ab185921SStefan Hajnoczi 3006ab185921SStefan Hajnoczi /* Cover entire cluster so no additional backing file I/O is required when 3007ab185921SStefan Hajnoczi * allocating cluster in the image file. 3008ab185921SStefan Hajnoczi */ 3009343bded4SPaolo Bonzini bdrv_round_to_clusters(bs, sector_num, nb_sectors, 3010ab185921SStefan Hajnoczi &cluster_sector_num, &cluster_nb_sectors); 3011ab185921SStefan Hajnoczi 3012470c0504SStefan Hajnoczi trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, 3013ab185921SStefan Hajnoczi cluster_sector_num, cluster_nb_sectors); 3014ab185921SStefan Hajnoczi 3015ab185921SStefan Hajnoczi iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE; 3016857d4f46SKevin Wolf iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len); 3017857d4f46SKevin Wolf if (bounce_buffer == NULL) { 3018857d4f46SKevin Wolf ret = -ENOMEM; 3019857d4f46SKevin Wolf goto err; 3020857d4f46SKevin Wolf } 3021857d4f46SKevin Wolf 3022ab185921SStefan Hajnoczi qemu_iovec_init_external(&bounce_qiov, &iov, 1); 3023ab185921SStefan Hajnoczi 302479c053bdSStefan Hajnoczi ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors, 3025ab185921SStefan Hajnoczi &bounce_qiov); 3026ab185921SStefan Hajnoczi if (ret < 0) { 3027ab185921SStefan Hajnoczi goto err; 3028ab185921SStefan Hajnoczi } 3029ab185921SStefan Hajnoczi 303079c053bdSStefan Hajnoczi if (drv->bdrv_co_write_zeroes && 303179c053bdSStefan Hajnoczi buffer_is_zero(bounce_buffer, iov.iov_len)) { 3032621f0589SKevin Wolf ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num, 3033aa7bfbffSPeter Lieven cluster_nb_sectors, 0); 303479c053bdSStefan Hajnoczi } else { 3035f05fa4adSPaolo Bonzini /* This does not change the data on the disk, it is not necessary 3036f05fa4adSPaolo Bonzini * to flush even in cache=writethrough mode. 3037f05fa4adSPaolo Bonzini */ 303879c053bdSStefan Hajnoczi ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors, 3039ab185921SStefan Hajnoczi &bounce_qiov); 304079c053bdSStefan Hajnoczi } 304179c053bdSStefan Hajnoczi 3042ab185921SStefan Hajnoczi if (ret < 0) { 3043ab185921SStefan Hajnoczi /* It might be okay to ignore write errors for guest requests. If this 3044ab185921SStefan Hajnoczi * is a deliberate copy-on-read then we don't want to ignore the error. 3045ab185921SStefan Hajnoczi * Simply report it in all cases. 3046ab185921SStefan Hajnoczi */ 3047ab185921SStefan Hajnoczi goto err; 3048ab185921SStefan Hajnoczi } 3049ab185921SStefan Hajnoczi 3050ab185921SStefan Hajnoczi skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE; 305103396148SMichael Tokarev qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes, 3052ab185921SStefan Hajnoczi nb_sectors * BDRV_SECTOR_SIZE); 3053ab185921SStefan Hajnoczi 3054ab185921SStefan Hajnoczi err: 3055ab185921SStefan Hajnoczi qemu_vfree(bounce_buffer); 3056ab185921SStefan Hajnoczi return ret; 3057ab185921SStefan Hajnoczi } 3058ab185921SStefan Hajnoczi 3059c5fbe571SStefan Hajnoczi /* 3060d0c7f642SKevin Wolf * Forwards an already correctly aligned request to the BlockDriver. This 3061d0c7f642SKevin Wolf * handles copy on read and zeroing after EOF; any other features must be 3062d0c7f642SKevin Wolf * implemented by the caller. 3063c5fbe571SStefan Hajnoczi */ 3064d0c7f642SKevin Wolf static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, 306565afd211SKevin Wolf BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, 3066ec746e10SKevin Wolf int64_t align, QEMUIOVector *qiov, int flags) 3067da1fa91dSKevin Wolf { 3068da1fa91dSKevin Wolf BlockDriver *drv = bs->drv; 3069dbffbdcfSStefan Hajnoczi int ret; 3070da1fa91dSKevin Wolf 3071d0c7f642SKevin Wolf int64_t sector_num = offset >> BDRV_SECTOR_BITS; 3072d0c7f642SKevin Wolf unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS; 3073da1fa91dSKevin Wolf 3074d0c7f642SKevin Wolf assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); 3075d0c7f642SKevin Wolf assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); 30768eb029c2SKevin Wolf assert(!qiov || bytes == qiov->size); 3077d0c7f642SKevin Wolf 3078d0c7f642SKevin Wolf /* Handle Copy on Read and associated serialisation */ 3079470c0504SStefan Hajnoczi if (flags & BDRV_REQ_COPY_ON_READ) { 30807327145fSKevin Wolf /* If we touch the same cluster it counts as an overlap. This 30817327145fSKevin Wolf * guarantees that allocating writes will be serialized and not race 30827327145fSKevin Wolf * with each other for the same cluster. For example, in copy-on-read 30837327145fSKevin Wolf * it ensures that the CoR read and write operations are atomic and 30847327145fSKevin Wolf * guest writes cannot interleave between them. */ 30857327145fSKevin Wolf mark_request_serialising(req, bdrv_get_cluster_size(bs)); 3086470c0504SStefan Hajnoczi } 3087470c0504SStefan Hajnoczi 30882dbafdc0SKevin Wolf wait_serialising_requests(req); 3089f4658285SStefan Hajnoczi 3090470c0504SStefan Hajnoczi if (flags & BDRV_REQ_COPY_ON_READ) { 3091ab185921SStefan Hajnoczi int pnum; 3092ab185921SStefan Hajnoczi 3093bdad13b9SPaolo Bonzini ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum); 3094ab185921SStefan Hajnoczi if (ret < 0) { 3095ab185921SStefan Hajnoczi goto out; 3096ab185921SStefan Hajnoczi } 3097ab185921SStefan Hajnoczi 3098ab185921SStefan Hajnoczi if (!ret || pnum != nb_sectors) { 3099470c0504SStefan Hajnoczi ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov); 3100ab185921SStefan Hajnoczi goto out; 3101ab185921SStefan Hajnoczi } 3102ab185921SStefan Hajnoczi } 3103ab185921SStefan Hajnoczi 3104d0c7f642SKevin Wolf /* Forward the request to the BlockDriver */ 3105c0191e76SMax Reitz if (!bs->zero_beyond_eof) { 3106dbffbdcfSStefan Hajnoczi ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov); 3107893a8f62SMORITA Kazutaka } else { 3108c0191e76SMax Reitz /* Read zeros after EOF */ 31094049082cSMarkus Armbruster int64_t total_sectors, max_nb_sectors; 3110893a8f62SMORITA Kazutaka 31114049082cSMarkus Armbruster total_sectors = bdrv_nb_sectors(bs); 31124049082cSMarkus Armbruster if (total_sectors < 0) { 31134049082cSMarkus Armbruster ret = total_sectors; 3114893a8f62SMORITA Kazutaka goto out; 3115893a8f62SMORITA Kazutaka } 3116893a8f62SMORITA Kazutaka 31175f5bcd80SKevin Wolf max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num), 31185f5bcd80SKevin Wolf align >> BDRV_SECTOR_BITS); 3119e012b78cSPaolo Bonzini if (nb_sectors < max_nb_sectors) { 3120e012b78cSPaolo Bonzini ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov); 3121e012b78cSPaolo Bonzini } else if (max_nb_sectors > 0) { 312233f461e0SKevin Wolf QEMUIOVector local_qiov; 312333f461e0SKevin Wolf 312433f461e0SKevin Wolf qemu_iovec_init(&local_qiov, qiov->niov); 312533f461e0SKevin Wolf qemu_iovec_concat(&local_qiov, qiov, 0, 3126e012b78cSPaolo Bonzini max_nb_sectors * BDRV_SECTOR_SIZE); 312733f461e0SKevin Wolf 3128e012b78cSPaolo Bonzini ret = drv->bdrv_co_readv(bs, sector_num, max_nb_sectors, 312933f461e0SKevin Wolf &local_qiov); 313033f461e0SKevin Wolf 313133f461e0SKevin Wolf qemu_iovec_destroy(&local_qiov); 3132893a8f62SMORITA Kazutaka } else { 3133893a8f62SMORITA Kazutaka ret = 0; 3134893a8f62SMORITA Kazutaka } 3135893a8f62SMORITA Kazutaka 3136893a8f62SMORITA Kazutaka /* Reading beyond end of file is supposed to produce zeroes */ 3137893a8f62SMORITA Kazutaka if (ret == 0 && total_sectors < sector_num + nb_sectors) { 3138893a8f62SMORITA Kazutaka uint64_t offset = MAX(0, total_sectors - sector_num); 3139893a8f62SMORITA Kazutaka uint64_t bytes = (sector_num + nb_sectors - offset) * 3140893a8f62SMORITA Kazutaka BDRV_SECTOR_SIZE; 3141893a8f62SMORITA Kazutaka qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes); 3142893a8f62SMORITA Kazutaka } 3143893a8f62SMORITA Kazutaka } 3144ab185921SStefan Hajnoczi 3145ab185921SStefan Hajnoczi out: 3146dbffbdcfSStefan Hajnoczi return ret; 3147da1fa91dSKevin Wolf } 3148da1fa91dSKevin Wolf 3149fc3959e4SFam Zheng static inline uint64_t bdrv_get_align(BlockDriverState *bs) 3150fc3959e4SFam Zheng { 3151fc3959e4SFam Zheng /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */ 3152fc3959e4SFam Zheng return MAX(BDRV_SECTOR_SIZE, bs->request_alignment); 3153fc3959e4SFam Zheng } 3154fc3959e4SFam Zheng 3155fc3959e4SFam Zheng static inline bool bdrv_req_is_aligned(BlockDriverState *bs, 3156fc3959e4SFam Zheng int64_t offset, size_t bytes) 3157fc3959e4SFam Zheng { 3158fc3959e4SFam Zheng int64_t align = bdrv_get_align(bs); 3159fc3959e4SFam Zheng return !(offset & (align - 1) || (bytes & (align - 1))); 3160fc3959e4SFam Zheng } 3161fc3959e4SFam Zheng 3162d0c7f642SKevin Wolf /* 3163d0c7f642SKevin Wolf * Handle a read request in coroutine context 3164d0c7f642SKevin Wolf */ 31651b0288aeSKevin Wolf static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs, 31661b0288aeSKevin Wolf int64_t offset, unsigned int bytes, QEMUIOVector *qiov, 3167d0c7f642SKevin Wolf BdrvRequestFlags flags) 3168d0c7f642SKevin Wolf { 3169d0c7f642SKevin Wolf BlockDriver *drv = bs->drv; 317065afd211SKevin Wolf BdrvTrackedRequest req; 317165afd211SKevin Wolf 3172fc3959e4SFam Zheng uint64_t align = bdrv_get_align(bs); 31731b0288aeSKevin Wolf uint8_t *head_buf = NULL; 31741b0288aeSKevin Wolf uint8_t *tail_buf = NULL; 31751b0288aeSKevin Wolf QEMUIOVector local_qiov; 31761b0288aeSKevin Wolf bool use_local_qiov = false; 3177d0c7f642SKevin Wolf int ret; 3178d0c7f642SKevin Wolf 3179d0c7f642SKevin Wolf if (!drv) { 3180d0c7f642SKevin Wolf return -ENOMEDIUM; 3181d0c7f642SKevin Wolf } 3182b9c64947SMax Reitz 3183b9c64947SMax Reitz ret = bdrv_check_byte_request(bs, offset, bytes); 3184b9c64947SMax Reitz if (ret < 0) { 3185b9c64947SMax Reitz return ret; 3186d0c7f642SKevin Wolf } 3187d0c7f642SKevin Wolf 3188d0c7f642SKevin Wolf if (bs->copy_on_read) { 3189d0c7f642SKevin Wolf flags |= BDRV_REQ_COPY_ON_READ; 3190d0c7f642SKevin Wolf } 3191d0c7f642SKevin Wolf 3192d0c7f642SKevin Wolf /* throttling disk I/O */ 3193d0c7f642SKevin Wolf if (bs->io_limits_enabled) { 3194d5103588SKevin Wolf bdrv_io_limits_intercept(bs, bytes, false); 3195d0c7f642SKevin Wolf } 3196d0c7f642SKevin Wolf 31971b0288aeSKevin Wolf /* Align read if necessary by padding qiov */ 31981b0288aeSKevin Wolf if (offset & (align - 1)) { 31991b0288aeSKevin Wolf head_buf = qemu_blockalign(bs, align); 32001b0288aeSKevin Wolf qemu_iovec_init(&local_qiov, qiov->niov + 2); 32011b0288aeSKevin Wolf qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1)); 32021b0288aeSKevin Wolf qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); 32031b0288aeSKevin Wolf use_local_qiov = true; 32041b0288aeSKevin Wolf 32051b0288aeSKevin Wolf bytes += offset & (align - 1); 32061b0288aeSKevin Wolf offset = offset & ~(align - 1); 32071b0288aeSKevin Wolf } 32081b0288aeSKevin Wolf 32091b0288aeSKevin Wolf if ((offset + bytes) & (align - 1)) { 32101b0288aeSKevin Wolf if (!use_local_qiov) { 32111b0288aeSKevin Wolf qemu_iovec_init(&local_qiov, qiov->niov + 1); 32121b0288aeSKevin Wolf qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); 32131b0288aeSKevin Wolf use_local_qiov = true; 32141b0288aeSKevin Wolf } 32151b0288aeSKevin Wolf tail_buf = qemu_blockalign(bs, align); 32161b0288aeSKevin Wolf qemu_iovec_add(&local_qiov, tail_buf, 32171b0288aeSKevin Wolf align - ((offset + bytes) & (align - 1))); 32181b0288aeSKevin Wolf 32191b0288aeSKevin Wolf bytes = ROUND_UP(bytes, align); 32201b0288aeSKevin Wolf } 32211b0288aeSKevin Wolf 322265afd211SKevin Wolf tracked_request_begin(&req, bs, offset, bytes, false); 3223ec746e10SKevin Wolf ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align, 32241b0288aeSKevin Wolf use_local_qiov ? &local_qiov : qiov, 32251b0288aeSKevin Wolf flags); 322665afd211SKevin Wolf tracked_request_end(&req); 32271b0288aeSKevin Wolf 32281b0288aeSKevin Wolf if (use_local_qiov) { 32291b0288aeSKevin Wolf qemu_iovec_destroy(&local_qiov); 32301b0288aeSKevin Wolf qemu_vfree(head_buf); 32311b0288aeSKevin Wolf qemu_vfree(tail_buf); 32321b0288aeSKevin Wolf } 32331b0288aeSKevin Wolf 3234d0c7f642SKevin Wolf return ret; 3235d0c7f642SKevin Wolf } 3236d0c7f642SKevin Wolf 32371b0288aeSKevin Wolf static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, 32381b0288aeSKevin Wolf int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, 32391b0288aeSKevin Wolf BdrvRequestFlags flags) 32401b0288aeSKevin Wolf { 324175af1f34SPeter Lieven if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { 32421b0288aeSKevin Wolf return -EINVAL; 32431b0288aeSKevin Wolf } 32441b0288aeSKevin Wolf 32451b0288aeSKevin Wolf return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS, 32461b0288aeSKevin Wolf nb_sectors << BDRV_SECTOR_BITS, qiov, flags); 32471b0288aeSKevin Wolf } 32481b0288aeSKevin Wolf 3249c5fbe571SStefan Hajnoczi int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num, 3250da1fa91dSKevin Wolf int nb_sectors, QEMUIOVector *qiov) 3251da1fa91dSKevin Wolf { 3252c5fbe571SStefan Hajnoczi trace_bdrv_co_readv(bs, sector_num, nb_sectors); 3253da1fa91dSKevin Wolf 3254470c0504SStefan Hajnoczi return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0); 3255470c0504SStefan Hajnoczi } 3256470c0504SStefan Hajnoczi 3257470c0504SStefan Hajnoczi int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs, 3258470c0504SStefan Hajnoczi int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) 3259470c0504SStefan Hajnoczi { 3260470c0504SStefan Hajnoczi trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors); 3261470c0504SStefan Hajnoczi 3262470c0504SStefan Hajnoczi return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 3263470c0504SStefan Hajnoczi BDRV_REQ_COPY_ON_READ); 3264c5fbe571SStefan Hajnoczi } 3265c5fbe571SStefan Hajnoczi 326698764152SPeter Lieven #define MAX_WRITE_ZEROES_BOUNCE_BUFFER 32768 3267c31cb707SPeter Lieven 3268f08f2ddaSStefan Hajnoczi static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, 3269aa7bfbffSPeter Lieven int64_t sector_num, int nb_sectors, BdrvRequestFlags flags) 3270f08f2ddaSStefan Hajnoczi { 3271f08f2ddaSStefan Hajnoczi BlockDriver *drv = bs->drv; 3272f08f2ddaSStefan Hajnoczi QEMUIOVector qiov; 3273c31cb707SPeter Lieven struct iovec iov = {0}; 3274c31cb707SPeter Lieven int ret = 0; 3275f08f2ddaSStefan Hajnoczi 327675af1f34SPeter Lieven int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_write_zeroes, 327775af1f34SPeter Lieven BDRV_REQUEST_MAX_SECTORS); 3278621f0589SKevin Wolf 3279c31cb707SPeter Lieven while (nb_sectors > 0 && !ret) { 3280c31cb707SPeter Lieven int num = nb_sectors; 3281c31cb707SPeter Lieven 3282b8d71c09SPaolo Bonzini /* Align request. Block drivers can expect the "bulk" of the request 3283b8d71c09SPaolo Bonzini * to be aligned. 3284b8d71c09SPaolo Bonzini */ 3285b8d71c09SPaolo Bonzini if (bs->bl.write_zeroes_alignment 3286b8d71c09SPaolo Bonzini && num > bs->bl.write_zeroes_alignment) { 3287b8d71c09SPaolo Bonzini if (sector_num % bs->bl.write_zeroes_alignment != 0) { 3288b8d71c09SPaolo Bonzini /* Make a small request up to the first aligned sector. */ 3289c31cb707SPeter Lieven num = bs->bl.write_zeroes_alignment; 3290c31cb707SPeter Lieven num -= sector_num % bs->bl.write_zeroes_alignment; 3291b8d71c09SPaolo Bonzini } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) { 3292b8d71c09SPaolo Bonzini /* Shorten the request to the last aligned sector. num cannot 3293b8d71c09SPaolo Bonzini * underflow because num > bs->bl.write_zeroes_alignment. 3294b8d71c09SPaolo Bonzini */ 3295b8d71c09SPaolo Bonzini num -= (sector_num + num) % bs->bl.write_zeroes_alignment; 3296b8d71c09SPaolo Bonzini } 3297c31cb707SPeter Lieven } 3298c31cb707SPeter Lieven 3299c31cb707SPeter Lieven /* limit request size */ 3300c31cb707SPeter Lieven if (num > max_write_zeroes) { 3301c31cb707SPeter Lieven num = max_write_zeroes; 3302c31cb707SPeter Lieven } 3303c31cb707SPeter Lieven 3304c31cb707SPeter Lieven ret = -ENOTSUP; 3305f08f2ddaSStefan Hajnoczi /* First try the efficient write zeroes operation */ 3306f08f2ddaSStefan Hajnoczi if (drv->bdrv_co_write_zeroes) { 3307c31cb707SPeter Lieven ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags); 3308f08f2ddaSStefan Hajnoczi } 3309f08f2ddaSStefan Hajnoczi 3310c31cb707SPeter Lieven if (ret == -ENOTSUP) { 3311f08f2ddaSStefan Hajnoczi /* Fall back to bounce buffer if write zeroes is unsupported */ 3312095e4fa4SPeter Lieven int max_xfer_len = MIN_NON_ZERO(bs->bl.max_transfer_length, 331398764152SPeter Lieven MAX_WRITE_ZEROES_BOUNCE_BUFFER); 3314095e4fa4SPeter Lieven num = MIN(num, max_xfer_len); 3315c31cb707SPeter Lieven iov.iov_len = num * BDRV_SECTOR_SIZE; 3316c31cb707SPeter Lieven if (iov.iov_base == NULL) { 3317857d4f46SKevin Wolf iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE); 3318857d4f46SKevin Wolf if (iov.iov_base == NULL) { 3319857d4f46SKevin Wolf ret = -ENOMEM; 3320857d4f46SKevin Wolf goto fail; 3321857d4f46SKevin Wolf } 3322b8d71c09SPaolo Bonzini memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE); 3323c31cb707SPeter Lieven } 3324f08f2ddaSStefan Hajnoczi qemu_iovec_init_external(&qiov, &iov, 1); 3325f08f2ddaSStefan Hajnoczi 3326c31cb707SPeter Lieven ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov); 3327b8d71c09SPaolo Bonzini 3328b8d71c09SPaolo Bonzini /* Keep bounce buffer around if it is big enough for all 3329b8d71c09SPaolo Bonzini * all future requests. 3330b8d71c09SPaolo Bonzini */ 3331095e4fa4SPeter Lieven if (num < max_xfer_len) { 3332b8d71c09SPaolo Bonzini qemu_vfree(iov.iov_base); 3333b8d71c09SPaolo Bonzini iov.iov_base = NULL; 3334b8d71c09SPaolo Bonzini } 3335c31cb707SPeter Lieven } 3336c31cb707SPeter Lieven 3337c31cb707SPeter Lieven sector_num += num; 3338c31cb707SPeter Lieven nb_sectors -= num; 3339c31cb707SPeter Lieven } 3340f08f2ddaSStefan Hajnoczi 3341857d4f46SKevin Wolf fail: 3342f08f2ddaSStefan Hajnoczi qemu_vfree(iov.iov_base); 3343f08f2ddaSStefan Hajnoczi return ret; 3344f08f2ddaSStefan Hajnoczi } 3345f08f2ddaSStefan Hajnoczi 3346c5fbe571SStefan Hajnoczi /* 3347b404f720SKevin Wolf * Forwards an already correctly aligned write request to the BlockDriver. 3348c5fbe571SStefan Hajnoczi */ 3349b404f720SKevin Wolf static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, 335065afd211SKevin Wolf BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, 335165afd211SKevin Wolf QEMUIOVector *qiov, int flags) 3352c5fbe571SStefan Hajnoczi { 3353c5fbe571SStefan Hajnoczi BlockDriver *drv = bs->drv; 335428de2dcdSKevin Wolf bool waited; 33556b7cb247SStefan Hajnoczi int ret; 3356da1fa91dSKevin Wolf 3357b404f720SKevin Wolf int64_t sector_num = offset >> BDRV_SECTOR_BITS; 3358b404f720SKevin Wolf unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS; 3359da1fa91dSKevin Wolf 3360b404f720SKevin Wolf assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); 3361b404f720SKevin Wolf assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); 33628eb029c2SKevin Wolf assert(!qiov || bytes == qiov->size); 3363cc0681c4SBenoît Canet 336428de2dcdSKevin Wolf waited = wait_serialising_requests(req); 336528de2dcdSKevin Wolf assert(!waited || !req->serialising); 3366af91f9a7SKevin Wolf assert(req->overlap_offset <= offset); 3367af91f9a7SKevin Wolf assert(offset + bytes <= req->overlap_offset + req->overlap_bytes); 3368244eadefSKevin Wolf 336965afd211SKevin Wolf ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req); 3370d616b224SStefan Hajnoczi 3371465bee1dSPeter Lieven if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF && 3372465bee1dSPeter Lieven !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes && 3373465bee1dSPeter Lieven qemu_iovec_is_zero(qiov)) { 3374465bee1dSPeter Lieven flags |= BDRV_REQ_ZERO_WRITE; 3375465bee1dSPeter Lieven if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) { 3376465bee1dSPeter Lieven flags |= BDRV_REQ_MAY_UNMAP; 3377465bee1dSPeter Lieven } 3378465bee1dSPeter Lieven } 3379465bee1dSPeter Lieven 3380d616b224SStefan Hajnoczi if (ret < 0) { 3381d616b224SStefan Hajnoczi /* Do nothing, write notifier decided to fail this request */ 3382d616b224SStefan Hajnoczi } else if (flags & BDRV_REQ_ZERO_WRITE) { 33839e1cb96dSKevin Wolf BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO); 3384aa7bfbffSPeter Lieven ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags); 3385f08f2ddaSStefan Hajnoczi } else { 33869e1cb96dSKevin Wolf BLKDBG_EVENT(bs, BLKDBG_PWRITEV); 33876b7cb247SStefan Hajnoczi ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov); 3388f08f2ddaSStefan Hajnoczi } 33899e1cb96dSKevin Wolf BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE); 33906b7cb247SStefan Hajnoczi 3391f05fa4adSPaolo Bonzini if (ret == 0 && !bs->enable_write_cache) { 3392f05fa4adSPaolo Bonzini ret = bdrv_co_flush(bs); 3393f05fa4adSPaolo Bonzini } 3394f05fa4adSPaolo Bonzini 33951755da16SPaolo Bonzini bdrv_set_dirty(bs, sector_num, nb_sectors); 3396da1fa91dSKevin Wolf 33975366d0c8SBenoît Canet block_acct_highest_sector(&bs->stats, sector_num, nb_sectors); 33985e5a94b6SBenoît Canet 3399c0191e76SMax Reitz if (ret >= 0) { 3400df2a6f29SPaolo Bonzini bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors); 3401df2a6f29SPaolo Bonzini } 3402da1fa91dSKevin Wolf 34036b7cb247SStefan Hajnoczi return ret; 3404da1fa91dSKevin Wolf } 3405da1fa91dSKevin Wolf 3406b404f720SKevin Wolf /* 3407b404f720SKevin Wolf * Handle a write request in coroutine context 3408b404f720SKevin Wolf */ 34096601553eSKevin Wolf static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, 34106601553eSKevin Wolf int64_t offset, unsigned int bytes, QEMUIOVector *qiov, 3411b404f720SKevin Wolf BdrvRequestFlags flags) 3412b404f720SKevin Wolf { 341365afd211SKevin Wolf BdrvTrackedRequest req; 3414fc3959e4SFam Zheng uint64_t align = bdrv_get_align(bs); 34153b8242e0SKevin Wolf uint8_t *head_buf = NULL; 34163b8242e0SKevin Wolf uint8_t *tail_buf = NULL; 34173b8242e0SKevin Wolf QEMUIOVector local_qiov; 34183b8242e0SKevin Wolf bool use_local_qiov = false; 3419b404f720SKevin Wolf int ret; 3420b404f720SKevin Wolf 3421b404f720SKevin Wolf if (!bs->drv) { 3422b404f720SKevin Wolf return -ENOMEDIUM; 3423b404f720SKevin Wolf } 3424b404f720SKevin Wolf if (bs->read_only) { 3425b404f720SKevin Wolf return -EACCES; 3426b404f720SKevin Wolf } 3427b9c64947SMax Reitz 3428b9c64947SMax Reitz ret = bdrv_check_byte_request(bs, offset, bytes); 3429b9c64947SMax Reitz if (ret < 0) { 3430b9c64947SMax Reitz return ret; 3431b404f720SKevin Wolf } 3432b404f720SKevin Wolf 3433b404f720SKevin Wolf /* throttling disk I/O */ 3434b404f720SKevin Wolf if (bs->io_limits_enabled) { 3435d5103588SKevin Wolf bdrv_io_limits_intercept(bs, bytes, true); 3436b404f720SKevin Wolf } 3437b404f720SKevin Wolf 34383b8242e0SKevin Wolf /* 34393b8242e0SKevin Wolf * Align write if necessary by performing a read-modify-write cycle. 34403b8242e0SKevin Wolf * Pad qiov with the read parts and be sure to have a tracked request not 34413b8242e0SKevin Wolf * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle. 34423b8242e0SKevin Wolf */ 344365afd211SKevin Wolf tracked_request_begin(&req, bs, offset, bytes, true); 34443b8242e0SKevin Wolf 34453b8242e0SKevin Wolf if (offset & (align - 1)) { 34463b8242e0SKevin Wolf QEMUIOVector head_qiov; 34473b8242e0SKevin Wolf struct iovec head_iov; 34483b8242e0SKevin Wolf 34493b8242e0SKevin Wolf mark_request_serialising(&req, align); 34503b8242e0SKevin Wolf wait_serialising_requests(&req); 34513b8242e0SKevin Wolf 34523b8242e0SKevin Wolf head_buf = qemu_blockalign(bs, align); 34533b8242e0SKevin Wolf head_iov = (struct iovec) { 34543b8242e0SKevin Wolf .iov_base = head_buf, 34553b8242e0SKevin Wolf .iov_len = align, 34563b8242e0SKevin Wolf }; 34573b8242e0SKevin Wolf qemu_iovec_init_external(&head_qiov, &head_iov, 1); 34583b8242e0SKevin Wolf 34599e1cb96dSKevin Wolf BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD); 34603b8242e0SKevin Wolf ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align, 34613b8242e0SKevin Wolf align, &head_qiov, 0); 34623b8242e0SKevin Wolf if (ret < 0) { 34633b8242e0SKevin Wolf goto fail; 34643b8242e0SKevin Wolf } 34659e1cb96dSKevin Wolf BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD); 34663b8242e0SKevin Wolf 34673b8242e0SKevin Wolf qemu_iovec_init(&local_qiov, qiov->niov + 2); 34683b8242e0SKevin Wolf qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1)); 34693b8242e0SKevin Wolf qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); 34703b8242e0SKevin Wolf use_local_qiov = true; 34713b8242e0SKevin Wolf 34723b8242e0SKevin Wolf bytes += offset & (align - 1); 34733b8242e0SKevin Wolf offset = offset & ~(align - 1); 34743b8242e0SKevin Wolf } 34753b8242e0SKevin Wolf 34763b8242e0SKevin Wolf if ((offset + bytes) & (align - 1)) { 34773b8242e0SKevin Wolf QEMUIOVector tail_qiov; 34783b8242e0SKevin Wolf struct iovec tail_iov; 34793b8242e0SKevin Wolf size_t tail_bytes; 348028de2dcdSKevin Wolf bool waited; 34813b8242e0SKevin Wolf 34823b8242e0SKevin Wolf mark_request_serialising(&req, align); 348328de2dcdSKevin Wolf waited = wait_serialising_requests(&req); 348428de2dcdSKevin Wolf assert(!waited || !use_local_qiov); 34853b8242e0SKevin Wolf 34863b8242e0SKevin Wolf tail_buf = qemu_blockalign(bs, align); 34873b8242e0SKevin Wolf tail_iov = (struct iovec) { 34883b8242e0SKevin Wolf .iov_base = tail_buf, 34893b8242e0SKevin Wolf .iov_len = align, 34903b8242e0SKevin Wolf }; 34913b8242e0SKevin Wolf qemu_iovec_init_external(&tail_qiov, &tail_iov, 1); 34923b8242e0SKevin Wolf 34939e1cb96dSKevin Wolf BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL); 34943b8242e0SKevin Wolf ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align, 34953b8242e0SKevin Wolf align, &tail_qiov, 0); 34963b8242e0SKevin Wolf if (ret < 0) { 34973b8242e0SKevin Wolf goto fail; 34983b8242e0SKevin Wolf } 34999e1cb96dSKevin Wolf BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); 35003b8242e0SKevin Wolf 35013b8242e0SKevin Wolf if (!use_local_qiov) { 35023b8242e0SKevin Wolf qemu_iovec_init(&local_qiov, qiov->niov + 1); 35033b8242e0SKevin Wolf qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); 35043b8242e0SKevin Wolf use_local_qiov = true; 35053b8242e0SKevin Wolf } 35063b8242e0SKevin Wolf 35073b8242e0SKevin Wolf tail_bytes = (offset + bytes) & (align - 1); 35083b8242e0SKevin Wolf qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes); 35093b8242e0SKevin Wolf 35103b8242e0SKevin Wolf bytes = ROUND_UP(bytes, align); 35113b8242e0SKevin Wolf } 35123b8242e0SKevin Wolf 3513fc3959e4SFam Zheng if (use_local_qiov) { 3514fc3959e4SFam Zheng /* Local buffer may have non-zero data. */ 3515fc3959e4SFam Zheng flags &= ~BDRV_REQ_ZERO_WRITE; 3516fc3959e4SFam Zheng } 35173b8242e0SKevin Wolf ret = bdrv_aligned_pwritev(bs, &req, offset, bytes, 35183b8242e0SKevin Wolf use_local_qiov ? &local_qiov : qiov, 35193b8242e0SKevin Wolf flags); 35203b8242e0SKevin Wolf 35213b8242e0SKevin Wolf fail: 352265afd211SKevin Wolf tracked_request_end(&req); 3523b404f720SKevin Wolf 35243b8242e0SKevin Wolf if (use_local_qiov) { 35253b8242e0SKevin Wolf qemu_iovec_destroy(&local_qiov); 352699c4a85cSKevin Wolf } 35273b8242e0SKevin Wolf qemu_vfree(head_buf); 35283b8242e0SKevin Wolf qemu_vfree(tail_buf); 35293b8242e0SKevin Wolf 3530b404f720SKevin Wolf return ret; 3531b404f720SKevin Wolf } 3532b404f720SKevin Wolf 35336601553eSKevin Wolf static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, 35346601553eSKevin Wolf int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, 35356601553eSKevin Wolf BdrvRequestFlags flags) 35366601553eSKevin Wolf { 353775af1f34SPeter Lieven if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { 35386601553eSKevin Wolf return -EINVAL; 35396601553eSKevin Wolf } 35406601553eSKevin Wolf 35416601553eSKevin Wolf return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS, 35426601553eSKevin Wolf nb_sectors << BDRV_SECTOR_BITS, qiov, flags); 35436601553eSKevin Wolf } 35446601553eSKevin Wolf 3545c5fbe571SStefan Hajnoczi int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num, 3546c5fbe571SStefan Hajnoczi int nb_sectors, QEMUIOVector *qiov) 3547c5fbe571SStefan Hajnoczi { 3548c5fbe571SStefan Hajnoczi trace_bdrv_co_writev(bs, sector_num, nb_sectors); 3549c5fbe571SStefan Hajnoczi 3550f08f2ddaSStefan Hajnoczi return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0); 3551f08f2ddaSStefan Hajnoczi } 3552f08f2ddaSStefan Hajnoczi 3553f08f2ddaSStefan Hajnoczi int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, 3554aa7bfbffSPeter Lieven int64_t sector_num, int nb_sectors, 3555aa7bfbffSPeter Lieven BdrvRequestFlags flags) 3556f08f2ddaSStefan Hajnoczi { 3557fc3959e4SFam Zheng int ret; 3558fc3959e4SFam Zheng 355994d6ff21SPaolo Bonzini trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags); 3560f08f2ddaSStefan Hajnoczi 3561d32f35cbSPeter Lieven if (!(bs->open_flags & BDRV_O_UNMAP)) { 3562d32f35cbSPeter Lieven flags &= ~BDRV_REQ_MAY_UNMAP; 3563d32f35cbSPeter Lieven } 3564fc3959e4SFam Zheng if (bdrv_req_is_aligned(bs, sector_num << BDRV_SECTOR_BITS, 3565fc3959e4SFam Zheng nb_sectors << BDRV_SECTOR_BITS)) { 3566fc3959e4SFam Zheng ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL, 3567aa7bfbffSPeter Lieven BDRV_REQ_ZERO_WRITE | flags); 3568fc3959e4SFam Zheng } else { 3569fc3959e4SFam Zheng uint8_t *buf; 3570fc3959e4SFam Zheng QEMUIOVector local_qiov; 3571fc3959e4SFam Zheng size_t bytes = nb_sectors << BDRV_SECTOR_BITS; 3572fc3959e4SFam Zheng 3573fc3959e4SFam Zheng buf = qemu_memalign(bdrv_opt_mem_align(bs), bytes); 3574fc3959e4SFam Zheng memset(buf, 0, bytes); 3575fc3959e4SFam Zheng qemu_iovec_init(&local_qiov, 1); 3576fc3959e4SFam Zheng qemu_iovec_add(&local_qiov, buf, bytes); 3577fc3959e4SFam Zheng 3578fc3959e4SFam Zheng ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, &local_qiov, 3579fc3959e4SFam Zheng BDRV_REQ_ZERO_WRITE | flags); 3580fc3959e4SFam Zheng qemu_vfree(buf); 3581fc3959e4SFam Zheng } 3582fc3959e4SFam Zheng return ret; 3583c5fbe571SStefan Hajnoczi } 3584c5fbe571SStefan Hajnoczi 358583f64091Sbellard /** 358683f64091Sbellard * Truncate file to 'offset' bytes (needed only for file protocols) 358783f64091Sbellard */ 358883f64091Sbellard int bdrv_truncate(BlockDriverState *bs, int64_t offset) 358983f64091Sbellard { 359083f64091Sbellard BlockDriver *drv = bs->drv; 359151762288SStefan Hajnoczi int ret; 359283f64091Sbellard if (!drv) 359319cb3738Sbellard return -ENOMEDIUM; 359483f64091Sbellard if (!drv->bdrv_truncate) 359583f64091Sbellard return -ENOTSUP; 359659f2689dSNaphtali Sprei if (bs->read_only) 359759f2689dSNaphtali Sprei return -EACCES; 35989c75e168SJeff Cody 359951762288SStefan Hajnoczi ret = drv->bdrv_truncate(bs, offset); 360051762288SStefan Hajnoczi if (ret == 0) { 360151762288SStefan Hajnoczi ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS); 3602a7f53e26SMarkus Armbruster if (bs->blk) { 3603a7f53e26SMarkus Armbruster blk_dev_resize_cb(bs->blk); 3604a7f53e26SMarkus Armbruster } 360551762288SStefan Hajnoczi } 360651762288SStefan Hajnoczi return ret; 360783f64091Sbellard } 360883f64091Sbellard 360983f64091Sbellard /** 36104a1d5e1fSFam Zheng * Length of a allocated file in bytes. Sparse files are counted by actual 36114a1d5e1fSFam Zheng * allocated space. Return < 0 if error or unknown. 36124a1d5e1fSFam Zheng */ 36134a1d5e1fSFam Zheng int64_t bdrv_get_allocated_file_size(BlockDriverState *bs) 36144a1d5e1fSFam Zheng { 36154a1d5e1fSFam Zheng BlockDriver *drv = bs->drv; 36164a1d5e1fSFam Zheng if (!drv) { 36174a1d5e1fSFam Zheng return -ENOMEDIUM; 36184a1d5e1fSFam Zheng } 36194a1d5e1fSFam Zheng if (drv->bdrv_get_allocated_file_size) { 36204a1d5e1fSFam Zheng return drv->bdrv_get_allocated_file_size(bs); 36214a1d5e1fSFam Zheng } 36224a1d5e1fSFam Zheng if (bs->file) { 36234a1d5e1fSFam Zheng return bdrv_get_allocated_file_size(bs->file); 36244a1d5e1fSFam Zheng } 36254a1d5e1fSFam Zheng return -ENOTSUP; 36264a1d5e1fSFam Zheng } 36274a1d5e1fSFam Zheng 36284a1d5e1fSFam Zheng /** 362965a9bb25SMarkus Armbruster * Return number of sectors on success, -errno on error. 363083f64091Sbellard */ 363165a9bb25SMarkus Armbruster int64_t bdrv_nb_sectors(BlockDriverState *bs) 363283f64091Sbellard { 363383f64091Sbellard BlockDriver *drv = bs->drv; 363465a9bb25SMarkus Armbruster 363583f64091Sbellard if (!drv) 363619cb3738Sbellard return -ENOMEDIUM; 363751762288SStefan Hajnoczi 3638b94a2610SKevin Wolf if (drv->has_variable_length) { 3639b94a2610SKevin Wolf int ret = refresh_total_sectors(bs, bs->total_sectors); 3640b94a2610SKevin Wolf if (ret < 0) { 3641b94a2610SKevin Wolf return ret; 3642fc01f7e7Sbellard } 364346a4e4e6SStefan Hajnoczi } 364465a9bb25SMarkus Armbruster return bs->total_sectors; 364565a9bb25SMarkus Armbruster } 364665a9bb25SMarkus Armbruster 364765a9bb25SMarkus Armbruster /** 364865a9bb25SMarkus Armbruster * Return length in bytes on success, -errno on error. 364965a9bb25SMarkus Armbruster * The length is always a multiple of BDRV_SECTOR_SIZE. 365065a9bb25SMarkus Armbruster */ 365165a9bb25SMarkus Armbruster int64_t bdrv_getlength(BlockDriverState *bs) 365265a9bb25SMarkus Armbruster { 365365a9bb25SMarkus Armbruster int64_t ret = bdrv_nb_sectors(bs); 365465a9bb25SMarkus Armbruster 365565a9bb25SMarkus Armbruster return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE; 365646a4e4e6SStefan Hajnoczi } 3657fc01f7e7Sbellard 365819cb3738Sbellard /* return 0 as number of sectors if no device present or error */ 365996b8f136Sths void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr) 3660fc01f7e7Sbellard { 366165a9bb25SMarkus Armbruster int64_t nb_sectors = bdrv_nb_sectors(bs); 366265a9bb25SMarkus Armbruster 366365a9bb25SMarkus Armbruster *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors; 3664fc01f7e7Sbellard } 3665cf98951bSbellard 3666ff06f5f3SPaolo Bonzini void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error, 3667ff06f5f3SPaolo Bonzini BlockdevOnError on_write_error) 3668abd7f68dSMarkus Armbruster { 3669abd7f68dSMarkus Armbruster bs->on_read_error = on_read_error; 3670abd7f68dSMarkus Armbruster bs->on_write_error = on_write_error; 3671abd7f68dSMarkus Armbruster } 3672abd7f68dSMarkus Armbruster 36731ceee0d5SPaolo Bonzini BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read) 3674abd7f68dSMarkus Armbruster { 3675abd7f68dSMarkus Armbruster return is_read ? bs->on_read_error : bs->on_write_error; 3676abd7f68dSMarkus Armbruster } 3677abd7f68dSMarkus Armbruster 36783e1caa5fSPaolo Bonzini BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error) 36793e1caa5fSPaolo Bonzini { 36803e1caa5fSPaolo Bonzini BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error; 36813e1caa5fSPaolo Bonzini 36823e1caa5fSPaolo Bonzini switch (on_err) { 36833e1caa5fSPaolo Bonzini case BLOCKDEV_ON_ERROR_ENOSPC: 3684a589569fSWenchao Xia return (error == ENOSPC) ? 3685a589569fSWenchao Xia BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT; 36863e1caa5fSPaolo Bonzini case BLOCKDEV_ON_ERROR_STOP: 3687a589569fSWenchao Xia return BLOCK_ERROR_ACTION_STOP; 36883e1caa5fSPaolo Bonzini case BLOCKDEV_ON_ERROR_REPORT: 3689a589569fSWenchao Xia return BLOCK_ERROR_ACTION_REPORT; 36903e1caa5fSPaolo Bonzini case BLOCKDEV_ON_ERROR_IGNORE: 3691a589569fSWenchao Xia return BLOCK_ERROR_ACTION_IGNORE; 36923e1caa5fSPaolo Bonzini default: 36933e1caa5fSPaolo Bonzini abort(); 36943e1caa5fSPaolo Bonzini } 36953e1caa5fSPaolo Bonzini } 36963e1caa5fSPaolo Bonzini 3697c7c2ff0cSLuiz Capitulino static void send_qmp_error_event(BlockDriverState *bs, 3698c7c2ff0cSLuiz Capitulino BlockErrorAction action, 3699c7c2ff0cSLuiz Capitulino bool is_read, int error) 3700c7c2ff0cSLuiz Capitulino { 3701573742a5SPeter Maydell IoOperationType optype; 3702c7c2ff0cSLuiz Capitulino 3703573742a5SPeter Maydell optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE; 3704573742a5SPeter Maydell qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action, 3705c7c2ff0cSLuiz Capitulino bdrv_iostatus_is_enabled(bs), 3706624ff573SLuiz Capitulino error == ENOSPC, strerror(error), 3707624ff573SLuiz Capitulino &error_abort); 3708c7c2ff0cSLuiz Capitulino } 3709c7c2ff0cSLuiz Capitulino 37103e1caa5fSPaolo Bonzini /* This is done by device models because, while the block layer knows 37113e1caa5fSPaolo Bonzini * about the error, it does not know whether an operation comes from 37123e1caa5fSPaolo Bonzini * the device or the block layer (from a job, for example). 37133e1caa5fSPaolo Bonzini */ 37143e1caa5fSPaolo Bonzini void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action, 37153e1caa5fSPaolo Bonzini bool is_read, int error) 37163e1caa5fSPaolo Bonzini { 37173e1caa5fSPaolo Bonzini assert(error >= 0); 37182bd3bce8SPaolo Bonzini 3719a589569fSWenchao Xia if (action == BLOCK_ERROR_ACTION_STOP) { 37202bd3bce8SPaolo Bonzini /* First set the iostatus, so that "info block" returns an iostatus 37212bd3bce8SPaolo Bonzini * that matches the events raised so far (an additional error iostatus 37222bd3bce8SPaolo Bonzini * is fine, but not a lost one). 37232bd3bce8SPaolo Bonzini */ 37243e1caa5fSPaolo Bonzini bdrv_iostatus_set_err(bs, error); 37252bd3bce8SPaolo Bonzini 37262bd3bce8SPaolo Bonzini /* Then raise the request to stop the VM and the event. 37272bd3bce8SPaolo Bonzini * qemu_system_vmstop_request_prepare has two effects. First, 37282bd3bce8SPaolo Bonzini * it ensures that the STOP event always comes after the 37292bd3bce8SPaolo Bonzini * BLOCK_IO_ERROR event. Second, it ensures that even if management 37302bd3bce8SPaolo Bonzini * can observe the STOP event and do a "cont" before the STOP 37312bd3bce8SPaolo Bonzini * event is issued, the VM will not stop. In this case, vm_start() 37322bd3bce8SPaolo Bonzini * also ensures that the STOP/RESUME pair of events is emitted. 37332bd3bce8SPaolo Bonzini */ 37342bd3bce8SPaolo Bonzini qemu_system_vmstop_request_prepare(); 3735c7c2ff0cSLuiz Capitulino send_qmp_error_event(bs, action, is_read, error); 37362bd3bce8SPaolo Bonzini qemu_system_vmstop_request(RUN_STATE_IO_ERROR); 37372bd3bce8SPaolo Bonzini } else { 3738c7c2ff0cSLuiz Capitulino send_qmp_error_event(bs, action, is_read, error); 37393e1caa5fSPaolo Bonzini } 37403e1caa5fSPaolo Bonzini } 37413e1caa5fSPaolo Bonzini 3742b338082bSbellard int bdrv_is_read_only(BlockDriverState *bs) 3743b338082bSbellard { 3744b338082bSbellard return bs->read_only; 3745b338082bSbellard } 3746b338082bSbellard 3747985a03b0Sths int bdrv_is_sg(BlockDriverState *bs) 3748985a03b0Sths { 3749985a03b0Sths return bs->sg; 3750985a03b0Sths } 3751985a03b0Sths 3752e900a7b7SChristoph Hellwig int bdrv_enable_write_cache(BlockDriverState *bs) 3753e900a7b7SChristoph Hellwig { 3754e900a7b7SChristoph Hellwig return bs->enable_write_cache; 3755e900a7b7SChristoph Hellwig } 3756e900a7b7SChristoph Hellwig 3757425b0148SPaolo Bonzini void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce) 3758425b0148SPaolo Bonzini { 3759425b0148SPaolo Bonzini bs->enable_write_cache = wce; 376055b110f2SJeff Cody 376155b110f2SJeff Cody /* so a reopen() will preserve wce */ 376255b110f2SJeff Cody if (wce) { 376355b110f2SJeff Cody bs->open_flags |= BDRV_O_CACHE_WB; 376455b110f2SJeff Cody } else { 376555b110f2SJeff Cody bs->open_flags &= ~BDRV_O_CACHE_WB; 376655b110f2SJeff Cody } 3767425b0148SPaolo Bonzini } 3768425b0148SPaolo Bonzini 3769ea2384d3Sbellard int bdrv_is_encrypted(BlockDriverState *bs) 3770ea2384d3Sbellard { 3771ea2384d3Sbellard if (bs->backing_hd && bs->backing_hd->encrypted) 3772ea2384d3Sbellard return 1; 3773ea2384d3Sbellard return bs->encrypted; 3774ea2384d3Sbellard } 3775ea2384d3Sbellard 3776c0f4ce77Saliguori int bdrv_key_required(BlockDriverState *bs) 3777c0f4ce77Saliguori { 3778c0f4ce77Saliguori BlockDriverState *backing_hd = bs->backing_hd; 3779c0f4ce77Saliguori 3780c0f4ce77Saliguori if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key) 3781c0f4ce77Saliguori return 1; 3782c0f4ce77Saliguori return (bs->encrypted && !bs->valid_key); 3783c0f4ce77Saliguori } 3784c0f4ce77Saliguori 3785ea2384d3Sbellard int bdrv_set_key(BlockDriverState *bs, const char *key) 3786ea2384d3Sbellard { 3787ea2384d3Sbellard int ret; 3788ea2384d3Sbellard if (bs->backing_hd && bs->backing_hd->encrypted) { 3789ea2384d3Sbellard ret = bdrv_set_key(bs->backing_hd, key); 3790ea2384d3Sbellard if (ret < 0) 3791ea2384d3Sbellard return ret; 3792ea2384d3Sbellard if (!bs->encrypted) 3793ea2384d3Sbellard return 0; 3794ea2384d3Sbellard } 3795fd04a2aeSShahar Havivi if (!bs->encrypted) { 3796fd04a2aeSShahar Havivi return -EINVAL; 3797fd04a2aeSShahar Havivi } else if (!bs->drv || !bs->drv->bdrv_set_key) { 3798fd04a2aeSShahar Havivi return -ENOMEDIUM; 3799fd04a2aeSShahar Havivi } 3800c0f4ce77Saliguori ret = bs->drv->bdrv_set_key(bs, key); 3801bb5fc20fSaliguori if (ret < 0) { 3802bb5fc20fSaliguori bs->valid_key = 0; 3803bb5fc20fSaliguori } else if (!bs->valid_key) { 3804bb5fc20fSaliguori bs->valid_key = 1; 3805a7f53e26SMarkus Armbruster if (bs->blk) { 3806bb5fc20fSaliguori /* call the change callback now, we skipped it on open */ 3807a7f53e26SMarkus Armbruster blk_dev_change_media_cb(bs->blk, true); 3808a7f53e26SMarkus Armbruster } 3809bb5fc20fSaliguori } 3810c0f4ce77Saliguori return ret; 3811ea2384d3Sbellard } 3812ea2384d3Sbellard 38134d2855a3SMarkus Armbruster /* 38144d2855a3SMarkus Armbruster * Provide an encryption key for @bs. 38154d2855a3SMarkus Armbruster * If @key is non-null: 38164d2855a3SMarkus Armbruster * If @bs is not encrypted, fail. 38174d2855a3SMarkus Armbruster * Else if the key is invalid, fail. 38184d2855a3SMarkus Armbruster * Else set @bs's key to @key, replacing the existing key, if any. 38194d2855a3SMarkus Armbruster * If @key is null: 38204d2855a3SMarkus Armbruster * If @bs is encrypted and still lacks a key, fail. 38214d2855a3SMarkus Armbruster * Else do nothing. 38224d2855a3SMarkus Armbruster * On failure, store an error object through @errp if non-null. 38234d2855a3SMarkus Armbruster */ 38244d2855a3SMarkus Armbruster void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp) 38254d2855a3SMarkus Armbruster { 38264d2855a3SMarkus Armbruster if (key) { 38274d2855a3SMarkus Armbruster if (!bdrv_is_encrypted(bs)) { 382881e5f78aSAlberto Garcia error_setg(errp, "Node '%s' is not encrypted", 382981e5f78aSAlberto Garcia bdrv_get_device_or_node_name(bs)); 38304d2855a3SMarkus Armbruster } else if (bdrv_set_key(bs, key) < 0) { 38314d2855a3SMarkus Armbruster error_set(errp, QERR_INVALID_PASSWORD); 38324d2855a3SMarkus Armbruster } 38334d2855a3SMarkus Armbruster } else { 38344d2855a3SMarkus Armbruster if (bdrv_key_required(bs)) { 3835b1ca6391SMarkus Armbruster error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED, 3836b1ca6391SMarkus Armbruster "'%s' (%s) is encrypted", 383781e5f78aSAlberto Garcia bdrv_get_device_or_node_name(bs), 38384d2855a3SMarkus Armbruster bdrv_get_encrypted_filename(bs)); 38394d2855a3SMarkus Armbruster } 38404d2855a3SMarkus Armbruster } 38414d2855a3SMarkus Armbruster } 38424d2855a3SMarkus Armbruster 3843f8d6bba1SMarkus Armbruster const char *bdrv_get_format_name(BlockDriverState *bs) 3844ea2384d3Sbellard { 3845f8d6bba1SMarkus Armbruster return bs->drv ? bs->drv->format_name : NULL; 3846ea2384d3Sbellard } 3847ea2384d3Sbellard 3848ada42401SStefan Hajnoczi static int qsort_strcmp(const void *a, const void *b) 3849ada42401SStefan Hajnoczi { 3850ada42401SStefan Hajnoczi return strcmp(a, b); 3851ada42401SStefan Hajnoczi } 3852ada42401SStefan Hajnoczi 3853ea2384d3Sbellard void bdrv_iterate_format(void (*it)(void *opaque, const char *name), 3854ea2384d3Sbellard void *opaque) 3855ea2384d3Sbellard { 3856ea2384d3Sbellard BlockDriver *drv; 3857e855e4fbSJeff Cody int count = 0; 3858ada42401SStefan Hajnoczi int i; 3859e855e4fbSJeff Cody const char **formats = NULL; 3860ea2384d3Sbellard 38618a22f02aSStefan Hajnoczi QLIST_FOREACH(drv, &bdrv_drivers, list) { 3862e855e4fbSJeff Cody if (drv->format_name) { 3863e855e4fbSJeff Cody bool found = false; 3864e855e4fbSJeff Cody int i = count; 3865e855e4fbSJeff Cody while (formats && i && !found) { 3866e855e4fbSJeff Cody found = !strcmp(formats[--i], drv->format_name); 3867e855e4fbSJeff Cody } 3868e855e4fbSJeff Cody 3869e855e4fbSJeff Cody if (!found) { 38705839e53bSMarkus Armbruster formats = g_renew(const char *, formats, count + 1); 3871e855e4fbSJeff Cody formats[count++] = drv->format_name; 3872ea2384d3Sbellard } 3873ea2384d3Sbellard } 3874e855e4fbSJeff Cody } 3875ada42401SStefan Hajnoczi 3876ada42401SStefan Hajnoczi qsort(formats, count, sizeof(formats[0]), qsort_strcmp); 3877ada42401SStefan Hajnoczi 3878ada42401SStefan Hajnoczi for (i = 0; i < count; i++) { 3879ada42401SStefan Hajnoczi it(opaque, formats[i]); 3880ada42401SStefan Hajnoczi } 3881ada42401SStefan Hajnoczi 3882e855e4fbSJeff Cody g_free(formats); 3883e855e4fbSJeff Cody } 3884ea2384d3Sbellard 3885dc364f4cSBenoît Canet /* This function is to find a node in the bs graph */ 3886dc364f4cSBenoît Canet BlockDriverState *bdrv_find_node(const char *node_name) 3887dc364f4cSBenoît Canet { 3888dc364f4cSBenoît Canet BlockDriverState *bs; 3889dc364f4cSBenoît Canet 3890dc364f4cSBenoît Canet assert(node_name); 3891dc364f4cSBenoît Canet 3892dc364f4cSBenoît Canet QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 3893dc364f4cSBenoît Canet if (!strcmp(node_name, bs->node_name)) { 3894dc364f4cSBenoît Canet return bs; 3895dc364f4cSBenoît Canet } 3896dc364f4cSBenoît Canet } 3897dc364f4cSBenoît Canet return NULL; 3898dc364f4cSBenoît Canet } 3899dc364f4cSBenoît Canet 3900c13163fbSBenoît Canet /* Put this QMP function here so it can access the static graph_bdrv_states. */ 3901d5a8ee60SAlberto Garcia BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp) 3902c13163fbSBenoît Canet { 3903c13163fbSBenoît Canet BlockDeviceInfoList *list, *entry; 3904c13163fbSBenoît Canet BlockDriverState *bs; 3905c13163fbSBenoît Canet 3906c13163fbSBenoît Canet list = NULL; 3907c13163fbSBenoît Canet QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 3908d5a8ee60SAlberto Garcia BlockDeviceInfo *info = bdrv_block_device_info(bs, errp); 3909d5a8ee60SAlberto Garcia if (!info) { 3910d5a8ee60SAlberto Garcia qapi_free_BlockDeviceInfoList(list); 3911d5a8ee60SAlberto Garcia return NULL; 3912d5a8ee60SAlberto Garcia } 3913c13163fbSBenoît Canet entry = g_malloc0(sizeof(*entry)); 3914d5a8ee60SAlberto Garcia entry->value = info; 3915c13163fbSBenoît Canet entry->next = list; 3916c13163fbSBenoît Canet list = entry; 3917c13163fbSBenoît Canet } 3918c13163fbSBenoît Canet 3919c13163fbSBenoît Canet return list; 3920c13163fbSBenoît Canet } 3921c13163fbSBenoît Canet 392212d3ba82SBenoît Canet BlockDriverState *bdrv_lookup_bs(const char *device, 392312d3ba82SBenoît Canet const char *node_name, 392412d3ba82SBenoît Canet Error **errp) 392512d3ba82SBenoît Canet { 39267f06d47eSMarkus Armbruster BlockBackend *blk; 39277f06d47eSMarkus Armbruster BlockDriverState *bs; 392812d3ba82SBenoît Canet 392912d3ba82SBenoît Canet if (device) { 39307f06d47eSMarkus Armbruster blk = blk_by_name(device); 393112d3ba82SBenoît Canet 39327f06d47eSMarkus Armbruster if (blk) { 39337f06d47eSMarkus Armbruster return blk_bs(blk); 393412d3ba82SBenoît Canet } 3935dd67fa50SBenoît Canet } 393612d3ba82SBenoît Canet 3937dd67fa50SBenoît Canet if (node_name) { 393812d3ba82SBenoît Canet bs = bdrv_find_node(node_name); 393912d3ba82SBenoît Canet 3940dd67fa50SBenoît Canet if (bs) { 3941dd67fa50SBenoît Canet return bs; 3942dd67fa50SBenoît Canet } 394312d3ba82SBenoît Canet } 394412d3ba82SBenoît Canet 3945dd67fa50SBenoît Canet error_setg(errp, "Cannot find device=%s nor node_name=%s", 3946dd67fa50SBenoît Canet device ? device : "", 3947dd67fa50SBenoît Canet node_name ? node_name : ""); 3948dd67fa50SBenoît Canet return NULL; 394912d3ba82SBenoît Canet } 395012d3ba82SBenoît Canet 39515a6684d2SJeff Cody /* If 'base' is in the same chain as 'top', return true. Otherwise, 39525a6684d2SJeff Cody * return false. If either argument is NULL, return false. */ 39535a6684d2SJeff Cody bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base) 39545a6684d2SJeff Cody { 39555a6684d2SJeff Cody while (top && top != base) { 39565a6684d2SJeff Cody top = top->backing_hd; 39575a6684d2SJeff Cody } 39585a6684d2SJeff Cody 39595a6684d2SJeff Cody return top != NULL; 39605a6684d2SJeff Cody } 39615a6684d2SJeff Cody 396204df765aSFam Zheng BlockDriverState *bdrv_next_node(BlockDriverState *bs) 396304df765aSFam Zheng { 396404df765aSFam Zheng if (!bs) { 396504df765aSFam Zheng return QTAILQ_FIRST(&graph_bdrv_states); 396604df765aSFam Zheng } 396704df765aSFam Zheng return QTAILQ_NEXT(bs, node_list); 396804df765aSFam Zheng } 396904df765aSFam Zheng 39702f399b0aSMarkus Armbruster BlockDriverState *bdrv_next(BlockDriverState *bs) 39712f399b0aSMarkus Armbruster { 39722f399b0aSMarkus Armbruster if (!bs) { 39732f399b0aSMarkus Armbruster return QTAILQ_FIRST(&bdrv_states); 39742f399b0aSMarkus Armbruster } 3975dc364f4cSBenoît Canet return QTAILQ_NEXT(bs, device_list); 39762f399b0aSMarkus Armbruster } 39772f399b0aSMarkus Armbruster 397820a9e77dSFam Zheng const char *bdrv_get_node_name(const BlockDriverState *bs) 397920a9e77dSFam Zheng { 398020a9e77dSFam Zheng return bs->node_name; 398120a9e77dSFam Zheng } 398220a9e77dSFam Zheng 39837f06d47eSMarkus Armbruster /* TODO check what callers really want: bs->node_name or blk_name() */ 3984bfb197e0SMarkus Armbruster const char *bdrv_get_device_name(const BlockDriverState *bs) 3985ea2384d3Sbellard { 3986bfb197e0SMarkus Armbruster return bs->blk ? blk_name(bs->blk) : ""; 3987ea2384d3Sbellard } 3988ea2384d3Sbellard 39899b2aa84fSAlberto Garcia /* This can be used to identify nodes that might not have a device 39909b2aa84fSAlberto Garcia * name associated. Since node and device names live in the same 39919b2aa84fSAlberto Garcia * namespace, the result is unambiguous. The exception is if both are 39929b2aa84fSAlberto Garcia * absent, then this returns an empty (non-null) string. */ 39939b2aa84fSAlberto Garcia const char *bdrv_get_device_or_node_name(const BlockDriverState *bs) 39949b2aa84fSAlberto Garcia { 39959b2aa84fSAlberto Garcia return bs->blk ? blk_name(bs->blk) : bs->node_name; 39969b2aa84fSAlberto Garcia } 39979b2aa84fSAlberto Garcia 3998c8433287SMarkus Armbruster int bdrv_get_flags(BlockDriverState *bs) 3999c8433287SMarkus Armbruster { 4000c8433287SMarkus Armbruster return bs->open_flags; 4001c8433287SMarkus Armbruster } 4002c8433287SMarkus Armbruster 4003f0f0fdfeSKevin Wolf int bdrv_flush_all(void) 4004c6ca28d6Saliguori { 4005c6ca28d6Saliguori BlockDriverState *bs; 4006f0f0fdfeSKevin Wolf int result = 0; 4007c6ca28d6Saliguori 4008dc364f4cSBenoît Canet QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 4009ed78cda3SStefan Hajnoczi AioContext *aio_context = bdrv_get_aio_context(bs); 4010ed78cda3SStefan Hajnoczi int ret; 4011ed78cda3SStefan Hajnoczi 4012ed78cda3SStefan Hajnoczi aio_context_acquire(aio_context); 4013ed78cda3SStefan Hajnoczi ret = bdrv_flush(bs); 4014f0f0fdfeSKevin Wolf if (ret < 0 && !result) { 4015f0f0fdfeSKevin Wolf result = ret; 4016c6ca28d6Saliguori } 4017ed78cda3SStefan Hajnoczi aio_context_release(aio_context); 40181b7bdbc1SStefan Hajnoczi } 4019c6ca28d6Saliguori 4020f0f0fdfeSKevin Wolf return result; 4021f0f0fdfeSKevin Wolf } 4022f0f0fdfeSKevin Wolf 40233ac21627SPeter Lieven int bdrv_has_zero_init_1(BlockDriverState *bs) 40243ac21627SPeter Lieven { 40253ac21627SPeter Lieven return 1; 40263ac21627SPeter Lieven } 40273ac21627SPeter Lieven 4028f2feebbdSKevin Wolf int bdrv_has_zero_init(BlockDriverState *bs) 4029f2feebbdSKevin Wolf { 4030f2feebbdSKevin Wolf assert(bs->drv); 4031f2feebbdSKevin Wolf 403211212d8fSPaolo Bonzini /* If BS is a copy on write image, it is initialized to 403311212d8fSPaolo Bonzini the contents of the base image, which may not be zeroes. */ 403411212d8fSPaolo Bonzini if (bs->backing_hd) { 403511212d8fSPaolo Bonzini return 0; 403611212d8fSPaolo Bonzini } 4037336c1c12SKevin Wolf if (bs->drv->bdrv_has_zero_init) { 4038336c1c12SKevin Wolf return bs->drv->bdrv_has_zero_init(bs); 4039f2feebbdSKevin Wolf } 4040f2feebbdSKevin Wolf 40413ac21627SPeter Lieven /* safe default */ 40423ac21627SPeter Lieven return 0; 4043f2feebbdSKevin Wolf } 4044f2feebbdSKevin Wolf 40454ce78691SPeter Lieven bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs) 40464ce78691SPeter Lieven { 40474ce78691SPeter Lieven BlockDriverInfo bdi; 40484ce78691SPeter Lieven 40494ce78691SPeter Lieven if (bs->backing_hd) { 40504ce78691SPeter Lieven return false; 40514ce78691SPeter Lieven } 40524ce78691SPeter Lieven 40534ce78691SPeter Lieven if (bdrv_get_info(bs, &bdi) == 0) { 40544ce78691SPeter Lieven return bdi.unallocated_blocks_are_zero; 40554ce78691SPeter Lieven } 40564ce78691SPeter Lieven 40574ce78691SPeter Lieven return false; 40584ce78691SPeter Lieven } 40594ce78691SPeter Lieven 40604ce78691SPeter Lieven bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs) 40614ce78691SPeter Lieven { 40624ce78691SPeter Lieven BlockDriverInfo bdi; 40634ce78691SPeter Lieven 40644ce78691SPeter Lieven if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) { 40654ce78691SPeter Lieven return false; 40664ce78691SPeter Lieven } 40674ce78691SPeter Lieven 40684ce78691SPeter Lieven if (bdrv_get_info(bs, &bdi) == 0) { 40694ce78691SPeter Lieven return bdi.can_write_zeroes_with_unmap; 40704ce78691SPeter Lieven } 40714ce78691SPeter Lieven 40724ce78691SPeter Lieven return false; 40734ce78691SPeter Lieven } 40744ce78691SPeter Lieven 4075b6b8a333SPaolo Bonzini typedef struct BdrvCoGetBlockStatusData { 4076376ae3f1SStefan Hajnoczi BlockDriverState *bs; 4077b35b2bbaSMiroslav Rezanina BlockDriverState *base; 4078376ae3f1SStefan Hajnoczi int64_t sector_num; 4079376ae3f1SStefan Hajnoczi int nb_sectors; 4080376ae3f1SStefan Hajnoczi int *pnum; 4081b6b8a333SPaolo Bonzini int64_t ret; 4082376ae3f1SStefan Hajnoczi bool done; 4083b6b8a333SPaolo Bonzini } BdrvCoGetBlockStatusData; 4084376ae3f1SStefan Hajnoczi 4085f58c7b35Sths /* 4086705be728SFam Zheng * Returns the allocation status of the specified sectors. 4087705be728SFam Zheng * Drivers not implementing the functionality are assumed to not support 4088705be728SFam Zheng * backing files, hence all their sectors are reported as allocated. 4089f58c7b35Sths * 4090bd9533e3SStefan Hajnoczi * If 'sector_num' is beyond the end of the disk image the return value is 0 4091bd9533e3SStefan Hajnoczi * and 'pnum' is set to 0. 4092bd9533e3SStefan Hajnoczi * 4093f58c7b35Sths * 'pnum' is set to the number of sectors (including and immediately following 4094f58c7b35Sths * the specified sector) that are known to be in the same 4095f58c7b35Sths * allocated/unallocated state. 4096f58c7b35Sths * 4097bd9533e3SStefan Hajnoczi * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes 4098bd9533e3SStefan Hajnoczi * beyond the end of the disk image it will be clamped. 4099f58c7b35Sths */ 4100b6b8a333SPaolo Bonzini static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, 4101bdad13b9SPaolo Bonzini int64_t sector_num, 4102060f51c9SStefan Hajnoczi int nb_sectors, int *pnum) 4103f58c7b35Sths { 410430a7f2fcSMarkus Armbruster int64_t total_sectors; 4105f58c7b35Sths int64_t n; 41065daa74a6SPaolo Bonzini int64_t ret, ret2; 4107bd9533e3SStefan Hajnoczi 410830a7f2fcSMarkus Armbruster total_sectors = bdrv_nb_sectors(bs); 410930a7f2fcSMarkus Armbruster if (total_sectors < 0) { 411030a7f2fcSMarkus Armbruster return total_sectors; 4111617ccb46SPaolo Bonzini } 4112617ccb46SPaolo Bonzini 411330a7f2fcSMarkus Armbruster if (sector_num >= total_sectors) { 41146aebab14SStefan Hajnoczi *pnum = 0; 41156aebab14SStefan Hajnoczi return 0; 41166aebab14SStefan Hajnoczi } 4117bd9533e3SStefan Hajnoczi 411830a7f2fcSMarkus Armbruster n = total_sectors - sector_num; 4119bd9533e3SStefan Hajnoczi if (n < nb_sectors) { 4120bd9533e3SStefan Hajnoczi nb_sectors = n; 4121bd9533e3SStefan Hajnoczi } 4122bd9533e3SStefan Hajnoczi 4123b6b8a333SPaolo Bonzini if (!bs->drv->bdrv_co_get_block_status) { 4124bd9533e3SStefan Hajnoczi *pnum = nb_sectors; 4125e88ae226SKevin Wolf ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED; 4126918e92d7SPaolo Bonzini if (bs->drv->protocol_name) { 4127918e92d7SPaolo Bonzini ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE); 4128918e92d7SPaolo Bonzini } 4129918e92d7SPaolo Bonzini return ret; 41306aebab14SStefan Hajnoczi } 41316aebab14SStefan Hajnoczi 4132415b5b01SPaolo Bonzini ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum); 4133415b5b01SPaolo Bonzini if (ret < 0) { 41343e0a233dSPeter Lieven *pnum = 0; 4135415b5b01SPaolo Bonzini return ret; 4136415b5b01SPaolo Bonzini } 4137415b5b01SPaolo Bonzini 413892bc50a5SPeter Lieven if (ret & BDRV_BLOCK_RAW) { 413992bc50a5SPeter Lieven assert(ret & BDRV_BLOCK_OFFSET_VALID); 414092bc50a5SPeter Lieven return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS, 414192bc50a5SPeter Lieven *pnum, pnum); 414292bc50a5SPeter Lieven } 414392bc50a5SPeter Lieven 4144e88ae226SKevin Wolf if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) { 4145e88ae226SKevin Wolf ret |= BDRV_BLOCK_ALLOCATED; 4146e88ae226SKevin Wolf } 4147e88ae226SKevin Wolf 4148c3d86884SPeter Lieven if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) { 4149c3d86884SPeter Lieven if (bdrv_unallocated_blocks_are_zero(bs)) { 4150415b5b01SPaolo Bonzini ret |= BDRV_BLOCK_ZERO; 41511f9db224SPeter Lieven } else if (bs->backing_hd) { 4152f0ad5712SPaolo Bonzini BlockDriverState *bs2 = bs->backing_hd; 415330a7f2fcSMarkus Armbruster int64_t nb_sectors2 = bdrv_nb_sectors(bs2); 415430a7f2fcSMarkus Armbruster if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) { 4155f0ad5712SPaolo Bonzini ret |= BDRV_BLOCK_ZERO; 4156f0ad5712SPaolo Bonzini } 4157f0ad5712SPaolo Bonzini } 4158415b5b01SPaolo Bonzini } 41595daa74a6SPaolo Bonzini 41605daa74a6SPaolo Bonzini if (bs->file && 41615daa74a6SPaolo Bonzini (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) && 41625daa74a6SPaolo Bonzini (ret & BDRV_BLOCK_OFFSET_VALID)) { 416359c9a95fSMax Reitz int file_pnum; 416459c9a95fSMax Reitz 41655daa74a6SPaolo Bonzini ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS, 416659c9a95fSMax Reitz *pnum, &file_pnum); 41675daa74a6SPaolo Bonzini if (ret2 >= 0) { 41685daa74a6SPaolo Bonzini /* Ignore errors. This is just providing extra information, it 41695daa74a6SPaolo Bonzini * is useful but not necessary. 41705daa74a6SPaolo Bonzini */ 417159c9a95fSMax Reitz if (!file_pnum) { 417259c9a95fSMax Reitz /* !file_pnum indicates an offset at or beyond the EOF; it is 417359c9a95fSMax Reitz * perfectly valid for the format block driver to point to such 417459c9a95fSMax Reitz * offsets, so catch it and mark everything as zero */ 417559c9a95fSMax Reitz ret |= BDRV_BLOCK_ZERO; 417659c9a95fSMax Reitz } else { 417759c9a95fSMax Reitz /* Limit request to the range reported by the protocol driver */ 417859c9a95fSMax Reitz *pnum = file_pnum; 41795daa74a6SPaolo Bonzini ret |= (ret2 & BDRV_BLOCK_ZERO); 41805daa74a6SPaolo Bonzini } 41815daa74a6SPaolo Bonzini } 418259c9a95fSMax Reitz } 41835daa74a6SPaolo Bonzini 4184415b5b01SPaolo Bonzini return ret; 4185060f51c9SStefan Hajnoczi } 4186060f51c9SStefan Hajnoczi 4187b6b8a333SPaolo Bonzini /* Coroutine wrapper for bdrv_get_block_status() */ 4188b6b8a333SPaolo Bonzini static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque) 4189060f51c9SStefan Hajnoczi { 4190b6b8a333SPaolo Bonzini BdrvCoGetBlockStatusData *data = opaque; 4191060f51c9SStefan Hajnoczi BlockDriverState *bs = data->bs; 4192060f51c9SStefan Hajnoczi 4193b6b8a333SPaolo Bonzini data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors, 4194060f51c9SStefan Hajnoczi data->pnum); 4195060f51c9SStefan Hajnoczi data->done = true; 4196060f51c9SStefan Hajnoczi } 4197060f51c9SStefan Hajnoczi 4198060f51c9SStefan Hajnoczi /* 4199b6b8a333SPaolo Bonzini * Synchronous wrapper around bdrv_co_get_block_status(). 4200060f51c9SStefan Hajnoczi * 4201b6b8a333SPaolo Bonzini * See bdrv_co_get_block_status() for details. 4202060f51c9SStefan Hajnoczi */ 4203b6b8a333SPaolo Bonzini int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num, 4204b6b8a333SPaolo Bonzini int nb_sectors, int *pnum) 4205060f51c9SStefan Hajnoczi { 4206376ae3f1SStefan Hajnoczi Coroutine *co; 4207b6b8a333SPaolo Bonzini BdrvCoGetBlockStatusData data = { 4208376ae3f1SStefan Hajnoczi .bs = bs, 4209376ae3f1SStefan Hajnoczi .sector_num = sector_num, 4210376ae3f1SStefan Hajnoczi .nb_sectors = nb_sectors, 4211376ae3f1SStefan Hajnoczi .pnum = pnum, 4212376ae3f1SStefan Hajnoczi .done = false, 4213376ae3f1SStefan Hajnoczi }; 4214376ae3f1SStefan Hajnoczi 4215bdad13b9SPaolo Bonzini if (qemu_in_coroutine()) { 4216bdad13b9SPaolo Bonzini /* Fast-path if already in coroutine context */ 4217b6b8a333SPaolo Bonzini bdrv_get_block_status_co_entry(&data); 4218bdad13b9SPaolo Bonzini } else { 42192572b37aSStefan Hajnoczi AioContext *aio_context = bdrv_get_aio_context(bs); 42202572b37aSStefan Hajnoczi 4221b6b8a333SPaolo Bonzini co = qemu_coroutine_create(bdrv_get_block_status_co_entry); 4222376ae3f1SStefan Hajnoczi qemu_coroutine_enter(co, &data); 4223376ae3f1SStefan Hajnoczi while (!data.done) { 42242572b37aSStefan Hajnoczi aio_poll(aio_context, true); 4225376ae3f1SStefan Hajnoczi } 4226bdad13b9SPaolo Bonzini } 4227376ae3f1SStefan Hajnoczi return data.ret; 4228376ae3f1SStefan Hajnoczi } 4229f58c7b35Sths 4230b6b8a333SPaolo Bonzini int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, 4231b6b8a333SPaolo Bonzini int nb_sectors, int *pnum) 4232b6b8a333SPaolo Bonzini { 42334333bb71SPaolo Bonzini int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum); 42344333bb71SPaolo Bonzini if (ret < 0) { 42354333bb71SPaolo Bonzini return ret; 42364333bb71SPaolo Bonzini } 423701fb2705SKevin Wolf return !!(ret & BDRV_BLOCK_ALLOCATED); 4238b6b8a333SPaolo Bonzini } 4239b6b8a333SPaolo Bonzini 4240188a7bbfSPaolo Bonzini /* 4241188a7bbfSPaolo Bonzini * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP] 4242188a7bbfSPaolo Bonzini * 4243188a7bbfSPaolo Bonzini * Return true if the given sector is allocated in any image between 4244188a7bbfSPaolo Bonzini * BASE and TOP (inclusive). BASE can be NULL to check if the given 4245188a7bbfSPaolo Bonzini * sector is allocated in any image of the chain. Return false otherwise. 4246188a7bbfSPaolo Bonzini * 4247188a7bbfSPaolo Bonzini * 'pnum' is set to the number of sectors (including and immediately following 4248188a7bbfSPaolo Bonzini * the specified sector) that are known to be in the same 4249188a7bbfSPaolo Bonzini * allocated/unallocated state. 4250188a7bbfSPaolo Bonzini * 4251188a7bbfSPaolo Bonzini */ 42524f578637SPaolo Bonzini int bdrv_is_allocated_above(BlockDriverState *top, 4253188a7bbfSPaolo Bonzini BlockDriverState *base, 4254188a7bbfSPaolo Bonzini int64_t sector_num, 4255188a7bbfSPaolo Bonzini int nb_sectors, int *pnum) 4256188a7bbfSPaolo Bonzini { 4257188a7bbfSPaolo Bonzini BlockDriverState *intermediate; 4258188a7bbfSPaolo Bonzini int ret, n = nb_sectors; 4259188a7bbfSPaolo Bonzini 4260188a7bbfSPaolo Bonzini intermediate = top; 4261188a7bbfSPaolo Bonzini while (intermediate && intermediate != base) { 4262188a7bbfSPaolo Bonzini int pnum_inter; 4263bdad13b9SPaolo Bonzini ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors, 4264188a7bbfSPaolo Bonzini &pnum_inter); 4265188a7bbfSPaolo Bonzini if (ret < 0) { 4266188a7bbfSPaolo Bonzini return ret; 4267188a7bbfSPaolo Bonzini } else if (ret) { 4268188a7bbfSPaolo Bonzini *pnum = pnum_inter; 4269188a7bbfSPaolo Bonzini return 1; 4270188a7bbfSPaolo Bonzini } 4271188a7bbfSPaolo Bonzini 4272188a7bbfSPaolo Bonzini /* 4273188a7bbfSPaolo Bonzini * [sector_num, nb_sectors] is unallocated on top but intermediate 4274188a7bbfSPaolo Bonzini * might have 4275188a7bbfSPaolo Bonzini * 4276188a7bbfSPaolo Bonzini * [sector_num+x, nr_sectors] allocated. 4277188a7bbfSPaolo Bonzini */ 427863ba17d3SVishvananda Ishaya if (n > pnum_inter && 427963ba17d3SVishvananda Ishaya (intermediate == top || 428063ba17d3SVishvananda Ishaya sector_num + pnum_inter < intermediate->total_sectors)) { 4281188a7bbfSPaolo Bonzini n = pnum_inter; 4282188a7bbfSPaolo Bonzini } 4283188a7bbfSPaolo Bonzini 4284188a7bbfSPaolo Bonzini intermediate = intermediate->backing_hd; 4285188a7bbfSPaolo Bonzini } 4286188a7bbfSPaolo Bonzini 4287188a7bbfSPaolo Bonzini *pnum = n; 4288188a7bbfSPaolo Bonzini return 0; 4289188a7bbfSPaolo Bonzini } 4290188a7bbfSPaolo Bonzini 4291045df330Saliguori const char *bdrv_get_encrypted_filename(BlockDriverState *bs) 4292045df330Saliguori { 4293045df330Saliguori if (bs->backing_hd && bs->backing_hd->encrypted) 4294045df330Saliguori return bs->backing_file; 4295045df330Saliguori else if (bs->encrypted) 4296045df330Saliguori return bs->filename; 4297045df330Saliguori else 4298045df330Saliguori return NULL; 4299045df330Saliguori } 4300045df330Saliguori 430183f64091Sbellard void bdrv_get_backing_filename(BlockDriverState *bs, 430283f64091Sbellard char *filename, int filename_size) 430383f64091Sbellard { 430483f64091Sbellard pstrcpy(filename, filename_size, bs->backing_file); 430583f64091Sbellard } 430683f64091Sbellard 4307faea38e7Sbellard int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num, 4308faea38e7Sbellard const uint8_t *buf, int nb_sectors) 4309faea38e7Sbellard { 4310faea38e7Sbellard BlockDriver *drv = bs->drv; 4311b9c64947SMax Reitz int ret; 4312b9c64947SMax Reitz 4313b9c64947SMax Reitz if (!drv) { 431419cb3738Sbellard return -ENOMEDIUM; 4315b9c64947SMax Reitz } 4316b9c64947SMax Reitz if (!drv->bdrv_write_compressed) { 4317faea38e7Sbellard return -ENOTSUP; 4318b9c64947SMax Reitz } 4319b9c64947SMax Reitz ret = bdrv_check_request(bs, sector_num, nb_sectors); 4320b9c64947SMax Reitz if (ret < 0) { 4321b9c64947SMax Reitz return ret; 4322b9c64947SMax Reitz } 43237cd1e32aSlirans@il.ibm.com 4324e4654d2dSFam Zheng assert(QLIST_EMPTY(&bs->dirty_bitmaps)); 43257cd1e32aSlirans@il.ibm.com 4326faea38e7Sbellard return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors); 4327faea38e7Sbellard } 4328faea38e7Sbellard 4329faea38e7Sbellard int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 4330faea38e7Sbellard { 4331faea38e7Sbellard BlockDriver *drv = bs->drv; 4332faea38e7Sbellard if (!drv) 433319cb3738Sbellard return -ENOMEDIUM; 4334faea38e7Sbellard if (!drv->bdrv_get_info) 4335faea38e7Sbellard return -ENOTSUP; 4336faea38e7Sbellard memset(bdi, 0, sizeof(*bdi)); 4337faea38e7Sbellard return drv->bdrv_get_info(bs, bdi); 4338faea38e7Sbellard } 4339faea38e7Sbellard 4340eae041feSMax Reitz ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs) 4341eae041feSMax Reitz { 4342eae041feSMax Reitz BlockDriver *drv = bs->drv; 4343eae041feSMax Reitz if (drv && drv->bdrv_get_specific_info) { 4344eae041feSMax Reitz return drv->bdrv_get_specific_info(bs); 4345eae041feSMax Reitz } 4346eae041feSMax Reitz return NULL; 4347eae041feSMax Reitz } 4348eae041feSMax Reitz 434945566e9cSChristoph Hellwig int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, 435045566e9cSChristoph Hellwig int64_t pos, int size) 4351178e08a5Saliguori { 4352cf8074b3SKevin Wolf QEMUIOVector qiov; 4353cf8074b3SKevin Wolf struct iovec iov = { 4354cf8074b3SKevin Wolf .iov_base = (void *) buf, 4355cf8074b3SKevin Wolf .iov_len = size, 4356cf8074b3SKevin Wolf }; 4357cf8074b3SKevin Wolf 4358cf8074b3SKevin Wolf qemu_iovec_init_external(&qiov, &iov, 1); 4359cf8074b3SKevin Wolf return bdrv_writev_vmstate(bs, &qiov, pos); 4360cf8074b3SKevin Wolf } 4361cf8074b3SKevin Wolf 4362cf8074b3SKevin Wolf int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) 4363cf8074b3SKevin Wolf { 4364178e08a5Saliguori BlockDriver *drv = bs->drv; 4365cf8074b3SKevin Wolf 4366cf8074b3SKevin Wolf if (!drv) { 4367178e08a5Saliguori return -ENOMEDIUM; 4368cf8074b3SKevin Wolf } else if (drv->bdrv_save_vmstate) { 4369cf8074b3SKevin Wolf return drv->bdrv_save_vmstate(bs, qiov, pos); 4370cf8074b3SKevin Wolf } else if (bs->file) { 4371cf8074b3SKevin Wolf return bdrv_writev_vmstate(bs->file, qiov, pos); 4372cf8074b3SKevin Wolf } 4373cf8074b3SKevin Wolf 43747cdb1f6dSMORITA Kazutaka return -ENOTSUP; 4375178e08a5Saliguori } 4376178e08a5Saliguori 437745566e9cSChristoph Hellwig int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, 437845566e9cSChristoph Hellwig int64_t pos, int size) 4379178e08a5Saliguori { 4380178e08a5Saliguori BlockDriver *drv = bs->drv; 4381178e08a5Saliguori if (!drv) 4382178e08a5Saliguori return -ENOMEDIUM; 43837cdb1f6dSMORITA Kazutaka if (drv->bdrv_load_vmstate) 438445566e9cSChristoph Hellwig return drv->bdrv_load_vmstate(bs, buf, pos, size); 43857cdb1f6dSMORITA Kazutaka if (bs->file) 43867cdb1f6dSMORITA Kazutaka return bdrv_load_vmstate(bs->file, buf, pos, size); 43877cdb1f6dSMORITA Kazutaka return -ENOTSUP; 4388178e08a5Saliguori } 4389178e08a5Saliguori 43908b9b0cc2SKevin Wolf void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event) 43918b9b0cc2SKevin Wolf { 4392bf736fe3SKevin Wolf if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) { 43938b9b0cc2SKevin Wolf return; 43948b9b0cc2SKevin Wolf } 43958b9b0cc2SKevin Wolf 4396bf736fe3SKevin Wolf bs->drv->bdrv_debug_event(bs, event); 439741c695c7SKevin Wolf } 43988b9b0cc2SKevin Wolf 439941c695c7SKevin Wolf int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event, 440041c695c7SKevin Wolf const char *tag) 440141c695c7SKevin Wolf { 440241c695c7SKevin Wolf while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) { 440341c695c7SKevin Wolf bs = bs->file; 440441c695c7SKevin Wolf } 440541c695c7SKevin Wolf 440641c695c7SKevin Wolf if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) { 440741c695c7SKevin Wolf return bs->drv->bdrv_debug_breakpoint(bs, event, tag); 440841c695c7SKevin Wolf } 440941c695c7SKevin Wolf 441041c695c7SKevin Wolf return -ENOTSUP; 441141c695c7SKevin Wolf } 441241c695c7SKevin Wolf 44134cc70e93SFam Zheng int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag) 44144cc70e93SFam Zheng { 44154cc70e93SFam Zheng while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) { 44164cc70e93SFam Zheng bs = bs->file; 44174cc70e93SFam Zheng } 44184cc70e93SFam Zheng 44194cc70e93SFam Zheng if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) { 44204cc70e93SFam Zheng return bs->drv->bdrv_debug_remove_breakpoint(bs, tag); 44214cc70e93SFam Zheng } 44224cc70e93SFam Zheng 44234cc70e93SFam Zheng return -ENOTSUP; 44244cc70e93SFam Zheng } 44254cc70e93SFam Zheng 442641c695c7SKevin Wolf int bdrv_debug_resume(BlockDriverState *bs, const char *tag) 442741c695c7SKevin Wolf { 4428938789eaSMax Reitz while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) { 442941c695c7SKevin Wolf bs = bs->file; 443041c695c7SKevin Wolf } 443141c695c7SKevin Wolf 443241c695c7SKevin Wolf if (bs && bs->drv && bs->drv->bdrv_debug_resume) { 443341c695c7SKevin Wolf return bs->drv->bdrv_debug_resume(bs, tag); 443441c695c7SKevin Wolf } 443541c695c7SKevin Wolf 443641c695c7SKevin Wolf return -ENOTSUP; 443741c695c7SKevin Wolf } 443841c695c7SKevin Wolf 443941c695c7SKevin Wolf bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag) 444041c695c7SKevin Wolf { 444141c695c7SKevin Wolf while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) { 444241c695c7SKevin Wolf bs = bs->file; 444341c695c7SKevin Wolf } 444441c695c7SKevin Wolf 444541c695c7SKevin Wolf if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) { 444641c695c7SKevin Wolf return bs->drv->bdrv_debug_is_suspended(bs, tag); 444741c695c7SKevin Wolf } 444841c695c7SKevin Wolf 444941c695c7SKevin Wolf return false; 44508b9b0cc2SKevin Wolf } 44518b9b0cc2SKevin Wolf 4452199630b6SBlue Swirl int bdrv_is_snapshot(BlockDriverState *bs) 4453199630b6SBlue Swirl { 4454199630b6SBlue Swirl return !!(bs->open_flags & BDRV_O_SNAPSHOT); 4455199630b6SBlue Swirl } 4456199630b6SBlue Swirl 4457b1b1d783SJeff Cody /* backing_file can either be relative, or absolute, or a protocol. If it is 4458b1b1d783SJeff Cody * relative, it must be relative to the chain. So, passing in bs->filename 4459b1b1d783SJeff Cody * from a BDS as backing_file should not be done, as that may be relative to 4460b1b1d783SJeff Cody * the CWD rather than the chain. */ 4461e8a6bb9cSMarcelo Tosatti BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, 4462e8a6bb9cSMarcelo Tosatti const char *backing_file) 4463e8a6bb9cSMarcelo Tosatti { 4464b1b1d783SJeff Cody char *filename_full = NULL; 4465b1b1d783SJeff Cody char *backing_file_full = NULL; 4466b1b1d783SJeff Cody char *filename_tmp = NULL; 4467b1b1d783SJeff Cody int is_protocol = 0; 4468b1b1d783SJeff Cody BlockDriverState *curr_bs = NULL; 4469b1b1d783SJeff Cody BlockDriverState *retval = NULL; 4470b1b1d783SJeff Cody 4471b1b1d783SJeff Cody if (!bs || !bs->drv || !backing_file) { 4472e8a6bb9cSMarcelo Tosatti return NULL; 4473e8a6bb9cSMarcelo Tosatti } 4474e8a6bb9cSMarcelo Tosatti 4475b1b1d783SJeff Cody filename_full = g_malloc(PATH_MAX); 4476b1b1d783SJeff Cody backing_file_full = g_malloc(PATH_MAX); 4477b1b1d783SJeff Cody filename_tmp = g_malloc(PATH_MAX); 4478b1b1d783SJeff Cody 4479b1b1d783SJeff Cody is_protocol = path_has_protocol(backing_file); 4480b1b1d783SJeff Cody 4481b1b1d783SJeff Cody for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) { 4482b1b1d783SJeff Cody 4483b1b1d783SJeff Cody /* If either of the filename paths is actually a protocol, then 4484b1b1d783SJeff Cody * compare unmodified paths; otherwise make paths relative */ 4485b1b1d783SJeff Cody if (is_protocol || path_has_protocol(curr_bs->backing_file)) { 4486b1b1d783SJeff Cody if (strcmp(backing_file, curr_bs->backing_file) == 0) { 4487b1b1d783SJeff Cody retval = curr_bs->backing_hd; 4488b1b1d783SJeff Cody break; 4489b1b1d783SJeff Cody } 4490e8a6bb9cSMarcelo Tosatti } else { 4491b1b1d783SJeff Cody /* If not an absolute filename path, make it relative to the current 4492b1b1d783SJeff Cody * image's filename path */ 4493b1b1d783SJeff Cody path_combine(filename_tmp, PATH_MAX, curr_bs->filename, 4494b1b1d783SJeff Cody backing_file); 4495b1b1d783SJeff Cody 4496b1b1d783SJeff Cody /* We are going to compare absolute pathnames */ 4497b1b1d783SJeff Cody if (!realpath(filename_tmp, filename_full)) { 4498b1b1d783SJeff Cody continue; 4499b1b1d783SJeff Cody } 4500b1b1d783SJeff Cody 4501b1b1d783SJeff Cody /* We need to make sure the backing filename we are comparing against 4502b1b1d783SJeff Cody * is relative to the current image filename (or absolute) */ 4503b1b1d783SJeff Cody path_combine(filename_tmp, PATH_MAX, curr_bs->filename, 4504b1b1d783SJeff Cody curr_bs->backing_file); 4505b1b1d783SJeff Cody 4506b1b1d783SJeff Cody if (!realpath(filename_tmp, backing_file_full)) { 4507b1b1d783SJeff Cody continue; 4508b1b1d783SJeff Cody } 4509b1b1d783SJeff Cody 4510b1b1d783SJeff Cody if (strcmp(backing_file_full, filename_full) == 0) { 4511b1b1d783SJeff Cody retval = curr_bs->backing_hd; 4512b1b1d783SJeff Cody break; 4513b1b1d783SJeff Cody } 4514e8a6bb9cSMarcelo Tosatti } 4515e8a6bb9cSMarcelo Tosatti } 4516e8a6bb9cSMarcelo Tosatti 4517b1b1d783SJeff Cody g_free(filename_full); 4518b1b1d783SJeff Cody g_free(backing_file_full); 4519b1b1d783SJeff Cody g_free(filename_tmp); 4520b1b1d783SJeff Cody return retval; 4521e8a6bb9cSMarcelo Tosatti } 4522e8a6bb9cSMarcelo Tosatti 4523f198fd1cSBenoît Canet int bdrv_get_backing_file_depth(BlockDriverState *bs) 4524f198fd1cSBenoît Canet { 4525f198fd1cSBenoît Canet if (!bs->drv) { 4526f198fd1cSBenoît Canet return 0; 4527f198fd1cSBenoît Canet } 4528f198fd1cSBenoît Canet 4529f198fd1cSBenoît Canet if (!bs->backing_hd) { 4530f198fd1cSBenoît Canet return 0; 4531f198fd1cSBenoît Canet } 4532f198fd1cSBenoît Canet 4533f198fd1cSBenoît Canet return 1 + bdrv_get_backing_file_depth(bs->backing_hd); 4534f198fd1cSBenoît Canet } 4535f198fd1cSBenoît Canet 4536ea2384d3Sbellard /**************************************************************/ 453783f64091Sbellard /* async I/Os */ 4538ea2384d3Sbellard 45397c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num, 4540f141eafeSaliguori QEMUIOVector *qiov, int nb_sectors, 4541097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 4542ea2384d3Sbellard { 4543bbf0a440SStefan Hajnoczi trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque); 4544bbf0a440SStefan Hajnoczi 4545d20d9b7cSPaolo Bonzini return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0, 45468c5873d6SStefan Hajnoczi cb, opaque, false); 454783f64091Sbellard } 454883f64091Sbellard 45497c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num, 4550f141eafeSaliguori QEMUIOVector *qiov, int nb_sectors, 4551097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 45527674e7bfSbellard { 4553bbf0a440SStefan Hajnoczi trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque); 4554bbf0a440SStefan Hajnoczi 4555d20d9b7cSPaolo Bonzini return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0, 45568c5873d6SStefan Hajnoczi cb, opaque, true); 455783f64091Sbellard } 455883f64091Sbellard 45597c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs, 4560d5ef94d4SPaolo Bonzini int64_t sector_num, int nb_sectors, BdrvRequestFlags flags, 4561097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 4562d5ef94d4SPaolo Bonzini { 4563d5ef94d4SPaolo Bonzini trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque); 4564d5ef94d4SPaolo Bonzini 4565d5ef94d4SPaolo Bonzini return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors, 4566d5ef94d4SPaolo Bonzini BDRV_REQ_ZERO_WRITE | flags, 4567d5ef94d4SPaolo Bonzini cb, opaque, true); 4568d5ef94d4SPaolo Bonzini } 4569d5ef94d4SPaolo Bonzini 457040b4f539SKevin Wolf 457140b4f539SKevin Wolf typedef struct MultiwriteCB { 457240b4f539SKevin Wolf int error; 457340b4f539SKevin Wolf int num_requests; 457440b4f539SKevin Wolf int num_callbacks; 457540b4f539SKevin Wolf struct { 4576097310b5SMarkus Armbruster BlockCompletionFunc *cb; 457740b4f539SKevin Wolf void *opaque; 457840b4f539SKevin Wolf QEMUIOVector *free_qiov; 457940b4f539SKevin Wolf } callbacks[]; 458040b4f539SKevin Wolf } MultiwriteCB; 458140b4f539SKevin Wolf 458240b4f539SKevin Wolf static void multiwrite_user_cb(MultiwriteCB *mcb) 458340b4f539SKevin Wolf { 458440b4f539SKevin Wolf int i; 458540b4f539SKevin Wolf 458640b4f539SKevin Wolf for (i = 0; i < mcb->num_callbacks; i++) { 458740b4f539SKevin Wolf mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error); 45881e1ea48dSStefan Hajnoczi if (mcb->callbacks[i].free_qiov) { 45891e1ea48dSStefan Hajnoczi qemu_iovec_destroy(mcb->callbacks[i].free_qiov); 45901e1ea48dSStefan Hajnoczi } 45917267c094SAnthony Liguori g_free(mcb->callbacks[i].free_qiov); 459240b4f539SKevin Wolf } 459340b4f539SKevin Wolf } 459440b4f539SKevin Wolf 459540b4f539SKevin Wolf static void multiwrite_cb(void *opaque, int ret) 459640b4f539SKevin Wolf { 459740b4f539SKevin Wolf MultiwriteCB *mcb = opaque; 459840b4f539SKevin Wolf 45996d519a5fSStefan Hajnoczi trace_multiwrite_cb(mcb, ret); 46006d519a5fSStefan Hajnoczi 4601cb6d3ca0SKevin Wolf if (ret < 0 && !mcb->error) { 460240b4f539SKevin Wolf mcb->error = ret; 460340b4f539SKevin Wolf } 460440b4f539SKevin Wolf 460540b4f539SKevin Wolf mcb->num_requests--; 460640b4f539SKevin Wolf if (mcb->num_requests == 0) { 460740b4f539SKevin Wolf multiwrite_user_cb(mcb); 46087267c094SAnthony Liguori g_free(mcb); 460940b4f539SKevin Wolf } 461040b4f539SKevin Wolf } 461140b4f539SKevin Wolf 461240b4f539SKevin Wolf static int multiwrite_req_compare(const void *a, const void *b) 461340b4f539SKevin Wolf { 461477be4366SChristoph Hellwig const BlockRequest *req1 = a, *req2 = b; 461577be4366SChristoph Hellwig 461677be4366SChristoph Hellwig /* 461777be4366SChristoph Hellwig * Note that we can't simply subtract req2->sector from req1->sector 461877be4366SChristoph Hellwig * here as that could overflow the return value. 461977be4366SChristoph Hellwig */ 462077be4366SChristoph Hellwig if (req1->sector > req2->sector) { 462177be4366SChristoph Hellwig return 1; 462277be4366SChristoph Hellwig } else if (req1->sector < req2->sector) { 462377be4366SChristoph Hellwig return -1; 462477be4366SChristoph Hellwig } else { 462577be4366SChristoph Hellwig return 0; 462677be4366SChristoph Hellwig } 462740b4f539SKevin Wolf } 462840b4f539SKevin Wolf 462940b4f539SKevin Wolf /* 463040b4f539SKevin Wolf * Takes a bunch of requests and tries to merge them. Returns the number of 463140b4f539SKevin Wolf * requests that remain after merging. 463240b4f539SKevin Wolf */ 463340b4f539SKevin Wolf static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs, 463440b4f539SKevin Wolf int num_reqs, MultiwriteCB *mcb) 463540b4f539SKevin Wolf { 463640b4f539SKevin Wolf int i, outidx; 463740b4f539SKevin Wolf 463840b4f539SKevin Wolf // Sort requests by start sector 463940b4f539SKevin Wolf qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare); 464040b4f539SKevin Wolf 464140b4f539SKevin Wolf // Check if adjacent requests touch the same clusters. If so, combine them, 464240b4f539SKevin Wolf // filling up gaps with zero sectors. 464340b4f539SKevin Wolf outidx = 0; 464440b4f539SKevin Wolf for (i = 1; i < num_reqs; i++) { 464540b4f539SKevin Wolf int merge = 0; 464640b4f539SKevin Wolf int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors; 464740b4f539SKevin Wolf 4648b6a127a1SPaolo Bonzini // Handle exactly sequential writes and overlapping writes. 464940b4f539SKevin Wolf if (reqs[i].sector <= oldreq_last) { 465040b4f539SKevin Wolf merge = 1; 465140b4f539SKevin Wolf } 465240b4f539SKevin Wolf 4653e2a305fbSChristoph Hellwig if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) { 4654e2a305fbSChristoph Hellwig merge = 0; 4655e2a305fbSChristoph Hellwig } 4656e2a305fbSChristoph Hellwig 46576c5a42acSPeter Lieven if (bs->bl.max_transfer_length && reqs[outidx].nb_sectors + 46586c5a42acSPeter Lieven reqs[i].nb_sectors > bs->bl.max_transfer_length) { 46596c5a42acSPeter Lieven merge = 0; 46606c5a42acSPeter Lieven } 46616c5a42acSPeter Lieven 466240b4f539SKevin Wolf if (merge) { 466340b4f539SKevin Wolf size_t size; 46647267c094SAnthony Liguori QEMUIOVector *qiov = g_malloc0(sizeof(*qiov)); 466540b4f539SKevin Wolf qemu_iovec_init(qiov, 466640b4f539SKevin Wolf reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1); 466740b4f539SKevin Wolf 466840b4f539SKevin Wolf // Add the first request to the merged one. If the requests are 466940b4f539SKevin Wolf // overlapping, drop the last sectors of the first request. 467040b4f539SKevin Wolf size = (reqs[i].sector - reqs[outidx].sector) << 9; 46711b093c48SMichael Tokarev qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size); 467240b4f539SKevin Wolf 4673b6a127a1SPaolo Bonzini // We should need to add any zeros between the two requests 4674b6a127a1SPaolo Bonzini assert (reqs[i].sector <= oldreq_last); 467540b4f539SKevin Wolf 467640b4f539SKevin Wolf // Add the second request 46771b093c48SMichael Tokarev qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size); 467840b4f539SKevin Wolf 4679391827ebSStefan Hajnoczi // Add tail of first request, if necessary 4680391827ebSStefan Hajnoczi if (qiov->size < reqs[outidx].qiov->size) { 4681391827ebSStefan Hajnoczi qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size, 4682391827ebSStefan Hajnoczi reqs[outidx].qiov->size - qiov->size); 4683391827ebSStefan Hajnoczi } 4684391827ebSStefan Hajnoczi 4685cbf1dff2SKevin Wolf reqs[outidx].nb_sectors = qiov->size >> 9; 468640b4f539SKevin Wolf reqs[outidx].qiov = qiov; 468740b4f539SKevin Wolf 468840b4f539SKevin Wolf mcb->callbacks[i].free_qiov = reqs[outidx].qiov; 468940b4f539SKevin Wolf } else { 469040b4f539SKevin Wolf outidx++; 469140b4f539SKevin Wolf reqs[outidx].sector = reqs[i].sector; 469240b4f539SKevin Wolf reqs[outidx].nb_sectors = reqs[i].nb_sectors; 469340b4f539SKevin Wolf reqs[outidx].qiov = reqs[i].qiov; 469440b4f539SKevin Wolf } 469540b4f539SKevin Wolf } 469640b4f539SKevin Wolf 4697f4564d53SPeter Lieven block_acct_merge_done(&bs->stats, BLOCK_ACCT_WRITE, num_reqs - outidx - 1); 4698f4564d53SPeter Lieven 469940b4f539SKevin Wolf return outidx + 1; 470040b4f539SKevin Wolf } 470140b4f539SKevin Wolf 470240b4f539SKevin Wolf /* 470340b4f539SKevin Wolf * Submit multiple AIO write requests at once. 470440b4f539SKevin Wolf * 470540b4f539SKevin Wolf * On success, the function returns 0 and all requests in the reqs array have 470640b4f539SKevin Wolf * been submitted. In error case this function returns -1, and any of the 470740b4f539SKevin Wolf * requests may or may not be submitted yet. In particular, this means that the 470840b4f539SKevin Wolf * callback will be called for some of the requests, for others it won't. The 470940b4f539SKevin Wolf * caller must check the error field of the BlockRequest to wait for the right 471040b4f539SKevin Wolf * callbacks (if error != 0, no callback will be called). 471140b4f539SKevin Wolf * 471240b4f539SKevin Wolf * The implementation may modify the contents of the reqs array, e.g. to merge 471340b4f539SKevin Wolf * requests. However, the fields opaque and error are left unmodified as they 471440b4f539SKevin Wolf * are used to signal failure for a single request to the caller. 471540b4f539SKevin Wolf */ 471640b4f539SKevin Wolf int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs) 471740b4f539SKevin Wolf { 471840b4f539SKevin Wolf MultiwriteCB *mcb; 471940b4f539SKevin Wolf int i; 472040b4f539SKevin Wolf 4721301db7c2SRyan Harper /* don't submit writes if we don't have a medium */ 4722301db7c2SRyan Harper if (bs->drv == NULL) { 4723301db7c2SRyan Harper for (i = 0; i < num_reqs; i++) { 4724301db7c2SRyan Harper reqs[i].error = -ENOMEDIUM; 4725301db7c2SRyan Harper } 4726301db7c2SRyan Harper return -1; 4727301db7c2SRyan Harper } 4728301db7c2SRyan Harper 472940b4f539SKevin Wolf if (num_reqs == 0) { 473040b4f539SKevin Wolf return 0; 473140b4f539SKevin Wolf } 473240b4f539SKevin Wolf 473340b4f539SKevin Wolf // Create MultiwriteCB structure 47347267c094SAnthony Liguori mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks)); 473540b4f539SKevin Wolf mcb->num_requests = 0; 473640b4f539SKevin Wolf mcb->num_callbacks = num_reqs; 473740b4f539SKevin Wolf 473840b4f539SKevin Wolf for (i = 0; i < num_reqs; i++) { 473940b4f539SKevin Wolf mcb->callbacks[i].cb = reqs[i].cb; 474040b4f539SKevin Wolf mcb->callbacks[i].opaque = reqs[i].opaque; 474140b4f539SKevin Wolf } 474240b4f539SKevin Wolf 474340b4f539SKevin Wolf // Check for mergable requests 474440b4f539SKevin Wolf num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb); 474540b4f539SKevin Wolf 47466d519a5fSStefan Hajnoczi trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs); 47476d519a5fSStefan Hajnoczi 4748df9309fbSPaolo Bonzini /* Run the aio requests. */ 4749df9309fbSPaolo Bonzini mcb->num_requests = num_reqs; 475040b4f539SKevin Wolf for (i = 0; i < num_reqs; i++) { 4751d20d9b7cSPaolo Bonzini bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov, 4752d20d9b7cSPaolo Bonzini reqs[i].nb_sectors, reqs[i].flags, 4753d20d9b7cSPaolo Bonzini multiwrite_cb, mcb, 4754d20d9b7cSPaolo Bonzini true); 475540b4f539SKevin Wolf } 475640b4f539SKevin Wolf 475740b4f539SKevin Wolf return 0; 475840b4f539SKevin Wolf } 475940b4f539SKevin Wolf 47607c84b1b8SMarkus Armbruster void bdrv_aio_cancel(BlockAIOCB *acb) 476183f64091Sbellard { 476202c50efeSFam Zheng qemu_aio_ref(acb); 476302c50efeSFam Zheng bdrv_aio_cancel_async(acb); 476402c50efeSFam Zheng while (acb->refcnt > 1) { 476502c50efeSFam Zheng if (acb->aiocb_info->get_aio_context) { 476602c50efeSFam Zheng aio_poll(acb->aiocb_info->get_aio_context(acb), true); 476702c50efeSFam Zheng } else if (acb->bs) { 476802c50efeSFam Zheng aio_poll(bdrv_get_aio_context(acb->bs), true); 476902c50efeSFam Zheng } else { 477002c50efeSFam Zheng abort(); 477102c50efeSFam Zheng } 477202c50efeSFam Zheng } 47738007429aSFam Zheng qemu_aio_unref(acb); 477402c50efeSFam Zheng } 477502c50efeSFam Zheng 477602c50efeSFam Zheng /* Async version of aio cancel. The caller is not blocked if the acb implements 477702c50efeSFam Zheng * cancel_async, otherwise we do nothing and let the request normally complete. 477802c50efeSFam Zheng * In either case the completion callback must be called. */ 47797c84b1b8SMarkus Armbruster void bdrv_aio_cancel_async(BlockAIOCB *acb) 478002c50efeSFam Zheng { 478102c50efeSFam Zheng if (acb->aiocb_info->cancel_async) { 478202c50efeSFam Zheng acb->aiocb_info->cancel_async(acb); 478302c50efeSFam Zheng } 478483f64091Sbellard } 478583f64091Sbellard 478683f64091Sbellard /**************************************************************/ 478783f64091Sbellard /* async block device emulation */ 478883f64091Sbellard 47897c84b1b8SMarkus Armbruster typedef struct BlockAIOCBSync { 47907c84b1b8SMarkus Armbruster BlockAIOCB common; 4791c16b5a2cSChristoph Hellwig QEMUBH *bh; 4792c16b5a2cSChristoph Hellwig int ret; 4793c16b5a2cSChristoph Hellwig /* vector translation state */ 4794c16b5a2cSChristoph Hellwig QEMUIOVector *qiov; 4795c16b5a2cSChristoph Hellwig uint8_t *bounce; 4796c16b5a2cSChristoph Hellwig int is_write; 47977c84b1b8SMarkus Armbruster } BlockAIOCBSync; 4798c16b5a2cSChristoph Hellwig 4799d7331bedSStefan Hajnoczi static const AIOCBInfo bdrv_em_aiocb_info = { 48007c84b1b8SMarkus Armbruster .aiocb_size = sizeof(BlockAIOCBSync), 4801c16b5a2cSChristoph Hellwig }; 4802c16b5a2cSChristoph Hellwig 480383f64091Sbellard static void bdrv_aio_bh_cb(void *opaque) 4804beac80cdSbellard { 48057c84b1b8SMarkus Armbruster BlockAIOCBSync *acb = opaque; 4806f141eafeSaliguori 4807857d4f46SKevin Wolf if (!acb->is_write && acb->ret >= 0) { 480803396148SMichael Tokarev qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size); 4809857d4f46SKevin Wolf } 4810ceb42de8Saliguori qemu_vfree(acb->bounce); 4811ce1a14dcSpbrook acb->common.cb(acb->common.opaque, acb->ret); 48126a7ad299SDor Laor qemu_bh_delete(acb->bh); 481336afc451SAvi Kivity acb->bh = NULL; 48148007429aSFam Zheng qemu_aio_unref(acb); 4815beac80cdSbellard } 4816beac80cdSbellard 48177c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs, 4818f141eafeSaliguori int64_t sector_num, 4819f141eafeSaliguori QEMUIOVector *qiov, 4820f141eafeSaliguori int nb_sectors, 4821097310b5SMarkus Armbruster BlockCompletionFunc *cb, 4822f141eafeSaliguori void *opaque, 4823f141eafeSaliguori int is_write) 4824f141eafeSaliguori 4825ea2384d3Sbellard { 48267c84b1b8SMarkus Armbruster BlockAIOCBSync *acb; 482783f64091Sbellard 4828d7331bedSStefan Hajnoczi acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque); 4829f141eafeSaliguori acb->is_write = is_write; 4830f141eafeSaliguori acb->qiov = qiov; 4831857d4f46SKevin Wolf acb->bounce = qemu_try_blockalign(bs, qiov->size); 48322572b37aSStefan Hajnoczi acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb); 4833f141eafeSaliguori 4834857d4f46SKevin Wolf if (acb->bounce == NULL) { 4835857d4f46SKevin Wolf acb->ret = -ENOMEM; 4836857d4f46SKevin Wolf } else if (is_write) { 4837d5e6b161SMichael Tokarev qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size); 48381ed20acfSStefan Hajnoczi acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors); 4839f141eafeSaliguori } else { 48401ed20acfSStefan Hajnoczi acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors); 4841f141eafeSaliguori } 4842f141eafeSaliguori 4843ce1a14dcSpbrook qemu_bh_schedule(acb->bh); 4844f141eafeSaliguori 4845ce1a14dcSpbrook return &acb->common; 48467a6cba61Spbrook } 48477a6cba61Spbrook 48487c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs, 4849f141eafeSaliguori int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 4850097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 485183f64091Sbellard { 4852f141eafeSaliguori return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); 485383f64091Sbellard } 485483f64091Sbellard 48557c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs, 4856f141eafeSaliguori int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 4857097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 4858f141eafeSaliguori { 4859f141eafeSaliguori return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); 4860f141eafeSaliguori } 4861f141eafeSaliguori 486268485420SKevin Wolf 48637c84b1b8SMarkus Armbruster typedef struct BlockAIOCBCoroutine { 48647c84b1b8SMarkus Armbruster BlockAIOCB common; 486568485420SKevin Wolf BlockRequest req; 486668485420SKevin Wolf bool is_write; 48670b5a2445SPaolo Bonzini bool need_bh; 4868d318aea9SKevin Wolf bool *done; 486968485420SKevin Wolf QEMUBH* bh; 48707c84b1b8SMarkus Armbruster } BlockAIOCBCoroutine; 487168485420SKevin Wolf 4872d7331bedSStefan Hajnoczi static const AIOCBInfo bdrv_em_co_aiocb_info = { 48737c84b1b8SMarkus Armbruster .aiocb_size = sizeof(BlockAIOCBCoroutine), 487468485420SKevin Wolf }; 487568485420SKevin Wolf 48760b5a2445SPaolo Bonzini static void bdrv_co_complete(BlockAIOCBCoroutine *acb) 48770b5a2445SPaolo Bonzini { 48780b5a2445SPaolo Bonzini if (!acb->need_bh) { 48790b5a2445SPaolo Bonzini acb->common.cb(acb->common.opaque, acb->req.error); 48800b5a2445SPaolo Bonzini qemu_aio_unref(acb); 48810b5a2445SPaolo Bonzini } 48820b5a2445SPaolo Bonzini } 48830b5a2445SPaolo Bonzini 488435246a68SPaolo Bonzini static void bdrv_co_em_bh(void *opaque) 488568485420SKevin Wolf { 48867c84b1b8SMarkus Armbruster BlockAIOCBCoroutine *acb = opaque; 488768485420SKevin Wolf 48880b5a2445SPaolo Bonzini assert(!acb->need_bh); 488968485420SKevin Wolf qemu_bh_delete(acb->bh); 48900b5a2445SPaolo Bonzini bdrv_co_complete(acb); 48910b5a2445SPaolo Bonzini } 48920b5a2445SPaolo Bonzini 48930b5a2445SPaolo Bonzini static void bdrv_co_maybe_schedule_bh(BlockAIOCBCoroutine *acb) 48940b5a2445SPaolo Bonzini { 48950b5a2445SPaolo Bonzini acb->need_bh = false; 48960b5a2445SPaolo Bonzini if (acb->req.error != -EINPROGRESS) { 48970b5a2445SPaolo Bonzini BlockDriverState *bs = acb->common.bs; 48980b5a2445SPaolo Bonzini 48990b5a2445SPaolo Bonzini acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb); 49000b5a2445SPaolo Bonzini qemu_bh_schedule(acb->bh); 49010b5a2445SPaolo Bonzini } 490268485420SKevin Wolf } 490368485420SKevin Wolf 4904b2a61371SStefan Hajnoczi /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */ 4905b2a61371SStefan Hajnoczi static void coroutine_fn bdrv_co_do_rw(void *opaque) 4906b2a61371SStefan Hajnoczi { 49077c84b1b8SMarkus Armbruster BlockAIOCBCoroutine *acb = opaque; 4908b2a61371SStefan Hajnoczi BlockDriverState *bs = acb->common.bs; 4909b2a61371SStefan Hajnoczi 4910b2a61371SStefan Hajnoczi if (!acb->is_write) { 4911b2a61371SStefan Hajnoczi acb->req.error = bdrv_co_do_readv(bs, acb->req.sector, 4912d20d9b7cSPaolo Bonzini acb->req.nb_sectors, acb->req.qiov, acb->req.flags); 4913b2a61371SStefan Hajnoczi } else { 4914b2a61371SStefan Hajnoczi acb->req.error = bdrv_co_do_writev(bs, acb->req.sector, 4915d20d9b7cSPaolo Bonzini acb->req.nb_sectors, acb->req.qiov, acb->req.flags); 4916b2a61371SStefan Hajnoczi } 4917b2a61371SStefan Hajnoczi 49180b5a2445SPaolo Bonzini bdrv_co_complete(acb); 4919b2a61371SStefan Hajnoczi } 4920b2a61371SStefan Hajnoczi 49217c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, 492268485420SKevin Wolf int64_t sector_num, 492368485420SKevin Wolf QEMUIOVector *qiov, 492468485420SKevin Wolf int nb_sectors, 4925d20d9b7cSPaolo Bonzini BdrvRequestFlags flags, 4926097310b5SMarkus Armbruster BlockCompletionFunc *cb, 492768485420SKevin Wolf void *opaque, 49288c5873d6SStefan Hajnoczi bool is_write) 492968485420SKevin Wolf { 493068485420SKevin Wolf Coroutine *co; 49317c84b1b8SMarkus Armbruster BlockAIOCBCoroutine *acb; 493268485420SKevin Wolf 4933d7331bedSStefan Hajnoczi acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); 49340b5a2445SPaolo Bonzini acb->need_bh = true; 49350b5a2445SPaolo Bonzini acb->req.error = -EINPROGRESS; 493668485420SKevin Wolf acb->req.sector = sector_num; 493768485420SKevin Wolf acb->req.nb_sectors = nb_sectors; 493868485420SKevin Wolf acb->req.qiov = qiov; 4939d20d9b7cSPaolo Bonzini acb->req.flags = flags; 494068485420SKevin Wolf acb->is_write = is_write; 494168485420SKevin Wolf 49428c5873d6SStefan Hajnoczi co = qemu_coroutine_create(bdrv_co_do_rw); 494368485420SKevin Wolf qemu_coroutine_enter(co, acb); 494468485420SKevin Wolf 49450b5a2445SPaolo Bonzini bdrv_co_maybe_schedule_bh(acb); 494668485420SKevin Wolf return &acb->common; 494768485420SKevin Wolf } 494868485420SKevin Wolf 494907f07615SPaolo Bonzini static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque) 4950b2e12bc6SChristoph Hellwig { 49517c84b1b8SMarkus Armbruster BlockAIOCBCoroutine *acb = opaque; 495207f07615SPaolo Bonzini BlockDriverState *bs = acb->common.bs; 4953b2e12bc6SChristoph Hellwig 495407f07615SPaolo Bonzini acb->req.error = bdrv_co_flush(bs); 49550b5a2445SPaolo Bonzini bdrv_co_complete(acb); 4956b2e12bc6SChristoph Hellwig } 4957b2e12bc6SChristoph Hellwig 49587c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs, 4959097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 4960016f5cf6SAlexander Graf { 496107f07615SPaolo Bonzini trace_bdrv_aio_flush(bs, opaque); 4962016f5cf6SAlexander Graf 496307f07615SPaolo Bonzini Coroutine *co; 49647c84b1b8SMarkus Armbruster BlockAIOCBCoroutine *acb; 4965016f5cf6SAlexander Graf 4966d7331bedSStefan Hajnoczi acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); 49670b5a2445SPaolo Bonzini acb->need_bh = true; 49680b5a2445SPaolo Bonzini acb->req.error = -EINPROGRESS; 4969d318aea9SKevin Wolf 497007f07615SPaolo Bonzini co = qemu_coroutine_create(bdrv_aio_flush_co_entry); 497107f07615SPaolo Bonzini qemu_coroutine_enter(co, acb); 4972016f5cf6SAlexander Graf 49730b5a2445SPaolo Bonzini bdrv_co_maybe_schedule_bh(acb); 4974016f5cf6SAlexander Graf return &acb->common; 4975016f5cf6SAlexander Graf } 4976016f5cf6SAlexander Graf 49774265d620SPaolo Bonzini static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque) 49784265d620SPaolo Bonzini { 49797c84b1b8SMarkus Armbruster BlockAIOCBCoroutine *acb = opaque; 49804265d620SPaolo Bonzini BlockDriverState *bs = acb->common.bs; 49814265d620SPaolo Bonzini 49824265d620SPaolo Bonzini acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors); 49830b5a2445SPaolo Bonzini bdrv_co_complete(acb); 49844265d620SPaolo Bonzini } 49854265d620SPaolo Bonzini 49867c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs, 49874265d620SPaolo Bonzini int64_t sector_num, int nb_sectors, 4988097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 49894265d620SPaolo Bonzini { 49904265d620SPaolo Bonzini Coroutine *co; 49917c84b1b8SMarkus Armbruster BlockAIOCBCoroutine *acb; 49924265d620SPaolo Bonzini 49934265d620SPaolo Bonzini trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque); 49944265d620SPaolo Bonzini 4995d7331bedSStefan Hajnoczi acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); 49960b5a2445SPaolo Bonzini acb->need_bh = true; 49970b5a2445SPaolo Bonzini acb->req.error = -EINPROGRESS; 49984265d620SPaolo Bonzini acb->req.sector = sector_num; 49994265d620SPaolo Bonzini acb->req.nb_sectors = nb_sectors; 50004265d620SPaolo Bonzini co = qemu_coroutine_create(bdrv_aio_discard_co_entry); 50014265d620SPaolo Bonzini qemu_coroutine_enter(co, acb); 50024265d620SPaolo Bonzini 50030b5a2445SPaolo Bonzini bdrv_co_maybe_schedule_bh(acb); 50044265d620SPaolo Bonzini return &acb->common; 50054265d620SPaolo Bonzini } 50064265d620SPaolo Bonzini 5007ea2384d3Sbellard void bdrv_init(void) 5008ea2384d3Sbellard { 50095efa9d5aSAnthony Liguori module_call_init(MODULE_INIT_BLOCK); 5010ea2384d3Sbellard } 5011ce1a14dcSpbrook 5012eb852011SMarkus Armbruster void bdrv_init_with_whitelist(void) 5013eb852011SMarkus Armbruster { 5014eb852011SMarkus Armbruster use_bdrv_whitelist = 1; 5015eb852011SMarkus Armbruster bdrv_init(); 5016eb852011SMarkus Armbruster } 5017eb852011SMarkus Armbruster 5018d7331bedSStefan Hajnoczi void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs, 5019097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 50206bbff9a0Saliguori { 50217c84b1b8SMarkus Armbruster BlockAIOCB *acb; 5022ce1a14dcSpbrook 5023d7331bedSStefan Hajnoczi acb = g_slice_alloc(aiocb_info->aiocb_size); 5024d7331bedSStefan Hajnoczi acb->aiocb_info = aiocb_info; 5025ce1a14dcSpbrook acb->bs = bs; 5026ce1a14dcSpbrook acb->cb = cb; 5027ce1a14dcSpbrook acb->opaque = opaque; 5028f197fe2bSFam Zheng acb->refcnt = 1; 5029ce1a14dcSpbrook return acb; 5030ce1a14dcSpbrook } 5031ce1a14dcSpbrook 5032f197fe2bSFam Zheng void qemu_aio_ref(void *p) 5033f197fe2bSFam Zheng { 50347c84b1b8SMarkus Armbruster BlockAIOCB *acb = p; 5035f197fe2bSFam Zheng acb->refcnt++; 5036f197fe2bSFam Zheng } 5037f197fe2bSFam Zheng 50388007429aSFam Zheng void qemu_aio_unref(void *p) 5039ce1a14dcSpbrook { 50407c84b1b8SMarkus Armbruster BlockAIOCB *acb = p; 5041f197fe2bSFam Zheng assert(acb->refcnt > 0); 5042f197fe2bSFam Zheng if (--acb->refcnt == 0) { 5043d7331bedSStefan Hajnoczi g_slice_free1(acb->aiocb_info->aiocb_size, acb); 5044ce1a14dcSpbrook } 5045f197fe2bSFam Zheng } 504619cb3738Sbellard 504719cb3738Sbellard /**************************************************************/ 5048f9f05dc5SKevin Wolf /* Coroutine block device emulation */ 5049f9f05dc5SKevin Wolf 5050f9f05dc5SKevin Wolf typedef struct CoroutineIOCompletion { 5051f9f05dc5SKevin Wolf Coroutine *coroutine; 5052f9f05dc5SKevin Wolf int ret; 5053f9f05dc5SKevin Wolf } CoroutineIOCompletion; 5054f9f05dc5SKevin Wolf 5055f9f05dc5SKevin Wolf static void bdrv_co_io_em_complete(void *opaque, int ret) 5056f9f05dc5SKevin Wolf { 5057f9f05dc5SKevin Wolf CoroutineIOCompletion *co = opaque; 5058f9f05dc5SKevin Wolf 5059f9f05dc5SKevin Wolf co->ret = ret; 5060f9f05dc5SKevin Wolf qemu_coroutine_enter(co->coroutine, NULL); 5061f9f05dc5SKevin Wolf } 5062f9f05dc5SKevin Wolf 5063f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num, 5064f9f05dc5SKevin Wolf int nb_sectors, QEMUIOVector *iov, 5065f9f05dc5SKevin Wolf bool is_write) 5066f9f05dc5SKevin Wolf { 5067f9f05dc5SKevin Wolf CoroutineIOCompletion co = { 5068f9f05dc5SKevin Wolf .coroutine = qemu_coroutine_self(), 5069f9f05dc5SKevin Wolf }; 50707c84b1b8SMarkus Armbruster BlockAIOCB *acb; 5071f9f05dc5SKevin Wolf 5072f9f05dc5SKevin Wolf if (is_write) { 5073a652d160SStefan Hajnoczi acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors, 5074f9f05dc5SKevin Wolf bdrv_co_io_em_complete, &co); 5075f9f05dc5SKevin Wolf } else { 5076a652d160SStefan Hajnoczi acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors, 5077f9f05dc5SKevin Wolf bdrv_co_io_em_complete, &co); 5078f9f05dc5SKevin Wolf } 5079f9f05dc5SKevin Wolf 508059370aaaSStefan Hajnoczi trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb); 5081f9f05dc5SKevin Wolf if (!acb) { 5082f9f05dc5SKevin Wolf return -EIO; 5083f9f05dc5SKevin Wolf } 5084f9f05dc5SKevin Wolf qemu_coroutine_yield(); 5085f9f05dc5SKevin Wolf 5086f9f05dc5SKevin Wolf return co.ret; 5087f9f05dc5SKevin Wolf } 5088f9f05dc5SKevin Wolf 5089f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs, 5090f9f05dc5SKevin Wolf int64_t sector_num, int nb_sectors, 5091f9f05dc5SKevin Wolf QEMUIOVector *iov) 5092f9f05dc5SKevin Wolf { 5093f9f05dc5SKevin Wolf return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false); 5094f9f05dc5SKevin Wolf } 5095f9f05dc5SKevin Wolf 5096f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs, 5097f9f05dc5SKevin Wolf int64_t sector_num, int nb_sectors, 5098f9f05dc5SKevin Wolf QEMUIOVector *iov) 5099f9f05dc5SKevin Wolf { 5100f9f05dc5SKevin Wolf return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true); 5101f9f05dc5SKevin Wolf } 5102f9f05dc5SKevin Wolf 510307f07615SPaolo Bonzini static void coroutine_fn bdrv_flush_co_entry(void *opaque) 5104e7a8a783SKevin Wolf { 510507f07615SPaolo Bonzini RwCo *rwco = opaque; 510607f07615SPaolo Bonzini 510707f07615SPaolo Bonzini rwco->ret = bdrv_co_flush(rwco->bs); 510807f07615SPaolo Bonzini } 510907f07615SPaolo Bonzini 511007f07615SPaolo Bonzini int coroutine_fn bdrv_co_flush(BlockDriverState *bs) 511107f07615SPaolo Bonzini { 5112eb489bb1SKevin Wolf int ret; 5113eb489bb1SKevin Wolf 511429cdb251SPaolo Bonzini if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) { 511507f07615SPaolo Bonzini return 0; 5116eb489bb1SKevin Wolf } 5117eb489bb1SKevin Wolf 5118ca716364SKevin Wolf /* Write back cached data to the OS even with cache=unsafe */ 5119bf736fe3SKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS); 5120eb489bb1SKevin Wolf if (bs->drv->bdrv_co_flush_to_os) { 5121eb489bb1SKevin Wolf ret = bs->drv->bdrv_co_flush_to_os(bs); 5122eb489bb1SKevin Wolf if (ret < 0) { 5123eb489bb1SKevin Wolf return ret; 5124eb489bb1SKevin Wolf } 5125eb489bb1SKevin Wolf } 5126eb489bb1SKevin Wolf 5127ca716364SKevin Wolf /* But don't actually force it to the disk with cache=unsafe */ 5128ca716364SKevin Wolf if (bs->open_flags & BDRV_O_NO_FLUSH) { 5129d4c82329SKevin Wolf goto flush_parent; 5130ca716364SKevin Wolf } 5131ca716364SKevin Wolf 5132bf736fe3SKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK); 5133eb489bb1SKevin Wolf if (bs->drv->bdrv_co_flush_to_disk) { 513429cdb251SPaolo Bonzini ret = bs->drv->bdrv_co_flush_to_disk(bs); 513507f07615SPaolo Bonzini } else if (bs->drv->bdrv_aio_flush) { 51367c84b1b8SMarkus Armbruster BlockAIOCB *acb; 5137e7a8a783SKevin Wolf CoroutineIOCompletion co = { 5138e7a8a783SKevin Wolf .coroutine = qemu_coroutine_self(), 5139e7a8a783SKevin Wolf }; 5140e7a8a783SKevin Wolf 514107f07615SPaolo Bonzini acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co); 514207f07615SPaolo Bonzini if (acb == NULL) { 514329cdb251SPaolo Bonzini ret = -EIO; 514407f07615SPaolo Bonzini } else { 5145e7a8a783SKevin Wolf qemu_coroutine_yield(); 514629cdb251SPaolo Bonzini ret = co.ret; 5147e7a8a783SKevin Wolf } 514807f07615SPaolo Bonzini } else { 514907f07615SPaolo Bonzini /* 515007f07615SPaolo Bonzini * Some block drivers always operate in either writethrough or unsafe 515107f07615SPaolo Bonzini * mode and don't support bdrv_flush therefore. Usually qemu doesn't 515207f07615SPaolo Bonzini * know how the server works (because the behaviour is hardcoded or 515307f07615SPaolo Bonzini * depends on server-side configuration), so we can't ensure that 515407f07615SPaolo Bonzini * everything is safe on disk. Returning an error doesn't work because 515507f07615SPaolo Bonzini * that would break guests even if the server operates in writethrough 515607f07615SPaolo Bonzini * mode. 515707f07615SPaolo Bonzini * 515807f07615SPaolo Bonzini * Let's hope the user knows what he's doing. 515907f07615SPaolo Bonzini */ 516029cdb251SPaolo Bonzini ret = 0; 516107f07615SPaolo Bonzini } 516229cdb251SPaolo Bonzini if (ret < 0) { 516329cdb251SPaolo Bonzini return ret; 516429cdb251SPaolo Bonzini } 516529cdb251SPaolo Bonzini 516629cdb251SPaolo Bonzini /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH 516729cdb251SPaolo Bonzini * in the case of cache=unsafe, so there are no useless flushes. 516829cdb251SPaolo Bonzini */ 5169d4c82329SKevin Wolf flush_parent: 517029cdb251SPaolo Bonzini return bdrv_co_flush(bs->file); 517107f07615SPaolo Bonzini } 517207f07615SPaolo Bonzini 51735a8a30dbSKevin Wolf void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp) 51740f15423cSAnthony Liguori { 51755a8a30dbSKevin Wolf Error *local_err = NULL; 51765a8a30dbSKevin Wolf int ret; 51775a8a30dbSKevin Wolf 51783456a8d1SKevin Wolf if (!bs->drv) { 51793456a8d1SKevin Wolf return; 51800f15423cSAnthony Liguori } 51813456a8d1SKevin Wolf 51827ea2d269SAlexey Kardashevskiy if (!(bs->open_flags & BDRV_O_INCOMING)) { 51837ea2d269SAlexey Kardashevskiy return; 51847ea2d269SAlexey Kardashevskiy } 51857ea2d269SAlexey Kardashevskiy bs->open_flags &= ~BDRV_O_INCOMING; 51867ea2d269SAlexey Kardashevskiy 51873456a8d1SKevin Wolf if (bs->drv->bdrv_invalidate_cache) { 51885a8a30dbSKevin Wolf bs->drv->bdrv_invalidate_cache(bs, &local_err); 51893456a8d1SKevin Wolf } else if (bs->file) { 51905a8a30dbSKevin Wolf bdrv_invalidate_cache(bs->file, &local_err); 51915a8a30dbSKevin Wolf } 51925a8a30dbSKevin Wolf if (local_err) { 51935a8a30dbSKevin Wolf error_propagate(errp, local_err); 51945a8a30dbSKevin Wolf return; 51953456a8d1SKevin Wolf } 51963456a8d1SKevin Wolf 51975a8a30dbSKevin Wolf ret = refresh_total_sectors(bs, bs->total_sectors); 51985a8a30dbSKevin Wolf if (ret < 0) { 51995a8a30dbSKevin Wolf error_setg_errno(errp, -ret, "Could not refresh total sector count"); 52005a8a30dbSKevin Wolf return; 52015a8a30dbSKevin Wolf } 52020f15423cSAnthony Liguori } 52030f15423cSAnthony Liguori 52045a8a30dbSKevin Wolf void bdrv_invalidate_cache_all(Error **errp) 52050f15423cSAnthony Liguori { 52060f15423cSAnthony Liguori BlockDriverState *bs; 52075a8a30dbSKevin Wolf Error *local_err = NULL; 52080f15423cSAnthony Liguori 5209dc364f4cSBenoît Canet QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 5210ed78cda3SStefan Hajnoczi AioContext *aio_context = bdrv_get_aio_context(bs); 5211ed78cda3SStefan Hajnoczi 5212ed78cda3SStefan Hajnoczi aio_context_acquire(aio_context); 52135a8a30dbSKevin Wolf bdrv_invalidate_cache(bs, &local_err); 5214ed78cda3SStefan Hajnoczi aio_context_release(aio_context); 52155a8a30dbSKevin Wolf if (local_err) { 52165a8a30dbSKevin Wolf error_propagate(errp, local_err); 52175a8a30dbSKevin Wolf return; 52185a8a30dbSKevin Wolf } 52190f15423cSAnthony Liguori } 52200f15423cSAnthony Liguori } 52210f15423cSAnthony Liguori 522207f07615SPaolo Bonzini int bdrv_flush(BlockDriverState *bs) 522307f07615SPaolo Bonzini { 522407f07615SPaolo Bonzini Coroutine *co; 522507f07615SPaolo Bonzini RwCo rwco = { 522607f07615SPaolo Bonzini .bs = bs, 522707f07615SPaolo Bonzini .ret = NOT_DONE, 522807f07615SPaolo Bonzini }; 522907f07615SPaolo Bonzini 523007f07615SPaolo Bonzini if (qemu_in_coroutine()) { 523107f07615SPaolo Bonzini /* Fast-path if already in coroutine context */ 523207f07615SPaolo Bonzini bdrv_flush_co_entry(&rwco); 523307f07615SPaolo Bonzini } else { 52342572b37aSStefan Hajnoczi AioContext *aio_context = bdrv_get_aio_context(bs); 52352572b37aSStefan Hajnoczi 523607f07615SPaolo Bonzini co = qemu_coroutine_create(bdrv_flush_co_entry); 523707f07615SPaolo Bonzini qemu_coroutine_enter(co, &rwco); 523807f07615SPaolo Bonzini while (rwco.ret == NOT_DONE) { 52392572b37aSStefan Hajnoczi aio_poll(aio_context, true); 524007f07615SPaolo Bonzini } 524107f07615SPaolo Bonzini } 524207f07615SPaolo Bonzini 524307f07615SPaolo Bonzini return rwco.ret; 524407f07615SPaolo Bonzini } 5245e7a8a783SKevin Wolf 5246775aa8b6SKevin Wolf typedef struct DiscardCo { 5247775aa8b6SKevin Wolf BlockDriverState *bs; 5248775aa8b6SKevin Wolf int64_t sector_num; 5249775aa8b6SKevin Wolf int nb_sectors; 5250775aa8b6SKevin Wolf int ret; 5251775aa8b6SKevin Wolf } DiscardCo; 52524265d620SPaolo Bonzini static void coroutine_fn bdrv_discard_co_entry(void *opaque) 52534265d620SPaolo Bonzini { 5254775aa8b6SKevin Wolf DiscardCo *rwco = opaque; 52554265d620SPaolo Bonzini 52564265d620SPaolo Bonzini rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors); 52574265d620SPaolo Bonzini } 52584265d620SPaolo Bonzini 52594265d620SPaolo Bonzini int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, 52604265d620SPaolo Bonzini int nb_sectors) 52614265d620SPaolo Bonzini { 5262b9c64947SMax Reitz int max_discard, ret; 5263d51e9fe5SPaolo Bonzini 52644265d620SPaolo Bonzini if (!bs->drv) { 52654265d620SPaolo Bonzini return -ENOMEDIUM; 5266b9c64947SMax Reitz } 5267b9c64947SMax Reitz 5268b9c64947SMax Reitz ret = bdrv_check_request(bs, sector_num, nb_sectors); 5269b9c64947SMax Reitz if (ret < 0) { 5270b9c64947SMax Reitz return ret; 52714265d620SPaolo Bonzini } else if (bs->read_only) { 52724265d620SPaolo Bonzini return -EROFS; 5273df702c9bSPaolo Bonzini } 5274df702c9bSPaolo Bonzini 52758f0720ecSPaolo Bonzini bdrv_reset_dirty(bs, sector_num, nb_sectors); 5276df702c9bSPaolo Bonzini 52779e8f1835SPaolo Bonzini /* Do nothing if disabled. */ 52789e8f1835SPaolo Bonzini if (!(bs->open_flags & BDRV_O_UNMAP)) { 52799e8f1835SPaolo Bonzini return 0; 52809e8f1835SPaolo Bonzini } 52819e8f1835SPaolo Bonzini 5282d51e9fe5SPaolo Bonzini if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) { 5283d51e9fe5SPaolo Bonzini return 0; 5284d51e9fe5SPaolo Bonzini } 52856f14da52SPeter Lieven 528675af1f34SPeter Lieven max_discard = MIN_NON_ZERO(bs->bl.max_discard, BDRV_REQUEST_MAX_SECTORS); 52876f14da52SPeter Lieven while (nb_sectors > 0) { 52886f14da52SPeter Lieven int ret; 52896f14da52SPeter Lieven int num = nb_sectors; 52906f14da52SPeter Lieven 52916f14da52SPeter Lieven /* align request */ 52926f14da52SPeter Lieven if (bs->bl.discard_alignment && 52936f14da52SPeter Lieven num >= bs->bl.discard_alignment && 52946f14da52SPeter Lieven sector_num % bs->bl.discard_alignment) { 52956f14da52SPeter Lieven if (num > bs->bl.discard_alignment) { 52966f14da52SPeter Lieven num = bs->bl.discard_alignment; 52976f14da52SPeter Lieven } 52986f14da52SPeter Lieven num -= sector_num % bs->bl.discard_alignment; 52996f14da52SPeter Lieven } 53006f14da52SPeter Lieven 53016f14da52SPeter Lieven /* limit request size */ 53026f14da52SPeter Lieven if (num > max_discard) { 53036f14da52SPeter Lieven num = max_discard; 53046f14da52SPeter Lieven } 53056f14da52SPeter Lieven 5306d51e9fe5SPaolo Bonzini if (bs->drv->bdrv_co_discard) { 53076f14da52SPeter Lieven ret = bs->drv->bdrv_co_discard(bs, sector_num, num); 5308d51e9fe5SPaolo Bonzini } else { 53097c84b1b8SMarkus Armbruster BlockAIOCB *acb; 53104265d620SPaolo Bonzini CoroutineIOCompletion co = { 53114265d620SPaolo Bonzini .coroutine = qemu_coroutine_self(), 53124265d620SPaolo Bonzini }; 53134265d620SPaolo Bonzini 53144265d620SPaolo Bonzini acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors, 53154265d620SPaolo Bonzini bdrv_co_io_em_complete, &co); 53164265d620SPaolo Bonzini if (acb == NULL) { 53174265d620SPaolo Bonzini return -EIO; 53184265d620SPaolo Bonzini } else { 53194265d620SPaolo Bonzini qemu_coroutine_yield(); 5320d51e9fe5SPaolo Bonzini ret = co.ret; 53214265d620SPaolo Bonzini } 5322d51e9fe5SPaolo Bonzini } 53237ce21016SPaolo Bonzini if (ret && ret != -ENOTSUP) { 5324d51e9fe5SPaolo Bonzini return ret; 5325d51e9fe5SPaolo Bonzini } 5326d51e9fe5SPaolo Bonzini 5327d51e9fe5SPaolo Bonzini sector_num += num; 5328d51e9fe5SPaolo Bonzini nb_sectors -= num; 5329d51e9fe5SPaolo Bonzini } 53304265d620SPaolo Bonzini return 0; 53314265d620SPaolo Bonzini } 53324265d620SPaolo Bonzini 53334265d620SPaolo Bonzini int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) 53344265d620SPaolo Bonzini { 53354265d620SPaolo Bonzini Coroutine *co; 5336775aa8b6SKevin Wolf DiscardCo rwco = { 53374265d620SPaolo Bonzini .bs = bs, 53384265d620SPaolo Bonzini .sector_num = sector_num, 53394265d620SPaolo Bonzini .nb_sectors = nb_sectors, 53404265d620SPaolo Bonzini .ret = NOT_DONE, 53414265d620SPaolo Bonzini }; 53424265d620SPaolo Bonzini 53434265d620SPaolo Bonzini if (qemu_in_coroutine()) { 53444265d620SPaolo Bonzini /* Fast-path if already in coroutine context */ 53454265d620SPaolo Bonzini bdrv_discard_co_entry(&rwco); 53464265d620SPaolo Bonzini } else { 53472572b37aSStefan Hajnoczi AioContext *aio_context = bdrv_get_aio_context(bs); 53482572b37aSStefan Hajnoczi 53494265d620SPaolo Bonzini co = qemu_coroutine_create(bdrv_discard_co_entry); 53504265d620SPaolo Bonzini qemu_coroutine_enter(co, &rwco); 53514265d620SPaolo Bonzini while (rwco.ret == NOT_DONE) { 53522572b37aSStefan Hajnoczi aio_poll(aio_context, true); 53534265d620SPaolo Bonzini } 53544265d620SPaolo Bonzini } 53554265d620SPaolo Bonzini 53564265d620SPaolo Bonzini return rwco.ret; 53574265d620SPaolo Bonzini } 53584265d620SPaolo Bonzini 5359f9f05dc5SKevin Wolf /**************************************************************/ 536019cb3738Sbellard /* removable device support */ 536119cb3738Sbellard 536219cb3738Sbellard /** 536319cb3738Sbellard * Return TRUE if the media is present 536419cb3738Sbellard */ 536519cb3738Sbellard int bdrv_is_inserted(BlockDriverState *bs) 536619cb3738Sbellard { 536719cb3738Sbellard BlockDriver *drv = bs->drv; 5368a1aff5bfSMarkus Armbruster 536919cb3738Sbellard if (!drv) 537019cb3738Sbellard return 0; 537119cb3738Sbellard if (!drv->bdrv_is_inserted) 5372a1aff5bfSMarkus Armbruster return 1; 5373a1aff5bfSMarkus Armbruster return drv->bdrv_is_inserted(bs); 537419cb3738Sbellard } 537519cb3738Sbellard 537619cb3738Sbellard /** 53778e49ca46SMarkus Armbruster * Return whether the media changed since the last call to this 53788e49ca46SMarkus Armbruster * function, or -ENOTSUP if we don't know. Most drivers don't know. 537919cb3738Sbellard */ 538019cb3738Sbellard int bdrv_media_changed(BlockDriverState *bs) 538119cb3738Sbellard { 538219cb3738Sbellard BlockDriver *drv = bs->drv; 538319cb3738Sbellard 53848e49ca46SMarkus Armbruster if (drv && drv->bdrv_media_changed) { 53858e49ca46SMarkus Armbruster return drv->bdrv_media_changed(bs); 53868e49ca46SMarkus Armbruster } 53878e49ca46SMarkus Armbruster return -ENOTSUP; 538819cb3738Sbellard } 538919cb3738Sbellard 539019cb3738Sbellard /** 539119cb3738Sbellard * If eject_flag is TRUE, eject the media. Otherwise, close the tray 539219cb3738Sbellard */ 5393f36f3949SLuiz Capitulino void bdrv_eject(BlockDriverState *bs, bool eject_flag) 539419cb3738Sbellard { 539519cb3738Sbellard BlockDriver *drv = bs->drv; 5396bfb197e0SMarkus Armbruster const char *device_name; 539719cb3738Sbellard 5398822e1cd1SMarkus Armbruster if (drv && drv->bdrv_eject) { 5399822e1cd1SMarkus Armbruster drv->bdrv_eject(bs, eject_flag); 540019cb3738Sbellard } 54016f382ed2SLuiz Capitulino 5402bfb197e0SMarkus Armbruster device_name = bdrv_get_device_name(bs); 5403bfb197e0SMarkus Armbruster if (device_name[0] != '\0') { 5404bfb197e0SMarkus Armbruster qapi_event_send_device_tray_moved(device_name, 5405a5ee7bd4SWenchao Xia eject_flag, &error_abort); 54066f382ed2SLuiz Capitulino } 540719cb3738Sbellard } 540819cb3738Sbellard 540919cb3738Sbellard /** 541019cb3738Sbellard * Lock or unlock the media (if it is locked, the user won't be able 541119cb3738Sbellard * to eject it manually). 541219cb3738Sbellard */ 5413025e849aSMarkus Armbruster void bdrv_lock_medium(BlockDriverState *bs, bool locked) 541419cb3738Sbellard { 541519cb3738Sbellard BlockDriver *drv = bs->drv; 541619cb3738Sbellard 5417025e849aSMarkus Armbruster trace_bdrv_lock_medium(bs, locked); 5418b8c6d095SStefan Hajnoczi 5419025e849aSMarkus Armbruster if (drv && drv->bdrv_lock_medium) { 5420025e849aSMarkus Armbruster drv->bdrv_lock_medium(bs, locked); 542119cb3738Sbellard } 542219cb3738Sbellard } 5423985a03b0Sths 5424985a03b0Sths /* needed for generic scsi interface */ 5425985a03b0Sths 5426985a03b0Sths int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) 5427985a03b0Sths { 5428985a03b0Sths BlockDriver *drv = bs->drv; 5429985a03b0Sths 5430985a03b0Sths if (drv && drv->bdrv_ioctl) 5431985a03b0Sths return drv->bdrv_ioctl(bs, req, buf); 5432985a03b0Sths return -ENOTSUP; 5433985a03b0Sths } 54347d780669Saliguori 54357c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs, 5436221f715dSaliguori unsigned long int req, void *buf, 5437097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 54387d780669Saliguori { 5439221f715dSaliguori BlockDriver *drv = bs->drv; 54407d780669Saliguori 5441221f715dSaliguori if (drv && drv->bdrv_aio_ioctl) 5442221f715dSaliguori return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque); 5443221f715dSaliguori return NULL; 54447d780669Saliguori } 5445e268ca52Saliguori 54461b7fd729SPaolo Bonzini void bdrv_set_guest_block_size(BlockDriverState *bs, int align) 54477b6f9300SMarkus Armbruster { 54481b7fd729SPaolo Bonzini bs->guest_block_size = align; 54497b6f9300SMarkus Armbruster } 54507cd1e32aSlirans@il.ibm.com 5451e268ca52Saliguori void *qemu_blockalign(BlockDriverState *bs, size_t size) 5452e268ca52Saliguori { 5453339064d5SKevin Wolf return qemu_memalign(bdrv_opt_mem_align(bs), size); 5454e268ca52Saliguori } 54557cd1e32aSlirans@il.ibm.com 54569ebd8448SMax Reitz void *qemu_blockalign0(BlockDriverState *bs, size_t size) 54579ebd8448SMax Reitz { 54589ebd8448SMax Reitz return memset(qemu_blockalign(bs, size), 0, size); 54599ebd8448SMax Reitz } 54609ebd8448SMax Reitz 54617d2a35ccSKevin Wolf void *qemu_try_blockalign(BlockDriverState *bs, size_t size) 54627d2a35ccSKevin Wolf { 54637d2a35ccSKevin Wolf size_t align = bdrv_opt_mem_align(bs); 54647d2a35ccSKevin Wolf 54657d2a35ccSKevin Wolf /* Ensure that NULL is never returned on success */ 54667d2a35ccSKevin Wolf assert(align > 0); 54677d2a35ccSKevin Wolf if (size == 0) { 54687d2a35ccSKevin Wolf size = align; 54697d2a35ccSKevin Wolf } 54707d2a35ccSKevin Wolf 54717d2a35ccSKevin Wolf return qemu_try_memalign(align, size); 54727d2a35ccSKevin Wolf } 54737d2a35ccSKevin Wolf 54749ebd8448SMax Reitz void *qemu_try_blockalign0(BlockDriverState *bs, size_t size) 54759ebd8448SMax Reitz { 54769ebd8448SMax Reitz void *mem = qemu_try_blockalign(bs, size); 54779ebd8448SMax Reitz 54789ebd8448SMax Reitz if (mem) { 54799ebd8448SMax Reitz memset(mem, 0, size); 54809ebd8448SMax Reitz } 54819ebd8448SMax Reitz 54829ebd8448SMax Reitz return mem; 54839ebd8448SMax Reitz } 54849ebd8448SMax Reitz 5485c53b1c51SStefan Hajnoczi /* 5486c53b1c51SStefan Hajnoczi * Check if all memory in this vector is sector aligned. 5487c53b1c51SStefan Hajnoczi */ 5488c53b1c51SStefan Hajnoczi bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) 5489c53b1c51SStefan Hajnoczi { 5490c53b1c51SStefan Hajnoczi int i; 5491339064d5SKevin Wolf size_t alignment = bdrv_opt_mem_align(bs); 5492c53b1c51SStefan Hajnoczi 5493c53b1c51SStefan Hajnoczi for (i = 0; i < qiov->niov; i++) { 5494339064d5SKevin Wolf if ((uintptr_t) qiov->iov[i].iov_base % alignment) { 5495c53b1c51SStefan Hajnoczi return false; 5496c53b1c51SStefan Hajnoczi } 5497339064d5SKevin Wolf if (qiov->iov[i].iov_len % alignment) { 54981ff735bdSKevin Wolf return false; 54991ff735bdSKevin Wolf } 5500c53b1c51SStefan Hajnoczi } 5501c53b1c51SStefan Hajnoczi 5502c53b1c51SStefan Hajnoczi return true; 5503c53b1c51SStefan Hajnoczi } 5504c53b1c51SStefan Hajnoczi 55050db6e54aSFam Zheng BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name) 55060db6e54aSFam Zheng { 55070db6e54aSFam Zheng BdrvDirtyBitmap *bm; 55080db6e54aSFam Zheng 55090db6e54aSFam Zheng assert(name); 55100db6e54aSFam Zheng QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { 55110db6e54aSFam Zheng if (bm->name && !strcmp(name, bm->name)) { 55120db6e54aSFam Zheng return bm; 55130db6e54aSFam Zheng } 55140db6e54aSFam Zheng } 55150db6e54aSFam Zheng return NULL; 55160db6e54aSFam Zheng } 55170db6e54aSFam Zheng 55180db6e54aSFam Zheng void bdrv_dirty_bitmap_make_anon(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) 55190db6e54aSFam Zheng { 55200db6e54aSFam Zheng g_free(bitmap->name); 55210db6e54aSFam Zheng bitmap->name = NULL; 55220db6e54aSFam Zheng } 55230db6e54aSFam Zheng 55240db6e54aSFam Zheng BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, 5525*5fba6c0eSJohn Snow uint32_t granularity, 55260db6e54aSFam Zheng const char *name, 5527b8afb520SFam Zheng Error **errp) 55287cd1e32aSlirans@il.ibm.com { 55297cd1e32aSlirans@il.ibm.com int64_t bitmap_size; 5530e4654d2dSFam Zheng BdrvDirtyBitmap *bitmap; 5531*5fba6c0eSJohn Snow uint32_t sector_granularity; 5532a55eb92cSJan Kiszka 553350717e94SPaolo Bonzini assert((granularity & (granularity - 1)) == 0); 553450717e94SPaolo Bonzini 55350db6e54aSFam Zheng if (name && bdrv_find_dirty_bitmap(bs, name)) { 55360db6e54aSFam Zheng error_setg(errp, "Bitmap already exists: %s", name); 55370db6e54aSFam Zheng return NULL; 55380db6e54aSFam Zheng } 5539*5fba6c0eSJohn Snow sector_granularity = granularity >> BDRV_SECTOR_BITS; 5540*5fba6c0eSJohn Snow assert(sector_granularity); 554157322b78SMarkus Armbruster bitmap_size = bdrv_nb_sectors(bs); 5542b8afb520SFam Zheng if (bitmap_size < 0) { 5543b8afb520SFam Zheng error_setg_errno(errp, -bitmap_size, "could not get length of device"); 5544b8afb520SFam Zheng errno = -bitmap_size; 5545b8afb520SFam Zheng return NULL; 5546b8afb520SFam Zheng } 55475839e53bSMarkus Armbruster bitmap = g_new0(BdrvDirtyBitmap, 1); 5548*5fba6c0eSJohn Snow bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity)); 55490db6e54aSFam Zheng bitmap->name = g_strdup(name); 5550e4654d2dSFam Zheng QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list); 5551e4654d2dSFam Zheng return bitmap; 5552e4654d2dSFam Zheng } 5553e4654d2dSFam Zheng 5554e4654d2dSFam Zheng void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) 5555e4654d2dSFam Zheng { 5556e4654d2dSFam Zheng BdrvDirtyBitmap *bm, *next; 5557e4654d2dSFam Zheng QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) { 5558e4654d2dSFam Zheng if (bm == bitmap) { 5559e4654d2dSFam Zheng QLIST_REMOVE(bitmap, list); 5560e4654d2dSFam Zheng hbitmap_free(bitmap->bitmap); 55610db6e54aSFam Zheng g_free(bitmap->name); 5562e4654d2dSFam Zheng g_free(bitmap); 5563e4654d2dSFam Zheng return; 55647cd1e32aSlirans@il.ibm.com } 55657cd1e32aSlirans@il.ibm.com } 55667cd1e32aSlirans@il.ibm.com } 55677cd1e32aSlirans@il.ibm.com 556821b56835SFam Zheng BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs) 556921b56835SFam Zheng { 557021b56835SFam Zheng BdrvDirtyBitmap *bm; 557121b56835SFam Zheng BlockDirtyInfoList *list = NULL; 557221b56835SFam Zheng BlockDirtyInfoList **plist = &list; 557321b56835SFam Zheng 557421b56835SFam Zheng QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { 55755839e53bSMarkus Armbruster BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1); 55765839e53bSMarkus Armbruster BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1); 557721b56835SFam Zheng info->count = bdrv_get_dirty_count(bs, bm); 557821b56835SFam Zheng info->granularity = 5579*5fba6c0eSJohn Snow ((uint32_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bm->bitmap)); 55800db6e54aSFam Zheng info->has_name = !!bm->name; 55810db6e54aSFam Zheng info->name = g_strdup(bm->name); 558221b56835SFam Zheng entry->value = info; 558321b56835SFam Zheng *plist = entry; 558421b56835SFam Zheng plist = &entry->next; 558521b56835SFam Zheng } 558621b56835SFam Zheng 558721b56835SFam Zheng return list; 558821b56835SFam Zheng } 558921b56835SFam Zheng 5590e4654d2dSFam Zheng int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector) 55917cd1e32aSlirans@il.ibm.com { 5592e4654d2dSFam Zheng if (bitmap) { 5593e4654d2dSFam Zheng return hbitmap_get(bitmap->bitmap, sector); 55947cd1e32aSlirans@il.ibm.com } else { 55957cd1e32aSlirans@il.ibm.com return 0; 55967cd1e32aSlirans@il.ibm.com } 55977cd1e32aSlirans@il.ibm.com } 55987cd1e32aSlirans@il.ibm.com 5599e4654d2dSFam Zheng void bdrv_dirty_iter_init(BlockDriverState *bs, 5600e4654d2dSFam Zheng BdrvDirtyBitmap *bitmap, HBitmapIter *hbi) 56011755da16SPaolo Bonzini { 5602e4654d2dSFam Zheng hbitmap_iter_init(hbi, bitmap->bitmap, 0); 56031755da16SPaolo Bonzini } 56041755da16SPaolo Bonzini 5605c4237dfaSVladimir Sementsov-Ogievskiy void bdrv_set_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, 5606c4237dfaSVladimir Sementsov-Ogievskiy int64_t cur_sector, int nr_sectors) 5607c4237dfaSVladimir Sementsov-Ogievskiy { 5608c4237dfaSVladimir Sementsov-Ogievskiy hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); 5609c4237dfaSVladimir Sementsov-Ogievskiy } 5610c4237dfaSVladimir Sementsov-Ogievskiy 5611c4237dfaSVladimir Sementsov-Ogievskiy void bdrv_reset_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, 5612c4237dfaSVladimir Sementsov-Ogievskiy int64_t cur_sector, int nr_sectors) 5613c4237dfaSVladimir Sementsov-Ogievskiy { 5614c4237dfaSVladimir Sementsov-Ogievskiy hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); 5615c4237dfaSVladimir Sementsov-Ogievskiy } 5616c4237dfaSVladimir Sementsov-Ogievskiy 5617c4237dfaSVladimir Sementsov-Ogievskiy static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, 56181755da16SPaolo Bonzini int nr_sectors) 56191755da16SPaolo Bonzini { 5620e4654d2dSFam Zheng BdrvDirtyBitmap *bitmap; 5621e4654d2dSFam Zheng QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { 5622e4654d2dSFam Zheng hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); 5623e4654d2dSFam Zheng } 56241755da16SPaolo Bonzini } 56251755da16SPaolo Bonzini 5626c4237dfaSVladimir Sementsov-Ogievskiy static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, 5627c4237dfaSVladimir Sementsov-Ogievskiy int nr_sectors) 56287cd1e32aSlirans@il.ibm.com { 5629e4654d2dSFam Zheng BdrvDirtyBitmap *bitmap; 5630e4654d2dSFam Zheng QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { 5631e4654d2dSFam Zheng hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); 5632e4654d2dSFam Zheng } 56337cd1e32aSlirans@il.ibm.com } 5634aaa0eb75SLiran Schour 5635e4654d2dSFam Zheng int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) 5636aaa0eb75SLiran Schour { 5637e4654d2dSFam Zheng return hbitmap_count(bitmap->bitmap); 5638aaa0eb75SLiran Schour } 5639f88e1a42SJes Sorensen 56409fcb0251SFam Zheng /* Get a reference to bs */ 56419fcb0251SFam Zheng void bdrv_ref(BlockDriverState *bs) 56429fcb0251SFam Zheng { 56439fcb0251SFam Zheng bs->refcnt++; 56449fcb0251SFam Zheng } 56459fcb0251SFam Zheng 56469fcb0251SFam Zheng /* Release a previously grabbed reference to bs. 56479fcb0251SFam Zheng * If after releasing, reference count is zero, the BlockDriverState is 56489fcb0251SFam Zheng * deleted. */ 56499fcb0251SFam Zheng void bdrv_unref(BlockDriverState *bs) 56509fcb0251SFam Zheng { 56519a4d5ca6SJeff Cody if (!bs) { 56529a4d5ca6SJeff Cody return; 56539a4d5ca6SJeff Cody } 56549fcb0251SFam Zheng assert(bs->refcnt > 0); 56559fcb0251SFam Zheng if (--bs->refcnt == 0) { 56569fcb0251SFam Zheng bdrv_delete(bs); 56579fcb0251SFam Zheng } 56589fcb0251SFam Zheng } 56599fcb0251SFam Zheng 5660fbe40ff7SFam Zheng struct BdrvOpBlocker { 5661fbe40ff7SFam Zheng Error *reason; 5662fbe40ff7SFam Zheng QLIST_ENTRY(BdrvOpBlocker) list; 5663fbe40ff7SFam Zheng }; 5664fbe40ff7SFam Zheng 5665fbe40ff7SFam Zheng bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp) 5666fbe40ff7SFam Zheng { 5667fbe40ff7SFam Zheng BdrvOpBlocker *blocker; 5668fbe40ff7SFam Zheng assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 5669fbe40ff7SFam Zheng if (!QLIST_EMPTY(&bs->op_blockers[op])) { 5670fbe40ff7SFam Zheng blocker = QLIST_FIRST(&bs->op_blockers[op]); 5671fbe40ff7SFam Zheng if (errp) { 567281e5f78aSAlberto Garcia error_setg(errp, "Node '%s' is busy: %s", 567381e5f78aSAlberto Garcia bdrv_get_device_or_node_name(bs), 5674bfb197e0SMarkus Armbruster error_get_pretty(blocker->reason)); 5675fbe40ff7SFam Zheng } 5676fbe40ff7SFam Zheng return true; 5677fbe40ff7SFam Zheng } 5678fbe40ff7SFam Zheng return false; 5679fbe40ff7SFam Zheng } 5680fbe40ff7SFam Zheng 5681fbe40ff7SFam Zheng void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason) 5682fbe40ff7SFam Zheng { 5683fbe40ff7SFam Zheng BdrvOpBlocker *blocker; 5684fbe40ff7SFam Zheng assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 5685fbe40ff7SFam Zheng 56865839e53bSMarkus Armbruster blocker = g_new0(BdrvOpBlocker, 1); 5687fbe40ff7SFam Zheng blocker->reason = reason; 5688fbe40ff7SFam Zheng QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list); 5689fbe40ff7SFam Zheng } 5690fbe40ff7SFam Zheng 5691fbe40ff7SFam Zheng void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason) 5692fbe40ff7SFam Zheng { 5693fbe40ff7SFam Zheng BdrvOpBlocker *blocker, *next; 5694fbe40ff7SFam Zheng assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 5695fbe40ff7SFam Zheng QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) { 5696fbe40ff7SFam Zheng if (blocker->reason == reason) { 5697fbe40ff7SFam Zheng QLIST_REMOVE(blocker, list); 5698fbe40ff7SFam Zheng g_free(blocker); 5699fbe40ff7SFam Zheng } 5700fbe40ff7SFam Zheng } 5701fbe40ff7SFam Zheng } 5702fbe40ff7SFam Zheng 5703fbe40ff7SFam Zheng void bdrv_op_block_all(BlockDriverState *bs, Error *reason) 5704fbe40ff7SFam Zheng { 5705fbe40ff7SFam Zheng int i; 5706fbe40ff7SFam Zheng for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 5707fbe40ff7SFam Zheng bdrv_op_block(bs, i, reason); 5708fbe40ff7SFam Zheng } 5709fbe40ff7SFam Zheng } 5710fbe40ff7SFam Zheng 5711fbe40ff7SFam Zheng void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason) 5712fbe40ff7SFam Zheng { 5713fbe40ff7SFam Zheng int i; 5714fbe40ff7SFam Zheng for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 5715fbe40ff7SFam Zheng bdrv_op_unblock(bs, i, reason); 5716fbe40ff7SFam Zheng } 5717fbe40ff7SFam Zheng } 5718fbe40ff7SFam Zheng 5719fbe40ff7SFam Zheng bool bdrv_op_blocker_is_empty(BlockDriverState *bs) 5720fbe40ff7SFam Zheng { 5721fbe40ff7SFam Zheng int i; 5722fbe40ff7SFam Zheng 5723fbe40ff7SFam Zheng for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 5724fbe40ff7SFam Zheng if (!QLIST_EMPTY(&bs->op_blockers[i])) { 5725fbe40ff7SFam Zheng return false; 5726fbe40ff7SFam Zheng } 5727fbe40ff7SFam Zheng } 5728fbe40ff7SFam Zheng return true; 5729fbe40ff7SFam Zheng } 5730fbe40ff7SFam Zheng 573128a7282aSLuiz Capitulino void bdrv_iostatus_enable(BlockDriverState *bs) 573228a7282aSLuiz Capitulino { 5733d6bf279eSLuiz Capitulino bs->iostatus_enabled = true; 573458e21ef5SLuiz Capitulino bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK; 573528a7282aSLuiz Capitulino } 573628a7282aSLuiz Capitulino 573728a7282aSLuiz Capitulino /* The I/O status is only enabled if the drive explicitly 573828a7282aSLuiz Capitulino * enables it _and_ the VM is configured to stop on errors */ 573928a7282aSLuiz Capitulino bool bdrv_iostatus_is_enabled(const BlockDriverState *bs) 574028a7282aSLuiz Capitulino { 5741d6bf279eSLuiz Capitulino return (bs->iostatus_enabled && 574292aa5c6dSPaolo Bonzini (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC || 574392aa5c6dSPaolo Bonzini bs->on_write_error == BLOCKDEV_ON_ERROR_STOP || 574492aa5c6dSPaolo Bonzini bs->on_read_error == BLOCKDEV_ON_ERROR_STOP)); 574528a7282aSLuiz Capitulino } 574628a7282aSLuiz Capitulino 574728a7282aSLuiz Capitulino void bdrv_iostatus_disable(BlockDriverState *bs) 574828a7282aSLuiz Capitulino { 5749d6bf279eSLuiz Capitulino bs->iostatus_enabled = false; 575028a7282aSLuiz Capitulino } 575128a7282aSLuiz Capitulino 575228a7282aSLuiz Capitulino void bdrv_iostatus_reset(BlockDriverState *bs) 575328a7282aSLuiz Capitulino { 575428a7282aSLuiz Capitulino if (bdrv_iostatus_is_enabled(bs)) { 575558e21ef5SLuiz Capitulino bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK; 57563bd293c3SPaolo Bonzini if (bs->job) { 57573bd293c3SPaolo Bonzini block_job_iostatus_reset(bs->job); 57583bd293c3SPaolo Bonzini } 575928a7282aSLuiz Capitulino } 576028a7282aSLuiz Capitulino } 576128a7282aSLuiz Capitulino 576228a7282aSLuiz Capitulino void bdrv_iostatus_set_err(BlockDriverState *bs, int error) 576328a7282aSLuiz Capitulino { 57643e1caa5fSPaolo Bonzini assert(bdrv_iostatus_is_enabled(bs)); 57653e1caa5fSPaolo Bonzini if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { 576658e21ef5SLuiz Capitulino bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE : 576758e21ef5SLuiz Capitulino BLOCK_DEVICE_IO_STATUS_FAILED; 576828a7282aSLuiz Capitulino } 576928a7282aSLuiz Capitulino } 577028a7282aSLuiz Capitulino 5771d92ada22SLuiz Capitulino void bdrv_img_create(const char *filename, const char *fmt, 5772f88e1a42SJes Sorensen const char *base_filename, const char *base_fmt, 5773f382d43aSMiroslav Rezanina char *options, uint64_t img_size, int flags, 5774f382d43aSMiroslav Rezanina Error **errp, bool quiet) 5775f88e1a42SJes Sorensen { 577683d0521aSChunyan Liu QemuOptsList *create_opts = NULL; 577783d0521aSChunyan Liu QemuOpts *opts = NULL; 577883d0521aSChunyan Liu const char *backing_fmt, *backing_file; 577983d0521aSChunyan Liu int64_t size; 5780f88e1a42SJes Sorensen BlockDriver *drv, *proto_drv; 578196df67d1SStefan Hajnoczi BlockDriver *backing_drv = NULL; 5782cc84d90fSMax Reitz Error *local_err = NULL; 5783f88e1a42SJes Sorensen int ret = 0; 5784f88e1a42SJes Sorensen 5785f88e1a42SJes Sorensen /* Find driver and parse its options */ 5786f88e1a42SJes Sorensen drv = bdrv_find_format(fmt); 5787f88e1a42SJes Sorensen if (!drv) { 578871c79813SLuiz Capitulino error_setg(errp, "Unknown file format '%s'", fmt); 5789d92ada22SLuiz Capitulino return; 5790f88e1a42SJes Sorensen } 5791f88e1a42SJes Sorensen 5792b65a5e12SMax Reitz proto_drv = bdrv_find_protocol(filename, true, errp); 5793f88e1a42SJes Sorensen if (!proto_drv) { 5794d92ada22SLuiz Capitulino return; 5795f88e1a42SJes Sorensen } 5796f88e1a42SJes Sorensen 5797c6149724SMax Reitz if (!drv->create_opts) { 5798c6149724SMax Reitz error_setg(errp, "Format driver '%s' does not support image creation", 5799c6149724SMax Reitz drv->format_name); 5800c6149724SMax Reitz return; 5801c6149724SMax Reitz } 5802c6149724SMax Reitz 5803c6149724SMax Reitz if (!proto_drv->create_opts) { 5804c6149724SMax Reitz error_setg(errp, "Protocol driver '%s' does not support image creation", 5805c6149724SMax Reitz proto_drv->format_name); 5806c6149724SMax Reitz return; 5807c6149724SMax Reitz } 5808c6149724SMax Reitz 5809c282e1fdSChunyan Liu create_opts = qemu_opts_append(create_opts, drv->create_opts); 5810c282e1fdSChunyan Liu create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); 5811f88e1a42SJes Sorensen 5812f88e1a42SJes Sorensen /* Create parameter list with default values */ 581383d0521aSChunyan Liu opts = qemu_opts_create(create_opts, NULL, 0, &error_abort); 581439101f25SMarkus Armbruster qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort); 5815f88e1a42SJes Sorensen 5816f88e1a42SJes Sorensen /* Parse -o options */ 5817f88e1a42SJes Sorensen if (options) { 5818dc523cd3SMarkus Armbruster qemu_opts_do_parse(opts, options, NULL, &local_err); 5819dc523cd3SMarkus Armbruster if (local_err) { 5820dc523cd3SMarkus Armbruster error_report_err(local_err); 5821dc523cd3SMarkus Armbruster local_err = NULL; 582283d0521aSChunyan Liu error_setg(errp, "Invalid options for file format '%s'", fmt); 5823f88e1a42SJes Sorensen goto out; 5824f88e1a42SJes Sorensen } 5825f88e1a42SJes Sorensen } 5826f88e1a42SJes Sorensen 5827f88e1a42SJes Sorensen if (base_filename) { 5828f43e47dbSMarkus Armbruster qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err); 58296be4194bSMarkus Armbruster if (local_err) { 583071c79813SLuiz Capitulino error_setg(errp, "Backing file not supported for file format '%s'", 583171c79813SLuiz Capitulino fmt); 5832f88e1a42SJes Sorensen goto out; 5833f88e1a42SJes Sorensen } 5834f88e1a42SJes Sorensen } 5835f88e1a42SJes Sorensen 5836f88e1a42SJes Sorensen if (base_fmt) { 5837f43e47dbSMarkus Armbruster qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err); 58386be4194bSMarkus Armbruster if (local_err) { 583971c79813SLuiz Capitulino error_setg(errp, "Backing file format not supported for file " 584071c79813SLuiz Capitulino "format '%s'", fmt); 5841f88e1a42SJes Sorensen goto out; 5842f88e1a42SJes Sorensen } 5843f88e1a42SJes Sorensen } 5844f88e1a42SJes Sorensen 584583d0521aSChunyan Liu backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE); 584683d0521aSChunyan Liu if (backing_file) { 584783d0521aSChunyan Liu if (!strcmp(filename, backing_file)) { 584871c79813SLuiz Capitulino error_setg(errp, "Error: Trying to create an image with the " 584971c79813SLuiz Capitulino "same filename as the backing file"); 5850792da93aSJes Sorensen goto out; 5851792da93aSJes Sorensen } 5852792da93aSJes Sorensen } 5853792da93aSJes Sorensen 585483d0521aSChunyan Liu backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT); 585583d0521aSChunyan Liu if (backing_fmt) { 585683d0521aSChunyan Liu backing_drv = bdrv_find_format(backing_fmt); 585796df67d1SStefan Hajnoczi if (!backing_drv) { 585871c79813SLuiz Capitulino error_setg(errp, "Unknown backing file format '%s'", 585983d0521aSChunyan Liu backing_fmt); 5860f88e1a42SJes Sorensen goto out; 5861f88e1a42SJes Sorensen } 5862f88e1a42SJes Sorensen } 5863f88e1a42SJes Sorensen 5864f88e1a42SJes Sorensen // The size for the image must always be specified, with one exception: 5865f88e1a42SJes Sorensen // If we are using a backing file, we can obtain the size from there 586683d0521aSChunyan Liu size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0); 586783d0521aSChunyan Liu if (size == -1) { 586883d0521aSChunyan Liu if (backing_file) { 586966f6b814SMax Reitz BlockDriverState *bs; 587029168018SMax Reitz char *full_backing = g_new0(char, PATH_MAX); 587152bf1e72SMarkus Armbruster int64_t size; 587263090dacSPaolo Bonzini int back_flags; 587363090dacSPaolo Bonzini 587429168018SMax Reitz bdrv_get_full_backing_filename_from_filename(filename, backing_file, 587529168018SMax Reitz full_backing, PATH_MAX, 587629168018SMax Reitz &local_err); 587729168018SMax Reitz if (local_err) { 587829168018SMax Reitz g_free(full_backing); 587929168018SMax Reitz goto out; 588029168018SMax Reitz } 588129168018SMax Reitz 588263090dacSPaolo Bonzini /* backing files always opened read-only */ 588363090dacSPaolo Bonzini back_flags = 588463090dacSPaolo Bonzini flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); 5885f88e1a42SJes Sorensen 5886f67503e5SMax Reitz bs = NULL; 588729168018SMax Reitz ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags, 5888cc84d90fSMax Reitz backing_drv, &local_err); 588929168018SMax Reitz g_free(full_backing); 5890f88e1a42SJes Sorensen if (ret < 0) { 5891f88e1a42SJes Sorensen goto out; 5892f88e1a42SJes Sorensen } 589352bf1e72SMarkus Armbruster size = bdrv_getlength(bs); 589452bf1e72SMarkus Armbruster if (size < 0) { 589552bf1e72SMarkus Armbruster error_setg_errno(errp, -size, "Could not get size of '%s'", 589652bf1e72SMarkus Armbruster backing_file); 589752bf1e72SMarkus Armbruster bdrv_unref(bs); 589852bf1e72SMarkus Armbruster goto out; 589952bf1e72SMarkus Armbruster } 5900f88e1a42SJes Sorensen 590139101f25SMarkus Armbruster qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort); 590266f6b814SMax Reitz 590366f6b814SMax Reitz bdrv_unref(bs); 5904f88e1a42SJes Sorensen } else { 590571c79813SLuiz Capitulino error_setg(errp, "Image creation needs a size parameter"); 5906f88e1a42SJes Sorensen goto out; 5907f88e1a42SJes Sorensen } 5908f88e1a42SJes Sorensen } 5909f88e1a42SJes Sorensen 5910f382d43aSMiroslav Rezanina if (!quiet) { 5911f88e1a42SJes Sorensen printf("Formatting '%s', fmt=%s", filename, fmt); 591243c5d8f8SFam Zheng qemu_opts_print(opts, " "); 5913f88e1a42SJes Sorensen puts(""); 5914f382d43aSMiroslav Rezanina } 591583d0521aSChunyan Liu 5916c282e1fdSChunyan Liu ret = bdrv_create(drv, filename, opts, &local_err); 591783d0521aSChunyan Liu 5918cc84d90fSMax Reitz if (ret == -EFBIG) { 5919cc84d90fSMax Reitz /* This is generally a better message than whatever the driver would 5920cc84d90fSMax Reitz * deliver (especially because of the cluster_size_hint), since that 5921cc84d90fSMax Reitz * is most probably not much different from "image too large". */ 5922f3f4d2c0SKevin Wolf const char *cluster_size_hint = ""; 592383d0521aSChunyan Liu if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) { 5924f3f4d2c0SKevin Wolf cluster_size_hint = " (try using a larger cluster size)"; 5925f3f4d2c0SKevin Wolf } 5926cc84d90fSMax Reitz error_setg(errp, "The image size is too large for file format '%s'" 5927cc84d90fSMax Reitz "%s", fmt, cluster_size_hint); 5928cc84d90fSMax Reitz error_free(local_err); 5929cc84d90fSMax Reitz local_err = NULL; 5930f88e1a42SJes Sorensen } 5931f88e1a42SJes Sorensen 5932f88e1a42SJes Sorensen out: 593383d0521aSChunyan Liu qemu_opts_del(opts); 593483d0521aSChunyan Liu qemu_opts_free(create_opts); 593584d18f06SMarkus Armbruster if (local_err) { 5936cc84d90fSMax Reitz error_propagate(errp, local_err); 5937cc84d90fSMax Reitz } 5938f88e1a42SJes Sorensen } 593985d126f3SStefan Hajnoczi 594085d126f3SStefan Hajnoczi AioContext *bdrv_get_aio_context(BlockDriverState *bs) 594185d126f3SStefan Hajnoczi { 5942dcd04228SStefan Hajnoczi return bs->aio_context; 5943dcd04228SStefan Hajnoczi } 5944dcd04228SStefan Hajnoczi 5945dcd04228SStefan Hajnoczi void bdrv_detach_aio_context(BlockDriverState *bs) 5946dcd04228SStefan Hajnoczi { 594733384421SMax Reitz BdrvAioNotifier *baf; 594833384421SMax Reitz 5949dcd04228SStefan Hajnoczi if (!bs->drv) { 5950dcd04228SStefan Hajnoczi return; 5951dcd04228SStefan Hajnoczi } 5952dcd04228SStefan Hajnoczi 595333384421SMax Reitz QLIST_FOREACH(baf, &bs->aio_notifiers, list) { 595433384421SMax Reitz baf->detach_aio_context(baf->opaque); 595533384421SMax Reitz } 595633384421SMax Reitz 595713af91ebSStefan Hajnoczi if (bs->io_limits_enabled) { 595813af91ebSStefan Hajnoczi throttle_detach_aio_context(&bs->throttle_state); 595913af91ebSStefan Hajnoczi } 5960dcd04228SStefan Hajnoczi if (bs->drv->bdrv_detach_aio_context) { 5961dcd04228SStefan Hajnoczi bs->drv->bdrv_detach_aio_context(bs); 5962dcd04228SStefan Hajnoczi } 5963dcd04228SStefan Hajnoczi if (bs->file) { 5964dcd04228SStefan Hajnoczi bdrv_detach_aio_context(bs->file); 5965dcd04228SStefan Hajnoczi } 5966dcd04228SStefan Hajnoczi if (bs->backing_hd) { 5967dcd04228SStefan Hajnoczi bdrv_detach_aio_context(bs->backing_hd); 5968dcd04228SStefan Hajnoczi } 5969dcd04228SStefan Hajnoczi 5970dcd04228SStefan Hajnoczi bs->aio_context = NULL; 5971dcd04228SStefan Hajnoczi } 5972dcd04228SStefan Hajnoczi 5973dcd04228SStefan Hajnoczi void bdrv_attach_aio_context(BlockDriverState *bs, 5974dcd04228SStefan Hajnoczi AioContext *new_context) 5975dcd04228SStefan Hajnoczi { 597633384421SMax Reitz BdrvAioNotifier *ban; 597733384421SMax Reitz 5978dcd04228SStefan Hajnoczi if (!bs->drv) { 5979dcd04228SStefan Hajnoczi return; 5980dcd04228SStefan Hajnoczi } 5981dcd04228SStefan Hajnoczi 5982dcd04228SStefan Hajnoczi bs->aio_context = new_context; 5983dcd04228SStefan Hajnoczi 5984dcd04228SStefan Hajnoczi if (bs->backing_hd) { 5985dcd04228SStefan Hajnoczi bdrv_attach_aio_context(bs->backing_hd, new_context); 5986dcd04228SStefan Hajnoczi } 5987dcd04228SStefan Hajnoczi if (bs->file) { 5988dcd04228SStefan Hajnoczi bdrv_attach_aio_context(bs->file, new_context); 5989dcd04228SStefan Hajnoczi } 5990dcd04228SStefan Hajnoczi if (bs->drv->bdrv_attach_aio_context) { 5991dcd04228SStefan Hajnoczi bs->drv->bdrv_attach_aio_context(bs, new_context); 5992dcd04228SStefan Hajnoczi } 599313af91ebSStefan Hajnoczi if (bs->io_limits_enabled) { 599413af91ebSStefan Hajnoczi throttle_attach_aio_context(&bs->throttle_state, new_context); 599513af91ebSStefan Hajnoczi } 599633384421SMax Reitz 599733384421SMax Reitz QLIST_FOREACH(ban, &bs->aio_notifiers, list) { 599833384421SMax Reitz ban->attached_aio_context(new_context, ban->opaque); 599933384421SMax Reitz } 6000dcd04228SStefan Hajnoczi } 6001dcd04228SStefan Hajnoczi 6002dcd04228SStefan Hajnoczi void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) 6003dcd04228SStefan Hajnoczi { 6004dcd04228SStefan Hajnoczi bdrv_drain_all(); /* ensure there are no in-flight requests */ 6005dcd04228SStefan Hajnoczi 6006dcd04228SStefan Hajnoczi bdrv_detach_aio_context(bs); 6007dcd04228SStefan Hajnoczi 6008dcd04228SStefan Hajnoczi /* This function executes in the old AioContext so acquire the new one in 6009dcd04228SStefan Hajnoczi * case it runs in a different thread. 6010dcd04228SStefan Hajnoczi */ 6011dcd04228SStefan Hajnoczi aio_context_acquire(new_context); 6012dcd04228SStefan Hajnoczi bdrv_attach_aio_context(bs, new_context); 6013dcd04228SStefan Hajnoczi aio_context_release(new_context); 601485d126f3SStefan Hajnoczi } 6015d616b224SStefan Hajnoczi 601633384421SMax Reitz void bdrv_add_aio_context_notifier(BlockDriverState *bs, 601733384421SMax Reitz void (*attached_aio_context)(AioContext *new_context, void *opaque), 601833384421SMax Reitz void (*detach_aio_context)(void *opaque), void *opaque) 601933384421SMax Reitz { 602033384421SMax Reitz BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1); 602133384421SMax Reitz *ban = (BdrvAioNotifier){ 602233384421SMax Reitz .attached_aio_context = attached_aio_context, 602333384421SMax Reitz .detach_aio_context = detach_aio_context, 602433384421SMax Reitz .opaque = opaque 602533384421SMax Reitz }; 602633384421SMax Reitz 602733384421SMax Reitz QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list); 602833384421SMax Reitz } 602933384421SMax Reitz 603033384421SMax Reitz void bdrv_remove_aio_context_notifier(BlockDriverState *bs, 603133384421SMax Reitz void (*attached_aio_context)(AioContext *, 603233384421SMax Reitz void *), 603333384421SMax Reitz void (*detach_aio_context)(void *), 603433384421SMax Reitz void *opaque) 603533384421SMax Reitz { 603633384421SMax Reitz BdrvAioNotifier *ban, *ban_next; 603733384421SMax Reitz 603833384421SMax Reitz QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { 603933384421SMax Reitz if (ban->attached_aio_context == attached_aio_context && 604033384421SMax Reitz ban->detach_aio_context == detach_aio_context && 604133384421SMax Reitz ban->opaque == opaque) 604233384421SMax Reitz { 604333384421SMax Reitz QLIST_REMOVE(ban, list); 604433384421SMax Reitz g_free(ban); 604533384421SMax Reitz 604633384421SMax Reitz return; 604733384421SMax Reitz } 604833384421SMax Reitz } 604933384421SMax Reitz 605033384421SMax Reitz abort(); 605133384421SMax Reitz } 605233384421SMax Reitz 6053d616b224SStefan Hajnoczi void bdrv_add_before_write_notifier(BlockDriverState *bs, 6054d616b224SStefan Hajnoczi NotifierWithReturn *notifier) 6055d616b224SStefan Hajnoczi { 6056d616b224SStefan Hajnoczi notifier_with_return_list_add(&bs->before_write_notifiers, notifier); 6057d616b224SStefan Hajnoczi } 60586f176b48SMax Reitz 605977485434SMax Reitz int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts, 606077485434SMax Reitz BlockDriverAmendStatusCB *status_cb) 60616f176b48SMax Reitz { 6062c282e1fdSChunyan Liu if (!bs->drv->bdrv_amend_options) { 60636f176b48SMax Reitz return -ENOTSUP; 60646f176b48SMax Reitz } 606577485434SMax Reitz return bs->drv->bdrv_amend_options(bs, opts, status_cb); 60666f176b48SMax Reitz } 6067f6186f49SBenoît Canet 6068b5042a36SBenoît Canet /* This function will be called by the bdrv_recurse_is_first_non_filter method 6069b5042a36SBenoît Canet * of block filter and by bdrv_is_first_non_filter. 6070b5042a36SBenoît Canet * It is used to test if the given bs is the candidate or recurse more in the 6071b5042a36SBenoît Canet * node graph. 6072212a5a8fSBenoît Canet */ 6073212a5a8fSBenoît Canet bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs, 6074212a5a8fSBenoît Canet BlockDriverState *candidate) 6075f6186f49SBenoît Canet { 6076b5042a36SBenoît Canet /* return false if basic checks fails */ 6077b5042a36SBenoît Canet if (!bs || !bs->drv) { 6078b5042a36SBenoît Canet return false; 6079b5042a36SBenoît Canet } 6080b5042a36SBenoît Canet 6081b5042a36SBenoît Canet /* the code reached a non block filter driver -> check if the bs is 6082b5042a36SBenoît Canet * the same as the candidate. It's the recursion termination condition. 6083b5042a36SBenoît Canet */ 6084b5042a36SBenoît Canet if (!bs->drv->is_filter) { 6085b5042a36SBenoît Canet return bs == candidate; 6086b5042a36SBenoît Canet } 6087b5042a36SBenoît Canet /* Down this path the driver is a block filter driver */ 6088b5042a36SBenoît Canet 6089b5042a36SBenoît Canet /* If the block filter recursion method is defined use it to recurse down 6090b5042a36SBenoît Canet * the node graph. 6091b5042a36SBenoît Canet */ 6092b5042a36SBenoît Canet if (bs->drv->bdrv_recurse_is_first_non_filter) { 6093212a5a8fSBenoît Canet return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate); 6094212a5a8fSBenoît Canet } 6095212a5a8fSBenoît Canet 6096b5042a36SBenoît Canet /* the driver is a block filter but don't allow to recurse -> return false 6097b5042a36SBenoît Canet */ 6098b5042a36SBenoît Canet return false; 6099212a5a8fSBenoît Canet } 6100212a5a8fSBenoît Canet 6101212a5a8fSBenoît Canet /* This function checks if the candidate is the first non filter bs down it's 6102212a5a8fSBenoît Canet * bs chain. Since we don't have pointers to parents it explore all bs chains 6103212a5a8fSBenoît Canet * from the top. Some filters can choose not to pass down the recursion. 6104212a5a8fSBenoît Canet */ 6105212a5a8fSBenoît Canet bool bdrv_is_first_non_filter(BlockDriverState *candidate) 6106212a5a8fSBenoît Canet { 6107212a5a8fSBenoît Canet BlockDriverState *bs; 6108212a5a8fSBenoît Canet 6109212a5a8fSBenoît Canet /* walk down the bs forest recursively */ 6110212a5a8fSBenoît Canet QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 6111212a5a8fSBenoît Canet bool perm; 6112212a5a8fSBenoît Canet 6113b5042a36SBenoît Canet /* try to recurse in this top level bs */ 6114e6dc8a1fSKevin Wolf perm = bdrv_recurse_is_first_non_filter(bs, candidate); 6115212a5a8fSBenoît Canet 6116212a5a8fSBenoît Canet /* candidate is the first non filter */ 6117212a5a8fSBenoît Canet if (perm) { 6118212a5a8fSBenoît Canet return true; 6119212a5a8fSBenoît Canet } 6120212a5a8fSBenoît Canet } 6121212a5a8fSBenoît Canet 6122212a5a8fSBenoît Canet return false; 6123f6186f49SBenoît Canet } 612409158f00SBenoît Canet 612509158f00SBenoît Canet BlockDriverState *check_to_replace_node(const char *node_name, Error **errp) 612609158f00SBenoît Canet { 612709158f00SBenoît Canet BlockDriverState *to_replace_bs = bdrv_find_node(node_name); 61285a7e7a0bSStefan Hajnoczi AioContext *aio_context; 61295a7e7a0bSStefan Hajnoczi 613009158f00SBenoît Canet if (!to_replace_bs) { 613109158f00SBenoît Canet error_setg(errp, "Node name '%s' not found", node_name); 613209158f00SBenoît Canet return NULL; 613309158f00SBenoît Canet } 613409158f00SBenoît Canet 61355a7e7a0bSStefan Hajnoczi aio_context = bdrv_get_aio_context(to_replace_bs); 61365a7e7a0bSStefan Hajnoczi aio_context_acquire(aio_context); 61375a7e7a0bSStefan Hajnoczi 613809158f00SBenoît Canet if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) { 61395a7e7a0bSStefan Hajnoczi to_replace_bs = NULL; 61405a7e7a0bSStefan Hajnoczi goto out; 614109158f00SBenoît Canet } 614209158f00SBenoît Canet 614309158f00SBenoît Canet /* We don't want arbitrary node of the BDS chain to be replaced only the top 614409158f00SBenoît Canet * most non filter in order to prevent data corruption. 614509158f00SBenoît Canet * Another benefit is that this tests exclude backing files which are 614609158f00SBenoît Canet * blocked by the backing blockers. 614709158f00SBenoît Canet */ 614809158f00SBenoît Canet if (!bdrv_is_first_non_filter(to_replace_bs)) { 614909158f00SBenoît Canet error_setg(errp, "Only top most non filter can be replaced"); 61505a7e7a0bSStefan Hajnoczi to_replace_bs = NULL; 61515a7e7a0bSStefan Hajnoczi goto out; 615209158f00SBenoît Canet } 615309158f00SBenoît Canet 61545a7e7a0bSStefan Hajnoczi out: 61555a7e7a0bSStefan Hajnoczi aio_context_release(aio_context); 615609158f00SBenoît Canet return to_replace_bs; 615709158f00SBenoît Canet } 6158448ad91dSMing Lei 6159448ad91dSMing Lei void bdrv_io_plug(BlockDriverState *bs) 6160448ad91dSMing Lei { 6161448ad91dSMing Lei BlockDriver *drv = bs->drv; 6162448ad91dSMing Lei if (drv && drv->bdrv_io_plug) { 6163448ad91dSMing Lei drv->bdrv_io_plug(bs); 6164448ad91dSMing Lei } else if (bs->file) { 6165448ad91dSMing Lei bdrv_io_plug(bs->file); 6166448ad91dSMing Lei } 6167448ad91dSMing Lei } 6168448ad91dSMing Lei 6169448ad91dSMing Lei void bdrv_io_unplug(BlockDriverState *bs) 6170448ad91dSMing Lei { 6171448ad91dSMing Lei BlockDriver *drv = bs->drv; 6172448ad91dSMing Lei if (drv && drv->bdrv_io_unplug) { 6173448ad91dSMing Lei drv->bdrv_io_unplug(bs); 6174448ad91dSMing Lei } else if (bs->file) { 6175448ad91dSMing Lei bdrv_io_unplug(bs->file); 6176448ad91dSMing Lei } 6177448ad91dSMing Lei } 6178448ad91dSMing Lei 6179448ad91dSMing Lei void bdrv_flush_io_queue(BlockDriverState *bs) 6180448ad91dSMing Lei { 6181448ad91dSMing Lei BlockDriver *drv = bs->drv; 6182448ad91dSMing Lei if (drv && drv->bdrv_flush_io_queue) { 6183448ad91dSMing Lei drv->bdrv_flush_io_queue(bs); 6184448ad91dSMing Lei } else if (bs->file) { 6185448ad91dSMing Lei bdrv_flush_io_queue(bs->file); 6186448ad91dSMing Lei } 6187448ad91dSMing Lei } 618891af7014SMax Reitz 618991af7014SMax Reitz static bool append_open_options(QDict *d, BlockDriverState *bs) 619091af7014SMax Reitz { 619191af7014SMax Reitz const QDictEntry *entry; 619291af7014SMax Reitz bool found_any = false; 619391af7014SMax Reitz 619491af7014SMax Reitz for (entry = qdict_first(bs->options); entry; 619591af7014SMax Reitz entry = qdict_next(bs->options, entry)) 619691af7014SMax Reitz { 619791af7014SMax Reitz /* Only take options for this level and exclude all non-driver-specific 619891af7014SMax Reitz * options */ 619991af7014SMax Reitz if (!strchr(qdict_entry_key(entry), '.') && 620091af7014SMax Reitz strcmp(qdict_entry_key(entry), "node-name")) 620191af7014SMax Reitz { 620291af7014SMax Reitz qobject_incref(qdict_entry_value(entry)); 620391af7014SMax Reitz qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry)); 620491af7014SMax Reitz found_any = true; 620591af7014SMax Reitz } 620691af7014SMax Reitz } 620791af7014SMax Reitz 620891af7014SMax Reitz return found_any; 620991af7014SMax Reitz } 621091af7014SMax Reitz 621191af7014SMax Reitz /* Updates the following BDS fields: 621291af7014SMax Reitz * - exact_filename: A filename which may be used for opening a block device 621391af7014SMax Reitz * which (mostly) equals the given BDS (even without any 621491af7014SMax Reitz * other options; so reading and writing must return the same 621591af7014SMax Reitz * results, but caching etc. may be different) 621691af7014SMax Reitz * - full_open_options: Options which, when given when opening a block device 621791af7014SMax Reitz * (without a filename), result in a BDS (mostly) 621891af7014SMax Reitz * equalling the given one 621991af7014SMax Reitz * - filename: If exact_filename is set, it is copied here. Otherwise, 622091af7014SMax Reitz * full_open_options is converted to a JSON object, prefixed with 622191af7014SMax Reitz * "json:" (for use through the JSON pseudo protocol) and put here. 622291af7014SMax Reitz */ 622391af7014SMax Reitz void bdrv_refresh_filename(BlockDriverState *bs) 622491af7014SMax Reitz { 622591af7014SMax Reitz BlockDriver *drv = bs->drv; 622691af7014SMax Reitz QDict *opts; 622791af7014SMax Reitz 622891af7014SMax Reitz if (!drv) { 622991af7014SMax Reitz return; 623091af7014SMax Reitz } 623191af7014SMax Reitz 623291af7014SMax Reitz /* This BDS's file name will most probably depend on its file's name, so 623391af7014SMax Reitz * refresh that first */ 623491af7014SMax Reitz if (bs->file) { 623591af7014SMax Reitz bdrv_refresh_filename(bs->file); 623691af7014SMax Reitz } 623791af7014SMax Reitz 623891af7014SMax Reitz if (drv->bdrv_refresh_filename) { 623991af7014SMax Reitz /* Obsolete information is of no use here, so drop the old file name 624091af7014SMax Reitz * information before refreshing it */ 624191af7014SMax Reitz bs->exact_filename[0] = '\0'; 624291af7014SMax Reitz if (bs->full_open_options) { 624391af7014SMax Reitz QDECREF(bs->full_open_options); 624491af7014SMax Reitz bs->full_open_options = NULL; 624591af7014SMax Reitz } 624691af7014SMax Reitz 624791af7014SMax Reitz drv->bdrv_refresh_filename(bs); 624891af7014SMax Reitz } else if (bs->file) { 624991af7014SMax Reitz /* Try to reconstruct valid information from the underlying file */ 625091af7014SMax Reitz bool has_open_options; 625191af7014SMax Reitz 625291af7014SMax Reitz bs->exact_filename[0] = '\0'; 625391af7014SMax Reitz if (bs->full_open_options) { 625491af7014SMax Reitz QDECREF(bs->full_open_options); 625591af7014SMax Reitz bs->full_open_options = NULL; 625691af7014SMax Reitz } 625791af7014SMax Reitz 625891af7014SMax Reitz opts = qdict_new(); 625991af7014SMax Reitz has_open_options = append_open_options(opts, bs); 626091af7014SMax Reitz 626191af7014SMax Reitz /* If no specific options have been given for this BDS, the filename of 626291af7014SMax Reitz * the underlying file should suffice for this one as well */ 626391af7014SMax Reitz if (bs->file->exact_filename[0] && !has_open_options) { 626491af7014SMax Reitz strcpy(bs->exact_filename, bs->file->exact_filename); 626591af7014SMax Reitz } 626691af7014SMax Reitz /* Reconstructing the full options QDict is simple for most format block 626791af7014SMax Reitz * drivers, as long as the full options are known for the underlying 626891af7014SMax Reitz * file BDS. The full options QDict of that file BDS should somehow 626991af7014SMax Reitz * contain a representation of the filename, therefore the following 627091af7014SMax Reitz * suffices without querying the (exact_)filename of this BDS. */ 627191af7014SMax Reitz if (bs->file->full_open_options) { 627291af7014SMax Reitz qdict_put_obj(opts, "driver", 627391af7014SMax Reitz QOBJECT(qstring_from_str(drv->format_name))); 627491af7014SMax Reitz QINCREF(bs->file->full_open_options); 627591af7014SMax Reitz qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options)); 627691af7014SMax Reitz 627791af7014SMax Reitz bs->full_open_options = opts; 627891af7014SMax Reitz } else { 627991af7014SMax Reitz QDECREF(opts); 628091af7014SMax Reitz } 628191af7014SMax Reitz } else if (!bs->full_open_options && qdict_size(bs->options)) { 628291af7014SMax Reitz /* There is no underlying file BDS (at least referenced by BDS.file), 628391af7014SMax Reitz * so the full options QDict should be equal to the options given 628491af7014SMax Reitz * specifically for this block device when it was opened (plus the 628591af7014SMax Reitz * driver specification). 628691af7014SMax Reitz * Because those options don't change, there is no need to update 628791af7014SMax Reitz * full_open_options when it's already set. */ 628891af7014SMax Reitz 628991af7014SMax Reitz opts = qdict_new(); 629091af7014SMax Reitz append_open_options(opts, bs); 629191af7014SMax Reitz qdict_put_obj(opts, "driver", 629291af7014SMax Reitz QOBJECT(qstring_from_str(drv->format_name))); 629391af7014SMax Reitz 629491af7014SMax Reitz if (bs->exact_filename[0]) { 629591af7014SMax Reitz /* This may not work for all block protocol drivers (some may 629691af7014SMax Reitz * require this filename to be parsed), but we have to find some 629791af7014SMax Reitz * default solution here, so just include it. If some block driver 629891af7014SMax Reitz * does not support pure options without any filename at all or 629991af7014SMax Reitz * needs some special format of the options QDict, it needs to 630091af7014SMax Reitz * implement the driver-specific bdrv_refresh_filename() function. 630191af7014SMax Reitz */ 630291af7014SMax Reitz qdict_put_obj(opts, "filename", 630391af7014SMax Reitz QOBJECT(qstring_from_str(bs->exact_filename))); 630491af7014SMax Reitz } 630591af7014SMax Reitz 630691af7014SMax Reitz bs->full_open_options = opts; 630791af7014SMax Reitz } 630891af7014SMax Reitz 630991af7014SMax Reitz if (bs->exact_filename[0]) { 631091af7014SMax Reitz pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename); 631191af7014SMax Reitz } else if (bs->full_open_options) { 631291af7014SMax Reitz QString *json = qobject_to_json(QOBJECT(bs->full_open_options)); 631391af7014SMax Reitz snprintf(bs->filename, sizeof(bs->filename), "json:%s", 631491af7014SMax Reitz qstring_get_str(json)); 631591af7014SMax Reitz QDECREF(json); 631691af7014SMax Reitz } 631791af7014SMax Reitz } 63185366d0c8SBenoît Canet 63195366d0c8SBenoît Canet /* This accessor function purpose is to allow the device models to access the 63205366d0c8SBenoît Canet * BlockAcctStats structure embedded inside a BlockDriverState without being 63215366d0c8SBenoît Canet * aware of the BlockDriverState structure layout. 63225366d0c8SBenoît Canet * It will go away when the BlockAcctStats structure will be moved inside 63235366d0c8SBenoît Canet * the device models. 63245366d0c8SBenoît Canet */ 63255366d0c8SBenoît Canet BlockAcctStats *bdrv_get_stats(BlockDriverState *bs) 63265366d0c8SBenoît Canet { 63275366d0c8SBenoît Canet return &bs->stats; 63285366d0c8SBenoît Canet } 6329