1fc01f7e7Sbellard /* 2fc01f7e7Sbellard * QEMU System Emulator block driver 3fc01f7e7Sbellard * 4fc01f7e7Sbellard * Copyright (c) 2003 Fabrice Bellard 5fc01f7e7Sbellard * 6fc01f7e7Sbellard * Permission is hereby granted, free of charge, to any person obtaining a copy 7fc01f7e7Sbellard * of this software and associated documentation files (the "Software"), to deal 8fc01f7e7Sbellard * in the Software without restriction, including without limitation the rights 9fc01f7e7Sbellard * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10fc01f7e7Sbellard * copies of the Software, and to permit persons to whom the Software is 11fc01f7e7Sbellard * furnished to do so, subject to the following conditions: 12fc01f7e7Sbellard * 13fc01f7e7Sbellard * The above copyright notice and this permission notice shall be included in 14fc01f7e7Sbellard * all copies or substantial portions of the Software. 15fc01f7e7Sbellard * 16fc01f7e7Sbellard * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17fc01f7e7Sbellard * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18fc01f7e7Sbellard * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19fc01f7e7Sbellard * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20fc01f7e7Sbellard * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21fc01f7e7Sbellard * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22fc01f7e7Sbellard * THE SOFTWARE. 
23fc01f7e7Sbellard */ 243990d09aSblueswir1 #include "config-host.h" 25faf07963Spbrook #include "qemu-common.h" 266d519a5fSStefan Hajnoczi #include "trace.h" 27737e150eSPaolo Bonzini #include "block/block_int.h" 28737e150eSPaolo Bonzini #include "block/blockjob.h" 291de7afc9SPaolo Bonzini #include "qemu/module.h" 307b1b5d19SPaolo Bonzini #include "qapi/qmp/qjson.h" 31bfb197e0SMarkus Armbruster #include "sysemu/block-backend.h" 329c17d615SPaolo Bonzini #include "sysemu/sysemu.h" 33de50a20aSFam Zheng #include "sysemu/qtest.h" 341de7afc9SPaolo Bonzini #include "qemu/notify.h" 35737e150eSPaolo Bonzini #include "block/coroutine.h" 36c13163fbSBenoît Canet #include "block/qapi.h" 37b2023818SLuiz Capitulino #include "qmp-commands.h" 381de7afc9SPaolo Bonzini #include "qemu/timer.h" 39a5ee7bd4SWenchao Xia #include "qapi-event.h" 40fc01f7e7Sbellard 4171e72a19SJuan Quintela #ifdef CONFIG_BSD 427674e7bfSbellard #include <sys/types.h> 437674e7bfSbellard #include <sys/stat.h> 447674e7bfSbellard #include <sys/ioctl.h> 4572cf2d4fSBlue Swirl #include <sys/queue.h> 46c5e97233Sblueswir1 #ifndef __DragonFly__ 477674e7bfSbellard #include <sys/disk.h> 487674e7bfSbellard #endif 49c5e97233Sblueswir1 #endif 507674e7bfSbellard 5149dc768dSaliguori #ifdef _WIN32 5249dc768dSaliguori #include <windows.h> 5349dc768dSaliguori #endif 5449dc768dSaliguori 559bd2b08fSJohn Snow /** 569bd2b08fSJohn Snow * A BdrvDirtyBitmap can be in three possible states: 579bd2b08fSJohn Snow * (1) successor is NULL and disabled is false: full r/w mode 589bd2b08fSJohn Snow * (2) successor is NULL and disabled is true: read only mode ("disabled") 599bd2b08fSJohn Snow * (3) successor is set: frozen mode. 609bd2b08fSJohn Snow * A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set, 619bd2b08fSJohn Snow * or enabled. A frozen bitmap can only abdicate() or reclaim(). 
629bd2b08fSJohn Snow */ 63e4654d2dSFam Zheng struct BdrvDirtyBitmap { 64aa0c7ca5SJohn Snow HBitmap *bitmap; /* Dirty sector bitmap implementation */ 65aa0c7ca5SJohn Snow BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */ 66aa0c7ca5SJohn Snow char *name; /* Optional non-empty unique ID */ 67aa0c7ca5SJohn Snow int64_t size; /* Size of the bitmap (Number of sectors) */ 68aa0c7ca5SJohn Snow bool disabled; /* Bitmap is read-only */ 69e4654d2dSFam Zheng QLIST_ENTRY(BdrvDirtyBitmap) list; 70e4654d2dSFam Zheng }; 71e4654d2dSFam Zheng 721c9805a3SStefan Hajnoczi #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ 731c9805a3SStefan Hajnoczi 747c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs, 75f141eafeSaliguori int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 76097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque); 777c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs, 78f141eafeSaliguori int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 79097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque); 80f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs, 81f9f05dc5SKevin Wolf int64_t sector_num, int nb_sectors, 82f9f05dc5SKevin Wolf QEMUIOVector *iov); 83f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs, 84f9f05dc5SKevin Wolf int64_t sector_num, int nb_sectors, 85f9f05dc5SKevin Wolf QEMUIOVector *iov); 86775aa8b6SKevin Wolf static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs, 87775aa8b6SKevin Wolf int64_t offset, unsigned int bytes, QEMUIOVector *qiov, 88470c0504SStefan Hajnoczi BdrvRequestFlags flags); 89775aa8b6SKevin Wolf static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, 90775aa8b6SKevin Wolf int64_t offset, unsigned int bytes, QEMUIOVector *qiov, 91f08f2ddaSStefan Hajnoczi BdrvRequestFlags flags); 927c84b1b8SMarkus Armbruster static 
BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, 93b2a61371SStefan Hajnoczi int64_t sector_num, 94b2a61371SStefan Hajnoczi QEMUIOVector *qiov, 95b2a61371SStefan Hajnoczi int nb_sectors, 96d20d9b7cSPaolo Bonzini BdrvRequestFlags flags, 97097310b5SMarkus Armbruster BlockCompletionFunc *cb, 98b2a61371SStefan Hajnoczi void *opaque, 998c5873d6SStefan Hajnoczi bool is_write); 100b2a61371SStefan Hajnoczi static void coroutine_fn bdrv_co_do_rw(void *opaque); 101621f0589SKevin Wolf static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, 102aa7bfbffSPeter Lieven int64_t sector_num, int nb_sectors, BdrvRequestFlags flags); 103ec530c81Sbellard 1041b7bdbc1SStefan Hajnoczi static QTAILQ_HEAD(, BlockDriverState) bdrv_states = 1051b7bdbc1SStefan Hajnoczi QTAILQ_HEAD_INITIALIZER(bdrv_states); 1067ee930d0Sblueswir1 107dc364f4cSBenoît Canet static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states = 108dc364f4cSBenoît Canet QTAILQ_HEAD_INITIALIZER(graph_bdrv_states); 109dc364f4cSBenoît Canet 1108a22f02aSStefan Hajnoczi static QLIST_HEAD(, BlockDriver) bdrv_drivers = 1118a22f02aSStefan Hajnoczi QLIST_HEAD_INITIALIZER(bdrv_drivers); 112ea2384d3Sbellard 113c4237dfaSVladimir Sementsov-Ogievskiy static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, 114c4237dfaSVladimir Sementsov-Ogievskiy int nr_sectors); 115c4237dfaSVladimir Sementsov-Ogievskiy static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, 116c4237dfaSVladimir Sementsov-Ogievskiy int nr_sectors); 117*ce1ffea8SJohn Snow static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs); 118eb852011SMarkus Armbruster /* If non-zero, use only whitelisted block drivers */ 119eb852011SMarkus Armbruster static int use_bdrv_whitelist; 120eb852011SMarkus Armbruster 1219e0b22f4SStefan Hajnoczi #ifdef _WIN32 1229e0b22f4SStefan Hajnoczi static int is_windows_drive_prefix(const char *filename) 1239e0b22f4SStefan Hajnoczi { 1249e0b22f4SStefan Hajnoczi return (((filename[0] >= 'a' && filename[0] <= 
'z') || 1259e0b22f4SStefan Hajnoczi (filename[0] >= 'A' && filename[0] <= 'Z')) && 1269e0b22f4SStefan Hajnoczi filename[1] == ':'); 1279e0b22f4SStefan Hajnoczi } 1289e0b22f4SStefan Hajnoczi 1299e0b22f4SStefan Hajnoczi int is_windows_drive(const char *filename) 1309e0b22f4SStefan Hajnoczi { 1319e0b22f4SStefan Hajnoczi if (is_windows_drive_prefix(filename) && 1329e0b22f4SStefan Hajnoczi filename[2] == '\0') 1339e0b22f4SStefan Hajnoczi return 1; 1349e0b22f4SStefan Hajnoczi if (strstart(filename, "\\\\.\\", NULL) || 1359e0b22f4SStefan Hajnoczi strstart(filename, "//./", NULL)) 1369e0b22f4SStefan Hajnoczi return 1; 1379e0b22f4SStefan Hajnoczi return 0; 1389e0b22f4SStefan Hajnoczi } 1399e0b22f4SStefan Hajnoczi #endif 1409e0b22f4SStefan Hajnoczi 1410563e191SZhi Yong Wu /* throttling disk I/O limits */ 142cc0681c4SBenoît Canet void bdrv_set_io_limits(BlockDriverState *bs, 143cc0681c4SBenoît Canet ThrottleConfig *cfg) 144cc0681c4SBenoît Canet { 145cc0681c4SBenoît Canet int i; 146cc0681c4SBenoît Canet 147cc0681c4SBenoît Canet throttle_config(&bs->throttle_state, cfg); 148cc0681c4SBenoît Canet 149cc0681c4SBenoît Canet for (i = 0; i < 2; i++) { 150cc0681c4SBenoît Canet qemu_co_enter_next(&bs->throttled_reqs[i]); 151cc0681c4SBenoît Canet } 152cc0681c4SBenoît Canet } 153cc0681c4SBenoît Canet 154cc0681c4SBenoît Canet /* this function drain all the throttled IOs */ 155cc0681c4SBenoît Canet static bool bdrv_start_throttled_reqs(BlockDriverState *bs) 156cc0681c4SBenoît Canet { 157cc0681c4SBenoît Canet bool drained = false; 158cc0681c4SBenoît Canet bool enabled = bs->io_limits_enabled; 159cc0681c4SBenoît Canet int i; 160cc0681c4SBenoît Canet 161cc0681c4SBenoît Canet bs->io_limits_enabled = false; 162cc0681c4SBenoît Canet 163cc0681c4SBenoît Canet for (i = 0; i < 2; i++) { 164cc0681c4SBenoît Canet while (qemu_co_enter_next(&bs->throttled_reqs[i])) { 165cc0681c4SBenoît Canet drained = true; 166cc0681c4SBenoît Canet } 167cc0681c4SBenoît Canet } 168cc0681c4SBenoît Canet 169cc0681c4SBenoît 
Canet bs->io_limits_enabled = enabled; 170cc0681c4SBenoît Canet 171cc0681c4SBenoît Canet return drained; 172cc0681c4SBenoît Canet } 173cc0681c4SBenoît Canet 17498f90dbaSZhi Yong Wu void bdrv_io_limits_disable(BlockDriverState *bs) 17598f90dbaSZhi Yong Wu { 17698f90dbaSZhi Yong Wu bs->io_limits_enabled = false; 17798f90dbaSZhi Yong Wu 178cc0681c4SBenoît Canet bdrv_start_throttled_reqs(bs); 17998f90dbaSZhi Yong Wu 180cc0681c4SBenoît Canet throttle_destroy(&bs->throttle_state); 18198f90dbaSZhi Yong Wu } 18298f90dbaSZhi Yong Wu 183cc0681c4SBenoît Canet static void bdrv_throttle_read_timer_cb(void *opaque) 1840563e191SZhi Yong Wu { 1850563e191SZhi Yong Wu BlockDriverState *bs = opaque; 186cc0681c4SBenoît Canet qemu_co_enter_next(&bs->throttled_reqs[0]); 1870563e191SZhi Yong Wu } 1880563e191SZhi Yong Wu 189cc0681c4SBenoît Canet static void bdrv_throttle_write_timer_cb(void *opaque) 190cc0681c4SBenoît Canet { 191cc0681c4SBenoît Canet BlockDriverState *bs = opaque; 192cc0681c4SBenoît Canet qemu_co_enter_next(&bs->throttled_reqs[1]); 193cc0681c4SBenoît Canet } 194cc0681c4SBenoît Canet 195cc0681c4SBenoît Canet /* should be called before bdrv_set_io_limits if a limit is set */ 1960563e191SZhi Yong Wu void bdrv_io_limits_enable(BlockDriverState *bs) 1970563e191SZhi Yong Wu { 198de50a20aSFam Zheng int clock_type = QEMU_CLOCK_REALTIME; 199de50a20aSFam Zheng 200de50a20aSFam Zheng if (qtest_enabled()) { 201de50a20aSFam Zheng /* For testing block IO throttling only */ 202de50a20aSFam Zheng clock_type = QEMU_CLOCK_VIRTUAL; 203de50a20aSFam Zheng } 204cc0681c4SBenoît Canet assert(!bs->io_limits_enabled); 205cc0681c4SBenoît Canet throttle_init(&bs->throttle_state, 20613af91ebSStefan Hajnoczi bdrv_get_aio_context(bs), 207de50a20aSFam Zheng clock_type, 208cc0681c4SBenoît Canet bdrv_throttle_read_timer_cb, 209cc0681c4SBenoît Canet bdrv_throttle_write_timer_cb, 210cc0681c4SBenoît Canet bs); 2110563e191SZhi Yong Wu bs->io_limits_enabled = true; 2120563e191SZhi Yong Wu } 2130563e191SZhi Yong 
Wu 214cc0681c4SBenoît Canet /* This function makes an IO wait if needed 215cc0681c4SBenoît Canet * 216cc0681c4SBenoît Canet * @nb_sectors: the number of sectors of the IO 217cc0681c4SBenoît Canet * @is_write: is the IO a write 21898f90dbaSZhi Yong Wu */ 219cc0681c4SBenoît Canet static void bdrv_io_limits_intercept(BlockDriverState *bs, 220d5103588SKevin Wolf unsigned int bytes, 221cc0681c4SBenoît Canet bool is_write) 222cc0681c4SBenoît Canet { 223cc0681c4SBenoît Canet /* does this io must wait */ 224cc0681c4SBenoît Canet bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write); 22598f90dbaSZhi Yong Wu 226cc0681c4SBenoît Canet /* if must wait or any request of this type throttled queue the IO */ 227cc0681c4SBenoît Canet if (must_wait || 228cc0681c4SBenoît Canet !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) { 229cc0681c4SBenoît Canet qemu_co_queue_wait(&bs->throttled_reqs[is_write]); 23098f90dbaSZhi Yong Wu } 23198f90dbaSZhi Yong Wu 232cc0681c4SBenoît Canet /* the IO will be executed, do the accounting */ 233d5103588SKevin Wolf throttle_account(&bs->throttle_state, is_write, bytes); 234d5103588SKevin Wolf 235cc0681c4SBenoît Canet 236cc0681c4SBenoît Canet /* if the next request must wait -> do nothing */ 237cc0681c4SBenoît Canet if (throttle_schedule_timer(&bs->throttle_state, is_write)) { 238cc0681c4SBenoît Canet return; 239cc0681c4SBenoît Canet } 240cc0681c4SBenoît Canet 241cc0681c4SBenoît Canet /* else queue next request for execution */ 242cc0681c4SBenoît Canet qemu_co_queue_next(&bs->throttled_reqs[is_write]); 24398f90dbaSZhi Yong Wu } 24498f90dbaSZhi Yong Wu 245339064d5SKevin Wolf size_t bdrv_opt_mem_align(BlockDriverState *bs) 246339064d5SKevin Wolf { 247339064d5SKevin Wolf if (!bs || !bs->drv) { 248339064d5SKevin Wolf /* 4k should be on the safe side */ 249339064d5SKevin Wolf return 4096; 250339064d5SKevin Wolf } 251339064d5SKevin Wolf 252339064d5SKevin Wolf return bs->bl.opt_mem_alignment; 253339064d5SKevin Wolf } 254339064d5SKevin Wolf 
2559e0b22f4SStefan Hajnoczi /* check if the path starts with "<protocol>:" */ 2565c98415bSMax Reitz int path_has_protocol(const char *path) 2579e0b22f4SStefan Hajnoczi { 258947995c0SPaolo Bonzini const char *p; 259947995c0SPaolo Bonzini 2609e0b22f4SStefan Hajnoczi #ifdef _WIN32 2619e0b22f4SStefan Hajnoczi if (is_windows_drive(path) || 2629e0b22f4SStefan Hajnoczi is_windows_drive_prefix(path)) { 2639e0b22f4SStefan Hajnoczi return 0; 2649e0b22f4SStefan Hajnoczi } 265947995c0SPaolo Bonzini p = path + strcspn(path, ":/\\"); 266947995c0SPaolo Bonzini #else 267947995c0SPaolo Bonzini p = path + strcspn(path, ":/"); 2689e0b22f4SStefan Hajnoczi #endif 2699e0b22f4SStefan Hajnoczi 270947995c0SPaolo Bonzini return *p == ':'; 2719e0b22f4SStefan Hajnoczi } 2729e0b22f4SStefan Hajnoczi 27383f64091Sbellard int path_is_absolute(const char *path) 27483f64091Sbellard { 27521664424Sbellard #ifdef _WIN32 27621664424Sbellard /* specific case for names like: "\\.\d:" */ 277f53f4da9SPaolo Bonzini if (is_windows_drive(path) || is_windows_drive_prefix(path)) { 27821664424Sbellard return 1; 279f53f4da9SPaolo Bonzini } 280f53f4da9SPaolo Bonzini return (*path == '/' || *path == '\\'); 2813b9f94e1Sbellard #else 282f53f4da9SPaolo Bonzini return (*path == '/'); 2833b9f94e1Sbellard #endif 28483f64091Sbellard } 28583f64091Sbellard 28683f64091Sbellard /* if filename is absolute, just copy it to dest. Otherwise, build a 28783f64091Sbellard path to it by considering it is relative to base_path. URL are 28883f64091Sbellard supported. 
*/ 28983f64091Sbellard void path_combine(char *dest, int dest_size, 29083f64091Sbellard const char *base_path, 29183f64091Sbellard const char *filename) 29283f64091Sbellard { 29383f64091Sbellard const char *p, *p1; 29483f64091Sbellard int len; 29583f64091Sbellard 29683f64091Sbellard if (dest_size <= 0) 29783f64091Sbellard return; 29883f64091Sbellard if (path_is_absolute(filename)) { 29983f64091Sbellard pstrcpy(dest, dest_size, filename); 30083f64091Sbellard } else { 30183f64091Sbellard p = strchr(base_path, ':'); 30283f64091Sbellard if (p) 30383f64091Sbellard p++; 30483f64091Sbellard else 30583f64091Sbellard p = base_path; 3063b9f94e1Sbellard p1 = strrchr(base_path, '/'); 3073b9f94e1Sbellard #ifdef _WIN32 3083b9f94e1Sbellard { 3093b9f94e1Sbellard const char *p2; 3103b9f94e1Sbellard p2 = strrchr(base_path, '\\'); 3113b9f94e1Sbellard if (!p1 || p2 > p1) 3123b9f94e1Sbellard p1 = p2; 3133b9f94e1Sbellard } 3143b9f94e1Sbellard #endif 31583f64091Sbellard if (p1) 31683f64091Sbellard p1++; 31783f64091Sbellard else 31883f64091Sbellard p1 = base_path; 31983f64091Sbellard if (p1 > p) 32083f64091Sbellard p = p1; 32183f64091Sbellard len = p - base_path; 32283f64091Sbellard if (len > dest_size - 1) 32383f64091Sbellard len = dest_size - 1; 32483f64091Sbellard memcpy(dest, base_path, len); 32583f64091Sbellard dest[len] = '\0'; 32683f64091Sbellard pstrcat(dest, dest_size, filename); 32783f64091Sbellard } 32883f64091Sbellard } 32983f64091Sbellard 3300a82855aSMax Reitz void bdrv_get_full_backing_filename_from_filename(const char *backed, 3310a82855aSMax Reitz const char *backing, 3329f07429eSMax Reitz char *dest, size_t sz, 3339f07429eSMax Reitz Error **errp) 3340a82855aSMax Reitz { 3359f07429eSMax Reitz if (backing[0] == '\0' || path_has_protocol(backing) || 3369f07429eSMax Reitz path_is_absolute(backing)) 3379f07429eSMax Reitz { 3380a82855aSMax Reitz pstrcpy(dest, sz, backing); 3399f07429eSMax Reitz } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) { 3409f07429eSMax 
Reitz error_setg(errp, "Cannot use relative backing file names for '%s'", 3419f07429eSMax Reitz backed); 3420a82855aSMax Reitz } else { 3430a82855aSMax Reitz path_combine(dest, sz, backed, backing); 3440a82855aSMax Reitz } 3450a82855aSMax Reitz } 3460a82855aSMax Reitz 3479f07429eSMax Reitz void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz, 3489f07429eSMax Reitz Error **errp) 349dc5a1371SPaolo Bonzini { 3509f07429eSMax Reitz char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename; 3519f07429eSMax Reitz 3529f07429eSMax Reitz bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file, 3539f07429eSMax Reitz dest, sz, errp); 354dc5a1371SPaolo Bonzini } 355dc5a1371SPaolo Bonzini 3565efa9d5aSAnthony Liguori void bdrv_register(BlockDriver *bdrv) 357ea2384d3Sbellard { 3588c5873d6SStefan Hajnoczi /* Block drivers without coroutine functions need emulation */ 3598c5873d6SStefan Hajnoczi if (!bdrv->bdrv_co_readv) { 360f9f05dc5SKevin Wolf bdrv->bdrv_co_readv = bdrv_co_readv_em; 361f9f05dc5SKevin Wolf bdrv->bdrv_co_writev = bdrv_co_writev_em; 362f9f05dc5SKevin Wolf 363f8c35c1dSStefan Hajnoczi /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if 364f8c35c1dSStefan Hajnoczi * the block driver lacks aio we need to emulate that too. 
365f8c35c1dSStefan Hajnoczi */ 366f9f05dc5SKevin Wolf if (!bdrv->bdrv_aio_readv) { 36783f64091Sbellard /* add AIO emulation layer */ 368f141eafeSaliguori bdrv->bdrv_aio_readv = bdrv_aio_readv_em; 369f141eafeSaliguori bdrv->bdrv_aio_writev = bdrv_aio_writev_em; 37083f64091Sbellard } 371f9f05dc5SKevin Wolf } 372b2e12bc6SChristoph Hellwig 3738a22f02aSStefan Hajnoczi QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list); 374ea2384d3Sbellard } 375b338082bSbellard 3767f06d47eSMarkus Armbruster BlockDriverState *bdrv_new_root(void) 377fc01f7e7Sbellard { 3787f06d47eSMarkus Armbruster BlockDriverState *bs = bdrv_new(); 379e4e9986bSMarkus Armbruster 380e4e9986bSMarkus Armbruster QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list); 381e4e9986bSMarkus Armbruster return bs; 382e4e9986bSMarkus Armbruster } 383e4e9986bSMarkus Armbruster 384e4e9986bSMarkus Armbruster BlockDriverState *bdrv_new(void) 385e4e9986bSMarkus Armbruster { 386e4e9986bSMarkus Armbruster BlockDriverState *bs; 387e4e9986bSMarkus Armbruster int i; 388e4e9986bSMarkus Armbruster 3895839e53bSMarkus Armbruster bs = g_new0(BlockDriverState, 1); 390e4654d2dSFam Zheng QLIST_INIT(&bs->dirty_bitmaps); 391fbe40ff7SFam Zheng for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 392fbe40ff7SFam Zheng QLIST_INIT(&bs->op_blockers[i]); 393fbe40ff7SFam Zheng } 39428a7282aSLuiz Capitulino bdrv_iostatus_disable(bs); 395d7d512f6SPaolo Bonzini notifier_list_init(&bs->close_notifiers); 396d616b224SStefan Hajnoczi notifier_with_return_list_init(&bs->before_write_notifiers); 397cc0681c4SBenoît Canet qemu_co_queue_init(&bs->throttled_reqs[0]); 398cc0681c4SBenoît Canet qemu_co_queue_init(&bs->throttled_reqs[1]); 3999fcb0251SFam Zheng bs->refcnt = 1; 400dcd04228SStefan Hajnoczi bs->aio_context = qemu_get_aio_context(); 401d7d512f6SPaolo Bonzini 402b338082bSbellard return bs; 403b338082bSbellard } 404b338082bSbellard 405d7d512f6SPaolo Bonzini void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify) 406d7d512f6SPaolo Bonzini { 407d7d512f6SPaolo 
Bonzini notifier_list_add(&bs->close_notifiers, notify); 408d7d512f6SPaolo Bonzini } 409d7d512f6SPaolo Bonzini 410ea2384d3Sbellard BlockDriver *bdrv_find_format(const char *format_name) 411ea2384d3Sbellard { 412ea2384d3Sbellard BlockDriver *drv1; 4138a22f02aSStefan Hajnoczi QLIST_FOREACH(drv1, &bdrv_drivers, list) { 4148a22f02aSStefan Hajnoczi if (!strcmp(drv1->format_name, format_name)) { 415ea2384d3Sbellard return drv1; 416ea2384d3Sbellard } 4178a22f02aSStefan Hajnoczi } 418ea2384d3Sbellard return NULL; 419ea2384d3Sbellard } 420ea2384d3Sbellard 421b64ec4e4SFam Zheng static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only) 422eb852011SMarkus Armbruster { 423b64ec4e4SFam Zheng static const char *whitelist_rw[] = { 424b64ec4e4SFam Zheng CONFIG_BDRV_RW_WHITELIST 425b64ec4e4SFam Zheng }; 426b64ec4e4SFam Zheng static const char *whitelist_ro[] = { 427b64ec4e4SFam Zheng CONFIG_BDRV_RO_WHITELIST 428eb852011SMarkus Armbruster }; 429eb852011SMarkus Armbruster const char **p; 430eb852011SMarkus Armbruster 431b64ec4e4SFam Zheng if (!whitelist_rw[0] && !whitelist_ro[0]) { 432eb852011SMarkus Armbruster return 1; /* no whitelist, anything goes */ 433b64ec4e4SFam Zheng } 434eb852011SMarkus Armbruster 435b64ec4e4SFam Zheng for (p = whitelist_rw; *p; p++) { 436eb852011SMarkus Armbruster if (!strcmp(drv->format_name, *p)) { 437eb852011SMarkus Armbruster return 1; 438eb852011SMarkus Armbruster } 439eb852011SMarkus Armbruster } 440b64ec4e4SFam Zheng if (read_only) { 441b64ec4e4SFam Zheng for (p = whitelist_ro; *p; p++) { 442b64ec4e4SFam Zheng if (!strcmp(drv->format_name, *p)) { 443b64ec4e4SFam Zheng return 1; 444b64ec4e4SFam Zheng } 445b64ec4e4SFam Zheng } 446b64ec4e4SFam Zheng } 447eb852011SMarkus Armbruster return 0; 448eb852011SMarkus Armbruster } 449eb852011SMarkus Armbruster 450b64ec4e4SFam Zheng BlockDriver *bdrv_find_whitelisted_format(const char *format_name, 451b64ec4e4SFam Zheng bool read_only) 452eb852011SMarkus Armbruster { 453eb852011SMarkus Armbruster 
BlockDriver *drv = bdrv_find_format(format_name); 454b64ec4e4SFam Zheng return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL; 455eb852011SMarkus Armbruster } 456eb852011SMarkus Armbruster 4575b7e1542SZhi Yong Wu typedef struct CreateCo { 4585b7e1542SZhi Yong Wu BlockDriver *drv; 4595b7e1542SZhi Yong Wu char *filename; 46083d0521aSChunyan Liu QemuOpts *opts; 4615b7e1542SZhi Yong Wu int ret; 462cc84d90fSMax Reitz Error *err; 4635b7e1542SZhi Yong Wu } CreateCo; 4645b7e1542SZhi Yong Wu 4655b7e1542SZhi Yong Wu static void coroutine_fn bdrv_create_co_entry(void *opaque) 4665b7e1542SZhi Yong Wu { 467cc84d90fSMax Reitz Error *local_err = NULL; 468cc84d90fSMax Reitz int ret; 469cc84d90fSMax Reitz 4705b7e1542SZhi Yong Wu CreateCo *cco = opaque; 4715b7e1542SZhi Yong Wu assert(cco->drv); 4725b7e1542SZhi Yong Wu 473c282e1fdSChunyan Liu ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err); 47484d18f06SMarkus Armbruster if (local_err) { 475cc84d90fSMax Reitz error_propagate(&cco->err, local_err); 476cc84d90fSMax Reitz } 477cc84d90fSMax Reitz cco->ret = ret; 4785b7e1542SZhi Yong Wu } 4795b7e1542SZhi Yong Wu 4800e7e1989SKevin Wolf int bdrv_create(BlockDriver *drv, const char* filename, 48183d0521aSChunyan Liu QemuOpts *opts, Error **errp) 482ea2384d3Sbellard { 4835b7e1542SZhi Yong Wu int ret; 4840e7e1989SKevin Wolf 4855b7e1542SZhi Yong Wu Coroutine *co; 4865b7e1542SZhi Yong Wu CreateCo cco = { 4875b7e1542SZhi Yong Wu .drv = drv, 4885b7e1542SZhi Yong Wu .filename = g_strdup(filename), 48983d0521aSChunyan Liu .opts = opts, 4905b7e1542SZhi Yong Wu .ret = NOT_DONE, 491cc84d90fSMax Reitz .err = NULL, 4925b7e1542SZhi Yong Wu }; 4935b7e1542SZhi Yong Wu 494c282e1fdSChunyan Liu if (!drv->bdrv_create) { 495cc84d90fSMax Reitz error_setg(errp, "Driver '%s' does not support image creation", drv->format_name); 49680168bffSLuiz Capitulino ret = -ENOTSUP; 49780168bffSLuiz Capitulino goto out; 4985b7e1542SZhi Yong Wu } 4995b7e1542SZhi Yong Wu 5005b7e1542SZhi Yong Wu if 
(qemu_in_coroutine()) { 5015b7e1542SZhi Yong Wu /* Fast-path if already in coroutine context */ 5025b7e1542SZhi Yong Wu bdrv_create_co_entry(&cco); 5035b7e1542SZhi Yong Wu } else { 5045b7e1542SZhi Yong Wu co = qemu_coroutine_create(bdrv_create_co_entry); 5055b7e1542SZhi Yong Wu qemu_coroutine_enter(co, &cco); 5065b7e1542SZhi Yong Wu while (cco.ret == NOT_DONE) { 507b47ec2c4SPaolo Bonzini aio_poll(qemu_get_aio_context(), true); 5085b7e1542SZhi Yong Wu } 5095b7e1542SZhi Yong Wu } 5105b7e1542SZhi Yong Wu 5115b7e1542SZhi Yong Wu ret = cco.ret; 512cc84d90fSMax Reitz if (ret < 0) { 51384d18f06SMarkus Armbruster if (cco.err) { 514cc84d90fSMax Reitz error_propagate(errp, cco.err); 515cc84d90fSMax Reitz } else { 516cc84d90fSMax Reitz error_setg_errno(errp, -ret, "Could not create image"); 517cc84d90fSMax Reitz } 518cc84d90fSMax Reitz } 5195b7e1542SZhi Yong Wu 52080168bffSLuiz Capitulino out: 52180168bffSLuiz Capitulino g_free(cco.filename); 5225b7e1542SZhi Yong Wu return ret; 523ea2384d3Sbellard } 524ea2384d3Sbellard 525c282e1fdSChunyan Liu int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) 52684a12e66SChristoph Hellwig { 52784a12e66SChristoph Hellwig BlockDriver *drv; 528cc84d90fSMax Reitz Error *local_err = NULL; 529cc84d90fSMax Reitz int ret; 53084a12e66SChristoph Hellwig 531b65a5e12SMax Reitz drv = bdrv_find_protocol(filename, true, errp); 53284a12e66SChristoph Hellwig if (drv == NULL) { 53316905d71SStefan Hajnoczi return -ENOENT; 53484a12e66SChristoph Hellwig } 53584a12e66SChristoph Hellwig 536c282e1fdSChunyan Liu ret = bdrv_create(drv, filename, opts, &local_err); 53784d18f06SMarkus Armbruster if (local_err) { 538cc84d90fSMax Reitz error_propagate(errp, local_err); 539cc84d90fSMax Reitz } 540cc84d90fSMax Reitz return ret; 54184a12e66SChristoph Hellwig } 54284a12e66SChristoph Hellwig 5433baca891SKevin Wolf void bdrv_refresh_limits(BlockDriverState *bs, Error **errp) 544d34682cdSKevin Wolf { 545d34682cdSKevin Wolf BlockDriver *drv = bs->drv; 
5463baca891SKevin Wolf Error *local_err = NULL; 547d34682cdSKevin Wolf 548d34682cdSKevin Wolf memset(&bs->bl, 0, sizeof(bs->bl)); 549d34682cdSKevin Wolf 550466ad822SKevin Wolf if (!drv) { 5513baca891SKevin Wolf return; 552466ad822SKevin Wolf } 553466ad822SKevin Wolf 554466ad822SKevin Wolf /* Take some limits from the children as a default */ 555466ad822SKevin Wolf if (bs->file) { 5563baca891SKevin Wolf bdrv_refresh_limits(bs->file, &local_err); 5573baca891SKevin Wolf if (local_err) { 5583baca891SKevin Wolf error_propagate(errp, local_err); 5593baca891SKevin Wolf return; 5603baca891SKevin Wolf } 561466ad822SKevin Wolf bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length; 5622647fab5SPeter Lieven bs->bl.max_transfer_length = bs->file->bl.max_transfer_length; 563339064d5SKevin Wolf bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment; 564339064d5SKevin Wolf } else { 565339064d5SKevin Wolf bs->bl.opt_mem_alignment = 512; 566466ad822SKevin Wolf } 567466ad822SKevin Wolf 568466ad822SKevin Wolf if (bs->backing_hd) { 5693baca891SKevin Wolf bdrv_refresh_limits(bs->backing_hd, &local_err); 5703baca891SKevin Wolf if (local_err) { 5713baca891SKevin Wolf error_propagate(errp, local_err); 5723baca891SKevin Wolf return; 5733baca891SKevin Wolf } 574466ad822SKevin Wolf bs->bl.opt_transfer_length = 575466ad822SKevin Wolf MAX(bs->bl.opt_transfer_length, 576466ad822SKevin Wolf bs->backing_hd->bl.opt_transfer_length); 5772647fab5SPeter Lieven bs->bl.max_transfer_length = 5782647fab5SPeter Lieven MIN_NON_ZERO(bs->bl.max_transfer_length, 5792647fab5SPeter Lieven bs->backing_hd->bl.max_transfer_length); 580339064d5SKevin Wolf bs->bl.opt_mem_alignment = 581339064d5SKevin Wolf MAX(bs->bl.opt_mem_alignment, 582339064d5SKevin Wolf bs->backing_hd->bl.opt_mem_alignment); 583466ad822SKevin Wolf } 584466ad822SKevin Wolf 585466ad822SKevin Wolf /* Then let the driver override it */ 586466ad822SKevin Wolf if (drv->bdrv_refresh_limits) { 5873baca891SKevin Wolf 
drv->bdrv_refresh_limits(bs, errp); 588d34682cdSKevin Wolf } 589d34682cdSKevin Wolf } 590d34682cdSKevin Wolf 591892b7de8SEkaterina Tumanova /** 592892b7de8SEkaterina Tumanova * Try to get @bs's logical and physical block size. 593892b7de8SEkaterina Tumanova * On success, store them in @bsz struct and return 0. 594892b7de8SEkaterina Tumanova * On failure return -errno. 595892b7de8SEkaterina Tumanova * @bs must not be empty. 596892b7de8SEkaterina Tumanova */ 597892b7de8SEkaterina Tumanova int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) 598892b7de8SEkaterina Tumanova { 599892b7de8SEkaterina Tumanova BlockDriver *drv = bs->drv; 600892b7de8SEkaterina Tumanova 601892b7de8SEkaterina Tumanova if (drv && drv->bdrv_probe_blocksizes) { 602892b7de8SEkaterina Tumanova return drv->bdrv_probe_blocksizes(bs, bsz); 603892b7de8SEkaterina Tumanova } 604892b7de8SEkaterina Tumanova 605892b7de8SEkaterina Tumanova return -ENOTSUP; 606892b7de8SEkaterina Tumanova } 607892b7de8SEkaterina Tumanova 608892b7de8SEkaterina Tumanova /** 609892b7de8SEkaterina Tumanova * Try to get @bs's geometry (cyls, heads, sectors). 610892b7de8SEkaterina Tumanova * On success, store them in @geo struct and return 0. 611892b7de8SEkaterina Tumanova * On failure return -errno. 612892b7de8SEkaterina Tumanova * @bs must not be empty. 613892b7de8SEkaterina Tumanova */ 614892b7de8SEkaterina Tumanova int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo) 615892b7de8SEkaterina Tumanova { 616892b7de8SEkaterina Tumanova BlockDriver *drv = bs->drv; 617892b7de8SEkaterina Tumanova 618892b7de8SEkaterina Tumanova if (drv && drv->bdrv_probe_geometry) { 619892b7de8SEkaterina Tumanova return drv->bdrv_probe_geometry(bs, geo); 620892b7de8SEkaterina Tumanova } 621892b7de8SEkaterina Tumanova 622892b7de8SEkaterina Tumanova return -ENOTSUP; 623892b7de8SEkaterina Tumanova } 624892b7de8SEkaterina Tumanova 625eba25057SJim Meyering /* 626eba25057SJim Meyering * Create a uniquely-named empty temporary file. 
627eba25057SJim Meyering * Return 0 upon success, otherwise a negative errno value. 628eba25057SJim Meyering */ 629eba25057SJim Meyering int get_tmp_filename(char *filename, int size) 630eba25057SJim Meyering { 631d5249393Sbellard #ifdef _WIN32 6323b9f94e1Sbellard char temp_dir[MAX_PATH]; 633eba25057SJim Meyering /* GetTempFileName requires that its output buffer (4th param) 634eba25057SJim Meyering have length MAX_PATH or greater. */ 635eba25057SJim Meyering assert(size >= MAX_PATH); 636eba25057SJim Meyering return (GetTempPath(MAX_PATH, temp_dir) 637eba25057SJim Meyering && GetTempFileName(temp_dir, "qem", 0, filename) 638eba25057SJim Meyering ? 0 : -GetLastError()); 639d5249393Sbellard #else 640ea2384d3Sbellard int fd; 6417ccfb2ebSblueswir1 const char *tmpdir; 6420badc1eeSaurel32 tmpdir = getenv("TMPDIR"); 64369bef793SAmit Shah if (!tmpdir) { 64469bef793SAmit Shah tmpdir = "/var/tmp"; 64569bef793SAmit Shah } 646eba25057SJim Meyering if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) { 647eba25057SJim Meyering return -EOVERFLOW; 648ea2384d3Sbellard } 649eba25057SJim Meyering fd = mkstemp(filename); 650fe235a06SDunrong Huang if (fd < 0) { 651fe235a06SDunrong Huang return -errno; 652fe235a06SDunrong Huang } 653fe235a06SDunrong Huang if (close(fd) != 0) { 654fe235a06SDunrong Huang unlink(filename); 655eba25057SJim Meyering return -errno; 656eba25057SJim Meyering } 657eba25057SJim Meyering return 0; 658d5249393Sbellard #endif 659eba25057SJim Meyering } 660ea2384d3Sbellard 661f3a5d3f8SChristoph Hellwig /* 662f3a5d3f8SChristoph Hellwig * Detect host devices. By convention, /dev/cdrom[N] is always 663f3a5d3f8SChristoph Hellwig * recognized as a host CDROM. 
664f3a5d3f8SChristoph Hellwig */ 665f3a5d3f8SChristoph Hellwig static BlockDriver *find_hdev_driver(const char *filename) 666f3a5d3f8SChristoph Hellwig { 667508c7cb3SChristoph Hellwig int score_max = 0, score; 668508c7cb3SChristoph Hellwig BlockDriver *drv = NULL, *d; 669f3a5d3f8SChristoph Hellwig 6708a22f02aSStefan Hajnoczi QLIST_FOREACH(d, &bdrv_drivers, list) { 671508c7cb3SChristoph Hellwig if (d->bdrv_probe_device) { 672508c7cb3SChristoph Hellwig score = d->bdrv_probe_device(filename); 673508c7cb3SChristoph Hellwig if (score > score_max) { 674508c7cb3SChristoph Hellwig score_max = score; 675508c7cb3SChristoph Hellwig drv = d; 676f3a5d3f8SChristoph Hellwig } 677508c7cb3SChristoph Hellwig } 678f3a5d3f8SChristoph Hellwig } 679f3a5d3f8SChristoph Hellwig 680508c7cb3SChristoph Hellwig return drv; 681f3a5d3f8SChristoph Hellwig } 682f3a5d3f8SChristoph Hellwig 68398289620SKevin Wolf BlockDriver *bdrv_find_protocol(const char *filename, 684b65a5e12SMax Reitz bool allow_protocol_prefix, 685b65a5e12SMax Reitz Error **errp) 68684a12e66SChristoph Hellwig { 68784a12e66SChristoph Hellwig BlockDriver *drv1; 68884a12e66SChristoph Hellwig char protocol[128]; 68984a12e66SChristoph Hellwig int len; 69084a12e66SChristoph Hellwig const char *p; 69184a12e66SChristoph Hellwig 69266f82ceeSKevin Wolf /* TODO Drivers without bdrv_file_open must be specified explicitly */ 69366f82ceeSKevin Wolf 69439508e7aSChristoph Hellwig /* 69539508e7aSChristoph Hellwig * XXX(hch): we really should not let host device detection 69639508e7aSChristoph Hellwig * override an explicit protocol specification, but moving this 69739508e7aSChristoph Hellwig * later breaks access to device names with colons in them. 69839508e7aSChristoph Hellwig * Thanks to the brain-dead persistent naming schemes on udev- 69939508e7aSChristoph Hellwig * based Linux systems those actually are quite common. 
70039508e7aSChristoph Hellwig */ 70184a12e66SChristoph Hellwig drv1 = find_hdev_driver(filename); 70239508e7aSChristoph Hellwig if (drv1) { 70384a12e66SChristoph Hellwig return drv1; 70484a12e66SChristoph Hellwig } 70539508e7aSChristoph Hellwig 70698289620SKevin Wolf if (!path_has_protocol(filename) || !allow_protocol_prefix) { 707ef810437SMax Reitz return &bdrv_file; 70839508e7aSChristoph Hellwig } 70998289620SKevin Wolf 7109e0b22f4SStefan Hajnoczi p = strchr(filename, ':'); 7119e0b22f4SStefan Hajnoczi assert(p != NULL); 71284a12e66SChristoph Hellwig len = p - filename; 71384a12e66SChristoph Hellwig if (len > sizeof(protocol) - 1) 71484a12e66SChristoph Hellwig len = sizeof(protocol) - 1; 71584a12e66SChristoph Hellwig memcpy(protocol, filename, len); 71684a12e66SChristoph Hellwig protocol[len] = '\0'; 71784a12e66SChristoph Hellwig QLIST_FOREACH(drv1, &bdrv_drivers, list) { 71884a12e66SChristoph Hellwig if (drv1->protocol_name && 71984a12e66SChristoph Hellwig !strcmp(drv1->protocol_name, protocol)) { 72084a12e66SChristoph Hellwig return drv1; 72184a12e66SChristoph Hellwig } 72284a12e66SChristoph Hellwig } 723b65a5e12SMax Reitz 724b65a5e12SMax Reitz error_setg(errp, "Unknown protocol '%s'", protocol); 72584a12e66SChristoph Hellwig return NULL; 72684a12e66SChristoph Hellwig } 72784a12e66SChristoph Hellwig 728c6684249SMarkus Armbruster /* 729c6684249SMarkus Armbruster * Guess image format by probing its contents. 730c6684249SMarkus Armbruster * This is not a good idea when your image is raw (CVE-2008-2004), but 731c6684249SMarkus Armbruster * we do it anyway for backward compatibility. 732c6684249SMarkus Armbruster * 733c6684249SMarkus Armbruster * @buf contains the image's first @buf_size bytes. 7347cddd372SKevin Wolf * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE, 7357cddd372SKevin Wolf * but can be smaller if the image file is smaller) 736c6684249SMarkus Armbruster * @filename is its filename. 
737c6684249SMarkus Armbruster * 738c6684249SMarkus Armbruster * For all block drivers, call the bdrv_probe() method to get its 739c6684249SMarkus Armbruster * probing score. 740c6684249SMarkus Armbruster * Return the first block driver with the highest probing score. 741c6684249SMarkus Armbruster */ 74238f3ef57SKevin Wolf BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, 743c6684249SMarkus Armbruster const char *filename) 744c6684249SMarkus Armbruster { 745c6684249SMarkus Armbruster int score_max = 0, score; 746c6684249SMarkus Armbruster BlockDriver *drv = NULL, *d; 747c6684249SMarkus Armbruster 748c6684249SMarkus Armbruster QLIST_FOREACH(d, &bdrv_drivers, list) { 749c6684249SMarkus Armbruster if (d->bdrv_probe) { 750c6684249SMarkus Armbruster score = d->bdrv_probe(buf, buf_size, filename); 751c6684249SMarkus Armbruster if (score > score_max) { 752c6684249SMarkus Armbruster score_max = score; 753c6684249SMarkus Armbruster drv = d; 754c6684249SMarkus Armbruster } 755c6684249SMarkus Armbruster } 756c6684249SMarkus Armbruster } 757c6684249SMarkus Armbruster 758c6684249SMarkus Armbruster return drv; 759c6684249SMarkus Armbruster } 760c6684249SMarkus Armbruster 761f500a6d3SKevin Wolf static int find_image_format(BlockDriverState *bs, const char *filename, 76234b5d2c6SMax Reitz BlockDriver **pdrv, Error **errp) 763ea2384d3Sbellard { 764c6684249SMarkus Armbruster BlockDriver *drv; 7657cddd372SKevin Wolf uint8_t buf[BLOCK_PROBE_BUF_SIZE]; 766f500a6d3SKevin Wolf int ret = 0; 767f8ea0b00SNicholas Bellinger 76808a00559SKevin Wolf /* Return the raw BlockDriver * to scsi-generic devices or empty drives */ 7698e895599SPaolo Bonzini if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) { 770ef810437SMax Reitz *pdrv = &bdrv_raw; 771c98ac35dSStefan Weil return ret; 7721a396859SNicholas A. 
Bellinger } 773f8ea0b00SNicholas Bellinger 77483f64091Sbellard ret = bdrv_pread(bs, 0, buf, sizeof(buf)); 775ea2384d3Sbellard if (ret < 0) { 77634b5d2c6SMax Reitz error_setg_errno(errp, -ret, "Could not read image for determining its " 77734b5d2c6SMax Reitz "format"); 778c98ac35dSStefan Weil *pdrv = NULL; 779c98ac35dSStefan Weil return ret; 780ea2384d3Sbellard } 781ea2384d3Sbellard 782c6684249SMarkus Armbruster drv = bdrv_probe_all(buf, ret, filename); 783c98ac35dSStefan Weil if (!drv) { 78434b5d2c6SMax Reitz error_setg(errp, "Could not determine image format: No compatible " 78534b5d2c6SMax Reitz "driver found"); 786c98ac35dSStefan Weil ret = -ENOENT; 787c98ac35dSStefan Weil } 788c98ac35dSStefan Weil *pdrv = drv; 789c98ac35dSStefan Weil return ret; 790ea2384d3Sbellard } 791ea2384d3Sbellard 79251762288SStefan Hajnoczi /** 79351762288SStefan Hajnoczi * Set the current 'total_sectors' value 79465a9bb25SMarkus Armbruster * Return 0 on success, -errno on error. 79551762288SStefan Hajnoczi */ 79651762288SStefan Hajnoczi static int refresh_total_sectors(BlockDriverState *bs, int64_t hint) 79751762288SStefan Hajnoczi { 79851762288SStefan Hajnoczi BlockDriver *drv = bs->drv; 79951762288SStefan Hajnoczi 800396759adSNicholas Bellinger /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */ 801396759adSNicholas Bellinger if (bs->sg) 802396759adSNicholas Bellinger return 0; 803396759adSNicholas Bellinger 80451762288SStefan Hajnoczi /* query actual device if possible, otherwise just trust the hint */ 80551762288SStefan Hajnoczi if (drv->bdrv_getlength) { 80651762288SStefan Hajnoczi int64_t length = drv->bdrv_getlength(bs); 80751762288SStefan Hajnoczi if (length < 0) { 80851762288SStefan Hajnoczi return length; 80951762288SStefan Hajnoczi } 8107e382003SFam Zheng hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE); 81151762288SStefan Hajnoczi } 81251762288SStefan Hajnoczi 81351762288SStefan Hajnoczi bs->total_sectors = hint; 81451762288SStefan Hajnoczi return 0; 
81551762288SStefan Hajnoczi } 81651762288SStefan Hajnoczi 817c3993cdcSStefan Hajnoczi /** 8189e8f1835SPaolo Bonzini * Set open flags for a given discard mode 8199e8f1835SPaolo Bonzini * 8209e8f1835SPaolo Bonzini * Return 0 on success, -1 if the discard mode was invalid. 8219e8f1835SPaolo Bonzini */ 8229e8f1835SPaolo Bonzini int bdrv_parse_discard_flags(const char *mode, int *flags) 8239e8f1835SPaolo Bonzini { 8249e8f1835SPaolo Bonzini *flags &= ~BDRV_O_UNMAP; 8259e8f1835SPaolo Bonzini 8269e8f1835SPaolo Bonzini if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) { 8279e8f1835SPaolo Bonzini /* do nothing */ 8289e8f1835SPaolo Bonzini } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) { 8299e8f1835SPaolo Bonzini *flags |= BDRV_O_UNMAP; 8309e8f1835SPaolo Bonzini } else { 8319e8f1835SPaolo Bonzini return -1; 8329e8f1835SPaolo Bonzini } 8339e8f1835SPaolo Bonzini 8349e8f1835SPaolo Bonzini return 0; 8359e8f1835SPaolo Bonzini } 8369e8f1835SPaolo Bonzini 8379e8f1835SPaolo Bonzini /** 838c3993cdcSStefan Hajnoczi * Set open flags for a given cache mode 839c3993cdcSStefan Hajnoczi * 840c3993cdcSStefan Hajnoczi * Return 0 on success, -1 if the cache mode was invalid. 
841c3993cdcSStefan Hajnoczi */ 842c3993cdcSStefan Hajnoczi int bdrv_parse_cache_flags(const char *mode, int *flags) 843c3993cdcSStefan Hajnoczi { 844c3993cdcSStefan Hajnoczi *flags &= ~BDRV_O_CACHE_MASK; 845c3993cdcSStefan Hajnoczi 846c3993cdcSStefan Hajnoczi if (!strcmp(mode, "off") || !strcmp(mode, "none")) { 847c3993cdcSStefan Hajnoczi *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB; 84892196b2fSStefan Hajnoczi } else if (!strcmp(mode, "directsync")) { 84992196b2fSStefan Hajnoczi *flags |= BDRV_O_NOCACHE; 850c3993cdcSStefan Hajnoczi } else if (!strcmp(mode, "writeback")) { 851c3993cdcSStefan Hajnoczi *flags |= BDRV_O_CACHE_WB; 852c3993cdcSStefan Hajnoczi } else if (!strcmp(mode, "unsafe")) { 853c3993cdcSStefan Hajnoczi *flags |= BDRV_O_CACHE_WB; 854c3993cdcSStefan Hajnoczi *flags |= BDRV_O_NO_FLUSH; 855c3993cdcSStefan Hajnoczi } else if (!strcmp(mode, "writethrough")) { 856c3993cdcSStefan Hajnoczi /* this is the default */ 857c3993cdcSStefan Hajnoczi } else { 858c3993cdcSStefan Hajnoczi return -1; 859c3993cdcSStefan Hajnoczi } 860c3993cdcSStefan Hajnoczi 861c3993cdcSStefan Hajnoczi return 0; 862c3993cdcSStefan Hajnoczi } 863c3993cdcSStefan Hajnoczi 86453fec9d3SStefan Hajnoczi /** 86553fec9d3SStefan Hajnoczi * The copy-on-read flag is actually a reference count so multiple users may 86653fec9d3SStefan Hajnoczi * use the feature without worrying about clobbering its previous state. 86753fec9d3SStefan Hajnoczi * Copy-on-read stays enabled until all users have called to disable it. 
86853fec9d3SStefan Hajnoczi */ 86953fec9d3SStefan Hajnoczi void bdrv_enable_copy_on_read(BlockDriverState *bs) 87053fec9d3SStefan Hajnoczi { 87153fec9d3SStefan Hajnoczi bs->copy_on_read++; 87253fec9d3SStefan Hajnoczi } 87353fec9d3SStefan Hajnoczi 87453fec9d3SStefan Hajnoczi void bdrv_disable_copy_on_read(BlockDriverState *bs) 87553fec9d3SStefan Hajnoczi { 87653fec9d3SStefan Hajnoczi assert(bs->copy_on_read > 0); 87753fec9d3SStefan Hajnoczi bs->copy_on_read--; 87853fec9d3SStefan Hajnoczi } 87953fec9d3SStefan Hajnoczi 8800b50cc88SKevin Wolf /* 881b1e6fc08SKevin Wolf * Returns the flags that a temporary snapshot should get, based on the 882b1e6fc08SKevin Wolf * originally requested flags (the originally requested image will have flags 883b1e6fc08SKevin Wolf * like a backing file) 884b1e6fc08SKevin Wolf */ 885b1e6fc08SKevin Wolf static int bdrv_temp_snapshot_flags(int flags) 886b1e6fc08SKevin Wolf { 887b1e6fc08SKevin Wolf return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY; 888b1e6fc08SKevin Wolf } 889b1e6fc08SKevin Wolf 890b1e6fc08SKevin Wolf /* 8910b50cc88SKevin Wolf * Returns the flags that bs->file should get, based on the given flags for 8920b50cc88SKevin Wolf * the parent BDS 8930b50cc88SKevin Wolf */ 8940b50cc88SKevin Wolf static int bdrv_inherited_flags(int flags) 8950b50cc88SKevin Wolf { 8960b50cc88SKevin Wolf /* Enable protocol handling, disable format probing for bs->file */ 8970b50cc88SKevin Wolf flags |= BDRV_O_PROTOCOL; 8980b50cc88SKevin Wolf 8990b50cc88SKevin Wolf /* Our block drivers take care to send flushes and respect unmap policy, 9000b50cc88SKevin Wolf * so we can enable both unconditionally on lower layers. 
*/ 9010b50cc88SKevin Wolf flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP; 9020b50cc88SKevin Wolf 9030b50cc88SKevin Wolf /* Clear flags that only apply to the top layer */ 9045669b44dSKevin Wolf flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ); 9050b50cc88SKevin Wolf 9060b50cc88SKevin Wolf return flags; 9070b50cc88SKevin Wolf } 9080b50cc88SKevin Wolf 909317fc44eSKevin Wolf /* 910317fc44eSKevin Wolf * Returns the flags that bs->backing_hd should get, based on the given flags 911317fc44eSKevin Wolf * for the parent BDS 912317fc44eSKevin Wolf */ 913317fc44eSKevin Wolf static int bdrv_backing_flags(int flags) 914317fc44eSKevin Wolf { 915317fc44eSKevin Wolf /* backing files always opened read-only */ 916317fc44eSKevin Wolf flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ); 917317fc44eSKevin Wolf 918317fc44eSKevin Wolf /* snapshot=on is handled on the top layer */ 9198bfea15dSKevin Wolf flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY); 920317fc44eSKevin Wolf 921317fc44eSKevin Wolf return flags; 922317fc44eSKevin Wolf } 923317fc44eSKevin Wolf 9247b272452SKevin Wolf static int bdrv_open_flags(BlockDriverState *bs, int flags) 9257b272452SKevin Wolf { 9267b272452SKevin Wolf int open_flags = flags | BDRV_O_CACHE_WB; 9277b272452SKevin Wolf 9287b272452SKevin Wolf /* 9297b272452SKevin Wolf * Clear flags that are internal to the block layer before opening the 9307b272452SKevin Wolf * image. 9317b272452SKevin Wolf */ 93220cca275SKevin Wolf open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL); 9337b272452SKevin Wolf 9347b272452SKevin Wolf /* 9357b272452SKevin Wolf * Snapshots should be writable. 
9367b272452SKevin Wolf */ 9378bfea15dSKevin Wolf if (flags & BDRV_O_TEMPORARY) { 9387b272452SKevin Wolf open_flags |= BDRV_O_RDWR; 9397b272452SKevin Wolf } 9407b272452SKevin Wolf 9417b272452SKevin Wolf return open_flags; 9427b272452SKevin Wolf } 9437b272452SKevin Wolf 944636ea370SKevin Wolf static void bdrv_assign_node_name(BlockDriverState *bs, 9456913c0c2SBenoît Canet const char *node_name, 9466913c0c2SBenoît Canet Error **errp) 9476913c0c2SBenoît Canet { 9486913c0c2SBenoît Canet if (!node_name) { 949636ea370SKevin Wolf return; 9506913c0c2SBenoît Canet } 9516913c0c2SBenoît Canet 9529aebf3b8SKevin Wolf /* Check for empty string or invalid characters */ 953f5bebbbbSMarkus Armbruster if (!id_wellformed(node_name)) { 9549aebf3b8SKevin Wolf error_setg(errp, "Invalid node name"); 955636ea370SKevin Wolf return; 9566913c0c2SBenoît Canet } 9576913c0c2SBenoît Canet 9580c5e94eeSBenoît Canet /* takes care of avoiding namespaces collisions */ 9597f06d47eSMarkus Armbruster if (blk_by_name(node_name)) { 9600c5e94eeSBenoît Canet error_setg(errp, "node-name=%s is conflicting with a device id", 9610c5e94eeSBenoît Canet node_name); 962636ea370SKevin Wolf return; 9630c5e94eeSBenoît Canet } 9640c5e94eeSBenoît Canet 9656913c0c2SBenoît Canet /* takes care of avoiding duplicates node names */ 9666913c0c2SBenoît Canet if (bdrv_find_node(node_name)) { 9676913c0c2SBenoît Canet error_setg(errp, "Duplicate node name"); 968636ea370SKevin Wolf return; 9696913c0c2SBenoît Canet } 9706913c0c2SBenoît Canet 9716913c0c2SBenoît Canet /* copy node name into the bs and insert it into the graph list */ 9726913c0c2SBenoît Canet pstrcpy(bs->node_name, sizeof(bs->node_name), node_name); 9736913c0c2SBenoît Canet QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list); 9746913c0c2SBenoît Canet } 9756913c0c2SBenoît Canet 976b6ce07aaSKevin Wolf /* 97757915332SKevin Wolf * Common part for opening disk images and files 978b6ad491aSKevin Wolf * 979b6ad491aSKevin Wolf * Removes all processed options from *options. 
 *
 * Returns 0 on success, or a negative errno value with *errp set on failure.
 */
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
    QDict *options, int flags, BlockDriver *drv, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *node_name = NULL;
    Error *local_err = NULL;

    assert(drv != NULL);
    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    /* Prefer the name of an already opened @file over the "filename" option */
    if (file != NULL) {
        filename = file->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        return -EINVAL;
    }

    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);

    /* "node-name" is consumed here; a missing option leaves bs unnamed */
    node_name = qdict_get_try_str(options, "node-name");
    bdrv_assign_node_name(bs, node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return -EINVAL;
    }
    qdict_del(options, "node-name");

    /* bdrv_open() with directly using a protocol as drv. This layer is already
     * opened, so assign it to bs (while file becomes a closed BlockDriverState)
     * and return immediately. */
    if (file != NULL && drv->bdrv_file_open) {
        bdrv_swap(file, bs);
        return 0;
    }

    /* Defaults; the driver's open function may adjust them */
    bs->open_flags = flags;
    bs->guest_block_size = 512;
    bs->request_alignment = 512;
    bs->zero_beyond_eof = true;
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        /* Distinguish "read-only whitelisted" from "not whitelisted at all" */
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                        ? "Driver '%s' can only be used for read-only devices"
                        : "Driver '%s' is not whitelisted",
                   drv->format_name);
        return -ENOTSUP;
    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if (flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            return -EINVAL;
        }
    }

    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);

    /* From here on, failures must go through free_and_fail */
    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(!drv->bdrv_needs_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else {
        if (file == NULL) {
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    }

    if (ret < 0) {
        /* Use the driver's error if it set one, else build one from ret */
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto free_and_fail;
    }

    if (bs->encrypted) {
        error_report("Encrypted images are deprecated");
        error_printf("Support for them will be removed in a future release.\n"
                     "You can use 'qemu-img convert' to convert your image"
                     " to an unencrypted one.\n");
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        goto free_and_fail;
    }

    bdrv_refresh_limits(bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto free_and_fail;
    }

    assert(bdrv_opt_mem_align(bs) != 0);
    assert((bs->request_alignment != 0) || bs->sg);
    return 0;

free_and_fail:
    /* Undo the driver setup; bs->file is only cleared here, not released */
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}

/*
 * Parses the JSON text that follows the "json:" prefix of @filename into a
 * flattened QDict.  @filename must start with "json:" (this is asserted).
 *
 * Returns the QDict, or NULL with *errp set if the text is not valid JSON or
 * is not a JSON object.
 */
static QDict *parse_json_filename(const char *filename, Error **errp)
{
    QObject *options_obj;
    QDict *options;
    int ret;

    /* Skip the prefix; filename now points at the JSON text itself */
    ret = strstart(filename, "json:", &filename);
    assert(ret);

    options_obj = qobject_from_json(filename);
    if (!options_obj) {
        error_setg(errp, "Could not parse the JSON options");
        return NULL;
    }

    if (qobject_type(options_obj) != QTYPE_QDICT) {
        qobject_decref(options_obj);
        error_setg(errp, "Invalid JSON object given");
        return NULL;
    }

    options = qobject_to_qdict(options_obj);
    qdict_flatten(options);

    return options;
}

/*
 * Fills in default options for opening images and converts the legacy
 * filename/flags pair to option QDict entries.
 *
 * A "json:" pseudo-protocol filename is merged into *options and *pfilename
 * is then set to NULL.
 *
 * Returns 0 on success, or a negative errno value with *errp set on failure.
 */
static int bdrv_fill_options(QDict **options, const char **pfilename, int flags,
                             BlockDriver *drv, Error **errp)
{
    const char *filename = *pfilename;
    const char *drvname;
    bool protocol = flags & BDRV_O_PROTOCOL;
    bool parse_filename = false;
    Error *local_err = NULL;

    /* Parse json: pseudo-protocol */
    if (filename && g_str_has_prefix(filename, "json:")) {
        QDict *json_options = parse_json_filename(filename, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        /* Options given in the filename have lower priority than options
         * specified directly */
        qdict_join(*options, json_options, false);
        QDECREF(json_options);
        *pfilename = filename = NULL;
    }

    /* Fetch the file name from the options QDict if necessary */
    if (protocol && filename) {
        if (!qdict_haskey(*options, "filename")) {
            qdict_put(*options, "filename", qstring_from_str(filename));
            /* Only a filename passed by the caller is parsed for a prefix */
            parse_filename = true;
        } else {
            error_setg(errp, "Can't specify 'file' and 'filename' options at "
                             "the same time");
            return -EINVAL;
        }
    }

    /* Find the right block driver */
    filename = qdict_get_try_str(*options, "filename");
    drvname = qdict_get_try_str(*options, "driver");

    if (drv) {
        /* An explicit drv argument must not be combined with "driver" */
        if (drvname) {
            error_setg(errp, "Driver specified twice");
            return -EINVAL;
        }
        drvname = drv->format_name;
        qdict_put(*options, "driver", qstring_from_str(drvname));
    } else {
        if (!drvname && protocol) {
            if (filename) {
                drv = bdrv_find_protocol(filename, parse_filename, errp);
                if (!drv) {
                    return -EINVAL;
                }

                drvname = drv->format_name;
                qdict_put(*options, "driver", qstring_from_str(drvname));
            } else {
                error_setg(errp, "Must specify either driver or file");
                return -EINVAL;
            }
        } else if (drvname) {
            drv = bdrv_find_format(drvname);
            if (!drv) {
                error_setg(errp, "Unknown driver '%s'", drvname);
                return -ENOENT;
            }
        }
    }

    /* At the protocol level a driver is always known by now */
    assert(drv || !protocol);

    /* Driver-specific filename parsing */
    if (drv && drv->bdrv_parse_filename && parse_filename) {
        drv->bdrv_parse_filename(filename, *options, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        if (!drv->bdrv_needs_filename) {
            qdict_del(*options, "filename");
        }
    }

    return 0;
}

/*
 * Makes @backing_hd the backing BlockDriverState of @bs; a NULL @backing_hd
 * detaches the current one.  The op blocker that @bs holds on its backing
 * node is kept in step, and the limits of @bs are refreshed in either case.
 */
void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
{

    if (bs->backing_hd) {
        /* Lift the blocker from the old backing node */
        assert(bs->backing_blocker);
        bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
    } else if (backing_hd) {
        /* First backing node: create the blocker */
        error_setg(&bs->backing_blocker,
                   "node is used as backing hd of '%s'",
                   bdrv_get_device_or_node_name(bs));
    }

    bs->backing_hd = backing_hd;
    if (!backing_hd) {
        error_free(bs->backing_blocker);
        bs->backing_blocker = NULL;
        goto out;
    }
    bs->open_flags &= ~BDRV_O_NO_BACKING;
    pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
    pstrcpy(bs->backing_format, sizeof(bs->backing_format),
            backing_hd->drv ? backing_hd->drv->format_name : "");

    bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
    /* Otherwise we won't be able to commit due to check in bdrv_commit */
    bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
                    bs->backing_blocker);
out:
    bdrv_refresh_limits(bs, NULL);
}

/*
 * Opens the backing file for a BlockDriverState if not yet open
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict is transferred to this
 * function (even on failure), so if the caller intends to reuse the dictionary,
 * it needs to use QINCREF() before calling bdrv_open_backing_file().
 *
 * Returns 0 on success (including when a backing file is already attached or
 * there is nothing to open), or a negative errno value with *errp set.
 */
int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
{
    char *backing_filename = g_malloc0(PATH_MAX);
    int ret = 0;
    BlockDriverState *backing_hd;
    Error *local_err = NULL;

    /* Already open: just drop the reference that was handed to us */
    if (bs->backing_hd != NULL) {
        QDECREF(options);
        goto free_exit;
    }

    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    bs->open_flags &= ~BDRV_O_NO_BACKING;
    if (qdict_haskey(options, "file.filename")) {
        /* The options name the file; do not derive one from bs */
        backing_filename[0] = '\0';
    } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
        /* No backing file recorded and no options given: nothing to open */
        QDECREF(options);
        goto free_exit;
    } else {
        bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
                                       &local_err);
        if (local_err) {
            ret = -EINVAL;
            error_propagate(errp, local_err);
            QDECREF(options);
            goto free_exit;
        }
    }

    if (!bs->drv || !bs->drv->supports_backing) {
        ret = -EINVAL;
        error_setg(errp, "Driver doesn't support backing files");
        QDECREF(options);
        goto free_exit;
    }

    backing_hd = bdrv_new();

    /* An explicit "driver" option overrides the recorded backing format */
    if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
        qdict_put(options, "driver", qstring_from_str(bs->backing_format));
    }

    assert(bs->backing_hd == NULL);
    /* options is passed on to bdrv_open(); it is not released here again */
    ret = bdrv_open(&backing_hd,
                    *backing_filename ? backing_filename : NULL, NULL, options,
                    bdrv_backing_flags(bs->open_flags), NULL, &local_err);
    if (ret < 0) {
        bdrv_unref(backing_hd);
        backing_hd = NULL;
        bs->open_flags |= BDRV_O_NO_BACKING;
        error_setg(errp, "Could not open backing file: %s",
                   error_get_pretty(local_err));
        error_free(local_err);
        goto free_exit;
    }
    bdrv_set_backing_hd(bs, backing_hd);

free_exit:
    g_free(backing_filename);
    return ret;
}

/*
 * Opens a disk image whose options are given as BlockdevRef in another block
 * device's options.
 *
 * If allow_none is true, no image will be opened if filename is NULL and no
 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
 *
 * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
 * itself, all options starting with "${bdref_key}."
are considered part of the 1355da557aacSMax Reitz * BlockdevRef. 1356da557aacSMax Reitz * 1357da557aacSMax Reitz * The BlockdevRef will be removed from the options QDict. 1358f67503e5SMax Reitz * 1359f67503e5SMax Reitz * To conform with the behavior of bdrv_open(), *pbs has to be NULL. 1360da557aacSMax Reitz */ 1361da557aacSMax Reitz int bdrv_open_image(BlockDriverState **pbs, const char *filename, 1362da557aacSMax Reitz QDict *options, const char *bdref_key, int flags, 1363f7d9fd8cSMax Reitz bool allow_none, Error **errp) 1364da557aacSMax Reitz { 1365da557aacSMax Reitz QDict *image_options; 1366da557aacSMax Reitz int ret; 1367da557aacSMax Reitz char *bdref_key_dot; 1368da557aacSMax Reitz const char *reference; 1369da557aacSMax Reitz 1370f67503e5SMax Reitz assert(pbs); 1371f67503e5SMax Reitz assert(*pbs == NULL); 1372f67503e5SMax Reitz 1373da557aacSMax Reitz bdref_key_dot = g_strdup_printf("%s.", bdref_key); 1374da557aacSMax Reitz qdict_extract_subqdict(options, &image_options, bdref_key_dot); 1375da557aacSMax Reitz g_free(bdref_key_dot); 1376da557aacSMax Reitz 1377da557aacSMax Reitz reference = qdict_get_try_str(options, bdref_key); 1378da557aacSMax Reitz if (!filename && !reference && !qdict_size(image_options)) { 1379da557aacSMax Reitz if (allow_none) { 1380da557aacSMax Reitz ret = 0; 1381da557aacSMax Reitz } else { 1382da557aacSMax Reitz error_setg(errp, "A block device must be specified for \"%s\"", 1383da557aacSMax Reitz bdref_key); 1384da557aacSMax Reitz ret = -EINVAL; 1385da557aacSMax Reitz } 1386b20e61e0SMarkus Armbruster QDECREF(image_options); 1387da557aacSMax Reitz goto done; 1388da557aacSMax Reitz } 1389da557aacSMax Reitz 1390f7d9fd8cSMax Reitz ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp); 1391da557aacSMax Reitz 1392da557aacSMax Reitz done: 1393da557aacSMax Reitz qdict_del(options, bdref_key); 1394da557aacSMax Reitz return ret; 1395da557aacSMax Reitz } 1396da557aacSMax Reitz 13976b8aeca5SChen Gang int 
bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp) 1398b998875dSKevin Wolf { 1399b998875dSKevin Wolf /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */ 14001ba4b6a5SBenoît Canet char *tmp_filename = g_malloc0(PATH_MAX + 1); 1401b998875dSKevin Wolf int64_t total_size; 140283d0521aSChunyan Liu QemuOpts *opts = NULL; 1403b998875dSKevin Wolf QDict *snapshot_options; 1404b998875dSKevin Wolf BlockDriverState *bs_snapshot; 1405b998875dSKevin Wolf Error *local_err; 1406b998875dSKevin Wolf int ret; 1407b998875dSKevin Wolf 1408b998875dSKevin Wolf /* if snapshot, we create a temporary backing file and open it 1409b998875dSKevin Wolf instead of opening 'filename' directly */ 1410b998875dSKevin Wolf 1411b998875dSKevin Wolf /* Get the required size from the image */ 1412f187743aSKevin Wolf total_size = bdrv_getlength(bs); 1413f187743aSKevin Wolf if (total_size < 0) { 14146b8aeca5SChen Gang ret = total_size; 1415f187743aSKevin Wolf error_setg_errno(errp, -total_size, "Could not get image size"); 14161ba4b6a5SBenoît Canet goto out; 1417f187743aSKevin Wolf } 1418b998875dSKevin Wolf 1419b998875dSKevin Wolf /* Create the temporary image */ 14201ba4b6a5SBenoît Canet ret = get_tmp_filename(tmp_filename, PATH_MAX + 1); 1421b998875dSKevin Wolf if (ret < 0) { 1422b998875dSKevin Wolf error_setg_errno(errp, -ret, "Could not get temporary filename"); 14231ba4b6a5SBenoît Canet goto out; 1424b998875dSKevin Wolf } 1425b998875dSKevin Wolf 1426ef810437SMax Reitz opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0, 1427c282e1fdSChunyan Liu &error_abort); 142839101f25SMarkus Armbruster qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort); 1429ef810437SMax Reitz ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err); 143083d0521aSChunyan Liu qemu_opts_del(opts); 1431b998875dSKevin Wolf if (ret < 0) { 1432b998875dSKevin Wolf error_setg_errno(errp, -ret, "Could not create temporary overlay " 1433b998875dSKevin Wolf "'%s': %s", 
tmp_filename, 1434b998875dSKevin Wolf error_get_pretty(local_err)); 1435b998875dSKevin Wolf error_free(local_err); 14361ba4b6a5SBenoît Canet goto out; 1437b998875dSKevin Wolf } 1438b998875dSKevin Wolf 1439b998875dSKevin Wolf /* Prepare a new options QDict for the temporary file */ 1440b998875dSKevin Wolf snapshot_options = qdict_new(); 1441b998875dSKevin Wolf qdict_put(snapshot_options, "file.driver", 1442b998875dSKevin Wolf qstring_from_str("file")); 1443b998875dSKevin Wolf qdict_put(snapshot_options, "file.filename", 1444b998875dSKevin Wolf qstring_from_str(tmp_filename)); 1445b998875dSKevin Wolf 1446e4e9986bSMarkus Armbruster bs_snapshot = bdrv_new(); 1447b998875dSKevin Wolf 1448b998875dSKevin Wolf ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options, 1449ef810437SMax Reitz flags, &bdrv_qcow2, &local_err); 1450b998875dSKevin Wolf if (ret < 0) { 1451b998875dSKevin Wolf error_propagate(errp, local_err); 14521ba4b6a5SBenoît Canet goto out; 1453b998875dSKevin Wolf } 1454b998875dSKevin Wolf 1455b998875dSKevin Wolf bdrv_append(bs_snapshot, bs); 14561ba4b6a5SBenoît Canet 14571ba4b6a5SBenoît Canet out: 14581ba4b6a5SBenoît Canet g_free(tmp_filename); 14596b8aeca5SChen Gang return ret; 1460b998875dSKevin Wolf } 1461b998875dSKevin Wolf 1462da557aacSMax Reitz /* 1463b6ce07aaSKevin Wolf * Opens a disk image (raw, qcow2, vmdk, ...) 1464de9c0cecSKevin Wolf * 1465de9c0cecSKevin Wolf * options is a QDict of options to pass to the block drivers, or NULL for an 1466de9c0cecSKevin Wolf * empty set of options. The reference to the QDict belongs to the block layer 1467de9c0cecSKevin Wolf * after the call (even on failure), so if the caller intends to reuse the 1468de9c0cecSKevin Wolf * dictionary, it needs to use QINCREF() before calling bdrv_open. 1469f67503e5SMax Reitz * 1470f67503e5SMax Reitz * If *pbs is NULL, a new BDS will be created with a pointer to it stored there. 1471f67503e5SMax Reitz * If it is not NULL, the referenced BDS will be reused. 
1472ddf5636dSMax Reitz * 1473ddf5636dSMax Reitz * The reference parameter may be used to specify an existing block device which 1474ddf5636dSMax Reitz * should be opened. If specified, neither options nor a filename may be given, 1475ddf5636dSMax Reitz * nor can an existing BDS be reused (that is, *pbs has to be NULL). 1476b6ce07aaSKevin Wolf */ 1477ddf5636dSMax Reitz int bdrv_open(BlockDriverState **pbs, const char *filename, 1478ddf5636dSMax Reitz const char *reference, QDict *options, int flags, 1479ddf5636dSMax Reitz BlockDriver *drv, Error **errp) 1480ea2384d3Sbellard { 1481b6ce07aaSKevin Wolf int ret; 1482f67503e5SMax Reitz BlockDriverState *file = NULL, *bs; 148374fe54f2SKevin Wolf const char *drvname; 148434b5d2c6SMax Reitz Error *local_err = NULL; 1485b1e6fc08SKevin Wolf int snapshot_flags = 0; 148633e3963eSbellard 1487f67503e5SMax Reitz assert(pbs); 1488f67503e5SMax Reitz 1489ddf5636dSMax Reitz if (reference) { 1490ddf5636dSMax Reitz bool options_non_empty = options ? qdict_size(options) : false; 1491ddf5636dSMax Reitz QDECREF(options); 1492ddf5636dSMax Reitz 1493ddf5636dSMax Reitz if (*pbs) { 1494ddf5636dSMax Reitz error_setg(errp, "Cannot reuse an existing BDS when referencing " 1495ddf5636dSMax Reitz "another block device"); 1496ddf5636dSMax Reitz return -EINVAL; 1497ddf5636dSMax Reitz } 1498ddf5636dSMax Reitz 1499ddf5636dSMax Reitz if (filename || options_non_empty) { 1500ddf5636dSMax Reitz error_setg(errp, "Cannot reference an existing block device with " 1501ddf5636dSMax Reitz "additional options or a new filename"); 1502ddf5636dSMax Reitz return -EINVAL; 1503ddf5636dSMax Reitz } 1504ddf5636dSMax Reitz 1505ddf5636dSMax Reitz bs = bdrv_lookup_bs(reference, reference, errp); 1506ddf5636dSMax Reitz if (!bs) { 1507ddf5636dSMax Reitz return -ENODEV; 1508ddf5636dSMax Reitz } 1509ddf5636dSMax Reitz bdrv_ref(bs); 1510ddf5636dSMax Reitz *pbs = bs; 1511ddf5636dSMax Reitz return 0; 1512ddf5636dSMax Reitz } 1513ddf5636dSMax Reitz 1514f67503e5SMax Reitz if 
(*pbs) { 1515f67503e5SMax Reitz bs = *pbs; 1516f67503e5SMax Reitz } else { 1517e4e9986bSMarkus Armbruster bs = bdrv_new(); 1518f67503e5SMax Reitz } 1519f67503e5SMax Reitz 1520de9c0cecSKevin Wolf /* NULL means an empty set of options */ 1521de9c0cecSKevin Wolf if (options == NULL) { 1522de9c0cecSKevin Wolf options = qdict_new(); 1523de9c0cecSKevin Wolf } 1524de9c0cecSKevin Wolf 152517b005f1SKevin Wolf ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err); 1526462f5bcfSKevin Wolf if (local_err) { 1527462f5bcfSKevin Wolf goto fail; 1528462f5bcfSKevin Wolf } 1529462f5bcfSKevin Wolf 153076c591b0SKevin Wolf /* Find the right image format driver */ 153176c591b0SKevin Wolf drv = NULL; 153276c591b0SKevin Wolf drvname = qdict_get_try_str(options, "driver"); 153376c591b0SKevin Wolf if (drvname) { 153476c591b0SKevin Wolf drv = bdrv_find_format(drvname); 153576c591b0SKevin Wolf qdict_del(options, "driver"); 153676c591b0SKevin Wolf if (!drv) { 153776c591b0SKevin Wolf error_setg(errp, "Unknown driver: '%s'", drvname); 153876c591b0SKevin Wolf ret = -EINVAL; 153976c591b0SKevin Wolf goto fail; 154076c591b0SKevin Wolf } 154176c591b0SKevin Wolf } 154276c591b0SKevin Wolf 154376c591b0SKevin Wolf assert(drvname || !(flags & BDRV_O_PROTOCOL)); 154476c591b0SKevin Wolf if (drv && !drv->bdrv_file_open) { 154576c591b0SKevin Wolf /* If the user explicitly wants a format driver here, we'll need to add 154676c591b0SKevin Wolf * another layer for the protocol in bs->file */ 154776c591b0SKevin Wolf flags &= ~BDRV_O_PROTOCOL; 154876c591b0SKevin Wolf } 154976c591b0SKevin Wolf 1550de9c0cecSKevin Wolf bs->options = options; 1551b6ad491aSKevin Wolf options = qdict_clone_shallow(options); 1552de9c0cecSKevin Wolf 1553f500a6d3SKevin Wolf /* Open image file without format layer */ 1554f4788adcSKevin Wolf if ((flags & BDRV_O_PROTOCOL) == 0) { 1555be028adcSJeff Cody if (flags & BDRV_O_RDWR) { 1556be028adcSJeff Cody flags |= BDRV_O_ALLOW_RDWR; 1557be028adcSJeff Cody } 1558b1e6fc08SKevin Wolf if 
(flags & BDRV_O_SNAPSHOT) { 1559b1e6fc08SKevin Wolf snapshot_flags = bdrv_temp_snapshot_flags(flags); 1560b1e6fc08SKevin Wolf flags = bdrv_backing_flags(flags); 1561b1e6fc08SKevin Wolf } 1562be028adcSJeff Cody 1563f67503e5SMax Reitz assert(file == NULL); 1564054963f8SMax Reitz ret = bdrv_open_image(&file, filename, options, "file", 15650b50cc88SKevin Wolf bdrv_inherited_flags(flags), 15660b50cc88SKevin Wolf true, &local_err); 1567f500a6d3SKevin Wolf if (ret < 0) { 15688bfea15dSKevin Wolf goto fail; 1569f500a6d3SKevin Wolf } 1570f4788adcSKevin Wolf } 1571f500a6d3SKevin Wolf 157276c591b0SKevin Wolf /* Image format probing */ 157338f3ef57SKevin Wolf bs->probed = !drv; 157476c591b0SKevin Wolf if (!drv && file) { 157534b5d2c6SMax Reitz ret = find_image_format(file, filename, &drv, &local_err); 157617b005f1SKevin Wolf if (ret < 0) { 157717b005f1SKevin Wolf goto fail; 157817b005f1SKevin Wolf } 157976c591b0SKevin Wolf } else if (!drv) { 15802a05cbe4SMax Reitz error_setg(errp, "Must specify either driver or file"); 15812a05cbe4SMax Reitz ret = -EINVAL; 15828bfea15dSKevin Wolf goto fail; 15832a05cbe4SMax Reitz } 1584f500a6d3SKevin Wolf 1585b6ce07aaSKevin Wolf /* Open the image */ 158634b5d2c6SMax Reitz ret = bdrv_open_common(bs, file, options, flags, drv, &local_err); 1587b6ce07aaSKevin Wolf if (ret < 0) { 15888bfea15dSKevin Wolf goto fail; 15896987307cSChristoph Hellwig } 15906987307cSChristoph Hellwig 15912a05cbe4SMax Reitz if (file && (bs->file != file)) { 15924f6fd349SFam Zheng bdrv_unref(file); 1593f500a6d3SKevin Wolf file = NULL; 1594f500a6d3SKevin Wolf } 1595f500a6d3SKevin Wolf 1596b6ce07aaSKevin Wolf /* If there is a backing file, use it */ 15979156df12SPaolo Bonzini if ((flags & BDRV_O_NO_BACKING) == 0) { 159831ca6d07SKevin Wolf QDict *backing_options; 159931ca6d07SKevin Wolf 16005726d872SBenoît Canet qdict_extract_subqdict(options, &backing_options, "backing."); 160134b5d2c6SMax Reitz ret = bdrv_open_backing_file(bs, backing_options, &local_err); 1602b6ce07aaSKevin 
Wolf if (ret < 0) { 1603b6ad491aSKevin Wolf goto close_and_fail; 1604b6ce07aaSKevin Wolf } 1605b6ce07aaSKevin Wolf } 1606b6ce07aaSKevin Wolf 160791af7014SMax Reitz bdrv_refresh_filename(bs); 160891af7014SMax Reitz 1609b998875dSKevin Wolf /* For snapshot=on, create a temporary qcow2 overlay. bs points to the 1610b998875dSKevin Wolf * temporary snapshot afterwards. */ 1611b1e6fc08SKevin Wolf if (snapshot_flags) { 16126b8aeca5SChen Gang ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err); 1613b998875dSKevin Wolf if (local_err) { 1614b998875dSKevin Wolf goto close_and_fail; 1615b998875dSKevin Wolf } 1616b998875dSKevin Wolf } 1617b998875dSKevin Wolf 1618b6ad491aSKevin Wolf /* Check if any unknown options were used */ 16195acd9d81SMax Reitz if (options && (qdict_size(options) != 0)) { 1620b6ad491aSKevin Wolf const QDictEntry *entry = qdict_first(options); 16215acd9d81SMax Reitz if (flags & BDRV_O_PROTOCOL) { 16225acd9d81SMax Reitz error_setg(errp, "Block protocol '%s' doesn't support the option " 16235acd9d81SMax Reitz "'%s'", drv->format_name, entry->key); 16245acd9d81SMax Reitz } else { 162534b5d2c6SMax Reitz error_setg(errp, "Block format '%s' used by device '%s' doesn't " 16265acd9d81SMax Reitz "support the option '%s'", drv->format_name, 1627bfb197e0SMarkus Armbruster bdrv_get_device_name(bs), entry->key); 16285acd9d81SMax Reitz } 1629b6ad491aSKevin Wolf 1630b6ad491aSKevin Wolf ret = -EINVAL; 1631b6ad491aSKevin Wolf goto close_and_fail; 1632b6ad491aSKevin Wolf } 1633b6ad491aSKevin Wolf 1634b6ce07aaSKevin Wolf if (!bdrv_key_required(bs)) { 1635a7f53e26SMarkus Armbruster if (bs->blk) { 1636a7f53e26SMarkus Armbruster blk_dev_change_media_cb(bs->blk, true); 1637a7f53e26SMarkus Armbruster } 1638c3adb58fSMarkus Armbruster } else if (!runstate_check(RUN_STATE_PRELAUNCH) 1639c3adb58fSMarkus Armbruster && !runstate_check(RUN_STATE_INMIGRATE) 1640c3adb58fSMarkus Armbruster && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */ 1641c3adb58fSMarkus Armbruster 
error_setg(errp, 1642c3adb58fSMarkus Armbruster "Guest must be stopped for opening of encrypted image"); 1643c3adb58fSMarkus Armbruster ret = -EBUSY; 1644c3adb58fSMarkus Armbruster goto close_and_fail; 1645b6ce07aaSKevin Wolf } 1646b6ce07aaSKevin Wolf 1647c3adb58fSMarkus Armbruster QDECREF(options); 1648f67503e5SMax Reitz *pbs = bs; 1649b6ce07aaSKevin Wolf return 0; 1650b6ce07aaSKevin Wolf 16518bfea15dSKevin Wolf fail: 1652f500a6d3SKevin Wolf if (file != NULL) { 16534f6fd349SFam Zheng bdrv_unref(file); 1654f500a6d3SKevin Wolf } 1655de9c0cecSKevin Wolf QDECREF(bs->options); 1656b6ad491aSKevin Wolf QDECREF(options); 1657de9c0cecSKevin Wolf bs->options = NULL; 1658f67503e5SMax Reitz if (!*pbs) { 1659f67503e5SMax Reitz /* If *pbs is NULL, a new BDS has been created in this function and 1660f67503e5SMax Reitz needs to be freed now. Otherwise, it does not need to be closed, 1661f67503e5SMax Reitz since it has not really been opened yet. */ 1662f67503e5SMax Reitz bdrv_unref(bs); 1663f67503e5SMax Reitz } 166484d18f06SMarkus Armbruster if (local_err) { 166534b5d2c6SMax Reitz error_propagate(errp, local_err); 166634b5d2c6SMax Reitz } 1667b6ad491aSKevin Wolf return ret; 1668de9c0cecSKevin Wolf 1669b6ad491aSKevin Wolf close_and_fail: 1670f67503e5SMax Reitz /* See fail path, but now the BDS has to be always closed */ 1671f67503e5SMax Reitz if (*pbs) { 1672b6ad491aSKevin Wolf bdrv_close(bs); 1673f67503e5SMax Reitz } else { 1674f67503e5SMax Reitz bdrv_unref(bs); 1675f67503e5SMax Reitz } 1676b6ad491aSKevin Wolf QDECREF(options); 167784d18f06SMarkus Armbruster if (local_err) { 167834b5d2c6SMax Reitz error_propagate(errp, local_err); 167934b5d2c6SMax Reitz } 1680b6ce07aaSKevin Wolf return ret; 1681b6ce07aaSKevin Wolf } 1682b6ce07aaSKevin Wolf 1683e971aa12SJeff Cody typedef struct BlockReopenQueueEntry { 1684e971aa12SJeff Cody bool prepared; 1685e971aa12SJeff Cody BDRVReopenState state; 1686e971aa12SJeff Cody QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry; 1687e971aa12SJeff Cody } 
BlockReopenQueueEntry; 1688e971aa12SJeff Cody 1689e971aa12SJeff Cody /* 1690e971aa12SJeff Cody * Adds a BlockDriverState to a simple queue for an atomic, transactional 1691e971aa12SJeff Cody * reopen of multiple devices. 1692e971aa12SJeff Cody * 1693e971aa12SJeff Cody * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT 1694e971aa12SJeff Cody * already performed, or alternatively may be NULL a new BlockReopenQueue will 1695e971aa12SJeff Cody * be created and initialized. This newly created BlockReopenQueue should be 1696e971aa12SJeff Cody * passed back in for subsequent calls that are intended to be of the same 1697e971aa12SJeff Cody * atomic 'set'. 1698e971aa12SJeff Cody * 1699e971aa12SJeff Cody * bs is the BlockDriverState to add to the reopen queue. 1700e971aa12SJeff Cody * 1701e971aa12SJeff Cody * flags contains the open flags for the associated bs 1702e971aa12SJeff Cody * 1703e971aa12SJeff Cody * returns a pointer to bs_queue, which is either the newly allocated 1704e971aa12SJeff Cody * bs_queue, or the existing bs_queue being used. 
1705e971aa12SJeff Cody * 1706e971aa12SJeff Cody */ 1707e971aa12SJeff Cody BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, 1708e971aa12SJeff Cody BlockDriverState *bs, int flags) 1709e971aa12SJeff Cody { 1710e971aa12SJeff Cody assert(bs != NULL); 1711e971aa12SJeff Cody 1712e971aa12SJeff Cody BlockReopenQueueEntry *bs_entry; 1713e971aa12SJeff Cody if (bs_queue == NULL) { 1714e971aa12SJeff Cody bs_queue = g_new0(BlockReopenQueue, 1); 1715e971aa12SJeff Cody QSIMPLEQ_INIT(bs_queue); 1716e971aa12SJeff Cody } 1717e971aa12SJeff Cody 1718f1f25a2eSKevin Wolf /* bdrv_open() masks this flag out */ 1719f1f25a2eSKevin Wolf flags &= ~BDRV_O_PROTOCOL; 1720f1f25a2eSKevin Wolf 1721e971aa12SJeff Cody if (bs->file) { 1722f1f25a2eSKevin Wolf bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags)); 1723e971aa12SJeff Cody } 1724e971aa12SJeff Cody 1725e971aa12SJeff Cody bs_entry = g_new0(BlockReopenQueueEntry, 1); 1726e971aa12SJeff Cody QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry); 1727e971aa12SJeff Cody 1728e971aa12SJeff Cody bs_entry->state.bs = bs; 1729e971aa12SJeff Cody bs_entry->state.flags = flags; 1730e971aa12SJeff Cody 1731e971aa12SJeff Cody return bs_queue; 1732e971aa12SJeff Cody } 1733e971aa12SJeff Cody 1734e971aa12SJeff Cody /* 1735e971aa12SJeff Cody * Reopen multiple BlockDriverStates atomically & transactionally. 1736e971aa12SJeff Cody * 1737e971aa12SJeff Cody * The queue passed in (bs_queue) must have been built up previous 1738e971aa12SJeff Cody * via bdrv_reopen_queue(). 1739e971aa12SJeff Cody * 1740e971aa12SJeff Cody * Reopens all BDS specified in the queue, with the appropriate 1741e971aa12SJeff Cody * flags. All devices are prepared for reopen, and failure of any 1742e971aa12SJeff Cody * device will cause all device changes to be abandonded, and intermediate 1743e971aa12SJeff Cody * data cleaned up. 
1744e971aa12SJeff Cody * 1745e971aa12SJeff Cody * If all devices prepare successfully, then the changes are committed 1746e971aa12SJeff Cody * to all devices. 1747e971aa12SJeff Cody * 1748e971aa12SJeff Cody */ 1749e971aa12SJeff Cody int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) 1750e971aa12SJeff Cody { 1751e971aa12SJeff Cody int ret = -1; 1752e971aa12SJeff Cody BlockReopenQueueEntry *bs_entry, *next; 1753e971aa12SJeff Cody Error *local_err = NULL; 1754e971aa12SJeff Cody 1755e971aa12SJeff Cody assert(bs_queue != NULL); 1756e971aa12SJeff Cody 1757e971aa12SJeff Cody bdrv_drain_all(); 1758e971aa12SJeff Cody 1759e971aa12SJeff Cody QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { 1760e971aa12SJeff Cody if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) { 1761e971aa12SJeff Cody error_propagate(errp, local_err); 1762e971aa12SJeff Cody goto cleanup; 1763e971aa12SJeff Cody } 1764e971aa12SJeff Cody bs_entry->prepared = true; 1765e971aa12SJeff Cody } 1766e971aa12SJeff Cody 1767e971aa12SJeff Cody /* If we reach this point, we have success and just need to apply the 1768e971aa12SJeff Cody * changes 1769e971aa12SJeff Cody */ 1770e971aa12SJeff Cody QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { 1771e971aa12SJeff Cody bdrv_reopen_commit(&bs_entry->state); 1772e971aa12SJeff Cody } 1773e971aa12SJeff Cody 1774e971aa12SJeff Cody ret = 0; 1775e971aa12SJeff Cody 1776e971aa12SJeff Cody cleanup: 1777e971aa12SJeff Cody QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { 1778e971aa12SJeff Cody if (ret && bs_entry->prepared) { 1779e971aa12SJeff Cody bdrv_reopen_abort(&bs_entry->state); 1780e971aa12SJeff Cody } 1781e971aa12SJeff Cody g_free(bs_entry); 1782e971aa12SJeff Cody } 1783e971aa12SJeff Cody g_free(bs_queue); 1784e971aa12SJeff Cody return ret; 1785e971aa12SJeff Cody } 1786e971aa12SJeff Cody 1787e971aa12SJeff Cody 1788e971aa12SJeff Cody /* Reopen a single BlockDriverState with the specified flags. 
*/ 1789e971aa12SJeff Cody int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp) 1790e971aa12SJeff Cody { 1791e971aa12SJeff Cody int ret = -1; 1792e971aa12SJeff Cody Error *local_err = NULL; 1793e971aa12SJeff Cody BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags); 1794e971aa12SJeff Cody 1795e971aa12SJeff Cody ret = bdrv_reopen_multiple(queue, &local_err); 1796e971aa12SJeff Cody if (local_err != NULL) { 1797e971aa12SJeff Cody error_propagate(errp, local_err); 1798e971aa12SJeff Cody } 1799e971aa12SJeff Cody return ret; 1800e971aa12SJeff Cody } 1801e971aa12SJeff Cody 1802e971aa12SJeff Cody 1803e971aa12SJeff Cody /* 1804e971aa12SJeff Cody * Prepares a BlockDriverState for reopen. All changes are staged in the 1805e971aa12SJeff Cody * 'opaque' field of the BDRVReopenState, which is used and allocated by 1806e971aa12SJeff Cody * the block driver layer .bdrv_reopen_prepare() 1807e971aa12SJeff Cody * 1808e971aa12SJeff Cody * bs is the BlockDriverState to reopen 1809e971aa12SJeff Cody * flags are the new open flags 1810e971aa12SJeff Cody * queue is the reopen queue 1811e971aa12SJeff Cody * 1812e971aa12SJeff Cody * Returns 0 on success, non-zero on error. On error errp will be set 1813e971aa12SJeff Cody * as well. 1814e971aa12SJeff Cody * 1815e971aa12SJeff Cody * On failure, bdrv_reopen_abort() will be called to clean up any data. 
1816e971aa12SJeff Cody * It is the responsibility of the caller to then call the abort() or 1817e971aa12SJeff Cody * commit() for any other BDS that have been left in a prepare() state 1818e971aa12SJeff Cody * 1819e971aa12SJeff Cody */ 1820e971aa12SJeff Cody int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, 1821e971aa12SJeff Cody Error **errp) 1822e971aa12SJeff Cody { 1823e971aa12SJeff Cody int ret = -1; 1824e971aa12SJeff Cody Error *local_err = NULL; 1825e971aa12SJeff Cody BlockDriver *drv; 1826e971aa12SJeff Cody 1827e971aa12SJeff Cody assert(reopen_state != NULL); 1828e971aa12SJeff Cody assert(reopen_state->bs->drv != NULL); 1829e971aa12SJeff Cody drv = reopen_state->bs->drv; 1830e971aa12SJeff Cody 1831e971aa12SJeff Cody /* if we are to stay read-only, do not allow permission change 1832e971aa12SJeff Cody * to r/w */ 1833e971aa12SJeff Cody if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) && 1834e971aa12SJeff Cody reopen_state->flags & BDRV_O_RDWR) { 183581e5f78aSAlberto Garcia error_setg(errp, "Node '%s' is read only", 183681e5f78aSAlberto Garcia bdrv_get_device_or_node_name(reopen_state->bs)); 1837e971aa12SJeff Cody goto error; 1838e971aa12SJeff Cody } 1839e971aa12SJeff Cody 1840e971aa12SJeff Cody 1841e971aa12SJeff Cody ret = bdrv_flush(reopen_state->bs); 1842e971aa12SJeff Cody if (ret) { 1843e971aa12SJeff Cody error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive", 1844e971aa12SJeff Cody strerror(-ret)); 1845e971aa12SJeff Cody goto error; 1846e971aa12SJeff Cody } 1847e971aa12SJeff Cody 1848e971aa12SJeff Cody if (drv->bdrv_reopen_prepare) { 1849e971aa12SJeff Cody ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err); 1850e971aa12SJeff Cody if (ret) { 1851e971aa12SJeff Cody if (local_err != NULL) { 1852e971aa12SJeff Cody error_propagate(errp, local_err); 1853e971aa12SJeff Cody } else { 1854d8b6895fSLuiz Capitulino error_setg(errp, "failed while preparing to reopen image '%s'", 1855e971aa12SJeff Cody 
reopen_state->bs->filename); 1856e971aa12SJeff Cody } 1857e971aa12SJeff Cody goto error; 1858e971aa12SJeff Cody } 1859e971aa12SJeff Cody } else { 1860e971aa12SJeff Cody /* It is currently mandatory to have a bdrv_reopen_prepare() 1861e971aa12SJeff Cody * handler for each supported drv. */ 186281e5f78aSAlberto Garcia error_setg(errp, "Block format '%s' used by node '%s' " 186381e5f78aSAlberto Garcia "does not support reopening files", drv->format_name, 186481e5f78aSAlberto Garcia bdrv_get_device_or_node_name(reopen_state->bs)); 1865e971aa12SJeff Cody ret = -1; 1866e971aa12SJeff Cody goto error; 1867e971aa12SJeff Cody } 1868e971aa12SJeff Cody 1869e971aa12SJeff Cody ret = 0; 1870e971aa12SJeff Cody 1871e971aa12SJeff Cody error: 1872e971aa12SJeff Cody return ret; 1873e971aa12SJeff Cody } 1874e971aa12SJeff Cody 1875e971aa12SJeff Cody /* 1876e971aa12SJeff Cody * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and 1877e971aa12SJeff Cody * makes them final by swapping the staging BlockDriverState contents into 1878e971aa12SJeff Cody * the active BlockDriverState contents. 
1879e971aa12SJeff Cody */ 1880e971aa12SJeff Cody void bdrv_reopen_commit(BDRVReopenState *reopen_state) 1881e971aa12SJeff Cody { 1882e971aa12SJeff Cody BlockDriver *drv; 1883e971aa12SJeff Cody 1884e971aa12SJeff Cody assert(reopen_state != NULL); 1885e971aa12SJeff Cody drv = reopen_state->bs->drv; 1886e971aa12SJeff Cody assert(drv != NULL); 1887e971aa12SJeff Cody 1888e971aa12SJeff Cody /* If there are any driver level actions to take */ 1889e971aa12SJeff Cody if (drv->bdrv_reopen_commit) { 1890e971aa12SJeff Cody drv->bdrv_reopen_commit(reopen_state); 1891e971aa12SJeff Cody } 1892e971aa12SJeff Cody 1893e971aa12SJeff Cody /* set BDS specific flags now */ 1894e971aa12SJeff Cody reopen_state->bs->open_flags = reopen_state->flags; 1895e971aa12SJeff Cody reopen_state->bs->enable_write_cache = !!(reopen_state->flags & 1896e971aa12SJeff Cody BDRV_O_CACHE_WB); 1897e971aa12SJeff Cody reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR); 1898355ef4acSKevin Wolf 18993baca891SKevin Wolf bdrv_refresh_limits(reopen_state->bs, NULL); 1900e971aa12SJeff Cody } 1901e971aa12SJeff Cody 1902e971aa12SJeff Cody /* 1903e971aa12SJeff Cody * Abort the reopen, and delete and free the staged changes in 1904e971aa12SJeff Cody * reopen_state 1905e971aa12SJeff Cody */ 1906e971aa12SJeff Cody void bdrv_reopen_abort(BDRVReopenState *reopen_state) 1907e971aa12SJeff Cody { 1908e971aa12SJeff Cody BlockDriver *drv; 1909e971aa12SJeff Cody 1910e971aa12SJeff Cody assert(reopen_state != NULL); 1911e971aa12SJeff Cody drv = reopen_state->bs->drv; 1912e971aa12SJeff Cody assert(drv != NULL); 1913e971aa12SJeff Cody 1914e971aa12SJeff Cody if (drv->bdrv_reopen_abort) { 1915e971aa12SJeff Cody drv->bdrv_reopen_abort(reopen_state); 1916e971aa12SJeff Cody } 1917e971aa12SJeff Cody } 1918e971aa12SJeff Cody 1919e971aa12SJeff Cody 1920fc01f7e7Sbellard void bdrv_close(BlockDriverState *bs) 1921fc01f7e7Sbellard { 192233384421SMax Reitz BdrvAioNotifier *ban, *ban_next; 192333384421SMax Reitz 19243e914655SPaolo 
Bonzini if (bs->job) { 19253e914655SPaolo Bonzini block_job_cancel_sync(bs->job); 19263e914655SPaolo Bonzini } 192758fda173SStefan Hajnoczi bdrv_drain_all(); /* complete I/O */ 192858fda173SStefan Hajnoczi bdrv_flush(bs); 192958fda173SStefan Hajnoczi bdrv_drain_all(); /* in case flush left pending I/O */ 1930d7d512f6SPaolo Bonzini notifier_list_notify(&bs->close_notifiers, bs); 19317094f12fSKevin Wolf 19323cbc002cSPaolo Bonzini if (bs->drv) { 1933557df6acSStefan Hajnoczi if (bs->backing_hd) { 1934826b6ca0SFam Zheng BlockDriverState *backing_hd = bs->backing_hd; 1935826b6ca0SFam Zheng bdrv_set_backing_hd(bs, NULL); 1936826b6ca0SFam Zheng bdrv_unref(backing_hd); 1937557df6acSStefan Hajnoczi } 1938ea2384d3Sbellard bs->drv->bdrv_close(bs); 19397267c094SAnthony Liguori g_free(bs->opaque); 1940ea2384d3Sbellard bs->opaque = NULL; 1941ea2384d3Sbellard bs->drv = NULL; 194253fec9d3SStefan Hajnoczi bs->copy_on_read = 0; 1943a275fa42SPaolo Bonzini bs->backing_file[0] = '\0'; 1944a275fa42SPaolo Bonzini bs->backing_format[0] = '\0'; 19456405875cSPaolo Bonzini bs->total_sectors = 0; 19466405875cSPaolo Bonzini bs->encrypted = 0; 19476405875cSPaolo Bonzini bs->valid_key = 0; 19486405875cSPaolo Bonzini bs->sg = 0; 19490d51b4deSAsias He bs->zero_beyond_eof = false; 1950de9c0cecSKevin Wolf QDECREF(bs->options); 1951de9c0cecSKevin Wolf bs->options = NULL; 195291af7014SMax Reitz QDECREF(bs->full_open_options); 195391af7014SMax Reitz bs->full_open_options = NULL; 1954b338082bSbellard 195566f82ceeSKevin Wolf if (bs->file != NULL) { 19564f6fd349SFam Zheng bdrv_unref(bs->file); 19570ac9377dSPaolo Bonzini bs->file = NULL; 195866f82ceeSKevin Wolf } 19599ca11154SPavel Hrdina } 196066f82ceeSKevin Wolf 1961a7f53e26SMarkus Armbruster if (bs->blk) { 1962a7f53e26SMarkus Armbruster blk_dev_change_media_cb(bs->blk, false); 1963a7f53e26SMarkus Armbruster } 196498f90dbaSZhi Yong Wu 196598f90dbaSZhi Yong Wu /*throttling disk I/O limits*/ 196698f90dbaSZhi Yong Wu if (bs->io_limits_enabled) { 
196798f90dbaSZhi Yong Wu bdrv_io_limits_disable(bs); 196898f90dbaSZhi Yong Wu } 196933384421SMax Reitz 197033384421SMax Reitz QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { 197133384421SMax Reitz g_free(ban); 197233384421SMax Reitz } 197333384421SMax Reitz QLIST_INIT(&bs->aio_notifiers); 1974b338082bSbellard } 1975b338082bSbellard 19762bc93fedSMORITA Kazutaka void bdrv_close_all(void) 19772bc93fedSMORITA Kazutaka { 19782bc93fedSMORITA Kazutaka BlockDriverState *bs; 19792bc93fedSMORITA Kazutaka 1980dc364f4cSBenoît Canet QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 1981ed78cda3SStefan Hajnoczi AioContext *aio_context = bdrv_get_aio_context(bs); 1982ed78cda3SStefan Hajnoczi 1983ed78cda3SStefan Hajnoczi aio_context_acquire(aio_context); 19842bc93fedSMORITA Kazutaka bdrv_close(bs); 1985ed78cda3SStefan Hajnoczi aio_context_release(aio_context); 19862bc93fedSMORITA Kazutaka } 19872bc93fedSMORITA Kazutaka } 19882bc93fedSMORITA Kazutaka 198988266f5aSStefan Hajnoczi /* Check if any requests are in-flight (including throttled requests) */ 199088266f5aSStefan Hajnoczi static bool bdrv_requests_pending(BlockDriverState *bs) 199188266f5aSStefan Hajnoczi { 199288266f5aSStefan Hajnoczi if (!QLIST_EMPTY(&bs->tracked_requests)) { 199388266f5aSStefan Hajnoczi return true; 199488266f5aSStefan Hajnoczi } 1995cc0681c4SBenoît Canet if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) { 1996cc0681c4SBenoît Canet return true; 1997cc0681c4SBenoît Canet } 1998cc0681c4SBenoît Canet if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) { 199988266f5aSStefan Hajnoczi return true; 200088266f5aSStefan Hajnoczi } 200188266f5aSStefan Hajnoczi if (bs->file && bdrv_requests_pending(bs->file)) { 200288266f5aSStefan Hajnoczi return true; 200388266f5aSStefan Hajnoczi } 200488266f5aSStefan Hajnoczi if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) { 200588266f5aSStefan Hajnoczi return true; 200688266f5aSStefan Hajnoczi } 200788266f5aSStefan Hajnoczi return false; 200888266f5aSStefan 
Hajnoczi } 200988266f5aSStefan Hajnoczi 20105b98db0aSStefan Hajnoczi static bool bdrv_drain_one(BlockDriverState *bs) 20115b98db0aSStefan Hajnoczi { 20125b98db0aSStefan Hajnoczi bool bs_busy; 20135b98db0aSStefan Hajnoczi 20145b98db0aSStefan Hajnoczi bdrv_flush_io_queue(bs); 20155b98db0aSStefan Hajnoczi bdrv_start_throttled_reqs(bs); 20165b98db0aSStefan Hajnoczi bs_busy = bdrv_requests_pending(bs); 20175b98db0aSStefan Hajnoczi bs_busy |= aio_poll(bdrv_get_aio_context(bs), bs_busy); 20185b98db0aSStefan Hajnoczi return bs_busy; 20195b98db0aSStefan Hajnoczi } 20205b98db0aSStefan Hajnoczi 20215b98db0aSStefan Hajnoczi /* 20225b98db0aSStefan Hajnoczi * Wait for pending requests to complete on a single BlockDriverState subtree 20235b98db0aSStefan Hajnoczi * 20245b98db0aSStefan Hajnoczi * See the warning in bdrv_drain_all(). This function can only be called if 20255b98db0aSStefan Hajnoczi * you are sure nothing can generate I/O because you have op blockers 20265b98db0aSStefan Hajnoczi * installed. 20275b98db0aSStefan Hajnoczi * 20285b98db0aSStefan Hajnoczi * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState 20295b98db0aSStefan Hajnoczi * AioContext. 20305b98db0aSStefan Hajnoczi */ 20315b98db0aSStefan Hajnoczi void bdrv_drain(BlockDriverState *bs) 20325b98db0aSStefan Hajnoczi { 20335b98db0aSStefan Hajnoczi while (bdrv_drain_one(bs)) { 20345b98db0aSStefan Hajnoczi /* Keep iterating */ 20355b98db0aSStefan Hajnoczi } 20365b98db0aSStefan Hajnoczi } 20375b98db0aSStefan Hajnoczi 2038922453bcSStefan Hajnoczi /* 2039922453bcSStefan Hajnoczi * Wait for pending requests to complete across all BlockDriverStates 2040922453bcSStefan Hajnoczi * 2041922453bcSStefan Hajnoczi * This function does not flush data to disk, use bdrv_flush_all() for that 2042922453bcSStefan Hajnoczi * after calling this function. 
20434c355d53SZhi Yong Wu * 20444c355d53SZhi Yong Wu * Note that completion of an asynchronous I/O operation can trigger any 20454c355d53SZhi Yong Wu * number of other I/O operations on other devices---for example a coroutine 20464c355d53SZhi Yong Wu * can be arbitrarily complex and a constant flow of I/O can come until the 20474c355d53SZhi Yong Wu * coroutine is complete. Because of this, it is not possible to have a 20484c355d53SZhi Yong Wu * function to drain a single device's I/O queue. 2049922453bcSStefan Hajnoczi */ 2050922453bcSStefan Hajnoczi void bdrv_drain_all(void) 2051922453bcSStefan Hajnoczi { 205288266f5aSStefan Hajnoczi /* Always run first iteration so any pending completion BHs run */ 205388266f5aSStefan Hajnoczi bool busy = true; 2054922453bcSStefan Hajnoczi BlockDriverState *bs; 2055922453bcSStefan Hajnoczi 205669da3b0bSFam Zheng QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 205769da3b0bSFam Zheng AioContext *aio_context = bdrv_get_aio_context(bs); 205869da3b0bSFam Zheng 205969da3b0bSFam Zheng aio_context_acquire(aio_context); 206069da3b0bSFam Zheng if (bs->job) { 206169da3b0bSFam Zheng block_job_pause(bs->job); 206269da3b0bSFam Zheng } 206369da3b0bSFam Zheng aio_context_release(aio_context); 206469da3b0bSFam Zheng } 206569da3b0bSFam Zheng 206688266f5aSStefan Hajnoczi while (busy) { 20679b536adcSStefan Hajnoczi busy = false; 2068922453bcSStefan Hajnoczi 20699b536adcSStefan Hajnoczi QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 20709b536adcSStefan Hajnoczi AioContext *aio_context = bdrv_get_aio_context(bs); 20719b536adcSStefan Hajnoczi 20729b536adcSStefan Hajnoczi aio_context_acquire(aio_context); 20735b98db0aSStefan Hajnoczi busy |= bdrv_drain_one(bs); 20749b536adcSStefan Hajnoczi aio_context_release(aio_context); 20759b536adcSStefan Hajnoczi } 2076922453bcSStefan Hajnoczi } 207769da3b0bSFam Zheng 207869da3b0bSFam Zheng QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 207969da3b0bSFam Zheng AioContext *aio_context = bdrv_get_aio_context(bs); 
208069da3b0bSFam Zheng 208169da3b0bSFam Zheng aio_context_acquire(aio_context); 208269da3b0bSFam Zheng if (bs->job) { 208369da3b0bSFam Zheng block_job_resume(bs->job); 208469da3b0bSFam Zheng } 208569da3b0bSFam Zheng aio_context_release(aio_context); 208669da3b0bSFam Zheng } 2087922453bcSStefan Hajnoczi } 2088922453bcSStefan Hajnoczi 2089dc364f4cSBenoît Canet /* make a BlockDriverState anonymous by removing from bdrv_state and 2090dc364f4cSBenoît Canet * graph_bdrv_state list. 2091d22b2f41SRyan Harper Also, NULL terminate the device_name to prevent double remove */ 2092d22b2f41SRyan Harper void bdrv_make_anon(BlockDriverState *bs) 2093d22b2f41SRyan Harper { 2094bfb197e0SMarkus Armbruster /* 2095bfb197e0SMarkus Armbruster * Take care to remove bs from bdrv_states only when it's actually 2096bfb197e0SMarkus Armbruster * in it. Note that bs->device_list.tqe_prev is initially null, 2097bfb197e0SMarkus Armbruster * and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish 2098bfb197e0SMarkus Armbruster * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by 2099bfb197e0SMarkus Armbruster * resetting it to null on remove. 
2100bfb197e0SMarkus Armbruster */ 2101bfb197e0SMarkus Armbruster if (bs->device_list.tqe_prev) { 2102dc364f4cSBenoît Canet QTAILQ_REMOVE(&bdrv_states, bs, device_list); 2103bfb197e0SMarkus Armbruster bs->device_list.tqe_prev = NULL; 2104d22b2f41SRyan Harper } 2105dc364f4cSBenoît Canet if (bs->node_name[0] != '\0') { 2106dc364f4cSBenoît Canet QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list); 2107dc364f4cSBenoît Canet } 2108dc364f4cSBenoît Canet bs->node_name[0] = '\0'; 2109d22b2f41SRyan Harper } 2110d22b2f41SRyan Harper 2111e023b2e2SPaolo Bonzini static void bdrv_rebind(BlockDriverState *bs) 2112e023b2e2SPaolo Bonzini { 2113e023b2e2SPaolo Bonzini if (bs->drv && bs->drv->bdrv_rebind) { 2114e023b2e2SPaolo Bonzini bs->drv->bdrv_rebind(bs); 2115e023b2e2SPaolo Bonzini } 2116e023b2e2SPaolo Bonzini } 2117e023b2e2SPaolo Bonzini 21184ddc07caSPaolo Bonzini static void bdrv_move_feature_fields(BlockDriverState *bs_dest, 21194ddc07caSPaolo Bonzini BlockDriverState *bs_src) 21204ddc07caSPaolo Bonzini { 21214ddc07caSPaolo Bonzini /* move some fields that need to stay attached to the device */ 21224ddc07caSPaolo Bonzini 21234ddc07caSPaolo Bonzini /* dev info */ 21241b7fd729SPaolo Bonzini bs_dest->guest_block_size = bs_src->guest_block_size; 21254ddc07caSPaolo Bonzini bs_dest->copy_on_read = bs_src->copy_on_read; 21264ddc07caSPaolo Bonzini 21274ddc07caSPaolo Bonzini bs_dest->enable_write_cache = bs_src->enable_write_cache; 21284ddc07caSPaolo Bonzini 2129cc0681c4SBenoît Canet /* i/o throttled req */ 2130cc0681c4SBenoît Canet memcpy(&bs_dest->throttle_state, 2131cc0681c4SBenoît Canet &bs_src->throttle_state, 2132cc0681c4SBenoît Canet sizeof(ThrottleState)); 2133cc0681c4SBenoît Canet bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0]; 2134cc0681c4SBenoît Canet bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1]; 21354ddc07caSPaolo Bonzini bs_dest->io_limits_enabled = bs_src->io_limits_enabled; 21364ddc07caSPaolo Bonzini 21374ddc07caSPaolo Bonzini /* r/w error */ 
21384ddc07caSPaolo Bonzini bs_dest->on_read_error = bs_src->on_read_error; 21394ddc07caSPaolo Bonzini bs_dest->on_write_error = bs_src->on_write_error; 21404ddc07caSPaolo Bonzini 21414ddc07caSPaolo Bonzini /* i/o status */ 21424ddc07caSPaolo Bonzini bs_dest->iostatus_enabled = bs_src->iostatus_enabled; 21434ddc07caSPaolo Bonzini bs_dest->iostatus = bs_src->iostatus; 21444ddc07caSPaolo Bonzini 21454ddc07caSPaolo Bonzini /* dirty bitmap */ 2146e4654d2dSFam Zheng bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps; 21474ddc07caSPaolo Bonzini 21489fcb0251SFam Zheng /* reference count */ 21499fcb0251SFam Zheng bs_dest->refcnt = bs_src->refcnt; 21509fcb0251SFam Zheng 21514ddc07caSPaolo Bonzini /* job */ 21524ddc07caSPaolo Bonzini bs_dest->job = bs_src->job; 21534ddc07caSPaolo Bonzini 21544ddc07caSPaolo Bonzini /* keep the same entry in bdrv_states */ 2155dc364f4cSBenoît Canet bs_dest->device_list = bs_src->device_list; 21567e7d56d9SMarkus Armbruster bs_dest->blk = bs_src->blk; 21577e7d56d9SMarkus Armbruster 2158fbe40ff7SFam Zheng memcpy(bs_dest->op_blockers, bs_src->op_blockers, 2159fbe40ff7SFam Zheng sizeof(bs_dest->op_blockers)); 21604ddc07caSPaolo Bonzini } 21614ddc07caSPaolo Bonzini 21624ddc07caSPaolo Bonzini /* 21634ddc07caSPaolo Bonzini * Swap bs contents for two image chains while they are live, 21644ddc07caSPaolo Bonzini * while keeping required fields on the BlockDriverState that is 21654ddc07caSPaolo Bonzini * actually attached to a device. 21664ddc07caSPaolo Bonzini * 21674ddc07caSPaolo Bonzini * This will modify the BlockDriverState fields, and swap contents 21684ddc07caSPaolo Bonzini * between bs_new and bs_old. Both bs_new and bs_old are modified. 21694ddc07caSPaolo Bonzini * 2170bfb197e0SMarkus Armbruster * bs_new must not be attached to a BlockBackend. 21714ddc07caSPaolo Bonzini * 21724ddc07caSPaolo Bonzini * This function does not create any image files. 
21734ddc07caSPaolo Bonzini */ 21744ddc07caSPaolo Bonzini void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) 21754ddc07caSPaolo Bonzini { 21764ddc07caSPaolo Bonzini BlockDriverState tmp; 21774ddc07caSPaolo Bonzini 217890ce8a06SBenoît Canet /* The code needs to swap the node_name but simply swapping node_list won't 217990ce8a06SBenoît Canet * work so first remove the nodes from the graph list, do the swap then 218090ce8a06SBenoît Canet * insert them back if needed. 218190ce8a06SBenoît Canet */ 218290ce8a06SBenoît Canet if (bs_new->node_name[0] != '\0') { 218390ce8a06SBenoît Canet QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list); 218490ce8a06SBenoît Canet } 218590ce8a06SBenoît Canet if (bs_old->node_name[0] != '\0') { 218690ce8a06SBenoît Canet QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list); 218790ce8a06SBenoît Canet } 218890ce8a06SBenoît Canet 2189bfb197e0SMarkus Armbruster /* bs_new must be unattached and shouldn't have anything fancy enabled */ 21907e7d56d9SMarkus Armbruster assert(!bs_new->blk); 2191e4654d2dSFam Zheng assert(QLIST_EMPTY(&bs_new->dirty_bitmaps)); 21924ddc07caSPaolo Bonzini assert(bs_new->job == NULL); 21934ddc07caSPaolo Bonzini assert(bs_new->io_limits_enabled == false); 2194cc0681c4SBenoît Canet assert(!throttle_have_timer(&bs_new->throttle_state)); 21954ddc07caSPaolo Bonzini 21964ddc07caSPaolo Bonzini tmp = *bs_new; 21974ddc07caSPaolo Bonzini *bs_new = *bs_old; 21984ddc07caSPaolo Bonzini *bs_old = tmp; 21994ddc07caSPaolo Bonzini 22004ddc07caSPaolo Bonzini /* there are some fields that should not be swapped, move them back */ 22014ddc07caSPaolo Bonzini bdrv_move_feature_fields(&tmp, bs_old); 22024ddc07caSPaolo Bonzini bdrv_move_feature_fields(bs_old, bs_new); 22034ddc07caSPaolo Bonzini bdrv_move_feature_fields(bs_new, &tmp); 22044ddc07caSPaolo Bonzini 2205bfb197e0SMarkus Armbruster /* bs_new must remain unattached */ 22067e7d56d9SMarkus Armbruster assert(!bs_new->blk); 22074ddc07caSPaolo Bonzini 22084ddc07caSPaolo Bonzini 
/* Check a few fields that should remain attached to the device */ 22094ddc07caSPaolo Bonzini assert(bs_new->job == NULL); 22104ddc07caSPaolo Bonzini assert(bs_new->io_limits_enabled == false); 2211cc0681c4SBenoît Canet assert(!throttle_have_timer(&bs_new->throttle_state)); 22124ddc07caSPaolo Bonzini 221390ce8a06SBenoît Canet /* insert the nodes back into the graph node list if needed */ 221490ce8a06SBenoît Canet if (bs_new->node_name[0] != '\0') { 221590ce8a06SBenoît Canet QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list); 221690ce8a06SBenoît Canet } 221790ce8a06SBenoît Canet if (bs_old->node_name[0] != '\0') { 221890ce8a06SBenoît Canet QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list); 221990ce8a06SBenoît Canet } 222090ce8a06SBenoît Canet 22214ddc07caSPaolo Bonzini bdrv_rebind(bs_new); 22224ddc07caSPaolo Bonzini bdrv_rebind(bs_old); 22234ddc07caSPaolo Bonzini } 22244ddc07caSPaolo Bonzini 22258802d1fdSJeff Cody /* 22268802d1fdSJeff Cody * Add new bs contents at the top of an image chain while the chain is 22278802d1fdSJeff Cody * live, while keeping required fields on the top layer. 22288802d1fdSJeff Cody * 22298802d1fdSJeff Cody * This will modify the BlockDriverState fields, and swap contents 22308802d1fdSJeff Cody * between bs_new and bs_top. Both bs_new and bs_top are modified. 22318802d1fdSJeff Cody * 2232bfb197e0SMarkus Armbruster * bs_new must not be attached to a BlockBackend. 2233f6801b83SJeff Cody * 22348802d1fdSJeff Cody * This function does not create any image files. 22358802d1fdSJeff Cody */ 22368802d1fdSJeff Cody void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top) 22378802d1fdSJeff Cody { 22384ddc07caSPaolo Bonzini bdrv_swap(bs_new, bs_top); 22398802d1fdSJeff Cody 22408802d1fdSJeff Cody /* The contents of 'tmp' will become bs_top, as we are 22418802d1fdSJeff Cody * swapping bs_new and bs_top contents. 
*/ 22428d24cce1SFam Zheng bdrv_set_backing_hd(bs_top, bs_new); 22438802d1fdSJeff Cody } 22448802d1fdSJeff Cody 22454f6fd349SFam Zheng static void bdrv_delete(BlockDriverState *bs) 2246b338082bSbellard { 22473e914655SPaolo Bonzini assert(!bs->job); 22483718d8abSFam Zheng assert(bdrv_op_blocker_is_empty(bs)); 22494f6fd349SFam Zheng assert(!bs->refcnt); 2250e4654d2dSFam Zheng assert(QLIST_EMPTY(&bs->dirty_bitmaps)); 225118846deeSMarkus Armbruster 2252e1b5c52eSStefan Hajnoczi bdrv_close(bs); 2253e1b5c52eSStefan Hajnoczi 22541b7bdbc1SStefan Hajnoczi /* remove from list, if necessary */ 2255d22b2f41SRyan Harper bdrv_make_anon(bs); 225634c6f050Saurel32 22577267c094SAnthony Liguori g_free(bs); 2258fc01f7e7Sbellard } 2259fc01f7e7Sbellard 2260e97fc193Saliguori /* 2261e97fc193Saliguori * Run consistency checks on an image 2262e97fc193Saliguori * 2263e076f338SKevin Wolf * Returns 0 if the check could be completed (it doesn't mean that the image is 2264a1c7273bSStefan Weil * free of errors) or -errno when an internal error occurred. The results of the 2265e076f338SKevin Wolf * check are stored in res. 
2266e97fc193Saliguori */ 22674534ff54SKevin Wolf int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) 2268e97fc193Saliguori { 2269908bcd54SMax Reitz if (bs->drv == NULL) { 2270908bcd54SMax Reitz return -ENOMEDIUM; 2271908bcd54SMax Reitz } 2272e97fc193Saliguori if (bs->drv->bdrv_check == NULL) { 2273e97fc193Saliguori return -ENOTSUP; 2274e97fc193Saliguori } 2275e97fc193Saliguori 2276e076f338SKevin Wolf memset(res, 0, sizeof(*res)); 22774534ff54SKevin Wolf return bs->drv->bdrv_check(bs, res, fix); 2278e97fc193Saliguori } 2279e97fc193Saliguori 22808a426614SKevin Wolf #define COMMIT_BUF_SECTORS 2048 22818a426614SKevin Wolf 228233e3963eSbellard /* commit COW file into the raw image */ 228333e3963eSbellard int bdrv_commit(BlockDriverState *bs) 228433e3963eSbellard { 228519cb3738Sbellard BlockDriver *drv = bs->drv; 228672706ea4SJeff Cody int64_t sector, total_sectors, length, backing_length; 22878a426614SKevin Wolf int n, ro, open_flags; 22880bce597dSJeff Cody int ret = 0; 228972706ea4SJeff Cody uint8_t *buf = NULL; 229033e3963eSbellard 229119cb3738Sbellard if (!drv) 229219cb3738Sbellard return -ENOMEDIUM; 229333e3963eSbellard 22944dca4b63SNaphtali Sprei if (!bs->backing_hd) { 22954dca4b63SNaphtali Sprei return -ENOTSUP; 22964dca4b63SNaphtali Sprei } 22974dca4b63SNaphtali Sprei 2298bb00021dSFam Zheng if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) || 2299bb00021dSFam Zheng bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) { 23002d3735d3SStefan Hajnoczi return -EBUSY; 23012d3735d3SStefan Hajnoczi } 23022d3735d3SStefan Hajnoczi 23034dca4b63SNaphtali Sprei ro = bs->backing_hd->read_only; 23044dca4b63SNaphtali Sprei open_flags = bs->backing_hd->open_flags; 23054dca4b63SNaphtali Sprei 23064dca4b63SNaphtali Sprei if (ro) { 23070bce597dSJeff Cody if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) { 23080bce597dSJeff Cody return -EACCES; 23094dca4b63SNaphtali Sprei } 2310ea2384d3Sbellard } 
2311ea2384d3Sbellard 231272706ea4SJeff Cody length = bdrv_getlength(bs); 231372706ea4SJeff Cody if (length < 0) { 231472706ea4SJeff Cody ret = length; 231572706ea4SJeff Cody goto ro_cleanup; 231672706ea4SJeff Cody } 231772706ea4SJeff Cody 231872706ea4SJeff Cody backing_length = bdrv_getlength(bs->backing_hd); 231972706ea4SJeff Cody if (backing_length < 0) { 232072706ea4SJeff Cody ret = backing_length; 232172706ea4SJeff Cody goto ro_cleanup; 232272706ea4SJeff Cody } 232372706ea4SJeff Cody 232472706ea4SJeff Cody /* If our top snapshot is larger than the backing file image, 232572706ea4SJeff Cody * grow the backing file image if possible. If not possible, 232672706ea4SJeff Cody * we must return an error */ 232772706ea4SJeff Cody if (length > backing_length) { 232872706ea4SJeff Cody ret = bdrv_truncate(bs->backing_hd, length); 232972706ea4SJeff Cody if (ret < 0) { 233072706ea4SJeff Cody goto ro_cleanup; 233172706ea4SJeff Cody } 233272706ea4SJeff Cody } 233372706ea4SJeff Cody 233472706ea4SJeff Cody total_sectors = length >> BDRV_SECTOR_BITS; 2335857d4f46SKevin Wolf 2336857d4f46SKevin Wolf /* qemu_try_blockalign() for bs will choose an alignment that works for 2337857d4f46SKevin Wolf * bs->backing_hd as well, so no need to compare the alignment manually. 
*/ 2338857d4f46SKevin Wolf buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE); 2339857d4f46SKevin Wolf if (buf == NULL) { 2340857d4f46SKevin Wolf ret = -ENOMEM; 2341857d4f46SKevin Wolf goto ro_cleanup; 2342857d4f46SKevin Wolf } 23438a426614SKevin Wolf 23448a426614SKevin Wolf for (sector = 0; sector < total_sectors; sector += n) { 2345d663640cSPaolo Bonzini ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n); 2346d663640cSPaolo Bonzini if (ret < 0) { 2347d663640cSPaolo Bonzini goto ro_cleanup; 2348d663640cSPaolo Bonzini } 2349d663640cSPaolo Bonzini if (ret) { 2350dabfa6ccSKevin Wolf ret = bdrv_read(bs, sector, buf, n); 2351dabfa6ccSKevin Wolf if (ret < 0) { 23524dca4b63SNaphtali Sprei goto ro_cleanup; 235333e3963eSbellard } 235433e3963eSbellard 2355dabfa6ccSKevin Wolf ret = bdrv_write(bs->backing_hd, sector, buf, n); 2356dabfa6ccSKevin Wolf if (ret < 0) { 23574dca4b63SNaphtali Sprei goto ro_cleanup; 235833e3963eSbellard } 235933e3963eSbellard } 236033e3963eSbellard } 236195389c86Sbellard 23621d44952fSChristoph Hellwig if (drv->bdrv_make_empty) { 23631d44952fSChristoph Hellwig ret = drv->bdrv_make_empty(bs); 2364dabfa6ccSKevin Wolf if (ret < 0) { 2365dabfa6ccSKevin Wolf goto ro_cleanup; 2366dabfa6ccSKevin Wolf } 23671d44952fSChristoph Hellwig bdrv_flush(bs); 23681d44952fSChristoph Hellwig } 236995389c86Sbellard 23703f5075aeSChristoph Hellwig /* 23713f5075aeSChristoph Hellwig * Make sure all data we wrote to the backing device is actually 23723f5075aeSChristoph Hellwig * stable on disk. 
23733f5075aeSChristoph Hellwig */ 2374dabfa6ccSKevin Wolf if (bs->backing_hd) { 23753f5075aeSChristoph Hellwig bdrv_flush(bs->backing_hd); 2376dabfa6ccSKevin Wolf } 23774dca4b63SNaphtali Sprei 2378dabfa6ccSKevin Wolf ret = 0; 23794dca4b63SNaphtali Sprei ro_cleanup: 2380857d4f46SKevin Wolf qemu_vfree(buf); 23814dca4b63SNaphtali Sprei 23824dca4b63SNaphtali Sprei if (ro) { 23830bce597dSJeff Cody /* ignoring error return here */ 23840bce597dSJeff Cody bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL); 23854dca4b63SNaphtali Sprei } 23864dca4b63SNaphtali Sprei 23871d44952fSChristoph Hellwig return ret; 238833e3963eSbellard } 238933e3963eSbellard 2390e8877497SStefan Hajnoczi int bdrv_commit_all(void) 23916ab4b5abSMarkus Armbruster { 23926ab4b5abSMarkus Armbruster BlockDriverState *bs; 23936ab4b5abSMarkus Armbruster 2394dc364f4cSBenoît Canet QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 2395ed78cda3SStefan Hajnoczi AioContext *aio_context = bdrv_get_aio_context(bs); 2396ed78cda3SStefan Hajnoczi 2397ed78cda3SStefan Hajnoczi aio_context_acquire(aio_context); 2398272d2d8eSJeff Cody if (bs->drv && bs->backing_hd) { 2399e8877497SStefan Hajnoczi int ret = bdrv_commit(bs); 2400e8877497SStefan Hajnoczi if (ret < 0) { 2401ed78cda3SStefan Hajnoczi aio_context_release(aio_context); 2402e8877497SStefan Hajnoczi return ret; 24036ab4b5abSMarkus Armbruster } 24046ab4b5abSMarkus Armbruster } 2405ed78cda3SStefan Hajnoczi aio_context_release(aio_context); 2406272d2d8eSJeff Cody } 2407e8877497SStefan Hajnoczi return 0; 2408e8877497SStefan Hajnoczi } 24096ab4b5abSMarkus Armbruster 2410dbffbdcfSStefan Hajnoczi /** 2411dbffbdcfSStefan Hajnoczi * Remove an active request from the tracked requests list 2412dbffbdcfSStefan Hajnoczi * 2413dbffbdcfSStefan Hajnoczi * This function should be called when a tracked request is completing. 
2414dbffbdcfSStefan Hajnoczi */ 2415dbffbdcfSStefan Hajnoczi static void tracked_request_end(BdrvTrackedRequest *req) 2416dbffbdcfSStefan Hajnoczi { 24172dbafdc0SKevin Wolf if (req->serialising) { 24182dbafdc0SKevin Wolf req->bs->serialising_in_flight--; 24192dbafdc0SKevin Wolf } 24202dbafdc0SKevin Wolf 2421dbffbdcfSStefan Hajnoczi QLIST_REMOVE(req, list); 2422f4658285SStefan Hajnoczi qemu_co_queue_restart_all(&req->wait_queue); 2423dbffbdcfSStefan Hajnoczi } 2424dbffbdcfSStefan Hajnoczi 2425dbffbdcfSStefan Hajnoczi /** 2426dbffbdcfSStefan Hajnoczi * Add an active request to the tracked requests list 2427dbffbdcfSStefan Hajnoczi */ 2428dbffbdcfSStefan Hajnoczi static void tracked_request_begin(BdrvTrackedRequest *req, 2429dbffbdcfSStefan Hajnoczi BlockDriverState *bs, 2430793ed47aSKevin Wolf int64_t offset, 2431793ed47aSKevin Wolf unsigned int bytes, bool is_write) 2432dbffbdcfSStefan Hajnoczi { 2433dbffbdcfSStefan Hajnoczi *req = (BdrvTrackedRequest){ 2434dbffbdcfSStefan Hajnoczi .bs = bs, 2435793ed47aSKevin Wolf .offset = offset, 2436793ed47aSKevin Wolf .bytes = bytes, 2437dbffbdcfSStefan Hajnoczi .is_write = is_write, 24385f8b6491SStefan Hajnoczi .co = qemu_coroutine_self(), 24392dbafdc0SKevin Wolf .serialising = false, 24407327145fSKevin Wolf .overlap_offset = offset, 24417327145fSKevin Wolf .overlap_bytes = bytes, 2442dbffbdcfSStefan Hajnoczi }; 2443dbffbdcfSStefan Hajnoczi 2444f4658285SStefan Hajnoczi qemu_co_queue_init(&req->wait_queue); 2445f4658285SStefan Hajnoczi 2446dbffbdcfSStefan Hajnoczi QLIST_INSERT_HEAD(&bs->tracked_requests, req, list); 2447dbffbdcfSStefan Hajnoczi } 2448dbffbdcfSStefan Hajnoczi 2449e96126ffSKevin Wolf static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align) 24502dbafdc0SKevin Wolf { 24517327145fSKevin Wolf int64_t overlap_offset = req->offset & ~(align - 1); 2452e96126ffSKevin Wolf unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align) 24537327145fSKevin Wolf - overlap_offset; 
24547327145fSKevin Wolf 24552dbafdc0SKevin Wolf if (!req->serialising) { 24562dbafdc0SKevin Wolf req->bs->serialising_in_flight++; 24572dbafdc0SKevin Wolf req->serialising = true; 24582dbafdc0SKevin Wolf } 24597327145fSKevin Wolf 24607327145fSKevin Wolf req->overlap_offset = MIN(req->overlap_offset, overlap_offset); 24617327145fSKevin Wolf req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes); 24622dbafdc0SKevin Wolf } 24632dbafdc0SKevin Wolf 2464d83947acSStefan Hajnoczi /** 2465d83947acSStefan Hajnoczi * Round a region to cluster boundaries 2466d83947acSStefan Hajnoczi */ 2467343bded4SPaolo Bonzini void bdrv_round_to_clusters(BlockDriverState *bs, 2468d83947acSStefan Hajnoczi int64_t sector_num, int nb_sectors, 2469d83947acSStefan Hajnoczi int64_t *cluster_sector_num, 2470d83947acSStefan Hajnoczi int *cluster_nb_sectors) 2471d83947acSStefan Hajnoczi { 2472d83947acSStefan Hajnoczi BlockDriverInfo bdi; 2473d83947acSStefan Hajnoczi 2474d83947acSStefan Hajnoczi if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) { 2475d83947acSStefan Hajnoczi *cluster_sector_num = sector_num; 2476d83947acSStefan Hajnoczi *cluster_nb_sectors = nb_sectors; 2477d83947acSStefan Hajnoczi } else { 2478d83947acSStefan Hajnoczi int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE; 2479d83947acSStefan Hajnoczi *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c); 2480d83947acSStefan Hajnoczi *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num + 2481d83947acSStefan Hajnoczi nb_sectors, c); 2482d83947acSStefan Hajnoczi } 2483d83947acSStefan Hajnoczi } 2484d83947acSStefan Hajnoczi 24857327145fSKevin Wolf static int bdrv_get_cluster_size(BlockDriverState *bs) 2486793ed47aSKevin Wolf { 2487793ed47aSKevin Wolf BlockDriverInfo bdi; 24887327145fSKevin Wolf int ret; 2489793ed47aSKevin Wolf 24907327145fSKevin Wolf ret = bdrv_get_info(bs, &bdi); 24917327145fSKevin Wolf if (ret < 0 || bdi.cluster_size == 0) { 24927327145fSKevin Wolf return bs->request_alignment; 2493793ed47aSKevin 
Wolf } else { 24947327145fSKevin Wolf return bdi.cluster_size; 2495793ed47aSKevin Wolf } 2496793ed47aSKevin Wolf } 2497793ed47aSKevin Wolf 2498f4658285SStefan Hajnoczi static bool tracked_request_overlaps(BdrvTrackedRequest *req, 2499793ed47aSKevin Wolf int64_t offset, unsigned int bytes) 2500793ed47aSKevin Wolf { 2501d83947acSStefan Hajnoczi /* aaaa bbbb */ 25027327145fSKevin Wolf if (offset >= req->overlap_offset + req->overlap_bytes) { 2503d83947acSStefan Hajnoczi return false; 2504d83947acSStefan Hajnoczi } 2505d83947acSStefan Hajnoczi /* bbbb aaaa */ 25067327145fSKevin Wolf if (req->overlap_offset >= offset + bytes) { 2507d83947acSStefan Hajnoczi return false; 2508d83947acSStefan Hajnoczi } 2509d83947acSStefan Hajnoczi return true; 2510f4658285SStefan Hajnoczi } 2511f4658285SStefan Hajnoczi 251228de2dcdSKevin Wolf static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self) 2513f4658285SStefan Hajnoczi { 25142dbafdc0SKevin Wolf BlockDriverState *bs = self->bs; 2515f4658285SStefan Hajnoczi BdrvTrackedRequest *req; 2516f4658285SStefan Hajnoczi bool retry; 251728de2dcdSKevin Wolf bool waited = false; 2518f4658285SStefan Hajnoczi 25192dbafdc0SKevin Wolf if (!bs->serialising_in_flight) { 252028de2dcdSKevin Wolf return false; 25212dbafdc0SKevin Wolf } 25222dbafdc0SKevin Wolf 2523f4658285SStefan Hajnoczi do { 2524f4658285SStefan Hajnoczi retry = false; 2525f4658285SStefan Hajnoczi QLIST_FOREACH(req, &bs->tracked_requests, list) { 25262dbafdc0SKevin Wolf if (req == self || (!req->serialising && !self->serialising)) { 252765afd211SKevin Wolf continue; 252865afd211SKevin Wolf } 25297327145fSKevin Wolf if (tracked_request_overlaps(req, self->overlap_offset, 25307327145fSKevin Wolf self->overlap_bytes)) 25317327145fSKevin Wolf { 25325f8b6491SStefan Hajnoczi /* Hitting this means there was a reentrant request, for 25335f8b6491SStefan Hajnoczi * example, a block driver issuing nested requests. 
This must 25345f8b6491SStefan Hajnoczi * never happen since it means deadlock. 25355f8b6491SStefan Hajnoczi */ 25365f8b6491SStefan Hajnoczi assert(qemu_coroutine_self() != req->co); 25375f8b6491SStefan Hajnoczi 25386460440fSKevin Wolf /* If the request is already (indirectly) waiting for us, or 25396460440fSKevin Wolf * will wait for us as soon as it wakes up, then just go on 25406460440fSKevin Wolf * (instead of producing a deadlock in the former case). */ 25416460440fSKevin Wolf if (!req->waiting_for) { 25426460440fSKevin Wolf self->waiting_for = req; 2543f4658285SStefan Hajnoczi qemu_co_queue_wait(&req->wait_queue); 25446460440fSKevin Wolf self->waiting_for = NULL; 2545f4658285SStefan Hajnoczi retry = true; 254628de2dcdSKevin Wolf waited = true; 2547f4658285SStefan Hajnoczi break; 2548f4658285SStefan Hajnoczi } 2549f4658285SStefan Hajnoczi } 25506460440fSKevin Wolf } 2551f4658285SStefan Hajnoczi } while (retry); 255228de2dcdSKevin Wolf 255328de2dcdSKevin Wolf return waited; 2554f4658285SStefan Hajnoczi } 2555f4658285SStefan Hajnoczi 2556756e6736SKevin Wolf /* 2557756e6736SKevin Wolf * Return values: 2558756e6736SKevin Wolf * 0 - success 2559756e6736SKevin Wolf * -EINVAL - backing format specified, but no file 2560756e6736SKevin Wolf * -ENOSPC - can't update the backing file because no space is left in the 2561756e6736SKevin Wolf * image file header 2562756e6736SKevin Wolf * -ENOTSUP - format driver doesn't support changing the backing file 2563756e6736SKevin Wolf */ 2564756e6736SKevin Wolf int bdrv_change_backing_file(BlockDriverState *bs, 2565756e6736SKevin Wolf const char *backing_file, const char *backing_fmt) 2566756e6736SKevin Wolf { 2567756e6736SKevin Wolf BlockDriver *drv = bs->drv; 2568469ef350SPaolo Bonzini int ret; 2569756e6736SKevin Wolf 25705f377794SPaolo Bonzini /* Backing file format doesn't make sense without a backing file */ 25715f377794SPaolo Bonzini if (backing_fmt && !backing_file) { 25725f377794SPaolo Bonzini return -EINVAL; 
25735f377794SPaolo Bonzini } 25745f377794SPaolo Bonzini 2575756e6736SKevin Wolf if (drv->bdrv_change_backing_file != NULL) { 2576469ef350SPaolo Bonzini ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt); 2577756e6736SKevin Wolf } else { 2578469ef350SPaolo Bonzini ret = -ENOTSUP; 2579756e6736SKevin Wolf } 2580469ef350SPaolo Bonzini 2581469ef350SPaolo Bonzini if (ret == 0) { 2582469ef350SPaolo Bonzini pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); 2583469ef350SPaolo Bonzini pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); 2584469ef350SPaolo Bonzini } 2585469ef350SPaolo Bonzini return ret; 2586756e6736SKevin Wolf } 2587756e6736SKevin Wolf 25886ebdcee2SJeff Cody /* 25896ebdcee2SJeff Cody * Finds the image layer in the chain that has 'bs' as its backing file. 25906ebdcee2SJeff Cody * 25916ebdcee2SJeff Cody * active is the current topmost image. 25926ebdcee2SJeff Cody * 25936ebdcee2SJeff Cody * Returns NULL if bs is not found in active's image chain, 25946ebdcee2SJeff Cody * or if active == bs. 25954caf0fcdSJeff Cody * 25964caf0fcdSJeff Cody * Returns the bottommost base image if bs == NULL. 25976ebdcee2SJeff Cody */ 25986ebdcee2SJeff Cody BlockDriverState *bdrv_find_overlay(BlockDriverState *active, 25996ebdcee2SJeff Cody BlockDriverState *bs) 26006ebdcee2SJeff Cody { 26014caf0fcdSJeff Cody while (active && bs != active->backing_hd) { 26024caf0fcdSJeff Cody active = active->backing_hd; 26036ebdcee2SJeff Cody } 26046ebdcee2SJeff Cody 26054caf0fcdSJeff Cody return active; 26066ebdcee2SJeff Cody } 26076ebdcee2SJeff Cody 26084caf0fcdSJeff Cody /* Given a BDS, searches for the base layer. 
*/ 26094caf0fcdSJeff Cody BlockDriverState *bdrv_find_base(BlockDriverState *bs) 26104caf0fcdSJeff Cody { 26114caf0fcdSJeff Cody return bdrv_find_overlay(bs, NULL); 26126ebdcee2SJeff Cody } 26136ebdcee2SJeff Cody 26146ebdcee2SJeff Cody typedef struct BlkIntermediateStates { 26156ebdcee2SJeff Cody BlockDriverState *bs; 26166ebdcee2SJeff Cody QSIMPLEQ_ENTRY(BlkIntermediateStates) entry; 26176ebdcee2SJeff Cody } BlkIntermediateStates; 26186ebdcee2SJeff Cody 26196ebdcee2SJeff Cody 26206ebdcee2SJeff Cody /* 26216ebdcee2SJeff Cody * Drops images above 'base' up to and including 'top', and sets the image 26226ebdcee2SJeff Cody * above 'top' to have base as its backing file. 26236ebdcee2SJeff Cody * 26246ebdcee2SJeff Cody * Requires that the overlay to 'top' is opened r/w, so that the backing file 26256ebdcee2SJeff Cody * information in 'bs' can be properly updated. 26266ebdcee2SJeff Cody * 26276ebdcee2SJeff Cody * E.g., this will convert the following chain: 26286ebdcee2SJeff Cody * bottom <- base <- intermediate <- top <- active 26296ebdcee2SJeff Cody * 26306ebdcee2SJeff Cody * to 26316ebdcee2SJeff Cody * 26326ebdcee2SJeff Cody * bottom <- base <- active 26336ebdcee2SJeff Cody * 26346ebdcee2SJeff Cody * It is allowed for bottom==base, in which case it converts: 26356ebdcee2SJeff Cody * 26366ebdcee2SJeff Cody * base <- intermediate <- top <- active 26376ebdcee2SJeff Cody * 26386ebdcee2SJeff Cody * to 26396ebdcee2SJeff Cody * 26406ebdcee2SJeff Cody * base <- active 26416ebdcee2SJeff Cody * 264254e26900SJeff Cody * If backing_file_str is non-NULL, it will be used when modifying top's 264354e26900SJeff Cody * overlay image metadata. 
264454e26900SJeff Cody * 26456ebdcee2SJeff Cody * Error conditions: 26466ebdcee2SJeff Cody * if active == top, that is considered an error 26476ebdcee2SJeff Cody * 26486ebdcee2SJeff Cody */ 26496ebdcee2SJeff Cody int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, 265054e26900SJeff Cody BlockDriverState *base, const char *backing_file_str) 26516ebdcee2SJeff Cody { 26526ebdcee2SJeff Cody BlockDriverState *intermediate; 26536ebdcee2SJeff Cody BlockDriverState *base_bs = NULL; 26546ebdcee2SJeff Cody BlockDriverState *new_top_bs = NULL; 26556ebdcee2SJeff Cody BlkIntermediateStates *intermediate_state, *next; 26566ebdcee2SJeff Cody int ret = -EIO; 26576ebdcee2SJeff Cody 26586ebdcee2SJeff Cody QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete; 26596ebdcee2SJeff Cody QSIMPLEQ_INIT(&states_to_delete); 26606ebdcee2SJeff Cody 26616ebdcee2SJeff Cody if (!top->drv || !base->drv) { 26626ebdcee2SJeff Cody goto exit; 26636ebdcee2SJeff Cody } 26646ebdcee2SJeff Cody 26656ebdcee2SJeff Cody new_top_bs = bdrv_find_overlay(active, top); 26666ebdcee2SJeff Cody 26676ebdcee2SJeff Cody if (new_top_bs == NULL) { 26686ebdcee2SJeff Cody /* we could not find the image above 'top', this is an error */ 26696ebdcee2SJeff Cody goto exit; 26706ebdcee2SJeff Cody } 26716ebdcee2SJeff Cody 26726ebdcee2SJeff Cody /* special case of new_top_bs->backing_hd already pointing to base - nothing 26736ebdcee2SJeff Cody * to do, no intermediate images */ 26746ebdcee2SJeff Cody if (new_top_bs->backing_hd == base) { 26756ebdcee2SJeff Cody ret = 0; 26766ebdcee2SJeff Cody goto exit; 26776ebdcee2SJeff Cody } 26786ebdcee2SJeff Cody 26796ebdcee2SJeff Cody intermediate = top; 26806ebdcee2SJeff Cody 26816ebdcee2SJeff Cody /* now we will go down through the list, and add each BDS we find 26826ebdcee2SJeff Cody * into our deletion queue, until we hit the 'base' 26836ebdcee2SJeff Cody */ 26846ebdcee2SJeff Cody while (intermediate) { 26855839e53bSMarkus Armbruster 
intermediate_state = g_new0(BlkIntermediateStates, 1); 26866ebdcee2SJeff Cody intermediate_state->bs = intermediate; 26876ebdcee2SJeff Cody QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry); 26886ebdcee2SJeff Cody 26896ebdcee2SJeff Cody if (intermediate->backing_hd == base) { 26906ebdcee2SJeff Cody base_bs = intermediate->backing_hd; 26916ebdcee2SJeff Cody break; 26926ebdcee2SJeff Cody } 26936ebdcee2SJeff Cody intermediate = intermediate->backing_hd; 26946ebdcee2SJeff Cody } 26956ebdcee2SJeff Cody if (base_bs == NULL) { 26966ebdcee2SJeff Cody /* something went wrong, we did not end at the base. safely 26976ebdcee2SJeff Cody * unravel everything, and exit with error */ 26986ebdcee2SJeff Cody goto exit; 26996ebdcee2SJeff Cody } 27006ebdcee2SJeff Cody 27016ebdcee2SJeff Cody /* success - we can delete the intermediate states, and link top->base */ 270254e26900SJeff Cody backing_file_str = backing_file_str ? backing_file_str : base_bs->filename; 270354e26900SJeff Cody ret = bdrv_change_backing_file(new_top_bs, backing_file_str, 27046ebdcee2SJeff Cody base_bs->drv ? 
base_bs->drv->format_name : ""); 27056ebdcee2SJeff Cody if (ret) { 27066ebdcee2SJeff Cody goto exit; 27076ebdcee2SJeff Cody } 2708920beae1SFam Zheng bdrv_set_backing_hd(new_top_bs, base_bs); 27096ebdcee2SJeff Cody 27106ebdcee2SJeff Cody QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) { 27116ebdcee2SJeff Cody /* so that bdrv_close() does not recursively close the chain */ 2712920beae1SFam Zheng bdrv_set_backing_hd(intermediate_state->bs, NULL); 27134f6fd349SFam Zheng bdrv_unref(intermediate_state->bs); 27146ebdcee2SJeff Cody } 27156ebdcee2SJeff Cody ret = 0; 27166ebdcee2SJeff Cody 27176ebdcee2SJeff Cody exit: 27186ebdcee2SJeff Cody QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) { 27196ebdcee2SJeff Cody g_free(intermediate_state); 27206ebdcee2SJeff Cody } 27216ebdcee2SJeff Cody return ret; 27226ebdcee2SJeff Cody } 27236ebdcee2SJeff Cody 27246ebdcee2SJeff Cody 272571d0770cSaliguori static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset, 272671d0770cSaliguori size_t size) 272771d0770cSaliguori { 272875af1f34SPeter Lieven if (size > BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) { 27291dd3a447SKevin Wolf return -EIO; 27301dd3a447SKevin Wolf } 27311dd3a447SKevin Wolf 2732c0191e76SMax Reitz if (!bdrv_is_inserted(bs)) { 273371d0770cSaliguori return -ENOMEDIUM; 2734c0191e76SMax Reitz } 273571d0770cSaliguori 2736c0191e76SMax Reitz if (offset < 0) { 2737fbb7b4e0SKevin Wolf return -EIO; 2738c0191e76SMax Reitz } 273971d0770cSaliguori 274071d0770cSaliguori return 0; 274171d0770cSaliguori } 274271d0770cSaliguori 274371d0770cSaliguori static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num, 274471d0770cSaliguori int nb_sectors) 274571d0770cSaliguori { 274675af1f34SPeter Lieven if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { 27478f4754edSKevin Wolf return -EIO; 27488f4754edSKevin Wolf } 27498f4754edSKevin Wolf 2750eb5a3165SJes Sorensen return bdrv_check_byte_request(bs, sector_num * 
BDRV_SECTOR_SIZE, 2751eb5a3165SJes Sorensen nb_sectors * BDRV_SECTOR_SIZE); 275271d0770cSaliguori } 275371d0770cSaliguori 27541c9805a3SStefan Hajnoczi typedef struct RwCo { 27551c9805a3SStefan Hajnoczi BlockDriverState *bs; 2756775aa8b6SKevin Wolf int64_t offset; 27571c9805a3SStefan Hajnoczi QEMUIOVector *qiov; 27581c9805a3SStefan Hajnoczi bool is_write; 27591c9805a3SStefan Hajnoczi int ret; 27604105eaaaSPeter Lieven BdrvRequestFlags flags; 27611c9805a3SStefan Hajnoczi } RwCo; 27621c9805a3SStefan Hajnoczi 27631c9805a3SStefan Hajnoczi static void coroutine_fn bdrv_rw_co_entry(void *opaque) 2764fc01f7e7Sbellard { 27651c9805a3SStefan Hajnoczi RwCo *rwco = opaque; 2766fc01f7e7Sbellard 27671c9805a3SStefan Hajnoczi if (!rwco->is_write) { 2768775aa8b6SKevin Wolf rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset, 2769775aa8b6SKevin Wolf rwco->qiov->size, rwco->qiov, 27704105eaaaSPeter Lieven rwco->flags); 27711c9805a3SStefan Hajnoczi } else { 2772775aa8b6SKevin Wolf rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset, 2773775aa8b6SKevin Wolf rwco->qiov->size, rwco->qiov, 27744105eaaaSPeter Lieven rwco->flags); 27751c9805a3SStefan Hajnoczi } 27761c9805a3SStefan Hajnoczi } 2777e7a8a783SKevin Wolf 27781c9805a3SStefan Hajnoczi /* 27798d3b1a2dSKevin Wolf * Process a vectored synchronous request using coroutines 27801c9805a3SStefan Hajnoczi */ 2781775aa8b6SKevin Wolf static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset, 27824105eaaaSPeter Lieven QEMUIOVector *qiov, bool is_write, 27834105eaaaSPeter Lieven BdrvRequestFlags flags) 27841c9805a3SStefan Hajnoczi { 27851c9805a3SStefan Hajnoczi Coroutine *co; 27861c9805a3SStefan Hajnoczi RwCo rwco = { 27871c9805a3SStefan Hajnoczi .bs = bs, 2788775aa8b6SKevin Wolf .offset = offset, 27898d3b1a2dSKevin Wolf .qiov = qiov, 27901c9805a3SStefan Hajnoczi .is_write = is_write, 27911c9805a3SStefan Hajnoczi .ret = NOT_DONE, 27924105eaaaSPeter Lieven .flags = flags, 27931c9805a3SStefan Hajnoczi }; 27941c9805a3SStefan Hajnoczi 
2795498e386cSZhi Yong Wu /** 2796498e386cSZhi Yong Wu * In sync call context, when the vcpu is blocked, this throttling timer 2797498e386cSZhi Yong Wu * will not fire; so the I/O throttling function has to be disabled here 2798498e386cSZhi Yong Wu * if it has been enabled. 2799498e386cSZhi Yong Wu */ 2800498e386cSZhi Yong Wu if (bs->io_limits_enabled) { 2801498e386cSZhi Yong Wu fprintf(stderr, "Disabling I/O throttling on '%s' due " 2802498e386cSZhi Yong Wu "to synchronous I/O.\n", bdrv_get_device_name(bs)); 2803498e386cSZhi Yong Wu bdrv_io_limits_disable(bs); 2804498e386cSZhi Yong Wu } 2805498e386cSZhi Yong Wu 28061c9805a3SStefan Hajnoczi if (qemu_in_coroutine()) { 28071c9805a3SStefan Hajnoczi /* Fast-path if already in coroutine context */ 28081c9805a3SStefan Hajnoczi bdrv_rw_co_entry(&rwco); 28091c9805a3SStefan Hajnoczi } else { 28102572b37aSStefan Hajnoczi AioContext *aio_context = bdrv_get_aio_context(bs); 28112572b37aSStefan Hajnoczi 28121c9805a3SStefan Hajnoczi co = qemu_coroutine_create(bdrv_rw_co_entry); 28131c9805a3SStefan Hajnoczi qemu_coroutine_enter(co, &rwco); 28141c9805a3SStefan Hajnoczi while (rwco.ret == NOT_DONE) { 28152572b37aSStefan Hajnoczi aio_poll(aio_context, true); 28161c9805a3SStefan Hajnoczi } 28171c9805a3SStefan Hajnoczi } 28181c9805a3SStefan Hajnoczi return rwco.ret; 2819e7a8a783SKevin Wolf } 2820e7a8a783SKevin Wolf 28218d3b1a2dSKevin Wolf /* 28228d3b1a2dSKevin Wolf * Process a synchronous request using coroutines 28238d3b1a2dSKevin Wolf */ 28248d3b1a2dSKevin Wolf static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, 28254105eaaaSPeter Lieven int nb_sectors, bool is_write, BdrvRequestFlags flags) 28268d3b1a2dSKevin Wolf { 28278d3b1a2dSKevin Wolf QEMUIOVector qiov; 28288d3b1a2dSKevin Wolf struct iovec iov = { 28298d3b1a2dSKevin Wolf .iov_base = (void *)buf, 28308d3b1a2dSKevin Wolf .iov_len = nb_sectors * BDRV_SECTOR_SIZE, 28318d3b1a2dSKevin Wolf }; 28328d3b1a2dSKevin Wolf 283375af1f34SPeter Lieven if (nb_sectors < 
0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { 2834da15ee51SKevin Wolf return -EINVAL; 2835da15ee51SKevin Wolf } 2836da15ee51SKevin Wolf 28378d3b1a2dSKevin Wolf qemu_iovec_init_external(&qiov, &iov, 1); 2838775aa8b6SKevin Wolf return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS, 2839775aa8b6SKevin Wolf &qiov, is_write, flags); 28408d3b1a2dSKevin Wolf } 28418d3b1a2dSKevin Wolf 28421c9805a3SStefan Hajnoczi /* return < 0 if error. See bdrv_write() for the return codes */ 28431c9805a3SStefan Hajnoczi int bdrv_read(BlockDriverState *bs, int64_t sector_num, 28441c9805a3SStefan Hajnoczi uint8_t *buf, int nb_sectors) 28451c9805a3SStefan Hajnoczi { 28464105eaaaSPeter Lieven return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0); 284783f64091Sbellard } 2848fc01f7e7Sbellard 284907d27a44SMarkus Armbruster /* Just like bdrv_read(), but with I/O throttling temporarily disabled */ 285007d27a44SMarkus Armbruster int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num, 285107d27a44SMarkus Armbruster uint8_t *buf, int nb_sectors) 285207d27a44SMarkus Armbruster { 285307d27a44SMarkus Armbruster bool enabled; 285407d27a44SMarkus Armbruster int ret; 285507d27a44SMarkus Armbruster 285607d27a44SMarkus Armbruster enabled = bs->io_limits_enabled; 285707d27a44SMarkus Armbruster bs->io_limits_enabled = false; 28584e7395e8SPeter Lieven ret = bdrv_read(bs, sector_num, buf, nb_sectors); 285907d27a44SMarkus Armbruster bs->io_limits_enabled = enabled; 286007d27a44SMarkus Armbruster return ret; 286107d27a44SMarkus Armbruster } 286207d27a44SMarkus Armbruster 286319cb3738Sbellard /* Return < 0 if error. Important errors are: 286419cb3738Sbellard -EIO generic I/O error (may happen for all errors) 286519cb3738Sbellard -ENOMEDIUM No media inserted. 
286619cb3738Sbellard -EINVAL Invalid sector number or nb_sectors 286719cb3738Sbellard -EACCES Trying to write a read-only device 286819cb3738Sbellard */ 2869fc01f7e7Sbellard int bdrv_write(BlockDriverState *bs, int64_t sector_num, 2870fc01f7e7Sbellard const uint8_t *buf, int nb_sectors) 2871fc01f7e7Sbellard { 28724105eaaaSPeter Lieven return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0); 287383f64091Sbellard } 287483f64091Sbellard 2875aa7bfbffSPeter Lieven int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num, 2876aa7bfbffSPeter Lieven int nb_sectors, BdrvRequestFlags flags) 28774105eaaaSPeter Lieven { 28784105eaaaSPeter Lieven return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true, 2879aa7bfbffSPeter Lieven BDRV_REQ_ZERO_WRITE | flags); 28808d3b1a2dSKevin Wolf } 28818d3b1a2dSKevin Wolf 2882d75cbb5eSPeter Lieven /* 2883d75cbb5eSPeter Lieven * Completely zero out a block device with the help of bdrv_write_zeroes. 2884d75cbb5eSPeter Lieven * The operation is sped up by checking the block status and only writing 2885d75cbb5eSPeter Lieven * zeroes to the device if they currently do not return zeroes. Optional 2886d75cbb5eSPeter Lieven * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP). 2887d75cbb5eSPeter Lieven * 2888d75cbb5eSPeter Lieven * Returns < 0 on error, 0 on success. For error codes see bdrv_write(). 
2889d75cbb5eSPeter Lieven */ 2890d75cbb5eSPeter Lieven int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags) 2891d75cbb5eSPeter Lieven { 2892d32f7c10SMarkus Armbruster int64_t target_sectors, ret, nb_sectors, sector_num = 0; 2893d75cbb5eSPeter Lieven int n; 2894d75cbb5eSPeter Lieven 2895d32f7c10SMarkus Armbruster target_sectors = bdrv_nb_sectors(bs); 2896d32f7c10SMarkus Armbruster if (target_sectors < 0) { 2897d32f7c10SMarkus Armbruster return target_sectors; 28989ce10c0bSKevin Wolf } 28999ce10c0bSKevin Wolf 2900d75cbb5eSPeter Lieven for (;;) { 290175af1f34SPeter Lieven nb_sectors = MIN(target_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS); 2902d75cbb5eSPeter Lieven if (nb_sectors <= 0) { 2903d75cbb5eSPeter Lieven return 0; 2904d75cbb5eSPeter Lieven } 2905d75cbb5eSPeter Lieven ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n); 29063d94ce60SPeter Lieven if (ret < 0) { 29073d94ce60SPeter Lieven error_report("error getting block status at sector %" PRId64 ": %s", 29083d94ce60SPeter Lieven sector_num, strerror(-ret)); 29093d94ce60SPeter Lieven return ret; 29103d94ce60SPeter Lieven } 2911d75cbb5eSPeter Lieven if (ret & BDRV_BLOCK_ZERO) { 2912d75cbb5eSPeter Lieven sector_num += n; 2913d75cbb5eSPeter Lieven continue; 2914d75cbb5eSPeter Lieven } 2915d75cbb5eSPeter Lieven ret = bdrv_write_zeroes(bs, sector_num, n, flags); 2916d75cbb5eSPeter Lieven if (ret < 0) { 2917d75cbb5eSPeter Lieven error_report("error writing zeroes at sector %" PRId64 ": %s", 2918d75cbb5eSPeter Lieven sector_num, strerror(-ret)); 2919d75cbb5eSPeter Lieven return ret; 2920d75cbb5eSPeter Lieven } 2921d75cbb5eSPeter Lieven sector_num += n; 2922d75cbb5eSPeter Lieven } 2923d75cbb5eSPeter Lieven } 2924d75cbb5eSPeter Lieven 2925a3ef6571SKevin Wolf int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes) 292683f64091Sbellard { 2927a3ef6571SKevin Wolf QEMUIOVector qiov; 2928a3ef6571SKevin Wolf struct iovec iov = { 2929a3ef6571SKevin Wolf .iov_base = (void *)buf, 
2930a3ef6571SKevin Wolf .iov_len = bytes, 2931a3ef6571SKevin Wolf }; 29329a8c4cceSKevin Wolf int ret; 293383f64091Sbellard 2934a3ef6571SKevin Wolf if (bytes < 0) { 2935a3ef6571SKevin Wolf return -EINVAL; 293683f64091Sbellard } 293783f64091Sbellard 2938a3ef6571SKevin Wolf qemu_iovec_init_external(&qiov, &iov, 1); 2939a3ef6571SKevin Wolf ret = bdrv_prwv_co(bs, offset, &qiov, false, 0); 2940a3ef6571SKevin Wolf if (ret < 0) { 29419a8c4cceSKevin Wolf return ret; 294283f64091Sbellard } 294383f64091Sbellard 2944a3ef6571SKevin Wolf return bytes; 294583f64091Sbellard } 294683f64091Sbellard 29478d3b1a2dSKevin Wolf int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov) 294883f64091Sbellard { 29499a8c4cceSKevin Wolf int ret; 295083f64091Sbellard 29518407d5d7SKevin Wolf ret = bdrv_prwv_co(bs, offset, qiov, true, 0); 29528d3b1a2dSKevin Wolf if (ret < 0) { 29539a8c4cceSKevin Wolf return ret; 29548d3b1a2dSKevin Wolf } 29558d3b1a2dSKevin Wolf 29568d3b1a2dSKevin Wolf return qiov->size; 29578d3b1a2dSKevin Wolf } 29588d3b1a2dSKevin Wolf 29598d3b1a2dSKevin Wolf int bdrv_pwrite(BlockDriverState *bs, int64_t offset, 29608407d5d7SKevin Wolf const void *buf, int bytes) 29618d3b1a2dSKevin Wolf { 29628d3b1a2dSKevin Wolf QEMUIOVector qiov; 29638d3b1a2dSKevin Wolf struct iovec iov = { 29648d3b1a2dSKevin Wolf .iov_base = (void *) buf, 29658407d5d7SKevin Wolf .iov_len = bytes, 29668d3b1a2dSKevin Wolf }; 29678d3b1a2dSKevin Wolf 29688407d5d7SKevin Wolf if (bytes < 0) { 29698407d5d7SKevin Wolf return -EINVAL; 29708407d5d7SKevin Wolf } 29718407d5d7SKevin Wolf 29728d3b1a2dSKevin Wolf qemu_iovec_init_external(&qiov, &iov, 1); 29738d3b1a2dSKevin Wolf return bdrv_pwritev(bs, offset, &qiov); 297483f64091Sbellard } 297583f64091Sbellard 2976f08145feSKevin Wolf /* 2977f08145feSKevin Wolf * Writes to the file and ensures that no writes are reordered across this 2978f08145feSKevin Wolf * request (acts as a barrier) 2979f08145feSKevin Wolf * 2980f08145feSKevin Wolf * Returns 0 on success, 
-errno in error cases. 2981f08145feSKevin Wolf */ 2982f08145feSKevin Wolf int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset, 2983f08145feSKevin Wolf const void *buf, int count) 2984f08145feSKevin Wolf { 2985f08145feSKevin Wolf int ret; 2986f08145feSKevin Wolf 2987f08145feSKevin Wolf ret = bdrv_pwrite(bs, offset, buf, count); 2988f08145feSKevin Wolf if (ret < 0) { 2989f08145feSKevin Wolf return ret; 2990f08145feSKevin Wolf } 2991f08145feSKevin Wolf 2992f05fa4adSPaolo Bonzini /* No flush needed for cache modes that already do it */ 2993f05fa4adSPaolo Bonzini if (bs->enable_write_cache) { 2994f08145feSKevin Wolf bdrv_flush(bs); 2995f08145feSKevin Wolf } 2996f08145feSKevin Wolf 2997f08145feSKevin Wolf return 0; 2998f08145feSKevin Wolf } 2999f08145feSKevin Wolf 3000470c0504SStefan Hajnoczi static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, 3001ab185921SStefan Hajnoczi int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) 3002ab185921SStefan Hajnoczi { 3003ab185921SStefan Hajnoczi /* Perform I/O through a temporary buffer so that users who scribble over 3004ab185921SStefan Hajnoczi * their read buffer while the operation is in progress do not end up 3005ab185921SStefan Hajnoczi * modifying the image file. This is critical for zero-copy guest I/O 3006ab185921SStefan Hajnoczi * where anything might happen inside guest memory. 
3007ab185921SStefan Hajnoczi */ 3008ab185921SStefan Hajnoczi void *bounce_buffer; 3009ab185921SStefan Hajnoczi 301079c053bdSStefan Hajnoczi BlockDriver *drv = bs->drv; 3011ab185921SStefan Hajnoczi struct iovec iov; 3012ab185921SStefan Hajnoczi QEMUIOVector bounce_qiov; 3013ab185921SStefan Hajnoczi int64_t cluster_sector_num; 3014ab185921SStefan Hajnoczi int cluster_nb_sectors; 3015ab185921SStefan Hajnoczi size_t skip_bytes; 3016ab185921SStefan Hajnoczi int ret; 3017ab185921SStefan Hajnoczi 3018ab185921SStefan Hajnoczi /* Cover entire cluster so no additional backing file I/O is required when 3019ab185921SStefan Hajnoczi * allocating cluster in the image file. 3020ab185921SStefan Hajnoczi */ 3021343bded4SPaolo Bonzini bdrv_round_to_clusters(bs, sector_num, nb_sectors, 3022ab185921SStefan Hajnoczi &cluster_sector_num, &cluster_nb_sectors); 3023ab185921SStefan Hajnoczi 3024470c0504SStefan Hajnoczi trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, 3025ab185921SStefan Hajnoczi cluster_sector_num, cluster_nb_sectors); 3026ab185921SStefan Hajnoczi 3027ab185921SStefan Hajnoczi iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE; 3028857d4f46SKevin Wolf iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len); 3029857d4f46SKevin Wolf if (bounce_buffer == NULL) { 3030857d4f46SKevin Wolf ret = -ENOMEM; 3031857d4f46SKevin Wolf goto err; 3032857d4f46SKevin Wolf } 3033857d4f46SKevin Wolf 3034ab185921SStefan Hajnoczi qemu_iovec_init_external(&bounce_qiov, &iov, 1); 3035ab185921SStefan Hajnoczi 303679c053bdSStefan Hajnoczi ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors, 3037ab185921SStefan Hajnoczi &bounce_qiov); 3038ab185921SStefan Hajnoczi if (ret < 0) { 3039ab185921SStefan Hajnoczi goto err; 3040ab185921SStefan Hajnoczi } 3041ab185921SStefan Hajnoczi 304279c053bdSStefan Hajnoczi if (drv->bdrv_co_write_zeroes && 304379c053bdSStefan Hajnoczi buffer_is_zero(bounce_buffer, iov.iov_len)) { 3044621f0589SKevin Wolf ret = 
bdrv_co_do_write_zeroes(bs, cluster_sector_num, 3045aa7bfbffSPeter Lieven cluster_nb_sectors, 0); 304679c053bdSStefan Hajnoczi } else { 3047f05fa4adSPaolo Bonzini /* This does not change the data on the disk, it is not necessary 3048f05fa4adSPaolo Bonzini * to flush even in cache=writethrough mode. 3049f05fa4adSPaolo Bonzini */ 305079c053bdSStefan Hajnoczi ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors, 3051ab185921SStefan Hajnoczi &bounce_qiov); 305279c053bdSStefan Hajnoczi } 305379c053bdSStefan Hajnoczi 3054ab185921SStefan Hajnoczi if (ret < 0) { 3055ab185921SStefan Hajnoczi /* It might be okay to ignore write errors for guest requests. If this 3056ab185921SStefan Hajnoczi * is a deliberate copy-on-read then we don't want to ignore the error. 3057ab185921SStefan Hajnoczi * Simply report it in all cases. 3058ab185921SStefan Hajnoczi */ 3059ab185921SStefan Hajnoczi goto err; 3060ab185921SStefan Hajnoczi } 3061ab185921SStefan Hajnoczi 3062ab185921SStefan Hajnoczi skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE; 306303396148SMichael Tokarev qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes, 3064ab185921SStefan Hajnoczi nb_sectors * BDRV_SECTOR_SIZE); 3065ab185921SStefan Hajnoczi 3066ab185921SStefan Hajnoczi err: 3067ab185921SStefan Hajnoczi qemu_vfree(bounce_buffer); 3068ab185921SStefan Hajnoczi return ret; 3069ab185921SStefan Hajnoczi } 3070ab185921SStefan Hajnoczi 3071c5fbe571SStefan Hajnoczi /* 3072d0c7f642SKevin Wolf * Forwards an already correctly aligned request to the BlockDriver. This 3073d0c7f642SKevin Wolf * handles copy on read and zeroing after EOF; any other features must be 3074d0c7f642SKevin Wolf * implemented by the caller. 
3075c5fbe571SStefan Hajnoczi */ 3076d0c7f642SKevin Wolf static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, 307765afd211SKevin Wolf BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, 3078ec746e10SKevin Wolf int64_t align, QEMUIOVector *qiov, int flags) 3079da1fa91dSKevin Wolf { 3080da1fa91dSKevin Wolf BlockDriver *drv = bs->drv; 3081dbffbdcfSStefan Hajnoczi int ret; 3082da1fa91dSKevin Wolf 3083d0c7f642SKevin Wolf int64_t sector_num = offset >> BDRV_SECTOR_BITS; 3084d0c7f642SKevin Wolf unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS; 3085da1fa91dSKevin Wolf 3086d0c7f642SKevin Wolf assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); 3087d0c7f642SKevin Wolf assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); 30888eb029c2SKevin Wolf assert(!qiov || bytes == qiov->size); 3089d0c7f642SKevin Wolf 3090d0c7f642SKevin Wolf /* Handle Copy on Read and associated serialisation */ 3091470c0504SStefan Hajnoczi if (flags & BDRV_REQ_COPY_ON_READ) { 30927327145fSKevin Wolf /* If we touch the same cluster it counts as an overlap. This 30937327145fSKevin Wolf * guarantees that allocating writes will be serialized and not race 30947327145fSKevin Wolf * with each other for the same cluster. For example, in copy-on-read 30957327145fSKevin Wolf * it ensures that the CoR read and write operations are atomic and 30967327145fSKevin Wolf * guest writes cannot interleave between them. 
*/ 30977327145fSKevin Wolf mark_request_serialising(req, bdrv_get_cluster_size(bs)); 3098470c0504SStefan Hajnoczi } 3099470c0504SStefan Hajnoczi 31002dbafdc0SKevin Wolf wait_serialising_requests(req); 3101f4658285SStefan Hajnoczi 3102470c0504SStefan Hajnoczi if (flags & BDRV_REQ_COPY_ON_READ) { 3103ab185921SStefan Hajnoczi int pnum; 3104ab185921SStefan Hajnoczi 3105bdad13b9SPaolo Bonzini ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum); 3106ab185921SStefan Hajnoczi if (ret < 0) { 3107ab185921SStefan Hajnoczi goto out; 3108ab185921SStefan Hajnoczi } 3109ab185921SStefan Hajnoczi 3110ab185921SStefan Hajnoczi if (!ret || pnum != nb_sectors) { 3111470c0504SStefan Hajnoczi ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov); 3112ab185921SStefan Hajnoczi goto out; 3113ab185921SStefan Hajnoczi } 3114ab185921SStefan Hajnoczi } 3115ab185921SStefan Hajnoczi 3116d0c7f642SKevin Wolf /* Forward the request to the BlockDriver */ 3117c0191e76SMax Reitz if (!bs->zero_beyond_eof) { 3118dbffbdcfSStefan Hajnoczi ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov); 3119893a8f62SMORITA Kazutaka } else { 3120c0191e76SMax Reitz /* Read zeros after EOF */ 31214049082cSMarkus Armbruster int64_t total_sectors, max_nb_sectors; 3122893a8f62SMORITA Kazutaka 31234049082cSMarkus Armbruster total_sectors = bdrv_nb_sectors(bs); 31244049082cSMarkus Armbruster if (total_sectors < 0) { 31254049082cSMarkus Armbruster ret = total_sectors; 3126893a8f62SMORITA Kazutaka goto out; 3127893a8f62SMORITA Kazutaka } 3128893a8f62SMORITA Kazutaka 31295f5bcd80SKevin Wolf max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num), 31305f5bcd80SKevin Wolf align >> BDRV_SECTOR_BITS); 3131e012b78cSPaolo Bonzini if (nb_sectors < max_nb_sectors) { 3132e012b78cSPaolo Bonzini ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov); 3133e012b78cSPaolo Bonzini } else if (max_nb_sectors > 0) { 313433f461e0SKevin Wolf QEMUIOVector local_qiov; 313533f461e0SKevin Wolf 313633f461e0SKevin Wolf 
qemu_iovec_init(&local_qiov, qiov->niov); 313733f461e0SKevin Wolf qemu_iovec_concat(&local_qiov, qiov, 0, 3138e012b78cSPaolo Bonzini max_nb_sectors * BDRV_SECTOR_SIZE); 313933f461e0SKevin Wolf 3140e012b78cSPaolo Bonzini ret = drv->bdrv_co_readv(bs, sector_num, max_nb_sectors, 314133f461e0SKevin Wolf &local_qiov); 314233f461e0SKevin Wolf 314333f461e0SKevin Wolf qemu_iovec_destroy(&local_qiov); 3144893a8f62SMORITA Kazutaka } else { 3145893a8f62SMORITA Kazutaka ret = 0; 3146893a8f62SMORITA Kazutaka } 3147893a8f62SMORITA Kazutaka 3148893a8f62SMORITA Kazutaka /* Reading beyond end of file is supposed to produce zeroes */ 3149893a8f62SMORITA Kazutaka if (ret == 0 && total_sectors < sector_num + nb_sectors) { 3150893a8f62SMORITA Kazutaka uint64_t offset = MAX(0, total_sectors - sector_num); 3151893a8f62SMORITA Kazutaka uint64_t bytes = (sector_num + nb_sectors - offset) * 3152893a8f62SMORITA Kazutaka BDRV_SECTOR_SIZE; 3153893a8f62SMORITA Kazutaka qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes); 3154893a8f62SMORITA Kazutaka } 3155893a8f62SMORITA Kazutaka } 3156ab185921SStefan Hajnoczi 3157ab185921SStefan Hajnoczi out: 3158dbffbdcfSStefan Hajnoczi return ret; 3159da1fa91dSKevin Wolf } 3160da1fa91dSKevin Wolf 3161fc3959e4SFam Zheng static inline uint64_t bdrv_get_align(BlockDriverState *bs) 3162fc3959e4SFam Zheng { 3163fc3959e4SFam Zheng /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */ 3164fc3959e4SFam Zheng return MAX(BDRV_SECTOR_SIZE, bs->request_alignment); 3165fc3959e4SFam Zheng } 3166fc3959e4SFam Zheng 3167fc3959e4SFam Zheng static inline bool bdrv_req_is_aligned(BlockDriverState *bs, 3168fc3959e4SFam Zheng int64_t offset, size_t bytes) 3169fc3959e4SFam Zheng { 3170fc3959e4SFam Zheng int64_t align = bdrv_get_align(bs); 3171fc3959e4SFam Zheng return !(offset & (align - 1) || (bytes & (align - 1))); 3172fc3959e4SFam Zheng } 3173fc3959e4SFam Zheng 3174d0c7f642SKevin Wolf /* 3175d0c7f642SKevin Wolf * Handle a read request in coroutine 
context 3176d0c7f642SKevin Wolf */ 31771b0288aeSKevin Wolf static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs, 31781b0288aeSKevin Wolf int64_t offset, unsigned int bytes, QEMUIOVector *qiov, 3179d0c7f642SKevin Wolf BdrvRequestFlags flags) 3180d0c7f642SKevin Wolf { 3181d0c7f642SKevin Wolf BlockDriver *drv = bs->drv; 318265afd211SKevin Wolf BdrvTrackedRequest req; 318365afd211SKevin Wolf 3184fc3959e4SFam Zheng uint64_t align = bdrv_get_align(bs); 31851b0288aeSKevin Wolf uint8_t *head_buf = NULL; 31861b0288aeSKevin Wolf uint8_t *tail_buf = NULL; 31871b0288aeSKevin Wolf QEMUIOVector local_qiov; 31881b0288aeSKevin Wolf bool use_local_qiov = false; 3189d0c7f642SKevin Wolf int ret; 3190d0c7f642SKevin Wolf 3191d0c7f642SKevin Wolf if (!drv) { 3192d0c7f642SKevin Wolf return -ENOMEDIUM; 3193d0c7f642SKevin Wolf } 3194b9c64947SMax Reitz 3195b9c64947SMax Reitz ret = bdrv_check_byte_request(bs, offset, bytes); 3196b9c64947SMax Reitz if (ret < 0) { 3197b9c64947SMax Reitz return ret; 3198d0c7f642SKevin Wolf } 3199d0c7f642SKevin Wolf 3200d0c7f642SKevin Wolf if (bs->copy_on_read) { 3201d0c7f642SKevin Wolf flags |= BDRV_REQ_COPY_ON_READ; 3202d0c7f642SKevin Wolf } 3203d0c7f642SKevin Wolf 3204d0c7f642SKevin Wolf /* throttling disk I/O */ 3205d0c7f642SKevin Wolf if (bs->io_limits_enabled) { 3206d5103588SKevin Wolf bdrv_io_limits_intercept(bs, bytes, false); 3207d0c7f642SKevin Wolf } 3208d0c7f642SKevin Wolf 32091b0288aeSKevin Wolf /* Align read if necessary by padding qiov */ 32101b0288aeSKevin Wolf if (offset & (align - 1)) { 32111b0288aeSKevin Wolf head_buf = qemu_blockalign(bs, align); 32121b0288aeSKevin Wolf qemu_iovec_init(&local_qiov, qiov->niov + 2); 32131b0288aeSKevin Wolf qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1)); 32141b0288aeSKevin Wolf qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); 32151b0288aeSKevin Wolf use_local_qiov = true; 32161b0288aeSKevin Wolf 32171b0288aeSKevin Wolf bytes += offset & (align - 1); 32181b0288aeSKevin Wolf offset = 
offset & ~(align - 1); 32191b0288aeSKevin Wolf } 32201b0288aeSKevin Wolf 32211b0288aeSKevin Wolf if ((offset + bytes) & (align - 1)) { 32221b0288aeSKevin Wolf if (!use_local_qiov) { 32231b0288aeSKevin Wolf qemu_iovec_init(&local_qiov, qiov->niov + 1); 32241b0288aeSKevin Wolf qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); 32251b0288aeSKevin Wolf use_local_qiov = true; 32261b0288aeSKevin Wolf } 32271b0288aeSKevin Wolf tail_buf = qemu_blockalign(bs, align); 32281b0288aeSKevin Wolf qemu_iovec_add(&local_qiov, tail_buf, 32291b0288aeSKevin Wolf align - ((offset + bytes) & (align - 1))); 32301b0288aeSKevin Wolf 32311b0288aeSKevin Wolf bytes = ROUND_UP(bytes, align); 32321b0288aeSKevin Wolf } 32331b0288aeSKevin Wolf 323465afd211SKevin Wolf tracked_request_begin(&req, bs, offset, bytes, false); 3235ec746e10SKevin Wolf ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align, 32361b0288aeSKevin Wolf use_local_qiov ? &local_qiov : qiov, 32371b0288aeSKevin Wolf flags); 323865afd211SKevin Wolf tracked_request_end(&req); 32391b0288aeSKevin Wolf 32401b0288aeSKevin Wolf if (use_local_qiov) { 32411b0288aeSKevin Wolf qemu_iovec_destroy(&local_qiov); 32421b0288aeSKevin Wolf qemu_vfree(head_buf); 32431b0288aeSKevin Wolf qemu_vfree(tail_buf); 32441b0288aeSKevin Wolf } 32451b0288aeSKevin Wolf 3246d0c7f642SKevin Wolf return ret; 3247d0c7f642SKevin Wolf } 3248d0c7f642SKevin Wolf 32491b0288aeSKevin Wolf static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, 32501b0288aeSKevin Wolf int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, 32511b0288aeSKevin Wolf BdrvRequestFlags flags) 32521b0288aeSKevin Wolf { 325375af1f34SPeter Lieven if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { 32541b0288aeSKevin Wolf return -EINVAL; 32551b0288aeSKevin Wolf } 32561b0288aeSKevin Wolf 32571b0288aeSKevin Wolf return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS, 32581b0288aeSKevin Wolf nb_sectors << BDRV_SECTOR_BITS, qiov, flags); 32591b0288aeSKevin Wolf } 
32601b0288aeSKevin Wolf 3261c5fbe571SStefan Hajnoczi int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num, 3262da1fa91dSKevin Wolf int nb_sectors, QEMUIOVector *qiov) 3263da1fa91dSKevin Wolf { 3264c5fbe571SStefan Hajnoczi trace_bdrv_co_readv(bs, sector_num, nb_sectors); 3265da1fa91dSKevin Wolf 3266470c0504SStefan Hajnoczi return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0); 3267470c0504SStefan Hajnoczi } 3268470c0504SStefan Hajnoczi 3269470c0504SStefan Hajnoczi int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs, 3270470c0504SStefan Hajnoczi int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) 3271470c0504SStefan Hajnoczi { 3272470c0504SStefan Hajnoczi trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors); 3273470c0504SStefan Hajnoczi 3274470c0504SStefan Hajnoczi return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 3275470c0504SStefan Hajnoczi BDRV_REQ_COPY_ON_READ); 3276c5fbe571SStefan Hajnoczi } 3277c5fbe571SStefan Hajnoczi 327898764152SPeter Lieven #define MAX_WRITE_ZEROES_BOUNCE_BUFFER 32768 3279c31cb707SPeter Lieven 3280f08f2ddaSStefan Hajnoczi static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, 3281aa7bfbffSPeter Lieven int64_t sector_num, int nb_sectors, BdrvRequestFlags flags) 3282f08f2ddaSStefan Hajnoczi { 3283f08f2ddaSStefan Hajnoczi BlockDriver *drv = bs->drv; 3284f08f2ddaSStefan Hajnoczi QEMUIOVector qiov; 3285c31cb707SPeter Lieven struct iovec iov = {0}; 3286c31cb707SPeter Lieven int ret = 0; 3287f08f2ddaSStefan Hajnoczi 328875af1f34SPeter Lieven int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_write_zeroes, 328975af1f34SPeter Lieven BDRV_REQUEST_MAX_SECTORS); 3290621f0589SKevin Wolf 3291c31cb707SPeter Lieven while (nb_sectors > 0 && !ret) { 3292c31cb707SPeter Lieven int num = nb_sectors; 3293c31cb707SPeter Lieven 3294b8d71c09SPaolo Bonzini /* Align request. Block drivers can expect the "bulk" of the request 3295b8d71c09SPaolo Bonzini * to be aligned. 
3296b8d71c09SPaolo Bonzini */ 3297b8d71c09SPaolo Bonzini if (bs->bl.write_zeroes_alignment 3298b8d71c09SPaolo Bonzini && num > bs->bl.write_zeroes_alignment) { 3299b8d71c09SPaolo Bonzini if (sector_num % bs->bl.write_zeroes_alignment != 0) { 3300b8d71c09SPaolo Bonzini /* Make a small request up to the first aligned sector. */ 3301c31cb707SPeter Lieven num = bs->bl.write_zeroes_alignment; 3302c31cb707SPeter Lieven num -= sector_num % bs->bl.write_zeroes_alignment; 3303b8d71c09SPaolo Bonzini } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) { 3304b8d71c09SPaolo Bonzini /* Shorten the request to the last aligned sector. num cannot 3305b8d71c09SPaolo Bonzini * underflow because num > bs->bl.write_zeroes_alignment. 3306b8d71c09SPaolo Bonzini */ 3307b8d71c09SPaolo Bonzini num -= (sector_num + num) % bs->bl.write_zeroes_alignment; 3308b8d71c09SPaolo Bonzini } 3309c31cb707SPeter Lieven } 3310c31cb707SPeter Lieven 3311c31cb707SPeter Lieven /* limit request size */ 3312c31cb707SPeter Lieven if (num > max_write_zeroes) { 3313c31cb707SPeter Lieven num = max_write_zeroes; 3314c31cb707SPeter Lieven } 3315c31cb707SPeter Lieven 3316c31cb707SPeter Lieven ret = -ENOTSUP; 3317f08f2ddaSStefan Hajnoczi /* First try the efficient write zeroes operation */ 3318f08f2ddaSStefan Hajnoczi if (drv->bdrv_co_write_zeroes) { 3319c31cb707SPeter Lieven ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags); 3320f08f2ddaSStefan Hajnoczi } 3321f08f2ddaSStefan Hajnoczi 3322c31cb707SPeter Lieven if (ret == -ENOTSUP) { 3323f08f2ddaSStefan Hajnoczi /* Fall back to bounce buffer if write zeroes is unsupported */ 3324095e4fa4SPeter Lieven int max_xfer_len = MIN_NON_ZERO(bs->bl.max_transfer_length, 332598764152SPeter Lieven MAX_WRITE_ZEROES_BOUNCE_BUFFER); 3326095e4fa4SPeter Lieven num = MIN(num, max_xfer_len); 3327c31cb707SPeter Lieven iov.iov_len = num * BDRV_SECTOR_SIZE; 3328c31cb707SPeter Lieven if (iov.iov_base == NULL) { 3329857d4f46SKevin Wolf iov.iov_base = 
qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE); 3330857d4f46SKevin Wolf if (iov.iov_base == NULL) { 3331857d4f46SKevin Wolf ret = -ENOMEM; 3332857d4f46SKevin Wolf goto fail; 3333857d4f46SKevin Wolf } 3334b8d71c09SPaolo Bonzini memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE); 3335c31cb707SPeter Lieven } 3336f08f2ddaSStefan Hajnoczi qemu_iovec_init_external(&qiov, &iov, 1); 3337f08f2ddaSStefan Hajnoczi 3338c31cb707SPeter Lieven ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov); 3339b8d71c09SPaolo Bonzini 3340b8d71c09SPaolo Bonzini /* Keep bounce buffer around if it is big enough for all 3341b8d71c09SPaolo Bonzini * all future requests. 3342b8d71c09SPaolo Bonzini */ 3343095e4fa4SPeter Lieven if (num < max_xfer_len) { 3344b8d71c09SPaolo Bonzini qemu_vfree(iov.iov_base); 3345b8d71c09SPaolo Bonzini iov.iov_base = NULL; 3346b8d71c09SPaolo Bonzini } 3347c31cb707SPeter Lieven } 3348c31cb707SPeter Lieven 3349c31cb707SPeter Lieven sector_num += num; 3350c31cb707SPeter Lieven nb_sectors -= num; 3351c31cb707SPeter Lieven } 3352f08f2ddaSStefan Hajnoczi 3353857d4f46SKevin Wolf fail: 3354f08f2ddaSStefan Hajnoczi qemu_vfree(iov.iov_base); 3355f08f2ddaSStefan Hajnoczi return ret; 3356f08f2ddaSStefan Hajnoczi } 3357f08f2ddaSStefan Hajnoczi 3358c5fbe571SStefan Hajnoczi /* 3359b404f720SKevin Wolf * Forwards an already correctly aligned write request to the BlockDriver. 
3360c5fbe571SStefan Hajnoczi */ 3361b404f720SKevin Wolf static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, 336265afd211SKevin Wolf BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, 336365afd211SKevin Wolf QEMUIOVector *qiov, int flags) 3364c5fbe571SStefan Hajnoczi { 3365c5fbe571SStefan Hajnoczi BlockDriver *drv = bs->drv; 336628de2dcdSKevin Wolf bool waited; 33676b7cb247SStefan Hajnoczi int ret; 3368da1fa91dSKevin Wolf 3369b404f720SKevin Wolf int64_t sector_num = offset >> BDRV_SECTOR_BITS; 3370b404f720SKevin Wolf unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS; 3371da1fa91dSKevin Wolf 3372b404f720SKevin Wolf assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); 3373b404f720SKevin Wolf assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); 33748eb029c2SKevin Wolf assert(!qiov || bytes == qiov->size); 3375cc0681c4SBenoît Canet 337628de2dcdSKevin Wolf waited = wait_serialising_requests(req); 337728de2dcdSKevin Wolf assert(!waited || !req->serialising); 3378af91f9a7SKevin Wolf assert(req->overlap_offset <= offset); 3379af91f9a7SKevin Wolf assert(offset + bytes <= req->overlap_offset + req->overlap_bytes); 3380244eadefSKevin Wolf 338165afd211SKevin Wolf ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req); 3382d616b224SStefan Hajnoczi 3383465bee1dSPeter Lieven if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF && 3384465bee1dSPeter Lieven !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes && 3385465bee1dSPeter Lieven qemu_iovec_is_zero(qiov)) { 3386465bee1dSPeter Lieven flags |= BDRV_REQ_ZERO_WRITE; 3387465bee1dSPeter Lieven if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) { 3388465bee1dSPeter Lieven flags |= BDRV_REQ_MAY_UNMAP; 3389465bee1dSPeter Lieven } 3390465bee1dSPeter Lieven } 3391465bee1dSPeter Lieven 3392d616b224SStefan Hajnoczi if (ret < 0) { 3393d616b224SStefan Hajnoczi /* Do nothing, write notifier decided to fail this request */ 3394d616b224SStefan Hajnoczi } else if (flags & 
BDRV_REQ_ZERO_WRITE) { 33959e1cb96dSKevin Wolf BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO); 3396aa7bfbffSPeter Lieven ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags); 3397f08f2ddaSStefan Hajnoczi } else { 33989e1cb96dSKevin Wolf BLKDBG_EVENT(bs, BLKDBG_PWRITEV); 33996b7cb247SStefan Hajnoczi ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov); 3400f08f2ddaSStefan Hajnoczi } 34019e1cb96dSKevin Wolf BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE); 34026b7cb247SStefan Hajnoczi 3403f05fa4adSPaolo Bonzini if (ret == 0 && !bs->enable_write_cache) { 3404f05fa4adSPaolo Bonzini ret = bdrv_co_flush(bs); 3405f05fa4adSPaolo Bonzini } 3406f05fa4adSPaolo Bonzini 34071755da16SPaolo Bonzini bdrv_set_dirty(bs, sector_num, nb_sectors); 3408da1fa91dSKevin Wolf 34095366d0c8SBenoît Canet block_acct_highest_sector(&bs->stats, sector_num, nb_sectors); 34105e5a94b6SBenoît Canet 3411c0191e76SMax Reitz if (ret >= 0) { 3412df2a6f29SPaolo Bonzini bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors); 3413df2a6f29SPaolo Bonzini } 3414da1fa91dSKevin Wolf 34156b7cb247SStefan Hajnoczi return ret; 3416da1fa91dSKevin Wolf } 3417da1fa91dSKevin Wolf 3418b404f720SKevin Wolf /* 3419b404f720SKevin Wolf * Handle a write request in coroutine context 3420b404f720SKevin Wolf */ 34216601553eSKevin Wolf static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, 34226601553eSKevin Wolf int64_t offset, unsigned int bytes, QEMUIOVector *qiov, 3423b404f720SKevin Wolf BdrvRequestFlags flags) 3424b404f720SKevin Wolf { 342565afd211SKevin Wolf BdrvTrackedRequest req; 3426fc3959e4SFam Zheng uint64_t align = bdrv_get_align(bs); 34273b8242e0SKevin Wolf uint8_t *head_buf = NULL; 34283b8242e0SKevin Wolf uint8_t *tail_buf = NULL; 34293b8242e0SKevin Wolf QEMUIOVector local_qiov; 34303b8242e0SKevin Wolf bool use_local_qiov = false; 3431b404f720SKevin Wolf int ret; 3432b404f720SKevin Wolf 3433b404f720SKevin Wolf if (!bs->drv) { 3434b404f720SKevin Wolf return -ENOMEDIUM; 3435b404f720SKevin Wolf } 
3436b404f720SKevin Wolf if (bs->read_only) { 3437b404f720SKevin Wolf return -EACCES; 3438b404f720SKevin Wolf } 3439b9c64947SMax Reitz 3440b9c64947SMax Reitz ret = bdrv_check_byte_request(bs, offset, bytes); 3441b9c64947SMax Reitz if (ret < 0) { 3442b9c64947SMax Reitz return ret; 3443b404f720SKevin Wolf } 3444b404f720SKevin Wolf 3445b404f720SKevin Wolf /* throttling disk I/O */ 3446b404f720SKevin Wolf if (bs->io_limits_enabled) { 3447d5103588SKevin Wolf bdrv_io_limits_intercept(bs, bytes, true); 3448b404f720SKevin Wolf } 3449b404f720SKevin Wolf 34503b8242e0SKevin Wolf /* 34513b8242e0SKevin Wolf * Align write if necessary by performing a read-modify-write cycle. 34523b8242e0SKevin Wolf * Pad qiov with the read parts and be sure to have a tracked request not 34533b8242e0SKevin Wolf * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle. 34543b8242e0SKevin Wolf */ 345565afd211SKevin Wolf tracked_request_begin(&req, bs, offset, bytes, true); 34563b8242e0SKevin Wolf 34573b8242e0SKevin Wolf if (offset & (align - 1)) { 34583b8242e0SKevin Wolf QEMUIOVector head_qiov; 34593b8242e0SKevin Wolf struct iovec head_iov; 34603b8242e0SKevin Wolf 34613b8242e0SKevin Wolf mark_request_serialising(&req, align); 34623b8242e0SKevin Wolf wait_serialising_requests(&req); 34633b8242e0SKevin Wolf 34643b8242e0SKevin Wolf head_buf = qemu_blockalign(bs, align); 34653b8242e0SKevin Wolf head_iov = (struct iovec) { 34663b8242e0SKevin Wolf .iov_base = head_buf, 34673b8242e0SKevin Wolf .iov_len = align, 34683b8242e0SKevin Wolf }; 34693b8242e0SKevin Wolf qemu_iovec_init_external(&head_qiov, &head_iov, 1); 34703b8242e0SKevin Wolf 34719e1cb96dSKevin Wolf BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD); 34723b8242e0SKevin Wolf ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align, 34733b8242e0SKevin Wolf align, &head_qiov, 0); 34743b8242e0SKevin Wolf if (ret < 0) { 34753b8242e0SKevin Wolf goto fail; 34763b8242e0SKevin Wolf } 34779e1cb96dSKevin Wolf BLKDBG_EVENT(bs, 
BLKDBG_PWRITEV_RMW_AFTER_HEAD); 34783b8242e0SKevin Wolf 34793b8242e0SKevin Wolf qemu_iovec_init(&local_qiov, qiov->niov + 2); 34803b8242e0SKevin Wolf qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1)); 34813b8242e0SKevin Wolf qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); 34823b8242e0SKevin Wolf use_local_qiov = true; 34833b8242e0SKevin Wolf 34843b8242e0SKevin Wolf bytes += offset & (align - 1); 34853b8242e0SKevin Wolf offset = offset & ~(align - 1); 34863b8242e0SKevin Wolf } 34873b8242e0SKevin Wolf 34883b8242e0SKevin Wolf if ((offset + bytes) & (align - 1)) { 34893b8242e0SKevin Wolf QEMUIOVector tail_qiov; 34903b8242e0SKevin Wolf struct iovec tail_iov; 34913b8242e0SKevin Wolf size_t tail_bytes; 349228de2dcdSKevin Wolf bool waited; 34933b8242e0SKevin Wolf 34943b8242e0SKevin Wolf mark_request_serialising(&req, align); 349528de2dcdSKevin Wolf waited = wait_serialising_requests(&req); 349628de2dcdSKevin Wolf assert(!waited || !use_local_qiov); 34973b8242e0SKevin Wolf 34983b8242e0SKevin Wolf tail_buf = qemu_blockalign(bs, align); 34993b8242e0SKevin Wolf tail_iov = (struct iovec) { 35003b8242e0SKevin Wolf .iov_base = tail_buf, 35013b8242e0SKevin Wolf .iov_len = align, 35023b8242e0SKevin Wolf }; 35033b8242e0SKevin Wolf qemu_iovec_init_external(&tail_qiov, &tail_iov, 1); 35043b8242e0SKevin Wolf 35059e1cb96dSKevin Wolf BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL); 35063b8242e0SKevin Wolf ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align, 35073b8242e0SKevin Wolf align, &tail_qiov, 0); 35083b8242e0SKevin Wolf if (ret < 0) { 35093b8242e0SKevin Wolf goto fail; 35103b8242e0SKevin Wolf } 35119e1cb96dSKevin Wolf BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); 35123b8242e0SKevin Wolf 35133b8242e0SKevin Wolf if (!use_local_qiov) { 35143b8242e0SKevin Wolf qemu_iovec_init(&local_qiov, qiov->niov + 1); 35153b8242e0SKevin Wolf qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); 35163b8242e0SKevin Wolf use_local_qiov = true; 35173b8242e0SKevin Wolf 
} 35183b8242e0SKevin Wolf 35193b8242e0SKevin Wolf tail_bytes = (offset + bytes) & (align - 1); 35203b8242e0SKevin Wolf qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes); 35213b8242e0SKevin Wolf 35223b8242e0SKevin Wolf bytes = ROUND_UP(bytes, align); 35233b8242e0SKevin Wolf } 35243b8242e0SKevin Wolf 3525fc3959e4SFam Zheng if (use_local_qiov) { 3526fc3959e4SFam Zheng /* Local buffer may have non-zero data. */ 3527fc3959e4SFam Zheng flags &= ~BDRV_REQ_ZERO_WRITE; 3528fc3959e4SFam Zheng } 35293b8242e0SKevin Wolf ret = bdrv_aligned_pwritev(bs, &req, offset, bytes, 35303b8242e0SKevin Wolf use_local_qiov ? &local_qiov : qiov, 35313b8242e0SKevin Wolf flags); 35323b8242e0SKevin Wolf 35333b8242e0SKevin Wolf fail: 353465afd211SKevin Wolf tracked_request_end(&req); 3535b404f720SKevin Wolf 35363b8242e0SKevin Wolf if (use_local_qiov) { 35373b8242e0SKevin Wolf qemu_iovec_destroy(&local_qiov); 353899c4a85cSKevin Wolf } 35393b8242e0SKevin Wolf qemu_vfree(head_buf); 35403b8242e0SKevin Wolf qemu_vfree(tail_buf); 35413b8242e0SKevin Wolf 3542b404f720SKevin Wolf return ret; 3543b404f720SKevin Wolf } 3544b404f720SKevin Wolf 35456601553eSKevin Wolf static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, 35466601553eSKevin Wolf int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, 35476601553eSKevin Wolf BdrvRequestFlags flags) 35486601553eSKevin Wolf { 354975af1f34SPeter Lieven if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { 35506601553eSKevin Wolf return -EINVAL; 35516601553eSKevin Wolf } 35526601553eSKevin Wolf 35536601553eSKevin Wolf return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS, 35546601553eSKevin Wolf nb_sectors << BDRV_SECTOR_BITS, qiov, flags); 35556601553eSKevin Wolf } 35566601553eSKevin Wolf 3557c5fbe571SStefan Hajnoczi int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num, 3558c5fbe571SStefan Hajnoczi int nb_sectors, QEMUIOVector *qiov) 3559c5fbe571SStefan Hajnoczi { 3560c5fbe571SStefan Hajnoczi 
trace_bdrv_co_writev(bs, sector_num, nb_sectors); 3561c5fbe571SStefan Hajnoczi 3562f08f2ddaSStefan Hajnoczi return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0); 3563f08f2ddaSStefan Hajnoczi } 3564f08f2ddaSStefan Hajnoczi 3565f08f2ddaSStefan Hajnoczi int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, 3566aa7bfbffSPeter Lieven int64_t sector_num, int nb_sectors, 3567aa7bfbffSPeter Lieven BdrvRequestFlags flags) 3568f08f2ddaSStefan Hajnoczi { 3569fc3959e4SFam Zheng int ret; 3570fc3959e4SFam Zheng 357194d6ff21SPaolo Bonzini trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags); 3572f08f2ddaSStefan Hajnoczi 3573d32f35cbSPeter Lieven if (!(bs->open_flags & BDRV_O_UNMAP)) { 3574d32f35cbSPeter Lieven flags &= ~BDRV_REQ_MAY_UNMAP; 3575d32f35cbSPeter Lieven } 3576fc3959e4SFam Zheng if (bdrv_req_is_aligned(bs, sector_num << BDRV_SECTOR_BITS, 3577fc3959e4SFam Zheng nb_sectors << BDRV_SECTOR_BITS)) { 3578fc3959e4SFam Zheng ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL, 3579aa7bfbffSPeter Lieven BDRV_REQ_ZERO_WRITE | flags); 3580fc3959e4SFam Zheng } else { 3581fc3959e4SFam Zheng uint8_t *buf; 3582fc3959e4SFam Zheng QEMUIOVector local_qiov; 3583fc3959e4SFam Zheng size_t bytes = nb_sectors << BDRV_SECTOR_BITS; 3584fc3959e4SFam Zheng 3585fc3959e4SFam Zheng buf = qemu_memalign(bdrv_opt_mem_align(bs), bytes); 3586fc3959e4SFam Zheng memset(buf, 0, bytes); 3587fc3959e4SFam Zheng qemu_iovec_init(&local_qiov, 1); 3588fc3959e4SFam Zheng qemu_iovec_add(&local_qiov, buf, bytes); 3589fc3959e4SFam Zheng 3590fc3959e4SFam Zheng ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, &local_qiov, 3591fc3959e4SFam Zheng BDRV_REQ_ZERO_WRITE | flags); 3592fc3959e4SFam Zheng qemu_vfree(buf); 3593fc3959e4SFam Zheng } 3594fc3959e4SFam Zheng return ret; 3595c5fbe571SStefan Hajnoczi } 3596c5fbe571SStefan Hajnoczi 359783f64091Sbellard /** 359883f64091Sbellard * Truncate file to 'offset' bytes (needed only for file protocols) 359983f64091Sbellard */ 360083f64091Sbellard 
int bdrv_truncate(BlockDriverState *bs, int64_t offset) 360183f64091Sbellard { 360283f64091Sbellard BlockDriver *drv = bs->drv; 360351762288SStefan Hajnoczi int ret; 360483f64091Sbellard if (!drv) 360519cb3738Sbellard return -ENOMEDIUM; 360683f64091Sbellard if (!drv->bdrv_truncate) 360783f64091Sbellard return -ENOTSUP; 360859f2689dSNaphtali Sprei if (bs->read_only) 360959f2689dSNaphtali Sprei return -EACCES; 36109c75e168SJeff Cody 361151762288SStefan Hajnoczi ret = drv->bdrv_truncate(bs, offset); 361251762288SStefan Hajnoczi if (ret == 0) { 361351762288SStefan Hajnoczi ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS); 3614*ce1ffea8SJohn Snow bdrv_dirty_bitmap_truncate(bs); 3615a7f53e26SMarkus Armbruster if (bs->blk) { 3616a7f53e26SMarkus Armbruster blk_dev_resize_cb(bs->blk); 3617a7f53e26SMarkus Armbruster } 361851762288SStefan Hajnoczi } 361951762288SStefan Hajnoczi return ret; 362083f64091Sbellard } 362183f64091Sbellard 362283f64091Sbellard /** 36234a1d5e1fSFam Zheng * Length of a allocated file in bytes. Sparse files are counted by actual 36244a1d5e1fSFam Zheng * allocated space. Return < 0 if error or unknown. 36254a1d5e1fSFam Zheng */ 36264a1d5e1fSFam Zheng int64_t bdrv_get_allocated_file_size(BlockDriverState *bs) 36274a1d5e1fSFam Zheng { 36284a1d5e1fSFam Zheng BlockDriver *drv = bs->drv; 36294a1d5e1fSFam Zheng if (!drv) { 36304a1d5e1fSFam Zheng return -ENOMEDIUM; 36314a1d5e1fSFam Zheng } 36324a1d5e1fSFam Zheng if (drv->bdrv_get_allocated_file_size) { 36334a1d5e1fSFam Zheng return drv->bdrv_get_allocated_file_size(bs); 36344a1d5e1fSFam Zheng } 36354a1d5e1fSFam Zheng if (bs->file) { 36364a1d5e1fSFam Zheng return bdrv_get_allocated_file_size(bs->file); 36374a1d5e1fSFam Zheng } 36384a1d5e1fSFam Zheng return -ENOTSUP; 36394a1d5e1fSFam Zheng } 36404a1d5e1fSFam Zheng 36414a1d5e1fSFam Zheng /** 364265a9bb25SMarkus Armbruster * Return number of sectors on success, -errno on error. 
364383f64091Sbellard */ 364465a9bb25SMarkus Armbruster int64_t bdrv_nb_sectors(BlockDriverState *bs) 364583f64091Sbellard { 364683f64091Sbellard BlockDriver *drv = bs->drv; 364765a9bb25SMarkus Armbruster 364883f64091Sbellard if (!drv) 364919cb3738Sbellard return -ENOMEDIUM; 365051762288SStefan Hajnoczi 3651b94a2610SKevin Wolf if (drv->has_variable_length) { 3652b94a2610SKevin Wolf int ret = refresh_total_sectors(bs, bs->total_sectors); 3653b94a2610SKevin Wolf if (ret < 0) { 3654b94a2610SKevin Wolf return ret; 3655fc01f7e7Sbellard } 365646a4e4e6SStefan Hajnoczi } 365765a9bb25SMarkus Armbruster return bs->total_sectors; 365865a9bb25SMarkus Armbruster } 365965a9bb25SMarkus Armbruster 366065a9bb25SMarkus Armbruster /** 366165a9bb25SMarkus Armbruster * Return length in bytes on success, -errno on error. 366265a9bb25SMarkus Armbruster * The length is always a multiple of BDRV_SECTOR_SIZE. 366365a9bb25SMarkus Armbruster */ 366465a9bb25SMarkus Armbruster int64_t bdrv_getlength(BlockDriverState *bs) 366565a9bb25SMarkus Armbruster { 366665a9bb25SMarkus Armbruster int64_t ret = bdrv_nb_sectors(bs); 366765a9bb25SMarkus Armbruster 366865a9bb25SMarkus Armbruster return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE; 366946a4e4e6SStefan Hajnoczi } 3670fc01f7e7Sbellard 367119cb3738Sbellard /* return 0 as number of sectors if no device present or error */ 367296b8f136Sths void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr) 3673fc01f7e7Sbellard { 367465a9bb25SMarkus Armbruster int64_t nb_sectors = bdrv_nb_sectors(bs); 367565a9bb25SMarkus Armbruster 367665a9bb25SMarkus Armbruster *nb_sectors_ptr = nb_sectors < 0 ? 
0 : nb_sectors; 3677fc01f7e7Sbellard } 3678cf98951bSbellard 3679ff06f5f3SPaolo Bonzini void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error, 3680ff06f5f3SPaolo Bonzini BlockdevOnError on_write_error) 3681abd7f68dSMarkus Armbruster { 3682abd7f68dSMarkus Armbruster bs->on_read_error = on_read_error; 3683abd7f68dSMarkus Armbruster bs->on_write_error = on_write_error; 3684abd7f68dSMarkus Armbruster } 3685abd7f68dSMarkus Armbruster 36861ceee0d5SPaolo Bonzini BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read) 3687abd7f68dSMarkus Armbruster { 3688abd7f68dSMarkus Armbruster return is_read ? bs->on_read_error : bs->on_write_error; 3689abd7f68dSMarkus Armbruster } 3690abd7f68dSMarkus Armbruster 36913e1caa5fSPaolo Bonzini BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error) 36923e1caa5fSPaolo Bonzini { 36933e1caa5fSPaolo Bonzini BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error; 36943e1caa5fSPaolo Bonzini 36953e1caa5fSPaolo Bonzini switch (on_err) { 36963e1caa5fSPaolo Bonzini case BLOCKDEV_ON_ERROR_ENOSPC: 3697a589569fSWenchao Xia return (error == ENOSPC) ? 
3698a589569fSWenchao Xia BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT; 36993e1caa5fSPaolo Bonzini case BLOCKDEV_ON_ERROR_STOP: 3700a589569fSWenchao Xia return BLOCK_ERROR_ACTION_STOP; 37013e1caa5fSPaolo Bonzini case BLOCKDEV_ON_ERROR_REPORT: 3702a589569fSWenchao Xia return BLOCK_ERROR_ACTION_REPORT; 37033e1caa5fSPaolo Bonzini case BLOCKDEV_ON_ERROR_IGNORE: 3704a589569fSWenchao Xia return BLOCK_ERROR_ACTION_IGNORE; 37053e1caa5fSPaolo Bonzini default: 37063e1caa5fSPaolo Bonzini abort(); 37073e1caa5fSPaolo Bonzini } 37083e1caa5fSPaolo Bonzini } 37093e1caa5fSPaolo Bonzini 3710c7c2ff0cSLuiz Capitulino static void send_qmp_error_event(BlockDriverState *bs, 3711c7c2ff0cSLuiz Capitulino BlockErrorAction action, 3712c7c2ff0cSLuiz Capitulino bool is_read, int error) 3713c7c2ff0cSLuiz Capitulino { 3714573742a5SPeter Maydell IoOperationType optype; 3715c7c2ff0cSLuiz Capitulino 3716573742a5SPeter Maydell optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE; 3717573742a5SPeter Maydell qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action, 3718c7c2ff0cSLuiz Capitulino bdrv_iostatus_is_enabled(bs), 3719624ff573SLuiz Capitulino error == ENOSPC, strerror(error), 3720624ff573SLuiz Capitulino &error_abort); 3721c7c2ff0cSLuiz Capitulino } 3722c7c2ff0cSLuiz Capitulino 37233e1caa5fSPaolo Bonzini /* This is done by device models because, while the block layer knows 37243e1caa5fSPaolo Bonzini * about the error, it does not know whether an operation comes from 37253e1caa5fSPaolo Bonzini * the device or the block layer (from a job, for example). 
37263e1caa5fSPaolo Bonzini */ 37273e1caa5fSPaolo Bonzini void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action, 37283e1caa5fSPaolo Bonzini bool is_read, int error) 37293e1caa5fSPaolo Bonzini { 37303e1caa5fSPaolo Bonzini assert(error >= 0); 37312bd3bce8SPaolo Bonzini 3732a589569fSWenchao Xia if (action == BLOCK_ERROR_ACTION_STOP) { 37332bd3bce8SPaolo Bonzini /* First set the iostatus, so that "info block" returns an iostatus 37342bd3bce8SPaolo Bonzini * that matches the events raised so far (an additional error iostatus 37352bd3bce8SPaolo Bonzini * is fine, but not a lost one). 37362bd3bce8SPaolo Bonzini */ 37373e1caa5fSPaolo Bonzini bdrv_iostatus_set_err(bs, error); 37382bd3bce8SPaolo Bonzini 37392bd3bce8SPaolo Bonzini /* Then raise the request to stop the VM and the event. 37402bd3bce8SPaolo Bonzini * qemu_system_vmstop_request_prepare has two effects. First, 37412bd3bce8SPaolo Bonzini * it ensures that the STOP event always comes after the 37422bd3bce8SPaolo Bonzini * BLOCK_IO_ERROR event. Second, it ensures that even if management 37432bd3bce8SPaolo Bonzini * can observe the STOP event and do a "cont" before the STOP 37442bd3bce8SPaolo Bonzini * event is issued, the VM will not stop. In this case, vm_start() 37452bd3bce8SPaolo Bonzini * also ensures that the STOP/RESUME pair of events is emitted. 
37462bd3bce8SPaolo Bonzini */ 37472bd3bce8SPaolo Bonzini qemu_system_vmstop_request_prepare(); 3748c7c2ff0cSLuiz Capitulino send_qmp_error_event(bs, action, is_read, error); 37492bd3bce8SPaolo Bonzini qemu_system_vmstop_request(RUN_STATE_IO_ERROR); 37502bd3bce8SPaolo Bonzini } else { 3751c7c2ff0cSLuiz Capitulino send_qmp_error_event(bs, action, is_read, error); 37523e1caa5fSPaolo Bonzini } 37533e1caa5fSPaolo Bonzini } 37543e1caa5fSPaolo Bonzini 3755b338082bSbellard int bdrv_is_read_only(BlockDriverState *bs) 3756b338082bSbellard { 3757b338082bSbellard return bs->read_only; 3758b338082bSbellard } 3759b338082bSbellard 3760985a03b0Sths int bdrv_is_sg(BlockDriverState *bs) 3761985a03b0Sths { 3762985a03b0Sths return bs->sg; 3763985a03b0Sths } 3764985a03b0Sths 3765e900a7b7SChristoph Hellwig int bdrv_enable_write_cache(BlockDriverState *bs) 3766e900a7b7SChristoph Hellwig { 3767e900a7b7SChristoph Hellwig return bs->enable_write_cache; 3768e900a7b7SChristoph Hellwig } 3769e900a7b7SChristoph Hellwig 3770425b0148SPaolo Bonzini void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce) 3771425b0148SPaolo Bonzini { 3772425b0148SPaolo Bonzini bs->enable_write_cache = wce; 377355b110f2SJeff Cody 377455b110f2SJeff Cody /* so a reopen() will preserve wce */ 377555b110f2SJeff Cody if (wce) { 377655b110f2SJeff Cody bs->open_flags |= BDRV_O_CACHE_WB; 377755b110f2SJeff Cody } else { 377855b110f2SJeff Cody bs->open_flags &= ~BDRV_O_CACHE_WB; 377955b110f2SJeff Cody } 3780425b0148SPaolo Bonzini } 3781425b0148SPaolo Bonzini 3782ea2384d3Sbellard int bdrv_is_encrypted(BlockDriverState *bs) 3783ea2384d3Sbellard { 3784ea2384d3Sbellard if (bs->backing_hd && bs->backing_hd->encrypted) 3785ea2384d3Sbellard return 1; 3786ea2384d3Sbellard return bs->encrypted; 3787ea2384d3Sbellard } 3788ea2384d3Sbellard 3789c0f4ce77Saliguori int bdrv_key_required(BlockDriverState *bs) 3790c0f4ce77Saliguori { 3791c0f4ce77Saliguori BlockDriverState *backing_hd = bs->backing_hd; 3792c0f4ce77Saliguori 
3793c0f4ce77Saliguori if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key) 3794c0f4ce77Saliguori return 1; 3795c0f4ce77Saliguori return (bs->encrypted && !bs->valid_key); 3796c0f4ce77Saliguori } 3797c0f4ce77Saliguori 3798ea2384d3Sbellard int bdrv_set_key(BlockDriverState *bs, const char *key) 3799ea2384d3Sbellard { 3800ea2384d3Sbellard int ret; 3801ea2384d3Sbellard if (bs->backing_hd && bs->backing_hd->encrypted) { 3802ea2384d3Sbellard ret = bdrv_set_key(bs->backing_hd, key); 3803ea2384d3Sbellard if (ret < 0) 3804ea2384d3Sbellard return ret; 3805ea2384d3Sbellard if (!bs->encrypted) 3806ea2384d3Sbellard return 0; 3807ea2384d3Sbellard } 3808fd04a2aeSShahar Havivi if (!bs->encrypted) { 3809fd04a2aeSShahar Havivi return -EINVAL; 3810fd04a2aeSShahar Havivi } else if (!bs->drv || !bs->drv->bdrv_set_key) { 3811fd04a2aeSShahar Havivi return -ENOMEDIUM; 3812fd04a2aeSShahar Havivi } 3813c0f4ce77Saliguori ret = bs->drv->bdrv_set_key(bs, key); 3814bb5fc20fSaliguori if (ret < 0) { 3815bb5fc20fSaliguori bs->valid_key = 0; 3816bb5fc20fSaliguori } else if (!bs->valid_key) { 3817bb5fc20fSaliguori bs->valid_key = 1; 3818a7f53e26SMarkus Armbruster if (bs->blk) { 3819bb5fc20fSaliguori /* call the change callback now, we skipped it on open */ 3820a7f53e26SMarkus Armbruster blk_dev_change_media_cb(bs->blk, true); 3821a7f53e26SMarkus Armbruster } 3822bb5fc20fSaliguori } 3823c0f4ce77Saliguori return ret; 3824ea2384d3Sbellard } 3825ea2384d3Sbellard 38264d2855a3SMarkus Armbruster /* 38274d2855a3SMarkus Armbruster * Provide an encryption key for @bs. 38284d2855a3SMarkus Armbruster * If @key is non-null: 38294d2855a3SMarkus Armbruster * If @bs is not encrypted, fail. 38304d2855a3SMarkus Armbruster * Else if the key is invalid, fail. 38314d2855a3SMarkus Armbruster * Else set @bs's key to @key, replacing the existing key, if any. 38324d2855a3SMarkus Armbruster * If @key is null: 38334d2855a3SMarkus Armbruster * If @bs is encrypted and still lacks a key, fail. 
38344d2855a3SMarkus Armbruster * Else do nothing. 38354d2855a3SMarkus Armbruster * On failure, store an error object through @errp if non-null. 38364d2855a3SMarkus Armbruster */ 38374d2855a3SMarkus Armbruster void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp) 38384d2855a3SMarkus Armbruster { 38394d2855a3SMarkus Armbruster if (key) { 38404d2855a3SMarkus Armbruster if (!bdrv_is_encrypted(bs)) { 384181e5f78aSAlberto Garcia error_setg(errp, "Node '%s' is not encrypted", 384281e5f78aSAlberto Garcia bdrv_get_device_or_node_name(bs)); 38434d2855a3SMarkus Armbruster } else if (bdrv_set_key(bs, key) < 0) { 38444d2855a3SMarkus Armbruster error_set(errp, QERR_INVALID_PASSWORD); 38454d2855a3SMarkus Armbruster } 38464d2855a3SMarkus Armbruster } else { 38474d2855a3SMarkus Armbruster if (bdrv_key_required(bs)) { 3848b1ca6391SMarkus Armbruster error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED, 3849b1ca6391SMarkus Armbruster "'%s' (%s) is encrypted", 385081e5f78aSAlberto Garcia bdrv_get_device_or_node_name(bs), 38514d2855a3SMarkus Armbruster bdrv_get_encrypted_filename(bs)); 38524d2855a3SMarkus Armbruster } 38534d2855a3SMarkus Armbruster } 38544d2855a3SMarkus Armbruster } 38554d2855a3SMarkus Armbruster 3856f8d6bba1SMarkus Armbruster const char *bdrv_get_format_name(BlockDriverState *bs) 3857ea2384d3Sbellard { 3858f8d6bba1SMarkus Armbruster return bs->drv ? 
bs->drv->format_name : NULL; 3859ea2384d3Sbellard } 3860ea2384d3Sbellard 3861ada42401SStefan Hajnoczi static int qsort_strcmp(const void *a, const void *b) 3862ada42401SStefan Hajnoczi { 3863ada42401SStefan Hajnoczi return strcmp(a, b); 3864ada42401SStefan Hajnoczi } 3865ada42401SStefan Hajnoczi 3866ea2384d3Sbellard void bdrv_iterate_format(void (*it)(void *opaque, const char *name), 3867ea2384d3Sbellard void *opaque) 3868ea2384d3Sbellard { 3869ea2384d3Sbellard BlockDriver *drv; 3870e855e4fbSJeff Cody int count = 0; 3871ada42401SStefan Hajnoczi int i; 3872e855e4fbSJeff Cody const char **formats = NULL; 3873ea2384d3Sbellard 38748a22f02aSStefan Hajnoczi QLIST_FOREACH(drv, &bdrv_drivers, list) { 3875e855e4fbSJeff Cody if (drv->format_name) { 3876e855e4fbSJeff Cody bool found = false; 3877e855e4fbSJeff Cody int i = count; 3878e855e4fbSJeff Cody while (formats && i && !found) { 3879e855e4fbSJeff Cody found = !strcmp(formats[--i], drv->format_name); 3880e855e4fbSJeff Cody } 3881e855e4fbSJeff Cody 3882e855e4fbSJeff Cody if (!found) { 38835839e53bSMarkus Armbruster formats = g_renew(const char *, formats, count + 1); 3884e855e4fbSJeff Cody formats[count++] = drv->format_name; 3885ea2384d3Sbellard } 3886ea2384d3Sbellard } 3887e855e4fbSJeff Cody } 3888ada42401SStefan Hajnoczi 3889ada42401SStefan Hajnoczi qsort(formats, count, sizeof(formats[0]), qsort_strcmp); 3890ada42401SStefan Hajnoczi 3891ada42401SStefan Hajnoczi for (i = 0; i < count; i++) { 3892ada42401SStefan Hajnoczi it(opaque, formats[i]); 3893ada42401SStefan Hajnoczi } 3894ada42401SStefan Hajnoczi 3895e855e4fbSJeff Cody g_free(formats); 3896e855e4fbSJeff Cody } 3897ea2384d3Sbellard 3898dc364f4cSBenoît Canet /* This function is to find a node in the bs graph */ 3899dc364f4cSBenoît Canet BlockDriverState *bdrv_find_node(const char *node_name) 3900dc364f4cSBenoît Canet { 3901dc364f4cSBenoît Canet BlockDriverState *bs; 3902dc364f4cSBenoît Canet 3903dc364f4cSBenoît Canet assert(node_name); 3904dc364f4cSBenoît Canet 
3905dc364f4cSBenoît Canet QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 3906dc364f4cSBenoît Canet if (!strcmp(node_name, bs->node_name)) { 3907dc364f4cSBenoît Canet return bs; 3908dc364f4cSBenoît Canet } 3909dc364f4cSBenoît Canet } 3910dc364f4cSBenoît Canet return NULL; 3911dc364f4cSBenoît Canet } 3912dc364f4cSBenoît Canet 3913c13163fbSBenoît Canet /* Put this QMP function here so it can access the static graph_bdrv_states. */ 3914d5a8ee60SAlberto Garcia BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp) 3915c13163fbSBenoît Canet { 3916c13163fbSBenoît Canet BlockDeviceInfoList *list, *entry; 3917c13163fbSBenoît Canet BlockDriverState *bs; 3918c13163fbSBenoît Canet 3919c13163fbSBenoît Canet list = NULL; 3920c13163fbSBenoît Canet QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { 3921d5a8ee60SAlberto Garcia BlockDeviceInfo *info = bdrv_block_device_info(bs, errp); 3922d5a8ee60SAlberto Garcia if (!info) { 3923d5a8ee60SAlberto Garcia qapi_free_BlockDeviceInfoList(list); 3924d5a8ee60SAlberto Garcia return NULL; 3925d5a8ee60SAlberto Garcia } 3926c13163fbSBenoît Canet entry = g_malloc0(sizeof(*entry)); 3927d5a8ee60SAlberto Garcia entry->value = info; 3928c13163fbSBenoît Canet entry->next = list; 3929c13163fbSBenoît Canet list = entry; 3930c13163fbSBenoît Canet } 3931c13163fbSBenoît Canet 3932c13163fbSBenoît Canet return list; 3933c13163fbSBenoît Canet } 3934c13163fbSBenoît Canet 393512d3ba82SBenoît Canet BlockDriverState *bdrv_lookup_bs(const char *device, 393612d3ba82SBenoît Canet const char *node_name, 393712d3ba82SBenoît Canet Error **errp) 393812d3ba82SBenoît Canet { 39397f06d47eSMarkus Armbruster BlockBackend *blk; 39407f06d47eSMarkus Armbruster BlockDriverState *bs; 394112d3ba82SBenoît Canet 394212d3ba82SBenoît Canet if (device) { 39437f06d47eSMarkus Armbruster blk = blk_by_name(device); 394412d3ba82SBenoît Canet 39457f06d47eSMarkus Armbruster if (blk) { 39467f06d47eSMarkus Armbruster return blk_bs(blk); 394712d3ba82SBenoît Canet } 3948dd67fa50SBenoît 
Canet } 394912d3ba82SBenoît Canet 3950dd67fa50SBenoît Canet if (node_name) { 395112d3ba82SBenoît Canet bs = bdrv_find_node(node_name); 395212d3ba82SBenoît Canet 3953dd67fa50SBenoît Canet if (bs) { 3954dd67fa50SBenoît Canet return bs; 3955dd67fa50SBenoît Canet } 395612d3ba82SBenoît Canet } 395712d3ba82SBenoît Canet 3958dd67fa50SBenoît Canet error_setg(errp, "Cannot find device=%s nor node_name=%s", 3959dd67fa50SBenoît Canet device ? device : "", 3960dd67fa50SBenoît Canet node_name ? node_name : ""); 3961dd67fa50SBenoît Canet return NULL; 396212d3ba82SBenoît Canet } 396312d3ba82SBenoît Canet 39645a6684d2SJeff Cody /* If 'base' is in the same chain as 'top', return true. Otherwise, 39655a6684d2SJeff Cody * return false. If either argument is NULL, return false. */ 39665a6684d2SJeff Cody bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base) 39675a6684d2SJeff Cody { 39685a6684d2SJeff Cody while (top && top != base) { 39695a6684d2SJeff Cody top = top->backing_hd; 39705a6684d2SJeff Cody } 39715a6684d2SJeff Cody 39725a6684d2SJeff Cody return top != NULL; 39735a6684d2SJeff Cody } 39745a6684d2SJeff Cody 397504df765aSFam Zheng BlockDriverState *bdrv_next_node(BlockDriverState *bs) 397604df765aSFam Zheng { 397704df765aSFam Zheng if (!bs) { 397804df765aSFam Zheng return QTAILQ_FIRST(&graph_bdrv_states); 397904df765aSFam Zheng } 398004df765aSFam Zheng return QTAILQ_NEXT(bs, node_list); 398104df765aSFam Zheng } 398204df765aSFam Zheng 39832f399b0aSMarkus Armbruster BlockDriverState *bdrv_next(BlockDriverState *bs) 39842f399b0aSMarkus Armbruster { 39852f399b0aSMarkus Armbruster if (!bs) { 39862f399b0aSMarkus Armbruster return QTAILQ_FIRST(&bdrv_states); 39872f399b0aSMarkus Armbruster } 3988dc364f4cSBenoît Canet return QTAILQ_NEXT(bs, device_list); 39892f399b0aSMarkus Armbruster } 39902f399b0aSMarkus Armbruster 399120a9e77dSFam Zheng const char *bdrv_get_node_name(const BlockDriverState *bs) 399220a9e77dSFam Zheng { 399320a9e77dSFam Zheng return bs->node_name; 
399420a9e77dSFam Zheng } 399520a9e77dSFam Zheng 39967f06d47eSMarkus Armbruster /* TODO check what callers really want: bs->node_name or blk_name() */ 3997bfb197e0SMarkus Armbruster const char *bdrv_get_device_name(const BlockDriverState *bs) 3998ea2384d3Sbellard { 3999bfb197e0SMarkus Armbruster return bs->blk ? blk_name(bs->blk) : ""; 4000ea2384d3Sbellard } 4001ea2384d3Sbellard 40029b2aa84fSAlberto Garcia /* This can be used to identify nodes that might not have a device 40039b2aa84fSAlberto Garcia * name associated. Since node and device names live in the same 40049b2aa84fSAlberto Garcia * namespace, the result is unambiguous. The exception is if both are 40059b2aa84fSAlberto Garcia * absent, then this returns an empty (non-null) string. */ 40069b2aa84fSAlberto Garcia const char *bdrv_get_device_or_node_name(const BlockDriverState *bs) 40079b2aa84fSAlberto Garcia { 40089b2aa84fSAlberto Garcia return bs->blk ? blk_name(bs->blk) : bs->node_name; 40099b2aa84fSAlberto Garcia } 40109b2aa84fSAlberto Garcia 4011c8433287SMarkus Armbruster int bdrv_get_flags(BlockDriverState *bs) 4012c8433287SMarkus Armbruster { 4013c8433287SMarkus Armbruster return bs->open_flags; 4014c8433287SMarkus Armbruster } 4015c8433287SMarkus Armbruster 4016f0f0fdfeSKevin Wolf int bdrv_flush_all(void) 4017c6ca28d6Saliguori { 4018c6ca28d6Saliguori BlockDriverState *bs; 4019f0f0fdfeSKevin Wolf int result = 0; 4020c6ca28d6Saliguori 4021dc364f4cSBenoît Canet QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 4022ed78cda3SStefan Hajnoczi AioContext *aio_context = bdrv_get_aio_context(bs); 4023ed78cda3SStefan Hajnoczi int ret; 4024ed78cda3SStefan Hajnoczi 4025ed78cda3SStefan Hajnoczi aio_context_acquire(aio_context); 4026ed78cda3SStefan Hajnoczi ret = bdrv_flush(bs); 4027f0f0fdfeSKevin Wolf if (ret < 0 && !result) { 4028f0f0fdfeSKevin Wolf result = ret; 4029c6ca28d6Saliguori } 4030ed78cda3SStefan Hajnoczi aio_context_release(aio_context); 40311b7bdbc1SStefan Hajnoczi } 4032c6ca28d6Saliguori 
4033f0f0fdfeSKevin Wolf return result; 4034f0f0fdfeSKevin Wolf } 4035f0f0fdfeSKevin Wolf 40363ac21627SPeter Lieven int bdrv_has_zero_init_1(BlockDriverState *bs) 40373ac21627SPeter Lieven { 40383ac21627SPeter Lieven return 1; 40393ac21627SPeter Lieven } 40403ac21627SPeter Lieven 4041f2feebbdSKevin Wolf int bdrv_has_zero_init(BlockDriverState *bs) 4042f2feebbdSKevin Wolf { 4043f2feebbdSKevin Wolf assert(bs->drv); 4044f2feebbdSKevin Wolf 404511212d8fSPaolo Bonzini /* If BS is a copy on write image, it is initialized to 404611212d8fSPaolo Bonzini the contents of the base image, which may not be zeroes. */ 404711212d8fSPaolo Bonzini if (bs->backing_hd) { 404811212d8fSPaolo Bonzini return 0; 404911212d8fSPaolo Bonzini } 4050336c1c12SKevin Wolf if (bs->drv->bdrv_has_zero_init) { 4051336c1c12SKevin Wolf return bs->drv->bdrv_has_zero_init(bs); 4052f2feebbdSKevin Wolf } 4053f2feebbdSKevin Wolf 40543ac21627SPeter Lieven /* safe default */ 40553ac21627SPeter Lieven return 0; 4056f2feebbdSKevin Wolf } 4057f2feebbdSKevin Wolf 40584ce78691SPeter Lieven bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs) 40594ce78691SPeter Lieven { 40604ce78691SPeter Lieven BlockDriverInfo bdi; 40614ce78691SPeter Lieven 40624ce78691SPeter Lieven if (bs->backing_hd) { 40634ce78691SPeter Lieven return false; 40644ce78691SPeter Lieven } 40654ce78691SPeter Lieven 40664ce78691SPeter Lieven if (bdrv_get_info(bs, &bdi) == 0) { 40674ce78691SPeter Lieven return bdi.unallocated_blocks_are_zero; 40684ce78691SPeter Lieven } 40694ce78691SPeter Lieven 40704ce78691SPeter Lieven return false; 40714ce78691SPeter Lieven } 40724ce78691SPeter Lieven 40734ce78691SPeter Lieven bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs) 40744ce78691SPeter Lieven { 40754ce78691SPeter Lieven BlockDriverInfo bdi; 40764ce78691SPeter Lieven 40774ce78691SPeter Lieven if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) { 40784ce78691SPeter Lieven return false; 40794ce78691SPeter Lieven } 40804ce78691SPeter Lieven 
40814ce78691SPeter Lieven if (bdrv_get_info(bs, &bdi) == 0) { 40824ce78691SPeter Lieven return bdi.can_write_zeroes_with_unmap; 40834ce78691SPeter Lieven } 40844ce78691SPeter Lieven 40854ce78691SPeter Lieven return false; 40864ce78691SPeter Lieven } 40874ce78691SPeter Lieven 4088b6b8a333SPaolo Bonzini typedef struct BdrvCoGetBlockStatusData { 4089376ae3f1SStefan Hajnoczi BlockDriverState *bs; 4090b35b2bbaSMiroslav Rezanina BlockDriverState *base; 4091376ae3f1SStefan Hajnoczi int64_t sector_num; 4092376ae3f1SStefan Hajnoczi int nb_sectors; 4093376ae3f1SStefan Hajnoczi int *pnum; 4094b6b8a333SPaolo Bonzini int64_t ret; 4095376ae3f1SStefan Hajnoczi bool done; 4096b6b8a333SPaolo Bonzini } BdrvCoGetBlockStatusData; 4097376ae3f1SStefan Hajnoczi 4098f58c7b35Sths /* 4099705be728SFam Zheng * Returns the allocation status of the specified sectors. 4100705be728SFam Zheng * Drivers not implementing the functionality are assumed to not support 4101705be728SFam Zheng * backing files, hence all their sectors are reported as allocated. 4102f58c7b35Sths * 4103bd9533e3SStefan Hajnoczi * If 'sector_num' is beyond the end of the disk image the return value is 0 4104bd9533e3SStefan Hajnoczi * and 'pnum' is set to 0. 4105bd9533e3SStefan Hajnoczi * 4106f58c7b35Sths * 'pnum' is set to the number of sectors (including and immediately following 4107f58c7b35Sths * the specified sector) that are known to be in the same 4108f58c7b35Sths * allocated/unallocated state. 4109f58c7b35Sths * 4110bd9533e3SStefan Hajnoczi * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes 4111bd9533e3SStefan Hajnoczi * beyond the end of the disk image it will be clamped. 
4112f58c7b35Sths */ 4113b6b8a333SPaolo Bonzini static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, 4114bdad13b9SPaolo Bonzini int64_t sector_num, 4115060f51c9SStefan Hajnoczi int nb_sectors, int *pnum) 4116f58c7b35Sths { 411730a7f2fcSMarkus Armbruster int64_t total_sectors; 4118f58c7b35Sths int64_t n; 41195daa74a6SPaolo Bonzini int64_t ret, ret2; 4120bd9533e3SStefan Hajnoczi 412130a7f2fcSMarkus Armbruster total_sectors = bdrv_nb_sectors(bs); 412230a7f2fcSMarkus Armbruster if (total_sectors < 0) { 412330a7f2fcSMarkus Armbruster return total_sectors; 4124617ccb46SPaolo Bonzini } 4125617ccb46SPaolo Bonzini 412630a7f2fcSMarkus Armbruster if (sector_num >= total_sectors) { 41276aebab14SStefan Hajnoczi *pnum = 0; 41286aebab14SStefan Hajnoczi return 0; 41296aebab14SStefan Hajnoczi } 4130bd9533e3SStefan Hajnoczi 413130a7f2fcSMarkus Armbruster n = total_sectors - sector_num; 4132bd9533e3SStefan Hajnoczi if (n < nb_sectors) { 4133bd9533e3SStefan Hajnoczi nb_sectors = n; 4134bd9533e3SStefan Hajnoczi } 4135bd9533e3SStefan Hajnoczi 4136b6b8a333SPaolo Bonzini if (!bs->drv->bdrv_co_get_block_status) { 4137bd9533e3SStefan Hajnoczi *pnum = nb_sectors; 4138e88ae226SKevin Wolf ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED; 4139918e92d7SPaolo Bonzini if (bs->drv->protocol_name) { 4140918e92d7SPaolo Bonzini ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE); 4141918e92d7SPaolo Bonzini } 4142918e92d7SPaolo Bonzini return ret; 41436aebab14SStefan Hajnoczi } 41446aebab14SStefan Hajnoczi 4145415b5b01SPaolo Bonzini ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum); 4146415b5b01SPaolo Bonzini if (ret < 0) { 41473e0a233dSPeter Lieven *pnum = 0; 4148415b5b01SPaolo Bonzini return ret; 4149415b5b01SPaolo Bonzini } 4150415b5b01SPaolo Bonzini 415192bc50a5SPeter Lieven if (ret & BDRV_BLOCK_RAW) { 415292bc50a5SPeter Lieven assert(ret & BDRV_BLOCK_OFFSET_VALID); 415392bc50a5SPeter Lieven return bdrv_get_block_status(bs->file, ret >> 
BDRV_SECTOR_BITS, 415492bc50a5SPeter Lieven *pnum, pnum); 415592bc50a5SPeter Lieven } 415692bc50a5SPeter Lieven 4157e88ae226SKevin Wolf if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) { 4158e88ae226SKevin Wolf ret |= BDRV_BLOCK_ALLOCATED; 4159e88ae226SKevin Wolf } 4160e88ae226SKevin Wolf 4161c3d86884SPeter Lieven if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) { 4162c3d86884SPeter Lieven if (bdrv_unallocated_blocks_are_zero(bs)) { 4163415b5b01SPaolo Bonzini ret |= BDRV_BLOCK_ZERO; 41641f9db224SPeter Lieven } else if (bs->backing_hd) { 4165f0ad5712SPaolo Bonzini BlockDriverState *bs2 = bs->backing_hd; 416630a7f2fcSMarkus Armbruster int64_t nb_sectors2 = bdrv_nb_sectors(bs2); 416730a7f2fcSMarkus Armbruster if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) { 4168f0ad5712SPaolo Bonzini ret |= BDRV_BLOCK_ZERO; 4169f0ad5712SPaolo Bonzini } 4170f0ad5712SPaolo Bonzini } 4171415b5b01SPaolo Bonzini } 41725daa74a6SPaolo Bonzini 41735daa74a6SPaolo Bonzini if (bs->file && 41745daa74a6SPaolo Bonzini (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) && 41755daa74a6SPaolo Bonzini (ret & BDRV_BLOCK_OFFSET_VALID)) { 417659c9a95fSMax Reitz int file_pnum; 417759c9a95fSMax Reitz 41785daa74a6SPaolo Bonzini ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS, 417959c9a95fSMax Reitz *pnum, &file_pnum); 41805daa74a6SPaolo Bonzini if (ret2 >= 0) { 41815daa74a6SPaolo Bonzini /* Ignore errors. This is just providing extra information, it 41825daa74a6SPaolo Bonzini * is useful but not necessary. 
41835daa74a6SPaolo Bonzini */ 418459c9a95fSMax Reitz if (!file_pnum) { 418559c9a95fSMax Reitz /* !file_pnum indicates an offset at or beyond the EOF; it is 418659c9a95fSMax Reitz * perfectly valid for the format block driver to point to such 418759c9a95fSMax Reitz * offsets, so catch it and mark everything as zero */ 418859c9a95fSMax Reitz ret |= BDRV_BLOCK_ZERO; 418959c9a95fSMax Reitz } else { 419059c9a95fSMax Reitz /* Limit request to the range reported by the protocol driver */ 419159c9a95fSMax Reitz *pnum = file_pnum; 41925daa74a6SPaolo Bonzini ret |= (ret2 & BDRV_BLOCK_ZERO); 41935daa74a6SPaolo Bonzini } 41945daa74a6SPaolo Bonzini } 419559c9a95fSMax Reitz } 41965daa74a6SPaolo Bonzini 4197415b5b01SPaolo Bonzini return ret; 4198060f51c9SStefan Hajnoczi } 4199060f51c9SStefan Hajnoczi 4200b6b8a333SPaolo Bonzini /* Coroutine wrapper for bdrv_get_block_status() */ 4201b6b8a333SPaolo Bonzini static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque) 4202060f51c9SStefan Hajnoczi { 4203b6b8a333SPaolo Bonzini BdrvCoGetBlockStatusData *data = opaque; 4204060f51c9SStefan Hajnoczi BlockDriverState *bs = data->bs; 4205060f51c9SStefan Hajnoczi 4206b6b8a333SPaolo Bonzini data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors, 4207060f51c9SStefan Hajnoczi data->pnum); 4208060f51c9SStefan Hajnoczi data->done = true; 4209060f51c9SStefan Hajnoczi } 4210060f51c9SStefan Hajnoczi 4211060f51c9SStefan Hajnoczi /* 4212b6b8a333SPaolo Bonzini * Synchronous wrapper around bdrv_co_get_block_status(). 4213060f51c9SStefan Hajnoczi * 4214b6b8a333SPaolo Bonzini * See bdrv_co_get_block_status() for details. 
4215060f51c9SStefan Hajnoczi */ 4216b6b8a333SPaolo Bonzini int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num, 4217b6b8a333SPaolo Bonzini int nb_sectors, int *pnum) 4218060f51c9SStefan Hajnoczi { 4219376ae3f1SStefan Hajnoczi Coroutine *co; 4220b6b8a333SPaolo Bonzini BdrvCoGetBlockStatusData data = { 4221376ae3f1SStefan Hajnoczi .bs = bs, 4222376ae3f1SStefan Hajnoczi .sector_num = sector_num, 4223376ae3f1SStefan Hajnoczi .nb_sectors = nb_sectors, 4224376ae3f1SStefan Hajnoczi .pnum = pnum, 4225376ae3f1SStefan Hajnoczi .done = false, 4226376ae3f1SStefan Hajnoczi }; 4227376ae3f1SStefan Hajnoczi 4228bdad13b9SPaolo Bonzini if (qemu_in_coroutine()) { 4229bdad13b9SPaolo Bonzini /* Fast-path if already in coroutine context */ 4230b6b8a333SPaolo Bonzini bdrv_get_block_status_co_entry(&data); 4231bdad13b9SPaolo Bonzini } else { 42322572b37aSStefan Hajnoczi AioContext *aio_context = bdrv_get_aio_context(bs); 42332572b37aSStefan Hajnoczi 4234b6b8a333SPaolo Bonzini co = qemu_coroutine_create(bdrv_get_block_status_co_entry); 4235376ae3f1SStefan Hajnoczi qemu_coroutine_enter(co, &data); 4236376ae3f1SStefan Hajnoczi while (!data.done) { 42372572b37aSStefan Hajnoczi aio_poll(aio_context, true); 4238376ae3f1SStefan Hajnoczi } 4239bdad13b9SPaolo Bonzini } 4240376ae3f1SStefan Hajnoczi return data.ret; 4241376ae3f1SStefan Hajnoczi } 4242f58c7b35Sths 4243b6b8a333SPaolo Bonzini int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, 4244b6b8a333SPaolo Bonzini int nb_sectors, int *pnum) 4245b6b8a333SPaolo Bonzini { 42464333bb71SPaolo Bonzini int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum); 42474333bb71SPaolo Bonzini if (ret < 0) { 42484333bb71SPaolo Bonzini return ret; 42494333bb71SPaolo Bonzini } 425001fb2705SKevin Wolf return !!(ret & BDRV_BLOCK_ALLOCATED); 4251b6b8a333SPaolo Bonzini } 4252b6b8a333SPaolo Bonzini 4253188a7bbfSPaolo Bonzini /* 4254188a7bbfSPaolo Bonzini * Given an image chain: ... 
-> [BASE] -> [INTER1] -> [INTER2] -> [TOP] 4255188a7bbfSPaolo Bonzini * 4256188a7bbfSPaolo Bonzini * Return true if the given sector is allocated in any image between 4257188a7bbfSPaolo Bonzini * BASE and TOP (inclusive). BASE can be NULL to check if the given 4258188a7bbfSPaolo Bonzini * sector is allocated in any image of the chain. Return false otherwise. 4259188a7bbfSPaolo Bonzini * 4260188a7bbfSPaolo Bonzini * 'pnum' is set to the number of sectors (including and immediately following 4261188a7bbfSPaolo Bonzini * the specified sector) that are known to be in the same 4262188a7bbfSPaolo Bonzini * allocated/unallocated state. 4263188a7bbfSPaolo Bonzini * 4264188a7bbfSPaolo Bonzini */ 42654f578637SPaolo Bonzini int bdrv_is_allocated_above(BlockDriverState *top, 4266188a7bbfSPaolo Bonzini BlockDriverState *base, 4267188a7bbfSPaolo Bonzini int64_t sector_num, 4268188a7bbfSPaolo Bonzini int nb_sectors, int *pnum) 4269188a7bbfSPaolo Bonzini { 4270188a7bbfSPaolo Bonzini BlockDriverState *intermediate; 4271188a7bbfSPaolo Bonzini int ret, n = nb_sectors; 4272188a7bbfSPaolo Bonzini 4273188a7bbfSPaolo Bonzini intermediate = top; 4274188a7bbfSPaolo Bonzini while (intermediate && intermediate != base) { 4275188a7bbfSPaolo Bonzini int pnum_inter; 4276bdad13b9SPaolo Bonzini ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors, 4277188a7bbfSPaolo Bonzini &pnum_inter); 4278188a7bbfSPaolo Bonzini if (ret < 0) { 4279188a7bbfSPaolo Bonzini return ret; 4280188a7bbfSPaolo Bonzini } else if (ret) { 4281188a7bbfSPaolo Bonzini *pnum = pnum_inter; 4282188a7bbfSPaolo Bonzini return 1; 4283188a7bbfSPaolo Bonzini } 4284188a7bbfSPaolo Bonzini 4285188a7bbfSPaolo Bonzini /* 4286188a7bbfSPaolo Bonzini * [sector_num, nb_sectors] is unallocated on top but intermediate 4287188a7bbfSPaolo Bonzini * might have 4288188a7bbfSPaolo Bonzini * 4289188a7bbfSPaolo Bonzini * [sector_num+x, nr_sectors] allocated. 
4290188a7bbfSPaolo Bonzini */ 429163ba17d3SVishvananda Ishaya if (n > pnum_inter && 429263ba17d3SVishvananda Ishaya (intermediate == top || 429363ba17d3SVishvananda Ishaya sector_num + pnum_inter < intermediate->total_sectors)) { 4294188a7bbfSPaolo Bonzini n = pnum_inter; 4295188a7bbfSPaolo Bonzini } 4296188a7bbfSPaolo Bonzini 4297188a7bbfSPaolo Bonzini intermediate = intermediate->backing_hd; 4298188a7bbfSPaolo Bonzini } 4299188a7bbfSPaolo Bonzini 4300188a7bbfSPaolo Bonzini *pnum = n; 4301188a7bbfSPaolo Bonzini return 0; 4302188a7bbfSPaolo Bonzini } 4303188a7bbfSPaolo Bonzini 4304045df330Saliguori const char *bdrv_get_encrypted_filename(BlockDriverState *bs) 4305045df330Saliguori { 4306045df330Saliguori if (bs->backing_hd && bs->backing_hd->encrypted) 4307045df330Saliguori return bs->backing_file; 4308045df330Saliguori else if (bs->encrypted) 4309045df330Saliguori return bs->filename; 4310045df330Saliguori else 4311045df330Saliguori return NULL; 4312045df330Saliguori } 4313045df330Saliguori 431483f64091Sbellard void bdrv_get_backing_filename(BlockDriverState *bs, 431583f64091Sbellard char *filename, int filename_size) 431683f64091Sbellard { 431783f64091Sbellard pstrcpy(filename, filename_size, bs->backing_file); 431883f64091Sbellard } 431983f64091Sbellard 4320faea38e7Sbellard int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num, 4321faea38e7Sbellard const uint8_t *buf, int nb_sectors) 4322faea38e7Sbellard { 4323faea38e7Sbellard BlockDriver *drv = bs->drv; 4324b9c64947SMax Reitz int ret; 4325b9c64947SMax Reitz 4326b9c64947SMax Reitz if (!drv) { 432719cb3738Sbellard return -ENOMEDIUM; 4328b9c64947SMax Reitz } 4329b9c64947SMax Reitz if (!drv->bdrv_write_compressed) { 4330faea38e7Sbellard return -ENOTSUP; 4331b9c64947SMax Reitz } 4332b9c64947SMax Reitz ret = bdrv_check_request(bs, sector_num, nb_sectors); 4333b9c64947SMax Reitz if (ret < 0) { 4334b9c64947SMax Reitz return ret; 4335b9c64947SMax Reitz } 43367cd1e32aSlirans@il.ibm.com 4337e4654d2dSFam Zheng 
assert(QLIST_EMPTY(&bs->dirty_bitmaps)); 43387cd1e32aSlirans@il.ibm.com 4339faea38e7Sbellard return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors); 4340faea38e7Sbellard } 4341faea38e7Sbellard 4342faea38e7Sbellard int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 4343faea38e7Sbellard { 4344faea38e7Sbellard BlockDriver *drv = bs->drv; 4345faea38e7Sbellard if (!drv) 434619cb3738Sbellard return -ENOMEDIUM; 4347faea38e7Sbellard if (!drv->bdrv_get_info) 4348faea38e7Sbellard return -ENOTSUP; 4349faea38e7Sbellard memset(bdi, 0, sizeof(*bdi)); 4350faea38e7Sbellard return drv->bdrv_get_info(bs, bdi); 4351faea38e7Sbellard } 4352faea38e7Sbellard 4353eae041feSMax Reitz ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs) 4354eae041feSMax Reitz { 4355eae041feSMax Reitz BlockDriver *drv = bs->drv; 4356eae041feSMax Reitz if (drv && drv->bdrv_get_specific_info) { 4357eae041feSMax Reitz return drv->bdrv_get_specific_info(bs); 4358eae041feSMax Reitz } 4359eae041feSMax Reitz return NULL; 4360eae041feSMax Reitz } 4361eae041feSMax Reitz 436245566e9cSChristoph Hellwig int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, 436345566e9cSChristoph Hellwig int64_t pos, int size) 4364178e08a5Saliguori { 4365cf8074b3SKevin Wolf QEMUIOVector qiov; 4366cf8074b3SKevin Wolf struct iovec iov = { 4367cf8074b3SKevin Wolf .iov_base = (void *) buf, 4368cf8074b3SKevin Wolf .iov_len = size, 4369cf8074b3SKevin Wolf }; 4370cf8074b3SKevin Wolf 4371cf8074b3SKevin Wolf qemu_iovec_init_external(&qiov, &iov, 1); 4372cf8074b3SKevin Wolf return bdrv_writev_vmstate(bs, &qiov, pos); 4373cf8074b3SKevin Wolf } 4374cf8074b3SKevin Wolf 4375cf8074b3SKevin Wolf int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) 4376cf8074b3SKevin Wolf { 4377178e08a5Saliguori BlockDriver *drv = bs->drv; 4378cf8074b3SKevin Wolf 4379cf8074b3SKevin Wolf if (!drv) { 4380178e08a5Saliguori return -ENOMEDIUM; 4381cf8074b3SKevin Wolf } else if (drv->bdrv_save_vmstate) { 
4382cf8074b3SKevin Wolf return drv->bdrv_save_vmstate(bs, qiov, pos); 4383cf8074b3SKevin Wolf } else if (bs->file) { 4384cf8074b3SKevin Wolf return bdrv_writev_vmstate(bs->file, qiov, pos); 4385cf8074b3SKevin Wolf } 4386cf8074b3SKevin Wolf 43877cdb1f6dSMORITA Kazutaka return -ENOTSUP; 4388178e08a5Saliguori } 4389178e08a5Saliguori 439045566e9cSChristoph Hellwig int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, 439145566e9cSChristoph Hellwig int64_t pos, int size) 4392178e08a5Saliguori { 4393178e08a5Saliguori BlockDriver *drv = bs->drv; 4394178e08a5Saliguori if (!drv) 4395178e08a5Saliguori return -ENOMEDIUM; 43967cdb1f6dSMORITA Kazutaka if (drv->bdrv_load_vmstate) 439745566e9cSChristoph Hellwig return drv->bdrv_load_vmstate(bs, buf, pos, size); 43987cdb1f6dSMORITA Kazutaka if (bs->file) 43997cdb1f6dSMORITA Kazutaka return bdrv_load_vmstate(bs->file, buf, pos, size); 44007cdb1f6dSMORITA Kazutaka return -ENOTSUP; 4401178e08a5Saliguori } 4402178e08a5Saliguori 44038b9b0cc2SKevin Wolf void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event) 44048b9b0cc2SKevin Wolf { 4405bf736fe3SKevin Wolf if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) { 44068b9b0cc2SKevin Wolf return; 44078b9b0cc2SKevin Wolf } 44088b9b0cc2SKevin Wolf 4409bf736fe3SKevin Wolf bs->drv->bdrv_debug_event(bs, event); 441041c695c7SKevin Wolf } 44118b9b0cc2SKevin Wolf 441241c695c7SKevin Wolf int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event, 441341c695c7SKevin Wolf const char *tag) 441441c695c7SKevin Wolf { 441541c695c7SKevin Wolf while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) { 441641c695c7SKevin Wolf bs = bs->file; 441741c695c7SKevin Wolf } 441841c695c7SKevin Wolf 441941c695c7SKevin Wolf if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) { 442041c695c7SKevin Wolf return bs->drv->bdrv_debug_breakpoint(bs, event, tag); 442141c695c7SKevin Wolf } 442241c695c7SKevin Wolf 442341c695c7SKevin Wolf return -ENOTSUP; 442441c695c7SKevin Wolf } 442541c695c7SKevin Wolf 
44264cc70e93SFam Zheng int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag) 44274cc70e93SFam Zheng { 44284cc70e93SFam Zheng while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) { 44294cc70e93SFam Zheng bs = bs->file; 44304cc70e93SFam Zheng } 44314cc70e93SFam Zheng 44324cc70e93SFam Zheng if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) { 44334cc70e93SFam Zheng return bs->drv->bdrv_debug_remove_breakpoint(bs, tag); 44344cc70e93SFam Zheng } 44354cc70e93SFam Zheng 44364cc70e93SFam Zheng return -ENOTSUP; 44374cc70e93SFam Zheng } 44384cc70e93SFam Zheng 443941c695c7SKevin Wolf int bdrv_debug_resume(BlockDriverState *bs, const char *tag) 444041c695c7SKevin Wolf { 4441938789eaSMax Reitz while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) { 444241c695c7SKevin Wolf bs = bs->file; 444341c695c7SKevin Wolf } 444441c695c7SKevin Wolf 444541c695c7SKevin Wolf if (bs && bs->drv && bs->drv->bdrv_debug_resume) { 444641c695c7SKevin Wolf return bs->drv->bdrv_debug_resume(bs, tag); 444741c695c7SKevin Wolf } 444841c695c7SKevin Wolf 444941c695c7SKevin Wolf return -ENOTSUP; 445041c695c7SKevin Wolf } 445141c695c7SKevin Wolf 445241c695c7SKevin Wolf bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag) 445341c695c7SKevin Wolf { 445441c695c7SKevin Wolf while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) { 445541c695c7SKevin Wolf bs = bs->file; 445641c695c7SKevin Wolf } 445741c695c7SKevin Wolf 445841c695c7SKevin Wolf if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) { 445941c695c7SKevin Wolf return bs->drv->bdrv_debug_is_suspended(bs, tag); 446041c695c7SKevin Wolf } 446141c695c7SKevin Wolf 446241c695c7SKevin Wolf return false; 44638b9b0cc2SKevin Wolf } 44648b9b0cc2SKevin Wolf 4465199630b6SBlue Swirl int bdrv_is_snapshot(BlockDriverState *bs) 4466199630b6SBlue Swirl { 4467199630b6SBlue Swirl return !!(bs->open_flags & BDRV_O_SNAPSHOT); 4468199630b6SBlue Swirl } 4469199630b6SBlue Swirl 4470b1b1d783SJeff Cody /* backing_file can 
either be relative, or absolute, or a protocol. If it is 4471b1b1d783SJeff Cody * relative, it must be relative to the chain. So, passing in bs->filename 4472b1b1d783SJeff Cody * from a BDS as backing_file should not be done, as that may be relative to 4473b1b1d783SJeff Cody * the CWD rather than the chain. */ 4474e8a6bb9cSMarcelo Tosatti BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, 4475e8a6bb9cSMarcelo Tosatti const char *backing_file) 4476e8a6bb9cSMarcelo Tosatti { 4477b1b1d783SJeff Cody char *filename_full = NULL; 4478b1b1d783SJeff Cody char *backing_file_full = NULL; 4479b1b1d783SJeff Cody char *filename_tmp = NULL; 4480b1b1d783SJeff Cody int is_protocol = 0; 4481b1b1d783SJeff Cody BlockDriverState *curr_bs = NULL; 4482b1b1d783SJeff Cody BlockDriverState *retval = NULL; 4483b1b1d783SJeff Cody 4484b1b1d783SJeff Cody if (!bs || !bs->drv || !backing_file) { 4485e8a6bb9cSMarcelo Tosatti return NULL; 4486e8a6bb9cSMarcelo Tosatti } 4487e8a6bb9cSMarcelo Tosatti 4488b1b1d783SJeff Cody filename_full = g_malloc(PATH_MAX); 4489b1b1d783SJeff Cody backing_file_full = g_malloc(PATH_MAX); 4490b1b1d783SJeff Cody filename_tmp = g_malloc(PATH_MAX); 4491b1b1d783SJeff Cody 4492b1b1d783SJeff Cody is_protocol = path_has_protocol(backing_file); 4493b1b1d783SJeff Cody 4494b1b1d783SJeff Cody for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) { 4495b1b1d783SJeff Cody 4496b1b1d783SJeff Cody /* If either of the filename paths is actually a protocol, then 4497b1b1d783SJeff Cody * compare unmodified paths; otherwise make paths relative */ 4498b1b1d783SJeff Cody if (is_protocol || path_has_protocol(curr_bs->backing_file)) { 4499b1b1d783SJeff Cody if (strcmp(backing_file, curr_bs->backing_file) == 0) { 4500b1b1d783SJeff Cody retval = curr_bs->backing_hd; 4501b1b1d783SJeff Cody break; 4502b1b1d783SJeff Cody } 4503e8a6bb9cSMarcelo Tosatti } else { 4504b1b1d783SJeff Cody /* If not an absolute filename path, make it relative to the current 4505b1b1d783SJeff 
Cody * image's filename path */ 4506b1b1d783SJeff Cody path_combine(filename_tmp, PATH_MAX, curr_bs->filename, 4507b1b1d783SJeff Cody backing_file); 4508b1b1d783SJeff Cody 4509b1b1d783SJeff Cody /* We are going to compare absolute pathnames */ 4510b1b1d783SJeff Cody if (!realpath(filename_tmp, filename_full)) { 4511b1b1d783SJeff Cody continue; 4512b1b1d783SJeff Cody } 4513b1b1d783SJeff Cody 4514b1b1d783SJeff Cody /* We need to make sure the backing filename we are comparing against 4515b1b1d783SJeff Cody * is relative to the current image filename (or absolute) */ 4516b1b1d783SJeff Cody path_combine(filename_tmp, PATH_MAX, curr_bs->filename, 4517b1b1d783SJeff Cody curr_bs->backing_file); 4518b1b1d783SJeff Cody 4519b1b1d783SJeff Cody if (!realpath(filename_tmp, backing_file_full)) { 4520b1b1d783SJeff Cody continue; 4521b1b1d783SJeff Cody } 4522b1b1d783SJeff Cody 4523b1b1d783SJeff Cody if (strcmp(backing_file_full, filename_full) == 0) { 4524b1b1d783SJeff Cody retval = curr_bs->backing_hd; 4525b1b1d783SJeff Cody break; 4526b1b1d783SJeff Cody } 4527e8a6bb9cSMarcelo Tosatti } 4528e8a6bb9cSMarcelo Tosatti } 4529e8a6bb9cSMarcelo Tosatti 4530b1b1d783SJeff Cody g_free(filename_full); 4531b1b1d783SJeff Cody g_free(backing_file_full); 4532b1b1d783SJeff Cody g_free(filename_tmp); 4533b1b1d783SJeff Cody return retval; 4534e8a6bb9cSMarcelo Tosatti } 4535e8a6bb9cSMarcelo Tosatti 4536f198fd1cSBenoît Canet int bdrv_get_backing_file_depth(BlockDriverState *bs) 4537f198fd1cSBenoît Canet { 4538f198fd1cSBenoît Canet if (!bs->drv) { 4539f198fd1cSBenoît Canet return 0; 4540f198fd1cSBenoît Canet } 4541f198fd1cSBenoît Canet 4542f198fd1cSBenoît Canet if (!bs->backing_hd) { 4543f198fd1cSBenoît Canet return 0; 4544f198fd1cSBenoît Canet } 4545f198fd1cSBenoît Canet 4546f198fd1cSBenoît Canet return 1 + bdrv_get_backing_file_depth(bs->backing_hd); 4547f198fd1cSBenoît Canet } 4548f198fd1cSBenoît Canet 4549ea2384d3Sbellard /**************************************************************/ 
455083f64091Sbellard /* async I/Os */ 4551ea2384d3Sbellard 45527c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num, 4553f141eafeSaliguori QEMUIOVector *qiov, int nb_sectors, 4554097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 4555ea2384d3Sbellard { 4556bbf0a440SStefan Hajnoczi trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque); 4557bbf0a440SStefan Hajnoczi 4558d20d9b7cSPaolo Bonzini return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0, 45598c5873d6SStefan Hajnoczi cb, opaque, false); 456083f64091Sbellard } 456183f64091Sbellard 45627c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num, 4563f141eafeSaliguori QEMUIOVector *qiov, int nb_sectors, 4564097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 45657674e7bfSbellard { 4566bbf0a440SStefan Hajnoczi trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque); 4567bbf0a440SStefan Hajnoczi 4568d20d9b7cSPaolo Bonzini return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0, 45698c5873d6SStefan Hajnoczi cb, opaque, true); 457083f64091Sbellard } 457183f64091Sbellard 45727c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs, 4573d5ef94d4SPaolo Bonzini int64_t sector_num, int nb_sectors, BdrvRequestFlags flags, 4574097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 4575d5ef94d4SPaolo Bonzini { 4576d5ef94d4SPaolo Bonzini trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque); 4577d5ef94d4SPaolo Bonzini 4578d5ef94d4SPaolo Bonzini return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors, 4579d5ef94d4SPaolo Bonzini BDRV_REQ_ZERO_WRITE | flags, 4580d5ef94d4SPaolo Bonzini cb, opaque, true); 4581d5ef94d4SPaolo Bonzini } 4582d5ef94d4SPaolo Bonzini 458340b4f539SKevin Wolf 458440b4f539SKevin Wolf typedef struct MultiwriteCB { 458540b4f539SKevin Wolf int error; 458640b4f539SKevin Wolf int num_requests; 458740b4f539SKevin Wolf int 
num_callbacks; 458840b4f539SKevin Wolf struct { 4589097310b5SMarkus Armbruster BlockCompletionFunc *cb; 459040b4f539SKevin Wolf void *opaque; 459140b4f539SKevin Wolf QEMUIOVector *free_qiov; 459240b4f539SKevin Wolf } callbacks[]; 459340b4f539SKevin Wolf } MultiwriteCB; 459440b4f539SKevin Wolf 459540b4f539SKevin Wolf static void multiwrite_user_cb(MultiwriteCB *mcb) 459640b4f539SKevin Wolf { 459740b4f539SKevin Wolf int i; 459840b4f539SKevin Wolf 459940b4f539SKevin Wolf for (i = 0; i < mcb->num_callbacks; i++) { 460040b4f539SKevin Wolf mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error); 46011e1ea48dSStefan Hajnoczi if (mcb->callbacks[i].free_qiov) { 46021e1ea48dSStefan Hajnoczi qemu_iovec_destroy(mcb->callbacks[i].free_qiov); 46031e1ea48dSStefan Hajnoczi } 46047267c094SAnthony Liguori g_free(mcb->callbacks[i].free_qiov); 460540b4f539SKevin Wolf } 460640b4f539SKevin Wolf } 460740b4f539SKevin Wolf 460840b4f539SKevin Wolf static void multiwrite_cb(void *opaque, int ret) 460940b4f539SKevin Wolf { 461040b4f539SKevin Wolf MultiwriteCB *mcb = opaque; 461140b4f539SKevin Wolf 46126d519a5fSStefan Hajnoczi trace_multiwrite_cb(mcb, ret); 46136d519a5fSStefan Hajnoczi 4614cb6d3ca0SKevin Wolf if (ret < 0 && !mcb->error) { 461540b4f539SKevin Wolf mcb->error = ret; 461640b4f539SKevin Wolf } 461740b4f539SKevin Wolf 461840b4f539SKevin Wolf mcb->num_requests--; 461940b4f539SKevin Wolf if (mcb->num_requests == 0) { 462040b4f539SKevin Wolf multiwrite_user_cb(mcb); 46217267c094SAnthony Liguori g_free(mcb); 462240b4f539SKevin Wolf } 462340b4f539SKevin Wolf } 462440b4f539SKevin Wolf 462540b4f539SKevin Wolf static int multiwrite_req_compare(const void *a, const void *b) 462640b4f539SKevin Wolf { 462777be4366SChristoph Hellwig const BlockRequest *req1 = a, *req2 = b; 462877be4366SChristoph Hellwig 462977be4366SChristoph Hellwig /* 463077be4366SChristoph Hellwig * Note that we can't simply subtract req2->sector from req1->sector 463177be4366SChristoph Hellwig * here as that could 
overflow the return value. 463277be4366SChristoph Hellwig */ 463377be4366SChristoph Hellwig if (req1->sector > req2->sector) { 463477be4366SChristoph Hellwig return 1; 463577be4366SChristoph Hellwig } else if (req1->sector < req2->sector) { 463677be4366SChristoph Hellwig return -1; 463777be4366SChristoph Hellwig } else { 463877be4366SChristoph Hellwig return 0; 463977be4366SChristoph Hellwig } 464040b4f539SKevin Wolf } 464140b4f539SKevin Wolf 464240b4f539SKevin Wolf /* 464340b4f539SKevin Wolf * Takes a bunch of requests and tries to merge them. Returns the number of 464440b4f539SKevin Wolf * requests that remain after merging. 464540b4f539SKevin Wolf */ 464640b4f539SKevin Wolf static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs, 464740b4f539SKevin Wolf int num_reqs, MultiwriteCB *mcb) 464840b4f539SKevin Wolf { 464940b4f539SKevin Wolf int i, outidx; 465040b4f539SKevin Wolf 465140b4f539SKevin Wolf // Sort requests by start sector 465240b4f539SKevin Wolf qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare); 465340b4f539SKevin Wolf 465440b4f539SKevin Wolf // Check if adjacent requests touch the same clusters. If so, combine them, 465540b4f539SKevin Wolf // filling up gaps with zero sectors. 465640b4f539SKevin Wolf outidx = 0; 465740b4f539SKevin Wolf for (i = 1; i < num_reqs; i++) { 465840b4f539SKevin Wolf int merge = 0; 465940b4f539SKevin Wolf int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors; 466040b4f539SKevin Wolf 4661b6a127a1SPaolo Bonzini // Handle exactly sequential writes and overlapping writes. 
466240b4f539SKevin Wolf if (reqs[i].sector <= oldreq_last) { 466340b4f539SKevin Wolf merge = 1; 466440b4f539SKevin Wolf } 466540b4f539SKevin Wolf 4666e2a305fbSChristoph Hellwig if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) { 4667e2a305fbSChristoph Hellwig merge = 0; 4668e2a305fbSChristoph Hellwig } 4669e2a305fbSChristoph Hellwig 46706c5a42acSPeter Lieven if (bs->bl.max_transfer_length && reqs[outidx].nb_sectors + 46716c5a42acSPeter Lieven reqs[i].nb_sectors > bs->bl.max_transfer_length) { 46726c5a42acSPeter Lieven merge = 0; 46736c5a42acSPeter Lieven } 46746c5a42acSPeter Lieven 467540b4f539SKevin Wolf if (merge) { 467640b4f539SKevin Wolf size_t size; 46777267c094SAnthony Liguori QEMUIOVector *qiov = g_malloc0(sizeof(*qiov)); 467840b4f539SKevin Wolf qemu_iovec_init(qiov, 467940b4f539SKevin Wolf reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1); 468040b4f539SKevin Wolf 468140b4f539SKevin Wolf // Add the first request to the merged one. If the requests are 468240b4f539SKevin Wolf // overlapping, drop the last sectors of the first request. 
468340b4f539SKevin Wolf size = (reqs[i].sector - reqs[outidx].sector) << 9; 46841b093c48SMichael Tokarev qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size); 468540b4f539SKevin Wolf 4686b6a127a1SPaolo Bonzini // We should need to add any zeros between the two requests 4687b6a127a1SPaolo Bonzini assert (reqs[i].sector <= oldreq_last); 468840b4f539SKevin Wolf 468940b4f539SKevin Wolf // Add the second request 46901b093c48SMichael Tokarev qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size); 469140b4f539SKevin Wolf 4692391827ebSStefan Hajnoczi // Add tail of first request, if necessary 4693391827ebSStefan Hajnoczi if (qiov->size < reqs[outidx].qiov->size) { 4694391827ebSStefan Hajnoczi qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size, 4695391827ebSStefan Hajnoczi reqs[outidx].qiov->size - qiov->size); 4696391827ebSStefan Hajnoczi } 4697391827ebSStefan Hajnoczi 4698cbf1dff2SKevin Wolf reqs[outidx].nb_sectors = qiov->size >> 9; 469940b4f539SKevin Wolf reqs[outidx].qiov = qiov; 470040b4f539SKevin Wolf 470140b4f539SKevin Wolf mcb->callbacks[i].free_qiov = reqs[outidx].qiov; 470240b4f539SKevin Wolf } else { 470340b4f539SKevin Wolf outidx++; 470440b4f539SKevin Wolf reqs[outidx].sector = reqs[i].sector; 470540b4f539SKevin Wolf reqs[outidx].nb_sectors = reqs[i].nb_sectors; 470640b4f539SKevin Wolf reqs[outidx].qiov = reqs[i].qiov; 470740b4f539SKevin Wolf } 470840b4f539SKevin Wolf } 470940b4f539SKevin Wolf 4710f4564d53SPeter Lieven block_acct_merge_done(&bs->stats, BLOCK_ACCT_WRITE, num_reqs - outidx - 1); 4711f4564d53SPeter Lieven 471240b4f539SKevin Wolf return outidx + 1; 471340b4f539SKevin Wolf } 471440b4f539SKevin Wolf 471540b4f539SKevin Wolf /* 471640b4f539SKevin Wolf * Submit multiple AIO write requests at once. 471740b4f539SKevin Wolf * 471840b4f539SKevin Wolf * On success, the function returns 0 and all requests in the reqs array have 471940b4f539SKevin Wolf * been submitted. 
In error case this function returns -1, and any of the 472040b4f539SKevin Wolf * requests may or may not be submitted yet. In particular, this means that the 472140b4f539SKevin Wolf * callback will be called for some of the requests, for others it won't. The 472240b4f539SKevin Wolf * caller must check the error field of the BlockRequest to wait for the right 472340b4f539SKevin Wolf * callbacks (if error != 0, no callback will be called). 472440b4f539SKevin Wolf * 472540b4f539SKevin Wolf * The implementation may modify the contents of the reqs array, e.g. to merge 472640b4f539SKevin Wolf * requests. However, the fields opaque and error are left unmodified as they 472740b4f539SKevin Wolf * are used to signal failure for a single request to the caller. 472840b4f539SKevin Wolf */ 472940b4f539SKevin Wolf int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs) 473040b4f539SKevin Wolf { 473140b4f539SKevin Wolf MultiwriteCB *mcb; 473240b4f539SKevin Wolf int i; 473340b4f539SKevin Wolf 4734301db7c2SRyan Harper /* don't submit writes if we don't have a medium */ 4735301db7c2SRyan Harper if (bs->drv == NULL) { 4736301db7c2SRyan Harper for (i = 0; i < num_reqs; i++) { 4737301db7c2SRyan Harper reqs[i].error = -ENOMEDIUM; 4738301db7c2SRyan Harper } 4739301db7c2SRyan Harper return -1; 4740301db7c2SRyan Harper } 4741301db7c2SRyan Harper 474240b4f539SKevin Wolf if (num_reqs == 0) { 474340b4f539SKevin Wolf return 0; 474440b4f539SKevin Wolf } 474540b4f539SKevin Wolf 474640b4f539SKevin Wolf // Create MultiwriteCB structure 47477267c094SAnthony Liguori mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks)); 474840b4f539SKevin Wolf mcb->num_requests = 0; 474940b4f539SKevin Wolf mcb->num_callbacks = num_reqs; 475040b4f539SKevin Wolf 475140b4f539SKevin Wolf for (i = 0; i < num_reqs; i++) { 475240b4f539SKevin Wolf mcb->callbacks[i].cb = reqs[i].cb; 475340b4f539SKevin Wolf mcb->callbacks[i].opaque = reqs[i].opaque; 475440b4f539SKevin Wolf } 
475540b4f539SKevin Wolf 475640b4f539SKevin Wolf // Check for mergable requests 475740b4f539SKevin Wolf num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb); 475840b4f539SKevin Wolf 47596d519a5fSStefan Hajnoczi trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs); 47606d519a5fSStefan Hajnoczi 4761df9309fbSPaolo Bonzini /* Run the aio requests. */ 4762df9309fbSPaolo Bonzini mcb->num_requests = num_reqs; 476340b4f539SKevin Wolf for (i = 0; i < num_reqs; i++) { 4764d20d9b7cSPaolo Bonzini bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov, 4765d20d9b7cSPaolo Bonzini reqs[i].nb_sectors, reqs[i].flags, 4766d20d9b7cSPaolo Bonzini multiwrite_cb, mcb, 4767d20d9b7cSPaolo Bonzini true); 476840b4f539SKevin Wolf } 476940b4f539SKevin Wolf 477040b4f539SKevin Wolf return 0; 477140b4f539SKevin Wolf } 477240b4f539SKevin Wolf 47737c84b1b8SMarkus Armbruster void bdrv_aio_cancel(BlockAIOCB *acb) 477483f64091Sbellard { 477502c50efeSFam Zheng qemu_aio_ref(acb); 477602c50efeSFam Zheng bdrv_aio_cancel_async(acb); 477702c50efeSFam Zheng while (acb->refcnt > 1) { 477802c50efeSFam Zheng if (acb->aiocb_info->get_aio_context) { 477902c50efeSFam Zheng aio_poll(acb->aiocb_info->get_aio_context(acb), true); 478002c50efeSFam Zheng } else if (acb->bs) { 478102c50efeSFam Zheng aio_poll(bdrv_get_aio_context(acb->bs), true); 478202c50efeSFam Zheng } else { 478302c50efeSFam Zheng abort(); 478402c50efeSFam Zheng } 478502c50efeSFam Zheng } 47868007429aSFam Zheng qemu_aio_unref(acb); 478702c50efeSFam Zheng } 478802c50efeSFam Zheng 478902c50efeSFam Zheng /* Async version of aio cancel. The caller is not blocked if the acb implements 479002c50efeSFam Zheng * cancel_async, otherwise we do nothing and let the request normally complete. 479102c50efeSFam Zheng * In either case the completion callback must be called. 
*/ 47927c84b1b8SMarkus Armbruster void bdrv_aio_cancel_async(BlockAIOCB *acb) 479302c50efeSFam Zheng { 479402c50efeSFam Zheng if (acb->aiocb_info->cancel_async) { 479502c50efeSFam Zheng acb->aiocb_info->cancel_async(acb); 479602c50efeSFam Zheng } 479783f64091Sbellard } 479883f64091Sbellard 479983f64091Sbellard /**************************************************************/ 480083f64091Sbellard /* async block device emulation */ 480183f64091Sbellard 48027c84b1b8SMarkus Armbruster typedef struct BlockAIOCBSync { 48037c84b1b8SMarkus Armbruster BlockAIOCB common; 4804c16b5a2cSChristoph Hellwig QEMUBH *bh; 4805c16b5a2cSChristoph Hellwig int ret; 4806c16b5a2cSChristoph Hellwig /* vector translation state */ 4807c16b5a2cSChristoph Hellwig QEMUIOVector *qiov; 4808c16b5a2cSChristoph Hellwig uint8_t *bounce; 4809c16b5a2cSChristoph Hellwig int is_write; 48107c84b1b8SMarkus Armbruster } BlockAIOCBSync; 4811c16b5a2cSChristoph Hellwig 4812d7331bedSStefan Hajnoczi static const AIOCBInfo bdrv_em_aiocb_info = { 48137c84b1b8SMarkus Armbruster .aiocb_size = sizeof(BlockAIOCBSync), 4814c16b5a2cSChristoph Hellwig }; 4815c16b5a2cSChristoph Hellwig 481683f64091Sbellard static void bdrv_aio_bh_cb(void *opaque) 4817beac80cdSbellard { 48187c84b1b8SMarkus Armbruster BlockAIOCBSync *acb = opaque; 4819f141eafeSaliguori 4820857d4f46SKevin Wolf if (!acb->is_write && acb->ret >= 0) { 482103396148SMichael Tokarev qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size); 4822857d4f46SKevin Wolf } 4823ceb42de8Saliguori qemu_vfree(acb->bounce); 4824ce1a14dcSpbrook acb->common.cb(acb->common.opaque, acb->ret); 48256a7ad299SDor Laor qemu_bh_delete(acb->bh); 482636afc451SAvi Kivity acb->bh = NULL; 48278007429aSFam Zheng qemu_aio_unref(acb); 4828beac80cdSbellard } 4829beac80cdSbellard 48307c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs, 4831f141eafeSaliguori int64_t sector_num, 4832f141eafeSaliguori QEMUIOVector *qiov, 4833f141eafeSaliguori int nb_sectors, 
4834097310b5SMarkus Armbruster BlockCompletionFunc *cb, 4835f141eafeSaliguori void *opaque, 4836f141eafeSaliguori int is_write) 4837f141eafeSaliguori 4838ea2384d3Sbellard { 48397c84b1b8SMarkus Armbruster BlockAIOCBSync *acb; 484083f64091Sbellard 4841d7331bedSStefan Hajnoczi acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque); 4842f141eafeSaliguori acb->is_write = is_write; 4843f141eafeSaliguori acb->qiov = qiov; 4844857d4f46SKevin Wolf acb->bounce = qemu_try_blockalign(bs, qiov->size); 48452572b37aSStefan Hajnoczi acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb); 4846f141eafeSaliguori 4847857d4f46SKevin Wolf if (acb->bounce == NULL) { 4848857d4f46SKevin Wolf acb->ret = -ENOMEM; 4849857d4f46SKevin Wolf } else if (is_write) { 4850d5e6b161SMichael Tokarev qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size); 48511ed20acfSStefan Hajnoczi acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors); 4852f141eafeSaliguori } else { 48531ed20acfSStefan Hajnoczi acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors); 4854f141eafeSaliguori } 4855f141eafeSaliguori 4856ce1a14dcSpbrook qemu_bh_schedule(acb->bh); 4857f141eafeSaliguori 4858ce1a14dcSpbrook return &acb->common; 48597a6cba61Spbrook } 48607a6cba61Spbrook 48617c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs, 4862f141eafeSaliguori int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 4863097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 486483f64091Sbellard { 4865f141eafeSaliguori return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); 486683f64091Sbellard } 486783f64091Sbellard 48687c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs, 4869f141eafeSaliguori int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 4870097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 4871f141eafeSaliguori { 4872f141eafeSaliguori return bdrv_aio_rw_vector(bs, 
sector_num, qiov, nb_sectors, cb, opaque, 1); 4873f141eafeSaliguori } 4874f141eafeSaliguori 487568485420SKevin Wolf 48767c84b1b8SMarkus Armbruster typedef struct BlockAIOCBCoroutine { 48777c84b1b8SMarkus Armbruster BlockAIOCB common; 487868485420SKevin Wolf BlockRequest req; 487968485420SKevin Wolf bool is_write; 48800b5a2445SPaolo Bonzini bool need_bh; 4881d318aea9SKevin Wolf bool *done; 488268485420SKevin Wolf QEMUBH* bh; 48837c84b1b8SMarkus Armbruster } BlockAIOCBCoroutine; 488468485420SKevin Wolf 4885d7331bedSStefan Hajnoczi static const AIOCBInfo bdrv_em_co_aiocb_info = { 48867c84b1b8SMarkus Armbruster .aiocb_size = sizeof(BlockAIOCBCoroutine), 488768485420SKevin Wolf }; 488868485420SKevin Wolf 48890b5a2445SPaolo Bonzini static void bdrv_co_complete(BlockAIOCBCoroutine *acb) 48900b5a2445SPaolo Bonzini { 48910b5a2445SPaolo Bonzini if (!acb->need_bh) { 48920b5a2445SPaolo Bonzini acb->common.cb(acb->common.opaque, acb->req.error); 48930b5a2445SPaolo Bonzini qemu_aio_unref(acb); 48940b5a2445SPaolo Bonzini } 48950b5a2445SPaolo Bonzini } 48960b5a2445SPaolo Bonzini 489735246a68SPaolo Bonzini static void bdrv_co_em_bh(void *opaque) 489868485420SKevin Wolf { 48997c84b1b8SMarkus Armbruster BlockAIOCBCoroutine *acb = opaque; 490068485420SKevin Wolf 49010b5a2445SPaolo Bonzini assert(!acb->need_bh); 490268485420SKevin Wolf qemu_bh_delete(acb->bh); 49030b5a2445SPaolo Bonzini bdrv_co_complete(acb); 49040b5a2445SPaolo Bonzini } 49050b5a2445SPaolo Bonzini 49060b5a2445SPaolo Bonzini static void bdrv_co_maybe_schedule_bh(BlockAIOCBCoroutine *acb) 49070b5a2445SPaolo Bonzini { 49080b5a2445SPaolo Bonzini acb->need_bh = false; 49090b5a2445SPaolo Bonzini if (acb->req.error != -EINPROGRESS) { 49100b5a2445SPaolo Bonzini BlockDriverState *bs = acb->common.bs; 49110b5a2445SPaolo Bonzini 49120b5a2445SPaolo Bonzini acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb); 49130b5a2445SPaolo Bonzini qemu_bh_schedule(acb->bh); 49140b5a2445SPaolo Bonzini } 491568485420SKevin Wolf } 
491668485420SKevin Wolf 4917b2a61371SStefan Hajnoczi /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */ 4918b2a61371SStefan Hajnoczi static void coroutine_fn bdrv_co_do_rw(void *opaque) 4919b2a61371SStefan Hajnoczi { 49207c84b1b8SMarkus Armbruster BlockAIOCBCoroutine *acb = opaque; 4921b2a61371SStefan Hajnoczi BlockDriverState *bs = acb->common.bs; 4922b2a61371SStefan Hajnoczi 4923b2a61371SStefan Hajnoczi if (!acb->is_write) { 4924b2a61371SStefan Hajnoczi acb->req.error = bdrv_co_do_readv(bs, acb->req.sector, 4925d20d9b7cSPaolo Bonzini acb->req.nb_sectors, acb->req.qiov, acb->req.flags); 4926b2a61371SStefan Hajnoczi } else { 4927b2a61371SStefan Hajnoczi acb->req.error = bdrv_co_do_writev(bs, acb->req.sector, 4928d20d9b7cSPaolo Bonzini acb->req.nb_sectors, acb->req.qiov, acb->req.flags); 4929b2a61371SStefan Hajnoczi } 4930b2a61371SStefan Hajnoczi 49310b5a2445SPaolo Bonzini bdrv_co_complete(acb); 4932b2a61371SStefan Hajnoczi } 4933b2a61371SStefan Hajnoczi 49347c84b1b8SMarkus Armbruster static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, 493568485420SKevin Wolf int64_t sector_num, 493668485420SKevin Wolf QEMUIOVector *qiov, 493768485420SKevin Wolf int nb_sectors, 4938d20d9b7cSPaolo Bonzini BdrvRequestFlags flags, 4939097310b5SMarkus Armbruster BlockCompletionFunc *cb, 494068485420SKevin Wolf void *opaque, 49418c5873d6SStefan Hajnoczi bool is_write) 494268485420SKevin Wolf { 494368485420SKevin Wolf Coroutine *co; 49447c84b1b8SMarkus Armbruster BlockAIOCBCoroutine *acb; 494568485420SKevin Wolf 4946d7331bedSStefan Hajnoczi acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); 49470b5a2445SPaolo Bonzini acb->need_bh = true; 49480b5a2445SPaolo Bonzini acb->req.error = -EINPROGRESS; 494968485420SKevin Wolf acb->req.sector = sector_num; 495068485420SKevin Wolf acb->req.nb_sectors = nb_sectors; 495168485420SKevin Wolf acb->req.qiov = qiov; 4952d20d9b7cSPaolo Bonzini acb->req.flags = flags; 495368485420SKevin Wolf acb->is_write = is_write; 495468485420SKevin Wolf 
49558c5873d6SStefan Hajnoczi co = qemu_coroutine_create(bdrv_co_do_rw); 495668485420SKevin Wolf qemu_coroutine_enter(co, acb); 495768485420SKevin Wolf 49580b5a2445SPaolo Bonzini bdrv_co_maybe_schedule_bh(acb); 495968485420SKevin Wolf return &acb->common; 496068485420SKevin Wolf } 496168485420SKevin Wolf 496207f07615SPaolo Bonzini static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque) 4963b2e12bc6SChristoph Hellwig { 49647c84b1b8SMarkus Armbruster BlockAIOCBCoroutine *acb = opaque; 496507f07615SPaolo Bonzini BlockDriverState *bs = acb->common.bs; 4966b2e12bc6SChristoph Hellwig 496707f07615SPaolo Bonzini acb->req.error = bdrv_co_flush(bs); 49680b5a2445SPaolo Bonzini bdrv_co_complete(acb); 4969b2e12bc6SChristoph Hellwig } 4970b2e12bc6SChristoph Hellwig 49717c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs, 4972097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 4973016f5cf6SAlexander Graf { 497407f07615SPaolo Bonzini trace_bdrv_aio_flush(bs, opaque); 4975016f5cf6SAlexander Graf 497607f07615SPaolo Bonzini Coroutine *co; 49777c84b1b8SMarkus Armbruster BlockAIOCBCoroutine *acb; 4978016f5cf6SAlexander Graf 4979d7331bedSStefan Hajnoczi acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); 49800b5a2445SPaolo Bonzini acb->need_bh = true; 49810b5a2445SPaolo Bonzini acb->req.error = -EINPROGRESS; 4982d318aea9SKevin Wolf 498307f07615SPaolo Bonzini co = qemu_coroutine_create(bdrv_aio_flush_co_entry); 498407f07615SPaolo Bonzini qemu_coroutine_enter(co, acb); 4985016f5cf6SAlexander Graf 49860b5a2445SPaolo Bonzini bdrv_co_maybe_schedule_bh(acb); 4987016f5cf6SAlexander Graf return &acb->common; 4988016f5cf6SAlexander Graf } 4989016f5cf6SAlexander Graf 49904265d620SPaolo Bonzini static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque) 49914265d620SPaolo Bonzini { 49927c84b1b8SMarkus Armbruster BlockAIOCBCoroutine *acb = opaque; 49934265d620SPaolo Bonzini BlockDriverState *bs = acb->common.bs; 49944265d620SPaolo Bonzini 
49954265d620SPaolo Bonzini acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors); 49960b5a2445SPaolo Bonzini bdrv_co_complete(acb); 49974265d620SPaolo Bonzini } 49984265d620SPaolo Bonzini 49997c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs, 50004265d620SPaolo Bonzini int64_t sector_num, int nb_sectors, 5001097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 50024265d620SPaolo Bonzini { 50034265d620SPaolo Bonzini Coroutine *co; 50047c84b1b8SMarkus Armbruster BlockAIOCBCoroutine *acb; 50054265d620SPaolo Bonzini 50064265d620SPaolo Bonzini trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque); 50074265d620SPaolo Bonzini 5008d7331bedSStefan Hajnoczi acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); 50090b5a2445SPaolo Bonzini acb->need_bh = true; 50100b5a2445SPaolo Bonzini acb->req.error = -EINPROGRESS; 50114265d620SPaolo Bonzini acb->req.sector = sector_num; 50124265d620SPaolo Bonzini acb->req.nb_sectors = nb_sectors; 50134265d620SPaolo Bonzini co = qemu_coroutine_create(bdrv_aio_discard_co_entry); 50144265d620SPaolo Bonzini qemu_coroutine_enter(co, acb); 50154265d620SPaolo Bonzini 50160b5a2445SPaolo Bonzini bdrv_co_maybe_schedule_bh(acb); 50174265d620SPaolo Bonzini return &acb->common; 50184265d620SPaolo Bonzini } 50194265d620SPaolo Bonzini 5020ea2384d3Sbellard void bdrv_init(void) 5021ea2384d3Sbellard { 50225efa9d5aSAnthony Liguori module_call_init(MODULE_INIT_BLOCK); 5023ea2384d3Sbellard } 5024ce1a14dcSpbrook 5025eb852011SMarkus Armbruster void bdrv_init_with_whitelist(void) 5026eb852011SMarkus Armbruster { 5027eb852011SMarkus Armbruster use_bdrv_whitelist = 1; 5028eb852011SMarkus Armbruster bdrv_init(); 5029eb852011SMarkus Armbruster } 5030eb852011SMarkus Armbruster 5031d7331bedSStefan Hajnoczi void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs, 5032097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 50336bbff9a0Saliguori { 50347c84b1b8SMarkus Armbruster 
BlockAIOCB *acb; 5035ce1a14dcSpbrook 5036d7331bedSStefan Hajnoczi acb = g_slice_alloc(aiocb_info->aiocb_size); 5037d7331bedSStefan Hajnoczi acb->aiocb_info = aiocb_info; 5038ce1a14dcSpbrook acb->bs = bs; 5039ce1a14dcSpbrook acb->cb = cb; 5040ce1a14dcSpbrook acb->opaque = opaque; 5041f197fe2bSFam Zheng acb->refcnt = 1; 5042ce1a14dcSpbrook return acb; 5043ce1a14dcSpbrook } 5044ce1a14dcSpbrook 5045f197fe2bSFam Zheng void qemu_aio_ref(void *p) 5046f197fe2bSFam Zheng { 50477c84b1b8SMarkus Armbruster BlockAIOCB *acb = p; 5048f197fe2bSFam Zheng acb->refcnt++; 5049f197fe2bSFam Zheng } 5050f197fe2bSFam Zheng 50518007429aSFam Zheng void qemu_aio_unref(void *p) 5052ce1a14dcSpbrook { 50537c84b1b8SMarkus Armbruster BlockAIOCB *acb = p; 5054f197fe2bSFam Zheng assert(acb->refcnt > 0); 5055f197fe2bSFam Zheng if (--acb->refcnt == 0) { 5056d7331bedSStefan Hajnoczi g_slice_free1(acb->aiocb_info->aiocb_size, acb); 5057ce1a14dcSpbrook } 5058f197fe2bSFam Zheng } 505919cb3738Sbellard 506019cb3738Sbellard /**************************************************************/ 5061f9f05dc5SKevin Wolf /* Coroutine block device emulation */ 5062f9f05dc5SKevin Wolf 5063f9f05dc5SKevin Wolf typedef struct CoroutineIOCompletion { 5064f9f05dc5SKevin Wolf Coroutine *coroutine; 5065f9f05dc5SKevin Wolf int ret; 5066f9f05dc5SKevin Wolf } CoroutineIOCompletion; 5067f9f05dc5SKevin Wolf 5068f9f05dc5SKevin Wolf static void bdrv_co_io_em_complete(void *opaque, int ret) 5069f9f05dc5SKevin Wolf { 5070f9f05dc5SKevin Wolf CoroutineIOCompletion *co = opaque; 5071f9f05dc5SKevin Wolf 5072f9f05dc5SKevin Wolf co->ret = ret; 5073f9f05dc5SKevin Wolf qemu_coroutine_enter(co->coroutine, NULL); 5074f9f05dc5SKevin Wolf } 5075f9f05dc5SKevin Wolf 5076f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num, 5077f9f05dc5SKevin Wolf int nb_sectors, QEMUIOVector *iov, 5078f9f05dc5SKevin Wolf bool is_write) 5079f9f05dc5SKevin Wolf { 5080f9f05dc5SKevin Wolf CoroutineIOCompletion co = { 
5081f9f05dc5SKevin Wolf .coroutine = qemu_coroutine_self(), 5082f9f05dc5SKevin Wolf }; 50837c84b1b8SMarkus Armbruster BlockAIOCB *acb; 5084f9f05dc5SKevin Wolf 5085f9f05dc5SKevin Wolf if (is_write) { 5086a652d160SStefan Hajnoczi acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors, 5087f9f05dc5SKevin Wolf bdrv_co_io_em_complete, &co); 5088f9f05dc5SKevin Wolf } else { 5089a652d160SStefan Hajnoczi acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors, 5090f9f05dc5SKevin Wolf bdrv_co_io_em_complete, &co); 5091f9f05dc5SKevin Wolf } 5092f9f05dc5SKevin Wolf 509359370aaaSStefan Hajnoczi trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb); 5094f9f05dc5SKevin Wolf if (!acb) { 5095f9f05dc5SKevin Wolf return -EIO; 5096f9f05dc5SKevin Wolf } 5097f9f05dc5SKevin Wolf qemu_coroutine_yield(); 5098f9f05dc5SKevin Wolf 5099f9f05dc5SKevin Wolf return co.ret; 5100f9f05dc5SKevin Wolf } 5101f9f05dc5SKevin Wolf 5102f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs, 5103f9f05dc5SKevin Wolf int64_t sector_num, int nb_sectors, 5104f9f05dc5SKevin Wolf QEMUIOVector *iov) 5105f9f05dc5SKevin Wolf { 5106f9f05dc5SKevin Wolf return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false); 5107f9f05dc5SKevin Wolf } 5108f9f05dc5SKevin Wolf 5109f9f05dc5SKevin Wolf static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs, 5110f9f05dc5SKevin Wolf int64_t sector_num, int nb_sectors, 5111f9f05dc5SKevin Wolf QEMUIOVector *iov) 5112f9f05dc5SKevin Wolf { 5113f9f05dc5SKevin Wolf return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true); 5114f9f05dc5SKevin Wolf } 5115f9f05dc5SKevin Wolf 511607f07615SPaolo Bonzini static void coroutine_fn bdrv_flush_co_entry(void *opaque) 5117e7a8a783SKevin Wolf { 511807f07615SPaolo Bonzini RwCo *rwco = opaque; 511907f07615SPaolo Bonzini 512007f07615SPaolo Bonzini rwco->ret = bdrv_co_flush(rwco->bs); 512107f07615SPaolo Bonzini } 512207f07615SPaolo Bonzini 512307f07615SPaolo Bonzini int coroutine_fn 
bdrv_co_flush(BlockDriverState *bs) 512407f07615SPaolo Bonzini { 5125eb489bb1SKevin Wolf int ret; 5126eb489bb1SKevin Wolf 512729cdb251SPaolo Bonzini if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) { 512807f07615SPaolo Bonzini return 0; 5129eb489bb1SKevin Wolf } 5130eb489bb1SKevin Wolf 5131ca716364SKevin Wolf /* Write back cached data to the OS even with cache=unsafe */ 5132bf736fe3SKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS); 5133eb489bb1SKevin Wolf if (bs->drv->bdrv_co_flush_to_os) { 5134eb489bb1SKevin Wolf ret = bs->drv->bdrv_co_flush_to_os(bs); 5135eb489bb1SKevin Wolf if (ret < 0) { 5136eb489bb1SKevin Wolf return ret; 5137eb489bb1SKevin Wolf } 5138eb489bb1SKevin Wolf } 5139eb489bb1SKevin Wolf 5140ca716364SKevin Wolf /* But don't actually force it to the disk with cache=unsafe */ 5141ca716364SKevin Wolf if (bs->open_flags & BDRV_O_NO_FLUSH) { 5142d4c82329SKevin Wolf goto flush_parent; 5143ca716364SKevin Wolf } 5144ca716364SKevin Wolf 5145bf736fe3SKevin Wolf BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK); 5146eb489bb1SKevin Wolf if (bs->drv->bdrv_co_flush_to_disk) { 514729cdb251SPaolo Bonzini ret = bs->drv->bdrv_co_flush_to_disk(bs); 514807f07615SPaolo Bonzini } else if (bs->drv->bdrv_aio_flush) { 51497c84b1b8SMarkus Armbruster BlockAIOCB *acb; 5150e7a8a783SKevin Wolf CoroutineIOCompletion co = { 5151e7a8a783SKevin Wolf .coroutine = qemu_coroutine_self(), 5152e7a8a783SKevin Wolf }; 5153e7a8a783SKevin Wolf 515407f07615SPaolo Bonzini acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co); 515507f07615SPaolo Bonzini if (acb == NULL) { 515629cdb251SPaolo Bonzini ret = -EIO; 515707f07615SPaolo Bonzini } else { 5158e7a8a783SKevin Wolf qemu_coroutine_yield(); 515929cdb251SPaolo Bonzini ret = co.ret; 5160e7a8a783SKevin Wolf } 516107f07615SPaolo Bonzini } else { 516207f07615SPaolo Bonzini /* 516307f07615SPaolo Bonzini * Some block drivers always operate in either writethrough or unsafe 516407f07615SPaolo Bonzini * mode and don't support 
bdrv_flush therefore. Usually qemu doesn't 516507f07615SPaolo Bonzini * know how the server works (because the behaviour is hardcoded or 516607f07615SPaolo Bonzini * depends on server-side configuration), so we can't ensure that 516707f07615SPaolo Bonzini * everything is safe on disk. Returning an error doesn't work because 516807f07615SPaolo Bonzini * that would break guests even if the server operates in writethrough 516907f07615SPaolo Bonzini * mode. 517007f07615SPaolo Bonzini * 517107f07615SPaolo Bonzini * Let's hope the user knows what he's doing. 517207f07615SPaolo Bonzini */ 517329cdb251SPaolo Bonzini ret = 0; 517407f07615SPaolo Bonzini } 517529cdb251SPaolo Bonzini if (ret < 0) { 517629cdb251SPaolo Bonzini return ret; 517729cdb251SPaolo Bonzini } 517829cdb251SPaolo Bonzini 517929cdb251SPaolo Bonzini /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH 518029cdb251SPaolo Bonzini * in the case of cache=unsafe, so there are no useless flushes. 518129cdb251SPaolo Bonzini */ 5182d4c82329SKevin Wolf flush_parent: 518329cdb251SPaolo Bonzini return bdrv_co_flush(bs->file); 518407f07615SPaolo Bonzini } 518507f07615SPaolo Bonzini 51865a8a30dbSKevin Wolf void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp) 51870f15423cSAnthony Liguori { 51885a8a30dbSKevin Wolf Error *local_err = NULL; 51895a8a30dbSKevin Wolf int ret; 51905a8a30dbSKevin Wolf 51913456a8d1SKevin Wolf if (!bs->drv) { 51923456a8d1SKevin Wolf return; 51930f15423cSAnthony Liguori } 51943456a8d1SKevin Wolf 51957ea2d269SAlexey Kardashevskiy if (!(bs->open_flags & BDRV_O_INCOMING)) { 51967ea2d269SAlexey Kardashevskiy return; 51977ea2d269SAlexey Kardashevskiy } 51987ea2d269SAlexey Kardashevskiy bs->open_flags &= ~BDRV_O_INCOMING; 51997ea2d269SAlexey Kardashevskiy 52003456a8d1SKevin Wolf if (bs->drv->bdrv_invalidate_cache) { 52015a8a30dbSKevin Wolf bs->drv->bdrv_invalidate_cache(bs, &local_err); 52023456a8d1SKevin Wolf } else if (bs->file) { 52035a8a30dbSKevin Wolf 
bdrv_invalidate_cache(bs->file, &local_err); 52045a8a30dbSKevin Wolf } 52055a8a30dbSKevin Wolf if (local_err) { 52065a8a30dbSKevin Wolf error_propagate(errp, local_err); 52075a8a30dbSKevin Wolf return; 52083456a8d1SKevin Wolf } 52093456a8d1SKevin Wolf 52105a8a30dbSKevin Wolf ret = refresh_total_sectors(bs, bs->total_sectors); 52115a8a30dbSKevin Wolf if (ret < 0) { 52125a8a30dbSKevin Wolf error_setg_errno(errp, -ret, "Could not refresh total sector count"); 52135a8a30dbSKevin Wolf return; 52145a8a30dbSKevin Wolf } 52150f15423cSAnthony Liguori } 52160f15423cSAnthony Liguori 52175a8a30dbSKevin Wolf void bdrv_invalidate_cache_all(Error **errp) 52180f15423cSAnthony Liguori { 52190f15423cSAnthony Liguori BlockDriverState *bs; 52205a8a30dbSKevin Wolf Error *local_err = NULL; 52210f15423cSAnthony Liguori 5222dc364f4cSBenoît Canet QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 5223ed78cda3SStefan Hajnoczi AioContext *aio_context = bdrv_get_aio_context(bs); 5224ed78cda3SStefan Hajnoczi 5225ed78cda3SStefan Hajnoczi aio_context_acquire(aio_context); 52265a8a30dbSKevin Wolf bdrv_invalidate_cache(bs, &local_err); 5227ed78cda3SStefan Hajnoczi aio_context_release(aio_context); 52285a8a30dbSKevin Wolf if (local_err) { 52295a8a30dbSKevin Wolf error_propagate(errp, local_err); 52305a8a30dbSKevin Wolf return; 52315a8a30dbSKevin Wolf } 52320f15423cSAnthony Liguori } 52330f15423cSAnthony Liguori } 52340f15423cSAnthony Liguori 523507f07615SPaolo Bonzini int bdrv_flush(BlockDriverState *bs) 523607f07615SPaolo Bonzini { 523707f07615SPaolo Bonzini Coroutine *co; 523807f07615SPaolo Bonzini RwCo rwco = { 523907f07615SPaolo Bonzini .bs = bs, 524007f07615SPaolo Bonzini .ret = NOT_DONE, 524107f07615SPaolo Bonzini }; 524207f07615SPaolo Bonzini 524307f07615SPaolo Bonzini if (qemu_in_coroutine()) { 524407f07615SPaolo Bonzini /* Fast-path if already in coroutine context */ 524507f07615SPaolo Bonzini bdrv_flush_co_entry(&rwco); 524607f07615SPaolo Bonzini } else { 52472572b37aSStefan Hajnoczi 
AioContext *aio_context = bdrv_get_aio_context(bs); 52482572b37aSStefan Hajnoczi 524907f07615SPaolo Bonzini co = qemu_coroutine_create(bdrv_flush_co_entry); 525007f07615SPaolo Bonzini qemu_coroutine_enter(co, &rwco); 525107f07615SPaolo Bonzini while (rwco.ret == NOT_DONE) { 52522572b37aSStefan Hajnoczi aio_poll(aio_context, true); 525307f07615SPaolo Bonzini } 525407f07615SPaolo Bonzini } 525507f07615SPaolo Bonzini 525607f07615SPaolo Bonzini return rwco.ret; 525707f07615SPaolo Bonzini } 5258e7a8a783SKevin Wolf 5259775aa8b6SKevin Wolf typedef struct DiscardCo { 5260775aa8b6SKevin Wolf BlockDriverState *bs; 5261775aa8b6SKevin Wolf int64_t sector_num; 5262775aa8b6SKevin Wolf int nb_sectors; 5263775aa8b6SKevin Wolf int ret; 5264775aa8b6SKevin Wolf } DiscardCo; 52654265d620SPaolo Bonzini static void coroutine_fn bdrv_discard_co_entry(void *opaque) 52664265d620SPaolo Bonzini { 5267775aa8b6SKevin Wolf DiscardCo *rwco = opaque; 52684265d620SPaolo Bonzini 52694265d620SPaolo Bonzini rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors); 52704265d620SPaolo Bonzini } 52714265d620SPaolo Bonzini 52724265d620SPaolo Bonzini int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, 52734265d620SPaolo Bonzini int nb_sectors) 52744265d620SPaolo Bonzini { 5275b9c64947SMax Reitz int max_discard, ret; 5276d51e9fe5SPaolo Bonzini 52774265d620SPaolo Bonzini if (!bs->drv) { 52784265d620SPaolo Bonzini return -ENOMEDIUM; 5279b9c64947SMax Reitz } 5280b9c64947SMax Reitz 5281b9c64947SMax Reitz ret = bdrv_check_request(bs, sector_num, nb_sectors); 5282b9c64947SMax Reitz if (ret < 0) { 5283b9c64947SMax Reitz return ret; 52844265d620SPaolo Bonzini } else if (bs->read_only) { 52854265d620SPaolo Bonzini return -EROFS; 5286df702c9bSPaolo Bonzini } 5287df702c9bSPaolo Bonzini 52888f0720ecSPaolo Bonzini bdrv_reset_dirty(bs, sector_num, nb_sectors); 5289df702c9bSPaolo Bonzini 52909e8f1835SPaolo Bonzini /* Do nothing if disabled. 
*/ 52919e8f1835SPaolo Bonzini if (!(bs->open_flags & BDRV_O_UNMAP)) { 52929e8f1835SPaolo Bonzini return 0; 52939e8f1835SPaolo Bonzini } 52949e8f1835SPaolo Bonzini 5295d51e9fe5SPaolo Bonzini if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) { 5296d51e9fe5SPaolo Bonzini return 0; 5297d51e9fe5SPaolo Bonzini } 52986f14da52SPeter Lieven 529975af1f34SPeter Lieven max_discard = MIN_NON_ZERO(bs->bl.max_discard, BDRV_REQUEST_MAX_SECTORS); 53006f14da52SPeter Lieven while (nb_sectors > 0) { 53016f14da52SPeter Lieven int ret; 53026f14da52SPeter Lieven int num = nb_sectors; 53036f14da52SPeter Lieven 53046f14da52SPeter Lieven /* align request */ 53056f14da52SPeter Lieven if (bs->bl.discard_alignment && 53066f14da52SPeter Lieven num >= bs->bl.discard_alignment && 53076f14da52SPeter Lieven sector_num % bs->bl.discard_alignment) { 53086f14da52SPeter Lieven if (num > bs->bl.discard_alignment) { 53096f14da52SPeter Lieven num = bs->bl.discard_alignment; 53106f14da52SPeter Lieven } 53116f14da52SPeter Lieven num -= sector_num % bs->bl.discard_alignment; 53126f14da52SPeter Lieven } 53136f14da52SPeter Lieven 53146f14da52SPeter Lieven /* limit request size */ 53156f14da52SPeter Lieven if (num > max_discard) { 53166f14da52SPeter Lieven num = max_discard; 53176f14da52SPeter Lieven } 53186f14da52SPeter Lieven 5319d51e9fe5SPaolo Bonzini if (bs->drv->bdrv_co_discard) { 53206f14da52SPeter Lieven ret = bs->drv->bdrv_co_discard(bs, sector_num, num); 5321d51e9fe5SPaolo Bonzini } else { 53227c84b1b8SMarkus Armbruster BlockAIOCB *acb; 53234265d620SPaolo Bonzini CoroutineIOCompletion co = { 53244265d620SPaolo Bonzini .coroutine = qemu_coroutine_self(), 53254265d620SPaolo Bonzini }; 53264265d620SPaolo Bonzini 53274265d620SPaolo Bonzini acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors, 53284265d620SPaolo Bonzini bdrv_co_io_em_complete, &co); 53294265d620SPaolo Bonzini if (acb == NULL) { 53304265d620SPaolo Bonzini return -EIO; 53314265d620SPaolo Bonzini } else { 53324265d620SPaolo 
Bonzini qemu_coroutine_yield(); 5333d51e9fe5SPaolo Bonzini ret = co.ret; 53344265d620SPaolo Bonzini } 5335d51e9fe5SPaolo Bonzini } 53367ce21016SPaolo Bonzini if (ret && ret != -ENOTSUP) { 5337d51e9fe5SPaolo Bonzini return ret; 5338d51e9fe5SPaolo Bonzini } 5339d51e9fe5SPaolo Bonzini 5340d51e9fe5SPaolo Bonzini sector_num += num; 5341d51e9fe5SPaolo Bonzini nb_sectors -= num; 5342d51e9fe5SPaolo Bonzini } 53434265d620SPaolo Bonzini return 0; 53444265d620SPaolo Bonzini } 53454265d620SPaolo Bonzini 53464265d620SPaolo Bonzini int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) 53474265d620SPaolo Bonzini { 53484265d620SPaolo Bonzini Coroutine *co; 5349775aa8b6SKevin Wolf DiscardCo rwco = { 53504265d620SPaolo Bonzini .bs = bs, 53514265d620SPaolo Bonzini .sector_num = sector_num, 53524265d620SPaolo Bonzini .nb_sectors = nb_sectors, 53534265d620SPaolo Bonzini .ret = NOT_DONE, 53544265d620SPaolo Bonzini }; 53554265d620SPaolo Bonzini 53564265d620SPaolo Bonzini if (qemu_in_coroutine()) { 53574265d620SPaolo Bonzini /* Fast-path if already in coroutine context */ 53584265d620SPaolo Bonzini bdrv_discard_co_entry(&rwco); 53594265d620SPaolo Bonzini } else { 53602572b37aSStefan Hajnoczi AioContext *aio_context = bdrv_get_aio_context(bs); 53612572b37aSStefan Hajnoczi 53624265d620SPaolo Bonzini co = qemu_coroutine_create(bdrv_discard_co_entry); 53634265d620SPaolo Bonzini qemu_coroutine_enter(co, &rwco); 53644265d620SPaolo Bonzini while (rwco.ret == NOT_DONE) { 53652572b37aSStefan Hajnoczi aio_poll(aio_context, true); 53664265d620SPaolo Bonzini } 53674265d620SPaolo Bonzini } 53684265d620SPaolo Bonzini 53694265d620SPaolo Bonzini return rwco.ret; 53704265d620SPaolo Bonzini } 53714265d620SPaolo Bonzini 5372f9f05dc5SKevin Wolf /**************************************************************/ 537319cb3738Sbellard /* removable device support */ 537419cb3738Sbellard 537519cb3738Sbellard /** 537619cb3738Sbellard * Return TRUE if the media is present 537719cb3738Sbellard */ 
537819cb3738Sbellard int bdrv_is_inserted(BlockDriverState *bs) 537919cb3738Sbellard { 538019cb3738Sbellard BlockDriver *drv = bs->drv; 5381a1aff5bfSMarkus Armbruster 538219cb3738Sbellard if (!drv) 538319cb3738Sbellard return 0; 538419cb3738Sbellard if (!drv->bdrv_is_inserted) 5385a1aff5bfSMarkus Armbruster return 1; 5386a1aff5bfSMarkus Armbruster return drv->bdrv_is_inserted(bs); 538719cb3738Sbellard } 538819cb3738Sbellard 538919cb3738Sbellard /** 53908e49ca46SMarkus Armbruster * Return whether the media changed since the last call to this 53918e49ca46SMarkus Armbruster * function, or -ENOTSUP if we don't know. Most drivers don't know. 539219cb3738Sbellard */ 539319cb3738Sbellard int bdrv_media_changed(BlockDriverState *bs) 539419cb3738Sbellard { 539519cb3738Sbellard BlockDriver *drv = bs->drv; 539619cb3738Sbellard 53978e49ca46SMarkus Armbruster if (drv && drv->bdrv_media_changed) { 53988e49ca46SMarkus Armbruster return drv->bdrv_media_changed(bs); 53998e49ca46SMarkus Armbruster } 54008e49ca46SMarkus Armbruster return -ENOTSUP; 540119cb3738Sbellard } 540219cb3738Sbellard 540319cb3738Sbellard /** 540419cb3738Sbellard * If eject_flag is TRUE, eject the media. 
Otherwise, close the tray 540519cb3738Sbellard */ 5406f36f3949SLuiz Capitulino void bdrv_eject(BlockDriverState *bs, bool eject_flag) 540719cb3738Sbellard { 540819cb3738Sbellard BlockDriver *drv = bs->drv; 5409bfb197e0SMarkus Armbruster const char *device_name; 541019cb3738Sbellard 5411822e1cd1SMarkus Armbruster if (drv && drv->bdrv_eject) { 5412822e1cd1SMarkus Armbruster drv->bdrv_eject(bs, eject_flag); 541319cb3738Sbellard } 54146f382ed2SLuiz Capitulino 5415bfb197e0SMarkus Armbruster device_name = bdrv_get_device_name(bs); 5416bfb197e0SMarkus Armbruster if (device_name[0] != '\0') { 5417bfb197e0SMarkus Armbruster qapi_event_send_device_tray_moved(device_name, 5418a5ee7bd4SWenchao Xia eject_flag, &error_abort); 54196f382ed2SLuiz Capitulino } 542019cb3738Sbellard } 542119cb3738Sbellard 542219cb3738Sbellard /** 542319cb3738Sbellard * Lock or unlock the media (if it is locked, the user won't be able 542419cb3738Sbellard * to eject it manually). 542519cb3738Sbellard */ 5426025e849aSMarkus Armbruster void bdrv_lock_medium(BlockDriverState *bs, bool locked) 542719cb3738Sbellard { 542819cb3738Sbellard BlockDriver *drv = bs->drv; 542919cb3738Sbellard 5430025e849aSMarkus Armbruster trace_bdrv_lock_medium(bs, locked); 5431b8c6d095SStefan Hajnoczi 5432025e849aSMarkus Armbruster if (drv && drv->bdrv_lock_medium) { 5433025e849aSMarkus Armbruster drv->bdrv_lock_medium(bs, locked); 543419cb3738Sbellard } 543519cb3738Sbellard } 5436985a03b0Sths 5437985a03b0Sths /* needed for generic scsi interface */ 5438985a03b0Sths 5439985a03b0Sths int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) 5440985a03b0Sths { 5441985a03b0Sths BlockDriver *drv = bs->drv; 5442985a03b0Sths 5443985a03b0Sths if (drv && drv->bdrv_ioctl) 5444985a03b0Sths return drv->bdrv_ioctl(bs, req, buf); 5445985a03b0Sths return -ENOTSUP; 5446985a03b0Sths } 54477d780669Saliguori 54487c84b1b8SMarkus Armbruster BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs, 5449221f715dSaliguori unsigned long int req, 
void *buf, 5450097310b5SMarkus Armbruster BlockCompletionFunc *cb, void *opaque) 54517d780669Saliguori { 5452221f715dSaliguori BlockDriver *drv = bs->drv; 54537d780669Saliguori 5454221f715dSaliguori if (drv && drv->bdrv_aio_ioctl) 5455221f715dSaliguori return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque); 5456221f715dSaliguori return NULL; 54577d780669Saliguori } 5458e268ca52Saliguori 54591b7fd729SPaolo Bonzini void bdrv_set_guest_block_size(BlockDriverState *bs, int align) 54607b6f9300SMarkus Armbruster { 54611b7fd729SPaolo Bonzini bs->guest_block_size = align; 54627b6f9300SMarkus Armbruster } 54637cd1e32aSlirans@il.ibm.com 5464e268ca52Saliguori void *qemu_blockalign(BlockDriverState *bs, size_t size) 5465e268ca52Saliguori { 5466339064d5SKevin Wolf return qemu_memalign(bdrv_opt_mem_align(bs), size); 5467e268ca52Saliguori } 54687cd1e32aSlirans@il.ibm.com 54699ebd8448SMax Reitz void *qemu_blockalign0(BlockDriverState *bs, size_t size) 54709ebd8448SMax Reitz { 54719ebd8448SMax Reitz return memset(qemu_blockalign(bs, size), 0, size); 54729ebd8448SMax Reitz } 54739ebd8448SMax Reitz 54747d2a35ccSKevin Wolf void *qemu_try_blockalign(BlockDriverState *bs, size_t size) 54757d2a35ccSKevin Wolf { 54767d2a35ccSKevin Wolf size_t align = bdrv_opt_mem_align(bs); 54777d2a35ccSKevin Wolf 54787d2a35ccSKevin Wolf /* Ensure that NULL is never returned on success */ 54797d2a35ccSKevin Wolf assert(align > 0); 54807d2a35ccSKevin Wolf if (size == 0) { 54817d2a35ccSKevin Wolf size = align; 54827d2a35ccSKevin Wolf } 54837d2a35ccSKevin Wolf 54847d2a35ccSKevin Wolf return qemu_try_memalign(align, size); 54857d2a35ccSKevin Wolf } 54867d2a35ccSKevin Wolf 54879ebd8448SMax Reitz void *qemu_try_blockalign0(BlockDriverState *bs, size_t size) 54889ebd8448SMax Reitz { 54899ebd8448SMax Reitz void *mem = qemu_try_blockalign(bs, size); 54909ebd8448SMax Reitz 54919ebd8448SMax Reitz if (mem) { 54929ebd8448SMax Reitz memset(mem, 0, size); 54939ebd8448SMax Reitz } 54949ebd8448SMax Reitz 54959ebd8448SMax 
Reitz return mem; 54969ebd8448SMax Reitz } 54979ebd8448SMax Reitz 5498c53b1c51SStefan Hajnoczi /* 5499c53b1c51SStefan Hajnoczi * Check if all memory in this vector is sector aligned. 5500c53b1c51SStefan Hajnoczi */ 5501c53b1c51SStefan Hajnoczi bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) 5502c53b1c51SStefan Hajnoczi { 5503c53b1c51SStefan Hajnoczi int i; 5504339064d5SKevin Wolf size_t alignment = bdrv_opt_mem_align(bs); 5505c53b1c51SStefan Hajnoczi 5506c53b1c51SStefan Hajnoczi for (i = 0; i < qiov->niov; i++) { 5507339064d5SKevin Wolf if ((uintptr_t) qiov->iov[i].iov_base % alignment) { 5508c53b1c51SStefan Hajnoczi return false; 5509c53b1c51SStefan Hajnoczi } 5510339064d5SKevin Wolf if (qiov->iov[i].iov_len % alignment) { 55111ff735bdSKevin Wolf return false; 55121ff735bdSKevin Wolf } 5513c53b1c51SStefan Hajnoczi } 5514c53b1c51SStefan Hajnoczi 5515c53b1c51SStefan Hajnoczi return true; 5516c53b1c51SStefan Hajnoczi } 5517c53b1c51SStefan Hajnoczi 55180db6e54aSFam Zheng BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name) 55190db6e54aSFam Zheng { 55200db6e54aSFam Zheng BdrvDirtyBitmap *bm; 55210db6e54aSFam Zheng 55220db6e54aSFam Zheng assert(name); 55230db6e54aSFam Zheng QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { 55240db6e54aSFam Zheng if (bm->name && !strcmp(name, bm->name)) { 55250db6e54aSFam Zheng return bm; 55260db6e54aSFam Zheng } 55270db6e54aSFam Zheng } 55280db6e54aSFam Zheng return NULL; 55290db6e54aSFam Zheng } 55300db6e54aSFam Zheng 553120dca810SJohn Snow void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap) 55320db6e54aSFam Zheng { 55339bd2b08fSJohn Snow assert(!bdrv_dirty_bitmap_frozen(bitmap)); 55340db6e54aSFam Zheng g_free(bitmap->name); 55350db6e54aSFam Zheng bitmap->name = NULL; 55360db6e54aSFam Zheng } 55370db6e54aSFam Zheng 55380db6e54aSFam Zheng BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, 55395fba6c0eSJohn Snow uint32_t granularity, 55400db6e54aSFam Zheng const char *name, 
5541b8afb520SFam Zheng Error **errp) 55427cd1e32aSlirans@il.ibm.com { 55437cd1e32aSlirans@il.ibm.com int64_t bitmap_size; 5544e4654d2dSFam Zheng BdrvDirtyBitmap *bitmap; 55455fba6c0eSJohn Snow uint32_t sector_granularity; 5546a55eb92cSJan Kiszka 554750717e94SPaolo Bonzini assert((granularity & (granularity - 1)) == 0); 554850717e94SPaolo Bonzini 55490db6e54aSFam Zheng if (name && bdrv_find_dirty_bitmap(bs, name)) { 55500db6e54aSFam Zheng error_setg(errp, "Bitmap already exists: %s", name); 55510db6e54aSFam Zheng return NULL; 55520db6e54aSFam Zheng } 55535fba6c0eSJohn Snow sector_granularity = granularity >> BDRV_SECTOR_BITS; 55545fba6c0eSJohn Snow assert(sector_granularity); 555557322b78SMarkus Armbruster bitmap_size = bdrv_nb_sectors(bs); 5556b8afb520SFam Zheng if (bitmap_size < 0) { 5557b8afb520SFam Zheng error_setg_errno(errp, -bitmap_size, "could not get length of device"); 5558b8afb520SFam Zheng errno = -bitmap_size; 5559b8afb520SFam Zheng return NULL; 5560b8afb520SFam Zheng } 55615839e53bSMarkus Armbruster bitmap = g_new0(BdrvDirtyBitmap, 1); 55625fba6c0eSJohn Snow bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity)); 5563e74e6b78SJohn Snow bitmap->size = bitmap_size; 55640db6e54aSFam Zheng bitmap->name = g_strdup(name); 5565b8e6fb75SJohn Snow bitmap->disabled = false; 5566e4654d2dSFam Zheng QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list); 5567e4654d2dSFam Zheng return bitmap; 5568e4654d2dSFam Zheng } 5569e4654d2dSFam Zheng 55709bd2b08fSJohn Snow bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap) 55719bd2b08fSJohn Snow { 55729bd2b08fSJohn Snow return bitmap->successor; 55739bd2b08fSJohn Snow } 55749bd2b08fSJohn Snow 5575b8e6fb75SJohn Snow bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap) 5576b8e6fb75SJohn Snow { 55779bd2b08fSJohn Snow return !(bitmap->disabled || bitmap->successor); 55789bd2b08fSJohn Snow } 55799bd2b08fSJohn Snow 55809bd2b08fSJohn Snow /** 55819bd2b08fSJohn Snow * Create a successor bitmap destined to replace 
this bitmap after an operation. 55829bd2b08fSJohn Snow * Requires that the bitmap is not frozen and has no successor. 55839bd2b08fSJohn Snow */ 55849bd2b08fSJohn Snow int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs, 55859bd2b08fSJohn Snow BdrvDirtyBitmap *bitmap, Error **errp) 55869bd2b08fSJohn Snow { 55879bd2b08fSJohn Snow uint64_t granularity; 55889bd2b08fSJohn Snow BdrvDirtyBitmap *child; 55899bd2b08fSJohn Snow 55909bd2b08fSJohn Snow if (bdrv_dirty_bitmap_frozen(bitmap)) { 55919bd2b08fSJohn Snow error_setg(errp, "Cannot create a successor for a bitmap that is " 55929bd2b08fSJohn Snow "currently frozen"); 55939bd2b08fSJohn Snow return -1; 55949bd2b08fSJohn Snow } 55959bd2b08fSJohn Snow assert(!bitmap->successor); 55969bd2b08fSJohn Snow 55979bd2b08fSJohn Snow /* Create an anonymous successor */ 55989bd2b08fSJohn Snow granularity = bdrv_dirty_bitmap_granularity(bitmap); 55999bd2b08fSJohn Snow child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp); 56009bd2b08fSJohn Snow if (!child) { 56019bd2b08fSJohn Snow return -1; 56029bd2b08fSJohn Snow } 56039bd2b08fSJohn Snow 56049bd2b08fSJohn Snow /* Successor will be on or off based on our current state. */ 56059bd2b08fSJohn Snow child->disabled = bitmap->disabled; 56069bd2b08fSJohn Snow 56079bd2b08fSJohn Snow /* Install the successor and freeze the parent */ 56089bd2b08fSJohn Snow bitmap->successor = child; 56099bd2b08fSJohn Snow return 0; 56109bd2b08fSJohn Snow } 56119bd2b08fSJohn Snow 56129bd2b08fSJohn Snow /** 56139bd2b08fSJohn Snow * For a bitmap with a successor, yield our name to the successor, 56149bd2b08fSJohn Snow * delete the old bitmap, and return a handle to the new bitmap. 
56159bd2b08fSJohn Snow */ 56169bd2b08fSJohn Snow BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs, 56179bd2b08fSJohn Snow BdrvDirtyBitmap *bitmap, 56189bd2b08fSJohn Snow Error **errp) 56199bd2b08fSJohn Snow { 56209bd2b08fSJohn Snow char *name; 56219bd2b08fSJohn Snow BdrvDirtyBitmap *successor = bitmap->successor; 56229bd2b08fSJohn Snow 56239bd2b08fSJohn Snow if (successor == NULL) { 56249bd2b08fSJohn Snow error_setg(errp, "Cannot relinquish control if " 56259bd2b08fSJohn Snow "there's no successor present"); 56269bd2b08fSJohn Snow return NULL; 56279bd2b08fSJohn Snow } 56289bd2b08fSJohn Snow 56299bd2b08fSJohn Snow name = bitmap->name; 56309bd2b08fSJohn Snow bitmap->name = NULL; 56319bd2b08fSJohn Snow successor->name = name; 56329bd2b08fSJohn Snow bitmap->successor = NULL; 56339bd2b08fSJohn Snow bdrv_release_dirty_bitmap(bs, bitmap); 56349bd2b08fSJohn Snow 56359bd2b08fSJohn Snow return successor; 56369bd2b08fSJohn Snow } 56379bd2b08fSJohn Snow 56389bd2b08fSJohn Snow /** 56399bd2b08fSJohn Snow * In cases of failure where we can no longer safely delete the parent, 56409bd2b08fSJohn Snow * we may wish to re-join the parent and child/successor. 56419bd2b08fSJohn Snow * The merged parent will be un-frozen, but not explicitly re-enabled. 
56429bd2b08fSJohn Snow */ 56439bd2b08fSJohn Snow BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs, 56449bd2b08fSJohn Snow BdrvDirtyBitmap *parent, 56459bd2b08fSJohn Snow Error **errp) 56469bd2b08fSJohn Snow { 56479bd2b08fSJohn Snow BdrvDirtyBitmap *successor = parent->successor; 56489bd2b08fSJohn Snow 56499bd2b08fSJohn Snow if (!successor) { 56509bd2b08fSJohn Snow error_setg(errp, "Cannot reclaim a successor when none is present"); 56519bd2b08fSJohn Snow return NULL; 56529bd2b08fSJohn Snow } 56539bd2b08fSJohn Snow 56549bd2b08fSJohn Snow if (!hbitmap_merge(parent->bitmap, successor->bitmap)) { 56559bd2b08fSJohn Snow error_setg(errp, "Merging of parent and successor bitmap failed"); 56569bd2b08fSJohn Snow return NULL; 56579bd2b08fSJohn Snow } 56589bd2b08fSJohn Snow bdrv_release_dirty_bitmap(bs, successor); 56599bd2b08fSJohn Snow parent->successor = NULL; 56609bd2b08fSJohn Snow 56619bd2b08fSJohn Snow return parent; 5662b8e6fb75SJohn Snow } 5663b8e6fb75SJohn Snow 5664*ce1ffea8SJohn Snow /** 5665*ce1ffea8SJohn Snow * Truncates _all_ bitmaps attached to a BDS. 
5666*ce1ffea8SJohn Snow */ 5667*ce1ffea8SJohn Snow static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs) 5668*ce1ffea8SJohn Snow { 5669*ce1ffea8SJohn Snow BdrvDirtyBitmap *bitmap; 5670*ce1ffea8SJohn Snow uint64_t size = bdrv_nb_sectors(bs); 5671*ce1ffea8SJohn Snow 5672*ce1ffea8SJohn Snow QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { 5673*ce1ffea8SJohn Snow if (bdrv_dirty_bitmap_frozen(bitmap)) { 5674*ce1ffea8SJohn Snow continue; 5675*ce1ffea8SJohn Snow } 5676*ce1ffea8SJohn Snow hbitmap_truncate(bitmap->bitmap, size); 5677*ce1ffea8SJohn Snow } 5678*ce1ffea8SJohn Snow } 5679*ce1ffea8SJohn Snow 5680e4654d2dSFam Zheng void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) 5681e4654d2dSFam Zheng { 5682e4654d2dSFam Zheng BdrvDirtyBitmap *bm, *next; 5683e4654d2dSFam Zheng QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) { 5684e4654d2dSFam Zheng if (bm == bitmap) { 56859bd2b08fSJohn Snow assert(!bdrv_dirty_bitmap_frozen(bm)); 5686e4654d2dSFam Zheng QLIST_REMOVE(bitmap, list); 5687e4654d2dSFam Zheng hbitmap_free(bitmap->bitmap); 56880db6e54aSFam Zheng g_free(bitmap->name); 5689e4654d2dSFam Zheng g_free(bitmap); 5690e4654d2dSFam Zheng return; 56917cd1e32aSlirans@il.ibm.com } 56927cd1e32aSlirans@il.ibm.com } 56937cd1e32aSlirans@il.ibm.com } 56947cd1e32aSlirans@il.ibm.com 5695b8e6fb75SJohn Snow void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap) 5696b8e6fb75SJohn Snow { 56979bd2b08fSJohn Snow assert(!bdrv_dirty_bitmap_frozen(bitmap)); 5698b8e6fb75SJohn Snow bitmap->disabled = true; 5699b8e6fb75SJohn Snow } 5700b8e6fb75SJohn Snow 5701b8e6fb75SJohn Snow void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap) 5702b8e6fb75SJohn Snow { 57039bd2b08fSJohn Snow assert(!bdrv_dirty_bitmap_frozen(bitmap)); 5704b8e6fb75SJohn Snow bitmap->disabled = false; 5705b8e6fb75SJohn Snow } 5706b8e6fb75SJohn Snow 570721b56835SFam Zheng BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs) 570821b56835SFam Zheng { 570921b56835SFam Zheng 
BdrvDirtyBitmap *bm; 571021b56835SFam Zheng BlockDirtyInfoList *list = NULL; 571121b56835SFam Zheng BlockDirtyInfoList **plist = &list; 571221b56835SFam Zheng 571321b56835SFam Zheng QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { 57145839e53bSMarkus Armbruster BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1); 57155839e53bSMarkus Armbruster BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1); 571620dca810SJohn Snow info->count = bdrv_get_dirty_count(bm); 5717592fdd02SJohn Snow info->granularity = bdrv_dirty_bitmap_granularity(bm); 57180db6e54aSFam Zheng info->has_name = !!bm->name; 57190db6e54aSFam Zheng info->name = g_strdup(bm->name); 5720a113534fSJohn Snow info->frozen = bdrv_dirty_bitmap_frozen(bm); 572121b56835SFam Zheng entry->value = info; 572221b56835SFam Zheng *plist = entry; 572321b56835SFam Zheng plist = &entry->next; 572421b56835SFam Zheng } 572521b56835SFam Zheng 572621b56835SFam Zheng return list; 572721b56835SFam Zheng } 572821b56835SFam Zheng 5729e4654d2dSFam Zheng int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector) 57307cd1e32aSlirans@il.ibm.com { 5731e4654d2dSFam Zheng if (bitmap) { 5732e4654d2dSFam Zheng return hbitmap_get(bitmap->bitmap, sector); 57337cd1e32aSlirans@il.ibm.com } else { 57347cd1e32aSlirans@il.ibm.com return 0; 57357cd1e32aSlirans@il.ibm.com } 57367cd1e32aSlirans@il.ibm.com } 57377cd1e32aSlirans@il.ibm.com 5738341ebc2fSJohn Snow /** 5739341ebc2fSJohn Snow * Chooses a default granularity based on the existing cluster size, 5740341ebc2fSJohn Snow * but clamped between [4K, 64K]. Defaults to 64K in the case that there 5741341ebc2fSJohn Snow * is no cluster size information available. 
5742341ebc2fSJohn Snow */ 5743341ebc2fSJohn Snow uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs) 5744341ebc2fSJohn Snow { 5745341ebc2fSJohn Snow BlockDriverInfo bdi; 5746341ebc2fSJohn Snow uint32_t granularity; 5747341ebc2fSJohn Snow 5748341ebc2fSJohn Snow if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) { 5749341ebc2fSJohn Snow granularity = MAX(4096, bdi.cluster_size); 5750341ebc2fSJohn Snow granularity = MIN(65536, granularity); 5751341ebc2fSJohn Snow } else { 5752341ebc2fSJohn Snow granularity = 65536; 5753341ebc2fSJohn Snow } 5754341ebc2fSJohn Snow 5755341ebc2fSJohn Snow return granularity; 5756341ebc2fSJohn Snow } 5757341ebc2fSJohn Snow 5758592fdd02SJohn Snow uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap) 5759592fdd02SJohn Snow { 5760592fdd02SJohn Snow return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap); 5761592fdd02SJohn Snow } 5762592fdd02SJohn Snow 576320dca810SJohn Snow void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi) 57641755da16SPaolo Bonzini { 5765e4654d2dSFam Zheng hbitmap_iter_init(hbi, bitmap->bitmap, 0); 57661755da16SPaolo Bonzini } 57671755da16SPaolo Bonzini 576820dca810SJohn Snow void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, 5769c4237dfaSVladimir Sementsov-Ogievskiy int64_t cur_sector, int nr_sectors) 5770c4237dfaSVladimir Sementsov-Ogievskiy { 5771b8e6fb75SJohn Snow assert(bdrv_dirty_bitmap_enabled(bitmap)); 5772c4237dfaSVladimir Sementsov-Ogievskiy hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); 5773c4237dfaSVladimir Sementsov-Ogievskiy } 5774c4237dfaSVladimir Sementsov-Ogievskiy 577520dca810SJohn Snow void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, 5776c4237dfaSVladimir Sementsov-Ogievskiy int64_t cur_sector, int nr_sectors) 5777c4237dfaSVladimir Sementsov-Ogievskiy { 5778b8e6fb75SJohn Snow assert(bdrv_dirty_bitmap_enabled(bitmap)); 5779c4237dfaSVladimir Sementsov-Ogievskiy hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); 5780c4237dfaSVladimir 
Sementsov-Ogievskiy } 5781c4237dfaSVladimir Sementsov-Ogievskiy 5782e74e6b78SJohn Snow void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap) 5783e74e6b78SJohn Snow { 5784e74e6b78SJohn Snow assert(bdrv_dirty_bitmap_enabled(bitmap)); 5785e74e6b78SJohn Snow hbitmap_reset(bitmap->bitmap, 0, bitmap->size); 5786e74e6b78SJohn Snow } 5787e74e6b78SJohn Snow 5788c4237dfaSVladimir Sementsov-Ogievskiy static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, 57891755da16SPaolo Bonzini int nr_sectors) 57901755da16SPaolo Bonzini { 5791e4654d2dSFam Zheng BdrvDirtyBitmap *bitmap; 5792e4654d2dSFam Zheng QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { 5793b8e6fb75SJohn Snow if (!bdrv_dirty_bitmap_enabled(bitmap)) { 5794b8e6fb75SJohn Snow continue; 5795b8e6fb75SJohn Snow } 5796e4654d2dSFam Zheng hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); 5797e4654d2dSFam Zheng } 57981755da16SPaolo Bonzini } 57991755da16SPaolo Bonzini 5800c4237dfaSVladimir Sementsov-Ogievskiy static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, 5801c4237dfaSVladimir Sementsov-Ogievskiy int nr_sectors) 58027cd1e32aSlirans@il.ibm.com { 5803e4654d2dSFam Zheng BdrvDirtyBitmap *bitmap; 5804e4654d2dSFam Zheng QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { 5805b8e6fb75SJohn Snow if (!bdrv_dirty_bitmap_enabled(bitmap)) { 5806b8e6fb75SJohn Snow continue; 5807b8e6fb75SJohn Snow } 5808e4654d2dSFam Zheng hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); 5809e4654d2dSFam Zheng } 58107cd1e32aSlirans@il.ibm.com } 5811aaa0eb75SLiran Schour 5812d58d8453SJohn Snow /** 5813d58d8453SJohn Snow * Advance an HBitmapIter to an arbitrary offset. 
5814d58d8453SJohn Snow */ 5815d58d8453SJohn Snow void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset) 5816d58d8453SJohn Snow { 5817d58d8453SJohn Snow assert(hbi->hb); 5818d58d8453SJohn Snow hbitmap_iter_init(hbi, hbi->hb, offset); 5819d58d8453SJohn Snow } 5820d58d8453SJohn Snow 582120dca810SJohn Snow int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap) 5822aaa0eb75SLiran Schour { 5823e4654d2dSFam Zheng return hbitmap_count(bitmap->bitmap); 5824aaa0eb75SLiran Schour } 5825f88e1a42SJes Sorensen 58269fcb0251SFam Zheng /* Get a reference to bs */ 58279fcb0251SFam Zheng void bdrv_ref(BlockDriverState *bs) 58289fcb0251SFam Zheng { 58299fcb0251SFam Zheng bs->refcnt++; 58309fcb0251SFam Zheng } 58319fcb0251SFam Zheng 58329fcb0251SFam Zheng /* Release a previously grabbed reference to bs. 58339fcb0251SFam Zheng * If after releasing, reference count is zero, the BlockDriverState is 58349fcb0251SFam Zheng * deleted. */ 58359fcb0251SFam Zheng void bdrv_unref(BlockDriverState *bs) 58369fcb0251SFam Zheng { 58379a4d5ca6SJeff Cody if (!bs) { 58389a4d5ca6SJeff Cody return; 58399a4d5ca6SJeff Cody } 58409fcb0251SFam Zheng assert(bs->refcnt > 0); 58419fcb0251SFam Zheng if (--bs->refcnt == 0) { 58429fcb0251SFam Zheng bdrv_delete(bs); 58439fcb0251SFam Zheng } 58449fcb0251SFam Zheng } 58459fcb0251SFam Zheng 5846fbe40ff7SFam Zheng struct BdrvOpBlocker { 5847fbe40ff7SFam Zheng Error *reason; 5848fbe40ff7SFam Zheng QLIST_ENTRY(BdrvOpBlocker) list; 5849fbe40ff7SFam Zheng }; 5850fbe40ff7SFam Zheng 5851fbe40ff7SFam Zheng bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp) 5852fbe40ff7SFam Zheng { 5853fbe40ff7SFam Zheng BdrvOpBlocker *blocker; 5854fbe40ff7SFam Zheng assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 5855fbe40ff7SFam Zheng if (!QLIST_EMPTY(&bs->op_blockers[op])) { 5856fbe40ff7SFam Zheng blocker = QLIST_FIRST(&bs->op_blockers[op]); 5857fbe40ff7SFam Zheng if (errp) { 585881e5f78aSAlberto Garcia error_setg(errp, "Node '%s' is busy: %s", 
585981e5f78aSAlberto Garcia bdrv_get_device_or_node_name(bs), 5860bfb197e0SMarkus Armbruster error_get_pretty(blocker->reason)); 5861fbe40ff7SFam Zheng } 5862fbe40ff7SFam Zheng return true; 5863fbe40ff7SFam Zheng } 5864fbe40ff7SFam Zheng return false; 5865fbe40ff7SFam Zheng } 5866fbe40ff7SFam Zheng 5867fbe40ff7SFam Zheng void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason) 5868fbe40ff7SFam Zheng { 5869fbe40ff7SFam Zheng BdrvOpBlocker *blocker; 5870fbe40ff7SFam Zheng assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 5871fbe40ff7SFam Zheng 58725839e53bSMarkus Armbruster blocker = g_new0(BdrvOpBlocker, 1); 5873fbe40ff7SFam Zheng blocker->reason = reason; 5874fbe40ff7SFam Zheng QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list); 5875fbe40ff7SFam Zheng } 5876fbe40ff7SFam Zheng 5877fbe40ff7SFam Zheng void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason) 5878fbe40ff7SFam Zheng { 5879fbe40ff7SFam Zheng BdrvOpBlocker *blocker, *next; 5880fbe40ff7SFam Zheng assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); 5881fbe40ff7SFam Zheng QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) { 5882fbe40ff7SFam Zheng if (blocker->reason == reason) { 5883fbe40ff7SFam Zheng QLIST_REMOVE(blocker, list); 5884fbe40ff7SFam Zheng g_free(blocker); 5885fbe40ff7SFam Zheng } 5886fbe40ff7SFam Zheng } 5887fbe40ff7SFam Zheng } 5888fbe40ff7SFam Zheng 5889fbe40ff7SFam Zheng void bdrv_op_block_all(BlockDriverState *bs, Error *reason) 5890fbe40ff7SFam Zheng { 5891fbe40ff7SFam Zheng int i; 5892fbe40ff7SFam Zheng for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 5893fbe40ff7SFam Zheng bdrv_op_block(bs, i, reason); 5894fbe40ff7SFam Zheng } 5895fbe40ff7SFam Zheng } 5896fbe40ff7SFam Zheng 5897fbe40ff7SFam Zheng void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason) 5898fbe40ff7SFam Zheng { 5899fbe40ff7SFam Zheng int i; 5900fbe40ff7SFam Zheng for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 5901fbe40ff7SFam Zheng bdrv_op_unblock(bs, i, reason); 5902fbe40ff7SFam Zheng } 
5903fbe40ff7SFam Zheng } 5904fbe40ff7SFam Zheng 5905fbe40ff7SFam Zheng bool bdrv_op_blocker_is_empty(BlockDriverState *bs) 5906fbe40ff7SFam Zheng { 5907fbe40ff7SFam Zheng int i; 5908fbe40ff7SFam Zheng 5909fbe40ff7SFam Zheng for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { 5910fbe40ff7SFam Zheng if (!QLIST_EMPTY(&bs->op_blockers[i])) { 5911fbe40ff7SFam Zheng return false; 5912fbe40ff7SFam Zheng } 5913fbe40ff7SFam Zheng } 5914fbe40ff7SFam Zheng return true; 5915fbe40ff7SFam Zheng } 5916fbe40ff7SFam Zheng 591728a7282aSLuiz Capitulino void bdrv_iostatus_enable(BlockDriverState *bs) 591828a7282aSLuiz Capitulino { 5919d6bf279eSLuiz Capitulino bs->iostatus_enabled = true; 592058e21ef5SLuiz Capitulino bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK; 592128a7282aSLuiz Capitulino } 592228a7282aSLuiz Capitulino 592328a7282aSLuiz Capitulino /* The I/O status is only enabled if the drive explicitly 592428a7282aSLuiz Capitulino * enables it _and_ the VM is configured to stop on errors */ 592528a7282aSLuiz Capitulino bool bdrv_iostatus_is_enabled(const BlockDriverState *bs) 592628a7282aSLuiz Capitulino { 5927d6bf279eSLuiz Capitulino return (bs->iostatus_enabled && 592892aa5c6dSPaolo Bonzini (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC || 592992aa5c6dSPaolo Bonzini bs->on_write_error == BLOCKDEV_ON_ERROR_STOP || 593092aa5c6dSPaolo Bonzini bs->on_read_error == BLOCKDEV_ON_ERROR_STOP)); 593128a7282aSLuiz Capitulino } 593228a7282aSLuiz Capitulino 593328a7282aSLuiz Capitulino void bdrv_iostatus_disable(BlockDriverState *bs) 593428a7282aSLuiz Capitulino { 5935d6bf279eSLuiz Capitulino bs->iostatus_enabled = false; 593628a7282aSLuiz Capitulino } 593728a7282aSLuiz Capitulino 593828a7282aSLuiz Capitulino void bdrv_iostatus_reset(BlockDriverState *bs) 593928a7282aSLuiz Capitulino { 594028a7282aSLuiz Capitulino if (bdrv_iostatus_is_enabled(bs)) { 594158e21ef5SLuiz Capitulino bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK; 59423bd293c3SPaolo Bonzini if (bs->job) { 59433bd293c3SPaolo Bonzini 
block_job_iostatus_reset(bs->job); 59443bd293c3SPaolo Bonzini } 594528a7282aSLuiz Capitulino } 594628a7282aSLuiz Capitulino } 594728a7282aSLuiz Capitulino 594828a7282aSLuiz Capitulino void bdrv_iostatus_set_err(BlockDriverState *bs, int error) 594928a7282aSLuiz Capitulino { 59503e1caa5fSPaolo Bonzini assert(bdrv_iostatus_is_enabled(bs)); 59513e1caa5fSPaolo Bonzini if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { 595258e21ef5SLuiz Capitulino bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE : 595358e21ef5SLuiz Capitulino BLOCK_DEVICE_IO_STATUS_FAILED; 595428a7282aSLuiz Capitulino } 595528a7282aSLuiz Capitulino } 595628a7282aSLuiz Capitulino 5957d92ada22SLuiz Capitulino void bdrv_img_create(const char *filename, const char *fmt, 5958f88e1a42SJes Sorensen const char *base_filename, const char *base_fmt, 5959f382d43aSMiroslav Rezanina char *options, uint64_t img_size, int flags, 5960f382d43aSMiroslav Rezanina Error **errp, bool quiet) 5961f88e1a42SJes Sorensen { 596283d0521aSChunyan Liu QemuOptsList *create_opts = NULL; 596383d0521aSChunyan Liu QemuOpts *opts = NULL; 596483d0521aSChunyan Liu const char *backing_fmt, *backing_file; 596583d0521aSChunyan Liu int64_t size; 5966f88e1a42SJes Sorensen BlockDriver *drv, *proto_drv; 596796df67d1SStefan Hajnoczi BlockDriver *backing_drv = NULL; 5968cc84d90fSMax Reitz Error *local_err = NULL; 5969f88e1a42SJes Sorensen int ret = 0; 5970f88e1a42SJes Sorensen 5971f88e1a42SJes Sorensen /* Find driver and parse its options */ 5972f88e1a42SJes Sorensen drv = bdrv_find_format(fmt); 5973f88e1a42SJes Sorensen if (!drv) { 597471c79813SLuiz Capitulino error_setg(errp, "Unknown file format '%s'", fmt); 5975d92ada22SLuiz Capitulino return; 5976f88e1a42SJes Sorensen } 5977f88e1a42SJes Sorensen 5978b65a5e12SMax Reitz proto_drv = bdrv_find_protocol(filename, true, errp); 5979f88e1a42SJes Sorensen if (!proto_drv) { 5980d92ada22SLuiz Capitulino return; 5981f88e1a42SJes Sorensen } 5982f88e1a42SJes Sorensen 5983c6149724SMax Reitz if 
(!drv->create_opts) { 5984c6149724SMax Reitz error_setg(errp, "Format driver '%s' does not support image creation", 5985c6149724SMax Reitz drv->format_name); 5986c6149724SMax Reitz return; 5987c6149724SMax Reitz } 5988c6149724SMax Reitz 5989c6149724SMax Reitz if (!proto_drv->create_opts) { 5990c6149724SMax Reitz error_setg(errp, "Protocol driver '%s' does not support image creation", 5991c6149724SMax Reitz proto_drv->format_name); 5992c6149724SMax Reitz return; 5993c6149724SMax Reitz } 5994c6149724SMax Reitz 5995c282e1fdSChunyan Liu create_opts = qemu_opts_append(create_opts, drv->create_opts); 5996c282e1fdSChunyan Liu create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); 5997f88e1a42SJes Sorensen 5998f88e1a42SJes Sorensen /* Create parameter list with default values */ 599983d0521aSChunyan Liu opts = qemu_opts_create(create_opts, NULL, 0, &error_abort); 600039101f25SMarkus Armbruster qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort); 6001f88e1a42SJes Sorensen 6002f88e1a42SJes Sorensen /* Parse -o options */ 6003f88e1a42SJes Sorensen if (options) { 6004dc523cd3SMarkus Armbruster qemu_opts_do_parse(opts, options, NULL, &local_err); 6005dc523cd3SMarkus Armbruster if (local_err) { 6006dc523cd3SMarkus Armbruster error_report_err(local_err); 6007dc523cd3SMarkus Armbruster local_err = NULL; 600883d0521aSChunyan Liu error_setg(errp, "Invalid options for file format '%s'", fmt); 6009f88e1a42SJes Sorensen goto out; 6010f88e1a42SJes Sorensen } 6011f88e1a42SJes Sorensen } 6012f88e1a42SJes Sorensen 6013f88e1a42SJes Sorensen if (base_filename) { 6014f43e47dbSMarkus Armbruster qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err); 60156be4194bSMarkus Armbruster if (local_err) { 601671c79813SLuiz Capitulino error_setg(errp, "Backing file not supported for file format '%s'", 601771c79813SLuiz Capitulino fmt); 6018f88e1a42SJes Sorensen goto out; 6019f88e1a42SJes Sorensen } 6020f88e1a42SJes Sorensen } 6021f88e1a42SJes Sorensen 
6022f88e1a42SJes Sorensen if (base_fmt) { 6023f43e47dbSMarkus Armbruster qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err); 60246be4194bSMarkus Armbruster if (local_err) { 602571c79813SLuiz Capitulino error_setg(errp, "Backing file format not supported for file " 602671c79813SLuiz Capitulino "format '%s'", fmt); 6027f88e1a42SJes Sorensen goto out; 6028f88e1a42SJes Sorensen } 6029f88e1a42SJes Sorensen } 6030f88e1a42SJes Sorensen 603183d0521aSChunyan Liu backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE); 603283d0521aSChunyan Liu if (backing_file) { 603383d0521aSChunyan Liu if (!strcmp(filename, backing_file)) { 603471c79813SLuiz Capitulino error_setg(errp, "Error: Trying to create an image with the " 603571c79813SLuiz Capitulino "same filename as the backing file"); 6036792da93aSJes Sorensen goto out; 6037792da93aSJes Sorensen } 6038792da93aSJes Sorensen } 6039792da93aSJes Sorensen 604083d0521aSChunyan Liu backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT); 604183d0521aSChunyan Liu if (backing_fmt) { 604283d0521aSChunyan Liu backing_drv = bdrv_find_format(backing_fmt); 604396df67d1SStefan Hajnoczi if (!backing_drv) { 604471c79813SLuiz Capitulino error_setg(errp, "Unknown backing file format '%s'", 604583d0521aSChunyan Liu backing_fmt); 6046f88e1a42SJes Sorensen goto out; 6047f88e1a42SJes Sorensen } 6048f88e1a42SJes Sorensen } 6049f88e1a42SJes Sorensen 6050f88e1a42SJes Sorensen // The size for the image must always be specified, with one exception: 6051f88e1a42SJes Sorensen // If we are using a backing file, we can obtain the size from there 605283d0521aSChunyan Liu size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0); 605383d0521aSChunyan Liu if (size == -1) { 605483d0521aSChunyan Liu if (backing_file) { 605566f6b814SMax Reitz BlockDriverState *bs; 605629168018SMax Reitz char *full_backing = g_new0(char, PATH_MAX); 605752bf1e72SMarkus Armbruster int64_t size; 605863090dacSPaolo Bonzini int back_flags; 605963090dacSPaolo Bonzini 
606029168018SMax Reitz bdrv_get_full_backing_filename_from_filename(filename, backing_file, 606129168018SMax Reitz full_backing, PATH_MAX, 606229168018SMax Reitz &local_err); 606329168018SMax Reitz if (local_err) { 606429168018SMax Reitz g_free(full_backing); 606529168018SMax Reitz goto out; 606629168018SMax Reitz } 606729168018SMax Reitz 606863090dacSPaolo Bonzini /* backing files always opened read-only */ 606963090dacSPaolo Bonzini back_flags = 607063090dacSPaolo Bonzini flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); 6071f88e1a42SJes Sorensen 6072f67503e5SMax Reitz bs = NULL; 607329168018SMax Reitz ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags, 6074cc84d90fSMax Reitz backing_drv, &local_err); 607529168018SMax Reitz g_free(full_backing); 6076f88e1a42SJes Sorensen if (ret < 0) { 6077f88e1a42SJes Sorensen goto out; 6078f88e1a42SJes Sorensen } 607952bf1e72SMarkus Armbruster size = bdrv_getlength(bs); 608052bf1e72SMarkus Armbruster if (size < 0) { 608152bf1e72SMarkus Armbruster error_setg_errno(errp, -size, "Could not get size of '%s'", 608252bf1e72SMarkus Armbruster backing_file); 608352bf1e72SMarkus Armbruster bdrv_unref(bs); 608452bf1e72SMarkus Armbruster goto out; 608552bf1e72SMarkus Armbruster } 6086f88e1a42SJes Sorensen 608739101f25SMarkus Armbruster qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort); 608866f6b814SMax Reitz 608966f6b814SMax Reitz bdrv_unref(bs); 6090f88e1a42SJes Sorensen } else { 609171c79813SLuiz Capitulino error_setg(errp, "Image creation needs a size parameter"); 6092f88e1a42SJes Sorensen goto out; 6093f88e1a42SJes Sorensen } 6094f88e1a42SJes Sorensen } 6095f88e1a42SJes Sorensen 6096f382d43aSMiroslav Rezanina if (!quiet) { 6097f88e1a42SJes Sorensen printf("Formatting '%s', fmt=%s", filename, fmt); 609843c5d8f8SFam Zheng qemu_opts_print(opts, " "); 6099f88e1a42SJes Sorensen puts(""); 6100f382d43aSMiroslav Rezanina } 610183d0521aSChunyan Liu 6102c282e1fdSChunyan Liu ret = bdrv_create(drv, filename, opts, 
&local_err); 610383d0521aSChunyan Liu 6104cc84d90fSMax Reitz if (ret == -EFBIG) { 6105cc84d90fSMax Reitz /* This is generally a better message than whatever the driver would 6106cc84d90fSMax Reitz * deliver (especially because of the cluster_size_hint), since that 6107cc84d90fSMax Reitz * is most probably not much different from "image too large". */ 6108f3f4d2c0SKevin Wolf const char *cluster_size_hint = ""; 610983d0521aSChunyan Liu if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) { 6110f3f4d2c0SKevin Wolf cluster_size_hint = " (try using a larger cluster size)"; 6111f3f4d2c0SKevin Wolf } 6112cc84d90fSMax Reitz error_setg(errp, "The image size is too large for file format '%s'" 6113cc84d90fSMax Reitz "%s", fmt, cluster_size_hint); 6114cc84d90fSMax Reitz error_free(local_err); 6115cc84d90fSMax Reitz local_err = NULL; 6116f88e1a42SJes Sorensen } 6117f88e1a42SJes Sorensen 6118f88e1a42SJes Sorensen out: 611983d0521aSChunyan Liu qemu_opts_del(opts); 612083d0521aSChunyan Liu qemu_opts_free(create_opts); 612184d18f06SMarkus Armbruster if (local_err) { 6122cc84d90fSMax Reitz error_propagate(errp, local_err); 6123cc84d90fSMax Reitz } 6124f88e1a42SJes Sorensen } 612585d126f3SStefan Hajnoczi 612685d126f3SStefan Hajnoczi AioContext *bdrv_get_aio_context(BlockDriverState *bs) 612785d126f3SStefan Hajnoczi { 6128dcd04228SStefan Hajnoczi return bs->aio_context; 6129dcd04228SStefan Hajnoczi } 6130dcd04228SStefan Hajnoczi 6131dcd04228SStefan Hajnoczi void bdrv_detach_aio_context(BlockDriverState *bs) 6132dcd04228SStefan Hajnoczi { 613333384421SMax Reitz BdrvAioNotifier *baf; 613433384421SMax Reitz 6135dcd04228SStefan Hajnoczi if (!bs->drv) { 6136dcd04228SStefan Hajnoczi return; 6137dcd04228SStefan Hajnoczi } 6138dcd04228SStefan Hajnoczi 613933384421SMax Reitz QLIST_FOREACH(baf, &bs->aio_notifiers, list) { 614033384421SMax Reitz baf->detach_aio_context(baf->opaque); 614133384421SMax Reitz } 614233384421SMax Reitz 614313af91ebSStefan Hajnoczi if (bs->io_limits_enabled) { 
614413af91ebSStefan Hajnoczi throttle_detach_aio_context(&bs->throttle_state); 614513af91ebSStefan Hajnoczi } 6146dcd04228SStefan Hajnoczi if (bs->drv->bdrv_detach_aio_context) { 6147dcd04228SStefan Hajnoczi bs->drv->bdrv_detach_aio_context(bs); 6148dcd04228SStefan Hajnoczi } 6149dcd04228SStefan Hajnoczi if (bs->file) { 6150dcd04228SStefan Hajnoczi bdrv_detach_aio_context(bs->file); 6151dcd04228SStefan Hajnoczi } 6152dcd04228SStefan Hajnoczi if (bs->backing_hd) { 6153dcd04228SStefan Hajnoczi bdrv_detach_aio_context(bs->backing_hd); 6154dcd04228SStefan Hajnoczi } 6155dcd04228SStefan Hajnoczi 6156dcd04228SStefan Hajnoczi bs->aio_context = NULL; 6157dcd04228SStefan Hajnoczi } 6158dcd04228SStefan Hajnoczi 6159dcd04228SStefan Hajnoczi void bdrv_attach_aio_context(BlockDriverState *bs, 6160dcd04228SStefan Hajnoczi AioContext *new_context) 6161dcd04228SStefan Hajnoczi { 616233384421SMax Reitz BdrvAioNotifier *ban; 616333384421SMax Reitz 6164dcd04228SStefan Hajnoczi if (!bs->drv) { 6165dcd04228SStefan Hajnoczi return; 6166dcd04228SStefan Hajnoczi } 6167dcd04228SStefan Hajnoczi 6168dcd04228SStefan Hajnoczi bs->aio_context = new_context; 6169dcd04228SStefan Hajnoczi 6170dcd04228SStefan Hajnoczi if (bs->backing_hd) { 6171dcd04228SStefan Hajnoczi bdrv_attach_aio_context(bs->backing_hd, new_context); 6172dcd04228SStefan Hajnoczi } 6173dcd04228SStefan Hajnoczi if (bs->file) { 6174dcd04228SStefan Hajnoczi bdrv_attach_aio_context(bs->file, new_context); 6175dcd04228SStefan Hajnoczi } 6176dcd04228SStefan Hajnoczi if (bs->drv->bdrv_attach_aio_context) { 6177dcd04228SStefan Hajnoczi bs->drv->bdrv_attach_aio_context(bs, new_context); 6178dcd04228SStefan Hajnoczi } 617913af91ebSStefan Hajnoczi if (bs->io_limits_enabled) { 618013af91ebSStefan Hajnoczi throttle_attach_aio_context(&bs->throttle_state, new_context); 618113af91ebSStefan Hajnoczi } 618233384421SMax Reitz 618333384421SMax Reitz QLIST_FOREACH(ban, &bs->aio_notifiers, list) { 618433384421SMax Reitz 
ban->attached_aio_context(new_context, ban->opaque); 618533384421SMax Reitz } 6186dcd04228SStefan Hajnoczi } 6187dcd04228SStefan Hajnoczi 6188dcd04228SStefan Hajnoczi void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) 6189dcd04228SStefan Hajnoczi { 6190dcd04228SStefan Hajnoczi bdrv_drain_all(); /* ensure there are no in-flight requests */ 6191dcd04228SStefan Hajnoczi 6192dcd04228SStefan Hajnoczi bdrv_detach_aio_context(bs); 6193dcd04228SStefan Hajnoczi 6194dcd04228SStefan Hajnoczi /* This function executes in the old AioContext so acquire the new one in 6195dcd04228SStefan Hajnoczi * case it runs in a different thread. 6196dcd04228SStefan Hajnoczi */ 6197dcd04228SStefan Hajnoczi aio_context_acquire(new_context); 6198dcd04228SStefan Hajnoczi bdrv_attach_aio_context(bs, new_context); 6199dcd04228SStefan Hajnoczi aio_context_release(new_context); 620085d126f3SStefan Hajnoczi } 6201d616b224SStefan Hajnoczi 620233384421SMax Reitz void bdrv_add_aio_context_notifier(BlockDriverState *bs, 620333384421SMax Reitz void (*attached_aio_context)(AioContext *new_context, void *opaque), 620433384421SMax Reitz void (*detach_aio_context)(void *opaque), void *opaque) 620533384421SMax Reitz { 620633384421SMax Reitz BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1); 620733384421SMax Reitz *ban = (BdrvAioNotifier){ 620833384421SMax Reitz .attached_aio_context = attached_aio_context, 620933384421SMax Reitz .detach_aio_context = detach_aio_context, 621033384421SMax Reitz .opaque = opaque 621133384421SMax Reitz }; 621233384421SMax Reitz 621333384421SMax Reitz QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list); 621433384421SMax Reitz } 621533384421SMax Reitz 621633384421SMax Reitz void bdrv_remove_aio_context_notifier(BlockDriverState *bs, 621733384421SMax Reitz void (*attached_aio_context)(AioContext *, 621833384421SMax Reitz void *), 621933384421SMax Reitz void (*detach_aio_context)(void *), 622033384421SMax Reitz void *opaque) 622133384421SMax Reitz { 622233384421SMax 
Reitz BdrvAioNotifier *ban, *ban_next; 622333384421SMax Reitz 622433384421SMax Reitz QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { 622533384421SMax Reitz if (ban->attached_aio_context == attached_aio_context && 622633384421SMax Reitz ban->detach_aio_context == detach_aio_context && 622733384421SMax Reitz ban->opaque == opaque) 622833384421SMax Reitz { 622933384421SMax Reitz QLIST_REMOVE(ban, list); 623033384421SMax Reitz g_free(ban); 623133384421SMax Reitz 623233384421SMax Reitz return; 623333384421SMax Reitz } 623433384421SMax Reitz } 623533384421SMax Reitz 623633384421SMax Reitz abort(); 623733384421SMax Reitz } 623833384421SMax Reitz 6239d616b224SStefan Hajnoczi void bdrv_add_before_write_notifier(BlockDriverState *bs, 6240d616b224SStefan Hajnoczi NotifierWithReturn *notifier) 6241d616b224SStefan Hajnoczi { 6242d616b224SStefan Hajnoczi notifier_with_return_list_add(&bs->before_write_notifiers, notifier); 6243d616b224SStefan Hajnoczi } 62446f176b48SMax Reitz 624577485434SMax Reitz int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts, 624677485434SMax Reitz BlockDriverAmendStatusCB *status_cb) 62476f176b48SMax Reitz { 6248c282e1fdSChunyan Liu if (!bs->drv->bdrv_amend_options) { 62496f176b48SMax Reitz return -ENOTSUP; 62506f176b48SMax Reitz } 625177485434SMax Reitz return bs->drv->bdrv_amend_options(bs, opts, status_cb); 62526f176b48SMax Reitz } 6253f6186f49SBenoît Canet 6254b5042a36SBenoît Canet /* This function will be called by the bdrv_recurse_is_first_non_filter method 6255b5042a36SBenoît Canet * of block filter and by bdrv_is_first_non_filter. 6256b5042a36SBenoît Canet * It is used to test if the given bs is the candidate or recurse more in the 6257b5042a36SBenoît Canet * node graph. 
6258212a5a8fSBenoît Canet */ 6259212a5a8fSBenoît Canet bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs, 6260212a5a8fSBenoît Canet BlockDriverState *candidate) 6261f6186f49SBenoît Canet { 6262b5042a36SBenoît Canet /* return false if basic checks fails */ 6263b5042a36SBenoît Canet if (!bs || !bs->drv) { 6264b5042a36SBenoît Canet return false; 6265b5042a36SBenoît Canet } 6266b5042a36SBenoît Canet 6267b5042a36SBenoît Canet /* the code reached a non block filter driver -> check if the bs is 6268b5042a36SBenoît Canet * the same as the candidate. It's the recursion termination condition. 6269b5042a36SBenoît Canet */ 6270b5042a36SBenoît Canet if (!bs->drv->is_filter) { 6271b5042a36SBenoît Canet return bs == candidate; 6272b5042a36SBenoît Canet } 6273b5042a36SBenoît Canet /* Down this path the driver is a block filter driver */ 6274b5042a36SBenoît Canet 6275b5042a36SBenoît Canet /* If the block filter recursion method is defined use it to recurse down 6276b5042a36SBenoît Canet * the node graph. 6277b5042a36SBenoît Canet */ 6278b5042a36SBenoît Canet if (bs->drv->bdrv_recurse_is_first_non_filter) { 6279212a5a8fSBenoît Canet return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate); 6280212a5a8fSBenoît Canet } 6281212a5a8fSBenoît Canet 6282b5042a36SBenoît Canet /* the driver is a block filter but don't allow to recurse -> return false 6283b5042a36SBenoît Canet */ 6284b5042a36SBenoît Canet return false; 6285212a5a8fSBenoît Canet } 6286212a5a8fSBenoît Canet 6287212a5a8fSBenoît Canet /* This function checks if the candidate is the first non filter bs down it's 6288212a5a8fSBenoît Canet * bs chain. Since we don't have pointers to parents it explore all bs chains 6289212a5a8fSBenoît Canet * from the top. Some filters can choose not to pass down the recursion. 
6290212a5a8fSBenoît Canet */ 6291212a5a8fSBenoît Canet bool bdrv_is_first_non_filter(BlockDriverState *candidate) 6292212a5a8fSBenoît Canet { 6293212a5a8fSBenoît Canet BlockDriverState *bs; 6294212a5a8fSBenoît Canet 6295212a5a8fSBenoît Canet /* walk down the bs forest recursively */ 6296212a5a8fSBenoît Canet QTAILQ_FOREACH(bs, &bdrv_states, device_list) { 6297212a5a8fSBenoît Canet bool perm; 6298212a5a8fSBenoît Canet 6299b5042a36SBenoît Canet /* try to recurse in this top level bs */ 6300e6dc8a1fSKevin Wolf perm = bdrv_recurse_is_first_non_filter(bs, candidate); 6301212a5a8fSBenoît Canet 6302212a5a8fSBenoît Canet /* candidate is the first non filter */ 6303212a5a8fSBenoît Canet if (perm) { 6304212a5a8fSBenoît Canet return true; 6305212a5a8fSBenoît Canet } 6306212a5a8fSBenoît Canet } 6307212a5a8fSBenoît Canet 6308212a5a8fSBenoît Canet return false; 6309f6186f49SBenoît Canet } 631009158f00SBenoît Canet 631109158f00SBenoît Canet BlockDriverState *check_to_replace_node(const char *node_name, Error **errp) 631209158f00SBenoît Canet { 631309158f00SBenoît Canet BlockDriverState *to_replace_bs = bdrv_find_node(node_name); 63145a7e7a0bSStefan Hajnoczi AioContext *aio_context; 63155a7e7a0bSStefan Hajnoczi 631609158f00SBenoît Canet if (!to_replace_bs) { 631709158f00SBenoît Canet error_setg(errp, "Node name '%s' not found", node_name); 631809158f00SBenoît Canet return NULL; 631909158f00SBenoît Canet } 632009158f00SBenoît Canet 63215a7e7a0bSStefan Hajnoczi aio_context = bdrv_get_aio_context(to_replace_bs); 63225a7e7a0bSStefan Hajnoczi aio_context_acquire(aio_context); 63235a7e7a0bSStefan Hajnoczi 632409158f00SBenoît Canet if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) { 63255a7e7a0bSStefan Hajnoczi to_replace_bs = NULL; 63265a7e7a0bSStefan Hajnoczi goto out; 632709158f00SBenoît Canet } 632809158f00SBenoît Canet 632909158f00SBenoît Canet /* We don't want arbitrary node of the BDS chain to be replaced only the top 633009158f00SBenoît Canet * most non filter 
in order to prevent data corruption. 633109158f00SBenoît Canet * Another benefit is that this tests exclude backing files which are 633209158f00SBenoît Canet * blocked by the backing blockers. 633309158f00SBenoît Canet */ 633409158f00SBenoît Canet if (!bdrv_is_first_non_filter(to_replace_bs)) { 633509158f00SBenoît Canet error_setg(errp, "Only top most non filter can be replaced"); 63365a7e7a0bSStefan Hajnoczi to_replace_bs = NULL; 63375a7e7a0bSStefan Hajnoczi goto out; 633809158f00SBenoît Canet } 633909158f00SBenoît Canet 63405a7e7a0bSStefan Hajnoczi out: 63415a7e7a0bSStefan Hajnoczi aio_context_release(aio_context); 634209158f00SBenoît Canet return to_replace_bs; 634309158f00SBenoît Canet } 6344448ad91dSMing Lei 6345448ad91dSMing Lei void bdrv_io_plug(BlockDriverState *bs) 6346448ad91dSMing Lei { 6347448ad91dSMing Lei BlockDriver *drv = bs->drv; 6348448ad91dSMing Lei if (drv && drv->bdrv_io_plug) { 6349448ad91dSMing Lei drv->bdrv_io_plug(bs); 6350448ad91dSMing Lei } else if (bs->file) { 6351448ad91dSMing Lei bdrv_io_plug(bs->file); 6352448ad91dSMing Lei } 6353448ad91dSMing Lei } 6354448ad91dSMing Lei 6355448ad91dSMing Lei void bdrv_io_unplug(BlockDriverState *bs) 6356448ad91dSMing Lei { 6357448ad91dSMing Lei BlockDriver *drv = bs->drv; 6358448ad91dSMing Lei if (drv && drv->bdrv_io_unplug) { 6359448ad91dSMing Lei drv->bdrv_io_unplug(bs); 6360448ad91dSMing Lei } else if (bs->file) { 6361448ad91dSMing Lei bdrv_io_unplug(bs->file); 6362448ad91dSMing Lei } 6363448ad91dSMing Lei } 6364448ad91dSMing Lei 6365448ad91dSMing Lei void bdrv_flush_io_queue(BlockDriverState *bs) 6366448ad91dSMing Lei { 6367448ad91dSMing Lei BlockDriver *drv = bs->drv; 6368448ad91dSMing Lei if (drv && drv->bdrv_flush_io_queue) { 6369448ad91dSMing Lei drv->bdrv_flush_io_queue(bs); 6370448ad91dSMing Lei } else if (bs->file) { 6371448ad91dSMing Lei bdrv_flush_io_queue(bs->file); 6372448ad91dSMing Lei } 6373448ad91dSMing Lei } 637491af7014SMax Reitz 637591af7014SMax Reitz static bool 
append_open_options(QDict *d, BlockDriverState *bs) 637691af7014SMax Reitz { 637791af7014SMax Reitz const QDictEntry *entry; 637891af7014SMax Reitz bool found_any = false; 637991af7014SMax Reitz 638091af7014SMax Reitz for (entry = qdict_first(bs->options); entry; 638191af7014SMax Reitz entry = qdict_next(bs->options, entry)) 638291af7014SMax Reitz { 638391af7014SMax Reitz /* Only take options for this level and exclude all non-driver-specific 638491af7014SMax Reitz * options */ 638591af7014SMax Reitz if (!strchr(qdict_entry_key(entry), '.') && 638691af7014SMax Reitz strcmp(qdict_entry_key(entry), "node-name")) 638791af7014SMax Reitz { 638891af7014SMax Reitz qobject_incref(qdict_entry_value(entry)); 638991af7014SMax Reitz qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry)); 639091af7014SMax Reitz found_any = true; 639191af7014SMax Reitz } 639291af7014SMax Reitz } 639391af7014SMax Reitz 639491af7014SMax Reitz return found_any; 639591af7014SMax Reitz } 639691af7014SMax Reitz 639791af7014SMax Reitz /* Updates the following BDS fields: 639891af7014SMax Reitz * - exact_filename: A filename which may be used for opening a block device 639991af7014SMax Reitz * which (mostly) equals the given BDS (even without any 640091af7014SMax Reitz * other options; so reading and writing must return the same 640191af7014SMax Reitz * results, but caching etc. may be different) 640291af7014SMax Reitz * - full_open_options: Options which, when given when opening a block device 640391af7014SMax Reitz * (without a filename), result in a BDS (mostly) 640491af7014SMax Reitz * equalling the given one 640591af7014SMax Reitz * - filename: If exact_filename is set, it is copied here. Otherwise, 640691af7014SMax Reitz * full_open_options is converted to a JSON object, prefixed with 640791af7014SMax Reitz * "json:" (for use through the JSON pseudo protocol) and put here. 
640891af7014SMax Reitz */ 640991af7014SMax Reitz void bdrv_refresh_filename(BlockDriverState *bs) 641091af7014SMax Reitz { 641191af7014SMax Reitz BlockDriver *drv = bs->drv; 641291af7014SMax Reitz QDict *opts; 641391af7014SMax Reitz 641491af7014SMax Reitz if (!drv) { 641591af7014SMax Reitz return; 641691af7014SMax Reitz } 641791af7014SMax Reitz 641891af7014SMax Reitz /* This BDS's file name will most probably depend on its file's name, so 641991af7014SMax Reitz * refresh that first */ 642091af7014SMax Reitz if (bs->file) { 642191af7014SMax Reitz bdrv_refresh_filename(bs->file); 642291af7014SMax Reitz } 642391af7014SMax Reitz 642491af7014SMax Reitz if (drv->bdrv_refresh_filename) { 642591af7014SMax Reitz /* Obsolete information is of no use here, so drop the old file name 642691af7014SMax Reitz * information before refreshing it */ 642791af7014SMax Reitz bs->exact_filename[0] = '\0'; 642891af7014SMax Reitz if (bs->full_open_options) { 642991af7014SMax Reitz QDECREF(bs->full_open_options); 643091af7014SMax Reitz bs->full_open_options = NULL; 643191af7014SMax Reitz } 643291af7014SMax Reitz 643391af7014SMax Reitz drv->bdrv_refresh_filename(bs); 643491af7014SMax Reitz } else if (bs->file) { 643591af7014SMax Reitz /* Try to reconstruct valid information from the underlying file */ 643691af7014SMax Reitz bool has_open_options; 643791af7014SMax Reitz 643891af7014SMax Reitz bs->exact_filename[0] = '\0'; 643991af7014SMax Reitz if (bs->full_open_options) { 644091af7014SMax Reitz QDECREF(bs->full_open_options); 644191af7014SMax Reitz bs->full_open_options = NULL; 644291af7014SMax Reitz } 644391af7014SMax Reitz 644491af7014SMax Reitz opts = qdict_new(); 644591af7014SMax Reitz has_open_options = append_open_options(opts, bs); 644691af7014SMax Reitz 644791af7014SMax Reitz /* If no specific options have been given for this BDS, the filename of 644891af7014SMax Reitz * the underlying file should suffice for this one as well */ 644991af7014SMax Reitz if (bs->file->exact_filename[0] 
&& !has_open_options) { 645091af7014SMax Reitz strcpy(bs->exact_filename, bs->file->exact_filename); 645191af7014SMax Reitz } 645291af7014SMax Reitz /* Reconstructing the full options QDict is simple for most format block 645391af7014SMax Reitz * drivers, as long as the full options are known for the underlying 645491af7014SMax Reitz * file BDS. The full options QDict of that file BDS should somehow 645591af7014SMax Reitz * contain a representation of the filename, therefore the following 645691af7014SMax Reitz * suffices without querying the (exact_)filename of this BDS. */ 645791af7014SMax Reitz if (bs->file->full_open_options) { 645891af7014SMax Reitz qdict_put_obj(opts, "driver", 645991af7014SMax Reitz QOBJECT(qstring_from_str(drv->format_name))); 646091af7014SMax Reitz QINCREF(bs->file->full_open_options); 646191af7014SMax Reitz qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options)); 646291af7014SMax Reitz 646391af7014SMax Reitz bs->full_open_options = opts; 646491af7014SMax Reitz } else { 646591af7014SMax Reitz QDECREF(opts); 646691af7014SMax Reitz } 646791af7014SMax Reitz } else if (!bs->full_open_options && qdict_size(bs->options)) { 646891af7014SMax Reitz /* There is no underlying file BDS (at least referenced by BDS.file), 646991af7014SMax Reitz * so the full options QDict should be equal to the options given 647091af7014SMax Reitz * specifically for this block device when it was opened (plus the 647191af7014SMax Reitz * driver specification). 647291af7014SMax Reitz * Because those options don't change, there is no need to update 647391af7014SMax Reitz * full_open_options when it's already set. 
*/ 647491af7014SMax Reitz 647591af7014SMax Reitz opts = qdict_new(); 647691af7014SMax Reitz append_open_options(opts, bs); 647791af7014SMax Reitz qdict_put_obj(opts, "driver", 647891af7014SMax Reitz QOBJECT(qstring_from_str(drv->format_name))); 647991af7014SMax Reitz 648091af7014SMax Reitz if (bs->exact_filename[0]) { 648191af7014SMax Reitz /* This may not work for all block protocol drivers (some may 648291af7014SMax Reitz * require this filename to be parsed), but we have to find some 648391af7014SMax Reitz * default solution here, so just include it. If some block driver 648491af7014SMax Reitz * does not support pure options without any filename at all or 648591af7014SMax Reitz * needs some special format of the options QDict, it needs to 648691af7014SMax Reitz * implement the driver-specific bdrv_refresh_filename() function. 648791af7014SMax Reitz */ 648891af7014SMax Reitz qdict_put_obj(opts, "filename", 648991af7014SMax Reitz QOBJECT(qstring_from_str(bs->exact_filename))); 649091af7014SMax Reitz } 649191af7014SMax Reitz 649291af7014SMax Reitz bs->full_open_options = opts; 649391af7014SMax Reitz } 649491af7014SMax Reitz 649591af7014SMax Reitz if (bs->exact_filename[0]) { 649691af7014SMax Reitz pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename); 649791af7014SMax Reitz } else if (bs->full_open_options) { 649891af7014SMax Reitz QString *json = qobject_to_json(QOBJECT(bs->full_open_options)); 649991af7014SMax Reitz snprintf(bs->filename, sizeof(bs->filename), "json:%s", 650091af7014SMax Reitz qstring_get_str(json)); 650191af7014SMax Reitz QDECREF(json); 650291af7014SMax Reitz } 650391af7014SMax Reitz } 65045366d0c8SBenoît Canet 65055366d0c8SBenoît Canet /* This accessor function purpose is to allow the device models to access the 65065366d0c8SBenoît Canet * BlockAcctStats structure embedded inside a BlockDriverState without being 65075366d0c8SBenoît Canet * aware of the BlockDriverState structure layout. 
65085366d0c8SBenoît Canet * It will go away when the BlockAcctStats structure will be moved inside 65095366d0c8SBenoît Canet * the device models. 65105366d0c8SBenoît Canet */ 65115366d0c8SBenoît Canet BlockAcctStats *bdrv_get_stats(BlockDriverState *bs) 65125366d0c8SBenoît Canet { 65135366d0c8SBenoît Canet return &bs->stats; 65145366d0c8SBenoît Canet } 6515