1d003e0aeSVladimir Sementsov-Ogievskiy /* 2d003e0aeSVladimir Sementsov-Ogievskiy * copy-before-write filter driver 3d003e0aeSVladimir Sementsov-Ogievskiy * 4d003e0aeSVladimir Sementsov-Ogievskiy * The driver performs Copy-Before-Write (CBW) operation: it is injected above 5d003e0aeSVladimir Sementsov-Ogievskiy * some node, and before each write it copies _old_ data to the target node. 6d003e0aeSVladimir Sementsov-Ogievskiy * 7d003e0aeSVladimir Sementsov-Ogievskiy * Copyright (c) 2018-2021 Virtuozzo International GmbH. 8d003e0aeSVladimir Sementsov-Ogievskiy * 9d003e0aeSVladimir Sementsov-Ogievskiy * Author: 10d003e0aeSVladimir Sementsov-Ogievskiy * Sementsov-Ogievskiy Vladimir <vsementsov@virtuozzo.com> 11d003e0aeSVladimir Sementsov-Ogievskiy * 12d003e0aeSVladimir Sementsov-Ogievskiy * This program is free software; you can redistribute it and/or modify 13d003e0aeSVladimir Sementsov-Ogievskiy * it under the terms of the GNU General Public License as published by 14d003e0aeSVladimir Sementsov-Ogievskiy * the Free Software Foundation; either version 2 of the License, or 15d003e0aeSVladimir Sementsov-Ogievskiy * (at your option) any later version. 16d003e0aeSVladimir Sementsov-Ogievskiy * 17d003e0aeSVladimir Sementsov-Ogievskiy * This program is distributed in the hope that it will be useful, 18d003e0aeSVladimir Sementsov-Ogievskiy * but WITHOUT ANY WARRANTY; without even the implied warranty of 19d003e0aeSVladimir Sementsov-Ogievskiy * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20d003e0aeSVladimir Sementsov-Ogievskiy * GNU General Public License for more details. 21d003e0aeSVladimir Sementsov-Ogievskiy * 22d003e0aeSVladimir Sementsov-Ogievskiy * You should have received a copy of the GNU General Public License 23d003e0aeSVladimir Sementsov-Ogievskiy * along with this program. If not, see <http://www.gnu.org/licenses/>. 24d003e0aeSVladimir Sementsov-Ogievskiy */ 25d003e0aeSVladimir Sementsov-Ogievskiy 26d003e0aeSVladimir Sementsov-Ogievskiy #include "qemu/osdep.h" 27*79ef0cebSVladimir Sementsov-Ogievskiy #include "qapi/qmp/qjson.h" 28d003e0aeSVladimir Sementsov-Ogievskiy 29d003e0aeSVladimir Sementsov-Ogievskiy #include "sysemu/block-backend.h" 30d003e0aeSVladimir Sementsov-Ogievskiy #include "qemu/cutils.h" 31d003e0aeSVladimir Sementsov-Ogievskiy #include "qapi/error.h" 32d003e0aeSVladimir Sementsov-Ogievskiy #include "block/block_int.h" 33d003e0aeSVladimir Sementsov-Ogievskiy #include "block/qdict.h" 34d003e0aeSVladimir Sementsov-Ogievskiy #include "block/block-copy.h" 35d003e0aeSVladimir Sementsov-Ogievskiy 36d003e0aeSVladimir Sementsov-Ogievskiy #include "block/copy-before-write.h" 37af5bcd77SVladimir Sementsov-Ogievskiy #include "block/reqlist.h" 38d003e0aeSVladimir Sementsov-Ogievskiy 395f3a3cd7SVladimir Sementsov-Ogievskiy #include "qapi/qapi-visit-block-core.h" 405f3a3cd7SVladimir Sementsov-Ogievskiy 41d003e0aeSVladimir Sementsov-Ogievskiy typedef struct BDRVCopyBeforeWriteState { 42d003e0aeSVladimir Sementsov-Ogievskiy BlockCopyState *bcs; 43d003e0aeSVladimir Sementsov-Ogievskiy BdrvChild *target; 44af5bcd77SVladimir Sementsov-Ogievskiy 45af5bcd77SVladimir Sementsov-Ogievskiy /* 46af5bcd77SVladimir Sementsov-Ogievskiy * @lock: protects access to @access_bitmap, @done_bitmap and 47af5bcd77SVladimir Sementsov-Ogievskiy * @frozen_read_reqs 48af5bcd77SVladimir Sementsov-Ogievskiy */ 49af5bcd77SVladimir Sementsov-Ogievskiy CoMutex lock; 50af5bcd77SVladimir Sementsov-Ogievskiy 51af5bcd77SVladimir Sementsov-Ogievskiy /* 52af5bcd77SVladimir Sementsov-Ogievskiy * @access_bitmap: represents areas allowed for reading by fleecing user. 53af5bcd77SVladimir Sementsov-Ogievskiy * Reading from non-dirty areas leads to -EACCES. 54af5bcd77SVladimir Sementsov-Ogievskiy */ 55af5bcd77SVladimir Sementsov-Ogievskiy BdrvDirtyBitmap *access_bitmap; 56af5bcd77SVladimir Sementsov-Ogievskiy 57af5bcd77SVladimir Sementsov-Ogievskiy /* 58af5bcd77SVladimir Sementsov-Ogievskiy * @done_bitmap: represents areas that was successfully copied to @target by 59af5bcd77SVladimir Sementsov-Ogievskiy * copy-before-write operations. 60af5bcd77SVladimir Sementsov-Ogievskiy */ 61af5bcd77SVladimir Sementsov-Ogievskiy BdrvDirtyBitmap *done_bitmap; 62af5bcd77SVladimir Sementsov-Ogievskiy 63af5bcd77SVladimir Sementsov-Ogievskiy /* 64af5bcd77SVladimir Sementsov-Ogievskiy * @frozen_read_reqs: current read requests for fleecing user in bs->file 65af5bcd77SVladimir Sementsov-Ogievskiy * node. These areas must not be rewritten by guest. 66af5bcd77SVladimir Sementsov-Ogievskiy */ 67af5bcd77SVladimir Sementsov-Ogievskiy BlockReqList frozen_read_reqs; 68d003e0aeSVladimir Sementsov-Ogievskiy } BDRVCopyBeforeWriteState; 69d003e0aeSVladimir Sementsov-Ogievskiy 70d003e0aeSVladimir Sementsov-Ogievskiy static coroutine_fn int cbw_co_preadv( 71f7ef38ddSVladimir Sementsov-Ogievskiy BlockDriverState *bs, int64_t offset, int64_t bytes, 72f7ef38ddSVladimir Sementsov-Ogievskiy QEMUIOVector *qiov, BdrvRequestFlags flags) 73d003e0aeSVladimir Sementsov-Ogievskiy { 743c1e6327SVladimir Sementsov-Ogievskiy return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); 75d003e0aeSVladimir Sementsov-Ogievskiy } 76d003e0aeSVladimir Sementsov-Ogievskiy 77af5bcd77SVladimir Sementsov-Ogievskiy /* 78af5bcd77SVladimir Sementsov-Ogievskiy * Do copy-before-write operation. 79af5bcd77SVladimir Sementsov-Ogievskiy * 80af5bcd77SVladimir Sementsov-Ogievskiy * On failure guest request must be failed too. 81af5bcd77SVladimir Sementsov-Ogievskiy * 82af5bcd77SVladimir Sementsov-Ogievskiy * On success, we also wait for all in-flight fleecing read requests in source 83af5bcd77SVladimir Sementsov-Ogievskiy * node, and it's guaranteed that after cbw_do_copy_before_write() successful 84af5bcd77SVladimir Sementsov-Ogievskiy * return there are no such requests and they will never appear. 85af5bcd77SVladimir Sementsov-Ogievskiy */ 86d003e0aeSVladimir Sementsov-Ogievskiy static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs, 87d003e0aeSVladimir Sementsov-Ogievskiy uint64_t offset, uint64_t bytes, BdrvRequestFlags flags) 88d003e0aeSVladimir Sementsov-Ogievskiy { 89d003e0aeSVladimir Sementsov-Ogievskiy BDRVCopyBeforeWriteState *s = bs->opaque; 90af5bcd77SVladimir Sementsov-Ogievskiy int ret; 91d003e0aeSVladimir Sementsov-Ogievskiy uint64_t off, end; 92b518e9e9SVladimir Sementsov-Ogievskiy int64_t cluster_size = block_copy_cluster_size(s->bcs); 93d003e0aeSVladimir Sementsov-Ogievskiy 94d003e0aeSVladimir Sementsov-Ogievskiy if (flags & BDRV_REQ_WRITE_UNCHANGED) { 95d003e0aeSVladimir Sementsov-Ogievskiy return 0; 96d003e0aeSVladimir Sementsov-Ogievskiy } 97d003e0aeSVladimir Sementsov-Ogievskiy 98b518e9e9SVladimir Sementsov-Ogievskiy off = QEMU_ALIGN_DOWN(offset, cluster_size); 99b518e9e9SVladimir Sementsov-Ogievskiy end = QEMU_ALIGN_UP(offset + bytes, cluster_size); 100d003e0aeSVladimir Sementsov-Ogievskiy 101af5bcd77SVladimir Sementsov-Ogievskiy ret = block_copy(s->bcs, off, end - off, true); 102af5bcd77SVladimir Sementsov-Ogievskiy if (ret < 0) { 103af5bcd77SVladimir Sementsov-Ogievskiy return ret; 104af5bcd77SVladimir Sementsov-Ogievskiy } 105af5bcd77SVladimir Sementsov-Ogievskiy 106af5bcd77SVladimir Sementsov-Ogievskiy WITH_QEMU_LOCK_GUARD(&s->lock) { 107af5bcd77SVladimir Sementsov-Ogievskiy bdrv_set_dirty_bitmap(s->done_bitmap, off, end - off); 108af5bcd77SVladimir Sementsov-Ogievskiy reqlist_wait_all(&s->frozen_read_reqs, off, end - off, &s->lock); 109af5bcd77SVladimir Sementsov-Ogievskiy } 110af5bcd77SVladimir Sementsov-Ogievskiy 111af5bcd77SVladimir Sementsov-Ogievskiy return 0; 112d003e0aeSVladimir Sementsov-Ogievskiy } 113d003e0aeSVladimir Sementsov-Ogievskiy 114d003e0aeSVladimir Sementsov-Ogievskiy static int coroutine_fn cbw_co_pdiscard(BlockDriverState *bs, 1150c802287SVladimir Sementsov-Ogievskiy int64_t offset, int64_t bytes) 116d003e0aeSVladimir Sementsov-Ogievskiy { 117d003e0aeSVladimir Sementsov-Ogievskiy int ret = cbw_do_copy_before_write(bs, offset, bytes, 0); 118d003e0aeSVladimir Sementsov-Ogievskiy if (ret < 0) { 119d003e0aeSVladimir Sementsov-Ogievskiy return ret; 120d003e0aeSVladimir Sementsov-Ogievskiy } 121d003e0aeSVladimir Sementsov-Ogievskiy 1223c1e6327SVladimir Sementsov-Ogievskiy return bdrv_co_pdiscard(bs->file, offset, bytes); 123d003e0aeSVladimir Sementsov-Ogievskiy } 124d003e0aeSVladimir Sementsov-Ogievskiy 125d003e0aeSVladimir Sementsov-Ogievskiy static int coroutine_fn cbw_co_pwrite_zeroes(BlockDriverState *bs, 126f34b2bcfSVladimir Sementsov-Ogievskiy int64_t offset, int64_t bytes, BdrvRequestFlags flags) 127d003e0aeSVladimir Sementsov-Ogievskiy { 128d003e0aeSVladimir Sementsov-Ogievskiy int ret = cbw_do_copy_before_write(bs, offset, bytes, flags); 129d003e0aeSVladimir Sementsov-Ogievskiy if (ret < 0) { 130d003e0aeSVladimir Sementsov-Ogievskiy return ret; 131d003e0aeSVladimir Sementsov-Ogievskiy } 132d003e0aeSVladimir Sementsov-Ogievskiy 1333c1e6327SVladimir Sementsov-Ogievskiy return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags); 134d003e0aeSVladimir Sementsov-Ogievskiy } 135d003e0aeSVladimir Sementsov-Ogievskiy 136d003e0aeSVladimir Sementsov-Ogievskiy static coroutine_fn int cbw_co_pwritev(BlockDriverState *bs, 137e75abedaSVladimir Sementsov-Ogievskiy int64_t offset, 138e75abedaSVladimir Sementsov-Ogievskiy int64_t bytes, 139e75abedaSVladimir Sementsov-Ogievskiy QEMUIOVector *qiov, 140e75abedaSVladimir Sementsov-Ogievskiy BdrvRequestFlags flags) 141d003e0aeSVladimir Sementsov-Ogievskiy { 142d003e0aeSVladimir Sementsov-Ogievskiy int ret = cbw_do_copy_before_write(bs, offset, bytes, flags); 143d003e0aeSVladimir Sementsov-Ogievskiy if (ret < 0) { 144d003e0aeSVladimir Sementsov-Ogievskiy return ret; 145d003e0aeSVladimir Sementsov-Ogievskiy } 146d003e0aeSVladimir Sementsov-Ogievskiy 1473c1e6327SVladimir Sementsov-Ogievskiy return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags); 148d003e0aeSVladimir Sementsov-Ogievskiy } 149d003e0aeSVladimir Sementsov-Ogievskiy 150d003e0aeSVladimir Sementsov-Ogievskiy static int coroutine_fn cbw_co_flush(BlockDriverState *bs) 151d003e0aeSVladimir Sementsov-Ogievskiy { 1523c1e6327SVladimir Sementsov-Ogievskiy if (!bs->file) { 153d003e0aeSVladimir Sementsov-Ogievskiy return 0; 154d003e0aeSVladimir Sementsov-Ogievskiy } 155d003e0aeSVladimir Sementsov-Ogievskiy 1563c1e6327SVladimir Sementsov-Ogievskiy return bdrv_co_flush(bs->file->bs); 157d003e0aeSVladimir Sementsov-Ogievskiy } 158d003e0aeSVladimir Sementsov-Ogievskiy 159af5bcd77SVladimir Sementsov-Ogievskiy /* 160af5bcd77SVladimir Sementsov-Ogievskiy * If @offset not accessible - return NULL. 161af5bcd77SVladimir Sementsov-Ogievskiy * 162af5bcd77SVladimir Sementsov-Ogievskiy * Otherwise, set @pnum to some bytes that accessible from @file (@file is set 163af5bcd77SVladimir Sementsov-Ogievskiy * to bs->file or to s->target). Return newly allocated BlockReq object that 164af5bcd77SVladimir Sementsov-Ogievskiy * should be than passed to cbw_snapshot_read_unlock(). 165af5bcd77SVladimir Sementsov-Ogievskiy * 166af5bcd77SVladimir Sementsov-Ogievskiy * It's guaranteed that guest writes will not interact in the region until 167af5bcd77SVladimir Sementsov-Ogievskiy * cbw_snapshot_read_unlock() called. 168af5bcd77SVladimir Sementsov-Ogievskiy */ 169af5bcd77SVladimir Sementsov-Ogievskiy static BlockReq *cbw_snapshot_read_lock(BlockDriverState *bs, 170af5bcd77SVladimir Sementsov-Ogievskiy int64_t offset, int64_t bytes, 171af5bcd77SVladimir Sementsov-Ogievskiy int64_t *pnum, BdrvChild **file) 172af5bcd77SVladimir Sementsov-Ogievskiy { 173af5bcd77SVladimir Sementsov-Ogievskiy BDRVCopyBeforeWriteState *s = bs->opaque; 174af5bcd77SVladimir Sementsov-Ogievskiy BlockReq *req = g_new(BlockReq, 1); 175af5bcd77SVladimir Sementsov-Ogievskiy bool done; 176af5bcd77SVladimir Sementsov-Ogievskiy 177af5bcd77SVladimir Sementsov-Ogievskiy QEMU_LOCK_GUARD(&s->lock); 178af5bcd77SVladimir Sementsov-Ogievskiy 179af5bcd77SVladimir Sementsov-Ogievskiy if (bdrv_dirty_bitmap_next_zero(s->access_bitmap, offset, bytes) != -1) { 180af5bcd77SVladimir Sementsov-Ogievskiy g_free(req); 181af5bcd77SVladimir Sementsov-Ogievskiy return NULL; 182af5bcd77SVladimir Sementsov-Ogievskiy } 183af5bcd77SVladimir Sementsov-Ogievskiy 184af5bcd77SVladimir Sementsov-Ogievskiy done = bdrv_dirty_bitmap_status(s->done_bitmap, offset, bytes, pnum); 185af5bcd77SVladimir Sementsov-Ogievskiy if (done) { 186af5bcd77SVladimir Sementsov-Ogievskiy /* 187af5bcd77SVladimir Sementsov-Ogievskiy * Special invalid BlockReq, that is handled in 188af5bcd77SVladimir Sementsov-Ogievskiy * cbw_snapshot_read_unlock(). We don't need to lock something to read 189af5bcd77SVladimir Sementsov-Ogievskiy * from s->target. 190af5bcd77SVladimir Sementsov-Ogievskiy */ 191af5bcd77SVladimir Sementsov-Ogievskiy *req = (BlockReq) {.offset = -1, .bytes = -1}; 192af5bcd77SVladimir Sementsov-Ogievskiy *file = s->target; 193af5bcd77SVladimir Sementsov-Ogievskiy } else { 194af5bcd77SVladimir Sementsov-Ogievskiy reqlist_init_req(&s->frozen_read_reqs, req, offset, bytes); 195af5bcd77SVladimir Sementsov-Ogievskiy *file = bs->file; 196af5bcd77SVladimir Sementsov-Ogievskiy } 197af5bcd77SVladimir Sementsov-Ogievskiy 198af5bcd77SVladimir Sementsov-Ogievskiy return req; 199af5bcd77SVladimir Sementsov-Ogievskiy } 200af5bcd77SVladimir Sementsov-Ogievskiy 201af5bcd77SVladimir Sementsov-Ogievskiy static void cbw_snapshot_read_unlock(BlockDriverState *bs, BlockReq *req) 202af5bcd77SVladimir Sementsov-Ogievskiy { 203af5bcd77SVladimir Sementsov-Ogievskiy BDRVCopyBeforeWriteState *s = bs->opaque; 204af5bcd77SVladimir Sementsov-Ogievskiy 205af5bcd77SVladimir Sementsov-Ogievskiy if (req->offset == -1 && req->bytes == -1) { 206af5bcd77SVladimir Sementsov-Ogievskiy g_free(req); 207af5bcd77SVladimir Sementsov-Ogievskiy return; 208af5bcd77SVladimir Sementsov-Ogievskiy } 209af5bcd77SVladimir Sementsov-Ogievskiy 210af5bcd77SVladimir Sementsov-Ogievskiy QEMU_LOCK_GUARD(&s->lock); 211af5bcd77SVladimir Sementsov-Ogievskiy 212af5bcd77SVladimir Sementsov-Ogievskiy reqlist_remove_req(req); 213af5bcd77SVladimir Sementsov-Ogievskiy g_free(req); 214af5bcd77SVladimir Sementsov-Ogievskiy } 215af5bcd77SVladimir Sementsov-Ogievskiy 216af5bcd77SVladimir Sementsov-Ogievskiy static coroutine_fn int 217af5bcd77SVladimir Sementsov-Ogievskiy cbw_co_preadv_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes, 218af5bcd77SVladimir Sementsov-Ogievskiy QEMUIOVector *qiov, size_t qiov_offset) 219af5bcd77SVladimir Sementsov-Ogievskiy { 220af5bcd77SVladimir Sementsov-Ogievskiy BlockReq *req; 221af5bcd77SVladimir Sementsov-Ogievskiy BdrvChild *file; 222af5bcd77SVladimir Sementsov-Ogievskiy int ret; 223af5bcd77SVladimir Sementsov-Ogievskiy 224af5bcd77SVladimir Sementsov-Ogievskiy /* TODO: upgrade to async loop using AioTask */ 225af5bcd77SVladimir Sementsov-Ogievskiy while (bytes) { 226af5bcd77SVladimir Sementsov-Ogievskiy int64_t cur_bytes; 227af5bcd77SVladimir Sementsov-Ogievskiy 228af5bcd77SVladimir Sementsov-Ogievskiy req = cbw_snapshot_read_lock(bs, offset, bytes, &cur_bytes, &file); 229af5bcd77SVladimir Sementsov-Ogievskiy if (!req) { 230af5bcd77SVladimir Sementsov-Ogievskiy return -EACCES; 231af5bcd77SVladimir Sementsov-Ogievskiy } 232af5bcd77SVladimir Sementsov-Ogievskiy 233af5bcd77SVladimir Sementsov-Ogievskiy ret = bdrv_co_preadv_part(file, offset, cur_bytes, 234af5bcd77SVladimir Sementsov-Ogievskiy qiov, qiov_offset, 0); 235af5bcd77SVladimir Sementsov-Ogievskiy cbw_snapshot_read_unlock(bs, req); 236af5bcd77SVladimir Sementsov-Ogievskiy if (ret < 0) { 237af5bcd77SVladimir Sementsov-Ogievskiy return ret; 238af5bcd77SVladimir Sementsov-Ogievskiy } 239af5bcd77SVladimir Sementsov-Ogievskiy 240af5bcd77SVladimir Sementsov-Ogievskiy bytes -= cur_bytes; 241af5bcd77SVladimir Sementsov-Ogievskiy offset += cur_bytes; 242af5bcd77SVladimir Sementsov-Ogievskiy qiov_offset += cur_bytes; 243af5bcd77SVladimir Sementsov-Ogievskiy } 244af5bcd77SVladimir Sementsov-Ogievskiy 245af5bcd77SVladimir Sementsov-Ogievskiy return 0; 246af5bcd77SVladimir Sementsov-Ogievskiy } 247af5bcd77SVladimir Sementsov-Ogievskiy 248af5bcd77SVladimir Sementsov-Ogievskiy static int coroutine_fn 249af5bcd77SVladimir Sementsov-Ogievskiy cbw_co_snapshot_block_status(BlockDriverState *bs, 250af5bcd77SVladimir Sementsov-Ogievskiy bool want_zero, int64_t offset, int64_t bytes, 251af5bcd77SVladimir Sementsov-Ogievskiy int64_t *pnum, int64_t *map, 252af5bcd77SVladimir Sementsov-Ogievskiy BlockDriverState **file) 253af5bcd77SVladimir Sementsov-Ogievskiy { 254af5bcd77SVladimir Sementsov-Ogievskiy BDRVCopyBeforeWriteState *s = bs->opaque; 255af5bcd77SVladimir Sementsov-Ogievskiy BlockReq *req; 256af5bcd77SVladimir Sementsov-Ogievskiy int ret; 257af5bcd77SVladimir Sementsov-Ogievskiy int64_t cur_bytes; 258af5bcd77SVladimir Sementsov-Ogievskiy BdrvChild *child; 259af5bcd77SVladimir Sementsov-Ogievskiy 260af5bcd77SVladimir Sementsov-Ogievskiy req = cbw_snapshot_read_lock(bs, offset, bytes, &cur_bytes, &child); 261af5bcd77SVladimir Sementsov-Ogievskiy if (!req) { 262af5bcd77SVladimir Sementsov-Ogievskiy return -EACCES; 263af5bcd77SVladimir Sementsov-Ogievskiy } 264af5bcd77SVladimir Sementsov-Ogievskiy 265af5bcd77SVladimir Sementsov-Ogievskiy ret = bdrv_block_status(child->bs, offset, cur_bytes, pnum, map, file); 266af5bcd77SVladimir Sementsov-Ogievskiy if (child == s->target) { 267af5bcd77SVladimir Sementsov-Ogievskiy /* 268af5bcd77SVladimir Sementsov-Ogievskiy * We refer to s->target only for areas that we've written to it. 269af5bcd77SVladimir Sementsov-Ogievskiy * And we can not report unallocated blocks in s->target: this will 270af5bcd77SVladimir Sementsov-Ogievskiy * break generic block-status-above logic, that will go to 271af5bcd77SVladimir Sementsov-Ogievskiy * copy-before-write filtered child in this case. 272af5bcd77SVladimir Sementsov-Ogievskiy */ 273af5bcd77SVladimir Sementsov-Ogievskiy assert(ret & BDRV_BLOCK_ALLOCATED); 274af5bcd77SVladimir Sementsov-Ogievskiy } 275af5bcd77SVladimir Sementsov-Ogievskiy 276af5bcd77SVladimir Sementsov-Ogievskiy cbw_snapshot_read_unlock(bs, req); 277af5bcd77SVladimir Sementsov-Ogievskiy 278af5bcd77SVladimir Sementsov-Ogievskiy return ret; 279af5bcd77SVladimir Sementsov-Ogievskiy } 280af5bcd77SVladimir Sementsov-Ogievskiy 281af5bcd77SVladimir Sementsov-Ogievskiy static int coroutine_fn cbw_co_pdiscard_snapshot(BlockDriverState *bs, 282af5bcd77SVladimir Sementsov-Ogievskiy int64_t offset, int64_t bytes) 283af5bcd77SVladimir Sementsov-Ogievskiy { 284af5bcd77SVladimir Sementsov-Ogievskiy BDRVCopyBeforeWriteState *s = bs->opaque; 285af5bcd77SVladimir Sementsov-Ogievskiy 286af5bcd77SVladimir Sementsov-Ogievskiy WITH_QEMU_LOCK_GUARD(&s->lock) { 287af5bcd77SVladimir Sementsov-Ogievskiy bdrv_reset_dirty_bitmap(s->access_bitmap, offset, bytes); 288af5bcd77SVladimir Sementsov-Ogievskiy } 289af5bcd77SVladimir Sementsov-Ogievskiy 290af5bcd77SVladimir Sementsov-Ogievskiy block_copy_reset(s->bcs, offset, bytes); 291af5bcd77SVladimir Sementsov-Ogievskiy 292af5bcd77SVladimir Sementsov-Ogievskiy return bdrv_co_pdiscard(s->target, offset, bytes); 293af5bcd77SVladimir Sementsov-Ogievskiy } 294af5bcd77SVladimir Sementsov-Ogievskiy 295d003e0aeSVladimir Sementsov-Ogievskiy static void cbw_refresh_filename(BlockDriverState *bs) 296d003e0aeSVladimir Sementsov-Ogievskiy { 297d003e0aeSVladimir Sementsov-Ogievskiy pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), 2983c1e6327SVladimir Sementsov-Ogievskiy bs->file->bs->filename); 299d003e0aeSVladimir Sementsov-Ogievskiy } 300d003e0aeSVladimir Sementsov-Ogievskiy 301d003e0aeSVladimir Sementsov-Ogievskiy static void cbw_child_perm(BlockDriverState *bs, BdrvChild *c, 302d003e0aeSVladimir Sementsov-Ogievskiy BdrvChildRole role, 303d003e0aeSVladimir Sementsov-Ogievskiy BlockReopenQueue *reopen_queue, 304d003e0aeSVladimir Sementsov-Ogievskiy uint64_t perm, uint64_t shared, 305d003e0aeSVladimir Sementsov-Ogievskiy uint64_t *nperm, uint64_t *nshared) 306d003e0aeSVladimir Sementsov-Ogievskiy { 307d003e0aeSVladimir Sementsov-Ogievskiy if (!(role & BDRV_CHILD_FILTERED)) { 308d003e0aeSVladimir Sementsov-Ogievskiy /* 309d003e0aeSVladimir Sementsov-Ogievskiy * Target child 310d003e0aeSVladimir Sementsov-Ogievskiy * 311d003e0aeSVladimir Sementsov-Ogievskiy * Share write to target (child_file), to not interfere 312d003e0aeSVladimir Sementsov-Ogievskiy * with guest writes to its disk which may be in target backing chain. 313d003e0aeSVladimir Sementsov-Ogievskiy * Can't resize during a backup block job because we check the size 314d003e0aeSVladimir Sementsov-Ogievskiy * only upfront. 315d003e0aeSVladimir Sementsov-Ogievskiy */ 316d003e0aeSVladimir Sementsov-Ogievskiy *nshared = BLK_PERM_ALL & ~BLK_PERM_RESIZE; 317d003e0aeSVladimir Sementsov-Ogievskiy *nperm = BLK_PERM_WRITE; 318d003e0aeSVladimir Sementsov-Ogievskiy } else { 319d003e0aeSVladimir Sementsov-Ogievskiy /* Source child */ 320d003e0aeSVladimir Sementsov-Ogievskiy bdrv_default_perms(bs, c, role, reopen_queue, 321d003e0aeSVladimir Sementsov-Ogievskiy perm, shared, nperm, nshared); 322d003e0aeSVladimir Sementsov-Ogievskiy 3233860c020SVladimir Sementsov-Ogievskiy if (!QLIST_EMPTY(&bs->parents)) { 324d003e0aeSVladimir Sementsov-Ogievskiy if (perm & BLK_PERM_WRITE) { 325d003e0aeSVladimir Sementsov-Ogievskiy *nperm = *nperm | BLK_PERM_CONSISTENT_READ; 326d003e0aeSVladimir Sementsov-Ogievskiy } 327d003e0aeSVladimir Sementsov-Ogievskiy *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE); 328d003e0aeSVladimir Sementsov-Ogievskiy } 329d003e0aeSVladimir Sementsov-Ogievskiy } 3303860c020SVladimir Sementsov-Ogievskiy } 331d003e0aeSVladimir Sementsov-Ogievskiy 332*79ef0cebSVladimir Sementsov-Ogievskiy static BlockdevOptions *cbw_parse_options(QDict *options, Error **errp) 3335f3a3cd7SVladimir Sementsov-Ogievskiy { 334*79ef0cebSVladimir Sementsov-Ogievskiy BlockdevOptions *opts = NULL; 3355f3a3cd7SVladimir Sementsov-Ogievskiy Visitor *v = NULL; 3365f3a3cd7SVladimir Sementsov-Ogievskiy 337*79ef0cebSVladimir Sementsov-Ogievskiy qdict_put_str(options, "driver", "copy-before-write"); 3385f3a3cd7SVladimir Sementsov-Ogievskiy 339*79ef0cebSVladimir Sementsov-Ogievskiy v = qobject_input_visitor_new_flat_confused(options, errp); 3405f3a3cd7SVladimir Sementsov-Ogievskiy if (!v) { 3415f3a3cd7SVladimir Sementsov-Ogievskiy goto out; 3425f3a3cd7SVladimir Sementsov-Ogievskiy } 3435f3a3cd7SVladimir Sementsov-Ogievskiy 344*79ef0cebSVladimir Sementsov-Ogievskiy visit_type_BlockdevOptions(v, NULL, &opts, errp); 345*79ef0cebSVladimir Sementsov-Ogievskiy if (!opts) { 3465f3a3cd7SVladimir Sementsov-Ogievskiy goto out; 3475f3a3cd7SVladimir Sementsov-Ogievskiy } 3485f3a3cd7SVladimir Sementsov-Ogievskiy 349*79ef0cebSVladimir Sementsov-Ogievskiy /* 350*79ef0cebSVladimir Sementsov-Ogievskiy * Delete options which we are going to parse through BlockdevOptions 351*79ef0cebSVladimir Sementsov-Ogievskiy * object for original options. 352*79ef0cebSVladimir Sementsov-Ogievskiy */ 353*79ef0cebSVladimir Sementsov-Ogievskiy qdict_extract_subqdict(options, NULL, "bitmap"); 3545f3a3cd7SVladimir Sementsov-Ogievskiy 3555f3a3cd7SVladimir Sementsov-Ogievskiy out: 3565f3a3cd7SVladimir Sementsov-Ogievskiy visit_free(v); 357*79ef0cebSVladimir Sementsov-Ogievskiy qdict_del(options, "driver"); 3585f3a3cd7SVladimir Sementsov-Ogievskiy 359*79ef0cebSVladimir Sementsov-Ogievskiy return opts; 3605f3a3cd7SVladimir Sementsov-Ogievskiy } 3615f3a3cd7SVladimir Sementsov-Ogievskiy 362751cec7aSVladimir Sementsov-Ogievskiy static int cbw_open(BlockDriverState *bs, QDict *options, int flags, 363751cec7aSVladimir Sementsov-Ogievskiy Error **errp) 3641f0cacb9SVladimir Sementsov-Ogievskiy { 365fe7ea40cSVladimir Sementsov-Ogievskiy BDRVCopyBeforeWriteState *s = bs->opaque; 3665f3a3cd7SVladimir Sementsov-Ogievskiy BdrvDirtyBitmap *bitmap = NULL; 367af5bcd77SVladimir Sementsov-Ogievskiy int64_t cluster_size; 368*79ef0cebSVladimir Sementsov-Ogievskiy g_autoptr(BlockdevOptions) full_opts = NULL; 369*79ef0cebSVladimir Sementsov-Ogievskiy BlockdevOptionsCbw *opts; 370*79ef0cebSVladimir Sementsov-Ogievskiy 371*79ef0cebSVladimir Sementsov-Ogievskiy full_opts = cbw_parse_options(options, errp); 372*79ef0cebSVladimir Sementsov-Ogievskiy if (!full_opts) { 373*79ef0cebSVladimir Sementsov-Ogievskiy return -EINVAL; 374*79ef0cebSVladimir Sementsov-Ogievskiy } 375*79ef0cebSVladimir Sementsov-Ogievskiy assert(full_opts->driver == BLOCKDEV_DRIVER_COPY_BEFORE_WRITE); 376*79ef0cebSVladimir Sementsov-Ogievskiy opts = &full_opts->u.copy_before_write; 3771f0cacb9SVladimir Sementsov-Ogievskiy 378f44fd739SVladimir Sementsov-Ogievskiy bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, 379f44fd739SVladimir Sementsov-Ogievskiy BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, 380f44fd739SVladimir Sementsov-Ogievskiy false, errp); 381f44fd739SVladimir Sementsov-Ogievskiy if (!bs->file) { 3821f0cacb9SVladimir Sementsov-Ogievskiy return -EINVAL; 3831f0cacb9SVladimir Sementsov-Ogievskiy } 3841f0cacb9SVladimir Sementsov-Ogievskiy 385f44fd739SVladimir Sementsov-Ogievskiy s->target = bdrv_open_child(NULL, options, "target", bs, &child_of_bds, 386f44fd739SVladimir Sementsov-Ogievskiy BDRV_CHILD_DATA, false, errp); 387f44fd739SVladimir Sementsov-Ogievskiy if (!s->target) { 3881f0cacb9SVladimir Sementsov-Ogievskiy return -EINVAL; 3891f0cacb9SVladimir Sementsov-Ogievskiy } 3901f0cacb9SVladimir Sementsov-Ogievskiy 391*79ef0cebSVladimir Sementsov-Ogievskiy if (opts->has_bitmap) { 392*79ef0cebSVladimir Sementsov-Ogievskiy bitmap = block_dirty_bitmap_lookup(opts->bitmap->node, 393*79ef0cebSVladimir Sementsov-Ogievskiy opts->bitmap->name, NULL, errp); 394*79ef0cebSVladimir Sementsov-Ogievskiy if (!bitmap) { 3955f3a3cd7SVladimir Sementsov-Ogievskiy return -EINVAL; 3965f3a3cd7SVladimir Sementsov-Ogievskiy } 397*79ef0cebSVladimir Sementsov-Ogievskiy } 3985f3a3cd7SVladimir Sementsov-Ogievskiy 3995a507426SVladimir Sementsov-Ogievskiy bs->total_sectors = bs->file->bs->total_sectors; 4005a507426SVladimir Sementsov-Ogievskiy bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED | 4015a507426SVladimir Sementsov-Ogievskiy (BDRV_REQ_FUA & bs->file->bs->supported_write_flags); 4025a507426SVladimir Sementsov-Ogievskiy bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | 4035a507426SVladimir Sementsov-Ogievskiy ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) & 4045a507426SVladimir Sementsov-Ogievskiy bs->file->bs->supported_zero_flags); 4055a507426SVladimir Sementsov-Ogievskiy 4065f3a3cd7SVladimir Sementsov-Ogievskiy s->bcs = block_copy_state_new(bs->file, s->target, bitmap, errp); 407fe7ea40cSVladimir Sementsov-Ogievskiy if (!s->bcs) { 4081f0cacb9SVladimir Sementsov-Ogievskiy error_prepend(errp, "Cannot create block-copy-state: "); 4091f0cacb9SVladimir Sementsov-Ogievskiy return -EINVAL; 4101f0cacb9SVladimir Sementsov-Ogievskiy } 4111f0cacb9SVladimir Sementsov-Ogievskiy 412af5bcd77SVladimir Sementsov-Ogievskiy cluster_size = block_copy_cluster_size(s->bcs); 413af5bcd77SVladimir Sementsov-Ogievskiy 414af5bcd77SVladimir Sementsov-Ogievskiy s->done_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp); 415af5bcd77SVladimir Sementsov-Ogievskiy if (!s->done_bitmap) { 416af5bcd77SVladimir Sementsov-Ogievskiy return -EINVAL; 417af5bcd77SVladimir Sementsov-Ogievskiy } 418af5bcd77SVladimir Sementsov-Ogievskiy bdrv_disable_dirty_bitmap(s->done_bitmap); 419af5bcd77SVladimir Sementsov-Ogievskiy 420af5bcd77SVladimir Sementsov-Ogievskiy /* s->access_bitmap starts equal to bcs bitmap */ 421af5bcd77SVladimir Sementsov-Ogievskiy s->access_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp); 422af5bcd77SVladimir Sementsov-Ogievskiy if (!s->access_bitmap) { 423af5bcd77SVladimir Sementsov-Ogievskiy return -EINVAL; 424af5bcd77SVladimir Sementsov-Ogievskiy } 425af5bcd77SVladimir Sementsov-Ogievskiy bdrv_disable_dirty_bitmap(s->access_bitmap); 426af5bcd77SVladimir Sementsov-Ogievskiy bdrv_dirty_bitmap_merge_internal(s->access_bitmap, 427af5bcd77SVladimir Sementsov-Ogievskiy block_copy_dirty_bitmap(s->bcs), NULL, 428af5bcd77SVladimir Sementsov-Ogievskiy true); 429af5bcd77SVladimir Sementsov-Ogievskiy 430af5bcd77SVladimir Sementsov-Ogievskiy qemu_co_mutex_init(&s->lock); 431af5bcd77SVladimir Sementsov-Ogievskiy QLIST_INIT(&s->frozen_read_reqs); 432af5bcd77SVladimir Sementsov-Ogievskiy 4331f0cacb9SVladimir Sementsov-Ogievskiy return 0; 4341f0cacb9SVladimir Sementsov-Ogievskiy } 4351f0cacb9SVladimir Sementsov-Ogievskiy 436751cec7aSVladimir Sementsov-Ogievskiy static void cbw_close(BlockDriverState *bs) 437751cec7aSVladimir Sementsov-Ogievskiy { 438751cec7aSVladimir Sementsov-Ogievskiy BDRVCopyBeforeWriteState *s = bs->opaque; 439751cec7aSVladimir Sementsov-Ogievskiy 440af5bcd77SVladimir Sementsov-Ogievskiy bdrv_release_dirty_bitmap(s->access_bitmap); 441af5bcd77SVladimir Sementsov-Ogievskiy bdrv_release_dirty_bitmap(s->done_bitmap); 442af5bcd77SVladimir Sementsov-Ogievskiy 443751cec7aSVladimir Sementsov-Ogievskiy block_copy_state_free(s->bcs); 444751cec7aSVladimir Sementsov-Ogievskiy s->bcs = NULL; 445751cec7aSVladimir Sementsov-Ogievskiy } 446751cec7aSVladimir Sementsov-Ogievskiy 447d003e0aeSVladimir Sementsov-Ogievskiy BlockDriver bdrv_cbw_filter = { 448d003e0aeSVladimir Sementsov-Ogievskiy .format_name = "copy-before-write", 449d003e0aeSVladimir Sementsov-Ogievskiy .instance_size = sizeof(BDRVCopyBeforeWriteState), 450d003e0aeSVladimir Sementsov-Ogievskiy 451751cec7aSVladimir Sementsov-Ogievskiy .bdrv_open = cbw_open, 452751cec7aSVladimir Sementsov-Ogievskiy .bdrv_close = cbw_close, 453751cec7aSVladimir Sementsov-Ogievskiy 454d003e0aeSVladimir Sementsov-Ogievskiy .bdrv_co_preadv = cbw_co_preadv, 455d003e0aeSVladimir Sementsov-Ogievskiy .bdrv_co_pwritev = cbw_co_pwritev, 456d003e0aeSVladimir Sementsov-Ogievskiy .bdrv_co_pwrite_zeroes = cbw_co_pwrite_zeroes, 457d003e0aeSVladimir Sementsov-Ogievskiy .bdrv_co_pdiscard = cbw_co_pdiscard, 458d003e0aeSVladimir Sementsov-Ogievskiy .bdrv_co_flush = cbw_co_flush, 459d003e0aeSVladimir Sementsov-Ogievskiy 460af5bcd77SVladimir Sementsov-Ogievskiy .bdrv_co_preadv_snapshot = cbw_co_preadv_snapshot, 461af5bcd77SVladimir Sementsov-Ogievskiy .bdrv_co_pdiscard_snapshot = cbw_co_pdiscard_snapshot, 462af5bcd77SVladimir Sementsov-Ogievskiy .bdrv_co_snapshot_block_status = cbw_co_snapshot_block_status, 463af5bcd77SVladimir Sementsov-Ogievskiy 464d003e0aeSVladimir Sementsov-Ogievskiy .bdrv_refresh_filename = cbw_refresh_filename, 465d003e0aeSVladimir Sementsov-Ogievskiy 466d003e0aeSVladimir Sementsov-Ogievskiy .bdrv_child_perm = cbw_child_perm, 467d003e0aeSVladimir Sementsov-Ogievskiy 468d003e0aeSVladimir Sementsov-Ogievskiy .is_filter = true, 469d003e0aeSVladimir Sementsov-Ogievskiy }; 470d003e0aeSVladimir Sementsov-Ogievskiy 471d003e0aeSVladimir Sementsov-Ogievskiy BlockDriverState *bdrv_cbw_append(BlockDriverState *source, 472d003e0aeSVladimir Sementsov-Ogievskiy BlockDriverState *target, 473d003e0aeSVladimir Sementsov-Ogievskiy const char *filter_node_name, 474d003e0aeSVladimir Sementsov-Ogievskiy BlockCopyState **bcs, 475d003e0aeSVladimir Sementsov-Ogievskiy Error **errp) 476d003e0aeSVladimir Sementsov-Ogievskiy { 477d003e0aeSVladimir Sementsov-Ogievskiy ERRP_GUARD(); 478d003e0aeSVladimir Sementsov-Ogievskiy BDRVCopyBeforeWriteState *state; 479d003e0aeSVladimir Sementsov-Ogievskiy BlockDriverState *top; 480f44fd739SVladimir Sementsov-Ogievskiy QDict *opts; 481d003e0aeSVladimir Sementsov-Ogievskiy 482d003e0aeSVladimir Sementsov-Ogievskiy assert(source->total_sectors == target->total_sectors); 483377cc15bSEmanuele Giuseppe Esposito GLOBAL_STATE_CODE(); 484d003e0aeSVladimir Sementsov-Ogievskiy 485f44fd739SVladimir Sementsov-Ogievskiy opts = qdict_new(); 486751cec7aSVladimir Sementsov-Ogievskiy qdict_put_str(opts, "driver", "copy-before-write"); 487751cec7aSVladimir Sementsov-Ogievskiy if (filter_node_name) { 488751cec7aSVladimir Sementsov-Ogievskiy qdict_put_str(opts, "node-name", filter_node_name); 489751cec7aSVladimir Sementsov-Ogievskiy } 490f44fd739SVladimir Sementsov-Ogievskiy qdict_put_str(opts, "file", bdrv_get_node_name(source)); 491f44fd739SVladimir Sementsov-Ogievskiy qdict_put_str(opts, "target", bdrv_get_node_name(target)); 492f44fd739SVladimir Sementsov-Ogievskiy 493751cec7aSVladimir Sementsov-Ogievskiy top = bdrv_insert_node(source, opts, BDRV_O_RDWR, errp); 494751cec7aSVladimir Sementsov-Ogievskiy if (!top) { 495751cec7aSVladimir Sementsov-Ogievskiy return NULL; 496d003e0aeSVladimir Sementsov-Ogievskiy } 497d003e0aeSVladimir Sementsov-Ogievskiy 498751cec7aSVladimir Sementsov-Ogievskiy state = top->opaque; 4997ddbce2dSVladimir Sementsov-Ogievskiy *bcs = state->bcs; 500d003e0aeSVladimir Sementsov-Ogievskiy 501d003e0aeSVladimir Sementsov-Ogievskiy return top; 502d003e0aeSVladimir Sementsov-Ogievskiy } 503d003e0aeSVladimir Sementsov-Ogievskiy 504d003e0aeSVladimir Sementsov-Ogievskiy void bdrv_cbw_drop(BlockDriverState *bs) 505d003e0aeSVladimir Sementsov-Ogievskiy { 506377cc15bSEmanuele Giuseppe Esposito GLOBAL_STATE_CODE(); 507d003e0aeSVladimir Sementsov-Ogievskiy bdrv_drop_filter(bs, &error_abort); 508d003e0aeSVladimir Sementsov-Ogievskiy bdrv_unref(bs); 509d003e0aeSVladimir Sementsov-Ogievskiy } 510751cec7aSVladimir Sementsov-Ogievskiy 511751cec7aSVladimir Sementsov-Ogievskiy static void cbw_init(void) 512751cec7aSVladimir Sementsov-Ogievskiy { 513751cec7aSVladimir Sementsov-Ogievskiy bdrv_register(&bdrv_cbw_filter); 514751cec7aSVladimir Sementsov-Ogievskiy } 515751cec7aSVladimir Sementsov-Ogievskiy 516751cec7aSVladimir Sementsov-Ogievskiy block_init(cbw_init); 517