1d003e0aeSVladimir Sementsov-Ogievskiy /* 2d003e0aeSVladimir Sementsov-Ogievskiy * copy-before-write filter driver 3d003e0aeSVladimir Sementsov-Ogievskiy * 4d003e0aeSVladimir Sementsov-Ogievskiy * The driver performs Copy-Before-Write (CBW) operation: it is injected above 5d003e0aeSVladimir Sementsov-Ogievskiy * some node, and before each write it copies _old_ data to the target node. 6d003e0aeSVladimir Sementsov-Ogievskiy * 7d003e0aeSVladimir Sementsov-Ogievskiy * Copyright (c) 2018-2021 Virtuozzo International GmbH. 8d003e0aeSVladimir Sementsov-Ogievskiy * 9d003e0aeSVladimir Sementsov-Ogievskiy * Author: 10d003e0aeSVladimir Sementsov-Ogievskiy * Sementsov-Ogievskiy Vladimir <vsementsov@virtuozzo.com> 11d003e0aeSVladimir Sementsov-Ogievskiy * 12d003e0aeSVladimir Sementsov-Ogievskiy * This program is free software; you can redistribute it and/or modify 13d003e0aeSVladimir Sementsov-Ogievskiy * it under the terms of the GNU General Public License as published by 14d003e0aeSVladimir Sementsov-Ogievskiy * the Free Software Foundation; either version 2 of the License, or 15d003e0aeSVladimir Sementsov-Ogievskiy * (at your option) any later version. 16d003e0aeSVladimir Sementsov-Ogievskiy * 17d003e0aeSVladimir Sementsov-Ogievskiy * This program is distributed in the hope that it will be useful, 18d003e0aeSVladimir Sementsov-Ogievskiy * but WITHOUT ANY WARRANTY; without even the implied warranty of 19d003e0aeSVladimir Sementsov-Ogievskiy * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20d003e0aeSVladimir Sementsov-Ogievskiy * GNU General Public License for more details. 21d003e0aeSVladimir Sementsov-Ogievskiy * 22d003e0aeSVladimir Sementsov-Ogievskiy * You should have received a copy of the GNU General Public License 23d003e0aeSVladimir Sementsov-Ogievskiy * along with this program. If not, see <http://www.gnu.org/licenses/>. 24d003e0aeSVladimir Sementsov-Ogievskiy */ 25d003e0aeSVladimir Sementsov-Ogievskiy 26d003e0aeSVladimir Sementsov-Ogievskiy #include "qemu/osdep.h" 27d003e0aeSVladimir Sementsov-Ogievskiy 28d003e0aeSVladimir Sementsov-Ogievskiy #include "sysemu/block-backend.h" 29d003e0aeSVladimir Sementsov-Ogievskiy #include "qemu/cutils.h" 30d003e0aeSVladimir Sementsov-Ogievskiy #include "qapi/error.h" 31d003e0aeSVladimir Sementsov-Ogievskiy #include "block/block_int.h" 32d003e0aeSVladimir Sementsov-Ogievskiy #include "block/qdict.h" 33d003e0aeSVladimir Sementsov-Ogievskiy #include "block/block-copy.h" 34d003e0aeSVladimir Sementsov-Ogievskiy 35d003e0aeSVladimir Sementsov-Ogievskiy #include "block/copy-before-write.h" 36*af5bcd77SVladimir Sementsov-Ogievskiy #include "block/reqlist.h" 37d003e0aeSVladimir Sementsov-Ogievskiy 385f3a3cd7SVladimir Sementsov-Ogievskiy #include "qapi/qapi-visit-block-core.h" 395f3a3cd7SVladimir Sementsov-Ogievskiy 40d003e0aeSVladimir Sementsov-Ogievskiy typedef struct BDRVCopyBeforeWriteState { 41d003e0aeSVladimir Sementsov-Ogievskiy BlockCopyState *bcs; 42d003e0aeSVladimir Sementsov-Ogievskiy BdrvChild *target; 43*af5bcd77SVladimir Sementsov-Ogievskiy 44*af5bcd77SVladimir Sementsov-Ogievskiy /* 45*af5bcd77SVladimir Sementsov-Ogievskiy * @lock: protects access to @access_bitmap, @done_bitmap and 46*af5bcd77SVladimir Sementsov-Ogievskiy * @frozen_read_reqs 47*af5bcd77SVladimir Sementsov-Ogievskiy */ 48*af5bcd77SVladimir Sementsov-Ogievskiy CoMutex lock; 49*af5bcd77SVladimir Sementsov-Ogievskiy 50*af5bcd77SVladimir Sementsov-Ogievskiy /* 51*af5bcd77SVladimir Sementsov-Ogievskiy * @access_bitmap: represents areas allowed for reading by fleecing user. 52*af5bcd77SVladimir Sementsov-Ogievskiy * Reading from non-dirty areas leads to -EACCES. 53*af5bcd77SVladimir Sementsov-Ogievskiy */ 54*af5bcd77SVladimir Sementsov-Ogievskiy BdrvDirtyBitmap *access_bitmap; 55*af5bcd77SVladimir Sementsov-Ogievskiy 56*af5bcd77SVladimir Sementsov-Ogievskiy /* 57*af5bcd77SVladimir Sementsov-Ogievskiy * @done_bitmap: represents areas that was successfully copied to @target by 58*af5bcd77SVladimir Sementsov-Ogievskiy * copy-before-write operations. 59*af5bcd77SVladimir Sementsov-Ogievskiy */ 60*af5bcd77SVladimir Sementsov-Ogievskiy BdrvDirtyBitmap *done_bitmap; 61*af5bcd77SVladimir Sementsov-Ogievskiy 62*af5bcd77SVladimir Sementsov-Ogievskiy /* 63*af5bcd77SVladimir Sementsov-Ogievskiy * @frozen_read_reqs: current read requests for fleecing user in bs->file 64*af5bcd77SVladimir Sementsov-Ogievskiy * node. These areas must not be rewritten by guest. 65*af5bcd77SVladimir Sementsov-Ogievskiy */ 66*af5bcd77SVladimir Sementsov-Ogievskiy BlockReqList frozen_read_reqs; 67d003e0aeSVladimir Sementsov-Ogievskiy } BDRVCopyBeforeWriteState; 68d003e0aeSVladimir Sementsov-Ogievskiy 69d003e0aeSVladimir Sementsov-Ogievskiy static coroutine_fn int cbw_co_preadv( 70f7ef38ddSVladimir Sementsov-Ogievskiy BlockDriverState *bs, int64_t offset, int64_t bytes, 71f7ef38ddSVladimir Sementsov-Ogievskiy QEMUIOVector *qiov, BdrvRequestFlags flags) 72d003e0aeSVladimir Sementsov-Ogievskiy { 733c1e6327SVladimir Sementsov-Ogievskiy return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); 74d003e0aeSVladimir Sementsov-Ogievskiy } 75d003e0aeSVladimir Sementsov-Ogievskiy 76*af5bcd77SVladimir Sementsov-Ogievskiy /* 77*af5bcd77SVladimir Sementsov-Ogievskiy * Do copy-before-write operation. 78*af5bcd77SVladimir Sementsov-Ogievskiy * 79*af5bcd77SVladimir Sementsov-Ogievskiy * On failure guest request must be failed too. 80*af5bcd77SVladimir Sementsov-Ogievskiy * 81*af5bcd77SVladimir Sementsov-Ogievskiy * On success, we also wait for all in-flight fleecing read requests in source 82*af5bcd77SVladimir Sementsov-Ogievskiy * node, and it's guaranteed that after cbw_do_copy_before_write() successful 83*af5bcd77SVladimir Sementsov-Ogievskiy * return there are no such requests and they will never appear. 84*af5bcd77SVladimir Sementsov-Ogievskiy */ 85d003e0aeSVladimir Sementsov-Ogievskiy static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs, 86d003e0aeSVladimir Sementsov-Ogievskiy uint64_t offset, uint64_t bytes, BdrvRequestFlags flags) 87d003e0aeSVladimir Sementsov-Ogievskiy { 88d003e0aeSVladimir Sementsov-Ogievskiy BDRVCopyBeforeWriteState *s = bs->opaque; 89*af5bcd77SVladimir Sementsov-Ogievskiy int ret; 90d003e0aeSVladimir Sementsov-Ogievskiy uint64_t off, end; 91b518e9e9SVladimir Sementsov-Ogievskiy int64_t cluster_size = block_copy_cluster_size(s->bcs); 92d003e0aeSVladimir Sementsov-Ogievskiy 93d003e0aeSVladimir Sementsov-Ogievskiy if (flags & BDRV_REQ_WRITE_UNCHANGED) { 94d003e0aeSVladimir Sementsov-Ogievskiy return 0; 95d003e0aeSVladimir Sementsov-Ogievskiy } 96d003e0aeSVladimir Sementsov-Ogievskiy 97b518e9e9SVladimir Sementsov-Ogievskiy off = QEMU_ALIGN_DOWN(offset, cluster_size); 98b518e9e9SVladimir Sementsov-Ogievskiy end = QEMU_ALIGN_UP(offset + bytes, cluster_size); 99d003e0aeSVladimir Sementsov-Ogievskiy 100*af5bcd77SVladimir Sementsov-Ogievskiy ret = block_copy(s->bcs, off, end - off, true); 101*af5bcd77SVladimir Sementsov-Ogievskiy if (ret < 0) { 102*af5bcd77SVladimir Sementsov-Ogievskiy return ret; 103*af5bcd77SVladimir Sementsov-Ogievskiy } 104*af5bcd77SVladimir Sementsov-Ogievskiy 105*af5bcd77SVladimir Sementsov-Ogievskiy WITH_QEMU_LOCK_GUARD(&s->lock) { 106*af5bcd77SVladimir Sementsov-Ogievskiy bdrv_set_dirty_bitmap(s->done_bitmap, off, end - off); 107*af5bcd77SVladimir Sementsov-Ogievskiy reqlist_wait_all(&s->frozen_read_reqs, off, end - off, &s->lock); 108*af5bcd77SVladimir Sementsov-Ogievskiy } 109*af5bcd77SVladimir Sementsov-Ogievskiy 110*af5bcd77SVladimir Sementsov-Ogievskiy return 0; 111d003e0aeSVladimir Sementsov-Ogievskiy } 112d003e0aeSVladimir Sementsov-Ogievskiy 113d003e0aeSVladimir Sementsov-Ogievskiy static int coroutine_fn cbw_co_pdiscard(BlockDriverState *bs, 1140c802287SVladimir Sementsov-Ogievskiy int64_t offset, int64_t bytes) 115d003e0aeSVladimir Sementsov-Ogievskiy { 116d003e0aeSVladimir Sementsov-Ogievskiy int ret = cbw_do_copy_before_write(bs, offset, bytes, 0); 117d003e0aeSVladimir Sementsov-Ogievskiy if (ret < 0) { 118d003e0aeSVladimir Sementsov-Ogievskiy return ret; 119d003e0aeSVladimir Sementsov-Ogievskiy } 120d003e0aeSVladimir Sementsov-Ogievskiy 1213c1e6327SVladimir Sementsov-Ogievskiy return bdrv_co_pdiscard(bs->file, offset, bytes); 122d003e0aeSVladimir Sementsov-Ogievskiy } 123d003e0aeSVladimir Sementsov-Ogievskiy 124d003e0aeSVladimir Sementsov-Ogievskiy static int coroutine_fn cbw_co_pwrite_zeroes(BlockDriverState *bs, 125f34b2bcfSVladimir Sementsov-Ogievskiy int64_t offset, int64_t bytes, BdrvRequestFlags flags) 126d003e0aeSVladimir Sementsov-Ogievskiy { 127d003e0aeSVladimir Sementsov-Ogievskiy int ret = cbw_do_copy_before_write(bs, offset, bytes, flags); 128d003e0aeSVladimir Sementsov-Ogievskiy if (ret < 0) { 129d003e0aeSVladimir Sementsov-Ogievskiy return ret; 130d003e0aeSVladimir Sementsov-Ogievskiy } 131d003e0aeSVladimir Sementsov-Ogievskiy 1323c1e6327SVladimir Sementsov-Ogievskiy return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags); 133d003e0aeSVladimir Sementsov-Ogievskiy } 134d003e0aeSVladimir Sementsov-Ogievskiy 135d003e0aeSVladimir Sementsov-Ogievskiy static coroutine_fn int cbw_co_pwritev(BlockDriverState *bs, 136e75abedaSVladimir Sementsov-Ogievskiy int64_t offset, 137e75abedaSVladimir Sementsov-Ogievskiy int64_t bytes, 138e75abedaSVladimir Sementsov-Ogievskiy QEMUIOVector *qiov, 139e75abedaSVladimir Sementsov-Ogievskiy BdrvRequestFlags flags) 140d003e0aeSVladimir Sementsov-Ogievskiy { 141d003e0aeSVladimir Sementsov-Ogievskiy int ret = cbw_do_copy_before_write(bs, offset, bytes, flags); 142d003e0aeSVladimir Sementsov-Ogievskiy if (ret < 0) { 143d003e0aeSVladimir Sementsov-Ogievskiy return ret; 144d003e0aeSVladimir Sementsov-Ogievskiy } 145d003e0aeSVladimir Sementsov-Ogievskiy 1463c1e6327SVladimir Sementsov-Ogievskiy return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags); 147d003e0aeSVladimir Sementsov-Ogievskiy } 148d003e0aeSVladimir Sementsov-Ogievskiy 149d003e0aeSVladimir Sementsov-Ogievskiy static int coroutine_fn cbw_co_flush(BlockDriverState *bs) 150d003e0aeSVladimir Sementsov-Ogievskiy { 1513c1e6327SVladimir Sementsov-Ogievskiy if (!bs->file) { 152d003e0aeSVladimir Sementsov-Ogievskiy return 0; 153d003e0aeSVladimir Sementsov-Ogievskiy } 154d003e0aeSVladimir Sementsov-Ogievskiy 1553c1e6327SVladimir Sementsov-Ogievskiy return bdrv_co_flush(bs->file->bs); 156d003e0aeSVladimir Sementsov-Ogievskiy } 157d003e0aeSVladimir Sementsov-Ogievskiy 158*af5bcd77SVladimir Sementsov-Ogievskiy /* 159*af5bcd77SVladimir Sementsov-Ogievskiy * If @offset not accessible - return NULL. 160*af5bcd77SVladimir Sementsov-Ogievskiy * 161*af5bcd77SVladimir Sementsov-Ogievskiy * Otherwise, set @pnum to some bytes that accessible from @file (@file is set 162*af5bcd77SVladimir Sementsov-Ogievskiy * to bs->file or to s->target). Return newly allocated BlockReq object that 163*af5bcd77SVladimir Sementsov-Ogievskiy * should be than passed to cbw_snapshot_read_unlock(). 164*af5bcd77SVladimir Sementsov-Ogievskiy * 165*af5bcd77SVladimir Sementsov-Ogievskiy * It's guaranteed that guest writes will not interact in the region until 166*af5bcd77SVladimir Sementsov-Ogievskiy * cbw_snapshot_read_unlock() called. 167*af5bcd77SVladimir Sementsov-Ogievskiy */ 168*af5bcd77SVladimir Sementsov-Ogievskiy static BlockReq *cbw_snapshot_read_lock(BlockDriverState *bs, 169*af5bcd77SVladimir Sementsov-Ogievskiy int64_t offset, int64_t bytes, 170*af5bcd77SVladimir Sementsov-Ogievskiy int64_t *pnum, BdrvChild **file) 171*af5bcd77SVladimir Sementsov-Ogievskiy { 172*af5bcd77SVladimir Sementsov-Ogievskiy BDRVCopyBeforeWriteState *s = bs->opaque; 173*af5bcd77SVladimir Sementsov-Ogievskiy BlockReq *req = g_new(BlockReq, 1); 174*af5bcd77SVladimir Sementsov-Ogievskiy bool done; 175*af5bcd77SVladimir Sementsov-Ogievskiy 176*af5bcd77SVladimir Sementsov-Ogievskiy QEMU_LOCK_GUARD(&s->lock); 177*af5bcd77SVladimir Sementsov-Ogievskiy 178*af5bcd77SVladimir Sementsov-Ogievskiy if (bdrv_dirty_bitmap_next_zero(s->access_bitmap, offset, bytes) != -1) { 179*af5bcd77SVladimir Sementsov-Ogievskiy g_free(req); 180*af5bcd77SVladimir Sementsov-Ogievskiy return NULL; 181*af5bcd77SVladimir Sementsov-Ogievskiy } 182*af5bcd77SVladimir Sementsov-Ogievskiy 183*af5bcd77SVladimir Sementsov-Ogievskiy done = bdrv_dirty_bitmap_status(s->done_bitmap, offset, bytes, pnum); 184*af5bcd77SVladimir Sementsov-Ogievskiy if (done) { 185*af5bcd77SVladimir Sementsov-Ogievskiy /* 186*af5bcd77SVladimir Sementsov-Ogievskiy * Special invalid BlockReq, that is handled in 187*af5bcd77SVladimir Sementsov-Ogievskiy * cbw_snapshot_read_unlock(). We don't need to lock something to read 188*af5bcd77SVladimir Sementsov-Ogievskiy * from s->target. 189*af5bcd77SVladimir Sementsov-Ogievskiy */ 190*af5bcd77SVladimir Sementsov-Ogievskiy *req = (BlockReq) {.offset = -1, .bytes = -1}; 191*af5bcd77SVladimir Sementsov-Ogievskiy *file = s->target; 192*af5bcd77SVladimir Sementsov-Ogievskiy } else { 193*af5bcd77SVladimir Sementsov-Ogievskiy reqlist_init_req(&s->frozen_read_reqs, req, offset, bytes); 194*af5bcd77SVladimir Sementsov-Ogievskiy *file = bs->file; 195*af5bcd77SVladimir Sementsov-Ogievskiy } 196*af5bcd77SVladimir Sementsov-Ogievskiy 197*af5bcd77SVladimir Sementsov-Ogievskiy return req; 198*af5bcd77SVladimir Sementsov-Ogievskiy } 199*af5bcd77SVladimir Sementsov-Ogievskiy 200*af5bcd77SVladimir Sementsov-Ogievskiy static void cbw_snapshot_read_unlock(BlockDriverState *bs, BlockReq *req) 201*af5bcd77SVladimir Sementsov-Ogievskiy { 202*af5bcd77SVladimir Sementsov-Ogievskiy BDRVCopyBeforeWriteState *s = bs->opaque; 203*af5bcd77SVladimir Sementsov-Ogievskiy 204*af5bcd77SVladimir Sementsov-Ogievskiy if (req->offset == -1 && req->bytes == -1) { 205*af5bcd77SVladimir Sementsov-Ogievskiy g_free(req); 206*af5bcd77SVladimir Sementsov-Ogievskiy return; 207*af5bcd77SVladimir Sementsov-Ogievskiy } 208*af5bcd77SVladimir Sementsov-Ogievskiy 209*af5bcd77SVladimir Sementsov-Ogievskiy QEMU_LOCK_GUARD(&s->lock); 210*af5bcd77SVladimir Sementsov-Ogievskiy 211*af5bcd77SVladimir Sementsov-Ogievskiy reqlist_remove_req(req); 212*af5bcd77SVladimir Sementsov-Ogievskiy g_free(req); 213*af5bcd77SVladimir Sementsov-Ogievskiy } 214*af5bcd77SVladimir Sementsov-Ogievskiy 215*af5bcd77SVladimir Sementsov-Ogievskiy static coroutine_fn int 216*af5bcd77SVladimir Sementsov-Ogievskiy cbw_co_preadv_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes, 217*af5bcd77SVladimir Sementsov-Ogievskiy QEMUIOVector *qiov, size_t qiov_offset) 218*af5bcd77SVladimir Sementsov-Ogievskiy { 219*af5bcd77SVladimir Sementsov-Ogievskiy BlockReq *req; 220*af5bcd77SVladimir Sementsov-Ogievskiy BdrvChild *file; 221*af5bcd77SVladimir Sementsov-Ogievskiy int ret; 222*af5bcd77SVladimir Sementsov-Ogievskiy 223*af5bcd77SVladimir Sementsov-Ogievskiy /* TODO: upgrade to async loop using AioTask */ 224*af5bcd77SVladimir Sementsov-Ogievskiy while (bytes) { 225*af5bcd77SVladimir Sementsov-Ogievskiy int64_t cur_bytes; 226*af5bcd77SVladimir Sementsov-Ogievskiy 227*af5bcd77SVladimir Sementsov-Ogievskiy req = cbw_snapshot_read_lock(bs, offset, bytes, &cur_bytes, &file); 228*af5bcd77SVladimir Sementsov-Ogievskiy if (!req) { 229*af5bcd77SVladimir Sementsov-Ogievskiy return -EACCES; 230*af5bcd77SVladimir Sementsov-Ogievskiy } 231*af5bcd77SVladimir Sementsov-Ogievskiy 232*af5bcd77SVladimir Sementsov-Ogievskiy ret = bdrv_co_preadv_part(file, offset, cur_bytes, 233*af5bcd77SVladimir Sementsov-Ogievskiy qiov, qiov_offset, 0); 234*af5bcd77SVladimir Sementsov-Ogievskiy cbw_snapshot_read_unlock(bs, req); 235*af5bcd77SVladimir Sementsov-Ogievskiy if (ret < 0) { 236*af5bcd77SVladimir Sementsov-Ogievskiy return ret; 237*af5bcd77SVladimir Sementsov-Ogievskiy } 238*af5bcd77SVladimir Sementsov-Ogievskiy 239*af5bcd77SVladimir Sementsov-Ogievskiy bytes -= cur_bytes; 240*af5bcd77SVladimir Sementsov-Ogievskiy offset += cur_bytes; 241*af5bcd77SVladimir Sementsov-Ogievskiy qiov_offset += cur_bytes; 242*af5bcd77SVladimir Sementsov-Ogievskiy } 243*af5bcd77SVladimir Sementsov-Ogievskiy 244*af5bcd77SVladimir Sementsov-Ogievskiy return 0; 245*af5bcd77SVladimir Sementsov-Ogievskiy } 246*af5bcd77SVladimir Sementsov-Ogievskiy 247*af5bcd77SVladimir Sementsov-Ogievskiy static int coroutine_fn 248*af5bcd77SVladimir Sementsov-Ogievskiy cbw_co_snapshot_block_status(BlockDriverState *bs, 249*af5bcd77SVladimir Sementsov-Ogievskiy bool want_zero, int64_t offset, int64_t bytes, 250*af5bcd77SVladimir Sementsov-Ogievskiy int64_t *pnum, int64_t *map, 251*af5bcd77SVladimir Sementsov-Ogievskiy BlockDriverState **file) 252*af5bcd77SVladimir Sementsov-Ogievskiy { 253*af5bcd77SVladimir Sementsov-Ogievskiy BDRVCopyBeforeWriteState *s = bs->opaque; 254*af5bcd77SVladimir Sementsov-Ogievskiy BlockReq *req; 255*af5bcd77SVladimir Sementsov-Ogievskiy int ret; 256*af5bcd77SVladimir Sementsov-Ogievskiy int64_t cur_bytes; 257*af5bcd77SVladimir Sementsov-Ogievskiy BdrvChild *child; 258*af5bcd77SVladimir Sementsov-Ogievskiy 259*af5bcd77SVladimir Sementsov-Ogievskiy req = cbw_snapshot_read_lock(bs, offset, bytes, &cur_bytes, &child); 260*af5bcd77SVladimir Sementsov-Ogievskiy if (!req) { 261*af5bcd77SVladimir Sementsov-Ogievskiy return -EACCES; 262*af5bcd77SVladimir Sementsov-Ogievskiy } 263*af5bcd77SVladimir Sementsov-Ogievskiy 264*af5bcd77SVladimir Sementsov-Ogievskiy ret = bdrv_block_status(child->bs, offset, cur_bytes, pnum, map, file); 265*af5bcd77SVladimir Sementsov-Ogievskiy if (child == s->target) { 266*af5bcd77SVladimir Sementsov-Ogievskiy /* 267*af5bcd77SVladimir Sementsov-Ogievskiy * We refer to s->target only for areas that we've written to it. 268*af5bcd77SVladimir Sementsov-Ogievskiy * And we can not report unallocated blocks in s->target: this will 269*af5bcd77SVladimir Sementsov-Ogievskiy * break generic block-status-above logic, that will go to 270*af5bcd77SVladimir Sementsov-Ogievskiy * copy-before-write filtered child in this case. 271*af5bcd77SVladimir Sementsov-Ogievskiy */ 272*af5bcd77SVladimir Sementsov-Ogievskiy assert(ret & BDRV_BLOCK_ALLOCATED); 273*af5bcd77SVladimir Sementsov-Ogievskiy } 274*af5bcd77SVladimir Sementsov-Ogievskiy 275*af5bcd77SVladimir Sementsov-Ogievskiy cbw_snapshot_read_unlock(bs, req); 276*af5bcd77SVladimir Sementsov-Ogievskiy 277*af5bcd77SVladimir Sementsov-Ogievskiy return ret; 278*af5bcd77SVladimir Sementsov-Ogievskiy } 279*af5bcd77SVladimir Sementsov-Ogievskiy 280*af5bcd77SVladimir Sementsov-Ogievskiy static int coroutine_fn cbw_co_pdiscard_snapshot(BlockDriverState *bs, 281*af5bcd77SVladimir Sementsov-Ogievskiy int64_t offset, int64_t bytes) 282*af5bcd77SVladimir Sementsov-Ogievskiy { 283*af5bcd77SVladimir Sementsov-Ogievskiy BDRVCopyBeforeWriteState *s = bs->opaque; 284*af5bcd77SVladimir Sementsov-Ogievskiy 285*af5bcd77SVladimir Sementsov-Ogievskiy WITH_QEMU_LOCK_GUARD(&s->lock) { 286*af5bcd77SVladimir Sementsov-Ogievskiy bdrv_reset_dirty_bitmap(s->access_bitmap, offset, bytes); 287*af5bcd77SVladimir Sementsov-Ogievskiy } 288*af5bcd77SVladimir Sementsov-Ogievskiy 289*af5bcd77SVladimir Sementsov-Ogievskiy block_copy_reset(s->bcs, offset, bytes); 290*af5bcd77SVladimir Sementsov-Ogievskiy 291*af5bcd77SVladimir Sementsov-Ogievskiy return bdrv_co_pdiscard(s->target, offset, bytes); 292*af5bcd77SVladimir Sementsov-Ogievskiy } 293*af5bcd77SVladimir Sementsov-Ogievskiy 294d003e0aeSVladimir Sementsov-Ogievskiy static void cbw_refresh_filename(BlockDriverState *bs) 295d003e0aeSVladimir Sementsov-Ogievskiy { 296d003e0aeSVladimir Sementsov-Ogievskiy pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), 2973c1e6327SVladimir Sementsov-Ogievskiy bs->file->bs->filename); 298d003e0aeSVladimir Sementsov-Ogievskiy } 299d003e0aeSVladimir Sementsov-Ogievskiy 300d003e0aeSVladimir Sementsov-Ogievskiy static void cbw_child_perm(BlockDriverState *bs, BdrvChild *c, 301d003e0aeSVladimir Sementsov-Ogievskiy BdrvChildRole role, 302d003e0aeSVladimir Sementsov-Ogievskiy BlockReopenQueue *reopen_queue, 303d003e0aeSVladimir Sementsov-Ogievskiy uint64_t perm, uint64_t shared, 304d003e0aeSVladimir Sementsov-Ogievskiy uint64_t *nperm, uint64_t *nshared) 305d003e0aeSVladimir Sementsov-Ogievskiy { 306d003e0aeSVladimir Sementsov-Ogievskiy if (!(role & BDRV_CHILD_FILTERED)) { 307d003e0aeSVladimir Sementsov-Ogievskiy /* 308d003e0aeSVladimir Sementsov-Ogievskiy * Target child 309d003e0aeSVladimir Sementsov-Ogievskiy * 310d003e0aeSVladimir Sementsov-Ogievskiy * Share write to target (child_file), to not interfere 311d003e0aeSVladimir Sementsov-Ogievskiy * with guest writes to its disk which may be in target backing chain. 312d003e0aeSVladimir Sementsov-Ogievskiy * Can't resize during a backup block job because we check the size 313d003e0aeSVladimir Sementsov-Ogievskiy * only upfront. 314d003e0aeSVladimir Sementsov-Ogievskiy */ 315d003e0aeSVladimir Sementsov-Ogievskiy *nshared = BLK_PERM_ALL & ~BLK_PERM_RESIZE; 316d003e0aeSVladimir Sementsov-Ogievskiy *nperm = BLK_PERM_WRITE; 317d003e0aeSVladimir Sementsov-Ogievskiy } else { 318d003e0aeSVladimir Sementsov-Ogievskiy /* Source child */ 319d003e0aeSVladimir Sementsov-Ogievskiy bdrv_default_perms(bs, c, role, reopen_queue, 320d003e0aeSVladimir Sementsov-Ogievskiy perm, shared, nperm, nshared); 321d003e0aeSVladimir Sementsov-Ogievskiy 3223860c020SVladimir Sementsov-Ogievskiy if (!QLIST_EMPTY(&bs->parents)) { 323d003e0aeSVladimir Sementsov-Ogievskiy if (perm & BLK_PERM_WRITE) { 324d003e0aeSVladimir Sementsov-Ogievskiy *nperm = *nperm | BLK_PERM_CONSISTENT_READ; 325d003e0aeSVladimir Sementsov-Ogievskiy } 326d003e0aeSVladimir Sementsov-Ogievskiy *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE); 327d003e0aeSVladimir Sementsov-Ogievskiy } 328d003e0aeSVladimir Sementsov-Ogievskiy } 3293860c020SVladimir Sementsov-Ogievskiy } 330d003e0aeSVladimir Sementsov-Ogievskiy 3315f3a3cd7SVladimir Sementsov-Ogievskiy static bool cbw_parse_bitmap_option(QDict *options, BdrvDirtyBitmap **bitmap, 3325f3a3cd7SVladimir Sementsov-Ogievskiy Error **errp) 3335f3a3cd7SVladimir Sementsov-Ogievskiy { 3345f3a3cd7SVladimir Sementsov-Ogievskiy QDict *bitmap_qdict = NULL; 3355f3a3cd7SVladimir Sementsov-Ogievskiy BlockDirtyBitmap *bmp_param = NULL; 3365f3a3cd7SVladimir Sementsov-Ogievskiy Visitor *v = NULL; 3375f3a3cd7SVladimir Sementsov-Ogievskiy bool ret = false; 3385f3a3cd7SVladimir Sementsov-Ogievskiy 3395f3a3cd7SVladimir Sementsov-Ogievskiy *bitmap = NULL; 3405f3a3cd7SVladimir Sementsov-Ogievskiy 3415f3a3cd7SVladimir Sementsov-Ogievskiy qdict_extract_subqdict(options, &bitmap_qdict, "bitmap."); 3425f3a3cd7SVladimir Sementsov-Ogievskiy if (!qdict_size(bitmap_qdict)) { 3435f3a3cd7SVladimir Sementsov-Ogievskiy ret = true; 3445f3a3cd7SVladimir Sementsov-Ogievskiy goto out; 3455f3a3cd7SVladimir Sementsov-Ogievskiy } 3465f3a3cd7SVladimir Sementsov-Ogievskiy 3475f3a3cd7SVladimir Sementsov-Ogievskiy v = qobject_input_visitor_new_flat_confused(bitmap_qdict, errp); 3485f3a3cd7SVladimir Sementsov-Ogievskiy if (!v) { 3495f3a3cd7SVladimir Sementsov-Ogievskiy goto out; 3505f3a3cd7SVladimir Sementsov-Ogievskiy } 3515f3a3cd7SVladimir Sementsov-Ogievskiy 3525f3a3cd7SVladimir Sementsov-Ogievskiy visit_type_BlockDirtyBitmap(v, NULL, &bmp_param, errp); 3535f3a3cd7SVladimir Sementsov-Ogievskiy if (!bmp_param) { 3545f3a3cd7SVladimir Sementsov-Ogievskiy goto out; 3555f3a3cd7SVladimir Sementsov-Ogievskiy } 3565f3a3cd7SVladimir Sementsov-Ogievskiy 3575f3a3cd7SVladimir Sementsov-Ogievskiy *bitmap = block_dirty_bitmap_lookup(bmp_param->node, bmp_param->name, NULL, 3585f3a3cd7SVladimir Sementsov-Ogievskiy errp); 3595f3a3cd7SVladimir Sementsov-Ogievskiy if (!*bitmap) { 3605f3a3cd7SVladimir Sementsov-Ogievskiy goto out; 3615f3a3cd7SVladimir Sementsov-Ogievskiy } 3625f3a3cd7SVladimir Sementsov-Ogievskiy 3635f3a3cd7SVladimir Sementsov-Ogievskiy ret = true; 3645f3a3cd7SVladimir Sementsov-Ogievskiy 3655f3a3cd7SVladimir Sementsov-Ogievskiy out: 3665f3a3cd7SVladimir Sementsov-Ogievskiy qapi_free_BlockDirtyBitmap(bmp_param); 3675f3a3cd7SVladimir Sementsov-Ogievskiy visit_free(v); 3685f3a3cd7SVladimir Sementsov-Ogievskiy qobject_unref(bitmap_qdict); 3695f3a3cd7SVladimir Sementsov-Ogievskiy 3705f3a3cd7SVladimir Sementsov-Ogievskiy return ret; 3715f3a3cd7SVladimir Sementsov-Ogievskiy } 3725f3a3cd7SVladimir Sementsov-Ogievskiy 373751cec7aSVladimir Sementsov-Ogievskiy static int cbw_open(BlockDriverState *bs, QDict *options, int flags, 374751cec7aSVladimir Sementsov-Ogievskiy Error **errp) 3751f0cacb9SVladimir Sementsov-Ogievskiy { 376fe7ea40cSVladimir Sementsov-Ogievskiy BDRVCopyBeforeWriteState *s = bs->opaque; 3775f3a3cd7SVladimir Sementsov-Ogievskiy BdrvDirtyBitmap *bitmap = NULL; 378*af5bcd77SVladimir Sementsov-Ogievskiy int64_t cluster_size; 3791f0cacb9SVladimir Sementsov-Ogievskiy 380f44fd739SVladimir Sementsov-Ogievskiy bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, 381f44fd739SVladimir Sementsov-Ogievskiy BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, 382f44fd739SVladimir Sementsov-Ogievskiy false, errp); 383f44fd739SVladimir Sementsov-Ogievskiy if (!bs->file) { 3841f0cacb9SVladimir Sementsov-Ogievskiy return -EINVAL; 3851f0cacb9SVladimir Sementsov-Ogievskiy } 3861f0cacb9SVladimir Sementsov-Ogievskiy 387f44fd739SVladimir Sementsov-Ogievskiy s->target = bdrv_open_child(NULL, options, "target", bs, &child_of_bds, 388f44fd739SVladimir Sementsov-Ogievskiy BDRV_CHILD_DATA, false, errp); 389f44fd739SVladimir Sementsov-Ogievskiy if (!s->target) { 3901f0cacb9SVladimir Sementsov-Ogievskiy return -EINVAL; 3911f0cacb9SVladimir Sementsov-Ogievskiy } 3921f0cacb9SVladimir Sementsov-Ogievskiy 3935f3a3cd7SVladimir Sementsov-Ogievskiy if (!cbw_parse_bitmap_option(options, &bitmap, errp)) { 3945f3a3cd7SVladimir Sementsov-Ogievskiy return -EINVAL; 3955f3a3cd7SVladimir Sementsov-Ogievskiy } 3965f3a3cd7SVladimir Sementsov-Ogievskiy 3975a507426SVladimir Sementsov-Ogievskiy bs->total_sectors = bs->file->bs->total_sectors; 3985a507426SVladimir Sementsov-Ogievskiy bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED | 3995a507426SVladimir Sementsov-Ogievskiy (BDRV_REQ_FUA & bs->file->bs->supported_write_flags); 4005a507426SVladimir Sementsov-Ogievskiy bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | 4015a507426SVladimir Sementsov-Ogievskiy ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) & 4025a507426SVladimir Sementsov-Ogievskiy bs->file->bs->supported_zero_flags); 4035a507426SVladimir Sementsov-Ogievskiy 4045f3a3cd7SVladimir Sementsov-Ogievskiy s->bcs = block_copy_state_new(bs->file, s->target, bitmap, errp); 405fe7ea40cSVladimir Sementsov-Ogievskiy if (!s->bcs) { 4061f0cacb9SVladimir Sementsov-Ogievskiy error_prepend(errp, "Cannot create block-copy-state: "); 4071f0cacb9SVladimir Sementsov-Ogievskiy return -EINVAL; 4081f0cacb9SVladimir Sementsov-Ogievskiy } 4091f0cacb9SVladimir Sementsov-Ogievskiy 410*af5bcd77SVladimir Sementsov-Ogievskiy cluster_size = block_copy_cluster_size(s->bcs); 411*af5bcd77SVladimir Sementsov-Ogievskiy 412*af5bcd77SVladimir Sementsov-Ogievskiy s->done_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp); 413*af5bcd77SVladimir Sementsov-Ogievskiy if (!s->done_bitmap) { 414*af5bcd77SVladimir Sementsov-Ogievskiy return -EINVAL; 415*af5bcd77SVladimir Sementsov-Ogievskiy } 416*af5bcd77SVladimir Sementsov-Ogievskiy bdrv_disable_dirty_bitmap(s->done_bitmap); 417*af5bcd77SVladimir Sementsov-Ogievskiy 418*af5bcd77SVladimir Sementsov-Ogievskiy /* s->access_bitmap starts equal to bcs bitmap */ 419*af5bcd77SVladimir Sementsov-Ogievskiy s->access_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp); 420*af5bcd77SVladimir Sementsov-Ogievskiy if (!s->access_bitmap) { 421*af5bcd77SVladimir Sementsov-Ogievskiy return -EINVAL; 422*af5bcd77SVladimir Sementsov-Ogievskiy } 423*af5bcd77SVladimir Sementsov-Ogievskiy bdrv_disable_dirty_bitmap(s->access_bitmap); 424*af5bcd77SVladimir Sementsov-Ogievskiy bdrv_dirty_bitmap_merge_internal(s->access_bitmap, 425*af5bcd77SVladimir Sementsov-Ogievskiy block_copy_dirty_bitmap(s->bcs), NULL, 426*af5bcd77SVladimir Sementsov-Ogievskiy true); 427*af5bcd77SVladimir Sementsov-Ogievskiy 428*af5bcd77SVladimir Sementsov-Ogievskiy qemu_co_mutex_init(&s->lock); 429*af5bcd77SVladimir Sementsov-Ogievskiy QLIST_INIT(&s->frozen_read_reqs); 430*af5bcd77SVladimir Sementsov-Ogievskiy 4311f0cacb9SVladimir Sementsov-Ogievskiy return 0; 4321f0cacb9SVladimir Sementsov-Ogievskiy } 4331f0cacb9SVladimir Sementsov-Ogievskiy 434751cec7aSVladimir Sementsov-Ogievskiy static void cbw_close(BlockDriverState *bs) 435751cec7aSVladimir Sementsov-Ogievskiy { 436751cec7aSVladimir Sementsov-Ogievskiy BDRVCopyBeforeWriteState *s = bs->opaque; 437751cec7aSVladimir Sementsov-Ogievskiy 438*af5bcd77SVladimir Sementsov-Ogievskiy bdrv_release_dirty_bitmap(s->access_bitmap); 439*af5bcd77SVladimir Sementsov-Ogievskiy bdrv_release_dirty_bitmap(s->done_bitmap); 440*af5bcd77SVladimir Sementsov-Ogievskiy 441751cec7aSVladimir Sementsov-Ogievskiy block_copy_state_free(s->bcs); 442751cec7aSVladimir Sementsov-Ogievskiy s->bcs = NULL; 443751cec7aSVladimir Sementsov-Ogievskiy } 444751cec7aSVladimir Sementsov-Ogievskiy 445d003e0aeSVladimir Sementsov-Ogievskiy BlockDriver bdrv_cbw_filter = { 446d003e0aeSVladimir Sementsov-Ogievskiy .format_name = "copy-before-write", 447d003e0aeSVladimir Sementsov-Ogievskiy .instance_size = sizeof(BDRVCopyBeforeWriteState), 448d003e0aeSVladimir Sementsov-Ogievskiy 449751cec7aSVladimir Sementsov-Ogievskiy .bdrv_open = cbw_open, 450751cec7aSVladimir Sementsov-Ogievskiy .bdrv_close = cbw_close, 451751cec7aSVladimir Sementsov-Ogievskiy 452d003e0aeSVladimir Sementsov-Ogievskiy .bdrv_co_preadv = cbw_co_preadv, 453d003e0aeSVladimir Sementsov-Ogievskiy .bdrv_co_pwritev = cbw_co_pwritev, 454d003e0aeSVladimir Sementsov-Ogievskiy .bdrv_co_pwrite_zeroes = cbw_co_pwrite_zeroes, 455d003e0aeSVladimir Sementsov-Ogievskiy .bdrv_co_pdiscard = cbw_co_pdiscard, 456d003e0aeSVladimir Sementsov-Ogievskiy .bdrv_co_flush = cbw_co_flush, 457d003e0aeSVladimir Sementsov-Ogievskiy 458*af5bcd77SVladimir Sementsov-Ogievskiy .bdrv_co_preadv_snapshot = cbw_co_preadv_snapshot, 459*af5bcd77SVladimir Sementsov-Ogievskiy .bdrv_co_pdiscard_snapshot = cbw_co_pdiscard_snapshot, 460*af5bcd77SVladimir Sementsov-Ogievskiy .bdrv_co_snapshot_block_status = cbw_co_snapshot_block_status, 461*af5bcd77SVladimir Sementsov-Ogievskiy 462d003e0aeSVladimir Sementsov-Ogievskiy .bdrv_refresh_filename = cbw_refresh_filename, 463d003e0aeSVladimir Sementsov-Ogievskiy 464d003e0aeSVladimir Sementsov-Ogievskiy .bdrv_child_perm = cbw_child_perm, 465d003e0aeSVladimir Sementsov-Ogievskiy 466d003e0aeSVladimir Sementsov-Ogievskiy .is_filter = true, 467d003e0aeSVladimir Sementsov-Ogievskiy }; 468d003e0aeSVladimir Sementsov-Ogievskiy 469d003e0aeSVladimir Sementsov-Ogievskiy BlockDriverState *bdrv_cbw_append(BlockDriverState *source, 470d003e0aeSVladimir Sementsov-Ogievskiy BlockDriverState *target, 471d003e0aeSVladimir Sementsov-Ogievskiy const char *filter_node_name, 472d003e0aeSVladimir Sementsov-Ogievskiy BlockCopyState **bcs, 473d003e0aeSVladimir Sementsov-Ogievskiy Error **errp) 474d003e0aeSVladimir Sementsov-Ogievskiy { 475d003e0aeSVladimir Sementsov-Ogievskiy ERRP_GUARD(); 476d003e0aeSVladimir Sementsov-Ogievskiy BDRVCopyBeforeWriteState *state; 477d003e0aeSVladimir Sementsov-Ogievskiy BlockDriverState *top; 478f44fd739SVladimir Sementsov-Ogievskiy QDict *opts; 479d003e0aeSVladimir Sementsov-Ogievskiy 480d003e0aeSVladimir Sementsov-Ogievskiy assert(source->total_sectors == target->total_sectors); 481377cc15bSEmanuele Giuseppe Esposito GLOBAL_STATE_CODE(); 482d003e0aeSVladimir Sementsov-Ogievskiy 483f44fd739SVladimir Sementsov-Ogievskiy opts = qdict_new(); 484751cec7aSVladimir Sementsov-Ogievskiy qdict_put_str(opts, "driver", "copy-before-write"); 485751cec7aSVladimir Sementsov-Ogievskiy if (filter_node_name) { 486751cec7aSVladimir Sementsov-Ogievskiy qdict_put_str(opts, "node-name", filter_node_name); 487751cec7aSVladimir Sementsov-Ogievskiy } 488f44fd739SVladimir Sementsov-Ogievskiy qdict_put_str(opts, "file", bdrv_get_node_name(source)); 489f44fd739SVladimir Sementsov-Ogievskiy qdict_put_str(opts, "target", bdrv_get_node_name(target)); 490f44fd739SVladimir Sementsov-Ogievskiy 491751cec7aSVladimir Sementsov-Ogievskiy top = bdrv_insert_node(source, opts, BDRV_O_RDWR, errp); 492751cec7aSVladimir Sementsov-Ogievskiy if (!top) { 493751cec7aSVladimir Sementsov-Ogievskiy return NULL; 494d003e0aeSVladimir Sementsov-Ogievskiy } 495d003e0aeSVladimir Sementsov-Ogievskiy 496751cec7aSVladimir Sementsov-Ogievskiy state = top->opaque; 4977ddbce2dSVladimir Sementsov-Ogievskiy *bcs = state->bcs; 498d003e0aeSVladimir Sementsov-Ogievskiy 499d003e0aeSVladimir Sementsov-Ogievskiy return top; 500d003e0aeSVladimir Sementsov-Ogievskiy } 501d003e0aeSVladimir Sementsov-Ogievskiy 502d003e0aeSVladimir Sementsov-Ogievskiy void bdrv_cbw_drop(BlockDriverState *bs) 503d003e0aeSVladimir Sementsov-Ogievskiy { 504377cc15bSEmanuele Giuseppe Esposito GLOBAL_STATE_CODE(); 505d003e0aeSVladimir Sementsov-Ogievskiy bdrv_drop_filter(bs, &error_abort); 506d003e0aeSVladimir Sementsov-Ogievskiy bdrv_unref(bs); 507d003e0aeSVladimir Sementsov-Ogievskiy } 508751cec7aSVladimir Sementsov-Ogievskiy 509751cec7aSVladimir Sementsov-Ogievskiy static void cbw_init(void) 510751cec7aSVladimir Sementsov-Ogievskiy { 511751cec7aSVladimir Sementsov-Ogievskiy bdrv_register(&bdrv_cbw_filter); 512751cec7aSVladimir Sementsov-Ogievskiy } 513751cec7aSVladimir Sementsov-Ogievskiy 514751cec7aSVladimir Sementsov-Ogievskiy block_init(cbw_init); 515