xref: /openbmc/qemu/block/copy-before-write.c (revision af5bcd775f3115e4c1b7715920a67b31b119de30)
1d003e0aeSVladimir Sementsov-Ogievskiy /*
2d003e0aeSVladimir Sementsov-Ogievskiy  * copy-before-write filter driver
3d003e0aeSVladimir Sementsov-Ogievskiy  *
4d003e0aeSVladimir Sementsov-Ogievskiy  * The driver performs Copy-Before-Write (CBW) operation: it is injected above
5d003e0aeSVladimir Sementsov-Ogievskiy  * some node, and before each write it copies _old_ data to the target node.
6d003e0aeSVladimir Sementsov-Ogievskiy  *
7d003e0aeSVladimir Sementsov-Ogievskiy  * Copyright (c) 2018-2021 Virtuozzo International GmbH.
8d003e0aeSVladimir Sementsov-Ogievskiy  *
9d003e0aeSVladimir Sementsov-Ogievskiy  * Author:
10d003e0aeSVladimir Sementsov-Ogievskiy  *  Sementsov-Ogievskiy Vladimir <vsementsov@virtuozzo.com>
11d003e0aeSVladimir Sementsov-Ogievskiy  *
12d003e0aeSVladimir Sementsov-Ogievskiy  * This program is free software; you can redistribute it and/or modify
13d003e0aeSVladimir Sementsov-Ogievskiy  * it under the terms of the GNU General Public License as published by
14d003e0aeSVladimir Sementsov-Ogievskiy  * the Free Software Foundation; either version 2 of the License, or
15d003e0aeSVladimir Sementsov-Ogievskiy  * (at your option) any later version.
16d003e0aeSVladimir Sementsov-Ogievskiy  *
17d003e0aeSVladimir Sementsov-Ogievskiy  * This program is distributed in the hope that it will be useful,
18d003e0aeSVladimir Sementsov-Ogievskiy  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19d003e0aeSVladimir Sementsov-Ogievskiy  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20d003e0aeSVladimir Sementsov-Ogievskiy  * GNU General Public License for more details.
21d003e0aeSVladimir Sementsov-Ogievskiy  *
22d003e0aeSVladimir Sementsov-Ogievskiy  * You should have received a copy of the GNU General Public License
23d003e0aeSVladimir Sementsov-Ogievskiy  * along with this program. If not, see <http://www.gnu.org/licenses/>.
24d003e0aeSVladimir Sementsov-Ogievskiy  */
25d003e0aeSVladimir Sementsov-Ogievskiy 
26d003e0aeSVladimir Sementsov-Ogievskiy #include "qemu/osdep.h"
27d003e0aeSVladimir Sementsov-Ogievskiy 
28d003e0aeSVladimir Sementsov-Ogievskiy #include "sysemu/block-backend.h"
29d003e0aeSVladimir Sementsov-Ogievskiy #include "qemu/cutils.h"
30d003e0aeSVladimir Sementsov-Ogievskiy #include "qapi/error.h"
31d003e0aeSVladimir Sementsov-Ogievskiy #include "block/block_int.h"
32d003e0aeSVladimir Sementsov-Ogievskiy #include "block/qdict.h"
33d003e0aeSVladimir Sementsov-Ogievskiy #include "block/block-copy.h"
34d003e0aeSVladimir Sementsov-Ogievskiy 
35d003e0aeSVladimir Sementsov-Ogievskiy #include "block/copy-before-write.h"
36*af5bcd77SVladimir Sementsov-Ogievskiy #include "block/reqlist.h"
37d003e0aeSVladimir Sementsov-Ogievskiy 
385f3a3cd7SVladimir Sementsov-Ogievskiy #include "qapi/qapi-visit-block-core.h"
395f3a3cd7SVladimir Sementsov-Ogievskiy 
40d003e0aeSVladimir Sementsov-Ogievskiy typedef struct BDRVCopyBeforeWriteState {
41d003e0aeSVladimir Sementsov-Ogievskiy     BlockCopyState *bcs;
42d003e0aeSVladimir Sementsov-Ogievskiy     BdrvChild *target;
43*af5bcd77SVladimir Sementsov-Ogievskiy 
44*af5bcd77SVladimir Sementsov-Ogievskiy     /*
45*af5bcd77SVladimir Sementsov-Ogievskiy      * @lock: protects access to @access_bitmap, @done_bitmap and
46*af5bcd77SVladimir Sementsov-Ogievskiy      * @frozen_read_reqs
47*af5bcd77SVladimir Sementsov-Ogievskiy      */
48*af5bcd77SVladimir Sementsov-Ogievskiy     CoMutex lock;
49*af5bcd77SVladimir Sementsov-Ogievskiy 
50*af5bcd77SVladimir Sementsov-Ogievskiy     /*
51*af5bcd77SVladimir Sementsov-Ogievskiy      * @access_bitmap: represents areas allowed for reading by fleecing user.
52*af5bcd77SVladimir Sementsov-Ogievskiy      * Reading from non-dirty areas leads to -EACCES.
53*af5bcd77SVladimir Sementsov-Ogievskiy      */
54*af5bcd77SVladimir Sementsov-Ogievskiy     BdrvDirtyBitmap *access_bitmap;
55*af5bcd77SVladimir Sementsov-Ogievskiy 
56*af5bcd77SVladimir Sementsov-Ogievskiy     /*
57*af5bcd77SVladimir Sementsov-Ogievskiy      * @done_bitmap: represents areas that was successfully copied to @target by
58*af5bcd77SVladimir Sementsov-Ogievskiy      * copy-before-write operations.
59*af5bcd77SVladimir Sementsov-Ogievskiy      */
60*af5bcd77SVladimir Sementsov-Ogievskiy     BdrvDirtyBitmap *done_bitmap;
61*af5bcd77SVladimir Sementsov-Ogievskiy 
62*af5bcd77SVladimir Sementsov-Ogievskiy     /*
63*af5bcd77SVladimir Sementsov-Ogievskiy      * @frozen_read_reqs: current read requests for fleecing user in bs->file
64*af5bcd77SVladimir Sementsov-Ogievskiy      * node. These areas must not be rewritten by guest.
65*af5bcd77SVladimir Sementsov-Ogievskiy      */
66*af5bcd77SVladimir Sementsov-Ogievskiy     BlockReqList frozen_read_reqs;
67d003e0aeSVladimir Sementsov-Ogievskiy } BDRVCopyBeforeWriteState;
68d003e0aeSVladimir Sementsov-Ogievskiy 
69d003e0aeSVladimir Sementsov-Ogievskiy static coroutine_fn int cbw_co_preadv(
70f7ef38ddSVladimir Sementsov-Ogievskiy         BlockDriverState *bs, int64_t offset, int64_t bytes,
71f7ef38ddSVladimir Sementsov-Ogievskiy         QEMUIOVector *qiov, BdrvRequestFlags flags)
72d003e0aeSVladimir Sementsov-Ogievskiy {
733c1e6327SVladimir Sementsov-Ogievskiy     return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
74d003e0aeSVladimir Sementsov-Ogievskiy }
75d003e0aeSVladimir Sementsov-Ogievskiy 
76*af5bcd77SVladimir Sementsov-Ogievskiy /*
77*af5bcd77SVladimir Sementsov-Ogievskiy  * Do copy-before-write operation.
78*af5bcd77SVladimir Sementsov-Ogievskiy  *
79*af5bcd77SVladimir Sementsov-Ogievskiy  * On failure guest request must be failed too.
80*af5bcd77SVladimir Sementsov-Ogievskiy  *
81*af5bcd77SVladimir Sementsov-Ogievskiy  * On success, we also wait for all in-flight fleecing read requests in source
82*af5bcd77SVladimir Sementsov-Ogievskiy  * node, and it's guaranteed that after cbw_do_copy_before_write() successful
83*af5bcd77SVladimir Sementsov-Ogievskiy  * return there are no such requests and they will never appear.
84*af5bcd77SVladimir Sementsov-Ogievskiy  */
85d003e0aeSVladimir Sementsov-Ogievskiy static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
86d003e0aeSVladimir Sementsov-Ogievskiy         uint64_t offset, uint64_t bytes, BdrvRequestFlags flags)
87d003e0aeSVladimir Sementsov-Ogievskiy {
88d003e0aeSVladimir Sementsov-Ogievskiy     BDRVCopyBeforeWriteState *s = bs->opaque;
89*af5bcd77SVladimir Sementsov-Ogievskiy     int ret;
90d003e0aeSVladimir Sementsov-Ogievskiy     uint64_t off, end;
91b518e9e9SVladimir Sementsov-Ogievskiy     int64_t cluster_size = block_copy_cluster_size(s->bcs);
92d003e0aeSVladimir Sementsov-Ogievskiy 
93d003e0aeSVladimir Sementsov-Ogievskiy     if (flags & BDRV_REQ_WRITE_UNCHANGED) {
94d003e0aeSVladimir Sementsov-Ogievskiy         return 0;
95d003e0aeSVladimir Sementsov-Ogievskiy     }
96d003e0aeSVladimir Sementsov-Ogievskiy 
97b518e9e9SVladimir Sementsov-Ogievskiy     off = QEMU_ALIGN_DOWN(offset, cluster_size);
98b518e9e9SVladimir Sementsov-Ogievskiy     end = QEMU_ALIGN_UP(offset + bytes, cluster_size);
99d003e0aeSVladimir Sementsov-Ogievskiy 
100*af5bcd77SVladimir Sementsov-Ogievskiy     ret = block_copy(s->bcs, off, end - off, true);
101*af5bcd77SVladimir Sementsov-Ogievskiy     if (ret < 0) {
102*af5bcd77SVladimir Sementsov-Ogievskiy         return ret;
103*af5bcd77SVladimir Sementsov-Ogievskiy     }
104*af5bcd77SVladimir Sementsov-Ogievskiy 
105*af5bcd77SVladimir Sementsov-Ogievskiy     WITH_QEMU_LOCK_GUARD(&s->lock) {
106*af5bcd77SVladimir Sementsov-Ogievskiy         bdrv_set_dirty_bitmap(s->done_bitmap, off, end - off);
107*af5bcd77SVladimir Sementsov-Ogievskiy         reqlist_wait_all(&s->frozen_read_reqs, off, end - off, &s->lock);
108*af5bcd77SVladimir Sementsov-Ogievskiy     }
109*af5bcd77SVladimir Sementsov-Ogievskiy 
110*af5bcd77SVladimir Sementsov-Ogievskiy     return 0;
111d003e0aeSVladimir Sementsov-Ogievskiy }
112d003e0aeSVladimir Sementsov-Ogievskiy 
113d003e0aeSVladimir Sementsov-Ogievskiy static int coroutine_fn cbw_co_pdiscard(BlockDriverState *bs,
1140c802287SVladimir Sementsov-Ogievskiy                                         int64_t offset, int64_t bytes)
115d003e0aeSVladimir Sementsov-Ogievskiy {
116d003e0aeSVladimir Sementsov-Ogievskiy     int ret = cbw_do_copy_before_write(bs, offset, bytes, 0);
117d003e0aeSVladimir Sementsov-Ogievskiy     if (ret < 0) {
118d003e0aeSVladimir Sementsov-Ogievskiy         return ret;
119d003e0aeSVladimir Sementsov-Ogievskiy     }
120d003e0aeSVladimir Sementsov-Ogievskiy 
1213c1e6327SVladimir Sementsov-Ogievskiy     return bdrv_co_pdiscard(bs->file, offset, bytes);
122d003e0aeSVladimir Sementsov-Ogievskiy }
123d003e0aeSVladimir Sementsov-Ogievskiy 
124d003e0aeSVladimir Sementsov-Ogievskiy static int coroutine_fn cbw_co_pwrite_zeroes(BlockDriverState *bs,
125f34b2bcfSVladimir Sementsov-Ogievskiy         int64_t offset, int64_t bytes, BdrvRequestFlags flags)
126d003e0aeSVladimir Sementsov-Ogievskiy {
127d003e0aeSVladimir Sementsov-Ogievskiy     int ret = cbw_do_copy_before_write(bs, offset, bytes, flags);
128d003e0aeSVladimir Sementsov-Ogievskiy     if (ret < 0) {
129d003e0aeSVladimir Sementsov-Ogievskiy         return ret;
130d003e0aeSVladimir Sementsov-Ogievskiy     }
131d003e0aeSVladimir Sementsov-Ogievskiy 
1323c1e6327SVladimir Sementsov-Ogievskiy     return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
133d003e0aeSVladimir Sementsov-Ogievskiy }
134d003e0aeSVladimir Sementsov-Ogievskiy 
135d003e0aeSVladimir Sementsov-Ogievskiy static coroutine_fn int cbw_co_pwritev(BlockDriverState *bs,
136e75abedaSVladimir Sementsov-Ogievskiy                                        int64_t offset,
137e75abedaSVladimir Sementsov-Ogievskiy                                        int64_t bytes,
138e75abedaSVladimir Sementsov-Ogievskiy                                        QEMUIOVector *qiov,
139e75abedaSVladimir Sementsov-Ogievskiy                                        BdrvRequestFlags flags)
140d003e0aeSVladimir Sementsov-Ogievskiy {
141d003e0aeSVladimir Sementsov-Ogievskiy     int ret = cbw_do_copy_before_write(bs, offset, bytes, flags);
142d003e0aeSVladimir Sementsov-Ogievskiy     if (ret < 0) {
143d003e0aeSVladimir Sementsov-Ogievskiy         return ret;
144d003e0aeSVladimir Sementsov-Ogievskiy     }
145d003e0aeSVladimir Sementsov-Ogievskiy 
1463c1e6327SVladimir Sementsov-Ogievskiy     return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
147d003e0aeSVladimir Sementsov-Ogievskiy }
148d003e0aeSVladimir Sementsov-Ogievskiy 
149d003e0aeSVladimir Sementsov-Ogievskiy static int coroutine_fn cbw_co_flush(BlockDriverState *bs)
150d003e0aeSVladimir Sementsov-Ogievskiy {
1513c1e6327SVladimir Sementsov-Ogievskiy     if (!bs->file) {
152d003e0aeSVladimir Sementsov-Ogievskiy         return 0;
153d003e0aeSVladimir Sementsov-Ogievskiy     }
154d003e0aeSVladimir Sementsov-Ogievskiy 
1553c1e6327SVladimir Sementsov-Ogievskiy     return bdrv_co_flush(bs->file->bs);
156d003e0aeSVladimir Sementsov-Ogievskiy }
157d003e0aeSVladimir Sementsov-Ogievskiy 
158*af5bcd77SVladimir Sementsov-Ogievskiy /*
159*af5bcd77SVladimir Sementsov-Ogievskiy  * If @offset not accessible - return NULL.
160*af5bcd77SVladimir Sementsov-Ogievskiy  *
161*af5bcd77SVladimir Sementsov-Ogievskiy  * Otherwise, set @pnum to some bytes that accessible from @file (@file is set
162*af5bcd77SVladimir Sementsov-Ogievskiy  * to bs->file or to s->target). Return newly allocated BlockReq object that
163*af5bcd77SVladimir Sementsov-Ogievskiy  * should be than passed to cbw_snapshot_read_unlock().
164*af5bcd77SVladimir Sementsov-Ogievskiy  *
165*af5bcd77SVladimir Sementsov-Ogievskiy  * It's guaranteed that guest writes will not interact in the region until
166*af5bcd77SVladimir Sementsov-Ogievskiy  * cbw_snapshot_read_unlock() called.
167*af5bcd77SVladimir Sementsov-Ogievskiy  */
168*af5bcd77SVladimir Sementsov-Ogievskiy static BlockReq *cbw_snapshot_read_lock(BlockDriverState *bs,
169*af5bcd77SVladimir Sementsov-Ogievskiy                                         int64_t offset, int64_t bytes,
170*af5bcd77SVladimir Sementsov-Ogievskiy                                         int64_t *pnum, BdrvChild **file)
171*af5bcd77SVladimir Sementsov-Ogievskiy {
172*af5bcd77SVladimir Sementsov-Ogievskiy     BDRVCopyBeforeWriteState *s = bs->opaque;
173*af5bcd77SVladimir Sementsov-Ogievskiy     BlockReq *req = g_new(BlockReq, 1);
174*af5bcd77SVladimir Sementsov-Ogievskiy     bool done;
175*af5bcd77SVladimir Sementsov-Ogievskiy 
176*af5bcd77SVladimir Sementsov-Ogievskiy     QEMU_LOCK_GUARD(&s->lock);
177*af5bcd77SVladimir Sementsov-Ogievskiy 
178*af5bcd77SVladimir Sementsov-Ogievskiy     if (bdrv_dirty_bitmap_next_zero(s->access_bitmap, offset, bytes) != -1) {
179*af5bcd77SVladimir Sementsov-Ogievskiy         g_free(req);
180*af5bcd77SVladimir Sementsov-Ogievskiy         return NULL;
181*af5bcd77SVladimir Sementsov-Ogievskiy     }
182*af5bcd77SVladimir Sementsov-Ogievskiy 
183*af5bcd77SVladimir Sementsov-Ogievskiy     done = bdrv_dirty_bitmap_status(s->done_bitmap, offset, bytes, pnum);
184*af5bcd77SVladimir Sementsov-Ogievskiy     if (done) {
185*af5bcd77SVladimir Sementsov-Ogievskiy         /*
186*af5bcd77SVladimir Sementsov-Ogievskiy          * Special invalid BlockReq, that is handled in
187*af5bcd77SVladimir Sementsov-Ogievskiy          * cbw_snapshot_read_unlock(). We don't need to lock something to read
188*af5bcd77SVladimir Sementsov-Ogievskiy          * from s->target.
189*af5bcd77SVladimir Sementsov-Ogievskiy          */
190*af5bcd77SVladimir Sementsov-Ogievskiy         *req = (BlockReq) {.offset = -1, .bytes = -1};
191*af5bcd77SVladimir Sementsov-Ogievskiy         *file = s->target;
192*af5bcd77SVladimir Sementsov-Ogievskiy     } else {
193*af5bcd77SVladimir Sementsov-Ogievskiy         reqlist_init_req(&s->frozen_read_reqs, req, offset, bytes);
194*af5bcd77SVladimir Sementsov-Ogievskiy         *file = bs->file;
195*af5bcd77SVladimir Sementsov-Ogievskiy     }
196*af5bcd77SVladimir Sementsov-Ogievskiy 
197*af5bcd77SVladimir Sementsov-Ogievskiy     return req;
198*af5bcd77SVladimir Sementsov-Ogievskiy }
199*af5bcd77SVladimir Sementsov-Ogievskiy 
200*af5bcd77SVladimir Sementsov-Ogievskiy static void cbw_snapshot_read_unlock(BlockDriverState *bs, BlockReq *req)
201*af5bcd77SVladimir Sementsov-Ogievskiy {
202*af5bcd77SVladimir Sementsov-Ogievskiy     BDRVCopyBeforeWriteState *s = bs->opaque;
203*af5bcd77SVladimir Sementsov-Ogievskiy 
204*af5bcd77SVladimir Sementsov-Ogievskiy     if (req->offset == -1 && req->bytes == -1) {
205*af5bcd77SVladimir Sementsov-Ogievskiy         g_free(req);
206*af5bcd77SVladimir Sementsov-Ogievskiy         return;
207*af5bcd77SVladimir Sementsov-Ogievskiy     }
208*af5bcd77SVladimir Sementsov-Ogievskiy 
209*af5bcd77SVladimir Sementsov-Ogievskiy     QEMU_LOCK_GUARD(&s->lock);
210*af5bcd77SVladimir Sementsov-Ogievskiy 
211*af5bcd77SVladimir Sementsov-Ogievskiy     reqlist_remove_req(req);
212*af5bcd77SVladimir Sementsov-Ogievskiy     g_free(req);
213*af5bcd77SVladimir Sementsov-Ogievskiy }
214*af5bcd77SVladimir Sementsov-Ogievskiy 
215*af5bcd77SVladimir Sementsov-Ogievskiy static coroutine_fn int
216*af5bcd77SVladimir Sementsov-Ogievskiy cbw_co_preadv_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes,
217*af5bcd77SVladimir Sementsov-Ogievskiy                        QEMUIOVector *qiov, size_t qiov_offset)
218*af5bcd77SVladimir Sementsov-Ogievskiy {
219*af5bcd77SVladimir Sementsov-Ogievskiy     BlockReq *req;
220*af5bcd77SVladimir Sementsov-Ogievskiy     BdrvChild *file;
221*af5bcd77SVladimir Sementsov-Ogievskiy     int ret;
222*af5bcd77SVladimir Sementsov-Ogievskiy 
223*af5bcd77SVladimir Sementsov-Ogievskiy     /* TODO: upgrade to async loop using AioTask */
224*af5bcd77SVladimir Sementsov-Ogievskiy     while (bytes) {
225*af5bcd77SVladimir Sementsov-Ogievskiy         int64_t cur_bytes;
226*af5bcd77SVladimir Sementsov-Ogievskiy 
227*af5bcd77SVladimir Sementsov-Ogievskiy         req = cbw_snapshot_read_lock(bs, offset, bytes, &cur_bytes, &file);
228*af5bcd77SVladimir Sementsov-Ogievskiy         if (!req) {
229*af5bcd77SVladimir Sementsov-Ogievskiy             return -EACCES;
230*af5bcd77SVladimir Sementsov-Ogievskiy         }
231*af5bcd77SVladimir Sementsov-Ogievskiy 
232*af5bcd77SVladimir Sementsov-Ogievskiy         ret = bdrv_co_preadv_part(file, offset, cur_bytes,
233*af5bcd77SVladimir Sementsov-Ogievskiy                                   qiov, qiov_offset, 0);
234*af5bcd77SVladimir Sementsov-Ogievskiy         cbw_snapshot_read_unlock(bs, req);
235*af5bcd77SVladimir Sementsov-Ogievskiy         if (ret < 0) {
236*af5bcd77SVladimir Sementsov-Ogievskiy             return ret;
237*af5bcd77SVladimir Sementsov-Ogievskiy         }
238*af5bcd77SVladimir Sementsov-Ogievskiy 
239*af5bcd77SVladimir Sementsov-Ogievskiy         bytes -= cur_bytes;
240*af5bcd77SVladimir Sementsov-Ogievskiy         offset += cur_bytes;
241*af5bcd77SVladimir Sementsov-Ogievskiy         qiov_offset += cur_bytes;
242*af5bcd77SVladimir Sementsov-Ogievskiy     }
243*af5bcd77SVladimir Sementsov-Ogievskiy 
244*af5bcd77SVladimir Sementsov-Ogievskiy     return 0;
245*af5bcd77SVladimir Sementsov-Ogievskiy }
246*af5bcd77SVladimir Sementsov-Ogievskiy 
247*af5bcd77SVladimir Sementsov-Ogievskiy static int coroutine_fn
248*af5bcd77SVladimir Sementsov-Ogievskiy cbw_co_snapshot_block_status(BlockDriverState *bs,
249*af5bcd77SVladimir Sementsov-Ogievskiy                              bool want_zero, int64_t offset, int64_t bytes,
250*af5bcd77SVladimir Sementsov-Ogievskiy                              int64_t *pnum, int64_t *map,
251*af5bcd77SVladimir Sementsov-Ogievskiy                              BlockDriverState **file)
252*af5bcd77SVladimir Sementsov-Ogievskiy {
253*af5bcd77SVladimir Sementsov-Ogievskiy     BDRVCopyBeforeWriteState *s = bs->opaque;
254*af5bcd77SVladimir Sementsov-Ogievskiy     BlockReq *req;
255*af5bcd77SVladimir Sementsov-Ogievskiy     int ret;
256*af5bcd77SVladimir Sementsov-Ogievskiy     int64_t cur_bytes;
257*af5bcd77SVladimir Sementsov-Ogievskiy     BdrvChild *child;
258*af5bcd77SVladimir Sementsov-Ogievskiy 
259*af5bcd77SVladimir Sementsov-Ogievskiy     req = cbw_snapshot_read_lock(bs, offset, bytes, &cur_bytes, &child);
260*af5bcd77SVladimir Sementsov-Ogievskiy     if (!req) {
261*af5bcd77SVladimir Sementsov-Ogievskiy         return -EACCES;
262*af5bcd77SVladimir Sementsov-Ogievskiy     }
263*af5bcd77SVladimir Sementsov-Ogievskiy 
264*af5bcd77SVladimir Sementsov-Ogievskiy     ret = bdrv_block_status(child->bs, offset, cur_bytes, pnum, map, file);
265*af5bcd77SVladimir Sementsov-Ogievskiy     if (child == s->target) {
266*af5bcd77SVladimir Sementsov-Ogievskiy         /*
267*af5bcd77SVladimir Sementsov-Ogievskiy          * We refer to s->target only for areas that we've written to it.
268*af5bcd77SVladimir Sementsov-Ogievskiy          * And we can not report unallocated blocks in s->target: this will
269*af5bcd77SVladimir Sementsov-Ogievskiy          * break generic block-status-above logic, that will go to
270*af5bcd77SVladimir Sementsov-Ogievskiy          * copy-before-write filtered child in this case.
271*af5bcd77SVladimir Sementsov-Ogievskiy          */
272*af5bcd77SVladimir Sementsov-Ogievskiy         assert(ret & BDRV_BLOCK_ALLOCATED);
273*af5bcd77SVladimir Sementsov-Ogievskiy     }
274*af5bcd77SVladimir Sementsov-Ogievskiy 
275*af5bcd77SVladimir Sementsov-Ogievskiy     cbw_snapshot_read_unlock(bs, req);
276*af5bcd77SVladimir Sementsov-Ogievskiy 
277*af5bcd77SVladimir Sementsov-Ogievskiy     return ret;
278*af5bcd77SVladimir Sementsov-Ogievskiy }
279*af5bcd77SVladimir Sementsov-Ogievskiy 
280*af5bcd77SVladimir Sementsov-Ogievskiy static int coroutine_fn cbw_co_pdiscard_snapshot(BlockDriverState *bs,
281*af5bcd77SVladimir Sementsov-Ogievskiy                                                  int64_t offset, int64_t bytes)
282*af5bcd77SVladimir Sementsov-Ogievskiy {
283*af5bcd77SVladimir Sementsov-Ogievskiy     BDRVCopyBeforeWriteState *s = bs->opaque;
284*af5bcd77SVladimir Sementsov-Ogievskiy 
285*af5bcd77SVladimir Sementsov-Ogievskiy     WITH_QEMU_LOCK_GUARD(&s->lock) {
286*af5bcd77SVladimir Sementsov-Ogievskiy         bdrv_reset_dirty_bitmap(s->access_bitmap, offset, bytes);
287*af5bcd77SVladimir Sementsov-Ogievskiy     }
288*af5bcd77SVladimir Sementsov-Ogievskiy 
289*af5bcd77SVladimir Sementsov-Ogievskiy     block_copy_reset(s->bcs, offset, bytes);
290*af5bcd77SVladimir Sementsov-Ogievskiy 
291*af5bcd77SVladimir Sementsov-Ogievskiy     return bdrv_co_pdiscard(s->target, offset, bytes);
292*af5bcd77SVladimir Sementsov-Ogievskiy }
293*af5bcd77SVladimir Sementsov-Ogievskiy 
294d003e0aeSVladimir Sementsov-Ogievskiy static void cbw_refresh_filename(BlockDriverState *bs)
295d003e0aeSVladimir Sementsov-Ogievskiy {
296d003e0aeSVladimir Sementsov-Ogievskiy     pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
2973c1e6327SVladimir Sementsov-Ogievskiy             bs->file->bs->filename);
298d003e0aeSVladimir Sementsov-Ogievskiy }
299d003e0aeSVladimir Sementsov-Ogievskiy 
300d003e0aeSVladimir Sementsov-Ogievskiy static void cbw_child_perm(BlockDriverState *bs, BdrvChild *c,
301d003e0aeSVladimir Sementsov-Ogievskiy                            BdrvChildRole role,
302d003e0aeSVladimir Sementsov-Ogievskiy                            BlockReopenQueue *reopen_queue,
303d003e0aeSVladimir Sementsov-Ogievskiy                            uint64_t perm, uint64_t shared,
304d003e0aeSVladimir Sementsov-Ogievskiy                            uint64_t *nperm, uint64_t *nshared)
305d003e0aeSVladimir Sementsov-Ogievskiy {
306d003e0aeSVladimir Sementsov-Ogievskiy     if (!(role & BDRV_CHILD_FILTERED)) {
307d003e0aeSVladimir Sementsov-Ogievskiy         /*
308d003e0aeSVladimir Sementsov-Ogievskiy          * Target child
309d003e0aeSVladimir Sementsov-Ogievskiy          *
310d003e0aeSVladimir Sementsov-Ogievskiy          * Share write to target (child_file), to not interfere
311d003e0aeSVladimir Sementsov-Ogievskiy          * with guest writes to its disk which may be in target backing chain.
312d003e0aeSVladimir Sementsov-Ogievskiy          * Can't resize during a backup block job because we check the size
313d003e0aeSVladimir Sementsov-Ogievskiy          * only upfront.
314d003e0aeSVladimir Sementsov-Ogievskiy          */
315d003e0aeSVladimir Sementsov-Ogievskiy         *nshared = BLK_PERM_ALL & ~BLK_PERM_RESIZE;
316d003e0aeSVladimir Sementsov-Ogievskiy         *nperm = BLK_PERM_WRITE;
317d003e0aeSVladimir Sementsov-Ogievskiy     } else {
318d003e0aeSVladimir Sementsov-Ogievskiy         /* Source child */
319d003e0aeSVladimir Sementsov-Ogievskiy         bdrv_default_perms(bs, c, role, reopen_queue,
320d003e0aeSVladimir Sementsov-Ogievskiy                            perm, shared, nperm, nshared);
321d003e0aeSVladimir Sementsov-Ogievskiy 
3223860c020SVladimir Sementsov-Ogievskiy         if (!QLIST_EMPTY(&bs->parents)) {
323d003e0aeSVladimir Sementsov-Ogievskiy             if (perm & BLK_PERM_WRITE) {
324d003e0aeSVladimir Sementsov-Ogievskiy                 *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
325d003e0aeSVladimir Sementsov-Ogievskiy             }
326d003e0aeSVladimir Sementsov-Ogievskiy             *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
327d003e0aeSVladimir Sementsov-Ogievskiy         }
328d003e0aeSVladimir Sementsov-Ogievskiy     }
3293860c020SVladimir Sementsov-Ogievskiy }
330d003e0aeSVladimir Sementsov-Ogievskiy 
3315f3a3cd7SVladimir Sementsov-Ogievskiy static bool cbw_parse_bitmap_option(QDict *options, BdrvDirtyBitmap **bitmap,
3325f3a3cd7SVladimir Sementsov-Ogievskiy                                     Error **errp)
3335f3a3cd7SVladimir Sementsov-Ogievskiy {
3345f3a3cd7SVladimir Sementsov-Ogievskiy     QDict *bitmap_qdict = NULL;
3355f3a3cd7SVladimir Sementsov-Ogievskiy     BlockDirtyBitmap *bmp_param = NULL;
3365f3a3cd7SVladimir Sementsov-Ogievskiy     Visitor *v = NULL;
3375f3a3cd7SVladimir Sementsov-Ogievskiy     bool ret = false;
3385f3a3cd7SVladimir Sementsov-Ogievskiy 
3395f3a3cd7SVladimir Sementsov-Ogievskiy     *bitmap = NULL;
3405f3a3cd7SVladimir Sementsov-Ogievskiy 
3415f3a3cd7SVladimir Sementsov-Ogievskiy     qdict_extract_subqdict(options, &bitmap_qdict, "bitmap.");
3425f3a3cd7SVladimir Sementsov-Ogievskiy     if (!qdict_size(bitmap_qdict)) {
3435f3a3cd7SVladimir Sementsov-Ogievskiy         ret = true;
3445f3a3cd7SVladimir Sementsov-Ogievskiy         goto out;
3455f3a3cd7SVladimir Sementsov-Ogievskiy     }
3465f3a3cd7SVladimir Sementsov-Ogievskiy 
3475f3a3cd7SVladimir Sementsov-Ogievskiy     v = qobject_input_visitor_new_flat_confused(bitmap_qdict, errp);
3485f3a3cd7SVladimir Sementsov-Ogievskiy     if (!v) {
3495f3a3cd7SVladimir Sementsov-Ogievskiy         goto out;
3505f3a3cd7SVladimir Sementsov-Ogievskiy     }
3515f3a3cd7SVladimir Sementsov-Ogievskiy 
3525f3a3cd7SVladimir Sementsov-Ogievskiy     visit_type_BlockDirtyBitmap(v, NULL, &bmp_param, errp);
3535f3a3cd7SVladimir Sementsov-Ogievskiy     if (!bmp_param) {
3545f3a3cd7SVladimir Sementsov-Ogievskiy         goto out;
3555f3a3cd7SVladimir Sementsov-Ogievskiy     }
3565f3a3cd7SVladimir Sementsov-Ogievskiy 
3575f3a3cd7SVladimir Sementsov-Ogievskiy     *bitmap = block_dirty_bitmap_lookup(bmp_param->node, bmp_param->name, NULL,
3585f3a3cd7SVladimir Sementsov-Ogievskiy                                         errp);
3595f3a3cd7SVladimir Sementsov-Ogievskiy     if (!*bitmap) {
3605f3a3cd7SVladimir Sementsov-Ogievskiy         goto out;
3615f3a3cd7SVladimir Sementsov-Ogievskiy     }
3625f3a3cd7SVladimir Sementsov-Ogievskiy 
3635f3a3cd7SVladimir Sementsov-Ogievskiy     ret = true;
3645f3a3cd7SVladimir Sementsov-Ogievskiy 
3655f3a3cd7SVladimir Sementsov-Ogievskiy out:
3665f3a3cd7SVladimir Sementsov-Ogievskiy     qapi_free_BlockDirtyBitmap(bmp_param);
3675f3a3cd7SVladimir Sementsov-Ogievskiy     visit_free(v);
3685f3a3cd7SVladimir Sementsov-Ogievskiy     qobject_unref(bitmap_qdict);
3695f3a3cd7SVladimir Sementsov-Ogievskiy 
3705f3a3cd7SVladimir Sementsov-Ogievskiy     return ret;
3715f3a3cd7SVladimir Sementsov-Ogievskiy }
3725f3a3cd7SVladimir Sementsov-Ogievskiy 
373751cec7aSVladimir Sementsov-Ogievskiy static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
374751cec7aSVladimir Sementsov-Ogievskiy                     Error **errp)
3751f0cacb9SVladimir Sementsov-Ogievskiy {
376fe7ea40cSVladimir Sementsov-Ogievskiy     BDRVCopyBeforeWriteState *s = bs->opaque;
3775f3a3cd7SVladimir Sementsov-Ogievskiy     BdrvDirtyBitmap *bitmap = NULL;
378*af5bcd77SVladimir Sementsov-Ogievskiy     int64_t cluster_size;
3791f0cacb9SVladimir Sementsov-Ogievskiy 
380f44fd739SVladimir Sementsov-Ogievskiy     bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
381f44fd739SVladimir Sementsov-Ogievskiy                                BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
382f44fd739SVladimir Sementsov-Ogievskiy                                false, errp);
383f44fd739SVladimir Sementsov-Ogievskiy     if (!bs->file) {
3841f0cacb9SVladimir Sementsov-Ogievskiy         return -EINVAL;
3851f0cacb9SVladimir Sementsov-Ogievskiy     }
3861f0cacb9SVladimir Sementsov-Ogievskiy 
387f44fd739SVladimir Sementsov-Ogievskiy     s->target = bdrv_open_child(NULL, options, "target", bs, &child_of_bds,
388f44fd739SVladimir Sementsov-Ogievskiy                                 BDRV_CHILD_DATA, false, errp);
389f44fd739SVladimir Sementsov-Ogievskiy     if (!s->target) {
3901f0cacb9SVladimir Sementsov-Ogievskiy         return -EINVAL;
3911f0cacb9SVladimir Sementsov-Ogievskiy     }
3921f0cacb9SVladimir Sementsov-Ogievskiy 
3935f3a3cd7SVladimir Sementsov-Ogievskiy     if (!cbw_parse_bitmap_option(options, &bitmap, errp)) {
3945f3a3cd7SVladimir Sementsov-Ogievskiy         return -EINVAL;
3955f3a3cd7SVladimir Sementsov-Ogievskiy     }
3965f3a3cd7SVladimir Sementsov-Ogievskiy 
3975a507426SVladimir Sementsov-Ogievskiy     bs->total_sectors = bs->file->bs->total_sectors;
3985a507426SVladimir Sementsov-Ogievskiy     bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
3995a507426SVladimir Sementsov-Ogievskiy             (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
4005a507426SVladimir Sementsov-Ogievskiy     bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
4015a507426SVladimir Sementsov-Ogievskiy             ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
4025a507426SVladimir Sementsov-Ogievskiy              bs->file->bs->supported_zero_flags);
4035a507426SVladimir Sementsov-Ogievskiy 
4045f3a3cd7SVladimir Sementsov-Ogievskiy     s->bcs = block_copy_state_new(bs->file, s->target, bitmap, errp);
405fe7ea40cSVladimir Sementsov-Ogievskiy     if (!s->bcs) {
4061f0cacb9SVladimir Sementsov-Ogievskiy         error_prepend(errp, "Cannot create block-copy-state: ");
4071f0cacb9SVladimir Sementsov-Ogievskiy         return -EINVAL;
4081f0cacb9SVladimir Sementsov-Ogievskiy     }
4091f0cacb9SVladimir Sementsov-Ogievskiy 
410*af5bcd77SVladimir Sementsov-Ogievskiy     cluster_size = block_copy_cluster_size(s->bcs);
411*af5bcd77SVladimir Sementsov-Ogievskiy 
412*af5bcd77SVladimir Sementsov-Ogievskiy     s->done_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp);
413*af5bcd77SVladimir Sementsov-Ogievskiy     if (!s->done_bitmap) {
414*af5bcd77SVladimir Sementsov-Ogievskiy         return -EINVAL;
415*af5bcd77SVladimir Sementsov-Ogievskiy     }
416*af5bcd77SVladimir Sementsov-Ogievskiy     bdrv_disable_dirty_bitmap(s->done_bitmap);
417*af5bcd77SVladimir Sementsov-Ogievskiy 
418*af5bcd77SVladimir Sementsov-Ogievskiy     /* s->access_bitmap starts equal to bcs bitmap */
419*af5bcd77SVladimir Sementsov-Ogievskiy     s->access_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp);
420*af5bcd77SVladimir Sementsov-Ogievskiy     if (!s->access_bitmap) {
421*af5bcd77SVladimir Sementsov-Ogievskiy         return -EINVAL;
422*af5bcd77SVladimir Sementsov-Ogievskiy     }
423*af5bcd77SVladimir Sementsov-Ogievskiy     bdrv_disable_dirty_bitmap(s->access_bitmap);
424*af5bcd77SVladimir Sementsov-Ogievskiy     bdrv_dirty_bitmap_merge_internal(s->access_bitmap,
425*af5bcd77SVladimir Sementsov-Ogievskiy                                      block_copy_dirty_bitmap(s->bcs), NULL,
426*af5bcd77SVladimir Sementsov-Ogievskiy                                      true);
427*af5bcd77SVladimir Sementsov-Ogievskiy 
428*af5bcd77SVladimir Sementsov-Ogievskiy     qemu_co_mutex_init(&s->lock);
429*af5bcd77SVladimir Sementsov-Ogievskiy     QLIST_INIT(&s->frozen_read_reqs);
430*af5bcd77SVladimir Sementsov-Ogievskiy 
4311f0cacb9SVladimir Sementsov-Ogievskiy     return 0;
4321f0cacb9SVladimir Sementsov-Ogievskiy }
4331f0cacb9SVladimir Sementsov-Ogievskiy 
434751cec7aSVladimir Sementsov-Ogievskiy static void cbw_close(BlockDriverState *bs)
435751cec7aSVladimir Sementsov-Ogievskiy {
436751cec7aSVladimir Sementsov-Ogievskiy     BDRVCopyBeforeWriteState *s = bs->opaque;
437751cec7aSVladimir Sementsov-Ogievskiy 
438*af5bcd77SVladimir Sementsov-Ogievskiy     bdrv_release_dirty_bitmap(s->access_bitmap);
439*af5bcd77SVladimir Sementsov-Ogievskiy     bdrv_release_dirty_bitmap(s->done_bitmap);
440*af5bcd77SVladimir Sementsov-Ogievskiy 
441751cec7aSVladimir Sementsov-Ogievskiy     block_copy_state_free(s->bcs);
442751cec7aSVladimir Sementsov-Ogievskiy     s->bcs = NULL;
443751cec7aSVladimir Sementsov-Ogievskiy }
444751cec7aSVladimir Sementsov-Ogievskiy 
445d003e0aeSVladimir Sementsov-Ogievskiy BlockDriver bdrv_cbw_filter = {
446d003e0aeSVladimir Sementsov-Ogievskiy     .format_name = "copy-before-write",
447d003e0aeSVladimir Sementsov-Ogievskiy     .instance_size = sizeof(BDRVCopyBeforeWriteState),
448d003e0aeSVladimir Sementsov-Ogievskiy 
449751cec7aSVladimir Sementsov-Ogievskiy     .bdrv_open                  = cbw_open,
450751cec7aSVladimir Sementsov-Ogievskiy     .bdrv_close                 = cbw_close,
451751cec7aSVladimir Sementsov-Ogievskiy 
452d003e0aeSVladimir Sementsov-Ogievskiy     .bdrv_co_preadv             = cbw_co_preadv,
453d003e0aeSVladimir Sementsov-Ogievskiy     .bdrv_co_pwritev            = cbw_co_pwritev,
454d003e0aeSVladimir Sementsov-Ogievskiy     .bdrv_co_pwrite_zeroes      = cbw_co_pwrite_zeroes,
455d003e0aeSVladimir Sementsov-Ogievskiy     .bdrv_co_pdiscard           = cbw_co_pdiscard,
456d003e0aeSVladimir Sementsov-Ogievskiy     .bdrv_co_flush              = cbw_co_flush,
457d003e0aeSVladimir Sementsov-Ogievskiy 
458*af5bcd77SVladimir Sementsov-Ogievskiy     .bdrv_co_preadv_snapshot       = cbw_co_preadv_snapshot,
459*af5bcd77SVladimir Sementsov-Ogievskiy     .bdrv_co_pdiscard_snapshot     = cbw_co_pdiscard_snapshot,
460*af5bcd77SVladimir Sementsov-Ogievskiy     .bdrv_co_snapshot_block_status = cbw_co_snapshot_block_status,
461*af5bcd77SVladimir Sementsov-Ogievskiy 
462d003e0aeSVladimir Sementsov-Ogievskiy     .bdrv_refresh_filename      = cbw_refresh_filename,
463d003e0aeSVladimir Sementsov-Ogievskiy 
464d003e0aeSVladimir Sementsov-Ogievskiy     .bdrv_child_perm            = cbw_child_perm,
465d003e0aeSVladimir Sementsov-Ogievskiy 
466d003e0aeSVladimir Sementsov-Ogievskiy     .is_filter = true,
467d003e0aeSVladimir Sementsov-Ogievskiy };
468d003e0aeSVladimir Sementsov-Ogievskiy 
469d003e0aeSVladimir Sementsov-Ogievskiy BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
470d003e0aeSVladimir Sementsov-Ogievskiy                                   BlockDriverState *target,
471d003e0aeSVladimir Sementsov-Ogievskiy                                   const char *filter_node_name,
472d003e0aeSVladimir Sementsov-Ogievskiy                                   BlockCopyState **bcs,
473d003e0aeSVladimir Sementsov-Ogievskiy                                   Error **errp)
474d003e0aeSVladimir Sementsov-Ogievskiy {
475d003e0aeSVladimir Sementsov-Ogievskiy     ERRP_GUARD();
476d003e0aeSVladimir Sementsov-Ogievskiy     BDRVCopyBeforeWriteState *state;
477d003e0aeSVladimir Sementsov-Ogievskiy     BlockDriverState *top;
478f44fd739SVladimir Sementsov-Ogievskiy     QDict *opts;
479d003e0aeSVladimir Sementsov-Ogievskiy 
480d003e0aeSVladimir Sementsov-Ogievskiy     assert(source->total_sectors == target->total_sectors);
481377cc15bSEmanuele Giuseppe Esposito     GLOBAL_STATE_CODE();
482d003e0aeSVladimir Sementsov-Ogievskiy 
483f44fd739SVladimir Sementsov-Ogievskiy     opts = qdict_new();
484751cec7aSVladimir Sementsov-Ogievskiy     qdict_put_str(opts, "driver", "copy-before-write");
485751cec7aSVladimir Sementsov-Ogievskiy     if (filter_node_name) {
486751cec7aSVladimir Sementsov-Ogievskiy         qdict_put_str(opts, "node-name", filter_node_name);
487751cec7aSVladimir Sementsov-Ogievskiy     }
488f44fd739SVladimir Sementsov-Ogievskiy     qdict_put_str(opts, "file", bdrv_get_node_name(source));
489f44fd739SVladimir Sementsov-Ogievskiy     qdict_put_str(opts, "target", bdrv_get_node_name(target));
490f44fd739SVladimir Sementsov-Ogievskiy 
491751cec7aSVladimir Sementsov-Ogievskiy     top = bdrv_insert_node(source, opts, BDRV_O_RDWR, errp);
492751cec7aSVladimir Sementsov-Ogievskiy     if (!top) {
493751cec7aSVladimir Sementsov-Ogievskiy         return NULL;
494d003e0aeSVladimir Sementsov-Ogievskiy     }
495d003e0aeSVladimir Sementsov-Ogievskiy 
496751cec7aSVladimir Sementsov-Ogievskiy     state = top->opaque;
4977ddbce2dSVladimir Sementsov-Ogievskiy     *bcs = state->bcs;
498d003e0aeSVladimir Sementsov-Ogievskiy 
499d003e0aeSVladimir Sementsov-Ogievskiy     return top;
500d003e0aeSVladimir Sementsov-Ogievskiy }
501d003e0aeSVladimir Sementsov-Ogievskiy 
502d003e0aeSVladimir Sementsov-Ogievskiy void bdrv_cbw_drop(BlockDriverState *bs)
503d003e0aeSVladimir Sementsov-Ogievskiy {
504377cc15bSEmanuele Giuseppe Esposito     GLOBAL_STATE_CODE();
505d003e0aeSVladimir Sementsov-Ogievskiy     bdrv_drop_filter(bs, &error_abort);
506d003e0aeSVladimir Sementsov-Ogievskiy     bdrv_unref(bs);
507d003e0aeSVladimir Sementsov-Ogievskiy }
508751cec7aSVladimir Sementsov-Ogievskiy 
509751cec7aSVladimir Sementsov-Ogievskiy static void cbw_init(void)
510751cec7aSVladimir Sementsov-Ogievskiy {
511751cec7aSVladimir Sementsov-Ogievskiy     bdrv_register(&bdrv_cbw_filter);
512751cec7aSVladimir Sementsov-Ogievskiy }
513751cec7aSVladimir Sementsov-Ogievskiy 
514751cec7aSVladimir Sementsov-Ogievskiy block_init(cbw_init);
515