xref: /openbmc/qemu/block/copy-before-write.c (revision 79ef0cebb5694411e7452f0cf15c4bd170c7f2d6)
1d003e0aeSVladimir Sementsov-Ogievskiy /*
2d003e0aeSVladimir Sementsov-Ogievskiy  * copy-before-write filter driver
3d003e0aeSVladimir Sementsov-Ogievskiy  *
4d003e0aeSVladimir Sementsov-Ogievskiy  * The driver performs Copy-Before-Write (CBW) operation: it is injected above
5d003e0aeSVladimir Sementsov-Ogievskiy  * some node, and before each write it copies _old_ data to the target node.
6d003e0aeSVladimir Sementsov-Ogievskiy  *
7d003e0aeSVladimir Sementsov-Ogievskiy  * Copyright (c) 2018-2021 Virtuozzo International GmbH.
8d003e0aeSVladimir Sementsov-Ogievskiy  *
9d003e0aeSVladimir Sementsov-Ogievskiy  * Author:
10d003e0aeSVladimir Sementsov-Ogievskiy  *  Sementsov-Ogievskiy Vladimir <vsementsov@virtuozzo.com>
11d003e0aeSVladimir Sementsov-Ogievskiy  *
12d003e0aeSVladimir Sementsov-Ogievskiy  * This program is free software; you can redistribute it and/or modify
13d003e0aeSVladimir Sementsov-Ogievskiy  * it under the terms of the GNU General Public License as published by
14d003e0aeSVladimir Sementsov-Ogievskiy  * the Free Software Foundation; either version 2 of the License, or
15d003e0aeSVladimir Sementsov-Ogievskiy  * (at your option) any later version.
16d003e0aeSVladimir Sementsov-Ogievskiy  *
17d003e0aeSVladimir Sementsov-Ogievskiy  * This program is distributed in the hope that it will be useful,
18d003e0aeSVladimir Sementsov-Ogievskiy  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19d003e0aeSVladimir Sementsov-Ogievskiy  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20d003e0aeSVladimir Sementsov-Ogievskiy  * GNU General Public License for more details.
21d003e0aeSVladimir Sementsov-Ogievskiy  *
22d003e0aeSVladimir Sementsov-Ogievskiy  * You should have received a copy of the GNU General Public License
23d003e0aeSVladimir Sementsov-Ogievskiy  * along with this program. If not, see <http://www.gnu.org/licenses/>.
24d003e0aeSVladimir Sementsov-Ogievskiy  */
25d003e0aeSVladimir Sementsov-Ogievskiy 
26d003e0aeSVladimir Sementsov-Ogievskiy #include "qemu/osdep.h"
27*79ef0cebSVladimir Sementsov-Ogievskiy #include "qapi/qmp/qjson.h"
28d003e0aeSVladimir Sementsov-Ogievskiy 
29d003e0aeSVladimir Sementsov-Ogievskiy #include "sysemu/block-backend.h"
30d003e0aeSVladimir Sementsov-Ogievskiy #include "qemu/cutils.h"
31d003e0aeSVladimir Sementsov-Ogievskiy #include "qapi/error.h"
32d003e0aeSVladimir Sementsov-Ogievskiy #include "block/block_int.h"
33d003e0aeSVladimir Sementsov-Ogievskiy #include "block/qdict.h"
34d003e0aeSVladimir Sementsov-Ogievskiy #include "block/block-copy.h"
35d003e0aeSVladimir Sementsov-Ogievskiy 
36d003e0aeSVladimir Sementsov-Ogievskiy #include "block/copy-before-write.h"
37af5bcd77SVladimir Sementsov-Ogievskiy #include "block/reqlist.h"
38d003e0aeSVladimir Sementsov-Ogievskiy 
395f3a3cd7SVladimir Sementsov-Ogievskiy #include "qapi/qapi-visit-block-core.h"
405f3a3cd7SVladimir Sementsov-Ogievskiy 
41d003e0aeSVladimir Sementsov-Ogievskiy typedef struct BDRVCopyBeforeWriteState {
42d003e0aeSVladimir Sementsov-Ogievskiy     BlockCopyState *bcs;
43d003e0aeSVladimir Sementsov-Ogievskiy     BdrvChild *target;
44af5bcd77SVladimir Sementsov-Ogievskiy 
45af5bcd77SVladimir Sementsov-Ogievskiy     /*
46af5bcd77SVladimir Sementsov-Ogievskiy      * @lock: protects access to @access_bitmap, @done_bitmap and
47af5bcd77SVladimir Sementsov-Ogievskiy      * @frozen_read_reqs
48af5bcd77SVladimir Sementsov-Ogievskiy      */
49af5bcd77SVladimir Sementsov-Ogievskiy     CoMutex lock;
50af5bcd77SVladimir Sementsov-Ogievskiy 
51af5bcd77SVladimir Sementsov-Ogievskiy     /*
52af5bcd77SVladimir Sementsov-Ogievskiy      * @access_bitmap: represents areas allowed for reading by fleecing user.
53af5bcd77SVladimir Sementsov-Ogievskiy      * Reading from non-dirty areas leads to -EACCES.
54af5bcd77SVladimir Sementsov-Ogievskiy      */
55af5bcd77SVladimir Sementsov-Ogievskiy     BdrvDirtyBitmap *access_bitmap;
56af5bcd77SVladimir Sementsov-Ogievskiy 
57af5bcd77SVladimir Sementsov-Ogievskiy     /*
58af5bcd77SVladimir Sementsov-Ogievskiy      * @done_bitmap: represents areas that was successfully copied to @target by
59af5bcd77SVladimir Sementsov-Ogievskiy      * copy-before-write operations.
60af5bcd77SVladimir Sementsov-Ogievskiy      */
61af5bcd77SVladimir Sementsov-Ogievskiy     BdrvDirtyBitmap *done_bitmap;
62af5bcd77SVladimir Sementsov-Ogievskiy 
63af5bcd77SVladimir Sementsov-Ogievskiy     /*
64af5bcd77SVladimir Sementsov-Ogievskiy      * @frozen_read_reqs: current read requests for fleecing user in bs->file
65af5bcd77SVladimir Sementsov-Ogievskiy      * node. These areas must not be rewritten by guest.
66af5bcd77SVladimir Sementsov-Ogievskiy      */
67af5bcd77SVladimir Sementsov-Ogievskiy     BlockReqList frozen_read_reqs;
68d003e0aeSVladimir Sementsov-Ogievskiy } BDRVCopyBeforeWriteState;
69d003e0aeSVladimir Sementsov-Ogievskiy 
70d003e0aeSVladimir Sementsov-Ogievskiy static coroutine_fn int cbw_co_preadv(
71f7ef38ddSVladimir Sementsov-Ogievskiy         BlockDriverState *bs, int64_t offset, int64_t bytes,
72f7ef38ddSVladimir Sementsov-Ogievskiy         QEMUIOVector *qiov, BdrvRequestFlags flags)
73d003e0aeSVladimir Sementsov-Ogievskiy {
743c1e6327SVladimir Sementsov-Ogievskiy     return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
75d003e0aeSVladimir Sementsov-Ogievskiy }
76d003e0aeSVladimir Sementsov-Ogievskiy 
77af5bcd77SVladimir Sementsov-Ogievskiy /*
78af5bcd77SVladimir Sementsov-Ogievskiy  * Do copy-before-write operation.
79af5bcd77SVladimir Sementsov-Ogievskiy  *
80af5bcd77SVladimir Sementsov-Ogievskiy  * On failure guest request must be failed too.
81af5bcd77SVladimir Sementsov-Ogievskiy  *
82af5bcd77SVladimir Sementsov-Ogievskiy  * On success, we also wait for all in-flight fleecing read requests in source
83af5bcd77SVladimir Sementsov-Ogievskiy  * node, and it's guaranteed that after cbw_do_copy_before_write() successful
84af5bcd77SVladimir Sementsov-Ogievskiy  * return there are no such requests and they will never appear.
85af5bcd77SVladimir Sementsov-Ogievskiy  */
86d003e0aeSVladimir Sementsov-Ogievskiy static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
87d003e0aeSVladimir Sementsov-Ogievskiy         uint64_t offset, uint64_t bytes, BdrvRequestFlags flags)
88d003e0aeSVladimir Sementsov-Ogievskiy {
89d003e0aeSVladimir Sementsov-Ogievskiy     BDRVCopyBeforeWriteState *s = bs->opaque;
90af5bcd77SVladimir Sementsov-Ogievskiy     int ret;
91d003e0aeSVladimir Sementsov-Ogievskiy     uint64_t off, end;
92b518e9e9SVladimir Sementsov-Ogievskiy     int64_t cluster_size = block_copy_cluster_size(s->bcs);
93d003e0aeSVladimir Sementsov-Ogievskiy 
94d003e0aeSVladimir Sementsov-Ogievskiy     if (flags & BDRV_REQ_WRITE_UNCHANGED) {
95d003e0aeSVladimir Sementsov-Ogievskiy         return 0;
96d003e0aeSVladimir Sementsov-Ogievskiy     }
97d003e0aeSVladimir Sementsov-Ogievskiy 
98b518e9e9SVladimir Sementsov-Ogievskiy     off = QEMU_ALIGN_DOWN(offset, cluster_size);
99b518e9e9SVladimir Sementsov-Ogievskiy     end = QEMU_ALIGN_UP(offset + bytes, cluster_size);
100d003e0aeSVladimir Sementsov-Ogievskiy 
101af5bcd77SVladimir Sementsov-Ogievskiy     ret = block_copy(s->bcs, off, end - off, true);
102af5bcd77SVladimir Sementsov-Ogievskiy     if (ret < 0) {
103af5bcd77SVladimir Sementsov-Ogievskiy         return ret;
104af5bcd77SVladimir Sementsov-Ogievskiy     }
105af5bcd77SVladimir Sementsov-Ogievskiy 
106af5bcd77SVladimir Sementsov-Ogievskiy     WITH_QEMU_LOCK_GUARD(&s->lock) {
107af5bcd77SVladimir Sementsov-Ogievskiy         bdrv_set_dirty_bitmap(s->done_bitmap, off, end - off);
108af5bcd77SVladimir Sementsov-Ogievskiy         reqlist_wait_all(&s->frozen_read_reqs, off, end - off, &s->lock);
109af5bcd77SVladimir Sementsov-Ogievskiy     }
110af5bcd77SVladimir Sementsov-Ogievskiy 
111af5bcd77SVladimir Sementsov-Ogievskiy     return 0;
112d003e0aeSVladimir Sementsov-Ogievskiy }
113d003e0aeSVladimir Sementsov-Ogievskiy 
114d003e0aeSVladimir Sementsov-Ogievskiy static int coroutine_fn cbw_co_pdiscard(BlockDriverState *bs,
1150c802287SVladimir Sementsov-Ogievskiy                                         int64_t offset, int64_t bytes)
116d003e0aeSVladimir Sementsov-Ogievskiy {
117d003e0aeSVladimir Sementsov-Ogievskiy     int ret = cbw_do_copy_before_write(bs, offset, bytes, 0);
118d003e0aeSVladimir Sementsov-Ogievskiy     if (ret < 0) {
119d003e0aeSVladimir Sementsov-Ogievskiy         return ret;
120d003e0aeSVladimir Sementsov-Ogievskiy     }
121d003e0aeSVladimir Sementsov-Ogievskiy 
1223c1e6327SVladimir Sementsov-Ogievskiy     return bdrv_co_pdiscard(bs->file, offset, bytes);
123d003e0aeSVladimir Sementsov-Ogievskiy }
124d003e0aeSVladimir Sementsov-Ogievskiy 
125d003e0aeSVladimir Sementsov-Ogievskiy static int coroutine_fn cbw_co_pwrite_zeroes(BlockDriverState *bs,
126f34b2bcfSVladimir Sementsov-Ogievskiy         int64_t offset, int64_t bytes, BdrvRequestFlags flags)
127d003e0aeSVladimir Sementsov-Ogievskiy {
128d003e0aeSVladimir Sementsov-Ogievskiy     int ret = cbw_do_copy_before_write(bs, offset, bytes, flags);
129d003e0aeSVladimir Sementsov-Ogievskiy     if (ret < 0) {
130d003e0aeSVladimir Sementsov-Ogievskiy         return ret;
131d003e0aeSVladimir Sementsov-Ogievskiy     }
132d003e0aeSVladimir Sementsov-Ogievskiy 
1333c1e6327SVladimir Sementsov-Ogievskiy     return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
134d003e0aeSVladimir Sementsov-Ogievskiy }
135d003e0aeSVladimir Sementsov-Ogievskiy 
136d003e0aeSVladimir Sementsov-Ogievskiy static coroutine_fn int cbw_co_pwritev(BlockDriverState *bs,
137e75abedaSVladimir Sementsov-Ogievskiy                                        int64_t offset,
138e75abedaSVladimir Sementsov-Ogievskiy                                        int64_t bytes,
139e75abedaSVladimir Sementsov-Ogievskiy                                        QEMUIOVector *qiov,
140e75abedaSVladimir Sementsov-Ogievskiy                                        BdrvRequestFlags flags)
141d003e0aeSVladimir Sementsov-Ogievskiy {
142d003e0aeSVladimir Sementsov-Ogievskiy     int ret = cbw_do_copy_before_write(bs, offset, bytes, flags);
143d003e0aeSVladimir Sementsov-Ogievskiy     if (ret < 0) {
144d003e0aeSVladimir Sementsov-Ogievskiy         return ret;
145d003e0aeSVladimir Sementsov-Ogievskiy     }
146d003e0aeSVladimir Sementsov-Ogievskiy 
1473c1e6327SVladimir Sementsov-Ogievskiy     return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
148d003e0aeSVladimir Sementsov-Ogievskiy }
149d003e0aeSVladimir Sementsov-Ogievskiy 
150d003e0aeSVladimir Sementsov-Ogievskiy static int coroutine_fn cbw_co_flush(BlockDriverState *bs)
151d003e0aeSVladimir Sementsov-Ogievskiy {
1523c1e6327SVladimir Sementsov-Ogievskiy     if (!bs->file) {
153d003e0aeSVladimir Sementsov-Ogievskiy         return 0;
154d003e0aeSVladimir Sementsov-Ogievskiy     }
155d003e0aeSVladimir Sementsov-Ogievskiy 
1563c1e6327SVladimir Sementsov-Ogievskiy     return bdrv_co_flush(bs->file->bs);
157d003e0aeSVladimir Sementsov-Ogievskiy }
158d003e0aeSVladimir Sementsov-Ogievskiy 
159af5bcd77SVladimir Sementsov-Ogievskiy /*
160af5bcd77SVladimir Sementsov-Ogievskiy  * If @offset not accessible - return NULL.
161af5bcd77SVladimir Sementsov-Ogievskiy  *
162af5bcd77SVladimir Sementsov-Ogievskiy  * Otherwise, set @pnum to some bytes that accessible from @file (@file is set
163af5bcd77SVladimir Sementsov-Ogievskiy  * to bs->file or to s->target). Return newly allocated BlockReq object that
164af5bcd77SVladimir Sementsov-Ogievskiy  * should be than passed to cbw_snapshot_read_unlock().
165af5bcd77SVladimir Sementsov-Ogievskiy  *
166af5bcd77SVladimir Sementsov-Ogievskiy  * It's guaranteed that guest writes will not interact in the region until
167af5bcd77SVladimir Sementsov-Ogievskiy  * cbw_snapshot_read_unlock() called.
168af5bcd77SVladimir Sementsov-Ogievskiy  */
169af5bcd77SVladimir Sementsov-Ogievskiy static BlockReq *cbw_snapshot_read_lock(BlockDriverState *bs,
170af5bcd77SVladimir Sementsov-Ogievskiy                                         int64_t offset, int64_t bytes,
171af5bcd77SVladimir Sementsov-Ogievskiy                                         int64_t *pnum, BdrvChild **file)
172af5bcd77SVladimir Sementsov-Ogievskiy {
173af5bcd77SVladimir Sementsov-Ogievskiy     BDRVCopyBeforeWriteState *s = bs->opaque;
174af5bcd77SVladimir Sementsov-Ogievskiy     BlockReq *req = g_new(BlockReq, 1);
175af5bcd77SVladimir Sementsov-Ogievskiy     bool done;
176af5bcd77SVladimir Sementsov-Ogievskiy 
177af5bcd77SVladimir Sementsov-Ogievskiy     QEMU_LOCK_GUARD(&s->lock);
178af5bcd77SVladimir Sementsov-Ogievskiy 
179af5bcd77SVladimir Sementsov-Ogievskiy     if (bdrv_dirty_bitmap_next_zero(s->access_bitmap, offset, bytes) != -1) {
180af5bcd77SVladimir Sementsov-Ogievskiy         g_free(req);
181af5bcd77SVladimir Sementsov-Ogievskiy         return NULL;
182af5bcd77SVladimir Sementsov-Ogievskiy     }
183af5bcd77SVladimir Sementsov-Ogievskiy 
184af5bcd77SVladimir Sementsov-Ogievskiy     done = bdrv_dirty_bitmap_status(s->done_bitmap, offset, bytes, pnum);
185af5bcd77SVladimir Sementsov-Ogievskiy     if (done) {
186af5bcd77SVladimir Sementsov-Ogievskiy         /*
187af5bcd77SVladimir Sementsov-Ogievskiy          * Special invalid BlockReq, that is handled in
188af5bcd77SVladimir Sementsov-Ogievskiy          * cbw_snapshot_read_unlock(). We don't need to lock something to read
189af5bcd77SVladimir Sementsov-Ogievskiy          * from s->target.
190af5bcd77SVladimir Sementsov-Ogievskiy          */
191af5bcd77SVladimir Sementsov-Ogievskiy         *req = (BlockReq) {.offset = -1, .bytes = -1};
192af5bcd77SVladimir Sementsov-Ogievskiy         *file = s->target;
193af5bcd77SVladimir Sementsov-Ogievskiy     } else {
194af5bcd77SVladimir Sementsov-Ogievskiy         reqlist_init_req(&s->frozen_read_reqs, req, offset, bytes);
195af5bcd77SVladimir Sementsov-Ogievskiy         *file = bs->file;
196af5bcd77SVladimir Sementsov-Ogievskiy     }
197af5bcd77SVladimir Sementsov-Ogievskiy 
198af5bcd77SVladimir Sementsov-Ogievskiy     return req;
199af5bcd77SVladimir Sementsov-Ogievskiy }
200af5bcd77SVladimir Sementsov-Ogievskiy 
201af5bcd77SVladimir Sementsov-Ogievskiy static void cbw_snapshot_read_unlock(BlockDriverState *bs, BlockReq *req)
202af5bcd77SVladimir Sementsov-Ogievskiy {
203af5bcd77SVladimir Sementsov-Ogievskiy     BDRVCopyBeforeWriteState *s = bs->opaque;
204af5bcd77SVladimir Sementsov-Ogievskiy 
205af5bcd77SVladimir Sementsov-Ogievskiy     if (req->offset == -1 && req->bytes == -1) {
206af5bcd77SVladimir Sementsov-Ogievskiy         g_free(req);
207af5bcd77SVladimir Sementsov-Ogievskiy         return;
208af5bcd77SVladimir Sementsov-Ogievskiy     }
209af5bcd77SVladimir Sementsov-Ogievskiy 
210af5bcd77SVladimir Sementsov-Ogievskiy     QEMU_LOCK_GUARD(&s->lock);
211af5bcd77SVladimir Sementsov-Ogievskiy 
212af5bcd77SVladimir Sementsov-Ogievskiy     reqlist_remove_req(req);
213af5bcd77SVladimir Sementsov-Ogievskiy     g_free(req);
214af5bcd77SVladimir Sementsov-Ogievskiy }
215af5bcd77SVladimir Sementsov-Ogievskiy 
216af5bcd77SVladimir Sementsov-Ogievskiy static coroutine_fn int
217af5bcd77SVladimir Sementsov-Ogievskiy cbw_co_preadv_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes,
218af5bcd77SVladimir Sementsov-Ogievskiy                        QEMUIOVector *qiov, size_t qiov_offset)
219af5bcd77SVladimir Sementsov-Ogievskiy {
220af5bcd77SVladimir Sementsov-Ogievskiy     BlockReq *req;
221af5bcd77SVladimir Sementsov-Ogievskiy     BdrvChild *file;
222af5bcd77SVladimir Sementsov-Ogievskiy     int ret;
223af5bcd77SVladimir Sementsov-Ogievskiy 
224af5bcd77SVladimir Sementsov-Ogievskiy     /* TODO: upgrade to async loop using AioTask */
225af5bcd77SVladimir Sementsov-Ogievskiy     while (bytes) {
226af5bcd77SVladimir Sementsov-Ogievskiy         int64_t cur_bytes;
227af5bcd77SVladimir Sementsov-Ogievskiy 
228af5bcd77SVladimir Sementsov-Ogievskiy         req = cbw_snapshot_read_lock(bs, offset, bytes, &cur_bytes, &file);
229af5bcd77SVladimir Sementsov-Ogievskiy         if (!req) {
230af5bcd77SVladimir Sementsov-Ogievskiy             return -EACCES;
231af5bcd77SVladimir Sementsov-Ogievskiy         }
232af5bcd77SVladimir Sementsov-Ogievskiy 
233af5bcd77SVladimir Sementsov-Ogievskiy         ret = bdrv_co_preadv_part(file, offset, cur_bytes,
234af5bcd77SVladimir Sementsov-Ogievskiy                                   qiov, qiov_offset, 0);
235af5bcd77SVladimir Sementsov-Ogievskiy         cbw_snapshot_read_unlock(bs, req);
236af5bcd77SVladimir Sementsov-Ogievskiy         if (ret < 0) {
237af5bcd77SVladimir Sementsov-Ogievskiy             return ret;
238af5bcd77SVladimir Sementsov-Ogievskiy         }
239af5bcd77SVladimir Sementsov-Ogievskiy 
240af5bcd77SVladimir Sementsov-Ogievskiy         bytes -= cur_bytes;
241af5bcd77SVladimir Sementsov-Ogievskiy         offset += cur_bytes;
242af5bcd77SVladimir Sementsov-Ogievskiy         qiov_offset += cur_bytes;
243af5bcd77SVladimir Sementsov-Ogievskiy     }
244af5bcd77SVladimir Sementsov-Ogievskiy 
245af5bcd77SVladimir Sementsov-Ogievskiy     return 0;
246af5bcd77SVladimir Sementsov-Ogievskiy }
247af5bcd77SVladimir Sementsov-Ogievskiy 
248af5bcd77SVladimir Sementsov-Ogievskiy static int coroutine_fn
249af5bcd77SVladimir Sementsov-Ogievskiy cbw_co_snapshot_block_status(BlockDriverState *bs,
250af5bcd77SVladimir Sementsov-Ogievskiy                              bool want_zero, int64_t offset, int64_t bytes,
251af5bcd77SVladimir Sementsov-Ogievskiy                              int64_t *pnum, int64_t *map,
252af5bcd77SVladimir Sementsov-Ogievskiy                              BlockDriverState **file)
253af5bcd77SVladimir Sementsov-Ogievskiy {
254af5bcd77SVladimir Sementsov-Ogievskiy     BDRVCopyBeforeWriteState *s = bs->opaque;
255af5bcd77SVladimir Sementsov-Ogievskiy     BlockReq *req;
256af5bcd77SVladimir Sementsov-Ogievskiy     int ret;
257af5bcd77SVladimir Sementsov-Ogievskiy     int64_t cur_bytes;
258af5bcd77SVladimir Sementsov-Ogievskiy     BdrvChild *child;
259af5bcd77SVladimir Sementsov-Ogievskiy 
260af5bcd77SVladimir Sementsov-Ogievskiy     req = cbw_snapshot_read_lock(bs, offset, bytes, &cur_bytes, &child);
261af5bcd77SVladimir Sementsov-Ogievskiy     if (!req) {
262af5bcd77SVladimir Sementsov-Ogievskiy         return -EACCES;
263af5bcd77SVladimir Sementsov-Ogievskiy     }
264af5bcd77SVladimir Sementsov-Ogievskiy 
265af5bcd77SVladimir Sementsov-Ogievskiy     ret = bdrv_block_status(child->bs, offset, cur_bytes, pnum, map, file);
266af5bcd77SVladimir Sementsov-Ogievskiy     if (child == s->target) {
267af5bcd77SVladimir Sementsov-Ogievskiy         /*
268af5bcd77SVladimir Sementsov-Ogievskiy          * We refer to s->target only for areas that we've written to it.
269af5bcd77SVladimir Sementsov-Ogievskiy          * And we can not report unallocated blocks in s->target: this will
270af5bcd77SVladimir Sementsov-Ogievskiy          * break generic block-status-above logic, that will go to
271af5bcd77SVladimir Sementsov-Ogievskiy          * copy-before-write filtered child in this case.
272af5bcd77SVladimir Sementsov-Ogievskiy          */
273af5bcd77SVladimir Sementsov-Ogievskiy         assert(ret & BDRV_BLOCK_ALLOCATED);
274af5bcd77SVladimir Sementsov-Ogievskiy     }
275af5bcd77SVladimir Sementsov-Ogievskiy 
276af5bcd77SVladimir Sementsov-Ogievskiy     cbw_snapshot_read_unlock(bs, req);
277af5bcd77SVladimir Sementsov-Ogievskiy 
278af5bcd77SVladimir Sementsov-Ogievskiy     return ret;
279af5bcd77SVladimir Sementsov-Ogievskiy }
280af5bcd77SVladimir Sementsov-Ogievskiy 
281af5bcd77SVladimir Sementsov-Ogievskiy static int coroutine_fn cbw_co_pdiscard_snapshot(BlockDriverState *bs,
282af5bcd77SVladimir Sementsov-Ogievskiy                                                  int64_t offset, int64_t bytes)
283af5bcd77SVladimir Sementsov-Ogievskiy {
284af5bcd77SVladimir Sementsov-Ogievskiy     BDRVCopyBeforeWriteState *s = bs->opaque;
285af5bcd77SVladimir Sementsov-Ogievskiy 
286af5bcd77SVladimir Sementsov-Ogievskiy     WITH_QEMU_LOCK_GUARD(&s->lock) {
287af5bcd77SVladimir Sementsov-Ogievskiy         bdrv_reset_dirty_bitmap(s->access_bitmap, offset, bytes);
288af5bcd77SVladimir Sementsov-Ogievskiy     }
289af5bcd77SVladimir Sementsov-Ogievskiy 
290af5bcd77SVladimir Sementsov-Ogievskiy     block_copy_reset(s->bcs, offset, bytes);
291af5bcd77SVladimir Sementsov-Ogievskiy 
292af5bcd77SVladimir Sementsov-Ogievskiy     return bdrv_co_pdiscard(s->target, offset, bytes);
293af5bcd77SVladimir Sementsov-Ogievskiy }
294af5bcd77SVladimir Sementsov-Ogievskiy 
295d003e0aeSVladimir Sementsov-Ogievskiy static void cbw_refresh_filename(BlockDriverState *bs)
296d003e0aeSVladimir Sementsov-Ogievskiy {
297d003e0aeSVladimir Sementsov-Ogievskiy     pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
2983c1e6327SVladimir Sementsov-Ogievskiy             bs->file->bs->filename);
299d003e0aeSVladimir Sementsov-Ogievskiy }
300d003e0aeSVladimir Sementsov-Ogievskiy 
301d003e0aeSVladimir Sementsov-Ogievskiy static void cbw_child_perm(BlockDriverState *bs, BdrvChild *c,
302d003e0aeSVladimir Sementsov-Ogievskiy                            BdrvChildRole role,
303d003e0aeSVladimir Sementsov-Ogievskiy                            BlockReopenQueue *reopen_queue,
304d003e0aeSVladimir Sementsov-Ogievskiy                            uint64_t perm, uint64_t shared,
305d003e0aeSVladimir Sementsov-Ogievskiy                            uint64_t *nperm, uint64_t *nshared)
306d003e0aeSVladimir Sementsov-Ogievskiy {
307d003e0aeSVladimir Sementsov-Ogievskiy     if (!(role & BDRV_CHILD_FILTERED)) {
308d003e0aeSVladimir Sementsov-Ogievskiy         /*
309d003e0aeSVladimir Sementsov-Ogievskiy          * Target child
310d003e0aeSVladimir Sementsov-Ogievskiy          *
311d003e0aeSVladimir Sementsov-Ogievskiy          * Share write to target (child_file), to not interfere
312d003e0aeSVladimir Sementsov-Ogievskiy          * with guest writes to its disk which may be in target backing chain.
313d003e0aeSVladimir Sementsov-Ogievskiy          * Can't resize during a backup block job because we check the size
314d003e0aeSVladimir Sementsov-Ogievskiy          * only upfront.
315d003e0aeSVladimir Sementsov-Ogievskiy          */
316d003e0aeSVladimir Sementsov-Ogievskiy         *nshared = BLK_PERM_ALL & ~BLK_PERM_RESIZE;
317d003e0aeSVladimir Sementsov-Ogievskiy         *nperm = BLK_PERM_WRITE;
318d003e0aeSVladimir Sementsov-Ogievskiy     } else {
319d003e0aeSVladimir Sementsov-Ogievskiy         /* Source child */
320d003e0aeSVladimir Sementsov-Ogievskiy         bdrv_default_perms(bs, c, role, reopen_queue,
321d003e0aeSVladimir Sementsov-Ogievskiy                            perm, shared, nperm, nshared);
322d003e0aeSVladimir Sementsov-Ogievskiy 
3233860c020SVladimir Sementsov-Ogievskiy         if (!QLIST_EMPTY(&bs->parents)) {
324d003e0aeSVladimir Sementsov-Ogievskiy             if (perm & BLK_PERM_WRITE) {
325d003e0aeSVladimir Sementsov-Ogievskiy                 *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
326d003e0aeSVladimir Sementsov-Ogievskiy             }
327d003e0aeSVladimir Sementsov-Ogievskiy             *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
328d003e0aeSVladimir Sementsov-Ogievskiy         }
329d003e0aeSVladimir Sementsov-Ogievskiy     }
3303860c020SVladimir Sementsov-Ogievskiy }
331d003e0aeSVladimir Sementsov-Ogievskiy 
332*79ef0cebSVladimir Sementsov-Ogievskiy static BlockdevOptions *cbw_parse_options(QDict *options, Error **errp)
3335f3a3cd7SVladimir Sementsov-Ogievskiy {
334*79ef0cebSVladimir Sementsov-Ogievskiy     BlockdevOptions *opts = NULL;
3355f3a3cd7SVladimir Sementsov-Ogievskiy     Visitor *v = NULL;
3365f3a3cd7SVladimir Sementsov-Ogievskiy 
337*79ef0cebSVladimir Sementsov-Ogievskiy     qdict_put_str(options, "driver", "copy-before-write");
3385f3a3cd7SVladimir Sementsov-Ogievskiy 
339*79ef0cebSVladimir Sementsov-Ogievskiy     v = qobject_input_visitor_new_flat_confused(options, errp);
3405f3a3cd7SVladimir Sementsov-Ogievskiy     if (!v) {
3415f3a3cd7SVladimir Sementsov-Ogievskiy         goto out;
3425f3a3cd7SVladimir Sementsov-Ogievskiy     }
3435f3a3cd7SVladimir Sementsov-Ogievskiy 
344*79ef0cebSVladimir Sementsov-Ogievskiy     visit_type_BlockdevOptions(v, NULL, &opts, errp);
345*79ef0cebSVladimir Sementsov-Ogievskiy     if (!opts) {
3465f3a3cd7SVladimir Sementsov-Ogievskiy         goto out;
3475f3a3cd7SVladimir Sementsov-Ogievskiy     }
3485f3a3cd7SVladimir Sementsov-Ogievskiy 
349*79ef0cebSVladimir Sementsov-Ogievskiy     /*
350*79ef0cebSVladimir Sementsov-Ogievskiy      * Delete options which we are going to parse through BlockdevOptions
351*79ef0cebSVladimir Sementsov-Ogievskiy      * object for original options.
352*79ef0cebSVladimir Sementsov-Ogievskiy      */
353*79ef0cebSVladimir Sementsov-Ogievskiy     qdict_extract_subqdict(options, NULL, "bitmap");
3545f3a3cd7SVladimir Sementsov-Ogievskiy 
3555f3a3cd7SVladimir Sementsov-Ogievskiy out:
3565f3a3cd7SVladimir Sementsov-Ogievskiy     visit_free(v);
357*79ef0cebSVladimir Sementsov-Ogievskiy     qdict_del(options, "driver");
3585f3a3cd7SVladimir Sementsov-Ogievskiy 
359*79ef0cebSVladimir Sementsov-Ogievskiy     return opts;
3605f3a3cd7SVladimir Sementsov-Ogievskiy }
3615f3a3cd7SVladimir Sementsov-Ogievskiy 
362751cec7aSVladimir Sementsov-Ogievskiy static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
363751cec7aSVladimir Sementsov-Ogievskiy                     Error **errp)
3641f0cacb9SVladimir Sementsov-Ogievskiy {
365fe7ea40cSVladimir Sementsov-Ogievskiy     BDRVCopyBeforeWriteState *s = bs->opaque;
3665f3a3cd7SVladimir Sementsov-Ogievskiy     BdrvDirtyBitmap *bitmap = NULL;
367af5bcd77SVladimir Sementsov-Ogievskiy     int64_t cluster_size;
368*79ef0cebSVladimir Sementsov-Ogievskiy     g_autoptr(BlockdevOptions) full_opts = NULL;
369*79ef0cebSVladimir Sementsov-Ogievskiy     BlockdevOptionsCbw *opts;
370*79ef0cebSVladimir Sementsov-Ogievskiy 
371*79ef0cebSVladimir Sementsov-Ogievskiy     full_opts = cbw_parse_options(options, errp);
372*79ef0cebSVladimir Sementsov-Ogievskiy     if (!full_opts) {
373*79ef0cebSVladimir Sementsov-Ogievskiy         return -EINVAL;
374*79ef0cebSVladimir Sementsov-Ogievskiy     }
375*79ef0cebSVladimir Sementsov-Ogievskiy     assert(full_opts->driver == BLOCKDEV_DRIVER_COPY_BEFORE_WRITE);
376*79ef0cebSVladimir Sementsov-Ogievskiy     opts = &full_opts->u.copy_before_write;
3771f0cacb9SVladimir Sementsov-Ogievskiy 
378f44fd739SVladimir Sementsov-Ogievskiy     bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
379f44fd739SVladimir Sementsov-Ogievskiy                                BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
380f44fd739SVladimir Sementsov-Ogievskiy                                false, errp);
381f44fd739SVladimir Sementsov-Ogievskiy     if (!bs->file) {
3821f0cacb9SVladimir Sementsov-Ogievskiy         return -EINVAL;
3831f0cacb9SVladimir Sementsov-Ogievskiy     }
3841f0cacb9SVladimir Sementsov-Ogievskiy 
385f44fd739SVladimir Sementsov-Ogievskiy     s->target = bdrv_open_child(NULL, options, "target", bs, &child_of_bds,
386f44fd739SVladimir Sementsov-Ogievskiy                                 BDRV_CHILD_DATA, false, errp);
387f44fd739SVladimir Sementsov-Ogievskiy     if (!s->target) {
3881f0cacb9SVladimir Sementsov-Ogievskiy         return -EINVAL;
3891f0cacb9SVladimir Sementsov-Ogievskiy     }
3901f0cacb9SVladimir Sementsov-Ogievskiy 
391*79ef0cebSVladimir Sementsov-Ogievskiy     if (opts->has_bitmap) {
392*79ef0cebSVladimir Sementsov-Ogievskiy         bitmap = block_dirty_bitmap_lookup(opts->bitmap->node,
393*79ef0cebSVladimir Sementsov-Ogievskiy                                            opts->bitmap->name, NULL, errp);
394*79ef0cebSVladimir Sementsov-Ogievskiy         if (!bitmap) {
3955f3a3cd7SVladimir Sementsov-Ogievskiy             return -EINVAL;
3965f3a3cd7SVladimir Sementsov-Ogievskiy         }
397*79ef0cebSVladimir Sementsov-Ogievskiy     }
3985f3a3cd7SVladimir Sementsov-Ogievskiy 
3995a507426SVladimir Sementsov-Ogievskiy     bs->total_sectors = bs->file->bs->total_sectors;
4005a507426SVladimir Sementsov-Ogievskiy     bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
4015a507426SVladimir Sementsov-Ogievskiy             (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
4025a507426SVladimir Sementsov-Ogievskiy     bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
4035a507426SVladimir Sementsov-Ogievskiy             ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
4045a507426SVladimir Sementsov-Ogievskiy              bs->file->bs->supported_zero_flags);
4055a507426SVladimir Sementsov-Ogievskiy 
4065f3a3cd7SVladimir Sementsov-Ogievskiy     s->bcs = block_copy_state_new(bs->file, s->target, bitmap, errp);
407fe7ea40cSVladimir Sementsov-Ogievskiy     if (!s->bcs) {
4081f0cacb9SVladimir Sementsov-Ogievskiy         error_prepend(errp, "Cannot create block-copy-state: ");
4091f0cacb9SVladimir Sementsov-Ogievskiy         return -EINVAL;
4101f0cacb9SVladimir Sementsov-Ogievskiy     }
4111f0cacb9SVladimir Sementsov-Ogievskiy 
412af5bcd77SVladimir Sementsov-Ogievskiy     cluster_size = block_copy_cluster_size(s->bcs);
413af5bcd77SVladimir Sementsov-Ogievskiy 
414af5bcd77SVladimir Sementsov-Ogievskiy     s->done_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp);
415af5bcd77SVladimir Sementsov-Ogievskiy     if (!s->done_bitmap) {
416af5bcd77SVladimir Sementsov-Ogievskiy         return -EINVAL;
417af5bcd77SVladimir Sementsov-Ogievskiy     }
418af5bcd77SVladimir Sementsov-Ogievskiy     bdrv_disable_dirty_bitmap(s->done_bitmap);
419af5bcd77SVladimir Sementsov-Ogievskiy 
420af5bcd77SVladimir Sementsov-Ogievskiy     /* s->access_bitmap starts equal to bcs bitmap */
421af5bcd77SVladimir Sementsov-Ogievskiy     s->access_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp);
422af5bcd77SVladimir Sementsov-Ogievskiy     if (!s->access_bitmap) {
423af5bcd77SVladimir Sementsov-Ogievskiy         return -EINVAL;
424af5bcd77SVladimir Sementsov-Ogievskiy     }
425af5bcd77SVladimir Sementsov-Ogievskiy     bdrv_disable_dirty_bitmap(s->access_bitmap);
426af5bcd77SVladimir Sementsov-Ogievskiy     bdrv_dirty_bitmap_merge_internal(s->access_bitmap,
427af5bcd77SVladimir Sementsov-Ogievskiy                                      block_copy_dirty_bitmap(s->bcs), NULL,
428af5bcd77SVladimir Sementsov-Ogievskiy                                      true);
429af5bcd77SVladimir Sementsov-Ogievskiy 
430af5bcd77SVladimir Sementsov-Ogievskiy     qemu_co_mutex_init(&s->lock);
431af5bcd77SVladimir Sementsov-Ogievskiy     QLIST_INIT(&s->frozen_read_reqs);
432af5bcd77SVladimir Sementsov-Ogievskiy 
4331f0cacb9SVladimir Sementsov-Ogievskiy     return 0;
4341f0cacb9SVladimir Sementsov-Ogievskiy }
4351f0cacb9SVladimir Sementsov-Ogievskiy 
436751cec7aSVladimir Sementsov-Ogievskiy static void cbw_close(BlockDriverState *bs)
437751cec7aSVladimir Sementsov-Ogievskiy {
438751cec7aSVladimir Sementsov-Ogievskiy     BDRVCopyBeforeWriteState *s = bs->opaque;
439751cec7aSVladimir Sementsov-Ogievskiy 
440af5bcd77SVladimir Sementsov-Ogievskiy     bdrv_release_dirty_bitmap(s->access_bitmap);
441af5bcd77SVladimir Sementsov-Ogievskiy     bdrv_release_dirty_bitmap(s->done_bitmap);
442af5bcd77SVladimir Sementsov-Ogievskiy 
443751cec7aSVladimir Sementsov-Ogievskiy     block_copy_state_free(s->bcs);
444751cec7aSVladimir Sementsov-Ogievskiy     s->bcs = NULL;
445751cec7aSVladimir Sementsov-Ogievskiy }
446751cec7aSVladimir Sementsov-Ogievskiy 
447d003e0aeSVladimir Sementsov-Ogievskiy BlockDriver bdrv_cbw_filter = {
448d003e0aeSVladimir Sementsov-Ogievskiy     .format_name = "copy-before-write",
449d003e0aeSVladimir Sementsov-Ogievskiy     .instance_size = sizeof(BDRVCopyBeforeWriteState),
450d003e0aeSVladimir Sementsov-Ogievskiy 
451751cec7aSVladimir Sementsov-Ogievskiy     .bdrv_open                  = cbw_open,
452751cec7aSVladimir Sementsov-Ogievskiy     .bdrv_close                 = cbw_close,
453751cec7aSVladimir Sementsov-Ogievskiy 
454d003e0aeSVladimir Sementsov-Ogievskiy     .bdrv_co_preadv             = cbw_co_preadv,
455d003e0aeSVladimir Sementsov-Ogievskiy     .bdrv_co_pwritev            = cbw_co_pwritev,
456d003e0aeSVladimir Sementsov-Ogievskiy     .bdrv_co_pwrite_zeroes      = cbw_co_pwrite_zeroes,
457d003e0aeSVladimir Sementsov-Ogievskiy     .bdrv_co_pdiscard           = cbw_co_pdiscard,
458d003e0aeSVladimir Sementsov-Ogievskiy     .bdrv_co_flush              = cbw_co_flush,
459d003e0aeSVladimir Sementsov-Ogievskiy 
460af5bcd77SVladimir Sementsov-Ogievskiy     .bdrv_co_preadv_snapshot       = cbw_co_preadv_snapshot,
461af5bcd77SVladimir Sementsov-Ogievskiy     .bdrv_co_pdiscard_snapshot     = cbw_co_pdiscard_snapshot,
462af5bcd77SVladimir Sementsov-Ogievskiy     .bdrv_co_snapshot_block_status = cbw_co_snapshot_block_status,
463af5bcd77SVladimir Sementsov-Ogievskiy 
464d003e0aeSVladimir Sementsov-Ogievskiy     .bdrv_refresh_filename      = cbw_refresh_filename,
465d003e0aeSVladimir Sementsov-Ogievskiy 
466d003e0aeSVladimir Sementsov-Ogievskiy     .bdrv_child_perm            = cbw_child_perm,
467d003e0aeSVladimir Sementsov-Ogievskiy 
468d003e0aeSVladimir Sementsov-Ogievskiy     .is_filter = true,
469d003e0aeSVladimir Sementsov-Ogievskiy };
470d003e0aeSVladimir Sementsov-Ogievskiy 
471d003e0aeSVladimir Sementsov-Ogievskiy BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
472d003e0aeSVladimir Sementsov-Ogievskiy                                   BlockDriverState *target,
473d003e0aeSVladimir Sementsov-Ogievskiy                                   const char *filter_node_name,
474d003e0aeSVladimir Sementsov-Ogievskiy                                   BlockCopyState **bcs,
475d003e0aeSVladimir Sementsov-Ogievskiy                                   Error **errp)
476d003e0aeSVladimir Sementsov-Ogievskiy {
477d003e0aeSVladimir Sementsov-Ogievskiy     ERRP_GUARD();
478d003e0aeSVladimir Sementsov-Ogievskiy     BDRVCopyBeforeWriteState *state;
479d003e0aeSVladimir Sementsov-Ogievskiy     BlockDriverState *top;
480f44fd739SVladimir Sementsov-Ogievskiy     QDict *opts;
481d003e0aeSVladimir Sementsov-Ogievskiy 
482d003e0aeSVladimir Sementsov-Ogievskiy     assert(source->total_sectors == target->total_sectors);
483377cc15bSEmanuele Giuseppe Esposito     GLOBAL_STATE_CODE();
484d003e0aeSVladimir Sementsov-Ogievskiy 
485f44fd739SVladimir Sementsov-Ogievskiy     opts = qdict_new();
486751cec7aSVladimir Sementsov-Ogievskiy     qdict_put_str(opts, "driver", "copy-before-write");
487751cec7aSVladimir Sementsov-Ogievskiy     if (filter_node_name) {
488751cec7aSVladimir Sementsov-Ogievskiy         qdict_put_str(opts, "node-name", filter_node_name);
489751cec7aSVladimir Sementsov-Ogievskiy     }
490f44fd739SVladimir Sementsov-Ogievskiy     qdict_put_str(opts, "file", bdrv_get_node_name(source));
491f44fd739SVladimir Sementsov-Ogievskiy     qdict_put_str(opts, "target", bdrv_get_node_name(target));
492f44fd739SVladimir Sementsov-Ogievskiy 
493751cec7aSVladimir Sementsov-Ogievskiy     top = bdrv_insert_node(source, opts, BDRV_O_RDWR, errp);
494751cec7aSVladimir Sementsov-Ogievskiy     if (!top) {
495751cec7aSVladimir Sementsov-Ogievskiy         return NULL;
496d003e0aeSVladimir Sementsov-Ogievskiy     }
497d003e0aeSVladimir Sementsov-Ogievskiy 
498751cec7aSVladimir Sementsov-Ogievskiy     state = top->opaque;
4997ddbce2dSVladimir Sementsov-Ogievskiy     *bcs = state->bcs;
500d003e0aeSVladimir Sementsov-Ogievskiy 
501d003e0aeSVladimir Sementsov-Ogievskiy     return top;
502d003e0aeSVladimir Sementsov-Ogievskiy }
503d003e0aeSVladimir Sementsov-Ogievskiy 
504d003e0aeSVladimir Sementsov-Ogievskiy void bdrv_cbw_drop(BlockDriverState *bs)
505d003e0aeSVladimir Sementsov-Ogievskiy {
506377cc15bSEmanuele Giuseppe Esposito     GLOBAL_STATE_CODE();
507d003e0aeSVladimir Sementsov-Ogievskiy     bdrv_drop_filter(bs, &error_abort);
508d003e0aeSVladimir Sementsov-Ogievskiy     bdrv_unref(bs);
509d003e0aeSVladimir Sementsov-Ogievskiy }
510751cec7aSVladimir Sementsov-Ogievskiy 
511751cec7aSVladimir Sementsov-Ogievskiy static void cbw_init(void)
512751cec7aSVladimir Sementsov-Ogievskiy {
513751cec7aSVladimir Sementsov-Ogievskiy     bdrv_register(&bdrv_cbw_filter);
514751cec7aSVladimir Sementsov-Ogievskiy }
515751cec7aSVladimir Sementsov-Ogievskiy 
516751cec7aSVladimir Sementsov-Ogievskiy block_init(cbw_init);
517