/*
 * Copy-on-read filter block driver
 *
 * Copyright (c) 2018 Red Hat, Inc.
 *
 * Author:
 *   Max Reitz <mreitz@redhat.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 or
 * (at your option) version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
226c6f24fdSMax Reitz
236c6f24fdSMax Reitz #include "qemu/osdep.h"
24e2c1c34fSMarkus Armbruster #include "block/block-io.h"
256c6f24fdSMax Reitz #include "block/block_int.h"
260b8fa32fSMarkus Armbruster #include "qemu/module.h"
2716e09a21SAndrey Shinkevich #include "qapi/error.h"
28e4c8fdddSAndrey Shinkevich #include "qapi/qmp/qdict.h"
2916e09a21SAndrey Shinkevich #include "block/copy-on-read.h"
3016e09a21SAndrey Shinkevich
3116e09a21SAndrey Shinkevich
3216e09a21SAndrey Shinkevich typedef struct BDRVStateCOR {
33e4c8fdddSAndrey Shinkevich BlockDriverState *bottom_bs;
34e4c8fdddSAndrey Shinkevich bool chain_frozen;
3516e09a21SAndrey Shinkevich } BDRVStateCOR;
366c6f24fdSMax Reitz
376c6f24fdSMax Reitz
389275fc72SKevin Wolf static int GRAPH_UNLOCKED
cor_open(BlockDriverState * bs,QDict * options,int flags,Error ** errp)399275fc72SKevin Wolf cor_open(BlockDriverState *bs, QDict *options, int flags, Error **errp)
406c6f24fdSMax Reitz {
41e4c8fdddSAndrey Shinkevich BlockDriverState *bottom_bs = NULL;
4216e09a21SAndrey Shinkevich BDRVStateCOR *state = bs->opaque;
43e4c8fdddSAndrey Shinkevich /* Find a bottom node name, if any */
44e4c8fdddSAndrey Shinkevich const char *bottom_node = qdict_get_try_str(options, "bottom");
4583930780SVladimir Sementsov-Ogievskiy int ret;
4616e09a21SAndrey Shinkevich
479275fc72SKevin Wolf GLOBAL_STATE_CODE();
489275fc72SKevin Wolf
4983930780SVladimir Sementsov-Ogievskiy ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
5083930780SVladimir Sementsov-Ogievskiy if (ret < 0) {
5183930780SVladimir Sementsov-Ogievskiy return ret;
526c6f24fdSMax Reitz }
536c6f24fdSMax Reitz
54*a4b740dbSKevin Wolf GRAPH_RDLOCK_GUARD_MAINLOOP();
55*a4b740dbSKevin Wolf
56e275458bSAndrey Shinkevich bs->supported_read_flags = BDRV_REQ_PREFETCH;
57e275458bSAndrey Shinkevich
58228345bfSMax Reitz bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
5980f5c33fSKevin Wolf (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
606c6f24fdSMax Reitz
61228345bfSMax Reitz bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
6280f5c33fSKevin Wolf ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
63228345bfSMax Reitz bs->file->bs->supported_zero_flags);
646c6f24fdSMax Reitz
65e4c8fdddSAndrey Shinkevich if (bottom_node) {
66e4c8fdddSAndrey Shinkevich bottom_bs = bdrv_find_node(bottom_node);
67e4c8fdddSAndrey Shinkevich if (!bottom_bs) {
68e4c8fdddSAndrey Shinkevich error_setg(errp, "Bottom node '%s' not found", bottom_node);
69e4c8fdddSAndrey Shinkevich qdict_del(options, "bottom");
70e4c8fdddSAndrey Shinkevich return -EINVAL;
71e4c8fdddSAndrey Shinkevich }
72e4c8fdddSAndrey Shinkevich qdict_del(options, "bottom");
73e4c8fdddSAndrey Shinkevich
74e4c8fdddSAndrey Shinkevich if (!bottom_bs->drv) {
75e4c8fdddSAndrey Shinkevich error_setg(errp, "Bottom node '%s' not opened", bottom_node);
76e4c8fdddSAndrey Shinkevich return -EINVAL;
77e4c8fdddSAndrey Shinkevich }
78e4c8fdddSAndrey Shinkevich
79e4c8fdddSAndrey Shinkevich if (bottom_bs->drv->is_filter) {
80e4c8fdddSAndrey Shinkevich error_setg(errp, "Bottom node '%s' is a filter", bottom_node);
81e4c8fdddSAndrey Shinkevich return -EINVAL;
82e4c8fdddSAndrey Shinkevich }
83e4c8fdddSAndrey Shinkevich
84e4c8fdddSAndrey Shinkevich if (bdrv_freeze_backing_chain(bs, bottom_bs, errp) < 0) {
85e4c8fdddSAndrey Shinkevich return -EINVAL;
86e4c8fdddSAndrey Shinkevich }
87e4c8fdddSAndrey Shinkevich state->chain_frozen = true;
88e4c8fdddSAndrey Shinkevich
89e4c8fdddSAndrey Shinkevich /*
90e4c8fdddSAndrey Shinkevich * We do freeze the chain, so it shouldn't be removed. Still, storing a
91e4c8fdddSAndrey Shinkevich * pointer worth bdrv_ref().
92e4c8fdddSAndrey Shinkevich */
93e4c8fdddSAndrey Shinkevich bdrv_ref(bottom_bs);
94e4c8fdddSAndrey Shinkevich }
95e4c8fdddSAndrey Shinkevich state->bottom_bs = bottom_bs;
9616e09a21SAndrey Shinkevich
9716e09a21SAndrey Shinkevich /*
9816e09a21SAndrey Shinkevich * We don't need to call bdrv_child_refresh_perms() now as the permissions
9916e09a21SAndrey Shinkevich * will be updated later when the filter node gets its parent.
10016e09a21SAndrey Shinkevich */
10116e09a21SAndrey Shinkevich
1026c6f24fdSMax Reitz return 0;
1036c6f24fdSMax Reitz }
1046c6f24fdSMax Reitz
1056c6f24fdSMax Reitz
/*
 * Permission bits that cor_child_perm() takes from the parent's request
 * and hands on to the child as they are.
 */
#define PERM_PASSTHROUGH (BLK_PERM_RESIZE \
                          | BLK_PERM_WRITE \
                          | BLK_PERM_CONSISTENT_READ)
/* Every permission bit that is not part of PERM_PASSTHROUGH. */
#define PERM_UNCHANGED (~PERM_PASSTHROUGH & BLK_PERM_ALL)
1106c6f24fdSMax Reitz
/*
 * Compute the permissions this filter needs on, and shares for, its child.
 *
 * @perm/@shared are the permissions used/shared by the parents; the
 * results are written to @nperm/@nshared.  @c, @role and @reopen_queue
 * are not consulted.
 *
 * The PERM_PASSTHROUGH bits are copied from the parents' values; all
 * PERM_UNCHANGED bits are always shared.
 */
static void cor_child_perm(BlockDriverState *bs, BdrvChild *c,
                           BdrvChildRole role,
                           BlockReopenQueue *reopen_queue,
                           uint64_t perm, uint64_t shared,
                           uint64_t *nperm, uint64_t *nshared)
{
    uint64_t child_perm = perm & PERM_PASSTHROUGH;
    uint64_t child_shared = PERM_UNCHANGED | (shared & PERM_PASSTHROUGH);

    /*
     * We must not request write permissions for an inactive node, the
     * child cannot provide it.
     */
    if ((bs->open_flags & BDRV_O_INACTIVE) == 0) {
        child_perm |= BLK_PERM_WRITE_UNCHANGED;
    }

    *nperm = child_perm;
    *nshared = child_shared;
}
1266c6f24fdSMax Reitz
1276c6f24fdSMax Reitz
cor_co_getlength(BlockDriverState * bs)1288ab8140aSKevin Wolf static int64_t coroutine_fn GRAPH_RDLOCK cor_co_getlength(BlockDriverState *bs)
1296c6f24fdSMax Reitz {
130c86422c5SEmanuele Giuseppe Esposito return bdrv_co_getlength(bs->file->bs);
1316c6f24fdSMax Reitz }
1326c6f24fdSMax Reitz
1336c6f24fdSMax Reitz
134b9b10c35SKevin Wolf static int coroutine_fn GRAPH_RDLOCK
cor_co_preadv_part(BlockDriverState * bs,int64_t offset,int64_t bytes,QEMUIOVector * qiov,size_t qiov_offset,BdrvRequestFlags flags)135b9b10c35SKevin Wolf cor_co_preadv_part(BlockDriverState *bs, int64_t offset, int64_t bytes,
136b9b10c35SKevin Wolf QEMUIOVector *qiov, size_t qiov_offset,
137f7ef38ddSVladimir Sementsov-Ogievskiy BdrvRequestFlags flags)
1386c6f24fdSMax Reitz {
139e4c8fdddSAndrey Shinkevich int64_t n;
140e4c8fdddSAndrey Shinkevich int local_flags;
141e4c8fdddSAndrey Shinkevich int ret;
142e4c8fdddSAndrey Shinkevich BDRVStateCOR *state = bs->opaque;
143e4c8fdddSAndrey Shinkevich
144e4c8fdddSAndrey Shinkevich if (!state->bottom_bs) {
1451252e03bSAndrey Shinkevich return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
1466c6f24fdSMax Reitz flags | BDRV_REQ_COPY_ON_READ);
1476c6f24fdSMax Reitz }
1486c6f24fdSMax Reitz
149e4c8fdddSAndrey Shinkevich while (bytes) {
150e4c8fdddSAndrey Shinkevich local_flags = flags;
151e4c8fdddSAndrey Shinkevich
152e4c8fdddSAndrey Shinkevich /* In case of failure, try to copy-on-read anyway */
153cc323997SPaolo Bonzini ret = bdrv_co_is_allocated(bs->file->bs, offset, bytes, &n);
154e4c8fdddSAndrey Shinkevich if (ret <= 0) {
155cc323997SPaolo Bonzini ret = bdrv_co_is_allocated_above(bdrv_backing_chain_next(bs->file->bs),
156e4c8fdddSAndrey Shinkevich state->bottom_bs, true, offset,
157e4c8fdddSAndrey Shinkevich n, &n);
158e4c8fdddSAndrey Shinkevich if (ret > 0 || ret < 0) {
159e4c8fdddSAndrey Shinkevich local_flags |= BDRV_REQ_COPY_ON_READ;
160e4c8fdddSAndrey Shinkevich }
161e4c8fdddSAndrey Shinkevich /* Finish earlier if the end of a backing file has been reached */
162e4c8fdddSAndrey Shinkevich if (n == 0) {
163e4c8fdddSAndrey Shinkevich break;
164e4c8fdddSAndrey Shinkevich }
165e4c8fdddSAndrey Shinkevich }
166e4c8fdddSAndrey Shinkevich
167e275458bSAndrey Shinkevich /* Skip if neither read nor write are needed */
168e275458bSAndrey Shinkevich if ((local_flags & (BDRV_REQ_PREFETCH | BDRV_REQ_COPY_ON_READ)) !=
169e275458bSAndrey Shinkevich BDRV_REQ_PREFETCH) {
170e4c8fdddSAndrey Shinkevich ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
171e4c8fdddSAndrey Shinkevich local_flags);
172e4c8fdddSAndrey Shinkevich if (ret < 0) {
173e4c8fdddSAndrey Shinkevich return ret;
174e4c8fdddSAndrey Shinkevich }
175e275458bSAndrey Shinkevich }
176e4c8fdddSAndrey Shinkevich
177e4c8fdddSAndrey Shinkevich offset += n;
178e4c8fdddSAndrey Shinkevich qiov_offset += n;
179e4c8fdddSAndrey Shinkevich bytes -= n;
180e4c8fdddSAndrey Shinkevich }
181e4c8fdddSAndrey Shinkevich
182e4c8fdddSAndrey Shinkevich return 0;
183e4c8fdddSAndrey Shinkevich }
184e4c8fdddSAndrey Shinkevich
1856c6f24fdSMax Reitz
186b9b10c35SKevin Wolf static int coroutine_fn GRAPH_RDLOCK
cor_co_pwritev_part(BlockDriverState * bs,int64_t offset,int64_t bytes,QEMUIOVector * qiov,size_t qiov_offset,BdrvRequestFlags flags)187b9b10c35SKevin Wolf cor_co_pwritev_part(BlockDriverState *bs, int64_t offset, int64_t bytes,
188b9b10c35SKevin Wolf QEMUIOVector *qiov, size_t qiov_offset,
189e75abedaSVladimir Sementsov-Ogievskiy BdrvRequestFlags flags)
1906c6f24fdSMax Reitz {
1911252e03bSAndrey Shinkevich return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset,
1921252e03bSAndrey Shinkevich flags);
1936c6f24fdSMax Reitz }
1946c6f24fdSMax Reitz
1956c6f24fdSMax Reitz
196abaf8b75SKevin Wolf static int coroutine_fn GRAPH_RDLOCK
cor_co_pwrite_zeroes(BlockDriverState * bs,int64_t offset,int64_t bytes,BdrvRequestFlags flags)197abaf8b75SKevin Wolf cor_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
1986c6f24fdSMax Reitz BdrvRequestFlags flags)
1996c6f24fdSMax Reitz {
2006c6f24fdSMax Reitz return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
2016c6f24fdSMax Reitz }
2026c6f24fdSMax Reitz
2036c6f24fdSMax Reitz
2049a5a1c62SEmanuele Giuseppe Esposito static int coroutine_fn GRAPH_RDLOCK
cor_co_pdiscard(BlockDriverState * bs,int64_t offset,int64_t bytes)2059a5a1c62SEmanuele Giuseppe Esposito cor_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
2066c6f24fdSMax Reitz {
2070b9fd3f4SFam Zheng return bdrv_co_pdiscard(bs->file, offset, bytes);
2086c6f24fdSMax Reitz }
2096c6f24fdSMax Reitz
2106c6f24fdSMax Reitz
211b9b10c35SKevin Wolf static int coroutine_fn GRAPH_RDLOCK
cor_co_pwritev_compressed(BlockDriverState * bs,int64_t offset,int64_t bytes,QEMUIOVector * qiov)212b9b10c35SKevin Wolf cor_co_pwritev_compressed(BlockDriverState *bs, int64_t offset, int64_t bytes,
2134935e8beSMax Reitz QEMUIOVector *qiov)
2144935e8beSMax Reitz {
2154935e8beSMax Reitz return bdrv_co_pwritev(bs->file, offset, bytes, qiov,
2164935e8beSMax Reitz BDRV_REQ_WRITE_COMPRESSED);
2174935e8beSMax Reitz }
2184935e8beSMax Reitz
2194935e8beSMax Reitz
22079a292e5SKevin Wolf static void coroutine_fn GRAPH_RDLOCK
cor_co_eject(BlockDriverState * bs,bool eject_flag)22179a292e5SKevin Wolf cor_co_eject(BlockDriverState *bs, bool eject_flag)
2226c6f24fdSMax Reitz {
2232531b390SEmanuele Giuseppe Esposito bdrv_co_eject(bs->file->bs, eject_flag);
2246c6f24fdSMax Reitz }
2256c6f24fdSMax Reitz
2266c6f24fdSMax Reitz
22779a292e5SKevin Wolf static void coroutine_fn GRAPH_RDLOCK
cor_co_lock_medium(BlockDriverState * bs,bool locked)22879a292e5SKevin Wolf cor_co_lock_medium(BlockDriverState *bs, bool locked)
2296c6f24fdSMax Reitz {
2302c75261cSEmanuele Giuseppe Esposito bdrv_co_lock_medium(bs->file->bs, locked);
2316c6f24fdSMax Reitz }
2326c6f24fdSMax Reitz
2336c6f24fdSMax Reitz
cor_close(BlockDriverState * bs)2349275fc72SKevin Wolf static void GRAPH_UNLOCKED cor_close(BlockDriverState *bs)
235e4c8fdddSAndrey Shinkevich {
236e4c8fdddSAndrey Shinkevich BDRVStateCOR *s = bs->opaque;
237e4c8fdddSAndrey Shinkevich
2389275fc72SKevin Wolf GLOBAL_STATE_CODE();
2399275fc72SKevin Wolf
240e4c8fdddSAndrey Shinkevich if (s->chain_frozen) {
2419275fc72SKevin Wolf bdrv_graph_rdlock_main_loop();
242e4c8fdddSAndrey Shinkevich s->chain_frozen = false;
243e4c8fdddSAndrey Shinkevich bdrv_unfreeze_backing_chain(bs, s->bottom_bs);
2449275fc72SKevin Wolf bdrv_graph_rdunlock_main_loop();
245e4c8fdddSAndrey Shinkevich }
246e4c8fdddSAndrey Shinkevich
247e4c8fdddSAndrey Shinkevich bdrv_unref(s->bottom_bs);
248e4c8fdddSAndrey Shinkevich }
249e4c8fdddSAndrey Shinkevich
250e4c8fdddSAndrey Shinkevich
251782b9d06SAlberto Garcia static BlockDriver bdrv_copy_on_read = {
2526c6f24fdSMax Reitz .format_name = "copy-on-read",
25316e09a21SAndrey Shinkevich .instance_size = sizeof(BDRVStateCOR),
2546c6f24fdSMax Reitz
2556c6f24fdSMax Reitz .bdrv_open = cor_open,
256e4c8fdddSAndrey Shinkevich .bdrv_close = cor_close,
2576c6f24fdSMax Reitz .bdrv_child_perm = cor_child_perm,
2586c6f24fdSMax Reitz
259c86422c5SEmanuele Giuseppe Esposito .bdrv_co_getlength = cor_co_getlength,
2606c6f24fdSMax Reitz
2611252e03bSAndrey Shinkevich .bdrv_co_preadv_part = cor_co_preadv_part,
2621252e03bSAndrey Shinkevich .bdrv_co_pwritev_part = cor_co_pwritev_part,
2636c6f24fdSMax Reitz .bdrv_co_pwrite_zeroes = cor_co_pwrite_zeroes,
2646c6f24fdSMax Reitz .bdrv_co_pdiscard = cor_co_pdiscard,
2654935e8beSMax Reitz .bdrv_co_pwritev_compressed = cor_co_pwritev_compressed,
2666c6f24fdSMax Reitz
2672531b390SEmanuele Giuseppe Esposito .bdrv_co_eject = cor_co_eject,
2682c75261cSEmanuele Giuseppe Esposito .bdrv_co_lock_medium = cor_co_lock_medium,
2696c6f24fdSMax Reitz
2706c6f24fdSMax Reitz .is_filter = true,
2716c6f24fdSMax Reitz };
2726c6f24fdSMax Reitz
27316e09a21SAndrey Shinkevich
bdrv_cor_filter_drop(BlockDriverState * cor_filter_bs)2749275fc72SKevin Wolf void no_coroutine_fn bdrv_cor_filter_drop(BlockDriverState *cor_filter_bs)
27516e09a21SAndrey Shinkevich {
27616e09a21SAndrey Shinkevich BDRVStateCOR *s = cor_filter_bs->opaque;
27716e09a21SAndrey Shinkevich
2789275fc72SKevin Wolf GLOBAL_STATE_CODE();
2799275fc72SKevin Wolf
280e4c8fdddSAndrey Shinkevich /* unfreeze, as otherwise bdrv_replace_node() will fail */
281e4c8fdddSAndrey Shinkevich if (s->chain_frozen) {
2829275fc72SKevin Wolf GRAPH_RDLOCK_GUARD_MAINLOOP();
283e4c8fdddSAndrey Shinkevich s->chain_frozen = false;
284e4c8fdddSAndrey Shinkevich bdrv_unfreeze_backing_chain(cor_filter_bs, s->bottom_bs);
285e4c8fdddSAndrey Shinkevich }
286bcc8584cSVladimir Sementsov-Ogievskiy bdrv_drop_filter(cor_filter_bs, &error_abort);
28716e09a21SAndrey Shinkevich bdrv_unref(cor_filter_bs);
28816e09a21SAndrey Shinkevich }
28916e09a21SAndrey Shinkevich
29016e09a21SAndrey Shinkevich
bdrv_copy_on_read_init(void)2916c6f24fdSMax Reitz static void bdrv_copy_on_read_init(void)
2926c6f24fdSMax Reitz {
2936c6f24fdSMax Reitz bdrv_register(&bdrv_copy_on_read);
2946c6f24fdSMax Reitz }
2956c6f24fdSMax Reitz
2966c6f24fdSMax Reitz block_init(bdrv_copy_on_read_init);
297