1 /* 2 * Copy-on-read filter block driver 3 * 4 * Copyright (c) 2018 Red Hat, Inc. 5 * 6 * Author: 7 * Max Reitz <mreitz@redhat.com> 8 * 9 * This program is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU General Public License as 11 * published by the Free Software Foundation; either version 2 or 12 * (at your option) version 3 of the License. 13 * 14 * This program is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 * GNU General Public License for more details. 18 * 19 * You should have received a copy of the GNU General Public License 20 * along with this program; if not, see <http://www.gnu.org/licenses/>. 21 */ 22 23 #include "qemu/osdep.h" 24 #include "block/block_int.h" 25 #include "qemu/module.h" 26 #include "qapi/error.h" 27 #include "qapi/qmp/qdict.h" 28 #include "block/copy-on-read.h" 29 30 31 typedef struct BDRVStateCOR { 32 bool active; 33 BlockDriverState *bottom_bs; 34 bool chain_frozen; 35 } BDRVStateCOR; 36 37 38 static int cor_open(BlockDriverState *bs, QDict *options, int flags, 39 Error **errp) 40 { 41 BlockDriverState *bottom_bs = NULL; 42 BDRVStateCOR *state = bs->opaque; 43 /* Find a bottom node name, if any */ 44 const char *bottom_node = qdict_get_try_str(options, "bottom"); 45 46 bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, 47 BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, 48 false, errp); 49 if (!bs->file) { 50 return -EINVAL; 51 } 52 53 bs->supported_read_flags = BDRV_REQ_PREFETCH; 54 55 bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED | 56 (BDRV_REQ_FUA & bs->file->bs->supported_write_flags); 57 58 bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | 59 ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) & 60 bs->file->bs->supported_zero_flags); 61 62 if (bottom_node) { 63 bottom_bs = bdrv_find_node(bottom_node); 64 if (!bottom_bs) { 65 error_setg(errp, "Bottom node '%s' not found", bottom_node); 66 qdict_del(options, "bottom"); 67 return -EINVAL; 68 } 69 qdict_del(options, "bottom"); 70 71 if (!bottom_bs->drv) { 72 error_setg(errp, "Bottom node '%s' not opened", bottom_node); 73 return -EINVAL; 74 } 75 76 if (bottom_bs->drv->is_filter) { 77 error_setg(errp, "Bottom node '%s' is a filter", bottom_node); 78 return -EINVAL; 79 } 80 81 if (bdrv_freeze_backing_chain(bs, bottom_bs, errp) < 0) { 82 return -EINVAL; 83 } 84 state->chain_frozen = true; 85 86 /* 87 * We do freeze the chain, so it shouldn't be removed. Still, storing a 88 * pointer worth bdrv_ref(). 89 */ 90 bdrv_ref(bottom_bs); 91 } 92 state->active = true; 93 state->bottom_bs = bottom_bs; 94 95 /* 96 * We don't need to call bdrv_child_refresh_perms() now as the permissions 97 * will be updated later when the filter node gets its parent. 98 */ 99 100 return 0; 101 } 102 103 104 #define PERM_PASSTHROUGH (BLK_PERM_CONSISTENT_READ \ 105 | BLK_PERM_WRITE \ 106 | BLK_PERM_RESIZE) 107 #define PERM_UNCHANGED (BLK_PERM_ALL & ~PERM_PASSTHROUGH) 108 109 static void cor_child_perm(BlockDriverState *bs, BdrvChild *c, 110 BdrvChildRole role, 111 BlockReopenQueue *reopen_queue, 112 uint64_t perm, uint64_t shared, 113 uint64_t *nperm, uint64_t *nshared) 114 { 115 BDRVStateCOR *s = bs->opaque; 116 117 if (!s->active) { 118 /* 119 * While the filter is being removed 120 */ 121 *nperm = 0; 122 *nshared = BLK_PERM_ALL; 123 return; 124 } 125 126 *nperm = perm & PERM_PASSTHROUGH; 127 *nshared = (shared & PERM_PASSTHROUGH) | PERM_UNCHANGED; 128 129 /* We must not request write permissions for an inactive node, the child 130 * cannot provide it. */ 131 if (!(bs->open_flags & BDRV_O_INACTIVE)) { 132 *nperm |= BLK_PERM_WRITE_UNCHANGED; 133 } 134 } 135 136 137 static int64_t cor_getlength(BlockDriverState *bs) 138 { 139 return bdrv_getlength(bs->file->bs); 140 } 141 142 143 static int coroutine_fn cor_co_preadv_part(BlockDriverState *bs, 144 uint64_t offset, uint64_t bytes, 145 QEMUIOVector *qiov, 146 size_t qiov_offset, 147 int flags) 148 { 149 int64_t n; 150 int local_flags; 151 int ret; 152 BDRVStateCOR *state = bs->opaque; 153 154 if (!state->bottom_bs) { 155 return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset, 156 flags | BDRV_REQ_COPY_ON_READ); 157 } 158 159 while (bytes) { 160 local_flags = flags; 161 162 /* In case of failure, try to copy-on-read anyway */ 163 ret = bdrv_is_allocated(bs->file->bs, offset, bytes, &n); 164 if (ret <= 0) { 165 ret = bdrv_is_allocated_above(bdrv_backing_chain_next(bs->file->bs), 166 state->bottom_bs, true, offset, 167 n, &n); 168 if (ret > 0 || ret < 0) { 169 local_flags |= BDRV_REQ_COPY_ON_READ; 170 } 171 /* Finish earlier if the end of a backing file has been reached */ 172 if (n == 0) { 173 break; 174 } 175 } 176 177 /* Skip if neither read nor write are needed */ 178 if ((local_flags & (BDRV_REQ_PREFETCH | BDRV_REQ_COPY_ON_READ)) != 179 BDRV_REQ_PREFETCH) { 180 ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset, 181 local_flags); 182 if (ret < 0) { 183 return ret; 184 } 185 } 186 187 offset += n; 188 qiov_offset += n; 189 bytes -= n; 190 } 191 192 return 0; 193 } 194 195 196 static int coroutine_fn cor_co_pwritev_part(BlockDriverState *bs, 197 uint64_t offset, 198 uint64_t bytes, 199 QEMUIOVector *qiov, 200 size_t qiov_offset, int flags) 201 { 202 return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset, 203 flags); 204 } 205 206 207 static int coroutine_fn cor_co_pwrite_zeroes(BlockDriverState *bs, 208 int64_t offset, int bytes, 209 BdrvRequestFlags flags) 210 { 211 return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags); 212 } 213 214 215 static int coroutine_fn cor_co_pdiscard(BlockDriverState *bs, 216 int64_t offset, int bytes) 217 { 218 return bdrv_co_pdiscard(bs->file, offset, bytes); 219 } 220 221 222 static int coroutine_fn cor_co_pwritev_compressed(BlockDriverState *bs, 223 uint64_t offset, 224 uint64_t bytes, 225 QEMUIOVector *qiov) 226 { 227 return bdrv_co_pwritev(bs->file, offset, bytes, qiov, 228 BDRV_REQ_WRITE_COMPRESSED); 229 } 230 231 232 static void cor_eject(BlockDriverState *bs, bool eject_flag) 233 { 234 bdrv_eject(bs->file->bs, eject_flag); 235 } 236 237 238 static void cor_lock_medium(BlockDriverState *bs, bool locked) 239 { 240 bdrv_lock_medium(bs->file->bs, locked); 241 } 242 243 244 static void cor_close(BlockDriverState *bs) 245 { 246 BDRVStateCOR *s = bs->opaque; 247 248 if (s->chain_frozen) { 249 s->chain_frozen = false; 250 bdrv_unfreeze_backing_chain(bs, s->bottom_bs); 251 } 252 253 bdrv_unref(s->bottom_bs); 254 } 255 256 257 static BlockDriver bdrv_copy_on_read = { 258 .format_name = "copy-on-read", 259 .instance_size = sizeof(BDRVStateCOR), 260 261 .bdrv_open = cor_open, 262 .bdrv_close = cor_close, 263 .bdrv_child_perm = cor_child_perm, 264 265 .bdrv_getlength = cor_getlength, 266 267 .bdrv_co_preadv_part = cor_co_preadv_part, 268 .bdrv_co_pwritev_part = cor_co_pwritev_part, 269 .bdrv_co_pwrite_zeroes = cor_co_pwrite_zeroes, 270 .bdrv_co_pdiscard = cor_co_pdiscard, 271 .bdrv_co_pwritev_compressed = cor_co_pwritev_compressed, 272 273 .bdrv_eject = cor_eject, 274 .bdrv_lock_medium = cor_lock_medium, 275 276 .has_variable_length = true, 277 .is_filter = true, 278 }; 279 280 281 void bdrv_cor_filter_drop(BlockDriverState *cor_filter_bs) 282 { 283 BdrvChild *child; 284 BlockDriverState *bs; 285 BDRVStateCOR *s = cor_filter_bs->opaque; 286 287 child = bdrv_filter_child(cor_filter_bs); 288 if (!child) { 289 return; 290 } 291 bs = child->bs; 292 293 /* Retain the BDS until we complete the graph change. */ 294 bdrv_ref(bs); 295 /* Hold a guest back from writing while permissions are being reset. */ 296 bdrv_drained_begin(bs); 297 /* Drop permissions before the graph change. */ 298 s->active = false; 299 /* unfreeze, as otherwise bdrv_replace_node() will fail */ 300 if (s->chain_frozen) { 301 s->chain_frozen = false; 302 bdrv_unfreeze_backing_chain(cor_filter_bs, s->bottom_bs); 303 } 304 bdrv_child_refresh_perms(cor_filter_bs, child, &error_abort); 305 bdrv_replace_node(cor_filter_bs, bs, &error_abort); 306 307 bdrv_drained_end(bs); 308 bdrv_unref(bs); 309 bdrv_unref(cor_filter_bs); 310 } 311 312 313 static void bdrv_copy_on_read_init(void) 314 { 315 bdrv_register(&bdrv_copy_on_read); 316 } 317 318 block_init(bdrv_copy_on_read_init); 319