1 /* 2 * Live block commit 3 * 4 * Copyright Red Hat, Inc. 2012 5 * 6 * Authors: 7 * Jeff Cody <jcody@redhat.com> 8 * Based on stream.c by Stefan Hajnoczi 9 * 10 * This work is licensed under the terms of the GNU LGPL, version 2 or later. 11 * See the COPYING.LIB file in the top-level directory. 12 * 13 */ 14 15 #include "qemu/osdep.h" 16 #include "trace.h" 17 #include "block/block_int.h" 18 #include "block/blockjob.h" 19 #include "qapi/error.h" 20 #include "qapi/qmp/qerror.h" 21 #include "qemu/ratelimit.h" 22 #include "sysemu/block-backend.h" 23 24 enum { 25 /* 26 * Size of data buffer for populating the image file. This should be large 27 * enough to process multiple clusters in a single call, so that populating 28 * contiguous regions of the image is efficient. 29 */ 30 COMMIT_BUFFER_SIZE = 512 * 1024, /* in bytes */ 31 }; 32 33 #define SLICE_TIME 100000000ULL /* ns */ 34 35 typedef struct CommitBlockJob { 36 BlockJob common; 37 RateLimit limit; 38 BlockDriverState *active; 39 BlockBackend *top; 40 BlockBackend *base; 41 BlockdevOnError on_error; 42 int base_flags; 43 int orig_overlay_flags; 44 char *backing_file_str; 45 } CommitBlockJob; 46 47 static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base, 48 int64_t sector_num, int nb_sectors, 49 void *buf) 50 { 51 int ret = 0; 52 QEMUIOVector qiov; 53 struct iovec iov = { 54 .iov_base = buf, 55 .iov_len = nb_sectors * BDRV_SECTOR_SIZE, 56 }; 57 58 qemu_iovec_init_external(&qiov, &iov, 1); 59 60 ret = blk_co_preadv(bs, sector_num * BDRV_SECTOR_SIZE, 61 qiov.size, &qiov, 0); 62 if (ret < 0) { 63 return ret; 64 } 65 66 ret = blk_co_pwritev(base, sector_num * BDRV_SECTOR_SIZE, 67 qiov.size, &qiov, 0); 68 if (ret < 0) { 69 return ret; 70 } 71 72 return 0; 73 } 74 75 typedef struct { 76 int ret; 77 } CommitCompleteData; 78 79 static void commit_complete(BlockJob *job, void *opaque) 80 { 81 CommitBlockJob *s = container_of(job, CommitBlockJob, common); 82 CommitCompleteData *data = opaque; 83 BlockDriverState *active = s->active; 84 BlockDriverState *top = blk_bs(s->top); 85 BlockDriverState *base = blk_bs(s->base); 86 BlockDriverState *overlay_bs; 87 int ret = data->ret; 88 89 if (!block_job_is_cancelled(&s->common) && ret == 0) { 90 /* success */ 91 ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str); 92 } 93 94 /* restore base open flags here if appropriate (e.g., change the base back 95 * to r/o). These reopens do not need to be atomic, since we won't abort 96 * even on failure here */ 97 if (s->base_flags != bdrv_get_flags(base)) { 98 bdrv_reopen(base, s->base_flags, NULL); 99 } 100 overlay_bs = bdrv_find_overlay(active, top); 101 if (overlay_bs && s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) { 102 bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL); 103 } 104 g_free(s->backing_file_str); 105 blk_unref(s->top); 106 blk_unref(s->base); 107 block_job_completed(&s->common, ret); 108 g_free(data); 109 } 110 111 static void coroutine_fn commit_run(void *opaque) 112 { 113 CommitBlockJob *s = opaque; 114 CommitCompleteData *data; 115 int64_t sector_num, end; 116 uint64_t delay_ns = 0; 117 int ret = 0; 118 int n = 0; 119 void *buf = NULL; 120 int bytes_written = 0; 121 int64_t base_len; 122 123 ret = s->common.len = blk_getlength(s->top); 124 125 126 if (s->common.len < 0) { 127 goto out; 128 } 129 130 ret = base_len = blk_getlength(s->base); 131 if (base_len < 0) { 132 goto out; 133 } 134 135 if (base_len < s->common.len) { 136 ret = blk_truncate(s->base, s->common.len); 137 if (ret) { 138 goto out; 139 } 140 } 141 142 end = s->common.len >> BDRV_SECTOR_BITS; 143 buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE); 144 145 for (sector_num = 0; sector_num < end; sector_num += n) { 146 bool copy; 147 148 /* Note that even when no rate limit is applied we need to yield 149 * with no pending I/O here so that bdrv_drain_all() returns. 150 */ 151 block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns); 152 if (block_job_is_cancelled(&s->common)) { 153 break; 154 } 155 /* Copy if allocated above the base */ 156 ret = bdrv_is_allocated_above(blk_bs(s->top), blk_bs(s->base), 157 sector_num, 158 COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE, 159 &n); 160 copy = (ret == 1); 161 trace_commit_one_iteration(s, sector_num, n, ret); 162 if (copy) { 163 ret = commit_populate(s->top, s->base, sector_num, n, buf); 164 bytes_written += n * BDRV_SECTOR_SIZE; 165 } 166 if (ret < 0) { 167 BlockErrorAction action = 168 block_job_error_action(&s->common, false, s->on_error, -ret); 169 if (action == BLOCK_ERROR_ACTION_REPORT) { 170 goto out; 171 } else { 172 n = 0; 173 continue; 174 } 175 } 176 /* Publish progress */ 177 s->common.offset += n * BDRV_SECTOR_SIZE; 178 179 if (copy && s->common.speed) { 180 delay_ns = ratelimit_calculate_delay(&s->limit, n); 181 } 182 } 183 184 ret = 0; 185 186 out: 187 qemu_vfree(buf); 188 189 data = g_malloc(sizeof(*data)); 190 data->ret = ret; 191 block_job_defer_to_main_loop(&s->common, commit_complete, data); 192 } 193 194 static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp) 195 { 196 CommitBlockJob *s = container_of(job, CommitBlockJob, common); 197 198 if (speed < 0) { 199 error_setg(errp, QERR_INVALID_PARAMETER, "speed"); 200 return; 201 } 202 ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME); 203 } 204 205 static const BlockJobDriver commit_job_driver = { 206 .instance_size = sizeof(CommitBlockJob), 207 .job_type = BLOCK_JOB_TYPE_COMMIT, 208 .set_speed = commit_set_speed, 209 }; 210 211 void commit_start(const char *job_id, BlockDriverState *bs, 212 BlockDriverState *base, BlockDriverState *top, int64_t speed, 213 BlockdevOnError on_error, BlockCompletionFunc *cb, 214 void *opaque, const char *backing_file_str, Error **errp) 215 { 216 CommitBlockJob *s; 217 BlockReopenQueue *reopen_queue = NULL; 218 int orig_overlay_flags; 219 int orig_base_flags; 220 BlockDriverState *overlay_bs; 221 Error *local_err = NULL; 222 223 assert(top != bs); 224 if (top == base) { 225 error_setg(errp, "Invalid files for merge: top and base are the same"); 226 return; 227 } 228 229 overlay_bs = bdrv_find_overlay(bs, top); 230 231 if (overlay_bs == NULL) { 232 error_setg(errp, "Could not find overlay image for %s:", top->filename); 233 return; 234 } 235 236 s = block_job_create(job_id, &commit_job_driver, bs, speed, 237 cb, opaque, errp); 238 if (!s) { 239 return; 240 } 241 242 orig_base_flags = bdrv_get_flags(base); 243 orig_overlay_flags = bdrv_get_flags(overlay_bs); 244 245 /* convert base & overlay_bs to r/w, if necessary */ 246 if (!(orig_overlay_flags & BDRV_O_RDWR)) { 247 reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs, NULL, 248 orig_overlay_flags | BDRV_O_RDWR); 249 } 250 if (!(orig_base_flags & BDRV_O_RDWR)) { 251 reopen_queue = bdrv_reopen_queue(reopen_queue, base, NULL, 252 orig_base_flags | BDRV_O_RDWR); 253 } 254 if (reopen_queue) { 255 bdrv_reopen_multiple(reopen_queue, &local_err); 256 if (local_err != NULL) { 257 error_propagate(errp, local_err); 258 block_job_unref(&s->common); 259 return; 260 } 261 } 262 263 264 s->base = blk_new(); 265 blk_insert_bs(s->base, base); 266 267 s->top = blk_new(); 268 blk_insert_bs(s->top, top); 269 270 s->active = bs; 271 272 s->base_flags = orig_base_flags; 273 s->orig_overlay_flags = orig_overlay_flags; 274 275 s->backing_file_str = g_strdup(backing_file_str); 276 277 s->on_error = on_error; 278 s->common.co = qemu_coroutine_create(commit_run, s); 279 280 trace_commit_start(bs, base, top, s, s->common.co, opaque); 281 qemu_coroutine_enter(s->common.co); 282 } 283 284 285 #define COMMIT_BUF_SECTORS 2048 286 287 /* commit COW file into the raw image */ 288 int bdrv_commit(BlockDriverState *bs) 289 { 290 BlockBackend *src, *backing; 291 BlockDriver *drv = bs->drv; 292 int64_t sector, total_sectors, length, backing_length; 293 int n, ro, open_flags; 294 int ret = 0; 295 uint8_t *buf = NULL; 296 297 if (!drv) 298 return -ENOMEDIUM; 299 300 if (!bs->backing) { 301 return -ENOTSUP; 302 } 303 304 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) || 305 bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) { 306 return -EBUSY; 307 } 308 309 ro = bs->backing->bs->read_only; 310 open_flags = bs->backing->bs->open_flags; 311 312 if (ro) { 313 if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) { 314 return -EACCES; 315 } 316 } 317 318 src = blk_new(); 319 blk_insert_bs(src, bs); 320 321 backing = blk_new(); 322 blk_insert_bs(backing, bs->backing->bs); 323 324 length = blk_getlength(src); 325 if (length < 0) { 326 ret = length; 327 goto ro_cleanup; 328 } 329 330 backing_length = blk_getlength(backing); 331 if (backing_length < 0) { 332 ret = backing_length; 333 goto ro_cleanup; 334 } 335 336 /* If our top snapshot is larger than the backing file image, 337 * grow the backing file image if possible. If not possible, 338 * we must return an error */ 339 if (length > backing_length) { 340 ret = blk_truncate(backing, length); 341 if (ret < 0) { 342 goto ro_cleanup; 343 } 344 } 345 346 total_sectors = length >> BDRV_SECTOR_BITS; 347 348 /* blk_try_blockalign() for src will choose an alignment that works for 349 * backing as well, so no need to compare the alignment manually. */ 350 buf = blk_try_blockalign(src, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE); 351 if (buf == NULL) { 352 ret = -ENOMEM; 353 goto ro_cleanup; 354 } 355 356 for (sector = 0; sector < total_sectors; sector += n) { 357 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n); 358 if (ret < 0) { 359 goto ro_cleanup; 360 } 361 if (ret) { 362 ret = blk_pread(src, sector * BDRV_SECTOR_SIZE, buf, 363 n * BDRV_SECTOR_SIZE); 364 if (ret < 0) { 365 goto ro_cleanup; 366 } 367 368 ret = blk_pwrite(backing, sector * BDRV_SECTOR_SIZE, buf, 369 n * BDRV_SECTOR_SIZE, 0); 370 if (ret < 0) { 371 goto ro_cleanup; 372 } 373 } 374 } 375 376 if (drv->bdrv_make_empty) { 377 ret = drv->bdrv_make_empty(bs); 378 if (ret < 0) { 379 goto ro_cleanup; 380 } 381 blk_flush(src); 382 } 383 384 /* 385 * Make sure all data we wrote to the backing device is actually 386 * stable on disk. 387 */ 388 blk_flush(backing); 389 390 ret = 0; 391 ro_cleanup: 392 qemu_vfree(buf); 393 394 blk_unref(src); 395 blk_unref(backing); 396 397 if (ro) { 398 /* ignoring error return here */ 399 bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL); 400 } 401 402 return ret; 403 } 404