1 /* 2 * QEMU backup 3 * 4 * Copyright (C) 2013 Proxmox Server Solutions 5 * 6 * Authors: 7 * Dietmar Maurer (dietmar@proxmox.com) 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 * 12 */ 13 14 #include "qemu/osdep.h" 15 16 #include "trace.h" 17 #include "block/block.h" 18 #include "block/block_int.h" 19 #include "block/blockjob_int.h" 20 #include "block/block_backup.h" 21 #include "qapi/error.h" 22 #include "qapi/qmp/qerror.h" 23 #include "qemu/ratelimit.h" 24 #include "qemu/cutils.h" 25 #include "sysemu/block-backend.h" 26 #include "qemu/bitmap.h" 27 #include "qemu/error-report.h" 28 29 #define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16) 30 31 typedef struct CowRequest { 32 int64_t start_byte; 33 int64_t end_byte; 34 QLIST_ENTRY(CowRequest) list; 35 CoQueue wait_queue; /* coroutines blocked on this request */ 36 } CowRequest; 37 38 typedef struct BackupBlockJob { 39 BlockJob common; 40 BlockBackend *target; 41 /* bitmap for sync=incremental */ 42 BdrvDirtyBitmap *sync_bitmap; 43 MirrorSyncMode sync_mode; 44 BlockdevOnError on_source_error; 45 BlockdevOnError on_target_error; 46 CoRwlock flush_rwlock; 47 uint64_t len; 48 uint64_t bytes_read; 49 int64_t cluster_size; 50 bool compress; 51 NotifierWithReturn before_write; 52 QLIST_HEAD(, CowRequest) inflight_reqs; 53 54 HBitmap *copy_bitmap; 55 bool use_copy_range; 56 int64_t copy_range_size; 57 58 bool serialize_target_writes; 59 } BackupBlockJob; 60 61 static const BlockJobDriver backup_job_driver; 62 63 /* See if in-flight requests overlap and wait for them to complete */ 64 static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job, 65 int64_t start, 66 int64_t end) 67 { 68 CowRequest *req; 69 bool retry; 70 71 do { 72 retry = false; 73 QLIST_FOREACH(req, &job->inflight_reqs, list) { 74 if (end > req->start_byte && start < req->end_byte) { 75 qemu_co_queue_wait(&req->wait_queue, NULL); 76 retry = true; 77 break; 78 } 79 } 80 } while (retry); 81 } 82 83 /* Keep track of an in-flight request */ 84 static void cow_request_begin(CowRequest *req, BackupBlockJob *job, 85 int64_t start, int64_t end) 86 { 87 req->start_byte = start; 88 req->end_byte = end; 89 qemu_co_queue_init(&req->wait_queue); 90 QLIST_INSERT_HEAD(&job->inflight_reqs, req, list); 91 } 92 93 /* Forget about a completed request */ 94 static void cow_request_end(CowRequest *req) 95 { 96 QLIST_REMOVE(req, list); 97 qemu_co_queue_restart_all(&req->wait_queue); 98 } 99 100 /* Copy range to target with a bounce buffer and return the bytes copied. If 101 * error occurred, return a negative error number */ 102 static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job, 103 int64_t start, 104 int64_t end, 105 bool is_write_notifier, 106 bool *error_is_read, 107 void **bounce_buffer) 108 { 109 int ret; 110 BlockBackend *blk = job->common.blk; 111 int nbytes; 112 int read_flags = is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0; 113 int write_flags = job->serialize_target_writes ? BDRV_REQ_SERIALISING : 0; 114 115 assert(QEMU_IS_ALIGNED(start, job->cluster_size)); 116 hbitmap_reset(job->copy_bitmap, start, job->cluster_size); 117 nbytes = MIN(job->cluster_size, job->len - start); 118 if (!*bounce_buffer) { 119 *bounce_buffer = blk_blockalign(blk, job->cluster_size); 120 } 121 122 ret = blk_co_pread(blk, start, nbytes, *bounce_buffer, read_flags); 123 if (ret < 0) { 124 trace_backup_do_cow_read_fail(job, start, ret); 125 if (error_is_read) { 126 *error_is_read = true; 127 } 128 goto fail; 129 } 130 131 if (buffer_is_zero(*bounce_buffer, nbytes)) { 132 ret = blk_co_pwrite_zeroes(job->target, start, 133 nbytes, write_flags | BDRV_REQ_MAY_UNMAP); 134 } else { 135 ret = blk_co_pwrite(job->target, start, 136 nbytes, *bounce_buffer, write_flags | 137 (job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0)); 138 } 139 if (ret < 0) { 140 trace_backup_do_cow_write_fail(job, start, ret); 141 if (error_is_read) { 142 *error_is_read = false; 143 } 144 goto fail; 145 } 146 147 return nbytes; 148 fail: 149 hbitmap_set(job->copy_bitmap, start, job->cluster_size); 150 return ret; 151 152 } 153 154 /* Copy range to target and return the bytes copied. If error occurred, return a 155 * negative error number. */ 156 static int coroutine_fn backup_cow_with_offload(BackupBlockJob *job, 157 int64_t start, 158 int64_t end, 159 bool is_write_notifier) 160 { 161 int ret; 162 int nr_clusters; 163 BlockBackend *blk = job->common.blk; 164 int nbytes; 165 int read_flags = is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0; 166 int write_flags = job->serialize_target_writes ? BDRV_REQ_SERIALISING : 0; 167 168 assert(QEMU_IS_ALIGNED(job->copy_range_size, job->cluster_size)); 169 assert(QEMU_IS_ALIGNED(start, job->cluster_size)); 170 nbytes = MIN(job->copy_range_size, end - start); 171 nr_clusters = DIV_ROUND_UP(nbytes, job->cluster_size); 172 hbitmap_reset(job->copy_bitmap, start, job->cluster_size * nr_clusters); 173 ret = blk_co_copy_range(blk, start, job->target, start, nbytes, 174 read_flags, write_flags); 175 if (ret < 0) { 176 trace_backup_do_cow_copy_range_fail(job, start, ret); 177 hbitmap_set(job->copy_bitmap, start, job->cluster_size * nr_clusters); 178 return ret; 179 } 180 181 return nbytes; 182 } 183 184 static int coroutine_fn backup_do_cow(BackupBlockJob *job, 185 int64_t offset, uint64_t bytes, 186 bool *error_is_read, 187 bool is_write_notifier) 188 { 189 CowRequest cow_request; 190 int ret = 0; 191 int64_t start, end; /* bytes */ 192 void *bounce_buffer = NULL; 193 194 qemu_co_rwlock_rdlock(&job->flush_rwlock); 195 196 start = QEMU_ALIGN_DOWN(offset, job->cluster_size); 197 end = QEMU_ALIGN_UP(bytes + offset, job->cluster_size); 198 199 trace_backup_do_cow_enter(job, start, offset, bytes); 200 201 wait_for_overlapping_requests(job, start, end); 202 cow_request_begin(&cow_request, job, start, end); 203 204 while (start < end) { 205 int64_t dirty_end; 206 207 if (!hbitmap_get(job->copy_bitmap, start)) { 208 trace_backup_do_cow_skip(job, start); 209 start += job->cluster_size; 210 continue; /* already copied */ 211 } 212 213 dirty_end = hbitmap_next_zero(job->copy_bitmap, start, (end - start)); 214 if (dirty_end < 0) { 215 dirty_end = end; 216 } 217 218 trace_backup_do_cow_process(job, start); 219 220 if (job->use_copy_range) { 221 ret = backup_cow_with_offload(job, start, dirty_end, 222 is_write_notifier); 223 if (ret < 0) { 224 job->use_copy_range = false; 225 } 226 } 227 if (!job->use_copy_range) { 228 ret = backup_cow_with_bounce_buffer(job, start, dirty_end, 229 is_write_notifier, 230 error_is_read, &bounce_buffer); 231 } 232 if (ret < 0) { 233 break; 234 } 235 236 /* Publish progress, guest I/O counts as progress too. Note that the 237 * offset field is an opaque progress value, it is not a disk offset. 238 */ 239 start += ret; 240 job->bytes_read += ret; 241 job_progress_update(&job->common.job, ret); 242 ret = 0; 243 } 244 245 if (bounce_buffer) { 246 qemu_vfree(bounce_buffer); 247 } 248 249 cow_request_end(&cow_request); 250 251 trace_backup_do_cow_return(job, offset, bytes, ret); 252 253 qemu_co_rwlock_unlock(&job->flush_rwlock); 254 255 return ret; 256 } 257 258 static int coroutine_fn backup_before_write_notify( 259 NotifierWithReturn *notifier, 260 void *opaque) 261 { 262 BackupBlockJob *job = container_of(notifier, BackupBlockJob, before_write); 263 BdrvTrackedRequest *req = opaque; 264 265 assert(req->bs == blk_bs(job->common.blk)); 266 assert(QEMU_IS_ALIGNED(req->offset, BDRV_SECTOR_SIZE)); 267 assert(QEMU_IS_ALIGNED(req->bytes, BDRV_SECTOR_SIZE)); 268 269 return backup_do_cow(job, req->offset, req->bytes, NULL, true); 270 } 271 272 static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret) 273 { 274 BdrvDirtyBitmap *bm; 275 BlockDriverState *bs = blk_bs(job->common.blk); 276 277 if (ret < 0) { 278 /* Merge the successor back into the parent, delete nothing. */ 279 bm = bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL); 280 assert(bm); 281 } else { 282 /* Everything is fine, delete this bitmap and install the backup. */ 283 bm = bdrv_dirty_bitmap_abdicate(bs, job->sync_bitmap, NULL); 284 assert(bm); 285 } 286 } 287 288 static void backup_commit(Job *job) 289 { 290 BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); 291 if (s->sync_bitmap) { 292 backup_cleanup_sync_bitmap(s, 0); 293 } 294 } 295 296 static void backup_abort(Job *job) 297 { 298 BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); 299 if (s->sync_bitmap) { 300 backup_cleanup_sync_bitmap(s, -1); 301 } 302 } 303 304 static void backup_clean(Job *job) 305 { 306 BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); 307 assert(s->target); 308 blk_unref(s->target); 309 s->target = NULL; 310 311 if (s->copy_bitmap) { 312 hbitmap_free(s->copy_bitmap); 313 s->copy_bitmap = NULL; 314 } 315 } 316 317 void backup_do_checkpoint(BlockJob *job, Error **errp) 318 { 319 BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common); 320 321 assert(block_job_driver(job) == &backup_job_driver); 322 323 if (backup_job->sync_mode != MIRROR_SYNC_MODE_NONE) { 324 error_setg(errp, "The backup job only supports block checkpoint in" 325 " sync=none mode"); 326 return; 327 } 328 329 hbitmap_set(backup_job->copy_bitmap, 0, backup_job->len); 330 } 331 332 static void backup_drain(BlockJob *job) 333 { 334 BackupBlockJob *s = container_of(job, BackupBlockJob, common); 335 336 /* Need to keep a reference in case blk_drain triggers execution 337 * of backup_complete... 338 */ 339 if (s->target) { 340 BlockBackend *target = s->target; 341 blk_ref(target); 342 blk_drain(target); 343 blk_unref(target); 344 } 345 } 346 347 static BlockErrorAction backup_error_action(BackupBlockJob *job, 348 bool read, int error) 349 { 350 if (read) { 351 return block_job_error_action(&job->common, job->on_source_error, 352 true, error); 353 } else { 354 return block_job_error_action(&job->common, job->on_target_error, 355 false, error); 356 } 357 } 358 359 static bool coroutine_fn yield_and_check(BackupBlockJob *job) 360 { 361 uint64_t delay_ns; 362 363 if (job_is_cancelled(&job->common.job)) { 364 return true; 365 } 366 367 /* We need to yield even for delay_ns = 0 so that bdrv_drain_all() can 368 * return. Without a yield, the VM would not reboot. */ 369 delay_ns = block_job_ratelimit_get_delay(&job->common, job->bytes_read); 370 job->bytes_read = 0; 371 job_sleep_ns(&job->common.job, delay_ns); 372 373 if (job_is_cancelled(&job->common.job)) { 374 return true; 375 } 376 377 return false; 378 } 379 380 static bool bdrv_is_unallocated_range(BlockDriverState *bs, 381 int64_t offset, int64_t bytes) 382 { 383 int64_t end = offset + bytes; 384 385 while (offset < end && !bdrv_is_allocated(bs, offset, bytes, &bytes)) { 386 if (bytes == 0) { 387 return true; 388 } 389 offset += bytes; 390 bytes = end - offset; 391 } 392 393 return offset >= end; 394 } 395 396 static int coroutine_fn backup_loop(BackupBlockJob *job) 397 { 398 int ret; 399 bool error_is_read; 400 int64_t offset; 401 HBitmapIter hbi; 402 BlockDriverState *bs = blk_bs(job->common.blk); 403 404 hbitmap_iter_init(&hbi, job->copy_bitmap, 0); 405 while ((offset = hbitmap_iter_next(&hbi)) != -1) { 406 if (job->sync_mode == MIRROR_SYNC_MODE_TOP && 407 bdrv_is_unallocated_range(bs, offset, job->cluster_size)) 408 { 409 hbitmap_reset(job->copy_bitmap, offset, job->cluster_size); 410 continue; 411 } 412 413 do { 414 if (yield_and_check(job)) { 415 return 0; 416 } 417 ret = backup_do_cow(job, offset, 418 job->cluster_size, &error_is_read, false); 419 if (ret < 0 && backup_error_action(job, error_is_read, -ret) == 420 BLOCK_ERROR_ACTION_REPORT) 421 { 422 return ret; 423 } 424 } while (ret < 0); 425 } 426 427 return 0; 428 } 429 430 /* init copy_bitmap from sync_bitmap */ 431 static void backup_incremental_init_copy_bitmap(BackupBlockJob *job) 432 { 433 uint64_t offset = 0; 434 uint64_t bytes = job->len; 435 436 while (bdrv_dirty_bitmap_next_dirty_area(job->sync_bitmap, 437 &offset, &bytes)) 438 { 439 hbitmap_set(job->copy_bitmap, offset, bytes); 440 441 offset += bytes; 442 if (offset >= job->len) { 443 break; 444 } 445 bytes = job->len - offset; 446 } 447 448 /* TODO job_progress_set_remaining() would make more sense */ 449 job_progress_update(&job->common.job, 450 job->len - hbitmap_count(job->copy_bitmap)); 451 } 452 453 static int coroutine_fn backup_run(Job *job, Error **errp) 454 { 455 BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); 456 BlockDriverState *bs = blk_bs(s->common.blk); 457 int ret = 0; 458 459 QLIST_INIT(&s->inflight_reqs); 460 qemu_co_rwlock_init(&s->flush_rwlock); 461 462 job_progress_set_remaining(job, s->len); 463 464 if (s->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) { 465 backup_incremental_init_copy_bitmap(s); 466 } else { 467 hbitmap_set(s->copy_bitmap, 0, s->len); 468 } 469 470 s->before_write.notify = backup_before_write_notify; 471 bdrv_add_before_write_notifier(bs, &s->before_write); 472 473 if (s->sync_mode == MIRROR_SYNC_MODE_NONE) { 474 /* All bits are set in copy_bitmap to allow any cluster to be copied. 475 * This does not actually require them to be copied. */ 476 while (!job_is_cancelled(job)) { 477 /* Yield until the job is cancelled. We just let our before_write 478 * notify callback service CoW requests. */ 479 job_yield(job); 480 } 481 } else { 482 ret = backup_loop(s); 483 } 484 485 notifier_with_return_remove(&s->before_write); 486 487 /* wait until pending backup_do_cow() calls have completed */ 488 qemu_co_rwlock_wrlock(&s->flush_rwlock); 489 qemu_co_rwlock_unlock(&s->flush_rwlock); 490 491 return ret; 492 } 493 494 static const BlockJobDriver backup_job_driver = { 495 .job_driver = { 496 .instance_size = sizeof(BackupBlockJob), 497 .job_type = JOB_TYPE_BACKUP, 498 .free = block_job_free, 499 .user_resume = block_job_user_resume, 500 .drain = block_job_drain, 501 .run = backup_run, 502 .commit = backup_commit, 503 .abort = backup_abort, 504 .clean = backup_clean, 505 }, 506 .drain = backup_drain, 507 }; 508 509 static int64_t backup_calculate_cluster_size(BlockDriverState *target, 510 Error **errp) 511 { 512 int ret; 513 BlockDriverInfo bdi; 514 515 /* 516 * If there is no backing file on the target, we cannot rely on COW if our 517 * backup cluster size is smaller than the target cluster size. Even for 518 * targets with a backing file, try to avoid COW if possible. 519 */ 520 ret = bdrv_get_info(target, &bdi); 521 if (ret == -ENOTSUP && !target->backing) { 522 /* Cluster size is not defined */ 523 warn_report("The target block device doesn't provide " 524 "information about the block size and it doesn't have a " 525 "backing file. The default block size of %u bytes is " 526 "used. If the actual block size of the target exceeds " 527 "this default, the backup may be unusable", 528 BACKUP_CLUSTER_SIZE_DEFAULT); 529 return BACKUP_CLUSTER_SIZE_DEFAULT; 530 } else if (ret < 0 && !target->backing) { 531 error_setg_errno(errp, -ret, 532 "Couldn't determine the cluster size of the target image, " 533 "which has no backing file"); 534 error_append_hint(errp, 535 "Aborting, since this may create an unusable destination image\n"); 536 return ret; 537 } else if (ret < 0 && target->backing) { 538 /* Not fatal; just trudge on ahead. */ 539 return BACKUP_CLUSTER_SIZE_DEFAULT; 540 } 541 542 return MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size); 543 } 544 545 BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, 546 BlockDriverState *target, int64_t speed, 547 MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap, 548 bool compress, 549 BlockdevOnError on_source_error, 550 BlockdevOnError on_target_error, 551 int creation_flags, 552 BlockCompletionFunc *cb, void *opaque, 553 JobTxn *txn, Error **errp) 554 { 555 int64_t len; 556 BackupBlockJob *job = NULL; 557 int ret; 558 int64_t cluster_size; 559 HBitmap *copy_bitmap = NULL; 560 561 assert(bs); 562 assert(target); 563 564 if (bs == target) { 565 error_setg(errp, "Source and target cannot be the same"); 566 return NULL; 567 } 568 569 if (!bdrv_is_inserted(bs)) { 570 error_setg(errp, "Device is not inserted: %s", 571 bdrv_get_device_name(bs)); 572 return NULL; 573 } 574 575 if (!bdrv_is_inserted(target)) { 576 error_setg(errp, "Device is not inserted: %s", 577 bdrv_get_device_name(target)); 578 return NULL; 579 } 580 581 if (compress && target->drv->bdrv_co_pwritev_compressed == NULL) { 582 error_setg(errp, "Compression is not supported for this drive %s", 583 bdrv_get_device_name(target)); 584 return NULL; 585 } 586 587 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { 588 return NULL; 589 } 590 591 if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_BACKUP_TARGET, errp)) { 592 return NULL; 593 } 594 595 if (sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) { 596 if (!sync_bitmap) { 597 error_setg(errp, "must provide a valid bitmap name for " 598 "\"incremental\" sync mode"); 599 return NULL; 600 } 601 602 /* Create a new bitmap, and freeze/disable this one. */ 603 if (bdrv_dirty_bitmap_create_successor(bs, sync_bitmap, errp) < 0) { 604 return NULL; 605 } 606 } else if (sync_bitmap) { 607 error_setg(errp, 608 "a sync_bitmap was provided to backup_run, " 609 "but received an incompatible sync_mode (%s)", 610 MirrorSyncMode_str(sync_mode)); 611 return NULL; 612 } 613 614 len = bdrv_getlength(bs); 615 if (len < 0) { 616 error_setg_errno(errp, -len, "unable to get length for '%s'", 617 bdrv_get_device_name(bs)); 618 goto error; 619 } 620 621 cluster_size = backup_calculate_cluster_size(target, errp); 622 if (cluster_size < 0) { 623 goto error; 624 } 625 626 copy_bitmap = hbitmap_alloc(len, ctz32(cluster_size)); 627 628 /* job->len is fixed, so we can't allow resize */ 629 job = block_job_create(job_id, &backup_job_driver, txn, bs, 630 BLK_PERM_CONSISTENT_READ, 631 BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | 632 BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD, 633 speed, creation_flags, cb, opaque, errp); 634 if (!job) { 635 goto error; 636 } 637 638 /* The target must match the source in size, so no resize here either */ 639 job->target = blk_new(job->common.job.aio_context, 640 BLK_PERM_WRITE, 641 BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | 642 BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD); 643 ret = blk_insert_bs(job->target, target, errp); 644 if (ret < 0) { 645 goto error; 646 } 647 648 job->on_source_error = on_source_error; 649 job->on_target_error = on_target_error; 650 job->sync_mode = sync_mode; 651 job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_INCREMENTAL ? 652 sync_bitmap : NULL; 653 job->compress = compress; 654 655 /* Detect image-fleecing (and similar) schemes */ 656 job->serialize_target_writes = bdrv_chain_contains(target, bs); 657 job->cluster_size = cluster_size; 658 job->copy_bitmap = copy_bitmap; 659 copy_bitmap = NULL; 660 job->use_copy_range = !compress; /* compression isn't supported for it */ 661 job->copy_range_size = MIN_NON_ZERO(blk_get_max_transfer(job->common.blk), 662 blk_get_max_transfer(job->target)); 663 job->copy_range_size = MAX(job->cluster_size, 664 QEMU_ALIGN_UP(job->copy_range_size, 665 job->cluster_size)); 666 667 /* Required permissions are already taken with target's blk_new() */ 668 block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL, 669 &error_abort); 670 job->len = len; 671 672 return &job->common; 673 674 error: 675 if (copy_bitmap) { 676 assert(!job || !job->copy_bitmap); 677 hbitmap_free(copy_bitmap); 678 } 679 if (sync_bitmap) { 680 bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL); 681 } 682 if (job) { 683 backup_clean(&job->common.job); 684 job_early_fail(&job->common.job); 685 } 686 687 return NULL; 688 } 689