1 /* 2 * QEMU backup 3 * 4 * Copyright (C) 2013 Proxmox Server Solutions 5 * Copyright (c) 2019 Virtuozzo International GmbH. 6 * 7 * Authors: 8 * Dietmar Maurer (dietmar@proxmox.com) 9 * 10 * This work is licensed under the terms of the GNU GPL, version 2 or later. 11 * See the COPYING file in the top-level directory. 12 * 13 */ 14 15 #include "qemu/osdep.h" 16 17 #include "trace.h" 18 #include "block/block.h" 19 #include "block/block_int.h" 20 #include "block/blockjob_int.h" 21 #include "block/block_backup.h" 22 #include "block/block-copy.h" 23 #include "qapi/error.h" 24 #include "qapi/qmp/qerror.h" 25 #include "qemu/cutils.h" 26 #include "sysemu/block-backend.h" 27 #include "qemu/bitmap.h" 28 #include "qemu/error-report.h" 29 30 #include "block/backup-top.h" 31 32 #define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16) 33 34 typedef struct BackupBlockJob { 35 BlockJob common; 36 BlockDriverState *backup_top; 37 BlockDriverState *source_bs; 38 BlockDriverState *target_bs; 39 40 BdrvDirtyBitmap *sync_bitmap; 41 42 MirrorSyncMode sync_mode; 43 BitmapSyncMode bitmap_mode; 44 BlockdevOnError on_source_error; 45 BlockdevOnError on_target_error; 46 uint64_t len; 47 int64_t cluster_size; 48 BackupPerf perf; 49 50 BlockCopyState *bcs; 51 52 bool wait; 53 BlockCopyCallState *bg_bcs_call; 54 } BackupBlockJob; 55 56 static const BlockJobDriver backup_job_driver; 57 58 static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret) 59 { 60 BdrvDirtyBitmap *bm; 61 bool sync = (((ret == 0) || (job->bitmap_mode == BITMAP_SYNC_MODE_ALWAYS)) \ 62 && (job->bitmap_mode != BITMAP_SYNC_MODE_NEVER)); 63 64 if (sync) { 65 /* 66 * We succeeded, or we always intended to sync the bitmap. 67 * Delete this bitmap and install the child. 68 */ 69 bm = bdrv_dirty_bitmap_abdicate(job->sync_bitmap, NULL); 70 } else { 71 /* 72 * We failed, or we never intended to sync the bitmap anyway. 73 * Merge the successor back into the parent, keeping all data. 74 */ 75 bm = bdrv_reclaim_dirty_bitmap(job->sync_bitmap, NULL); 76 } 77 78 assert(bm); 79 80 if (ret < 0 && job->bitmap_mode == BITMAP_SYNC_MODE_ALWAYS) { 81 /* If we failed and synced, merge in the bits we didn't copy: */ 82 bdrv_dirty_bitmap_merge_internal(bm, block_copy_dirty_bitmap(job->bcs), 83 NULL, true); 84 } 85 } 86 87 static void backup_commit(Job *job) 88 { 89 BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); 90 if (s->sync_bitmap) { 91 backup_cleanup_sync_bitmap(s, 0); 92 } 93 } 94 95 static void backup_abort(Job *job) 96 { 97 BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); 98 if (s->sync_bitmap) { 99 backup_cleanup_sync_bitmap(s, -1); 100 } 101 } 102 103 static void backup_clean(Job *job) 104 { 105 BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); 106 block_job_remove_all_bdrv(&s->common); 107 bdrv_backup_top_drop(s->backup_top); 108 } 109 110 void backup_do_checkpoint(BlockJob *job, Error **errp) 111 { 112 BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common); 113 114 assert(block_job_driver(job) == &backup_job_driver); 115 116 if (backup_job->sync_mode != MIRROR_SYNC_MODE_NONE) { 117 error_setg(errp, "The backup job only supports block checkpoint in" 118 " sync=none mode"); 119 return; 120 } 121 122 bdrv_set_dirty_bitmap(block_copy_dirty_bitmap(backup_job->bcs), 0, 123 backup_job->len); 124 } 125 126 static BlockErrorAction backup_error_action(BackupBlockJob *job, 127 bool read, int error) 128 { 129 if (read) { 130 return block_job_error_action(&job->common, job->on_source_error, 131 true, error); 132 } else { 133 return block_job_error_action(&job->common, job->on_target_error, 134 false, error); 135 } 136 } 137 138 static void coroutine_fn backup_block_copy_callback(void *opaque) 139 { 140 BackupBlockJob *s = opaque; 141 142 if (s->wait) { 143 s->wait = false; 144 aio_co_wake(s->common.job.co); 145 } else { 146 job_enter(&s->common.job); 147 } 148 } 149 150 static int coroutine_fn backup_loop(BackupBlockJob *job) 151 { 152 BlockCopyCallState *s = NULL; 153 int ret = 0; 154 bool error_is_read; 155 BlockErrorAction act; 156 157 while (true) { /* retry loop */ 158 job->bg_bcs_call = s = block_copy_async(job->bcs, 0, 159 QEMU_ALIGN_UP(job->len, job->cluster_size), 160 job->perf.max_workers, job->perf.max_chunk, 161 backup_block_copy_callback, job); 162 163 while (!block_copy_call_finished(s) && 164 !job_is_cancelled(&job->common.job)) 165 { 166 job_yield(&job->common.job); 167 } 168 169 if (!block_copy_call_finished(s)) { 170 assert(job_is_cancelled(&job->common.job)); 171 /* 172 * Note that we can't use job_yield() here, as it doesn't work for 173 * cancelled job. 174 */ 175 block_copy_call_cancel(s); 176 job->wait = true; 177 qemu_coroutine_yield(); 178 assert(block_copy_call_finished(s)); 179 ret = 0; 180 goto out; 181 } 182 183 if (job_is_cancelled(&job->common.job) || 184 block_copy_call_succeeded(s)) 185 { 186 ret = 0; 187 goto out; 188 } 189 190 if (block_copy_call_cancelled(s)) { 191 /* 192 * Job is not cancelled but only block-copy call. This is possible 193 * after job pause. Now the pause is finished, start new block-copy 194 * iteration. 195 */ 196 block_copy_call_free(s); 197 continue; 198 } 199 200 /* The only remaining case is failed block-copy call. */ 201 assert(block_copy_call_failed(s)); 202 203 ret = block_copy_call_status(s, &error_is_read); 204 act = backup_error_action(job, error_is_read, -ret); 205 switch (act) { 206 case BLOCK_ERROR_ACTION_REPORT: 207 goto out; 208 case BLOCK_ERROR_ACTION_STOP: 209 /* 210 * Go to pause prior to starting new block-copy call on the next 211 * iteration. 212 */ 213 job_pause_point(&job->common.job); 214 break; 215 case BLOCK_ERROR_ACTION_IGNORE: 216 /* Proceed to new block-copy call to retry. */ 217 break; 218 default: 219 abort(); 220 } 221 222 block_copy_call_free(s); 223 } 224 225 out: 226 block_copy_call_free(s); 227 job->bg_bcs_call = NULL; 228 return ret; 229 } 230 231 static void backup_init_bcs_bitmap(BackupBlockJob *job) 232 { 233 bool ret; 234 uint64_t estimate; 235 BdrvDirtyBitmap *bcs_bitmap = block_copy_dirty_bitmap(job->bcs); 236 237 if (job->sync_mode == MIRROR_SYNC_MODE_BITMAP) { 238 ret = bdrv_dirty_bitmap_merge_internal(bcs_bitmap, job->sync_bitmap, 239 NULL, true); 240 assert(ret); 241 } else { 242 if (job->sync_mode == MIRROR_SYNC_MODE_TOP) { 243 /* 244 * We can't hog the coroutine to initialize this thoroughly. 245 * Set a flag and resume work when we are able to yield safely. 246 */ 247 block_copy_set_skip_unallocated(job->bcs, true); 248 } 249 bdrv_set_dirty_bitmap(bcs_bitmap, 0, job->len); 250 } 251 252 estimate = bdrv_get_dirty_count(bcs_bitmap); 253 job_progress_set_remaining(&job->common.job, estimate); 254 } 255 256 static int coroutine_fn backup_run(Job *job, Error **errp) 257 { 258 BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); 259 int ret; 260 261 backup_init_bcs_bitmap(s); 262 263 if (s->sync_mode == MIRROR_SYNC_MODE_TOP) { 264 int64_t offset = 0; 265 int64_t count; 266 267 for (offset = 0; offset < s->len; ) { 268 if (job_is_cancelled(job)) { 269 return -ECANCELED; 270 } 271 272 job_pause_point(job); 273 274 if (job_is_cancelled(job)) { 275 return -ECANCELED; 276 } 277 278 ret = block_copy_reset_unallocated(s->bcs, offset, &count); 279 if (ret < 0) { 280 return ret; 281 } 282 283 offset += count; 284 } 285 block_copy_set_skip_unallocated(s->bcs, false); 286 } 287 288 if (s->sync_mode == MIRROR_SYNC_MODE_NONE) { 289 /* 290 * All bits are set in bcs bitmap to allow any cluster to be copied. 291 * This does not actually require them to be copied. 292 */ 293 while (!job_is_cancelled(job)) { 294 /* 295 * Yield until the job is cancelled. We just let our before_write 296 * notify callback service CoW requests. 297 */ 298 job_yield(job); 299 } 300 } else { 301 return backup_loop(s); 302 } 303 304 return 0; 305 } 306 307 static void coroutine_fn backup_pause(Job *job) 308 { 309 BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); 310 311 if (s->bg_bcs_call && !block_copy_call_finished(s->bg_bcs_call)) { 312 block_copy_call_cancel(s->bg_bcs_call); 313 s->wait = true; 314 qemu_coroutine_yield(); 315 } 316 } 317 318 static void coroutine_fn backup_set_speed(BlockJob *job, int64_t speed) 319 { 320 BackupBlockJob *s = container_of(job, BackupBlockJob, common); 321 322 /* 323 * block_job_set_speed() is called first from block_job_create(), when we 324 * don't yet have s->bcs. 325 */ 326 if (s->bcs) { 327 block_copy_set_speed(s->bcs, speed); 328 if (s->bg_bcs_call) { 329 block_copy_kick(s->bg_bcs_call); 330 } 331 } 332 } 333 334 static void backup_cancel(Job *job, bool force) 335 { 336 BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); 337 338 bdrv_cancel_in_flight(s->target_bs); 339 } 340 341 static const BlockJobDriver backup_job_driver = { 342 .job_driver = { 343 .instance_size = sizeof(BackupBlockJob), 344 .job_type = JOB_TYPE_BACKUP, 345 .free = block_job_free, 346 .user_resume = block_job_user_resume, 347 .run = backup_run, 348 .commit = backup_commit, 349 .abort = backup_abort, 350 .clean = backup_clean, 351 .pause = backup_pause, 352 .cancel = backup_cancel, 353 }, 354 .set_speed = backup_set_speed, 355 }; 356 357 static int64_t backup_calculate_cluster_size(BlockDriverState *target, 358 Error **errp) 359 { 360 int ret; 361 BlockDriverInfo bdi; 362 bool target_does_cow = bdrv_backing_chain_next(target); 363 364 /* 365 * If there is no backing file on the target, we cannot rely on COW if our 366 * backup cluster size is smaller than the target cluster size. Even for 367 * targets with a backing file, try to avoid COW if possible. 368 */ 369 ret = bdrv_get_info(target, &bdi); 370 if (ret == -ENOTSUP && !target_does_cow) { 371 /* Cluster size is not defined */ 372 warn_report("The target block device doesn't provide " 373 "information about the block size and it doesn't have a " 374 "backing file. The default block size of %u bytes is " 375 "used. If the actual block size of the target exceeds " 376 "this default, the backup may be unusable", 377 BACKUP_CLUSTER_SIZE_DEFAULT); 378 return BACKUP_CLUSTER_SIZE_DEFAULT; 379 } else if (ret < 0 && !target_does_cow) { 380 error_setg_errno(errp, -ret, 381 "Couldn't determine the cluster size of the target image, " 382 "which has no backing file"); 383 error_append_hint(errp, 384 "Aborting, since this may create an unusable destination image\n"); 385 return ret; 386 } else if (ret < 0 && target_does_cow) { 387 /* Not fatal; just trudge on ahead. */ 388 return BACKUP_CLUSTER_SIZE_DEFAULT; 389 } 390 391 return MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size); 392 } 393 394 BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, 395 BlockDriverState *target, int64_t speed, 396 MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap, 397 BitmapSyncMode bitmap_mode, 398 bool compress, 399 const char *filter_node_name, 400 BackupPerf *perf, 401 BlockdevOnError on_source_error, 402 BlockdevOnError on_target_error, 403 int creation_flags, 404 BlockCompletionFunc *cb, void *opaque, 405 JobTxn *txn, Error **errp) 406 { 407 int64_t len, target_len; 408 BackupBlockJob *job = NULL; 409 int64_t cluster_size; 410 BdrvRequestFlags write_flags; 411 BlockDriverState *backup_top = NULL; 412 BlockCopyState *bcs = NULL; 413 414 assert(bs); 415 assert(target); 416 417 /* QMP interface protects us from these cases */ 418 assert(sync_mode != MIRROR_SYNC_MODE_INCREMENTAL); 419 assert(sync_bitmap || sync_mode != MIRROR_SYNC_MODE_BITMAP); 420 421 if (bs == target) { 422 error_setg(errp, "Source and target cannot be the same"); 423 return NULL; 424 } 425 426 if (!bdrv_is_inserted(bs)) { 427 error_setg(errp, "Device is not inserted: %s", 428 bdrv_get_device_name(bs)); 429 return NULL; 430 } 431 432 if (!bdrv_is_inserted(target)) { 433 error_setg(errp, "Device is not inserted: %s", 434 bdrv_get_device_name(target)); 435 return NULL; 436 } 437 438 if (compress && !bdrv_supports_compressed_writes(target)) { 439 error_setg(errp, "Compression is not supported for this drive %s", 440 bdrv_get_device_name(target)); 441 return NULL; 442 } 443 444 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { 445 return NULL; 446 } 447 448 if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_BACKUP_TARGET, errp)) { 449 return NULL; 450 } 451 452 cluster_size = backup_calculate_cluster_size(target, errp); 453 if (cluster_size < 0) { 454 goto error; 455 } 456 457 if (perf->max_workers < 1) { 458 error_setg(errp, "max-workers must be greater than zero"); 459 return NULL; 460 } 461 462 if (perf->max_chunk < 0) { 463 error_setg(errp, "max-chunk must be zero (which means no limit) or " 464 "positive"); 465 return NULL; 466 } 467 468 if (perf->max_chunk && perf->max_chunk < cluster_size) { 469 error_setg(errp, "Required max-chunk (%" PRIi64 ") is less than backup " 470 "cluster size (%" PRIi64 ")", perf->max_chunk, cluster_size); 471 return NULL; 472 } 473 474 475 if (sync_bitmap) { 476 /* If we need to write to this bitmap, check that we can: */ 477 if (bitmap_mode != BITMAP_SYNC_MODE_NEVER && 478 bdrv_dirty_bitmap_check(sync_bitmap, BDRV_BITMAP_DEFAULT, errp)) { 479 return NULL; 480 } 481 482 /* Create a new bitmap, and freeze/disable this one. */ 483 if (bdrv_dirty_bitmap_create_successor(sync_bitmap, errp) < 0) { 484 return NULL; 485 } 486 } 487 488 len = bdrv_getlength(bs); 489 if (len < 0) { 490 error_setg_errno(errp, -len, "Unable to get length for '%s'", 491 bdrv_get_device_or_node_name(bs)); 492 goto error; 493 } 494 495 target_len = bdrv_getlength(target); 496 if (target_len < 0) { 497 error_setg_errno(errp, -target_len, "Unable to get length for '%s'", 498 bdrv_get_device_or_node_name(bs)); 499 goto error; 500 } 501 502 if (target_len != len) { 503 error_setg(errp, "Source and target image have different sizes"); 504 goto error; 505 } 506 507 /* 508 * If source is in backing chain of target assume that target is going to be 509 * used for "image fleecing", i.e. it should represent a kind of snapshot of 510 * source at backup-start point in time. And target is going to be read by 511 * somebody (for example, used as NBD export) during backup job. 512 * 513 * In this case, we need to add BDRV_REQ_SERIALISING write flag to avoid 514 * intersection of backup writes and third party reads from target, 515 * otherwise reading from target we may occasionally read already updated by 516 * guest data. 517 * 518 * For more information see commit f8d59dfb40bb and test 519 * tests/qemu-iotests/222 520 */ 521 write_flags = (bdrv_chain_contains(target, bs) ? BDRV_REQ_SERIALISING : 0) | 522 (compress ? BDRV_REQ_WRITE_COMPRESSED : 0), 523 524 backup_top = bdrv_backup_top_append(bs, target, filter_node_name, 525 cluster_size, perf, 526 write_flags, &bcs, errp); 527 if (!backup_top) { 528 goto error; 529 } 530 531 /* job->len is fixed, so we can't allow resize */ 532 job = block_job_create(job_id, &backup_job_driver, txn, backup_top, 533 0, BLK_PERM_ALL, 534 speed, creation_flags, cb, opaque, errp); 535 if (!job) { 536 goto error; 537 } 538 539 job->backup_top = backup_top; 540 job->source_bs = bs; 541 job->target_bs = target; 542 job->on_source_error = on_source_error; 543 job->on_target_error = on_target_error; 544 job->sync_mode = sync_mode; 545 job->sync_bitmap = sync_bitmap; 546 job->bitmap_mode = bitmap_mode; 547 job->bcs = bcs; 548 job->cluster_size = cluster_size; 549 job->len = len; 550 job->perf = *perf; 551 552 block_copy_set_progress_meter(bcs, &job->common.job.progress); 553 block_copy_set_speed(bcs, speed); 554 555 /* Required permissions are already taken by backup-top target */ 556 block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL, 557 &error_abort); 558 559 return &job->common; 560 561 error: 562 if (sync_bitmap) { 563 bdrv_reclaim_dirty_bitmap(sync_bitmap, NULL); 564 } 565 if (backup_top) { 566 bdrv_backup_top_drop(backup_top); 567 } 568 569 return NULL; 570 } 571