1 /* 2 * QEMU backup 3 * 4 * Copyright (C) 2013 Proxmox Server Solutions 5 * Copyright (c) 2019 Virtuozzo International GmbH. 6 * 7 * Authors: 8 * Dietmar Maurer (dietmar@proxmox.com) 9 * 10 * This work is licensed under the terms of the GNU GPL, version 2 or later. 11 * See the COPYING file in the top-level directory. 12 * 13 */ 14 15 #include "qemu/osdep.h" 16 17 #include "trace.h" 18 #include "block/block.h" 19 #include "block/block_int.h" 20 #include "block/blockjob_int.h" 21 #include "block/block_backup.h" 22 #include "block/block-copy.h" 23 #include "qapi/error.h" 24 #include "qapi/qmp/qerror.h" 25 #include "qemu/cutils.h" 26 #include "sysemu/block-backend.h" 27 #include "qemu/bitmap.h" 28 #include "qemu/error-report.h" 29 30 #include "block/backup-top.h" 31 32 #define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16) 33 34 typedef struct BackupBlockJob { 35 BlockJob common; 36 BlockDriverState *backup_top; 37 BlockDriverState *source_bs; 38 39 BdrvDirtyBitmap *sync_bitmap; 40 41 MirrorSyncMode sync_mode; 42 BitmapSyncMode bitmap_mode; 43 BlockdevOnError on_source_error; 44 BlockdevOnError on_target_error; 45 uint64_t len; 46 int64_t cluster_size; 47 BackupPerf perf; 48 49 BlockCopyState *bcs; 50 51 bool wait; 52 BlockCopyCallState *bg_bcs_call; 53 } BackupBlockJob; 54 55 static const BlockJobDriver backup_job_driver; 56 57 static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret) 58 { 59 BdrvDirtyBitmap *bm; 60 bool sync = (((ret == 0) || (job->bitmap_mode == BITMAP_SYNC_MODE_ALWAYS)) \ 61 && (job->bitmap_mode != BITMAP_SYNC_MODE_NEVER)); 62 63 if (sync) { 64 /* 65 * We succeeded, or we always intended to sync the bitmap. 66 * Delete this bitmap and install the child. 67 */ 68 bm = bdrv_dirty_bitmap_abdicate(job->sync_bitmap, NULL); 69 } else { 70 /* 71 * We failed, or we never intended to sync the bitmap anyway. 72 * Merge the successor back into the parent, keeping all data. 73 */ 74 bm = bdrv_reclaim_dirty_bitmap(job->sync_bitmap, NULL); 75 } 76 77 assert(bm); 78 79 if (ret < 0 && job->bitmap_mode == BITMAP_SYNC_MODE_ALWAYS) { 80 /* If we failed and synced, merge in the bits we didn't copy: */ 81 bdrv_dirty_bitmap_merge_internal(bm, block_copy_dirty_bitmap(job->bcs), 82 NULL, true); 83 } 84 } 85 86 static void backup_commit(Job *job) 87 { 88 BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); 89 if (s->sync_bitmap) { 90 backup_cleanup_sync_bitmap(s, 0); 91 } 92 } 93 94 static void backup_abort(Job *job) 95 { 96 BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); 97 if (s->sync_bitmap) { 98 backup_cleanup_sync_bitmap(s, -1); 99 } 100 } 101 102 static void backup_clean(Job *job) 103 { 104 BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); 105 bdrv_backup_top_drop(s->backup_top); 106 } 107 108 void backup_do_checkpoint(BlockJob *job, Error **errp) 109 { 110 BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common); 111 112 assert(block_job_driver(job) == &backup_job_driver); 113 114 if (backup_job->sync_mode != MIRROR_SYNC_MODE_NONE) { 115 error_setg(errp, "The backup job only supports block checkpoint in" 116 " sync=none mode"); 117 return; 118 } 119 120 bdrv_set_dirty_bitmap(block_copy_dirty_bitmap(backup_job->bcs), 0, 121 backup_job->len); 122 } 123 124 static BlockErrorAction backup_error_action(BackupBlockJob *job, 125 bool read, int error) 126 { 127 if (read) { 128 return block_job_error_action(&job->common, job->on_source_error, 129 true, error); 130 } else { 131 return block_job_error_action(&job->common, job->on_target_error, 132 false, error); 133 } 134 } 135 136 static void coroutine_fn backup_block_copy_callback(void *opaque) 137 { 138 BackupBlockJob *s = opaque; 139 140 if (s->wait) { 141 s->wait = false; 142 aio_co_wake(s->common.job.co); 143 } else { 144 job_enter(&s->common.job); 145 } 146 } 147 148 static int coroutine_fn backup_loop(BackupBlockJob *job) 149 { 150 BlockCopyCallState *s = NULL; 151 int ret = 0; 152 bool error_is_read; 153 BlockErrorAction act; 154 155 while (true) { /* retry loop */ 156 job->bg_bcs_call = s = block_copy_async(job->bcs, 0, 157 QEMU_ALIGN_UP(job->len, job->cluster_size), 158 job->perf.max_workers, job->perf.max_chunk, 159 backup_block_copy_callback, job); 160 161 while (!block_copy_call_finished(s) && 162 !job_is_cancelled(&job->common.job)) 163 { 164 job_yield(&job->common.job); 165 } 166 167 if (!block_copy_call_finished(s)) { 168 assert(job_is_cancelled(&job->common.job)); 169 /* 170 * Note that we can't use job_yield() here, as it doesn't work for 171 * cancelled job. 172 */ 173 block_copy_call_cancel(s); 174 job->wait = true; 175 qemu_coroutine_yield(); 176 assert(block_copy_call_finished(s)); 177 ret = 0; 178 goto out; 179 } 180 181 if (job_is_cancelled(&job->common.job) || 182 block_copy_call_succeeded(s)) 183 { 184 ret = 0; 185 goto out; 186 } 187 188 if (block_copy_call_cancelled(s)) { 189 /* 190 * Job is not cancelled but only block-copy call. This is possible 191 * after job pause. Now the pause is finished, start new block-copy 192 * iteration. 193 */ 194 block_copy_call_free(s); 195 continue; 196 } 197 198 /* The only remaining case is failed block-copy call. */ 199 assert(block_copy_call_failed(s)); 200 201 ret = block_copy_call_status(s, &error_is_read); 202 act = backup_error_action(job, error_is_read, -ret); 203 switch (act) { 204 case BLOCK_ERROR_ACTION_REPORT: 205 goto out; 206 case BLOCK_ERROR_ACTION_STOP: 207 /* 208 * Go to pause prior to starting new block-copy call on the next 209 * iteration. 210 */ 211 job_pause_point(&job->common.job); 212 break; 213 case BLOCK_ERROR_ACTION_IGNORE: 214 /* Proceed to new block-copy call to retry. */ 215 break; 216 default: 217 abort(); 218 } 219 220 block_copy_call_free(s); 221 } 222 223 out: 224 block_copy_call_free(s); 225 job->bg_bcs_call = NULL; 226 return ret; 227 } 228 229 static void backup_init_bcs_bitmap(BackupBlockJob *job) 230 { 231 bool ret; 232 uint64_t estimate; 233 BdrvDirtyBitmap *bcs_bitmap = block_copy_dirty_bitmap(job->bcs); 234 235 if (job->sync_mode == MIRROR_SYNC_MODE_BITMAP) { 236 ret = bdrv_dirty_bitmap_merge_internal(bcs_bitmap, job->sync_bitmap, 237 NULL, true); 238 assert(ret); 239 } else { 240 if (job->sync_mode == MIRROR_SYNC_MODE_TOP) { 241 /* 242 * We can't hog the coroutine to initialize this thoroughly. 243 * Set a flag and resume work when we are able to yield safely. 244 */ 245 block_copy_set_skip_unallocated(job->bcs, true); 246 } 247 bdrv_set_dirty_bitmap(bcs_bitmap, 0, job->len); 248 } 249 250 estimate = bdrv_get_dirty_count(bcs_bitmap); 251 job_progress_set_remaining(&job->common.job, estimate); 252 } 253 254 static int coroutine_fn backup_run(Job *job, Error **errp) 255 { 256 BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); 257 int ret; 258 259 backup_init_bcs_bitmap(s); 260 261 if (s->sync_mode == MIRROR_SYNC_MODE_TOP) { 262 int64_t offset = 0; 263 int64_t count; 264 265 for (offset = 0; offset < s->len; ) { 266 if (job_is_cancelled(job)) { 267 return -ECANCELED; 268 } 269 270 job_pause_point(job); 271 272 if (job_is_cancelled(job)) { 273 return -ECANCELED; 274 } 275 276 ret = block_copy_reset_unallocated(s->bcs, offset, &count); 277 if (ret < 0) { 278 return ret; 279 } 280 281 offset += count; 282 } 283 block_copy_set_skip_unallocated(s->bcs, false); 284 } 285 286 if (s->sync_mode == MIRROR_SYNC_MODE_NONE) { 287 /* 288 * All bits are set in bcs bitmap to allow any cluster to be copied. 289 * This does not actually require them to be copied. 290 */ 291 while (!job_is_cancelled(job)) { 292 /* 293 * Yield until the job is cancelled. We just let our before_write 294 * notify callback service CoW requests. 295 */ 296 job_yield(job); 297 } 298 } else { 299 return backup_loop(s); 300 } 301 302 return 0; 303 } 304 305 static void coroutine_fn backup_pause(Job *job) 306 { 307 BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); 308 309 if (s->bg_bcs_call && !block_copy_call_finished(s->bg_bcs_call)) { 310 block_copy_call_cancel(s->bg_bcs_call); 311 s->wait = true; 312 qemu_coroutine_yield(); 313 } 314 } 315 316 static void coroutine_fn backup_set_speed(BlockJob *job, int64_t speed) 317 { 318 BackupBlockJob *s = container_of(job, BackupBlockJob, common); 319 320 /* 321 * block_job_set_speed() is called first from block_job_create(), when we 322 * don't yet have s->bcs. 323 */ 324 if (s->bcs) { 325 block_copy_set_speed(s->bcs, speed); 326 if (s->bg_bcs_call) { 327 block_copy_kick(s->bg_bcs_call); 328 } 329 } 330 } 331 332 static const BlockJobDriver backup_job_driver = { 333 .job_driver = { 334 .instance_size = sizeof(BackupBlockJob), 335 .job_type = JOB_TYPE_BACKUP, 336 .free = block_job_free, 337 .user_resume = block_job_user_resume, 338 .run = backup_run, 339 .commit = backup_commit, 340 .abort = backup_abort, 341 .clean = backup_clean, 342 .pause = backup_pause, 343 }, 344 .set_speed = backup_set_speed, 345 }; 346 347 static int64_t backup_calculate_cluster_size(BlockDriverState *target, 348 Error **errp) 349 { 350 int ret; 351 BlockDriverInfo bdi; 352 bool target_does_cow = bdrv_backing_chain_next(target); 353 354 /* 355 * If there is no backing file on the target, we cannot rely on COW if our 356 * backup cluster size is smaller than the target cluster size. Even for 357 * targets with a backing file, try to avoid COW if possible. 358 */ 359 ret = bdrv_get_info(target, &bdi); 360 if (ret == -ENOTSUP && !target_does_cow) { 361 /* Cluster size is not defined */ 362 warn_report("The target block device doesn't provide " 363 "information about the block size and it doesn't have a " 364 "backing file. The default block size of %u bytes is " 365 "used. If the actual block size of the target exceeds " 366 "this default, the backup may be unusable", 367 BACKUP_CLUSTER_SIZE_DEFAULT); 368 return BACKUP_CLUSTER_SIZE_DEFAULT; 369 } else if (ret < 0 && !target_does_cow) { 370 error_setg_errno(errp, -ret, 371 "Couldn't determine the cluster size of the target image, " 372 "which has no backing file"); 373 error_append_hint(errp, 374 "Aborting, since this may create an unusable destination image\n"); 375 return ret; 376 } else if (ret < 0 && target_does_cow) { 377 /* Not fatal; just trudge on ahead. */ 378 return BACKUP_CLUSTER_SIZE_DEFAULT; 379 } 380 381 return MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size); 382 } 383 384 BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, 385 BlockDriverState *target, int64_t speed, 386 MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap, 387 BitmapSyncMode bitmap_mode, 388 bool compress, 389 const char *filter_node_name, 390 BackupPerf *perf, 391 BlockdevOnError on_source_error, 392 BlockdevOnError on_target_error, 393 int creation_flags, 394 BlockCompletionFunc *cb, void *opaque, 395 JobTxn *txn, Error **errp) 396 { 397 int64_t len, target_len; 398 BackupBlockJob *job = NULL; 399 int64_t cluster_size; 400 BdrvRequestFlags write_flags; 401 BlockDriverState *backup_top = NULL; 402 BlockCopyState *bcs = NULL; 403 404 assert(bs); 405 assert(target); 406 407 /* QMP interface protects us from these cases */ 408 assert(sync_mode != MIRROR_SYNC_MODE_INCREMENTAL); 409 assert(sync_bitmap || sync_mode != MIRROR_SYNC_MODE_BITMAP); 410 411 if (bs == target) { 412 error_setg(errp, "Source and target cannot be the same"); 413 return NULL; 414 } 415 416 if (!bdrv_is_inserted(bs)) { 417 error_setg(errp, "Device is not inserted: %s", 418 bdrv_get_device_name(bs)); 419 return NULL; 420 } 421 422 if (!bdrv_is_inserted(target)) { 423 error_setg(errp, "Device is not inserted: %s", 424 bdrv_get_device_name(target)); 425 return NULL; 426 } 427 428 if (compress && !bdrv_supports_compressed_writes(target)) { 429 error_setg(errp, "Compression is not supported for this drive %s", 430 bdrv_get_device_name(target)); 431 return NULL; 432 } 433 434 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { 435 return NULL; 436 } 437 438 if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_BACKUP_TARGET, errp)) { 439 return NULL; 440 } 441 442 cluster_size = backup_calculate_cluster_size(target, errp); 443 if (cluster_size < 0) { 444 goto error; 445 } 446 447 if (perf->max_workers < 1) { 448 error_setg(errp, "max-workers must be greater than zero"); 449 return NULL; 450 } 451 452 if (perf->max_chunk < 0) { 453 error_setg(errp, "max-chunk must be zero (which means no limit) or " 454 "positive"); 455 return NULL; 456 } 457 458 if (perf->max_chunk && perf->max_chunk < cluster_size) { 459 error_setg(errp, "Required max-chunk (%" PRIi64 ") is less than backup " 460 "cluster size (%" PRIi64 ")", perf->max_chunk, cluster_size); 461 return NULL; 462 } 463 464 465 if (sync_bitmap) { 466 /* If we need to write to this bitmap, check that we can: */ 467 if (bitmap_mode != BITMAP_SYNC_MODE_NEVER && 468 bdrv_dirty_bitmap_check(sync_bitmap, BDRV_BITMAP_DEFAULT, errp)) { 469 return NULL; 470 } 471 472 /* Create a new bitmap, and freeze/disable this one. */ 473 if (bdrv_dirty_bitmap_create_successor(sync_bitmap, errp) < 0) { 474 return NULL; 475 } 476 } 477 478 len = bdrv_getlength(bs); 479 if (len < 0) { 480 error_setg_errno(errp, -len, "Unable to get length for '%s'", 481 bdrv_get_device_or_node_name(bs)); 482 goto error; 483 } 484 485 target_len = bdrv_getlength(target); 486 if (target_len < 0) { 487 error_setg_errno(errp, -target_len, "Unable to get length for '%s'", 488 bdrv_get_device_or_node_name(bs)); 489 goto error; 490 } 491 492 if (target_len != len) { 493 error_setg(errp, "Source and target image have different sizes"); 494 goto error; 495 } 496 497 /* 498 * If source is in backing chain of target assume that target is going to be 499 * used for "image fleecing", i.e. it should represent a kind of snapshot of 500 * source at backup-start point in time. And target is going to be read by 501 * somebody (for example, used as NBD export) during backup job. 502 * 503 * In this case, we need to add BDRV_REQ_SERIALISING write flag to avoid 504 * intersection of backup writes and third party reads from target, 505 * otherwise reading from target we may occasionally read already updated by 506 * guest data. 507 * 508 * For more information see commit f8d59dfb40bb and test 509 * tests/qemu-iotests/222 510 */ 511 write_flags = (bdrv_chain_contains(target, bs) ? BDRV_REQ_SERIALISING : 0) | 512 (compress ? BDRV_REQ_WRITE_COMPRESSED : 0), 513 514 backup_top = bdrv_backup_top_append(bs, target, filter_node_name, 515 cluster_size, perf, 516 write_flags, &bcs, errp); 517 if (!backup_top) { 518 goto error; 519 } 520 521 /* job->len is fixed, so we can't allow resize */ 522 job = block_job_create(job_id, &backup_job_driver, txn, backup_top, 523 0, BLK_PERM_ALL, 524 speed, creation_flags, cb, opaque, errp); 525 if (!job) { 526 goto error; 527 } 528 529 job->backup_top = backup_top; 530 job->source_bs = bs; 531 job->on_source_error = on_source_error; 532 job->on_target_error = on_target_error; 533 job->sync_mode = sync_mode; 534 job->sync_bitmap = sync_bitmap; 535 job->bitmap_mode = bitmap_mode; 536 job->bcs = bcs; 537 job->cluster_size = cluster_size; 538 job->len = len; 539 job->perf = *perf; 540 541 block_copy_set_progress_meter(bcs, &job->common.job.progress); 542 block_copy_set_speed(bcs, speed); 543 544 /* Required permissions are already taken by backup-top target */ 545 block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL, 546 &error_abort); 547 548 return &job->common; 549 550 error: 551 if (sync_bitmap) { 552 bdrv_reclaim_dirty_bitmap(sync_bitmap, NULL); 553 } 554 if (backup_top) { 555 bdrv_backup_top_drop(backup_top); 556 } 557 558 return NULL; 559 } 560