/*
 * QEMU backup
 *
 * Copyright (C) 2013 Proxmox Server Solutions
 * Copyright (c) 2019 Virtuozzo International GmbH.
 *
 * Authors:
 *  Dietmar Maurer (dietmar@proxmox.com)
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"

#include "trace.h"
#include "block/block.h"
#include "block/block_int.h"
#include "block/blockjob_int.h"
#include "block/block_backup.h"
#include "block/block-copy.h"
#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
#include "qemu/ratelimit.h"
#include "qemu/cutils.h"
#include "sysemu/block-backend.h"
#include "qemu/bitmap.h"
#include "qemu/error-report.h"

#include "block/backup-top.h"

#define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)

typedef struct BackupBlockJob {
    BlockJob common;
    BlockDriverState *backup_top;
    BlockDriverState *source_bs;

    BdrvDirtyBitmap *sync_bitmap;

    MirrorSyncMode sync_mode;
    BitmapSyncMode bitmap_mode;
    BlockdevOnError on_source_error;
    BlockdevOnError on_target_error;
    uint64_t len;
    uint64_t bytes_read;
    int64_t cluster_size;

    BlockCopyState *bcs;
} BackupBlockJob;

static const BlockJobDriver backup_job_driver;

static void backup_progress_bytes_callback(int64_t bytes, void *opaque)
{
    BackupBlockJob *s = opaque;

    s->bytes_read += bytes;
    job_progress_update(&s->common.job, bytes);
}

static void backup_progress_reset_callback(void *opaque)
{
    BackupBlockJob *s = opaque;
    uint64_t estimate = bdrv_get_dirty_count(s->bcs->copy_bitmap);

    job_progress_set_remaining(&s->common.job, estimate);
}

static int coroutine_fn backup_do_cow(BackupBlockJob *job,
                                      int64_t offset, uint64_t bytes,
                                      bool *error_is_read)
{
    int ret = 0;
    int64_t start, end; /* bytes */

    start = QEMU_ALIGN_DOWN(offset, job->cluster_size);
    end = QEMU_ALIGN_UP(bytes + offset, job->cluster_size);

    trace_backup_do_cow_enter(job, start, offset, bytes);

    ret = block_copy(job->bcs, start, end - start, error_is_read);

    trace_backup_do_cow_return(job, offset, bytes, ret);

    return ret;
}
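/*
 * Resolve the successor that was created for job->sync_bitmap at job
 * creation time. On success, or when bitmap_mode is "always" (but not
 * "never"), the frozen bitmap abdicates in favour of its successor;
 * otherwise the successor is reclaimed (merged back into its parent) so
 * that no dirty information is lost. If the job failed in "always" mode,
 * the clusters that were not copied are merged back in from the
 * block-copy bitmap.
 */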
static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
{
    BdrvDirtyBitmap *bm;
    bool sync = (((ret == 0) || (job->bitmap_mode == BITMAP_SYNC_MODE_ALWAYS))
                 && (job->bitmap_mode != BITMAP_SYNC_MODE_NEVER));

    if (sync) {
        /*
         * We succeeded, or we always intended to sync the bitmap.
         * Delete this bitmap and install the child.
         */
        bm = bdrv_dirty_bitmap_abdicate(job->sync_bitmap, NULL);
    } else {
        /*
         * We failed, or we never intended to sync the bitmap anyway.
         * Merge the successor back into the parent, keeping all data.
         */
        bm = bdrv_reclaim_dirty_bitmap(job->sync_bitmap, NULL);
    }

    assert(bm);

    if (ret < 0 && job->bitmap_mode == BITMAP_SYNC_MODE_ALWAYS) {
        /* If we failed and synced, merge in the bits we didn't copy: */
        bdrv_dirty_bitmap_merge_internal(bm, job->bcs->copy_bitmap,
                                         NULL, true);
    }
}

static void backup_commit(Job *job)
{
    BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
    if (s->sync_bitmap) {
        backup_cleanup_sync_bitmap(s, 0);
    }
}

static void backup_abort(Job *job)
{
    BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
    if (s->sync_bitmap) {
        backup_cleanup_sync_bitmap(s, -1);
    }
}

static void backup_clean(Job *job)
{
    BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);

    bdrv_backup_top_drop(s->backup_top);
}

void backup_do_checkpoint(BlockJob *job, Error **errp)
{
    BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);

    assert(block_job_driver(job) == &backup_job_driver);

    if (backup_job->sync_mode != MIRROR_SYNC_MODE_NONE) {
        error_setg(errp, "The backup job only supports block checkpoint in"
                   " sync=none mode");
        return;
    }

    bdrv_set_dirty_bitmap(backup_job->bcs->copy_bitmap, 0, backup_job->len);
}

static BlockErrorAction backup_error_action(BackupBlockJob *job,
                                            bool read, int error)
{
    if (read) {
        return block_job_error_action(&job->common, job->on_source_error,
                                      true, error);
    } else {
        return block_job_error_action(&job->common, job->on_target_error,
                                      false, error);
    }
}

static bool coroutine_fn yield_and_check(BackupBlockJob *job)
{
    uint64_t delay_ns;

    if (job_is_cancelled(&job->common.job)) {
        return true;
    }

    /*
     * We need to yield even for delay_ns = 0 so that bdrv_drain_all() can
     * return. Without a yield, the VM would not reboot.
     */
    delay_ns = block_job_ratelimit_get_delay(&job->common, job->bytes_read);
    job->bytes_read = 0;
    job_sleep_ns(&job->common.job, delay_ns);

    if (job_is_cancelled(&job->common.job)) {
        return true;
    }

    return false;
}

static int coroutine_fn backup_loop(BackupBlockJob *job)
{
    bool error_is_read;
    int64_t offset;
    BdrvDirtyBitmapIter *bdbi;
    int ret = 0;

    bdbi = bdrv_dirty_iter_new(job->bcs->copy_bitmap);
    while ((offset = bdrv_dirty_iter_next(bdbi)) != -1) {
        do {
            if (yield_and_check(job)) {
                goto out;
            }
            ret = backup_do_cow(job, offset, job->cluster_size, &error_is_read);
            if (ret < 0 && backup_error_action(job, error_is_read, -ret) ==
                           BLOCK_ERROR_ACTION_REPORT)
            {
                goto out;
            }
        } while (ret < 0);
    }

 out:
    bdrv_dirty_iter_free(bdbi);
    return ret;
}
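/*
 * Seed the block-copy bitmap before the main loop runs: for sync=bitmap the
 * user-provided bitmap is merged in; for all other modes every cluster is
 * initially marked dirty (sync=top additionally sets skip_unallocated so
 * that unallocated clusters can be cleared later, once backup_run() is able
 * to yield). The job's remaining-progress estimate is taken from the
 * resulting dirty count.
 */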
static void backup_init_copy_bitmap(BackupBlockJob *job)
{
    bool ret;
    uint64_t estimate;

    if (job->sync_mode == MIRROR_SYNC_MODE_BITMAP) {
        ret = bdrv_dirty_bitmap_merge_internal(job->bcs->copy_bitmap,
                                               job->sync_bitmap,
                                               NULL, true);
        assert(ret);
    } else {
        if (job->sync_mode == MIRROR_SYNC_MODE_TOP) {
            /*
             * We can't hog the coroutine to initialize this thoroughly.
             * Set a flag and resume work when we are able to yield safely.
             */
            job->bcs->skip_unallocated = true;
        }
        bdrv_set_dirty_bitmap(job->bcs->copy_bitmap, 0, job->len);
    }

    estimate = bdrv_get_dirty_count(job->bcs->copy_bitmap);
    job_progress_set_remaining(&job->common.job, estimate);
}

static int coroutine_fn backup_run(Job *job, Error **errp)
{
    BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
    int ret = 0;

    backup_init_copy_bitmap(s);

    if (s->sync_mode == MIRROR_SYNC_MODE_TOP) {
        int64_t offset = 0;
        int64_t count;

        for (offset = 0; offset < s->len; ) {
            if (yield_and_check(s)) {
                ret = -ECANCELED;
                goto out;
            }

            ret = block_copy_reset_unallocated(s->bcs, offset, &count);
            if (ret < 0) {
                goto out;
            }

            offset += count;
        }
        s->bcs->skip_unallocated = false;
    }

    if (s->sync_mode == MIRROR_SYNC_MODE_NONE) {
        /*
         * All bits are set in copy_bitmap to allow any cluster to be copied.
         * This does not actually require them to be copied.
         */
        while (!job_is_cancelled(job)) {
            /*
             * Yield until the job is cancelled. We just let the backup-top
             * filter service CoW requests.
             */
            job_yield(job);
        }
    } else {
        ret = backup_loop(s);
    }

 out:
    return ret;
}

static const BlockJobDriver backup_job_driver = {
    .job_driver = {
        .instance_size          = sizeof(BackupBlockJob),
        .job_type               = JOB_TYPE_BACKUP,
        .free                   = block_job_free,
        .user_resume            = block_job_user_resume,
        .run                    = backup_run,
        .commit                 = backup_commit,
        .abort                  = backup_abort,
        .clean                  = backup_clean,
    }
};
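/*
 * Pick the cluster granularity used for copying. The result is at least
 * BACKUP_CLUSTER_SIZE_DEFAULT (64 KiB) and grows to the target's cluster
 * size when bdrv_get_info() reports one, so that copy-on-write on the
 * target stays effective. Missing cluster-size information is only fatal
 * for targets without a backing file.
 */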
static int64_t backup_calculate_cluster_size(BlockDriverState *target,
                                             Error **errp)
{
    int ret;
    BlockDriverInfo bdi;

    /*
     * If there is no backing file on the target, we cannot rely on COW if our
     * backup cluster size is smaller than the target cluster size. Even for
     * targets with a backing file, try to avoid COW if possible.
     */
    ret = bdrv_get_info(target, &bdi);
    if (ret == -ENOTSUP && !target->backing) {
        /* Cluster size is not defined */
        warn_report("The target block device doesn't provide "
                    "information about the block size and it doesn't have a "
                    "backing file. The default block size of %u bytes is "
                    "used. If the actual block size of the target exceeds "
                    "this default, the backup may be unusable",
                    BACKUP_CLUSTER_SIZE_DEFAULT);
        return BACKUP_CLUSTER_SIZE_DEFAULT;
    } else if (ret < 0 && !target->backing) {
        error_setg_errno(errp, -ret,
            "Couldn't determine the cluster size of the target image, "
            "which has no backing file");
        error_append_hint(errp,
            "Aborting, since this may create an unusable destination image\n");
        return ret;
    } else if (ret < 0 && target->backing) {
        /* Not fatal; just trudge on ahead. */
        return BACKUP_CLUSTER_SIZE_DEFAULT;
    }

    return MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
}
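/*
 * Create (but do not start) a backup block job. The copy-before-write work
 * is delegated to the backup-top filter, which is inserted above @bs and
 * copies clusters to @target through the shared BlockCopyState before guest
 * writes reach them; backup_run() then walks the same bitmap to copy the
 * remaining clusters in the background. Callers (typically the QMP
 * drive-backup/blockdev-backup handlers in blockdev.c) are expected to
 * start the returned job with job_start(). Returns NULL and sets @errp on
 * failure.
 */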
BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
                  BlockDriverState *target, int64_t speed,
                  MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
                  BitmapSyncMode bitmap_mode,
                  bool compress,
                  const char *filter_node_name,
                  BlockdevOnError on_source_error,
                  BlockdevOnError on_target_error,
                  int creation_flags,
                  BlockCompletionFunc *cb, void *opaque,
                  JobTxn *txn, Error **errp)
{
    int64_t len;
    BackupBlockJob *job = NULL;
    int64_t cluster_size;
    BdrvRequestFlags write_flags;
    BlockDriverState *backup_top = NULL;
    BlockCopyState *bcs = NULL;

    assert(bs);
    assert(target);

    /* QMP interface protects us from these cases */
    assert(sync_mode != MIRROR_SYNC_MODE_INCREMENTAL);
    assert(sync_bitmap || sync_mode != MIRROR_SYNC_MODE_BITMAP);

    if (bs == target) {
        error_setg(errp, "Source and target cannot be the same");
        return NULL;
    }

    if (!bdrv_is_inserted(bs)) {
        error_setg(errp, "Device is not inserted: %s",
                   bdrv_get_device_name(bs));
        return NULL;
    }

    if (!bdrv_is_inserted(target)) {
        error_setg(errp, "Device is not inserted: %s",
                   bdrv_get_device_name(target));
        return NULL;
    }

    if (compress && !block_driver_can_compress(target->drv)) {
        error_setg(errp, "Compression is not supported for this drive %s",
                   bdrv_get_device_name(target));
        return NULL;
    }

    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
        return NULL;
    }

    if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_BACKUP_TARGET, errp)) {
        return NULL;
    }

    if (sync_bitmap) {
        /* If we need to write to this bitmap, check that we can: */
        if (bitmap_mode != BITMAP_SYNC_MODE_NEVER &&
            bdrv_dirty_bitmap_check(sync_bitmap, BDRV_BITMAP_DEFAULT, errp)) {
            return NULL;
        }

        /* Create a new bitmap, and freeze/disable this one. */
        if (bdrv_dirty_bitmap_create_successor(sync_bitmap, errp) < 0) {
            return NULL;
        }
    }

    len = bdrv_getlength(bs);
    if (len < 0) {
        error_setg_errno(errp, -len, "unable to get length for '%s'",
                         bdrv_get_device_name(bs));
        goto error;
    }

    cluster_size = backup_calculate_cluster_size(target, errp);
    if (cluster_size < 0) {
        goto error;
    }

    /*
     * If source is in the backing chain of target, assume that target is
     * going to be used for "image fleecing", i.e. it should represent a kind
     * of snapshot of source at backup-start point in time. And target is
     * going to be read by somebody (for example, used as NBD export) during
     * the backup job.
     *
     * In this case, we need to add the BDRV_REQ_SERIALISING write flag to
     * avoid intersection of backup writes and third-party reads from target;
     * otherwise, when reading from target we may occasionally read data that
     * has already been updated by the guest.
     *
     * For more information see commit f8d59dfb40bb and test
     * tests/qemu-iotests/222.
     */
    write_flags = (bdrv_chain_contains(target, bs) ? BDRV_REQ_SERIALISING : 0) |
                  (compress ? BDRV_REQ_WRITE_COMPRESSED : 0);

    backup_top = bdrv_backup_top_append(bs, target, filter_node_name,
                                        cluster_size, write_flags, &bcs, errp);
    if (!backup_top) {
        goto error;
    }

    /* job->len is fixed, so we can't allow resize */
    job = block_job_create(job_id, &backup_job_driver, txn, backup_top,
                           0, BLK_PERM_ALL,
                           speed, creation_flags, cb, opaque, errp);
    if (!job) {
        goto error;
    }

    job->backup_top = backup_top;
    job->source_bs = bs;
    job->on_source_error = on_source_error;
    job->on_target_error = on_target_error;
    job->sync_mode = sync_mode;
    job->sync_bitmap = sync_bitmap;
    job->bitmap_mode = bitmap_mode;
    job->bcs = bcs;
    job->cluster_size = cluster_size;
    job->len = len;

    block_copy_set_callbacks(bcs, backup_progress_bytes_callback,
                             backup_progress_reset_callback, job);

    /* Required permissions are already taken by backup-top target */
    block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
                       &error_abort);

    return &job->common;

 error:
    if (sync_bitmap) {
        bdrv_reclaim_dirty_bitmap(sync_bitmap, NULL);
    }
    if (backup_top) {
        bdrv_backup_top_drop(backup_top);
    }

    return NULL;
}