/* xref: /openbmc/qemu/block/backup.c (revision ed5abf46) */
/*
 * QEMU backup
 *
 * Copyright (C) 2013 Proxmox Server Solutions
 * Copyright (c) 2019 Virtuozzo International GmbH.
 *
 * Authors:
 *  Dietmar Maurer (dietmar@proxmox.com)
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"

#include "trace.h"
#include "block/block.h"
#include "block/block_int.h"
#include "block/blockjob_int.h"
#include "block/block_backup.h"
#include "block/block-copy.h"
#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
#include "qemu/ratelimit.h"
#include "qemu/cutils.h"
#include "sysemu/block-backend.h"
#include "qemu/bitmap.h"
#include "qemu/error-report.h"

#include "block/backup-top.h"

#define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)

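/*
 * Per-job state.  The actual copying is delegated to the block-copy state
 * (@bcs) that is created together with the backup-top filter node.
 */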
typedef struct BackupBlockJob {
    BlockJob common;
    BlockDriverState *backup_top;
    BlockDriverState *source_bs;

    BdrvDirtyBitmap *sync_bitmap;

    MirrorSyncMode sync_mode;
    BitmapSyncMode bitmap_mode;
    BlockdevOnError on_source_error;
    BlockdevOnError on_target_error;
    uint64_t len;
    uint64_t bytes_read;
    int64_t cluster_size;

    BlockCopyState *bcs;
} BackupBlockJob;

static const BlockJobDriver backup_job_driver;

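/*
 * Progress callback registered with block-copy: accumulate the number of
 * bytes copied so that yield_and_check() can feed it to the rate limiter.
 */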
static void backup_progress_bytes_callback(int64_t bytes, void *opaque)
{
    BackupBlockJob *s = opaque;

    s->bytes_read += bytes;
}

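/*
 * Copy the given range to the target: align it to the job's cluster size
 * and let block-copy do the actual I/O.
 */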
static int coroutine_fn backup_do_cow(BackupBlockJob *job,
                                      int64_t offset, uint64_t bytes,
                                      bool *error_is_read)
{
    int ret = 0;
    int64_t start, end; /* bytes */

    start = QEMU_ALIGN_DOWN(offset, job->cluster_size);
    end = QEMU_ALIGN_UP(bytes + offset, job->cluster_size);

    trace_backup_do_cow_enter(job, start, offset, bytes);

    ret = block_copy(job->bcs, start, end - start, error_is_read);

    trace_backup_do_cow_return(job, offset, bytes, ret);

    return ret;
}

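/*
 * Finalize the user-supplied sync bitmap according to the bitmap sync mode
 * and the job's outcome (@ret): either let the successor bitmap replace it
 * or merge the successor back, and for "always" mode re-add the clusters
 * that were not copied.
 */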
static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
{
    BdrvDirtyBitmap *bm;
    bool sync = (((ret == 0) || (job->bitmap_mode == BITMAP_SYNC_MODE_ALWAYS))
                 && (job->bitmap_mode != BITMAP_SYNC_MODE_NEVER));

    if (sync) {
        /*
         * We succeeded, or we always intended to sync the bitmap.
         * Delete this bitmap and install the child.
         */
        bm = bdrv_dirty_bitmap_abdicate(job->sync_bitmap, NULL);
    } else {
        /*
         * We failed, or we never intended to sync the bitmap anyway.
         * Merge the successor back into the parent, keeping all data.
         */
        bm = bdrv_reclaim_dirty_bitmap(job->sync_bitmap, NULL);
    }

    assert(bm);

    if (ret < 0 && job->bitmap_mode == BITMAP_SYNC_MODE_ALWAYS) {
        /* If we failed and synced, merge in the bits we didn't copy: */
        bdrv_dirty_bitmap_merge_internal(bm, block_copy_dirty_bitmap(job->bcs),
                                         NULL, true);
    }
}

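/*
 * Job lifecycle hooks: commit and abort finalize the sync bitmap (if any)
 * with the respective result, and clean drops the backup-top filter node.
 */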
static void backup_commit(Job *job)
{
    BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
    if (s->sync_bitmap) {
        backup_cleanup_sync_bitmap(s, 0);
    }
}

static void backup_abort(Job *job)
{
    BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
    if (s->sync_bitmap) {
        backup_cleanup_sync_bitmap(s, -1);
    }
}

static void backup_clean(Job *job)
{
    BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
    AioContext *aio_context = bdrv_get_aio_context(s->backup_top);

    aio_context_acquire(aio_context);
    bdrv_backup_top_drop(s->backup_top);
    aio_context_release(aio_context);
}

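/*
 * Re-arm a sync=none backup: mark the entire device dirty again in the
 * block-copy bitmap, so that from this point on clusters are copied to the
 * target before the guest overwrites them.
 */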
void backup_do_checkpoint(BlockJob *job, Error **errp)
{
    BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);

    assert(block_job_driver(job) == &backup_job_driver);

    if (backup_job->sync_mode != MIRROR_SYNC_MODE_NONE) {
        error_setg(errp, "The backup job only supports block checkpoint in"
                   " sync=none mode");
        return;
    }

    bdrv_set_dirty_bitmap(block_copy_dirty_bitmap(backup_job->bcs), 0,
                          backup_job->len);
}

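/*
 * Map an I/O error to a job action, using on_source_error for read errors
 * and on_target_error for write errors.
 */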
static BlockErrorAction backup_error_action(BackupBlockJob *job,
                                            bool read, int error)
{
    if (read) {
        return block_job_error_action(&job->common, job->on_source_error,
                                      true, error);
    } else {
        return block_job_error_action(&job->common, job->on_target_error,
                                      false, error);
    }
}

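/*
 * Apply rate limiting and give the main loop a chance to run.
 * Returns true if the job has been cancelled and the caller should stop.
 */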
static bool coroutine_fn yield_and_check(BackupBlockJob *job)
{
    uint64_t delay_ns;

    if (job_is_cancelled(&job->common.job)) {
        return true;
    }

    /*
     * We need to yield even for delay_ns = 0 so that bdrv_drain_all() can
     * return. Without a yield, the VM would not reboot.
     */
    delay_ns = block_job_ratelimit_get_delay(&job->common, job->bytes_read);
    job->bytes_read = 0;
    job_sleep_ns(&job->common.job, delay_ns);

    if (job_is_cancelled(&job->common.job)) {
        return true;
    }

    return false;
}

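/*
 * Main copy loop: walk the dirty clusters recorded in the block-copy bitmap
 * and copy them one cluster at a time, retrying a failed cluster unless the
 * configured error action says to give up.
 */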
static int coroutine_fn backup_loop(BackupBlockJob *job)
{
    bool error_is_read;
    int64_t offset;
    BdrvDirtyBitmapIter *bdbi;
    int ret = 0;

    bdbi = bdrv_dirty_iter_new(block_copy_dirty_bitmap(job->bcs));
    while ((offset = bdrv_dirty_iter_next(bdbi)) != -1) {
        do {
            if (yield_and_check(job)) {
                goto out;
            }
            ret = backup_do_cow(job, offset, job->cluster_size, &error_is_read);
            if (ret < 0 && backup_error_action(job, error_is_read, -ret) ==
                           BLOCK_ERROR_ACTION_REPORT)
            {
                goto out;
            }
        } while (ret < 0);
    }

 out:
    bdrv_dirty_iter_free(bdbi);
    return ret;
}

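/*
 * Seed the block-copy bitmap: copy in the user's bitmap for sync=bitmap,
 * otherwise mark the whole device dirty (deferring allocation filtering to
 * backup_run() for sync=top), then publish the initial progress estimate.
 */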
static void backup_init_bcs_bitmap(BackupBlockJob *job)
{
    bool ret;
    uint64_t estimate;
    BdrvDirtyBitmap *bcs_bitmap = block_copy_dirty_bitmap(job->bcs);

    if (job->sync_mode == MIRROR_SYNC_MODE_BITMAP) {
        ret = bdrv_dirty_bitmap_merge_internal(bcs_bitmap, job->sync_bitmap,
                                               NULL, true);
        assert(ret);
    } else {
        if (job->sync_mode == MIRROR_SYNC_MODE_TOP) {
            /*
             * We can't hog the coroutine to initialize this thoroughly.
             * Set a flag and resume work when we are able to yield safely.
             */
            block_copy_set_skip_unallocated(job->bcs, true);
        }
        bdrv_set_dirty_bitmap(bcs_bitmap, 0, job->len);
    }

    estimate = bdrv_get_dirty_count(bcs_bitmap);
    job_progress_set_remaining(&job->common.job, estimate);
}

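/*
 * Coroutine entry point of the job.  For sync=top, first clear the bits of
 * clusters that are unallocated in the top layer; for sync=none, just park
 * here and let the backup-top filter handle copy-before-write; otherwise
 * run the copy loop.
 */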
static int coroutine_fn backup_run(Job *job, Error **errp)
{
    BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
    int ret = 0;

    backup_init_bcs_bitmap(s);

    if (s->sync_mode == MIRROR_SYNC_MODE_TOP) {
        int64_t offset = 0;
        int64_t count;

        for (offset = 0; offset < s->len; ) {
            if (yield_and_check(s)) {
                ret = -ECANCELED;
                goto out;
            }

            ret = block_copy_reset_unallocated(s->bcs, offset, &count);
            if (ret < 0) {
                goto out;
            }

            offset += count;
        }
        block_copy_set_skip_unallocated(s->bcs, false);
    }

    if (s->sync_mode == MIRROR_SYNC_MODE_NONE) {
        /*
         * All bits are set in bcs bitmap to allow any cluster to be copied.
         * This does not actually require them to be copied.
         */
        while (!job_is_cancelled(job)) {
            /*
             * Yield until the job is cancelled.  We just let our before_write
             * notify callback service CoW requests.
             */
            job_yield(job);
        }
    } else {
        ret = backup_loop(s);
    }

 out:
    return ret;
}

static const BlockJobDriver backup_job_driver = {
    .job_driver = {
        .instance_size          = sizeof(BackupBlockJob),
        .job_type               = JOB_TYPE_BACKUP,
        .free                   = block_job_free,
        .user_resume            = block_job_user_resume,
        .run                    = backup_run,
        .commit                 = backup_commit,
        .abort                  = backup_abort,
        .clean                  = backup_clean,
    }
};

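/*
 * Pick the job's cluster size: BACKUP_CLUSTER_SIZE_DEFAULT (64 KiB), or the
 * target's cluster size if that is known and larger.  A target with an
 * unknown cluster size and no backing file only gets a warning, because a
 * larger real cluster size could make the resulting backup unusable.
 */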
static int64_t backup_calculate_cluster_size(BlockDriverState *target,
                                             Error **errp)
{
    int ret;
    BlockDriverInfo bdi;

    /*
     * If there is no backing file on the target, we cannot rely on COW if our
     * backup cluster size is smaller than the target cluster size. Even for
     * targets with a backing file, try to avoid COW if possible.
     */
    ret = bdrv_get_info(target, &bdi);
    if (ret == -ENOTSUP && !target->backing) {
        /* Cluster size is not defined */
        warn_report("The target block device doesn't provide "
                    "information about the block size and it doesn't have a "
                    "backing file. The default block size of %u bytes is "
                    "used. If the actual block size of the target exceeds "
                    "this default, the backup may be unusable",
                    BACKUP_CLUSTER_SIZE_DEFAULT);
        return BACKUP_CLUSTER_SIZE_DEFAULT;
    } else if (ret < 0 && !target->backing) {
        error_setg_errno(errp, -ret,
            "Couldn't determine the cluster size of the target image, "
            "which has no backing file");
        error_append_hint(errp,
            "Aborting, since this may create an unusable destination image\n");
        return ret;
    } else if (ret < 0 && target->backing) {
        /* Not fatal; just trudge on ahead. */
        return BACKUP_CLUSTER_SIZE_DEFAULT;
    }

    return MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
}

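/*
 * Create (without starting) a backup block job that copies @bs to @target.
 * This inserts a backup-top filter node above @bs so that guest writes are
 * copied to @target before they reach the source, and sets up the block-copy
 * state that both the filter and the job's own copy loop use.
 */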
BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
                  BlockDriverState *target, int64_t speed,
                  MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
                  BitmapSyncMode bitmap_mode,
                  bool compress,
                  const char *filter_node_name,
                  BlockdevOnError on_source_error,
                  BlockdevOnError on_target_error,
                  int creation_flags,
                  BlockCompletionFunc *cb, void *opaque,
                  JobTxn *txn, Error **errp)
{
    int64_t len;
    BackupBlockJob *job = NULL;
    int64_t cluster_size;
    BdrvRequestFlags write_flags;
    BlockDriverState *backup_top = NULL;
    BlockCopyState *bcs = NULL;

    assert(bs);
    assert(target);

    /* QMP interface protects us from these cases */
    assert(sync_mode != MIRROR_SYNC_MODE_INCREMENTAL);
    assert(sync_bitmap || sync_mode != MIRROR_SYNC_MODE_BITMAP);

    if (bs == target) {
        error_setg(errp, "Source and target cannot be the same");
        return NULL;
    }

    if (!bdrv_is_inserted(bs)) {
        error_setg(errp, "Device is not inserted: %s",
                   bdrv_get_device_name(bs));
        return NULL;
    }

    if (!bdrv_is_inserted(target)) {
        error_setg(errp, "Device is not inserted: %s",
                   bdrv_get_device_name(target));
        return NULL;
    }

    if (compress && !block_driver_can_compress(target->drv)) {
        error_setg(errp, "Compression is not supported for this drive %s",
                   bdrv_get_device_name(target));
        return NULL;
    }

    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
        return NULL;
    }

    if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_BACKUP_TARGET, errp)) {
        return NULL;
    }

    if (sync_bitmap) {
        /* If we need to write to this bitmap, check that we can: */
        if (bitmap_mode != BITMAP_SYNC_MODE_NEVER &&
            bdrv_dirty_bitmap_check(sync_bitmap, BDRV_BITMAP_DEFAULT, errp)) {
            return NULL;
        }

        /* Create a new bitmap, and freeze/disable this one. */
        if (bdrv_dirty_bitmap_create_successor(sync_bitmap, errp) < 0) {
            return NULL;
        }
    }

    len = bdrv_getlength(bs);
    if (len < 0) {
        error_setg_errno(errp, -len, "unable to get length for '%s'",
                         bdrv_get_device_name(bs));
        goto error;
    }

    cluster_size = backup_calculate_cluster_size(target, errp);
    if (cluster_size < 0) {
        goto error;
    }

    /*
     * If the source is in the backing chain of the target, assume that the
     * target is going to be used for "image fleecing", i.e. it should
     * represent a kind of snapshot of the source taken at the moment the
     * backup starts, and it is going to be read by somebody (for example,
     * exported over NBD) while the backup job runs.
     *
     * In this case we need to set the BDRV_REQ_SERIALISING write flag to
     * avoid backup writes racing with third-party reads from the target;
     * otherwise a reader might occasionally see data that the guest has
     * already overwritten.
     *
     * For more information see commit f8d59dfb40bb and test
     * tests/qemu-iotests/222
     */
    write_flags = (bdrv_chain_contains(target, bs) ? BDRV_REQ_SERIALISING : 0) |
                  (compress ? BDRV_REQ_WRITE_COMPRESSED : 0);

    backup_top = bdrv_backup_top_append(bs, target, filter_node_name,
                                        cluster_size, write_flags, &bcs, errp);
    if (!backup_top) {
        goto error;
    }

    /* job->len is fixed, so we can't allow resize */
    job = block_job_create(job_id, &backup_job_driver, txn, backup_top,
                           0, BLK_PERM_ALL,
                           speed, creation_flags, cb, opaque, errp);
    if (!job) {
        goto error;
    }

    job->backup_top = backup_top;
    job->source_bs = bs;
    job->on_source_error = on_source_error;
    job->on_target_error = on_target_error;
    job->sync_mode = sync_mode;
    job->sync_bitmap = sync_bitmap;
    job->bitmap_mode = bitmap_mode;
    job->bcs = bcs;
    job->cluster_size = cluster_size;
    job->len = len;

    block_copy_set_progress_callback(bcs, backup_progress_bytes_callback, job);
    block_copy_set_progress_meter(bcs, &job->common.job.progress);

    /* Required permissions are already taken by backup-top target */
    block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
                       &error_abort);

    return &job->common;

 error:
    if (sync_bitmap) {
        bdrv_reclaim_dirty_bitmap(sync_bitmap, NULL);
    }
    if (backup_top) {
        bdrv_backup_top_drop(backup_top);
    }

    return NULL;
}