xref: /openbmc/qemu/block/mirror.c (revision 1de7afc9)
1 /*
2  * Image mirroring
3  *
4  * Copyright Red Hat, Inc. 2012
5  *
6  * Authors:
7  *  Paolo Bonzini  <pbonzini@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU LGPL, version 2 or later.
10  * See the COPYING.LIB file in the top-level directory.
11  *
12  */
13 
14 #include "trace.h"
15 #include "block/blockjob.h"
16 #include "block/block_int.h"
17 #include "qemu/ratelimit.h"
18 
enum {
    /*
     * Size of data buffer for populating the image file.  This should be large
     * enough to process multiple clusters in a single call, so that populating
     * contiguous regions of the image is efficient.
     */
    BLOCK_SIZE = 512 * BDRV_SECTORS_PER_DIRTY_CHUNK, /* in bytes */
};

/* Duration of one rate-limit window passed to ratelimit_set_speed(). */
#define SLICE_TIME 100000000ULL /* ns */
29 
/* State of a single drive-mirror background job. */
typedef struct MirrorBlockJob {
    BlockJob common;
    RateLimit limit;            /* throttle used when common.speed is set */
    BlockDriverState *target;   /* destination the source is mirrored to */
    MirrorSyncMode mode;        /* what to copy initially: full/top/none */
    BlockdevOnError on_source_error, on_target_error;
    bool synced;                /* true once target caught up and flushed;
                                 * cleared again on any I/O error */
    bool should_complete;       /* set by mirror_complete() to request exit */
    int64_t sector_num;         /* position of the last chunk copied */
    uint8_t *buf;               /* bounce buffer of BLOCK_SIZE bytes */
} MirrorBlockJob;
41 
42 static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
43                                             int error)
44 {
45     s->synced = false;
46     if (read) {
47         return block_job_error_action(&s->common, s->common.bs,
48                                       s->on_source_error, true, error);
49     } else {
50         return block_job_error_action(&s->common, s->target,
51                                       s->on_target_error, false, error);
52     }
53 }
54 
55 static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
56                                          BlockErrorAction *p_action)
57 {
58     BlockDriverState *source = s->common.bs;
59     BlockDriverState *target = s->target;
60     QEMUIOVector qiov;
61     int ret, nb_sectors;
62     int64_t end;
63     struct iovec iov;
64 
65     end = s->common.len >> BDRV_SECTOR_BITS;
66     s->sector_num = bdrv_get_next_dirty(source, s->sector_num);
67     nb_sectors = MIN(BDRV_SECTORS_PER_DIRTY_CHUNK, end - s->sector_num);
68     bdrv_reset_dirty(source, s->sector_num, nb_sectors);
69 
70     /* Copy the dirty cluster.  */
71     iov.iov_base = s->buf;
72     iov.iov_len  = nb_sectors * 512;
73     qemu_iovec_init_external(&qiov, &iov, 1);
74 
75     trace_mirror_one_iteration(s, s->sector_num, nb_sectors);
76     ret = bdrv_co_readv(source, s->sector_num, nb_sectors, &qiov);
77     if (ret < 0) {
78         *p_action = mirror_error_action(s, true, -ret);
79         goto fail;
80     }
81     ret = bdrv_co_writev(target, s->sector_num, nb_sectors, &qiov);
82     if (ret < 0) {
83         *p_action = mirror_error_action(s, false, -ret);
84         s->synced = false;
85         goto fail;
86     }
87     return 0;
88 
89 fail:
90     /* Try again later.  */
91     bdrv_set_dirty(source, s->sector_num, nb_sectors);
92     return ret;
93 }
94 
/*
 * Coroutine body of the mirror job.
 *
 * First pass (skipped for MIRROR_SYNC_MODE_NONE): walk the source and
 * mark as dirty every chunk allocated above the chosen base, so the
 * main loop copies it.  Main loop: copy dirty chunks to the target,
 * throttled by s->limit; once the dirty count reaches zero and a flush
 * of the target succeeds, the job is reported ready and keeps mirroring
 * new writes until it is cancelled or mirror_complete() is called.
 */
static void coroutine_fn mirror_run(void *opaque)
{
    MirrorBlockJob *s = opaque;
    BlockDriverState *bs = s->common.bs;
    int64_t sector_num, end;
    int ret = 0;
    int n;

    if (block_job_is_cancelled(&s->common)) {
        goto immediate_exit;
    }

    s->common.len = bdrv_getlength(bs);
    if (s->common.len < 0) {
        /* Negative length is the error code; report it and bail out
         * before any resources are allocated.  */
        block_job_completed(&s->common, s->common.len);
        return;
    }

    end = s->common.len >> BDRV_SECTOR_BITS;
    s->buf = qemu_blockalign(bs, BLOCK_SIZE);

    if (s->mode != MIRROR_SYNC_MODE_NONE) {
        /* First part, loop on the sectors and initialize the dirty bitmap.  */
        BlockDriverState *base;
        /* FULL copies everything; TOP copies only what is allocated in bs
         * itself, i.e. above its backing file.  */
        base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd;
        for (sector_num = 0; sector_num < end; ) {
            /* Next dirty-chunk boundary after sector_num.  */
            int64_t next = (sector_num | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1;
            ret = bdrv_co_is_allocated_above(bs, base,
                                             sector_num, next - sector_num, &n);

            if (ret < 0) {
                goto immediate_exit;
            }

            assert(n > 0);
            if (ret == 1) {
                bdrv_set_dirty(bs, sector_num, n);
                sector_num = next;
            } else {
                sector_num += n;
            }
        }
    }

    s->sector_num = -1;
    for (;;) {
        uint64_t delay_ns;
        int64_t cnt;
        bool should_complete;

        cnt = bdrv_get_dirty_count(bs);
        if (cnt != 0) {
            BlockErrorAction action = BDRV_ACTION_REPORT;
            ret = mirror_iteration(s, &action);
            if (ret < 0 && action == BDRV_ACTION_REPORT) {
                goto immediate_exit;
            }
            /* mirror_iteration may have cleared or re-set dirty bits;
             * re-read the count.  */
            cnt = bdrv_get_dirty_count(bs);
        }

        should_complete = false;
        if (cnt == 0) {
            trace_mirror_before_flush(s);
            ret = bdrv_flush(s->target);
            if (ret < 0) {
                if (mirror_error_action(s, false, -ret) == BDRV_ACTION_REPORT) {
                    goto immediate_exit;
                }
            } else {
                /* We're out of the streaming phase.  From now on, if the job
                 * is cancelled we will actually complete all pending I/O and
                 * report completion.  This way, block-job-cancel will leave
                 * the target in a consistent state.
                 */
                s->common.offset = end * BDRV_SECTOR_SIZE;
                if (!s->synced) {
                    block_job_ready(&s->common);
                    s->synced = true;
                }

                should_complete = s->should_complete ||
                    block_job_is_cancelled(&s->common);
                cnt = bdrv_get_dirty_count(bs);
            }
        }

        if (cnt == 0 && should_complete) {
            /* The dirty bitmap is not updated while operations are pending.
             * If we're about to exit, wait for pending operations before
             * calling bdrv_get_dirty_count(bs), or we may exit while the
             * source has dirty data to copy!
             *
             * Note that I/O can be submitted by the guest while
             * mirror_populate runs.
             */
            trace_mirror_before_drain(s, cnt);
            bdrv_drain_all();
            cnt = bdrv_get_dirty_count(bs);
        }

        ret = 0;
        trace_mirror_before_sleep(s, cnt, s->synced);
        if (!s->synced) {
            /* Publish progress */
            s->common.offset = end * BDRV_SECTOR_SIZE - cnt * BLOCK_SIZE;

            if (s->common.speed) {
                delay_ns = ratelimit_calculate_delay(&s->limit, BDRV_SECTORS_PER_DIRTY_CHUNK);
            } else {
                delay_ns = 0;
            }

            /* Note that even when no rate limit is applied we need to yield
             * with no pending I/O here so that bdrv_drain_all() returns.
             */
            block_job_sleep_ns(&s->common, rt_clock, delay_ns);
            if (block_job_is_cancelled(&s->common)) {
                break;
            }
        } else if (!should_complete) {
            /* Ready but not asked to finish: idle for a full slice when
             * fully in sync, otherwise go copy the new dirty data.  */
            delay_ns = (cnt == 0 ? SLICE_TIME : 0);
            block_job_sleep_ns(&s->common, rt_clock, delay_ns);
        } else if (cnt == 0) {
            /* The two disks are in sync.  Exit and report successful
             * completion.
             */
            assert(QLIST_EMPTY(&bs->tracked_requests));
            s->common.cancelled = false;
            break;
        }
    }

immediate_exit:
    g_free(s->buf);
    bdrv_set_dirty_tracking(bs, false);
    bdrv_iostatus_disable(s->target);
    if (s->should_complete && ret == 0) {
        /* On requested completion, make the target take the source's
         * place.  Match the source's open flags first so the swap is
         * transparent to the device's user.  */
        if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
            bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
        }
        bdrv_swap(s->target, s->common.bs);
    }
    bdrv_close(s->target);
    bdrv_delete(s->target);
    block_job_completed(&s->common, ret);
}
241 
242 static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp)
243 {
244     MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
245 
246     if (speed < 0) {
247         error_set(errp, QERR_INVALID_PARAMETER, "speed");
248         return;
249     }
250     ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
251 }
252 
253 static void mirror_iostatus_reset(BlockJob *job)
254 {
255     MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
256 
257     bdrv_iostatus_reset(s->target);
258 }
259 
260 static void mirror_complete(BlockJob *job, Error **errp)
261 {
262     MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
263     int ret;
264 
265     ret = bdrv_open_backing_file(s->target);
266     if (ret < 0) {
267         char backing_filename[PATH_MAX];
268         bdrv_get_full_backing_filename(s->target, backing_filename,
269                                        sizeof(backing_filename));
270         error_set(errp, QERR_OPEN_FILE_FAILED, backing_filename);
271         return;
272     }
273     if (!s->synced) {
274         error_set(errp, QERR_BLOCK_JOB_NOT_READY, job->bs->device_name);
275         return;
276     }
277 
278     s->should_complete = true;
279     block_job_resume(job);
280 }
281 
282 static BlockJobType mirror_job_type = {
283     .instance_size = sizeof(MirrorBlockJob),
284     .job_type      = "mirror",
285     .set_speed     = mirror_set_speed,
286     .iostatus_reset= mirror_iostatus_reset,
287     .complete      = mirror_complete,
288 };
289 
290 void mirror_start(BlockDriverState *bs, BlockDriverState *target,
291                   int64_t speed, MirrorSyncMode mode,
292                   BlockdevOnError on_source_error,
293                   BlockdevOnError on_target_error,
294                   BlockDriverCompletionFunc *cb,
295                   void *opaque, Error **errp)
296 {
297     MirrorBlockJob *s;
298 
299     if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
300          on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
301         !bdrv_iostatus_is_enabled(bs)) {
302         error_set(errp, QERR_INVALID_PARAMETER, "on-source-error");
303         return;
304     }
305 
306     s = block_job_create(&mirror_job_type, bs, speed, cb, opaque, errp);
307     if (!s) {
308         return;
309     }
310 
311     s->on_source_error = on_source_error;
312     s->on_target_error = on_target_error;
313     s->target = target;
314     s->mode = mode;
315     bdrv_set_dirty_tracking(bs, true);
316     bdrv_set_enable_write_cache(s->target, true);
317     bdrv_set_on_error(s->target, on_target_error, on_target_error);
318     bdrv_iostatus_enable(s->target);
319     s->common.co = qemu_coroutine_create(mirror_run);
320     trace_mirror_start(bs, s, s->common.co, opaque);
321     qemu_coroutine_enter(s->common.co, s);
322 }
323