1 /* 2 * Image mirroring 3 * 4 * Copyright Red Hat, Inc. 2012 5 * 6 * Authors: 7 * Paolo Bonzini <pbonzini@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU LGPL, version 2 or later. 10 * See the COPYING.LIB file in the top-level directory. 11 * 12 */ 13 14 #include "trace.h" 15 #include "blockjob.h" 16 #include "block_int.h" 17 #include "qemu/ratelimit.h" 18 19 enum { 20 /* 21 * Size of data buffer for populating the image file. This should be large 22 * enough to process multiple clusters in a single call, so that populating 23 * contiguous regions of the image is efficient. 24 */ 25 BLOCK_SIZE = 512 * BDRV_SECTORS_PER_DIRTY_CHUNK, /* in bytes */ 26 }; 27 28 #define SLICE_TIME 100000000ULL /* ns */ 29 30 typedef struct MirrorBlockJob { 31 BlockJob common; 32 RateLimit limit; 33 BlockDriverState *target; 34 MirrorSyncMode mode; 35 BlockdevOnError on_source_error, on_target_error; 36 bool synced; 37 bool should_complete; 38 int64_t sector_num; 39 uint8_t *buf; 40 } MirrorBlockJob; 41 42 static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read, 43 int error) 44 { 45 s->synced = false; 46 if (read) { 47 return block_job_error_action(&s->common, s->common.bs, 48 s->on_source_error, true, error); 49 } else { 50 return block_job_error_action(&s->common, s->target, 51 s->on_target_error, false, error); 52 } 53 } 54 55 static int coroutine_fn mirror_iteration(MirrorBlockJob *s, 56 BlockErrorAction *p_action) 57 { 58 BlockDriverState *source = s->common.bs; 59 BlockDriverState *target = s->target; 60 QEMUIOVector qiov; 61 int ret, nb_sectors; 62 int64_t end; 63 struct iovec iov; 64 65 end = s->common.len >> BDRV_SECTOR_BITS; 66 s->sector_num = bdrv_get_next_dirty(source, s->sector_num); 67 nb_sectors = MIN(BDRV_SECTORS_PER_DIRTY_CHUNK, end - s->sector_num); 68 bdrv_reset_dirty(source, s->sector_num, nb_sectors); 69 70 /* Copy the dirty cluster. */ 71 iov.iov_base = s->buf; 72 iov.iov_len = nb_sectors * 512; 73 qemu_iovec_init_external(&qiov, &iov, 1); 74 75 trace_mirror_one_iteration(s, s->sector_num, nb_sectors); 76 ret = bdrv_co_readv(source, s->sector_num, nb_sectors, &qiov); 77 if (ret < 0) { 78 *p_action = mirror_error_action(s, true, -ret); 79 goto fail; 80 } 81 ret = bdrv_co_writev(target, s->sector_num, nb_sectors, &qiov); 82 if (ret < 0) { 83 *p_action = mirror_error_action(s, false, -ret); 84 s->synced = false; 85 goto fail; 86 } 87 return 0; 88 89 fail: 90 /* Try again later. */ 91 bdrv_set_dirty(source, s->sector_num, nb_sectors); 92 return ret; 93 } 94 95 static void coroutine_fn mirror_run(void *opaque) 96 { 97 MirrorBlockJob *s = opaque; 98 BlockDriverState *bs = s->common.bs; 99 int64_t sector_num, end; 100 int ret = 0; 101 int n; 102 103 if (block_job_is_cancelled(&s->common)) { 104 goto immediate_exit; 105 } 106 107 s->common.len = bdrv_getlength(bs); 108 if (s->common.len < 0) { 109 block_job_completed(&s->common, s->common.len); 110 return; 111 } 112 113 end = s->common.len >> BDRV_SECTOR_BITS; 114 s->buf = qemu_blockalign(bs, BLOCK_SIZE); 115 116 if (s->mode != MIRROR_SYNC_MODE_NONE) { 117 /* First part, loop on the sectors and initialize the dirty bitmap. */ 118 BlockDriverState *base; 119 base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd; 120 for (sector_num = 0; sector_num < end; ) { 121 int64_t next = (sector_num | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1; 122 ret = bdrv_co_is_allocated_above(bs, base, 123 sector_num, next - sector_num, &n); 124 125 if (ret < 0) { 126 goto immediate_exit; 127 } 128 129 assert(n > 0); 130 if (ret == 1) { 131 bdrv_set_dirty(bs, sector_num, n); 132 sector_num = next; 133 } else { 134 sector_num += n; 135 } 136 } 137 } 138 139 s->sector_num = -1; 140 for (;;) { 141 uint64_t delay_ns; 142 int64_t cnt; 143 bool should_complete; 144 145 cnt = bdrv_get_dirty_count(bs); 146 if (cnt != 0) { 147 BlockErrorAction action = BDRV_ACTION_REPORT; 148 ret = mirror_iteration(s, &action); 149 if (ret < 0 && action == BDRV_ACTION_REPORT) { 150 goto immediate_exit; 151 } 152 cnt = bdrv_get_dirty_count(bs); 153 } 154 155 should_complete = false; 156 if (cnt == 0) { 157 trace_mirror_before_flush(s); 158 ret = bdrv_flush(s->target); 159 if (ret < 0) { 160 if (mirror_error_action(s, false, -ret) == BDRV_ACTION_REPORT) { 161 goto immediate_exit; 162 } 163 } else { 164 /* We're out of the streaming phase. From now on, if the job 165 * is cancelled we will actually complete all pending I/O and 166 * report completion. This way, block-job-cancel will leave 167 * the target in a consistent state. 168 */ 169 s->common.offset = end * BDRV_SECTOR_SIZE; 170 if (!s->synced) { 171 block_job_ready(&s->common); 172 s->synced = true; 173 } 174 175 should_complete = s->should_complete || 176 block_job_is_cancelled(&s->common); 177 cnt = bdrv_get_dirty_count(bs); 178 } 179 } 180 181 if (cnt == 0 && should_complete) { 182 /* The dirty bitmap is not updated while operations are pending. 183 * If we're about to exit, wait for pending operations before 184 * calling bdrv_get_dirty_count(bs), or we may exit while the 185 * source has dirty data to copy! 186 * 187 * Note that I/O can be submitted by the guest while 188 * mirror_populate runs. 189 */ 190 trace_mirror_before_drain(s, cnt); 191 bdrv_drain_all(); 192 cnt = bdrv_get_dirty_count(bs); 193 } 194 195 ret = 0; 196 trace_mirror_before_sleep(s, cnt, s->synced); 197 if (!s->synced) { 198 /* Publish progress */ 199 s->common.offset = end * BDRV_SECTOR_SIZE - cnt * BLOCK_SIZE; 200 201 if (s->common.speed) { 202 delay_ns = ratelimit_calculate_delay(&s->limit, BDRV_SECTORS_PER_DIRTY_CHUNK); 203 } else { 204 delay_ns = 0; 205 } 206 207 /* Note that even when no rate limit is applied we need to yield 208 * with no pending I/O here so that bdrv_drain_all() returns. 209 */ 210 block_job_sleep_ns(&s->common, rt_clock, delay_ns); 211 if (block_job_is_cancelled(&s->common)) { 212 break; 213 } 214 } else if (!should_complete) { 215 delay_ns = (cnt == 0 ? SLICE_TIME : 0); 216 block_job_sleep_ns(&s->common, rt_clock, delay_ns); 217 } else if (cnt == 0) { 218 /* The two disks are in sync. Exit and report successful 219 * completion. 220 */ 221 assert(QLIST_EMPTY(&bs->tracked_requests)); 222 s->common.cancelled = false; 223 break; 224 } 225 } 226 227 immediate_exit: 228 g_free(s->buf); 229 bdrv_set_dirty_tracking(bs, false); 230 bdrv_iostatus_disable(s->target); 231 if (s->should_complete && ret == 0) { 232 if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) { 233 bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL); 234 } 235 bdrv_swap(s->target, s->common.bs); 236 } 237 bdrv_close(s->target); 238 bdrv_delete(s->target); 239 block_job_completed(&s->common, ret); 240 } 241 242 static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp) 243 { 244 MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); 245 246 if (speed < 0) { 247 error_set(errp, QERR_INVALID_PARAMETER, "speed"); 248 return; 249 } 250 ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME); 251 } 252 253 static void mirror_iostatus_reset(BlockJob *job) 254 { 255 MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); 256 257 bdrv_iostatus_reset(s->target); 258 } 259 260 static void mirror_complete(BlockJob *job, Error **errp) 261 { 262 MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); 263 int ret; 264 265 ret = bdrv_open_backing_file(s->target); 266 if (ret < 0) { 267 char backing_filename[PATH_MAX]; 268 bdrv_get_full_backing_filename(s->target, backing_filename, 269 sizeof(backing_filename)); 270 error_set(errp, QERR_OPEN_FILE_FAILED, backing_filename); 271 return; 272 } 273 if (!s->synced) { 274 error_set(errp, QERR_BLOCK_JOB_NOT_READY, job->bs->device_name); 275 return; 276 } 277 278 s->should_complete = true; 279 block_job_resume(job); 280 } 281 282 static BlockJobType mirror_job_type = { 283 .instance_size = sizeof(MirrorBlockJob), 284 .job_type = "mirror", 285 .set_speed = mirror_set_speed, 286 .iostatus_reset= mirror_iostatus_reset, 287 .complete = mirror_complete, 288 }; 289 290 void mirror_start(BlockDriverState *bs, BlockDriverState *target, 291 int64_t speed, MirrorSyncMode mode, 292 BlockdevOnError on_source_error, 293 BlockdevOnError on_target_error, 294 BlockDriverCompletionFunc *cb, 295 void *opaque, Error **errp) 296 { 297 MirrorBlockJob *s; 298 299 if ((on_source_error == BLOCKDEV_ON_ERROR_STOP || 300 on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) && 301 !bdrv_iostatus_is_enabled(bs)) { 302 error_set(errp, QERR_INVALID_PARAMETER, "on-source-error"); 303 return; 304 } 305 306 s = block_job_create(&mirror_job_type, bs, speed, cb, opaque, errp); 307 if (!s) { 308 return; 309 } 310 311 s->on_source_error = on_source_error; 312 s->on_target_error = on_target_error; 313 s->target = target; 314 s->mode = mode; 315 bdrv_set_dirty_tracking(bs, true); 316 bdrv_set_enable_write_cache(s->target, true); 317 bdrv_set_on_error(s->target, on_target_error, on_target_error); 318 bdrv_iostatus_enable(s->target); 319 s->common.co = qemu_coroutine_create(mirror_run); 320 trace_mirror_start(bs, s, s->common.co, opaque); 321 qemu_coroutine_enter(s->common.co, s); 322 } 323