129ff7890SWen Congyang /* 229ff7890SWen Congyang * Replication Block filter 329ff7890SWen Congyang * 429ff7890SWen Congyang * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. 529ff7890SWen Congyang * Copyright (c) 2016 Intel Corporation 629ff7890SWen Congyang * Copyright (c) 2016 FUJITSU LIMITED 729ff7890SWen Congyang * 829ff7890SWen Congyang * Author: 929ff7890SWen Congyang * Wen Congyang <wency@cn.fujitsu.com> 1029ff7890SWen Congyang * 1129ff7890SWen Congyang * This work is licensed under the terms of the GNU GPL, version 2 or later. 1229ff7890SWen Congyang * See the COPYING file in the top-level directory. 1329ff7890SWen Congyang */ 1429ff7890SWen Congyang 1529ff7890SWen Congyang #include "qemu/osdep.h" 1629ff7890SWen Congyang #include "qemu-common.h" 1729ff7890SWen Congyang #include "block/nbd.h" 1829ff7890SWen Congyang #include "block/blockjob.h" 1929ff7890SWen Congyang #include "block/block_int.h" 2029ff7890SWen Congyang #include "block/block_backup.h" 2129ff7890SWen Congyang #include "sysemu/block-backend.h" 2229ff7890SWen Congyang #include "qapi/error.h" 2329ff7890SWen Congyang #include "replication.h" 2429ff7890SWen Congyang 2529ff7890SWen Congyang typedef struct BDRVReplicationState { 2629ff7890SWen Congyang ReplicationMode mode; 2729ff7890SWen Congyang int replication_state; 2829ff7890SWen Congyang BdrvChild *active_disk; 2929ff7890SWen Congyang BdrvChild *hidden_disk; 3029ff7890SWen Congyang BdrvChild *secondary_disk; 3129ff7890SWen Congyang char *top_id; 3229ff7890SWen Congyang ReplicationState *rs; 3329ff7890SWen Congyang Error *blocker; 3429ff7890SWen Congyang int orig_hidden_flags; 3529ff7890SWen Congyang int orig_secondary_flags; 3629ff7890SWen Congyang int error; 3729ff7890SWen Congyang } BDRVReplicationState; 3829ff7890SWen Congyang 3929ff7890SWen Congyang enum { 4029ff7890SWen Congyang BLOCK_REPLICATION_NONE, /* block replication is not started */ 4129ff7890SWen Congyang BLOCK_REPLICATION_RUNNING, /* block replication is running */ 4229ff7890SWen Congyang BLOCK_REPLICATION_FAILOVER, /* failover is running in background */ 4329ff7890SWen Congyang BLOCK_REPLICATION_FAILOVER_FAILED, /* failover failed */ 4429ff7890SWen Congyang BLOCK_REPLICATION_DONE, /* block replication is done */ 4529ff7890SWen Congyang }; 4629ff7890SWen Congyang 4729ff7890SWen Congyang static void replication_start(ReplicationState *rs, ReplicationMode mode, 4829ff7890SWen Congyang Error **errp); 4929ff7890SWen Congyang static void replication_do_checkpoint(ReplicationState *rs, Error **errp); 5029ff7890SWen Congyang static void replication_get_error(ReplicationState *rs, Error **errp); 5129ff7890SWen Congyang static void replication_stop(ReplicationState *rs, bool failover, 5229ff7890SWen Congyang Error **errp); 5329ff7890SWen Congyang 5429ff7890SWen Congyang #define REPLICATION_MODE "mode" 5529ff7890SWen Congyang #define REPLICATION_TOP_ID "top-id" 5629ff7890SWen Congyang static QemuOptsList replication_runtime_opts = { 5729ff7890SWen Congyang .name = "replication", 5829ff7890SWen Congyang .head = QTAILQ_HEAD_INITIALIZER(replication_runtime_opts.head), 5929ff7890SWen Congyang .desc = { 6029ff7890SWen Congyang { 6129ff7890SWen Congyang .name = REPLICATION_MODE, 6229ff7890SWen Congyang .type = QEMU_OPT_STRING, 6329ff7890SWen Congyang }, 6429ff7890SWen Congyang { 6529ff7890SWen Congyang .name = REPLICATION_TOP_ID, 6629ff7890SWen Congyang .type = QEMU_OPT_STRING, 6729ff7890SWen Congyang }, 6829ff7890SWen Congyang { /* end of list */ } 6929ff7890SWen Congyang }, 7029ff7890SWen Congyang }; 7129ff7890SWen Congyang 7229ff7890SWen Congyang static ReplicationOps replication_ops = { 7329ff7890SWen Congyang .start = replication_start, 7429ff7890SWen Congyang .checkpoint = replication_do_checkpoint, 7529ff7890SWen Congyang .get_error = replication_get_error, 7629ff7890SWen Congyang .stop = replication_stop, 7729ff7890SWen Congyang }; 7829ff7890SWen Congyang 7929ff7890SWen Congyang static int replication_open(BlockDriverState *bs, QDict *options, 8029ff7890SWen Congyang int flags, Error **errp) 8129ff7890SWen Congyang { 8229ff7890SWen Congyang int ret; 8329ff7890SWen Congyang BDRVReplicationState *s = bs->opaque; 8429ff7890SWen Congyang Error *local_err = NULL; 8529ff7890SWen Congyang QemuOpts *opts = NULL; 8629ff7890SWen Congyang const char *mode; 8729ff7890SWen Congyang const char *top_id; 8829ff7890SWen Congyang 8929ff7890SWen Congyang ret = -EINVAL; 9029ff7890SWen Congyang opts = qemu_opts_create(&replication_runtime_opts, NULL, 0, &error_abort); 9129ff7890SWen Congyang qemu_opts_absorb_qdict(opts, options, &local_err); 9229ff7890SWen Congyang if (local_err) { 9329ff7890SWen Congyang goto fail; 9429ff7890SWen Congyang } 9529ff7890SWen Congyang 9629ff7890SWen Congyang mode = qemu_opt_get(opts, REPLICATION_MODE); 9729ff7890SWen Congyang if (!mode) { 9829ff7890SWen Congyang error_setg(&local_err, "Missing the option mode"); 9929ff7890SWen Congyang goto fail; 10029ff7890SWen Congyang } 10129ff7890SWen Congyang 10229ff7890SWen Congyang if (!strcmp(mode, "primary")) { 10329ff7890SWen Congyang s->mode = REPLICATION_MODE_PRIMARY; 104f4f2539bSChanglong Xie top_id = qemu_opt_get(opts, REPLICATION_TOP_ID); 105f4f2539bSChanglong Xie if (top_id) { 106f4f2539bSChanglong Xie error_setg(&local_err, "The primary side does not support option top-id"); 107f4f2539bSChanglong Xie goto fail; 108f4f2539bSChanglong Xie } 10929ff7890SWen Congyang } else if (!strcmp(mode, "secondary")) { 11029ff7890SWen Congyang s->mode = REPLICATION_MODE_SECONDARY; 11129ff7890SWen Congyang top_id = qemu_opt_get(opts, REPLICATION_TOP_ID); 11229ff7890SWen Congyang s->top_id = g_strdup(top_id); 11329ff7890SWen Congyang if (!s->top_id) { 11429ff7890SWen Congyang error_setg(&local_err, "Missing the option top-id"); 11529ff7890SWen Congyang goto fail; 11629ff7890SWen Congyang } 11729ff7890SWen Congyang } else { 11829ff7890SWen Congyang error_setg(&local_err, 11929ff7890SWen Congyang "The option mode's value should be primary or secondary"); 12029ff7890SWen Congyang goto fail; 12129ff7890SWen Congyang } 12229ff7890SWen Congyang 12329ff7890SWen Congyang s->rs = replication_new(bs, &replication_ops); 12429ff7890SWen Congyang 12529ff7890SWen Congyang ret = 0; 12629ff7890SWen Congyang 12729ff7890SWen Congyang fail: 12829ff7890SWen Congyang qemu_opts_del(opts); 12929ff7890SWen Congyang error_propagate(errp, local_err); 13029ff7890SWen Congyang 13129ff7890SWen Congyang return ret; 13229ff7890SWen Congyang } 13329ff7890SWen Congyang 13429ff7890SWen Congyang static void replication_close(BlockDriverState *bs) 13529ff7890SWen Congyang { 13629ff7890SWen Congyang BDRVReplicationState *s = bs->opaque; 13729ff7890SWen Congyang 13829ff7890SWen Congyang if (s->replication_state == BLOCK_REPLICATION_RUNNING) { 13929ff7890SWen Congyang replication_stop(s->rs, false, NULL); 14029ff7890SWen Congyang } 14150ab0e09SPaolo Bonzini if (s->replication_state == BLOCK_REPLICATION_FAILOVER) { 14250ab0e09SPaolo Bonzini block_job_cancel_sync(s->active_disk->bs->job); 14350ab0e09SPaolo Bonzini } 14429ff7890SWen Congyang 14529ff7890SWen Congyang if (s->mode == REPLICATION_MODE_SECONDARY) { 14629ff7890SWen Congyang g_free(s->top_id); 14729ff7890SWen Congyang } 14829ff7890SWen Congyang 14929ff7890SWen Congyang replication_remove(s->rs); 15029ff7890SWen Congyang } 15129ff7890SWen Congyang 15229ff7890SWen Congyang static int64_t replication_getlength(BlockDriverState *bs) 15329ff7890SWen Congyang { 15429ff7890SWen Congyang return bdrv_getlength(bs->file->bs); 15529ff7890SWen Congyang } 15629ff7890SWen Congyang 15729ff7890SWen Congyang static int replication_get_io_status(BDRVReplicationState *s) 15829ff7890SWen Congyang { 15929ff7890SWen Congyang switch (s->replication_state) { 16029ff7890SWen Congyang case BLOCK_REPLICATION_NONE: 16129ff7890SWen Congyang return -EIO; 16229ff7890SWen Congyang case BLOCK_REPLICATION_RUNNING: 16329ff7890SWen Congyang return 0; 16429ff7890SWen Congyang case BLOCK_REPLICATION_FAILOVER: 16529ff7890SWen Congyang return s->mode == REPLICATION_MODE_PRIMARY ? -EIO : 0; 16629ff7890SWen Congyang case BLOCK_REPLICATION_FAILOVER_FAILED: 16729ff7890SWen Congyang return s->mode == REPLICATION_MODE_PRIMARY ? -EIO : 1; 16829ff7890SWen Congyang case BLOCK_REPLICATION_DONE: 16929ff7890SWen Congyang /* 17029ff7890SWen Congyang * active commit job completes, and active disk and secondary_disk 17129ff7890SWen Congyang * is swapped, so we can operate bs->file directly 17229ff7890SWen Congyang */ 17329ff7890SWen Congyang return s->mode == REPLICATION_MODE_PRIMARY ? -EIO : 0; 17429ff7890SWen Congyang default: 17529ff7890SWen Congyang abort(); 17629ff7890SWen Congyang } 17729ff7890SWen Congyang } 17829ff7890SWen Congyang 17929ff7890SWen Congyang static int replication_return_value(BDRVReplicationState *s, int ret) 18029ff7890SWen Congyang { 18129ff7890SWen Congyang if (s->mode == REPLICATION_MODE_SECONDARY) { 18229ff7890SWen Congyang return ret; 18329ff7890SWen Congyang } 18429ff7890SWen Congyang 18529ff7890SWen Congyang if (ret < 0) { 18629ff7890SWen Congyang s->error = ret; 18729ff7890SWen Congyang ret = 0; 18829ff7890SWen Congyang } 18929ff7890SWen Congyang 19029ff7890SWen Congyang return ret; 19129ff7890SWen Congyang } 19229ff7890SWen Congyang 19329ff7890SWen Congyang static coroutine_fn int replication_co_readv(BlockDriverState *bs, 19429ff7890SWen Congyang int64_t sector_num, 19529ff7890SWen Congyang int remaining_sectors, 19629ff7890SWen Congyang QEMUIOVector *qiov) 19729ff7890SWen Congyang { 19829ff7890SWen Congyang BDRVReplicationState *s = bs->opaque; 19929ff7890SWen Congyang BdrvChild *child = s->secondary_disk; 20029ff7890SWen Congyang BlockJob *job = NULL; 20129ff7890SWen Congyang CowRequest req; 20229ff7890SWen Congyang int ret; 20329ff7890SWen Congyang 20429ff7890SWen Congyang if (s->mode == REPLICATION_MODE_PRIMARY) { 20529ff7890SWen Congyang /* We only use it to forward primary write requests */ 20629ff7890SWen Congyang return -EIO; 20729ff7890SWen Congyang } 20829ff7890SWen Congyang 20929ff7890SWen Congyang ret = replication_get_io_status(s); 21029ff7890SWen Congyang if (ret < 0) { 21129ff7890SWen Congyang return ret; 21229ff7890SWen Congyang } 21329ff7890SWen Congyang 21429ff7890SWen Congyang if (child && child->bs) { 21529ff7890SWen Congyang job = child->bs->job; 21629ff7890SWen Congyang } 21729ff7890SWen Congyang 21829ff7890SWen Congyang if (job) { 21929ff7890SWen Congyang backup_wait_for_overlapping_requests(child->bs->job, sector_num, 22029ff7890SWen Congyang remaining_sectors); 22129ff7890SWen Congyang backup_cow_request_begin(&req, child->bs->job, sector_num, 22229ff7890SWen Congyang remaining_sectors); 22329ff7890SWen Congyang ret = bdrv_co_readv(bs->file, sector_num, remaining_sectors, 22429ff7890SWen Congyang qiov); 22529ff7890SWen Congyang backup_cow_request_end(&req); 22629ff7890SWen Congyang goto out; 22729ff7890SWen Congyang } 22829ff7890SWen Congyang 22929ff7890SWen Congyang ret = bdrv_co_readv(bs->file, sector_num, remaining_sectors, qiov); 23029ff7890SWen Congyang out: 23129ff7890SWen Congyang return replication_return_value(s, ret); 23229ff7890SWen Congyang } 23329ff7890SWen Congyang 23429ff7890SWen Congyang static coroutine_fn int replication_co_writev(BlockDriverState *bs, 23529ff7890SWen Congyang int64_t sector_num, 23629ff7890SWen Congyang int remaining_sectors, 23729ff7890SWen Congyang QEMUIOVector *qiov) 23829ff7890SWen Congyang { 23929ff7890SWen Congyang BDRVReplicationState *s = bs->opaque; 24029ff7890SWen Congyang QEMUIOVector hd_qiov; 24129ff7890SWen Congyang uint64_t bytes_done = 0; 24229ff7890SWen Congyang BdrvChild *top = bs->file; 24329ff7890SWen Congyang BdrvChild *base = s->secondary_disk; 24429ff7890SWen Congyang BdrvChild *target; 24529ff7890SWen Congyang int ret, n; 24629ff7890SWen Congyang 24729ff7890SWen Congyang ret = replication_get_io_status(s); 24829ff7890SWen Congyang if (ret < 0) { 24929ff7890SWen Congyang goto out; 25029ff7890SWen Congyang } 25129ff7890SWen Congyang 25229ff7890SWen Congyang if (ret == 0) { 25329ff7890SWen Congyang ret = bdrv_co_writev(top, sector_num, 25429ff7890SWen Congyang remaining_sectors, qiov); 25529ff7890SWen Congyang return replication_return_value(s, ret); 25629ff7890SWen Congyang } 25729ff7890SWen Congyang 25829ff7890SWen Congyang /* 25929ff7890SWen Congyang * Failover failed, only write to active disk if the sectors 26029ff7890SWen Congyang * have already been allocated in active disk/hidden disk. 26129ff7890SWen Congyang */ 26229ff7890SWen Congyang qemu_iovec_init(&hd_qiov, qiov->niov); 26329ff7890SWen Congyang while (remaining_sectors > 0) { 26429ff7890SWen Congyang ret = bdrv_is_allocated_above(top->bs, base->bs, sector_num, 26529ff7890SWen Congyang remaining_sectors, &n); 26629ff7890SWen Congyang if (ret < 0) { 26729ff7890SWen Congyang goto out1; 26829ff7890SWen Congyang } 26929ff7890SWen Congyang 27029ff7890SWen Congyang qemu_iovec_reset(&hd_qiov); 27129ff7890SWen Congyang qemu_iovec_concat(&hd_qiov, qiov, bytes_done, n * BDRV_SECTOR_SIZE); 27229ff7890SWen Congyang 27329ff7890SWen Congyang target = ret ? top : base; 27429ff7890SWen Congyang ret = bdrv_co_writev(target, sector_num, n, &hd_qiov); 27529ff7890SWen Congyang if (ret < 0) { 27629ff7890SWen Congyang goto out1; 27729ff7890SWen Congyang } 27829ff7890SWen Congyang 27929ff7890SWen Congyang remaining_sectors -= n; 28029ff7890SWen Congyang sector_num += n; 28129ff7890SWen Congyang bytes_done += n * BDRV_SECTOR_SIZE; 28229ff7890SWen Congyang } 28329ff7890SWen Congyang 28429ff7890SWen Congyang out1: 28529ff7890SWen Congyang qemu_iovec_destroy(&hd_qiov); 28629ff7890SWen Congyang out: 28729ff7890SWen Congyang return ret; 28829ff7890SWen Congyang } 28929ff7890SWen Congyang 29029ff7890SWen Congyang static bool replication_recurse_is_first_non_filter(BlockDriverState *bs, 29129ff7890SWen Congyang BlockDriverState *candidate) 29229ff7890SWen Congyang { 29329ff7890SWen Congyang return bdrv_recurse_is_first_non_filter(bs->file->bs, candidate); 29429ff7890SWen Congyang } 29529ff7890SWen Congyang 29629ff7890SWen Congyang static void secondary_do_checkpoint(BDRVReplicationState *s, Error **errp) 29729ff7890SWen Congyang { 29829ff7890SWen Congyang Error *local_err = NULL; 29929ff7890SWen Congyang int ret; 30029ff7890SWen Congyang 30129ff7890SWen Congyang if (!s->secondary_disk->bs->job) { 30229ff7890SWen Congyang error_setg(errp, "Backup job was cancelled unexpectedly"); 30329ff7890SWen Congyang return; 30429ff7890SWen Congyang } 30529ff7890SWen Congyang 30629ff7890SWen Congyang backup_do_checkpoint(s->secondary_disk->bs->job, &local_err); 30729ff7890SWen Congyang if (local_err) { 30829ff7890SWen Congyang error_propagate(errp, local_err); 30929ff7890SWen Congyang return; 31029ff7890SWen Congyang } 31129ff7890SWen Congyang 31229ff7890SWen Congyang ret = s->active_disk->bs->drv->bdrv_make_empty(s->active_disk->bs); 31329ff7890SWen Congyang if (ret < 0) { 31429ff7890SWen Congyang error_setg(errp, "Cannot make active disk empty"); 31529ff7890SWen Congyang return; 31629ff7890SWen Congyang } 31729ff7890SWen Congyang 31829ff7890SWen Congyang ret = s->hidden_disk->bs->drv->bdrv_make_empty(s->hidden_disk->bs); 31929ff7890SWen Congyang if (ret < 0) { 32029ff7890SWen Congyang error_setg(errp, "Cannot make hidden disk empty"); 32129ff7890SWen Congyang return; 32229ff7890SWen Congyang } 32329ff7890SWen Congyang } 32429ff7890SWen Congyang 325*8dd9006eSPaolo Bonzini static void reopen_backing_file(BlockDriverState *bs, bool writable, 32629ff7890SWen Congyang Error **errp) 32729ff7890SWen Congyang { 328*8dd9006eSPaolo Bonzini BDRVReplicationState *s = bs->opaque; 32929ff7890SWen Congyang BlockReopenQueue *reopen_queue = NULL; 33029ff7890SWen Congyang int orig_hidden_flags, orig_secondary_flags; 33129ff7890SWen Congyang int new_hidden_flags, new_secondary_flags; 33229ff7890SWen Congyang Error *local_err = NULL; 33329ff7890SWen Congyang 33429ff7890SWen Congyang if (writable) { 33529ff7890SWen Congyang orig_hidden_flags = s->orig_hidden_flags = 33629ff7890SWen Congyang bdrv_get_flags(s->hidden_disk->bs); 33729ff7890SWen Congyang new_hidden_flags = (orig_hidden_flags | BDRV_O_RDWR) & 33829ff7890SWen Congyang ~BDRV_O_INACTIVE; 33929ff7890SWen Congyang orig_secondary_flags = s->orig_secondary_flags = 34029ff7890SWen Congyang bdrv_get_flags(s->secondary_disk->bs); 34129ff7890SWen Congyang new_secondary_flags = (orig_secondary_flags | BDRV_O_RDWR) & 34229ff7890SWen Congyang ~BDRV_O_INACTIVE; 34329ff7890SWen Congyang } else { 34429ff7890SWen Congyang orig_hidden_flags = (s->orig_hidden_flags | BDRV_O_RDWR) & 34529ff7890SWen Congyang ~BDRV_O_INACTIVE; 34629ff7890SWen Congyang new_hidden_flags = s->orig_hidden_flags; 34729ff7890SWen Congyang orig_secondary_flags = (s->orig_secondary_flags | BDRV_O_RDWR) & 34829ff7890SWen Congyang ~BDRV_O_INACTIVE; 34929ff7890SWen Congyang new_secondary_flags = s->orig_secondary_flags; 35029ff7890SWen Congyang } 35129ff7890SWen Congyang 35229ff7890SWen Congyang if (orig_hidden_flags != new_hidden_flags) { 35329ff7890SWen Congyang reopen_queue = bdrv_reopen_queue(reopen_queue, s->hidden_disk->bs, NULL, 35429ff7890SWen Congyang new_hidden_flags); 35529ff7890SWen Congyang } 35629ff7890SWen Congyang 35729ff7890SWen Congyang if (!(orig_secondary_flags & BDRV_O_RDWR)) { 35829ff7890SWen Congyang reopen_queue = bdrv_reopen_queue(reopen_queue, s->secondary_disk->bs, 35929ff7890SWen Congyang NULL, new_secondary_flags); 36029ff7890SWen Congyang } 36129ff7890SWen Congyang 36229ff7890SWen Congyang if (reopen_queue) { 36329ff7890SWen Congyang bdrv_reopen_multiple(reopen_queue, &local_err); 36429ff7890SWen Congyang error_propagate(errp, local_err); 36529ff7890SWen Congyang } 36629ff7890SWen Congyang } 36729ff7890SWen Congyang 368*8dd9006eSPaolo Bonzini static void backup_job_cleanup(BlockDriverState *bs) 36929ff7890SWen Congyang { 370*8dd9006eSPaolo Bonzini BDRVReplicationState *s = bs->opaque; 37129ff7890SWen Congyang BlockDriverState *top_bs; 37229ff7890SWen Congyang 37329ff7890SWen Congyang top_bs = bdrv_lookup_bs(s->top_id, s->top_id, NULL); 37429ff7890SWen Congyang if (!top_bs) { 37529ff7890SWen Congyang return; 37629ff7890SWen Congyang } 37729ff7890SWen Congyang bdrv_op_unblock_all(top_bs, s->blocker); 37829ff7890SWen Congyang error_free(s->blocker); 379*8dd9006eSPaolo Bonzini reopen_backing_file(bs, false, NULL); 38029ff7890SWen Congyang } 38129ff7890SWen Congyang 38229ff7890SWen Congyang static void backup_job_completed(void *opaque, int ret) 38329ff7890SWen Congyang { 384*8dd9006eSPaolo Bonzini BlockDriverState *bs = opaque; 385*8dd9006eSPaolo Bonzini BDRVReplicationState *s = bs->opaque; 38629ff7890SWen Congyang 38729ff7890SWen Congyang if (s->replication_state != BLOCK_REPLICATION_FAILOVER) { 38829ff7890SWen Congyang /* The backup job is cancelled unexpectedly */ 38929ff7890SWen Congyang s->error = -EIO; 39029ff7890SWen Congyang } 39129ff7890SWen Congyang 392*8dd9006eSPaolo Bonzini backup_job_cleanup(bs); 39329ff7890SWen Congyang } 39429ff7890SWen Congyang 39529ff7890SWen Congyang static bool check_top_bs(BlockDriverState *top_bs, BlockDriverState *bs) 39629ff7890SWen Congyang { 39729ff7890SWen Congyang BdrvChild *child; 39829ff7890SWen Congyang 39929ff7890SWen Congyang /* The bs itself is the top_bs */ 40029ff7890SWen Congyang if (top_bs == bs) { 40129ff7890SWen Congyang return true; 40229ff7890SWen Congyang } 40329ff7890SWen Congyang 40429ff7890SWen Congyang /* Iterate over top_bs's children */ 40529ff7890SWen Congyang QLIST_FOREACH(child, &top_bs->children, next) { 40629ff7890SWen Congyang if (child->bs == bs || check_top_bs(child->bs, bs)) { 40729ff7890SWen Congyang return true; 40829ff7890SWen Congyang } 40929ff7890SWen Congyang } 41029ff7890SWen Congyang 41129ff7890SWen Congyang return false; 41229ff7890SWen Congyang } 41329ff7890SWen Congyang 41429ff7890SWen Congyang static void replication_start(ReplicationState *rs, ReplicationMode mode, 41529ff7890SWen Congyang Error **errp) 41629ff7890SWen Congyang { 41729ff7890SWen Congyang BlockDriverState *bs = rs->opaque; 41829ff7890SWen Congyang BDRVReplicationState *s; 41929ff7890SWen Congyang BlockDriverState *top_bs; 42029ff7890SWen Congyang int64_t active_length, hidden_length, disk_length; 42129ff7890SWen Congyang AioContext *aio_context; 42229ff7890SWen Congyang Error *local_err = NULL; 42329ff7890SWen Congyang 42429ff7890SWen Congyang aio_context = bdrv_get_aio_context(bs); 42529ff7890SWen Congyang aio_context_acquire(aio_context); 42629ff7890SWen Congyang s = bs->opaque; 42729ff7890SWen Congyang 42829ff7890SWen Congyang if (s->replication_state != BLOCK_REPLICATION_NONE) { 42929ff7890SWen Congyang error_setg(errp, "Block replication is running or done"); 43029ff7890SWen Congyang aio_context_release(aio_context); 43129ff7890SWen Congyang return; 43229ff7890SWen Congyang } 43329ff7890SWen Congyang 43429ff7890SWen Congyang if (s->mode != mode) { 43529ff7890SWen Congyang error_setg(errp, "The parameter mode's value is invalid, needs %d," 43629ff7890SWen Congyang " but got %d", s->mode, mode); 43729ff7890SWen Congyang aio_context_release(aio_context); 43829ff7890SWen Congyang return; 43929ff7890SWen Congyang } 44029ff7890SWen Congyang 44129ff7890SWen Congyang switch (s->mode) { 44229ff7890SWen Congyang case REPLICATION_MODE_PRIMARY: 44329ff7890SWen Congyang break; 44429ff7890SWen Congyang case REPLICATION_MODE_SECONDARY: 44529ff7890SWen Congyang s->active_disk = bs->file; 44629ff7890SWen Congyang if (!s->active_disk || !s->active_disk->bs || 44729ff7890SWen Congyang !s->active_disk->bs->backing) { 44829ff7890SWen Congyang error_setg(errp, "Active disk doesn't have backing file"); 44929ff7890SWen Congyang aio_context_release(aio_context); 45029ff7890SWen Congyang return; 45129ff7890SWen Congyang } 45229ff7890SWen Congyang 45329ff7890SWen Congyang s->hidden_disk = s->active_disk->bs->backing; 45429ff7890SWen Congyang if (!s->hidden_disk->bs || !s->hidden_disk->bs->backing) { 45529ff7890SWen Congyang error_setg(errp, "Hidden disk doesn't have backing file"); 45629ff7890SWen Congyang aio_context_release(aio_context); 45729ff7890SWen Congyang return; 45829ff7890SWen Congyang } 45929ff7890SWen Congyang 46029ff7890SWen Congyang s->secondary_disk = s->hidden_disk->bs->backing; 46129ff7890SWen Congyang if (!s->secondary_disk->bs || !bdrv_has_blk(s->secondary_disk->bs)) { 46229ff7890SWen Congyang error_setg(errp, "The secondary disk doesn't have block backend"); 46329ff7890SWen Congyang aio_context_release(aio_context); 46429ff7890SWen Congyang return; 46529ff7890SWen Congyang } 46629ff7890SWen Congyang 46729ff7890SWen Congyang /* verify the length */ 46829ff7890SWen Congyang active_length = bdrv_getlength(s->active_disk->bs); 46929ff7890SWen Congyang hidden_length = bdrv_getlength(s->hidden_disk->bs); 47029ff7890SWen Congyang disk_length = bdrv_getlength(s->secondary_disk->bs); 47129ff7890SWen Congyang if (active_length < 0 || hidden_length < 0 || disk_length < 0 || 47229ff7890SWen Congyang active_length != hidden_length || hidden_length != disk_length) { 47329ff7890SWen Congyang error_setg(errp, "Active disk, hidden disk, secondary disk's length" 47429ff7890SWen Congyang " are not the same"); 47529ff7890SWen Congyang aio_context_release(aio_context); 47629ff7890SWen Congyang return; 47729ff7890SWen Congyang } 47829ff7890SWen Congyang 47929ff7890SWen Congyang if (!s->active_disk->bs->drv->bdrv_make_empty || 48029ff7890SWen Congyang !s->hidden_disk->bs->drv->bdrv_make_empty) { 48129ff7890SWen Congyang error_setg(errp, 48229ff7890SWen Congyang "Active disk or hidden disk doesn't support make_empty"); 48329ff7890SWen Congyang aio_context_release(aio_context); 48429ff7890SWen Congyang return; 48529ff7890SWen Congyang } 48629ff7890SWen Congyang 48729ff7890SWen Congyang /* reopen the backing file in r/w mode */ 488*8dd9006eSPaolo Bonzini reopen_backing_file(bs, true, &local_err); 48929ff7890SWen Congyang if (local_err) { 49029ff7890SWen Congyang error_propagate(errp, local_err); 49129ff7890SWen Congyang aio_context_release(aio_context); 49229ff7890SWen Congyang return; 49329ff7890SWen Congyang } 49429ff7890SWen Congyang 49529ff7890SWen Congyang /* start backup job now */ 49629ff7890SWen Congyang error_setg(&s->blocker, 49729ff7890SWen Congyang "Block device is in use by internal backup job"); 49829ff7890SWen Congyang 49929ff7890SWen Congyang top_bs = bdrv_lookup_bs(s->top_id, s->top_id, NULL); 50029ff7890SWen Congyang if (!top_bs || !bdrv_is_root_node(top_bs) || 50129ff7890SWen Congyang !check_top_bs(top_bs, bs)) { 50229ff7890SWen Congyang error_setg(errp, "No top_bs or it is invalid"); 503*8dd9006eSPaolo Bonzini reopen_backing_file(bs, false, NULL); 50429ff7890SWen Congyang aio_context_release(aio_context); 50529ff7890SWen Congyang return; 50629ff7890SWen Congyang } 50729ff7890SWen Congyang bdrv_op_block_all(top_bs, s->blocker); 50829ff7890SWen Congyang bdrv_op_unblock(top_bs, BLOCK_OP_TYPE_DATAPLANE, s->blocker); 50929ff7890SWen Congyang 51029ff7890SWen Congyang backup_start("replication-backup", s->secondary_disk->bs, 51129ff7890SWen Congyang s->hidden_disk->bs, 0, MIRROR_SYNC_MODE_NONE, NULL, false, 51229ff7890SWen Congyang BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT, 513*8dd9006eSPaolo Bonzini backup_job_completed, bs, NULL, &local_err); 51429ff7890SWen Congyang if (local_err) { 51529ff7890SWen Congyang error_propagate(errp, local_err); 516*8dd9006eSPaolo Bonzini backup_job_cleanup(bs); 51729ff7890SWen Congyang aio_context_release(aio_context); 51829ff7890SWen Congyang return; 51929ff7890SWen Congyang } 52029ff7890SWen Congyang break; 52129ff7890SWen Congyang default: 52229ff7890SWen Congyang aio_context_release(aio_context); 52329ff7890SWen Congyang abort(); 52429ff7890SWen Congyang } 52529ff7890SWen Congyang 52629ff7890SWen Congyang s->replication_state = BLOCK_REPLICATION_RUNNING; 52729ff7890SWen Congyang 52829ff7890SWen Congyang if (s->mode == REPLICATION_MODE_SECONDARY) { 52929ff7890SWen Congyang secondary_do_checkpoint(s, errp); 53029ff7890SWen Congyang } 53129ff7890SWen Congyang 53229ff7890SWen Congyang s->error = 0; 53329ff7890SWen Congyang aio_context_release(aio_context); 53429ff7890SWen Congyang } 53529ff7890SWen Congyang 53629ff7890SWen Congyang static void replication_do_checkpoint(ReplicationState *rs, Error **errp) 53729ff7890SWen Congyang { 53829ff7890SWen Congyang BlockDriverState *bs = rs->opaque; 53929ff7890SWen Congyang BDRVReplicationState *s; 54029ff7890SWen Congyang AioContext *aio_context; 54129ff7890SWen Congyang 54229ff7890SWen Congyang aio_context = bdrv_get_aio_context(bs); 54329ff7890SWen Congyang aio_context_acquire(aio_context); 54429ff7890SWen Congyang s = bs->opaque; 54529ff7890SWen Congyang 54629ff7890SWen Congyang if (s->mode == REPLICATION_MODE_SECONDARY) { 54729ff7890SWen Congyang secondary_do_checkpoint(s, errp); 54829ff7890SWen Congyang } 54929ff7890SWen Congyang aio_context_release(aio_context); 55029ff7890SWen Congyang } 55129ff7890SWen Congyang 55229ff7890SWen Congyang static void replication_get_error(ReplicationState *rs, Error **errp) 55329ff7890SWen Congyang { 55429ff7890SWen Congyang BlockDriverState *bs = rs->opaque; 55529ff7890SWen Congyang BDRVReplicationState *s; 55629ff7890SWen Congyang AioContext *aio_context; 55729ff7890SWen Congyang 55829ff7890SWen Congyang aio_context = bdrv_get_aio_context(bs); 55929ff7890SWen Congyang aio_context_acquire(aio_context); 56029ff7890SWen Congyang s = bs->opaque; 56129ff7890SWen Congyang 56229ff7890SWen Congyang if (s->replication_state != BLOCK_REPLICATION_RUNNING) { 56329ff7890SWen Congyang error_setg(errp, "Block replication is not running"); 56429ff7890SWen Congyang aio_context_release(aio_context); 56529ff7890SWen Congyang return; 56629ff7890SWen Congyang } 56729ff7890SWen Congyang 56829ff7890SWen Congyang if (s->error) { 56929ff7890SWen Congyang error_setg(errp, "I/O error occurred"); 57029ff7890SWen Congyang aio_context_release(aio_context); 57129ff7890SWen Congyang return; 57229ff7890SWen Congyang } 57329ff7890SWen Congyang aio_context_release(aio_context); 57429ff7890SWen Congyang } 57529ff7890SWen Congyang 57629ff7890SWen Congyang static void replication_done(void *opaque, int ret) 57729ff7890SWen Congyang { 57829ff7890SWen Congyang BlockDriverState *bs = opaque; 57929ff7890SWen Congyang BDRVReplicationState *s = bs->opaque; 58029ff7890SWen Congyang 58129ff7890SWen Congyang if (ret == 0) { 58229ff7890SWen Congyang s->replication_state = BLOCK_REPLICATION_DONE; 58329ff7890SWen Congyang 58429ff7890SWen Congyang /* refresh top bs's filename */ 58529ff7890SWen Congyang bdrv_refresh_filename(bs); 58629ff7890SWen Congyang s->active_disk = NULL; 58729ff7890SWen Congyang s->secondary_disk = NULL; 58829ff7890SWen Congyang s->hidden_disk = NULL; 58929ff7890SWen Congyang s->error = 0; 59029ff7890SWen Congyang } else { 59129ff7890SWen Congyang s->replication_state = BLOCK_REPLICATION_FAILOVER_FAILED; 59229ff7890SWen Congyang s->error = -EIO; 59329ff7890SWen Congyang } 59429ff7890SWen Congyang } 59529ff7890SWen Congyang 59629ff7890SWen Congyang static void replication_stop(ReplicationState *rs, bool failover, Error **errp) 59729ff7890SWen Congyang { 59829ff7890SWen Congyang BlockDriverState *bs = rs->opaque; 59929ff7890SWen Congyang BDRVReplicationState *s; 60029ff7890SWen Congyang AioContext *aio_context; 60129ff7890SWen Congyang 60229ff7890SWen Congyang aio_context = bdrv_get_aio_context(bs); 60329ff7890SWen Congyang aio_context_acquire(aio_context); 60429ff7890SWen Congyang s = bs->opaque; 60529ff7890SWen Congyang 60629ff7890SWen Congyang if (s->replication_state != BLOCK_REPLICATION_RUNNING) { 60729ff7890SWen Congyang error_setg(errp, "Block replication is not running"); 60829ff7890SWen Congyang aio_context_release(aio_context); 60929ff7890SWen Congyang return; 61029ff7890SWen Congyang } 61129ff7890SWen Congyang 61229ff7890SWen Congyang switch (s->mode) { 61329ff7890SWen Congyang case REPLICATION_MODE_PRIMARY: 61429ff7890SWen Congyang s->replication_state = BLOCK_REPLICATION_DONE; 61529ff7890SWen Congyang s->error = 0; 61629ff7890SWen Congyang break; 61729ff7890SWen Congyang case REPLICATION_MODE_SECONDARY: 61829ff7890SWen Congyang /* 61929ff7890SWen Congyang * This BDS will be closed, and the job should be completed 62029ff7890SWen Congyang * before the BDS is closed, because we will access hidden 62129ff7890SWen Congyang * disk, secondary disk in backup_job_completed(). 62229ff7890SWen Congyang */ 62329ff7890SWen Congyang if (s->secondary_disk->bs->job) { 62429ff7890SWen Congyang block_job_cancel_sync(s->secondary_disk->bs->job); 62529ff7890SWen Congyang } 62629ff7890SWen Congyang 62729ff7890SWen Congyang if (!failover) { 62829ff7890SWen Congyang secondary_do_checkpoint(s, errp); 62929ff7890SWen Congyang s->replication_state = BLOCK_REPLICATION_DONE; 63029ff7890SWen Congyang aio_context_release(aio_context); 63129ff7890SWen Congyang return; 63229ff7890SWen Congyang } 63329ff7890SWen Congyang 63429ff7890SWen Congyang s->replication_state = BLOCK_REPLICATION_FAILOVER; 63529ff7890SWen Congyang commit_active_start("replication-commit", s->active_disk->bs, 63629ff7890SWen Congyang s->secondary_disk->bs, 0, BLOCKDEV_ON_ERROR_REPORT, 63729ff7890SWen Congyang replication_done, 63829ff7890SWen Congyang bs, errp, true); 63929ff7890SWen Congyang break; 64029ff7890SWen Congyang default: 64129ff7890SWen Congyang aio_context_release(aio_context); 64229ff7890SWen Congyang abort(); 64329ff7890SWen Congyang } 64429ff7890SWen Congyang aio_context_release(aio_context); 64529ff7890SWen Congyang } 64629ff7890SWen Congyang 64729ff7890SWen Congyang BlockDriver bdrv_replication = { 64829ff7890SWen Congyang .format_name = "replication", 64929ff7890SWen Congyang .protocol_name = "replication", 65029ff7890SWen Congyang .instance_size = sizeof(BDRVReplicationState), 65129ff7890SWen Congyang 65229ff7890SWen Congyang .bdrv_open = replication_open, 65329ff7890SWen Congyang .bdrv_close = replication_close, 65429ff7890SWen Congyang 65529ff7890SWen Congyang .bdrv_getlength = replication_getlength, 65629ff7890SWen Congyang .bdrv_co_readv = replication_co_readv, 65729ff7890SWen Congyang .bdrv_co_writev = replication_co_writev, 65829ff7890SWen Congyang 65929ff7890SWen Congyang .is_filter = true, 66029ff7890SWen Congyang .bdrv_recurse_is_first_non_filter = replication_recurse_is_first_non_filter, 66129ff7890SWen Congyang 66229ff7890SWen Congyang .has_variable_length = true, 66329ff7890SWen Congyang }; 66429ff7890SWen Congyang 66529ff7890SWen Congyang static void bdrv_replication_init(void) 66629ff7890SWen Congyang { 66729ff7890SWen Congyang bdrv_register(&bdrv_replication); 66829ff7890SWen Congyang } 66929ff7890SWen Congyang 67029ff7890SWen Congyang block_init(bdrv_replication_init); 671