/*
 * Live block commit
 *
 * Copyright Red Hat, Inc. 2012
 *
 * Authors:
 *  Jeff Cody   <jcody@redhat.com>
 *  Based on stream.c by Stefan Hajnoczi
 *
 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
 * See the COPYING.LIB file in the top-level directory.
 *
 */
14747ff602SJeff Cody
1580c71a24SPeter Maydell #include "qemu/osdep.h"
16dcbf37ceSKevin Wolf #include "qemu/cutils.h"
17747ff602SJeff Cody #include "trace.h"
18737e150eSPaolo Bonzini #include "block/block_int.h"
19c87621eaSJohn Snow #include "block/blockjob_int.h"
20da34e65cSMarkus Armbruster #include "qapi/error.h"
21747ff602SJeff Cody #include "qemu/ratelimit.h"
225df022cfSPeter Maydell #include "qemu/memalign.h"
23373340b2SMax Reitz #include "sysemu/block-backend.h"
24747ff602SJeff Cody
enum {
    /*
     * Capacity, in bytes, of the bounce buffer used while copying data into
     * the image being populated.  It is deliberately big enough to cover
     * several clusters per call, which keeps the handling of contiguous
     * regions efficient.
     */
    COMMIT_BUFFER_SIZE = (512 * 1024),
};
33747ff602SJeff Cody
34747ff602SJeff Cody typedef struct CommitBlockJob {
35747ff602SJeff Cody BlockJob common;
368dfba279SKevin Wolf BlockDriverState *commit_top_bs;
374653456aSKevin Wolf BlockBackend *top;
384653456aSKevin Wolf BlockBackend *base;
3922dffcbeSJohn Snow BlockDriverState *base_bs;
409a71b9deSMax Reitz BlockDriverState *base_overlay;
4192aa5c6dSPaolo Bonzini BlockdevOnError on_error;
42e70cdc57SAlberto Garcia bool base_read_only;
43df827336SAlberto Garcia bool chain_frozen;
4454e26900SJeff Cody char *backing_file_str;
45*4b028cbeSPeter Krempa bool backing_mask_protocol;
46747ff602SJeff Cody } CommitBlockJob;
47747ff602SJeff Cody
/*
 * Job .prepare callback.
 *
 * Unfreezes the backing chain between the commit filter and the base, drops
 * the job's BlockBackend on the base, and then asks
 * bdrv_drop_intermediate() to remove the nodes between the filter and the
 * base.  The return value of bdrv_drop_intermediate() is passed through.
 */
static int commit_prepare(Job *job)
{
    CommitBlockJob *cjob = container_of(job, CommitBlockJob, common.job);
    int rc;

    bdrv_graph_rdlock_main_loop();
    bdrv_unfreeze_backing_chain(cjob->commit_top_bs, cjob->base_bs);
    cjob->chain_frozen = false;
    bdrv_graph_rdunlock_main_loop();

    /*
     * The base BlockBackend still holds BLK_PERM_WRITE/RESIZE; it has to go
     * away before the normal backing chain can be restored.
     */
    blk_unref(cjob->base);
    cjob->base = NULL;

    /*
     * FIXME: bdrv_drop_intermediate() does not distinguish a total failure
     * from a partial one.  More work is needed to tell these cases apart.
     */
    rc = bdrv_drop_intermediate(cjob->commit_top_bs, cjob->base_bs,
                                cjob->backing_file_str,
                                cjob->backing_mask_protocol);
    return rc;
}
6822dffcbeSJohn Snow
/*
 * Job .abort callback.
 *
 * Undoes the graph changes made for the job: unfreezes the chain if it is
 * still frozen, releases the base BlockBackend and the job's node
 * references, and finally replaces the commit filter node with its backing
 * node.
 */
static void commit_abort(Job *job)
{
    CommitBlockJob *cjob = container_of(job, CommitBlockJob, common.job);
    BlockDriverState *top_node = blk_bs(cjob->top);
    BlockDriverState *below_filter;

    if (cjob->chain_frozen) {
        bdrv_graph_rdlock_main_loop();
        bdrv_unfreeze_backing_chain(cjob->commit_top_bs, cjob->base_bs);
        bdrv_graph_rdunlock_main_loop();
    }

    /* Keep top and the filter alive until bdrv_replace_node() has run */
    bdrv_ref(top_node);
    bdrv_ref(cjob->commit_top_bs);

    if (cjob->base) {
        blk_unref(cjob->base);
    }

    /*
     * Drop the blockers on the intermediate nodes, otherwise
     * bdrv_replace_node() could not succeed.
     */
    block_job_remove_all_bdrv(&cjob->common);

    /*
     * bdrv_drop_intermediate() either failed or never ran, so the commit
     * filter driver is still in the backing chain and is removed now.  This
     * is done last so that the 'consistent read' permission can be granted.
     *
     * XXX Can (or should) 'consistent read' somehow stay blocked even after
     * the failed/cancelled commit job is gone?  If something was already
     * written to base, the intermediate images aren't valid any more.
     */
    bdrv_graph_rdlock_main_loop();
    below_filter = cjob->commit_top_bs->backing->bs;
    bdrv_graph_rdunlock_main_loop();

    bdrv_drained_begin(below_filter);
    bdrv_graph_wrlock();
    bdrv_replace_node(cjob->commit_top_bs, below_filter, &error_abort);
    bdrv_graph_wrunlock();
    bdrv_drained_end(below_filter);

    bdrv_unref(cjob->commit_top_bs);
    bdrv_unref(top_node);
}
1139e85cd5cSStefan Hajnoczi
/*
 * Job .clean callback.
 *
 * Puts the base back into read-only mode if that is how commit_start()
 * found it, then releases the backing file string and the top BlockBackend.
 */
static void commit_clean(Job *job)
{
    CommitBlockJob *cjob = container_of(job, CommitBlockJob, common.job);
    const bool restore_read_only = cjob->base_read_only;

    /*
     * Restoring the base open flags does not have to be atomic: a failure
     * here is ignored and never aborts anything.
     */
    if (restore_read_only) {
        bdrv_reopen_set_read_only(cjob->base_bs, true, NULL);
    }

    g_free(cjob->backing_file_str);
    blk_unref(cjob->top);
}
1289e85cd5cSStefan Hajnoczi
commit_run(Job * job,Error ** errp)129f67432a2SJohn Snow static int coroutine_fn commit_run(Job *job, Error **errp)
1309e85cd5cSStefan Hajnoczi {
131f67432a2SJohn Snow CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
132317a6676SEric Blake int64_t offset;
133747ff602SJeff Cody int ret = 0;
13451b0a488SEric Blake int64_t n = 0; /* bytes */
13571701708SVladimir Sementsov-Ogievskiy QEMU_AUTO_VFREE void *buf = NULL;
13605df8a6aSKevin Wolf int64_t len, base_len;
137747ff602SJeff Cody
138c86422c5SEmanuele Giuseppe Esposito len = blk_co_getlength(s->top);
13905df8a6aSKevin Wolf if (len < 0) {
14071701708SVladimir Sementsov-Ogievskiy return len;
141747ff602SJeff Cody }
14230a5c887SKevin Wolf job_progress_set_remaining(&s->common.job, len);
143747ff602SJeff Cody
144c86422c5SEmanuele Giuseppe Esposito base_len = blk_co_getlength(s->base);
145747ff602SJeff Cody if (base_len < 0) {
14671701708SVladimir Sementsov-Ogievskiy return base_len;
147747ff602SJeff Cody }
148747ff602SJeff Cody
14905df8a6aSKevin Wolf if (base_len < len) {
150a0667887SAlberto Faria ret = blk_co_truncate(s->base, len, false, PREALLOC_MODE_OFF, 0, NULL);
151747ff602SJeff Cody if (ret) {
15271701708SVladimir Sementsov-Ogievskiy return ret;
153747ff602SJeff Cody }
154747ff602SJeff Cody }
155747ff602SJeff Cody
1564653456aSKevin Wolf buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE);
157747ff602SJeff Cody
15805df8a6aSKevin Wolf for (offset = 0; offset < len; offset += n) {
159747ff602SJeff Cody bool copy;
1609ad1e79fSKevin Wolf bool error_in_source = true;
161747ff602SJeff Cody
162747ff602SJeff Cody /* Note that even when no rate limit is applied we need to yield
163c57b6656SKevin Wolf * with no pending I/O here so that bdrv_drain_all() returns.
164747ff602SJeff Cody */
165018e5987SKevin Wolf block_job_ratelimit_sleep(&s->common);
166daa7f2f9SKevin Wolf if (job_is_cancelled(&s->common.job)) {
167747ff602SJeff Cody break;
168747ff602SJeff Cody }
169747ff602SJeff Cody /* Copy if allocated above the base */
170ff7e261bSEmanuele Giuseppe Esposito ret = blk_co_is_allocated_above(s->top, s->base_overlay, true,
17151b0a488SEric Blake offset, COMMIT_BUFFER_SIZE, &n);
172a92b1b06SEric Blake copy = (ret > 0);
17351b0a488SEric Blake trace_commit_one_iteration(s, offset, n, ret);
174747ff602SJeff Cody if (copy) {
1750c42e175SKevin Wolf assert(n < SIZE_MAX);
1760c42e175SKevin Wolf
1770c42e175SKevin Wolf ret = blk_co_pread(s->top, offset, n, buf, 0);
1780c42e175SKevin Wolf if (ret >= 0) {
1790c42e175SKevin Wolf ret = blk_co_pwrite(s->base, offset, n, buf, 0);
1809ad1e79fSKevin Wolf if (ret < 0) {
1819ad1e79fSKevin Wolf error_in_source = false;
1829ad1e79fSKevin Wolf }
1830c42e175SKevin Wolf }
184747ff602SJeff Cody }
185747ff602SJeff Cody if (ret < 0) {
1861e8fb7f1SKevin Wolf BlockErrorAction action =
1879ad1e79fSKevin Wolf block_job_error_action(&s->common, s->on_error,
1889ad1e79fSKevin Wolf error_in_source, -ret);
1891e8fb7f1SKevin Wolf if (action == BLOCK_ERROR_ACTION_REPORT) {
19071701708SVladimir Sementsov-Ogievskiy return ret;
191747ff602SJeff Cody } else {
192747ff602SJeff Cody n = 0;
193747ff602SJeff Cody continue;
194747ff602SJeff Cody }
195747ff602SJeff Cody }
196747ff602SJeff Cody /* Publish progress */
19730a5c887SKevin Wolf job_progress_update(&s->common.job, n);
198f14a39ccSSascha Silbe
199dee81d51SKevin Wolf if (copy) {
200018e5987SKevin Wolf block_job_ratelimit_processed_bytes(&s->common, n);
201f14a39ccSSascha Silbe }
202747ff602SJeff Cody }
203747ff602SJeff Cody
20471701708SVladimir Sementsov-Ogievskiy return 0;
205747ff602SJeff Cody }
206747ff602SJeff Cody
2073fc4b10aSFam Zheng static const BlockJobDriver commit_job_driver = {
20833e9e9bdSKevin Wolf .job_driver = {
209747ff602SJeff Cody .instance_size = sizeof(CommitBlockJob),
2108e4c8700SKevin Wolf .job_type = JOB_TYPE_COMMIT,
21180fa2c75SKevin Wolf .free = block_job_free,
212b15de828SKevin Wolf .user_resume = block_job_user_resume,
213f67432a2SJohn Snow .run = commit_run,
21422dffcbeSJohn Snow .prepare = commit_prepare,
21522dffcbeSJohn Snow .abort = commit_abort,
21622dffcbeSJohn Snow .clean = commit_clean
217da01ff7fSKevin Wolf },
218747ff602SJeff Cody };
219747ff602SJeff Cody
220b9b10c35SKevin Wolf static int coroutine_fn GRAPH_RDLOCK
bdrv_commit_top_preadv(BlockDriverState * bs,int64_t offset,int64_t bytes,QEMUIOVector * qiov,BdrvRequestFlags flags)221b9b10c35SKevin Wolf bdrv_commit_top_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
222b9b10c35SKevin Wolf QEMUIOVector *qiov, BdrvRequestFlags flags)
2238dfba279SKevin Wolf {
2248dfba279SKevin Wolf return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
2258dfba279SKevin Wolf }
2268dfba279SKevin Wolf
bdrv_commit_top_refresh_filename(BlockDriverState * bs)227004915a9SKevin Wolf static GRAPH_RDLOCK void bdrv_commit_top_refresh_filename(BlockDriverState *bs)
228dcbf37ceSKevin Wolf {
229dcbf37ceSKevin Wolf pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
230dcbf37ceSKevin Wolf bs->backing->bs->filename);
231dcbf37ceSKevin Wolf }
23291965658SKevin Wolf
/*
 * Child permission callback of the commit_top driver.
 *
 * Whatever the parents ask for (@perm, @shared), the node requests no
 * permissions on its child and shares all of them.
 */
static void bdrv_commit_top_child_perm(BlockDriverState *bs, BdrvChild *c,
                                       BdrvChildRole role,
                                       BlockReopenQueue *reopen_queue,
                                       uint64_t perm, uint64_t shared,
                                       uint64_t *nperm, uint64_t *nshared)
{
    /* Share everything ... */
    *nshared = BLK_PERM_ALL;
    /* ... and require nothing */
    *nperm = 0;
}
2428dfba279SKevin Wolf
2438dfba279SKevin Wolf /* Dummy node that provides consistent read to its users without requiring it
2448dfba279SKevin Wolf * from its backing file and that allows writes on the backing file chain. */
2458dfba279SKevin Wolf static BlockDriver bdrv_commit_top = {
2468dfba279SKevin Wolf .format_name = "commit_top",
2478dfba279SKevin Wolf .bdrv_co_preadv = bdrv_commit_top_preadv,
248dcbf37ceSKevin Wolf .bdrv_refresh_filename = bdrv_commit_top_refresh_filename,
2498dfba279SKevin Wolf .bdrv_child_perm = bdrv_commit_top_child_perm,
2506540fd15SMax Reitz
2516540fd15SMax Reitz .is_filter = true,
252046fd84fSVladimir Sementsov-Ogievskiy .filtered_child_is_backing = true,
2538dfba279SKevin Wolf };
2548dfba279SKevin Wolf
commit_start(const char * job_id,BlockDriverState * bs,BlockDriverState * base,BlockDriverState * top,int creation_flags,int64_t speed,BlockdevOnError on_error,const char * backing_file_str,bool backing_mask_protocol,const char * filter_node_name,Error ** errp)255fd62c609SAlberto Garcia void commit_start(const char *job_id, BlockDriverState *bs,
2565360782dSJohn Snow BlockDriverState *base, BlockDriverState *top,
2575360782dSJohn Snow int creation_flags, int64_t speed,
2588254b6d9SJohn Snow BlockdevOnError on_error, const char *backing_file_str,
259*4b028cbeSPeter Krempa bool backing_mask_protocol,
2600db832f4SKevin Wolf const char *filter_node_name, Error **errp)
261747ff602SJeff Cody {
262747ff602SJeff Cody CommitBlockJob *s;
2633e4c5122SAlberto Garcia BlockDriverState *iter;
2648dfba279SKevin Wolf BlockDriverState *commit_top_bs = NULL;
2659a71b9deSMax Reitz BlockDriverState *filtered_base;
2669a71b9deSMax Reitz int64_t base_size, top_size;
2679a71b9deSMax Reitz uint64_t base_perms, iter_shared_perms;
268d7086422SKevin Wolf int ret;
269747ff602SJeff Cody
270b4ad82aaSEmanuele Giuseppe Esposito GLOBAL_STATE_CODE();
271b4ad82aaSEmanuele Giuseppe Esposito
27218da7f94SFam Zheng assert(top != bs);
273ad74751fSKevin Wolf bdrv_graph_rdlock_main_loop();
2749a71b9deSMax Reitz if (bdrv_skip_filters(top) == bdrv_skip_filters(base)) {
275747ff602SJeff Cody error_setg(errp, "Invalid files for merge: top and base are the same");
276ad74751fSKevin Wolf bdrv_graph_rdunlock_main_loop();
277747ff602SJeff Cody return;
278747ff602SJeff Cody }
279ad74751fSKevin Wolf bdrv_graph_rdunlock_main_loop();
280747ff602SJeff Cody
2819a71b9deSMax Reitz base_size = bdrv_getlength(base);
2829a71b9deSMax Reitz if (base_size < 0) {
2839a71b9deSMax Reitz error_setg_errno(errp, -base_size, "Could not inquire base image size");
2849a71b9deSMax Reitz return;
2859a71b9deSMax Reitz }
2869a71b9deSMax Reitz
2879a71b9deSMax Reitz top_size = bdrv_getlength(top);
2889a71b9deSMax Reitz if (top_size < 0) {
2899a71b9deSMax Reitz error_setg_errno(errp, -top_size, "Could not inquire top image size");
2909a71b9deSMax Reitz return;
2919a71b9deSMax Reitz }
2929a71b9deSMax Reitz
2939a71b9deSMax Reitz base_perms = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE;
2949a71b9deSMax Reitz if (base_size < top_size) {
2959a71b9deSMax Reitz base_perms |= BLK_PERM_RESIZE;
2969a71b9deSMax Reitz }
2979a71b9deSMax Reitz
29875859b94SJohn Snow s = block_job_create(job_id, &commit_job_driver, NULL, bs, 0, BLK_PERM_ALL,
2995360782dSJohn Snow speed, creation_flags, NULL, NULL, errp);
300834fe28dSAlberto Garcia if (!s) {
301834fe28dSAlberto Garcia return;
302834fe28dSAlberto Garcia }
303834fe28dSAlberto Garcia
304bde70715SKevin Wolf /* convert base to r/w, if necessary */
305e70cdc57SAlberto Garcia s->base_read_only = bdrv_is_read_only(base);
306e70cdc57SAlberto Garcia if (s->base_read_only) {
307e70cdc57SAlberto Garcia if (bdrv_reopen_set_read_only(base, false, errp) != 0) {
308d7086422SKevin Wolf goto fail;
309747ff602SJeff Cody }
310747ff602SJeff Cody }
311747ff602SJeff Cody
3128dfba279SKevin Wolf /* Insert commit_top block node above top, so we can block consistent read
3138dfba279SKevin Wolf * on the backing chain below it */
3140db832f4SKevin Wolf commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, filter_node_name, 0,
3150db832f4SKevin Wolf errp);
3168dfba279SKevin Wolf if (commit_top_bs == NULL) {
3178dfba279SKevin Wolf goto fail;
3188dfba279SKevin Wolf }
319d3c8c674SKevin Wolf if (!filter_node_name) {
320d3c8c674SKevin Wolf commit_top_bs->implicit = true;
321d3c8c674SKevin Wolf }
322e5182c1cSMax Reitz
323e5182c1cSMax Reitz /* So that we can always drop this node */
324e5182c1cSMax Reitz commit_top_bs->never_freeze = true;
325e5182c1cSMax Reitz
3260d0676a1SKevin Wolf commit_top_bs->total_sectors = top->total_sectors;
3278dfba279SKevin Wolf
328934aee14SVladimir Sementsov-Ogievskiy ret = bdrv_append(commit_top_bs, top, errp);
329ae9d4417SVladimir Sementsov-Ogievskiy bdrv_unref(commit_top_bs); /* referenced by new parents or failed */
330934aee14SVladimir Sementsov-Ogievskiy if (ret < 0) {
331b69f00ddSFam Zheng commit_top_bs = NULL;
332b69f00ddSFam Zheng goto fail;
333b69f00ddSFam Zheng }
3348dfba279SKevin Wolf
3358dfba279SKevin Wolf s->commit_top_bs = commit_top_bs;
336747ff602SJeff Cody
3379a71b9deSMax Reitz /*
3389a71b9deSMax Reitz * Block all nodes between top and base, because they will
3399a71b9deSMax Reitz * disappear from the chain after this operation.
3409a71b9deSMax Reitz * Note that this assumes that the user is fine with removing all
3419a71b9deSMax Reitz * nodes (including R/W filters) between top and base. Assuring
3429a71b9deSMax Reitz * this is the responsibility of the interface (i.e. whoever calls
3439a71b9deSMax Reitz * commit_start()).
3449a71b9deSMax Reitz */
3456bc30f19SStefan Hajnoczi bdrv_graph_wrlock();
3469a71b9deSMax Reitz s->base_overlay = bdrv_find_overlay(top, base);
3479a71b9deSMax Reitz assert(s->base_overlay);
3489a71b9deSMax Reitz
3499a71b9deSMax Reitz /*
3509a71b9deSMax Reitz * The topmost node with
3519a71b9deSMax Reitz * bdrv_skip_filters(filtered_base) == bdrv_skip_filters(base)
3529a71b9deSMax Reitz */
3539a71b9deSMax Reitz filtered_base = bdrv_cow_bs(s->base_overlay);
3549a71b9deSMax Reitz assert(bdrv_skip_filters(filtered_base) == bdrv_skip_filters(base));
3559a71b9deSMax Reitz
3569a71b9deSMax Reitz /*
3579a71b9deSMax Reitz * XXX BLK_PERM_WRITE needs to be allowed so we don't block ourselves
3588dfba279SKevin Wolf * at s->base (if writes are blocked for a node, they are also blocked
3598dfba279SKevin Wolf * for its backing file). The other options would be a second filter
3609a71b9deSMax Reitz * driver above s->base.
3619a71b9deSMax Reitz */
3629a71b9deSMax Reitz iter_shared_perms = BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE;
3639a71b9deSMax Reitz
3649a71b9deSMax Reitz for (iter = top; iter != base; iter = bdrv_filter_or_cow_bs(iter)) {
3659a71b9deSMax Reitz if (iter == filtered_base) {
3669a71b9deSMax Reitz /*
3679a71b9deSMax Reitz * From here on, all nodes are filters on the base. This
3689a71b9deSMax Reitz * allows us to share BLK_PERM_CONSISTENT_READ.
3699a71b9deSMax Reitz */
3709a71b9deSMax Reitz iter_shared_perms |= BLK_PERM_CONSISTENT_READ;
3719a71b9deSMax Reitz }
3729a71b9deSMax Reitz
3738dfba279SKevin Wolf ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
3749a71b9deSMax Reitz iter_shared_perms, errp);
3758dfba279SKevin Wolf if (ret < 0) {
3766bc30f19SStefan Hajnoczi bdrv_graph_wrunlock();
3778dfba279SKevin Wolf goto fail;
3783e4c5122SAlberto Garcia }
3793e4c5122SAlberto Garcia }
3803e4c5122SAlberto Garcia
381df827336SAlberto Garcia if (bdrv_freeze_backing_chain(commit_top_bs, base, errp) < 0) {
3826bc30f19SStefan Hajnoczi bdrv_graph_wrunlock();
383df827336SAlberto Garcia goto fail;
384df827336SAlberto Garcia }
385df827336SAlberto Garcia s->chain_frozen = true;
386df827336SAlberto Garcia
3878dfba279SKevin Wolf ret = block_job_add_bdrv(&s->common, "base", base, 0, BLK_PERM_ALL, errp);
3886bc30f19SStefan Hajnoczi bdrv_graph_wrunlock();
389f3bbc53dSKevin Wolf
3908dfba279SKevin Wolf if (ret < 0) {
3918dfba279SKevin Wolf goto fail;
3928dfba279SKevin Wolf }
3938dfba279SKevin Wolf
394d861ab3aSKevin Wolf s->base = blk_new(s->common.job.aio_context,
3959a71b9deSMax Reitz base_perms,
3968dfba279SKevin Wolf BLK_PERM_CONSISTENT_READ
3978dfba279SKevin Wolf | BLK_PERM_WRITE_UNCHANGED);
398d7086422SKevin Wolf ret = blk_insert_bs(s->base, base, errp);
399d7086422SKevin Wolf if (ret < 0) {
400d7086422SKevin Wolf goto fail;
401d7086422SKevin Wolf }
402cf312932SKevin Wolf blk_set_disable_request_queuing(s->base, true);
40322dffcbeSJohn Snow s->base_bs = base;
4044653456aSKevin Wolf
4058dfba279SKevin Wolf /* Required permissions are already taken with block_job_add_bdrv() */
406d861ab3aSKevin Wolf s->top = blk_new(s->common.job.aio_context, 0, BLK_PERM_ALL);
407b247767aSKevin Wolf ret = blk_insert_bs(s->top, top, errp);
408d7086422SKevin Wolf if (ret < 0) {
409d7086422SKevin Wolf goto fail;
410d7086422SKevin Wolf }
411cf312932SKevin Wolf blk_set_disable_request_queuing(s->top, true);
4124653456aSKevin Wolf
41354e26900SJeff Cody s->backing_file_str = g_strdup(backing_file_str);
414*4b028cbeSPeter Krempa s->backing_mask_protocol = backing_mask_protocol;
415747ff602SJeff Cody s->on_error = on_error;
416747ff602SJeff Cody
4175ccac6f1SJohn Snow trace_commit_start(bs, base, top, s);
418da01ff7fSKevin Wolf job_start(&s->common.job);
419d7086422SKevin Wolf return;
420d7086422SKevin Wolf
421d7086422SKevin Wolf fail:
422df827336SAlberto Garcia if (s->chain_frozen) {
4239275fc72SKevin Wolf bdrv_graph_rdlock_main_loop();
424df827336SAlberto Garcia bdrv_unfreeze_backing_chain(commit_top_bs, base);
4259275fc72SKevin Wolf bdrv_graph_rdunlock_main_loop();
426df827336SAlberto Garcia }
427d7086422SKevin Wolf if (s->base) {
428d7086422SKevin Wolf blk_unref(s->base);
429d7086422SKevin Wolf }
430d7086422SKevin Wolf if (s->top) {
431d7086422SKevin Wolf blk_unref(s->top);
432d7086422SKevin Wolf }
433065abf9fSAlberto Garcia if (s->base_read_only) {
434065abf9fSAlberto Garcia bdrv_reopen_set_read_only(base, true, NULL);
435065abf9fSAlberto Garcia }
4362468eed3SAlberto Garcia job_early_fail(&s->common.job);
4372468eed3SAlberto Garcia /* commit_top_bs has to be replaced after deleting the block job,
4382468eed3SAlberto Garcia * otherwise this would fail because of lack of permissions. */
4398dfba279SKevin Wolf if (commit_top_bs) {
440ccd6a379SKevin Wolf bdrv_drained_begin(top);
4416bc30f19SStefan Hajnoczi bdrv_graph_wrlock();
442bde70715SKevin Wolf bdrv_replace_node(commit_top_bs, top, &error_abort);
4436bc30f19SStefan Hajnoczi bdrv_graph_wrunlock();
444ccd6a379SKevin Wolf bdrv_drained_end(top);
4458dfba279SKevin Wolf }
446747ff602SJeff Cody }
44783fd6dd3SKevin Wolf
44883fd6dd3SKevin Wolf
449d6a644bbSEric Blake #define COMMIT_BUF_SIZE (2048 * BDRV_SECTOR_SIZE)
45083fd6dd3SKevin Wolf
45183fd6dd3SKevin Wolf /* commit COW file into the raw image */
bdrv_commit(BlockDriverState * bs)45283fd6dd3SKevin Wolf int bdrv_commit(BlockDriverState *bs)
45383fd6dd3SKevin Wolf {
454f8e2bd53SKevin Wolf BlockBackend *src, *backing;
455d3f06759SKevin Wolf BlockDriverState *backing_file_bs = NULL;
456d3f06759SKevin Wolf BlockDriverState *commit_top_bs = NULL;
45783fd6dd3SKevin Wolf BlockDriver *drv = bs->drv;
458d861ab3aSKevin Wolf AioContext *ctx;
459d6a644bbSEric Blake int64_t offset, length, backing_length;
460c742a364SAlberto Garcia int ro;
461d6a644bbSEric Blake int64_t n;
46283fd6dd3SKevin Wolf int ret = 0;
46371701708SVladimir Sementsov-Ogievskiy QEMU_AUTO_VFREE uint8_t *buf = NULL;
464d3f06759SKevin Wolf Error *local_err = NULL;
46583fd6dd3SKevin Wolf
466f791bf7fSEmanuele Giuseppe Esposito GLOBAL_STATE_CODE();
467277f2007SKevin Wolf GRAPH_RDLOCK_GUARD_MAINLOOP();
468f791bf7fSEmanuele Giuseppe Esposito
46983fd6dd3SKevin Wolf if (!drv)
47083fd6dd3SKevin Wolf return -ENOMEDIUM;
47183fd6dd3SKevin Wolf
4729a71b9deSMax Reitz backing_file_bs = bdrv_cow_bs(bs);
4739a71b9deSMax Reitz
4749a71b9deSMax Reitz if (!backing_file_bs) {
47583fd6dd3SKevin Wolf return -ENOTSUP;
47683fd6dd3SKevin Wolf }
47783fd6dd3SKevin Wolf
47883fd6dd3SKevin Wolf if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
4799a71b9deSMax Reitz bdrv_op_is_blocked(backing_file_bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL))
4809a71b9deSMax Reitz {
48183fd6dd3SKevin Wolf return -EBUSY;
48283fd6dd3SKevin Wolf }
48383fd6dd3SKevin Wolf
484307261b2SVladimir Sementsov-Ogievskiy ro = bdrv_is_read_only(backing_file_bs);
48583fd6dd3SKevin Wolf
48683fd6dd3SKevin Wolf if (ro) {
4879a71b9deSMax Reitz if (bdrv_reopen_set_read_only(backing_file_bs, false, NULL)) {
48883fd6dd3SKevin Wolf return -EACCES;
48983fd6dd3SKevin Wolf }
49083fd6dd3SKevin Wolf }
49183fd6dd3SKevin Wolf
492d861ab3aSKevin Wolf ctx = bdrv_get_aio_context(bs);
4932d97fde4SMax Reitz /* WRITE_UNCHANGED is required for bdrv_make_empty() */
4942d97fde4SMax Reitz src = blk_new(ctx, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED,
4952d97fde4SMax Reitz BLK_PERM_ALL);
496d861ab3aSKevin Wolf backing = blk_new(ctx, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
497d7086422SKevin Wolf
498d3f06759SKevin Wolf ret = blk_insert_bs(src, bs, &local_err);
499d7086422SKevin Wolf if (ret < 0) {
500d3f06759SKevin Wolf error_report_err(local_err);
501d7086422SKevin Wolf goto ro_cleanup;
502d7086422SKevin Wolf }
503d7086422SKevin Wolf
504d3f06759SKevin Wolf /* Insert commit_top block node above backing, so we can write to it */
505d3f06759SKevin Wolf commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, NULL, BDRV_O_RDWR,
506d3f06759SKevin Wolf &local_err);
507d3f06759SKevin Wolf if (commit_top_bs == NULL) {
508d3f06759SKevin Wolf error_report_err(local_err);
509d3f06759SKevin Wolf goto ro_cleanup;
510d3f06759SKevin Wolf }
511d3f06759SKevin Wolf
51212fa4af6SKevin Wolf bdrv_set_backing_hd(commit_top_bs, backing_file_bs, &error_abort);
51312fa4af6SKevin Wolf bdrv_set_backing_hd(bs, commit_top_bs, &error_abort);
514d3f06759SKevin Wolf
515d3f06759SKevin Wolf ret = blk_insert_bs(backing, backing_file_bs, &local_err);
516d7086422SKevin Wolf if (ret < 0) {
517d3f06759SKevin Wolf error_report_err(local_err);
518d7086422SKevin Wolf goto ro_cleanup;
519d7086422SKevin Wolf }
520f8e2bd53SKevin Wolf
521f8e2bd53SKevin Wolf length = blk_getlength(src);
52283fd6dd3SKevin Wolf if (length < 0) {
52383fd6dd3SKevin Wolf ret = length;
52483fd6dd3SKevin Wolf goto ro_cleanup;
52583fd6dd3SKevin Wolf }
52683fd6dd3SKevin Wolf
527f8e2bd53SKevin Wolf backing_length = blk_getlength(backing);
52883fd6dd3SKevin Wolf if (backing_length < 0) {
52983fd6dd3SKevin Wolf ret = backing_length;
53083fd6dd3SKevin Wolf goto ro_cleanup;
53183fd6dd3SKevin Wolf }
53283fd6dd3SKevin Wolf
53383fd6dd3SKevin Wolf /* If our top snapshot is larger than the backing file image,
53483fd6dd3SKevin Wolf * grow the backing file image if possible. If not possible,
53583fd6dd3SKevin Wolf * we must return an error */
53683fd6dd3SKevin Wolf if (length > backing_length) {
5378c6242b6SKevin Wolf ret = blk_truncate(backing, length, false, PREALLOC_MODE_OFF, 0,
538c80d8b06SMax Reitz &local_err);
53983fd6dd3SKevin Wolf if (ret < 0) {
540ed3d2ec9SMax Reitz error_report_err(local_err);
54183fd6dd3SKevin Wolf goto ro_cleanup;
54283fd6dd3SKevin Wolf }
54383fd6dd3SKevin Wolf }
54483fd6dd3SKevin Wolf
545f8e2bd53SKevin Wolf /* blk_try_blockalign() for src will choose an alignment that works for
546f8e2bd53SKevin Wolf * backing as well, so no need to compare the alignment manually. */
547d6a644bbSEric Blake buf = blk_try_blockalign(src, COMMIT_BUF_SIZE);
54883fd6dd3SKevin Wolf if (buf == NULL) {
54983fd6dd3SKevin Wolf ret = -ENOMEM;
55083fd6dd3SKevin Wolf goto ro_cleanup;
55183fd6dd3SKevin Wolf }
55283fd6dd3SKevin Wolf
553d6a644bbSEric Blake for (offset = 0; offset < length; offset += n) {
554d6a644bbSEric Blake ret = bdrv_is_allocated(bs, offset, COMMIT_BUF_SIZE, &n);
55583fd6dd3SKevin Wolf if (ret < 0) {
55683fd6dd3SKevin Wolf goto ro_cleanup;
55783fd6dd3SKevin Wolf }
55883fd6dd3SKevin Wolf if (ret) {
559a9262f55SAlberto Faria ret = blk_pread(src, offset, n, buf, 0);
56083fd6dd3SKevin Wolf if (ret < 0) {
56183fd6dd3SKevin Wolf goto ro_cleanup;
56283fd6dd3SKevin Wolf }
56383fd6dd3SKevin Wolf
564a9262f55SAlberto Faria ret = blk_pwrite(backing, offset, n, buf, 0);
56583fd6dd3SKevin Wolf if (ret < 0) {
56683fd6dd3SKevin Wolf goto ro_cleanup;
56783fd6dd3SKevin Wolf }
56883fd6dd3SKevin Wolf }
56983fd6dd3SKevin Wolf }
57083fd6dd3SKevin Wolf
5712d97fde4SMax Reitz ret = blk_make_empty(src, NULL);
5722d97fde4SMax Reitz /* Ignore -ENOTSUP */
5732d97fde4SMax Reitz if (ret < 0 && ret != -ENOTSUP) {
57483fd6dd3SKevin Wolf goto ro_cleanup;
57583fd6dd3SKevin Wolf }
5762d97fde4SMax Reitz
577f8e2bd53SKevin Wolf blk_flush(src);
57883fd6dd3SKevin Wolf
57983fd6dd3SKevin Wolf /*
58083fd6dd3SKevin Wolf * Make sure all data we wrote to the backing device is actually
58183fd6dd3SKevin Wolf * stable on disk.
58283fd6dd3SKevin Wolf */
583f8e2bd53SKevin Wolf blk_flush(backing);
58483fd6dd3SKevin Wolf
58583fd6dd3SKevin Wolf ret = 0;
58683fd6dd3SKevin Wolf ro_cleanup:
587f8e2bd53SKevin Wolf blk_unref(backing);
5889a71b9deSMax Reitz if (bdrv_cow_bs(bs) != backing_file_bs) {
58912fa4af6SKevin Wolf bdrv_set_backing_hd(bs, backing_file_bs, &error_abort);
590d3f06759SKevin Wolf }
591d3f06759SKevin Wolf bdrv_unref(commit_top_bs);
592d3f06759SKevin Wolf blk_unref(src);
593f8e2bd53SKevin Wolf
59483fd6dd3SKevin Wolf if (ro) {
59583fd6dd3SKevin Wolf /* ignoring error return here */
5969a71b9deSMax Reitz bdrv_reopen_set_read_only(backing_file_bs, true, NULL);
59783fd6dd3SKevin Wolf }
59883fd6dd3SKevin Wolf
59983fd6dd3SKevin Wolf return ret;
60083fd6dd3SKevin Wolf }
601