/*
 * block_copy API
 *
 * Copyright (C) 2013 Proxmox Server Solutions
 * Copyright (c) 2019 Virtuozzo International GmbH.
 *
 * Authors:
 *  Dietmar Maurer (dietmar@proxmox.com)
 *  Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"

#include "trace.h"
#include "qapi/error.h"
#include "block/block-copy.h"
#include "block/block_int-io.h"
#include "block/dirty-bitmap.h"
#include "block/reqlist.h"
#include "sysemu/block-backend.h"
#include "qemu/units.h"
#include "qemu/co-shared-resource.h"
#include "qemu/coroutine.h"
#include "qemu/ratelimit.h"
#include "block/aio_task.h"
#include "qemu/error-report.h"
#include "qemu/memalign.h"

#define BLOCK_COPY_MAX_COPY_RANGE (16 * MiB)
#define BLOCK_COPY_MAX_BUFFER (1 * MiB)
#define BLOCK_COPY_MAX_MEM (128 * MiB)
#define BLOCK_COPY_MAX_WORKERS 64
#define BLOCK_COPY_SLICE_TIME 100000000ULL /* ns */
#define BLOCK_COPY_CLUSTER_SIZE_DEFAULT (1 << 16)

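/*
 * How a single chunk is copied.  block_copy_do_copy() may adjust the method
 * at run time: a successful copy_range upgrades COPY_RANGE_SMALL to
 * COPY_RANGE_FULL, and a failed copy_range falls back to COPY_READ_WRITE.
 */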
typedef enum {
    COPY_READ_WRITE_CLUSTER,
    COPY_READ_WRITE,
    COPY_WRITE_ZEROES,
    COPY_RANGE_SMALL,
    COPY_RANGE_FULL
} BlockCopyMethod;

static coroutine_fn int block_copy_task_entry(AioTask *task);

typedef struct BlockCopyCallState {
    /* Fields initialized in block_copy_async() and never changed. */
    BlockCopyState *s;
    int64_t offset;
    int64_t bytes;
    int max_workers;
    int64_t max_chunk;
    bool ignore_ratelimit;
    BlockCopyAsyncCallbackFunc cb;
    void *cb_opaque;
    /* Coroutine where async block-copy is running */
    Coroutine *co;

    /* Fields whose state changes throughout the execution */
    bool finished; /* atomic */
    QemuCoSleep sleep; /* TODO: protect API with a lock */
    bool cancelled; /* atomic */
    /* To reference all call states from BlockCopyState */
    QLIST_ENTRY(BlockCopyCallState) list;

    /*
     * Fields that report information about return values and errors.
     * Protected by lock in BlockCopyState.
     */
    bool error_is_read;
    /*
     * @ret is set concurrently by tasks under mutex. Only set once by first
     * failed task (and untouched if no task failed).
     * After finishing (call_state->finished is true), it is not modified
     * anymore and may be safely read without mutex.
     */
    int ret;
} BlockCopyCallState;

typedef struct BlockCopyTask {
    AioTask task;

    /*
     * Fields initialized in block_copy_task_create()
     * and never changed.
     */
    BlockCopyState *s;
    BlockCopyCallState *call_state;
    /*
     * @method can also be set again in the while loop of
     * block_copy_dirty_clusters(), but it is never accessed concurrently
     * because the only other function that reads it is
     * block_copy_task_entry() and it is invoked afterwards in the same
     * iteration.
     */
    BlockCopyMethod method;

    /*
     * Generally, req is protected by the lock in BlockCopyState. Still,
     * req.offset is only set on task creation, so it may be read
     * concurrently after creation. req.bytes is changed at most once, and
     * we only need to protect against a parallel read racing with the
     * update of @bytes in block_copy_task_shrink().
     */
    BlockReq req;
} BlockCopyTask;

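/* First byte after the task's region, i.e. its exclusive end offset. */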
static int64_t task_end(BlockCopyTask *task)
{
    return task->req.offset + task->req.bytes;
}

typedef struct BlockCopyState {
    /*
     * BdrvChild objects are not owned or managed by block-copy. They are
     * provided by the block-copy user, who is responsible for appropriate
     * permissions on these children.
     */
    BdrvChild *source;
    BdrvChild *target;

    /*
     * Fields initialized in block_copy_state_new()
     * and never changed.
     */
    int64_t cluster_size;
    int64_t max_transfer;
    uint64_t len;
    BdrvRequestFlags write_flags;

    /*
     * Fields whose state changes throughout the execution.
     * Protected by lock.
     */
    CoMutex lock;
    int64_t in_flight_bytes;
    BlockCopyMethod method;
    bool discard_source;
    BlockReqList reqs;
    QLIST_HEAD(, BlockCopyCallState) calls;
    /*
     * skip_unallocated:
     *
     * Used by sync=top jobs, which first scan the source node for unallocated
     * areas and clear them in the copy_bitmap. During this process, the bitmap
     * is thus not fully initialized: It may still have bits set for areas that
     * are unallocated and should actually not be copied.
     *
     * This is indicated by skip_unallocated.
     *
     * In this case, block_copy() will query the source's allocation status,
     * skip unallocated regions, clear them in the copy_bitmap, and invoke
     * block_copy_reset_unallocated() every time it does.
     */
    bool skip_unallocated; /* atomic */
    /* State fields that use a thread-safe API */
    BdrvDirtyBitmap *copy_bitmap;
    ProgressMeter *progress;
    SharedResource *mem;
    RateLimit rate_limit;
} BlockCopyState;

/* Called with lock held */
static int64_t block_copy_chunk_size(BlockCopyState *s)
{
    switch (s->method) {
    case COPY_READ_WRITE_CLUSTER:
        return s->cluster_size;
    case COPY_READ_WRITE:
    case COPY_RANGE_SMALL:
        return MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER),
                   s->max_transfer);
    case COPY_RANGE_FULL:
        return MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE),
                   s->max_transfer);
    default:
        /* Cannot have COPY_WRITE_ZEROES here.  */
        abort();
    }
}
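
/*
 * For illustration, with the default 64 KiB cluster size and an unrestricted
 * max_transfer: COPY_READ_WRITE and COPY_RANGE_SMALL produce
 * MAX(64 KiB, BLOCK_COPY_MAX_BUFFER) = 1 MiB chunks, while COPY_RANGE_FULL
 * produces MAX(64 KiB, BLOCK_COPY_MAX_COPY_RANGE) = 16 MiB chunks.
 */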

/*
 * Search for the first dirty area in offset/bytes range and create a task at
 * the beginning of it.
 */
static coroutine_fn BlockCopyTask *
block_copy_task_create(BlockCopyState *s, BlockCopyCallState *call_state,
                       int64_t offset, int64_t bytes)
{
    BlockCopyTask *task;
    int64_t max_chunk;

    QEMU_LOCK_GUARD(&s->lock);
    max_chunk = MIN_NON_ZERO(block_copy_chunk_size(s), call_state->max_chunk);
    if (!bdrv_dirty_bitmap_next_dirty_area(s->copy_bitmap,
                                           offset, offset + bytes,
                                           max_chunk, &offset, &bytes))
    {
        return NULL;
    }

    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
    bytes = QEMU_ALIGN_UP(bytes, s->cluster_size);

    /* The region is dirty, so no existing tasks are possible in it */
    assert(!reqlist_find_conflict(&s->reqs, offset, bytes));

    bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
    s->in_flight_bytes += bytes;

    task = g_new(BlockCopyTask, 1);
    *task = (BlockCopyTask) {
        .task.func = block_copy_task_entry,
        .s = s,
        .call_state = call_state,
        .method = s->method,
    };
    reqlist_init_req(&s->reqs, &task->req, offset, bytes);

    return task;
}

/*
 * block_copy_task_shrink
 *
 * Drop the tail of the task to be handled later. Set dirty bits back and
 * wake up all tasks waiting for us (some of them may no longer intersect
 * with the shrunk task).
 */
static void coroutine_fn block_copy_task_shrink(BlockCopyTask *task,
                                                int64_t new_bytes)
{
    QEMU_LOCK_GUARD(&task->s->lock);
    if (new_bytes == task->req.bytes) {
        return;
    }

    assert(new_bytes > 0 && new_bytes < task->req.bytes);

    task->s->in_flight_bytes -= task->req.bytes - new_bytes;
    bdrv_set_dirty_bitmap(task->s->copy_bitmap,
                          task->req.offset + new_bytes,
                          task->req.bytes - new_bytes);

    reqlist_shrink_req(&task->req, new_bytes);
}

static void coroutine_fn block_copy_task_end(BlockCopyTask *task, int ret)
{
    QEMU_LOCK_GUARD(&task->s->lock);
    task->s->in_flight_bytes -= task->req.bytes;
    if (ret < 0) {
        bdrv_set_dirty_bitmap(task->s->copy_bitmap, task->req.offset,
                              task->req.bytes);
    }
    if (task->s->progress) {
        progress_set_remaining(task->s->progress,
                               bdrv_get_dirty_count(task->s->copy_bitmap) +
                               task->s->in_flight_bytes);
    }
    reqlist_remove_req(&task->req);
}

void block_copy_state_free(BlockCopyState *s)
{
    if (!s) {
        return;
    }

    ratelimit_destroy(&s->rate_limit);
    bdrv_release_dirty_bitmap(s->copy_bitmap);
    shres_destroy(s->mem);
    g_free(s);
}

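/*
 * The smaller of the source's and target's max_transfer limits (a limit of 0
 * means "unlimited"), capped at INT_MAX.
 */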
static uint32_t block_copy_max_transfer(BdrvChild *source, BdrvChild *target)
{
    return MIN_NON_ZERO(INT_MAX,
                        MIN_NON_ZERO(source->bs->bl.max_transfer,
                                     target->bs->bl.max_transfer));
}

void block_copy_set_copy_opts(BlockCopyState *s, bool use_copy_range,
                              bool compress)
{
    /* Keep BDRV_REQ_SERIALISING set (or not set) in block_copy_state_new() */
    s->write_flags = (s->write_flags & BDRV_REQ_SERIALISING) |
        (compress ? BDRV_REQ_WRITE_COMPRESSED : 0);

    if (s->max_transfer < s->cluster_size) {
        /*
         * copy_range does not respect max_transfer. We don't want to bother
         * with requests smaller than the block-copy cluster size, so fall
         * back to buffered copying (read and write respect max_transfer on
         * their behalf).
         */
        s->method = COPY_READ_WRITE_CLUSTER;
    } else if (compress) {
        /* Compression supports only cluster-size writes and no copy-range. */
        s->method = COPY_READ_WRITE_CLUSTER;
    } else {
        /*
         * If copy range is enabled, start with COPY_RANGE_SMALL, until the
         * first successful copy_range (look at block_copy_do_copy).
         */
        s->method = use_copy_range ? COPY_RANGE_SMALL : COPY_READ_WRITE;
    }
}

static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
                                                 int64_t min_cluster_size,
                                                 Error **errp)
{
    int ret;
    BlockDriverInfo bdi;
    bool target_does_cow;

    GLOBAL_STATE_CODE();
    GRAPH_RDLOCK_GUARD_MAINLOOP();

    min_cluster_size = MAX(min_cluster_size,
                           (int64_t)BLOCK_COPY_CLUSTER_SIZE_DEFAULT);

    target_does_cow = bdrv_backing_chain_next(target);

    /*
     * If there is no backing file on the target, we cannot rely on COW if our
     * backup cluster size is smaller than the target cluster size. Even for
     * targets with a backing file, try to avoid COW if possible.
     */
    ret = bdrv_get_info(target, &bdi);
    if (ret == -ENOTSUP && !target_does_cow) {
        /* Cluster size is not defined */
        warn_report("The target block device doesn't provide information about "
                    "the block size and it doesn't have a backing file. The "
                    "(default) block size of %" PRIi64 " bytes is used. If the "
                    "actual block size of the target exceeds this value, the "
                    "backup may be unusable",
                    min_cluster_size);
        return min_cluster_size;
    } else if (ret < 0 && !target_does_cow) {
        error_setg_errno(errp, -ret,
            "Couldn't determine the cluster size of the target image, "
            "which has no backing file");
        error_append_hint(errp,
            "Aborting, since this may create an unusable destination image\n");
        return ret;
    } else if (ret < 0 && target_does_cow) {
        /* Not fatal; just trudge on ahead. */
        return min_cluster_size;
    }

    return MAX(min_cluster_size, bdi.cluster_size);
}

BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
                                     BlockDriverState *copy_bitmap_bs,
                                     const BdrvDirtyBitmap *bitmap,
                                     bool discard_source,
                                     uint64_t min_cluster_size,
                                     Error **errp)
{
    ERRP_GUARD();
    BlockCopyState *s;
    int64_t cluster_size;
    BdrvDirtyBitmap *copy_bitmap;
    bool is_fleecing;

    GLOBAL_STATE_CODE();

    if (min_cluster_size > INT64_MAX) {
        error_setg(errp, "min-cluster-size too large: %" PRIu64 " > %" PRIi64,
                   min_cluster_size, INT64_MAX);
        return NULL;
    } else if (min_cluster_size && !is_power_of_2(min_cluster_size)) {
        error_setg(errp, "min-cluster-size needs to be a power of 2");
        return NULL;
    }

    cluster_size = block_copy_calculate_cluster_size(target->bs,
                                                     (int64_t)min_cluster_size,
                                                     errp);
    if (cluster_size < 0) {
        return NULL;
    }

    copy_bitmap = bdrv_create_dirty_bitmap(copy_bitmap_bs, cluster_size, NULL,
                                           errp);
    if (!copy_bitmap) {
        return NULL;
    }
    bdrv_disable_dirty_bitmap(copy_bitmap);
    if (bitmap) {
        if (!bdrv_merge_dirty_bitmap(copy_bitmap, bitmap, NULL, errp)) {
            error_prepend(errp, "Failed to merge bitmap '%s' to internal "
                          "copy-bitmap: ", bdrv_dirty_bitmap_name(bitmap));
            bdrv_release_dirty_bitmap(copy_bitmap);
            return NULL;
        }
    } else {
        bdrv_set_dirty_bitmap(copy_bitmap, 0,
                              bdrv_dirty_bitmap_size(copy_bitmap));
    }

    /*
     * If the source is in the backing chain of the target, assume that the
     * target is going to be used for "image fleecing", i.e. it should
     * represent a kind of snapshot of the source at backup-start point in
     * time, and the target is going to be read by somebody (for example,
     * used as an NBD export) during the backup job.
     *
     * In this case, we need to add the BDRV_REQ_SERIALISING write flag to
     * avoid intersection of backup writes and third-party reads from the
     * target; otherwise, when reading from the target, we may occasionally
     * read data that has already been updated by the guest.
     *
     * For more information see commit f8d59dfb40bb and test
     * tests/qemu-iotests/222
     */
    bdrv_graph_rdlock_main_loop();
    is_fleecing = bdrv_chain_contains(target->bs, source->bs);
    bdrv_graph_rdunlock_main_loop();

    s = g_new(BlockCopyState, 1);
    *s = (BlockCopyState) {
        .source = source,
        .target = target,
        .copy_bitmap = copy_bitmap,
        .cluster_size = cluster_size,
        .len = bdrv_dirty_bitmap_size(copy_bitmap),
        .write_flags = (is_fleecing ? BDRV_REQ_SERIALISING : 0),
        .mem = shres_create(BLOCK_COPY_MAX_MEM),
        .max_transfer = QEMU_ALIGN_DOWN(
                                    block_copy_max_transfer(source, target),
                                    cluster_size),
    };

    s->discard_source = discard_source;
    block_copy_set_copy_opts(s, false, false);

    ratelimit_init(&s->rate_limit);
    qemu_co_mutex_init(&s->lock);
    QLIST_INIT(&s->reqs);
    QLIST_INIT(&s->calls);

    return s;
}

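/*
 * Rough lifecycle sketch for callers (details depend on the user, e.g. the
 * backup job): create the state with block_copy_state_new(), optionally
 * attach a progress meter with block_copy_set_progress_meter(), run the
 * copy, and finally release everything with block_copy_state_free().
 */
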
/* Only set before running the job, no need for locking. */
void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm)
{
    s->progress = pm;
}

/*
 * Takes ownership of @task
 *
 * If pool is NULL directly run the task, otherwise schedule it into the pool.
 *
 * Returns: task.func return code if pool is NULL
 *          otherwise -ECANCELED if pool status is bad
 *          otherwise 0 (successfully scheduled)
 */
static coroutine_fn int block_copy_task_run(AioTaskPool *pool,
                                            BlockCopyTask *task)
{
    if (!pool) {
        int ret = task->task.func(&task->task);

        g_free(task);
        return ret;
    }

    aio_task_pool_wait_slot(pool);
    if (aio_task_pool_status(pool) < 0) {
        co_put_to_shres(task->s->mem, task->req.bytes);
        block_copy_task_end(task, -ECANCELED);
        g_free(task);
        return -ECANCELED;
    }

    aio_task_pool_start_task(pool, &task->task);

    return 0;
}

/*
 * block_copy_do_copy
 *
 * Do copy of cluster-aligned chunk. Requested region is allowed to exceed
 * s->len only to cover last cluster when s->len is not aligned to clusters.
 *
 * No synchronization here: neither bitmap nor intersecting request handling,
 * only the copy itself.
 *
 * @method is an in-out argument, so that copy_range can be either extended to
 * a full-size buffer or disabled if the copy_range attempt fails. The output
 * value of @method should be used for subsequent tasks.
 * Returns 0 on success.
 */
static int coroutine_fn GRAPH_RDLOCK
block_copy_do_copy(BlockCopyState *s, int64_t offset, int64_t bytes,
                   BlockCopyMethod *method, bool *error_is_read)
{
    int ret;
    int64_t nbytes = MIN(offset + bytes, s->len) - offset;
    void *bounce_buffer = NULL;

    assert(offset >= 0 && bytes > 0 && INT64_MAX - offset >= bytes);
    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
    assert(QEMU_IS_ALIGNED(bytes, s->cluster_size));
    assert(offset < s->len);
    assert(offset + bytes <= s->len ||
           offset + bytes == QEMU_ALIGN_UP(s->len, s->cluster_size));
    assert(nbytes < INT_MAX);

    switch (*method) {
    case COPY_WRITE_ZEROES:
        ret = bdrv_co_pwrite_zeroes(s->target, offset, nbytes, s->write_flags &
                                    ~BDRV_REQ_WRITE_COMPRESSED);
        if (ret < 0) {
            trace_block_copy_write_zeroes_fail(s, offset, ret);
            *error_is_read = false;
        }
        return ret;

    case COPY_RANGE_SMALL:
    case COPY_RANGE_FULL:
        ret = bdrv_co_copy_range(s->source, offset, s->target, offset, nbytes,
                                 0, s->write_flags);
        if (ret >= 0) {
            /* Successful copy-range, increase chunk size.  */
            *method = COPY_RANGE_FULL;
            return 0;
        }

        trace_block_copy_copy_range_fail(s, offset, ret);
        *method = COPY_READ_WRITE;
        /* Fall through to read+write with allocated buffer */

    case COPY_READ_WRITE_CLUSTER:
    case COPY_READ_WRITE:
        /*
         * In case of failed copy_range request above, we may proceed with
         * buffered request larger than BLOCK_COPY_MAX_BUFFER.
         * Still, further requests will be properly limited, so don't care too
         * much. Moreover the most likely case (copy_range is unsupported for
         * the configuration, so the very first copy_range request fails)
         * is handled by setting large copy_size only after first successful
         * copy_range.
         */

        bounce_buffer = qemu_blockalign(s->source->bs, nbytes);

        ret = bdrv_co_pread(s->source, offset, nbytes, bounce_buffer, 0);
        if (ret < 0) {
            trace_block_copy_read_fail(s, offset, ret);
            *error_is_read = true;
            goto out;
        }

        ret = bdrv_co_pwrite(s->target, offset, nbytes, bounce_buffer,
                             s->write_flags);
        if (ret < 0) {
            trace_block_copy_write_fail(s, offset, ret);
            *error_is_read = false;
            goto out;
        }

out:
        qemu_vfree(bounce_buffer);
        break;

    default:
        abort();
    }

    return ret;
}

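/* AioTask entry point: copy one task's region, then record the result. */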
static coroutine_fn int block_copy_task_entry(AioTask *task)
{
    BlockCopyTask *t = container_of(task, BlockCopyTask, task);
    BlockCopyState *s = t->s;
    bool error_is_read = false;
    BlockCopyMethod method = t->method;
    int ret = -1;

    WITH_GRAPH_RDLOCK_GUARD() {
        ret = block_copy_do_copy(s, t->req.offset, t->req.bytes, &method,
                                 &error_is_read);
    }

    WITH_QEMU_LOCK_GUARD(&s->lock) {
        if (s->method == t->method) {
            s->method = method;
        }

        if (ret < 0) {
            if (!t->call_state->ret) {
                t->call_state->ret = ret;
                t->call_state->error_is_read = error_is_read;
            }
        } else if (s->progress) {
            progress_work_done(s->progress, t->req.bytes);
        }
    }
    co_put_to_shres(s->mem, t->req.bytes);
    block_copy_task_end(t, ret);

    if (s->discard_source && ret == 0) {
        int64_t nbytes =
            MIN(t->req.offset + t->req.bytes, s->len) - t->req.offset;
        WITH_GRAPH_RDLOCK_GUARD() {
            bdrv_co_pdiscard(s->source, t->req.offset, nbytes);
        }
    }

    return ret;
}

static coroutine_fn GRAPH_RDLOCK
int block_copy_block_status(BlockCopyState *s, int64_t offset, int64_t bytes,
                            int64_t *pnum)
{
    int64_t num;
    BlockDriverState *base;
    int ret;

    if (qatomic_read(&s->skip_unallocated)) {
        base = bdrv_backing_chain_next(s->source->bs);
    } else {
        base = NULL;
    }

    ret = bdrv_co_block_status_above(s->source->bs, base, offset, bytes, &num,
                                     NULL, NULL);
    if (ret < 0 || num < s->cluster_size) {
        /*
         * On error, or if we failed to obtain a large enough chunk, just
         * fall back to copying one cluster.
         */
        num = s->cluster_size;
        ret = BDRV_BLOCK_ALLOCATED | BDRV_BLOCK_DATA;
    } else if (offset + num == s->len) {
        num = QEMU_ALIGN_UP(num, s->cluster_size);
    } else {
        num = QEMU_ALIGN_DOWN(num, s->cluster_size);
    }

    *pnum = num;
    return ret;
}

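/*
 * Example of the alignment above, assuming a 64 KiB cluster size: a 100 KiB
 * allocated extent in the middle of the image is clipped down to 64 KiB,
 * while the same extent ending exactly at s->len is rounded up to 128 KiB so
 * that the trailing short cluster is still covered.
 */
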
/*
 * Check if the cluster starting at offset is allocated or not.
 * Return via @pnum the number of contiguous clusters sharing this allocation.
 */
static int coroutine_fn GRAPH_RDLOCK
block_copy_is_cluster_allocated(BlockCopyState *s, int64_t offset,
                                int64_t *pnum)
{
    BlockDriverState *bs = s->source->bs;
    int64_t count, total_count = 0;
    int64_t bytes = s->len - offset;
    int ret;

    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));

    while (true) {
        /* protected in backup_run() */
        ret = bdrv_co_is_allocated(bs, offset, bytes, &count);
        if (ret < 0) {
            return ret;
        }

        total_count += count;

        if (ret || count == 0) {
            /*
             * ret: partial segment(s) are considered allocated.
             * otherwise: unallocated tail is treated as an entire segment.
             */
            *pnum = DIV_ROUND_UP(total_count, s->cluster_size);
            return ret;
        }

        /* Unallocated segment(s) with uncertain following segment(s) */
        if (total_count >= s->cluster_size) {
            *pnum = total_count / s->cluster_size;
            return 0;
        }

        offset += count;
        bytes -= count;
    }
}

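/* Clear dirty bits in the given range and update the progress estimate. */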
void block_copy_reset(BlockCopyState *s, int64_t offset, int64_t bytes)
{
    QEMU_LOCK_GUARD(&s->lock);

    bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
    if (s->progress) {
        progress_set_remaining(s->progress,
                               bdrv_get_dirty_count(s->copy_bitmap) +
                               s->in_flight_bytes);
    }
}

/*
 * Reset bits in copy_bitmap starting at offset if they represent unallocated
 * data in the image. May reset subsequent contiguous bits.
 * @return 0 when the cluster at @offset was unallocated,
 *         1 otherwise, and a negative errno value on error.
 */
int64_t coroutine_fn block_copy_reset_unallocated(BlockCopyState *s,
                                                  int64_t offset,
                                                  int64_t *count)
{
    int ret;
    int64_t clusters, bytes;

    ret = block_copy_is_cluster_allocated(s, offset, &clusters);
    if (ret < 0) {
        return ret;
    }

    bytes = clusters * s->cluster_size;

    if (!ret) {
        block_copy_reset(s, offset, bytes);
    }

    *count = bytes;
    return ret;
}

/*
 * block_copy_dirty_clusters
 *
 * Copy dirty clusters in @offset/@bytes range.
 * Returns 1 if dirty clusters found and successfully copied, 0 if no dirty
 * clusters found and -errno on failure.
 */
static int coroutine_fn GRAPH_RDLOCK
block_copy_dirty_clusters(BlockCopyCallState *call_state)
{
    BlockCopyState *s = call_state->s;
    int64_t offset = call_state->offset;
    int64_t bytes = call_state->bytes;

    int ret = 0;
    bool found_dirty = false;
    int64_t end = offset + bytes;
    AioTaskPool *aio = NULL;

    /*
     * block_copy() user is responsible for keeping source and target in same
     * aio context
     */
    assert(bdrv_get_aio_context(s->source->bs) ==
           bdrv_get_aio_context(s->target->bs));

    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
    assert(QEMU_IS_ALIGNED(bytes, s->cluster_size));

    while (bytes && aio_task_pool_status(aio) == 0 &&
           !qatomic_read(&call_state->cancelled)) {
        BlockCopyTask *task;
        int64_t status_bytes;

        task = block_copy_task_create(s, call_state, offset, bytes);
        if (!task) {
            /* No more dirty bits in the bitmap */
            trace_block_copy_skip_range(s, offset, bytes);
            break;
        }
        if (task->req.offset > offset) {
            trace_block_copy_skip_range(s, offset, task->req.offset - offset);
        }

        found_dirty = true;

        ret = block_copy_block_status(s, task->req.offset, task->req.bytes,
                                      &status_bytes);
        assert(ret >= 0); /* never fail */
        if (status_bytes < task->req.bytes) {
            block_copy_task_shrink(task, status_bytes);
        }
        if (qatomic_read(&s->skip_unallocated) &&
            !(ret & BDRV_BLOCK_ALLOCATED)) {
            block_copy_task_end(task, 0);
            trace_block_copy_skip_range(s, task->req.offset, task->req.bytes);
            offset = task_end(task);
            bytes = end - offset;
            g_free(task);
            continue;
        }
        if (ret & BDRV_BLOCK_ZERO) {
            task->method = COPY_WRITE_ZEROES;
        }

        if (!call_state->ignore_ratelimit) {
            uint64_t ns = ratelimit_calculate_delay(&s->rate_limit, 0);
            if (ns > 0) {
                block_copy_task_end(task, -EAGAIN);
                g_free(task);
                qemu_co_sleep_ns_wakeable(&call_state->sleep,
                                          QEMU_CLOCK_REALTIME, ns);
                continue;
            }
        }

        ratelimit_calculate_delay(&s->rate_limit, task->req.bytes);

        trace_block_copy_process(s, task->req.offset);

        co_get_from_shres(s->mem, task->req.bytes);

        offset = task_end(task);
        bytes = end - offset;

        if (!aio && bytes) {
            aio = aio_task_pool_new(call_state->max_workers);
        }

        ret = block_copy_task_run(aio, task);
        if (ret < 0) {
            goto out;
        }
    }

out:
    if (aio) {
        aio_task_pool_wait_all(aio);

        /*
         * We are not really interested in -ECANCELED returned from
         * block_copy_task_run. If it fails, it means some task already failed
         * for a real reason; let's return the first failure.
         * Still, assert that we don't overwrite a failure with success.
         *
         * Note: ret may be positive here because of block-status result.
         */
        assert(ret >= 0 || aio_task_pool_status(aio) < 0);
        ret = aio_task_pool_status(aio);

        aio_task_pool_free(aio);
    }

    return ret < 0 ? ret : found_dirty;
}

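/* Wake an async block-copy call sleeping in its rate-limit delay, if any. */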
void block_copy_kick(BlockCopyCallState *call_state)
{
    qemu_co_sleep_wake(&call_state->sleep);
}

8605332e5d2SVladimir Sementsov-Ogievskiy /*
8613b8c2329SVladimir Sementsov-Ogievskiy * block_copy_common
8625332e5d2SVladimir Sementsov-Ogievskiy *
8635332e5d2SVladimir Sementsov-Ogievskiy * Copy requested region, accordingly to dirty bitmap.
8645332e5d2SVladimir Sementsov-Ogievskiy * Collaborate with parallel block_copy requests: if they succeed it will help
8655332e5d2SVladimir Sementsov-Ogievskiy * us. If they fail, we will retry not-copied regions. So, if we return error,
8665332e5d2SVladimir Sementsov-Ogievskiy * it means that some I/O operation failed in context of _this_ block_copy call,
8675332e5d2SVladimir Sementsov-Ogievskiy * not some parallel operation.
8685332e5d2SVladimir Sementsov-Ogievskiy */
8697ff9579eSKevin Wolf static int coroutine_fn GRAPH_RDLOCK
8707ff9579eSKevin Wolf block_copy_common(BlockCopyCallState *call_state)
8715332e5d2SVladimir Sementsov-Ogievskiy {
8725332e5d2SVladimir Sementsov-Ogievskiy int ret;
873c6a3e3dfSEmanuele Giuseppe Esposito BlockCopyState *s = call_state->s;
8745332e5d2SVladimir Sementsov-Ogievskiy
875d0c389d2SEmanuele Giuseppe Esposito qemu_co_mutex_lock(&s->lock);
876c6a3e3dfSEmanuele Giuseppe Esposito QLIST_INSERT_HEAD(&s->calls, call_state, list);
877d0c389d2SEmanuele Giuseppe Esposito qemu_co_mutex_unlock(&s->lock);
8782e099a9dSVladimir Sementsov-Ogievskiy
8795332e5d2SVladimir Sementsov-Ogievskiy do {
8803b8c2329SVladimir Sementsov-Ogievskiy ret = block_copy_dirty_clusters(call_state);
8815332e5d2SVladimir Sementsov-Ogievskiy
882149009beSEmanuele Giuseppe Esposito if (ret == 0 && !qatomic_read(&call_state->cancelled)) {
883d0c389d2SEmanuele Giuseppe Esposito WITH_QEMU_LOCK_GUARD(&s->lock) {
884d0c389d2SEmanuele Giuseppe Esposito /*
885d0c389d2SEmanuele Giuseppe Esposito * Check that there are no tasks we still
886d0c389d2SEmanuele Giuseppe Esposito * need to wait for
887d0c389d2SEmanuele Giuseppe Esposito */
888d088e6a4SVladimir Sementsov-Ogievskiy ret = reqlist_wait_one(&s->reqs, call_state->offset,
889d088e6a4SVladimir Sementsov-Ogievskiy call_state->bytes, &s->lock);
890d0c389d2SEmanuele Giuseppe Esposito if (ret == 0) {
891d0c389d2SEmanuele Giuseppe Esposito /*
892d0c389d2SEmanuele Giuseppe Esposito * No pending tasks, but check the bitmap again in this
893d0c389d2SEmanuele Giuseppe Esposito * same critical section, since a task might have failed
894d0c389d2SEmanuele Giuseppe Esposito * between this and the critical section in
895d0c389d2SEmanuele Giuseppe Esposito * block_copy_dirty_clusters().
896d0c389d2SEmanuele Giuseppe Esposito *
897d088e6a4SVladimir Sementsov-Ogievskiy * A return value of 0 from reqlist_wait_one also means
898d0c389d2SEmanuele Giuseppe Esposito * that it didn't release the lock. So, we are still in
899d0c389d2SEmanuele Giuseppe Esposito * the same critical section, not interrupted by any
900d0c389d2SEmanuele Giuseppe Esposito * concurrent access to the state.
901d0c389d2SEmanuele Giuseppe Esposito */
902d0c389d2SEmanuele Giuseppe Esposito ret = bdrv_dirty_bitmap_next_dirty(s->copy_bitmap,
903d0c389d2SEmanuele Giuseppe Esposito call_state->offset,
904d0c389d2SEmanuele Giuseppe Esposito call_state->bytes) >= 0;
905d0c389d2SEmanuele Giuseppe Esposito }
906d0c389d2SEmanuele Giuseppe Esposito }
9075332e5d2SVladimir Sementsov-Ogievskiy }
9085332e5d2SVladimir Sementsov-Ogievskiy
9095332e5d2SVladimir Sementsov-Ogievskiy /*
9105332e5d2SVladimir Sementsov-Ogievskiy * We retry in two cases:
9115332e5d2SVladimir Sementsov-Ogievskiy * 1. Some progress was made
9125332e5d2SVladimir Sementsov-Ogievskiy * Something was copied, which means that there were yield points
9135332e5d2SVladimir Sementsov-Ogievskiy * and some new dirty bits may have appeared (due to failed parallel
9145332e5d2SVladimir Sementsov-Ogievskiy * block-copy requests).
9155332e5d2SVladimir Sementsov-Ogievskiy * 2. We have waited for some intersecting block-copy request
9165332e5d2SVladimir Sementsov-Ogievskiy * It may have failed and produced new dirty bits.
9175332e5d2SVladimir Sementsov-Ogievskiy */
918149009beSEmanuele Giuseppe Esposito } while (ret > 0 && !qatomic_read(&call_state->cancelled));
919a6ffe199SVladimir Sementsov-Ogievskiy
920149009beSEmanuele Giuseppe Esposito qatomic_store_release(&call_state->finished, true);
921de4641b4SVladimir Sementsov-Ogievskiy
922de4641b4SVladimir Sementsov-Ogievskiy if (call_state->cb) {
923de4641b4SVladimir Sementsov-Ogievskiy call_state->cb(call_state->cb_opaque);
924de4641b4SVladimir Sementsov-Ogievskiy }
925de4641b4SVladimir Sementsov-Ogievskiy
926d0c389d2SEmanuele Giuseppe Esposito qemu_co_mutex_lock(&s->lock);
9272e099a9dSVladimir Sementsov-Ogievskiy QLIST_REMOVE(call_state, list);
928d0c389d2SEmanuele Giuseppe Esposito qemu_co_mutex_unlock(&s->lock);
9292e099a9dSVladimir Sementsov-Ogievskiy
930beb5f545SVladimir Sementsov-Ogievskiy return ret;
931beb5f545SVladimir Sementsov-Ogievskiy }
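
/*
 * Illustrative sketch (not part of this file) of the collaboration described
 * above: two overlapping calls on one BlockCopyState. Whichever call copies
 * a cluster first clears its dirty bit, so the other call skips it; if a
 * task fails, the cluster is re-dirtied and the loop above retries it.
 * "len" is a hypothetical cluster-aligned length.
 *
 *     BlockCopyCallState *a = block_copy_async(s, 0, 2 * len,
 *                                              BLOCK_COPY_MAX_WORKERS, 0,
 *                                              NULL, NULL);
 *     BlockCopyCallState *b = block_copy_async(s, len, 2 * len,
 *                                              BLOCK_COPY_MAX_WORKERS, 0,
 *                                              NULL, NULL);
 *     // Each call reports success only once every cluster in its own
 *     // range has been copied, by itself or by the other call.
 */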
932397f4e9dSVladimir Sementsov-Ogievskiy
93315df6e69SVladimir Sementsov-Ogievskiy static void coroutine_fn block_copy_async_co_entry(void *opaque)
9343b8c2329SVladimir Sementsov-Ogievskiy {
9357ff9579eSKevin Wolf GRAPH_RDLOCK_GUARD();
93615df6e69SVladimir Sementsov-Ogievskiy block_copy_common(opaque);
93715df6e69SVladimir Sementsov-Ogievskiy }
93815df6e69SVladimir Sementsov-Ogievskiy
93915df6e69SVladimir Sementsov-Ogievskiy int coroutine_fn block_copy(BlockCopyState *s, int64_t start, int64_t bytes,
94015df6e69SVladimir Sementsov-Ogievskiy bool ignore_ratelimit, uint64_t timeout_ns,
94115df6e69SVladimir Sementsov-Ogievskiy BlockCopyAsyncCallbackFunc cb,
94215df6e69SVladimir Sementsov-Ogievskiy void *cb_opaque)
94315df6e69SVladimir Sementsov-Ogievskiy {
94415df6e69SVladimir Sementsov-Ogievskiy int ret;
94515df6e69SVladimir Sementsov-Ogievskiy BlockCopyCallState *call_state = g_new(BlockCopyCallState, 1);
94615df6e69SVladimir Sementsov-Ogievskiy
94715df6e69SVladimir Sementsov-Ogievskiy *call_state = (BlockCopyCallState) {
9483b8c2329SVladimir Sementsov-Ogievskiy .s = s,
9493b8c2329SVladimir Sementsov-Ogievskiy .offset = start,
9503b8c2329SVladimir Sementsov-Ogievskiy .bytes = bytes,
9517e032df0SVladimir Sementsov-Ogievskiy .ignore_ratelimit = ignore_ratelimit,
95226be9d62SVladimir Sementsov-Ogievskiy .max_workers = BLOCK_COPY_MAX_WORKERS,
95315df6e69SVladimir Sementsov-Ogievskiy .cb = cb,
95415df6e69SVladimir Sementsov-Ogievskiy .cb_opaque = cb_opaque,
9553b8c2329SVladimir Sementsov-Ogievskiy };
9563b8c2329SVladimir Sementsov-Ogievskiy
95715df6e69SVladimir Sementsov-Ogievskiy ret = qemu_co_timeout(block_copy_async_co_entry, call_state, timeout_ns,
95815df6e69SVladimir Sementsov-Ogievskiy g_free);
95915df6e69SVladimir Sementsov-Ogievskiy if (ret < 0) {
96015df6e69SVladimir Sementsov-Ogievskiy assert(ret == -ETIMEDOUT);
96115df6e69SVladimir Sementsov-Ogievskiy block_copy_call_cancel(call_state);
96215df6e69SVladimir Sementsov-Ogievskiy /* call_state will be freed by the running coroutine. */
96315df6e69SVladimir Sementsov-Ogievskiy return ret;
9643b8c2329SVladimir Sementsov-Ogievskiy }
9653b8c2329SVladimir Sementsov-Ogievskiy
96615df6e69SVladimir Sementsov-Ogievskiy ret = call_state->ret;
96715df6e69SVladimir Sementsov-Ogievskiy g_free(call_state);
96815df6e69SVladimir Sementsov-Ogievskiy
96915df6e69SVladimir Sementsov-Ogievskiy return ret;
970de4641b4SVladimir Sementsov-Ogievskiy }
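
/*
 * Illustrative sketch (not part of this file): a synchronous copy of the
 * first 64 MiB with rate limiting honoured and a 30-second timeout, assuming
 * a valid BlockCopyState *s and a caller running in coroutine context. The
 * range must be cluster-aligned (see block_copy_cluster_size() below).
 *
 *     int ret = block_copy(s, 0, 64 * MiB, false,
 *                          30 * NANOSECONDS_PER_SECOND, NULL, NULL);
 *     if (ret == -ETIMEDOUT) {
 *         // The call was cancelled; its state is freed by the still
 *         // running coroutine once it notices the cancellation.
 *     } else if (ret < 0) {
 *         // Some I/O failed in the context of this call.
 *     }
 */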
971de4641b4SVladimir Sementsov-Ogievskiy
972de4641b4SVladimir Sementsov-Ogievskiy BlockCopyCallState *block_copy_async(BlockCopyState *s,
973de4641b4SVladimir Sementsov-Ogievskiy int64_t offset, int64_t bytes,
97426be9d62SVladimir Sementsov-Ogievskiy int max_workers, int64_t max_chunk,
975de4641b4SVladimir Sementsov-Ogievskiy BlockCopyAsyncCallbackFunc cb,
976de4641b4SVladimir Sementsov-Ogievskiy void *cb_opaque)
977de4641b4SVladimir Sementsov-Ogievskiy {
978de4641b4SVladimir Sementsov-Ogievskiy BlockCopyCallState *call_state = g_new(BlockCopyCallState, 1);
979de4641b4SVladimir Sementsov-Ogievskiy
980de4641b4SVladimir Sementsov-Ogievskiy *call_state = (BlockCopyCallState) {
981de4641b4SVladimir Sementsov-Ogievskiy .s = s,
982de4641b4SVladimir Sementsov-Ogievskiy .offset = offset,
983de4641b4SVladimir Sementsov-Ogievskiy .bytes = bytes,
98426be9d62SVladimir Sementsov-Ogievskiy .max_workers = max_workers,
98526be9d62SVladimir Sementsov-Ogievskiy .max_chunk = max_chunk,
986de4641b4SVladimir Sementsov-Ogievskiy .cb = cb,
987de4641b4SVladimir Sementsov-Ogievskiy .cb_opaque = cb_opaque,
988de4641b4SVladimir Sementsov-Ogievskiy
989de4641b4SVladimir Sementsov-Ogievskiy .co = qemu_coroutine_create(block_copy_async_co_entry, call_state),
990de4641b4SVladimir Sementsov-Ogievskiy };
991de4641b4SVladimir Sementsov-Ogievskiy
992de4641b4SVladimir Sementsov-Ogievskiy qemu_coroutine_enter(call_state->co);
993de4641b4SVladimir Sementsov-Ogievskiy
994de4641b4SVladimir Sementsov-Ogievskiy return call_state;
995de4641b4SVladimir Sementsov-Ogievskiy }
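
/*
 * Illustrative sketch (not part of this file): starting an asynchronous copy
 * with a completion callback. copy_done_cb and the bool it flips are
 * hypothetical; the callback runs in the copy coroutine after "finished" has
 * been set, so it should only do lightweight work such as scheduling a
 * bottom half.
 *
 *     static void copy_done_cb(void *opaque)
 *     {
 *         *(bool *)opaque = true;
 *     }
 *
 *     bool done = false;
 *     BlockCopyCallState *cs =
 *         block_copy_async(s, offset, bytes, BLOCK_COPY_MAX_WORKERS,
 *                          0,  // max_chunk 0: keep the state's default
 *                          copy_done_cb, &done);
 */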
996de4641b4SVladimir Sementsov-Ogievskiy
997de4641b4SVladimir Sementsov-Ogievskiy void block_copy_call_free(BlockCopyCallState *call_state)
998de4641b4SVladimir Sementsov-Ogievskiy {
999de4641b4SVladimir Sementsov-Ogievskiy if (!call_state) {
1000de4641b4SVladimir Sementsov-Ogievskiy return;
1001de4641b4SVladimir Sementsov-Ogievskiy }
1002de4641b4SVladimir Sementsov-Ogievskiy
1003149009beSEmanuele Giuseppe Esposito assert(qatomic_read(&call_state->finished));
1004de4641b4SVladimir Sementsov-Ogievskiy g_free(call_state);
1005de4641b4SVladimir Sementsov-Ogievskiy }
1006de4641b4SVladimir Sementsov-Ogievskiy
1007de4641b4SVladimir Sementsov-Ogievskiy bool block_copy_call_finished(BlockCopyCallState *call_state)
1008de4641b4SVladimir Sementsov-Ogievskiy {
1009149009beSEmanuele Giuseppe Esposito return qatomic_read(&call_state->finished);
1010de4641b4SVladimir Sementsov-Ogievskiy }
1011de4641b4SVladimir Sementsov-Ogievskiy
1012de4641b4SVladimir Sementsov-Ogievskiy bool block_copy_call_succeeded(BlockCopyCallState *call_state)
1013de4641b4SVladimir Sementsov-Ogievskiy {
1014149009beSEmanuele Giuseppe Esposito return qatomic_load_acquire(&call_state->finished) &&
1015149009beSEmanuele Giuseppe Esposito !qatomic_read(&call_state->cancelled) &&
1016a6d23d56SVladimir Sementsov-Ogievskiy call_state->ret == 0;
1017de4641b4SVladimir Sementsov-Ogievskiy }
1018de4641b4SVladimir Sementsov-Ogievskiy
1019de4641b4SVladimir Sementsov-Ogievskiy bool block_copy_call_failed(BlockCopyCallState *call_state)
1020de4641b4SVladimir Sementsov-Ogievskiy {
1021149009beSEmanuele Giuseppe Esposito return qatomic_load_acquire(&call_state->finished) &&
1022149009beSEmanuele Giuseppe Esposito !qatomic_read(&call_state->cancelled) &&
1023a6d23d56SVladimir Sementsov-Ogievskiy call_state->ret < 0;
1024a6d23d56SVladimir Sementsov-Ogievskiy }
1025a6d23d56SVladimir Sementsov-Ogievskiy
1026a6d23d56SVladimir Sementsov-Ogievskiy bool block_copy_call_cancelled(BlockCopyCallState *call_state)
1027a6d23d56SVladimir Sementsov-Ogievskiy {
1028149009beSEmanuele Giuseppe Esposito return qatomic_read(&call_state->cancelled);
1029de4641b4SVladimir Sementsov-Ogievskiy }
1030de4641b4SVladimir Sementsov-Ogievskiy
1031de4641b4SVladimir Sementsov-Ogievskiy int block_copy_call_status(BlockCopyCallState *call_state, bool *error_is_read)
1032de4641b4SVladimir Sementsov-Ogievskiy {
1033149009beSEmanuele Giuseppe Esposito assert(qatomic_load_acquire(&call_state->finished));
1034de4641b4SVladimir Sementsov-Ogievskiy if (error_is_read) {
1035de4641b4SVladimir Sementsov-Ogievskiy *error_is_read = call_state->error_is_read;
1036de4641b4SVladimir Sementsov-Ogievskiy }
1037de4641b4SVladimir Sementsov-Ogievskiy return call_state->ret;
1038de4641b4SVladimir Sementsov-Ogievskiy }
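
/*
 * Illustrative sketch (not part of this file): inspecting a call started
 * with block_copy_async() once a poller sees it finish.
 *
 *     if (block_copy_call_finished(cs)) {
 *         if (block_copy_call_succeeded(cs)) {
 *             // The whole requested region is copied.
 *         } else if (block_copy_call_cancelled(cs)) {
 *             // Cancelled by the user; partial progress may have happened.
 *         } else {
 *             bool error_is_read;
 *             int ret = block_copy_call_status(cs, &error_is_read);
 *             // ret < 0; error_is_read tells read from write failure apart.
 *         }
 *         block_copy_call_free(cs);
 *     }
 */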
1039de4641b4SVladimir Sementsov-Ogievskiy
1040149009beSEmanuele Giuseppe Esposito /*
1041149009beSEmanuele Giuseppe Esposito * Note that cancelling and finishing are racy.
1042149009beSEmanuele Giuseppe Esposito * A user can cancel a block-copy call that has already finished.
1043149009beSEmanuele Giuseppe Esposito */
1044a6d23d56SVladimir Sementsov-Ogievskiy void block_copy_call_cancel(BlockCopyCallState *call_state)
1045a6d23d56SVladimir Sementsov-Ogievskiy {
1046149009beSEmanuele Giuseppe Esposito qatomic_set(&call_state->cancelled, true);
1047a6d23d56SVladimir Sementsov-Ogievskiy block_copy_kick(call_state);
1048a6d23d56SVladimir Sementsov-Ogievskiy }
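
/*
 * Illustrative sketch (not part of this file): cancelling and reaping a
 * call. Because cancelling and finishing are racy, the caller still has to
 * wait for "finished" before freeing; the polling loop below is a
 * hypothetical stand-in for whatever event loop the caller uses.
 *
 *     block_copy_call_cancel(cs);
 *     while (!block_copy_call_finished(cs)) {
 *         aio_poll(qemu_get_aio_context(), true);
 *     }
 *     block_copy_call_free(cs);
 */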
1049a6d23d56SVladimir Sementsov-Ogievskiy
1050397f4e9dSVladimir Sementsov-Ogievskiy BdrvDirtyBitmap *block_copy_dirty_bitmap(BlockCopyState *s)
1051397f4e9dSVladimir Sementsov-Ogievskiy {
1052397f4e9dSVladimir Sementsov-Ogievskiy return s->copy_bitmap;
1053397f4e9dSVladimir Sementsov-Ogievskiy }
1054397f4e9dSVladimir Sementsov-Ogievskiy
1055b518e9e9SVladimir Sementsov-Ogievskiy int64_t block_copy_cluster_size(BlockCopyState *s)
1056b518e9e9SVladimir Sementsov-Ogievskiy {
1057b518e9e9SVladimir Sementsov-Ogievskiy return s->cluster_size;
1058b518e9e9SVladimir Sementsov-Ogievskiy }
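
/*
 * Illustrative sketch (not part of this file): block-copy ranges must be
 * cluster-aligned, and this getter lets callers align without reaching into
 * BlockCopyState. offset, bytes and timeout_ns are hypothetical inputs.
 *
 *     int64_t cluster = block_copy_cluster_size(s);
 *     int64_t start = QEMU_ALIGN_DOWN(offset, cluster);
 *     int64_t end = QEMU_ALIGN_UP(offset + bytes, cluster);
 *
 *     ret = block_copy(s, start, end - start, false, timeout_ns, NULL, NULL);
 */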
1059b518e9e9SVladimir Sementsov-Ogievskiy
1060397f4e9dSVladimir Sementsov-Ogievskiy void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip)
1061397f4e9dSVladimir Sementsov-Ogievskiy {
1062d0c389d2SEmanuele Giuseppe Esposito qatomic_set(&s->skip_unallocated, skip);
1063397f4e9dSVladimir Sementsov-Ogievskiy }
10647e032df0SVladimir Sementsov-Ogievskiy
10657e032df0SVladimir Sementsov-Ogievskiy void block_copy_set_speed(BlockCopyState *s, uint64_t speed)
10667e032df0SVladimir Sementsov-Ogievskiy {
10677e032df0SVladimir Sementsov-Ogievskiy ratelimit_set_speed(&s->rate_limit, speed, BLOCK_COPY_SLICE_TIME);
10687e032df0SVladimir Sementsov-Ogievskiy
10697e032df0SVladimir Sementsov-Ogievskiy /*
10707e032df0SVladimir Sementsov-Ogievskiy * Note: it would be good to kick all call states from here, but that may be
10717e032df0SVladimir Sementsov-Ogievskiy * done only from a coroutine, so as not to crash if the s->calls list changes
10727e032df0SVladimir Sementsov-Ogievskiy * while entering one call. So for now, the only user of this function kicks
10737e032df0SVladimir Sementsov-Ogievskiy * its single call_state by hand.
10747e032df0SVladimir Sementsov-Ogievskiy */
10757e032df0SVladimir Sementsov-Ogievskiy }
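
/*
 * Illustrative sketch (not part of this file) of the pattern described in
 * the note above: the caller updates the limit and then kicks its own call
 * state by hand so a copy sleeping in the rate limiter picks up the new
 * speed immediately. new_speed is a hypothetical bytes-per-second value;
 * cs is the caller's BlockCopyCallState.
 *
 *     block_copy_set_speed(s, new_speed);
 *     if (cs) {
 *         block_copy_kick(cs);
 *     }
 */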
1076