xref: /openbmc/qemu/block/block-copy.c (revision 423be09ab9492735924e73a2d36069784441ebc6)
/*
 * block_copy API
 *
 * Copyright (C) 2013 Proxmox Server Solutions
 * Copyright (c) 2019 Virtuozzo International GmbH.
 *
 * Authors:
 *  Dietmar Maurer (dietmar@proxmox.com)
 *  Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"

#include "trace.h"
#include "qapi/error.h"
#include "block/block-copy.h"
#include "block/block_int-io.h"
#include "block/dirty-bitmap.h"
#include "block/reqlist.h"
#include "sysemu/block-backend.h"
#include "qemu/units.h"
#include "qemu/co-shared-resource.h"
#include "qemu/coroutine.h"
#include "qemu/ratelimit.h"
#include "block/aio_task.h"
#include "qemu/error-report.h"
#include "qemu/memalign.h"

#define BLOCK_COPY_MAX_COPY_RANGE (16 * MiB)
#define BLOCK_COPY_MAX_BUFFER (1 * MiB)
#define BLOCK_COPY_MAX_MEM (128 * MiB)
#define BLOCK_COPY_MAX_WORKERS 64
#define BLOCK_COPY_SLICE_TIME 100000000ULL /* ns */
#define BLOCK_COPY_CLUSTER_SIZE_DEFAULT (1 << 16)

typedef enum {
    COPY_READ_WRITE_CLUSTER,
    COPY_READ_WRITE,
    COPY_WRITE_ZEROES,
    COPY_RANGE_SMALL,
    COPY_RANGE_FULL
} BlockCopyMethod;
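
/*
 * A quick summary of how @method evolves at run time (derived from
 * block_copy_set_copy_opts() and block_copy_do_copy() below): when copy
 * offloading is requested, a state starts at COPY_RANGE_SMALL; the first
 * successful copy_range upgrades it to COPY_RANGE_FULL, while a failed
 * copy_range downgrades it to COPY_READ_WRITE.  COPY_WRITE_ZEROES is
 * chosen per task when block status reports BDRV_BLOCK_ZERO.
 */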

static coroutine_fn int block_copy_task_entry(AioTask *task);

typedef struct BlockCopyCallState {
    /* Fields initialized in block_copy_async() and never changed. */
    BlockCopyState *s;
    int64_t offset;
    int64_t bytes;
    int max_workers;
    int64_t max_chunk;
    bool ignore_ratelimit;
    BlockCopyAsyncCallbackFunc cb;
    void *cb_opaque;
    /* Coroutine where async block-copy is running */
    Coroutine *co;

    /* Fields whose state changes throughout the execution */
    bool finished; /* atomic */
    QemuCoSleep sleep; /* TODO: protect API with a lock */
    bool cancelled; /* atomic */
    /* To reference all call states from BlockCopyState */
    QLIST_ENTRY(BlockCopyCallState) list;

    /*
     * Fields that report information about return values and errors.
     * Protected by lock in BlockCopyState.
     */
    bool error_is_read;
    /*
     * @ret is set concurrently by tasks under mutex. It is only set once,
     * by the first failed task (and left untouched if no task fails).
     * After finishing (call_state->finished is true), it is not modified
     * anymore and may be safely read without the mutex.
     */
    int ret;
} BlockCopyCallState;

typedef struct BlockCopyTask {
    AioTask task;

    /*
     * Fields initialized in block_copy_task_create()
     * and never changed.
     */
    BlockCopyState *s;
    BlockCopyCallState *call_state;
    /*
     * @method can also be set again in the while loop of
     * block_copy_dirty_clusters(), but it is never accessed concurrently
     * because the only other function that reads it is
     * block_copy_task_entry() and it is invoked afterwards in the same
     * iteration.
     */
    BlockCopyMethod method;

    /*
     * Generally, req is protected by the lock in BlockCopyState. Still,
     * req.offset is only set at task creation, so it may be read
     * concurrently after creation. req.bytes is changed at most once, and
     * the only case that needs protecting is a parallel read while @bytes
     * is being updated in block_copy_task_shrink().
     */
    BlockReq req;
} BlockCopyTask;

static int64_t task_end(BlockCopyTask *task)
{
    return task->req.offset + task->req.bytes;
}

typedef struct BlockCopyState {
    /*
     * BdrvChild objects are not owned or managed by block-copy. They are
     * provided by the block-copy user, and the user is responsible for
     * appropriate permissions on these children.
     */
    BdrvChild *source;
    BdrvChild *target;

    /*
     * Fields initialized in block_copy_state_new()
     * and never changed.
     */
    int64_t cluster_size;
    int64_t max_transfer;
    uint64_t len;
    BdrvRequestFlags write_flags;

    /*
     * Fields whose state changes throughout the execution.
     * Protected by lock.
     */
    CoMutex lock;
    int64_t in_flight_bytes;
    BlockCopyMethod method;
    bool discard_source;
    BlockReqList reqs;
    QLIST_HEAD(, BlockCopyCallState) calls;
    /*
     * skip_unallocated:
     *
     * Used by sync=top jobs, which first scan the source node for unallocated
     * areas and clear them in the copy_bitmap.  During this process, the bitmap
     * is thus not fully initialized: It may still have bits set for areas that
     * are unallocated and should actually not be copied.
     *
     * This is indicated by skip_unallocated.
     *
     * In this case, block_copy() will query the source’s allocation status,
     * skip unallocated regions, clear them in the copy_bitmap, and invoke
     * block_copy_reset_unallocated() every time it does.
     */
    bool skip_unallocated; /* atomic */
    /* State fields that use a thread-safe API */
    BdrvDirtyBitmap *copy_bitmap;
    ProgressMeter *progress;
    SharedResource *mem;
    RateLimit rate_limit;
} BlockCopyState;

/* Called with lock held */
static int64_t block_copy_chunk_size(BlockCopyState *s)
{
    switch (s->method) {
    case COPY_READ_WRITE_CLUSTER:
        return s->cluster_size;
    case COPY_READ_WRITE:
    case COPY_RANGE_SMALL:
        return MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER),
                   s->max_transfer);
    case COPY_RANGE_FULL:
        return MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE),
                   s->max_transfer);
    default:
        /* Cannot have COPY_WRITE_ZEROES here.  */
        abort();
    }
}
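
/*
 * Worked example (illustrative numbers only): with the default 64 KiB
 * cluster_size and a max_transfer of, say, 8 MiB:
 *
 *   COPY_READ_WRITE_CLUSTER           -> 64 KiB (one cluster)
 *   COPY_READ_WRITE, COPY_RANGE_SMALL -> MIN(MAX(64K, 1M), 8M) = 1 MiB
 *   COPY_RANGE_FULL                   -> MIN(MAX(64K, 16M), 8M) = 8 MiB
 */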

/*
 * Search for the first dirty area in the offset/bytes range and create a
 * task at the beginning of it.
 */
static coroutine_fn BlockCopyTask *
block_copy_task_create(BlockCopyState *s, BlockCopyCallState *call_state,
                       int64_t offset, int64_t bytes)
{
    BlockCopyTask *task;
    int64_t max_chunk;

    QEMU_LOCK_GUARD(&s->lock);
    max_chunk = MIN_NON_ZERO(block_copy_chunk_size(s), call_state->max_chunk);
    if (!bdrv_dirty_bitmap_next_dirty_area(s->copy_bitmap,
                                           offset, offset + bytes,
                                           max_chunk, &offset, &bytes))
    {
        return NULL;
    }

    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
    bytes = QEMU_ALIGN_UP(bytes, s->cluster_size);

    /* The region is dirty, so no existing tasks are possible in it */
    assert(!reqlist_find_conflict(&s->reqs, offset, bytes));

    bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
    s->in_flight_bytes += bytes;

    task = g_new(BlockCopyTask, 1);
    *task = (BlockCopyTask) {
        .task.func = block_copy_task_entry,
        .s = s,
        .call_state = call_state,
        .method = s->method,
    };
    reqlist_init_req(&s->reqs, &task->req, offset, bytes);

    return task;
}

/*
 * block_copy_task_shrink
 *
 * Drop the tail of the task, to be handled later. Set the dirty bits back
 * and wake up all tasks waiting for us (some of them may no longer
 * intersect with the shrunk task).
 */
static void coroutine_fn block_copy_task_shrink(BlockCopyTask *task,
                                                int64_t new_bytes)
{
    QEMU_LOCK_GUARD(&task->s->lock);
    if (new_bytes == task->req.bytes) {
        return;
    }

    assert(new_bytes > 0 && new_bytes < task->req.bytes);

    task->s->in_flight_bytes -= task->req.bytes - new_bytes;
    bdrv_set_dirty_bitmap(task->s->copy_bitmap,
                          task->req.offset + new_bytes,
                          task->req.bytes - new_bytes);

    reqlist_shrink_req(&task->req, new_bytes);
}

static void coroutine_fn block_copy_task_end(BlockCopyTask *task, int ret)
{
    QEMU_LOCK_GUARD(&task->s->lock);
    task->s->in_flight_bytes -= task->req.bytes;
    if (ret < 0) {
        bdrv_set_dirty_bitmap(task->s->copy_bitmap, task->req.offset,
                              task->req.bytes);
    }
    if (task->s->progress) {
        progress_set_remaining(task->s->progress,
                               bdrv_get_dirty_count(task->s->copy_bitmap) +
                               task->s->in_flight_bytes);
    }
    reqlist_remove_req(&task->req);
}

void block_copy_state_free(BlockCopyState *s)
{
    if (!s) {
        return;
    }

    ratelimit_destroy(&s->rate_limit);
    bdrv_release_dirty_bitmap(s->copy_bitmap);
    shres_destroy(s->mem);
    g_free(s);
}

static uint32_t block_copy_max_transfer(BdrvChild *source, BdrvChild *target)
{
    return MIN_NON_ZERO(INT_MAX,
                        MIN_NON_ZERO(source->bs->bl.max_transfer,
                                     target->bs->bl.max_transfer));
}
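
/*
 * Example (illustrative values): if the source reports no transfer limit
 * (bl.max_transfer == 0) and the target reports 4 MiB, the result is
 * MIN_NON_ZERO(INT_MAX, MIN_NON_ZERO(0, 4M)) == 4 MiB; if both report 0,
 * the result degrades to INT_MAX.
 */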

void block_copy_set_copy_opts(BlockCopyState *s, bool use_copy_range,
                              bool compress)
{
    /* Keep BDRV_REQ_SERIALISING set (or not set) in block_copy_state_new() */
    s->write_flags = (s->write_flags & BDRV_REQ_SERIALISING) |
        (compress ? BDRV_REQ_WRITE_COMPRESSED : 0);

    if (s->max_transfer < s->cluster_size) {
        /*
         * copy_range does not respect max_transfer. We don't want to bother
         * with requests smaller than the block-copy cluster size, so fall
         * back to buffered copying (reads and writes respect max_transfer on
         * their own).
         */
        s->method = COPY_READ_WRITE_CLUSTER;
    } else if (compress) {
        /* Compression supports only cluster-size writes and no copy-range. */
        s->method = COPY_READ_WRITE_CLUSTER;
    } else {
        /*
         * If copy range is enabled, start with COPY_RANGE_SMALL until the
         * first successful copy_range (see block_copy_do_copy).
         */
        s->method = use_copy_range ? COPY_RANGE_SMALL : COPY_READ_WRITE;
    }
}
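
/*
 * A decision summary of the above, plus a hypothetical call that would
 * enable compressed writes on an existing state:
 *
 *   max_transfer < cluster_size -> COPY_READ_WRITE_CLUSTER
 *   compress                    -> COPY_READ_WRITE_CLUSTER
 *   use_copy_range              -> COPY_RANGE_SMALL
 *   otherwise                   -> COPY_READ_WRITE
 *
 *     block_copy_set_copy_opts(s, false, true);
 */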

static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
                                                 int64_t min_cluster_size,
                                                 Error **errp)
{
    int ret;
    BlockDriverInfo bdi;
    bool target_does_cow;

    GLOBAL_STATE_CODE();
    GRAPH_RDLOCK_GUARD_MAINLOOP();

    min_cluster_size = MAX(min_cluster_size,
                           (int64_t)BLOCK_COPY_CLUSTER_SIZE_DEFAULT);

    target_does_cow = bdrv_backing_chain_next(target);

    /*
     * If there is no backing file on the target, we cannot rely on COW if our
     * backup cluster size is smaller than the target cluster size. Even for
     * targets with a backing file, try to avoid COW if possible.
     */
    ret = bdrv_get_info(target, &bdi);
    if (ret == -ENOTSUP && !target_does_cow) {
        /* Cluster size is not defined */
        warn_report("The target block device doesn't provide information about "
                    "the block size and it doesn't have a backing file. The "
                    "(default) block size of %" PRIi64 " bytes is used. If the "
                    "actual block size of the target exceeds this value, the "
                    "backup may be unusable",
                    min_cluster_size);
        return min_cluster_size;
    } else if (ret < 0 && !target_does_cow) {
        error_setg_errno(errp, -ret,
            "Couldn't determine the cluster size of the target image, "
            "which has no backing file");
        error_append_hint(errp,
            "Aborting, since this may create an unusable destination image\n");
        return ret;
    } else if (ret < 0 && target_does_cow) {
        /* Not fatal; just trudge on ahead. */
        return min_cluster_size;
    }

    return MAX(min_cluster_size, bdi.cluster_size);
}
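
/*
 * Worked example (assumed values): for a target reporting
 * bdi.cluster_size == 128 KiB with the default 64 KiB min_cluster_size,
 * the result is MAX(64 KiB, 128 KiB) == 128 KiB.  A target that reports
 * -ENOTSUP and has no backing file yields the warning above and falls
 * back to min_cluster_size.
 */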

BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
                                     BlockDriverState *copy_bitmap_bs,
                                     const BdrvDirtyBitmap *bitmap,
                                     bool discard_source,
                                     uint64_t min_cluster_size,
                                     Error **errp)
{
    ERRP_GUARD();
    BlockCopyState *s;
    int64_t cluster_size;
    BdrvDirtyBitmap *copy_bitmap;
    bool is_fleecing;

    GLOBAL_STATE_CODE();

    if (min_cluster_size > INT64_MAX) {
        error_setg(errp, "min-cluster-size too large: %" PRIu64 " > %" PRIi64,
                   min_cluster_size, INT64_MAX);
        return NULL;
    } else if (min_cluster_size && !is_power_of_2(min_cluster_size)) {
        error_setg(errp, "min-cluster-size needs to be a power of 2");
        return NULL;
    }

    cluster_size = block_copy_calculate_cluster_size(target->bs,
                                                     (int64_t)min_cluster_size,
                                                     errp);
    if (cluster_size < 0) {
        return NULL;
    }

    copy_bitmap = bdrv_create_dirty_bitmap(copy_bitmap_bs, cluster_size, NULL,
                                           errp);
    if (!copy_bitmap) {
        return NULL;
    }
    bdrv_disable_dirty_bitmap(copy_bitmap);
    if (bitmap) {
        if (!bdrv_merge_dirty_bitmap(copy_bitmap, bitmap, NULL, errp)) {
            error_prepend(errp, "Failed to merge bitmap '%s' to internal "
                          "copy-bitmap: ", bdrv_dirty_bitmap_name(bitmap));
            bdrv_release_dirty_bitmap(copy_bitmap);
            return NULL;
        }
    } else {
        bdrv_set_dirty_bitmap(copy_bitmap, 0,
                              bdrv_dirty_bitmap_size(copy_bitmap));
    }

    /*
     * If the source is in the backing chain of the target, assume that the
     * target is going to be used for "image fleecing", i.e. it should
     * represent a kind of snapshot of the source at backup-start point in
     * time, and that the target is going to be read by somebody (for
     * example, used as an NBD export) during the backup job.
     *
     * In this case, we need to add the BDRV_REQ_SERIALISING write flag to
     * avoid intersection of backup writes and third-party reads from the
     * target; otherwise, when reading from the target, we may occasionally
     * read data that has already been updated by the guest.
     *
     * For more information see commit f8d59dfb40bb and test
     * tests/qemu-iotests/222
     */
    bdrv_graph_rdlock_main_loop();
    is_fleecing = bdrv_chain_contains(target->bs, source->bs);
    bdrv_graph_rdunlock_main_loop();

    s = g_new(BlockCopyState, 1);
    *s = (BlockCopyState) {
        .source = source,
        .target = target,
        .copy_bitmap = copy_bitmap,
        .cluster_size = cluster_size,
        .len = bdrv_dirty_bitmap_size(copy_bitmap),
        .write_flags = (is_fleecing ? BDRV_REQ_SERIALISING : 0),
        .mem = shres_create(BLOCK_COPY_MAX_MEM),
        .max_transfer = QEMU_ALIGN_DOWN(
                                    block_copy_max_transfer(source, target),
                                    cluster_size),
    };

    s->discard_source = discard_source;
    block_copy_set_copy_opts(s, false, false);

    ratelimit_init(&s->rate_limit);
    qemu_co_mutex_init(&s->lock);
    QLIST_INIT(&s->reqs);
    QLIST_INIT(&s->calls);

    return s;
}
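
/*
 * A minimal usage sketch (hypothetical caller; @source and @target are
 * BdrvChild objects the caller already holds appropriate permissions on,
 * error handling abbreviated):
 *
 *     Error *local_err = NULL;
 *     BlockCopyState *s = block_copy_state_new(source, target, source->bs,
 *                                              NULL, false, 0, &local_err);
 *     if (!s) {
 *         error_report_err(local_err);
 *         return;
 *     }
 *     ... run block-copy calls ...
 *     block_copy_state_free(s);
 */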

/* Only set before running the job, no need for locking. */
void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm)
{
    s->progress = pm;
}

/*
 * Takes ownership of @task
 *
 * If pool is NULL, run the task directly; otherwise schedule it into the pool.
 *
 * Returns: task.func return code if pool is NULL
 *          otherwise -ECANCELED if pool status is bad
 *          otherwise 0 (successfully scheduled)
 */
static coroutine_fn int block_copy_task_run(AioTaskPool *pool,
                                            BlockCopyTask *task)
{
    if (!pool) {
        int ret = task->task.func(&task->task);

        g_free(task);
        return ret;
    }

    aio_task_pool_wait_slot(pool);
    if (aio_task_pool_status(pool) < 0) {
        co_put_to_shres(task->s->mem, task->req.bytes);
        block_copy_task_end(task, -ECANCELED);
        g_free(task);
        return -ECANCELED;
    }

    aio_task_pool_start_task(pool, &task->task);

    return 0;
}
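
/*
 * Typical caller pattern, condensed from block_copy_dirty_clusters()
 * below (a sketch, not a complete excerpt):
 *
 *     ret = block_copy_task_run(aio, task);
 *     if (ret < 0) {
 *         goto out;
 *     }
 *
 * The task is consumed in every path: freed directly when there is no
 * pool (or the pool has already failed), handed to the pool otherwise,
 * so the caller must not touch it afterwards.
 */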

/*
 * block_copy_do_copy
 *
 * Do a copy of a cluster-aligned chunk. The requested region is allowed to
 * exceed s->len only to cover the last cluster when s->len is not aligned to
 * clusters.
 *
 * No synchronization here: neither bitmap nor intersecting-request handling,
 * only the copy itself.
 *
 * @method is an in-out argument, so that copy_range can be either extended to
 * a full-size buffer or disabled if the copy_range attempt fails.  The output
 * value of @method should be used for subsequent tasks.
 * Returns 0 on success.
 */
static int coroutine_fn GRAPH_RDLOCK
block_copy_do_copy(BlockCopyState *s, int64_t offset, int64_t bytes,
                   BlockCopyMethod *method, bool *error_is_read)
{
    int ret;
    int64_t nbytes = MIN(offset + bytes, s->len) - offset;
    void *bounce_buffer = NULL;

    assert(offset >= 0 && bytes > 0 && INT64_MAX - offset >= bytes);
    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
    assert(QEMU_IS_ALIGNED(bytes, s->cluster_size));
    assert(offset < s->len);
    assert(offset + bytes <= s->len ||
           offset + bytes == QEMU_ALIGN_UP(s->len, s->cluster_size));
    assert(nbytes < INT_MAX);

    switch (*method) {
    case COPY_WRITE_ZEROES:
        ret = bdrv_co_pwrite_zeroes(s->target, offset, nbytes, s->write_flags &
                                    ~BDRV_REQ_WRITE_COMPRESSED);
        if (ret < 0) {
            trace_block_copy_write_zeroes_fail(s, offset, ret);
            *error_is_read = false;
        }
        return ret;

    case COPY_RANGE_SMALL:
    case COPY_RANGE_FULL:
        ret = bdrv_co_copy_range(s->source, offset, s->target, offset, nbytes,
                                 0, s->write_flags);
        if (ret >= 0) {
            /* Successful copy-range, increase chunk size.  */
            *method = COPY_RANGE_FULL;
            return 0;
        }

        trace_block_copy_copy_range_fail(s, offset, ret);
        *method = COPY_READ_WRITE;
        /* Fall through to read+write with allocated buffer */

    case COPY_READ_WRITE_CLUSTER:
    case COPY_READ_WRITE:
        /*
         * In case of a failed copy_range request above, we may proceed with
         * a buffered request larger than BLOCK_COPY_MAX_BUFFER.
         * Still, further requests will be properly limited, so don't care too
         * much. Moreover, the most likely case (copy_range is unsupported for
         * the configuration, so the very first copy_range request fails)
         * is handled by setting the large copy_size only after the first
         * successful copy_range.
         */

        bounce_buffer = qemu_blockalign(s->source->bs, nbytes);

        ret = bdrv_co_pread(s->source, offset, nbytes, bounce_buffer, 0);
        if (ret < 0) {
            trace_block_copy_read_fail(s, offset, ret);
            *error_is_read = true;
            goto out;
        }

        ret = bdrv_co_pwrite(s->target, offset, nbytes, bounce_buffer,
                             s->write_flags);
        if (ret < 0) {
            trace_block_copy_write_fail(s, offset, ret);
            *error_is_read = false;
            goto out;
        }

    out:
        qemu_vfree(bounce_buffer);
        break;

    default:
        abort();
    }

    return ret;
}

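/*
 * Worked example of the tail clamping above (illustrative numbers): with
 * cluster_size == 64 KiB and s->len == 192 KiB + 512 bytes, a request for
 * the final cluster has offset == 192 KiB and bytes == 64 KiB, so
 * nbytes == MIN(256 KiB, s->len) - 192 KiB == 512 bytes: only the real
 * image tail is read and written, although the bitmap granularity is a
 * full cluster.
 */
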
static coroutine_fn int block_copy_task_entry(AioTask *task)
{
    BlockCopyTask *t = container_of(task, BlockCopyTask, task);
    BlockCopyState *s = t->s;
    bool error_is_read = false;
    BlockCopyMethod method = t->method;
    int ret = -1;

    WITH_GRAPH_RDLOCK_GUARD() {
        ret = block_copy_do_copy(s, t->req.offset, t->req.bytes, &method,
                                 &error_is_read);
    }

    WITH_QEMU_LOCK_GUARD(&s->lock) {
        if (s->method == t->method) {
            s->method = method;
        }

        if (ret < 0) {
            if (!t->call_state->ret) {
                t->call_state->ret = ret;
                t->call_state->error_is_read = error_is_read;
            }
        } else if (s->progress) {
            progress_work_done(s->progress, t->req.bytes);
        }
    }
    co_put_to_shres(s->mem, t->req.bytes);
    block_copy_task_end(t, ret);

    if (s->discard_source && ret == 0) {
        int64_t nbytes =
            MIN(t->req.offset + t->req.bytes, s->len) - t->req.offset;
        WITH_GRAPH_RDLOCK_GUARD() {
            bdrv_co_pdiscard(s->source, t->req.offset, nbytes);
        }
    }

    return ret;
}

static coroutine_fn GRAPH_RDLOCK
int block_copy_block_status(BlockCopyState *s, int64_t offset, int64_t bytes,
                            int64_t *pnum)
{
    int64_t num;
    BlockDriverState *base;
    int ret;

    if (qatomic_read(&s->skip_unallocated)) {
        base = bdrv_backing_chain_next(s->source->bs);
    } else {
        base = NULL;
    }

    ret = bdrv_co_block_status_above(s->source->bs, base, offset, bytes, &num,
                                     NULL, NULL);
    if (ret < 0 || num < s->cluster_size) {
        /*
         * On error, or if we failed to obtain a large enough chunk, just
         * fall back to copying one cluster.
         */
        num = s->cluster_size;
        ret = BDRV_BLOCK_ALLOCATED | BDRV_BLOCK_DATA;
    } else if (offset + num == s->len) {
        num = QEMU_ALIGN_UP(num, s->cluster_size);
    } else {
        num = QEMU_ALIGN_DOWN(num, s->cluster_size);
    }

    *pnum = num;
    return ret;
}
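
/*
 * Example of the rounding above (illustrative numbers): with
 * cluster_size == 64 KiB, a status chunk of num == 100 KiB in the middle
 * of the image is rounded down to 64 KiB, while the same 100 KiB ending
 * exactly at s->len is rounded up to 128 KiB so that the unaligned image
 * tail is still covered.
 */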

/*
 * Check if the cluster starting at offset is allocated or not.
 * Return via pnum the number of contiguous clusters sharing this allocation.
 */
static int coroutine_fn GRAPH_RDLOCK
block_copy_is_cluster_allocated(BlockCopyState *s, int64_t offset,
                                int64_t *pnum)
{
    BlockDriverState *bs = s->source->bs;
    int64_t count, total_count = 0;
    int64_t bytes = s->len - offset;
    int ret;

    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));

    while (true) {
        /* protected in backup_run() */
        ret = bdrv_co_is_allocated(bs, offset, bytes, &count);
        if (ret < 0) {
            return ret;
        }

        total_count += count;

        if (ret || count == 0) {
            /*
             * ret: partial segment(s) are considered allocated.
             * otherwise: unallocated tail is treated as an entire segment.
             */
            *pnum = DIV_ROUND_UP(total_count, s->cluster_size);
            return ret;
        }

        /* Unallocated segment(s) with uncertain following segment(s) */
        if (total_count >= s->cluster_size) {
            *pnum = total_count / s->cluster_size;
            return 0;
        }

        offset += count;
        bytes -= count;
    }
}

void block_copy_reset(BlockCopyState *s, int64_t offset, int64_t bytes)
{
    QEMU_LOCK_GUARD(&s->lock);

    bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
    if (s->progress) {
        progress_set_remaining(s->progress,
                               bdrv_get_dirty_count(s->copy_bitmap) +
                               s->in_flight_bytes);
    }
}

/*
 * Reset bits in copy_bitmap starting at offset if they represent unallocated
 * data in the image. May reset subsequent contiguous bits.
 * @return 0 when the cluster at @offset was unallocated,
 *         1 otherwise, and -errno on error.
 */
int64_t coroutine_fn block_copy_reset_unallocated(BlockCopyState *s,
                                                  int64_t offset,
                                                  int64_t *count)
{
    int ret;
    int64_t clusters, bytes;

    ret = block_copy_is_cluster_allocated(s, offset, &clusters);
    if (ret < 0) {
        return ret;
    }

    bytes = clusters * s->cluster_size;

    if (!ret) {
        block_copy_reset(s, offset, bytes);
    }

    *count = bytes;
    return ret;
}
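
/*
 * A hedged sketch of the sync=top initialization loop that the
 * skip_unallocated comment in BlockCopyState refers to (simplified from
 * what a backup job does; variable names are illustrative):
 *
 *     int64_t offset = 0, count;
 *
 *     while (offset < len) {
 *         if (block_copy_reset_unallocated(s, offset, &count) < 0) {
 *             break;
 *         }
 *         offset += count;
 *     }
 */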
738beb5f545SVladimir Sementsov-Ogievskiy 
7395332e5d2SVladimir Sementsov-Ogievskiy /*
7405332e5d2SVladimir Sementsov-Ogievskiy  * block_copy_dirty_clusters
7415332e5d2SVladimir Sementsov-Ogievskiy  *
7425332e5d2SVladimir Sementsov-Ogievskiy  * Copy dirty clusters in @offset/@bytes range.
7435332e5d2SVladimir Sementsov-Ogievskiy  * Returns 1 if dirty clusters found and successfully copied, 0 if no dirty
7445332e5d2SVladimir Sementsov-Ogievskiy  * clusters found and -errno on failure.
7455332e5d2SVladimir Sementsov-Ogievskiy  */
7467ff9579eSKevin Wolf static int coroutine_fn GRAPH_RDLOCK
block_copy_dirty_clusters(BlockCopyCallState * call_state)7473b8c2329SVladimir Sementsov-Ogievskiy block_copy_dirty_clusters(BlockCopyCallState *call_state)
748beb5f545SVladimir Sementsov-Ogievskiy {
7493b8c2329SVladimir Sementsov-Ogievskiy     BlockCopyState *s = call_state->s;
7503b8c2329SVladimir Sementsov-Ogievskiy     int64_t offset = call_state->offset;
7513b8c2329SVladimir Sementsov-Ogievskiy     int64_t bytes = call_state->bytes;
7523b8c2329SVladimir Sementsov-Ogievskiy 
753beb5f545SVladimir Sementsov-Ogievskiy     int ret = 0;
7545332e5d2SVladimir Sementsov-Ogievskiy     bool found_dirty = false;
75542ac2144SVladimir Sementsov-Ogievskiy     int64_t end = offset + bytes;
7564ce5dd3eSVladimir Sementsov-Ogievskiy     AioTaskPool *aio = NULL;
757beb5f545SVladimir Sementsov-Ogievskiy 
758beb5f545SVladimir Sementsov-Ogievskiy     /*
759beb5f545SVladimir Sementsov-Ogievskiy      * block_copy() user is responsible for keeping source and target in same
760beb5f545SVladimir Sementsov-Ogievskiy      * aio context
761beb5f545SVladimir Sementsov-Ogievskiy      */
76200e30f05SVladimir Sementsov-Ogievskiy     assert(bdrv_get_aio_context(s->source->bs) ==
76300e30f05SVladimir Sementsov-Ogievskiy            bdrv_get_aio_context(s->target->bs));
764beb5f545SVladimir Sementsov-Ogievskiy 
7658719091fSVladimir Sementsov-Ogievskiy     assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
766dafaf135SVladimir Sementsov-Ogievskiy     assert(QEMU_IS_ALIGNED(bytes, s->cluster_size));
767beb5f545SVladimir Sementsov-Ogievskiy 
768149009beSEmanuele Giuseppe Esposito     while (bytes && aio_task_pool_status(aio) == 0 &&
769149009beSEmanuele Giuseppe Esposito            !qatomic_read(&call_state->cancelled)) {
7704ce5dd3eSVladimir Sementsov-Ogievskiy         BlockCopyTask *task;
77142ac2144SVladimir Sementsov-Ogievskiy         int64_t status_bytes;
772beb5f545SVladimir Sementsov-Ogievskiy 
7733b8c2329SVladimir Sementsov-Ogievskiy         task = block_copy_task_create(s, call_state, offset, bytes);
77442ac2144SVladimir Sementsov-Ogievskiy         if (!task) {
77542ac2144SVladimir Sementsov-Ogievskiy             /* No more dirty bits in the bitmap */
77642ac2144SVladimir Sementsov-Ogievskiy             trace_block_copy_skip_range(s, offset, bytes);
77742ac2144SVladimir Sementsov-Ogievskiy             break;
77842ac2144SVladimir Sementsov-Ogievskiy         }
779d088e6a4SVladimir Sementsov-Ogievskiy         if (task->req.offset > offset) {
780d088e6a4SVladimir Sementsov-Ogievskiy             trace_block_copy_skip_range(s, offset, task->req.offset - offset);
781beb5f545SVladimir Sementsov-Ogievskiy         }
782beb5f545SVladimir Sementsov-Ogievskiy 
7835332e5d2SVladimir Sementsov-Ogievskiy         found_dirty = true;
7845332e5d2SVladimir Sementsov-Ogievskiy 
785d088e6a4SVladimir Sementsov-Ogievskiy         ret = block_copy_block_status(s, task->req.offset, task->req.bytes,
78642ac2144SVladimir Sementsov-Ogievskiy                                       &status_bytes);
7875332e5d2SVladimir Sementsov-Ogievskiy         assert(ret >= 0); /* never fail */
788d088e6a4SVladimir Sementsov-Ogievskiy         if (status_bytes < task->req.bytes) {
78942ac2144SVladimir Sementsov-Ogievskiy             block_copy_task_shrink(task, status_bytes);
79042ac2144SVladimir Sementsov-Ogievskiy         }
791d0c389d2SEmanuele Giuseppe Esposito         if (qatomic_read(&s->skip_unallocated) &&
792d0c389d2SEmanuele Giuseppe Esposito             !(ret & BDRV_BLOCK_ALLOCATED)) {
7931348a657SVladimir Sementsov-Ogievskiy             block_copy_task_end(task, 0);
794d088e6a4SVladimir Sementsov-Ogievskiy             trace_block_copy_skip_range(s, task->req.offset, task->req.bytes);
79542ac2144SVladimir Sementsov-Ogievskiy             offset = task_end(task);
79642ac2144SVladimir Sementsov-Ogievskiy             bytes = end - offset;
797fc9aefc8SVladimir Sementsov-Ogievskiy             g_free(task);
798beb5f545SVladimir Sementsov-Ogievskiy             continue;
799beb5f545SVladimir Sementsov-Ogievskiy         }
800bed95234SVladimir Sementsov-Ogievskiy         if (ret & BDRV_BLOCK_ZERO) {
80105d5e12bSPaolo Bonzini             task->method = COPY_WRITE_ZEROES;
802bed95234SVladimir Sementsov-Ogievskiy         }
8032d57511aSVladimir Sementsov-Ogievskiy 
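        /*
         * Rate limiting: if a delay is currently required, end this task,
         * sleep (wakeable via block_copy_kick()), and retry the same offset
         * on the next iteration.
         */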
8047e032df0SVladimir Sementsov-Ogievskiy         if (!call_state->ignore_ratelimit) {
8057e032df0SVladimir Sementsov-Ogievskiy             uint64_t ns = ratelimit_calculate_delay(&s->rate_limit, 0);
8067e032df0SVladimir Sementsov-Ogievskiy             if (ns > 0) {
8077e032df0SVladimir Sementsov-Ogievskiy                 block_copy_task_end(task, -EAGAIN);
8087e032df0SVladimir Sementsov-Ogievskiy                 g_free(task);
80929a6ea24SPaolo Bonzini                 qemu_co_sleep_ns_wakeable(&call_state->sleep,
81029a6ea24SPaolo Bonzini                                           QEMU_CLOCK_REALTIME, ns);
8117e032df0SVladimir Sementsov-Ogievskiy                 continue;
8127e032df0SVladimir Sementsov-Ogievskiy             }
8137e032df0SVladimir Sementsov-Ogievskiy         }
8147e032df0SVladimir Sementsov-Ogievskiy 
815d088e6a4SVladimir Sementsov-Ogievskiy         ratelimit_calculate_delay(&s->rate_limit, task->req.bytes);
8167e032df0SVladimir Sementsov-Ogievskiy 
817d088e6a4SVladimir Sementsov-Ogievskiy         trace_block_copy_process(s, task->req.offset);
818beb5f545SVladimir Sementsov-Ogievskiy 
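        /*
         * Throttle the total amount of in-flight copy memory (bounded by
         * BLOCK_COPY_MAX_MEM); the shared resource is released again when the
         * task's I/O completes.
         */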
819d088e6a4SVladimir Sementsov-Ogievskiy         co_get_from_shres(s->mem, task->req.bytes);
820beb5f545SVladimir Sementsov-Ogievskiy 
82142ac2144SVladimir Sementsov-Ogievskiy         offset = task_end(task);
82242ac2144SVladimir Sementsov-Ogievskiy         bytes = end - offset;
8234ce5dd3eSVladimir Sementsov-Ogievskiy 
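        /* Create the AIO task pool lazily, only if more work remains. */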
8244ce5dd3eSVladimir Sementsov-Ogievskiy         if (!aio && bytes) {
82526be9d62SVladimir Sementsov-Ogievskiy             aio = aio_task_pool_new(call_state->max_workers);
826beb5f545SVladimir Sementsov-Ogievskiy         }
827beb5f545SVladimir Sementsov-Ogievskiy 
8284ce5dd3eSVladimir Sementsov-Ogievskiy         ret = block_copy_task_run(aio, task);
8294ce5dd3eSVladimir Sementsov-Ogievskiy         if (ret < 0) {
8304ce5dd3eSVladimir Sementsov-Ogievskiy             goto out;
8314ce5dd3eSVladimir Sementsov-Ogievskiy         }
8324ce5dd3eSVladimir Sementsov-Ogievskiy     }
8334ce5dd3eSVladimir Sementsov-Ogievskiy 
8344ce5dd3eSVladimir Sementsov-Ogievskiy out:
8354ce5dd3eSVladimir Sementsov-Ogievskiy     if (aio) {
8364ce5dd3eSVladimir Sementsov-Ogievskiy         aio_task_pool_wait_all(aio);
8374ce5dd3eSVladimir Sementsov-Ogievskiy 
8384ce5dd3eSVladimir Sementsov-Ogievskiy         /*
8394ce5dd3eSVladimir Sementsov-Ogievskiy          * We are not really interested in -ECANCELED returned from
8404ce5dd3eSVladimir Sementsov-Ogievskiy          * block_copy_task_run. If it failed, some task already failed for a
8414ce5dd3eSVladimir Sementsov-Ogievskiy          * real reason; let's return the first failure.
8424ce5dd3eSVladimir Sementsov-Ogievskiy          * Still, assert that we don't overwrite a failure with success.
843e8de7ba9SVladimir Sementsov-Ogievskiy          *
844e8de7ba9SVladimir Sementsov-Ogievskiy          * Note: ret may be positive here because of block-status result.
8454ce5dd3eSVladimir Sementsov-Ogievskiy          */
846e8de7ba9SVladimir Sementsov-Ogievskiy         assert(ret >= 0 || aio_task_pool_status(aio) < 0);
8474ce5dd3eSVladimir Sementsov-Ogievskiy         ret = aio_task_pool_status(aio);
8484ce5dd3eSVladimir Sementsov-Ogievskiy 
8494ce5dd3eSVladimir Sementsov-Ogievskiy         aio_task_pool_free(aio);
8504ce5dd3eSVladimir Sementsov-Ogievskiy     }
8514ce5dd3eSVladimir Sementsov-Ogievskiy 
8524ce5dd3eSVladimir Sementsov-Ogievskiy     return ret < 0 ? ret : found_dirty;
8535332e5d2SVladimir Sementsov-Ogievskiy }
8545332e5d2SVladimir Sementsov-Ogievskiy 
8557e032df0SVladimir Sementsov-Ogievskiy void block_copy_kick(BlockCopyCallState *call_state)
8567e032df0SVladimir Sementsov-Ogievskiy {
85729a6ea24SPaolo Bonzini     qemu_co_sleep_wake(&call_state->sleep);
8587e032df0SVladimir Sementsov-Ogievskiy }
8597e032df0SVladimir Sementsov-Ogievskiy 
8605332e5d2SVladimir Sementsov-Ogievskiy /*
8613b8c2329SVladimir Sementsov-Ogievskiy  * block_copy_common
8625332e5d2SVladimir Sementsov-Ogievskiy  *
8635332e5d2SVladimir Sementsov-Ogievskiy  * Copy the requested region according to the dirty bitmap.
8645332e5d2SVladimir Sementsov-Ogievskiy  * Collaborate with parallel block_copy requests: if they succeed, it will help
8655332e5d2SVladimir Sementsov-Ogievskiy  * us. If they fail, we will retry the not-copied regions. So, if we return an
8665332e5d2SVladimir Sementsov-Ogievskiy  * error, it means that some I/O operation failed in the context of _this_
8675332e5d2SVladimir Sementsov-Ogievskiy  * block_copy call, not some parallel operation.
8685332e5d2SVladimir Sementsov-Ogievskiy  */
8697ff9579eSKevin Wolf static int coroutine_fn GRAPH_RDLOCK
8707ff9579eKevin Wolf block_copy_common(BlockCopyCallState *call_state)
8715332e5d2SVladimir Sementsov-Ogievskiy {
8725332e5d2SVladimir Sementsov-Ogievskiy     int ret;
873c6a3e3dfSEmanuele Giuseppe Esposito     BlockCopyState *s = call_state->s;
8745332e5d2SVladimir Sementsov-Ogievskiy 
875d0c389d2SEmanuele Giuseppe Esposito     qemu_co_mutex_lock(&s->lock);
876c6a3e3dfSEmanuele Giuseppe Esposito     QLIST_INSERT_HEAD(&s->calls, call_state, list);
877d0c389d2SEmanuele Giuseppe Esposito     qemu_co_mutex_unlock(&s->lock);
8782e099a9dSVladimir Sementsov-Ogievskiy 
8795332e5d2SVladimir Sementsov-Ogievskiy     do {
8803b8c2329SVladimir Sementsov-Ogievskiy         ret = block_copy_dirty_clusters(call_state);
8815332e5d2SVladimir Sementsov-Ogievskiy 
882149009beSEmanuele Giuseppe Esposito         if (ret == 0 && !qatomic_read(&call_state->cancelled)) {
883d0c389d2SEmanuele Giuseppe Esposito             WITH_QEMU_LOCK_GUARD(&s->lock) {
884d0c389d2SEmanuele Giuseppe Esposito                 /*
885d0c389d2SEmanuele Giuseppe Esposito                  * Check whether there is a pending task we still
886d0c389d2SEmanuele Giuseppe Esposito                  * need to wait for
887d0c389d2SEmanuele Giuseppe Esposito                  */
888d088e6a4SVladimir Sementsov-Ogievskiy                 ret = reqlist_wait_one(&s->reqs, call_state->offset,
889d088e6a4SVladimir Sementsov-Ogievskiy                                        call_state->bytes, &s->lock);
890d0c389d2SEmanuele Giuseppe Esposito                 if (ret == 0) {
891d0c389d2SEmanuele Giuseppe Esposito                     /*
892d0c389d2SEmanuele Giuseppe Esposito                      * No pending tasks, but check again the bitmap in this
893d0c389d2SEmanuele Giuseppe Esposito                      * same critical section, since a task might have failed
894d0c389d2SEmanuele Giuseppe Esposito                      * between this and the critical section in
895d0c389d2SEmanuele Giuseppe Esposito                      * block_copy_dirty_clusters().
896d0c389d2SEmanuele Giuseppe Esposito                      *
897d088e6a4SVladimir Sementsov-Ogievskiy                      * A reqlist_wait_one() return value of 0 also means that it
898d0c389d2SEmanuele Giuseppe Esposito                      * didn't release the lock. So, we are still in the same
899d0c389d2SEmanuele Giuseppe Esposito                      * critical section, not interrupted by any concurrent
900d0c389d2SEmanuele Giuseppe Esposito                      * access to state.
901d0c389d2SEmanuele Giuseppe Esposito                      */
902d0c389d2SEmanuele Giuseppe Esposito                     ret = bdrv_dirty_bitmap_next_dirty(s->copy_bitmap,
903d0c389d2SEmanuele Giuseppe Esposito                                                        call_state->offset,
904d0c389d2SEmanuele Giuseppe Esposito                                                        call_state->bytes) >= 0;
905d0c389d2SEmanuele Giuseppe Esposito                 }
906d0c389d2SEmanuele Giuseppe Esposito             }
9075332e5d2SVladimir Sementsov-Ogievskiy         }
9085332e5d2SVladimir Sementsov-Ogievskiy 
9095332e5d2SVladimir Sementsov-Ogievskiy         /*
9105332e5d2SVladimir Sementsov-Ogievskiy          * We retry in two cases:
9115332e5d2SVladimir Sementsov-Ogievskiy          * 1. Some progress was made
9125332e5d2SVladimir Sementsov-Ogievskiy          *    Something was copied, which means that there were yield points
9135332e5d2SVladimir Sementsov-Ogievskiy          *    and some new dirty bits may have appeared (due to failed parallel
9145332e5d2SVladimir Sementsov-Ogievskiy          *    block-copy requests).
9155332e5d2SVladimir Sementsov-Ogievskiy          * 2. We have waited for some intersecting block-copy request
9165332e5d2SVladimir Sementsov-Ogievskiy          *    It may have failed and produced new dirty bits.
9175332e5d2SVladimir Sementsov-Ogievskiy          */
918149009beSEmanuele Giuseppe Esposito     } while (ret > 0 && !qatomic_read(&call_state->cancelled));
919a6ffe199SVladimir Sementsov-Ogievskiy 
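    /*
     * Pairs with qatomic_load_acquire() in the block_copy_call_* status
     * helpers below: once they observe finished == true, all writes to
     * call_state made by this coroutine are visible to them.
     */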
920149009beSEmanuele Giuseppe Esposito     qatomic_store_release(&call_state->finished, true);
921de4641b4SVladimir Sementsov-Ogievskiy 
922de4641b4SVladimir Sementsov-Ogievskiy     if (call_state->cb) {
923de4641b4SVladimir Sementsov-Ogievskiy         call_state->cb(call_state->cb_opaque);
924de4641b4SVladimir Sementsov-Ogievskiy     }
925de4641b4SVladimir Sementsov-Ogievskiy 
926d0c389d2SEmanuele Giuseppe Esposito     qemu_co_mutex_lock(&s->lock);
9272e099a9dSVladimir Sementsov-Ogievskiy     QLIST_REMOVE(call_state, list);
928d0c389d2SEmanuele Giuseppe Esposito     qemu_co_mutex_unlock(&s->lock);
9292e099a9dSVladimir Sementsov-Ogievskiy 
930beb5f545SVladimir Sementsov-Ogievskiy     return ret;
931beb5f545SVladimir Sementsov-Ogievskiy }
932397f4e9dSVladimir Sementsov-Ogievskiy 
93315df6e69SVladimir Sementsov-Ogievskiy static void coroutine_fn block_copy_async_co_entry(void *opaque)
9343b8c2329SVladimir Sementsov-Ogievskiy {
9357ff9579eSKevin Wolf     GRAPH_RDLOCK_GUARD();
93615df6e69SVladimir Sementsov-Ogievskiy     block_copy_common(opaque);
93715df6e69SVladimir Sementsov-Ogievskiy }
93815df6e69SVladimir Sementsov-Ogievskiy 
93915df6e69SVladimir Sementsov-Ogievskiy int coroutine_fn block_copy(BlockCopyState *s, int64_t start, int64_t bytes,
94015df6e69SVladimir Sementsov-Ogievskiy                             bool ignore_ratelimit, uint64_t timeout_ns,
94115df6e69SVladimir Sementsov-Ogievskiy                             BlockCopyAsyncCallbackFunc cb,
94215df6e69SVladimir Sementsov-Ogievskiy                             void *cb_opaque)
94315df6e69SVladimir Sementsov-Ogievskiy {
94415df6e69SVladimir Sementsov-Ogievskiy     int ret;
94515df6e69SVladimir Sementsov-Ogievskiy     BlockCopyCallState *call_state = g_new(BlockCopyCallState, 1);
94615df6e69SVladimir Sementsov-Ogievskiy 
94715df6e69SVladimir Sementsov-Ogievskiy     *call_state = (BlockCopyCallState) {
9483b8c2329SVladimir Sementsov-Ogievskiy         .s = s,
9493b8c2329SVladimir Sementsov-Ogievskiy         .offset = start,
9503b8c2329SVladimir Sementsov-Ogievskiy         .bytes = bytes,
9517e032df0SVladimir Sementsov-Ogievskiy         .ignore_ratelimit = ignore_ratelimit,
95226be9d62SVladimir Sementsov-Ogievskiy         .max_workers = BLOCK_COPY_MAX_WORKERS,
95315df6e69SVladimir Sementsov-Ogievskiy         .cb = cb,
95415df6e69SVladimir Sementsov-Ogievskiy         .cb_opaque = cb_opaque,
9553b8c2329SVladimir Sementsov-Ogievskiy     };
9563b8c2329SVladimir Sementsov-Ogievskiy 
95715df6e69SVladimir Sementsov-Ogievskiy     ret = qemu_co_timeout(block_copy_async_co_entry, call_state, timeout_ns,
95815df6e69SVladimir Sementsov-Ogievskiy                           g_free);
95915df6e69SVladimir Sementsov-Ogievskiy     if (ret < 0) {
96015df6e69SVladimir Sementsov-Ogievskiy         assert(ret == -ETIMEDOUT);
96115df6e69SVladimir Sementsov-Ogievskiy         block_copy_call_cancel(call_state);
96215df6e69SVladimir Sementsov-Ogievskiy         /* call_state will be freed by running coroutine. */
96315df6e69SVladimir Sementsov-Ogievskiy         return ret;
9643b8c2329SVladimir Sementsov-Ogievskiy     }
9653b8c2329SVladimir Sementsov-Ogievskiy 
96615df6e69SVladimir Sementsov-Ogievskiy     ret = call_state->ret;
96715df6e69SVladimir Sementsov-Ogievskiy     g_free(call_state);
96815df6e69SVladimir Sementsov-Ogievskiy 
96915df6e69SVladimir Sementsov-Ogievskiy     return ret;
970de4641b4SVladimir Sementsov-Ogievskiy }
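/*
 * Illustrative usage sketch (not part of this file): a synchronous copy of
 * one cluster-aligned region with a one-second timeout, from coroutine
 * context. 'bcs', 'offset' and 'bytes' are assumptions of the example.
 *
 *     int ret = block_copy(bcs, offset, bytes,
 *                          false,                       // honour rate limit
 *                          1 * NANOSECONDS_PER_SECOND,  // timeout_ns
 *                          NULL, NULL);                 // no callback
 *     if (ret == -ETIMEDOUT) {
 *         // copy was cancelled; call_state is freed by the coroutine
 *     }
 */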
971de4641b4SVladimir Sementsov-Ogievskiy 
972de4641b4SVladimir Sementsov-Ogievskiy BlockCopyCallState *block_copy_async(BlockCopyState *s,
973de4641b4SVladimir Sementsov-Ogievskiy                                      int64_t offset, int64_t bytes,
97426be9d62SVladimir Sementsov-Ogievskiy                                      int max_workers, int64_t max_chunk,
975de4641b4SVladimir Sementsov-Ogievskiy                                      BlockCopyAsyncCallbackFunc cb,
976de4641b4SVladimir Sementsov-Ogievskiy                                      void *cb_opaque)
977de4641b4SVladimir Sementsov-Ogievskiy {
978de4641b4SVladimir Sementsov-Ogievskiy     BlockCopyCallState *call_state = g_new(BlockCopyCallState, 1);
979de4641b4SVladimir Sementsov-Ogievskiy 
980de4641b4SVladimir Sementsov-Ogievskiy     *call_state = (BlockCopyCallState) {
981de4641b4SVladimir Sementsov-Ogievskiy         .s = s,
982de4641b4SVladimir Sementsov-Ogievskiy         .offset = offset,
983de4641b4SVladimir Sementsov-Ogievskiy         .bytes = bytes,
98426be9d62SVladimir Sementsov-Ogievskiy         .max_workers = max_workers,
98526be9d62SVladimir Sementsov-Ogievskiy         .max_chunk = max_chunk,
986de4641b4SVladimir Sementsov-Ogievskiy         .cb = cb,
987de4641b4SVladimir Sementsov-Ogievskiy         .cb_opaque = cb_opaque,
988de4641b4SVladimir Sementsov-Ogievskiy 
989de4641b4SVladimir Sementsov-Ogievskiy         .co = qemu_coroutine_create(block_copy_async_co_entry, call_state),
990de4641b4SVladimir Sementsov-Ogievskiy     };
991de4641b4SVladimir Sementsov-Ogievskiy 
992de4641b4SVladimir Sementsov-Ogievskiy     qemu_coroutine_enter(call_state->co);
993de4641b4SVladimir Sementsov-Ogievskiy 
994de4641b4SVladimir Sementsov-Ogievskiy     return call_state;
995de4641b4SVladimir Sementsov-Ogievskiy }
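/*
 * Illustrative usage sketch (not part of this file): start an asynchronous
 * copy and poll it via the call-state helpers below. 'bcs' and 'done_cb' are
 * assumptions of the example; max_chunk of 0 means no per-task limit.
 *
 *     BlockCopyCallState *cs;
 *
 *     cs = block_copy_async(bcs, 0, bytes, BLOCK_COPY_MAX_WORKERS, 0,
 *                           done_cb, NULL);
 *     ...
 *     if (block_copy_call_finished(cs)) {
 *         int ret = block_copy_call_status(cs, NULL);
 *         block_copy_call_free(cs);
 *     }
 */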
996de4641b4SVladimir Sementsov-Ogievskiy 
997de4641b4SVladimir Sementsov-Ogievskiy void block_copy_call_free(BlockCopyCallState *call_state)
998de4641b4SVladimir Sementsov-Ogievskiy {
999de4641b4SVladimir Sementsov-Ogievskiy     if (!call_state) {
1000de4641b4SVladimir Sementsov-Ogievskiy         return;
1001de4641b4SVladimir Sementsov-Ogievskiy     }
1002de4641b4SVladimir Sementsov-Ogievskiy 
1003149009beSEmanuele Giuseppe Esposito     assert(qatomic_read(&call_state->finished));
1004de4641b4SVladimir Sementsov-Ogievskiy     g_free(call_state);
1005de4641b4SVladimir Sementsov-Ogievskiy }
1006de4641b4SVladimir Sementsov-Ogievskiy 
1007de4641b4SVladimir Sementsov-Ogievskiy bool block_copy_call_finished(BlockCopyCallState *call_state)
1008de4641b4SVladimir Sementsov-Ogievskiy {
1009149009beSEmanuele Giuseppe Esposito     return qatomic_read(&call_state->finished);
1010de4641b4SVladimir Sementsov-Ogievskiy }
1011de4641b4SVladimir Sementsov-Ogievskiy 
1012de4641b4SVladimir Sementsov-Ogievskiy bool block_copy_call_succeeded(BlockCopyCallState *call_state)
1013de4641b4SVladimir Sementsov-Ogievskiy {
1014149009beSEmanuele Giuseppe Esposito     return qatomic_load_acquire(&call_state->finished) &&
1015149009beSEmanuele Giuseppe Esposito            !qatomic_read(&call_state->cancelled) &&
1016a6d23d56SVladimir Sementsov-Ogievskiy            call_state->ret == 0;
1017de4641b4SVladimir Sementsov-Ogievskiy }
1018de4641b4SVladimir Sementsov-Ogievskiy 
1019de4641b4SVladimir Sementsov-Ogievskiy bool block_copy_call_failed(BlockCopyCallState *call_state)
1020de4641b4SVladimir Sementsov-Ogievskiy {
1021149009beSEmanuele Giuseppe Esposito     return qatomic_load_acquire(&call_state->finished) &&
1022149009beSEmanuele Giuseppe Esposito            !qatomic_read(&call_state->cancelled) &&
1023a6d23d56SVladimir Sementsov-Ogievskiy            call_state->ret < 0;
1024a6d23d56SVladimir Sementsov-Ogievskiy }
1025a6d23d56SVladimir Sementsov-Ogievskiy 
1026a6d23d56SVladimir Sementsov-Ogievskiy bool block_copy_call_cancelled(BlockCopyCallState *call_state)
1027a6d23d56SVladimir Sementsov-Ogievskiy {
1028149009beSEmanuele Giuseppe Esposito     return qatomic_read(&call_state->cancelled);
1029de4641b4SVladimir Sementsov-Ogievskiy }
1030de4641b4SVladimir Sementsov-Ogievskiy 
1031de4641b4SVladimir Sementsov-Ogievskiy int block_copy_call_status(BlockCopyCallState *call_state, bool *error_is_read)
1032de4641b4SVladimir Sementsov-Ogievskiy {
1033149009beSEmanuele Giuseppe Esposito     assert(qatomic_load_acquire(&call_state->finished));
1034de4641b4SVladimir Sementsov-Ogievskiy     if (error_is_read) {
1035de4641b4SVladimir Sementsov-Ogievskiy         *error_is_read = call_state->error_is_read;
1036de4641b4SVladimir Sementsov-Ogievskiy     }
1037de4641b4SVladimir Sementsov-Ogievskiy     return call_state->ret;
1038de4641b4SVladimir Sementsov-Ogievskiy }
1039de4641b4SVladimir Sementsov-Ogievskiy 
1040149009beSEmanuele Giuseppe Esposito /*
1041149009beSEmanuele Giuseppe Esposito  * Note that cancelling and finishing are racy.
1042149009beSEmanuele Giuseppe Esposito  * A user can cancel a block-copy that has already finished.
1043149009beSEmanuele Giuseppe Esposito  */
1044a6d23d56SVladimir Sementsov-Ogievskiy void block_copy_call_cancel(BlockCopyCallState *call_state)
1045a6d23d56SVladimir Sementsov-Ogievskiy {
1046149009beSEmanuele Giuseppe Esposito     qatomic_set(&call_state->cancelled, true);
1047a6d23d56SVladimir Sementsov-Ogievskiy     block_copy_kick(call_state);
1048a6d23d56SVladimir Sementsov-Ogievskiy }
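/*
 * Note for users: cancellation only requests termination. The call state must
 * still be observed as finished (block_copy_call_finished()) before
 * block_copy_call_free() may be used, which asserts that the call finished.
 */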
1049a6d23d56SVladimir Sementsov-Ogievskiy 
1050397f4e9dSVladimir Sementsov-Ogievskiy BdrvDirtyBitmap *block_copy_dirty_bitmap(BlockCopyState *s)
1051397f4e9dSVladimir Sementsov-Ogievskiy {
1052397f4e9dSVladimir Sementsov-Ogievskiy     return s->copy_bitmap;
1053397f4e9dSVladimir Sementsov-Ogievskiy }
1054397f4e9dSVladimir Sementsov-Ogievskiy 
1055b518e9e9SVladimir Sementsov-Ogievskiy int64_t block_copy_cluster_size(BlockCopyState *s)
1056b518e9e9SVladimir Sementsov-Ogievskiy {
1057b518e9e9SVladimir Sementsov-Ogievskiy     return s->cluster_size;
1058b518e9e9SVladimir Sementsov-Ogievskiy }
1059b518e9e9SVladimir Sementsov-Ogievskiy 
1060397f4e9dSVladimir Sementsov-Ogievskiy void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip)
1061397f4e9dSVladimir Sementsov-Ogievskiy {
1062d0c389d2SEmanuele Giuseppe Esposito     qatomic_set(&s->skip_unallocated, skip);
1063397f4e9dSVladimir Sementsov-Ogievskiy }
10647e032df0SVladimir Sementsov-Ogievskiy 
10657e032df0SVladimir Sementsov-Ogievskiy void block_copy_set_speed(BlockCopyState *s, uint64_t speed)
10667e032df0SVladimir Sementsov-Ogievskiy {
10677e032df0SVladimir Sementsov-Ogievskiy     ratelimit_set_speed(&s->rate_limit, speed, BLOCK_COPY_SLICE_TIME);
10687e032df0SVladimir Sementsov-Ogievskiy 
10697e032df0SVladimir Sementsov-Ogievskiy     /*
10707e032df0SVladimir Sementsov-Ogievskiy      * Note: it would be good to kick all call states from here, but that may
10717e032df0SVladimir Sementsov-Ogievskiy      * only be done from a coroutine, so as not to crash if the s->calls list
10727e032df0SVladimir Sementsov-Ogievskiy      * changes while entering one call. So for now, the only user of this
10737e032df0SVladimir Sementsov-Ogievskiy      * function kicks its single call_state by hand.
10747e032df0SVladimir Sementsov-Ogievskiy      */
10757e032df0SVladimir Sementsov-Ogievskiy }
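/*
 * Illustrative sketch (not part of this file) of the pattern described in the
 * note above: the caller updates the limit and then kicks its own call state
 * so a sleeping copy re-evaluates the new speed. 'bcs' and 'cs' are
 * assumptions of the example.
 *
 *     block_copy_set_speed(bcs, speed);
 *     if (cs) {
 *         block_copy_kick(cs);
 *     }
 */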
1076