/*
 * block_copy API
 *
 * Copyright (C) 2013 Proxmox Server Solutions
 * Copyright (c) 2019 Virtuozzo International GmbH.
 *
 * Authors:
 *  Dietmar Maurer (dietmar@proxmox.com)
 *  Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"

#include "trace.h"
#include "qapi/error.h"
#include "block/block-copy.h"
#include "sysemu/block-backend.h"

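/*
 * Block (yielding in coroutine context) until no in-flight request intersects
 * the byte range [start, end).  The list is re-scanned after each wakeup,
 * since new overlapping requests may have been registered in the meantime.
 */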
static void coroutine_fn block_copy_wait_inflight_reqs(BlockCopyState *s,
                                                       int64_t start,
                                                       int64_t end)
{
    BlockCopyInFlightReq *req;
    bool waited;

    do {
        waited = false;
        QLIST_FOREACH(req, &s->inflight_reqs, list) {
            if (end > req->start_byte && start < req->end_byte) {
                qemu_co_queue_wait(&req->wait_queue, NULL);
                waited = true;
                break;
            }
        }
    } while (waited);
}

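/*
 * Register [start, end) as an in-flight request.  Callers are expected to
 * have waited for intersecting requests first, see
 * block_copy_wait_inflight_reqs().
 */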
static void block_copy_inflight_req_begin(BlockCopyState *s,
                                          BlockCopyInFlightReq *req,
                                          int64_t start, int64_t end)
{
    req->start_byte = start;
    req->end_byte = end;
    qemu_co_queue_init(&req->wait_queue);
    QLIST_INSERT_HEAD(&s->inflight_reqs, req, list);
}

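/* Drop the in-flight request and wake everybody waiting for it. */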
static void coroutine_fn block_copy_inflight_req_end(BlockCopyInFlightReq *req)
{
    QLIST_REMOVE(req, list);
    qemu_co_queue_restart_all(&req->wait_queue);
}

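/* Release the copy bitmap and free @s.  A NULL @s is allowed. */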
void block_copy_state_free(BlockCopyState *s)
{
    if (!s) {
        return;
    }

    bdrv_release_dirty_bitmap(s->source->bs, s->copy_bitmap);
    g_free(s);
}

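/*
 * Allocate and initialize a BlockCopyState for copying from @source to
 * @target at @cluster_size granularity.  A disabled dirty bitmap is created
 * to track which clusters still have to be copied; block_copy() only copies
 * clusters that are marked dirty in it.  Returns NULL and sets @errp on
 * failure.
 */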
BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
                                     int64_t cluster_size,
                                     BdrvRequestFlags write_flags, Error **errp)
{
    BlockCopyState *s;
    BdrvDirtyBitmap *copy_bitmap;
    uint32_t max_transfer =
            MIN_NON_ZERO(INT_MAX, MIN_NON_ZERO(source->bs->bl.max_transfer,
                                               target->bs->bl.max_transfer));

    copy_bitmap = bdrv_create_dirty_bitmap(source->bs, cluster_size, NULL,
                                           errp);
    if (!copy_bitmap) {
        return NULL;
    }
    bdrv_disable_dirty_bitmap(copy_bitmap);

    s = g_new(BlockCopyState, 1);
    *s = (BlockCopyState) {
        .source = source,
        .target = target,
        .copy_bitmap = copy_bitmap,
        .cluster_size = cluster_size,
        .len = bdrv_dirty_bitmap_size(copy_bitmap),
        .write_flags = write_flags,
    };

    s->copy_range_size = QEMU_ALIGN_DOWN(max_transfer, cluster_size);
    /*
     * Set use_copy_range, considering the following:
     * 1. Compression is not supported for copy_range.
     * 2. copy_range does not respect max_transfer (it's a TODO), so we factor
     *    that in here.  If max_transfer is smaller than the cluster size, we
     *    do not use copy_range (in that case copy_range_size is zero after
     *    aligning down above).
     */
    s->use_copy_range =
        !(write_flags & BDRV_REQ_WRITE_COMPRESSED) && s->copy_range_size > 0;

    QLIST_INIT(&s->inflight_reqs);

    return s;
}

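/*
 * Set the progress callbacks used by block_copy(): @progress_bytes_callback
 * is called after each successfully copied chunk, and
 * @progress_reset_callback whenever block_copy_reset_unallocated() drops
 * unallocated clusters from the copy bitmap.  @progress_opaque is passed to
 * both callbacks.
 */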
void block_copy_set_callbacks(
        BlockCopyState *s,
        ProgressBytesCallbackFunc progress_bytes_callback,
        ProgressResetCallbackFunc progress_reset_callback,
        void *progress_opaque)
{
    s->progress_bytes_callback = progress_bytes_callback;
    s->progress_reset_callback = progress_reset_callback;
    s->progress_opaque = progress_opaque;
}

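/*
 * Typical call sequence for this API (a sketch only; the identifiers below
 * are the caller's own and are not defined in this file):
 *
 *     BlockCopyState *bcs = block_copy_state_new(source, target,
 *                                                cluster_size, write_flags,
 *                                                errp);
 *     if (!bcs) {
 *         return;
 *     }
 *     block_copy_set_callbacks(bcs, progress_cb, reset_cb, opaque);
 *
 *     Mark the clusters to be copied as dirty in the copy bitmap, then, from
 *     a coroutine, with source and target in the same AioContext and with
 *     cluster-aligned arguments:
 *
 *     ret = block_copy(bcs, 0, length, &error_is_read);
 *
 *     block_copy_state_free(bcs);
 */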

/*
 * Copy one cluster (at most @s->cluster_size bytes, clamped at the end of the
 * image) starting at @start to the target using a bounce buffer, and return
 * the number of bytes copied.  If an error occurs, return a negative error
 * number.
 */
static int coroutine_fn block_copy_with_bounce_buffer(BlockCopyState *s,
                                                      int64_t start,
                                                      int64_t end,
                                                      bool *error_is_read,
                                                      void **bounce_buffer)
{
    int ret;
    int nbytes;

    assert(QEMU_IS_ALIGNED(start, s->cluster_size));
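    /*
     * Clear the bitmap for this cluster up front; the fail path below marks
     * it dirty again so that the cluster can be retried later.
     */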
    bdrv_reset_dirty_bitmap(s->copy_bitmap, start, s->cluster_size);
    nbytes = MIN(s->cluster_size, s->len - start);
    if (!*bounce_buffer) {
        *bounce_buffer = qemu_blockalign(s->source->bs, s->cluster_size);
    }

    ret = bdrv_co_pread(s->source, start, nbytes, *bounce_buffer, 0);
    if (ret < 0) {
        trace_block_copy_with_bounce_buffer_read_fail(s, start, ret);
        if (error_is_read) {
            *error_is_read = true;
        }
        goto fail;
    }

    ret = bdrv_co_pwrite(s->target, start, nbytes, *bounce_buffer,
                         s->write_flags);
    if (ret < 0) {
        trace_block_copy_with_bounce_buffer_write_fail(s, start, ret);
        if (error_is_read) {
            *error_is_read = false;
        }
        goto fail;
    }

    return nbytes;
fail:
    bdrv_set_dirty_bitmap(s->copy_bitmap, start, s->cluster_size);
    return ret;
}

/*
 * Copy a range to the target with copy_range offloading and return the number
 * of bytes copied.  If an error occurs, return a negative error number.
 */
static int coroutine_fn block_copy_with_offload(BlockCopyState *s,
                                                int64_t start,
                                                int64_t end)
{
    int ret;
    int nr_clusters;
    int nbytes;

    assert(QEMU_IS_ALIGNED(s->copy_range_size, s->cluster_size));
    assert(QEMU_IS_ALIGNED(start, s->cluster_size));
    nbytes = MIN(s->copy_range_size, MIN(end, s->len) - start);
    nr_clusters = DIV_ROUND_UP(nbytes, s->cluster_size);
    bdrv_reset_dirty_bitmap(s->copy_bitmap, start,
                            s->cluster_size * nr_clusters);
    ret = bdrv_co_copy_range(s->source, start, s->target, start, nbytes,
                             0, s->write_flags);
    if (ret < 0) {
        trace_block_copy_with_offload_fail(s, start, ret);
        bdrv_set_dirty_bitmap(s->copy_bitmap, start,
                              s->cluster_size * nr_clusters);
        return ret;
    }

    return nbytes;
}

/*
 * Check whether the cluster starting at @offset is allocated or not.
 * Return via @pnum the number of contiguous clusters sharing this allocation.
 */
static int block_copy_is_cluster_allocated(BlockCopyState *s, int64_t offset,
                                           int64_t *pnum)
{
    BlockDriverState *bs = s->source->bs;
    int64_t count, total_count = 0;
    int64_t bytes = s->len - offset;
    int ret;

    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));

    while (true) {
        ret = bdrv_is_allocated(bs, offset, bytes, &count);
        if (ret < 0) {
            return ret;
        }

        total_count += count;

        if (ret || count == 0) {
            /*
             * ret: partial segment(s) are considered allocated.
             * otherwise: unallocated tail is treated as an entire segment.
             */
            *pnum = DIV_ROUND_UP(total_count, s->cluster_size);
            return ret;
        }

        /* Unallocated segment(s) with uncertain following segment(s) */
        if (total_count >= s->cluster_size) {
            *pnum = total_count / s->cluster_size;
            return 0;
        }

        offset += count;
        bytes -= count;
    }
}

/*
 * Reset bits in copy_bitmap starting at @offset if they represent unallocated
 * data in the image.  May reset subsequent contiguous bits.
 * @return 0 when the cluster at @offset was unallocated,
 *         1 otherwise, and a negative error number on error.
 */
int64_t block_copy_reset_unallocated(BlockCopyState *s,
                                     int64_t offset, int64_t *count)
{
    int ret;
    int64_t clusters, bytes;

    ret = block_copy_is_cluster_allocated(s, offset, &clusters);
    if (ret < 0) {
        return ret;
    }

    bytes = clusters * s->cluster_size;

    if (!ret) {
        bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
        s->progress_reset_callback(s->progress_opaque);
    }

    *count = bytes;
    return ret;
}

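/*
 * Copy the clusters in [@start, @start + @bytes) that are marked dirty in the
 * copy bitmap from source to target.  @start and @bytes must be aligned to
 * the cluster size.  Returns 0 on success or a negative error number; on
 * failure, *@error_is_read (when non-NULL) tells whether the error came from
 * reading the source or writing the target.
 */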
int coroutine_fn block_copy(BlockCopyState *s,
                            int64_t start, uint64_t bytes,
                            bool *error_is_read)
{
    int ret = 0;
    int64_t end = bytes + start; /* bytes */
    void *bounce_buffer = NULL;
    int64_t status_bytes;
    BlockCopyInFlightReq req;

    /*
     * The block_copy() caller is responsible for keeping source and target
     * in the same AioContext.
     */
    assert(bdrv_get_aio_context(s->source->bs) ==
           bdrv_get_aio_context(s->target->bs));

    assert(QEMU_IS_ALIGNED(start, s->cluster_size));
    assert(QEMU_IS_ALIGNED(end, s->cluster_size));

    block_copy_wait_inflight_reqs(s, start, end);
    block_copy_inflight_req_begin(s, &req, start, end);

    while (start < end) {
        int64_t dirty_end;

        if (!bdrv_dirty_bitmap_get(s->copy_bitmap, start)) {
            trace_block_copy_skip(s, start);
            start += s->cluster_size;
            continue; /* already copied */
        }

        dirty_end = bdrv_dirty_bitmap_next_zero(s->copy_bitmap, start,
                                                (end - start));
        if (dirty_end < 0) {
            dirty_end = end;
        }

        if (s->skip_unallocated) {
            ret = block_copy_reset_unallocated(s, start, &status_bytes);
            if (ret == 0) {
                trace_block_copy_skip_range(s, start, status_bytes);
                start += status_bytes;
                continue;
            }
            /* Clamp to known allocated region */
            dirty_end = MIN(dirty_end, start + status_bytes);
        }

        trace_block_copy_process(s, start);

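        /*
         * Try copy_range offloading first; if it ever fails, fall back to the
         * bounce buffer for this and all subsequent chunks.
         */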
        if (s->use_copy_range) {
            ret = block_copy_with_offload(s, start, dirty_end);
            if (ret < 0) {
                s->use_copy_range = false;
            }
        }
        if (!s->use_copy_range) {
            ret = block_copy_with_bounce_buffer(s, start, dirty_end,
                                                error_is_read, &bounce_buffer);
        }
        if (ret < 0) {
            break;
        }

        start += ret;
        s->progress_bytes_callback(ret, s->progress_opaque);
        ret = 0;
    }

    if (bounce_buffer) {
        qemu_vfree(bounce_buffer);
    }

    block_copy_inflight_req_end(&req);

    return ret;
}