/*
 * block_copy API
 *
 * Copyright (C) 2013 Proxmox Server Solutions
 * Copyright (c) 2019 Virtuozzo International GmbH.
 *
 * Authors:
 *  Dietmar Maurer (dietmar@proxmox.com)
 *  Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"

#include "trace.h"
#include "qapi/error.h"
#include "block/block-copy.h"
#include "sysemu/block-backend.h"

static void coroutine_fn block_copy_wait_inflight_reqs(BlockCopyState *s,
                                                       int64_t start,
                                                       int64_t end)
{
    BlockCopyInFlightReq *req;
    bool waited;

    do {
        waited = false;
        QLIST_FOREACH(req, &s->inflight_reqs, list) {
            if (end > req->start_byte && start < req->end_byte) {
                qemu_co_queue_wait(&req->wait_queue, NULL);
                waited = true;
                break;
            }
        }
    } while (waited);
}

static void block_copy_inflight_req_begin(BlockCopyState *s,
                                          BlockCopyInFlightReq *req,
                                          int64_t start, int64_t end)
{
    req->start_byte = start;
    req->end_byte = end;
    qemu_co_queue_init(&req->wait_queue);
    QLIST_INSERT_HEAD(&s->inflight_reqs, req, list);
}

static void coroutine_fn block_copy_inflight_req_end(BlockCopyInFlightReq *req)
{
    QLIST_REMOVE(req, list);
    qemu_co_queue_restart_all(&req->wait_queue);
}

void block_copy_state_free(BlockCopyState *s)
{
    if (!s) {
        return;
    }

    bdrv_release_dirty_bitmap(s->copy_bitmap);
    g_free(s);
}

BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
                                     int64_t cluster_size,
                                     BdrvRequestFlags write_flags, Error **errp)
{
    BlockCopyState *s;
    BdrvDirtyBitmap *copy_bitmap;
    uint32_t max_transfer =
            MIN_NON_ZERO(INT_MAX, MIN_NON_ZERO(source->bs->bl.max_transfer,
                                               target->bs->bl.max_transfer));

    copy_bitmap = bdrv_create_dirty_bitmap(source->bs, cluster_size, NULL,
                                           errp);
    if (!copy_bitmap) {
        return NULL;
    }
    bdrv_disable_dirty_bitmap(copy_bitmap);

    s = g_new(BlockCopyState, 1);
    *s = (BlockCopyState) {
        .source = source,
        .target = target,
        .copy_bitmap = copy_bitmap,
        .cluster_size = cluster_size,
        .len = bdrv_dirty_bitmap_size(copy_bitmap),
        .write_flags = write_flags,
    };

    s->copy_range_size = QEMU_ALIGN_DOWN(max_transfer, cluster_size);
    /*
     * Set use_copy_range, considering the following:
     * 1. Compression is not supported for copy_range.
     * 2. copy_range does not respect max_transfer (it's a TODO), so we factor
     *    that in here. If max_transfer is smaller than cluster_size, we do
     *    not use copy_range (in that case copy_range_size is zero after
     *    aligning down above).
     */
    s->use_copy_range =
        !(write_flags & BDRV_REQ_WRITE_COMPRESSED) && s->copy_range_size > 0;

    QLIST_INIT(&s->inflight_reqs);

    return s;
}

void block_copy_set_callbacks(
        BlockCopyState *s,
        ProgressBytesCallbackFunc progress_bytes_callback,
        ProgressResetCallbackFunc progress_reset_callback,
        void *progress_opaque)
{
    s->progress_bytes_callback = progress_bytes_callback;
    s->progress_reset_callback = progress_reset_callback;
    s->progress_opaque = progress_opaque;
}
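
/*
 * Illustrative usage sketch (not part of this API): roughly how a caller such
 * as a copying job is expected to wire up a BlockCopyState and its progress
 * callbacks.  The callback bodies, the ExampleJob struct and its fields are
 * hypothetical placeholders; only block_copy_state_new() and
 * block_copy_set_callbacks() above are real.
 *
 *     static void example_progress_bytes(int64_t bytes, void *opaque)
 *     {
 *         ExampleJob *job = opaque;       // hypothetical caller state
 *         job->bytes_copied += bytes;     // advance the progress counter
 *     }
 *
 *     static void example_progress_reset(void *opaque)
 *     {
 *         ExampleJob *job = opaque;
 *         // recalculate remaining work, e.g. after unallocated clusters
 *         // were dropped from the copy_bitmap
 *     }
 *
 *     s = block_copy_state_new(source_child, target_child, cluster_size,
 *                              write_flags, errp);
 *     if (s) {
 *         block_copy_set_callbacks(s, example_progress_bytes,
 *                                  example_progress_reset, job);
 *     }
 */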

/*
 * Copy a range to the target using a bounce buffer and return the number of
 * bytes copied.  If an error occurs, return a negative error number.
 */
static int coroutine_fn block_copy_with_bounce_buffer(BlockCopyState *s,
                                                      int64_t start,
                                                      int64_t end,
                                                      bool *error_is_read,
                                                      void **bounce_buffer)
{
    int ret;
    int nbytes;

    assert(QEMU_IS_ALIGNED(start, s->cluster_size));
    bdrv_reset_dirty_bitmap(s->copy_bitmap, start, s->cluster_size);
    nbytes = MIN(s->cluster_size, s->len - start);
    if (!*bounce_buffer) {
        *bounce_buffer = qemu_blockalign(s->source->bs, s->cluster_size);
    }

    ret = bdrv_co_pread(s->source, start, nbytes, *bounce_buffer, 0);
    if (ret < 0) {
        trace_block_copy_with_bounce_buffer_read_fail(s, start, ret);
        if (error_is_read) {
            *error_is_read = true;
        }
        goto fail;
    }

    ret = bdrv_co_pwrite(s->target, start, nbytes, *bounce_buffer,
                         s->write_flags);
    if (ret < 0) {
        trace_block_copy_with_bounce_buffer_write_fail(s, start, ret);
        if (error_is_read) {
            *error_is_read = false;
        }
        goto fail;
    }

    return nbytes;
fail:
    bdrv_set_dirty_bitmap(s->copy_bitmap, start, s->cluster_size);
    return ret;
}

/*
 * Copy a range to the target using offloading (copy_range) and return the
 * number of bytes copied.  If an error occurs, return a negative error
 * number.
 */
static int coroutine_fn block_copy_with_offload(BlockCopyState *s,
                                                int64_t start,
                                                int64_t end)
{
    int ret;
    int nr_clusters;
    int nbytes;

    assert(QEMU_IS_ALIGNED(s->copy_range_size, s->cluster_size));
    assert(QEMU_IS_ALIGNED(start, s->cluster_size));
    nbytes = MIN(s->copy_range_size, MIN(end, s->len) - start);
    nr_clusters = DIV_ROUND_UP(nbytes, s->cluster_size);
    bdrv_reset_dirty_bitmap(s->copy_bitmap, start,
                            s->cluster_size * nr_clusters);
    ret = bdrv_co_copy_range(s->source, start, s->target, start, nbytes,
                             0, s->write_flags);
    if (ret < 0) {
        trace_block_copy_with_offload_fail(s, start, ret);
        bdrv_set_dirty_bitmap(s->copy_bitmap, start,
                              s->cluster_size * nr_clusters);
        return ret;
    }

    return nbytes;
}

/*
 * Check whether the cluster starting at @offset is allocated.
 * Return via @pnum the number of contiguous clusters sharing this allocation
 * status.
 */
static int block_copy_is_cluster_allocated(BlockCopyState *s, int64_t offset,
                                           int64_t *pnum)
{
    BlockDriverState *bs = s->source->bs;
    int64_t count, total_count = 0;
    int64_t bytes = s->len - offset;
    int ret;

    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));

    while (true) {
        ret = bdrv_is_allocated(bs, offset, bytes, &count);
        if (ret < 0) {
            return ret;
        }

        total_count += count;

        if (ret || count == 0) {
            /*
             * ret: partial segment(s) are considered allocated.
             * otherwise: unallocated tail is treated as an entire segment.
             */
            *pnum = DIV_ROUND_UP(total_count, s->cluster_size);
            return ret;
        }

        /* Unallocated segment(s) with uncertain following segment(s) */
        if (total_count >= s->cluster_size) {
            *pnum = total_count / s->cluster_size;
            return 0;
        }

        offset += count;
        bytes -= count;
    }
}
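
/*
 * Worked example (illustrative, assuming a cluster_size of 64 KiB):
 * if bdrv_is_allocated() reports an allocated run of 10 KiB at @offset, the
 * run only partially covers the first cluster, so the loop above returns
 * ret = 1 with *pnum = DIV_ROUND_UP(10 KiB, 64 KiB) = 1, i.e. the whole
 * cluster counts as allocated.  Conversely, a 100 KiB unallocated run
 * followed by data yields ret = 0 with *pnum = 100 KiB / 64 KiB = 1: only
 * clusters that are unallocated in full are reported as unallocated, and the
 * partially covered second cluster is left for the next query.
 */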

/*
 * Reset bits in copy_bitmap starting at @offset if they represent unallocated
 * data in the image.  May reset subsequent contiguous bits.
 * @return 0 when the cluster at @offset was unallocated,
 *         1 otherwise, and a negative error code on error.
 */
int64_t block_copy_reset_unallocated(BlockCopyState *s,
                                     int64_t offset, int64_t *count)
{
    int ret;
    int64_t clusters, bytes;

    ret = block_copy_is_cluster_allocated(s, offset, &clusters);
    if (ret < 0) {
        return ret;
    }

    bytes = clusters * s->cluster_size;

    if (!ret) {
        bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
        s->progress_reset_callback(s->progress_opaque);
    }

    *count = bytes;
    return ret;
}

int coroutine_fn block_copy(BlockCopyState *s,
                            int64_t start, uint64_t bytes,
                            bool *error_is_read)
{
    int ret = 0;
    int64_t end = bytes + start; /* bytes */
    void *bounce_buffer = NULL;
    int64_t status_bytes;
    BlockCopyInFlightReq req;

    /*
     * The block_copy() user is responsible for keeping source and target in
     * the same aio context.
     */
    assert(bdrv_get_aio_context(s->source->bs) ==
           bdrv_get_aio_context(s->target->bs));

    assert(QEMU_IS_ALIGNED(start, s->cluster_size));
    assert(QEMU_IS_ALIGNED(end, s->cluster_size));

    block_copy_wait_inflight_reqs(s, start, end);
    block_copy_inflight_req_begin(s, &req, start, end);

    while (start < end) {
        int64_t dirty_end;

        if (!bdrv_dirty_bitmap_get(s->copy_bitmap, start)) {
            trace_block_copy_skip(s, start);
            start += s->cluster_size;
            continue; /* already copied */
        }

        dirty_end = bdrv_dirty_bitmap_next_zero(s->copy_bitmap, start,
                                                (end - start));
        if (dirty_end < 0) {
            dirty_end = end;
        }

        if (s->skip_unallocated) {
            ret = block_copy_reset_unallocated(s, start, &status_bytes);
            if (ret == 0) {
                trace_block_copy_skip_range(s, start, status_bytes);
                start += status_bytes;
                continue;
            }
            /* Clamp to known allocated region */
            dirty_end = MIN(dirty_end, start + status_bytes);
        }

        trace_block_copy_process(s, start);

        if (s->use_copy_range) {
            ret = block_copy_with_offload(s, start, dirty_end);
            if (ret < 0) {
                s->use_copy_range = false;
            }
        }
        if (!s->use_copy_range) {
            ret = block_copy_with_bounce_buffer(s, start, dirty_end,
                                                error_is_read, &bounce_buffer);
        }
        if (ret < 0) {
            break;
        }

        start += ret;
        s->progress_bytes_callback(ret, s->progress_opaque);
        ret = 0;
    }

    if (bounce_buffer) {
        qemu_vfree(bounce_buffer);
    }

    block_copy_inflight_req_end(&req);

    return ret;
}
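
/*
 * Illustrative caller sketch (not part of this file): the intended call
 * pattern for block_copy() from a coroutine-based copying job.  The name
 * example_do_copy and the assumption that @total_len is a multiple of
 * s->cluster_size are hypothetical; cluster alignment of the start offset and
 * byte count is the caller's responsibility, as asserted above.
 *
 *     static int coroutine_fn example_do_copy(BlockCopyState *s,
 *                                             int64_t total_len)
 *     {
 *         int64_t offset;
 *         int ret;
 *
 *         for (offset = 0; offset < total_len; offset += s->cluster_size) {
 *             bool error_is_read;
 *
 *             ret = block_copy(s, offset, s->cluster_size, &error_is_read);
 *             if (ret < 0) {
 *                 // a real job would consult its on-source/on-target error
 *                 // policy, based on error_is_read, before retrying or
 *                 // failing the whole operation
 *                 return ret;
 *             }
 *         }
 *         return 0;
 *     }
 */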