// SPDX-License-Identifier: GPL-2.0

#include <linux/iversion.h>
#include "ctree.h"
#include "reflink.h"
#include "transaction.h"

#define BTRFS_MAX_DEDUPE_LEN	SZ_16M

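/*
 * Finish an inode update after a clone/dedupe step: bump the inode version,
 * update mtime/ctime unless suppressed, extend i_size if the cloned range
 * grew the file, then write the inode item and end the transaction.
 */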
static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
				     struct inode *inode,
				     u64 endoff,
				     const u64 destoff,
				     const u64 olen,
				     int no_time_update)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret;

	inode_inc_iversion(inode);
	if (!no_time_update)
		inode->i_mtime = inode->i_ctime = current_time(inode);
	/*
	 * We round up to the block size at eof when determining which
	 * extents to clone above, but shouldn't round up the file size.
	 */
	if (endoff > destoff + olen)
		endoff = destoff + olen;
	if (endoff > inode->i_size) {
		i_size_write(inode, endoff);
		btrfs_inode_safe_disk_i_size_write(inode, 0);
	}

	ret = btrfs_update_inode(trans, root, inode);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		goto out;
	}
	ret = btrfs_end_transaction(trans);
out:
	return ret;
}

/*
 * Make sure we do not end up inserting an inline extent into a file that
 * already has other (non-inline) extents. If a file has an inline extent it
 * cannot have any other extents and the (single) inline extent must start at
 * file offset 0. Failing to respect these rules will lead to file corruption,
 * resulting in EIO errors on read/write operations, hitting BUG_ON()s in mm,
 * etc.
 *
 * We can have extents that have been already written to disk or we can have
 * dirty ranges still in delalloc, in which case the extent maps and items are
 * created only when we run delalloc, and the delalloc ranges might fall
 * outside the range we are currently locking in the inode's io tree. So we
 * check the inode's i_size because of that (i_size updates are done while
 * holding the i_mutex, which we are holding here).
 * We also check to see if the inode has a size not greater than "datal" but
 * has extents beyond it, due to a fallocate with FALLOC_FL_KEEP_SIZE (and we
 * are protected against such concurrent fallocate calls by the i_mutex).
 *
 * If the file has no extents but a size greater than datal, do not allow the
 * copy because we would need to turn the inline extent into a non-inline one
 * (even with NO_HOLES enabled). If we find that our destination inode has
 * only one inline extent, just overwrite it with the source inline extent if
 * its size is less than the source extent's size, or we could copy the source
 * inline extent's data into the destination inode's inline extent if the
 * latter is greater than the former.
 */
static int clone_copy_inline_extent(struct inode *dst,
				    struct btrfs_trans_handle *trans,
				    struct btrfs_path *path,
				    struct btrfs_key *new_key,
				    const u64 drop_start,
				    const u64 datal,
				    const u64 skip,
				    const u64 size,
				    char *inline_data)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(dst->i_sb);
	struct btrfs_root *root = BTRFS_I(dst)->root;
	const u64 aligned_end = ALIGN(new_key->offset + datal,
				      fs_info->sectorsize);
	int ret;
	struct btrfs_key key;

	if (new_key->offset > 0)
		return -EOPNOTSUPP;

	key.objectid = btrfs_ino(BTRFS_I(dst));
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0) {
		return ret;
	} else if (ret > 0) {
		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				return ret;
			else if (ret > 0)
				goto copy_inline_extent;
		}
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		if (key.objectid == btrfs_ino(BTRFS_I(dst)) &&
		    key.type == BTRFS_EXTENT_DATA_KEY) {
			ASSERT(key.offset > 0);
			return -EOPNOTSUPP;
		}
	} else if (i_size_read(dst) <= datal) {
		struct btrfs_file_extent_item *ei;
		u64 ext_len;

		/*
		 * If the file size is <= datal, make sure there are no other
		 * extents following (can happen due to a fallocate call with
		 * the flag FALLOC_FL_KEEP_SIZE).
		 */
		ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
				    struct btrfs_file_extent_item);
		/*
		 * If it's an inline extent, it cannot have other extents
		 * following it.
		 */
		if (btrfs_file_extent_type(path->nodes[0], ei) ==
		    BTRFS_FILE_EXTENT_INLINE)
			goto copy_inline_extent;

		ext_len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
		if (ext_len > aligned_end)
			return -EOPNOTSUPP;

		ret = btrfs_next_item(root, path);
		if (ret < 0) {
			return ret;
		} else if (ret == 0) {
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == btrfs_ino(BTRFS_I(dst)) &&
			    key.type == BTRFS_EXTENT_DATA_KEY)
				return -EOPNOTSUPP;
		}
	}

copy_inline_extent:
	/*
	 * We have no extent items, or we have an extent at offset 0 which may
	 * or may not be inlined. All these cases are dealt with in the same
	 * way.
	 */
	if (i_size_read(dst) > datal) {
		/*
		 * If the destination inode has an inline extent, this would
		 * require copying the data from the source inline extent into
		 * the beginning of the destination's inline extent. But this
		 * is really complex, both extents can be compressed or just
		 * one of them, which would require decompressing and
		 * re-compressing data (which could increase the new compressed
		 * size, not allowing the compressed data to fit anymore in an
		 * inline extent).
		 * So just don't support this case for now (it should be rare,
		 * we are not really saving space when cloning inline extents).
		 */
		return -EOPNOTSUPP;
	}

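	/*
	 * Drop everything in the destination range, then insert a fresh item
	 * sized to hold the (possibly trimmed) source inline data.
	 */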
	btrfs_release_path(path);
	ret = btrfs_drop_extents(trans, root, dst, drop_start, aligned_end, 1);
	if (ret)
		return ret;
	ret = btrfs_insert_empty_item(trans, root, path, new_key, size);
	if (ret)
		return ret;

	if (skip) {
		const u32 start = btrfs_file_extent_calc_inline_size(0);

		memmove(inline_data + start, inline_data + start + skip, datal);
	}

	write_extent_buffer(path->nodes[0], inline_data,
			    btrfs_item_ptr_offset(path->nodes[0],
						  path->slots[0]),
			    size);
	inode_add_bytes(dst, datal);
	set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(dst)->runtime_flags);

	return 0;
}

/**
 * btrfs_clone() - clone a range from one file to another
 *
 * @src: Inode to clone from
 * @inode: Inode to clone to
 * @off: Offset within source to start clone from
 * @olen: Original length, passed by user, of range to clone
 * @olen_aligned: Block-aligned value of olen
 * @destoff: Offset within @inode to start clone
 * @no_time_update: Whether to skip updating mtime/ctime on the target inode
 */
static int btrfs_clone(struct inode *src, struct inode *inode,
		       const u64 off, const u64 olen, const u64 olen_aligned,
		       const u64 destoff, int no_time_update)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_path *path = NULL;
	struct extent_buffer *leaf;
	struct btrfs_trans_handle *trans;
	char *buf = NULL;
	struct btrfs_key key;
	u32 nritems;
	int slot;
	int ret;
	const u64 len = olen_aligned;
	u64 last_dest_end = destoff;

	ret = -ENOMEM;
	buf = kvmalloc(fs_info->nodesize, GFP_KERNEL);
	if (!buf)
		return ret;

	path = btrfs_alloc_path();
	if (!path) {
		kvfree(buf);
		return ret;
	}

	path->reada = READA_FORWARD;
	/* Clone data */
	key.objectid = btrfs_ino(BTRFS_I(src));
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = off;

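	/*
	 * Walk the source extent items covering [off, off + len) and clone
	 * each of them into the destination inode, one per loop iteration.
	 */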
	while (1) {
		u64 next_key_min_offset = key.offset + 1;
		struct btrfs_file_extent_item *extent;
		int type;
		u32 size;
		struct btrfs_key new_key;
		u64 disko = 0, diskl = 0;
		u64 datao = 0, datal = 0;
		u8 comp;
		u64 drop_start;

		/* Note the key will change type as we walk through the tree */
		path->leave_spinning = 1;
		ret = btrfs_search_slot(NULL, BTRFS_I(src)->root, &key, path,
					0, 0);
		if (ret < 0)
			goto out;
		/*
		 * On the first search, if no extent item that starts at offset
		 * off was found but the previous item is an extent item, it
		 * might overlap our target range, so process it.
		 */
		if (key.offset == off && ret > 0 && path->slots[0] > 0) {
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0] - 1);
			if (key.type == BTRFS_EXTENT_DATA_KEY)
				path->slots[0]--;
		}

		nritems = btrfs_header_nritems(path->nodes[0]);
process_slot:
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(BTRFS_I(src)->root, path);
			if (ret < 0)
				goto out;
			if (ret > 0)
				break;
			nritems = btrfs_header_nritems(path->nodes[0]);
		}
		leaf = path->nodes[0];
		slot = path->slots[0];

		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (key.type > BTRFS_EXTENT_DATA_KEY ||
		    key.objectid != btrfs_ino(BTRFS_I(src)))
			break;

		ASSERT(key.type == BTRFS_EXTENT_DATA_KEY);

		extent = btrfs_item_ptr(leaf, slot,
					struct btrfs_file_extent_item);
		comp = btrfs_file_extent_compression(leaf, extent);
		type = btrfs_file_extent_type(leaf, extent);
		if (type == BTRFS_FILE_EXTENT_REG ||
		    type == BTRFS_FILE_EXTENT_PREALLOC) {
			disko = btrfs_file_extent_disk_bytenr(leaf, extent);
			diskl = btrfs_file_extent_disk_num_bytes(leaf, extent);
			datao = btrfs_file_extent_offset(leaf, extent);
			datal = btrfs_file_extent_num_bytes(leaf, extent);
		} else if (type == BTRFS_FILE_EXTENT_INLINE) {
			/* Take upper bound, may be compressed */
			datal = btrfs_file_extent_ram_bytes(leaf, extent);
		}

		/*
		 * The first search might have left us at an extent item that
		 * ends before our target range's start; this can happen if we
		 * have holes and the NO_HOLES feature enabled.
		 */
		if (key.offset + datal <= off) {
			path->slots[0]++;
			goto process_slot;
		} else if (key.offset >= off + len) {
			break;
		}
		next_key_min_offset = key.offset + datal;
		size = btrfs_item_size_nr(leaf, slot);
		read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, slot),
				   size);

		btrfs_release_path(path);
		path->leave_spinning = 0;

		memcpy(&new_key, &key, sizeof(new_key));
		new_key.objectid = btrfs_ino(BTRFS_I(inode));
		if (off <= key.offset)
			new_key.offset = key.offset + destoff - off;
		else
			new_key.offset = destoff;

		/*
		 * Deal with a hole that doesn't have an extent item
		 * representing it (NO_HOLES feature enabled).
		 * This hole is either in the middle of the cloning range or at
		 * its beginning (fully or partially overlapping it).
		 */
		if (new_key.offset != last_dest_end)
			drop_start = last_dest_end;
		else
			drop_start = new_key.offset;

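		/*
		 * Regular and preallocated extents are cloned by inserting a
		 * file extent item that references the source's disk extent,
		 * so no data is copied, only a new reference is taken.
		 */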
		if (type == BTRFS_FILE_EXTENT_REG ||
		    type == BTRFS_FILE_EXTENT_PREALLOC) {
			struct btrfs_clone_extent_info clone_info;

			/*
			 *    a  | --- range to clone ---|  b
			 * | ------------- extent ------------- |
			 */

			/* Subtract range b */
			if (key.offset + datal > off + len)
				datal = off + len - key.offset;

			/* Subtract range a */
			if (off > key.offset) {
				datao += off - key.offset;
				datal -= off - key.offset;
			}

			clone_info.disk_offset = disko;
			clone_info.disk_len = diskl;
			clone_info.data_offset = datao;
			clone_info.data_len = datal;
			clone_info.file_offset = new_key.offset;
			clone_info.extent_buf = buf;
			clone_info.item_size = size;
			ret = btrfs_punch_hole_range(inode, path, drop_start,
					new_key.offset + datal - 1, &clone_info,
					&trans);
			if (ret)
				goto out;
		} else if (type == BTRFS_FILE_EXTENT_INLINE) {
			u64 skip = 0;
			u64 trim = 0;

			if (off > key.offset) {
				skip = off - key.offset;
				new_key.offset += skip;
			}

			if (key.offset + datal > off + len)
				trim = key.offset + datal - (off + len);

			if (comp && (skip || trim)) {
				ret = -EINVAL;
				goto out;
			}
			size -= skip + trim;
			datal -= skip + trim;

			/*
			 * If our extent is inline, we know we will drop or
			 * adjust at most 1 extent item in the destination root.
			 *
			 * 1 - adjusting old extent (we may have to split it)
			 * 1 - add new extent
			 * 1 - inode update
			 */
			trans = btrfs_start_transaction(root, 3);
			if (IS_ERR(trans)) {
				ret = PTR_ERR(trans);
				goto out;
			}

			ret = clone_copy_inline_extent(inode, trans, path,
						       &new_key, drop_start,
						       datal, skip, size, buf);
			if (ret) {
				if (ret != -EOPNOTSUPP)
					btrfs_abort_transaction(trans, ret);
				btrfs_end_transaction(trans);
				goto out;
			}
		}

		btrfs_release_path(path);

		last_dest_end = ALIGN(new_key.offset + datal,
				      fs_info->sectorsize);
		ret = clone_finish_inode_update(trans, inode, last_dest_end,
						destoff, olen, no_time_update);
		if (ret)
			goto out;
		if (new_key.offset + datal >= destoff + len)
			break;

		btrfs_release_path(path);
		key.offset = next_key_min_offset;

		if (fatal_signal_pending(current)) {
			ret = -EINTR;
			goto out;
		}
	}
	ret = 0;

	if (last_dest_end < destoff + len) {
		/*
		 * We have an implicit hole that fully or partially overlaps our
		 * cloning range at its end. This means that we either have the
		 * NO_HOLES feature enabled or the implicit hole happened due to
		 * mixing buffered and direct IO writes against this file.
		 */
		btrfs_release_path(path);
		path->leave_spinning = 0;

		ret = btrfs_punch_hole_range(inode, path, last_dest_end,
					     destoff + len - 1, NULL, &trans);
		if (ret)
			goto out;

		ret = clone_finish_inode_update(trans, inode, destoff + len,
						destoff, olen, no_time_update);
	}

out:
	btrfs_free_path(path);
	kvfree(buf);
	return ret;
}

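/*
 * Lock/unlock the extent io trees of both inodes. Locking happens in a
 * consistent order (by inode, then by offset) so that two tasks remapping
 * the same pair of ranges cannot deadlock against each other.
 */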
static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
				       struct inode *inode2, u64 loff2, u64 len)
{
	unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
	unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
}

static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
				     struct inode *inode2, u64 loff2, u64 len)
{
	if (inode1 < inode2) {
		swap(inode1, inode2);
		swap(loff1, loff2);
	} else if (inode1 == inode2 && loff2 < loff1) {
		swap(loff1, loff2);
	}
	lock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
	lock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
}

static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 len,
				   struct inode *dst, u64 dst_loff)
{
	const u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
	int ret;

	/*
	 * Lock destination range to serialize with concurrent readpages() and
	 * source range to serialize with relocation.
	 */
	btrfs_double_extent_lock(src, loff, dst, dst_loff, len);
	ret = btrfs_clone(src, dst, loff, len, ALIGN(len, bs), dst_loff, 1);
	btrfs_double_extent_unlock(src, loff, dst, dst_loff, len);

	return ret;
}

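/*
 * Deduplicate the requested range in chunks of at most BTRFS_MAX_DEDUPE_LEN
 * bytes, so each btrfs_extent_same_range() call locks a bounded extent range.
 */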
static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
			     struct inode *dst, u64 dst_loff)
{
	int ret;
	u64 i, tail_len, chunk_count;
	struct btrfs_root *root_dst = BTRFS_I(dst)->root;

	spin_lock(&root_dst->root_item_lock);
	if (root_dst->send_in_progress) {
		btrfs_warn_rl(root_dst->fs_info,
"cannot deduplicate to root %llu while send operations are using it (%d in progress)",
			      root_dst->root_key.objectid,
			      root_dst->send_in_progress);
		spin_unlock(&root_dst->root_item_lock);
		return -EAGAIN;
	}
	root_dst->dedupe_in_progress++;
	spin_unlock(&root_dst->root_item_lock);

	tail_len = olen % BTRFS_MAX_DEDUPE_LEN;
	chunk_count = div_u64(olen, BTRFS_MAX_DEDUPE_LEN);

	for (i = 0; i < chunk_count; i++) {
		ret = btrfs_extent_same_range(src, loff, BTRFS_MAX_DEDUPE_LEN,
					      dst, dst_loff);
		if (ret)
			goto out;

		loff += BTRFS_MAX_DEDUPE_LEN;
		dst_loff += BTRFS_MAX_DEDUPE_LEN;
	}

	if (tail_len > 0)
		ret = btrfs_extent_same_range(src, loff, tail_len, dst, dst_loff);
out:
	spin_lock(&root_dst->root_item_lock);
	root_dst->dedupe_in_progress--;
	spin_unlock(&root_dst->root_item_lock);

	return ret;
}

static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
				      u64 off, u64 olen, u64 destoff)
{
	struct inode *inode = file_inode(file);
	struct inode *src = file_inode(file_src);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	int ret;
	u64 len = olen;
	u64 bs = fs_info->sb->s_blocksize;

	/*
	 * TODO:
	 * - split compressed inline extents. annoying: we need to
	 *   decompress into destination's address_space (the file offset
	 *   may change, so source mapping won't do), then recompress (or
	 *   otherwise reinsert) a subrange.
	 *
	 * - split destination inode's inline extents. The inline extents can
	 *   be either compressed or non-compressed.
	 */

	/*
	 * VFS's generic_remap_file_range_prep() protects us from cloning the
	 * eof block into the middle of a file, which would result in corruption
	 * if the file size is not blocksize aligned. So we don't need to check
	 * for that case here.
	 */
	if (off + len == src->i_size)
		len = ALIGN(src->i_size, bs) - off;

	if (destoff > inode->i_size) {
		const u64 wb_start = ALIGN_DOWN(inode->i_size, bs);

		ret = btrfs_cont_expand(inode, inode->i_size, destoff);
		if (ret)
			return ret;
		/*
		 * We may have truncated the last block if the inode's size is
		 * not sector size aligned, so we need to wait for writeback to
		 * complete before proceeding further, otherwise we can race
		 * with cloning and attempt to increment a reference to an
		 * extent that no longer exists (writeback completed right
		 * after we found the previous extent covering eof and before
		 * we attempted to increment its reference count).
		 */
		ret = btrfs_wait_ordered_range(inode, wb_start,
					       destoff - wb_start);
		if (ret)
			return ret;
	}

	/*
	 * Lock destination range to serialize with concurrent readpages() and
	 * source range to serialize with relocation.
	 */
	btrfs_double_extent_lock(src, off, inode, destoff, len);
	ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);
	btrfs_double_extent_unlock(src, off, inode, destoff, len);
	/*
	 * Truncate page cache pages so that future reads will see the cloned
	 * data immediately and not the previous data.
	 */
	truncate_inode_pages_range(&inode->i_data,
				   round_down(destoff, PAGE_SIZE),
				   round_up(destoff + len, PAGE_SIZE) - 1);

	return ret;
}

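/*
 * Validate a remap request and prepare both inodes: check root/mount
 * compatibility and checksum flags, flush and wait for ordered IO, then hand
 * the generic range checks off to generic_remap_file_range_prep().
 */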
static int btrfs_remap_file_range_prep(struct file *file_in, loff_t pos_in,
				       struct file *file_out, loff_t pos_out,
				       loff_t *len, unsigned int remap_flags)
{
	struct inode *inode_in = file_inode(file_in);
	struct inode *inode_out = file_inode(file_out);
	u64 bs = BTRFS_I(inode_out)->root->fs_info->sb->s_blocksize;
	bool same_inode = inode_out == inode_in;
	u64 wb_len;
	int ret;

	if (!(remap_flags & REMAP_FILE_DEDUP)) {
		struct btrfs_root *root_out = BTRFS_I(inode_out)->root;

		if (btrfs_root_readonly(root_out))
			return -EROFS;

		if (file_in->f_path.mnt != file_out->f_path.mnt ||
		    inode_in->i_sb != inode_out->i_sb)
			return -EXDEV;
	}

	/* Don't make the dst file partly checksummed */
	if ((BTRFS_I(inode_in)->flags & BTRFS_INODE_NODATASUM) !=
	    (BTRFS_I(inode_out)->flags & BTRFS_INODE_NODATASUM)) {
		return -EINVAL;
	}

	/*
	 * Now that the inodes are locked, we need to start writeback ourselves
	 * and can not rely on the writeback from the VFS's generic helper
	 * generic_remap_file_range_prep() because:
	 *
	 * 1) For compression we must call filemap_fdatawrite_range() twice
	 *    (btrfs_fdatawrite_range() does it for us), and the generic
	 *    helper only calls it once;
	 *
	 * 2) filemap_fdatawrite_range(), called by the generic helper, only
	 *    waits for the writeback to complete, i.e. for IO to be done, and
	 *    not for the ordered extents to complete. We need to wait for them
	 *    to complete so that new file extent items are in the fs tree.
	 */
	if (*len == 0 && !(remap_flags & REMAP_FILE_DEDUP))
		wb_len = ALIGN(inode_in->i_size, bs) - ALIGN_DOWN(pos_in, bs);
	else
		wb_len = ALIGN(*len, bs);

	/*
	 * Since we don't lock ranges, wait for ongoing lockless dio writes (any
	 * in progress could create new ordered extents after we wait for
	 * existing ordered extents below).
	 */
	inode_dio_wait(inode_in);
	if (!same_inode)
		inode_dio_wait(inode_out);

	/*
	 * Workaround to make sure NOCOW buffered writes reach disk as NOCOW.
	 *
	 * Btrfs' back references do not have a block level granularity, they
	 * work at the whole extent level.
	 * A NOCOW buffered write without data space reserved may not be able
	 * to fall back to CoW due to lack of data space, and thus could cause
	 * data loss.
	 *
	 * Here we take a shortcut by flushing the whole inode, so that all
	 * NOCOW writes reach disk as NOCOW before we increase the reference
	 * count of the extent. We could do better by only flushing NOCOW
	 * data, but that needs extra accounting.
	 *
	 * Also we don't need to check ASYNC_EXTENT, as async extents will be
	 * CoWed anyway, not affecting the NOCOW part.
	 */
	ret = filemap_flush(inode_in->i_mapping);
	if (ret < 0)
		return ret;

	ret = btrfs_wait_ordered_range(inode_in, ALIGN_DOWN(pos_in, bs),
				       wb_len);
	if (ret < 0)
		return ret;
	ret = btrfs_wait_ordered_range(inode_out, ALIGN_DOWN(pos_out, bs),
				       wb_len);
	if (ret < 0)
		return ret;

	return generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
					     len, remap_flags);
}

loff_t btrfs_remap_file_range(struct file *src_file, loff_t off,
			      struct file *dst_file, loff_t destoff,
			      loff_t len, unsigned int remap_flags)
{
	struct inode *src_inode = file_inode(src_file);
	struct inode *dst_inode = file_inode(dst_file);
	bool same_inode = dst_inode == src_inode;
	int ret;

	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
		return -EINVAL;

	if (same_inode)
		inode_lock(src_inode);
	else
		lock_two_nondirectories(src_inode, dst_inode);

	ret = btrfs_remap_file_range_prep(src_file, off, dst_file, destoff,
					  &len, remap_flags);
	if (ret < 0 || len == 0)
		goto out_unlock;

	if (remap_flags & REMAP_FILE_DEDUP)
		ret = btrfs_extent_same(src_inode, off, len, dst_inode, destoff);
	else
		ret = btrfs_clone_files(dst_file, src_file, off, len, destoff);

out_unlock:
	if (same_inode)
		inode_unlock(src_inode);
	else
		unlock_two_nondirectories(src_inode, dst_inode);

	return ret < 0 ? ret : len;
}