1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2007 Oracle. All rights reserved. 4 */ 5 6 #include <linux/bio.h> 7 #include <linux/slab.h> 8 #include <linux/pagemap.h> 9 #include <linux/highmem.h> 10 #include <linux/sched/mm.h> 11 #include <crypto/hash.h> 12 #include "misc.h" 13 #include "ctree.h" 14 #include "disk-io.h" 15 #include "transaction.h" 16 #include "volumes.h" 17 #include "print-tree.h" 18 #include "compression.h" 19 20 #define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \ 21 sizeof(struct btrfs_item) * 2) / \ 22 size) - 1)) 23 24 #define MAX_CSUM_ITEMS(r, size) (min_t(u32, __MAX_CSUM_ITEMS(r, size), \ 25 PAGE_SIZE)) 26 27 /** 28 * Set inode's size according to filesystem options 29 * 30 * @inode: inode we want to update the disk_i_size for 31 * @new_i_size: i_size we want to set to, 0 if we use i_size 32 * 33 * With NO_HOLES set this simply sets the disk_is_size to whatever i_size_read() 34 * returns as it is perfectly fine with a file that has holes without hole file 35 * extent items. 36 * 37 * However without NO_HOLES we need to only return the area that is contiguous 38 * from the 0 offset of the file. Otherwise we could end up adjust i_size up 39 * to an extent that has a gap in between. 40 * 41 * Finally new_i_size should only be set in the case of truncate where we're not 42 * ready to use i_size_read() as the limiter yet. 43 */ 44 void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_size) 45 { 46 struct btrfs_fs_info *fs_info = inode->root->fs_info; 47 u64 start, end, i_size; 48 int ret; 49 50 i_size = new_i_size ?: i_size_read(&inode->vfs_inode); 51 if (btrfs_fs_incompat(fs_info, NO_HOLES)) { 52 inode->disk_i_size = i_size; 53 return; 54 } 55 56 spin_lock(&inode->lock); 57 ret = find_contiguous_extent_bit(&inode->file_extent_tree, 0, &start, 58 &end, EXTENT_DIRTY); 59 if (!ret && start == 0) 60 i_size = min(i_size, end + 1); 61 else 62 i_size = 0; 63 inode->disk_i_size = i_size; 64 spin_unlock(&inode->lock); 65 } 66 67 /** 68 * Mark range within a file as having a new extent inserted 69 * 70 * @inode: inode being modified 71 * @start: start file offset of the file extent we've inserted 72 * @len: logical length of the file extent item 73 * 74 * Call when we are inserting a new file extent where there was none before. 75 * Does not need to call this in the case where we're replacing an existing file 76 * extent, however if not sure it's fine to call this multiple times. 77 * 78 * The start and len must match the file extent item, so thus must be sectorsize 79 * aligned. 80 */ 81 int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start, 82 u64 len) 83 { 84 if (len == 0) 85 return 0; 86 87 ASSERT(IS_ALIGNED(start + len, inode->root->fs_info->sectorsize)); 88 89 if (btrfs_fs_incompat(inode->root->fs_info, NO_HOLES)) 90 return 0; 91 return set_extent_bits(&inode->file_extent_tree, start, start + len - 1, 92 EXTENT_DIRTY); 93 } 94 95 /** 96 * Marks an inode range as not having a backing extent 97 * 98 * @inode: inode being modified 99 * @start: start file offset of the file extent we've inserted 100 * @len: logical length of the file extent item 101 * 102 * Called when we drop a file extent, for example when we truncate. Doesn't 103 * need to be called for cases where we're replacing a file extent, like when 104 * we've COWed a file extent. 105 * 106 * The start and len must match the file extent item, so thus must be sectorsize 107 * aligned. 108 */ 109 int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start, 110 u64 len) 111 { 112 if (len == 0) 113 return 0; 114 115 ASSERT(IS_ALIGNED(start + len, inode->root->fs_info->sectorsize) || 116 len == (u64)-1); 117 118 if (btrfs_fs_incompat(inode->root->fs_info, NO_HOLES)) 119 return 0; 120 return clear_extent_bit(&inode->file_extent_tree, start, 121 start + len - 1, EXTENT_DIRTY, 0, 0, NULL); 122 } 123 124 static inline u32 max_ordered_sum_bytes(struct btrfs_fs_info *fs_info, 125 u16 csum_size) 126 { 127 u32 ncsums = (PAGE_SIZE - sizeof(struct btrfs_ordered_sum)) / csum_size; 128 129 return ncsums * fs_info->sectorsize; 130 } 131 132 int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, 133 struct btrfs_root *root, 134 u64 objectid, u64 pos, 135 u64 disk_offset, u64 disk_num_bytes, 136 u64 num_bytes, u64 offset, u64 ram_bytes, 137 u8 compression, u8 encryption, u16 other_encoding) 138 { 139 int ret = 0; 140 struct btrfs_file_extent_item *item; 141 struct btrfs_key file_key; 142 struct btrfs_path *path; 143 struct extent_buffer *leaf; 144 145 path = btrfs_alloc_path(); 146 if (!path) 147 return -ENOMEM; 148 file_key.objectid = objectid; 149 file_key.offset = pos; 150 file_key.type = BTRFS_EXTENT_DATA_KEY; 151 152 ret = btrfs_insert_empty_item(trans, root, path, &file_key, 153 sizeof(*item)); 154 if (ret < 0) 155 goto out; 156 BUG_ON(ret); /* Can't happen */ 157 leaf = path->nodes[0]; 158 item = btrfs_item_ptr(leaf, path->slots[0], 159 struct btrfs_file_extent_item); 160 btrfs_set_file_extent_disk_bytenr(leaf, item, disk_offset); 161 btrfs_set_file_extent_disk_num_bytes(leaf, item, disk_num_bytes); 162 btrfs_set_file_extent_offset(leaf, item, offset); 163 btrfs_set_file_extent_num_bytes(leaf, item, num_bytes); 164 btrfs_set_file_extent_ram_bytes(leaf, item, ram_bytes); 165 btrfs_set_file_extent_generation(leaf, item, trans->transid); 166 btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG); 167 btrfs_set_file_extent_compression(leaf, item, compression); 168 btrfs_set_file_extent_encryption(leaf, item, encryption); 169 btrfs_set_file_extent_other_encoding(leaf, item, other_encoding); 170 171 btrfs_mark_buffer_dirty(leaf); 172 out: 173 btrfs_free_path(path); 174 return ret; 175 } 176 177 static struct btrfs_csum_item * 178 btrfs_lookup_csum(struct btrfs_trans_handle *trans, 179 struct btrfs_root *root, 180 struct btrfs_path *path, 181 u64 bytenr, int cow) 182 { 183 struct btrfs_fs_info *fs_info = root->fs_info; 184 int ret; 185 struct btrfs_key file_key; 186 struct btrfs_key found_key; 187 struct btrfs_csum_item *item; 188 struct extent_buffer *leaf; 189 u64 csum_offset = 0; 190 const u32 csum_size = fs_info->csum_size; 191 int csums_in_item; 192 193 file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; 194 file_key.offset = bytenr; 195 file_key.type = BTRFS_EXTENT_CSUM_KEY; 196 ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow); 197 if (ret < 0) 198 goto fail; 199 leaf = path->nodes[0]; 200 if (ret > 0) { 201 ret = 1; 202 if (path->slots[0] == 0) 203 goto fail; 204 path->slots[0]--; 205 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 206 if (found_key.type != BTRFS_EXTENT_CSUM_KEY) 207 goto fail; 208 209 csum_offset = (bytenr - found_key.offset) >> 210 fs_info->sectorsize_bits; 211 csums_in_item = btrfs_item_size(leaf, path->slots[0]); 212 csums_in_item /= csum_size; 213 214 if (csum_offset == csums_in_item) { 215 ret = -EFBIG; 216 goto fail; 217 } else if (csum_offset > csums_in_item) { 218 goto fail; 219 } 220 } 221 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); 222 item = (struct btrfs_csum_item *)((unsigned char *)item + 223 csum_offset * csum_size); 224 return item; 225 fail: 226 if (ret > 0) 227 ret = -ENOENT; 228 return ERR_PTR(ret); 229 } 230 231 int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, 232 struct btrfs_root *root, 233 struct btrfs_path *path, u64 objectid, 234 u64 offset, int mod) 235 { 236 struct btrfs_key file_key; 237 int ins_len = mod < 0 ? -1 : 0; 238 int cow = mod != 0; 239 240 file_key.objectid = objectid; 241 file_key.offset = offset; 242 file_key.type = BTRFS_EXTENT_DATA_KEY; 243 244 return btrfs_search_slot(trans, root, &file_key, path, ins_len, cow); 245 } 246 247 /* 248 * Find checksums for logical bytenr range [disk_bytenr, disk_bytenr + len) and 249 * estore the result to @dst. 250 * 251 * Return >0 for the number of sectors we found. 252 * Return 0 for the range [disk_bytenr, disk_bytenr + sectorsize) has no csum 253 * for it. Caller may want to try next sector until one range is hit. 254 * Return <0 for fatal error. 255 */ 256 static int search_csum_tree(struct btrfs_fs_info *fs_info, 257 struct btrfs_path *path, u64 disk_bytenr, 258 u64 len, u8 *dst) 259 { 260 struct btrfs_root *csum_root; 261 struct btrfs_csum_item *item = NULL; 262 struct btrfs_key key; 263 const u32 sectorsize = fs_info->sectorsize; 264 const u32 csum_size = fs_info->csum_size; 265 u32 itemsize; 266 int ret; 267 u64 csum_start; 268 u64 csum_len; 269 270 ASSERT(IS_ALIGNED(disk_bytenr, sectorsize) && 271 IS_ALIGNED(len, sectorsize)); 272 273 /* Check if the current csum item covers disk_bytenr */ 274 if (path->nodes[0]) { 275 item = btrfs_item_ptr(path->nodes[0], path->slots[0], 276 struct btrfs_csum_item); 277 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 278 itemsize = btrfs_item_size(path->nodes[0], path->slots[0]); 279 280 csum_start = key.offset; 281 csum_len = (itemsize / csum_size) * sectorsize; 282 283 if (in_range(disk_bytenr, csum_start, csum_len)) 284 goto found; 285 } 286 287 /* Current item doesn't contain the desired range, search again */ 288 btrfs_release_path(path); 289 csum_root = btrfs_csum_root(fs_info, disk_bytenr); 290 item = btrfs_lookup_csum(NULL, csum_root, path, disk_bytenr, 0); 291 if (IS_ERR(item)) { 292 ret = PTR_ERR(item); 293 goto out; 294 } 295 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 296 itemsize = btrfs_item_size(path->nodes[0], path->slots[0]); 297 298 csum_start = key.offset; 299 csum_len = (itemsize / csum_size) * sectorsize; 300 ASSERT(in_range(disk_bytenr, csum_start, csum_len)); 301 302 found: 303 ret = (min(csum_start + csum_len, disk_bytenr + len) - 304 disk_bytenr) >> fs_info->sectorsize_bits; 305 read_extent_buffer(path->nodes[0], dst, (unsigned long)item, 306 ret * csum_size); 307 out: 308 if (ret == -ENOENT || ret == -EFBIG) 309 ret = 0; 310 return ret; 311 } 312 313 /* 314 * Locate the file_offset of @cur_disk_bytenr of a @bio. 315 * 316 * Bio of btrfs represents read range of 317 * [bi_sector << 9, bi_sector << 9 + bi_size). 318 * Knowing this, we can iterate through each bvec to locate the page belong to 319 * @cur_disk_bytenr and get the file offset. 320 * 321 * @inode is used to determine if the bvec page really belongs to @inode. 322 * 323 * Return 0 if we can't find the file offset 324 * Return >0 if we find the file offset and restore it to @file_offset_ret 325 */ 326 static int search_file_offset_in_bio(struct bio *bio, struct inode *inode, 327 u64 disk_bytenr, u64 *file_offset_ret) 328 { 329 struct bvec_iter iter; 330 struct bio_vec bvec; 331 u64 cur = bio->bi_iter.bi_sector << SECTOR_SHIFT; 332 int ret = 0; 333 334 bio_for_each_segment(bvec, bio, iter) { 335 struct page *page = bvec.bv_page; 336 337 if (cur > disk_bytenr) 338 break; 339 if (cur + bvec.bv_len <= disk_bytenr) { 340 cur += bvec.bv_len; 341 continue; 342 } 343 ASSERT(in_range(disk_bytenr, cur, bvec.bv_len)); 344 if (page->mapping && page->mapping->host && 345 page->mapping->host == inode) { 346 ret = 1; 347 *file_offset_ret = page_offset(page) + bvec.bv_offset + 348 disk_bytenr - cur; 349 break; 350 } 351 } 352 return ret; 353 } 354 355 /** 356 * Lookup the checksum for the read bio in csum tree. 357 * 358 * @inode: inode that the bio is for. 359 * @bio: bio to look up. 360 * @dst: Buffer of size nblocks * btrfs_super_csum_size() used to return 361 * checksum (nblocks = bio->bi_iter.bi_size / fs_info->sectorsize). If 362 * NULL, the checksum buffer is allocated and returned in 363 * btrfs_bio(bio)->csum instead. 364 * 365 * Return: BLK_STS_RESOURCE if allocating memory fails, BLK_STS_OK otherwise. 366 */ 367 blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst) 368 { 369 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 370 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 371 struct btrfs_bio *bbio = NULL; 372 struct btrfs_path *path; 373 const u32 sectorsize = fs_info->sectorsize; 374 const u32 csum_size = fs_info->csum_size; 375 u32 orig_len = bio->bi_iter.bi_size; 376 u64 orig_disk_bytenr = bio->bi_iter.bi_sector << SECTOR_SHIFT; 377 u64 cur_disk_bytenr; 378 u8 *csum; 379 const unsigned int nblocks = orig_len >> fs_info->sectorsize_bits; 380 int count = 0; 381 blk_status_t ret = BLK_STS_OK; 382 383 if ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) || 384 test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state)) 385 return BLK_STS_OK; 386 387 /* 388 * This function is only called for read bio. 389 * 390 * This means two things: 391 * - All our csums should only be in csum tree 392 * No ordered extents csums, as ordered extents are only for write 393 * path. 394 * - No need to bother any other info from bvec 395 * Since we're looking up csums, the only important info is the 396 * disk_bytenr and the length, which can be extracted from bi_iter 397 * directly. 398 */ 399 ASSERT(bio_op(bio) == REQ_OP_READ); 400 path = btrfs_alloc_path(); 401 if (!path) 402 return BLK_STS_RESOURCE; 403 404 if (!dst) { 405 bbio = btrfs_bio(bio); 406 407 if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) { 408 bbio->csum = kmalloc_array(nblocks, csum_size, GFP_NOFS); 409 if (!bbio->csum) { 410 btrfs_free_path(path); 411 return BLK_STS_RESOURCE; 412 } 413 } else { 414 bbio->csum = bbio->csum_inline; 415 } 416 csum = bbio->csum; 417 } else { 418 csum = dst; 419 } 420 421 /* 422 * If requested number of sectors is larger than one leaf can contain, 423 * kick the readahead for csum tree. 424 */ 425 if (nblocks > fs_info->csums_per_leaf) 426 path->reada = READA_FORWARD; 427 428 /* 429 * the free space stuff is only read when it hasn't been 430 * updated in the current transaction. So, we can safely 431 * read from the commit root and sidestep a nasty deadlock 432 * between reading the free space cache and updating the csum tree. 433 */ 434 if (btrfs_is_free_space_inode(BTRFS_I(inode))) { 435 path->search_commit_root = 1; 436 path->skip_locking = 1; 437 } 438 439 for (cur_disk_bytenr = orig_disk_bytenr; 440 cur_disk_bytenr < orig_disk_bytenr + orig_len; 441 cur_disk_bytenr += (count * sectorsize)) { 442 u64 search_len = orig_disk_bytenr + orig_len - cur_disk_bytenr; 443 unsigned int sector_offset; 444 u8 *csum_dst; 445 446 /* 447 * Although both cur_disk_bytenr and orig_disk_bytenr is u64, 448 * we're calculating the offset to the bio start. 449 * 450 * Bio size is limited to UINT_MAX, thus unsigned int is large 451 * enough to contain the raw result, not to mention the right 452 * shifted result. 453 */ 454 ASSERT(cur_disk_bytenr - orig_disk_bytenr < UINT_MAX); 455 sector_offset = (cur_disk_bytenr - orig_disk_bytenr) >> 456 fs_info->sectorsize_bits; 457 csum_dst = csum + sector_offset * csum_size; 458 459 count = search_csum_tree(fs_info, path, cur_disk_bytenr, 460 search_len, csum_dst); 461 if (count < 0) { 462 ret = errno_to_blk_status(count); 463 if (bbio) 464 btrfs_bio_free_csum(bbio); 465 break; 466 } 467 468 /* 469 * We didn't find a csum for this range. We need to make sure 470 * we complain loudly about this, because we are not NODATASUM. 471 * 472 * However for the DATA_RELOC inode we could potentially be 473 * relocating data extents for a NODATASUM inode, so the inode 474 * itself won't be marked with NODATASUM, but the extent we're 475 * copying is in fact NODATASUM. If we don't find a csum we 476 * assume this is the case. 477 */ 478 if (count == 0) { 479 memset(csum_dst, 0, csum_size); 480 count = 1; 481 482 if (BTRFS_I(inode)->root->root_key.objectid == 483 BTRFS_DATA_RELOC_TREE_OBJECTID) { 484 u64 file_offset; 485 int ret; 486 487 ret = search_file_offset_in_bio(bio, inode, 488 cur_disk_bytenr, &file_offset); 489 if (ret) 490 set_extent_bits(io_tree, file_offset, 491 file_offset + sectorsize - 1, 492 EXTENT_NODATASUM); 493 } else { 494 btrfs_warn_rl(fs_info, 495 "csum hole found for disk bytenr range [%llu, %llu)", 496 cur_disk_bytenr, cur_disk_bytenr + sectorsize); 497 } 498 } 499 } 500 501 btrfs_free_path(path); 502 return ret; 503 } 504 505 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, 506 struct list_head *list, int search_commit) 507 { 508 struct btrfs_fs_info *fs_info = root->fs_info; 509 struct btrfs_key key; 510 struct btrfs_path *path; 511 struct extent_buffer *leaf; 512 struct btrfs_ordered_sum *sums; 513 struct btrfs_csum_item *item; 514 LIST_HEAD(tmplist); 515 unsigned long offset; 516 int ret; 517 size_t size; 518 u64 csum_end; 519 const u32 csum_size = fs_info->csum_size; 520 521 ASSERT(IS_ALIGNED(start, fs_info->sectorsize) && 522 IS_ALIGNED(end + 1, fs_info->sectorsize)); 523 524 path = btrfs_alloc_path(); 525 if (!path) 526 return -ENOMEM; 527 528 if (search_commit) { 529 path->skip_locking = 1; 530 path->reada = READA_FORWARD; 531 path->search_commit_root = 1; 532 } 533 534 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; 535 key.offset = start; 536 key.type = BTRFS_EXTENT_CSUM_KEY; 537 538 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 539 if (ret < 0) 540 goto fail; 541 if (ret > 0 && path->slots[0] > 0) { 542 leaf = path->nodes[0]; 543 btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1); 544 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID && 545 key.type == BTRFS_EXTENT_CSUM_KEY) { 546 offset = (start - key.offset) >> fs_info->sectorsize_bits; 547 if (offset * csum_size < 548 btrfs_item_size(leaf, path->slots[0] - 1)) 549 path->slots[0]--; 550 } 551 } 552 553 while (start <= end) { 554 leaf = path->nodes[0]; 555 if (path->slots[0] >= btrfs_header_nritems(leaf)) { 556 ret = btrfs_next_leaf(root, path); 557 if (ret < 0) 558 goto fail; 559 if (ret > 0) 560 break; 561 leaf = path->nodes[0]; 562 } 563 564 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 565 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || 566 key.type != BTRFS_EXTENT_CSUM_KEY || 567 key.offset > end) 568 break; 569 570 if (key.offset > start) 571 start = key.offset; 572 573 size = btrfs_item_size(leaf, path->slots[0]); 574 csum_end = key.offset + (size / csum_size) * fs_info->sectorsize; 575 if (csum_end <= start) { 576 path->slots[0]++; 577 continue; 578 } 579 580 csum_end = min(csum_end, end + 1); 581 item = btrfs_item_ptr(path->nodes[0], path->slots[0], 582 struct btrfs_csum_item); 583 while (start < csum_end) { 584 size = min_t(size_t, csum_end - start, 585 max_ordered_sum_bytes(fs_info, csum_size)); 586 sums = kzalloc(btrfs_ordered_sum_size(fs_info, size), 587 GFP_NOFS); 588 if (!sums) { 589 ret = -ENOMEM; 590 goto fail; 591 } 592 593 sums->bytenr = start; 594 sums->len = (int)size; 595 596 offset = (start - key.offset) >> fs_info->sectorsize_bits; 597 offset *= csum_size; 598 size >>= fs_info->sectorsize_bits; 599 600 read_extent_buffer(path->nodes[0], 601 sums->sums, 602 ((unsigned long)item) + offset, 603 csum_size * size); 604 605 start += fs_info->sectorsize * size; 606 list_add_tail(&sums->list, &tmplist); 607 } 608 path->slots[0]++; 609 } 610 ret = 0; 611 fail: 612 while (ret < 0 && !list_empty(&tmplist)) { 613 sums = list_entry(tmplist.next, struct btrfs_ordered_sum, list); 614 list_del(&sums->list); 615 kfree(sums); 616 } 617 list_splice_tail(&tmplist, list); 618 619 btrfs_free_path(path); 620 return ret; 621 } 622 623 /** 624 * Calculate checksums of the data contained inside a bio 625 * 626 * @inode: Owner of the data inside the bio 627 * @bio: Contains the data to be checksummed 628 * @offset: If (u64)-1, @bio may contain discontiguous bio vecs, so the 629 * file offsets are determined from the page offsets in the bio. 630 * Otherwise, this is the starting file offset of the bio vecs in 631 * @bio, which must be contiguous. 632 * @one_ordered: If true, @bio only refers to one ordered extent. 633 */ 634 blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio, 635 u64 offset, bool one_ordered) 636 { 637 struct btrfs_fs_info *fs_info = inode->root->fs_info; 638 SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); 639 struct btrfs_ordered_sum *sums; 640 struct btrfs_ordered_extent *ordered = NULL; 641 const bool use_page_offsets = (offset == (u64)-1); 642 char *data; 643 struct bvec_iter iter; 644 struct bio_vec bvec; 645 int index; 646 unsigned int blockcount; 647 unsigned long total_bytes = 0; 648 unsigned long this_sum_bytes = 0; 649 int i; 650 unsigned nofs_flag; 651 652 nofs_flag = memalloc_nofs_save(); 653 sums = kvzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size), 654 GFP_KERNEL); 655 memalloc_nofs_restore(nofs_flag); 656 657 if (!sums) 658 return BLK_STS_RESOURCE; 659 660 sums->len = bio->bi_iter.bi_size; 661 INIT_LIST_HEAD(&sums->list); 662 663 sums->bytenr = bio->bi_iter.bi_sector << 9; 664 index = 0; 665 666 shash->tfm = fs_info->csum_shash; 667 668 bio_for_each_segment(bvec, bio, iter) { 669 if (use_page_offsets) 670 offset = page_offset(bvec.bv_page) + bvec.bv_offset; 671 672 if (!ordered) { 673 ordered = btrfs_lookup_ordered_extent(inode, offset); 674 /* 675 * The bio range is not covered by any ordered extent, 676 * must be a code logic error. 677 */ 678 if (unlikely(!ordered)) { 679 WARN(1, KERN_WARNING 680 "no ordered extent for root %llu ino %llu offset %llu\n", 681 inode->root->root_key.objectid, 682 btrfs_ino(inode), offset); 683 kvfree(sums); 684 return BLK_STS_IOERR; 685 } 686 } 687 688 blockcount = BTRFS_BYTES_TO_BLKS(fs_info, 689 bvec.bv_len + fs_info->sectorsize 690 - 1); 691 692 for (i = 0; i < blockcount; i++) { 693 if (!one_ordered && 694 !in_range(offset, ordered->file_offset, 695 ordered->num_bytes)) { 696 unsigned long bytes_left; 697 698 sums->len = this_sum_bytes; 699 this_sum_bytes = 0; 700 btrfs_add_ordered_sum(ordered, sums); 701 btrfs_put_ordered_extent(ordered); 702 703 bytes_left = bio->bi_iter.bi_size - total_bytes; 704 705 nofs_flag = memalloc_nofs_save(); 706 sums = kvzalloc(btrfs_ordered_sum_size(fs_info, 707 bytes_left), GFP_KERNEL); 708 memalloc_nofs_restore(nofs_flag); 709 BUG_ON(!sums); /* -ENOMEM */ 710 sums->len = bytes_left; 711 ordered = btrfs_lookup_ordered_extent(inode, 712 offset); 713 ASSERT(ordered); /* Logic error */ 714 sums->bytenr = (bio->bi_iter.bi_sector << 9) 715 + total_bytes; 716 index = 0; 717 } 718 719 data = bvec_kmap_local(&bvec); 720 crypto_shash_digest(shash, 721 data + (i * fs_info->sectorsize), 722 fs_info->sectorsize, 723 sums->sums + index); 724 kunmap_local(data); 725 index += fs_info->csum_size; 726 offset += fs_info->sectorsize; 727 this_sum_bytes += fs_info->sectorsize; 728 total_bytes += fs_info->sectorsize; 729 } 730 731 } 732 this_sum_bytes = 0; 733 btrfs_add_ordered_sum(ordered, sums); 734 btrfs_put_ordered_extent(ordered); 735 return 0; 736 } 737 738 /* 739 * helper function for csum removal, this expects the 740 * key to describe the csum pointed to by the path, and it expects 741 * the csum to overlap the range [bytenr, len] 742 * 743 * The csum should not be entirely contained in the range and the 744 * range should not be entirely contained in the csum. 745 * 746 * This calls btrfs_truncate_item with the correct args based on the 747 * overlap, and fixes up the key as required. 748 */ 749 static noinline void truncate_one_csum(struct btrfs_fs_info *fs_info, 750 struct btrfs_path *path, 751 struct btrfs_key *key, 752 u64 bytenr, u64 len) 753 { 754 struct extent_buffer *leaf; 755 const u32 csum_size = fs_info->csum_size; 756 u64 csum_end; 757 u64 end_byte = bytenr + len; 758 u32 blocksize_bits = fs_info->sectorsize_bits; 759 760 leaf = path->nodes[0]; 761 csum_end = btrfs_item_size(leaf, path->slots[0]) / csum_size; 762 csum_end <<= blocksize_bits; 763 csum_end += key->offset; 764 765 if (key->offset < bytenr && csum_end <= end_byte) { 766 /* 767 * [ bytenr - len ] 768 * [ ] 769 * [csum ] 770 * A simple truncate off the end of the item 771 */ 772 u32 new_size = (bytenr - key->offset) >> blocksize_bits; 773 new_size *= csum_size; 774 btrfs_truncate_item(path, new_size, 1); 775 } else if (key->offset >= bytenr && csum_end > end_byte && 776 end_byte > key->offset) { 777 /* 778 * [ bytenr - len ] 779 * [ ] 780 * [csum ] 781 * we need to truncate from the beginning of the csum 782 */ 783 u32 new_size = (csum_end - end_byte) >> blocksize_bits; 784 new_size *= csum_size; 785 786 btrfs_truncate_item(path, new_size, 0); 787 788 key->offset = end_byte; 789 btrfs_set_item_key_safe(fs_info, path, key); 790 } else { 791 BUG(); 792 } 793 } 794 795 /* 796 * deletes the csum items from the csum tree for a given 797 * range of bytes. 798 */ 799 int btrfs_del_csums(struct btrfs_trans_handle *trans, 800 struct btrfs_root *root, u64 bytenr, u64 len) 801 { 802 struct btrfs_fs_info *fs_info = trans->fs_info; 803 struct btrfs_path *path; 804 struct btrfs_key key; 805 u64 end_byte = bytenr + len; 806 u64 csum_end; 807 struct extent_buffer *leaf; 808 int ret = 0; 809 const u32 csum_size = fs_info->csum_size; 810 u32 blocksize_bits = fs_info->sectorsize_bits; 811 812 ASSERT(root->root_key.objectid == BTRFS_CSUM_TREE_OBJECTID || 813 root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID); 814 815 path = btrfs_alloc_path(); 816 if (!path) 817 return -ENOMEM; 818 819 while (1) { 820 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; 821 key.offset = end_byte - 1; 822 key.type = BTRFS_EXTENT_CSUM_KEY; 823 824 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 825 if (ret > 0) { 826 ret = 0; 827 if (path->slots[0] == 0) 828 break; 829 path->slots[0]--; 830 } else if (ret < 0) { 831 break; 832 } 833 834 leaf = path->nodes[0]; 835 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 836 837 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || 838 key.type != BTRFS_EXTENT_CSUM_KEY) { 839 break; 840 } 841 842 if (key.offset >= end_byte) 843 break; 844 845 csum_end = btrfs_item_size(leaf, path->slots[0]) / csum_size; 846 csum_end <<= blocksize_bits; 847 csum_end += key.offset; 848 849 /* this csum ends before we start, we're done */ 850 if (csum_end <= bytenr) 851 break; 852 853 /* delete the entire item, it is inside our range */ 854 if (key.offset >= bytenr && csum_end <= end_byte) { 855 int del_nr = 1; 856 857 /* 858 * Check how many csum items preceding this one in this 859 * leaf correspond to our range and then delete them all 860 * at once. 861 */ 862 if (key.offset > bytenr && path->slots[0] > 0) { 863 int slot = path->slots[0] - 1; 864 865 while (slot >= 0) { 866 struct btrfs_key pk; 867 868 btrfs_item_key_to_cpu(leaf, &pk, slot); 869 if (pk.offset < bytenr || 870 pk.type != BTRFS_EXTENT_CSUM_KEY || 871 pk.objectid != 872 BTRFS_EXTENT_CSUM_OBJECTID) 873 break; 874 path->slots[0] = slot; 875 del_nr++; 876 key.offset = pk.offset; 877 slot--; 878 } 879 } 880 ret = btrfs_del_items(trans, root, path, 881 path->slots[0], del_nr); 882 if (ret) 883 break; 884 if (key.offset == bytenr) 885 break; 886 } else if (key.offset < bytenr && csum_end > end_byte) { 887 unsigned long offset; 888 unsigned long shift_len; 889 unsigned long item_offset; 890 /* 891 * [ bytenr - len ] 892 * [csum ] 893 * 894 * Our bytes are in the middle of the csum, 895 * we need to split this item and insert a new one. 896 * 897 * But we can't drop the path because the 898 * csum could change, get removed, extended etc. 899 * 900 * The trick here is the max size of a csum item leaves 901 * enough room in the tree block for a single 902 * item header. So, we split the item in place, 903 * adding a new header pointing to the existing 904 * bytes. Then we loop around again and we have 905 * a nicely formed csum item that we can neatly 906 * truncate. 907 */ 908 offset = (bytenr - key.offset) >> blocksize_bits; 909 offset *= csum_size; 910 911 shift_len = (len >> blocksize_bits) * csum_size; 912 913 item_offset = btrfs_item_ptr_offset(leaf, 914 path->slots[0]); 915 916 memzero_extent_buffer(leaf, item_offset + offset, 917 shift_len); 918 key.offset = bytenr; 919 920 /* 921 * btrfs_split_item returns -EAGAIN when the 922 * item changed size or key 923 */ 924 ret = btrfs_split_item(trans, root, path, &key, offset); 925 if (ret && ret != -EAGAIN) { 926 btrfs_abort_transaction(trans, ret); 927 break; 928 } 929 ret = 0; 930 931 key.offset = end_byte - 1; 932 } else { 933 truncate_one_csum(fs_info, path, &key, bytenr, len); 934 if (key.offset < bytenr) 935 break; 936 } 937 btrfs_release_path(path); 938 } 939 btrfs_free_path(path); 940 return ret; 941 } 942 943 static int find_next_csum_offset(struct btrfs_root *root, 944 struct btrfs_path *path, 945 u64 *next_offset) 946 { 947 const u32 nritems = btrfs_header_nritems(path->nodes[0]); 948 struct btrfs_key found_key; 949 int slot = path->slots[0] + 1; 950 int ret; 951 952 if (nritems == 0 || slot >= nritems) { 953 ret = btrfs_next_leaf(root, path); 954 if (ret < 0) { 955 return ret; 956 } else if (ret > 0) { 957 *next_offset = (u64)-1; 958 return 0; 959 } 960 slot = path->slots[0]; 961 } 962 963 btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot); 964 965 if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || 966 found_key.type != BTRFS_EXTENT_CSUM_KEY) 967 *next_offset = (u64)-1; 968 else 969 *next_offset = found_key.offset; 970 971 return 0; 972 } 973 974 int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, 975 struct btrfs_root *root, 976 struct btrfs_ordered_sum *sums) 977 { 978 struct btrfs_fs_info *fs_info = root->fs_info; 979 struct btrfs_key file_key; 980 struct btrfs_key found_key; 981 struct btrfs_path *path; 982 struct btrfs_csum_item *item; 983 struct btrfs_csum_item *item_end; 984 struct extent_buffer *leaf = NULL; 985 u64 next_offset; 986 u64 total_bytes = 0; 987 u64 csum_offset; 988 u64 bytenr; 989 u32 ins_size; 990 int index = 0; 991 int found_next; 992 int ret; 993 const u32 csum_size = fs_info->csum_size; 994 995 path = btrfs_alloc_path(); 996 if (!path) 997 return -ENOMEM; 998 again: 999 next_offset = (u64)-1; 1000 found_next = 0; 1001 bytenr = sums->bytenr + total_bytes; 1002 file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; 1003 file_key.offset = bytenr; 1004 file_key.type = BTRFS_EXTENT_CSUM_KEY; 1005 1006 item = btrfs_lookup_csum(trans, root, path, bytenr, 1); 1007 if (!IS_ERR(item)) { 1008 ret = 0; 1009 leaf = path->nodes[0]; 1010 item_end = btrfs_item_ptr(leaf, path->slots[0], 1011 struct btrfs_csum_item); 1012 item_end = (struct btrfs_csum_item *)((char *)item_end + 1013 btrfs_item_size(leaf, path->slots[0])); 1014 goto found; 1015 } 1016 ret = PTR_ERR(item); 1017 if (ret != -EFBIG && ret != -ENOENT) 1018 goto out; 1019 1020 if (ret == -EFBIG) { 1021 u32 item_size; 1022 /* we found one, but it isn't big enough yet */ 1023 leaf = path->nodes[0]; 1024 item_size = btrfs_item_size(leaf, path->slots[0]); 1025 if ((item_size / csum_size) >= 1026 MAX_CSUM_ITEMS(fs_info, csum_size)) { 1027 /* already at max size, make a new one */ 1028 goto insert; 1029 } 1030 } else { 1031 /* We didn't find a csum item, insert one. */ 1032 ret = find_next_csum_offset(root, path, &next_offset); 1033 if (ret < 0) 1034 goto out; 1035 found_next = 1; 1036 goto insert; 1037 } 1038 1039 /* 1040 * At this point, we know the tree has a checksum item that ends at an 1041 * offset matching the start of the checksum range we want to insert. 1042 * We try to extend that item as much as possible and then add as many 1043 * checksums to it as they fit. 1044 * 1045 * First check if the leaf has enough free space for at least one 1046 * checksum. If it has go directly to the item extension code, otherwise 1047 * release the path and do a search for insertion before the extension. 1048 */ 1049 if (btrfs_leaf_free_space(leaf) >= csum_size) { 1050 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 1051 csum_offset = (bytenr - found_key.offset) >> 1052 fs_info->sectorsize_bits; 1053 goto extend_csum; 1054 } 1055 1056 btrfs_release_path(path); 1057 path->search_for_extension = 1; 1058 ret = btrfs_search_slot(trans, root, &file_key, path, 1059 csum_size, 1); 1060 path->search_for_extension = 0; 1061 if (ret < 0) 1062 goto out; 1063 1064 if (ret > 0) { 1065 if (path->slots[0] == 0) 1066 goto insert; 1067 path->slots[0]--; 1068 } 1069 1070 leaf = path->nodes[0]; 1071 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 1072 csum_offset = (bytenr - found_key.offset) >> fs_info->sectorsize_bits; 1073 1074 if (found_key.type != BTRFS_EXTENT_CSUM_KEY || 1075 found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || 1076 csum_offset >= MAX_CSUM_ITEMS(fs_info, csum_size)) { 1077 goto insert; 1078 } 1079 1080 extend_csum: 1081 if (csum_offset == btrfs_item_size(leaf, path->slots[0]) / 1082 csum_size) { 1083 int extend_nr; 1084 u64 tmp; 1085 u32 diff; 1086 1087 tmp = sums->len - total_bytes; 1088 tmp >>= fs_info->sectorsize_bits; 1089 WARN_ON(tmp < 1); 1090 extend_nr = max_t(int, 1, tmp); 1091 1092 /* 1093 * A log tree can already have checksum items with a subset of 1094 * the checksums we are trying to log. This can happen after 1095 * doing a sequence of partial writes into prealloc extents and 1096 * fsyncs in between, with a full fsync logging a larger subrange 1097 * of an extent for which a previous fast fsync logged a smaller 1098 * subrange. And this happens in particular due to merging file 1099 * extent items when we complete an ordered extent for a range 1100 * covered by a prealloc extent - this is done at 1101 * btrfs_mark_extent_written(). 1102 * 1103 * So if we try to extend the previous checksum item, which has 1104 * a range that ends at the start of the range we want to insert, 1105 * make sure we don't extend beyond the start offset of the next 1106 * checksum item. If we are at the last item in the leaf, then 1107 * forget the optimization of extending and add a new checksum 1108 * item - it is not worth the complexity of releasing the path, 1109 * getting the first key for the next leaf, repeat the btree 1110 * search, etc, because log trees are temporary anyway and it 1111 * would only save a few bytes of leaf space. 1112 */ 1113 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { 1114 if (path->slots[0] + 1 >= 1115 btrfs_header_nritems(path->nodes[0])) { 1116 ret = find_next_csum_offset(root, path, &next_offset); 1117 if (ret < 0) 1118 goto out; 1119 found_next = 1; 1120 goto insert; 1121 } 1122 1123 ret = find_next_csum_offset(root, path, &next_offset); 1124 if (ret < 0) 1125 goto out; 1126 1127 tmp = (next_offset - bytenr) >> fs_info->sectorsize_bits; 1128 if (tmp <= INT_MAX) 1129 extend_nr = min_t(int, extend_nr, tmp); 1130 } 1131 1132 diff = (csum_offset + extend_nr) * csum_size; 1133 diff = min(diff, 1134 MAX_CSUM_ITEMS(fs_info, csum_size) * csum_size); 1135 1136 diff = diff - btrfs_item_size(leaf, path->slots[0]); 1137 diff = min_t(u32, btrfs_leaf_free_space(leaf), diff); 1138 diff /= csum_size; 1139 diff *= csum_size; 1140 1141 btrfs_extend_item(path, diff); 1142 ret = 0; 1143 goto csum; 1144 } 1145 1146 insert: 1147 btrfs_release_path(path); 1148 csum_offset = 0; 1149 if (found_next) { 1150 u64 tmp; 1151 1152 tmp = sums->len - total_bytes; 1153 tmp >>= fs_info->sectorsize_bits; 1154 tmp = min(tmp, (next_offset - file_key.offset) >> 1155 fs_info->sectorsize_bits); 1156 1157 tmp = max_t(u64, 1, tmp); 1158 tmp = min_t(u64, tmp, MAX_CSUM_ITEMS(fs_info, csum_size)); 1159 ins_size = csum_size * tmp; 1160 } else { 1161 ins_size = csum_size; 1162 } 1163 ret = btrfs_insert_empty_item(trans, root, path, &file_key, 1164 ins_size); 1165 if (ret < 0) 1166 goto out; 1167 if (WARN_ON(ret != 0)) 1168 goto out; 1169 leaf = path->nodes[0]; 1170 csum: 1171 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); 1172 item_end = (struct btrfs_csum_item *)((unsigned char *)item + 1173 btrfs_item_size(leaf, path->slots[0])); 1174 item = (struct btrfs_csum_item *)((unsigned char *)item + 1175 csum_offset * csum_size); 1176 found: 1177 ins_size = (u32)(sums->len - total_bytes) >> fs_info->sectorsize_bits; 1178 ins_size *= csum_size; 1179 ins_size = min_t(u32, (unsigned long)item_end - (unsigned long)item, 1180 ins_size); 1181 write_extent_buffer(leaf, sums->sums + index, (unsigned long)item, 1182 ins_size); 1183 1184 index += ins_size; 1185 ins_size /= csum_size; 1186 total_bytes += ins_size * fs_info->sectorsize; 1187 1188 btrfs_mark_buffer_dirty(path->nodes[0]); 1189 if (total_bytes < sums->len) { 1190 btrfs_release_path(path); 1191 cond_resched(); 1192 goto again; 1193 } 1194 out: 1195 btrfs_free_path(path); 1196 return ret; 1197 } 1198 1199 void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode, 1200 const struct btrfs_path *path, 1201 struct btrfs_file_extent_item *fi, 1202 const bool new_inline, 1203 struct extent_map *em) 1204 { 1205 struct btrfs_fs_info *fs_info = inode->root->fs_info; 1206 struct btrfs_root *root = inode->root; 1207 struct extent_buffer *leaf = path->nodes[0]; 1208 const int slot = path->slots[0]; 1209 struct btrfs_key key; 1210 u64 extent_start, extent_end; 1211 u64 bytenr; 1212 u8 type = btrfs_file_extent_type(leaf, fi); 1213 int compress_type = btrfs_file_extent_compression(leaf, fi); 1214 1215 btrfs_item_key_to_cpu(leaf, &key, slot); 1216 extent_start = key.offset; 1217 extent_end = btrfs_file_extent_end(path); 1218 em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi); 1219 em->generation = btrfs_file_extent_generation(leaf, fi); 1220 if (type == BTRFS_FILE_EXTENT_REG || 1221 type == BTRFS_FILE_EXTENT_PREALLOC) { 1222 em->start = extent_start; 1223 em->len = extent_end - extent_start; 1224 em->orig_start = extent_start - 1225 btrfs_file_extent_offset(leaf, fi); 1226 em->orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi); 1227 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 1228 if (bytenr == 0) { 1229 em->block_start = EXTENT_MAP_HOLE; 1230 return; 1231 } 1232 if (compress_type != BTRFS_COMPRESS_NONE) { 1233 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); 1234 em->compress_type = compress_type; 1235 em->block_start = bytenr; 1236 em->block_len = em->orig_block_len; 1237 } else { 1238 bytenr += btrfs_file_extent_offset(leaf, fi); 1239 em->block_start = bytenr; 1240 em->block_len = em->len; 1241 if (type == BTRFS_FILE_EXTENT_PREALLOC) 1242 set_bit(EXTENT_FLAG_PREALLOC, &em->flags); 1243 } 1244 } else if (type == BTRFS_FILE_EXTENT_INLINE) { 1245 em->block_start = EXTENT_MAP_INLINE; 1246 em->start = extent_start; 1247 em->len = extent_end - extent_start; 1248 /* 1249 * Initialize orig_start and block_len with the same values 1250 * as in inode.c:btrfs_get_extent(). 1251 */ 1252 em->orig_start = EXTENT_MAP_HOLE; 1253 em->block_len = (u64)-1; 1254 if (!new_inline && compress_type != BTRFS_COMPRESS_NONE) { 1255 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); 1256 em->compress_type = compress_type; 1257 } 1258 } else { 1259 btrfs_err(fs_info, 1260 "unknown file extent item type %d, inode %llu, offset %llu, " 1261 "root %llu", type, btrfs_ino(inode), extent_start, 1262 root->root_key.objectid); 1263 } 1264 } 1265 1266 /* 1267 * Returns the end offset (non inclusive) of the file extent item the given path 1268 * points to. If it points to an inline extent, the returned offset is rounded 1269 * up to the sector size. 1270 */ 1271 u64 btrfs_file_extent_end(const struct btrfs_path *path) 1272 { 1273 const struct extent_buffer *leaf = path->nodes[0]; 1274 const int slot = path->slots[0]; 1275 struct btrfs_file_extent_item *fi; 1276 struct btrfs_key key; 1277 u64 end; 1278 1279 btrfs_item_key_to_cpu(leaf, &key, slot); 1280 ASSERT(key.type == BTRFS_EXTENT_DATA_KEY); 1281 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); 1282 1283 if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) { 1284 end = btrfs_file_extent_ram_bytes(leaf, fi); 1285 end = ALIGN(key.offset + end, leaf->fs_info->sectorsize); 1286 } else { 1287 end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); 1288 } 1289 1290 return end; 1291 } 1292