xref: /openbmc/linux/fs/btrfs/file-item.c (revision dc6a81c3)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2007 Oracle.  All rights reserved.
4  */
5 
6 #include <linux/bio.h>
7 #include <linux/slab.h>
8 #include <linux/pagemap.h>
9 #include <linux/highmem.h>
10 #include <linux/sched/mm.h>
11 #include <crypto/hash.h>
12 #include "ctree.h"
13 #include "disk-io.h"
14 #include "transaction.h"
15 #include "volumes.h"
16 #include "print-tree.h"
17 #include "compression.h"
18 
/*
 * Number of checksums of @size bytes that fit in the data area of a leaf of
 * @r after setting aside room for two struct btrfs_item headers, minus one.
 */
#define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \
				   sizeof(struct btrfs_item) * 2) / \
				  size) - 1))

/* Same limit as __MAX_CSUM_ITEMS(), additionally capped at PAGE_SIZE entries. */
#define MAX_CSUM_ITEMS(r, size) (min_t(u32, __MAX_CSUM_ITEMS(r, size), \
				       PAGE_SIZE))
25 
26 static inline u32 max_ordered_sum_bytes(struct btrfs_fs_info *fs_info,
27 					u16 csum_size)
28 {
29 	u32 ncsums = (PAGE_SIZE - sizeof(struct btrfs_ordered_sum)) / csum_size;
30 
31 	return ncsums * fs_info->sectorsize;
32 }
33 
34 int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
35 			     struct btrfs_root *root,
36 			     u64 objectid, u64 pos,
37 			     u64 disk_offset, u64 disk_num_bytes,
38 			     u64 num_bytes, u64 offset, u64 ram_bytes,
39 			     u8 compression, u8 encryption, u16 other_encoding)
40 {
41 	int ret = 0;
42 	struct btrfs_file_extent_item *item;
43 	struct btrfs_key file_key;
44 	struct btrfs_path *path;
45 	struct extent_buffer *leaf;
46 
47 	path = btrfs_alloc_path();
48 	if (!path)
49 		return -ENOMEM;
50 	file_key.objectid = objectid;
51 	file_key.offset = pos;
52 	file_key.type = BTRFS_EXTENT_DATA_KEY;
53 
54 	path->leave_spinning = 1;
55 	ret = btrfs_insert_empty_item(trans, root, path, &file_key,
56 				      sizeof(*item));
57 	if (ret < 0)
58 		goto out;
59 	BUG_ON(ret); /* Can't happen */
60 	leaf = path->nodes[0];
61 	item = btrfs_item_ptr(leaf, path->slots[0],
62 			      struct btrfs_file_extent_item);
63 	btrfs_set_file_extent_disk_bytenr(leaf, item, disk_offset);
64 	btrfs_set_file_extent_disk_num_bytes(leaf, item, disk_num_bytes);
65 	btrfs_set_file_extent_offset(leaf, item, offset);
66 	btrfs_set_file_extent_num_bytes(leaf, item, num_bytes);
67 	btrfs_set_file_extent_ram_bytes(leaf, item, ram_bytes);
68 	btrfs_set_file_extent_generation(leaf, item, trans->transid);
69 	btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
70 	btrfs_set_file_extent_compression(leaf, item, compression);
71 	btrfs_set_file_extent_encryption(leaf, item, encryption);
72 	btrfs_set_file_extent_other_encoding(leaf, item, other_encoding);
73 
74 	btrfs_mark_buffer_dirty(leaf);
75 out:
76 	btrfs_free_path(path);
77 	return ret;
78 }
79 
80 static struct btrfs_csum_item *
81 btrfs_lookup_csum(struct btrfs_trans_handle *trans,
82 		  struct btrfs_root *root,
83 		  struct btrfs_path *path,
84 		  u64 bytenr, int cow)
85 {
86 	struct btrfs_fs_info *fs_info = root->fs_info;
87 	int ret;
88 	struct btrfs_key file_key;
89 	struct btrfs_key found_key;
90 	struct btrfs_csum_item *item;
91 	struct extent_buffer *leaf;
92 	u64 csum_offset = 0;
93 	u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
94 	int csums_in_item;
95 
96 	file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
97 	file_key.offset = bytenr;
98 	file_key.type = BTRFS_EXTENT_CSUM_KEY;
99 	ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow);
100 	if (ret < 0)
101 		goto fail;
102 	leaf = path->nodes[0];
103 	if (ret > 0) {
104 		ret = 1;
105 		if (path->slots[0] == 0)
106 			goto fail;
107 		path->slots[0]--;
108 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
109 		if (found_key.type != BTRFS_EXTENT_CSUM_KEY)
110 			goto fail;
111 
112 		csum_offset = (bytenr - found_key.offset) >>
113 				fs_info->sb->s_blocksize_bits;
114 		csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]);
115 		csums_in_item /= csum_size;
116 
117 		if (csum_offset == csums_in_item) {
118 			ret = -EFBIG;
119 			goto fail;
120 		} else if (csum_offset > csums_in_item) {
121 			goto fail;
122 		}
123 	}
124 	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
125 	item = (struct btrfs_csum_item *)((unsigned char *)item +
126 					  csum_offset * csum_size);
127 	return item;
128 fail:
129 	if (ret > 0)
130 		ret = -ENOENT;
131 	return ERR_PTR(ret);
132 }
133 
134 int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
135 			     struct btrfs_root *root,
136 			     struct btrfs_path *path, u64 objectid,
137 			     u64 offset, int mod)
138 {
139 	int ret;
140 	struct btrfs_key file_key;
141 	int ins_len = mod < 0 ? -1 : 0;
142 	int cow = mod != 0;
143 
144 	file_key.objectid = objectid;
145 	file_key.offset = offset;
146 	file_key.type = BTRFS_EXTENT_DATA_KEY;
147 	ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow);
148 	return ret;
149 }
150 
/**
 * btrfs_lookup_bio_sums - Look up checksums for a bio.
 * @inode: inode that the bio is for.
 * @bio: bio embedded in btrfs_io_bio.
 * @offset: Unless (u64)-1, look up checksums for this offset in the file.
 *          If (u64)-1, use the page offsets from the bio instead.
 * @dst: Buffer used to return the checksums. One checksum of
 *       btrfs_super_csum_size() bytes is written for every block of the bio,
 *       so it must be large enough for all of them. If NULL, the checksums
 *       are returned in btrfs_io_bio(bio)->csum instead.
 *
 * A block for which no checksum can be looked up gets an all-zero checksum;
 * this is not reported as an error to the caller.
 *
 * Return: BLK_STS_RESOURCE if allocating memory fails, BLK_STS_OK otherwise.
 */
blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
				   u64 offset, u8 *dst)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct bio_vec bvec;
	struct bvec_iter iter;
	struct btrfs_io_bio *btrfs_bio = btrfs_io_bio(bio);
	struct btrfs_csum_item *item = NULL;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct btrfs_path *path;
	const bool page_offsets = (offset == (u64)-1);
	u8 *csum;
	u64 item_start_offset = 0;
	u64 item_last_offset = 0;
	u64 disk_bytenr;
	u64 page_bytes_left;
	u32 diff;
	int nblocks;
	int count = 0;
	u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);

	path = btrfs_alloc_path();
	if (!path)
		return BLK_STS_RESOURCE;

	/* Number of blocks (and therefore checksums) covered by the bio. */
	nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits;
	if (!dst) {
		/*
		 * Use the buffer embedded in the btrfs_io_bio when all the
		 * checksums fit in it, otherwise allocate one.
		 */
		if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
			btrfs_bio->csum = kmalloc_array(nblocks, csum_size,
							GFP_NOFS);
			if (!btrfs_bio->csum) {
				btrfs_free_path(path);
				return BLK_STS_RESOURCE;
			}
		} else {
			btrfs_bio->csum = btrfs_bio->csum_inline;
		}
		csum = btrfs_bio->csum;
	} else {
		csum = dst;
	}

	/* Ask for forward readahead on bios larger than eight pages. */
	if (bio->bi_iter.bi_size > PAGE_SIZE * 8)
		path->reada = READA_FORWARD;

	/*
	 * the free space stuff is only read when it hasn't been
	 * updated in the current transaction.  So, we can safely
	 * read from the commit root and sidestep a nasty deadlock
	 * between reading the free space cache and updating the csum tree.
	 */
	if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
		path->search_commit_root = 1;
		path->skip_locking = 1;
	}

	/* bi_sector is in 512-byte units; convert to a byte address. */
	disk_bytenr = (u64)bio->bi_iter.bi_sector << 9;

	bio_for_each_segment(bvec, bio, iter) {
		page_bytes_left = bvec.bv_len;
		/*
		 * Checksums fetched for an earlier segment still cover blocks
		 * that have not been walked yet: only advance the cursors.
		 */
		if (count)
			goto next;

		if (page_offsets)
			offset = page_offset(bvec.bv_page) + bvec.bv_offset;
		/*
		 * NOTE(review): presumably this returns checksums still held
		 * by an in-memory ordered extent; a non-zero count means that
		 * many checksums were copied into csum.
		 */
		count = btrfs_find_ordered_sum(inode, offset, disk_bytenr,
					       csum, nblocks);
		if (count)
			goto found;

		/* Look up a new csum item unless the cached one covers us. */
		if (!item || disk_bytenr < item_start_offset ||
		    disk_bytenr >= item_last_offset) {
			struct btrfs_key found_key;
			u32 item_size;

			if (item)
				btrfs_release_path(path);
			item = btrfs_lookup_csum(NULL, fs_info->csum_root,
						 path, disk_bytenr, 0);
			if (IS_ERR(item)) {
				/*
				 * Any lookup failure is handled as "no csum":
				 * hand back a zeroed checksum for this one
				 * block and carry on.
				 */
				count = 1;
				memset(csum, 0, csum_size);
				if (BTRFS_I(inode)->root->root_key.objectid ==
				    BTRFS_DATA_RELOC_TREE_OBJECTID) {
					set_extent_bits(io_tree, offset,
						offset + fs_info->sectorsize - 1,
						EXTENT_NODATASUM);
				} else {
					btrfs_info_rl(fs_info,
						   "no csum found for inode %llu start %llu",
					       btrfs_ino(BTRFS_I(inode)), offset);
				}
				item = NULL;
				btrfs_release_path(path);
				goto found;
			}
			btrfs_item_key_to_cpu(path->nodes[0], &found_key,
					      path->slots[0]);

			/* Remember the byte range covered by this csum item. */
			item_start_offset = found_key.offset;
			item_size = btrfs_item_size_nr(path->nodes[0],
						       path->slots[0]);
			item_last_offset = item_start_offset +
				(item_size / csum_size) *
				fs_info->sectorsize;
			item = btrfs_item_ptr(path->nodes[0], path->slots[0],
					      struct btrfs_csum_item);
		}
		/*
		 * this byte range must be able to fit inside
		 * a single leaf so it will also fit inside a u32
		 */
		diff = disk_bytenr - item_start_offset;
		diff = diff / fs_info->sectorsize;
		diff = diff * csum_size;
		/* Copy as many checksums as the item holds, up to nblocks. */
		count = min_t(int, nblocks, (item_last_offset - disk_bytenr) >>
					    inode->i_sb->s_blocksize_bits);
		read_extent_buffer(path->nodes[0], csum,
				   ((unsigned long)item) + diff,
				   csum_size * count);
found:
		csum += count * csum_size;
		nblocks -= count;
next:
		/*
		 * Walk the cursors over the blocks of this segment that now
		 * have a checksum; any leftover count carries over to the
		 * next segment.
		 */
		while (count > 0) {
			count--;
			disk_bytenr += fs_info->sectorsize;
			offset += fs_info->sectorsize;
			page_bytes_left -= fs_info->sectorsize;
			if (!page_bytes_left)
				break; /* move to next bio segment */
		}
	}

	WARN_ON_ONCE(count);
	btrfs_free_path(path);
	return BLK_STS_OK;
}
300 
/*
 * Collect the checksums stored in the csum tree @root for the byte range
 * [@start, @end] (@end inclusive; both @start and @end + 1 must be sector
 * aligned) and append them to @list as struct btrfs_ordered_sum entries.
 *
 * Each entry describes a contiguous run of checksummed bytes (->bytenr, ->len)
 * and holds at most max_ordered_sum_bytes() worth of data.  Parts of the range
 * that have no csum item are skipped.
 *
 * If @search_commit is set the commit root is searched without locking and
 * with forward readahead.
 *
 * Returns 0 on success or a negative errno.  On error every entry allocated
 * here is freed and @list is left as it was.
 */
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
			     struct list_head *list, int search_commit)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_key key;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_ordered_sum *sums;
	struct btrfs_csum_item *item;
	LIST_HEAD(tmplist);
	unsigned long offset;
	int ret;
	size_t size;
	u64 csum_end;
	u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);

	ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
	       IS_ALIGNED(end + 1, fs_info->sectorsize));

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	if (search_commit) {
		path->skip_locking = 1;
		path->reada = READA_FORWARD;
		path->search_commit_root = 1;
	}

	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
	key.offset = start;
	key.type = BTRFS_EXTENT_CSUM_KEY;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto fail;
	/*
	 * No item starts exactly at @start: step back one slot if the
	 * previous csum item in this leaf extends over @start.
	 */
	if (ret > 0 && path->slots[0] > 0) {
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
		if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
		    key.type == BTRFS_EXTENT_CSUM_KEY) {
			offset = (start - key.offset) >>
				 fs_info->sb->s_blocksize_bits;
			if (offset * csum_size <
			    btrfs_item_size_nr(leaf, path->slots[0] - 1))
				path->slots[0]--;
		}
	}

	while (start <= end) {
		leaf = path->nodes[0];
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto fail;
			/* No more leaves: nothing left to collect. */
			if (ret > 0)
				break;
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
		    key.type != BTRFS_EXTENT_CSUM_KEY ||
		    key.offset > end)
			break;

		/* Skip over any gap that has no checksums. */
		if (key.offset > start)
			start = key.offset;

		size = btrfs_item_size_nr(leaf, path->slots[0]);
		/* First byte past the range covered by this csum item. */
		csum_end = key.offset + (size / csum_size) * fs_info->sectorsize;
		if (csum_end <= start) {
			path->slots[0]++;
			continue;
		}

		csum_end = min(csum_end, end + 1);
		item = btrfs_item_ptr(path->nodes[0], path->slots[0],
				      struct btrfs_csum_item);
		/* Copy [start, csum_end) out of the item in bounded chunks. */
		while (start < csum_end) {
			size = min_t(size_t, csum_end - start,
				     max_ordered_sum_bytes(fs_info, csum_size));
			sums = kzalloc(btrfs_ordered_sum_size(fs_info, size),
				       GFP_NOFS);
			if (!sums) {
				ret = -ENOMEM;
				goto fail;
			}

			sums->bytenr = start;
			sums->len = (int)size;

			/* Byte offset of the first wanted csum in the item. */
			offset = (start - key.offset) >>
				fs_info->sb->s_blocksize_bits;
			offset *= csum_size;
			/* From here on size counts blocks, not bytes. */
			size >>= fs_info->sb->s_blocksize_bits;

			read_extent_buffer(path->nodes[0],
					   sums->sums,
					   ((unsigned long)item) + offset,
					   csum_size * size);

			start += fs_info->sectorsize * size;
			list_add_tail(&sums->list, &tmplist);
		}
		path->slots[0]++;
	}
	ret = 0;
fail:
	/* On error drop everything gathered so far. */
	while (ret < 0 && !list_empty(&tmplist)) {
		sums = list_entry(tmplist.next, struct btrfs_ordered_sum, list);
		list_del(&sums->list);
		kfree(sums);
	}
	/* tmplist is empty after an error, so this only adds on success. */
	list_splice_tail(&tmplist, list);

	btrfs_free_path(path);
	return ret;
}
420 
/*
 * btrfs_csum_one_bio - Calculates checksums of the data contained inside a bio
 * @inode:	 Owner of the data inside the bio
 * @bio:	 Contains the data to be checksummed
 * @file_start:  offset in file this bio begins to describe
 * @contig:	 Boolean. If true/1 means all bio vecs in this bio are
 *		 contiguous and they begin at @file_start in the file. False/0
 *		 means this bio can contain potentially discontiguous bio vecs
 *		 so the logical offset of each should be calculated separately.
 *
 * One checksum is computed per sector and stored in a struct btrfs_ordered_sum
 * which is attached to the ordered extent covering the file offset.  When the
 * bio crosses into another ordered extent, the current sums are attached and a
 * new btrfs_ordered_sum is started for the remaining bytes.
 *
 * Return: BLK_STS_RESOURCE if the initial allocation fails, 0 otherwise.
 */
blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
		       u64 file_start, int contig)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
	struct btrfs_ordered_sum *sums;
	struct btrfs_ordered_extent *ordered = NULL;
	char *data;
	struct bvec_iter iter;
	struct bio_vec bvec;
	int index;
	int nr_sectors;
	unsigned long total_bytes = 0;
	unsigned long this_sum_bytes = 0;
	int i;
	u64 offset;
	unsigned nofs_flag;
	const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);

	/* GFP_KERNEL allocation performed inside a NOFS scope. */
	nofs_flag = memalloc_nofs_save();
	sums = kvzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size),
		       GFP_KERNEL);
	memalloc_nofs_restore(nofs_flag);

	if (!sums)
		return BLK_STS_RESOURCE;

	sums->len = bio->bi_iter.bi_size;
	INIT_LIST_HEAD(&sums->list);

	if (contig)
		offset = file_start;
	else
		offset = 0; /* shut up gcc */

	/* bi_sector is in 512-byte units; convert to a byte address. */
	sums->bytenr = (u64)bio->bi_iter.bi_sector << 9;
	index = 0;

	shash->tfm = fs_info->csum_shash;

	bio_for_each_segment(bvec, bio, iter) {
		if (!contig)
			offset = page_offset(bvec.bv_page) + bvec.bv_offset;

		if (!ordered) {
			ordered = btrfs_lookup_ordered_extent(inode, offset);
			BUG_ON(!ordered); /* Logic error */
		}

		/* Number of sectors in this segment, rounded up. */
		nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info,
						 bvec.bv_len + fs_info->sectorsize
						 - 1);

		for (i = 0; i < nr_sectors; i++) {
			/*
			 * The offset left the current ordered extent: hand
			 * over the sums gathered so far and start a new set
			 * for the ordered extent that covers the new offset.
			 */
			if (offset >= ordered->file_offset + ordered->num_bytes ||
			    offset < ordered->file_offset) {
				unsigned long bytes_left;

				sums->len = this_sum_bytes;
				this_sum_bytes = 0;
				btrfs_add_ordered_sum(ordered, sums);
				btrfs_put_ordered_extent(ordered);

				bytes_left = bio->bi_iter.bi_size - total_bytes;

				nofs_flag = memalloc_nofs_save();
				sums = kvzalloc(btrfs_ordered_sum_size(fs_info,
						      bytes_left), GFP_KERNEL);
				memalloc_nofs_restore(nofs_flag);
				/*
				 * NOTE(review): an allocation failure here
				 * crashes instead of returning
				 * BLK_STS_RESOURCE like the first allocation.
				 */
				BUG_ON(!sums); /* -ENOMEM */
				sums->len = bytes_left;
				ordered = btrfs_lookup_ordered_extent(inode,
								offset);
				/*
				 * NOTE(review): the first lookup uses BUG_ON();
				 * this one only ASSERT()s before ordered is
				 * dereferenced.
				 */
				ASSERT(ordered); /* Logic error */
				sums->bytenr = ((u64)bio->bi_iter.bi_sector << 9)
					+ total_bytes;
				index = 0;
			}

			/* Checksum one sector of the mapped page. */
			crypto_shash_init(shash);
			data = kmap_atomic(bvec.bv_page);
			crypto_shash_update(shash, data + bvec.bv_offset
					    + (i * fs_info->sectorsize),
					    fs_info->sectorsize);
			kunmap_atomic(data);
			crypto_shash_final(shash, (char *)(sums->sums + index));
			index += csum_size;
			offset += fs_info->sectorsize;
			this_sum_bytes += fs_info->sectorsize;
			total_bytes += fs_info->sectorsize;
		}

	}
	/* NOTE(review): this store is never read afterwards. */
	this_sum_bytes = 0;
	/* Attach the last (or only) set of sums to its ordered extent. */
	btrfs_add_ordered_sum(ordered, sums);
	btrfs_put_ordered_extent(ordered);
	return 0;
}
529 
530 /*
531  * helper function for csum removal, this expects the
532  * key to describe the csum pointed to by the path, and it expects
533  * the csum to overlap the range [bytenr, len]
534  *
535  * The csum should not be entirely contained in the range and the
536  * range should not be entirely contained in the csum.
537  *
538  * This calls btrfs_truncate_item with the correct args based on the
539  * overlap, and fixes up the key as required.
540  */
541 static noinline void truncate_one_csum(struct btrfs_fs_info *fs_info,
542 				       struct btrfs_path *path,
543 				       struct btrfs_key *key,
544 				       u64 bytenr, u64 len)
545 {
546 	struct extent_buffer *leaf;
547 	u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
548 	u64 csum_end;
549 	u64 end_byte = bytenr + len;
550 	u32 blocksize_bits = fs_info->sb->s_blocksize_bits;
551 
552 	leaf = path->nodes[0];
553 	csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size;
554 	csum_end <<= fs_info->sb->s_blocksize_bits;
555 	csum_end += key->offset;
556 
557 	if (key->offset < bytenr && csum_end <= end_byte) {
558 		/*
559 		 *         [ bytenr - len ]
560 		 *         [   ]
561 		 *   [csum     ]
562 		 *   A simple truncate off the end of the item
563 		 */
564 		u32 new_size = (bytenr - key->offset) >> blocksize_bits;
565 		new_size *= csum_size;
566 		btrfs_truncate_item(path, new_size, 1);
567 	} else if (key->offset >= bytenr && csum_end > end_byte &&
568 		   end_byte > key->offset) {
569 		/*
570 		 *         [ bytenr - len ]
571 		 *                 [ ]
572 		 *                 [csum     ]
573 		 * we need to truncate from the beginning of the csum
574 		 */
575 		u32 new_size = (csum_end - end_byte) >> blocksize_bits;
576 		new_size *= csum_size;
577 
578 		btrfs_truncate_item(path, new_size, 0);
579 
580 		key->offset = end_byte;
581 		btrfs_set_item_key_safe(fs_info, path, key);
582 	} else {
583 		BUG();
584 	}
585 }
586 
587 /*
588  * deletes the csum items from the csum tree for a given
589  * range of bytes.
590  */
591 int btrfs_del_csums(struct btrfs_trans_handle *trans,
592 		    struct btrfs_root *root, u64 bytenr, u64 len)
593 {
594 	struct btrfs_fs_info *fs_info = trans->fs_info;
595 	struct btrfs_path *path;
596 	struct btrfs_key key;
597 	u64 end_byte = bytenr + len;
598 	u64 csum_end;
599 	struct extent_buffer *leaf;
600 	int ret;
601 	u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
602 	int blocksize_bits = fs_info->sb->s_blocksize_bits;
603 
604 	ASSERT(root == fs_info->csum_root ||
605 	       root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
606 
607 	path = btrfs_alloc_path();
608 	if (!path)
609 		return -ENOMEM;
610 
611 	while (1) {
612 		key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
613 		key.offset = end_byte - 1;
614 		key.type = BTRFS_EXTENT_CSUM_KEY;
615 
616 		path->leave_spinning = 1;
617 		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
618 		if (ret > 0) {
619 			if (path->slots[0] == 0)
620 				break;
621 			path->slots[0]--;
622 		} else if (ret < 0) {
623 			break;
624 		}
625 
626 		leaf = path->nodes[0];
627 		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
628 
629 		if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
630 		    key.type != BTRFS_EXTENT_CSUM_KEY) {
631 			break;
632 		}
633 
634 		if (key.offset >= end_byte)
635 			break;
636 
637 		csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size;
638 		csum_end <<= blocksize_bits;
639 		csum_end += key.offset;
640 
641 		/* this csum ends before we start, we're done */
642 		if (csum_end <= bytenr)
643 			break;
644 
645 		/* delete the entire item, it is inside our range */
646 		if (key.offset >= bytenr && csum_end <= end_byte) {
647 			int del_nr = 1;
648 
649 			/*
650 			 * Check how many csum items preceding this one in this
651 			 * leaf correspond to our range and then delete them all
652 			 * at once.
653 			 */
654 			if (key.offset > bytenr && path->slots[0] > 0) {
655 				int slot = path->slots[0] - 1;
656 
657 				while (slot >= 0) {
658 					struct btrfs_key pk;
659 
660 					btrfs_item_key_to_cpu(leaf, &pk, slot);
661 					if (pk.offset < bytenr ||
662 					    pk.type != BTRFS_EXTENT_CSUM_KEY ||
663 					    pk.objectid !=
664 					    BTRFS_EXTENT_CSUM_OBJECTID)
665 						break;
666 					path->slots[0] = slot;
667 					del_nr++;
668 					key.offset = pk.offset;
669 					slot--;
670 				}
671 			}
672 			ret = btrfs_del_items(trans, root, path,
673 					      path->slots[0], del_nr);
674 			if (ret)
675 				goto out;
676 			if (key.offset == bytenr)
677 				break;
678 		} else if (key.offset < bytenr && csum_end > end_byte) {
679 			unsigned long offset;
680 			unsigned long shift_len;
681 			unsigned long item_offset;
682 			/*
683 			 *        [ bytenr - len ]
684 			 *     [csum                ]
685 			 *
686 			 * Our bytes are in the middle of the csum,
687 			 * we need to split this item and insert a new one.
688 			 *
689 			 * But we can't drop the path because the
690 			 * csum could change, get removed, extended etc.
691 			 *
692 			 * The trick here is the max size of a csum item leaves
693 			 * enough room in the tree block for a single
694 			 * item header.  So, we split the item in place,
695 			 * adding a new header pointing to the existing
696 			 * bytes.  Then we loop around again and we have
697 			 * a nicely formed csum item that we can neatly
698 			 * truncate.
699 			 */
700 			offset = (bytenr - key.offset) >> blocksize_bits;
701 			offset *= csum_size;
702 
703 			shift_len = (len >> blocksize_bits) * csum_size;
704 
705 			item_offset = btrfs_item_ptr_offset(leaf,
706 							    path->slots[0]);
707 
708 			memzero_extent_buffer(leaf, item_offset + offset,
709 					     shift_len);
710 			key.offset = bytenr;
711 
712 			/*
713 			 * btrfs_split_item returns -EAGAIN when the
714 			 * item changed size or key
715 			 */
716 			ret = btrfs_split_item(trans, root, path, &key, offset);
717 			if (ret && ret != -EAGAIN) {
718 				btrfs_abort_transaction(trans, ret);
719 				goto out;
720 			}
721 
722 			key.offset = end_byte - 1;
723 		} else {
724 			truncate_one_csum(fs_info, path, &key, bytenr, len);
725 			if (key.offset < bytenr)
726 				break;
727 		}
728 		btrfs_release_path(path);
729 	}
730 	ret = 0;
731 out:
732 	btrfs_free_path(path);
733 	return ret;
734 }
735 
736 int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
737 			   struct btrfs_root *root,
738 			   struct btrfs_ordered_sum *sums)
739 {
740 	struct btrfs_fs_info *fs_info = root->fs_info;
741 	struct btrfs_key file_key;
742 	struct btrfs_key found_key;
743 	struct btrfs_path *path;
744 	struct btrfs_csum_item *item;
745 	struct btrfs_csum_item *item_end;
746 	struct extent_buffer *leaf = NULL;
747 	u64 next_offset;
748 	u64 total_bytes = 0;
749 	u64 csum_offset;
750 	u64 bytenr;
751 	u32 nritems;
752 	u32 ins_size;
753 	int index = 0;
754 	int found_next;
755 	int ret;
756 	u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
757 
758 	path = btrfs_alloc_path();
759 	if (!path)
760 		return -ENOMEM;
761 again:
762 	next_offset = (u64)-1;
763 	found_next = 0;
764 	bytenr = sums->bytenr + total_bytes;
765 	file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
766 	file_key.offset = bytenr;
767 	file_key.type = BTRFS_EXTENT_CSUM_KEY;
768 
769 	item = btrfs_lookup_csum(trans, root, path, bytenr, 1);
770 	if (!IS_ERR(item)) {
771 		ret = 0;
772 		leaf = path->nodes[0];
773 		item_end = btrfs_item_ptr(leaf, path->slots[0],
774 					  struct btrfs_csum_item);
775 		item_end = (struct btrfs_csum_item *)((char *)item_end +
776 			   btrfs_item_size_nr(leaf, path->slots[0]));
777 		goto found;
778 	}
779 	ret = PTR_ERR(item);
780 	if (ret != -EFBIG && ret != -ENOENT)
781 		goto fail_unlock;
782 
783 	if (ret == -EFBIG) {
784 		u32 item_size;
785 		/* we found one, but it isn't big enough yet */
786 		leaf = path->nodes[0];
787 		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
788 		if ((item_size / csum_size) >=
789 		    MAX_CSUM_ITEMS(fs_info, csum_size)) {
790 			/* already at max size, make a new one */
791 			goto insert;
792 		}
793 	} else {
794 		int slot = path->slots[0] + 1;
795 		/* we didn't find a csum item, insert one */
796 		nritems = btrfs_header_nritems(path->nodes[0]);
797 		if (!nritems || (path->slots[0] >= nritems - 1)) {
798 			ret = btrfs_next_leaf(root, path);
799 			if (ret == 1)
800 				found_next = 1;
801 			if (ret != 0)
802 				goto insert;
803 			slot = path->slots[0];
804 		}
805 		btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
806 		if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
807 		    found_key.type != BTRFS_EXTENT_CSUM_KEY) {
808 			found_next = 1;
809 			goto insert;
810 		}
811 		next_offset = found_key.offset;
812 		found_next = 1;
813 		goto insert;
814 	}
815 
816 	/*
817 	 * at this point, we know the tree has an item, but it isn't big
818 	 * enough yet to put our csum in.  Grow it
819 	 */
820 	btrfs_release_path(path);
821 	ret = btrfs_search_slot(trans, root, &file_key, path,
822 				csum_size, 1);
823 	if (ret < 0)
824 		goto fail_unlock;
825 
826 	if (ret > 0) {
827 		if (path->slots[0] == 0)
828 			goto insert;
829 		path->slots[0]--;
830 	}
831 
832 	leaf = path->nodes[0];
833 	btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
834 	csum_offset = (bytenr - found_key.offset) >>
835 			fs_info->sb->s_blocksize_bits;
836 
837 	if (found_key.type != BTRFS_EXTENT_CSUM_KEY ||
838 	    found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
839 	    csum_offset >= MAX_CSUM_ITEMS(fs_info, csum_size)) {
840 		goto insert;
841 	}
842 
843 	if (csum_offset == btrfs_item_size_nr(leaf, path->slots[0]) /
844 	    csum_size) {
845 		int extend_nr;
846 		u64 tmp;
847 		u32 diff;
848 		u32 free_space;
849 
850 		if (btrfs_leaf_free_space(leaf) <
851 				 sizeof(struct btrfs_item) + csum_size * 2)
852 			goto insert;
853 
854 		free_space = btrfs_leaf_free_space(leaf) -
855 					 sizeof(struct btrfs_item) - csum_size;
856 		tmp = sums->len - total_bytes;
857 		tmp >>= fs_info->sb->s_blocksize_bits;
858 		WARN_ON(tmp < 1);
859 
860 		extend_nr = max_t(int, 1, (int)tmp);
861 		diff = (csum_offset + extend_nr) * csum_size;
862 		diff = min(diff,
863 			   MAX_CSUM_ITEMS(fs_info, csum_size) * csum_size);
864 
865 		diff = diff - btrfs_item_size_nr(leaf, path->slots[0]);
866 		diff = min(free_space, diff);
867 		diff /= csum_size;
868 		diff *= csum_size;
869 
870 		btrfs_extend_item(path, diff);
871 		ret = 0;
872 		goto csum;
873 	}
874 
875 insert:
876 	btrfs_release_path(path);
877 	csum_offset = 0;
878 	if (found_next) {
879 		u64 tmp;
880 
881 		tmp = sums->len - total_bytes;
882 		tmp >>= fs_info->sb->s_blocksize_bits;
883 		tmp = min(tmp, (next_offset - file_key.offset) >>
884 					 fs_info->sb->s_blocksize_bits);
885 
886 		tmp = max_t(u64, 1, tmp);
887 		tmp = min_t(u64, tmp, MAX_CSUM_ITEMS(fs_info, csum_size));
888 		ins_size = csum_size * tmp;
889 	} else {
890 		ins_size = csum_size;
891 	}
892 	path->leave_spinning = 1;
893 	ret = btrfs_insert_empty_item(trans, root, path, &file_key,
894 				      ins_size);
895 	path->leave_spinning = 0;
896 	if (ret < 0)
897 		goto fail_unlock;
898 	if (WARN_ON(ret != 0))
899 		goto fail_unlock;
900 	leaf = path->nodes[0];
901 csum:
902 	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
903 	item_end = (struct btrfs_csum_item *)((unsigned char *)item +
904 				      btrfs_item_size_nr(leaf, path->slots[0]));
905 	item = (struct btrfs_csum_item *)((unsigned char *)item +
906 					  csum_offset * csum_size);
907 found:
908 	ins_size = (u32)(sums->len - total_bytes) >>
909 		   fs_info->sb->s_blocksize_bits;
910 	ins_size *= csum_size;
911 	ins_size = min_t(u32, (unsigned long)item_end - (unsigned long)item,
912 			      ins_size);
913 	write_extent_buffer(leaf, sums->sums + index, (unsigned long)item,
914 			    ins_size);
915 
916 	index += ins_size;
917 	ins_size /= csum_size;
918 	total_bytes += ins_size * fs_info->sectorsize;
919 
920 	btrfs_mark_buffer_dirty(path->nodes[0]);
921 	if (total_bytes < sums->len) {
922 		btrfs_release_path(path);
923 		cond_resched();
924 		goto again;
925 	}
926 out:
927 	btrfs_free_path(path);
928 	return ret;
929 
930 fail_unlock:
931 	goto out;
932 }
933 
934 void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
935 				     const struct btrfs_path *path,
936 				     struct btrfs_file_extent_item *fi,
937 				     const bool new_inline,
938 				     struct extent_map *em)
939 {
940 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
941 	struct btrfs_root *root = inode->root;
942 	struct extent_buffer *leaf = path->nodes[0];
943 	const int slot = path->slots[0];
944 	struct btrfs_key key;
945 	u64 extent_start, extent_end;
946 	u64 bytenr;
947 	u8 type = btrfs_file_extent_type(leaf, fi);
948 	int compress_type = btrfs_file_extent_compression(leaf, fi);
949 
950 	btrfs_item_key_to_cpu(leaf, &key, slot);
951 	extent_start = key.offset;
952 
953 	if (type == BTRFS_FILE_EXTENT_REG ||
954 	    type == BTRFS_FILE_EXTENT_PREALLOC) {
955 		extent_end = extent_start +
956 			btrfs_file_extent_num_bytes(leaf, fi);
957 	} else if (type == BTRFS_FILE_EXTENT_INLINE) {
958 		size_t size;
959 		size = btrfs_file_extent_ram_bytes(leaf, fi);
960 		extent_end = ALIGN(extent_start + size,
961 				   fs_info->sectorsize);
962 	}
963 
964 	em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
965 	if (type == BTRFS_FILE_EXTENT_REG ||
966 	    type == BTRFS_FILE_EXTENT_PREALLOC) {
967 		em->start = extent_start;
968 		em->len = extent_end - extent_start;
969 		em->orig_start = extent_start -
970 			btrfs_file_extent_offset(leaf, fi);
971 		em->orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
972 		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
973 		if (bytenr == 0) {
974 			em->block_start = EXTENT_MAP_HOLE;
975 			return;
976 		}
977 		if (compress_type != BTRFS_COMPRESS_NONE) {
978 			set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
979 			em->compress_type = compress_type;
980 			em->block_start = bytenr;
981 			em->block_len = em->orig_block_len;
982 		} else {
983 			bytenr += btrfs_file_extent_offset(leaf, fi);
984 			em->block_start = bytenr;
985 			em->block_len = em->len;
986 			if (type == BTRFS_FILE_EXTENT_PREALLOC)
987 				set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
988 		}
989 	} else if (type == BTRFS_FILE_EXTENT_INLINE) {
990 		em->block_start = EXTENT_MAP_INLINE;
991 		em->start = extent_start;
992 		em->len = extent_end - extent_start;
993 		/*
994 		 * Initialize orig_start and block_len with the same values
995 		 * as in inode.c:btrfs_get_extent().
996 		 */
997 		em->orig_start = EXTENT_MAP_HOLE;
998 		em->block_len = (u64)-1;
999 		if (!new_inline && compress_type != BTRFS_COMPRESS_NONE) {
1000 			set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
1001 			em->compress_type = compress_type;
1002 		}
1003 	} else {
1004 		btrfs_err(fs_info,
1005 			  "unknown file extent item type %d, inode %llu, offset %llu, "
1006 			  "root %llu", type, btrfs_ino(inode), extent_start,
1007 			  root->root_key.objectid);
1008 	}
1009 }
1010