xref: /openbmc/linux/fs/f2fs/extent_cache.c (revision d47a97bd)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * f2fs extent cache support
4  *
5  * Copyright (c) 2015 Motorola Mobility
6  * Copyright (c) 2015 Samsung Electronics
7  * Authors: Jaegeuk Kim <jaegeuk@kernel.org>
8  *          Chao Yu <chao2.yu@samsung.com>
9  *
10  * block_age-based extent cache added by:
11  * Copyright (c) 2022 xiaomi Co., Ltd.
12  *             http://www.xiaomi.com/
13  */
14 
15 #include <linux/fs.h>
16 #include <linux/f2fs_fs.h>
17 
18 #include "f2fs.h"
19 #include "node.h"
20 #include <trace/events/f2fs.h>
21 
22 bool sanity_check_extent_cache(struct inode *inode)
23 {
24 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
25 	struct f2fs_inode_info *fi = F2FS_I(inode);
26 	struct extent_info *ei;
27 
28 	if (!fi->extent_tree[EX_READ])
29 		return true;
30 
31 	ei = &fi->extent_tree[EX_READ]->largest;
32 
33 	if (ei->len &&
34 		(!f2fs_is_valid_blkaddr(sbi, ei->blk,
35 					DATA_GENERIC_ENHANCE) ||
36 		!f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
37 					DATA_GENERIC_ENHANCE))) {
38 		set_sbi_flag(sbi, SBI_NEED_FSCK);
39 		f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix",
40 			  __func__, inode->i_ino,
41 			  ei->blk, ei->fofs, ei->len);
42 		return false;
43 	}
44 	return true;
45 }
46 
47 static void __set_extent_info(struct extent_info *ei,
48 				unsigned int fofs, unsigned int len,
49 				block_t blk, bool keep_clen,
50 				unsigned long age, unsigned long last_blocks,
51 				enum extent_type type)
52 {
53 	ei->fofs = fofs;
54 	ei->len = len;
55 
56 	if (type == EX_READ) {
57 		ei->blk = blk;
58 		if (keep_clen)
59 			return;
60 #ifdef CONFIG_F2FS_FS_COMPRESSION
61 		ei->c_len = 0;
62 #endif
63 	} else if (type == EX_BLOCK_AGE) {
64 		ei->age = age;
65 		ei->last_blocks = last_blocks;
66 	}
67 }
68 
69 static bool __may_read_extent_tree(struct inode *inode)
70 {
71 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
72 
73 	if (!test_opt(sbi, READ_EXTENT_CACHE))
74 		return false;
75 	if (is_inode_flag_set(inode, FI_NO_EXTENT))
76 		return false;
77 	if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
78 			 !f2fs_sb_has_readonly(sbi))
79 		return false;
80 	return S_ISREG(inode->i_mode);
81 }
82 
83 static bool __may_age_extent_tree(struct inode *inode)
84 {
85 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
86 
87 	if (!test_opt(sbi, AGE_EXTENT_CACHE))
88 		return false;
89 	/* don't cache block age info for cold file */
90 	if (is_inode_flag_set(inode, FI_COMPRESSED_FILE))
91 		return false;
92 	if (file_is_cold(inode))
93 		return false;
94 
95 	return S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode);
96 }
97 
98 static bool __init_may_extent_tree(struct inode *inode, enum extent_type type)
99 {
100 	if (type == EX_READ)
101 		return __may_read_extent_tree(inode);
102 	else if (type == EX_BLOCK_AGE)
103 		return __may_age_extent_tree(inode);
104 	return false;
105 }
106 
107 static bool __may_extent_tree(struct inode *inode, enum extent_type type)
108 {
109 	/*
110 	 * for recovered files during mount do not create extents
111 	 * if shrinker is not registered.
112 	 */
113 	if (list_empty(&F2FS_I_SB(inode)->s_list))
114 		return false;
115 
116 	return __init_may_extent_tree(inode, type);
117 }
118 
119 static void __try_update_largest_extent(struct extent_tree *et,
120 						struct extent_node *en)
121 {
122 	if (et->type != EX_READ)
123 		return;
124 	if (en->ei.len <= et->largest.len)
125 		return;
126 
127 	et->largest = en->ei;
128 	et->largest_updated = true;
129 }
130 
131 static bool __is_extent_mergeable(struct extent_info *back,
132 		struct extent_info *front, enum extent_type type)
133 {
134 	if (type == EX_READ) {
135 #ifdef CONFIG_F2FS_FS_COMPRESSION
136 		if (back->c_len && back->len != back->c_len)
137 			return false;
138 		if (front->c_len && front->len != front->c_len)
139 			return false;
140 #endif
141 		return (back->fofs + back->len == front->fofs &&
142 				back->blk + back->len == front->blk);
143 	} else if (type == EX_BLOCK_AGE) {
144 		return (back->fofs + back->len == front->fofs &&
145 			abs(back->age - front->age) <= SAME_AGE_REGION &&
146 			abs(back->last_blocks - front->last_blocks) <=
147 							SAME_AGE_REGION);
148 	}
149 	return false;
150 }
151 
152 static bool __is_back_mergeable(struct extent_info *cur,
153 		struct extent_info *back, enum extent_type type)
154 {
155 	return __is_extent_mergeable(back, cur, type);
156 }
157 
158 static bool __is_front_mergeable(struct extent_info *cur,
159 		struct extent_info *front, enum extent_type type)
160 {
161 	return __is_extent_mergeable(cur, front, type);
162 }
163 
164 static struct rb_entry *__lookup_rb_tree_fast(struct rb_entry *cached_re,
165 							unsigned int ofs)
166 {
167 	if (cached_re) {
168 		if (cached_re->ofs <= ofs &&
169 				cached_re->ofs + cached_re->len > ofs) {
170 			return cached_re;
171 		}
172 	}
173 	return NULL;
174 }
175 
176 static struct rb_entry *__lookup_rb_tree_slow(struct rb_root_cached *root,
177 							unsigned int ofs)
178 {
179 	struct rb_node *node = root->rb_root.rb_node;
180 	struct rb_entry *re;
181 
182 	while (node) {
183 		re = rb_entry(node, struct rb_entry, rb_node);
184 
185 		if (ofs < re->ofs)
186 			node = node->rb_left;
187 		else if (ofs >= re->ofs + re->len)
188 			node = node->rb_right;
189 		else
190 			return re;
191 	}
192 	return NULL;
193 }
194 
/*
 * Look up the entry covering @ofs: try @cached_re first, then fall back
 * to a full walk of @root.  Returns NULL when no entry covers @ofs.
 */
struct rb_entry *f2fs_lookup_rb_tree(struct rb_root_cached *root,
				struct rb_entry *cached_re, unsigned int ofs)
{
	struct rb_entry *hit = __lookup_rb_tree_fast(cached_re, ofs);

	return hit ? hit : __lookup_rb_tree_slow(root, ofs);
}
206 
207 struct rb_node **f2fs_lookup_rb_tree_ext(struct f2fs_sb_info *sbi,
208 					struct rb_root_cached *root,
209 					struct rb_node **parent,
210 					unsigned long long key, bool *leftmost)
211 {
212 	struct rb_node **p = &root->rb_root.rb_node;
213 	struct rb_entry *re;
214 
215 	while (*p) {
216 		*parent = *p;
217 		re = rb_entry(*parent, struct rb_entry, rb_node);
218 
219 		if (key < re->key) {
220 			p = &(*p)->rb_left;
221 		} else {
222 			p = &(*p)->rb_right;
223 			*leftmost = false;
224 		}
225 	}
226 
227 	return p;
228 }
229 
230 struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
231 				struct rb_root_cached *root,
232 				struct rb_node **parent,
233 				unsigned int ofs, bool *leftmost)
234 {
235 	struct rb_node **p = &root->rb_root.rb_node;
236 	struct rb_entry *re;
237 
238 	while (*p) {
239 		*parent = *p;
240 		re = rb_entry(*parent, struct rb_entry, rb_node);
241 
242 		if (ofs < re->ofs) {
243 			p = &(*p)->rb_left;
244 		} else if (ofs >= re->ofs + re->len) {
245 			p = &(*p)->rb_right;
246 			*leftmost = false;
247 		} else {
248 			f2fs_bug_on(sbi, 1);
249 		}
250 	}
251 
252 	return p;
253 }
254 
255 /*
256  * lookup rb entry in position of @ofs in rb-tree,
257  * if hit, return the entry, otherwise, return NULL
258  * @prev_ex: extent before ofs
259  * @next_ex: extent after ofs
260  * @insert_p: insert point for new extent at ofs
261  * in order to simplify the insertion after.
262  * tree must stay unchanged between lookup and insertion.
263  */
264 struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
265 				struct rb_entry *cached_re,
266 				unsigned int ofs,
267 				struct rb_entry **prev_entry,
268 				struct rb_entry **next_entry,
269 				struct rb_node ***insert_p,
270 				struct rb_node **insert_parent,
271 				bool force, bool *leftmost)
272 {
273 	struct rb_node **pnode = &root->rb_root.rb_node;
274 	struct rb_node *parent = NULL, *tmp_node;
275 	struct rb_entry *re = cached_re;
276 
277 	*insert_p = NULL;
278 	*insert_parent = NULL;
279 	*prev_entry = NULL;
280 	*next_entry = NULL;
281 
282 	if (RB_EMPTY_ROOT(&root->rb_root))
283 		return NULL;
284 
285 	if (re) {
286 		if (re->ofs <= ofs && re->ofs + re->len > ofs)
287 			goto lookup_neighbors;
288 	}
289 
290 	if (leftmost)
291 		*leftmost = true;
292 
293 	while (*pnode) {
294 		parent = *pnode;
295 		re = rb_entry(*pnode, struct rb_entry, rb_node);
296 
297 		if (ofs < re->ofs) {
298 			pnode = &(*pnode)->rb_left;
299 		} else if (ofs >= re->ofs + re->len) {
300 			pnode = &(*pnode)->rb_right;
301 			if (leftmost)
302 				*leftmost = false;
303 		} else {
304 			goto lookup_neighbors;
305 		}
306 	}
307 
308 	*insert_p = pnode;
309 	*insert_parent = parent;
310 
311 	re = rb_entry(parent, struct rb_entry, rb_node);
312 	tmp_node = parent;
313 	if (parent && ofs > re->ofs)
314 		tmp_node = rb_next(parent);
315 	*next_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
316 
317 	tmp_node = parent;
318 	if (parent && ofs < re->ofs)
319 		tmp_node = rb_prev(parent);
320 	*prev_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
321 	return NULL;
322 
323 lookup_neighbors:
324 	if (ofs == re->ofs || force) {
325 		/* lookup prev node for merging backward later */
326 		tmp_node = rb_prev(&re->rb_node);
327 		*prev_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
328 	}
329 	if (ofs == re->ofs + re->len - 1 || force) {
330 		/* lookup next node for merging frontward later */
331 		tmp_node = rb_next(&re->rb_node);
332 		*next_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
333 	}
334 	return re;
335 }
336 
337 bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
338 				struct rb_root_cached *root, bool check_key)
339 {
340 #ifdef CONFIG_F2FS_CHECK_FS
341 	struct rb_node *cur = rb_first_cached(root), *next;
342 	struct rb_entry *cur_re, *next_re;
343 
344 	if (!cur)
345 		return true;
346 
347 	while (cur) {
348 		next = rb_next(cur);
349 		if (!next)
350 			return true;
351 
352 		cur_re = rb_entry(cur, struct rb_entry, rb_node);
353 		next_re = rb_entry(next, struct rb_entry, rb_node);
354 
355 		if (check_key) {
356 			if (cur_re->key > next_re->key) {
357 				f2fs_info(sbi, "inconsistent rbtree, "
358 					"cur(%llu) next(%llu)",
359 					cur_re->key, next_re->key);
360 				return false;
361 			}
362 			goto next;
363 		}
364 
365 		if (cur_re->ofs + cur_re->len > next_re->ofs) {
366 			f2fs_info(sbi, "inconsistent rbtree, cur(%u, %u) next(%u, %u)",
367 				  cur_re->ofs, cur_re->len,
368 				  next_re->ofs, next_re->len);
369 			return false;
370 		}
371 next:
372 		cur = next;
373 	}
374 #endif
375 	return true;
376 }
377 
/*
 * Slab caches used in this file to allocate and free struct extent_tree
 * (extent_tree_slab) and struct extent_node (extent_node_slab) objects.
 */
378 static struct kmem_cache *extent_tree_slab;
379 static struct kmem_cache *extent_node_slab;
380 
381 static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
382 				struct extent_tree *et, struct extent_info *ei,
383 				struct rb_node *parent, struct rb_node **p,
384 				bool leftmost)
385 {
386 	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
387 	struct extent_node *en;
388 
389 	en = f2fs_kmem_cache_alloc(extent_node_slab, GFP_ATOMIC, false, sbi);
390 	if (!en)
391 		return NULL;
392 
393 	en->ei = *ei;
394 	INIT_LIST_HEAD(&en->list);
395 	en->et = et;
396 
397 	rb_link_node(&en->rb_node, parent, p);
398 	rb_insert_color_cached(&en->rb_node, &et->root, leftmost);
399 	atomic_inc(&et->node_cnt);
400 	atomic_inc(&eti->total_ext_node);
401 	return en;
402 }
403 
404 static void __detach_extent_node(struct f2fs_sb_info *sbi,
405 				struct extent_tree *et, struct extent_node *en)
406 {
407 	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
408 
409 	rb_erase_cached(&en->rb_node, &et->root);
410 	atomic_dec(&et->node_cnt);
411 	atomic_dec(&eti->total_ext_node);
412 
413 	if (et->cached_en == en)
414 		et->cached_en = NULL;
415 	kmem_cache_free(extent_node_slab, en);
416 }
417 
418 /*
419  * Flow to release an extent_node:
420  * 1. list_del_init
421  * 2. __detach_extent_node
422  * 3. kmem_cache_free.
423  */
424 static void __release_extent_node(struct f2fs_sb_info *sbi,
425 			struct extent_tree *et, struct extent_node *en)
426 {
427 	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
428 
429 	spin_lock(&eti->extent_lock);
430 	f2fs_bug_on(sbi, list_empty(&en->list));
431 	list_del_init(&en->list);
432 	spin_unlock(&eti->extent_lock);
433 
434 	__detach_extent_node(sbi, et, en);
435 }
436 
437 static struct extent_tree *__grab_extent_tree(struct inode *inode,
438 						enum extent_type type)
439 {
440 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
441 	struct extent_tree_info *eti = &sbi->extent_tree[type];
442 	struct extent_tree *et;
443 	nid_t ino = inode->i_ino;
444 
445 	mutex_lock(&eti->extent_tree_lock);
446 	et = radix_tree_lookup(&eti->extent_tree_root, ino);
447 	if (!et) {
448 		et = f2fs_kmem_cache_alloc(extent_tree_slab,
449 					GFP_NOFS, true, NULL);
450 		f2fs_radix_tree_insert(&eti->extent_tree_root, ino, et);
451 		memset(et, 0, sizeof(struct extent_tree));
452 		et->ino = ino;
453 		et->type = type;
454 		et->root = RB_ROOT_CACHED;
455 		et->cached_en = NULL;
456 		rwlock_init(&et->lock);
457 		INIT_LIST_HEAD(&et->list);
458 		atomic_set(&et->node_cnt, 0);
459 		atomic_inc(&eti->total_ext_tree);
460 	} else {
461 		atomic_dec(&eti->total_zombie_tree);
462 		list_del_init(&et->list);
463 	}
464 	mutex_unlock(&eti->extent_tree_lock);
465 
466 	/* never died until evict_inode */
467 	F2FS_I(inode)->extent_tree[type] = et;
468 
469 	return et;
470 }
471 
472 static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
473 					struct extent_tree *et)
474 {
475 	struct rb_node *node, *next;
476 	struct extent_node *en;
477 	unsigned int count = atomic_read(&et->node_cnt);
478 
479 	node = rb_first_cached(&et->root);
480 	while (node) {
481 		next = rb_next(node);
482 		en = rb_entry(node, struct extent_node, rb_node);
483 		__release_extent_node(sbi, et, en);
484 		node = next;
485 	}
486 
487 	return count - atomic_read(&et->node_cnt);
488 }
489 
490 static void __drop_largest_extent(struct extent_tree *et,
491 					pgoff_t fofs, unsigned int len)
492 {
493 	if (fofs < et->largest.fofs + et->largest.len &&
494 			fofs + len > et->largest.fofs) {
495 		et->largest.len = 0;
496 		et->largest_updated = true;
497 	}
498 }
499 
500 void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage)
501 {
502 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
503 	struct extent_tree_info *eti = &sbi->extent_tree[EX_READ];
504 	struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext;
505 	struct extent_tree *et;
506 	struct extent_node *en;
507 	struct extent_info ei;
508 
509 	if (!__may_extent_tree(inode, EX_READ)) {
510 		/* drop largest read extent */
511 		if (i_ext && i_ext->len) {
512 			f2fs_wait_on_page_writeback(ipage, NODE, true, true);
513 			i_ext->len = 0;
514 			set_page_dirty(ipage);
515 		}
516 		goto out;
517 	}
518 
519 	et = __grab_extent_tree(inode, EX_READ);
520 
521 	if (!i_ext || !i_ext->len)
522 		goto out;
523 
524 	get_read_extent_info(&ei, i_ext);
525 
526 	write_lock(&et->lock);
527 	if (atomic_read(&et->node_cnt))
528 		goto unlock_out;
529 
530 	en = __attach_extent_node(sbi, et, &ei, NULL,
531 				&et->root.rb_root.rb_node, true);
532 	if (en) {
533 		et->largest = en->ei;
534 		et->cached_en = en;
535 
536 		spin_lock(&eti->extent_lock);
537 		list_add_tail(&en->list, &eti->extent_list);
538 		spin_unlock(&eti->extent_lock);
539 	}
540 unlock_out:
541 	write_unlock(&et->lock);
542 out:
543 	if (!F2FS_I(inode)->extent_tree[EX_READ])
544 		set_inode_flag(inode, FI_NO_EXTENT);
545 }
546 
547 void f2fs_init_age_extent_tree(struct inode *inode)
548 {
549 	if (!__init_may_extent_tree(inode, EX_BLOCK_AGE))
550 		return;
551 	__grab_extent_tree(inode, EX_BLOCK_AGE);
552 }
553 
554 void f2fs_init_extent_tree(struct inode *inode)
555 {
556 	/* initialize read cache */
557 	if (__init_may_extent_tree(inode, EX_READ))
558 		__grab_extent_tree(inode, EX_READ);
559 
560 	/* initialize block age cache */
561 	if (__init_may_extent_tree(inode, EX_BLOCK_AGE))
562 		__grab_extent_tree(inode, EX_BLOCK_AGE);
563 }
564 
565 static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
566 			struct extent_info *ei, enum extent_type type)
567 {
568 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
569 	struct extent_tree_info *eti = &sbi->extent_tree[type];
570 	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
571 	struct extent_node *en;
572 	bool ret = false;
573 
574 	if (!et)
575 		return false;
576 
577 	trace_f2fs_lookup_extent_tree_start(inode, pgofs, type);
578 
579 	read_lock(&et->lock);
580 
581 	if (type == EX_READ &&
582 			et->largest.fofs <= pgofs &&
583 			et->largest.fofs + et->largest.len > pgofs) {
584 		*ei = et->largest;
585 		ret = true;
586 		stat_inc_largest_node_hit(sbi);
587 		goto out;
588 	}
589 
590 	en = (struct extent_node *)f2fs_lookup_rb_tree(&et->root,
591 				(struct rb_entry *)et->cached_en, pgofs);
592 	if (!en)
593 		goto out;
594 
595 	if (en == et->cached_en)
596 		stat_inc_cached_node_hit(sbi, type);
597 	else
598 		stat_inc_rbtree_node_hit(sbi, type);
599 
600 	*ei = en->ei;
601 	spin_lock(&eti->extent_lock);
602 	if (!list_empty(&en->list)) {
603 		list_move_tail(&en->list, &eti->extent_list);
604 		et->cached_en = en;
605 	}
606 	spin_unlock(&eti->extent_lock);
607 	ret = true;
608 out:
609 	stat_inc_total_hit(sbi, type);
610 	read_unlock(&et->lock);
611 
612 	if (type == EX_READ)
613 		trace_f2fs_lookup_read_extent_tree_end(inode, pgofs, ei);
614 	else if (type == EX_BLOCK_AGE)
615 		trace_f2fs_lookup_age_extent_tree_end(inode, pgofs, ei);
616 	return ret;
617 }
618 
619 static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
620 				struct extent_tree *et, struct extent_info *ei,
621 				struct extent_node *prev_ex,
622 				struct extent_node *next_ex)
623 {
624 	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
625 	struct extent_node *en = NULL;
626 
627 	if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei, et->type)) {
628 		prev_ex->ei.len += ei->len;
629 		ei = &prev_ex->ei;
630 		en = prev_ex;
631 	}
632 
633 	if (next_ex && __is_front_mergeable(ei, &next_ex->ei, et->type)) {
634 		next_ex->ei.fofs = ei->fofs;
635 		next_ex->ei.len += ei->len;
636 		if (et->type == EX_READ)
637 			next_ex->ei.blk = ei->blk;
638 		if (en)
639 			__release_extent_node(sbi, et, prev_ex);
640 
641 		en = next_ex;
642 	}
643 
644 	if (!en)
645 		return NULL;
646 
647 	__try_update_largest_extent(et, en);
648 
649 	spin_lock(&eti->extent_lock);
650 	if (!list_empty(&en->list)) {
651 		list_move_tail(&en->list, &eti->extent_list);
652 		et->cached_en = en;
653 	}
654 	spin_unlock(&eti->extent_lock);
655 	return en;
656 }
657 
658 static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
659 				struct extent_tree *et, struct extent_info *ei,
660 				struct rb_node **insert_p,
661 				struct rb_node *insert_parent,
662 				bool leftmost)
663 {
664 	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
665 	struct rb_node **p;
666 	struct rb_node *parent = NULL;
667 	struct extent_node *en = NULL;
668 
669 	if (insert_p && insert_parent) {
670 		parent = insert_parent;
671 		p = insert_p;
672 		goto do_insert;
673 	}
674 
675 	leftmost = true;
676 
677 	p = f2fs_lookup_rb_tree_for_insert(sbi, &et->root, &parent,
678 						ei->fofs, &leftmost);
679 do_insert:
680 	en = __attach_extent_node(sbi, et, ei, parent, p, leftmost);
681 	if (!en)
682 		return NULL;
683 
684 	__try_update_largest_extent(et, en);
685 
686 	/* update in global extent list */
687 	spin_lock(&eti->extent_lock);
688 	list_add_tail(&en->list, &eti->extent_list);
689 	et->cached_en = en;
690 	spin_unlock(&eti->extent_lock);
691 	return en;
692 }
693 
/*
 * Apply the update described by @tei to @inode's extent tree of @type.
 *
 * With the tree's write lock held, every cached extent overlapping
 * [tei->fofs, tei->fofs + tei->len) is trimmed, split or released.  A
 * replacement extent is then merged with its neighbours or inserted:
 * for EX_READ only when tei->blk is non-zero, for EX_BLOCK_AGE only when
 * tei->last_blocks is non-zero.
 *
 * Nothing is done if the inode has no tree of this type, or, for
 * EX_READ, if FI_NO_EXTENT is set on the inode.  After the lock is
 * dropped the inode is marked dirty if et->largest_updated had been set.
 */
694 static void __update_extent_tree_range(struct inode *inode,
695 			struct extent_info *tei, enum extent_type type)
696 {
697 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
698 	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
699 	struct extent_node *en = NULL, *en1 = NULL;
700 	struct extent_node *prev_en = NULL, *next_en = NULL;
701 	struct extent_info ei, dei, prev;
702 	struct rb_node **insert_p = NULL, *insert_parent = NULL;
703 	unsigned int fofs = tei->fofs, len = tei->len;
704 	unsigned int end = fofs + len;
705 	bool updated = false;
706 	bool leftmost = false;
707 
708 	if (!et)
709 		return;
710 
711 	if (type == EX_READ)
712 		trace_f2fs_update_read_extent_tree_range(inode, fofs, len,
713 						tei->blk, 0);
714 	else if (type == EX_BLOCK_AGE)
715 		trace_f2fs_update_age_extent_tree_range(inode, fofs, len,
716 						tei->age, tei->last_blocks);
717 
718 	write_lock(&et->lock);
719 
720 	if (type == EX_READ) {
721 		if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
722 			write_unlock(&et->lock);
723 			return;
724 		}
725 
		/*
		 * prev and dei are only consulted by the "give up" check in
		 * step 3: prev is the largest extent before this update and
		 * dei.len stays 0 unless the loop below visits an extent.
		 */
726 		prev = et->largest;
727 		dei.len = 0;
728 
729 		/*
730 		 * drop largest extent before lookup, in case it's already
731 		 * been shrunk from extent tree
732 		 */
733 		__drop_largest_extent(et, fofs, len);
734 	}
735 
736 	/* 1. lookup first extent node in range [fofs, fofs + len - 1] */
737 	en = (struct extent_node *)f2fs_lookup_rb_tree_ret(&et->root,
738 					(struct rb_entry *)et->cached_en, fofs,
739 					(struct rb_entry **)&prev_en,
740 					(struct rb_entry **)&next_en,
741 					&insert_p, &insert_parent, false,
742 					&leftmost);
	/* nothing covers fofs: start from the first extent after it, if any */
743 	if (!en)
744 		en = next_en;
745 
746 	/* 2. invalidate all extent nodes in range [fofs, fofs + len - 1] */
747 	while (en && en->ei.fofs < end) {
748 		unsigned int org_end;
749 		int parts = 0;	/* # of parts current extent split into */
750 
751 		next_en = en1 = NULL;
752 
		/* dei keeps the extent as it was before it gets modified */
753 		dei = en->ei;
754 		org_end = dei.fofs + dei.len;
755 		f2fs_bug_on(sbi, fofs >= org_end);
756 
		/*
		 * Keep the head [dei.fofs, fofs) by shrinking en in place;
		 * for EX_READ only if it is at least F2FS_MIN_EXTENT_LEN long.
		 */
757 		if (fofs > dei.fofs && (type != EX_READ ||
758 				fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN)) {
759 			en->ei.len = fofs - en->ei.fofs;
760 			prev_en = en;
761 			parts = 1;
762 		}
763 
		/*
		 * Keep the tail [end, org_end), with the same length rule for
		 * EX_READ.  If the head was kept as well, the tail needs a new
		 * node; otherwise en itself is turned into the tail.
		 */
764 		if (end < org_end && (type != EX_READ ||
765 				org_end - end >= F2FS_MIN_EXTENT_LEN)) {
766 			if (parts) {
767 				__set_extent_info(&ei,
768 					end, org_end - end,
769 					end - dei.fofs + dei.blk, false,
770 					dei.age, dei.last_blocks,
771 					type);
772 				en1 = __insert_extent_tree(sbi, et, &ei,
773 							NULL, NULL, true);
774 				next_en = en1;
775 			} else {
776 				__set_extent_info(&en->ei,
777 					end, en->ei.len - (end - dei.fofs),
778 					en->ei.blk + (end - dei.fofs), true,
779 					dei.age, dei.last_blocks,
780 					type);
781 				next_en = en;
782 			}
783 			parts++;
784 		}
785 
		/* no tail node: continue with the in-order successor of en */
786 		if (!next_en) {
787 			struct rb_node *node = rb_next(&en->rb_node);
788 
789 			next_en = rb_entry_safe(node, struct extent_node,
790 						rb_node);
791 		}
792 
		/* nothing of the extent survived: release the node */
793 		if (parts)
794 			__try_update_largest_extent(et, en);
795 		else
796 			__release_extent_node(sbi, et, en);
797 
798 		/*
799 		 * if original extent is split into zero or two parts, extent
800 		 * tree has been altered by deletion or insertion, therefore
801 		 * invalidate pointers regard to tree.
802 		 */
803 		if (parts != 1) {
804 			insert_p = NULL;
805 			insert_parent = NULL;
806 		}
807 		en = next_en;
808 	}
809 
810 	if (type == EX_BLOCK_AGE)
811 		goto update_age_extent_cache;
812 
813 	/* 3. update extent in read extent cache */
814 	BUG_ON(type != EX_READ);
815 
	/* a zero tei->blk only invalidates the range: nothing is inserted */
816 	if (tei->blk) {
817 		__set_extent_info(&ei, fofs, len, tei->blk, false,
818 				  0, 0, EX_READ);
819 		if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
820 			__insert_extent_tree(sbi, et, &ei,
821 					insert_p, insert_parent, leftmost);
822 
823 		/* give up extent_cache, if split and small updates happen */
824 		if (dei.len >= 1 &&
825 				prev.len < F2FS_MIN_EXTENT_LEN &&
826 				et->largest.len < F2FS_MIN_EXTENT_LEN) {
827 			et->largest.len = 0;
828 			et->largest_updated = true;
829 			set_inode_flag(inode, FI_NO_EXTENT);
830 		}
831 	}
832 
833 	if (is_inode_flag_set(inode, FI_NO_EXTENT))
834 		__free_extent_tree(sbi, et);
835 
	/* consume the flag here; the inode is dirtied after unlocking */
836 	if (et->largest_updated) {
837 		et->largest_updated = false;
838 		updated = true;
839 	}
840 	goto out_read_extent_cache;
841 update_age_extent_cache:
	/* a zero tei->last_blocks only invalidates the range */
842 	if (!tei->last_blocks)
843 		goto out_read_extent_cache;
844 
845 	__set_extent_info(&ei, fofs, len, 0, false,
846 			tei->age, tei->last_blocks, EX_BLOCK_AGE);
847 	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
848 		__insert_extent_tree(sbi, et, &ei,
849 					insert_p, insert_parent, leftmost);
850 out_read_extent_cache:
851 	write_unlock(&et->lock);
852 
853 	if (updated)
854 		f2fs_mark_inode_dirty_sync(inode, true);
855 }
856 
#ifdef CONFIG_F2FS_FS_COMPRESSION
/*
 * Add a read extent [@fofs, @fofs + @llen) starting at @blkaddr with
 * compressed length @c_len to @inode's read extent tree.
 *
 * Does nothing if FI_NO_EXTENT is set, or if an extent already covers
 * @fofs.  Otherwise the new extent is merged with its neighbours when
 * possible and inserted as a new node when not.
 */
void f2fs_update_read_extent_tree_range_compressed(struct inode *inode,
				pgoff_t fofs, block_t blkaddr, unsigned int llen,
				unsigned int c_len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ];
	struct extent_node *hit;
	struct extent_node *prev_en = NULL, *next_en = NULL;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	struct extent_info ei;
	bool leftmost = false;

	trace_f2fs_update_read_extent_tree_range(inode, fofs, llen,
						blkaddr, c_len);

	/* it is safe here to check FI_NO_EXTENT w/o et->lock in ro image */
	if (is_inode_flag_set(inode, FI_NO_EXTENT))
		return;

	write_lock(&et->lock);

	hit = (struct extent_node *)f2fs_lookup_rb_tree_ret(&et->root,
				(struct rb_entry *)et->cached_en, fofs,
				(struct rb_entry **)&prev_en,
				(struct rb_entry **)&next_en,
				&insert_p, &insert_parent, false,
				&leftmost);
	if (!hit) {
		/* keep_clen: c_len is assigned explicitly right below */
		__set_extent_info(&ei, fofs, llen, blkaddr, true, 0, 0,
							EX_READ);
		ei.c_len = c_len;

		if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
			__insert_extent_tree(sbi, et, &ei,
					insert_p, insert_parent, leftmost);
	}

	write_unlock(&et->lock);
}
#endif
898 
899 static unsigned long long __calculate_block_age(struct f2fs_sb_info *sbi,
900 						unsigned long long new,
901 						unsigned long long old)
902 {
903 	unsigned int rem_old, rem_new;
904 	unsigned long long res;
905 	unsigned int weight = sbi->last_age_weight;
906 
907 	res = div_u64_rem(new, 100, &rem_new) * (100 - weight)
908 		+ div_u64_rem(old, 100, &rem_old) * weight;
909 
910 	if (rem_new)
911 		res += rem_new * (100 - weight) / 100;
912 	if (rem_old)
913 		res += rem_old * weight / 100;
914 
915 	return res;
916 }
917 
918 /* This returns a new age and allocated blocks in ei */
919 static int __get_new_block_age(struct inode *inode, struct extent_info *ei,
920 						block_t blkaddr)
921 {
922 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
923 	loff_t f_size = i_size_read(inode);
924 	unsigned long long cur_blocks =
925 				atomic64_read(&sbi->allocated_data_blocks);
926 	struct extent_info tei = *ei;	/* only fofs and len are valid */
927 
928 	/*
929 	 * When I/O is not aligned to a PAGE_SIZE, update will happen to the last
930 	 * file block even in seq write. So don't record age for newly last file
931 	 * block here.
932 	 */
933 	if ((f_size >> PAGE_SHIFT) == ei->fofs && f_size & (PAGE_SIZE - 1) &&
934 			blkaddr == NEW_ADDR)
935 		return -EINVAL;
936 
937 	if (__lookup_extent_tree(inode, ei->fofs, &tei, EX_BLOCK_AGE)) {
938 		unsigned long long cur_age;
939 
940 		if (cur_blocks >= tei.last_blocks)
941 			cur_age = cur_blocks - tei.last_blocks;
942 		else
943 			/* allocated_data_blocks overflow */
944 			cur_age = ULLONG_MAX - tei.last_blocks + cur_blocks;
945 
946 		if (tei.age)
947 			ei->age = __calculate_block_age(sbi, cur_age, tei.age);
948 		else
949 			ei->age = cur_age;
950 		ei->last_blocks = cur_blocks;
951 		WARN_ON(ei->age > cur_blocks);
952 		return 0;
953 	}
954 
955 	f2fs_bug_on(sbi, blkaddr == NULL_ADDR);
956 
957 	/* the data block was allocated for the first time */
958 	if (blkaddr == NEW_ADDR)
959 		goto out;
960 
961 	if (__is_valid_data_blkaddr(blkaddr) &&
962 	    !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
963 		f2fs_bug_on(sbi, 1);
964 		return -EINVAL;
965 	}
966 out:
967 	/*
968 	 * init block age with zero, this can happen when the block age extent
969 	 * was reclaimed due to memory constraint or system reboot
970 	 */
971 	ei->age = 0;
972 	ei->last_blocks = cur_blocks;
973 	return 0;
974 }
975 
976 static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type type)
977 {
978 	struct extent_info ei = {};
979 
980 	if (!__may_extent_tree(dn->inode, type))
981 		return;
982 
983 	ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
984 								dn->ofs_in_node;
985 	ei.len = 1;
986 
987 	if (type == EX_READ) {
988 		if (dn->data_blkaddr == NEW_ADDR)
989 			ei.blk = NULL_ADDR;
990 		else
991 			ei.blk = dn->data_blkaddr;
992 	} else if (type == EX_BLOCK_AGE) {
993 		if (__get_new_block_age(dn->inode, &ei, dn->data_blkaddr))
994 			return;
995 	}
996 	__update_extent_tree_range(dn->inode, &ei, type);
997 }
998 
999 static unsigned int __shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink,
1000 					enum extent_type type)
1001 {
1002 	struct extent_tree_info *eti = &sbi->extent_tree[type];
1003 	struct extent_tree *et, *next;
1004 	struct extent_node *en;
1005 	unsigned int node_cnt = 0, tree_cnt = 0;
1006 	int remained;
1007 
1008 	if (!atomic_read(&eti->total_zombie_tree))
1009 		goto free_node;
1010 
1011 	if (!mutex_trylock(&eti->extent_tree_lock))
1012 		goto out;
1013 
1014 	/* 1. remove unreferenced extent tree */
1015 	list_for_each_entry_safe(et, next, &eti->zombie_list, list) {
1016 		if (atomic_read(&et->node_cnt)) {
1017 			write_lock(&et->lock);
1018 			node_cnt += __free_extent_tree(sbi, et);
1019 			write_unlock(&et->lock);
1020 		}
1021 		f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
1022 		list_del_init(&et->list);
1023 		radix_tree_delete(&eti->extent_tree_root, et->ino);
1024 		kmem_cache_free(extent_tree_slab, et);
1025 		atomic_dec(&eti->total_ext_tree);
1026 		atomic_dec(&eti->total_zombie_tree);
1027 		tree_cnt++;
1028 
1029 		if (node_cnt + tree_cnt >= nr_shrink)
1030 			goto unlock_out;
1031 		cond_resched();
1032 	}
1033 	mutex_unlock(&eti->extent_tree_lock);
1034 
1035 free_node:
1036 	/* 2. remove LRU extent entries */
1037 	if (!mutex_trylock(&eti->extent_tree_lock))
1038 		goto out;
1039 
1040 	remained = nr_shrink - (node_cnt + tree_cnt);
1041 
1042 	spin_lock(&eti->extent_lock);
1043 	for (; remained > 0; remained--) {
1044 		if (list_empty(&eti->extent_list))
1045 			break;
1046 		en = list_first_entry(&eti->extent_list,
1047 					struct extent_node, list);
1048 		et = en->et;
1049 		if (!write_trylock(&et->lock)) {
1050 			/* refresh this extent node's position in extent list */
1051 			list_move_tail(&en->list, &eti->extent_list);
1052 			continue;
1053 		}
1054 
1055 		list_del_init(&en->list);
1056 		spin_unlock(&eti->extent_lock);
1057 
1058 		__detach_extent_node(sbi, et, en);
1059 
1060 		write_unlock(&et->lock);
1061 		node_cnt++;
1062 		spin_lock(&eti->extent_lock);
1063 	}
1064 	spin_unlock(&eti->extent_lock);
1065 
1066 unlock_out:
1067 	mutex_unlock(&eti->extent_tree_lock);
1068 out:
1069 	trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt, type);
1070 
1071 	return node_cnt + tree_cnt;
1072 }
1073 
1074 /* read extent cache operations */
1075 bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs,
1076 				struct extent_info *ei)
1077 {
1078 	if (!__may_extent_tree(inode, EX_READ))
1079 		return false;
1080 
1081 	return __lookup_extent_tree(inode, pgofs, ei, EX_READ);
1082 }
1083 
1084 bool f2fs_lookup_read_extent_cache_block(struct inode *inode, pgoff_t index,
1085 				block_t *blkaddr)
1086 {
1087 	struct extent_info ei = {};
1088 
1089 	if (!f2fs_lookup_read_extent_cache(inode, index, &ei))
1090 		return false;
1091 	*blkaddr = ei.blk + index - ei.fofs;
1092 	return true;
1093 }
1094 
1095 void f2fs_update_read_extent_cache(struct dnode_of_data *dn)
1096 {
1097 	return __update_extent_cache(dn, EX_READ);
1098 }
1099 
1100 void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn,
1101 				pgoff_t fofs, block_t blkaddr, unsigned int len)
1102 {
1103 	struct extent_info ei = {
1104 		.fofs = fofs,
1105 		.len = len,
1106 		.blk = blkaddr,
1107 	};
1108 
1109 	if (!__may_extent_tree(dn->inode, EX_READ))
1110 		return;
1111 
1112 	__update_extent_tree_range(dn->inode, &ei, EX_READ);
1113 }
1114 
1115 unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
1116 {
1117 	if (!test_opt(sbi, READ_EXTENT_CACHE))
1118 		return 0;
1119 
1120 	return __shrink_extent_tree(sbi, nr_shrink, EX_READ);
1121 }
1122 
1123 /* block age extent cache operations */
1124 bool f2fs_lookup_age_extent_cache(struct inode *inode, pgoff_t pgofs,
1125 				struct extent_info *ei)
1126 {
1127 	if (!__may_extent_tree(inode, EX_BLOCK_AGE))
1128 		return false;
1129 
1130 	return __lookup_extent_tree(inode, pgofs, ei, EX_BLOCK_AGE);
1131 }
1132 
1133 void f2fs_update_age_extent_cache(struct dnode_of_data *dn)
1134 {
1135 	return __update_extent_cache(dn, EX_BLOCK_AGE);
1136 }
1137 
1138 void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn,
1139 				pgoff_t fofs, unsigned int len)
1140 {
1141 	struct extent_info ei = {
1142 		.fofs = fofs,
1143 		.len = len,
1144 	};
1145 
1146 	if (!__may_extent_tree(dn->inode, EX_BLOCK_AGE))
1147 		return;
1148 
1149 	__update_extent_tree_range(dn->inode, &ei, EX_BLOCK_AGE);
1150 }
1151 
1152 unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
1153 {
1154 	if (!test_opt(sbi, AGE_EXTENT_CACHE))
1155 		return 0;
1156 
1157 	return __shrink_extent_tree(sbi, nr_shrink, EX_BLOCK_AGE);
1158 }
1159 
1160 static unsigned int __destroy_extent_node(struct inode *inode,
1161 					enum extent_type type)
1162 {
1163 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1164 	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
1165 	unsigned int node_cnt = 0;
1166 
1167 	if (!et || !atomic_read(&et->node_cnt))
1168 		return 0;
1169 
1170 	write_lock(&et->lock);
1171 	node_cnt = __free_extent_tree(sbi, et);
1172 	write_unlock(&et->lock);
1173 
1174 	return node_cnt;
1175 }
1176 
1177 void f2fs_destroy_extent_node(struct inode *inode)
1178 {
1179 	__destroy_extent_node(inode, EX_READ);
1180 	__destroy_extent_node(inode, EX_BLOCK_AGE);
1181 }
1182 
1183 static void __drop_extent_tree(struct inode *inode, enum extent_type type)
1184 {
1185 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1186 	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
1187 	bool updated = false;
1188 
1189 	if (!__may_extent_tree(inode, type))
1190 		return;
1191 
1192 	write_lock(&et->lock);
1193 	__free_extent_tree(sbi, et);
1194 	if (type == EX_READ) {
1195 		set_inode_flag(inode, FI_NO_EXTENT);
1196 		if (et->largest.len) {
1197 			et->largest.len = 0;
1198 			updated = true;
1199 		}
1200 	}
1201 	write_unlock(&et->lock);
1202 	if (updated)
1203 		f2fs_mark_inode_dirty_sync(inode, true);
1204 }
1205 
1206 void f2fs_drop_extent_tree(struct inode *inode)
1207 {
1208 	__drop_extent_tree(inode, EX_READ);
1209 	__drop_extent_tree(inode, EX_BLOCK_AGE);
1210 }
1211 
1212 static void __destroy_extent_tree(struct inode *inode, enum extent_type type)
1213 {
1214 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1215 	struct extent_tree_info *eti = &sbi->extent_tree[type];
1216 	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
1217 	unsigned int node_cnt = 0;
1218 
1219 	if (!et)
1220 		return;
1221 
1222 	if (inode->i_nlink && !is_bad_inode(inode) &&
1223 					atomic_read(&et->node_cnt)) {
1224 		mutex_lock(&eti->extent_tree_lock);
1225 		list_add_tail(&et->list, &eti->zombie_list);
1226 		atomic_inc(&eti->total_zombie_tree);
1227 		mutex_unlock(&eti->extent_tree_lock);
1228 		return;
1229 	}
1230 
1231 	/* free all extent info belong to this extent tree */
1232 	node_cnt = __destroy_extent_node(inode, type);
1233 
1234 	/* delete extent tree entry in radix tree */
1235 	mutex_lock(&eti->extent_tree_lock);
1236 	f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
1237 	radix_tree_delete(&eti->extent_tree_root, inode->i_ino);
1238 	kmem_cache_free(extent_tree_slab, et);
1239 	atomic_dec(&eti->total_ext_tree);
1240 	mutex_unlock(&eti->extent_tree_lock);
1241 
1242 	F2FS_I(inode)->extent_tree[type] = NULL;
1243 
1244 	trace_f2fs_destroy_extent_tree(inode, node_cnt, type);
1245 }
1246 
1247 void f2fs_destroy_extent_tree(struct inode *inode)
1248 {
1249 	__destroy_extent_tree(inode, EX_READ);
1250 	__destroy_extent_tree(inode, EX_BLOCK_AGE);
1251 }
1252 
1253 static void __init_extent_tree_info(struct extent_tree_info *eti)
1254 {
1255 	INIT_RADIX_TREE(&eti->extent_tree_root, GFP_NOIO);
1256 	mutex_init(&eti->extent_tree_lock);
1257 	INIT_LIST_HEAD(&eti->extent_list);
1258 	spin_lock_init(&eti->extent_lock);
1259 	atomic_set(&eti->total_ext_tree, 0);
1260 	INIT_LIST_HEAD(&eti->zombie_list);
1261 	atomic_set(&eti->total_zombie_tree, 0);
1262 	atomic_set(&eti->total_ext_node, 0);
1263 }
1264 
1265 void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi)
1266 {
1267 	__init_extent_tree_info(&sbi->extent_tree[EX_READ]);
1268 	__init_extent_tree_info(&sbi->extent_tree[EX_BLOCK_AGE]);
1269 
1270 	/* initialize for block age extents */
1271 	atomic64_set(&sbi->allocated_data_blocks, 0);
1272 	sbi->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD;
1273 	sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD;
1274 	sbi->last_age_weight = LAST_AGE_WEIGHT;
1275 }
1276 
1277 int __init f2fs_create_extent_cache(void)
1278 {
1279 	extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
1280 			sizeof(struct extent_tree));
1281 	if (!extent_tree_slab)
1282 		return -ENOMEM;
1283 	extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
1284 			sizeof(struct extent_node));
1285 	if (!extent_node_slab) {
1286 		kmem_cache_destroy(extent_tree_slab);
1287 		return -ENOMEM;
1288 	}
1289 	return 0;
1290 }
1291 
1292 void f2fs_destroy_extent_cache(void)
1293 {
1294 	kmem_cache_destroy(extent_node_slab);
1295 	kmem_cache_destroy(extent_tree_slab);
1296 }
1297