xref: /openbmc/linux/fs/f2fs/node.c (revision 94c7b6fc)
1 /*
2  * fs/f2fs/node.c
3  *
4  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5  *             http://www.samsung.com/
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11 #include <linux/fs.h>
12 #include <linux/f2fs_fs.h>
13 #include <linux/mpage.h>
14 #include <linux/backing-dev.h>
15 #include <linux/blkdev.h>
16 #include <linux/pagevec.h>
17 #include <linux/swap.h>
18 
19 #include "f2fs.h"
20 #include "node.h"
21 #include "segment.h"
22 #include <trace/events/f2fs.h>
23 
24 #define on_build_free_nids(nmi) mutex_is_locked(&(nmi)->build_lock)
25 
26 static struct kmem_cache *nat_entry_slab;
27 static struct kmem_cache *free_nid_slab;
28 
29 bool available_free_memory(struct f2fs_sb_info *sbi, int type)
30 {
31 	struct f2fs_nm_info *nm_i = NM_I(sbi);
32 	struct sysinfo val;
33 	unsigned long mem_size = 0;
34 	bool res = false;
35 
36 	si_meminfo(&val);
37 	/* give 25%, 25%, 50% of memory to each component, respectively; see the worked example below */
38 	if (type == FREE_NIDS) {
39 		mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> 12;
40 		res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2);
41 	} else if (type == NAT_ENTRIES) {
42 		mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> 12;
43 		res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2);
44 	} else if (type == DIRTY_DENTS) {
45 		if (sbi->sb->s_bdi->dirty_exceeded)
46 			return false;
47 		mem_size = get_pages(sbi, F2FS_DIRTY_DENTS);
48 		res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 1);
49 	}
50 	return res;
51 }
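
/*
 * A minimal worked example of the thresholds above; the numbers here are
 * illustrative assumptions, not values taken from a real system. With 4KB
 * pages, totalram = 1,048,576 pages (4 GiB) and ram_thresh = 10, the
 * FREE_NIDS and NAT_ENTRIES caches may each keep growing while
 *
 *	mem_size < (1048576 * 10 / 100) >> 2 = 26214 pages (~102 MiB),
 *
 * and DIRTY_DENTS gets twice that quota (">> 1" instead of ">> 2").
 * For FREE_NIDS and NAT_ENTRIES, mem_size is the cache footprint in pages,
 * i.e. object_count * object_size >> 12.
 */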
52 
53 static void clear_node_page_dirty(struct page *page)
54 {
55 	struct address_space *mapping = page->mapping;
56 	struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
57 	unsigned long flags;
58 
59 	if (PageDirty(page)) {
60 		spin_lock_irqsave(&mapping->tree_lock, flags);
61 		radix_tree_tag_clear(&mapping->page_tree,
62 				page_index(page),
63 				PAGECACHE_TAG_DIRTY);
64 		spin_unlock_irqrestore(&mapping->tree_lock, flags);
65 
66 		clear_page_dirty_for_io(page);
67 		dec_page_count(sbi, F2FS_DIRTY_NODES);
68 	}
69 	ClearPageUptodate(page);
70 }
71 
72 static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
73 {
74 	pgoff_t index = current_nat_addr(sbi, nid);
75 	return get_meta_page(sbi, index);
76 }
77 
78 static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
79 {
80 	struct page *src_page;
81 	struct page *dst_page;
82 	pgoff_t src_off;
83 	pgoff_t dst_off;
84 	void *src_addr;
85 	void *dst_addr;
86 	struct f2fs_nm_info *nm_i = NM_I(sbi);
87 
88 	src_off = current_nat_addr(sbi, nid);
89 	dst_off = next_nat_addr(sbi, src_off);
90 
91 	/* get current nat block page with lock */
92 	src_page = get_meta_page(sbi, src_off);
93 
94 	/* Dirty src_page means that it is already the new target NAT page. */
95 	if (PageDirty(src_page))
96 		return src_page;
97 
98 	dst_page = grab_meta_page(sbi, dst_off);
99 
100 	src_addr = page_address(src_page);
101 	dst_addr = page_address(dst_page);
102 	memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE);
103 	set_page_dirty(dst_page);
104 	f2fs_put_page(src_page, 1);
105 
106 	set_to_next_nat(nm_i, nid);
107 
108 	return dst_page;
109 }
110 
111 static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
112 {
113 	return radix_tree_lookup(&nm_i->nat_root, n);
114 }
115 
116 static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i,
117 		nid_t start, unsigned int nr, struct nat_entry **ep)
118 {
119 	return radix_tree_gang_lookup(&nm_i->nat_root, (void **)ep, start, nr);
120 }
121 
122 static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
123 {
124 	list_del(&e->list);
125 	radix_tree_delete(&nm_i->nat_root, nat_get_nid(e));
126 	nm_i->nat_cnt--;
127 	kmem_cache_free(nat_entry_slab, e);
128 }
129 
130 int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
131 {
132 	struct f2fs_nm_info *nm_i = NM_I(sbi);
133 	struct nat_entry *e;
134 	int is_cp = 1;
135 
136 	read_lock(&nm_i->nat_tree_lock);
137 	e = __lookup_nat_cache(nm_i, nid);
138 	if (e && !e->checkpointed)
139 		is_cp = 0;
140 	read_unlock(&nm_i->nat_tree_lock);
141 	return is_cp;
142 }
143 
144 bool fsync_mark_done(struct f2fs_sb_info *sbi, nid_t nid)
145 {
146 	struct f2fs_nm_info *nm_i = NM_I(sbi);
147 	struct nat_entry *e;
148 	bool fsync_done = false;
149 
150 	read_lock(&nm_i->nat_tree_lock);
151 	e = __lookup_nat_cache(nm_i, nid);
152 	if (e)
153 		fsync_done = e->fsync_done;
154 	read_unlock(&nm_i->nat_tree_lock);
155 	return fsync_done;
156 }
157 
158 void fsync_mark_clear(struct f2fs_sb_info *sbi, nid_t nid)
159 {
160 	struct f2fs_nm_info *nm_i = NM_I(sbi);
161 	struct nat_entry *e;
162 
163 	write_lock(&nm_i->nat_tree_lock);
164 	e = __lookup_nat_cache(nm_i, nid);
165 	if (e)
166 		e->fsync_done = false;
167 	write_unlock(&nm_i->nat_tree_lock);
168 }
169 
170 static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
171 {
172 	struct nat_entry *new;
173 
174 	new = kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC);
175 	if (!new)
176 		return NULL;
177 	if (radix_tree_insert(&nm_i->nat_root, nid, new)) {
178 		kmem_cache_free(nat_entry_slab, new);
179 		return NULL;
180 	}
181 	memset(new, 0, sizeof(struct nat_entry));
182 	nat_set_nid(new, nid);
183 	new->checkpointed = true;
184 	list_add_tail(&new->list, &nm_i->nat_entries);
185 	nm_i->nat_cnt++;
186 	return new;
187 }
188 
189 static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
190 						struct f2fs_nat_entry *ne)
191 {
192 	struct nat_entry *e;
193 retry:
194 	write_lock(&nm_i->nat_tree_lock);
195 	e = __lookup_nat_cache(nm_i, nid);
196 	if (!e) {
197 		e = grab_nat_entry(nm_i, nid);
198 		if (!e) {
199 			write_unlock(&nm_i->nat_tree_lock);
200 			goto retry;
201 		}
202 		node_info_from_raw_nat(&e->ni, ne);
203 	}
204 	write_unlock(&nm_i->nat_tree_lock);
205 }
206 
207 static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
208 			block_t new_blkaddr, bool fsync_done)
209 {
210 	struct f2fs_nm_info *nm_i = NM_I(sbi);
211 	struct nat_entry *e;
212 retry:
213 	write_lock(&nm_i->nat_tree_lock);
214 	e = __lookup_nat_cache(nm_i, ni->nid);
215 	if (!e) {
216 		e = grab_nat_entry(nm_i, ni->nid);
217 		if (!e) {
218 			write_unlock(&nm_i->nat_tree_lock);
219 			goto retry;
220 		}
221 		e->ni = *ni;
222 		f2fs_bug_on(ni->blk_addr == NEW_ADDR);
223 	} else if (new_blkaddr == NEW_ADDR) {
224 		/*
225 		 * when a nid is reallocated,
226 		 * the previous nat entry may still remain in the nat cache.
227 		 * So, reinitialize it with new information.
228 		 */
229 		e->ni = *ni;
230 		f2fs_bug_on(ni->blk_addr != NULL_ADDR);
231 	}
232 
233 	/* sanity check */
234 	f2fs_bug_on(nat_get_blkaddr(e) != ni->blk_addr);
235 	f2fs_bug_on(nat_get_blkaddr(e) == NULL_ADDR &&
236 			new_blkaddr == NULL_ADDR);
237 	f2fs_bug_on(nat_get_blkaddr(e) == NEW_ADDR &&
238 			new_blkaddr == NEW_ADDR);
239 	f2fs_bug_on(nat_get_blkaddr(e) != NEW_ADDR &&
240 			nat_get_blkaddr(e) != NULL_ADDR &&
241 			new_blkaddr == NEW_ADDR);
242 
243 	/* increment the version number as the node is removed */
244 	if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) {
245 		unsigned char version = nat_get_version(e);
246 		nat_set_version(e, inc_node_version(version));
247 	}
248 
249 	/* change address */
250 	nat_set_blkaddr(e, new_blkaddr);
251 	__set_nat_cache_dirty(nm_i, e);
252 
253 	/* update fsync_mark if its inode nat entry is still alive */
254 	e = __lookup_nat_cache(nm_i, ni->ino);
255 	if (e)
256 		e->fsync_done = fsync_done;
257 	write_unlock(&nm_i->nat_tree_lock);
258 }
259 
260 int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
261 {
262 	struct f2fs_nm_info *nm_i = NM_I(sbi);
263 
264 	if (available_free_memory(sbi, NAT_ENTRIES))
265 		return 0;
266 
267 	write_lock(&nm_i->nat_tree_lock);
268 	while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
269 		struct nat_entry *ne;
270 		ne = list_first_entry(&nm_i->nat_entries,
271 					struct nat_entry, list);
272 		__del_from_nat_cache(nm_i, ne);
273 		nr_shrink--;
274 	}
275 	write_unlock(&nm_i->nat_tree_lock);
276 	return nr_shrink;
277 }
278 
279 /*
280  * This function always returns success.
281  */
282 void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
283 {
284 	struct f2fs_nm_info *nm_i = NM_I(sbi);
285 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
286 	struct f2fs_summary_block *sum = curseg->sum_blk;
287 	nid_t start_nid = START_NID(nid);
288 	struct f2fs_nat_block *nat_blk;
289 	struct page *page = NULL;
290 	struct f2fs_nat_entry ne;
291 	struct nat_entry *e;
292 	int i;
293 
294 	memset(&ne, 0, sizeof(struct f2fs_nat_entry));
295 	ni->nid = nid;
296 
297 	/* Check nat cache */
298 	read_lock(&nm_i->nat_tree_lock);
299 	e = __lookup_nat_cache(nm_i, nid);
300 	if (e) {
301 		ni->ino = nat_get_ino(e);
302 		ni->blk_addr = nat_get_blkaddr(e);
303 		ni->version = nat_get_version(e);
304 	}
305 	read_unlock(&nm_i->nat_tree_lock);
306 	if (e)
307 		return;
308 
309 	/* Check current segment summary */
310 	mutex_lock(&curseg->curseg_mutex);
311 	i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0);
312 	if (i >= 0) {
313 		ne = nat_in_journal(sum, i);
314 		node_info_from_raw_nat(ni, &ne);
315 	}
316 	mutex_unlock(&curseg->curseg_mutex);
317 	if (i >= 0)
318 		goto cache;
319 
320 	/* Fill node_info from nat page */
321 	page = get_current_nat_page(sbi, start_nid);
322 	nat_blk = (struct f2fs_nat_block *)page_address(page);
323 	ne = nat_blk->entries[nid - start_nid];
324 	node_info_from_raw_nat(ni, &ne);
325 	f2fs_put_page(page, 1);
326 cache:
327 	/* cache nat entry */
328 	cache_nat_entry(NM_I(sbi), nid, &ne);
329 }
330 
331 /*
332  * The maximum depth is four.
333  * Offset[0] will have the raw inode offset.
334  */
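/*
 * A worked example, assuming the common 4KB layout where ADDRS_PER_INODE(fi)
 * is 923 and ADDRS_PER_BLOCK == NIDS_PER_BLOCK == 1018 (treat these constants
 * as illustrative; the real values come from f2fs.h and depend on inline
 * xattr):
 *
 *	block = 2000
 *	2000 - 923 (direct_index) = 1077	-> not addressed by the inode itself
 *	1077 - 1018 (direct_blks) = 59		-> falls into the second direct node
 *
 *	level = 1, offset[0] = NODE_DIR2_BLOCK, offset[1] = 59, noffset[1] = 2
 */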
335 static int get_node_path(struct f2fs_inode_info *fi, long block,
336 				int offset[4], unsigned int noffset[4])
337 {
338 	const long direct_index = ADDRS_PER_INODE(fi);
339 	const long direct_blks = ADDRS_PER_BLOCK;
340 	const long dptrs_per_blk = NIDS_PER_BLOCK;
341 	const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
342 	const long dindirect_blks = indirect_blks * NIDS_PER_BLOCK;
343 	int n = 0;
344 	int level = 0;
345 
346 	noffset[0] = 0;
347 
348 	if (block < direct_index) {
349 		offset[n] = block;
350 		goto got;
351 	}
352 	block -= direct_index;
353 	if (block < direct_blks) {
354 		offset[n++] = NODE_DIR1_BLOCK;
355 		noffset[n] = 1;
356 		offset[n] = block;
357 		level = 1;
358 		goto got;
359 	}
360 	block -= direct_blks;
361 	if (block < direct_blks) {
362 		offset[n++] = NODE_DIR2_BLOCK;
363 		noffset[n] = 2;
364 		offset[n] = block;
365 		level = 1;
366 		goto got;
367 	}
368 	block -= direct_blks;
369 	if (block < indirect_blks) {
370 		offset[n++] = NODE_IND1_BLOCK;
371 		noffset[n] = 3;
372 		offset[n++] = block / direct_blks;
373 		noffset[n] = 4 + offset[n - 1];
374 		offset[n] = block % direct_blks;
375 		level = 2;
376 		goto got;
377 	}
378 	block -= indirect_blks;
379 	if (block < indirect_blks) {
380 		offset[n++] = NODE_IND2_BLOCK;
381 		noffset[n] = 4 + dptrs_per_blk;
382 		offset[n++] = block / direct_blks;
383 		noffset[n] = 5 + dptrs_per_blk + offset[n - 1];
384 		offset[n] = block % direct_blks;
385 		level = 2;
386 		goto got;
387 	}
388 	block -= indirect_blks;
389 	if (block < dindirect_blks) {
390 		offset[n++] = NODE_DIND_BLOCK;
391 		noffset[n] = 5 + (dptrs_per_blk * 2);
392 		offset[n++] = block / indirect_blks;
393 		noffset[n] = 6 + (dptrs_per_blk * 2) +
394 			      offset[n - 1] * (dptrs_per_blk + 1);
395 		offset[n++] = (block / direct_blks) % dptrs_per_blk;
396 		noffset[n] = 7 + (dptrs_per_blk * 2) +
397 			      offset[n - 2] * (dptrs_per_blk + 1) +
398 			      offset[n - 1];
399 		offset[n] = block % direct_blks;
400 		level = 3;
401 		goto got;
402 	} else {
403 		BUG();
404 	}
405 got:
406 	return level;
407 }
408 
409 /*
410  * Caller should call f2fs_put_dnode(dn).
411  * Also, it should grab and release a rwsem by calling f2fs_lock_op() and
412  * f2fs_unlock_op() only if the mode is not set to RDONLY_NODE.
413  * In the case of RDONLY_NODE, we don't need to care about the mutex.
414  */
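/*
 * A hedged sketch of a typical caller; the names follow the existing helpers
 * in this tree, but error handling and the surrounding variables (sbi, index,
 * err) are simplified for illustration:
 *
 *	struct dnode_of_data dn;
 *
 *	set_new_dnode(&dn, inode, NULL, NULL, 0);
 *	f2fs_lock_op(sbi);
 *	err = get_dnode_of_data(&dn, index, ALLOC_NODE);
 *	f2fs_unlock_op(sbi);
 *	if (err)
 *		return err;
 *	... use dn.node_page / dn.data_blkaddr ...
 *	f2fs_put_dnode(&dn);
 */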
415 int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
416 {
417 	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
418 	struct page *npage[4];
419 	struct page *parent;
420 	int offset[4];
421 	unsigned int noffset[4];
422 	nid_t nids[4];
423 	int level, i;
424 	int err = 0;
425 
426 	level = get_node_path(F2FS_I(dn->inode), index, offset, noffset);
427 
428 	nids[0] = dn->inode->i_ino;
429 	npage[0] = dn->inode_page;
430 
431 	if (!npage[0]) {
432 		npage[0] = get_node_page(sbi, nids[0]);
433 		if (IS_ERR(npage[0]))
434 			return PTR_ERR(npage[0]);
435 	}
436 	parent = npage[0];
437 	if (level != 0)
438 		nids[1] = get_nid(parent, offset[0], true);
439 	dn->inode_page = npage[0];
440 	dn->inode_page_locked = true;
441 
442 	/* get indirect or direct nodes */
443 	for (i = 1; i <= level; i++) {
444 		bool done = false;
445 
446 		if (!nids[i] && mode == ALLOC_NODE) {
447 			/* alloc new node */
448 			if (!alloc_nid(sbi, &(nids[i]))) {
449 				err = -ENOSPC;
450 				goto release_pages;
451 			}
452 
453 			dn->nid = nids[i];
454 			npage[i] = new_node_page(dn, noffset[i], NULL);
455 			if (IS_ERR(npage[i])) {
456 				alloc_nid_failed(sbi, nids[i]);
457 				err = PTR_ERR(npage[i]);
458 				goto release_pages;
459 			}
460 
461 			set_nid(parent, offset[i - 1], nids[i], i == 1);
462 			alloc_nid_done(sbi, nids[i]);
463 			done = true;
464 		} else if (mode == LOOKUP_NODE_RA && i == level && level > 1) {
465 			npage[i] = get_node_page_ra(parent, offset[i - 1]);
466 			if (IS_ERR(npage[i])) {
467 				err = PTR_ERR(npage[i]);
468 				goto release_pages;
469 			}
470 			done = true;
471 		}
472 		if (i == 1) {
473 			dn->inode_page_locked = false;
474 			unlock_page(parent);
475 		} else {
476 			f2fs_put_page(parent, 1);
477 		}
478 
479 		if (!done) {
480 			npage[i] = get_node_page(sbi, nids[i]);
481 			if (IS_ERR(npage[i])) {
482 				err = PTR_ERR(npage[i]);
483 				f2fs_put_page(npage[0], 0);
484 				goto release_out;
485 			}
486 		}
487 		if (i < level) {
488 			parent = npage[i];
489 			nids[i + 1] = get_nid(parent, offset[i], false);
490 		}
491 	}
492 	dn->nid = nids[level];
493 	dn->ofs_in_node = offset[level];
494 	dn->node_page = npage[level];
495 	dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node);
496 	return 0;
497 
498 release_pages:
499 	f2fs_put_page(parent, 1);
500 	if (i > 1)
501 		f2fs_put_page(npage[0], 0);
502 release_out:
503 	dn->inode_page = NULL;
504 	dn->node_page = NULL;
505 	return err;
506 }
507 
508 static void truncate_node(struct dnode_of_data *dn)
509 {
510 	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
511 	struct node_info ni;
512 
513 	get_node_info(sbi, dn->nid, &ni);
514 	if (dn->inode->i_blocks == 0) {
515 		f2fs_bug_on(ni.blk_addr != NULL_ADDR);
516 		goto invalidate;
517 	}
518 	f2fs_bug_on(ni.blk_addr == NULL_ADDR);
519 
520 	/* Deallocate node address */
521 	invalidate_blocks(sbi, ni.blk_addr);
522 	dec_valid_node_count(sbi, dn->inode);
523 	set_node_addr(sbi, &ni, NULL_ADDR, false);
524 
525 	if (dn->nid == dn->inode->i_ino) {
526 		remove_orphan_inode(sbi, dn->nid);
527 		dec_valid_inode_count(sbi);
528 	} else {
529 		sync_inode_page(dn);
530 	}
531 invalidate:
532 	clear_node_page_dirty(dn->node_page);
533 	F2FS_SET_SB_DIRT(sbi);
534 
535 	f2fs_put_page(dn->node_page, 1);
536 
537 	invalidate_mapping_pages(NODE_MAPPING(sbi),
538 			dn->node_page->index, dn->node_page->index);
539 
540 	dn->node_page = NULL;
541 	trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr);
542 }
543 
544 static int truncate_dnode(struct dnode_of_data *dn)
545 {
546 	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
547 	struct page *page;
548 
549 	if (dn->nid == 0)
550 		return 1;
551 
552 	/* get direct node */
553 	page = get_node_page(sbi, dn->nid);
554 	if (IS_ERR(page) && PTR_ERR(page) == -ENOENT)
555 		return 1;
556 	else if (IS_ERR(page))
557 		return PTR_ERR(page);
558 
559 	/* Make dnode_of_data for parameter */
560 	dn->node_page = page;
561 	dn->ofs_in_node = 0;
562 	truncate_data_blocks(dn);
563 	truncate_node(dn);
564 	return 1;
565 }
566 
567 static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
568 						int ofs, int depth)
569 {
570 	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
571 	struct dnode_of_data rdn = *dn;
572 	struct page *page;
573 	struct f2fs_node *rn;
574 	nid_t child_nid;
575 	unsigned int child_nofs;
576 	int freed = 0;
577 	int i, ret;
578 
579 	if (dn->nid == 0)
580 		return NIDS_PER_BLOCK + 1;
581 
582 	trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr);
583 
584 	page = get_node_page(sbi, dn->nid);
585 	if (IS_ERR(page)) {
586 		trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page));
587 		return PTR_ERR(page);
588 	}
589 
590 	rn = F2FS_NODE(page);
591 	if (depth < 3) {
592 		for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) {
593 			child_nid = le32_to_cpu(rn->in.nid[i]);
594 			if (child_nid == 0)
595 				continue;
596 			rdn.nid = child_nid;
597 			ret = truncate_dnode(&rdn);
598 			if (ret < 0)
599 				goto out_err;
600 			set_nid(page, i, 0, false);
601 		}
602 	} else {
603 		child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1;
604 		for (i = ofs; i < NIDS_PER_BLOCK; i++) {
605 			child_nid = le32_to_cpu(rn->in.nid[i]);
606 			if (child_nid == 0) {
607 				child_nofs += NIDS_PER_BLOCK + 1;
608 				continue;
609 			}
610 			rdn.nid = child_nid;
611 			ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1);
612 			if (ret == (NIDS_PER_BLOCK + 1)) {
613 				set_nid(page, i, 0, false);
614 				child_nofs += ret;
615 			} else if (ret < 0 && ret != -ENOENT) {
616 				goto out_err;
617 			}
618 		}
619 		freed = child_nofs;
620 	}
621 
622 	if (!ofs) {
623 		/* remove current indirect node */
624 		dn->node_page = page;
625 		truncate_node(dn);
626 		freed++;
627 	} else {
628 		f2fs_put_page(page, 1);
629 	}
630 	trace_f2fs_truncate_nodes_exit(dn->inode, freed);
631 	return freed;
632 
633 out_err:
634 	f2fs_put_page(page, 1);
635 	trace_f2fs_truncate_nodes_exit(dn->inode, ret);
636 	return ret;
637 }
638 
639 static int truncate_partial_nodes(struct dnode_of_data *dn,
640 			struct f2fs_inode *ri, int *offset, int depth)
641 {
642 	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
643 	struct page *pages[2];
644 	nid_t nid[3];
645 	nid_t child_nid;
646 	int err = 0;
647 	int i;
648 	int idx = depth - 2;
649 
650 	nid[0] = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
651 	if (!nid[0])
652 		return 0;
653 
654 	/* get indirect nodes in the path */
655 	for (i = 0; i < idx + 1; i++) {
656 		/* reference count will be increased */
657 		pages[i] = get_node_page(sbi, nid[i]);
658 		if (IS_ERR(pages[i])) {
659 			err = PTR_ERR(pages[i]);
660 			idx = i - 1;
661 			goto fail;
662 		}
663 		nid[i + 1] = get_nid(pages[i], offset[i + 1], false);
664 	}
665 
666 	/* free direct nodes linked to a partial indirect node */
667 	for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) {
668 		child_nid = get_nid(pages[idx], i, false);
669 		if (!child_nid)
670 			continue;
671 		dn->nid = child_nid;
672 		err = truncate_dnode(dn);
673 		if (err < 0)
674 			goto fail;
675 		set_nid(pages[idx], i, 0, false);
676 	}
677 
678 	if (offset[idx + 1] == 0) {
679 		dn->node_page = pages[idx];
680 		dn->nid = nid[idx];
681 		truncate_node(dn);
682 	} else {
683 		f2fs_put_page(pages[idx], 1);
684 	}
685 	offset[idx]++;
686 	offset[idx + 1] = 0;
687 	idx--;
688 fail:
689 	for (i = idx; i >= 0; i--)
690 		f2fs_put_page(pages[i], 1);
691 
692 	trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err);
693 
694 	return err;
695 }
696 
697 /*
698  * All the block addresses of data and nodes should be nullified.
699  */
700 int truncate_inode_blocks(struct inode *inode, pgoff_t from)
701 {
702 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
703 	int err = 0, cont = 1;
704 	int level, offset[4], noffset[4];
705 	unsigned int nofs = 0;
706 	struct f2fs_inode *ri;
707 	struct dnode_of_data dn;
708 	struct page *page;
709 
710 	trace_f2fs_truncate_inode_blocks_enter(inode, from);
711 
712 	level = get_node_path(F2FS_I(inode), from, offset, noffset);
713 restart:
714 	page = get_node_page(sbi, inode->i_ino);
715 	if (IS_ERR(page)) {
716 		trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page));
717 		return PTR_ERR(page);
718 	}
719 
720 	set_new_dnode(&dn, inode, page, NULL, 0);
721 	unlock_page(page);
722 
723 	ri = F2FS_INODE(page);
724 	switch (level) {
725 	case 0:
726 	case 1:
727 		nofs = noffset[1];
728 		break;
729 	case 2:
730 		nofs = noffset[1];
731 		if (!offset[level - 1])
732 			goto skip_partial;
733 		err = truncate_partial_nodes(&dn, ri, offset, level);
734 		if (err < 0 && err != -ENOENT)
735 			goto fail;
736 		nofs += 1 + NIDS_PER_BLOCK;
737 		break;
738 	case 3:
739 		nofs = 5 + 2 * NIDS_PER_BLOCK;
740 		if (!offset[level - 1])
741 			goto skip_partial;
742 		err = truncate_partial_nodes(&dn, ri, offset, level);
743 		if (err < 0 && err != -ENOENT)
744 			goto fail;
745 		break;
746 	default:
747 		BUG();
748 	}
749 
750 skip_partial:
751 	while (cont) {
752 		dn.nid = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
753 		switch (offset[0]) {
754 		case NODE_DIR1_BLOCK:
755 		case NODE_DIR2_BLOCK:
756 			err = truncate_dnode(&dn);
757 			break;
758 
759 		case NODE_IND1_BLOCK:
760 		case NODE_IND2_BLOCK:
761 			err = truncate_nodes(&dn, nofs, offset[1], 2);
762 			break;
763 
764 		case NODE_DIND_BLOCK:
765 			err = truncate_nodes(&dn, nofs, offset[1], 3);
766 			cont = 0;
767 			break;
768 
769 		default:
770 			BUG();
771 		}
772 		if (err < 0 && err != -ENOENT)
773 			goto fail;
774 		if (offset[1] == 0 &&
775 				ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) {
776 			lock_page(page);
777 			if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
778 				f2fs_put_page(page, 1);
779 				goto restart;
780 			}
781 			f2fs_wait_on_page_writeback(page, NODE);
782 			ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
783 			set_page_dirty(page);
784 			unlock_page(page);
785 		}
786 		offset[1] = 0;
787 		offset[0]++;
788 		nofs += err;
789 	}
790 fail:
791 	f2fs_put_page(page, 0);
792 	trace_f2fs_truncate_inode_blocks_exit(inode, err);
793 	return err > 0 ? 0 : err;
794 }
795 
796 int truncate_xattr_node(struct inode *inode, struct page *page)
797 {
798 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
799 	nid_t nid = F2FS_I(inode)->i_xattr_nid;
800 	struct dnode_of_data dn;
801 	struct page *npage;
802 
803 	if (!nid)
804 		return 0;
805 
806 	npage = get_node_page(sbi, nid);
807 	if (IS_ERR(npage))
808 		return PTR_ERR(npage);
809 
810 	F2FS_I(inode)->i_xattr_nid = 0;
811 
812 	/* need to do checkpoint during fsync */
813 	F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi));
814 
815 	set_new_dnode(&dn, inode, page, npage, nid);
816 
817 	if (page)
818 		dn.inode_page_locked = true;
819 	truncate_node(&dn);
820 	return 0;
821 }
822 
823 /*
824  * Caller should grab and release a rwsem by calling f2fs_lock_op() and
825  * f2fs_unlock_op().
826  */
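/*
 * A minimal caller sketch of the locking contract described above
 * (simplified; real callers sit in the inode eviction path):
 *
 *	f2fs_lock_op(sbi);
 *	remove_inode_page(inode);
 *	f2fs_unlock_op(sbi);
 */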
827 void remove_inode_page(struct inode *inode)
828 {
829 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
830 	struct page *page;
831 	nid_t ino = inode->i_ino;
832 	struct dnode_of_data dn;
833 
834 	page = get_node_page(sbi, ino);
835 	if (IS_ERR(page))
836 		return;
837 
838 	if (truncate_xattr_node(inode, page)) {
839 		f2fs_put_page(page, 1);
840 		return;
841 	}
842 	/* 0 is possible after f2fs_new_inode() fails */
843 	f2fs_bug_on(inode->i_blocks != 0 && inode->i_blocks != 1);
844 	set_new_dnode(&dn, inode, page, page, ino);
845 	truncate_node(&dn);
846 }
847 
848 struct page *new_inode_page(struct inode *inode, const struct qstr *name)
849 {
850 	struct dnode_of_data dn;
851 
852 	/* allocate inode page for new inode */
853 	set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
854 
855 	/* caller should call f2fs_put_page(page, 1); */
856 	return new_node_page(&dn, 0, NULL);
857 }
858 
859 struct page *new_node_page(struct dnode_of_data *dn,
860 				unsigned int ofs, struct page *ipage)
861 {
862 	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
863 	struct node_info old_ni, new_ni;
864 	struct page *page;
865 	int err;
866 
867 	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
868 		return ERR_PTR(-EPERM);
869 
870 	page = grab_cache_page(NODE_MAPPING(sbi), dn->nid);
871 	if (!page)
872 		return ERR_PTR(-ENOMEM);
873 
874 	if (unlikely(!inc_valid_node_count(sbi, dn->inode))) {
875 		err = -ENOSPC;
876 		goto fail;
877 	}
878 
879 	get_node_info(sbi, dn->nid, &old_ni);
880 
881 	/* Reinitialize old_ni with new node page */
882 	f2fs_bug_on(old_ni.blk_addr != NULL_ADDR);
883 	new_ni = old_ni;
884 	new_ni.ino = dn->inode->i_ino;
885 	set_node_addr(sbi, &new_ni, NEW_ADDR, false);
886 
887 	f2fs_wait_on_page_writeback(page, NODE);
888 	fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
889 	set_cold_node(dn->inode, page);
890 	SetPageUptodate(page);
891 	set_page_dirty(page);
892 
893 	if (f2fs_has_xattr_block(ofs))
894 		F2FS_I(dn->inode)->i_xattr_nid = dn->nid;
895 
896 	dn->node_page = page;
897 	if (ipage)
898 		update_inode(dn->inode, ipage);
899 	else
900 		sync_inode_page(dn);
901 	if (ofs == 0)
902 		inc_valid_inode_count(sbi);
903 
904 	return page;
905 
906 fail:
907 	clear_node_page_dirty(page);
908 	f2fs_put_page(page, 1);
909 	return ERR_PTR(err);
910 }
911 
912 /*
913  * The caller should act as follows after getting one of the following values.
914  * 0: f2fs_put_page(page, 0)
915  * LOCKED_PAGE: f2fs_put_page(page, 1)
916  * error: nothing
917  */
918 static int read_node_page(struct page *page, int rw)
919 {
920 	struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
921 	struct node_info ni;
922 
923 	get_node_info(sbi, page->index, &ni);
924 
925 	if (unlikely(ni.blk_addr == NULL_ADDR)) {
926 		f2fs_put_page(page, 1);
927 		return -ENOENT;
928 	}
929 
930 	if (PageUptodate(page))
931 		return LOCKED_PAGE;
932 
933 	return f2fs_submit_page_bio(sbi, page, ni.blk_addr, rw);
934 }
935 
936 /*
937  * Readahead a node page
938  */
939 void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
940 {
941 	struct page *apage;
942 	int err;
943 
944 	apage = find_get_page(NODE_MAPPING(sbi), nid);
945 	if (apage && PageUptodate(apage)) {
946 		f2fs_put_page(apage, 0);
947 		return;
948 	}
949 	f2fs_put_page(apage, 0);
950 
951 	apage = grab_cache_page(NODE_MAPPING(sbi), nid);
952 	if (!apage)
953 		return;
954 
955 	err = read_node_page(apage, READA);
956 	if (err == 0)
957 		f2fs_put_page(apage, 0);
958 	else if (err == LOCKED_PAGE)
959 		f2fs_put_page(apage, 1);
960 }
961 
962 struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
963 {
964 	struct page *page;
965 	int err;
966 repeat:
967 	page = grab_cache_page(NODE_MAPPING(sbi), nid);
968 	if (!page)
969 		return ERR_PTR(-ENOMEM);
970 
971 	err = read_node_page(page, READ_SYNC);
972 	if (err < 0)
973 		return ERR_PTR(err);
974 	else if (err == LOCKED_PAGE)
975 		goto got_it;
976 
977 	lock_page(page);
978 	if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) {
979 		f2fs_put_page(page, 1);
980 		return ERR_PTR(-EIO);
981 	}
982 	if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
983 		f2fs_put_page(page, 1);
984 		goto repeat;
985 	}
986 got_it:
987 	return page;
988 }
989 
990 /*
991  * Return a locked page for the desired node page.
992  * Also, readahead MAX_RA_NODE node pages.
993  */
994 struct page *get_node_page_ra(struct page *parent, int start)
995 {
996 	struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb);
997 	struct blk_plug plug;
998 	struct page *page;
999 	int err, i, end;
1000 	nid_t nid;
1001 
1002 	/* First, try getting the desired direct node. */
1003 	nid = get_nid(parent, start, false);
1004 	if (!nid)
1005 		return ERR_PTR(-ENOENT);
1006 repeat:
1007 	page = grab_cache_page(NODE_MAPPING(sbi), nid);
1008 	if (!page)
1009 		return ERR_PTR(-ENOMEM);
1010 
1011 	err = read_node_page(page, READ_SYNC);
1012 	if (err < 0)
1013 		return ERR_PTR(err);
1014 	else if (err == LOCKED_PAGE)
1015 		goto page_hit;
1016 
1017 	blk_start_plug(&plug);
1018 
1019 	/* Then, try readahead for siblings of the desired node */
1020 	end = start + MAX_RA_NODE;
1021 	end = min(end, NIDS_PER_BLOCK);
1022 	for (i = start + 1; i < end; i++) {
1023 		nid = get_nid(parent, i, false);
1024 		if (!nid)
1025 			continue;
1026 		ra_node_page(sbi, nid);
1027 	}
1028 
1029 	blk_finish_plug(&plug);
1030 
1031 	lock_page(page);
1032 	if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
1033 		f2fs_put_page(page, 1);
1034 		goto repeat;
1035 	}
1036 page_hit:
1037 	if (unlikely(!PageUptodate(page))) {
1038 		f2fs_put_page(page, 1);
1039 		return ERR_PTR(-EIO);
1040 	}
1041 	return page;
1042 }
1043 
1044 void sync_inode_page(struct dnode_of_data *dn)
1045 {
1046 	if (IS_INODE(dn->node_page) || dn->inode_page == dn->node_page) {
1047 		update_inode(dn->inode, dn->node_page);
1048 	} else if (dn->inode_page) {
1049 		if (!dn->inode_page_locked)
1050 			lock_page(dn->inode_page);
1051 		update_inode(dn->inode, dn->inode_page);
1052 		if (!dn->inode_page_locked)
1053 			unlock_page(dn->inode_page);
1054 	} else {
1055 		update_inode_page(dn->inode);
1056 	}
1057 }
1058 
1059 int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
1060 					struct writeback_control *wbc)
1061 {
1062 	pgoff_t index, end;
1063 	struct pagevec pvec;
1064 	int step = ino ? 2 : 0;
1065 	int nwritten = 0, wrote = 0;
1066 
1067 	pagevec_init(&pvec, 0);
1068 
1069 next_step:
1070 	index = 0;
1071 	end = LONG_MAX;
1072 
1073 	while (index <= end) {
1074 		int i, nr_pages;
1075 		nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
1076 				PAGECACHE_TAG_DIRTY,
1077 				min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
1078 		if (nr_pages == 0)
1079 			break;
1080 
1081 		for (i = 0; i < nr_pages; i++) {
1082 			struct page *page = pvec.pages[i];
1083 
1084 			/*
1085 			 * flushing sequence with step:
1086 			 * 0. indirect nodes
1087 			 * 1. dentry dnodes
1088 			 * 2. file dnodes
1089 			 */
1090 			if (step == 0 && IS_DNODE(page))
1091 				continue;
1092 			if (step == 1 && (!IS_DNODE(page) ||
1093 						is_cold_node(page)))
1094 				continue;
1095 			if (step == 2 && (!IS_DNODE(page) ||
1096 						!is_cold_node(page)))
1097 				continue;
1098 
1099 			/*
1100 			 * In fsync mode,
1101 			 * we should not skip writing node pages.
1102 			 */
1103 			if (ino && ino_of_node(page) == ino)
1104 				lock_page(page);
1105 			else if (!trylock_page(page))
1106 				continue;
1107 
1108 			if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
1109 continue_unlock:
1110 				unlock_page(page);
1111 				continue;
1112 			}
1113 			if (ino && ino_of_node(page) != ino)
1114 				goto continue_unlock;
1115 
1116 			if (!PageDirty(page)) {
1117 				/* someone wrote it for us */
1118 				goto continue_unlock;
1119 			}
1120 
1121 			if (!clear_page_dirty_for_io(page))
1122 				goto continue_unlock;
1123 
1124 			/* called by fsync() */
1125 			if (ino && IS_DNODE(page)) {
1126 				int mark = !is_checkpointed_node(sbi, ino);
1127 				set_fsync_mark(page, 1);
1128 				if (IS_INODE(page))
1129 					set_dentry_mark(page, mark);
1130 				nwritten++;
1131 			} else {
1132 				set_fsync_mark(page, 0);
1133 				set_dentry_mark(page, 0);
1134 			}
1135 			NODE_MAPPING(sbi)->a_ops->writepage(page, wbc);
1136 			wrote++;
1137 
1138 			if (--wbc->nr_to_write == 0)
1139 				break;
1140 		}
1141 		pagevec_release(&pvec);
1142 		cond_resched();
1143 
1144 		if (wbc->nr_to_write == 0) {
1145 			step = 2;
1146 			break;
1147 		}
1148 	}
1149 
1150 	if (step < 2) {
1151 		step++;
1152 		goto next_step;
1153 	}
1154 
1155 	if (wrote)
1156 		f2fs_submit_merged_bio(sbi, NODE, WRITE);
1157 	return nwritten;
1158 }
1159 
1160 int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
1161 {
1162 	pgoff_t index = 0, end = LONG_MAX;
1163 	struct pagevec pvec;
1164 	int ret2 = 0, ret = 0;
1165 
1166 	pagevec_init(&pvec, 0);
1167 
1168 	while (index <= end) {
1169 		int i, nr_pages;
1170 		nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
1171 				PAGECACHE_TAG_WRITEBACK,
1172 				min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
1173 		if (nr_pages == 0)
1174 			break;
1175 
1176 		for (i = 0; i < nr_pages; i++) {
1177 			struct page *page = pvec.pages[i];
1178 
1179 			/* until radix tree lookup accepts end_index */
1180 			if (unlikely(page->index > end))
1181 				continue;
1182 
1183 			if (ino && ino_of_node(page) == ino) {
1184 				f2fs_wait_on_page_writeback(page, NODE);
1185 				if (TestClearPageError(page))
1186 					ret = -EIO;
1187 			}
1188 		}
1189 		pagevec_release(&pvec);
1190 		cond_resched();
1191 	}
1192 
1193 	if (unlikely(test_and_clear_bit(AS_ENOSPC, &NODE_MAPPING(sbi)->flags)))
1194 		ret2 = -ENOSPC;
1195 	if (unlikely(test_and_clear_bit(AS_EIO, &NODE_MAPPING(sbi)->flags)))
1196 		ret2 = -EIO;
1197 	if (!ret)
1198 		ret = ret2;
1199 	return ret;
1200 }
1201 
1202 static int f2fs_write_node_page(struct page *page,
1203 				struct writeback_control *wbc)
1204 {
1205 	struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
1206 	nid_t nid;
1207 	block_t new_addr;
1208 	struct node_info ni;
1209 	struct f2fs_io_info fio = {
1210 		.type = NODE,
1211 		.rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
1212 	};
1213 
1214 	trace_f2fs_writepage(page, NODE);
1215 
1216 	if (unlikely(sbi->por_doing))
1217 		goto redirty_out;
1218 
1219 	f2fs_wait_on_page_writeback(page, NODE);
1220 
1221 	/* get old block addr of this node page */
1222 	nid = nid_of_node(page);
1223 	f2fs_bug_on(page->index != nid);
1224 
1225 	get_node_info(sbi, nid, &ni);
1226 
1227 	/* This page is already truncated */
1228 	if (unlikely(ni.blk_addr == NULL_ADDR)) {
1229 		dec_page_count(sbi, F2FS_DIRTY_NODES);
1230 		unlock_page(page);
1231 		return 0;
1232 	}
1233 
1234 	if (wbc->for_reclaim)
1235 		goto redirty_out;
1236 
1237 	mutex_lock(&sbi->node_write);
1238 	set_page_writeback(page);
1239 	write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr);
1240 	set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page));
1241 	dec_page_count(sbi, F2FS_DIRTY_NODES);
1242 	mutex_unlock(&sbi->node_write);
1243 	unlock_page(page);
1244 	return 0;
1245 
1246 redirty_out:
1247 	redirty_page_for_writepage(wbc, page);
1248 	return AOP_WRITEPAGE_ACTIVATE;
1249 }
1250 
1251 static int f2fs_write_node_pages(struct address_space *mapping,
1252 			    struct writeback_control *wbc)
1253 {
1254 	struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
1255 	long diff;
1256 
1257 	trace_f2fs_writepages(mapping->host, wbc, NODE);
1258 
1259 	/* balancing f2fs's metadata in background */
1260 	f2fs_balance_fs_bg(sbi);
1261 
1262 	/* collect a number of dirty node pages and write together */
1263 	if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE))
1264 		goto skip_write;
1265 
1266 	diff = nr_pages_to_write(sbi, NODE, wbc);
1267 	wbc->sync_mode = WB_SYNC_NONE;
1268 	sync_node_pages(sbi, 0, wbc);
1269 	wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
1270 	return 0;
1271 
1272 skip_write:
1273 	wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_NODES);
1274 	return 0;
1275 }
1276 
1277 static int f2fs_set_node_page_dirty(struct page *page)
1278 {
1279 	struct address_space *mapping = page->mapping;
1280 	struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
1281 
1282 	trace_f2fs_set_page_dirty(page, NODE);
1283 
1284 	SetPageUptodate(page);
1285 	if (!PageDirty(page)) {
1286 		__set_page_dirty_nobuffers(page);
1287 		inc_page_count(sbi, F2FS_DIRTY_NODES);
1288 		SetPagePrivate(page);
1289 		return 1;
1290 	}
1291 	return 0;
1292 }
1293 
1294 static void f2fs_invalidate_node_page(struct page *page, unsigned int offset,
1295 				      unsigned int length)
1296 {
1297 	struct inode *inode = page->mapping->host;
1298 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
1299 	if (PageDirty(page))
1300 		dec_page_count(sbi, F2FS_DIRTY_NODES);
1301 	ClearPagePrivate(page);
1302 }
1303 
1304 static int f2fs_release_node_page(struct page *page, gfp_t wait)
1305 {
1306 	ClearPagePrivate(page);
1307 	return 1;
1308 }
1309 
1310 /*
1311  * Structure of the f2fs node operations
1312  */
1313 const struct address_space_operations f2fs_node_aops = {
1314 	.writepage	= f2fs_write_node_page,
1315 	.writepages	= f2fs_write_node_pages,
1316 	.set_page_dirty	= f2fs_set_node_page_dirty,
1317 	.invalidatepage	= f2fs_invalidate_node_page,
1318 	.releasepage	= f2fs_release_node_page,
1319 };
1320 
1321 static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i,
1322 						nid_t n)
1323 {
1324 	return radix_tree_lookup(&nm_i->free_nid_root, n);
1325 }
1326 
1327 static void __del_from_free_nid_list(struct f2fs_nm_info *nm_i,
1328 						struct free_nid *i)
1329 {
1330 	list_del(&i->list);
1331 	radix_tree_delete(&nm_i->free_nid_root, i->nid);
1332 }
1333 
1334 static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
1335 {
1336 	struct f2fs_nm_info *nm_i = NM_I(sbi);
1337 	struct free_nid *i;
1338 	struct nat_entry *ne;
1339 	bool allocated = false;
1340 
1341 	if (!available_free_memory(sbi, FREE_NIDS))
1342 		return -1;
1343 
1344 	/* 0 nid should not be used */
1345 	if (unlikely(nid == 0))
1346 		return 0;
1347 
1348 	if (build) {
1349 		/* do not add allocated nids */
1350 		read_lock(&nm_i->nat_tree_lock);
1351 		ne = __lookup_nat_cache(nm_i, nid);
1352 		if (ne &&
1353 			(!ne->checkpointed || nat_get_blkaddr(ne) != NULL_ADDR))
1354 			allocated = true;
1355 		read_unlock(&nm_i->nat_tree_lock);
1356 		if (allocated)
1357 			return 0;
1358 	}
1359 
1360 	i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
1361 	i->nid = nid;
1362 	i->state = NID_NEW;
1363 
1364 	spin_lock(&nm_i->free_nid_list_lock);
1365 	if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) {
1366 		spin_unlock(&nm_i->free_nid_list_lock);
1367 		kmem_cache_free(free_nid_slab, i);
1368 		return 0;
1369 	}
1370 	list_add_tail(&i->list, &nm_i->free_nid_list);
1371 	nm_i->fcnt++;
1372 	spin_unlock(&nm_i->free_nid_list_lock);
1373 	return 1;
1374 }
1375 
1376 static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
1377 {
1378 	struct free_nid *i;
1379 	bool need_free = false;
1380 
1381 	spin_lock(&nm_i->free_nid_list_lock);
1382 	i = __lookup_free_nid_list(nm_i, nid);
1383 	if (i && i->state == NID_NEW) {
1384 		__del_from_free_nid_list(nm_i, i);
1385 		nm_i->fcnt--;
1386 		need_free = true;
1387 	}
1388 	spin_unlock(&nm_i->free_nid_list_lock);
1389 
1390 	if (need_free)
1391 		kmem_cache_free(free_nid_slab, i);
1392 }
1393 
1394 static void scan_nat_page(struct f2fs_sb_info *sbi,
1395 			struct page *nat_page, nid_t start_nid)
1396 {
1397 	struct f2fs_nm_info *nm_i = NM_I(sbi);
1398 	struct f2fs_nat_block *nat_blk = page_address(nat_page);
1399 	block_t blk_addr;
1400 	int i;
1401 
1402 	i = start_nid % NAT_ENTRY_PER_BLOCK;
1403 
1404 	for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
1405 
1406 		if (unlikely(start_nid >= nm_i->max_nid))
1407 			break;
1408 
1409 		blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
1410 		f2fs_bug_on(blk_addr == NEW_ADDR);
1411 		if (blk_addr == NULL_ADDR) {
1412 			if (add_free_nid(sbi, start_nid, true) < 0)
1413 				break;
1414 		}
1415 	}
1416 }
1417 
1418 static void build_free_nids(struct f2fs_sb_info *sbi)
1419 {
1420 	struct f2fs_nm_info *nm_i = NM_I(sbi);
1421 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1422 	struct f2fs_summary_block *sum = curseg->sum_blk;
1423 	int i = 0;
1424 	nid_t nid = nm_i->next_scan_nid;
1425 
1426 	/* Enough entries */
1427 	if (nm_i->fcnt > NAT_ENTRY_PER_BLOCK)
1428 		return;
1429 
1430 	/* readahead nat pages to be scanned */
1431 	ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT);
1432 
1433 	while (1) {
1434 		struct page *page = get_current_nat_page(sbi, nid);
1435 
1436 		scan_nat_page(sbi, page, nid);
1437 		f2fs_put_page(page, 1);
1438 
1439 		nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
1440 		if (unlikely(nid >= nm_i->max_nid))
1441 			nid = 0;
1442 
1443 		if (i++ == FREE_NID_PAGES)
1444 			break;
1445 	}
1446 
1447 	/* remember where to continue scanning for free nids next time */
1448 	nm_i->next_scan_nid = nid;
1449 
1450 	/* find free nids in the current summary page */
1451 	mutex_lock(&curseg->curseg_mutex);
1452 	for (i = 0; i < nats_in_cursum(sum); i++) {
1453 		block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr);
1454 		nid = le32_to_cpu(nid_in_journal(sum, i));
1455 		if (addr == NULL_ADDR)
1456 			add_free_nid(sbi, nid, true);
1457 		else
1458 			remove_free_nid(nm_i, nid);
1459 	}
1460 	mutex_unlock(&curseg->curseg_mutex);
1461 }
1462 
1463 /*
1464  * If this function returns success, the caller can obtain a new nid
1465  * from the second parameter of this function.
1466  * The returned nid can be used as an ino as well as a nid when an inode is created.
1467  */
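/*
 * A hedged usage sketch, mirroring what get_dnode_of_data() does above;
 * dn, ofs and page are assumed to be set up by the caller:
 *
 *	nid_t nid;
 *
 *	if (!alloc_nid(sbi, &nid))
 *		return -ENOSPC;
 *	dn.nid = nid;
 *	page = new_node_page(&dn, ofs, NULL);
 *	if (IS_ERR(page)) {
 *		alloc_nid_failed(sbi, nid);
 *		return PTR_ERR(page);
 *	}
 *	alloc_nid_done(sbi, nid);
 */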
1468 bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
1469 {
1470 	struct f2fs_nm_info *nm_i = NM_I(sbi);
1471 	struct free_nid *i = NULL;
1472 retry:
1473 	if (unlikely(sbi->total_valid_node_count + 1 > nm_i->available_nids))
1474 		return false;
1475 
1476 	spin_lock(&nm_i->free_nid_list_lock);
1477 
1478 	/* We should not use stale free nids created by build_free_nids */
1479 	if (nm_i->fcnt && !on_build_free_nids(nm_i)) {
1480 		f2fs_bug_on(list_empty(&nm_i->free_nid_list));
1481 		list_for_each_entry(i, &nm_i->free_nid_list, list)
1482 			if (i->state == NID_NEW)
1483 				break;
1484 
1485 		f2fs_bug_on(i->state != NID_NEW);
1486 		*nid = i->nid;
1487 		i->state = NID_ALLOC;
1488 		nm_i->fcnt--;
1489 		spin_unlock(&nm_i->free_nid_list_lock);
1490 		return true;
1491 	}
1492 	spin_unlock(&nm_i->free_nid_list_lock);
1493 
1494 	/* Let's scan the nat pages and their caches to get free nids */
1495 	mutex_lock(&nm_i->build_lock);
1496 	build_free_nids(sbi);
1497 	mutex_unlock(&nm_i->build_lock);
1498 	goto retry;
1499 }
1500 
1501 /*
1502  * alloc_nid() should be called prior to this function.
1503  */
1504 void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
1505 {
1506 	struct f2fs_nm_info *nm_i = NM_I(sbi);
1507 	struct free_nid *i;
1508 
1509 	spin_lock(&nm_i->free_nid_list_lock);
1510 	i = __lookup_free_nid_list(nm_i, nid);
1511 	f2fs_bug_on(!i || i->state != NID_ALLOC);
1512 	__del_from_free_nid_list(nm_i, i);
1513 	spin_unlock(&nm_i->free_nid_list_lock);
1514 
1515 	kmem_cache_free(free_nid_slab, i);
1516 }
1517 
1518 /*
1519  * alloc_nid() should be called prior to this function.
1520  */
1521 void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
1522 {
1523 	struct f2fs_nm_info *nm_i = NM_I(sbi);
1524 	struct free_nid *i;
1525 	bool need_free = false;
1526 
1527 	if (!nid)
1528 		return;
1529 
1530 	spin_lock(&nm_i->free_nid_list_lock);
1531 	i = __lookup_free_nid_list(nm_i, nid);
1532 	f2fs_bug_on(!i || i->state != NID_ALLOC);
1533 	if (!available_free_memory(sbi, FREE_NIDS)) {
1534 		__del_from_free_nid_list(nm_i, i);
1535 		need_free = true;
1536 	} else {
1537 		i->state = NID_NEW;
1538 		nm_i->fcnt++;
1539 	}
1540 	spin_unlock(&nm_i->free_nid_list_lock);
1541 
1542 	if (need_free)
1543 		kmem_cache_free(free_nid_slab, i);
1544 }
1545 
1546 void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
1547 		struct f2fs_summary *sum, struct node_info *ni,
1548 		block_t new_blkaddr)
1549 {
1550 	rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr);
1551 	set_node_addr(sbi, ni, new_blkaddr, false);
1552 	clear_node_page_dirty(page);
1553 }
1554 
1555 static void recover_inline_xattr(struct inode *inode, struct page *page)
1556 {
1557 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
1558 	void *src_addr, *dst_addr;
1559 	size_t inline_size;
1560 	struct page *ipage;
1561 	struct f2fs_inode *ri;
1562 
1563 	if (!f2fs_has_inline_xattr(inode))
1564 		return;
1565 
1566 	if (!IS_INODE(page))
1567 		return;
1568 
1569 	ri = F2FS_INODE(page);
1570 	if (!(ri->i_inline & F2FS_INLINE_XATTR))
1571 		return;
1572 
1573 	ipage = get_node_page(sbi, inode->i_ino);
1574 	f2fs_bug_on(IS_ERR(ipage));
1575 
1576 	dst_addr = inline_xattr_addr(ipage);
1577 	src_addr = inline_xattr_addr(page);
1578 	inline_size = inline_xattr_size(inode);
1579 
1580 	f2fs_wait_on_page_writeback(ipage, NODE);
1581 	memcpy(dst_addr, src_addr, inline_size);
1582 
1583 	update_inode(inode, ipage);
1584 	f2fs_put_page(ipage, 1);
1585 }
1586 
1587 bool recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
1588 {
1589 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
1590 	nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid;
1591 	nid_t new_xnid = nid_of_node(page);
1592 	struct node_info ni;
1593 
1594 	recover_inline_xattr(inode, page);
1595 
1596 	if (!f2fs_has_xattr_block(ofs_of_node(page)))
1597 		return false;
1598 
1599 	/* 1: invalidate the previous xattr nid */
1600 	if (!prev_xnid)
1601 		goto recover_xnid;
1602 
1603 	/* Deallocate node address */
1604 	get_node_info(sbi, prev_xnid, &ni);
1605 	f2fs_bug_on(ni.blk_addr == NULL_ADDR);
1606 	invalidate_blocks(sbi, ni.blk_addr);
1607 	dec_valid_node_count(sbi, inode);
1608 	set_node_addr(sbi, &ni, NULL_ADDR, false);
1609 
1610 recover_xnid:
1611 	/* 2: allocate new xattr nid */
1612 	if (unlikely(!inc_valid_node_count(sbi, inode)))
1613 		f2fs_bug_on(1);
1614 
1615 	remove_free_nid(NM_I(sbi), new_xnid);
1616 	get_node_info(sbi, new_xnid, &ni);
1617 	ni.ino = inode->i_ino;
1618 	set_node_addr(sbi, &ni, NEW_ADDR, false);
1619 	F2FS_I(inode)->i_xattr_nid = new_xnid;
1620 
1621 	/* 3: update xattr blkaddr */
1622 	refresh_sit_entry(sbi, NEW_ADDR, blkaddr);
1623 	set_node_addr(sbi, &ni, blkaddr, false);
1624 
1625 	update_inode_page(inode);
1626 	return true;
1627 }
1628 
1629 int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
1630 {
1631 	struct f2fs_inode *src, *dst;
1632 	nid_t ino = ino_of_node(page);
1633 	struct node_info old_ni, new_ni;
1634 	struct page *ipage;
1635 
1636 	get_node_info(sbi, ino, &old_ni);
1637 
1638 	if (unlikely(old_ni.blk_addr != NULL_ADDR))
1639 		return -EINVAL;
1640 
1641 	ipage = grab_cache_page(NODE_MAPPING(sbi), ino);
1642 	if (!ipage)
1643 		return -ENOMEM;
1644 
1645 	/* This inode should not be used from the free nid list */
1646 	remove_free_nid(NM_I(sbi), ino);
1647 
1648 	SetPageUptodate(ipage);
1649 	fill_node_footer(ipage, ino, ino, 0, true);
1650 
1651 	src = F2FS_INODE(page);
1652 	dst = F2FS_INODE(ipage);
1653 
1654 	memcpy(dst, src, (unsigned long)&src->i_ext - (unsigned long)src);
1655 	dst->i_size = 0;
1656 	dst->i_blocks = cpu_to_le64(1);
1657 	dst->i_links = cpu_to_le32(1);
1658 	dst->i_xattr_nid = 0;
1659 
1660 	new_ni = old_ni;
1661 	new_ni.ino = ino;
1662 
1663 	if (unlikely(!inc_valid_node_count(sbi, NULL)))
1664 		WARN_ON(1);
1665 	set_node_addr(sbi, &new_ni, NEW_ADDR, false);
1666 	inc_valid_inode_count(sbi);
1667 	f2fs_put_page(ipage, 1);
1668 	return 0;
1669 }
1670 
1671 /*
1672  * ra_sum_pages() merges contiguous pages into one bio and submits it.
1673  * These pre-read pages are allocated in bd_inode's mapping tree.
1674  */
1675 static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages,
1676 				int start, int nrpages)
1677 {
1678 	struct inode *inode = sbi->sb->s_bdev->bd_inode;
1679 	struct address_space *mapping = inode->i_mapping;
1680 	int i, page_idx = start;
1681 	struct f2fs_io_info fio = {
1682 		.type = META,
1683 		.rw = READ_SYNC | REQ_META | REQ_PRIO
1684 	};
1685 
1686 	for (i = 0; page_idx < start + nrpages; page_idx++, i++) {
1687 		/* alloc page in bd_inode for reading node summary info */
1688 		pages[i] = grab_cache_page(mapping, page_idx);
1689 		if (!pages[i])
1690 			break;
1691 		f2fs_submit_page_mbio(sbi, pages[i], page_idx, &fio);
1692 	}
1693 
1694 	f2fs_submit_merged_bio(sbi, META, READ);
1695 	return i;
1696 }
1697 
1698 int restore_node_summary(struct f2fs_sb_info *sbi,
1699 			unsigned int segno, struct f2fs_summary_block *sum)
1700 {
1701 	struct f2fs_node *rn;
1702 	struct f2fs_summary *sum_entry;
1703 	struct inode *inode = sbi->sb->s_bdev->bd_inode;
1704 	block_t addr;
1705 	int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
1706 	struct page *pages[bio_blocks];
1707 	int i, idx, last_offset, nrpages, err = 0;
1708 
1709 	/* scan the node segment */
1710 	last_offset = sbi->blocks_per_seg;
1711 	addr = START_BLOCK(sbi, segno);
1712 	sum_entry = &sum->entries[0];
1713 
1714 	for (i = 0; !err && i < last_offset; i += nrpages, addr += nrpages) {
1715 		nrpages = min(last_offset - i, bio_blocks);
1716 
1717 		/* read ahead node pages */
1718 		nrpages = ra_sum_pages(sbi, pages, addr, nrpages);
1719 		if (!nrpages)
1720 			return -ENOMEM;
1721 
1722 		for (idx = 0; idx < nrpages; idx++) {
1723 			if (err)
1724 				goto skip;
1725 
1726 			lock_page(pages[idx]);
1727 			if (unlikely(!PageUptodate(pages[idx]))) {
1728 				err = -EIO;
1729 			} else {
1730 				rn = F2FS_NODE(pages[idx]);
1731 				sum_entry->nid = rn->footer.nid;
1732 				sum_entry->version = 0;
1733 				sum_entry->ofs_in_node = 0;
1734 				sum_entry++;
1735 			}
1736 			unlock_page(pages[idx]);
1737 skip:
1738 			page_cache_release(pages[idx]);
1739 		}
1740 
1741 		invalidate_mapping_pages(inode->i_mapping, addr,
1742 							addr + nrpages);
1743 	}
1744 	return err;
1745 }
1746 
1747 static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
1748 {
1749 	struct f2fs_nm_info *nm_i = NM_I(sbi);
1750 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1751 	struct f2fs_summary_block *sum = curseg->sum_blk;
1752 	int i;
1753 
1754 	mutex_lock(&curseg->curseg_mutex);
1755 
1756 	if (nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES) {
1757 		mutex_unlock(&curseg->curseg_mutex);
1758 		return false;
1759 	}
1760 
1761 	for (i = 0; i < nats_in_cursum(sum); i++) {
1762 		struct nat_entry *ne;
1763 		struct f2fs_nat_entry raw_ne;
1764 		nid_t nid = le32_to_cpu(nid_in_journal(sum, i));
1765 
1766 		raw_ne = nat_in_journal(sum, i);
1767 retry:
1768 		write_lock(&nm_i->nat_tree_lock);
1769 		ne = __lookup_nat_cache(nm_i, nid);
1770 		if (ne) {
1771 			__set_nat_cache_dirty(nm_i, ne);
1772 			write_unlock(&nm_i->nat_tree_lock);
1773 			continue;
1774 		}
1775 		ne = grab_nat_entry(nm_i, nid);
1776 		if (!ne) {
1777 			write_unlock(&nm_i->nat_tree_lock);
1778 			goto retry;
1779 		}
1780 		node_info_from_raw_nat(&ne->ni, &raw_ne);
1781 		__set_nat_cache_dirty(nm_i, ne);
1782 		write_unlock(&nm_i->nat_tree_lock);
1783 	}
1784 	update_nats_in_cursum(sum, -i);
1785 	mutex_unlock(&curseg->curseg_mutex);
1786 	return true;
1787 }
1788 
1789 /*
1790  * This function is called during the checkpointing process.
1791  */
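/*
 * A rough sketch of where this sits in the checkpoint path; the exact call
 * sequence in checkpoint.c is an assumption and is simplified here:
 *
 *	write_checkpoint(sbi, ...)
 *		flush_nat_entries(sbi);	-> journal or write back dirty nat entries
 *		flush_sit_entries(sbi);
 *		do_checkpoint(sbi, ...);
 */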
1792 void flush_nat_entries(struct f2fs_sb_info *sbi)
1793 {
1794 	struct f2fs_nm_info *nm_i = NM_I(sbi);
1795 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1796 	struct f2fs_summary_block *sum = curseg->sum_blk;
1797 	struct nat_entry *ne, *cur;
1798 	struct page *page = NULL;
1799 	struct f2fs_nat_block *nat_blk = NULL;
1800 	nid_t start_nid = 0, end_nid = 0;
1801 	bool flushed;
1802 
1803 	flushed = flush_nats_in_journal(sbi);
1804 
1805 	if (!flushed)
1806 		mutex_lock(&curseg->curseg_mutex);
1807 
1808 	/* 1) flush dirty nat caches */
1809 	list_for_each_entry_safe(ne, cur, &nm_i->dirty_nat_entries, list) {
1810 		nid_t nid;
1811 		struct f2fs_nat_entry raw_ne;
1812 		int offset = -1;
1813 
1814 		if (nat_get_blkaddr(ne) == NEW_ADDR)
1815 			continue;
1816 
1817 		nid = nat_get_nid(ne);
1818 
1819 		if (flushed)
1820 			goto to_nat_page;
1821 
1822 		/* if there is room for nat entries in curseg's summary page */
1823 		offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1);
1824 		if (offset >= 0) {
1825 			raw_ne = nat_in_journal(sum, offset);
1826 			goto flush_now;
1827 		}
1828 to_nat_page:
1829 		if (!page || (start_nid > nid || nid > end_nid)) {
1830 			if (page) {
1831 				f2fs_put_page(page, 1);
1832 				page = NULL;
1833 			}
1834 			start_nid = START_NID(nid);
1835 			end_nid = start_nid + NAT_ENTRY_PER_BLOCK - 1;
1836 
1837 			/*
1838 			 * get the nat block page with the dirty flag set, an
1839 			 * increased reference count, mapped and locked
1840 			 */
1841 			page = get_next_nat_page(sbi, start_nid);
1842 			nat_blk = page_address(page);
1843 		}
1844 
1845 		f2fs_bug_on(!nat_blk);
1846 		raw_ne = nat_blk->entries[nid - start_nid];
1847 flush_now:
1848 		raw_nat_from_node_info(&raw_ne, &ne->ni);
1849 
1850 		if (offset < 0) {
1851 			nat_blk->entries[nid - start_nid] = raw_ne;
1852 		} else {
1853 			nat_in_journal(sum, offset) = raw_ne;
1854 			nid_in_journal(sum, offset) = cpu_to_le32(nid);
1855 		}
1856 
1857 		if (nat_get_blkaddr(ne) == NULL_ADDR &&
1858 				add_free_nid(sbi, nid, false) <= 0) {
1859 			write_lock(&nm_i->nat_tree_lock);
1860 			__del_from_nat_cache(nm_i, ne);
1861 			write_unlock(&nm_i->nat_tree_lock);
1862 		} else {
1863 			write_lock(&nm_i->nat_tree_lock);
1864 			__clear_nat_cache_dirty(nm_i, ne);
1865 			write_unlock(&nm_i->nat_tree_lock);
1866 		}
1867 	}
1868 	if (!flushed)
1869 		mutex_unlock(&curseg->curseg_mutex);
1870 	f2fs_put_page(page, 1);
1871 }
1872 
1873 static int init_node_manager(struct f2fs_sb_info *sbi)
1874 {
1875 	struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
1876 	struct f2fs_nm_info *nm_i = NM_I(sbi);
1877 	unsigned char *version_bitmap;
1878 	unsigned int nat_segs, nat_blocks;
1879 
1880 	nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
1881 
1882 	/* segment_count_nat includes pair segment so divide to 2. */
1883 	nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
1884 	nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
1885 
1886 	nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
1887 
1888 	/* not used nids: 0, node, meta, (and root counted as valid node) */
1889 	nm_i->available_nids = nm_i->max_nid - 3;
1890 	nm_i->fcnt = 0;
1891 	nm_i->nat_cnt = 0;
1892 	nm_i->ram_thresh = DEF_RAM_THRESHOLD;
1893 
1894 	INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
1895 	INIT_LIST_HEAD(&nm_i->free_nid_list);
1896 	INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
1897 	INIT_LIST_HEAD(&nm_i->nat_entries);
1898 	INIT_LIST_HEAD(&nm_i->dirty_nat_entries);
1899 
1900 	mutex_init(&nm_i->build_lock);
1901 	spin_lock_init(&nm_i->free_nid_list_lock);
1902 	rwlock_init(&nm_i->nat_tree_lock);
1903 
1904 	nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
1905 	nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
1906 	version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP);
1907 	if (!version_bitmap)
1908 		return -EFAULT;
1909 
1910 	nm_i->nat_bitmap = kmemdup(version_bitmap, nm_i->bitmap_size,
1911 					GFP_KERNEL);
1912 	if (!nm_i->nat_bitmap)
1913 		return -ENOMEM;
1914 	return 0;
1915 }
1916 
1917 int build_node_manager(struct f2fs_sb_info *sbi)
1918 {
1919 	int err;
1920 
1921 	sbi->nm_info = kzalloc(sizeof(struct f2fs_nm_info), GFP_KERNEL);
1922 	if (!sbi->nm_info)
1923 		return -ENOMEM;
1924 
1925 	err = init_node_manager(sbi);
1926 	if (err)
1927 		return err;
1928 
1929 	build_free_nids(sbi);
1930 	return 0;
1931 }
1932 
1933 void destroy_node_manager(struct f2fs_sb_info *sbi)
1934 {
1935 	struct f2fs_nm_info *nm_i = NM_I(sbi);
1936 	struct free_nid *i, *next_i;
1937 	struct nat_entry *natvec[NATVEC_SIZE];
1938 	nid_t nid = 0;
1939 	unsigned int found;
1940 
1941 	if (!nm_i)
1942 		return;
1943 
1944 	/* destroy free nid list */
1945 	spin_lock(&nm_i->free_nid_list_lock);
1946 	list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
1947 		f2fs_bug_on(i->state == NID_ALLOC);
1948 		__del_from_free_nid_list(nm_i, i);
1949 		nm_i->fcnt--;
1950 		spin_unlock(&nm_i->free_nid_list_lock);
1951 		kmem_cache_free(free_nid_slab, i);
1952 		spin_lock(&nm_i->free_nid_list_lock);
1953 	}
1954 	f2fs_bug_on(nm_i->fcnt);
1955 	spin_unlock(&nm_i->free_nid_list_lock);
1956 
1957 	/* destroy nat cache */
1958 	write_lock(&nm_i->nat_tree_lock);
1959 	while ((found = __gang_lookup_nat_cache(nm_i,
1960 					nid, NATVEC_SIZE, natvec))) {
1961 		unsigned idx;
1962 		nid = nat_get_nid(natvec[found - 1]) + 1;
1963 		for (idx = 0; idx < found; idx++)
1964 			__del_from_nat_cache(nm_i, natvec[idx]);
1965 	}
1966 	f2fs_bug_on(nm_i->nat_cnt);
1967 	write_unlock(&nm_i->nat_tree_lock);
1968 
1969 	kfree(nm_i->nat_bitmap);
1970 	sbi->nm_info = NULL;
1971 	kfree(nm_i);
1972 }
1973 
1974 int __init create_node_manager_caches(void)
1975 {
1976 	nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
1977 			sizeof(struct nat_entry));
1978 	if (!nat_entry_slab)
1979 		return -ENOMEM;
1980 
1981 	free_nid_slab = f2fs_kmem_cache_create("free_nid",
1982 			sizeof(struct free_nid));
1983 	if (!free_nid_slab) {
1984 		kmem_cache_destroy(nat_entry_slab);
1985 		return -ENOMEM;
1986 	}
1987 	return 0;
1988 }
1989 
1990 void destroy_node_manager_caches(void)
1991 {
1992 	kmem_cache_destroy(free_nid_slab);
1993 	kmem_cache_destroy(nat_entry_slab);
1994 }
1995