xref: /openbmc/linux/fs/f2fs/node.c (revision 36bccb11)
1 /*
2  * fs/f2fs/node.c
3  *
4  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5  *             http://www.samsung.com/
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11 #include <linux/fs.h>
12 #include <linux/f2fs_fs.h>
13 #include <linux/mpage.h>
14 #include <linux/backing-dev.h>
15 #include <linux/blkdev.h>
16 #include <linux/pagevec.h>
17 #include <linux/swap.h>
18 
19 #include "f2fs.h"
20 #include "node.h"
21 #include "segment.h"
22 #include <trace/events/f2fs.h>
23 
/*
 * Evaluates to non-zero while the build_lock mutex of the given node manager
 * info is held.  The macro argument is parenthesized and actually used, so the
 * macro no longer depends on the caller having a local variable named nm_i.
 */
#define on_build_free_nids(nmi) mutex_is_locked(&(nmi)->build_lock)
25 
/* Slab cache from which grab_nat_entry() allocates struct nat_entry objects. */
static struct kmem_cache *nat_entry_slab;
/* Slab cache presumably backing struct free_nid objects (users are outside this excerpt). */
static struct kmem_cache *free_nid_slab;
28 
29 static inline bool available_free_memory(struct f2fs_nm_info *nm_i, int type)
30 {
31 	struct sysinfo val;
32 	unsigned long mem_size = 0;
33 
34 	si_meminfo(&val);
35 	if (type == FREE_NIDS)
36 		mem_size = nm_i->fcnt * sizeof(struct free_nid);
37 	else if (type == NAT_ENTRIES)
38 		mem_size += nm_i->nat_cnt * sizeof(struct nat_entry);
39 	mem_size >>= 12;
40 
41 	/* give 50:50 memory for free nids and nat caches respectively */
42 	return (mem_size < ((val.totalram * nm_i->ram_thresh) >> 11));
43 }
44 
45 static void clear_node_page_dirty(struct page *page)
46 {
47 	struct address_space *mapping = page->mapping;
48 	struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
49 	unsigned int long flags;
50 
51 	if (PageDirty(page)) {
52 		spin_lock_irqsave(&mapping->tree_lock, flags);
53 		radix_tree_tag_clear(&mapping->page_tree,
54 				page_index(page),
55 				PAGECACHE_TAG_DIRTY);
56 		spin_unlock_irqrestore(&mapping->tree_lock, flags);
57 
58 		clear_page_dirty_for_io(page);
59 		dec_page_count(sbi, F2FS_DIRTY_NODES);
60 	}
61 	ClearPageUptodate(page);
62 }
63 
64 static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
65 {
66 	pgoff_t index = current_nat_addr(sbi, nid);
67 	return get_meta_page(sbi, index);
68 }
69 
70 static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
71 {
72 	struct page *src_page;
73 	struct page *dst_page;
74 	pgoff_t src_off;
75 	pgoff_t dst_off;
76 	void *src_addr;
77 	void *dst_addr;
78 	struct f2fs_nm_info *nm_i = NM_I(sbi);
79 
80 	src_off = current_nat_addr(sbi, nid);
81 	dst_off = next_nat_addr(sbi, src_off);
82 
83 	/* get current nat block page with lock */
84 	src_page = get_meta_page(sbi, src_off);
85 
86 	/* Dirty src_page means that it is already the new target NAT page. */
87 	if (PageDirty(src_page))
88 		return src_page;
89 
90 	dst_page = grab_meta_page(sbi, dst_off);
91 
92 	src_addr = page_address(src_page);
93 	dst_addr = page_address(dst_page);
94 	memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE);
95 	set_page_dirty(dst_page);
96 	f2fs_put_page(src_page, 1);
97 
98 	set_to_next_nat(nm_i, nid);
99 
100 	return dst_page;
101 }
102 
103 static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
104 {
105 	return radix_tree_lookup(&nm_i->nat_root, n);
106 }
107 
108 static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i,
109 		nid_t start, unsigned int nr, struct nat_entry **ep)
110 {
111 	return radix_tree_gang_lookup(&nm_i->nat_root, (void **)ep, start, nr);
112 }
113 
114 static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
115 {
116 	list_del(&e->list);
117 	radix_tree_delete(&nm_i->nat_root, nat_get_nid(e));
118 	nm_i->nat_cnt--;
119 	kmem_cache_free(nat_entry_slab, e);
120 }
121 
122 int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
123 {
124 	struct f2fs_nm_info *nm_i = NM_I(sbi);
125 	struct nat_entry *e;
126 	int is_cp = 1;
127 
128 	read_lock(&nm_i->nat_tree_lock);
129 	e = __lookup_nat_cache(nm_i, nid);
130 	if (e && !e->checkpointed)
131 		is_cp = 0;
132 	read_unlock(&nm_i->nat_tree_lock);
133 	return is_cp;
134 }
135 
136 bool fsync_mark_done(struct f2fs_sb_info *sbi, nid_t nid)
137 {
138 	struct f2fs_nm_info *nm_i = NM_I(sbi);
139 	struct nat_entry *e;
140 	bool fsync_done = false;
141 
142 	read_lock(&nm_i->nat_tree_lock);
143 	e = __lookup_nat_cache(nm_i, nid);
144 	if (e)
145 		fsync_done = e->fsync_done;
146 	read_unlock(&nm_i->nat_tree_lock);
147 	return fsync_done;
148 }
149 
150 static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
151 {
152 	struct nat_entry *new;
153 
154 	new = kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC);
155 	if (!new)
156 		return NULL;
157 	if (radix_tree_insert(&nm_i->nat_root, nid, new)) {
158 		kmem_cache_free(nat_entry_slab, new);
159 		return NULL;
160 	}
161 	memset(new, 0, sizeof(struct nat_entry));
162 	nat_set_nid(new, nid);
163 	new->checkpointed = true;
164 	list_add_tail(&new->list, &nm_i->nat_entries);
165 	nm_i->nat_cnt++;
166 	return new;
167 }
168 
169 static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
170 						struct f2fs_nat_entry *ne)
171 {
172 	struct nat_entry *e;
173 retry:
174 	write_lock(&nm_i->nat_tree_lock);
175 	e = __lookup_nat_cache(nm_i, nid);
176 	if (!e) {
177 		e = grab_nat_entry(nm_i, nid);
178 		if (!e) {
179 			write_unlock(&nm_i->nat_tree_lock);
180 			goto retry;
181 		}
182 		nat_set_blkaddr(e, le32_to_cpu(ne->block_addr));
183 		nat_set_ino(e, le32_to_cpu(ne->ino));
184 		nat_set_version(e, ne->version);
185 	}
186 	write_unlock(&nm_i->nat_tree_lock);
187 }
188 
189 static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
190 			block_t new_blkaddr, bool fsync_done)
191 {
192 	struct f2fs_nm_info *nm_i = NM_I(sbi);
193 	struct nat_entry *e;
194 retry:
195 	write_lock(&nm_i->nat_tree_lock);
196 	e = __lookup_nat_cache(nm_i, ni->nid);
197 	if (!e) {
198 		e = grab_nat_entry(nm_i, ni->nid);
199 		if (!e) {
200 			write_unlock(&nm_i->nat_tree_lock);
201 			goto retry;
202 		}
203 		e->ni = *ni;
204 		f2fs_bug_on(ni->blk_addr == NEW_ADDR);
205 	} else if (new_blkaddr == NEW_ADDR) {
206 		/*
207 		 * when nid is reallocated,
208 		 * previous nat entry can be remained in nat cache.
209 		 * So, reinitialize it with new information.
210 		 */
211 		e->ni = *ni;
212 		f2fs_bug_on(ni->blk_addr != NULL_ADDR);
213 	}
214 
215 	/* sanity check */
216 	f2fs_bug_on(nat_get_blkaddr(e) != ni->blk_addr);
217 	f2fs_bug_on(nat_get_blkaddr(e) == NULL_ADDR &&
218 			new_blkaddr == NULL_ADDR);
219 	f2fs_bug_on(nat_get_blkaddr(e) == NEW_ADDR &&
220 			new_blkaddr == NEW_ADDR);
221 	f2fs_bug_on(nat_get_blkaddr(e) != NEW_ADDR &&
222 			nat_get_blkaddr(e) != NULL_ADDR &&
223 			new_blkaddr == NEW_ADDR);
224 
225 	/* increament version no as node is removed */
226 	if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) {
227 		unsigned char version = nat_get_version(e);
228 		nat_set_version(e, inc_node_version(version));
229 	}
230 
231 	/* change address */
232 	nat_set_blkaddr(e, new_blkaddr);
233 	__set_nat_cache_dirty(nm_i, e);
234 
235 	/* update fsync_mark if its inode nat entry is still alive */
236 	e = __lookup_nat_cache(nm_i, ni->ino);
237 	if (e)
238 		e->fsync_done = fsync_done;
239 	write_unlock(&nm_i->nat_tree_lock);
240 }
241 
242 int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
243 {
244 	struct f2fs_nm_info *nm_i = NM_I(sbi);
245 
246 	if (available_free_memory(nm_i, NAT_ENTRIES))
247 		return 0;
248 
249 	write_lock(&nm_i->nat_tree_lock);
250 	while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
251 		struct nat_entry *ne;
252 		ne = list_first_entry(&nm_i->nat_entries,
253 					struct nat_entry, list);
254 		__del_from_nat_cache(nm_i, ne);
255 		nr_shrink--;
256 	}
257 	write_unlock(&nm_i->nat_tree_lock);
258 	return nr_shrink;
259 }
260 
261 /*
262  * This function returns always success
263  */
264 void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
265 {
266 	struct f2fs_nm_info *nm_i = NM_I(sbi);
267 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
268 	struct f2fs_summary_block *sum = curseg->sum_blk;
269 	nid_t start_nid = START_NID(nid);
270 	struct f2fs_nat_block *nat_blk;
271 	struct page *page = NULL;
272 	struct f2fs_nat_entry ne;
273 	struct nat_entry *e;
274 	int i;
275 
276 	memset(&ne, 0, sizeof(struct f2fs_nat_entry));
277 	ni->nid = nid;
278 
279 	/* Check nat cache */
280 	read_lock(&nm_i->nat_tree_lock);
281 	e = __lookup_nat_cache(nm_i, nid);
282 	if (e) {
283 		ni->ino = nat_get_ino(e);
284 		ni->blk_addr = nat_get_blkaddr(e);
285 		ni->version = nat_get_version(e);
286 	}
287 	read_unlock(&nm_i->nat_tree_lock);
288 	if (e)
289 		return;
290 
291 	/* Check current segment summary */
292 	mutex_lock(&curseg->curseg_mutex);
293 	i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0);
294 	if (i >= 0) {
295 		ne = nat_in_journal(sum, i);
296 		node_info_from_raw_nat(ni, &ne);
297 	}
298 	mutex_unlock(&curseg->curseg_mutex);
299 	if (i >= 0)
300 		goto cache;
301 
302 	/* Fill node_info from nat page */
303 	page = get_current_nat_page(sbi, start_nid);
304 	nat_blk = (struct f2fs_nat_block *)page_address(page);
305 	ne = nat_blk->entries[nid - start_nid];
306 	node_info_from_raw_nat(ni, &ne);
307 	f2fs_put_page(page, 1);
308 cache:
309 	/* cache nat entry */
310 	cache_nat_entry(NM_I(sbi), nid, &ne);
311 }
312 
313 /*
314  * The maximum depth is four.
315  * Offset[0] will have raw inode offset.
316  */
317 static int get_node_path(struct f2fs_inode_info *fi, long block,
318 				int offset[4], unsigned int noffset[4])
319 {
320 	const long direct_index = ADDRS_PER_INODE(fi);
321 	const long direct_blks = ADDRS_PER_BLOCK;
322 	const long dptrs_per_blk = NIDS_PER_BLOCK;
323 	const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
324 	const long dindirect_blks = indirect_blks * NIDS_PER_BLOCK;
325 	int n = 0;
326 	int level = 0;
327 
328 	noffset[0] = 0;
329 
330 	if (block < direct_index) {
331 		offset[n] = block;
332 		goto got;
333 	}
334 	block -= direct_index;
335 	if (block < direct_blks) {
336 		offset[n++] = NODE_DIR1_BLOCK;
337 		noffset[n] = 1;
338 		offset[n] = block;
339 		level = 1;
340 		goto got;
341 	}
342 	block -= direct_blks;
343 	if (block < direct_blks) {
344 		offset[n++] = NODE_DIR2_BLOCK;
345 		noffset[n] = 2;
346 		offset[n] = block;
347 		level = 1;
348 		goto got;
349 	}
350 	block -= direct_blks;
351 	if (block < indirect_blks) {
352 		offset[n++] = NODE_IND1_BLOCK;
353 		noffset[n] = 3;
354 		offset[n++] = block / direct_blks;
355 		noffset[n] = 4 + offset[n - 1];
356 		offset[n] = block % direct_blks;
357 		level = 2;
358 		goto got;
359 	}
360 	block -= indirect_blks;
361 	if (block < indirect_blks) {
362 		offset[n++] = NODE_IND2_BLOCK;
363 		noffset[n] = 4 + dptrs_per_blk;
364 		offset[n++] = block / direct_blks;
365 		noffset[n] = 5 + dptrs_per_blk + offset[n - 1];
366 		offset[n] = block % direct_blks;
367 		level = 2;
368 		goto got;
369 	}
370 	block -= indirect_blks;
371 	if (block < dindirect_blks) {
372 		offset[n++] = NODE_DIND_BLOCK;
373 		noffset[n] = 5 + (dptrs_per_blk * 2);
374 		offset[n++] = block / indirect_blks;
375 		noffset[n] = 6 + (dptrs_per_blk * 2) +
376 			      offset[n - 1] * (dptrs_per_blk + 1);
377 		offset[n++] = (block / direct_blks) % dptrs_per_blk;
378 		noffset[n] = 7 + (dptrs_per_blk * 2) +
379 			      offset[n - 2] * (dptrs_per_blk + 1) +
380 			      offset[n - 1];
381 		offset[n] = block % direct_blks;
382 		level = 3;
383 		goto got;
384 	} else {
385 		BUG();
386 	}
387 got:
388 	return level;
389 }
390 
/*
 * Locate the node that holds the block address for file page @index and
 * describe it in @dn.
 *
 * The path is computed with get_node_path() and walked from the inode page
 * (dn->inode_page if the caller supplied one, otherwise read here) down to
 * the node at the returned level.  On success dn->nid, dn->ofs_in_node,
 * dn->node_page and dn->data_blkaddr are filled in and 0 is returned.
 * On failure a negative errno is returned and both dn->inode_page and
 * dn->node_page are set to NULL.
 *
 * @mode == ALLOC_NODE allocates nodes that are missing on the path;
 * @mode == LOOKUP_NODE_RA reads the last node through get_node_page_ra()
 * when the path is deeper than one level.
 *
 * Caller should call f2fs_put_dnode(dn).
 * Also, it should grab and release a rwsem by calling f2fs_lock_op() and
 * f2fs_unlock_op() unless the lookup is read-only (RDONLY_NODE), in which
 * case no such locking is needed.
 */
int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
{
	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
	struct page *npage[4];
	struct page *parent;
	int offset[4];
	unsigned int noffset[4];
	nid_t nids[4];
	int level, i;
	int err = 0;

	level = get_node_path(F2FS_I(dn->inode), index, offset, noffset);

	nids[0] = dn->inode->i_ino;
	npage[0] = dn->inode_page;

	/* read the inode page ourselves if the caller did not pass one */
	if (!npage[0]) {
		npage[0] = get_node_page(sbi, nids[0]);
		if (IS_ERR(npage[0]))
			return PTR_ERR(npage[0]);
	}
	parent = npage[0];
	if (level != 0)
		nids[1] = get_nid(parent, offset[0], true);
	dn->inode_page = npage[0];
	dn->inode_page_locked = true;

	/* get indirect or direct nodes */
	for (i = 1; i <= level; i++) {
		bool done = false;

		if (!nids[i] && mode == ALLOC_NODE) {
			/* alloc new node */
			if (!alloc_nid(sbi, &(nids[i]))) {
				err = -ENOSPC;
				goto release_pages;
			}

			dn->nid = nids[i];
			npage[i] = new_node_page(dn, noffset[i], NULL);
			if (IS_ERR(npage[i])) {
				alloc_nid_failed(sbi, nids[i]);
				err = PTR_ERR(npage[i]);
				goto release_pages;
			}

			/* link the new node into its parent */
			set_nid(parent, offset[i - 1], nids[i], i == 1);
			alloc_nid_done(sbi, nids[i]);
			done = true;
		} else if (mode == LOOKUP_NODE_RA && i == level && level > 1) {
			npage[i] = get_node_page_ra(parent, offset[i - 1]);
			if (IS_ERR(npage[i])) {
				err = PTR_ERR(npage[i]);
				goto release_pages;
			}
			done = true;
		}
		/*
		 * The parent is no longer needed locked: the inode page is
		 * only unlocked (its reference stays with dn->inode_page),
		 * intermediate nodes are unlocked and released.
		 */
		if (i == 1) {
			dn->inode_page_locked = false;
			unlock_page(parent);
		} else {
			f2fs_put_page(parent, 1);
		}

		if (!done) {
			npage[i] = get_node_page(sbi, nids[i]);
			if (IS_ERR(npage[i])) {
				err = PTR_ERR(npage[i]);
				/* parent is already released above */
				f2fs_put_page(npage[0], 0);
				goto release_out;
			}
		}
		if (i < level) {
			parent = npage[i];
			nids[i + 1] = get_nid(parent, offset[i], false);
		}
	}
	dn->nid = nids[level];
	dn->ofs_in_node = offset[level];
	dn->node_page = npage[level];
	dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node);
	return 0;

release_pages:
	/* parent is still locked here; for i > 1 the inode page is separate */
	f2fs_put_page(parent, 1);
	if (i > 1)
		f2fs_put_page(npage[0], 0);
release_out:
	dn->inode_page = NULL;
	dn->node_page = NULL;
	return err;
}
489 
490 static void truncate_node(struct dnode_of_data *dn)
491 {
492 	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
493 	struct node_info ni;
494 
495 	get_node_info(sbi, dn->nid, &ni);
496 	if (dn->inode->i_blocks == 0) {
497 		f2fs_bug_on(ni.blk_addr != NULL_ADDR);
498 		goto invalidate;
499 	}
500 	f2fs_bug_on(ni.blk_addr == NULL_ADDR);
501 
502 	/* Deallocate node address */
503 	invalidate_blocks(sbi, ni.blk_addr);
504 	dec_valid_node_count(sbi, dn->inode);
505 	set_node_addr(sbi, &ni, NULL_ADDR, false);
506 
507 	if (dn->nid == dn->inode->i_ino) {
508 		remove_orphan_inode(sbi, dn->nid);
509 		dec_valid_inode_count(sbi);
510 	} else {
511 		sync_inode_page(dn);
512 	}
513 invalidate:
514 	clear_node_page_dirty(dn->node_page);
515 	F2FS_SET_SB_DIRT(sbi);
516 
517 	f2fs_put_page(dn->node_page, 1);
518 
519 	invalidate_mapping_pages(NODE_MAPPING(sbi),
520 			dn->node_page->index, dn->node_page->index);
521 
522 	dn->node_page = NULL;
523 	trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr);
524 }
525 
526 static int truncate_dnode(struct dnode_of_data *dn)
527 {
528 	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
529 	struct page *page;
530 
531 	if (dn->nid == 0)
532 		return 1;
533 
534 	/* get direct node */
535 	page = get_node_page(sbi, dn->nid);
536 	if (IS_ERR(page) && PTR_ERR(page) == -ENOENT)
537 		return 1;
538 	else if (IS_ERR(page))
539 		return PTR_ERR(page);
540 
541 	/* Make dnode_of_data for parameter */
542 	dn->node_page = page;
543 	dn->ofs_in_node = 0;
544 	truncate_data_blocks(dn);
545 	truncate_node(dn);
546 	return 1;
547 }
548 
/*
 * Truncate the children of the indirect node dn->nid starting at slot @ofs,
 * and the indirect node itself when @ofs is 0.
 *
 * @nofs:  node offset of this indirect node, used to derive child offsets.
 * @ofs:   first child slot to truncate.
 * @depth: when below 3 the children are direct nodes handled by
 *         truncate_dnode(); otherwise they are indirect nodes handled by a
 *         recursive call with depth - 1.
 *
 * Returns the accumulated "freed" value on success (NIDS_PER_BLOCK + 1 when
 * dn->nid is 0), or a negative errno.
 */
static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
						int ofs, int depth)
{
	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
	struct dnode_of_data rdn = *dn;
	struct page *page;
	struct f2fs_node *rn;
	nid_t child_nid;
	unsigned int child_nofs;
	int freed = 0;
	int i, ret;

	if (dn->nid == 0)
		return NIDS_PER_BLOCK + 1;

	trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr);

	page = get_node_page(sbi, dn->nid);
	if (IS_ERR(page)) {
		trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page));
		return PTR_ERR(page);
	}

	rn = F2FS_NODE(page);
	if (depth < 3) {
		/* children are direct nodes; freed counts every slot visited */
		for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) {
			child_nid = le32_to_cpu(rn->in.nid[i]);
			if (child_nid == 0)
				continue;
			rdn.nid = child_nid;
			ret = truncate_dnode(&rdn);
			if (ret < 0)
				goto out_err;
			set_nid(page, i, 0, false);
		}
	} else {
		/* children are indirect nodes; recurse one level down */
		child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1;
		for (i = ofs; i < NIDS_PER_BLOCK; i++) {
			child_nid = le32_to_cpu(rn->in.nid[i]);
			if (child_nid == 0) {
				child_nofs += NIDS_PER_BLOCK + 1;
				continue;
			}
			rdn.nid = child_nid;
			ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1);
			if (ret == (NIDS_PER_BLOCK + 1)) {
				/* the whole child subtree was freed */
				set_nid(page, i, 0, false);
				child_nofs += ret;
			} else if (ret < 0 && ret != -ENOENT) {
				goto out_err;
			}
		}
		freed = child_nofs;
	}

	if (!ofs) {
		/* remove current indirect node */
		dn->node_page = page;
		truncate_node(dn);
		freed++;
	} else {
		/* node is only partially emptied; keep it */
		f2fs_put_page(page, 1);
	}
	trace_f2fs_truncate_nodes_exit(dn->inode, freed);
	return freed;

out_err:
	f2fs_put_page(page, 1);
	trace_f2fs_truncate_nodes_exit(dn->inode, ret);
	return ret;
}
620 
/*
 * Truncate the direct nodes that hang off a partially used indirect node on
 * the path described by @offset, for a path of the given @depth.
 *
 * The indirect nodes along the path are read first; then every direct node
 * from slot offset[depth - 1] onwards in the deepest indirect node is
 * truncated.  That indirect node is freed as well when truncation started at
 * its slot 0.  Afterwards @offset is advanced to the next sibling of the
 * processed indirect node.
 *
 * Returns 0 when the inode has no node at offset[0], otherwise the last error
 * value (the result of the final truncate_dnode() call on success paths, or a
 * negative errno).
 */
static int truncate_partial_nodes(struct dnode_of_data *dn,
			struct f2fs_inode *ri, int *offset, int depth)
{
	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
	struct page *pages[2];
	nid_t nid[3];
	nid_t child_nid;
	int err = 0;
	int i;
	int idx = depth - 2;

	nid[0] = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
	if (!nid[0])
		return 0;

	/* get indirect nodes in the path */
	for (i = 0; i < idx + 1; i++) {
		/* reference count will be increased */
		pages[i] = get_node_page(sbi, nid[i]);
		if (IS_ERR(pages[i])) {
			err = PTR_ERR(pages[i]);
			/* only pages[0..i-1] were obtained and need release */
			idx = i - 1;
			goto fail;
		}
		nid[i + 1] = get_nid(pages[i], offset[i + 1], false);
	}

	/* free direct nodes linked to a partial indirect node */
	for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) {
		child_nid = get_nid(pages[idx], i, false);
		if (!child_nid)
			continue;
		dn->nid = child_nid;
		err = truncate_dnode(dn);
		if (err < 0)
			goto fail;
		set_nid(pages[idx], i, 0, false);
	}

	if (offset[idx + 1] == 0) {
		/* every child is gone, so drop the indirect node too */
		dn->node_page = pages[idx];
		dn->nid = nid[idx];
		truncate_node(dn);
	} else {
		f2fs_put_page(pages[idx], 1);
	}
	/* continue with the next sibling on the level above */
	offset[idx]++;
	offset[idx + 1] = 0;
	idx--;
fail:
	/* release the remaining path pages, deepest first */
	for (i = idx; i >= 0; i--)
		f2fs_put_page(pages[i], 1);

	trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err);

	return err;
}
678 
/*
 * Truncate all node blocks (and the data they map) of @inode that lie at or
 * beyond file block index @from.
 *
 * All the block addresses of data and nodes should be nullified.
 * A partially covered indirect subtree is handled first through
 * truncate_partial_nodes(); the remaining top-level node slots of the inode
 * are then truncated one by one until the double indirect slot has been
 * processed.
 *
 * Returns 0 on success (positive intermediate results are folded to 0) or a
 * negative errno.
 */
int truncate_inode_blocks(struct inode *inode, pgoff_t from)
{
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	int err = 0, cont = 1;
	int level, offset[4], noffset[4];
	unsigned int nofs = 0;
	struct f2fs_inode *ri;
	struct dnode_of_data dn;
	struct page *page;

	trace_f2fs_truncate_inode_blocks_enter(inode, from);

	level = get_node_path(F2FS_I(inode), from, offset, noffset);
restart:
	page = get_node_page(sbi, inode->i_ino);
	if (IS_ERR(page)) {
		trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page));
		return PTR_ERR(page);
	}

	set_new_dnode(&dn, inode, page, NULL, 0);
	/* keep the reference, but do not hold the inode page locked */
	unlock_page(page);

	ri = F2FS_INODE(page);
	switch (level) {
	case 0:
	case 1:
		/*
		 * NOTE(review): get_node_path() only sets noffset[0] for
		 * level 0, so noffset[1] looks uninitialized in that case -
		 * confirm callers never pass such an index.
		 */
		nofs = noffset[1];
		break;
	case 2:
		nofs = noffset[1];
		if (!offset[level - 1])
			goto skip_partial;
		err = truncate_partial_nodes(&dn, ri, offset, level);
		if (err < 0 && err != -ENOENT)
			goto fail;
		nofs += 1 + NIDS_PER_BLOCK;
		break;
	case 3:
		nofs = 5 + 2 * NIDS_PER_BLOCK;
		if (!offset[level - 1])
			goto skip_partial;
		err = truncate_partial_nodes(&dn, ri, offset, level);
		if (err < 0 && err != -ENOENT)
			goto fail;
		break;
	default:
		BUG();
	}

skip_partial:
	/* walk the inode's top-level node slots starting at offset[0] */
	while (cont) {
		dn.nid = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
		switch (offset[0]) {
		case NODE_DIR1_BLOCK:
		case NODE_DIR2_BLOCK:
			err = truncate_dnode(&dn);
			break;

		case NODE_IND1_BLOCK:
		case NODE_IND2_BLOCK:
			err = truncate_nodes(&dn, nofs, offset[1], 2);
			break;

		case NODE_DIND_BLOCK:
			err = truncate_nodes(&dn, nofs, offset[1], 3);
			/* the double indirect slot is the last one */
			cont = 0;
			break;

		default:
			BUG();
		}
		if (err < 0 && err != -ENOENT)
			goto fail;
		/* the whole subtree was freed: clear the slot in the inode */
		if (offset[1] == 0 &&
				ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) {
			lock_page(page);
			/* the page left the node mapping meanwhile; start over */
			if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
				f2fs_put_page(page, 1);
				goto restart;
			}
			f2fs_wait_on_page_writeback(page, NODE);
			ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
			set_page_dirty(page);
			unlock_page(page);
		}
		offset[1] = 0;
		offset[0]++;
		nofs += err;
	}
fail:
	f2fs_put_page(page, 0);
	trace_f2fs_truncate_inode_blocks_exit(inode, err);
	return err > 0 ? 0 : err;
}
777 
778 int truncate_xattr_node(struct inode *inode, struct page *page)
779 {
780 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
781 	nid_t nid = F2FS_I(inode)->i_xattr_nid;
782 	struct dnode_of_data dn;
783 	struct page *npage;
784 
785 	if (!nid)
786 		return 0;
787 
788 	npage = get_node_page(sbi, nid);
789 	if (IS_ERR(npage))
790 		return PTR_ERR(npage);
791 
792 	F2FS_I(inode)->i_xattr_nid = 0;
793 
794 	/* need to do checkpoint during fsync */
795 	F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi));
796 
797 	set_new_dnode(&dn, inode, page, npage, nid);
798 
799 	if (page)
800 		dn.inode_page_locked = true;
801 	truncate_node(&dn);
802 	return 0;
803 }
804 
805 /*
806  * Caller should grab and release a rwsem by calling f2fs_lock_op() and
807  * f2fs_unlock_op().
808  */
809 void remove_inode_page(struct inode *inode)
810 {
811 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
812 	struct page *page;
813 	nid_t ino = inode->i_ino;
814 	struct dnode_of_data dn;
815 
816 	page = get_node_page(sbi, ino);
817 	if (IS_ERR(page))
818 		return;
819 
820 	if (truncate_xattr_node(inode, page)) {
821 		f2fs_put_page(page, 1);
822 		return;
823 	}
824 	/* 0 is possible, after f2fs_new_inode() is failed */
825 	f2fs_bug_on(inode->i_blocks != 0 && inode->i_blocks != 1);
826 	set_new_dnode(&dn, inode, page, page, ino);
827 	truncate_node(&dn);
828 }
829 
830 struct page *new_inode_page(struct inode *inode, const struct qstr *name)
831 {
832 	struct dnode_of_data dn;
833 
834 	/* allocate inode page for new inode */
835 	set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
836 
837 	/* caller should f2fs_put_page(page, 1); */
838 	return new_node_page(&dn, 0, NULL);
839 }
840 
841 struct page *new_node_page(struct dnode_of_data *dn,
842 				unsigned int ofs, struct page *ipage)
843 {
844 	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
845 	struct node_info old_ni, new_ni;
846 	struct page *page;
847 	int err;
848 
849 	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
850 		return ERR_PTR(-EPERM);
851 
852 	page = grab_cache_page_write_begin(NODE_MAPPING(sbi),
853 					dn->nid, AOP_FLAG_NOFS);
854 	if (!page)
855 		return ERR_PTR(-ENOMEM);
856 
857 	if (unlikely(!inc_valid_node_count(sbi, dn->inode))) {
858 		err = -ENOSPC;
859 		goto fail;
860 	}
861 
862 	get_node_info(sbi, dn->nid, &old_ni);
863 
864 	/* Reinitialize old_ni with new node page */
865 	f2fs_bug_on(old_ni.blk_addr != NULL_ADDR);
866 	new_ni = old_ni;
867 	new_ni.ino = dn->inode->i_ino;
868 	set_node_addr(sbi, &new_ni, NEW_ADDR, false);
869 
870 	fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
871 	set_cold_node(dn->inode, page);
872 	SetPageUptodate(page);
873 	set_page_dirty(page);
874 
875 	if (f2fs_has_xattr_block(ofs))
876 		F2FS_I(dn->inode)->i_xattr_nid = dn->nid;
877 
878 	dn->node_page = page;
879 	if (ipage)
880 		update_inode(dn->inode, ipage);
881 	else
882 		sync_inode_page(dn);
883 	if (ofs == 0)
884 		inc_valid_inode_count(sbi);
885 
886 	return page;
887 
888 fail:
889 	clear_node_page_dirty(page);
890 	f2fs_put_page(page, 1);
891 	return ERR_PTR(err);
892 }
893 
894 /*
895  * Caller should do after getting the following values.
896  * 0: f2fs_put_page(page, 0)
897  * LOCKED_PAGE: f2fs_put_page(page, 1)
898  * error: nothing
899  */
900 static int read_node_page(struct page *page, int rw)
901 {
902 	struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
903 	struct node_info ni;
904 
905 	get_node_info(sbi, page->index, &ni);
906 
907 	if (unlikely(ni.blk_addr == NULL_ADDR)) {
908 		f2fs_put_page(page, 1);
909 		return -ENOENT;
910 	}
911 
912 	if (PageUptodate(page))
913 		return LOCKED_PAGE;
914 
915 	return f2fs_submit_page_bio(sbi, page, ni.blk_addr, rw);
916 }
917 
918 /*
919  * Readahead a node page
920  */
921 void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
922 {
923 	struct page *apage;
924 	int err;
925 
926 	apage = find_get_page(NODE_MAPPING(sbi), nid);
927 	if (apage && PageUptodate(apage)) {
928 		f2fs_put_page(apage, 0);
929 		return;
930 	}
931 	f2fs_put_page(apage, 0);
932 
933 	apage = grab_cache_page(NODE_MAPPING(sbi), nid);
934 	if (!apage)
935 		return;
936 
937 	err = read_node_page(apage, READA);
938 	if (err == 0)
939 		f2fs_put_page(apage, 0);
940 	else if (err == LOCKED_PAGE)
941 		f2fs_put_page(apage, 1);
942 }
943 
944 struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
945 {
946 	struct page *page;
947 	int err;
948 repeat:
949 	page = grab_cache_page_write_begin(NODE_MAPPING(sbi),
950 					nid, AOP_FLAG_NOFS);
951 	if (!page)
952 		return ERR_PTR(-ENOMEM);
953 
954 	err = read_node_page(page, READ_SYNC);
955 	if (err < 0)
956 		return ERR_PTR(err);
957 	else if (err == LOCKED_PAGE)
958 		goto got_it;
959 
960 	lock_page(page);
961 	if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) {
962 		f2fs_put_page(page, 1);
963 		return ERR_PTR(-EIO);
964 	}
965 	if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
966 		f2fs_put_page(page, 1);
967 		goto repeat;
968 	}
969 got_it:
970 	mark_page_accessed(page);
971 	return page;
972 }
973 
974 /*
975  * Return a locked page for the desired node page.
976  * And, readahead MAX_RA_NODE number of node pages.
977  */
978 struct page *get_node_page_ra(struct page *parent, int start)
979 {
980 	struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb);
981 	struct blk_plug plug;
982 	struct page *page;
983 	int err, i, end;
984 	nid_t nid;
985 
986 	/* First, try getting the desired direct node. */
987 	nid = get_nid(parent, start, false);
988 	if (!nid)
989 		return ERR_PTR(-ENOENT);
990 repeat:
991 	page = grab_cache_page(NODE_MAPPING(sbi), nid);
992 	if (!page)
993 		return ERR_PTR(-ENOMEM);
994 
995 	err = read_node_page(page, READ_SYNC);
996 	if (err < 0)
997 		return ERR_PTR(err);
998 	else if (err == LOCKED_PAGE)
999 		goto page_hit;
1000 
1001 	blk_start_plug(&plug);
1002 
1003 	/* Then, try readahead for siblings of the desired node */
1004 	end = start + MAX_RA_NODE;
1005 	end = min(end, NIDS_PER_BLOCK);
1006 	for (i = start + 1; i < end; i++) {
1007 		nid = get_nid(parent, i, false);
1008 		if (!nid)
1009 			continue;
1010 		ra_node_page(sbi, nid);
1011 	}
1012 
1013 	blk_finish_plug(&plug);
1014 
1015 	lock_page(page);
1016 	if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
1017 		f2fs_put_page(page, 1);
1018 		goto repeat;
1019 	}
1020 page_hit:
1021 	if (unlikely(!PageUptodate(page))) {
1022 		f2fs_put_page(page, 1);
1023 		return ERR_PTR(-EIO);
1024 	}
1025 	mark_page_accessed(page);
1026 	return page;
1027 }
1028 
1029 void sync_inode_page(struct dnode_of_data *dn)
1030 {
1031 	if (IS_INODE(dn->node_page) || dn->inode_page == dn->node_page) {
1032 		update_inode(dn->inode, dn->node_page);
1033 	} else if (dn->inode_page) {
1034 		if (!dn->inode_page_locked)
1035 			lock_page(dn->inode_page);
1036 		update_inode(dn->inode, dn->inode_page);
1037 		if (!dn->inode_page_locked)
1038 			unlock_page(dn->inode_page);
1039 	} else {
1040 		update_inode_page(dn->inode);
1041 	}
1042 }
1043 
/*
 * Write back dirty node pages.
 *
 * With @ino == 0 all dirty node pages are scanned in three passes (indirect
 * nodes, then dentry dnodes, then file dnodes).  With a non-zero @ino (fsync)
 * only the last pass runs and only pages belonging to that inode are
 * written; those pages are waited for with lock_page() instead of being
 * skipped when busy.
 *
 * Scanning stops once wbc->nr_to_write reaches zero.  If anything was written
 * the merged NODE write bio is submitted.
 *
 * Returns the number of dnode pages written on behalf of fsync (always 0 when
 * @ino is 0).
 */
int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
					struct writeback_control *wbc)
{
	pgoff_t index, end;
	struct pagevec pvec;
	int step = ino ? 2 : 0;
	int nwritten = 0, wrote = 0;

	pagevec_init(&pvec, 0);

next_step:
	index = 0;
	end = LONG_MAX;

	while (index <= end) {
		int i, nr_pages;
		nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
				PAGECACHE_TAG_DIRTY,
				min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
		if (nr_pages == 0)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			/*
			 * flushing sequence with step:
			 * 0. indirect nodes
			 * 1. dentry dnodes
			 * 2. file dnodes
			 */
			if (step == 0 && IS_DNODE(page))
				continue;
			if (step == 1 && (!IS_DNODE(page) ||
						is_cold_node(page)))
				continue;
			if (step == 2 && (!IS_DNODE(page) ||
						!is_cold_node(page)))
				continue;

			/*
			 * If an fsync mode,
			 * we should not skip writing node pages.
			 */
			if (ino && ino_of_node(page) == ino)
				lock_page(page);
			else if (!trylock_page(page))
				continue;

			/* re-check under the page lock */
			if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
continue_unlock:
				unlock_page(page);
				continue;
			}
			if (ino && ino_of_node(page) != ino)
				goto continue_unlock;

			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			/* called by fsync() */
			if (ino && IS_DNODE(page)) {
				int mark = !is_checkpointed_node(sbi, ino);
				set_fsync_mark(page, 1);
				if (IS_INODE(page))
					set_dentry_mark(page, mark);
				nwritten++;
			} else {
				set_fsync_mark(page, 0);
				set_dentry_mark(page, 0);
			}
			NODE_MAPPING(sbi)->a_ops->writepage(page, wbc);
			wrote++;

			if (--wbc->nr_to_write == 0)
				break;
		}
		pagevec_release(&pvec);
		cond_resched();

		if (wbc->nr_to_write == 0) {
			/* budget exhausted: make sure no further pass runs */
			step = 2;
			break;
		}
	}

	if (step < 2) {
		step++;
		goto next_step;
	}

	if (wrote)
		f2fs_submit_merged_bio(sbi, NODE, WRITE);
	return nwritten;
}
1144 
1145 int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
1146 {
1147 	pgoff_t index = 0, end = LONG_MAX;
1148 	struct pagevec pvec;
1149 	int ret2 = 0, ret = 0;
1150 
1151 	pagevec_init(&pvec, 0);
1152 
1153 	while (index <= end) {
1154 		int i, nr_pages;
1155 		nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
1156 				PAGECACHE_TAG_WRITEBACK,
1157 				min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
1158 		if (nr_pages == 0)
1159 			break;
1160 
1161 		for (i = 0; i < nr_pages; i++) {
1162 			struct page *page = pvec.pages[i];
1163 
1164 			/* until radix tree lookup accepts end_index */
1165 			if (unlikely(page->index > end))
1166 				continue;
1167 
1168 			if (ino && ino_of_node(page) == ino) {
1169 				f2fs_wait_on_page_writeback(page, NODE);
1170 				if (TestClearPageError(page))
1171 					ret = -EIO;
1172 			}
1173 		}
1174 		pagevec_release(&pvec);
1175 		cond_resched();
1176 	}
1177 
1178 	if (unlikely(test_and_clear_bit(AS_ENOSPC, &NODE_MAPPING(sbi)->flags)))
1179 		ret2 = -ENOSPC;
1180 	if (unlikely(test_and_clear_bit(AS_EIO, &NODE_MAPPING(sbi)->flags)))
1181 		ret2 = -EIO;
1182 	if (!ret)
1183 		ret = ret2;
1184 	return ret;
1185 }
1186 
1187 static int f2fs_write_node_page(struct page *page,
1188 				struct writeback_control *wbc)
1189 {
1190 	struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
1191 	nid_t nid;
1192 	block_t new_addr;
1193 	struct node_info ni;
1194 	struct f2fs_io_info fio = {
1195 		.type = NODE,
1196 		.rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
1197 	};
1198 
1199 	if (unlikely(sbi->por_doing))
1200 		goto redirty_out;
1201 
1202 	f2fs_wait_on_page_writeback(page, NODE);
1203 
1204 	/* get old block addr of this node page */
1205 	nid = nid_of_node(page);
1206 	f2fs_bug_on(page->index != nid);
1207 
1208 	get_node_info(sbi, nid, &ni);
1209 
1210 	/* This page is already truncated */
1211 	if (unlikely(ni.blk_addr == NULL_ADDR)) {
1212 		dec_page_count(sbi, F2FS_DIRTY_NODES);
1213 		unlock_page(page);
1214 		return 0;
1215 	}
1216 
1217 	if (wbc->for_reclaim)
1218 		goto redirty_out;
1219 
1220 	mutex_lock(&sbi->node_write);
1221 	set_page_writeback(page);
1222 	write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr);
1223 	set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page));
1224 	dec_page_count(sbi, F2FS_DIRTY_NODES);
1225 	mutex_unlock(&sbi->node_write);
1226 	unlock_page(page);
1227 	return 0;
1228 
1229 redirty_out:
1230 	dec_page_count(sbi, F2FS_DIRTY_NODES);
1231 	wbc->pages_skipped++;
1232 	account_page_redirty(page);
1233 	set_page_dirty(page);
1234 	return AOP_WRITEPAGE_ACTIVATE;
1235 }
1236 
1237 static int f2fs_write_node_pages(struct address_space *mapping,
1238 			    struct writeback_control *wbc)
1239 {
1240 	struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
1241 	long diff;
1242 
1243 	/* balancing f2fs's metadata in background */
1244 	f2fs_balance_fs_bg(sbi);
1245 
1246 	/* collect a number of dirty node pages and write together */
1247 	if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE))
1248 		goto skip_write;
1249 
1250 	diff = nr_pages_to_write(sbi, NODE, wbc);
1251 	wbc->sync_mode = WB_SYNC_NONE;
1252 	sync_node_pages(sbi, 0, wbc);
1253 	wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
1254 	return 0;
1255 
1256 skip_write:
1257 	wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_NODES);
1258 	return 0;
1259 }
1260 
1261 static int f2fs_set_node_page_dirty(struct page *page)
1262 {
1263 	struct address_space *mapping = page->mapping;
1264 	struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
1265 
1266 	trace_f2fs_set_page_dirty(page, NODE);
1267 
1268 	SetPageUptodate(page);
1269 	if (!PageDirty(page)) {
1270 		__set_page_dirty_nobuffers(page);
1271 		inc_page_count(sbi, F2FS_DIRTY_NODES);
1272 		SetPagePrivate(page);
1273 		return 1;
1274 	}
1275 	return 0;
1276 }
1277 
1278 static void f2fs_invalidate_node_page(struct page *page, unsigned int offset,
1279 				      unsigned int length)
1280 {
1281 	struct inode *inode = page->mapping->host;
1282 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
1283 	if (PageDirty(page))
1284 		dec_page_count(sbi, F2FS_DIRTY_NODES);
1285 	ClearPagePrivate(page);
1286 }
1287 
1288 static int f2fs_release_node_page(struct page *page, gfp_t wait)
1289 {
1290 	ClearPagePrivate(page);
1291 	return 1;
1292 }
1293 
1294 /*
1295  * Structure of the f2fs node operations
1296  */
1297 const struct address_space_operations f2fs_node_aops = {
1298 	.writepage	= f2fs_write_node_page,
1299 	.writepages	= f2fs_write_node_pages,
1300 	.set_page_dirty	= f2fs_set_node_page_dirty,
1301 	.invalidatepage	= f2fs_invalidate_node_page,
1302 	.releasepage	= f2fs_release_node_page,
1303 };
1304 
1305 static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i,
1306 						nid_t n)
1307 {
1308 	return radix_tree_lookup(&nm_i->free_nid_root, n);
1309 }
1310 
1311 static void __del_from_free_nid_list(struct f2fs_nm_info *nm_i,
1312 						struct free_nid *i)
1313 {
1314 	list_del(&i->list);
1315 	radix_tree_delete(&nm_i->free_nid_root, i->nid);
1316 }
1317 
1318 static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
1319 {
1320 	struct free_nid *i;
1321 	struct nat_entry *ne;
1322 	bool allocated = false;
1323 
1324 	if (!available_free_memory(nm_i, FREE_NIDS))
1325 		return -1;
1326 
1327 	/* 0 nid should not be used */
1328 	if (unlikely(nid == 0))
1329 		return 0;
1330 
1331 	if (build) {
1332 		/* do not add allocated nids */
1333 		read_lock(&nm_i->nat_tree_lock);
1334 		ne = __lookup_nat_cache(nm_i, nid);
1335 		if (ne &&
1336 			(!ne->checkpointed || nat_get_blkaddr(ne) != NULL_ADDR))
1337 			allocated = true;
1338 		read_unlock(&nm_i->nat_tree_lock);
1339 		if (allocated)
1340 			return 0;
1341 	}
1342 
1343 	i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
1344 	i->nid = nid;
1345 	i->state = NID_NEW;
1346 
1347 	spin_lock(&nm_i->free_nid_list_lock);
1348 	if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) {
1349 		spin_unlock(&nm_i->free_nid_list_lock);
1350 		kmem_cache_free(free_nid_slab, i);
1351 		return 0;
1352 	}
1353 	list_add_tail(&i->list, &nm_i->free_nid_list);
1354 	nm_i->fcnt++;
1355 	spin_unlock(&nm_i->free_nid_list_lock);
1356 	return 1;
1357 }
1358 
1359 static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
1360 {
1361 	struct free_nid *i;
1362 	bool need_free = false;
1363 
1364 	spin_lock(&nm_i->free_nid_list_lock);
1365 	i = __lookup_free_nid_list(nm_i, nid);
1366 	if (i && i->state == NID_NEW) {
1367 		__del_from_free_nid_list(nm_i, i);
1368 		nm_i->fcnt--;
1369 		need_free = true;
1370 	}
1371 	spin_unlock(&nm_i->free_nid_list_lock);
1372 
1373 	if (need_free)
1374 		kmem_cache_free(free_nid_slab, i);
1375 }
1376 
1377 static void scan_nat_page(struct f2fs_nm_info *nm_i,
1378 			struct page *nat_page, nid_t start_nid)
1379 {
1380 	struct f2fs_nat_block *nat_blk = page_address(nat_page);
1381 	block_t blk_addr;
1382 	int i;
1383 
1384 	i = start_nid % NAT_ENTRY_PER_BLOCK;
1385 
1386 	for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
1387 
1388 		if (unlikely(start_nid >= nm_i->max_nid))
1389 			break;
1390 
1391 		blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
1392 		f2fs_bug_on(blk_addr == NEW_ADDR);
1393 		if (blk_addr == NULL_ADDR) {
1394 			if (add_free_nid(nm_i, start_nid, true) < 0)
1395 				break;
1396 		}
1397 	}
1398 }
1399 
1400 static void build_free_nids(struct f2fs_sb_info *sbi)
1401 {
1402 	struct f2fs_nm_info *nm_i = NM_I(sbi);
1403 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1404 	struct f2fs_summary_block *sum = curseg->sum_blk;
1405 	int i = 0;
1406 	nid_t nid = nm_i->next_scan_nid;
1407 
1408 	/* Enough entries */
1409 	if (nm_i->fcnt > NAT_ENTRY_PER_BLOCK)
1410 		return;
1411 
1412 	/* readahead nat pages to be scanned */
1413 	ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT);
1414 
1415 	while (1) {
1416 		struct page *page = get_current_nat_page(sbi, nid);
1417 
1418 		scan_nat_page(nm_i, page, nid);
1419 		f2fs_put_page(page, 1);
1420 
1421 		nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
1422 		if (unlikely(nid >= nm_i->max_nid))
1423 			nid = 0;
1424 
1425 		if (i++ == FREE_NID_PAGES)
1426 			break;
1427 	}
1428 
1429 	/* go to the next free nat pages to find free nids abundantly */
1430 	nm_i->next_scan_nid = nid;
1431 
1432 	/* find free nids from current sum_pages */
1433 	mutex_lock(&curseg->curseg_mutex);
1434 	for (i = 0; i < nats_in_cursum(sum); i++) {
1435 		block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr);
1436 		nid = le32_to_cpu(nid_in_journal(sum, i));
1437 		if (addr == NULL_ADDR)
1438 			add_free_nid(nm_i, nid, true);
1439 		else
1440 			remove_free_nid(nm_i, nid);
1441 	}
1442 	mutex_unlock(&curseg->curseg_mutex);
1443 }
1444 
1445 /*
1446  * If this function returns success, caller can obtain a new nid
1447  * from second parameter of this function.
1448  * The returned nid could be used ino as well as nid when inode is created.
1449  */
1450 bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
1451 {
1452 	struct f2fs_nm_info *nm_i = NM_I(sbi);
1453 	struct free_nid *i = NULL;
1454 retry:
1455 	if (unlikely(sbi->total_valid_node_count + 1 >= nm_i->max_nid))
1456 		return false;
1457 
1458 	spin_lock(&nm_i->free_nid_list_lock);
1459 
1460 	/* We should not use stale free nids created by build_free_nids */
1461 	if (nm_i->fcnt && !on_build_free_nids(nm_i)) {
1462 		f2fs_bug_on(list_empty(&nm_i->free_nid_list));
1463 		list_for_each_entry(i, &nm_i->free_nid_list, list)
1464 			if (i->state == NID_NEW)
1465 				break;
1466 
1467 		f2fs_bug_on(i->state != NID_NEW);
1468 		*nid = i->nid;
1469 		i->state = NID_ALLOC;
1470 		nm_i->fcnt--;
1471 		spin_unlock(&nm_i->free_nid_list_lock);
1472 		return true;
1473 	}
1474 	spin_unlock(&nm_i->free_nid_list_lock);
1475 
1476 	/* Let's scan nat pages and its caches to get free nids */
1477 	mutex_lock(&nm_i->build_lock);
1478 	build_free_nids(sbi);
1479 	mutex_unlock(&nm_i->build_lock);
1480 	goto retry;
1481 }
1482 
1483 /*
1484  * alloc_nid() should be called prior to this function.
1485  */
1486 void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
1487 {
1488 	struct f2fs_nm_info *nm_i = NM_I(sbi);
1489 	struct free_nid *i;
1490 
1491 	spin_lock(&nm_i->free_nid_list_lock);
1492 	i = __lookup_free_nid_list(nm_i, nid);
1493 	f2fs_bug_on(!i || i->state != NID_ALLOC);
1494 	__del_from_free_nid_list(nm_i, i);
1495 	spin_unlock(&nm_i->free_nid_list_lock);
1496 
1497 	kmem_cache_free(free_nid_slab, i);
1498 }
1499 
1500 /*
1501  * alloc_nid() should be called prior to this function.
1502  */
1503 void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
1504 {
1505 	struct f2fs_nm_info *nm_i = NM_I(sbi);
1506 	struct free_nid *i;
1507 	bool need_free = false;
1508 
1509 	if (!nid)
1510 		return;
1511 
1512 	spin_lock(&nm_i->free_nid_list_lock);
1513 	i = __lookup_free_nid_list(nm_i, nid);
1514 	f2fs_bug_on(!i || i->state != NID_ALLOC);
1515 	if (!available_free_memory(nm_i, FREE_NIDS)) {
1516 		__del_from_free_nid_list(nm_i, i);
1517 		need_free = true;
1518 	} else {
1519 		i->state = NID_NEW;
1520 		nm_i->fcnt++;
1521 	}
1522 	spin_unlock(&nm_i->free_nid_list_lock);
1523 
1524 	if (need_free)
1525 		kmem_cache_free(free_nid_slab, i);
1526 }
1527 
1528 void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
1529 		struct f2fs_summary *sum, struct node_info *ni,
1530 		block_t new_blkaddr)
1531 {
1532 	rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr);
1533 	set_node_addr(sbi, ni, new_blkaddr, false);
1534 	clear_node_page_dirty(page);
1535 }
1536 
1537 void recover_inline_xattr(struct inode *inode, struct page *page)
1538 {
1539 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
1540 	void *src_addr, *dst_addr;
1541 	size_t inline_size;
1542 	struct page *ipage;
1543 	struct f2fs_inode *ri;
1544 
1545 	if (!f2fs_has_inline_xattr(inode))
1546 		return;
1547 
1548 	if (!IS_INODE(page))
1549 		return;
1550 
1551 	ri = F2FS_INODE(page);
1552 	if (!(ri->i_inline & F2FS_INLINE_XATTR))
1553 		return;
1554 
1555 	ipage = get_node_page(sbi, inode->i_ino);
1556 	f2fs_bug_on(IS_ERR(ipage));
1557 
1558 	dst_addr = inline_xattr_addr(ipage);
1559 	src_addr = inline_xattr_addr(page);
1560 	inline_size = inline_xattr_size(inode);
1561 
1562 	memcpy(dst_addr, src_addr, inline_size);
1563 
1564 	update_inode(inode, ipage);
1565 	f2fs_put_page(ipage, 1);
1566 }
1567 
1568 bool recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
1569 {
1570 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
1571 	nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid;
1572 	nid_t new_xnid = nid_of_node(page);
1573 	struct node_info ni;
1574 
1575 	recover_inline_xattr(inode, page);
1576 
1577 	if (!f2fs_has_xattr_block(ofs_of_node(page)))
1578 		return false;
1579 
1580 	/* 1: invalidate the previous xattr nid */
1581 	if (!prev_xnid)
1582 		goto recover_xnid;
1583 
1584 	/* Deallocate node address */
1585 	get_node_info(sbi, prev_xnid, &ni);
1586 	f2fs_bug_on(ni.blk_addr == NULL_ADDR);
1587 	invalidate_blocks(sbi, ni.blk_addr);
1588 	dec_valid_node_count(sbi, inode);
1589 	set_node_addr(sbi, &ni, NULL_ADDR, false);
1590 
1591 recover_xnid:
1592 	/* 2: allocate new xattr nid */
1593 	if (unlikely(!inc_valid_node_count(sbi, inode)))
1594 		f2fs_bug_on(1);
1595 
1596 	remove_free_nid(NM_I(sbi), new_xnid);
1597 	get_node_info(sbi, new_xnid, &ni);
1598 	ni.ino = inode->i_ino;
1599 	set_node_addr(sbi, &ni, NEW_ADDR, false);
1600 	F2FS_I(inode)->i_xattr_nid = new_xnid;
1601 
1602 	/* 3: update xattr blkaddr */
1603 	refresh_sit_entry(sbi, NEW_ADDR, blkaddr);
1604 	set_node_addr(sbi, &ni, blkaddr, false);
1605 
1606 	update_inode_page(inode);
1607 	return true;
1608 }
1609 
1610 int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
1611 {
1612 	struct f2fs_inode *src, *dst;
1613 	nid_t ino = ino_of_node(page);
1614 	struct node_info old_ni, new_ni;
1615 	struct page *ipage;
1616 
1617 	ipage = grab_cache_page(NODE_MAPPING(sbi), ino);
1618 	if (!ipage)
1619 		return -ENOMEM;
1620 
1621 	/* Should not use this inode  from free nid list */
1622 	remove_free_nid(NM_I(sbi), ino);
1623 
1624 	get_node_info(sbi, ino, &old_ni);
1625 	SetPageUptodate(ipage);
1626 	fill_node_footer(ipage, ino, ino, 0, true);
1627 
1628 	src = F2FS_INODE(page);
1629 	dst = F2FS_INODE(ipage);
1630 
1631 	memcpy(dst, src, (unsigned long)&src->i_ext - (unsigned long)src);
1632 	dst->i_size = 0;
1633 	dst->i_blocks = cpu_to_le64(1);
1634 	dst->i_links = cpu_to_le32(1);
1635 	dst->i_xattr_nid = 0;
1636 
1637 	new_ni = old_ni;
1638 	new_ni.ino = ino;
1639 
1640 	if (unlikely(!inc_valid_node_count(sbi, NULL)))
1641 		WARN_ON(1);
1642 	set_node_addr(sbi, &new_ni, NEW_ADDR, false);
1643 	inc_valid_inode_count(sbi);
1644 	f2fs_put_page(ipage, 1);
1645 	return 0;
1646 }
1647 
1648 /*
1649  * ra_sum_pages() merge contiguous pages into one bio and submit.
1650  * these pre-readed pages are linked in pages list.
1651  */
1652 static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages,
1653 				int start, int nrpages)
1654 {
1655 	struct page *page;
1656 	int page_idx = start;
1657 	struct f2fs_io_info fio = {
1658 		.type = META,
1659 		.rw = READ_SYNC | REQ_META | REQ_PRIO
1660 	};
1661 
1662 	for (; page_idx < start + nrpages; page_idx++) {
1663 		/* alloc temporal page for read node summary info*/
1664 		page = alloc_page(GFP_F2FS_ZERO);
1665 		if (!page)
1666 			break;
1667 
1668 		lock_page(page);
1669 		page->index = page_idx;
1670 		list_add_tail(&page->lru, pages);
1671 	}
1672 
1673 	list_for_each_entry(page, pages, lru)
1674 		f2fs_submit_page_mbio(sbi, page, page->index, &fio);
1675 
1676 	f2fs_submit_merged_bio(sbi, META, READ);
1677 
1678 	return page_idx - start;
1679 }
1680 
1681 int restore_node_summary(struct f2fs_sb_info *sbi,
1682 			unsigned int segno, struct f2fs_summary_block *sum)
1683 {
1684 	struct f2fs_node *rn;
1685 	struct f2fs_summary *sum_entry;
1686 	struct page *page, *tmp;
1687 	block_t addr;
1688 	int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
1689 	int i, last_offset, nrpages, err = 0;
1690 	LIST_HEAD(page_list);
1691 
1692 	/* scan the node segment */
1693 	last_offset = sbi->blocks_per_seg;
1694 	addr = START_BLOCK(sbi, segno);
1695 	sum_entry = &sum->entries[0];
1696 
1697 	for (i = 0; !err && i < last_offset; i += nrpages, addr += nrpages) {
1698 		nrpages = min(last_offset - i, bio_blocks);
1699 
1700 		/* read ahead node pages */
1701 		nrpages = ra_sum_pages(sbi, &page_list, addr, nrpages);
1702 		if (!nrpages)
1703 			return -ENOMEM;
1704 
1705 		list_for_each_entry_safe(page, tmp, &page_list, lru) {
1706 			if (err)
1707 				goto skip;
1708 
1709 			lock_page(page);
1710 			if (unlikely(!PageUptodate(page))) {
1711 				err = -EIO;
1712 			} else {
1713 				rn = F2FS_NODE(page);
1714 				sum_entry->nid = rn->footer.nid;
1715 				sum_entry->version = 0;
1716 				sum_entry->ofs_in_node = 0;
1717 				sum_entry++;
1718 			}
1719 			unlock_page(page);
1720 skip:
1721 			list_del(&page->lru);
1722 			__free_pages(page, 0);
1723 		}
1724 	}
1725 	return err;
1726 }
1727 
1728 static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
1729 {
1730 	struct f2fs_nm_info *nm_i = NM_I(sbi);
1731 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1732 	struct f2fs_summary_block *sum = curseg->sum_blk;
1733 	int i;
1734 
1735 	mutex_lock(&curseg->curseg_mutex);
1736 
1737 	if (nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES) {
1738 		mutex_unlock(&curseg->curseg_mutex);
1739 		return false;
1740 	}
1741 
1742 	for (i = 0; i < nats_in_cursum(sum); i++) {
1743 		struct nat_entry *ne;
1744 		struct f2fs_nat_entry raw_ne;
1745 		nid_t nid = le32_to_cpu(nid_in_journal(sum, i));
1746 
1747 		raw_ne = nat_in_journal(sum, i);
1748 retry:
1749 		write_lock(&nm_i->nat_tree_lock);
1750 		ne = __lookup_nat_cache(nm_i, nid);
1751 		if (ne) {
1752 			__set_nat_cache_dirty(nm_i, ne);
1753 			write_unlock(&nm_i->nat_tree_lock);
1754 			continue;
1755 		}
1756 		ne = grab_nat_entry(nm_i, nid);
1757 		if (!ne) {
1758 			write_unlock(&nm_i->nat_tree_lock);
1759 			goto retry;
1760 		}
1761 		nat_set_blkaddr(ne, le32_to_cpu(raw_ne.block_addr));
1762 		nat_set_ino(ne, le32_to_cpu(raw_ne.ino));
1763 		nat_set_version(ne, raw_ne.version);
1764 		__set_nat_cache_dirty(nm_i, ne);
1765 		write_unlock(&nm_i->nat_tree_lock);
1766 	}
1767 	update_nats_in_cursum(sum, -i);
1768 	mutex_unlock(&curseg->curseg_mutex);
1769 	return true;
1770 }
1771 
1772 /*
1773  * This function is called during the checkpointing process.
1774  */
1775 void flush_nat_entries(struct f2fs_sb_info *sbi)
1776 {
1777 	struct f2fs_nm_info *nm_i = NM_I(sbi);
1778 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1779 	struct f2fs_summary_block *sum = curseg->sum_blk;
1780 	struct nat_entry *ne, *cur;
1781 	struct page *page = NULL;
1782 	struct f2fs_nat_block *nat_blk = NULL;
1783 	nid_t start_nid = 0, end_nid = 0;
1784 	bool flushed;
1785 
1786 	flushed = flush_nats_in_journal(sbi);
1787 
1788 	if (!flushed)
1789 		mutex_lock(&curseg->curseg_mutex);
1790 
1791 	/* 1) flush dirty nat caches */
1792 	list_for_each_entry_safe(ne, cur, &nm_i->dirty_nat_entries, list) {
1793 		nid_t nid;
1794 		struct f2fs_nat_entry raw_ne;
1795 		int offset = -1;
1796 		block_t new_blkaddr;
1797 
1798 		if (nat_get_blkaddr(ne) == NEW_ADDR)
1799 			continue;
1800 
1801 		nid = nat_get_nid(ne);
1802 
1803 		if (flushed)
1804 			goto to_nat_page;
1805 
1806 		/* if there is room for nat enries in curseg->sumpage */
1807 		offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1);
1808 		if (offset >= 0) {
1809 			raw_ne = nat_in_journal(sum, offset);
1810 			goto flush_now;
1811 		}
1812 to_nat_page:
1813 		if (!page || (start_nid > nid || nid > end_nid)) {
1814 			if (page) {
1815 				f2fs_put_page(page, 1);
1816 				page = NULL;
1817 			}
1818 			start_nid = START_NID(nid);
1819 			end_nid = start_nid + NAT_ENTRY_PER_BLOCK - 1;
1820 
1821 			/*
1822 			 * get nat block with dirty flag, increased reference
1823 			 * count, mapped and lock
1824 			 */
1825 			page = get_next_nat_page(sbi, start_nid);
1826 			nat_blk = page_address(page);
1827 		}
1828 
1829 		f2fs_bug_on(!nat_blk);
1830 		raw_ne = nat_blk->entries[nid - start_nid];
1831 flush_now:
1832 		new_blkaddr = nat_get_blkaddr(ne);
1833 
1834 		raw_ne.ino = cpu_to_le32(nat_get_ino(ne));
1835 		raw_ne.block_addr = cpu_to_le32(new_blkaddr);
1836 		raw_ne.version = nat_get_version(ne);
1837 
1838 		if (offset < 0) {
1839 			nat_blk->entries[nid - start_nid] = raw_ne;
1840 		} else {
1841 			nat_in_journal(sum, offset) = raw_ne;
1842 			nid_in_journal(sum, offset) = cpu_to_le32(nid);
1843 		}
1844 
1845 		if (nat_get_blkaddr(ne) == NULL_ADDR &&
1846 				add_free_nid(NM_I(sbi), nid, false) <= 0) {
1847 			write_lock(&nm_i->nat_tree_lock);
1848 			__del_from_nat_cache(nm_i, ne);
1849 			write_unlock(&nm_i->nat_tree_lock);
1850 		} else {
1851 			write_lock(&nm_i->nat_tree_lock);
1852 			__clear_nat_cache_dirty(nm_i, ne);
1853 			write_unlock(&nm_i->nat_tree_lock);
1854 		}
1855 	}
1856 	if (!flushed)
1857 		mutex_unlock(&curseg->curseg_mutex);
1858 	f2fs_put_page(page, 1);
1859 }
1860 
1861 static int init_node_manager(struct f2fs_sb_info *sbi)
1862 {
1863 	struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
1864 	struct f2fs_nm_info *nm_i = NM_I(sbi);
1865 	unsigned char *version_bitmap;
1866 	unsigned int nat_segs, nat_blocks;
1867 
1868 	nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
1869 
1870 	/* segment_count_nat includes pair segment so divide to 2. */
1871 	nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
1872 	nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
1873 
1874 	/* not used nids: 0, node, meta, (and root counted as valid node) */
1875 	nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks - 3;
1876 	nm_i->fcnt = 0;
1877 	nm_i->nat_cnt = 0;
1878 	nm_i->ram_thresh = DEF_RAM_THRESHOLD;
1879 
1880 	INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
1881 	INIT_LIST_HEAD(&nm_i->free_nid_list);
1882 	INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
1883 	INIT_LIST_HEAD(&nm_i->nat_entries);
1884 	INIT_LIST_HEAD(&nm_i->dirty_nat_entries);
1885 
1886 	mutex_init(&nm_i->build_lock);
1887 	spin_lock_init(&nm_i->free_nid_list_lock);
1888 	rwlock_init(&nm_i->nat_tree_lock);
1889 
1890 	nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
1891 	nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
1892 	version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP);
1893 	if (!version_bitmap)
1894 		return -EFAULT;
1895 
1896 	nm_i->nat_bitmap = kmemdup(version_bitmap, nm_i->bitmap_size,
1897 					GFP_KERNEL);
1898 	if (!nm_i->nat_bitmap)
1899 		return -ENOMEM;
1900 	return 0;
1901 }
1902 
1903 int build_node_manager(struct f2fs_sb_info *sbi)
1904 {
1905 	int err;
1906 
1907 	sbi->nm_info = kzalloc(sizeof(struct f2fs_nm_info), GFP_KERNEL);
1908 	if (!sbi->nm_info)
1909 		return -ENOMEM;
1910 
1911 	err = init_node_manager(sbi);
1912 	if (err)
1913 		return err;
1914 
1915 	build_free_nids(sbi);
1916 	return 0;
1917 }
1918 
1919 void destroy_node_manager(struct f2fs_sb_info *sbi)
1920 {
1921 	struct f2fs_nm_info *nm_i = NM_I(sbi);
1922 	struct free_nid *i, *next_i;
1923 	struct nat_entry *natvec[NATVEC_SIZE];
1924 	nid_t nid = 0;
1925 	unsigned int found;
1926 
1927 	if (!nm_i)
1928 		return;
1929 
1930 	/* destroy free nid list */
1931 	spin_lock(&nm_i->free_nid_list_lock);
1932 	list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
1933 		f2fs_bug_on(i->state == NID_ALLOC);
1934 		__del_from_free_nid_list(nm_i, i);
1935 		nm_i->fcnt--;
1936 		spin_unlock(&nm_i->free_nid_list_lock);
1937 		kmem_cache_free(free_nid_slab, i);
1938 		spin_lock(&nm_i->free_nid_list_lock);
1939 	}
1940 	f2fs_bug_on(nm_i->fcnt);
1941 	spin_unlock(&nm_i->free_nid_list_lock);
1942 
1943 	/* destroy nat cache */
1944 	write_lock(&nm_i->nat_tree_lock);
1945 	while ((found = __gang_lookup_nat_cache(nm_i,
1946 					nid, NATVEC_SIZE, natvec))) {
1947 		unsigned idx;
1948 		nid = nat_get_nid(natvec[found - 1]) + 1;
1949 		for (idx = 0; idx < found; idx++)
1950 			__del_from_nat_cache(nm_i, natvec[idx]);
1951 	}
1952 	f2fs_bug_on(nm_i->nat_cnt);
1953 	write_unlock(&nm_i->nat_tree_lock);
1954 
1955 	kfree(nm_i->nat_bitmap);
1956 	sbi->nm_info = NULL;
1957 	kfree(nm_i);
1958 }
1959 
1960 int __init create_node_manager_caches(void)
1961 {
1962 	nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
1963 			sizeof(struct nat_entry));
1964 	if (!nat_entry_slab)
1965 		return -ENOMEM;
1966 
1967 	free_nid_slab = f2fs_kmem_cache_create("free_nid",
1968 			sizeof(struct free_nid));
1969 	if (!free_nid_slab) {
1970 		kmem_cache_destroy(nat_entry_slab);
1971 		return -ENOMEM;
1972 	}
1973 	return 0;
1974 }
1975 
1976 void destroy_node_manager_caches(void)
1977 {
1978 	kmem_cache_destroy(free_nid_slab);
1979 	kmem_cache_destroy(nat_entry_slab);
1980 }
1981