xref: /openbmc/linux/fs/f2fs/recovery.c (revision 6724ed7f)
1 /*
2  * fs/f2fs/recovery.c
3  *
4  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5  *             http://www.samsung.com/
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11 #include <linux/fs.h>
12 #include <linux/f2fs_fs.h>
13 #include "f2fs.h"
14 #include "node.h"
15 #include "segment.h"
16 
17 /*
18  * Roll forward recovery scenarios.
19  *
20  * [Term] F: fsync_mark, D: dentry_mark
21  *
22  * 1. inode(x) | CP | inode(x) | dnode(F)
23  * -> Update the latest inode(x).
24  *
25  * 2. inode(x) | CP | inode(F) | dnode(F)
26  * -> No problem.
27  *
28  * 3. inode(x) | CP | dnode(F) | inode(x)
29  * -> Recover to the latest dnode(F), and drop the last inode(x)
30  *
31  * 4. inode(x) | CP | dnode(F) | inode(F)
32  * -> No problem.
33  *
34  * 5. CP | inode(x) | dnode(F)
35  * -> The inode(DF) was missing. Should drop this dnode(F).
36  *
37  * 6. CP | inode(DF) | dnode(F)
38  * -> No problem.
39  *
40  * 7. CP | dnode(F) | inode(DF)
41  * -> If f2fs_iget fails, then goto next to find inode(DF).
42  *
43  * 8. CP | dnode(F) | inode(x)
44  * -> If f2fs_iget fails, then goto next to find inode(DF).
45  *    But it will fail due to no inode(DF).
46  */
47 
/*
 * Slab cache backing struct fsync_inode_entry objects. It is created at the
 * start of recover_fsync_data() and destroyed again before that function
 * returns.
 */
static struct kmem_cache *fsync_entry_slab;
49 
50 bool space_for_roll_forward(struct f2fs_sb_info *sbi)
51 {
52 	s64 nalloc = percpu_counter_sum_positive(&sbi->alloc_valid_block_count);
53 
54 	if (sbi->last_valid_block_count + nalloc > sbi->user_block_count)
55 		return false;
56 	return true;
57 }
58 
59 static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
60 								nid_t ino)
61 {
62 	struct fsync_inode_entry *entry;
63 
64 	list_for_each_entry(entry, head, list)
65 		if (entry->inode->i_ino == ino)
66 			return entry;
67 
68 	return NULL;
69 }
70 
71 static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi,
72 			struct list_head *head, nid_t ino, bool quota_inode)
73 {
74 	struct inode *inode;
75 	struct fsync_inode_entry *entry;
76 	int err;
77 
78 	inode = f2fs_iget_retry(sbi->sb, ino);
79 	if (IS_ERR(inode))
80 		return ERR_CAST(inode);
81 
82 	err = dquot_initialize(inode);
83 	if (err)
84 		goto err_out;
85 
86 	if (quota_inode) {
87 		err = dquot_alloc_inode(inode);
88 		if (err)
89 			goto err_out;
90 	}
91 
92 	entry = f2fs_kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
93 	entry->inode = inode;
94 	list_add_tail(&entry->list, head);
95 
96 	return entry;
97 err_out:
98 	iput(inode);
99 	return ERR_PTR(err);
100 }
101 
102 static void del_fsync_inode(struct fsync_inode_entry *entry)
103 {
104 	iput(entry->inode);
105 	list_del(&entry->list);
106 	kmem_cache_free(fsync_entry_slab, entry);
107 }
108 
109 static int recover_dentry(struct inode *inode, struct page *ipage,
110 						struct list_head *dir_list)
111 {
112 	struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
113 	nid_t pino = le32_to_cpu(raw_inode->i_pino);
114 	struct f2fs_dir_entry *de;
115 	struct fscrypt_name fname;
116 	struct page *page;
117 	struct inode *dir, *einode;
118 	struct fsync_inode_entry *entry;
119 	int err = 0;
120 	char *name;
121 
122 	entry = get_fsync_inode(dir_list, pino);
123 	if (!entry) {
124 		entry = add_fsync_inode(F2FS_I_SB(inode), dir_list,
125 							pino, false);
126 		if (IS_ERR(entry)) {
127 			dir = ERR_CAST(entry);
128 			err = PTR_ERR(entry);
129 			goto out;
130 		}
131 	}
132 
133 	dir = entry->inode;
134 
135 	memset(&fname, 0, sizeof(struct fscrypt_name));
136 	fname.disk_name.len = le32_to_cpu(raw_inode->i_namelen);
137 	fname.disk_name.name = raw_inode->i_name;
138 
139 	if (unlikely(fname.disk_name.len > F2FS_NAME_LEN)) {
140 		WARN_ON(1);
141 		err = -ENAMETOOLONG;
142 		goto out;
143 	}
144 retry:
145 	de = __f2fs_find_entry(dir, &fname, &page);
146 	if (de && inode->i_ino == le32_to_cpu(de->ino))
147 		goto out_unmap_put;
148 
149 	if (de) {
150 		einode = f2fs_iget_retry(inode->i_sb, le32_to_cpu(de->ino));
151 		if (IS_ERR(einode)) {
152 			WARN_ON(1);
153 			err = PTR_ERR(einode);
154 			if (err == -ENOENT)
155 				err = -EEXIST;
156 			goto out_unmap_put;
157 		}
158 
159 		err = dquot_initialize(einode);
160 		if (err) {
161 			iput(einode);
162 			goto out_unmap_put;
163 		}
164 
165 		err = acquire_orphan_inode(F2FS_I_SB(inode));
166 		if (err) {
167 			iput(einode);
168 			goto out_unmap_put;
169 		}
170 		f2fs_delete_entry(de, page, dir, einode);
171 		iput(einode);
172 		goto retry;
173 	} else if (IS_ERR(page)) {
174 		err = PTR_ERR(page);
175 	} else {
176 		err = __f2fs_do_add_link(dir, &fname, inode,
177 					inode->i_ino, inode->i_mode);
178 	}
179 	if (err == -ENOMEM)
180 		goto retry;
181 	goto out;
182 
183 out_unmap_put:
184 	f2fs_dentry_kunmap(dir, page);
185 	f2fs_put_page(page, 0);
186 out:
187 	if (file_enc_name(inode))
188 		name = "<encrypted>";
189 	else
190 		name = raw_inode->i_name;
191 	f2fs_msg(inode->i_sb, KERN_NOTICE,
192 			"%s: ino = %x, name = %s, dir = %lx, err = %d",
193 			__func__, ino_of_node(ipage), name,
194 			IS_ERR(dir) ? 0 : dir->i_ino, err);
195 	return err;
196 }
197 
198 static void recover_inode(struct inode *inode, struct page *page)
199 {
200 	struct f2fs_inode *raw = F2FS_INODE(page);
201 	char *name;
202 
203 	inode->i_mode = le16_to_cpu(raw->i_mode);
204 	f2fs_i_size_write(inode, le64_to_cpu(raw->i_size));
205 	inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime);
206 	inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
207 	inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
208 	inode->i_atime.tv_nsec = le32_to_cpu(raw->i_atime_nsec);
209 	inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
210 	inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
211 
212 	F2FS_I(inode)->i_advise = raw->i_advise;
213 
214 	if (file_enc_name(inode))
215 		name = "<encrypted>";
216 	else
217 		name = F2FS_INODE(page)->i_name;
218 
219 	f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s",
220 			ino_of_node(page), name);
221 }
222 
223 static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
224 				bool check_only)
225 {
226 	struct curseg_info *curseg;
227 	struct page *page = NULL;
228 	block_t blkaddr;
229 	int err = 0;
230 
231 	/* get node pages in the current segment */
232 	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
233 	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
234 
235 	while (1) {
236 		struct fsync_inode_entry *entry;
237 
238 		if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
239 			return 0;
240 
241 		page = get_tmp_page(sbi, blkaddr);
242 
243 		if (!is_recoverable_dnode(page))
244 			break;
245 
246 		if (!is_fsync_dnode(page))
247 			goto next;
248 
249 		entry = get_fsync_inode(head, ino_of_node(page));
250 		if (!entry) {
251 			bool quota_inode = false;
252 
253 			if (!check_only &&
254 					IS_INODE(page) && is_dent_dnode(page)) {
255 				err = recover_inode_page(sbi, page);
256 				if (err)
257 					break;
258 				quota_inode = true;
259 			}
260 
261 			/*
262 			 * CP | dnode(F) | inode(DF)
263 			 * For this case, we should not give up now.
264 			 */
265 			entry = add_fsync_inode(sbi, head, ino_of_node(page),
266 								quota_inode);
267 			if (IS_ERR(entry)) {
268 				err = PTR_ERR(entry);
269 				if (err == -ENOENT) {
270 					err = 0;
271 					goto next;
272 				}
273 				break;
274 			}
275 		}
276 		entry->blkaddr = blkaddr;
277 
278 		if (IS_INODE(page) && is_dent_dnode(page))
279 			entry->last_dentry = blkaddr;
280 next:
281 		/* check next segment */
282 		blkaddr = next_blkaddr_of_node(page);
283 		f2fs_put_page(page, 1);
284 
285 		ra_meta_pages_cond(sbi, blkaddr);
286 	}
287 	f2fs_put_page(page, 1);
288 	return err;
289 }
290 
291 static void destroy_fsync_dnodes(struct list_head *head)
292 {
293 	struct fsync_inode_entry *entry, *tmp;
294 
295 	list_for_each_entry_safe(entry, tmp, head, list)
296 		del_fsync_inode(entry);
297 }
298 
299 static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
300 			block_t blkaddr, struct dnode_of_data *dn)
301 {
302 	struct seg_entry *sentry;
303 	unsigned int segno = GET_SEGNO(sbi, blkaddr);
304 	unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
305 	struct f2fs_summary_block *sum_node;
306 	struct f2fs_summary sum;
307 	struct page *sum_page, *node_page;
308 	struct dnode_of_data tdn = *dn;
309 	nid_t ino, nid;
310 	struct inode *inode;
311 	unsigned int offset;
312 	block_t bidx;
313 	int i;
314 
315 	sentry = get_seg_entry(sbi, segno);
316 	if (!f2fs_test_bit(blkoff, sentry->cur_valid_map))
317 		return 0;
318 
319 	/* Get the previous summary */
320 	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
321 		struct curseg_info *curseg = CURSEG_I(sbi, i);
322 		if (curseg->segno == segno) {
323 			sum = curseg->sum_blk->entries[blkoff];
324 			goto got_it;
325 		}
326 	}
327 
328 	sum_page = get_sum_page(sbi, segno);
329 	sum_node = (struct f2fs_summary_block *)page_address(sum_page);
330 	sum = sum_node->entries[blkoff];
331 	f2fs_put_page(sum_page, 1);
332 got_it:
333 	/* Use the locked dnode page and inode */
334 	nid = le32_to_cpu(sum.nid);
335 	if (dn->inode->i_ino == nid) {
336 		tdn.nid = nid;
337 		if (!dn->inode_page_locked)
338 			lock_page(dn->inode_page);
339 		tdn.node_page = dn->inode_page;
340 		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
341 		goto truncate_out;
342 	} else if (dn->nid == nid) {
343 		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
344 		goto truncate_out;
345 	}
346 
347 	/* Get the node page */
348 	node_page = get_node_page(sbi, nid);
349 	if (IS_ERR(node_page))
350 		return PTR_ERR(node_page);
351 
352 	offset = ofs_of_node(node_page);
353 	ino = ino_of_node(node_page);
354 	f2fs_put_page(node_page, 1);
355 
356 	if (ino != dn->inode->i_ino) {
357 		int ret;
358 
359 		/* Deallocate previous index in the node page */
360 		inode = f2fs_iget_retry(sbi->sb, ino);
361 		if (IS_ERR(inode))
362 			return PTR_ERR(inode);
363 
364 		ret = dquot_initialize(inode);
365 		if (ret) {
366 			iput(inode);
367 			return ret;
368 		}
369 	} else {
370 		inode = dn->inode;
371 	}
372 
373 	bidx = start_bidx_of_node(offset, inode) + le16_to_cpu(sum.ofs_in_node);
374 
375 	/*
376 	 * if inode page is locked, unlock temporarily, but its reference
377 	 * count keeps alive.
378 	 */
379 	if (ino == dn->inode->i_ino && dn->inode_page_locked)
380 		unlock_page(dn->inode_page);
381 
382 	set_new_dnode(&tdn, inode, NULL, NULL, 0);
383 	if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
384 		goto out;
385 
386 	if (tdn.data_blkaddr == blkaddr)
387 		truncate_data_blocks_range(&tdn, 1);
388 
389 	f2fs_put_dnode(&tdn);
390 out:
391 	if (ino != dn->inode->i_ino)
392 		iput(inode);
393 	else if (dn->inode_page_locked)
394 		lock_page(dn->inode_page);
395 	return 0;
396 
397 truncate_out:
398 	if (datablock_addr(tdn.inode, tdn.node_page,
399 					tdn.ofs_in_node) == blkaddr)
400 		truncate_data_blocks_range(&tdn, 1);
401 	if (dn->inode->i_ino == nid && !dn->inode_page_locked)
402 		unlock_page(dn->inode_page);
403 	return 0;
404 }
405 
406 static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
407 					struct page *page, block_t blkaddr)
408 {
409 	struct dnode_of_data dn;
410 	struct node_info ni;
411 	unsigned int start, end;
412 	int err = 0, recovered = 0;
413 
414 	/* step 1: recover xattr */
415 	if (IS_INODE(page)) {
416 		recover_inline_xattr(inode, page);
417 	} else if (f2fs_has_xattr_block(ofs_of_node(page))) {
418 		err = recover_xattr_data(inode, page, blkaddr);
419 		if (!err)
420 			recovered++;
421 		goto out;
422 	}
423 
424 	/* step 2: recover inline data */
425 	if (recover_inline_data(inode, page))
426 		goto out;
427 
428 	/* step 3: recover data indices */
429 	start = start_bidx_of_node(ofs_of_node(page), inode);
430 	end = start + ADDRS_PER_PAGE(page, inode);
431 
432 	set_new_dnode(&dn, inode, NULL, NULL, 0);
433 retry_dn:
434 	err = get_dnode_of_data(&dn, start, ALLOC_NODE);
435 	if (err) {
436 		if (err == -ENOMEM) {
437 			congestion_wait(BLK_RW_ASYNC, HZ/50);
438 			goto retry_dn;
439 		}
440 		goto out;
441 	}
442 
443 	f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
444 
445 	get_node_info(sbi, dn.nid, &ni);
446 	f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
447 	f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));
448 
449 	for (; start < end; start++, dn.ofs_in_node++) {
450 		block_t src, dest;
451 
452 		src = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);
453 		dest = datablock_addr(dn.inode, page, dn.ofs_in_node);
454 
455 		/* skip recovering if dest is the same as src */
456 		if (src == dest)
457 			continue;
458 
459 		/* dest is invalid, just invalidate src block */
460 		if (dest == NULL_ADDR) {
461 			truncate_data_blocks_range(&dn, 1);
462 			continue;
463 		}
464 
465 		if (!file_keep_isize(inode) &&
466 			(i_size_read(inode) <= ((loff_t)start << PAGE_SHIFT)))
467 			f2fs_i_size_write(inode,
468 				(loff_t)(start + 1) << PAGE_SHIFT);
469 
470 		/*
471 		 * dest is reserved block, invalidate src block
472 		 * and then reserve one new block in dnode page.
473 		 */
474 		if (dest == NEW_ADDR) {
475 			truncate_data_blocks_range(&dn, 1);
476 			reserve_new_block(&dn);
477 			continue;
478 		}
479 
480 		/* dest is valid block, try to recover from src to dest */
481 		if (is_valid_blkaddr(sbi, dest, META_POR)) {
482 
483 			if (src == NULL_ADDR) {
484 				err = reserve_new_block(&dn);
485 #ifdef CONFIG_F2FS_FAULT_INJECTION
486 				while (err)
487 					err = reserve_new_block(&dn);
488 #endif
489 				/* We should not get -ENOSPC */
490 				f2fs_bug_on(sbi, err);
491 				if (err)
492 					goto err;
493 			}
494 retry_prev:
495 			/* Check the previous node page having this index */
496 			err = check_index_in_prev_nodes(sbi, dest, &dn);
497 			if (err) {
498 				if (err == -ENOMEM) {
499 					congestion_wait(BLK_RW_ASYNC, HZ/50);
500 					goto retry_prev;
501 				}
502 				goto err;
503 			}
504 
505 			/* write dummy data page */
506 			f2fs_replace_block(sbi, &dn, src, dest,
507 						ni.version, false, false);
508 			recovered++;
509 		}
510 	}
511 
512 	copy_node_footer(dn.node_page, page);
513 	fill_node_footer(dn.node_page, dn.nid, ni.ino,
514 					ofs_of_node(page), false);
515 	set_page_dirty(dn.node_page);
516 err:
517 	f2fs_put_dnode(&dn);
518 out:
519 	f2fs_msg(sbi->sb, KERN_NOTICE,
520 		"recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d",
521 		inode->i_ino,
522 		file_keep_isize(inode) ? "keep" : "recover",
523 		recovered, err);
524 	return err;
525 }
526 
527 static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
528 						struct list_head *dir_list)
529 {
530 	struct curseg_info *curseg;
531 	struct page *page = NULL;
532 	int err = 0;
533 	block_t blkaddr;
534 
535 	/* get node pages in the current segment */
536 	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
537 	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
538 
539 	while (1) {
540 		struct fsync_inode_entry *entry;
541 
542 		if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
543 			break;
544 
545 		ra_meta_pages_cond(sbi, blkaddr);
546 
547 		page = get_tmp_page(sbi, blkaddr);
548 
549 		if (!is_recoverable_dnode(page)) {
550 			f2fs_put_page(page, 1);
551 			break;
552 		}
553 
554 		entry = get_fsync_inode(inode_list, ino_of_node(page));
555 		if (!entry)
556 			goto next;
557 		/*
558 		 * inode(x) | CP | inode(x) | dnode(F)
559 		 * In this case, we can lose the latest inode(x).
560 		 * So, call recover_inode for the inode update.
561 		 */
562 		if (IS_INODE(page))
563 			recover_inode(entry->inode, page);
564 		if (entry->last_dentry == blkaddr) {
565 			err = recover_dentry(entry->inode, page, dir_list);
566 			if (err) {
567 				f2fs_put_page(page, 1);
568 				break;
569 			}
570 		}
571 		err = do_recover_data(sbi, entry->inode, page, blkaddr);
572 		if (err) {
573 			f2fs_put_page(page, 1);
574 			break;
575 		}
576 
577 		if (entry->blkaddr == blkaddr)
578 			del_fsync_inode(entry);
579 next:
580 		/* check next segment */
581 		blkaddr = next_blkaddr_of_node(page);
582 		f2fs_put_page(page, 1);
583 	}
584 	if (!err)
585 		allocate_new_segments(sbi);
586 	return err;
587 }
588 
589 int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
590 {
591 	struct list_head inode_list;
592 	struct list_head dir_list;
593 	int err;
594 	int ret = 0;
595 	unsigned long s_flags = sbi->sb->s_flags;
596 	bool need_writecp = false;
597 #ifdef CONFIG_QUOTA
598 	int quota_enabled;
599 #endif
600 
601 	if (s_flags & SB_RDONLY) {
602 		f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs");
603 		sbi->sb->s_flags &= ~SB_RDONLY;
604 	}
605 
606 #ifdef CONFIG_QUOTA
607 	/* Needed for iput() to work correctly and not trash data */
608 	sbi->sb->s_flags |= SB_ACTIVE;
609 	/* Turn on quotas so that they are updated correctly */
610 	quota_enabled = f2fs_enable_quota_files(sbi, s_flags & SB_RDONLY);
611 #endif
612 
613 	fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
614 			sizeof(struct fsync_inode_entry));
615 	if (!fsync_entry_slab) {
616 		err = -ENOMEM;
617 		goto out;
618 	}
619 
620 	INIT_LIST_HEAD(&inode_list);
621 	INIT_LIST_HEAD(&dir_list);
622 
623 	/* prevent checkpoint */
624 	mutex_lock(&sbi->cp_mutex);
625 
626 	/* step #1: find fsynced inode numbers */
627 	err = find_fsync_dnodes(sbi, &inode_list, check_only);
628 	if (err || list_empty(&inode_list))
629 		goto skip;
630 
631 	if (check_only) {
632 		ret = 1;
633 		goto skip;
634 	}
635 
636 	need_writecp = true;
637 
638 	/* step #2: recover data */
639 	err = recover_data(sbi, &inode_list, &dir_list);
640 	if (!err)
641 		f2fs_bug_on(sbi, !list_empty(&inode_list));
642 skip:
643 	destroy_fsync_dnodes(&inode_list);
644 
645 	/* truncate meta pages to be used by the recovery */
646 	truncate_inode_pages_range(META_MAPPING(sbi),
647 			(loff_t)MAIN_BLKADDR(sbi) << PAGE_SHIFT, -1);
648 
649 	if (err) {
650 		truncate_inode_pages_final(NODE_MAPPING(sbi));
651 		truncate_inode_pages_final(META_MAPPING(sbi));
652 	}
653 
654 	clear_sbi_flag(sbi, SBI_POR_DOING);
655 	mutex_unlock(&sbi->cp_mutex);
656 
657 	/* let's drop all the directory inodes for clean checkpoint */
658 	destroy_fsync_dnodes(&dir_list);
659 
660 	if (!err && need_writecp) {
661 		struct cp_control cpc = {
662 			.reason = CP_RECOVERY,
663 		};
664 		err = write_checkpoint(sbi, &cpc);
665 	}
666 
667 	kmem_cache_destroy(fsync_entry_slab);
668 out:
669 #ifdef CONFIG_QUOTA
670 	/* Turn quotas off */
671 	if (quota_enabled)
672 		f2fs_quota_off_umount(sbi->sb);
673 #endif
674 	sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */
675 
676 	return ret ? ret: err;
677 }
678