/* xref: /openbmc/linux/fs/f2fs/recovery.c (revision f3a8b664) */
/*
 * fs/f2fs/recovery.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include "f2fs.h"
#include "node.h"
#include "segment.h"

/*
 * Roll forward recovery scenarios.
 *
 * [Term] F: fsync_mark, D: dentry_mark
 *
 * 1. inode(x) | CP | inode(x) | dnode(F)
 * -> Update the latest inode(x).
 *
 * 2. inode(x) | CP | inode(F) | dnode(F)
 * -> No problem.
 *
 * 3. inode(x) | CP | dnode(F) | inode(x)
 * -> Recover to the latest dnode(F), and drop the last inode(x).
 *
 * 4. inode(x) | CP | dnode(F) | inode(F)
 * -> No problem.
 *
 * 5. CP | inode(x) | dnode(F)
 * -> The inode(DF) was missing. Should drop this dnode(F).
 *
 * 6. CP | inode(DF) | dnode(F)
 * -> No problem.
 *
 * 7. CP | dnode(F) | inode(DF)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *
 * 8. CP | dnode(F) | inode(x)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *    But it will fail due to no inode(DF).
 */
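
/*
 * Note: in the diagrams above, "CP" marks the last checkpoint; everything
 * to its left is already persistent, so roll-forward only needs to replay
 * the node blocks written after it.
 */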

static struct kmem_cache *fsync_entry_slab;

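/*
 * Check whether enough free space remains to replay every block allocated
 * since the last checkpoint without overflowing the user block count.
 */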
bool space_for_roll_forward(struct f2fs_sb_info *sbi)
{
	s64 nalloc = percpu_counter_sum_positive(&sbi->alloc_valid_block_count);

	if (sbi->last_valid_block_count + nalloc > sbi->user_block_count)
		return false;
	return true;
}

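/* Find the entry tracking @ino in the recovery list, if any. */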
static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
								nid_t ino)
{
	struct fsync_inode_entry *entry;

	list_for_each_entry(entry, head, list)
		if (entry->inode->i_ino == ino)
			return entry;

	return NULL;
}

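/*
 * Grab the inode for @ino and append a new tracking entry to @head.
 * Returns the new entry, or an ERR_PTR if the inode cannot be read.
 */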
static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi,
					struct list_head *head, nid_t ino)
{
	struct inode *inode;
	struct fsync_inode_entry *entry;

	inode = f2fs_iget_retry(sbi->sb, ino);
	if (IS_ERR(inode))
		return ERR_CAST(inode);

	entry = f2fs_kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
	entry->inode = inode;
	list_add_tail(&entry->list, head);

	return entry;
}

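/* Drop the inode reference and free one tracking entry. */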
static void del_fsync_inode(struct fsync_inode_entry *entry)
{
	iput(entry->inode);
	list_del(&entry->list);
	kmem_cache_free(fsync_entry_slab, entry);
}

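/*
 * Re-link @inode into its parent directory using the name stored in the
 * raw inode on @ipage. If a stale entry with the same name points at a
 * different inode, that inode is unlinked (as an orphan) first.
 */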
static int recover_dentry(struct inode *inode, struct page *ipage,
						struct list_head *dir_list)
{
	struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
	nid_t pino = le32_to_cpu(raw_inode->i_pino);
	struct f2fs_dir_entry *de;
	struct fscrypt_name fname;
	struct page *page;
	struct inode *dir, *einode;
	struct fsync_inode_entry *entry;
	int err = 0;
	char *name;

	entry = get_fsync_inode(dir_list, pino);
	if (!entry) {
		entry = add_fsync_inode(F2FS_I_SB(inode), dir_list, pino);
		if (IS_ERR(entry)) {
			dir = ERR_CAST(entry);
			err = PTR_ERR(entry);
			goto out;
		}
	}

	dir = entry->inode;

	memset(&fname, 0, sizeof(struct fscrypt_name));
	fname.disk_name.len = le32_to_cpu(raw_inode->i_namelen);
	fname.disk_name.name = raw_inode->i_name;

	if (unlikely(fname.disk_name.len > F2FS_NAME_LEN)) {
		WARN_ON(1);
		err = -ENAMETOOLONG;
		goto out;
	}
retry:
	de = __f2fs_find_entry(dir, &fname, &page);
	if (de && inode->i_ino == le32_to_cpu(de->ino))
		goto out_unmap_put;

	if (de) {
		einode = f2fs_iget_retry(inode->i_sb, le32_to_cpu(de->ino));
		if (IS_ERR(einode)) {
			WARN_ON(1);
			err = PTR_ERR(einode);
			if (err == -ENOENT)
				err = -EEXIST;
			goto out_unmap_put;
		}
		err = acquire_orphan_inode(F2FS_I_SB(inode));
		if (err) {
			iput(einode);
			goto out_unmap_put;
		}
		f2fs_delete_entry(de, page, dir, einode);
		iput(einode);
		goto retry;
	} else if (IS_ERR(page)) {
		err = PTR_ERR(page);
	} else {
		err = __f2fs_do_add_link(dir, &fname, inode,
					inode->i_ino, inode->i_mode);
	}
	if (err == -ENOMEM)
		goto retry;
	goto out;

out_unmap_put:
	f2fs_dentry_kunmap(dir, page);
	f2fs_put_page(page, 0);
out:
	if (file_enc_name(inode))
		name = "<encrypted>";
	else
		name = raw_inode->i_name;
	f2fs_msg(inode->i_sb, KERN_NOTICE,
			"%s: ino = %x, name = %s, dir = %lx, err = %d",
			__func__, ino_of_node(ipage), name,
			IS_ERR(dir) ? 0 : dir->i_ino, err);
	return err;
}

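/*
 * Copy the attributes recorded in the raw inode on @page (mode, size,
 * timestamps) into the in-memory @inode.
 */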
static void recover_inode(struct inode *inode, struct page *page)
{
	struct f2fs_inode *raw = F2FS_INODE(page);
	char *name;

	inode->i_mode = le16_to_cpu(raw->i_mode);
	f2fs_i_size_write(inode, le64_to_cpu(raw->i_size));
	inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime);
	inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
	inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
	inode->i_atime.tv_nsec = le32_to_cpu(raw->i_atime_nsec);
	inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
	inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);

	if (file_enc_name(inode))
		name = "<encrypted>";
	else
		name = F2FS_INODE(page)->i_name;

	f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s",
			ino_of_node(page), name);
}

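/*
 * Heuristic check that @ipage still describes the same generation of
 * @inode: if any in-memory timestamp is newer than the on-disk one,
 * the page is stale and its blocks must not be recovered.
 */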
static bool is_same_inode(struct inode *inode, struct page *ipage)
{
	struct f2fs_inode *ri = F2FS_INODE(ipage);
	struct timespec disk;

	if (!IS_INODE(ipage))
		return true;

	disk.tv_sec = le64_to_cpu(ri->i_ctime);
	disk.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
	if (timespec_compare(&inode->i_ctime, &disk) > 0)
		return false;

	disk.tv_sec = le64_to_cpu(ri->i_atime);
	disk.tv_nsec = le32_to_cpu(ri->i_atime_nsec);
	if (timespec_compare(&inode->i_atime, &disk) > 0)
		return false;

	disk.tv_sec = le64_to_cpu(ri->i_mtime);
	disk.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
	if (timespec_compare(&inode->i_mtime, &disk) > 0)
		return false;

	return true;
}

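/*
 * Step #1 of recovery: walk the warm node chain starting at the block
 * right after the last checkpoint and collect one tracking entry per
 * inode that has a dnode carrying an fsync mark.
 */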
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
{
	struct curseg_info *curseg;
	struct page *page = NULL;
	block_t blkaddr;
	int err = 0;

	/* get node pages in the current segment */
	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	while (1) {
		struct fsync_inode_entry *entry;

		if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
			return 0;

		page = get_tmp_page(sbi, blkaddr);

		if (!is_recoverable_dnode(page))
			break;

		if (!is_fsync_dnode(page))
			goto next;

		entry = get_fsync_inode(head, ino_of_node(page));
		if (entry) {
			if (!is_same_inode(entry->inode, page))
				goto next;
		} else {
			if (IS_INODE(page) && is_dent_dnode(page)) {
				err = recover_inode_page(sbi, page);
				if (err)
					break;
			}

			/*
			 * CP | dnode(F) | inode(DF)
			 * For this case, we should not give up now.
			 */
			entry = add_fsync_inode(sbi, head, ino_of_node(page));
			if (IS_ERR(entry)) {
				err = PTR_ERR(entry);
				if (err == -ENOENT) {
					err = 0;
					goto next;
				}
				break;
			}
		}
		entry->blkaddr = blkaddr;

		if (IS_INODE(page) && is_dent_dnode(page))
			entry->last_dentry = blkaddr;
next:
		/* move to the next dnode block in the chain */
		blkaddr = next_blkaddr_of_node(page);
		f2fs_put_page(page, 1);

		ra_meta_pages_cond(sbi, blkaddr);
	}
	f2fs_put_page(page, 1);
	return err;
}

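/* Release every remaining tracking entry on @head. */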
static void destroy_fsync_dnodes(struct list_head *head)
{
	struct fsync_inode_entry *entry, *tmp;

	list_for_each_entry_safe(entry, tmp, head, list)
		del_fsync_inode(entry);
}

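/*
 * @blkaddr is about to be reused for a recovered block, but an older
 * node may still claim it. Find that node through the segment summary
 * and truncate the stale index so the block has a single owner.
 */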
static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
			block_t blkaddr, struct dnode_of_data *dn)
{
	struct seg_entry *sentry;
	unsigned int segno = GET_SEGNO(sbi, blkaddr);
	unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
	struct f2fs_summary_block *sum_node;
	struct f2fs_summary sum;
	struct page *sum_page, *node_page;
	struct dnode_of_data tdn = *dn;
	nid_t ino, nid;
	struct inode *inode;
	unsigned int offset;
	block_t bidx;
	int i;

	sentry = get_seg_entry(sbi, segno);
	if (!f2fs_test_bit(blkoff, sentry->cur_valid_map))
		return 0;

	/* Get the previous summary */
	for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) {
		struct curseg_info *curseg = CURSEG_I(sbi, i);
		if (curseg->segno == segno) {
			sum = curseg->sum_blk->entries[blkoff];
			goto got_it;
		}
	}

	sum_page = get_sum_page(sbi, segno);
	sum_node = (struct f2fs_summary_block *)page_address(sum_page);
	sum = sum_node->entries[blkoff];
	f2fs_put_page(sum_page, 1);
got_it:
	/* Use the locked dnode page and inode */
	nid = le32_to_cpu(sum.nid);
	if (dn->inode->i_ino == nid) {
		tdn.nid = nid;
		if (!dn->inode_page_locked)
			lock_page(dn->inode_page);
		tdn.node_page = dn->inode_page;
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
		goto truncate_out;
	} else if (dn->nid == nid) {
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
		goto truncate_out;
	}

	/* Get the node page */
	node_page = get_node_page(sbi, nid);
	if (IS_ERR(node_page))
		return PTR_ERR(node_page);

	offset = ofs_of_node(node_page);
	ino = ino_of_node(node_page);
	f2fs_put_page(node_page, 1);

	if (ino != dn->inode->i_ino) {
		/* Deallocate previous index in the node page */
		inode = f2fs_iget_retry(sbi->sb, ino);
		if (IS_ERR(inode))
			return PTR_ERR(inode);
	} else {
		inode = dn->inode;
	}

	bidx = start_bidx_of_node(offset, inode) + le16_to_cpu(sum.ofs_in_node);

	/*
	 * If the inode page is locked, unlock it temporarily; its
	 * reference count stays elevated, so the page cannot go away.
	 */
	if (ino == dn->inode->i_ino && dn->inode_page_locked)
		unlock_page(dn->inode_page);

	set_new_dnode(&tdn, inode, NULL, NULL, 0);
	if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
		goto out;

	if (tdn.data_blkaddr == blkaddr)
		truncate_data_blocks_range(&tdn, 1);

	f2fs_put_dnode(&tdn);
out:
	if (ino != dn->inode->i_ino)
		iput(inode);
	else if (dn->inode_page_locked)
		lock_page(dn->inode_page);
	return 0;

truncate_out:
	if (datablock_addr(tdn.node_page, tdn.ofs_in_node) == blkaddr)
		truncate_data_blocks_range(&tdn, 1);
	if (dn->inode->i_ino == nid && !dn->inode_page_locked)
		unlock_page(dn->inode_page);
	return 0;
}

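/*
 * Replay one fsynced dnode found at @blkaddr: recover xattrs and inline
 * data first, then walk every data index on @page and bring the current
 * on-disk dnode in line with it, block by block.
 */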
static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
					struct page *page, block_t blkaddr)
{
	struct dnode_of_data dn;
	struct node_info ni;
	unsigned int start, end;
	int err = 0, recovered = 0;

	/* step 1: recover xattr */
	if (IS_INODE(page)) {
		recover_inline_xattr(inode, page);
	} else if (f2fs_has_xattr_block(ofs_of_node(page))) {
		/*
		 * Deprecated; xattr blocks should be found in the cold log.
		 * But we keep this for backward compatibility.
		 */
		recover_xattr_data(inode, page, blkaddr);
		goto out;
	}

	/* step 2: recover inline data */
	if (recover_inline_data(inode, page))
		goto out;

	/* step 3: recover data indices */
	start = start_bidx_of_node(ofs_of_node(page), inode);
	end = start + ADDRS_PER_PAGE(page, inode);

	set_new_dnode(&dn, inode, NULL, NULL, 0);
retry_dn:
	err = get_dnode_of_data(&dn, start, ALLOC_NODE);
	if (err) {
		if (err == -ENOMEM) {
			congestion_wait(BLK_RW_ASYNC, HZ/50);
			goto retry_dn;
		}
		goto out;
	}

	f2fs_wait_on_page_writeback(dn.node_page, NODE, true);

	get_node_info(sbi, dn.nid, &ni);
	f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
	f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));

	for (; start < end; start++, dn.ofs_in_node++) {
		block_t src, dest;

		src = datablock_addr(dn.node_page, dn.ofs_in_node);
		dest = datablock_addr(page, dn.ofs_in_node);

		/* skip recovering if dest is the same as src */
		if (src == dest)
			continue;

		/* dest is invalid, just invalidate src block */
		if (dest == NULL_ADDR) {
			truncate_data_blocks_range(&dn, 1);
			continue;
		}

		/* widen to loff_t so the shift cannot overflow 32 bits */
		if ((loff_t)(start + 1) << PAGE_SHIFT > i_size_read(inode))
			f2fs_i_size_write(inode,
					(loff_t)(start + 1) << PAGE_SHIFT);

		/*
		 * dest is a reserved block, so invalidate src block
		 * and then reserve one new block in the dnode page.
		 */
		if (dest == NEW_ADDR) {
			truncate_data_blocks_range(&dn, 1);
			reserve_new_block(&dn);
			continue;
		}

		/* dest is a valid block, try to recover from src to dest */
		if (is_valid_blkaddr(sbi, dest, META_POR)) {

			if (src == NULL_ADDR) {
				err = reserve_new_block(&dn);
#ifdef CONFIG_F2FS_FAULT_INJECTION
				while (err)
					err = reserve_new_block(&dn);
#endif
				/* We should not get -ENOSPC */
				f2fs_bug_on(sbi, err);
				if (err)
					goto err;
			}
retry_prev:
			/* Check the previous node page having this index */
			err = check_index_in_prev_nodes(sbi, dest, &dn);
			if (err) {
				if (err == -ENOMEM) {
					congestion_wait(BLK_RW_ASYNC, HZ/50);
					goto retry_prev;
				}
				goto err;
			}

			/* write dummy data page */
			f2fs_replace_block(sbi, &dn, src, dest,
						ni.version, false, false);
			recovered++;
		}
	}

	copy_node_footer(dn.node_page, page);
	fill_node_footer(dn.node_page, dn.nid, ni.ino,
					ofs_of_node(page), false);
	set_page_dirty(dn.node_page);
err:
	f2fs_put_dnode(&dn);
out:
	f2fs_msg(sbi->sb, KERN_NOTICE,
		"recover_data: ino = %lx, recovered = %d blocks, err = %d",
		inode->i_ino, recovered, err);
	return err;
}

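/*
 * Step #2 of recovery: walk the warm node chain again and, for every
 * block that belongs to a tracked inode, replay its inode metadata,
 * dentry, and data indices in order.
 */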
static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
						struct list_head *dir_list)
{
	struct curseg_info *curseg;
	struct page *page = NULL;
	int err = 0;
	block_t blkaddr;

	/* get node pages in the current segment */
	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	while (1) {
		struct fsync_inode_entry *entry;

		if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
			break;

		ra_meta_pages_cond(sbi, blkaddr);

		page = get_tmp_page(sbi, blkaddr);

		if (!is_recoverable_dnode(page)) {
			f2fs_put_page(page, 1);
			break;
		}

		entry = get_fsync_inode(inode_list, ino_of_node(page));
		if (!entry)
			goto next;
		/*
		 * inode(x) | CP | inode(x) | dnode(F)
		 * In this case, we can lose the latest inode(x).
		 * So, call recover_inode for the inode update.
		 */
		if (IS_INODE(page))
			recover_inode(entry->inode, page);
		if (entry->last_dentry == blkaddr) {
			err = recover_dentry(entry->inode, page, dir_list);
			if (err) {
				f2fs_put_page(page, 1);
				break;
			}
		}
		err = do_recover_data(sbi, entry->inode, page, blkaddr);
		if (err) {
			f2fs_put_page(page, 1);
			break;
		}

		if (entry->blkaddr == blkaddr)
			del_fsync_inode(entry);
next:
		/* move to the next dnode block in the chain */
		blkaddr = next_blkaddr_of_node(page);
		f2fs_put_page(page, 1);
	}
	if (!err)
		allocate_new_segments(sbi);
	return err;
}

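/*
 * Entry point for roll-forward recovery. With @check_only, only report
 * whether any fsynced data exists (returns 1 if so); otherwise replay
 * it and write a CP_RECOVERY checkpoint on success.
 */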
int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
{
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
	struct list_head inode_list;
	struct list_head dir_list;
	block_t blkaddr;
	int err;
	int ret = 0;
	bool need_writecp = false;

	fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
			sizeof(struct fsync_inode_entry));
	if (!fsync_entry_slab)
		return -ENOMEM;

	INIT_LIST_HEAD(&inode_list);
	INIT_LIST_HEAD(&dir_list);

	/* prevent checkpoint */
	mutex_lock(&sbi->cp_mutex);

	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	/* step #1: find fsynced inode numbers */
	err = find_fsync_dnodes(sbi, &inode_list);
	if (err || list_empty(&inode_list))
		goto out;

	if (check_only) {
		ret = 1;
		goto out;
	}

	need_writecp = true;

	/* step #2: recover data */
	err = recover_data(sbi, &inode_list, &dir_list);
	if (!err)
		f2fs_bug_on(sbi, !list_empty(&inode_list));
out:
	destroy_fsync_dnodes(&inode_list);

	/* truncate the meta pages used by the recovery */
	truncate_inode_pages_range(META_MAPPING(sbi),
			(loff_t)MAIN_BLKADDR(sbi) << PAGE_SHIFT, -1);

	if (err) {
		truncate_inode_pages_final(NODE_MAPPING(sbi));
		truncate_inode_pages_final(META_MAPPING(sbi));
	}

	clear_sbi_flag(sbi, SBI_POR_DOING);
	if (err)
		set_ckpt_flags(sbi, CP_ERROR_FLAG);
	mutex_unlock(&sbi->cp_mutex);

	/* let's drop all the directory inodes for clean checkpoint */
	destroy_fsync_dnodes(&dir_list);

	if (!err && need_writecp) {
		struct cp_control cpc = {
			.reason = CP_RECOVERY,
		};
		err = write_checkpoint(sbi, &cpc);
	}

	kmem_cache_destroy(fsync_entry_slab);
	return ret ? ret : err;
}