xref: /openbmc/linux/fs/nilfs2/recovery.c (revision b04b4f78)
1 /*
2  * recovery.c - NILFS recovery logic
3  *
4  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  * Written by Ryusuke Konishi <ryusuke@osrg.net>
21  */
22 
23 #include <linux/buffer_head.h>
24 #include <linux/blkdev.h>
25 #include <linux/swap.h>
26 #include <linux/crc32.h>
27 #include "nilfs.h"
28 #include "segment.h"
29 #include "sufile.h"
30 #include "page.h"
31 #include "seglist.h"
32 #include "segbuf.h"
33 
/*
 * Result codes of a partial segment check.
 *
 * The first constant is zero, which is also what a successful check
 * returns; every other value names one reason a check did not pass.
 */
enum {
	NILFS_SEG_VALID = 0,			/* check passed */
	NILFS_SEG_NO_SUPER_ROOT,		/* no super root in the segment */
	NILFS_SEG_FAIL_IO,			/* I/O error while loading */
	NILFS_SEG_FAIL_MAGIC,			/* segment magic number invalid */
	NILFS_SEG_FAIL_SEQ,			/* sequence number mismatch */
	NILFS_SEG_FAIL_CHECKSUM_SEGSUM,		/* bad segment summary checksum */
	NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT,	/* bad super root checksum */
	NILFS_SEG_FAIL_CHECKSUM_FULL,		/* bad segment payload checksum */
	NILFS_SEG_FAIL_CONSISTENCY,		/* inconsistent segment */
};
48 
/*
 * Work structure for recovery.
 *
 * Each instance describes one data block found in a segment summary that
 * is to be copied back into its file.  Instances are heap-allocated,
 * chained through @list, and freed once they have been processed.
 */
struct nilfs_recovery_block {
	ino_t ino;		/* Inode number of the file that this block
				   belongs to */
	sector_t blocknr;	/* Disk block number the data is read from */
	__u64 vblocknr;		/* Virtual block number (from the on-disk
				   block info) */
	unsigned long blkoff;	/* File offset of the data block (per block) */
	struct list_head list;	/* Link in the list of blocks to recover */
};
58 
59 
60 static int nilfs_warn_segment_error(int err)
61 {
62 	switch (err) {
63 	case NILFS_SEG_FAIL_IO:
64 		printk(KERN_WARNING
65 		       "NILFS warning: I/O error on loading last segment\n");
66 		return -EIO;
67 	case NILFS_SEG_FAIL_MAGIC:
68 		printk(KERN_WARNING
69 		       "NILFS warning: Segment magic number invalid\n");
70 		break;
71 	case NILFS_SEG_FAIL_SEQ:
72 		printk(KERN_WARNING
73 		       "NILFS warning: Sequence number mismatch\n");
74 		break;
75 	case NILFS_SEG_FAIL_CHECKSUM_SEGSUM:
76 		printk(KERN_WARNING
77 		       "NILFS warning: Checksum error in segment summary\n");
78 		break;
79 	case NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT:
80 		printk(KERN_WARNING
81 		       "NILFS warning: Checksum error in super root\n");
82 		break;
83 	case NILFS_SEG_FAIL_CHECKSUM_FULL:
84 		printk(KERN_WARNING
85 		       "NILFS warning: Checksum error in segment payload\n");
86 		break;
87 	case NILFS_SEG_FAIL_CONSISTENCY:
88 		printk(KERN_WARNING
89 		       "NILFS warning: Inconsistent segment\n");
90 		break;
91 	case NILFS_SEG_NO_SUPER_ROOT:
92 		printk(KERN_WARNING
93 		       "NILFS warning: No super root in the last segment\n");
94 		break;
95 	}
96 	return -EINVAL;
97 }
98 
99 static void store_segsum_info(struct nilfs_segsum_info *ssi,
100 			      struct nilfs_segment_summary *sum,
101 			      unsigned int blocksize)
102 {
103 	ssi->flags = le16_to_cpu(sum->ss_flags);
104 	ssi->seg_seq = le64_to_cpu(sum->ss_seq);
105 	ssi->ctime = le64_to_cpu(sum->ss_create);
106 	ssi->next = le64_to_cpu(sum->ss_next);
107 	ssi->nblocks = le32_to_cpu(sum->ss_nblocks);
108 	ssi->nfinfo = le32_to_cpu(sum->ss_nfinfo);
109 	ssi->sumbytes = le32_to_cpu(sum->ss_sumbytes);
110 
111 	ssi->nsumblk = DIV_ROUND_UP(ssi->sumbytes, blocksize);
112 	ssi->nfileblk = ssi->nblocks - ssi->nsumblk - !!NILFS_SEG_HAS_SR(ssi);
113 }
114 
115 /**
116  * calc_crc_cont - check CRC of blocks continuously
117  * @sbi: nilfs_sb_info
118  * @bhs: buffer head of start block
119  * @sum: place to store result
120  * @offset: offset bytes in the first block
121  * @check_bytes: number of bytes to be checked
122  * @start: DBN of start block
123  * @nblock: number of blocks to be checked
124  */
125 static int calc_crc_cont(struct nilfs_sb_info *sbi, struct buffer_head *bhs,
126 			 u32 *sum, unsigned long offset, u64 check_bytes,
127 			 sector_t start, unsigned long nblock)
128 {
129 	unsigned long blocksize = sbi->s_super->s_blocksize;
130 	unsigned long size;
131 	u32 crc;
132 
133 	BUG_ON(offset >= blocksize);
134 	check_bytes -= offset;
135 	size = min_t(u64, check_bytes, blocksize - offset);
136 	crc = crc32_le(sbi->s_nilfs->ns_crc_seed,
137 		       (unsigned char *)bhs->b_data + offset, size);
138 	if (--nblock > 0) {
139 		do {
140 			struct buffer_head *bh
141 				= sb_bread(sbi->s_super, ++start);
142 			if (!bh)
143 				return -EIO;
144 			check_bytes -= size;
145 			size = min_t(u64, check_bytes, blocksize);
146 			crc = crc32_le(crc, bh->b_data, size);
147 			brelse(bh);
148 		} while (--nblock > 0);
149 	}
150 	*sum = crc;
151 	return 0;
152 }
153 
154 /**
155  * nilfs_read_super_root_block - read super root block
156  * @sb: super_block
157  * @sr_block: disk block number of the super root block
158  * @pbh: address of a buffer_head pointer to return super root buffer
159  * @check: CRC check flag
160  */
161 int nilfs_read_super_root_block(struct super_block *sb, sector_t sr_block,
162 				struct buffer_head **pbh, int check)
163 {
164 	struct buffer_head *bh_sr;
165 	struct nilfs_super_root *sr;
166 	u32 crc;
167 	int ret;
168 
169 	*pbh = NULL;
170 	bh_sr = sb_bread(sb, sr_block);
171 	if (unlikely(!bh_sr)) {
172 		ret = NILFS_SEG_FAIL_IO;
173 		goto failed;
174 	}
175 
176 	sr = (struct nilfs_super_root *)bh_sr->b_data;
177 	if (check) {
178 		unsigned bytes = le16_to_cpu(sr->sr_bytes);
179 
180 		if (bytes == 0 || bytes > sb->s_blocksize) {
181 			ret = NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT;
182 			goto failed_bh;
183 		}
184 		if (calc_crc_cont(NILFS_SB(sb), bh_sr, &crc,
185 				  sizeof(sr->sr_sum), bytes, sr_block, 1)) {
186 			ret = NILFS_SEG_FAIL_IO;
187 			goto failed_bh;
188 		}
189 		if (crc != le32_to_cpu(sr->sr_sum)) {
190 			ret = NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT;
191 			goto failed_bh;
192 		}
193 	}
194 	*pbh = bh_sr;
195 	return 0;
196 
197  failed_bh:
198 	brelse(bh_sr);
199 
200  failed:
201 	return nilfs_warn_segment_error(ret);
202 }
203 
204 /**
205  * load_segment_summary - read segment summary of the specified partial segment
206  * @sbi: nilfs_sb_info
207  * @pseg_start: start disk block number of partial segment
208  * @seg_seq: sequence number requested
209  * @ssi: pointer to nilfs_segsum_info struct to store information
210  * @full_check: full check flag
211  *              (0: only checks segment summary CRC, 1: data CRC)
212  */
213 static int
214 load_segment_summary(struct nilfs_sb_info *sbi, sector_t pseg_start,
215 		     u64 seg_seq, struct nilfs_segsum_info *ssi,
216 		     int full_check)
217 {
218 	struct buffer_head *bh_sum;
219 	struct nilfs_segment_summary *sum;
220 	unsigned long offset, nblock;
221 	u64 check_bytes;
222 	u32 crc, crc_sum;
223 	int ret = NILFS_SEG_FAIL_IO;
224 
225 	bh_sum = sb_bread(sbi->s_super, pseg_start);
226 	if (!bh_sum)
227 		goto out;
228 
229 	sum = (struct nilfs_segment_summary *)bh_sum->b_data;
230 
231 	/* Check consistency of segment summary */
232 	if (le32_to_cpu(sum->ss_magic) != NILFS_SEGSUM_MAGIC) {
233 		ret = NILFS_SEG_FAIL_MAGIC;
234 		goto failed;
235 	}
236 	store_segsum_info(ssi, sum, sbi->s_super->s_blocksize);
237 	if (seg_seq != ssi->seg_seq) {
238 		ret = NILFS_SEG_FAIL_SEQ;
239 		goto failed;
240 	}
241 	if (full_check) {
242 		offset = sizeof(sum->ss_datasum);
243 		check_bytes =
244 			((u64)ssi->nblocks << sbi->s_super->s_blocksize_bits);
245 		nblock = ssi->nblocks;
246 		crc_sum = le32_to_cpu(sum->ss_datasum);
247 		ret = NILFS_SEG_FAIL_CHECKSUM_FULL;
248 	} else { /* only checks segment summary */
249 		offset = sizeof(sum->ss_datasum) + sizeof(sum->ss_sumsum);
250 		check_bytes = ssi->sumbytes;
251 		nblock = ssi->nsumblk;
252 		crc_sum = le32_to_cpu(sum->ss_sumsum);
253 		ret = NILFS_SEG_FAIL_CHECKSUM_SEGSUM;
254 	}
255 
256 	if (unlikely(nblock == 0 ||
257 		     nblock > sbi->s_nilfs->ns_blocks_per_segment)) {
258 		/* This limits the number of blocks read in the CRC check */
259 		ret = NILFS_SEG_FAIL_CONSISTENCY;
260 		goto failed;
261 	}
262 	if (calc_crc_cont(sbi, bh_sum, &crc, offset, check_bytes,
263 			  pseg_start, nblock)) {
264 		ret = NILFS_SEG_FAIL_IO;
265 		goto failed;
266 	}
267 	if (crc == crc_sum)
268 		ret = 0;
269  failed:
270 	brelse(bh_sum);
271  out:
272 	return ret;
273 }
274 
275 static void *segsum_get(struct super_block *sb, struct buffer_head **pbh,
276 			unsigned int *offset, unsigned int bytes)
277 {
278 	void *ptr;
279 	sector_t blocknr;
280 
281 	BUG_ON((*pbh)->b_size < *offset);
282 	if (bytes > (*pbh)->b_size - *offset) {
283 		blocknr = (*pbh)->b_blocknr;
284 		brelse(*pbh);
285 		*pbh = sb_bread(sb, blocknr + 1);
286 		if (unlikely(!*pbh))
287 			return NULL;
288 		*offset = 0;
289 	}
290 	ptr = (*pbh)->b_data + *offset;
291 	*offset += bytes;
292 	return ptr;
293 }
294 
295 static void segsum_skip(struct super_block *sb, struct buffer_head **pbh,
296 			unsigned int *offset, unsigned int bytes,
297 			unsigned long count)
298 {
299 	unsigned int rest_item_in_current_block
300 		= ((*pbh)->b_size - *offset) / bytes;
301 
302 	if (count <= rest_item_in_current_block) {
303 		*offset += bytes * count;
304 	} else {
305 		sector_t blocknr = (*pbh)->b_blocknr;
306 		unsigned int nitem_per_block = (*pbh)->b_size / bytes;
307 		unsigned int bcnt;
308 
309 		count -= rest_item_in_current_block;
310 		bcnt = DIV_ROUND_UP(count, nitem_per_block);
311 		*offset = bytes * (count - (bcnt - 1) * nitem_per_block);
312 
313 		brelse(*pbh);
314 		*pbh = sb_bread(sb, blocknr + bcnt);
315 	}
316 }
317 
318 static int
319 collect_blocks_from_segsum(struct nilfs_sb_info *sbi, sector_t sum_blocknr,
320 			   struct nilfs_segsum_info *ssi,
321 			   struct list_head *head)
322 {
323 	struct buffer_head *bh;
324 	unsigned int offset;
325 	unsigned long nfinfo = ssi->nfinfo;
326 	sector_t blocknr = sum_blocknr + ssi->nsumblk;
327 	ino_t ino;
328 	int err = -EIO;
329 
330 	if (!nfinfo)
331 		return 0;
332 
333 	bh = sb_bread(sbi->s_super, sum_blocknr);
334 	if (unlikely(!bh))
335 		goto out;
336 
337 	offset = le16_to_cpu(
338 		((struct nilfs_segment_summary *)bh->b_data)->ss_bytes);
339 	for (;;) {
340 		unsigned long nblocks, ndatablk, nnodeblk;
341 		struct nilfs_finfo *finfo;
342 
343 		finfo = segsum_get(sbi->s_super, &bh, &offset, sizeof(*finfo));
344 		if (unlikely(!finfo))
345 			goto out;
346 
347 		ino = le64_to_cpu(finfo->fi_ino);
348 		nblocks = le32_to_cpu(finfo->fi_nblocks);
349 		ndatablk = le32_to_cpu(finfo->fi_ndatablk);
350 		nnodeblk = nblocks - ndatablk;
351 
352 		while (ndatablk-- > 0) {
353 			struct nilfs_recovery_block *rb;
354 			struct nilfs_binfo_v *binfo;
355 
356 			binfo = segsum_get(sbi->s_super, &bh, &offset,
357 					   sizeof(*binfo));
358 			if (unlikely(!binfo))
359 				goto out;
360 
361 			rb = kmalloc(sizeof(*rb), GFP_NOFS);
362 			if (unlikely(!rb)) {
363 				err = -ENOMEM;
364 				goto out;
365 			}
366 			rb->ino = ino;
367 			rb->blocknr = blocknr++;
368 			rb->vblocknr = le64_to_cpu(binfo->bi_vblocknr);
369 			rb->blkoff = le64_to_cpu(binfo->bi_blkoff);
370 			/* INIT_LIST_HEAD(&rb->list); */
371 			list_add_tail(&rb->list, head);
372 		}
373 		if (--nfinfo == 0)
374 			break;
375 		blocknr += nnodeblk; /* always 0 for the data sync segments */
376 		segsum_skip(sbi->s_super, &bh, &offset, sizeof(__le64),
377 			    nnodeblk);
378 		if (unlikely(!bh))
379 			goto out;
380 	}
381 	err = 0;
382  out:
383 	brelse(bh);   /* brelse(NULL) is just ignored */
384 	return err;
385 }
386 
387 static void dispose_recovery_list(struct list_head *head)
388 {
389 	while (!list_empty(head)) {
390 		struct nilfs_recovery_block *rb
391 			= list_entry(head->next,
392 				     struct nilfs_recovery_block, list);
393 		list_del(&rb->list);
394 		kfree(rb);
395 	}
396 }
397 
398 void nilfs_dispose_segment_list(struct list_head *head)
399 {
400 	while (!list_empty(head)) {
401 		struct nilfs_segment_entry *ent
402 			= list_entry(head->next,
403 				     struct nilfs_segment_entry, list);
404 		list_del(&ent->list);
405 		nilfs_free_segment_entry(ent);
406 	}
407 }
408 
409 static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
410 					      struct nilfs_recovery_info *ri)
411 {
412 	struct list_head *head = &ri->ri_used_segments;
413 	struct nilfs_segment_entry *ent, *n;
414 	struct inode *sufile = nilfs->ns_sufile;
415 	__u64 segnum[4];
416 	int err;
417 	int i;
418 
419 	segnum[0] = nilfs->ns_segnum;
420 	segnum[1] = nilfs->ns_nextnum;
421 	segnum[2] = ri->ri_segnum;
422 	segnum[3] = ri->ri_nextnum;
423 
424 	/*
425 	 * Releasing the next segment of the latest super root.
426 	 * The next segment is invalidated by this recovery.
427 	 */
428 	err = nilfs_sufile_free(sufile, segnum[1]);
429 	if (unlikely(err))
430 		goto failed;
431 
432 	err = -ENOMEM;
433 	for (i = 1; i < 4; i++) {
434 		ent = nilfs_alloc_segment_entry(segnum[i]);
435 		if (unlikely(!ent))
436 			goto failed;
437 		list_add_tail(&ent->list, head);
438 	}
439 
440 	/*
441 	 * Collecting segments written after the latest super root.
442 	 * These are marked dirty to avoid being reallocated in the next write.
443 	 */
444 	list_for_each_entry_safe(ent, n, head, list) {
445 		if (ent->segnum != segnum[0]) {
446 			err = nilfs_sufile_scrap(sufile, ent->segnum);
447 			if (unlikely(err))
448 				goto failed;
449 		}
450 		list_del(&ent->list);
451 		nilfs_free_segment_entry(ent);
452 	}
453 
454 	/* Allocate new segments for recovery */
455 	err = nilfs_sufile_alloc(sufile, &segnum[0]);
456 	if (unlikely(err))
457 		goto failed;
458 
459 	nilfs->ns_pseg_offset = 0;
460 	nilfs->ns_seg_seq = ri->ri_seq + 2;
461 	nilfs->ns_nextnum = nilfs->ns_segnum = segnum[0];
462 	return 0;
463 
464  failed:
465 	/* No need to recover sufile because it will be destroyed on error */
466 	return err;
467 }
468 
469 static int nilfs_recovery_copy_block(struct nilfs_sb_info *sbi,
470 				     struct nilfs_recovery_block *rb,
471 				     struct page *page)
472 {
473 	struct buffer_head *bh_org;
474 	void *kaddr;
475 
476 	bh_org = sb_bread(sbi->s_super, rb->blocknr);
477 	if (unlikely(!bh_org))
478 		return -EIO;
479 
480 	kaddr = kmap_atomic(page, KM_USER0);
481 	memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size);
482 	kunmap_atomic(kaddr, KM_USER0);
483 	brelse(bh_org);
484 	return 0;
485 }
486 
487 static int recover_dsync_blocks(struct nilfs_sb_info *sbi,
488 				struct list_head *head,
489 				unsigned long *nr_salvaged_blocks)
490 {
491 	struct inode *inode;
492 	struct nilfs_recovery_block *rb, *n;
493 	unsigned blocksize = sbi->s_super->s_blocksize;
494 	struct page *page;
495 	loff_t pos;
496 	int err = 0, err2 = 0;
497 
498 	list_for_each_entry_safe(rb, n, head, list) {
499 		inode = nilfs_iget(sbi->s_super, rb->ino);
500 		if (IS_ERR(inode)) {
501 			err = PTR_ERR(inode);
502 			inode = NULL;
503 			goto failed_inode;
504 		}
505 
506 		pos = rb->blkoff << inode->i_blkbits;
507 		page = NULL;
508 		err = block_write_begin(NULL, inode->i_mapping, pos, blocksize,
509 					0, &page, NULL, nilfs_get_block);
510 		if (unlikely(err))
511 			goto failed_inode;
512 
513 		err = nilfs_recovery_copy_block(sbi, rb, page);
514 		if (unlikely(err))
515 			goto failed_page;
516 
517 		err = nilfs_set_file_dirty(sbi, inode, 1);
518 		if (unlikely(err))
519 			goto failed_page;
520 
521 		block_write_end(NULL, inode->i_mapping, pos, blocksize,
522 				blocksize, page, NULL);
523 
524 		unlock_page(page);
525 		page_cache_release(page);
526 
527 		(*nr_salvaged_blocks)++;
528 		goto next;
529 
530  failed_page:
531 		unlock_page(page);
532 		page_cache_release(page);
533 
534  failed_inode:
535 		printk(KERN_WARNING
536 		       "NILFS warning: error recovering data block "
537 		       "(err=%d, ino=%lu, block-offset=%llu)\n",
538 		       err, rb->ino, (unsigned long long)rb->blkoff);
539 		if (!err2)
540 			err2 = err;
541  next:
542 		iput(inode); /* iput(NULL) is just ignored */
543 		list_del_init(&rb->list);
544 		kfree(rb);
545 	}
546 	return err2;
547 }
548 
/**
 * nilfs_do_roll_forward - salvage logical segments newer than the latest
 * checkpoint
 * @nilfs: the_nilfs
 * @sbi: nilfs_sb_info
 * @ri: pointer to a nilfs_recovery_info
 *
 * Scans partial segments starting at @ri->ri_lsegs_start, fully verifying
 * the checksum of each one.  Data blocks of data-sync logs are collected
 * and, once the end of a log is seen, copied back into their files.  If at
 * least one block was salvaged, @ri->ri_need_recovery is set to
 * NILFS_RECOVERY_ROLLFORWARD_DONE.
 *
 * Return Value: 0 on success.  On failure an error is logged and a negative
 * error code is returned: -EIO on an I/O error, -EINVAL if the scan meets
 * an unexpected partial segment (one with a super root, or a non-data-sync
 * one inside a data-sync log), or the error reported by a helper.
 */
static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
				 struct nilfs_sb_info *sbi,
				 struct nilfs_recovery_info *ri)
{
	struct nilfs_segsum_info ssi;
	sector_t pseg_start;
	sector_t seg_start, seg_end;  /* Starting/ending DBN of full segment */
	unsigned long nsalvaged_blocks = 0;
	u64 seg_seq;
	__u64 segnum, nextnum = 0;
	int empty_seg = 0;	/* non-zero after moving to a new full segment
				   until a valid partial segment is found */
	int err = 0, ret;
	LIST_HEAD(dsync_blocks);  /* list of data blocks to be recovered */
	enum {
		RF_INIT_ST,    /* looking for the beginning of a dsync log */
		RF_DSYNC_ST,   /* scanning data-sync segments */
	};
	int state = RF_INIT_ST;

	nilfs_attach_writer(nilfs, sbi);
	pseg_start = ri->ri_lsegs_start;
	seg_seq = ri->ri_lsegs_start_seq;
	segnum = nilfs_get_segnum_of_block(nilfs, pseg_start);
	nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);

	while (segnum != ri->ri_segnum || pseg_start <= ri->ri_pseg_start) {

		/* Full check: the CRC covers the whole partial segment. */
		ret = load_segment_summary(sbi, pseg_start, seg_seq, &ssi, 1);
		if (ret) {
			if (ret == NILFS_SEG_FAIL_IO) {
				err = -EIO;
				goto failed;
			}
			/* Any other failure: not a valid log at this place */
			goto strayed;
		}
		if (unlikely(NILFS_SEG_HAS_SR(&ssi)))
			goto confused;

		/* Found a valid partial segment; do recovery actions */
		nextnum = nilfs_get_segnum_of_block(nilfs, ssi.next);
		empty_seg = 0;
		nilfs->ns_ctime = ssi.ctime;
		if (!(ssi.flags & NILFS_SS_GC))
			nilfs->ns_nongc_ctime = ssi.ctime;

		switch (state) {
		case RF_INIT_ST:
			/* Only the first pseg of a data-sync log starts a
			   collection; anything else is stepped over. */
			if (!NILFS_SEG_LOGBGN(&ssi) || !NILFS_SEG_DSYNC(&ssi))
				goto try_next_pseg;
			state = RF_DSYNC_ST;
			/* Fall through */
		case RF_DSYNC_ST:
			if (!NILFS_SEG_DSYNC(&ssi))
				goto confused;

			err = collect_blocks_from_segsum(
				sbi, pseg_start, &ssi, &dsync_blocks);
			if (unlikely(err))
				goto failed;
			if (NILFS_SEG_LOGEND(&ssi)) {
				/* The log is complete; write its blocks back
				   (this also empties dsync_blocks). */
				err = recover_dsync_blocks(
					sbi, &dsync_blocks, &nsalvaged_blocks);
				if (unlikely(err))
					goto failed;
				state = RF_INIT_ST;
			}
			break; /* Fall through to try_next_pseg */
		}

 try_next_pseg:
		if (pseg_start == ri->ri_lsegs_end)
			break;
		pseg_start += ssi.nblocks;
		if (pseg_start < seg_end)
			continue;
		goto feed_segment;

 strayed:
		if (pseg_start == ri->ri_lsegs_end)
			break;

 feed_segment:
		/* Looking to the next full segment */
		/* Give up if the previous move found no valid pseg either. */
		if (empty_seg++)
			break;
		seg_seq++;
		segnum = nextnum;
		nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);
		pseg_start = seg_start;
	}

	if (nsalvaged_blocks) {
		printk(KERN_INFO "NILFS (device %s): salvaged %lu blocks\n",
		       sbi->s_super->s_id, nsalvaged_blocks);
		ri->ri_need_recovery = NILFS_RECOVERY_ROLLFORWARD_DONE;
	}
 out:
	/* Frees blocks collected for a log whose end was never reached. */
	dispose_recovery_list(&dsync_blocks);
	nilfs_detach_writer(sbi->s_nilfs, sbi);
	return err;

 confused:
	err = -EINVAL;
 failed:
	printk(KERN_ERR
	       "NILFS (device %s): Error roll-forwarding "
	       "(err=%d, pseg block=%llu). ",
	       sbi->s_super->s_id, err, (unsigned long long)pseg_start);
	goto out;
}
666 
667 static void nilfs_finish_roll_forward(struct the_nilfs *nilfs,
668 				      struct nilfs_sb_info *sbi,
669 				      struct nilfs_recovery_info *ri)
670 {
671 	struct buffer_head *bh;
672 	int err;
673 
674 	if (nilfs_get_segnum_of_block(nilfs, ri->ri_lsegs_start) !=
675 	    nilfs_get_segnum_of_block(nilfs, ri->ri_super_root))
676 		return;
677 
678 	bh = sb_getblk(sbi->s_super, ri->ri_lsegs_start);
679 	BUG_ON(!bh);
680 	memset(bh->b_data, 0, bh->b_size);
681 	set_buffer_dirty(bh);
682 	err = sync_dirty_buffer(bh);
683 	if (unlikely(err))
684 		printk(KERN_WARNING
685 		       "NILFS warning: buffer sync write failed during "
686 		       "post-cleaning of recovery.\n");
687 	brelse(bh);
688 }
689 
690 /**
691  * nilfs_recover_logical_segments - salvage logical segments written after
692  * the latest super root
693  * @nilfs: the_nilfs
694  * @sbi: nilfs_sb_info
695  * @ri: pointer to a nilfs_recovery_info struct to store search results.
696  *
697  * Return Value: On success, 0 is returned.  On error, one of the following
698  * negative error code is returned.
699  *
700  * %-EINVAL - Inconsistent filesystem state.
701  *
702  * %-EIO - I/O error
703  *
704  * %-ENOSPC - No space left on device (only in a panic state).
705  *
706  * %-ERESTARTSYS - Interrupted.
707  *
708  * %-ENOMEM - Insufficient memory available.
709  */
710 int nilfs_recover_logical_segments(struct the_nilfs *nilfs,
711 				   struct nilfs_sb_info *sbi,
712 				   struct nilfs_recovery_info *ri)
713 {
714 	int err;
715 
716 	if (ri->ri_lsegs_start == 0 || ri->ri_lsegs_end == 0)
717 		return 0;
718 
719 	err = nilfs_attach_checkpoint(sbi, ri->ri_cno);
720 	if (unlikely(err)) {
721 		printk(KERN_ERR
722 		       "NILFS: error loading the latest checkpoint.\n");
723 		return err;
724 	}
725 
726 	err = nilfs_do_roll_forward(nilfs, sbi, ri);
727 	if (unlikely(err))
728 		goto failed;
729 
730 	if (ri->ri_need_recovery == NILFS_RECOVERY_ROLLFORWARD_DONE) {
731 		err = nilfs_prepare_segment_for_recovery(nilfs, ri);
732 		if (unlikely(err)) {
733 			printk(KERN_ERR "NILFS: Error preparing segments for "
734 			       "recovery.\n");
735 			goto failed;
736 		}
737 
738 		err = nilfs_attach_segment_constructor(sbi);
739 		if (unlikely(err))
740 			goto failed;
741 
742 		set_nilfs_discontinued(nilfs);
743 		err = nilfs_construct_segment(sbi->s_super);
744 		nilfs_detach_segment_constructor(sbi);
745 
746 		if (unlikely(err)) {
747 			printk(KERN_ERR "NILFS: Oops! recovery failed. "
748 			       "(err=%d)\n", err);
749 			goto failed;
750 		}
751 
752 		nilfs_finish_roll_forward(nilfs, sbi, ri);
753 	}
754 
755 	nilfs_detach_checkpoint(sbi);
756 	return 0;
757 
758  failed:
759 	nilfs_detach_checkpoint(sbi);
760 	nilfs_mdt_clear(nilfs->ns_cpfile);
761 	nilfs_mdt_clear(nilfs->ns_sufile);
762 	nilfs_mdt_clear(nilfs->ns_dat);
763 	return err;
764 }
765 
/**
 * nilfs_search_super_root - search the latest valid super root
 * @nilfs: the_nilfs
 * @sbi: nilfs_sb_info
 * @ri: pointer to a nilfs_recovery_info struct to store search results.
 *
 * nilfs_search_super_root() looks for the latest super-root from a partial
 * segment pointed by the superblock.  It sets up struct the_nilfs through
 * this search. It fills nilfs_recovery_info (ri) required for recovery.
 *
 * The scan starts at @nilfs->ns_last_pseg.  Unless the mount state has
 * NILFS_VALID_FS set, it continues past the first super root to look for
 * newer ones and for logical segments written after the last of them.
 * Full segments visited on the way are appended to @ri->ri_used_segments.
 *
 * Return Value: On success, 0 is returned.  On error, one of the following
 * negative error code is returned.
 *
 * %-EINVAL - No valid segment found
 *
 * %-EIO - I/O error
 *
 * %-ENOMEM - Insufficient memory available.
 */
int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
			    struct nilfs_recovery_info *ri)
{
	struct nilfs_segsum_info ssi;
	sector_t pseg_start, pseg_end, sr_pseg_start = 0;
	sector_t seg_start, seg_end; /* range of full segment (block number) */
	u64 seg_seq;
	__u64 segnum, nextnum = 0;
	__u64 cno;
	struct nilfs_segment_entry *ent;
	LIST_HEAD(segments);	/* full segments passed since the last super
				   root that was found */
	int empty_seg = 0, scan_newer = 0;
	int ret;

	pseg_start = nilfs->ns_last_pseg;
	seg_seq = nilfs->ns_last_seq;
	cno = nilfs->ns_last_cno;
	segnum = nilfs_get_segnum_of_block(nilfs, pseg_start);

	/* Calculate range of segment */
	nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);

	for (;;) {
		/* Load segment summary */
		ret = load_segment_summary(sbi, pseg_start, seg_seq, &ssi, 1);
		if (ret) {
			if (ret == NILFS_SEG_FAIL_IO)
				goto failed;
			goto strayed;
		}
		/* A partial segment must not cross the full segment's end. */
		pseg_end = pseg_start + ssi.nblocks - 1;
		if (unlikely(pseg_end > seg_end)) {
			ret = NILFS_SEG_FAIL_CONSISTENCY;
			goto strayed;
		}

		/* A valid partial segment */
		ri->ri_pseg_start = pseg_start;
		ri->ri_seq = seg_seq;
		ri->ri_segnum = segnum;
		nextnum = nilfs_get_segnum_of_block(nilfs, ssi.next);
		ri->ri_nextnum = nextnum;
		empty_seg = 0;

		if (!NILFS_SEG_HAS_SR(&ssi)) {
			if (!scan_newer) {
				/* This will never happen because a superblock
				   (last_segment) always points to a pseg
				   having a super root. */
				ret = NILFS_SEG_FAIL_CONSISTENCY;
				goto failed;
			}
			/* Record the extent of logical segments written
			   after the latest super root. */
			if (!ri->ri_lsegs_start && NILFS_SEG_LOGBGN(&ssi)) {
				ri->ri_lsegs_start = pseg_start;
				ri->ri_lsegs_start_seq = seg_seq;
			}
			if (NILFS_SEG_LOGEND(&ssi))
				ri->ri_lsegs_end = pseg_start;
			goto try_next_pseg;
		}

		/* A valid super root was found. */
		ri->ri_cno = cno++;
		ri->ri_super_root = pseg_end;
		/* Logical segments seen so far precede this super root. */
		ri->ri_lsegs_start = ri->ri_lsegs_end = 0;

		nilfs_dispose_segment_list(&segments);
		nilfs->ns_pseg_offset = (sr_pseg_start = pseg_start)
			+ ssi.nblocks - seg_start;
		nilfs->ns_seg_seq = seg_seq;
		nilfs->ns_segnum = segnum;
		nilfs->ns_cno = cno;  /* nilfs->ns_cno = ri->ri_cno + 1 */
		nilfs->ns_ctime = ssi.ctime;
		nilfs->ns_nextnum = nextnum;

		if (scan_newer)
			ri->ri_need_recovery = NILFS_RECOVERY_SR_UPDATED;
		else {
			/* When NILFS_VALID_FS is set, the super root the
			   superblock points to is accepted without scanning
			   for newer ones. */
			if (nilfs->ns_mount_state & NILFS_VALID_FS)
				goto super_root_found;
			scan_newer = 1;
		}

		/* reset region for roll-forward */
		pseg_start += ssi.nblocks;
		if (pseg_start < seg_end)
			continue;
		goto feed_segment;

 try_next_pseg:
		/* Standing on a course, or met an inconsistent state */
		pseg_start += ssi.nblocks;
		if (pseg_start < seg_end)
			continue;
		goto feed_segment;

 strayed:
		/* Off the trail */
		if (!scan_newer)
			/*
			 * This can happen if a checkpoint was written without
			 * barriers, or as a result of an I/O failure.
			 */
			goto failed;

 feed_segment:
		/* Looking to the next full segment */
		if (empty_seg++)
			goto super_root_found; /* found a valid super root */

		/* Remember the segment being left behind. */
		ent = nilfs_alloc_segment_entry(segnum);
		if (unlikely(!ent)) {
			ret = -ENOMEM;
			goto failed;
		}
		list_add_tail(&ent->list, &segments);

		seg_seq++;
		segnum = nextnum;
		nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);
		pseg_start = seg_start;
	}

 super_root_found:
	/* Updating pointers relating to the latest checkpoint */
	list_splice(&segments, ri->ri_used_segments.prev);
	nilfs->ns_last_pseg = sr_pseg_start;
	nilfs->ns_last_seq = nilfs->ns_seg_seq;
	nilfs->ns_last_cno = ri->ri_cno;
	return 0;

 failed:
	nilfs_dispose_segment_list(&segments);
	/* ret is either a negative errno or a positive NILFS_SEG_* code. */
	return (ret < 0) ? ret : nilfs_warn_segment_error(ret);
}
918