/*
 * recovery.c - NILFS recovery logic
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * Written by Ryusuke Konishi <ryusuke@osrg.net>
 */

#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <linux/swap.h>
#include <linux/crc32.h>
#include "nilfs.h"
#include "segment.h"
#include "sufile.h"
#include "page.h"
#include "seglist.h"
#include "segbuf.h"

/*
 * Segment check result
 */
enum {
	NILFS_SEG_VALID,
	NILFS_SEG_NO_SUPER_ROOT,
	NILFS_SEG_FAIL_IO,
	NILFS_SEG_FAIL_MAGIC,
	NILFS_SEG_FAIL_SEQ,
	NILFS_SEG_FAIL_CHECKSUM_SEGSUM,
	NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT,
	NILFS_SEG_FAIL_CHECKSUM_FULL,
	NILFS_SEG_FAIL_CONSISTENCY,
};

/* work structure for recovery */
struct nilfs_recovery_block {
	ino_t ino;		/* Inode number of the file that this block
				   belongs to */
	sector_t blocknr;	/* block number */
	__u64 vblocknr;		/* virtual block number */
	unsigned long blkoff;	/* File offset of the data block (in blocks) */
	struct list_head list;
};


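/**
 * nilfs_warn_segment_error - report a segment check failure
 * @err: segment check result (one of the NILFS_SEG_* codes above)
 *
 * Prints a warning describing the failure and converts the check result
 * into a negative error code: -EIO for an I/O failure, -EINVAL otherwise.
 */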
static int nilfs_warn_segment_error(int err)
{
	switch (err) {
	case NILFS_SEG_FAIL_IO:
		printk(KERN_WARNING
		       "NILFS warning: I/O error on loading last segment\n");
		return -EIO;
	case NILFS_SEG_FAIL_MAGIC:
		printk(KERN_WARNING
		       "NILFS warning: Segment magic number invalid\n");
		break;
	case NILFS_SEG_FAIL_SEQ:
		printk(KERN_WARNING
		       "NILFS warning: Sequence number mismatch\n");
		break;
	case NILFS_SEG_FAIL_CHECKSUM_SEGSUM:
		printk(KERN_WARNING
		       "NILFS warning: Checksum error in segment summary\n");
		break;
	case NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT:
		printk(KERN_WARNING
		       "NILFS warning: Checksum error in super root\n");
		break;
	case NILFS_SEG_FAIL_CHECKSUM_FULL:
		printk(KERN_WARNING
		       "NILFS warning: Checksum error in segment payload\n");
		break;
	case NILFS_SEG_FAIL_CONSISTENCY:
		printk(KERN_WARNING
		       "NILFS warning: Inconsistent segment\n");
		break;
	case NILFS_SEG_NO_SUPER_ROOT:
		printk(KERN_WARNING
		       "NILFS warning: No super root in the last segment\n");
		break;
	}
	return -EINVAL;
}

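/**
 * store_segsum_info - unpack an on-disk segment summary
 * @ssi: nilfs_segsum_info struct to be filled
 * @sum: on-disk segment summary
 * @blocksize: block size of the file system
 *
 * Converts the summary fields to host byte order and derives the number
 * of summary blocks and of file (payload) blocks in the partial segment.
 */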
static void store_segsum_info(struct nilfs_segsum_info *ssi,
			      struct nilfs_segment_summary *sum,
			      unsigned int blocksize)
{
	ssi->flags = le16_to_cpu(sum->ss_flags);
	ssi->seg_seq = le64_to_cpu(sum->ss_seq);
	ssi->ctime = le64_to_cpu(sum->ss_create);
	ssi->next = le64_to_cpu(sum->ss_next);
	ssi->nblocks = le32_to_cpu(sum->ss_nblocks);
	ssi->nfinfo = le32_to_cpu(sum->ss_nfinfo);
	ssi->sumbytes = le32_to_cpu(sum->ss_sumbytes);

	ssi->nsumblk = DIV_ROUND_UP(ssi->sumbytes, blocksize);
	ssi->nfileblk = ssi->nblocks - ssi->nsumblk - !!NILFS_SEG_HAS_SR(ssi);
}

/**
 * calc_crc_cont - compute the CRC of consecutive blocks
 * @sbi: nilfs_sb_info
 * @bhs: buffer head of start block
 * @sum: place to store the result
 * @offset: offset in bytes within the first block
 * @check_bytes: number of bytes to be checked
 * @start: DBN of start block
 * @nblock: number of blocks to be checked
 */
static int calc_crc_cont(struct nilfs_sb_info *sbi, struct buffer_head *bhs,
			 u32 *sum, unsigned long offset, u64 check_bytes,
			 sector_t start, unsigned long nblock)
{
	unsigned long blocksize = sbi->s_super->s_blocksize;
	unsigned long size;
	u32 crc;

	BUG_ON(offset >= blocksize);
	check_bytes -= offset;
	size = min_t(u64, check_bytes, blocksize - offset);
	crc = crc32_le(sbi->s_nilfs->ns_crc_seed,
		       (unsigned char *)bhs->b_data + offset, size);
	if (--nblock > 0) {
		do {
			struct buffer_head *bh
				= sb_bread(sbi->s_super, ++start);
			if (!bh)
				return -EIO;
			check_bytes -= size;
			size = min_t(u64, check_bytes, blocksize);
			crc = crc32_le(crc, bh->b_data, size);
			brelse(bh);
		} while (--nblock > 0);
	}
	*sum = crc;
	return 0;
}

/**
 * nilfs_read_super_root_block - read super root block
 * @sb: super_block
 * @sr_block: disk block number of the super root block
 * @pbh: address of a buffer_head pointer to return super root buffer
 * @check: CRC check flag
 */
int nilfs_read_super_root_block(struct super_block *sb, sector_t sr_block,
				struct buffer_head **pbh, int check)
{
	struct buffer_head *bh_sr;
	struct nilfs_super_root *sr;
	u32 crc;
	int ret;

	*pbh = NULL;
	bh_sr = sb_bread(sb, sr_block);
	if (unlikely(!bh_sr)) {
		ret = NILFS_SEG_FAIL_IO;
		goto failed;
	}

	sr = (struct nilfs_super_root *)bh_sr->b_data;
	if (check) {
		unsigned bytes = le16_to_cpu(sr->sr_bytes);

		if (bytes == 0 || bytes > sb->s_blocksize) {
			ret = NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT;
			goto failed_bh;
		}
		if (calc_crc_cont(NILFS_SB(sb), bh_sr, &crc,
				  sizeof(sr->sr_sum), bytes, sr_block, 1)) {
			ret = NILFS_SEG_FAIL_IO;
			goto failed_bh;
		}
		if (crc != le32_to_cpu(sr->sr_sum)) {
			ret = NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT;
			goto failed_bh;
		}
	}
	*pbh = bh_sr;
	return 0;

 failed_bh:
	brelse(bh_sr);

 failed:
	return nilfs_warn_segment_error(ret);
}

/**
 * load_segment_summary - read segment summary of the specified partial segment
 * @sbi: nilfs_sb_info
 * @pseg_start: start disk block number of partial segment
 * @seg_seq: sequence number requested
 * @ssi: pointer to nilfs_segsum_info struct to store information
 * @full_check: full check flag
 *              (0: only checks segment summary CRC, 1: data CRC)
 */
static int
load_segment_summary(struct nilfs_sb_info *sbi, sector_t pseg_start,
		     u64 seg_seq, struct nilfs_segsum_info *ssi,
		     int full_check)
{
	struct buffer_head *bh_sum;
	struct nilfs_segment_summary *sum;
	unsigned long offset, nblock;
	u64 check_bytes;
	u32 crc, crc_sum;
	int ret = NILFS_SEG_FAIL_IO;

	bh_sum = sb_bread(sbi->s_super, pseg_start);
	if (!bh_sum)
		goto out;

	sum = (struct nilfs_segment_summary *)bh_sum->b_data;

	/* Check consistency of segment summary */
	if (le32_to_cpu(sum->ss_magic) != NILFS_SEGSUM_MAGIC) {
		ret = NILFS_SEG_FAIL_MAGIC;
		goto failed;
	}
	store_segsum_info(ssi, sum, sbi->s_super->s_blocksize);
	if (seg_seq != ssi->seg_seq) {
		ret = NILFS_SEG_FAIL_SEQ;
		goto failed;
	}
	if (full_check) {
		offset = sizeof(sum->ss_datasum);
		check_bytes =
			((u64)ssi->nblocks << sbi->s_super->s_blocksize_bits);
		nblock = ssi->nblocks;
		crc_sum = le32_to_cpu(sum->ss_datasum);
		ret = NILFS_SEG_FAIL_CHECKSUM_FULL;
	} else { /* only checks segment summary */
		offset = sizeof(sum->ss_datasum) + sizeof(sum->ss_sumsum);
		check_bytes = ssi->sumbytes;
		nblock = ssi->nsumblk;
		crc_sum = le32_to_cpu(sum->ss_sumsum);
		ret = NILFS_SEG_FAIL_CHECKSUM_SEGSUM;
	}

	if (unlikely(nblock == 0 ||
		     nblock > sbi->s_nilfs->ns_blocks_per_segment)) {
		/* This limits the number of blocks read in the CRC check */
		ret = NILFS_SEG_FAIL_CONSISTENCY;
		goto failed;
	}
	if (calc_crc_cont(sbi, bh_sum, &crc, offset, check_bytes,
			  pseg_start, nblock)) {
		ret = NILFS_SEG_FAIL_IO;
		goto failed;
	}
	if (crc == crc_sum)
		ret = 0;
 failed:
	brelse(bh_sum);
 out:
	return ret;
}

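/**
 * segsum_get - read the next item from the segment summary
 * @sb: super block instance
 * @pbh: buffer head of the current summary block; replaced with the
 *       following block when the remaining space is too small for the item
 * @offset: byte offset within the current block, updated on return
 * @bytes: size of the item
 *
 * Return: pointer to the item, or NULL on a read error.
 */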
static void *segsum_get(struct super_block *sb, struct buffer_head **pbh,
			unsigned int *offset, unsigned int bytes)
{
	void *ptr;
	sector_t blocknr;

	BUG_ON((*pbh)->b_size < *offset);
	if (bytes > (*pbh)->b_size - *offset) {
		blocknr = (*pbh)->b_blocknr;
		brelse(*pbh);
		*pbh = sb_bread(sb, blocknr + 1);
		if (unlikely(!*pbh))
			return NULL;
		*offset = 0;
	}
	ptr = (*pbh)->b_data + *offset;
	*offset += bytes;
	return ptr;
}

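/**
 * segsum_skip - skip over items in the segment summary
 * @sb: super block instance
 * @pbh: buffer head of the current summary block; replaced when the skip
 *       crosses into a following block (set to NULL on a read error)
 * @offset: byte offset within the current block, updated on return
 * @bytes: size of each item
 * @count: number of items to skip
 */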
static void segsum_skip(struct super_block *sb, struct buffer_head **pbh,
			unsigned int *offset, unsigned int bytes,
			unsigned long count)
{
	unsigned int rest_item_in_current_block
		= ((*pbh)->b_size - *offset) / bytes;

	if (count <= rest_item_in_current_block) {
		*offset += bytes * count;
	} else {
		sector_t blocknr = (*pbh)->b_blocknr;
		unsigned int nitem_per_block = (*pbh)->b_size / bytes;
		unsigned int bcnt;

		count -= rest_item_in_current_block;
		bcnt = DIV_ROUND_UP(count, nitem_per_block);
		*offset = bytes * (count - (bcnt - 1) * nitem_per_block);

		brelse(*pbh);
		*pbh = sb_bread(sb, blocknr + bcnt);
	}
}

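/**
 * collect_blocks_from_segsum - collect data blocks described in a segment
 * summary
 * @sbi: nilfs_sb_info
 * @sum_blocknr: disk block number of the first segment summary block
 * @ssi: segment summary information of the partial segment
 * @head: list to which nilfs_recovery_block entries are queued
 *
 * Walks the finfo/binfo entries of the summary and queues one work item
 * per data block found; node (B-tree) blocks are skipped.
 */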
static int
collect_blocks_from_segsum(struct nilfs_sb_info *sbi, sector_t sum_blocknr,
			   struct nilfs_segsum_info *ssi,
			   struct list_head *head)
{
	struct buffer_head *bh;
	unsigned int offset;
	unsigned long nfinfo = ssi->nfinfo;
	sector_t blocknr = sum_blocknr + ssi->nsumblk;
	ino_t ino;
	int err = -EIO;

	if (!nfinfo)
		return 0;

	bh = sb_bread(sbi->s_super, sum_blocknr);
	if (unlikely(!bh))
		goto out;

	offset = le16_to_cpu(
		((struct nilfs_segment_summary *)bh->b_data)->ss_bytes);
	for (;;) {
		unsigned long nblocks, ndatablk, nnodeblk;
		struct nilfs_finfo *finfo;

		finfo = segsum_get(sbi->s_super, &bh, &offset, sizeof(*finfo));
		if (unlikely(!finfo))
			goto out;

		ino = le64_to_cpu(finfo->fi_ino);
		nblocks = le32_to_cpu(finfo->fi_nblocks);
		ndatablk = le32_to_cpu(finfo->fi_ndatablk);
		nnodeblk = nblocks - ndatablk;

		while (ndatablk-- > 0) {
			struct nilfs_recovery_block *rb;
			struct nilfs_binfo_v *binfo;

			binfo = segsum_get(sbi->s_super, &bh, &offset,
					   sizeof(*binfo));
			if (unlikely(!binfo))
				goto out;

			rb = kmalloc(sizeof(*rb), GFP_NOFS);
			if (unlikely(!rb)) {
				err = -ENOMEM;
				goto out;
			}
			rb->ino = ino;
			rb->blocknr = blocknr++;
			rb->vblocknr = le64_to_cpu(binfo->bi_vblocknr);
			rb->blkoff = le64_to_cpu(binfo->bi_blkoff);
			/* INIT_LIST_HEAD(&rb->list); */
			list_add_tail(&rb->list, head);
		}
		if (--nfinfo == 0)
			break;
		blocknr += nnodeblk; /* always 0 for the data sync segments */
		segsum_skip(sbi->s_super, &bh, &offset, sizeof(__le64),
			    nnodeblk);
		if (unlikely(!bh))
			goto out;
	}
	err = 0;
 out:
	brelse(bh);   /* brelse(NULL) is just ignored */
	return err;
}

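/**
 * dispose_recovery_list - release a list of recovery work items
 * @head: list of nilfs_recovery_block entries to be freed
 */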
static void dispose_recovery_list(struct list_head *head)
{
	while (!list_empty(head)) {
		struct nilfs_recovery_block *rb
			= list_entry(head->next,
				     struct nilfs_recovery_block, list);
		list_del(&rb->list);
		kfree(rb);
	}
}

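/**
 * nilfs_dispose_segment_list - release a list of segment entries
 * @head: list of nilfs_segment_entry structures to be freed
 */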
void nilfs_dispose_segment_list(struct list_head *head)
{
	while (!list_empty(head)) {
		struct nilfs_segment_entry *ent
			= list_entry(head->next,
				     struct nilfs_segment_entry, list);
		list_del(&ent->list);
		nilfs_free_segment_entry(ent);
	}
}

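/**
 * nilfs_prepare_segment_for_recovery - prepare segment usage for the
 * recovery write
 * @nilfs: the_nilfs
 * @sbi: nilfs_sb_info
 * @ri: nilfs_recovery_info carrying the recovery state
 *
 * Frees the segment that follows the latest super root, marks the
 * segments written after that super root so that they are not
 * reallocated by the next write, and allocates a fresh segment from
 * which the recovered changes will be written out.
 */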
static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
					      struct nilfs_sb_info *sbi,
					      struct nilfs_recovery_info *ri)
{
	struct list_head *head = &ri->ri_used_segments;
	struct nilfs_segment_entry *ent, *n;
	struct inode *sufile = nilfs->ns_sufile;
	__u64 segnum[4];
	int err;
	int i;

	segnum[0] = nilfs->ns_segnum;
	segnum[1] = nilfs->ns_nextnum;
	segnum[2] = ri->ri_segnum;
	segnum[3] = ri->ri_nextnum;

	nilfs_attach_writer(nilfs, sbi);
	/*
	 * Releasing the next segment of the latest super root.
	 * The next segment is invalidated by this recovery.
	 */
	err = nilfs_sufile_free(sufile, segnum[1]);
	if (unlikely(err))
		goto failed;

	err = -ENOMEM;
	for (i = 1; i < 4; i++) {
		ent = nilfs_alloc_segment_entry(segnum[i]);
		if (unlikely(!ent))
			goto failed;
		list_add_tail(&ent->list, head);
	}

	/*
	 * Collecting segments written after the latest super root.
	 * These are marked dirty to avoid being reallocated in the next write.
	 */
	list_for_each_entry_safe(ent, n, head, list) {
		if (ent->segnum != segnum[0]) {
			err = nilfs_sufile_scrap(sufile, ent->segnum);
			if (unlikely(err))
				goto failed;
		}
		list_del(&ent->list);
		nilfs_free_segment_entry(ent);
	}

	/* Allocate new segments for recovery */
	err = nilfs_sufile_alloc(sufile, &segnum[0]);
	if (unlikely(err))
		goto failed;

	nilfs->ns_pseg_offset = 0;
	nilfs->ns_seg_seq = ri->ri_seq + 2;
	nilfs->ns_nextnum = nilfs->ns_segnum = segnum[0];

 failed:
	/* No need to recover sufile because it will be destroyed on error */
	nilfs_detach_writer(nilfs, sbi);
	return err;
}

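/**
 * nilfs_recovery_copy_block - copy a salvaged block into the page cache
 * @sbi: nilfs_sb_info
 * @rb: describes the data block to be salvaged
 * @page: page cache page receiving the block contents
 */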
static int nilfs_recovery_copy_block(struct nilfs_sb_info *sbi,
				     struct nilfs_recovery_block *rb,
				     struct page *page)
{
	struct buffer_head *bh_org;
	void *kaddr;

	bh_org = sb_bread(sbi->s_super, rb->blocknr);
	if (unlikely(!bh_org))
		return -EIO;

	kaddr = kmap_atomic(page, KM_USER0);
	memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size);
	kunmap_atomic(kaddr, KM_USER0);
	brelse(bh_org);
	return 0;
}

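/**
 * recover_dsync_blocks - write salvaged data blocks through the page cache
 * @sbi: nilfs_sb_info
 * @head: list of nilfs_recovery_block entries to be recovered
 * @nr_salvaged_blocks: place to count successfully salvaged blocks
 *
 * For each queued block, looks up the owning inode, copies the block
 * contents into its page cache, and marks the file dirty so that the
 * recovered data is written out again.  Entries are freed as they are
 * processed; the first error encountered, if any, is returned after the
 * whole list has been handled.
 */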
static int recover_dsync_blocks(struct nilfs_sb_info *sbi,
				struct list_head *head,
				unsigned long *nr_salvaged_blocks)
{
	struct inode *inode;
	struct nilfs_recovery_block *rb, *n;
	unsigned blocksize = sbi->s_super->s_blocksize;
	struct page *page;
	loff_t pos;
	int err = 0, err2 = 0;

	list_for_each_entry_safe(rb, n, head, list) {
		inode = nilfs_iget(sbi->s_super, rb->ino);
		if (IS_ERR(inode)) {
			err = PTR_ERR(inode);
			inode = NULL;
			goto failed_inode;
		}

		pos = rb->blkoff << inode->i_blkbits;
		page = NULL;
		err = block_write_begin(NULL, inode->i_mapping, pos, blocksize,
					0, &page, NULL, nilfs_get_block);
		if (unlikely(err))
			goto failed_inode;

		err = nilfs_recovery_copy_block(sbi, rb, page);
		if (unlikely(err))
			goto failed_page;

		err = nilfs_set_file_dirty(sbi, inode, 1);
		if (unlikely(err))
			goto failed_page;

		block_write_end(NULL, inode->i_mapping, pos, blocksize,
				blocksize, page, NULL);

		unlock_page(page);
		page_cache_release(page);

		(*nr_salvaged_blocks)++;
		goto next;

 failed_page:
		unlock_page(page);
		page_cache_release(page);

 failed_inode:
		printk(KERN_WARNING
		       "NILFS warning: error recovering data block "
		       "(err=%d, ino=%lu, block-offset=%llu)\n",
		       err, rb->ino, (unsigned long long)rb->blkoff);
		if (!err2)
			err2 = err;
 next:
		iput(inode); /* iput(NULL) is just ignored */
		list_del_init(&rb->list);
		kfree(rb);
	}
	return err2;
}

/**
 * nilfs_do_roll_forward - salvage logical segments newer than the latest
 * checkpoint
 * @nilfs: the_nilfs
 * @sbi: nilfs_sb_info
 * @ri: pointer to a nilfs_recovery_info
 */
static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
				 struct nilfs_sb_info *sbi,
				 struct nilfs_recovery_info *ri)
{
	struct nilfs_segsum_info ssi;
	sector_t pseg_start;
	sector_t seg_start, seg_end;  /* Starting/ending DBN of full segment */
	unsigned long nsalvaged_blocks = 0;
	u64 seg_seq;
	__u64 segnum, nextnum = 0;
	int empty_seg = 0;
	int err = 0, ret;
	LIST_HEAD(dsync_blocks);  /* list of data blocks to be recovered */
	enum {
		RF_INIT_ST,
		RF_DSYNC_ST,   /* scanning data-sync segments */
	};
	int state = RF_INIT_ST;

	nilfs_attach_writer(nilfs, sbi);
	pseg_start = ri->ri_lsegs_start;
	seg_seq = ri->ri_lsegs_start_seq;
	segnum = nilfs_get_segnum_of_block(nilfs, pseg_start);
	nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);

	while (segnum != ri->ri_segnum || pseg_start <= ri->ri_pseg_start) {

		ret = load_segment_summary(sbi, pseg_start, seg_seq, &ssi, 1);
		if (ret) {
			if (ret == NILFS_SEG_FAIL_IO) {
				err = -EIO;
				goto failed;
			}
			goto strayed;
		}
		if (unlikely(NILFS_SEG_HAS_SR(&ssi)))
			goto confused;

		/* Found a valid partial segment; do recovery actions */
		nextnum = nilfs_get_segnum_of_block(nilfs, ssi.next);
		empty_seg = 0;
		nilfs->ns_ctime = ssi.ctime;
		if (!(ssi.flags & NILFS_SS_GC))
			nilfs->ns_nongc_ctime = ssi.ctime;

		switch (state) {
		case RF_INIT_ST:
			if (!NILFS_SEG_LOGBGN(&ssi) || !NILFS_SEG_DSYNC(&ssi))
				goto try_next_pseg;
			state = RF_DSYNC_ST;
			/* Fall through */
		case RF_DSYNC_ST:
			if (!NILFS_SEG_DSYNC(&ssi))
				goto confused;

			err = collect_blocks_from_segsum(
				sbi, pseg_start, &ssi, &dsync_blocks);
			if (unlikely(err))
				goto failed;
			if (NILFS_SEG_LOGEND(&ssi)) {
				err = recover_dsync_blocks(
					sbi, &dsync_blocks, &nsalvaged_blocks);
				if (unlikely(err))
					goto failed;
				state = RF_INIT_ST;
			}
			break; /* Fall through to try_next_pseg */
		}

 try_next_pseg:
		if (pseg_start == ri->ri_lsegs_end)
			break;
		pseg_start += ssi.nblocks;
		if (pseg_start < seg_end)
			continue;
		goto feed_segment;

 strayed:
		if (pseg_start == ri->ri_lsegs_end)
			break;

 feed_segment:
		/* Looking to the next full segment */
		if (empty_seg++)
			break;
		seg_seq++;
		segnum = nextnum;
		nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);
		pseg_start = seg_start;
	}

	if (nsalvaged_blocks) {
		printk(KERN_INFO "NILFS (device %s): salvaged %lu blocks\n",
		       sbi->s_super->s_id, nsalvaged_blocks);
		ri->ri_need_recovery = NILFS_RECOVERY_ROLLFORWARD_DONE;
	}
 out:
	dispose_recovery_list(&dsync_blocks);
	nilfs_detach_writer(sbi->s_nilfs, sbi);
	return err;

 confused:
	err = -EINVAL;
 failed:
	printk(KERN_ERR
	       "NILFS (device %s): Error roll-forwarding "
	       "(err=%d, pseg block=%llu). ",
	       sbi->s_super->s_id, err, (unsigned long long)pseg_start);
	goto out;
}

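/**
 * nilfs_finish_roll_forward - invalidate the logs salvaged by roll forward
 * @nilfs: the_nilfs
 * @sbi: nilfs_sb_info
 * @ri: nilfs_recovery_info of the completed roll forward
 *
 * Zeroes out and syncs the first block of the salvaged log range so that
 * the old logs are not replayed again by a later mount.  This is only
 * done when that block lies in the same segment as the super root used
 * for the recovery.
 */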
static void nilfs_finish_roll_forward(struct the_nilfs *nilfs,
				      struct nilfs_sb_info *sbi,
				      struct nilfs_recovery_info *ri)
{
	struct buffer_head *bh;
	int err;

	if (nilfs_get_segnum_of_block(nilfs, ri->ri_lsegs_start) !=
	    nilfs_get_segnum_of_block(nilfs, ri->ri_super_root))
		return;

	bh = sb_getblk(sbi->s_super, ri->ri_lsegs_start);
	BUG_ON(!bh);
	memset(bh->b_data, 0, bh->b_size);
	set_buffer_dirty(bh);
	err = sync_dirty_buffer(bh);
	if (unlikely(err))
		printk(KERN_WARNING
		       "NILFS warning: buffer sync write failed during "
		       "post-cleaning of recovery.\n");
	brelse(bh);
}

/**
 * nilfs_recover_logical_segments - salvage logical segments written after
 * the latest super root
 * @nilfs: the_nilfs
 * @sbi: nilfs_sb_info
 * @ri: pointer to a nilfs_recovery_info struct to store search results.
 *
 * Return Value: On success, 0 is returned.  On error, one of the following
 * negative error codes is returned.
 *
 * %-EINVAL - Inconsistent filesystem state.
 *
 * %-EIO - I/O error
 *
 * %-ENOSPC - No space left on device (only in a panic state).
 *
 * %-ERESTARTSYS - Interrupted.
 *
 * %-ENOMEM - Insufficient memory available.
 */
int nilfs_recover_logical_segments(struct the_nilfs *nilfs,
				   struct nilfs_sb_info *sbi,
				   struct nilfs_recovery_info *ri)
{
	int err;

	if (ri->ri_lsegs_start == 0 || ri->ri_lsegs_end == 0)
		return 0;

	err = nilfs_attach_checkpoint(sbi, ri->ri_cno);
	if (unlikely(err)) {
		printk(KERN_ERR
		       "NILFS: error loading the latest checkpoint.\n");
		return err;
	}

	err = nilfs_do_roll_forward(nilfs, sbi, ri);
	if (unlikely(err))
		goto failed;

	if (ri->ri_need_recovery == NILFS_RECOVERY_ROLLFORWARD_DONE) {
		err = nilfs_prepare_segment_for_recovery(nilfs, sbi, ri);
		if (unlikely(err)) {
			printk(KERN_ERR "NILFS: Error preparing segments for "
			       "recovery.\n");
			goto failed;
		}

		err = nilfs_attach_segment_constructor(sbi);
		if (unlikely(err))
			goto failed;

		set_nilfs_discontinued(nilfs);
		err = nilfs_construct_segment(sbi->s_super);
		nilfs_detach_segment_constructor(sbi);

		if (unlikely(err)) {
			printk(KERN_ERR "NILFS: Oops! recovery failed. "
			       "(err=%d)\n", err);
			goto failed;
		}

		nilfs_finish_roll_forward(nilfs, sbi, ri);
	}

	nilfs_detach_checkpoint(sbi);
	return 0;

 failed:
	nilfs_detach_checkpoint(sbi);
	nilfs_mdt_clear(nilfs->ns_cpfile);
	nilfs_mdt_clear(nilfs->ns_sufile);
	nilfs_mdt_clear(nilfs->ns_dat);
	return err;
}

/**
 * nilfs_search_super_root - search the latest valid super root
 * @nilfs: the_nilfs
 * @sbi: nilfs_sb_info
 * @ri: pointer to a nilfs_recovery_info struct to store search results.
 *
 * nilfs_search_super_root() looks for the latest super root from a partial
 * segment pointed to by the superblock.  It sets up struct the_nilfs through
 * this search and fills the nilfs_recovery_info (@ri) required for recovery.
 *
 * Return Value: On success, 0 is returned.  On error, one of the following
 * negative error codes is returned.
 *
 * %-EINVAL - No valid segment found
 *
 * %-EIO - I/O error
 */
int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
			    struct nilfs_recovery_info *ri)
{
	struct nilfs_segsum_info ssi;
	sector_t pseg_start, pseg_end, sr_pseg_start = 0;
	sector_t seg_start, seg_end; /* range of full segment (block number) */
	u64 seg_seq;
	__u64 segnum, nextnum = 0;
	__u64 cno;
	struct nilfs_segment_entry *ent;
	LIST_HEAD(segments);
	int empty_seg = 0, scan_newer = 0;
	int ret;

	pseg_start = nilfs->ns_last_pseg;
	seg_seq = nilfs->ns_last_seq;
	cno = nilfs->ns_last_cno;
	segnum = nilfs_get_segnum_of_block(nilfs, pseg_start);

	/* Calculate range of segment */
	nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);

	for (;;) {
		/* Load segment summary */
		ret = load_segment_summary(sbi, pseg_start, seg_seq, &ssi, 1);
		if (ret) {
			if (ret == NILFS_SEG_FAIL_IO)
				goto failed;
			goto strayed;
		}
		pseg_end = pseg_start + ssi.nblocks - 1;
		if (unlikely(pseg_end > seg_end)) {
			ret = NILFS_SEG_FAIL_CONSISTENCY;
			goto strayed;
		}

		/* A valid partial segment */
		ri->ri_pseg_start = pseg_start;
		ri->ri_seq = seg_seq;
		ri->ri_segnum = segnum;
		nextnum = nilfs_get_segnum_of_block(nilfs, ssi.next);
		ri->ri_nextnum = nextnum;
		empty_seg = 0;

		if (!NILFS_SEG_HAS_SR(&ssi)) {
			if (!scan_newer) {
				/* This will never happen because a superblock
				   (last_segment) always points to a pseg
				   having a super root. */
				ret = NILFS_SEG_FAIL_CONSISTENCY;
				goto failed;
			}
			if (!ri->ri_lsegs_start && NILFS_SEG_LOGBGN(&ssi)) {
				ri->ri_lsegs_start = pseg_start;
				ri->ri_lsegs_start_seq = seg_seq;
			}
			if (NILFS_SEG_LOGEND(&ssi))
				ri->ri_lsegs_end = pseg_start;
			goto try_next_pseg;
		}

		/* A valid super root was found. */
		ri->ri_cno = cno++;
		ri->ri_super_root = pseg_end;
		ri->ri_lsegs_start = ri->ri_lsegs_end = 0;

		nilfs_dispose_segment_list(&segments);
		nilfs->ns_pseg_offset = (sr_pseg_start = pseg_start)
			+ ssi.nblocks - seg_start;
		nilfs->ns_seg_seq = seg_seq;
		nilfs->ns_segnum = segnum;
		nilfs->ns_cno = cno;  /* nilfs->ns_cno = ri->ri_cno + 1 */
		nilfs->ns_ctime = ssi.ctime;
		nilfs->ns_nextnum = nextnum;

		if (scan_newer)
			ri->ri_need_recovery = NILFS_RECOVERY_SR_UPDATED;
		else {
			if (nilfs->ns_mount_state & NILFS_VALID_FS)
				goto super_root_found;
			scan_newer = 1;
		}

		/* reset region for roll-forward */
		pseg_start += ssi.nblocks;
		if (pseg_start < seg_end)
			continue;
		goto feed_segment;

 try_next_pseg:
		/* Still on course, or an inconsistent state was found */
		pseg_start += ssi.nblocks;
		if (pseg_start < seg_end)
			continue;
		goto feed_segment;

 strayed:
		/* Off the trail */
		if (!scan_newer)
			/*
			 * This can happen if a checkpoint was written without
			 * barriers, or as a result of an I/O failure.
			 */
			goto failed;

 feed_segment:
		/* Looking to the next full segment */
		if (empty_seg++)
			goto super_root_found; /* found a valid super root */

		ent = nilfs_alloc_segment_entry(segnum);
		if (unlikely(!ent)) {
			ret = -ENOMEM;
			goto failed;
		}
		list_add_tail(&ent->list, &segments);

		seg_seq++;
		segnum = nextnum;
		nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);
		pseg_start = seg_start;
	}

 super_root_found:
	/* Updating pointers relating to the latest checkpoint */
	list_splice(&segments, ri->ri_used_segments.prev);
	nilfs->ns_last_pseg = sr_pseg_start;
	nilfs->ns_last_seq = nilfs->ns_seg_seq;
	nilfs->ns_last_cno = ri->ri_cno;
	return 0;

 failed:
	nilfs_dispose_segment_list(&segments);
	return (ret < 0) ? ret : nilfs_warn_segment_error(ret);
}