xref: /openbmc/u-boot/fs/ubifs/replay.c (revision 4614b891)
1 /*
2  * This file is part of UBIFS.
3  *
4  * Copyright (C) 2006-2008 Nokia Corporation.
5  *
6  * SPDX-License-Identifier:	GPL-2.0+
7  *
8  * Authors: Adrian Hunter
9  *          Artem Bityutskiy (Битюцкий Артём)
10  */
11 
12 /*
13  * This file contains journal replay code. It runs when the file-system is being
14  * mounted and requires no locking.
15  *
16  * The larger is the journal, the longer it takes to scan it, so the longer it
17  * takes to mount UBIFS. This is why the journal has limited size which may be
18  * changed depending on the system requirements. But a larger journal gives
19  * faster I/O speed because it writes the index less frequently. So this is a
20  * trade-off. Also, the journal is indexed by the in-memory index (TNC), so the
21  * larger is the journal, the more memory its index may consume.
22  */
23 
24 #ifdef __UBOOT__
25 #include <linux/compat.h>
26 #include <linux/err.h>
27 #endif
28 #include "ubifs.h"
29 #include <linux/list_sort.h>
30 
31 /**
32  * struct replay_entry - replay list entry.
33  * @lnum: logical eraseblock number of the node
34  * @offs: node offset
35  * @len: node length
36  * @deletion: non-zero if this entry corresponds to a node deletion
37  * @sqnum: node sequence number
38  * @list: links the replay list
39  * @key: node key
40  * @nm: directory entry name
41  * @old_size: truncation old size
42  * @new_size: truncation new size
43  *
44  * The replay process first scans all buds and builds the replay list, then
45  * sorts the replay list in nodes sequence number order, and then inserts all
46  * the replay entries to the TNC.
47  */
48 struct replay_entry {
49 	int lnum;
50 	int offs;
51 	int len;
52 	unsigned int deletion:1;
53 	unsigned long long sqnum;
54 	struct list_head list;
55 	union ubifs_key key;
56 	union {
57 		struct qstr nm;
58 		struct {
59 			loff_t old_size;
60 			loff_t new_size;
61 		};
62 	};
63 };
64 
65 /**
66  * struct bud_entry - entry in the list of buds to replay.
67  * @list: next bud in the list
68  * @bud: bud description object
69  * @sqnum: reference node sequence number
70  * @free: free bytes in the bud
71  * @dirty: dirty bytes in the bud
72  */
73 struct bud_entry {
74 	struct list_head list;
75 	struct ubifs_bud *bud;
76 	unsigned long long sqnum;
77 	int free;
78 	int dirty;
79 };
80 
81 /**
82  * set_bud_lprops - set free and dirty space used by a bud.
83  * @c: UBIFS file-system description object
84  * @b: bud entry which describes the bud
85  *
86  * This function makes sure the LEB properties of bud @b are set correctly
87  * after the replay. Returns zero in case of success and a negative error code
88  * in case of failure.
89  */
90 static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b)
91 {
92 	const struct ubifs_lprops *lp;
93 	int err = 0, dirty;
94 
95 	ubifs_get_lprops(c);
96 
97 	lp = ubifs_lpt_lookup_dirty(c, b->bud->lnum);
98 	if (IS_ERR(lp)) {
99 		err = PTR_ERR(lp);
100 		goto out;
101 	}
102 
103 	dirty = lp->dirty;
104 	if (b->bud->start == 0 && (lp->free != c->leb_size || lp->dirty != 0)) {
105 		/*
106 		 * The LEB was added to the journal with a starting offset of
107 		 * zero which means the LEB must have been empty. The LEB
108 		 * property values should be @lp->free == @c->leb_size and
109 		 * @lp->dirty == 0, but that is not the case. The reason is that
110 		 * the LEB had been garbage collected before it became the bud,
111 		 * and there was not commit inbetween. The garbage collector
112 		 * resets the free and dirty space without recording it
113 		 * anywhere except lprops, so if there was no commit then
114 		 * lprops does not have that information.
115 		 *
116 		 * We do not need to adjust free space because the scan has told
117 		 * us the exact value which is recorded in the replay entry as
118 		 * @b->free.
119 		 *
120 		 * However we do need to subtract from the dirty space the
121 		 * amount of space that the garbage collector reclaimed, which
122 		 * is the whole LEB minus the amount of space that was free.
123 		 */
124 		dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum,
125 			lp->free, lp->dirty);
126 		dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum,
127 			lp->free, lp->dirty);
128 		dirty -= c->leb_size - lp->free;
129 		/*
130 		 * If the replay order was perfect the dirty space would now be
131 		 * zero. The order is not perfect because the journal heads
132 		 * race with each other. This is not a problem but is does mean
133 		 * that the dirty space may temporarily exceed c->leb_size
134 		 * during the replay.
135 		 */
136 		if (dirty != 0)
137 			dbg_mnt("LEB %d lp: %d free %d dirty replay: %d free %d dirty",
138 				b->bud->lnum, lp->free, lp->dirty, b->free,
139 				b->dirty);
140 	}
141 	lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty,
142 			     lp->flags | LPROPS_TAKEN, 0);
143 	if (IS_ERR(lp)) {
144 		err = PTR_ERR(lp);
145 		goto out;
146 	}
147 
148 	/* Make sure the journal head points to the latest bud */
149 	err = ubifs_wbuf_seek_nolock(&c->jheads[b->bud->jhead].wbuf,
150 				     b->bud->lnum, c->leb_size - b->free);
151 
152 out:
153 	ubifs_release_lprops(c);
154 	return err;
155 }
156 
157 /**
158  * set_buds_lprops - set free and dirty space for all replayed buds.
159  * @c: UBIFS file-system description object
160  *
161  * This function sets LEB properties for all replayed buds. Returns zero in
162  * case of success and a negative error code in case of failure.
163  */
164 static int set_buds_lprops(struct ubifs_info *c)
165 {
166 	struct bud_entry *b;
167 	int err;
168 
169 	list_for_each_entry(b, &c->replay_buds, list) {
170 		err = set_bud_lprops(c, b);
171 		if (err)
172 			return err;
173 	}
174 
175 	return 0;
176 }
177 
178 /**
179  * trun_remove_range - apply a replay entry for a truncation to the TNC.
180  * @c: UBIFS file-system description object
181  * @r: replay entry of truncation
182  */
183 static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r)
184 {
185 	unsigned min_blk, max_blk;
186 	union ubifs_key min_key, max_key;
187 	ino_t ino;
188 
189 	min_blk = r->new_size / UBIFS_BLOCK_SIZE;
190 	if (r->new_size & (UBIFS_BLOCK_SIZE - 1))
191 		min_blk += 1;
192 
193 	max_blk = r->old_size / UBIFS_BLOCK_SIZE;
194 	if ((r->old_size & (UBIFS_BLOCK_SIZE - 1)) == 0)
195 		max_blk -= 1;
196 
197 	ino = key_inum(c, &r->key);
198 
199 	data_key_init(c, &min_key, ino, min_blk);
200 	data_key_init(c, &max_key, ino, max_blk);
201 
202 	return ubifs_tnc_remove_range(c, &min_key, &max_key);
203 }
204 
205 /**
206  * apply_replay_entry - apply a replay entry to the TNC.
207  * @c: UBIFS file-system description object
208  * @r: replay entry to apply
209  *
210  * Apply a replay entry to the TNC.
211  */
212 static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
213 {
214 	int err;
215 
216 	dbg_mntk(&r->key, "LEB %d:%d len %d deletion %d sqnum %llu key ",
217 		 r->lnum, r->offs, r->len, r->deletion, r->sqnum);
218 
219 	/* Set c->replay_sqnum to help deal with dangling branches. */
220 	c->replay_sqnum = r->sqnum;
221 
222 	if (is_hash_key(c, &r->key)) {
223 		if (r->deletion)
224 			err = ubifs_tnc_remove_nm(c, &r->key, &r->nm);
225 		else
226 			err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs,
227 					       r->len, &r->nm);
228 	} else {
229 		if (r->deletion)
230 			switch (key_type(c, &r->key)) {
231 			case UBIFS_INO_KEY:
232 			{
233 				ino_t inum = key_inum(c, &r->key);
234 
235 				err = ubifs_tnc_remove_ino(c, inum);
236 				break;
237 			}
238 			case UBIFS_TRUN_KEY:
239 				err = trun_remove_range(c, r);
240 				break;
241 			default:
242 				err = ubifs_tnc_remove(c, &r->key);
243 				break;
244 			}
245 		else
246 			err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs,
247 					    r->len);
248 		if (err)
249 			return err;
250 
251 		if (c->need_recovery)
252 			err = ubifs_recover_size_accum(c, &r->key, r->deletion,
253 						       r->new_size);
254 	}
255 
256 	return err;
257 }
258 
259 /**
260  * replay_entries_cmp - compare 2 replay entries.
261  * @priv: UBIFS file-system description object
262  * @a: first replay entry
263  * @a: second replay entry
264  *
265  * This is a comparios function for 'list_sort()' which compares 2 replay
266  * entries @a and @b by comparing their sequence numer.  Returns %1 if @a has
267  * greater sequence number and %-1 otherwise.
268  */
269 static int replay_entries_cmp(void *priv, struct list_head *a,
270 			      struct list_head *b)
271 {
272 	struct replay_entry *ra, *rb;
273 
274 	cond_resched();
275 	if (a == b)
276 		return 0;
277 
278 	ra = list_entry(a, struct replay_entry, list);
279 	rb = list_entry(b, struct replay_entry, list);
280 	ubifs_assert(ra->sqnum != rb->sqnum);
281 	if (ra->sqnum > rb->sqnum)
282 		return 1;
283 	return -1;
284 }
285 
286 /**
287  * apply_replay_list - apply the replay list to the TNC.
288  * @c: UBIFS file-system description object
289  *
290  * Apply all entries in the replay list to the TNC. Returns zero in case of
291  * success and a negative error code in case of failure.
292  */
293 static int apply_replay_list(struct ubifs_info *c)
294 {
295 	struct replay_entry *r;
296 	int err;
297 
298 	list_sort(c, &c->replay_list, &replay_entries_cmp);
299 
300 	list_for_each_entry(r, &c->replay_list, list) {
301 		cond_resched();
302 
303 		err = apply_replay_entry(c, r);
304 		if (err)
305 			return err;
306 	}
307 
308 	return 0;
309 }
310 
311 /**
312  * destroy_replay_list - destroy the replay.
313  * @c: UBIFS file-system description object
314  *
315  * Destroy the replay list.
316  */
317 static void destroy_replay_list(struct ubifs_info *c)
318 {
319 	struct replay_entry *r, *tmp;
320 
321 	list_for_each_entry_safe(r, tmp, &c->replay_list, list) {
322 		if (is_hash_key(c, &r->key))
323 			kfree(r->nm.name);
324 		list_del(&r->list);
325 		kfree(r);
326 	}
327 }
328 
329 /**
330  * insert_node - insert a node to the replay list
331  * @c: UBIFS file-system description object
332  * @lnum: node logical eraseblock number
333  * @offs: node offset
334  * @len: node length
335  * @key: node key
336  * @sqnum: sequence number
337  * @deletion: non-zero if this is a deletion
338  * @used: number of bytes in use in a LEB
339  * @old_size: truncation old size
340  * @new_size: truncation new size
341  *
342  * This function inserts a scanned non-direntry node to the replay list. The
343  * replay list contains @struct replay_entry elements, and we sort this list in
344  * sequence number order before applying it. The replay list is applied at the
345  * very end of the replay process. Since the list is sorted in sequence number
346  * order, the older modifications are applied first. This function returns zero
347  * in case of success and a negative error code in case of failure.
348  */
349 static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
350 		       union ubifs_key *key, unsigned long long sqnum,
351 		       int deletion, int *used, loff_t old_size,
352 		       loff_t new_size)
353 {
354 	struct replay_entry *r;
355 
356 	dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs);
357 
358 	if (key_inum(c, key) >= c->highest_inum)
359 		c->highest_inum = key_inum(c, key);
360 
361 	r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
362 	if (!r)
363 		return -ENOMEM;
364 
365 	if (!deletion)
366 		*used += ALIGN(len, 8);
367 	r->lnum = lnum;
368 	r->offs = offs;
369 	r->len = len;
370 	r->deletion = !!deletion;
371 	r->sqnum = sqnum;
372 	key_copy(c, key, &r->key);
373 	r->old_size = old_size;
374 	r->new_size = new_size;
375 
376 	list_add_tail(&r->list, &c->replay_list);
377 	return 0;
378 }
379 
380 /**
381  * insert_dent - insert a directory entry node into the replay list.
382  * @c: UBIFS file-system description object
383  * @lnum: node logical eraseblock number
384  * @offs: node offset
385  * @len: node length
386  * @key: node key
387  * @name: directory entry name
388  * @nlen: directory entry name length
389  * @sqnum: sequence number
390  * @deletion: non-zero if this is a deletion
391  * @used: number of bytes in use in a LEB
392  *
393  * This function inserts a scanned directory entry node or an extended
394  * attribute entry to the replay list. Returns zero in case of success and a
395  * negative error code in case of failure.
396  */
397 static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
398 		       union ubifs_key *key, const char *name, int nlen,
399 		       unsigned long long sqnum, int deletion, int *used)
400 {
401 	struct replay_entry *r;
402 	char *nbuf;
403 
404 	dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs);
405 	if (key_inum(c, key) >= c->highest_inum)
406 		c->highest_inum = key_inum(c, key);
407 
408 	r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
409 	if (!r)
410 		return -ENOMEM;
411 
412 	nbuf = kmalloc(nlen + 1, GFP_KERNEL);
413 	if (!nbuf) {
414 		kfree(r);
415 		return -ENOMEM;
416 	}
417 
418 	if (!deletion)
419 		*used += ALIGN(len, 8);
420 	r->lnum = lnum;
421 	r->offs = offs;
422 	r->len = len;
423 	r->deletion = !!deletion;
424 	r->sqnum = sqnum;
425 	key_copy(c, key, &r->key);
426 	r->nm.len = nlen;
427 	memcpy(nbuf, name, nlen);
428 	nbuf[nlen] = '\0';
429 	r->nm.name = nbuf;
430 
431 	list_add_tail(&r->list, &c->replay_list);
432 	return 0;
433 }
434 
435 /**
436  * ubifs_validate_entry - validate directory or extended attribute entry node.
437  * @c: UBIFS file-system description object
438  * @dent: the node to validate
439  *
440  * This function validates directory or extended attribute entry node @dent.
441  * Returns zero if the node is all right and a %-EINVAL if not.
442  */
443 int ubifs_validate_entry(struct ubifs_info *c,
444 			 const struct ubifs_dent_node *dent)
445 {
446 	int key_type = key_type_flash(c, dent->key);
447 	int nlen = le16_to_cpu(dent->nlen);
448 
449 	if (le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1 ||
450 	    dent->type >= UBIFS_ITYPES_CNT ||
451 	    nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 ||
452 	    strnlen(dent->name, nlen) != nlen ||
453 	    le64_to_cpu(dent->inum) > MAX_INUM) {
454 		ubifs_err("bad %s node", key_type == UBIFS_DENT_KEY ?
455 			  "directory entry" : "extended attribute entry");
456 		return -EINVAL;
457 	}
458 
459 	if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) {
460 		ubifs_err("bad key type %d", key_type);
461 		return -EINVAL;
462 	}
463 
464 	return 0;
465 }
466 
467 /**
468  * is_last_bud - check if the bud is the last in the journal head.
469  * @c: UBIFS file-system description object
470  * @bud: bud description object
471  *
472  * This function checks if bud @bud is the last bud in its journal head. This
473  * information is then used by 'replay_bud()' to decide whether the bud can
474  * have corruptions or not. Indeed, only last buds can be corrupted by power
475  * cuts. Returns %1 if this is the last bud, and %0 if not.
476  */
477 static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud)
478 {
479 	struct ubifs_jhead *jh = &c->jheads[bud->jhead];
480 	struct ubifs_bud *next;
481 	uint32_t data;
482 	int err;
483 
484 	if (list_is_last(&bud->list, &jh->buds_list))
485 		return 1;
486 
487 	/*
488 	 * The following is a quirk to make sure we work correctly with UBIFS
489 	 * images used with older UBIFS.
490 	 *
491 	 * Normally, the last bud will be the last in the journal head's list
492 	 * of bud. However, there is one exception if the UBIFS image belongs
493 	 * to older UBIFS. This is fairly unlikely: one would need to use old
494 	 * UBIFS, then have a power cut exactly at the right point, and then
495 	 * try to mount this image with new UBIFS.
496 	 *
497 	 * The exception is: it is possible to have 2 buds A and B, A goes
498 	 * before B, and B is the last, bud B is contains no data, and bud A is
499 	 * corrupted at the end. The reason is that in older versions when the
500 	 * journal code switched the next bud (from A to B), it first added a
501 	 * log reference node for the new bud (B), and only after this it
502 	 * synchronized the write-buffer of current bud (A). But later this was
503 	 * changed and UBIFS started to always synchronize the write-buffer of
504 	 * the bud (A) before writing the log reference for the new bud (B).
505 	 *
506 	 * But because older UBIFS always synchronized A's write-buffer before
507 	 * writing to B, we can recognize this exceptional situation but
508 	 * checking the contents of bud B - if it is empty, then A can be
509 	 * treated as the last and we can recover it.
510 	 *
511 	 * TODO: remove this piece of code in a couple of years (today it is
512 	 * 16.05.2011).
513 	 */
514 	next = list_entry(bud->list.next, struct ubifs_bud, list);
515 	if (!list_is_last(&next->list, &jh->buds_list))
516 		return 0;
517 
518 	err = ubifs_leb_read(c, next->lnum, (char *)&data, next->start, 4, 1);
519 	if (err)
520 		return 0;
521 
522 	return data == 0xFFFFFFFF;
523 }
524 
525 /**
526  * replay_bud - replay a bud logical eraseblock.
527  * @c: UBIFS file-system description object
528  * @b: bud entry which describes the bud
529  *
530  * This function replays bud @bud, recovers it if needed, and adds all nodes
531  * from this bud to the replay list. Returns zero in case of success and a
532  * negative error code in case of failure.
533  */
534 static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
535 {
536 	int is_last = is_last_bud(c, b->bud);
537 	int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start;
538 	struct ubifs_scan_leb *sleb;
539 	struct ubifs_scan_node *snod;
540 
541 	dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d",
542 		lnum, b->bud->jhead, offs, is_last);
543 
544 	if (c->need_recovery && is_last)
545 		/*
546 		 * Recover only last LEBs in the journal heads, because power
547 		 * cuts may cause corruptions only in these LEBs, because only
548 		 * these LEBs could possibly be written to at the power cut
549 		 * time.
550 		 */
551 		sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, b->bud->jhead);
552 	else
553 		sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0);
554 	if (IS_ERR(sleb))
555 		return PTR_ERR(sleb);
556 
557 	/*
558 	 * The bud does not have to start from offset zero - the beginning of
559 	 * the 'lnum' LEB may contain previously committed data. One of the
560 	 * things we have to do in replay is to correctly update lprops with
561 	 * newer information about this LEB.
562 	 *
563 	 * At this point lprops thinks that this LEB has 'c->leb_size - offs'
564 	 * bytes of free space because it only contain information about
565 	 * committed data.
566 	 *
567 	 * But we know that real amount of free space is 'c->leb_size -
568 	 * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and
569 	 * 'sleb->endpt' is used by bud data. We have to correctly calculate
570 	 * how much of these data are dirty and update lprops with this
571 	 * information.
572 	 *
573 	 * The dirt in that LEB region is comprised of padding nodes, deletion
574 	 * nodes, truncation nodes and nodes which are obsoleted by subsequent
575 	 * nodes in this LEB. So instead of calculating clean space, we
576 	 * calculate used space ('used' variable).
577 	 */
578 
579 	list_for_each_entry(snod, &sleb->nodes, list) {
580 		int deletion = 0;
581 
582 		cond_resched();
583 
584 		if (snod->sqnum >= SQNUM_WATERMARK) {
585 			ubifs_err("file system's life ended");
586 			goto out_dump;
587 		}
588 
589 		if (snod->sqnum > c->max_sqnum)
590 			c->max_sqnum = snod->sqnum;
591 
592 		switch (snod->type) {
593 		case UBIFS_INO_NODE:
594 		{
595 			struct ubifs_ino_node *ino = snod->node;
596 			loff_t new_size = le64_to_cpu(ino->size);
597 
598 			if (le32_to_cpu(ino->nlink) == 0)
599 				deletion = 1;
600 			err = insert_node(c, lnum, snod->offs, snod->len,
601 					  &snod->key, snod->sqnum, deletion,
602 					  &used, 0, new_size);
603 			break;
604 		}
605 		case UBIFS_DATA_NODE:
606 		{
607 			struct ubifs_data_node *dn = snod->node;
608 			loff_t new_size = le32_to_cpu(dn->size) +
609 					  key_block(c, &snod->key) *
610 					  UBIFS_BLOCK_SIZE;
611 
612 			err = insert_node(c, lnum, snod->offs, snod->len,
613 					  &snod->key, snod->sqnum, deletion,
614 					  &used, 0, new_size);
615 			break;
616 		}
617 		case UBIFS_DENT_NODE:
618 		case UBIFS_XENT_NODE:
619 		{
620 			struct ubifs_dent_node *dent = snod->node;
621 
622 			err = ubifs_validate_entry(c, dent);
623 			if (err)
624 				goto out_dump;
625 
626 			err = insert_dent(c, lnum, snod->offs, snod->len,
627 					  &snod->key, dent->name,
628 					  le16_to_cpu(dent->nlen), snod->sqnum,
629 					  !le64_to_cpu(dent->inum), &used);
630 			break;
631 		}
632 		case UBIFS_TRUN_NODE:
633 		{
634 			struct ubifs_trun_node *trun = snod->node;
635 			loff_t old_size = le64_to_cpu(trun->old_size);
636 			loff_t new_size = le64_to_cpu(trun->new_size);
637 			union ubifs_key key;
638 
639 			/* Validate truncation node */
640 			if (old_size < 0 || old_size > c->max_inode_sz ||
641 			    new_size < 0 || new_size > c->max_inode_sz ||
642 			    old_size <= new_size) {
643 				ubifs_err("bad truncation node");
644 				goto out_dump;
645 			}
646 
647 			/*
648 			 * Create a fake truncation key just to use the same
649 			 * functions which expect nodes to have keys.
650 			 */
651 			trun_key_init(c, &key, le32_to_cpu(trun->inum));
652 			err = insert_node(c, lnum, snod->offs, snod->len,
653 					  &key, snod->sqnum, 1, &used,
654 					  old_size, new_size);
655 			break;
656 		}
657 		default:
658 			ubifs_err("unexpected node type %d in bud LEB %d:%d",
659 				  snod->type, lnum, snod->offs);
660 			err = -EINVAL;
661 			goto out_dump;
662 		}
663 		if (err)
664 			goto out;
665 	}
666 
667 	ubifs_assert(ubifs_search_bud(c, lnum));
668 	ubifs_assert(sleb->endpt - offs >= used);
669 	ubifs_assert(sleb->endpt % c->min_io_size == 0);
670 
671 	b->dirty = sleb->endpt - offs - used;
672 	b->free = c->leb_size - sleb->endpt;
673 	dbg_mnt("bud LEB %d replied: dirty %d, free %d",
674 		lnum, b->dirty, b->free);
675 
676 out:
677 	ubifs_scan_destroy(sleb);
678 	return err;
679 
680 out_dump:
681 	ubifs_err("bad node is at LEB %d:%d", lnum, snod->offs);
682 	ubifs_dump_node(c, snod->node);
683 	ubifs_scan_destroy(sleb);
684 	return -EINVAL;
685 }
686 
687 /**
688  * replay_buds - replay all buds.
689  * @c: UBIFS file-system description object
690  *
691  * This function returns zero in case of success and a negative error code in
692  * case of failure.
693  */
694 static int replay_buds(struct ubifs_info *c)
695 {
696 	struct bud_entry *b;
697 	int err;
698 	unsigned long long prev_sqnum = 0;
699 
700 	list_for_each_entry(b, &c->replay_buds, list) {
701 		err = replay_bud(c, b);
702 		if (err)
703 			return err;
704 
705 		ubifs_assert(b->sqnum > prev_sqnum);
706 		prev_sqnum = b->sqnum;
707 	}
708 
709 	return 0;
710 }
711 
712 /**
713  * destroy_bud_list - destroy the list of buds to replay.
714  * @c: UBIFS file-system description object
715  */
716 static void destroy_bud_list(struct ubifs_info *c)
717 {
718 	struct bud_entry *b;
719 
720 	while (!list_empty(&c->replay_buds)) {
721 		b = list_entry(c->replay_buds.next, struct bud_entry, list);
722 		list_del(&b->list);
723 		kfree(b);
724 	}
725 }
726 
727 /**
728  * add_replay_bud - add a bud to the list of buds to replay.
729  * @c: UBIFS file-system description object
730  * @lnum: bud logical eraseblock number to replay
731  * @offs: bud start offset
732  * @jhead: journal head to which this bud belongs
733  * @sqnum: reference node sequence number
734  *
735  * This function returns zero in case of success and a negative error code in
736  * case of failure.
737  */
738 static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
739 			  unsigned long long sqnum)
740 {
741 	struct ubifs_bud *bud;
742 	struct bud_entry *b;
743 
744 	dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead);
745 
746 	bud = kmalloc(sizeof(struct ubifs_bud), GFP_KERNEL);
747 	if (!bud)
748 		return -ENOMEM;
749 
750 	b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL);
751 	if (!b) {
752 		kfree(bud);
753 		return -ENOMEM;
754 	}
755 
756 	bud->lnum = lnum;
757 	bud->start = offs;
758 	bud->jhead = jhead;
759 	ubifs_add_bud(c, bud);
760 
761 	b->bud = bud;
762 	b->sqnum = sqnum;
763 	list_add_tail(&b->list, &c->replay_buds);
764 
765 	return 0;
766 }
767 
768 /**
769  * validate_ref - validate a reference node.
770  * @c: UBIFS file-system description object
771  * @ref: the reference node to validate
772  * @ref_lnum: LEB number of the reference node
773  * @ref_offs: reference node offset
774  *
775  * This function returns %1 if a bud reference already exists for the LEB. %0 is
776  * returned if the reference node is new, otherwise %-EINVAL is returned if
777  * validation failed.
778  */
779 static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref)
780 {
781 	struct ubifs_bud *bud;
782 	int lnum = le32_to_cpu(ref->lnum);
783 	unsigned int offs = le32_to_cpu(ref->offs);
784 	unsigned int jhead = le32_to_cpu(ref->jhead);
785 
786 	/*
787 	 * ref->offs may point to the end of LEB when the journal head points
788 	 * to the end of LEB and we write reference node for it during commit.
789 	 * So this is why we require 'offs > c->leb_size'.
790 	 */
791 	if (jhead >= c->jhead_cnt || lnum >= c->leb_cnt ||
792 	    lnum < c->main_first || offs > c->leb_size ||
793 	    offs & (c->min_io_size - 1))
794 		return -EINVAL;
795 
796 	/* Make sure we have not already looked at this bud */
797 	bud = ubifs_search_bud(c, lnum);
798 	if (bud) {
799 		if (bud->jhead == jhead && bud->start <= offs)
800 			return 1;
801 		ubifs_err("bud at LEB %d:%d was already referred", lnum, offs);
802 		return -EINVAL;
803 	}
804 
805 	return 0;
806 }
807 
808 /**
809  * replay_log_leb - replay a log logical eraseblock.
810  * @c: UBIFS file-system description object
811  * @lnum: log logical eraseblock to replay
812  * @offs: offset to start replaying from
813  * @sbuf: scan buffer
814  *
815  * This function replays a log LEB and returns zero in case of success, %1 if
816  * this is the last LEB in the log, and a negative error code in case of
817  * failure.
818  */
819 static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
820 {
821 	int err;
822 	struct ubifs_scan_leb *sleb;
823 	struct ubifs_scan_node *snod;
824 	const struct ubifs_cs_node *node;
825 
826 	dbg_mnt("replay log LEB %d:%d", lnum, offs);
827 	sleb = ubifs_scan(c, lnum, offs, sbuf, c->need_recovery);
828 	if (IS_ERR(sleb)) {
829 		if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery)
830 			return PTR_ERR(sleb);
831 		/*
832 		 * Note, the below function will recover this log LEB only if
833 		 * it is the last, because unclean reboots can possibly corrupt
834 		 * only the tail of the log.
835 		 */
836 		sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf);
837 		if (IS_ERR(sleb))
838 			return PTR_ERR(sleb);
839 	}
840 
841 	if (sleb->nodes_cnt == 0) {
842 		err = 1;
843 		goto out;
844 	}
845 
846 	node = sleb->buf;
847 	snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list);
848 	if (c->cs_sqnum == 0) {
849 		/*
850 		 * This is the first log LEB we are looking at, make sure that
851 		 * the first node is a commit start node. Also record its
852 		 * sequence number so that UBIFS can determine where the log
853 		 * ends, because all nodes which were have higher sequence
854 		 * numbers.
855 		 */
856 		if (snod->type != UBIFS_CS_NODE) {
857 			ubifs_err("first log node at LEB %d:%d is not CS node",
858 				  lnum, offs);
859 			goto out_dump;
860 		}
861 		if (le64_to_cpu(node->cmt_no) != c->cmt_no) {
862 			ubifs_err("first CS node at LEB %d:%d has wrong commit number %llu expected %llu",
863 				  lnum, offs,
864 				  (unsigned long long)le64_to_cpu(node->cmt_no),
865 				  c->cmt_no);
866 			goto out_dump;
867 		}
868 
869 		c->cs_sqnum = le64_to_cpu(node->ch.sqnum);
870 		dbg_mnt("commit start sqnum %llu", c->cs_sqnum);
871 	}
872 
873 	if (snod->sqnum < c->cs_sqnum) {
874 		/*
875 		 * This means that we reached end of log and now
876 		 * look to the older log data, which was already
877 		 * committed but the eraseblock was not erased (UBIFS
878 		 * only un-maps it). So this basically means we have to
879 		 * exit with "end of log" code.
880 		 */
881 		err = 1;
882 		goto out;
883 	}
884 
885 	/* Make sure the first node sits at offset zero of the LEB */
886 	if (snod->offs != 0) {
887 		ubifs_err("first node is not at zero offset");
888 		goto out_dump;
889 	}
890 
891 	list_for_each_entry(snod, &sleb->nodes, list) {
892 		cond_resched();
893 
894 		if (snod->sqnum >= SQNUM_WATERMARK) {
895 			ubifs_err("file system's life ended");
896 			goto out_dump;
897 		}
898 
899 		if (snod->sqnum < c->cs_sqnum) {
900 			ubifs_err("bad sqnum %llu, commit sqnum %llu",
901 				  snod->sqnum, c->cs_sqnum);
902 			goto out_dump;
903 		}
904 
905 		if (snod->sqnum > c->max_sqnum)
906 			c->max_sqnum = snod->sqnum;
907 
908 		switch (snod->type) {
909 		case UBIFS_REF_NODE: {
910 			const struct ubifs_ref_node *ref = snod->node;
911 
912 			err = validate_ref(c, ref);
913 			if (err == 1)
914 				break; /* Already have this bud */
915 			if (err)
916 				goto out_dump;
917 
918 			err = add_replay_bud(c, le32_to_cpu(ref->lnum),
919 					     le32_to_cpu(ref->offs),
920 					     le32_to_cpu(ref->jhead),
921 					     snod->sqnum);
922 			if (err)
923 				goto out;
924 
925 			break;
926 		}
927 		case UBIFS_CS_NODE:
928 			/* Make sure it sits at the beginning of LEB */
929 			if (snod->offs != 0) {
930 				ubifs_err("unexpected node in log");
931 				goto out_dump;
932 			}
933 			break;
934 		default:
935 			ubifs_err("unexpected node in log");
936 			goto out_dump;
937 		}
938 	}
939 
940 	if (sleb->endpt || c->lhead_offs >= c->leb_size) {
941 		c->lhead_lnum = lnum;
942 		c->lhead_offs = sleb->endpt;
943 	}
944 
945 	err = !sleb->endpt;
946 out:
947 	ubifs_scan_destroy(sleb);
948 	return err;
949 
950 out_dump:
951 	ubifs_err("log error detected while replaying the log at LEB %d:%d",
952 		  lnum, offs + snod->offs);
953 	ubifs_dump_node(c, snod->node);
954 	ubifs_scan_destroy(sleb);
955 	return -EINVAL;
956 }
957 
958 /**
959  * take_ihead - update the status of the index head in lprops to 'taken'.
960  * @c: UBIFS file-system description object
961  *
962  * This function returns the amount of free space in the index head LEB or a
963  * negative error code.
964  */
965 static int take_ihead(struct ubifs_info *c)
966 {
967 	const struct ubifs_lprops *lp;
968 	int err, free;
969 
970 	ubifs_get_lprops(c);
971 
972 	lp = ubifs_lpt_lookup_dirty(c, c->ihead_lnum);
973 	if (IS_ERR(lp)) {
974 		err = PTR_ERR(lp);
975 		goto out;
976 	}
977 
978 	free = lp->free;
979 
980 	lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC,
981 			     lp->flags | LPROPS_TAKEN, 0);
982 	if (IS_ERR(lp)) {
983 		err = PTR_ERR(lp);
984 		goto out;
985 	}
986 
987 	err = free;
988 out:
989 	ubifs_release_lprops(c);
990 	return err;
991 }
992 
993 /**
994  * ubifs_replay_journal - replay journal.
995  * @c: UBIFS file-system description object
996  *
997  * This function scans the journal, replays and cleans it up. It makes sure all
998  * memory data structures related to uncommitted journal are built (dirty TNC
999  * tree, tree of buds, modified lprops, etc).
1000  */
1001 int ubifs_replay_journal(struct ubifs_info *c)
1002 {
1003 	int err, lnum, free;
1004 
1005 	BUILD_BUG_ON(UBIFS_TRUN_KEY > 5);
1006 
1007 	/* Update the status of the index head in lprops to 'taken' */
1008 	free = take_ihead(c);
1009 	if (free < 0)
1010 		return free; /* Error code */
1011 
1012 	if (c->ihead_offs != c->leb_size - free) {
1013 		ubifs_err("bad index head LEB %d:%d", c->ihead_lnum,
1014 			  c->ihead_offs);
1015 		return -EINVAL;
1016 	}
1017 
1018 	dbg_mnt("start replaying the journal");
1019 	c->replaying = 1;
1020 	lnum = c->ltail_lnum = c->lhead_lnum;
1021 
1022 	do {
1023 		err = replay_log_leb(c, lnum, 0, c->sbuf);
1024 		if (err == 1)
1025 			/* We hit the end of the log */
1026 			break;
1027 		if (err)
1028 			goto out;
1029 		lnum = ubifs_next_log_lnum(c, lnum);
1030 	} while (lnum != c->ltail_lnum);
1031 
1032 	err = replay_buds(c);
1033 	if (err)
1034 		goto out;
1035 
1036 	err = apply_replay_list(c);
1037 	if (err)
1038 		goto out;
1039 
1040 	err = set_buds_lprops(c);
1041 	if (err)
1042 		goto out;
1043 
1044 	/*
1045 	 * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable
1046 	 * to roughly estimate index growth. Things like @c->bi.min_idx_lebs
1047 	 * depend on it. This means we have to initialize it to make sure
1048 	 * budgeting works properly.
1049 	 */
1050 	c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt);
1051 	c->bi.uncommitted_idx *= c->max_idx_node_sz;
1052 
1053 	ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery);
1054 	dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, highest_inum %lu",
1055 		c->lhead_lnum, c->lhead_offs, c->max_sqnum,
1056 		(unsigned long)c->highest_inum);
1057 out:
1058 	destroy_replay_list(c);
1059 	destroy_bud_list(c);
1060 	c->replaying = 0;
1061 	return err;
1062 }
1063