1 /* 2 * This file is part of UBIFS. 3 * 4 * Copyright (C) 2006-2008 Nokia Corporation. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 as published by 8 * the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 * more details. 14 * 15 * You should have received a copy of the GNU General Public License along with 16 * this program; if not, write to the Free Software Foundation, Inc., 51 17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 18 * 19 * Authors: Adrian Hunter 20 * Artem Bityutskiy (Битюцкий Артём) 21 */ 22 23 /* 24 * This file contains journal replay code. It runs when the file-system is being 25 * mounted and requires no locking. 26 * 27 * The larger is the journal, the longer it takes to scan it, so the longer it 28 * takes to mount UBIFS. This is why the journal has limited size which may be 29 * changed depending on the system requirements. But a larger journal gives 30 * faster I/O speed because it writes the index less frequently. So this is a 31 * trade-off. Also, the journal is indexed by the in-memory index (TNC), so the 32 * larger is the journal, the more memory its index may consume. 33 */ 34 35 #include "ubifs.h" 36 #include <linux/list_sort.h> 37 38 /** 39 * struct replay_entry - replay list entry. 40 * @lnum: logical eraseblock number of the node 41 * @offs: node offset 42 * @len: node length 43 * @deletion: non-zero if this entry corresponds to a node deletion 44 * @sqnum: node sequence number 45 * @list: links the replay list 46 * @key: node key 47 * @nm: directory entry name 48 * @old_size: truncation old size 49 * @new_size: truncation new size 50 * 51 * The replay process first scans all buds and builds the replay list, then 52 * sorts the replay list in nodes sequence number order, and then inserts all 53 * the replay entries to the TNC. 54 */ 55 struct replay_entry { 56 int lnum; 57 int offs; 58 int len; 59 unsigned int deletion:1; 60 unsigned long long sqnum; 61 struct list_head list; 62 union ubifs_key key; 63 union { 64 struct fscrypt_name nm; 65 struct { 66 loff_t old_size; 67 loff_t new_size; 68 }; 69 }; 70 }; 71 72 /** 73 * struct bud_entry - entry in the list of buds to replay. 74 * @list: next bud in the list 75 * @bud: bud description object 76 * @sqnum: reference node sequence number 77 * @free: free bytes in the bud 78 * @dirty: dirty bytes in the bud 79 */ 80 struct bud_entry { 81 struct list_head list; 82 struct ubifs_bud *bud; 83 unsigned long long sqnum; 84 int free; 85 int dirty; 86 }; 87 88 /** 89 * set_bud_lprops - set free and dirty space used by a bud. 90 * @c: UBIFS file-system description object 91 * @b: bud entry which describes the bud 92 * 93 * This function makes sure the LEB properties of bud @b are set correctly 94 * after the replay. Returns zero in case of success and a negative error code 95 * in case of failure. 96 */ 97 static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b) 98 { 99 const struct ubifs_lprops *lp; 100 int err = 0, dirty; 101 102 ubifs_get_lprops(c); 103 104 lp = ubifs_lpt_lookup_dirty(c, b->bud->lnum); 105 if (IS_ERR(lp)) { 106 err = PTR_ERR(lp); 107 goto out; 108 } 109 110 dirty = lp->dirty; 111 if (b->bud->start == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { 112 /* 113 * The LEB was added to the journal with a starting offset of 114 * zero which means the LEB must have been empty. The LEB 115 * property values should be @lp->free == @c->leb_size and 116 * @lp->dirty == 0, but that is not the case. The reason is that 117 * the LEB had been garbage collected before it became the bud, 118 * and there was not commit inbetween. The garbage collector 119 * resets the free and dirty space without recording it 120 * anywhere except lprops, so if there was no commit then 121 * lprops does not have that information. 122 * 123 * We do not need to adjust free space because the scan has told 124 * us the exact value which is recorded in the replay entry as 125 * @b->free. 126 * 127 * However we do need to subtract from the dirty space the 128 * amount of space that the garbage collector reclaimed, which 129 * is the whole LEB minus the amount of space that was free. 130 */ 131 dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, 132 lp->free, lp->dirty); 133 dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, 134 lp->free, lp->dirty); 135 dirty -= c->leb_size - lp->free; 136 /* 137 * If the replay order was perfect the dirty space would now be 138 * zero. The order is not perfect because the journal heads 139 * race with each other. This is not a problem but is does mean 140 * that the dirty space may temporarily exceed c->leb_size 141 * during the replay. 142 */ 143 if (dirty != 0) 144 dbg_mnt("LEB %d lp: %d free %d dirty replay: %d free %d dirty", 145 b->bud->lnum, lp->free, lp->dirty, b->free, 146 b->dirty); 147 } 148 lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty, 149 lp->flags | LPROPS_TAKEN, 0); 150 if (IS_ERR(lp)) { 151 err = PTR_ERR(lp); 152 goto out; 153 } 154 155 /* Make sure the journal head points to the latest bud */ 156 err = ubifs_wbuf_seek_nolock(&c->jheads[b->bud->jhead].wbuf, 157 b->bud->lnum, c->leb_size - b->free); 158 159 out: 160 ubifs_release_lprops(c); 161 return err; 162 } 163 164 /** 165 * set_buds_lprops - set free and dirty space for all replayed buds. 166 * @c: UBIFS file-system description object 167 * 168 * This function sets LEB properties for all replayed buds. Returns zero in 169 * case of success and a negative error code in case of failure. 170 */ 171 static int set_buds_lprops(struct ubifs_info *c) 172 { 173 struct bud_entry *b; 174 int err; 175 176 list_for_each_entry(b, &c->replay_buds, list) { 177 err = set_bud_lprops(c, b); 178 if (err) 179 return err; 180 } 181 182 return 0; 183 } 184 185 /** 186 * trun_remove_range - apply a replay entry for a truncation to the TNC. 187 * @c: UBIFS file-system description object 188 * @r: replay entry of truncation 189 */ 190 static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r) 191 { 192 unsigned min_blk, max_blk; 193 union ubifs_key min_key, max_key; 194 ino_t ino; 195 196 min_blk = r->new_size / UBIFS_BLOCK_SIZE; 197 if (r->new_size & (UBIFS_BLOCK_SIZE - 1)) 198 min_blk += 1; 199 200 max_blk = r->old_size / UBIFS_BLOCK_SIZE; 201 if ((r->old_size & (UBIFS_BLOCK_SIZE - 1)) == 0) 202 max_blk -= 1; 203 204 ino = key_inum(c, &r->key); 205 206 data_key_init(c, &min_key, ino, min_blk); 207 data_key_init(c, &max_key, ino, max_blk); 208 209 return ubifs_tnc_remove_range(c, &min_key, &max_key); 210 } 211 212 /** 213 * apply_replay_entry - apply a replay entry to the TNC. 214 * @c: UBIFS file-system description object 215 * @r: replay entry to apply 216 * 217 * Apply a replay entry to the TNC. 218 */ 219 static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) 220 { 221 int err; 222 223 dbg_mntk(&r->key, "LEB %d:%d len %d deletion %d sqnum %llu key ", 224 r->lnum, r->offs, r->len, r->deletion, r->sqnum); 225 226 if (is_hash_key(c, &r->key)) { 227 if (r->deletion) 228 err = ubifs_tnc_remove_nm(c, &r->key, &r->nm); 229 else 230 err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs, 231 r->len, &r->nm); 232 } else { 233 if (r->deletion) 234 switch (key_type(c, &r->key)) { 235 case UBIFS_INO_KEY: 236 { 237 ino_t inum = key_inum(c, &r->key); 238 239 err = ubifs_tnc_remove_ino(c, inum); 240 break; 241 } 242 case UBIFS_TRUN_KEY: 243 err = trun_remove_range(c, r); 244 break; 245 default: 246 err = ubifs_tnc_remove(c, &r->key); 247 break; 248 } 249 else 250 err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs, 251 r->len); 252 if (err) 253 return err; 254 255 if (c->need_recovery) 256 err = ubifs_recover_size_accum(c, &r->key, r->deletion, 257 r->new_size); 258 } 259 260 return err; 261 } 262 263 /** 264 * replay_entries_cmp - compare 2 replay entries. 265 * @priv: UBIFS file-system description object 266 * @a: first replay entry 267 * @b: second replay entry 268 * 269 * This is a comparios function for 'list_sort()' which compares 2 replay 270 * entries @a and @b by comparing their sequence numer. Returns %1 if @a has 271 * greater sequence number and %-1 otherwise. 272 */ 273 static int replay_entries_cmp(void *priv, struct list_head *a, 274 struct list_head *b) 275 { 276 struct ubifs_info *c = priv; 277 struct replay_entry *ra, *rb; 278 279 cond_resched(); 280 if (a == b) 281 return 0; 282 283 ra = list_entry(a, struct replay_entry, list); 284 rb = list_entry(b, struct replay_entry, list); 285 ubifs_assert(c, ra->sqnum != rb->sqnum); 286 if (ra->sqnum > rb->sqnum) 287 return 1; 288 return -1; 289 } 290 291 /** 292 * apply_replay_list - apply the replay list to the TNC. 293 * @c: UBIFS file-system description object 294 * 295 * Apply all entries in the replay list to the TNC. Returns zero in case of 296 * success and a negative error code in case of failure. 297 */ 298 static int apply_replay_list(struct ubifs_info *c) 299 { 300 struct replay_entry *r; 301 int err; 302 303 list_sort(c, &c->replay_list, &replay_entries_cmp); 304 305 list_for_each_entry(r, &c->replay_list, list) { 306 cond_resched(); 307 308 err = apply_replay_entry(c, r); 309 if (err) 310 return err; 311 } 312 313 return 0; 314 } 315 316 /** 317 * destroy_replay_list - destroy the replay. 318 * @c: UBIFS file-system description object 319 * 320 * Destroy the replay list. 321 */ 322 static void destroy_replay_list(struct ubifs_info *c) 323 { 324 struct replay_entry *r, *tmp; 325 326 list_for_each_entry_safe(r, tmp, &c->replay_list, list) { 327 if (is_hash_key(c, &r->key)) 328 kfree(fname_name(&r->nm)); 329 list_del(&r->list); 330 kfree(r); 331 } 332 } 333 334 /** 335 * insert_node - insert a node to the replay list 336 * @c: UBIFS file-system description object 337 * @lnum: node logical eraseblock number 338 * @offs: node offset 339 * @len: node length 340 * @key: node key 341 * @sqnum: sequence number 342 * @deletion: non-zero if this is a deletion 343 * @used: number of bytes in use in a LEB 344 * @old_size: truncation old size 345 * @new_size: truncation new size 346 * 347 * This function inserts a scanned non-direntry node to the replay list. The 348 * replay list contains @struct replay_entry elements, and we sort this list in 349 * sequence number order before applying it. The replay list is applied at the 350 * very end of the replay process. Since the list is sorted in sequence number 351 * order, the older modifications are applied first. This function returns zero 352 * in case of success and a negative error code in case of failure. 353 */ 354 static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, 355 union ubifs_key *key, unsigned long long sqnum, 356 int deletion, int *used, loff_t old_size, 357 loff_t new_size) 358 { 359 struct replay_entry *r; 360 361 dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs); 362 363 if (key_inum(c, key) >= c->highest_inum) 364 c->highest_inum = key_inum(c, key); 365 366 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 367 if (!r) 368 return -ENOMEM; 369 370 if (!deletion) 371 *used += ALIGN(len, 8); 372 r->lnum = lnum; 373 r->offs = offs; 374 r->len = len; 375 r->deletion = !!deletion; 376 r->sqnum = sqnum; 377 key_copy(c, key, &r->key); 378 r->old_size = old_size; 379 r->new_size = new_size; 380 381 list_add_tail(&r->list, &c->replay_list); 382 return 0; 383 } 384 385 /** 386 * insert_dent - insert a directory entry node into the replay list. 387 * @c: UBIFS file-system description object 388 * @lnum: node logical eraseblock number 389 * @offs: node offset 390 * @len: node length 391 * @key: node key 392 * @name: directory entry name 393 * @nlen: directory entry name length 394 * @sqnum: sequence number 395 * @deletion: non-zero if this is a deletion 396 * @used: number of bytes in use in a LEB 397 * 398 * This function inserts a scanned directory entry node or an extended 399 * attribute entry to the replay list. Returns zero in case of success and a 400 * negative error code in case of failure. 401 */ 402 static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, 403 union ubifs_key *key, const char *name, int nlen, 404 unsigned long long sqnum, int deletion, int *used) 405 { 406 struct replay_entry *r; 407 char *nbuf; 408 409 dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs); 410 if (key_inum(c, key) >= c->highest_inum) 411 c->highest_inum = key_inum(c, key); 412 413 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 414 if (!r) 415 return -ENOMEM; 416 417 nbuf = kmalloc(nlen + 1, GFP_KERNEL); 418 if (!nbuf) { 419 kfree(r); 420 return -ENOMEM; 421 } 422 423 if (!deletion) 424 *used += ALIGN(len, 8); 425 r->lnum = lnum; 426 r->offs = offs; 427 r->len = len; 428 r->deletion = !!deletion; 429 r->sqnum = sqnum; 430 key_copy(c, key, &r->key); 431 fname_len(&r->nm) = nlen; 432 memcpy(nbuf, name, nlen); 433 nbuf[nlen] = '\0'; 434 fname_name(&r->nm) = nbuf; 435 436 list_add_tail(&r->list, &c->replay_list); 437 return 0; 438 } 439 440 /** 441 * ubifs_validate_entry - validate directory or extended attribute entry node. 442 * @c: UBIFS file-system description object 443 * @dent: the node to validate 444 * 445 * This function validates directory or extended attribute entry node @dent. 446 * Returns zero if the node is all right and a %-EINVAL if not. 447 */ 448 int ubifs_validate_entry(struct ubifs_info *c, 449 const struct ubifs_dent_node *dent) 450 { 451 int key_type = key_type_flash(c, dent->key); 452 int nlen = le16_to_cpu(dent->nlen); 453 454 if (le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1 || 455 dent->type >= UBIFS_ITYPES_CNT || 456 nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 || 457 (key_type == UBIFS_XENT_KEY && strnlen(dent->name, nlen) != nlen) || 458 le64_to_cpu(dent->inum) > MAX_INUM) { 459 ubifs_err(c, "bad %s node", key_type == UBIFS_DENT_KEY ? 460 "directory entry" : "extended attribute entry"); 461 return -EINVAL; 462 } 463 464 if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) { 465 ubifs_err(c, "bad key type %d", key_type); 466 return -EINVAL; 467 } 468 469 return 0; 470 } 471 472 /** 473 * is_last_bud - check if the bud is the last in the journal head. 474 * @c: UBIFS file-system description object 475 * @bud: bud description object 476 * 477 * This function checks if bud @bud is the last bud in its journal head. This 478 * information is then used by 'replay_bud()' to decide whether the bud can 479 * have corruptions or not. Indeed, only last buds can be corrupted by power 480 * cuts. Returns %1 if this is the last bud, and %0 if not. 481 */ 482 static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud) 483 { 484 struct ubifs_jhead *jh = &c->jheads[bud->jhead]; 485 struct ubifs_bud *next; 486 uint32_t data; 487 int err; 488 489 if (list_is_last(&bud->list, &jh->buds_list)) 490 return 1; 491 492 /* 493 * The following is a quirk to make sure we work correctly with UBIFS 494 * images used with older UBIFS. 495 * 496 * Normally, the last bud will be the last in the journal head's list 497 * of bud. However, there is one exception if the UBIFS image belongs 498 * to older UBIFS. This is fairly unlikely: one would need to use old 499 * UBIFS, then have a power cut exactly at the right point, and then 500 * try to mount this image with new UBIFS. 501 * 502 * The exception is: it is possible to have 2 buds A and B, A goes 503 * before B, and B is the last, bud B is contains no data, and bud A is 504 * corrupted at the end. The reason is that in older versions when the 505 * journal code switched the next bud (from A to B), it first added a 506 * log reference node for the new bud (B), and only after this it 507 * synchronized the write-buffer of current bud (A). But later this was 508 * changed and UBIFS started to always synchronize the write-buffer of 509 * the bud (A) before writing the log reference for the new bud (B). 510 * 511 * But because older UBIFS always synchronized A's write-buffer before 512 * writing to B, we can recognize this exceptional situation but 513 * checking the contents of bud B - if it is empty, then A can be 514 * treated as the last and we can recover it. 515 * 516 * TODO: remove this piece of code in a couple of years (today it is 517 * 16.05.2011). 518 */ 519 next = list_entry(bud->list.next, struct ubifs_bud, list); 520 if (!list_is_last(&next->list, &jh->buds_list)) 521 return 0; 522 523 err = ubifs_leb_read(c, next->lnum, (char *)&data, next->start, 4, 1); 524 if (err) 525 return 0; 526 527 return data == 0xFFFFFFFF; 528 } 529 530 /** 531 * replay_bud - replay a bud logical eraseblock. 532 * @c: UBIFS file-system description object 533 * @b: bud entry which describes the bud 534 * 535 * This function replays bud @bud, recovers it if needed, and adds all nodes 536 * from this bud to the replay list. Returns zero in case of success and a 537 * negative error code in case of failure. 538 */ 539 static int replay_bud(struct ubifs_info *c, struct bud_entry *b) 540 { 541 int is_last = is_last_bud(c, b->bud); 542 int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start; 543 struct ubifs_scan_leb *sleb; 544 struct ubifs_scan_node *snod; 545 546 dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d", 547 lnum, b->bud->jhead, offs, is_last); 548 549 if (c->need_recovery && is_last) 550 /* 551 * Recover only last LEBs in the journal heads, because power 552 * cuts may cause corruptions only in these LEBs, because only 553 * these LEBs could possibly be written to at the power cut 554 * time. 555 */ 556 sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, b->bud->jhead); 557 else 558 sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); 559 if (IS_ERR(sleb)) 560 return PTR_ERR(sleb); 561 562 /* 563 * The bud does not have to start from offset zero - the beginning of 564 * the 'lnum' LEB may contain previously committed data. One of the 565 * things we have to do in replay is to correctly update lprops with 566 * newer information about this LEB. 567 * 568 * At this point lprops thinks that this LEB has 'c->leb_size - offs' 569 * bytes of free space because it only contain information about 570 * committed data. 571 * 572 * But we know that real amount of free space is 'c->leb_size - 573 * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and 574 * 'sleb->endpt' is used by bud data. We have to correctly calculate 575 * how much of these data are dirty and update lprops with this 576 * information. 577 * 578 * The dirt in that LEB region is comprised of padding nodes, deletion 579 * nodes, truncation nodes and nodes which are obsoleted by subsequent 580 * nodes in this LEB. So instead of calculating clean space, we 581 * calculate used space ('used' variable). 582 */ 583 584 list_for_each_entry(snod, &sleb->nodes, list) { 585 int deletion = 0; 586 587 cond_resched(); 588 589 if (snod->sqnum >= SQNUM_WATERMARK) { 590 ubifs_err(c, "file system's life ended"); 591 goto out_dump; 592 } 593 594 if (snod->sqnum > c->max_sqnum) 595 c->max_sqnum = snod->sqnum; 596 597 switch (snod->type) { 598 case UBIFS_INO_NODE: 599 { 600 struct ubifs_ino_node *ino = snod->node; 601 loff_t new_size = le64_to_cpu(ino->size); 602 603 if (le32_to_cpu(ino->nlink) == 0) 604 deletion = 1; 605 err = insert_node(c, lnum, snod->offs, snod->len, 606 &snod->key, snod->sqnum, deletion, 607 &used, 0, new_size); 608 break; 609 } 610 case UBIFS_DATA_NODE: 611 { 612 struct ubifs_data_node *dn = snod->node; 613 loff_t new_size = le32_to_cpu(dn->size) + 614 key_block(c, &snod->key) * 615 UBIFS_BLOCK_SIZE; 616 617 err = insert_node(c, lnum, snod->offs, snod->len, 618 &snod->key, snod->sqnum, deletion, 619 &used, 0, new_size); 620 break; 621 } 622 case UBIFS_DENT_NODE: 623 case UBIFS_XENT_NODE: 624 { 625 struct ubifs_dent_node *dent = snod->node; 626 627 err = ubifs_validate_entry(c, dent); 628 if (err) 629 goto out_dump; 630 631 err = insert_dent(c, lnum, snod->offs, snod->len, 632 &snod->key, dent->name, 633 le16_to_cpu(dent->nlen), snod->sqnum, 634 !le64_to_cpu(dent->inum), &used); 635 break; 636 } 637 case UBIFS_TRUN_NODE: 638 { 639 struct ubifs_trun_node *trun = snod->node; 640 loff_t old_size = le64_to_cpu(trun->old_size); 641 loff_t new_size = le64_to_cpu(trun->new_size); 642 union ubifs_key key; 643 644 /* Validate truncation node */ 645 if (old_size < 0 || old_size > c->max_inode_sz || 646 new_size < 0 || new_size > c->max_inode_sz || 647 old_size <= new_size) { 648 ubifs_err(c, "bad truncation node"); 649 goto out_dump; 650 } 651 652 /* 653 * Create a fake truncation key just to use the same 654 * functions which expect nodes to have keys. 655 */ 656 trun_key_init(c, &key, le32_to_cpu(trun->inum)); 657 err = insert_node(c, lnum, snod->offs, snod->len, 658 &key, snod->sqnum, 1, &used, 659 old_size, new_size); 660 break; 661 } 662 default: 663 ubifs_err(c, "unexpected node type %d in bud LEB %d:%d", 664 snod->type, lnum, snod->offs); 665 err = -EINVAL; 666 goto out_dump; 667 } 668 if (err) 669 goto out; 670 } 671 672 ubifs_assert(c, ubifs_search_bud(c, lnum)); 673 ubifs_assert(c, sleb->endpt - offs >= used); 674 ubifs_assert(c, sleb->endpt % c->min_io_size == 0); 675 676 b->dirty = sleb->endpt - offs - used; 677 b->free = c->leb_size - sleb->endpt; 678 dbg_mnt("bud LEB %d replied: dirty %d, free %d", 679 lnum, b->dirty, b->free); 680 681 out: 682 ubifs_scan_destroy(sleb); 683 return err; 684 685 out_dump: 686 ubifs_err(c, "bad node is at LEB %d:%d", lnum, snod->offs); 687 ubifs_dump_node(c, snod->node); 688 ubifs_scan_destroy(sleb); 689 return -EINVAL; 690 } 691 692 /** 693 * replay_buds - replay all buds. 694 * @c: UBIFS file-system description object 695 * 696 * This function returns zero in case of success and a negative error code in 697 * case of failure. 698 */ 699 static int replay_buds(struct ubifs_info *c) 700 { 701 struct bud_entry *b; 702 int err; 703 unsigned long long prev_sqnum = 0; 704 705 list_for_each_entry(b, &c->replay_buds, list) { 706 err = replay_bud(c, b); 707 if (err) 708 return err; 709 710 ubifs_assert(c, b->sqnum > prev_sqnum); 711 prev_sqnum = b->sqnum; 712 } 713 714 return 0; 715 } 716 717 /** 718 * destroy_bud_list - destroy the list of buds to replay. 719 * @c: UBIFS file-system description object 720 */ 721 static void destroy_bud_list(struct ubifs_info *c) 722 { 723 struct bud_entry *b; 724 725 while (!list_empty(&c->replay_buds)) { 726 b = list_entry(c->replay_buds.next, struct bud_entry, list); 727 list_del(&b->list); 728 kfree(b); 729 } 730 } 731 732 /** 733 * add_replay_bud - add a bud to the list of buds to replay. 734 * @c: UBIFS file-system description object 735 * @lnum: bud logical eraseblock number to replay 736 * @offs: bud start offset 737 * @jhead: journal head to which this bud belongs 738 * @sqnum: reference node sequence number 739 * 740 * This function returns zero in case of success and a negative error code in 741 * case of failure. 742 */ 743 static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, 744 unsigned long long sqnum) 745 { 746 struct ubifs_bud *bud; 747 struct bud_entry *b; 748 749 dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead); 750 751 bud = kmalloc(sizeof(struct ubifs_bud), GFP_KERNEL); 752 if (!bud) 753 return -ENOMEM; 754 755 b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL); 756 if (!b) { 757 kfree(bud); 758 return -ENOMEM; 759 } 760 761 bud->lnum = lnum; 762 bud->start = offs; 763 bud->jhead = jhead; 764 ubifs_add_bud(c, bud); 765 766 b->bud = bud; 767 b->sqnum = sqnum; 768 list_add_tail(&b->list, &c->replay_buds); 769 770 return 0; 771 } 772 773 /** 774 * validate_ref - validate a reference node. 775 * @c: UBIFS file-system description object 776 * @ref: the reference node to validate 777 * @ref_lnum: LEB number of the reference node 778 * @ref_offs: reference node offset 779 * 780 * This function returns %1 if a bud reference already exists for the LEB. %0 is 781 * returned if the reference node is new, otherwise %-EINVAL is returned if 782 * validation failed. 783 */ 784 static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref) 785 { 786 struct ubifs_bud *bud; 787 int lnum = le32_to_cpu(ref->lnum); 788 unsigned int offs = le32_to_cpu(ref->offs); 789 unsigned int jhead = le32_to_cpu(ref->jhead); 790 791 /* 792 * ref->offs may point to the end of LEB when the journal head points 793 * to the end of LEB and we write reference node for it during commit. 794 * So this is why we require 'offs > c->leb_size'. 795 */ 796 if (jhead >= c->jhead_cnt || lnum >= c->leb_cnt || 797 lnum < c->main_first || offs > c->leb_size || 798 offs & (c->min_io_size - 1)) 799 return -EINVAL; 800 801 /* Make sure we have not already looked at this bud */ 802 bud = ubifs_search_bud(c, lnum); 803 if (bud) { 804 if (bud->jhead == jhead && bud->start <= offs) 805 return 1; 806 ubifs_err(c, "bud at LEB %d:%d was already referred", lnum, offs); 807 return -EINVAL; 808 } 809 810 return 0; 811 } 812 813 /** 814 * replay_log_leb - replay a log logical eraseblock. 815 * @c: UBIFS file-system description object 816 * @lnum: log logical eraseblock to replay 817 * @offs: offset to start replaying from 818 * @sbuf: scan buffer 819 * 820 * This function replays a log LEB and returns zero in case of success, %1 if 821 * this is the last LEB in the log, and a negative error code in case of 822 * failure. 823 */ 824 static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) 825 { 826 int err; 827 struct ubifs_scan_leb *sleb; 828 struct ubifs_scan_node *snod; 829 const struct ubifs_cs_node *node; 830 831 dbg_mnt("replay log LEB %d:%d", lnum, offs); 832 sleb = ubifs_scan(c, lnum, offs, sbuf, c->need_recovery); 833 if (IS_ERR(sleb)) { 834 if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery) 835 return PTR_ERR(sleb); 836 /* 837 * Note, the below function will recover this log LEB only if 838 * it is the last, because unclean reboots can possibly corrupt 839 * only the tail of the log. 840 */ 841 sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); 842 if (IS_ERR(sleb)) 843 return PTR_ERR(sleb); 844 } 845 846 if (sleb->nodes_cnt == 0) { 847 err = 1; 848 goto out; 849 } 850 851 node = sleb->buf; 852 snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list); 853 if (c->cs_sqnum == 0) { 854 /* 855 * This is the first log LEB we are looking at, make sure that 856 * the first node is a commit start node. Also record its 857 * sequence number so that UBIFS can determine where the log 858 * ends, because all nodes which were have higher sequence 859 * numbers. 860 */ 861 if (snod->type != UBIFS_CS_NODE) { 862 ubifs_err(c, "first log node at LEB %d:%d is not CS node", 863 lnum, offs); 864 goto out_dump; 865 } 866 if (le64_to_cpu(node->cmt_no) != c->cmt_no) { 867 ubifs_err(c, "first CS node at LEB %d:%d has wrong commit number %llu expected %llu", 868 lnum, offs, 869 (unsigned long long)le64_to_cpu(node->cmt_no), 870 c->cmt_no); 871 goto out_dump; 872 } 873 874 c->cs_sqnum = le64_to_cpu(node->ch.sqnum); 875 dbg_mnt("commit start sqnum %llu", c->cs_sqnum); 876 } 877 878 if (snod->sqnum < c->cs_sqnum) { 879 /* 880 * This means that we reached end of log and now 881 * look to the older log data, which was already 882 * committed but the eraseblock was not erased (UBIFS 883 * only un-maps it). So this basically means we have to 884 * exit with "end of log" code. 885 */ 886 err = 1; 887 goto out; 888 } 889 890 /* Make sure the first node sits at offset zero of the LEB */ 891 if (snod->offs != 0) { 892 ubifs_err(c, "first node is not at zero offset"); 893 goto out_dump; 894 } 895 896 list_for_each_entry(snod, &sleb->nodes, list) { 897 cond_resched(); 898 899 if (snod->sqnum >= SQNUM_WATERMARK) { 900 ubifs_err(c, "file system's life ended"); 901 goto out_dump; 902 } 903 904 if (snod->sqnum < c->cs_sqnum) { 905 ubifs_err(c, "bad sqnum %llu, commit sqnum %llu", 906 snod->sqnum, c->cs_sqnum); 907 goto out_dump; 908 } 909 910 if (snod->sqnum > c->max_sqnum) 911 c->max_sqnum = snod->sqnum; 912 913 switch (snod->type) { 914 case UBIFS_REF_NODE: { 915 const struct ubifs_ref_node *ref = snod->node; 916 917 err = validate_ref(c, ref); 918 if (err == 1) 919 break; /* Already have this bud */ 920 if (err) 921 goto out_dump; 922 923 err = add_replay_bud(c, le32_to_cpu(ref->lnum), 924 le32_to_cpu(ref->offs), 925 le32_to_cpu(ref->jhead), 926 snod->sqnum); 927 if (err) 928 goto out; 929 930 break; 931 } 932 case UBIFS_CS_NODE: 933 /* Make sure it sits at the beginning of LEB */ 934 if (snod->offs != 0) { 935 ubifs_err(c, "unexpected node in log"); 936 goto out_dump; 937 } 938 break; 939 default: 940 ubifs_err(c, "unexpected node in log"); 941 goto out_dump; 942 } 943 } 944 945 if (sleb->endpt || c->lhead_offs >= c->leb_size) { 946 c->lhead_lnum = lnum; 947 c->lhead_offs = sleb->endpt; 948 } 949 950 err = !sleb->endpt; 951 out: 952 ubifs_scan_destroy(sleb); 953 return err; 954 955 out_dump: 956 ubifs_err(c, "log error detected while replaying the log at LEB %d:%d", 957 lnum, offs + snod->offs); 958 ubifs_dump_node(c, snod->node); 959 ubifs_scan_destroy(sleb); 960 return -EINVAL; 961 } 962 963 /** 964 * take_ihead - update the status of the index head in lprops to 'taken'. 965 * @c: UBIFS file-system description object 966 * 967 * This function returns the amount of free space in the index head LEB or a 968 * negative error code. 969 */ 970 static int take_ihead(struct ubifs_info *c) 971 { 972 const struct ubifs_lprops *lp; 973 int err, free; 974 975 ubifs_get_lprops(c); 976 977 lp = ubifs_lpt_lookup_dirty(c, c->ihead_lnum); 978 if (IS_ERR(lp)) { 979 err = PTR_ERR(lp); 980 goto out; 981 } 982 983 free = lp->free; 984 985 lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, 986 lp->flags | LPROPS_TAKEN, 0); 987 if (IS_ERR(lp)) { 988 err = PTR_ERR(lp); 989 goto out; 990 } 991 992 err = free; 993 out: 994 ubifs_release_lprops(c); 995 return err; 996 } 997 998 /** 999 * ubifs_replay_journal - replay journal. 1000 * @c: UBIFS file-system description object 1001 * 1002 * This function scans the journal, replays and cleans it up. It makes sure all 1003 * memory data structures related to uncommitted journal are built (dirty TNC 1004 * tree, tree of buds, modified lprops, etc). 1005 */ 1006 int ubifs_replay_journal(struct ubifs_info *c) 1007 { 1008 int err, lnum, free; 1009 1010 BUILD_BUG_ON(UBIFS_TRUN_KEY > 5); 1011 1012 /* Update the status of the index head in lprops to 'taken' */ 1013 free = take_ihead(c); 1014 if (free < 0) 1015 return free; /* Error code */ 1016 1017 if (c->ihead_offs != c->leb_size - free) { 1018 ubifs_err(c, "bad index head LEB %d:%d", c->ihead_lnum, 1019 c->ihead_offs); 1020 return -EINVAL; 1021 } 1022 1023 dbg_mnt("start replaying the journal"); 1024 c->replaying = 1; 1025 lnum = c->ltail_lnum = c->lhead_lnum; 1026 1027 do { 1028 err = replay_log_leb(c, lnum, 0, c->sbuf); 1029 if (err == 1) { 1030 if (lnum != c->lhead_lnum) 1031 /* We hit the end of the log */ 1032 break; 1033 1034 /* 1035 * The head of the log must always start with the 1036 * "commit start" node on a properly formatted UBIFS. 1037 * But we found no nodes at all, which means that 1038 * something went wrong and we cannot proceed mounting 1039 * the file-system. 1040 */ 1041 ubifs_err(c, "no UBIFS nodes found at the log head LEB %d:%d, possibly corrupted", 1042 lnum, 0); 1043 err = -EINVAL; 1044 } 1045 if (err) 1046 goto out; 1047 lnum = ubifs_next_log_lnum(c, lnum); 1048 } while (lnum != c->ltail_lnum); 1049 1050 err = replay_buds(c); 1051 if (err) 1052 goto out; 1053 1054 err = apply_replay_list(c); 1055 if (err) 1056 goto out; 1057 1058 err = set_buds_lprops(c); 1059 if (err) 1060 goto out; 1061 1062 /* 1063 * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable 1064 * to roughly estimate index growth. Things like @c->bi.min_idx_lebs 1065 * depend on it. This means we have to initialize it to make sure 1066 * budgeting works properly. 1067 */ 1068 c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); 1069 c->bi.uncommitted_idx *= c->max_idx_node_sz; 1070 1071 ubifs_assert(c, c->bud_bytes <= c->max_bud_bytes || c->need_recovery); 1072 dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, highest_inum %lu", 1073 c->lhead_lnum, c->lhead_offs, c->max_sqnum, 1074 (unsigned long)c->highest_inum); 1075 out: 1076 destroy_replay_list(c); 1077 destroy_bud_list(c); 1078 c->replaying = 0; 1079 return err; 1080 } 1081