1 /* 2 * This file is part of UBIFS. 3 * 4 * Copyright (C) 2006-2008 Nokia Corporation. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 as published by 8 * the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 * more details. 14 * 15 * You should have received a copy of the GNU General Public License along with 16 * this program; if not, write to the Free Software Foundation, Inc., 51 17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 18 * 19 * Authors: Adrian Hunter 20 * Artem Bityutskiy (Битюцкий Артём) 21 */ 22 23 /* 24 * This file contains journal replay code. It runs when the file-system is being 25 * mounted and requires no locking. 26 * 27 * The larger is the journal, the longer it takes to scan it, so the longer it 28 * takes to mount UBIFS. This is why the journal has limited size which may be 29 * changed depending on the system requirements. But a larger journal gives 30 * faster I/O speed because it writes the index less frequently. So this is a 31 * trade-off. Also, the journal is indexed by the in-memory index (TNC), so the 32 * larger is the journal, the more memory its index may consume. 33 */ 34 35 #include "ubifs.h" 36 37 /* 38 * Replay flags. 39 * 40 * REPLAY_DELETION: node was deleted 41 * REPLAY_REF: node is a reference node 42 */ 43 enum { 44 REPLAY_DELETION = 1, 45 REPLAY_REF = 2, 46 }; 47 48 /** 49 * struct replay_entry - replay tree entry. 50 * @lnum: logical eraseblock number of the node 51 * @offs: node offset 52 * @len: node length 53 * @sqnum: node sequence number 54 * @flags: replay flags 55 * @rb: links the replay tree 56 * @key: node key 57 * @nm: directory entry name 58 * @old_size: truncation old size 59 * @new_size: truncation new size 60 * @free: amount of free space in a bud 61 * @dirty: amount of dirty space in a bud from padding and deletion nodes 62 * 63 * UBIFS journal replay must compare node sequence numbers, which means it must 64 * build a tree of node information to insert into the TNC. 65 */ 66 struct replay_entry { 67 int lnum; 68 int offs; 69 int len; 70 unsigned long long sqnum; 71 int flags; 72 struct rb_node rb; 73 union ubifs_key key; 74 union { 75 struct qstr nm; 76 struct { 77 loff_t old_size; 78 loff_t new_size; 79 }; 80 struct { 81 int free; 82 int dirty; 83 }; 84 }; 85 }; 86 87 /** 88 * struct bud_entry - entry in the list of buds to replay. 89 * @list: next bud in the list 90 * @bud: bud description object 91 * @free: free bytes in the bud 92 * @sqnum: reference node sequence number 93 */ 94 struct bud_entry { 95 struct list_head list; 96 struct ubifs_bud *bud; 97 int free; 98 unsigned long long sqnum; 99 }; 100 101 /** 102 * set_bud_lprops - set free and dirty space used by a bud. 103 * @c: UBIFS file-system description object 104 * @r: replay entry of bud 105 */ 106 static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) 107 { 108 const struct ubifs_lprops *lp; 109 int err = 0, dirty; 110 111 ubifs_get_lprops(c); 112 113 lp = ubifs_lpt_lookup_dirty(c, r->lnum); 114 if (IS_ERR(lp)) { 115 err = PTR_ERR(lp); 116 goto out; 117 } 118 119 dirty = lp->dirty; 120 if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { 121 /* 122 * The LEB was added to the journal with a starting offset of 123 * zero which means the LEB must have been empty. The LEB 124 * property values should be lp->free == c->leb_size and 125 * lp->dirty == 0, but that is not the case. The reason is that 126 * the LEB was garbage collected. The garbage collector resets 127 * the free and dirty space without recording it anywhere except 128 * lprops, so if there is not a commit then lprops does not have 129 * that information next time the file system is mounted. 130 * 131 * We do not need to adjust free space because the scan has told 132 * us the exact value which is recorded in the replay entry as 133 * r->free. 134 * 135 * However we do need to subtract from the dirty space the 136 * amount of space that the garbage collector reclaimed, which 137 * is the whole LEB minus the amount of space that was free. 138 */ 139 dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, 140 lp->free, lp->dirty); 141 dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, 142 lp->free, lp->dirty); 143 dirty -= c->leb_size - lp->free; 144 /* 145 * If the replay order was perfect the dirty space would now be 146 * zero. The order is not perfect because the the journal heads 147 * race with each other. This is not a problem but is does mean 148 * that the dirty space may temporarily exceed c->leb_size 149 * during the replay. 150 */ 151 if (dirty != 0) 152 dbg_msg("LEB %d lp: %d free %d dirty " 153 "replay: %d free %d dirty", r->lnum, lp->free, 154 lp->dirty, r->free, r->dirty); 155 } 156 lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty, 157 lp->flags | LPROPS_TAKEN, 0); 158 if (IS_ERR(lp)) { 159 err = PTR_ERR(lp); 160 goto out; 161 } 162 out: 163 ubifs_release_lprops(c); 164 return err; 165 } 166 167 /** 168 * trun_remove_range - apply a replay entry for a truncation to the TNC. 169 * @c: UBIFS file-system description object 170 * @r: replay entry of truncation 171 */ 172 static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r) 173 { 174 unsigned min_blk, max_blk; 175 union ubifs_key min_key, max_key; 176 ino_t ino; 177 178 min_blk = r->new_size / UBIFS_BLOCK_SIZE; 179 if (r->new_size & (UBIFS_BLOCK_SIZE - 1)) 180 min_blk += 1; 181 182 max_blk = r->old_size / UBIFS_BLOCK_SIZE; 183 if ((r->old_size & (UBIFS_BLOCK_SIZE - 1)) == 0) 184 max_blk -= 1; 185 186 ino = key_inum(c, &r->key); 187 188 data_key_init(c, &min_key, ino, min_blk); 189 data_key_init(c, &max_key, ino, max_blk); 190 191 return ubifs_tnc_remove_range(c, &min_key, &max_key); 192 } 193 194 /** 195 * apply_replay_entry - apply a replay entry to the TNC. 196 * @c: UBIFS file-system description object 197 * @r: replay entry to apply 198 * 199 * Apply a replay entry to the TNC. 200 */ 201 static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) 202 { 203 int err, deletion = ((r->flags & REPLAY_DELETION) != 0); 204 205 dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum, 206 r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key)); 207 208 /* Set c->replay_sqnum to help deal with dangling branches. */ 209 c->replay_sqnum = r->sqnum; 210 211 if (r->flags & REPLAY_REF) 212 err = set_bud_lprops(c, r); 213 else if (is_hash_key(c, &r->key)) { 214 if (deletion) 215 err = ubifs_tnc_remove_nm(c, &r->key, &r->nm); 216 else 217 err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs, 218 r->len, &r->nm); 219 } else { 220 if (deletion) 221 switch (key_type(c, &r->key)) { 222 case UBIFS_INO_KEY: 223 { 224 ino_t inum = key_inum(c, &r->key); 225 226 err = ubifs_tnc_remove_ino(c, inum); 227 break; 228 } 229 case UBIFS_TRUN_KEY: 230 err = trun_remove_range(c, r); 231 break; 232 default: 233 err = ubifs_tnc_remove(c, &r->key); 234 break; 235 } 236 else 237 err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs, 238 r->len); 239 if (err) 240 return err; 241 242 if (c->need_recovery) 243 err = ubifs_recover_size_accum(c, &r->key, deletion, 244 r->new_size); 245 } 246 247 return err; 248 } 249 250 /** 251 * destroy_replay_tree - destroy the replay. 252 * @c: UBIFS file-system description object 253 * 254 * Destroy the replay tree. 255 */ 256 static void destroy_replay_tree(struct ubifs_info *c) 257 { 258 struct rb_node *this = c->replay_tree.rb_node; 259 struct replay_entry *r; 260 261 while (this) { 262 if (this->rb_left) { 263 this = this->rb_left; 264 continue; 265 } else if (this->rb_right) { 266 this = this->rb_right; 267 continue; 268 } 269 r = rb_entry(this, struct replay_entry, rb); 270 this = rb_parent(this); 271 if (this) { 272 if (this->rb_left == &r->rb) 273 this->rb_left = NULL; 274 else 275 this->rb_right = NULL; 276 } 277 if (is_hash_key(c, &r->key)) 278 kfree((void *)r->nm.name); 279 kfree(r); 280 } 281 c->replay_tree = RB_ROOT; 282 } 283 284 /** 285 * apply_replay_tree - apply the replay tree to the TNC. 286 * @c: UBIFS file-system description object 287 * 288 * Apply the replay tree. 289 * Returns zero in case of success and a negative error code in case of 290 * failure. 291 */ 292 static int apply_replay_tree(struct ubifs_info *c) 293 { 294 struct rb_node *this = rb_first(&c->replay_tree); 295 296 while (this) { 297 struct replay_entry *r; 298 int err; 299 300 cond_resched(); 301 302 r = rb_entry(this, struct replay_entry, rb); 303 err = apply_replay_entry(c, r); 304 if (err) 305 return err; 306 this = rb_next(this); 307 } 308 return 0; 309 } 310 311 /** 312 * insert_node - insert a node to the replay tree. 313 * @c: UBIFS file-system description object 314 * @lnum: node logical eraseblock number 315 * @offs: node offset 316 * @len: node length 317 * @key: node key 318 * @sqnum: sequence number 319 * @deletion: non-zero if this is a deletion 320 * @used: number of bytes in use in a LEB 321 * @old_size: truncation old size 322 * @new_size: truncation new size 323 * 324 * This function inserts a scanned non-direntry node to the replay tree. The 325 * replay tree is an RB-tree containing @struct replay_entry elements which are 326 * indexed by the sequence number. The replay tree is applied at the very end 327 * of the replay process. Since the tree is sorted in sequence number order, 328 * the older modifications are applied first. This function returns zero in 329 * case of success and a negative error code in case of failure. 330 */ 331 static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, 332 union ubifs_key *key, unsigned long long sqnum, 333 int deletion, int *used, loff_t old_size, 334 loff_t new_size) 335 { 336 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; 337 struct replay_entry *r; 338 339 if (key_inum(c, key) >= c->highest_inum) 340 c->highest_inum = key_inum(c, key); 341 342 dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); 343 while (*p) { 344 parent = *p; 345 r = rb_entry(parent, struct replay_entry, rb); 346 if (sqnum < r->sqnum) { 347 p = &(*p)->rb_left; 348 continue; 349 } else if (sqnum > r->sqnum) { 350 p = &(*p)->rb_right; 351 continue; 352 } 353 ubifs_err("duplicate sqnum in replay"); 354 return -EINVAL; 355 } 356 357 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 358 if (!r) 359 return -ENOMEM; 360 361 if (!deletion) 362 *used += ALIGN(len, 8); 363 r->lnum = lnum; 364 r->offs = offs; 365 r->len = len; 366 r->sqnum = sqnum; 367 r->flags = (deletion ? REPLAY_DELETION : 0); 368 r->old_size = old_size; 369 r->new_size = new_size; 370 key_copy(c, key, &r->key); 371 372 rb_link_node(&r->rb, parent, p); 373 rb_insert_color(&r->rb, &c->replay_tree); 374 return 0; 375 } 376 377 /** 378 * insert_dent - insert a directory entry node into the replay tree. 379 * @c: UBIFS file-system description object 380 * @lnum: node logical eraseblock number 381 * @offs: node offset 382 * @len: node length 383 * @key: node key 384 * @name: directory entry name 385 * @nlen: directory entry name length 386 * @sqnum: sequence number 387 * @deletion: non-zero if this is a deletion 388 * @used: number of bytes in use in a LEB 389 * 390 * This function inserts a scanned directory entry node to the replay tree. 391 * Returns zero in case of success and a negative error code in case of 392 * failure. 393 * 394 * This function is also used for extended attribute entries because they are 395 * implemented as directory entry nodes. 396 */ 397 static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, 398 union ubifs_key *key, const char *name, int nlen, 399 unsigned long long sqnum, int deletion, int *used) 400 { 401 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; 402 struct replay_entry *r; 403 char *nbuf; 404 405 if (key_inum(c, key) >= c->highest_inum) 406 c->highest_inum = key_inum(c, key); 407 408 dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); 409 while (*p) { 410 parent = *p; 411 r = rb_entry(parent, struct replay_entry, rb); 412 if (sqnum < r->sqnum) { 413 p = &(*p)->rb_left; 414 continue; 415 } 416 if (sqnum > r->sqnum) { 417 p = &(*p)->rb_right; 418 continue; 419 } 420 ubifs_err("duplicate sqnum in replay"); 421 return -EINVAL; 422 } 423 424 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 425 if (!r) 426 return -ENOMEM; 427 nbuf = kmalloc(nlen + 1, GFP_KERNEL); 428 if (!nbuf) { 429 kfree(r); 430 return -ENOMEM; 431 } 432 433 if (!deletion) 434 *used += ALIGN(len, 8); 435 r->lnum = lnum; 436 r->offs = offs; 437 r->len = len; 438 r->sqnum = sqnum; 439 r->nm.len = nlen; 440 memcpy(nbuf, name, nlen); 441 nbuf[nlen] = '\0'; 442 r->nm.name = nbuf; 443 r->flags = (deletion ? REPLAY_DELETION : 0); 444 key_copy(c, key, &r->key); 445 446 ubifs_assert(!*p); 447 rb_link_node(&r->rb, parent, p); 448 rb_insert_color(&r->rb, &c->replay_tree); 449 return 0; 450 } 451 452 /** 453 * ubifs_validate_entry - validate directory or extended attribute entry node. 454 * @c: UBIFS file-system description object 455 * @dent: the node to validate 456 * 457 * This function validates directory or extended attribute entry node @dent. 458 * Returns zero if the node is all right and a %-EINVAL if not. 459 */ 460 int ubifs_validate_entry(struct ubifs_info *c, 461 const struct ubifs_dent_node *dent) 462 { 463 int key_type = key_type_flash(c, dent->key); 464 int nlen = le16_to_cpu(dent->nlen); 465 466 if (le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1 || 467 dent->type >= UBIFS_ITYPES_CNT || 468 nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 || 469 strnlen((char *)dent->name, nlen) != nlen || 470 le64_to_cpu(dent->inum) > MAX_INUM) { 471 ubifs_err("bad %s node", key_type == UBIFS_DENT_KEY ? 472 "directory entry" : "extended attribute entry"); 473 return -EINVAL; 474 } 475 476 if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) { 477 ubifs_err("bad key type %d", key_type); 478 return -EINVAL; 479 } 480 481 return 0; 482 } 483 484 /** 485 * replay_bud - replay a bud logical eraseblock. 486 * @c: UBIFS file-system description object 487 * @lnum: bud logical eraseblock number to replay 488 * @offs: bud start offset 489 * @jhead: journal head to which this bud belongs 490 * @free: amount of free space in the bud is returned here 491 * @dirty: amount of dirty space from padding and deletion nodes is returned 492 * here 493 * 494 * This function returns zero in case of success and a negative error code in 495 * case of failure. 496 */ 497 static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, 498 int *free, int *dirty) 499 { 500 int err = 0, used = 0; 501 struct ubifs_scan_leb *sleb; 502 struct ubifs_scan_node *snod; 503 struct ubifs_bud *bud; 504 505 dbg_mnt("replay bud LEB %d, head %d", lnum, jhead); 506 if (c->need_recovery) 507 sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD); 508 else 509 sleb = ubifs_scan(c, lnum, offs, c->sbuf); 510 if (IS_ERR(sleb)) 511 return PTR_ERR(sleb); 512 513 /* 514 * The bud does not have to start from offset zero - the beginning of 515 * the 'lnum' LEB may contain previously committed data. One of the 516 * things we have to do in replay is to correctly update lprops with 517 * newer information about this LEB. 518 * 519 * At this point lprops thinks that this LEB has 'c->leb_size - offs' 520 * bytes of free space because it only contain information about 521 * committed data. 522 * 523 * But we know that real amount of free space is 'c->leb_size - 524 * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and 525 * 'sleb->endpt' is used by bud data. We have to correctly calculate 526 * how much of these data are dirty and update lprops with this 527 * information. 528 * 529 * The dirt in that LEB region is comprised of padding nodes, deletion 530 * nodes, truncation nodes and nodes which are obsoleted by subsequent 531 * nodes in this LEB. So instead of calculating clean space, we 532 * calculate used space ('used' variable). 533 */ 534 535 list_for_each_entry(snod, &sleb->nodes, list) { 536 int deletion = 0; 537 538 cond_resched(); 539 540 if (snod->sqnum >= SQNUM_WATERMARK) { 541 ubifs_err("file system's life ended"); 542 goto out_dump; 543 } 544 545 if (snod->sqnum > c->max_sqnum) 546 c->max_sqnum = snod->sqnum; 547 548 switch (snod->type) { 549 case UBIFS_INO_NODE: 550 { 551 struct ubifs_ino_node *ino = snod->node; 552 loff_t new_size = le64_to_cpu(ino->size); 553 554 if (le32_to_cpu(ino->nlink) == 0) 555 deletion = 1; 556 err = insert_node(c, lnum, snod->offs, snod->len, 557 &snod->key, snod->sqnum, deletion, 558 &used, 0, new_size); 559 break; 560 } 561 case UBIFS_DATA_NODE: 562 { 563 struct ubifs_data_node *dn = snod->node; 564 loff_t new_size = le32_to_cpu(dn->size) + 565 key_block(c, &snod->key) * 566 UBIFS_BLOCK_SIZE; 567 568 err = insert_node(c, lnum, snod->offs, snod->len, 569 &snod->key, snod->sqnum, deletion, 570 &used, 0, new_size); 571 break; 572 } 573 case UBIFS_DENT_NODE: 574 case UBIFS_XENT_NODE: 575 { 576 struct ubifs_dent_node *dent = snod->node; 577 578 err = ubifs_validate_entry(c, dent); 579 if (err) 580 goto out_dump; 581 582 err = insert_dent(c, lnum, snod->offs, snod->len, 583 &snod->key, (char *)dent->name, 584 le16_to_cpu(dent->nlen), snod->sqnum, 585 !le64_to_cpu(dent->inum), &used); 586 break; 587 } 588 case UBIFS_TRUN_NODE: 589 { 590 struct ubifs_trun_node *trun = snod->node; 591 loff_t old_size = le64_to_cpu(trun->old_size); 592 loff_t new_size = le64_to_cpu(trun->new_size); 593 union ubifs_key key; 594 595 /* Validate truncation node */ 596 if (old_size < 0 || old_size > c->max_inode_sz || 597 new_size < 0 || new_size > c->max_inode_sz || 598 old_size <= new_size) { 599 ubifs_err("bad truncation node"); 600 goto out_dump; 601 } 602 603 /* 604 * Create a fake truncation key just to use the same 605 * functions which expect nodes to have keys. 606 */ 607 trun_key_init(c, &key, le32_to_cpu(trun->inum)); 608 err = insert_node(c, lnum, snod->offs, snod->len, 609 &key, snod->sqnum, 1, &used, 610 old_size, new_size); 611 break; 612 } 613 default: 614 ubifs_err("unexpected node type %d in bud LEB %d:%d", 615 snod->type, lnum, snod->offs); 616 err = -EINVAL; 617 goto out_dump; 618 } 619 if (err) 620 goto out; 621 } 622 623 bud = ubifs_search_bud(c, lnum); 624 if (!bud) 625 BUG(); 626 627 ubifs_assert(sleb->endpt - offs >= used); 628 ubifs_assert(sleb->endpt % c->min_io_size == 0); 629 630 *dirty = sleb->endpt - offs - used; 631 *free = c->leb_size - sleb->endpt; 632 633 out: 634 ubifs_scan_destroy(sleb); 635 return err; 636 637 out_dump: 638 ubifs_err("bad node is at LEB %d:%d", lnum, snod->offs); 639 dbg_dump_node(c, snod->node); 640 ubifs_scan_destroy(sleb); 641 return -EINVAL; 642 } 643 644 /** 645 * insert_ref_node - insert a reference node to the replay tree. 646 * @c: UBIFS file-system description object 647 * @lnum: node logical eraseblock number 648 * @offs: node offset 649 * @sqnum: sequence number 650 * @free: amount of free space in bud 651 * @dirty: amount of dirty space from padding and deletion nodes 652 * 653 * This function inserts a reference node to the replay tree and returns zero 654 * in case of success or a negative error code in case of failure. 655 */ 656 static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, 657 unsigned long long sqnum, int free, int dirty) 658 { 659 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; 660 struct replay_entry *r; 661 662 dbg_mnt("add ref LEB %d:%d", lnum, offs); 663 while (*p) { 664 parent = *p; 665 r = rb_entry(parent, struct replay_entry, rb); 666 if (sqnum < r->sqnum) { 667 p = &(*p)->rb_left; 668 continue; 669 } else if (sqnum > r->sqnum) { 670 p = &(*p)->rb_right; 671 continue; 672 } 673 ubifs_err("duplicate sqnum in replay tree"); 674 return -EINVAL; 675 } 676 677 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 678 if (!r) 679 return -ENOMEM; 680 681 r->lnum = lnum; 682 r->offs = offs; 683 r->sqnum = sqnum; 684 r->flags = REPLAY_REF; 685 r->free = free; 686 r->dirty = dirty; 687 688 rb_link_node(&r->rb, parent, p); 689 rb_insert_color(&r->rb, &c->replay_tree); 690 return 0; 691 } 692 693 /** 694 * replay_buds - replay all buds. 695 * @c: UBIFS file-system description object 696 * 697 * This function returns zero in case of success and a negative error code in 698 * case of failure. 699 */ 700 static int replay_buds(struct ubifs_info *c) 701 { 702 struct bud_entry *b; 703 int err, uninitialized_var(free), uninitialized_var(dirty); 704 705 list_for_each_entry(b, &c->replay_buds, list) { 706 err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead, 707 &free, &dirty); 708 if (err) 709 return err; 710 err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum, 711 free, dirty); 712 if (err) 713 return err; 714 } 715 716 return 0; 717 } 718 719 /** 720 * destroy_bud_list - destroy the list of buds to replay. 721 * @c: UBIFS file-system description object 722 */ 723 static void destroy_bud_list(struct ubifs_info *c) 724 { 725 struct bud_entry *b; 726 727 while (!list_empty(&c->replay_buds)) { 728 b = list_entry(c->replay_buds.next, struct bud_entry, list); 729 list_del(&b->list); 730 kfree(b); 731 } 732 } 733 734 /** 735 * add_replay_bud - add a bud to the list of buds to replay. 736 * @c: UBIFS file-system description object 737 * @lnum: bud logical eraseblock number to replay 738 * @offs: bud start offset 739 * @jhead: journal head to which this bud belongs 740 * @sqnum: reference node sequence number 741 * 742 * This function returns zero in case of success and a negative error code in 743 * case of failure. 744 */ 745 static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, 746 unsigned long long sqnum) 747 { 748 struct ubifs_bud *bud; 749 struct bud_entry *b; 750 751 dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead); 752 753 bud = kmalloc(sizeof(struct ubifs_bud), GFP_KERNEL); 754 if (!bud) 755 return -ENOMEM; 756 757 b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL); 758 if (!b) { 759 kfree(bud); 760 return -ENOMEM; 761 } 762 763 bud->lnum = lnum; 764 bud->start = offs; 765 bud->jhead = jhead; 766 ubifs_add_bud(c, bud); 767 768 b->bud = bud; 769 b->sqnum = sqnum; 770 list_add_tail(&b->list, &c->replay_buds); 771 772 return 0; 773 } 774 775 /** 776 * validate_ref - validate a reference node. 777 * @c: UBIFS file-system description object 778 * @ref: the reference node to validate 779 * @ref_lnum: LEB number of the reference node 780 * @ref_offs: reference node offset 781 * 782 * This function returns %1 if a bud reference already exists for the LEB. %0 is 783 * returned if the reference node is new, otherwise %-EINVAL is returned if 784 * validation failed. 785 */ 786 static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref) 787 { 788 struct ubifs_bud *bud; 789 int lnum = le32_to_cpu(ref->lnum); 790 unsigned int offs = le32_to_cpu(ref->offs); 791 unsigned int jhead = le32_to_cpu(ref->jhead); 792 793 /* 794 * ref->offs may point to the end of LEB when the journal head points 795 * to the end of LEB and we write reference node for it during commit. 796 * So this is why we require 'offs > c->leb_size'. 797 */ 798 if (jhead >= c->jhead_cnt || lnum >= c->leb_cnt || 799 lnum < c->main_first || offs > c->leb_size || 800 offs & (c->min_io_size - 1)) 801 return -EINVAL; 802 803 /* Make sure we have not already looked at this bud */ 804 bud = ubifs_search_bud(c, lnum); 805 if (bud) { 806 if (bud->jhead == jhead && bud->start <= offs) 807 return 1; 808 ubifs_err("bud at LEB %d:%d was already referred", lnum, offs); 809 return -EINVAL; 810 } 811 812 return 0; 813 } 814 815 /** 816 * replay_log_leb - replay a log logical eraseblock. 817 * @c: UBIFS file-system description object 818 * @lnum: log logical eraseblock to replay 819 * @offs: offset to start replaying from 820 * @sbuf: scan buffer 821 * 822 * This function replays a log LEB and returns zero in case of success, %1 if 823 * this is the last LEB in the log, and a negative error code in case of 824 * failure. 825 */ 826 static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) 827 { 828 int err; 829 struct ubifs_scan_leb *sleb; 830 struct ubifs_scan_node *snod; 831 const struct ubifs_cs_node *node; 832 833 dbg_mnt("replay log LEB %d:%d", lnum, offs); 834 sleb = ubifs_scan(c, lnum, offs, sbuf); 835 if (IS_ERR(sleb)) { 836 if (c->need_recovery) 837 sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); 838 if (IS_ERR(sleb)) 839 return PTR_ERR(sleb); 840 } 841 842 if (sleb->nodes_cnt == 0) { 843 err = 1; 844 goto out; 845 } 846 847 node = sleb->buf; 848 849 snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list); 850 if (c->cs_sqnum == 0) { 851 /* 852 * This is the first log LEB we are looking at, make sure that 853 * the first node is a commit start node. Also record its 854 * sequence number so that UBIFS can determine where the log 855 * ends, because all nodes which were have higher sequence 856 * numbers. 857 */ 858 if (snod->type != UBIFS_CS_NODE) { 859 dbg_err("first log node at LEB %d:%d is not CS node", 860 lnum, offs); 861 goto out_dump; 862 } 863 if (le64_to_cpu(node->cmt_no) != c->cmt_no) { 864 dbg_err("first CS node at LEB %d:%d has wrong " 865 "commit number %llu expected %llu", 866 lnum, offs, 867 (unsigned long long)le64_to_cpu(node->cmt_no), 868 c->cmt_no); 869 goto out_dump; 870 } 871 872 c->cs_sqnum = le64_to_cpu(node->ch.sqnum); 873 dbg_mnt("commit start sqnum %llu", c->cs_sqnum); 874 } 875 876 if (snod->sqnum < c->cs_sqnum) { 877 /* 878 * This means that we reached end of log and now 879 * look to the older log data, which was already 880 * committed but the eraseblock was not erased (UBIFS 881 * only un-maps it). So this basically means we have to 882 * exit with "end of log" code. 883 */ 884 err = 1; 885 goto out; 886 } 887 888 /* Make sure the first node sits at offset zero of the LEB */ 889 if (snod->offs != 0) { 890 dbg_err("first node is not at zero offset"); 891 goto out_dump; 892 } 893 894 list_for_each_entry(snod, &sleb->nodes, list) { 895 896 cond_resched(); 897 898 if (snod->sqnum >= SQNUM_WATERMARK) { 899 ubifs_err("file system's life ended"); 900 goto out_dump; 901 } 902 903 if (snod->sqnum < c->cs_sqnum) { 904 dbg_err("bad sqnum %llu, commit sqnum %llu", 905 snod->sqnum, c->cs_sqnum); 906 goto out_dump; 907 } 908 909 if (snod->sqnum > c->max_sqnum) 910 c->max_sqnum = snod->sqnum; 911 912 switch (snod->type) { 913 case UBIFS_REF_NODE: { 914 const struct ubifs_ref_node *ref = snod->node; 915 916 err = validate_ref(c, ref); 917 if (err == 1) 918 break; /* Already have this bud */ 919 if (err) 920 goto out_dump; 921 922 err = add_replay_bud(c, le32_to_cpu(ref->lnum), 923 le32_to_cpu(ref->offs), 924 le32_to_cpu(ref->jhead), 925 snod->sqnum); 926 if (err) 927 goto out; 928 929 break; 930 } 931 case UBIFS_CS_NODE: 932 /* Make sure it sits at the beginning of LEB */ 933 if (snod->offs != 0) { 934 ubifs_err("unexpected node in log"); 935 goto out_dump; 936 } 937 break; 938 default: 939 ubifs_err("unexpected node in log"); 940 goto out_dump; 941 } 942 } 943 944 if (sleb->endpt || c->lhead_offs >= c->leb_size) { 945 c->lhead_lnum = lnum; 946 c->lhead_offs = sleb->endpt; 947 } 948 949 err = !sleb->endpt; 950 out: 951 ubifs_scan_destroy(sleb); 952 return err; 953 954 out_dump: 955 ubifs_err("log error detected while replying the log at LEB %d:%d", 956 lnum, offs + snod->offs); 957 dbg_dump_node(c, snod->node); 958 ubifs_scan_destroy(sleb); 959 return -EINVAL; 960 } 961 962 /** 963 * take_ihead - update the status of the index head in lprops to 'taken'. 964 * @c: UBIFS file-system description object 965 * 966 * This function returns the amount of free space in the index head LEB or a 967 * negative error code. 968 */ 969 static int take_ihead(struct ubifs_info *c) 970 { 971 const struct ubifs_lprops *lp; 972 int err, free; 973 974 ubifs_get_lprops(c); 975 976 lp = ubifs_lpt_lookup_dirty(c, c->ihead_lnum); 977 if (IS_ERR(lp)) { 978 err = PTR_ERR(lp); 979 goto out; 980 } 981 982 free = lp->free; 983 984 lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, 985 lp->flags | LPROPS_TAKEN, 0); 986 if (IS_ERR(lp)) { 987 err = PTR_ERR(lp); 988 goto out; 989 } 990 991 err = free; 992 out: 993 ubifs_release_lprops(c); 994 return err; 995 } 996 997 /** 998 * ubifs_replay_journal - replay journal. 999 * @c: UBIFS file-system description object 1000 * 1001 * This function scans the journal, replays and cleans it up. It makes sure all 1002 * memory data structures related to uncommitted journal are built (dirty TNC 1003 * tree, tree of buds, modified lprops, etc). 1004 */ 1005 int ubifs_replay_journal(struct ubifs_info *c) 1006 { 1007 int err, i, lnum, offs, _free; 1008 void *sbuf = NULL; 1009 1010 BUILD_BUG_ON(UBIFS_TRUN_KEY > 5); 1011 1012 /* Update the status of the index head in lprops to 'taken' */ 1013 _free = take_ihead(c); 1014 if (_free < 0) 1015 return _free; /* Error code */ 1016 1017 if (c->ihead_offs != c->leb_size - _free) { 1018 ubifs_err("bad index head LEB %d:%d", c->ihead_lnum, 1019 c->ihead_offs); 1020 return -EINVAL; 1021 } 1022 1023 sbuf = vmalloc(c->leb_size); 1024 if (!sbuf) 1025 return -ENOMEM; 1026 1027 dbg_mnt("start replaying the journal"); 1028 1029 c->replaying = 1; 1030 1031 lnum = c->ltail_lnum = c->lhead_lnum; 1032 offs = c->lhead_offs; 1033 1034 for (i = 0; i < c->log_lebs; i++, lnum++) { 1035 if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) { 1036 /* 1037 * The log is logically circular, we reached the last 1038 * LEB, switch to the first one. 1039 */ 1040 lnum = UBIFS_LOG_LNUM; 1041 offs = 0; 1042 } 1043 err = replay_log_leb(c, lnum, offs, sbuf); 1044 if (err == 1) 1045 /* We hit the end of the log */ 1046 break; 1047 if (err) 1048 goto out; 1049 offs = 0; 1050 } 1051 1052 err = replay_buds(c); 1053 if (err) 1054 goto out; 1055 1056 err = apply_replay_tree(c); 1057 if (err) 1058 goto out; 1059 1060 ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery); 1061 dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, " 1062 "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, 1063 (unsigned long)c->highest_inum); 1064 out: 1065 destroy_replay_tree(c); 1066 destroy_bud_list(c); 1067 vfree(sbuf); 1068 c->replaying = 0; 1069 return err; 1070 } 1071