1 /* 2 * This file is part of UBIFS. 3 * 4 * Copyright (C) 2006-2008 Nokia Corporation. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 as published by 8 * the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 * more details. 14 * 15 * You should have received a copy of the GNU General Public License along with 16 * this program; if not, write to the Free Software Foundation, Inc., 51 17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 18 * 19 * Authors: Adrian Hunter 20 * Artem Bityutskiy (Битюцкий Артём) 21 */ 22 23 /* 24 * This file contains journal replay code. It runs when the file-system is being 25 * mounted and requires no locking. 26 * 27 * The larger is the journal, the longer it takes to scan it, so the longer it 28 * takes to mount UBIFS. This is why the journal has limited size which may be 29 * changed depending on the system requirements. But a larger journal gives 30 * faster I/O speed because it writes the index less frequently. So this is a 31 * trade-off. Also, the journal is indexed by the in-memory index (TNC), so the 32 * larger is the journal, the more memory its index may consume. 33 */ 34 35 #include "ubifs.h" 36 37 /* 38 * Replay flags. 39 * 40 * REPLAY_DELETION: node was deleted 41 * REPLAY_REF: node is a reference node 42 */ 43 enum { 44 REPLAY_DELETION = 1, 45 REPLAY_REF = 2, 46 }; 47 48 /** 49 * struct replay_entry - replay tree entry. 50 * @lnum: logical eraseblock number of the node 51 * @offs: node offset 52 * @len: node length 53 * @sqnum: node sequence number 54 * @flags: replay flags 55 * @rb: links the replay tree 56 * @key: node key 57 * @nm: directory entry name 58 * @old_size: truncation old size 59 * @new_size: truncation new size 60 * @free: amount of free space in a bud 61 * @dirty: amount of dirty space in a bud from padding and deletion nodes 62 * 63 * UBIFS journal replay must compare node sequence numbers, which means it must 64 * build a tree of node information to insert into the TNC. 65 */ 66 struct replay_entry { 67 int lnum; 68 int offs; 69 int len; 70 unsigned long long sqnum; 71 int flags; 72 struct rb_node rb; 73 union ubifs_key key; 74 union { 75 struct qstr nm; 76 struct { 77 loff_t old_size; 78 loff_t new_size; 79 }; 80 struct { 81 int free; 82 int dirty; 83 }; 84 }; 85 }; 86 87 /** 88 * struct bud_entry - entry in the list of buds to replay. 89 * @list: next bud in the list 90 * @bud: bud description object 91 * @free: free bytes in the bud 92 * @sqnum: reference node sequence number 93 */ 94 struct bud_entry { 95 struct list_head list; 96 struct ubifs_bud *bud; 97 int free; 98 unsigned long long sqnum; 99 }; 100 101 /** 102 * set_bud_lprops - set free and dirty space used by a bud. 103 * @c: UBIFS file-system description object 104 * @r: replay entry of bud 105 */ 106 static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) 107 { 108 const struct ubifs_lprops *lp; 109 int err = 0, dirty; 110 111 ubifs_get_lprops(c); 112 113 lp = ubifs_lpt_lookup_dirty(c, r->lnum); 114 if (IS_ERR(lp)) { 115 err = PTR_ERR(lp); 116 goto out; 117 } 118 119 dirty = lp->dirty; 120 if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { 121 /* 122 * The LEB was added to the journal with a starting offset of 123 * zero which means the LEB must have been empty. The LEB 124 * property values should be lp->free == c->leb_size and 125 * lp->dirty == 0, but that is not the case. The reason is that 126 * the LEB was garbage collected. The garbage collector resets 127 * the free and dirty space without recording it anywhere except 128 * lprops, so if there is not a commit then lprops does not have 129 * that information next time the file system is mounted. 130 * 131 * We do not need to adjust free space because the scan has told 132 * us the exact value which is recorded in the replay entry as 133 * r->free. 134 * 135 * However we do need to subtract from the dirty space the 136 * amount of space that the garbage collector reclaimed, which 137 * is the whole LEB minus the amount of space that was free. 138 */ 139 dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, 140 lp->free, lp->dirty); 141 dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, 142 lp->free, lp->dirty); 143 dirty -= c->leb_size - lp->free; 144 /* 145 * If the replay order was perfect the dirty space would now be 146 * zero. The order is not perfect because the journal heads 147 * race with each other. This is not a problem but is does mean 148 * that the dirty space may temporarily exceed c->leb_size 149 * during the replay. 150 */ 151 if (dirty != 0) 152 dbg_msg("LEB %d lp: %d free %d dirty " 153 "replay: %d free %d dirty", r->lnum, lp->free, 154 lp->dirty, r->free, r->dirty); 155 } 156 lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty, 157 lp->flags | LPROPS_TAKEN, 0); 158 if (IS_ERR(lp)) { 159 err = PTR_ERR(lp); 160 goto out; 161 } 162 out: 163 ubifs_release_lprops(c); 164 return err; 165 } 166 167 /** 168 * trun_remove_range - apply a replay entry for a truncation to the TNC. 169 * @c: UBIFS file-system description object 170 * @r: replay entry of truncation 171 */ 172 static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r) 173 { 174 unsigned min_blk, max_blk; 175 union ubifs_key min_key, max_key; 176 ino_t ino; 177 178 min_blk = r->new_size / UBIFS_BLOCK_SIZE; 179 if (r->new_size & (UBIFS_BLOCK_SIZE - 1)) 180 min_blk += 1; 181 182 max_blk = r->old_size / UBIFS_BLOCK_SIZE; 183 if ((r->old_size & (UBIFS_BLOCK_SIZE - 1)) == 0) 184 max_blk -= 1; 185 186 ino = key_inum(c, &r->key); 187 188 data_key_init(c, &min_key, ino, min_blk); 189 data_key_init(c, &max_key, ino, max_blk); 190 191 return ubifs_tnc_remove_range(c, &min_key, &max_key); 192 } 193 194 /** 195 * apply_replay_entry - apply a replay entry to the TNC. 196 * @c: UBIFS file-system description object 197 * @r: replay entry to apply 198 * 199 * Apply a replay entry to the TNC. 200 */ 201 static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) 202 { 203 int err, deletion = ((r->flags & REPLAY_DELETION) != 0); 204 205 dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum, 206 r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key)); 207 208 /* Set c->replay_sqnum to help deal with dangling branches. */ 209 c->replay_sqnum = r->sqnum; 210 211 if (r->flags & REPLAY_REF) 212 err = set_bud_lprops(c, r); 213 else if (is_hash_key(c, &r->key)) { 214 if (deletion) 215 err = ubifs_tnc_remove_nm(c, &r->key, &r->nm); 216 else 217 err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs, 218 r->len, &r->nm); 219 } else { 220 if (deletion) 221 switch (key_type(c, &r->key)) { 222 case UBIFS_INO_KEY: 223 { 224 ino_t inum = key_inum(c, &r->key); 225 226 err = ubifs_tnc_remove_ino(c, inum); 227 break; 228 } 229 case UBIFS_TRUN_KEY: 230 err = trun_remove_range(c, r); 231 break; 232 default: 233 err = ubifs_tnc_remove(c, &r->key); 234 break; 235 } 236 else 237 err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs, 238 r->len); 239 if (err) 240 return err; 241 242 if (c->need_recovery) 243 err = ubifs_recover_size_accum(c, &r->key, deletion, 244 r->new_size); 245 } 246 247 return err; 248 } 249 250 /** 251 * destroy_replay_tree - destroy the replay. 252 * @c: UBIFS file-system description object 253 * 254 * Destroy the replay tree. 255 */ 256 static void destroy_replay_tree(struct ubifs_info *c) 257 { 258 struct rb_node *this = c->replay_tree.rb_node; 259 struct replay_entry *r; 260 261 while (this) { 262 if (this->rb_left) { 263 this = this->rb_left; 264 continue; 265 } else if (this->rb_right) { 266 this = this->rb_right; 267 continue; 268 } 269 r = rb_entry(this, struct replay_entry, rb); 270 this = rb_parent(this); 271 if (this) { 272 if (this->rb_left == &r->rb) 273 this->rb_left = NULL; 274 else 275 this->rb_right = NULL; 276 } 277 if (is_hash_key(c, &r->key)) 278 kfree(r->nm.name); 279 kfree(r); 280 } 281 c->replay_tree = RB_ROOT; 282 } 283 284 /** 285 * apply_replay_tree - apply the replay tree to the TNC. 286 * @c: UBIFS file-system description object 287 * 288 * Apply the replay tree. 289 * Returns zero in case of success and a negative error code in case of 290 * failure. 291 */ 292 static int apply_replay_tree(struct ubifs_info *c) 293 { 294 struct rb_node *this = rb_first(&c->replay_tree); 295 296 while (this) { 297 struct replay_entry *r; 298 int err; 299 300 cond_resched(); 301 302 r = rb_entry(this, struct replay_entry, rb); 303 err = apply_replay_entry(c, r); 304 if (err) 305 return err; 306 this = rb_next(this); 307 } 308 return 0; 309 } 310 311 /** 312 * insert_node - insert a node to the replay tree. 313 * @c: UBIFS file-system description object 314 * @lnum: node logical eraseblock number 315 * @offs: node offset 316 * @len: node length 317 * @key: node key 318 * @sqnum: sequence number 319 * @deletion: non-zero if this is a deletion 320 * @used: number of bytes in use in a LEB 321 * @old_size: truncation old size 322 * @new_size: truncation new size 323 * 324 * This function inserts a scanned non-direntry node to the replay tree. The 325 * replay tree is an RB-tree containing @struct replay_entry elements which are 326 * indexed by the sequence number. The replay tree is applied at the very end 327 * of the replay process. Since the tree is sorted in sequence number order, 328 * the older modifications are applied first. This function returns zero in 329 * case of success and a negative error code in case of failure. 330 */ 331 static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, 332 union ubifs_key *key, unsigned long long sqnum, 333 int deletion, int *used, loff_t old_size, 334 loff_t new_size) 335 { 336 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; 337 struct replay_entry *r; 338 339 if (key_inum(c, key) >= c->highest_inum) 340 c->highest_inum = key_inum(c, key); 341 342 dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); 343 while (*p) { 344 parent = *p; 345 r = rb_entry(parent, struct replay_entry, rb); 346 if (sqnum < r->sqnum) { 347 p = &(*p)->rb_left; 348 continue; 349 } else if (sqnum > r->sqnum) { 350 p = &(*p)->rb_right; 351 continue; 352 } 353 ubifs_err("duplicate sqnum in replay"); 354 return -EINVAL; 355 } 356 357 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 358 if (!r) 359 return -ENOMEM; 360 361 if (!deletion) 362 *used += ALIGN(len, 8); 363 r->lnum = lnum; 364 r->offs = offs; 365 r->len = len; 366 r->sqnum = sqnum; 367 r->flags = (deletion ? REPLAY_DELETION : 0); 368 r->old_size = old_size; 369 r->new_size = new_size; 370 key_copy(c, key, &r->key); 371 372 rb_link_node(&r->rb, parent, p); 373 rb_insert_color(&r->rb, &c->replay_tree); 374 return 0; 375 } 376 377 /** 378 * insert_dent - insert a directory entry node into the replay tree. 379 * @c: UBIFS file-system description object 380 * @lnum: node logical eraseblock number 381 * @offs: node offset 382 * @len: node length 383 * @key: node key 384 * @name: directory entry name 385 * @nlen: directory entry name length 386 * @sqnum: sequence number 387 * @deletion: non-zero if this is a deletion 388 * @used: number of bytes in use in a LEB 389 * 390 * This function inserts a scanned directory entry node to the replay tree. 391 * Returns zero in case of success and a negative error code in case of 392 * failure. 393 * 394 * This function is also used for extended attribute entries because they are 395 * implemented as directory entry nodes. 396 */ 397 static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, 398 union ubifs_key *key, const char *name, int nlen, 399 unsigned long long sqnum, int deletion, int *used) 400 { 401 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; 402 struct replay_entry *r; 403 char *nbuf; 404 405 if (key_inum(c, key) >= c->highest_inum) 406 c->highest_inum = key_inum(c, key); 407 408 dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); 409 while (*p) { 410 parent = *p; 411 r = rb_entry(parent, struct replay_entry, rb); 412 if (sqnum < r->sqnum) { 413 p = &(*p)->rb_left; 414 continue; 415 } 416 if (sqnum > r->sqnum) { 417 p = &(*p)->rb_right; 418 continue; 419 } 420 ubifs_err("duplicate sqnum in replay"); 421 return -EINVAL; 422 } 423 424 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 425 if (!r) 426 return -ENOMEM; 427 nbuf = kmalloc(nlen + 1, GFP_KERNEL); 428 if (!nbuf) { 429 kfree(r); 430 return -ENOMEM; 431 } 432 433 if (!deletion) 434 *used += ALIGN(len, 8); 435 r->lnum = lnum; 436 r->offs = offs; 437 r->len = len; 438 r->sqnum = sqnum; 439 r->nm.len = nlen; 440 memcpy(nbuf, name, nlen); 441 nbuf[nlen] = '\0'; 442 r->nm.name = nbuf; 443 r->flags = (deletion ? REPLAY_DELETION : 0); 444 key_copy(c, key, &r->key); 445 446 ubifs_assert(!*p); 447 rb_link_node(&r->rb, parent, p); 448 rb_insert_color(&r->rb, &c->replay_tree); 449 return 0; 450 } 451 452 /** 453 * ubifs_validate_entry - validate directory or extended attribute entry node. 454 * @c: UBIFS file-system description object 455 * @dent: the node to validate 456 * 457 * This function validates directory or extended attribute entry node @dent. 458 * Returns zero if the node is all right and a %-EINVAL if not. 459 */ 460 int ubifs_validate_entry(struct ubifs_info *c, 461 const struct ubifs_dent_node *dent) 462 { 463 int key_type = key_type_flash(c, dent->key); 464 int nlen = le16_to_cpu(dent->nlen); 465 466 if (le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1 || 467 dent->type >= UBIFS_ITYPES_CNT || 468 nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 || 469 strnlen(dent->name, nlen) != nlen || 470 le64_to_cpu(dent->inum) > MAX_INUM) { 471 ubifs_err("bad %s node", key_type == UBIFS_DENT_KEY ? 472 "directory entry" : "extended attribute entry"); 473 return -EINVAL; 474 } 475 476 if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) { 477 ubifs_err("bad key type %d", key_type); 478 return -EINVAL; 479 } 480 481 return 0; 482 } 483 484 /** 485 * replay_bud - replay a bud logical eraseblock. 486 * @c: UBIFS file-system description object 487 * @lnum: bud logical eraseblock number to replay 488 * @offs: bud start offset 489 * @jhead: journal head to which this bud belongs 490 * @free: amount of free space in the bud is returned here 491 * @dirty: amount of dirty space from padding and deletion nodes is returned 492 * here 493 * 494 * This function returns zero in case of success and a negative error code in 495 * case of failure. 496 */ 497 static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, 498 int *free, int *dirty) 499 { 500 int err = 0, used = 0; 501 struct ubifs_scan_leb *sleb; 502 struct ubifs_scan_node *snod; 503 struct ubifs_bud *bud; 504 505 dbg_mnt("replay bud LEB %d, head %d", lnum, jhead); 506 if (c->need_recovery) 507 sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD); 508 else 509 sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); 510 if (IS_ERR(sleb)) 511 return PTR_ERR(sleb); 512 513 /* 514 * The bud does not have to start from offset zero - the beginning of 515 * the 'lnum' LEB may contain previously committed data. One of the 516 * things we have to do in replay is to correctly update lprops with 517 * newer information about this LEB. 518 * 519 * At this point lprops thinks that this LEB has 'c->leb_size - offs' 520 * bytes of free space because it only contain information about 521 * committed data. 522 * 523 * But we know that real amount of free space is 'c->leb_size - 524 * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and 525 * 'sleb->endpt' is used by bud data. We have to correctly calculate 526 * how much of these data are dirty and update lprops with this 527 * information. 528 * 529 * The dirt in that LEB region is comprised of padding nodes, deletion 530 * nodes, truncation nodes and nodes which are obsoleted by subsequent 531 * nodes in this LEB. So instead of calculating clean space, we 532 * calculate used space ('used' variable). 533 */ 534 535 list_for_each_entry(snod, &sleb->nodes, list) { 536 int deletion = 0; 537 538 cond_resched(); 539 540 if (snod->sqnum >= SQNUM_WATERMARK) { 541 ubifs_err("file system's life ended"); 542 goto out_dump; 543 } 544 545 if (snod->sqnum > c->max_sqnum) 546 c->max_sqnum = snod->sqnum; 547 548 switch (snod->type) { 549 case UBIFS_INO_NODE: 550 { 551 struct ubifs_ino_node *ino = snod->node; 552 loff_t new_size = le64_to_cpu(ino->size); 553 554 if (le32_to_cpu(ino->nlink) == 0) 555 deletion = 1; 556 err = insert_node(c, lnum, snod->offs, snod->len, 557 &snod->key, snod->sqnum, deletion, 558 &used, 0, new_size); 559 break; 560 } 561 case UBIFS_DATA_NODE: 562 { 563 struct ubifs_data_node *dn = snod->node; 564 loff_t new_size = le32_to_cpu(dn->size) + 565 key_block(c, &snod->key) * 566 UBIFS_BLOCK_SIZE; 567 568 err = insert_node(c, lnum, snod->offs, snod->len, 569 &snod->key, snod->sqnum, deletion, 570 &used, 0, new_size); 571 break; 572 } 573 case UBIFS_DENT_NODE: 574 case UBIFS_XENT_NODE: 575 { 576 struct ubifs_dent_node *dent = snod->node; 577 578 err = ubifs_validate_entry(c, dent); 579 if (err) 580 goto out_dump; 581 582 err = insert_dent(c, lnum, snod->offs, snod->len, 583 &snod->key, dent->name, 584 le16_to_cpu(dent->nlen), snod->sqnum, 585 !le64_to_cpu(dent->inum), &used); 586 break; 587 } 588 case UBIFS_TRUN_NODE: 589 { 590 struct ubifs_trun_node *trun = snod->node; 591 loff_t old_size = le64_to_cpu(trun->old_size); 592 loff_t new_size = le64_to_cpu(trun->new_size); 593 union ubifs_key key; 594 595 /* Validate truncation node */ 596 if (old_size < 0 || old_size > c->max_inode_sz || 597 new_size < 0 || new_size > c->max_inode_sz || 598 old_size <= new_size) { 599 ubifs_err("bad truncation node"); 600 goto out_dump; 601 } 602 603 /* 604 * Create a fake truncation key just to use the same 605 * functions which expect nodes to have keys. 606 */ 607 trun_key_init(c, &key, le32_to_cpu(trun->inum)); 608 err = insert_node(c, lnum, snod->offs, snod->len, 609 &key, snod->sqnum, 1, &used, 610 old_size, new_size); 611 break; 612 } 613 default: 614 ubifs_err("unexpected node type %d in bud LEB %d:%d", 615 snod->type, lnum, snod->offs); 616 err = -EINVAL; 617 goto out_dump; 618 } 619 if (err) 620 goto out; 621 } 622 623 bud = ubifs_search_bud(c, lnum); 624 if (!bud) 625 BUG(); 626 627 ubifs_assert(sleb->endpt - offs >= used); 628 ubifs_assert(sleb->endpt % c->min_io_size == 0); 629 630 if (sleb->endpt + c->min_io_size <= c->leb_size && !c->ro_mount) 631 err = ubifs_wbuf_seek_nolock(&c->jheads[jhead].wbuf, lnum, 632 sleb->endpt, UBI_SHORTTERM); 633 634 *dirty = sleb->endpt - offs - used; 635 *free = c->leb_size - sleb->endpt; 636 637 out: 638 ubifs_scan_destroy(sleb); 639 return err; 640 641 out_dump: 642 ubifs_err("bad node is at LEB %d:%d", lnum, snod->offs); 643 dbg_dump_node(c, snod->node); 644 ubifs_scan_destroy(sleb); 645 return -EINVAL; 646 } 647 648 /** 649 * insert_ref_node - insert a reference node to the replay tree. 650 * @c: UBIFS file-system description object 651 * @lnum: node logical eraseblock number 652 * @offs: node offset 653 * @sqnum: sequence number 654 * @free: amount of free space in bud 655 * @dirty: amount of dirty space from padding and deletion nodes 656 * 657 * This function inserts a reference node to the replay tree and returns zero 658 * in case of success or a negative error code in case of failure. 659 */ 660 static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, 661 unsigned long long sqnum, int free, int dirty) 662 { 663 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; 664 struct replay_entry *r; 665 666 dbg_mnt("add ref LEB %d:%d", lnum, offs); 667 while (*p) { 668 parent = *p; 669 r = rb_entry(parent, struct replay_entry, rb); 670 if (sqnum < r->sqnum) { 671 p = &(*p)->rb_left; 672 continue; 673 } else if (sqnum > r->sqnum) { 674 p = &(*p)->rb_right; 675 continue; 676 } 677 ubifs_err("duplicate sqnum in replay tree"); 678 return -EINVAL; 679 } 680 681 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 682 if (!r) 683 return -ENOMEM; 684 685 r->lnum = lnum; 686 r->offs = offs; 687 r->sqnum = sqnum; 688 r->flags = REPLAY_REF; 689 r->free = free; 690 r->dirty = dirty; 691 692 rb_link_node(&r->rb, parent, p); 693 rb_insert_color(&r->rb, &c->replay_tree); 694 return 0; 695 } 696 697 /** 698 * replay_buds - replay all buds. 699 * @c: UBIFS file-system description object 700 * 701 * This function returns zero in case of success and a negative error code in 702 * case of failure. 703 */ 704 static int replay_buds(struct ubifs_info *c) 705 { 706 struct bud_entry *b; 707 int err, uninitialized_var(free), uninitialized_var(dirty); 708 709 list_for_each_entry(b, &c->replay_buds, list) { 710 err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead, 711 &free, &dirty); 712 if (err) 713 return err; 714 err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum, 715 free, dirty); 716 if (err) 717 return err; 718 } 719 720 return 0; 721 } 722 723 /** 724 * destroy_bud_list - destroy the list of buds to replay. 725 * @c: UBIFS file-system description object 726 */ 727 static void destroy_bud_list(struct ubifs_info *c) 728 { 729 struct bud_entry *b; 730 731 while (!list_empty(&c->replay_buds)) { 732 b = list_entry(c->replay_buds.next, struct bud_entry, list); 733 list_del(&b->list); 734 kfree(b); 735 } 736 } 737 738 /** 739 * add_replay_bud - add a bud to the list of buds to replay. 740 * @c: UBIFS file-system description object 741 * @lnum: bud logical eraseblock number to replay 742 * @offs: bud start offset 743 * @jhead: journal head to which this bud belongs 744 * @sqnum: reference node sequence number 745 * 746 * This function returns zero in case of success and a negative error code in 747 * case of failure. 748 */ 749 static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, 750 unsigned long long sqnum) 751 { 752 struct ubifs_bud *bud; 753 struct bud_entry *b; 754 755 dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead); 756 757 bud = kmalloc(sizeof(struct ubifs_bud), GFP_KERNEL); 758 if (!bud) 759 return -ENOMEM; 760 761 b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL); 762 if (!b) { 763 kfree(bud); 764 return -ENOMEM; 765 } 766 767 bud->lnum = lnum; 768 bud->start = offs; 769 bud->jhead = jhead; 770 ubifs_add_bud(c, bud); 771 772 b->bud = bud; 773 b->sqnum = sqnum; 774 list_add_tail(&b->list, &c->replay_buds); 775 776 return 0; 777 } 778 779 /** 780 * validate_ref - validate a reference node. 781 * @c: UBIFS file-system description object 782 * @ref: the reference node to validate 783 * @ref_lnum: LEB number of the reference node 784 * @ref_offs: reference node offset 785 * 786 * This function returns %1 if a bud reference already exists for the LEB. %0 is 787 * returned if the reference node is new, otherwise %-EINVAL is returned if 788 * validation failed. 789 */ 790 static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref) 791 { 792 struct ubifs_bud *bud; 793 int lnum = le32_to_cpu(ref->lnum); 794 unsigned int offs = le32_to_cpu(ref->offs); 795 unsigned int jhead = le32_to_cpu(ref->jhead); 796 797 /* 798 * ref->offs may point to the end of LEB when the journal head points 799 * to the end of LEB and we write reference node for it during commit. 800 * So this is why we require 'offs > c->leb_size'. 801 */ 802 if (jhead >= c->jhead_cnt || lnum >= c->leb_cnt || 803 lnum < c->main_first || offs > c->leb_size || 804 offs & (c->min_io_size - 1)) 805 return -EINVAL; 806 807 /* Make sure we have not already looked at this bud */ 808 bud = ubifs_search_bud(c, lnum); 809 if (bud) { 810 if (bud->jhead == jhead && bud->start <= offs) 811 return 1; 812 ubifs_err("bud at LEB %d:%d was already referred", lnum, offs); 813 return -EINVAL; 814 } 815 816 return 0; 817 } 818 819 /** 820 * replay_log_leb - replay a log logical eraseblock. 821 * @c: UBIFS file-system description object 822 * @lnum: log logical eraseblock to replay 823 * @offs: offset to start replaying from 824 * @sbuf: scan buffer 825 * 826 * This function replays a log LEB and returns zero in case of success, %1 if 827 * this is the last LEB in the log, and a negative error code in case of 828 * failure. 829 */ 830 static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) 831 { 832 int err; 833 struct ubifs_scan_leb *sleb; 834 struct ubifs_scan_node *snod; 835 const struct ubifs_cs_node *node; 836 837 dbg_mnt("replay log LEB %d:%d", lnum, offs); 838 sleb = ubifs_scan(c, lnum, offs, sbuf, c->need_recovery); 839 if (IS_ERR(sleb)) { 840 if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery) 841 return PTR_ERR(sleb); 842 /* 843 * Note, the below function will recover this log LEB only if 844 * it is the last, because unclean reboots can possibly corrupt 845 * only the tail of the log. 846 */ 847 sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); 848 if (IS_ERR(sleb)) 849 return PTR_ERR(sleb); 850 } 851 852 if (sleb->nodes_cnt == 0) { 853 err = 1; 854 goto out; 855 } 856 857 node = sleb->buf; 858 snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list); 859 if (c->cs_sqnum == 0) { 860 /* 861 * This is the first log LEB we are looking at, make sure that 862 * the first node is a commit start node. Also record its 863 * sequence number so that UBIFS can determine where the log 864 * ends, because all nodes which were have higher sequence 865 * numbers. 866 */ 867 if (snod->type != UBIFS_CS_NODE) { 868 dbg_err("first log node at LEB %d:%d is not CS node", 869 lnum, offs); 870 goto out_dump; 871 } 872 if (le64_to_cpu(node->cmt_no) != c->cmt_no) { 873 dbg_err("first CS node at LEB %d:%d has wrong " 874 "commit number %llu expected %llu", 875 lnum, offs, 876 (unsigned long long)le64_to_cpu(node->cmt_no), 877 c->cmt_no); 878 goto out_dump; 879 } 880 881 c->cs_sqnum = le64_to_cpu(node->ch.sqnum); 882 dbg_mnt("commit start sqnum %llu", c->cs_sqnum); 883 } 884 885 if (snod->sqnum < c->cs_sqnum) { 886 /* 887 * This means that we reached end of log and now 888 * look to the older log data, which was already 889 * committed but the eraseblock was not erased (UBIFS 890 * only un-maps it). So this basically means we have to 891 * exit with "end of log" code. 892 */ 893 err = 1; 894 goto out; 895 } 896 897 /* Make sure the first node sits at offset zero of the LEB */ 898 if (snod->offs != 0) { 899 dbg_err("first node is not at zero offset"); 900 goto out_dump; 901 } 902 903 list_for_each_entry(snod, &sleb->nodes, list) { 904 cond_resched(); 905 906 if (snod->sqnum >= SQNUM_WATERMARK) { 907 ubifs_err("file system's life ended"); 908 goto out_dump; 909 } 910 911 if (snod->sqnum < c->cs_sqnum) { 912 dbg_err("bad sqnum %llu, commit sqnum %llu", 913 snod->sqnum, c->cs_sqnum); 914 goto out_dump; 915 } 916 917 if (snod->sqnum > c->max_sqnum) 918 c->max_sqnum = snod->sqnum; 919 920 switch (snod->type) { 921 case UBIFS_REF_NODE: { 922 const struct ubifs_ref_node *ref = snod->node; 923 924 err = validate_ref(c, ref); 925 if (err == 1) 926 break; /* Already have this bud */ 927 if (err) 928 goto out_dump; 929 930 err = add_replay_bud(c, le32_to_cpu(ref->lnum), 931 le32_to_cpu(ref->offs), 932 le32_to_cpu(ref->jhead), 933 snod->sqnum); 934 if (err) 935 goto out; 936 937 break; 938 } 939 case UBIFS_CS_NODE: 940 /* Make sure it sits at the beginning of LEB */ 941 if (snod->offs != 0) { 942 ubifs_err("unexpected node in log"); 943 goto out_dump; 944 } 945 break; 946 default: 947 ubifs_err("unexpected node in log"); 948 goto out_dump; 949 } 950 } 951 952 if (sleb->endpt || c->lhead_offs >= c->leb_size) { 953 c->lhead_lnum = lnum; 954 c->lhead_offs = sleb->endpt; 955 } 956 957 err = !sleb->endpt; 958 out: 959 ubifs_scan_destroy(sleb); 960 return err; 961 962 out_dump: 963 ubifs_err("log error detected while replaying the log at LEB %d:%d", 964 lnum, offs + snod->offs); 965 dbg_dump_node(c, snod->node); 966 ubifs_scan_destroy(sleb); 967 return -EINVAL; 968 } 969 970 /** 971 * take_ihead - update the status of the index head in lprops to 'taken'. 972 * @c: UBIFS file-system description object 973 * 974 * This function returns the amount of free space in the index head LEB or a 975 * negative error code. 976 */ 977 static int take_ihead(struct ubifs_info *c) 978 { 979 const struct ubifs_lprops *lp; 980 int err, free; 981 982 ubifs_get_lprops(c); 983 984 lp = ubifs_lpt_lookup_dirty(c, c->ihead_lnum); 985 if (IS_ERR(lp)) { 986 err = PTR_ERR(lp); 987 goto out; 988 } 989 990 free = lp->free; 991 992 lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, 993 lp->flags | LPROPS_TAKEN, 0); 994 if (IS_ERR(lp)) { 995 err = PTR_ERR(lp); 996 goto out; 997 } 998 999 err = free; 1000 out: 1001 ubifs_release_lprops(c); 1002 return err; 1003 } 1004 1005 /** 1006 * ubifs_replay_journal - replay journal. 1007 * @c: UBIFS file-system description object 1008 * 1009 * This function scans the journal, replays and cleans it up. It makes sure all 1010 * memory data structures related to uncommitted journal are built (dirty TNC 1011 * tree, tree of buds, modified lprops, etc). 1012 */ 1013 int ubifs_replay_journal(struct ubifs_info *c) 1014 { 1015 int err, i, lnum, offs, free; 1016 1017 BUILD_BUG_ON(UBIFS_TRUN_KEY > 5); 1018 1019 /* Update the status of the index head in lprops to 'taken' */ 1020 free = take_ihead(c); 1021 if (free < 0) 1022 return free; /* Error code */ 1023 1024 if (c->ihead_offs != c->leb_size - free) { 1025 ubifs_err("bad index head LEB %d:%d", c->ihead_lnum, 1026 c->ihead_offs); 1027 return -EINVAL; 1028 } 1029 1030 dbg_mnt("start replaying the journal"); 1031 c->replaying = 1; 1032 lnum = c->ltail_lnum = c->lhead_lnum; 1033 offs = c->lhead_offs; 1034 1035 for (i = 0; i < c->log_lebs; i++, lnum++) { 1036 if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) { 1037 /* 1038 * The log is logically circular, we reached the last 1039 * LEB, switch to the first one. 1040 */ 1041 lnum = UBIFS_LOG_LNUM; 1042 offs = 0; 1043 } 1044 err = replay_log_leb(c, lnum, offs, c->sbuf); 1045 if (err == 1) 1046 /* We hit the end of the log */ 1047 break; 1048 if (err) 1049 goto out; 1050 offs = 0; 1051 } 1052 1053 err = replay_buds(c); 1054 if (err) 1055 goto out; 1056 1057 err = apply_replay_tree(c); 1058 if (err) 1059 goto out; 1060 1061 /* 1062 * UBIFS budgeting calculations use @c->budg_uncommitted_idx variable 1063 * to roughly estimate index growth. Things like @c->min_idx_lebs 1064 * depend on it. This means we have to initialize it to make sure 1065 * budgeting works properly. 1066 */ 1067 c->budg_uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); 1068 c->budg_uncommitted_idx *= c->max_idx_node_sz; 1069 1070 ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery); 1071 dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, " 1072 "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, 1073 (unsigned long)c->highest_inum); 1074 out: 1075 destroy_replay_tree(c); 1076 destroy_bud_list(c); 1077 c->replaying = 0; 1078 return err; 1079 } 1080