1 /* 2 * This file is part of UBIFS. 3 * 4 * Copyright (C) 2006-2008 Nokia Corporation. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 as published by 8 * the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 * more details. 14 * 15 * You should have received a copy of the GNU General Public License along with 16 * this program; if not, write to the Free Software Foundation, Inc., 51 17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 18 * 19 * Authors: Adrian Hunter 20 * Artem Bityutskiy (Битюцкий Артём) 21 */ 22 23 /* 24 * This file contains journal replay code. It runs when the file-system is being 25 * mounted and requires no locking. 26 * 27 * The larger is the journal, the longer it takes to scan it, so the longer it 28 * takes to mount UBIFS. This is why the journal has limited size which may be 29 * changed depending on the system requirements. But a larger journal gives 30 * faster I/O speed because it writes the index less frequently. So this is a 31 * trade-off. Also, the journal is indexed by the in-memory index (TNC), so the 32 * larger is the journal, the more memory its index may consume. 33 */ 34 35 #include "ubifs.h" 36 37 /* 38 * Replay flags. 39 * 40 * REPLAY_DELETION: node was deleted 41 * REPLAY_REF: node is a reference node 42 */ 43 enum { 44 REPLAY_DELETION = 1, 45 REPLAY_REF = 2, 46 }; 47 48 /** 49 * struct replay_entry - replay tree entry. 50 * @lnum: logical eraseblock number of the node 51 * @offs: node offset 52 * @len: node length 53 * @sqnum: node sequence number 54 * @flags: replay flags 55 * @rb: links the replay tree 56 * @key: node key 57 * @nm: directory entry name 58 * @old_size: truncation old size 59 * @new_size: truncation new size 60 * @free: amount of free space in a bud 61 * @dirty: amount of dirty space in a bud from padding and deletion nodes 62 * 63 * UBIFS journal replay must compare node sequence numbers, which means it must 64 * build a tree of node information to insert into the TNC. 65 */ 66 struct replay_entry { 67 int lnum; 68 int offs; 69 int len; 70 unsigned long long sqnum; 71 int flags; 72 struct rb_node rb; 73 union ubifs_key key; 74 union { 75 struct qstr nm; 76 struct { 77 loff_t old_size; 78 loff_t new_size; 79 }; 80 struct { 81 int free; 82 int dirty; 83 }; 84 }; 85 }; 86 87 /** 88 * struct bud_entry - entry in the list of buds to replay. 89 * @list: next bud in the list 90 * @bud: bud description object 91 * @free: free bytes in the bud 92 * @sqnum: reference node sequence number 93 */ 94 struct bud_entry { 95 struct list_head list; 96 struct ubifs_bud *bud; 97 int free; 98 unsigned long long sqnum; 99 }; 100 101 /** 102 * set_bud_lprops - set free and dirty space used by a bud. 103 * @c: UBIFS file-system description object 104 * @r: replay entry of bud 105 */ 106 static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) 107 { 108 const struct ubifs_lprops *lp; 109 int err = 0, dirty; 110 111 ubifs_get_lprops(c); 112 113 lp = ubifs_lpt_lookup_dirty(c, r->lnum); 114 if (IS_ERR(lp)) { 115 err = PTR_ERR(lp); 116 goto out; 117 } 118 119 dirty = lp->dirty; 120 if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { 121 /* 122 * The LEB was added to the journal with a starting offset of 123 * zero which means the LEB must have been empty. The LEB 124 * property values should be lp->free == c->leb_size and 125 * lp->dirty == 0, but that is not the case. The reason is that 126 * the LEB was garbage collected. The garbage collector resets 127 * the free and dirty space without recording it anywhere except 128 * lprops, so if there is not a commit then lprops does not have 129 * that information next time the file system is mounted. 130 * 131 * We do not need to adjust free space because the scan has told 132 * us the exact value which is recorded in the replay entry as 133 * r->free. 134 * 135 * However we do need to subtract from the dirty space the 136 * amount of space that the garbage collector reclaimed, which 137 * is the whole LEB minus the amount of space that was free. 138 */ 139 dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, 140 lp->free, lp->dirty); 141 dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, 142 lp->free, lp->dirty); 143 dirty -= c->leb_size - lp->free; 144 /* 145 * If the replay order was perfect the dirty space would now be 146 * zero. The order is not perfect because the journal heads 147 * race with each other. This is not a problem but is does mean 148 * that the dirty space may temporarily exceed c->leb_size 149 * during the replay. 150 */ 151 if (dirty != 0) 152 dbg_msg("LEB %d lp: %d free %d dirty " 153 "replay: %d free %d dirty", r->lnum, lp->free, 154 lp->dirty, r->free, r->dirty); 155 } 156 lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty, 157 lp->flags | LPROPS_TAKEN, 0); 158 if (IS_ERR(lp)) { 159 err = PTR_ERR(lp); 160 goto out; 161 } 162 out: 163 ubifs_release_lprops(c); 164 return err; 165 } 166 167 /** 168 * trun_remove_range - apply a replay entry for a truncation to the TNC. 169 * @c: UBIFS file-system description object 170 * @r: replay entry of truncation 171 */ 172 static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r) 173 { 174 unsigned min_blk, max_blk; 175 union ubifs_key min_key, max_key; 176 ino_t ino; 177 178 min_blk = r->new_size / UBIFS_BLOCK_SIZE; 179 if (r->new_size & (UBIFS_BLOCK_SIZE - 1)) 180 min_blk += 1; 181 182 max_blk = r->old_size / UBIFS_BLOCK_SIZE; 183 if ((r->old_size & (UBIFS_BLOCK_SIZE - 1)) == 0) 184 max_blk -= 1; 185 186 ino = key_inum(c, &r->key); 187 188 data_key_init(c, &min_key, ino, min_blk); 189 data_key_init(c, &max_key, ino, max_blk); 190 191 return ubifs_tnc_remove_range(c, &min_key, &max_key); 192 } 193 194 /** 195 * apply_replay_entry - apply a replay entry to the TNC. 196 * @c: UBIFS file-system description object 197 * @r: replay entry to apply 198 * 199 * Apply a replay entry to the TNC. 200 */ 201 static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) 202 { 203 int err, deletion = ((r->flags & REPLAY_DELETION) != 0); 204 205 dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum, 206 r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key)); 207 208 /* Set c->replay_sqnum to help deal with dangling branches. */ 209 c->replay_sqnum = r->sqnum; 210 211 if (r->flags & REPLAY_REF) 212 err = set_bud_lprops(c, r); 213 else if (is_hash_key(c, &r->key)) { 214 if (deletion) 215 err = ubifs_tnc_remove_nm(c, &r->key, &r->nm); 216 else 217 err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs, 218 r->len, &r->nm); 219 } else { 220 if (deletion) 221 switch (key_type(c, &r->key)) { 222 case UBIFS_INO_KEY: 223 { 224 ino_t inum = key_inum(c, &r->key); 225 226 err = ubifs_tnc_remove_ino(c, inum); 227 break; 228 } 229 case UBIFS_TRUN_KEY: 230 err = trun_remove_range(c, r); 231 break; 232 default: 233 err = ubifs_tnc_remove(c, &r->key); 234 break; 235 } 236 else 237 err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs, 238 r->len); 239 if (err) 240 return err; 241 242 if (c->need_recovery) 243 err = ubifs_recover_size_accum(c, &r->key, deletion, 244 r->new_size); 245 } 246 247 return err; 248 } 249 250 /** 251 * destroy_replay_tree - destroy the replay. 252 * @c: UBIFS file-system description object 253 * 254 * Destroy the replay tree. 255 */ 256 static void destroy_replay_tree(struct ubifs_info *c) 257 { 258 struct rb_node *this = c->replay_tree.rb_node; 259 struct replay_entry *r; 260 261 while (this) { 262 if (this->rb_left) { 263 this = this->rb_left; 264 continue; 265 } else if (this->rb_right) { 266 this = this->rb_right; 267 continue; 268 } 269 r = rb_entry(this, struct replay_entry, rb); 270 this = rb_parent(this); 271 if (this) { 272 if (this->rb_left == &r->rb) 273 this->rb_left = NULL; 274 else 275 this->rb_right = NULL; 276 } 277 if (is_hash_key(c, &r->key)) 278 kfree(r->nm.name); 279 kfree(r); 280 } 281 c->replay_tree = RB_ROOT; 282 } 283 284 /** 285 * apply_replay_tree - apply the replay tree to the TNC. 286 * @c: UBIFS file-system description object 287 * 288 * Apply the replay tree. 289 * Returns zero in case of success and a negative error code in case of 290 * failure. 291 */ 292 static int apply_replay_tree(struct ubifs_info *c) 293 { 294 struct rb_node *this = rb_first(&c->replay_tree); 295 296 while (this) { 297 struct replay_entry *r; 298 int err; 299 300 cond_resched(); 301 302 r = rb_entry(this, struct replay_entry, rb); 303 err = apply_replay_entry(c, r); 304 if (err) 305 return err; 306 this = rb_next(this); 307 } 308 return 0; 309 } 310 311 /** 312 * insert_node - insert a node to the replay tree. 313 * @c: UBIFS file-system description object 314 * @lnum: node logical eraseblock number 315 * @offs: node offset 316 * @len: node length 317 * @key: node key 318 * @sqnum: sequence number 319 * @deletion: non-zero if this is a deletion 320 * @used: number of bytes in use in a LEB 321 * @old_size: truncation old size 322 * @new_size: truncation new size 323 * 324 * This function inserts a scanned non-direntry node to the replay tree. The 325 * replay tree is an RB-tree containing @struct replay_entry elements which are 326 * indexed by the sequence number. The replay tree is applied at the very end 327 * of the replay process. Since the tree is sorted in sequence number order, 328 * the older modifications are applied first. This function returns zero in 329 * case of success and a negative error code in case of failure. 330 */ 331 static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, 332 union ubifs_key *key, unsigned long long sqnum, 333 int deletion, int *used, loff_t old_size, 334 loff_t new_size) 335 { 336 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; 337 struct replay_entry *r; 338 339 if (key_inum(c, key) >= c->highest_inum) 340 c->highest_inum = key_inum(c, key); 341 342 dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); 343 while (*p) { 344 parent = *p; 345 r = rb_entry(parent, struct replay_entry, rb); 346 if (sqnum < r->sqnum) { 347 p = &(*p)->rb_left; 348 continue; 349 } else if (sqnum > r->sqnum) { 350 p = &(*p)->rb_right; 351 continue; 352 } 353 ubifs_err("duplicate sqnum in replay"); 354 return -EINVAL; 355 } 356 357 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 358 if (!r) 359 return -ENOMEM; 360 361 if (!deletion) 362 *used += ALIGN(len, 8); 363 r->lnum = lnum; 364 r->offs = offs; 365 r->len = len; 366 r->sqnum = sqnum; 367 r->flags = (deletion ? REPLAY_DELETION : 0); 368 r->old_size = old_size; 369 r->new_size = new_size; 370 key_copy(c, key, &r->key); 371 372 rb_link_node(&r->rb, parent, p); 373 rb_insert_color(&r->rb, &c->replay_tree); 374 return 0; 375 } 376 377 /** 378 * insert_dent - insert a directory entry node into the replay tree. 379 * @c: UBIFS file-system description object 380 * @lnum: node logical eraseblock number 381 * @offs: node offset 382 * @len: node length 383 * @key: node key 384 * @name: directory entry name 385 * @nlen: directory entry name length 386 * @sqnum: sequence number 387 * @deletion: non-zero if this is a deletion 388 * @used: number of bytes in use in a LEB 389 * 390 * This function inserts a scanned directory entry node to the replay tree. 391 * Returns zero in case of success and a negative error code in case of 392 * failure. 393 * 394 * This function is also used for extended attribute entries because they are 395 * implemented as directory entry nodes. 396 */ 397 static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, 398 union ubifs_key *key, const char *name, int nlen, 399 unsigned long long sqnum, int deletion, int *used) 400 { 401 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; 402 struct replay_entry *r; 403 char *nbuf; 404 405 if (key_inum(c, key) >= c->highest_inum) 406 c->highest_inum = key_inum(c, key); 407 408 dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); 409 while (*p) { 410 parent = *p; 411 r = rb_entry(parent, struct replay_entry, rb); 412 if (sqnum < r->sqnum) { 413 p = &(*p)->rb_left; 414 continue; 415 } 416 if (sqnum > r->sqnum) { 417 p = &(*p)->rb_right; 418 continue; 419 } 420 ubifs_err("duplicate sqnum in replay"); 421 return -EINVAL; 422 } 423 424 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 425 if (!r) 426 return -ENOMEM; 427 nbuf = kmalloc(nlen + 1, GFP_KERNEL); 428 if (!nbuf) { 429 kfree(r); 430 return -ENOMEM; 431 } 432 433 if (!deletion) 434 *used += ALIGN(len, 8); 435 r->lnum = lnum; 436 r->offs = offs; 437 r->len = len; 438 r->sqnum = sqnum; 439 r->nm.len = nlen; 440 memcpy(nbuf, name, nlen); 441 nbuf[nlen] = '\0'; 442 r->nm.name = nbuf; 443 r->flags = (deletion ? REPLAY_DELETION : 0); 444 key_copy(c, key, &r->key); 445 446 ubifs_assert(!*p); 447 rb_link_node(&r->rb, parent, p); 448 rb_insert_color(&r->rb, &c->replay_tree); 449 return 0; 450 } 451 452 /** 453 * ubifs_validate_entry - validate directory or extended attribute entry node. 454 * @c: UBIFS file-system description object 455 * @dent: the node to validate 456 * 457 * This function validates directory or extended attribute entry node @dent. 458 * Returns zero if the node is all right and a %-EINVAL if not. 459 */ 460 int ubifs_validate_entry(struct ubifs_info *c, 461 const struct ubifs_dent_node *dent) 462 { 463 int key_type = key_type_flash(c, dent->key); 464 int nlen = le16_to_cpu(dent->nlen); 465 466 if (le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1 || 467 dent->type >= UBIFS_ITYPES_CNT || 468 nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 || 469 strnlen(dent->name, nlen) != nlen || 470 le64_to_cpu(dent->inum) > MAX_INUM) { 471 ubifs_err("bad %s node", key_type == UBIFS_DENT_KEY ? 472 "directory entry" : "extended attribute entry"); 473 return -EINVAL; 474 } 475 476 if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) { 477 ubifs_err("bad key type %d", key_type); 478 return -EINVAL; 479 } 480 481 return 0; 482 } 483 484 /** 485 * replay_bud - replay a bud logical eraseblock. 486 * @c: UBIFS file-system description object 487 * @lnum: bud logical eraseblock number to replay 488 * @offs: bud start offset 489 * @jhead: journal head to which this bud belongs 490 * @free: amount of free space in the bud is returned here 491 * @dirty: amount of dirty space from padding and deletion nodes is returned 492 * here 493 * 494 * This function returns zero in case of success and a negative error code in 495 * case of failure. 496 */ 497 static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, 498 int *free, int *dirty) 499 { 500 int err = 0, used = 0; 501 struct ubifs_scan_leb *sleb; 502 struct ubifs_scan_node *snod; 503 struct ubifs_bud *bud; 504 505 dbg_mnt("replay bud LEB %d, head %d", lnum, jhead); 506 if (c->need_recovery) 507 sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD); 508 else 509 sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); 510 if (IS_ERR(sleb)) 511 return PTR_ERR(sleb); 512 513 /* 514 * The bud does not have to start from offset zero - the beginning of 515 * the 'lnum' LEB may contain previously committed data. One of the 516 * things we have to do in replay is to correctly update lprops with 517 * newer information about this LEB. 518 * 519 * At this point lprops thinks that this LEB has 'c->leb_size - offs' 520 * bytes of free space because it only contain information about 521 * committed data. 522 * 523 * But we know that real amount of free space is 'c->leb_size - 524 * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and 525 * 'sleb->endpt' is used by bud data. We have to correctly calculate 526 * how much of these data are dirty and update lprops with this 527 * information. 528 * 529 * The dirt in that LEB region is comprised of padding nodes, deletion 530 * nodes, truncation nodes and nodes which are obsoleted by subsequent 531 * nodes in this LEB. So instead of calculating clean space, we 532 * calculate used space ('used' variable). 533 */ 534 535 list_for_each_entry(snod, &sleb->nodes, list) { 536 int deletion = 0; 537 538 cond_resched(); 539 540 if (snod->sqnum >= SQNUM_WATERMARK) { 541 ubifs_err("file system's life ended"); 542 goto out_dump; 543 } 544 545 if (snod->sqnum > c->max_sqnum) 546 c->max_sqnum = snod->sqnum; 547 548 switch (snod->type) { 549 case UBIFS_INO_NODE: 550 { 551 struct ubifs_ino_node *ino = snod->node; 552 loff_t new_size = le64_to_cpu(ino->size); 553 554 if (le32_to_cpu(ino->nlink) == 0) 555 deletion = 1; 556 err = insert_node(c, lnum, snod->offs, snod->len, 557 &snod->key, snod->sqnum, deletion, 558 &used, 0, new_size); 559 break; 560 } 561 case UBIFS_DATA_NODE: 562 { 563 struct ubifs_data_node *dn = snod->node; 564 loff_t new_size = le32_to_cpu(dn->size) + 565 key_block(c, &snod->key) * 566 UBIFS_BLOCK_SIZE; 567 568 err = insert_node(c, lnum, snod->offs, snod->len, 569 &snod->key, snod->sqnum, deletion, 570 &used, 0, new_size); 571 break; 572 } 573 case UBIFS_DENT_NODE: 574 case UBIFS_XENT_NODE: 575 { 576 struct ubifs_dent_node *dent = snod->node; 577 578 err = ubifs_validate_entry(c, dent); 579 if (err) 580 goto out_dump; 581 582 err = insert_dent(c, lnum, snod->offs, snod->len, 583 &snod->key, dent->name, 584 le16_to_cpu(dent->nlen), snod->sqnum, 585 !le64_to_cpu(dent->inum), &used); 586 break; 587 } 588 case UBIFS_TRUN_NODE: 589 { 590 struct ubifs_trun_node *trun = snod->node; 591 loff_t old_size = le64_to_cpu(trun->old_size); 592 loff_t new_size = le64_to_cpu(trun->new_size); 593 union ubifs_key key; 594 595 /* Validate truncation node */ 596 if (old_size < 0 || old_size > c->max_inode_sz || 597 new_size < 0 || new_size > c->max_inode_sz || 598 old_size <= new_size) { 599 ubifs_err("bad truncation node"); 600 goto out_dump; 601 } 602 603 /* 604 * Create a fake truncation key just to use the same 605 * functions which expect nodes to have keys. 606 */ 607 trun_key_init(c, &key, le32_to_cpu(trun->inum)); 608 err = insert_node(c, lnum, snod->offs, snod->len, 609 &key, snod->sqnum, 1, &used, 610 old_size, new_size); 611 break; 612 } 613 default: 614 ubifs_err("unexpected node type %d in bud LEB %d:%d", 615 snod->type, lnum, snod->offs); 616 err = -EINVAL; 617 goto out_dump; 618 } 619 if (err) 620 goto out; 621 } 622 623 bud = ubifs_search_bud(c, lnum); 624 if (!bud) 625 BUG(); 626 627 ubifs_assert(sleb->endpt - offs >= used); 628 ubifs_assert(sleb->endpt % c->min_io_size == 0); 629 630 if (sleb->endpt + c->min_io_size <= c->leb_size && 631 !(c->vfs_sb->s_flags & MS_RDONLY)) 632 err = ubifs_wbuf_seek_nolock(&c->jheads[jhead].wbuf, lnum, 633 sleb->endpt, UBI_SHORTTERM); 634 635 *dirty = sleb->endpt - offs - used; 636 *free = c->leb_size - sleb->endpt; 637 638 out: 639 ubifs_scan_destroy(sleb); 640 return err; 641 642 out_dump: 643 ubifs_err("bad node is at LEB %d:%d", lnum, snod->offs); 644 dbg_dump_node(c, snod->node); 645 ubifs_scan_destroy(sleb); 646 return -EINVAL; 647 } 648 649 /** 650 * insert_ref_node - insert a reference node to the replay tree. 651 * @c: UBIFS file-system description object 652 * @lnum: node logical eraseblock number 653 * @offs: node offset 654 * @sqnum: sequence number 655 * @free: amount of free space in bud 656 * @dirty: amount of dirty space from padding and deletion nodes 657 * 658 * This function inserts a reference node to the replay tree and returns zero 659 * in case of success or a negative error code in case of failure. 660 */ 661 static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, 662 unsigned long long sqnum, int free, int dirty) 663 { 664 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; 665 struct replay_entry *r; 666 667 dbg_mnt("add ref LEB %d:%d", lnum, offs); 668 while (*p) { 669 parent = *p; 670 r = rb_entry(parent, struct replay_entry, rb); 671 if (sqnum < r->sqnum) { 672 p = &(*p)->rb_left; 673 continue; 674 } else if (sqnum > r->sqnum) { 675 p = &(*p)->rb_right; 676 continue; 677 } 678 ubifs_err("duplicate sqnum in replay tree"); 679 return -EINVAL; 680 } 681 682 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 683 if (!r) 684 return -ENOMEM; 685 686 r->lnum = lnum; 687 r->offs = offs; 688 r->sqnum = sqnum; 689 r->flags = REPLAY_REF; 690 r->free = free; 691 r->dirty = dirty; 692 693 rb_link_node(&r->rb, parent, p); 694 rb_insert_color(&r->rb, &c->replay_tree); 695 return 0; 696 } 697 698 /** 699 * replay_buds - replay all buds. 700 * @c: UBIFS file-system description object 701 * 702 * This function returns zero in case of success and a negative error code in 703 * case of failure. 704 */ 705 static int replay_buds(struct ubifs_info *c) 706 { 707 struct bud_entry *b; 708 int err, uninitialized_var(free), uninitialized_var(dirty); 709 710 list_for_each_entry(b, &c->replay_buds, list) { 711 err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead, 712 &free, &dirty); 713 if (err) 714 return err; 715 err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum, 716 free, dirty); 717 if (err) 718 return err; 719 } 720 721 return 0; 722 } 723 724 /** 725 * destroy_bud_list - destroy the list of buds to replay. 726 * @c: UBIFS file-system description object 727 */ 728 static void destroy_bud_list(struct ubifs_info *c) 729 { 730 struct bud_entry *b; 731 732 while (!list_empty(&c->replay_buds)) { 733 b = list_entry(c->replay_buds.next, struct bud_entry, list); 734 list_del(&b->list); 735 kfree(b); 736 } 737 } 738 739 /** 740 * add_replay_bud - add a bud to the list of buds to replay. 741 * @c: UBIFS file-system description object 742 * @lnum: bud logical eraseblock number to replay 743 * @offs: bud start offset 744 * @jhead: journal head to which this bud belongs 745 * @sqnum: reference node sequence number 746 * 747 * This function returns zero in case of success and a negative error code in 748 * case of failure. 749 */ 750 static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, 751 unsigned long long sqnum) 752 { 753 struct ubifs_bud *bud; 754 struct bud_entry *b; 755 756 dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead); 757 758 bud = kmalloc(sizeof(struct ubifs_bud), GFP_KERNEL); 759 if (!bud) 760 return -ENOMEM; 761 762 b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL); 763 if (!b) { 764 kfree(bud); 765 return -ENOMEM; 766 } 767 768 bud->lnum = lnum; 769 bud->start = offs; 770 bud->jhead = jhead; 771 ubifs_add_bud(c, bud); 772 773 b->bud = bud; 774 b->sqnum = sqnum; 775 list_add_tail(&b->list, &c->replay_buds); 776 777 return 0; 778 } 779 780 /** 781 * validate_ref - validate a reference node. 782 * @c: UBIFS file-system description object 783 * @ref: the reference node to validate 784 * @ref_lnum: LEB number of the reference node 785 * @ref_offs: reference node offset 786 * 787 * This function returns %1 if a bud reference already exists for the LEB. %0 is 788 * returned if the reference node is new, otherwise %-EINVAL is returned if 789 * validation failed. 790 */ 791 static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref) 792 { 793 struct ubifs_bud *bud; 794 int lnum = le32_to_cpu(ref->lnum); 795 unsigned int offs = le32_to_cpu(ref->offs); 796 unsigned int jhead = le32_to_cpu(ref->jhead); 797 798 /* 799 * ref->offs may point to the end of LEB when the journal head points 800 * to the end of LEB and we write reference node for it during commit. 801 * So this is why we require 'offs > c->leb_size'. 802 */ 803 if (jhead >= c->jhead_cnt || lnum >= c->leb_cnt || 804 lnum < c->main_first || offs > c->leb_size || 805 offs & (c->min_io_size - 1)) 806 return -EINVAL; 807 808 /* Make sure we have not already looked at this bud */ 809 bud = ubifs_search_bud(c, lnum); 810 if (bud) { 811 if (bud->jhead == jhead && bud->start <= offs) 812 return 1; 813 ubifs_err("bud at LEB %d:%d was already referred", lnum, offs); 814 return -EINVAL; 815 } 816 817 return 0; 818 } 819 820 /** 821 * replay_log_leb - replay a log logical eraseblock. 822 * @c: UBIFS file-system description object 823 * @lnum: log logical eraseblock to replay 824 * @offs: offset to start replaying from 825 * @sbuf: scan buffer 826 * 827 * This function replays a log LEB and returns zero in case of success, %1 if 828 * this is the last LEB in the log, and a negative error code in case of 829 * failure. 830 */ 831 static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) 832 { 833 int err; 834 struct ubifs_scan_leb *sleb; 835 struct ubifs_scan_node *snod; 836 const struct ubifs_cs_node *node; 837 838 dbg_mnt("replay log LEB %d:%d", lnum, offs); 839 sleb = ubifs_scan(c, lnum, offs, sbuf, c->need_recovery); 840 if (IS_ERR(sleb)) { 841 if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery) 842 return PTR_ERR(sleb); 843 sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); 844 if (IS_ERR(sleb)) 845 return PTR_ERR(sleb); 846 } 847 848 if (sleb->nodes_cnt == 0) { 849 err = 1; 850 goto out; 851 } 852 853 node = sleb->buf; 854 855 snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list); 856 if (c->cs_sqnum == 0) { 857 /* 858 * This is the first log LEB we are looking at, make sure that 859 * the first node is a commit start node. Also record its 860 * sequence number so that UBIFS can determine where the log 861 * ends, because all nodes which were have higher sequence 862 * numbers. 863 */ 864 if (snod->type != UBIFS_CS_NODE) { 865 dbg_err("first log node at LEB %d:%d is not CS node", 866 lnum, offs); 867 goto out_dump; 868 } 869 if (le64_to_cpu(node->cmt_no) != c->cmt_no) { 870 dbg_err("first CS node at LEB %d:%d has wrong " 871 "commit number %llu expected %llu", 872 lnum, offs, 873 (unsigned long long)le64_to_cpu(node->cmt_no), 874 c->cmt_no); 875 goto out_dump; 876 } 877 878 c->cs_sqnum = le64_to_cpu(node->ch.sqnum); 879 dbg_mnt("commit start sqnum %llu", c->cs_sqnum); 880 } 881 882 if (snod->sqnum < c->cs_sqnum) { 883 /* 884 * This means that we reached end of log and now 885 * look to the older log data, which was already 886 * committed but the eraseblock was not erased (UBIFS 887 * only un-maps it). So this basically means we have to 888 * exit with "end of log" code. 889 */ 890 err = 1; 891 goto out; 892 } 893 894 /* Make sure the first node sits at offset zero of the LEB */ 895 if (snod->offs != 0) { 896 dbg_err("first node is not at zero offset"); 897 goto out_dump; 898 } 899 900 list_for_each_entry(snod, &sleb->nodes, list) { 901 902 cond_resched(); 903 904 if (snod->sqnum >= SQNUM_WATERMARK) { 905 ubifs_err("file system's life ended"); 906 goto out_dump; 907 } 908 909 if (snod->sqnum < c->cs_sqnum) { 910 dbg_err("bad sqnum %llu, commit sqnum %llu", 911 snod->sqnum, c->cs_sqnum); 912 goto out_dump; 913 } 914 915 if (snod->sqnum > c->max_sqnum) 916 c->max_sqnum = snod->sqnum; 917 918 switch (snod->type) { 919 case UBIFS_REF_NODE: { 920 const struct ubifs_ref_node *ref = snod->node; 921 922 err = validate_ref(c, ref); 923 if (err == 1) 924 break; /* Already have this bud */ 925 if (err) 926 goto out_dump; 927 928 err = add_replay_bud(c, le32_to_cpu(ref->lnum), 929 le32_to_cpu(ref->offs), 930 le32_to_cpu(ref->jhead), 931 snod->sqnum); 932 if (err) 933 goto out; 934 935 break; 936 } 937 case UBIFS_CS_NODE: 938 /* Make sure it sits at the beginning of LEB */ 939 if (snod->offs != 0) { 940 ubifs_err("unexpected node in log"); 941 goto out_dump; 942 } 943 break; 944 default: 945 ubifs_err("unexpected node in log"); 946 goto out_dump; 947 } 948 } 949 950 if (sleb->endpt || c->lhead_offs >= c->leb_size) { 951 c->lhead_lnum = lnum; 952 c->lhead_offs = sleb->endpt; 953 } 954 955 err = !sleb->endpt; 956 out: 957 ubifs_scan_destroy(sleb); 958 return err; 959 960 out_dump: 961 ubifs_err("log error detected while replaying the log at LEB %d:%d", 962 lnum, offs + snod->offs); 963 dbg_dump_node(c, snod->node); 964 ubifs_scan_destroy(sleb); 965 return -EINVAL; 966 } 967 968 /** 969 * take_ihead - update the status of the index head in lprops to 'taken'. 970 * @c: UBIFS file-system description object 971 * 972 * This function returns the amount of free space in the index head LEB or a 973 * negative error code. 974 */ 975 static int take_ihead(struct ubifs_info *c) 976 { 977 const struct ubifs_lprops *lp; 978 int err, free; 979 980 ubifs_get_lprops(c); 981 982 lp = ubifs_lpt_lookup_dirty(c, c->ihead_lnum); 983 if (IS_ERR(lp)) { 984 err = PTR_ERR(lp); 985 goto out; 986 } 987 988 free = lp->free; 989 990 lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, 991 lp->flags | LPROPS_TAKEN, 0); 992 if (IS_ERR(lp)) { 993 err = PTR_ERR(lp); 994 goto out; 995 } 996 997 err = free; 998 out: 999 ubifs_release_lprops(c); 1000 return err; 1001 } 1002 1003 /** 1004 * ubifs_replay_journal - replay journal. 1005 * @c: UBIFS file-system description object 1006 * 1007 * This function scans the journal, replays and cleans it up. It makes sure all 1008 * memory data structures related to uncommitted journal are built (dirty TNC 1009 * tree, tree of buds, modified lprops, etc). 1010 */ 1011 int ubifs_replay_journal(struct ubifs_info *c) 1012 { 1013 int err, i, lnum, offs, free; 1014 void *sbuf = NULL; 1015 1016 BUILD_BUG_ON(UBIFS_TRUN_KEY > 5); 1017 1018 /* Update the status of the index head in lprops to 'taken' */ 1019 free = take_ihead(c); 1020 if (free < 0) 1021 return free; /* Error code */ 1022 1023 if (c->ihead_offs != c->leb_size - free) { 1024 ubifs_err("bad index head LEB %d:%d", c->ihead_lnum, 1025 c->ihead_offs); 1026 return -EINVAL; 1027 } 1028 1029 sbuf = vmalloc(c->leb_size); 1030 if (!sbuf) 1031 return -ENOMEM; 1032 1033 dbg_mnt("start replaying the journal"); 1034 1035 c->replaying = 1; 1036 1037 lnum = c->ltail_lnum = c->lhead_lnum; 1038 offs = c->lhead_offs; 1039 1040 for (i = 0; i < c->log_lebs; i++, lnum++) { 1041 if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) { 1042 /* 1043 * The log is logically circular, we reached the last 1044 * LEB, switch to the first one. 1045 */ 1046 lnum = UBIFS_LOG_LNUM; 1047 offs = 0; 1048 } 1049 err = replay_log_leb(c, lnum, offs, sbuf); 1050 if (err == 1) 1051 /* We hit the end of the log */ 1052 break; 1053 if (err) 1054 goto out; 1055 offs = 0; 1056 } 1057 1058 err = replay_buds(c); 1059 if (err) 1060 goto out; 1061 1062 err = apply_replay_tree(c); 1063 if (err) 1064 goto out; 1065 1066 /* 1067 * UBIFS budgeting calculations use @c->budg_uncommitted_idx variable 1068 * to roughly estimate index growth. Things like @c->min_idx_lebs 1069 * depend on it. This means we have to initialize it to make sure 1070 * budgeting works properly. 1071 */ 1072 c->budg_uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); 1073 c->budg_uncommitted_idx *= c->max_idx_node_sz; 1074 1075 ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery); 1076 dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, " 1077 "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, 1078 (unsigned long)c->highest_inum); 1079 out: 1080 destroy_replay_tree(c); 1081 destroy_bud_list(c); 1082 vfree(sbuf); 1083 c->replaying = 0; 1084 return err; 1085 } 1086