1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * This file is part of UBIFS. 4 * 5 * Copyright (C) 2006-2008 Nokia Corporation. 6 * 7 * Authors: Adrian Hunter 8 * Artem Bityutskiy (Битюцкий Артём) 9 */ 10 11 /* 12 * This file contains journal replay code. It runs when the file-system is being 13 * mounted and requires no locking. 14 * 15 * The larger is the journal, the longer it takes to scan it, so the longer it 16 * takes to mount UBIFS. This is why the journal has limited size which may be 17 * changed depending on the system requirements. But a larger journal gives 18 * faster I/O speed because it writes the index less frequently. So this is a 19 * trade-off. Also, the journal is indexed by the in-memory index (TNC), so the 20 * larger is the journal, the more memory its index may consume. 21 */ 22 23 #include "ubifs.h" 24 #include <linux/list_sort.h> 25 #include <crypto/hash.h> 26 #include <crypto/algapi.h> 27 28 /** 29 * struct replay_entry - replay list entry. 30 * @lnum: logical eraseblock number of the node 31 * @offs: node offset 32 * @len: node length 33 * @deletion: non-zero if this entry corresponds to a node deletion 34 * @sqnum: node sequence number 35 * @list: links the replay list 36 * @key: node key 37 * @nm: directory entry name 38 * @old_size: truncation old size 39 * @new_size: truncation new size 40 * 41 * The replay process first scans all buds and builds the replay list, then 42 * sorts the replay list in nodes sequence number order, and then inserts all 43 * the replay entries to the TNC. 44 */ 45 struct replay_entry { 46 int lnum; 47 int offs; 48 int len; 49 u8 hash[UBIFS_HASH_ARR_SZ]; 50 unsigned int deletion:1; 51 unsigned long long sqnum; 52 struct list_head list; 53 union ubifs_key key; 54 union { 55 struct fscrypt_name nm; 56 struct { 57 loff_t old_size; 58 loff_t new_size; 59 }; 60 }; 61 }; 62 63 /** 64 * struct bud_entry - entry in the list of buds to replay. 65 * @list: next bud in the list 66 * @bud: bud description object 67 * @sqnum: reference node sequence number 68 * @free: free bytes in the bud 69 * @dirty: dirty bytes in the bud 70 */ 71 struct bud_entry { 72 struct list_head list; 73 struct ubifs_bud *bud; 74 unsigned long long sqnum; 75 int free; 76 int dirty; 77 }; 78 79 /** 80 * set_bud_lprops - set free and dirty space used by a bud. 81 * @c: UBIFS file-system description object 82 * @b: bud entry which describes the bud 83 * 84 * This function makes sure the LEB properties of bud @b are set correctly 85 * after the replay. Returns zero in case of success and a negative error code 86 * in case of failure. 87 */ 88 static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b) 89 { 90 const struct ubifs_lprops *lp; 91 int err = 0, dirty; 92 93 ubifs_get_lprops(c); 94 95 lp = ubifs_lpt_lookup_dirty(c, b->bud->lnum); 96 if (IS_ERR(lp)) { 97 err = PTR_ERR(lp); 98 goto out; 99 } 100 101 dirty = lp->dirty; 102 if (b->bud->start == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { 103 /* 104 * The LEB was added to the journal with a starting offset of 105 * zero which means the LEB must have been empty. The LEB 106 * property values should be @lp->free == @c->leb_size and 107 * @lp->dirty == 0, but that is not the case. The reason is that 108 * the LEB had been garbage collected before it became the bud, 109 * and there was not commit inbetween. The garbage collector 110 * resets the free and dirty space without recording it 111 * anywhere except lprops, so if there was no commit then 112 * lprops does not have that information. 113 * 114 * We do not need to adjust free space because the scan has told 115 * us the exact value which is recorded in the replay entry as 116 * @b->free. 117 * 118 * However we do need to subtract from the dirty space the 119 * amount of space that the garbage collector reclaimed, which 120 * is the whole LEB minus the amount of space that was free. 121 */ 122 dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, 123 lp->free, lp->dirty); 124 dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, 125 lp->free, lp->dirty); 126 dirty -= c->leb_size - lp->free; 127 /* 128 * If the replay order was perfect the dirty space would now be 129 * zero. The order is not perfect because the journal heads 130 * race with each other. This is not a problem but is does mean 131 * that the dirty space may temporarily exceed c->leb_size 132 * during the replay. 133 */ 134 if (dirty != 0) 135 dbg_mnt("LEB %d lp: %d free %d dirty replay: %d free %d dirty", 136 b->bud->lnum, lp->free, lp->dirty, b->free, 137 b->dirty); 138 } 139 lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty, 140 lp->flags | LPROPS_TAKEN, 0); 141 if (IS_ERR(lp)) { 142 err = PTR_ERR(lp); 143 goto out; 144 } 145 146 /* Make sure the journal head points to the latest bud */ 147 err = ubifs_wbuf_seek_nolock(&c->jheads[b->bud->jhead].wbuf, 148 b->bud->lnum, c->leb_size - b->free); 149 150 out: 151 ubifs_release_lprops(c); 152 return err; 153 } 154 155 /** 156 * set_buds_lprops - set free and dirty space for all replayed buds. 157 * @c: UBIFS file-system description object 158 * 159 * This function sets LEB properties for all replayed buds. Returns zero in 160 * case of success and a negative error code in case of failure. 161 */ 162 static int set_buds_lprops(struct ubifs_info *c) 163 { 164 struct bud_entry *b; 165 int err; 166 167 list_for_each_entry(b, &c->replay_buds, list) { 168 err = set_bud_lprops(c, b); 169 if (err) 170 return err; 171 } 172 173 return 0; 174 } 175 176 /** 177 * trun_remove_range - apply a replay entry for a truncation to the TNC. 178 * @c: UBIFS file-system description object 179 * @r: replay entry of truncation 180 */ 181 static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r) 182 { 183 unsigned min_blk, max_blk; 184 union ubifs_key min_key, max_key; 185 ino_t ino; 186 187 min_blk = r->new_size / UBIFS_BLOCK_SIZE; 188 if (r->new_size & (UBIFS_BLOCK_SIZE - 1)) 189 min_blk += 1; 190 191 max_blk = r->old_size / UBIFS_BLOCK_SIZE; 192 if ((r->old_size & (UBIFS_BLOCK_SIZE - 1)) == 0) 193 max_blk -= 1; 194 195 ino = key_inum(c, &r->key); 196 197 data_key_init(c, &min_key, ino, min_blk); 198 data_key_init(c, &max_key, ino, max_blk); 199 200 return ubifs_tnc_remove_range(c, &min_key, &max_key); 201 } 202 203 /** 204 * inode_still_linked - check whether inode in question will be re-linked. 205 * @c: UBIFS file-system description object 206 * @rino: replay entry to test 207 * 208 * O_TMPFILE files can be re-linked, this means link count goes from 0 to 1. 209 * This case needs special care, otherwise all references to the inode will 210 * be removed upon the first replay entry of an inode with link count 0 211 * is found. 212 */ 213 static bool inode_still_linked(struct ubifs_info *c, struct replay_entry *rino) 214 { 215 struct replay_entry *r; 216 217 ubifs_assert(c, rino->deletion); 218 ubifs_assert(c, key_type(c, &rino->key) == UBIFS_INO_KEY); 219 220 /* 221 * Find the most recent entry for the inode behind @rino and check 222 * whether it is a deletion. 223 */ 224 list_for_each_entry_reverse(r, &c->replay_list, list) { 225 ubifs_assert(c, r->sqnum >= rino->sqnum); 226 if (key_inum(c, &r->key) == key_inum(c, &rino->key)) 227 return r->deletion == 0; 228 229 } 230 231 ubifs_assert(c, 0); 232 return false; 233 } 234 235 /** 236 * apply_replay_entry - apply a replay entry to the TNC. 237 * @c: UBIFS file-system description object 238 * @r: replay entry to apply 239 * 240 * Apply a replay entry to the TNC. 241 */ 242 static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) 243 { 244 int err; 245 246 dbg_mntk(&r->key, "LEB %d:%d len %d deletion %d sqnum %llu key ", 247 r->lnum, r->offs, r->len, r->deletion, r->sqnum); 248 249 if (is_hash_key(c, &r->key)) { 250 if (r->deletion) 251 err = ubifs_tnc_remove_nm(c, &r->key, &r->nm); 252 else 253 err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs, 254 r->len, r->hash, &r->nm); 255 } else { 256 if (r->deletion) 257 switch (key_type(c, &r->key)) { 258 case UBIFS_INO_KEY: 259 { 260 ino_t inum = key_inum(c, &r->key); 261 262 if (inode_still_linked(c, r)) { 263 err = 0; 264 break; 265 } 266 267 err = ubifs_tnc_remove_ino(c, inum); 268 break; 269 } 270 case UBIFS_TRUN_KEY: 271 err = trun_remove_range(c, r); 272 break; 273 default: 274 err = ubifs_tnc_remove(c, &r->key); 275 break; 276 } 277 else 278 err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs, 279 r->len, r->hash); 280 if (err) 281 return err; 282 283 if (c->need_recovery) 284 err = ubifs_recover_size_accum(c, &r->key, r->deletion, 285 r->new_size); 286 } 287 288 return err; 289 } 290 291 /** 292 * replay_entries_cmp - compare 2 replay entries. 293 * @priv: UBIFS file-system description object 294 * @a: first replay entry 295 * @b: second replay entry 296 * 297 * This is a comparios function for 'list_sort()' which compares 2 replay 298 * entries @a and @b by comparing their sequence numer. Returns %1 if @a has 299 * greater sequence number and %-1 otherwise. 300 */ 301 static int replay_entries_cmp(void *priv, struct list_head *a, 302 struct list_head *b) 303 { 304 struct ubifs_info *c = priv; 305 struct replay_entry *ra, *rb; 306 307 cond_resched(); 308 if (a == b) 309 return 0; 310 311 ra = list_entry(a, struct replay_entry, list); 312 rb = list_entry(b, struct replay_entry, list); 313 ubifs_assert(c, ra->sqnum != rb->sqnum); 314 if (ra->sqnum > rb->sqnum) 315 return 1; 316 return -1; 317 } 318 319 /** 320 * apply_replay_list - apply the replay list to the TNC. 321 * @c: UBIFS file-system description object 322 * 323 * Apply all entries in the replay list to the TNC. Returns zero in case of 324 * success and a negative error code in case of failure. 325 */ 326 static int apply_replay_list(struct ubifs_info *c) 327 { 328 struct replay_entry *r; 329 int err; 330 331 list_sort(c, &c->replay_list, &replay_entries_cmp); 332 333 list_for_each_entry(r, &c->replay_list, list) { 334 cond_resched(); 335 336 err = apply_replay_entry(c, r); 337 if (err) 338 return err; 339 } 340 341 return 0; 342 } 343 344 /** 345 * destroy_replay_list - destroy the replay. 346 * @c: UBIFS file-system description object 347 * 348 * Destroy the replay list. 349 */ 350 static void destroy_replay_list(struct ubifs_info *c) 351 { 352 struct replay_entry *r, *tmp; 353 354 list_for_each_entry_safe(r, tmp, &c->replay_list, list) { 355 if (is_hash_key(c, &r->key)) 356 kfree(fname_name(&r->nm)); 357 list_del(&r->list); 358 kfree(r); 359 } 360 } 361 362 /** 363 * insert_node - insert a node to the replay list 364 * @c: UBIFS file-system description object 365 * @lnum: node logical eraseblock number 366 * @offs: node offset 367 * @len: node length 368 * @key: node key 369 * @sqnum: sequence number 370 * @deletion: non-zero if this is a deletion 371 * @used: number of bytes in use in a LEB 372 * @old_size: truncation old size 373 * @new_size: truncation new size 374 * 375 * This function inserts a scanned non-direntry node to the replay list. The 376 * replay list contains @struct replay_entry elements, and we sort this list in 377 * sequence number order before applying it. The replay list is applied at the 378 * very end of the replay process. Since the list is sorted in sequence number 379 * order, the older modifications are applied first. This function returns zero 380 * in case of success and a negative error code in case of failure. 381 */ 382 static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, 383 const u8 *hash, union ubifs_key *key, 384 unsigned long long sqnum, int deletion, int *used, 385 loff_t old_size, loff_t new_size) 386 { 387 struct replay_entry *r; 388 389 dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs); 390 391 if (key_inum(c, key) >= c->highest_inum) 392 c->highest_inum = key_inum(c, key); 393 394 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 395 if (!r) 396 return -ENOMEM; 397 398 if (!deletion) 399 *used += ALIGN(len, 8); 400 r->lnum = lnum; 401 r->offs = offs; 402 r->len = len; 403 ubifs_copy_hash(c, hash, r->hash); 404 r->deletion = !!deletion; 405 r->sqnum = sqnum; 406 key_copy(c, key, &r->key); 407 r->old_size = old_size; 408 r->new_size = new_size; 409 410 list_add_tail(&r->list, &c->replay_list); 411 return 0; 412 } 413 414 /** 415 * insert_dent - insert a directory entry node into the replay list. 416 * @c: UBIFS file-system description object 417 * @lnum: node logical eraseblock number 418 * @offs: node offset 419 * @len: node length 420 * @key: node key 421 * @name: directory entry name 422 * @nlen: directory entry name length 423 * @sqnum: sequence number 424 * @deletion: non-zero if this is a deletion 425 * @used: number of bytes in use in a LEB 426 * 427 * This function inserts a scanned directory entry node or an extended 428 * attribute entry to the replay list. Returns zero in case of success and a 429 * negative error code in case of failure. 430 */ 431 static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, 432 const u8 *hash, union ubifs_key *key, 433 const char *name, int nlen, unsigned long long sqnum, 434 int deletion, int *used) 435 { 436 struct replay_entry *r; 437 char *nbuf; 438 439 dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs); 440 if (key_inum(c, key) >= c->highest_inum) 441 c->highest_inum = key_inum(c, key); 442 443 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 444 if (!r) 445 return -ENOMEM; 446 447 nbuf = kmalloc(nlen + 1, GFP_KERNEL); 448 if (!nbuf) { 449 kfree(r); 450 return -ENOMEM; 451 } 452 453 if (!deletion) 454 *used += ALIGN(len, 8); 455 r->lnum = lnum; 456 r->offs = offs; 457 r->len = len; 458 ubifs_copy_hash(c, hash, r->hash); 459 r->deletion = !!deletion; 460 r->sqnum = sqnum; 461 key_copy(c, key, &r->key); 462 fname_len(&r->nm) = nlen; 463 memcpy(nbuf, name, nlen); 464 nbuf[nlen] = '\0'; 465 fname_name(&r->nm) = nbuf; 466 467 list_add_tail(&r->list, &c->replay_list); 468 return 0; 469 } 470 471 /** 472 * ubifs_validate_entry - validate directory or extended attribute entry node. 473 * @c: UBIFS file-system description object 474 * @dent: the node to validate 475 * 476 * This function validates directory or extended attribute entry node @dent. 477 * Returns zero if the node is all right and a %-EINVAL if not. 478 */ 479 int ubifs_validate_entry(struct ubifs_info *c, 480 const struct ubifs_dent_node *dent) 481 { 482 int key_type = key_type_flash(c, dent->key); 483 int nlen = le16_to_cpu(dent->nlen); 484 485 if (le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1 || 486 dent->type >= UBIFS_ITYPES_CNT || 487 nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 || 488 (key_type == UBIFS_XENT_KEY && strnlen(dent->name, nlen) != nlen) || 489 le64_to_cpu(dent->inum) > MAX_INUM) { 490 ubifs_err(c, "bad %s node", key_type == UBIFS_DENT_KEY ? 491 "directory entry" : "extended attribute entry"); 492 return -EINVAL; 493 } 494 495 if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) { 496 ubifs_err(c, "bad key type %d", key_type); 497 return -EINVAL; 498 } 499 500 return 0; 501 } 502 503 /** 504 * is_last_bud - check if the bud is the last in the journal head. 505 * @c: UBIFS file-system description object 506 * @bud: bud description object 507 * 508 * This function checks if bud @bud is the last bud in its journal head. This 509 * information is then used by 'replay_bud()' to decide whether the bud can 510 * have corruptions or not. Indeed, only last buds can be corrupted by power 511 * cuts. Returns %1 if this is the last bud, and %0 if not. 512 */ 513 static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud) 514 { 515 struct ubifs_jhead *jh = &c->jheads[bud->jhead]; 516 struct ubifs_bud *next; 517 uint32_t data; 518 int err; 519 520 if (list_is_last(&bud->list, &jh->buds_list)) 521 return 1; 522 523 /* 524 * The following is a quirk to make sure we work correctly with UBIFS 525 * images used with older UBIFS. 526 * 527 * Normally, the last bud will be the last in the journal head's list 528 * of bud. However, there is one exception if the UBIFS image belongs 529 * to older UBIFS. This is fairly unlikely: one would need to use old 530 * UBIFS, then have a power cut exactly at the right point, and then 531 * try to mount this image with new UBIFS. 532 * 533 * The exception is: it is possible to have 2 buds A and B, A goes 534 * before B, and B is the last, bud B is contains no data, and bud A is 535 * corrupted at the end. The reason is that in older versions when the 536 * journal code switched the next bud (from A to B), it first added a 537 * log reference node for the new bud (B), and only after this it 538 * synchronized the write-buffer of current bud (A). But later this was 539 * changed and UBIFS started to always synchronize the write-buffer of 540 * the bud (A) before writing the log reference for the new bud (B). 541 * 542 * But because older UBIFS always synchronized A's write-buffer before 543 * writing to B, we can recognize this exceptional situation but 544 * checking the contents of bud B - if it is empty, then A can be 545 * treated as the last and we can recover it. 546 * 547 * TODO: remove this piece of code in a couple of years (today it is 548 * 16.05.2011). 549 */ 550 next = list_entry(bud->list.next, struct ubifs_bud, list); 551 if (!list_is_last(&next->list, &jh->buds_list)) 552 return 0; 553 554 err = ubifs_leb_read(c, next->lnum, (char *)&data, next->start, 4, 1); 555 if (err) 556 return 0; 557 558 return data == 0xFFFFFFFF; 559 } 560 561 /* authenticate_sleb_hash and authenticate_sleb_hmac are split out for stack usage */ 562 static int authenticate_sleb_hash(struct ubifs_info *c, struct shash_desc *log_hash, u8 *hash) 563 { 564 SHASH_DESC_ON_STACK(hash_desc, c->hash_tfm); 565 566 hash_desc->tfm = c->hash_tfm; 567 568 ubifs_shash_copy_state(c, log_hash, hash_desc); 569 return crypto_shash_final(hash_desc, hash); 570 } 571 572 static int authenticate_sleb_hmac(struct ubifs_info *c, u8 *hash, u8 *hmac) 573 { 574 SHASH_DESC_ON_STACK(hmac_desc, c->hmac_tfm); 575 576 hmac_desc->tfm = c->hmac_tfm; 577 578 return crypto_shash_digest(hmac_desc, hash, c->hash_len, hmac); 579 } 580 581 /** 582 * authenticate_sleb - authenticate one scan LEB 583 * @c: UBIFS file-system description object 584 * @sleb: the scan LEB to authenticate 585 * @log_hash: 586 * @is_last: if true, this is is the last LEB 587 * 588 * This function iterates over the buds of a single LEB authenticating all buds 589 * with the authentication nodes on this LEB. Authentication nodes are written 590 * after some buds and contain a HMAC covering the authentication node itself 591 * and the buds between the last authentication node and the current 592 * authentication node. It can happen that the last buds cannot be authenticated 593 * because a powercut happened when some nodes were written but not the 594 * corresponding authentication node. This function returns the number of nodes 595 * that could be authenticated or a negative error code. 596 */ 597 static int authenticate_sleb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, 598 struct shash_desc *log_hash, int is_last) 599 { 600 int n_not_auth = 0; 601 struct ubifs_scan_node *snod; 602 int n_nodes = 0; 603 int err; 604 u8 *hash, *hmac; 605 606 if (!ubifs_authenticated(c)) 607 return sleb->nodes_cnt; 608 609 hash = kmalloc(crypto_shash_descsize(c->hash_tfm), GFP_NOFS); 610 hmac = kmalloc(c->hmac_desc_len, GFP_NOFS); 611 if (!hash || !hmac) { 612 err = -ENOMEM; 613 goto out; 614 } 615 616 list_for_each_entry(snod, &sleb->nodes, list) { 617 618 n_nodes++; 619 620 if (snod->type == UBIFS_AUTH_NODE) { 621 struct ubifs_auth_node *auth = snod->node; 622 623 err = authenticate_sleb_hash(c, log_hash, hash); 624 if (err) 625 goto out; 626 627 err = authenticate_sleb_hmac(c, hash, hmac); 628 if (err) 629 goto out; 630 631 err = ubifs_check_hmac(c, auth->hmac, hmac); 632 if (err) { 633 err = -EPERM; 634 goto out; 635 } 636 n_not_auth = 0; 637 } else { 638 err = crypto_shash_update(log_hash, snod->node, 639 snod->len); 640 if (err) 641 goto out; 642 n_not_auth++; 643 } 644 } 645 646 /* 647 * A powercut can happen when some nodes were written, but not yet 648 * the corresponding authentication node. This may only happen on 649 * the last bud though. 650 */ 651 if (n_not_auth) { 652 if (is_last) { 653 dbg_mnt("%d unauthenticated nodes found on LEB %d, Ignoring them", 654 n_not_auth, sleb->lnum); 655 err = 0; 656 } else { 657 dbg_mnt("%d unauthenticated nodes found on non-last LEB %d", 658 n_not_auth, sleb->lnum); 659 err = -EPERM; 660 } 661 } else { 662 err = 0; 663 } 664 out: 665 kfree(hash); 666 kfree(hmac); 667 668 return err ? err : n_nodes - n_not_auth; 669 } 670 671 /** 672 * replay_bud - replay a bud logical eraseblock. 673 * @c: UBIFS file-system description object 674 * @b: bud entry which describes the bud 675 * 676 * This function replays bud @bud, recovers it if needed, and adds all nodes 677 * from this bud to the replay list. Returns zero in case of success and a 678 * negative error code in case of failure. 679 */ 680 static int replay_bud(struct ubifs_info *c, struct bud_entry *b) 681 { 682 int is_last = is_last_bud(c, b->bud); 683 int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start; 684 int n_nodes, n = 0; 685 struct ubifs_scan_leb *sleb; 686 struct ubifs_scan_node *snod; 687 688 dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d", 689 lnum, b->bud->jhead, offs, is_last); 690 691 if (c->need_recovery && is_last) 692 /* 693 * Recover only last LEBs in the journal heads, because power 694 * cuts may cause corruptions only in these LEBs, because only 695 * these LEBs could possibly be written to at the power cut 696 * time. 697 */ 698 sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, b->bud->jhead); 699 else 700 sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); 701 if (IS_ERR(sleb)) 702 return PTR_ERR(sleb); 703 704 n_nodes = authenticate_sleb(c, sleb, b->bud->log_hash, is_last); 705 if (n_nodes < 0) { 706 err = n_nodes; 707 goto out; 708 } 709 710 ubifs_shash_copy_state(c, b->bud->log_hash, 711 c->jheads[b->bud->jhead].log_hash); 712 713 /* 714 * The bud does not have to start from offset zero - the beginning of 715 * the 'lnum' LEB may contain previously committed data. One of the 716 * things we have to do in replay is to correctly update lprops with 717 * newer information about this LEB. 718 * 719 * At this point lprops thinks that this LEB has 'c->leb_size - offs' 720 * bytes of free space because it only contain information about 721 * committed data. 722 * 723 * But we know that real amount of free space is 'c->leb_size - 724 * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and 725 * 'sleb->endpt' is used by bud data. We have to correctly calculate 726 * how much of these data are dirty and update lprops with this 727 * information. 728 * 729 * The dirt in that LEB region is comprised of padding nodes, deletion 730 * nodes, truncation nodes and nodes which are obsoleted by subsequent 731 * nodes in this LEB. So instead of calculating clean space, we 732 * calculate used space ('used' variable). 733 */ 734 735 list_for_each_entry(snod, &sleb->nodes, list) { 736 u8 hash[UBIFS_HASH_ARR_SZ]; 737 int deletion = 0; 738 739 cond_resched(); 740 741 if (snod->sqnum >= SQNUM_WATERMARK) { 742 ubifs_err(c, "file system's life ended"); 743 goto out_dump; 744 } 745 746 ubifs_node_calc_hash(c, snod->node, hash); 747 748 if (snod->sqnum > c->max_sqnum) 749 c->max_sqnum = snod->sqnum; 750 751 switch (snod->type) { 752 case UBIFS_INO_NODE: 753 { 754 struct ubifs_ino_node *ino = snod->node; 755 loff_t new_size = le64_to_cpu(ino->size); 756 757 if (le32_to_cpu(ino->nlink) == 0) 758 deletion = 1; 759 err = insert_node(c, lnum, snod->offs, snod->len, hash, 760 &snod->key, snod->sqnum, deletion, 761 &used, 0, new_size); 762 break; 763 } 764 case UBIFS_DATA_NODE: 765 { 766 struct ubifs_data_node *dn = snod->node; 767 loff_t new_size = le32_to_cpu(dn->size) + 768 key_block(c, &snod->key) * 769 UBIFS_BLOCK_SIZE; 770 771 err = insert_node(c, lnum, snod->offs, snod->len, hash, 772 &snod->key, snod->sqnum, deletion, 773 &used, 0, new_size); 774 break; 775 } 776 case UBIFS_DENT_NODE: 777 case UBIFS_XENT_NODE: 778 { 779 struct ubifs_dent_node *dent = snod->node; 780 781 err = ubifs_validate_entry(c, dent); 782 if (err) 783 goto out_dump; 784 785 err = insert_dent(c, lnum, snod->offs, snod->len, hash, 786 &snod->key, dent->name, 787 le16_to_cpu(dent->nlen), snod->sqnum, 788 !le64_to_cpu(dent->inum), &used); 789 break; 790 } 791 case UBIFS_TRUN_NODE: 792 { 793 struct ubifs_trun_node *trun = snod->node; 794 loff_t old_size = le64_to_cpu(trun->old_size); 795 loff_t new_size = le64_to_cpu(trun->new_size); 796 union ubifs_key key; 797 798 /* Validate truncation node */ 799 if (old_size < 0 || old_size > c->max_inode_sz || 800 new_size < 0 || new_size > c->max_inode_sz || 801 old_size <= new_size) { 802 ubifs_err(c, "bad truncation node"); 803 goto out_dump; 804 } 805 806 /* 807 * Create a fake truncation key just to use the same 808 * functions which expect nodes to have keys. 809 */ 810 trun_key_init(c, &key, le32_to_cpu(trun->inum)); 811 err = insert_node(c, lnum, snod->offs, snod->len, hash, 812 &key, snod->sqnum, 1, &used, 813 old_size, new_size); 814 break; 815 } 816 case UBIFS_AUTH_NODE: 817 break; 818 default: 819 ubifs_err(c, "unexpected node type %d in bud LEB %d:%d", 820 snod->type, lnum, snod->offs); 821 err = -EINVAL; 822 goto out_dump; 823 } 824 if (err) 825 goto out; 826 827 n++; 828 if (n == n_nodes) 829 break; 830 } 831 832 ubifs_assert(c, ubifs_search_bud(c, lnum)); 833 ubifs_assert(c, sleb->endpt - offs >= used); 834 ubifs_assert(c, sleb->endpt % c->min_io_size == 0); 835 836 b->dirty = sleb->endpt - offs - used; 837 b->free = c->leb_size - sleb->endpt; 838 dbg_mnt("bud LEB %d replied: dirty %d, free %d", 839 lnum, b->dirty, b->free); 840 841 out: 842 ubifs_scan_destroy(sleb); 843 return err; 844 845 out_dump: 846 ubifs_err(c, "bad node is at LEB %d:%d", lnum, snod->offs); 847 ubifs_dump_node(c, snod->node); 848 ubifs_scan_destroy(sleb); 849 return -EINVAL; 850 } 851 852 /** 853 * replay_buds - replay all buds. 854 * @c: UBIFS file-system description object 855 * 856 * This function returns zero in case of success and a negative error code in 857 * case of failure. 858 */ 859 static int replay_buds(struct ubifs_info *c) 860 { 861 struct bud_entry *b; 862 int err; 863 unsigned long long prev_sqnum = 0; 864 865 list_for_each_entry(b, &c->replay_buds, list) { 866 err = replay_bud(c, b); 867 if (err) 868 return err; 869 870 ubifs_assert(c, b->sqnum > prev_sqnum); 871 prev_sqnum = b->sqnum; 872 } 873 874 return 0; 875 } 876 877 /** 878 * destroy_bud_list - destroy the list of buds to replay. 879 * @c: UBIFS file-system description object 880 */ 881 static void destroy_bud_list(struct ubifs_info *c) 882 { 883 struct bud_entry *b; 884 885 while (!list_empty(&c->replay_buds)) { 886 b = list_entry(c->replay_buds.next, struct bud_entry, list); 887 list_del(&b->list); 888 kfree(b); 889 } 890 } 891 892 /** 893 * add_replay_bud - add a bud to the list of buds to replay. 894 * @c: UBIFS file-system description object 895 * @lnum: bud logical eraseblock number to replay 896 * @offs: bud start offset 897 * @jhead: journal head to which this bud belongs 898 * @sqnum: reference node sequence number 899 * 900 * This function returns zero in case of success and a negative error code in 901 * case of failure. 902 */ 903 static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, 904 unsigned long long sqnum) 905 { 906 struct ubifs_bud *bud; 907 struct bud_entry *b; 908 int err; 909 910 dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead); 911 912 bud = kmalloc(sizeof(struct ubifs_bud), GFP_KERNEL); 913 if (!bud) 914 return -ENOMEM; 915 916 b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL); 917 if (!b) { 918 err = -ENOMEM; 919 goto out; 920 } 921 922 bud->lnum = lnum; 923 bud->start = offs; 924 bud->jhead = jhead; 925 bud->log_hash = ubifs_hash_get_desc(c); 926 if (IS_ERR(bud->log_hash)) { 927 err = PTR_ERR(bud->log_hash); 928 goto out; 929 } 930 931 ubifs_shash_copy_state(c, c->log_hash, bud->log_hash); 932 933 ubifs_add_bud(c, bud); 934 935 b->bud = bud; 936 b->sqnum = sqnum; 937 list_add_tail(&b->list, &c->replay_buds); 938 939 return 0; 940 out: 941 kfree(bud); 942 kfree(b); 943 944 return err; 945 } 946 947 /** 948 * validate_ref - validate a reference node. 949 * @c: UBIFS file-system description object 950 * @ref: the reference node to validate 951 * @ref_lnum: LEB number of the reference node 952 * @ref_offs: reference node offset 953 * 954 * This function returns %1 if a bud reference already exists for the LEB. %0 is 955 * returned if the reference node is new, otherwise %-EINVAL is returned if 956 * validation failed. 957 */ 958 static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref) 959 { 960 struct ubifs_bud *bud; 961 int lnum = le32_to_cpu(ref->lnum); 962 unsigned int offs = le32_to_cpu(ref->offs); 963 unsigned int jhead = le32_to_cpu(ref->jhead); 964 965 /* 966 * ref->offs may point to the end of LEB when the journal head points 967 * to the end of LEB and we write reference node for it during commit. 968 * So this is why we require 'offs > c->leb_size'. 969 */ 970 if (jhead >= c->jhead_cnt || lnum >= c->leb_cnt || 971 lnum < c->main_first || offs > c->leb_size || 972 offs & (c->min_io_size - 1)) 973 return -EINVAL; 974 975 /* Make sure we have not already looked at this bud */ 976 bud = ubifs_search_bud(c, lnum); 977 if (bud) { 978 if (bud->jhead == jhead && bud->start <= offs) 979 return 1; 980 ubifs_err(c, "bud at LEB %d:%d was already referred", lnum, offs); 981 return -EINVAL; 982 } 983 984 return 0; 985 } 986 987 /** 988 * replay_log_leb - replay a log logical eraseblock. 989 * @c: UBIFS file-system description object 990 * @lnum: log logical eraseblock to replay 991 * @offs: offset to start replaying from 992 * @sbuf: scan buffer 993 * 994 * This function replays a log LEB and returns zero in case of success, %1 if 995 * this is the last LEB in the log, and a negative error code in case of 996 * failure. 997 */ 998 static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) 999 { 1000 int err; 1001 struct ubifs_scan_leb *sleb; 1002 struct ubifs_scan_node *snod; 1003 const struct ubifs_cs_node *node; 1004 1005 dbg_mnt("replay log LEB %d:%d", lnum, offs); 1006 sleb = ubifs_scan(c, lnum, offs, sbuf, c->need_recovery); 1007 if (IS_ERR(sleb)) { 1008 if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery) 1009 return PTR_ERR(sleb); 1010 /* 1011 * Note, the below function will recover this log LEB only if 1012 * it is the last, because unclean reboots can possibly corrupt 1013 * only the tail of the log. 1014 */ 1015 sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); 1016 if (IS_ERR(sleb)) 1017 return PTR_ERR(sleb); 1018 } 1019 1020 if (sleb->nodes_cnt == 0) { 1021 err = 1; 1022 goto out; 1023 } 1024 1025 node = sleb->buf; 1026 snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list); 1027 if (c->cs_sqnum == 0) { 1028 /* 1029 * This is the first log LEB we are looking at, make sure that 1030 * the first node is a commit start node. Also record its 1031 * sequence number so that UBIFS can determine where the log 1032 * ends, because all nodes which were have higher sequence 1033 * numbers. 1034 */ 1035 if (snod->type != UBIFS_CS_NODE) { 1036 ubifs_err(c, "first log node at LEB %d:%d is not CS node", 1037 lnum, offs); 1038 goto out_dump; 1039 } 1040 if (le64_to_cpu(node->cmt_no) != c->cmt_no) { 1041 ubifs_err(c, "first CS node at LEB %d:%d has wrong commit number %llu expected %llu", 1042 lnum, offs, 1043 (unsigned long long)le64_to_cpu(node->cmt_no), 1044 c->cmt_no); 1045 goto out_dump; 1046 } 1047 1048 c->cs_sqnum = le64_to_cpu(node->ch.sqnum); 1049 dbg_mnt("commit start sqnum %llu", c->cs_sqnum); 1050 1051 err = ubifs_shash_init(c, c->log_hash); 1052 if (err) 1053 goto out; 1054 1055 err = ubifs_shash_update(c, c->log_hash, node, UBIFS_CS_NODE_SZ); 1056 if (err < 0) 1057 goto out; 1058 } 1059 1060 if (snod->sqnum < c->cs_sqnum) { 1061 /* 1062 * This means that we reached end of log and now 1063 * look to the older log data, which was already 1064 * committed but the eraseblock was not erased (UBIFS 1065 * only un-maps it). So this basically means we have to 1066 * exit with "end of log" code. 1067 */ 1068 err = 1; 1069 goto out; 1070 } 1071 1072 /* Make sure the first node sits at offset zero of the LEB */ 1073 if (snod->offs != 0) { 1074 ubifs_err(c, "first node is not at zero offset"); 1075 goto out_dump; 1076 } 1077 1078 list_for_each_entry(snod, &sleb->nodes, list) { 1079 cond_resched(); 1080 1081 if (snod->sqnum >= SQNUM_WATERMARK) { 1082 ubifs_err(c, "file system's life ended"); 1083 goto out_dump; 1084 } 1085 1086 if (snod->sqnum < c->cs_sqnum) { 1087 ubifs_err(c, "bad sqnum %llu, commit sqnum %llu", 1088 snod->sqnum, c->cs_sqnum); 1089 goto out_dump; 1090 } 1091 1092 if (snod->sqnum > c->max_sqnum) 1093 c->max_sqnum = snod->sqnum; 1094 1095 switch (snod->type) { 1096 case UBIFS_REF_NODE: { 1097 const struct ubifs_ref_node *ref = snod->node; 1098 1099 err = validate_ref(c, ref); 1100 if (err == 1) 1101 break; /* Already have this bud */ 1102 if (err) 1103 goto out_dump; 1104 1105 err = ubifs_shash_update(c, c->log_hash, ref, 1106 UBIFS_REF_NODE_SZ); 1107 if (err) 1108 goto out; 1109 1110 err = add_replay_bud(c, le32_to_cpu(ref->lnum), 1111 le32_to_cpu(ref->offs), 1112 le32_to_cpu(ref->jhead), 1113 snod->sqnum); 1114 if (err) 1115 goto out; 1116 1117 break; 1118 } 1119 case UBIFS_CS_NODE: 1120 /* Make sure it sits at the beginning of LEB */ 1121 if (snod->offs != 0) { 1122 ubifs_err(c, "unexpected node in log"); 1123 goto out_dump; 1124 } 1125 break; 1126 default: 1127 ubifs_err(c, "unexpected node in log"); 1128 goto out_dump; 1129 } 1130 } 1131 1132 if (sleb->endpt || c->lhead_offs >= c->leb_size) { 1133 c->lhead_lnum = lnum; 1134 c->lhead_offs = sleb->endpt; 1135 } 1136 1137 err = !sleb->endpt; 1138 out: 1139 ubifs_scan_destroy(sleb); 1140 return err; 1141 1142 out_dump: 1143 ubifs_err(c, "log error detected while replaying the log at LEB %d:%d", 1144 lnum, offs + snod->offs); 1145 ubifs_dump_node(c, snod->node); 1146 ubifs_scan_destroy(sleb); 1147 return -EINVAL; 1148 } 1149 1150 /** 1151 * take_ihead - update the status of the index head in lprops to 'taken'. 1152 * @c: UBIFS file-system description object 1153 * 1154 * This function returns the amount of free space in the index head LEB or a 1155 * negative error code. 1156 */ 1157 static int take_ihead(struct ubifs_info *c) 1158 { 1159 const struct ubifs_lprops *lp; 1160 int err, free; 1161 1162 ubifs_get_lprops(c); 1163 1164 lp = ubifs_lpt_lookup_dirty(c, c->ihead_lnum); 1165 if (IS_ERR(lp)) { 1166 err = PTR_ERR(lp); 1167 goto out; 1168 } 1169 1170 free = lp->free; 1171 1172 lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, 1173 lp->flags | LPROPS_TAKEN, 0); 1174 if (IS_ERR(lp)) { 1175 err = PTR_ERR(lp); 1176 goto out; 1177 } 1178 1179 err = free; 1180 out: 1181 ubifs_release_lprops(c); 1182 return err; 1183 } 1184 1185 /** 1186 * ubifs_replay_journal - replay journal. 1187 * @c: UBIFS file-system description object 1188 * 1189 * This function scans the journal, replays and cleans it up. It makes sure all 1190 * memory data structures related to uncommitted journal are built (dirty TNC 1191 * tree, tree of buds, modified lprops, etc). 1192 */ 1193 int ubifs_replay_journal(struct ubifs_info *c) 1194 { 1195 int err, lnum, free; 1196 1197 BUILD_BUG_ON(UBIFS_TRUN_KEY > 5); 1198 1199 /* Update the status of the index head in lprops to 'taken' */ 1200 free = take_ihead(c); 1201 if (free < 0) 1202 return free; /* Error code */ 1203 1204 if (c->ihead_offs != c->leb_size - free) { 1205 ubifs_err(c, "bad index head LEB %d:%d", c->ihead_lnum, 1206 c->ihead_offs); 1207 return -EINVAL; 1208 } 1209 1210 dbg_mnt("start replaying the journal"); 1211 c->replaying = 1; 1212 lnum = c->ltail_lnum = c->lhead_lnum; 1213 1214 do { 1215 err = replay_log_leb(c, lnum, 0, c->sbuf); 1216 if (err == 1) { 1217 if (lnum != c->lhead_lnum) 1218 /* We hit the end of the log */ 1219 break; 1220 1221 /* 1222 * The head of the log must always start with the 1223 * "commit start" node on a properly formatted UBIFS. 1224 * But we found no nodes at all, which means that 1225 * something went wrong and we cannot proceed mounting 1226 * the file-system. 1227 */ 1228 ubifs_err(c, "no UBIFS nodes found at the log head LEB %d:%d, possibly corrupted", 1229 lnum, 0); 1230 err = -EINVAL; 1231 } 1232 if (err) 1233 goto out; 1234 lnum = ubifs_next_log_lnum(c, lnum); 1235 } while (lnum != c->ltail_lnum); 1236 1237 err = replay_buds(c); 1238 if (err) 1239 goto out; 1240 1241 err = apply_replay_list(c); 1242 if (err) 1243 goto out; 1244 1245 err = set_buds_lprops(c); 1246 if (err) 1247 goto out; 1248 1249 /* 1250 * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable 1251 * to roughly estimate index growth. Things like @c->bi.min_idx_lebs 1252 * depend on it. This means we have to initialize it to make sure 1253 * budgeting works properly. 1254 */ 1255 c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); 1256 c->bi.uncommitted_idx *= c->max_idx_node_sz; 1257 1258 ubifs_assert(c, c->bud_bytes <= c->max_bud_bytes || c->need_recovery); 1259 dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, highest_inum %lu", 1260 c->lhead_lnum, c->lhead_offs, c->max_sqnum, 1261 (unsigned long)c->highest_inum); 1262 out: 1263 destroy_replay_list(c); 1264 destroy_bud_list(c); 1265 c->replaying = 0; 1266 return err; 1267 } 1268