1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * This file is part of UBIFS. 4 * 5 * Copyright (C) 2006-2008 Nokia Corporation. 6 * 7 * Authors: Adrian Hunter 8 * Artem Bityutskiy (Битюцкий Артём) 9 */ 10 11 /* 12 * This file contains journal replay code. It runs when the file-system is being 13 * mounted and requires no locking. 14 * 15 * The larger is the journal, the longer it takes to scan it, so the longer it 16 * takes to mount UBIFS. This is why the journal has limited size which may be 17 * changed depending on the system requirements. But a larger journal gives 18 * faster I/O speed because it writes the index less frequently. So this is a 19 * trade-off. Also, the journal is indexed by the in-memory index (TNC), so the 20 * larger is the journal, the more memory its index may consume. 21 */ 22 23 #include "ubifs.h" 24 #include <linux/list_sort.h> 25 #include <crypto/hash.h> 26 #include <crypto/algapi.h> 27 28 /** 29 * struct replay_entry - replay list entry. 30 * @lnum: logical eraseblock number of the node 31 * @offs: node offset 32 * @len: node length 33 * @deletion: non-zero if this entry corresponds to a node deletion 34 * @sqnum: node sequence number 35 * @list: links the replay list 36 * @key: node key 37 * @nm: directory entry name 38 * @old_size: truncation old size 39 * @new_size: truncation new size 40 * 41 * The replay process first scans all buds and builds the replay list, then 42 * sorts the replay list in nodes sequence number order, and then inserts all 43 * the replay entries to the TNC. 44 */ 45 struct replay_entry { 46 int lnum; 47 int offs; 48 int len; 49 u8 hash[UBIFS_HASH_ARR_SZ]; 50 unsigned int deletion:1; 51 unsigned long long sqnum; 52 struct list_head list; 53 union ubifs_key key; 54 union { 55 struct fscrypt_name nm; 56 struct { 57 loff_t old_size; 58 loff_t new_size; 59 }; 60 }; 61 }; 62 63 /** 64 * struct bud_entry - entry in the list of buds to replay. 65 * @list: next bud in the list 66 * @bud: bud description object 67 * @sqnum: reference node sequence number 68 * @free: free bytes in the bud 69 * @dirty: dirty bytes in the bud 70 */ 71 struct bud_entry { 72 struct list_head list; 73 struct ubifs_bud *bud; 74 unsigned long long sqnum; 75 int free; 76 int dirty; 77 }; 78 79 /** 80 * set_bud_lprops - set free and dirty space used by a bud. 81 * @c: UBIFS file-system description object 82 * @b: bud entry which describes the bud 83 * 84 * This function makes sure the LEB properties of bud @b are set correctly 85 * after the replay. Returns zero in case of success and a negative error code 86 * in case of failure. 87 */ 88 static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b) 89 { 90 const struct ubifs_lprops *lp; 91 int err = 0, dirty; 92 93 ubifs_get_lprops(c); 94 95 lp = ubifs_lpt_lookup_dirty(c, b->bud->lnum); 96 if (IS_ERR(lp)) { 97 err = PTR_ERR(lp); 98 goto out; 99 } 100 101 dirty = lp->dirty; 102 if (b->bud->start == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { 103 /* 104 * The LEB was added to the journal with a starting offset of 105 * zero which means the LEB must have been empty. The LEB 106 * property values should be @lp->free == @c->leb_size and 107 * @lp->dirty == 0, but that is not the case. The reason is that 108 * the LEB had been garbage collected before it became the bud, 109 * and there was not commit inbetween. The garbage collector 110 * resets the free and dirty space without recording it 111 * anywhere except lprops, so if there was no commit then 112 * lprops does not have that information. 113 * 114 * We do not need to adjust free space because the scan has told 115 * us the exact value which is recorded in the replay entry as 116 * @b->free. 117 * 118 * However we do need to subtract from the dirty space the 119 * amount of space that the garbage collector reclaimed, which 120 * is the whole LEB minus the amount of space that was free. 121 */ 122 dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, 123 lp->free, lp->dirty); 124 dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, 125 lp->free, lp->dirty); 126 dirty -= c->leb_size - lp->free; 127 /* 128 * If the replay order was perfect the dirty space would now be 129 * zero. The order is not perfect because the journal heads 130 * race with each other. This is not a problem but is does mean 131 * that the dirty space may temporarily exceed c->leb_size 132 * during the replay. 133 */ 134 if (dirty != 0) 135 dbg_mnt("LEB %d lp: %d free %d dirty replay: %d free %d dirty", 136 b->bud->lnum, lp->free, lp->dirty, b->free, 137 b->dirty); 138 } 139 lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty, 140 lp->flags | LPROPS_TAKEN, 0); 141 if (IS_ERR(lp)) { 142 err = PTR_ERR(lp); 143 goto out; 144 } 145 146 /* Make sure the journal head points to the latest bud */ 147 err = ubifs_wbuf_seek_nolock(&c->jheads[b->bud->jhead].wbuf, 148 b->bud->lnum, c->leb_size - b->free); 149 150 out: 151 ubifs_release_lprops(c); 152 return err; 153 } 154 155 /** 156 * set_buds_lprops - set free and dirty space for all replayed buds. 157 * @c: UBIFS file-system description object 158 * 159 * This function sets LEB properties for all replayed buds. Returns zero in 160 * case of success and a negative error code in case of failure. 161 */ 162 static int set_buds_lprops(struct ubifs_info *c) 163 { 164 struct bud_entry *b; 165 int err; 166 167 list_for_each_entry(b, &c->replay_buds, list) { 168 err = set_bud_lprops(c, b); 169 if (err) 170 return err; 171 } 172 173 return 0; 174 } 175 176 /** 177 * trun_remove_range - apply a replay entry for a truncation to the TNC. 178 * @c: UBIFS file-system description object 179 * @r: replay entry of truncation 180 */ 181 static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r) 182 { 183 unsigned min_blk, max_blk; 184 union ubifs_key min_key, max_key; 185 ino_t ino; 186 187 min_blk = r->new_size / UBIFS_BLOCK_SIZE; 188 if (r->new_size & (UBIFS_BLOCK_SIZE - 1)) 189 min_blk += 1; 190 191 max_blk = r->old_size / UBIFS_BLOCK_SIZE; 192 if ((r->old_size & (UBIFS_BLOCK_SIZE - 1)) == 0) 193 max_blk -= 1; 194 195 ino = key_inum(c, &r->key); 196 197 data_key_init(c, &min_key, ino, min_blk); 198 data_key_init(c, &max_key, ino, max_blk); 199 200 return ubifs_tnc_remove_range(c, &min_key, &max_key); 201 } 202 203 /** 204 * inode_still_linked - check whether inode in question will be re-linked. 205 * @c: UBIFS file-system description object 206 * @rino: replay entry to test 207 * 208 * O_TMPFILE files can be re-linked, this means link count goes from 0 to 1. 209 * This case needs special care, otherwise all references to the inode will 210 * be removed upon the first replay entry of an inode with link count 0 211 * is found. 212 */ 213 static bool inode_still_linked(struct ubifs_info *c, struct replay_entry *rino) 214 { 215 struct replay_entry *r; 216 217 ubifs_assert(c, rino->deletion); 218 ubifs_assert(c, key_type(c, &rino->key) == UBIFS_INO_KEY); 219 220 /* 221 * Find the most recent entry for the inode behind @rino and check 222 * whether it is a deletion. 223 */ 224 list_for_each_entry_reverse(r, &c->replay_list, list) { 225 ubifs_assert(c, r->sqnum >= rino->sqnum); 226 if (key_inum(c, &r->key) == key_inum(c, &rino->key)) 227 return r->deletion == 0; 228 229 } 230 231 ubifs_assert(c, 0); 232 return false; 233 } 234 235 /** 236 * apply_replay_entry - apply a replay entry to the TNC. 237 * @c: UBIFS file-system description object 238 * @r: replay entry to apply 239 * 240 * Apply a replay entry to the TNC. 241 */ 242 static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) 243 { 244 int err; 245 246 dbg_mntk(&r->key, "LEB %d:%d len %d deletion %d sqnum %llu key ", 247 r->lnum, r->offs, r->len, r->deletion, r->sqnum); 248 249 if (is_hash_key(c, &r->key)) { 250 if (r->deletion) 251 err = ubifs_tnc_remove_nm(c, &r->key, &r->nm); 252 else 253 err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs, 254 r->len, r->hash, &r->nm); 255 } else { 256 if (r->deletion) 257 switch (key_type(c, &r->key)) { 258 case UBIFS_INO_KEY: 259 { 260 ino_t inum = key_inum(c, &r->key); 261 262 if (inode_still_linked(c, r)) { 263 err = 0; 264 break; 265 } 266 267 err = ubifs_tnc_remove_ino(c, inum); 268 break; 269 } 270 case UBIFS_TRUN_KEY: 271 err = trun_remove_range(c, r); 272 break; 273 default: 274 err = ubifs_tnc_remove(c, &r->key); 275 break; 276 } 277 else 278 err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs, 279 r->len, r->hash); 280 if (err) 281 return err; 282 283 if (c->need_recovery) 284 err = ubifs_recover_size_accum(c, &r->key, r->deletion, 285 r->new_size); 286 } 287 288 return err; 289 } 290 291 /** 292 * replay_entries_cmp - compare 2 replay entries. 293 * @priv: UBIFS file-system description object 294 * @a: first replay entry 295 * @b: second replay entry 296 * 297 * This is a comparios function for 'list_sort()' which compares 2 replay 298 * entries @a and @b by comparing their sequence numer. Returns %1 if @a has 299 * greater sequence number and %-1 otherwise. 300 */ 301 static int replay_entries_cmp(void *priv, struct list_head *a, 302 struct list_head *b) 303 { 304 struct ubifs_info *c = priv; 305 struct replay_entry *ra, *rb; 306 307 cond_resched(); 308 if (a == b) 309 return 0; 310 311 ra = list_entry(a, struct replay_entry, list); 312 rb = list_entry(b, struct replay_entry, list); 313 ubifs_assert(c, ra->sqnum != rb->sqnum); 314 if (ra->sqnum > rb->sqnum) 315 return 1; 316 return -1; 317 } 318 319 /** 320 * apply_replay_list - apply the replay list to the TNC. 321 * @c: UBIFS file-system description object 322 * 323 * Apply all entries in the replay list to the TNC. Returns zero in case of 324 * success and a negative error code in case of failure. 325 */ 326 static int apply_replay_list(struct ubifs_info *c) 327 { 328 struct replay_entry *r; 329 int err; 330 331 list_sort(c, &c->replay_list, &replay_entries_cmp); 332 333 list_for_each_entry(r, &c->replay_list, list) { 334 cond_resched(); 335 336 err = apply_replay_entry(c, r); 337 if (err) 338 return err; 339 } 340 341 return 0; 342 } 343 344 /** 345 * destroy_replay_list - destroy the replay. 346 * @c: UBIFS file-system description object 347 * 348 * Destroy the replay list. 349 */ 350 static void destroy_replay_list(struct ubifs_info *c) 351 { 352 struct replay_entry *r, *tmp; 353 354 list_for_each_entry_safe(r, tmp, &c->replay_list, list) { 355 if (is_hash_key(c, &r->key)) 356 kfree(fname_name(&r->nm)); 357 list_del(&r->list); 358 kfree(r); 359 } 360 } 361 362 /** 363 * insert_node - insert a node to the replay list 364 * @c: UBIFS file-system description object 365 * @lnum: node logical eraseblock number 366 * @offs: node offset 367 * @len: node length 368 * @key: node key 369 * @sqnum: sequence number 370 * @deletion: non-zero if this is a deletion 371 * @used: number of bytes in use in a LEB 372 * @old_size: truncation old size 373 * @new_size: truncation new size 374 * 375 * This function inserts a scanned non-direntry node to the replay list. The 376 * replay list contains @struct replay_entry elements, and we sort this list in 377 * sequence number order before applying it. The replay list is applied at the 378 * very end of the replay process. Since the list is sorted in sequence number 379 * order, the older modifications are applied first. This function returns zero 380 * in case of success and a negative error code in case of failure. 381 */ 382 static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, 383 const u8 *hash, union ubifs_key *key, 384 unsigned long long sqnum, int deletion, int *used, 385 loff_t old_size, loff_t new_size) 386 { 387 struct replay_entry *r; 388 389 dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs); 390 391 if (key_inum(c, key) >= c->highest_inum) 392 c->highest_inum = key_inum(c, key); 393 394 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 395 if (!r) 396 return -ENOMEM; 397 398 if (!deletion) 399 *used += ALIGN(len, 8); 400 r->lnum = lnum; 401 r->offs = offs; 402 r->len = len; 403 ubifs_copy_hash(c, hash, r->hash); 404 r->deletion = !!deletion; 405 r->sqnum = sqnum; 406 key_copy(c, key, &r->key); 407 r->old_size = old_size; 408 r->new_size = new_size; 409 410 list_add_tail(&r->list, &c->replay_list); 411 return 0; 412 } 413 414 /** 415 * insert_dent - insert a directory entry node into the replay list. 416 * @c: UBIFS file-system description object 417 * @lnum: node logical eraseblock number 418 * @offs: node offset 419 * @len: node length 420 * @key: node key 421 * @name: directory entry name 422 * @nlen: directory entry name length 423 * @sqnum: sequence number 424 * @deletion: non-zero if this is a deletion 425 * @used: number of bytes in use in a LEB 426 * 427 * This function inserts a scanned directory entry node or an extended 428 * attribute entry to the replay list. Returns zero in case of success and a 429 * negative error code in case of failure. 430 */ 431 static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, 432 const u8 *hash, union ubifs_key *key, 433 const char *name, int nlen, unsigned long long sqnum, 434 int deletion, int *used) 435 { 436 struct replay_entry *r; 437 char *nbuf; 438 439 dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs); 440 if (key_inum(c, key) >= c->highest_inum) 441 c->highest_inum = key_inum(c, key); 442 443 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 444 if (!r) 445 return -ENOMEM; 446 447 nbuf = kmalloc(nlen + 1, GFP_KERNEL); 448 if (!nbuf) { 449 kfree(r); 450 return -ENOMEM; 451 } 452 453 if (!deletion) 454 *used += ALIGN(len, 8); 455 r->lnum = lnum; 456 r->offs = offs; 457 r->len = len; 458 ubifs_copy_hash(c, hash, r->hash); 459 r->deletion = !!deletion; 460 r->sqnum = sqnum; 461 key_copy(c, key, &r->key); 462 fname_len(&r->nm) = nlen; 463 memcpy(nbuf, name, nlen); 464 nbuf[nlen] = '\0'; 465 fname_name(&r->nm) = nbuf; 466 467 list_add_tail(&r->list, &c->replay_list); 468 return 0; 469 } 470 471 /** 472 * ubifs_validate_entry - validate directory or extended attribute entry node. 473 * @c: UBIFS file-system description object 474 * @dent: the node to validate 475 * 476 * This function validates directory or extended attribute entry node @dent. 477 * Returns zero if the node is all right and a %-EINVAL if not. 478 */ 479 int ubifs_validate_entry(struct ubifs_info *c, 480 const struct ubifs_dent_node *dent) 481 { 482 int key_type = key_type_flash(c, dent->key); 483 int nlen = le16_to_cpu(dent->nlen); 484 485 if (le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1 || 486 dent->type >= UBIFS_ITYPES_CNT || 487 nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 || 488 (key_type == UBIFS_XENT_KEY && strnlen(dent->name, nlen) != nlen) || 489 le64_to_cpu(dent->inum) > MAX_INUM) { 490 ubifs_err(c, "bad %s node", key_type == UBIFS_DENT_KEY ? 491 "directory entry" : "extended attribute entry"); 492 return -EINVAL; 493 } 494 495 if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) { 496 ubifs_err(c, "bad key type %d", key_type); 497 return -EINVAL; 498 } 499 500 return 0; 501 } 502 503 /** 504 * is_last_bud - check if the bud is the last in the journal head. 505 * @c: UBIFS file-system description object 506 * @bud: bud description object 507 * 508 * This function checks if bud @bud is the last bud in its journal head. This 509 * information is then used by 'replay_bud()' to decide whether the bud can 510 * have corruptions or not. Indeed, only last buds can be corrupted by power 511 * cuts. Returns %1 if this is the last bud, and %0 if not. 512 */ 513 static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud) 514 { 515 struct ubifs_jhead *jh = &c->jheads[bud->jhead]; 516 struct ubifs_bud *next; 517 uint32_t data; 518 int err; 519 520 if (list_is_last(&bud->list, &jh->buds_list)) 521 return 1; 522 523 /* 524 * The following is a quirk to make sure we work correctly with UBIFS 525 * images used with older UBIFS. 526 * 527 * Normally, the last bud will be the last in the journal head's list 528 * of bud. However, there is one exception if the UBIFS image belongs 529 * to older UBIFS. This is fairly unlikely: one would need to use old 530 * UBIFS, then have a power cut exactly at the right point, and then 531 * try to mount this image with new UBIFS. 532 * 533 * The exception is: it is possible to have 2 buds A and B, A goes 534 * before B, and B is the last, bud B is contains no data, and bud A is 535 * corrupted at the end. The reason is that in older versions when the 536 * journal code switched the next bud (from A to B), it first added a 537 * log reference node for the new bud (B), and only after this it 538 * synchronized the write-buffer of current bud (A). But later this was 539 * changed and UBIFS started to always synchronize the write-buffer of 540 * the bud (A) before writing the log reference for the new bud (B). 541 * 542 * But because older UBIFS always synchronized A's write-buffer before 543 * writing to B, we can recognize this exceptional situation but 544 * checking the contents of bud B - if it is empty, then A can be 545 * treated as the last and we can recover it. 546 * 547 * TODO: remove this piece of code in a couple of years (today it is 548 * 16.05.2011). 549 */ 550 next = list_entry(bud->list.next, struct ubifs_bud, list); 551 if (!list_is_last(&next->list, &jh->buds_list)) 552 return 0; 553 554 err = ubifs_leb_read(c, next->lnum, (char *)&data, next->start, 4, 1); 555 if (err) 556 return 0; 557 558 return data == 0xFFFFFFFF; 559 } 560 561 /* authenticate_sleb_hash is split out for stack usage */ 562 static int authenticate_sleb_hash(struct ubifs_info *c, struct shash_desc *log_hash, u8 *hash) 563 { 564 SHASH_DESC_ON_STACK(hash_desc, c->hash_tfm); 565 566 hash_desc->tfm = c->hash_tfm; 567 568 ubifs_shash_copy_state(c, log_hash, hash_desc); 569 return crypto_shash_final(hash_desc, hash); 570 } 571 572 /** 573 * authenticate_sleb - authenticate one scan LEB 574 * @c: UBIFS file-system description object 575 * @sleb: the scan LEB to authenticate 576 * @log_hash: 577 * @is_last: if true, this is the last LEB 578 * 579 * This function iterates over the buds of a single LEB authenticating all buds 580 * with the authentication nodes on this LEB. Authentication nodes are written 581 * after some buds and contain a HMAC covering the authentication node itself 582 * and the buds between the last authentication node and the current 583 * authentication node. It can happen that the last buds cannot be authenticated 584 * because a powercut happened when some nodes were written but not the 585 * corresponding authentication node. This function returns the number of nodes 586 * that could be authenticated or a negative error code. 587 */ 588 static int authenticate_sleb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, 589 struct shash_desc *log_hash, int is_last) 590 { 591 int n_not_auth = 0; 592 struct ubifs_scan_node *snod; 593 int n_nodes = 0; 594 int err; 595 u8 hash[UBIFS_HASH_ARR_SZ]; 596 u8 hmac[UBIFS_HMAC_ARR_SZ]; 597 598 if (!ubifs_authenticated(c)) 599 return sleb->nodes_cnt; 600 601 list_for_each_entry(snod, &sleb->nodes, list) { 602 603 n_nodes++; 604 605 if (snod->type == UBIFS_AUTH_NODE) { 606 struct ubifs_auth_node *auth = snod->node; 607 608 err = authenticate_sleb_hash(c, log_hash, hash); 609 if (err) 610 goto out; 611 612 err = crypto_shash_tfm_digest(c->hmac_tfm, hash, 613 c->hash_len, hmac); 614 if (err) 615 goto out; 616 617 err = ubifs_check_hmac(c, auth->hmac, hmac); 618 if (err) { 619 err = -EPERM; 620 goto out; 621 } 622 n_not_auth = 0; 623 } else { 624 err = crypto_shash_update(log_hash, snod->node, 625 snod->len); 626 if (err) 627 goto out; 628 n_not_auth++; 629 } 630 } 631 632 /* 633 * A powercut can happen when some nodes were written, but not yet 634 * the corresponding authentication node. This may only happen on 635 * the last bud though. 636 */ 637 if (n_not_auth) { 638 if (is_last) { 639 dbg_mnt("%d unauthenticated nodes found on LEB %d, Ignoring them", 640 n_not_auth, sleb->lnum); 641 err = 0; 642 } else { 643 dbg_mnt("%d unauthenticated nodes found on non-last LEB %d", 644 n_not_auth, sleb->lnum); 645 err = -EPERM; 646 } 647 } else { 648 err = 0; 649 } 650 out: 651 return err ? err : n_nodes - n_not_auth; 652 } 653 654 /** 655 * replay_bud - replay a bud logical eraseblock. 656 * @c: UBIFS file-system description object 657 * @b: bud entry which describes the bud 658 * 659 * This function replays bud @bud, recovers it if needed, and adds all nodes 660 * from this bud to the replay list. Returns zero in case of success and a 661 * negative error code in case of failure. 662 */ 663 static int replay_bud(struct ubifs_info *c, struct bud_entry *b) 664 { 665 int is_last = is_last_bud(c, b->bud); 666 int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start; 667 int n_nodes, n = 0; 668 struct ubifs_scan_leb *sleb; 669 struct ubifs_scan_node *snod; 670 671 dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d", 672 lnum, b->bud->jhead, offs, is_last); 673 674 if (c->need_recovery && is_last) 675 /* 676 * Recover only last LEBs in the journal heads, because power 677 * cuts may cause corruptions only in these LEBs, because only 678 * these LEBs could possibly be written to at the power cut 679 * time. 680 */ 681 sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, b->bud->jhead); 682 else 683 sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); 684 if (IS_ERR(sleb)) 685 return PTR_ERR(sleb); 686 687 n_nodes = authenticate_sleb(c, sleb, b->bud->log_hash, is_last); 688 if (n_nodes < 0) { 689 err = n_nodes; 690 goto out; 691 } 692 693 ubifs_shash_copy_state(c, b->bud->log_hash, 694 c->jheads[b->bud->jhead].log_hash); 695 696 /* 697 * The bud does not have to start from offset zero - the beginning of 698 * the 'lnum' LEB may contain previously committed data. One of the 699 * things we have to do in replay is to correctly update lprops with 700 * newer information about this LEB. 701 * 702 * At this point lprops thinks that this LEB has 'c->leb_size - offs' 703 * bytes of free space because it only contain information about 704 * committed data. 705 * 706 * But we know that real amount of free space is 'c->leb_size - 707 * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and 708 * 'sleb->endpt' is used by bud data. We have to correctly calculate 709 * how much of these data are dirty and update lprops with this 710 * information. 711 * 712 * The dirt in that LEB region is comprised of padding nodes, deletion 713 * nodes, truncation nodes and nodes which are obsoleted by subsequent 714 * nodes in this LEB. So instead of calculating clean space, we 715 * calculate used space ('used' variable). 716 */ 717 718 list_for_each_entry(snod, &sleb->nodes, list) { 719 u8 hash[UBIFS_HASH_ARR_SZ]; 720 int deletion = 0; 721 722 cond_resched(); 723 724 if (snod->sqnum >= SQNUM_WATERMARK) { 725 ubifs_err(c, "file system's life ended"); 726 goto out_dump; 727 } 728 729 ubifs_node_calc_hash(c, snod->node, hash); 730 731 if (snod->sqnum > c->max_sqnum) 732 c->max_sqnum = snod->sqnum; 733 734 switch (snod->type) { 735 case UBIFS_INO_NODE: 736 { 737 struct ubifs_ino_node *ino = snod->node; 738 loff_t new_size = le64_to_cpu(ino->size); 739 740 if (le32_to_cpu(ino->nlink) == 0) 741 deletion = 1; 742 err = insert_node(c, lnum, snod->offs, snod->len, hash, 743 &snod->key, snod->sqnum, deletion, 744 &used, 0, new_size); 745 break; 746 } 747 case UBIFS_DATA_NODE: 748 { 749 struct ubifs_data_node *dn = snod->node; 750 loff_t new_size = le32_to_cpu(dn->size) + 751 key_block(c, &snod->key) * 752 UBIFS_BLOCK_SIZE; 753 754 err = insert_node(c, lnum, snod->offs, snod->len, hash, 755 &snod->key, snod->sqnum, deletion, 756 &used, 0, new_size); 757 break; 758 } 759 case UBIFS_DENT_NODE: 760 case UBIFS_XENT_NODE: 761 { 762 struct ubifs_dent_node *dent = snod->node; 763 764 err = ubifs_validate_entry(c, dent); 765 if (err) 766 goto out_dump; 767 768 err = insert_dent(c, lnum, snod->offs, snod->len, hash, 769 &snod->key, dent->name, 770 le16_to_cpu(dent->nlen), snod->sqnum, 771 !le64_to_cpu(dent->inum), &used); 772 break; 773 } 774 case UBIFS_TRUN_NODE: 775 { 776 struct ubifs_trun_node *trun = snod->node; 777 loff_t old_size = le64_to_cpu(trun->old_size); 778 loff_t new_size = le64_to_cpu(trun->new_size); 779 union ubifs_key key; 780 781 /* Validate truncation node */ 782 if (old_size < 0 || old_size > c->max_inode_sz || 783 new_size < 0 || new_size > c->max_inode_sz || 784 old_size <= new_size) { 785 ubifs_err(c, "bad truncation node"); 786 goto out_dump; 787 } 788 789 /* 790 * Create a fake truncation key just to use the same 791 * functions which expect nodes to have keys. 792 */ 793 trun_key_init(c, &key, le32_to_cpu(trun->inum)); 794 err = insert_node(c, lnum, snod->offs, snod->len, hash, 795 &key, snod->sqnum, 1, &used, 796 old_size, new_size); 797 break; 798 } 799 case UBIFS_AUTH_NODE: 800 break; 801 default: 802 ubifs_err(c, "unexpected node type %d in bud LEB %d:%d", 803 snod->type, lnum, snod->offs); 804 err = -EINVAL; 805 goto out_dump; 806 } 807 if (err) 808 goto out; 809 810 n++; 811 if (n == n_nodes) 812 break; 813 } 814 815 ubifs_assert(c, ubifs_search_bud(c, lnum)); 816 ubifs_assert(c, sleb->endpt - offs >= used); 817 ubifs_assert(c, sleb->endpt % c->min_io_size == 0); 818 819 b->dirty = sleb->endpt - offs - used; 820 b->free = c->leb_size - sleb->endpt; 821 dbg_mnt("bud LEB %d replied: dirty %d, free %d", 822 lnum, b->dirty, b->free); 823 824 out: 825 ubifs_scan_destroy(sleb); 826 return err; 827 828 out_dump: 829 ubifs_err(c, "bad node is at LEB %d:%d", lnum, snod->offs); 830 ubifs_dump_node(c, snod->node, c->leb_size - snod->offs); 831 ubifs_scan_destroy(sleb); 832 return -EINVAL; 833 } 834 835 /** 836 * replay_buds - replay all buds. 837 * @c: UBIFS file-system description object 838 * 839 * This function returns zero in case of success and a negative error code in 840 * case of failure. 841 */ 842 static int replay_buds(struct ubifs_info *c) 843 { 844 struct bud_entry *b; 845 int err; 846 unsigned long long prev_sqnum = 0; 847 848 list_for_each_entry(b, &c->replay_buds, list) { 849 err = replay_bud(c, b); 850 if (err) 851 return err; 852 853 ubifs_assert(c, b->sqnum > prev_sqnum); 854 prev_sqnum = b->sqnum; 855 } 856 857 return 0; 858 } 859 860 /** 861 * destroy_bud_list - destroy the list of buds to replay. 862 * @c: UBIFS file-system description object 863 */ 864 static void destroy_bud_list(struct ubifs_info *c) 865 { 866 struct bud_entry *b; 867 868 while (!list_empty(&c->replay_buds)) { 869 b = list_entry(c->replay_buds.next, struct bud_entry, list); 870 list_del(&b->list); 871 kfree(b); 872 } 873 } 874 875 /** 876 * add_replay_bud - add a bud to the list of buds to replay. 877 * @c: UBIFS file-system description object 878 * @lnum: bud logical eraseblock number to replay 879 * @offs: bud start offset 880 * @jhead: journal head to which this bud belongs 881 * @sqnum: reference node sequence number 882 * 883 * This function returns zero in case of success and a negative error code in 884 * case of failure. 885 */ 886 static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, 887 unsigned long long sqnum) 888 { 889 struct ubifs_bud *bud; 890 struct bud_entry *b; 891 int err; 892 893 dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead); 894 895 bud = kmalloc(sizeof(struct ubifs_bud), GFP_KERNEL); 896 if (!bud) 897 return -ENOMEM; 898 899 b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL); 900 if (!b) { 901 err = -ENOMEM; 902 goto out; 903 } 904 905 bud->lnum = lnum; 906 bud->start = offs; 907 bud->jhead = jhead; 908 bud->log_hash = ubifs_hash_get_desc(c); 909 if (IS_ERR(bud->log_hash)) { 910 err = PTR_ERR(bud->log_hash); 911 goto out; 912 } 913 914 ubifs_shash_copy_state(c, c->log_hash, bud->log_hash); 915 916 ubifs_add_bud(c, bud); 917 918 b->bud = bud; 919 b->sqnum = sqnum; 920 list_add_tail(&b->list, &c->replay_buds); 921 922 return 0; 923 out: 924 kfree(bud); 925 kfree(b); 926 927 return err; 928 } 929 930 /** 931 * validate_ref - validate a reference node. 932 * @c: UBIFS file-system description object 933 * @ref: the reference node to validate 934 * 935 * This function returns %1 if a bud reference already exists for the LEB. %0 is 936 * returned if the reference node is new, otherwise %-EINVAL is returned if 937 * validation failed. 938 */ 939 static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref) 940 { 941 struct ubifs_bud *bud; 942 int lnum = le32_to_cpu(ref->lnum); 943 unsigned int offs = le32_to_cpu(ref->offs); 944 unsigned int jhead = le32_to_cpu(ref->jhead); 945 946 /* 947 * ref->offs may point to the end of LEB when the journal head points 948 * to the end of LEB and we write reference node for it during commit. 949 * So this is why we require 'offs > c->leb_size'. 950 */ 951 if (jhead >= c->jhead_cnt || lnum >= c->leb_cnt || 952 lnum < c->main_first || offs > c->leb_size || 953 offs & (c->min_io_size - 1)) 954 return -EINVAL; 955 956 /* Make sure we have not already looked at this bud */ 957 bud = ubifs_search_bud(c, lnum); 958 if (bud) { 959 if (bud->jhead == jhead && bud->start <= offs) 960 return 1; 961 ubifs_err(c, "bud at LEB %d:%d was already referred", lnum, offs); 962 return -EINVAL; 963 } 964 965 return 0; 966 } 967 968 /** 969 * replay_log_leb - replay a log logical eraseblock. 970 * @c: UBIFS file-system description object 971 * @lnum: log logical eraseblock to replay 972 * @offs: offset to start replaying from 973 * @sbuf: scan buffer 974 * 975 * This function replays a log LEB and returns zero in case of success, %1 if 976 * this is the last LEB in the log, and a negative error code in case of 977 * failure. 978 */ 979 static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) 980 { 981 int err; 982 struct ubifs_scan_leb *sleb; 983 struct ubifs_scan_node *snod; 984 const struct ubifs_cs_node *node; 985 986 dbg_mnt("replay log LEB %d:%d", lnum, offs); 987 sleb = ubifs_scan(c, lnum, offs, sbuf, c->need_recovery); 988 if (IS_ERR(sleb)) { 989 if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery) 990 return PTR_ERR(sleb); 991 /* 992 * Note, the below function will recover this log LEB only if 993 * it is the last, because unclean reboots can possibly corrupt 994 * only the tail of the log. 995 */ 996 sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); 997 if (IS_ERR(sleb)) 998 return PTR_ERR(sleb); 999 } 1000 1001 if (sleb->nodes_cnt == 0) { 1002 err = 1; 1003 goto out; 1004 } 1005 1006 node = sleb->buf; 1007 snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list); 1008 if (c->cs_sqnum == 0) { 1009 /* 1010 * This is the first log LEB we are looking at, make sure that 1011 * the first node is a commit start node. Also record its 1012 * sequence number so that UBIFS can determine where the log 1013 * ends, because all nodes which were have higher sequence 1014 * numbers. 1015 */ 1016 if (snod->type != UBIFS_CS_NODE) { 1017 ubifs_err(c, "first log node at LEB %d:%d is not CS node", 1018 lnum, offs); 1019 goto out_dump; 1020 } 1021 if (le64_to_cpu(node->cmt_no) != c->cmt_no) { 1022 ubifs_err(c, "first CS node at LEB %d:%d has wrong commit number %llu expected %llu", 1023 lnum, offs, 1024 (unsigned long long)le64_to_cpu(node->cmt_no), 1025 c->cmt_no); 1026 goto out_dump; 1027 } 1028 1029 c->cs_sqnum = le64_to_cpu(node->ch.sqnum); 1030 dbg_mnt("commit start sqnum %llu", c->cs_sqnum); 1031 1032 err = ubifs_shash_init(c, c->log_hash); 1033 if (err) 1034 goto out; 1035 1036 err = ubifs_shash_update(c, c->log_hash, node, UBIFS_CS_NODE_SZ); 1037 if (err < 0) 1038 goto out; 1039 } 1040 1041 if (snod->sqnum < c->cs_sqnum) { 1042 /* 1043 * This means that we reached end of log and now 1044 * look to the older log data, which was already 1045 * committed but the eraseblock was not erased (UBIFS 1046 * only un-maps it). So this basically means we have to 1047 * exit with "end of log" code. 1048 */ 1049 err = 1; 1050 goto out; 1051 } 1052 1053 /* Make sure the first node sits at offset zero of the LEB */ 1054 if (snod->offs != 0) { 1055 ubifs_err(c, "first node is not at zero offset"); 1056 goto out_dump; 1057 } 1058 1059 list_for_each_entry(snod, &sleb->nodes, list) { 1060 cond_resched(); 1061 1062 if (snod->sqnum >= SQNUM_WATERMARK) { 1063 ubifs_err(c, "file system's life ended"); 1064 goto out_dump; 1065 } 1066 1067 if (snod->sqnum < c->cs_sqnum) { 1068 ubifs_err(c, "bad sqnum %llu, commit sqnum %llu", 1069 snod->sqnum, c->cs_sqnum); 1070 goto out_dump; 1071 } 1072 1073 if (snod->sqnum > c->max_sqnum) 1074 c->max_sqnum = snod->sqnum; 1075 1076 switch (snod->type) { 1077 case UBIFS_REF_NODE: { 1078 const struct ubifs_ref_node *ref = snod->node; 1079 1080 err = validate_ref(c, ref); 1081 if (err == 1) 1082 break; /* Already have this bud */ 1083 if (err) 1084 goto out_dump; 1085 1086 err = ubifs_shash_update(c, c->log_hash, ref, 1087 UBIFS_REF_NODE_SZ); 1088 if (err) 1089 goto out; 1090 1091 err = add_replay_bud(c, le32_to_cpu(ref->lnum), 1092 le32_to_cpu(ref->offs), 1093 le32_to_cpu(ref->jhead), 1094 snod->sqnum); 1095 if (err) 1096 goto out; 1097 1098 break; 1099 } 1100 case UBIFS_CS_NODE: 1101 /* Make sure it sits at the beginning of LEB */ 1102 if (snod->offs != 0) { 1103 ubifs_err(c, "unexpected node in log"); 1104 goto out_dump; 1105 } 1106 break; 1107 default: 1108 ubifs_err(c, "unexpected node in log"); 1109 goto out_dump; 1110 } 1111 } 1112 1113 if (sleb->endpt || c->lhead_offs >= c->leb_size) { 1114 c->lhead_lnum = lnum; 1115 c->lhead_offs = sleb->endpt; 1116 } 1117 1118 err = !sleb->endpt; 1119 out: 1120 ubifs_scan_destroy(sleb); 1121 return err; 1122 1123 out_dump: 1124 ubifs_err(c, "log error detected while replaying the log at LEB %d:%d", 1125 lnum, offs + snod->offs); 1126 ubifs_dump_node(c, snod->node, c->leb_size - snod->offs); 1127 ubifs_scan_destroy(sleb); 1128 return -EINVAL; 1129 } 1130 1131 /** 1132 * take_ihead - update the status of the index head in lprops to 'taken'. 1133 * @c: UBIFS file-system description object 1134 * 1135 * This function returns the amount of free space in the index head LEB or a 1136 * negative error code. 1137 */ 1138 static int take_ihead(struct ubifs_info *c) 1139 { 1140 const struct ubifs_lprops *lp; 1141 int err, free; 1142 1143 ubifs_get_lprops(c); 1144 1145 lp = ubifs_lpt_lookup_dirty(c, c->ihead_lnum); 1146 if (IS_ERR(lp)) { 1147 err = PTR_ERR(lp); 1148 goto out; 1149 } 1150 1151 free = lp->free; 1152 1153 lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, 1154 lp->flags | LPROPS_TAKEN, 0); 1155 if (IS_ERR(lp)) { 1156 err = PTR_ERR(lp); 1157 goto out; 1158 } 1159 1160 err = free; 1161 out: 1162 ubifs_release_lprops(c); 1163 return err; 1164 } 1165 1166 /** 1167 * ubifs_replay_journal - replay journal. 1168 * @c: UBIFS file-system description object 1169 * 1170 * This function scans the journal, replays and cleans it up. It makes sure all 1171 * memory data structures related to uncommitted journal are built (dirty TNC 1172 * tree, tree of buds, modified lprops, etc). 1173 */ 1174 int ubifs_replay_journal(struct ubifs_info *c) 1175 { 1176 int err, lnum, free; 1177 1178 BUILD_BUG_ON(UBIFS_TRUN_KEY > 5); 1179 1180 /* Update the status of the index head in lprops to 'taken' */ 1181 free = take_ihead(c); 1182 if (free < 0) 1183 return free; /* Error code */ 1184 1185 if (c->ihead_offs != c->leb_size - free) { 1186 ubifs_err(c, "bad index head LEB %d:%d", c->ihead_lnum, 1187 c->ihead_offs); 1188 return -EINVAL; 1189 } 1190 1191 dbg_mnt("start replaying the journal"); 1192 c->replaying = 1; 1193 lnum = c->ltail_lnum = c->lhead_lnum; 1194 1195 do { 1196 err = replay_log_leb(c, lnum, 0, c->sbuf); 1197 if (err == 1) { 1198 if (lnum != c->lhead_lnum) 1199 /* We hit the end of the log */ 1200 break; 1201 1202 /* 1203 * The head of the log must always start with the 1204 * "commit start" node on a properly formatted UBIFS. 1205 * But we found no nodes at all, which means that 1206 * something went wrong and we cannot proceed mounting 1207 * the file-system. 1208 */ 1209 ubifs_err(c, "no UBIFS nodes found at the log head LEB %d:%d, possibly corrupted", 1210 lnum, 0); 1211 err = -EINVAL; 1212 } 1213 if (err) 1214 goto out; 1215 lnum = ubifs_next_log_lnum(c, lnum); 1216 } while (lnum != c->ltail_lnum); 1217 1218 err = replay_buds(c); 1219 if (err) 1220 goto out; 1221 1222 err = apply_replay_list(c); 1223 if (err) 1224 goto out; 1225 1226 err = set_buds_lprops(c); 1227 if (err) 1228 goto out; 1229 1230 /* 1231 * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable 1232 * to roughly estimate index growth. Things like @c->bi.min_idx_lebs 1233 * depend on it. This means we have to initialize it to make sure 1234 * budgeting works properly. 1235 */ 1236 c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); 1237 c->bi.uncommitted_idx *= c->max_idx_node_sz; 1238 1239 ubifs_assert(c, c->bud_bytes <= c->max_bud_bytes || c->need_recovery); 1240 dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, highest_inum %lu", 1241 c->lhead_lnum, c->lhead_offs, c->max_sqnum, 1242 (unsigned long)c->highest_inum); 1243 out: 1244 destroy_replay_list(c); 1245 destroy_bud_list(c); 1246 c->replaying = 0; 1247 return err; 1248 } 1249