1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * This file is part of UBIFS. 4 * 5 * Copyright (C) 2006-2008 Nokia Corporation. 6 * 7 * Authors: Adrian Hunter 8 * Artem Bityutskiy (Битюцкий Артём) 9 */ 10 11 /* 12 * This file contains journal replay code. It runs when the file-system is being 13 * mounted and requires no locking. 14 * 15 * The larger is the journal, the longer it takes to scan it, so the longer it 16 * takes to mount UBIFS. This is why the journal has limited size which may be 17 * changed depending on the system requirements. But a larger journal gives 18 * faster I/O speed because it writes the index less frequently. So this is a 19 * trade-off. Also, the journal is indexed by the in-memory index (TNC), so the 20 * larger is the journal, the more memory its index may consume. 21 */ 22 23 #include "ubifs.h" 24 #include <linux/list_sort.h> 25 #include <crypto/hash.h> 26 #include <crypto/algapi.h> 27 28 /** 29 * struct replay_entry - replay list entry. 30 * @lnum: logical eraseblock number of the node 31 * @offs: node offset 32 * @len: node length 33 * @deletion: non-zero if this entry corresponds to a node deletion 34 * @sqnum: node sequence number 35 * @list: links the replay list 36 * @key: node key 37 * @nm: directory entry name 38 * @old_size: truncation old size 39 * @new_size: truncation new size 40 * 41 * The replay process first scans all buds and builds the replay list, then 42 * sorts the replay list in nodes sequence number order, and then inserts all 43 * the replay entries to the TNC. 44 */ 45 struct replay_entry { 46 int lnum; 47 int offs; 48 int len; 49 u8 hash[UBIFS_HASH_ARR_SZ]; 50 unsigned int deletion:1; 51 unsigned long long sqnum; 52 struct list_head list; 53 union ubifs_key key; 54 union { 55 struct fscrypt_name nm; 56 struct { 57 loff_t old_size; 58 loff_t new_size; 59 }; 60 }; 61 }; 62 63 /** 64 * struct bud_entry - entry in the list of buds to replay. 65 * @list: next bud in the list 66 * @bud: bud description object 67 * @sqnum: reference node sequence number 68 * @free: free bytes in the bud 69 * @dirty: dirty bytes in the bud 70 */ 71 struct bud_entry { 72 struct list_head list; 73 struct ubifs_bud *bud; 74 unsigned long long sqnum; 75 int free; 76 int dirty; 77 }; 78 79 /** 80 * set_bud_lprops - set free and dirty space used by a bud. 81 * @c: UBIFS file-system description object 82 * @b: bud entry which describes the bud 83 * 84 * This function makes sure the LEB properties of bud @b are set correctly 85 * after the replay. Returns zero in case of success and a negative error code 86 * in case of failure. 87 */ 88 static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b) 89 { 90 const struct ubifs_lprops *lp; 91 int err = 0, dirty; 92 93 ubifs_get_lprops(c); 94 95 lp = ubifs_lpt_lookup_dirty(c, b->bud->lnum); 96 if (IS_ERR(lp)) { 97 err = PTR_ERR(lp); 98 goto out; 99 } 100 101 dirty = lp->dirty; 102 if (b->bud->start == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { 103 /* 104 * The LEB was added to the journal with a starting offset of 105 * zero which means the LEB must have been empty. The LEB 106 * property values should be @lp->free == @c->leb_size and 107 * @lp->dirty == 0, but that is not the case. The reason is that 108 * the LEB had been garbage collected before it became the bud, 109 * and there was no commit in between. The garbage collector 110 * resets the free and dirty space without recording it 111 * anywhere except lprops, so if there was no commit then 112 * lprops does not have that information. 113 * 114 * We do not need to adjust free space because the scan has told 115 * us the exact value which is recorded in the replay entry as 116 * @b->free. 117 * 118 * However we do need to subtract from the dirty space the 119 * amount of space that the garbage collector reclaimed, which 120 * is the whole LEB minus the amount of space that was free. 121 */ 122 dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, 123 lp->free, lp->dirty); 124 dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, 125 lp->free, lp->dirty); 126 dirty -= c->leb_size - lp->free; 127 /* 128 * If the replay order was perfect the dirty space would now be 129 * zero. The order is not perfect because the journal heads 130 * race with each other. This is not a problem but is does mean 131 * that the dirty space may temporarily exceed c->leb_size 132 * during the replay. 133 */ 134 if (dirty != 0) 135 dbg_mnt("LEB %d lp: %d free %d dirty replay: %d free %d dirty", 136 b->bud->lnum, lp->free, lp->dirty, b->free, 137 b->dirty); 138 } 139 lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty, 140 lp->flags | LPROPS_TAKEN, 0); 141 if (IS_ERR(lp)) { 142 err = PTR_ERR(lp); 143 goto out; 144 } 145 146 /* Make sure the journal head points to the latest bud */ 147 err = ubifs_wbuf_seek_nolock(&c->jheads[b->bud->jhead].wbuf, 148 b->bud->lnum, c->leb_size - b->free); 149 150 out: 151 ubifs_release_lprops(c); 152 return err; 153 } 154 155 /** 156 * set_buds_lprops - set free and dirty space for all replayed buds. 157 * @c: UBIFS file-system description object 158 * 159 * This function sets LEB properties for all replayed buds. Returns zero in 160 * case of success and a negative error code in case of failure. 161 */ 162 static int set_buds_lprops(struct ubifs_info *c) 163 { 164 struct bud_entry *b; 165 int err; 166 167 list_for_each_entry(b, &c->replay_buds, list) { 168 err = set_bud_lprops(c, b); 169 if (err) 170 return err; 171 } 172 173 return 0; 174 } 175 176 /** 177 * trun_remove_range - apply a replay entry for a truncation to the TNC. 178 * @c: UBIFS file-system description object 179 * @r: replay entry of truncation 180 */ 181 static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r) 182 { 183 unsigned min_blk, max_blk; 184 union ubifs_key min_key, max_key; 185 ino_t ino; 186 187 min_blk = r->new_size / UBIFS_BLOCK_SIZE; 188 if (r->new_size & (UBIFS_BLOCK_SIZE - 1)) 189 min_blk += 1; 190 191 max_blk = r->old_size / UBIFS_BLOCK_SIZE; 192 if ((r->old_size & (UBIFS_BLOCK_SIZE - 1)) == 0) 193 max_blk -= 1; 194 195 ino = key_inum(c, &r->key); 196 197 data_key_init(c, &min_key, ino, min_blk); 198 data_key_init(c, &max_key, ino, max_blk); 199 200 return ubifs_tnc_remove_range(c, &min_key, &max_key); 201 } 202 203 /** 204 * inode_still_linked - check whether inode in question will be re-linked. 205 * @c: UBIFS file-system description object 206 * @rino: replay entry to test 207 * 208 * O_TMPFILE files can be re-linked, this means link count goes from 0 to 1. 209 * This case needs special care, otherwise all references to the inode will 210 * be removed upon the first replay entry of an inode with link count 0 211 * is found. 212 */ 213 static bool inode_still_linked(struct ubifs_info *c, struct replay_entry *rino) 214 { 215 struct replay_entry *r; 216 217 ubifs_assert(c, rino->deletion); 218 ubifs_assert(c, key_type(c, &rino->key) == UBIFS_INO_KEY); 219 220 /* 221 * Find the most recent entry for the inode behind @rino and check 222 * whether it is a deletion. 223 */ 224 list_for_each_entry_reverse(r, &c->replay_list, list) { 225 ubifs_assert(c, r->sqnum >= rino->sqnum); 226 if (key_inum(c, &r->key) == key_inum(c, &rino->key) && 227 key_type(c, &r->key) == UBIFS_INO_KEY) 228 return r->deletion == 0; 229 230 } 231 232 ubifs_assert(c, 0); 233 return false; 234 } 235 236 /** 237 * apply_replay_entry - apply a replay entry to the TNC. 238 * @c: UBIFS file-system description object 239 * @r: replay entry to apply 240 * 241 * Apply a replay entry to the TNC. 242 */ 243 static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) 244 { 245 int err; 246 247 dbg_mntk(&r->key, "LEB %d:%d len %d deletion %d sqnum %llu key ", 248 r->lnum, r->offs, r->len, r->deletion, r->sqnum); 249 250 if (is_hash_key(c, &r->key)) { 251 if (r->deletion) 252 err = ubifs_tnc_remove_nm(c, &r->key, &r->nm); 253 else 254 err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs, 255 r->len, r->hash, &r->nm); 256 } else { 257 if (r->deletion) 258 switch (key_type(c, &r->key)) { 259 case UBIFS_INO_KEY: 260 { 261 ino_t inum = key_inum(c, &r->key); 262 263 if (inode_still_linked(c, r)) { 264 err = 0; 265 break; 266 } 267 268 err = ubifs_tnc_remove_ino(c, inum); 269 break; 270 } 271 case UBIFS_TRUN_KEY: 272 err = trun_remove_range(c, r); 273 break; 274 default: 275 err = ubifs_tnc_remove(c, &r->key); 276 break; 277 } 278 else 279 err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs, 280 r->len, r->hash); 281 if (err) 282 return err; 283 284 if (c->need_recovery) 285 err = ubifs_recover_size_accum(c, &r->key, r->deletion, 286 r->new_size); 287 } 288 289 return err; 290 } 291 292 /** 293 * replay_entries_cmp - compare 2 replay entries. 294 * @priv: UBIFS file-system description object 295 * @a: first replay entry 296 * @b: second replay entry 297 * 298 * This is a comparios function for 'list_sort()' which compares 2 replay 299 * entries @a and @b by comparing their sequence number. Returns %1 if @a has 300 * greater sequence number and %-1 otherwise. 301 */ 302 static int replay_entries_cmp(void *priv, const struct list_head *a, 303 const struct list_head *b) 304 { 305 struct ubifs_info *c = priv; 306 struct replay_entry *ra, *rb; 307 308 cond_resched(); 309 if (a == b) 310 return 0; 311 312 ra = list_entry(a, struct replay_entry, list); 313 rb = list_entry(b, struct replay_entry, list); 314 ubifs_assert(c, ra->sqnum != rb->sqnum); 315 if (ra->sqnum > rb->sqnum) 316 return 1; 317 return -1; 318 } 319 320 /** 321 * apply_replay_list - apply the replay list to the TNC. 322 * @c: UBIFS file-system description object 323 * 324 * Apply all entries in the replay list to the TNC. Returns zero in case of 325 * success and a negative error code in case of failure. 326 */ 327 static int apply_replay_list(struct ubifs_info *c) 328 { 329 struct replay_entry *r; 330 int err; 331 332 list_sort(c, &c->replay_list, &replay_entries_cmp); 333 334 list_for_each_entry(r, &c->replay_list, list) { 335 cond_resched(); 336 337 err = apply_replay_entry(c, r); 338 if (err) 339 return err; 340 } 341 342 return 0; 343 } 344 345 /** 346 * destroy_replay_list - destroy the replay. 347 * @c: UBIFS file-system description object 348 * 349 * Destroy the replay list. 350 */ 351 static void destroy_replay_list(struct ubifs_info *c) 352 { 353 struct replay_entry *r, *tmp; 354 355 list_for_each_entry_safe(r, tmp, &c->replay_list, list) { 356 if (is_hash_key(c, &r->key)) 357 kfree(fname_name(&r->nm)); 358 list_del(&r->list); 359 kfree(r); 360 } 361 } 362 363 /** 364 * insert_node - insert a node to the replay list 365 * @c: UBIFS file-system description object 366 * @lnum: node logical eraseblock number 367 * @offs: node offset 368 * @len: node length 369 * @key: node key 370 * @sqnum: sequence number 371 * @deletion: non-zero if this is a deletion 372 * @used: number of bytes in use in a LEB 373 * @old_size: truncation old size 374 * @new_size: truncation new size 375 * 376 * This function inserts a scanned non-direntry node to the replay list. The 377 * replay list contains @struct replay_entry elements, and we sort this list in 378 * sequence number order before applying it. The replay list is applied at the 379 * very end of the replay process. Since the list is sorted in sequence number 380 * order, the older modifications are applied first. This function returns zero 381 * in case of success and a negative error code in case of failure. 382 */ 383 static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, 384 const u8 *hash, union ubifs_key *key, 385 unsigned long long sqnum, int deletion, int *used, 386 loff_t old_size, loff_t new_size) 387 { 388 struct replay_entry *r; 389 390 dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs); 391 392 if (key_inum(c, key) >= c->highest_inum) 393 c->highest_inum = key_inum(c, key); 394 395 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 396 if (!r) 397 return -ENOMEM; 398 399 if (!deletion) 400 *used += ALIGN(len, 8); 401 r->lnum = lnum; 402 r->offs = offs; 403 r->len = len; 404 ubifs_copy_hash(c, hash, r->hash); 405 r->deletion = !!deletion; 406 r->sqnum = sqnum; 407 key_copy(c, key, &r->key); 408 r->old_size = old_size; 409 r->new_size = new_size; 410 411 list_add_tail(&r->list, &c->replay_list); 412 return 0; 413 } 414 415 /** 416 * insert_dent - insert a directory entry node into the replay list. 417 * @c: UBIFS file-system description object 418 * @lnum: node logical eraseblock number 419 * @offs: node offset 420 * @len: node length 421 * @key: node key 422 * @name: directory entry name 423 * @nlen: directory entry name length 424 * @sqnum: sequence number 425 * @deletion: non-zero if this is a deletion 426 * @used: number of bytes in use in a LEB 427 * 428 * This function inserts a scanned directory entry node or an extended 429 * attribute entry to the replay list. Returns zero in case of success and a 430 * negative error code in case of failure. 431 */ 432 static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, 433 const u8 *hash, union ubifs_key *key, 434 const char *name, int nlen, unsigned long long sqnum, 435 int deletion, int *used) 436 { 437 struct replay_entry *r; 438 char *nbuf; 439 440 dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs); 441 if (key_inum(c, key) >= c->highest_inum) 442 c->highest_inum = key_inum(c, key); 443 444 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 445 if (!r) 446 return -ENOMEM; 447 448 nbuf = kmalloc(nlen + 1, GFP_KERNEL); 449 if (!nbuf) { 450 kfree(r); 451 return -ENOMEM; 452 } 453 454 if (!deletion) 455 *used += ALIGN(len, 8); 456 r->lnum = lnum; 457 r->offs = offs; 458 r->len = len; 459 ubifs_copy_hash(c, hash, r->hash); 460 r->deletion = !!deletion; 461 r->sqnum = sqnum; 462 key_copy(c, key, &r->key); 463 fname_len(&r->nm) = nlen; 464 memcpy(nbuf, name, nlen); 465 nbuf[nlen] = '\0'; 466 fname_name(&r->nm) = nbuf; 467 468 list_add_tail(&r->list, &c->replay_list); 469 return 0; 470 } 471 472 /** 473 * ubifs_validate_entry - validate directory or extended attribute entry node. 474 * @c: UBIFS file-system description object 475 * @dent: the node to validate 476 * 477 * This function validates directory or extended attribute entry node @dent. 478 * Returns zero if the node is all right and a %-EINVAL if not. 479 */ 480 int ubifs_validate_entry(struct ubifs_info *c, 481 const struct ubifs_dent_node *dent) 482 { 483 int key_type = key_type_flash(c, dent->key); 484 int nlen = le16_to_cpu(dent->nlen); 485 486 if (le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1 || 487 dent->type >= UBIFS_ITYPES_CNT || 488 nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 || 489 (key_type == UBIFS_XENT_KEY && strnlen(dent->name, nlen) != nlen) || 490 le64_to_cpu(dent->inum) > MAX_INUM) { 491 ubifs_err(c, "bad %s node", key_type == UBIFS_DENT_KEY ? 492 "directory entry" : "extended attribute entry"); 493 return -EINVAL; 494 } 495 496 if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) { 497 ubifs_err(c, "bad key type %d", key_type); 498 return -EINVAL; 499 } 500 501 return 0; 502 } 503 504 /** 505 * is_last_bud - check if the bud is the last in the journal head. 506 * @c: UBIFS file-system description object 507 * @bud: bud description object 508 * 509 * This function checks if bud @bud is the last bud in its journal head. This 510 * information is then used by 'replay_bud()' to decide whether the bud can 511 * have corruptions or not. Indeed, only last buds can be corrupted by power 512 * cuts. Returns %1 if this is the last bud, and %0 if not. 513 */ 514 static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud) 515 { 516 struct ubifs_jhead *jh = &c->jheads[bud->jhead]; 517 struct ubifs_bud *next; 518 uint32_t data; 519 int err; 520 521 if (list_is_last(&bud->list, &jh->buds_list)) 522 return 1; 523 524 /* 525 * The following is a quirk to make sure we work correctly with UBIFS 526 * images used with older UBIFS. 527 * 528 * Normally, the last bud will be the last in the journal head's list 529 * of bud. However, there is one exception if the UBIFS image belongs 530 * to older UBIFS. This is fairly unlikely: one would need to use old 531 * UBIFS, then have a power cut exactly at the right point, and then 532 * try to mount this image with new UBIFS. 533 * 534 * The exception is: it is possible to have 2 buds A and B, A goes 535 * before B, and B is the last, bud B is contains no data, and bud A is 536 * corrupted at the end. The reason is that in older versions when the 537 * journal code switched the next bud (from A to B), it first added a 538 * log reference node for the new bud (B), and only after this it 539 * synchronized the write-buffer of current bud (A). But later this was 540 * changed and UBIFS started to always synchronize the write-buffer of 541 * the bud (A) before writing the log reference for the new bud (B). 542 * 543 * But because older UBIFS always synchronized A's write-buffer before 544 * writing to B, we can recognize this exceptional situation but 545 * checking the contents of bud B - if it is empty, then A can be 546 * treated as the last and we can recover it. 547 * 548 * TODO: remove this piece of code in a couple of years (today it is 549 * 16.05.2011). 550 */ 551 next = list_entry(bud->list.next, struct ubifs_bud, list); 552 if (!list_is_last(&next->list, &jh->buds_list)) 553 return 0; 554 555 err = ubifs_leb_read(c, next->lnum, (char *)&data, next->start, 4, 1); 556 if (err) 557 return 0; 558 559 return data == 0xFFFFFFFF; 560 } 561 562 /* authenticate_sleb_hash is split out for stack usage */ 563 static int noinline_for_stack 564 authenticate_sleb_hash(struct ubifs_info *c, 565 struct shash_desc *log_hash, u8 *hash) 566 { 567 SHASH_DESC_ON_STACK(hash_desc, c->hash_tfm); 568 569 hash_desc->tfm = c->hash_tfm; 570 571 ubifs_shash_copy_state(c, log_hash, hash_desc); 572 return crypto_shash_final(hash_desc, hash); 573 } 574 575 /** 576 * authenticate_sleb - authenticate one scan LEB 577 * @c: UBIFS file-system description object 578 * @sleb: the scan LEB to authenticate 579 * @log_hash: 580 * @is_last: if true, this is the last LEB 581 * 582 * This function iterates over the buds of a single LEB authenticating all buds 583 * with the authentication nodes on this LEB. Authentication nodes are written 584 * after some buds and contain a HMAC covering the authentication node itself 585 * and the buds between the last authentication node and the current 586 * authentication node. It can happen that the last buds cannot be authenticated 587 * because a powercut happened when some nodes were written but not the 588 * corresponding authentication node. This function returns the number of nodes 589 * that could be authenticated or a negative error code. 590 */ 591 static int authenticate_sleb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, 592 struct shash_desc *log_hash, int is_last) 593 { 594 int n_not_auth = 0; 595 struct ubifs_scan_node *snod; 596 int n_nodes = 0; 597 int err; 598 u8 hash[UBIFS_HASH_ARR_SZ]; 599 u8 hmac[UBIFS_HMAC_ARR_SZ]; 600 601 if (!ubifs_authenticated(c)) 602 return sleb->nodes_cnt; 603 604 list_for_each_entry(snod, &sleb->nodes, list) { 605 606 n_nodes++; 607 608 if (snod->type == UBIFS_AUTH_NODE) { 609 struct ubifs_auth_node *auth = snod->node; 610 611 err = authenticate_sleb_hash(c, log_hash, hash); 612 if (err) 613 goto out; 614 615 err = crypto_shash_tfm_digest(c->hmac_tfm, hash, 616 c->hash_len, hmac); 617 if (err) 618 goto out; 619 620 err = ubifs_check_hmac(c, auth->hmac, hmac); 621 if (err) { 622 err = -EPERM; 623 goto out; 624 } 625 n_not_auth = 0; 626 } else { 627 err = crypto_shash_update(log_hash, snod->node, 628 snod->len); 629 if (err) 630 goto out; 631 n_not_auth++; 632 } 633 } 634 635 /* 636 * A powercut can happen when some nodes were written, but not yet 637 * the corresponding authentication node. This may only happen on 638 * the last bud though. 639 */ 640 if (n_not_auth) { 641 if (is_last) { 642 dbg_mnt("%d unauthenticated nodes found on LEB %d, Ignoring them", 643 n_not_auth, sleb->lnum); 644 err = 0; 645 } else { 646 dbg_mnt("%d unauthenticated nodes found on non-last LEB %d", 647 n_not_auth, sleb->lnum); 648 err = -EPERM; 649 } 650 } else { 651 err = 0; 652 } 653 out: 654 return err ? err : n_nodes - n_not_auth; 655 } 656 657 /** 658 * replay_bud - replay a bud logical eraseblock. 659 * @c: UBIFS file-system description object 660 * @b: bud entry which describes the bud 661 * 662 * This function replays bud @bud, recovers it if needed, and adds all nodes 663 * from this bud to the replay list. Returns zero in case of success and a 664 * negative error code in case of failure. 665 */ 666 static int replay_bud(struct ubifs_info *c, struct bud_entry *b) 667 { 668 int is_last = is_last_bud(c, b->bud); 669 int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start; 670 int n_nodes, n = 0; 671 struct ubifs_scan_leb *sleb; 672 struct ubifs_scan_node *snod; 673 674 dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d", 675 lnum, b->bud->jhead, offs, is_last); 676 677 if (c->need_recovery && is_last) 678 /* 679 * Recover only last LEBs in the journal heads, because power 680 * cuts may cause corruptions only in these LEBs, because only 681 * these LEBs could possibly be written to at the power cut 682 * time. 683 */ 684 sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, b->bud->jhead); 685 else 686 sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); 687 if (IS_ERR(sleb)) 688 return PTR_ERR(sleb); 689 690 n_nodes = authenticate_sleb(c, sleb, b->bud->log_hash, is_last); 691 if (n_nodes < 0) { 692 err = n_nodes; 693 goto out; 694 } 695 696 ubifs_shash_copy_state(c, b->bud->log_hash, 697 c->jheads[b->bud->jhead].log_hash); 698 699 /* 700 * The bud does not have to start from offset zero - the beginning of 701 * the 'lnum' LEB may contain previously committed data. One of the 702 * things we have to do in replay is to correctly update lprops with 703 * newer information about this LEB. 704 * 705 * At this point lprops thinks that this LEB has 'c->leb_size - offs' 706 * bytes of free space because it only contain information about 707 * committed data. 708 * 709 * But we know that real amount of free space is 'c->leb_size - 710 * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and 711 * 'sleb->endpt' is used by bud data. We have to correctly calculate 712 * how much of these data are dirty and update lprops with this 713 * information. 714 * 715 * The dirt in that LEB region is comprised of padding nodes, deletion 716 * nodes, truncation nodes and nodes which are obsoleted by subsequent 717 * nodes in this LEB. So instead of calculating clean space, we 718 * calculate used space ('used' variable). 719 */ 720 721 list_for_each_entry(snod, &sleb->nodes, list) { 722 u8 hash[UBIFS_HASH_ARR_SZ]; 723 int deletion = 0; 724 725 cond_resched(); 726 727 if (snod->sqnum >= SQNUM_WATERMARK) { 728 ubifs_err(c, "file system's life ended"); 729 goto out_dump; 730 } 731 732 ubifs_node_calc_hash(c, snod->node, hash); 733 734 if (snod->sqnum > c->max_sqnum) 735 c->max_sqnum = snod->sqnum; 736 737 switch (snod->type) { 738 case UBIFS_INO_NODE: 739 { 740 struct ubifs_ino_node *ino = snod->node; 741 loff_t new_size = le64_to_cpu(ino->size); 742 743 if (le32_to_cpu(ino->nlink) == 0) 744 deletion = 1; 745 err = insert_node(c, lnum, snod->offs, snod->len, hash, 746 &snod->key, snod->sqnum, deletion, 747 &used, 0, new_size); 748 break; 749 } 750 case UBIFS_DATA_NODE: 751 { 752 struct ubifs_data_node *dn = snod->node; 753 loff_t new_size = le32_to_cpu(dn->size) + 754 key_block(c, &snod->key) * 755 UBIFS_BLOCK_SIZE; 756 757 err = insert_node(c, lnum, snod->offs, snod->len, hash, 758 &snod->key, snod->sqnum, deletion, 759 &used, 0, new_size); 760 break; 761 } 762 case UBIFS_DENT_NODE: 763 case UBIFS_XENT_NODE: 764 { 765 struct ubifs_dent_node *dent = snod->node; 766 767 err = ubifs_validate_entry(c, dent); 768 if (err) 769 goto out_dump; 770 771 err = insert_dent(c, lnum, snod->offs, snod->len, hash, 772 &snod->key, dent->name, 773 le16_to_cpu(dent->nlen), snod->sqnum, 774 !le64_to_cpu(dent->inum), &used); 775 break; 776 } 777 case UBIFS_TRUN_NODE: 778 { 779 struct ubifs_trun_node *trun = snod->node; 780 loff_t old_size = le64_to_cpu(trun->old_size); 781 loff_t new_size = le64_to_cpu(trun->new_size); 782 union ubifs_key key; 783 784 /* Validate truncation node */ 785 if (old_size < 0 || old_size > c->max_inode_sz || 786 new_size < 0 || new_size > c->max_inode_sz || 787 old_size <= new_size) { 788 ubifs_err(c, "bad truncation node"); 789 goto out_dump; 790 } 791 792 /* 793 * Create a fake truncation key just to use the same 794 * functions which expect nodes to have keys. 795 */ 796 trun_key_init(c, &key, le32_to_cpu(trun->inum)); 797 err = insert_node(c, lnum, snod->offs, snod->len, hash, 798 &key, snod->sqnum, 1, &used, 799 old_size, new_size); 800 break; 801 } 802 case UBIFS_AUTH_NODE: 803 break; 804 default: 805 ubifs_err(c, "unexpected node type %d in bud LEB %d:%d", 806 snod->type, lnum, snod->offs); 807 err = -EINVAL; 808 goto out_dump; 809 } 810 if (err) 811 goto out; 812 813 n++; 814 if (n == n_nodes) 815 break; 816 } 817 818 ubifs_assert(c, ubifs_search_bud(c, lnum)); 819 ubifs_assert(c, sleb->endpt - offs >= used); 820 ubifs_assert(c, sleb->endpt % c->min_io_size == 0); 821 822 b->dirty = sleb->endpt - offs - used; 823 b->free = c->leb_size - sleb->endpt; 824 dbg_mnt("bud LEB %d replied: dirty %d, free %d", 825 lnum, b->dirty, b->free); 826 827 out: 828 ubifs_scan_destroy(sleb); 829 return err; 830 831 out_dump: 832 ubifs_err(c, "bad node is at LEB %d:%d", lnum, snod->offs); 833 ubifs_dump_node(c, snod->node, c->leb_size - snod->offs); 834 ubifs_scan_destroy(sleb); 835 return -EINVAL; 836 } 837 838 /** 839 * replay_buds - replay all buds. 840 * @c: UBIFS file-system description object 841 * 842 * This function returns zero in case of success and a negative error code in 843 * case of failure. 844 */ 845 static int replay_buds(struct ubifs_info *c) 846 { 847 struct bud_entry *b; 848 int err; 849 unsigned long long prev_sqnum = 0; 850 851 list_for_each_entry(b, &c->replay_buds, list) { 852 err = replay_bud(c, b); 853 if (err) 854 return err; 855 856 ubifs_assert(c, b->sqnum > prev_sqnum); 857 prev_sqnum = b->sqnum; 858 } 859 860 return 0; 861 } 862 863 /** 864 * destroy_bud_list - destroy the list of buds to replay. 865 * @c: UBIFS file-system description object 866 */ 867 static void destroy_bud_list(struct ubifs_info *c) 868 { 869 struct bud_entry *b; 870 871 while (!list_empty(&c->replay_buds)) { 872 b = list_entry(c->replay_buds.next, struct bud_entry, list); 873 list_del(&b->list); 874 kfree(b); 875 } 876 } 877 878 /** 879 * add_replay_bud - add a bud to the list of buds to replay. 880 * @c: UBIFS file-system description object 881 * @lnum: bud logical eraseblock number to replay 882 * @offs: bud start offset 883 * @jhead: journal head to which this bud belongs 884 * @sqnum: reference node sequence number 885 * 886 * This function returns zero in case of success and a negative error code in 887 * case of failure. 888 */ 889 static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, 890 unsigned long long sqnum) 891 { 892 struct ubifs_bud *bud; 893 struct bud_entry *b; 894 int err; 895 896 dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead); 897 898 bud = kmalloc(sizeof(struct ubifs_bud), GFP_KERNEL); 899 if (!bud) 900 return -ENOMEM; 901 902 b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL); 903 if (!b) { 904 err = -ENOMEM; 905 goto out; 906 } 907 908 bud->lnum = lnum; 909 bud->start = offs; 910 bud->jhead = jhead; 911 bud->log_hash = ubifs_hash_get_desc(c); 912 if (IS_ERR(bud->log_hash)) { 913 err = PTR_ERR(bud->log_hash); 914 goto out; 915 } 916 917 ubifs_shash_copy_state(c, c->log_hash, bud->log_hash); 918 919 ubifs_add_bud(c, bud); 920 921 b->bud = bud; 922 b->sqnum = sqnum; 923 list_add_tail(&b->list, &c->replay_buds); 924 925 return 0; 926 out: 927 kfree(bud); 928 kfree(b); 929 930 return err; 931 } 932 933 /** 934 * validate_ref - validate a reference node. 935 * @c: UBIFS file-system description object 936 * @ref: the reference node to validate 937 * 938 * This function returns %1 if a bud reference already exists for the LEB. %0 is 939 * returned if the reference node is new, otherwise %-EINVAL is returned if 940 * validation failed. 941 */ 942 static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref) 943 { 944 struct ubifs_bud *bud; 945 int lnum = le32_to_cpu(ref->lnum); 946 unsigned int offs = le32_to_cpu(ref->offs); 947 unsigned int jhead = le32_to_cpu(ref->jhead); 948 949 /* 950 * ref->offs may point to the end of LEB when the journal head points 951 * to the end of LEB and we write reference node for it during commit. 952 * So this is why we require 'offs > c->leb_size'. 953 */ 954 if (jhead >= c->jhead_cnt || lnum >= c->leb_cnt || 955 lnum < c->main_first || offs > c->leb_size || 956 offs & (c->min_io_size - 1)) 957 return -EINVAL; 958 959 /* Make sure we have not already looked at this bud */ 960 bud = ubifs_search_bud(c, lnum); 961 if (bud) { 962 if (bud->jhead == jhead && bud->start <= offs) 963 return 1; 964 ubifs_err(c, "bud at LEB %d:%d was already referred", lnum, offs); 965 return -EINVAL; 966 } 967 968 return 0; 969 } 970 971 /** 972 * replay_log_leb - replay a log logical eraseblock. 973 * @c: UBIFS file-system description object 974 * @lnum: log logical eraseblock to replay 975 * @offs: offset to start replaying from 976 * @sbuf: scan buffer 977 * 978 * This function replays a log LEB and returns zero in case of success, %1 if 979 * this is the last LEB in the log, and a negative error code in case of 980 * failure. 981 */ 982 static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) 983 { 984 int err; 985 struct ubifs_scan_leb *sleb; 986 struct ubifs_scan_node *snod; 987 const struct ubifs_cs_node *node; 988 989 dbg_mnt("replay log LEB %d:%d", lnum, offs); 990 sleb = ubifs_scan(c, lnum, offs, sbuf, c->need_recovery); 991 if (IS_ERR(sleb)) { 992 if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery) 993 return PTR_ERR(sleb); 994 /* 995 * Note, the below function will recover this log LEB only if 996 * it is the last, because unclean reboots can possibly corrupt 997 * only the tail of the log. 998 */ 999 sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); 1000 if (IS_ERR(sleb)) 1001 return PTR_ERR(sleb); 1002 } 1003 1004 if (sleb->nodes_cnt == 0) { 1005 err = 1; 1006 goto out; 1007 } 1008 1009 node = sleb->buf; 1010 snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list); 1011 if (c->cs_sqnum == 0) { 1012 /* 1013 * This is the first log LEB we are looking at, make sure that 1014 * the first node is a commit start node. Also record its 1015 * sequence number so that UBIFS can determine where the log 1016 * ends, because all nodes which were have higher sequence 1017 * numbers. 1018 */ 1019 if (snod->type != UBIFS_CS_NODE) { 1020 ubifs_err(c, "first log node at LEB %d:%d is not CS node", 1021 lnum, offs); 1022 goto out_dump; 1023 } 1024 if (le64_to_cpu(node->cmt_no) != c->cmt_no) { 1025 ubifs_err(c, "first CS node at LEB %d:%d has wrong commit number %llu expected %llu", 1026 lnum, offs, 1027 (unsigned long long)le64_to_cpu(node->cmt_no), 1028 c->cmt_no); 1029 goto out_dump; 1030 } 1031 1032 c->cs_sqnum = le64_to_cpu(node->ch.sqnum); 1033 dbg_mnt("commit start sqnum %llu", c->cs_sqnum); 1034 1035 err = ubifs_shash_init(c, c->log_hash); 1036 if (err) 1037 goto out; 1038 1039 err = ubifs_shash_update(c, c->log_hash, node, UBIFS_CS_NODE_SZ); 1040 if (err < 0) 1041 goto out; 1042 } 1043 1044 if (snod->sqnum < c->cs_sqnum) { 1045 /* 1046 * This means that we reached end of log and now 1047 * look to the older log data, which was already 1048 * committed but the eraseblock was not erased (UBIFS 1049 * only un-maps it). So this basically means we have to 1050 * exit with "end of log" code. 1051 */ 1052 err = 1; 1053 goto out; 1054 } 1055 1056 /* Make sure the first node sits at offset zero of the LEB */ 1057 if (snod->offs != 0) { 1058 ubifs_err(c, "first node is not at zero offset"); 1059 goto out_dump; 1060 } 1061 1062 list_for_each_entry(snod, &sleb->nodes, list) { 1063 cond_resched(); 1064 1065 if (snod->sqnum >= SQNUM_WATERMARK) { 1066 ubifs_err(c, "file system's life ended"); 1067 goto out_dump; 1068 } 1069 1070 if (snod->sqnum < c->cs_sqnum) { 1071 ubifs_err(c, "bad sqnum %llu, commit sqnum %llu", 1072 snod->sqnum, c->cs_sqnum); 1073 goto out_dump; 1074 } 1075 1076 if (snod->sqnum > c->max_sqnum) 1077 c->max_sqnum = snod->sqnum; 1078 1079 switch (snod->type) { 1080 case UBIFS_REF_NODE: { 1081 const struct ubifs_ref_node *ref = snod->node; 1082 1083 err = validate_ref(c, ref); 1084 if (err == 1) 1085 break; /* Already have this bud */ 1086 if (err) 1087 goto out_dump; 1088 1089 err = ubifs_shash_update(c, c->log_hash, ref, 1090 UBIFS_REF_NODE_SZ); 1091 if (err) 1092 goto out; 1093 1094 err = add_replay_bud(c, le32_to_cpu(ref->lnum), 1095 le32_to_cpu(ref->offs), 1096 le32_to_cpu(ref->jhead), 1097 snod->sqnum); 1098 if (err) 1099 goto out; 1100 1101 break; 1102 } 1103 case UBIFS_CS_NODE: 1104 /* Make sure it sits at the beginning of LEB */ 1105 if (snod->offs != 0) { 1106 ubifs_err(c, "unexpected node in log"); 1107 goto out_dump; 1108 } 1109 break; 1110 default: 1111 ubifs_err(c, "unexpected node in log"); 1112 goto out_dump; 1113 } 1114 } 1115 1116 if (sleb->endpt || c->lhead_offs >= c->leb_size) { 1117 c->lhead_lnum = lnum; 1118 c->lhead_offs = sleb->endpt; 1119 } 1120 1121 err = !sleb->endpt; 1122 out: 1123 ubifs_scan_destroy(sleb); 1124 return err; 1125 1126 out_dump: 1127 ubifs_err(c, "log error detected while replaying the log at LEB %d:%d", 1128 lnum, offs + snod->offs); 1129 ubifs_dump_node(c, snod->node, c->leb_size - snod->offs); 1130 ubifs_scan_destroy(sleb); 1131 return -EINVAL; 1132 } 1133 1134 /** 1135 * take_ihead - update the status of the index head in lprops to 'taken'. 1136 * @c: UBIFS file-system description object 1137 * 1138 * This function returns the amount of free space in the index head LEB or a 1139 * negative error code. 1140 */ 1141 static int take_ihead(struct ubifs_info *c) 1142 { 1143 const struct ubifs_lprops *lp; 1144 int err, free; 1145 1146 ubifs_get_lprops(c); 1147 1148 lp = ubifs_lpt_lookup_dirty(c, c->ihead_lnum); 1149 if (IS_ERR(lp)) { 1150 err = PTR_ERR(lp); 1151 goto out; 1152 } 1153 1154 free = lp->free; 1155 1156 lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, 1157 lp->flags | LPROPS_TAKEN, 0); 1158 if (IS_ERR(lp)) { 1159 err = PTR_ERR(lp); 1160 goto out; 1161 } 1162 1163 err = free; 1164 out: 1165 ubifs_release_lprops(c); 1166 return err; 1167 } 1168 1169 /** 1170 * ubifs_replay_journal - replay journal. 1171 * @c: UBIFS file-system description object 1172 * 1173 * This function scans the journal, replays and cleans it up. It makes sure all 1174 * memory data structures related to uncommitted journal are built (dirty TNC 1175 * tree, tree of buds, modified lprops, etc). 1176 */ 1177 int ubifs_replay_journal(struct ubifs_info *c) 1178 { 1179 int err, lnum, free; 1180 1181 BUILD_BUG_ON(UBIFS_TRUN_KEY > 5); 1182 1183 /* Update the status of the index head in lprops to 'taken' */ 1184 free = take_ihead(c); 1185 if (free < 0) 1186 return free; /* Error code */ 1187 1188 if (c->ihead_offs != c->leb_size - free) { 1189 ubifs_err(c, "bad index head LEB %d:%d", c->ihead_lnum, 1190 c->ihead_offs); 1191 return -EINVAL; 1192 } 1193 1194 dbg_mnt("start replaying the journal"); 1195 c->replaying = 1; 1196 lnum = c->ltail_lnum = c->lhead_lnum; 1197 1198 do { 1199 err = replay_log_leb(c, lnum, 0, c->sbuf); 1200 if (err == 1) { 1201 if (lnum != c->lhead_lnum) 1202 /* We hit the end of the log */ 1203 break; 1204 1205 /* 1206 * The head of the log must always start with the 1207 * "commit start" node on a properly formatted UBIFS. 1208 * But we found no nodes at all, which means that 1209 * something went wrong and we cannot proceed mounting 1210 * the file-system. 1211 */ 1212 ubifs_err(c, "no UBIFS nodes found at the log head LEB %d:%d, possibly corrupted", 1213 lnum, 0); 1214 err = -EINVAL; 1215 } 1216 if (err) 1217 goto out; 1218 lnum = ubifs_next_log_lnum(c, lnum); 1219 } while (lnum != c->ltail_lnum); 1220 1221 err = replay_buds(c); 1222 if (err) 1223 goto out; 1224 1225 err = apply_replay_list(c); 1226 if (err) 1227 goto out; 1228 1229 err = set_buds_lprops(c); 1230 if (err) 1231 goto out; 1232 1233 /* 1234 * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable 1235 * to roughly estimate index growth. Things like @c->bi.min_idx_lebs 1236 * depend on it. This means we have to initialize it to make sure 1237 * budgeting works properly. 1238 */ 1239 c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); 1240 c->bi.uncommitted_idx *= c->max_idx_node_sz; 1241 1242 ubifs_assert(c, c->bud_bytes <= c->max_bud_bytes || c->need_recovery); 1243 dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, highest_inum %lu", 1244 c->lhead_lnum, c->lhead_offs, c->max_sqnum, 1245 (unsigned long)c->highest_inum); 1246 out: 1247 destroy_replay_list(c); 1248 destroy_bud_list(c); 1249 c->replaying = 0; 1250 return err; 1251 } 1252