1 /* 2 * This file is part of UBIFS. 3 * 4 * Copyright (C) 2006-2008 Nokia Corporation. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 as published by 8 * the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 * more details. 14 * 15 * You should have received a copy of the GNU General Public License along with 16 * this program; if not, write to the Free Software Foundation, Inc., 51 17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 18 * 19 * Authors: Adrian Hunter 20 * Artem Bityutskiy (Битюцкий Артём) 21 */ 22 23 /* 24 * This file contains journal replay code. It runs when the file-system is being 25 * mounted and requires no locking. 26 * 27 * The larger is the journal, the longer it takes to scan it, so the longer it 28 * takes to mount UBIFS. This is why the journal has limited size which may be 29 * changed depending on the system requirements. But a larger journal gives 30 * faster I/O speed because it writes the index less frequently. So this is a 31 * trade-off. Also, the journal is indexed by the in-memory index (TNC), so the 32 * larger is the journal, the more memory its index may consume. 33 */ 34 35 #include "ubifs.h" 36 #include <linux/list_sort.h> 37 #include <crypto/hash.h> 38 #include <crypto/algapi.h> 39 40 /** 41 * struct replay_entry - replay list entry. 42 * @lnum: logical eraseblock number of the node 43 * @offs: node offset 44 * @len: node length 45 * @deletion: non-zero if this entry corresponds to a node deletion 46 * @sqnum: node sequence number 47 * @list: links the replay list 48 * @key: node key 49 * @nm: directory entry name 50 * @old_size: truncation old size 51 * @new_size: truncation new size 52 * 53 * The replay process first scans all buds and builds the replay list, then 54 * sorts the replay list in nodes sequence number order, and then inserts all 55 * the replay entries to the TNC. 56 */ 57 struct replay_entry { 58 int lnum; 59 int offs; 60 int len; 61 u8 hash[UBIFS_HASH_ARR_SZ]; 62 unsigned int deletion:1; 63 unsigned long long sqnum; 64 struct list_head list; 65 union ubifs_key key; 66 union { 67 struct fscrypt_name nm; 68 struct { 69 loff_t old_size; 70 loff_t new_size; 71 }; 72 }; 73 }; 74 75 /** 76 * struct bud_entry - entry in the list of buds to replay. 77 * @list: next bud in the list 78 * @bud: bud description object 79 * @sqnum: reference node sequence number 80 * @free: free bytes in the bud 81 * @dirty: dirty bytes in the bud 82 */ 83 struct bud_entry { 84 struct list_head list; 85 struct ubifs_bud *bud; 86 unsigned long long sqnum; 87 int free; 88 int dirty; 89 }; 90 91 /** 92 * set_bud_lprops - set free and dirty space used by a bud. 93 * @c: UBIFS file-system description object 94 * @b: bud entry which describes the bud 95 * 96 * This function makes sure the LEB properties of bud @b are set correctly 97 * after the replay. Returns zero in case of success and a negative error code 98 * in case of failure. 99 */ 100 static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b) 101 { 102 const struct ubifs_lprops *lp; 103 int err = 0, dirty; 104 105 ubifs_get_lprops(c); 106 107 lp = ubifs_lpt_lookup_dirty(c, b->bud->lnum); 108 if (IS_ERR(lp)) { 109 err = PTR_ERR(lp); 110 goto out; 111 } 112 113 dirty = lp->dirty; 114 if (b->bud->start == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { 115 /* 116 * The LEB was added to the journal with a starting offset of 117 * zero which means the LEB must have been empty. The LEB 118 * property values should be @lp->free == @c->leb_size and 119 * @lp->dirty == 0, but that is not the case. The reason is that 120 * the LEB had been garbage collected before it became the bud, 121 * and there was not commit inbetween. The garbage collector 122 * resets the free and dirty space without recording it 123 * anywhere except lprops, so if there was no commit then 124 * lprops does not have that information. 125 * 126 * We do not need to adjust free space because the scan has told 127 * us the exact value which is recorded in the replay entry as 128 * @b->free. 129 * 130 * However we do need to subtract from the dirty space the 131 * amount of space that the garbage collector reclaimed, which 132 * is the whole LEB minus the amount of space that was free. 133 */ 134 dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, 135 lp->free, lp->dirty); 136 dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, 137 lp->free, lp->dirty); 138 dirty -= c->leb_size - lp->free; 139 /* 140 * If the replay order was perfect the dirty space would now be 141 * zero. The order is not perfect because the journal heads 142 * race with each other. This is not a problem but is does mean 143 * that the dirty space may temporarily exceed c->leb_size 144 * during the replay. 145 */ 146 if (dirty != 0) 147 dbg_mnt("LEB %d lp: %d free %d dirty replay: %d free %d dirty", 148 b->bud->lnum, lp->free, lp->dirty, b->free, 149 b->dirty); 150 } 151 lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty, 152 lp->flags | LPROPS_TAKEN, 0); 153 if (IS_ERR(lp)) { 154 err = PTR_ERR(lp); 155 goto out; 156 } 157 158 /* Make sure the journal head points to the latest bud */ 159 err = ubifs_wbuf_seek_nolock(&c->jheads[b->bud->jhead].wbuf, 160 b->bud->lnum, c->leb_size - b->free); 161 162 out: 163 ubifs_release_lprops(c); 164 return err; 165 } 166 167 /** 168 * set_buds_lprops - set free and dirty space for all replayed buds. 169 * @c: UBIFS file-system description object 170 * 171 * This function sets LEB properties for all replayed buds. Returns zero in 172 * case of success and a negative error code in case of failure. 173 */ 174 static int set_buds_lprops(struct ubifs_info *c) 175 { 176 struct bud_entry *b; 177 int err; 178 179 list_for_each_entry(b, &c->replay_buds, list) { 180 err = set_bud_lprops(c, b); 181 if (err) 182 return err; 183 } 184 185 return 0; 186 } 187 188 /** 189 * trun_remove_range - apply a replay entry for a truncation to the TNC. 190 * @c: UBIFS file-system description object 191 * @r: replay entry of truncation 192 */ 193 static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r) 194 { 195 unsigned min_blk, max_blk; 196 union ubifs_key min_key, max_key; 197 ino_t ino; 198 199 min_blk = r->new_size / UBIFS_BLOCK_SIZE; 200 if (r->new_size & (UBIFS_BLOCK_SIZE - 1)) 201 min_blk += 1; 202 203 max_blk = r->old_size / UBIFS_BLOCK_SIZE; 204 if ((r->old_size & (UBIFS_BLOCK_SIZE - 1)) == 0) 205 max_blk -= 1; 206 207 ino = key_inum(c, &r->key); 208 209 data_key_init(c, &min_key, ino, min_blk); 210 data_key_init(c, &max_key, ino, max_blk); 211 212 return ubifs_tnc_remove_range(c, &min_key, &max_key); 213 } 214 215 /** 216 * apply_replay_entry - apply a replay entry to the TNC. 217 * @c: UBIFS file-system description object 218 * @r: replay entry to apply 219 * 220 * Apply a replay entry to the TNC. 221 */ 222 static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) 223 { 224 int err; 225 226 dbg_mntk(&r->key, "LEB %d:%d len %d deletion %d sqnum %llu key ", 227 r->lnum, r->offs, r->len, r->deletion, r->sqnum); 228 229 if (is_hash_key(c, &r->key)) { 230 if (r->deletion) 231 err = ubifs_tnc_remove_nm(c, &r->key, &r->nm); 232 else 233 err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs, 234 r->len, r->hash, &r->nm); 235 } else { 236 if (r->deletion) 237 switch (key_type(c, &r->key)) { 238 case UBIFS_INO_KEY: 239 { 240 ino_t inum = key_inum(c, &r->key); 241 242 err = ubifs_tnc_remove_ino(c, inum); 243 break; 244 } 245 case UBIFS_TRUN_KEY: 246 err = trun_remove_range(c, r); 247 break; 248 default: 249 err = ubifs_tnc_remove(c, &r->key); 250 break; 251 } 252 else 253 err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs, 254 r->len, r->hash); 255 if (err) 256 return err; 257 258 if (c->need_recovery) 259 err = ubifs_recover_size_accum(c, &r->key, r->deletion, 260 r->new_size); 261 } 262 263 return err; 264 } 265 266 /** 267 * replay_entries_cmp - compare 2 replay entries. 268 * @priv: UBIFS file-system description object 269 * @a: first replay entry 270 * @b: second replay entry 271 * 272 * This is a comparios function for 'list_sort()' which compares 2 replay 273 * entries @a and @b by comparing their sequence numer. Returns %1 if @a has 274 * greater sequence number and %-1 otherwise. 275 */ 276 static int replay_entries_cmp(void *priv, struct list_head *a, 277 struct list_head *b) 278 { 279 struct ubifs_info *c = priv; 280 struct replay_entry *ra, *rb; 281 282 cond_resched(); 283 if (a == b) 284 return 0; 285 286 ra = list_entry(a, struct replay_entry, list); 287 rb = list_entry(b, struct replay_entry, list); 288 ubifs_assert(c, ra->sqnum != rb->sqnum); 289 if (ra->sqnum > rb->sqnum) 290 return 1; 291 return -1; 292 } 293 294 /** 295 * apply_replay_list - apply the replay list to the TNC. 296 * @c: UBIFS file-system description object 297 * 298 * Apply all entries in the replay list to the TNC. Returns zero in case of 299 * success and a negative error code in case of failure. 300 */ 301 static int apply_replay_list(struct ubifs_info *c) 302 { 303 struct replay_entry *r; 304 int err; 305 306 list_sort(c, &c->replay_list, &replay_entries_cmp); 307 308 list_for_each_entry(r, &c->replay_list, list) { 309 cond_resched(); 310 311 err = apply_replay_entry(c, r); 312 if (err) 313 return err; 314 } 315 316 return 0; 317 } 318 319 /** 320 * destroy_replay_list - destroy the replay. 321 * @c: UBIFS file-system description object 322 * 323 * Destroy the replay list. 324 */ 325 static void destroy_replay_list(struct ubifs_info *c) 326 { 327 struct replay_entry *r, *tmp; 328 329 list_for_each_entry_safe(r, tmp, &c->replay_list, list) { 330 if (is_hash_key(c, &r->key)) 331 kfree(fname_name(&r->nm)); 332 list_del(&r->list); 333 kfree(r); 334 } 335 } 336 337 /** 338 * insert_node - insert a node to the replay list 339 * @c: UBIFS file-system description object 340 * @lnum: node logical eraseblock number 341 * @offs: node offset 342 * @len: node length 343 * @key: node key 344 * @sqnum: sequence number 345 * @deletion: non-zero if this is a deletion 346 * @used: number of bytes in use in a LEB 347 * @old_size: truncation old size 348 * @new_size: truncation new size 349 * 350 * This function inserts a scanned non-direntry node to the replay list. The 351 * replay list contains @struct replay_entry elements, and we sort this list in 352 * sequence number order before applying it. The replay list is applied at the 353 * very end of the replay process. Since the list is sorted in sequence number 354 * order, the older modifications are applied first. This function returns zero 355 * in case of success and a negative error code in case of failure. 356 */ 357 static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, 358 const u8 *hash, union ubifs_key *key, 359 unsigned long long sqnum, int deletion, int *used, 360 loff_t old_size, loff_t new_size) 361 { 362 struct replay_entry *r; 363 364 dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs); 365 366 if (key_inum(c, key) >= c->highest_inum) 367 c->highest_inum = key_inum(c, key); 368 369 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 370 if (!r) 371 return -ENOMEM; 372 373 if (!deletion) 374 *used += ALIGN(len, 8); 375 r->lnum = lnum; 376 r->offs = offs; 377 r->len = len; 378 ubifs_copy_hash(c, hash, r->hash); 379 r->deletion = !!deletion; 380 r->sqnum = sqnum; 381 key_copy(c, key, &r->key); 382 r->old_size = old_size; 383 r->new_size = new_size; 384 385 list_add_tail(&r->list, &c->replay_list); 386 return 0; 387 } 388 389 /** 390 * insert_dent - insert a directory entry node into the replay list. 391 * @c: UBIFS file-system description object 392 * @lnum: node logical eraseblock number 393 * @offs: node offset 394 * @len: node length 395 * @key: node key 396 * @name: directory entry name 397 * @nlen: directory entry name length 398 * @sqnum: sequence number 399 * @deletion: non-zero if this is a deletion 400 * @used: number of bytes in use in a LEB 401 * 402 * This function inserts a scanned directory entry node or an extended 403 * attribute entry to the replay list. Returns zero in case of success and a 404 * negative error code in case of failure. 405 */ 406 static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, 407 const u8 *hash, union ubifs_key *key, 408 const char *name, int nlen, unsigned long long sqnum, 409 int deletion, int *used) 410 { 411 struct replay_entry *r; 412 char *nbuf; 413 414 dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs); 415 if (key_inum(c, key) >= c->highest_inum) 416 c->highest_inum = key_inum(c, key); 417 418 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 419 if (!r) 420 return -ENOMEM; 421 422 nbuf = kmalloc(nlen + 1, GFP_KERNEL); 423 if (!nbuf) { 424 kfree(r); 425 return -ENOMEM; 426 } 427 428 if (!deletion) 429 *used += ALIGN(len, 8); 430 r->lnum = lnum; 431 r->offs = offs; 432 r->len = len; 433 ubifs_copy_hash(c, hash, r->hash); 434 r->deletion = !!deletion; 435 r->sqnum = sqnum; 436 key_copy(c, key, &r->key); 437 fname_len(&r->nm) = nlen; 438 memcpy(nbuf, name, nlen); 439 nbuf[nlen] = '\0'; 440 fname_name(&r->nm) = nbuf; 441 442 list_add_tail(&r->list, &c->replay_list); 443 return 0; 444 } 445 446 /** 447 * ubifs_validate_entry - validate directory or extended attribute entry node. 448 * @c: UBIFS file-system description object 449 * @dent: the node to validate 450 * 451 * This function validates directory or extended attribute entry node @dent. 452 * Returns zero if the node is all right and a %-EINVAL if not. 453 */ 454 int ubifs_validate_entry(struct ubifs_info *c, 455 const struct ubifs_dent_node *dent) 456 { 457 int key_type = key_type_flash(c, dent->key); 458 int nlen = le16_to_cpu(dent->nlen); 459 460 if (le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1 || 461 dent->type >= UBIFS_ITYPES_CNT || 462 nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 || 463 (key_type == UBIFS_XENT_KEY && strnlen(dent->name, nlen) != nlen) || 464 le64_to_cpu(dent->inum) > MAX_INUM) { 465 ubifs_err(c, "bad %s node", key_type == UBIFS_DENT_KEY ? 466 "directory entry" : "extended attribute entry"); 467 return -EINVAL; 468 } 469 470 if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) { 471 ubifs_err(c, "bad key type %d", key_type); 472 return -EINVAL; 473 } 474 475 return 0; 476 } 477 478 /** 479 * is_last_bud - check if the bud is the last in the journal head. 480 * @c: UBIFS file-system description object 481 * @bud: bud description object 482 * 483 * This function checks if bud @bud is the last bud in its journal head. This 484 * information is then used by 'replay_bud()' to decide whether the bud can 485 * have corruptions or not. Indeed, only last buds can be corrupted by power 486 * cuts. Returns %1 if this is the last bud, and %0 if not. 487 */ 488 static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud) 489 { 490 struct ubifs_jhead *jh = &c->jheads[bud->jhead]; 491 struct ubifs_bud *next; 492 uint32_t data; 493 int err; 494 495 if (list_is_last(&bud->list, &jh->buds_list)) 496 return 1; 497 498 /* 499 * The following is a quirk to make sure we work correctly with UBIFS 500 * images used with older UBIFS. 501 * 502 * Normally, the last bud will be the last in the journal head's list 503 * of bud. However, there is one exception if the UBIFS image belongs 504 * to older UBIFS. This is fairly unlikely: one would need to use old 505 * UBIFS, then have a power cut exactly at the right point, and then 506 * try to mount this image with new UBIFS. 507 * 508 * The exception is: it is possible to have 2 buds A and B, A goes 509 * before B, and B is the last, bud B is contains no data, and bud A is 510 * corrupted at the end. The reason is that in older versions when the 511 * journal code switched the next bud (from A to B), it first added a 512 * log reference node for the new bud (B), and only after this it 513 * synchronized the write-buffer of current bud (A). But later this was 514 * changed and UBIFS started to always synchronize the write-buffer of 515 * the bud (A) before writing the log reference for the new bud (B). 516 * 517 * But because older UBIFS always synchronized A's write-buffer before 518 * writing to B, we can recognize this exceptional situation but 519 * checking the contents of bud B - if it is empty, then A can be 520 * treated as the last and we can recover it. 521 * 522 * TODO: remove this piece of code in a couple of years (today it is 523 * 16.05.2011). 524 */ 525 next = list_entry(bud->list.next, struct ubifs_bud, list); 526 if (!list_is_last(&next->list, &jh->buds_list)) 527 return 0; 528 529 err = ubifs_leb_read(c, next->lnum, (char *)&data, next->start, 4, 1); 530 if (err) 531 return 0; 532 533 return data == 0xFFFFFFFF; 534 } 535 536 /** 537 * authenticate_sleb - authenticate one scan LEB 538 * @c: UBIFS file-system description object 539 * @sleb: the scan LEB to authenticate 540 * @log_hash: 541 * @is_last: if true, this is is the last LEB 542 * 543 * This function iterates over the buds of a single LEB authenticating all buds 544 * with the authentication nodes on this LEB. Authentication nodes are written 545 * after some buds and contain a HMAC covering the authentication node itself 546 * and the buds between the last authentication node and the current 547 * authentication node. It can happen that the last buds cannot be authenticated 548 * because a powercut happened when some nodes were written but not the 549 * corresponding authentication node. This function returns the number of nodes 550 * that could be authenticated or a negative error code. 551 */ 552 static int authenticate_sleb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, 553 struct shash_desc *log_hash, int is_last) 554 { 555 int n_not_auth = 0; 556 struct ubifs_scan_node *snod; 557 int n_nodes = 0; 558 int err; 559 u8 *hash, *hmac; 560 561 if (!ubifs_authenticated(c)) 562 return sleb->nodes_cnt; 563 564 hash = kmalloc(crypto_shash_descsize(c->hash_tfm), GFP_NOFS); 565 hmac = kmalloc(c->hmac_desc_len, GFP_NOFS); 566 if (!hash || !hmac) { 567 err = -ENOMEM; 568 goto out; 569 } 570 571 list_for_each_entry(snod, &sleb->nodes, list) { 572 573 n_nodes++; 574 575 if (snod->type == UBIFS_AUTH_NODE) { 576 struct ubifs_auth_node *auth = snod->node; 577 SHASH_DESC_ON_STACK(hash_desc, c->hash_tfm); 578 SHASH_DESC_ON_STACK(hmac_desc, c->hmac_tfm); 579 580 hash_desc->tfm = c->hash_tfm; 581 hash_desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; 582 583 ubifs_shash_copy_state(c, log_hash, hash_desc); 584 err = crypto_shash_final(hash_desc, hash); 585 if (err) 586 goto out; 587 588 hmac_desc->tfm = c->hmac_tfm; 589 hmac_desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; 590 err = crypto_shash_digest(hmac_desc, hash, c->hash_len, 591 hmac); 592 if (err) 593 goto out; 594 595 err = ubifs_check_hmac(c, auth->hmac, hmac); 596 if (err) { 597 err = -EPERM; 598 goto out; 599 } 600 n_not_auth = 0; 601 } else { 602 err = crypto_shash_update(log_hash, snod->node, 603 snod->len); 604 if (err) 605 goto out; 606 n_not_auth++; 607 } 608 } 609 610 /* 611 * A powercut can happen when some nodes were written, but not yet 612 * the corresponding authentication node. This may only happen on 613 * the last bud though. 614 */ 615 if (n_not_auth) { 616 if (is_last) { 617 dbg_mnt("%d unauthenticated nodes found on LEB %d, Ignoring them", 618 n_not_auth, sleb->lnum); 619 err = 0; 620 } else { 621 dbg_mnt("%d unauthenticated nodes found on non-last LEB %d", 622 n_not_auth, sleb->lnum); 623 err = -EPERM; 624 } 625 } else { 626 err = 0; 627 } 628 out: 629 kfree(hash); 630 kfree(hmac); 631 632 return err ? err : n_nodes - n_not_auth; 633 } 634 635 /** 636 * replay_bud - replay a bud logical eraseblock. 637 * @c: UBIFS file-system description object 638 * @b: bud entry which describes the bud 639 * 640 * This function replays bud @bud, recovers it if needed, and adds all nodes 641 * from this bud to the replay list. Returns zero in case of success and a 642 * negative error code in case of failure. 643 */ 644 static int replay_bud(struct ubifs_info *c, struct bud_entry *b) 645 { 646 int is_last = is_last_bud(c, b->bud); 647 int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start; 648 int n_nodes, n = 0; 649 struct ubifs_scan_leb *sleb; 650 struct ubifs_scan_node *snod; 651 652 dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d", 653 lnum, b->bud->jhead, offs, is_last); 654 655 if (c->need_recovery && is_last) 656 /* 657 * Recover only last LEBs in the journal heads, because power 658 * cuts may cause corruptions only in these LEBs, because only 659 * these LEBs could possibly be written to at the power cut 660 * time. 661 */ 662 sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, b->bud->jhead); 663 else 664 sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); 665 if (IS_ERR(sleb)) 666 return PTR_ERR(sleb); 667 668 n_nodes = authenticate_sleb(c, sleb, b->bud->log_hash, is_last); 669 if (n_nodes < 0) { 670 err = n_nodes; 671 goto out; 672 } 673 674 ubifs_shash_copy_state(c, b->bud->log_hash, 675 c->jheads[b->bud->jhead].log_hash); 676 677 /* 678 * The bud does not have to start from offset zero - the beginning of 679 * the 'lnum' LEB may contain previously committed data. One of the 680 * things we have to do in replay is to correctly update lprops with 681 * newer information about this LEB. 682 * 683 * At this point lprops thinks that this LEB has 'c->leb_size - offs' 684 * bytes of free space because it only contain information about 685 * committed data. 686 * 687 * But we know that real amount of free space is 'c->leb_size - 688 * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and 689 * 'sleb->endpt' is used by bud data. We have to correctly calculate 690 * how much of these data are dirty and update lprops with this 691 * information. 692 * 693 * The dirt in that LEB region is comprised of padding nodes, deletion 694 * nodes, truncation nodes and nodes which are obsoleted by subsequent 695 * nodes in this LEB. So instead of calculating clean space, we 696 * calculate used space ('used' variable). 697 */ 698 699 list_for_each_entry(snod, &sleb->nodes, list) { 700 u8 hash[UBIFS_HASH_ARR_SZ]; 701 int deletion = 0; 702 703 cond_resched(); 704 705 if (snod->sqnum >= SQNUM_WATERMARK) { 706 ubifs_err(c, "file system's life ended"); 707 goto out_dump; 708 } 709 710 ubifs_node_calc_hash(c, snod->node, hash); 711 712 if (snod->sqnum > c->max_sqnum) 713 c->max_sqnum = snod->sqnum; 714 715 switch (snod->type) { 716 case UBIFS_INO_NODE: 717 { 718 struct ubifs_ino_node *ino = snod->node; 719 loff_t new_size = le64_to_cpu(ino->size); 720 721 if (le32_to_cpu(ino->nlink) == 0) 722 deletion = 1; 723 err = insert_node(c, lnum, snod->offs, snod->len, hash, 724 &snod->key, snod->sqnum, deletion, 725 &used, 0, new_size); 726 break; 727 } 728 case UBIFS_DATA_NODE: 729 { 730 struct ubifs_data_node *dn = snod->node; 731 loff_t new_size = le32_to_cpu(dn->size) + 732 key_block(c, &snod->key) * 733 UBIFS_BLOCK_SIZE; 734 735 err = insert_node(c, lnum, snod->offs, snod->len, hash, 736 &snod->key, snod->sqnum, deletion, 737 &used, 0, new_size); 738 break; 739 } 740 case UBIFS_DENT_NODE: 741 case UBIFS_XENT_NODE: 742 { 743 struct ubifs_dent_node *dent = snod->node; 744 745 err = ubifs_validate_entry(c, dent); 746 if (err) 747 goto out_dump; 748 749 err = insert_dent(c, lnum, snod->offs, snod->len, hash, 750 &snod->key, dent->name, 751 le16_to_cpu(dent->nlen), snod->sqnum, 752 !le64_to_cpu(dent->inum), &used); 753 break; 754 } 755 case UBIFS_TRUN_NODE: 756 { 757 struct ubifs_trun_node *trun = snod->node; 758 loff_t old_size = le64_to_cpu(trun->old_size); 759 loff_t new_size = le64_to_cpu(trun->new_size); 760 union ubifs_key key; 761 762 /* Validate truncation node */ 763 if (old_size < 0 || old_size > c->max_inode_sz || 764 new_size < 0 || new_size > c->max_inode_sz || 765 old_size <= new_size) { 766 ubifs_err(c, "bad truncation node"); 767 goto out_dump; 768 } 769 770 /* 771 * Create a fake truncation key just to use the same 772 * functions which expect nodes to have keys. 773 */ 774 trun_key_init(c, &key, le32_to_cpu(trun->inum)); 775 err = insert_node(c, lnum, snod->offs, snod->len, hash, 776 &key, snod->sqnum, 1, &used, 777 old_size, new_size); 778 break; 779 } 780 case UBIFS_AUTH_NODE: 781 break; 782 default: 783 ubifs_err(c, "unexpected node type %d in bud LEB %d:%d", 784 snod->type, lnum, snod->offs); 785 err = -EINVAL; 786 goto out_dump; 787 } 788 if (err) 789 goto out; 790 791 n++; 792 if (n == n_nodes) 793 break; 794 } 795 796 ubifs_assert(c, ubifs_search_bud(c, lnum)); 797 ubifs_assert(c, sleb->endpt - offs >= used); 798 ubifs_assert(c, sleb->endpt % c->min_io_size == 0); 799 800 b->dirty = sleb->endpt - offs - used; 801 b->free = c->leb_size - sleb->endpt; 802 dbg_mnt("bud LEB %d replied: dirty %d, free %d", 803 lnum, b->dirty, b->free); 804 805 out: 806 ubifs_scan_destroy(sleb); 807 return err; 808 809 out_dump: 810 ubifs_err(c, "bad node is at LEB %d:%d", lnum, snod->offs); 811 ubifs_dump_node(c, snod->node); 812 ubifs_scan_destroy(sleb); 813 return -EINVAL; 814 } 815 816 /** 817 * replay_buds - replay all buds. 818 * @c: UBIFS file-system description object 819 * 820 * This function returns zero in case of success and a negative error code in 821 * case of failure. 822 */ 823 static int replay_buds(struct ubifs_info *c) 824 { 825 struct bud_entry *b; 826 int err; 827 unsigned long long prev_sqnum = 0; 828 829 list_for_each_entry(b, &c->replay_buds, list) { 830 err = replay_bud(c, b); 831 if (err) 832 return err; 833 834 ubifs_assert(c, b->sqnum > prev_sqnum); 835 prev_sqnum = b->sqnum; 836 } 837 838 return 0; 839 } 840 841 /** 842 * destroy_bud_list - destroy the list of buds to replay. 843 * @c: UBIFS file-system description object 844 */ 845 static void destroy_bud_list(struct ubifs_info *c) 846 { 847 struct bud_entry *b; 848 849 while (!list_empty(&c->replay_buds)) { 850 b = list_entry(c->replay_buds.next, struct bud_entry, list); 851 list_del(&b->list); 852 kfree(b); 853 } 854 } 855 856 /** 857 * add_replay_bud - add a bud to the list of buds to replay. 858 * @c: UBIFS file-system description object 859 * @lnum: bud logical eraseblock number to replay 860 * @offs: bud start offset 861 * @jhead: journal head to which this bud belongs 862 * @sqnum: reference node sequence number 863 * 864 * This function returns zero in case of success and a negative error code in 865 * case of failure. 866 */ 867 static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, 868 unsigned long long sqnum) 869 { 870 struct ubifs_bud *bud; 871 struct bud_entry *b; 872 int err; 873 874 dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead); 875 876 bud = kmalloc(sizeof(struct ubifs_bud), GFP_KERNEL); 877 if (!bud) 878 return -ENOMEM; 879 880 b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL); 881 if (!b) { 882 err = -ENOMEM; 883 goto out; 884 } 885 886 bud->lnum = lnum; 887 bud->start = offs; 888 bud->jhead = jhead; 889 bud->log_hash = ubifs_hash_get_desc(c); 890 if (IS_ERR(bud->log_hash)) { 891 err = PTR_ERR(bud->log_hash); 892 goto out; 893 } 894 895 ubifs_shash_copy_state(c, c->log_hash, bud->log_hash); 896 897 ubifs_add_bud(c, bud); 898 899 b->bud = bud; 900 b->sqnum = sqnum; 901 list_add_tail(&b->list, &c->replay_buds); 902 903 return 0; 904 out: 905 kfree(bud); 906 kfree(b); 907 908 return err; 909 } 910 911 /** 912 * validate_ref - validate a reference node. 913 * @c: UBIFS file-system description object 914 * @ref: the reference node to validate 915 * @ref_lnum: LEB number of the reference node 916 * @ref_offs: reference node offset 917 * 918 * This function returns %1 if a bud reference already exists for the LEB. %0 is 919 * returned if the reference node is new, otherwise %-EINVAL is returned if 920 * validation failed. 921 */ 922 static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref) 923 { 924 struct ubifs_bud *bud; 925 int lnum = le32_to_cpu(ref->lnum); 926 unsigned int offs = le32_to_cpu(ref->offs); 927 unsigned int jhead = le32_to_cpu(ref->jhead); 928 929 /* 930 * ref->offs may point to the end of LEB when the journal head points 931 * to the end of LEB and we write reference node for it during commit. 932 * So this is why we require 'offs > c->leb_size'. 933 */ 934 if (jhead >= c->jhead_cnt || lnum >= c->leb_cnt || 935 lnum < c->main_first || offs > c->leb_size || 936 offs & (c->min_io_size - 1)) 937 return -EINVAL; 938 939 /* Make sure we have not already looked at this bud */ 940 bud = ubifs_search_bud(c, lnum); 941 if (bud) { 942 if (bud->jhead == jhead && bud->start <= offs) 943 return 1; 944 ubifs_err(c, "bud at LEB %d:%d was already referred", lnum, offs); 945 return -EINVAL; 946 } 947 948 return 0; 949 } 950 951 /** 952 * replay_log_leb - replay a log logical eraseblock. 953 * @c: UBIFS file-system description object 954 * @lnum: log logical eraseblock to replay 955 * @offs: offset to start replaying from 956 * @sbuf: scan buffer 957 * 958 * This function replays a log LEB and returns zero in case of success, %1 if 959 * this is the last LEB in the log, and a negative error code in case of 960 * failure. 961 */ 962 static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) 963 { 964 int err; 965 struct ubifs_scan_leb *sleb; 966 struct ubifs_scan_node *snod; 967 const struct ubifs_cs_node *node; 968 969 dbg_mnt("replay log LEB %d:%d", lnum, offs); 970 sleb = ubifs_scan(c, lnum, offs, sbuf, c->need_recovery); 971 if (IS_ERR(sleb)) { 972 if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery) 973 return PTR_ERR(sleb); 974 /* 975 * Note, the below function will recover this log LEB only if 976 * it is the last, because unclean reboots can possibly corrupt 977 * only the tail of the log. 978 */ 979 sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); 980 if (IS_ERR(sleb)) 981 return PTR_ERR(sleb); 982 } 983 984 if (sleb->nodes_cnt == 0) { 985 err = 1; 986 goto out; 987 } 988 989 node = sleb->buf; 990 snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list); 991 if (c->cs_sqnum == 0) { 992 /* 993 * This is the first log LEB we are looking at, make sure that 994 * the first node is a commit start node. Also record its 995 * sequence number so that UBIFS can determine where the log 996 * ends, because all nodes which were have higher sequence 997 * numbers. 998 */ 999 if (snod->type != UBIFS_CS_NODE) { 1000 ubifs_err(c, "first log node at LEB %d:%d is not CS node", 1001 lnum, offs); 1002 goto out_dump; 1003 } 1004 if (le64_to_cpu(node->cmt_no) != c->cmt_no) { 1005 ubifs_err(c, "first CS node at LEB %d:%d has wrong commit number %llu expected %llu", 1006 lnum, offs, 1007 (unsigned long long)le64_to_cpu(node->cmt_no), 1008 c->cmt_no); 1009 goto out_dump; 1010 } 1011 1012 c->cs_sqnum = le64_to_cpu(node->ch.sqnum); 1013 dbg_mnt("commit start sqnum %llu", c->cs_sqnum); 1014 1015 err = ubifs_shash_init(c, c->log_hash); 1016 if (err) 1017 goto out; 1018 1019 err = ubifs_shash_update(c, c->log_hash, node, UBIFS_CS_NODE_SZ); 1020 if (err < 0) 1021 goto out; 1022 } 1023 1024 if (snod->sqnum < c->cs_sqnum) { 1025 /* 1026 * This means that we reached end of log and now 1027 * look to the older log data, which was already 1028 * committed but the eraseblock was not erased (UBIFS 1029 * only un-maps it). So this basically means we have to 1030 * exit with "end of log" code. 1031 */ 1032 err = 1; 1033 goto out; 1034 } 1035 1036 /* Make sure the first node sits at offset zero of the LEB */ 1037 if (snod->offs != 0) { 1038 ubifs_err(c, "first node is not at zero offset"); 1039 goto out_dump; 1040 } 1041 1042 list_for_each_entry(snod, &sleb->nodes, list) { 1043 cond_resched(); 1044 1045 if (snod->sqnum >= SQNUM_WATERMARK) { 1046 ubifs_err(c, "file system's life ended"); 1047 goto out_dump; 1048 } 1049 1050 if (snod->sqnum < c->cs_sqnum) { 1051 ubifs_err(c, "bad sqnum %llu, commit sqnum %llu", 1052 snod->sqnum, c->cs_sqnum); 1053 goto out_dump; 1054 } 1055 1056 if (snod->sqnum > c->max_sqnum) 1057 c->max_sqnum = snod->sqnum; 1058 1059 switch (snod->type) { 1060 case UBIFS_REF_NODE: { 1061 const struct ubifs_ref_node *ref = snod->node; 1062 1063 err = validate_ref(c, ref); 1064 if (err == 1) 1065 break; /* Already have this bud */ 1066 if (err) 1067 goto out_dump; 1068 1069 err = ubifs_shash_update(c, c->log_hash, ref, 1070 UBIFS_REF_NODE_SZ); 1071 if (err) 1072 goto out; 1073 1074 err = add_replay_bud(c, le32_to_cpu(ref->lnum), 1075 le32_to_cpu(ref->offs), 1076 le32_to_cpu(ref->jhead), 1077 snod->sqnum); 1078 if (err) 1079 goto out; 1080 1081 break; 1082 } 1083 case UBIFS_CS_NODE: 1084 /* Make sure it sits at the beginning of LEB */ 1085 if (snod->offs != 0) { 1086 ubifs_err(c, "unexpected node in log"); 1087 goto out_dump; 1088 } 1089 break; 1090 default: 1091 ubifs_err(c, "unexpected node in log"); 1092 goto out_dump; 1093 } 1094 } 1095 1096 if (sleb->endpt || c->lhead_offs >= c->leb_size) { 1097 c->lhead_lnum = lnum; 1098 c->lhead_offs = sleb->endpt; 1099 } 1100 1101 err = !sleb->endpt; 1102 out: 1103 ubifs_scan_destroy(sleb); 1104 return err; 1105 1106 out_dump: 1107 ubifs_err(c, "log error detected while replaying the log at LEB %d:%d", 1108 lnum, offs + snod->offs); 1109 ubifs_dump_node(c, snod->node); 1110 ubifs_scan_destroy(sleb); 1111 return -EINVAL; 1112 } 1113 1114 /** 1115 * take_ihead - update the status of the index head in lprops to 'taken'. 1116 * @c: UBIFS file-system description object 1117 * 1118 * This function returns the amount of free space in the index head LEB or a 1119 * negative error code. 1120 */ 1121 static int take_ihead(struct ubifs_info *c) 1122 { 1123 const struct ubifs_lprops *lp; 1124 int err, free; 1125 1126 ubifs_get_lprops(c); 1127 1128 lp = ubifs_lpt_lookup_dirty(c, c->ihead_lnum); 1129 if (IS_ERR(lp)) { 1130 err = PTR_ERR(lp); 1131 goto out; 1132 } 1133 1134 free = lp->free; 1135 1136 lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, 1137 lp->flags | LPROPS_TAKEN, 0); 1138 if (IS_ERR(lp)) { 1139 err = PTR_ERR(lp); 1140 goto out; 1141 } 1142 1143 err = free; 1144 out: 1145 ubifs_release_lprops(c); 1146 return err; 1147 } 1148 1149 /** 1150 * ubifs_replay_journal - replay journal. 1151 * @c: UBIFS file-system description object 1152 * 1153 * This function scans the journal, replays and cleans it up. It makes sure all 1154 * memory data structures related to uncommitted journal are built (dirty TNC 1155 * tree, tree of buds, modified lprops, etc). 1156 */ 1157 int ubifs_replay_journal(struct ubifs_info *c) 1158 { 1159 int err, lnum, free; 1160 1161 BUILD_BUG_ON(UBIFS_TRUN_KEY > 5); 1162 1163 /* Update the status of the index head in lprops to 'taken' */ 1164 free = take_ihead(c); 1165 if (free < 0) 1166 return free; /* Error code */ 1167 1168 if (c->ihead_offs != c->leb_size - free) { 1169 ubifs_err(c, "bad index head LEB %d:%d", c->ihead_lnum, 1170 c->ihead_offs); 1171 return -EINVAL; 1172 } 1173 1174 dbg_mnt("start replaying the journal"); 1175 c->replaying = 1; 1176 lnum = c->ltail_lnum = c->lhead_lnum; 1177 1178 do { 1179 err = replay_log_leb(c, lnum, 0, c->sbuf); 1180 if (err == 1) { 1181 if (lnum != c->lhead_lnum) 1182 /* We hit the end of the log */ 1183 break; 1184 1185 /* 1186 * The head of the log must always start with the 1187 * "commit start" node on a properly formatted UBIFS. 1188 * But we found no nodes at all, which means that 1189 * something went wrong and we cannot proceed mounting 1190 * the file-system. 1191 */ 1192 ubifs_err(c, "no UBIFS nodes found at the log head LEB %d:%d, possibly corrupted", 1193 lnum, 0); 1194 err = -EINVAL; 1195 } 1196 if (err) 1197 goto out; 1198 lnum = ubifs_next_log_lnum(c, lnum); 1199 } while (lnum != c->ltail_lnum); 1200 1201 err = replay_buds(c); 1202 if (err) 1203 goto out; 1204 1205 err = apply_replay_list(c); 1206 if (err) 1207 goto out; 1208 1209 err = set_buds_lprops(c); 1210 if (err) 1211 goto out; 1212 1213 /* 1214 * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable 1215 * to roughly estimate index growth. Things like @c->bi.min_idx_lebs 1216 * depend on it. This means we have to initialize it to make sure 1217 * budgeting works properly. 1218 */ 1219 c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); 1220 c->bi.uncommitted_idx *= c->max_idx_node_sz; 1221 1222 ubifs_assert(c, c->bud_bytes <= c->max_bud_bytes || c->need_recovery); 1223 dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, highest_inum %lu", 1224 c->lhead_lnum, c->lhead_offs, c->max_sqnum, 1225 (unsigned long)c->highest_inum); 1226 out: 1227 destroy_replay_list(c); 1228 destroy_bud_list(c); 1229 c->replaying = 0; 1230 return err; 1231 } 1232