1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * This file is part of UBIFS. 4 * 5 * Copyright (C) 2006-2008 Nokia Corporation 6 * 7 * Authors: Adrian Hunter 8 * Artem Bityutskiy (Битюцкий Артём) 9 */ 10 11 /* 12 * This file implements functions needed to recover from unclean un-mounts. 13 * When UBIFS is mounted, it checks a flag on the master node to determine if 14 * an un-mount was completed successfully. If not, the process of mounting 15 * incorporates additional checking and fixing of on-flash data structures. 16 * UBIFS always cleans away all remnants of an unclean un-mount, so that 17 * errors do not accumulate. However UBIFS defers recovery if it is mounted 18 * read-only, and the flash is not modified in that case. 19 * 20 * The general UBIFS approach to the recovery is that it recovers from 21 * corruptions which could be caused by power cuts, but it refuses to recover 22 * from corruption caused by other reasons. And UBIFS tries to distinguish 23 * between these 2 reasons of corruptions and silently recover in the former 24 * case and loudly complain in the latter case. 25 * 26 * UBIFS writes only to erased LEBs, so it writes only to the flash space 27 * containing only 0xFFs. UBIFS also always writes strictly from the beginning 28 * of the LEB to the end. And UBIFS assumes that the underlying flash media 29 * writes in @c->max_write_size bytes at a time. 30 * 31 * Hence, if UBIFS finds a corrupted node at offset X, it expects only the min. 32 * I/O unit corresponding to offset X to contain corrupted data, all the 33 * following min. I/O units have to contain empty space (all 0xFFs). If this is 34 * not true, the corruption cannot be the result of a power cut, and UBIFS 35 * refuses to mount. 36 */ 37 38 #include <linux/crc32.h> 39 #include <linux/slab.h> 40 #include "ubifs.h" 41 42 /** 43 * is_empty - determine whether a buffer is empty (contains all 0xff). 
44 * @buf: buffer to clean 45 * @len: length of buffer 46 * 47 * This function returns %1 if the buffer is empty (contains all 0xff) otherwise 48 * %0 is returned. 49 */ 50 static int is_empty(void *buf, int len) 51 { 52 uint8_t *p = buf; 53 int i; 54 55 for (i = 0; i < len; i++) 56 if (*p++ != 0xff) 57 return 0; 58 return 1; 59 } 60 61 /** 62 * first_non_ff - find offset of the first non-0xff byte. 63 * @buf: buffer to search in 64 * @len: length of buffer 65 * 66 * This function returns offset of the first non-0xff byte in @buf or %-1 if 67 * the buffer contains only 0xff bytes. 68 */ 69 static int first_non_ff(void *buf, int len) 70 { 71 uint8_t *p = buf; 72 int i; 73 74 for (i = 0; i < len; i++) 75 if (*p++ != 0xff) 76 return i; 77 return -1; 78 } 79 80 /** 81 * get_master_node - get the last valid master node allowing for corruption. 82 * @c: UBIFS file-system description object 83 * @lnum: LEB number 84 * @pbuf: buffer containing the LEB read, is returned here 85 * @mst: master node, if found, is returned here 86 * @cor: corruption, if found, is returned here 87 * 88 * This function allocates a buffer, reads the LEB into it, and finds and 89 * returns the last valid master node allowing for one area of corruption. 90 * The corrupt area, if there is one, must be consistent with the assumption 91 * that it is the result of an unclean unmount while the master node was being 92 * written. Under those circumstances, it is valid to use the previously written 93 * master node. 94 * 95 * This function returns %0 on success and a negative error code on failure. 
 *
 * NOTE: on success @mst and @cor are written only when a master node or a
 * corrupt area is actually found; the caller in this file initialises both to
 * %NULL before calling. On success @pbuf receives the vmalloc'ed LEB image
 * (and @mst / @cor point into it), so the caller must vfree() it. On failure
 * the buffer is freed here, and @mst and @cor are set to %NULL.
 */
static int get_master_node(const struct ubifs_info *c, int lnum, void **pbuf,
			   struct ubifs_mst_node **mst, void **cor)
{
	/* Distance between consecutive master node slots in the LEB */
	const int sz = c->mst_node_alsz;
	int err, offs, len;
	void *sbuf, *buf;

	sbuf = vmalloc(c->leb_size);
	if (!sbuf)
		return -ENOMEM;

	/* -EBADMSG from the read is tolerated: the data is still examined */
	err = ubifs_leb_read(c, lnum, sbuf, 0, c->leb_size, 0);
	if (err && err != -EBADMSG)
		goto out_free;

	/*
	 * Find the first position that is definitely not a node. From here on
	 * @offs, @buf and @len are always moved together in steps of @sz.
	 */
	offs = 0;
	buf = sbuf;
	len = c->leb_size;
	while (offs + UBIFS_MST_NODE_SZ <= c->leb_size) {
		struct ubifs_ch *ch = buf;

		if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC)
			break;
		offs += sz;
		buf += sz;
		len -= sz;
	}
	/* See if there was a valid master node before that */
	if (offs) {
		int ret;

		/* Step back onto the last slot that carried the node magic */
		offs -= sz;
		buf -= sz;
		len += sz;
		ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
		if (ret != SCANNED_A_NODE && offs) {
			/* Could have been corruption so check one place back */
			offs -= sz;
			buf -= sz;
			len += sz;
			ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
			if (ret != SCANNED_A_NODE)
				/*
				 * We accept only one area of corruption because
				 * we are assuming that it was caused while
				 * trying to write a master node.
				 */
				goto out_err;
		}
		/*
		 * If the slot at offset 0 did not scan as a node we get here
		 * with @ret != SCANNED_A_NODE and @offs == 0; that slot is then
		 * examined as the possible corrupt area below.
		 */
		if (ret == SCANNED_A_NODE) {
			struct ubifs_ch *ch = buf;

			if (ch->node_type != UBIFS_MST_NODE)
				goto out_err;
			dbg_rcvry("found a master node at %d:%d", lnum, offs);
			*mst = buf;
			/* Move past the master node that was found */
			offs += sz;
			buf += sz;
			len -= sz;
		}
	}
	/* Check for corruption: at most one slot may be non-empty */
	if (offs < c->leb_size) {
		if (!is_empty(buf, min_t(int, len, sz))) {
			*cor = buf;
			dbg_rcvry("found corruption at %d:%d", lnum, offs);
		}
		offs += sz;
		buf += sz;
		len -= sz;
	}
	/* Check remaining empty space */
	if (offs < c->leb_size)
		if (!is_empty(buf, len))
			goto out_err;
	/* Success: ownership of the LEB image passes to the caller */
	*pbuf = sbuf;
	return 0;

out_err:
	err = -EINVAL;
out_free:
	vfree(sbuf);
	*mst = NULL;
	*cor = NULL;
	return err;
}

/**
 * write_rcvrd_mst_node - write recovered master node.
 * @c: UBIFS file-system description object
 * @mst: master node
 *
 * The node is written, with the %UBIFS_MST_RCVRY flag set, to both
 * %UBIFS_MST_LNUM and %UBIFS_MST_LNUM + 1. The flags of @mst are restored to
 * their original value before returning, whether or not the writes succeeded.
 *
 * This function returns %0 on success and a negative error code on failure.
 */
static int write_rcvrd_mst_node(struct ubifs_info *c,
				struct ubifs_mst_node *mst)
{
	int err = 0, lnum = UBIFS_MST_LNUM, sz = c->mst_node_alsz;
	__le32 save_flags;

	dbg_rcvry("recovery");

	/* The recovery flag is only wanted in the on-flash copy */
	save_flags = mst->flags;
	mst->flags |= cpu_to_le32(UBIFS_MST_RCVRY);

	err = ubifs_prepare_node_hmac(c, mst, UBIFS_MST_NODE_SZ,
				      offsetof(struct ubifs_mst_node, hmac), 1);
	if (err)
		goto out;
	/* @sz bytes are written starting at @mst for each of the two LEBs */
	err = ubifs_leb_change(c, lnum, mst, sz);
	if (err)
		goto out;
	err = ubifs_leb_change(c, lnum + 1, mst, sz);
	if (err)
		goto out;
out:
	mst->flags = save_flags;
	return err;
}

/**
 * ubifs_recover_master_node - recover the master node.
 * @c: UBIFS file-system description object
 *
 * This function recovers the master node from corruption that may occur due to
 * an unclean unmount.
 *
 * This function returns %0 on success and a negative error code on failure.
 */
int ubifs_recover_master_node(struct ubifs_info *c)
{
	/*
	 * These must start out as NULL: get_master_node() only assigns the
	 * master node and corruption pointers when it finds something, and the
	 * error path below tests and frees them unconditionally.
	 */
	void *buf1 = NULL, *buf2 = NULL, *cor1 = NULL, *cor2 = NULL;
	struct ubifs_mst_node *mst1 = NULL, *mst2 = NULL, *mst;
	const int sz = c->mst_node_alsz;
	int err, offs1, offs2;

	dbg_rcvry("recovery");

	err = get_master_node(c, UBIFS_MST_LNUM, &buf1, &mst1, &cor1);
	if (err)
		goto out_free;

	err = get_master_node(c, UBIFS_MST_LNUM + 1, &buf2, &mst2, &cor2);
	if (err)
		goto out_free;

	/* Decide which of the two copies (if any) can be trusted */
	if (mst1) {
		offs1 = (void *)mst1 - buf1;
		if ((le32_to_cpu(mst1->flags) & UBIFS_MST_RCVRY) &&
		    (offs1 == 0 && !cor1)) {
			/*
			 * mst1 was written by recovery at offset 0 with no
			 * corruption.
			 */
			dbg_rcvry("recovery recovery");
			mst = mst1;
		} else if (mst2) {
			offs2 = (void *)mst2 - buf2;
			if (offs1 == offs2) {
				/* Same offset, so must be the same */
				if (ubifs_compare_master_node(c, mst1, mst2))
					goto out_err;
				mst = mst1;
			} else if (offs2 + sz == offs1) {
				/* 1st LEB was written, 2nd was not */
				if (cor1)
					goto out_err;
				mst = mst1;
			} else if (offs1 == 0 &&
				   c->leb_size - offs2 - sz < sz) {
				/* 1st LEB was unmapped and written, 2nd not */
				if (cor1)
					goto out_err;
				mst = mst1;
			} else
				goto out_err;
		} else {
			/*
			 * 2nd LEB was unmapped and about to be written, so
			 * there must be only one master node in the first LEB
			 * and no corruption.
			 */
			if (offs1 != 0 || cor1)
				goto out_err;
			mst = mst1;
		}
	} else {
		if (!mst2)
			goto out_err;
		/*
		 * 1st LEB was unmapped and about to be written, so there must
		 * be no room left in 2nd LEB.
		 */
		offs2 = (void *)mst2 - buf2;
		if (offs2 + sz + sz <= c->leb_size)
			goto out_err;
		mst = mst2;
	}

	ubifs_msg(c, "recovered master node from LEB %d",
		  (mst == mst1 ? UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1));

	/* @mst points into buf1 or buf2, so copy it before they are freed */
	memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ);

	if (c->ro_mount) {
		/* Read-only mode. Keep a copy for switching to rw mode */
		c->rcvrd_mst_node = kmalloc(sz, GFP_KERNEL);
		if (!c->rcvrd_mst_node) {
			err = -ENOMEM;
			goto out_free;
		}
		memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ);

		/*
		 * We had to recover the master node, which means there was an
		 * unclean reboot. However, it is possible that the master node
		 * is clean at this point, i.e., %UBIFS_MST_DIRTY is not set.
		 * E.g., consider the following chain of events:
		 *
		 * 1. UBIFS was cleanly unmounted, so the master node is clean
		 * 2. UBIFS is being mounted R/W and starts changing the master
		 *    node in the first (%UBIFS_MST_LNUM). A power cut happens,
		 *    so this LEB ends up with some amount of garbage at the
		 *    end.
		 * 3. UBIFS is being mounted R/O. We reach this place and
		 *    recover the master node from the second LEB
		 *    (%UBIFS_MST_LNUM + 1). But we cannot update the media
		 *    because we are being mounted R/O. We have to defer the
		 *    operation.
		 * 4. However, this master node (@c->mst_node) is marked as
		 *    clean (since the step 1). And if we just return, the
		 *    mount code will be confused and won't recover the master
		 *    node when it is re-mounted R/W later.
		 *
		 * Thus, we force the recovery by marking the master node as
		 * dirty.
		 */
		c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
	} else {
		/* Write the recovered master node */
		c->max_sqnum = le64_to_cpu(mst->ch.sqnum) - 1;
		err = write_rcvrd_mst_node(c, c->mst_node);
		if (err)
			goto out_free;
	}

	vfree(buf2);
	vfree(buf1);

	return 0;

out_err:
	err = -EINVAL;
out_free:
	/*
	 * This path is shared by every failure above, including allocation
	 * and I/O errors. The master nodes live inside buf1/buf2, so they are
	 * dumped before the buffers are freed.
	 */
	ubifs_err(c, "failed to recover master node");
	if (mst1) {
		ubifs_err(c, "dumping first master node");
		ubifs_dump_node(c, mst1);
	}
	if (mst2) {
		ubifs_err(c, "dumping second master node");
		ubifs_dump_node(c, mst2);
	}
	vfree(buf2);
	vfree(buf1);
	return err;
}

/**
 * ubifs_write_rcvrd_mst_node - write the recovered master node.
 * @c: UBIFS file-system description object
 *
 * This function writes the master node that was recovered during mounting in
 * read-only mode and must now be written because we are remounting rw.
 * If no recovered master node is pending (@c->rcvrd_mst_node is %NULL) there
 * is nothing to do. If the write fails, the saved copy is kept (not freed).
 *
 * This function returns %0 on success and a negative error code on failure.
 */
int ubifs_write_rcvrd_mst_node(struct ubifs_info *c)
{
	int err;

	if (!c->rcvrd_mst_node)
		return 0;
	/* Both the saved copy and the in-memory master node become dirty */
	c->rcvrd_mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
	c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
	err = write_rcvrd_mst_node(c, c->rcvrd_mst_node);
	if (err)
		return err;
	kfree(c->rcvrd_mst_node);
	c->rcvrd_mst_node = NULL;
	return 0;
}

/**
 * is_last_write - determine if an offset was in the last write to a LEB.
 * @c: UBIFS file-system description object
 * @buf: buffer to check
 * @offs: offset to check
 *
 * This function returns %1 if @offs was in the last write to the LEB whose data
 * is in @buf, otherwise %0 is returned. The determination is made by checking
 * for subsequent empty space starting from the next @c->max_write_size
 * boundary.
401 */ 402 static int is_last_write(const struct ubifs_info *c, void *buf, int offs) 403 { 404 int empty_offs, check_len; 405 uint8_t *p; 406 407 /* 408 * Round up to the next @c->max_write_size boundary i.e. @offs is in 409 * the last wbuf written. After that should be empty space. 410 */ 411 empty_offs = ALIGN(offs + 1, c->max_write_size); 412 check_len = c->leb_size - empty_offs; 413 p = buf + empty_offs - offs; 414 return is_empty(p, check_len); 415 } 416 417 /** 418 * clean_buf - clean the data from an LEB sitting in a buffer. 419 * @c: UBIFS file-system description object 420 * @buf: buffer to clean 421 * @lnum: LEB number to clean 422 * @offs: offset from which to clean 423 * @len: length of buffer 424 * 425 * This function pads up to the next min_io_size boundary (if there is one) and 426 * sets empty space to all 0xff. @buf, @offs and @len are updated to the next 427 * @c->min_io_size boundary. 428 */ 429 static void clean_buf(const struct ubifs_info *c, void **buf, int lnum, 430 int *offs, int *len) 431 { 432 int empty_offs, pad_len; 433 434 dbg_rcvry("cleaning corruption at %d:%d", lnum, *offs); 435 436 ubifs_assert(c, !(*offs & 7)); 437 empty_offs = ALIGN(*offs, c->min_io_size); 438 pad_len = empty_offs - *offs; 439 ubifs_pad(c, *buf, pad_len); 440 *offs += pad_len; 441 *buf += pad_len; 442 *len -= pad_len; 443 memset(*buf, 0xff, c->leb_size - empty_offs); 444 } 445 446 /** 447 * no_more_nodes - determine if there are no more nodes in a buffer. 448 * @c: UBIFS file-system description object 449 * @buf: buffer to check 450 * @len: length of buffer 451 * @lnum: LEB number of the LEB from which @buf was read 452 * @offs: offset from which @buf was read 453 * 454 * This function ensures that the corrupted node at @offs is the last thing 455 * written to a LEB. This function returns %1 if more data is not found and 456 * %0 if more data is found. 
457 */ 458 static int no_more_nodes(const struct ubifs_info *c, void *buf, int len, 459 int lnum, int offs) 460 { 461 struct ubifs_ch *ch = buf; 462 int skip, dlen = le32_to_cpu(ch->len); 463 464 /* Check for empty space after the corrupt node's common header */ 465 skip = ALIGN(offs + UBIFS_CH_SZ, c->max_write_size) - offs; 466 if (is_empty(buf + skip, len - skip)) 467 return 1; 468 /* 469 * The area after the common header size is not empty, so the common 470 * header must be intact. Check it. 471 */ 472 if (ubifs_check_node(c, buf, lnum, offs, 1, 0) != -EUCLEAN) { 473 dbg_rcvry("unexpected bad common header at %d:%d", lnum, offs); 474 return 0; 475 } 476 /* Now we know the corrupt node's length we can skip over it */ 477 skip = ALIGN(offs + dlen, c->max_write_size) - offs; 478 /* After which there should be empty space */ 479 if (is_empty(buf + skip, len - skip)) 480 return 1; 481 dbg_rcvry("unexpected data at %d:%d", lnum, offs + skip); 482 return 0; 483 } 484 485 /** 486 * fix_unclean_leb - fix an unclean LEB. 
487 * @c: UBIFS file-system description object 488 * @sleb: scanned LEB information 489 * @start: offset where scan started 490 */ 491 static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, 492 int start) 493 { 494 int lnum = sleb->lnum, endpt = start; 495 496 /* Get the end offset of the last node we are keeping */ 497 if (!list_empty(&sleb->nodes)) { 498 struct ubifs_scan_node *snod; 499 500 snod = list_entry(sleb->nodes.prev, 501 struct ubifs_scan_node, list); 502 endpt = snod->offs + snod->len; 503 } 504 505 if (c->ro_mount && !c->remounting_rw) { 506 /* Add to recovery list */ 507 struct ubifs_unclean_leb *ucleb; 508 509 dbg_rcvry("need to fix LEB %d start %d endpt %d", 510 lnum, start, sleb->endpt); 511 ucleb = kzalloc(sizeof(struct ubifs_unclean_leb), GFP_NOFS); 512 if (!ucleb) 513 return -ENOMEM; 514 ucleb->lnum = lnum; 515 ucleb->endpt = endpt; 516 list_add_tail(&ucleb->list, &c->unclean_leb_list); 517 } else { 518 /* Write the fixed LEB back to flash */ 519 int err; 520 521 dbg_rcvry("fixing LEB %d start %d endpt %d", 522 lnum, start, sleb->endpt); 523 if (endpt == 0) { 524 err = ubifs_leb_unmap(c, lnum); 525 if (err) 526 return err; 527 } else { 528 int len = ALIGN(endpt, c->min_io_size); 529 530 if (start) { 531 err = ubifs_leb_read(c, lnum, sleb->buf, 0, 532 start, 1); 533 if (err) 534 return err; 535 } 536 /* Pad to min_io_size */ 537 if (len > endpt) { 538 int pad_len = len - ALIGN(endpt, 8); 539 540 if (pad_len > 0) { 541 void *buf = sleb->buf + len - pad_len; 542 543 ubifs_pad(c, buf, pad_len); 544 } 545 } 546 err = ubifs_leb_change(c, lnum, sleb->buf, len); 547 if (err) 548 return err; 549 } 550 } 551 return 0; 552 } 553 554 /** 555 * drop_last_group - drop the last group of nodes. 556 * @sleb: scanned LEB information 557 * @offs: offset of dropped nodes is returned here 558 * 559 * This is a helper function for 'ubifs_recover_leb()' which drops the last 560 * group of nodes of the scanned LEB. 
561 */ 562 static void drop_last_group(struct ubifs_scan_leb *sleb, int *offs) 563 { 564 while (!list_empty(&sleb->nodes)) { 565 struct ubifs_scan_node *snod; 566 struct ubifs_ch *ch; 567 568 snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, 569 list); 570 ch = snod->node; 571 if (ch->group_type != UBIFS_IN_NODE_GROUP) 572 break; 573 574 dbg_rcvry("dropping grouped node at %d:%d", 575 sleb->lnum, snod->offs); 576 *offs = snod->offs; 577 list_del(&snod->list); 578 kfree(snod); 579 sleb->nodes_cnt -= 1; 580 } 581 } 582 583 /** 584 * drop_last_node - drop the last node. 585 * @sleb: scanned LEB information 586 * @offs: offset of dropped nodes is returned here 587 * 588 * This is a helper function for 'ubifs_recover_leb()' which drops the last 589 * node of the scanned LEB. 590 */ 591 static void drop_last_node(struct ubifs_scan_leb *sleb, int *offs) 592 { 593 struct ubifs_scan_node *snod; 594 595 if (!list_empty(&sleb->nodes)) { 596 snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, 597 list); 598 599 dbg_rcvry("dropping last node at %d:%d", 600 sleb->lnum, snod->offs); 601 *offs = snod->offs; 602 list_del(&snod->list); 603 kfree(snod); 604 sleb->nodes_cnt -= 1; 605 } 606 } 607 608 /** 609 * ubifs_recover_leb - scan and recover a LEB. 610 * @c: UBIFS file-system description object 611 * @lnum: LEB number 612 * @offs: offset 613 * @sbuf: LEB-sized buffer to use 614 * @jhead: journal head number this LEB belongs to (%-1 if the LEB does not 615 * belong to any journal head) 616 * 617 * This function does a scan of a LEB, but caters for errors that might have 618 * been caused by the unclean unmount from which we are attempting to recover. 619 * Returns the scanned information on success and a negative error code on 620 * failure. 
 */
struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
					 int offs, void *sbuf, int jhead)
{
	/* @start remembers where the scan began; @offs/@buf/@len move on */
	int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit;
	int grouped = jhead == -1 ? 0 : c->jheads[jhead].grouped;
	struct ubifs_scan_leb *sleb;
	void *buf = sbuf + offs;

	dbg_rcvry("%d:%d, jhead %d, grouped %d", lnum, offs, jhead, grouped);

	sleb = ubifs_start_scan(c, lnum, offs, sbuf);
	if (IS_ERR(sleb))
		return sleb;

	ubifs_assert(c, len >= 8);
	while (len >= 8) {
		dbg_scan("look at LEB %d:%d (%d bytes left)",
			 lnum, offs, len);

		cond_resched();

		/*
		 * Scan quietly until there is an error from which we cannot
		 * recover
		 */
		ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
		if (ret == SCANNED_A_NODE) {
			/* A valid node, and not a padding node */
			struct ubifs_ch *ch = buf;
			int node_len;

			err = ubifs_add_snod(c, sleb, buf, offs);
			if (err)
				goto error;
			node_len = ALIGN(le32_to_cpu(ch->len), 8);
			offs += node_len;
			buf += node_len;
			len -= node_len;
		} else if (ret > 0) {
			/* Padding bytes or a valid padding node */
			offs += ret;
			buf += ret;
			len -= ret;
		} else if (ret == SCANNED_EMPTY_SPACE ||
			   ret == SCANNED_GARBAGE ||
			   ret == SCANNED_A_BAD_PAD_NODE ||
			   ret == SCANNED_A_CORRUPT_NODE) {
			dbg_rcvry("found corruption (%d) at %d:%d",
				  ret, lnum, offs);
			break;
		} else {
			ubifs_err(c, "unexpected return value %d", ret);
			err = -EINVAL;
			goto error;
		}
	}

	/*
	 * Decide whether what stopped the scan is consistent with an
	 * interrupted last write. @ret still holds the result of the last
	 * ubifs_scan_a_node() call (or its initial value if the loop body never
	 * ran).
	 */
	if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) {
		if (!is_last_write(c, buf, offs))
			goto corrupted_rescan;
	} else if (ret == SCANNED_A_CORRUPT_NODE) {
		if (!no_more_nodes(c, buf, len, lnum, offs))
			goto corrupted_rescan;
	} else if (!is_empty(buf, len)) {
		if (!is_last_write(c, buf, offs)) {
			int corruption = first_non_ff(buf, len);

			/*
			 * See header comment for this file for more
			 * explanations about the reasons we have this check.
			 */
			ubifs_err(c, "corrupt empty space LEB %d:%d, corruption starts at %d",
				  lnum, offs, corruption);
			/* Make sure we dump interesting non-0xFF data */
			offs += corruption;
			buf += corruption;
			goto corrupted;
		}
	}

	/*
	 * Start of the min. I/O unit in which the scan stopped. It is computed
	 * before any nodes are dropped, because dropping moves @offs back.
	 */
	min_io_unit = round_down(offs, c->min_io_size);
	if (grouped)
		/*
		 * If nodes are grouped, always drop the incomplete group at
		 * the end.
		 */
		drop_last_group(sleb, &offs);

	if (jhead == GCHD) {
		/*
		 * If this LEB belongs to the GC head then while we are in the
		 * middle of the same min. I/O unit keep dropping nodes. So
		 * basically, what we want is to make sure that the last min.
		 * I/O unit where we saw the corruption is dropped completely
		 * with all the uncorrupted nodes which may possibly sit there.
		 *
		 * In other words, let's name the min. I/O unit where the
		 * corruption starts B, and the previous min. I/O unit A. The
		 * below code tries to deal with a situation when half of B
		 * contains valid nodes or the end of a valid node, and the
		 * second half of B contains corrupted data or garbage. This
		 * means that UBIFS had been writing to B just before the power
		 * cut happened. I do not know how realistic is this scenario
		 * that half of the min. I/O unit had been written successfully
		 * and the other half not, but this is possible in our 'failure
		 * mode emulation' infrastructure at least.
		 *
		 * So what is the problem, why we need to drop those nodes? Why
		 * can't we just clean-up the second half of B by putting a
		 * padding node there? We can, and this works fine with one
		 * exception which was reproduced with power cut emulation
		 * testing and happens extremely rarely.
		 *
		 * Imagine the file-system is full, we run GC which starts
		 * moving valid nodes from LEB X to LEB Y (obviously, LEB Y is
		 * the current GC head LEB). The @c->gc_lnum is -1, which means
		 * that GC will retain LEB X and will try to continue. Imagine
		 * that LEB X is currently the dirtiest LEB, and the amount of
		 * used space in LEB Y is exactly the same as amount of free
		 * space in LEB X.
		 *
		 * And a power cut happens when nodes are moved from LEB X to
		 * LEB Y. We are here trying to recover LEB Y which is the GC
		 * head LEB. We find the min. I/O unit B as described above.
		 * Then we clean-up LEB Y by padding min. I/O unit. And later
		 * 'ubifs_rcvry_gc_commit()' function fails, because it cannot
		 * find a dirty LEB which could be GC'd into LEB Y! Even LEB X
		 * does not match because the amount of valid nodes there does
		 * not fit the free space in LEB Y any more! And this is
		 * because of the padding node which we added to LEB Y. The
		 * user-visible effect of this which I once observed and
		 * analysed is that we cannot mount the file-system with
		 * -ENOSPC error.
		 *
		 * So obviously, to make sure that situation does not happen we
		 * should free min. I/O unit B in LEB Y completely and the last
		 * used min. I/O unit in LEB Y should be A. This is basically
		 * what the below code tries to do.
		 *
		 * NOTE(review): drop_last_node() leaves @offs unchanged when
		 * the node list is empty, so this loop relies on there being a
		 * scanned node that starts at or before @min_io_unit whenever
		 * @offs is beyond it - TODO confirm that callers guarantee
		 * this.
		 */
		while (offs > min_io_unit)
			drop_last_node(sleb, &offs);
	}

	/* Re-derive the cursors from the (possibly moved back) @offs */
	buf = sbuf + offs;
	len = c->leb_size - offs;

	clean_buf(c, &buf, lnum, &offs, &len);
	ubifs_end_scan(c, sleb, lnum, offs);

	err = fix_unclean_leb(c, sleb, start);
	if (err)
		goto error;

	return sleb;

corrupted_rescan:
	/* Re-scan the corrupted data with verbose messages */
	ubifs_err(c, "corruption %d", ret);
	ubifs_scan_a_node(c, buf, len, lnum, offs, 0);
corrupted:
	ubifs_scanned_corruption(c, lnum, offs, buf);
	err = -EUCLEAN;
error:
	ubifs_err(c, "LEB %d scanning failed", lnum);
	ubifs_scan_destroy(sleb);
	return ERR_PTR(err);
}

/**
 * get_cs_sqnum - get commit start sequence number.
792 * @c: UBIFS file-system description object 793 * @lnum: LEB number of commit start node 794 * @offs: offset of commit start node 795 * @cs_sqnum: commit start sequence number is returned here 796 * 797 * This function returns %0 on success and a negative error code on failure. 798 */ 799 static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs, 800 unsigned long long *cs_sqnum) 801 { 802 struct ubifs_cs_node *cs_node = NULL; 803 int err, ret; 804 805 dbg_rcvry("at %d:%d", lnum, offs); 806 cs_node = kmalloc(UBIFS_CS_NODE_SZ, GFP_KERNEL); 807 if (!cs_node) 808 return -ENOMEM; 809 if (c->leb_size - offs < UBIFS_CS_NODE_SZ) 810 goto out_err; 811 err = ubifs_leb_read(c, lnum, (void *)cs_node, offs, 812 UBIFS_CS_NODE_SZ, 0); 813 if (err && err != -EBADMSG) 814 goto out_free; 815 ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0); 816 if (ret != SCANNED_A_NODE) { 817 ubifs_err(c, "Not a valid node"); 818 goto out_err; 819 } 820 if (cs_node->ch.node_type != UBIFS_CS_NODE) { 821 ubifs_err(c, "Not a CS node, type is %d", cs_node->ch.node_type); 822 goto out_err; 823 } 824 if (le64_to_cpu(cs_node->cmt_no) != c->cmt_no) { 825 ubifs_err(c, "CS node cmt_no %llu != current cmt_no %llu", 826 (unsigned long long)le64_to_cpu(cs_node->cmt_no), 827 c->cmt_no); 828 goto out_err; 829 } 830 *cs_sqnum = le64_to_cpu(cs_node->ch.sqnum); 831 dbg_rcvry("commit start sqnum %llu", *cs_sqnum); 832 kfree(cs_node); 833 return 0; 834 835 out_err: 836 err = -EINVAL; 837 out_free: 838 ubifs_err(c, "failed to get CS sqnum"); 839 kfree(cs_node); 840 return err; 841 } 842 843 /** 844 * ubifs_recover_log_leb - scan and recover a log LEB. 
845 * @c: UBIFS file-system description object 846 * @lnum: LEB number 847 * @offs: offset 848 * @sbuf: LEB-sized buffer to use 849 * 850 * This function does a scan of a LEB, but caters for errors that might have 851 * been caused by unclean reboots from which we are attempting to recover 852 * (assume that only the last log LEB can be corrupted by an unclean reboot). 853 * 854 * This function returns %0 on success and a negative error code on failure. 855 */ 856 struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, 857 int offs, void *sbuf) 858 { 859 struct ubifs_scan_leb *sleb; 860 int next_lnum; 861 862 dbg_rcvry("LEB %d", lnum); 863 next_lnum = lnum + 1; 864 if (next_lnum >= UBIFS_LOG_LNUM + c->log_lebs) 865 next_lnum = UBIFS_LOG_LNUM; 866 if (next_lnum != c->ltail_lnum) { 867 /* 868 * We can only recover at the end of the log, so check that the 869 * next log LEB is empty or out of date. 870 */ 871 sleb = ubifs_scan(c, next_lnum, 0, sbuf, 0); 872 if (IS_ERR(sleb)) 873 return sleb; 874 if (sleb->nodes_cnt) { 875 struct ubifs_scan_node *snod; 876 unsigned long long cs_sqnum = c->cs_sqnum; 877 878 snod = list_entry(sleb->nodes.next, 879 struct ubifs_scan_node, list); 880 if (cs_sqnum == 0) { 881 int err; 882 883 err = get_cs_sqnum(c, lnum, offs, &cs_sqnum); 884 if (err) { 885 ubifs_scan_destroy(sleb); 886 return ERR_PTR(err); 887 } 888 } 889 if (snod->sqnum > cs_sqnum) { 890 ubifs_err(c, "unrecoverable log corruption in LEB %d", 891 lnum); 892 ubifs_scan_destroy(sleb); 893 return ERR_PTR(-EUCLEAN); 894 } 895 } 896 ubifs_scan_destroy(sleb); 897 } 898 return ubifs_recover_leb(c, lnum, offs, sbuf, -1); 899 } 900 901 /** 902 * recover_head - recover a head. 903 * @c: UBIFS file-system description object 904 * @lnum: LEB number of head to recover 905 * @offs: offset of head to recover 906 * @sbuf: LEB-sized buffer to use 907 * 908 * This function ensures that there is no data on the flash at a head location. 
909 * 910 * This function returns %0 on success and a negative error code on failure. 911 */ 912 static int recover_head(struct ubifs_info *c, int lnum, int offs, void *sbuf) 913 { 914 int len = c->max_write_size, err; 915 916 if (offs + len > c->leb_size) 917 len = c->leb_size - offs; 918 919 if (!len) 920 return 0; 921 922 /* Read at the head location and check it is empty flash */ 923 err = ubifs_leb_read(c, lnum, sbuf, offs, len, 1); 924 if (err || !is_empty(sbuf, len)) { 925 dbg_rcvry("cleaning head at %d:%d", lnum, offs); 926 if (offs == 0) 927 return ubifs_leb_unmap(c, lnum); 928 err = ubifs_leb_read(c, lnum, sbuf, 0, offs, 1); 929 if (err) 930 return err; 931 return ubifs_leb_change(c, lnum, sbuf, offs); 932 } 933 934 return 0; 935 } 936 937 /** 938 * ubifs_recover_inl_heads - recover index and LPT heads. 939 * @c: UBIFS file-system description object 940 * @sbuf: LEB-sized buffer to use 941 * 942 * This function ensures that there is no data on the flash at the index and 943 * LPT head locations. 944 * 945 * This deals with the recovery of a half-completed journal commit. UBIFS is 946 * careful never to overwrite the last version of the index or the LPT. Because 947 * the index and LPT are wandering trees, data from a half-completed commit will 948 * not be referenced anywhere in UBIFS. The data will be either in LEBs that are 949 * assumed to be empty and will be unmapped anyway before use, or in the index 950 * and LPT heads. 951 * 952 * This function returns %0 on success and a negative error code on failure. 
953 */ 954 int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf) 955 { 956 int err; 957 958 ubifs_assert(c, !c->ro_mount || c->remounting_rw); 959 960 dbg_rcvry("checking index head at %d:%d", c->ihead_lnum, c->ihead_offs); 961 err = recover_head(c, c->ihead_lnum, c->ihead_offs, sbuf); 962 if (err) 963 return err; 964 965 dbg_rcvry("checking LPT head at %d:%d", c->nhead_lnum, c->nhead_offs); 966 967 return recover_head(c, c->nhead_lnum, c->nhead_offs, sbuf); 968 } 969 970 /** 971 * clean_an_unclean_leb - read and write a LEB to remove corruption. 972 * @c: UBIFS file-system description object 973 * @ucleb: unclean LEB information 974 * @sbuf: LEB-sized buffer to use 975 * 976 * This function reads a LEB up to a point pre-determined by the mount recovery, 977 * checks the nodes, and writes the result back to the flash, thereby cleaning 978 * off any following corruption, or non-fatal ECC errors. 979 * 980 * This function returns %0 on success and a negative error code on failure. 
981 */ 982 static int clean_an_unclean_leb(struct ubifs_info *c, 983 struct ubifs_unclean_leb *ucleb, void *sbuf) 984 { 985 int err, lnum = ucleb->lnum, offs = 0, len = ucleb->endpt, quiet = 1; 986 void *buf = sbuf; 987 988 dbg_rcvry("LEB %d len %d", lnum, len); 989 990 if (len == 0) { 991 /* Nothing to read, just unmap it */ 992 return ubifs_leb_unmap(c, lnum); 993 } 994 995 err = ubifs_leb_read(c, lnum, buf, offs, len, 0); 996 if (err && err != -EBADMSG) 997 return err; 998 999 while (len >= 8) { 1000 int ret; 1001 1002 cond_resched(); 1003 1004 /* Scan quietly until there is an error */ 1005 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet); 1006 1007 if (ret == SCANNED_A_NODE) { 1008 /* A valid node, and not a padding node */ 1009 struct ubifs_ch *ch = buf; 1010 int node_len; 1011 1012 node_len = ALIGN(le32_to_cpu(ch->len), 8); 1013 offs += node_len; 1014 buf += node_len; 1015 len -= node_len; 1016 continue; 1017 } 1018 1019 if (ret > 0) { 1020 /* Padding bytes or a valid padding node */ 1021 offs += ret; 1022 buf += ret; 1023 len -= ret; 1024 continue; 1025 } 1026 1027 if (ret == SCANNED_EMPTY_SPACE) { 1028 ubifs_err(c, "unexpected empty space at %d:%d", 1029 lnum, offs); 1030 return -EUCLEAN; 1031 } 1032 1033 if (quiet) { 1034 /* Redo the last scan but noisily */ 1035 quiet = 0; 1036 continue; 1037 } 1038 1039 ubifs_scanned_corruption(c, lnum, offs, buf); 1040 return -EUCLEAN; 1041 } 1042 1043 /* Pad to min_io_size */ 1044 len = ALIGN(ucleb->endpt, c->min_io_size); 1045 if (len > ucleb->endpt) { 1046 int pad_len = len - ALIGN(ucleb->endpt, 8); 1047 1048 if (pad_len > 0) { 1049 buf = c->sbuf + len - pad_len; 1050 ubifs_pad(c, buf, pad_len); 1051 } 1052 } 1053 1054 /* Write back the LEB atomically */ 1055 err = ubifs_leb_change(c, lnum, sbuf, len); 1056 if (err) 1057 return err; 1058 1059 dbg_rcvry("cleaned LEB %d", lnum); 1060 1061 return 0; 1062 } 1063 1064 /** 1065 * ubifs_clean_lebs - clean LEBs recovered during read-only mount. 
1066 * @c: UBIFS file-system description object 1067 * @sbuf: LEB-sized buffer to use 1068 * 1069 * This function cleans a LEB identified during recovery that needs to be 1070 * written but was not because UBIFS was mounted read-only. This happens when 1071 * remounting to read-write mode. 1072 * 1073 * This function returns %0 on success and a negative error code on failure. 1074 */ 1075 int ubifs_clean_lebs(struct ubifs_info *c, void *sbuf) 1076 { 1077 dbg_rcvry("recovery"); 1078 while (!list_empty(&c->unclean_leb_list)) { 1079 struct ubifs_unclean_leb *ucleb; 1080 int err; 1081 1082 ucleb = list_entry(c->unclean_leb_list.next, 1083 struct ubifs_unclean_leb, list); 1084 err = clean_an_unclean_leb(c, ucleb, sbuf); 1085 if (err) 1086 return err; 1087 list_del(&ucleb->list); 1088 kfree(ucleb); 1089 } 1090 return 0; 1091 } 1092 1093 /** 1094 * grab_empty_leb - grab an empty LEB to use as GC LEB and run commit. 1095 * @c: UBIFS file-system description object 1096 * 1097 * This is a helper function for 'ubifs_rcvry_gc_commit()' which grabs an empty 1098 * LEB to be used as GC LEB (@c->gc_lnum), and then runs the commit. Returns 1099 * zero in case of success and a negative error code in case of failure. 1100 */ 1101 static int grab_empty_leb(struct ubifs_info *c) 1102 { 1103 int lnum, err; 1104 1105 /* 1106 * Note, it is very important to first search for an empty LEB and then 1107 * run the commit, not vice-versa. The reason is that there might be 1108 * only one empty LEB at the moment, the one which has been the 1109 * @c->gc_lnum just before the power cut happened. During the regular 1110 * UBIFS operation (not now) @c->gc_lnum is marked as "taken", so no 1111 * one but GC can grab it. But at this moment this single empty LEB is 1112 * not marked as taken, so if we run commit - what happens? Right, the 1113 * commit will grab it and write the index there. 
Remember that the 1114 * index always expands as long as there is free space, and it only 1115 * starts consolidating when we run out of space. 1116 * 1117 * IOW, if we run commit now, we might not be able to find a free LEB 1118 * after this. 1119 */ 1120 lnum = ubifs_find_free_leb_for_idx(c); 1121 if (lnum < 0) { 1122 ubifs_err(c, "could not find an empty LEB"); 1123 ubifs_dump_lprops(c); 1124 ubifs_dump_budg(c, &c->bi); 1125 return lnum; 1126 } 1127 1128 /* Reset the index flag */ 1129 err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, 1130 LPROPS_INDEX, 0); 1131 if (err) 1132 return err; 1133 1134 c->gc_lnum = lnum; 1135 dbg_rcvry("found empty LEB %d, run commit", lnum); 1136 1137 return ubifs_run_commit(c); 1138 } 1139 1140 /** 1141 * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit. 1142 * @c: UBIFS file-system description object 1143 * 1144 * Out-of-place garbage collection requires always one empty LEB with which to 1145 * start garbage collection. The LEB number is recorded in c->gc_lnum and is 1146 * written to the master node on unmounting. In the case of an unclean unmount 1147 * the value of gc_lnum recorded in the master node is out of date and cannot 1148 * be used. Instead, recovery must allocate an empty LEB for this purpose. 1149 * However, there may not be enough empty space, in which case it must be 1150 * possible to GC the dirtiest LEB into the GC head LEB. 1151 * 1152 * This function also runs the commit which causes the TNC updates from 1153 * size-recovery and orphans to be written to the flash. That is important to 1154 * ensure correct replay order for subsequent mounts. 1155 * 1156 * This function returns %0 on success and a negative error code on failure. 
1157 */ 1158 int ubifs_rcvry_gc_commit(struct ubifs_info *c) 1159 { 1160 struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; 1161 struct ubifs_lprops lp; 1162 int err; 1163 1164 dbg_rcvry("GC head LEB %d, offs %d", wbuf->lnum, wbuf->offs); 1165 1166 c->gc_lnum = -1; 1167 if (wbuf->lnum == -1 || wbuf->offs == c->leb_size) 1168 return grab_empty_leb(c); 1169 1170 err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2); 1171 if (err) { 1172 if (err != -ENOSPC) 1173 return err; 1174 1175 dbg_rcvry("could not find a dirty LEB"); 1176 return grab_empty_leb(c); 1177 } 1178 1179 ubifs_assert(c, !(lp.flags & LPROPS_INDEX)); 1180 ubifs_assert(c, lp.free + lp.dirty >= wbuf->offs); 1181 1182 /* 1183 * We run the commit before garbage collection otherwise subsequent 1184 * mounts will see the GC and orphan deletion in a different order. 1185 */ 1186 dbg_rcvry("committing"); 1187 err = ubifs_run_commit(c); 1188 if (err) 1189 return err; 1190 1191 dbg_rcvry("GC'ing LEB %d", lp.lnum); 1192 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); 1193 err = ubifs_garbage_collect_leb(c, &lp); 1194 if (err >= 0) { 1195 int err2 = ubifs_wbuf_sync_nolock(wbuf); 1196 1197 if (err2) 1198 err = err2; 1199 } 1200 mutex_unlock(&wbuf->io_mutex); 1201 if (err < 0) { 1202 ubifs_err(c, "GC failed, error %d", err); 1203 if (err == -EAGAIN) 1204 err = -EINVAL; 1205 return err; 1206 } 1207 1208 ubifs_assert(c, err == LEB_RETAINED); 1209 if (err != LEB_RETAINED) 1210 return -EINVAL; 1211 1212 err = ubifs_leb_unmap(c, c->gc_lnum); 1213 if (err) 1214 return err; 1215 1216 dbg_rcvry("allocated LEB %d for GC", lp.lnum); 1217 return 0; 1218 } 1219 1220 /** 1221 * struct size_entry - inode size information for recovery. 
1222 * @rb: link in the RB-tree of sizes 1223 * @inum: inode number 1224 * @i_size: size on inode 1225 * @d_size: maximum size based on data nodes 1226 * @exists: indicates whether the inode exists 1227 * @inode: inode if pinned in memory awaiting rw mode to fix it 1228 */ 1229 struct size_entry { 1230 struct rb_node rb; 1231 ino_t inum; 1232 loff_t i_size; 1233 loff_t d_size; 1234 int exists; 1235 struct inode *inode; 1236 }; 1237 1238 /** 1239 * add_ino - add an entry to the size tree. 1240 * @c: UBIFS file-system description object 1241 * @inum: inode number 1242 * @i_size: size on inode 1243 * @d_size: maximum size based on data nodes 1244 * @exists: indicates whether the inode exists 1245 */ 1246 static int add_ino(struct ubifs_info *c, ino_t inum, loff_t i_size, 1247 loff_t d_size, int exists) 1248 { 1249 struct rb_node **p = &c->size_tree.rb_node, *parent = NULL; 1250 struct size_entry *e; 1251 1252 while (*p) { 1253 parent = *p; 1254 e = rb_entry(parent, struct size_entry, rb); 1255 if (inum < e->inum) 1256 p = &(*p)->rb_left; 1257 else 1258 p = &(*p)->rb_right; 1259 } 1260 1261 e = kzalloc(sizeof(struct size_entry), GFP_KERNEL); 1262 if (!e) 1263 return -ENOMEM; 1264 1265 e->inum = inum; 1266 e->i_size = i_size; 1267 e->d_size = d_size; 1268 e->exists = exists; 1269 1270 rb_link_node(&e->rb, parent, p); 1271 rb_insert_color(&e->rb, &c->size_tree); 1272 1273 return 0; 1274 } 1275 1276 /** 1277 * find_ino - find an entry on the size tree. 1278 * @c: UBIFS file-system description object 1279 * @inum: inode number 1280 */ 1281 static struct size_entry *find_ino(struct ubifs_info *c, ino_t inum) 1282 { 1283 struct rb_node *p = c->size_tree.rb_node; 1284 struct size_entry *e; 1285 1286 while (p) { 1287 e = rb_entry(p, struct size_entry, rb); 1288 if (inum < e->inum) 1289 p = p->rb_left; 1290 else if (inum > e->inum) 1291 p = p->rb_right; 1292 else 1293 return e; 1294 } 1295 return NULL; 1296 } 1297 1298 /** 1299 * remove_ino - remove an entry from the size tree. 
1300 * @c: UBIFS file-system description object 1301 * @inum: inode number 1302 */ 1303 static void remove_ino(struct ubifs_info *c, ino_t inum) 1304 { 1305 struct size_entry *e = find_ino(c, inum); 1306 1307 if (!e) 1308 return; 1309 rb_erase(&e->rb, &c->size_tree); 1310 kfree(e); 1311 } 1312 1313 /** 1314 * ubifs_destroy_size_tree - free resources related to the size tree. 1315 * @c: UBIFS file-system description object 1316 */ 1317 void ubifs_destroy_size_tree(struct ubifs_info *c) 1318 { 1319 struct size_entry *e, *n; 1320 1321 rbtree_postorder_for_each_entry_safe(e, n, &c->size_tree, rb) { 1322 iput(e->inode); 1323 kfree(e); 1324 } 1325 1326 c->size_tree = RB_ROOT; 1327 } 1328 1329 /** 1330 * ubifs_recover_size_accum - accumulate inode sizes for recovery. 1331 * @c: UBIFS file-system description object 1332 * @key: node key 1333 * @deletion: node is for a deletion 1334 * @new_size: inode size 1335 * 1336 * This function has two purposes: 1337 * 1) to ensure there are no data nodes that fall outside the inode size 1338 * 2) to ensure there are no data nodes for inodes that do not exist 1339 * To accomplish those purposes, a rb-tree is constructed containing an entry 1340 * for each inode number in the journal that has not been deleted, and recording 1341 * the size from the inode node, the maximum size of any data node (also altered 1342 * by truncations) and a flag indicating a inode number for which no inode node 1343 * was present in the journal. 1344 * 1345 * Note that there is still the possibility that there are data nodes that have 1346 * been committed that are beyond the inode size, however the only way to find 1347 * them would be to scan the entire index. Alternatively, some provision could 1348 * be made to record the size of inodes at the start of commit, which would seem 1349 * very cumbersome for a scenario that is quite unlikely and the only negative 1350 * consequence of which is wasted space. 
1351 * 1352 * This functions returns %0 on success and a negative error code on failure. 1353 */ 1354 int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key, 1355 int deletion, loff_t new_size) 1356 { 1357 ino_t inum = key_inum(c, key); 1358 struct size_entry *e; 1359 int err; 1360 1361 switch (key_type(c, key)) { 1362 case UBIFS_INO_KEY: 1363 if (deletion) 1364 remove_ino(c, inum); 1365 else { 1366 e = find_ino(c, inum); 1367 if (e) { 1368 e->i_size = new_size; 1369 e->exists = 1; 1370 } else { 1371 err = add_ino(c, inum, new_size, 0, 1); 1372 if (err) 1373 return err; 1374 } 1375 } 1376 break; 1377 case UBIFS_DATA_KEY: 1378 e = find_ino(c, inum); 1379 if (e) { 1380 if (new_size > e->d_size) 1381 e->d_size = new_size; 1382 } else { 1383 err = add_ino(c, inum, 0, new_size, 0); 1384 if (err) 1385 return err; 1386 } 1387 break; 1388 case UBIFS_TRUN_KEY: 1389 e = find_ino(c, inum); 1390 if (e) 1391 e->d_size = new_size; 1392 break; 1393 } 1394 return 0; 1395 } 1396 1397 /** 1398 * fix_size_in_place - fix inode size in place on flash. 1399 * @c: UBIFS file-system description object 1400 * @e: inode size information for recovery 1401 */ 1402 static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e) 1403 { 1404 struct ubifs_ino_node *ino = c->sbuf; 1405 unsigned char *p; 1406 union ubifs_key key; 1407 int err, lnum, offs, len; 1408 loff_t i_size; 1409 uint32_t crc; 1410 1411 /* Locate the inode node LEB number and offset */ 1412 ino_key_init(c, &key, e->inum); 1413 err = ubifs_tnc_locate(c, &key, ino, &lnum, &offs); 1414 if (err) 1415 goto out; 1416 /* 1417 * If the size recorded on the inode node is greater than the size that 1418 * was calculated from nodes in the journal then don't change the inode. 
1419 */ 1420 i_size = le64_to_cpu(ino->size); 1421 if (i_size >= e->d_size) 1422 return 0; 1423 /* Read the LEB */ 1424 err = ubifs_leb_read(c, lnum, c->sbuf, 0, c->leb_size, 1); 1425 if (err) 1426 goto out; 1427 /* Change the size field and recalculate the CRC */ 1428 ino = c->sbuf + offs; 1429 ino->size = cpu_to_le64(e->d_size); 1430 len = le32_to_cpu(ino->ch.len); 1431 crc = crc32(UBIFS_CRC32_INIT, (void *)ino + 8, len - 8); 1432 ino->ch.crc = cpu_to_le32(crc); 1433 /* Work out where data in the LEB ends and free space begins */ 1434 p = c->sbuf; 1435 len = c->leb_size - 1; 1436 while (p[len] == 0xff) 1437 len -= 1; 1438 len = ALIGN(len + 1, c->min_io_size); 1439 /* Atomically write the fixed LEB back again */ 1440 err = ubifs_leb_change(c, lnum, c->sbuf, len); 1441 if (err) 1442 goto out; 1443 dbg_rcvry("inode %lu at %d:%d size %lld -> %lld", 1444 (unsigned long)e->inum, lnum, offs, i_size, e->d_size); 1445 return 0; 1446 1447 out: 1448 ubifs_warn(c, "inode %lu failed to fix size %lld -> %lld error %d", 1449 (unsigned long)e->inum, e->i_size, e->d_size, err); 1450 return err; 1451 } 1452 1453 /** 1454 * inode_fix_size - fix inode size 1455 * @c: UBIFS file-system description object 1456 * @e: inode size information for recovery 1457 */ 1458 static int inode_fix_size(struct ubifs_info *c, struct size_entry *e) 1459 { 1460 struct inode *inode; 1461 struct ubifs_inode *ui; 1462 int err; 1463 1464 if (c->ro_mount) 1465 ubifs_assert(c, !e->inode); 1466 1467 if (e->inode) { 1468 /* Remounting rw, pick up inode we stored earlier */ 1469 inode = e->inode; 1470 } else { 1471 inode = ubifs_iget(c->vfs_sb, e->inum); 1472 if (IS_ERR(inode)) 1473 return PTR_ERR(inode); 1474 1475 if (inode->i_size >= e->d_size) { 1476 /* 1477 * The original inode in the index already has a size 1478 * big enough, nothing to do 1479 */ 1480 iput(inode); 1481 return 0; 1482 } 1483 1484 dbg_rcvry("ino %lu size %lld -> %lld", 1485 (unsigned long)e->inum, 1486 inode->i_size, e->d_size); 1487 1488 
ui = ubifs_inode(inode); 1489 1490 inode->i_size = e->d_size; 1491 ui->ui_size = e->d_size; 1492 ui->synced_i_size = e->d_size; 1493 1494 e->inode = inode; 1495 } 1496 1497 /* 1498 * In readonly mode just keep the inode pinned in memory until we go 1499 * readwrite. In readwrite mode write the inode to the journal with the 1500 * fixed size. 1501 */ 1502 if (c->ro_mount) 1503 return 0; 1504 1505 err = ubifs_jnl_write_inode(c, inode); 1506 1507 iput(inode); 1508 1509 if (err) 1510 return err; 1511 1512 rb_erase(&e->rb, &c->size_tree); 1513 kfree(e); 1514 1515 return 0; 1516 } 1517 1518 /** 1519 * ubifs_recover_size - recover inode size. 1520 * @c: UBIFS file-system description object 1521 * @in_place: If true, do a in-place size fixup 1522 * 1523 * This function attempts to fix inode size discrepancies identified by the 1524 * 'ubifs_recover_size_accum()' function. 1525 * 1526 * This functions returns %0 on success and a negative error code on failure. 1527 */ 1528 int ubifs_recover_size(struct ubifs_info *c, bool in_place) 1529 { 1530 struct rb_node *this = rb_first(&c->size_tree); 1531 1532 while (this) { 1533 struct size_entry *e; 1534 int err; 1535 1536 e = rb_entry(this, struct size_entry, rb); 1537 1538 this = rb_next(this); 1539 1540 if (!e->exists) { 1541 union ubifs_key key; 1542 1543 ino_key_init(c, &key, e->inum); 1544 err = ubifs_tnc_lookup(c, &key, c->sbuf); 1545 if (err && err != -ENOENT) 1546 return err; 1547 if (err == -ENOENT) { 1548 /* Remove data nodes that have no inode */ 1549 dbg_rcvry("removing ino %lu", 1550 (unsigned long)e->inum); 1551 err = ubifs_tnc_remove_ino(c, e->inum); 1552 if (err) 1553 return err; 1554 } else { 1555 struct ubifs_ino_node *ino = c->sbuf; 1556 1557 e->exists = 1; 1558 e->i_size = le64_to_cpu(ino->size); 1559 } 1560 } 1561 1562 if (e->exists && e->i_size < e->d_size) { 1563 ubifs_assert(c, !(c->ro_mount && in_place)); 1564 1565 /* 1566 * We found data that is outside the found inode size, 1567 * fixup the inode size 
1568 */ 1569 1570 if (in_place) { 1571 err = fix_size_in_place(c, e); 1572 if (err) 1573 return err; 1574 iput(e->inode); 1575 } else { 1576 err = inode_fix_size(c, e); 1577 if (err) 1578 return err; 1579 continue; 1580 } 1581 } 1582 1583 rb_erase(&e->rb, &c->size_tree); 1584 kfree(e); 1585 } 1586 1587 return 0; 1588 } 1589