// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) International Business Machines Corp., 2006
 *
 * Author: Artem Bityutskiy (Битюцкий Артём)
 */

/*
 * The UBI Eraseblock Association (EBA) sub-system.
 *
 * This sub-system is responsible for I/O to/from logical eraseblock.
 *
 * Although in this implementation the EBA table is fully kept and managed in
 * RAM, which assumes poor scalability, it might be (partially) maintained on
 * flash in future implementations.
 *
 * The EBA sub-system implements per-logical eraseblock locking. Before
 * accessing a logical eraseblock it is locked for reading or writing. The
 * per-logical eraseblock locking is implemented by means of the lock tree. The
 * lock tree is an RB-tree which refers all the currently locked logical
 * eraseblocks. The lock tree elements are &struct ubi_ltree_entry objects.
 * They are indexed by (@vol_id, @lnum) pairs.
 *
 * EBA also maintains the global sequence counter which is incremented each
 * time a logical eraseblock is mapped to a physical eraseblock and it is
 * stored in the volume identifier header. This means that each VID header has
 * a unique sequence number. The sequence number is only increased and we
 * assume 64 bits is enough to never overflow.
 */

#include <linux/slab.h>
#include <linux/crc32.h>
#include <linux/err.h>
#include "ubi.h"

/* Number of physical eraseblocks reserved for atomic LEB change operation */
#define EBA_RESERVED_PEBS 1

/**
 * struct ubi_eba_entry - structure encoding a single LEB -> PEB association
 * @pnum: the physical eraseblock number attached to the LEB
 *
 * This structure is encoding a LEB -> PEB association. Note that the LEB
 * number is not stored here, because it is the index used to access the
 * entries table.
46 */ 47 struct ubi_eba_entry { 48 int pnum; 49 }; 50 51 /** 52 * struct ubi_eba_table - LEB -> PEB association information 53 * @entries: the LEB to PEB mapping (one entry per LEB). 54 * 55 * This structure is private to the EBA logic and should be kept here. 56 * It is encoding the LEB to PEB association table, and is subject to 57 * changes. 58 */ 59 struct ubi_eba_table { 60 struct ubi_eba_entry *entries; 61 }; 62 63 /** 64 * next_sqnum - get next sequence number. 65 * @ubi: UBI device description object 66 * 67 * This function returns next sequence number to use, which is just the current 68 * global sequence counter value. It also increases the global sequence 69 * counter. 70 */ 71 unsigned long long ubi_next_sqnum(struct ubi_device *ubi) 72 { 73 unsigned long long sqnum; 74 75 spin_lock(&ubi->ltree_lock); 76 sqnum = ubi->global_sqnum++; 77 spin_unlock(&ubi->ltree_lock); 78 79 return sqnum; 80 } 81 82 /** 83 * ubi_get_compat - get compatibility flags of a volume. 84 * @ubi: UBI device description object 85 * @vol_id: volume ID 86 * 87 * This function returns compatibility flags for an internal volume. User 88 * volumes have no compatibility flags, so %0 is returned. 89 */ 90 static int ubi_get_compat(const struct ubi_device *ubi, int vol_id) 91 { 92 if (vol_id == UBI_LAYOUT_VOLUME_ID) 93 return UBI_LAYOUT_VOLUME_COMPAT; 94 return 0; 95 } 96 97 /** 98 * ubi_eba_get_ldesc - get information about a LEB 99 * @vol: volume description object 100 * @lnum: logical eraseblock number 101 * @ldesc: the LEB descriptor to fill 102 * 103 * Used to query information about a specific LEB. 104 * It is currently only returning the physical position of the LEB, but will be 105 * extended to provide more information. 
106 */ 107 void ubi_eba_get_ldesc(struct ubi_volume *vol, int lnum, 108 struct ubi_eba_leb_desc *ldesc) 109 { 110 ldesc->lnum = lnum; 111 ldesc->pnum = vol->eba_tbl->entries[lnum].pnum; 112 } 113 114 /** 115 * ubi_eba_create_table - allocate a new EBA table and initialize it with all 116 * LEBs unmapped 117 * @vol: volume containing the EBA table to copy 118 * @nentries: number of entries in the table 119 * 120 * Allocate a new EBA table and initialize it with all LEBs unmapped. 121 * Returns a valid pointer if it succeed, an ERR_PTR() otherwise. 122 */ 123 struct ubi_eba_table *ubi_eba_create_table(struct ubi_volume *vol, 124 int nentries) 125 { 126 struct ubi_eba_table *tbl; 127 int err = -ENOMEM; 128 int i; 129 130 tbl = kzalloc(sizeof(*tbl), GFP_KERNEL); 131 if (!tbl) 132 return ERR_PTR(-ENOMEM); 133 134 tbl->entries = kmalloc_array(nentries, sizeof(*tbl->entries), 135 GFP_KERNEL); 136 if (!tbl->entries) 137 goto err; 138 139 for (i = 0; i < nentries; i++) 140 tbl->entries[i].pnum = UBI_LEB_UNMAPPED; 141 142 return tbl; 143 144 err: 145 kfree(tbl->entries); 146 kfree(tbl); 147 148 return ERR_PTR(err); 149 } 150 151 /** 152 * ubi_eba_destroy_table - destroy an EBA table 153 * @tbl: the table to destroy 154 * 155 * Destroy an EBA table. 156 */ 157 void ubi_eba_destroy_table(struct ubi_eba_table *tbl) 158 { 159 if (!tbl) 160 return; 161 162 kfree(tbl->entries); 163 kfree(tbl); 164 } 165 166 /** 167 * ubi_eba_copy_table - copy the EBA table attached to vol into another table 168 * @vol: volume containing the EBA table to copy 169 * @dst: destination 170 * @nentries: number of entries to copy 171 * 172 * Copy the EBA table stored in vol into the one pointed by dst. 
173 */ 174 void ubi_eba_copy_table(struct ubi_volume *vol, struct ubi_eba_table *dst, 175 int nentries) 176 { 177 struct ubi_eba_table *src; 178 int i; 179 180 ubi_assert(dst && vol && vol->eba_tbl); 181 182 src = vol->eba_tbl; 183 184 for (i = 0; i < nentries; i++) 185 dst->entries[i].pnum = src->entries[i].pnum; 186 } 187 188 /** 189 * ubi_eba_replace_table - assign a new EBA table to a volume 190 * @vol: volume containing the EBA table to copy 191 * @tbl: new EBA table 192 * 193 * Assign a new EBA table to the volume and release the old one. 194 */ 195 void ubi_eba_replace_table(struct ubi_volume *vol, struct ubi_eba_table *tbl) 196 { 197 ubi_eba_destroy_table(vol->eba_tbl); 198 vol->eba_tbl = tbl; 199 } 200 201 /** 202 * ltree_lookup - look up the lock tree. 203 * @ubi: UBI device description object 204 * @vol_id: volume ID 205 * @lnum: logical eraseblock number 206 * 207 * This function returns a pointer to the corresponding &struct ubi_ltree_entry 208 * object if the logical eraseblock is locked and %NULL if it is not. 209 * @ubi->ltree_lock has to be locked. 210 */ 211 static struct ubi_ltree_entry *ltree_lookup(struct ubi_device *ubi, int vol_id, 212 int lnum) 213 { 214 struct rb_node *p; 215 216 p = ubi->ltree.rb_node; 217 while (p) { 218 struct ubi_ltree_entry *le; 219 220 le = rb_entry(p, struct ubi_ltree_entry, rb); 221 222 if (vol_id < le->vol_id) 223 p = p->rb_left; 224 else if (vol_id > le->vol_id) 225 p = p->rb_right; 226 else { 227 if (lnum < le->lnum) 228 p = p->rb_left; 229 else if (lnum > le->lnum) 230 p = p->rb_right; 231 else 232 return le; 233 } 234 } 235 236 return NULL; 237 } 238 239 /** 240 * ltree_add_entry - add new entry to the lock tree. 241 * @ubi: UBI device description object 242 * @vol_id: volume ID 243 * @lnum: logical eraseblock number 244 * 245 * This function adds new entry for logical eraseblock (@vol_id, @lnum) to the 246 * lock tree. If such entry is already there, its usage counter is increased. 
* Returns pointer to the lock tree entry or ERR_PTR(%-ENOMEM) if memory
 * allocation failed.
 */
static struct ubi_ltree_entry *ltree_add_entry(struct ubi_device *ubi,
					       int vol_id, int lnum)
{
	struct ubi_ltree_entry *le, *le1, *le_free;

	/*
	 * The candidate entry is allocated and initialized before
	 * @ubi->ltree_lock is taken, so no allocation happens while the
	 * spinlock is held.
	 */
	le = kmalloc(sizeof(struct ubi_ltree_entry), GFP_NOFS);
	if (!le)
		return ERR_PTR(-ENOMEM);

	le->users = 0;
	init_rwsem(&le->mutex);
	le->vol_id = vol_id;
	le->lnum = lnum;

	spin_lock(&ubi->ltree_lock);
	le1 = ltree_lookup(ubi, vol_id, lnum);

	if (le1) {
		/*
		 * This logical eraseblock is already locked. The newly
		 * allocated lock entry is not needed.
		 */
		le_free = le;
		le = le1;
	} else {
		struct rb_node **p, *parent = NULL;

		/*
		 * No lock entry, add the newly allocated one to the
		 * @ubi->ltree RB-tree.
		 */
		le_free = NULL;

		/* Find the insertion slot: ordered by vol_id, then by lnum */
		p = &ubi->ltree.rb_node;
		while (*p) {
			parent = *p;
			le1 = rb_entry(parent, struct ubi_ltree_entry, rb);

			if (vol_id < le1->vol_id)
				p = &(*p)->rb_left;
			else if (vol_id > le1->vol_id)
				p = &(*p)->rb_right;
			else {
				/* The lookup above found no equal key */
				ubi_assert(lnum != le1->lnum);
				if (lnum < le1->lnum)
					p = &(*p)->rb_left;
				else
					p = &(*p)->rb_right;
			}
		}

		rb_link_node(&le->rb, parent, p);
		rb_insert_color(&le->rb, &ubi->ltree);
	}
	/* Account for the caller, whether the entry is new or shared */
	le->users += 1;
	spin_unlock(&ubi->ltree_lock);

	/* Drop the unused candidate outside the spinlock (NULL if it was used) */
	kfree(le_free);
	return le;
}

/**
 * leb_read_lock - lock logical eraseblock for reading.
 * @ubi: UBI device description object
 * @vol_id: volume ID
 * @lnum: logical eraseblock number
 *
 * This function locks a logical eraseblock for reading. Returns zero in case
 * of success and a negative error code in case of failure.
319 */ 320 static int leb_read_lock(struct ubi_device *ubi, int vol_id, int lnum) 321 { 322 struct ubi_ltree_entry *le; 323 324 le = ltree_add_entry(ubi, vol_id, lnum); 325 if (IS_ERR(le)) 326 return PTR_ERR(le); 327 down_read(&le->mutex); 328 return 0; 329 } 330 331 /** 332 * leb_read_unlock - unlock logical eraseblock. 333 * @ubi: UBI device description object 334 * @vol_id: volume ID 335 * @lnum: logical eraseblock number 336 */ 337 static void leb_read_unlock(struct ubi_device *ubi, int vol_id, int lnum) 338 { 339 struct ubi_ltree_entry *le; 340 341 spin_lock(&ubi->ltree_lock); 342 le = ltree_lookup(ubi, vol_id, lnum); 343 le->users -= 1; 344 ubi_assert(le->users >= 0); 345 up_read(&le->mutex); 346 if (le->users == 0) { 347 rb_erase(&le->rb, &ubi->ltree); 348 kfree(le); 349 } 350 spin_unlock(&ubi->ltree_lock); 351 } 352 353 /** 354 * leb_write_lock - lock logical eraseblock for writing. 355 * @ubi: UBI device description object 356 * @vol_id: volume ID 357 * @lnum: logical eraseblock number 358 * 359 * This function locks a logical eraseblock for writing. Returns zero in case 360 * of success and a negative error code in case of failure. 361 */ 362 static int leb_write_lock(struct ubi_device *ubi, int vol_id, int lnum) 363 { 364 struct ubi_ltree_entry *le; 365 366 le = ltree_add_entry(ubi, vol_id, lnum); 367 if (IS_ERR(le)) 368 return PTR_ERR(le); 369 down_write(&le->mutex); 370 return 0; 371 } 372 373 /** 374 * leb_write_trylock - try to lock logical eraseblock for writing. 375 * @ubi: UBI device description object 376 * @vol_id: volume ID 377 * @lnum: logical eraseblock number 378 * 379 * This function locks a logical eraseblock for writing if there is no 380 * contention and does nothing if there is contention. Returns %0 in case of 381 * success, %1 in case of contention, and and a negative error code in case of 382 * failure. 
383 */ 384 static int leb_write_trylock(struct ubi_device *ubi, int vol_id, int lnum) 385 { 386 struct ubi_ltree_entry *le; 387 388 le = ltree_add_entry(ubi, vol_id, lnum); 389 if (IS_ERR(le)) 390 return PTR_ERR(le); 391 if (down_write_trylock(&le->mutex)) 392 return 0; 393 394 /* Contention, cancel */ 395 spin_lock(&ubi->ltree_lock); 396 le->users -= 1; 397 ubi_assert(le->users >= 0); 398 if (le->users == 0) { 399 rb_erase(&le->rb, &ubi->ltree); 400 kfree(le); 401 } 402 spin_unlock(&ubi->ltree_lock); 403 404 return 1; 405 } 406 407 /** 408 * leb_write_unlock - unlock logical eraseblock. 409 * @ubi: UBI device description object 410 * @vol_id: volume ID 411 * @lnum: logical eraseblock number 412 */ 413 static void leb_write_unlock(struct ubi_device *ubi, int vol_id, int lnum) 414 { 415 struct ubi_ltree_entry *le; 416 417 spin_lock(&ubi->ltree_lock); 418 le = ltree_lookup(ubi, vol_id, lnum); 419 le->users -= 1; 420 ubi_assert(le->users >= 0); 421 up_write(&le->mutex); 422 if (le->users == 0) { 423 rb_erase(&le->rb, &ubi->ltree); 424 kfree(le); 425 } 426 spin_unlock(&ubi->ltree_lock); 427 } 428 429 /** 430 * ubi_eba_is_mapped - check if a LEB is mapped. 431 * @vol: volume description object 432 * @lnum: logical eraseblock number 433 * 434 * This function returns true if the LEB is mapped, false otherwise. 435 */ 436 bool ubi_eba_is_mapped(struct ubi_volume *vol, int lnum) 437 { 438 return vol->eba_tbl->entries[lnum].pnum >= 0; 439 } 440 441 /** 442 * ubi_eba_unmap_leb - un-map logical eraseblock. 443 * @ubi: UBI device description object 444 * @vol: volume description object 445 * @lnum: logical eraseblock number 446 * 447 * This function un-maps logical eraseblock @lnum and schedules corresponding 448 * physical eraseblock for erasure. Returns zero in case of success and a 449 * negative error code in case of failure. 
450 */ 451 int ubi_eba_unmap_leb(struct ubi_device *ubi, struct ubi_volume *vol, 452 int lnum) 453 { 454 int err, pnum, vol_id = vol->vol_id; 455 456 if (ubi->ro_mode) 457 return -EROFS; 458 459 err = leb_write_lock(ubi, vol_id, lnum); 460 if (err) 461 return err; 462 463 pnum = vol->eba_tbl->entries[lnum].pnum; 464 if (pnum < 0) 465 /* This logical eraseblock is already unmapped */ 466 goto out_unlock; 467 468 dbg_eba("erase LEB %d:%d, PEB %d", vol_id, lnum, pnum); 469 470 down_read(&ubi->fm_eba_sem); 471 vol->eba_tbl->entries[lnum].pnum = UBI_LEB_UNMAPPED; 472 up_read(&ubi->fm_eba_sem); 473 err = ubi_wl_put_peb(ubi, vol_id, lnum, pnum, 0); 474 475 out_unlock: 476 leb_write_unlock(ubi, vol_id, lnum); 477 return err; 478 } 479 480 #ifdef CONFIG_MTD_UBI_FASTMAP 481 /** 482 * check_mapping - check and fixup a mapping 483 * @ubi: UBI device description object 484 * @vol: volume description object 485 * @lnum: logical eraseblock number 486 * @pnum: physical eraseblock number 487 * 488 * Checks whether a given mapping is valid. Fastmap cannot track LEB unmap 489 * operations, if such an operation is interrupted the mapping still looks 490 * good, but upon first read an ECC is reported to the upper layer. 491 * Normaly during the full-scan at attach time this is fixed, for Fastmap 492 * we have to deal with it while reading. 493 * If the PEB behind a LEB shows this symthom we change the mapping to 494 * %UBI_LEB_UNMAPPED and schedule the PEB for erasure. 495 * 496 * Returns 0 on success, negative error code in case of failure. 
*/
static int check_mapping(struct ubi_device *ubi, struct ubi_volume *vol, int lnum,
			 int *pnum)
{
	int err;
	struct ubi_vid_io_buf *vidb;
	struct ubi_vid_hdr *vid_hdr;

	/* Nothing to verify unless the device was attached via fastmap */
	if (!ubi->fast_attach)
		return 0;

	/* No check bitmap, or this LEB has already been checked once */
	if (!vol->checkmap || test_bit(lnum, vol->checkmap))
		return 0;

	vidb = ubi_alloc_vid_buf(ubi, GFP_NOFS);
	if (!vidb)
		return -ENOMEM;

	err = ubi_io_read_vid_hdr(ubi, *pnum, vidb, 0);
	if (err > 0 && err != UBI_IO_BITFLIPS) {
		/*
		 * Positive status other than bit-flips: the VID header is
		 * empty or bad, so the mapping is stale. Drop it.
		 */
		int torture = 0;

		switch (err) {
			case UBI_IO_FF:
			case UBI_IO_FF_BITFLIPS:
			case UBI_IO_BAD_HDR:
			case UBI_IO_BAD_HDR_EBADMSG:
				break;
			default:
				/* No other positive status is expected here */
				ubi_assert(0);
		}

		/* Request torture testing of the PEB for these two statuses */
		if (err == UBI_IO_BAD_HDR_EBADMSG || err == UBI_IO_FF_BITFLIPS)
			torture = 1;

		down_read(&ubi->fm_eba_sem);
		vol->eba_tbl->entries[lnum].pnum = UBI_LEB_UNMAPPED;
		up_read(&ubi->fm_eba_sem);
		/* NOTE(review): the return value of ubi_wl_put_peb() is ignored */
		ubi_wl_put_peb(ubi, vol->vol_id, lnum, *pnum, torture);

		/* Tell the caller that the LEB is now unmapped */
		*pnum = UBI_LEB_UNMAPPED;
	} else if (err < 0) {
		ubi_err(ubi, "unable to read VID header back from PEB %i: %i",
			*pnum, err);

		goto out_free;
	} else {
		int found_vol_id, found_lnum;

		ubi_assert(err == 0 || err == UBI_IO_BITFLIPS);

		/* Header is readable: it must describe exactly this LEB */
		vid_hdr = ubi_get_vid_hdr(vidb);
		found_vol_id = be32_to_cpu(vid_hdr->vol_id);
		found_lnum = be32_to_cpu(vid_hdr->lnum);

		if (found_lnum != lnum || found_vol_id != vol->vol_id) {
			ubi_err(ubi, "EBA mismatch! PEB %i is LEB %i:%i instead of LEB %i:%i",
				*pnum, found_vol_id, found_lnum, vol->vol_id, lnum);
			ubi_ro_mode(ubi);
			err = -EINVAL;
			goto out_free;
		}
	}

	/* Remember that this LEB was checked so the work is not repeated */
	set_bit(lnum, vol->checkmap);
	err = 0;

out_free:
	ubi_free_vid_buf(vidb);

	return err;
}
#else
/* Without fastmap support there is nothing to check: always succeeds */
static int check_mapping(struct ubi_device *ubi, struct ubi_volume *vol, int lnum,
		  int *pnum)
{
	return 0;
}
#endif

/**
 * ubi_eba_read_leb - read data.
* @ubi: UBI device description object
 * @vol: volume description object
 * @lnum: logical eraseblock number
 * @buf: buffer to store the read data
 * @offset: offset from where to read
 * @len: how many bytes to read
 * @check: data CRC check flag
 *
 * If the logical eraseblock @lnum is unmapped, @buf is filled with 0xFF
 * bytes. The @check flag only makes sense for static volumes and forces
 * eraseblock data CRC checking.
 *
 * In case of success this function returns zero. In case of a static volume,
 * if data CRC mismatches - %-EBADMSG is returned. %-EBADMSG may also be
 * returned for any volume type if an ECC error was detected by the MTD device
 * driver. Other negative error codes may be returned in case of other errors.
 */
int ubi_eba_read_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum,
		     void *buf, int offset, int len, int check)
{
	int err, pnum, scrub = 0, vol_id = vol->vol_id;
	struct ubi_vid_io_buf *vidb;
	struct ubi_vid_hdr *vid_hdr;
	uint32_t crc;

	err = leb_read_lock(ubi, vol_id, lnum);
	if (err)
		return err;

	pnum = vol->eba_tbl->entries[lnum].pnum;
	if (pnum >= 0) {
		/* May reset pnum to UBI_LEB_UNMAPPED if the mapping is stale */
		err = check_mapping(ubi, vol, lnum, &pnum);
		if (err < 0)
			goto out_unlock;
	}

	if (pnum == UBI_LEB_UNMAPPED) {
		/*
		 * The logical eraseblock is not mapped, fill the whole buffer
		 * with 0xFF bytes. The exception is static volumes for which
		 * it is an error to read unmapped logical eraseblocks.
		 */
		dbg_eba("read %d bytes from offset %d of LEB %d:%d (unmapped)",
			len, offset, vol_id, lnum);
		leb_read_unlock(ubi, vol_id, lnum);
		ubi_assert(vol->vol_type != UBI_STATIC_VOLUME);
		memset(buf, 0xFF, len);
		return 0;
	}

	dbg_eba("read %d bytes from offset %d of LEB %d:%d, PEB %d",
		len, offset, vol_id, lnum, pnum);

	/* The CRC check is never done for dynamic volumes */
	if (vol->vol_type == UBI_DYNAMIC_VOLUME)
		check = 0;

retry:
	if (check) {
		/* Fetch the expected data CRC from the VID header */
		vidb = ubi_alloc_vid_buf(ubi, GFP_NOFS);
		if (!vidb) {
			err = -ENOMEM;
			goto out_unlock;
		}

		vid_hdr = ubi_get_vid_hdr(vidb);

		err = ubi_io_read_vid_hdr(ubi, pnum, vidb, 1);
		if (err && err != UBI_IO_BITFLIPS) {
			if (err > 0) {
				/*
				 * The header is either absent or corrupted.
				 * The former case means there is a bug -
				 * switch to read-only mode just in case.
				 * The latter case means a real corruption - we
				 * may try to recover data. FIXME: but this is
				 * not implemented.
				 */
				if (err == UBI_IO_BAD_HDR_EBADMSG ||
				    err == UBI_IO_BAD_HDR) {
					ubi_warn(ubi, "corrupted VID header at PEB %d, LEB %d:%d",
						 pnum, vol_id, lnum);
					err = -EBADMSG;
				} else {
					/*
					 * Ending up here in the non-Fastmap case
					 * is a clear bug as the VID header had to
					 * be present at scan time to have it referenced.
					 * With fastmap the story is more complicated.
					 * Fastmap has the mapping info without the need
					 * of a full scan. So the LEB could have been
					 * unmapped, Fastmap cannot know this and keeps
					 * the LEB referenced.
					 * This is valid and works as the layer above UBI
					 * has to do bookkeeping about used/referenced
					 * LEBs in any case.
					 */
					if (ubi->fast_attach) {
						err = -EBADMSG;
					} else {
						err = -EINVAL;
						ubi_ro_mode(ubi);
					}
				}
			}
			/* Negative errors are passed through unchanged */
			goto out_free;
		} else if (err == UBI_IO_BITFLIPS)
			scrub = 1;

		ubi_assert(lnum < be32_to_cpu(vid_hdr->used_ebs));
		ubi_assert(len == be32_to_cpu(vid_hdr->data_size));

		crc = be32_to_cpu(vid_hdr->data_crc);
		ubi_free_vid_buf(vidb);
	}

	err = ubi_io_read_data(ubi, buf, pnum, offset, len);
	if (err) {
		if (err == UBI_IO_BITFLIPS)
			scrub = 1;
		else if (mtd_is_eccerr(err)) {
			/* ECC error: reported as is for dynamic volumes */
			if (vol->vol_type == UBI_DYNAMIC_VOLUME)
				goto out_unlock;
			scrub = 1;
			if (!check) {
				/*
				 * Static volume read without CRC checking:
				 * start over with checking enabled so the
				 * data CRC decides whether the data is good.
				 */
				ubi_msg(ubi, "force data checking");
				check = 1;
				goto retry;
			}
		} else
			goto out_unlock;
	}

	if (check) {
		uint32_t crc1 = crc32(UBI_CRC32_INIT, buf, len);
		if (crc1 != crc) {
			ubi_warn(ubi, "CRC error: calculated %#08x, must be %#08x",
				 crc1, crc);
			err = -EBADMSG;
			goto out_unlock;
		}
	}

	/* The return value is replaced by the scrub request result here */
	if (scrub)
		err = ubi_wl_scrub_peb(ubi, pnum);

	leb_read_unlock(ubi, vol_id, lnum);
	return err;

out_free:
	ubi_free_vid_buf(vidb);
out_unlock:
	leb_read_unlock(ubi, vol_id, lnum);
	return err;
}

/**
 * ubi_eba_read_leb_sg - read data into a scatter gather list.
 * @ubi: UBI device description object
 * @vol: volume description object
 * @sgl: UBI scatter gather list to store the read data
 * @lnum: logical eraseblock number
 * @offset: offset from where to read
 * @len: how many bytes to read
 * @check: data CRC check flag
 *
 * This function works exactly like ubi_eba_read_leb(). But instead of
 * storing the read data into a buffer it writes to an UBI scatter gather
 * list.
747 */ 748 int ubi_eba_read_leb_sg(struct ubi_device *ubi, struct ubi_volume *vol, 749 struct ubi_sgl *sgl, int lnum, int offset, int len, 750 int check) 751 { 752 int to_read; 753 int ret; 754 struct scatterlist *sg; 755 756 for (;;) { 757 ubi_assert(sgl->list_pos < UBI_MAX_SG_COUNT); 758 sg = &sgl->sg[sgl->list_pos]; 759 if (len < sg->length - sgl->page_pos) 760 to_read = len; 761 else 762 to_read = sg->length - sgl->page_pos; 763 764 ret = ubi_eba_read_leb(ubi, vol, lnum, 765 sg_virt(sg) + sgl->page_pos, offset, 766 to_read, check); 767 if (ret < 0) 768 return ret; 769 770 offset += to_read; 771 len -= to_read; 772 if (!len) { 773 sgl->page_pos += to_read; 774 if (sgl->page_pos == sg->length) { 775 sgl->list_pos++; 776 sgl->page_pos = 0; 777 } 778 779 break; 780 } 781 782 sgl->list_pos++; 783 sgl->page_pos = 0; 784 } 785 786 return ret; 787 } 788 789 /** 790 * try_recover_peb - try to recover from write failure. 791 * @vol: volume description object 792 * @pnum: the physical eraseblock to recover 793 * @lnum: logical eraseblock number 794 * @buf: data which was not written because of the write failure 795 * @offset: offset of the failed write 796 * @len: how many bytes should have been written 797 * @vidb: VID buffer 798 * @retry: whether the caller should retry in case of failure 799 * 800 * This function is called in case of a write failure and moves all good data 801 * from the potentially bad physical eraseblock to a good physical eraseblock. 802 * This function also writes the data which was not written due to the failure. 803 * Returns 0 in case of success, and a negative error code in case of failure. 804 * In case of failure, the %retry parameter is set to false if this is a fatal 805 * error (retrying won't help), and true otherwise. 
*/
static int try_recover_peb(struct ubi_volume *vol, int pnum, int lnum,
			   const void *buf, int offset, int len,
			   struct ubi_vid_io_buf *vidb, bool *retry)
{
	struct ubi_device *ubi = vol->ubi;
	struct ubi_vid_hdr *vid_hdr;
	int new_pnum, err, vol_id = vol->vol_id, data_size;
	uint32_t crc;

	/* Failures up to the point where this is flipped are treated as fatal */
	*retry = false;

	new_pnum = ubi_wl_get_peb(ubi);
	if (new_pnum < 0) {
		err = new_pnum;
		goto out_put;
	}

	ubi_msg(ubi, "recover PEB %d, move data to PEB %d",
		pnum, new_pnum);

	err = ubi_io_read_vid_hdr(ubi, pnum, vidb, 1);
	if (err && err != UBI_IO_BITFLIPS) {
		/* Positive (UBI_IO_*) statuses are reported as -EIO */
		if (err > 0)
			err = -EIO;
		goto out_put;
	}

	vid_hdr = ubi_get_vid_hdr(vidb);
	ubi_assert(vid_hdr->vol_type == UBI_VID_DYNAMIC);

	/* ubi->peb_buf is used as staging buffer under ubi->buf_mutex */
	mutex_lock(&ubi->buf_mutex);
	memset(ubi->peb_buf + offset, 0xFF, len);

	/* Read everything before the area where the write failure happened */
	if (offset > 0) {
		err = ubi_io_read_data(ubi, ubi->peb_buf, pnum, 0, offset);
		if (err && err != UBI_IO_BITFLIPS)
			goto out_unlock;
	}

	/* From here on a failure concerns the new PEB, so retrying may help */
	*retry = true;

	memcpy(ubi->peb_buf + offset, buf, len);

	/* Old data plus the new data are written as one copy with its own CRC */
	data_size = offset + len;
	crc = crc32(UBI_CRC32_INIT, ubi->peb_buf, data_size);
	vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi));
	vid_hdr->copy_flag = 1;
	vid_hdr->data_size = cpu_to_be32(data_size);
	vid_hdr->data_crc = cpu_to_be32(crc);
	err = ubi_io_write_vid_hdr(ubi, new_pnum, vidb);
	if (err)
		goto out_unlock;

	err = ubi_io_write_data(ubi, ubi->peb_buf, new_pnum, 0, data_size);

out_unlock:
	mutex_unlock(&ubi->buf_mutex);

	/* Switch the mapping only after the new copy was fully written */
	if (!err)
		vol->eba_tbl->entries[lnum].pnum = new_pnum;

out_put:
	/*
	 * NOTE(review): no matching down_read() of fm_eba_sem is visible in
	 * this function; presumably ubi_wl_get_peb() returns with the
	 * semaphore held for reading, even on failure - confirm in the WL code.
	 */
	up_read(&ubi->fm_eba_sem);

	if (!err) {
		/* The old PEB is handed back with the torture flag set */
		ubi_wl_put_peb(ubi, vol_id, lnum, pnum, 1);
		ubi_msg(ubi, "data was successfully recovered");
	} else if (new_pnum >= 0) {
		/*
		 * Bad luck? This physical eraseblock is bad too? Crud. Let's
		 * try to get another one.
		 */
		ubi_wl_put_peb(ubi, vol_id, lnum, new_pnum, 1);
		ubi_warn(ubi, "failed to write to PEB %d", new_pnum);
	}

	return err;
}

/**
 * recover_peb - recover from write failure.
 * @ubi: UBI device description object
 * @pnum: the physical eraseblock to recover
 * @vol_id: volume ID
 * @lnum: logical eraseblock number
 * @buf: data which was not written because of the write failure
 * @offset: offset of the failed write
 * @len: how many bytes should have been written
 *
 * This function is called in case of a write failure and moves all good data
 * from the potentially bad physical eraseblock to a good physical eraseblock.
 * This function also writes the data which was not written due to the failure.
 * Returns 0 in case of success, and a negative error code in case of failure.
 * This function makes up to %UBI_IO_RETRIES + 1 attempts before giving up.
 */
static int recover_peb(struct ubi_device *ubi, int pnum, int vol_id, int lnum,
		       const void *buf, int offset, int len)
{
	int err, idx = vol_id2idx(ubi, vol_id), tries;
	struct ubi_volume *vol = ubi->volumes[idx];
	struct ubi_vid_io_buf *vidb;

	/* One VID buffer is shared by all the attempts below */
	vidb = ubi_alloc_vid_buf(ubi, GFP_NOFS);
	if (!vidb)
		return -ENOMEM;

	for (tries = 0; tries <= UBI_IO_RETRIES; tries++) {
		bool retry;

		err = try_recover_peb(vol, pnum, lnum, buf, offset, len, vidb,
				      &retry);
		/* Stop on success or when the helper reports a fatal error */
		if (!err || !retry)
			break;

		ubi_msg(ubi, "try again");
	}

	ubi_free_vid_buf(vidb);

	return err;
}

/**
 * try_write_vid_and_data - try to write VID header and data to a new PEB.
932 * @vol: volume description object 933 * @lnum: logical eraseblock number 934 * @vidb: the VID buffer to write 935 * @buf: buffer containing the data 936 * @offset: where to start writing data 937 * @len: how many bytes should be written 938 * 939 * This function tries to write VID header and data belonging to logical 940 * eraseblock @lnum of volume @vol to a new physical eraseblock. Returns zero 941 * in case of success and a negative error code in case of failure. 942 * In case of error, it is possible that something was still written to the 943 * flash media, but may be some garbage. 944 */ 945 static int try_write_vid_and_data(struct ubi_volume *vol, int lnum, 946 struct ubi_vid_io_buf *vidb, const void *buf, 947 int offset, int len) 948 { 949 struct ubi_device *ubi = vol->ubi; 950 int pnum, opnum, err, vol_id = vol->vol_id; 951 952 pnum = ubi_wl_get_peb(ubi); 953 if (pnum < 0) { 954 err = pnum; 955 goto out_put; 956 } 957 958 opnum = vol->eba_tbl->entries[lnum].pnum; 959 960 dbg_eba("write VID hdr and %d bytes at offset %d of LEB %d:%d, PEB %d", 961 len, offset, vol_id, lnum, pnum); 962 963 err = ubi_io_write_vid_hdr(ubi, pnum, vidb); 964 if (err) { 965 ubi_warn(ubi, "failed to write VID header to LEB %d:%d, PEB %d", 966 vol_id, lnum, pnum); 967 goto out_put; 968 } 969 970 if (len) { 971 err = ubi_io_write_data(ubi, buf, pnum, offset, len); 972 if (err) { 973 ubi_warn(ubi, 974 "failed to write %d bytes at offset %d of LEB %d:%d, PEB %d", 975 len, offset, vol_id, lnum, pnum); 976 goto out_put; 977 } 978 } 979 980 vol->eba_tbl->entries[lnum].pnum = pnum; 981 982 out_put: 983 up_read(&ubi->fm_eba_sem); 984 985 if (err && pnum >= 0) 986 err = ubi_wl_put_peb(ubi, vol_id, lnum, pnum, 1); 987 else if (!err && opnum >= 0) 988 err = ubi_wl_put_peb(ubi, vol_id, lnum, opnum, 0); 989 990 return err; 991 } 992 993 /** 994 * ubi_eba_write_leb - write data to dynamic volume. 
* @ubi: UBI device description object
 * @vol: volume description object
 * @lnum: logical eraseblock number
 * @buf: the data to write
 * @offset: offset within the logical eraseblock where to write
 * @len: how many bytes to write
 *
 * This function writes data to logical eraseblock @lnum of a dynamic volume
 * @vol. Returns zero in case of success and a negative error code in case
 * of failure. In case of error, it is possible that something was still
 * written to the flash media, but may be some garbage.
 * This function retries %UBI_IO_RETRIES times before giving up.
 */
int ubi_eba_write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum,
		      const void *buf, int offset, int len)
{
	int err, pnum, tries, vol_id = vol->vol_id;
	struct ubi_vid_io_buf *vidb;
	struct ubi_vid_hdr *vid_hdr;

	if (ubi->ro_mode)
		return -EROFS;

	err = leb_write_lock(ubi, vol_id, lnum);
	if (err)
		return err;

	pnum = vol->eba_tbl->entries[lnum].pnum;
	if (pnum >= 0) {
		/* May reset pnum to UBI_LEB_UNMAPPED if the mapping is stale */
		err = check_mapping(ubi, vol, lnum, &pnum);
		if (err < 0)
			goto out;
	}

	if (pnum >= 0) {
		/* The LEB is mapped: write straight into the existing PEB */
		dbg_eba("write %d bytes at offset %d of LEB %d:%d, PEB %d",
			len, offset, vol_id, lnum, pnum);

		err = ubi_io_write_data(ubi, buf, pnum, offset, len);
		if (err) {
			ubi_warn(ubi, "failed to write data to PEB %d", pnum);
			/* Recovery is attempted only for -EIO with bad_allowed set */
			if (err == -EIO && ubi->bad_allowed)
				err = recover_peb(ubi, pnum, vol_id, lnum, buf,
						  offset, len);
		}

		goto out;
	}

	/*
	 * The logical eraseblock is not mapped. We have to get a free physical
	 * eraseblock and write the volume identifier header there first.
	 */
	vidb = ubi_alloc_vid_buf(ubi, GFP_NOFS);
	if (!vidb) {
		/* This path does not switch the device to read-only mode */
		leb_write_unlock(ubi, vol_id, lnum);
		return -ENOMEM;
	}

	vid_hdr = ubi_get_vid_hdr(vidb);

	vid_hdr->vol_type = UBI_VID_DYNAMIC;
	vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi));
	vid_hdr->vol_id = cpu_to_be32(vol_id);
	vid_hdr->lnum = cpu_to_be32(lnum);
	vid_hdr->compat = ubi_get_compat(ubi, vol_id);
	vid_hdr->data_pad = cpu_to_be32(vol->data_pad);

	for (tries = 0; tries <= UBI_IO_RETRIES; tries++) {
		err = try_write_vid_and_data(vol, lnum, vidb, buf, offset, len);
		/* Only -EIO with bad_allowed set is worth another attempt */
		if (err != -EIO || !ubi->bad_allowed)
			break;

		/*
		 * Fortunately, this is the first write operation to this
		 * physical eraseblock, so just put it and request a new one.
		 * We assume that if this physical eraseblock went bad, the
		 * erase code will handle that.
		 */
		vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi));
		ubi_msg(ubi, "try another PEB");
	}

	ubi_free_vid_buf(vidb);

out:
	/* Any error reaching this label switches the device to read-only mode */
	if (err)
		ubi_ro_mode(ubi);

	leb_write_unlock(ubi, vol_id, lnum);

	return err;
}

/**
 * ubi_eba_write_leb_st - write data to static volume.
 * @ubi: UBI device description object
 * @vol: volume description object
 * @lnum: logical eraseblock number
 * @buf: data to write
 * @len: how many bytes to write
 * @used_ebs: how many logical eraseblocks will this volume contain
 *
 * This function writes data to logical eraseblock @lnum of static volume
 * @vol. The @used_ebs argument should contain total number of logical
 * eraseblock in this static volume.
 *
 * When writing to the last logical eraseblock, the @len argument doesn't have
 * to be aligned to the minimal I/O unit size. Instead, it has to be equivalent
 * to the real data size, although the @buf buffer has to contain the
 * alignment.
In all other cases, @len has to be aligned. 1106 * 1107 * It is prohibited to write more than once to logical eraseblocks of static 1108 * volumes. This function returns zero in case of success and a negative error 1109 * code in case of failure. 1110 */ 1111 int ubi_eba_write_leb_st(struct ubi_device *ubi, struct ubi_volume *vol, 1112 int lnum, const void *buf, int len, int used_ebs) 1113 { 1114 int err, tries, data_size = len, vol_id = vol->vol_id; 1115 struct ubi_vid_io_buf *vidb; 1116 struct ubi_vid_hdr *vid_hdr; 1117 uint32_t crc; 1118 1119 if (ubi->ro_mode) 1120 return -EROFS; 1121 1122 if (lnum == used_ebs - 1) 1123 /* If this is the last LEB @len may be unaligned */ 1124 len = ALIGN(data_size, ubi->min_io_size); 1125 else 1126 ubi_assert(!(len & (ubi->min_io_size - 1))); 1127 1128 vidb = ubi_alloc_vid_buf(ubi, GFP_NOFS); 1129 if (!vidb) 1130 return -ENOMEM; 1131 1132 vid_hdr = ubi_get_vid_hdr(vidb); 1133 1134 err = leb_write_lock(ubi, vol_id, lnum); 1135 if (err) 1136 goto out; 1137 1138 vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); 1139 vid_hdr->vol_id = cpu_to_be32(vol_id); 1140 vid_hdr->lnum = cpu_to_be32(lnum); 1141 vid_hdr->compat = ubi_get_compat(ubi, vol_id); 1142 vid_hdr->data_pad = cpu_to_be32(vol->data_pad); 1143 1144 crc = crc32(UBI_CRC32_INIT, buf, data_size); 1145 vid_hdr->vol_type = UBI_VID_STATIC; 1146 vid_hdr->data_size = cpu_to_be32(data_size); 1147 vid_hdr->used_ebs = cpu_to_be32(used_ebs); 1148 vid_hdr->data_crc = cpu_to_be32(crc); 1149 1150 ubi_assert(vol->eba_tbl->entries[lnum].pnum < 0); 1151 1152 for (tries = 0; tries <= UBI_IO_RETRIES; tries++) { 1153 err = try_write_vid_and_data(vol, lnum, vidb, buf, 0, len); 1154 if (err != -EIO || !ubi->bad_allowed) 1155 break; 1156 1157 vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); 1158 ubi_msg(ubi, "try another PEB"); 1159 } 1160 1161 if (err) 1162 ubi_ro_mode(ubi); 1163 1164 leb_write_unlock(ubi, vol_id, lnum); 1165 1166 out: 1167 ubi_free_vid_buf(vidb); 1168 1169 return err; 1170 } 
1171 1172 /* 1173 * ubi_eba_atomic_leb_change - change logical eraseblock atomically. 1174 * @ubi: UBI device description object 1175 * @vol: volume description object 1176 * @lnum: logical eraseblock number 1177 * @buf: data to write 1178 * @len: how many bytes to write 1179 * 1180 * This function changes the contents of a logical eraseblock atomically. @buf 1181 * has to contain new logical eraseblock data, and @len - the length of the 1182 * data, which has to be aligned. This function guarantees that in case of an 1183 * unclean reboot the old contents is preserved. Returns zero in case of 1184 * success and a negative error code in case of failure. 1185 * 1186 * UBI reserves one LEB for the "atomic LEB change" operation, so only one 1187 * LEB change may be done at a time. This is ensured by @ubi->alc_mutex. 1188 */ 1189 int ubi_eba_atomic_leb_change(struct ubi_device *ubi, struct ubi_volume *vol, 1190 int lnum, const void *buf, int len) 1191 { 1192 int err, tries, vol_id = vol->vol_id; 1193 struct ubi_vid_io_buf *vidb; 1194 struct ubi_vid_hdr *vid_hdr; 1195 uint32_t crc; 1196 1197 if (ubi->ro_mode) 1198 return -EROFS; 1199 1200 if (len == 0) { 1201 /* 1202 * Special case when data length is zero. In this case the LEB 1203 * has to be unmapped and mapped somewhere else. 
1204 */ 1205 err = ubi_eba_unmap_leb(ubi, vol, lnum); 1206 if (err) 1207 return err; 1208 return ubi_eba_write_leb(ubi, vol, lnum, NULL, 0, 0); 1209 } 1210 1211 vidb = ubi_alloc_vid_buf(ubi, GFP_NOFS); 1212 if (!vidb) 1213 return -ENOMEM; 1214 1215 vid_hdr = ubi_get_vid_hdr(vidb); 1216 1217 mutex_lock(&ubi->alc_mutex); 1218 err = leb_write_lock(ubi, vol_id, lnum); 1219 if (err) 1220 goto out_mutex; 1221 1222 vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); 1223 vid_hdr->vol_id = cpu_to_be32(vol_id); 1224 vid_hdr->lnum = cpu_to_be32(lnum); 1225 vid_hdr->compat = ubi_get_compat(ubi, vol_id); 1226 vid_hdr->data_pad = cpu_to_be32(vol->data_pad); 1227 1228 crc = crc32(UBI_CRC32_INIT, buf, len); 1229 vid_hdr->vol_type = UBI_VID_DYNAMIC; 1230 vid_hdr->data_size = cpu_to_be32(len); 1231 vid_hdr->copy_flag = 1; 1232 vid_hdr->data_crc = cpu_to_be32(crc); 1233 1234 dbg_eba("change LEB %d:%d", vol_id, lnum); 1235 1236 for (tries = 0; tries <= UBI_IO_RETRIES; tries++) { 1237 err = try_write_vid_and_data(vol, lnum, vidb, buf, 0, len); 1238 if (err != -EIO || !ubi->bad_allowed) 1239 break; 1240 1241 vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); 1242 ubi_msg(ubi, "try another PEB"); 1243 } 1244 1245 /* 1246 * This flash device does not admit of bad eraseblocks or 1247 * something nasty and unexpected happened. Switch to read-only 1248 * mode just in case. 1249 */ 1250 if (err) 1251 ubi_ro_mode(ubi); 1252 1253 leb_write_unlock(ubi, vol_id, lnum); 1254 1255 out_mutex: 1256 mutex_unlock(&ubi->alc_mutex); 1257 ubi_free_vid_buf(vidb); 1258 return err; 1259 } 1260 1261 /** 1262 * is_error_sane - check whether a read error is sane. 1263 * @err: code of the error happened during reading 1264 * 1265 * This is a helper function for 'ubi_eba_copy_leb()' which is called when we 1266 * cannot read data from the target PEB (an error @err happened). If the error 1267 * code is sane, then we treat this error as non-fatal. 
Otherwise the error is 1268 * fatal and UBI will be switched to R/O mode later. 1269 * 1270 * The idea is that we try not to switch to R/O mode if the read error is 1271 * something which suggests there was a real read problem. E.g., %-EIO. Or a 1272 * memory allocation failed (-%ENOMEM). Otherwise, it is safer to switch to R/O 1273 * mode, simply because we do not know what happened at the MTD level, and we 1274 * cannot handle this. E.g., the underlying driver may have become crazy, and 1275 * it is safer to switch to R/O mode to preserve the data. 1276 * 1277 * And bear in mind, this is about reading from the target PEB, i.e. the PEB 1278 * which we have just written. 1279 */ 1280 static int is_error_sane(int err) 1281 { 1282 if (err == -EIO || err == -ENOMEM || err == UBI_IO_BAD_HDR || 1283 err == UBI_IO_BAD_HDR_EBADMSG || err == -ETIMEDOUT) 1284 return 0; 1285 return 1; 1286 } 1287 1288 /** 1289 * ubi_eba_copy_leb - copy logical eraseblock. 1290 * @ubi: UBI device description object 1291 * @from: physical eraseblock number from where to copy 1292 * @to: physical eraseblock number where to copy 1293 * @vid_hdr: VID header of the @from physical eraseblock 1294 * 1295 * This function copies logical eraseblock from physical eraseblock @from to 1296 * physical eraseblock @to. The @vid_hdr buffer may be changed by this 1297 * function. Returns: 1298 * o %0 in case of success; 1299 * o %MOVE_CANCEL_RACE, %MOVE_TARGET_WR_ERR, %MOVE_TARGET_BITFLIPS, etc; 1300 * o a negative error code in case of failure. 
1301 */ 1302 int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, 1303 struct ubi_vid_io_buf *vidb) 1304 { 1305 int err, vol_id, lnum, data_size, aldata_size, idx; 1306 struct ubi_vid_hdr *vid_hdr = ubi_get_vid_hdr(vidb); 1307 struct ubi_volume *vol; 1308 uint32_t crc; 1309 1310 ubi_assert(rwsem_is_locked(&ubi->fm_eba_sem)); 1311 1312 vol_id = be32_to_cpu(vid_hdr->vol_id); 1313 lnum = be32_to_cpu(vid_hdr->lnum); 1314 1315 dbg_wl("copy LEB %d:%d, PEB %d to PEB %d", vol_id, lnum, from, to); 1316 1317 if (vid_hdr->vol_type == UBI_VID_STATIC) { 1318 data_size = be32_to_cpu(vid_hdr->data_size); 1319 aldata_size = ALIGN(data_size, ubi->min_io_size); 1320 } else 1321 data_size = aldata_size = 1322 ubi->leb_size - be32_to_cpu(vid_hdr->data_pad); 1323 1324 idx = vol_id2idx(ubi, vol_id); 1325 spin_lock(&ubi->volumes_lock); 1326 /* 1327 * Note, we may race with volume deletion, which means that the volume 1328 * this logical eraseblock belongs to might be being deleted. Since the 1329 * volume deletion un-maps all the volume's logical eraseblocks, it will 1330 * be locked in 'ubi_wl_put_peb()' and wait for the WL worker to finish. 1331 */ 1332 vol = ubi->volumes[idx]; 1333 spin_unlock(&ubi->volumes_lock); 1334 if (!vol) { 1335 /* No need to do further work, cancel */ 1336 dbg_wl("volume %d is being removed, cancel", vol_id); 1337 return MOVE_CANCEL_RACE; 1338 } 1339 1340 /* 1341 * We do not want anybody to write to this logical eraseblock while we 1342 * are moving it, so lock it. 1343 * 1344 * Note, we are using non-waiting locking here, because we cannot sleep 1345 * on the LEB, since it may cause deadlocks. Indeed, imagine a task is 1346 * unmapping the LEB which is mapped to the PEB we are going to move 1347 * (@from). This task locks the LEB and goes sleep in the 1348 * 'ubi_wl_put_peb()' function on the @ubi->move_mutex. In turn, we are 1349 * holding @ubi->move_mutex and go sleep on the LEB lock. 
So, if the 1350 * LEB is already locked, we just do not move it and return 1351 * %MOVE_RETRY. Note, we do not return %MOVE_CANCEL_RACE here because 1352 * we do not know the reasons of the contention - it may be just a 1353 * normal I/O on this LEB, so we want to re-try. 1354 */ 1355 err = leb_write_trylock(ubi, vol_id, lnum); 1356 if (err) { 1357 dbg_wl("contention on LEB %d:%d, cancel", vol_id, lnum); 1358 return MOVE_RETRY; 1359 } 1360 1361 /* 1362 * The LEB might have been put meanwhile, and the task which put it is 1363 * probably waiting on @ubi->move_mutex. No need to continue the work, 1364 * cancel it. 1365 */ 1366 if (vol->eba_tbl->entries[lnum].pnum != from) { 1367 dbg_wl("LEB %d:%d is no longer mapped to PEB %d, mapped to PEB %d, cancel", 1368 vol_id, lnum, from, vol->eba_tbl->entries[lnum].pnum); 1369 err = MOVE_CANCEL_RACE; 1370 goto out_unlock_leb; 1371 } 1372 1373 /* 1374 * OK, now the LEB is locked and we can safely start moving it. Since 1375 * this function utilizes the @ubi->peb_buf buffer which is shared 1376 * with some other functions - we lock the buffer by taking the 1377 * @ubi->buf_mutex. 1378 */ 1379 mutex_lock(&ubi->buf_mutex); 1380 dbg_wl("read %d bytes of data", aldata_size); 1381 err = ubi_io_read_data(ubi, ubi->peb_buf, from, 0, aldata_size); 1382 if (err && err != UBI_IO_BITFLIPS) { 1383 ubi_warn(ubi, "error %d while reading data from PEB %d", 1384 err, from); 1385 err = MOVE_SOURCE_RD_ERR; 1386 goto out_unlock_buf; 1387 } 1388 1389 /* 1390 * Now we have got to calculate how much data we have to copy. In 1391 * case of a static volume it is fairly easy - the VID header contains 1392 * the data size. In case of a dynamic volume it is more difficult - we 1393 * have to read the contents, cut 0xFF bytes from the end and copy only 1394 * the first part. We must do this to avoid writing 0xFF bytes as it 1395 * may have some side-effects. And not only this. 
It is important not 1396 * to include those 0xFFs to CRC because later the they may be filled 1397 * by data. 1398 */ 1399 if (vid_hdr->vol_type == UBI_VID_DYNAMIC) 1400 aldata_size = data_size = 1401 ubi_calc_data_len(ubi, ubi->peb_buf, data_size); 1402 1403 cond_resched(); 1404 crc = crc32(UBI_CRC32_INIT, ubi->peb_buf, data_size); 1405 cond_resched(); 1406 1407 /* 1408 * It may turn out to be that the whole @from physical eraseblock 1409 * contains only 0xFF bytes. Then we have to only write the VID header 1410 * and do not write any data. This also means we should not set 1411 * @vid_hdr->copy_flag, @vid_hdr->data_size, and @vid_hdr->data_crc. 1412 */ 1413 if (data_size > 0) { 1414 vid_hdr->copy_flag = 1; 1415 vid_hdr->data_size = cpu_to_be32(data_size); 1416 vid_hdr->data_crc = cpu_to_be32(crc); 1417 } 1418 vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); 1419 1420 err = ubi_io_write_vid_hdr(ubi, to, vidb); 1421 if (err) { 1422 if (err == -EIO) 1423 err = MOVE_TARGET_WR_ERR; 1424 goto out_unlock_buf; 1425 } 1426 1427 cond_resched(); 1428 1429 /* Read the VID header back and check if it was written correctly */ 1430 err = ubi_io_read_vid_hdr(ubi, to, vidb, 1); 1431 if (err) { 1432 if (err != UBI_IO_BITFLIPS) { 1433 ubi_warn(ubi, "error %d while reading VID header back from PEB %d", 1434 err, to); 1435 if (is_error_sane(err)) 1436 err = MOVE_TARGET_RD_ERR; 1437 } else 1438 err = MOVE_TARGET_BITFLIPS; 1439 goto out_unlock_buf; 1440 } 1441 1442 if (data_size > 0) { 1443 err = ubi_io_write_data(ubi, ubi->peb_buf, to, 0, aldata_size); 1444 if (err) { 1445 if (err == -EIO) 1446 err = MOVE_TARGET_WR_ERR; 1447 goto out_unlock_buf; 1448 } 1449 1450 cond_resched(); 1451 } 1452 1453 ubi_assert(vol->eba_tbl->entries[lnum].pnum == from); 1454 vol->eba_tbl->entries[lnum].pnum = to; 1455 1456 out_unlock_buf: 1457 mutex_unlock(&ubi->buf_mutex); 1458 out_unlock_leb: 1459 leb_write_unlock(ubi, vol_id, lnum); 1460 return err; 1461 } 1462 1463 /** 1464 * print_rsvd_warning - warn 
about not having enough reserved PEBs. 1465 * @ubi: UBI device description object 1466 * 1467 * This is a helper function for 'ubi_eba_init()' which is called when UBI 1468 * cannot reserve enough PEBs for bad block handling. This function makes a 1469 * decision whether we have to print a warning or not. The algorithm is as 1470 * follows: 1471 * o if this is a new UBI image, then just print the warning 1472 * o if this is an UBI image which has already been used for some time, print 1473 * a warning only if we can reserve less than 10% of the expected amount of 1474 * the reserved PEB. 1475 * 1476 * The idea is that when UBI is used, PEBs become bad, and the reserved pool 1477 * of PEBs becomes smaller, which is normal and we do not want to scare users 1478 * with a warning every time they attach the MTD device. This was an issue 1479 * reported by real users. 1480 */ 1481 static void print_rsvd_warning(struct ubi_device *ubi, 1482 struct ubi_attach_info *ai) 1483 { 1484 /* 1485 * The 1 << 18 (256KiB) number is picked randomly, just a reasonably 1486 * large number to distinguish between newly flashed and used images. 1487 */ 1488 if (ai->max_sqnum > (1 << 18)) { 1489 int min = ubi->beb_rsvd_level / 10; 1490 1491 if (!min) 1492 min = 1; 1493 if (ubi->beb_rsvd_pebs > min) 1494 return; 1495 } 1496 1497 ubi_warn(ubi, "cannot reserve enough PEBs for bad PEB handling, reserved %d, need %d", 1498 ubi->beb_rsvd_pebs, ubi->beb_rsvd_level); 1499 if (ubi->corr_peb_count) 1500 ubi_warn(ubi, "%d PEBs are corrupted and not used", 1501 ubi->corr_peb_count); 1502 } 1503 1504 /** 1505 * self_check_eba - run a self check on the EBA table constructed by fastmap. 1506 * @ubi: UBI device description object 1507 * @ai_fastmap: UBI attach info object created by fastmap 1508 * @ai_scan: UBI attach info object created by scanning 1509 * 1510 * Returns < 0 in case of an internal error, 0 otherwise. 
1511 * If a bad EBA table entry was found it will be printed out and 1512 * ubi_assert() triggers. 1513 */ 1514 int self_check_eba(struct ubi_device *ubi, struct ubi_attach_info *ai_fastmap, 1515 struct ubi_attach_info *ai_scan) 1516 { 1517 int i, j, num_volumes, ret = 0; 1518 int **scan_eba, **fm_eba; 1519 struct ubi_ainf_volume *av; 1520 struct ubi_volume *vol; 1521 struct ubi_ainf_peb *aeb; 1522 struct rb_node *rb; 1523 1524 num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT; 1525 1526 scan_eba = kmalloc_array(num_volumes, sizeof(*scan_eba), GFP_KERNEL); 1527 if (!scan_eba) 1528 return -ENOMEM; 1529 1530 fm_eba = kmalloc_array(num_volumes, sizeof(*fm_eba), GFP_KERNEL); 1531 if (!fm_eba) { 1532 kfree(scan_eba); 1533 return -ENOMEM; 1534 } 1535 1536 for (i = 0; i < num_volumes; i++) { 1537 vol = ubi->volumes[i]; 1538 if (!vol) 1539 continue; 1540 1541 scan_eba[i] = kmalloc_array(vol->reserved_pebs, 1542 sizeof(**scan_eba), 1543 GFP_KERNEL); 1544 if (!scan_eba[i]) { 1545 ret = -ENOMEM; 1546 goto out_free; 1547 } 1548 1549 fm_eba[i] = kmalloc_array(vol->reserved_pebs, 1550 sizeof(**fm_eba), 1551 GFP_KERNEL); 1552 if (!fm_eba[i]) { 1553 ret = -ENOMEM; 1554 goto out_free; 1555 } 1556 1557 for (j = 0; j < vol->reserved_pebs; j++) 1558 scan_eba[i][j] = fm_eba[i][j] = UBI_LEB_UNMAPPED; 1559 1560 av = ubi_find_av(ai_scan, idx2vol_id(ubi, i)); 1561 if (!av) 1562 continue; 1563 1564 ubi_rb_for_each_entry(rb, aeb, &av->root, u.rb) 1565 scan_eba[i][aeb->lnum] = aeb->pnum; 1566 1567 av = ubi_find_av(ai_fastmap, idx2vol_id(ubi, i)); 1568 if (!av) 1569 continue; 1570 1571 ubi_rb_for_each_entry(rb, aeb, &av->root, u.rb) 1572 fm_eba[i][aeb->lnum] = aeb->pnum; 1573 1574 for (j = 0; j < vol->reserved_pebs; j++) { 1575 if (scan_eba[i][j] != fm_eba[i][j]) { 1576 if (scan_eba[i][j] == UBI_LEB_UNMAPPED || 1577 fm_eba[i][j] == UBI_LEB_UNMAPPED) 1578 continue; 1579 1580 ubi_err(ubi, "LEB:%i:%i is PEB:%i instead of %i!", 1581 vol->vol_id, j, fm_eba[i][j], 1582 scan_eba[i][j]); 1583 
ubi_assert(0); 1584 } 1585 } 1586 } 1587 1588 out_free: 1589 for (i = 0; i < num_volumes; i++) { 1590 if (!ubi->volumes[i]) 1591 continue; 1592 1593 kfree(scan_eba[i]); 1594 kfree(fm_eba[i]); 1595 } 1596 1597 kfree(scan_eba); 1598 kfree(fm_eba); 1599 return ret; 1600 } 1601 1602 /** 1603 * ubi_eba_init - initialize the EBA sub-system using attaching information. 1604 * @ubi: UBI device description object 1605 * @ai: attaching information 1606 * 1607 * This function returns zero in case of success and a negative error code in 1608 * case of failure. 1609 */ 1610 int ubi_eba_init(struct ubi_device *ubi, struct ubi_attach_info *ai) 1611 { 1612 int i, err, num_volumes; 1613 struct ubi_ainf_volume *av; 1614 struct ubi_volume *vol; 1615 struct ubi_ainf_peb *aeb; 1616 struct rb_node *rb; 1617 1618 dbg_eba("initialize EBA sub-system"); 1619 1620 spin_lock_init(&ubi->ltree_lock); 1621 mutex_init(&ubi->alc_mutex); 1622 ubi->ltree = RB_ROOT; 1623 1624 ubi->global_sqnum = ai->max_sqnum + 1; 1625 num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT; 1626 1627 for (i = 0; i < num_volumes; i++) { 1628 struct ubi_eba_table *tbl; 1629 1630 vol = ubi->volumes[i]; 1631 if (!vol) 1632 continue; 1633 1634 cond_resched(); 1635 1636 tbl = ubi_eba_create_table(vol, vol->reserved_pebs); 1637 if (IS_ERR(tbl)) { 1638 err = PTR_ERR(tbl); 1639 goto out_free; 1640 } 1641 1642 ubi_eba_replace_table(vol, tbl); 1643 1644 av = ubi_find_av(ai, idx2vol_id(ubi, i)); 1645 if (!av) 1646 continue; 1647 1648 ubi_rb_for_each_entry(rb, aeb, &av->root, u.rb) { 1649 if (aeb->lnum >= vol->reserved_pebs) { 1650 /* 1651 * This may happen in case of an unclean reboot 1652 * during re-size. 
1653 */ 1654 ubi_move_aeb_to_list(av, aeb, &ai->erase); 1655 } else { 1656 struct ubi_eba_entry *entry; 1657 1658 entry = &vol->eba_tbl->entries[aeb->lnum]; 1659 entry->pnum = aeb->pnum; 1660 } 1661 } 1662 } 1663 1664 if (ubi->avail_pebs < EBA_RESERVED_PEBS) { 1665 ubi_err(ubi, "no enough physical eraseblocks (%d, need %d)", 1666 ubi->avail_pebs, EBA_RESERVED_PEBS); 1667 if (ubi->corr_peb_count) 1668 ubi_err(ubi, "%d PEBs are corrupted and not used", 1669 ubi->corr_peb_count); 1670 err = -ENOSPC; 1671 goto out_free; 1672 } 1673 ubi->avail_pebs -= EBA_RESERVED_PEBS; 1674 ubi->rsvd_pebs += EBA_RESERVED_PEBS; 1675 1676 if (ubi->bad_allowed) { 1677 ubi_calculate_reserved(ubi); 1678 1679 if (ubi->avail_pebs < ubi->beb_rsvd_level) { 1680 /* No enough free physical eraseblocks */ 1681 ubi->beb_rsvd_pebs = ubi->avail_pebs; 1682 print_rsvd_warning(ubi, ai); 1683 } else 1684 ubi->beb_rsvd_pebs = ubi->beb_rsvd_level; 1685 1686 ubi->avail_pebs -= ubi->beb_rsvd_pebs; 1687 ubi->rsvd_pebs += ubi->beb_rsvd_pebs; 1688 } 1689 1690 dbg_eba("EBA sub-system is initialized"); 1691 return 0; 1692 1693 out_free: 1694 for (i = 0; i < num_volumes; i++) { 1695 if (!ubi->volumes[i]) 1696 continue; 1697 ubi_eba_replace_table(ubi->volumes[i], NULL); 1698 } 1699 return err; 1700 } 1701