// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) International Business Machines Corp., 2006
 *
 * Author: Artem Bityutskiy (Битюцкий Артём)
 */

/*
 * The UBI Eraseblock Association (EBA) sub-system.
 *
 * This sub-system is responsible for I/O to/from logical eraseblock.
 *
 * Although in this implementation the EBA table is fully kept and managed in
 * RAM, which assumes poor scalability, it might be (partially) maintained on
 * flash in future implementations.
 *
 * The EBA sub-system implements per-logical eraseblock locking. Before
 * accessing a logical eraseblock it is locked for reading or writing. The
 * per-logical eraseblock locking is implemented by means of the lock tree. The
 * lock tree is an RB-tree which refers all the currently locked logical
 * eraseblocks. The lock tree elements are &struct ubi_ltree_entry objects.
 * They are indexed by (@vol_id, @lnum) pairs.
 *
 * EBA also maintains the global sequence counter which is incremented each
 * time a logical eraseblock is mapped to a physical eraseblock and it is
 * stored in the volume identifier header. This means that each VID header has
 * a unique sequence number. The sequence number is only increased and we
 * assume 64 bits is enough to never overflow.
 */

#include <linux/slab.h>
#include <linux/crc32.h>
#include <linux/err.h>
#include "ubi.h"

/* Number of physical eraseblocks reserved for atomic LEB change operation */
#define EBA_RESERVED_PEBS 1

/**
 * struct ubi_eba_entry - structure encoding a single LEB -> PEB association
 * @pnum: the physical eraseblock number attached to the LEB
 *
 * This structure is encoding a LEB -> PEB association. Note that the LEB
 * number is not stored here, because it is the index used to access the
 * entries table.
 */
struct ubi_eba_entry {
	int pnum;
};

/**
 * struct ubi_eba_table - LEB -> PEB association information
 * @entries: the LEB to PEB mapping (one entry per LEB).
 *
 * This structure is private to the EBA logic and should be kept here.
 * It is encoding the LEB to PEB association table, and is subject to
 * changes.
 */
struct ubi_eba_table {
	struct ubi_eba_entry *entries;
};

/**
 * ubi_next_sqnum - get next sequence number.
 * @ubi: UBI device description object
 *
 * This function returns next sequence number to use, which is just the current
 * global sequence counter value. It also increases the global sequence
 * counter.
 */
unsigned long long ubi_next_sqnum(struct ubi_device *ubi)
{
	unsigned long long sqnum;

	/* @ubi->ltree_lock also serializes access to @ubi->global_sqnum */
	spin_lock(&ubi->ltree_lock);
	sqnum = ubi->global_sqnum++;
	spin_unlock(&ubi->ltree_lock);

	return sqnum;
}

/**
 * ubi_get_compat - get compatibility flags of a volume.
 * @ubi: UBI device description object
 * @vol_id: volume ID
 *
 * This function returns compatibility flags for an internal volume. User
 * volumes have no compatibility flags, so %0 is returned.
 */
static int ubi_get_compat(const struct ubi_device *ubi, int vol_id)
{
	if (vol_id == UBI_LAYOUT_VOLUME_ID)
		return UBI_LAYOUT_VOLUME_COMPAT;
	return 0;
}

/**
 * ubi_eba_get_ldesc - get information about a LEB
 * @vol: volume description object
 * @lnum: logical eraseblock number
 * @ldesc: the LEB descriptor to fill
 *
 * Used to query information about a specific LEB.
 * It is currently only returning the physical position of the LEB, but will be
 * extended to provide more information.
 */
void ubi_eba_get_ldesc(struct ubi_volume *vol, int lnum,
		       struct ubi_eba_leb_desc *ldesc)
{
	ldesc->lnum = lnum;
	ldesc->pnum = vol->eba_tbl->entries[lnum].pnum;
}

/**
 * ubi_eba_create_table - allocate a new EBA table and initialize it with all
 *			  LEBs unmapped
 * @vol: volume containing the EBA table to copy
 * @nentries: number of entries in the table
 *
 * Allocate a new EBA table and initialize it with all LEBs unmapped.
 * Returns a valid pointer if it succeed, an ERR_PTR() otherwise.
 */
struct ubi_eba_table *ubi_eba_create_table(struct ubi_volume *vol,
					   int nentries)
{
	struct ubi_eba_table *tbl;
	int err = -ENOMEM;
	int i;

	tbl = kzalloc(sizeof(*tbl), GFP_KERNEL);
	if (!tbl)
		return ERR_PTR(-ENOMEM);

	tbl->entries = kmalloc_array(nentries, sizeof(*tbl->entries),
				     GFP_KERNEL);
	if (!tbl->entries)
		goto err;

	/* All LEBs start out unmapped */
	for (i = 0; i < nentries; i++)
		tbl->entries[i].pnum = UBI_LEB_UNMAPPED;

	return tbl;

err:
	kfree(tbl);

	return ERR_PTR(err);
}

/**
 * ubi_eba_destroy_table - destroy an EBA table
 * @tbl: the table to destroy (may be %NULL, in which case nothing is done)
 *
 * Destroy an EBA table.
 */
void ubi_eba_destroy_table(struct ubi_eba_table *tbl)
{
	if (!tbl)
		return;

	kfree(tbl->entries);
	kfree(tbl);
}

/**
 * ubi_eba_copy_table - copy the EBA table attached to vol into another table
 * @vol: volume containing the EBA table to copy
 * @dst: destination
 * @nentries: number of entries to copy
 *
 * Copy the EBA table stored in vol into the one pointed by dst.
 */
void ubi_eba_copy_table(struct ubi_volume *vol, struct ubi_eba_table *dst,
			int nentries)
{
	struct ubi_eba_table *src;
	int i;

	ubi_assert(dst && vol && vol->eba_tbl);

	src = vol->eba_tbl;

	for (i = 0; i < nentries; i++)
		dst->entries[i].pnum = src->entries[i].pnum;
}

/**
 * ubi_eba_replace_table - assign a new EBA table to a volume
 * @vol: volume containing the EBA table to copy
 * @tbl: new EBA table
 *
 * Assign a new EBA table to the volume and release the old one.
 */
void ubi_eba_replace_table(struct ubi_volume *vol, struct ubi_eba_table *tbl)
{
	ubi_eba_destroy_table(vol->eba_tbl);
	vol->eba_tbl = tbl;
}

/**
 * ltree_lookup - look up the lock tree.
 * @ubi: UBI device description object
 * @vol_id: volume ID
 * @lnum: logical eraseblock number
 *
 * This function returns a pointer to the corresponding &struct ubi_ltree_entry
 * object if the logical eraseblock is locked and %NULL if it is not.
 * @ubi->ltree_lock has to be locked.
 */
static struct ubi_ltree_entry *ltree_lookup(struct ubi_device *ubi, int vol_id,
					    int lnum)
{
	struct rb_node *p;

	/* Standard RB-tree walk keyed on (@vol_id, @lnum), @vol_id first */
	p = ubi->ltree.rb_node;
	while (p) {
		struct ubi_ltree_entry *le;

		le = rb_entry(p, struct ubi_ltree_entry, rb);

		if (vol_id < le->vol_id)
			p = p->rb_left;
		else if (vol_id > le->vol_id)
			p = p->rb_right;
		else {
			if (lnum < le->lnum)
				p = p->rb_left;
			else if (lnum > le->lnum)
				p = p->rb_right;
			else
				return le;
		}
	}

	return NULL;
}

/**
 * ltree_add_entry - add new entry to the lock tree.
 * @ubi: UBI device description object
 * @vol_id: volume ID
 * @lnum: logical eraseblock number
 *
 * This function adds new entry for logical eraseblock (@vol_id, @lnum) to the
 * lock tree. If such entry is already there, its usage counter is increased.
 * Returns pointer to the lock tree entry or %-ENOMEM if memory allocation
 * failed.
 */
static struct ubi_ltree_entry *ltree_add_entry(struct ubi_device *ubi,
					       int vol_id, int lnum)
{
	struct ubi_ltree_entry *le, *le1, *le_free;

	/*
	 * Allocate speculatively before taking @ubi->ltree_lock, since
	 * allocation may not be done under a spinlock.
	 */
	le = kmalloc(sizeof(struct ubi_ltree_entry), GFP_NOFS);
	if (!le)
		return ERR_PTR(-ENOMEM);

	le->users = 0;
	init_rwsem(&le->mutex);
	le->vol_id = vol_id;
	le->lnum = lnum;

	spin_lock(&ubi->ltree_lock);
	le1 = ltree_lookup(ubi, vol_id, lnum);

	if (le1) {
		/*
		 * This logical eraseblock is already locked. The newly
		 * allocated lock entry is not needed.
		 */
		le_free = le;
		le = le1;
	} else {
		struct rb_node **p, *parent = NULL;

		/*
		 * No lock entry, add the newly allocated one to the
		 * @ubi->ltree RB-tree.
		 */
		le_free = NULL;

		p = &ubi->ltree.rb_node;
		while (*p) {
			parent = *p;
			le1 = rb_entry(parent, struct ubi_ltree_entry, rb);

			if (vol_id < le1->vol_id)
				p = &(*p)->rb_left;
			else if (vol_id > le1->vol_id)
				p = &(*p)->rb_right;
			else {
				/* Duplicates were excluded by the lookup above */
				ubi_assert(lnum != le1->lnum);
				if (lnum < le1->lnum)
					p = &(*p)->rb_left;
				else
					p = &(*p)->rb_right;
			}
		}

		rb_link_node(&le->rb, parent, p);
		rb_insert_color(&le->rb, &ubi->ltree);
	}
	le->users += 1;
	spin_unlock(&ubi->ltree_lock);

	/* No-op when the new entry was actually inserted (le_free is NULL) */
	kfree(le_free);
	return le;
}

/**
 * leb_read_lock - lock logical eraseblock for reading.
 * @ubi: UBI device description object
 * @vol_id: volume ID
 * @lnum: logical eraseblock number
 *
 * This function locks a logical eraseblock for reading. Returns zero in case
 * of success and a negative error code in case of failure.
 */
static int leb_read_lock(struct ubi_device *ubi, int vol_id, int lnum)
{
	struct ubi_ltree_entry *le;

	le = ltree_add_entry(ubi, vol_id, lnum);
	if (IS_ERR(le))
		return PTR_ERR(le);
	down_read(&le->mutex);
	return 0;
}

/**
 * leb_read_unlock - unlock logical eraseblock.
 * @ubi: UBI device description object
 * @vol_id: volume ID
 * @lnum: logical eraseblock number
 */
static void leb_read_unlock(struct ubi_device *ubi, int vol_id, int lnum)
{
	struct ubi_ltree_entry *le;

	spin_lock(&ubi->ltree_lock);
	le = ltree_lookup(ubi, vol_id, lnum);
	le->users -= 1;
	ubi_assert(le->users >= 0);
	up_read(&le->mutex);
	/* Last user removes the entry from the tree and frees it */
	if (le->users == 0) {
		rb_erase(&le->rb, &ubi->ltree);
		kfree(le);
	}
	spin_unlock(&ubi->ltree_lock);
}

/**
 * leb_write_lock - lock logical eraseblock for writing.
 * @ubi: UBI device description object
 * @vol_id: volume ID
 * @lnum: logical eraseblock number
 *
 * This function locks a logical eraseblock for writing. Returns zero in case
 * of success and a negative error code in case of failure.
 */
static int leb_write_lock(struct ubi_device *ubi, int vol_id, int lnum)
{
	struct ubi_ltree_entry *le;

	le = ltree_add_entry(ubi, vol_id, lnum);
	if (IS_ERR(le))
		return PTR_ERR(le);
	down_write(&le->mutex);
	return 0;
}

/**
 * leb_write_trylock - try to lock logical eraseblock for writing.
 * @ubi: UBI device description object
 * @vol_id: volume ID
 * @lnum: logical eraseblock number
 *
 * This function locks a logical eraseblock for writing if there is no
 * contention and does nothing if there is contention. Returns %0 in case of
 * success, %1 in case of contention, and a negative error code in case of
 * failure.
 */
static int leb_write_trylock(struct ubi_device *ubi, int vol_id, int lnum)
{
	struct ubi_ltree_entry *le;

	le = ltree_add_entry(ubi, vol_id, lnum);
	if (IS_ERR(le))
		return PTR_ERR(le);
	if (down_write_trylock(&le->mutex))
		return 0;

	/* Contention, cancel: drop the reference ltree_add_entry() took */
	spin_lock(&ubi->ltree_lock);
	le->users -= 1;
	ubi_assert(le->users >= 0);
	if (le->users == 0) {
		rb_erase(&le->rb, &ubi->ltree);
		kfree(le);
	}
	spin_unlock(&ubi->ltree_lock);

	return 1;
}

/**
 * leb_write_unlock - unlock logical eraseblock.
 * @ubi: UBI device description object
 * @vol_id: volume ID
 * @lnum: logical eraseblock number
 */
static void leb_write_unlock(struct ubi_device *ubi, int vol_id, int lnum)
{
	struct ubi_ltree_entry *le;

	spin_lock(&ubi->ltree_lock);
	le = ltree_lookup(ubi, vol_id, lnum);
	le->users -= 1;
	ubi_assert(le->users >= 0);
	up_write(&le->mutex);
	/* Last user removes the entry from the tree and frees it */
	if (le->users == 0) {
		rb_erase(&le->rb, &ubi->ltree);
		kfree(le);
	}
	spin_unlock(&ubi->ltree_lock);
}

/**
 * ubi_eba_is_mapped - check if a LEB is mapped.
 * @vol: volume description object
 * @lnum: logical eraseblock number
 *
 * This function returns true if the LEB is mapped, false otherwise.
 */
bool ubi_eba_is_mapped(struct ubi_volume *vol, int lnum)
{
	return vol->eba_tbl->entries[lnum].pnum >= 0;
}

/**
 * ubi_eba_unmap_leb - un-map logical eraseblock.
 * @ubi: UBI device description object
 * @vol: volume description object
 * @lnum: logical eraseblock number
 *
 * This function un-maps logical eraseblock @lnum and schedules corresponding
 * physical eraseblock for erasure. Returns zero in case of success and a
 * negative error code in case of failure.
 */
int ubi_eba_unmap_leb(struct ubi_device *ubi, struct ubi_volume *vol,
		      int lnum)
{
	int err, pnum, vol_id = vol->vol_id;

	if (ubi->ro_mode)
		return -EROFS;

	err = leb_write_lock(ubi, vol_id, lnum);
	if (err)
		return err;

	pnum = vol->eba_tbl->entries[lnum].pnum;
	if (pnum < 0)
		/* This logical eraseblock is already unmapped */
		goto out_unlock;

	dbg_eba("erase LEB %d:%d, PEB %d", vol_id, lnum, pnum);

	/*
	 * NOTE(review): the EBA table update is done under a read-lock on
	 * @ubi->fm_eba_sem — presumably to stay coherent with concurrent
	 * fastmap writes; confirm against the fastmap code.
	 */
	down_read(&ubi->fm_eba_sem);
	vol->eba_tbl->entries[lnum].pnum = UBI_LEB_UNMAPPED;
	up_read(&ubi->fm_eba_sem);
	err = ubi_wl_put_peb(ubi, vol_id, lnum, pnum, 0);

out_unlock:
	leb_write_unlock(ubi, vol_id, lnum);
	return err;
}

#ifdef CONFIG_MTD_UBI_FASTMAP
/**
 * check_mapping - check and fixup a mapping
 * @ubi: UBI device description object
 * @vol: volume description object
 * @lnum: logical eraseblock number
 * @pnum: physical eraseblock number
 *
 * Checks whether a given mapping is valid. Fastmap cannot track LEB unmap
 * operations, if such an operation is interrupted the mapping still looks
 * good, but upon first read an ECC is reported to the upper layer.
 * Normally during the full-scan at attach time this is fixed, for Fastmap
 * we have to deal with it while reading.
 * If the PEB behind a LEB shows this symptom we change the mapping to
 * %UBI_LEB_UNMAPPED and schedule the PEB for erasure.
 *
 * Returns 0 on success, negative error code in case of failure.
 */
static int check_mapping(struct ubi_device *ubi, struct ubi_volume *vol, int lnum,
			 int *pnum)
{
	int err;
	struct ubi_vid_io_buf *vidb;
	struct ubi_vid_hdr *vid_hdr;

	/* Only needed after a fastmap attach, and only once per LEB */
	if (!ubi->fast_attach)
		return 0;

	if (!vol->checkmap || test_bit(lnum, vol->checkmap))
		return 0;

	vidb = ubi_alloc_vid_buf(ubi, GFP_NOFS);
	if (!vidb)
		return -ENOMEM;

	err = ubi_io_read_vid_hdr(ubi, *pnum, vidb, 0);
	if (err > 0 && err != UBI_IO_BITFLIPS) {
		int torture = 0;

		switch (err) {
		case UBI_IO_FF:
		case UBI_IO_FF_BITFLIPS:
		case UBI_IO_BAD_HDR:
		case UBI_IO_BAD_HDR_EBADMSG:
			break;
		default:
			ubi_assert(0);
		}

		if (err == UBI_IO_BAD_HDR_EBADMSG || err == UBI_IO_FF_BITFLIPS)
			torture = 1;

		/* Stale mapping: drop it and schedule the PEB for erasure */
		down_read(&ubi->fm_eba_sem);
		vol->eba_tbl->entries[lnum].pnum = UBI_LEB_UNMAPPED;
		up_read(&ubi->fm_eba_sem);
		ubi_wl_put_peb(ubi, vol->vol_id, lnum, *pnum, torture);

		*pnum = UBI_LEB_UNMAPPED;
	} else if (err < 0) {
		ubi_err(ubi, "unable to read VID header back from PEB %i: %i",
			*pnum, err);

		goto out_free;
	} else {
		int found_vol_id, found_lnum;

		ubi_assert(err == 0 || err == UBI_IO_BITFLIPS);

		vid_hdr = ubi_get_vid_hdr(vidb);
		found_vol_id = be32_to_cpu(vid_hdr->vol_id);
		found_lnum = be32_to_cpu(vid_hdr->lnum);

		if (found_lnum != lnum || found_vol_id != vol->vol_id) {
			ubi_err(ubi, "EBA mismatch! PEB %i is LEB %i:%i instead of LEB %i:%i",
				*pnum, found_vol_id, found_lnum, vol->vol_id, lnum);
			ubi_ro_mode(ubi);
			err = -EINVAL;
			goto out_free;
		}
	}

	/* Mapping verified (or fixed up) — never re-check this LEB */
	set_bit(lnum, vol->checkmap);
	err = 0;

out_free:
	ubi_free_vid_buf(vidb);

	return err;
}
#else
static int check_mapping(struct ubi_device *ubi, struct ubi_volume *vol, int lnum,
			 int *pnum)
{
	return 0;
}
#endif

/**
 * ubi_eba_read_leb - read data.
 * @ubi: UBI device description object
 * @vol: volume description object
 * @lnum: logical eraseblock number
 * @buf: buffer to store the read data
 * @offset: offset from where to read
 * @len: how many bytes to read
 * @check: data CRC check flag
 *
 * If the logical eraseblock @lnum is unmapped, @buf is filled with 0xFF
 * bytes. The @check flag only makes sense for static volumes and forces
 * eraseblock data CRC checking.
 *
 * In case of success this function returns zero. In case of a static volume,
 * if data CRC mismatches - %-EBADMSG is returned. %-EBADMSG may also be
 * returned for any volume type if an ECC error was detected by the MTD device
 * driver. Other negative error codes may be returned in case of other errors.
 */
int ubi_eba_read_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum,
		     void *buf, int offset, int len, int check)
{
	int err, pnum, scrub = 0, vol_id = vol->vol_id;
	struct ubi_vid_io_buf *vidb;
	struct ubi_vid_hdr *vid_hdr;
	uint32_t crc;

	err = leb_read_lock(ubi, vol_id, lnum);
	if (err)
		return err;

	pnum = vol->eba_tbl->entries[lnum].pnum;
	if (pnum >= 0) {
		/* May clear the mapping (fastmap fixup), see check_mapping() */
		err = check_mapping(ubi, vol, lnum, &pnum);
		if (err < 0)
			goto out_unlock;
	}

	if (pnum == UBI_LEB_UNMAPPED) {
		/*
		 * The logical eraseblock is not mapped, fill the whole buffer
		 * with 0xFF bytes. The exception is static volumes for which
		 * it is an error to read unmapped logical eraseblocks.
		 */
		dbg_eba("read %d bytes from offset %d of LEB %d:%d (unmapped)",
			len, offset, vol_id, lnum);
		leb_read_unlock(ubi, vol_id, lnum);
		ubi_assert(vol->vol_type != UBI_STATIC_VOLUME);
		memset(buf, 0xFF, len);
		return 0;
	}

	dbg_eba("read %d bytes from offset %d of LEB %d:%d, PEB %d",
		len, offset, vol_id, lnum, pnum);

	/* Data CRC checking only applies to static volumes */
	if (vol->vol_type == UBI_DYNAMIC_VOLUME)
		check = 0;

retry:
	if (check) {
		vidb = ubi_alloc_vid_buf(ubi, GFP_NOFS);
		if (!vidb) {
			err = -ENOMEM;
			goto out_unlock;
		}

		vid_hdr = ubi_get_vid_hdr(vidb);

		err = ubi_io_read_vid_hdr(ubi, pnum, vidb, 1);
		if (err && err != UBI_IO_BITFLIPS) {
			if (err > 0) {
				/*
				 * The header is either absent or corrupted.
				 * The former case means there is a bug -
				 * switch to read-only mode just in case.
				 * The latter case means a real corruption - we
				 * may try to recover data. FIXME: but this is
				 * not implemented.
				 */
				if (err == UBI_IO_BAD_HDR_EBADMSG ||
				    err == UBI_IO_BAD_HDR) {
					ubi_warn(ubi, "corrupted VID header at PEB %d, LEB %d:%d",
						 pnum, vol_id, lnum);
					err = -EBADMSG;
				} else {
					/*
					 * Ending up here in the non-Fastmap case
					 * is a clear bug as the VID header had to
					 * be present at scan time to have it referenced.
					 * With fastmap the story is more complicated.
					 * Fastmap has the mapping info without the need
					 * of a full scan. So the LEB could have been
					 * unmapped, Fastmap cannot know this and keeps
					 * the LEB referenced.
					 * This is valid and works as the layer above UBI
					 * has to do bookkeeping about used/referenced
					 * LEBs in any case.
					 */
					if (ubi->fast_attach) {
						err = -EBADMSG;
					} else {
						err = -EINVAL;
						ubi_ro_mode(ubi);
					}
				}
			}
			goto out_free;
		} else if (err == UBI_IO_BITFLIPS)
			scrub = 1;

		ubi_assert(lnum < be32_to_cpu(vid_hdr->used_ebs));
		ubi_assert(len == be32_to_cpu(vid_hdr->data_size));

		crc = be32_to_cpu(vid_hdr->data_crc);
		ubi_free_vid_buf(vidb);
	}

	err = ubi_io_read_data(ubi, buf, pnum, offset, len);
	if (err) {
		if (err == UBI_IO_BITFLIPS)
			scrub = 1;
		else if (mtd_is_eccerr(err)) {
			if (vol->vol_type == UBI_DYNAMIC_VOLUME)
				goto out_unlock;
			scrub = 1;
			/*
			 * For static volumes, re-read with CRC checking to
			 * tell a benign ECC error from real data corruption.
			 */
			if (!check) {
				ubi_msg(ubi, "force data checking");
				check = 1;
				goto retry;
			}
		} else
			goto out_unlock;
	}

	if (check) {
		uint32_t crc1 = crc32(UBI_CRC32_INIT, buf, len);

		if (crc1 != crc) {
			ubi_warn(ubi, "CRC error: calculated %#08x, must be %#08x",
				 crc1, crc);
			err = -EBADMSG;
			goto out_unlock;
		}
	}

	/* Bitflips were seen: ask the WL sub-system to scrub this PEB */
	if (scrub)
		err = ubi_wl_scrub_peb(ubi, pnum);

	leb_read_unlock(ubi, vol_id, lnum);
	return err;

out_free:
	ubi_free_vid_buf(vidb);
out_unlock:
	leb_read_unlock(ubi, vol_id, lnum);
	return err;
}

/**
 * ubi_eba_read_leb_sg - read data into a scatter gather list.
 * @ubi: UBI device description object
 * @vol: volume description object
 * @sgl: UBI scatter gather list to store the read data
 * @lnum: logical eraseblock number
 * @offset: offset from where to read
 * @len: how many bytes to read
 * @check: data CRC check flag
 *
 * This function works exactly like ubi_eba_read_leb(). But instead of
 * storing the read data into a buffer it writes to an UBI scatter gather
 * list.
 */
int ubi_eba_read_leb_sg(struct ubi_device *ubi, struct ubi_volume *vol,
			struct ubi_sgl *sgl, int lnum, int offset, int len,
			int check)
{
	int to_read;
	int ret;
	struct scatterlist *sg;

	/* Walk @sgl, reading at most one SG entry's remaining room per pass */
	for (;;) {
		ubi_assert(sgl->list_pos < UBI_MAX_SG_COUNT);
		sg = &sgl->sg[sgl->list_pos];
		if (len < sg->length - sgl->page_pos)
			to_read = len;
		else
			to_read = sg->length - sgl->page_pos;

		ret = ubi_eba_read_leb(ubi, vol, lnum,
				       sg_virt(sg) + sgl->page_pos, offset,
				       to_read, check);
		if (ret < 0)
			return ret;

		offset += to_read;
		len -= to_read;
		if (!len) {
			/* Done; advance @sgl so the next call continues here */
			sgl->page_pos += to_read;
			if (sgl->page_pos == sg->length) {
				sgl->list_pos++;
				sgl->page_pos = 0;
			}

			break;
		}

		sgl->list_pos++;
		sgl->page_pos = 0;
	}

	return ret;
}

/**
 * try_recover_peb - try to recover from write failure.
 * @vol: volume description object
 * @pnum: the physical eraseblock to recover
 * @lnum: logical eraseblock number
 * @buf: data which was not written because of the write failure
 * @offset: offset of the failed write
 * @len: how many bytes should have been written
 * @vidb: VID buffer
 * @retry: whether the caller should retry in case of failure
 *
 * This function is called in case of a write failure and moves all good data
 * from the potentially bad physical eraseblock to a good physical eraseblock.
 * This function also writes the data which was not written due to the failure.
 * Returns 0 in case of success, and a negative error code in case of failure.
 * In case of failure, the %retry parameter is set to false if this is a fatal
 * error (retrying won't help), and true otherwise.
 */
static int try_recover_peb(struct ubi_volume *vol, int pnum, int lnum,
			   const void *buf, int offset, int len,
			   struct ubi_vid_io_buf *vidb, bool *retry)
{
	struct ubi_device *ubi = vol->ubi;
	struct ubi_vid_hdr *vid_hdr;
	int new_pnum, err, vol_id = vol->vol_id, data_size;
	uint32_t crc;

	*retry = false;

	/*
	 * NOTE(review): @ubi->fm_eba_sem appears to be acquired by
	 * ubi_wl_get_peb() and is released at out_put below on every path —
	 * confirm against the WL sub-system.
	 */
	new_pnum = ubi_wl_get_peb(ubi);
	if (new_pnum < 0) {
		err = new_pnum;
		goto out_put;
	}

	ubi_msg(ubi, "recover PEB %d, move data to PEB %d",
		pnum, new_pnum);

	err = ubi_io_read_vid_hdr(ubi, pnum, vidb, 1);
	if (err && err != UBI_IO_BITFLIPS) {
		if (err > 0)
			err = -EIO;
		goto out_put;
	}

	vid_hdr = ubi_get_vid_hdr(vidb);
	ubi_assert(vid_hdr->vol_type == UBI_VID_DYNAMIC);

	mutex_lock(&ubi->buf_mutex);
	/* Pre-fill the failed region with 0xFF in case the read below skips it */
	memset(ubi->peb_buf + offset, 0xFF, len);

	/* Read everything before the area where the write failure happened */
	if (offset > 0) {
		err = ubi_io_read_data(ubi, ubi->peb_buf, pnum, 0, offset);
		if (err && err != UBI_IO_BITFLIPS)
			goto out_unlock;
	}

	/* From here on a failure is worth retrying with another PEB */
	*retry = true;

	memcpy(ubi->peb_buf + offset, buf, len);

	data_size = offset + len;
	crc = crc32(UBI_CRC32_INIT, ubi->peb_buf, data_size);
	vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi));
	vid_hdr->copy_flag = 1;
	vid_hdr->data_size = cpu_to_be32(data_size);
	vid_hdr->data_crc = cpu_to_be32(crc);
	err = ubi_io_write_vid_hdr(ubi, new_pnum, vidb);
	if (err)
		goto out_unlock;

	err = ubi_io_write_data(ubi, ubi->peb_buf, new_pnum, 0, data_size);

out_unlock:
	mutex_unlock(&ubi->buf_mutex);

	if (!err)
		vol->eba_tbl->entries[lnum].pnum = new_pnum;

out_put:
	up_read(&ubi->fm_eba_sem);

	if (!err) {
		/* Old PEB goes back to WL with torture (it failed a write) */
		ubi_wl_put_peb(ubi, vol_id, lnum, pnum, 1);
		ubi_msg(ubi, "data was successfully recovered");
	} else if (new_pnum >= 0) {
		/*
		 * Bad luck? This physical eraseblock is bad too? Crud. Let's
		 * try to get another one.
		 */
		ubi_wl_put_peb(ubi, vol_id, lnum, new_pnum, 1);
		ubi_warn(ubi, "failed to write to PEB %d", new_pnum);
	}

	return err;
}

/**
 * recover_peb - recover from write failure.
 * @ubi: UBI device description object
 * @pnum: the physical eraseblock to recover
 * @vol_id: volume ID
 * @lnum: logical eraseblock number
 * @buf: data which was not written because of the write failure
 * @offset: offset of the failed write
 * @len: how many bytes should have been written
 *
 * This function is called in case of a write failure and moves all good data
 * from the potentially bad physical eraseblock to a good physical eraseblock.
 * This function also writes the data which was not written due to the failure.
 * Returns 0 in case of success, and a negative error code in case of failure.
 * This function tries %UBI_IO_RETRIES before giving up.
 */
static int recover_peb(struct ubi_device *ubi, int pnum, int vol_id, int lnum,
		       const void *buf, int offset, int len)
{
	int err, idx = vol_id2idx(ubi, vol_id), tries;
	struct ubi_volume *vol = ubi->volumes[idx];
	struct ubi_vid_io_buf *vidb;

	vidb = ubi_alloc_vid_buf(ubi, GFP_NOFS);
	if (!vidb)
		return -ENOMEM;

	for (tries = 0; tries <= UBI_IO_RETRIES; tries++) {
		bool retry;

		err = try_recover_peb(vol, pnum, lnum, buf, offset, len, vidb,
				      &retry);
		/* Stop on success or on a fatal (non-retryable) failure */
		if (!err || !retry)
			break;

		ubi_msg(ubi, "try again");
	}

	ubi_free_vid_buf(vidb);

	return err;
}

/**
 * try_write_vid_and_data - try to write VID header and data to a new PEB.
 * @vol: volume description object
 * @lnum: logical eraseblock number
 * @vidb: the VID buffer to write
 * @buf: buffer containing the data
 * @offset: where to start writing data
 * @len: how many bytes should be written
 *
 * This function tries to write VID header and data belonging to logical
 * eraseblock @lnum of volume @vol to a new physical eraseblock. Returns zero
 * in case of success and a negative error code in case of failure.
 * In case of error, it is possible that something was still written to the
 * flash media, but may be some garbage.
 */
static int try_write_vid_and_data(struct ubi_volume *vol, int lnum,
				  struct ubi_vid_io_buf *vidb, const void *buf,
				  int offset, int len)
{
	struct ubi_device *ubi = vol->ubi;
	int pnum, opnum, err, vol_id = vol->vol_id;

	/*
	 * NOTE(review): @ubi->fm_eba_sem appears to be acquired by
	 * ubi_wl_get_peb() and is released at out_put below on every path —
	 * confirm against the WL sub-system.
	 */
	pnum = ubi_wl_get_peb(ubi);
	if (pnum < 0) {
		err = pnum;
		goto out_put;
	}

	opnum = vol->eba_tbl->entries[lnum].pnum;

	dbg_eba("write VID hdr and %d bytes at offset %d of LEB %d:%d, PEB %d",
		len, offset, vol_id, lnum, pnum);

	err = ubi_io_write_vid_hdr(ubi, pnum, vidb);
	if (err) {
		ubi_warn(ubi, "failed to write VID header to LEB %d:%d, PEB %d",
			 vol_id, lnum, pnum);
		goto out_put;
	}

	if (len) {
		err = ubi_io_write_data(ubi, buf, pnum, offset, len);
		if (err) {
			ubi_warn(ubi,
				 "failed to write %d bytes at offset %d of LEB %d:%d, PEB %d",
				 len, offset, vol_id, lnum, pnum);
			goto out_put;
		}
	}

	vol->eba_tbl->entries[lnum].pnum = pnum;

out_put:
	up_read(&ubi->fm_eba_sem);

	/* On failure return the new PEB (tortured); on success the old one */
	if (err && pnum >= 0)
		err = ubi_wl_put_peb(ubi, vol_id, lnum, pnum, 1);
	else if (!err && opnum >= 0)
		err = ubi_wl_put_peb(ubi, vol_id, lnum, opnum, 0);

	return err;
}

/**
 * ubi_eba_write_leb - write data to dynamic volume.
 * @ubi: UBI device description object
 * @vol: volume description object
 * @lnum: logical eraseblock number
 * @buf: the data to write
 * @offset: offset within the logical eraseblock where to write
 * @len: how many bytes to write
 *
 * This function writes data to logical eraseblock @lnum of a dynamic volume
 * @vol. Returns zero in case of success and a negative error code in case
 * of failure. In case of error, it is possible that something was still
 * written to the flash media, but may be some garbage.
 * This function retries %UBI_IO_RETRIES times before giving up.
 */
int ubi_eba_write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum,
		      const void *buf, int offset, int len)
{
	int err, pnum, tries, vol_id = vol->vol_id;
	struct ubi_vid_io_buf *vidb;
	struct ubi_vid_hdr *vid_hdr;

	if (ubi->ro_mode)
		return -EROFS;

	err = leb_write_lock(ubi, vol_id, lnum);
	if (err)
		return err;

	pnum = vol->eba_tbl->entries[lnum].pnum;
	if (pnum >= 0) {
		/* May clear the mapping (fastmap fixup), see check_mapping() */
		err = check_mapping(ubi, vol, lnum, &pnum);
		if (err < 0)
			goto out;
	}

	if (pnum >= 0) {
		dbg_eba("write %d bytes at offset %d of LEB %d:%d, PEB %d",
			len, offset, vol_id, lnum, pnum);

		err = ubi_io_write_data(ubi, buf, pnum, offset, len);
		if (err) {
			ubi_warn(ubi, "failed to write data to PEB %d", pnum);
			if (err == -EIO && ubi->bad_allowed)
				err = recover_peb(ubi, pnum, vol_id, lnum, buf,
						  offset, len);
		}

		goto out;
	}

	/*
	 * The logical eraseblock is not mapped. We have to get a free physical
	 * eraseblock and write the volume identifier header there first.
	 */
	vidb = ubi_alloc_vid_buf(ubi, GFP_NOFS);
	if (!vidb) {
		leb_write_unlock(ubi, vol_id, lnum);
		return -ENOMEM;
	}

	vid_hdr = ubi_get_vid_hdr(vidb);

	vid_hdr->vol_type = UBI_VID_DYNAMIC;
	vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi));
	vid_hdr->vol_id = cpu_to_be32(vol_id);
	vid_hdr->lnum = cpu_to_be32(lnum);
	vid_hdr->compat = ubi_get_compat(ubi, vol_id);
	vid_hdr->data_pad = cpu_to_be32(vol->data_pad);

	for (tries = 0; tries <= UBI_IO_RETRIES; tries++) {
		err = try_write_vid_and_data(vol, lnum, vidb, buf, offset, len);
		if (err != -EIO || !ubi->bad_allowed)
			break;

		/*
		 * Fortunately, this is the first write operation to this
		 * physical eraseblock, so just put it and request a new one.
		 * We assume that if this physical eraseblock went bad, the
		 * erase code will handle that.
		 */
		vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi));
		ubi_msg(ubi, "try another PEB");
	}

	ubi_free_vid_buf(vidb);

out:
	if (err)
		ubi_ro_mode(ubi);

	leb_write_unlock(ubi, vol_id, lnum);

	return err;
}

/**
 * ubi_eba_write_leb_st - write data to static volume.
 * @ubi: UBI device description object
 * @vol: volume description object
 * @lnum: logical eraseblock number
 * @buf: data to write
 * @len: how many bytes to write
 * @used_ebs: how many logical eraseblocks will this volume contain
 *
 * This function writes data to logical eraseblock @lnum of static volume
 * @vol. The @used_ebs argument should contain total number of logical
 * eraseblock in this static volume.
 *
 * When writing to the last logical eraseblock, the @len argument doesn't have
 * to be aligned to the minimal I/O unit size. Instead, it has to be equivalent
 * to the real data size, although the @buf buffer has to contain the
 * alignment.
In all other cases, @len has to be aligned. 1105 * 1106 * It is prohibited to write more than once to logical eraseblocks of static 1107 * volumes. This function returns zero in case of success and a negative error 1108 * code in case of failure. 1109 */ 1110 int ubi_eba_write_leb_st(struct ubi_device *ubi, struct ubi_volume *vol, 1111 int lnum, const void *buf, int len, int used_ebs) 1112 { 1113 int err, tries, data_size = len, vol_id = vol->vol_id; 1114 struct ubi_vid_io_buf *vidb; 1115 struct ubi_vid_hdr *vid_hdr; 1116 uint32_t crc; 1117 1118 if (ubi->ro_mode) 1119 return -EROFS; 1120 1121 if (lnum == used_ebs - 1) 1122 /* If this is the last LEB @len may be unaligned */ 1123 len = ALIGN(data_size, ubi->min_io_size); 1124 else 1125 ubi_assert(!(len & (ubi->min_io_size - 1))); 1126 1127 vidb = ubi_alloc_vid_buf(ubi, GFP_NOFS); 1128 if (!vidb) 1129 return -ENOMEM; 1130 1131 vid_hdr = ubi_get_vid_hdr(vidb); 1132 1133 err = leb_write_lock(ubi, vol_id, lnum); 1134 if (err) 1135 goto out; 1136 1137 vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); 1138 vid_hdr->vol_id = cpu_to_be32(vol_id); 1139 vid_hdr->lnum = cpu_to_be32(lnum); 1140 vid_hdr->compat = ubi_get_compat(ubi, vol_id); 1141 vid_hdr->data_pad = cpu_to_be32(vol->data_pad); 1142 1143 crc = crc32(UBI_CRC32_INIT, buf, data_size); 1144 vid_hdr->vol_type = UBI_VID_STATIC; 1145 vid_hdr->data_size = cpu_to_be32(data_size); 1146 vid_hdr->used_ebs = cpu_to_be32(used_ebs); 1147 vid_hdr->data_crc = cpu_to_be32(crc); 1148 1149 ubi_assert(vol->eba_tbl->entries[lnum].pnum < 0); 1150 1151 for (tries = 0; tries <= UBI_IO_RETRIES; tries++) { 1152 err = try_write_vid_and_data(vol, lnum, vidb, buf, 0, len); 1153 if (err != -EIO || !ubi->bad_allowed) 1154 break; 1155 1156 vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); 1157 ubi_msg(ubi, "try another PEB"); 1158 } 1159 1160 if (err) 1161 ubi_ro_mode(ubi); 1162 1163 leb_write_unlock(ubi, vol_id, lnum); 1164 1165 out: 1166 ubi_free_vid_buf(vidb); 1167 1168 return err; 1169 } 
1170 1171 /* 1172 * ubi_eba_atomic_leb_change - change logical eraseblock atomically. 1173 * @ubi: UBI device description object 1174 * @vol: volume description object 1175 * @lnum: logical eraseblock number 1176 * @buf: data to write 1177 * @len: how many bytes to write 1178 * 1179 * This function changes the contents of a logical eraseblock atomically. @buf 1180 * has to contain new logical eraseblock data, and @len - the length of the 1181 * data, which has to be aligned. This function guarantees that in case of an 1182 * unclean reboot the old contents is preserved. Returns zero in case of 1183 * success and a negative error code in case of failure. 1184 * 1185 * UBI reserves one LEB for the "atomic LEB change" operation, so only one 1186 * LEB change may be done at a time. This is ensured by @ubi->alc_mutex. 1187 */ 1188 int ubi_eba_atomic_leb_change(struct ubi_device *ubi, struct ubi_volume *vol, 1189 int lnum, const void *buf, int len) 1190 { 1191 int err, tries, vol_id = vol->vol_id; 1192 struct ubi_vid_io_buf *vidb; 1193 struct ubi_vid_hdr *vid_hdr; 1194 uint32_t crc; 1195 1196 if (ubi->ro_mode) 1197 return -EROFS; 1198 1199 if (len == 0) { 1200 /* 1201 * Special case when data length is zero. In this case the LEB 1202 * has to be unmapped and mapped somewhere else. 
1203 */ 1204 err = ubi_eba_unmap_leb(ubi, vol, lnum); 1205 if (err) 1206 return err; 1207 return ubi_eba_write_leb(ubi, vol, lnum, NULL, 0, 0); 1208 } 1209 1210 vidb = ubi_alloc_vid_buf(ubi, GFP_NOFS); 1211 if (!vidb) 1212 return -ENOMEM; 1213 1214 vid_hdr = ubi_get_vid_hdr(vidb); 1215 1216 mutex_lock(&ubi->alc_mutex); 1217 err = leb_write_lock(ubi, vol_id, lnum); 1218 if (err) 1219 goto out_mutex; 1220 1221 vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); 1222 vid_hdr->vol_id = cpu_to_be32(vol_id); 1223 vid_hdr->lnum = cpu_to_be32(lnum); 1224 vid_hdr->compat = ubi_get_compat(ubi, vol_id); 1225 vid_hdr->data_pad = cpu_to_be32(vol->data_pad); 1226 1227 crc = crc32(UBI_CRC32_INIT, buf, len); 1228 vid_hdr->vol_type = UBI_VID_DYNAMIC; 1229 vid_hdr->data_size = cpu_to_be32(len); 1230 vid_hdr->copy_flag = 1; 1231 vid_hdr->data_crc = cpu_to_be32(crc); 1232 1233 dbg_eba("change LEB %d:%d", vol_id, lnum); 1234 1235 for (tries = 0; tries <= UBI_IO_RETRIES; tries++) { 1236 err = try_write_vid_and_data(vol, lnum, vidb, buf, 0, len); 1237 if (err != -EIO || !ubi->bad_allowed) 1238 break; 1239 1240 vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); 1241 ubi_msg(ubi, "try another PEB"); 1242 } 1243 1244 /* 1245 * This flash device does not admit of bad eraseblocks or 1246 * something nasty and unexpected happened. Switch to read-only 1247 * mode just in case. 1248 */ 1249 if (err) 1250 ubi_ro_mode(ubi); 1251 1252 leb_write_unlock(ubi, vol_id, lnum); 1253 1254 out_mutex: 1255 mutex_unlock(&ubi->alc_mutex); 1256 ubi_free_vid_buf(vidb); 1257 return err; 1258 } 1259 1260 /** 1261 * is_error_sane - check whether a read error is sane. 1262 * @err: code of the error happened during reading 1263 * 1264 * This is a helper function for 'ubi_eba_copy_leb()' which is called when we 1265 * cannot read data from the target PEB (an error @err happened). If the error 1266 * code is sane, then we treat this error as non-fatal. 
Otherwise the error is 1267 * fatal and UBI will be switched to R/O mode later. 1268 * 1269 * The idea is that we try not to switch to R/O mode if the read error is 1270 * something which suggests there was a real read problem. E.g., %-EIO. Or a 1271 * memory allocation failed (-%ENOMEM). Otherwise, it is safer to switch to R/O 1272 * mode, simply because we do not know what happened at the MTD level, and we 1273 * cannot handle this. E.g., the underlying driver may have become crazy, and 1274 * it is safer to switch to R/O mode to preserve the data. 1275 * 1276 * And bear in mind, this is about reading from the target PEB, i.e. the PEB 1277 * which we have just written. 1278 */ 1279 static int is_error_sane(int err) 1280 { 1281 if (err == -EIO || err == -ENOMEM || err == UBI_IO_BAD_HDR || 1282 err == UBI_IO_BAD_HDR_EBADMSG || err == -ETIMEDOUT) 1283 return 0; 1284 return 1; 1285 } 1286 1287 /** 1288 * ubi_eba_copy_leb - copy logical eraseblock. 1289 * @ubi: UBI device description object 1290 * @from: physical eraseblock number from where to copy 1291 * @to: physical eraseblock number where to copy 1292 * @vidb: data structure from where the VID header is derived 1293 * 1294 * This function copies logical eraseblock from physical eraseblock @from to 1295 * physical eraseblock @to. The @vid_hdr buffer may be changed by this 1296 * function. Returns: 1297 * o %0 in case of success; 1298 * o %MOVE_CANCEL_RACE, %MOVE_TARGET_WR_ERR, %MOVE_TARGET_BITFLIPS, etc; 1299 * o a negative error code in case of failure. 
 */
int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
		     struct ubi_vid_io_buf *vidb)
{
	int err, vol_id, lnum, data_size, aldata_size, idx;
	struct ubi_vid_hdr *vid_hdr = ubi_get_vid_hdr(vidb);
	struct ubi_volume *vol;
	uint32_t crc;

	ubi_assert(rwsem_is_locked(&ubi->fm_eba_sem));

	vol_id = be32_to_cpu(vid_hdr->vol_id);
	lnum = be32_to_cpu(vid_hdr->lnum);

	dbg_wl("copy LEB %d:%d, PEB %d to PEB %d", vol_id, lnum, from, to);

	/*
	 * For static volumes the VID header records the data size; for
	 * dynamic ones assume the whole usable LEB for now (trimmed below
	 * after the data has been read).
	 */
	if (vid_hdr->vol_type == UBI_VID_STATIC) {
		data_size = be32_to_cpu(vid_hdr->data_size);
		aldata_size = ALIGN(data_size, ubi->min_io_size);
	} else
		data_size = aldata_size =
			ubi->leb_size - be32_to_cpu(vid_hdr->data_pad);

	idx = vol_id2idx(ubi, vol_id);
	spin_lock(&ubi->volumes_lock);
	/*
	 * Note, we may race with volume deletion, which means that the volume
	 * this logical eraseblock belongs to might be being deleted. Since the
	 * volume deletion un-maps all the volume's logical eraseblocks, it will
	 * be locked in 'ubi_wl_put_peb()' and wait for the WL worker to finish.
	 */
	vol = ubi->volumes[idx];
	spin_unlock(&ubi->volumes_lock);
	if (!vol) {
		/* No need to do further work, cancel */
		dbg_wl("volume %d is being removed, cancel", vol_id);
		return MOVE_CANCEL_RACE;
	}

	/*
	 * We do not want anybody to write to this logical eraseblock while we
	 * are moving it, so lock it.
	 *
	 * Note, we are using non-waiting locking here, because we cannot sleep
	 * on the LEB, since it may cause deadlocks. Indeed, imagine a task is
	 * unmapping the LEB which is mapped to the PEB we are going to move
	 * (@from). This task locks the LEB and goes sleep in the
	 * 'ubi_wl_put_peb()' function on the @ubi->move_mutex. In turn, we are
	 * holding @ubi->move_mutex and go sleep on the LEB lock. So, if the
	 * LEB is already locked, we just do not move it and return
	 * %MOVE_RETRY. Note, we do not return %MOVE_CANCEL_RACE here because
	 * we do not know the reasons of the contention - it may be just a
	 * normal I/O on this LEB, so we want to re-try.
	 */
	err = leb_write_trylock(ubi, vol_id, lnum);
	if (err) {
		dbg_wl("contention on LEB %d:%d, cancel", vol_id, lnum);
		return MOVE_RETRY;
	}

	/*
	 * The LEB might have been put meanwhile, and the task which put it is
	 * probably waiting on @ubi->move_mutex. No need to continue the work,
	 * cancel it.
	 */
	if (vol->eba_tbl->entries[lnum].pnum != from) {
		dbg_wl("LEB %d:%d is no longer mapped to PEB %d, mapped to PEB %d, cancel",
		       vol_id, lnum, from, vol->eba_tbl->entries[lnum].pnum);
		err = MOVE_CANCEL_RACE;
		goto out_unlock_leb;
	}

	/*
	 * OK, now the LEB is locked and we can safely start moving it. Since
	 * this function utilizes the @ubi->peb_buf buffer which is shared
	 * with some other functions - we lock the buffer by taking the
	 * @ubi->buf_mutex.
	 */
	mutex_lock(&ubi->buf_mutex);
	dbg_wl("read %d bytes of data", aldata_size);
	err = ubi_io_read_data(ubi, ubi->peb_buf, from, 0, aldata_size);
	if (err && err != UBI_IO_BITFLIPS) {
		ubi_warn(ubi, "error %d while reading data from PEB %d",
			 err, from);
		err = MOVE_SOURCE_RD_ERR;
		goto out_unlock_buf;
	}

	/*
	 * Now we have got to calculate how much data we have to copy. In
	 * case of a static volume it is fairly easy - the VID header contains
	 * the data size. In case of a dynamic volume it is more difficult - we
	 * have to read the contents, cut 0xFF bytes from the end and copy only
	 * the first part. We must do this to avoid writing 0xFF bytes as it
	 * may have some side-effects. And not only this. It is important not
	 * to include those 0xFFs to CRC because later they may be filled
	 * by data.
	 */
	if (vid_hdr->vol_type == UBI_VID_DYNAMIC)
		aldata_size = data_size =
			ubi_calc_data_len(ubi, ubi->peb_buf, data_size);

	cond_resched();
	crc = crc32(UBI_CRC32_INIT, ubi->peb_buf, data_size);
	cond_resched();

	/*
	 * It may turn out to be that the whole @from physical eraseblock
	 * contains only 0xFF bytes. Then we have to only write the VID header
	 * and do not write any data. This also means we should not set
	 * @vid_hdr->copy_flag, @vid_hdr->data_size, and @vid_hdr->data_crc.
	 */
	if (data_size > 0) {
		vid_hdr->copy_flag = 1;
		vid_hdr->data_size = cpu_to_be32(data_size);
		vid_hdr->data_crc = cpu_to_be32(crc);
	}
	/* Stamp the copy with a fresh sequence number before writing it out */
	vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi));

	err = ubi_io_write_vid_hdr(ubi, to, vidb);
	if (err) {
		if (err == -EIO)
			err = MOVE_TARGET_WR_ERR;
		goto out_unlock_buf;
	}

	cond_resched();

	/* Read the VID header back and check if it was written correctly */
	err = ubi_io_read_vid_hdr(ubi, to, vidb, 1);
	if (err) {
		if (err != UBI_IO_BITFLIPS) {
			ubi_warn(ubi, "error %d while reading VID header back from PEB %d",
				 err, to);
			if (is_error_sane(err))
				err = MOVE_TARGET_RD_ERR;
		} else
			err = MOVE_TARGET_BITFLIPS;
		goto out_unlock_buf;
	}

	if (data_size > 0) {
		err = ubi_io_write_data(ubi, ubi->peb_buf, to, 0, aldata_size);
		if (err) {
			if (err == -EIO)
				err = MOVE_TARGET_WR_ERR;
			goto out_unlock_buf;
		}

		cond_resched();
	}

	/* The copy succeeded; re-point the EBA entry at the new PEB */
	ubi_assert(vol->eba_tbl->entries[lnum].pnum == from);
	vol->eba_tbl->entries[lnum].pnum = to;

out_unlock_buf:
	mutex_unlock(&ubi->buf_mutex);
out_unlock_leb:
	leb_write_unlock(ubi, vol_id, lnum);
	return err;
}

/**
 * print_rsvd_warning - warn
about not having enough reserved PEBs. 1464 * @ubi: UBI device description object 1465 * @ai: UBI attach info object 1466 * 1467 * This is a helper function for 'ubi_eba_init()' which is called when UBI 1468 * cannot reserve enough PEBs for bad block handling. This function makes a 1469 * decision whether we have to print a warning or not. The algorithm is as 1470 * follows: 1471 * o if this is a new UBI image, then just print the warning 1472 * o if this is an UBI image which has already been used for some time, print 1473 * a warning only if we can reserve less than 10% of the expected amount of 1474 * the reserved PEB. 1475 * 1476 * The idea is that when UBI is used, PEBs become bad, and the reserved pool 1477 * of PEBs becomes smaller, which is normal and we do not want to scare users 1478 * with a warning every time they attach the MTD device. This was an issue 1479 * reported by real users. 1480 */ 1481 static void print_rsvd_warning(struct ubi_device *ubi, 1482 struct ubi_attach_info *ai) 1483 { 1484 /* 1485 * The 1 << 18 (256KiB) number is picked randomly, just a reasonably 1486 * large number to distinguish between newly flashed and used images. 1487 */ 1488 if (ai->max_sqnum > (1 << 18)) { 1489 int min = ubi->beb_rsvd_level / 10; 1490 1491 if (!min) 1492 min = 1; 1493 if (ubi->beb_rsvd_pebs > min) 1494 return; 1495 } 1496 1497 ubi_warn(ubi, "cannot reserve enough PEBs for bad PEB handling, reserved %d, need %d", 1498 ubi->beb_rsvd_pebs, ubi->beb_rsvd_level); 1499 if (ubi->corr_peb_count) 1500 ubi_warn(ubi, "%d PEBs are corrupted and not used", 1501 ubi->corr_peb_count); 1502 } 1503 1504 /** 1505 * self_check_eba - run a self check on the EBA table constructed by fastmap. 1506 * @ubi: UBI device description object 1507 * @ai_fastmap: UBI attach info object created by fastmap 1508 * @ai_scan: UBI attach info object created by scanning 1509 * 1510 * Returns < 0 in case of an internal error, 0 otherwise. 
1511 * If a bad EBA table entry was found it will be printed out and 1512 * ubi_assert() triggers. 1513 */ 1514 int self_check_eba(struct ubi_device *ubi, struct ubi_attach_info *ai_fastmap, 1515 struct ubi_attach_info *ai_scan) 1516 { 1517 int i, j, num_volumes, ret = 0; 1518 int **scan_eba, **fm_eba; 1519 struct ubi_ainf_volume *av; 1520 struct ubi_volume *vol; 1521 struct ubi_ainf_peb *aeb; 1522 struct rb_node *rb; 1523 1524 num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT; 1525 1526 scan_eba = kmalloc_array(num_volumes, sizeof(*scan_eba), GFP_KERNEL); 1527 if (!scan_eba) 1528 return -ENOMEM; 1529 1530 fm_eba = kmalloc_array(num_volumes, sizeof(*fm_eba), GFP_KERNEL); 1531 if (!fm_eba) { 1532 kfree(scan_eba); 1533 return -ENOMEM; 1534 } 1535 1536 for (i = 0; i < num_volumes; i++) { 1537 vol = ubi->volumes[i]; 1538 if (!vol) 1539 continue; 1540 1541 scan_eba[i] = kmalloc_array(vol->reserved_pebs, 1542 sizeof(**scan_eba), 1543 GFP_KERNEL); 1544 if (!scan_eba[i]) { 1545 ret = -ENOMEM; 1546 goto out_free; 1547 } 1548 1549 fm_eba[i] = kmalloc_array(vol->reserved_pebs, 1550 sizeof(**fm_eba), 1551 GFP_KERNEL); 1552 if (!fm_eba[i]) { 1553 ret = -ENOMEM; 1554 goto out_free; 1555 } 1556 1557 for (j = 0; j < vol->reserved_pebs; j++) 1558 scan_eba[i][j] = fm_eba[i][j] = UBI_LEB_UNMAPPED; 1559 1560 av = ubi_find_av(ai_scan, idx2vol_id(ubi, i)); 1561 if (!av) 1562 continue; 1563 1564 ubi_rb_for_each_entry(rb, aeb, &av->root, u.rb) 1565 scan_eba[i][aeb->lnum] = aeb->pnum; 1566 1567 av = ubi_find_av(ai_fastmap, idx2vol_id(ubi, i)); 1568 if (!av) 1569 continue; 1570 1571 ubi_rb_for_each_entry(rb, aeb, &av->root, u.rb) 1572 fm_eba[i][aeb->lnum] = aeb->pnum; 1573 1574 for (j = 0; j < vol->reserved_pebs; j++) { 1575 if (scan_eba[i][j] != fm_eba[i][j]) { 1576 if (scan_eba[i][j] == UBI_LEB_UNMAPPED || 1577 fm_eba[i][j] == UBI_LEB_UNMAPPED) 1578 continue; 1579 1580 ubi_err(ubi, "LEB:%i:%i is PEB:%i instead of %i!", 1581 vol->vol_id, j, fm_eba[i][j], 1582 scan_eba[i][j]); 1583 
				ubi_assert(0);
			}
		}
	}

out_free:
	for (i = 0; i < num_volumes; i++) {
		if (!ubi->volumes[i])
			continue;

		kfree(scan_eba[i]);
		kfree(fm_eba[i]);
	}

	kfree(scan_eba);
	kfree(fm_eba);
	return ret;
}

/**
 * ubi_eba_init - initialize the EBA sub-system using attaching information.
 * @ubi: UBI device description object
 * @ai: attaching information
 *
 * This function returns zero in case of success and a negative error code in
 * case of failure.
 */
int ubi_eba_init(struct ubi_device *ubi, struct ubi_attach_info *ai)
{
	int i, err, num_volumes;
	struct ubi_ainf_volume *av;
	struct ubi_volume *vol;
	struct ubi_ainf_peb *aeb;
	struct rb_node *rb;

	dbg_eba("initialize EBA sub-system");

	spin_lock_init(&ubi->ltree_lock);
	mutex_init(&ubi->alc_mutex);
	ubi->ltree = RB_ROOT;

	/* Continue the global sequence counter past everything seen so far */
	ubi->global_sqnum = ai->max_sqnum + 1;
	num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT;

	for (i = 0; i < num_volumes; i++) {
		struct ubi_eba_table *tbl;

		vol = ubi->volumes[i];
		if (!vol)
			continue;

		cond_resched();

		/* Build this volume's LEB -> PEB table from attach info */
		tbl = ubi_eba_create_table(vol, vol->reserved_pebs);
		if (IS_ERR(tbl)) {
			err = PTR_ERR(tbl);
			goto out_free;
		}

		ubi_eba_replace_table(vol, tbl);

		av = ubi_find_av(ai, idx2vol_id(ubi, i));
		if (!av)
			continue;

		ubi_rb_for_each_entry(rb, aeb, &av->root, u.rb) {
			if (aeb->lnum >= vol->reserved_pebs) {
				/*
				 * This may happen in case of an unclean reboot
				 * during re-size.
				 */
				ubi_move_aeb_to_list(av, aeb, &ai->erase);
			} else {
				struct ubi_eba_entry *entry;

				entry = &vol->eba_tbl->entries[aeb->lnum];
				entry->pnum = aeb->pnum;
			}
		}
	}

	/* Reserve PEBs for the atomic LEB change operation */
	if (ubi->avail_pebs < EBA_RESERVED_PEBS) {
		ubi_err(ubi, "no enough physical eraseblocks (%d, need %d)",
			ubi->avail_pebs, EBA_RESERVED_PEBS);
		if (ubi->corr_peb_count)
			ubi_err(ubi, "%d PEBs are corrupted and not used",
				ubi->corr_peb_count);
		err = -ENOSPC;
		goto out_free;
	}
	ubi->avail_pebs -= EBA_RESERVED_PEBS;
	ubi->rsvd_pebs += EBA_RESERVED_PEBS;

	if (ubi->bad_allowed) {
		ubi_calculate_reserved(ubi);

		if (ubi->avail_pebs < ubi->beb_rsvd_level) {
			/* No enough free physical eraseblocks */
			ubi->beb_rsvd_pebs = ubi->avail_pebs;
			print_rsvd_warning(ubi, ai);
		} else
			ubi->beb_rsvd_pebs = ubi->beb_rsvd_level;

		ubi->avail_pebs -= ubi->beb_rsvd_pebs;
		ubi->rsvd_pebs += ubi->beb_rsvd_pebs;
	}

	dbg_eba("EBA sub-system is initialized");
	return 0;

out_free:
	/* Drop any per-volume EBA tables created before the failure */
	for (i = 0; i < num_volumes; i++) {
		if (!ubi->volumes[i])
			continue;
		ubi_eba_replace_table(ubi->volumes[i], NULL);
	}
	return err;
}