/*
 * Copyright (c) International Business Machines Corp., 2006
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
 * the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Author: Artem Bityutskiy (Битюцкий Артём)
 */

/*
 * The UBI Eraseblock Association (EBA) sub-system.
 *
 * This sub-system is responsible for I/O to/from logical eraseblock.
 *
 * Although in this implementation the EBA table is fully kept and managed in
 * RAM, which assumes poor scalability, it might be (partially) maintained on
 * flash in future implementations.
 *
 * The EBA sub-system implements per-logical eraseblock locking. Before
 * accessing a logical eraseblock it is locked for reading or writing. The
 * per-logical eraseblock locking is implemented by means of the lock tree. The
 * lock tree is an RB-tree which refers all the currently locked logical
 * eraseblocks. The lock tree elements are &struct ubi_ltree_entry objects.
 * They are indexed by (@vol_id, @lnum) pairs.
 *
 * EBA also maintains the global sequence counter which is incremented each
 * time a logical eraseblock is mapped to a physical eraseblock and it is
 * stored in the volume identifier header. This means that each VID header has
 * a unique sequence number.
The sequence number is only increased an we assume 41 * 64 bits is enough to never overflow. 42 */ 43 44 #include <linux/slab.h> 45 #include <linux/crc32.h> 46 #include <linux/err.h> 47 #include "ubi.h" 48 49 /* Number of physical eraseblocks reserved for atomic LEB change operation */ 50 #define EBA_RESERVED_PEBS 1 51 52 /** 53 * next_sqnum - get next sequence number. 54 * @ubi: UBI device description object 55 * 56 * This function returns next sequence number to use, which is just the current 57 * global sequence counter value. It also increases the global sequence 58 * counter. 59 */ 60 static unsigned long long next_sqnum(struct ubi_device *ubi) 61 { 62 unsigned long long sqnum; 63 64 spin_lock(&ubi->ltree_lock); 65 sqnum = ubi->global_sqnum++; 66 spin_unlock(&ubi->ltree_lock); 67 68 return sqnum; 69 } 70 71 /** 72 * ubi_get_compat - get compatibility flags of a volume. 73 * @ubi: UBI device description object 74 * @vol_id: volume ID 75 * 76 * This function returns compatibility flags for an internal volume. User 77 * volumes have no compatibility flags, so %0 is returned. 78 */ 79 static int ubi_get_compat(const struct ubi_device *ubi, int vol_id) 80 { 81 if (vol_id == UBI_LAYOUT_VOLUME_ID) 82 return UBI_LAYOUT_VOLUME_COMPAT; 83 return 0; 84 } 85 86 /** 87 * ltree_lookup - look up the lock tree. 88 * @ubi: UBI device description object 89 * @vol_id: volume ID 90 * @lnum: logical eraseblock number 91 * 92 * This function returns a pointer to the corresponding &struct ubi_ltree_entry 93 * object if the logical eraseblock is locked and %NULL if it is not. 94 * @ubi->ltree_lock has to be locked. 
95 */ 96 static struct ubi_ltree_entry *ltree_lookup(struct ubi_device *ubi, int vol_id, 97 int lnum) 98 { 99 struct rb_node *p; 100 101 p = ubi->ltree.rb_node; 102 while (p) { 103 struct ubi_ltree_entry *le; 104 105 le = rb_entry(p, struct ubi_ltree_entry, rb); 106 107 if (vol_id < le->vol_id) 108 p = p->rb_left; 109 else if (vol_id > le->vol_id) 110 p = p->rb_right; 111 else { 112 if (lnum < le->lnum) 113 p = p->rb_left; 114 else if (lnum > le->lnum) 115 p = p->rb_right; 116 else 117 return le; 118 } 119 } 120 121 return NULL; 122 } 123 124 /** 125 * ltree_add_entry - add new entry to the lock tree. 126 * @ubi: UBI device description object 127 * @vol_id: volume ID 128 * @lnum: logical eraseblock number 129 * 130 * This function adds new entry for logical eraseblock (@vol_id, @lnum) to the 131 * lock tree. If such entry is already there, its usage counter is increased. 132 * Returns pointer to the lock tree entry or %-ENOMEM if memory allocation 133 * failed. 134 */ 135 static struct ubi_ltree_entry *ltree_add_entry(struct ubi_device *ubi, 136 int vol_id, int lnum) 137 { 138 struct ubi_ltree_entry *le, *le1, *le_free; 139 140 le = kmalloc(sizeof(struct ubi_ltree_entry), GFP_NOFS); 141 if (!le) 142 return ERR_PTR(-ENOMEM); 143 144 le->users = 0; 145 init_rwsem(&le->mutex); 146 le->vol_id = vol_id; 147 le->lnum = lnum; 148 149 spin_lock(&ubi->ltree_lock); 150 le1 = ltree_lookup(ubi, vol_id, lnum); 151 152 if (le1) { 153 /* 154 * This logical eraseblock is already locked. The newly 155 * allocated lock entry is not needed. 156 */ 157 le_free = le; 158 le = le1; 159 } else { 160 struct rb_node **p, *parent = NULL; 161 162 /* 163 * No lock entry, add the newly allocated one to the 164 * @ubi->ltree RB-tree. 
165 */ 166 le_free = NULL; 167 168 p = &ubi->ltree.rb_node; 169 while (*p) { 170 parent = *p; 171 le1 = rb_entry(parent, struct ubi_ltree_entry, rb); 172 173 if (vol_id < le1->vol_id) 174 p = &(*p)->rb_left; 175 else if (vol_id > le1->vol_id) 176 p = &(*p)->rb_right; 177 else { 178 ubi_assert(lnum != le1->lnum); 179 if (lnum < le1->lnum) 180 p = &(*p)->rb_left; 181 else 182 p = &(*p)->rb_right; 183 } 184 } 185 186 rb_link_node(&le->rb, parent, p); 187 rb_insert_color(&le->rb, &ubi->ltree); 188 } 189 le->users += 1; 190 spin_unlock(&ubi->ltree_lock); 191 192 kfree(le_free); 193 return le; 194 } 195 196 /** 197 * leb_read_lock - lock logical eraseblock for reading. 198 * @ubi: UBI device description object 199 * @vol_id: volume ID 200 * @lnum: logical eraseblock number 201 * 202 * This function locks a logical eraseblock for reading. Returns zero in case 203 * of success and a negative error code in case of failure. 204 */ 205 static int leb_read_lock(struct ubi_device *ubi, int vol_id, int lnum) 206 { 207 struct ubi_ltree_entry *le; 208 209 le = ltree_add_entry(ubi, vol_id, lnum); 210 if (IS_ERR(le)) 211 return PTR_ERR(le); 212 down_read(&le->mutex); 213 return 0; 214 } 215 216 /** 217 * leb_read_unlock - unlock logical eraseblock. 218 * @ubi: UBI device description object 219 * @vol_id: volume ID 220 * @lnum: logical eraseblock number 221 */ 222 static void leb_read_unlock(struct ubi_device *ubi, int vol_id, int lnum) 223 { 224 struct ubi_ltree_entry *le; 225 226 spin_lock(&ubi->ltree_lock); 227 le = ltree_lookup(ubi, vol_id, lnum); 228 le->users -= 1; 229 ubi_assert(le->users >= 0); 230 up_read(&le->mutex); 231 if (le->users == 0) { 232 rb_erase(&le->rb, &ubi->ltree); 233 kfree(le); 234 } 235 spin_unlock(&ubi->ltree_lock); 236 } 237 238 /** 239 * leb_write_lock - lock logical eraseblock for writing. 
240 * @ubi: UBI device description object 241 * @vol_id: volume ID 242 * @lnum: logical eraseblock number 243 * 244 * This function locks a logical eraseblock for writing. Returns zero in case 245 * of success and a negative error code in case of failure. 246 */ 247 static int leb_write_lock(struct ubi_device *ubi, int vol_id, int lnum) 248 { 249 struct ubi_ltree_entry *le; 250 251 le = ltree_add_entry(ubi, vol_id, lnum); 252 if (IS_ERR(le)) 253 return PTR_ERR(le); 254 down_write(&le->mutex); 255 return 0; 256 } 257 258 /** 259 * leb_write_lock - lock logical eraseblock for writing. 260 * @ubi: UBI device description object 261 * @vol_id: volume ID 262 * @lnum: logical eraseblock number 263 * 264 * This function locks a logical eraseblock for writing if there is no 265 * contention and does nothing if there is contention. Returns %0 in case of 266 * success, %1 in case of contention, and and a negative error code in case of 267 * failure. 268 */ 269 static int leb_write_trylock(struct ubi_device *ubi, int vol_id, int lnum) 270 { 271 struct ubi_ltree_entry *le; 272 273 le = ltree_add_entry(ubi, vol_id, lnum); 274 if (IS_ERR(le)) 275 return PTR_ERR(le); 276 if (down_write_trylock(&le->mutex)) 277 return 0; 278 279 /* Contention, cancel */ 280 spin_lock(&ubi->ltree_lock); 281 le->users -= 1; 282 ubi_assert(le->users >= 0); 283 if (le->users == 0) { 284 rb_erase(&le->rb, &ubi->ltree); 285 kfree(le); 286 } 287 spin_unlock(&ubi->ltree_lock); 288 289 return 1; 290 } 291 292 /** 293 * leb_write_unlock - unlock logical eraseblock. 
294 * @ubi: UBI device description object 295 * @vol_id: volume ID 296 * @lnum: logical eraseblock number 297 */ 298 static void leb_write_unlock(struct ubi_device *ubi, int vol_id, int lnum) 299 { 300 struct ubi_ltree_entry *le; 301 302 spin_lock(&ubi->ltree_lock); 303 le = ltree_lookup(ubi, vol_id, lnum); 304 le->users -= 1; 305 ubi_assert(le->users >= 0); 306 up_write(&le->mutex); 307 if (le->users == 0) { 308 rb_erase(&le->rb, &ubi->ltree); 309 kfree(le); 310 } 311 spin_unlock(&ubi->ltree_lock); 312 } 313 314 /** 315 * ubi_eba_unmap_leb - un-map logical eraseblock. 316 * @ubi: UBI device description object 317 * @vol: volume description object 318 * @lnum: logical eraseblock number 319 * 320 * This function un-maps logical eraseblock @lnum and schedules corresponding 321 * physical eraseblock for erasure. Returns zero in case of success and a 322 * negative error code in case of failure. 323 */ 324 int ubi_eba_unmap_leb(struct ubi_device *ubi, struct ubi_volume *vol, 325 int lnum) 326 { 327 int err, pnum, vol_id = vol->vol_id; 328 329 if (ubi->ro_mode) 330 return -EROFS; 331 332 err = leb_write_lock(ubi, vol_id, lnum); 333 if (err) 334 return err; 335 336 pnum = vol->eba_tbl[lnum]; 337 if (pnum < 0) 338 /* This logical eraseblock is already unmapped */ 339 goto out_unlock; 340 341 dbg_eba("erase LEB %d:%d, PEB %d", vol_id, lnum, pnum); 342 343 vol->eba_tbl[lnum] = UBI_LEB_UNMAPPED; 344 err = ubi_wl_put_peb(ubi, pnum, 0); 345 346 out_unlock: 347 leb_write_unlock(ubi, vol_id, lnum); 348 return err; 349 } 350 351 /** 352 * ubi_eba_read_leb - read data. 353 * @ubi: UBI device description object 354 * @vol: volume description object 355 * @lnum: logical eraseblock number 356 * @buf: buffer to store the read data 357 * @offset: offset from where to read 358 * @len: how many bytes to read 359 * @check: data CRC check flag 360 * 361 * If the logical eraseblock @lnum is unmapped, @buf is filled with 0xFF 362 * bytes. 
The @check flag only makes sense for static volumes and forces 363 * eraseblock data CRC checking. 364 * 365 * In case of success this function returns zero. In case of a static volume, 366 * if data CRC mismatches - %-EBADMSG is returned. %-EBADMSG may also be 367 * returned for any volume type if an ECC error was detected by the MTD device 368 * driver. Other negative error cored may be returned in case of other errors. 369 */ 370 int ubi_eba_read_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, 371 void *buf, int offset, int len, int check) 372 { 373 int err, pnum, scrub = 0, vol_id = vol->vol_id; 374 struct ubi_vid_hdr *vid_hdr; 375 uint32_t uninitialized_var(crc); 376 377 err = leb_read_lock(ubi, vol_id, lnum); 378 if (err) 379 return err; 380 381 pnum = vol->eba_tbl[lnum]; 382 if (pnum < 0) { 383 /* 384 * The logical eraseblock is not mapped, fill the whole buffer 385 * with 0xFF bytes. The exception is static volumes for which 386 * it is an error to read unmapped logical eraseblocks. 387 */ 388 dbg_eba("read %d bytes from offset %d of LEB %d:%d (unmapped)", 389 len, offset, vol_id, lnum); 390 leb_read_unlock(ubi, vol_id, lnum); 391 ubi_assert(vol->vol_type != UBI_STATIC_VOLUME); 392 memset(buf, 0xFF, len); 393 return 0; 394 } 395 396 dbg_eba("read %d bytes from offset %d of LEB %d:%d, PEB %d", 397 len, offset, vol_id, lnum, pnum); 398 399 if (vol->vol_type == UBI_DYNAMIC_VOLUME) 400 check = 0; 401 402 retry: 403 if (check) { 404 vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); 405 if (!vid_hdr) { 406 err = -ENOMEM; 407 goto out_unlock; 408 } 409 410 err = ubi_io_read_vid_hdr(ubi, pnum, vid_hdr, 1); 411 if (err && err != UBI_IO_BITFLIPS) { 412 if (err > 0) { 413 /* 414 * The header is either absent or corrupted. 415 * The former case means there is a bug - 416 * switch to read-only mode just in case. 417 * The latter case means a real corruption - we 418 * may try to recover data. FIXME: but this is 419 * not implemented. 
420 */ 421 if (err == UBI_IO_BAD_VID_HDR) { 422 ubi_warn("corrupted VID header at PEB " 423 "%d, LEB %d:%d", pnum, vol_id, 424 lnum); 425 err = -EBADMSG; 426 } else 427 ubi_ro_mode(ubi); 428 } 429 goto out_free; 430 } else if (err == UBI_IO_BITFLIPS) 431 scrub = 1; 432 433 ubi_assert(lnum < be32_to_cpu(vid_hdr->used_ebs)); 434 ubi_assert(len == be32_to_cpu(vid_hdr->data_size)); 435 436 crc = be32_to_cpu(vid_hdr->data_crc); 437 ubi_free_vid_hdr(ubi, vid_hdr); 438 } 439 440 err = ubi_io_read_data(ubi, buf, pnum, offset, len); 441 if (err) { 442 if (err == UBI_IO_BITFLIPS) { 443 scrub = 1; 444 err = 0; 445 } else if (err == -EBADMSG) { 446 if (vol->vol_type == UBI_DYNAMIC_VOLUME) 447 goto out_unlock; 448 scrub = 1; 449 if (!check) { 450 ubi_msg("force data checking"); 451 check = 1; 452 goto retry; 453 } 454 } else 455 goto out_unlock; 456 } 457 458 if (check) { 459 uint32_t crc1 = crc32(UBI_CRC32_INIT, buf, len); 460 if (crc1 != crc) { 461 ubi_warn("CRC error: calculated %#08x, must be %#08x", 462 crc1, crc); 463 err = -EBADMSG; 464 goto out_unlock; 465 } 466 } 467 468 if (scrub) 469 err = ubi_wl_scrub_peb(ubi, pnum); 470 471 leb_read_unlock(ubi, vol_id, lnum); 472 return err; 473 474 out_free: 475 ubi_free_vid_hdr(ubi, vid_hdr); 476 out_unlock: 477 leb_read_unlock(ubi, vol_id, lnum); 478 return err; 479 } 480 481 /** 482 * recover_peb - recover from write failure. 483 * @ubi: UBI device description object 484 * @pnum: the physical eraseblock to recover 485 * @vol_id: volume ID 486 * @lnum: logical eraseblock number 487 * @buf: data which was not written because of the write failure 488 * @offset: offset of the failed write 489 * @len: how many bytes should have been written 490 * 491 * This function is called in case of a write failure and moves all good data 492 * from the potentially bad physical eraseblock to a good physical eraseblock. 493 * This function also writes the data which was not written due to the failure. 
494 * Returns new physical eraseblock number in case of success, and a negative 495 * error code in case of failure. 496 */ 497 static int recover_peb(struct ubi_device *ubi, int pnum, int vol_id, int lnum, 498 const void *buf, int offset, int len) 499 { 500 int err, idx = vol_id2idx(ubi, vol_id), new_pnum, data_size, tries = 0; 501 struct ubi_volume *vol = ubi->volumes[idx]; 502 struct ubi_vid_hdr *vid_hdr; 503 504 vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); 505 if (!vid_hdr) 506 return -ENOMEM; 507 508 retry: 509 new_pnum = ubi_wl_get_peb(ubi, UBI_UNKNOWN); 510 if (new_pnum < 0) { 511 ubi_free_vid_hdr(ubi, vid_hdr); 512 return new_pnum; 513 } 514 515 ubi_msg("recover PEB %d, move data to PEB %d", pnum, new_pnum); 516 517 err = ubi_io_read_vid_hdr(ubi, pnum, vid_hdr, 1); 518 if (err && err != UBI_IO_BITFLIPS) { 519 if (err > 0) 520 err = -EIO; 521 goto out_put; 522 } 523 524 vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); 525 err = ubi_io_write_vid_hdr(ubi, new_pnum, vid_hdr); 526 if (err) 527 goto write_error; 528 529 data_size = offset + len; 530 mutex_lock(&ubi->buf_mutex); 531 memset(ubi->peb_buf1 + offset, 0xFF, len); 532 533 /* Read everything before the area where the write failure happened */ 534 if (offset > 0) { 535 err = ubi_io_read_data(ubi, ubi->peb_buf1, pnum, 0, offset); 536 if (err && err != UBI_IO_BITFLIPS) 537 goto out_unlock; 538 } 539 540 memcpy(ubi->peb_buf1 + offset, buf, len); 541 542 err = ubi_io_write_data(ubi, ubi->peb_buf1, new_pnum, 0, data_size); 543 if (err) { 544 mutex_unlock(&ubi->buf_mutex); 545 goto write_error; 546 } 547 548 mutex_unlock(&ubi->buf_mutex); 549 ubi_free_vid_hdr(ubi, vid_hdr); 550 551 vol->eba_tbl[lnum] = new_pnum; 552 ubi_wl_put_peb(ubi, pnum, 1); 553 554 ubi_msg("data was successfully recovered"); 555 return 0; 556 557 out_unlock: 558 mutex_unlock(&ubi->buf_mutex); 559 out_put: 560 ubi_wl_put_peb(ubi, new_pnum, 1); 561 ubi_free_vid_hdr(ubi, vid_hdr); 562 return err; 563 564 write_error: 565 /* 566 * Bad luck? 
This physical eraseblock is bad too? Crud. Let's try to 567 * get another one. 568 */ 569 ubi_warn("failed to write to PEB %d", new_pnum); 570 ubi_wl_put_peb(ubi, new_pnum, 1); 571 if (++tries > UBI_IO_RETRIES) { 572 ubi_free_vid_hdr(ubi, vid_hdr); 573 return err; 574 } 575 ubi_msg("try again"); 576 goto retry; 577 } 578 579 /** 580 * ubi_eba_write_leb - write data to dynamic volume. 581 * @ubi: UBI device description object 582 * @vol: volume description object 583 * @lnum: logical eraseblock number 584 * @buf: the data to write 585 * @offset: offset within the logical eraseblock where to write 586 * @len: how many bytes to write 587 * @dtype: data type 588 * 589 * This function writes data to logical eraseblock @lnum of a dynamic volume 590 * @vol. Returns zero in case of success and a negative error code in case 591 * of failure. In case of error, it is possible that something was still 592 * written to the flash media, but may be some garbage. 593 */ 594 int ubi_eba_write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, 595 const void *buf, int offset, int len, int dtype) 596 { 597 int err, pnum, tries = 0, vol_id = vol->vol_id; 598 struct ubi_vid_hdr *vid_hdr; 599 600 if (ubi->ro_mode) 601 return -EROFS; 602 603 err = leb_write_lock(ubi, vol_id, lnum); 604 if (err) 605 return err; 606 607 pnum = vol->eba_tbl[lnum]; 608 if (pnum >= 0) { 609 dbg_eba("write %d bytes at offset %d of LEB %d:%d, PEB %d", 610 len, offset, vol_id, lnum, pnum); 611 612 err = ubi_io_write_data(ubi, buf, pnum, offset, len); 613 if (err) { 614 ubi_warn("failed to write data to PEB %d", pnum); 615 if (err == -EIO && ubi->bad_allowed) 616 err = recover_peb(ubi, pnum, vol_id, lnum, buf, 617 offset, len); 618 if (err) 619 ubi_ro_mode(ubi); 620 } 621 leb_write_unlock(ubi, vol_id, lnum); 622 return err; 623 } 624 625 /* 626 * The logical eraseblock is not mapped. We have to get a free physical 627 * eraseblock and write the volume identifier header there first. 
628 */ 629 vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); 630 if (!vid_hdr) { 631 leb_write_unlock(ubi, vol_id, lnum); 632 return -ENOMEM; 633 } 634 635 vid_hdr->vol_type = UBI_VID_DYNAMIC; 636 vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); 637 vid_hdr->vol_id = cpu_to_be32(vol_id); 638 vid_hdr->lnum = cpu_to_be32(lnum); 639 vid_hdr->compat = ubi_get_compat(ubi, vol_id); 640 vid_hdr->data_pad = cpu_to_be32(vol->data_pad); 641 642 retry: 643 pnum = ubi_wl_get_peb(ubi, dtype); 644 if (pnum < 0) { 645 ubi_free_vid_hdr(ubi, vid_hdr); 646 leb_write_unlock(ubi, vol_id, lnum); 647 return pnum; 648 } 649 650 dbg_eba("write VID hdr and %d bytes at offset %d of LEB %d:%d, PEB %d", 651 len, offset, vol_id, lnum, pnum); 652 653 err = ubi_io_write_vid_hdr(ubi, pnum, vid_hdr); 654 if (err) { 655 ubi_warn("failed to write VID header to LEB %d:%d, PEB %d", 656 vol_id, lnum, pnum); 657 goto write_error; 658 } 659 660 if (len) { 661 err = ubi_io_write_data(ubi, buf, pnum, offset, len); 662 if (err) { 663 ubi_warn("failed to write %d bytes at offset %d of " 664 "LEB %d:%d, PEB %d", len, offset, vol_id, 665 lnum, pnum); 666 goto write_error; 667 } 668 } 669 670 vol->eba_tbl[lnum] = pnum; 671 672 leb_write_unlock(ubi, vol_id, lnum); 673 ubi_free_vid_hdr(ubi, vid_hdr); 674 return 0; 675 676 write_error: 677 if (err != -EIO || !ubi->bad_allowed) { 678 ubi_ro_mode(ubi); 679 leb_write_unlock(ubi, vol_id, lnum); 680 ubi_free_vid_hdr(ubi, vid_hdr); 681 return err; 682 } 683 684 /* 685 * Fortunately, this is the first write operation to this physical 686 * eraseblock, so just put it and request a new one. We assume that if 687 * this physical eraseblock went bad, the erase code will handle that. 
688 */ 689 err = ubi_wl_put_peb(ubi, pnum, 1); 690 if (err || ++tries > UBI_IO_RETRIES) { 691 ubi_ro_mode(ubi); 692 leb_write_unlock(ubi, vol_id, lnum); 693 ubi_free_vid_hdr(ubi, vid_hdr); 694 return err; 695 } 696 697 vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); 698 ubi_msg("try another PEB"); 699 goto retry; 700 } 701 702 /** 703 * ubi_eba_write_leb_st - write data to static volume. 704 * @ubi: UBI device description object 705 * @vol: volume description object 706 * @lnum: logical eraseblock number 707 * @buf: data to write 708 * @len: how many bytes to write 709 * @dtype: data type 710 * @used_ebs: how many logical eraseblocks will this volume contain 711 * 712 * This function writes data to logical eraseblock @lnum of static volume 713 * @vol. The @used_ebs argument should contain total number of logical 714 * eraseblock in this static volume. 715 * 716 * When writing to the last logical eraseblock, the @len argument doesn't have 717 * to be aligned to the minimal I/O unit size. Instead, it has to be equivalent 718 * to the real data size, although the @buf buffer has to contain the 719 * alignment. In all other cases, @len has to be aligned. 720 * 721 * It is prohibited to write more than once to logical eraseblocks of static 722 * volumes. This function returns zero in case of success and a negative error 723 * code in case of failure. 
724 */ 725 int ubi_eba_write_leb_st(struct ubi_device *ubi, struct ubi_volume *vol, 726 int lnum, const void *buf, int len, int dtype, 727 int used_ebs) 728 { 729 int err, pnum, tries = 0, data_size = len, vol_id = vol->vol_id; 730 struct ubi_vid_hdr *vid_hdr; 731 uint32_t crc; 732 733 if (ubi->ro_mode) 734 return -EROFS; 735 736 if (lnum == used_ebs - 1) 737 /* If this is the last LEB @len may be unaligned */ 738 len = ALIGN(data_size, ubi->min_io_size); 739 else 740 ubi_assert(!(len & (ubi->min_io_size - 1))); 741 742 vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); 743 if (!vid_hdr) 744 return -ENOMEM; 745 746 err = leb_write_lock(ubi, vol_id, lnum); 747 if (err) { 748 ubi_free_vid_hdr(ubi, vid_hdr); 749 return err; 750 } 751 752 vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); 753 vid_hdr->vol_id = cpu_to_be32(vol_id); 754 vid_hdr->lnum = cpu_to_be32(lnum); 755 vid_hdr->compat = ubi_get_compat(ubi, vol_id); 756 vid_hdr->data_pad = cpu_to_be32(vol->data_pad); 757 758 crc = crc32(UBI_CRC32_INIT, buf, data_size); 759 vid_hdr->vol_type = UBI_VID_STATIC; 760 vid_hdr->data_size = cpu_to_be32(data_size); 761 vid_hdr->used_ebs = cpu_to_be32(used_ebs); 762 vid_hdr->data_crc = cpu_to_be32(crc); 763 764 retry: 765 pnum = ubi_wl_get_peb(ubi, dtype); 766 if (pnum < 0) { 767 ubi_free_vid_hdr(ubi, vid_hdr); 768 leb_write_unlock(ubi, vol_id, lnum); 769 return pnum; 770 } 771 772 dbg_eba("write VID hdr and %d bytes at LEB %d:%d, PEB %d, used_ebs %d", 773 len, vol_id, lnum, pnum, used_ebs); 774 775 err = ubi_io_write_vid_hdr(ubi, pnum, vid_hdr); 776 if (err) { 777 ubi_warn("failed to write VID header to LEB %d:%d, PEB %d", 778 vol_id, lnum, pnum); 779 goto write_error; 780 } 781 782 err = ubi_io_write_data(ubi, buf, pnum, 0, len); 783 if (err) { 784 ubi_warn("failed to write %d bytes of data to PEB %d", 785 len, pnum); 786 goto write_error; 787 } 788 789 ubi_assert(vol->eba_tbl[lnum] < 0); 790 vol->eba_tbl[lnum] = pnum; 791 792 leb_write_unlock(ubi, vol_id, lnum); 793 
ubi_free_vid_hdr(ubi, vid_hdr); 794 return 0; 795 796 write_error: 797 if (err != -EIO || !ubi->bad_allowed) { 798 /* 799 * This flash device does not admit of bad eraseblocks or 800 * something nasty and unexpected happened. Switch to read-only 801 * mode just in case. 802 */ 803 ubi_ro_mode(ubi); 804 leb_write_unlock(ubi, vol_id, lnum); 805 ubi_free_vid_hdr(ubi, vid_hdr); 806 return err; 807 } 808 809 err = ubi_wl_put_peb(ubi, pnum, 1); 810 if (err || ++tries > UBI_IO_RETRIES) { 811 ubi_ro_mode(ubi); 812 leb_write_unlock(ubi, vol_id, lnum); 813 ubi_free_vid_hdr(ubi, vid_hdr); 814 return err; 815 } 816 817 vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); 818 ubi_msg("try another PEB"); 819 goto retry; 820 } 821 822 /* 823 * ubi_eba_atomic_leb_change - change logical eraseblock atomically. 824 * @ubi: UBI device description object 825 * @vol: volume description object 826 * @lnum: logical eraseblock number 827 * @buf: data to write 828 * @len: how many bytes to write 829 * @dtype: data type 830 * 831 * This function changes the contents of a logical eraseblock atomically. @buf 832 * has to contain new logical eraseblock data, and @len - the length of the 833 * data, which has to be aligned. This function guarantees that in case of an 834 * unclean reboot the old contents is preserved. Returns zero in case of 835 * success and a negative error code in case of failure. 836 * 837 * UBI reserves one LEB for the "atomic LEB change" operation, so only one 838 * LEB change may be done at a time. This is ensured by @ubi->alc_mutex. 839 */ 840 int ubi_eba_atomic_leb_change(struct ubi_device *ubi, struct ubi_volume *vol, 841 int lnum, const void *buf, int len, int dtype) 842 { 843 int err, pnum, tries = 0, vol_id = vol->vol_id; 844 struct ubi_vid_hdr *vid_hdr; 845 uint32_t crc; 846 847 if (ubi->ro_mode) 848 return -EROFS; 849 850 if (len == 0) { 851 /* 852 * Special case when data length is zero. In this case the LEB 853 * has to be unmapped and mapped somewhere else. 
854 */ 855 err = ubi_eba_unmap_leb(ubi, vol, lnum); 856 if (err) 857 return err; 858 return ubi_eba_write_leb(ubi, vol, lnum, NULL, 0, 0, dtype); 859 } 860 861 vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); 862 if (!vid_hdr) 863 return -ENOMEM; 864 865 mutex_lock(&ubi->alc_mutex); 866 err = leb_write_lock(ubi, vol_id, lnum); 867 if (err) 868 goto out_mutex; 869 870 vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); 871 vid_hdr->vol_id = cpu_to_be32(vol_id); 872 vid_hdr->lnum = cpu_to_be32(lnum); 873 vid_hdr->compat = ubi_get_compat(ubi, vol_id); 874 vid_hdr->data_pad = cpu_to_be32(vol->data_pad); 875 876 crc = crc32(UBI_CRC32_INIT, buf, len); 877 vid_hdr->vol_type = UBI_VID_DYNAMIC; 878 vid_hdr->data_size = cpu_to_be32(len); 879 vid_hdr->copy_flag = 1; 880 vid_hdr->data_crc = cpu_to_be32(crc); 881 882 retry: 883 pnum = ubi_wl_get_peb(ubi, dtype); 884 if (pnum < 0) { 885 err = pnum; 886 goto out_leb_unlock; 887 } 888 889 dbg_eba("change LEB %d:%d, PEB %d, write VID hdr to PEB %d", 890 vol_id, lnum, vol->eba_tbl[lnum], pnum); 891 892 err = ubi_io_write_vid_hdr(ubi, pnum, vid_hdr); 893 if (err) { 894 ubi_warn("failed to write VID header to LEB %d:%d, PEB %d", 895 vol_id, lnum, pnum); 896 goto write_error; 897 } 898 899 err = ubi_io_write_data(ubi, buf, pnum, 0, len); 900 if (err) { 901 ubi_warn("failed to write %d bytes of data to PEB %d", 902 len, pnum); 903 goto write_error; 904 } 905 906 if (vol->eba_tbl[lnum] >= 0) { 907 err = ubi_wl_put_peb(ubi, vol->eba_tbl[lnum], 0); 908 if (err) 909 goto out_leb_unlock; 910 } 911 912 vol->eba_tbl[lnum] = pnum; 913 914 out_leb_unlock: 915 leb_write_unlock(ubi, vol_id, lnum); 916 out_mutex: 917 mutex_unlock(&ubi->alc_mutex); 918 ubi_free_vid_hdr(ubi, vid_hdr); 919 return err; 920 921 write_error: 922 if (err != -EIO || !ubi->bad_allowed) { 923 /* 924 * This flash device does not admit of bad eraseblocks or 925 * something nasty and unexpected happened. Switch to read-only 926 * mode just in case. 
927 */ 928 ubi_ro_mode(ubi); 929 goto out_leb_unlock; 930 } 931 932 err = ubi_wl_put_peb(ubi, pnum, 1); 933 if (err || ++tries > UBI_IO_RETRIES) { 934 ubi_ro_mode(ubi); 935 goto out_leb_unlock; 936 } 937 938 vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); 939 ubi_msg("try another PEB"); 940 goto retry; 941 } 942 943 /** 944 * is_error_sane - check whether a read error is sane. 945 * @err: code of the error happened during reading 946 * 947 * This is a helper function for 'ubi_eba_copy_leb()' which is called when we 948 * cannot read data from the target PEB (an error @err happened). If the error 949 * code is sane, then we treat this error as non-fatal. Otherwise the error is 950 * fatal and UBI will be switched to R/O mode later. 951 * 952 * The idea is that we try not to switch to R/O mode if the read error is 953 * something which suggests there was a real read problem. E.g., %-EIO. Or a 954 * memory allocation failed (-%ENOMEM). Otherwise, it is safer to switch to R/O 955 * mode, simply because we do not know what happened at the MTD level, and we 956 * cannot handle this. E.g., the underlying driver may have become crazy, and 957 * it is safer to switch to R/O mode to preserve the data. 958 * 959 * And bear in mind, this is about reading from the target PEB, i.e. the PEB 960 * which we have just written. 961 */ 962 static int is_error_sane(int err) 963 { 964 if (err == -EIO || err == -ENOMEM || err == UBI_IO_BAD_VID_HDR || 965 err == -ETIMEDOUT) 966 return 0; 967 return 1; 968 } 969 970 /** 971 * ubi_eba_copy_leb - copy logical eraseblock. 972 * @ubi: UBI device description object 973 * @from: physical eraseblock number from where to copy 974 * @to: physical eraseblock number where to copy 975 * @vid_hdr: VID header of the @from physical eraseblock 976 * 977 * This function copies logical eraseblock from physical eraseblock @from to 978 * physical eraseblock @to. The @vid_hdr buffer may be changed by this 979 * function. 
Returns: 980 * o %0 in case of success; 981 * o %MOVE_CANCEL_RACE, %MOVE_TARGET_WR_ERR, %MOVE_CANCEL_BITFLIPS, etc; 982 * o a negative error code in case of failure. 983 */ 984 int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, 985 struct ubi_vid_hdr *vid_hdr) 986 { 987 int err, vol_id, lnum, data_size, aldata_size, idx; 988 struct ubi_volume *vol; 989 uint32_t crc; 990 991 vol_id = be32_to_cpu(vid_hdr->vol_id); 992 lnum = be32_to_cpu(vid_hdr->lnum); 993 994 dbg_wl("copy LEB %d:%d, PEB %d to PEB %d", vol_id, lnum, from, to); 995 996 if (vid_hdr->vol_type == UBI_VID_STATIC) { 997 data_size = be32_to_cpu(vid_hdr->data_size); 998 aldata_size = ALIGN(data_size, ubi->min_io_size); 999 } else 1000 data_size = aldata_size = 1001 ubi->leb_size - be32_to_cpu(vid_hdr->data_pad); 1002 1003 idx = vol_id2idx(ubi, vol_id); 1004 spin_lock(&ubi->volumes_lock); 1005 /* 1006 * Note, we may race with volume deletion, which means that the volume 1007 * this logical eraseblock belongs to might be being deleted. Since the 1008 * volume deletion un-maps all the volume's logical eraseblocks, it will 1009 * be locked in 'ubi_wl_put_peb()' and wait for the WL worker to finish. 1010 */ 1011 vol = ubi->volumes[idx]; 1012 spin_unlock(&ubi->volumes_lock); 1013 if (!vol) { 1014 /* No need to do further work, cancel */ 1015 dbg_wl("volume %d is being removed, cancel", vol_id); 1016 return MOVE_CANCEL_RACE; 1017 } 1018 1019 /* 1020 * We do not want anybody to write to this logical eraseblock while we 1021 * are moving it, so lock it. 1022 * 1023 * Note, we are using non-waiting locking here, because we cannot sleep 1024 * on the LEB, since it may cause deadlocks. Indeed, imagine a task is 1025 * unmapping the LEB which is mapped to the PEB we are going to move 1026 * (@from). This task locks the LEB and goes sleep in the 1027 * 'ubi_wl_put_peb()' function on the @ubi->move_mutex. In turn, we are 1028 * holding @ubi->move_mutex and go sleep on the LEB lock. 
So, if the 1029 * LEB is already locked, we just do not move it and return 1030 * %MOVE_CANCEL_RACE, which means that UBI will re-try, but later. 1031 */ 1032 err = leb_write_trylock(ubi, vol_id, lnum); 1033 if (err) { 1034 dbg_wl("contention on LEB %d:%d, cancel", vol_id, lnum); 1035 return MOVE_CANCEL_RACE; 1036 } 1037 1038 /* 1039 * The LEB might have been put meanwhile, and the task which put it is 1040 * probably waiting on @ubi->move_mutex. No need to continue the work, 1041 * cancel it. 1042 */ 1043 if (vol->eba_tbl[lnum] != from) { 1044 dbg_wl("LEB %d:%d is no longer mapped to PEB %d, mapped to " 1045 "PEB %d, cancel", vol_id, lnum, from, 1046 vol->eba_tbl[lnum]); 1047 err = MOVE_CANCEL_RACE; 1048 goto out_unlock_leb; 1049 } 1050 1051 /* 1052 * OK, now the LEB is locked and we can safely start moving it. Since 1053 * this function utilizes the @ubi->peb_buf1 buffer which is shared 1054 * with some other functions - we lock the buffer by taking the 1055 * @ubi->buf_mutex. 1056 */ 1057 mutex_lock(&ubi->buf_mutex); 1058 dbg_wl("read %d bytes of data", aldata_size); 1059 err = ubi_io_read_data(ubi, ubi->peb_buf1, from, 0, aldata_size); 1060 if (err && err != UBI_IO_BITFLIPS) { 1061 ubi_warn("error %d while reading data from PEB %d", 1062 err, from); 1063 err = MOVE_SOURCE_RD_ERR; 1064 goto out_unlock_buf; 1065 } 1066 1067 /* 1068 * Now we have got to calculate how much data we have to copy. In 1069 * case of a static volume it is fairly easy - the VID header contains 1070 * the data size. In case of a dynamic volume it is more difficult - we 1071 * have to read the contents, cut 0xFF bytes from the end and copy only 1072 * the first part. We must do this to avoid writing 0xFF bytes as it 1073 * may have some side-effects. And not only this. It is important not 1074 * to include those 0xFFs to CRC because later the they may be filled 1075 * by data. 
1076 */ 1077 if (vid_hdr->vol_type == UBI_VID_DYNAMIC) 1078 aldata_size = data_size = 1079 ubi_calc_data_len(ubi, ubi->peb_buf1, data_size); 1080 1081 cond_resched(); 1082 crc = crc32(UBI_CRC32_INIT, ubi->peb_buf1, data_size); 1083 cond_resched(); 1084 1085 /* 1086 * It may turn out to be that the whole @from physical eraseblock 1087 * contains only 0xFF bytes. Then we have to only write the VID header 1088 * and do not write any data. This also means we should not set 1089 * @vid_hdr->copy_flag, @vid_hdr->data_size, and @vid_hdr->data_crc. 1090 */ 1091 if (data_size > 0) { 1092 vid_hdr->copy_flag = 1; 1093 vid_hdr->data_size = cpu_to_be32(data_size); 1094 vid_hdr->data_crc = cpu_to_be32(crc); 1095 } 1096 vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); 1097 1098 err = ubi_io_write_vid_hdr(ubi, to, vid_hdr); 1099 if (err) { 1100 if (err == -EIO) 1101 err = MOVE_TARGET_WR_ERR; 1102 goto out_unlock_buf; 1103 } 1104 1105 cond_resched(); 1106 1107 /* Read the VID header back and check if it was written correctly */ 1108 err = ubi_io_read_vid_hdr(ubi, to, vid_hdr, 1); 1109 if (err) { 1110 if (err != UBI_IO_BITFLIPS) { 1111 ubi_warn("error %d while reading VID header back from " 1112 "PEB %d", err, to); 1113 if (is_error_sane(err)) 1114 err = MOVE_TARGET_RD_ERR; 1115 } else 1116 err = MOVE_CANCEL_BITFLIPS; 1117 goto out_unlock_buf; 1118 } 1119 1120 if (data_size > 0) { 1121 err = ubi_io_write_data(ubi, ubi->peb_buf1, to, 0, aldata_size); 1122 if (err) { 1123 if (err == -EIO) 1124 err = MOVE_TARGET_WR_ERR; 1125 goto out_unlock_buf; 1126 } 1127 1128 cond_resched(); 1129 1130 /* 1131 * We've written the data and are going to read it back to make 1132 * sure it was written correctly. 
1133 */ 1134 1135 err = ubi_io_read_data(ubi, ubi->peb_buf2, to, 0, aldata_size); 1136 if (err) { 1137 if (err != UBI_IO_BITFLIPS) { 1138 ubi_warn("error %d while reading data back " 1139 "from PEB %d", err, to); 1140 if (is_error_sane(err)) 1141 err = MOVE_TARGET_RD_ERR; 1142 } else 1143 err = MOVE_CANCEL_BITFLIPS; 1144 goto out_unlock_buf; 1145 } 1146 1147 cond_resched(); 1148 1149 if (memcmp(ubi->peb_buf1, ubi->peb_buf2, aldata_size)) { 1150 ubi_warn("read data back from PEB %d and it is " 1151 "different", to); 1152 err = -EINVAL; 1153 goto out_unlock_buf; 1154 } 1155 } 1156 1157 ubi_assert(vol->eba_tbl[lnum] == from); 1158 vol->eba_tbl[lnum] = to; 1159 1160 out_unlock_buf: 1161 mutex_unlock(&ubi->buf_mutex); 1162 out_unlock_leb: 1163 leb_write_unlock(ubi, vol_id, lnum); 1164 return err; 1165 } 1166 1167 /** 1168 * ubi_eba_init_scan - initialize the EBA sub-system using scanning information. 1169 * @ubi: UBI device description object 1170 * @si: scanning information 1171 * 1172 * This function returns zero in case of success and a negative error code in 1173 * case of failure. 
1174 */ 1175 int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) 1176 { 1177 int i, j, err, num_volumes; 1178 struct ubi_scan_volume *sv; 1179 struct ubi_volume *vol; 1180 struct ubi_scan_leb *seb; 1181 struct rb_node *rb; 1182 1183 dbg_eba("initialize EBA sub-system"); 1184 1185 spin_lock_init(&ubi->ltree_lock); 1186 mutex_init(&ubi->alc_mutex); 1187 ubi->ltree = RB_ROOT; 1188 1189 ubi->global_sqnum = si->max_sqnum + 1; 1190 num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT; 1191 1192 for (i = 0; i < num_volumes; i++) { 1193 vol = ubi->volumes[i]; 1194 if (!vol) 1195 continue; 1196 1197 cond_resched(); 1198 1199 vol->eba_tbl = kmalloc(vol->reserved_pebs * sizeof(int), 1200 GFP_KERNEL); 1201 if (!vol->eba_tbl) { 1202 err = -ENOMEM; 1203 goto out_free; 1204 } 1205 1206 for (j = 0; j < vol->reserved_pebs; j++) 1207 vol->eba_tbl[j] = UBI_LEB_UNMAPPED; 1208 1209 sv = ubi_scan_find_sv(si, idx2vol_id(ubi, i)); 1210 if (!sv) 1211 continue; 1212 1213 ubi_rb_for_each_entry(rb, seb, &sv->root, u.rb) { 1214 if (seb->lnum >= vol->reserved_pebs) 1215 /* 1216 * This may happen in case of an unclean reboot 1217 * during re-size. 
1218 */ 1219 ubi_scan_move_to_list(sv, seb, &si->erase); 1220 vol->eba_tbl[seb->lnum] = seb->pnum; 1221 } 1222 } 1223 1224 if (ubi->avail_pebs < EBA_RESERVED_PEBS) { 1225 ubi_err("no enough physical eraseblocks (%d, need %d)", 1226 ubi->avail_pebs, EBA_RESERVED_PEBS); 1227 err = -ENOSPC; 1228 goto out_free; 1229 } 1230 ubi->avail_pebs -= EBA_RESERVED_PEBS; 1231 ubi->rsvd_pebs += EBA_RESERVED_PEBS; 1232 1233 if (ubi->bad_allowed) { 1234 ubi_calculate_reserved(ubi); 1235 1236 if (ubi->avail_pebs < ubi->beb_rsvd_level) { 1237 /* No enough free physical eraseblocks */ 1238 ubi->beb_rsvd_pebs = ubi->avail_pebs; 1239 ubi_warn("cannot reserve enough PEBs for bad PEB " 1240 "handling, reserved %d, need %d", 1241 ubi->beb_rsvd_pebs, ubi->beb_rsvd_level); 1242 } else 1243 ubi->beb_rsvd_pebs = ubi->beb_rsvd_level; 1244 1245 ubi->avail_pebs -= ubi->beb_rsvd_pebs; 1246 ubi->rsvd_pebs += ubi->beb_rsvd_pebs; 1247 } 1248 1249 dbg_eba("EBA sub-system is initialized"); 1250 return 0; 1251 1252 out_free: 1253 for (i = 0; i < num_volumes; i++) { 1254 if (!ubi->volumes[i]) 1255 continue; 1256 kfree(ubi->volumes[i]->eba_tbl); 1257 ubi->volumes[i]->eba_tbl = NULL; 1258 } 1259 return err; 1260 } 1261