1 /* 2 * Copyright (c) International Business Machines Corp., 2006 3 * 4 * SPDX-License-Identifier: GPL-2.0+ 5 * 6 * Authors: Artem Bityutskiy (Битюцкий Артём), Thomas Gleixner 7 */ 8 9 /* 10 * UBI wear-leveling sub-system. 11 * 12 * This sub-system is responsible for wear-leveling. It works in terms of 13 * physical eraseblocks and erase counters and knows nothing about logical 14 * eraseblocks, volumes, etc. From this sub-system's perspective all physical 15 * eraseblocks are of two types - used and free. Used physical eraseblocks are 16 * those that were "get" by the 'ubi_wl_get_peb()' function, and free physical 17 * eraseblocks are those that were put by the 'ubi_wl_put_peb()' function. 18 * 19 * Physical eraseblocks returned by 'ubi_wl_get_peb()' have only erase counter 20 * header. The rest of the physical eraseblock contains only %0xFF bytes. 21 * 22 * When physical eraseblocks are returned to the WL sub-system by means of the 23 * 'ubi_wl_put_peb()' function, they are scheduled for erasure. The erasure is 24 * done asynchronously in context of the per-UBI device background thread, 25 * which is also managed by the WL sub-system. 26 * 27 * The wear-leveling is ensured by means of moving the contents of used 28 * physical eraseblocks with low erase counter to free physical eraseblocks 29 * with high erase counter. 30 * 31 * If the WL sub-system fails to erase a physical eraseblock, it marks it as 32 * bad. 33 * 34 * This sub-system is also responsible for scrubbing. If a bit-flip is detected 35 * in a physical eraseblock, it has to be moved. Technically this is the same 36 * as moving it for wear-leveling reasons. 37 * 38 * As it was said, for the UBI sub-system all physical eraseblocks are either 39 * "free" or "used". Free eraseblock are kept in the @wl->free RB-tree, while 40 * used eraseblocks are kept in @wl->used, @wl->erroneous, or @wl->scrub 41 * RB-trees, as well as (temporarily) in the @wl->pq queue. 
*
 * When the WL sub-system returns a physical eraseblock, the physical
 * eraseblock is protected from being moved for some "time". For this reason,
 * the physical eraseblock is not directly moved from the @wl->free tree to the
 * @wl->used tree. There is a protection queue in between where this
 * physical eraseblock is temporarily stored (@wl->pq).
 *
 * All this protection stuff is needed because:
 *  o we don't want to move physical eraseblocks just after we have given them
 *    to the user; instead, we first want to let users fill them up with data;
 *
 *  o there is a chance that the user will put the physical eraseblock very
 *    soon, so it makes sense not to move it for some time, but wait.
 *
 * Physical eraseblocks stay protected only for limited time. But the "time" is
 * measured in erase cycles in this case. This is implemented with help of the
 * protection queue. Eraseblocks are put to the tail of this queue when they
 * are returned by the 'ubi_wl_get_peb()', and eraseblocks are removed from the
 * head of the queue on each erase operation (for any eraseblock). So the
 * length of the queue defines how many (global) erase cycles PEBs are
 * protected.
 *
 * To put it differently, each physical eraseblock has 2 main states: free and
 * used. The former state corresponds to the @wl->free tree. The latter state
 * is split up on several sub-states:
 * o the WL movement is allowed (@wl->used tree);
 * o the WL movement is disallowed (@wl->erroneous) because the PEB is
 *   erroneous - e.g., there was a read error;
 * o the WL movement is temporarily prohibited (@wl->pq queue);
 * o scrubbing is needed (@wl->scrub tree).
 *
 * Depending on the sub-state, wear-leveling entries of the used physical
 * eraseblocks may be kept in one of those structures.
 *
 * Note, in this implementation, we keep a small in-RAM object for each physical
 * eraseblock. This is surely not a scalable solution.
But it appears to be good 77 * enough for moderately large flashes and it is simple. In future, one may 78 * re-work this sub-system and make it more scalable. 79 * 80 * At the moment this sub-system does not utilize the sequence number, which 81 * was introduced relatively recently. But it would be wise to do this because 82 * the sequence number of a logical eraseblock characterizes how old is it. For 83 * example, when we move a PEB with low erase counter, and we need to pick the 84 * target PEB, we pick a PEB with the highest EC if our PEB is "old" and we 85 * pick target PEB with an average EC if our PEB is not very "old". This is a 86 * room for future re-works of the WL sub-system. 87 */ 88 89 #ifndef __UBOOT__ 90 #include <linux/slab.h> 91 #include <linux/crc32.h> 92 #include <linux/freezer.h> 93 #include <linux/kthread.h> 94 #else 95 #include <ubi_uboot.h> 96 #endif 97 98 #include "ubi.h" 99 #include "wl.h" 100 101 /* Number of physical eraseblocks reserved for wear-leveling purposes */ 102 #define WL_RESERVED_PEBS 1 103 104 /* 105 * Maximum difference between two erase counters. If this threshold is 106 * exceeded, the WL sub-system starts moving data from used physical 107 * eraseblocks with low erase counter to free physical eraseblocks with high 108 * erase counter. 109 */ 110 #define UBI_WL_THRESHOLD CONFIG_MTD_UBI_WL_THRESHOLD 111 112 /* 113 * When a physical eraseblock is moved, the WL sub-system has to pick the target 114 * physical eraseblock to move to. The simplest way would be just to pick the 115 * one with the highest erase counter. But in certain workloads this could lead 116 * to an unlimited wear of one or few physical eraseblock. Indeed, imagine a 117 * situation when the picked physical eraseblock is constantly erased after the 118 * data is written to it. So, we have a constant which limits the highest erase 119 * counter of the free physical eraseblock to pick. 
Namely, the WL sub-system 120 * does not pick eraseblocks with erase counter greater than the lowest erase 121 * counter plus %WL_FREE_MAX_DIFF. 122 */ 123 #define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD) 124 125 /* 126 * Maximum number of consecutive background thread failures which is enough to 127 * switch to read-only mode. 128 */ 129 #define WL_MAX_FAILURES 32 130 131 static int self_check_ec(struct ubi_device *ubi, int pnum, int ec); 132 static int self_check_in_wl_tree(const struct ubi_device *ubi, 133 struct ubi_wl_entry *e, struct rb_root *root); 134 static int self_check_in_pq(const struct ubi_device *ubi, 135 struct ubi_wl_entry *e); 136 137 /** 138 * wl_tree_add - add a wear-leveling entry to a WL RB-tree. 139 * @e: the wear-leveling entry to add 140 * @root: the root of the tree 141 * 142 * Note, we use (erase counter, physical eraseblock number) pairs as keys in 143 * the @ubi->used and @ubi->free RB-trees. 144 */ 145 static void wl_tree_add(struct ubi_wl_entry *e, struct rb_root *root) 146 { 147 struct rb_node **p, *parent = NULL; 148 149 p = &root->rb_node; 150 while (*p) { 151 struct ubi_wl_entry *e1; 152 153 parent = *p; 154 e1 = rb_entry(parent, struct ubi_wl_entry, u.rb); 155 156 if (e->ec < e1->ec) 157 p = &(*p)->rb_left; 158 else if (e->ec > e1->ec) 159 p = &(*p)->rb_right; 160 else { 161 ubi_assert(e->pnum != e1->pnum); 162 if (e->pnum < e1->pnum) 163 p = &(*p)->rb_left; 164 else 165 p = &(*p)->rb_right; 166 } 167 } 168 169 rb_link_node(&e->u.rb, parent, p); 170 rb_insert_color(&e->u.rb, root); 171 } 172 173 /** 174 * wl_tree_destroy - destroy a wear-leveling entry. 175 * @ubi: UBI device description object 176 * @e: the wear-leveling entry to add 177 * 178 * This function destroys a wear leveling entry and removes 179 * the reference from the lookup table. 
180 */ 181 static void wl_entry_destroy(struct ubi_device *ubi, struct ubi_wl_entry *e) 182 { 183 ubi->lookuptbl[e->pnum] = NULL; 184 kmem_cache_free(ubi_wl_entry_slab, e); 185 } 186 187 /** 188 * do_work - do one pending work. 189 * @ubi: UBI device description object 190 * 191 * This function returns zero in case of success and a negative error code in 192 * case of failure. 193 */ 194 #ifndef __UBOOT__ 195 static int do_work(struct ubi_device *ubi) 196 #else 197 int do_work(struct ubi_device *ubi) 198 #endif 199 { 200 int err; 201 struct ubi_work *wrk; 202 203 cond_resched(); 204 205 /* 206 * @ubi->work_sem is used to synchronize with the workers. Workers take 207 * it in read mode, so many of them may be doing works at a time. But 208 * the queue flush code has to be sure the whole queue of works is 209 * done, and it takes the mutex in write mode. 210 */ 211 down_read(&ubi->work_sem); 212 spin_lock(&ubi->wl_lock); 213 if (list_empty(&ubi->works)) { 214 spin_unlock(&ubi->wl_lock); 215 up_read(&ubi->work_sem); 216 return 0; 217 } 218 219 wrk = list_entry(ubi->works.next, struct ubi_work, list); 220 list_del(&wrk->list); 221 ubi->works_count -= 1; 222 ubi_assert(ubi->works_count >= 0); 223 spin_unlock(&ubi->wl_lock); 224 225 /* 226 * Call the worker function. Do not touch the work structure 227 * after this call as it will have been freed or reused by that 228 * time by the worker function. 229 */ 230 err = wrk->func(ubi, wrk, 0); 231 if (err) 232 ubi_err(ubi, "work failed with error code %d", err); 233 up_read(&ubi->work_sem); 234 235 return err; 236 } 237 238 /** 239 * in_wl_tree - check if wear-leveling entry is present in a WL RB-tree. 240 * @e: the wear-leveling entry to check 241 * @root: the root of the tree 242 * 243 * This function returns non-zero if @e is in the @root RB-tree and zero if it 244 * is not. 
245 */ 246 static int in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root) 247 { 248 struct rb_node *p; 249 250 p = root->rb_node; 251 while (p) { 252 struct ubi_wl_entry *e1; 253 254 e1 = rb_entry(p, struct ubi_wl_entry, u.rb); 255 256 if (e->pnum == e1->pnum) { 257 ubi_assert(e == e1); 258 return 1; 259 } 260 261 if (e->ec < e1->ec) 262 p = p->rb_left; 263 else if (e->ec > e1->ec) 264 p = p->rb_right; 265 else { 266 ubi_assert(e->pnum != e1->pnum); 267 if (e->pnum < e1->pnum) 268 p = p->rb_left; 269 else 270 p = p->rb_right; 271 } 272 } 273 274 return 0; 275 } 276 277 /** 278 * prot_queue_add - add physical eraseblock to the protection queue. 279 * @ubi: UBI device description object 280 * @e: the physical eraseblock to add 281 * 282 * This function adds @e to the tail of the protection queue @ubi->pq, where 283 * @e will stay for %UBI_PROT_QUEUE_LEN erase operations and will be 284 * temporarily protected from the wear-leveling worker. Note, @wl->lock has to 285 * be locked. 286 */ 287 static void prot_queue_add(struct ubi_device *ubi, struct ubi_wl_entry *e) 288 { 289 int pq_tail = ubi->pq_head - 1; 290 291 if (pq_tail < 0) 292 pq_tail = UBI_PROT_QUEUE_LEN - 1; 293 ubi_assert(pq_tail >= 0 && pq_tail < UBI_PROT_QUEUE_LEN); 294 list_add_tail(&e->u.list, &ubi->pq[pq_tail]); 295 dbg_wl("added PEB %d EC %d to the protection queue", e->pnum, e->ec); 296 } 297 298 /** 299 * find_wl_entry - find wear-leveling entry closest to certain erase counter. 300 * @ubi: UBI device description object 301 * @root: the RB-tree where to look for 302 * @diff: maximum possible difference from the smallest erase counter 303 * 304 * This function looks for a wear leveling entry with erase counter closest to 305 * min + @diff, where min is the smallest erase counter. 
306 */ 307 static struct ubi_wl_entry *find_wl_entry(struct ubi_device *ubi, 308 struct rb_root *root, int diff) 309 { 310 struct rb_node *p; 311 struct ubi_wl_entry *e, *prev_e = NULL; 312 int max; 313 314 e = rb_entry(rb_first(root), struct ubi_wl_entry, u.rb); 315 max = e->ec + diff; 316 317 p = root->rb_node; 318 while (p) { 319 struct ubi_wl_entry *e1; 320 321 e1 = rb_entry(p, struct ubi_wl_entry, u.rb); 322 if (e1->ec >= max) 323 p = p->rb_left; 324 else { 325 p = p->rb_right; 326 prev_e = e; 327 e = e1; 328 } 329 } 330 331 /* If no fastmap has been written and this WL entry can be used 332 * as anchor PEB, hold it back and return the second best WL entry 333 * such that fastmap can use the anchor PEB later. */ 334 if (prev_e && !ubi->fm_disabled && 335 !ubi->fm && e->pnum < UBI_FM_MAX_START) 336 return prev_e; 337 338 return e; 339 } 340 341 /** 342 * find_mean_wl_entry - find wear-leveling entry with medium erase counter. 343 * @ubi: UBI device description object 344 * @root: the RB-tree where to look for 345 * 346 * This function looks for a wear leveling entry with medium erase counter, 347 * but not greater or equivalent than the lowest erase counter plus 348 * %WL_FREE_MAX_DIFF/2. 349 */ 350 static struct ubi_wl_entry *find_mean_wl_entry(struct ubi_device *ubi, 351 struct rb_root *root) 352 { 353 struct ubi_wl_entry *e, *first, *last; 354 355 first = rb_entry(rb_first(root), struct ubi_wl_entry, u.rb); 356 last = rb_entry(rb_last(root), struct ubi_wl_entry, u.rb); 357 358 if (last->ec - first->ec < WL_FREE_MAX_DIFF) { 359 e = rb_entry(root->rb_node, struct ubi_wl_entry, u.rb); 360 361 /* If no fastmap has been written and this WL entry can be used 362 * as anchor PEB, hold it back and return the second best 363 * WL entry such that fastmap can use the anchor PEB later. 
*/ 364 e = may_reserve_for_fm(ubi, e, root); 365 } else 366 e = find_wl_entry(ubi, root, WL_FREE_MAX_DIFF/2); 367 368 return e; 369 } 370 371 /** 372 * wl_get_wle - get a mean wl entry to be used by ubi_wl_get_peb() or 373 * refill_wl_user_pool(). 374 * @ubi: UBI device description object 375 * 376 * This function returns a a wear leveling entry in case of success and 377 * NULL in case of failure. 378 */ 379 static struct ubi_wl_entry *wl_get_wle(struct ubi_device *ubi) 380 { 381 struct ubi_wl_entry *e; 382 383 e = find_mean_wl_entry(ubi, &ubi->free); 384 if (!e) { 385 ubi_err(ubi, "no free eraseblocks"); 386 return NULL; 387 } 388 389 self_check_in_wl_tree(ubi, e, &ubi->free); 390 391 /* 392 * Move the physical eraseblock to the protection queue where it will 393 * be protected from being moved for some time. 394 */ 395 rb_erase(&e->u.rb, &ubi->free); 396 ubi->free_count--; 397 dbg_wl("PEB %d EC %d", e->pnum, e->ec); 398 399 return e; 400 } 401 402 /** 403 * prot_queue_del - remove a physical eraseblock from the protection queue. 404 * @ubi: UBI device description object 405 * @pnum: the physical eraseblock to remove 406 * 407 * This function deletes PEB @pnum from the protection queue and returns zero 408 * in case of success and %-ENODEV if the PEB was not found. 409 */ 410 static int prot_queue_del(struct ubi_device *ubi, int pnum) 411 { 412 struct ubi_wl_entry *e; 413 414 e = ubi->lookuptbl[pnum]; 415 if (!e) 416 return -ENODEV; 417 418 if (self_check_in_pq(ubi, e)) 419 return -ENODEV; 420 421 list_del(&e->u.list); 422 dbg_wl("deleted PEB %d from the protection queue", e->pnum); 423 return 0; 424 } 425 426 /** 427 * sync_erase - synchronously erase a physical eraseblock. 428 * @ubi: UBI device description object 429 * @e: the the physical eraseblock to erase 430 * @torture: if the physical eraseblock has to be tortured 431 * 432 * This function returns zero in case of success and a negative error code in 433 * case of failure. 
434 */ 435 static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, 436 int torture) 437 { 438 int err; 439 struct ubi_ec_hdr *ec_hdr; 440 unsigned long long ec = e->ec; 441 442 dbg_wl("erase PEB %d, old EC %llu", e->pnum, ec); 443 444 err = self_check_ec(ubi, e->pnum, e->ec); 445 if (err) 446 return -EINVAL; 447 448 ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS); 449 if (!ec_hdr) 450 return -ENOMEM; 451 452 err = ubi_io_sync_erase(ubi, e->pnum, torture); 453 if (err < 0) 454 goto out_free; 455 456 ec += err; 457 if (ec > UBI_MAX_ERASECOUNTER) { 458 /* 459 * Erase counter overflow. Upgrade UBI and use 64-bit 460 * erase counters internally. 461 */ 462 ubi_err(ubi, "erase counter overflow at PEB %d, EC %llu", 463 e->pnum, ec); 464 err = -EINVAL; 465 goto out_free; 466 } 467 468 dbg_wl("erased PEB %d, new EC %llu", e->pnum, ec); 469 470 ec_hdr->ec = cpu_to_be64(ec); 471 472 err = ubi_io_write_ec_hdr(ubi, e->pnum, ec_hdr); 473 if (err) 474 goto out_free; 475 476 e->ec = ec; 477 spin_lock(&ubi->wl_lock); 478 if (e->ec > ubi->max_ec) 479 ubi->max_ec = e->ec; 480 spin_unlock(&ubi->wl_lock); 481 482 out_free: 483 kfree(ec_hdr); 484 return err; 485 } 486 487 /** 488 * serve_prot_queue - check if it is time to stop protecting PEBs. 489 * @ubi: UBI device description object 490 * 491 * This function is called after each erase operation and removes PEBs from the 492 * tail of the protection queue. These PEBs have been protected for long enough 493 * and should be moved to the used tree. 494 */ 495 static void serve_prot_queue(struct ubi_device *ubi) 496 { 497 struct ubi_wl_entry *e, *tmp; 498 int count; 499 500 /* 501 * There may be several protected physical eraseblock to remove, 502 * process them all. 
503 */ 504 repeat: 505 count = 0; 506 spin_lock(&ubi->wl_lock); 507 list_for_each_entry_safe(e, tmp, &ubi->pq[ubi->pq_head], u.list) { 508 dbg_wl("PEB %d EC %d protection over, move to used tree", 509 e->pnum, e->ec); 510 511 list_del(&e->u.list); 512 wl_tree_add(e, &ubi->used); 513 if (count++ > 32) { 514 /* 515 * Let's be nice and avoid holding the spinlock for 516 * too long. 517 */ 518 spin_unlock(&ubi->wl_lock); 519 cond_resched(); 520 goto repeat; 521 } 522 } 523 524 ubi->pq_head += 1; 525 if (ubi->pq_head == UBI_PROT_QUEUE_LEN) 526 ubi->pq_head = 0; 527 ubi_assert(ubi->pq_head >= 0 && ubi->pq_head < UBI_PROT_QUEUE_LEN); 528 spin_unlock(&ubi->wl_lock); 529 } 530 531 /** 532 * __schedule_ubi_work - schedule a work. 533 * @ubi: UBI device description object 534 * @wrk: the work to schedule 535 * 536 * This function adds a work defined by @wrk to the tail of the pending works 537 * list. Can only be used if ubi->work_sem is already held in read mode! 538 */ 539 static void __schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk) 540 { 541 spin_lock(&ubi->wl_lock); 542 list_add_tail(&wrk->list, &ubi->works); 543 ubi_assert(ubi->works_count >= 0); 544 ubi->works_count += 1; 545 #ifndef __UBOOT__ 546 if (ubi->thread_enabled && !ubi_dbg_is_bgt_disabled(ubi)) 547 wake_up_process(ubi->bgt_thread); 548 #else 549 int err; 550 /* 551 * U-Boot special: We have no bgt_thread in U-Boot! 552 * So just call do_work() here directly. 553 */ 554 err = do_work(ubi); 555 if (err) { 556 ubi_err(ubi, "%s: work failed with error code %d", 557 ubi->bgt_name, err); 558 } 559 #endif 560 spin_unlock(&ubi->wl_lock); 561 } 562 563 /** 564 * schedule_ubi_work - schedule a work. 565 * @ubi: UBI device description object 566 * @wrk: the work to schedule 567 * 568 * This function adds a work defined by @wrk to the tail of the pending works 569 * list. 
570 */ 571 static void schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk) 572 { 573 down_read(&ubi->work_sem); 574 __schedule_ubi_work(ubi, wrk); 575 up_read(&ubi->work_sem); 576 } 577 578 static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, 579 int shutdown); 580 581 /** 582 * schedule_erase - schedule an erase work. 583 * @ubi: UBI device description object 584 * @e: the WL entry of the physical eraseblock to erase 585 * @vol_id: the volume ID that last used this PEB 586 * @lnum: the last used logical eraseblock number for the PEB 587 * @torture: if the physical eraseblock has to be tortured 588 * 589 * This function returns zero in case of success and a %-ENOMEM in case of 590 * failure. 591 */ 592 static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, 593 int vol_id, int lnum, int torture) 594 { 595 struct ubi_work *wl_wrk; 596 597 ubi_assert(e); 598 599 dbg_wl("schedule erasure of PEB %d, EC %d, torture %d", 600 e->pnum, e->ec, torture); 601 602 wl_wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS); 603 if (!wl_wrk) 604 return -ENOMEM; 605 606 wl_wrk->func = &erase_worker; 607 wl_wrk->e = e; 608 wl_wrk->vol_id = vol_id; 609 wl_wrk->lnum = lnum; 610 wl_wrk->torture = torture; 611 612 schedule_ubi_work(ubi, wl_wrk); 613 return 0; 614 } 615 616 /** 617 * do_sync_erase - run the erase worker synchronously. 
618 * @ubi: UBI device description object 619 * @e: the WL entry of the physical eraseblock to erase 620 * @vol_id: the volume ID that last used this PEB 621 * @lnum: the last used logical eraseblock number for the PEB 622 * @torture: if the physical eraseblock has to be tortured 623 * 624 */ 625 static int do_sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, 626 int vol_id, int lnum, int torture) 627 { 628 struct ubi_work *wl_wrk; 629 630 dbg_wl("sync erase of PEB %i", e->pnum); 631 632 wl_wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS); 633 if (!wl_wrk) 634 return -ENOMEM; 635 636 wl_wrk->e = e; 637 wl_wrk->vol_id = vol_id; 638 wl_wrk->lnum = lnum; 639 wl_wrk->torture = torture; 640 641 return erase_worker(ubi, wl_wrk, 0); 642 } 643 644 /** 645 * wear_leveling_worker - wear-leveling worker function. 646 * @ubi: UBI device description object 647 * @wrk: the work object 648 * @shutdown: non-zero if the worker has to free memory and exit 649 * because the WL-subsystem is shutting down 650 * 651 * This function copies a more worn out physical eraseblock to a less worn out 652 * one. Returns zero in case of success and a negative error code in case of 653 * failure. 654 */ 655 static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, 656 int shutdown) 657 { 658 int err, scrubbing = 0, torture = 0, protect = 0, erroneous = 0; 659 int vol_id = -1, lnum = -1; 660 #ifdef CONFIG_MTD_UBI_FASTMAP 661 int anchor = wrk->anchor; 662 #endif 663 struct ubi_wl_entry *e1, *e2; 664 struct ubi_vid_hdr *vid_hdr; 665 666 kfree(wrk); 667 if (shutdown) 668 return 0; 669 670 vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); 671 if (!vid_hdr) 672 return -ENOMEM; 673 674 mutex_lock(&ubi->move_mutex); 675 spin_lock(&ubi->wl_lock); 676 ubi_assert(!ubi->move_from && !ubi->move_to); 677 ubi_assert(!ubi->move_to_put); 678 679 if (!ubi->free.rb_node || 680 (!ubi->used.rb_node && !ubi->scrub.rb_node)) { 681 /* 682 * No free physical eraseblocks? 
Well, they must be waiting in 683 * the queue to be erased. Cancel movement - it will be 684 * triggered again when a free physical eraseblock appears. 685 * 686 * No used physical eraseblocks? They must be temporarily 687 * protected from being moved. They will be moved to the 688 * @ubi->used tree later and the wear-leveling will be 689 * triggered again. 690 */ 691 dbg_wl("cancel WL, a list is empty: free %d, used %d", 692 !ubi->free.rb_node, !ubi->used.rb_node); 693 goto out_cancel; 694 } 695 696 #ifdef CONFIG_MTD_UBI_FASTMAP 697 /* Check whether we need to produce an anchor PEB */ 698 if (!anchor) 699 anchor = !anchor_pebs_avalible(&ubi->free); 700 701 if (anchor) { 702 e1 = find_anchor_wl_entry(&ubi->used); 703 if (!e1) 704 goto out_cancel; 705 e2 = get_peb_for_wl(ubi); 706 if (!e2) 707 goto out_cancel; 708 709 self_check_in_wl_tree(ubi, e1, &ubi->used); 710 rb_erase(&e1->u.rb, &ubi->used); 711 dbg_wl("anchor-move PEB %d to PEB %d", e1->pnum, e2->pnum); 712 } else if (!ubi->scrub.rb_node) { 713 #else 714 if (!ubi->scrub.rb_node) { 715 #endif 716 /* 717 * Now pick the least worn-out used physical eraseblock and a 718 * highly worn-out free physical eraseblock. If the erase 719 * counters differ much enough, start wear-leveling. 
720 */ 721 e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb); 722 e2 = get_peb_for_wl(ubi); 723 if (!e2) 724 goto out_cancel; 725 726 if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) { 727 dbg_wl("no WL needed: min used EC %d, max free EC %d", 728 e1->ec, e2->ec); 729 730 /* Give the unused PEB back */ 731 wl_tree_add(e2, &ubi->free); 732 ubi->free_count++; 733 goto out_cancel; 734 } 735 self_check_in_wl_tree(ubi, e1, &ubi->used); 736 rb_erase(&e1->u.rb, &ubi->used); 737 dbg_wl("move PEB %d EC %d to PEB %d EC %d", 738 e1->pnum, e1->ec, e2->pnum, e2->ec); 739 } else { 740 /* Perform scrubbing */ 741 scrubbing = 1; 742 e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, u.rb); 743 e2 = get_peb_for_wl(ubi); 744 if (!e2) 745 goto out_cancel; 746 747 self_check_in_wl_tree(ubi, e1, &ubi->scrub); 748 rb_erase(&e1->u.rb, &ubi->scrub); 749 dbg_wl("scrub PEB %d to PEB %d", e1->pnum, e2->pnum); 750 } 751 752 ubi->move_from = e1; 753 ubi->move_to = e2; 754 spin_unlock(&ubi->wl_lock); 755 756 /* 757 * Now we are going to copy physical eraseblock @e1->pnum to @e2->pnum. 758 * We so far do not know which logical eraseblock our physical 759 * eraseblock (@e1) belongs to. We have to read the volume identifier 760 * header first. 761 * 762 * Note, we are protected from this PEB being unmapped and erased. The 763 * 'ubi_wl_put_peb()' would wait for moving to be finished if the PEB 764 * which is being moved was unmapped. 765 */ 766 767 err = ubi_io_read_vid_hdr(ubi, e1->pnum, vid_hdr, 0); 768 if (err && err != UBI_IO_BITFLIPS) { 769 if (err == UBI_IO_FF) { 770 /* 771 * We are trying to move PEB without a VID header. UBI 772 * always write VID headers shortly after the PEB was 773 * given, so we have a situation when it has not yet 774 * had a chance to write it, because it was preempted. 775 * So add this PEB to the protection queue so far, 776 * because presumably more data will be written there 777 * (including the missing VID header), and then we'll 778 * move it. 
779 */ 780 dbg_wl("PEB %d has no VID header", e1->pnum); 781 protect = 1; 782 goto out_not_moved; 783 } else if (err == UBI_IO_FF_BITFLIPS) { 784 /* 785 * The same situation as %UBI_IO_FF, but bit-flips were 786 * detected. It is better to schedule this PEB for 787 * scrubbing. 788 */ 789 dbg_wl("PEB %d has no VID header but has bit-flips", 790 e1->pnum); 791 scrubbing = 1; 792 goto out_not_moved; 793 } 794 795 ubi_err(ubi, "error %d while reading VID header from PEB %d", 796 err, e1->pnum); 797 goto out_error; 798 } 799 800 vol_id = be32_to_cpu(vid_hdr->vol_id); 801 lnum = be32_to_cpu(vid_hdr->lnum); 802 803 err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vid_hdr); 804 if (err) { 805 if (err == MOVE_CANCEL_RACE) { 806 /* 807 * The LEB has not been moved because the volume is 808 * being deleted or the PEB has been put meanwhile. We 809 * should prevent this PEB from being selected for 810 * wear-leveling movement again, so put it to the 811 * protection queue. 812 */ 813 protect = 1; 814 goto out_not_moved; 815 } 816 if (err == MOVE_RETRY) { 817 scrubbing = 1; 818 goto out_not_moved; 819 } 820 if (err == MOVE_TARGET_BITFLIPS || err == MOVE_TARGET_WR_ERR || 821 err == MOVE_TARGET_RD_ERR) { 822 /* 823 * Target PEB had bit-flips or write error - torture it. 824 */ 825 torture = 1; 826 goto out_not_moved; 827 } 828 829 if (err == MOVE_SOURCE_RD_ERR) { 830 /* 831 * An error happened while reading the source PEB. Do 832 * not switch to R/O mode in this case, and give the 833 * upper layers a possibility to recover from this, 834 * e.g. by unmapping corresponding LEB. Instead, just 835 * put this PEB to the @ubi->erroneous list to prevent 836 * UBI from trying to move it over and over again. 
837 */ 838 if (ubi->erroneous_peb_count > ubi->max_erroneous) { 839 ubi_err(ubi, "too many erroneous eraseblocks (%d)", 840 ubi->erroneous_peb_count); 841 goto out_error; 842 } 843 erroneous = 1; 844 goto out_not_moved; 845 } 846 847 if (err < 0) 848 goto out_error; 849 850 ubi_assert(0); 851 } 852 853 /* The PEB has been successfully moved */ 854 if (scrubbing) 855 ubi_msg(ubi, "scrubbed PEB %d (LEB %d:%d), data moved to PEB %d", 856 e1->pnum, vol_id, lnum, e2->pnum); 857 ubi_free_vid_hdr(ubi, vid_hdr); 858 859 spin_lock(&ubi->wl_lock); 860 if (!ubi->move_to_put) { 861 wl_tree_add(e2, &ubi->used); 862 e2 = NULL; 863 } 864 ubi->move_from = ubi->move_to = NULL; 865 ubi->move_to_put = ubi->wl_scheduled = 0; 866 spin_unlock(&ubi->wl_lock); 867 868 err = do_sync_erase(ubi, e1, vol_id, lnum, 0); 869 if (err) { 870 if (e2) 871 wl_entry_destroy(ubi, e2); 872 goto out_ro; 873 } 874 875 if (e2) { 876 /* 877 * Well, the target PEB was put meanwhile, schedule it for 878 * erasure. 879 */ 880 dbg_wl("PEB %d (LEB %d:%d) was put meanwhile, erase", 881 e2->pnum, vol_id, lnum); 882 err = do_sync_erase(ubi, e2, vol_id, lnum, 0); 883 if (err) 884 goto out_ro; 885 } 886 887 dbg_wl("done"); 888 mutex_unlock(&ubi->move_mutex); 889 return 0; 890 891 /* 892 * For some reasons the LEB was not moved, might be an error, might be 893 * something else. @e1 was not changed, so return it back. @e2 might 894 * have been changed, schedule it for erasure. 
895 */ 896 out_not_moved: 897 if (vol_id != -1) 898 dbg_wl("cancel moving PEB %d (LEB %d:%d) to PEB %d (%d)", 899 e1->pnum, vol_id, lnum, e2->pnum, err); 900 else 901 dbg_wl("cancel moving PEB %d to PEB %d (%d)", 902 e1->pnum, e2->pnum, err); 903 spin_lock(&ubi->wl_lock); 904 if (protect) 905 prot_queue_add(ubi, e1); 906 else if (erroneous) { 907 wl_tree_add(e1, &ubi->erroneous); 908 ubi->erroneous_peb_count += 1; 909 } else if (scrubbing) 910 wl_tree_add(e1, &ubi->scrub); 911 else 912 wl_tree_add(e1, &ubi->used); 913 ubi_assert(!ubi->move_to_put); 914 ubi->move_from = ubi->move_to = NULL; 915 ubi->wl_scheduled = 0; 916 spin_unlock(&ubi->wl_lock); 917 918 ubi_free_vid_hdr(ubi, vid_hdr); 919 err = do_sync_erase(ubi, e2, vol_id, lnum, torture); 920 if (err) 921 goto out_ro; 922 923 mutex_unlock(&ubi->move_mutex); 924 return 0; 925 926 out_error: 927 if (vol_id != -1) 928 ubi_err(ubi, "error %d while moving PEB %d to PEB %d", 929 err, e1->pnum, e2->pnum); 930 else 931 ubi_err(ubi, "error %d while moving PEB %d (LEB %d:%d) to PEB %d", 932 err, e1->pnum, vol_id, lnum, e2->pnum); 933 spin_lock(&ubi->wl_lock); 934 ubi->move_from = ubi->move_to = NULL; 935 ubi->move_to_put = ubi->wl_scheduled = 0; 936 spin_unlock(&ubi->wl_lock); 937 938 ubi_free_vid_hdr(ubi, vid_hdr); 939 wl_entry_destroy(ubi, e1); 940 wl_entry_destroy(ubi, e2); 941 942 out_ro: 943 ubi_ro_mode(ubi); 944 mutex_unlock(&ubi->move_mutex); 945 ubi_assert(err != 0); 946 return err < 0 ? err : -EIO; 947 948 out_cancel: 949 ubi->wl_scheduled = 0; 950 spin_unlock(&ubi->wl_lock); 951 mutex_unlock(&ubi->move_mutex); 952 ubi_free_vid_hdr(ubi, vid_hdr); 953 return 0; 954 } 955 956 /** 957 * ensure_wear_leveling - schedule wear-leveling if it is needed. 958 * @ubi: UBI device description object 959 * @nested: set to non-zero if this function is called from UBI worker 960 * 961 * This function checks if it is time to start wear-leveling and schedules it 962 * if yes. 
This function returns zero in case of success and a negative error 963 * code in case of failure. 964 */ 965 static int ensure_wear_leveling(struct ubi_device *ubi, int nested) 966 { 967 int err = 0; 968 struct ubi_wl_entry *e1; 969 struct ubi_wl_entry *e2; 970 struct ubi_work *wrk; 971 972 spin_lock(&ubi->wl_lock); 973 if (ubi->wl_scheduled) 974 /* Wear-leveling is already in the work queue */ 975 goto out_unlock; 976 977 /* 978 * If the ubi->scrub tree is not empty, scrubbing is needed, and the 979 * the WL worker has to be scheduled anyway. 980 */ 981 if (!ubi->scrub.rb_node) { 982 if (!ubi->used.rb_node || !ubi->free.rb_node) 983 /* No physical eraseblocks - no deal */ 984 goto out_unlock; 985 986 /* 987 * We schedule wear-leveling only if the difference between the 988 * lowest erase counter of used physical eraseblocks and a high 989 * erase counter of free physical eraseblocks is greater than 990 * %UBI_WL_THRESHOLD. 991 */ 992 e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb); 993 e2 = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF); 994 995 if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) 996 goto out_unlock; 997 dbg_wl("schedule wear-leveling"); 998 } else 999 dbg_wl("schedule scrubbing"); 1000 1001 ubi->wl_scheduled = 1; 1002 spin_unlock(&ubi->wl_lock); 1003 1004 wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS); 1005 if (!wrk) { 1006 err = -ENOMEM; 1007 goto out_cancel; 1008 } 1009 1010 wrk->anchor = 0; 1011 wrk->func = &wear_leveling_worker; 1012 if (nested) 1013 __schedule_ubi_work(ubi, wrk); 1014 else 1015 schedule_ubi_work(ubi, wrk); 1016 return err; 1017 1018 out_cancel: 1019 spin_lock(&ubi->wl_lock); 1020 ubi->wl_scheduled = 0; 1021 out_unlock: 1022 spin_unlock(&ubi->wl_lock); 1023 return err; 1024 } 1025 1026 /** 1027 * erase_worker - physical eraseblock erase worker function. 
1028 * @ubi: UBI device description object 1029 * @wl_wrk: the work object 1030 * @shutdown: non-zero if the worker has to free memory and exit 1031 * because the WL sub-system is shutting down 1032 * 1033 * This function erases a physical eraseblock and perform torture testing if 1034 * needed. It also takes care about marking the physical eraseblock bad if 1035 * needed. Returns zero in case of success and a negative error code in case of 1036 * failure. 1037 */ 1038 static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, 1039 int shutdown) 1040 { 1041 struct ubi_wl_entry *e = wl_wrk->e; 1042 int pnum = e->pnum; 1043 int vol_id = wl_wrk->vol_id; 1044 int lnum = wl_wrk->lnum; 1045 int err, available_consumed = 0; 1046 1047 if (shutdown) { 1048 dbg_wl("cancel erasure of PEB %d EC %d", pnum, e->ec); 1049 kfree(wl_wrk); 1050 wl_entry_destroy(ubi, e); 1051 return 0; 1052 } 1053 1054 dbg_wl("erase PEB %d EC %d LEB %d:%d", 1055 pnum, e->ec, wl_wrk->vol_id, wl_wrk->lnum); 1056 1057 err = sync_erase(ubi, e, wl_wrk->torture); 1058 if (!err) { 1059 /* Fine, we've erased it successfully */ 1060 kfree(wl_wrk); 1061 1062 spin_lock(&ubi->wl_lock); 1063 wl_tree_add(e, &ubi->free); 1064 ubi->free_count++; 1065 spin_unlock(&ubi->wl_lock); 1066 1067 /* 1068 * One more erase operation has happened, take care about 1069 * protected physical eraseblocks. 
1070 */ 1071 serve_prot_queue(ubi); 1072 1073 /* And take care about wear-leveling */ 1074 err = ensure_wear_leveling(ubi, 1); 1075 return err; 1076 } 1077 1078 ubi_err(ubi, "failed to erase PEB %d, error %d", pnum, err); 1079 kfree(wl_wrk); 1080 1081 if (err == -EINTR || err == -ENOMEM || err == -EAGAIN || 1082 err == -EBUSY) { 1083 int err1; 1084 1085 /* Re-schedule the LEB for erasure */ 1086 err1 = schedule_erase(ubi, e, vol_id, lnum, 0); 1087 if (err1) { 1088 err = err1; 1089 goto out_ro; 1090 } 1091 return err; 1092 } 1093 1094 wl_entry_destroy(ubi, e); 1095 if (err != -EIO) 1096 /* 1097 * If this is not %-EIO, we have no idea what to do. Scheduling 1098 * this physical eraseblock for erasure again would cause 1099 * errors again and again. Well, lets switch to R/O mode. 1100 */ 1101 goto out_ro; 1102 1103 /* It is %-EIO, the PEB went bad */ 1104 1105 if (!ubi->bad_allowed) { 1106 ubi_err(ubi, "bad physical eraseblock %d detected", pnum); 1107 goto out_ro; 1108 } 1109 1110 spin_lock(&ubi->volumes_lock); 1111 if (ubi->beb_rsvd_pebs == 0) { 1112 if (ubi->avail_pebs == 0) { 1113 spin_unlock(&ubi->volumes_lock); 1114 ubi_err(ubi, "no reserved/available physical eraseblocks"); 1115 goto out_ro; 1116 } 1117 ubi->avail_pebs -= 1; 1118 available_consumed = 1; 1119 } 1120 spin_unlock(&ubi->volumes_lock); 1121 1122 ubi_msg(ubi, "mark PEB %d as bad", pnum); 1123 err = ubi_io_mark_bad(ubi, pnum); 1124 if (err) 1125 goto out_ro; 1126 1127 spin_lock(&ubi->volumes_lock); 1128 if (ubi->beb_rsvd_pebs > 0) { 1129 if (available_consumed) { 1130 /* 1131 * The amount of reserved PEBs increased since we last 1132 * checked. 
1133 */ 1134 ubi->avail_pebs += 1; 1135 available_consumed = 0; 1136 } 1137 ubi->beb_rsvd_pebs -= 1; 1138 } 1139 ubi->bad_peb_count += 1; 1140 ubi->good_peb_count -= 1; 1141 ubi_calculate_reserved(ubi); 1142 if (available_consumed) 1143 ubi_warn(ubi, "no PEBs in the reserved pool, used an available PEB"); 1144 else if (ubi->beb_rsvd_pebs) 1145 ubi_msg(ubi, "%d PEBs left in the reserve", 1146 ubi->beb_rsvd_pebs); 1147 else 1148 ubi_warn(ubi, "last PEB from the reserve was used"); 1149 spin_unlock(&ubi->volumes_lock); 1150 1151 return err; 1152 1153 out_ro: 1154 if (available_consumed) { 1155 spin_lock(&ubi->volumes_lock); 1156 ubi->avail_pebs += 1; 1157 spin_unlock(&ubi->volumes_lock); 1158 } 1159 ubi_ro_mode(ubi); 1160 return err; 1161 } 1162 1163 /** 1164 * ubi_wl_put_peb - return a PEB to the wear-leveling sub-system. 1165 * @ubi: UBI device description object 1166 * @vol_id: the volume ID that last used this PEB 1167 * @lnum: the last used logical eraseblock number for the PEB 1168 * @pnum: physical eraseblock to return 1169 * @torture: if this physical eraseblock has to be tortured 1170 * 1171 * This function is called to return physical eraseblock @pnum to the pool of 1172 * free physical eraseblocks. The @torture flag has to be set if an I/O error 1173 * occurred to this @pnum and it has to be tested. This function returns zero 1174 * in case of success, and a negative error code in case of failure. 1175 */ 1176 int ubi_wl_put_peb(struct ubi_device *ubi, int vol_id, int lnum, 1177 int pnum, int torture) 1178 { 1179 int err; 1180 struct ubi_wl_entry *e; 1181 1182 dbg_wl("PEB %d", pnum); 1183 ubi_assert(pnum >= 0); 1184 ubi_assert(pnum < ubi->peb_count); 1185 1186 down_read(&ubi->fm_protect); 1187 1188 retry: 1189 spin_lock(&ubi->wl_lock); 1190 e = ubi->lookuptbl[pnum]; 1191 if (e == ubi->move_from) { 1192 /* 1193 * User is putting the physical eraseblock which was selected to 1194 * be moved. It will be scheduled for erasure in the 1195 * wear-leveling worker. 
1196 */ 1197 dbg_wl("PEB %d is being moved, wait", pnum); 1198 spin_unlock(&ubi->wl_lock); 1199 1200 /* Wait for the WL worker by taking the @ubi->move_mutex */ 1201 mutex_lock(&ubi->move_mutex); 1202 mutex_unlock(&ubi->move_mutex); 1203 goto retry; 1204 } else if (e == ubi->move_to) { 1205 /* 1206 * User is putting the physical eraseblock which was selected 1207 * as the target the data is moved to. It may happen if the EBA 1208 * sub-system already re-mapped the LEB in 'ubi_eba_copy_leb()' 1209 * but the WL sub-system has not put the PEB to the "used" tree 1210 * yet, but it is about to do this. So we just set a flag which 1211 * will tell the WL worker that the PEB is not needed anymore 1212 * and should be scheduled for erasure. 1213 */ 1214 dbg_wl("PEB %d is the target of data moving", pnum); 1215 ubi_assert(!ubi->move_to_put); 1216 ubi->move_to_put = 1; 1217 spin_unlock(&ubi->wl_lock); 1218 up_read(&ubi->fm_protect); 1219 return 0; 1220 } else { 1221 if (in_wl_tree(e, &ubi->used)) { 1222 self_check_in_wl_tree(ubi, e, &ubi->used); 1223 rb_erase(&e->u.rb, &ubi->used); 1224 } else if (in_wl_tree(e, &ubi->scrub)) { 1225 self_check_in_wl_tree(ubi, e, &ubi->scrub); 1226 rb_erase(&e->u.rb, &ubi->scrub); 1227 } else if (in_wl_tree(e, &ubi->erroneous)) { 1228 self_check_in_wl_tree(ubi, e, &ubi->erroneous); 1229 rb_erase(&e->u.rb, &ubi->erroneous); 1230 ubi->erroneous_peb_count -= 1; 1231 ubi_assert(ubi->erroneous_peb_count >= 0); 1232 /* Erroneous PEBs should be tortured */ 1233 torture = 1; 1234 } else { 1235 err = prot_queue_del(ubi, e->pnum); 1236 if (err) { 1237 ubi_err(ubi, "PEB %d not found", pnum); 1238 ubi_ro_mode(ubi); 1239 spin_unlock(&ubi->wl_lock); 1240 up_read(&ubi->fm_protect); 1241 return err; 1242 } 1243 } 1244 } 1245 spin_unlock(&ubi->wl_lock); 1246 1247 err = schedule_erase(ubi, e, vol_id, lnum, torture); 1248 if (err) { 1249 spin_lock(&ubi->wl_lock); 1250 wl_tree_add(e, &ubi->used); 1251 spin_unlock(&ubi->wl_lock); 1252 } 1253 1254 
up_read(&ubi->fm_protect); 1255 return err; 1256 } 1257 1258 /** 1259 * ubi_wl_scrub_peb - schedule a physical eraseblock for scrubbing. 1260 * @ubi: UBI device description object 1261 * @pnum: the physical eraseblock to schedule 1262 * 1263 * If a bit-flip in a physical eraseblock is detected, this physical eraseblock 1264 * needs scrubbing. This function schedules a physical eraseblock for 1265 * scrubbing which is done in background. This function returns zero in case of 1266 * success and a negative error code in case of failure. 1267 */ 1268 int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum) 1269 { 1270 struct ubi_wl_entry *e; 1271 1272 ubi_msg(ubi, "schedule PEB %d for scrubbing", pnum); 1273 1274 retry: 1275 spin_lock(&ubi->wl_lock); 1276 e = ubi->lookuptbl[pnum]; 1277 if (e == ubi->move_from || in_wl_tree(e, &ubi->scrub) || 1278 in_wl_tree(e, &ubi->erroneous)) { 1279 spin_unlock(&ubi->wl_lock); 1280 return 0; 1281 } 1282 1283 if (e == ubi->move_to) { 1284 /* 1285 * This physical eraseblock was used to move data to. The data 1286 * was moved but the PEB was not yet inserted to the proper 1287 * tree. We should just wait a little and let the WL worker 1288 * proceed. 1289 */ 1290 spin_unlock(&ubi->wl_lock); 1291 dbg_wl("the PEB %d is not in proper tree, retry", pnum); 1292 yield(); 1293 goto retry; 1294 } 1295 1296 if (in_wl_tree(e, &ubi->used)) { 1297 self_check_in_wl_tree(ubi, e, &ubi->used); 1298 rb_erase(&e->u.rb, &ubi->used); 1299 } else { 1300 int err; 1301 1302 err = prot_queue_del(ubi, e->pnum); 1303 if (err) { 1304 ubi_err(ubi, "PEB %d not found", pnum); 1305 ubi_ro_mode(ubi); 1306 spin_unlock(&ubi->wl_lock); 1307 return err; 1308 } 1309 } 1310 1311 wl_tree_add(e, &ubi->scrub); 1312 spin_unlock(&ubi->wl_lock); 1313 1314 /* 1315 * Technically scrubbing is the same as wear-leveling, so it is done 1316 * by the WL worker. 1317 */ 1318 return ensure_wear_leveling(ubi, 0); 1319 } 1320 1321 /** 1322 * ubi_wl_flush - flush all pending works. 
1323 * @ubi: UBI device description object 1324 * @vol_id: the volume id to flush for 1325 * @lnum: the logical eraseblock number to flush for 1326 * 1327 * This function executes all pending works for a particular volume id / 1328 * logical eraseblock number pair. If either value is set to %UBI_ALL, then it 1329 * acts as a wildcard for all of the corresponding volume numbers or logical 1330 * eraseblock numbers. It returns zero in case of success and a negative error 1331 * code in case of failure. 1332 */ 1333 int ubi_wl_flush(struct ubi_device *ubi, int vol_id, int lnum) 1334 { 1335 int err = 0; 1336 int found = 1; 1337 1338 /* 1339 * Erase while the pending works queue is not empty, but not more than 1340 * the number of currently pending works. 1341 */ 1342 dbg_wl("flush pending work for LEB %d:%d (%d pending works)", 1343 vol_id, lnum, ubi->works_count); 1344 1345 while (found) { 1346 struct ubi_work *wrk, *tmp; 1347 found = 0; 1348 1349 down_read(&ubi->work_sem); 1350 spin_lock(&ubi->wl_lock); 1351 list_for_each_entry_safe(wrk, tmp, &ubi->works, list) { 1352 if ((vol_id == UBI_ALL || wrk->vol_id == vol_id) && 1353 (lnum == UBI_ALL || wrk->lnum == lnum)) { 1354 list_del(&wrk->list); 1355 ubi->works_count -= 1; 1356 ubi_assert(ubi->works_count >= 0); 1357 spin_unlock(&ubi->wl_lock); 1358 1359 err = wrk->func(ubi, wrk, 0); 1360 if (err) { 1361 up_read(&ubi->work_sem); 1362 return err; 1363 } 1364 1365 spin_lock(&ubi->wl_lock); 1366 found = 1; 1367 break; 1368 } 1369 } 1370 spin_unlock(&ubi->wl_lock); 1371 up_read(&ubi->work_sem); 1372 } 1373 1374 /* 1375 * Make sure all the works which have been done in parallel are 1376 * finished. 1377 */ 1378 down_write(&ubi->work_sem); 1379 up_write(&ubi->work_sem); 1380 1381 return err; 1382 } 1383 1384 /** 1385 * tree_destroy - destroy an RB-tree. 
1386 * @ubi: UBI device description object 1387 * @root: the root of the tree to destroy 1388 */ 1389 static void tree_destroy(struct ubi_device *ubi, struct rb_root *root) 1390 { 1391 struct rb_node *rb; 1392 struct ubi_wl_entry *e; 1393 1394 rb = root->rb_node; 1395 while (rb) { 1396 if (rb->rb_left) 1397 rb = rb->rb_left; 1398 else if (rb->rb_right) 1399 rb = rb->rb_right; 1400 else { 1401 e = rb_entry(rb, struct ubi_wl_entry, u.rb); 1402 1403 rb = rb_parent(rb); 1404 if (rb) { 1405 if (rb->rb_left == &e->u.rb) 1406 rb->rb_left = NULL; 1407 else 1408 rb->rb_right = NULL; 1409 } 1410 1411 wl_entry_destroy(ubi, e); 1412 } 1413 } 1414 } 1415 1416 /** 1417 * ubi_thread - UBI background thread. 1418 * @u: the UBI device description object pointer 1419 */ 1420 int ubi_thread(void *u) 1421 { 1422 int failures = 0; 1423 struct ubi_device *ubi = u; 1424 1425 ubi_msg(ubi, "background thread \"%s\" started, PID %d", 1426 ubi->bgt_name, task_pid_nr(current)); 1427 1428 set_freezable(); 1429 for (;;) { 1430 int err; 1431 1432 if (kthread_should_stop()) 1433 break; 1434 1435 if (try_to_freeze()) 1436 continue; 1437 1438 spin_lock(&ubi->wl_lock); 1439 if (list_empty(&ubi->works) || ubi->ro_mode || 1440 !ubi->thread_enabled || ubi_dbg_is_bgt_disabled(ubi)) { 1441 set_current_state(TASK_INTERRUPTIBLE); 1442 spin_unlock(&ubi->wl_lock); 1443 schedule(); 1444 continue; 1445 } 1446 spin_unlock(&ubi->wl_lock); 1447 1448 err = do_work(ubi); 1449 if (err) { 1450 ubi_err(ubi, "%s: work failed with error code %d", 1451 ubi->bgt_name, err); 1452 if (failures++ > WL_MAX_FAILURES) { 1453 /* 1454 * Too many failures, disable the thread and 1455 * switch to read-only mode. 
1456 */ 1457 ubi_msg(ubi, "%s: %d consecutive failures", 1458 ubi->bgt_name, WL_MAX_FAILURES); 1459 ubi_ro_mode(ubi); 1460 ubi->thread_enabled = 0; 1461 continue; 1462 } 1463 } else 1464 failures = 0; 1465 1466 cond_resched(); 1467 } 1468 1469 dbg_wl("background thread \"%s\" is killed", ubi->bgt_name); 1470 return 0; 1471 } 1472 1473 /** 1474 * shutdown_work - shutdown all pending works. 1475 * @ubi: UBI device description object 1476 */ 1477 static void shutdown_work(struct ubi_device *ubi) 1478 { 1479 #ifdef CONFIG_MTD_UBI_FASTMAP 1480 #ifndef __UBOOT__ 1481 flush_work(&ubi->fm_work); 1482 #else 1483 /* in U-Boot, we have all work done */ 1484 #endif 1485 #endif 1486 while (!list_empty(&ubi->works)) { 1487 struct ubi_work *wrk; 1488 1489 wrk = list_entry(ubi->works.next, struct ubi_work, list); 1490 list_del(&wrk->list); 1491 wrk->func(ubi, wrk, 1); 1492 ubi->works_count -= 1; 1493 ubi_assert(ubi->works_count >= 0); 1494 } 1495 } 1496 1497 /** 1498 * ubi_wl_init - initialize the WL sub-system using attaching information. 1499 * @ubi: UBI device description object 1500 * @ai: attaching information 1501 * 1502 * This function returns zero in case of success, and a negative error code in 1503 * case of failure. 
1504 */ 1505 int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) 1506 { 1507 int err, i, reserved_pebs, found_pebs = 0; 1508 struct rb_node *rb1, *rb2; 1509 struct ubi_ainf_volume *av; 1510 struct ubi_ainf_peb *aeb, *tmp; 1511 struct ubi_wl_entry *e; 1512 1513 ubi->used = ubi->erroneous = ubi->free = ubi->scrub = RB_ROOT; 1514 spin_lock_init(&ubi->wl_lock); 1515 mutex_init(&ubi->move_mutex); 1516 init_rwsem(&ubi->work_sem); 1517 ubi->max_ec = ai->max_ec; 1518 INIT_LIST_HEAD(&ubi->works); 1519 1520 sprintf(ubi->bgt_name, UBI_BGT_NAME_PATTERN, ubi->ubi_num); 1521 1522 err = -ENOMEM; 1523 ubi->lookuptbl = kzalloc(ubi->peb_count * sizeof(void *), GFP_KERNEL); 1524 if (!ubi->lookuptbl) 1525 return err; 1526 1527 for (i = 0; i < UBI_PROT_QUEUE_LEN; i++) 1528 INIT_LIST_HEAD(&ubi->pq[i]); 1529 ubi->pq_head = 0; 1530 1531 ubi->free_count = 0; 1532 list_for_each_entry_safe(aeb, tmp, &ai->erase, u.list) { 1533 cond_resched(); 1534 1535 e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); 1536 if (!e) 1537 goto out_free; 1538 1539 e->pnum = aeb->pnum; 1540 e->ec = aeb->ec; 1541 ubi->lookuptbl[e->pnum] = e; 1542 if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0)) { 1543 wl_entry_destroy(ubi, e); 1544 goto out_free; 1545 } 1546 1547 found_pebs++; 1548 } 1549 1550 list_for_each_entry(aeb, &ai->free, u.list) { 1551 cond_resched(); 1552 1553 e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); 1554 if (!e) 1555 goto out_free; 1556 1557 e->pnum = aeb->pnum; 1558 e->ec = aeb->ec; 1559 ubi_assert(e->ec >= 0); 1560 1561 wl_tree_add(e, &ubi->free); 1562 ubi->free_count++; 1563 1564 ubi->lookuptbl[e->pnum] = e; 1565 1566 found_pebs++; 1567 } 1568 1569 ubi_rb_for_each_entry(rb1, av, &ai->volumes, rb) { 1570 ubi_rb_for_each_entry(rb2, aeb, &av->root, u.rb) { 1571 cond_resched(); 1572 1573 e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); 1574 if (!e) 1575 goto out_free; 1576 1577 e->pnum = aeb->pnum; 1578 e->ec = aeb->ec; 1579 ubi->lookuptbl[e->pnum] = e; 1580 1581 if 
(!aeb->scrub) { 1582 dbg_wl("add PEB %d EC %d to the used tree", 1583 e->pnum, e->ec); 1584 wl_tree_add(e, &ubi->used); 1585 } else { 1586 dbg_wl("add PEB %d EC %d to the scrub tree", 1587 e->pnum, e->ec); 1588 wl_tree_add(e, &ubi->scrub); 1589 } 1590 1591 found_pebs++; 1592 } 1593 } 1594 1595 dbg_wl("found %i PEBs", found_pebs); 1596 1597 if (ubi->fm) { 1598 ubi_assert(ubi->good_peb_count == 1599 found_pebs + ubi->fm->used_blocks); 1600 1601 for (i = 0; i < ubi->fm->used_blocks; i++) { 1602 e = ubi->fm->e[i]; 1603 ubi->lookuptbl[e->pnum] = e; 1604 } 1605 } 1606 else 1607 ubi_assert(ubi->good_peb_count == found_pebs); 1608 1609 reserved_pebs = WL_RESERVED_PEBS; 1610 ubi_fastmap_init(ubi, &reserved_pebs); 1611 1612 if (ubi->avail_pebs < reserved_pebs) { 1613 ubi_err(ubi, "no enough physical eraseblocks (%d, need %d)", 1614 ubi->avail_pebs, reserved_pebs); 1615 if (ubi->corr_peb_count) 1616 ubi_err(ubi, "%d PEBs are corrupted and not used", 1617 ubi->corr_peb_count); 1618 goto out_free; 1619 } 1620 ubi->avail_pebs -= reserved_pebs; 1621 ubi->rsvd_pebs += reserved_pebs; 1622 1623 /* Schedule wear-leveling if needed */ 1624 err = ensure_wear_leveling(ubi, 0); 1625 if (err) 1626 goto out_free; 1627 1628 return 0; 1629 1630 out_free: 1631 shutdown_work(ubi); 1632 tree_destroy(ubi, &ubi->used); 1633 tree_destroy(ubi, &ubi->free); 1634 tree_destroy(ubi, &ubi->scrub); 1635 kfree(ubi->lookuptbl); 1636 return err; 1637 } 1638 1639 /** 1640 * protection_queue_destroy - destroy the protection queue. 1641 * @ubi: UBI device description object 1642 */ 1643 static void protection_queue_destroy(struct ubi_device *ubi) 1644 { 1645 int i; 1646 struct ubi_wl_entry *e, *tmp; 1647 1648 for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) { 1649 list_for_each_entry_safe(e, tmp, &ubi->pq[i], u.list) { 1650 list_del(&e->u.list); 1651 wl_entry_destroy(ubi, e); 1652 } 1653 } 1654 } 1655 1656 /** 1657 * ubi_wl_close - close the wear-leveling sub-system. 
1658 * @ubi: UBI device description object 1659 */ 1660 void ubi_wl_close(struct ubi_device *ubi) 1661 { 1662 dbg_wl("close the WL sub-system"); 1663 ubi_fastmap_close(ubi); 1664 shutdown_work(ubi); 1665 protection_queue_destroy(ubi); 1666 tree_destroy(ubi, &ubi->used); 1667 tree_destroy(ubi, &ubi->erroneous); 1668 tree_destroy(ubi, &ubi->free); 1669 tree_destroy(ubi, &ubi->scrub); 1670 kfree(ubi->lookuptbl); 1671 } 1672 1673 /** 1674 * self_check_ec - make sure that the erase counter of a PEB is correct. 1675 * @ubi: UBI device description object 1676 * @pnum: the physical eraseblock number to check 1677 * @ec: the erase counter to check 1678 * 1679 * This function returns zero if the erase counter of physical eraseblock @pnum 1680 * is equivalent to @ec, and a negative error code if not or if an error 1681 * occurred. 1682 */ 1683 static int self_check_ec(struct ubi_device *ubi, int pnum, int ec) 1684 { 1685 int err; 1686 long long read_ec; 1687 struct ubi_ec_hdr *ec_hdr; 1688 1689 if (!ubi_dbg_chk_gen(ubi)) 1690 return 0; 1691 1692 ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS); 1693 if (!ec_hdr) 1694 return -ENOMEM; 1695 1696 err = ubi_io_read_ec_hdr(ubi, pnum, ec_hdr, 0); 1697 if (err && err != UBI_IO_BITFLIPS) { 1698 /* The header does not have to exist */ 1699 err = 0; 1700 goto out_free; 1701 } 1702 1703 read_ec = be64_to_cpu(ec_hdr->ec); 1704 if (ec != read_ec && read_ec - ec > 1) { 1705 ubi_err(ubi, "self-check failed for PEB %d", pnum); 1706 ubi_err(ubi, "read EC is %lld, should be %d", read_ec, ec); 1707 dump_stack(); 1708 err = 1; 1709 } else 1710 err = 0; 1711 1712 out_free: 1713 kfree(ec_hdr); 1714 return err; 1715 } 1716 1717 /** 1718 * self_check_in_wl_tree - check that wear-leveling entry is in WL RB-tree. 1719 * @ubi: UBI device description object 1720 * @e: the wear-leveling entry to check 1721 * @root: the root of the tree 1722 * 1723 * This function returns zero if @e is in the @root RB-tree and %-EINVAL if it 1724 * is not. 
1725 */ 1726 static int self_check_in_wl_tree(const struct ubi_device *ubi, 1727 struct ubi_wl_entry *e, struct rb_root *root) 1728 { 1729 if (!ubi_dbg_chk_gen(ubi)) 1730 return 0; 1731 1732 if (in_wl_tree(e, root)) 1733 return 0; 1734 1735 ubi_err(ubi, "self-check failed for PEB %d, EC %d, RB-tree %p ", 1736 e->pnum, e->ec, root); 1737 dump_stack(); 1738 return -EINVAL; 1739 } 1740 1741 /** 1742 * self_check_in_pq - check if wear-leveling entry is in the protection 1743 * queue. 1744 * @ubi: UBI device description object 1745 * @e: the wear-leveling entry to check 1746 * 1747 * This function returns zero if @e is in @ubi->pq and %-EINVAL if it is not. 1748 */ 1749 static int self_check_in_pq(const struct ubi_device *ubi, 1750 struct ubi_wl_entry *e) 1751 { 1752 struct ubi_wl_entry *p; 1753 int i; 1754 1755 if (!ubi_dbg_chk_gen(ubi)) 1756 return 0; 1757 1758 for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) 1759 list_for_each_entry(p, &ubi->pq[i], u.list) 1760 if (p == e) 1761 return 0; 1762 1763 ubi_err(ubi, "self-check failed for PEB %d, EC %d, Protect queue", 1764 e->pnum, e->ec); 1765 dump_stack(); 1766 return -EINVAL; 1767 } 1768 #ifndef CONFIG_MTD_UBI_FASTMAP 1769 static struct ubi_wl_entry *get_peb_for_wl(struct ubi_device *ubi) 1770 { 1771 struct ubi_wl_entry *e; 1772 1773 e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF); 1774 self_check_in_wl_tree(ubi, e, &ubi->free); 1775 ubi->free_count--; 1776 ubi_assert(ubi->free_count >= 0); 1777 rb_erase(&e->u.rb, &ubi->free); 1778 1779 return e; 1780 } 1781 1782 /** 1783 * produce_free_peb - produce a free physical eraseblock. 1784 * @ubi: UBI device description object 1785 * 1786 * This function tries to make a free PEB by means of synchronous execution of 1787 * pending works. This may be needed if, for example the background thread is 1788 * disabled. Returns zero in case of success and a negative error code in case 1789 * of failure. 
1790 */ 1791 static int produce_free_peb(struct ubi_device *ubi) 1792 { 1793 int err; 1794 1795 while (!ubi->free.rb_node && ubi->works_count) { 1796 spin_unlock(&ubi->wl_lock); 1797 1798 dbg_wl("do one work synchronously"); 1799 err = do_work(ubi); 1800 1801 spin_lock(&ubi->wl_lock); 1802 if (err) 1803 return err; 1804 } 1805 1806 return 0; 1807 } 1808 1809 /** 1810 * ubi_wl_get_peb - get a physical eraseblock. 1811 * @ubi: UBI device description object 1812 * 1813 * This function returns a physical eraseblock in case of success and a 1814 * negative error code in case of failure. 1815 * Returns with ubi->fm_eba_sem held in read mode! 1816 */ 1817 int ubi_wl_get_peb(struct ubi_device *ubi) 1818 { 1819 int err; 1820 struct ubi_wl_entry *e; 1821 1822 retry: 1823 down_read(&ubi->fm_eba_sem); 1824 spin_lock(&ubi->wl_lock); 1825 if (!ubi->free.rb_node) { 1826 if (ubi->works_count == 0) { 1827 ubi_err(ubi, "no free eraseblocks"); 1828 ubi_assert(list_empty(&ubi->works)); 1829 spin_unlock(&ubi->wl_lock); 1830 return -ENOSPC; 1831 } 1832 1833 err = produce_free_peb(ubi); 1834 if (err < 0) { 1835 spin_unlock(&ubi->wl_lock); 1836 return err; 1837 } 1838 spin_unlock(&ubi->wl_lock); 1839 up_read(&ubi->fm_eba_sem); 1840 goto retry; 1841 1842 } 1843 e = wl_get_wle(ubi); 1844 prot_queue_add(ubi, e); 1845 spin_unlock(&ubi->wl_lock); 1846 1847 err = ubi_self_check_all_ff(ubi, e->pnum, ubi->vid_hdr_aloffset, 1848 ubi->peb_size - ubi->vid_hdr_aloffset); 1849 if (err) { 1850 ubi_err(ubi, "new PEB %d does not contain all 0xFF bytes", e->pnum); 1851 return err; 1852 } 1853 1854 return e->pnum; 1855 } 1856 #else 1857 #include "fastmap-wl.c" 1858 #endif 1859