1 /* 2 * Copyright (c) International Business Machines Corp., 2006 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 12 * the GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 * 18 * Authors: Artem Bityutskiy (Битюцкий Артём), Thomas Gleixner 19 */ 20 21 /* 22 * UBI wear-leveling sub-system. 23 * 24 * This sub-system is responsible for wear-leveling. It works in terms of 25 * physical eraseblocks and erase counters and knows nothing about logical 26 * eraseblocks, volumes, etc. From this sub-system's perspective all physical 27 * eraseblocks are of two types - used and free. Used physical eraseblocks are 28 * those that were "get" by the 'ubi_wl_get_peb()' function, and free physical 29 * eraseblocks are those that were put by the 'ubi_wl_put_peb()' function. 30 * 31 * Physical eraseblocks returned by 'ubi_wl_get_peb()' have only erase counter 32 * header. The rest of the physical eraseblock contains only %0xFF bytes. 33 * 34 * When physical eraseblocks are returned to the WL sub-system by means of the 35 * 'ubi_wl_put_peb()' function, they are scheduled for erasure. The erasure is 36 * done asynchronously in context of the per-UBI device background thread, 37 * which is also managed by the WL sub-system. 
38 * 39 * The wear-leveling is ensured by means of moving the contents of used 40 * physical eraseblocks with low erase counter to free physical eraseblocks 41 * with high erase counter. 42 * 43 * If the WL sub-system fails to erase a physical eraseblock, it marks it as 44 * bad. 45 * 46 * This sub-system is also responsible for scrubbing. If a bit-flip is detected 47 * in a physical eraseblock, it has to be moved. Technically this is the same 48 * as moving it for wear-leveling reasons. 49 * 50 * As it was said, for the UBI sub-system all physical eraseblocks are either 51 * "free" or "used". Free eraseblock are kept in the @wl->free RB-tree, while 52 * used eraseblocks are kept in @wl->used, @wl->erroneous, or @wl->scrub 53 * RB-trees, as well as (temporarily) in the @wl->pq queue. 54 * 55 * When the WL sub-system returns a physical eraseblock, the physical 56 * eraseblock is protected from being moved for some "time". For this reason, 57 * the physical eraseblock is not directly moved from the @wl->free tree to the 58 * @wl->used tree. There is a protection queue in between where this 59 * physical eraseblock is temporarily stored (@wl->pq). 60 * 61 * All this protection stuff is needed because: 62 * o we don't want to move physical eraseblocks just after we have given them 63 * to the user; instead, we first want to let users fill them up with data; 64 * 65 * o there is a chance that the user will put the physical eraseblock very 66 * soon, so it makes sense not to move it for some time, but wait. 67 * 68 * Physical eraseblocks stay protected only for limited time. But the "time" is 69 * measured in erase cycles in this case. This is implemented with help of the 70 * protection queue. Eraseblocks are put to the tail of this queue when they 71 * are returned by the 'ubi_wl_get_peb()', and eraseblocks are removed from the 72 * head of the queue on each erase operation (for any eraseblock). 
So the 73 * length of the queue defines how many (global) erase cycles PEBs are protected. 74 * 75 * To put it differently, each physical eraseblock has 2 main states: free and 76 * used. The former state corresponds to the @wl->free tree. The latter state 77 * is split up on several sub-states: 78 * o the WL movement is allowed (@wl->used tree); 79 * o the WL movement is disallowed (@wl->erroneous) because the PEB is 80 * erroneous - e.g., there was a read error; 81 * o the WL movement is temporarily prohibited (@wl->pq queue); 82 * o scrubbing is needed (@wl->scrub tree). 83 * 84 * Depending on the sub-state, wear-leveling entries of the used physical 85 * eraseblocks may be kept in one of those structures. 86 * 87 * Note, in this implementation, we keep a small in-RAM object for each physical 88 * eraseblock. This is surely not a scalable solution. But it appears to be good 89 * enough for moderately large flashes and it is simple. In future, one may 90 * re-work this sub-system and make it more scalable. 91 * 92 * At the moment this sub-system does not utilize the sequence number, which 93 * was introduced relatively recently. But it would be wise to do this because 94 * the sequence number of a logical eraseblock characterizes how old is it. For 95 * example, when we move a PEB with low erase counter, and we need to pick the 96 * target PEB, we pick a PEB with the highest EC if our PEB is "old" and we 97 * pick target PEB with an average EC if our PEB is not very "old". This is a 98 * room for future re-works of the WL sub-system. 99 */ 100 101 #include <linux/slab.h> 102 #include <linux/crc32.h> 103 #include <linux/freezer.h> 104 #include <linux/kthread.h> 105 #include "ubi.h" 106 #include "wl.h" 107 108 /* Number of physical eraseblocks reserved for wear-leveling purposes */ 109 #define WL_RESERVED_PEBS 1 110 111 /* 112 * Maximum difference between two erase counters. 
If this threshold is 113 * exceeded, the WL sub-system starts moving data from used physical 114 * eraseblocks with low erase counter to free physical eraseblocks with high 115 * erase counter. 116 */ 117 #define UBI_WL_THRESHOLD CONFIG_MTD_UBI_WL_THRESHOLD 118 119 /* 120 * When a physical eraseblock is moved, the WL sub-system has to pick the target 121 * physical eraseblock to move to. The simplest way would be just to pick the 122 * one with the highest erase counter. But in certain workloads this could lead 123 * to an unlimited wear of one or few physical eraseblock. Indeed, imagine a 124 * situation when the picked physical eraseblock is constantly erased after the 125 * data is written to it. So, we have a constant which limits the highest erase 126 * counter of the free physical eraseblock to pick. Namely, the WL sub-system 127 * does not pick eraseblocks with erase counter greater than the lowest erase 128 * counter plus %WL_FREE_MAX_DIFF. 129 */ 130 #define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD) 131 132 /* 133 * Maximum number of consecutive background thread failures which is enough to 134 * switch to read-only mode. 135 */ 136 #define WL_MAX_FAILURES 32 137 138 static int self_check_ec(struct ubi_device *ubi, int pnum, int ec); 139 static int self_check_in_wl_tree(const struct ubi_device *ubi, 140 struct ubi_wl_entry *e, struct rb_root *root); 141 static int self_check_in_pq(const struct ubi_device *ubi, 142 struct ubi_wl_entry *e); 143 144 /** 145 * wl_tree_add - add a wear-leveling entry to a WL RB-tree. 146 * @e: the wear-leveling entry to add 147 * @root: the root of the tree 148 * 149 * Note, we use (erase counter, physical eraseblock number) pairs as keys in 150 * the @ubi->used and @ubi->free RB-trees. 
151 */ 152 static void wl_tree_add(struct ubi_wl_entry *e, struct rb_root *root) 153 { 154 struct rb_node **p, *parent = NULL; 155 156 p = &root->rb_node; 157 while (*p) { 158 struct ubi_wl_entry *e1; 159 160 parent = *p; 161 e1 = rb_entry(parent, struct ubi_wl_entry, u.rb); 162 163 if (e->ec < e1->ec) 164 p = &(*p)->rb_left; 165 else if (e->ec > e1->ec) 166 p = &(*p)->rb_right; 167 else { 168 ubi_assert(e->pnum != e1->pnum); 169 if (e->pnum < e1->pnum) 170 p = &(*p)->rb_left; 171 else 172 p = &(*p)->rb_right; 173 } 174 } 175 176 rb_link_node(&e->u.rb, parent, p); 177 rb_insert_color(&e->u.rb, root); 178 } 179 180 /** 181 * wl_tree_destroy - destroy a wear-leveling entry. 182 * @ubi: UBI device description object 183 * @e: the wear-leveling entry to add 184 * 185 * This function destroys a wear leveling entry and removes 186 * the reference from the lookup table. 187 */ 188 static void wl_entry_destroy(struct ubi_device *ubi, struct ubi_wl_entry *e) 189 { 190 ubi->lookuptbl[e->pnum] = NULL; 191 kmem_cache_free(ubi_wl_entry_slab, e); 192 } 193 194 /** 195 * do_work - do one pending work. 196 * @ubi: UBI device description object 197 * 198 * This function returns zero in case of success and a negative error code in 199 * case of failure. 200 */ 201 static int do_work(struct ubi_device *ubi) 202 { 203 int err; 204 struct ubi_work *wrk; 205 206 cond_resched(); 207 208 /* 209 * @ubi->work_sem is used to synchronize with the workers. Workers take 210 * it in read mode, so many of them may be doing works at a time. But 211 * the queue flush code has to be sure the whole queue of works is 212 * done, and it takes the mutex in write mode. 
213 */ 214 down_read(&ubi->work_sem); 215 spin_lock(&ubi->wl_lock); 216 if (list_empty(&ubi->works)) { 217 spin_unlock(&ubi->wl_lock); 218 up_read(&ubi->work_sem); 219 return 0; 220 } 221 222 wrk = list_entry(ubi->works.next, struct ubi_work, list); 223 list_del(&wrk->list); 224 ubi->works_count -= 1; 225 ubi_assert(ubi->works_count >= 0); 226 spin_unlock(&ubi->wl_lock); 227 228 /* 229 * Call the worker function. Do not touch the work structure 230 * after this call as it will have been freed or reused by that 231 * time by the worker function. 232 */ 233 err = wrk->func(ubi, wrk, 0); 234 if (err) 235 ubi_err(ubi, "work failed with error code %d", err); 236 up_read(&ubi->work_sem); 237 238 return err; 239 } 240 241 /** 242 * in_wl_tree - check if wear-leveling entry is present in a WL RB-tree. 243 * @e: the wear-leveling entry to check 244 * @root: the root of the tree 245 * 246 * This function returns non-zero if @e is in the @root RB-tree and zero if it 247 * is not. 248 */ 249 static int in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root) 250 { 251 struct rb_node *p; 252 253 p = root->rb_node; 254 while (p) { 255 struct ubi_wl_entry *e1; 256 257 e1 = rb_entry(p, struct ubi_wl_entry, u.rb); 258 259 if (e->pnum == e1->pnum) { 260 ubi_assert(e == e1); 261 return 1; 262 } 263 264 if (e->ec < e1->ec) 265 p = p->rb_left; 266 else if (e->ec > e1->ec) 267 p = p->rb_right; 268 else { 269 ubi_assert(e->pnum != e1->pnum); 270 if (e->pnum < e1->pnum) 271 p = p->rb_left; 272 else 273 p = p->rb_right; 274 } 275 } 276 277 return 0; 278 } 279 280 /** 281 * prot_queue_add - add physical eraseblock to the protection queue. 282 * @ubi: UBI device description object 283 * @e: the physical eraseblock to add 284 * 285 * This function adds @e to the tail of the protection queue @ubi->pq, where 286 * @e will stay for %UBI_PROT_QUEUE_LEN erase operations and will be 287 * temporarily protected from the wear-leveling worker. Note, @wl->lock has to 288 * be locked. 
289 */ 290 static void prot_queue_add(struct ubi_device *ubi, struct ubi_wl_entry *e) 291 { 292 int pq_tail = ubi->pq_head - 1; 293 294 if (pq_tail < 0) 295 pq_tail = UBI_PROT_QUEUE_LEN - 1; 296 ubi_assert(pq_tail >= 0 && pq_tail < UBI_PROT_QUEUE_LEN); 297 list_add_tail(&e->u.list, &ubi->pq[pq_tail]); 298 dbg_wl("added PEB %d EC %d to the protection queue", e->pnum, e->ec); 299 } 300 301 /** 302 * find_wl_entry - find wear-leveling entry closest to certain erase counter. 303 * @ubi: UBI device description object 304 * @root: the RB-tree where to look for 305 * @diff: maximum possible difference from the smallest erase counter 306 * 307 * This function looks for a wear leveling entry with erase counter closest to 308 * min + @diff, where min is the smallest erase counter. 309 */ 310 static struct ubi_wl_entry *find_wl_entry(struct ubi_device *ubi, 311 struct rb_root *root, int diff) 312 { 313 struct rb_node *p; 314 struct ubi_wl_entry *e, *prev_e = NULL; 315 int max; 316 317 e = rb_entry(rb_first(root), struct ubi_wl_entry, u.rb); 318 max = e->ec + diff; 319 320 p = root->rb_node; 321 while (p) { 322 struct ubi_wl_entry *e1; 323 324 e1 = rb_entry(p, struct ubi_wl_entry, u.rb); 325 if (e1->ec >= max) 326 p = p->rb_left; 327 else { 328 p = p->rb_right; 329 prev_e = e; 330 e = e1; 331 } 332 } 333 334 /* If no fastmap has been written and this WL entry can be used 335 * as anchor PEB, hold it back and return the second best WL entry 336 * such that fastmap can use the anchor PEB later. */ 337 if (prev_e && !ubi->fm_disabled && 338 !ubi->fm && e->pnum < UBI_FM_MAX_START) 339 return prev_e; 340 341 return e; 342 } 343 344 /** 345 * find_mean_wl_entry - find wear-leveling entry with medium erase counter. 346 * @ubi: UBI device description object 347 * @root: the RB-tree where to look for 348 * 349 * This function looks for a wear leveling entry with medium erase counter, 350 * but not greater or equivalent than the lowest erase counter plus 351 * %WL_FREE_MAX_DIFF/2. 
352 */ 353 static struct ubi_wl_entry *find_mean_wl_entry(struct ubi_device *ubi, 354 struct rb_root *root) 355 { 356 struct ubi_wl_entry *e, *first, *last; 357 358 first = rb_entry(rb_first(root), struct ubi_wl_entry, u.rb); 359 last = rb_entry(rb_last(root), struct ubi_wl_entry, u.rb); 360 361 if (last->ec - first->ec < WL_FREE_MAX_DIFF) { 362 e = rb_entry(root->rb_node, struct ubi_wl_entry, u.rb); 363 364 /* If no fastmap has been written and this WL entry can be used 365 * as anchor PEB, hold it back and return the second best 366 * WL entry such that fastmap can use the anchor PEB later. */ 367 e = may_reserve_for_fm(ubi, e, root); 368 } else 369 e = find_wl_entry(ubi, root, WL_FREE_MAX_DIFF/2); 370 371 return e; 372 } 373 374 /** 375 * wl_get_wle - get a mean wl entry to be used by ubi_wl_get_peb() or 376 * refill_wl_user_pool(). 377 * @ubi: UBI device description object 378 * 379 * This function returns a a wear leveling entry in case of success and 380 * NULL in case of failure. 381 */ 382 static struct ubi_wl_entry *wl_get_wle(struct ubi_device *ubi) 383 { 384 struct ubi_wl_entry *e; 385 386 e = find_mean_wl_entry(ubi, &ubi->free); 387 if (!e) { 388 ubi_err(ubi, "no free eraseblocks"); 389 return NULL; 390 } 391 392 self_check_in_wl_tree(ubi, e, &ubi->free); 393 394 /* 395 * Move the physical eraseblock to the protection queue where it will 396 * be protected from being moved for some time. 397 */ 398 rb_erase(&e->u.rb, &ubi->free); 399 ubi->free_count--; 400 dbg_wl("PEB %d EC %d", e->pnum, e->ec); 401 402 return e; 403 } 404 405 /** 406 * prot_queue_del - remove a physical eraseblock from the protection queue. 407 * @ubi: UBI device description object 408 * @pnum: the physical eraseblock to remove 409 * 410 * This function deletes PEB @pnum from the protection queue and returns zero 411 * in case of success and %-ENODEV if the PEB was not found. 
412 */ 413 static int prot_queue_del(struct ubi_device *ubi, int pnum) 414 { 415 struct ubi_wl_entry *e; 416 417 e = ubi->lookuptbl[pnum]; 418 if (!e) 419 return -ENODEV; 420 421 if (self_check_in_pq(ubi, e)) 422 return -ENODEV; 423 424 list_del(&e->u.list); 425 dbg_wl("deleted PEB %d from the protection queue", e->pnum); 426 return 0; 427 } 428 429 /** 430 * sync_erase - synchronously erase a physical eraseblock. 431 * @ubi: UBI device description object 432 * @e: the the physical eraseblock to erase 433 * @torture: if the physical eraseblock has to be tortured 434 * 435 * This function returns zero in case of success and a negative error code in 436 * case of failure. 437 */ 438 static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, 439 int torture) 440 { 441 int err; 442 struct ubi_ec_hdr *ec_hdr; 443 unsigned long long ec = e->ec; 444 445 dbg_wl("erase PEB %d, old EC %llu", e->pnum, ec); 446 447 err = self_check_ec(ubi, e->pnum, e->ec); 448 if (err) 449 return -EINVAL; 450 451 ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS); 452 if (!ec_hdr) 453 return -ENOMEM; 454 455 err = ubi_io_sync_erase(ubi, e->pnum, torture); 456 if (err < 0) 457 goto out_free; 458 459 ec += err; 460 if (ec > UBI_MAX_ERASECOUNTER) { 461 /* 462 * Erase counter overflow. Upgrade UBI and use 64-bit 463 * erase counters internally. 464 */ 465 ubi_err(ubi, "erase counter overflow at PEB %d, EC %llu", 466 e->pnum, ec); 467 err = -EINVAL; 468 goto out_free; 469 } 470 471 dbg_wl("erased PEB %d, new EC %llu", e->pnum, ec); 472 473 ec_hdr->ec = cpu_to_be64(ec); 474 475 err = ubi_io_write_ec_hdr(ubi, e->pnum, ec_hdr); 476 if (err) 477 goto out_free; 478 479 e->ec = ec; 480 spin_lock(&ubi->wl_lock); 481 if (e->ec > ubi->max_ec) 482 ubi->max_ec = e->ec; 483 spin_unlock(&ubi->wl_lock); 484 485 out_free: 486 kfree(ec_hdr); 487 return err; 488 } 489 490 /** 491 * serve_prot_queue - check if it is time to stop protecting PEBs. 
492 * @ubi: UBI device description object 493 * 494 * This function is called after each erase operation and removes PEBs from the 495 * tail of the protection queue. These PEBs have been protected for long enough 496 * and should be moved to the used tree. 497 */ 498 static void serve_prot_queue(struct ubi_device *ubi) 499 { 500 struct ubi_wl_entry *e, *tmp; 501 int count; 502 503 /* 504 * There may be several protected physical eraseblock to remove, 505 * process them all. 506 */ 507 repeat: 508 count = 0; 509 spin_lock(&ubi->wl_lock); 510 list_for_each_entry_safe(e, tmp, &ubi->pq[ubi->pq_head], u.list) { 511 dbg_wl("PEB %d EC %d protection over, move to used tree", 512 e->pnum, e->ec); 513 514 list_del(&e->u.list); 515 wl_tree_add(e, &ubi->used); 516 if (count++ > 32) { 517 /* 518 * Let's be nice and avoid holding the spinlock for 519 * too long. 520 */ 521 spin_unlock(&ubi->wl_lock); 522 cond_resched(); 523 goto repeat; 524 } 525 } 526 527 ubi->pq_head += 1; 528 if (ubi->pq_head == UBI_PROT_QUEUE_LEN) 529 ubi->pq_head = 0; 530 ubi_assert(ubi->pq_head >= 0 && ubi->pq_head < UBI_PROT_QUEUE_LEN); 531 spin_unlock(&ubi->wl_lock); 532 } 533 534 /** 535 * __schedule_ubi_work - schedule a work. 536 * @ubi: UBI device description object 537 * @wrk: the work to schedule 538 * 539 * This function adds a work defined by @wrk to the tail of the pending works 540 * list. Can only be used if ubi->work_sem is already held in read mode! 541 */ 542 static void __schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk) 543 { 544 spin_lock(&ubi->wl_lock); 545 list_add_tail(&wrk->list, &ubi->works); 546 ubi_assert(ubi->works_count >= 0); 547 ubi->works_count += 1; 548 if (ubi->thread_enabled && !ubi_dbg_is_bgt_disabled(ubi)) 549 wake_up_process(ubi->bgt_thread); 550 spin_unlock(&ubi->wl_lock); 551 } 552 553 /** 554 * schedule_ubi_work - schedule a work. 
555 * @ubi: UBI device description object 556 * @wrk: the work to schedule 557 * 558 * This function adds a work defined by @wrk to the tail of the pending works 559 * list. 560 */ 561 static void schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk) 562 { 563 down_read(&ubi->work_sem); 564 __schedule_ubi_work(ubi, wrk); 565 up_read(&ubi->work_sem); 566 } 567 568 static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, 569 int shutdown); 570 571 /** 572 * schedule_erase - schedule an erase work. 573 * @ubi: UBI device description object 574 * @e: the WL entry of the physical eraseblock to erase 575 * @vol_id: the volume ID that last used this PEB 576 * @lnum: the last used logical eraseblock number for the PEB 577 * @torture: if the physical eraseblock has to be tortured 578 * 579 * This function returns zero in case of success and a %-ENOMEM in case of 580 * failure. 581 */ 582 static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, 583 int vol_id, int lnum, int torture) 584 { 585 struct ubi_work *wl_wrk; 586 587 ubi_assert(e); 588 589 dbg_wl("schedule erasure of PEB %d, EC %d, torture %d", 590 e->pnum, e->ec, torture); 591 592 wl_wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS); 593 if (!wl_wrk) 594 return -ENOMEM; 595 596 wl_wrk->func = &erase_worker; 597 wl_wrk->e = e; 598 wl_wrk->vol_id = vol_id; 599 wl_wrk->lnum = lnum; 600 wl_wrk->torture = torture; 601 602 schedule_ubi_work(ubi, wl_wrk); 603 return 0; 604 } 605 606 static int __erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk); 607 /** 608 * do_sync_erase - run the erase worker synchronously. 
609 * @ubi: UBI device description object 610 * @e: the WL entry of the physical eraseblock to erase 611 * @vol_id: the volume ID that last used this PEB 612 * @lnum: the last used logical eraseblock number for the PEB 613 * @torture: if the physical eraseblock has to be tortured 614 * 615 */ 616 static int do_sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, 617 int vol_id, int lnum, int torture) 618 { 619 struct ubi_work wl_wrk; 620 621 dbg_wl("sync erase of PEB %i", e->pnum); 622 623 wl_wrk.e = e; 624 wl_wrk.vol_id = vol_id; 625 wl_wrk.lnum = lnum; 626 wl_wrk.torture = torture; 627 628 return __erase_worker(ubi, &wl_wrk); 629 } 630 631 static int ensure_wear_leveling(struct ubi_device *ubi, int nested); 632 /** 633 * wear_leveling_worker - wear-leveling worker function. 634 * @ubi: UBI device description object 635 * @wrk: the work object 636 * @shutdown: non-zero if the worker has to free memory and exit 637 * because the WL-subsystem is shutting down 638 * 639 * This function copies a more worn out physical eraseblock to a less worn out 640 * one. Returns zero in case of success and a negative error code in case of 641 * failure. 642 */ 643 static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, 644 int shutdown) 645 { 646 int err, scrubbing = 0, torture = 0, protect = 0, erroneous = 0; 647 int vol_id = -1, lnum = -1; 648 #ifdef CONFIG_MTD_UBI_FASTMAP 649 int anchor = wrk->anchor; 650 #endif 651 struct ubi_wl_entry *e1, *e2; 652 struct ubi_vid_hdr *vid_hdr; 653 int dst_leb_clean = 0; 654 655 kfree(wrk); 656 if (shutdown) 657 return 0; 658 659 vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); 660 if (!vid_hdr) 661 return -ENOMEM; 662 663 mutex_lock(&ubi->move_mutex); 664 spin_lock(&ubi->wl_lock); 665 ubi_assert(!ubi->move_from && !ubi->move_to); 666 ubi_assert(!ubi->move_to_put); 667 668 if (!ubi->free.rb_node || 669 (!ubi->used.rb_node && !ubi->scrub.rb_node)) { 670 /* 671 * No free physical eraseblocks? 
Well, they must be waiting in 672 * the queue to be erased. Cancel movement - it will be 673 * triggered again when a free physical eraseblock appears. 674 * 675 * No used physical eraseblocks? They must be temporarily 676 * protected from being moved. They will be moved to the 677 * @ubi->used tree later and the wear-leveling will be 678 * triggered again. 679 */ 680 dbg_wl("cancel WL, a list is empty: free %d, used %d", 681 !ubi->free.rb_node, !ubi->used.rb_node); 682 goto out_cancel; 683 } 684 685 #ifdef CONFIG_MTD_UBI_FASTMAP 686 /* Check whether we need to produce an anchor PEB */ 687 if (!anchor) 688 anchor = !anchor_pebs_avalible(&ubi->free); 689 690 if (anchor) { 691 e1 = find_anchor_wl_entry(&ubi->used); 692 if (!e1) 693 goto out_cancel; 694 e2 = get_peb_for_wl(ubi); 695 if (!e2) 696 goto out_cancel; 697 698 self_check_in_wl_tree(ubi, e1, &ubi->used); 699 rb_erase(&e1->u.rb, &ubi->used); 700 dbg_wl("anchor-move PEB %d to PEB %d", e1->pnum, e2->pnum); 701 } else if (!ubi->scrub.rb_node) { 702 #else 703 if (!ubi->scrub.rb_node) { 704 #endif 705 /* 706 * Now pick the least worn-out used physical eraseblock and a 707 * highly worn-out free physical eraseblock. If the erase 708 * counters differ much enough, start wear-leveling. 
709 */ 710 e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb); 711 e2 = get_peb_for_wl(ubi); 712 if (!e2) 713 goto out_cancel; 714 715 if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) { 716 dbg_wl("no WL needed: min used EC %d, max free EC %d", 717 e1->ec, e2->ec); 718 719 /* Give the unused PEB back */ 720 wl_tree_add(e2, &ubi->free); 721 ubi->free_count++; 722 goto out_cancel; 723 } 724 self_check_in_wl_tree(ubi, e1, &ubi->used); 725 rb_erase(&e1->u.rb, &ubi->used); 726 dbg_wl("move PEB %d EC %d to PEB %d EC %d", 727 e1->pnum, e1->ec, e2->pnum, e2->ec); 728 } else { 729 /* Perform scrubbing */ 730 scrubbing = 1; 731 e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, u.rb); 732 e2 = get_peb_for_wl(ubi); 733 if (!e2) 734 goto out_cancel; 735 736 self_check_in_wl_tree(ubi, e1, &ubi->scrub); 737 rb_erase(&e1->u.rb, &ubi->scrub); 738 dbg_wl("scrub PEB %d to PEB %d", e1->pnum, e2->pnum); 739 } 740 741 ubi->move_from = e1; 742 ubi->move_to = e2; 743 spin_unlock(&ubi->wl_lock); 744 745 /* 746 * Now we are going to copy physical eraseblock @e1->pnum to @e2->pnum. 747 * We so far do not know which logical eraseblock our physical 748 * eraseblock (@e1) belongs to. We have to read the volume identifier 749 * header first. 750 * 751 * Note, we are protected from this PEB being unmapped and erased. The 752 * 'ubi_wl_put_peb()' would wait for moving to be finished if the PEB 753 * which is being moved was unmapped. 754 */ 755 756 err = ubi_io_read_vid_hdr(ubi, e1->pnum, vid_hdr, 0); 757 if (err && err != UBI_IO_BITFLIPS) { 758 dst_leb_clean = 1; 759 if (err == UBI_IO_FF) { 760 /* 761 * We are trying to move PEB without a VID header. UBI 762 * always write VID headers shortly after the PEB was 763 * given, so we have a situation when it has not yet 764 * had a chance to write it, because it was preempted. 
765 * So add this PEB to the protection queue so far, 766 * because presumably more data will be written there 767 * (including the missing VID header), and then we'll 768 * move it. 769 */ 770 dbg_wl("PEB %d has no VID header", e1->pnum); 771 protect = 1; 772 goto out_not_moved; 773 } else if (err == UBI_IO_FF_BITFLIPS) { 774 /* 775 * The same situation as %UBI_IO_FF, but bit-flips were 776 * detected. It is better to schedule this PEB for 777 * scrubbing. 778 */ 779 dbg_wl("PEB %d has no VID header but has bit-flips", 780 e1->pnum); 781 scrubbing = 1; 782 goto out_not_moved; 783 } 784 785 ubi_err(ubi, "error %d while reading VID header from PEB %d", 786 err, e1->pnum); 787 goto out_error; 788 } 789 790 vol_id = be32_to_cpu(vid_hdr->vol_id); 791 lnum = be32_to_cpu(vid_hdr->lnum); 792 793 err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vid_hdr); 794 if (err) { 795 if (err == MOVE_CANCEL_RACE) { 796 /* 797 * The LEB has not been moved because the volume is 798 * being deleted or the PEB has been put meanwhile. We 799 * should prevent this PEB from being selected for 800 * wear-leveling movement again, so put it to the 801 * protection queue. 802 */ 803 protect = 1; 804 dst_leb_clean = 1; 805 goto out_not_moved; 806 } 807 if (err == MOVE_RETRY) { 808 scrubbing = 1; 809 dst_leb_clean = 1; 810 goto out_not_moved; 811 } 812 if (err == MOVE_TARGET_BITFLIPS || err == MOVE_TARGET_WR_ERR || 813 err == MOVE_TARGET_RD_ERR) { 814 /* 815 * Target PEB had bit-flips or write error - torture it. 816 */ 817 torture = 1; 818 goto out_not_moved; 819 } 820 821 if (err == MOVE_SOURCE_RD_ERR) { 822 /* 823 * An error happened while reading the source PEB. Do 824 * not switch to R/O mode in this case, and give the 825 * upper layers a possibility to recover from this, 826 * e.g. by unmapping corresponding LEB. Instead, just 827 * put this PEB to the @ubi->erroneous list to prevent 828 * UBI from trying to move it over and over again. 
829 */ 830 if (ubi->erroneous_peb_count > ubi->max_erroneous) { 831 ubi_err(ubi, "too many erroneous eraseblocks (%d)", 832 ubi->erroneous_peb_count); 833 goto out_error; 834 } 835 dst_leb_clean = 1; 836 erroneous = 1; 837 goto out_not_moved; 838 } 839 840 if (err < 0) 841 goto out_error; 842 843 ubi_assert(0); 844 } 845 846 /* The PEB has been successfully moved */ 847 if (scrubbing) 848 ubi_msg(ubi, "scrubbed PEB %d (LEB %d:%d), data moved to PEB %d", 849 e1->pnum, vol_id, lnum, e2->pnum); 850 ubi_free_vid_hdr(ubi, vid_hdr); 851 852 spin_lock(&ubi->wl_lock); 853 if (!ubi->move_to_put) { 854 wl_tree_add(e2, &ubi->used); 855 e2 = NULL; 856 } 857 ubi->move_from = ubi->move_to = NULL; 858 ubi->move_to_put = ubi->wl_scheduled = 0; 859 spin_unlock(&ubi->wl_lock); 860 861 err = do_sync_erase(ubi, e1, vol_id, lnum, 0); 862 if (err) { 863 if (e2) 864 wl_entry_destroy(ubi, e2); 865 goto out_ro; 866 } 867 868 if (e2) { 869 /* 870 * Well, the target PEB was put meanwhile, schedule it for 871 * erasure. 872 */ 873 dbg_wl("PEB %d (LEB %d:%d) was put meanwhile, erase", 874 e2->pnum, vol_id, lnum); 875 err = do_sync_erase(ubi, e2, vol_id, lnum, 0); 876 if (err) 877 goto out_ro; 878 } 879 880 dbg_wl("done"); 881 mutex_unlock(&ubi->move_mutex); 882 return 0; 883 884 /* 885 * For some reasons the LEB was not moved, might be an error, might be 886 * something else. @e1 was not changed, so return it back. @e2 might 887 * have been changed, schedule it for erasure. 
888 */ 889 out_not_moved: 890 if (vol_id != -1) 891 dbg_wl("cancel moving PEB %d (LEB %d:%d) to PEB %d (%d)", 892 e1->pnum, vol_id, lnum, e2->pnum, err); 893 else 894 dbg_wl("cancel moving PEB %d to PEB %d (%d)", 895 e1->pnum, e2->pnum, err); 896 spin_lock(&ubi->wl_lock); 897 if (protect) 898 prot_queue_add(ubi, e1); 899 else if (erroneous) { 900 wl_tree_add(e1, &ubi->erroneous); 901 ubi->erroneous_peb_count += 1; 902 } else if (scrubbing) 903 wl_tree_add(e1, &ubi->scrub); 904 else 905 wl_tree_add(e1, &ubi->used); 906 if (dst_leb_clean) { 907 wl_tree_add(e2, &ubi->free); 908 ubi->free_count++; 909 } 910 911 ubi_assert(!ubi->move_to_put); 912 ubi->move_from = ubi->move_to = NULL; 913 ubi->wl_scheduled = 0; 914 spin_unlock(&ubi->wl_lock); 915 916 ubi_free_vid_hdr(ubi, vid_hdr); 917 if (dst_leb_clean) { 918 ensure_wear_leveling(ubi, 1); 919 } else { 920 err = do_sync_erase(ubi, e2, vol_id, lnum, torture); 921 if (err) 922 goto out_ro; 923 } 924 925 mutex_unlock(&ubi->move_mutex); 926 return 0; 927 928 out_error: 929 if (vol_id != -1) 930 ubi_err(ubi, "error %d while moving PEB %d to PEB %d", 931 err, e1->pnum, e2->pnum); 932 else 933 ubi_err(ubi, "error %d while moving PEB %d (LEB %d:%d) to PEB %d", 934 err, e1->pnum, vol_id, lnum, e2->pnum); 935 spin_lock(&ubi->wl_lock); 936 ubi->move_from = ubi->move_to = NULL; 937 ubi->move_to_put = ubi->wl_scheduled = 0; 938 spin_unlock(&ubi->wl_lock); 939 940 ubi_free_vid_hdr(ubi, vid_hdr); 941 wl_entry_destroy(ubi, e1); 942 wl_entry_destroy(ubi, e2); 943 944 out_ro: 945 ubi_ro_mode(ubi); 946 mutex_unlock(&ubi->move_mutex); 947 ubi_assert(err != 0); 948 return err < 0 ? err : -EIO; 949 950 out_cancel: 951 ubi->wl_scheduled = 0; 952 spin_unlock(&ubi->wl_lock); 953 mutex_unlock(&ubi->move_mutex); 954 ubi_free_vid_hdr(ubi, vid_hdr); 955 return 0; 956 } 957 958 /** 959 * ensure_wear_leveling - schedule wear-leveling if it is needed. 
960 * @ubi: UBI device description object 961 * @nested: set to non-zero if this function is called from UBI worker 962 * 963 * This function checks if it is time to start wear-leveling and schedules it 964 * if yes. This function returns zero in case of success and a negative error 965 * code in case of failure. 966 */ 967 static int ensure_wear_leveling(struct ubi_device *ubi, int nested) 968 { 969 int err = 0; 970 struct ubi_wl_entry *e1; 971 struct ubi_wl_entry *e2; 972 struct ubi_work *wrk; 973 974 spin_lock(&ubi->wl_lock); 975 if (ubi->wl_scheduled) 976 /* Wear-leveling is already in the work queue */ 977 goto out_unlock; 978 979 /* 980 * If the ubi->scrub tree is not empty, scrubbing is needed, and the 981 * the WL worker has to be scheduled anyway. 982 */ 983 if (!ubi->scrub.rb_node) { 984 if (!ubi->used.rb_node || !ubi->free.rb_node) 985 /* No physical eraseblocks - no deal */ 986 goto out_unlock; 987 988 /* 989 * We schedule wear-leveling only if the difference between the 990 * lowest erase counter of used physical eraseblocks and a high 991 * erase counter of free physical eraseblocks is greater than 992 * %UBI_WL_THRESHOLD. 
993 */ 994 e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb); 995 e2 = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF); 996 997 if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) 998 goto out_unlock; 999 dbg_wl("schedule wear-leveling"); 1000 } else 1001 dbg_wl("schedule scrubbing"); 1002 1003 ubi->wl_scheduled = 1; 1004 spin_unlock(&ubi->wl_lock); 1005 1006 wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS); 1007 if (!wrk) { 1008 err = -ENOMEM; 1009 goto out_cancel; 1010 } 1011 1012 wrk->anchor = 0; 1013 wrk->func = &wear_leveling_worker; 1014 if (nested) 1015 __schedule_ubi_work(ubi, wrk); 1016 else 1017 schedule_ubi_work(ubi, wrk); 1018 return err; 1019 1020 out_cancel: 1021 spin_lock(&ubi->wl_lock); 1022 ubi->wl_scheduled = 0; 1023 out_unlock: 1024 spin_unlock(&ubi->wl_lock); 1025 return err; 1026 } 1027 1028 /** 1029 * __erase_worker - physical eraseblock erase worker function. 1030 * @ubi: UBI device description object 1031 * @wl_wrk: the work object 1032 * @shutdown: non-zero if the worker has to free memory and exit 1033 * because the WL sub-system is shutting down 1034 * 1035 * This function erases a physical eraseblock and perform torture testing if 1036 * needed. It also takes care about marking the physical eraseblock bad if 1037 * needed. Returns zero in case of success and a negative error code in case of 1038 * failure. 
1039 */ 1040 static int __erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk) 1041 { 1042 struct ubi_wl_entry *e = wl_wrk->e; 1043 int pnum = e->pnum; 1044 int vol_id = wl_wrk->vol_id; 1045 int lnum = wl_wrk->lnum; 1046 int err, available_consumed = 0; 1047 1048 dbg_wl("erase PEB %d EC %d LEB %d:%d", 1049 pnum, e->ec, wl_wrk->vol_id, wl_wrk->lnum); 1050 1051 err = sync_erase(ubi, e, wl_wrk->torture); 1052 if (!err) { 1053 spin_lock(&ubi->wl_lock); 1054 wl_tree_add(e, &ubi->free); 1055 ubi->free_count++; 1056 spin_unlock(&ubi->wl_lock); 1057 1058 /* 1059 * One more erase operation has happened, take care about 1060 * protected physical eraseblocks. 1061 */ 1062 serve_prot_queue(ubi); 1063 1064 /* And take care about wear-leveling */ 1065 err = ensure_wear_leveling(ubi, 1); 1066 return err; 1067 } 1068 1069 ubi_err(ubi, "failed to erase PEB %d, error %d", pnum, err); 1070 1071 if (err == -EINTR || err == -ENOMEM || err == -EAGAIN || 1072 err == -EBUSY) { 1073 int err1; 1074 1075 /* Re-schedule the LEB for erasure */ 1076 err1 = schedule_erase(ubi, e, vol_id, lnum, 0); 1077 if (err1) { 1078 wl_entry_destroy(ubi, e); 1079 err = err1; 1080 goto out_ro; 1081 } 1082 return err; 1083 } 1084 1085 wl_entry_destroy(ubi, e); 1086 if (err != -EIO) 1087 /* 1088 * If this is not %-EIO, we have no idea what to do. Scheduling 1089 * this physical eraseblock for erasure again would cause 1090 * errors again and again. Well, lets switch to R/O mode. 
1091 */ 1092 goto out_ro; 1093 1094 /* It is %-EIO, the PEB went bad */ 1095 1096 if (!ubi->bad_allowed) { 1097 ubi_err(ubi, "bad physical eraseblock %d detected", pnum); 1098 goto out_ro; 1099 } 1100 1101 spin_lock(&ubi->volumes_lock); 1102 if (ubi->beb_rsvd_pebs == 0) { 1103 if (ubi->avail_pebs == 0) { 1104 spin_unlock(&ubi->volumes_lock); 1105 ubi_err(ubi, "no reserved/available physical eraseblocks"); 1106 goto out_ro; 1107 } 1108 ubi->avail_pebs -= 1; 1109 available_consumed = 1; 1110 } 1111 spin_unlock(&ubi->volumes_lock); 1112 1113 ubi_msg(ubi, "mark PEB %d as bad", pnum); 1114 err = ubi_io_mark_bad(ubi, pnum); 1115 if (err) 1116 goto out_ro; 1117 1118 spin_lock(&ubi->volumes_lock); 1119 if (ubi->beb_rsvd_pebs > 0) { 1120 if (available_consumed) { 1121 /* 1122 * The amount of reserved PEBs increased since we last 1123 * checked. 1124 */ 1125 ubi->avail_pebs += 1; 1126 available_consumed = 0; 1127 } 1128 ubi->beb_rsvd_pebs -= 1; 1129 } 1130 ubi->bad_peb_count += 1; 1131 ubi->good_peb_count -= 1; 1132 ubi_calculate_reserved(ubi); 1133 if (available_consumed) 1134 ubi_warn(ubi, "no PEBs in the reserved pool, used an available PEB"); 1135 else if (ubi->beb_rsvd_pebs) 1136 ubi_msg(ubi, "%d PEBs left in the reserve", 1137 ubi->beb_rsvd_pebs); 1138 else 1139 ubi_warn(ubi, "last PEB from the reserve was used"); 1140 spin_unlock(&ubi->volumes_lock); 1141 1142 return err; 1143 1144 out_ro: 1145 if (available_consumed) { 1146 spin_lock(&ubi->volumes_lock); 1147 ubi->avail_pebs += 1; 1148 spin_unlock(&ubi->volumes_lock); 1149 } 1150 ubi_ro_mode(ubi); 1151 return err; 1152 } 1153 1154 static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, 1155 int shutdown) 1156 { 1157 int ret; 1158 1159 if (shutdown) { 1160 struct ubi_wl_entry *e = wl_wrk->e; 1161 1162 dbg_wl("cancel erasure of PEB %d EC %d", e->pnum, e->ec); 1163 kfree(wl_wrk); 1164 wl_entry_destroy(ubi, e); 1165 return 0; 1166 } 1167 1168 ret = __erase_worker(ubi, wl_wrk); 1169 kfree(wl_wrk); 1170 
return ret; 1171 } 1172 1173 /** 1174 * ubi_wl_put_peb - return a PEB to the wear-leveling sub-system. 1175 * @ubi: UBI device description object 1176 * @vol_id: the volume ID that last used this PEB 1177 * @lnum: the last used logical eraseblock number for the PEB 1178 * @pnum: physical eraseblock to return 1179 * @torture: if this physical eraseblock has to be tortured 1180 * 1181 * This function is called to return physical eraseblock @pnum to the pool of 1182 * free physical eraseblocks. The @torture flag has to be set if an I/O error 1183 * occurred to this @pnum and it has to be tested. This function returns zero 1184 * in case of success, and a negative error code in case of failure. 1185 */ 1186 int ubi_wl_put_peb(struct ubi_device *ubi, int vol_id, int lnum, 1187 int pnum, int torture) 1188 { 1189 int err; 1190 struct ubi_wl_entry *e; 1191 1192 dbg_wl("PEB %d", pnum); 1193 ubi_assert(pnum >= 0); 1194 ubi_assert(pnum < ubi->peb_count); 1195 1196 down_read(&ubi->fm_protect); 1197 1198 retry: 1199 spin_lock(&ubi->wl_lock); 1200 e = ubi->lookuptbl[pnum]; 1201 if (e == ubi->move_from) { 1202 /* 1203 * User is putting the physical eraseblock which was selected to 1204 * be moved. It will be scheduled for erasure in the 1205 * wear-leveling worker. 1206 */ 1207 dbg_wl("PEB %d is being moved, wait", pnum); 1208 spin_unlock(&ubi->wl_lock); 1209 1210 /* Wait for the WL worker by taking the @ubi->move_mutex */ 1211 mutex_lock(&ubi->move_mutex); 1212 mutex_unlock(&ubi->move_mutex); 1213 goto retry; 1214 } else if (e == ubi->move_to) { 1215 /* 1216 * User is putting the physical eraseblock which was selected 1217 * as the target the data is moved to. It may happen if the EBA 1218 * sub-system already re-mapped the LEB in 'ubi_eba_copy_leb()' 1219 * but the WL sub-system has not put the PEB to the "used" tree 1220 * yet, but it is about to do this. 
So we just set a flag which 1221 * will tell the WL worker that the PEB is not needed anymore 1222 * and should be scheduled for erasure. 1223 */ 1224 dbg_wl("PEB %d is the target of data moving", pnum); 1225 ubi_assert(!ubi->move_to_put); 1226 ubi->move_to_put = 1; 1227 spin_unlock(&ubi->wl_lock); 1228 up_read(&ubi->fm_protect); 1229 return 0; 1230 } else { 1231 if (in_wl_tree(e, &ubi->used)) { 1232 self_check_in_wl_tree(ubi, e, &ubi->used); 1233 rb_erase(&e->u.rb, &ubi->used); 1234 } else if (in_wl_tree(e, &ubi->scrub)) { 1235 self_check_in_wl_tree(ubi, e, &ubi->scrub); 1236 rb_erase(&e->u.rb, &ubi->scrub); 1237 } else if (in_wl_tree(e, &ubi->erroneous)) { 1238 self_check_in_wl_tree(ubi, e, &ubi->erroneous); 1239 rb_erase(&e->u.rb, &ubi->erroneous); 1240 ubi->erroneous_peb_count -= 1; 1241 ubi_assert(ubi->erroneous_peb_count >= 0); 1242 /* Erroneous PEBs should be tortured */ 1243 torture = 1; 1244 } else { 1245 err = prot_queue_del(ubi, e->pnum); 1246 if (err) { 1247 ubi_err(ubi, "PEB %d not found", pnum); 1248 ubi_ro_mode(ubi); 1249 spin_unlock(&ubi->wl_lock); 1250 up_read(&ubi->fm_protect); 1251 return err; 1252 } 1253 } 1254 } 1255 spin_unlock(&ubi->wl_lock); 1256 1257 err = schedule_erase(ubi, e, vol_id, lnum, torture); 1258 if (err) { 1259 spin_lock(&ubi->wl_lock); 1260 wl_tree_add(e, &ubi->used); 1261 spin_unlock(&ubi->wl_lock); 1262 } 1263 1264 up_read(&ubi->fm_protect); 1265 return err; 1266 } 1267 1268 /** 1269 * ubi_wl_scrub_peb - schedule a physical eraseblock for scrubbing. 1270 * @ubi: UBI device description object 1271 * @pnum: the physical eraseblock to schedule 1272 * 1273 * If a bit-flip in a physical eraseblock is detected, this physical eraseblock 1274 * needs scrubbing. This function schedules a physical eraseblock for 1275 * scrubbing which is done in background. This function returns zero in case of 1276 * success and a negative error code in case of failure. 
1277 */ 1278 int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum) 1279 { 1280 struct ubi_wl_entry *e; 1281 1282 ubi_msg(ubi, "schedule PEB %d for scrubbing", pnum); 1283 1284 retry: 1285 spin_lock(&ubi->wl_lock); 1286 e = ubi->lookuptbl[pnum]; 1287 if (e == ubi->move_from || in_wl_tree(e, &ubi->scrub) || 1288 in_wl_tree(e, &ubi->erroneous)) { 1289 spin_unlock(&ubi->wl_lock); 1290 return 0; 1291 } 1292 1293 if (e == ubi->move_to) { 1294 /* 1295 * This physical eraseblock was used to move data to. The data 1296 * was moved but the PEB was not yet inserted to the proper 1297 * tree. We should just wait a little and let the WL worker 1298 * proceed. 1299 */ 1300 spin_unlock(&ubi->wl_lock); 1301 dbg_wl("the PEB %d is not in proper tree, retry", pnum); 1302 yield(); 1303 goto retry; 1304 } 1305 1306 if (in_wl_tree(e, &ubi->used)) { 1307 self_check_in_wl_tree(ubi, e, &ubi->used); 1308 rb_erase(&e->u.rb, &ubi->used); 1309 } else { 1310 int err; 1311 1312 err = prot_queue_del(ubi, e->pnum); 1313 if (err) { 1314 ubi_err(ubi, "PEB %d not found", pnum); 1315 ubi_ro_mode(ubi); 1316 spin_unlock(&ubi->wl_lock); 1317 return err; 1318 } 1319 } 1320 1321 wl_tree_add(e, &ubi->scrub); 1322 spin_unlock(&ubi->wl_lock); 1323 1324 /* 1325 * Technically scrubbing is the same as wear-leveling, so it is done 1326 * by the WL worker. 1327 */ 1328 return ensure_wear_leveling(ubi, 0); 1329 } 1330 1331 /** 1332 * ubi_wl_flush - flush all pending works. 1333 * @ubi: UBI device description object 1334 * @vol_id: the volume id to flush for 1335 * @lnum: the logical eraseblock number to flush for 1336 * 1337 * This function executes all pending works for a particular volume id / 1338 * logical eraseblock number pair. If either value is set to %UBI_ALL, then it 1339 * acts as a wildcard for all of the corresponding volume numbers or logical 1340 * eraseblock numbers. It returns zero in case of success and a negative error 1341 * code in case of failure. 
1342 */ 1343 int ubi_wl_flush(struct ubi_device *ubi, int vol_id, int lnum) 1344 { 1345 int err = 0; 1346 int found = 1; 1347 1348 /* 1349 * Erase while the pending works queue is not empty, but not more than 1350 * the number of currently pending works. 1351 */ 1352 dbg_wl("flush pending work for LEB %d:%d (%d pending works)", 1353 vol_id, lnum, ubi->works_count); 1354 1355 while (found) { 1356 struct ubi_work *wrk, *tmp; 1357 found = 0; 1358 1359 down_read(&ubi->work_sem); 1360 spin_lock(&ubi->wl_lock); 1361 list_for_each_entry_safe(wrk, tmp, &ubi->works, list) { 1362 if ((vol_id == UBI_ALL || wrk->vol_id == vol_id) && 1363 (lnum == UBI_ALL || wrk->lnum == lnum)) { 1364 list_del(&wrk->list); 1365 ubi->works_count -= 1; 1366 ubi_assert(ubi->works_count >= 0); 1367 spin_unlock(&ubi->wl_lock); 1368 1369 err = wrk->func(ubi, wrk, 0); 1370 if (err) { 1371 up_read(&ubi->work_sem); 1372 return err; 1373 } 1374 1375 spin_lock(&ubi->wl_lock); 1376 found = 1; 1377 break; 1378 } 1379 } 1380 spin_unlock(&ubi->wl_lock); 1381 up_read(&ubi->work_sem); 1382 } 1383 1384 /* 1385 * Make sure all the works which have been done in parallel are 1386 * finished. 1387 */ 1388 down_write(&ubi->work_sem); 1389 up_write(&ubi->work_sem); 1390 1391 return err; 1392 } 1393 1394 /** 1395 * tree_destroy - destroy an RB-tree. 
1396 * @ubi: UBI device description object 1397 * @root: the root of the tree to destroy 1398 */ 1399 static void tree_destroy(struct ubi_device *ubi, struct rb_root *root) 1400 { 1401 struct rb_node *rb; 1402 struct ubi_wl_entry *e; 1403 1404 rb = root->rb_node; 1405 while (rb) { 1406 if (rb->rb_left) 1407 rb = rb->rb_left; 1408 else if (rb->rb_right) 1409 rb = rb->rb_right; 1410 else { 1411 e = rb_entry(rb, struct ubi_wl_entry, u.rb); 1412 1413 rb = rb_parent(rb); 1414 if (rb) { 1415 if (rb->rb_left == &e->u.rb) 1416 rb->rb_left = NULL; 1417 else 1418 rb->rb_right = NULL; 1419 } 1420 1421 wl_entry_destroy(ubi, e); 1422 } 1423 } 1424 } 1425 1426 /** 1427 * ubi_thread - UBI background thread. 1428 * @u: the UBI device description object pointer 1429 */ 1430 int ubi_thread(void *u) 1431 { 1432 int failures = 0; 1433 struct ubi_device *ubi = u; 1434 1435 ubi_msg(ubi, "background thread \"%s\" started, PID %d", 1436 ubi->bgt_name, task_pid_nr(current)); 1437 1438 set_freezable(); 1439 for (;;) { 1440 int err; 1441 1442 if (kthread_should_stop()) 1443 break; 1444 1445 if (try_to_freeze()) 1446 continue; 1447 1448 spin_lock(&ubi->wl_lock); 1449 if (list_empty(&ubi->works) || ubi->ro_mode || 1450 !ubi->thread_enabled || ubi_dbg_is_bgt_disabled(ubi)) { 1451 set_current_state(TASK_INTERRUPTIBLE); 1452 spin_unlock(&ubi->wl_lock); 1453 schedule(); 1454 continue; 1455 } 1456 spin_unlock(&ubi->wl_lock); 1457 1458 err = do_work(ubi); 1459 if (err) { 1460 ubi_err(ubi, "%s: work failed with error code %d", 1461 ubi->bgt_name, err); 1462 if (failures++ > WL_MAX_FAILURES) { 1463 /* 1464 * Too many failures, disable the thread and 1465 * switch to read-only mode. 
1466 */ 1467 ubi_msg(ubi, "%s: %d consecutive failures", 1468 ubi->bgt_name, WL_MAX_FAILURES); 1469 ubi_ro_mode(ubi); 1470 ubi->thread_enabled = 0; 1471 continue; 1472 } 1473 } else 1474 failures = 0; 1475 1476 cond_resched(); 1477 } 1478 1479 dbg_wl("background thread \"%s\" is killed", ubi->bgt_name); 1480 return 0; 1481 } 1482 1483 /** 1484 * shutdown_work - shutdown all pending works. 1485 * @ubi: UBI device description object 1486 */ 1487 static void shutdown_work(struct ubi_device *ubi) 1488 { 1489 #ifdef CONFIG_MTD_UBI_FASTMAP 1490 flush_work(&ubi->fm_work); 1491 #endif 1492 while (!list_empty(&ubi->works)) { 1493 struct ubi_work *wrk; 1494 1495 wrk = list_entry(ubi->works.next, struct ubi_work, list); 1496 list_del(&wrk->list); 1497 wrk->func(ubi, wrk, 1); 1498 ubi->works_count -= 1; 1499 ubi_assert(ubi->works_count >= 0); 1500 } 1501 } 1502 1503 /** 1504 * ubi_wl_init - initialize the WL sub-system using attaching information. 1505 * @ubi: UBI device description object 1506 * @ai: attaching information 1507 * 1508 * This function returns zero in case of success, and a negative error code in 1509 * case of failure. 
1510 */ 1511 int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) 1512 { 1513 int err, i, reserved_pebs, found_pebs = 0; 1514 struct rb_node *rb1, *rb2; 1515 struct ubi_ainf_volume *av; 1516 struct ubi_ainf_peb *aeb, *tmp; 1517 struct ubi_wl_entry *e; 1518 1519 ubi->used = ubi->erroneous = ubi->free = ubi->scrub = RB_ROOT; 1520 spin_lock_init(&ubi->wl_lock); 1521 mutex_init(&ubi->move_mutex); 1522 init_rwsem(&ubi->work_sem); 1523 ubi->max_ec = ai->max_ec; 1524 INIT_LIST_HEAD(&ubi->works); 1525 1526 sprintf(ubi->bgt_name, UBI_BGT_NAME_PATTERN, ubi->ubi_num); 1527 1528 err = -ENOMEM; 1529 ubi->lookuptbl = kzalloc(ubi->peb_count * sizeof(void *), GFP_KERNEL); 1530 if (!ubi->lookuptbl) 1531 return err; 1532 1533 for (i = 0; i < UBI_PROT_QUEUE_LEN; i++) 1534 INIT_LIST_HEAD(&ubi->pq[i]); 1535 ubi->pq_head = 0; 1536 1537 ubi->free_count = 0; 1538 list_for_each_entry_safe(aeb, tmp, &ai->erase, u.list) { 1539 cond_resched(); 1540 1541 e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); 1542 if (!e) 1543 goto out_free; 1544 1545 e->pnum = aeb->pnum; 1546 e->ec = aeb->ec; 1547 ubi->lookuptbl[e->pnum] = e; 1548 if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0)) { 1549 wl_entry_destroy(ubi, e); 1550 goto out_free; 1551 } 1552 1553 found_pebs++; 1554 } 1555 1556 list_for_each_entry(aeb, &ai->free, u.list) { 1557 cond_resched(); 1558 1559 e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); 1560 if (!e) 1561 goto out_free; 1562 1563 e->pnum = aeb->pnum; 1564 e->ec = aeb->ec; 1565 ubi_assert(e->ec >= 0); 1566 1567 wl_tree_add(e, &ubi->free); 1568 ubi->free_count++; 1569 1570 ubi->lookuptbl[e->pnum] = e; 1571 1572 found_pebs++; 1573 } 1574 1575 ubi_rb_for_each_entry(rb1, av, &ai->volumes, rb) { 1576 ubi_rb_for_each_entry(rb2, aeb, &av->root, u.rb) { 1577 cond_resched(); 1578 1579 e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); 1580 if (!e) 1581 goto out_free; 1582 1583 e->pnum = aeb->pnum; 1584 e->ec = aeb->ec; 1585 ubi->lookuptbl[e->pnum] = e; 1586 1587 if 
(!aeb->scrub) { 1588 dbg_wl("add PEB %d EC %d to the used tree", 1589 e->pnum, e->ec); 1590 wl_tree_add(e, &ubi->used); 1591 } else { 1592 dbg_wl("add PEB %d EC %d to the scrub tree", 1593 e->pnum, e->ec); 1594 wl_tree_add(e, &ubi->scrub); 1595 } 1596 1597 found_pebs++; 1598 } 1599 } 1600 1601 list_for_each_entry(aeb, &ai->fastmap, u.list) { 1602 cond_resched(); 1603 1604 e = ubi_find_fm_block(ubi, aeb->pnum); 1605 1606 if (e) { 1607 ubi_assert(!ubi->lookuptbl[e->pnum]); 1608 ubi->lookuptbl[e->pnum] = e; 1609 } else { 1610 /* 1611 * Usually old Fastmap PEBs are scheduled for erasure 1612 * and we don't have to care about them but if we face 1613 * an power cut before scheduling them we need to 1614 * take care of them here. 1615 */ 1616 if (ubi->lookuptbl[aeb->pnum]) 1617 continue; 1618 1619 e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); 1620 if (!e) 1621 goto out_free; 1622 1623 e->pnum = aeb->pnum; 1624 e->ec = aeb->ec; 1625 ubi_assert(!ubi->lookuptbl[e->pnum]); 1626 ubi->lookuptbl[e->pnum] = e; 1627 if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0)) { 1628 wl_entry_destroy(ubi, e); 1629 goto out_free; 1630 } 1631 } 1632 1633 found_pebs++; 1634 } 1635 1636 dbg_wl("found %i PEBs", found_pebs); 1637 1638 ubi_assert(ubi->good_peb_count == found_pebs); 1639 1640 reserved_pebs = WL_RESERVED_PEBS; 1641 ubi_fastmap_init(ubi, &reserved_pebs); 1642 1643 if (ubi->avail_pebs < reserved_pebs) { 1644 ubi_err(ubi, "no enough physical eraseblocks (%d, need %d)", 1645 ubi->avail_pebs, reserved_pebs); 1646 if (ubi->corr_peb_count) 1647 ubi_err(ubi, "%d PEBs are corrupted and not used", 1648 ubi->corr_peb_count); 1649 err = -ENOSPC; 1650 goto out_free; 1651 } 1652 ubi->avail_pebs -= reserved_pebs; 1653 ubi->rsvd_pebs += reserved_pebs; 1654 1655 /* Schedule wear-leveling if needed */ 1656 err = ensure_wear_leveling(ubi, 0); 1657 if (err) 1658 goto out_free; 1659 1660 return 0; 1661 1662 out_free: 1663 shutdown_work(ubi); 1664 tree_destroy(ubi, &ubi->used); 1665 
tree_destroy(ubi, &ubi->free); 1666 tree_destroy(ubi, &ubi->scrub); 1667 kfree(ubi->lookuptbl); 1668 return err; 1669 } 1670 1671 /** 1672 * protection_queue_destroy - destroy the protection queue. 1673 * @ubi: UBI device description object 1674 */ 1675 static void protection_queue_destroy(struct ubi_device *ubi) 1676 { 1677 int i; 1678 struct ubi_wl_entry *e, *tmp; 1679 1680 for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) { 1681 list_for_each_entry_safe(e, tmp, &ubi->pq[i], u.list) { 1682 list_del(&e->u.list); 1683 wl_entry_destroy(ubi, e); 1684 } 1685 } 1686 } 1687 1688 /** 1689 * ubi_wl_close - close the wear-leveling sub-system. 1690 * @ubi: UBI device description object 1691 */ 1692 void ubi_wl_close(struct ubi_device *ubi) 1693 { 1694 dbg_wl("close the WL sub-system"); 1695 ubi_fastmap_close(ubi); 1696 shutdown_work(ubi); 1697 protection_queue_destroy(ubi); 1698 tree_destroy(ubi, &ubi->used); 1699 tree_destroy(ubi, &ubi->erroneous); 1700 tree_destroy(ubi, &ubi->free); 1701 tree_destroy(ubi, &ubi->scrub); 1702 kfree(ubi->lookuptbl); 1703 } 1704 1705 /** 1706 * self_check_ec - make sure that the erase counter of a PEB is correct. 1707 * @ubi: UBI device description object 1708 * @pnum: the physical eraseblock number to check 1709 * @ec: the erase counter to check 1710 * 1711 * This function returns zero if the erase counter of physical eraseblock @pnum 1712 * is equivalent to @ec, and a negative error code if not or if an error 1713 * occurred. 
1714 */ 1715 static int self_check_ec(struct ubi_device *ubi, int pnum, int ec) 1716 { 1717 int err; 1718 long long read_ec; 1719 struct ubi_ec_hdr *ec_hdr; 1720 1721 if (!ubi_dbg_chk_gen(ubi)) 1722 return 0; 1723 1724 ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS); 1725 if (!ec_hdr) 1726 return -ENOMEM; 1727 1728 err = ubi_io_read_ec_hdr(ubi, pnum, ec_hdr, 0); 1729 if (err && err != UBI_IO_BITFLIPS) { 1730 /* The header does not have to exist */ 1731 err = 0; 1732 goto out_free; 1733 } 1734 1735 read_ec = be64_to_cpu(ec_hdr->ec); 1736 if (ec != read_ec && read_ec - ec > 1) { 1737 ubi_err(ubi, "self-check failed for PEB %d", pnum); 1738 ubi_err(ubi, "read EC is %lld, should be %d", read_ec, ec); 1739 dump_stack(); 1740 err = 1; 1741 } else 1742 err = 0; 1743 1744 out_free: 1745 kfree(ec_hdr); 1746 return err; 1747 } 1748 1749 /** 1750 * self_check_in_wl_tree - check that wear-leveling entry is in WL RB-tree. 1751 * @ubi: UBI device description object 1752 * @e: the wear-leveling entry to check 1753 * @root: the root of the tree 1754 * 1755 * This function returns zero if @e is in the @root RB-tree and %-EINVAL if it 1756 * is not. 1757 */ 1758 static int self_check_in_wl_tree(const struct ubi_device *ubi, 1759 struct ubi_wl_entry *e, struct rb_root *root) 1760 { 1761 if (!ubi_dbg_chk_gen(ubi)) 1762 return 0; 1763 1764 if (in_wl_tree(e, root)) 1765 return 0; 1766 1767 ubi_err(ubi, "self-check failed for PEB %d, EC %d, RB-tree %p ", 1768 e->pnum, e->ec, root); 1769 dump_stack(); 1770 return -EINVAL; 1771 } 1772 1773 /** 1774 * self_check_in_pq - check if wear-leveling entry is in the protection 1775 * queue. 1776 * @ubi: UBI device description object 1777 * @e: the wear-leveling entry to check 1778 * 1779 * This function returns zero if @e is in @ubi->pq and %-EINVAL if it is not. 
1780 */ 1781 static int self_check_in_pq(const struct ubi_device *ubi, 1782 struct ubi_wl_entry *e) 1783 { 1784 struct ubi_wl_entry *p; 1785 int i; 1786 1787 if (!ubi_dbg_chk_gen(ubi)) 1788 return 0; 1789 1790 for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) 1791 list_for_each_entry(p, &ubi->pq[i], u.list) 1792 if (p == e) 1793 return 0; 1794 1795 ubi_err(ubi, "self-check failed for PEB %d, EC %d, Protect queue", 1796 e->pnum, e->ec); 1797 dump_stack(); 1798 return -EINVAL; 1799 } 1800 #ifndef CONFIG_MTD_UBI_FASTMAP 1801 static struct ubi_wl_entry *get_peb_for_wl(struct ubi_device *ubi) 1802 { 1803 struct ubi_wl_entry *e; 1804 1805 e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF); 1806 self_check_in_wl_tree(ubi, e, &ubi->free); 1807 ubi->free_count--; 1808 ubi_assert(ubi->free_count >= 0); 1809 rb_erase(&e->u.rb, &ubi->free); 1810 1811 return e; 1812 } 1813 1814 /** 1815 * produce_free_peb - produce a free physical eraseblock. 1816 * @ubi: UBI device description object 1817 * 1818 * This function tries to make a free PEB by means of synchronous execution of 1819 * pending works. This may be needed if, for example the background thread is 1820 * disabled. Returns zero in case of success and a negative error code in case 1821 * of failure. 1822 */ 1823 static int produce_free_peb(struct ubi_device *ubi) 1824 { 1825 int err; 1826 1827 while (!ubi->free.rb_node && ubi->works_count) { 1828 spin_unlock(&ubi->wl_lock); 1829 1830 dbg_wl("do one work synchronously"); 1831 err = do_work(ubi); 1832 1833 spin_lock(&ubi->wl_lock); 1834 if (err) 1835 return err; 1836 } 1837 1838 return 0; 1839 } 1840 1841 /** 1842 * ubi_wl_get_peb - get a physical eraseblock. 1843 * @ubi: UBI device description object 1844 * 1845 * This function returns a physical eraseblock in case of success and a 1846 * negative error code in case of failure. 1847 * Returns with ubi->fm_eba_sem held in read mode! 
1848 */ 1849 int ubi_wl_get_peb(struct ubi_device *ubi) 1850 { 1851 int err; 1852 struct ubi_wl_entry *e; 1853 1854 retry: 1855 down_read(&ubi->fm_eba_sem); 1856 spin_lock(&ubi->wl_lock); 1857 if (!ubi->free.rb_node) { 1858 if (ubi->works_count == 0) { 1859 ubi_err(ubi, "no free eraseblocks"); 1860 ubi_assert(list_empty(&ubi->works)); 1861 spin_unlock(&ubi->wl_lock); 1862 return -ENOSPC; 1863 } 1864 1865 err = produce_free_peb(ubi); 1866 if (err < 0) { 1867 spin_unlock(&ubi->wl_lock); 1868 return err; 1869 } 1870 spin_unlock(&ubi->wl_lock); 1871 up_read(&ubi->fm_eba_sem); 1872 goto retry; 1873 1874 } 1875 e = wl_get_wle(ubi); 1876 prot_queue_add(ubi, e); 1877 spin_unlock(&ubi->wl_lock); 1878 1879 err = ubi_self_check_all_ff(ubi, e->pnum, ubi->vid_hdr_aloffset, 1880 ubi->peb_size - ubi->vid_hdr_aloffset); 1881 if (err) { 1882 ubi_err(ubi, "new PEB %d does not contain all 0xFF bytes", e->pnum); 1883 return err; 1884 } 1885 1886 return e->pnum; 1887 } 1888 #else 1889 #include "fastmap-wl.c" 1890 #endif 1891