/*
 * Copyright (c) International Business Machines Corp., 2006
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
 * the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Authors: Artem Bityutskiy (Битюцкий Артём), Thomas Gleixner
 */

/*
 * UBI wear-leveling unit.
 *
 * This unit is responsible for wear-leveling. It works in terms of physical
 * eraseblocks and erase counters and knows nothing about logical eraseblocks,
 * volumes, etc. From this unit's perspective all physical eraseblocks are of
 * two types - used and free. Used physical eraseblocks are those that were
 * "get" by the 'ubi_wl_get_peb()' function, and free physical eraseblocks are
 * those that were put by the 'ubi_wl_put_peb()' function.
 *
 * Physical eraseblocks returned by 'ubi_wl_get_peb()' have only the erase
 * counter header. The rest of the physical eraseblock contains only 0xFF
 * bytes.
 *
 * When physical eraseblocks are returned to the WL unit by means of the
 * 'ubi_wl_put_peb()' function, they are scheduled for erasure. The erasure is
 * done asynchronously in context of the per-UBI device background thread,
 * which is also managed by the WL unit.
 *
 * The wear-leveling is ensured by means of moving the contents of used
 * physical eraseblocks with low erase counter to free physical eraseblocks
 * with high erase counter.
 *
 * The 'ubi_wl_get_peb()' function accepts data type hints which help to pick
 * an "optimal" physical eraseblock. For example, when it is known that the
 * physical eraseblock will be "put" soon because it contains short-term data,
 * the WL unit may pick a free physical eraseblock with low erase counter, and
 * so forth.
 *
 * If the WL unit fails to erase a physical eraseblock, it marks it as bad.
 *
 * This unit is also responsible for scrubbing. If a bit-flip is detected in a
 * physical eraseblock, it has to be moved. Technically this is the same as
 * moving it for wear-leveling reasons.
 *
 * As it was said, for the UBI unit all physical eraseblocks are either "free"
 * or "used". Free eraseblocks are kept in the @wl->free RB-tree, while used
 * eraseblocks are kept in a set of different RB-trees: @wl->used,
 * @wl->prot.pnum, @wl->prot.aec, and @wl->scrub.
 *
 * Note, in this implementation, we keep a small in-RAM object for each physical
 * eraseblock. This is surely not a scalable solution. But it appears to be good
 * enough for moderately large flashes and it is simple. In the future, one may
 * re-work this unit and make it more scalable.
 *
 * At the moment this unit does not utilize the sequence number, which was
 * introduced relatively recently. But it would be wise to do this because the
 * sequence number of a logical eraseblock characterizes how old it is. For
 * example, when we move a PEB with a low erase counter, and we need to pick
 * the target PEB, we pick a PEB with the highest EC if our PEB is "old" and we
 * pick a target PEB with an average EC if our PEB is not very "old". This is
 * room for future re-works of the WL unit.
 *
 * FIXME: looks too complex, should be simplified (later).
 */

#include <linux/slab.h>
#include <linux/crc32.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include "ubi.h"

/* Number of physical eraseblocks reserved for wear-leveling purposes */
#define WL_RESERVED_PEBS 1

/*
 * For how many erase cycles short term, unknown, and long term physical
 * eraseblocks are protected (i.e., kept in the protection trees and not
 * considered for wear-leveling movement).
 */
#define ST_PROTECTION 16
#define U_PROTECTION 10
#define LT_PROTECTION 4

/*
 * Maximum difference between two erase counters. If this threshold is
 * exceeded, the WL unit starts moving data from used physical eraseblocks with
 * low erase counter to free physical eraseblocks with high erase counter.
 */
#define UBI_WL_THRESHOLD CONFIG_MTD_UBI_WL_THRESHOLD

/*
 * When a physical eraseblock is moved, the WL unit has to pick the target
 * physical eraseblock to move to. The simplest way would be just to pick the
 * one with the highest erase counter. But in certain workloads this could lead
 * to an unlimited wear of one or few physical eraseblocks. Indeed, imagine a
 * situation when the picked physical eraseblock is constantly erased after the
 * data is written to it. So, we have a constant which limits the highest erase
 * counter of the free physical eraseblock to pick. Namely, the WL unit does
 * not pick eraseblocks with erase counter greater than the lowest erase
 * counter plus %WL_FREE_MAX_DIFF.
 */
#define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD)

/*
 * Maximum number of consecutive background thread failures which is enough to
 * switch to read-only mode.
 */
#define WL_MAX_FAILURES 32

/**
 * struct ubi_wl_prot_entry - PEB protection entry.
 * @rb_pnum: link in the @wl->prot.pnum RB-tree
 * @rb_aec: link in the @wl->prot.aec RB-tree
 * @abs_ec: the absolute erase counter value when the protection ends
 * @e: the wear-leveling entry of the physical eraseblock under protection
 *
 * When the WL unit returns a physical eraseblock, the physical eraseblock is
 * protected from being moved for some "time". For this reason, the physical
 * eraseblock is not directly moved from the @wl->free tree to the @wl->used
 * tree. There is one more tree in between where this physical eraseblock is
 * temporarily stored (@wl->prot).
 *
 * All this protection stuff is needed because:
 * o we don't want to move physical eraseblocks just after we have given them
 *   to the user; instead, we first want to let users fill them up with data;
 *
 * o there is a chance that the user will put the physical eraseblock very
 *   soon, so it makes sense not to move it for some time, but wait; this is
 *   especially important in case of "short term" physical eraseblocks.
 *
 * Physical eraseblocks stay protected only for limited time. But the "time" is
 * measured in erase cycles in this case. This is implemented with help of the
 * absolute erase counter (@wl->abs_ec). When it reaches certain value, the
 * physical eraseblocks are moved from the protection trees (@wl->prot.*) to
 * the @wl->used tree.
 *
 * Protected physical eraseblocks are searched by physical eraseblock number
 * (when they are put) and by the absolute erase counter (to check if it is
 * time to move them to the @wl->used tree). So there are actually 2 RB-trees
 * storing the protected physical eraseblocks: @wl->prot.pnum and
 * @wl->prot.aec. They are referred to as the "protection" trees. The
 * first one is indexed by the physical eraseblock number. The second one is
 * indexed by the absolute erase counter. Both trees store
 * &struct ubi_wl_prot_entry objects.
 *
 * Each physical eraseblock has 2 main states: free and used. The former state
 * corresponds to the @wl->free tree. The latter state is split up on several
 * sub-states:
 * o the WL movement is allowed (@wl->used tree);
 * o the WL movement is temporarily prohibited (@wl->prot.pnum and
 *   @wl->prot.aec trees);
 * o scrubbing is needed (@wl->scrub tree).
 *
 * Depending on the sub-state, wear-leveling entries of the used physical
 * eraseblocks may be kept in one of those trees.
 */
struct ubi_wl_prot_entry {
	struct rb_node rb_pnum;
	struct rb_node rb_aec;
	unsigned long long abs_ec;
	struct ubi_wl_entry *e;
};

/**
 * struct ubi_work - UBI work description data structure.
 * @list: a link in the list of pending works
 * @func: worker function
 * @e: physical eraseblock to erase (only relevant to erasure works)
 * @torture: if the physical eraseblock has to be tortured (only relevant to
 *           erasure works)
 *
 * The @func pointer points to the worker function. If the @cancel argument is
 * not zero, the worker has to free the resources and exit immediately. The
 * worker has to return zero in case of success and a negative error code in
 * case of failure.
 */
struct ubi_work {
	struct list_head list;
	int (*func)(struct ubi_device *ubi, struct ubi_work *wrk, int cancel);
	/* The below fields are only relevant to erasure works */
	struct ubi_wl_entry *e;
	int torture;
};

#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec);
static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e,
				     struct rb_root *root);
#else
/* Paranoid checks compile away to no-ops when debugging is disabled */
#define paranoid_check_ec(ubi, pnum, ec) 0
#define paranoid_check_in_wl_tree(e, root)
#endif

/**
 * wl_tree_add - add a wear-leveling entry to a WL RB-tree.
 * @e: the wear-leveling entry to add
 * @root: the root of the tree
 *
 * Note, we use (erase counter, physical eraseblock number) pairs as keys in
 * the @ubi->used and @ubi->free RB-trees.
 */
static void wl_tree_add(struct ubi_wl_entry *e, struct rb_root *root)
{
	struct rb_node **p, *parent = NULL;

	p = &root->rb_node;
	while (*p) {
		struct ubi_wl_entry *e1;

		parent = *p;
		e1 = rb_entry(parent, struct ubi_wl_entry, rb);

		if (e->ec < e1->ec)
			p = &(*p)->rb_left;
		else if (e->ec > e1->ec)
			p = &(*p)->rb_right;
		else {
			/* Equal ECs - break the tie by the PEB number */
			ubi_assert(e->pnum != e1->pnum);
			if (e->pnum < e1->pnum)
				p = &(*p)->rb_left;
			else
				p = &(*p)->rb_right;
		}
	}

	rb_link_node(&e->rb, parent, p);
	rb_insert_color(&e->rb, root);
}

/**
 * do_work - do one pending work.
 * @ubi: UBI device description object
 *
 * This function returns zero in case of success and a negative error code in
 * case of failure.
 */
static int do_work(struct ubi_device *ubi)
{
	int err;
	struct ubi_work *wrk;

	cond_resched();

	/*
	 * @ubi->work_sem is used to synchronize with the workers. Workers take
	 * it in read mode, so many of them may be doing works at a time. But
	 * the queue flush code has to be sure the whole queue of works is
	 * done, and it takes the mutex in write mode.
	 */
	down_read(&ubi->work_sem);
	spin_lock(&ubi->wl_lock);
	if (list_empty(&ubi->works)) {
		spin_unlock(&ubi->wl_lock);
		up_read(&ubi->work_sem);
		return 0;
	}

	/* Dequeue the oldest pending work under the lock */
	wrk = list_entry(ubi->works.next, struct ubi_work, list);
	list_del(&wrk->list);
	ubi->works_count -= 1;
	ubi_assert(ubi->works_count >= 0);
	spin_unlock(&ubi->wl_lock);

	/*
	 * Call the worker function. Do not touch the work structure
	 * after this call as it will have been freed or reused by that
	 * time by the worker function.
278 */ 279 err = wrk->func(ubi, wrk, 0); 280 if (err) 281 ubi_err("work failed with error code %d", err); 282 up_read(&ubi->work_sem); 283 284 return err; 285 } 286 287 /** 288 * produce_free_peb - produce a free physical eraseblock. 289 * @ubi: UBI device description object 290 * 291 * This function tries to make a free PEB by means of synchronous execution of 292 * pending works. This may be needed if, for example the background thread is 293 * disabled. Returns zero in case of success and a negative error code in case 294 * of failure. 295 */ 296 static int produce_free_peb(struct ubi_device *ubi) 297 { 298 int err; 299 300 spin_lock(&ubi->wl_lock); 301 while (!ubi->free.rb_node) { 302 spin_unlock(&ubi->wl_lock); 303 304 dbg_wl("do one work synchronously"); 305 err = do_work(ubi); 306 if (err) 307 return err; 308 309 spin_lock(&ubi->wl_lock); 310 } 311 spin_unlock(&ubi->wl_lock); 312 313 return 0; 314 } 315 316 /** 317 * in_wl_tree - check if wear-leveling entry is present in a WL RB-tree. 318 * @e: the wear-leveling entry to check 319 * @root: the root of the tree 320 * 321 * This function returns non-zero if @e is in the @root RB-tree and zero if it 322 * is not. 323 */ 324 static int in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root) 325 { 326 struct rb_node *p; 327 328 p = root->rb_node; 329 while (p) { 330 struct ubi_wl_entry *e1; 331 332 e1 = rb_entry(p, struct ubi_wl_entry, rb); 333 334 if (e->pnum == e1->pnum) { 335 ubi_assert(e == e1); 336 return 1; 337 } 338 339 if (e->ec < e1->ec) 340 p = p->rb_left; 341 else if (e->ec > e1->ec) 342 p = p->rb_right; 343 else { 344 ubi_assert(e->pnum != e1->pnum); 345 if (e->pnum < e1->pnum) 346 p = p->rb_left; 347 else 348 p = p->rb_right; 349 } 350 } 351 352 return 0; 353 } 354 355 /** 356 * prot_tree_add - add physical eraseblock to protection trees. 
357 * @ubi: UBI device description object 358 * @e: the physical eraseblock to add 359 * @pe: protection entry object to use 360 * @abs_ec: absolute erase counter value when this physical eraseblock has 361 * to be removed from the protection trees. 362 * 363 * @wl->lock has to be locked. 364 */ 365 static void prot_tree_add(struct ubi_device *ubi, struct ubi_wl_entry *e, 366 struct ubi_wl_prot_entry *pe, int abs_ec) 367 { 368 struct rb_node **p, *parent = NULL; 369 struct ubi_wl_prot_entry *pe1; 370 371 pe->e = e; 372 pe->abs_ec = ubi->abs_ec + abs_ec; 373 374 p = &ubi->prot.pnum.rb_node; 375 while (*p) { 376 parent = *p; 377 pe1 = rb_entry(parent, struct ubi_wl_prot_entry, rb_pnum); 378 379 if (e->pnum < pe1->e->pnum) 380 p = &(*p)->rb_left; 381 else 382 p = &(*p)->rb_right; 383 } 384 rb_link_node(&pe->rb_pnum, parent, p); 385 rb_insert_color(&pe->rb_pnum, &ubi->prot.pnum); 386 387 p = &ubi->prot.aec.rb_node; 388 parent = NULL; 389 while (*p) { 390 parent = *p; 391 pe1 = rb_entry(parent, struct ubi_wl_prot_entry, rb_aec); 392 393 if (pe->abs_ec < pe1->abs_ec) 394 p = &(*p)->rb_left; 395 else 396 p = &(*p)->rb_right; 397 } 398 rb_link_node(&pe->rb_aec, parent, p); 399 rb_insert_color(&pe->rb_aec, &ubi->prot.aec); 400 } 401 402 /** 403 * find_wl_entry - find wear-leveling entry closest to certain erase counter. 404 * @root: the RB-tree where to look for 405 * @max: highest possible erase counter 406 * 407 * This function looks for a wear leveling entry with erase counter closest to 408 * @max and less then @max. 
 */
static struct ubi_wl_entry *find_wl_entry(struct rb_root *root, int max)
{
	struct rb_node *p;
	struct ubi_wl_entry *e;

	/* The leftmost node has the lowest EC; bound @max relative to it */
	e = rb_entry(rb_first(root), struct ubi_wl_entry, rb);
	max += e->ec;

	p = root->rb_node;
	while (p) {
		struct ubi_wl_entry *e1;

		e1 = rb_entry(p, struct ubi_wl_entry, rb);
		if (e1->ec >= max)
			p = p->rb_left;
		else {
			p = p->rb_right;
			e = e1;
		}
	}

	return e;
}

/**
 * ubi_wl_get_peb - get a physical eraseblock.
 * @ubi: UBI device description object
 * @dtype: type of data which will be stored in this physical eraseblock
 *
 * This function returns a physical eraseblock in case of success and a
 * negative error code in case of failure. Might sleep.
 */
int ubi_wl_get_peb(struct ubi_device *ubi, int dtype)
{
	int err, protect, medium_ec;
	struct ubi_wl_entry *e, *first, *last;
	struct ubi_wl_prot_entry *pe;

	ubi_assert(dtype == UBI_LONGTERM || dtype == UBI_SHORTTERM ||
		   dtype == UBI_UNKNOWN);

	/* Allocate the protection entry before taking @wl_lock */
	pe = kmalloc(sizeof(struct ubi_wl_prot_entry), GFP_NOFS);
	if (!pe)
		return -ENOMEM;

retry:
	spin_lock(&ubi->wl_lock);
	if (!ubi->free.rb_node) {
		if (ubi->works_count == 0) {
			/* No free PEBs and nothing queued to produce one */
			ubi_assert(list_empty(&ubi->works));
			ubi_err("no free eraseblocks");
			spin_unlock(&ubi->wl_lock);
			kfree(pe);
			return -ENOSPC;
		}
		spin_unlock(&ubi->wl_lock);

		err = produce_free_peb(ubi);
		if (err < 0) {
			kfree(pe);
			return err;
		}
		goto retry;
	}

	switch (dtype) {
	case UBI_LONGTERM:
		/*
		 * For long term data we pick a physical eraseblock
		 * with high erase counter. But the highest erase
		 * counter we can pick is bounded by the lowest
		 * erase counter plus %WL_FREE_MAX_DIFF.
		 */
		e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
		protect = LT_PROTECTION;
		break;
	case UBI_UNKNOWN:
		/*
		 * For unknown data we pick a physical eraseblock with
		 * medium erase counter. But we by no means can pick a
		 * physical eraseblock with erase counter greater than or
		 * equal to the lowest erase counter plus
		 * %WL_FREE_MAX_DIFF.
		 */
		first = rb_entry(rb_first(&ubi->free),
				 struct ubi_wl_entry, rb);
		last = rb_entry(rb_last(&ubi->free),
				struct ubi_wl_entry, rb);

		if (last->ec - first->ec < WL_FREE_MAX_DIFF)
			e = rb_entry(ubi->free.rb_node,
				     struct ubi_wl_entry, rb);
		else {
			medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2;
			e = find_wl_entry(&ubi->free, medium_ec);
		}
		protect = U_PROTECTION;
		break;
	case UBI_SHORTTERM:
		/*
		 * For short term data we pick a physical eraseblock
		 * with the lowest erase counter as we expect it will
		 * be erased soon.
		 */
		e = rb_entry(rb_first(&ubi->free),
			     struct ubi_wl_entry, rb);
		protect = ST_PROTECTION;
		break;
	default:
		protect = 0;
		e = NULL;
		BUG();
	}

	/*
	 * Move the physical eraseblock to the protection trees where it will
	 * be protected from being moved for some time.
	 */
	paranoid_check_in_wl_tree(e, &ubi->free);
	rb_erase(&e->rb, &ubi->free);
	prot_tree_add(ubi, e, pe, protect);

	dbg_wl("PEB %d EC %d, protection %d", e->pnum, e->ec, protect);
	spin_unlock(&ubi->wl_lock);

	return e->pnum;
}

/**
 * prot_tree_del - remove a physical eraseblock from the protection trees
 * @ubi: UBI device description object
 * @pnum: the physical eraseblock to remove
 *
 * This function removes PEB @pnum from the protection trees and returns zero
 * in case of success and %-ENODEV if the PEB was not found in the protection
 * trees.
 */
static int prot_tree_del(struct ubi_device *ubi, int pnum)
{
	struct rb_node *p;
	struct ubi_wl_prot_entry *pe = NULL;

	/* The @wl->prot.pnum tree is indexed by PEB number */
	p = ubi->prot.pnum.rb_node;
	while (p) {

		pe = rb_entry(p, struct ubi_wl_prot_entry, rb_pnum);

		if (pnum == pe->e->pnum)
			goto found;

		if (pnum < pe->e->pnum)
			p = p->rb_left;
		else
			p = p->rb_right;
	}

	return -ENODEV;

found:
	ubi_assert(pe->e->pnum == pnum);
	/* The entry lives in both protection trees - unlink from both */
	rb_erase(&pe->rb_aec, &ubi->prot.aec);
	rb_erase(&pe->rb_pnum, &ubi->prot.pnum);
	kfree(pe);
	return 0;
}

/**
 * sync_erase - synchronously erase a physical eraseblock.
 * @ubi: UBI device description object
 * @e: the physical eraseblock to erase
 * @torture: if the physical eraseblock has to be tortured
 *
 * This function returns zero in case of success and a negative error code in
 * case of failure.
 */
static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, int torture)
{
	int err;
	struct ubi_ec_hdr *ec_hdr;
	unsigned long long ec = e->ec;

	dbg_wl("erase PEB %d, old EC %llu", e->pnum, ec);

	err = paranoid_check_ec(ubi, e->pnum, e->ec);
	if (err > 0)
		return -EINVAL;

	ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS);
	if (!ec_hdr)
		return -ENOMEM;

	err = ubi_io_sync_erase(ubi, e->pnum, torture);
	if (err < 0)
		goto out_free;

	/* A positive return value is the number of erasures performed */
	ec += err;
	if (ec > UBI_MAX_ERASECOUNTER) {
		/*
		 * Erase counter overflow. Upgrade UBI and use 64-bit
		 * erase counters internally.
		 */
		ubi_err("erase counter overflow at PEB %d, EC %llu",
			e->pnum, ec);
		err = -EINVAL;
		goto out_free;
	}

	dbg_wl("erased PEB %d, new EC %llu", e->pnum, ec);

	ec_hdr->ec = cpu_to_be64(ec);

	err = ubi_io_write_ec_hdr(ubi, e->pnum, ec_hdr);
	if (err)
		goto out_free;

	e->ec = ec;
	spin_lock(&ubi->wl_lock);
	if (e->ec > ubi->max_ec)
		ubi->max_ec = e->ec;
	spin_unlock(&ubi->wl_lock);

out_free:
	kfree(ec_hdr);
	return err;
}

/**
 * check_protection_over - check if it is time to stop protecting some
 * physical eraseblocks.
 * @ubi: UBI device description object
 *
 * This function is called after each erase operation, when the absolute erase
 * counter is incremented, to check if some physical eraseblocks have not to be
 * protected any longer. These physical eraseblocks are moved from the
 * protection trees to the used tree.
 */
static void check_protection_over(struct ubi_device *ubi)
{
	struct ubi_wl_prot_entry *pe;

	/*
	 * There may be several protected physical eraseblocks to remove,
	 * process them all.
	 */
	while (1) {
		spin_lock(&ubi->wl_lock);
		if (!ubi->prot.aec.rb_node) {
			spin_unlock(&ubi->wl_lock);
			break;
		}

		/* @wl->prot.aec is sorted by expiry EC - first expires first */
		pe = rb_entry(rb_first(&ubi->prot.aec),
			      struct ubi_wl_prot_entry, rb_aec);

		if (pe->abs_ec > ubi->abs_ec) {
			spin_unlock(&ubi->wl_lock);
			break;
		}

		dbg_wl("PEB %d protection over, abs_ec %llu, PEB abs_ec %llu",
		       pe->e->pnum, ubi->abs_ec, pe->abs_ec);
		rb_erase(&pe->rb_aec, &ubi->prot.aec);
		rb_erase(&pe->rb_pnum, &ubi->prot.pnum);
		wl_tree_add(pe->e, &ubi->used);
		spin_unlock(&ubi->wl_lock);

		kfree(pe);
		cond_resched();
	}
}

/**
 * schedule_ubi_work - schedule a work.
 * @ubi: UBI device description object
 * @wrk: the work to schedule
 *
 * This function enqueues a work defined by @wrk to the tail of the pending
 * works list.
 */
static void schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk)
{
	spin_lock(&ubi->wl_lock);
	list_add_tail(&wrk->list, &ubi->works);
	ubi_assert(ubi->works_count >= 0);
	ubi->works_count += 1;
	/* Kick the background thread so it picks the work up promptly */
	if (ubi->thread_enabled)
		wake_up_process(ubi->bgt_thread);
	spin_unlock(&ubi->wl_lock);
}

static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
			int cancel);

/**
 * schedule_erase - schedule an erase work.
 * @ubi: UBI device description object
 * @e: the WL entry of the physical eraseblock to erase
 * @torture: if the physical eraseblock has to be tortured
 *
 * This function returns zero in case of success and a %-ENOMEM in case of
 * failure.
 */
static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
			  int torture)
{
	struct ubi_work *wl_wrk;

	dbg_wl("schedule erasure of PEB %d, EC %d, torture %d",
	       e->pnum, e->ec, torture);

	wl_wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS);
	if (!wl_wrk)
		return -ENOMEM;

	/* Ownership of @e passes to the erase work from here on */
	wl_wrk->func = &erase_worker;
	wl_wrk->e = e;
	wl_wrk->torture = torture;

	schedule_ubi_work(ubi, wl_wrk);
	return 0;
}

/**
 * wear_leveling_worker - wear-leveling worker function.
 * @ubi: UBI device description object
 * @wrk: the work object
 * @cancel: non-zero if the worker has to free memory and exit
 *
 * This function copies a more worn out physical eraseblock to a less worn out
 * one. Returns zero in case of success and a negative error code in case of
 * failure.
 */
static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
				int cancel)
{
	int err, put = 0, scrubbing = 0, protect = 0;
	struct ubi_wl_prot_entry *uninitialized_var(pe);
	struct ubi_wl_entry *e1, *e2;
	struct ubi_vid_hdr *vid_hdr;

	/* The work object is not needed beyond this point - free it early */
	kfree(wrk);

	if (cancel)
		return 0;

	vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS);
	if (!vid_hdr)
		return -ENOMEM;

	mutex_lock(&ubi->move_mutex);
	spin_lock(&ubi->wl_lock);
	/* Only one movement may be in flight at a time */
	ubi_assert(!ubi->move_from && !ubi->move_to);
	ubi_assert(!ubi->move_to_put);

	if (!ubi->free.rb_node ||
	    (!ubi->used.rb_node && !ubi->scrub.rb_node)) {
		/*
		 * No free physical eraseblocks? Well, they must be waiting in
		 * the queue to be erased. Cancel movement - it will be
		 * triggered again when a free physical eraseblock appears.
		 *
		 * No used physical eraseblocks? They must be temporarily
		 * protected from being moved. They will be moved to the
		 * @ubi->used tree later and the wear-leveling will be
		 * triggered again.
		 */
		dbg_wl("cancel WL, a list is empty: free %d, used %d",
		       !ubi->free.rb_node, !ubi->used.rb_node);
		goto out_cancel;
	}

	if (!ubi->scrub.rb_node) {
		/*
		 * Now pick the least worn-out used physical eraseblock and a
		 * highly worn-out free physical eraseblock. If the erase
		 * counters differ much enough, start wear-leveling.
		 */
		e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, rb);
		e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);

		if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) {
			dbg_wl("no WL needed: min used EC %d, max free EC %d",
			       e1->ec, e2->ec);
			goto out_cancel;
		}
		paranoid_check_in_wl_tree(e1, &ubi->used);
		rb_erase(&e1->rb, &ubi->used);
		dbg_wl("move PEB %d EC %d to PEB %d EC %d",
		       e1->pnum, e1->ec, e2->pnum, e2->ec);
	} else {
		/* Perform scrubbing */
		scrubbing = 1;
		e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, rb);
		e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
		paranoid_check_in_wl_tree(e1, &ubi->scrub);
		rb_erase(&e1->rb, &ubi->scrub);
		dbg_wl("scrub PEB %d to PEB %d", e1->pnum, e2->pnum);
	}

	paranoid_check_in_wl_tree(e2, &ubi->free);
	rb_erase(&e2->rb, &ubi->free);
	ubi->move_from = e1;
	ubi->move_to = e2;
	spin_unlock(&ubi->wl_lock);

	/*
	 * Now we are going to copy physical eraseblock @e1->pnum to @e2->pnum.
	 * We so far do not know which logical eraseblock our physical
	 * eraseblock (@e1) belongs to. We have to read the volume identifier
	 * header first.
	 *
	 * Note, we are protected from this PEB being unmapped and erased. The
	 * 'ubi_wl_put_peb()' would wait for moving to be finished if the PEB
	 * which is being moved was unmapped.
	 */

	err = ubi_io_read_vid_hdr(ubi, e1->pnum, vid_hdr, 0);
	if (err && err != UBI_IO_BITFLIPS) {
		if (err == UBI_IO_PEB_FREE) {
			/*
			 * We are trying to move PEB without a VID header. UBI
			 * always write VID headers shortly after the PEB was
			 * given, so we have a situation when it did not have
			 * chance to write it down because it was preempted.
			 * Just re-schedule the work, so that next time it will
			 * likely have the VID header in place.
			 */
			dbg_wl("PEB %d has no VID header", e1->pnum);
			goto out_not_moved;
		}

		ubi_err("error %d while reading VID header from PEB %d",
			err, e1->pnum);
		if (err > 0)
			err = -EIO;
		goto out_error;
	}

	err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vid_hdr);
	if (err) {

		if (err < 0)
			goto out_error;
		if (err == 1)
			goto out_not_moved;

		/*
		 * For some reason the LEB was not moved - it might be because
		 * the volume is being deleted. We should prevent this PEB from
		 * being selected for wear-levelling movement for some "time",
		 * so put it to the protection tree.
		 */

		dbg_wl("cancelled moving PEB %d", e1->pnum);
		pe = kmalloc(sizeof(struct ubi_wl_prot_entry), GFP_NOFS);
		if (!pe) {
			err = -ENOMEM;
			goto out_error;
		}

		protect = 1;
	}

	ubi_free_vid_hdr(ubi, vid_hdr);
	spin_lock(&ubi->wl_lock);
	if (protect)
		prot_tree_add(ubi, e1, pe, protect);
	if (!ubi->move_to_put)
		wl_tree_add(e2, &ubi->used);
	else
		put = 1;
	ubi->move_from = ubi->move_to = NULL;
	ubi->move_to_put = ubi->wl_scheduled = 0;
	spin_unlock(&ubi->wl_lock);

	if (put) {
		/*
		 * Well, the target PEB was put meanwhile, schedule it for
		 * erasure.
		 */
		dbg_wl("PEB %d was put meanwhile, erase", e2->pnum);
		err = schedule_erase(ubi, e2, 0);
		if (err)
			goto out_error;
	}

	if (!protect) {
		err = schedule_erase(ubi, e1, 0);
		if (err)
			goto out_error;
	}


	dbg_wl("done");
	mutex_unlock(&ubi->move_mutex);
	return 0;

	/*
	 * For some reasons the LEB was not moved, might be an error, might be
	 * something else. @e1 was not changed, so return it back. @e2 might
	 * be changed, schedule it for erasure.
	 */
out_not_moved:
	ubi_free_vid_hdr(ubi, vid_hdr);
	spin_lock(&ubi->wl_lock);
	if (scrubbing)
		wl_tree_add(e1, &ubi->scrub);
	else
		wl_tree_add(e1, &ubi->used);
	ubi->move_from = ubi->move_to = NULL;
	ubi->move_to_put = ubi->wl_scheduled = 0;
	spin_unlock(&ubi->wl_lock);

	err = schedule_erase(ubi, e2, 0);
	if (err)
		goto out_error;

	mutex_unlock(&ubi->move_mutex);
	return 0;

out_error:
	ubi_err("error %d while moving PEB %d to PEB %d",
		err, e1->pnum, e2->pnum);

	ubi_free_vid_hdr(ubi, vid_hdr);
	spin_lock(&ubi->wl_lock);
	ubi->move_from = ubi->move_to = NULL;
	ubi->move_to_put = ubi->wl_scheduled = 0;
	spin_unlock(&ubi->wl_lock);

	/* Both entries were already removed from their trees - free them */
	kmem_cache_free(ubi_wl_entry_slab, e1);
	kmem_cache_free(ubi_wl_entry_slab, e2);
	ubi_ro_mode(ubi);

	mutex_unlock(&ubi->move_mutex);
	return err;

out_cancel:
	ubi->wl_scheduled = 0;
	spin_unlock(&ubi->wl_lock);
	mutex_unlock(&ubi->move_mutex);
	ubi_free_vid_hdr(ubi, vid_hdr);
	return 0;
}

/**
 * ensure_wear_leveling - schedule wear-leveling if it is needed.
 * @ubi: UBI device description object
 *
 * This function checks if it is time to start wear-leveling and schedules it
 * if yes. This function returns zero in case of success and a negative error
 * code in case of failure.
 */
static int ensure_wear_leveling(struct ubi_device *ubi)
{
	int err = 0;
	struct ubi_wl_entry *e1;
	struct ubi_wl_entry *e2;
	struct ubi_work *wrk;

	spin_lock(&ubi->wl_lock);
	if (ubi->wl_scheduled)
		/* Wear-leveling is already in the work queue */
		goto out_unlock;

	/*
	 * If the ubi->scrub tree is not empty, scrubbing is needed, and the
	 * WL worker has to be scheduled anyway.
	 */
	if (!ubi->scrub.rb_node) {
		if (!ubi->used.rb_node || !ubi->free.rb_node)
			/* No physical eraseblocks - no deal */
			goto out_unlock;

		/*
		 * We schedule wear-leveling only if the difference between the
		 * lowest erase counter of used physical eraseblocks and a high
		 * erase counter of free physical eraseblocks is greater than
		 * %UBI_WL_THRESHOLD.
		 */
		e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, rb);
		e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);

		if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD))
			goto out_unlock;
		dbg_wl("schedule wear-leveling");
	} else
		dbg_wl("schedule scrubbing");

	ubi->wl_scheduled = 1;
	spin_unlock(&ubi->wl_lock);

	wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS);
	if (!wrk) {
		err = -ENOMEM;
		goto out_cancel;
	}

	wrk->func = &wear_leveling_worker;
	schedule_ubi_work(ubi, wrk);
	return err;

out_cancel:
	/* Allocation failed - undo the @wl_scheduled flag */
	spin_lock(&ubi->wl_lock);
	ubi->wl_scheduled = 0;
out_unlock:
	spin_unlock(&ubi->wl_lock);
	return err;
}

/**
 * erase_worker - physical eraseblock erase worker function.
 * @ubi: UBI device description object
 * @wl_wrk: the work object
 * @cancel: non-zero if the worker has to free memory and exit
 *
 * This function erases a physical eraseblock and performs torture testing if
 * needed. It also takes care about marking the physical eraseblock bad if
 * needed. Returns zero in case of success and a negative error code in case of
 * failure.
1030 */ 1031 static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, 1032 int cancel) 1033 { 1034 struct ubi_wl_entry *e = wl_wrk->e; 1035 int pnum = e->pnum, err, need; 1036 1037 if (cancel) { 1038 dbg_wl("cancel erasure of PEB %d EC %d", pnum, e->ec); 1039 kfree(wl_wrk); 1040 kmem_cache_free(ubi_wl_entry_slab, e); 1041 return 0; 1042 } 1043 1044 dbg_wl("erase PEB %d EC %d", pnum, e->ec); 1045 1046 err = sync_erase(ubi, e, wl_wrk->torture); 1047 if (!err) { 1048 /* Fine, we've erased it successfully */ 1049 kfree(wl_wrk); 1050 1051 spin_lock(&ubi->wl_lock); 1052 ubi->abs_ec += 1; 1053 wl_tree_add(e, &ubi->free); 1054 spin_unlock(&ubi->wl_lock); 1055 1056 /* 1057 * One more erase operation has happened, take care about protected 1058 * physical eraseblocks. 1059 */ 1060 check_protection_over(ubi); 1061 1062 /* And take care about wear-leveling */ 1063 err = ensure_wear_leveling(ubi); 1064 return err; 1065 } 1066 1067 ubi_err("failed to erase PEB %d, error %d", pnum, err); 1068 kfree(wl_wrk); 1069 kmem_cache_free(ubi_wl_entry_slab, e); 1070 1071 if (err == -EINTR || err == -ENOMEM || err == -EAGAIN || 1072 err == -EBUSY) { 1073 int err1; 1074 1075 /* Re-schedule the LEB for erasure */ 1076 err1 = schedule_erase(ubi, e, 0); 1077 if (err1) { 1078 err = err1; 1079 goto out_ro; 1080 } 1081 return err; 1082 } else if (err != -EIO) { 1083 /* 1084 * If this is not %-EIO, we have no idea what to do. Scheduling 1085 * this physical eraseblock for erasure again would cause 1086 * errors again and again. Well, lets switch to RO mode. 1087 */ 1088 goto out_ro; 1089 } 1090 1091 /* It is %-EIO, the PEB went bad */ 1092 1093 if (!ubi->bad_allowed) { 1094 ubi_err("bad physical eraseblock %d detected", pnum); 1095 goto out_ro; 1096 } 1097 1098 spin_lock(&ubi->volumes_lock); 1099 need = ubi->beb_rsvd_level - ubi->beb_rsvd_pebs + 1; 1100 if (need > 0) { 1101 need = ubi->avail_pebs >= need ? 
need : ubi->avail_pebs; 1102 ubi->avail_pebs -= need; 1103 ubi->rsvd_pebs += need; 1104 ubi->beb_rsvd_pebs += need; 1105 if (need > 0) 1106 ubi_msg("reserve more %d PEBs", need); 1107 } 1108 1109 if (ubi->beb_rsvd_pebs == 0) { 1110 spin_unlock(&ubi->volumes_lock); 1111 ubi_err("no reserved physical eraseblocks"); 1112 goto out_ro; 1113 } 1114 1115 spin_unlock(&ubi->volumes_lock); 1116 ubi_msg("mark PEB %d as bad", pnum); 1117 1118 err = ubi_io_mark_bad(ubi, pnum); 1119 if (err) 1120 goto out_ro; 1121 1122 spin_lock(&ubi->volumes_lock); 1123 ubi->beb_rsvd_pebs -= 1; 1124 ubi->bad_peb_count += 1; 1125 ubi->good_peb_count -= 1; 1126 ubi_calculate_reserved(ubi); 1127 if (ubi->beb_rsvd_pebs == 0) 1128 ubi_warn("last PEB from the reserved pool was used"); 1129 spin_unlock(&ubi->volumes_lock); 1130 1131 return err; 1132 1133 out_ro: 1134 ubi_ro_mode(ubi); 1135 return err; 1136 } 1137 1138 /** 1139 * ubi_wl_put_peb - return a physical eraseblock to the wear-leveling unit. 1140 * @ubi: UBI device description object 1141 * @pnum: physical eraseblock to return 1142 * @torture: if this physical eraseblock has to be tortured 1143 * 1144 * This function is called to return physical eraseblock @pnum to the pool of 1145 * free physical eraseblocks. The @torture flag has to be set if an I/O error 1146 * occurred to this @pnum and it has to be tested. This function returns zero 1147 * in case of success, and a negative error code in case of failure. 1148 */ 1149 int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture) 1150 { 1151 int err; 1152 struct ubi_wl_entry *e; 1153 1154 dbg_wl("PEB %d", pnum); 1155 ubi_assert(pnum >= 0); 1156 ubi_assert(pnum < ubi->peb_count); 1157 1158 retry: 1159 spin_lock(&ubi->wl_lock); 1160 e = ubi->lookuptbl[pnum]; 1161 if (e == ubi->move_from) { 1162 /* 1163 * User is putting the physical eraseblock which was selected to 1164 * be moved. It will be scheduled for erasure in the 1165 * wear-leveling worker. 
1166 */ 1167 dbg_wl("PEB %d is being moved, wait", pnum); 1168 spin_unlock(&ubi->wl_lock); 1169 1170 /* Wait for the WL worker by taking the @ubi->move_mutex */ 1171 mutex_lock(&ubi->move_mutex); 1172 mutex_unlock(&ubi->move_mutex); 1173 goto retry; 1174 } else if (e == ubi->move_to) { 1175 /* 1176 * User is putting the physical eraseblock which was selected 1177 * as the target the data is moved to. It may happen if the EBA 1178 * unit already re-mapped the LEB in 'ubi_eba_copy_leb()' but 1179 * the WL unit has not put the PEB to the "used" tree yet, but 1180 * it is about to do this. So we just set a flag which will 1181 * tell the WL worker that the PEB is not needed anymore and 1182 * should be scheduled for erasure. 1183 */ 1184 dbg_wl("PEB %d is the target of data moving", pnum); 1185 ubi_assert(!ubi->move_to_put); 1186 ubi->move_to_put = 1; 1187 spin_unlock(&ubi->wl_lock); 1188 return 0; 1189 } else { 1190 if (in_wl_tree(e, &ubi->used)) { 1191 paranoid_check_in_wl_tree(e, &ubi->used); 1192 rb_erase(&e->rb, &ubi->used); 1193 } else if (in_wl_tree(e, &ubi->scrub)) { 1194 paranoid_check_in_wl_tree(e, &ubi->scrub); 1195 rb_erase(&e->rb, &ubi->scrub); 1196 } else { 1197 err = prot_tree_del(ubi, e->pnum); 1198 if (err) { 1199 ubi_err("PEB %d not found", pnum); 1200 ubi_ro_mode(ubi); 1201 spin_unlock(&ubi->wl_lock); 1202 return err; 1203 } 1204 } 1205 } 1206 spin_unlock(&ubi->wl_lock); 1207 1208 err = schedule_erase(ubi, e, torture); 1209 if (err) { 1210 spin_lock(&ubi->wl_lock); 1211 wl_tree_add(e, &ubi->used); 1212 spin_unlock(&ubi->wl_lock); 1213 } 1214 1215 return err; 1216 } 1217 1218 /** 1219 * ubi_wl_scrub_peb - schedule a physical eraseblock for scrubbing. 1220 * @ubi: UBI device description object 1221 * @pnum: the physical eraseblock to schedule 1222 * 1223 * If a bit-flip in a physical eraseblock is detected, this physical eraseblock 1224 * needs scrubbing. This function schedules a physical eraseblock for 1225 * scrubbing which is done in background. 
 * This function returns zero in case of
 * success and a negative error code in case of failure.
 */
int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum)
{
	struct ubi_wl_entry *e;

	ubi_msg("schedule PEB %d for scrubbing", pnum);

retry:
	spin_lock(&ubi->wl_lock);
	e = ubi->lookuptbl[pnum];
	if (e == ubi->move_from || in_wl_tree(e, &ubi->scrub)) {
		/*
		 * Either the PEB is being moved (it will be erased or
		 * re-checked afterwards) or it is already scheduled for
		 * scrubbing - nothing to do.
		 */
		spin_unlock(&ubi->wl_lock);
		return 0;
	}

	if (e == ubi->move_to) {
		/*
		 * This physical eraseblock was used to move data to. The data
		 * was moved but the PEB was not yet inserted to the proper
		 * tree. We should just wait a little and let the WL worker
		 * proceed.
		 */
		spin_unlock(&ubi->wl_lock);
		dbg_wl("the PEB %d is not in proper tree, retry", pnum);
		yield();
		goto retry;
	}

	if (in_wl_tree(e, &ubi->used)) {
		paranoid_check_in_wl_tree(e, &ubi->used);
		rb_erase(&e->rb, &ubi->used);
	} else {
		int err;

		/* Not in @used - the entry must be in a protection tree */
		err = prot_tree_del(ubi, e->pnum);
		if (err) {
			ubi_err("PEB %d not found", pnum);
			ubi_ro_mode(ubi);
			spin_unlock(&ubi->wl_lock);
			return err;
		}
	}

	wl_tree_add(e, &ubi->scrub);
	spin_unlock(&ubi->wl_lock);

	/*
	 * Technically scrubbing is the same as wear-leveling, so it is done
	 * by the WL worker.
	 */
	return ensure_wear_leveling(ubi);
}

/**
 * ubi_wl_flush - flush all pending works.
 * @ubi: UBI device description object
 *
 * This function returns zero in case of success and a negative error code in
 * case of failure.
 */
int ubi_wl_flush(struct ubi_device *ubi)
{
	int err;

	/*
	 * Erase while the pending works queue is not empty, but not more than
	 * the number of currently pending works.
	 */
	dbg_wl("flush (%d pending works)", ubi->works_count);
	while (ubi->works_count) {
		err = do_work(ubi);
		if (err)
			return err;
	}

	/*
	 * Make sure all the works which have been done in parallel are
	 * finished.
	 */
	down_write(&ubi->work_sem);
	up_write(&ubi->work_sem);

	/*
	 * And in case last was the WL worker and it cancelled the LEB
	 * movement, flush again.
	 */
	while (ubi->works_count) {
		dbg_wl("flush more (%d pending works)", ubi->works_count);
		err = do_work(ubi);
		if (err)
			return err;
	}

	return 0;
}

/**
 * tree_destroy - destroy an RB-tree.
 * @root: the root of the tree to destroy
 *
 * Frees every &struct ubi_wl_entry hanging off @root back to the
 * @ubi_wl_entry_slab cache. The tree is walked bottom-up, detaching each
 * leaf from its parent before freeing it; no rebalancing is done since the
 * whole tree is being torn down.
 */
static void tree_destroy(struct rb_root *root)
{
	struct rb_node *rb;
	struct ubi_wl_entry *e;

	rb = root->rb_node;
	while (rb) {
		if (rb->rb_left)
			rb = rb->rb_left;
		else if (rb->rb_right)
			rb = rb->rb_right;
		else {
			/* A leaf - free it and step back to the parent */
			e = rb_entry(rb, struct ubi_wl_entry, rb);

			rb = rb_parent(rb);
			if (rb) {
				if (rb->rb_left == &e->rb)
					rb->rb_left = NULL;
				else
					rb->rb_right = NULL;
			}

			kmem_cache_free(ubi_wl_entry_slab, e);
		}
	}
}

/**
 * ubi_thread - UBI background thread.
1356 * @u: the UBI device description object pointer 1357 */ 1358 int ubi_thread(void *u) 1359 { 1360 int failures = 0; 1361 struct ubi_device *ubi = u; 1362 1363 ubi_msg("background thread \"%s\" started, PID %d", 1364 ubi->bgt_name, task_pid_nr(current)); 1365 1366 set_freezable(); 1367 for (;;) { 1368 int err; 1369 1370 if (kthread_should_stop()) 1371 goto out; 1372 1373 if (try_to_freeze()) 1374 continue; 1375 1376 spin_lock(&ubi->wl_lock); 1377 if (list_empty(&ubi->works) || ubi->ro_mode || 1378 !ubi->thread_enabled) { 1379 set_current_state(TASK_INTERRUPTIBLE); 1380 spin_unlock(&ubi->wl_lock); 1381 schedule(); 1382 continue; 1383 } 1384 spin_unlock(&ubi->wl_lock); 1385 1386 err = do_work(ubi); 1387 if (err) { 1388 ubi_err("%s: work failed with error code %d", 1389 ubi->bgt_name, err); 1390 if (failures++ > WL_MAX_FAILURES) { 1391 /* 1392 * Too many failures, disable the thread and 1393 * switch to read-only mode. 1394 */ 1395 ubi_msg("%s: %d consecutive failures", 1396 ubi->bgt_name, WL_MAX_FAILURES); 1397 ubi_ro_mode(ubi); 1398 break; 1399 } 1400 } else 1401 failures = 0; 1402 1403 cond_resched(); 1404 } 1405 1406 out: 1407 dbg_wl("background thread \"%s\" is killed", ubi->bgt_name); 1408 return 0; 1409 } 1410 1411 /** 1412 * cancel_pending - cancel all pending works. 1413 * @ubi: UBI device description object 1414 */ 1415 static void cancel_pending(struct ubi_device *ubi) 1416 { 1417 while (!list_empty(&ubi->works)) { 1418 struct ubi_work *wrk; 1419 1420 wrk = list_entry(ubi->works.next, struct ubi_work, list); 1421 list_del(&wrk->list); 1422 wrk->func(ubi, wrk, 1); 1423 ubi->works_count -= 1; 1424 ubi_assert(ubi->works_count >= 0); 1425 } 1426 } 1427 1428 /** 1429 * ubi_wl_init_scan - initialize the wear-leveling unit using scanning 1430 * information. 1431 * @ubi: UBI device description object 1432 * @si: scanning information 1433 * 1434 * This function returns zero in case of success, and a negative error code in 1435 * case of failure. 
1436 */ 1437 int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) 1438 { 1439 int err; 1440 struct rb_node *rb1, *rb2; 1441 struct ubi_scan_volume *sv; 1442 struct ubi_scan_leb *seb, *tmp; 1443 struct ubi_wl_entry *e; 1444 1445 1446 ubi->used = ubi->free = ubi->scrub = RB_ROOT; 1447 ubi->prot.pnum = ubi->prot.aec = RB_ROOT; 1448 spin_lock_init(&ubi->wl_lock); 1449 mutex_init(&ubi->move_mutex); 1450 init_rwsem(&ubi->work_sem); 1451 ubi->max_ec = si->max_ec; 1452 INIT_LIST_HEAD(&ubi->works); 1453 1454 sprintf(ubi->bgt_name, UBI_BGT_NAME_PATTERN, ubi->ubi_num); 1455 1456 err = -ENOMEM; 1457 ubi->lookuptbl = kzalloc(ubi->peb_count * sizeof(void *), GFP_KERNEL); 1458 if (!ubi->lookuptbl) 1459 return err; 1460 1461 list_for_each_entry_safe(seb, tmp, &si->erase, u.list) { 1462 cond_resched(); 1463 1464 e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); 1465 if (!e) 1466 goto out_free; 1467 1468 e->pnum = seb->pnum; 1469 e->ec = seb->ec; 1470 ubi->lookuptbl[e->pnum] = e; 1471 if (schedule_erase(ubi, e, 0)) { 1472 kmem_cache_free(ubi_wl_entry_slab, e); 1473 goto out_free; 1474 } 1475 } 1476 1477 list_for_each_entry(seb, &si->free, u.list) { 1478 cond_resched(); 1479 1480 e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); 1481 if (!e) 1482 goto out_free; 1483 1484 e->pnum = seb->pnum; 1485 e->ec = seb->ec; 1486 ubi_assert(e->ec >= 0); 1487 wl_tree_add(e, &ubi->free); 1488 ubi->lookuptbl[e->pnum] = e; 1489 } 1490 1491 list_for_each_entry(seb, &si->corr, u.list) { 1492 cond_resched(); 1493 1494 e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); 1495 if (!e) 1496 goto out_free; 1497 1498 e->pnum = seb->pnum; 1499 e->ec = seb->ec; 1500 ubi->lookuptbl[e->pnum] = e; 1501 if (schedule_erase(ubi, e, 0)) { 1502 kmem_cache_free(ubi_wl_entry_slab, e); 1503 goto out_free; 1504 } 1505 } 1506 1507 ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) { 1508 ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) { 1509 cond_resched(); 1510 1511 e = 
kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); 1512 if (!e) 1513 goto out_free; 1514 1515 e->pnum = seb->pnum; 1516 e->ec = seb->ec; 1517 ubi->lookuptbl[e->pnum] = e; 1518 if (!seb->scrub) { 1519 dbg_wl("add PEB %d EC %d to the used tree", 1520 e->pnum, e->ec); 1521 wl_tree_add(e, &ubi->used); 1522 } else { 1523 dbg_wl("add PEB %d EC %d to the scrub tree", 1524 e->pnum, e->ec); 1525 wl_tree_add(e, &ubi->scrub); 1526 } 1527 } 1528 } 1529 1530 if (ubi->avail_pebs < WL_RESERVED_PEBS) { 1531 ubi_err("no enough physical eraseblocks (%d, need %d)", 1532 ubi->avail_pebs, WL_RESERVED_PEBS); 1533 goto out_free; 1534 } 1535 ubi->avail_pebs -= WL_RESERVED_PEBS; 1536 ubi->rsvd_pebs += WL_RESERVED_PEBS; 1537 1538 /* Schedule wear-leveling if needed */ 1539 err = ensure_wear_leveling(ubi); 1540 if (err) 1541 goto out_free; 1542 1543 return 0; 1544 1545 out_free: 1546 cancel_pending(ubi); 1547 tree_destroy(&ubi->used); 1548 tree_destroy(&ubi->free); 1549 tree_destroy(&ubi->scrub); 1550 kfree(ubi->lookuptbl); 1551 return err; 1552 } 1553 1554 /** 1555 * protection_trees_destroy - destroy the protection RB-trees. 1556 * @ubi: UBI device description object 1557 */ 1558 static void protection_trees_destroy(struct ubi_device *ubi) 1559 { 1560 struct rb_node *rb; 1561 struct ubi_wl_prot_entry *pe; 1562 1563 rb = ubi->prot.aec.rb_node; 1564 while (rb) { 1565 if (rb->rb_left) 1566 rb = rb->rb_left; 1567 else if (rb->rb_right) 1568 rb = rb->rb_right; 1569 else { 1570 pe = rb_entry(rb, struct ubi_wl_prot_entry, rb_aec); 1571 1572 rb = rb_parent(rb); 1573 if (rb) { 1574 if (rb->rb_left == &pe->rb_aec) 1575 rb->rb_left = NULL; 1576 else 1577 rb->rb_right = NULL; 1578 } 1579 1580 kmem_cache_free(ubi_wl_entry_slab, pe->e); 1581 kfree(pe); 1582 } 1583 } 1584 } 1585 1586 /** 1587 * ubi_wl_close - close the wear-leveling unit. 
 * @ubi: UBI device description object
 */
void ubi_wl_close(struct ubi_device *ubi)
{
	dbg_wl("close the UBI wear-leveling unit");

	/* Cancel any queued work first, then tear down all WL state */
	cancel_pending(ubi);
	protection_trees_destroy(ubi);
	tree_destroy(&ubi->used);
	tree_destroy(&ubi->free);
	tree_destroy(&ubi->scrub);
	kfree(ubi->lookuptbl);
}

#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID

/**
 * paranoid_check_ec - make sure that the erase counter of a physical eraseblock
 * is correct.
 * @ubi: UBI device description object
 * @pnum: the physical eraseblock number to check
 * @ec: the erase counter to check
 *
 * This function returns zero if the erase counter of physical eraseblock @pnum
 * is equivalent to @ec, %1 if not, and a negative error code if an error
 * occurred.
 */
static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec)
{
	int err;
	long long read_ec;
	struct ubi_ec_hdr *ec_hdr;

	ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS);
	if (!ec_hdr)
		return -ENOMEM;

	err = ubi_io_read_ec_hdr(ubi, pnum, ec_hdr, 0);
	if (err && err != UBI_IO_BITFLIPS) {
		/* The header does not have to exist */
		err = 0;
		goto out_free;
	}

	/* Compare the on-flash EC against the in-RAM value */
	read_ec = be64_to_cpu(ec_hdr->ec);
	if (ec != read_ec) {
		ubi_err("paranoid check failed for PEB %d", pnum);
		ubi_err("read EC is %lld, should be %d", read_ec, ec);
		ubi_dbg_dump_stack();
		err = 1;
	} else
		err = 0;

out_free:
	kfree(ec_hdr);
	return err;
}

/**
 * paranoid_check_in_wl_tree - make sure that a wear-leveling entry is present
 * in a WL RB-tree.
 * @e: the wear-leveling entry to check
 * @root: the root of the tree
 *
 * This function returns zero if @e is in the @root RB-tree and %1 if it
 * is not.
 */
static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e,
				     struct rb_root *root)
{
	if (in_wl_tree(e, root))
		return 0;

	ubi_err("paranoid check failed for PEB %d, EC %d, RB-tree %p ",
		e->pnum, e->ec, root);
	ubi_dbg_dump_stack();
	return 1;
}

#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */