// SPDX-License-Identifier: GPL-2.0
/*
 * Bad block management
 *
 * - Heavily based on MD badblocks code from Neil Brown
 *
 * Copyright (c) 2015, Intel Corporation.
 */

#include <linux/badblocks.h>
#include <linux/seqlock.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/slab.h>

/**
 * badblocks_check() - check a given range for bad sectors
 * @bb:		the badblocks structure that holds all badblock information
 * @s:		sector (start) at which to check for badblocks
 * @sectors:	number of sectors to check for badblocks
 * @first_bad:	pointer to store location of the first badblock
 * @bad_sectors: pointer to store number of badblocks after @first_bad
 *
 * We can record which blocks on each device are 'bad' and so just
 * fail those blocks, or that stripe, rather than the whole device.
 * Entries in the bad-block table are 64 bits wide.  This comprises:
 * Length of bad-range, in sectors: 0-511 for lengths 1-512
 * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
 *  A 'shift' can be set so that larger blocks are tracked and
 *  consequently larger devices can be covered.
 * 'Acknowledged' flag - 1 bit. - the most significant bit.
 *
 * Locking of the bad-block table uses a seqlock so badblocks_check
 * might need to retry if it is very unlucky.
 * We will sometimes want to check for bad blocks in a bi_end_io function,
 * so we use the write_seqlock_irq variant.
 *
 * When looking for a bad block we specify a range and want to
 * know if any block in the range is bad.  So we binary-search
 * to the last range that starts at-or-before the given endpoint
 * (or "before the sector after the target range"),
 * then see if it ends after the given start.
 *
 * Return:
 *  0: there are no known bad blocks in the range
 *  1: there are known bad blocks which are all acknowledged
 * -1: there are bad blocks which have not yet been acknowledged in metadata,
 * plus the start/length of the first bad section we overlap.
 */
int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
			sector_t *first_bad, int *bad_sectors)
{
	int hi;
	int lo;
	u64 *p = bb->page;
	int rv;
	sector_t target = s + sectors;
	unsigned seq;

	if (bb->shift > 0) {
		/* round the start down, and the end up */
		s >>= bb->shift;
		target += (1<<bb->shift) - 1;
		target >>= bb->shift;
		sectors = target - s;
	}
	/* 'target' is now the first block after the bad range */

retry:
	seq = read_seqbegin(&bb->lock);
	lo = 0;
	rv = 0;
	hi = bb->count;

	/* Binary search between lo and hi for 'target'
	 * i.e. for the last range that starts before 'target'
	 */
	/* INVARIANT: ranges before 'lo' and at-or-after 'hi'
	 * are known not to be the last range before target.
	 * VARIANT: hi-lo is the number of possible
	 * ranges, and decreases until it reaches 1
	 */
	while (hi - lo > 1) {
		int mid = (lo + hi) / 2;
		sector_t a = BB_OFFSET(p[mid]);

		if (a < target)
			/* This could still be the one, earlier ranges
			 * could not.
			 */
			lo = mid;
		else
			/* This and later ranges are definitely out. */
			hi = mid;
	}
	/* 'lo' might be the last that started before target, but 'hi' isn't */
	if (hi > lo) {
		/* need to check all ranges that end after 's' to see if
		 * any are unacknowledged.
		 */
		while (lo >= 0 &&
		       BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
			if (BB_OFFSET(p[lo]) < target) {
				/* starts before the end, and finishes after
				 * the start, so they must overlap
				 */
				if (rv != -1 && BB_ACK(p[lo]))
					rv = 1;
				else
					rv = -1;
				*first_bad = BB_OFFSET(p[lo]);
				*bad_sectors = BB_LEN(p[lo]);
			}
			lo--;
		}
	}

	if (read_seqretry(&bb->lock, seq))
		goto retry;

	return rv;
}
EXPORT_SYMBOL_GPL(badblocks_check);
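
/*
 * Usage sketch (illustrative only, not compiled): how a block driver's I/O
 * path might consult the table before issuing a read.  The function name
 * and the errno mapping below are hypothetical choices, not something this
 * file mandates.
 */
#if 0
static int my_driver_check_io(struct badblocks *bb, sector_t sector,
			      int nr_sectors)
{
	sector_t first_bad;
	int bad_sectors;

	switch (badblocks_check(bb, sector, nr_sectors,
				&first_bad, &bad_sectors)) {
	case 0:
		return 0;	/* range is clean, issue the I/O */
	case 1:
		/* acknowledged bad blocks: fail just this range */
		pr_debug("bad range at %llu (%d sectors)\n",
			 (unsigned long long)first_bad, bad_sectors);
		return -EIO;
	default:
		/* -1: unacknowledged bad blocks; the caller may wait for
		 * the metadata to be written back before failing the I/O
		 */
		return -EAGAIN;
	}
}
#endif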
static void badblocks_update_acked(struct badblocks *bb)
{
	u64 *p = bb->page;
	int i;
	bool unacked = false;

	if (!bb->unacked_exist)
		return;

	for (i = 0; i < bb->count ; i++) {
		if (!BB_ACK(p[i])) {
			unacked = true;
			break;
		}
	}

	if (!unacked)
		bb->unacked_exist = 0;
}

/**
 * badblocks_set() - Add a range of bad blocks to the table.
 * @bb:		the badblocks structure that holds all badblock information
 * @s:		first sector to mark as bad
 * @sectors:	number of sectors to mark as bad
 * @acknowledged: whether to mark the bad sectors as acknowledged
 *
 * This might extend the table, or might contract it if two adjacent ranges
 * can be merged.  We binary-search to find the 'insertion' point, then
 * decide how best to handle it.
 *
 * Return:
 *  0: success
 *  1: failed to set badblocks (out of space)
 */
int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
			int acknowledged)
{
	u64 *p;
	int lo, hi;
	int rv = 0;
	unsigned long flags;

	if (bb->shift < 0)
		/* badblocks are disabled */
		return 1;

	if (bb->shift) {
		/* round the start down, and the end up */
		sector_t next = s + sectors;

		s >>= bb->shift;
		next += (1<<bb->shift) - 1;
		next >>= bb->shift;
		sectors = next - s;
	}

	write_seqlock_irqsave(&bb->lock, flags);

	p = bb->page;
	lo = 0;
	hi = bb->count;
	/* Find the last range that starts at-or-before 's' */
	while (hi - lo > 1) {
		int mid = (lo + hi) / 2;
		sector_t a = BB_OFFSET(p[mid]);

		if (a <= s)
			lo = mid;
		else
			hi = mid;
	}
	if (hi > lo && BB_OFFSET(p[lo]) > s)
		hi = lo;

	if (hi > lo) {
		/* we found a range that might merge with the start
		 * of our new range
		 */
		sector_t a = BB_OFFSET(p[lo]);
		sector_t e = a + BB_LEN(p[lo]);
		int ack = BB_ACK(p[lo]);

		if (e >= s) {
			/* Yes, we can merge with a previous range */
			if (s == a && s + sectors >= e)
				/* new range covers old */
				ack = acknowledged;
			else
				ack = ack && acknowledged;

			if (e < s + sectors)
				e = s + sectors;
			if (e - a <= BB_MAX_LEN) {
				p[lo] = BB_MAKE(a, e-a, ack);
				s = e;
			} else {
				/* does not all fit in one range,
				 * make p[lo] maximal
				 */
				if (BB_LEN(p[lo]) != BB_MAX_LEN)
					p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
				s = a + BB_MAX_LEN;
			}
			sectors = e - s;
		}
	}
	if (sectors && hi < bb->count) {
		/* 'hi' points to the first range that starts after 's'.
		 * Maybe we can merge with the start of that range
		 */
		sector_t a = BB_OFFSET(p[hi]);
		sector_t e = a + BB_LEN(p[hi]);
		int ack = BB_ACK(p[hi]);

		if (a <= s + sectors) {
			/* merging is possible */
			if (e <= s + sectors) {
				/* full overlap */
				e = s + sectors;
				ack = acknowledged;
			} else
				ack = ack && acknowledged;

			a = s;
			if (e - a <= BB_MAX_LEN) {
				p[hi] = BB_MAKE(a, e-a, ack);
				s = e;
			} else {
				p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
				s = a + BB_MAX_LEN;
			}
			sectors = e - s;
			lo = hi;
			hi++;
		}
	}
	if (sectors == 0 && hi < bb->count) {
		/* we might be able to combine lo and hi */
		/* Note: 's' is at the end of 'lo' */
		sector_t a = BB_OFFSET(p[hi]);
		int lolen = BB_LEN(p[lo]);
		int hilen = BB_LEN(p[hi]);
		int newlen = lolen + hilen - (s - a);

		if (s >= a && newlen < BB_MAX_LEN) {
			/* yes, we can combine them */
			int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);

			p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
			memmove(p + hi, p + hi + 1,
				(bb->count - hi - 1) * 8);
			bb->count--;
		}
	}
	while (sectors) {
		/* didn't merge (it all).
		 * Need to add a range just before 'hi'
		 */
		if (bb->count >= MAX_BADBLOCKS) {
			/* No room for more */
			rv = 1;
			break;
		} else {
			int this_sectors = sectors;

			memmove(p + hi + 1, p + hi,
				(bb->count - hi) * 8);
			bb->count++;

			if (this_sectors > BB_MAX_LEN)
				this_sectors = BB_MAX_LEN;
			p[hi] = BB_MAKE(s, this_sectors, acknowledged);
			sectors -= this_sectors;
			s += this_sectors;
		}
	}

	bb->changed = 1;
	if (!acknowledged)
		bb->unacked_exist = 1;
	else
		badblocks_update_acked(bb);
	write_sequnlock_irqrestore(&bb->lock, flags);

	return rv;
}
EXPORT_SYMBOL_GPL(badblocks_set);
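
/*
 * Usage sketch (illustrative only, not compiled): recording a media error.
 * A range longer than BB_MAX_LEN (512 sectors) is handled internally by
 * splitting it across several table entries, so the caller simply passes
 * the whole range.  The helper name is hypothetical.
 */
#if 0
static void my_driver_note_media_error(struct badblocks *bb,
				       sector_t sector, int nr_sectors)
{
	/* record the range unacknowledged; metadata writeback acks it */
	if (badblocks_set(bb, sector, nr_sectors, 0))
		pr_warn("badblocks table full, range %llu+%d not recorded\n",
			(unsigned long long)sector, nr_sectors);
}
#endif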
/**
 * badblocks_clear() - Remove a range of bad blocks from the table.
 * @bb:		the badblocks structure that holds all badblock information
 * @s:		first sector to mark as clean
 * @sectors:	number of sectors to mark as clean
 *
 * This may involve extending the table if we split a region,
 * but it must not fail.  So if the table becomes full, we just
 * drop the remove request.
 *
 * Return:
 *  0: success
 *  -ENOSPC: failed to clear badblocks (table full while splitting a range)
 */
int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
{
	u64 *p;
	int lo, hi;
	sector_t target = s + sectors;
	int rv = 0;

	if (bb->shift > 0) {
		/* When clearing we round the start up and the end down.
		 * This should not matter as the shift should align with
		 * the block size and no rounding should ever be needed.
		 * However it is better to think a block is bad when it
		 * isn't than to think a block is not bad when it is.
		 */
		s += (1<<bb->shift) - 1;
		s >>= bb->shift;
		target >>= bb->shift;
		sectors = target - s;
	}

	write_seqlock_irq(&bb->lock);

	p = bb->page;
	lo = 0;
	hi = bb->count;
	/* Find the last range that starts before 'target' */
	while (hi - lo > 1) {
		int mid = (lo + hi) / 2;
		sector_t a = BB_OFFSET(p[mid]);

		if (a < target)
			lo = mid;
		else
			hi = mid;
	}
	if (hi > lo) {
		/* p[lo] is the last range that could overlap the
		 * current range.  Earlier ranges could also overlap,
		 * but only this one can overlap the end of the range.
		 */
		if ((BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) &&
		    (BB_OFFSET(p[lo]) < target)) {
			/* Partial overlap, leave the tail of this range */
			int ack = BB_ACK(p[lo]);
			sector_t a = BB_OFFSET(p[lo]);
			sector_t end = a + BB_LEN(p[lo]);

			if (a < s) {
				/* we need to split this range */
				if (bb->count >= MAX_BADBLOCKS) {
					rv = -ENOSPC;
					goto out;
				}
				memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
				bb->count++;
				p[lo] = BB_MAKE(a, s-a, ack);
				lo++;
			}
			p[lo] = BB_MAKE(target, end - target, ack);
			/* there is no longer an overlap */
			hi = lo;
			lo--;
		}
		while (lo >= 0 &&
		       (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) &&
		       (BB_OFFSET(p[lo]) < target)) {
			/* This range does overlap */
			if (BB_OFFSET(p[lo]) < s) {
				/* Keep the early parts of this range. */
				int ack = BB_ACK(p[lo]);
				sector_t start = BB_OFFSET(p[lo]);

				p[lo] = BB_MAKE(start, s - start, ack);
				/* now p[lo] doesn't overlap, so we're done */
				break;
			}
			lo--;
		}
		/* 'lo' is strictly before, 'hi' is strictly after,
		 * anything between needs to be discarded
		 */
		if (hi - lo > 1) {
			memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
			bb->count -= (hi - lo - 1);
		}
	}

	badblocks_update_acked(bb);
	bb->changed = 1;
out:
	write_sequnlock_irq(&bb->lock);
	return rv;
}
EXPORT_SYMBOL_GPL(badblocks_clear);
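
/*
 * Usage sketch (illustrative only, not compiled): once a previously-bad
 * range has been repaired (e.g. rewritten successfully, or remapped by
 * the device), the driver drops it from the table.  The helper name is
 * hypothetical.
 */
#if 0
static void my_driver_sectors_repaired(struct badblocks *bb,
				       sector_t sector, int nr_sectors)
{
	/* a full table while splitting is reported but not fatal */
	if (badblocks_clear(bb, sector, nr_sectors) == -ENOSPC)
		pr_warn("could not split badblocks range at %llu\n",
			(unsigned long long)sector);
}
#endif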
/**
 * ack_all_badblocks() - Acknowledge all bad blocks in a list.
 * @bb:		the badblocks structure that holds all badblock information
 *
 * This only succeeds if ->changed is clear.  It is used by
 * in-kernel metadata updates.
 */
void ack_all_badblocks(struct badblocks *bb)
{
	if (bb->page == NULL || bb->changed)
		/* no point even trying */
		return;
	write_seqlock_irq(&bb->lock);

	if (bb->changed == 0 && bb->unacked_exist) {
		u64 *p = bb->page;
		int i;

		for (i = 0; i < bb->count ; i++) {
			if (!BB_ACK(p[i])) {
				sector_t start = BB_OFFSET(p[i]);
				int len = BB_LEN(p[i]);

				p[i] = BB_MAKE(start, len, 1);
			}
		}
		bb->unacked_exist = 0;
	}
	write_sequnlock_irq(&bb->lock);
}
EXPORT_SYMBOL_GPL(ack_all_badblocks);

/**
 * badblocks_show() - sysfs access to bad-blocks list
 * @bb:		the badblocks structure that holds all badblock information
 * @page:	buffer received from sysfs
 * @unack:	whether to show only unacknowledged badblocks
 *
 * Return:
 *  Length of returned data
 */
ssize_t badblocks_show(struct badblocks *bb, char *page, int unack)
{
	size_t len;
	int i;
	u64 *p = bb->page;
	unsigned seq;

	if (bb->shift < 0)
		return 0;

retry:
	seq = read_seqbegin(&bb->lock);

	len = 0;
	i = 0;

	while (len < PAGE_SIZE && i < bb->count) {
		sector_t s = BB_OFFSET(p[i]);
		unsigned int length = BB_LEN(p[i]);
		int ack = BB_ACK(p[i]);

		i++;

		if (unack && ack)
			continue;

		len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
				(unsigned long long)s << bb->shift,
				length << bb->shift);
	}
	if (unack && len == 0)
		bb->unacked_exist = 0;

	if (read_seqretry(&bb->lock, seq))
		goto retry;

	return len;
}
EXPORT_SYMBOL_GPL(badblocks_show);
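
/*
 * Usage sketch (illustrative only, not compiled): exposing the table via a
 * sysfs attribute, the way md and the libnvdimm drivers do.  The 'my_dev'
 * container and the attribute wiring are hypothetical.
 */
#if 0
static ssize_t badblocks_attr_show(struct device *dev,
				   struct device_attribute *attr, char *page)
{
	struct my_dev *md = dev_get_drvdata(dev);

	/* unack == 0: list acknowledged and unacknowledged entries alike */
	return badblocks_show(&md->bb, page, 0);
}

static ssize_t badblocks_attr_store(struct device *dev,
				    struct device_attribute *attr,
				    const char *page, size_t len)
{
	struct my_dev *md = dev_get_drvdata(dev);

	/* unack == 0: ranges written here are recorded as acknowledged */
	return badblocks_store(&md->bb, page, len, 0);
}
static DEVICE_ATTR(badblocks, 0644, badblocks_attr_show, badblocks_attr_store);
#endif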
/**
 * badblocks_store() - sysfs access to bad-blocks list
 * @bb:		the badblocks structure that holds all badblock information
 * @page:	buffer received from sysfs
 * @len:	length of data received from sysfs
 * @unack:	whether to record the new badblocks as unacknowledged
 *
 * Return:
 *  Length of the buffer processed or -ve error.
 */
ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
			int unack)
{
	unsigned long long sector;
	int length;
	char newline;

	switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
	case 3:
		if (newline != '\n')
			return -EINVAL;
		fallthrough;
	case 2:
		if (length <= 0)
			return -EINVAL;
		break;
	default:
		return -EINVAL;
	}

	if (badblocks_set(bb, sector, length, !unack))
		return -ENOSPC;
	else
		return len;
}
EXPORT_SYMBOL_GPL(badblocks_store);

static int __badblocks_init(struct device *dev, struct badblocks *bb,
		int enable)
{
	bb->dev = dev;
	bb->count = 0;
	if (enable)
		bb->shift = 0;
	else
		bb->shift = -1;
	if (dev)
		bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL);
	else
		bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL);
	if (!bb->page) {
		bb->shift = -1;
		return -ENOMEM;
	}
	seqlock_init(&bb->lock);

	return 0;
}

/**
 * badblocks_init() - initialize the badblocks structure
 * @bb:		the badblocks structure that holds all badblock information
 * @enable:	whether to enable badblocks accounting
 *
 * Return:
 *  0: success
 *  -ve errno: on error
 */
int badblocks_init(struct badblocks *bb, int enable)
{
	return __badblocks_init(NULL, bb, enable);
}
EXPORT_SYMBOL_GPL(badblocks_init);

int devm_init_badblocks(struct device *dev, struct badblocks *bb)
{
	if (!bb)
		return -EINVAL;
	return __badblocks_init(dev, bb, 1);
}
EXPORT_SYMBOL_GPL(devm_init_badblocks);

/**
 * badblocks_exit() - free the badblocks structure
 * @bb:		the badblocks structure that holds all badblock information
 */
void badblocks_exit(struct badblocks *bb)
{
	if (!bb)
		return;
	if (bb->dev)
		devm_kfree(bb->dev, bb->page);
	else
		kfree(bb->page);
	bb->page = NULL;
}
EXPORT_SYMBOL_GPL(badblocks_exit);
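
/*
 * Lifecycle sketch (illustrative only, not compiled): a minimal caller that
 * allocates a table, records a bad range, queries it, and tears the table
 * down again.  All names other than the badblocks_* API are hypothetical.
 */
#if 0
static int my_driver_selftest(void)
{
	struct badblocks bb;
	sector_t first_bad = 0;
	int bad_sectors = 0;
	int rv;

	rv = badblocks_init(&bb, 1);	/* enable accounting, shift == 0 */
	if (rv)
		return rv;

	badblocks_set(&bb, 1024, 8, 1);	/* sectors 1024-1031, acknowledged */

	/* expect 1: the range overlaps known, acknowledged bad blocks */
	rv = badblocks_check(&bb, 1020, 16, &first_bad, &bad_sectors);
	if (rv)
		pr_info("check returned %d, first bad %llu (%d sectors)\n",
			rv, (unsigned long long)first_bad, bad_sectors);

	badblocks_clear(&bb, 1024, 8);	/* repaired: drop the range */
	badblocks_exit(&bb);		/* frees bb.page (kzalloc case) */

	return 0;
}
#endif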