/*
   drbd_actlog.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */

#include <linux/slab.h>
#include <linux/drbd.h>
#include "drbd_int.h"
#include "drbd_wrappers.h"

/* We maintain a trivial checksum in our on disk activity log.
 * With that we can ensure correct operation even when the storage
 * device might do a partial (last) sector write while losing power.
 */
struct __packed al_transaction {
	u32 magic;
	u32 tr_number;
	struct __packed {
		u32 pos;
		u32 extent; } updates[1 + AL_EXTENTS_PT];
	u32 xor_sum;
};

struct update_odbm_work {
	struct drbd_work w;
	unsigned int enr;
};

struct update_al_work {
	struct drbd_work w;
	struct lc_element *al_ext;
	struct completion event;
	unsigned int enr;
	/* if old_enr != LC_FREE, write corresponding bitmap sector, too */
	unsigned int old_enr;
};

struct drbd_atodb_wait {
	atomic_t count;
	struct completion io_done;
	struct drbd_conf *mdev;
	int error;
};


int w_al_write_transaction(struct drbd_conf *, struct drbd_work *, int);

void *drbd_md_get_buffer(struct drbd_conf *mdev)
{
	int r;

	wait_event(mdev->misc_wait,
		   (r = atomic_cmpxchg(&mdev->md_io_in_use, 0, 1)) == 0 ||
		   mdev->state.disk <= D_FAILED);

	return r ? NULL : page_address(mdev->md_io_page);
}
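
/* There is a single preallocated meta-data IO page per device
 * (mdev->md_io_page).  drbd_md_get_buffer() above claims exclusive use of
 * it by flipping md_io_in_use from 0 to 1, or returns NULL once the disk
 * has failed; drbd_md_put_buffer() below releases it again and wakes any
 * waiters.  The synchronous meta-data IO helpers in this file run under
 * that exclusion. */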

void drbd_md_put_buffer(struct drbd_conf *mdev)
{
	if (atomic_dec_and_test(&mdev->md_io_in_use))
		wake_up(&mdev->misc_wait);
}

static bool md_io_allowed(struct drbd_conf *mdev)
{
	enum drbd_disk_state ds = mdev->state.disk;
	return ds >= D_NEGOTIATING || ds == D_ATTACHING;
}

void wait_until_done_or_disk_failure(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
				     unsigned int *done)
{
	long dt = bdev->dc.disk_timeout * HZ / 10;
	if (dt == 0)
		dt = MAX_SCHEDULE_TIMEOUT;

	dt = wait_event_timeout(mdev->misc_wait, *done || !md_io_allowed(mdev), dt);
	if (dt == 0)
		dev_err(DEV, "meta-data IO operation timed out\n");
}

static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
				 struct drbd_backing_dev *bdev,
				 struct page *page, sector_t sector,
				 int rw, int size)
{
	struct bio *bio;
	int ok;

	mdev->md_io.done = 0;
	mdev->md_io.error = -ENODEV;

	if ((rw & WRITE) && !test_bit(MD_NO_FUA, &mdev->flags))
		rw |= REQ_FUA | REQ_FLUSH;
	rw |= REQ_SYNC;

	bio = bio_alloc_drbd(GFP_NOIO);
	bio->bi_bdev = bdev->md_bdev;
	bio->bi_sector = sector;
	ok = (bio_add_page(bio, page, size, 0) == size);
	if (!ok)
		goto out;
	bio->bi_private = &mdev->md_io;
	bio->bi_end_io = drbd_md_io_complete;
	bio->bi_rw = rw;

	if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* Corresponding put_ldev in drbd_md_io_complete() */
		dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n");
		ok = 0;
		goto out;
	}

	bio_get(bio); /* one bio_put() is in the completion handler */
	atomic_inc(&mdev->md_io_in_use); /* drbd_md_put_buffer() is in the completion handler */
	if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
		bio_endio(bio, -EIO);
	else
		submit_bio(rw, bio);
	wait_until_done_or_disk_failure(mdev, bdev, &mdev->md_io.done);
	ok = bio_flagged(bio, BIO_UPTODATE) && mdev->md_io.error == 0;

 out:
	bio_put(bio);
	return ok;
}

int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
			 sector_t sector, int rw)
{
	int logical_block_size, mask, ok;
	int offset = 0;
	struct page *iop = mdev->md_io_page;

	D_ASSERT(atomic_read(&mdev->md_io_in_use) == 1);

	BUG_ON(!bdev->md_bdev);

	logical_block_size = bdev_logical_block_size(bdev->md_bdev);
	if (logical_block_size == 0)
		logical_block_size = MD_SECTOR_SIZE;

	/* in case logical_block_size != 512 [ s390 only? ] */
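	/* Illustrative sketch of the bounce path below, assuming a 4096-byte
	 * logical block size: mask = 7, so "sector" is rounded down to the
	 * containing 4 KiB block and "offset" is the 512-byte slot inside it.
	 * A write then becomes: read the 4 KiB block into md_io_tmpp, patch
	 * the 512 bytes at offset*MD_SECTOR_SIZE, and write the full block
	 * back; a read fetches the 4 KiB block and copies the wanted
	 * 512 bytes out. */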
	if (logical_block_size != MD_SECTOR_SIZE) {
		mask = (logical_block_size / MD_SECTOR_SIZE) - 1;
		D_ASSERT(mask == 1 || mask == 3 || mask == 7);
		D_ASSERT(logical_block_size == (mask+1) * MD_SECTOR_SIZE);
		offset = sector & mask;
		sector = sector & ~mask;
		iop = mdev->md_io_tmpp;

		if (rw & WRITE) {
			/* these are GFP_KERNEL pages, pre-allocated
			 * on device initialization */
			void *p = page_address(mdev->md_io_page);
			void *hp = page_address(mdev->md_io_tmpp);

			ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector,
					READ, logical_block_size);

			if (unlikely(!ok)) {
				dev_err(DEV, "drbd_md_sync_page_io(,%llus,"
				    "READ [logical_block_size!=512]) failed!\n",
				    (unsigned long long)sector);
				return 0;
			}

			memcpy(hp + offset*MD_SECTOR_SIZE, p, MD_SECTOR_SIZE);
		}
	}

	if (sector < drbd_md_first_sector(bdev) ||
	    sector > drbd_md_last_sector(bdev))
		dev_alert(DEV, "%s [%d]:%s(,%llus,%s) out of range md access!\n",
		     current->comm, current->pid, __func__,
		     (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");

	ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, logical_block_size);
	if (unlikely(!ok)) {
		dev_err(DEV, "drbd_md_sync_page_io(,%llus,%s) failed!\n",
		    (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");
		return 0;
	}

	if (logical_block_size != MD_SECTOR_SIZE && !(rw & WRITE)) {
		void *p = page_address(mdev->md_io_page);
		void *hp = page_address(mdev->md_io_tmpp);

		memcpy(p, hp + offset*MD_SECTOR_SIZE, MD_SECTOR_SIZE);
	}

	return ok;
}

static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr)
{
	struct lc_element *al_ext;
	struct lc_element *tmp;
	unsigned long al_flags = 0;
	int wake;

	spin_lock_irq(&mdev->al_lock);
	tmp = lc_find(mdev->resync, enr/AL_EXT_PER_BM_SECT);
	if (unlikely(tmp != NULL)) {
		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
		if (test_bit(BME_NO_WRITES, &bm_ext->flags)) {
			wake = !test_and_set_bit(BME_PRIORITY, &bm_ext->flags);
			spin_unlock_irq(&mdev->al_lock);
			if (wake)
				wake_up(&mdev->al_wait);
			return NULL;
		}
	}
	al_ext = lc_get(mdev->act_log, enr);
	al_flags = mdev->act_log->flags;
	spin_unlock_irq(&mdev->al_lock);

	/*
	if (!al_ext) {
		if (al_flags & LC_STARVING)
			dev_warn(DEV, "Have to wait for LRU element (AL too small?)\n");
		if (al_flags & LC_DIRTY)
			dev_warn(DEV, "Ongoing AL update (AL device too slow?)\n");
	}
	*/

	return al_ext;
}

void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector)
{
	unsigned int enr = (sector >> (AL_EXTENT_SHIFT-9));
	struct lc_element *al_ext;
	struct update_al_work al_work;

	D_ASSERT(atomic_read(&mdev->local_cnt) > 0);

	wait_event(mdev->al_wait, (al_ext = _al_get(mdev, enr)));

	if (al_ext->lc_number != enr) {
		/* drbd_al_write_transaction(mdev,al_ext,enr);
		 * recurses into generic_make_request(), which
		 * disallows recursion, bios being serialized on the
		 * current->bio_tail list now.
		 * we have to delegate updates to the activity log
		 * to the worker thread. */
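		/* Note: al_work lives on this stack frame; the completion
		 * below makes sure the worker has finished with it (and has
		 * written the transaction) before we return and it goes out
		 * of scope. */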
		init_completion(&al_work.event);
		al_work.al_ext = al_ext;
		al_work.enr = enr;
		al_work.old_enr = al_ext->lc_number;
		al_work.w.cb = w_al_write_transaction;
		drbd_queue_work_front(&mdev->data.work, &al_work.w);
		wait_for_completion(&al_work.event);

		mdev->al_writ_cnt++;

		spin_lock_irq(&mdev->al_lock);
		lc_changed(mdev->act_log, al_ext);
		spin_unlock_irq(&mdev->al_lock);
		wake_up(&mdev->al_wait);
	}
}

void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector)
{
	unsigned int enr = (sector >> (AL_EXTENT_SHIFT-9));
	struct lc_element *extent;
	unsigned long flags;

	spin_lock_irqsave(&mdev->al_lock, flags);

	extent = lc_find(mdev->act_log, enr);

	if (!extent) {
		spin_unlock_irqrestore(&mdev->al_lock, flags);
		dev_err(DEV, "al_complete_io() called on inactive extent %u\n", enr);
		return;
	}

	if (lc_put(mdev->act_log, extent) == 0)
		wake_up(&mdev->al_wait);

	spin_unlock_irqrestore(&mdev->al_lock, flags);
}

#if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)
/* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT
 * are still coupled, or assume too much about their relation.
 * Code below will not work if this is violated.
 * Will be cleaned up with some followup patch.
 */
# error FIXME
#endif

static unsigned int al_extent_to_bm_page(unsigned int al_enr)
{
	return al_enr >>
		/* bit to page */
		((PAGE_SHIFT + 3) -
		/* al extent number to bit */
		 (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT));
}

static unsigned int rs_extent_to_bm_page(unsigned int rs_enr)
{
	return rs_enr >>
		/* bit to page */
		((PAGE_SHIFT + 3) -
		/* resync extent number to bit */
		 (BM_EXT_SHIFT - BM_BLOCK_SHIFT));
}
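
/* For orientation, a worked example assuming 4 KiB pages (PAGE_SHIFT == 12):
 * a bitmap page holds 2^(12+3) = 32768 bits.  A 4 MB AL extent covers
 * 2^(22-12) = 1024 bitmap bits, so 32 AL extents map to one bitmap page
 * (shift by 5); a 16 MB resync extent covers 4096 bits, so 8 resync
 * extents map to one page (shift by 3). */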

int
w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
{
	struct update_al_work *aw = container_of(w, struct update_al_work, w);
	struct lc_element *updated = aw->al_ext;
	const unsigned int new_enr = aw->enr;
	const unsigned int evicted = aw->old_enr;
	struct al_transaction *buffer;
	sector_t sector;
	int i, n, mx;
	unsigned int extent_nr;
	u32 xor_sum = 0;

	if (!get_ldev(mdev)) {
		dev_err(DEV,
			"disk is %s, cannot start al transaction (-%d +%d)\n",
			drbd_disk_str(mdev->state.disk), evicted, new_enr);
		complete(&((struct update_al_work *)w)->event);
		return 1;
	}
	/* do we have to do a bitmap write, first?
	 * TODO reduce maximum latency:
	 * submit both bios, then wait for both,
	 * instead of doing two synchronous sector writes.
	 * For now, we must not write the transaction,
	 * if we cannot write out the bitmap of the evicted extent. */
	if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE)
		drbd_bm_write_page(mdev, al_extent_to_bm_page(evicted));

	/* The bitmap write may have failed, causing a state change. */
	if (mdev->state.disk < D_INCONSISTENT) {
		dev_err(DEV,
			"disk is %s, cannot write al transaction (-%d +%d)\n",
			drbd_disk_str(mdev->state.disk), evicted, new_enr);
		complete(&((struct update_al_work *)w)->event);
		put_ldev(mdev);
		return 1;
	}

	buffer = drbd_md_get_buffer(mdev); /* protects md_io_buffer, al_tr_cycle, ... */
	if (!buffer) {
		dev_err(DEV, "disk failed while waiting for md_io buffer\n");
		complete(&((struct update_al_work *)w)->event);
		put_ldev(mdev);
		return 1;
	}

	buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC);
	buffer->tr_number = cpu_to_be32(mdev->al_tr_number);

	n = lc_index_of(mdev->act_log, updated);

	buffer->updates[0].pos = cpu_to_be32(n);
	buffer->updates[0].extent = cpu_to_be32(new_enr);

	xor_sum ^= new_enr;

	mx = min_t(int, AL_EXTENTS_PT,
		   mdev->act_log->nr_elements - mdev->al_tr_cycle);
	for (i = 0; i < mx; i++) {
		unsigned idx = mdev->al_tr_cycle + i;
		extent_nr = lc_element_by_index(mdev->act_log, idx)->lc_number;
		buffer->updates[i+1].pos = cpu_to_be32(idx);
		buffer->updates[i+1].extent = cpu_to_be32(extent_nr);
		xor_sum ^= extent_nr;
	}
	for (; i < AL_EXTENTS_PT; i++) {
		buffer->updates[i+1].pos = __constant_cpu_to_be32(-1);
		buffer->updates[i+1].extent = __constant_cpu_to_be32(LC_FREE);
		xor_sum ^= LC_FREE;
	}
	mdev->al_tr_cycle += AL_EXTENTS_PT;
	if (mdev->al_tr_cycle >= mdev->act_log->nr_elements)
		mdev->al_tr_cycle = 0;

	buffer->xor_sum = cpu_to_be32(xor_sum);

	sector = mdev->ldev->md.md_offset
		+ mdev->ldev->md.al_offset + mdev->al_tr_pos;

	if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE))
		drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);

	if (++mdev->al_tr_pos >
	    div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT))
		mdev->al_tr_pos = 0;

	D_ASSERT(mdev->al_tr_pos < MD_AL_MAX_SIZE);
	mdev->al_tr_number++;

	drbd_md_put_buffer(mdev);

	complete(&((struct update_al_work *)w)->event);
	put_ldev(mdev);

	return 1;
}

/**
 * drbd_al_read_tr() - Read a single transaction from the on disk activity log
 * @mdev:	DRBD device.
 * @bdev:	Block device to read from.
 * @b:		pointer to an al_transaction.
 * @index:	On disk slot of the transaction to read.
 *
 * Returns -1 on IO error, 0 on checksum error and 1 upon success.
 */
static int drbd_al_read_tr(struct drbd_conf *mdev,
			   struct drbd_backing_dev *bdev,
			   struct al_transaction *b,
			   int index)
{
	sector_t sector;
	int rv, i;
	u32 xor_sum = 0;

	sector = bdev->md.md_offset + bdev->md.al_offset + index;

	/* Don't process errors normally,
	 * as this is done before disk is attached! */
	if (!drbd_md_sync_page_io(mdev, bdev, sector, READ))
		return -1;

	rv = (be32_to_cpu(b->magic) == DRBD_MAGIC);

	for (i = 0; i < AL_EXTENTS_PT + 1; i++)
		xor_sum ^= be32_to_cpu(b->updates[i].extent);
	rv &= (xor_sum == be32_to_cpu(b->xor_sum));

	return rv;
}
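
/* The on-disk activity log is a small ring of single-sector transactions.
 * Each transaction records the one changed slot (updates[0]) plus the next
 * AL_EXTENTS_PT slots of the in-core LRU (al_tr_cycle), so after enough
 * transactions every active extent has been written out at least once.
 * drbd_al_read_log() below scans all slots, uses tr_number to find the
 * oldest and newest valid transaction, and replays them in order. */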

/**
 * drbd_al_read_log() - Restores the activity log from its on disk representation.
 * @mdev:	DRBD device.
 * @bdev:	Block device to read from.
 *
 * Returns 1 on success, returns 0 when reading the log failed due to IO errors.
 */
int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
{
	struct al_transaction *buffer;
	int i;
	int rv;
	int mx;
	int active_extents = 0;
	int transactions = 0;
	int found_valid = 0;
	int from = 0;
	int to = 0;
	u32 from_tnr = 0;
	u32 to_tnr = 0;
	u32 cnr;

	mx = div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT);

	/* lock out all other meta data io for now,
	 * and make sure the page is mapped.
	 */
	buffer = drbd_md_get_buffer(mdev);
	if (!buffer)
		return 0;

	/* Find the valid transaction in the log */
	for (i = 0; i <= mx; i++) {
		rv = drbd_al_read_tr(mdev, bdev, buffer, i);
		if (rv == 0)
			continue;
		if (rv == -1) {
			drbd_md_put_buffer(mdev);
			return 0;
		}
		cnr = be32_to_cpu(buffer->tr_number);

		if (++found_valid == 1) {
			from = i;
			to = i;
			from_tnr = cnr;
			to_tnr = cnr;
			continue;
		}
		if ((int)cnr - (int)from_tnr < 0) {
			D_ASSERT(from_tnr - cnr + i - from == mx+1);
			from = i;
			from_tnr = cnr;
		}
		if ((int)cnr - (int)to_tnr > 0) {
			D_ASSERT(cnr - to_tnr == i - to);
			to = i;
			to_tnr = cnr;
		}
	}

	if (!found_valid) {
		dev_warn(DEV, "No usable activity log found.\n");
		drbd_md_put_buffer(mdev);
		return 1;
	}

	/* Read the valid transactions.
	 * dev_info(DEV, "Reading from %d to %d.\n",from,to); */
	i = from;
	while (1) {
		int j, pos;
		unsigned int extent_nr;
		unsigned int trn;

		rv = drbd_al_read_tr(mdev, bdev, buffer, i);
		ERR_IF(rv == 0) goto cancel;
		if (rv == -1) {
			drbd_md_put_buffer(mdev);
			return 0;
		}

		trn = be32_to_cpu(buffer->tr_number);

		spin_lock_irq(&mdev->al_lock);

		/* This loop runs backwards because in the cyclic
		   elements there might be an old version of the
		   updated element (in slot 0). So the element in slot 0
		   can overwrite old versions. */
		for (j = AL_EXTENTS_PT; j >= 0; j--) {
			pos = be32_to_cpu(buffer->updates[j].pos);
			extent_nr = be32_to_cpu(buffer->updates[j].extent);

			if (extent_nr == LC_FREE)
				continue;

			lc_set(mdev->act_log, extent_nr, pos);
			active_extents++;
		}
		spin_unlock_irq(&mdev->al_lock);

		transactions++;

	cancel:
		if (i == to)
			break;
		i++;
		if (i > mx)
			i = 0;
	}

	mdev->al_tr_number = to_tnr+1;
	mdev->al_tr_pos = to;
	if (++mdev->al_tr_pos >
	    div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT))
		mdev->al_tr_pos = 0;

	/* ok, we are done with it */
	drbd_md_put_buffer(mdev);

	dev_info(DEV, "Found %d transactions (%d active extents) in activity log.\n",
	     transactions, active_extents);

	return 1;
}

/**
 * drbd_al_apply_to_bm() - Sets the bitmap to dirty (1) where covered by active AL extents
 * @mdev:	DRBD device.
 */
void drbd_al_apply_to_bm(struct drbd_conf *mdev)
{
	unsigned int enr;
	unsigned long add = 0;
	char ppb[10];
	int i, tmp;

	wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));

	for (i = 0; i < mdev->act_log->nr_elements; i++) {
		enr = lc_element_by_index(mdev->act_log, i)->lc_number;
		if (enr == LC_FREE)
			continue;
		tmp = drbd_bm_ALe_set_all(mdev, enr);
		dynamic_dev_dbg(DEV, "AL: set %d bits in extent %u\n", tmp, enr);
		add += tmp;
	}

	lc_unlock(mdev->act_log);
	wake_up(&mdev->al_wait);

	dev_info(DEV, "Marked additional %s as out-of-sync based on AL.\n",
	     ppsize(ppb, Bit2KB(add)));
}

static int _try_lc_del(struct drbd_conf *mdev, struct lc_element *al_ext)
{
	int rv;

	spin_lock_irq(&mdev->al_lock);
	rv = (al_ext->refcnt == 0);
	if (likely(rv))
		lc_del(mdev->act_log, al_ext);
	spin_unlock_irq(&mdev->al_lock);

	return rv;
}

/**
 * drbd_al_shrink() - Removes all active extents from the activity log
 * @mdev:	DRBD device.
 *
 * Removes all active extents from the activity log, waiting until
 * the reference count of each entry has dropped to 0 first, of course.
 *
 * You need to lock mdev->act_log with lc_try_lock() / lc_unlock()
 */
void drbd_al_shrink(struct drbd_conf *mdev)
{
	struct lc_element *al_ext;
	int i;

	D_ASSERT(test_bit(__LC_DIRTY, &mdev->act_log->flags));

	for (i = 0; i < mdev->act_log->nr_elements; i++) {
		al_ext = lc_element_by_index(mdev->act_log, i);
		if (al_ext->lc_number == LC_FREE)
			continue;
		wait_event(mdev->al_wait, _try_lc_del(mdev, al_ext));
	}

	wake_up(&mdev->al_wait);
}

static int w_update_odbm(struct drbd_conf *mdev, struct drbd_work *w, int unused)
{
	struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w);

	if (!get_ldev(mdev)) {
		if (__ratelimit(&drbd_ratelimit_state))
			dev_warn(DEV, "Can not update on disk bitmap, local IO disabled.\n");
		kfree(udw);
		return 1;
	}

	drbd_bm_write_page(mdev, rs_extent_to_bm_page(udw->enr));
	put_ldev(mdev);

	kfree(udw);

	if (drbd_bm_total_weight(mdev) <= mdev->rs_failed) {
		switch (mdev->state.conn) {
		case C_SYNC_SOURCE:  case C_SYNC_TARGET:
		case C_PAUSED_SYNC_S: case C_PAUSED_SYNC_T:
			drbd_resync_finished(mdev);
		default:
			/* nothing to do */
			break;
		}
	}
	drbd_bcast_sync_progress(mdev);

	return 1;
}


/* ATTENTION. The AL's extents are 4MB each, while the extents in the
 * resync LRU-cache are 16MB each.
 * The caller of this function has to hold a get_ldev() reference.
 *
 * TODO will be obsoleted once we have a caching lru of the on disk bitmap
 */
static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector,
				      int count, int success)
{
	struct lc_element *e;
	struct update_odbm_work *udw;

	unsigned int enr;

	D_ASSERT(atomic_read(&mdev->local_cnt));

	/* I simply assume that a sector/size pair never crosses
	 * a 16 MB extent border. (Currently this is true...) */
	enr = BM_SECT_TO_EXT(sector);

	e = lc_get(mdev->resync, enr);
	if (e) {
		struct bm_extent *ext = lc_entry(e, struct bm_extent, lce);
		if (ext->lce.lc_number == enr) {
			if (success)
				ext->rs_left -= count;
			else
				ext->rs_failed += count;
			if (ext->rs_left < ext->rs_failed) {
				dev_warn(DEV, "BAD! sector=%llus enr=%u rs_left=%d "
				    "rs_failed=%d count=%d cstate=%s\n",
				     (unsigned long long)sector,
				     ext->lce.lc_number, ext->rs_left,
				     ext->rs_failed, count,
				     drbd_conn_str(mdev->state.conn));

				/* We don't expect to be able to clear more bits
				 * than have been set when we originally counted
				 * the set bits to cache that value in ext->rs_left.
				 * Whatever the reason (disconnect during resync,
				 * delayed local completion of an application write),
				 * try to fix it up by recounting here. */
				ext->rs_left = drbd_bm_e_weight(mdev, enr);
			}
		} else {
			/* Normally this element should be in the cache,
			 * since drbd_rs_begin_io() pulled it already in.
			 *
			 * But maybe an application write finished, and we set
			 * something outside the resync lru_cache in sync.
			 */
			int rs_left = drbd_bm_e_weight(mdev, enr);
			if (ext->flags != 0) {
				dev_warn(DEV, "changing resync lce: %d[%u;%02lx]"
				     " -> %d[%u;00]\n",
				     ext->lce.lc_number, ext->rs_left,
				     ext->flags, enr, rs_left);
				ext->flags = 0;
			}
			if (ext->rs_failed) {
				dev_warn(DEV, "Kicking resync_lru element enr=%u "
				     "out with rs_failed=%d\n",
				     ext->lce.lc_number, ext->rs_failed);
			}
			ext->rs_left = rs_left;
			ext->rs_failed = success ? 0 : count;
			lc_changed(mdev->resync, &ext->lce);
		}
		lc_put(mdev->resync, &ext->lce);
		/* no race, we are within the al_lock! */

		if (ext->rs_left == ext->rs_failed) {
			ext->rs_failed = 0;

			udw = kmalloc(sizeof(*udw), GFP_ATOMIC);
			if (udw) {
				udw->enr = ext->lce.lc_number;
				udw->w.cb = w_update_odbm;
				drbd_queue_work_front(&mdev->data.work, &udw->w);
			} else {
				dev_warn(DEV, "Could not kmalloc an udw\n");
			}
		}
	} else {
		dev_err(DEV, "lc_get() failed! locked=%d/%d flags=%lu\n",
		    mdev->resync_locked,
		    mdev->resync->nr_elements,
		    mdev->resync->flags);
	}
}

void drbd_advance_rs_marks(struct drbd_conf *mdev, unsigned long still_to_go)
{
	unsigned long now = jiffies;
	unsigned long last = mdev->rs_mark_time[mdev->rs_last_mark];
	int next = (mdev->rs_last_mark + 1) % DRBD_SYNC_MARKS;
	if (time_after_eq(now, last + DRBD_SYNC_MARK_STEP)) {
		if (mdev->rs_mark_left[mdev->rs_last_mark] != still_to_go &&
		    mdev->state.conn != C_PAUSED_SYNC_T &&
		    mdev->state.conn != C_PAUSED_SYNC_S) {
			mdev->rs_mark_time[next] = now;
			mdev->rs_mark_left[next] = still_to_go;
			mdev->rs_last_mark = next;
		}
	}
}

/* clear the bit corresponding to the piece of storage in question:
 * size bytes of data starting at sector.  Only clear the bits of the affected
 * one or more _aligned_ BM_BLOCK_SIZE blocks.
 *
 * called by worker on C_SYNC_TARGET and receiver on SyncSource.
 *
 */
void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size,
		       const char *file, const unsigned int line)
{
	/* Is called from worker and receiver context _only_ */
	unsigned long sbnr, ebnr, lbnr;
	unsigned long count = 0;
	sector_t esector, nr_sectors;
	int wake_up = 0;
	unsigned long flags;

	if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
		dev_err(DEV, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n",
				(unsigned long long)sector, size);
		return;
	}
	nr_sectors = drbd_get_capacity(mdev->this_bdev);
	esector = sector + (size >> 9) - 1;

	ERR_IF(sector >= nr_sectors) return;
	ERR_IF(esector >= nr_sectors) esector = (nr_sectors-1);

	lbnr = BM_SECT_TO_BIT(nr_sectors-1);

	/* we clear it (in sync).
	 * round up start sector, round down end sector.  we make sure we only
	 * clear full, aligned, BM_BLOCK_SIZE (4K) blocks */
	if (unlikely(esector < BM_SECT_PER_BIT-1))
		return;
	if (unlikely(esector == (nr_sectors-1)))
		ebnr = lbnr;
	else
		ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
	sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);

	if (sbnr > ebnr)
		return;

	/*
	 * ok, (capacity & 7) != 0 sometimes, but who cares...
	 * we count rs_{total,left} in bits, not sectors.
	 */
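	/* By now sbnr/ebnr have been rounded so that only bitmap bits whose
	 * 4 KiB block is completely covered by [sector, esector] get cleared.
	 * Worked example, assuming 8 sectors per bitmap bit: a 4 KiB write
	 * starting at sector 9 covers sectors 9..16, giving sbnr = (9+7)>>3 = 2
	 * and ebnr = (16-7)>>3 = 1, so it already returned above without
	 * clearing anything, since it fully covers no block. */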
	count = drbd_bm_clear_bits(mdev, sbnr, ebnr);
	if (count && get_ldev(mdev)) {
		drbd_advance_rs_marks(mdev, drbd_bm_total_weight(mdev));
		spin_lock_irqsave(&mdev->al_lock, flags);
		drbd_try_clear_on_disk_bm(mdev, sector, count, true);
		spin_unlock_irqrestore(&mdev->al_lock, flags);

		/* just wake_up unconditional now, various lc_changed(),
		 * lc_put() in drbd_try_clear_on_disk_bm(). */
		wake_up = 1;
		put_ldev(mdev);
	}
	if (wake_up)
		wake_up(&mdev->al_wait);
}

/*
 * this is intended to set one request worth of data out of sync.
 * affects at least 1 bit,
 * and at most 1+DRBD_MAX_BIO_SIZE/BM_BLOCK_SIZE bits.
 *
 * called by tl_clear and drbd_send_dblock (==drbd_make_request).
 * so this can be _any_ process.
 */
int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size,
			    const char *file, const unsigned int line)
{
	unsigned long sbnr, ebnr, lbnr, flags;
	sector_t esector, nr_sectors;
	unsigned int enr, count = 0;
	struct lc_element *e;

	/* this should be an empty REQ_FLUSH */
	if (size == 0)
		return 0;

	if (size < 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
		dev_err(DEV, "sector: %llus, size: %d\n",
			(unsigned long long)sector, size);
		return 0;
	}

	if (!get_ldev(mdev))
		return 0; /* no disk, no metadata, no bitmap to set bits in */

	nr_sectors = drbd_get_capacity(mdev->this_bdev);
	esector = sector + (size >> 9) - 1;

	ERR_IF(sector >= nr_sectors)
		goto out;
	ERR_IF(esector >= nr_sectors)
		esector = (nr_sectors-1);

	lbnr = BM_SECT_TO_BIT(nr_sectors-1);

	/* we set it out of sync,
	 * we do not need to round anything here */
	sbnr = BM_SECT_TO_BIT(sector);
	ebnr = BM_SECT_TO_BIT(esector);

	/* ok, (capacity & 7) != 0 sometimes, but who cares...
	 * we count rs_{total,left} in bits, not sectors. */
	spin_lock_irqsave(&mdev->al_lock, flags);
	count = drbd_bm_set_bits(mdev, sbnr, ebnr);

	enr = BM_SECT_TO_EXT(sector);
	e = lc_find(mdev->resync, enr);
	if (e)
		lc_entry(e, struct bm_extent, lce)->rs_left += count;
	spin_unlock_irqrestore(&mdev->al_lock, flags);

out:
	put_ldev(mdev);

	return count;
}
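
/* Resync extents are taken through two stages: _bme_get() below grabs a
 * reference on the 16 MB extent in the resync LRU and marks it
 * BME_NO_WRITES, which makes _al_get() above refuse new application writes
 * to that area; once no overlapping AL extent is in use any more, the
 * caller sets BME_LOCKED and may issue resync IO.  mdev->resync_locked
 * counts how many extents are currently reserved that way. */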

static
struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr)
{
	struct lc_element *e;
	struct bm_extent *bm_ext;
	int wakeup = 0;
	unsigned long rs_flags;

	spin_lock_irq(&mdev->al_lock);
	if (mdev->resync_locked > mdev->resync->nr_elements/2) {
		spin_unlock_irq(&mdev->al_lock);
		return NULL;
	}
	e = lc_get(mdev->resync, enr);
	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
	if (bm_ext) {
		if (bm_ext->lce.lc_number != enr) {
			bm_ext->rs_left = drbd_bm_e_weight(mdev, enr);
			bm_ext->rs_failed = 0;
			lc_changed(mdev->resync, &bm_ext->lce);
			wakeup = 1;
		}
		if (bm_ext->lce.refcnt == 1)
			mdev->resync_locked++;
		set_bit(BME_NO_WRITES, &bm_ext->flags);
	}
	rs_flags = mdev->resync->flags;
	spin_unlock_irq(&mdev->al_lock);
	if (wakeup)
		wake_up(&mdev->al_wait);

	if (!bm_ext) {
		if (rs_flags & LC_STARVING)
			dev_warn(DEV, "Have to wait for element"
			     " (resync LRU too small?)\n");
		BUG_ON(rs_flags & LC_DIRTY);
	}

	return bm_ext;
}

static int _is_in_al(struct drbd_conf *mdev, unsigned int enr)
{
	struct lc_element *al_ext;
	int rv = 0;

	spin_lock_irq(&mdev->al_lock);
	if (unlikely(enr == mdev->act_log->new_number))
		rv = 1;
	else {
		al_ext = lc_find(mdev->act_log, enr);
		if (al_ext) {
			if (al_ext->refcnt)
				rv = 1;
		}
	}
	spin_unlock_irq(&mdev->al_lock);

	/*
	if (unlikely(rv)) {
		dev_info(DEV, "Delaying sync read until app's write is done\n");
	}
	*/
	return rv;
}

/**
 * drbd_rs_begin_io() - Gets an extent in the resync LRU cache and sets it to BME_LOCKED
 * @mdev:	DRBD device.
 * @sector:	The sector number.
 *
 * This function sleeps on al_wait. Returns 0 on success, -EINTR if interrupted.
 */
int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
{
	unsigned int enr = BM_SECT_TO_EXT(sector);
	struct bm_extent *bm_ext;
	int i, sig;
	int sa = 200; /* Step aside 200 times, then grab the extent and let app-IO wait.
			 200 times -> 20 seconds. */

retry:
	sig = wait_event_interruptible(mdev->al_wait,
			(bm_ext = _bme_get(mdev, enr)));
	if (sig)
		return -EINTR;

	if (test_bit(BME_LOCKED, &bm_ext->flags))
		return 0;

	for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
		sig = wait_event_interruptible(mdev->al_wait,
					       !_is_in_al(mdev, enr * AL_EXT_PER_BM_SECT + i) ||
					       test_bit(BME_PRIORITY, &bm_ext->flags));

		if (sig || (test_bit(BME_PRIORITY, &bm_ext->flags) && sa)) {
			spin_lock_irq(&mdev->al_lock);
			if (lc_put(mdev->resync, &bm_ext->lce) == 0) {
				bm_ext->flags = 0; /* clears BME_NO_WRITES and possibly BME_PRIORITY */
				mdev->resync_locked--;
				wake_up(&mdev->al_wait);
			}
			spin_unlock_irq(&mdev->al_lock);
			if (sig)
				return -EINTR;
			if (schedule_timeout_interruptible(HZ/10))
				return -EINTR;
			if (sa && --sa == 0)
				dev_warn(DEV, "drbd_rs_begin_io() stepped aside for 20sec."
					 " Resync stalled?\n");
			goto retry;
		}
	}
	set_bit(BME_LOCKED, &bm_ext->flags);
	return 0;
}

/**
 * drbd_try_rs_begin_io() - Gets an extent in the resync LRU cache, does not sleep
 * @mdev:	DRBD device.
 * @sector:	The sector number.
 *
 * Gets an extent in the resync LRU cache, sets it to BME_NO_WRITES, then
 * tries to set it to BME_LOCKED. Returns 0 upon success, and -EAGAIN
 * if there is still application IO going on in this area.
 */
int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
{
	unsigned int enr = BM_SECT_TO_EXT(sector);
	const unsigned int al_enr = enr*AL_EXT_PER_BM_SECT;
	struct lc_element *e;
	struct bm_extent *bm_ext;
	int i;

	spin_lock_irq(&mdev->al_lock);
	if (mdev->resync_wenr != LC_FREE && mdev->resync_wenr != enr) {
		/* in case you have very heavy scattered io, it may
		 * stall the syncer for an undefined amount of time if we
		 * give up the ref count when we try again and requeue.
		 *
		 * if we don't give up the refcount, but the next time
		 * we are scheduled this extent has been "synced" by new
		 * application writes, we'd miss the lc_put on the
		 * extent we keep the refcount on.
		 * so we remembered which extent we had to try again, and
		 * if the next requested one is something else, we do
		 * the lc_put here...
		 * we also have to wake_up
		 */
		e = lc_find(mdev->resync, mdev->resync_wenr);
		bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
		if (bm_ext) {
			D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags));
			D_ASSERT(test_bit(BME_NO_WRITES, &bm_ext->flags));
			clear_bit(BME_NO_WRITES, &bm_ext->flags);
			mdev->resync_wenr = LC_FREE;
			if (lc_put(mdev->resync, &bm_ext->lce) == 0)
				mdev->resync_locked--;
			wake_up(&mdev->al_wait);
		} else {
			dev_alert(DEV, "LOGIC BUG\n");
		}
	}
	/* TRY. */
	e = lc_try_get(mdev->resync, enr);
	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
	if (bm_ext) {
		if (test_bit(BME_LOCKED, &bm_ext->flags))
			goto proceed;
		if (!test_and_set_bit(BME_NO_WRITES, &bm_ext->flags)) {
			mdev->resync_locked++;
		} else {
			/* we did set the BME_NO_WRITES,
			 * but then could not set BME_LOCKED,
			 * so we tried again.
			 * drop the extra reference. */
			bm_ext->lce.refcnt--;
			D_ASSERT(bm_ext->lce.refcnt > 0);
		}
		goto check_al;
	} else {
		/* do we rather want to try later? */
		if (mdev->resync_locked > mdev->resync->nr_elements-3)
			goto try_again;
		/* Do or do not. There is no try. -- Yoda */
		e = lc_get(mdev->resync, enr);
		bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
		if (!bm_ext) {
			const unsigned long rs_flags = mdev->resync->flags;
			if (rs_flags & LC_STARVING)
				dev_warn(DEV, "Have to wait for element"
				     " (resync LRU too small?)\n");
			BUG_ON(rs_flags & LC_DIRTY);
			goto try_again;
		}
		if (bm_ext->lce.lc_number != enr) {
			bm_ext->rs_left = drbd_bm_e_weight(mdev, enr);
			bm_ext->rs_failed = 0;
			lc_changed(mdev->resync, &bm_ext->lce);
			wake_up(&mdev->al_wait);
			D_ASSERT(test_bit(BME_LOCKED, &bm_ext->flags) == 0);
		}
		set_bit(BME_NO_WRITES, &bm_ext->flags);
		D_ASSERT(bm_ext->lce.refcnt == 1);
		mdev->resync_locked++;
		goto check_al;
	}
check_al:
	for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
		if (unlikely(al_enr+i == mdev->act_log->new_number))
			goto try_again;
		if (lc_is_used(mdev->act_log, al_enr+i))
			goto try_again;
	}
	set_bit(BME_LOCKED, &bm_ext->flags);
proceed:
	mdev->resync_wenr = LC_FREE;
	spin_unlock_irq(&mdev->al_lock);
	return 0;

try_again:
	if (bm_ext)
		mdev->resync_wenr = enr;
	spin_unlock_irq(&mdev->al_lock);
	return -EAGAIN;
}

void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector)
{
	unsigned int enr = BM_SECT_TO_EXT(sector);
	struct lc_element *e;
	struct bm_extent *bm_ext;
	unsigned long flags;

	spin_lock_irqsave(&mdev->al_lock, flags);
	e = lc_find(mdev->resync, enr);
	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
	if (!bm_ext) {
		spin_unlock_irqrestore(&mdev->al_lock, flags);
		if (__ratelimit(&drbd_ratelimit_state))
			dev_err(DEV, "drbd_rs_complete_io() called, but extent not found\n");
		return;
	}

	if (bm_ext->lce.refcnt == 0) {
		spin_unlock_irqrestore(&mdev->al_lock, flags);
		dev_err(DEV, "drbd_rs_complete_io(,%llu [=%u]) called, "
		    "but refcnt is 0!?\n",
		    (unsigned long long)sector, enr);
		return;
	}

	if (lc_put(mdev->resync, &bm_ext->lce) == 0) {
		bm_ext->flags = 0; /* clear BME_LOCKED, BME_NO_WRITES and BME_PRIORITY */
		mdev->resync_locked--;
		wake_up(&mdev->al_wait);
	}

	spin_unlock_irqrestore(&mdev->al_lock, flags);
}

/**
 * drbd_rs_cancel_all() - Removes all extents from the resync LRU (even BME_LOCKED)
 * @mdev:	DRBD device.
 */
void drbd_rs_cancel_all(struct drbd_conf *mdev)
{
	spin_lock_irq(&mdev->al_lock);

	if (get_ldev_if_state(mdev, D_FAILED)) { /* Makes sure ->resync is there. */
		lc_reset(mdev->resync);
		put_ldev(mdev);
	}
	mdev->resync_locked = 0;
	mdev->resync_wenr = LC_FREE;
	spin_unlock_irq(&mdev->al_lock);
	wake_up(&mdev->al_wait);
}

/**
 * drbd_rs_del_all() - Gracefully remove all extents from the resync LRU
 * @mdev:	DRBD device.
 *
 * Returns 0 upon success, -EAGAIN if at least one reference count was
 * not zero.
 */
int drbd_rs_del_all(struct drbd_conf *mdev)
{
	struct lc_element *e;
	struct bm_extent *bm_ext;
	int i;

	spin_lock_irq(&mdev->al_lock);

	if (get_ldev_if_state(mdev, D_FAILED)) {
		/* ok, ->resync is there. */
		for (i = 0; i < mdev->resync->nr_elements; i++) {
			e = lc_element_by_index(mdev->resync, i);
			bm_ext = lc_entry(e, struct bm_extent, lce);
			if (bm_ext->lce.lc_number == LC_FREE)
				continue;
			if (bm_ext->lce.lc_number == mdev->resync_wenr) {
				dev_info(DEV, "dropping %u in drbd_rs_del_all, apparently"
				     " got 'synced' by application io\n",
				     mdev->resync_wenr);
				D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags));
				D_ASSERT(test_bit(BME_NO_WRITES, &bm_ext->flags));
				clear_bit(BME_NO_WRITES, &bm_ext->flags);
				mdev->resync_wenr = LC_FREE;
				lc_put(mdev->resync, &bm_ext->lce);
			}
			if (bm_ext->lce.refcnt != 0) {
				dev_info(DEV, "Retrying drbd_rs_del_all() later. "
				     "refcnt=%d\n", bm_ext->lce.refcnt);
				put_ldev(mdev);
				spin_unlock_irq(&mdev->al_lock);
				return -EAGAIN;
			}
			D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags));
			D_ASSERT(!test_bit(BME_NO_WRITES, &bm_ext->flags));
			lc_del(mdev->resync, &bm_ext->lce);
		}
		D_ASSERT(mdev->resync->used == 0);
		put_ldev(mdev);
	}
	spin_unlock_irq(&mdev->al_lock);
	wake_up(&mdev->al_wait);

	return 0;
}

/**
 * drbd_rs_failed_io() - Record information on a failure to resync the specified blocks
 * @mdev:	DRBD device.
 * @sector:	The sector number.
 * @size:	Size of failed IO operation, in bytes.
 */
void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size)
{
	/* Is called from worker and receiver context _only_ */
	unsigned long sbnr, ebnr, lbnr;
	unsigned long count;
	sector_t esector, nr_sectors;
	int wake_up = 0;

	if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
		dev_err(DEV, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n",
				(unsigned long long)sector, size);
		return;
	}
	nr_sectors = drbd_get_capacity(mdev->this_bdev);
	esector = sector + (size >> 9) - 1;

	ERR_IF(sector >= nr_sectors) return;
	ERR_IF(esector >= nr_sectors) esector = (nr_sectors-1);

	lbnr = BM_SECT_TO_BIT(nr_sectors-1);

	/*
	 * round up start sector, round down end sector. we make sure we only
	 * handle full, aligned, BM_BLOCK_SIZE (4K) blocks */
	if (unlikely(esector < BM_SECT_PER_BIT-1))
		return;
	if (unlikely(esector == (nr_sectors-1)))
		ebnr = lbnr;
	else
		ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
	sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);

	if (sbnr > ebnr)
		return;

	/*
	 * ok, (capacity & 7) != 0 sometimes, but who cares...
	 * we count rs_{total,left} in bits, not sectors.
	 */
	spin_lock_irq(&mdev->al_lock);
	count = drbd_bm_count_bits(mdev, sbnr, ebnr);
	if (count) {
		mdev->rs_failed += count;

		if (get_ldev(mdev)) {
			drbd_try_clear_on_disk_bm(mdev, sector, count, false);
			put_ldev(mdev);
		}

		/* just wake_up unconditional now, various lc_changed(),
		 * lc_put() in drbd_try_clear_on_disk_bm(). */
		wake_up = 1;
	}
	spin_unlock_irq(&mdev->al_lock);
	if (wake_up)
		wake_up(&mdev->al_wait);
}