/*
   drbd_actlog.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */

#include <linux/slab.h>
#include <linux/crc32c.h>
#include <linux/drbd.h>
#include <linux/drbd_limits.h>
#include <linux/dynamic_debug.h>
#include "drbd_int.h"


enum al_transaction_types {
	AL_TR_UPDATE = 0,
	AL_TR_INITIALIZED = 0xffff
};
/* all fields on disc in big endian */
struct __packed al_transaction_on_disk {
	/* don't we all like magic */
	__be32	magic;

	/* to identify the most recent transaction block
	 * in the on disk ring buffer */
	__be32	tr_number;

	/* checksum on the full 4k block, with this field set to 0. */
	__be32	crc32c;

	/* type of transaction, special transaction types like:
	 * purge-all, set-all-idle, set-all-active, ... to-be-defined
	 * see also enum al_transaction_types */
	__be16	transaction_type;

	/* we currently allow only a few thousand extents,
	 * so 16bit will be enough for the slot number. */

	/* how many updates in this transaction */
	__be16	n_updates;

	/* maximum slot number, "al-extents" in drbd.conf speak.
	 * Having this in each transaction should make reconfiguration
	 * of that parameter easier. */
	__be16	context_size;

	/* slot number the context starts with */
	__be16	context_start_slot_nr;

	/* Some reserved bytes.  Expected usage is a 64bit counter of
	 * sectors-written since device creation, and other data generation tag
	 * supporting usage */
	__be32	__reserved[4];

	/* --- 36 byte used --- */

	/* Reserve space for up to AL_UPDATES_PER_TRANSACTION changes
	 * in one transaction, then use the remaining byte in the 4k block for
	 * context information.  "Flexible" number of updates per transaction
	 * does not help, as we have to account for the case when all update
	 * slots are used anyways, so it would only complicate code without
	 * additional benefit.
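	 *
	 * (Added illustration of the layout arithmetic, assuming the 64 update
	 * slots implied by the "36 + 64*6" note below: update_slot_nr[] takes
	 * 64 * 2 = 128 bytes and update_extent_nr[] takes 64 * 4 = 256 bytes;
	 * together with the 36 byte header that gives the 420 bytes accounted
	 * for below, leaving 4096 - 420 = 3676 bytes = 919 context slots.)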
	 */
	__be16	update_slot_nr[AL_UPDATES_PER_TRANSACTION];

	/* but the extent number is 32bit, which at an extent size of 4 MiB
	 * allows to cover device sizes of up to 2**54 Byte (16 PiB) */
	__be32	update_extent_nr[AL_UPDATES_PER_TRANSACTION];

	/* --- 420 bytes used (36 + 64*6) --- */

	/* 4096 - 420 = 3676 = 919 * 4 */
	__be32	context[AL_CONTEXT_PER_TRANSACTION];
};

struct update_al_work {
	struct drbd_work w;
	struct drbd_device *device;
	struct completion event;
	int err;
};


void *drbd_md_get_buffer(struct drbd_device *device)
{
	int r;

	wait_event(device->misc_wait,
		   (r = atomic_cmpxchg(&device->md_io_in_use, 0, 1)) == 0 ||
		   device->state.disk <= D_FAILED);

	return r ? NULL : page_address(device->md_io_page);
}

void drbd_md_put_buffer(struct drbd_device *device)
{
	if (atomic_dec_and_test(&device->md_io_in_use))
		wake_up(&device->misc_wait);
}

void wait_until_done_or_force_detached(struct drbd_device *device, struct drbd_backing_dev *bdev,
				       unsigned int *done)
{
	long dt;

	rcu_read_lock();
	dt = rcu_dereference(bdev->disk_conf)->disk_timeout;
	rcu_read_unlock();
	dt = dt * HZ / 10;
	if (dt == 0)
		dt = MAX_SCHEDULE_TIMEOUT;

	dt = wait_event_timeout(device->misc_wait,
			*done || test_bit(FORCE_DETACH, &device->flags), dt);
	if (dt == 0) {
		drbd_err(device, "meta-data IO operation timed out\n");
		drbd_chk_io_error(device, 1, DRBD_FORCE_DETACH);
	}
}

static int _drbd_md_sync_page_io(struct drbd_device *device,
				 struct drbd_backing_dev *bdev,
				 struct page *page, sector_t sector,
				 int rw, int size)
{
	struct bio *bio;
	int err;

	device->md_io.done = 0;
	device->md_io.error = -ENODEV;

	if ((rw & WRITE) && !test_bit(MD_NO_FUA, &device->flags))
		rw |= REQ_FUA | REQ_FLUSH;
	rw |= REQ_SYNC;

	bio = bio_alloc_drbd(GFP_NOIO);
	bio->bi_bdev = bdev->md_bdev;
	bio->bi_iter.bi_sector = sector;
	err = -EIO;
	if (bio_add_page(bio, page, size, 0) != size)
		goto out;
	bio->bi_private = &device->md_io;
	bio->bi_end_io = drbd_md_io_complete;
	bio->bi_rw = rw;

	if (!(rw & WRITE) && device->state.disk == D_DISKLESS && device->ldev == NULL)
		/* special case, drbd_md_read() during drbd_adm_attach(): no get_ldev */
		;
	else if (!get_ldev_if_state(device, D_ATTACHING)) {
		/* Corresponding put_ldev in drbd_md_io_complete() */
		drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n");
		err = -ENODEV;
		goto out;
	}

	bio_get(bio); /* one bio_put() is in the completion handler */
	atomic_inc(&device->md_io_in_use); /* drbd_md_put_buffer() is in the completion handler */
	if (drbd_insert_fault(device, (rw & WRITE) ?
			      DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
		bio_endio(bio, -EIO);
	else
		submit_bio(rw, bio);
	wait_until_done_or_force_detached(device, bdev, &device->md_io.done);
	if (bio_flagged(bio, BIO_UPTODATE))
		err = device->md_io.error;

 out:
	bio_put(bio);
	return err;
}

int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bdev,
			 sector_t sector, int rw)
{
	int err;
	struct page *iop = device->md_io_page;

	D_ASSERT(device, atomic_read(&device->md_io_in_use) == 1);

	BUG_ON(!bdev->md_bdev);

	dynamic_drbd_dbg(device, "meta_data io: %s [%d]:%s(,%llus,%s) %pS\n",
	     current->comm, current->pid, __func__,
	     (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ",
	     (void*)_RET_IP_ );

	if (sector < drbd_md_first_sector(bdev) ||
	    sector + 7 > drbd_md_last_sector(bdev))
		drbd_alert(device, "%s [%d]:%s(,%llus,%s) out of range md access!\n",
		     current->comm, current->pid, __func__,
		     (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");

	/* we do all our meta data IO in aligned 4k blocks. */
	err = _drbd_md_sync_page_io(device, bdev, iop, sector, rw, 4096);
	if (err) {
		drbd_err(device, "drbd_md_sync_page_io(,%llus,%s) failed with error %d\n",
		    (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ", err);
	}
	return err;
}

static struct bm_extent *find_active_resync_extent(struct drbd_device *device, unsigned int enr)
{
	struct lc_element *tmp;
	tmp = lc_find(device->resync, enr/AL_EXT_PER_BM_SECT);
	if (unlikely(tmp != NULL)) {
		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
		if (test_bit(BME_NO_WRITES, &bm_ext->flags))
			return bm_ext;
	}
	return NULL;
}

static struct lc_element *_al_get(struct drbd_device *device, unsigned int enr, bool nonblock)
{
	struct lc_element *al_ext;
	struct bm_extent *bm_ext;
	int wake;

	spin_lock_irq(&device->al_lock);
	bm_ext = find_active_resync_extent(device, enr);
	if (bm_ext) {
		wake = !test_and_set_bit(BME_PRIORITY, &bm_ext->flags);
		spin_unlock_irq(&device->al_lock);
		if (wake)
			wake_up(&device->al_wait);
		return NULL;
	}
	if (nonblock)
		al_ext = lc_try_get(device->act_log, enr);
	else
		al_ext = lc_get(device->act_log, enr);
	spin_unlock_irq(&device->al_lock);
	return al_ext;
}

bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval *i)
{
	/* for bios crossing activity log extent boundaries,
	 * we may need to activate two extents in one go */
	unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
	unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);

	D_ASSERT(device, (unsigned)(last - first) <= 1);
	D_ASSERT(device, atomic_read(&device->local_cnt) > 0);

	/* FIXME figure out a fast path for bios crossing AL extent boundaries */
	if (first != last)
		return false;

	return _al_get(device, first, true);
}

bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i)
{
	/* for bios crossing activity log extent boundaries,
	 * we may need to activate two extents in one go */
	unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
	unsigned last = i->size == 0 ?
		first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);
	unsigned enr;
	bool need_transaction = false;

	D_ASSERT(device, first <= last);
	D_ASSERT(device, atomic_read(&device->local_cnt) > 0);

	for (enr = first; enr <= last; enr++) {
		struct lc_element *al_ext;
		wait_event(device->al_wait,
				(al_ext = _al_get(device, enr, false)) != NULL);
		if (al_ext->lc_number != enr)
			need_transaction = true;
	}
	return need_transaction;
}

static int al_write_transaction(struct drbd_device *device, bool delegate);

/* When called through generic_make_request(), we must delegate
 * activity log I/O to the worker thread: a further request
 * submitted via generic_make_request() within the same task
 * would be queued on current->bio_list, and would only start
 * after this function returns (see generic_make_request()).
 *
 * However, if we *are* the worker, we must not delegate to ourselves.
 */

/*
 * @delegate:	delegate activity log I/O to the worker thread
 */
void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate)
{
	bool locked = false;

	BUG_ON(delegate && current == first_peer_device(device)->connection->worker.task);

	/* Serialize multiple transactions.
	 * This uses test_and_set_bit, memory barrier is implicit.
	 */
	wait_event(device->al_wait,
			device->act_log->pending_changes == 0 ||
			(locked = lc_try_lock_for_transaction(device->act_log)));

	if (locked) {
		/* Double check: it may have been committed by someone else,
		 * while we have been waiting for the lock. */
		if (device->act_log->pending_changes) {
			bool write_al_updates;

			rcu_read_lock();
			write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates;
			rcu_read_unlock();

			if (write_al_updates)
				al_write_transaction(device, delegate);
			spin_lock_irq(&device->al_lock);
			/* FIXME
			if (err)
				we need an "lc_cancel" here;
			*/
			lc_committed(device->act_log);
			spin_unlock_irq(&device->al_lock);
		}
		lc_unlock(device->act_log);
		wake_up(&device->al_wait);
	}
}

/*
 * @delegate:	delegate activity log I/O to the worker thread
 */
void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i, bool delegate)
{
	BUG_ON(delegate && current == first_peer_device(device)->connection->worker.task);

	if (drbd_al_begin_io_prepare(device, i))
		drbd_al_begin_io_commit(device, delegate);
}

int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i)
{
	struct lru_cache *al = device->act_log;
	/* for bios crossing activity log extent boundaries,
	 * we may need to activate two extents in one go */
	unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
	unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);
	unsigned nr_al_extents;
	unsigned available_update_slots;
	unsigned enr;

	D_ASSERT(device, first <= last);

	nr_al_extents = 1 + last - first;
	/* worst case: all touched extents are cold.
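	 * Added illustration (not from the original source): with 4 MiB AL
	 * extents, a bio that straddles one extent boundary gives
	 * nr_al_extents == 2 even if the bio itself is only a few KiB.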
	 */
	available_update_slots = min(al->nr_elements - al->used,
				al->max_pending_changes - al->pending_changes);

	/* We want all necessary updates for a given request within the same transaction
	 * We could first check how many updates are *actually* needed,
	 * and use that instead of the worst-case nr_al_extents */
	if (available_update_slots < nr_al_extents)
		return -EWOULDBLOCK;

	/* Is resync active in this area? */
	for (enr = first; enr <= last; enr++) {
		struct lc_element *tmp;
		tmp = lc_find(device->resync, enr/AL_EXT_PER_BM_SECT);
		if (unlikely(tmp != NULL)) {
			struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
			if (test_bit(BME_NO_WRITES, &bm_ext->flags)) {
				if (!test_and_set_bit(BME_PRIORITY, &bm_ext->flags))
					return -EBUSY;
				return -EWOULDBLOCK;
			}
		}
	}

	/* Checkout the refcounts.
	 * Given that we checked for available elements and update slots above,
	 * this has to be successful. */
	for (enr = first; enr <= last; enr++) {
		struct lc_element *al_ext;
		al_ext = lc_get_cumulative(device->act_log, enr);
		if (!al_ext)
			drbd_info(device, "LOGIC BUG for enr=%u\n", enr);
	}
	return 0;
}

void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i)
{
	/* for bios crossing activity log extent boundaries,
	 * we may need to activate two extents in one go */
	unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
	unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);
	unsigned enr;
	struct lc_element *extent;
	unsigned long flags;

	D_ASSERT(device, first <= last);
	spin_lock_irqsave(&device->al_lock, flags);

	for (enr = first; enr <= last; enr++) {
		extent = lc_find(device->act_log, enr);
		if (!extent) {
			drbd_err(device, "al_complete_io() called on inactive extent %u\n", enr);
			continue;
		}
		lc_put(device->act_log, extent);
	}
	spin_unlock_irqrestore(&device->al_lock, flags);
	wake_up(&device->al_wait);
}

#if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)
/* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT
 * are still coupled, or assume too much about their relation.
 * Code below will not work if this is violated.
 * Will be cleaned up with some followup patch.
 */
# error FIXME
#endif

static unsigned int al_extent_to_bm_page(unsigned int al_enr)
{
	return al_enr >>
		/* bit to page */
		((PAGE_SHIFT + 3) -
		/* al extent number to bit */
		 (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT));
}

static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device)
{
	const unsigned int stripes = device->ldev->md.al_stripes;
	const unsigned int stripe_size_4kB = device->ldev->md.al_stripe_size_4k;

	/* transaction number, modulo on-disk ring buffer wrap around */
	unsigned int t = device->al_tr_number % (device->ldev->md.al_size_4k);

	/* ... to aligned 4k on disk block */
	t = ((t % stripes) * stripe_size_4kB) + t/stripes;

	/* ... to 512 byte sector in activity log */
	t *= 8;

	/* ...
	 * plus offset to the on disk position */
	return device->ldev->md.md_offset + device->ldev->md.al_offset + t;
}

static int
_al_write_transaction(struct drbd_device *device)
{
	struct al_transaction_on_disk *buffer;
	struct lc_element *e;
	sector_t sector;
	int i, mx;
	unsigned extent_nr;
	unsigned crc = 0;
	int err = 0;

	if (!get_ldev(device)) {
		drbd_err(device, "disk is %s, cannot start al transaction\n",
			drbd_disk_str(device->state.disk));
		return -EIO;
	}

	/* The bitmap write may have failed, causing a state change. */
	if (device->state.disk < D_INCONSISTENT) {
		drbd_err(device,
			"disk is %s, cannot write al transaction\n",
			drbd_disk_str(device->state.disk));
		put_ldev(device);
		return -EIO;
	}

	buffer = drbd_md_get_buffer(device); /* protects md_io_buffer, al_tr_cycle, ... */
	if (!buffer) {
		drbd_err(device, "disk failed while waiting for md_io buffer\n");
		put_ldev(device);
		return -ENODEV;
	}

	memset(buffer, 0, sizeof(*buffer));
	buffer->magic = cpu_to_be32(DRBD_AL_MAGIC);
	buffer->tr_number = cpu_to_be32(device->al_tr_number);

	i = 0;

	/* Even though no one can start to change this list
	 * once we set the LC_LOCKED -- from drbd_al_begin_io(),
	 * lc_try_lock_for_transaction() --, someone may still
	 * be in the process of changing it. */
	spin_lock_irq(&device->al_lock);
	list_for_each_entry(e, &device->act_log->to_be_changed, list) {
		if (i == AL_UPDATES_PER_TRANSACTION) {
			i++;
			break;
		}
		buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index);
		buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number);
		if (e->lc_number != LC_FREE)
			drbd_bm_mark_for_writeout(device,
					al_extent_to_bm_page(e->lc_number));
		i++;
	}
	spin_unlock_irq(&device->al_lock);
	BUG_ON(i > AL_UPDATES_PER_TRANSACTION);

	buffer->n_updates = cpu_to_be16(i);
	for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) {
		buffer->update_slot_nr[i] = cpu_to_be16(-1);
		buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE);
	}

	buffer->context_size = cpu_to_be16(device->act_log->nr_elements);
	buffer->context_start_slot_nr = cpu_to_be16(device->al_tr_cycle);

	mx = min_t(int, AL_CONTEXT_PER_TRANSACTION,
		   device->act_log->nr_elements - device->al_tr_cycle);
	for (i = 0; i < mx; i++) {
		unsigned idx = device->al_tr_cycle + i;
		extent_nr = lc_element_by_index(device->act_log, idx)->lc_number;
		buffer->context[i] = cpu_to_be32(extent_nr);
	}
	for (; i < AL_CONTEXT_PER_TRANSACTION; i++)
		buffer->context[i] = cpu_to_be32(LC_FREE);

	device->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION;
	if (device->al_tr_cycle >= device->act_log->nr_elements)
		device->al_tr_cycle = 0;

	sector = al_tr_number_to_on_disk_sector(device);

	crc = crc32c(0, buffer, 4096);
	buffer->crc32c = cpu_to_be32(crc);

	if (drbd_bm_write_hinted(device))
		err = -EIO;
	else {
		bool write_al_updates;
		rcu_read_lock();
		write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates;
		rcu_read_unlock();
		if (write_al_updates) {
			if (drbd_md_sync_page_io(device, device->ldev, sector, WRITE)) {
				err = -EIO;
				drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR);
			} else {
				device->al_tr_number++;
				device->al_writ_cnt++;
			}
		}
	}

	drbd_md_put_buffer(device);
	put_ldev(device);

	return err;
}


static int
w_al_write_transaction(struct drbd_work *w, int unused)
{
	struct update_al_work *aw = container_of(w, struct update_al_work, w);
	struct drbd_device *device = aw->device;
	int err;

	err = _al_write_transaction(device);
	aw->err = err;
	complete(&aw->event);

	return err != -EIO ? err : 0;
}

/* Calls from worker context (see w_restart_disk_io()) need to write the
   transaction directly.  Others came through generic_make_request(),
   those need to delegate it to the worker. */
static int al_write_transaction(struct drbd_device *device, bool delegate)
{
	if (delegate) {
		struct update_al_work al_work;
		init_completion(&al_work.event);
		al_work.w.cb = w_al_write_transaction;
		al_work.device = device;
		drbd_queue_work_front(&first_peer_device(device)->connection->sender_work,
				      &al_work.w);
		wait_for_completion(&al_work.event);
		return al_work.err;
	} else
		return _al_write_transaction(device);
}

static int _try_lc_del(struct drbd_device *device, struct lc_element *al_ext)
{
	int rv;

	spin_lock_irq(&device->al_lock);
	rv = (al_ext->refcnt == 0);
	if (likely(rv))
		lc_del(device->act_log, al_ext);
	spin_unlock_irq(&device->al_lock);

	return rv;
}

/**
 * drbd_al_shrink() - Removes all active extents from the activity log
 * @device:	DRBD device.
 *
 * Removes all active extents from the activity log, waiting until
 * the reference count of each entry has dropped to 0 first, of course.
 *
 * You need to lock device->act_log with lc_try_lock() / lc_unlock()
 */
void drbd_al_shrink(struct drbd_device *device)
{
	struct lc_element *al_ext;
	int i;

	D_ASSERT(device, test_bit(__LC_LOCKED, &device->act_log->flags));

	for (i = 0; i < device->act_log->nr_elements; i++) {
		al_ext = lc_element_by_index(device->act_log, i);
		if (al_ext->lc_number == LC_FREE)
			continue;
		wait_event(device->al_wait, _try_lc_del(device, al_ext));
	}

	wake_up(&device->al_wait);
}

int drbd_initialize_al(struct drbd_device *device, void *buffer)
{
	struct al_transaction_on_disk *al = buffer;
	struct drbd_md *md = &device->ldev->md;
	sector_t al_base = md->md_offset + md->al_offset;
	int al_size_4k = md->al_stripes * md->al_stripe_size_4k;
	int i;

	memset(al, 0, 4096);
	al->magic = cpu_to_be32(DRBD_AL_MAGIC);
	al->transaction_type = cpu_to_be16(AL_TR_INITIALIZED);
	al->crc32c = cpu_to_be32(crc32c(0, al, 4096));

	for (i = 0; i < al_size_4k; i++) {
		int err = drbd_md_sync_page_io(device, device->ldev, al_base + i * 8, WRITE);
		if (err)
			return err;
	}
	return 0;
}

/* ATTENTION. The AL's extents are 4MB each, while the extents in the
 * resync LRU-cache are 16MB each.
 * The caller of this function has to hold a get_ldev() reference.
 *
 * TODO will be obsoleted once we have a caching lru of the on disk bitmap
 */
static void drbd_try_clear_on_disk_bm(struct drbd_device *device, sector_t sector,
				      int count, int success)
{
	struct lc_element *e;
	unsigned int enr;

	D_ASSERT(device, atomic_read(&device->local_cnt));

	/* I simply assume that a sector/size pair never crosses
	 * a 16 MB extent border. (Currently this is true...)
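	 * Added note, for orientation only: one resync extent covers 16 MiB,
	 * i.e. four of the 4 MiB AL extents, which is the conversion
	 * AL_EXT_PER_BM_SECT is used for elsewhere in this file; the
	 * sector/size pairs passed in here describe single requests that are
	 * assumed to be much smaller than that.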
	 */
	enr = BM_SECT_TO_EXT(sector);

	e = lc_get(device->resync, enr);
	if (e) {
		struct bm_extent *ext = lc_entry(e, struct bm_extent, lce);
		if (ext->lce.lc_number == enr) {
			if (success)
				ext->rs_left -= count;
			else
				ext->rs_failed += count;
			if (ext->rs_left < ext->rs_failed) {
				drbd_warn(device, "BAD! sector=%llus enr=%u rs_left=%d "
				    "rs_failed=%d count=%d cstate=%s\n",
				     (unsigned long long)sector,
				     ext->lce.lc_number, ext->rs_left,
				     ext->rs_failed, count,
				     drbd_conn_str(device->state.conn));

				/* We don't expect to be able to clear more bits
				 * than have been set when we originally counted
				 * the set bits to cache that value in ext->rs_left.
				 * Whatever the reason (disconnect during resync,
				 * delayed local completion of an application write),
				 * try to fix it up by recounting here. */
				ext->rs_left = drbd_bm_e_weight(device, enr);
			}
		} else {
			/* Normally this element should be in the cache,
			 * since drbd_rs_begin_io() pulled it already in.
			 *
			 * But maybe an application write finished, and we set
			 * something outside the resync lru_cache in sync.
			 */
			int rs_left = drbd_bm_e_weight(device, enr);
			if (ext->flags != 0) {
				drbd_warn(device, "changing resync lce: %d[%u;%02lx]"
				     " -> %d[%u;00]\n",
				     ext->lce.lc_number, ext->rs_left,
				     ext->flags, enr, rs_left);
				ext->flags = 0;
			}
			if (ext->rs_failed) {
				drbd_warn(device, "Kicking resync_lru element enr=%u "
				     "out with rs_failed=%d\n",
				     ext->lce.lc_number, ext->rs_failed);
			}
			ext->rs_left = rs_left;
			ext->rs_failed = success ? 0 : count;
			/* we don't keep a persistent log of the resync lru,
			 * we can commit any change right away. */
			lc_committed(device->resync);
		}
		lc_put(device->resync, &ext->lce);
		/* no race, we are within the al_lock! */

		if (ext->rs_left == ext->rs_failed) {
			ext->rs_failed = 0;
			wake_up(&first_peer_device(device)->connection->sender_work.q_wait);
		}
	} else {
		drbd_err(device, "lc_get() failed! locked=%d/%d flags=%lu\n",
		    device->resync_locked,
		    device->resync->nr_elements,
		    device->resync->flags);
	}
}

void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go)
{
	unsigned long now = jiffies;
	unsigned long last = device->rs_mark_time[device->rs_last_mark];
	int next = (device->rs_last_mark + 1) % DRBD_SYNC_MARKS;
	if (time_after_eq(now, last + DRBD_SYNC_MARK_STEP)) {
		if (device->rs_mark_left[device->rs_last_mark] != still_to_go &&
		    device->state.conn != C_PAUSED_SYNC_T &&
		    device->state.conn != C_PAUSED_SYNC_S) {
			device->rs_mark_time[next] = now;
			device->rs_mark_left[next] = still_to_go;
			device->rs_last_mark = next;
		}
	}
}

/* clear the bit corresponding to the piece of storage in question:
 * size bytes of data starting from sector.  Only clear the bits of the
 * affected one or more _aligned_ BM_BLOCK_SIZE blocks.
 *
 * called by worker on C_SYNC_TARGET and receiver on SyncSource.
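 *
 * Worked example of the rounding done below (added illustration, assuming
 * BM_SECT_TO_BIT(x) == x >> 3 for the 4K bitmap granularity, i.e. 8 sectors
 * per bit): sector=8, size=8192 covers sectors 8..23, so
 * sbnr = BM_SECT_TO_BIT(8 + 7) = 1 and ebnr = BM_SECT_TO_BIT(23 - 7) = 2,
 * clearing bits 1 and 2; a misaligned 4 KiB write at sector 4 covers
 * sectors 4..11, giving sbnr = 1 > ebnr = 0, so no bit is cleared.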
 *
 */
void __drbd_set_in_sync(struct drbd_device *device, sector_t sector, int size,
		const char *file, const unsigned int line)
{
	/* Is called from worker and receiver context _only_ */
	unsigned long sbnr, ebnr, lbnr;
	unsigned long count = 0;
	sector_t esector, nr_sectors;
	int wake_up = 0;
	unsigned long flags;

	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
		drbd_err(device, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n",
				(unsigned long long)sector, size);
		return;
	}

	if (!get_ldev(device))
		return; /* no disk, no metadata, no bitmap to clear bits in */

	nr_sectors = drbd_get_capacity(device->this_bdev);
	esector = sector + (size >> 9) - 1;

	if (!expect(sector < nr_sectors))
		goto out;
	if (!expect(esector < nr_sectors))
		esector = nr_sectors - 1;

	lbnr = BM_SECT_TO_BIT(nr_sectors-1);

	/* we clear it (in sync).
	 * round up start sector, round down end sector.  we make sure we only
	 * clear full, aligned, BM_BLOCK_SIZE (4K) blocks */
	if (unlikely(esector < BM_SECT_PER_BIT-1))
		goto out;
	if (unlikely(esector == (nr_sectors-1)))
		ebnr = lbnr;
	else
		ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
	sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);

	if (sbnr > ebnr)
		goto out;

	/*
	 * ok, (capacity & 7) != 0 sometimes, but who cares...
	 * we count rs_{total,left} in bits, not sectors.
	 */
	count = drbd_bm_clear_bits(device, sbnr, ebnr);
	if (count) {
		drbd_advance_rs_marks(device, drbd_bm_total_weight(device));
		spin_lock_irqsave(&device->al_lock, flags);
		drbd_try_clear_on_disk_bm(device, sector, count, true);
		spin_unlock_irqrestore(&device->al_lock, flags);

		/* just wake_up unconditional now, various lc_changed(),
		 * lc_put() in drbd_try_clear_on_disk_bm(). */
		wake_up = 1;
	}
out:
	put_ldev(device);
	if (wake_up)
		wake_up(&device->al_wait);
}

/*
 * this is intended to set one request worth of data out of sync.
 * affects at least 1 bit,
 * and at most 1+DRBD_MAX_BIO_SIZE/BM_BLOCK_SIZE bits.
 *
 * called by tl_clear and drbd_send_dblock (==drbd_make_request).
 * so this can be _any_ process.
 */
int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector, int size,
		const char *file, const unsigned int line)
{
	unsigned long sbnr, ebnr, flags;
	sector_t esector, nr_sectors;
	unsigned int enr, count = 0;
	struct lc_element *e;

	/* this should be an empty REQ_FLUSH */
	if (size == 0)
		return 0;

	if (size < 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
		drbd_err(device, "sector: %llus, size: %d\n",
			(unsigned long long)sector, size);
		return 0;
	}

	if (!get_ldev(device))
		return 0; /* no disk, no metadata, no bitmap to set bits in */

	nr_sectors = drbd_get_capacity(device->this_bdev);
	esector = sector + (size >> 9) - 1;

	if (!expect(sector < nr_sectors))
		goto out;
	if (!expect(esector < nr_sectors))
		esector = nr_sectors - 1;

	/* we set it out of sync,
	 * we do not need to round anything here */
	sbnr = BM_SECT_TO_BIT(sector);
	ebnr = BM_SECT_TO_BIT(esector);

	/* ok, (capacity & 7) != 0 sometimes, but who cares...
	 * we count rs_{total,left} in bits, not sectors.
	 */
	spin_lock_irqsave(&device->al_lock, flags);
	count = drbd_bm_set_bits(device, sbnr, ebnr);

	enr = BM_SECT_TO_EXT(sector);
	e = lc_find(device->resync, enr);
	if (e)
		lc_entry(e, struct bm_extent, lce)->rs_left += count;
	spin_unlock_irqrestore(&device->al_lock, flags);

out:
	put_ldev(device);

	return count;
}

static
struct bm_extent *_bme_get(struct drbd_device *device, unsigned int enr)
{
	struct lc_element *e;
	struct bm_extent *bm_ext;
	int wakeup = 0;
	unsigned long rs_flags;

	spin_lock_irq(&device->al_lock);
	if (device->resync_locked > device->resync->nr_elements/2) {
		spin_unlock_irq(&device->al_lock);
		return NULL;
	}
	e = lc_get(device->resync, enr);
	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
	if (bm_ext) {
		if (bm_ext->lce.lc_number != enr) {
			bm_ext->rs_left = drbd_bm_e_weight(device, enr);
			bm_ext->rs_failed = 0;
			lc_committed(device->resync);
			wakeup = 1;
		}
		if (bm_ext->lce.refcnt == 1)
			device->resync_locked++;
		set_bit(BME_NO_WRITES, &bm_ext->flags);
	}
	rs_flags = device->resync->flags;
	spin_unlock_irq(&device->al_lock);
	if (wakeup)
		wake_up(&device->al_wait);

	if (!bm_ext) {
		if (rs_flags & LC_STARVING)
			drbd_warn(device, "Have to wait for element"
			     " (resync LRU too small?)\n");
		BUG_ON(rs_flags & LC_LOCKED);
	}

	return bm_ext;
}

static int _is_in_al(struct drbd_device *device, unsigned int enr)
{
	int rv;

	spin_lock_irq(&device->al_lock);
	rv = lc_is_used(device->act_log, enr);
	spin_unlock_irq(&device->al_lock);

	return rv;
}

/**
 * drbd_rs_begin_io() - Gets an extent in the resync LRU cache and sets it to BME_LOCKED
 * @device:	DRBD device.
 * @sector:	The sector number.
 *
 * This function sleeps on al_wait.  Returns 0 on success, -EINTR if interrupted.
 */
int drbd_rs_begin_io(struct drbd_device *device, sector_t sector)
{
	unsigned int enr = BM_SECT_TO_EXT(sector);
	struct bm_extent *bm_ext;
	int i, sig;
	bool sa;

retry:
	sig = wait_event_interruptible(device->al_wait,
			(bm_ext = _bme_get(device, enr)));
	if (sig)
		return -EINTR;

	if (test_bit(BME_LOCKED, &bm_ext->flags))
		return 0;

	/* step aside only while we are above c-min-rate; unless disabled. */
	sa = drbd_rs_c_min_rate_throttle(device);

	for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
		sig = wait_event_interruptible(device->al_wait,
					       !_is_in_al(device, enr * AL_EXT_PER_BM_SECT + i) ||
					       (sa && test_bit(BME_PRIORITY, &bm_ext->flags)));

		if (sig || (sa && test_bit(BME_PRIORITY, &bm_ext->flags))) {
			spin_lock_irq(&device->al_lock);
			if (lc_put(device->resync, &bm_ext->lce) == 0) {
				bm_ext->flags = 0; /* clears BME_NO_WRITES and eventually BME_PRIORITY */
				device->resync_locked--;
				wake_up(&device->al_wait);
			}
			spin_unlock_irq(&device->al_lock);
			if (sig)
				return -EINTR;
			if (schedule_timeout_interruptible(HZ/10))
				return -EINTR;
			goto retry;
		}
	}
	set_bit(BME_LOCKED, &bm_ext->flags);
	return 0;
}

/**
 * drbd_try_rs_begin_io() - Gets an extent in the resync LRU cache, does not sleep
 * @device:	DRBD device.
 * @sector:	The sector number.
 *
 * Gets an extent in the resync LRU cache, sets it to BME_NO_WRITES, then
 * tries to set it to BME_LOCKED.
 * Returns 0 upon success, and -EAGAIN
 * if there is still application IO going on in this area.
 */
int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector)
{
	unsigned int enr = BM_SECT_TO_EXT(sector);
	const unsigned int al_enr = enr*AL_EXT_PER_BM_SECT;
	struct lc_element *e;
	struct bm_extent *bm_ext;
	int i;

	spin_lock_irq(&device->al_lock);
	if (device->resync_wenr != LC_FREE && device->resync_wenr != enr) {
		/* in case you have very heavy scattered io, it may
		 * stall the syncer undefined if we give up the ref count
		 * when we try again and requeue.
		 *
		 * if we don't give up the refcount, but the next time
		 * we are scheduled this extent has been "synced" by new
		 * application writes, we'd miss the lc_put on the
		 * extent we keep the refcount on.
		 * so we remembered which extent we had to try again, and
		 * if the next requested one is something else, we do
		 * the lc_put here...
		 * we also have to wake_up
		 */
		e = lc_find(device->resync, device->resync_wenr);
		bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
		if (bm_ext) {
			D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags));
			D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags));
			clear_bit(BME_NO_WRITES, &bm_ext->flags);
			device->resync_wenr = LC_FREE;
			if (lc_put(device->resync, &bm_ext->lce) == 0)
				device->resync_locked--;
			wake_up(&device->al_wait);
		} else {
			drbd_alert(device, "LOGIC BUG\n");
		}
	}
	/* TRY. */
	e = lc_try_get(device->resync, enr);
	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
	if (bm_ext) {
		if (test_bit(BME_LOCKED, &bm_ext->flags))
			goto proceed;
		if (!test_and_set_bit(BME_NO_WRITES, &bm_ext->flags)) {
			device->resync_locked++;
		} else {
			/* we did set the BME_NO_WRITES,
			 * but then could not set BME_LOCKED,
			 * so we tried again.
			 * drop the extra reference. */
			bm_ext->lce.refcnt--;
			D_ASSERT(device, bm_ext->lce.refcnt > 0);
		}
		goto check_al;
	} else {
		/* do we rather want to try later? */
		if (device->resync_locked > device->resync->nr_elements-3)
			goto try_again;
		/* Do or do not. There is no try. -- Yoda */
		e = lc_get(device->resync, enr);
		bm_ext = e ?
			lc_entry(e, struct bm_extent, lce) : NULL;
		if (!bm_ext) {
			const unsigned long rs_flags = device->resync->flags;
			if (rs_flags & LC_STARVING)
				drbd_warn(device, "Have to wait for element"
				     " (resync LRU too small?)\n");
			BUG_ON(rs_flags & LC_LOCKED);
			goto try_again;
		}
		if (bm_ext->lce.lc_number != enr) {
			bm_ext->rs_left = drbd_bm_e_weight(device, enr);
			bm_ext->rs_failed = 0;
			lc_committed(device->resync);
			wake_up(&device->al_wait);
			D_ASSERT(device, test_bit(BME_LOCKED, &bm_ext->flags) == 0);
		}
		set_bit(BME_NO_WRITES, &bm_ext->flags);
		D_ASSERT(device, bm_ext->lce.refcnt == 1);
		device->resync_locked++;
		goto check_al;
	}
check_al:
	for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
		if (lc_is_used(device->act_log, al_enr+i))
			goto try_again;
	}
	set_bit(BME_LOCKED, &bm_ext->flags);
proceed:
	device->resync_wenr = LC_FREE;
	spin_unlock_irq(&device->al_lock);
	return 0;

try_again:
	if (bm_ext)
		device->resync_wenr = enr;
	spin_unlock_irq(&device->al_lock);
	return -EAGAIN;
}

void drbd_rs_complete_io(struct drbd_device *device, sector_t sector)
{
	unsigned int enr = BM_SECT_TO_EXT(sector);
	struct lc_element *e;
	struct bm_extent *bm_ext;
	unsigned long flags;

	spin_lock_irqsave(&device->al_lock, flags);
	e = lc_find(device->resync, enr);
	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
	if (!bm_ext) {
		spin_unlock_irqrestore(&device->al_lock, flags);
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "drbd_rs_complete_io() called, but extent not found\n");
		return;
	}

	if (bm_ext->lce.refcnt == 0) {
		spin_unlock_irqrestore(&device->al_lock, flags);
		drbd_err(device, "drbd_rs_complete_io(,%llu [=%u]) called, "
		    "but refcnt is 0!?\n",
		    (unsigned long long)sector, enr);
		return;
	}

	if (lc_put(device->resync, &bm_ext->lce) == 0) {
		bm_ext->flags = 0; /* clear BME_LOCKED, BME_NO_WRITES and BME_PRIORITY */
		device->resync_locked--;
		wake_up(&device->al_wait);
	}

	spin_unlock_irqrestore(&device->al_lock, flags);
}

/**
 * drbd_rs_cancel_all() - Removes all extents from the resync LRU (even BME_LOCKED)
 * @device:	DRBD device.
 */
void drbd_rs_cancel_all(struct drbd_device *device)
{
	spin_lock_irq(&device->al_lock);

	if (get_ldev_if_state(device, D_FAILED)) { /* Makes sure ->resync is there. */
		lc_reset(device->resync);
		put_ldev(device);
	}
	device->resync_locked = 0;
	device->resync_wenr = LC_FREE;
	spin_unlock_irq(&device->al_lock);
	wake_up(&device->al_wait);
}

/**
 * drbd_rs_del_all() - Gracefully remove all extents from the resync LRU
 * @device:	DRBD device.
 *
 * Returns 0 upon success, -EAGAIN if at least one reference count was
 * not zero.
 */
int drbd_rs_del_all(struct drbd_device *device)
{
	struct lc_element *e;
	struct bm_extent *bm_ext;
	int i;

	spin_lock_irq(&device->al_lock);

	if (get_ldev_if_state(device, D_FAILED)) {
		/* ok, ->resync is there.
		 */
		for (i = 0; i < device->resync->nr_elements; i++) {
			e = lc_element_by_index(device->resync, i);
			bm_ext = lc_entry(e, struct bm_extent, lce);
			if (bm_ext->lce.lc_number == LC_FREE)
				continue;
			if (bm_ext->lce.lc_number == device->resync_wenr) {
				drbd_info(device, "dropping %u in drbd_rs_del_all, apparently"
				     " got 'synced' by application io\n",
				     device->resync_wenr);
				D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags));
				D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags));
				clear_bit(BME_NO_WRITES, &bm_ext->flags);
				device->resync_wenr = LC_FREE;
				lc_put(device->resync, &bm_ext->lce);
			}
			if (bm_ext->lce.refcnt != 0) {
				drbd_info(device, "Retrying drbd_rs_del_all() later. "
				     "refcnt=%d\n", bm_ext->lce.refcnt);
				put_ldev(device);
				spin_unlock_irq(&device->al_lock);
				return -EAGAIN;
			}
			D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags));
			D_ASSERT(device, !test_bit(BME_NO_WRITES, &bm_ext->flags));
			lc_del(device->resync, &bm_ext->lce);
		}
		D_ASSERT(device, device->resync->used == 0);
		put_ldev(device);
	}
	spin_unlock_irq(&device->al_lock);
	wake_up(&device->al_wait);

	return 0;
}

/**
 * drbd_rs_failed_io() - Record information on a failure to resync the specified blocks
 * @device:	DRBD device.
 * @sector:	The sector number.
 * @size:	Size of failed IO operation, in bytes.
 */
void drbd_rs_failed_io(struct drbd_device *device, sector_t sector, int size)
{
	/* Is called from worker and receiver context _only_ */
	unsigned long sbnr, ebnr, lbnr;
	unsigned long count;
	sector_t esector, nr_sectors;
	int wake_up = 0;

	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
		drbd_err(device, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n",
				(unsigned long long)sector, size);
		return;
	}
	nr_sectors = drbd_get_capacity(device->this_bdev);
	esector = sector + (size >> 9) - 1;

	if (!expect(sector < nr_sectors))
		return;
	if (!expect(esector < nr_sectors))
		esector = nr_sectors - 1;

	lbnr = BM_SECT_TO_BIT(nr_sectors-1);

	/*
	 * round up start sector, round down end sector.  we make sure we only
	 * handle full, aligned, BM_BLOCK_SIZE (4K) blocks */
	if (unlikely(esector < BM_SECT_PER_BIT-1))
		return;
	if (unlikely(esector == (nr_sectors-1)))
		ebnr = lbnr;
	else
		ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
	sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);

	if (sbnr > ebnr)
		return;

	/*
	 * ok, (capacity & 7) != 0 sometimes, but who cares...
	 * we count rs_{total,left} in bits, not sectors.
	 */
	spin_lock_irq(&device->al_lock);
	count = drbd_bm_count_bits(device, sbnr, ebnr);
	if (count) {
		device->rs_failed += count;

		if (get_ldev(device)) {
			drbd_try_clear_on_disk_bm(device, sector, count, false);
			put_ldev(device);
		}

		/* just wake_up unconditional now, various lc_changed(),
		 * lc_put() in drbd_try_clear_on_disk_bm(). */
		wake_up = 1;
	}
	spin_unlock_irq(&device->al_lock);
	if (wake_up)
		wake_up(&device->al_wait);
}