/*
   drbd_actlog.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */

#include <linux/slab.h>
#include <linux/crc32c.h>
#include <linux/drbd.h>
#include <linux/drbd_limits.h>
#include <linux/dynamic_debug.h>
#include "drbd_int.h"


enum al_transaction_types {
	AL_TR_UPDATE = 0,
	AL_TR_INITIALIZED = 0xffff
};
/* all fields on disc in big endian */
struct __packed al_transaction_on_disk {
	/* don't we all like magic */
	__be32	magic;

	/* to identify the most recent transaction block
	 * in the on disk ring buffer */
	__be32	tr_number;

	/* checksum on the full 4k block, with this field set to 0. */
	__be32	crc32c;

	/* type of transaction, special transaction types like:
	 * purge-all, set-all-idle, set-all-active, ... to-be-defined
	 * see also enum al_transaction_types */
	__be16	transaction_type;

	/* we currently allow only a few thousand extents,
	 * so 16bit will be enough for the slot number. */

	/* how many updates in this transaction */
	__be16	n_updates;

	/* maximum slot number, "al-extents" in drbd.conf speak.
	 * Having this in each transaction should make reconfiguration
	 * of that parameter easier. */
	__be16	context_size;

	/* slot number the context starts with */
	__be16	context_start_slot_nr;

	/* Some reserved bytes.  Expected usage is a 64bit counter of
	 * sectors-written since device creation, and other data generation tag
	 * supporting usage */
	__be32	__reserved[4];

	/* --- 36 byte used --- */

	/* Reserve space for up to AL_UPDATES_PER_TRANSACTION changes
	 * in one transaction, then use the remaining byte in the 4k block for
	 * context information.  "Flexible" number of updates per transaction
	 * does not help, as we have to account for the case when all update
	 * slots are used anyways, so it would only complicate code without
	 * additional benefit.
	 */
	__be16	update_slot_nr[AL_UPDATES_PER_TRANSACTION];

	/* but the extent number is 32bit, which at an extent size of 4 MiB
	 * allows to cover device sizes of up to 2**54 Byte (16 PiB) */
	__be32	update_extent_nr[AL_UPDATES_PER_TRANSACTION];

	/* --- 420 bytes used (36 + 64*6) --- */

	/* 4096 - 420 = 3676 = 919 * 4 */
	__be32	context[AL_CONTEXT_PER_TRANSACTION];
};
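
/* Size sketch for the block above (illustrative; the constants themselves
 * live in drbd_int.h, only their documented values are assumed here): the
 * fixed header is 3*4 + 4*2 + 4*4 = 36 bytes, the two update arrays add
 * AL_UPDATES_PER_TRANSACTION (64) * (2 + 4) = 384 bytes, giving the 420 bytes
 * noted above, and the remaining 4096 - 420 = 3676 bytes hold the 919 4-byte
 * context slots.  A hypothetical build-time check along these lines would be
 *
 *	BUILD_BUG_ON(sizeof(struct al_transaction_on_disk) != 4096);
 */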

struct update_odbm_work {
	struct drbd_work w;
	struct drbd_device *device;
	unsigned int enr;
};

struct update_al_work {
	struct drbd_work w;
	struct drbd_device *device;
	struct completion event;
	int err;
};


void *drbd_md_get_buffer(struct drbd_device *device)
{
	int r;

	wait_event(device->misc_wait,
		   (r = atomic_cmpxchg(&device->md_io_in_use, 0, 1)) == 0 ||
		   device->state.disk <= D_FAILED);

	return r ? NULL : page_address(device->md_io_page);
}

void drbd_md_put_buffer(struct drbd_device *device)
{
	if (atomic_dec_and_test(&device->md_io_in_use))
		wake_up(&device->misc_wait);
}

void wait_until_done_or_force_detached(struct drbd_device *device, struct drbd_backing_dev *bdev,
				       unsigned int *done)
{
	long dt;

	rcu_read_lock();
	dt = rcu_dereference(bdev->disk_conf)->disk_timeout;
	rcu_read_unlock();
	dt = dt * HZ / 10;
	if (dt == 0)
		dt = MAX_SCHEDULE_TIMEOUT;

	dt = wait_event_timeout(device->misc_wait,
			*done || test_bit(FORCE_DETACH, &device->flags), dt);
	if (dt == 0) {
		drbd_err(device, "meta-data IO operation timed out\n");
		drbd_chk_io_error(device, 1, DRBD_FORCE_DETACH);
	}
}

static int _drbd_md_sync_page_io(struct drbd_device *device,
				 struct drbd_backing_dev *bdev,
				 struct page *page, sector_t sector,
				 int rw, int size)
{
	struct bio *bio;
	int err;

	device->md_io.done = 0;
	device->md_io.error = -ENODEV;

	if ((rw & WRITE) && !test_bit(MD_NO_FUA, &device->flags))
		rw |= REQ_FUA | REQ_FLUSH;
	rw |= REQ_SYNC;

	bio = bio_alloc_drbd(GFP_NOIO);
	bio->bi_bdev = bdev->md_bdev;
	bio->bi_iter.bi_sector = sector;
	err = -EIO;
	if (bio_add_page(bio, page, size, 0) != size)
		goto out;
	bio->bi_private = &device->md_io;
	bio->bi_end_io = drbd_md_io_complete;
	bio->bi_rw = rw;

	if (!(rw & WRITE) && device->state.disk == D_DISKLESS && device->ldev == NULL)
		/* special case, drbd_md_read() during drbd_adm_attach(): no get_ldev */
		;
	else if (!get_ldev_if_state(device, D_ATTACHING)) {
		/* Corresponding put_ldev in drbd_md_io_complete() */
		drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n");
		err = -ENODEV;
		goto out;
	}

	bio_get(bio); /* one bio_put() is in the completion handler */
	atomic_inc(&device->md_io_in_use); /* drbd_md_put_buffer() is in the completion handler */
	if (drbd_insert_fault(device, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
		bio_endio(bio, -EIO);
	else
		submit_bio(rw, bio);
	wait_until_done_or_force_detached(device, bdev, &device->md_io.done);
	if (bio_flagged(bio, BIO_UPTODATE))
		err = device->md_io.error;

 out:
	bio_put(bio);
	return err;
}

int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bdev,
			 sector_t sector, int rw)
{
	int err;
	struct page *iop = device->md_io_page;

	D_ASSERT(device, atomic_read(&device->md_io_in_use) == 1);

	BUG_ON(!bdev->md_bdev);

	dynamic_drbd_dbg(device, "meta_data io: %s [%d]:%s(,%llus,%s) %pS\n",
	     current->comm, current->pid, __func__,
	     (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ",
	     (void*)_RET_IP_ );

	if (sector < drbd_md_first_sector(bdev) ||
	    sector + 7 > drbd_md_last_sector(bdev))
		drbd_alert(device, "%s [%d]:%s(,%llus,%s) out of range md access!\n",
		     current->comm, current->pid, __func__,
		     (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");

	/* we do all our meta data IO in aligned 4k blocks. */
	err = _drbd_md_sync_page_io(device, bdev, iop, sector, rw, 4096);
	if (err) {
		drbd_err(device, "drbd_md_sync_page_io(,%llus,%s) failed with error %d\n",
		    (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ", err);
	}
	return err;
}
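
/* Note on the numbers used above: all meta-data IO is done in aligned 4096
 * byte blocks, i.e. 4096 / 512 = 8 sectors per block.  That is why the range
 * check compares sector + 7 against drbd_md_last_sector(), and why
 * drbd_initialize_al() further down advances by 8 sectors per activity log
 * slot. */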

static struct bm_extent *find_active_resync_extent(struct drbd_device *device, unsigned int enr)
{
	struct lc_element *tmp;
	tmp = lc_find(device->resync, enr/AL_EXT_PER_BM_SECT);
	if (unlikely(tmp != NULL)) {
		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
		if (test_bit(BME_NO_WRITES, &bm_ext->flags))
			return bm_ext;
	}
	return NULL;
}

static struct lc_element *_al_get(struct drbd_device *device, unsigned int enr, bool nonblock)
{
	struct lc_element *al_ext;
	struct bm_extent *bm_ext;
	int wake;

	spin_lock_irq(&device->al_lock);
	bm_ext = find_active_resync_extent(device, enr);
	if (bm_ext) {
		wake = !test_and_set_bit(BME_PRIORITY, &bm_ext->flags);
		spin_unlock_irq(&device->al_lock);
		if (wake)
			wake_up(&device->al_wait);
		return NULL;
	}
	if (nonblock)
		al_ext = lc_try_get(device->act_log, enr);
	else
		al_ext = lc_get(device->act_log, enr);
	spin_unlock_irq(&device->al_lock);
	return al_ext;
}

bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval *i)
{
	/* for bios crossing activity log extent boundaries,
	 * we may need to activate two extents in one go */
	unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
	unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);

	D_ASSERT(device, (unsigned)(last - first) <= 1);
	D_ASSERT(device, atomic_read(&device->local_cnt) > 0);

	/* FIXME figure out a fast path for bios crossing AL extent boundaries */
	if (first != last)
		return false;

	return _al_get(device, first, true);
}
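
/* Worked example for the first/last computation above, assuming the 4 MiB
 * activity log extents documented in the on-disk struct (AL_EXTENT_SHIFT ==
 * 22, so one extent spans 1 << (22 - 9) = 8192 sectors): a 4 KiB request at
 * byte offset 4 MiB - 2 KiB starts at sector 8188 and ends at sector 8195,
 * giving first = 8188 >> 13 = 0 and last = 8195 >> 13 = 1.  It straddles two
 * extents, so the fast path above refuses it; a request fully inside one
 * extent has first == last and can be served with a single lc_try_get(). */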

bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i)
{
	/* for bios crossing activity log extent boundaries,
	 * we may need to activate two extents in one go */
	unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
	unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);
	unsigned enr;
	bool need_transaction = false;

	D_ASSERT(device, first <= last);
	D_ASSERT(device, atomic_read(&device->local_cnt) > 0);

	for (enr = first; enr <= last; enr++) {
		struct lc_element *al_ext;
		wait_event(device->al_wait,
				(al_ext = _al_get(device, enr, false)) != NULL);
		if (al_ext->lc_number != enr)
			need_transaction = true;
	}
	return need_transaction;
}

static int al_write_transaction(struct drbd_device *device, bool delegate);

/* When called through generic_make_request(), we must delegate
 * activity log I/O to the worker thread: a further request
 * submitted via generic_make_request() within the same task
 * would be queued on current->bio_list, and would only start
 * after this function returns (see generic_make_request()).
 *
 * However, if we *are* the worker, we must not delegate to ourselves.
 */

/*
 * @delegate:	delegate activity log I/O to the worker thread
 */
void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate)
{
	bool locked = false;

	BUG_ON(delegate && current == first_peer_device(device)->connection->worker.task);

	/* Serialize multiple transactions.
	 * This uses test_and_set_bit, memory barrier is implicit.
	 */
	wait_event(device->al_wait,
			device->act_log->pending_changes == 0 ||
			(locked = lc_try_lock_for_transaction(device->act_log)));

	if (locked) {
		/* Double check: it may have been committed by someone else,
		 * while we have been waiting for the lock. */
		if (device->act_log->pending_changes) {
			bool write_al_updates;

			rcu_read_lock();
			write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates;
			rcu_read_unlock();

			if (write_al_updates)
				al_write_transaction(device, delegate);
			spin_lock_irq(&device->al_lock);
			/* FIXME
			if (err)
				we need an "lc_cancel" here;
			*/
			lc_committed(device->act_log);
			spin_unlock_irq(&device->al_lock);
		}
		lc_unlock(device->act_log);
		wake_up(&device->al_wait);
	}
}

/*
 * @delegate:	delegate activity log I/O to the worker thread
 */
void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i, bool delegate)
{
	BUG_ON(delegate && current == first_peer_device(device)->connection->worker.task);

	if (drbd_al_begin_io_prepare(device, i))
		drbd_al_begin_io_commit(device, delegate);
}
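
/* Rough usage sketch for the interface above (the actual callers live in the
 * request code, not in this file, so names like req->i are illustrative): a
 * writer covers its interval in the activity log before issuing the data
 * write and drops the reference once the write has completed, e.g.
 *
 *	drbd_al_begin_io(device, &req->i, delegate); // may write a transaction
 *	... submit the application write ...
 *	drbd_al_complete_io(device, &req->i);        // drop per-extent refs
 *
 * drbd_al_begin_io_fastpath() and drbd_al_begin_io_nonblock() below are the
 * variants a submitter can use to avoid sleeping when the extents are already
 * hot. */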

int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i)
{
	struct lru_cache *al = device->act_log;
	/* for bios crossing activity log extent boundaries,
	 * we may need to activate two extents in one go */
	unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
	unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);
	unsigned nr_al_extents;
	unsigned available_update_slots;
	unsigned enr;

	D_ASSERT(device, first <= last);

	nr_al_extents = 1 + last - first; /* worst case: all touched extents are cold. */
	available_update_slots = min(al->nr_elements - al->used,
				al->max_pending_changes - al->pending_changes);

	/* We want all necessary updates for a given request within the same transaction
	 * We could first check how many updates are *actually* needed,
	 * and use that instead of the worst-case nr_al_extents */
	if (available_update_slots < nr_al_extents)
		return -EWOULDBLOCK;

	/* Is resync active in this area? */
	for (enr = first; enr <= last; enr++) {
		struct lc_element *tmp;
		tmp = lc_find(device->resync, enr/AL_EXT_PER_BM_SECT);
		if (unlikely(tmp != NULL)) {
			struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
			if (test_bit(BME_NO_WRITES, &bm_ext->flags)) {
				if (!test_and_set_bit(BME_PRIORITY, &bm_ext->flags))
					return -EBUSY;
				return -EWOULDBLOCK;
			}
		}
	}

	/* Checkout the refcounts.
	 * Given that we checked for available elements and update slots above,
	 * this has to be successful. */
	for (enr = first; enr <= last; enr++) {
		struct lc_element *al_ext;
		al_ext = lc_get_cumulative(device->act_log, enr);
		if (!al_ext)
			drbd_info(device, "LOGIC BUG for enr=%u\n", enr);
	}
	return 0;
}

void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i)
{
	/* for bios crossing activity log extent boundaries,
	 * we may need to activate two extents in one go */
	unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
	unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);
	unsigned enr;
	struct lc_element *extent;
	unsigned long flags;

	D_ASSERT(device, first <= last);
	spin_lock_irqsave(&device->al_lock, flags);

	for (enr = first; enr <= last; enr++) {
		extent = lc_find(device->act_log, enr);
		if (!extent) {
			drbd_err(device, "al_complete_io() called on inactive extent %u\n", enr);
			continue;
		}
		lc_put(device->act_log, extent);
	}
	spin_unlock_irqrestore(&device->al_lock, flags);
	wake_up(&device->al_wait);
}

#if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)
/* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT
 * are still coupled, or assume too much about their relation.
 * Code below will not work if this is violated.
 * Will be cleaned up with some followup patch.
 */
# error FIXME
#endif

static unsigned int al_extent_to_bm_page(unsigned int al_enr)
{
	return al_enr >>
		/* bit to page */
		((PAGE_SHIFT + 3) -
		/* al extent number to bit */
		 (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT));
}

static unsigned int rs_extent_to_bm_page(unsigned int rs_enr)
{
	return rs_enr >>
		/* bit to page */
		((PAGE_SHIFT + 3) -
		/* resync extent number to bit */
		 (BM_EXT_SHIFT - BM_BLOCK_SHIFT));
}
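
/* Worked example for the two helpers above, assuming 4 KiB pages (PAGE_SHIFT
 * == 12) and the 4 KiB bitmap granularity used elsewhere in this file
 * (BM_BLOCK_SHIFT == 12): one bitmap page holds 1 << (12 + 3) = 32768 bits.
 * A 4 MiB AL extent (AL_EXTENT_SHIFT == 22) covers 1 << (22 - 12) = 1024
 * bits, so al_extent_to_bm_page() shifts by 15 - 10 = 5, i.e. 32 AL extents
 * share one bitmap page.  A 16 MiB resync extent (BM_EXT_SHIFT == 24) covers
 * 4096 bits, so rs_extent_to_bm_page() shifts by 15 - 12 = 3, i.e. 8 resync
 * extents per bitmap page. */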

static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device)
{
	const unsigned int stripes = device->ldev->md.al_stripes;
	const unsigned int stripe_size_4kB = device->ldev->md.al_stripe_size_4k;

	/* transaction number, modulo on-disk ring buffer wrap around */
	unsigned int t = device->al_tr_number % (device->ldev->md.al_size_4k);

	/* ... to aligned 4k on disk block */
	t = ((t % stripes) * stripe_size_4kB) + t/stripes;

	/* ... to 512 byte sector in activity log */
	t *= 8;

	/* ... plus offset to the on disk position */
	return device->ldev->md.md_offset + device->ldev->md.al_offset + t;
}
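
/* Worked example for the mapping above (the numbers are illustrative, the
 * real values come from the meta-data super block): with al_stripes == 4 and
 * al_stripe_size_4k == 16 the ring has 64 4k slots.  Transaction number 5
 * wraps to t = 5, lands in 4k block (5 % 4) * 16 + 5 / 4 = 17, i.e. at sector
 * offset 17 * 8 = 136 from md_offset + al_offset.  With al_stripes == 1 the
 * formula degenerates to t itself, a plain sequential ring. */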

static int
_al_write_transaction(struct drbd_device *device)
{
	struct al_transaction_on_disk *buffer;
	struct lc_element *e;
	sector_t sector;
	int i, mx;
	unsigned extent_nr;
	unsigned crc = 0;
	int err = 0;

	if (!get_ldev(device)) {
		drbd_err(device, "disk is %s, cannot start al transaction\n",
			drbd_disk_str(device->state.disk));
		return -EIO;
	}

	/* The bitmap write may have failed, causing a state change. */
	if (device->state.disk < D_INCONSISTENT) {
		drbd_err(device,
			"disk is %s, cannot write al transaction\n",
			drbd_disk_str(device->state.disk));
		put_ldev(device);
		return -EIO;
	}

	buffer = drbd_md_get_buffer(device); /* protects md_io_buffer, al_tr_cycle, ... */
	if (!buffer) {
		drbd_err(device, "disk failed while waiting for md_io buffer\n");
		put_ldev(device);
		return -ENODEV;
	}

	memset(buffer, 0, sizeof(*buffer));
	buffer->magic = cpu_to_be32(DRBD_AL_MAGIC);
	buffer->tr_number = cpu_to_be32(device->al_tr_number);

	i = 0;

	/* Even though no one can start to change this list
	 * once we set the LC_LOCKED -- from drbd_al_begin_io(),
	 * lc_try_lock_for_transaction() --, someone may still
	 * be in the process of changing it. */
	spin_lock_irq(&device->al_lock);
	list_for_each_entry(e, &device->act_log->to_be_changed, list) {
		if (i == AL_UPDATES_PER_TRANSACTION) {
			i++;
			break;
		}
		buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index);
		buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number);
		if (e->lc_number != LC_FREE)
			drbd_bm_mark_for_writeout(device,
					al_extent_to_bm_page(e->lc_number));
		i++;
	}
	spin_unlock_irq(&device->al_lock);
	BUG_ON(i > AL_UPDATES_PER_TRANSACTION);

	buffer->n_updates = cpu_to_be16(i);
	for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) {
		buffer->update_slot_nr[i] = cpu_to_be16(-1);
		buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE);
	}

	buffer->context_size = cpu_to_be16(device->act_log->nr_elements);
	buffer->context_start_slot_nr = cpu_to_be16(device->al_tr_cycle);

	mx = min_t(int, AL_CONTEXT_PER_TRANSACTION,
		   device->act_log->nr_elements - device->al_tr_cycle);
	for (i = 0; i < mx; i++) {
		unsigned idx = device->al_tr_cycle + i;
		extent_nr = lc_element_by_index(device->act_log, idx)->lc_number;
		buffer->context[i] = cpu_to_be32(extent_nr);
	}
	for (; i < AL_CONTEXT_PER_TRANSACTION; i++)
		buffer->context[i] = cpu_to_be32(LC_FREE);

	device->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION;
	if (device->al_tr_cycle >= device->act_log->nr_elements)
		device->al_tr_cycle = 0;

	sector = al_tr_number_to_on_disk_sector(device);

	crc = crc32c(0, buffer, 4096);
	buffer->crc32c = cpu_to_be32(crc);

	if (drbd_bm_write_hinted(device))
		err = -EIO;
	else {
		bool write_al_updates;
		rcu_read_lock();
		write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates;
		rcu_read_unlock();
		if (write_al_updates) {
			if (drbd_md_sync_page_io(device, device->ldev, sector, WRITE)) {
				err = -EIO;
				drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR);
			} else {
				device->al_tr_number++;
				device->al_writ_cnt++;
			}
		}
	}

	drbd_md_put_buffer(device);
	put_ldev(device);

	return err;
}


static int w_al_write_transaction(struct drbd_work *w, int unused)
{
	struct update_al_work *aw = container_of(w, struct update_al_work, w);
	struct drbd_device *device = aw->device;
	int err;

	err = _al_write_transaction(device);
	aw->err = err;
	complete(&aw->event);

	return err != -EIO ? err : 0;
}

/* Calls from worker context (see w_restart_disk_io()) need to write the
   transaction directly. Others came through generic_make_request(),
   those need to delegate it to the worker. */
static int al_write_transaction(struct drbd_device *device, bool delegate)
{
	if (delegate) {
		struct update_al_work al_work;
		init_completion(&al_work.event);
		al_work.w.cb = w_al_write_transaction;
		al_work.device = device;
		drbd_queue_work_front(&first_peer_device(device)->connection->sender_work,
				      &al_work.w);
		wait_for_completion(&al_work.event);
		return al_work.err;
	} else
		return _al_write_transaction(device);
}
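
/* Reader-side sketch for the transaction written above (hedged; the parsing
 * code lives elsewhere in the driver): a 4k slot is accepted if ->magic is
 * DRBD_AL_MAGIC and ->crc32c matches crc32c(0, block, 4096) computed with the
 * crc32c field zeroed, which is the order used above, where the checksum is
 * taken while that field still holds the 0 from memset().  Among the valid
 * slots, ->tr_number identifies the most recent transaction, as noted in the
 * struct definition. */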

static int _try_lc_del(struct drbd_device *device, struct lc_element *al_ext)
{
	int rv;

	spin_lock_irq(&device->al_lock);
	rv = (al_ext->refcnt == 0);
	if (likely(rv))
		lc_del(device->act_log, al_ext);
	spin_unlock_irq(&device->al_lock);

	return rv;
}

/**
 * drbd_al_shrink() - Removes all active extents from the activity log
 * @device:	DRBD device.
 *
 * Removes all active extents from the activity log, waiting until
 * the reference count of each entry has dropped to 0 first, of course.
 *
 * You need to lock device->act_log with lc_try_lock() / lc_unlock()
 */
void drbd_al_shrink(struct drbd_device *device)
{
	struct lc_element *al_ext;
	int i;

	D_ASSERT(device, test_bit(__LC_LOCKED, &device->act_log->flags));

	for (i = 0; i < device->act_log->nr_elements; i++) {
		al_ext = lc_element_by_index(device->act_log, i);
		if (al_ext->lc_number == LC_FREE)
			continue;
		wait_event(device->al_wait, _try_lc_del(device, al_ext));
	}

	wake_up(&device->al_wait);
}

int drbd_initialize_al(struct drbd_device *device, void *buffer)
{
	struct al_transaction_on_disk *al = buffer;
	struct drbd_md *md = &device->ldev->md;
	sector_t al_base = md->md_offset + md->al_offset;
	int al_size_4k = md->al_stripes * md->al_stripe_size_4k;
	int i;

	memset(al, 0, 4096);
	al->magic = cpu_to_be32(DRBD_AL_MAGIC);
	al->transaction_type = cpu_to_be16(AL_TR_INITIALIZED);
	al->crc32c = cpu_to_be32(crc32c(0, al, 4096));

	for (i = 0; i < al_size_4k; i++) {
		int err = drbd_md_sync_page_io(device, device->ldev, al_base + i * 8, WRITE);
		if (err)
			return err;
	}
	return 0;
}
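
/* Note on drbd_initialize_al() above: it stamps every slot of the on-disk
 * ring (al_stripes * al_stripe_size_4k of them, 8 sectors apart) with the
 * same AL_TR_INITIALIZED block, presumably so that a later reader finds
 * syntactically valid but empty transactions in every slot rather than stale
 * data. */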

static int w_update_odbm(struct drbd_work *w, int unused)
{
	struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w);
	struct drbd_device *device = udw->device;
	struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, };

	if (!get_ldev(device)) {
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_warn(device, "Can not update on disk bitmap, local IO disabled.\n");
		kfree(udw);
		return 0;
	}

	drbd_bm_write_page(device, rs_extent_to_bm_page(udw->enr));
	put_ldev(device);

	kfree(udw);

	if (drbd_bm_total_weight(device) <= device->rs_failed) {
		switch (device->state.conn) {
		case C_SYNC_SOURCE:  case C_SYNC_TARGET:
		case C_PAUSED_SYNC_S: case C_PAUSED_SYNC_T:
			drbd_resync_finished(device);
		default:
			/* nothing to do */
			break;
		}
	}
	drbd_bcast_event(device, &sib);

	return 0;
}


/* ATTENTION. The AL's extents are 4MB each, while the extents in the
 * resync LRU-cache are 16MB each.
 * The caller of this function has to hold a get_ldev() reference.
 *
 * TODO will be obsoleted once we have a caching lru of the on disk bitmap
 */
static void drbd_try_clear_on_disk_bm(struct drbd_device *device, sector_t sector,
				      int count, int success)
{
	struct lc_element *e;
	struct update_odbm_work *udw;

	unsigned int enr;

	D_ASSERT(device, atomic_read(&device->local_cnt));

	/* I simply assume that a sector/size pair never crosses
	 * a 16 MB extent border. (Currently this is true...) */
	enr = BM_SECT_TO_EXT(sector);

	e = lc_get(device->resync, enr);
	if (e) {
		struct bm_extent *ext = lc_entry(e, struct bm_extent, lce);
		if (ext->lce.lc_number == enr) {
			if (success)
				ext->rs_left -= count;
			else
				ext->rs_failed += count;
			if (ext->rs_left < ext->rs_failed) {
				drbd_warn(device, "BAD! sector=%llus enr=%u rs_left=%d "
				    "rs_failed=%d count=%d cstate=%s\n",
				     (unsigned long long)sector,
				     ext->lce.lc_number, ext->rs_left,
				     ext->rs_failed, count,
				     drbd_conn_str(device->state.conn));

				/* We don't expect to be able to clear more bits
				 * than have been set when we originally counted
				 * the set bits to cache that value in ext->rs_left.
				 * Whatever the reason (disconnect during resync,
				 * delayed local completion of an application write),
				 * try to fix it up by recounting here. */
				ext->rs_left = drbd_bm_e_weight(device, enr);
			}
		} else {
			/* Normally this element should be in the cache,
			 * since drbd_rs_begin_io() pulled it already in.
			 *
			 * But maybe an application write finished, and we set
			 * something outside the resync lru_cache in sync.
			 */
			int rs_left = drbd_bm_e_weight(device, enr);
			if (ext->flags != 0) {
				drbd_warn(device, "changing resync lce: %d[%u;%02lx]"
				     " -> %d[%u;00]\n",
				     ext->lce.lc_number, ext->rs_left,
				     ext->flags, enr, rs_left);
				ext->flags = 0;
			}
			if (ext->rs_failed) {
				drbd_warn(device, "Kicking resync_lru element enr=%u "
				     "out with rs_failed=%d\n",
				     ext->lce.lc_number, ext->rs_failed);
			}
			ext->rs_left = rs_left;
			ext->rs_failed = success ? 0 : count;
			/* we don't keep a persistent log of the resync lru,
			 * we can commit any change right away. */
			lc_committed(device->resync);
		}
		lc_put(device->resync, &ext->lce);
		/* no race, we are within the al_lock! */

		if (ext->rs_left == ext->rs_failed) {
			ext->rs_failed = 0;

			udw = kmalloc(sizeof(*udw), GFP_ATOMIC);
			if (udw) {
				udw->enr = ext->lce.lc_number;
				udw->w.cb = w_update_odbm;
				udw->device = device;
				drbd_queue_work_front(&first_peer_device(device)->connection->sender_work,
						      &udw->w);
			} else {
				drbd_warn(device, "Could not kmalloc an udw\n");
			}
		}
	} else {
		drbd_err(device, "lc_get() failed! locked=%d/%d flags=%lu\n",
		    device->resync_locked,
		    device->resync->nr_elements,
		    device->resync->flags);
	}
}
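
/* Size relation used by the resync accounting above (from the ATTENTION
 * comment): activity log extents are 4 MiB, resync extents are 16 MiB, so one
 * resync extent covers 16 / 4 = 4 AL extents, presumably the value of
 * AL_EXT_PER_BM_SECT used in enr/AL_EXT_PER_BM_SECT and in the check_al loops
 * below.  rs_left counts out-of-sync 4 KiB bitmap blocks within one 16 MiB
 * extent, so it is at most 16 MiB / 4 KiB = 4096. */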

void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go)
{
	unsigned long now = jiffies;
	unsigned long last = device->rs_mark_time[device->rs_last_mark];
	int next = (device->rs_last_mark + 1) % DRBD_SYNC_MARKS;
	if (time_after_eq(now, last + DRBD_SYNC_MARK_STEP)) {
		if (device->rs_mark_left[device->rs_last_mark] != still_to_go &&
		    device->state.conn != C_PAUSED_SYNC_T &&
		    device->state.conn != C_PAUSED_SYNC_S) {
			device->rs_mark_time[next] = now;
			device->rs_mark_left[next] = still_to_go;
			device->rs_last_mark = next;
		}
	}
}

/* clear the bit corresponding to the piece of storage in question:
 * size byte of data starting from sector.  Only clear bits of the affected
 * one or more _aligned_ BM_BLOCK_SIZE blocks.
 *
 * called by worker on C_SYNC_TARGET and receiver on SyncSource.
 *
 */
void __drbd_set_in_sync(struct drbd_device *device, sector_t sector, int size,
			const char *file, const unsigned int line)
{
	/* Is called from worker and receiver context _only_ */
	unsigned long sbnr, ebnr, lbnr;
	unsigned long count = 0;
	sector_t esector, nr_sectors;
	int wake_up = 0;
	unsigned long flags;

	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
		drbd_err(device, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n",
				(unsigned long long)sector, size);
		return;
	}

	if (!get_ldev(device))
		return; /* no disk, no metadata, no bitmap to clear bits in */

	nr_sectors = drbd_get_capacity(device->this_bdev);
	esector = sector + (size >> 9) - 1;

	if (!expect(sector < nr_sectors))
		goto out;
	if (!expect(esector < nr_sectors))
		esector = nr_sectors - 1;

	lbnr = BM_SECT_TO_BIT(nr_sectors-1);

	/* we clear it (in sync).
	 * round up start sector, round down end sector.  we make sure we only
	 * clear full, aligned, BM_BLOCK_SIZE (4K) blocks */
	if (unlikely(esector < BM_SECT_PER_BIT-1))
		goto out;
	if (unlikely(esector == (nr_sectors-1)))
		ebnr = lbnr;
	else
		ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
	sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);

	if (sbnr > ebnr)
		goto out;

	/*
	 * ok, (capacity & 7) != 0 sometimes, but who cares...
	 * we count rs_{total,left} in bits, not sectors.
	 */
	count = drbd_bm_clear_bits(device, sbnr, ebnr);
	if (count) {
		drbd_advance_rs_marks(device, drbd_bm_total_weight(device));
		spin_lock_irqsave(&device->al_lock, flags);
		drbd_try_clear_on_disk_bm(device, sector, count, true);
		spin_unlock_irqrestore(&device->al_lock, flags);

		/* just wake_up unconditional now, various lc_chaged(),
		 * lc_put() in drbd_try_clear_on_disk_bm(). */
		wake_up = 1;
	}
out:
	put_ldev(device);
	if (wake_up)
		wake_up(&device->al_wait);
}
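
/* Worked example for the rounding above (with 4 KiB bitmap blocks,
 * BM_SECT_PER_BIT == 8): only bits whose full 4 KiB block is covered may be
 * cleared.  For sector = 1, size = 4096 the request spans sectors 1..8;
 * sbnr = BM_SECT_TO_BIT(1 + 7) = 1 and ebnr = BM_SECT_TO_BIT(8 - 7) = 0, so
 * sbnr > ebnr and nothing is cleared.  For sector = 0, size = 8192 (sectors
 * 0..15) we get sbnr = 0 and ebnr = 1, and both bits are cleared. */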

/*
 * this is intended to set one request worth of data out of sync.
 * affects at least 1 bit,
 * and at most 1+DRBD_MAX_BIO_SIZE/BM_BLOCK_SIZE bits.
 *
 * called by tl_clear and drbd_send_dblock (==drbd_make_request).
 * so this can be _any_ process.
 */
int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector, int size,
			   const char *file, const unsigned int line)
{
	unsigned long sbnr, ebnr, flags;
	sector_t esector, nr_sectors;
	unsigned int enr, count = 0;
	struct lc_element *e;

	/* this should be an empty REQ_FLUSH */
	if (size == 0)
		return 0;

	if (size < 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
		drbd_err(device, "sector: %llus, size: %d\n",
			(unsigned long long)sector, size);
		return 0;
	}

	if (!get_ldev(device))
		return 0; /* no disk, no metadata, no bitmap to set bits in */

	nr_sectors = drbd_get_capacity(device->this_bdev);
	esector = sector + (size >> 9) - 1;

	if (!expect(sector < nr_sectors))
		goto out;
	if (!expect(esector < nr_sectors))
		esector = nr_sectors - 1;

	/* we set it out of sync,
	 * we do not need to round anything here */
	sbnr = BM_SECT_TO_BIT(sector);
	ebnr = BM_SECT_TO_BIT(esector);

	/* ok, (capacity & 7) != 0 sometimes, but who cares...
	 * we count rs_{total,left} in bits, not sectors. */
	spin_lock_irqsave(&device->al_lock, flags);
	count = drbd_bm_set_bits(device, sbnr, ebnr);

	enr = BM_SECT_TO_EXT(sector);
	e = lc_find(device->resync, enr);
	if (e)
		lc_entry(e, struct bm_extent, lce)->rs_left += count;
	spin_unlock_irqrestore(&device->al_lock, flags);

out:
	put_ldev(device);

	return count;
}
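
/* Contrast with __drbd_set_in_sync() above: no rounding is needed when
 * marking blocks out of sync, because setting the bit of an only partially
 * covered 4 KiB block errs on the safe side (that block merely gets resynced
 * later).  Same example as before: sector = 1, size = 4096 spans sectors
 * 1..8, giving sbnr = BM_SECT_TO_BIT(1) = 0 and ebnr = BM_SECT_TO_BIT(8) = 1,
 * so both partially covered bits are set. */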

static
struct bm_extent *_bme_get(struct drbd_device *device, unsigned int enr)
{
	struct lc_element *e;
	struct bm_extent *bm_ext;
	int wakeup = 0;
	unsigned long rs_flags;

	spin_lock_irq(&device->al_lock);
	if (device->resync_locked > device->resync->nr_elements/2) {
		spin_unlock_irq(&device->al_lock);
		return NULL;
	}
	e = lc_get(device->resync, enr);
	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
	if (bm_ext) {
		if (bm_ext->lce.lc_number != enr) {
			bm_ext->rs_left = drbd_bm_e_weight(device, enr);
			bm_ext->rs_failed = 0;
			lc_committed(device->resync);
			wakeup = 1;
		}
		if (bm_ext->lce.refcnt == 1)
			device->resync_locked++;
		set_bit(BME_NO_WRITES, &bm_ext->flags);
	}
	rs_flags = device->resync->flags;
	spin_unlock_irq(&device->al_lock);
	if (wakeup)
		wake_up(&device->al_wait);

	if (!bm_ext) {
		if (rs_flags & LC_STARVING)
			drbd_warn(device, "Have to wait for element"
			     " (resync LRU too small?)\n");
		BUG_ON(rs_flags & LC_LOCKED);
	}

	return bm_ext;
}

static int _is_in_al(struct drbd_device *device, unsigned int enr)
{
	int rv;

	spin_lock_irq(&device->al_lock);
	rv = lc_is_used(device->act_log, enr);
	spin_unlock_irq(&device->al_lock);

	return rv;
}

/**
 * drbd_rs_begin_io() - Gets an extent in the resync LRU cache and sets it to BME_LOCKED
 * @device:	DRBD device.
 * @sector:	The sector number.
 *
 * This function sleeps on al_wait. Returns 0 on success, -EINTR if interrupted.
 */
int drbd_rs_begin_io(struct drbd_device *device, sector_t sector)
{
	unsigned int enr = BM_SECT_TO_EXT(sector);
	struct bm_extent *bm_ext;
	int i, sig;
	bool sa;

retry:
	sig = wait_event_interruptible(device->al_wait,
			(bm_ext = _bme_get(device, enr)));
	if (sig)
		return -EINTR;

	if (test_bit(BME_LOCKED, &bm_ext->flags))
		return 0;

	/* step aside only while we are above c-min-rate; unless disabled. */
	sa = drbd_rs_c_min_rate_throttle(device);

	for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
		sig = wait_event_interruptible(device->al_wait,
					       !_is_in_al(device, enr * AL_EXT_PER_BM_SECT + i) ||
					       (sa && test_bit(BME_PRIORITY, &bm_ext->flags)));

		if (sig || (sa && test_bit(BME_PRIORITY, &bm_ext->flags))) {
			spin_lock_irq(&device->al_lock);
			if (lc_put(device->resync, &bm_ext->lce) == 0) {
				bm_ext->flags = 0; /* clears BME_NO_WRITES and eventually BME_PRIORITY */
				device->resync_locked--;
				wake_up(&device->al_wait);
			}
			spin_unlock_irq(&device->al_lock);
			if (sig)
				return -EINTR;
			if (schedule_timeout_interruptible(HZ/10))
				return -EINTR;
			goto retry;
		}
	}
	set_bit(BME_LOCKED, &bm_ext->flags);
	return 0;
}
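
/* Rough pairing on the resync side (the callers live in the resync and
 * receiver code, not in this file): a resync request takes the 16 MiB extent
 * with drbd_rs_begin_io(device, sector), or the nonblocking
 * drbd_try_rs_begin_io() below, before touching the area, and releases it
 * with drbd_rs_complete_io(device, sector) afterwards.  The BME_NO_WRITES bit
 * set here is what makes application writers back off in _al_get() /
 * find_active_resync_extent() above. */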

/**
 * drbd_try_rs_begin_io() - Gets an extent in the resync LRU cache, does not sleep
 * @device:	DRBD device.
 * @sector:	The sector number.
 *
 * Gets an extent in the resync LRU cache, sets it to BME_NO_WRITES, then
 * tries to set it to BME_LOCKED. Returns 0 upon success, and -EAGAIN
 * if there is still application IO going on in this area.
 */
int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector)
{
	unsigned int enr = BM_SECT_TO_EXT(sector);
	const unsigned int al_enr = enr*AL_EXT_PER_BM_SECT;
	struct lc_element *e;
	struct bm_extent *bm_ext;
	int i;

	spin_lock_irq(&device->al_lock);
	if (device->resync_wenr != LC_FREE && device->resync_wenr != enr) {
		/* in case you have very heavy scattered io, it may
		 * stall the syncer undefined if we give up the ref count
		 * when we try again and requeue.
		 *
		 * if we don't give up the refcount, but the next time
		 * we are scheduled this extent has been "synced" by new
		 * application writes, we'd miss the lc_put on the
		 * extent we keep the refcount on.
		 * so we remembered which extent we had to try again, and
		 * if the next requested one is something else, we do
		 * the lc_put here...
		 * we also have to wake_up
		 */
		e = lc_find(device->resync, device->resync_wenr);
		bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
		if (bm_ext) {
			D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags));
			D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags));
			clear_bit(BME_NO_WRITES, &bm_ext->flags);
			device->resync_wenr = LC_FREE;
			if (lc_put(device->resync, &bm_ext->lce) == 0)
				device->resync_locked--;
			wake_up(&device->al_wait);
		} else {
			drbd_alert(device, "LOGIC BUG\n");
		}
	}
	/* TRY. */
	e = lc_try_get(device->resync, enr);
	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
	if (bm_ext) {
		if (test_bit(BME_LOCKED, &bm_ext->flags))
			goto proceed;
		if (!test_and_set_bit(BME_NO_WRITES, &bm_ext->flags)) {
			device->resync_locked++;
		} else {
			/* we did set the BME_NO_WRITES,
			 * but then could not set BME_LOCKED,
			 * so we tried again.
			 * drop the extra reference. */
			bm_ext->lce.refcnt--;
			D_ASSERT(device, bm_ext->lce.refcnt > 0);
		}
		goto check_al;
	} else {
		/* do we rather want to try later? */
		if (device->resync_locked > device->resync->nr_elements-3)
			goto try_again;
		/* Do or do not. There is no try. -- Yoda */
		e = lc_get(device->resync, enr);
		bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
		if (!bm_ext) {
			const unsigned long rs_flags = device->resync->flags;
			if (rs_flags & LC_STARVING)
				drbd_warn(device, "Have to wait for element"
				     " (resync LRU too small?)\n");
			BUG_ON(rs_flags & LC_LOCKED);
			goto try_again;
		}
		if (bm_ext->lce.lc_number != enr) {
			bm_ext->rs_left = drbd_bm_e_weight(device, enr);
			bm_ext->rs_failed = 0;
			lc_committed(device->resync);
			wake_up(&device->al_wait);
			D_ASSERT(device, test_bit(BME_LOCKED, &bm_ext->flags) == 0);
		}
		set_bit(BME_NO_WRITES, &bm_ext->flags);
		D_ASSERT(device, bm_ext->lce.refcnt == 1);
		device->resync_locked++;
		goto check_al;
	}
check_al:
	for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
		if (lc_is_used(device->act_log, al_enr+i))
			goto try_again;
	}
	set_bit(BME_LOCKED, &bm_ext->flags);
proceed:
	device->resync_wenr = LC_FREE;
	spin_unlock_irq(&device->al_lock);
	return 0;

try_again:
	if (bm_ext)
		device->resync_wenr = enr;
	spin_unlock_irq(&device->al_lock);
	return -EAGAIN;
}

void drbd_rs_complete_io(struct drbd_device *device, sector_t sector)
{
	unsigned int enr = BM_SECT_TO_EXT(sector);
	struct lc_element *e;
	struct bm_extent *bm_ext;
	unsigned long flags;

	spin_lock_irqsave(&device->al_lock, flags);
	e = lc_find(device->resync, enr);
	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
	if (!bm_ext) {
		spin_unlock_irqrestore(&device->al_lock, flags);
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "drbd_rs_complete_io() called, but extent not found\n");
		return;
	}

	if (bm_ext->lce.refcnt == 0) {
		spin_unlock_irqrestore(&device->al_lock, flags);
		drbd_err(device, "drbd_rs_complete_io(,%llu [=%u]) called, "
		    "but refcnt is 0!?\n",
		    (unsigned long long)sector, enr);
		return;
	}

	if (lc_put(device->resync, &bm_ext->lce) == 0) {
		bm_ext->flags = 0; /* clear BME_LOCKED, BME_NO_WRITES and BME_PRIORITY */
		device->resync_locked--;
		wake_up(&device->al_wait);
	}

	spin_unlock_irqrestore(&device->al_lock, flags);
}

/**
 * drbd_rs_cancel_all() - Removes all extents from the resync LRU (even BME_LOCKED)
 * @device:	DRBD device.
 */
void drbd_rs_cancel_all(struct drbd_device *device)
{
	spin_lock_irq(&device->al_lock);

	if (get_ldev_if_state(device, D_FAILED)) { /* Makes sure ->resync is there. */
		lc_reset(device->resync);
		put_ldev(device);
	}
	device->resync_locked = 0;
	device->resync_wenr = LC_FREE;
	spin_unlock_irq(&device->al_lock);
	wake_up(&device->al_wait);
}
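
/* Flag lifecycle of a resync extent, summarizing the code above rather than
 * adding semantics: _bme_get()/lc_get() pull the extent into the resync LRU
 * and set BME_NO_WRITES; once no conflicting activity log extent is in use,
 * BME_LOCKED is set and resync IO may proceed; a waiting application write
 * may set BME_PRIORITY to ask the resync to step aside.  The lc_put() that
 * drops the refcount to zero clears all flags again (see
 * drbd_rs_complete_io() and drbd_rs_begin_io()). */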

/**
 * drbd_rs_del_all() - Gracefully remove all extents from the resync LRU
 * @device:	DRBD device.
 *
 * Returns 0 upon success, -EAGAIN if at least one reference count was
 * not zero.
 */
int drbd_rs_del_all(struct drbd_device *device)
{
	struct lc_element *e;
	struct bm_extent *bm_ext;
	int i;

	spin_lock_irq(&device->al_lock);

	if (get_ldev_if_state(device, D_FAILED)) {
		/* ok, ->resync is there. */
		for (i = 0; i < device->resync->nr_elements; i++) {
			e = lc_element_by_index(device->resync, i);
			bm_ext = lc_entry(e, struct bm_extent, lce);
			if (bm_ext->lce.lc_number == LC_FREE)
				continue;
			if (bm_ext->lce.lc_number == device->resync_wenr) {
				drbd_info(device, "dropping %u in drbd_rs_del_all, apparently"
				     " got 'synced' by application io\n",
				     device->resync_wenr);
				D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags));
				D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags));
				clear_bit(BME_NO_WRITES, &bm_ext->flags);
				device->resync_wenr = LC_FREE;
				lc_put(device->resync, &bm_ext->lce);
			}
			if (bm_ext->lce.refcnt != 0) {
				drbd_info(device, "Retrying drbd_rs_del_all() later. "
				     "refcnt=%d\n", bm_ext->lce.refcnt);
				put_ldev(device);
				spin_unlock_irq(&device->al_lock);
				return -EAGAIN;
			}
			D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags));
			D_ASSERT(device, !test_bit(BME_NO_WRITES, &bm_ext->flags));
			lc_del(device->resync, &bm_ext->lce);
		}
		D_ASSERT(device, device->resync->used == 0);
		put_ldev(device);
	}
	spin_unlock_irq(&device->al_lock);
	wake_up(&device->al_wait);

	return 0;
}

/**
 * drbd_rs_failed_io() - Record information on a failure to resync the specified blocks
 * @device:	DRBD device.
 * @sector:	The sector number.
 * @size:	Size of failed IO operation, in byte.
 */
void drbd_rs_failed_io(struct drbd_device *device, sector_t sector, int size)
{
	/* Is called from worker and receiver context _only_ */
	unsigned long sbnr, ebnr, lbnr;
	unsigned long count;
	sector_t esector, nr_sectors;
	int wake_up = 0;

	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
		drbd_err(device, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n",
				(unsigned long long)sector, size);
		return;
	}
	nr_sectors = drbd_get_capacity(device->this_bdev);
	esector = sector + (size >> 9) - 1;

	if (!expect(sector < nr_sectors))
		return;
	if (!expect(esector < nr_sectors))
		esector = nr_sectors - 1;

	lbnr = BM_SECT_TO_BIT(nr_sectors-1);

	/*
	 * round up start sector, round down end sector. we make sure we only
	 * handle full, aligned, BM_BLOCK_SIZE (4K) blocks */
	if (unlikely(esector < BM_SECT_PER_BIT-1))
		return;
	if (unlikely(esector == (nr_sectors-1)))
		ebnr = lbnr;
	else
		ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
	sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);

	if (sbnr > ebnr)
		return;

	/*
	 * ok, (capacity & 7) != 0 sometimes, but who cares...
	 * we count rs_{total,left} in bits, not sectors.
	 */
	spin_lock_irq(&device->al_lock);
	count = drbd_bm_count_bits(device, sbnr, ebnr);
	if (count) {
		device->rs_failed += count;

		if (get_ldev(device)) {
			drbd_try_clear_on_disk_bm(device, sector, count, false);
			put_ldev(device);
		}

		/* just wake_up unconditional now, various lc_chaged(),
		 * lc_put() in drbd_try_clear_on_disk_bm(). */
		wake_up = 1;
	}
	spin_unlock_irq(&device->al_lock);
	if (wake_up)
		wake_up(&device->al_wait);
}