/*
   drbd_actlog.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */

#include <linux/slab.h>
#include <linux/crc32c.h>
#include <linux/drbd.h>
#include <linux/drbd_limits.h>
#include <linux/dynamic_debug.h>
#include "drbd_int.h"


enum al_transaction_types {
	AL_TR_UPDATE = 0,
	AL_TR_INITIALIZED = 0xffff
};
/* all fields on disc in big endian */
struct __packed al_transaction_on_disk {
	/* don't we all like magic */
	__be32	magic;

	/* to identify the most recent transaction block
	 * in the on disk ring buffer */
	__be32	tr_number;

	/* checksum on the full 4k block, with this field set to 0. */
	__be32	crc32c;

	/* type of transaction, special transaction types like:
	 * purge-all, set-all-idle, set-all-active, ... to-be-defined
	 * see also enum al_transaction_types */
	__be16	transaction_type;

	/* we currently allow only a few thousand extents,
	 * so 16bit will be enough for the slot number. */

	/* how many updates in this transaction */
	__be16	n_updates;

	/* maximum slot number, "al-extents" in drbd.conf speak.
	 * Having this in each transaction should make reconfiguration
	 * of that parameter easier. */
	__be16	context_size;

	/* slot number the context starts with */
	__be16	context_start_slot_nr;

	/* Some reserved bytes.  Expected usage is a 64bit counter of
	 * sectors-written since device creation, and other data generation tag
	 * supporting usage */
	__be32	__reserved[4];

	/* --- 36 byte used --- */

	/* Reserve space for up to AL_UPDATES_PER_TRANSACTION changes
	 * in one transaction, then use the remaining byte in the 4k block for
	 * context information.  "Flexible" number of updates per transaction
	 * does not help, as we have to account for the case when all update
	 * slots are used anyways, so it would only complicate code without
	 * additional benefit.
	 */
	__be16	update_slot_nr[AL_UPDATES_PER_TRANSACTION];

	/* but the extent number is 32bit, which at an extent size of 4 MiB
	 * allows to cover device sizes of up to 2**54 Byte (16 PiB) */
	__be32	update_extent_nr[AL_UPDATES_PER_TRANSACTION];

	/* --- 420 bytes used (36 + 64*6) --- */

	/* 4096 - 420 = 3676 = 919 * 4 */
	__be32	context[AL_CONTEXT_PER_TRANSACTION];
};

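/* The numbers in the comments above assume AL_UPDATES_PER_TRANSACTION == 64
 * and AL_CONTEXT_PER_TRANSACTION == 919, which makes the struct exactly one
 * 4096 byte on-disk block.  Should those constants ever change, a
 * compile-time check along the lines of
 *	BUILD_BUG_ON(sizeof(struct al_transaction_on_disk) != 4096);
 * would catch a layout that no longer fits one block. */
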
struct update_odbm_work {
	struct drbd_work w;
	struct drbd_device *device;
	unsigned int enr;
};

struct update_al_work {
	struct drbd_work w;
	struct drbd_device *device;
	struct completion event;
	int err;
};


void *drbd_md_get_buffer(struct drbd_device *device)
{
	int r;

	wait_event(device->misc_wait,
		   (r = atomic_cmpxchg(&device->md_io_in_use, 0, 1)) == 0 ||
		   device->state.disk <= D_FAILED);

	return r ? NULL : page_address(device->md_io_page);
}

void drbd_md_put_buffer(struct drbd_device *device)
{
	if (atomic_dec_and_test(&device->md_io_in_use))
		wake_up(&device->misc_wait);
}

void wait_until_done_or_force_detached(struct drbd_device *device, struct drbd_backing_dev *bdev,
				       unsigned int *done)
{
	long dt;

	rcu_read_lock();
	dt = rcu_dereference(bdev->disk_conf)->disk_timeout;
	rcu_read_unlock();
	dt = dt * HZ / 10;
	if (dt == 0)
		dt = MAX_SCHEDULE_TIMEOUT;

	dt = wait_event_timeout(device->misc_wait,
			*done || test_bit(FORCE_DETACH, &device->flags), dt);
	if (dt == 0) {
		drbd_err(device, "meta-data IO operation timed out\n");
		drbd_chk_io_error(device, 1, DRBD_FORCE_DETACH);
	}
}

static int _drbd_md_sync_page_io(struct drbd_device *device,
				 struct drbd_backing_dev *bdev,
				 struct page *page, sector_t sector,
				 int rw, int size)
{
	struct bio *bio;
	int err;

	device->md_io.done = 0;
	device->md_io.error = -ENODEV;

	if ((rw & WRITE) && !test_bit(MD_NO_FUA, &device->flags))
		rw |= REQ_FUA | REQ_FLUSH;
	rw |= REQ_SYNC;

	bio = bio_alloc_drbd(GFP_NOIO);
	bio->bi_bdev = bdev->md_bdev;
	bio->bi_iter.bi_sector = sector;
	err = -EIO;
	if (bio_add_page(bio, page, size, 0) != size)
		goto out;
	bio->bi_private = &device->md_io;
	bio->bi_end_io = drbd_md_io_complete;
	bio->bi_rw = rw;

	if (!(rw & WRITE) && device->state.disk == D_DISKLESS && device->ldev == NULL)
		/* special case, drbd_md_read() during drbd_adm_attach(): no get_ldev */
		;
	else if (!get_ldev_if_state(device, D_ATTACHING)) {
		/* Corresponding put_ldev in drbd_md_io_complete() */
		drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n");
		err = -ENODEV;
		goto out;
	}

	bio_get(bio); /* one bio_put() is in the completion handler */
	atomic_inc(&device->md_io_in_use); /* drbd_md_put_buffer() is in the completion handler */
	if (drbd_insert_fault(device, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
		bio_endio(bio, -EIO);
	else
		submit_bio(rw, bio);
	wait_until_done_or_force_detached(device, bdev, &device->md_io.done);
	if (bio_flagged(bio, BIO_UPTODATE))
		err = device->md_io.error;

 out:
	bio_put(bio);
	return err;
}

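/* All DRBD meta-data I/O is done in whole, aligned 4 KiB blocks, i.e.
 * 8 sectors of 512 bytes; hence the "sector + 7" range check and the
 * fixed size of 4096 passed down to _drbd_md_sync_page_io(). */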
int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bdev,
			 sector_t sector, int rw)
{
	int err;
	struct page *iop = device->md_io_page;

	D_ASSERT(device, atomic_read(&device->md_io_in_use) == 1);

	BUG_ON(!bdev->md_bdev);

	drbd_dbg(device, "meta_data io: %s [%d]:%s(,%llus,%s) %pS\n",
	     current->comm, current->pid, __func__,
	     (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ",
	     (void*)_RET_IP_ );

	if (sector < drbd_md_first_sector(bdev) ||
	    sector + 7 > drbd_md_last_sector(bdev))
		drbd_alert(device, "%s [%d]:%s(,%llus,%s) out of range md access!\n",
		     current->comm, current->pid, __func__,
		     (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");

	/* we do all our meta data IO in aligned 4k blocks. */
	err = _drbd_md_sync_page_io(device, bdev, iop, sector, rw, 4096);
	if (err) {
		drbd_err(device, "drbd_md_sync_page_io(,%llus,%s) failed with error %d\n",
		    (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ", err);
	}
	return err;
}

static struct bm_extent *find_active_resync_extent(struct drbd_device *device, unsigned int enr)
{
	struct lc_element *tmp;
	tmp = lc_find(device->resync, enr/AL_EXT_PER_BM_SECT);
	if (unlikely(tmp != NULL)) {
		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
		if (test_bit(BME_NO_WRITES, &bm_ext->flags))
			return bm_ext;
	}
	return NULL;
}

static struct lc_element *_al_get(struct drbd_device *device, unsigned int enr, bool nonblock)
{
	struct lc_element *al_ext;
	struct bm_extent *bm_ext;
	int wake;

	spin_lock_irq(&device->al_lock);
	bm_ext = find_active_resync_extent(device, enr);
	if (bm_ext) {
		wake = !test_and_set_bit(BME_PRIORITY, &bm_ext->flags);
		spin_unlock_irq(&device->al_lock);
		if (wake)
			wake_up(&device->al_wait);
		return NULL;
	}
	if (nonblock)
		al_ext = lc_try_get(device->act_log, enr);
	else
		al_ext = lc_get(device->act_log, enr);
	spin_unlock_irq(&device->al_lock);
	return al_ext;
}

bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval *i)
{
	/* for bios crossing activity log extent boundaries,
	 * we may need to activate two extents in one go */
	unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
	unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);

	D_ASSERT(device, (unsigned)(last - first) <= 1);
	D_ASSERT(device, atomic_read(&device->local_cnt) > 0);

	/* FIXME figure out a fast path for bios crossing AL extent boundaries */
	if (first != last)
		return false;

	return _al_get(device, first, true);
}

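/* Example of the AL extent arithmetic used above and below, assuming the
 * 4 MiB activity log extents mentioned elsewhere in this file
 * (AL_EXTENT_SHIFT == 22, so "sector >> 13"): a 4 KiB write at sector 8184
 * covers sectors 8184..8191 and stays within extent 0, while the same
 * write at sector 8188 covers sectors 8188..8195 and thus touches extents
 * 0 and 1 (first != last). */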
static
bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i)
{
	/* for bios crossing activity log extent boundaries,
	 * we may need to activate two extents in one go */
	unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
	unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);
	unsigned enr;
	bool need_transaction = false;

	D_ASSERT(device, first <= last);
	D_ASSERT(device, atomic_read(&device->local_cnt) > 0);

	for (enr = first; enr <= last; enr++) {
		struct lc_element *al_ext;
		wait_event(device->al_wait,
				(al_ext = _al_get(device, enr, false)) != NULL);
		if (al_ext->lc_number != enr)
			need_transaction = true;
	}
	return need_transaction;
}

static int al_write_transaction(struct drbd_device *device, bool delegate);

/* When called through generic_make_request(), we must delegate
 * activity log I/O to the worker thread: a further request
 * submitted via generic_make_request() within the same task
 * would be queued on current->bio_list, and would only start
 * after this function returns (see generic_make_request()).
 *
 * However, if we *are* the worker, we must not delegate to ourselves.
 */

/*
 * @delegate:	delegate activity log I/O to the worker thread
 */
void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate)
{
	bool locked = false;

	BUG_ON(delegate && current == first_peer_device(device)->connection->worker.task);

	/* Serialize multiple transactions.
	 * This uses test_and_set_bit, memory barrier is implicit.
	 */
	wait_event(device->al_wait,
			device->act_log->pending_changes == 0 ||
			(locked = lc_try_lock_for_transaction(device->act_log)));

	if (locked) {
		/* Double check: it may have been committed by someone else,
		 * while we have been waiting for the lock. */
		if (device->act_log->pending_changes) {
			bool write_al_updates;

			rcu_read_lock();
			write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates;
			rcu_read_unlock();

			if (write_al_updates)
				al_write_transaction(device, delegate);
			spin_lock_irq(&device->al_lock);
			/* FIXME
			if (err)
				we need an "lc_cancel" here;
			*/
			lc_committed(device->act_log);
			spin_unlock_irq(&device->al_lock);
		}
		lc_unlock(device->act_log);
		wake_up(&device->al_wait);
	}
}

/*
 * @delegate:	delegate activity log I/O to the worker thread
 */
void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i, bool delegate)
{
	BUG_ON(delegate && current == first_peer_device(device)->connection->worker.task);

	if (drbd_al_begin_io_prepare(device, i))
		drbd_al_begin_io_commit(device, delegate);
}

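/* Non-blocking counterpart to drbd_al_begin_io_prepare(): instead of
 * sleeping on al_wait, fail with -EWOULDBLOCK (try again later) or -EBUSY
 * (a blocking resync extent was just bumped to BME_PRIORITY) if the
 * request cannot get all of its AL extents right now.  The slot accounting
 * below is deliberately pessimistic: it assumes every touched extent is
 * cold and would need both a free cache element and a free update slot. */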
int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i)
{
	struct lru_cache *al = device->act_log;
	/* for bios crossing activity log extent boundaries,
	 * we may need to activate two extents in one go */
	unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
	unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);
	unsigned nr_al_extents;
	unsigned available_update_slots;
	unsigned enr;

	D_ASSERT(device, first <= last);

	nr_al_extents = 1 + last - first; /* worst case: all touched extents are cold. */
	available_update_slots = min(al->nr_elements - al->used,
				al->max_pending_changes - al->pending_changes);

	/* We want all necessary updates for a given request within the same transaction
	 * We could first check how many updates are *actually* needed,
	 * and use that instead of the worst-case nr_al_extents */
	if (available_update_slots < nr_al_extents)
		return -EWOULDBLOCK;

	/* Is resync active in this area? */
	for (enr = first; enr <= last; enr++) {
		struct lc_element *tmp;
		tmp = lc_find(device->resync, enr/AL_EXT_PER_BM_SECT);
		if (unlikely(tmp != NULL)) {
			struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
			if (test_bit(BME_NO_WRITES, &bm_ext->flags)) {
				if (!test_and_set_bit(BME_PRIORITY, &bm_ext->flags))
					return -EBUSY;
				return -EWOULDBLOCK;
			}
		}
	}

	/* Checkout the refcounts.
	 * Given that we checked for available elements and update slots above,
	 * this has to be successful. */
	for (enr = first; enr <= last; enr++) {
		struct lc_element *al_ext;
		al_ext = lc_get_cumulative(device->act_log, enr);
		if (!al_ext)
			drbd_info(device, "LOGIC BUG for enr=%u\n", enr);
	}
	return 0;
}

void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i)
{
	/* for bios crossing activity log extent boundaries,
	 * we may need to activate two extents in one go */
	unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
	unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);
	unsigned enr;
	struct lc_element *extent;
	unsigned long flags;

	D_ASSERT(device, first <= last);
	spin_lock_irqsave(&device->al_lock, flags);

	for (enr = first; enr <= last; enr++) {
		extent = lc_find(device->act_log, enr);
		if (!extent) {
			drbd_err(device, "al_complete_io() called on inactive extent %u\n", enr);
			continue;
		}
		lc_put(device->act_log, extent);
	}
	spin_unlock_irqrestore(&device->al_lock, flags);
	wake_up(&device->al_wait);
}

#if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)
/* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT
 * are still coupled, or assume too much about their relation.
 * Code below will not work if this is violated.
 * Will be cleaned up with some followup patch.
 */
# error FIXME
#endif

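/* The two helpers below translate extent numbers into bitmap page numbers
 * for drbd_bm_mark_for_writeout()/drbd_bm_write_page().  Worked example,
 * assuming 4 KiB pages (PAGE_SHIFT == 12), 4 KiB bitmap blocks
 * (BM_BLOCK_SHIFT == 12), 4 MiB AL extents (AL_EXTENT_SHIFT == 22) and
 * 16 MiB resync extents (BM_EXT_SHIFT == 24): one bitmap page holds
 * 4096 * 8 = 2^15 bits; an AL extent covers 2^(22-12) = 1024 bits, so 32
 * AL extents map to one page (shift by 15 - 10 = 5); a resync extent
 * covers 2^(24-12) = 4096 bits, so 8 resync extents map to one page
 * (shift by 15 - 12 = 3). */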
static unsigned int al_extent_to_bm_page(unsigned int al_enr)
{
	return al_enr >>
		/* bit to page */
		((PAGE_SHIFT + 3) -
		/* al extent number to bit */
		 (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT));
}

static unsigned int rs_extent_to_bm_page(unsigned int rs_enr)
{
	return rs_enr >>
		/* bit to page */
		((PAGE_SHIFT + 3) -
		/* resync extent number to bit */
		 (BM_EXT_SHIFT - BM_BLOCK_SHIFT));
}

static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device)
{
	const unsigned int stripes = device->ldev->md.al_stripes;
	const unsigned int stripe_size_4kB = device->ldev->md.al_stripe_size_4k;

	/* transaction number, modulo on-disk ring buffer wrap around */
	unsigned int t = device->al_tr_number % (device->ldev->md.al_size_4k);

	/* ... to aligned 4k on disk block */
	t = ((t % stripes) * stripe_size_4kB) + t/stripes;

	/* ... to 512 byte sector in activity log */
	t *= 8;

	/* ... plus offset to the on disk position */
	return device->ldev->md.md_offset + device->ldev->md.al_offset + t;
}

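/* Worked example for al_tr_number_to_on_disk_sector() above, using a
 * hypothetical layout of al_stripes = 4 and al_stripe_size_4k = 8 (i.e.
 * al_size_4k = 32 transaction slots): transactions 0, 1, 2, 3 land in
 * slots 0, 8, 16, 24 of the on-disk ring buffer and transaction 4 lands
 * in slot 1, so consecutive transactions hit different stripes.  With a
 * single stripe (al_stripes = 1) the mapping degenerates to t itself. */
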
static int
_al_write_transaction(struct drbd_device *device)
{
	struct al_transaction_on_disk *buffer;
	struct lc_element *e;
	sector_t sector;
	int i, mx;
	unsigned extent_nr;
	unsigned crc = 0;
	int err = 0;

	if (!get_ldev(device)) {
		drbd_err(device, "disk is %s, cannot start al transaction\n",
			drbd_disk_str(device->state.disk));
		return -EIO;
	}

	/* The bitmap write may have failed, causing a state change. */
	if (device->state.disk < D_INCONSISTENT) {
		drbd_err(device,
			"disk is %s, cannot write al transaction\n",
			drbd_disk_str(device->state.disk));
		put_ldev(device);
		return -EIO;
	}

	buffer = drbd_md_get_buffer(device); /* protects md_io_buffer, al_tr_cycle, ... */
	if (!buffer) {
		drbd_err(device, "disk failed while waiting for md_io buffer\n");
		put_ldev(device);
		return -ENODEV;
	}

	memset(buffer, 0, sizeof(*buffer));
	buffer->magic = cpu_to_be32(DRBD_AL_MAGIC);
	buffer->tr_number = cpu_to_be32(device->al_tr_number);

	i = 0;

	/* Even though no one can start to change this list
	 * once we set the LC_LOCKED -- from drbd_al_begin_io(),
	 * lc_try_lock_for_transaction() --, someone may still
	 * be in the process of changing it. */
	spin_lock_irq(&device->al_lock);
	list_for_each_entry(e, &device->act_log->to_be_changed, list) {
		if (i == AL_UPDATES_PER_TRANSACTION) {
			i++;
			break;
		}
		buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index);
		buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number);
		if (e->lc_number != LC_FREE)
			drbd_bm_mark_for_writeout(device,
					al_extent_to_bm_page(e->lc_number));
		i++;
	}
	spin_unlock_irq(&device->al_lock);
	BUG_ON(i > AL_UPDATES_PER_TRANSACTION);

	buffer->n_updates = cpu_to_be16(i);
	for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) {
		buffer->update_slot_nr[i] = cpu_to_be16(-1);
		buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE);
	}

	buffer->context_size = cpu_to_be16(device->act_log->nr_elements);
	buffer->context_start_slot_nr = cpu_to_be16(device->al_tr_cycle);

	mx = min_t(int, AL_CONTEXT_PER_TRANSACTION,
		   device->act_log->nr_elements - device->al_tr_cycle);
	for (i = 0; i < mx; i++) {
		unsigned idx = device->al_tr_cycle + i;
		extent_nr = lc_element_by_index(device->act_log, idx)->lc_number;
		buffer->context[i] = cpu_to_be32(extent_nr);
	}
	for (; i < AL_CONTEXT_PER_TRANSACTION; i++)
		buffer->context[i] = cpu_to_be32(LC_FREE);

	device->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION;
	if (device->al_tr_cycle >= device->act_log->nr_elements)
		device->al_tr_cycle = 0;

	sector = al_tr_number_to_on_disk_sector(device);

	crc = crc32c(0, buffer, 4096);
	buffer->crc32c = cpu_to_be32(crc);

	if (drbd_bm_write_hinted(device))
		err = -EIO;
	else {
		bool write_al_updates;
		rcu_read_lock();
		write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates;
		rcu_read_unlock();
		if (write_al_updates) {
			if (drbd_md_sync_page_io(device, device->ldev, sector, WRITE)) {
				err = -EIO;
				drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR);
			} else {
				device->al_tr_number++;
				device->al_writ_cnt++;
			}
		}
	}

	drbd_md_put_buffer(device);
	put_ldev(device);

	return err;
}


static int w_al_write_transaction(struct drbd_work *w, int unused)
{
	struct update_al_work *aw = container_of(w, struct update_al_work, w);
	struct drbd_device *device = aw->device;
	int err;

	err = _al_write_transaction(device);
	aw->err = err;
	complete(&aw->event);

	return err != -EIO ? err : 0;
}

/* Calls from worker context (see w_restart_disk_io()) need to write the
   transaction directly.  Others came through generic_make_request(),
   those need to delegate it to the worker. */
static int al_write_transaction(struct drbd_device *device, bool delegate)
{
	if (delegate) {
		struct update_al_work al_work;
		init_completion(&al_work.event);
		al_work.w.cb = w_al_write_transaction;
		al_work.device = device;
		drbd_queue_work_front(&first_peer_device(device)->connection->sender_work,
				      &al_work.w);
		wait_for_completion(&al_work.event);
		return al_work.err;
	} else
		return _al_write_transaction(device);
}

static int _try_lc_del(struct drbd_device *device, struct lc_element *al_ext)
{
	int rv;

	spin_lock_irq(&device->al_lock);
	rv = (al_ext->refcnt == 0);
	if (likely(rv))
		lc_del(device->act_log, al_ext);
	spin_unlock_irq(&device->al_lock);

	return rv;
}

/**
 * drbd_al_shrink() - Removes all active extents from the activity log
 * @device:	DRBD device.
 *
 * Removes all active extents from the activity log, waiting until
 * the reference count of each entry dropped to 0 first, of course.
 *
 * You need to lock device->act_log with lc_try_lock() / lc_unlock()
 */
void drbd_al_shrink(struct drbd_device *device)
{
	struct lc_element *al_ext;
	int i;

	D_ASSERT(device, test_bit(__LC_LOCKED, &device->act_log->flags));

	for (i = 0; i < device->act_log->nr_elements; i++) {
		al_ext = lc_element_by_index(device->act_log, i);
		if (al_ext->lc_number == LC_FREE)
			continue;
		wait_event(device->al_wait, _try_lc_del(device, al_ext));
	}

	wake_up(&device->al_wait);
}

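/* Write an empty, "initialized" transaction to every slot of the on-disk
 * activity log ring buffer.  Each slot is one 4 KiB block, i.e. 8 sectors,
 * hence the "i * 8" stride below. */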
int drbd_initialize_al(struct drbd_device *device, void *buffer)
{
	struct al_transaction_on_disk *al = buffer;
	struct drbd_md *md = &device->ldev->md;
	sector_t al_base = md->md_offset + md->al_offset;
	int al_size_4k = md->al_stripes * md->al_stripe_size_4k;
	int i;

	memset(al, 0, 4096);
	al->magic = cpu_to_be32(DRBD_AL_MAGIC);
	al->transaction_type = cpu_to_be16(AL_TR_INITIALIZED);
	al->crc32c = cpu_to_be32(crc32c(0, al, 4096));

	for (i = 0; i < al_size_4k; i++) {
		int err = drbd_md_sync_page_io(device, device->ldev, al_base + i * 8, WRITE);
		if (err)
			return err;
	}
	return 0;
}

static int w_update_odbm(struct drbd_work *w, int unused)
{
	struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w);
	struct drbd_device *device = udw->device;
	struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, };

	if (!get_ldev(device)) {
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_warn(device, "Can not update on disk bitmap, local IO disabled.\n");
		kfree(udw);
		return 0;
	}

	drbd_bm_write_page(device, rs_extent_to_bm_page(udw->enr));
	put_ldev(device);

	kfree(udw);

	if (drbd_bm_total_weight(device) <= device->rs_failed) {
		switch (device->state.conn) {
		case C_SYNC_SOURCE:  case C_SYNC_TARGET:
		case C_PAUSED_SYNC_S: case C_PAUSED_SYNC_T:
			drbd_resync_finished(device);
		default:
			/* nothing to do */
			break;
		}
	}
	drbd_bcast_event(device, &sib);

	return 0;
}


/* ATTENTION. The AL's extents are 4MB each, while the extents in the
 * resync LRU-cache are 16MB each.
 * The caller of this function has to hold a get_ldev() reference.
 *
 * TODO will be obsoleted once we have a caching lru of the on disk bitmap
 */
static void drbd_try_clear_on_disk_bm(struct drbd_device *device, sector_t sector,
				      int count, int success)
{
	struct lc_element *e;
	struct update_odbm_work *udw;

	unsigned int enr;

	D_ASSERT(device, atomic_read(&device->local_cnt));

	/* I simply assume that a sector/size pair never crosses
	 * a 16 MB extent border. (Currently this is true...) */
	enr = BM_SECT_TO_EXT(sector);

	e = lc_get(device->resync, enr);
	if (e) {
		struct bm_extent *ext = lc_entry(e, struct bm_extent, lce);
		if (ext->lce.lc_number == enr) {
			if (success)
				ext->rs_left -= count;
			else
				ext->rs_failed += count;
			if (ext->rs_left < ext->rs_failed) {
				drbd_warn(device, "BAD! sector=%llus enr=%u rs_left=%d "
				    "rs_failed=%d count=%d cstate=%s\n",
				     (unsigned long long)sector,
				     ext->lce.lc_number, ext->rs_left,
				     ext->rs_failed, count,
				     drbd_conn_str(device->state.conn));

				/* We don't expect to be able to clear more bits
				 * than have been set when we originally counted
				 * the set bits to cache that value in ext->rs_left.
				 * Whatever the reason (disconnect during resync,
				 * delayed local completion of an application write),
				 * try to fix it up by recounting here. */
				ext->rs_left = drbd_bm_e_weight(device, enr);
			}
		} else {
			/* Normally this element should be in the cache,
			 * since drbd_rs_begin_io() pulled it already in.
			 *
			 * But maybe an application write finished, and we set
			 * something outside the resync lru_cache in sync.
			 */
			int rs_left = drbd_bm_e_weight(device, enr);
			if (ext->flags != 0) {
				drbd_warn(device, "changing resync lce: %d[%u;%02lx]"
				     " -> %d[%u;00]\n",
				     ext->lce.lc_number, ext->rs_left,
				     ext->flags, enr, rs_left);
				ext->flags = 0;
			}
			if (ext->rs_failed) {
				drbd_warn(device, "Kicking resync_lru element enr=%u "
				     "out with rs_failed=%d\n",
				     ext->lce.lc_number, ext->rs_failed);
			}
			ext->rs_left = rs_left;
			ext->rs_failed = success ? 0 : count;
			/* we don't keep a persistent log of the resync lru,
			 * we can commit any change right away. */
			lc_committed(device->resync);
		}
		lc_put(device->resync, &ext->lce);
		/* no race, we are within the al_lock! */

		if (ext->rs_left == ext->rs_failed) {
			ext->rs_failed = 0;

			udw = kmalloc(sizeof(*udw), GFP_ATOMIC);
			if (udw) {
				udw->enr = ext->lce.lc_number;
				udw->w.cb = w_update_odbm;
				udw->device = device;
				drbd_queue_work_front(&first_peer_device(device)->connection->sender_work,
						      &udw->w);
			} else {
				drbd_warn(device, "Could not kmalloc an udw\n");
			}
		}
	} else {
		drbd_err(device, "lc_get() failed! locked=%d/%d flags=%lu\n",
		    device->resync_locked,
		    device->resync->nr_elements,
		    device->resync->flags);
	}
}

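/* device->rs_mark_time[] and rs_mark_left[] form a small ring buffer of
 * DRBD_SYNC_MARKS (timestamp, bits-still-out-of-sync) samples, advanced at
 * most once every DRBD_SYNC_MARK_STEP jiffies; paused resyncs deliberately
 * do not record new marks.  The samples are used elsewhere to estimate the
 * recent resync speed. */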
void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go)
{
	unsigned long now = jiffies;
	unsigned long last = device->rs_mark_time[device->rs_last_mark];
	int next = (device->rs_last_mark + 1) % DRBD_SYNC_MARKS;
	if (time_after_eq(now, last + DRBD_SYNC_MARK_STEP)) {
		if (device->rs_mark_left[device->rs_last_mark] != still_to_go &&
		    device->state.conn != C_PAUSED_SYNC_T &&
		    device->state.conn != C_PAUSED_SYNC_S) {
			device->rs_mark_time[next] = now;
			device->rs_mark_left[next] = still_to_go;
			device->rs_last_mark = next;
		}
	}
}

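/* Example of the "only whole 4 KiB blocks" rounding done in
 * __drbd_set_in_sync() below (BM_BLOCK_SIZE is 4 KiB, i.e. 8 sectors per
 * bitmap bit): a 16-sector write starting at sector 4 covers sectors
 * 4..19; only bit 1 (sectors 8..15) is completely covered, so only that
 * bit may be cleared.  Partially covered blocks have to stay out of sync. */
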
/* clear the bit corresponding to the piece of storage in question:
 * size bytes of data starting from sector.  Only clear the bits of the
 * affected one or more _aligned_ BM_BLOCK_SIZE blocks.
 *
 * called by worker on C_SYNC_TARGET and receiver on SyncSource.
 *
 */
void __drbd_set_in_sync(struct drbd_device *device, sector_t sector, int size,
			const char *file, const unsigned int line)
{
	/* Is called from worker and receiver context _only_ */
	unsigned long sbnr, ebnr, lbnr;
	unsigned long count = 0;
	sector_t esector, nr_sectors;
	int wake_up = 0;
	unsigned long flags;

	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
		drbd_err(device, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n",
				(unsigned long long)sector, size);
		return;
	}

	if (!get_ldev(device))
		return; /* no disk, no metadata, no bitmap to clear bits in */

	nr_sectors = drbd_get_capacity(device->this_bdev);
	esector = sector + (size >> 9) - 1;

	if (!expect(sector < nr_sectors))
		goto out;
	if (!expect(esector < nr_sectors))
		esector = nr_sectors - 1;

	lbnr = BM_SECT_TO_BIT(nr_sectors-1);

	/* we clear it (in sync).
	 * round up start sector, round down end sector.  we make sure we only
	 * clear full, aligned, BM_BLOCK_SIZE (4K) blocks */
	if (unlikely(esector < BM_SECT_PER_BIT-1))
		goto out;
	if (unlikely(esector == (nr_sectors-1)))
		ebnr = lbnr;
	else
		ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
	sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);

	if (sbnr > ebnr)
		goto out;

	/*
	 * ok, (capacity & 7) != 0 sometimes, but who cares...
	 * we count rs_{total,left} in bits, not sectors.
	 */
	count = drbd_bm_clear_bits(device, sbnr, ebnr);
	if (count) {
		drbd_advance_rs_marks(device, drbd_bm_total_weight(device));
		spin_lock_irqsave(&device->al_lock, flags);
		drbd_try_clear_on_disk_bm(device, sector, count, true);
		spin_unlock_irqrestore(&device->al_lock, flags);

		/* just wake_up unconditional now, various lc_changed(),
		 * lc_put() in drbd_try_clear_on_disk_bm(). */
		wake_up = 1;
	}
out:
	put_ldev(device);
	if (wake_up)
		wake_up(&device->al_wait);
}

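/* Note the asymmetry to __drbd_set_in_sync() above: when marking blocks
 * out of sync there is no rounding to fully covered 4 KiB blocks.  Setting
 * a few extra bits merely causes some unnecessary resync traffic, whereas
 * clearing a bit for a block that is only partially covered would wrongly
 * mark still-divergent data as in sync. */
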
/*
 * this is intended to set one request worth of data out of sync.
 * affects at least 1 bit,
 * and at most 1+DRBD_MAX_BIO_SIZE/BM_BLOCK_SIZE bits.
 *
 * called by tl_clear and drbd_send_dblock (==drbd_make_request).
 * so this can be _any_ process.
 */
int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector, int size,
			   const char *file, const unsigned int line)
{
	unsigned long sbnr, ebnr, flags;
	sector_t esector, nr_sectors;
	unsigned int enr, count = 0;
	struct lc_element *e;

	/* this should be an empty REQ_FLUSH */
	if (size == 0)
		return 0;

	if (size < 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
		drbd_err(device, "sector: %llus, size: %d\n",
			(unsigned long long)sector, size);
		return 0;
	}

	if (!get_ldev(device))
		return 0; /* no disk, no metadata, no bitmap to set bits in */

	nr_sectors = drbd_get_capacity(device->this_bdev);
	esector = sector + (size >> 9) - 1;

	if (!expect(sector < nr_sectors))
		goto out;
	if (!expect(esector < nr_sectors))
		esector = nr_sectors - 1;

	/* we set it out of sync,
	 * we do not need to round anything here */
	sbnr = BM_SECT_TO_BIT(sector);
	ebnr = BM_SECT_TO_BIT(esector);

	/* ok, (capacity & 7) != 0 sometimes, but who cares...
	 * we count rs_{total,left} in bits, not sectors. */
	spin_lock_irqsave(&device->al_lock, flags);
	count = drbd_bm_set_bits(device, sbnr, ebnr);

	enr = BM_SECT_TO_EXT(sector);
	e = lc_find(device->resync, enr);
	if (e)
		lc_entry(e, struct bm_extent, lce)->rs_left += count;
	spin_unlock_irqrestore(&device->al_lock, flags);

out:
	put_ldev(device);

	return count;
}

static
struct bm_extent *_bme_get(struct drbd_device *device, unsigned int enr)
{
	struct lc_element *e;
	struct bm_extent *bm_ext;
	int wakeup = 0;
	unsigned long rs_flags;

	spin_lock_irq(&device->al_lock);
	if (device->resync_locked > device->resync->nr_elements/2) {
		spin_unlock_irq(&device->al_lock);
		return NULL;
	}
	e = lc_get(device->resync, enr);
	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
	if (bm_ext) {
		if (bm_ext->lce.lc_number != enr) {
			bm_ext->rs_left = drbd_bm_e_weight(device, enr);
			bm_ext->rs_failed = 0;
			lc_committed(device->resync);
			wakeup = 1;
		}
		if (bm_ext->lce.refcnt == 1)
			device->resync_locked++;
		set_bit(BME_NO_WRITES, &bm_ext->flags);
	}
	rs_flags = device->resync->flags;
	spin_unlock_irq(&device->al_lock);
	if (wakeup)
		wake_up(&device->al_wait);

	if (!bm_ext) {
		if (rs_flags & LC_STARVING)
			drbd_warn(device, "Have to wait for element"
			     " (resync LRU too small?)\n");
		BUG_ON(rs_flags & LC_LOCKED);
	}

	return bm_ext;
}

static int _is_in_al(struct drbd_device *device, unsigned int enr)
{
	int rv;

	spin_lock_irq(&device->al_lock);
	rv = lc_is_used(device->act_log, enr);
	spin_unlock_irq(&device->al_lock);

	return rv;
}

/**
 * drbd_rs_begin_io() - Gets an extent in the resync LRU cache and sets it to BME_LOCKED
 * @device:	DRBD device.
 * @sector:	The sector number.
 *
 * This function sleeps on al_wait.  Returns 0 on success, -EINTR if interrupted.
 */
int drbd_rs_begin_io(struct drbd_device *device, sector_t sector)
{
	unsigned int enr = BM_SECT_TO_EXT(sector);
	struct bm_extent *bm_ext;
	int i, sig;
	bool sa;

retry:
	sig = wait_event_interruptible(device->al_wait,
			(bm_ext = _bme_get(device, enr)));
	if (sig)
		return -EINTR;

	if (test_bit(BME_LOCKED, &bm_ext->flags))
		return 0;

	/* step aside only while we are above c-min-rate; unless disabled. */
	sa = drbd_rs_c_min_rate_throttle(device);

	for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
		sig = wait_event_interruptible(device->al_wait,
					       !_is_in_al(device, enr * AL_EXT_PER_BM_SECT + i) ||
					       (sa && test_bit(BME_PRIORITY, &bm_ext->flags)));

		if (sig || (sa && test_bit(BME_PRIORITY, &bm_ext->flags))) {
			spin_lock_irq(&device->al_lock);
			if (lc_put(device->resync, &bm_ext->lce) == 0) {
				bm_ext->flags = 0; /* clears BME_NO_WRITES and eventually BME_PRIORITY */
				device->resync_locked--;
				wake_up(&device->al_wait);
			}
			spin_unlock_irq(&device->al_lock);
			if (sig)
				return -EINTR;
			if (schedule_timeout_interruptible(HZ/10))
				return -EINTR;
			goto retry;
		}
	}
	set_bit(BME_LOCKED, &bm_ext->flags);
	return 0;
}

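/* BME_PRIORITY is set by _al_get() when application I/O had to back off
 * because a resync extent held BME_NO_WRITES.  Together with
 * drbd_rs_c_min_rate_throttle(), it makes the retry loop above give up the
 * extent again, so that application I/O is not starved by the resync. */
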
/**
 * drbd_try_rs_begin_io() - Gets an extent in the resync LRU cache, does not sleep
 * @device:	DRBD device.
 * @sector:	The sector number.
 *
 * Gets an extent in the resync LRU cache, sets it to BME_NO_WRITES, then
 * tries to set it to BME_LOCKED.  Returns 0 upon success, and -EAGAIN
 * if there is still application IO going on in this area.
 */
int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector)
{
	unsigned int enr = BM_SECT_TO_EXT(sector);
	const unsigned int al_enr = enr*AL_EXT_PER_BM_SECT;
	struct lc_element *e;
	struct bm_extent *bm_ext;
	int i;

	spin_lock_irq(&device->al_lock);
	if (device->resync_wenr != LC_FREE && device->resync_wenr != enr) {
		/* in case you have very heavy scattered io, it may
		 * stall the syncer for an undefined time if we give up
		 * the ref count when we try again and requeue.
		 *
		 * if we don't give up the refcount, but the next time
		 * we are scheduled this extent has been "synced" by new
		 * application writes, we'd miss the lc_put on the
		 * extent we keep the refcount on.
		 * so we remembered which extent we had to try again, and
		 * if the next requested one is something else, we do
		 * the lc_put here...
		 * we also have to wake_up
		 */
		e = lc_find(device->resync, device->resync_wenr);
		bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
		if (bm_ext) {
			D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags));
			D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags));
			clear_bit(BME_NO_WRITES, &bm_ext->flags);
			device->resync_wenr = LC_FREE;
			if (lc_put(device->resync, &bm_ext->lce) == 0)
				device->resync_locked--;
			wake_up(&device->al_wait);
		} else {
			drbd_alert(device, "LOGIC BUG\n");
		}
	}
	/* TRY. */
	e = lc_try_get(device->resync, enr);
	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
	if (bm_ext) {
		if (test_bit(BME_LOCKED, &bm_ext->flags))
			goto proceed;
		if (!test_and_set_bit(BME_NO_WRITES, &bm_ext->flags)) {
			device->resync_locked++;
		} else {
			/* we did set the BME_NO_WRITES,
			 * but then could not set BME_LOCKED,
			 * so we tried again.
			 * drop the extra reference. */
			bm_ext->lce.refcnt--;
			D_ASSERT(device, bm_ext->lce.refcnt > 0);
		}
		goto check_al;
	} else {
		/* do we rather want to try later? */
		if (device->resync_locked > device->resync->nr_elements-3)
			goto try_again;
		/* Do or do not. There is no try. -- Yoda */
		e = lc_get(device->resync, enr);
		bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
		if (!bm_ext) {
			const unsigned long rs_flags = device->resync->flags;
			if (rs_flags & LC_STARVING)
				drbd_warn(device, "Have to wait for element"
				     " (resync LRU too small?)\n");
			BUG_ON(rs_flags & LC_LOCKED);
			goto try_again;
		}
		if (bm_ext->lce.lc_number != enr) {
			bm_ext->rs_left = drbd_bm_e_weight(device, enr);
			bm_ext->rs_failed = 0;
			lc_committed(device->resync);
			wake_up(&device->al_wait);
			D_ASSERT(device, test_bit(BME_LOCKED, &bm_ext->flags) == 0);
		}
		set_bit(BME_NO_WRITES, &bm_ext->flags);
		D_ASSERT(device, bm_ext->lce.refcnt == 1);
		device->resync_locked++;
		goto check_al;
	}
check_al:
	for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
		if (lc_is_used(device->act_log, al_enr+i))
			goto try_again;
	}
	set_bit(BME_LOCKED, &bm_ext->flags);
proceed:
	device->resync_wenr = LC_FREE;
	spin_unlock_irq(&device->al_lock);
	return 0;

try_again:
	if (bm_ext)
		device->resync_wenr = enr;
	spin_unlock_irq(&device->al_lock);
	return -EAGAIN;
}

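/* Drop the reference taken by drbd_rs_begin_io()/drbd_try_rs_begin_io().
 * Once the last reference on the extent is gone, all BME_* flags are
 * cleared and waiters on al_wait are woken up. */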
void drbd_rs_complete_io(struct drbd_device *device, sector_t sector)
{
	unsigned int enr = BM_SECT_TO_EXT(sector);
	struct lc_element *e;
	struct bm_extent *bm_ext;
	unsigned long flags;

	spin_lock_irqsave(&device->al_lock, flags);
	e = lc_find(device->resync, enr);
	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
	if (!bm_ext) {
		spin_unlock_irqrestore(&device->al_lock, flags);
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "drbd_rs_complete_io() called, but extent not found\n");
		return;
	}

	if (bm_ext->lce.refcnt == 0) {
		spin_unlock_irqrestore(&device->al_lock, flags);
		drbd_err(device, "drbd_rs_complete_io(,%llu [=%u]) called, "
		    "but refcnt is 0!?\n",
		    (unsigned long long)sector, enr);
		return;
	}

	if (lc_put(device->resync, &bm_ext->lce) == 0) {
		bm_ext->flags = 0; /* clear BME_LOCKED, BME_NO_WRITES and BME_PRIORITY */
		device->resync_locked--;
		wake_up(&device->al_wait);
	}

	spin_unlock_irqrestore(&device->al_lock, flags);
}

/**
 * drbd_rs_cancel_all() - Removes all extents from the resync LRU (even BME_LOCKED)
 * @device:	DRBD device.
 */
void drbd_rs_cancel_all(struct drbd_device *device)
{
	spin_lock_irq(&device->al_lock);

	if (get_ldev_if_state(device, D_FAILED)) { /* Makes sure ->resync is there. */
		lc_reset(device->resync);
		put_ldev(device);
	}
	device->resync_locked = 0;
	device->resync_wenr = LC_FREE;
	spin_unlock_irq(&device->al_lock);
	wake_up(&device->al_wait);
}

/**
 * drbd_rs_del_all() - Gracefully remove all extents from the resync LRU
 * @device:	DRBD device.
 *
 * Returns 0 upon success, -EAGAIN if at least one reference count was
 * not zero.
 */
int drbd_rs_del_all(struct drbd_device *device)
{
	struct lc_element *e;
	struct bm_extent *bm_ext;
	int i;

	spin_lock_irq(&device->al_lock);

	if (get_ldev_if_state(device, D_FAILED)) {
		/* ok, ->resync is there. */
		for (i = 0; i < device->resync->nr_elements; i++) {
			e = lc_element_by_index(device->resync, i);
			bm_ext = lc_entry(e, struct bm_extent, lce);
			if (bm_ext->lce.lc_number == LC_FREE)
				continue;
			if (bm_ext->lce.lc_number == device->resync_wenr) {
				drbd_info(device, "dropping %u in drbd_rs_del_all, apparently"
				     " got 'synced' by application io\n",
				     device->resync_wenr);
				D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags));
				D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags));
				clear_bit(BME_NO_WRITES, &bm_ext->flags);
				device->resync_wenr = LC_FREE;
				lc_put(device->resync, &bm_ext->lce);
			}
			if (bm_ext->lce.refcnt != 0) {
				drbd_info(device, "Retrying drbd_rs_del_all() later. "
				     "refcnt=%d\n", bm_ext->lce.refcnt);
				put_ldev(device);
				spin_unlock_irq(&device->al_lock);
				return -EAGAIN;
			}
			D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags));
			D_ASSERT(device, !test_bit(BME_NO_WRITES, &bm_ext->flags));
			lc_del(device->resync, &bm_ext->lce);
		}
		D_ASSERT(device, device->resync->used == 0);
		put_ldev(device);
	}
	spin_unlock_irq(&device->al_lock);
	wake_up(&device->al_wait);

	return 0;
}

/**
 * drbd_rs_failed_io() - Record information on a failure to resync the specified blocks
 * @device:	DRBD device.
 * @sector:	The sector number.
 * @size:	Size of failed IO operation, in bytes.
 */
void drbd_rs_failed_io(struct drbd_device *device, sector_t sector, int size)
{
	/* Is called from worker and receiver context _only_ */
	unsigned long sbnr, ebnr, lbnr;
	unsigned long count;
	sector_t esector, nr_sectors;
	int wake_up = 0;

	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
		drbd_err(device, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n",
				(unsigned long long)sector, size);
		return;
	}
	nr_sectors = drbd_get_capacity(device->this_bdev);
	esector = sector + (size >> 9) - 1;

	if (!expect(sector < nr_sectors))
		return;
	if (!expect(esector < nr_sectors))
		esector = nr_sectors - 1;

	lbnr = BM_SECT_TO_BIT(nr_sectors-1);

	/*
	 * round up start sector, round down end sector.  we make sure we only
	 * handle full, aligned, BM_BLOCK_SIZE (4K) blocks */
	if (unlikely(esector < BM_SECT_PER_BIT-1))
		return;
	if (unlikely(esector == (nr_sectors-1)))
		ebnr = lbnr;
	else
		ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
	sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);

	if (sbnr > ebnr)
		return;

	/*
	 * ok, (capacity & 7) != 0 sometimes, but who cares...
	 * we count rs_{total,left} in bits, not sectors.
	 */
	spin_lock_irq(&device->al_lock);
	count = drbd_bm_count_bits(device, sbnr, ebnr);
	if (count) {
		device->rs_failed += count;

		if (get_ldev(device)) {
			drbd_try_clear_on_disk_bm(device, sector, count, false);
			put_ldev(device);
		}

		/* just wake_up unconditional now, various lc_changed(),
		 * lc_put() in drbd_try_clear_on_disk_bm(). */
		wake_up = 1;
	}
	spin_unlock_irq(&device->al_lock);
	if (wake_up)
		wake_up(&device->al_wait);
}