/*
   drbd_actlog.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */

#include <linux/slab.h>
#include <linux/crc32c.h>
#include <linux/drbd.h>
#include <linux/drbd_limits.h>
#include <linux/dynamic_debug.h>
#include "drbd_int.h"


enum al_transaction_types {
	AL_TR_UPDATE = 0,
	AL_TR_INITIALIZED = 0xffff
};
/* all fields on disk in big endian */
struct __packed al_transaction_on_disk {
	/* don't we all like magic */
	__be32	magic;

	/* to identify the most recent transaction block
	 * in the on disk ring buffer */
	__be32	tr_number;

	/* checksum on the full 4k block, with this field set to 0. */
	__be32	crc32c;

	/* type of transaction, special transaction types like:
	 * purge-all, set-all-idle, set-all-active, ... to-be-defined
	 * see also enum al_transaction_types */
	__be16	transaction_type;

	/* we currently allow only a few thousand extents,
	 * so 16bit will be enough for the slot number. */

	/* how many updates in this transaction */
	__be16	n_updates;

	/* maximum slot number, "al-extents" in drbd.conf speak.
	 * Having this in each transaction should make reconfiguration
	 * of that parameter easier. */
	__be16	context_size;

	/* slot number the context starts with */
	__be16	context_start_slot_nr;

	/* Some reserved bytes.  Expected usage is a 64bit counter of
	 * sectors-written since device creation, and other data generation tag
	 * supporting usage */
	__be32	__reserved[4];

	/* --- 36 bytes used --- */

	/* Reserve space for up to AL_UPDATES_PER_TRANSACTION changes
	 * in one transaction, then use the remaining bytes in the 4k block for
	 * context information.  "Flexible" number of updates per transaction
	 * does not help, as we have to account for the case when all update
	 * slots are used anyways, so it would only complicate code without
	 * additional benefit.
	 */
	__be16	update_slot_nr[AL_UPDATES_PER_TRANSACTION];

	/* but the extent number is 32bit, which at an extent size of 4 MiB
	 * allows to cover device sizes of up to 2**54 Byte (16 PiB) */
	__be32	update_extent_nr[AL_UPDATES_PER_TRANSACTION];

	/* --- 420 bytes used (36 + 64*6) --- */

	/* 4096 - 420 = 3676 = 919 * 4 */
	__be32	context[AL_CONTEXT_PER_TRANSACTION];
};
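/*
 * Layout check, for illustration only; the numbers follow from the
 * comments above (AL_UPDATES_PER_TRANSACTION == 64 update slots,
 * AL_CONTEXT_PER_TRANSACTION == 919 context slots):
 *   header + reserved:  36 bytes
 *   update_slot_nr:     64 * 2 = 128 bytes
 *   update_extent_nr:   64 * 4 = 256 bytes
 *   context:            919 * 4 = 3676 bytes
 *   total:              36 + 128 + 256 + 3676 = 4096 bytes, one 4k block.
 */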
struct update_al_work {
	struct drbd_work w;
	struct drbd_device *device;
	struct completion event;
	int err;
};


void *drbd_md_get_buffer(struct drbd_device *device)
{
	int r;

	wait_event(device->misc_wait,
		   (r = atomic_cmpxchg(&device->md_io_in_use, 0, 1)) == 0 ||
		   device->state.disk <= D_FAILED);

	return r ? NULL : page_address(device->md_io_page);
}

void drbd_md_put_buffer(struct drbd_device *device)
{
	if (atomic_dec_and_test(&device->md_io_in_use))
		wake_up(&device->misc_wait);
}

void wait_until_done_or_force_detached(struct drbd_device *device, struct drbd_backing_dev *bdev,
				       unsigned int *done)
{
	long dt;

	rcu_read_lock();
	dt = rcu_dereference(bdev->disk_conf)->disk_timeout;
	rcu_read_unlock();
	dt = dt * HZ / 10;
	if (dt == 0)
		dt = MAX_SCHEDULE_TIMEOUT;

	dt = wait_event_timeout(device->misc_wait,
			*done || test_bit(FORCE_DETACH, &device->flags), dt);
	if (dt == 0) {
		drbd_err(device, "meta-data IO operation timed out\n");
		drbd_chk_io_error(device, 1, DRBD_FORCE_DETACH);
	}
}

static int _drbd_md_sync_page_io(struct drbd_device *device,
				 struct drbd_backing_dev *bdev,
				 struct page *page, sector_t sector,
				 int rw, int size)
{
	struct bio *bio;
	int err;

	device->md_io.done = 0;
	device->md_io.error = -ENODEV;

	if ((rw & WRITE) && !test_bit(MD_NO_FUA, &device->flags))
		rw |= REQ_FUA | REQ_FLUSH;
	rw |= REQ_SYNC;

	bio = bio_alloc_drbd(GFP_NOIO);
	bio->bi_bdev = bdev->md_bdev;
	bio->bi_iter.bi_sector = sector;
	err = -EIO;
	if (bio_add_page(bio, page, size, 0) != size)
		goto out;
	bio->bi_private = &device->md_io;
	bio->bi_end_io = drbd_md_io_complete;
	bio->bi_rw = rw;

	if (!(rw & WRITE) && device->state.disk == D_DISKLESS && device->ldev == NULL)
		/* special case, drbd_md_read() during drbd_adm_attach(): no get_ldev */
		;
	else if (!get_ldev_if_state(device, D_ATTACHING)) {
		/* Corresponding put_ldev in drbd_md_io_complete() */
		drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n");
		err = -ENODEV;
		goto out;
	}

	bio_get(bio); /* one bio_put() is in the completion handler */
	atomic_inc(&device->md_io_in_use); /* drbd_md_put_buffer() is in the completion handler */
	if (drbd_insert_fault(device, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
		bio_endio(bio, -EIO);
	else
		submit_bio(rw, bio);
	wait_until_done_or_force_detached(device, bdev, &device->md_io.done);
	if (bio_flagged(bio, BIO_UPTODATE))
		err = device->md_io.error;

 out:
	bio_put(bio);
	return err;
}

int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bdev,
			 sector_t sector, int rw)
{
	int err;
	struct page *iop = device->md_io_page;

	D_ASSERT(device, atomic_read(&device->md_io_in_use) == 1);

	BUG_ON(!bdev->md_bdev);

	dynamic_drbd_dbg(device, "meta_data io: %s [%d]:%s(,%llus,%s) %pS\n",
	     current->comm, current->pid, __func__,
	     (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ",
	     (void*)_RET_IP_ );

	if (sector < drbd_md_first_sector(bdev) ||
	    sector + 7 > drbd_md_last_sector(bdev))
		drbd_alert(device, "%s [%d]:%s(,%llus,%s) out of range md access!\n",
		     current->comm, current->pid, __func__,
		     (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");

	/* we do all our meta data IO in aligned 4k blocks. */
	err = _drbd_md_sync_page_io(device, bdev, iop, sector, rw, 4096);
	if (err) {
		drbd_err(device, "drbd_md_sync_page_io(,%llus,%s) failed with error %d\n",
		    (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ", err);
	}
	return err;
}
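/*
 * For illustration: all meta data IO is done in aligned 4k blocks, and one
 * 4k block spans 4096/512 = 8 sectors.  That is why the range check above
 * rejects any start sector where sector + 7 would lie beyond
 * drbd_md_last_sector().  Callers bracket this with drbd_md_get_buffer() /
 * drbd_md_put_buffer(), which serialize access to the single
 * device->md_io_page, roughly:
 *
 *	buffer = drbd_md_get_buffer(device);
 *	if (buffer) {
 *		... fill the 4k buffer ...
 *		err = drbd_md_sync_page_io(device, device->ldev, sector, WRITE);
 *		drbd_md_put_buffer(device);
 *	}
 */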
static struct bm_extent *find_active_resync_extent(struct drbd_device *device, unsigned int enr)
{
	struct lc_element *tmp;
	tmp = lc_find(device->resync, enr/AL_EXT_PER_BM_SECT);
	if (unlikely(tmp != NULL)) {
		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
		if (test_bit(BME_NO_WRITES, &bm_ext->flags))
			return bm_ext;
	}
	return NULL;
}

static struct lc_element *_al_get(struct drbd_device *device, unsigned int enr, bool nonblock)
{
	struct lc_element *al_ext;
	struct bm_extent *bm_ext;
	int wake;

	spin_lock_irq(&device->al_lock);
	bm_ext = find_active_resync_extent(device, enr);
	if (bm_ext) {
		wake = !test_and_set_bit(BME_PRIORITY, &bm_ext->flags);
		spin_unlock_irq(&device->al_lock);
		if (wake)
			wake_up(&device->al_wait);
		return NULL;
	}
	if (nonblock)
		al_ext = lc_try_get(device->act_log, enr);
	else
		al_ext = lc_get(device->act_log, enr);
	spin_unlock_irq(&device->al_lock);
	return al_ext;
}

bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval *i)
{
	/* for bios crossing activity log extent boundaries,
	 * we may need to activate two extents in one go */
	unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
	unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);

	D_ASSERT(device, (unsigned)(last - first) <= 1);
	D_ASSERT(device, atomic_read(&device->local_cnt) > 0);

	/* FIXME figure out a fast path for bios crossing AL extent boundaries */
	if (first != last)
		return false;

	return _al_get(device, first, true);
}

bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i)
{
	/* for bios crossing activity log extent boundaries,
	 * we may need to activate two extents in one go */
	unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
	unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);
	unsigned enr;
	bool need_transaction = false;

	D_ASSERT(device, first <= last);
	D_ASSERT(device, atomic_read(&device->local_cnt) > 0);

	for (enr = first; enr <= last; enr++) {
		struct lc_element *al_ext;
		wait_event(device->al_wait,
				(al_ext = _al_get(device, enr, false)) != NULL);
		if (al_ext->lc_number != enr)
			need_transaction = true;
	}
	return need_transaction;
}
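/*
 * For illustration (assuming AL_EXTENT_SHIFT == 22, i.e. the 4 MiB extent
 * size mentioned in the on-disk format comment above): an AL extent covers
 * 2^22 / 512 = 8192 sectors, so the extent number of a request is simply
 * i->sector >> 13.  A 4 KiB write at sector 204800 (the 100 MiB mark) maps
 * to enr = 204800 / 8192 = 25.  An 8 KiB write at sector 8184 covers
 * sectors 8184..8199 and thus touches two extents (first = 0, last = 1):
 * the "crossing activity log extent boundaries" case noted above.
 */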
static int al_write_transaction(struct drbd_device *device, bool delegate);

/* When called through generic_make_request(), we must delegate
 * activity log I/O to the worker thread: a further request
 * submitted via generic_make_request() within the same task
 * would be queued on current->bio_list, and would only start
 * after this function returns (see generic_make_request()).
 *
 * However, if we *are* the worker, we must not delegate to ourselves.
 */

/*
 * @delegate:	delegate activity log I/O to the worker thread
 */
void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate)
{
	bool locked = false;

	BUG_ON(delegate && current == first_peer_device(device)->connection->worker.task);

	/* Serialize multiple transactions.
	 * This uses test_and_set_bit, memory barrier is implicit.
	 */
	wait_event(device->al_wait,
			device->act_log->pending_changes == 0 ||
			(locked = lc_try_lock_for_transaction(device->act_log)));

	if (locked) {
		/* Double check: it may have been committed by someone else,
		 * while we have been waiting for the lock. */
		if (device->act_log->pending_changes) {
			bool write_al_updates;

			rcu_read_lock();
			write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates;
			rcu_read_unlock();

			if (write_al_updates)
				al_write_transaction(device, delegate);
			spin_lock_irq(&device->al_lock);
			/* FIXME
			if (err)
				we need an "lc_cancel" here;
			*/
			lc_committed(device->act_log);
			spin_unlock_irq(&device->al_lock);
		}
		lc_unlock(device->act_log);
		wake_up(&device->al_wait);
	}
}

/*
 * @delegate:	delegate activity log I/O to the worker thread
 */
void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i, bool delegate)
{
	BUG_ON(delegate && current == first_peer_device(device)->connection->worker.task);

	if (drbd_al_begin_io_prepare(device, i))
		drbd_al_begin_io_commit(device, delegate);
}

int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i)
{
	struct lru_cache *al = device->act_log;
	/* for bios crossing activity log extent boundaries,
	 * we may need to activate two extents in one go */
	unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
	unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);
	unsigned nr_al_extents;
	unsigned available_update_slots;
	unsigned enr;

	D_ASSERT(device, first <= last);

	nr_al_extents = 1 + last - first; /* worst case: all touched extents are cold. */
	available_update_slots = min(al->nr_elements - al->used,
				al->max_pending_changes - al->pending_changes);

	/* We want all necessary updates for a given request within the same transaction.
	 * We could first check how many updates are *actually* needed,
	 * and use that instead of the worst-case nr_al_extents */
	if (available_update_slots < nr_al_extents)
		return -EWOULDBLOCK;

	/* Is resync active in this area? */
	for (enr = first; enr <= last; enr++) {
		struct lc_element *tmp;
		tmp = lc_find(device->resync, enr/AL_EXT_PER_BM_SECT);
		if (unlikely(tmp != NULL)) {
			struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
			if (test_bit(BME_NO_WRITES, &bm_ext->flags)) {
				if (!test_and_set_bit(BME_PRIORITY, &bm_ext->flags))
					return -EBUSY;
				return -EWOULDBLOCK;
			}
		}
	}

	/* Checkout the refcounts.
	 * Given that we checked for available elements and update slots above,
	 * this has to be successful. */
	for (enr = first; enr <= last; enr++) {
		struct lc_element *al_ext;
		al_ext = lc_get_cumulative(device->act_log, enr);
		if (!al_ext)
			drbd_info(device, "LOGIC BUG for enr=%u\n", enr);
	}
	return 0;
}

void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i)
{
	/* for bios crossing activity log extent boundaries,
	 * we may need to activate two extents in one go */
	unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
	unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);
	unsigned enr;
	struct lc_element *extent;
	unsigned long flags;

	D_ASSERT(device, first <= last);
	spin_lock_irqsave(&device->al_lock, flags);

	for (enr = first; enr <= last; enr++) {
		extent = lc_find(device->act_log, enr);
		if (!extent) {
			drbd_err(device, "al_complete_io() called on inactive extent %u\n", enr);
			continue;
		}
		lc_put(device->act_log, extent);
	}
	spin_unlock_irqrestore(&device->al_lock, flags);
	wake_up(&device->al_wait);
}
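/*
 * For illustration: each extent touched by drbd_al_begin_io*() holds one
 * reference in the act_log LRU (lc_get()/lc_get_cumulative()), and
 * drbd_al_complete_io() drops exactly that reference (lc_put()) once the
 * application IO for the interval has completed.  An extent can only be
 * evicted from the activity log again when its reference count is back at
 * zero, which is what drbd_al_shrink() further below waits for.
 */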
#if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)
/* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT
 * are still coupled, or assume too much about their relation.
 * Code below will not work if this is violated.
 * Will be cleaned up with some followup patch.
 */
# error FIXME
#endif

static unsigned int al_extent_to_bm_page(unsigned int al_enr)
{
	return al_enr >>
		/* bit to page */
		((PAGE_SHIFT + 3) -
		/* al extent number to bit */
		 (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT));
}

static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device)
{
	const unsigned int stripes = device->ldev->md.al_stripes;
	const unsigned int stripe_size_4kB = device->ldev->md.al_stripe_size_4k;

	/* transaction number, modulo on-disk ring buffer wrap around */
	unsigned int t = device->al_tr_number % (device->ldev->md.al_size_4k);

	/* ... to aligned 4k on disk block */
	t = ((t % stripes) * stripe_size_4kB) + t/stripes;

	/* ... to 512 byte sector in activity log */
	t *= 8;

	/* ... plus offset to the on disk position */
	return device->ldev->md.md_offset + device->ldev->md.al_offset + t;
}
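/*
 * Worked example for the mapping above, with hypothetical values
 * stripes = 4 and stripe_size_4kB = 8 (so al_size_4k = 4 * 8 = 32 blocks):
 * transaction number t = 13 wraps to 13 % 32 = 13, lands in stripe
 * 13 % 4 = 1 at intra-stripe position 13 / 4 = 3, i.e. 4k block
 * 1 * 8 + 3 = 11, which starts 11 * 8 = 88 sectors into the AL area.
 * Consecutive transactions thus rotate across the stripes.
 */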
static int
_al_write_transaction(struct drbd_device *device)
{
	struct al_transaction_on_disk *buffer;
	struct lc_element *e;
	sector_t sector;
	int i, mx;
	unsigned extent_nr;
	unsigned crc = 0;
	int err = 0;

	if (!get_ldev(device)) {
		drbd_err(device, "disk is %s, cannot start al transaction\n",
			drbd_disk_str(device->state.disk));
		return -EIO;
	}

	/* The bitmap write may have failed, causing a state change. */
	if (device->state.disk < D_INCONSISTENT) {
		drbd_err(device,
			"disk is %s, cannot write al transaction\n",
			drbd_disk_str(device->state.disk));
		put_ldev(device);
		return -EIO;
	}

	buffer = drbd_md_get_buffer(device); /* protects md_io_buffer, al_tr_cycle, ... */
	if (!buffer) {
		drbd_err(device, "disk failed while waiting for md_io buffer\n");
		put_ldev(device);
		return -ENODEV;
	}

	memset(buffer, 0, sizeof(*buffer));
	buffer->magic = cpu_to_be32(DRBD_AL_MAGIC);
	buffer->tr_number = cpu_to_be32(device->al_tr_number);

	i = 0;

	/* Even though no one can start to change this list
	 * once we set the LC_LOCKED -- from drbd_al_begin_io(),
	 * lc_try_lock_for_transaction() --, someone may still
	 * be in the process of changing it. */
	spin_lock_irq(&device->al_lock);
	list_for_each_entry(e, &device->act_log->to_be_changed, list) {
		if (i == AL_UPDATES_PER_TRANSACTION) {
			i++;
			break;
		}
		buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index);
		buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number);
		if (e->lc_number != LC_FREE)
			drbd_bm_mark_for_writeout(device,
					al_extent_to_bm_page(e->lc_number));
		i++;
	}
	spin_unlock_irq(&device->al_lock);
	BUG_ON(i > AL_UPDATES_PER_TRANSACTION);

	buffer->n_updates = cpu_to_be16(i);
	for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) {
		buffer->update_slot_nr[i] = cpu_to_be16(-1);
		buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE);
	}

	buffer->context_size = cpu_to_be16(device->act_log->nr_elements);
	buffer->context_start_slot_nr = cpu_to_be16(device->al_tr_cycle);

	mx = min_t(int, AL_CONTEXT_PER_TRANSACTION,
		   device->act_log->nr_elements - device->al_tr_cycle);
	for (i = 0; i < mx; i++) {
		unsigned idx = device->al_tr_cycle + i;
		extent_nr = lc_element_by_index(device->act_log, idx)->lc_number;
		buffer->context[i] = cpu_to_be32(extent_nr);
	}
	for (; i < AL_CONTEXT_PER_TRANSACTION; i++)
		buffer->context[i] = cpu_to_be32(LC_FREE);

	device->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION;
	if (device->al_tr_cycle >= device->act_log->nr_elements)
		device->al_tr_cycle = 0;

	sector = al_tr_number_to_on_disk_sector(device);

	crc = crc32c(0, buffer, 4096);
	buffer->crc32c = cpu_to_be32(crc);

	if (drbd_bm_write_hinted(device))
		err = -EIO;
	else {
		bool write_al_updates;
		rcu_read_lock();
		write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates;
		rcu_read_unlock();
		if (write_al_updates) {
			if (drbd_md_sync_page_io(device, device->ldev, sector, WRITE)) {
				err = -EIO;
				drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR);
			} else {
				device->al_tr_number++;
				device->al_writ_cnt++;
			}
		}
	}

	drbd_md_put_buffer(device);
	put_ldev(device);

	return err;
}
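/*
 * For illustration: because the buffer is memset() to zero before crc32c
 * is computed and the checksum is stored into ->crc32c only afterwards,
 * a reader can validate a transaction block by zeroing that field again
 * and comparing checksums, roughly:
 *
 *	u32 expect = be32_to_cpu(b->crc32c);
 *	b->crc32c = 0;
 *	valid = b->magic == cpu_to_be32(DRBD_AL_MAGIC) &&
 *		crc32c(0, b, 4096) == expect;
 *
 * Among all valid blocks in the on-disk ring buffer, the one with the
 * highest tr_number is the most recent transaction (see the struct
 * comment at the top of this file).
 */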
static int
w_al_write_transaction(struct drbd_work *w, int unused)
{
	struct update_al_work *aw = container_of(w, struct update_al_work, w);
	struct drbd_device *device = aw->device;
	int err;

	err = _al_write_transaction(device);
	aw->err = err;
	complete(&aw->event);

	return err != -EIO ? err : 0;
}

/* Calls from worker context (see w_restart_disk_io()) need to write the
   transaction directly.  Others come through generic_make_request();
   those need to delegate it to the worker. */
static int al_write_transaction(struct drbd_device *device, bool delegate)
{
	if (delegate) {
		struct update_al_work al_work;
		init_completion(&al_work.event);
		al_work.w.cb = w_al_write_transaction;
		al_work.device = device;
		drbd_queue_work_front(&first_peer_device(device)->connection->sender_work,
				      &al_work.w);
		wait_for_completion(&al_work.event);
		return al_work.err;
	} else
		return _al_write_transaction(device);
}

static int _try_lc_del(struct drbd_device *device, struct lc_element *al_ext)
{
	int rv;

	spin_lock_irq(&device->al_lock);
	rv = (al_ext->refcnt == 0);
	if (likely(rv))
		lc_del(device->act_log, al_ext);
	spin_unlock_irq(&device->al_lock);

	return rv;
}

/**
 * drbd_al_shrink() - Removes all active extents from the activity log
 * @device:	DRBD device.
 *
 * Removes all active extents from the activity log, waiting until
 * the reference count of each entry has dropped to 0 first, of course.
 *
 * You need to lock device->act_log with lc_try_lock() / lc_unlock()
 */
void drbd_al_shrink(struct drbd_device *device)
{
	struct lc_element *al_ext;
	int i;

	D_ASSERT(device, test_bit(__LC_LOCKED, &device->act_log->flags));

	for (i = 0; i < device->act_log->nr_elements; i++) {
		al_ext = lc_element_by_index(device->act_log, i);
		if (al_ext->lc_number == LC_FREE)
			continue;
		wait_event(device->al_wait, _try_lc_del(device, al_ext));
	}

	wake_up(&device->al_wait);
}

int drbd_initialize_al(struct drbd_device *device, void *buffer)
{
	struct al_transaction_on_disk *al = buffer;
	struct drbd_md *md = &device->ldev->md;
	sector_t al_base = md->md_offset + md->al_offset;
	int al_size_4k = md->al_stripes * md->al_stripe_size_4k;
	int i;

	memset(al, 0, 4096);
	al->magic = cpu_to_be32(DRBD_AL_MAGIC);
	al->transaction_type = cpu_to_be16(AL_TR_INITIALIZED);
	al->crc32c = cpu_to_be32(crc32c(0, al, 4096));

	for (i = 0; i < al_size_4k; i++) {
		int err = drbd_md_sync_page_io(device, device->ldev, al_base + i * 8, WRITE);
		if (err)
			return err;
	}
	return 0;
}
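/*
 * For illustration: drbd_initialize_al() stamps every 4k block of the
 * on-disk ring buffer with the same AL_TR_INITIALIZED transaction (note
 * the i * 8 stride: 8 sectors per 4k block), so a later reader finds only
 * valid, checksummed blocks and no stale AL_TR_UPDATE transactions left
 * over from a previous use of the meta data area.
 */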
static const char *drbd_change_sync_fname[] = {
	[RECORD_RS_FAILED] = "drbd_rs_failed_io",
	[SET_IN_SYNC] = "drbd_set_in_sync",
	[SET_OUT_OF_SYNC] = "drbd_set_out_of_sync"
};

/* ATTENTION. The AL's extents are 4MB each, while the extents in the
 * resync LRU-cache are 16MB each.
 * The caller of this function has to hold a get_ldev() reference.
 *
 * Adjusts the caching members ->rs_left (success) or ->rs_failed (!success),
 * potentially pulling in (and recounting the corresponding bits)
 * this resync extent into the resync extent lru cache.
 *
 * Returns whether all bits have been cleared for this resync extent,
 * precisely: (rs_left <= rs_failed)
 *
 * TODO will be obsoleted once we have a caching lru of the on disk bitmap
 */
static bool update_rs_extent(struct drbd_device *device,
		unsigned int enr, int count,
		enum update_sync_bits_mode mode)
{
	struct lc_element *e;

	D_ASSERT(device, atomic_read(&device->local_cnt));

	/* When setting out-of-sync bits,
	 * we don't need it cached (lc_find).
	 * But if it is present in the cache,
	 * we should update the cached bit count.
	 * Otherwise, that extent should be in the resync extent lru cache
	 * already -- or we want to pull it in if necessary -- (lc_get),
	 * then update and check rs_left and rs_failed. */
	if (mode == SET_OUT_OF_SYNC)
		e = lc_find(device->resync, enr);
	else
		e = lc_get(device->resync, enr);
	if (e) {
		struct bm_extent *ext = lc_entry(e, struct bm_extent, lce);
		if (ext->lce.lc_number == enr) {
			if (mode == SET_IN_SYNC)
				ext->rs_left -= count;
			else if (mode == SET_OUT_OF_SYNC)
				ext->rs_left += count;
			else
				ext->rs_failed += count;
			if (ext->rs_left < ext->rs_failed) {
				drbd_warn(device, "BAD! enr=%u rs_left=%d "
				    "rs_failed=%d count=%d cstate=%s\n",
				     ext->lce.lc_number, ext->rs_left,
				     ext->rs_failed, count,
				     drbd_conn_str(device->state.conn));

				/* We don't expect to be able to clear more bits
				 * than have been set when we originally counted
				 * the set bits to cache that value in ext->rs_left.
				 * Whatever the reason (disconnect during resync,
				 * delayed local completion of an application write),
				 * try to fix it up by recounting here. */
				ext->rs_left = drbd_bm_e_weight(device, enr);
			}
		} else {
			/* Normally this element should be in the cache,
			 * since drbd_rs_begin_io() pulled it already in.
			 *
			 * But maybe an application write finished, and we set
			 * something outside the resync lru_cache in sync.
			 */
			int rs_left = drbd_bm_e_weight(device, enr);
			if (ext->flags != 0) {
				drbd_warn(device, "changing resync lce: %d[%u;%02lx]"
				     " -> %d[%u;00]\n",
				     ext->lce.lc_number, ext->rs_left,
				     ext->flags, enr, rs_left);
				ext->flags = 0;
			}
			if (ext->rs_failed) {
				drbd_warn(device, "Kicking resync_lru element enr=%u "
				     "out with rs_failed=%d\n",
				     ext->lce.lc_number, ext->rs_failed);
			}
			ext->rs_left = rs_left;
			ext->rs_failed = (mode == RECORD_RS_FAILED) ? count : 0;
			/* we don't keep a persistent log of the resync lru,
			 * we can commit any change right away. */
			lc_committed(device->resync);
		}
		if (mode != SET_OUT_OF_SYNC)
			lc_put(device->resync, &ext->lce);
		/* no race, we are within the al_lock! */

		if (ext->rs_left <= ext->rs_failed) {
			ext->rs_failed = 0;
			return true;
		}
	} else if (mode != SET_OUT_OF_SYNC) {
		/* be quiet if lc_find() did not find it. */
		drbd_err(device, "lc_get() failed! locked=%d/%d flags=%lu\n",
		    device->resync_locked,
		    device->resync->nr_elements,
		    device->resync->flags);
	}
	return false;
}
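/*
 * For illustration, assuming 4 KiB bitmap granularity (BM_BLOCK_SIZE):
 * one 16 MiB resync extent is covered by 16 MiB / 4 KiB = 4096 bitmap
 * bits.  A freshly cached extent with, say, rs_left = 300 out-of-sync
 * bits is "done" once SET_IN_SYNC events have decremented rs_left down
 * to rs_failed; e.g. after clearing 280 bits with 20 recorded failures,
 * rs_left == rs_failed == 20 and update_rs_extent() returns true
 * (resetting rs_failed for the next pass).
 */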
void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go)
{
	unsigned long now = jiffies;
	unsigned long last = device->rs_mark_time[device->rs_last_mark];
	int next = (device->rs_last_mark + 1) % DRBD_SYNC_MARKS;
	if (time_after_eq(now, last + DRBD_SYNC_MARK_STEP)) {
		if (device->rs_mark_left[device->rs_last_mark] != still_to_go &&
		    device->state.conn != C_PAUSED_SYNC_T &&
		    device->state.conn != C_PAUSED_SYNC_S) {
			device->rs_mark_time[next] = now;
			device->rs_mark_left[next] = still_to_go;
			device->rs_last_mark = next;
		}
	}
}

/* It is called a lazy update, so don't do the write-out too often. */
static bool lazy_bitmap_update_due(struct drbd_device *device)
{
	return time_after(jiffies, device->rs_last_bcast + 2*HZ);
}

static void maybe_schedule_on_disk_bitmap_update(struct drbd_device *device, bool rs_done)
{
	struct drbd_connection *connection;
	if (rs_done)
		set_bit(RS_DONE, &device->flags);
		/* and also set RS_PROGRESS below */
	else if (!lazy_bitmap_update_due(device))
		return;

	/* compare with test_and_clear_bit() calls in and above
	 * try_update_all_on_disk_bitmaps() from the drbd_worker(). */
	if (test_and_set_bit(RS_PROGRESS, &device->flags))
		return;
	connection = first_peer_device(device)->connection;
	if (!test_and_set_bit(CONN_RS_PROGRESS, &connection->flags))
		wake_up(&connection->sender_work.q_wait);
}

static int update_sync_bits(struct drbd_device *device,
		unsigned long sbnr, unsigned long ebnr,
		enum update_sync_bits_mode mode)
{
	/*
	 * We keep a count of set bits per resync-extent in the ->rs_left
	 * caching member, so we need to loop and work within the resync extent
	 * alignment.  Typically this loop will execute exactly once.
	 */
	unsigned long flags;
	unsigned long count = 0;
	unsigned int cleared = 0;
	while (sbnr <= ebnr) {
		/* set temporary boundary bit number to last bit number within
		 * the resync extent of the current start bit number,
		 * but cap at provided end bit number */
		unsigned long tbnr = min(ebnr, sbnr | BM_BLOCKS_PER_BM_EXT_MASK);
		unsigned long c;

		if (mode == RECORD_RS_FAILED)
			/* Only called from drbd_rs_failed_io(), bits
			 * supposedly still set.  Recount, maybe some
			 * of the bits have been successfully cleared
			 * by application IO meanwhile.
			 */
			c = drbd_bm_count_bits(device, sbnr, tbnr);
		else if (mode == SET_IN_SYNC)
			c = drbd_bm_clear_bits(device, sbnr, tbnr);
		else /* if (mode == SET_OUT_OF_SYNC) */
			c = drbd_bm_set_bits(device, sbnr, tbnr);

		if (c) {
			spin_lock_irqsave(&device->al_lock, flags);
			cleared += update_rs_extent(device, BM_BIT_TO_EXT(sbnr), c, mode);
			spin_unlock_irqrestore(&device->al_lock, flags);
			count += c;
		}
		sbnr = tbnr + 1;
	}
	if (count) {
		if (mode == SET_IN_SYNC) {
			unsigned long still_to_go = drbd_bm_total_weight(device);
			bool rs_is_done = (still_to_go <= device->rs_failed);
			drbd_advance_rs_marks(device, still_to_go);
			if (cleared || rs_is_done)
				maybe_schedule_on_disk_bitmap_update(device, rs_is_done);
		} else if (mode == RECORD_RS_FAILED)
			device->rs_failed += count;
		wake_up(&device->al_wait);
	}
	return count;
}
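/*
 * For illustration, assuming BM_BLOCKS_PER_BM_EXT_MASK == 4095 (a 16 MiB
 * resync extent at 4 KiB per bitmap bit): with sbnr = 4000 and ebnr = 4200
 * the loop above runs twice.  First pass: tbnr = min(4200, 4000 | 4095)
 * = 4095, covering bits 4000..4095 of resync extent 0.  Second pass:
 * sbnr = 4096, tbnr = 4200, covering bits 4096..4200 of extent 1.  Each
 * pass updates ->rs_left of exactly one resync extent, which is why the
 * per-extent split is needed.
 */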
/* clear the bit corresponding to the piece of storage in question:
 * size bytes of data starting from sector.  Only clear bits of the affected
 * one or more _aligned_ BM_BLOCK_SIZE blocks.
 *
 * called by worker on C_SYNC_TARGET and receiver on SyncSource.
 *
 */
int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size,
		       enum update_sync_bits_mode mode,
		       const char *file, const unsigned int line)
{
	/* Is called from worker and receiver context _only_ */
	unsigned long sbnr, ebnr, lbnr;
	unsigned long count = 0;
	sector_t esector, nr_sectors;

	/* This would be an empty REQ_FLUSH, be silent. */
	if ((mode == SET_OUT_OF_SYNC) && size == 0)
		return 0;

	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
		drbd_err(device, "%s: sector=%llus size=%d nonsense!\n",
				drbd_change_sync_fname[mode],
				(unsigned long long)sector, size);
		return 0;
	}

	if (!get_ldev(device))
		return 0; /* no disk, no metadata, no bitmap to manipulate bits in */

	nr_sectors = drbd_get_capacity(device->this_bdev);
	esector = sector + (size >> 9) - 1;

	if (!expect(sector < nr_sectors))
		goto out;
	if (!expect(esector < nr_sectors))
		esector = nr_sectors - 1;

	lbnr = BM_SECT_TO_BIT(nr_sectors-1);

	if (mode == SET_IN_SYNC) {
		/* Round up start sector, round down end sector.  We make sure
		 * we only clear full, aligned, BM_BLOCK_SIZE blocks. */
		if (unlikely(esector < BM_SECT_PER_BIT-1))
			goto out;
		if (unlikely(esector == (nr_sectors-1)))
			ebnr = lbnr;
		else
			ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
		sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);
	} else {
		/* We set it out of sync, or record resync failure.
		 * Should not round anything here. */
		sbnr = BM_SECT_TO_BIT(sector);
		ebnr = BM_SECT_TO_BIT(esector);
	}

	count = update_sync_bits(device, sbnr, ebnr, mode);
out:
	put_ldev(device);
	return count;
}
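/*
 * Rounding example for the SET_IN_SYNC case above, assuming
 * BM_SECT_PER_BIT == 8 (one bitmap bit per 4 KiB, i.e. 8 sectors):
 * a 4 KiB request at the unaligned sector 13 covers sectors 13..20.
 * Rounding up gives sbnr = BM_SECT_TO_BIT(13 + 7) = 2, rounding down
 * gives ebnr = BM_SECT_TO_BIT(20 - 7) = 1.  Since sbnr > ebnr, no bit
 * is cleared: neither 4 KiB block is covered completely, so neither
 * may be marked in sync.
 */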
static
struct bm_extent *_bme_get(struct drbd_device *device, unsigned int enr)
{
	struct lc_element *e;
	struct bm_extent *bm_ext;
	int wakeup = 0;
	unsigned long rs_flags;

	spin_lock_irq(&device->al_lock);
	if (device->resync_locked > device->resync->nr_elements/2) {
		spin_unlock_irq(&device->al_lock);
		return NULL;
	}
	e = lc_get(device->resync, enr);
	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
	if (bm_ext) {
		if (bm_ext->lce.lc_number != enr) {
			bm_ext->rs_left = drbd_bm_e_weight(device, enr);
			bm_ext->rs_failed = 0;
			lc_committed(device->resync);
			wakeup = 1;
		}
		if (bm_ext->lce.refcnt == 1)
			device->resync_locked++;
		set_bit(BME_NO_WRITES, &bm_ext->flags);
	}
	rs_flags = device->resync->flags;
	spin_unlock_irq(&device->al_lock);
	if (wakeup)
		wake_up(&device->al_wait);

	if (!bm_ext) {
		if (rs_flags & LC_STARVING)
			drbd_warn(device, "Have to wait for element"
			     " (resync LRU too small?)\n");
		BUG_ON(rs_flags & LC_LOCKED);
	}

	return bm_ext;
}

static int _is_in_al(struct drbd_device *device, unsigned int enr)
{
	int rv;

	spin_lock_irq(&device->al_lock);
	rv = lc_is_used(device->act_log, enr);
	spin_unlock_irq(&device->al_lock);

	return rv;
}

/**
 * drbd_rs_begin_io() - Gets an extent in the resync LRU cache and sets it to BME_LOCKED
 * @device:	DRBD device.
 * @sector:	The sector number.
 *
 * This function sleeps on al_wait.  Returns 0 on success, -EINTR if interrupted.
 */
int drbd_rs_begin_io(struct drbd_device *device, sector_t sector)
{
	unsigned int enr = BM_SECT_TO_EXT(sector);
	struct bm_extent *bm_ext;
	int i, sig;
	bool sa;

retry:
	sig = wait_event_interruptible(device->al_wait,
			(bm_ext = _bme_get(device, enr)));
	if (sig)
		return -EINTR;

	if (test_bit(BME_LOCKED, &bm_ext->flags))
		return 0;

	/* step aside only while we are above c-min-rate; unless disabled. */
	sa = drbd_rs_c_min_rate_throttle(device);

	for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
		sig = wait_event_interruptible(device->al_wait,
					       !_is_in_al(device, enr * AL_EXT_PER_BM_SECT + i) ||
					       (sa && test_bit(BME_PRIORITY, &bm_ext->flags)));

		if (sig || (sa && test_bit(BME_PRIORITY, &bm_ext->flags))) {
			spin_lock_irq(&device->al_lock);
			if (lc_put(device->resync, &bm_ext->lce) == 0) {
				bm_ext->flags = 0; /* clears BME_NO_WRITES and eventually BME_PRIORITY */
				device->resync_locked--;
				wake_up(&device->al_wait);
			}
			spin_unlock_irq(&device->al_lock);
			if (sig)
				return -EINTR;
			if (schedule_timeout_interruptible(HZ/10))
				return -EINTR;
			goto retry;
		}
	}
	set_bit(BME_LOCKED, &bm_ext->flags);
	return 0;
}
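/*
 * For illustration: with 16 MiB resync extents and 4 MiB AL extents,
 * AL_EXT_PER_BM_SECT is 4, so the loop above checks all four activity
 * log extents that overlap the resync extent.  Only when none of them
 * carries application IO may the extent be promoted from BME_NO_WRITES
 * to BME_LOCKED; otherwise we drop our reference, back off for HZ/10,
 * and retry (or step aside immediately if a BME_PRIORITY writer waits).
 */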
/**
 * drbd_try_rs_begin_io() - Gets an extent in the resync LRU cache, does not sleep
 * @device:	DRBD device.
 * @sector:	The sector number.
 *
 * Gets an extent in the resync LRU cache, sets it to BME_NO_WRITES, then
 * tries to set it to BME_LOCKED.  Returns 0 upon success, and -EAGAIN
 * if there is still application IO going on in this area.
 */
int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector)
{
	unsigned int enr = BM_SECT_TO_EXT(sector);
	const unsigned int al_enr = enr*AL_EXT_PER_BM_SECT;
	struct lc_element *e;
	struct bm_extent *bm_ext;
	int i;

	spin_lock_irq(&device->al_lock);
	if (device->resync_wenr != LC_FREE && device->resync_wenr != enr) {
		/* in case you have very heavy scattered io, it may
		 * stall the syncer for an undefined time if we give up
		 * the ref count when we try again and requeue.
		 *
		 * if we don't give up the refcount, but the next time
		 * we are scheduled this extent has been "synced" by new
		 * application writes, we'd miss the lc_put on the
		 * extent we keep the refcount on.
		 * so we remembered which extent we had to try again, and
		 * if the next requested one is something else, we do
		 * the lc_put here...
		 * we also have to wake_up
		 */
		e = lc_find(device->resync, device->resync_wenr);
		bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
		if (bm_ext) {
			D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags));
			D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags));
			clear_bit(BME_NO_WRITES, &bm_ext->flags);
			device->resync_wenr = LC_FREE;
			if (lc_put(device->resync, &bm_ext->lce) == 0)
				device->resync_locked--;
			wake_up(&device->al_wait);
		} else {
			drbd_alert(device, "LOGIC BUG\n");
		}
	}
	/* TRY. */
	e = lc_try_get(device->resync, enr);
	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
	if (bm_ext) {
		if (test_bit(BME_LOCKED, &bm_ext->flags))
			goto proceed;
		if (!test_and_set_bit(BME_NO_WRITES, &bm_ext->flags)) {
			device->resync_locked++;
		} else {
			/* we did set the BME_NO_WRITES,
			 * but then could not set BME_LOCKED,
			 * so we tried again.
			 * drop the extra reference. */
			bm_ext->lce.refcnt--;
			D_ASSERT(device, bm_ext->lce.refcnt > 0);
		}
		goto check_al;
	} else {
		/* do we rather want to try later? */
		if (device->resync_locked > device->resync->nr_elements-3)
			goto try_again;
		/* Do or do not. There is no try. -- Yoda */
		e = lc_get(device->resync, enr);
		bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
		if (!bm_ext) {
			const unsigned long rs_flags = device->resync->flags;
			if (rs_flags & LC_STARVING)
				drbd_warn(device, "Have to wait for element"
				     " (resync LRU too small?)\n");
			BUG_ON(rs_flags & LC_LOCKED);
			goto try_again;
		}
		if (bm_ext->lce.lc_number != enr) {
			bm_ext->rs_left = drbd_bm_e_weight(device, enr);
			bm_ext->rs_failed = 0;
			lc_committed(device->resync);
			wake_up(&device->al_wait);
			D_ASSERT(device, test_bit(BME_LOCKED, &bm_ext->flags) == 0);
		}
		set_bit(BME_NO_WRITES, &bm_ext->flags);
		D_ASSERT(device, bm_ext->lce.refcnt == 1);
		device->resync_locked++;
		goto check_al;
	}
check_al:
	for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
		if (lc_is_used(device->act_log, al_enr+i))
			goto try_again;
	}
	set_bit(BME_LOCKED, &bm_ext->flags);
proceed:
	device->resync_wenr = LC_FREE;
	spin_unlock_irq(&device->al_lock);
	return 0;

try_again:
	if (bm_ext)
		device->resync_wenr = enr;
	spin_unlock_irq(&device->al_lock);
	return -EAGAIN;
}
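/*
 * Every successful drbd_rs_begin_io() / drbd_try_rs_begin_io() must be
 * balanced by a drbd_rs_complete_io() for the same sector: the latter
 * drops the reference taken above, and only the final lc_put() clears
 * the BME_* flags and releases the extent for application writes again.
 */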
void drbd_rs_complete_io(struct drbd_device *device, sector_t sector)
{
	unsigned int enr = BM_SECT_TO_EXT(sector);
	struct lc_element *e;
	struct bm_extent *bm_ext;
	unsigned long flags;

	spin_lock_irqsave(&device->al_lock, flags);
	e = lc_find(device->resync, enr);
	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
	if (!bm_ext) {
		spin_unlock_irqrestore(&device->al_lock, flags);
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "drbd_rs_complete_io() called, but extent not found\n");
		return;
	}

	if (bm_ext->lce.refcnt == 0) {
		spin_unlock_irqrestore(&device->al_lock, flags);
		drbd_err(device, "drbd_rs_complete_io(,%llu [=%u]) called, "
		    "but refcnt is 0!?\n",
		    (unsigned long long)sector, enr);
		return;
	}

	if (lc_put(device->resync, &bm_ext->lce) == 0) {
		bm_ext->flags = 0; /* clear BME_LOCKED, BME_NO_WRITES and BME_PRIORITY */
		device->resync_locked--;
		wake_up(&device->al_wait);
	}

	spin_unlock_irqrestore(&device->al_lock, flags);
}

/**
 * drbd_rs_cancel_all() - Removes all extents from the resync LRU (even BME_LOCKED)
 * @device:	DRBD device.
 */
void drbd_rs_cancel_all(struct drbd_device *device)
{
	spin_lock_irq(&device->al_lock);

	if (get_ldev_if_state(device, D_FAILED)) { /* Makes sure ->resync is there. */
		lc_reset(device->resync);
		put_ldev(device);
	}
	device->resync_locked = 0;
	device->resync_wenr = LC_FREE;
	spin_unlock_irq(&device->al_lock);
	wake_up(&device->al_wait);
}

/**
 * drbd_rs_del_all() - Gracefully remove all extents from the resync LRU
 * @device:	DRBD device.
 *
 * Returns 0 upon success, -EAGAIN if at least one reference count was
 * not zero.
 */
int drbd_rs_del_all(struct drbd_device *device)
{
	struct lc_element *e;
	struct bm_extent *bm_ext;
	int i;

	spin_lock_irq(&device->al_lock);

	if (get_ldev_if_state(device, D_FAILED)) {
		/* ok, ->resync is there. */
		for (i = 0; i < device->resync->nr_elements; i++) {
			e = lc_element_by_index(device->resync, i);
			bm_ext = lc_entry(e, struct bm_extent, lce);
			if (bm_ext->lce.lc_number == LC_FREE)
				continue;
			if (bm_ext->lce.lc_number == device->resync_wenr) {
				drbd_info(device, "dropping %u in drbd_rs_del_all, apparently"
				     " got 'synced' by application io\n",
				     device->resync_wenr);
				D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags));
				D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags));
				clear_bit(BME_NO_WRITES, &bm_ext->flags);
				device->resync_wenr = LC_FREE;
				lc_put(device->resync, &bm_ext->lce);
			}
			if (bm_ext->lce.refcnt != 0) {
				drbd_info(device, "Retrying drbd_rs_del_all() later. "
				     "refcnt=%d\n", bm_ext->lce.refcnt);
				put_ldev(device);
				spin_unlock_irq(&device->al_lock);
				return -EAGAIN;
			}
			D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags));
			D_ASSERT(device, !test_bit(BME_NO_WRITES, &bm_ext->flags));
			lc_del(device->resync, &bm_ext->lce);
		}
		D_ASSERT(device, device->resync->used == 0);
		put_ldev(device);
	}
	spin_unlock_irq(&device->al_lock);
	wake_up(&device->al_wait);

	return 0;
}