/*
   drbd_actlog.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */

#include <linux/slab.h>
#include <linux/drbd.h>
#include "drbd_int.h"
#include "drbd_tracing.h"
#include "drbd_wrappers.h"

/* We maintain a trivial checksum in our on disk activity log.
 * With that we can ensure correct operation even when the storage
 * device might do a partial (last) sector write while losing power.
 */

/* On-disk layout of one activity log transaction (fits in one sector).
 * All fields are stored big endian on disk (cpu_to_be32 on write). */
struct __packed al_transaction {
	u32       magic;		/* DRBD_MAGIC; identifies a valid transaction */
	u32       tr_number;		/* increasing transaction sequence number */
	struct __packed {
		u32 pos;		/* slot index in the in-core activity log */
		u32 extent;		/* extent number stored in that slot */
	} updates[1 + AL_EXTENTS_PT];	/* [0] carries the new update; the remaining
					 * AL_EXTENTS_PT entries cycle through all
					 * slots over consecutive transactions */
	u32       xor_sum;		/* xor of all updates[].extent values; detects
					 * a torn (partial) sector write */
};

/* Work item: write out the on-disk bitmap sector(s) covering one extent. */
struct update_odbm_work {
	struct drbd_work w;
	unsigned int enr;		/* extent number whose bitmap sector to write */
};

/* Work item: delegate one activity log transaction write to the worker
 * thread (see drbd_al_begin_io()); the submitter waits on @event. */
struct update_al_work {
	struct drbd_work w;
	struct lc_element *al_ext;	/* the AL element being changed */
	struct completion event;	/* completed once the transaction is on disk */
	unsigned int enr;		/* new extent number for that element */
	/* if old_enr != LC_FREE, write corresponding bitmap sector, too */
	unsigned int old_enr;
};

/* Tracks completion of the scattered bitmap write-out bios submitted by
 * drbd_al_to_on_disk_bm(). */
struct drbd_atodb_wait {
	atomic_t           count;	/* number of bios still in flight */
	struct completion  io_done;	/* completed when count drops to zero */
	struct drbd_conf   *mdev;
	int                error;	/* first IO error observed, 0 if none */
};


int w_al_write_transaction(struct drbd_conf *, struct drbd_work *, int);

/* The actual tracepoint needs to have constant number of known arguments...
 */
void trace_drbd_resync(struct drbd_conf *mdev, int level, const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	trace__drbd_resync(mdev, level, fmt, ap);
	va_end(ap);
}
/* Synchronously read or write one chunk of meta data.
 * Allocates a single-segment bio, submits it, and sleeps until completion.
 * On a WRITE, a barrier is requested unless MD_NO_BARRIER is already set;
 * if the barrier write fails, the barrier flag is disabled device-wide and
 * the IO is retried once without it.
 * Returns 1 on success, 0 on failure. */
static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
				 struct drbd_backing_dev *bdev,
				 struct page *page, sector_t sector,
				 int rw, int size)
{
	struct bio *bio;
	struct drbd_md_io md_io;
	int ok;

	md_io.mdev = mdev;
	init_completion(&md_io.event);
	md_io.error = 0;

	if ((rw & WRITE) && !test_bit(MD_NO_BARRIER, &mdev->flags))
		rw |= (1 << BIO_RW_BARRIER);
	/* meta data IO is always synchronous and should not sit in the queue */
	rw |= ((1<<BIO_RW_UNPLUG) | (1<<BIO_RW_SYNCIO));

 retry:
	bio = bio_alloc(GFP_NOIO, 1);
	bio->bi_bdev = bdev->md_bdev;
	bio->bi_sector = sector;
	ok = (bio_add_page(bio, page, size, 0) == size);
	if (!ok)
		goto out;
	bio->bi_private = &md_io;
	bio->bi_end_io = drbd_md_io_complete;
	bio->bi_rw = rw;

	trace_drbd_bio(mdev, "Md", bio, 0, NULL);

	/* fault injection hook: fail the bio immediately instead of submitting */
	if (FAULT_ACTIVE(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
		bio_endio(bio, -EIO);
	else
		submit_bio(rw, bio);
	wait_for_completion(&md_io.event);
	ok = bio_flagged(bio, BIO_UPTODATE) && md_io.error == 0;

	/* check for unsupported barrier op.
	 * would rather check on EOPNOTSUPP, but that is not reliable.
	 * don't try again for ANY return value != 0 */
	if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER) && !ok)) {
		/* Try again with no barrier */
		dev_warn(DEV, "Barriers not supported on meta data device - disabling\n");
		set_bit(MD_NO_BARRIER, &mdev->flags);
		rw &= ~(1 << BIO_RW_BARRIER);
		bio_put(bio);
		goto retry;
	}
 out:
	bio_put(bio);
	return ok;
}

/* Read or write one MD_SECTOR_SIZE (512 byte) sector of meta data at
 * @sector, using the pre-allocated mdev->md_io_page.
 * Caller must hold mdev->md_io_mutex (asserted below).
 *
 * If the backing device's logical block size is larger than 512 bytes
 * (observed on s390, per the comment below), the hardware sector
 * containing @sector is accessed through the bounce page md_io_tmpp:
 * for a WRITE this becomes a read-modify-write of the full hardware
 * sector; for a READ the wanted 512 bytes are copied out afterwards.
 * Returns 1 on success, 0 on failure. */
int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
			 sector_t sector, int rw)
{
	int logical_block_size, mask, ok;
	int offset = 0;
	struct page *iop = mdev->md_io_page;

	D_ASSERT(mutex_is_locked(&mdev->md_io_mutex));

	BUG_ON(!bdev->md_bdev);

	logical_block_size = bdev_logical_block_size(bdev->md_bdev);
	if (logical_block_size == 0)
		logical_block_size = MD_SECTOR_SIZE;

	/* in case logical_block_size != 512 [ s390 only? ] */
	if (logical_block_size != MD_SECTOR_SIZE) {
		mask = (logical_block_size / MD_SECTOR_SIZE) - 1;
		D_ASSERT(mask == 1 || mask == 3 || mask == 7);
		D_ASSERT(logical_block_size == (mask+1) * MD_SECTOR_SIZE);
		offset = sector & mask;		/* 512-byte sub-sector within hw sector */
		sector = sector & ~mask;	/* align down to hw sector boundary */
		iop = mdev->md_io_tmpp;

		if (rw & WRITE) {
			/* these are GFP_KERNEL pages, pre-allocated
			 * on device initialization */
			void *p = page_address(mdev->md_io_page);
			void *hp = page_address(mdev->md_io_tmpp);

			/* read-modify-write: fetch the full hardware sector first */
			ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector,
						   READ, logical_block_size);

			if (unlikely(!ok)) {
				dev_err(DEV, "drbd_md_sync_page_io(,%llus,"
				    "READ [logical_block_size!=512]) failed!\n",
				    (unsigned long long)sector);
				return 0;
			}

			/* splice the caller's 512 bytes into the hw sector image */
			memcpy(hp + offset*MD_SECTOR_SIZE, p, MD_SECTOR_SIZE);
		}
	}

	if (sector < drbd_md_first_sector(bdev) ||
	    sector > drbd_md_last_sector(bdev))
		dev_alert(DEV, "%s [%d]:%s(,%llus,%s) out of range md access!\n",
		     current->comm, current->pid, __func__,
		     (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");

	ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, logical_block_size);
	if (unlikely(!ok)) {
		dev_err(DEV, "drbd_md_sync_page_io(,%llus,%s) failed!\n",
		    (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");
		return 0;
	}

	if (logical_block_size != MD_SECTOR_SIZE && !(rw & WRITE)) {
		void *p = page_address(mdev->md_io_page);
		void *hp = page_address(mdev->md_io_tmpp);

		/* copy the wanted 512 bytes out of the bounce page */
		memcpy(p, hp + offset*MD_SECTOR_SIZE, MD_SECTOR_SIZE);
	}

	return ok;
}
/* Try to get a reference on activity log extent @enr.
 * Returns the lc_element on success, or NULL if the attempt must be
 * retried later: either the covering resync extent is marked
 * BME_NO_WRITES, or lc_get() could not provide an element right now
 * (cache starving, or an AL update still in flight).
 * Callers wait on mdev->al_wait and retry (see drbd_al_begin_io()). */
static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr)
{
	struct lc_element *al_ext;
	struct lc_element *tmp;
	unsigned long     al_flags = 0;	/* only consumed by the disabled diagnostics below */

	spin_lock_irq(&mdev->al_lock);
	/* application writes must not race an ongoing resync of the same area */
	tmp = lc_find(mdev->resync, enr/AL_EXT_PER_BM_SECT);
	if (unlikely(tmp != NULL)) {
		struct bm_extent  *bm_ext = lc_entry(tmp, struct bm_extent, lce);
		if (test_bit(BME_NO_WRITES, &bm_ext->flags)) {
			spin_unlock_irq(&mdev->al_lock);
			return NULL;
		}
	}
	al_ext   = lc_get(mdev->act_log, enr);
	al_flags = mdev->act_log->flags;
	spin_unlock_irq(&mdev->al_lock);

	/*
	if (!al_ext) {
		if (al_flags & LC_STARVING)
			dev_warn(DEV, "Have to wait for LRU element (AL too small?)\n");
		if (al_flags & LC_DIRTY)
			dev_warn(DEV, "Ongoing AL update (AL device too slow?)\n");
	}
	*/

	return al_ext;
}
/**
 * drbd_al_begin_io() - Gets a reference on an activity log extent before IO
 * @mdev:	DRBD device.
 * @sector:	The sector about to be written.
 *
 * Maps @sector to its AL extent number, then blocks on al_wait until
 * _al_get() grants a reference.  If the obtained element was not already
 * set to this extent (a "cold" extent), an on-disk AL transaction must be
 * written first; that write is delegated to the worker thread via an
 * on-stack update_al_work, and we sleep on its completion.  The on-stack
 * work item is safe because we wait_for_completion() before returning.
 */
void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector)
{
	/* sector (512 byte units) -> activity log extent number */
	unsigned int enr = (sector >> (AL_EXTENT_SHIFT-9));
	struct lc_element *al_ext;
	struct update_al_work al_work;

	D_ASSERT(atomic_read(&mdev->local_cnt) > 0);

	trace_drbd_actlog(mdev, sector, "al_begin_io");

	wait_event(mdev->al_wait, (al_ext = _al_get(mdev, enr)));

	if (al_ext->lc_number != enr) {
		/* drbd_al_write_transaction(mdev,al_ext,enr);
		 * recurses into generic_make_request(), which
		 * disallows recursion, bios being serialized on the
		 * current->bio_tail list now.
		 * we have to delegate updates to the activity log
		 * to the worker thread. */
		init_completion(&al_work.event);
		al_work.al_ext = al_ext;
		al_work.enr = enr;
		al_work.old_enr = al_ext->lc_number;
		al_work.w.cb = w_al_write_transaction;
		drbd_queue_work_front(&mdev->data.work, &al_work.w);
		wait_for_completion(&al_work.event);

		mdev->al_writ_cnt++;

		spin_lock_irq(&mdev->al_lock);
		/* commit the element's new extent number in the LRU cache */
		lc_changed(mdev->act_log, al_ext);
		spin_unlock_irq(&mdev->al_lock);
		wake_up(&mdev->al_wait);
	}
}

/**
 * drbd_al_complete_io() - Puts the reference taken by drbd_al_begin_io()
 * @mdev:	DRBD device.
 * @sector:	The sector whose IO just completed.
 *
 * Drops the reference on the covering AL extent; wakes al_wait waiters
 * when the reference count reaches zero so they can reuse the element.
 * Complains (but does nothing else) if the extent is not active.
 */
void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector)
{
	unsigned int enr = (sector >> (AL_EXTENT_SHIFT-9));
	struct lc_element *extent;
	unsigned long flags;

	trace_drbd_actlog(mdev, sector, "al_complete_io");

	spin_lock_irqsave(&mdev->al_lock, flags);

	extent = lc_find(mdev->act_log, enr);

	if (!extent) {
		spin_unlock_irqrestore(&mdev->al_lock, flags);
		dev_err(DEV, "al_complete_io() called on inactive extent %u\n", enr);
		return;
	}

	if (lc_put(mdev->act_log, extent) == 0)
		wake_up(&mdev->al_wait);

	spin_unlock_irqrestore(&mdev->al_lock, flags);
}
/* Worker callback: write one activity log transaction to disk.
 * Builds an al_transaction in the (mutex protected) md_io_page:
 * updates[0] records the new change; updates[1..AL_EXTENTS_PT] cycle
 * through the whole in-core AL (al_tr_cycle) so that every slot is
 * re-written on disk within a bounded number of transactions.  Remaining
 * entries are padded with LC_FREE.  The xor checksum covers all extent
 * numbers.  Finally the sector at al_tr_pos is written synchronously and
 * the cyclic position/sequence counters are advanced.
 * Completes the submitter's update_al_work.event in every case.
 * Always returns 1 (work handled). */
int
w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
{
	struct update_al_work *aw = container_of(w, struct update_al_work, w);
	struct lc_element *updated = aw->al_ext;
	const unsigned int new_enr = aw->enr;
	const unsigned int evicted = aw->old_enr;
	struct al_transaction *buffer;
	sector_t sector;
	int i, n, mx;
	unsigned int extent_nr;
	u32 xor_sum = 0;

	if (!get_ldev(mdev)) {
		dev_err(DEV, "get_ldev() failed in w_al_write_transaction\n");
		/* still wake the submitter, or it would sleep forever */
		complete(&((struct update_al_work *)w)->event);
		return 1;
	}
	/* do we have to do a bitmap write, first?
	 * TODO reduce maximum latency:
	 * submit both bios, then wait for both,
	 * instead of doing two synchronous sector writes. */
	if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE)
		drbd_bm_write_sect(mdev, evicted/AL_EXT_PER_BM_SECT);

	mutex_lock(&mdev->md_io_mutex); /* protects md_io_page, al_tr_cycle, ... */
	buffer = (struct al_transaction *)page_address(mdev->md_io_page);

	buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC);
	buffer->tr_number = cpu_to_be32(mdev->al_tr_number);

	n = lc_index_of(mdev->act_log, updated);

	/* slot [0]: the actual update of this transaction */
	buffer->updates[0].pos = cpu_to_be32(n);
	buffer->updates[0].extent = cpu_to_be32(new_enr);

	xor_sum ^= new_enr;

	/* slots [1..]: the next AL_EXTENTS_PT elements of the cyclic sweep */
	mx = min_t(int, AL_EXTENTS_PT,
		   mdev->act_log->nr_elements - mdev->al_tr_cycle);
	for (i = 0; i < mx; i++) {
		unsigned idx = mdev->al_tr_cycle + i;
		extent_nr = lc_element_by_index(mdev->act_log, idx)->lc_number;
		buffer->updates[i+1].pos = cpu_to_be32(idx);
		buffer->updates[i+1].extent = cpu_to_be32(extent_nr);
		xor_sum ^= extent_nr;
	}
	/* pad unused slots so the checksum stays well defined */
	for (; i < AL_EXTENTS_PT; i++) {
		buffer->updates[i+1].pos = __constant_cpu_to_be32(-1);
		buffer->updates[i+1].extent = __constant_cpu_to_be32(LC_FREE);
		xor_sum ^= LC_FREE;
	}
	mdev->al_tr_cycle += AL_EXTENTS_PT;
	if (mdev->al_tr_cycle >= mdev->act_log->nr_elements)
		mdev->al_tr_cycle = 0;

	buffer->xor_sum = cpu_to_be32(xor_sum);

	sector =  mdev->ldev->md.md_offset
		+ mdev->ldev->md.al_offset + mdev->al_tr_pos;

	if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE))
		drbd_chk_io_error(mdev, 1, TRUE);

	/* advance the cyclic on-disk slot position */
	if (++mdev->al_tr_pos >
	    div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT))
		mdev->al_tr_pos = 0;

	D_ASSERT(mdev->al_tr_pos < MD_AL_MAX_SIZE);
	mdev->al_tr_number++;

	mutex_unlock(&mdev->md_io_mutex);

	complete(&((struct update_al_work *)w)->event);
	put_ldev(mdev);

	return 1;
}
Reisner + mdev->ldev->md.al_offset + mdev->al_tr_pos; 351*b411b363SPhilipp Reisner 352*b411b363SPhilipp Reisner if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) 353*b411b363SPhilipp Reisner drbd_chk_io_error(mdev, 1, TRUE); 354*b411b363SPhilipp Reisner 355*b411b363SPhilipp Reisner if (++mdev->al_tr_pos > 356*b411b363SPhilipp Reisner div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT)) 357*b411b363SPhilipp Reisner mdev->al_tr_pos = 0; 358*b411b363SPhilipp Reisner 359*b411b363SPhilipp Reisner D_ASSERT(mdev->al_tr_pos < MD_AL_MAX_SIZE); 360*b411b363SPhilipp Reisner mdev->al_tr_number++; 361*b411b363SPhilipp Reisner 362*b411b363SPhilipp Reisner mutex_unlock(&mdev->md_io_mutex); 363*b411b363SPhilipp Reisner 364*b411b363SPhilipp Reisner complete(&((struct update_al_work *)w)->event); 365*b411b363SPhilipp Reisner put_ldev(mdev); 366*b411b363SPhilipp Reisner 367*b411b363SPhilipp Reisner return 1; 368*b411b363SPhilipp Reisner } 369*b411b363SPhilipp Reisner 370*b411b363SPhilipp Reisner /** 371*b411b363SPhilipp Reisner * drbd_al_read_tr() - Read a single transaction from the on disk activity log 372*b411b363SPhilipp Reisner * @mdev: DRBD device. 373*b411b363SPhilipp Reisner * @bdev: Block device to read form. 374*b411b363SPhilipp Reisner * @b: pointer to an al_transaction. 375*b411b363SPhilipp Reisner * @index: On disk slot of the transaction to read. 376*b411b363SPhilipp Reisner * 377*b411b363SPhilipp Reisner * Returns -1 on IO error, 0 on checksum error and 1 upon success. 
378*b411b363SPhilipp Reisner */ 379*b411b363SPhilipp Reisner static int drbd_al_read_tr(struct drbd_conf *mdev, 380*b411b363SPhilipp Reisner struct drbd_backing_dev *bdev, 381*b411b363SPhilipp Reisner struct al_transaction *b, 382*b411b363SPhilipp Reisner int index) 383*b411b363SPhilipp Reisner { 384*b411b363SPhilipp Reisner sector_t sector; 385*b411b363SPhilipp Reisner int rv, i; 386*b411b363SPhilipp Reisner u32 xor_sum = 0; 387*b411b363SPhilipp Reisner 388*b411b363SPhilipp Reisner sector = bdev->md.md_offset + bdev->md.al_offset + index; 389*b411b363SPhilipp Reisner 390*b411b363SPhilipp Reisner /* Dont process error normally, 391*b411b363SPhilipp Reisner * as this is done before disk is attached! */ 392*b411b363SPhilipp Reisner if (!drbd_md_sync_page_io(mdev, bdev, sector, READ)) 393*b411b363SPhilipp Reisner return -1; 394*b411b363SPhilipp Reisner 395*b411b363SPhilipp Reisner rv = (be32_to_cpu(b->magic) == DRBD_MAGIC); 396*b411b363SPhilipp Reisner 397*b411b363SPhilipp Reisner for (i = 0; i < AL_EXTENTS_PT + 1; i++) 398*b411b363SPhilipp Reisner xor_sum ^= be32_to_cpu(b->updates[i].extent); 399*b411b363SPhilipp Reisner rv &= (xor_sum == be32_to_cpu(b->xor_sum)); 400*b411b363SPhilipp Reisner 401*b411b363SPhilipp Reisner return rv; 402*b411b363SPhilipp Reisner } 403*b411b363SPhilipp Reisner 404*b411b363SPhilipp Reisner /** 405*b411b363SPhilipp Reisner * drbd_al_read_log() - Restores the activity log from its on disk representation. 406*b411b363SPhilipp Reisner * @mdev: DRBD device. 407*b411b363SPhilipp Reisner * @bdev: Block device to read form. 408*b411b363SPhilipp Reisner * 409*b411b363SPhilipp Reisner * Returns 1 on success, returns 0 when reading the log failed due to IO errors. 
int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
{
	struct al_transaction *buffer;
	int i;
	int rv;
	int mx;
	int active_extents = 0;
	int transactions = 0;
	int found_valid = 0;
	int from = 0;		/* slot of the oldest valid transaction */
	int to = 0;		/* slot of the newest valid transaction */
	u32 from_tnr = 0;
	u32 to_tnr = 0;
	u32 cnr;

	/* number of on-disk transaction slots in use (cyclic buffer size - 1) */
	mx = div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT);

	/* lock out all other meta data io for now,
	 * and make sure the page is mapped.
	 */
	mutex_lock(&mdev->md_io_mutex);
	buffer = page_address(mdev->md_io_page);

	/* Find the valid transaction in the log */
	for (i = 0; i <= mx; i++) {
		rv = drbd_al_read_tr(mdev, bdev, buffer, i);
		if (rv == 0)		/* checksum mismatch: skip slot */
			continue;
		if (rv == -1) {		/* IO error: give up entirely */
			mutex_unlock(&mdev->md_io_mutex);
			return 0;
		}
		cnr = be32_to_cpu(buffer->tr_number);

		if (++found_valid == 1) {
			from = i;
			to = i;
			from_tnr = cnr;
			to_tnr = cnr;
			continue;
		}
		/* signed difference copes with tr_number wrap-around */
		if ((int)cnr - (int)from_tnr < 0) {
			D_ASSERT(from_tnr - cnr + i - from == mx+1);
			from = i;
			from_tnr = cnr;
		}
		if ((int)cnr - (int)to_tnr > 0) {
			D_ASSERT(cnr - to_tnr == i - to);
			to = i;
			to_tnr = cnr;
		}
	}

	if (!found_valid) {
		dev_warn(DEV, "No usable activity log found.\n");
		mutex_unlock(&mdev->md_io_mutex);
		return 1;
	}

	/* Read the valid transactions.
	 * dev_info(DEV, "Reading from %d to %d.\n",from,to); */
	i = from;
	while (1) {
		int j, pos;
		unsigned int extent_nr;
		unsigned int trn;

		rv = drbd_al_read_tr(mdev, bdev, buffer, i);
		ERR_IF(rv == 0) goto cancel;
		if (rv == -1) {
			mutex_unlock(&mdev->md_io_mutex);
			return 0;
		}

		trn = be32_to_cpu(buffer->tr_number);

		spin_lock_irq(&mdev->al_lock);

		/* This loop runs backwards because in the cyclic
		   elements there might be an old version of the
		   updated element (in slot 0). So the element in slot 0
		   can overwrite old versions. */
		for (j = AL_EXTENTS_PT; j >= 0; j--) {
			pos = be32_to_cpu(buffer->updates[j].pos);
			extent_nr = be32_to_cpu(buffer->updates[j].extent);

			if (extent_nr == LC_FREE)
				continue;

			lc_set(mdev->act_log, extent_nr, pos);
			active_extents++;
		}
		spin_unlock_irq(&mdev->al_lock);

		transactions++;

	cancel:
		if (i == to)
			break;
		i++;
		if (i > mx)	/* wrap around the cyclic on-disk buffer */
			i = 0;
	}

	/* continue numbering/writing after the newest transaction found */
	mdev->al_tr_number = to_tnr+1;
	mdev->al_tr_pos = to;
	if (++mdev->al_tr_pos >
	    div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT))
		mdev->al_tr_pos = 0;

	/* ok, we are done with it */
	mutex_unlock(&mdev->md_io_mutex);

	dev_info(DEV, "Found %d transactions (%d active extents) in activity log.\n",
	     transactions, active_extents);

	return 1;
}
/* bio completion for the AL-to-bitmap write-out (see drbd_al_to_on_disk_bm()).
 * Records the first error in the shared drbd_atodb_wait, drops the bio's
 * page and local_cnt references, and completes io_done when the last bio
 * of the batch finishes. */
static void atodb_endio(struct bio *bio, int error)
{
	struct drbd_atodb_wait *wc = bio->bi_private;
	struct drbd_conf *mdev = wc->mdev;
	struct page *page;
	int uptodate = bio_flagged(bio, BIO_UPTODATE);

	/* strange behavior of some lower level drivers...
	 * fail the request by clearing the uptodate flag,
	 * but do not return any error?! */
	if (!error && !uptodate)
		error = -EIO;

	drbd_chk_io_error(mdev, error, TRUE);
	if (error && wc->error == 0)
		wc->error = error;

	if (atomic_dec_and_test(&wc->count))
		complete(&wc->io_done);

	/* page and bio are private to this bio, so releasing them after
	 * complete() is fine even though the waiter may already run */
	page = bio->bi_io_vec[0].bv_page;
	put_page(page);
	bio_put(bio);
	mdev->bm_writ_cnt++;
	put_ldev(mdev);
}

/* sector offset to number of bitmap words covered by it */
#define S2W(s)	((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL))
/* activity log to on disk bitmap -- prepare bio unless that sector
 * is already covered by previously prepared bios */
static int atodb_prepare_unless_covered(struct drbd_conf *mdev,
					struct bio **bios,
					unsigned int enr,
					struct drbd_atodb_wait *wc) __must_hold(local)
{
	struct bio *bio;
	struct page *page;
	sector_t on_disk_sector = enr + mdev->ldev->md.md_offset
				      + mdev->ldev->md.bm_offset;
	/* PAGE_SIZE sentinel == "previous page is full, allocate a new one" */
	unsigned int page_offset = PAGE_SIZE;
	int offset;
	int i = 0;
	int err = -ENOMEM;

	/* Check if that enr is already covered by an already created bio.
	 * Caution, bios[] is not NULL terminated,
	 * but only initialized to all NULL.
	 * For completely scattered activity log,
	 * the last invocation iterates over all bios,
	 * and finds the last NULL entry.
	 */
	while ((bio = bios[i])) {
		if (bio->bi_sector == on_disk_sector)
			return 0;
		i++;
	}
	/* bios[i] == NULL, the next not yet used slot */

	/* GFP_KERNEL, we are not in the write-out path */
	bio = bio_alloc(GFP_KERNEL, 1);
	if (bio == NULL)
		return -ENOMEM;

	/* pack multiple MD_SECTOR_SIZE payloads into one page: continue
	 * writing into the page of the previously prepared bio if it
	 * still has room */
	if (i > 0) {
		const struct bio_vec *prev_bv = bios[i-1]->bi_io_vec;
		page_offset = prev_bv->bv_offset + prev_bv->bv_len;
		page = prev_bv->bv_page;
	}
	if (page_offset == PAGE_SIZE) {
		/* NOTE(review): bare __GFP_HIGHMEM, without GFP_KERNEL base
		 * flags — presumably intentional here; confirm against the
		 * allocator semantics of this kernel version */
		page = alloc_page(__GFP_HIGHMEM);
		if (page == NULL)
			goto out_bio_put;
		page_offset = 0;
	} else {
		get_page(page);	/* extra ref: each bio puts "its" page once */
	}

	/* copy the bitmap words covering this extent into the page */
	offset = S2W(enr);
	drbd_bm_get_lel(mdev, offset,
			min_t(size_t, S2W(1), drbd_bm_words(mdev) - offset),
			kmap(page) + page_offset);
	kunmap(page);

	bio->bi_private = wc;
	bio->bi_end_io = atodb_endio;
	bio->bi_bdev = mdev->ldev->md_bdev;
	bio->bi_sector = on_disk_sector;

	if (bio_add_page(bio, page, MD_SECTOR_SIZE, page_offset) != MD_SECTOR_SIZE)
		goto out_put_page;

	atomic_inc(&wc->count);
	/* we already know that we may do this...
	 * get_ldev_if_state(mdev,D_ATTACHING);
	 * just get the extra reference, so that the local_cnt reflects
	 * the number of pending IO requests DRBD at its backing device.
	 */
	atomic_inc(&mdev->local_cnt);

	bios[i] = bio;

	return 0;

out_put_page:
	err = -EINVAL;
	put_page(page);
out_bio_put:
	bio_put(bio);
	return err;
}

/**
 * drbd_al_to_on_disk_bm() -  * Writes bitmap parts covered by active AL extents
 * @mdev:	DRBD device.
644*b411b363SPhilipp Reisner * 645*b411b363SPhilipp Reisner * Called when we detach (unconfigure) local storage, 646*b411b363SPhilipp Reisner * or when we go from R_PRIMARY to R_SECONDARY role. 647*b411b363SPhilipp Reisner */ 648*b411b363SPhilipp Reisner void drbd_al_to_on_disk_bm(struct drbd_conf *mdev) 649*b411b363SPhilipp Reisner { 650*b411b363SPhilipp Reisner int i, nr_elements; 651*b411b363SPhilipp Reisner unsigned int enr; 652*b411b363SPhilipp Reisner struct bio **bios; 653*b411b363SPhilipp Reisner struct drbd_atodb_wait wc; 654*b411b363SPhilipp Reisner 655*b411b363SPhilipp Reisner ERR_IF (!get_ldev_if_state(mdev, D_ATTACHING)) 656*b411b363SPhilipp Reisner return; /* sorry, I don't have any act_log etc... */ 657*b411b363SPhilipp Reisner 658*b411b363SPhilipp Reisner wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); 659*b411b363SPhilipp Reisner 660*b411b363SPhilipp Reisner nr_elements = mdev->act_log->nr_elements; 661*b411b363SPhilipp Reisner 662*b411b363SPhilipp Reisner /* GFP_KERNEL, we are not in anyone's write-out path */ 663*b411b363SPhilipp Reisner bios = kzalloc(sizeof(struct bio *) * nr_elements, GFP_KERNEL); 664*b411b363SPhilipp Reisner if (!bios) 665*b411b363SPhilipp Reisner goto submit_one_by_one; 666*b411b363SPhilipp Reisner 667*b411b363SPhilipp Reisner atomic_set(&wc.count, 0); 668*b411b363SPhilipp Reisner init_completion(&wc.io_done); 669*b411b363SPhilipp Reisner wc.mdev = mdev; 670*b411b363SPhilipp Reisner wc.error = 0; 671*b411b363SPhilipp Reisner 672*b411b363SPhilipp Reisner for (i = 0; i < nr_elements; i++) { 673*b411b363SPhilipp Reisner enr = lc_element_by_index(mdev->act_log, i)->lc_number; 674*b411b363SPhilipp Reisner if (enr == LC_FREE) 675*b411b363SPhilipp Reisner continue; 676*b411b363SPhilipp Reisner /* next statement also does atomic_inc wc.count and local_cnt */ 677*b411b363SPhilipp Reisner if (atodb_prepare_unless_covered(mdev, bios, 678*b411b363SPhilipp Reisner enr/AL_EXT_PER_BM_SECT, 679*b411b363SPhilipp Reisner &wc)) 
680*b411b363SPhilipp Reisner goto free_bios_submit_one_by_one; 681*b411b363SPhilipp Reisner } 682*b411b363SPhilipp Reisner 683*b411b363SPhilipp Reisner /* unnecessary optimization? */ 684*b411b363SPhilipp Reisner lc_unlock(mdev->act_log); 685*b411b363SPhilipp Reisner wake_up(&mdev->al_wait); 686*b411b363SPhilipp Reisner 687*b411b363SPhilipp Reisner /* all prepared, submit them */ 688*b411b363SPhilipp Reisner for (i = 0; i < nr_elements; i++) { 689*b411b363SPhilipp Reisner if (bios[i] == NULL) 690*b411b363SPhilipp Reisner break; 691*b411b363SPhilipp Reisner if (FAULT_ACTIVE(mdev, DRBD_FAULT_MD_WR)) { 692*b411b363SPhilipp Reisner bios[i]->bi_rw = WRITE; 693*b411b363SPhilipp Reisner bio_endio(bios[i], -EIO); 694*b411b363SPhilipp Reisner } else { 695*b411b363SPhilipp Reisner submit_bio(WRITE, bios[i]); 696*b411b363SPhilipp Reisner } 697*b411b363SPhilipp Reisner } 698*b411b363SPhilipp Reisner 699*b411b363SPhilipp Reisner drbd_blk_run_queue(bdev_get_queue(mdev->ldev->md_bdev)); 700*b411b363SPhilipp Reisner 701*b411b363SPhilipp Reisner /* always (try to) flush bitmap to stable storage */ 702*b411b363SPhilipp Reisner drbd_md_flush(mdev); 703*b411b363SPhilipp Reisner 704*b411b363SPhilipp Reisner /* In case we did not submit a single IO do not wait for 705*b411b363SPhilipp Reisner * them to complete. ( Because we would wait forever here. ) 706*b411b363SPhilipp Reisner * 707*b411b363SPhilipp Reisner * In case we had IOs and they are already complete, there 708*b411b363SPhilipp Reisner * is not point in waiting anyways. 709*b411b363SPhilipp Reisner * Therefore this if () ... 
*/ 710*b411b363SPhilipp Reisner if (atomic_read(&wc.count)) 711*b411b363SPhilipp Reisner wait_for_completion(&wc.io_done); 712*b411b363SPhilipp Reisner 713*b411b363SPhilipp Reisner put_ldev(mdev); 714*b411b363SPhilipp Reisner 715*b411b363SPhilipp Reisner kfree(bios); 716*b411b363SPhilipp Reisner return; 717*b411b363SPhilipp Reisner 718*b411b363SPhilipp Reisner free_bios_submit_one_by_one: 719*b411b363SPhilipp Reisner /* free everything by calling the endio callback directly. */ 720*b411b363SPhilipp Reisner for (i = 0; i < nr_elements && bios[i]; i++) 721*b411b363SPhilipp Reisner bio_endio(bios[i], 0); 722*b411b363SPhilipp Reisner 723*b411b363SPhilipp Reisner kfree(bios); 724*b411b363SPhilipp Reisner 725*b411b363SPhilipp Reisner submit_one_by_one: 726*b411b363SPhilipp Reisner dev_warn(DEV, "Using the slow drbd_al_to_on_disk_bm()\n"); 727*b411b363SPhilipp Reisner 728*b411b363SPhilipp Reisner for (i = 0; i < mdev->act_log->nr_elements; i++) { 729*b411b363SPhilipp Reisner enr = lc_element_by_index(mdev->act_log, i)->lc_number; 730*b411b363SPhilipp Reisner if (enr == LC_FREE) 731*b411b363SPhilipp Reisner continue; 732*b411b363SPhilipp Reisner /* Really slow: if we have al-extents 16..19 active, 733*b411b363SPhilipp Reisner * sector 4 will be written four times! Synchronous! */ 734*b411b363SPhilipp Reisner drbd_bm_write_sect(mdev, enr/AL_EXT_PER_BM_SECT); 735*b411b363SPhilipp Reisner } 736*b411b363SPhilipp Reisner 737*b411b363SPhilipp Reisner lc_unlock(mdev->act_log); 738*b411b363SPhilipp Reisner wake_up(&mdev->al_wait); 739*b411b363SPhilipp Reisner put_ldev(mdev); 740*b411b363SPhilipp Reisner } 741*b411b363SPhilipp Reisner 742*b411b363SPhilipp Reisner /** 743*b411b363SPhilipp Reisner * drbd_al_apply_to_bm() - Sets the bitmap to diry(1) where covered ba active AL extents 744*b411b363SPhilipp Reisner * @mdev: DRBD device. 
745*b411b363SPhilipp Reisner */ 746*b411b363SPhilipp Reisner void drbd_al_apply_to_bm(struct drbd_conf *mdev) 747*b411b363SPhilipp Reisner { 748*b411b363SPhilipp Reisner unsigned int enr; 749*b411b363SPhilipp Reisner unsigned long add = 0; 750*b411b363SPhilipp Reisner char ppb[10]; 751*b411b363SPhilipp Reisner int i; 752*b411b363SPhilipp Reisner 753*b411b363SPhilipp Reisner wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); 754*b411b363SPhilipp Reisner 755*b411b363SPhilipp Reisner for (i = 0; i < mdev->act_log->nr_elements; i++) { 756*b411b363SPhilipp Reisner enr = lc_element_by_index(mdev->act_log, i)->lc_number; 757*b411b363SPhilipp Reisner if (enr == LC_FREE) 758*b411b363SPhilipp Reisner continue; 759*b411b363SPhilipp Reisner add += drbd_bm_ALe_set_all(mdev, enr); 760*b411b363SPhilipp Reisner } 761*b411b363SPhilipp Reisner 762*b411b363SPhilipp Reisner lc_unlock(mdev->act_log); 763*b411b363SPhilipp Reisner wake_up(&mdev->al_wait); 764*b411b363SPhilipp Reisner 765*b411b363SPhilipp Reisner dev_info(DEV, "Marked additional %s as out-of-sync based on AL.\n", 766*b411b363SPhilipp Reisner ppsize(ppb, Bit2KB(add))); 767*b411b363SPhilipp Reisner } 768*b411b363SPhilipp Reisner 769*b411b363SPhilipp Reisner static int _try_lc_del(struct drbd_conf *mdev, struct lc_element *al_ext) 770*b411b363SPhilipp Reisner { 771*b411b363SPhilipp Reisner int rv; 772*b411b363SPhilipp Reisner 773*b411b363SPhilipp Reisner spin_lock_irq(&mdev->al_lock); 774*b411b363SPhilipp Reisner rv = (al_ext->refcnt == 0); 775*b411b363SPhilipp Reisner if (likely(rv)) 776*b411b363SPhilipp Reisner lc_del(mdev->act_log, al_ext); 777*b411b363SPhilipp Reisner spin_unlock_irq(&mdev->al_lock); 778*b411b363SPhilipp Reisner 779*b411b363SPhilipp Reisner return rv; 780*b411b363SPhilipp Reisner } 781*b411b363SPhilipp Reisner 782*b411b363SPhilipp Reisner /** 783*b411b363SPhilipp Reisner * drbd_al_shrink() - Removes all active extents form the activity log 784*b411b363SPhilipp Reisner * @mdev: DRBD device. 
785*b411b363SPhilipp Reisner * 786*b411b363SPhilipp Reisner * Removes all active extents form the activity log, waiting until 787*b411b363SPhilipp Reisner * the reference count of each entry dropped to 0 first, of course. 788*b411b363SPhilipp Reisner * 789*b411b363SPhilipp Reisner * You need to lock mdev->act_log with lc_try_lock() / lc_unlock() 790*b411b363SPhilipp Reisner */ 791*b411b363SPhilipp Reisner void drbd_al_shrink(struct drbd_conf *mdev) 792*b411b363SPhilipp Reisner { 793*b411b363SPhilipp Reisner struct lc_element *al_ext; 794*b411b363SPhilipp Reisner int i; 795*b411b363SPhilipp Reisner 796*b411b363SPhilipp Reisner D_ASSERT(test_bit(__LC_DIRTY, &mdev->act_log->flags)); 797*b411b363SPhilipp Reisner 798*b411b363SPhilipp Reisner for (i = 0; i < mdev->act_log->nr_elements; i++) { 799*b411b363SPhilipp Reisner al_ext = lc_element_by_index(mdev->act_log, i); 800*b411b363SPhilipp Reisner if (al_ext->lc_number == LC_FREE) 801*b411b363SPhilipp Reisner continue; 802*b411b363SPhilipp Reisner wait_event(mdev->al_wait, _try_lc_del(mdev, al_ext)); 803*b411b363SPhilipp Reisner } 804*b411b363SPhilipp Reisner 805*b411b363SPhilipp Reisner wake_up(&mdev->al_wait); 806*b411b363SPhilipp Reisner } 807*b411b363SPhilipp Reisner 808*b411b363SPhilipp Reisner static int w_update_odbm(struct drbd_conf *mdev, struct drbd_work *w, int unused) 809*b411b363SPhilipp Reisner { 810*b411b363SPhilipp Reisner struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w); 811*b411b363SPhilipp Reisner 812*b411b363SPhilipp Reisner if (!get_ldev(mdev)) { 813*b411b363SPhilipp Reisner if (__ratelimit(&drbd_ratelimit_state)) 814*b411b363SPhilipp Reisner dev_warn(DEV, "Can not update on disk bitmap, local IO disabled.\n"); 815*b411b363SPhilipp Reisner kfree(udw); 816*b411b363SPhilipp Reisner return 1; 817*b411b363SPhilipp Reisner } 818*b411b363SPhilipp Reisner 819*b411b363SPhilipp Reisner drbd_bm_write_sect(mdev, udw->enr); 820*b411b363SPhilipp Reisner put_ldev(mdev); 
821*b411b363SPhilipp Reisner 822*b411b363SPhilipp Reisner kfree(udw); 823*b411b363SPhilipp Reisner 824*b411b363SPhilipp Reisner if (drbd_bm_total_weight(mdev) <= mdev->rs_failed) { 825*b411b363SPhilipp Reisner switch (mdev->state.conn) { 826*b411b363SPhilipp Reisner case C_SYNC_SOURCE: case C_SYNC_TARGET: 827*b411b363SPhilipp Reisner case C_PAUSED_SYNC_S: case C_PAUSED_SYNC_T: 828*b411b363SPhilipp Reisner drbd_resync_finished(mdev); 829*b411b363SPhilipp Reisner default: 830*b411b363SPhilipp Reisner /* nothing to do */ 831*b411b363SPhilipp Reisner break; 832*b411b363SPhilipp Reisner } 833*b411b363SPhilipp Reisner } 834*b411b363SPhilipp Reisner drbd_bcast_sync_progress(mdev); 835*b411b363SPhilipp Reisner 836*b411b363SPhilipp Reisner return 1; 837*b411b363SPhilipp Reisner } 838*b411b363SPhilipp Reisner 839*b411b363SPhilipp Reisner 840*b411b363SPhilipp Reisner /* ATTENTION. The AL's extents are 4MB each, while the extents in the 841*b411b363SPhilipp Reisner * resync LRU-cache are 16MB each. 842*b411b363SPhilipp Reisner * The caller of this function has to hold an get_ldev() reference. 843*b411b363SPhilipp Reisner * 844*b411b363SPhilipp Reisner * TODO will be obsoleted once we have a caching lru of the on disk bitmap 845*b411b363SPhilipp Reisner */ 846*b411b363SPhilipp Reisner static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, 847*b411b363SPhilipp Reisner int count, int success) 848*b411b363SPhilipp Reisner { 849*b411b363SPhilipp Reisner struct lc_element *e; 850*b411b363SPhilipp Reisner struct update_odbm_work *udw; 851*b411b363SPhilipp Reisner 852*b411b363SPhilipp Reisner unsigned int enr; 853*b411b363SPhilipp Reisner 854*b411b363SPhilipp Reisner D_ASSERT(atomic_read(&mdev->local_cnt)); 855*b411b363SPhilipp Reisner 856*b411b363SPhilipp Reisner /* I simply assume that a sector/size pair never crosses 857*b411b363SPhilipp Reisner * a 16 MB extent border. (Currently this is true...) 
*/ 858*b411b363SPhilipp Reisner enr = BM_SECT_TO_EXT(sector); 859*b411b363SPhilipp Reisner 860*b411b363SPhilipp Reisner e = lc_get(mdev->resync, enr); 861*b411b363SPhilipp Reisner if (e) { 862*b411b363SPhilipp Reisner struct bm_extent *ext = lc_entry(e, struct bm_extent, lce); 863*b411b363SPhilipp Reisner if (ext->lce.lc_number == enr) { 864*b411b363SPhilipp Reisner if (success) 865*b411b363SPhilipp Reisner ext->rs_left -= count; 866*b411b363SPhilipp Reisner else 867*b411b363SPhilipp Reisner ext->rs_failed += count; 868*b411b363SPhilipp Reisner if (ext->rs_left < ext->rs_failed) { 869*b411b363SPhilipp Reisner dev_err(DEV, "BAD! sector=%llus enr=%u rs_left=%d " 870*b411b363SPhilipp Reisner "rs_failed=%d count=%d\n", 871*b411b363SPhilipp Reisner (unsigned long long)sector, 872*b411b363SPhilipp Reisner ext->lce.lc_number, ext->rs_left, 873*b411b363SPhilipp Reisner ext->rs_failed, count); 874*b411b363SPhilipp Reisner dump_stack(); 875*b411b363SPhilipp Reisner 876*b411b363SPhilipp Reisner lc_put(mdev->resync, &ext->lce); 877*b411b363SPhilipp Reisner drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); 878*b411b363SPhilipp Reisner return; 879*b411b363SPhilipp Reisner } 880*b411b363SPhilipp Reisner } else { 881*b411b363SPhilipp Reisner /* Normally this element should be in the cache, 882*b411b363SPhilipp Reisner * since drbd_rs_begin_io() pulled it already in. 883*b411b363SPhilipp Reisner * 884*b411b363SPhilipp Reisner * But maybe an application write finished, and we set 885*b411b363SPhilipp Reisner * something outside the resync lru_cache in sync. 
886*b411b363SPhilipp Reisner */ 887*b411b363SPhilipp Reisner int rs_left = drbd_bm_e_weight(mdev, enr); 888*b411b363SPhilipp Reisner if (ext->flags != 0) { 889*b411b363SPhilipp Reisner dev_warn(DEV, "changing resync lce: %d[%u;%02lx]" 890*b411b363SPhilipp Reisner " -> %d[%u;00]\n", 891*b411b363SPhilipp Reisner ext->lce.lc_number, ext->rs_left, 892*b411b363SPhilipp Reisner ext->flags, enr, rs_left); 893*b411b363SPhilipp Reisner ext->flags = 0; 894*b411b363SPhilipp Reisner } 895*b411b363SPhilipp Reisner if (ext->rs_failed) { 896*b411b363SPhilipp Reisner dev_warn(DEV, "Kicking resync_lru element enr=%u " 897*b411b363SPhilipp Reisner "out with rs_failed=%d\n", 898*b411b363SPhilipp Reisner ext->lce.lc_number, ext->rs_failed); 899*b411b363SPhilipp Reisner set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags); 900*b411b363SPhilipp Reisner } 901*b411b363SPhilipp Reisner ext->rs_left = rs_left; 902*b411b363SPhilipp Reisner ext->rs_failed = success ? 0 : count; 903*b411b363SPhilipp Reisner lc_changed(mdev->resync, &ext->lce); 904*b411b363SPhilipp Reisner } 905*b411b363SPhilipp Reisner lc_put(mdev->resync, &ext->lce); 906*b411b363SPhilipp Reisner /* no race, we are within the al_lock! 
*/ 907*b411b363SPhilipp Reisner 908*b411b363SPhilipp Reisner if (ext->rs_left == ext->rs_failed) { 909*b411b363SPhilipp Reisner ext->rs_failed = 0; 910*b411b363SPhilipp Reisner 911*b411b363SPhilipp Reisner udw = kmalloc(sizeof(*udw), GFP_ATOMIC); 912*b411b363SPhilipp Reisner if (udw) { 913*b411b363SPhilipp Reisner udw->enr = ext->lce.lc_number; 914*b411b363SPhilipp Reisner udw->w.cb = w_update_odbm; 915*b411b363SPhilipp Reisner drbd_queue_work_front(&mdev->data.work, &udw->w); 916*b411b363SPhilipp Reisner } else { 917*b411b363SPhilipp Reisner dev_warn(DEV, "Could not kmalloc an udw\n"); 918*b411b363SPhilipp Reisner set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags); 919*b411b363SPhilipp Reisner } 920*b411b363SPhilipp Reisner } 921*b411b363SPhilipp Reisner } else { 922*b411b363SPhilipp Reisner dev_err(DEV, "lc_get() failed! locked=%d/%d flags=%lu\n", 923*b411b363SPhilipp Reisner mdev->resync_locked, 924*b411b363SPhilipp Reisner mdev->resync->nr_elements, 925*b411b363SPhilipp Reisner mdev->resync->flags); 926*b411b363SPhilipp Reisner } 927*b411b363SPhilipp Reisner } 928*b411b363SPhilipp Reisner 929*b411b363SPhilipp Reisner /* clear the bit corresponding to the piece of storage in question: 930*b411b363SPhilipp Reisner * size byte of data starting from sector. Only clear a bits of the affected 931*b411b363SPhilipp Reisner * one ore more _aligned_ BM_BLOCK_SIZE blocks. 932*b411b363SPhilipp Reisner * 933*b411b363SPhilipp Reisner * called by worker on C_SYNC_TARGET and receiver on SyncSource. 
934*b411b363SPhilipp Reisner * 935*b411b363SPhilipp Reisner */ 936*b411b363SPhilipp Reisner void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, 937*b411b363SPhilipp Reisner const char *file, const unsigned int line) 938*b411b363SPhilipp Reisner { 939*b411b363SPhilipp Reisner /* Is called from worker and receiver context _only_ */ 940*b411b363SPhilipp Reisner unsigned long sbnr, ebnr, lbnr; 941*b411b363SPhilipp Reisner unsigned long count = 0; 942*b411b363SPhilipp Reisner sector_t esector, nr_sectors; 943*b411b363SPhilipp Reisner int wake_up = 0; 944*b411b363SPhilipp Reisner unsigned long flags; 945*b411b363SPhilipp Reisner 946*b411b363SPhilipp Reisner if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) { 947*b411b363SPhilipp Reisner dev_err(DEV, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n", 948*b411b363SPhilipp Reisner (unsigned long long)sector, size); 949*b411b363SPhilipp Reisner return; 950*b411b363SPhilipp Reisner } 951*b411b363SPhilipp Reisner nr_sectors = drbd_get_capacity(mdev->this_bdev); 952*b411b363SPhilipp Reisner esector = sector + (size >> 9) - 1; 953*b411b363SPhilipp Reisner 954*b411b363SPhilipp Reisner ERR_IF(sector >= nr_sectors) return; 955*b411b363SPhilipp Reisner ERR_IF(esector >= nr_sectors) esector = (nr_sectors-1); 956*b411b363SPhilipp Reisner 957*b411b363SPhilipp Reisner lbnr = BM_SECT_TO_BIT(nr_sectors-1); 958*b411b363SPhilipp Reisner 959*b411b363SPhilipp Reisner /* we clear it (in sync). 960*b411b363SPhilipp Reisner * round up start sector, round down end sector. 
we make sure we only 961*b411b363SPhilipp Reisner * clear full, aligned, BM_BLOCK_SIZE (4K) blocks */ 962*b411b363SPhilipp Reisner if (unlikely(esector < BM_SECT_PER_BIT-1)) 963*b411b363SPhilipp Reisner return; 964*b411b363SPhilipp Reisner if (unlikely(esector == (nr_sectors-1))) 965*b411b363SPhilipp Reisner ebnr = lbnr; 966*b411b363SPhilipp Reisner else 967*b411b363SPhilipp Reisner ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1)); 968*b411b363SPhilipp Reisner sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1); 969*b411b363SPhilipp Reisner 970*b411b363SPhilipp Reisner trace_drbd_resync(mdev, TRACE_LVL_METRICS, 971*b411b363SPhilipp Reisner "drbd_set_in_sync: sector=%llus size=%u sbnr=%lu ebnr=%lu\n", 972*b411b363SPhilipp Reisner (unsigned long long)sector, size, sbnr, ebnr); 973*b411b363SPhilipp Reisner 974*b411b363SPhilipp Reisner if (sbnr > ebnr) 975*b411b363SPhilipp Reisner return; 976*b411b363SPhilipp Reisner 977*b411b363SPhilipp Reisner /* 978*b411b363SPhilipp Reisner * ok, (capacity & 7) != 0 sometimes, but who cares... 979*b411b363SPhilipp Reisner * we count rs_{total,left} in bits, not sectors. 980*b411b363SPhilipp Reisner */ 981*b411b363SPhilipp Reisner spin_lock_irqsave(&mdev->al_lock, flags); 982*b411b363SPhilipp Reisner count = drbd_bm_clear_bits(mdev, sbnr, ebnr); 983*b411b363SPhilipp Reisner if (count) { 984*b411b363SPhilipp Reisner /* we need the lock for drbd_try_clear_on_disk_bm */ 985*b411b363SPhilipp Reisner if (jiffies - mdev->rs_mark_time > HZ*10) { 986*b411b363SPhilipp Reisner /* should be rolling marks, 987*b411b363SPhilipp Reisner * but we estimate only anyways. 
*/ 988*b411b363SPhilipp Reisner if (mdev->rs_mark_left != drbd_bm_total_weight(mdev) && 989*b411b363SPhilipp Reisner mdev->state.conn != C_PAUSED_SYNC_T && 990*b411b363SPhilipp Reisner mdev->state.conn != C_PAUSED_SYNC_S) { 991*b411b363SPhilipp Reisner mdev->rs_mark_time = jiffies; 992*b411b363SPhilipp Reisner mdev->rs_mark_left = drbd_bm_total_weight(mdev); 993*b411b363SPhilipp Reisner } 994*b411b363SPhilipp Reisner } 995*b411b363SPhilipp Reisner if (get_ldev(mdev)) { 996*b411b363SPhilipp Reisner drbd_try_clear_on_disk_bm(mdev, sector, count, TRUE); 997*b411b363SPhilipp Reisner put_ldev(mdev); 998*b411b363SPhilipp Reisner } 999*b411b363SPhilipp Reisner /* just wake_up unconditional now, various lc_chaged(), 1000*b411b363SPhilipp Reisner * lc_put() in drbd_try_clear_on_disk_bm(). */ 1001*b411b363SPhilipp Reisner wake_up = 1; 1002*b411b363SPhilipp Reisner } 1003*b411b363SPhilipp Reisner spin_unlock_irqrestore(&mdev->al_lock, flags); 1004*b411b363SPhilipp Reisner if (wake_up) 1005*b411b363SPhilipp Reisner wake_up(&mdev->al_wait); 1006*b411b363SPhilipp Reisner } 1007*b411b363SPhilipp Reisner 1008*b411b363SPhilipp Reisner /* 1009*b411b363SPhilipp Reisner * this is intended to set one request worth of data out of sync. 1010*b411b363SPhilipp Reisner * affects at least 1 bit, 1011*b411b363SPhilipp Reisner * and at most 1+DRBD_MAX_SEGMENT_SIZE/BM_BLOCK_SIZE bits. 1012*b411b363SPhilipp Reisner * 1013*b411b363SPhilipp Reisner * called by tl_clear and drbd_send_dblock (==drbd_make_request). 1014*b411b363SPhilipp Reisner * so this can be _any_ process. 
1015*b411b363SPhilipp Reisner */ 1016*b411b363SPhilipp Reisner void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size, 1017*b411b363SPhilipp Reisner const char *file, const unsigned int line) 1018*b411b363SPhilipp Reisner { 1019*b411b363SPhilipp Reisner unsigned long sbnr, ebnr, lbnr, flags; 1020*b411b363SPhilipp Reisner sector_t esector, nr_sectors; 1021*b411b363SPhilipp Reisner unsigned int enr, count; 1022*b411b363SPhilipp Reisner struct lc_element *e; 1023*b411b363SPhilipp Reisner 1024*b411b363SPhilipp Reisner if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) { 1025*b411b363SPhilipp Reisner dev_err(DEV, "sector: %llus, size: %d\n", 1026*b411b363SPhilipp Reisner (unsigned long long)sector, size); 1027*b411b363SPhilipp Reisner return; 1028*b411b363SPhilipp Reisner } 1029*b411b363SPhilipp Reisner 1030*b411b363SPhilipp Reisner if (!get_ldev(mdev)) 1031*b411b363SPhilipp Reisner return; /* no disk, no metadata, no bitmap to set bits in */ 1032*b411b363SPhilipp Reisner 1033*b411b363SPhilipp Reisner nr_sectors = drbd_get_capacity(mdev->this_bdev); 1034*b411b363SPhilipp Reisner esector = sector + (size >> 9) - 1; 1035*b411b363SPhilipp Reisner 1036*b411b363SPhilipp Reisner ERR_IF(sector >= nr_sectors) 1037*b411b363SPhilipp Reisner goto out; 1038*b411b363SPhilipp Reisner ERR_IF(esector >= nr_sectors) 1039*b411b363SPhilipp Reisner esector = (nr_sectors-1); 1040*b411b363SPhilipp Reisner 1041*b411b363SPhilipp Reisner lbnr = BM_SECT_TO_BIT(nr_sectors-1); 1042*b411b363SPhilipp Reisner 1043*b411b363SPhilipp Reisner /* we set it out of sync, 1044*b411b363SPhilipp Reisner * we do not need to round anything here */ 1045*b411b363SPhilipp Reisner sbnr = BM_SECT_TO_BIT(sector); 1046*b411b363SPhilipp Reisner ebnr = BM_SECT_TO_BIT(esector); 1047*b411b363SPhilipp Reisner 1048*b411b363SPhilipp Reisner trace_drbd_resync(mdev, TRACE_LVL_METRICS, 1049*b411b363SPhilipp Reisner "drbd_set_out_of_sync: sector=%llus size=%u sbnr=%lu ebnr=%lu\n", 
1050*b411b363SPhilipp Reisner (unsigned long long)sector, size, sbnr, ebnr); 1051*b411b363SPhilipp Reisner 1052*b411b363SPhilipp Reisner /* ok, (capacity & 7) != 0 sometimes, but who cares... 1053*b411b363SPhilipp Reisner * we count rs_{total,left} in bits, not sectors. */ 1054*b411b363SPhilipp Reisner spin_lock_irqsave(&mdev->al_lock, flags); 1055*b411b363SPhilipp Reisner count = drbd_bm_set_bits(mdev, sbnr, ebnr); 1056*b411b363SPhilipp Reisner 1057*b411b363SPhilipp Reisner enr = BM_SECT_TO_EXT(sector); 1058*b411b363SPhilipp Reisner e = lc_find(mdev->resync, enr); 1059*b411b363SPhilipp Reisner if (e) 1060*b411b363SPhilipp Reisner lc_entry(e, struct bm_extent, lce)->rs_left += count; 1061*b411b363SPhilipp Reisner spin_unlock_irqrestore(&mdev->al_lock, flags); 1062*b411b363SPhilipp Reisner 1063*b411b363SPhilipp Reisner out: 1064*b411b363SPhilipp Reisner put_ldev(mdev); 1065*b411b363SPhilipp Reisner } 1066*b411b363SPhilipp Reisner 1067*b411b363SPhilipp Reisner static 1068*b411b363SPhilipp Reisner struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr) 1069*b411b363SPhilipp Reisner { 1070*b411b363SPhilipp Reisner struct lc_element *e; 1071*b411b363SPhilipp Reisner struct bm_extent *bm_ext; 1072*b411b363SPhilipp Reisner int wakeup = 0; 1073*b411b363SPhilipp Reisner unsigned long rs_flags; 1074*b411b363SPhilipp Reisner 1075*b411b363SPhilipp Reisner spin_lock_irq(&mdev->al_lock); 1076*b411b363SPhilipp Reisner if (mdev->resync_locked > mdev->resync->nr_elements/2) { 1077*b411b363SPhilipp Reisner spin_unlock_irq(&mdev->al_lock); 1078*b411b363SPhilipp Reisner return NULL; 1079*b411b363SPhilipp Reisner } 1080*b411b363SPhilipp Reisner e = lc_get(mdev->resync, enr); 1081*b411b363SPhilipp Reisner bm_ext = e ? 
lc_entry(e, struct bm_extent, lce) : NULL; 1082*b411b363SPhilipp Reisner if (bm_ext) { 1083*b411b363SPhilipp Reisner if (bm_ext->lce.lc_number != enr) { 1084*b411b363SPhilipp Reisner bm_ext->rs_left = drbd_bm_e_weight(mdev, enr); 1085*b411b363SPhilipp Reisner bm_ext->rs_failed = 0; 1086*b411b363SPhilipp Reisner lc_changed(mdev->resync, &bm_ext->lce); 1087*b411b363SPhilipp Reisner wakeup = 1; 1088*b411b363SPhilipp Reisner } 1089*b411b363SPhilipp Reisner if (bm_ext->lce.refcnt == 1) 1090*b411b363SPhilipp Reisner mdev->resync_locked++; 1091*b411b363SPhilipp Reisner set_bit(BME_NO_WRITES, &bm_ext->flags); 1092*b411b363SPhilipp Reisner } 1093*b411b363SPhilipp Reisner rs_flags = mdev->resync->flags; 1094*b411b363SPhilipp Reisner spin_unlock_irq(&mdev->al_lock); 1095*b411b363SPhilipp Reisner if (wakeup) 1096*b411b363SPhilipp Reisner wake_up(&mdev->al_wait); 1097*b411b363SPhilipp Reisner 1098*b411b363SPhilipp Reisner if (!bm_ext) { 1099*b411b363SPhilipp Reisner if (rs_flags & LC_STARVING) 1100*b411b363SPhilipp Reisner dev_warn(DEV, "Have to wait for element" 1101*b411b363SPhilipp Reisner " (resync LRU too small?)\n"); 1102*b411b363SPhilipp Reisner BUG_ON(rs_flags & LC_DIRTY); 1103*b411b363SPhilipp Reisner } 1104*b411b363SPhilipp Reisner 1105*b411b363SPhilipp Reisner return bm_ext; 1106*b411b363SPhilipp Reisner } 1107*b411b363SPhilipp Reisner 1108*b411b363SPhilipp Reisner static int _is_in_al(struct drbd_conf *mdev, unsigned int enr) 1109*b411b363SPhilipp Reisner { 1110*b411b363SPhilipp Reisner struct lc_element *al_ext; 1111*b411b363SPhilipp Reisner int rv = 0; 1112*b411b363SPhilipp Reisner 1113*b411b363SPhilipp Reisner spin_lock_irq(&mdev->al_lock); 1114*b411b363SPhilipp Reisner if (unlikely(enr == mdev->act_log->new_number)) 1115*b411b363SPhilipp Reisner rv = 1; 1116*b411b363SPhilipp Reisner else { 1117*b411b363SPhilipp Reisner al_ext = lc_find(mdev->act_log, enr); 1118*b411b363SPhilipp Reisner if (al_ext) { 1119*b411b363SPhilipp Reisner if (al_ext->refcnt) 
1120*b411b363SPhilipp Reisner rv = 1; 1121*b411b363SPhilipp Reisner } 1122*b411b363SPhilipp Reisner } 1123*b411b363SPhilipp Reisner spin_unlock_irq(&mdev->al_lock); 1124*b411b363SPhilipp Reisner 1125*b411b363SPhilipp Reisner /* 1126*b411b363SPhilipp Reisner if (unlikely(rv)) { 1127*b411b363SPhilipp Reisner dev_info(DEV, "Delaying sync read until app's write is done\n"); 1128*b411b363SPhilipp Reisner } 1129*b411b363SPhilipp Reisner */ 1130*b411b363SPhilipp Reisner return rv; 1131*b411b363SPhilipp Reisner } 1132*b411b363SPhilipp Reisner 1133*b411b363SPhilipp Reisner /** 1134*b411b363SPhilipp Reisner * drbd_rs_begin_io() - Gets an extent in the resync LRU cache and sets it to BME_LOCKED 1135*b411b363SPhilipp Reisner * @mdev: DRBD device. 1136*b411b363SPhilipp Reisner * @sector: The sector number. 1137*b411b363SPhilipp Reisner * 1138*b411b363SPhilipp Reisner * This functions sleeps on al_wait. Returns 1 on success, 0 if interrupted. 1139*b411b363SPhilipp Reisner */ 1140*b411b363SPhilipp Reisner int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector) 1141*b411b363SPhilipp Reisner { 1142*b411b363SPhilipp Reisner unsigned int enr = BM_SECT_TO_EXT(sector); 1143*b411b363SPhilipp Reisner struct bm_extent *bm_ext; 1144*b411b363SPhilipp Reisner int i, sig; 1145*b411b363SPhilipp Reisner 1146*b411b363SPhilipp Reisner trace_drbd_resync(mdev, TRACE_LVL_ALL, 1147*b411b363SPhilipp Reisner "drbd_rs_begin_io: sector=%llus (rs_end=%d)\n", 1148*b411b363SPhilipp Reisner (unsigned long long)sector, enr); 1149*b411b363SPhilipp Reisner 1150*b411b363SPhilipp Reisner sig = wait_event_interruptible(mdev->al_wait, 1151*b411b363SPhilipp Reisner (bm_ext = _bme_get(mdev, enr))); 1152*b411b363SPhilipp Reisner if (sig) 1153*b411b363SPhilipp Reisner return 0; 1154*b411b363SPhilipp Reisner 1155*b411b363SPhilipp Reisner if (test_bit(BME_LOCKED, &bm_ext->flags)) 1156*b411b363SPhilipp Reisner return 1; 1157*b411b363SPhilipp Reisner 1158*b411b363SPhilipp Reisner for (i = 0; i < AL_EXT_PER_BM_SECT; 
i++) { 1159*b411b363SPhilipp Reisner sig = wait_event_interruptible(mdev->al_wait, 1160*b411b363SPhilipp Reisner !_is_in_al(mdev, enr * AL_EXT_PER_BM_SECT + i)); 1161*b411b363SPhilipp Reisner if (sig) { 1162*b411b363SPhilipp Reisner spin_lock_irq(&mdev->al_lock); 1163*b411b363SPhilipp Reisner if (lc_put(mdev->resync, &bm_ext->lce) == 0) { 1164*b411b363SPhilipp Reisner clear_bit(BME_NO_WRITES, &bm_ext->flags); 1165*b411b363SPhilipp Reisner mdev->resync_locked--; 1166*b411b363SPhilipp Reisner wake_up(&mdev->al_wait); 1167*b411b363SPhilipp Reisner } 1168*b411b363SPhilipp Reisner spin_unlock_irq(&mdev->al_lock); 1169*b411b363SPhilipp Reisner return 0; 1170*b411b363SPhilipp Reisner } 1171*b411b363SPhilipp Reisner } 1172*b411b363SPhilipp Reisner 1173*b411b363SPhilipp Reisner set_bit(BME_LOCKED, &bm_ext->flags); 1174*b411b363SPhilipp Reisner 1175*b411b363SPhilipp Reisner return 1; 1176*b411b363SPhilipp Reisner } 1177*b411b363SPhilipp Reisner 1178*b411b363SPhilipp Reisner /** 1179*b411b363SPhilipp Reisner * drbd_try_rs_begin_io() - Gets an extent in the resync LRU cache, does not sleep 1180*b411b363SPhilipp Reisner * @mdev: DRBD device. 1181*b411b363SPhilipp Reisner * @sector: The sector number. 1182*b411b363SPhilipp Reisner * 1183*b411b363SPhilipp Reisner * Gets an extent in the resync LRU cache, sets it to BME_NO_WRITES, then 1184*b411b363SPhilipp Reisner * tries to set it to BME_LOCKED. Returns 0 upon success, and -EAGAIN 1185*b411b363SPhilipp Reisner * if there is still application IO going on in this area. 
 */
int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
{
	unsigned int enr = BM_SECT_TO_EXT(sector);
	const unsigned int al_enr = enr*AL_EXT_PER_BM_SECT;
	struct lc_element *e;
	struct bm_extent *bm_ext;
	int i;

	trace_drbd_resync(mdev, TRACE_LVL_ALL, "drbd_try_rs_begin_io: sector=%llus\n",
			  (unsigned long long)sector);

	spin_lock_irq(&mdev->al_lock);
	if (mdev->resync_wenr != LC_FREE && mdev->resync_wenr != enr) {
		/* in case you have very heavy scattered io, it may
		 * stall the syncer undefined if we give up the ref count
		 * when we try again and requeue.
		 *
		 * if we don't give up the refcount, but the next time
		 * we are scheduled this extent has been "synced" by new
		 * application writes, we'd miss the lc_put on the
		 * extent we keep the refcount on.
		 * so we remembered which extent we had to try again, and
		 * if the next requested one is something else, we do
		 * the lc_put here...
		 * we also have to wake_up
		 */

		trace_drbd_resync(mdev, TRACE_LVL_ALL,
				  "dropping %u, apparently got 'synced' by application io\n",
				  mdev->resync_wenr);

		e = lc_find(mdev->resync, mdev->resync_wenr);
		bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
		if (bm_ext) {
			D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags));
			D_ASSERT(test_bit(BME_NO_WRITES, &bm_ext->flags));
			clear_bit(BME_NO_WRITES, &bm_ext->flags);
			mdev->resync_wenr = LC_FREE;
			if (lc_put(mdev->resync, &bm_ext->lce) == 0)
				mdev->resync_locked--;
			wake_up(&mdev->al_wait);
		} else {
			/* resync_wenr was set, so the extent must still be
			 * in the cache; not finding it indicates a bug. */
			dev_alert(DEV, "LOGIC BUG\n");
		}
	}
	/* TRY. */
	e = lc_try_get(mdev->resync, enr);
	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
	if (bm_ext) {
		if (test_bit(BME_LOCKED, &bm_ext->flags))
			goto proceed;
		if (!test_and_set_bit(BME_NO_WRITES, &bm_ext->flags)) {
			mdev->resync_locked++;
		} else {
			/* we did set the BME_NO_WRITES,
			 * but then could not set BME_LOCKED,
			 * so we tried again.
			 * drop the extra reference. */
			trace_drbd_resync(mdev, TRACE_LVL_ALL,
					  "dropping extra reference on %u\n", enr);

			bm_ext->lce.refcnt--;
			D_ASSERT(bm_ext->lce.refcnt > 0);
		}
		goto check_al;
	} else {
		/* do we rather want to try later? */
		if (mdev->resync_locked > mdev->resync->nr_elements-3) {
			trace_drbd_resync(mdev, TRACE_LVL_ALL,
					  "resync_locked = %u!\n", mdev->resync_locked);

			goto try_again;
		}
		/* Do or do not. There is no try. -- Yoda */
		e = lc_get(mdev->resync, enr);
		bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
		if (!bm_ext) {
			const unsigned long rs_flags = mdev->resync->flags;
			if (rs_flags & LC_STARVING)
				dev_warn(DEV, "Have to wait for element"
				     " (resync LRU too small?)\n");
			BUG_ON(rs_flags & LC_DIRTY);
			goto try_again;
		}
		if (bm_ext->lce.lc_number != enr) {
			/* lc_get() recycled an element for a new extent
			 * number: (re)initialize the resync counters. */
			bm_ext->rs_left = drbd_bm_e_weight(mdev, enr);
			bm_ext->rs_failed = 0;
			lc_changed(mdev->resync, &bm_ext->lce);
			wake_up(&mdev->al_wait);
			D_ASSERT(test_bit(BME_LOCKED, &bm_ext->flags) == 0);
		}
		set_bit(BME_NO_WRITES, &bm_ext->flags);
		D_ASSERT(bm_ext->lce.refcnt == 1);
		mdev->resync_locked++;
		goto check_al;
	}
check_al:
	trace_drbd_resync(mdev, TRACE_LVL_ALL, "checking al for %u\n", enr);

	/* Only lock the extent if no overlapping activity-log extent is
	 * currently in use (or about to become the new AL transaction). */
	for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
		if (unlikely(al_enr+i == mdev->act_log->new_number))
			goto try_again;
		if (lc_is_used(mdev->act_log, al_enr+i))
			goto try_again;
	}
	set_bit(BME_LOCKED, &bm_ext->flags);
proceed:
	mdev->resync_wenr = LC_FREE;
	spin_unlock_irq(&mdev->al_lock);
	return 0;

try_again:
	/* Remember the extent we hold a reference on, so the next call can
	 * drop it (see the resync_wenr block at the top of this function). */
	trace_drbd_resync(mdev, TRACE_LVL_ALL, "need to try again for %u\n", enr);
	if (bm_ext)
		mdev->resync_wenr = enr;
	spin_unlock_irq(&mdev->al_lock);
	return -EAGAIN;
}

/**
 * drbd_rs_complete_io() - Drop the reference on the resync extent of @sector
 * @mdev:	DRBD device.
 * @sector:	The sector number.
 *
 * Puts one reference on the resync LRU extent covering @sector; when the
 * last reference is dropped, BME_LOCKED and BME_NO_WRITES are cleared and
 * waiters on al_wait are woken.  Complains (rate limited) if the extent is
 * not in the cache or its refcount is already zero.
 */
void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector)
{
	unsigned int enr = BM_SECT_TO_EXT(sector);
	struct lc_element *e;
	struct bm_extent *bm_ext;
	unsigned long flags;

	trace_drbd_resync(mdev, TRACE_LVL_ALL,
			  "drbd_rs_complete_io: sector=%llus (rs_enr=%d)\n",
			  (long long)sector, enr);

	spin_lock_irqsave(&mdev->al_lock, flags);
	e = lc_find(mdev->resync, enr);
	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
	if (!bm_ext) {
		spin_unlock_irqrestore(&mdev->al_lock, flags);
		if (__ratelimit(&drbd_ratelimit_state))
			dev_err(DEV, "drbd_rs_complete_io() called, but extent not found\n");
		return;
	}

	if (bm_ext->lce.refcnt == 0) {
		spin_unlock_irqrestore(&mdev->al_lock, flags);
		dev_err(DEV, "drbd_rs_complete_io(,%llu [=%u]) called, "
		    "but refcnt is 0!?\n",
		    (unsigned long long)sector, enr);
		return;
	}

	if (lc_put(mdev->resync, &bm_ext->lce) == 0) {
		clear_bit(BME_LOCKED, &bm_ext->flags);
		clear_bit(BME_NO_WRITES, &bm_ext->flags);
		mdev->resync_locked--;
		wake_up(&mdev->al_wait);
	}

	spin_unlock_irqrestore(&mdev->al_lock, flags);
}

/**
 * drbd_rs_cancel_all() - Removes all extents from the resync LRU (even BME_LOCKED)
 * @mdev:	DRBD device.
 */
void drbd_rs_cancel_all(struct drbd_conf *mdev)
{
	trace_drbd_resync(mdev, TRACE_LVL_METRICS, "drbd_rs_cancel_all\n");

	spin_lock_irq(&mdev->al_lock);

	if (get_ldev_if_state(mdev, D_FAILED)) { /* Makes sure ->resync is there. */
		lc_reset(mdev->resync);
		put_ldev(mdev);
	}
	mdev->resync_locked = 0;
	mdev->resync_wenr = LC_FREE;
	spin_unlock_irq(&mdev->al_lock);
	wake_up(&mdev->al_wait);
}

/**
 * drbd_rs_del_all() - Gracefully remove all extents from the resync LRU
 * @mdev:	DRBD device.
 *
 * Returns 0 upon success, -EAGAIN if at least one reference count was
 * not zero.
 */
int drbd_rs_del_all(struct drbd_conf *mdev)
{
	struct lc_element *e;
	struct bm_extent *bm_ext;
	int i;

	trace_drbd_resync(mdev, TRACE_LVL_METRICS, "drbd_rs_del_all\n");

	spin_lock_irq(&mdev->al_lock);

	if (get_ldev_if_state(mdev, D_FAILED)) {
		/* ok, ->resync is there. */
		for (i = 0; i < mdev->resync->nr_elements; i++) {
			e = lc_element_by_index(mdev->resync, i);
			bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
			if (bm_ext->lce.lc_number == LC_FREE)
				continue;
			if (bm_ext->lce.lc_number == mdev->resync_wenr) {
				/* Drop the reference still held for the
				 * remembered "try again" extent, see
				 * drbd_try_rs_begin_io(). */
				dev_info(DEV, "dropping %u in drbd_rs_del_all, apparently"
				     " got 'synced' by application io\n",
				     mdev->resync_wenr);
				D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags));
				D_ASSERT(test_bit(BME_NO_WRITES, &bm_ext->flags));
				clear_bit(BME_NO_WRITES, &bm_ext->flags);
				mdev->resync_wenr = LC_FREE;
				lc_put(mdev->resync, &bm_ext->lce);
			}
			if (bm_ext->lce.refcnt != 0) {
				/* Someone still holds this extent: give up;
				 * extents already lc_del()'d stay removed. */
				dev_info(DEV, "Retrying drbd_rs_del_all() later. "
				     "refcnt=%d\n", bm_ext->lce.refcnt);
				put_ldev(mdev);
				spin_unlock_irq(&mdev->al_lock);
				return -EAGAIN;
			}
			D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags));
			D_ASSERT(!test_bit(BME_NO_WRITES, &bm_ext->flags));
			lc_del(mdev->resync, &bm_ext->lce);
		}
		D_ASSERT(mdev->resync->used == 0);
		put_ldev(mdev);
	}
	spin_unlock_irq(&mdev->al_lock);

	return 0;
}

/**
 * drbd_rs_failed_io() - Record information on a failure to resync the specified blocks
 * @mdev:	DRBD device.
 * @sector:	The sector number.
 * @size:	Size of failed IO operation, in byte.
 */
void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size)
{
	/* Is called from worker and receiver context _only_ */
	unsigned long sbnr, ebnr, lbnr;
	unsigned long count;
	sector_t esector, nr_sectors;
	int wake_up = 0;

	trace_drbd_resync(mdev, TRACE_LVL_SUMMARY,
			  "drbd_rs_failed_io: sector=%llus, size=%u\n",
			  (unsigned long long)sector, size);

	/* size must be a positive multiple of 512 bytes, bounded by the
	 * maximum segment size. */
	if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) {
		dev_err(DEV, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n",
				(unsigned long long)sector, size);
		return;
	}
	nr_sectors = drbd_get_capacity(mdev->this_bdev);
	esector = sector + (size >> 9) - 1;

	ERR_IF(sector >= nr_sectors) return;
	ERR_IF(esector >= nr_sectors) esector = (nr_sectors-1);

	lbnr = BM_SECT_TO_BIT(nr_sectors-1);

	/*
	 * round up start sector, round down end sector.  we make sure we only
	 * handle full, aligned, BM_BLOCK_SIZE (4K) blocks */
	if (unlikely(esector < BM_SECT_PER_BIT-1))
		return;
	if (unlikely(esector == (nr_sectors-1)))
		ebnr = lbnr;
	else
		ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
	sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);

	if (sbnr > ebnr)
		return;

	/*
	 * ok, (capacity & 7) != 0 sometimes, but who cares...
	 * we count rs_{total,left} in bits, not sectors.
	 */
	spin_lock_irq(&mdev->al_lock);
	count = drbd_bm_count_bits(mdev, sbnr, ebnr);
	if (count) {
		mdev->rs_failed += count;

		if (get_ldev(mdev)) {
			drbd_try_clear_on_disk_bm(mdev, sector, count, FALSE);
			put_ldev(mdev);
		}

		/* just wake_up unconditional now, various lc_changed(),
		 * lc_put() in drbd_try_clear_on_disk_bm(). */
		wake_up = 1;
	}
	spin_unlock_irq(&mdev->al_lock);
	if (wake_up)
		wake_up(&mdev->al_wait);
}