xref: /openbmc/linux/drivers/block/drbd/drbd_actlog.c (revision b411b3637fa71fce9cf2acf0639009500f5892fe)
1*b411b363SPhilipp Reisner /*
2*b411b363SPhilipp Reisner    drbd_actlog.c
3*b411b363SPhilipp Reisner 
4*b411b363SPhilipp Reisner    This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5*b411b363SPhilipp Reisner 
6*b411b363SPhilipp Reisner    Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
7*b411b363SPhilipp Reisner    Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8*b411b363SPhilipp Reisner    Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9*b411b363SPhilipp Reisner 
10*b411b363SPhilipp Reisner    drbd is free software; you can redistribute it and/or modify
11*b411b363SPhilipp Reisner    it under the terms of the GNU General Public License as published by
12*b411b363SPhilipp Reisner    the Free Software Foundation; either version 2, or (at your option)
13*b411b363SPhilipp Reisner    any later version.
14*b411b363SPhilipp Reisner 
15*b411b363SPhilipp Reisner    drbd is distributed in the hope that it will be useful,
16*b411b363SPhilipp Reisner    but WITHOUT ANY WARRANTY; without even the implied warranty of
17*b411b363SPhilipp Reisner    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*b411b363SPhilipp Reisner    GNU General Public License for more details.
19*b411b363SPhilipp Reisner 
20*b411b363SPhilipp Reisner    You should have received a copy of the GNU General Public License
21*b411b363SPhilipp Reisner    along with drbd; see the file COPYING.  If not, write to
22*b411b363SPhilipp Reisner    the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23*b411b363SPhilipp Reisner 
24*b411b363SPhilipp Reisner  */
25*b411b363SPhilipp Reisner 
26*b411b363SPhilipp Reisner #include <linux/slab.h>
27*b411b363SPhilipp Reisner #include <linux/drbd.h>
28*b411b363SPhilipp Reisner #include "drbd_int.h"
29*b411b363SPhilipp Reisner #include "drbd_tracing.h"
30*b411b363SPhilipp Reisner #include "drbd_wrappers.h"
31*b411b363SPhilipp Reisner 
32*b411b363SPhilipp Reisner /* We maintain a trivial check sum in our on disk activity log.
33*b411b363SPhilipp Reisner  * With that we can ensure correct operation even when the storage
34*b411b363SPhilipp Reisner  * device might do a partial (last) sector write while loosing power.
35*b411b363SPhilipp Reisner  */
struct __packed al_transaction {
	u32       magic;	/* DRBD_MAGIC; validated when reading the log back */
	u32       tr_number;	/* monotonically increasing transaction number */
	struct __packed {
		u32 pos;	/* slot index within the in-core activity log */
		u32 extent; } updates[1 + AL_EXTENTS_PT];	/* [0]: the new update; the rest cycle through all slots */
	u32       xor_sum;	/* xor of all updates[].extent — the trivial checksum mentioned above */
};
44*b411b363SPhilipp Reisner 
/* work item: update the on-disk bitmap sector for one resync extent */
struct update_odbm_work {
	struct drbd_work w;
	unsigned int enr;	/* extent number whose bitmap sector to write */
};
49*b411b363SPhilipp Reisner 
/* work item: have the worker write one activity log transaction,
 * signalling @event when the transaction is on stable storage */
struct update_al_work {
	struct drbd_work w;
	struct lc_element *al_ext;	/* the AL slot being (re)assigned */
	struct completion event;	/* completed by w_al_write_transaction() */
	unsigned int enr;		/* new extent number for that slot */
	/* if old_enr != LC_FREE, write corresponding bitmap sector, too */
	unsigned int old_enr;
};
58*b411b363SPhilipp Reisner 
/* shared wait context for a batch of AL-to-on-disk-bitmap bios;
 * io_done fires once @count submitted bios have all completed */
struct drbd_atodb_wait {
	atomic_t           count;	/* number of bios still in flight */
	struct completion  io_done;
	struct drbd_conf   *mdev;
	int                error;	/* first error seen, if any */
};
65*b411b363SPhilipp Reisner 
66*b411b363SPhilipp Reisner 
/* worker callback writing one AL transaction to disk; defined below */
int w_al_write_transaction(struct drbd_conf *, struct drbd_work *, int);
68*b411b363SPhilipp Reisner 
69*b411b363SPhilipp Reisner /* The actual tracepoint needs to have constant number of known arguments...
70*b411b363SPhilipp Reisner  */
71*b411b363SPhilipp Reisner void trace_drbd_resync(struct drbd_conf *mdev, int level, const char *fmt, ...)
72*b411b363SPhilipp Reisner {
73*b411b363SPhilipp Reisner 	va_list ap;
74*b411b363SPhilipp Reisner 
75*b411b363SPhilipp Reisner 	va_start(ap, fmt);
76*b411b363SPhilipp Reisner 	trace__drbd_resync(mdev, level, fmt, ap);
77*b411b363SPhilipp Reisner 	va_end(ap);
78*b411b363SPhilipp Reisner }
79*b411b363SPhilipp Reisner 
/* Synchronously read or write @size bytes of @page at @sector on the
 * meta data device.  Returns 1 on success, 0 on failure.
 * Writes are submitted as barrier requests unless MD_NO_BARRIER is set;
 * if the device turns out not to support barriers, the flag is set and
 * the write is retried once without the barrier bit. */
static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
				 struct drbd_backing_dev *bdev,
				 struct page *page, sector_t sector,
				 int rw, int size)
{
	struct bio *bio;
	struct drbd_md_io md_io;	/* on-stack completion context for drbd_md_io_complete */
	int ok;

	md_io.mdev = mdev;
	init_completion(&md_io.event);
	md_io.error = 0;

	if ((rw & WRITE) && !test_bit(MD_NO_BARRIER, &mdev->flags))
		rw |= (1 << BIO_RW_BARRIER);
	rw |= ((1<<BIO_RW_UNPLUG) | (1<<BIO_RW_SYNCIO));

 retry:
	/* NOTE(review): bio_alloc() return value is not checked; presumably
	 * GFP_NOIO allocation from the global bio set cannot fail here — confirm. */
	bio = bio_alloc(GFP_NOIO, 1);
	bio->bi_bdev = bdev->md_bdev;
	bio->bi_sector = sector;
	ok = (bio_add_page(bio, page, size, 0) == size);
	if (!ok)
		goto out;
	bio->bi_private = &md_io;
	bio->bi_end_io = drbd_md_io_complete;
	bio->bi_rw = rw;

	trace_drbd_bio(mdev, "Md", bio, 0, NULL);

	/* fault injection: optionally fail meta data I/O without touching disk */
	if (FAULT_ACTIVE(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
		bio_endio(bio, -EIO);
	else
		submit_bio(rw, bio);
	wait_for_completion(&md_io.event);
	ok = bio_flagged(bio, BIO_UPTODATE) && md_io.error == 0;

	/* check for unsupported barrier op.
	 * would rather check on EOPNOTSUPP, but that is not reliable.
	 * don't try again for ANY return value != 0 */
	if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER) && !ok)) {
		/* Try again with no barrier */
		dev_warn(DEV, "Barriers not supported on meta data device - disabling\n");
		set_bit(MD_NO_BARRIER, &mdev->flags);
		rw &= ~(1 << BIO_RW_BARRIER);
		bio_put(bio);
		goto retry;
	}
 out:
	bio_put(bio);
	return ok;
}
132*b411b363SPhilipp Reisner 
/* Read or write one 512 byte meta data sector at @sector, using the
 * pre-allocated md_io_page.  Returns 1 on success, 0 on failure.
 * Caller must hold md_io_mutex (asserted below).
 *
 * If the backing device's logical block size is larger than 512
 * (the comment below suggests s390), I/O can only be done in
 * logical-block-sized units: reads fetch the whole block into
 * md_io_tmpp and copy out the wanted 512 byte piece; writes do a
 * read-modify-write cycle through md_io_tmpp. */
int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
			 sector_t sector, int rw)
{
	int logical_block_size, mask, ok;
	int offset = 0;
	struct page *iop = mdev->md_io_page;

	D_ASSERT(mutex_is_locked(&mdev->md_io_mutex));

	BUG_ON(!bdev->md_bdev);

	logical_block_size = bdev_logical_block_size(bdev->md_bdev);
	if (logical_block_size == 0)
		logical_block_size = MD_SECTOR_SIZE;

	/* in case logical_block_size != 512 [ s390 only? ] */
	if (logical_block_size != MD_SECTOR_SIZE) {
		/* 512 byte sectors per logical block, minus one */
		mask = (logical_block_size / MD_SECTOR_SIZE) - 1;
		D_ASSERT(mask == 1 || mask == 3 || mask == 7);
		D_ASSERT(logical_block_size == (mask+1) * MD_SECTOR_SIZE);
		offset = sector & mask;		/* position of wanted sector within the block */
		sector = sector & ~mask;	/* round down to logical block boundary */
		iop = mdev->md_io_tmpp;

		if (rw & WRITE) {
			/* these are GFP_KERNEL pages, pre-allocated
			 * on device initialization */
			void *p = page_address(mdev->md_io_page);
			void *hp = page_address(mdev->md_io_tmpp);

			/* read-modify-write: fetch the full logical block first */
			ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector,
					READ, logical_block_size);

			if (unlikely(!ok)) {
				dev_err(DEV, "drbd_md_sync_page_io(,%llus,"
				    "READ [logical_block_size!=512]) failed!\n",
				    (unsigned long long)sector);
				return 0;
			}

			/* splice the caller's 512 byte sector into the block */
			memcpy(hp + offset*MD_SECTOR_SIZE, p, MD_SECTOR_SIZE);
		}
	}

	/* paranoia: meta data I/O must stay inside the meta data area;
	 * log loudly (but proceed) if it does not */
	if (sector < drbd_md_first_sector(bdev) ||
	    sector > drbd_md_last_sector(bdev))
		dev_alert(DEV, "%s [%d]:%s(,%llus,%s) out of range md access!\n",
		     current->comm, current->pid, __func__,
		     (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");

	ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, logical_block_size);
	if (unlikely(!ok)) {
		dev_err(DEV, "drbd_md_sync_page_io(,%llus,%s) failed!\n",
		    (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");
		return 0;
	}

	if (logical_block_size != MD_SECTOR_SIZE && !(rw & WRITE)) {
		void *p = page_address(mdev->md_io_page);
		void *hp = page_address(mdev->md_io_tmpp);

		/* copy the wanted 512 byte piece out of the larger block */
		memcpy(p, hp + offset*MD_SECTOR_SIZE, MD_SECTOR_SIZE);
	}

	return ok;
}
199*b411b363SPhilipp Reisner 
200*b411b363SPhilipp Reisner static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr)
201*b411b363SPhilipp Reisner {
202*b411b363SPhilipp Reisner 	struct lc_element *al_ext;
203*b411b363SPhilipp Reisner 	struct lc_element *tmp;
204*b411b363SPhilipp Reisner 	unsigned long     al_flags = 0;
205*b411b363SPhilipp Reisner 
206*b411b363SPhilipp Reisner 	spin_lock_irq(&mdev->al_lock);
207*b411b363SPhilipp Reisner 	tmp = lc_find(mdev->resync, enr/AL_EXT_PER_BM_SECT);
208*b411b363SPhilipp Reisner 	if (unlikely(tmp != NULL)) {
209*b411b363SPhilipp Reisner 		struct bm_extent  *bm_ext = lc_entry(tmp, struct bm_extent, lce);
210*b411b363SPhilipp Reisner 		if (test_bit(BME_NO_WRITES, &bm_ext->flags)) {
211*b411b363SPhilipp Reisner 			spin_unlock_irq(&mdev->al_lock);
212*b411b363SPhilipp Reisner 			return NULL;
213*b411b363SPhilipp Reisner 		}
214*b411b363SPhilipp Reisner 	}
215*b411b363SPhilipp Reisner 	al_ext   = lc_get(mdev->act_log, enr);
216*b411b363SPhilipp Reisner 	al_flags = mdev->act_log->flags;
217*b411b363SPhilipp Reisner 	spin_unlock_irq(&mdev->al_lock);
218*b411b363SPhilipp Reisner 
219*b411b363SPhilipp Reisner 	/*
220*b411b363SPhilipp Reisner 	if (!al_ext) {
221*b411b363SPhilipp Reisner 		if (al_flags & LC_STARVING)
222*b411b363SPhilipp Reisner 			dev_warn(DEV, "Have to wait for LRU element (AL too small?)\n");
223*b411b363SPhilipp Reisner 		if (al_flags & LC_DIRTY)
224*b411b363SPhilipp Reisner 			dev_warn(DEV, "Ongoing AL update (AL device too slow?)\n");
225*b411b363SPhilipp Reisner 	}
226*b411b363SPhilipp Reisner 	*/
227*b411b363SPhilipp Reisner 
228*b411b363SPhilipp Reisner 	return al_ext;
229*b411b363SPhilipp Reisner }
230*b411b363SPhilipp Reisner 
/* Activate the activity log extent covering @sector before application
 * I/O may touch it, sleeping on al_wait until a slot is available.
 * If the slot was not already assigned to this extent, the AL
 * transaction write is delegated to the worker and we block until it
 * is on stable storage.  Caller must hold a local_cnt reference. */
void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector)
{
	/* 512 byte sector number -> activity log extent number */
	unsigned int enr = (sector >> (AL_EXTENT_SHIFT-9));
	struct lc_element *al_ext;
	struct update_al_work al_work;	/* on-stack; we wait for completion below */

	D_ASSERT(atomic_read(&mdev->local_cnt) > 0);

	trace_drbd_actlog(mdev, sector, "al_begin_io");

	/* retries until _al_get() succeeds; woken on lc_put()/lc_changed() */
	wait_event(mdev->al_wait, (al_ext = _al_get(mdev, enr)));

	if (al_ext->lc_number != enr) {
		/* drbd_al_write_transaction(mdev,al_ext,enr);
		 * recurses into generic_make_request(), which
		 * disallows recursion, bios being serialized on the
		 * current->bio_tail list now.
		 * we have to delegate updates to the activity log
		 * to the worker thread. */
		init_completion(&al_work.event);
		al_work.al_ext = al_ext;
		al_work.enr = enr;
		al_work.old_enr = al_ext->lc_number;	/* extent being evicted, or LC_FREE */
		al_work.w.cb = w_al_write_transaction;
		drbd_queue_work_front(&mdev->data.work, &al_work.w);
		wait_for_completion(&al_work.event);

		mdev->al_writ_cnt++;

		/* only now commit the in-core slot to the new extent number */
		spin_lock_irq(&mdev->al_lock);
		lc_changed(mdev->act_log, al_ext);
		spin_unlock_irq(&mdev->al_lock);
		wake_up(&mdev->al_wait);
	}
}
266*b411b363SPhilipp Reisner 
267*b411b363SPhilipp Reisner void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector)
268*b411b363SPhilipp Reisner {
269*b411b363SPhilipp Reisner 	unsigned int enr = (sector >> (AL_EXTENT_SHIFT-9));
270*b411b363SPhilipp Reisner 	struct lc_element *extent;
271*b411b363SPhilipp Reisner 	unsigned long flags;
272*b411b363SPhilipp Reisner 
273*b411b363SPhilipp Reisner 	trace_drbd_actlog(mdev, sector, "al_complete_io");
274*b411b363SPhilipp Reisner 
275*b411b363SPhilipp Reisner 	spin_lock_irqsave(&mdev->al_lock, flags);
276*b411b363SPhilipp Reisner 
277*b411b363SPhilipp Reisner 	extent = lc_find(mdev->act_log, enr);
278*b411b363SPhilipp Reisner 
279*b411b363SPhilipp Reisner 	if (!extent) {
280*b411b363SPhilipp Reisner 		spin_unlock_irqrestore(&mdev->al_lock, flags);
281*b411b363SPhilipp Reisner 		dev_err(DEV, "al_complete_io() called on inactive extent %u\n", enr);
282*b411b363SPhilipp Reisner 		return;
283*b411b363SPhilipp Reisner 	}
284*b411b363SPhilipp Reisner 
285*b411b363SPhilipp Reisner 	if (lc_put(mdev->act_log, extent) == 0)
286*b411b363SPhilipp Reisner 		wake_up(&mdev->al_wait);
287*b411b363SPhilipp Reisner 
288*b411b363SPhilipp Reisner 	spin_unlock_irqrestore(&mdev->al_lock, flags);
289*b411b363SPhilipp Reisner }
290*b411b363SPhilipp Reisner 
/* Worker callback: write one activity log transaction sector.
 *
 * Besides the newly updated slot (updates[0]), each transaction also
 * carries the next AL_EXTENTS_PT slots of the in-core AL (cycled via
 * al_tr_cycle), so that a full pass over the on-disk transaction ring
 * reproduces the entire activity log on recovery.
 * Signals the on-stack update_al_work's completion; always returns 1. */
int
w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
{
	struct update_al_work *aw = container_of(w, struct update_al_work, w);
	struct lc_element *updated = aw->al_ext;
	const unsigned int new_enr = aw->enr;
	const unsigned int evicted = aw->old_enr;	/* LC_FREE if slot was unused */
	struct al_transaction *buffer;
	sector_t sector;
	int i, n, mx;
	unsigned int extent_nr;
	u32 xor_sum = 0;

	if (!get_ldev(mdev)) {
		dev_err(DEV, "get_ldev() failed in w_al_write_transaction\n");
		complete(&((struct update_al_work *)w)->event);
		return 1;
	}
	/* do we have to do a bitmap write, first?
	 * TODO reduce maximum latency:
	 * submit both bios, then wait for both,
	 * instead of doing two synchronous sector writes. */
	if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE)
		drbd_bm_write_sect(mdev, evicted/AL_EXT_PER_BM_SECT);

	mutex_lock(&mdev->md_io_mutex); /* protects md_io_page, al_tr_cycle, ... */
	buffer = (struct al_transaction *)page_address(mdev->md_io_page);

	buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC);
	buffer->tr_number = cpu_to_be32(mdev->al_tr_number);

	n = lc_index_of(mdev->act_log, updated);

	/* slot 0 carries the actual update of this transaction */
	buffer->updates[0].pos = cpu_to_be32(n);
	buffer->updates[0].extent = cpu_to_be32(new_enr);

	xor_sum ^= new_enr;

	/* fill the remaining slots with the next cyclic window of the AL */
	mx = min_t(int, AL_EXTENTS_PT,
		   mdev->act_log->nr_elements - mdev->al_tr_cycle);
	for (i = 0; i < mx; i++) {
		unsigned idx = mdev->al_tr_cycle + i;
		extent_nr = lc_element_by_index(mdev->act_log, idx)->lc_number;
		buffer->updates[i+1].pos = cpu_to_be32(idx);
		buffer->updates[i+1].extent = cpu_to_be32(extent_nr);
		xor_sum ^= extent_nr;
	}
	/* pad unused slots so the checksum stays well defined */
	for (; i < AL_EXTENTS_PT; i++) {
		buffer->updates[i+1].pos = __constant_cpu_to_be32(-1);
		buffer->updates[i+1].extent = __constant_cpu_to_be32(LC_FREE);
		xor_sum ^= LC_FREE;
	}
	mdev->al_tr_cycle += AL_EXTENTS_PT;
	if (mdev->al_tr_cycle >= mdev->act_log->nr_elements)
		mdev->al_tr_cycle = 0;

	buffer->xor_sum = cpu_to_be32(xor_sum);

	/* on-disk position: ring of transaction sectors inside the AL area */
	sector =  mdev->ldev->md.md_offset
		+ mdev->ldev->md.al_offset + mdev->al_tr_pos;

	if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE))
		drbd_chk_io_error(mdev, 1, TRUE);

	if (++mdev->al_tr_pos >
	    div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT))
		mdev->al_tr_pos = 0;

	D_ASSERT(mdev->al_tr_pos < MD_AL_MAX_SIZE);
	mdev->al_tr_number++;

	mutex_unlock(&mdev->md_io_mutex);

	complete(&((struct update_al_work *)w)->event);
	put_ldev(mdev);

	return 1;
}
369*b411b363SPhilipp Reisner 
370*b411b363SPhilipp Reisner /**
371*b411b363SPhilipp Reisner  * drbd_al_read_tr() - Read a single transaction from the on disk activity log
372*b411b363SPhilipp Reisner  * @mdev:	DRBD device.
373*b411b363SPhilipp Reisner  * @bdev:	Block device to read form.
374*b411b363SPhilipp Reisner  * @b:		pointer to an al_transaction.
375*b411b363SPhilipp Reisner  * @index:	On disk slot of the transaction to read.
376*b411b363SPhilipp Reisner  *
377*b411b363SPhilipp Reisner  * Returns -1 on IO error, 0 on checksum error and 1 upon success.
378*b411b363SPhilipp Reisner  */
379*b411b363SPhilipp Reisner static int drbd_al_read_tr(struct drbd_conf *mdev,
380*b411b363SPhilipp Reisner 			   struct drbd_backing_dev *bdev,
381*b411b363SPhilipp Reisner 			   struct al_transaction *b,
382*b411b363SPhilipp Reisner 			   int index)
383*b411b363SPhilipp Reisner {
384*b411b363SPhilipp Reisner 	sector_t sector;
385*b411b363SPhilipp Reisner 	int rv, i;
386*b411b363SPhilipp Reisner 	u32 xor_sum = 0;
387*b411b363SPhilipp Reisner 
388*b411b363SPhilipp Reisner 	sector = bdev->md.md_offset + bdev->md.al_offset + index;
389*b411b363SPhilipp Reisner 
390*b411b363SPhilipp Reisner 	/* Dont process error normally,
391*b411b363SPhilipp Reisner 	 * as this is done before disk is attached! */
392*b411b363SPhilipp Reisner 	if (!drbd_md_sync_page_io(mdev, bdev, sector, READ))
393*b411b363SPhilipp Reisner 		return -1;
394*b411b363SPhilipp Reisner 
395*b411b363SPhilipp Reisner 	rv = (be32_to_cpu(b->magic) == DRBD_MAGIC);
396*b411b363SPhilipp Reisner 
397*b411b363SPhilipp Reisner 	for (i = 0; i < AL_EXTENTS_PT + 1; i++)
398*b411b363SPhilipp Reisner 		xor_sum ^= be32_to_cpu(b->updates[i].extent);
399*b411b363SPhilipp Reisner 	rv &= (xor_sum == be32_to_cpu(b->xor_sum));
400*b411b363SPhilipp Reisner 
401*b411b363SPhilipp Reisner 	return rv;
402*b411b363SPhilipp Reisner }
403*b411b363SPhilipp Reisner 
/**
 * drbd_al_read_log() - Restores the activity log from its on disk representation.
 * @mdev:	DRBD device.
 * @bdev:	Block device to read from.
 *
 * Scans the on-disk ring of transaction sectors, determines the oldest
 * (from) and newest (to) valid transaction by tr_number, then replays
 * them in order into the in-core activity log.
 *
 * Returns 1 on success, returns 0 when reading the log failed due to IO errors.
 */
int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
{
	struct al_transaction *buffer;
	int i;
	int rv;
	int mx;			/* highest on-disk transaction slot index */
	int active_extents = 0;
	int transactions = 0;
	int found_valid = 0;
	int from = 0;		/* slot of oldest valid transaction */
	int to = 0;		/* slot of newest valid transaction */
	u32 from_tnr = 0;
	u32 to_tnr = 0;
	u32 cnr;

	mx = div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT);

	/* lock out all other meta data io for now,
	 * and make sure the page is mapped.
	 */
	mutex_lock(&mdev->md_io_mutex);
	buffer = page_address(mdev->md_io_page);

	/* Find the valid transaction in the log */
	for (i = 0; i <= mx; i++) {
		rv = drbd_al_read_tr(mdev, bdev, buffer, i);
		if (rv == 0)	/* checksum error: slot never written or torn; skip */
			continue;
		if (rv == -1) {
			mutex_unlock(&mdev->md_io_mutex);
			return 0;
		}
		cnr = be32_to_cpu(buffer->tr_number);

		if (++found_valid == 1) {
			from = i;
			to = i;
			from_tnr = cnr;
			to_tnr = cnr;
			continue;
		}
		/* signed comparison handles tr_number wrap-around */
		if ((int)cnr - (int)from_tnr < 0) {
			D_ASSERT(from_tnr - cnr + i - from == mx+1);
			from = i;
			from_tnr = cnr;
		}
		if ((int)cnr - (int)to_tnr > 0) {
			D_ASSERT(cnr - to_tnr == i - to);
			to = i;
			to_tnr = cnr;
		}
	}

	if (!found_valid) {
		dev_warn(DEV, "No usable activity log found.\n");
		mutex_unlock(&mdev->md_io_mutex);
		return 1;
	}

	/* Read the valid transactions.
	 * dev_info(DEV, "Reading from %d to %d.\n",from,to); */
	i = from;
	while (1) {
		int j, pos;
		unsigned int extent_nr;
		unsigned int trn;

		rv = drbd_al_read_tr(mdev, bdev, buffer, i);
		/* a transaction in the valid range should not fail its
		 * checksum; warn and skip it if it does */
		ERR_IF(rv == 0) goto cancel;
		if (rv == -1) {
			mutex_unlock(&mdev->md_io_mutex);
			return 0;
		}

		trn = be32_to_cpu(buffer->tr_number);

		spin_lock_irq(&mdev->al_lock);

		/* This loop runs backwards because in the cyclic
		   elements there might be an old version of the
		   updated element (in slot 0). So the element in slot 0
		   can overwrite old versions. */
		for (j = AL_EXTENTS_PT; j >= 0; j--) {
			pos = be32_to_cpu(buffer->updates[j].pos);
			extent_nr = be32_to_cpu(buffer->updates[j].extent);

			if (extent_nr == LC_FREE)
				continue;

			lc_set(mdev->act_log, extent_nr, pos);
			active_extents++;
		}
		spin_unlock_irq(&mdev->al_lock);

		transactions++;

cancel:
		if (i == to)
			break;
		i++;
		if (i > mx)	/* wrap around the transaction ring */
			i = 0;
	}

	/* continue writing after the newest transaction found */
	mdev->al_tr_number = to_tnr+1;
	mdev->al_tr_pos = to;
	if (++mdev->al_tr_pos >
	    div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT))
		mdev->al_tr_pos = 0;

	/* ok, we are done with it */
	mutex_unlock(&mdev->md_io_mutex);

	dev_info(DEV, "Found %d transactions (%d active extents) in activity log.\n",
	     transactions, active_extents);

	return 1;
}
529*b411b363SPhilipp Reisner 
/* bi_end_io callback for the AL-to-on-disk-bitmap writes: record the
 * first error in the shared wait context, complete io_done once all
 * bios of the batch are finished, and release page/bio/ldev refs. */
static void atodb_endio(struct bio *bio, int error)
{
	struct drbd_atodb_wait *wc = bio->bi_private;
	struct drbd_conf *mdev = wc->mdev;
	struct page *page;
	int uptodate = bio_flagged(bio, BIO_UPTODATE);

	/* strange behavior of some lower level drivers...
	 * fail the request by clearing the uptodate flag,
	 * but do not return any error?! */
	if (!error && !uptodate)
		error = -EIO;

	drbd_chk_io_error(mdev, error, TRUE);
	if (error && wc->error == 0)
		wc->error = error;

	if (atomic_dec_and_test(&wc->count))
		complete(&wc->io_done);

	/* NOTE(review): complete() above may let the waiter proceed before
	 * the accounting below runs; wc must not be touched past this point,
	 * and mdev presumably stays valid until put_ldev() — confirm. */
	page = bio->bi_io_vec[0].bv_page;
	put_page(page);
	bio_put(bio);
	mdev->bm_writ_cnt++;
	put_ldev(mdev);
}
556*b411b363SPhilipp Reisner 
/* S2W: bitmap-extent sectors to (long) words of bitmap data */
#define S2W(s)	((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL))
558*b411b363SPhilipp Reisner /* activity log to on disk bitmap -- prepare bio unless that sector
559*b411b363SPhilipp Reisner  * is already covered by previously prepared bios */
560*b411b363SPhilipp Reisner static int atodb_prepare_unless_covered(struct drbd_conf *mdev,
561*b411b363SPhilipp Reisner 					struct bio **bios,
562*b411b363SPhilipp Reisner 					unsigned int enr,
563*b411b363SPhilipp Reisner 					struct drbd_atodb_wait *wc) __must_hold(local)
564*b411b363SPhilipp Reisner {
565*b411b363SPhilipp Reisner 	struct bio *bio;
566*b411b363SPhilipp Reisner 	struct page *page;
567*b411b363SPhilipp Reisner 	sector_t on_disk_sector = enr + mdev->ldev->md.md_offset
568*b411b363SPhilipp Reisner 				      + mdev->ldev->md.bm_offset;
569*b411b363SPhilipp Reisner 	unsigned int page_offset = PAGE_SIZE;
570*b411b363SPhilipp Reisner 	int offset;
571*b411b363SPhilipp Reisner 	int i = 0;
572*b411b363SPhilipp Reisner 	int err = -ENOMEM;
573*b411b363SPhilipp Reisner 
574*b411b363SPhilipp Reisner 	/* Check if that enr is already covered by an already created bio.
575*b411b363SPhilipp Reisner 	 * Caution, bios[] is not NULL terminated,
576*b411b363SPhilipp Reisner 	 * but only initialized to all NULL.
577*b411b363SPhilipp Reisner 	 * For completely scattered activity log,
578*b411b363SPhilipp Reisner 	 * the last invocation iterates over all bios,
579*b411b363SPhilipp Reisner 	 * and finds the last NULL entry.
580*b411b363SPhilipp Reisner 	 */
581*b411b363SPhilipp Reisner 	while ((bio = bios[i])) {
582*b411b363SPhilipp Reisner 		if (bio->bi_sector == on_disk_sector)
583*b411b363SPhilipp Reisner 			return 0;
584*b411b363SPhilipp Reisner 		i++;
585*b411b363SPhilipp Reisner 	}
586*b411b363SPhilipp Reisner 	/* bios[i] == NULL, the next not yet used slot */
587*b411b363SPhilipp Reisner 
588*b411b363SPhilipp Reisner 	/* GFP_KERNEL, we are not in the write-out path */
589*b411b363SPhilipp Reisner 	bio = bio_alloc(GFP_KERNEL, 1);
590*b411b363SPhilipp Reisner 	if (bio == NULL)
591*b411b363SPhilipp Reisner 		return -ENOMEM;
592*b411b363SPhilipp Reisner 
593*b411b363SPhilipp Reisner 	if (i > 0) {
594*b411b363SPhilipp Reisner 		const struct bio_vec *prev_bv = bios[i-1]->bi_io_vec;
595*b411b363SPhilipp Reisner 		page_offset = prev_bv->bv_offset + prev_bv->bv_len;
596*b411b363SPhilipp Reisner 		page = prev_bv->bv_page;
597*b411b363SPhilipp Reisner 	}
598*b411b363SPhilipp Reisner 	if (page_offset == PAGE_SIZE) {
599*b411b363SPhilipp Reisner 		page = alloc_page(__GFP_HIGHMEM);
600*b411b363SPhilipp Reisner 		if (page == NULL)
601*b411b363SPhilipp Reisner 			goto out_bio_put;
602*b411b363SPhilipp Reisner 		page_offset = 0;
603*b411b363SPhilipp Reisner 	} else {
604*b411b363SPhilipp Reisner 		get_page(page);
605*b411b363SPhilipp Reisner 	}
606*b411b363SPhilipp Reisner 
607*b411b363SPhilipp Reisner 	offset = S2W(enr);
608*b411b363SPhilipp Reisner 	drbd_bm_get_lel(mdev, offset,
609*b411b363SPhilipp Reisner 			min_t(size_t, S2W(1), drbd_bm_words(mdev) - offset),
610*b411b363SPhilipp Reisner 			kmap(page) + page_offset);
611*b411b363SPhilipp Reisner 	kunmap(page);
612*b411b363SPhilipp Reisner 
613*b411b363SPhilipp Reisner 	bio->bi_private = wc;
614*b411b363SPhilipp Reisner 	bio->bi_end_io = atodb_endio;
615*b411b363SPhilipp Reisner 	bio->bi_bdev = mdev->ldev->md_bdev;
616*b411b363SPhilipp Reisner 	bio->bi_sector = on_disk_sector;
617*b411b363SPhilipp Reisner 
618*b411b363SPhilipp Reisner 	if (bio_add_page(bio, page, MD_SECTOR_SIZE, page_offset) != MD_SECTOR_SIZE)
619*b411b363SPhilipp Reisner 		goto out_put_page;
620*b411b363SPhilipp Reisner 
621*b411b363SPhilipp Reisner 	atomic_inc(&wc->count);
622*b411b363SPhilipp Reisner 	/* we already know that we may do this...
623*b411b363SPhilipp Reisner 	 * get_ldev_if_state(mdev,D_ATTACHING);
624*b411b363SPhilipp Reisner 	 * just get the extra reference, so that the local_cnt reflects
625*b411b363SPhilipp Reisner 	 * the number of pending IO requests DRBD at its backing device.
626*b411b363SPhilipp Reisner 	 */
627*b411b363SPhilipp Reisner 	atomic_inc(&mdev->local_cnt);
628*b411b363SPhilipp Reisner 
629*b411b363SPhilipp Reisner 	bios[i] = bio;
630*b411b363SPhilipp Reisner 
631*b411b363SPhilipp Reisner 	return 0;
632*b411b363SPhilipp Reisner 
633*b411b363SPhilipp Reisner out_put_page:
634*b411b363SPhilipp Reisner 	err = -EINVAL;
635*b411b363SPhilipp Reisner 	put_page(page);
636*b411b363SPhilipp Reisner out_bio_put:
637*b411b363SPhilipp Reisner 	bio_put(bio);
638*b411b363SPhilipp Reisner 	return err;
639*b411b363SPhilipp Reisner }
640*b411b363SPhilipp Reisner 
641*b411b363SPhilipp Reisner /**
642*b411b363SPhilipp Reisner  * drbd_al_to_on_disk_bm() -  * Writes bitmap parts covered by active AL extents
643*b411b363SPhilipp Reisner  * @mdev:	DRBD device.
644*b411b363SPhilipp Reisner  *
645*b411b363SPhilipp Reisner  * Called when we detach (unconfigure) local storage,
646*b411b363SPhilipp Reisner  * or when we go from R_PRIMARY to R_SECONDARY role.
647*b411b363SPhilipp Reisner  */
648*b411b363SPhilipp Reisner void drbd_al_to_on_disk_bm(struct drbd_conf *mdev)
649*b411b363SPhilipp Reisner {
650*b411b363SPhilipp Reisner 	int i, nr_elements;
651*b411b363SPhilipp Reisner 	unsigned int enr;
652*b411b363SPhilipp Reisner 	struct bio **bios;
653*b411b363SPhilipp Reisner 	struct drbd_atodb_wait wc;
654*b411b363SPhilipp Reisner 
655*b411b363SPhilipp Reisner 	ERR_IF (!get_ldev_if_state(mdev, D_ATTACHING))
656*b411b363SPhilipp Reisner 		return; /* sorry, I don't have any act_log etc... */
657*b411b363SPhilipp Reisner 
658*b411b363SPhilipp Reisner 	wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
659*b411b363SPhilipp Reisner 
660*b411b363SPhilipp Reisner 	nr_elements = mdev->act_log->nr_elements;
661*b411b363SPhilipp Reisner 
662*b411b363SPhilipp Reisner 	/* GFP_KERNEL, we are not in anyone's write-out path */
663*b411b363SPhilipp Reisner 	bios = kzalloc(sizeof(struct bio *) * nr_elements, GFP_KERNEL);
664*b411b363SPhilipp Reisner 	if (!bios)
665*b411b363SPhilipp Reisner 		goto submit_one_by_one;
666*b411b363SPhilipp Reisner 
667*b411b363SPhilipp Reisner 	atomic_set(&wc.count, 0);
668*b411b363SPhilipp Reisner 	init_completion(&wc.io_done);
669*b411b363SPhilipp Reisner 	wc.mdev = mdev;
670*b411b363SPhilipp Reisner 	wc.error = 0;
671*b411b363SPhilipp Reisner 
672*b411b363SPhilipp Reisner 	for (i = 0; i < nr_elements; i++) {
673*b411b363SPhilipp Reisner 		enr = lc_element_by_index(mdev->act_log, i)->lc_number;
674*b411b363SPhilipp Reisner 		if (enr == LC_FREE)
675*b411b363SPhilipp Reisner 			continue;
676*b411b363SPhilipp Reisner 		/* next statement also does atomic_inc wc.count and local_cnt */
677*b411b363SPhilipp Reisner 		if (atodb_prepare_unless_covered(mdev, bios,
678*b411b363SPhilipp Reisner 						enr/AL_EXT_PER_BM_SECT,
679*b411b363SPhilipp Reisner 						&wc))
680*b411b363SPhilipp Reisner 			goto free_bios_submit_one_by_one;
681*b411b363SPhilipp Reisner 	}
682*b411b363SPhilipp Reisner 
683*b411b363SPhilipp Reisner 	/* unnecessary optimization? */
684*b411b363SPhilipp Reisner 	lc_unlock(mdev->act_log);
685*b411b363SPhilipp Reisner 	wake_up(&mdev->al_wait);
686*b411b363SPhilipp Reisner 
687*b411b363SPhilipp Reisner 	/* all prepared, submit them */
688*b411b363SPhilipp Reisner 	for (i = 0; i < nr_elements; i++) {
689*b411b363SPhilipp Reisner 		if (bios[i] == NULL)
690*b411b363SPhilipp Reisner 			break;
691*b411b363SPhilipp Reisner 		if (FAULT_ACTIVE(mdev, DRBD_FAULT_MD_WR)) {
692*b411b363SPhilipp Reisner 			bios[i]->bi_rw = WRITE;
693*b411b363SPhilipp Reisner 			bio_endio(bios[i], -EIO);
694*b411b363SPhilipp Reisner 		} else {
695*b411b363SPhilipp Reisner 			submit_bio(WRITE, bios[i]);
696*b411b363SPhilipp Reisner 		}
697*b411b363SPhilipp Reisner 	}
698*b411b363SPhilipp Reisner 
699*b411b363SPhilipp Reisner 	drbd_blk_run_queue(bdev_get_queue(mdev->ldev->md_bdev));
700*b411b363SPhilipp Reisner 
701*b411b363SPhilipp Reisner 	/* always (try to) flush bitmap to stable storage */
702*b411b363SPhilipp Reisner 	drbd_md_flush(mdev);
703*b411b363SPhilipp Reisner 
704*b411b363SPhilipp Reisner 	/* In case we did not submit a single IO do not wait for
705*b411b363SPhilipp Reisner 	 * them to complete. ( Because we would wait forever here. )
706*b411b363SPhilipp Reisner 	 *
707*b411b363SPhilipp Reisner 	 * In case we had IOs and they are already complete, there
708*b411b363SPhilipp Reisner 	 * is not point in waiting anyways.
709*b411b363SPhilipp Reisner 	 * Therefore this if () ... */
710*b411b363SPhilipp Reisner 	if (atomic_read(&wc.count))
711*b411b363SPhilipp Reisner 		wait_for_completion(&wc.io_done);
712*b411b363SPhilipp Reisner 
713*b411b363SPhilipp Reisner 	put_ldev(mdev);
714*b411b363SPhilipp Reisner 
715*b411b363SPhilipp Reisner 	kfree(bios);
716*b411b363SPhilipp Reisner 	return;
717*b411b363SPhilipp Reisner 
718*b411b363SPhilipp Reisner  free_bios_submit_one_by_one:
719*b411b363SPhilipp Reisner 	/* free everything by calling the endio callback directly. */
720*b411b363SPhilipp Reisner 	for (i = 0; i < nr_elements && bios[i]; i++)
721*b411b363SPhilipp Reisner 		bio_endio(bios[i], 0);
722*b411b363SPhilipp Reisner 
723*b411b363SPhilipp Reisner 	kfree(bios);
724*b411b363SPhilipp Reisner 
725*b411b363SPhilipp Reisner  submit_one_by_one:
726*b411b363SPhilipp Reisner 	dev_warn(DEV, "Using the slow drbd_al_to_on_disk_bm()\n");
727*b411b363SPhilipp Reisner 
728*b411b363SPhilipp Reisner 	for (i = 0; i < mdev->act_log->nr_elements; i++) {
729*b411b363SPhilipp Reisner 		enr = lc_element_by_index(mdev->act_log, i)->lc_number;
730*b411b363SPhilipp Reisner 		if (enr == LC_FREE)
731*b411b363SPhilipp Reisner 			continue;
732*b411b363SPhilipp Reisner 		/* Really slow: if we have al-extents 16..19 active,
733*b411b363SPhilipp Reisner 		 * sector 4 will be written four times! Synchronous! */
734*b411b363SPhilipp Reisner 		drbd_bm_write_sect(mdev, enr/AL_EXT_PER_BM_SECT);
735*b411b363SPhilipp Reisner 	}
736*b411b363SPhilipp Reisner 
737*b411b363SPhilipp Reisner 	lc_unlock(mdev->act_log);
738*b411b363SPhilipp Reisner 	wake_up(&mdev->al_wait);
739*b411b363SPhilipp Reisner 	put_ldev(mdev);
740*b411b363SPhilipp Reisner }
741*b411b363SPhilipp Reisner 
742*b411b363SPhilipp Reisner /**
743*b411b363SPhilipp Reisner  * drbd_al_apply_to_bm() - Sets the bitmap to diry(1) where covered ba active AL extents
744*b411b363SPhilipp Reisner  * @mdev:	DRBD device.
745*b411b363SPhilipp Reisner  */
746*b411b363SPhilipp Reisner void drbd_al_apply_to_bm(struct drbd_conf *mdev)
747*b411b363SPhilipp Reisner {
748*b411b363SPhilipp Reisner 	unsigned int enr;
749*b411b363SPhilipp Reisner 	unsigned long add = 0;
750*b411b363SPhilipp Reisner 	char ppb[10];
751*b411b363SPhilipp Reisner 	int i;
752*b411b363SPhilipp Reisner 
753*b411b363SPhilipp Reisner 	wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
754*b411b363SPhilipp Reisner 
755*b411b363SPhilipp Reisner 	for (i = 0; i < mdev->act_log->nr_elements; i++) {
756*b411b363SPhilipp Reisner 		enr = lc_element_by_index(mdev->act_log, i)->lc_number;
757*b411b363SPhilipp Reisner 		if (enr == LC_FREE)
758*b411b363SPhilipp Reisner 			continue;
759*b411b363SPhilipp Reisner 		add += drbd_bm_ALe_set_all(mdev, enr);
760*b411b363SPhilipp Reisner 	}
761*b411b363SPhilipp Reisner 
762*b411b363SPhilipp Reisner 	lc_unlock(mdev->act_log);
763*b411b363SPhilipp Reisner 	wake_up(&mdev->al_wait);
764*b411b363SPhilipp Reisner 
765*b411b363SPhilipp Reisner 	dev_info(DEV, "Marked additional %s as out-of-sync based on AL.\n",
766*b411b363SPhilipp Reisner 	     ppsize(ppb, Bit2KB(add)));
767*b411b363SPhilipp Reisner }
768*b411b363SPhilipp Reisner 
769*b411b363SPhilipp Reisner static int _try_lc_del(struct drbd_conf *mdev, struct lc_element *al_ext)
770*b411b363SPhilipp Reisner {
771*b411b363SPhilipp Reisner 	int rv;
772*b411b363SPhilipp Reisner 
773*b411b363SPhilipp Reisner 	spin_lock_irq(&mdev->al_lock);
774*b411b363SPhilipp Reisner 	rv = (al_ext->refcnt == 0);
775*b411b363SPhilipp Reisner 	if (likely(rv))
776*b411b363SPhilipp Reisner 		lc_del(mdev->act_log, al_ext);
777*b411b363SPhilipp Reisner 	spin_unlock_irq(&mdev->al_lock);
778*b411b363SPhilipp Reisner 
779*b411b363SPhilipp Reisner 	return rv;
780*b411b363SPhilipp Reisner }
781*b411b363SPhilipp Reisner 
782*b411b363SPhilipp Reisner /**
783*b411b363SPhilipp Reisner  * drbd_al_shrink() - Removes all active extents form the activity log
784*b411b363SPhilipp Reisner  * @mdev:	DRBD device.
785*b411b363SPhilipp Reisner  *
786*b411b363SPhilipp Reisner  * Removes all active extents form the activity log, waiting until
787*b411b363SPhilipp Reisner  * the reference count of each entry dropped to 0 first, of course.
788*b411b363SPhilipp Reisner  *
789*b411b363SPhilipp Reisner  * You need to lock mdev->act_log with lc_try_lock() / lc_unlock()
790*b411b363SPhilipp Reisner  */
791*b411b363SPhilipp Reisner void drbd_al_shrink(struct drbd_conf *mdev)
792*b411b363SPhilipp Reisner {
793*b411b363SPhilipp Reisner 	struct lc_element *al_ext;
794*b411b363SPhilipp Reisner 	int i;
795*b411b363SPhilipp Reisner 
796*b411b363SPhilipp Reisner 	D_ASSERT(test_bit(__LC_DIRTY, &mdev->act_log->flags));
797*b411b363SPhilipp Reisner 
798*b411b363SPhilipp Reisner 	for (i = 0; i < mdev->act_log->nr_elements; i++) {
799*b411b363SPhilipp Reisner 		al_ext = lc_element_by_index(mdev->act_log, i);
800*b411b363SPhilipp Reisner 		if (al_ext->lc_number == LC_FREE)
801*b411b363SPhilipp Reisner 			continue;
802*b411b363SPhilipp Reisner 		wait_event(mdev->al_wait, _try_lc_del(mdev, al_ext));
803*b411b363SPhilipp Reisner 	}
804*b411b363SPhilipp Reisner 
805*b411b363SPhilipp Reisner 	wake_up(&mdev->al_wait);
806*b411b363SPhilipp Reisner }
807*b411b363SPhilipp Reisner 
808*b411b363SPhilipp Reisner static int w_update_odbm(struct drbd_conf *mdev, struct drbd_work *w, int unused)
809*b411b363SPhilipp Reisner {
810*b411b363SPhilipp Reisner 	struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w);
811*b411b363SPhilipp Reisner 
812*b411b363SPhilipp Reisner 	if (!get_ldev(mdev)) {
813*b411b363SPhilipp Reisner 		if (__ratelimit(&drbd_ratelimit_state))
814*b411b363SPhilipp Reisner 			dev_warn(DEV, "Can not update on disk bitmap, local IO disabled.\n");
815*b411b363SPhilipp Reisner 		kfree(udw);
816*b411b363SPhilipp Reisner 		return 1;
817*b411b363SPhilipp Reisner 	}
818*b411b363SPhilipp Reisner 
819*b411b363SPhilipp Reisner 	drbd_bm_write_sect(mdev, udw->enr);
820*b411b363SPhilipp Reisner 	put_ldev(mdev);
821*b411b363SPhilipp Reisner 
822*b411b363SPhilipp Reisner 	kfree(udw);
823*b411b363SPhilipp Reisner 
824*b411b363SPhilipp Reisner 	if (drbd_bm_total_weight(mdev) <= mdev->rs_failed) {
825*b411b363SPhilipp Reisner 		switch (mdev->state.conn) {
826*b411b363SPhilipp Reisner 		case C_SYNC_SOURCE:  case C_SYNC_TARGET:
827*b411b363SPhilipp Reisner 		case C_PAUSED_SYNC_S: case C_PAUSED_SYNC_T:
828*b411b363SPhilipp Reisner 			drbd_resync_finished(mdev);
829*b411b363SPhilipp Reisner 		default:
830*b411b363SPhilipp Reisner 			/* nothing to do */
831*b411b363SPhilipp Reisner 			break;
832*b411b363SPhilipp Reisner 		}
833*b411b363SPhilipp Reisner 	}
834*b411b363SPhilipp Reisner 	drbd_bcast_sync_progress(mdev);
835*b411b363SPhilipp Reisner 
836*b411b363SPhilipp Reisner 	return 1;
837*b411b363SPhilipp Reisner }
838*b411b363SPhilipp Reisner 
839*b411b363SPhilipp Reisner 
840*b411b363SPhilipp Reisner /* ATTENTION. The AL's extents are 4MB each, while the extents in the
841*b411b363SPhilipp Reisner  * resync LRU-cache are 16MB each.
842*b411b363SPhilipp Reisner  * The caller of this function has to hold an get_ldev() reference.
843*b411b363SPhilipp Reisner  *
844*b411b363SPhilipp Reisner  * TODO will be obsoleted once we have a caching lru of the on disk bitmap
845*b411b363SPhilipp Reisner  */
846*b411b363SPhilipp Reisner static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector,
847*b411b363SPhilipp Reisner 				      int count, int success)
848*b411b363SPhilipp Reisner {
849*b411b363SPhilipp Reisner 	struct lc_element *e;
850*b411b363SPhilipp Reisner 	struct update_odbm_work *udw;
851*b411b363SPhilipp Reisner 
852*b411b363SPhilipp Reisner 	unsigned int enr;
853*b411b363SPhilipp Reisner 
854*b411b363SPhilipp Reisner 	D_ASSERT(atomic_read(&mdev->local_cnt));
855*b411b363SPhilipp Reisner 
856*b411b363SPhilipp Reisner 	/* I simply assume that a sector/size pair never crosses
857*b411b363SPhilipp Reisner 	 * a 16 MB extent border. (Currently this is true...) */
858*b411b363SPhilipp Reisner 	enr = BM_SECT_TO_EXT(sector);
859*b411b363SPhilipp Reisner 
860*b411b363SPhilipp Reisner 	e = lc_get(mdev->resync, enr);
861*b411b363SPhilipp Reisner 	if (e) {
862*b411b363SPhilipp Reisner 		struct bm_extent *ext = lc_entry(e, struct bm_extent, lce);
863*b411b363SPhilipp Reisner 		if (ext->lce.lc_number == enr) {
864*b411b363SPhilipp Reisner 			if (success)
865*b411b363SPhilipp Reisner 				ext->rs_left -= count;
866*b411b363SPhilipp Reisner 			else
867*b411b363SPhilipp Reisner 				ext->rs_failed += count;
868*b411b363SPhilipp Reisner 			if (ext->rs_left < ext->rs_failed) {
869*b411b363SPhilipp Reisner 				dev_err(DEV, "BAD! sector=%llus enr=%u rs_left=%d "
870*b411b363SPhilipp Reisner 				    "rs_failed=%d count=%d\n",
871*b411b363SPhilipp Reisner 				     (unsigned long long)sector,
872*b411b363SPhilipp Reisner 				     ext->lce.lc_number, ext->rs_left,
873*b411b363SPhilipp Reisner 				     ext->rs_failed, count);
874*b411b363SPhilipp Reisner 				dump_stack();
875*b411b363SPhilipp Reisner 
876*b411b363SPhilipp Reisner 				lc_put(mdev->resync, &ext->lce);
877*b411b363SPhilipp Reisner 				drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
878*b411b363SPhilipp Reisner 				return;
879*b411b363SPhilipp Reisner 			}
880*b411b363SPhilipp Reisner 		} else {
881*b411b363SPhilipp Reisner 			/* Normally this element should be in the cache,
882*b411b363SPhilipp Reisner 			 * since drbd_rs_begin_io() pulled it already in.
883*b411b363SPhilipp Reisner 			 *
884*b411b363SPhilipp Reisner 			 * But maybe an application write finished, and we set
885*b411b363SPhilipp Reisner 			 * something outside the resync lru_cache in sync.
886*b411b363SPhilipp Reisner 			 */
887*b411b363SPhilipp Reisner 			int rs_left = drbd_bm_e_weight(mdev, enr);
888*b411b363SPhilipp Reisner 			if (ext->flags != 0) {
889*b411b363SPhilipp Reisner 				dev_warn(DEV, "changing resync lce: %d[%u;%02lx]"
890*b411b363SPhilipp Reisner 				     " -> %d[%u;00]\n",
891*b411b363SPhilipp Reisner 				     ext->lce.lc_number, ext->rs_left,
892*b411b363SPhilipp Reisner 				     ext->flags, enr, rs_left);
893*b411b363SPhilipp Reisner 				ext->flags = 0;
894*b411b363SPhilipp Reisner 			}
895*b411b363SPhilipp Reisner 			if (ext->rs_failed) {
896*b411b363SPhilipp Reisner 				dev_warn(DEV, "Kicking resync_lru element enr=%u "
897*b411b363SPhilipp Reisner 				     "out with rs_failed=%d\n",
898*b411b363SPhilipp Reisner 				     ext->lce.lc_number, ext->rs_failed);
899*b411b363SPhilipp Reisner 				set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags);
900*b411b363SPhilipp Reisner 			}
901*b411b363SPhilipp Reisner 			ext->rs_left = rs_left;
902*b411b363SPhilipp Reisner 			ext->rs_failed = success ? 0 : count;
903*b411b363SPhilipp Reisner 			lc_changed(mdev->resync, &ext->lce);
904*b411b363SPhilipp Reisner 		}
905*b411b363SPhilipp Reisner 		lc_put(mdev->resync, &ext->lce);
906*b411b363SPhilipp Reisner 		/* no race, we are within the al_lock! */
907*b411b363SPhilipp Reisner 
908*b411b363SPhilipp Reisner 		if (ext->rs_left == ext->rs_failed) {
909*b411b363SPhilipp Reisner 			ext->rs_failed = 0;
910*b411b363SPhilipp Reisner 
911*b411b363SPhilipp Reisner 			udw = kmalloc(sizeof(*udw), GFP_ATOMIC);
912*b411b363SPhilipp Reisner 			if (udw) {
913*b411b363SPhilipp Reisner 				udw->enr = ext->lce.lc_number;
914*b411b363SPhilipp Reisner 				udw->w.cb = w_update_odbm;
915*b411b363SPhilipp Reisner 				drbd_queue_work_front(&mdev->data.work, &udw->w);
916*b411b363SPhilipp Reisner 			} else {
917*b411b363SPhilipp Reisner 				dev_warn(DEV, "Could not kmalloc an udw\n");
918*b411b363SPhilipp Reisner 				set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags);
919*b411b363SPhilipp Reisner 			}
920*b411b363SPhilipp Reisner 		}
921*b411b363SPhilipp Reisner 	} else {
922*b411b363SPhilipp Reisner 		dev_err(DEV, "lc_get() failed! locked=%d/%d flags=%lu\n",
923*b411b363SPhilipp Reisner 		    mdev->resync_locked,
924*b411b363SPhilipp Reisner 		    mdev->resync->nr_elements,
925*b411b363SPhilipp Reisner 		    mdev->resync->flags);
926*b411b363SPhilipp Reisner 	}
927*b411b363SPhilipp Reisner }
928*b411b363SPhilipp Reisner 
929*b411b363SPhilipp Reisner /* clear the bit corresponding to the piece of storage in question:
930*b411b363SPhilipp Reisner  * size byte of data starting from sector.  Only clear a bits of the affected
931*b411b363SPhilipp Reisner  * one ore more _aligned_ BM_BLOCK_SIZE blocks.
932*b411b363SPhilipp Reisner  *
933*b411b363SPhilipp Reisner  * called by worker on C_SYNC_TARGET and receiver on SyncSource.
934*b411b363SPhilipp Reisner  *
935*b411b363SPhilipp Reisner  */
936*b411b363SPhilipp Reisner void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size,
937*b411b363SPhilipp Reisner 		       const char *file, const unsigned int line)
938*b411b363SPhilipp Reisner {
939*b411b363SPhilipp Reisner 	/* Is called from worker and receiver context _only_ */
940*b411b363SPhilipp Reisner 	unsigned long sbnr, ebnr, lbnr;
941*b411b363SPhilipp Reisner 	unsigned long count = 0;
942*b411b363SPhilipp Reisner 	sector_t esector, nr_sectors;
943*b411b363SPhilipp Reisner 	int wake_up = 0;
944*b411b363SPhilipp Reisner 	unsigned long flags;
945*b411b363SPhilipp Reisner 
946*b411b363SPhilipp Reisner 	if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) {
947*b411b363SPhilipp Reisner 		dev_err(DEV, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n",
948*b411b363SPhilipp Reisner 				(unsigned long long)sector, size);
949*b411b363SPhilipp Reisner 		return;
950*b411b363SPhilipp Reisner 	}
951*b411b363SPhilipp Reisner 	nr_sectors = drbd_get_capacity(mdev->this_bdev);
952*b411b363SPhilipp Reisner 	esector = sector + (size >> 9) - 1;
953*b411b363SPhilipp Reisner 
954*b411b363SPhilipp Reisner 	ERR_IF(sector >= nr_sectors) return;
955*b411b363SPhilipp Reisner 	ERR_IF(esector >= nr_sectors) esector = (nr_sectors-1);
956*b411b363SPhilipp Reisner 
957*b411b363SPhilipp Reisner 	lbnr = BM_SECT_TO_BIT(nr_sectors-1);
958*b411b363SPhilipp Reisner 
959*b411b363SPhilipp Reisner 	/* we clear it (in sync).
960*b411b363SPhilipp Reisner 	 * round up start sector, round down end sector.  we make sure we only
961*b411b363SPhilipp Reisner 	 * clear full, aligned, BM_BLOCK_SIZE (4K) blocks */
962*b411b363SPhilipp Reisner 	if (unlikely(esector < BM_SECT_PER_BIT-1))
963*b411b363SPhilipp Reisner 		return;
964*b411b363SPhilipp Reisner 	if (unlikely(esector == (nr_sectors-1)))
965*b411b363SPhilipp Reisner 		ebnr = lbnr;
966*b411b363SPhilipp Reisner 	else
967*b411b363SPhilipp Reisner 		ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
968*b411b363SPhilipp Reisner 	sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);
969*b411b363SPhilipp Reisner 
970*b411b363SPhilipp Reisner 	trace_drbd_resync(mdev, TRACE_LVL_METRICS,
971*b411b363SPhilipp Reisner 			  "drbd_set_in_sync: sector=%llus size=%u sbnr=%lu ebnr=%lu\n",
972*b411b363SPhilipp Reisner 			  (unsigned long long)sector, size, sbnr, ebnr);
973*b411b363SPhilipp Reisner 
974*b411b363SPhilipp Reisner 	if (sbnr > ebnr)
975*b411b363SPhilipp Reisner 		return;
976*b411b363SPhilipp Reisner 
977*b411b363SPhilipp Reisner 	/*
978*b411b363SPhilipp Reisner 	 * ok, (capacity & 7) != 0 sometimes, but who cares...
979*b411b363SPhilipp Reisner 	 * we count rs_{total,left} in bits, not sectors.
980*b411b363SPhilipp Reisner 	 */
981*b411b363SPhilipp Reisner 	spin_lock_irqsave(&mdev->al_lock, flags);
982*b411b363SPhilipp Reisner 	count = drbd_bm_clear_bits(mdev, sbnr, ebnr);
983*b411b363SPhilipp Reisner 	if (count) {
984*b411b363SPhilipp Reisner 		/* we need the lock for drbd_try_clear_on_disk_bm */
985*b411b363SPhilipp Reisner 		if (jiffies - mdev->rs_mark_time > HZ*10) {
986*b411b363SPhilipp Reisner 			/* should be rolling marks,
987*b411b363SPhilipp Reisner 			 * but we estimate only anyways. */
988*b411b363SPhilipp Reisner 			if (mdev->rs_mark_left != drbd_bm_total_weight(mdev) &&
989*b411b363SPhilipp Reisner 			    mdev->state.conn != C_PAUSED_SYNC_T &&
990*b411b363SPhilipp Reisner 			    mdev->state.conn != C_PAUSED_SYNC_S) {
991*b411b363SPhilipp Reisner 				mdev->rs_mark_time = jiffies;
992*b411b363SPhilipp Reisner 				mdev->rs_mark_left = drbd_bm_total_weight(mdev);
993*b411b363SPhilipp Reisner 			}
994*b411b363SPhilipp Reisner 		}
995*b411b363SPhilipp Reisner 		if (get_ldev(mdev)) {
996*b411b363SPhilipp Reisner 			drbd_try_clear_on_disk_bm(mdev, sector, count, TRUE);
997*b411b363SPhilipp Reisner 			put_ldev(mdev);
998*b411b363SPhilipp Reisner 		}
999*b411b363SPhilipp Reisner 		/* just wake_up unconditional now, various lc_chaged(),
1000*b411b363SPhilipp Reisner 		 * lc_put() in drbd_try_clear_on_disk_bm(). */
1001*b411b363SPhilipp Reisner 		wake_up = 1;
1002*b411b363SPhilipp Reisner 	}
1003*b411b363SPhilipp Reisner 	spin_unlock_irqrestore(&mdev->al_lock, flags);
1004*b411b363SPhilipp Reisner 	if (wake_up)
1005*b411b363SPhilipp Reisner 		wake_up(&mdev->al_wait);
1006*b411b363SPhilipp Reisner }
1007*b411b363SPhilipp Reisner 
1008*b411b363SPhilipp Reisner /*
1009*b411b363SPhilipp Reisner  * this is intended to set one request worth of data out of sync.
1010*b411b363SPhilipp Reisner  * affects at least 1 bit,
1011*b411b363SPhilipp Reisner  * and at most 1+DRBD_MAX_SEGMENT_SIZE/BM_BLOCK_SIZE bits.
1012*b411b363SPhilipp Reisner  *
1013*b411b363SPhilipp Reisner  * called by tl_clear and drbd_send_dblock (==drbd_make_request).
1014*b411b363SPhilipp Reisner  * so this can be _any_ process.
1015*b411b363SPhilipp Reisner  */
1016*b411b363SPhilipp Reisner void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size,
1017*b411b363SPhilipp Reisner 			    const char *file, const unsigned int line)
1018*b411b363SPhilipp Reisner {
1019*b411b363SPhilipp Reisner 	unsigned long sbnr, ebnr, lbnr, flags;
1020*b411b363SPhilipp Reisner 	sector_t esector, nr_sectors;
1021*b411b363SPhilipp Reisner 	unsigned int enr, count;
1022*b411b363SPhilipp Reisner 	struct lc_element *e;
1023*b411b363SPhilipp Reisner 
1024*b411b363SPhilipp Reisner 	if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) {
1025*b411b363SPhilipp Reisner 		dev_err(DEV, "sector: %llus, size: %d\n",
1026*b411b363SPhilipp Reisner 			(unsigned long long)sector, size);
1027*b411b363SPhilipp Reisner 		return;
1028*b411b363SPhilipp Reisner 	}
1029*b411b363SPhilipp Reisner 
1030*b411b363SPhilipp Reisner 	if (!get_ldev(mdev))
1031*b411b363SPhilipp Reisner 		return; /* no disk, no metadata, no bitmap to set bits in */
1032*b411b363SPhilipp Reisner 
1033*b411b363SPhilipp Reisner 	nr_sectors = drbd_get_capacity(mdev->this_bdev);
1034*b411b363SPhilipp Reisner 	esector = sector + (size >> 9) - 1;
1035*b411b363SPhilipp Reisner 
1036*b411b363SPhilipp Reisner 	ERR_IF(sector >= nr_sectors)
1037*b411b363SPhilipp Reisner 		goto out;
1038*b411b363SPhilipp Reisner 	ERR_IF(esector >= nr_sectors)
1039*b411b363SPhilipp Reisner 		esector = (nr_sectors-1);
1040*b411b363SPhilipp Reisner 
1041*b411b363SPhilipp Reisner 	lbnr = BM_SECT_TO_BIT(nr_sectors-1);
1042*b411b363SPhilipp Reisner 
1043*b411b363SPhilipp Reisner 	/* we set it out of sync,
1044*b411b363SPhilipp Reisner 	 * we do not need to round anything here */
1045*b411b363SPhilipp Reisner 	sbnr = BM_SECT_TO_BIT(sector);
1046*b411b363SPhilipp Reisner 	ebnr = BM_SECT_TO_BIT(esector);
1047*b411b363SPhilipp Reisner 
1048*b411b363SPhilipp Reisner 	trace_drbd_resync(mdev, TRACE_LVL_METRICS,
1049*b411b363SPhilipp Reisner 			  "drbd_set_out_of_sync: sector=%llus size=%u sbnr=%lu ebnr=%lu\n",
1050*b411b363SPhilipp Reisner 			  (unsigned long long)sector, size, sbnr, ebnr);
1051*b411b363SPhilipp Reisner 
1052*b411b363SPhilipp Reisner 	/* ok, (capacity & 7) != 0 sometimes, but who cares...
1053*b411b363SPhilipp Reisner 	 * we count rs_{total,left} in bits, not sectors.  */
1054*b411b363SPhilipp Reisner 	spin_lock_irqsave(&mdev->al_lock, flags);
1055*b411b363SPhilipp Reisner 	count = drbd_bm_set_bits(mdev, sbnr, ebnr);
1056*b411b363SPhilipp Reisner 
1057*b411b363SPhilipp Reisner 	enr = BM_SECT_TO_EXT(sector);
1058*b411b363SPhilipp Reisner 	e = lc_find(mdev->resync, enr);
1059*b411b363SPhilipp Reisner 	if (e)
1060*b411b363SPhilipp Reisner 		lc_entry(e, struct bm_extent, lce)->rs_left += count;
1061*b411b363SPhilipp Reisner 	spin_unlock_irqrestore(&mdev->al_lock, flags);
1062*b411b363SPhilipp Reisner 
1063*b411b363SPhilipp Reisner out:
1064*b411b363SPhilipp Reisner 	put_ldev(mdev);
1065*b411b363SPhilipp Reisner }
1066*b411b363SPhilipp Reisner 
1067*b411b363SPhilipp Reisner static
1068*b411b363SPhilipp Reisner struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr)
1069*b411b363SPhilipp Reisner {
1070*b411b363SPhilipp Reisner 	struct lc_element *e;
1071*b411b363SPhilipp Reisner 	struct bm_extent *bm_ext;
1072*b411b363SPhilipp Reisner 	int wakeup = 0;
1073*b411b363SPhilipp Reisner 	unsigned long rs_flags;
1074*b411b363SPhilipp Reisner 
1075*b411b363SPhilipp Reisner 	spin_lock_irq(&mdev->al_lock);
1076*b411b363SPhilipp Reisner 	if (mdev->resync_locked > mdev->resync->nr_elements/2) {
1077*b411b363SPhilipp Reisner 		spin_unlock_irq(&mdev->al_lock);
1078*b411b363SPhilipp Reisner 		return NULL;
1079*b411b363SPhilipp Reisner 	}
1080*b411b363SPhilipp Reisner 	e = lc_get(mdev->resync, enr);
1081*b411b363SPhilipp Reisner 	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
1082*b411b363SPhilipp Reisner 	if (bm_ext) {
1083*b411b363SPhilipp Reisner 		if (bm_ext->lce.lc_number != enr) {
1084*b411b363SPhilipp Reisner 			bm_ext->rs_left = drbd_bm_e_weight(mdev, enr);
1085*b411b363SPhilipp Reisner 			bm_ext->rs_failed = 0;
1086*b411b363SPhilipp Reisner 			lc_changed(mdev->resync, &bm_ext->lce);
1087*b411b363SPhilipp Reisner 			wakeup = 1;
1088*b411b363SPhilipp Reisner 		}
1089*b411b363SPhilipp Reisner 		if (bm_ext->lce.refcnt == 1)
1090*b411b363SPhilipp Reisner 			mdev->resync_locked++;
1091*b411b363SPhilipp Reisner 		set_bit(BME_NO_WRITES, &bm_ext->flags);
1092*b411b363SPhilipp Reisner 	}
1093*b411b363SPhilipp Reisner 	rs_flags = mdev->resync->flags;
1094*b411b363SPhilipp Reisner 	spin_unlock_irq(&mdev->al_lock);
1095*b411b363SPhilipp Reisner 	if (wakeup)
1096*b411b363SPhilipp Reisner 		wake_up(&mdev->al_wait);
1097*b411b363SPhilipp Reisner 
1098*b411b363SPhilipp Reisner 	if (!bm_ext) {
1099*b411b363SPhilipp Reisner 		if (rs_flags & LC_STARVING)
1100*b411b363SPhilipp Reisner 			dev_warn(DEV, "Have to wait for element"
1101*b411b363SPhilipp Reisner 			     " (resync LRU too small?)\n");
1102*b411b363SPhilipp Reisner 		BUG_ON(rs_flags & LC_DIRTY);
1103*b411b363SPhilipp Reisner 	}
1104*b411b363SPhilipp Reisner 
1105*b411b363SPhilipp Reisner 	return bm_ext;
1106*b411b363SPhilipp Reisner }
1107*b411b363SPhilipp Reisner 
1108*b411b363SPhilipp Reisner static int _is_in_al(struct drbd_conf *mdev, unsigned int enr)
1109*b411b363SPhilipp Reisner {
1110*b411b363SPhilipp Reisner 	struct lc_element *al_ext;
1111*b411b363SPhilipp Reisner 	int rv = 0;
1112*b411b363SPhilipp Reisner 
1113*b411b363SPhilipp Reisner 	spin_lock_irq(&mdev->al_lock);
1114*b411b363SPhilipp Reisner 	if (unlikely(enr == mdev->act_log->new_number))
1115*b411b363SPhilipp Reisner 		rv = 1;
1116*b411b363SPhilipp Reisner 	else {
1117*b411b363SPhilipp Reisner 		al_ext = lc_find(mdev->act_log, enr);
1118*b411b363SPhilipp Reisner 		if (al_ext) {
1119*b411b363SPhilipp Reisner 			if (al_ext->refcnt)
1120*b411b363SPhilipp Reisner 				rv = 1;
1121*b411b363SPhilipp Reisner 		}
1122*b411b363SPhilipp Reisner 	}
1123*b411b363SPhilipp Reisner 	spin_unlock_irq(&mdev->al_lock);
1124*b411b363SPhilipp Reisner 
1125*b411b363SPhilipp Reisner 	/*
1126*b411b363SPhilipp Reisner 	if (unlikely(rv)) {
1127*b411b363SPhilipp Reisner 		dev_info(DEV, "Delaying sync read until app's write is done\n");
1128*b411b363SPhilipp Reisner 	}
1129*b411b363SPhilipp Reisner 	*/
1130*b411b363SPhilipp Reisner 	return rv;
1131*b411b363SPhilipp Reisner }
1132*b411b363SPhilipp Reisner 
1133*b411b363SPhilipp Reisner /**
1134*b411b363SPhilipp Reisner  * drbd_rs_begin_io() - Gets an extent in the resync LRU cache and sets it to BME_LOCKED
1135*b411b363SPhilipp Reisner  * @mdev:	DRBD device.
1136*b411b363SPhilipp Reisner  * @sector:	The sector number.
1137*b411b363SPhilipp Reisner  *
1138*b411b363SPhilipp Reisner  * This functions sleeps on al_wait. Returns 1 on success, 0 if interrupted.
1139*b411b363SPhilipp Reisner  */
1140*b411b363SPhilipp Reisner int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
1141*b411b363SPhilipp Reisner {
1142*b411b363SPhilipp Reisner 	unsigned int enr = BM_SECT_TO_EXT(sector);
1143*b411b363SPhilipp Reisner 	struct bm_extent *bm_ext;
1144*b411b363SPhilipp Reisner 	int i, sig;
1145*b411b363SPhilipp Reisner 
1146*b411b363SPhilipp Reisner 	trace_drbd_resync(mdev, TRACE_LVL_ALL,
1147*b411b363SPhilipp Reisner 			  "drbd_rs_begin_io: sector=%llus (rs_end=%d)\n",
1148*b411b363SPhilipp Reisner 			  (unsigned long long)sector, enr);
1149*b411b363SPhilipp Reisner 
1150*b411b363SPhilipp Reisner 	sig = wait_event_interruptible(mdev->al_wait,
1151*b411b363SPhilipp Reisner 			(bm_ext = _bme_get(mdev, enr)));
1152*b411b363SPhilipp Reisner 	if (sig)
1153*b411b363SPhilipp Reisner 		return 0;
1154*b411b363SPhilipp Reisner 
1155*b411b363SPhilipp Reisner 	if (test_bit(BME_LOCKED, &bm_ext->flags))
1156*b411b363SPhilipp Reisner 		return 1;
1157*b411b363SPhilipp Reisner 
1158*b411b363SPhilipp Reisner 	for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
1159*b411b363SPhilipp Reisner 		sig = wait_event_interruptible(mdev->al_wait,
1160*b411b363SPhilipp Reisner 				!_is_in_al(mdev, enr * AL_EXT_PER_BM_SECT + i));
1161*b411b363SPhilipp Reisner 		if (sig) {
1162*b411b363SPhilipp Reisner 			spin_lock_irq(&mdev->al_lock);
1163*b411b363SPhilipp Reisner 			if (lc_put(mdev->resync, &bm_ext->lce) == 0) {
1164*b411b363SPhilipp Reisner 				clear_bit(BME_NO_WRITES, &bm_ext->flags);
1165*b411b363SPhilipp Reisner 				mdev->resync_locked--;
1166*b411b363SPhilipp Reisner 				wake_up(&mdev->al_wait);
1167*b411b363SPhilipp Reisner 			}
1168*b411b363SPhilipp Reisner 			spin_unlock_irq(&mdev->al_lock);
1169*b411b363SPhilipp Reisner 			return 0;
1170*b411b363SPhilipp Reisner 		}
1171*b411b363SPhilipp Reisner 	}
1172*b411b363SPhilipp Reisner 
1173*b411b363SPhilipp Reisner 	set_bit(BME_LOCKED, &bm_ext->flags);
1174*b411b363SPhilipp Reisner 
1175*b411b363SPhilipp Reisner 	return 1;
1176*b411b363SPhilipp Reisner }
1177*b411b363SPhilipp Reisner 
/**
 * drbd_try_rs_begin_io() - Gets an extent in the resync LRU cache, does not sleep
 * @mdev:	DRBD device.
 * @sector:	The sector number.
 *
 * Gets an extent in the resync LRU cache, sets it to BME_NO_WRITES, then
 * tries to set it to BME_LOCKED. Returns 0 upon success, and -EAGAIN
 * if there is still application IO going on in this area.
 */
int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
{
	unsigned int enr = BM_SECT_TO_EXT(sector);
	/* first activity-log extent number covered by this resync extent */
	const unsigned int al_enr = enr*AL_EXT_PER_BM_SECT;
	struct lc_element *e;
	struct bm_extent *bm_ext;
	int i;

	trace_drbd_resync(mdev, TRACE_LVL_ALL, "drbd_try_rs_begin_io: sector=%llus\n",
			  (unsigned long long)sector);

	spin_lock_irq(&mdev->al_lock);
	/* resync_wenr remembers the extent of a previous -EAGAIN attempt on
	 * which we still hold a reference (see try_again below).  If this
	 * request is for a different extent, drop that left-over reference
	 * first. */
	if (mdev->resync_wenr != LC_FREE && mdev->resync_wenr != enr) {
		/* in case you have very heavy scattered io, it may
		 * stall the syncer undefined if we give up the ref count
		 * when we try again and requeue.
		 *
		 * if we don't give up the refcount, but the next time
		 * we are scheduled this extent has been "synced" by new
		 * application writes, we'd miss the lc_put on the
		 * extent we keep the refcount on.
		 * so we remembered which extent we had to try again, and
		 * if the next requested one is something else, we do
		 * the lc_put here...
		 * we also have to wake_up
		 */

		trace_drbd_resync(mdev, TRACE_LVL_ALL,
				  "dropping %u, apparently got 'synced' by application io\n",
				  mdev->resync_wenr);

		e = lc_find(mdev->resync, mdev->resync_wenr);
		bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
		if (bm_ext) {
			D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags));
			D_ASSERT(test_bit(BME_NO_WRITES, &bm_ext->flags));
			clear_bit(BME_NO_WRITES, &bm_ext->flags);
			mdev->resync_wenr = LC_FREE;
			if (lc_put(mdev->resync, &bm_ext->lce) == 0)
				mdev->resync_locked--;
			wake_up(&mdev->al_wait);
		} else {
			/* resync_wenr was set but the extent is gone from the
			 * LRU: must not happen, the remembered extent holds a
			 * reference and cannot be evicted. */
			dev_alert(DEV, "LOGIC BUG\n");
		}
	}
	/* TRY: take a reference only if the extent is already in the LRU
	 * (lc_try_get does not evict/replace another element). */
	e = lc_try_get(mdev->resync, enr);
	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
	if (bm_ext) {
		if (test_bit(BME_LOCKED, &bm_ext->flags))
			goto proceed;
		if (!test_and_set_bit(BME_NO_WRITES, &bm_ext->flags)) {
			mdev->resync_locked++;
		} else {
			/* we did set the BME_NO_WRITES,
			 * but then could not set BME_LOCKED,
			 * so we tried again.
			 * drop the extra reference. */
			trace_drbd_resync(mdev, TRACE_LVL_ALL,
					  "dropping extra reference on %u\n", enr);

			bm_ext->lce.refcnt--;
			D_ASSERT(bm_ext->lce.refcnt > 0);
		}
		goto check_al;
	} else {
		/* do we rather want to try later?
		 * keep a few elements of the resync LRU available for
		 * other users, otherwise remember this extent and retry. */
		if (mdev->resync_locked > mdev->resync->nr_elements-3) {
			trace_drbd_resync(mdev, TRACE_LVL_ALL,
					  "resync_locked = %u!\n", mdev->resync_locked);

			goto try_again;
		}
		/* Do or do not. There is no try. -- Yoda */
		e = lc_get(mdev->resync, enr);
		bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
		if (!bm_ext) {
			const unsigned long rs_flags = mdev->resync->flags;
			if (rs_flags & LC_STARVING)
				dev_warn(DEV, "Have to wait for element"
				     " (resync LRU too small?)\n");
			BUG_ON(rs_flags & LC_DIRTY);
			goto try_again;
		}
		/* lc_get gave us a recycled element: initialize it for the
		 * new extent number. */
		if (bm_ext->lce.lc_number != enr) {
			bm_ext->rs_left = drbd_bm_e_weight(mdev, enr);
			bm_ext->rs_failed = 0;
			lc_changed(mdev->resync, &bm_ext->lce);
			wake_up(&mdev->al_wait);
			D_ASSERT(test_bit(BME_LOCKED, &bm_ext->flags) == 0);
		}
		set_bit(BME_NO_WRITES, &bm_ext->flags);
		D_ASSERT(bm_ext->lce.refcnt == 1);
		mdev->resync_locked++;
		goto check_al;
	}
check_al:
	/* BME_NO_WRITES is set; now verify that no application IO is
	 * pending in any of the AL extents covered by this resync extent,
	 * otherwise remember it and ask the caller to retry. */
	trace_drbd_resync(mdev, TRACE_LVL_ALL, "checking al for %u\n", enr);

	for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
		if (unlikely(al_enr+i == mdev->act_log->new_number))
			goto try_again;
		if (lc_is_used(mdev->act_log, al_enr+i))
			goto try_again;
	}
	set_bit(BME_LOCKED, &bm_ext->flags);
proceed:
	mdev->resync_wenr = LC_FREE;
	spin_unlock_irq(&mdev->al_lock);
	return 0;

try_again:
	trace_drbd_resync(mdev, TRACE_LVL_ALL, "need to try again for %u\n", enr);
	/* keep the reference (if we got one) and remember the extent, so the
	 * retry path above can release it if something else comes first */
	if (bm_ext)
		mdev->resync_wenr = enr;
	spin_unlock_irq(&mdev->al_lock);
	return -EAGAIN;
}
1305*b411b363SPhilipp Reisner 
1306*b411b363SPhilipp Reisner void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector)
1307*b411b363SPhilipp Reisner {
1308*b411b363SPhilipp Reisner 	unsigned int enr = BM_SECT_TO_EXT(sector);
1309*b411b363SPhilipp Reisner 	struct lc_element *e;
1310*b411b363SPhilipp Reisner 	struct bm_extent *bm_ext;
1311*b411b363SPhilipp Reisner 	unsigned long flags;
1312*b411b363SPhilipp Reisner 
1313*b411b363SPhilipp Reisner 	trace_drbd_resync(mdev, TRACE_LVL_ALL,
1314*b411b363SPhilipp Reisner 			  "drbd_rs_complete_io: sector=%llus (rs_enr=%d)\n",
1315*b411b363SPhilipp Reisner 			  (long long)sector, enr);
1316*b411b363SPhilipp Reisner 
1317*b411b363SPhilipp Reisner 	spin_lock_irqsave(&mdev->al_lock, flags);
1318*b411b363SPhilipp Reisner 	e = lc_find(mdev->resync, enr);
1319*b411b363SPhilipp Reisner 	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
1320*b411b363SPhilipp Reisner 	if (!bm_ext) {
1321*b411b363SPhilipp Reisner 		spin_unlock_irqrestore(&mdev->al_lock, flags);
1322*b411b363SPhilipp Reisner 		if (__ratelimit(&drbd_ratelimit_state))
1323*b411b363SPhilipp Reisner 			dev_err(DEV, "drbd_rs_complete_io() called, but extent not found\n");
1324*b411b363SPhilipp Reisner 		return;
1325*b411b363SPhilipp Reisner 	}
1326*b411b363SPhilipp Reisner 
1327*b411b363SPhilipp Reisner 	if (bm_ext->lce.refcnt == 0) {
1328*b411b363SPhilipp Reisner 		spin_unlock_irqrestore(&mdev->al_lock, flags);
1329*b411b363SPhilipp Reisner 		dev_err(DEV, "drbd_rs_complete_io(,%llu [=%u]) called, "
1330*b411b363SPhilipp Reisner 		    "but refcnt is 0!?\n",
1331*b411b363SPhilipp Reisner 		    (unsigned long long)sector, enr);
1332*b411b363SPhilipp Reisner 		return;
1333*b411b363SPhilipp Reisner 	}
1334*b411b363SPhilipp Reisner 
1335*b411b363SPhilipp Reisner 	if (lc_put(mdev->resync, &bm_ext->lce) == 0) {
1336*b411b363SPhilipp Reisner 		clear_bit(BME_LOCKED, &bm_ext->flags);
1337*b411b363SPhilipp Reisner 		clear_bit(BME_NO_WRITES, &bm_ext->flags);
1338*b411b363SPhilipp Reisner 		mdev->resync_locked--;
1339*b411b363SPhilipp Reisner 		wake_up(&mdev->al_wait);
1340*b411b363SPhilipp Reisner 	}
1341*b411b363SPhilipp Reisner 
1342*b411b363SPhilipp Reisner 	spin_unlock_irqrestore(&mdev->al_lock, flags);
1343*b411b363SPhilipp Reisner }
1344*b411b363SPhilipp Reisner 
/**
 * drbd_rs_cancel_all() - Removes all extents from the resync LRU (even BME_LOCKED)
 * @mdev:	DRBD device.
 */
void drbd_rs_cancel_all(struct drbd_conf *mdev)
{
	trace_drbd_resync(mdev, TRACE_LVL_METRICS, "drbd_rs_cancel_all\n");

	spin_lock_irq(&mdev->al_lock);

	if (get_ldev_if_state(mdev, D_FAILED)) { /* Makes sure ->resync is there. */
		lc_reset(mdev->resync);
		put_ldev(mdev);
	}
	/* no extent is referenced or remembered for retry any more */
	mdev->resync_locked = 0;
	mdev->resync_wenr = LC_FREE;
	spin_unlock_irq(&mdev->al_lock);
	/* anyone sleeping in drbd_rs_begin_io() may now proceed */
	wake_up(&mdev->al_wait);
}
1364*b411b363SPhilipp Reisner 
/**
 * drbd_rs_del_all() - Gracefully remove all extents from the resync LRU
 * @mdev:	DRBD device.
 *
 * Returns 0 upon success, -EAGAIN if at least one reference count was
 * not zero.
 */
int drbd_rs_del_all(struct drbd_conf *mdev)
{
	struct lc_element *e;
	struct bm_extent *bm_ext;
	int i;

	trace_drbd_resync(mdev, TRACE_LVL_METRICS, "drbd_rs_del_all\n");

	spin_lock_irq(&mdev->al_lock);

	if (get_ldev_if_state(mdev, D_FAILED)) {
		/* ok, ->resync is there. */
		for (i = 0; i < mdev->resync->nr_elements; i++) {
			e = lc_element_by_index(mdev->resync, i);
			/* NOTE(review): the ternary guards against e == NULL,
			 * but bm_ext is dereferenced unconditionally below —
			 * presumably lc_element_by_index() never returns NULL
			 * for a valid index; confirm against lib/lru_cache.c */
			bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
			if (bm_ext->lce.lc_number == LC_FREE)
				continue;
			/* drop the left-over reference remembered by a
			 * previous -EAGAIN of drbd_try_rs_begin_io() */
			if (bm_ext->lce.lc_number == mdev->resync_wenr) {
				dev_info(DEV, "dropping %u in drbd_rs_del_all, apparently"
				     " got 'synced' by application io\n",
				     mdev->resync_wenr);
				D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags));
				D_ASSERT(test_bit(BME_NO_WRITES, &bm_ext->flags));
				clear_bit(BME_NO_WRITES, &bm_ext->flags);
				mdev->resync_wenr = LC_FREE;
				lc_put(mdev->resync, &bm_ext->lce);
			}
			/* someone still holds a reference on this extent:
			 * give up and let the caller retry later */
			if (bm_ext->lce.refcnt != 0) {
				dev_info(DEV, "Retrying drbd_rs_del_all() later. "
				     "refcnt=%d\n", bm_ext->lce.refcnt);
				put_ldev(mdev);
				spin_unlock_irq(&mdev->al_lock);
				return -EAGAIN;
			}
			D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags));
			D_ASSERT(!test_bit(BME_NO_WRITES, &bm_ext->flags));
			lc_del(mdev->resync, &bm_ext->lce);
		}
		D_ASSERT(mdev->resync->used == 0);
		put_ldev(mdev);
	}
	spin_unlock_irq(&mdev->al_lock);

	return 0;
}
1417*b411b363SPhilipp Reisner 
1418*b411b363SPhilipp Reisner /**
1419*b411b363SPhilipp Reisner  * drbd_rs_failed_io() - Record information on a failure to resync the specified blocks
1420*b411b363SPhilipp Reisner  * @mdev:	DRBD device.
1421*b411b363SPhilipp Reisner  * @sector:	The sector number.
1422*b411b363SPhilipp Reisner  * @size:	Size of failed IO operation, in byte.
1423*b411b363SPhilipp Reisner  */
1424*b411b363SPhilipp Reisner void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size)
1425*b411b363SPhilipp Reisner {
1426*b411b363SPhilipp Reisner 	/* Is called from worker and receiver context _only_ */
1427*b411b363SPhilipp Reisner 	unsigned long sbnr, ebnr, lbnr;
1428*b411b363SPhilipp Reisner 	unsigned long count;
1429*b411b363SPhilipp Reisner 	sector_t esector, nr_sectors;
1430*b411b363SPhilipp Reisner 	int wake_up = 0;
1431*b411b363SPhilipp Reisner 
1432*b411b363SPhilipp Reisner 	trace_drbd_resync(mdev, TRACE_LVL_SUMMARY,
1433*b411b363SPhilipp Reisner 			  "drbd_rs_failed_io: sector=%llus, size=%u\n",
1434*b411b363SPhilipp Reisner 			  (unsigned long long)sector, size);
1435*b411b363SPhilipp Reisner 
1436*b411b363SPhilipp Reisner 	if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) {
1437*b411b363SPhilipp Reisner 		dev_err(DEV, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n",
1438*b411b363SPhilipp Reisner 				(unsigned long long)sector, size);
1439*b411b363SPhilipp Reisner 		return;
1440*b411b363SPhilipp Reisner 	}
1441*b411b363SPhilipp Reisner 	nr_sectors = drbd_get_capacity(mdev->this_bdev);
1442*b411b363SPhilipp Reisner 	esector = sector + (size >> 9) - 1;
1443*b411b363SPhilipp Reisner 
1444*b411b363SPhilipp Reisner 	ERR_IF(sector >= nr_sectors) return;
1445*b411b363SPhilipp Reisner 	ERR_IF(esector >= nr_sectors) esector = (nr_sectors-1);
1446*b411b363SPhilipp Reisner 
1447*b411b363SPhilipp Reisner 	lbnr = BM_SECT_TO_BIT(nr_sectors-1);
1448*b411b363SPhilipp Reisner 
1449*b411b363SPhilipp Reisner 	/*
1450*b411b363SPhilipp Reisner 	 * round up start sector, round down end sector.  we make sure we only
1451*b411b363SPhilipp Reisner 	 * handle full, aligned, BM_BLOCK_SIZE (4K) blocks */
1452*b411b363SPhilipp Reisner 	if (unlikely(esector < BM_SECT_PER_BIT-1))
1453*b411b363SPhilipp Reisner 		return;
1454*b411b363SPhilipp Reisner 	if (unlikely(esector == (nr_sectors-1)))
1455*b411b363SPhilipp Reisner 		ebnr = lbnr;
1456*b411b363SPhilipp Reisner 	else
1457*b411b363SPhilipp Reisner 		ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
1458*b411b363SPhilipp Reisner 	sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);
1459*b411b363SPhilipp Reisner 
1460*b411b363SPhilipp Reisner 	if (sbnr > ebnr)
1461*b411b363SPhilipp Reisner 		return;
1462*b411b363SPhilipp Reisner 
1463*b411b363SPhilipp Reisner 	/*
1464*b411b363SPhilipp Reisner 	 * ok, (capacity & 7) != 0 sometimes, but who cares...
1465*b411b363SPhilipp Reisner 	 * we count rs_{total,left} in bits, not sectors.
1466*b411b363SPhilipp Reisner 	 */
1467*b411b363SPhilipp Reisner 	spin_lock_irq(&mdev->al_lock);
1468*b411b363SPhilipp Reisner 	count = drbd_bm_count_bits(mdev, sbnr, ebnr);
1469*b411b363SPhilipp Reisner 	if (count) {
1470*b411b363SPhilipp Reisner 		mdev->rs_failed += count;
1471*b411b363SPhilipp Reisner 
1472*b411b363SPhilipp Reisner 		if (get_ldev(mdev)) {
1473*b411b363SPhilipp Reisner 			drbd_try_clear_on_disk_bm(mdev, sector, count, FALSE);
1474*b411b363SPhilipp Reisner 			put_ldev(mdev);
1475*b411b363SPhilipp Reisner 		}
1476*b411b363SPhilipp Reisner 
1477*b411b363SPhilipp Reisner 		/* just wake_up unconditional now, various lc_chaged(),
1478*b411b363SPhilipp Reisner 		 * lc_put() in drbd_try_clear_on_disk_bm(). */
1479*b411b363SPhilipp Reisner 		wake_up = 1;
1480*b411b363SPhilipp Reisner 	}
1481*b411b363SPhilipp Reisner 	spin_unlock_irq(&mdev->al_lock);
1482*b411b363SPhilipp Reisner 	if (wake_up)
1483*b411b363SPhilipp Reisner 		wake_up(&mdev->al_wait);
1484*b411b363SPhilipp Reisner }
1485