/*
   drbd_worker.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */

#include <linux/module.h>
#include <linux/drbd.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>

#include "drbd_int.h"
#include "drbd_req.h"

static int w_make_ov_request(struct drbd_work *w, int cancel);


/* endio handlers:
 *   drbd_md_io_complete (defined here)
 *   drbd_request_endio (defined here)
 *   drbd_peer_request_endio (defined here)
 *   bm_async_io_complete (defined in drbd_bitmap.c)
 *
 * For all these callbacks, note the following:
 * The callbacks will be called in irq context by the IDE drivers,
 * and in Softirqs/Tasklets/BH context by the SCSI drivers.
 * Try to get the locking right :)
 *
 */


/* About the global_state_lock
   Each state transition on a device holds a read lock. In case we have
   to evaluate the resync after dependencies, we grab a write lock, because
   we need stable states on all devices for that.  */
rwlock_t global_state_lock;

/* used for synchronous meta data and bitmap IO
 * submitted by drbd_md_sync_page_io()
 */
void drbd_md_io_complete(struct bio *bio, int error)
{
	struct drbd_md_io *md_io;
	struct drbd_conf *mdev;

	md_io = (struct drbd_md_io *)bio->bi_private;
	mdev = container_of(md_io, struct drbd_conf, md_io);

	md_io->error = error;

	/* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
	 * to timeout on the lower level device, and eventually detach from it.
	 * If this io completion runs after that timeout expired, this
	 * drbd_md_put_buffer() may allow us to finally try and re-attach.
	 * During normal operation, this only puts that extra reference
	 * down to 1 again.
	 * Make sure we first drop the reference, and only then signal
	 * completion, or we may (in drbd_al_read_log()) cycle so fast into the
	 * next drbd_md_sync_page_io(), that we trigger the
	 * ASSERT(atomic_read(&mdev->md_io_in_use) == 1) there.
	 */
	drbd_md_put_buffer(mdev);
	md_io->done = 1;
	wake_up(&mdev->misc_wait);
	bio_put(bio);
	if (mdev->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */
		put_ldev(mdev);
}

/* reads on behalf of the partner,
 * "submitted" by the receiver
 */
void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
	unsigned long flags = 0;
	struct drbd_conf *mdev = peer_req->w.mdev;

	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
	mdev->read_cnt += peer_req->i.size >> 9;
	list_del(&peer_req->w.list);
	if (list_empty(&mdev->read_ee))
		wake_up(&mdev->ee_wait);
	if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
		__drbd_chk_io_error(mdev, DRBD_READ_ERROR);
	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);

	drbd_queue_work(&mdev->tconn->sender_work, &peer_req->w);
	put_ldev(mdev);
}

/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver, final stage.  */
static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
	unsigned long flags = 0;
	struct drbd_conf *mdev = peer_req->w.mdev;
	struct drbd_interval i;
	int do_wake;
	u64 block_id;
	int do_al_complete_io;

	/* after we moved peer_req to done_ee,
	 * we may no longer access it,
	 * it may be freed/reused already!
	 * (as soon as we release the req_lock) */
	i = peer_req->i;
	do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
	block_id = peer_req->block_id;

	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
	mdev->writ_cnt += peer_req->i.size >> 9;
	list_move_tail(&peer_req->w.list, &mdev->done_ee);

	/*
	 * Do not remove from the write_requests tree here: we did not send the
	 * Ack yet and did not wake possibly waiting conflicting requests.
	 * Removed from the tree from "drbd_process_done_ee" within the
	 * appropriate w.cb (e_end_block/e_end_resync_block) or from
	 * _drbd_clear_done_ee.
	 */

	do_wake = list_empty(block_id == ID_SYNCER ? &mdev->sync_ee : &mdev->active_ee);

	if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
		__drbd_chk_io_error(mdev, DRBD_WRITE_ERROR);
	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);

	if (block_id == ID_SYNCER)
		drbd_rs_complete_io(mdev, i.sector);

	if (do_wake)
		wake_up(&mdev->ee_wait);

	if (do_al_complete_io)
		drbd_al_complete_io(mdev, &i);

	wake_asender(mdev->tconn);
	put_ldev(mdev);
}

/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver.
 */
void drbd_peer_request_endio(struct bio *bio, int error)
{
	struct drbd_peer_request *peer_req = bio->bi_private;
	struct drbd_conf *mdev = peer_req->w.mdev;
	int uptodate = bio_flagged(bio, BIO_UPTODATE);
	int is_write = bio_data_dir(bio) == WRITE;

	if (error && __ratelimit(&drbd_ratelimit_state))
		dev_warn(DEV, "%s: error=%d s=%llus\n",
				is_write ? "write" : "read", error,
				(unsigned long long)peer_req->i.sector);
	if (!error && !uptodate) {
		if (__ratelimit(&drbd_ratelimit_state))
			dev_warn(DEV, "%s: setting error to -EIO s=%llus\n",
					is_write ? "write" : "read",
					(unsigned long long)peer_req->i.sector);
		/* strange behavior of some lower level drivers...
		 * fail the request by clearing the uptodate flag,
		 * but do not return any error?! */
		error = -EIO;
	}

	if (error)
		set_bit(__EE_WAS_ERROR, &peer_req->flags);

	bio_put(bio); /* no need for the bio anymore */
	if (atomic_dec_and_test(&peer_req->pending_bios)) {
		if (is_write)
			drbd_endio_write_sec_final(peer_req);
		else
			drbd_endio_read_sec_final(peer_req);
	}
}

/* read, readA or write requests on R_PRIMARY coming from drbd_make_request
 */
void drbd_request_endio(struct bio *bio, int error)
{
	unsigned long flags;
	struct drbd_request *req = bio->bi_private;
	struct drbd_conf *mdev = req->w.mdev;
	struct bio_and_error m;
	enum drbd_req_event what;
	int uptodate = bio_flagged(bio, BIO_UPTODATE);

	if (!error && !uptodate) {
		dev_warn(DEV, "p %s: setting error to -EIO\n",
			 bio_data_dir(bio) == WRITE ? "write" : "read");
		/* strange behavior of some lower level drivers...
		 * fail the request by clearing the uptodate flag,
		 * but do not return any error?! */
		error = -EIO;
	}


	/* If this request was aborted locally before,
	 * but now was completed "successfully",
	 * chances are that this caused arbitrary data corruption.
	 *
	 * "aborting" requests, or force-detaching the disk, is intended for
	 * completely blocked/hung local backing devices which do no longer
	 * complete requests at all, not even do error completions.  In this
	 * situation, usually a hard-reset and failover is the only way out.
	 *
	 * By "aborting", basically faking a local error-completion,
	 * we allow for a more graceful switchover by cleanly migrating services.
	 * Still the affected node has to be rebooted "soon".
	 *
	 * By completing these requests, we allow the upper layers to re-use
	 * the associated data pages.
	 *
	 * If later the local backing device "recovers", and now DMAs some data
	 * from disk into the original request pages, in the best case it will
	 * just put random data into unused pages; but typically it will corrupt
	 * meanwhile completely unrelated data, causing all sorts of damage.
	 *
	 * Which means delayed successful completion,
	 * especially for READ requests,
	 * is a reason to panic().
	 *
	 * We assume that a delayed *error* completion is OK,
	 * though we still will complain noisily about it.
	 */
	if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) {
		if (__ratelimit(&drbd_ratelimit_state))
			dev_emerg(DEV, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");

		if (!error)
			panic("possible random memory corruption caused by delayed completion of aborted local request\n");
	}

	/* to avoid recursion in __req_mod */
	if (unlikely(error)) {
		what = (bio_data_dir(bio) == WRITE)
			? WRITE_COMPLETED_WITH_ERROR
			: (bio_rw(bio) == READ)
			  ? READ_COMPLETED_WITH_ERROR
			  : READ_AHEAD_COMPLETED_WITH_ERROR;
	} else
		what = COMPLETED_OK;

	bio_put(req->private_bio);
	req->private_bio = ERR_PTR(error);

	/* not req_mod(), we need irqsave here! */
	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
	__req_mod(req, what, &m);
	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
	put_ldev(mdev);

	if (m.bio)
		complete_master_bio(mdev, &m);
}

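/* Compute a digest over all pages attached to a peer request.  The pages
 * form a chain (page_chain_next()); every page except the last is hashed
 * in full, the last one only as far as peer_req->i.size requires.
 * Used by the checksum based resync and online verify paths. */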
void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm,
		  struct drbd_peer_request *peer_req, void *digest)
{
	struct hash_desc desc;
	struct scatterlist sg;
	struct page *page = peer_req->pages;
	struct page *tmp;
	unsigned len;

	desc.tfm = tfm;
	desc.flags = 0;

	sg_init_table(&sg, 1);
	crypto_hash_init(&desc);

	while ((tmp = page_chain_next(page))) {
		/* all but the last page will be fully used */
		sg_set_page(&sg, page, PAGE_SIZE, 0);
		crypto_hash_update(&desc, &sg, sg.length);
		page = tmp;
	}
	/* and now the last, possibly only partially used page */
	len = peer_req->i.size & (PAGE_SIZE - 1);
	sg_set_page(&sg, page, len ?: PAGE_SIZE, 0);
	crypto_hash_update(&desc, &sg, sg.length);
	crypto_hash_final(&desc, digest);
}

void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *bio, void *digest)
{
	struct hash_desc desc;
	struct scatterlist sg;
	struct bio_vec *bvec;
	int i;

	desc.tfm = tfm;
	desc.flags = 0;

	sg_init_table(&sg, 1);
	crypto_hash_init(&desc);

	bio_for_each_segment(bvec, bio, i) {
		sg_set_page(&sg, bvec->bv_page, bvec->bv_len, bvec->bv_offset);
		crypto_hash_update(&desc, &sg, sg.length);
	}
	crypto_hash_final(&desc, digest);
}

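/* Worker callback for a local read submitted by read_for_csum(): compute
 * the digest over the block just read and send it to the peer as a
 * P_CSUM_RS_REQUEST, so the peer only transfers the full block if its
 * checksum differs.  The peer request is freed before sending, see the
 * deadlock comment inside. */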
/* MAYBE merge common code with w_e_end_ov_req */
static int w_e_send_csum(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_conf *mdev = w->mdev;
	int digest_size;
	void *digest;
	int err = 0;

	if (unlikely(cancel))
		goto out;

	if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0))
		goto out;

	digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm);
	digest = kmalloc(digest_size, GFP_NOIO);
	if (digest) {
		sector_t sector = peer_req->i.sector;
		unsigned int size = peer_req->i.size;
		drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest);
		/* Free peer_req and pages before send.
		 * In case we block on congestion, we could otherwise run into
		 * some distributed deadlock, if the other side blocks on
		 * congestion as well, because our receiver blocks in
		 * drbd_alloc_pages due to pp_in_use > max_buffers. */
		drbd_free_peer_req(mdev, peer_req);
		peer_req = NULL;
		inc_rs_pending(mdev);
		err = drbd_send_drequest_csum(mdev, sector, size,
					      digest, digest_size,
					      P_CSUM_RS_REQUEST);
		kfree(digest);
	} else {
		dev_err(DEV, "kmalloc() of digest failed.\n");
		err = -ENOMEM;
	}

out:
	if (peer_req)
		drbd_free_peer_req(mdev, peer_req);

	if (unlikely(err))
		dev_err(DEV, "drbd_send_drequest(..., csum) failed\n");
	return err;
}

#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

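/* Prepare a checksum based resync request for (sector, size): allocate a
 * peer request, queue it on read_ee and submit a local READ.  Completion
 * (drbd_peer_request_endio()) eventually queues w_e_send_csum().
 * Returns 0 on success, -EIO if the local disk is gone, and -EAGAIN if
 * the caller should retry later (throttled, allocation or submission
 * failure). */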
static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
{
	struct drbd_peer_request *peer_req;

	if (!get_ldev(mdev))
		return -EIO;

	if (drbd_rs_should_slow_down(mdev, sector))
		goto defer;

	/* GFP_TRY, because if there is no memory available right now, this may
	 * be rescheduled for later. It is "only" background resync, after all. */
	peer_req = drbd_alloc_peer_req(mdev, ID_SYNCER /* unused */, sector,
				       size, GFP_TRY);
	if (!peer_req)
		goto defer;

	peer_req->w.cb = w_e_send_csum;
	spin_lock_irq(&mdev->tconn->req_lock);
	list_add(&peer_req->w.list, &mdev->read_ee);
	spin_unlock_irq(&mdev->tconn->req_lock);

	atomic_add(size >> 9, &mdev->rs_sect_ev);
	if (drbd_submit_peer_request(mdev, peer_req, READ, DRBD_FAULT_RS_RD) == 0)
		return 0;

	/* If it failed because of ENOMEM, retry should help.  If it failed
	 * because bio_add_page failed (probably broken lower level driver),
	 * retry may or may not help.
	 * If it does not, you may need to force disconnect. */
	spin_lock_irq(&mdev->tconn->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	drbd_free_peer_req(mdev, peer_req);
defer:
	put_ldev(mdev);
	return -EAGAIN;
}

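/* Periodic entry point, queued by resync_timer_fn(): depending on the
 * connection state, generate the next batch of online verify requests
 * (C_VERIFY_S) or resync requests (C_SYNC_TARGET). */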
int w_resync_timer(struct drbd_work *w, int cancel)
{
	struct drbd_conf *mdev = w->mdev;
	switch (mdev->state.conn) {
	case C_VERIFY_S:
		w_make_ov_request(w, cancel);
		break;
	case C_SYNC_TARGET:
		w_make_resync_request(w, cancel);
		break;
	}

	return 0;
}

void resync_timer_fn(unsigned long data)
{
	struct drbd_conf *mdev = (struct drbd_conf *) data;

	if (list_empty(&mdev->resync_work.list))
		drbd_queue_work(&mdev->tconn->sender_work, &mdev->resync_work);
}

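/* The fifo_buffer below is the plan-ahead ring buffer used by the dynamic
 * resync speed controller (drbd_rs_controller()): one slot per future
 * SLEEP_TIME step.  fifo_set() initializes all slots to a value,
 * fifo_push() rotates the ring and returns the entry that is due now, and
 * fifo_add_val() spreads a correction evenly over all planned steps. */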
static void fifo_set(struct fifo_buffer *fb, int value)
{
	int i;

	for (i = 0; i < fb->size; i++)
		fb->values[i] = value;
}

static int fifo_push(struct fifo_buffer *fb, int value)
{
	int ov;

	ov = fb->values[fb->head_index];
	fb->values[fb->head_index++] = value;

	if (fb->head_index >= fb->size)
		fb->head_index = 0;

	return ov;
}

static void fifo_add_val(struct fifo_buffer *fb, int value)
{
	int i;

	for (i = 0; i < fb->size; i++)
		fb->values[i] += value;
}

struct fifo_buffer *fifo_alloc(int fifo_size)
{
	struct fifo_buffer *fb;

	fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_NOIO);
	if (!fb)
		return NULL;

	fb->head_index = 0;
	fb->size = fifo_size;
	fb->total = 0;

	return fb;
}

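/* Dynamic resync speed controller, invoked once per SLEEP_TIME interval.
 * "want" is the amount of resync data we would like to have in flight:
 * either the configured c_fill_target, or an amount derived from
 * c_delay_target and the rate at which replies came in during the last
 * interval.  The difference to what is actually in flight (plus what is
 * already planned) is spread over the plan fifo as a correction; the
 * return value is the number of sectors to request now, never exceeding
 * the equivalent of c_max_rate. */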
static int drbd_rs_controller(struct drbd_conf *mdev)
{
	struct disk_conf *dc;
	unsigned int sect_in;  /* Number of sectors that came in since the last turn */
	unsigned int want;     /* The number of sectors we want in the proxy */
	int req_sect; /* Number of sectors to request in this turn */
	int correction; /* Number of sectors more we need in the proxy*/
	int cps; /* correction per invocation of drbd_rs_controller() */
	int steps; /* Number of time steps to plan ahead */
	int curr_corr;
	int max_sect;
	struct fifo_buffer *plan;

	sect_in = atomic_xchg(&mdev->rs_sect_in, 0); /* Number of sectors that came in */
	mdev->rs_in_flight -= sect_in;

	dc = rcu_dereference(mdev->ldev->disk_conf);
	plan = rcu_dereference(mdev->rs_plan_s);

	steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */

	if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */
		want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
	} else { /* normal path */
		want = dc->c_fill_target ? dc->c_fill_target :
			sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10);
	}

	correction = want - mdev->rs_in_flight - plan->total;

	/* Plan ahead */
	cps = correction / steps;
	fifo_add_val(plan, cps);
	plan->total += cps * steps;

	/* What we do in this step */
	curr_corr = fifo_push(plan, 0);
	plan->total -= curr_corr;

	req_sect = sect_in + curr_corr;
	if (req_sect < 0)
		req_sect = 0;

	max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ;
	if (req_sect > max_sect)
		req_sect = max_sect;

	/*
	dev_warn(DEV, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
		 sect_in, mdev->rs_in_flight, want, correction,
		 steps, cps, mdev->rs_planed, curr_corr, req_sect);
	*/

	return req_sect;
}

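/* Convert the controller output (or the static resync_rate, if no plan
 * buffer is configured) into the number of BM_BLOCK_SIZE sized requests
 * to generate during the next SLEEP_TIME interval, and record the
 * resulting rate in c_sync_rate for reporting. */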
static int drbd_rs_number_requests(struct drbd_conf *mdev)
{
	int number;

	rcu_read_lock();
	if (rcu_dereference(mdev->rs_plan_s)->size) {
		number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9);
		mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
	} else {
		mdev->c_sync_rate = rcu_dereference(mdev->ldev->disk_conf)->resync_rate;
		number = SLEEP_TIME * mdev->c_sync_rate  / ((BM_BLOCK_SIZE / 1024) * HZ);
	}
	rcu_read_unlock();

	/* ignore the amount of pending requests, the resync controller should
	 * throttle down to incoming reply rate soon enough anyways. */
	return number;
}

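/* Walk the out-of-sync bitmap starting at bm_resync_fo and generate up to
 * "number" resync requests for this SLEEP_TIME interval.  Adjacent dirty
 * bits are merged into one request up to max_bio_size, kept aligned and
 * never crossing a bitmap extent boundary.  If a checksum algorithm is in
 * use (and the peer speaks protocol 89 or newer), the data is first read
 * locally via read_for_csum(); otherwise a plain P_RS_DATA_REQUEST is
 * sent right away. */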
int w_make_resync_request(struct drbd_work *w, int cancel)
{
	struct drbd_conf *mdev = w->mdev;
	unsigned long bit;
	sector_t sector;
	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
	int max_bio_size;
	int number, rollback_i, size;
	int align, queued, sndbuf;
	int i = 0;

	if (unlikely(cancel))
		return 0;

	if (mdev->rs_total == 0) {
		/* empty resync? */
		drbd_resync_finished(mdev);
		return 0;
	}

	if (!get_ldev(mdev)) {
		/* Since we only need to access mdev->rsync a
		   get_ldev_if_state(mdev,D_FAILED) would be sufficient, but
		   to continue resync with a broken disk makes no sense at
		   all */
		dev_err(DEV, "Disk broke down during resync!\n");
		return 0;
	}

	max_bio_size = queue_max_hw_sectors(mdev->rq_queue) << 9;
	number = drbd_rs_number_requests(mdev);
	if (number == 0)
		goto requeue;

	for (i = 0; i < number; i++) {
		/* Stop generating RS requests, when half of the send buffer is filled */
		mutex_lock(&mdev->tconn->data.mutex);
		if (mdev->tconn->data.socket) {
			queued = mdev->tconn->data.socket->sk->sk_wmem_queued;
			sndbuf = mdev->tconn->data.socket->sk->sk_sndbuf;
		} else {
			queued = 1;
			sndbuf = 0;
		}
		mutex_unlock(&mdev->tconn->data.mutex);
		if (queued > sndbuf / 2)
			goto requeue;

next_sector:
		size = BM_BLOCK_SIZE;
		bit  = drbd_bm_find_next(mdev, mdev->bm_resync_fo);

		if (bit == DRBD_END_OF_BITMAP) {
			mdev->bm_resync_fo = drbd_bm_bits(mdev);
			put_ldev(mdev);
			return 0;
		}

		sector = BM_BIT_TO_SECT(bit);

		if (drbd_rs_should_slow_down(mdev, sector) ||
		    drbd_try_rs_begin_io(mdev, sector)) {
			mdev->bm_resync_fo = bit;
			goto requeue;
		}
		mdev->bm_resync_fo = bit + 1;

		if (unlikely(drbd_bm_test_bit(mdev, bit) == 0)) {
			drbd_rs_complete_io(mdev, sector);
			goto next_sector;
		}

#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
		/* try to find some adjacent bits.
		 * we stop if we have already the maximum req size.
		 *
		 * Additionally always align bigger requests, in order to
		 * be prepared for all stripe sizes of software RAIDs.
		 */
		align = 1;
		rollback_i = i;
		for (;;) {
			if (size + BM_BLOCK_SIZE > max_bio_size)
				break;

			/* Be always aligned */
			if (sector & ((1<<(align+3))-1))
				break;

			/* do not cross extent boundaries */
			if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
				break;
			/* now, is it actually dirty, after all?
			 * caution, drbd_bm_test_bit is tri-state for some
			 * obscure reason; ( b == 0 ) would get the out-of-band
			 * only accidentally right because of the "oddly sized"
			 * adjustment below */
			if (drbd_bm_test_bit(mdev, bit+1) != 1)
				break;
			bit++;
			size += BM_BLOCK_SIZE;
			if ((BM_BLOCK_SIZE << align) <= size)
				align++;
			i++;
		}
		/* if we merged some,
		 * reset the offset to start the next drbd_bm_find_next from */
		if (size > BM_BLOCK_SIZE)
			mdev->bm_resync_fo = bit + 1;
#endif

		/* adjust very last sectors, in case we are oddly sized */
		if (sector + (size>>9) > capacity)
			size = (capacity-sector)<<9;
		if (mdev->tconn->agreed_pro_version >= 89 && mdev->tconn->csums_tfm) {
			switch (read_for_csum(mdev, sector, size)) {
			case -EIO: /* Disk failure */
				put_ldev(mdev);
				return -EIO;
			case -EAGAIN: /* allocation failed, or ldev busy */
				drbd_rs_complete_io(mdev, sector);
				mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
				i = rollback_i;
				goto requeue;
			case 0:
				/* everything ok */
				break;
			default:
				BUG();
			}
		} else {
			int err;

			inc_rs_pending(mdev);
			err = drbd_send_drequest(mdev, P_RS_DATA_REQUEST,
						 sector, size, ID_SYNCER);
			if (err) {
				dev_err(DEV, "drbd_send_drequest() failed, aborting...\n");
				dec_rs_pending(mdev);
				put_ldev(mdev);
				return err;
			}
		}
	}

	if (mdev->bm_resync_fo >= drbd_bm_bits(mdev)) {
		/* last syncer _request_ was sent,
		 * but the P_RS_DATA_REPLY not yet received.  sync will end (and
		 * next sync group will resume), as soon as we receive the last
		 * resync data block, and the last bit is cleared.
		 * until then resync "work" is "inactive" ...
		 */
		put_ldev(mdev);
		return 0;
	}

 requeue:
	mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
	mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
	put_ldev(mdev);
	return 0;
}

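/* Generate online verify requests starting at ov_position for this
 * SLEEP_TIME interval.  The same throttling as for resync applies, and
 * the run stops early once the optional stop sector is reached; the stop
 * sector is only checked after at least one request has been issued (see
 * the comment in the loop). */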
static int w_make_ov_request(struct drbd_work *w, int cancel)
{
	struct drbd_conf *mdev = w->mdev;
	int number, i, size;
	sector_t sector;
	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
	bool stop_sector_reached = false;

	if (unlikely(cancel))
		return 1;

	number = drbd_rs_number_requests(mdev);

	sector = mdev->ov_position;
	for (i = 0; i < number; i++) {
		if (sector >= capacity)
			return 1;

		/* We check for "finished" only in the reply path:
		 * w_e_end_ov_reply().
		 * We need to send at least one request out. */
		stop_sector_reached = i > 0
			&& verify_can_do_stop_sector(mdev)
			&& sector >= mdev->ov_stop_sector;
		if (stop_sector_reached)
			break;

		size = BM_BLOCK_SIZE;

		if (drbd_rs_should_slow_down(mdev, sector) ||
		    drbd_try_rs_begin_io(mdev, sector)) {
			mdev->ov_position = sector;
			goto requeue;
		}

		if (sector + (size>>9) > capacity)
			size = (capacity-sector)<<9;

		inc_rs_pending(mdev);
		if (drbd_send_ov_request(mdev, sector, size)) {
			dec_rs_pending(mdev);
			return 0;
		}
		sector += BM_SECT_PER_BIT;
	}
	mdev->ov_position = sector;

 requeue:
	mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
	if (i == 0 || !stop_sector_reached)
		mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
	return 1;
}

int w_ov_finished(struct drbd_work *w, int cancel)
{
	struct drbd_conf *mdev = w->mdev;
	kfree(w);
	ov_out_of_sync_print(mdev);
	drbd_resync_finished(mdev);

	return 0;
}

static int w_resync_finished(struct drbd_work *w, int cancel)
{
	struct drbd_conf *mdev = w->mdev;
	kfree(w);

	drbd_resync_finished(mdev);

	return 0;
}

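/* Send a ping to the peer and block until the ping ack arrives or the
 * connection is lost.  Used by drbd_resync_finished() below before the
 * final state transition. */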
static void ping_peer(struct drbd_conf *mdev)
{
	struct drbd_tconn *tconn = mdev->tconn;

	clear_bit(GOT_PING_ACK, &tconn->flags);
	request_ping(tconn);
	wait_event(tconn->ping_wait,
		   test_bit(GOT_PING_ACK, &tconn->flags) || mdev->state.conn < C_CONNECTED);
}

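/* Finish a resync or online verify run: log the achieved throughput,
 * update the disk/peer disk states and UUIDs as appropriate, and possibly
 * invoke the "out-of-sync" or "after-resync-target" user space helpers.
 * If the resync LRU cannot be emptied yet, the function requeues itself
 * as a work item and retries. */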
int drbd_resync_finished(struct drbd_conf *mdev)
{
	unsigned long db, dt, dbdt;
	unsigned long n_oos;
	union drbd_state os, ns;
	struct drbd_work *w;
	char *khelper_cmd = NULL;
	int verify_done = 0;

	/* Remove all elements from the resync LRU. Since future actions
	 * might set bits in the (main) bitmap, then the entries in the
	 * resync LRU would be wrong. */
	if (drbd_rs_del_all(mdev)) {
		/* In case this is not possible now, most probably because
		 * there are P_RS_DATA_REPLY Packets lingering on the worker's
		 * queue (or even the read operations for those packets
		 * is not finished by now).   Retry in 100ms. */

		schedule_timeout_interruptible(HZ / 10);
		w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC);
		if (w) {
			w->cb = w_resync_finished;
			w->mdev = mdev;
			drbd_queue_work(&mdev->tconn->sender_work, w);
			return 1;
		}
		dev_err(DEV, "Warn failed to drbd_rs_del_all() and to kmalloc(w).\n");
	}

	dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
	if (dt <= 0)
		dt = 1;

	db = mdev->rs_total;
	/* adjust for verify start and stop sectors, respective reached position */
	if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
		db -= mdev->ov_left;

	dbdt = Bit2KB(db/dt);
	mdev->rs_paused /= HZ;

	if (!get_ldev(mdev))
		goto out;

	ping_peer(mdev);

	spin_lock_irq(&mdev->tconn->req_lock);
	os = drbd_read_state(mdev);

	verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);

	/* This protects us against multiple calls (that can happen in the presence
	   of application IO), and against connectivity loss just before we arrive here. */
	if (os.conn <= C_CONNECTED)
		goto out_unlock;

	ns = os;
	ns.conn = C_CONNECTED;

	dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
	     verify_done ? "Online verify" : "Resync",
	     dt + mdev->rs_paused, mdev->rs_paused, dbdt);

	n_oos = drbd_bm_total_weight(mdev);

	if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
		if (n_oos) {
			dev_alert(DEV, "Online verify found %lu %dk blocks out of sync!\n",
878b411b363SPhilipp Reisner 			      n_oos, Bit2KB(1));
879b411b363SPhilipp Reisner 			khelper_cmd = "out-of-sync";
880b411b363SPhilipp Reisner 		}
881b411b363SPhilipp Reisner 	} else {
882b411b363SPhilipp Reisner 		D_ASSERT((n_oos - mdev->rs_failed) == 0);
883b411b363SPhilipp Reisner 
884b411b363SPhilipp Reisner 		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
885b411b363SPhilipp Reisner 			khelper_cmd = "after-resync-target";
886b411b363SPhilipp Reisner 
887f399002eSLars Ellenberg 		if (mdev->tconn->csums_tfm && mdev->rs_total) {
888b411b363SPhilipp Reisner 			const unsigned long s = mdev->rs_same_csum;
889b411b363SPhilipp Reisner 			const unsigned long t = mdev->rs_total;
890b411b363SPhilipp Reisner 			const int ratio =
891b411b363SPhilipp Reisner 				(t == 0)     ? 0 :
892b411b363SPhilipp Reisner 			(t < 100000) ? ((s*100)/t) : (s/(t/100));
893b411b363SPhilipp Reisner 			dev_info(DEV, "%u %% had equal checksums, eliminated: %luK; "
894b411b363SPhilipp Reisner 			     "transferred %luK total %luK\n",
895b411b363SPhilipp Reisner 			     ratio,
896b411b363SPhilipp Reisner 			     Bit2KB(mdev->rs_same_csum),
897b411b363SPhilipp Reisner 			     Bit2KB(mdev->rs_total - mdev->rs_same_csum),
898b411b363SPhilipp Reisner 			     Bit2KB(mdev->rs_total));
899b411b363SPhilipp Reisner 		}
900b411b363SPhilipp Reisner 	}
901b411b363SPhilipp Reisner 
902b411b363SPhilipp Reisner 	if (mdev->rs_failed) {
903b411b363SPhilipp Reisner 		dev_info(DEV, "            %lu failed blocks\n", mdev->rs_failed);
904b411b363SPhilipp Reisner 
905b411b363SPhilipp Reisner 		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
906b411b363SPhilipp Reisner 			ns.disk = D_INCONSISTENT;
907b411b363SPhilipp Reisner 			ns.pdsk = D_UP_TO_DATE;
908b411b363SPhilipp Reisner 		} else {
909b411b363SPhilipp Reisner 			ns.disk = D_UP_TO_DATE;
910b411b363SPhilipp Reisner 			ns.pdsk = D_INCONSISTENT;
911b411b363SPhilipp Reisner 		}
912b411b363SPhilipp Reisner 	} else {
913b411b363SPhilipp Reisner 		ns.disk = D_UP_TO_DATE;
914b411b363SPhilipp Reisner 		ns.pdsk = D_UP_TO_DATE;
915b411b363SPhilipp Reisner 
916b411b363SPhilipp Reisner 		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
917b411b363SPhilipp Reisner 			if (mdev->p_uuid) {
918b411b363SPhilipp Reisner 				int i;
919b411b363SPhilipp Reisner 				for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
920b411b363SPhilipp Reisner 					_drbd_uuid_set(mdev, i, mdev->p_uuid[i]);
921b411b363SPhilipp Reisner 				drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_CURRENT]);
922b411b363SPhilipp Reisner 				_drbd_uuid_set(mdev, UI_CURRENT, mdev->p_uuid[UI_CURRENT]);
923b411b363SPhilipp Reisner 			} else {
924b411b363SPhilipp Reisner 				dev_err(DEV, "mdev->p_uuid is NULL! BUG\n");
925b411b363SPhilipp Reisner 			}
926b411b363SPhilipp Reisner 		}
927b411b363SPhilipp Reisner 
92862b0da3aSLars Ellenberg 		if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
92962b0da3aSLars Ellenberg 			/* for verify runs, we don't update uuids here,
93062b0da3aSLars Ellenberg 			 * so there would be nothing to report. */
931b411b363SPhilipp Reisner 			drbd_uuid_set_bm(mdev, 0UL);
93262b0da3aSLars Ellenberg 			drbd_print_uuids(mdev, "updated UUIDs");
933b411b363SPhilipp Reisner 			if (mdev->p_uuid) {
934b411b363SPhilipp Reisner 				/* Now the two UUID sets are equal, update what we
935b411b363SPhilipp Reisner 				 * know of the peer. */
936b411b363SPhilipp Reisner 				int i;
937b411b363SPhilipp Reisner 				for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
938b411b363SPhilipp Reisner 					mdev->p_uuid[i] = mdev->ldev->md.uuid[i];
939b411b363SPhilipp Reisner 			}
940b411b363SPhilipp Reisner 		}
94162b0da3aSLars Ellenberg 	}
942b411b363SPhilipp Reisner 
943b411b363SPhilipp Reisner 	_drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
944b411b363SPhilipp Reisner out_unlock:
94587eeee41SPhilipp Reisner 	spin_unlock_irq(&mdev->tconn->req_lock);
946b411b363SPhilipp Reisner 	put_ldev(mdev);
947b411b363SPhilipp Reisner out:
948b411b363SPhilipp Reisner 	mdev->rs_total  = 0;
949b411b363SPhilipp Reisner 	mdev->rs_failed = 0;
950b411b363SPhilipp Reisner 	mdev->rs_paused = 0;
95158ffa580SLars Ellenberg 
95258ffa580SLars Ellenberg 	/* reset start sector, if we reached end of device */
95358ffa580SLars Ellenberg 	if (verify_done && mdev->ov_left == 0)
954b411b363SPhilipp Reisner 		mdev->ov_start_sector = 0;
955b411b363SPhilipp Reisner 
95613d42685SLars Ellenberg 	drbd_md_sync(mdev);
95713d42685SLars Ellenberg 
958b411b363SPhilipp Reisner 	if (khelper_cmd)
959b411b363SPhilipp Reisner 		drbd_khelper(mdev, khelper_cmd);
960b411b363SPhilipp Reisner 
961b411b363SPhilipp Reisner 	return 1;
962b411b363SPhilipp Reisner }
963b411b363SPhilipp Reisner 
964b411b363SPhilipp Reisner /* helper */
965db830c46SAndreas Gruenbacher static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_peer_request *peer_req)
966b411b363SPhilipp Reisner {
967045417f7SAndreas Gruenbacher 	if (drbd_peer_req_has_active_page(peer_req)) {
968b411b363SPhilipp Reisner 		/* This might happen if sendpage() has not finished */
969db830c46SAndreas Gruenbacher 		int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
970435f0740SLars Ellenberg 		atomic_add(i, &mdev->pp_in_use_by_net);
971435f0740SLars Ellenberg 		atomic_sub(i, &mdev->pp_in_use);
97287eeee41SPhilipp Reisner 		spin_lock_irq(&mdev->tconn->req_lock);
973db830c46SAndreas Gruenbacher 		list_add_tail(&peer_req->w.list, &mdev->net_ee);
97487eeee41SPhilipp Reisner 		spin_unlock_irq(&mdev->tconn->req_lock);
975435f0740SLars Ellenberg 		wake_up(&drbd_pp_wait);
976b411b363SPhilipp Reisner 	} else
9773967deb1SAndreas Gruenbacher 		drbd_free_peer_req(mdev, peer_req);
978b411b363SPhilipp Reisner }
979b411b363SPhilipp Reisner 
980b411b363SPhilipp Reisner /**
981b411b363SPhilipp Reisner  * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
982b411b363SPhilipp Reisner  * @mdev:	DRBD device.
983b411b363SPhilipp Reisner  * @w:		work object.
984b411b363SPhilipp Reisner  * @cancel:	The connection will be closed anyways
985b411b363SPhilipp Reisner  */
98699920dc5SAndreas Gruenbacher int w_e_end_data_req(struct drbd_work *w, int cancel)
987b411b363SPhilipp Reisner {
988db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
98900d56944SPhilipp Reisner 	struct drbd_conf *mdev = w->mdev;
99099920dc5SAndreas Gruenbacher 	int err;
991b411b363SPhilipp Reisner 
992b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
9933967deb1SAndreas Gruenbacher 		drbd_free_peer_req(mdev, peer_req);
994b411b363SPhilipp Reisner 		dec_unacked(mdev);
99599920dc5SAndreas Gruenbacher 		return 0;
996b411b363SPhilipp Reisner 	}
997b411b363SPhilipp Reisner 
998db830c46SAndreas Gruenbacher 	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
99999920dc5SAndreas Gruenbacher 		err = drbd_send_block(mdev, P_DATA_REPLY, peer_req);
1000b411b363SPhilipp Reisner 	} else {
1001b411b363SPhilipp Reisner 		if (__ratelimit(&drbd_ratelimit_state))
1002b411b363SPhilipp Reisner 			dev_err(DEV, "Sending NegDReply. sector=%llus.\n",
1003db830c46SAndreas Gruenbacher 			    (unsigned long long)peer_req->i.sector);
1004b411b363SPhilipp Reisner 
100599920dc5SAndreas Gruenbacher 		err = drbd_send_ack(mdev, P_NEG_DREPLY, peer_req);
1006b411b363SPhilipp Reisner 	}
1007b411b363SPhilipp Reisner 
1008b411b363SPhilipp Reisner 	dec_unacked(mdev);
1009b411b363SPhilipp Reisner 
1010db830c46SAndreas Gruenbacher 	move_to_net_ee_or_free(mdev, peer_req);
1011b411b363SPhilipp Reisner 
101299920dc5SAndreas Gruenbacher 	if (unlikely(err))
1013b411b363SPhilipp Reisner 		dev_err(DEV, "drbd_send_block() failed\n");
101499920dc5SAndreas Gruenbacher 	return err;
1015b411b363SPhilipp Reisner }
1016b411b363SPhilipp Reisner 
1017b411b363SPhilipp Reisner /**
1018a209b4aeSAndreas Gruenbacher  * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
1019b411b363SPhilipp Reisner  * @mdev:	DRBD device.
1020b411b363SPhilipp Reisner  * @w:		work object.
1021b411b363SPhilipp Reisner  * @cancel:	The connection will be closed anyway
1022b411b363SPhilipp Reisner  */
102399920dc5SAndreas Gruenbacher int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
1024b411b363SPhilipp Reisner {
1025db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
102600d56944SPhilipp Reisner 	struct drbd_conf *mdev = w->mdev;
102799920dc5SAndreas Gruenbacher 	int err;
1028b411b363SPhilipp Reisner 
1029b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
10303967deb1SAndreas Gruenbacher 		drbd_free_peer_req(mdev, peer_req);
1031b411b363SPhilipp Reisner 		dec_unacked(mdev);
103299920dc5SAndreas Gruenbacher 		return 0;
1033b411b363SPhilipp Reisner 	}
1034b411b363SPhilipp Reisner 
1035b411b363SPhilipp Reisner 	if (get_ldev_if_state(mdev, D_FAILED)) {
1036db830c46SAndreas Gruenbacher 		drbd_rs_complete_io(mdev, peer_req->i.sector);
1037b411b363SPhilipp Reisner 		put_ldev(mdev);
1038b411b363SPhilipp Reisner 	}
1039b411b363SPhilipp Reisner 
1040d612d309SPhilipp Reisner 	if (mdev->state.conn == C_AHEAD) {
104199920dc5SAndreas Gruenbacher 		err = drbd_send_ack(mdev, P_RS_CANCEL, peer_req);
1042db830c46SAndreas Gruenbacher 	} else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1043b411b363SPhilipp Reisner 		if (likely(mdev->state.pdsk >= D_INCONSISTENT)) {
1044b411b363SPhilipp Reisner 			inc_rs_pending(mdev);
104599920dc5SAndreas Gruenbacher 			err = drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req);
1046b411b363SPhilipp Reisner 		} else {
1047b411b363SPhilipp Reisner 			if (__ratelimit(&drbd_ratelimit_state))
1048b411b363SPhilipp Reisner 				dev_err(DEV, "Not sending RSDataReply, "
1049b411b363SPhilipp Reisner 				    "partner DISKLESS!\n");
105099920dc5SAndreas Gruenbacher 			err = 0;
1051b411b363SPhilipp Reisner 		}
1052b411b363SPhilipp Reisner 	} else {
1053b411b363SPhilipp Reisner 		if (__ratelimit(&drbd_ratelimit_state))
1054b411b363SPhilipp Reisner 			dev_err(DEV, "Sending NegRSDReply. sector %llus.\n",
1055db830c46SAndreas Gruenbacher 			    (unsigned long long)peer_req->i.sector);
1056b411b363SPhilipp Reisner 
105799920dc5SAndreas Gruenbacher 		err = drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req);
1058b411b363SPhilipp Reisner 
1059b411b363SPhilipp Reisner 		/* update resync data with failure */
1060db830c46SAndreas Gruenbacher 		drbd_rs_failed_io(mdev, peer_req->i.sector, peer_req->i.size);
1061b411b363SPhilipp Reisner 	}
1062b411b363SPhilipp Reisner 
1063b411b363SPhilipp Reisner 	dec_unacked(mdev);
1064b411b363SPhilipp Reisner 
1065db830c46SAndreas Gruenbacher 	move_to_net_ee_or_free(mdev, peer_req);
1066b411b363SPhilipp Reisner 
106799920dc5SAndreas Gruenbacher 	if (unlikely(err))
1068b411b363SPhilipp Reisner 		dev_err(DEV, "drbd_send_block() failed\n");
106999920dc5SAndreas Gruenbacher 	return err;
1070b411b363SPhilipp Reisner }
1071b411b363SPhilipp Reisner 
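/**
 * w_e_end_csum_rs_req() - Worker callback for a checksum based resync request (P_CSUM_RS_REQUEST)
 * @w:		work object.
 * @cancel:	The connection will be closed anyway
 *
 * Compares the digest received from the peer with the digest of the local
 * block: if they match, the block is marked in sync and P_RS_IS_IN_SYNC is
 * sent, otherwise the full block is sent as a P_RS_DATA_REPLY.
 */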
107299920dc5SAndreas Gruenbacher int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
1073b411b363SPhilipp Reisner {
1074db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
107500d56944SPhilipp Reisner 	struct drbd_conf *mdev = w->mdev;
1076b411b363SPhilipp Reisner 	struct digest_info *di;
1077b411b363SPhilipp Reisner 	int digest_size;
1078b411b363SPhilipp Reisner 	void *digest = NULL;
107999920dc5SAndreas Gruenbacher 	int err, eq = 0;
1080b411b363SPhilipp Reisner 
1081b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
10823967deb1SAndreas Gruenbacher 		drbd_free_peer_req(mdev, peer_req);
1083b411b363SPhilipp Reisner 		dec_unacked(mdev);
108499920dc5SAndreas Gruenbacher 		return 0;
1085b411b363SPhilipp Reisner 	}
1086b411b363SPhilipp Reisner 
10871d53f09eSLars Ellenberg 	if (get_ldev(mdev)) {
1088db830c46SAndreas Gruenbacher 		drbd_rs_complete_io(mdev, peer_req->i.sector);
10891d53f09eSLars Ellenberg 		put_ldev(mdev);
10901d53f09eSLars Ellenberg 	}
1091b411b363SPhilipp Reisner 
1092db830c46SAndreas Gruenbacher 	di = peer_req->digest;
1093b411b363SPhilipp Reisner 
1094db830c46SAndreas Gruenbacher 	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1095b411b363SPhilipp Reisner 		/* quick hack to try to avoid a race against reconfiguration.
1096b411b363SPhilipp Reisner 		 * a real fix would be much more involved,
1097b411b363SPhilipp Reisner 		 * introducing more locking mechanisms */
1098f399002eSLars Ellenberg 		if (mdev->tconn->csums_tfm) {
1099f399002eSLars Ellenberg 			digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm);
1100b411b363SPhilipp Reisner 			D_ASSERT(digest_size == di->digest_size);
1101b411b363SPhilipp Reisner 			digest = kmalloc(digest_size, GFP_NOIO);
1102b411b363SPhilipp Reisner 		}
1103b411b363SPhilipp Reisner 		if (digest) {
1104f399002eSLars Ellenberg 			drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest);
1105b411b363SPhilipp Reisner 			eq = !memcmp(digest, di->digest, digest_size);
1106b411b363SPhilipp Reisner 			kfree(digest);
1107b411b363SPhilipp Reisner 		}
1108b411b363SPhilipp Reisner 
1109b411b363SPhilipp Reisner 		if (eq) {
1110db830c46SAndreas Gruenbacher 			drbd_set_in_sync(mdev, peer_req->i.sector, peer_req->i.size);
1111676396d5SLars Ellenberg 			/* rs_same_csum unit is BM_BLOCK_SIZE */
1112db830c46SAndreas Gruenbacher 			mdev->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT;
111399920dc5SAndreas Gruenbacher 			err = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, peer_req);
1114b411b363SPhilipp Reisner 		} else {
1115b411b363SPhilipp Reisner 			inc_rs_pending(mdev);
1116db830c46SAndreas Gruenbacher 			peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
1117db830c46SAndreas Gruenbacher 			peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */
1118204bba99SPhilipp Reisner 			kfree(di);
111999920dc5SAndreas Gruenbacher 			err = drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req);
1120b411b363SPhilipp Reisner 		}
1121b411b363SPhilipp Reisner 	} else {
112299920dc5SAndreas Gruenbacher 		err = drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req);
1123b411b363SPhilipp Reisner 		if (__ratelimit(&drbd_ratelimit_state))
1124b411b363SPhilipp Reisner 			dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n");
1125b411b363SPhilipp Reisner 	}
1126b411b363SPhilipp Reisner 
1127b411b363SPhilipp Reisner 	dec_unacked(mdev);
1128db830c46SAndreas Gruenbacher 	move_to_net_ee_or_free(mdev, peer_req);
1129b411b363SPhilipp Reisner 
113099920dc5SAndreas Gruenbacher 	if (unlikely(err))
1131b411b363SPhilipp Reisner 		dev_err(DEV, "drbd_send_block/ack() failed\n");
113299920dc5SAndreas Gruenbacher 	return err;
1133b411b363SPhilipp Reisner }
1134b411b363SPhilipp Reisner 
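/**
 * w_e_end_ov_req() - Worker callback for an online verify request (P_OV_REQUEST)
 * @w:		work object.
 * @cancel:	The connection will be closed anyway
 *
 * Computes the digest of the local block and sends it back to the peer as
 * P_OV_REPLY.  The peer request is freed before the reply is sent, to avoid
 * a distributed deadlock on drbd_alloc_pages (see the comment further down).
 */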
113599920dc5SAndreas Gruenbacher int w_e_end_ov_req(struct drbd_work *w, int cancel)
1136b411b363SPhilipp Reisner {
1137db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
113800d56944SPhilipp Reisner 	struct drbd_conf *mdev = w->mdev;
1139db830c46SAndreas Gruenbacher 	sector_t sector = peer_req->i.sector;
1140db830c46SAndreas Gruenbacher 	unsigned int size = peer_req->i.size;
1141b411b363SPhilipp Reisner 	int digest_size;
1142b411b363SPhilipp Reisner 	void *digest;
114399920dc5SAndreas Gruenbacher 	int err = 0;
1144b411b363SPhilipp Reisner 
1145b411b363SPhilipp Reisner 	if (unlikely(cancel))
1146b411b363SPhilipp Reisner 		goto out;
1147b411b363SPhilipp Reisner 
1148f399002eSLars Ellenberg 	digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm);
1149b411b363SPhilipp Reisner 	digest = kmalloc(digest_size, GFP_NOIO);
11508f21420eSPhilipp Reisner 	if (!digest) {
115199920dc5SAndreas Gruenbacher 		err = 1;	/* terminate the connection in case the allocation failed */
11528f21420eSPhilipp Reisner 		goto out;
11538f21420eSPhilipp Reisner 	}
11548f21420eSPhilipp Reisner 
1155db830c46SAndreas Gruenbacher 	if (likely(!(peer_req->flags & EE_WAS_ERROR)))
1156f399002eSLars Ellenberg 		drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest);
11578f21420eSPhilipp Reisner 	else
11588f21420eSPhilipp Reisner 		memset(digest, 0, digest_size);
11598f21420eSPhilipp Reisner 
116053ea4331SLars Ellenberg 	/* Free peer_req and pages before send.
116153ea4331SLars Ellenberg 	 * In case we block on congestion, we could otherwise run into
116253ea4331SLars Ellenberg 	 * some distributed deadlock, if the other side blocks on
116353ea4331SLars Ellenberg 	 * congestion as well, because our receiver blocks in
1164c37c8ecfSAndreas Gruenbacher 	 * drbd_alloc_pages due to pp_in_use > max_buffers. */
11653967deb1SAndreas Gruenbacher 	drbd_free_peer_req(mdev, peer_req);
1166db830c46SAndreas Gruenbacher 	peer_req = NULL;
1167b411b363SPhilipp Reisner 	inc_rs_pending(mdev);
116899920dc5SAndreas Gruenbacher 	err = drbd_send_drequest_csum(mdev, sector, size, digest, digest_size, P_OV_REPLY);
116999920dc5SAndreas Gruenbacher 	if (err)
1170b411b363SPhilipp Reisner 		dec_rs_pending(mdev);
1171b411b363SPhilipp Reisner 	kfree(digest);
1172b411b363SPhilipp Reisner 
1173b411b363SPhilipp Reisner out:
1174db830c46SAndreas Gruenbacher 	if (peer_req)
11753967deb1SAndreas Gruenbacher 		drbd_free_peer_req(mdev, peer_req);
1176b411b363SPhilipp Reisner 	dec_unacked(mdev);
117799920dc5SAndreas Gruenbacher 	return err;
1178b411b363SPhilipp Reisner }
1179b411b363SPhilipp Reisner 
11808f7bed77SAndreas Gruenbacher void drbd_ov_out_of_sync_found(struct drbd_conf *mdev, sector_t sector, int size)
1181b411b363SPhilipp Reisner {
1182b411b363SPhilipp Reisner 	if (mdev->ov_last_oos_start + mdev->ov_last_oos_size == sector) {
1183b411b363SPhilipp Reisner 		mdev->ov_last_oos_size += size>>9;
1184b411b363SPhilipp Reisner 	} else {
1185b411b363SPhilipp Reisner 		mdev->ov_last_oos_start = sector;
1186b411b363SPhilipp Reisner 		mdev->ov_last_oos_size = size>>9;
1187b411b363SPhilipp Reisner 	}
1188b411b363SPhilipp Reisner 	drbd_set_out_of_sync(mdev, sector, size);
1189b411b363SPhilipp Reisner }
1190b411b363SPhilipp Reisner 
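/**
 * w_e_end_ov_reply() - Worker callback for an online verify reply (P_OV_REPLY)
 * @w:		work object.
 * @cancel:	The connection will be closed anyway
 *
 * Compares the digest received from the peer with the digest of the local
 * block, records any out-of-sync range, and reports the result to the peer
 * with P_OV_RESULT.  When the last block (or the verify stop sector) has
 * been processed, the verify run is finished.
 */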
119199920dc5SAndreas Gruenbacher int w_e_end_ov_reply(struct drbd_work *w, int cancel)
1192b411b363SPhilipp Reisner {
1193db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
119400d56944SPhilipp Reisner 	struct drbd_conf *mdev = w->mdev;
1195b411b363SPhilipp Reisner 	struct digest_info *di;
1196b411b363SPhilipp Reisner 	void *digest;
1197db830c46SAndreas Gruenbacher 	sector_t sector = peer_req->i.sector;
1198db830c46SAndreas Gruenbacher 	unsigned int size = peer_req->i.size;
119953ea4331SLars Ellenberg 	int digest_size;
120099920dc5SAndreas Gruenbacher 	int err, eq = 0;
120158ffa580SLars Ellenberg 	bool stop_sector_reached = false;
1202b411b363SPhilipp Reisner 
1203b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
12043967deb1SAndreas Gruenbacher 		drbd_free_peer_req(mdev, peer_req);
1205b411b363SPhilipp Reisner 		dec_unacked(mdev);
120699920dc5SAndreas Gruenbacher 		return 0;
1207b411b363SPhilipp Reisner 	}
1208b411b363SPhilipp Reisner 
1209b411b363SPhilipp Reisner 	/* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
1210b411b363SPhilipp Reisner 	 * the resync lru has been cleaned up already */
12111d53f09eSLars Ellenberg 	if (get_ldev(mdev)) {
1212db830c46SAndreas Gruenbacher 		drbd_rs_complete_io(mdev, peer_req->i.sector);
12131d53f09eSLars Ellenberg 		put_ldev(mdev);
12141d53f09eSLars Ellenberg 	}
1215b411b363SPhilipp Reisner 
1216db830c46SAndreas Gruenbacher 	di = peer_req->digest;
1217b411b363SPhilipp Reisner 
1218db830c46SAndreas Gruenbacher 	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1219f399002eSLars Ellenberg 		digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm);
1220b411b363SPhilipp Reisner 		digest = kmalloc(digest_size, GFP_NOIO);
1221b411b363SPhilipp Reisner 		if (digest) {
1222f399002eSLars Ellenberg 			drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest);
1223b411b363SPhilipp Reisner 
1224b411b363SPhilipp Reisner 			D_ASSERT(digest_size == di->digest_size);
1225b411b363SPhilipp Reisner 			eq = !memcmp(digest, di->digest, digest_size);
1226b411b363SPhilipp Reisner 			kfree(digest);
1227b411b363SPhilipp Reisner 		}
1228b411b363SPhilipp Reisner 	}
1229b411b363SPhilipp Reisner 
12309676c760SLars Ellenberg 	/* Free peer_req and pages before send.
123153ea4331SLars Ellenberg 	 * In case we block on congestion, we could otherwise run into
123253ea4331SLars Ellenberg 	 * some distributed deadlock, if the other side blocks on
123353ea4331SLars Ellenberg 	 * congestion as well, because our receiver blocks in
1234c37c8ecfSAndreas Gruenbacher 	 * drbd_alloc_pages due to pp_in_use > max_buffers. */
12353967deb1SAndreas Gruenbacher 	drbd_free_peer_req(mdev, peer_req);
1236b411b363SPhilipp Reisner 	if (!eq)
12378f7bed77SAndreas Gruenbacher 		drbd_ov_out_of_sync_found(mdev, sector, size);
1238b411b363SPhilipp Reisner 	else
12398f7bed77SAndreas Gruenbacher 		ov_out_of_sync_print(mdev);
1240b411b363SPhilipp Reisner 
124199920dc5SAndreas Gruenbacher 	err = drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size,
1242b411b363SPhilipp Reisner 			       eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
1243b411b363SPhilipp Reisner 
124453ea4331SLars Ellenberg 	dec_unacked(mdev);
1245b411b363SPhilipp Reisner 
1246ea5442afSLars Ellenberg 	--mdev->ov_left;
1247ea5442afSLars Ellenberg 
1248ea5442afSLars Ellenberg 	/* let's advance progress step marks only for every other megabyte */
1249ea5442afSLars Ellenberg 	if ((mdev->ov_left & 0x200) == 0x200)
1250ea5442afSLars Ellenberg 		drbd_advance_rs_marks(mdev, mdev->ov_left);
1251ea5442afSLars Ellenberg 
125258ffa580SLars Ellenberg 	stop_sector_reached = verify_can_do_stop_sector(mdev) &&
125358ffa580SLars Ellenberg 		(sector + (size>>9)) >= mdev->ov_stop_sector;
125458ffa580SLars Ellenberg 
125558ffa580SLars Ellenberg 	if (mdev->ov_left == 0 || stop_sector_reached) {
12568f7bed77SAndreas Gruenbacher 		ov_out_of_sync_print(mdev);
1257b411b363SPhilipp Reisner 		drbd_resync_finished(mdev);
1258b411b363SPhilipp Reisner 	}
1259b411b363SPhilipp Reisner 
126099920dc5SAndreas Gruenbacher 	return err;
1261b411b363SPhilipp Reisner }
1262b411b363SPhilipp Reisner 
126399920dc5SAndreas Gruenbacher int w_prev_work_done(struct drbd_work *w, int cancel)
1264b411b363SPhilipp Reisner {
1265b411b363SPhilipp Reisner 	struct drbd_wq_barrier *b = container_of(w, struct drbd_wq_barrier, w);
126600d56944SPhilipp Reisner 
1267b411b363SPhilipp Reisner 	complete(&b->done);
126899920dc5SAndreas Gruenbacher 	return 0;
1269b411b363SPhilipp Reisner }
1270b411b363SPhilipp Reisner 
1271b6dd1a89SLars Ellenberg /* FIXME
1272b6dd1a89SLars Ellenberg  * We need to track the number of pending barrier acks,
1273b6dd1a89SLars Ellenberg  * and to be able to wait for them.
1274b6dd1a89SLars Ellenberg  * See also comment in drbd_adm_attach before drbd_suspend_io.
1275b6dd1a89SLars Ellenberg  */
1276b6dd1a89SLars Ellenberg int drbd_send_barrier(struct drbd_tconn *tconn)
1277b411b363SPhilipp Reisner {
12789f5bdc33SAndreas Gruenbacher 	struct p_barrier *p;
1279b6dd1a89SLars Ellenberg 	struct drbd_socket *sock;
1280b411b363SPhilipp Reisner 
1281b6dd1a89SLars Ellenberg 	sock = &tconn->data;
1282b6dd1a89SLars Ellenberg 	p = conn_prepare_command(tconn, sock);
12839f5bdc33SAndreas Gruenbacher 	if (!p)
12849f5bdc33SAndreas Gruenbacher 		return -EIO;
1285b6dd1a89SLars Ellenberg 	p->barrier = tconn->send.current_epoch_nr;
1286b6dd1a89SLars Ellenberg 	p->pad = 0;
1287b6dd1a89SLars Ellenberg 	tconn->send.current_epoch_writes = 0;
1288b6dd1a89SLars Ellenberg 
1289b6dd1a89SLars Ellenberg 	return conn_send_command(tconn, sock, P_BARRIER, sizeof(*p), NULL, 0);
1290b411b363SPhilipp Reisner }
1291b411b363SPhilipp Reisner 
129299920dc5SAndreas Gruenbacher int w_send_write_hint(struct drbd_work *w, int cancel)
1293b411b363SPhilipp Reisner {
129400d56944SPhilipp Reisner 	struct drbd_conf *mdev = w->mdev;
12959f5bdc33SAndreas Gruenbacher 	struct drbd_socket *sock;
12969f5bdc33SAndreas Gruenbacher 
1297b411b363SPhilipp Reisner 	if (cancel)
129899920dc5SAndreas Gruenbacher 		return 0;
12999f5bdc33SAndreas Gruenbacher 	sock = &mdev->tconn->data;
13009f5bdc33SAndreas Gruenbacher 	if (!drbd_prepare_command(mdev, sock))
13019f5bdc33SAndreas Gruenbacher 		return -EIO;
1302e658983aSAndreas Gruenbacher 	return drbd_send_command(mdev, sock, P_UNPLUG_REMOTE, 0, NULL, 0);
1303b411b363SPhilipp Reisner }
1304b411b363SPhilipp Reisner 
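/* Epoch / barrier bookkeeping for the sender:
 * re_init_if_first_write() starts epoch accounting with the epoch of the
 * first write seen on this connection; maybe_send_barrier() closes the
 * previously tracked epoch with a P_BARRIER when we are about to send a
 * request that belongs to a newer epoch and the old one contained writes. */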
13054eb9b3cbSLars Ellenberg static void re_init_if_first_write(struct drbd_tconn *tconn, unsigned int epoch)
13064eb9b3cbSLars Ellenberg {
13074eb9b3cbSLars Ellenberg 	if (!tconn->send.seen_any_write_yet) {
13084eb9b3cbSLars Ellenberg 		tconn->send.seen_any_write_yet = true;
13094eb9b3cbSLars Ellenberg 		tconn->send.current_epoch_nr = epoch;
13104eb9b3cbSLars Ellenberg 		tconn->send.current_epoch_writes = 0;
13114eb9b3cbSLars Ellenberg 	}
13124eb9b3cbSLars Ellenberg }
13134eb9b3cbSLars Ellenberg 
13144eb9b3cbSLars Ellenberg static void maybe_send_barrier(struct drbd_tconn *tconn, unsigned int epoch)
13154eb9b3cbSLars Ellenberg {
13164eb9b3cbSLars Ellenberg 	/* no write seen yet on this connection: no epoch to close */
13174eb9b3cbSLars Ellenberg 	if (!tconn->send.seen_any_write_yet)
13184eb9b3cbSLars Ellenberg 		return;
13194eb9b3cbSLars Ellenberg 	if (tconn->send.current_epoch_nr != epoch) {
13204eb9b3cbSLars Ellenberg 		if (tconn->send.current_epoch_writes)
13214eb9b3cbSLars Ellenberg 			drbd_send_barrier(tconn);
13224eb9b3cbSLars Ellenberg 		tconn->send.current_epoch_nr = epoch;
13234eb9b3cbSLars Ellenberg 	}
13244eb9b3cbSLars Ellenberg }
13254eb9b3cbSLars Ellenberg 
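/**
 * w_send_out_of_sync() - Worker callback to send a P_OUT_OF_SYNC packet, typically while in AHEAD mode
 * @w:		work object.
 * @cancel:	The connection will be closed anyway
 *
 * Instead of mirroring the write, only tell the peer which blocks went out
 * of sync, so that they can be resynced once we leave AHEAD mode.
 */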
13268f7bed77SAndreas Gruenbacher int w_send_out_of_sync(struct drbd_work *w, int cancel)
132773a01a18SPhilipp Reisner {
132873a01a18SPhilipp Reisner 	struct drbd_request *req = container_of(w, struct drbd_request, w);
132900d56944SPhilipp Reisner 	struct drbd_conf *mdev = w->mdev;
1330b6dd1a89SLars Ellenberg 	struct drbd_tconn *tconn = mdev->tconn;
133199920dc5SAndreas Gruenbacher 	int err;
133273a01a18SPhilipp Reisner 
133373a01a18SPhilipp Reisner 	if (unlikely(cancel)) {
13348554df1cSAndreas Gruenbacher 		req_mod(req, SEND_CANCELED);
133599920dc5SAndreas Gruenbacher 		return 0;
133673a01a18SPhilipp Reisner 	}
133773a01a18SPhilipp Reisner 
1338b6dd1a89SLars Ellenberg 	/* this time, no tconn->send.current_epoch_writes++;
1339b6dd1a89SLars Ellenberg 	 * If it was sent, it was the closing barrier for the last
1340b6dd1a89SLars Ellenberg 	 * replicated epoch, before we went into AHEAD mode.
1341b6dd1a89SLars Ellenberg 	 * No more barriers will be sent, until we leave AHEAD mode again. */
13424eb9b3cbSLars Ellenberg 	maybe_send_barrier(tconn, req->epoch);
1343b6dd1a89SLars Ellenberg 
13448f7bed77SAndreas Gruenbacher 	err = drbd_send_out_of_sync(mdev, req);
13458554df1cSAndreas Gruenbacher 	req_mod(req, OOS_HANDED_TO_NETWORK);
134673a01a18SPhilipp Reisner 
134799920dc5SAndreas Gruenbacher 	return err;
134873a01a18SPhilipp Reisner }
134973a01a18SPhilipp Reisner 
1350b411b363SPhilipp Reisner /**
1351b411b363SPhilipp Reisner  * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
1352b411b363SPhilipp Reisner  * @mdev:	DRBD device.
1353b411b363SPhilipp Reisner  * @w:		work object.
1354b411b363SPhilipp Reisner  * @cancel:	The connection will be closed anyway
1355b411b363SPhilipp Reisner  */
135699920dc5SAndreas Gruenbacher int w_send_dblock(struct drbd_work *w, int cancel)
1357b411b363SPhilipp Reisner {
1358b411b363SPhilipp Reisner 	struct drbd_request *req = container_of(w, struct drbd_request, w);
135900d56944SPhilipp Reisner 	struct drbd_conf *mdev = w->mdev;
1360b6dd1a89SLars Ellenberg 	struct drbd_tconn *tconn = mdev->tconn;
136199920dc5SAndreas Gruenbacher 	int err;
1362b411b363SPhilipp Reisner 
1363b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
13648554df1cSAndreas Gruenbacher 		req_mod(req, SEND_CANCELED);
136599920dc5SAndreas Gruenbacher 		return 0;
1366b411b363SPhilipp Reisner 	}
1367b411b363SPhilipp Reisner 
13684eb9b3cbSLars Ellenberg 	re_init_if_first_write(tconn, req->epoch);
13694eb9b3cbSLars Ellenberg 	maybe_send_barrier(tconn, req->epoch);
1370b6dd1a89SLars Ellenberg 	tconn->send.current_epoch_writes++;
1371b6dd1a89SLars Ellenberg 
137299920dc5SAndreas Gruenbacher 	err = drbd_send_dblock(mdev, req);
137399920dc5SAndreas Gruenbacher 	req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
1374b411b363SPhilipp Reisner 
137599920dc5SAndreas Gruenbacher 	return err;
1376b411b363SPhilipp Reisner }
1377b411b363SPhilipp Reisner 
1378b411b363SPhilipp Reisner /**
1379b411b363SPhilipp Reisner  * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
1380b411b363SPhilipp Reisner  * @mdev:	DRBD device.
1381b411b363SPhilipp Reisner  * @w:		work object.
1382b411b363SPhilipp Reisner  * @cancel:	The connection will be closed anyway
1383b411b363SPhilipp Reisner  */
138499920dc5SAndreas Gruenbacher int w_send_read_req(struct drbd_work *w, int cancel)
1385b411b363SPhilipp Reisner {
1386b411b363SPhilipp Reisner 	struct drbd_request *req = container_of(w, struct drbd_request, w);
138700d56944SPhilipp Reisner 	struct drbd_conf *mdev = w->mdev;
1388b6dd1a89SLars Ellenberg 	struct drbd_tconn *tconn = mdev->tconn;
138999920dc5SAndreas Gruenbacher 	int err;
1390b411b363SPhilipp Reisner 
1391b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
13928554df1cSAndreas Gruenbacher 		req_mod(req, SEND_CANCELED);
139399920dc5SAndreas Gruenbacher 		return 0;
1394b411b363SPhilipp Reisner 	}
1395b411b363SPhilipp Reisner 
1396b6dd1a89SLars Ellenberg 	/* Even read requests may close a write epoch,
1397b6dd1a89SLars Ellenberg 	 * if there was any yet. */
13984eb9b3cbSLars Ellenberg 	maybe_send_barrier(tconn, req->epoch);
1399b6dd1a89SLars Ellenberg 
140099920dc5SAndreas Gruenbacher 	err = drbd_send_drequest(mdev, P_DATA_REQUEST, req->i.sector, req->i.size,
1401b411b363SPhilipp Reisner 				 (unsigned long)req);
1402b411b363SPhilipp Reisner 
140399920dc5SAndreas Gruenbacher 	req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
1404b411b363SPhilipp Reisner 
140599920dc5SAndreas Gruenbacher 	return err;
1406b411b363SPhilipp Reisner }
1407b411b363SPhilipp Reisner 
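/**
 * w_restart_disk_io() - Worker callback to resubmit a request to the local backing device
 * @w:		work object.
 * @cancel:	ignored
 *
 * Re-acquires the activity log extent for writes that were already in the
 * AL, allocates a fresh private bio from the master bio, and submits it to
 * the local backing device.
 */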
140899920dc5SAndreas Gruenbacher int w_restart_disk_io(struct drbd_work *w, int cancel)
1409265be2d0SPhilipp Reisner {
1410265be2d0SPhilipp Reisner 	struct drbd_request *req = container_of(w, struct drbd_request, w);
141100d56944SPhilipp Reisner 	struct drbd_conf *mdev = w->mdev;
1412265be2d0SPhilipp Reisner 
14130778286aSPhilipp Reisner 	if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
1414181286adSLars Ellenberg 		drbd_al_begin_io(mdev, &req->i);
1415265be2d0SPhilipp Reisner 
1416265be2d0SPhilipp Reisner 	drbd_req_make_private_bio(req, req->master_bio);
1417265be2d0SPhilipp Reisner 	req->private_bio->bi_bdev = mdev->ldev->backing_bdev;
1418265be2d0SPhilipp Reisner 	generic_make_request(req->private_bio);
1419265be2d0SPhilipp Reisner 
142099920dc5SAndreas Gruenbacher 	return 0;
1421265be2d0SPhilipp Reisner }
1422265be2d0SPhilipp Reisner 
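/* Walk the resync-after dependency chain of @mdev: return 0 (may not sync
 * now) if any device we depend on is itself resyncing or has its sync
 * paused, 1 otherwise. */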
1423b411b363SPhilipp Reisner static int _drbd_may_sync_now(struct drbd_conf *mdev)
1424b411b363SPhilipp Reisner {
1425b411b363SPhilipp Reisner 	struct drbd_conf *odev = mdev;
142695f8efd0SAndreas Gruenbacher 	int resync_after;
1427b411b363SPhilipp Reisner 
1428b411b363SPhilipp Reisner 	while (1) {
1429438c8374SPhilipp Reisner 		if (!odev->ldev)
1430438c8374SPhilipp Reisner 			return 1;
1431daeda1ccSPhilipp Reisner 		rcu_read_lock();
143295f8efd0SAndreas Gruenbacher 		resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
1433daeda1ccSPhilipp Reisner 		rcu_read_unlock();
143495f8efd0SAndreas Gruenbacher 		if (resync_after == -1)
1435b411b363SPhilipp Reisner 			return 1;
143695f8efd0SAndreas Gruenbacher 		odev = minor_to_mdev(resync_after);
1437841ce241SAndreas Gruenbacher 		if (!expect(odev))
1438841ce241SAndreas Gruenbacher 			return 1;
1439b411b363SPhilipp Reisner 		if ((odev->state.conn >= C_SYNC_SOURCE &&
1440b411b363SPhilipp Reisner 		     odev->state.conn <= C_PAUSED_SYNC_T) ||
1441b411b363SPhilipp Reisner 		    odev->state.aftr_isp || odev->state.peer_isp ||
1442b411b363SPhilipp Reisner 		    odev->state.user_isp)
1443b411b363SPhilipp Reisner 			return 0;
1444b411b363SPhilipp Reisner 	}
1445b411b363SPhilipp Reisner }
1446b411b363SPhilipp Reisner 
1447b411b363SPhilipp Reisner /**
1448b411b363SPhilipp Reisner  * _drbd_pause_after() - Pause resync on all devices that may not resync now
1449b411b363SPhilipp Reisner  * @mdev:	DRBD device.
1450b411b363SPhilipp Reisner  *
1451b411b363SPhilipp Reisner  * Called from process context only (admin command and after_state_ch).
1452b411b363SPhilipp Reisner  */
1453b411b363SPhilipp Reisner static int _drbd_pause_after(struct drbd_conf *mdev)
1454b411b363SPhilipp Reisner {
1455b411b363SPhilipp Reisner 	struct drbd_conf *odev;
1456b411b363SPhilipp Reisner 	int i, rv = 0;
1457b411b363SPhilipp Reisner 
1458695d08faSPhilipp Reisner 	rcu_read_lock();
145981a5d60eSPhilipp Reisner 	idr_for_each_entry(&minors, odev, i) {
1460b411b363SPhilipp Reisner 		if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1461b411b363SPhilipp Reisner 			continue;
1462b411b363SPhilipp Reisner 		if (!_drbd_may_sync_now(odev))
1463b411b363SPhilipp Reisner 			rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL)
1464b411b363SPhilipp Reisner 			       != SS_NOTHING_TO_DO);
1465b411b363SPhilipp Reisner 	}
1466695d08faSPhilipp Reisner 	rcu_read_unlock();
1467b411b363SPhilipp Reisner 
1468b411b363SPhilipp Reisner 	return rv;
1469b411b363SPhilipp Reisner }
1470b411b363SPhilipp Reisner 
1471b411b363SPhilipp Reisner /**
1472b411b363SPhilipp Reisner  * _drbd_resume_next() - Resume resync on all devices that may resync now
1473b411b363SPhilipp Reisner  * @mdev:	DRBD device.
1474b411b363SPhilipp Reisner  *
1475b411b363SPhilipp Reisner  * Called from process context only (admin command and worker).
1476b411b363SPhilipp Reisner  */
1477b411b363SPhilipp Reisner static int _drbd_resume_next(struct drbd_conf *mdev)
1478b411b363SPhilipp Reisner {
1479b411b363SPhilipp Reisner 	struct drbd_conf *odev;
1480b411b363SPhilipp Reisner 	int i, rv = 0;
1481b411b363SPhilipp Reisner 
1482695d08faSPhilipp Reisner 	rcu_read_lock();
148381a5d60eSPhilipp Reisner 	idr_for_each_entry(&minors, odev, i) {
1484b411b363SPhilipp Reisner 		if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1485b411b363SPhilipp Reisner 			continue;
1486b411b363SPhilipp Reisner 		if (odev->state.aftr_isp) {
1487b411b363SPhilipp Reisner 			if (_drbd_may_sync_now(odev))
1488b411b363SPhilipp Reisner 				rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0),
1489b411b363SPhilipp Reisner 							CS_HARD, NULL)
1490b411b363SPhilipp Reisner 				       != SS_NOTHING_TO_DO);
1491b411b363SPhilipp Reisner 		}
1492b411b363SPhilipp Reisner 	}
1493695d08faSPhilipp Reisner 	rcu_read_unlock();
1494b411b363SPhilipp Reisner 	return rv;
1495b411b363SPhilipp Reisner }
1496b411b363SPhilipp Reisner 
1497b411b363SPhilipp Reisner void resume_next_sg(struct drbd_conf *mdev)
1498b411b363SPhilipp Reisner {
1499b411b363SPhilipp Reisner 	write_lock_irq(&global_state_lock);
1500b411b363SPhilipp Reisner 	_drbd_resume_next(mdev);
1501b411b363SPhilipp Reisner 	write_unlock_irq(&global_state_lock);
1502b411b363SPhilipp Reisner }
1503b411b363SPhilipp Reisner 
1504b411b363SPhilipp Reisner void suspend_other_sg(struct drbd_conf *mdev)
1505b411b363SPhilipp Reisner {
1506b411b363SPhilipp Reisner 	write_lock_irq(&global_state_lock);
1507b411b363SPhilipp Reisner 	_drbd_pause_after(mdev);
1508b411b363SPhilipp Reisner 	write_unlock_irq(&global_state_lock);
1509b411b363SPhilipp Reisner }
1510b411b363SPhilipp Reisner 
1511dc97b708SPhilipp Reisner /* caller must hold global_state_lock */
151295f8efd0SAndreas Gruenbacher enum drbd_ret_code drbd_resync_after_valid(struct drbd_conf *mdev, int o_minor)
1513b411b363SPhilipp Reisner {
1514b411b363SPhilipp Reisner 	struct drbd_conf *odev;
151595f8efd0SAndreas Gruenbacher 	int resync_after;
1516b411b363SPhilipp Reisner 
1517b411b363SPhilipp Reisner 	if (o_minor == -1)
1518b411b363SPhilipp Reisner 		return NO_ERROR;
1519b411b363SPhilipp Reisner 	if (o_minor < -1 || minor_to_mdev(o_minor) == NULL)
152095f8efd0SAndreas Gruenbacher 		return ERR_RESYNC_AFTER;
1521b411b363SPhilipp Reisner 
1522b411b363SPhilipp Reisner 	/* check for loops */
1523b411b363SPhilipp Reisner 	odev = minor_to_mdev(o_minor);
1524b411b363SPhilipp Reisner 	while (1) {
1525b411b363SPhilipp Reisner 		if (odev == mdev)
152695f8efd0SAndreas Gruenbacher 			return ERR_RESYNC_AFTER_CYCLE;
1527b411b363SPhilipp Reisner 
1528daeda1ccSPhilipp Reisner 		rcu_read_lock();
152995f8efd0SAndreas Gruenbacher 		resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
1530daeda1ccSPhilipp Reisner 		rcu_read_unlock();
1531b411b363SPhilipp Reisner 		/* dependency chain ends here, no cycles. */
153295f8efd0SAndreas Gruenbacher 		if (resync_after == -1)
1533b411b363SPhilipp Reisner 			return NO_ERROR;
1534b411b363SPhilipp Reisner 
1535b411b363SPhilipp Reisner 		/* follow the dependency chain */
153695f8efd0SAndreas Gruenbacher 		odev = minor_to_mdev(resync_after);
1537b411b363SPhilipp Reisner 	}
1538b411b363SPhilipp Reisner }
1539b411b363SPhilipp Reisner 
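/* Re-evaluate the resync-after dependencies until a fixed point is reached:
 * pausing one device may allow another one to resume, and vice versa. */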
1540dc97b708SPhilipp Reisner /* caller must hold global_state_lock */
154195f8efd0SAndreas Gruenbacher void drbd_resync_after_changed(struct drbd_conf *mdev)
1542b411b363SPhilipp Reisner {
1543b411b363SPhilipp Reisner 	int changes;
1544b411b363SPhilipp Reisner 
1545b411b363SPhilipp Reisner 	do {
1546b411b363SPhilipp Reisner 		changes  = _drbd_pause_after(mdev);
1547b411b363SPhilipp Reisner 		changes |= _drbd_resume_next(mdev);
1548b411b363SPhilipp Reisner 	} while (changes);
1549b411b363SPhilipp Reisner }
1550b411b363SPhilipp Reisner 
15519bd28d3cSLars Ellenberg void drbd_rs_controller_reset(struct drbd_conf *mdev)
15529bd28d3cSLars Ellenberg {
1553813472ceSPhilipp Reisner 	struct fifo_buffer *plan;
1554813472ceSPhilipp Reisner 
15559bd28d3cSLars Ellenberg 	atomic_set(&mdev->rs_sect_in, 0);
15569bd28d3cSLars Ellenberg 	atomic_set(&mdev->rs_sect_ev, 0);
15579bd28d3cSLars Ellenberg 	mdev->rs_in_flight = 0;
1558813472ceSPhilipp Reisner 
1559813472ceSPhilipp Reisner 	/* Updating the RCU protected object in place is necessary since
1560813472ceSPhilipp Reisner 	   this function gets called from atomic context.
1561813472ceSPhilipp Reisner 	   It is valid since all other updates also lead to a completely
1562813472ceSPhilipp Reisner 	   empty fifo */
1563813472ceSPhilipp Reisner 	rcu_read_lock();
1564813472ceSPhilipp Reisner 	plan = rcu_dereference(mdev->rs_plan_s);
1565813472ceSPhilipp Reisner 	plan->total = 0;
1566813472ceSPhilipp Reisner 	fifo_set(plan, 0);
1567813472ceSPhilipp Reisner 	rcu_read_unlock();
15689bd28d3cSLars Ellenberg }
15699bd28d3cSLars Ellenberg 
15701f04af33SPhilipp Reisner void start_resync_timer_fn(unsigned long data)
15711f04af33SPhilipp Reisner {
15721f04af33SPhilipp Reisner 	struct drbd_conf *mdev = (struct drbd_conf *) data;
15731f04af33SPhilipp Reisner 
1574d5b27b01SLars Ellenberg 	drbd_queue_work(&mdev->tconn->sender_work, &mdev->start_resync_work);
15751f04af33SPhilipp Reisner }
15761f04af33SPhilipp Reisner 
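/* Deferred resync start: while there are still unacknowledged or pending
 * resync packets, retry a little later via start_resync_timer; otherwise
 * start the resync as sync source and clear AHEAD_TO_SYNC_SOURCE. */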
157799920dc5SAndreas Gruenbacher int w_start_resync(struct drbd_work *w, int cancel)
15781f04af33SPhilipp Reisner {
157900d56944SPhilipp Reisner 	struct drbd_conf *mdev = w->mdev;
158000d56944SPhilipp Reisner 
15811f04af33SPhilipp Reisner 	if (atomic_read(&mdev->unacked_cnt) || atomic_read(&mdev->rs_pending_cnt)) {
15821f04af33SPhilipp Reisner 		dev_warn(DEV, "w_start_resync later...\n");
15831f04af33SPhilipp Reisner 		mdev->start_resync_timer.expires = jiffies + HZ/10;
15841f04af33SPhilipp Reisner 		add_timer(&mdev->start_resync_timer);
158599920dc5SAndreas Gruenbacher 		return 0;
15861f04af33SPhilipp Reisner 	}
15871f04af33SPhilipp Reisner 
15881f04af33SPhilipp Reisner 	drbd_start_resync(mdev, C_SYNC_SOURCE);
158936baf611SPhilipp Reisner 	clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags);
159099920dc5SAndreas Gruenbacher 	return 0;
15911f04af33SPhilipp Reisner }
15921f04af33SPhilipp Reisner 
1593b411b363SPhilipp Reisner /**
1594b411b363SPhilipp Reisner  * drbd_start_resync() - Start the resync process
1595b411b363SPhilipp Reisner  * @mdev:	DRBD device.
1596b411b363SPhilipp Reisner  * @side:	Either C_SYNC_SOURCE or C_SYNC_TARGET
1597b411b363SPhilipp Reisner  *
1598b411b363SPhilipp Reisner  * This function might bring you directly into one of the
1599b411b363SPhilipp Reisner  * C_PAUSED_SYNC_* states.
1600b411b363SPhilipp Reisner  */
1601b411b363SPhilipp Reisner void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
1602b411b363SPhilipp Reisner {
1603b411b363SPhilipp Reisner 	union drbd_state ns;
1604b411b363SPhilipp Reisner 	int r;
1605b411b363SPhilipp Reisner 
1606c4752ef1SPhilipp Reisner 	if (mdev->state.conn >= C_SYNC_SOURCE && mdev->state.conn < C_AHEAD) {
1607b411b363SPhilipp Reisner 		dev_err(DEV, "Resync already running!\n");
1608b411b363SPhilipp Reisner 		return;
1609b411b363SPhilipp Reisner 	}
1610b411b363SPhilipp Reisner 
1611e64a3294SPhilipp Reisner 	if (!test_bit(B_RS_H_DONE, &mdev->flags)) {
1612b411b363SPhilipp Reisner 		if (side == C_SYNC_TARGET) {
1613b411b363SPhilipp Reisner 			/* Since application IO was locked out during C_WF_BITMAP_T and
1614b411b363SPhilipp Reisner 			   C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
1615b411b363SPhilipp Reisner 			   we run the before-resync-target handler, which may veto making the data inconsistent. */
1616b411b363SPhilipp Reisner 			r = drbd_khelper(mdev, "before-resync-target");
1617b411b363SPhilipp Reisner 			r = (r >> 8) & 0xff;
1618b411b363SPhilipp Reisner 			if (r > 0) {
1619b411b363SPhilipp Reisner 				dev_info(DEV, "before-resync-target handler returned %d, "
1620b411b363SPhilipp Reisner 					 "dropping connection.\n", r);
162138fa9988SPhilipp Reisner 				conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
1622b411b363SPhilipp Reisner 				return;
1623b411b363SPhilipp Reisner 			}
162409b9e797SPhilipp Reisner 		} else /* C_SYNC_SOURCE */ {
162509b9e797SPhilipp Reisner 			r = drbd_khelper(mdev, "before-resync-source");
162609b9e797SPhilipp Reisner 			r = (r >> 8) & 0xff;
162709b9e797SPhilipp Reisner 			if (r > 0) {
162809b9e797SPhilipp Reisner 				if (r == 3) {
162909b9e797SPhilipp Reisner 					dev_info(DEV, "before-resync-source handler returned %d, "
163009b9e797SPhilipp Reisner 						 "ignoring. Old userland tools?", r);
163109b9e797SPhilipp Reisner 						 "ignoring. Old userland tools?\n", r);
163209b9e797SPhilipp Reisner 					dev_info(DEV, "before-resync-source handler returned %d, "
163309b9e797SPhilipp Reisner 						 "dropping connection.\n", r);
163438fa9988SPhilipp Reisner 					conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
163509b9e797SPhilipp Reisner 					return;
163609b9e797SPhilipp Reisner 				}
163709b9e797SPhilipp Reisner 			}
1638b411b363SPhilipp Reisner 		}
1639e64a3294SPhilipp Reisner 	}
1640b411b363SPhilipp Reisner 
1641e64a3294SPhilipp Reisner 	if (current == mdev->tconn->worker.task) {
1642dad20554SPhilipp Reisner 		/* The worker should not sleep waiting for state_mutex,
1643e64a3294SPhilipp Reisner 		   as that can take a long time */
16448410da8fSPhilipp Reisner 		if (!mutex_trylock(mdev->state_mutex)) {
1645e64a3294SPhilipp Reisner 			set_bit(B_RS_H_DONE, &mdev->flags);
1646e64a3294SPhilipp Reisner 			mdev->start_resync_timer.expires = jiffies + HZ/5;
1647e64a3294SPhilipp Reisner 			add_timer(&mdev->start_resync_timer);
1648e64a3294SPhilipp Reisner 			return;
1649e64a3294SPhilipp Reisner 		}
1650e64a3294SPhilipp Reisner 	} else {
16518410da8fSPhilipp Reisner 		mutex_lock(mdev->state_mutex);
1652e64a3294SPhilipp Reisner 	}
1653e64a3294SPhilipp Reisner 	clear_bit(B_RS_H_DONE, &mdev->flags);
1654b411b363SPhilipp Reisner 
16550cfac5ddSPhilipp Reisner 	write_lock_irq(&global_state_lock);
1656b411b363SPhilipp Reisner 	if (!get_ldev_if_state(mdev, D_NEGOTIATING)) {
16570cfac5ddSPhilipp Reisner 		write_unlock_irq(&global_state_lock);
16588410da8fSPhilipp Reisner 		mutex_unlock(mdev->state_mutex);
1659b411b363SPhilipp Reisner 		return;
1660b411b363SPhilipp Reisner 	}
1661b411b363SPhilipp Reisner 
166278bae59bSPhilipp Reisner 	ns = drbd_read_state(mdev);
1663b411b363SPhilipp Reisner 
1664b411b363SPhilipp Reisner 	ns.aftr_isp = !_drbd_may_sync_now(mdev);
1665b411b363SPhilipp Reisner 
1666b411b363SPhilipp Reisner 	ns.conn = side;
1667b411b363SPhilipp Reisner 
1668b411b363SPhilipp Reisner 	if (side == C_SYNC_TARGET)
1669b411b363SPhilipp Reisner 		ns.disk = D_INCONSISTENT;
1670b411b363SPhilipp Reisner 	else /* side == C_SYNC_SOURCE */
1671b411b363SPhilipp Reisner 		ns.pdsk = D_INCONSISTENT;
1672b411b363SPhilipp Reisner 
1673b411b363SPhilipp Reisner 	r = __drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
167478bae59bSPhilipp Reisner 	ns = drbd_read_state(mdev);
1675b411b363SPhilipp Reisner 
1676b411b363SPhilipp Reisner 	if (ns.conn < C_CONNECTED)
1677b411b363SPhilipp Reisner 		r = SS_UNKNOWN_ERROR;
1678b411b363SPhilipp Reisner 
1679b411b363SPhilipp Reisner 	if (r == SS_SUCCESS) {
16801d7734a0SLars Ellenberg 		unsigned long tw = drbd_bm_total_weight(mdev);
16811d7734a0SLars Ellenberg 		unsigned long now = jiffies;
16821d7734a0SLars Ellenberg 		int i;
16831d7734a0SLars Ellenberg 
1684b411b363SPhilipp Reisner 		mdev->rs_failed    = 0;
1685b411b363SPhilipp Reisner 		mdev->rs_paused    = 0;
1686b411b363SPhilipp Reisner 		mdev->rs_same_csum = 0;
16870f0601f4SLars Ellenberg 		mdev->rs_last_events = 0;
16880f0601f4SLars Ellenberg 		mdev->rs_last_sect_ev = 0;
16891d7734a0SLars Ellenberg 		mdev->rs_total     = tw;
16901d7734a0SLars Ellenberg 		mdev->rs_start     = now;
16911d7734a0SLars Ellenberg 		for (i = 0; i < DRBD_SYNC_MARKS; i++) {
16921d7734a0SLars Ellenberg 			mdev->rs_mark_left[i] = tw;
16931d7734a0SLars Ellenberg 			mdev->rs_mark_time[i] = now;
16941d7734a0SLars Ellenberg 		}
1695b411b363SPhilipp Reisner 		_drbd_pause_after(mdev);
1696b411b363SPhilipp Reisner 	}
1697b411b363SPhilipp Reisner 	write_unlock_irq(&global_state_lock);
16985a22db89SLars Ellenberg 
16996c922ed5SLars Ellenberg 	if (r == SS_SUCCESS) {
1700328e0f12SPhilipp Reisner 		/* reset rs_last_bcast when a resync or verify is started,
1701328e0f12SPhilipp Reisner 		 * to deal with potential jiffies wrap. */
1702328e0f12SPhilipp Reisner 		mdev->rs_last_bcast = jiffies - HZ;
1703328e0f12SPhilipp Reisner 
17046c922ed5SLars Ellenberg 		dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
17056c922ed5SLars Ellenberg 		     drbd_conn_str(ns.conn),
17066c922ed5SLars Ellenberg 		     (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10),
17076c922ed5SLars Ellenberg 		     (unsigned long) mdev->rs_total);
17085a22db89SLars Ellenberg 		if (side == C_SYNC_TARGET)
17095a22db89SLars Ellenberg 			mdev->bm_resync_fo = 0;
17105a22db89SLars Ellenberg 
17115a22db89SLars Ellenberg 		/* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
17125a22db89SLars Ellenberg 		 * with w_send_oos, or the sync target will get confused as to
17135a22db89SLars Ellenberg 		 * how many bits to resync.  We cannot always do that, because for an
17145a22db89SLars Ellenberg 		 * empty resync and protocol < 95, we need to do it here, as we call
17155a22db89SLars Ellenberg 		 * drbd_resync_finished from here in that case.
17165a22db89SLars Ellenberg 		 * We drbd_gen_and_send_sync_uuid here for protocol < 96,
17175a22db89SLars Ellenberg 		 * and from after_state_ch otherwise. */
171831890f4aSPhilipp Reisner 		if (side == C_SYNC_SOURCE && mdev->tconn->agreed_pro_version < 96)
17195a22db89SLars Ellenberg 			drbd_gen_and_send_sync_uuid(mdev);
1720b411b363SPhilipp Reisner 
172131890f4aSPhilipp Reisner 		if (mdev->tconn->agreed_pro_version < 95 && mdev->rs_total == 0) {
1722af85e8e8SLars Ellenberg 			/* This still has a race (about when exactly the peers
1723af85e8e8SLars Ellenberg 			 * detect connection loss) that can lead to a full sync
1724af85e8e8SLars Ellenberg 			 * on next handshake. In 8.3.9 we fixed this with explicit
1725af85e8e8SLars Ellenberg 			 * resync-finished notifications, but the fix
1726af85e8e8SLars Ellenberg 			 * introduces a protocol change.  Sleeping for some
1727af85e8e8SLars Ellenberg 			 * time longer than the ping interval + timeout on the
1728af85e8e8SLars Ellenberg 			 * SyncSource, to give the SyncTarget the chance to
1729af85e8e8SLars Ellenberg 			 * detect connection loss, then waiting for a ping
1730af85e8e8SLars Ellenberg 			 * response (implicit in drbd_resync_finished) reduces
1731af85e8e8SLars Ellenberg 			 * the race considerably, but does not solve it. */
173244ed167dSPhilipp Reisner 			if (side == C_SYNC_SOURCE) {
173344ed167dSPhilipp Reisner 				struct net_conf *nc;
173444ed167dSPhilipp Reisner 				int timeo;
173544ed167dSPhilipp Reisner 
173644ed167dSPhilipp Reisner 				rcu_read_lock();
173744ed167dSPhilipp Reisner 				nc = rcu_dereference(mdev->tconn->net_conf);
173844ed167dSPhilipp Reisner 				timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9;
173944ed167dSPhilipp Reisner 				rcu_read_unlock();
174044ed167dSPhilipp Reisner 				schedule_timeout_interruptible(timeo);
174144ed167dSPhilipp Reisner 			}
1742b411b363SPhilipp Reisner 			drbd_resync_finished(mdev);
1743b411b363SPhilipp Reisner 		}
1744b411b363SPhilipp Reisner 
17459bd28d3cSLars Ellenberg 		drbd_rs_controller_reset(mdev);
1746b411b363SPhilipp Reisner 		/* ns.conn may already be != mdev->state.conn,
1747b411b363SPhilipp Reisner 		 * we may have been paused in between, or become paused until
1748b411b363SPhilipp Reisner 		 * the timer triggers.
1749b411b363SPhilipp Reisner 		 * No matter, that is handled in resync_timer_fn() */
1750b411b363SPhilipp Reisner 		if (ns.conn == C_SYNC_TARGET)
1751b411b363SPhilipp Reisner 			mod_timer(&mdev->resync_timer, jiffies);
1752b411b363SPhilipp Reisner 
1753b411b363SPhilipp Reisner 		drbd_md_sync(mdev);
1754b411b363SPhilipp Reisner 	}
17555a22db89SLars Ellenberg 	put_ldev(mdev);
17568410da8fSPhilipp Reisner 	mutex_unlock(mdev->state_mutex);
1757b411b363SPhilipp Reisner }
1758b411b363SPhilipp Reisner 
1759b6dd1a89SLars Ellenberg /* If the resource already closed the current epoch, but we did not
1760b6dd1a89SLars Ellenberg  * (because we have not yet seen new requests), we should send the
1761b6dd1a89SLars Ellenberg  * corresponding barrier now.  Must be checked within the same spinlock
1762b6dd1a89SLars Ellenberg  * that is used to check for new requests. */
1763b6dd1a89SLars Ellenberg bool need_to_send_barrier(struct drbd_tconn *connection)
1764b6dd1a89SLars Ellenberg {
1765b6dd1a89SLars Ellenberg 	if (!connection->send.seen_any_write_yet)
1766b6dd1a89SLars Ellenberg 		return false;
1767b6dd1a89SLars Ellenberg 
1768b6dd1a89SLars Ellenberg 	/* Skip barriers that do not contain any writes.
1769b6dd1a89SLars Ellenberg 	 * This may happen during AHEAD mode. */
1770b6dd1a89SLars Ellenberg 	if (!connection->send.current_epoch_writes)
1771b6dd1a89SLars Ellenberg 		return false;
1772b6dd1a89SLars Ellenberg 
1773b6dd1a89SLars Ellenberg 	/* ->req_lock is held when requests are queued on
1774b6dd1a89SLars Ellenberg 	 * connection->sender_work, and put into ->transfer_log.
1775b6dd1a89SLars Ellenberg 	 * It is also held when ->current_tle_nr is increased.
1776b6dd1a89SLars Ellenberg 	 * So either there are already new requests queued,
1777b6dd1a89SLars Ellenberg 	 * and corresponding barriers will be sent there.
1778b6dd1a89SLars Ellenberg 	 * Or nothing new is queued yet, so the difference will be 1.
1779b6dd1a89SLars Ellenberg 	 */
1780b6dd1a89SLars Ellenberg 	if (atomic_read(&connection->current_tle_nr) !=
1781b6dd1a89SLars Ellenberg 	    connection->send.current_epoch_nr + 1)
1782b6dd1a89SLars Ellenberg 		return false;
1783b6dd1a89SLars Ellenberg 
1784b6dd1a89SLars Ellenberg 	return true;
1785b6dd1a89SLars Ellenberg }
1786b6dd1a89SLars Ellenberg 
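/* dequeue_work_batch() moves all currently queued work items onto @work_list
 * at once; dequeue_work_item() moves only the first one.  Both return true
 * if @work_list is non-empty afterwards. */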
17878c0785a5SLars Ellenberg bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
17888c0785a5SLars Ellenberg {
17898c0785a5SLars Ellenberg 	spin_lock_irq(&queue->q_lock);
17908c0785a5SLars Ellenberg 	list_splice_init(&queue->q, work_list);
17918c0785a5SLars Ellenberg 	spin_unlock_irq(&queue->q_lock);
17928c0785a5SLars Ellenberg 	return !list_empty(work_list);
17938c0785a5SLars Ellenberg }
17948c0785a5SLars Ellenberg 
17958c0785a5SLars Ellenberg bool dequeue_work_item(struct drbd_work_queue *queue, struct list_head *work_list)
17968c0785a5SLars Ellenberg {
17978c0785a5SLars Ellenberg 	spin_lock_irq(&queue->q_lock);
17988c0785a5SLars Ellenberg 	if (!list_empty(&queue->q))
17998c0785a5SLars Ellenberg 		list_move(queue->q.next, work_list);
18008c0785a5SLars Ellenberg 	spin_unlock_irq(&queue->q_lock);
18018c0785a5SLars Ellenberg 	return !list_empty(work_list);
18028c0785a5SLars Ellenberg }
18038c0785a5SLars Ellenberg 
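/* wait_for_work() - Dequeue the next work item for the sender, sleeping if necessary.
 * If nothing is queued, uncork the data socket so anything still buffered
 * goes out, then wait for new work (or a signal); if the current epoch needs
 * to be closed while waiting, the P_BARRIER is sent from here.  After
 * waiting, the TCP cork state is re-established according to the current
 * net_conf. */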
1804b6dd1a89SLars Ellenberg void wait_for_work(struct drbd_tconn *connection, struct list_head *work_list)
1805b6dd1a89SLars Ellenberg {
1806b6dd1a89SLars Ellenberg 	DEFINE_WAIT(wait);
1807b6dd1a89SLars Ellenberg 	struct net_conf *nc;
1808b6dd1a89SLars Ellenberg 	int uncork, cork;
1809b6dd1a89SLars Ellenberg 
1810b6dd1a89SLars Ellenberg 	dequeue_work_item(&connection->sender_work, work_list);
1811b6dd1a89SLars Ellenberg 	if (!list_empty(work_list))
1812b6dd1a89SLars Ellenberg 		return;
1813b6dd1a89SLars Ellenberg 
1814b6dd1a89SLars Ellenberg 	/* Still nothing to do?
1815b6dd1a89SLars Ellenberg 	 * Maybe we still need to close the current epoch,
1816b6dd1a89SLars Ellenberg 	 * even if no new requests are queued yet.
1817b6dd1a89SLars Ellenberg 	 *
1818b6dd1a89SLars Ellenberg 	 * Also, poke TCP, just in case.
1819b6dd1a89SLars Ellenberg 	 * Then wait for new work (or signal). */
1820b6dd1a89SLars Ellenberg 	rcu_read_lock();
1821b6dd1a89SLars Ellenberg 	nc = rcu_dereference(connection->net_conf);
1822b6dd1a89SLars Ellenberg 	uncork = nc ? nc->tcp_cork : 0;
1823b6dd1a89SLars Ellenberg 	rcu_read_unlock();
1824b6dd1a89SLars Ellenberg 	if (uncork) {
1825b6dd1a89SLars Ellenberg 		mutex_lock(&connection->data.mutex);
1826b6dd1a89SLars Ellenberg 		if (connection->data.socket)
1827b6dd1a89SLars Ellenberg 			drbd_tcp_uncork(connection->data.socket);
1828b6dd1a89SLars Ellenberg 		mutex_unlock(&connection->data.mutex);
1829b6dd1a89SLars Ellenberg 	}
1830b6dd1a89SLars Ellenberg 
1831b6dd1a89SLars Ellenberg 	for (;;) {
1832b6dd1a89SLars Ellenberg 		int send_barrier;
1833b6dd1a89SLars Ellenberg 		prepare_to_wait(&connection->sender_work.q_wait, &wait, TASK_INTERRUPTIBLE);
1834b6dd1a89SLars Ellenberg 		spin_lock_irq(&connection->req_lock);
1835b6dd1a89SLars Ellenberg 		spin_lock(&connection->sender_work.q_lock);	/* FIXME get rid of this one? */
1836bc317a9eSLars Ellenberg 		/* dequeue single item only,
1837bc317a9eSLars Ellenberg 		 * we still use drbd_queue_work_front() in some places */
1838bc317a9eSLars Ellenberg 		if (!list_empty(&connection->sender_work.q))
1839bc317a9eSLars Ellenberg 			list_move(connection->sender_work.q.next, work_list);
1840b6dd1a89SLars Ellenberg 		spin_unlock(&connection->sender_work.q_lock);	/* FIXME get rid of this one? */
1841b6dd1a89SLars Ellenberg 		if (!list_empty(work_list) || signal_pending(current)) {
1842b6dd1a89SLars Ellenberg 			spin_unlock_irq(&connection->req_lock);
1843b6dd1a89SLars Ellenberg 			break;
1844b6dd1a89SLars Ellenberg 		}
1845b6dd1a89SLars Ellenberg 		send_barrier = need_to_send_barrier(connection);
1846b6dd1a89SLars Ellenberg 		spin_unlock_irq(&connection->req_lock);
1847b6dd1a89SLars Ellenberg 		if (send_barrier) {
1848b6dd1a89SLars Ellenberg 			drbd_send_barrier(connection);
1849b6dd1a89SLars Ellenberg 			connection->send.current_epoch_nr++;
1850b6dd1a89SLars Ellenberg 		}
1851b6dd1a89SLars Ellenberg 		schedule();
1852b6dd1a89SLars Ellenberg 		/* may be woken up for things other than new work, too,
1853b6dd1a89SLars Ellenberg 		 * e.g. if the current epoch got closed.
1854b6dd1a89SLars Ellenberg 		 * In which case we send the barrier above. */
1855b6dd1a89SLars Ellenberg 	}
1856b6dd1a89SLars Ellenberg 	finish_wait(&connection->sender_work.q_wait, &wait);
1857b6dd1a89SLars Ellenberg 
1858b6dd1a89SLars Ellenberg 	/* someone may have changed the config while we have been waiting above. */
1859b6dd1a89SLars Ellenberg 	rcu_read_lock();
1860b6dd1a89SLars Ellenberg 	nc = rcu_dereference(connection->net_conf);
1861b6dd1a89SLars Ellenberg 	cork = nc ? nc->tcp_cork : 0;
1862b6dd1a89SLars Ellenberg 	rcu_read_unlock();
1863b6dd1a89SLars Ellenberg 	mutex_lock(&connection->data.mutex);
1864b6dd1a89SLars Ellenberg 	if (connection->data.socket) {
1865b6dd1a89SLars Ellenberg 		if (cork)
1866b6dd1a89SLars Ellenberg 			drbd_tcp_cork(connection->data.socket);
1867b6dd1a89SLars Ellenberg 		else if (!uncork)
1868b6dd1a89SLars Ellenberg 			drbd_tcp_uncork(connection->data.socket);
1869b6dd1a89SLars Ellenberg 	}
1870b6dd1a89SLars Ellenberg 	mutex_unlock(&connection->data.mutex);
1871b6dd1a89SLars Ellenberg }
1872b6dd1a89SLars Ellenberg 
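/* drbd_worker() - Main loop of the per-connection worker thread.
 * Dequeues work items from sender_work and runs their callbacks; a callback
 * is passed cancel=1 once the connection state drops below C_WF_REPORT_PARAMS,
 * and a failing callback while still connected forces the connection into
 * C_NETWORK_FAILURE.  On thread exit, all remaining work is completed with
 * cancel=1 and the per-volume state is cleaned up. */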
1873b411b363SPhilipp Reisner int drbd_worker(struct drbd_thread *thi)
1874b411b363SPhilipp Reisner {
1875392c8801SPhilipp Reisner 	struct drbd_tconn *tconn = thi->tconn;
1876b411b363SPhilipp Reisner 	struct drbd_work *w = NULL;
18770e29d163SPhilipp Reisner 	struct drbd_conf *mdev;
1878b411b363SPhilipp Reisner 	LIST_HEAD(work_list);
18798c0785a5SLars Ellenberg 	int vnr;
1880b411b363SPhilipp Reisner 
1881e77a0a5cSAndreas Gruenbacher 	while (get_t_state(thi) == RUNNING) {
188280822284SPhilipp Reisner 		drbd_thread_current_set_cpu(thi);
1883b411b363SPhilipp Reisner 
18848c0785a5SLars Ellenberg 		/* as long as we use drbd_queue_work_front(),
18858c0785a5SLars Ellenberg 		 * we may only dequeue single work items here, not batches. */
18868c0785a5SLars Ellenberg 		if (list_empty(&work_list))
1887b6dd1a89SLars Ellenberg 			wait_for_work(tconn, &work_list);
1888b411b363SPhilipp Reisner 
18898c0785a5SLars Ellenberg 		if (signal_pending(current)) {
1890b411b363SPhilipp Reisner 			flush_signals(current);
189119393e10SPhilipp Reisner 			if (get_t_state(thi) == RUNNING) {
189219393e10SPhilipp Reisner 				conn_warn(tconn, "Worker got an unexpected signal\n");
1893b411b363SPhilipp Reisner 				continue;
189419393e10SPhilipp Reisner 			}
1895b411b363SPhilipp Reisner 			break;
1896b411b363SPhilipp Reisner 		}
1897b411b363SPhilipp Reisner 
1898e77a0a5cSAndreas Gruenbacher 		if (get_t_state(thi) != RUNNING)
1899b411b363SPhilipp Reisner 			break;
1900b411b363SPhilipp Reisner 
19018c0785a5SLars Ellenberg 		while (!list_empty(&work_list)) {
19028c0785a5SLars Ellenberg 			w = list_first_entry(&work_list, struct drbd_work, list);
1903b411b363SPhilipp Reisner 			list_del_init(&w->list);
19048c0785a5SLars Ellenberg 			if (w->cb(w, tconn->cstate < C_WF_REPORT_PARAMS) == 0)
19058c0785a5SLars Ellenberg 				continue;
1906bbeb641cSPhilipp Reisner 			if (tconn->cstate >= C_WF_REPORT_PARAMS)
1907bbeb641cSPhilipp Reisner 				conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
1908b411b363SPhilipp Reisner 		}
1909b411b363SPhilipp Reisner 	}
1910b411b363SPhilipp Reisner 
19118c0785a5SLars Ellenberg 	do {
1912b411b363SPhilipp Reisner 		while (!list_empty(&work_list)) {
19138c0785a5SLars Ellenberg 			w = list_first_entry(&work_list, struct drbd_work, list);
1914b411b363SPhilipp Reisner 			list_del_init(&w->list);
191500d56944SPhilipp Reisner 			w->cb(w, 1);
1916b411b363SPhilipp Reisner 		}
1917d5b27b01SLars Ellenberg 		dequeue_work_batch(&tconn->sender_work, &work_list);
19188c0785a5SLars Ellenberg 	} while (!list_empty(&work_list));
1919b411b363SPhilipp Reisner 
1920c141ebdaSPhilipp Reisner 	rcu_read_lock();
1921f399002eSLars Ellenberg 	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
19220e29d163SPhilipp Reisner 		D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE);
1923c141ebdaSPhilipp Reisner 		kref_get(&mdev->kref);
1924c141ebdaSPhilipp Reisner 		rcu_read_unlock();
19250e29d163SPhilipp Reisner 		drbd_mdev_cleanup(mdev);
1926c141ebdaSPhilipp Reisner 		kref_put(&mdev->kref, &drbd_minor_destroy);
1927c141ebdaSPhilipp Reisner 		rcu_read_lock();
19280e29d163SPhilipp Reisner 	}
1929c141ebdaSPhilipp Reisner 	rcu_read_unlock();
1930b411b363SPhilipp Reisner 
1931b411b363SPhilipp Reisner 	return 0;
1932b411b363SPhilipp Reisner }
1933