xref: /openbmc/linux/drivers/block/drbd/drbd_worker.c (revision 8747d30a)
1b411b363SPhilipp Reisner /*
2b411b363SPhilipp Reisner    drbd_worker.c
3b411b363SPhilipp Reisner 
4b411b363SPhilipp Reisner    This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5b411b363SPhilipp Reisner 
6b411b363SPhilipp Reisner    Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7b411b363SPhilipp Reisner    Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8b411b363SPhilipp Reisner    Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9b411b363SPhilipp Reisner 
10b411b363SPhilipp Reisner    drbd is free software; you can redistribute it and/or modify
11b411b363SPhilipp Reisner    it under the terms of the GNU General Public License as published by
12b411b363SPhilipp Reisner    the Free Software Foundation; either version 2, or (at your option)
13b411b363SPhilipp Reisner    any later version.
14b411b363SPhilipp Reisner 
15b411b363SPhilipp Reisner    drbd is distributed in the hope that it will be useful,
16b411b363SPhilipp Reisner    but WITHOUT ANY WARRANTY; without even the implied warranty of
17b411b363SPhilipp Reisner    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18b411b363SPhilipp Reisner    GNU General Public License for more details.
19b411b363SPhilipp Reisner 
20b411b363SPhilipp Reisner    You should have received a copy of the GNU General Public License
21b411b363SPhilipp Reisner    along with drbd; see the file COPYING.  If not, write to
22b411b363SPhilipp Reisner    the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23b411b363SPhilipp Reisner 
24b411b363SPhilipp Reisner  */
25b411b363SPhilipp Reisner 
26b411b363SPhilipp Reisner #include <linux/module.h>
27b411b363SPhilipp Reisner #include <linux/drbd.h>
28b411b363SPhilipp Reisner #include <linux/sched.h>
29b411b363SPhilipp Reisner #include <linux/wait.h>
30b411b363SPhilipp Reisner #include <linux/mm.h>
31b411b363SPhilipp Reisner #include <linux/memcontrol.h>
32b411b363SPhilipp Reisner #include <linux/mm_inline.h>
33b411b363SPhilipp Reisner #include <linux/slab.h>
34b411b363SPhilipp Reisner #include <linux/random.h>
35b411b363SPhilipp Reisner #include <linux/string.h>
36b411b363SPhilipp Reisner #include <linux/scatterlist.h>
37b411b363SPhilipp Reisner 
38b411b363SPhilipp Reisner #include "drbd_int.h"
39b411b363SPhilipp Reisner #include "drbd_req.h"
40b411b363SPhilipp Reisner 
4100d56944SPhilipp Reisner static int w_make_ov_request(struct drbd_work *w, int cancel);
42b411b363SPhilipp Reisner 
43b411b363SPhilipp Reisner 
44c5a91619SAndreas Gruenbacher /* endio handlers:
45c5a91619SAndreas Gruenbacher  *   drbd_md_io_complete (defined here)
46fcefa62eSAndreas Gruenbacher  *   drbd_request_endio (defined here)
47fcefa62eSAndreas Gruenbacher  *   drbd_peer_request_endio (defined here)
48c5a91619SAndreas Gruenbacher  *   bm_async_io_complete (defined in drbd_bitmap.c)
49c5a91619SAndreas Gruenbacher  *
50b411b363SPhilipp Reisner  * For all these callbacks, note the following:
51b411b363SPhilipp Reisner  * The callbacks will be called in irq context by the IDE drivers,
52b411b363SPhilipp Reisner  * and in Softirqs/Tasklets/BH context by the SCSI drivers.
53b411b363SPhilipp Reisner  * Try to get the locking right :)
54b411b363SPhilipp Reisner  *
55b411b363SPhilipp Reisner  */
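/* A minimal sketch of the locking pattern the completion handlers below
 * follow (illustrative only, not additional driver code): because we may
 * already run in hard-irq context, the req_lock is always taken with the
 * irqsave variants, e.g.
 *
 *	unsigned long flags;
 *	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
 *	... update counters, move the request/peer_req between lists ...
 *	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
 */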
56b411b363SPhilipp Reisner 
57b411b363SPhilipp Reisner 
58b411b363SPhilipp Reisner /* About the global_state_lock
59b411b363SPhilipp Reisner    Each state transition on a device holds a read lock. In case we have
6095f8efd0SAndreas Gruenbacher    to evaluate the resync-after dependencies, we grab a write lock, because
61b411b363SPhilipp Reisner    we need stable states on all devices for that.  */
62b411b363SPhilipp Reisner rwlock_t global_state_lock;
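/* Illustrative usage sketch (assumed call sites, not part of this file):
 * a single device's state change takes the lock shared, while evaluating
 * the resync-after dependencies across all devices takes it exclusively:
 *
 *	read_lock(&global_state_lock);
 *	... perform one device's state transition ...
 *	read_unlock(&global_state_lock);
 *
 *	write_lock(&global_state_lock);
 *	... re-evaluate resync-after ordering for all devices ...
 *	write_unlock(&global_state_lock);
 */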
63b411b363SPhilipp Reisner 
64b411b363SPhilipp Reisner /* used for synchronous meta data and bitmap IO
65b411b363SPhilipp Reisner  * submitted by drbd_md_sync_page_io()
66b411b363SPhilipp Reisner  */
67b411b363SPhilipp Reisner void drbd_md_io_complete(struct bio *bio, int error)
68b411b363SPhilipp Reisner {
69b411b363SPhilipp Reisner 	struct drbd_md_io *md_io;
70cdfda633SPhilipp Reisner 	struct drbd_conf *mdev;
71b411b363SPhilipp Reisner 
72b411b363SPhilipp Reisner 	md_io = (struct drbd_md_io *)bio->bi_private;
73cdfda633SPhilipp Reisner 	mdev = container_of(md_io, struct drbd_conf, md_io);
74cdfda633SPhilipp Reisner 
75b411b363SPhilipp Reisner 	md_io->error = error;
76b411b363SPhilipp Reisner 
770cfac5ddSPhilipp Reisner 	/* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
780cfac5ddSPhilipp Reisner 	 * to timeout on the lower level device, and eventually detach from it.
790cfac5ddSPhilipp Reisner 	 * If this io completion runs after that timeout expired, this
800cfac5ddSPhilipp Reisner 	 * drbd_md_put_buffer() may allow us to finally try and re-attach.
810cfac5ddSPhilipp Reisner 	 * During normal operation, this only puts that extra reference
820cfac5ddSPhilipp Reisner 	 * down to 1 again.
830cfac5ddSPhilipp Reisner 	 * Make sure we first drop the reference, and only then signal
840cfac5ddSPhilipp Reisner 	 * completion, or we may (in drbd_al_read_log()) cycle so fast into the
850cfac5ddSPhilipp Reisner 	 * next drbd_md_sync_page_io(), that we trigger the
860cfac5ddSPhilipp Reisner 	 * ASSERT(atomic_read(&mdev->md_io_in_use) == 1) there.
870cfac5ddSPhilipp Reisner 	 */
880cfac5ddSPhilipp Reisner 	drbd_md_put_buffer(mdev);
89cdfda633SPhilipp Reisner 	md_io->done = 1;
90cdfda633SPhilipp Reisner 	wake_up(&mdev->misc_wait);
91cdfda633SPhilipp Reisner 	bio_put(bio);
92cdfda633SPhilipp Reisner 	put_ldev(mdev);
93b411b363SPhilipp Reisner }
94b411b363SPhilipp Reisner 
95b411b363SPhilipp Reisner /* reads on behalf of the partner,
96b411b363SPhilipp Reisner  * "submitted" by the receiver
97b411b363SPhilipp Reisner  */
98db830c46SAndreas Gruenbacher void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local)
99b411b363SPhilipp Reisner {
100b411b363SPhilipp Reisner 	unsigned long flags = 0;
101a21e9298SPhilipp Reisner 	struct drbd_conf *mdev = peer_req->w.mdev;
102b411b363SPhilipp Reisner 
10387eeee41SPhilipp Reisner 	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
104db830c46SAndreas Gruenbacher 	mdev->read_cnt += peer_req->i.size >> 9;
105db830c46SAndreas Gruenbacher 	list_del(&peer_req->w.list);
106b411b363SPhilipp Reisner 	if (list_empty(&mdev->read_ee))
107b411b363SPhilipp Reisner 		wake_up(&mdev->ee_wait);
108db830c46SAndreas Gruenbacher 	if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
1090c849666SLars Ellenberg 		__drbd_chk_io_error(mdev, DRBD_IO_ERROR);
11087eeee41SPhilipp Reisner 	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
111b411b363SPhilipp Reisner 
112d5b27b01SLars Ellenberg 	drbd_queue_work(&mdev->tconn->sender_work, &peer_req->w);
113b411b363SPhilipp Reisner 	put_ldev(mdev);
114b411b363SPhilipp Reisner }
115b411b363SPhilipp Reisner 
116b411b363SPhilipp Reisner /* writes on behalf of the partner, or resync writes,
11745bb912bSLars Ellenberg  * "submitted" by the receiver, final stage.  */
118db830c46SAndreas Gruenbacher static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
119b411b363SPhilipp Reisner {
120b411b363SPhilipp Reisner 	unsigned long flags = 0;
121a21e9298SPhilipp Reisner 	struct drbd_conf *mdev = peer_req->w.mdev;
122181286adSLars Ellenberg 	struct drbd_interval i;
123b411b363SPhilipp Reisner 	int do_wake;
124579b57edSAndreas Gruenbacher 	u64 block_id;
125b411b363SPhilipp Reisner 	int do_al_complete_io;
126b411b363SPhilipp Reisner 
127db830c46SAndreas Gruenbacher 	/* after we moved peer_req to done_ee,
128b411b363SPhilipp Reisner 	 * we may no longer access it,
129b411b363SPhilipp Reisner 	 * it may be freed/reused already!
130b411b363SPhilipp Reisner 	 * (as soon as we release the req_lock) */
131181286adSLars Ellenberg 	i = peer_req->i;
132db830c46SAndreas Gruenbacher 	do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
133db830c46SAndreas Gruenbacher 	block_id = peer_req->block_id;
134b411b363SPhilipp Reisner 
13587eeee41SPhilipp Reisner 	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
136db830c46SAndreas Gruenbacher 	mdev->writ_cnt += peer_req->i.size >> 9;
137a506c13aSLars Ellenberg 	list_move_tail(&peer_req->w.list, &mdev->done_ee);
138b411b363SPhilipp Reisner 
139bb3bfe96SAndreas Gruenbacher 	/*
1405e472264SAndreas Gruenbacher 	 * Do not remove from the write_requests tree here: we did not send the
141bb3bfe96SAndreas Gruenbacher 	 * Ack yet and did not wake possibly waiting conflicting requests.
142bb3bfe96SAndreas Gruenbacher 	 * Removed from the tree from "drbd_process_done_ee" within the
143bb3bfe96SAndreas Gruenbacher 	 * appropriate w.cb (e_end_block/e_end_resync_block) or from
144bb3bfe96SAndreas Gruenbacher 	 * _drbd_clear_done_ee.
145bb3bfe96SAndreas Gruenbacher 	 */
146b411b363SPhilipp Reisner 
147579b57edSAndreas Gruenbacher 	do_wake = list_empty(block_id == ID_SYNCER ? &mdev->sync_ee : &mdev->active_ee);
148b411b363SPhilipp Reisner 
149db830c46SAndreas Gruenbacher 	if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
1500c849666SLars Ellenberg 		__drbd_chk_io_error(mdev, DRBD_IO_ERROR);
15187eeee41SPhilipp Reisner 	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
152b411b363SPhilipp Reisner 
153579b57edSAndreas Gruenbacher 	if (block_id == ID_SYNCER)
154181286adSLars Ellenberg 		drbd_rs_complete_io(mdev, i.sector);
155b411b363SPhilipp Reisner 
156b411b363SPhilipp Reisner 	if (do_wake)
157b411b363SPhilipp Reisner 		wake_up(&mdev->ee_wait);
158b411b363SPhilipp Reisner 
159b411b363SPhilipp Reisner 	if (do_al_complete_io)
160181286adSLars Ellenberg 		drbd_al_complete_io(mdev, &i);
161b411b363SPhilipp Reisner 
1620625ac19SPhilipp Reisner 	wake_asender(mdev->tconn);
163b411b363SPhilipp Reisner 	put_ldev(mdev);
16445bb912bSLars Ellenberg }
165b411b363SPhilipp Reisner 
16645bb912bSLars Ellenberg /* writes on behalf of the partner, or resync writes,
16745bb912bSLars Ellenberg  * "submitted" by the receiver.
16845bb912bSLars Ellenberg  */
169fcefa62eSAndreas Gruenbacher void drbd_peer_request_endio(struct bio *bio, int error)
17045bb912bSLars Ellenberg {
171db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req = bio->bi_private;
172a21e9298SPhilipp Reisner 	struct drbd_conf *mdev = peer_req->w.mdev;
17345bb912bSLars Ellenberg 	int uptodate = bio_flagged(bio, BIO_UPTODATE);
17445bb912bSLars Ellenberg 	int is_write = bio_data_dir(bio) == WRITE;
17545bb912bSLars Ellenberg 
17607194272SLars Ellenberg 	if (error && __ratelimit(&drbd_ratelimit_state))
17745bb912bSLars Ellenberg 		dev_warn(DEV, "%s: error=%d s=%llus\n",
17845bb912bSLars Ellenberg 				is_write ? "write" : "read", error,
179db830c46SAndreas Gruenbacher 				(unsigned long long)peer_req->i.sector);
18045bb912bSLars Ellenberg 	if (!error && !uptodate) {
18107194272SLars Ellenberg 		if (__ratelimit(&drbd_ratelimit_state))
18245bb912bSLars Ellenberg 			dev_warn(DEV, "%s: setting error to -EIO s=%llus\n",
18345bb912bSLars Ellenberg 					is_write ? "write" : "read",
184db830c46SAndreas Gruenbacher 					(unsigned long long)peer_req->i.sector);
18545bb912bSLars Ellenberg 		/* strange behavior of some lower level drivers...
18645bb912bSLars Ellenberg 		 * fail the request by clearing the uptodate flag,
18745bb912bSLars Ellenberg 		 * but do not return any error?! */
18845bb912bSLars Ellenberg 		error = -EIO;
18945bb912bSLars Ellenberg 	}
19045bb912bSLars Ellenberg 
19145bb912bSLars Ellenberg 	if (error)
192db830c46SAndreas Gruenbacher 		set_bit(__EE_WAS_ERROR, &peer_req->flags);
19345bb912bSLars Ellenberg 
19445bb912bSLars Ellenberg 	bio_put(bio); /* no need for the bio anymore */
195db830c46SAndreas Gruenbacher 	if (atomic_dec_and_test(&peer_req->pending_bios)) {
19645bb912bSLars Ellenberg 		if (is_write)
197db830c46SAndreas Gruenbacher 			drbd_endio_write_sec_final(peer_req);
19845bb912bSLars Ellenberg 		else
199db830c46SAndreas Gruenbacher 			drbd_endio_read_sec_final(peer_req);
20045bb912bSLars Ellenberg 	}
201b411b363SPhilipp Reisner }
202b411b363SPhilipp Reisner 
203b411b363SPhilipp Reisner /* read, readA or write requests on R_PRIMARY coming from drbd_make_request
204b411b363SPhilipp Reisner  */
205fcefa62eSAndreas Gruenbacher void drbd_request_endio(struct bio *bio, int error)
206b411b363SPhilipp Reisner {
207a115413dSLars Ellenberg 	unsigned long flags;
208b411b363SPhilipp Reisner 	struct drbd_request *req = bio->bi_private;
209a21e9298SPhilipp Reisner 	struct drbd_conf *mdev = req->w.mdev;
210a115413dSLars Ellenberg 	struct bio_and_error m;
211b411b363SPhilipp Reisner 	enum drbd_req_event what;
212b411b363SPhilipp Reisner 	int uptodate = bio_flagged(bio, BIO_UPTODATE);
213b411b363SPhilipp Reisner 
214b411b363SPhilipp Reisner 	if (!error && !uptodate) {
215b411b363SPhilipp Reisner 		dev_warn(DEV, "p %s: setting error to -EIO\n",
216b411b363SPhilipp Reisner 			 bio_data_dir(bio) == WRITE ? "write" : "read");
217b411b363SPhilipp Reisner 		/* strange behavior of some lower level drivers...
218b411b363SPhilipp Reisner 		 * fail the request by clearing the uptodate flag,
219b411b363SPhilipp Reisner 		 * but do not return any error?! */
220b411b363SPhilipp Reisner 		error = -EIO;
221b411b363SPhilipp Reisner 	}
222b411b363SPhilipp Reisner 
2231b6dd252SPhilipp Reisner 
2241b6dd252SPhilipp Reisner 	/* If this request was aborted locally before,
2251b6dd252SPhilipp Reisner 	 * but now was completed "successfully",
2261b6dd252SPhilipp Reisner 	 * chances are that this caused arbitrary data corruption.
2271b6dd252SPhilipp Reisner 	 *
2281b6dd252SPhilipp Reisner 	 * "aborting" requests, or force-detaching the disk, is intended for
2291b6dd252SPhilipp Reisner 	 * completely blocked/hung local backing devices which no longer
2301b6dd252SPhilipp Reisner 	 * complete requests at all, not even error completions.  In this
2311b6dd252SPhilipp Reisner 	 * situation, usually a hard-reset and failover is the only way out.
2321b6dd252SPhilipp Reisner 	 *
2331b6dd252SPhilipp Reisner 	 * By "aborting", basically faking a local error-completion,
2341b6dd252SPhilipp Reisner 	 * we allow for a more graceful switchover by cleanly migrating services.
2351b6dd252SPhilipp Reisner 	 * Still the affected node has to be rebooted "soon".
2361b6dd252SPhilipp Reisner 	 *
2371b6dd252SPhilipp Reisner 	 * By completing these requests, we allow the upper layers to re-use
2381b6dd252SPhilipp Reisner 	 * the associated data pages.
2391b6dd252SPhilipp Reisner 	 *
2401b6dd252SPhilipp Reisner 	 * If later the local backing device "recovers", and now DMAs some data
2411b6dd252SPhilipp Reisner 	 * from disk into the original request pages, in the best case it will
2421b6dd252SPhilipp Reisner 	 * just put random data into unused pages; but typically it will corrupt
2431b6dd252SPhilipp Reisner 	 * meanwhile completely unrelated data, causing all sorts of damage.
2441b6dd252SPhilipp Reisner 	 *
2451b6dd252SPhilipp Reisner 	 * Which means delayed successful completion,
2461b6dd252SPhilipp Reisner 	 * especially for READ requests,
2471b6dd252SPhilipp Reisner 	 * is a reason to panic().
2481b6dd252SPhilipp Reisner 	 *
2491b6dd252SPhilipp Reisner 	 * We assume that a delayed *error* completion is OK,
2501b6dd252SPhilipp Reisner 	 * though we still will complain noisily about it.
2511b6dd252SPhilipp Reisner 	 */
2521b6dd252SPhilipp Reisner 	if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) {
2531b6dd252SPhilipp Reisner 		if (__ratelimit(&drbd_ratelimit_state))
2541b6dd252SPhilipp Reisner 			dev_emerg(DEV, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");
2551b6dd252SPhilipp Reisner 
2561b6dd252SPhilipp Reisner 		if (!error)
2571b6dd252SPhilipp Reisner 			panic("possible random memory corruption caused by delayed completion of aborted local request\n");
2581b6dd252SPhilipp Reisner 	}
2591b6dd252SPhilipp Reisner 
260b411b363SPhilipp Reisner 	/* to avoid recursion in __req_mod */
261b411b363SPhilipp Reisner 	if (unlikely(error)) {
262b411b363SPhilipp Reisner 		what = (bio_data_dir(bio) == WRITE)
2638554df1cSAndreas Gruenbacher 			? WRITE_COMPLETED_WITH_ERROR
2645c3c7e64SLars Ellenberg 			: (bio_rw(bio) == READ)
2658554df1cSAndreas Gruenbacher 			  ? READ_COMPLETED_WITH_ERROR
2668554df1cSAndreas Gruenbacher 			  : READ_AHEAD_COMPLETED_WITH_ERROR;
267b411b363SPhilipp Reisner 	} else
2688554df1cSAndreas Gruenbacher 		what = COMPLETED_OK;
269b411b363SPhilipp Reisner 
270b411b363SPhilipp Reisner 	bio_put(req->private_bio);
271b411b363SPhilipp Reisner 	req->private_bio = ERR_PTR(error);
272b411b363SPhilipp Reisner 
273a115413dSLars Ellenberg 	/* not req_mod(), we need irqsave here! */
27487eeee41SPhilipp Reisner 	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
275a115413dSLars Ellenberg 	__req_mod(req, what, &m);
27687eeee41SPhilipp Reisner 	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
2772415308eSLars Ellenberg 	put_ldev(mdev);
278a115413dSLars Ellenberg 
279a115413dSLars Ellenberg 	if (m.bio)
280a115413dSLars Ellenberg 		complete_master_bio(mdev, &m);
281b411b363SPhilipp Reisner }
282b411b363SPhilipp Reisner 
283f6ffca9fSAndreas Gruenbacher void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm,
284db830c46SAndreas Gruenbacher 		  struct drbd_peer_request *peer_req, void *digest)
28545bb912bSLars Ellenberg {
28645bb912bSLars Ellenberg 	struct hash_desc desc;
28745bb912bSLars Ellenberg 	struct scatterlist sg;
288db830c46SAndreas Gruenbacher 	struct page *page = peer_req->pages;
28945bb912bSLars Ellenberg 	struct page *tmp;
29045bb912bSLars Ellenberg 	unsigned len;
29145bb912bSLars Ellenberg 
29245bb912bSLars Ellenberg 	desc.tfm = tfm;
29345bb912bSLars Ellenberg 	desc.flags = 0;
29445bb912bSLars Ellenberg 
29545bb912bSLars Ellenberg 	sg_init_table(&sg, 1);
29645bb912bSLars Ellenberg 	crypto_hash_init(&desc);
29745bb912bSLars Ellenberg 
29845bb912bSLars Ellenberg 	while ((tmp = page_chain_next(page))) {
29945bb912bSLars Ellenberg 		/* all but the last page will be fully used */
30045bb912bSLars Ellenberg 		sg_set_page(&sg, page, PAGE_SIZE, 0);
30145bb912bSLars Ellenberg 		crypto_hash_update(&desc, &sg, sg.length);
30245bb912bSLars Ellenberg 		page = tmp;
30345bb912bSLars Ellenberg 	}
30445bb912bSLars Ellenberg 	/* and now the last, possibly only partially used page */
305db830c46SAndreas Gruenbacher 	len = peer_req->i.size & (PAGE_SIZE - 1);
30645bb912bSLars Ellenberg 	sg_set_page(&sg, page, len ?: PAGE_SIZE, 0);
30745bb912bSLars Ellenberg 	crypto_hash_update(&desc, &sg, sg.length);
30845bb912bSLars Ellenberg 	crypto_hash_final(&desc, digest);
30945bb912bSLars Ellenberg }
31045bb912bSLars Ellenberg 
31145bb912bSLars Ellenberg void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *bio, void *digest)
312b411b363SPhilipp Reisner {
313b411b363SPhilipp Reisner 	struct hash_desc desc;
314b411b363SPhilipp Reisner 	struct scatterlist sg;
315b411b363SPhilipp Reisner 	struct bio_vec *bvec;
316b411b363SPhilipp Reisner 	int i;
317b411b363SPhilipp Reisner 
318b411b363SPhilipp Reisner 	desc.tfm = tfm;
319b411b363SPhilipp Reisner 	desc.flags = 0;
320b411b363SPhilipp Reisner 
321b411b363SPhilipp Reisner 	sg_init_table(&sg, 1);
322b411b363SPhilipp Reisner 	crypto_hash_init(&desc);
323b411b363SPhilipp Reisner 
3244b8514eeSLars Ellenberg 	bio_for_each_segment(bvec, bio, i) {
325b411b363SPhilipp Reisner 		sg_set_page(&sg, bvec->bv_page, bvec->bv_len, bvec->bv_offset);
326b411b363SPhilipp Reisner 		crypto_hash_update(&desc, &sg, sg.length);
327b411b363SPhilipp Reisner 	}
328b411b363SPhilipp Reisner 	crypto_hash_final(&desc, digest);
329b411b363SPhilipp Reisner }
330b411b363SPhilipp Reisner 
3319676c760SLars Ellenberg /* MAYBE merge common code with w_e_end_ov_req */
33299920dc5SAndreas Gruenbacher static int w_e_send_csum(struct drbd_work *w, int cancel)
333b411b363SPhilipp Reisner {
33400d56944SPhilipp Reisner 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
33500d56944SPhilipp Reisner 	struct drbd_conf *mdev = w->mdev;
336b411b363SPhilipp Reisner 	int digest_size;
337b411b363SPhilipp Reisner 	void *digest;
33899920dc5SAndreas Gruenbacher 	int err = 0;
339b411b363SPhilipp Reisner 
34053ea4331SLars Ellenberg 	if (unlikely(cancel))
34153ea4331SLars Ellenberg 		goto out;
342b411b363SPhilipp Reisner 
3439676c760SLars Ellenberg 	if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0))
34453ea4331SLars Ellenberg 		goto out;
34553ea4331SLars Ellenberg 
346f399002eSLars Ellenberg 	digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm);
347b411b363SPhilipp Reisner 	digest = kmalloc(digest_size, GFP_NOIO);
348b411b363SPhilipp Reisner 	if (digest) {
349db830c46SAndreas Gruenbacher 		sector_t sector = peer_req->i.sector;
350db830c46SAndreas Gruenbacher 		unsigned int size = peer_req->i.size;
351f399002eSLars Ellenberg 		drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest);
3529676c760SLars Ellenberg 		/* Free peer_req and pages before send.
35353ea4331SLars Ellenberg 		 * In case we block on congestion, we could otherwise run into
35453ea4331SLars Ellenberg 		 * some distributed deadlock, if the other side blocks on
35553ea4331SLars Ellenberg 		 * congestion as well, because our receiver blocks in
356c37c8ecfSAndreas Gruenbacher 		 * drbd_alloc_pages due to pp_in_use > max_buffers. */
3573967deb1SAndreas Gruenbacher 		drbd_free_peer_req(mdev, peer_req);
358db830c46SAndreas Gruenbacher 		peer_req = NULL;
359b411b363SPhilipp Reisner 		inc_rs_pending(mdev);
36099920dc5SAndreas Gruenbacher 		err = drbd_send_drequest_csum(mdev, sector, size,
36153ea4331SLars Ellenberg 					      digest, digest_size,
362b411b363SPhilipp Reisner 					      P_CSUM_RS_REQUEST);
363b411b363SPhilipp Reisner 		kfree(digest);
364b411b363SPhilipp Reisner 	} else {
365b411b363SPhilipp Reisner 		dev_err(DEV, "kmalloc() of digest failed.\n");
36699920dc5SAndreas Gruenbacher 		err = -ENOMEM;
367b411b363SPhilipp Reisner 	}
368b411b363SPhilipp Reisner 
36953ea4331SLars Ellenberg out:
370db830c46SAndreas Gruenbacher 	if (peer_req)
3713967deb1SAndreas Gruenbacher 		drbd_free_peer_req(mdev, peer_req);
372b411b363SPhilipp Reisner 
37399920dc5SAndreas Gruenbacher 	if (unlikely(err))
374b411b363SPhilipp Reisner 		dev_err(DEV, "drbd_send_drequest(..., csum) failed\n");
37599920dc5SAndreas Gruenbacher 	return err;
376b411b363SPhilipp Reisner }
377b411b363SPhilipp Reisner 
378b411b363SPhilipp Reisner #define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)
379b411b363SPhilipp Reisner 
380b411b363SPhilipp Reisner static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
381b411b363SPhilipp Reisner {
382db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req;
383b411b363SPhilipp Reisner 
384b411b363SPhilipp Reisner 	if (!get_ldev(mdev))
38580a40e43SLars Ellenberg 		return -EIO;
386b411b363SPhilipp Reisner 
387e3555d85SPhilipp Reisner 	if (drbd_rs_should_slow_down(mdev, sector))
3880f0601f4SLars Ellenberg 		goto defer;
3890f0601f4SLars Ellenberg 
390b411b363SPhilipp Reisner 	/* GFP_TRY, because if there is no memory available right now, this may
391b411b363SPhilipp Reisner 	 * be rescheduled for later. It is "only" background resync, after all. */
3920db55363SAndreas Gruenbacher 	peer_req = drbd_alloc_peer_req(mdev, ID_SYNCER /* unused */, sector,
3930db55363SAndreas Gruenbacher 				       size, GFP_TRY);
394db830c46SAndreas Gruenbacher 	if (!peer_req)
39580a40e43SLars Ellenberg 		goto defer;
396b411b363SPhilipp Reisner 
397db830c46SAndreas Gruenbacher 	peer_req->w.cb = w_e_send_csum;
39887eeee41SPhilipp Reisner 	spin_lock_irq(&mdev->tconn->req_lock);
399db830c46SAndreas Gruenbacher 	list_add(&peer_req->w.list, &mdev->read_ee);
40087eeee41SPhilipp Reisner 	spin_unlock_irq(&mdev->tconn->req_lock);
401b411b363SPhilipp Reisner 
4020f0601f4SLars Ellenberg 	atomic_add(size >> 9, &mdev->rs_sect_ev);
403fbe29decSAndreas Gruenbacher 	if (drbd_submit_peer_request(mdev, peer_req, READ, DRBD_FAULT_RS_RD) == 0)
40480a40e43SLars Ellenberg 		return 0;
40545bb912bSLars Ellenberg 
40610f6d992SLars Ellenberg 	/* If it failed because of ENOMEM, retry should help.  If it failed
40710f6d992SLars Ellenberg 	 * because bio_add_page failed (probably broken lower level driver),
40810f6d992SLars Ellenberg 	 * retry may or may not help.
40910f6d992SLars Ellenberg 	 * If it does not, you may need to force disconnect. */
41087eeee41SPhilipp Reisner 	spin_lock_irq(&mdev->tconn->req_lock);
411db830c46SAndreas Gruenbacher 	list_del(&peer_req->w.list);
41287eeee41SPhilipp Reisner 	spin_unlock_irq(&mdev->tconn->req_lock);
41322cc37a9SLars Ellenberg 
4143967deb1SAndreas Gruenbacher 	drbd_free_peer_req(mdev, peer_req);
41580a40e43SLars Ellenberg defer:
41645bb912bSLars Ellenberg 	put_ldev(mdev);
41780a40e43SLars Ellenberg 	return -EAGAIN;
418b411b363SPhilipp Reisner }
419b411b363SPhilipp Reisner 
42099920dc5SAndreas Gruenbacher int w_resync_timer(struct drbd_work *w, int cancel)
421794abb75SPhilipp Reisner {
42200d56944SPhilipp Reisner 	struct drbd_conf *mdev = w->mdev;
423794abb75SPhilipp Reisner 	switch (mdev->state.conn) {
424794abb75SPhilipp Reisner 	case C_VERIFY_S:
42500d56944SPhilipp Reisner 		w_make_ov_request(w, cancel);
426794abb75SPhilipp Reisner 		break;
427794abb75SPhilipp Reisner 	case C_SYNC_TARGET:
42800d56944SPhilipp Reisner 		w_make_resync_request(w, cancel);
429794abb75SPhilipp Reisner 		break;
430794abb75SPhilipp Reisner 	}
431794abb75SPhilipp Reisner 
43299920dc5SAndreas Gruenbacher 	return 0;
433794abb75SPhilipp Reisner }
434794abb75SPhilipp Reisner 
435b411b363SPhilipp Reisner void resync_timer_fn(unsigned long data)
436b411b363SPhilipp Reisner {
437b411b363SPhilipp Reisner 	struct drbd_conf *mdev = (struct drbd_conf *) data;
438b411b363SPhilipp Reisner 
439794abb75SPhilipp Reisner 	if (list_empty(&mdev->resync_work.list))
440d5b27b01SLars Ellenberg 		drbd_queue_work(&mdev->tconn->sender_work, &mdev->resync_work);
441b411b363SPhilipp Reisner }
442b411b363SPhilipp Reisner 
443778f271dSPhilipp Reisner static void fifo_set(struct fifo_buffer *fb, int value)
444778f271dSPhilipp Reisner {
445778f271dSPhilipp Reisner 	int i;
446778f271dSPhilipp Reisner 
447778f271dSPhilipp Reisner 	for (i = 0; i < fb->size; i++)
448f10f2623SPhilipp Reisner 		fb->values[i] = value;
449778f271dSPhilipp Reisner }
450778f271dSPhilipp Reisner 
451778f271dSPhilipp Reisner static int fifo_push(struct fifo_buffer *fb, int value)
452778f271dSPhilipp Reisner {
453778f271dSPhilipp Reisner 	int ov;
454778f271dSPhilipp Reisner 
455778f271dSPhilipp Reisner 	ov = fb->values[fb->head_index];
456778f271dSPhilipp Reisner 	fb->values[fb->head_index++] = value;
457778f271dSPhilipp Reisner 
458778f271dSPhilipp Reisner 	if (fb->head_index >= fb->size)
459778f271dSPhilipp Reisner 		fb->head_index = 0;
460778f271dSPhilipp Reisner 
461778f271dSPhilipp Reisner 	return ov;
462778f271dSPhilipp Reisner }
463778f271dSPhilipp Reisner 
464778f271dSPhilipp Reisner static void fifo_add_val(struct fifo_buffer *fb, int value)
465778f271dSPhilipp Reisner {
466778f271dSPhilipp Reisner 	int i;
467778f271dSPhilipp Reisner 
468778f271dSPhilipp Reisner 	for (i = 0; i < fb->size; i++)
469778f271dSPhilipp Reisner 		fb->values[i] += value;
470778f271dSPhilipp Reisner }
471778f271dSPhilipp Reisner 
4729958c857SPhilipp Reisner struct fifo_buffer *fifo_alloc(int fifo_size)
4739958c857SPhilipp Reisner {
4749958c857SPhilipp Reisner 	struct fifo_buffer *fb;
4759958c857SPhilipp Reisner 
4768747d30aSLars Ellenberg 	fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_NOIO);
4779958c857SPhilipp Reisner 	if (!fb)
4789958c857SPhilipp Reisner 		return NULL;
4799958c857SPhilipp Reisner 
4809958c857SPhilipp Reisner 	fb->head_index = 0;
4819958c857SPhilipp Reisner 	fb->size = fifo_size;
4829958c857SPhilipp Reisner 	fb->total = 0;
4839958c857SPhilipp Reisner 
4849958c857SPhilipp Reisner 	return fb;
4859958c857SPhilipp Reisner }
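/* Illustrative behaviour of the plan fifo above (assumed values only):
 * with size == 3 and all slots at 0, fifo_add_val(fb, 2) turns the plan
 * into {2, 2, 2}; a following fifo_push(fb, 0) returns the oldest slot (2),
 * overwrites it with 0 and advances head_index, leaving {0, 2, 2}.
 * drbd_rs_controller() below uses exactly this add/push pair once per
 * SLEEP_TIME tick.
 */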
4869958c857SPhilipp Reisner 
4879d77a5feSPhilipp Reisner static int drbd_rs_controller(struct drbd_conf *mdev)
488778f271dSPhilipp Reisner {
489daeda1ccSPhilipp Reisner 	struct disk_conf *dc;
490778f271dSPhilipp Reisner 	unsigned int sect_in;  /* Number of sectors that came in since the last turn */
491778f271dSPhilipp Reisner 	unsigned int want;     /* The number of sectors we want in the proxy */
492778f271dSPhilipp Reisner 	int req_sect; /* Number of sectors to request in this turn */
493778f271dSPhilipp Reisner 	int correction; /* Number of additional sectors we need in the proxy */
494778f271dSPhilipp Reisner 	int cps; /* correction per invocation of drbd_rs_controller() */
495778f271dSPhilipp Reisner 	int steps; /* Number of time steps to plan ahead */
496778f271dSPhilipp Reisner 	int curr_corr;
497778f271dSPhilipp Reisner 	int max_sect;
498813472ceSPhilipp Reisner 	struct fifo_buffer *plan;
499778f271dSPhilipp Reisner 
500778f271dSPhilipp Reisner 	sect_in = atomic_xchg(&mdev->rs_sect_in, 0); /* Number of sectors that came in */
501778f271dSPhilipp Reisner 	mdev->rs_in_flight -= sect_in;
502778f271dSPhilipp Reisner 
503daeda1ccSPhilipp Reisner 	dc = rcu_dereference(mdev->ldev->disk_conf);
504813472ceSPhilipp Reisner 	plan = rcu_dereference(mdev->rs_plan_s);
505778f271dSPhilipp Reisner 
506813472ceSPhilipp Reisner 	steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */
507778f271dSPhilipp Reisner 
508778f271dSPhilipp Reisner 	if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */
509daeda1ccSPhilipp Reisner 		want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
510778f271dSPhilipp Reisner 	} else { /* normal path */
511daeda1ccSPhilipp Reisner 		want = dc->c_fill_target ? dc->c_fill_target :
512daeda1ccSPhilipp Reisner 			sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10);
513778f271dSPhilipp Reisner 	}
514778f271dSPhilipp Reisner 
515813472ceSPhilipp Reisner 	correction = want - mdev->rs_in_flight - plan->total;
516778f271dSPhilipp Reisner 
517778f271dSPhilipp Reisner 	/* Plan ahead */
518778f271dSPhilipp Reisner 	cps = correction / steps;
519813472ceSPhilipp Reisner 	fifo_add_val(plan, cps);
520813472ceSPhilipp Reisner 	plan->total += cps * steps;
521778f271dSPhilipp Reisner 
522778f271dSPhilipp Reisner 	/* What we do in this step */
523813472ceSPhilipp Reisner 	curr_corr = fifo_push(plan, 0);
524813472ceSPhilipp Reisner 	plan->total -= curr_corr;
525778f271dSPhilipp Reisner 
526778f271dSPhilipp Reisner 	req_sect = sect_in + curr_corr;
527778f271dSPhilipp Reisner 	if (req_sect < 0)
528778f271dSPhilipp Reisner 		req_sect = 0;
529778f271dSPhilipp Reisner 
530daeda1ccSPhilipp Reisner 	max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ;
531778f271dSPhilipp Reisner 	if (req_sect > max_sect)
532778f271dSPhilipp Reisner 		req_sect = max_sect;
533778f271dSPhilipp Reisner 
534778f271dSPhilipp Reisner 	/*
535778f271dSPhilipp Reisner 	dev_warn(DEV, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
536778f271dSPhilipp Reisner 		 sect_in, mdev->rs_in_flight, want, correction,
537778f271dSPhilipp Reisner 		 steps, cps, mdev->rs_planed, curr_corr, req_sect);
538778f271dSPhilipp Reisner 	*/
539778f271dSPhilipp Reisner 
540778f271dSPhilipp Reisner 	return req_sect;
541778f271dSPhilipp Reisner }
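/* Worked example with purely hypothetical numbers (c_fill_target = 1000
 * sectors, rs_in_flight = 600, a 10-slot plan holding 30 each, sect_in = 400):
 *	want       = 1000
 *	correction = 1000 - 600 - 300 = 100  ->  cps = 10
 *	after fifo_add_val() every slot is 40, plan->total = 400
 *	fifo_push() pops the oldest slot: curr_corr = 40, plan->total = 360
 *	req_sect   = 400 + 40 = 440, then clamped to [0, max_sect]
 */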
542778f271dSPhilipp Reisner 
5439d77a5feSPhilipp Reisner static int drbd_rs_number_requests(struct drbd_conf *mdev)
544e65f440dSLars Ellenberg {
545e65f440dSLars Ellenberg 	int number;
546813472ceSPhilipp Reisner 
547813472ceSPhilipp Reisner 	rcu_read_lock();
548813472ceSPhilipp Reisner 	if (rcu_dereference(mdev->rs_plan_s)->size) {
549e65f440dSLars Ellenberg 		number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9);
550e65f440dSLars Ellenberg 		mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
551e65f440dSLars Ellenberg 	} else {
552daeda1ccSPhilipp Reisner 		mdev->c_sync_rate = rcu_dereference(mdev->ldev->disk_conf)->resync_rate;
553e65f440dSLars Ellenberg 		number = SLEEP_TIME * mdev->c_sync_rate  / ((BM_BLOCK_SIZE / 1024) * HZ);
554e65f440dSLars Ellenberg 	}
555813472ceSPhilipp Reisner 	rcu_read_unlock();
556e65f440dSLars Ellenberg 
557e65f440dSLars Ellenberg 	/* ignore the number of pending requests; the resync controller should
558e65f440dSLars Ellenberg 	 * throttle down to the incoming reply rate soon enough anyway. */
559e65f440dSLars Ellenberg 	return number;
560e65f440dSLars Ellenberg }
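/* Rough numbers for the fixed-rate branch (assuming the usual BM_BLOCK_SIZE
 * of 4 KiB and SLEEP_TIME of HZ/10, i.e. 100 ms): with resync_rate set to
 * 10240 KiB/s we get number = (HZ/10) * 10240 / (4 * HZ) = 256 requests of
 * 4 KiB per tick, i.e. 1 MiB per 100 ms.  The dynamic branch instead converts
 * the controller's sector count to 4 KiB requests via ">> (BM_BLOCK_SHIFT - 9)".
 */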
561e65f440dSLars Ellenberg 
56299920dc5SAndreas Gruenbacher int w_make_resync_request(struct drbd_work *w, int cancel)
563b411b363SPhilipp Reisner {
56400d56944SPhilipp Reisner 	struct drbd_conf *mdev = w->mdev;
565b411b363SPhilipp Reisner 	unsigned long bit;
566b411b363SPhilipp Reisner 	sector_t sector;
567b411b363SPhilipp Reisner 	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
5681816a2b4SLars Ellenberg 	int max_bio_size;
569e65f440dSLars Ellenberg 	int number, rollback_i, size;
570b411b363SPhilipp Reisner 	int align, queued, sndbuf;
5710f0601f4SLars Ellenberg 	int i = 0;
572b411b363SPhilipp Reisner 
573b411b363SPhilipp Reisner 	if (unlikely(cancel))
57499920dc5SAndreas Gruenbacher 		return 0;
575b411b363SPhilipp Reisner 
576af85e8e8SLars Ellenberg 	if (mdev->rs_total == 0) {
577af85e8e8SLars Ellenberg 		/* empty resync? */
578af85e8e8SLars Ellenberg 		drbd_resync_finished(mdev);
57999920dc5SAndreas Gruenbacher 		return 0;
580af85e8e8SLars Ellenberg 	}
581af85e8e8SLars Ellenberg 
582b411b363SPhilipp Reisner 	if (!get_ldev(mdev)) {
583b411b363SPhilipp Reisner 		/* Since we only need to access mdev->rsync, a
584b411b363SPhilipp Reisner 		   get_ldev_if_state(mdev, D_FAILED) would be sufficient, but
585b411b363SPhilipp Reisner 		   continuing the resync with a broken disk makes no
586b411b363SPhilipp Reisner 		   sense at all */
587b411b363SPhilipp Reisner 		dev_err(DEV, "Disk broke down during resync!\n");
58899920dc5SAndreas Gruenbacher 		return 0;
589b411b363SPhilipp Reisner 	}
590b411b363SPhilipp Reisner 
5910cfdd247SPhilipp Reisner 	max_bio_size = queue_max_hw_sectors(mdev->rq_queue) << 9;
592e65f440dSLars Ellenberg 	number = drbd_rs_number_requests(mdev);
593e65f440dSLars Ellenberg 	if (number == 0)
5940f0601f4SLars Ellenberg 		goto requeue;
595b411b363SPhilipp Reisner 
596b411b363SPhilipp Reisner 	for (i = 0; i < number; i++) {
597b411b363SPhilipp Reisner 		/* Stop generating RS requests, when half of the send buffer is filled */
598e42325a5SPhilipp Reisner 		mutex_lock(&mdev->tconn->data.mutex);
599e42325a5SPhilipp Reisner 		if (mdev->tconn->data.socket) {
600e42325a5SPhilipp Reisner 			queued = mdev->tconn->data.socket->sk->sk_wmem_queued;
601e42325a5SPhilipp Reisner 			sndbuf = mdev->tconn->data.socket->sk->sk_sndbuf;
602b411b363SPhilipp Reisner 		} else {
603b411b363SPhilipp Reisner 			queued = 1;
604b411b363SPhilipp Reisner 			sndbuf = 0;
605b411b363SPhilipp Reisner 		}
606e42325a5SPhilipp Reisner 		mutex_unlock(&mdev->tconn->data.mutex);
607b411b363SPhilipp Reisner 		if (queued > sndbuf / 2)
608b411b363SPhilipp Reisner 			goto requeue;
609b411b363SPhilipp Reisner 
610b411b363SPhilipp Reisner next_sector:
611b411b363SPhilipp Reisner 		size = BM_BLOCK_SIZE;
612b411b363SPhilipp Reisner 		bit  = drbd_bm_find_next(mdev, mdev->bm_resync_fo);
613b411b363SPhilipp Reisner 
6144b0715f0SLars Ellenberg 		if (bit == DRBD_END_OF_BITMAP) {
615b411b363SPhilipp Reisner 			mdev->bm_resync_fo = drbd_bm_bits(mdev);
616b411b363SPhilipp Reisner 			put_ldev(mdev);
61799920dc5SAndreas Gruenbacher 			return 0;
618b411b363SPhilipp Reisner 		}
619b411b363SPhilipp Reisner 
620b411b363SPhilipp Reisner 		sector = BM_BIT_TO_SECT(bit);
621b411b363SPhilipp Reisner 
622e3555d85SPhilipp Reisner 		if (drbd_rs_should_slow_down(mdev, sector) ||
623e3555d85SPhilipp Reisner 		    drbd_try_rs_begin_io(mdev, sector)) {
624b411b363SPhilipp Reisner 			mdev->bm_resync_fo = bit;
625b411b363SPhilipp Reisner 			goto requeue;
626b411b363SPhilipp Reisner 		}
627b411b363SPhilipp Reisner 		mdev->bm_resync_fo = bit + 1;
628b411b363SPhilipp Reisner 
629b411b363SPhilipp Reisner 		if (unlikely(drbd_bm_test_bit(mdev, bit) == 0)) {
630b411b363SPhilipp Reisner 			drbd_rs_complete_io(mdev, sector);
631b411b363SPhilipp Reisner 			goto next_sector;
632b411b363SPhilipp Reisner 		}
633b411b363SPhilipp Reisner 
6341816a2b4SLars Ellenberg #if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
635b411b363SPhilipp Reisner 		/* try to find some adjacent bits.
636b411b363SPhilipp Reisner 		 * we stop if we have already the maximum req size.
637b411b363SPhilipp Reisner 		 *
638b411b363SPhilipp Reisner 		 * Additionally always align bigger requests, in order to
639b411b363SPhilipp Reisner 		 * be prepared for all stripe sizes of software RAIDs.
640b411b363SPhilipp Reisner 		 */
641b411b363SPhilipp Reisner 		align = 1;
642d207450cSPhilipp Reisner 		rollback_i = i;
643b411b363SPhilipp Reisner 		for (;;) {
6441816a2b4SLars Ellenberg 			if (size + BM_BLOCK_SIZE > max_bio_size)
645b411b363SPhilipp Reisner 				break;
646b411b363SPhilipp Reisner 
647b411b363SPhilipp Reisner 			/* Be always aligned */
648b411b363SPhilipp Reisner 			if (sector & ((1<<(align+3))-1))
649b411b363SPhilipp Reisner 				break;
650b411b363SPhilipp Reisner 
651b411b363SPhilipp Reisner 			/* do not cross extent boundaries */
652b411b363SPhilipp Reisner 			if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
653b411b363SPhilipp Reisner 				break;
654b411b363SPhilipp Reisner 			/* now, is it actually dirty, after all?
655b411b363SPhilipp Reisner 			 * caution, drbd_bm_test_bit is tri-state for some
656b411b363SPhilipp Reisner 			 * obscure reason; ( b == 0 ) would get the out-of-band
657b411b363SPhilipp Reisner 			 * only accidentally right because of the "oddly sized"
658b411b363SPhilipp Reisner 			 * adjustment below */
659b411b363SPhilipp Reisner 			if (drbd_bm_test_bit(mdev, bit+1) != 1)
660b411b363SPhilipp Reisner 				break;
661b411b363SPhilipp Reisner 			bit++;
662b411b363SPhilipp Reisner 			size += BM_BLOCK_SIZE;
663b411b363SPhilipp Reisner 			if ((BM_BLOCK_SIZE << align) <= size)
664b411b363SPhilipp Reisner 				align++;
665b411b363SPhilipp Reisner 			i++;
666b411b363SPhilipp Reisner 		}
667b411b363SPhilipp Reisner 		/* if we merged some,
668b411b363SPhilipp Reisner 		 * reset the offset to start the next drbd_bm_find_next from */
669b411b363SPhilipp Reisner 		if (size > BM_BLOCK_SIZE)
670b411b363SPhilipp Reisner 			mdev->bm_resync_fo = bit + 1;
671b411b363SPhilipp Reisner #endif
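		/* Worked example of the alignment rule in the merge loop above
		 * (hypothetical numbers): with align == 1 the mask is
		 * (1 << 4) - 1 == 15, so merging beyond 4 KiB requires the
		 * start sector to be 16-sector (8 KiB) aligned; every time the
		 * merged size reaches BM_BLOCK_SIZE << align, align grows and
		 * the required start alignment doubles accordingly. */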
672b411b363SPhilipp Reisner 
673b411b363SPhilipp Reisner 		/* adjust very last sectors, in case we are oddly sized */
674b411b363SPhilipp Reisner 		if (sector + (size>>9) > capacity)
675b411b363SPhilipp Reisner 			size = (capacity-sector)<<9;
676f399002eSLars Ellenberg 		if (mdev->tconn->agreed_pro_version >= 89 && mdev->tconn->csums_tfm) {
677b411b363SPhilipp Reisner 			switch (read_for_csum(mdev, sector, size)) {
67880a40e43SLars Ellenberg 			case -EIO: /* Disk failure */
679b411b363SPhilipp Reisner 				put_ldev(mdev);
68099920dc5SAndreas Gruenbacher 				return -EIO;
68180a40e43SLars Ellenberg 			case -EAGAIN: /* allocation failed, or ldev busy */
682b411b363SPhilipp Reisner 				drbd_rs_complete_io(mdev, sector);
683b411b363SPhilipp Reisner 				mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
684d207450cSPhilipp Reisner 				i = rollback_i;
685b411b363SPhilipp Reisner 				goto requeue;
68680a40e43SLars Ellenberg 			case 0:
68780a40e43SLars Ellenberg 				/* everything ok */
68880a40e43SLars Ellenberg 				break;
68980a40e43SLars Ellenberg 			default:
69080a40e43SLars Ellenberg 				BUG();
691b411b363SPhilipp Reisner 			}
692b411b363SPhilipp Reisner 		} else {
69399920dc5SAndreas Gruenbacher 			int err;
69499920dc5SAndreas Gruenbacher 
695b411b363SPhilipp Reisner 			inc_rs_pending(mdev);
69699920dc5SAndreas Gruenbacher 			err = drbd_send_drequest(mdev, P_RS_DATA_REQUEST,
69799920dc5SAndreas Gruenbacher 						 sector, size, ID_SYNCER);
69899920dc5SAndreas Gruenbacher 			if (err) {
699b411b363SPhilipp Reisner 				dev_err(DEV, "drbd_send_drequest() failed, aborting...\n");
700b411b363SPhilipp Reisner 				dec_rs_pending(mdev);
701b411b363SPhilipp Reisner 				put_ldev(mdev);
70299920dc5SAndreas Gruenbacher 				return err;
703b411b363SPhilipp Reisner 			}
704b411b363SPhilipp Reisner 		}
705b411b363SPhilipp Reisner 	}
706b411b363SPhilipp Reisner 
707b411b363SPhilipp Reisner 	if (mdev->bm_resync_fo >= drbd_bm_bits(mdev)) {
708b411b363SPhilipp Reisner 		/* last syncer _request_ was sent,
709b411b363SPhilipp Reisner 		 * but the P_RS_DATA_REPLY not yet received.  sync will end (and
710b411b363SPhilipp Reisner 		 * next sync group will resume), as soon as we receive the last
711b411b363SPhilipp Reisner 		 * resync data block, and the last bit is cleared.
712b411b363SPhilipp Reisner 		 * until then resync "work" is "inactive" ...
713b411b363SPhilipp Reisner 		 */
714b411b363SPhilipp Reisner 		put_ldev(mdev);
71599920dc5SAndreas Gruenbacher 		return 0;
716b411b363SPhilipp Reisner 	}
717b411b363SPhilipp Reisner 
718b411b363SPhilipp Reisner  requeue:
719778f271dSPhilipp Reisner 	mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
720b411b363SPhilipp Reisner 	mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
721b411b363SPhilipp Reisner 	put_ldev(mdev);
72299920dc5SAndreas Gruenbacher 	return 0;
723b411b363SPhilipp Reisner }
724b411b363SPhilipp Reisner 
72500d56944SPhilipp Reisner static int w_make_ov_request(struct drbd_work *w, int cancel)
726b411b363SPhilipp Reisner {
72700d56944SPhilipp Reisner 	struct drbd_conf *mdev = w->mdev;
728b411b363SPhilipp Reisner 	int number, i, size;
729b411b363SPhilipp Reisner 	sector_t sector;
730b411b363SPhilipp Reisner 	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
73158ffa580SLars Ellenberg 	bool stop_sector_reached = false;
732b411b363SPhilipp Reisner 
733b411b363SPhilipp Reisner 	if (unlikely(cancel))
734b411b363SPhilipp Reisner 		return 1;
735b411b363SPhilipp Reisner 
7362649f080SLars Ellenberg 	number = drbd_rs_number_requests(mdev);
737b411b363SPhilipp Reisner 
738b411b363SPhilipp Reisner 	sector = mdev->ov_position;
739b411b363SPhilipp Reisner 	for (i = 0; i < number; i++) {
74058ffa580SLars Ellenberg 		if (sector >= capacity)
741b411b363SPhilipp Reisner 			return 1;
74258ffa580SLars Ellenberg 
74358ffa580SLars Ellenberg 		/* We check for "finished" only in the reply path:
74458ffa580SLars Ellenberg 		 * w_e_end_ov_reply().
74558ffa580SLars Ellenberg 		 * We need to send at least one request out. */
74658ffa580SLars Ellenberg 		stop_sector_reached = i > 0
74758ffa580SLars Ellenberg 			&& verify_can_do_stop_sector(mdev)
74858ffa580SLars Ellenberg 			&& sector >= mdev->ov_stop_sector;
74958ffa580SLars Ellenberg 		if (stop_sector_reached)
75058ffa580SLars Ellenberg 			break;
751b411b363SPhilipp Reisner 
752b411b363SPhilipp Reisner 		size = BM_BLOCK_SIZE;
753b411b363SPhilipp Reisner 
754e3555d85SPhilipp Reisner 		if (drbd_rs_should_slow_down(mdev, sector) ||
755e3555d85SPhilipp Reisner 		    drbd_try_rs_begin_io(mdev, sector)) {
756b411b363SPhilipp Reisner 			mdev->ov_position = sector;
757b411b363SPhilipp Reisner 			goto requeue;
758b411b363SPhilipp Reisner 		}
759b411b363SPhilipp Reisner 
760b411b363SPhilipp Reisner 		if (sector + (size>>9) > capacity)
761b411b363SPhilipp Reisner 			size = (capacity-sector)<<9;
762b411b363SPhilipp Reisner 
763b411b363SPhilipp Reisner 		inc_rs_pending(mdev);
7645b9f499cSAndreas Gruenbacher 		if (drbd_send_ov_request(mdev, sector, size)) {
765b411b363SPhilipp Reisner 			dec_rs_pending(mdev);
766b411b363SPhilipp Reisner 			return 0;
767b411b363SPhilipp Reisner 		}
768b411b363SPhilipp Reisner 		sector += BM_SECT_PER_BIT;
769b411b363SPhilipp Reisner 	}
770b411b363SPhilipp Reisner 	mdev->ov_position = sector;
771b411b363SPhilipp Reisner 
772b411b363SPhilipp Reisner  requeue:
7732649f080SLars Ellenberg 	mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
77458ffa580SLars Ellenberg 	if (i == 0 || !stop_sector_reached)
775b411b363SPhilipp Reisner 		mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
776b411b363SPhilipp Reisner 	return 1;
777b411b363SPhilipp Reisner }
778b411b363SPhilipp Reisner 
77999920dc5SAndreas Gruenbacher int w_ov_finished(struct drbd_work *w, int cancel)
780b411b363SPhilipp Reisner {
78100d56944SPhilipp Reisner 	struct drbd_conf *mdev = w->mdev;
782b411b363SPhilipp Reisner 	kfree(w);
7838f7bed77SAndreas Gruenbacher 	ov_out_of_sync_print(mdev);
784b411b363SPhilipp Reisner 	drbd_resync_finished(mdev);
785b411b363SPhilipp Reisner 
78699920dc5SAndreas Gruenbacher 	return 0;
787b411b363SPhilipp Reisner }
788b411b363SPhilipp Reisner 
78999920dc5SAndreas Gruenbacher static int w_resync_finished(struct drbd_work *w, int cancel)
790b411b363SPhilipp Reisner {
79100d56944SPhilipp Reisner 	struct drbd_conf *mdev = w->mdev;
792b411b363SPhilipp Reisner 	kfree(w);
793b411b363SPhilipp Reisner 
794b411b363SPhilipp Reisner 	drbd_resync_finished(mdev);
795b411b363SPhilipp Reisner 
79699920dc5SAndreas Gruenbacher 	return 0;
797b411b363SPhilipp Reisner }
798b411b363SPhilipp Reisner 
799af85e8e8SLars Ellenberg static void ping_peer(struct drbd_conf *mdev)
800af85e8e8SLars Ellenberg {
8012a67d8b9SPhilipp Reisner 	struct drbd_tconn *tconn = mdev->tconn;
8022a67d8b9SPhilipp Reisner 
8032a67d8b9SPhilipp Reisner 	clear_bit(GOT_PING_ACK, &tconn->flags);
8042a67d8b9SPhilipp Reisner 	request_ping(tconn);
8052a67d8b9SPhilipp Reisner 	wait_event(tconn->ping_wait,
8062a67d8b9SPhilipp Reisner 		   test_bit(GOT_PING_ACK, &tconn->flags) || mdev->state.conn < C_CONNECTED);
807af85e8e8SLars Ellenberg }
808af85e8e8SLars Ellenberg 
809b411b363SPhilipp Reisner int drbd_resync_finished(struct drbd_conf *mdev)
810b411b363SPhilipp Reisner {
811b411b363SPhilipp Reisner 	unsigned long db, dt, dbdt;
812b411b363SPhilipp Reisner 	unsigned long n_oos;
813b411b363SPhilipp Reisner 	union drbd_state os, ns;
814b411b363SPhilipp Reisner 	struct drbd_work *w;
815b411b363SPhilipp Reisner 	char *khelper_cmd = NULL;
81626525618SLars Ellenberg 	int verify_done = 0;
817b411b363SPhilipp Reisner 
818b411b363SPhilipp Reisner 	/* Remove all elements from the resync LRU. Since future actions
819b411b363SPhilipp Reisner 	 * might set bits in the (main) bitmap, the entries in the
820b411b363SPhilipp Reisner 	 * resync LRU would otherwise be wrong. */
821b411b363SPhilipp Reisner 	if (drbd_rs_del_all(mdev)) {
822b411b363SPhilipp Reisner 		/* In case this is not possible now, most probably because
823b411b363SPhilipp Reisner 		 * there are P_RS_DATA_REPLY Packets lingering on the worker's
824b411b363SPhilipp Reisner 		 * there are P_RS_DATA_REPLY packets lingering on the worker's
825b411b363SPhilipp Reisner 		 * queue (or even the read operations for those packets
826b411b363SPhilipp Reisner 		 * are not finished by now).  Retry in 100ms. */
82720ee6390SPhilipp Reisner 		schedule_timeout_interruptible(HZ / 10);
828b411b363SPhilipp Reisner 		w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC);
829b411b363SPhilipp Reisner 		if (w) {
830b411b363SPhilipp Reisner 			w->cb = w_resync_finished;
8319b743da9SPhilipp Reisner 			w->mdev = mdev;
832d5b27b01SLars Ellenberg 			drbd_queue_work(&mdev->tconn->sender_work, w);
833b411b363SPhilipp Reisner 			return 1;
834b411b363SPhilipp Reisner 		}
835b411b363SPhilipp Reisner 		dev_err(DEV, "Warn failed to drbd_rs_del_all() and to kmalloc(w).\n");
836b411b363SPhilipp Reisner 	}
837b411b363SPhilipp Reisner 
838b411b363SPhilipp Reisner 	dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
839b411b363SPhilipp Reisner 	if (dt <= 0)
840b411b363SPhilipp Reisner 		dt = 1;
84158ffa580SLars Ellenberg 
842b411b363SPhilipp Reisner 	db = mdev->rs_total;
84358ffa580SLars Ellenberg 	/* adjust for verify start and stop sectors, respectively the reached position */
84458ffa580SLars Ellenberg 	if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
84558ffa580SLars Ellenberg 		db -= mdev->ov_left;
84658ffa580SLars Ellenberg 
847b411b363SPhilipp Reisner 	dbdt = Bit2KB(db/dt);
848b411b363SPhilipp Reisner 	mdev->rs_paused /= HZ;
849b411b363SPhilipp Reisner 
850b411b363SPhilipp Reisner 	if (!get_ldev(mdev))
851b411b363SPhilipp Reisner 		goto out;
852b411b363SPhilipp Reisner 
853af85e8e8SLars Ellenberg 	ping_peer(mdev);
854af85e8e8SLars Ellenberg 
85587eeee41SPhilipp Reisner 	spin_lock_irq(&mdev->tconn->req_lock);
85678bae59bSPhilipp Reisner 	os = drbd_read_state(mdev);
857b411b363SPhilipp Reisner 
85826525618SLars Ellenberg 	verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);
85926525618SLars Ellenberg 
860b411b363SPhilipp Reisner 	/* This protects us against multiple calls (that can happen in the presence
861b411b363SPhilipp Reisner 	   of application IO), and against connectivity loss just before we arrive here. */
862b411b363SPhilipp Reisner 	if (os.conn <= C_CONNECTED)
863b411b363SPhilipp Reisner 		goto out_unlock;
864b411b363SPhilipp Reisner 
865b411b363SPhilipp Reisner 	ns = os;
866b411b363SPhilipp Reisner 	ns.conn = C_CONNECTED;
867b411b363SPhilipp Reisner 
868b411b363SPhilipp Reisner 	dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
86926525618SLars Ellenberg 	     verify_done ? "Online verify" : "Resync",
870b411b363SPhilipp Reisner 	     dt + mdev->rs_paused, mdev->rs_paused, dbdt);
871b411b363SPhilipp Reisner 
872b411b363SPhilipp Reisner 	n_oos = drbd_bm_total_weight(mdev);
873b411b363SPhilipp Reisner 
874b411b363SPhilipp Reisner 	if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
875b411b363SPhilipp Reisner 		if (n_oos) {
876b411b363SPhilipp Reisner 			dev_alert(DEV, "Online verify found %lu %dk block out of sync!\n",
877b411b363SPhilipp Reisner 			      n_oos, Bit2KB(1));
878b411b363SPhilipp Reisner 			khelper_cmd = "out-of-sync";
879b411b363SPhilipp Reisner 		}
880b411b363SPhilipp Reisner 	} else {
881b411b363SPhilipp Reisner 		D_ASSERT((n_oos - mdev->rs_failed) == 0);
882b411b363SPhilipp Reisner 
883b411b363SPhilipp Reisner 		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
884b411b363SPhilipp Reisner 			khelper_cmd = "after-resync-target";
885b411b363SPhilipp Reisner 
886f399002eSLars Ellenberg 		if (mdev->tconn->csums_tfm && mdev->rs_total) {
887b411b363SPhilipp Reisner 			const unsigned long s = mdev->rs_same_csum;
888b411b363SPhilipp Reisner 			const unsigned long t = mdev->rs_total;
889b411b363SPhilipp Reisner 			const int ratio =
890b411b363SPhilipp Reisner 				(t == 0)     ? 0 :
891b411b363SPhilipp Reisner 			(t < 100000) ? ((s*100)/t) : (s/(t/100));
892b411b363SPhilipp Reisner 			dev_info(DEV, "%u %% had equal checksums, eliminated: %luK; "
893b411b363SPhilipp Reisner 			     "transferred %luK total %luK\n",
894b411b363SPhilipp Reisner 			     ratio,
895b411b363SPhilipp Reisner 			     Bit2KB(mdev->rs_same_csum),
896b411b363SPhilipp Reisner 			     Bit2KB(mdev->rs_total - mdev->rs_same_csum),
897b411b363SPhilipp Reisner 			     Bit2KB(mdev->rs_total));
898b411b363SPhilipp Reisner 		}
899b411b363SPhilipp Reisner 	}
900b411b363SPhilipp Reisner 
901b411b363SPhilipp Reisner 	if (mdev->rs_failed) {
902b411b363SPhilipp Reisner 		dev_info(DEV, "            %lu failed blocks\n", mdev->rs_failed);
903b411b363SPhilipp Reisner 
904b411b363SPhilipp Reisner 		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
905b411b363SPhilipp Reisner 			ns.disk = D_INCONSISTENT;
906b411b363SPhilipp Reisner 			ns.pdsk = D_UP_TO_DATE;
907b411b363SPhilipp Reisner 		} else {
908b411b363SPhilipp Reisner 			ns.disk = D_UP_TO_DATE;
909b411b363SPhilipp Reisner 			ns.pdsk = D_INCONSISTENT;
910b411b363SPhilipp Reisner 		}
911b411b363SPhilipp Reisner 	} else {
912b411b363SPhilipp Reisner 		ns.disk = D_UP_TO_DATE;
913b411b363SPhilipp Reisner 		ns.pdsk = D_UP_TO_DATE;
914b411b363SPhilipp Reisner 
915b411b363SPhilipp Reisner 		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
916b411b363SPhilipp Reisner 			if (mdev->p_uuid) {
917b411b363SPhilipp Reisner 				int i;
918b411b363SPhilipp Reisner 				for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
919b411b363SPhilipp Reisner 					_drbd_uuid_set(mdev, i, mdev->p_uuid[i]);
920b411b363SPhilipp Reisner 				drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_CURRENT]);
921b411b363SPhilipp Reisner 				_drbd_uuid_set(mdev, UI_CURRENT, mdev->p_uuid[UI_CURRENT]);
922b411b363SPhilipp Reisner 			} else {
923b411b363SPhilipp Reisner 				dev_err(DEV, "mdev->p_uuid is NULL! BUG\n");
924b411b363SPhilipp Reisner 			}
925b411b363SPhilipp Reisner 		}
926b411b363SPhilipp Reisner 
92762b0da3aSLars Ellenberg 		if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
92862b0da3aSLars Ellenberg 			/* for verify runs, we don't update uuids here,
92962b0da3aSLars Ellenberg 			 * so there would be nothing to report. */
930b411b363SPhilipp Reisner 			drbd_uuid_set_bm(mdev, 0UL);
93162b0da3aSLars Ellenberg 			drbd_print_uuids(mdev, "updated UUIDs");
932b411b363SPhilipp Reisner 			if (mdev->p_uuid) {
933b411b363SPhilipp Reisner 				/* Now the two UUID sets are equal, update what we
934b411b363SPhilipp Reisner 				 * know of the peer. */
935b411b363SPhilipp Reisner 				int i;
936b411b363SPhilipp Reisner 				for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
937b411b363SPhilipp Reisner 					mdev->p_uuid[i] = mdev->ldev->md.uuid[i];
938b411b363SPhilipp Reisner 			}
939b411b363SPhilipp Reisner 		}
94062b0da3aSLars Ellenberg 	}
941b411b363SPhilipp Reisner 
942b411b363SPhilipp Reisner 	_drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
943b411b363SPhilipp Reisner out_unlock:
94487eeee41SPhilipp Reisner 	spin_unlock_irq(&mdev->tconn->req_lock);
945b411b363SPhilipp Reisner 	put_ldev(mdev);
946b411b363SPhilipp Reisner out:
947b411b363SPhilipp Reisner 	mdev->rs_total  = 0;
948b411b363SPhilipp Reisner 	mdev->rs_failed = 0;
949b411b363SPhilipp Reisner 	mdev->rs_paused = 0;
95058ffa580SLars Ellenberg 
95158ffa580SLars Ellenberg 	/* reset start sector, if we reached end of device */
95258ffa580SLars Ellenberg 	if (verify_done && mdev->ov_left == 0)
953b411b363SPhilipp Reisner 		mdev->ov_start_sector = 0;
954b411b363SPhilipp Reisner 
95513d42685SLars Ellenberg 	drbd_md_sync(mdev);
95613d42685SLars Ellenberg 
957b411b363SPhilipp Reisner 	if (khelper_cmd)
958b411b363SPhilipp Reisner 		drbd_khelper(mdev, khelper_cmd);
959b411b363SPhilipp Reisner 
960b411b363SPhilipp Reisner 	return 1;
961b411b363SPhilipp Reisner }
962b411b363SPhilipp Reisner 
963b411b363SPhilipp Reisner /* helper */
964db830c46SAndreas Gruenbacher static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_peer_request *peer_req)
965b411b363SPhilipp Reisner {
966045417f7SAndreas Gruenbacher 	if (drbd_peer_req_has_active_page(peer_req)) {
967b411b363SPhilipp Reisner 		/* This might happen if sendpage() has not finished */
968db830c46SAndreas Gruenbacher 		int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
969435f0740SLars Ellenberg 		atomic_add(i, &mdev->pp_in_use_by_net);
970435f0740SLars Ellenberg 		atomic_sub(i, &mdev->pp_in_use);
97187eeee41SPhilipp Reisner 		spin_lock_irq(&mdev->tconn->req_lock);
972db830c46SAndreas Gruenbacher 		list_add_tail(&peer_req->w.list, &mdev->net_ee);
97387eeee41SPhilipp Reisner 		spin_unlock_irq(&mdev->tconn->req_lock);
974435f0740SLars Ellenberg 		wake_up(&drbd_pp_wait);
975b411b363SPhilipp Reisner 	} else
9763967deb1SAndreas Gruenbacher 		drbd_free_peer_req(mdev, peer_req);
977b411b363SPhilipp Reisner }
978b411b363SPhilipp Reisner 
979b411b363SPhilipp Reisner /**
980b411b363SPhilipp Reisner  * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
982b411b363SPhilipp Reisner  * @w:		work object.
983b411b363SPhilipp Reisner  * @cancel:	The connection will be closed anyway
984b411b363SPhilipp Reisner  */
98599920dc5SAndreas Gruenbacher int w_e_end_data_req(struct drbd_work *w, int cancel)
986b411b363SPhilipp Reisner {
987db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
98800d56944SPhilipp Reisner 	struct drbd_conf *mdev = w->mdev;
98999920dc5SAndreas Gruenbacher 	int err;
990b411b363SPhilipp Reisner 
991b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
9923967deb1SAndreas Gruenbacher 		drbd_free_peer_req(mdev, peer_req);
993b411b363SPhilipp Reisner 		dec_unacked(mdev);
99499920dc5SAndreas Gruenbacher 		return 0;
995b411b363SPhilipp Reisner 	}
996b411b363SPhilipp Reisner 
997db830c46SAndreas Gruenbacher 	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
99899920dc5SAndreas Gruenbacher 		err = drbd_send_block(mdev, P_DATA_REPLY, peer_req);
999b411b363SPhilipp Reisner 	} else {
1000b411b363SPhilipp Reisner 		if (__ratelimit(&drbd_ratelimit_state))
1001b411b363SPhilipp Reisner 			dev_err(DEV, "Sending NegDReply. sector=%llus.\n",
1002db830c46SAndreas Gruenbacher 			    (unsigned long long)peer_req->i.sector);
1003b411b363SPhilipp Reisner 
100499920dc5SAndreas Gruenbacher 		err = drbd_send_ack(mdev, P_NEG_DREPLY, peer_req);
1005b411b363SPhilipp Reisner 	}
1006b411b363SPhilipp Reisner 
1007b411b363SPhilipp Reisner 	dec_unacked(mdev);
1008b411b363SPhilipp Reisner 
1009db830c46SAndreas Gruenbacher 	move_to_net_ee_or_free(mdev, peer_req);
1010b411b363SPhilipp Reisner 
101199920dc5SAndreas Gruenbacher 	if (unlikely(err))
1012b411b363SPhilipp Reisner 		dev_err(DEV, "drbd_send_block() failed\n");
101399920dc5SAndreas Gruenbacher 	return err;
1014b411b363SPhilipp Reisner }
1015b411b363SPhilipp Reisner 
1016b411b363SPhilipp Reisner /**
1017a209b4aeSAndreas Gruenbacher  * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
1019b411b363SPhilipp Reisner  * @w:		work object.
1020b411b363SPhilipp Reisner  * @cancel:	The connection will be closed anyway
1021b411b363SPhilipp Reisner  */
102299920dc5SAndreas Gruenbacher int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
1023b411b363SPhilipp Reisner {
1024db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
102500d56944SPhilipp Reisner 	struct drbd_conf *mdev = w->mdev;
102699920dc5SAndreas Gruenbacher 	int err;
1027b411b363SPhilipp Reisner 
1028b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
10293967deb1SAndreas Gruenbacher 		drbd_free_peer_req(mdev, peer_req);
1030b411b363SPhilipp Reisner 		dec_unacked(mdev);
103199920dc5SAndreas Gruenbacher 		return 0;
1032b411b363SPhilipp Reisner 	}
1033b411b363SPhilipp Reisner 
1034b411b363SPhilipp Reisner 	if (get_ldev_if_state(mdev, D_FAILED)) {
1035db830c46SAndreas Gruenbacher 		drbd_rs_complete_io(mdev, peer_req->i.sector);
1036b411b363SPhilipp Reisner 		put_ldev(mdev);
1037b411b363SPhilipp Reisner 	}
1038b411b363SPhilipp Reisner 
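1038b411b363SPhilipp Reisner 	/* in Ahead mode we do not serve resync requests; tell the peer this one is cancelled */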
1039d612d309SPhilipp Reisner 	if (mdev->state.conn == C_AHEAD) {
104099920dc5SAndreas Gruenbacher 		err = drbd_send_ack(mdev, P_RS_CANCEL, peer_req);
1041db830c46SAndreas Gruenbacher 	} else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1042b411b363SPhilipp Reisner 		if (likely(mdev->state.pdsk >= D_INCONSISTENT)) {
1043b411b363SPhilipp Reisner 			inc_rs_pending(mdev);
104499920dc5SAndreas Gruenbacher 			err = drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req);
1045b411b363SPhilipp Reisner 		} else {
1046b411b363SPhilipp Reisner 			if (__ratelimit(&drbd_ratelimit_state))
1047b411b363SPhilipp Reisner 				dev_err(DEV, "Not sending RSDataReply, "
1048b411b363SPhilipp Reisner 				    "partner DISKLESS!\n");
104999920dc5SAndreas Gruenbacher 			err = 0;
1050b411b363SPhilipp Reisner 		}
1051b411b363SPhilipp Reisner 	} else {
1052b411b363SPhilipp Reisner 		if (__ratelimit(&drbd_ratelimit_state))
1053b411b363SPhilipp Reisner 			dev_err(DEV, "Sending NegRSDReply. sector %llus.\n",
1054db830c46SAndreas Gruenbacher 			    (unsigned long long)peer_req->i.sector);
1055b411b363SPhilipp Reisner 
105699920dc5SAndreas Gruenbacher 		err = drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req);
1057b411b363SPhilipp Reisner 
1058b411b363SPhilipp Reisner 		/* update resync data with failure */
1059db830c46SAndreas Gruenbacher 		drbd_rs_failed_io(mdev, peer_req->i.sector, peer_req->i.size);
1060b411b363SPhilipp Reisner 	}
1061b411b363SPhilipp Reisner 
1062b411b363SPhilipp Reisner 	dec_unacked(mdev);
1063b411b363SPhilipp Reisner 
1064db830c46SAndreas Gruenbacher 	move_to_net_ee_or_free(mdev, peer_req);
1065b411b363SPhilipp Reisner 
106699920dc5SAndreas Gruenbacher 	if (unlikely(err))
1067b411b363SPhilipp Reisner 		dev_err(DEV, "drbd_send_block() failed\n");
106899920dc5SAndreas Gruenbacher 	return err;
1069b411b363SPhilipp Reisner }
1070b411b363SPhilipp Reisner 
107199920dc5SAndreas Gruenbacher int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
1072b411b363SPhilipp Reisner {
1073db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
107400d56944SPhilipp Reisner 	struct drbd_conf *mdev = w->mdev;
1075b411b363SPhilipp Reisner 	struct digest_info *di;
1076b411b363SPhilipp Reisner 	int digest_size;
1077b411b363SPhilipp Reisner 	void *digest = NULL;
107899920dc5SAndreas Gruenbacher 	int err, eq = 0;
1079b411b363SPhilipp Reisner 
1080b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
10813967deb1SAndreas Gruenbacher 		drbd_free_peer_req(mdev, peer_req);
1082b411b363SPhilipp Reisner 		dec_unacked(mdev);
108399920dc5SAndreas Gruenbacher 		return 0;
1084b411b363SPhilipp Reisner 	}
1085b411b363SPhilipp Reisner 
10861d53f09eSLars Ellenberg 	if (get_ldev(mdev)) {
1087db830c46SAndreas Gruenbacher 		drbd_rs_complete_io(mdev, peer_req->i.sector);
10881d53f09eSLars Ellenberg 		put_ldev(mdev);
10891d53f09eSLars Ellenberg 	}
1090b411b363SPhilipp Reisner 
1091db830c46SAndreas Gruenbacher 	di = peer_req->digest;
1092b411b363SPhilipp Reisner 
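1092b411b363SPhilipp Reisner 	/* compare the digest the peer sent with one computed over our local block;
1092b411b363SPhilipp Reisner 	 * if they match, the block is already in sync and no data has to be sent */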
1093db830c46SAndreas Gruenbacher 	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1094b411b363SPhilipp Reisner 		/* quick hack to try to avoid a race against reconfiguration.
1095b411b363SPhilipp Reisner 		 * a real fix would be much more involved,
1096b411b363SPhilipp Reisner 		 * introducing more locking mechanisms */
1097f399002eSLars Ellenberg 		if (mdev->tconn->csums_tfm) {
1098f399002eSLars Ellenberg 			digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm);
1099b411b363SPhilipp Reisner 			D_ASSERT(digest_size == di->digest_size);
1100b411b363SPhilipp Reisner 			digest = kmalloc(digest_size, GFP_NOIO);
1101b411b363SPhilipp Reisner 		}
1102b411b363SPhilipp Reisner 		if (digest) {
1103f399002eSLars Ellenberg 			drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest);
1104b411b363SPhilipp Reisner 			eq = !memcmp(digest, di->digest, digest_size);
1105b411b363SPhilipp Reisner 			kfree(digest);
1106b411b363SPhilipp Reisner 		}
1107b411b363SPhilipp Reisner 
1108b411b363SPhilipp Reisner 		if (eq) {
1109db830c46SAndreas Gruenbacher 			drbd_set_in_sync(mdev, peer_req->i.sector, peer_req->i.size);
1110676396d5SLars Ellenberg 			/* rs_same_csums unit is BM_BLOCK_SIZE */
1111db830c46SAndreas Gruenbacher 			mdev->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT;
111299920dc5SAndreas Gruenbacher 			err = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, peer_req);
1113b411b363SPhilipp Reisner 		} else {
1114b411b363SPhilipp Reisner 			inc_rs_pending(mdev);
1115db830c46SAndreas Gruenbacher 			peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
1116db830c46SAndreas Gruenbacher 			peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */
1117204bba99SPhilipp Reisner 			kfree(di);
111899920dc5SAndreas Gruenbacher 			err = drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req);
1119b411b363SPhilipp Reisner 		}
1120b411b363SPhilipp Reisner 	} else {
112199920dc5SAndreas Gruenbacher 		err = drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req);
1122b411b363SPhilipp Reisner 		if (__ratelimit(&drbd_ratelimit_state))
1123b411b363SPhilipp Reisner 			dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n");
1124b411b363SPhilipp Reisner 	}
1125b411b363SPhilipp Reisner 
1126b411b363SPhilipp Reisner 	dec_unacked(mdev);
1127db830c46SAndreas Gruenbacher 	move_to_net_ee_or_free(mdev, peer_req);
1128b411b363SPhilipp Reisner 
112999920dc5SAndreas Gruenbacher 	if (unlikely(err))
1130b411b363SPhilipp Reisner 		dev_err(DEV, "drbd_send_block/ack() failed\n");
113199920dc5SAndreas Gruenbacher 	return err;
1132b411b363SPhilipp Reisner }
1133b411b363SPhilipp Reisner 
113499920dc5SAndreas Gruenbacher int w_e_end_ov_req(struct drbd_work *w, int cancel)
1135b411b363SPhilipp Reisner {
1136db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
113700d56944SPhilipp Reisner 	struct drbd_conf *mdev = w->mdev;
1138db830c46SAndreas Gruenbacher 	sector_t sector = peer_req->i.sector;
1139db830c46SAndreas Gruenbacher 	unsigned int size = peer_req->i.size;
1140b411b363SPhilipp Reisner 	int digest_size;
1141b411b363SPhilipp Reisner 	void *digest;
114299920dc5SAndreas Gruenbacher 	int err = 0;
1143b411b363SPhilipp Reisner 
1144b411b363SPhilipp Reisner 	if (unlikely(cancel))
1145b411b363SPhilipp Reisner 		goto out;
1146b411b363SPhilipp Reisner 
1147f399002eSLars Ellenberg 	digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm);
1148b411b363SPhilipp Reisner 	digest = kmalloc(digest_size, GFP_NOIO);
11498f21420eSPhilipp Reisner 	if (!digest) {
115099920dc5SAndreas Gruenbacher 		err = 1;	/* terminate the connection in case the allocation failed */
11518f21420eSPhilipp Reisner 		goto out;
11528f21420eSPhilipp Reisner 	}
11538f21420eSPhilipp Reisner 
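11538f21420eSPhilipp Reisner 	/* on a local read error send an all-zero digest; the comparison on the
11538f21420eSPhilipp Reisner 	 * other side will fail and the block gets reported as out of sync */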
1154db830c46SAndreas Gruenbacher 	if (likely(!(peer_req->flags & EE_WAS_ERROR)))
1155f399002eSLars Ellenberg 		drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest);
11568f21420eSPhilipp Reisner 	else
11578f21420eSPhilipp Reisner 		memset(digest, 0, digest_size);
11588f21420eSPhilipp Reisner 
115953ea4331SLars Ellenberg 	/* Free e and pages before send.
116053ea4331SLars Ellenberg 	 * In case we block on congestion, we could otherwise run into
116153ea4331SLars Ellenberg 	 * some distributed deadlock, if the other side blocks on
116253ea4331SLars Ellenberg 	 * congestion as well, because our receiver blocks in
1163c37c8ecfSAndreas Gruenbacher 	 * drbd_alloc_pages due to pp_in_use > max_buffers. */
11643967deb1SAndreas Gruenbacher 	drbd_free_peer_req(mdev, peer_req);
1165db830c46SAndreas Gruenbacher 	peer_req = NULL;
1166b411b363SPhilipp Reisner 	inc_rs_pending(mdev);
116799920dc5SAndreas Gruenbacher 	err = drbd_send_drequest_csum(mdev, sector, size, digest, digest_size, P_OV_REPLY);
116899920dc5SAndreas Gruenbacher 	if (err)
1169b411b363SPhilipp Reisner 		dec_rs_pending(mdev);
1170b411b363SPhilipp Reisner 	kfree(digest);
1171b411b363SPhilipp Reisner 
1172b411b363SPhilipp Reisner out:
1173db830c46SAndreas Gruenbacher 	if (peer_req)
11743967deb1SAndreas Gruenbacher 		drbd_free_peer_req(mdev, peer_req);
1175b411b363SPhilipp Reisner 	dec_unacked(mdev);
117699920dc5SAndreas Gruenbacher 	return err;
1177b411b363SPhilipp Reisner }
1178b411b363SPhilipp Reisner 
11798f7bed77SAndreas Gruenbacher void drbd_ov_out_of_sync_found(struct drbd_conf *mdev, sector_t sector, int size)
1180b411b363SPhilipp Reisner {
1181b411b363SPhilipp Reisner 	if (mdev->ov_last_oos_start + mdev->ov_last_oos_size == sector) {
1182b411b363SPhilipp Reisner 		mdev->ov_last_oos_size += size>>9;
1183b411b363SPhilipp Reisner 	} else {
1184b411b363SPhilipp Reisner 		mdev->ov_last_oos_start = sector;
1185b411b363SPhilipp Reisner 		mdev->ov_last_oos_size = size>>9;
1186b411b363SPhilipp Reisner 	}
1187b411b363SPhilipp Reisner 	drbd_set_out_of_sync(mdev, sector, size);
1188b411b363SPhilipp Reisner }
1189b411b363SPhilipp Reisner 
119099920dc5SAndreas Gruenbacher int w_e_end_ov_reply(struct drbd_work *w, int cancel)
1191b411b363SPhilipp Reisner {
1192db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
119300d56944SPhilipp Reisner 	struct drbd_conf *mdev = w->mdev;
1194b411b363SPhilipp Reisner 	struct digest_info *di;
1195b411b363SPhilipp Reisner 	void *digest;
1196db830c46SAndreas Gruenbacher 	sector_t sector = peer_req->i.sector;
1197db830c46SAndreas Gruenbacher 	unsigned int size = peer_req->i.size;
119853ea4331SLars Ellenberg 	int digest_size;
119999920dc5SAndreas Gruenbacher 	int err, eq = 0;
120058ffa580SLars Ellenberg 	bool stop_sector_reached = false;
1201b411b363SPhilipp Reisner 
1202b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
12033967deb1SAndreas Gruenbacher 		drbd_free_peer_req(mdev, peer_req);
1204b411b363SPhilipp Reisner 		dec_unacked(mdev);
120599920dc5SAndreas Gruenbacher 		return 0;
1206b411b363SPhilipp Reisner 	}
1207b411b363SPhilipp Reisner 
1208b411b363SPhilipp Reisner 	/* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
1209b411b363SPhilipp Reisner 	 * the resync lru has been cleaned up already */
12101d53f09eSLars Ellenberg 	if (get_ldev(mdev)) {
1211db830c46SAndreas Gruenbacher 		drbd_rs_complete_io(mdev, peer_req->i.sector);
12121d53f09eSLars Ellenberg 		put_ldev(mdev);
12131d53f09eSLars Ellenberg 	}
1214b411b363SPhilipp Reisner 
1215db830c46SAndreas Gruenbacher 	di = peer_req->digest;
1216b411b363SPhilipp Reisner 
1217db830c46SAndreas Gruenbacher 	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1218f399002eSLars Ellenberg 		digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm);
1219b411b363SPhilipp Reisner 		digest = kmalloc(digest_size, GFP_NOIO);
1220b411b363SPhilipp Reisner 		if (digest) {
1221f399002eSLars Ellenberg 			drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest);
1222b411b363SPhilipp Reisner 
1223b411b363SPhilipp Reisner 			D_ASSERT(digest_size == di->digest_size);
1224b411b363SPhilipp Reisner 			eq = !memcmp(digest, di->digest, digest_size);
1225b411b363SPhilipp Reisner 			kfree(digest);
1226b411b363SPhilipp Reisner 		}
1227b411b363SPhilipp Reisner 	}
1228b411b363SPhilipp Reisner 
12299676c760SLars Ellenberg 	/* Free peer_req and pages before send.
123053ea4331SLars Ellenberg 	 * In case we block on congestion, we could otherwise run into
123153ea4331SLars Ellenberg 	 * some distributed deadlock, if the other side blocks on
123253ea4331SLars Ellenberg 	 * congestion as well, because our receiver blocks in
1233c37c8ecfSAndreas Gruenbacher 	 * drbd_alloc_pages due to pp_in_use > max_buffers. */
12343967deb1SAndreas Gruenbacher 	drbd_free_peer_req(mdev, peer_req);
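12343967deb1SAndreas Gruenbacher 	/* a mismatch records the block as out of sync and extends the running
12343967deb1SAndreas Gruenbacher 	 * report range; a match flushes whatever range was collected so far */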
1235b411b363SPhilipp Reisner 	if (!eq)
12368f7bed77SAndreas Gruenbacher 		drbd_ov_out_of_sync_found(mdev, sector, size);
1237b411b363SPhilipp Reisner 	else
12388f7bed77SAndreas Gruenbacher 		ov_out_of_sync_print(mdev);
1239b411b363SPhilipp Reisner 
124099920dc5SAndreas Gruenbacher 	err = drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size,
1241b411b363SPhilipp Reisner 			       eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
1242b411b363SPhilipp Reisner 
124353ea4331SLars Ellenberg 	dec_unacked(mdev);
1244b411b363SPhilipp Reisner 
1245ea5442afSLars Ellenberg 	--mdev->ov_left;
1246ea5442afSLars Ellenberg 
1247ea5442afSLars Ellenberg 	/* let's advance progress step marks only for every other megabyte */
1248ea5442afSLars Ellenberg 	if ((mdev->ov_left & 0x200) == 0x200)
1249ea5442afSLars Ellenberg 		drbd_advance_rs_marks(mdev, mdev->ov_left);
1250ea5442afSLars Ellenberg 
125158ffa580SLars Ellenberg 	stop_sector_reached = verify_can_do_stop_sector(mdev) &&
125258ffa580SLars Ellenberg 		(sector + (size>>9)) >= mdev->ov_stop_sector;
125358ffa580SLars Ellenberg 
125458ffa580SLars Ellenberg 	if (mdev->ov_left == 0 || stop_sector_reached) {
12558f7bed77SAndreas Gruenbacher 		ov_out_of_sync_print(mdev);
1256b411b363SPhilipp Reisner 		drbd_resync_finished(mdev);
1257b411b363SPhilipp Reisner 	}
1258b411b363SPhilipp Reisner 
125999920dc5SAndreas Gruenbacher 	return err;
1260b411b363SPhilipp Reisner }
1261b411b363SPhilipp Reisner 
126299920dc5SAndreas Gruenbacher int w_prev_work_done(struct drbd_work *w, int cancel)
1263b411b363SPhilipp Reisner {
1264b411b363SPhilipp Reisner 	struct drbd_wq_barrier *b = container_of(w, struct drbd_wq_barrier, w);
126500d56944SPhilipp Reisner 
1266b411b363SPhilipp Reisner 	complete(&b->done);
126799920dc5SAndreas Gruenbacher 	return 0;
1268b411b363SPhilipp Reisner }
1269b411b363SPhilipp Reisner 
1270b6dd1a89SLars Ellenberg /* FIXME
1271b6dd1a89SLars Ellenberg  * We need to track the number of pending barrier acks,
1272b6dd1a89SLars Ellenberg  * and to be able to wait for them.
1273b6dd1a89SLars Ellenberg  * See also comment in drbd_adm_attach before drbd_suspend_io.
1274b6dd1a89SLars Ellenberg  */
1275b6dd1a89SLars Ellenberg int drbd_send_barrier(struct drbd_tconn *tconn)
1276b411b363SPhilipp Reisner {
12779f5bdc33SAndreas Gruenbacher 	struct p_barrier *p;
1278b6dd1a89SLars Ellenberg 	struct drbd_socket *sock;
1279b411b363SPhilipp Reisner 
1280b6dd1a89SLars Ellenberg 	sock = &tconn->data;
1281b6dd1a89SLars Ellenberg 	p = conn_prepare_command(tconn, sock);
12829f5bdc33SAndreas Gruenbacher 	if (!p)
12839f5bdc33SAndreas Gruenbacher 		return -EIO;
1284b6dd1a89SLars Ellenberg 	p->barrier = tconn->send.current_epoch_nr;
1285b6dd1a89SLars Ellenberg 	p->pad = 0;
1286b6dd1a89SLars Ellenberg 	tconn->send.current_epoch_writes = 0;
1287b6dd1a89SLars Ellenberg 
1288b6dd1a89SLars Ellenberg 	return conn_send_command(tconn, sock, P_BARRIER, sizeof(*p), NULL, 0);
1289b411b363SPhilipp Reisner }
1290b411b363SPhilipp Reisner 
129199920dc5SAndreas Gruenbacher int w_send_write_hint(struct drbd_work *w, int cancel)
1292b411b363SPhilipp Reisner {
129300d56944SPhilipp Reisner 	struct drbd_conf *mdev = w->mdev;
12949f5bdc33SAndreas Gruenbacher 	struct drbd_socket *sock;
12959f5bdc33SAndreas Gruenbacher 
1296b411b363SPhilipp Reisner 	if (cancel)
129799920dc5SAndreas Gruenbacher 		return 0;
12989f5bdc33SAndreas Gruenbacher 	sock = &mdev->tconn->data;
12999f5bdc33SAndreas Gruenbacher 	if (!drbd_prepare_command(mdev, sock))
13009f5bdc33SAndreas Gruenbacher 		return -EIO;
1301e658983aSAndreas Gruenbacher 	return drbd_send_command(mdev, sock, P_UNPLUG_REMOTE, 0, NULL, 0);
1302b411b363SPhilipp Reisner }
1303b411b363SPhilipp Reisner 
13044eb9b3cbSLars Ellenberg static void re_init_if_first_write(struct drbd_tconn *tconn, unsigned int epoch)
13054eb9b3cbSLars Ellenberg {
13064eb9b3cbSLars Ellenberg 	if (!tconn->send.seen_any_write_yet) {
13074eb9b3cbSLars Ellenberg 		tconn->send.seen_any_write_yet = true;
13084eb9b3cbSLars Ellenberg 		tconn->send.current_epoch_nr = epoch;
13094eb9b3cbSLars Ellenberg 		tconn->send.current_epoch_writes = 0;
13104eb9b3cbSLars Ellenberg 	}
13114eb9b3cbSLars Ellenberg }
13124eb9b3cbSLars Ellenberg 
13134eb9b3cbSLars Ellenberg static void maybe_send_barrier(struct drbd_tconn *tconn, unsigned int epoch)
13144eb9b3cbSLars Ellenberg {
13154eb9b3cbSLars Ellenberg 	/* nothing to close yet before the first write on this connection */
13164eb9b3cbSLars Ellenberg 	if (!tconn->send.seen_any_write_yet)
13174eb9b3cbSLars Ellenberg 		return;
13184eb9b3cbSLars Ellenberg 	if (tconn->send.current_epoch_nr != epoch) {
13194eb9b3cbSLars Ellenberg 		if (tconn->send.current_epoch_writes)
13204eb9b3cbSLars Ellenberg 			drbd_send_barrier(tconn);
13214eb9b3cbSLars Ellenberg 		tconn->send.current_epoch_nr = epoch;
13224eb9b3cbSLars Ellenberg 	}
13234eb9b3cbSLars Ellenberg }
13244eb9b3cbSLars Ellenberg 
13258f7bed77SAndreas Gruenbacher int w_send_out_of_sync(struct drbd_work *w, int cancel)
132673a01a18SPhilipp Reisner {
132773a01a18SPhilipp Reisner 	struct drbd_request *req = container_of(w, struct drbd_request, w);
132800d56944SPhilipp Reisner 	struct drbd_conf *mdev = w->mdev;
1329b6dd1a89SLars Ellenberg 	struct drbd_tconn *tconn = mdev->tconn;
133099920dc5SAndreas Gruenbacher 	int err;
133173a01a18SPhilipp Reisner 
133273a01a18SPhilipp Reisner 	if (unlikely(cancel)) {
13338554df1cSAndreas Gruenbacher 		req_mod(req, SEND_CANCELED);
133499920dc5SAndreas Gruenbacher 		return 0;
133573a01a18SPhilipp Reisner 	}
133673a01a18SPhilipp Reisner 
1337b6dd1a89SLars Ellenberg 	/* this time, no tconn->send.current_epoch_writes++;
1338b6dd1a89SLars Ellenberg 	 * If it was sent, it was the closing barrier for the last
1339b6dd1a89SLars Ellenberg 	 * replicated epoch, before we went into AHEAD mode.
1340b6dd1a89SLars Ellenberg 	 * No more barriers will be sent, until we leave AHEAD mode again. */
13414eb9b3cbSLars Ellenberg 	maybe_send_barrier(tconn, req->epoch);
1342b6dd1a89SLars Ellenberg 
13438f7bed77SAndreas Gruenbacher 	err = drbd_send_out_of_sync(mdev, req);
13448554df1cSAndreas Gruenbacher 	req_mod(req, OOS_HANDED_TO_NETWORK);
134573a01a18SPhilipp Reisner 
134699920dc5SAndreas Gruenbacher 	return err;
134773a01a18SPhilipp Reisner }
134873a01a18SPhilipp Reisner 
1349b411b363SPhilipp Reisner /**
1350b411b363SPhilipp Reisner  * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
1352b411b363SPhilipp Reisner  * @w:		work object.
1353b411b363SPhilipp Reisner  * @cancel:	The connection will be closed anyway
1354b411b363SPhilipp Reisner  */
135599920dc5SAndreas Gruenbacher int w_send_dblock(struct drbd_work *w, int cancel)
1356b411b363SPhilipp Reisner {
1357b411b363SPhilipp Reisner 	struct drbd_request *req = container_of(w, struct drbd_request, w);
135800d56944SPhilipp Reisner 	struct drbd_conf *mdev = w->mdev;
1359b6dd1a89SLars Ellenberg 	struct drbd_tconn *tconn = mdev->tconn;
136099920dc5SAndreas Gruenbacher 	int err;
1361b411b363SPhilipp Reisner 
1362b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
13638554df1cSAndreas Gruenbacher 		req_mod(req, SEND_CANCELED);
136499920dc5SAndreas Gruenbacher 		return 0;
1365b411b363SPhilipp Reisner 	}
1366b411b363SPhilipp Reisner 
13674eb9b3cbSLars Ellenberg 	re_init_if_first_write(tconn, req->epoch);
13684eb9b3cbSLars Ellenberg 	maybe_send_barrier(tconn, req->epoch);
1369b6dd1a89SLars Ellenberg 	tconn->send.current_epoch_writes++;
1370b6dd1a89SLars Ellenberg 
137199920dc5SAndreas Gruenbacher 	err = drbd_send_dblock(mdev, req);
137299920dc5SAndreas Gruenbacher 	req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
1373b411b363SPhilipp Reisner 
137499920dc5SAndreas Gruenbacher 	return err;
1375b411b363SPhilipp Reisner }
1376b411b363SPhilipp Reisner 
1377b411b363SPhilipp Reisner /**
1378b411b363SPhilipp Reisner  * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
1380b411b363SPhilipp Reisner  * @w:		work object.
1381b411b363SPhilipp Reisner  * @cancel:	The connection will be closed anyway
1382b411b363SPhilipp Reisner  */
138399920dc5SAndreas Gruenbacher int w_send_read_req(struct drbd_work *w, int cancel)
1384b411b363SPhilipp Reisner {
1385b411b363SPhilipp Reisner 	struct drbd_request *req = container_of(w, struct drbd_request, w);
138600d56944SPhilipp Reisner 	struct drbd_conf *mdev = w->mdev;
1387b6dd1a89SLars Ellenberg 	struct drbd_tconn *tconn = mdev->tconn;
138899920dc5SAndreas Gruenbacher 	int err;
1389b411b363SPhilipp Reisner 
1390b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
13918554df1cSAndreas Gruenbacher 		req_mod(req, SEND_CANCELED);
139299920dc5SAndreas Gruenbacher 		return 0;
1393b411b363SPhilipp Reisner 	}
1394b411b363SPhilipp Reisner 
1395b6dd1a89SLars Ellenberg 	/* Even read requests may close a write epoch,
1396b6dd1a89SLars Ellenberg 	 * if there was any yet. */
13974eb9b3cbSLars Ellenberg 	maybe_send_barrier(tconn, req->epoch);
1398b6dd1a89SLars Ellenberg 
139999920dc5SAndreas Gruenbacher 	err = drbd_send_drequest(mdev, P_DATA_REQUEST, req->i.sector, req->i.size,
1400b411b363SPhilipp Reisner 				 (unsigned long)req);
1401b411b363SPhilipp Reisner 
140299920dc5SAndreas Gruenbacher 	req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
1403b411b363SPhilipp Reisner 
140499920dc5SAndreas Gruenbacher 	return err;
1405b411b363SPhilipp Reisner }
1406b411b363SPhilipp Reisner 
140799920dc5SAndreas Gruenbacher int w_restart_disk_io(struct drbd_work *w, int cancel)
1408265be2d0SPhilipp Reisner {
1409265be2d0SPhilipp Reisner 	struct drbd_request *req = container_of(w, struct drbd_request, w);
141000d56944SPhilipp Reisner 	struct drbd_conf *mdev = w->mdev;
1411265be2d0SPhilipp Reisner 
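1411265be2d0SPhilipp Reisner 	/* a write that was in the activity log must re-enter it before the
1411265be2d0SPhilipp Reisner 	 * bio is resubmitted to the local backing device */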
14120778286aSPhilipp Reisner 	if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
1413181286adSLars Ellenberg 		drbd_al_begin_io(mdev, &req->i);
1414265be2d0SPhilipp Reisner 
1415265be2d0SPhilipp Reisner 	drbd_req_make_private_bio(req, req->master_bio);
1416265be2d0SPhilipp Reisner 	req->private_bio->bi_bdev = mdev->ldev->backing_bdev;
1417265be2d0SPhilipp Reisner 	generic_make_request(req->private_bio);
1418265be2d0SPhilipp Reisner 
141999920dc5SAndreas Gruenbacher 	return 0;
1420265be2d0SPhilipp Reisner }
1421265be2d0SPhilipp Reisner 
1422b411b363SPhilipp Reisner static int _drbd_may_sync_now(struct drbd_conf *mdev)
1423b411b363SPhilipp Reisner {
1424b411b363SPhilipp Reisner 	struct drbd_conf *odev = mdev;
142595f8efd0SAndreas Gruenbacher 	int resync_after;
1426b411b363SPhilipp Reisner 
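1426b411b363SPhilipp Reisner 	/* walk the resync-after dependency chain; we may only sync if no device
1426b411b363SPhilipp Reisner 	 * we (transitively) depend on is itself resyncing or paused */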
1427b411b363SPhilipp Reisner 	while (1) {
1428438c8374SPhilipp Reisner 		if (!odev->ldev)
1429438c8374SPhilipp Reisner 			return 1;
1430daeda1ccSPhilipp Reisner 		rcu_read_lock();
143195f8efd0SAndreas Gruenbacher 		resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
1432daeda1ccSPhilipp Reisner 		rcu_read_unlock();
143395f8efd0SAndreas Gruenbacher 		if (resync_after == -1)
1434b411b363SPhilipp Reisner 			return 1;
143595f8efd0SAndreas Gruenbacher 		odev = minor_to_mdev(resync_after);
1436841ce241SAndreas Gruenbacher 		if (!expect(odev))
1437841ce241SAndreas Gruenbacher 			return 1;
1438b411b363SPhilipp Reisner 		if ((odev->state.conn >= C_SYNC_SOURCE &&
1439b411b363SPhilipp Reisner 		     odev->state.conn <= C_PAUSED_SYNC_T) ||
1440b411b363SPhilipp Reisner 		    odev->state.aftr_isp || odev->state.peer_isp ||
1441b411b363SPhilipp Reisner 		    odev->state.user_isp)
1442b411b363SPhilipp Reisner 			return 0;
1443b411b363SPhilipp Reisner 	}
1444b411b363SPhilipp Reisner }
1445b411b363SPhilipp Reisner 
1446b411b363SPhilipp Reisner /**
1447b411b363SPhilipp Reisner  * _drbd_pause_after() - Pause resync on all devices that may not resync now
1448b411b363SPhilipp Reisner  * @mdev:	DRBD device.
1449b411b363SPhilipp Reisner  *
1450b411b363SPhilipp Reisner  * Called from process context only (admin command and after_state_ch).
1451b411b363SPhilipp Reisner  */
1452b411b363SPhilipp Reisner static int _drbd_pause_after(struct drbd_conf *mdev)
1453b411b363SPhilipp Reisner {
1454b411b363SPhilipp Reisner 	struct drbd_conf *odev;
1455b411b363SPhilipp Reisner 	int i, rv = 0;
1456b411b363SPhilipp Reisner 
1457695d08faSPhilipp Reisner 	rcu_read_lock();
145881a5d60eSPhilipp Reisner 	idr_for_each_entry(&minors, odev, i) {
1459b411b363SPhilipp Reisner 		if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1460b411b363SPhilipp Reisner 			continue;
1461b411b363SPhilipp Reisner 		if (!_drbd_may_sync_now(odev))
1462b411b363SPhilipp Reisner 			rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL)
1463b411b363SPhilipp Reisner 			       != SS_NOTHING_TO_DO);
1464b411b363SPhilipp Reisner 	}
1465695d08faSPhilipp Reisner 	rcu_read_unlock();
1466b411b363SPhilipp Reisner 
1467b411b363SPhilipp Reisner 	return rv;
1468b411b363SPhilipp Reisner }
1469b411b363SPhilipp Reisner 
1470b411b363SPhilipp Reisner /**
1471b411b363SPhilipp Reisner  * _drbd_resume_next() - Resume resync on all devices that may resync now
1472b411b363SPhilipp Reisner  * @mdev:	DRBD device.
1473b411b363SPhilipp Reisner  *
1474b411b363SPhilipp Reisner  * Called from process context only (admin command and worker).
1475b411b363SPhilipp Reisner  */
1476b411b363SPhilipp Reisner static int _drbd_resume_next(struct drbd_conf *mdev)
1477b411b363SPhilipp Reisner {
1478b411b363SPhilipp Reisner 	struct drbd_conf *odev;
1479b411b363SPhilipp Reisner 	int i, rv = 0;
1480b411b363SPhilipp Reisner 
1481695d08faSPhilipp Reisner 	rcu_read_lock();
148281a5d60eSPhilipp Reisner 	idr_for_each_entry(&minors, odev, i) {
1483b411b363SPhilipp Reisner 		if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1484b411b363SPhilipp Reisner 			continue;
1485b411b363SPhilipp Reisner 		if (odev->state.aftr_isp) {
1486b411b363SPhilipp Reisner 			if (_drbd_may_sync_now(odev))
1487b411b363SPhilipp Reisner 				rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0),
1488b411b363SPhilipp Reisner 							CS_HARD, NULL)
1489b411b363SPhilipp Reisner 				       != SS_NOTHING_TO_DO) ;
1490b411b363SPhilipp Reisner 		}
1491b411b363SPhilipp Reisner 	}
1492695d08faSPhilipp Reisner 	rcu_read_unlock();
1493b411b363SPhilipp Reisner 	return rv;
1494b411b363SPhilipp Reisner }
1495b411b363SPhilipp Reisner 
1496b411b363SPhilipp Reisner void resume_next_sg(struct drbd_conf *mdev)
1497b411b363SPhilipp Reisner {
1498b411b363SPhilipp Reisner 	write_lock_irq(&global_state_lock);
1499b411b363SPhilipp Reisner 	_drbd_resume_next(mdev);
1500b411b363SPhilipp Reisner 	write_unlock_irq(&global_state_lock);
1501b411b363SPhilipp Reisner }
1502b411b363SPhilipp Reisner 
1503b411b363SPhilipp Reisner void suspend_other_sg(struct drbd_conf *mdev)
1504b411b363SPhilipp Reisner {
1505b411b363SPhilipp Reisner 	write_lock_irq(&global_state_lock);
1506b411b363SPhilipp Reisner 	_drbd_pause_after(mdev);
1507b411b363SPhilipp Reisner 	write_unlock_irq(&global_state_lock);
1508b411b363SPhilipp Reisner }
1509b411b363SPhilipp Reisner 
1510dc97b708SPhilipp Reisner /* caller must hold global_state_lock */
151195f8efd0SAndreas Gruenbacher enum drbd_ret_code drbd_resync_after_valid(struct drbd_conf *mdev, int o_minor)
1512b411b363SPhilipp Reisner {
1513b411b363SPhilipp Reisner 	struct drbd_conf *odev;
151495f8efd0SAndreas Gruenbacher 	int resync_after;
1515b411b363SPhilipp Reisner 
1516b411b363SPhilipp Reisner 	if (o_minor == -1)
1517b411b363SPhilipp Reisner 		return NO_ERROR;
1518b411b363SPhilipp Reisner 	if (o_minor < -1 || minor_to_mdev(o_minor) == NULL)
151995f8efd0SAndreas Gruenbacher 		return ERR_RESYNC_AFTER;
1520b411b363SPhilipp Reisner 
1521b411b363SPhilipp Reisner 	/* check for loops */
1522b411b363SPhilipp Reisner 	odev = minor_to_mdev(o_minor);
1523b411b363SPhilipp Reisner 	while (1) {
1524b411b363SPhilipp Reisner 		if (odev == mdev)
152595f8efd0SAndreas Gruenbacher 			return ERR_RESYNC_AFTER_CYCLE;
1526b411b363SPhilipp Reisner 
1527daeda1ccSPhilipp Reisner 		rcu_read_lock();
152895f8efd0SAndreas Gruenbacher 		resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
1529daeda1ccSPhilipp Reisner 		rcu_read_unlock();
1530b411b363SPhilipp Reisner 		/* dependency chain ends here, no cycles. */
153195f8efd0SAndreas Gruenbacher 		if (resync_after == -1)
1532b411b363SPhilipp Reisner 			return NO_ERROR;
1533b411b363SPhilipp Reisner 
1534b411b363SPhilipp Reisner 		/* follow the dependency chain */
153595f8efd0SAndreas Gruenbacher 		odev = minor_to_mdev(resync_after);
1536b411b363SPhilipp Reisner 	}
1537b411b363SPhilipp Reisner }
1538b411b363SPhilipp Reisner 
1539dc97b708SPhilipp Reisner /* caller must hold global_state_lock */
154095f8efd0SAndreas Gruenbacher void drbd_resync_after_changed(struct drbd_conf *mdev)
1541b411b363SPhilipp Reisner {
1542b411b363SPhilipp Reisner 	int changes;
1543b411b363SPhilipp Reisner 
1544b411b363SPhilipp Reisner 	do {
1545b411b363SPhilipp Reisner 		changes  = _drbd_pause_after(mdev);
1546b411b363SPhilipp Reisner 		changes |= _drbd_resume_next(mdev);
1547b411b363SPhilipp Reisner 	} while (changes);
1548b411b363SPhilipp Reisner }
1549b411b363SPhilipp Reisner 
15509bd28d3cSLars Ellenberg void drbd_rs_controller_reset(struct drbd_conf *mdev)
15519bd28d3cSLars Ellenberg {
1552813472ceSPhilipp Reisner 	struct fifo_buffer *plan;
1553813472ceSPhilipp Reisner 
15549bd28d3cSLars Ellenberg 	atomic_set(&mdev->rs_sect_in, 0);
15559bd28d3cSLars Ellenberg 	atomic_set(&mdev->rs_sect_ev, 0);
15569bd28d3cSLars Ellenberg 	mdev->rs_in_flight = 0;
1557813472ceSPhilipp Reisner 
1558813472ceSPhilipp Reisner 	/* Updating the RCU protected object in place is necessary since
1559813472ceSPhilipp Reisner 	   this function gets called from atomic context.
1560813472ceSPhilipp Reisner 	   It is valid since all other updates also lead to a completely
1561813472ceSPhilipp Reisner 	   empty fifo */
1562813472ceSPhilipp Reisner 	rcu_read_lock();
1563813472ceSPhilipp Reisner 	plan = rcu_dereference(mdev->rs_plan_s);
1564813472ceSPhilipp Reisner 	plan->total = 0;
1565813472ceSPhilipp Reisner 	fifo_set(plan, 0);
1566813472ceSPhilipp Reisner 	rcu_read_unlock();
15679bd28d3cSLars Ellenberg }
15689bd28d3cSLars Ellenberg 
15691f04af33SPhilipp Reisner void start_resync_timer_fn(unsigned long data)
15701f04af33SPhilipp Reisner {
15711f04af33SPhilipp Reisner 	struct drbd_conf *mdev = (struct drbd_conf *) data;
15721f04af33SPhilipp Reisner 
1573d5b27b01SLars Ellenberg 	drbd_queue_work(&mdev->tconn->sender_work, &mdev->start_resync_work);
15741f04af33SPhilipp Reisner }
15751f04af33SPhilipp Reisner 
157699920dc5SAndreas Gruenbacher int w_start_resync(struct drbd_work *w, int cancel)
15771f04af33SPhilipp Reisner {
157800d56944SPhilipp Reisner 	struct drbd_conf *mdev = w->mdev;
157900d56944SPhilipp Reisner 
15801f04af33SPhilipp Reisner 	if (atomic_read(&mdev->unacked_cnt) || atomic_read(&mdev->rs_pending_cnt)) {
15811f04af33SPhilipp Reisner 		dev_warn(DEV, "w_start_resync later...\n");
15821f04af33SPhilipp Reisner 		mdev->start_resync_timer.expires = jiffies + HZ/10;
15831f04af33SPhilipp Reisner 		add_timer(&mdev->start_resync_timer);
158499920dc5SAndreas Gruenbacher 		return 0;
15851f04af33SPhilipp Reisner 	}
15861f04af33SPhilipp Reisner 
15871f04af33SPhilipp Reisner 	drbd_start_resync(mdev, C_SYNC_SOURCE);
158836baf611SPhilipp Reisner 	clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags);
158999920dc5SAndreas Gruenbacher 	return 0;
15901f04af33SPhilipp Reisner }
15911f04af33SPhilipp Reisner 
1592b411b363SPhilipp Reisner /**
1593b411b363SPhilipp Reisner  * drbd_start_resync() - Start the resync process
1594b411b363SPhilipp Reisner  * @mdev:	DRBD device.
1595b411b363SPhilipp Reisner  * @side:	Either C_SYNC_SOURCE or C_SYNC_TARGET
1596b411b363SPhilipp Reisner  *
1597b411b363SPhilipp Reisner  * This function might bring you directly into one of the
1598b411b363SPhilipp Reisner  * C_PAUSED_SYNC_* states.
1599b411b363SPhilipp Reisner  */
1600b411b363SPhilipp Reisner void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
1601b411b363SPhilipp Reisner {
1602b411b363SPhilipp Reisner 	union drbd_state ns;
1603b411b363SPhilipp Reisner 	int r;
1604b411b363SPhilipp Reisner 
1605c4752ef1SPhilipp Reisner 	if (mdev->state.conn >= C_SYNC_SOURCE && mdev->state.conn < C_AHEAD) {
1606b411b363SPhilipp Reisner 		dev_err(DEV, "Resync already running!\n");
1607b411b363SPhilipp Reisner 		return;
1608b411b363SPhilipp Reisner 	}
1609b411b363SPhilipp Reisner 
1610e64a3294SPhilipp Reisner 	if (!test_bit(B_RS_H_DONE, &mdev->flags)) {
1611b411b363SPhilipp Reisner 		if (side == C_SYNC_TARGET) {
1612b411b363SPhilipp Reisner 			/* Since application IO was locked out during C_WF_BITMAP_T and
1613b411b363SPhilipp Reisner 			   C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
1614b411b363SPhilipp Reisner 			   we check whether we may make the data inconsistent. */
1615b411b363SPhilipp Reisner 			r = drbd_khelper(mdev, "before-resync-target");
1616b411b363SPhilipp Reisner 			r = (r >> 8) & 0xff;
1617b411b363SPhilipp Reisner 			if (r > 0) {
1618b411b363SPhilipp Reisner 				dev_info(DEV, "before-resync-target handler returned %d, "
1619b411b363SPhilipp Reisner 					 "dropping connection.\n", r);
162038fa9988SPhilipp Reisner 				conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
1621b411b363SPhilipp Reisner 				return;
1622b411b363SPhilipp Reisner 			}
162309b9e797SPhilipp Reisner 		} else /* C_SYNC_SOURCE */ {
162409b9e797SPhilipp Reisner 			r = drbd_khelper(mdev, "before-resync-source");
162509b9e797SPhilipp Reisner 			r = (r >> 8) & 0xff;
162609b9e797SPhilipp Reisner 			if (r > 0) {
162709b9e797SPhilipp Reisner 				if (r == 3) {
162809b9e797SPhilipp Reisner 					dev_info(DEV, "before-resync-source handler returned %d, "
162909b9e797SPhilipp Reisner 						 "ignoring. Old userland tools?", r);
163009b9e797SPhilipp Reisner 				} else {
163109b9e797SPhilipp Reisner 					dev_info(DEV, "before-resync-source handler returned %d, "
163209b9e797SPhilipp Reisner 						 "dropping connection.\n", r);
163338fa9988SPhilipp Reisner 					conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
163409b9e797SPhilipp Reisner 					return;
163509b9e797SPhilipp Reisner 				}
163609b9e797SPhilipp Reisner 			}
1637b411b363SPhilipp Reisner 		}
1638e64a3294SPhilipp Reisner 	}
1639b411b363SPhilipp Reisner 
1640e64a3294SPhilipp Reisner 	if (current == mdev->tconn->worker.task) {
1641dad20554SPhilipp Reisner 		/* The worker should not sleep waiting for state_mutex,
1642e64a3294SPhilipp Reisner 		   which can take long */
16438410da8fSPhilipp Reisner 		if (!mutex_trylock(mdev->state_mutex)) {
1644e64a3294SPhilipp Reisner 			set_bit(B_RS_H_DONE, &mdev->flags);
1645e64a3294SPhilipp Reisner 			mdev->start_resync_timer.expires = jiffies + HZ/5;
1646e64a3294SPhilipp Reisner 			add_timer(&mdev->start_resync_timer);
1647e64a3294SPhilipp Reisner 			return;
1648e64a3294SPhilipp Reisner 		}
1649e64a3294SPhilipp Reisner 	} else {
16508410da8fSPhilipp Reisner 		mutex_lock(mdev->state_mutex);
1651e64a3294SPhilipp Reisner 	}
1652e64a3294SPhilipp Reisner 	clear_bit(B_RS_H_DONE, &mdev->flags);
1653b411b363SPhilipp Reisner 
16540cfac5ddSPhilipp Reisner 	write_lock_irq(&global_state_lock);
1655b411b363SPhilipp Reisner 	if (!get_ldev_if_state(mdev, D_NEGOTIATING)) {
16560cfac5ddSPhilipp Reisner 		write_unlock_irq(&global_state_lock);
16578410da8fSPhilipp Reisner 		mutex_unlock(mdev->state_mutex);
1658b411b363SPhilipp Reisner 		return;
1659b411b363SPhilipp Reisner 	}
1660b411b363SPhilipp Reisner 
166178bae59bSPhilipp Reisner 	ns = drbd_read_state(mdev);
1662b411b363SPhilipp Reisner 
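1662b411b363SPhilipp Reisner 	/* start out paused if a device we resync after is itself busy */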
1663b411b363SPhilipp Reisner 	ns.aftr_isp = !_drbd_may_sync_now(mdev);
1664b411b363SPhilipp Reisner 
1665b411b363SPhilipp Reisner 	ns.conn = side;
1666b411b363SPhilipp Reisner 
1667b411b363SPhilipp Reisner 	if (side == C_SYNC_TARGET)
1668b411b363SPhilipp Reisner 		ns.disk = D_INCONSISTENT;
1669b411b363SPhilipp Reisner 	else /* side == C_SYNC_SOURCE */
1670b411b363SPhilipp Reisner 		ns.pdsk = D_INCONSISTENT;
1671b411b363SPhilipp Reisner 
1672b411b363SPhilipp Reisner 	r = __drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
167378bae59bSPhilipp Reisner 	ns = drbd_read_state(mdev);
1674b411b363SPhilipp Reisner 
1675b411b363SPhilipp Reisner 	if (ns.conn < C_CONNECTED)
1676b411b363SPhilipp Reisner 		r = SS_UNKNOWN_ERROR;
1677b411b363SPhilipp Reisner 
1678b411b363SPhilipp Reisner 	if (r == SS_SUCCESS) {
16791d7734a0SLars Ellenberg 		unsigned long tw = drbd_bm_total_weight(mdev);
16801d7734a0SLars Ellenberg 		unsigned long now = jiffies;
16811d7734a0SLars Ellenberg 		int i;
16821d7734a0SLars Ellenberg 
1683b411b363SPhilipp Reisner 		mdev->rs_failed    = 0;
1684b411b363SPhilipp Reisner 		mdev->rs_paused    = 0;
1685b411b363SPhilipp Reisner 		mdev->rs_same_csum = 0;
16860f0601f4SLars Ellenberg 		mdev->rs_last_events = 0;
16870f0601f4SLars Ellenberg 		mdev->rs_last_sect_ev = 0;
16881d7734a0SLars Ellenberg 		mdev->rs_total     = tw;
16891d7734a0SLars Ellenberg 		mdev->rs_start     = now;
16901d7734a0SLars Ellenberg 		for (i = 0; i < DRBD_SYNC_MARKS; i++) {
16911d7734a0SLars Ellenberg 			mdev->rs_mark_left[i] = tw;
16921d7734a0SLars Ellenberg 			mdev->rs_mark_time[i] = now;
16931d7734a0SLars Ellenberg 		}
1694b411b363SPhilipp Reisner 		_drbd_pause_after(mdev);
1695b411b363SPhilipp Reisner 	}
1696b411b363SPhilipp Reisner 	write_unlock_irq(&global_state_lock);
16975a22db89SLars Ellenberg 
16986c922ed5SLars Ellenberg 	if (r == SS_SUCCESS) {
16996c922ed5SLars Ellenberg 		dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
17006c922ed5SLars Ellenberg 		     drbd_conn_str(ns.conn),
17016c922ed5SLars Ellenberg 		     (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10),
17026c922ed5SLars Ellenberg 		     (unsigned long) mdev->rs_total);
17035a22db89SLars Ellenberg 		if (side == C_SYNC_TARGET)
17045a22db89SLars Ellenberg 			mdev->bm_resync_fo = 0;
17055a22db89SLars Ellenberg 
17065a22db89SLars Ellenberg 		/* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
17075a22db89SLars Ellenberg 		 * with w_send_oos, or the sync target will get confused as to
17085a22db89SLars Ellenberg 		 * how much bits to resync.  We cannot do that always, because for an
17095a22db89SLars Ellenberg 		 * empty resync and protocol < 95, we need to do it here, as we call
17105a22db89SLars Ellenberg 		 * drbd_resync_finished from here in that case.
17115a22db89SLars Ellenberg 		 * We drbd_gen_and_send_sync_uuid here for protocol < 96,
17125a22db89SLars Ellenberg 		 * and from after_state_ch otherwise. */
171331890f4aSPhilipp Reisner 		if (side == C_SYNC_SOURCE && mdev->tconn->agreed_pro_version < 96)
17145a22db89SLars Ellenberg 			drbd_gen_and_send_sync_uuid(mdev);
1715b411b363SPhilipp Reisner 
171631890f4aSPhilipp Reisner 		if (mdev->tconn->agreed_pro_version < 95 && mdev->rs_total == 0) {
1717af85e8e8SLars Ellenberg 			/* This still has a race (about when exactly the peers
1718af85e8e8SLars Ellenberg 			 * detect connection loss) that can lead to a full sync
1719af85e8e8SLars Ellenberg 			 * on next handshake. In 8.3.9 we fixed this with explicit
1720af85e8e8SLars Ellenberg 			 * resync-finished notifications, but the fix
1721af85e8e8SLars Ellenberg 			 * introduces a protocol change.  Sleeping for some
1722af85e8e8SLars Ellenberg 			 * time longer than the ping interval + timeout on the
1723af85e8e8SLars Ellenberg 			 * SyncSource, to give the SyncTarget the chance to
1724af85e8e8SLars Ellenberg 			 * detect connection loss, then waiting for a ping
1725af85e8e8SLars Ellenberg 			 * response (implicit in drbd_resync_finished) reduces
1726af85e8e8SLars Ellenberg 			 * the race considerably, but does not solve it. */
172744ed167dSPhilipp Reisner 			if (side == C_SYNC_SOURCE) {
172844ed167dSPhilipp Reisner 				struct net_conf *nc;
172944ed167dSPhilipp Reisner 				int timeo;
173044ed167dSPhilipp Reisner 
173144ed167dSPhilipp Reisner 				rcu_read_lock();
173244ed167dSPhilipp Reisner 				nc = rcu_dereference(mdev->tconn->net_conf);
173344ed167dSPhilipp Reisner 				timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9;
173444ed167dSPhilipp Reisner 				rcu_read_unlock();
173544ed167dSPhilipp Reisner 				schedule_timeout_interruptible(timeo);
173644ed167dSPhilipp Reisner 			}
1737b411b363SPhilipp Reisner 			drbd_resync_finished(mdev);
1738b411b363SPhilipp Reisner 		}
1739b411b363SPhilipp Reisner 
17409bd28d3cSLars Ellenberg 		drbd_rs_controller_reset(mdev);
1741b411b363SPhilipp Reisner 		/* ns.conn may already be != mdev->state.conn,
1742b411b363SPhilipp Reisner 		 * we may have been paused in between, or become paused until
1743b411b363SPhilipp Reisner 		 * the timer triggers.
1744b411b363SPhilipp Reisner 		 * No matter, that is handled in resync_timer_fn() */
1745b411b363SPhilipp Reisner 		if (ns.conn == C_SYNC_TARGET)
1746b411b363SPhilipp Reisner 			mod_timer(&mdev->resync_timer, jiffies);
1747b411b363SPhilipp Reisner 
1748b411b363SPhilipp Reisner 		drbd_md_sync(mdev);
1749b411b363SPhilipp Reisner 	}
17505a22db89SLars Ellenberg 	put_ldev(mdev);
17518410da8fSPhilipp Reisner 	mutex_unlock(mdev->state_mutex);
1752b411b363SPhilipp Reisner }
1753b411b363SPhilipp Reisner 
1754b6dd1a89SLars Ellenberg /* If the resource already closed the current epoch, but we did not
1755b6dd1a89SLars Ellenberg  * (because we have not yet seen new requests), we should send the
1756b6dd1a89SLars Ellenberg  * corresponding barrier now.  Must be checked within the same spinlock
1757b6dd1a89SLars Ellenberg  * that is used to check for new requests. */
1758b6dd1a89SLars Ellenberg bool need_to_send_barrier(struct drbd_tconn *connection)
1759b6dd1a89SLars Ellenberg {
1760b6dd1a89SLars Ellenberg 	if (!connection->send.seen_any_write_yet)
1761b6dd1a89SLars Ellenberg 		return false;
1762b6dd1a89SLars Ellenberg 
1763b6dd1a89SLars Ellenberg 	/* Skip barriers that do not contain any writes.
1764b6dd1a89SLars Ellenberg 	 * This may happen during AHEAD mode. */
1765b6dd1a89SLars Ellenberg 	if (!connection->send.current_epoch_writes)
1766b6dd1a89SLars Ellenberg 		return false;
1767b6dd1a89SLars Ellenberg 
1768b6dd1a89SLars Ellenberg 	/* ->req_lock is held when requests are queued on
1769b6dd1a89SLars Ellenberg 	 * connection->sender_work, and put into ->transfer_log.
1770b6dd1a89SLars Ellenberg 	 * It is also held when ->current_tle_nr is increased.
1771b6dd1a89SLars Ellenberg 	 * So either there are already new requests queued,
1772b6dd1a89SLars Ellenberg 	 * and corresponding barriers will be sent there.
1773b6dd1a89SLars Ellenberg 	 * Or nothing new is queued yet, so the difference will be 1.
1774b6dd1a89SLars Ellenberg 	 */
1775b6dd1a89SLars Ellenberg 	if (atomic_read(&connection->current_tle_nr) !=
1776b6dd1a89SLars Ellenberg 	    connection->send.current_epoch_nr + 1)
1777b6dd1a89SLars Ellenberg 		return false;
1778b6dd1a89SLars Ellenberg 
1779b6dd1a89SLars Ellenberg 	return true;
1780b6dd1a89SLars Ellenberg }
1781b6dd1a89SLars Ellenberg 
17828c0785a5SLars Ellenberg bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
17838c0785a5SLars Ellenberg {
17848c0785a5SLars Ellenberg 	spin_lock_irq(&queue->q_lock);
17858c0785a5SLars Ellenberg 	list_splice_init(&queue->q, work_list);
17868c0785a5SLars Ellenberg 	spin_unlock_irq(&queue->q_lock);
17878c0785a5SLars Ellenberg 	return !list_empty(work_list);
17888c0785a5SLars Ellenberg }
17898c0785a5SLars Ellenberg 
17908c0785a5SLars Ellenberg bool dequeue_work_item(struct drbd_work_queue *queue, struct list_head *work_list)
17918c0785a5SLars Ellenberg {
17928c0785a5SLars Ellenberg 	spin_lock_irq(&queue->q_lock);
17938c0785a5SLars Ellenberg 	if (!list_empty(&queue->q))
17948c0785a5SLars Ellenberg 		list_move(queue->q.next, work_list);
17958c0785a5SLars Ellenberg 	spin_unlock_irq(&queue->q_lock);
17968c0785a5SLars Ellenberg 	return !list_empty(work_list);
17978c0785a5SLars Ellenberg }
17988c0785a5SLars Ellenberg 
1799b6dd1a89SLars Ellenberg void wait_for_work(struct drbd_tconn *connection, struct list_head *work_list)
1800b6dd1a89SLars Ellenberg {
1801b6dd1a89SLars Ellenberg 	DEFINE_WAIT(wait);
1802b6dd1a89SLars Ellenberg 	struct net_conf *nc;
1803b6dd1a89SLars Ellenberg 	int uncork, cork;
1804b6dd1a89SLars Ellenberg 
1805b6dd1a89SLars Ellenberg 	dequeue_work_item(&connection->sender_work, work_list);
1806b6dd1a89SLars Ellenberg 	if (!list_empty(work_list))
1807b6dd1a89SLars Ellenberg 		return;
1808b6dd1a89SLars Ellenberg 
1809b6dd1a89SLars Ellenberg 	/* Still nothing to do?
1810b6dd1a89SLars Ellenberg 	 * Maybe we still need to close the current epoch,
1811b6dd1a89SLars Ellenberg 	 * even if no new requests are queued yet.
1812b6dd1a89SLars Ellenberg 	 *
1813b6dd1a89SLars Ellenberg 	 * Also, poke TCP, just in case.
1814b6dd1a89SLars Ellenberg 	 * Then wait for new work (or signal). */
1815b6dd1a89SLars Ellenberg 	rcu_read_lock();
1816b6dd1a89SLars Ellenberg 	nc = rcu_dereference(connection->net_conf);
1817b6dd1a89SLars Ellenberg 	uncork = nc ? nc->tcp_cork : 0;
1818b6dd1a89SLars Ellenberg 	rcu_read_unlock();
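1818b6dd1a89SLars Ellenberg 	/* if tcp_cork is in use, uncork now so anything already queued in the
1818b6dd1a89SLars Ellenberg 	 * socket gets pushed out before we go to sleep */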
1819b6dd1a89SLars Ellenberg 	if (uncork) {
1820b6dd1a89SLars Ellenberg 		mutex_lock(&connection->data.mutex);
1821b6dd1a89SLars Ellenberg 		if (connection->data.socket)
1822b6dd1a89SLars Ellenberg 			drbd_tcp_uncork(connection->data.socket);
1823b6dd1a89SLars Ellenberg 		mutex_unlock(&connection->data.mutex);
1824b6dd1a89SLars Ellenberg 	}
1825b6dd1a89SLars Ellenberg 
1826b6dd1a89SLars Ellenberg 	for (;;) {
1827b6dd1a89SLars Ellenberg 		int send_barrier;
1828b6dd1a89SLars Ellenberg 		prepare_to_wait(&connection->sender_work.q_wait, &wait, TASK_INTERRUPTIBLE);
1829b6dd1a89SLars Ellenberg 		spin_lock_irq(&connection->req_lock);
1830b6dd1a89SLars Ellenberg 		spin_lock(&connection->sender_work.q_lock);	/* FIXME get rid of this one? */
1831bc317a9eSLars Ellenberg 		/* dequeue single item only,
1832bc317a9eSLars Ellenberg 		 * we still use drbd_queue_work_front() in some places */
1833bc317a9eSLars Ellenberg 		if (!list_empty(&connection->sender_work.q))
1834bc317a9eSLars Ellenberg 			list_move(connection->sender_work.q.next, work_list);
1835b6dd1a89SLars Ellenberg 		spin_unlock(&connection->sender_work.q_lock);	/* FIXME get rid of this one? */
1836b6dd1a89SLars Ellenberg 		if (!list_empty(work_list) || signal_pending(current)) {
1837b6dd1a89SLars Ellenberg 			spin_unlock_irq(&connection->req_lock);
1838b6dd1a89SLars Ellenberg 			break;
1839b6dd1a89SLars Ellenberg 		}
1840b6dd1a89SLars Ellenberg 		send_barrier = need_to_send_barrier(connection);
1841b6dd1a89SLars Ellenberg 		spin_unlock_irq(&connection->req_lock);
1842b6dd1a89SLars Ellenberg 		if (send_barrier) {
1843b6dd1a89SLars Ellenberg 			drbd_send_barrier(connection);
1844b6dd1a89SLars Ellenberg 			connection->send.current_epoch_nr++;
1845b6dd1a89SLars Ellenberg 		}
1846b6dd1a89SLars Ellenberg 		schedule();
1847b6dd1a89SLars Ellenberg 		/* may be woken up for things other than new work, too,
1848b6dd1a89SLars Ellenberg 		 * e.g. if the current epoch got closed.
1849b6dd1a89SLars Ellenberg 		 * In that case we send the barrier above. */
1850b6dd1a89SLars Ellenberg 	}
1851b6dd1a89SLars Ellenberg 	finish_wait(&connection->sender_work.q_wait, &wait);
1852b6dd1a89SLars Ellenberg 
1853b6dd1a89SLars Ellenberg 	/* someone may have changed the config while we have been waiting above. */
1854b6dd1a89SLars Ellenberg 	rcu_read_lock();
1855b6dd1a89SLars Ellenberg 	nc = rcu_dereference(connection->net_conf);
1856b6dd1a89SLars Ellenberg 	cork = nc ? nc->tcp_cork : 0;
1857b6dd1a89SLars Ellenberg 	rcu_read_unlock();
1858b6dd1a89SLars Ellenberg 	mutex_lock(&connection->data.mutex);
1859b6dd1a89SLars Ellenberg 	if (connection->data.socket) {
1860b6dd1a89SLars Ellenberg 		if (cork)
1861b6dd1a89SLars Ellenberg 			drbd_tcp_cork(connection->data.socket);
1862b6dd1a89SLars Ellenberg 		else if (!uncork)
1863b6dd1a89SLars Ellenberg 			drbd_tcp_uncork(connection->data.socket);
1864b6dd1a89SLars Ellenberg 	}
1865b6dd1a89SLars Ellenberg 	mutex_unlock(&connection->data.mutex);
1866b6dd1a89SLars Ellenberg }
1867b6dd1a89SLars Ellenberg 
1868b411b363SPhilipp Reisner int drbd_worker(struct drbd_thread *thi)
1869b411b363SPhilipp Reisner {
1870392c8801SPhilipp Reisner 	struct drbd_tconn *tconn = thi->tconn;
1871b411b363SPhilipp Reisner 	struct drbd_work *w = NULL;
18720e29d163SPhilipp Reisner 	struct drbd_conf *mdev;
1873b411b363SPhilipp Reisner 	LIST_HEAD(work_list);
18748c0785a5SLars Ellenberg 	int vnr;
1875b411b363SPhilipp Reisner 
1876e77a0a5cSAndreas Gruenbacher 	while (get_t_state(thi) == RUNNING) {
187780822284SPhilipp Reisner 		drbd_thread_current_set_cpu(thi);
1878b411b363SPhilipp Reisner 
18798c0785a5SLars Ellenberg 		/* as long as we use drbd_queue_work_front(),
18808c0785a5SLars Ellenberg 		 * we may only dequeue single work items here, not batches. */
18818c0785a5SLars Ellenberg 		if (list_empty(&work_list))
1882b6dd1a89SLars Ellenberg 			wait_for_work(tconn, &work_list);
1883b411b363SPhilipp Reisner 
18848c0785a5SLars Ellenberg 		if (signal_pending(current)) {
1885b411b363SPhilipp Reisner 			flush_signals(current);
188619393e10SPhilipp Reisner 			if (get_t_state(thi) == RUNNING) {
188719393e10SPhilipp Reisner 				conn_warn(tconn, "Worker got an unexpected signal\n");
1888b411b363SPhilipp Reisner 				continue;
188919393e10SPhilipp Reisner 			}
1890b411b363SPhilipp Reisner 			break;
1891b411b363SPhilipp Reisner 		}
1892b411b363SPhilipp Reisner 
1893e77a0a5cSAndreas Gruenbacher 		if (get_t_state(thi) != RUNNING)
1894b411b363SPhilipp Reisner 			break;
1895b411b363SPhilipp Reisner 
18968c0785a5SLars Ellenberg 		while (!list_empty(&work_list)) {
18978c0785a5SLars Ellenberg 			w = list_first_entry(&work_list, struct drbd_work, list);
1898b411b363SPhilipp Reisner 			list_del_init(&w->list);
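1898b411b363SPhilipp Reisner 			/* run the callback; "cancel" is set if the connection is already
1898b411b363SPhilipp Reisner 			 * failing.  An error while still connected forces a reconnect. */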
18998c0785a5SLars Ellenberg 			if (w->cb(w, tconn->cstate < C_WF_REPORT_PARAMS) == 0)
19008c0785a5SLars Ellenberg 				continue;
1901bbeb641cSPhilipp Reisner 			if (tconn->cstate >= C_WF_REPORT_PARAMS)
1902bbeb641cSPhilipp Reisner 				conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
1903b411b363SPhilipp Reisner 		}
1904b411b363SPhilipp Reisner 	}
1905b411b363SPhilipp Reisner 
19068c0785a5SLars Ellenberg 	do {
1907b411b363SPhilipp Reisner 		while (!list_empty(&work_list)) {
19088c0785a5SLars Ellenberg 			w = list_first_entry(&work_list, struct drbd_work, list);
1909b411b363SPhilipp Reisner 			list_del_init(&w->list);
191000d56944SPhilipp Reisner 			w->cb(w, 1);
1911b411b363SPhilipp Reisner 		}
1912d5b27b01SLars Ellenberg 		dequeue_work_batch(&tconn->sender_work, &work_list);
19138c0785a5SLars Ellenberg 	} while (!list_empty(&work_list));
1914b411b363SPhilipp Reisner 
1915c141ebdaSPhilipp Reisner 	rcu_read_lock();
1916f399002eSLars Ellenberg 	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
19170e29d163SPhilipp Reisner 		D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE);
1918c141ebdaSPhilipp Reisner 		kref_get(&mdev->kref);
1919c141ebdaSPhilipp Reisner 		rcu_read_unlock();
19200e29d163SPhilipp Reisner 		drbd_mdev_cleanup(mdev);
1921c141ebdaSPhilipp Reisner 		kref_put(&mdev->kref, &drbd_minor_destroy);
1922c141ebdaSPhilipp Reisner 		rcu_read_lock();
19230e29d163SPhilipp Reisner 	}
1924c141ebdaSPhilipp Reisner 	rcu_read_unlock();
1925b411b363SPhilipp Reisner 
1926b411b363SPhilipp Reisner 	return 0;
1927b411b363SPhilipp Reisner }
1928