/* xref: /openbmc/linux/drivers/block/drbd/drbd_worker.c (revision b30ab791) */
/*
   drbd_worker.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */

#include <linux/module.h>
#include <linux/drbd.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>

#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"

static int w_make_ov_request(struct drbd_work *w, int cancel);


/* endio handlers:
 *   drbd_md_io_complete (defined here)
 *   drbd_request_endio (defined here)
 *   drbd_peer_request_endio (defined here)
 *   bm_async_io_complete (defined in drbd_bitmap.c)
 *
 * For all these callbacks, note the following:
 * The callbacks will be called in irq context by the IDE drivers,
 * and in Softirqs/Tasklets/BH context by the SCSI drivers.
 * Try to get the locking right :)
 *
 */
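/* Because these callbacks may run in hard-irq context, the final-stage
 * handlers below take the request lock with spin_lock_irqsave() /
 * spin_unlock_irqrestore() rather than the plain or _bh variants. */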


/* About the global_state_lock
   Each state transition on a device holds a read lock. In case we have
   to evaluate the resync after dependencies, we grab a write lock, because
   we need stable states on all devices for that.  */
rwlock_t global_state_lock;
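/* Illustrative sketch of that locking rule (a reading aid, not a verbatim
 * quote of the state handling code):
 *
 *	read_lock(&global_state_lock);     state transition on one device
 *	...
 *	read_unlock(&global_state_lock);
 *
 *	write_lock(&global_state_lock);    resync-after dependency evaluation,
 *	...                                needs stable state on all devices
 *	write_unlock(&global_state_lock);
 */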

/* used for synchronous meta data and bitmap IO
 * submitted by drbd_md_sync_page_io()
 */
void drbd_md_io_complete(struct bio *bio, int error)
{
	struct drbd_md_io *md_io;
	struct drbd_device *device;

	md_io = (struct drbd_md_io *)bio->bi_private;
	device = container_of(md_io, struct drbd_device, md_io);

	md_io->error = error;

	/* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
	 * to timeout on the lower level device, and eventually detach from it.
	 * If this io completion runs after that timeout expired, this
	 * drbd_md_put_buffer() may allow us to finally try and re-attach.
	 * During normal operation, this only puts that extra reference
	 * down to 1 again.
	 * Make sure we first drop the reference, and only then signal
	 * completion, or we may (in drbd_al_read_log()) cycle so fast into the
	 * next drbd_md_sync_page_io(), that we trigger the
	 * ASSERT(atomic_read(&device->md_io_in_use) == 1) there.
	 */
	drbd_md_put_buffer(device);
	md_io->done = 1;
	wake_up(&device->misc_wait);
	bio_put(bio);
	if (device->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */
		put_ldev(device);
}

/* reads on behalf of the partner,
 * "submitted" by the receiver
 */
static void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
	unsigned long flags = 0;
	struct drbd_device *device = peer_req->w.device;

	spin_lock_irqsave(&device->tconn->req_lock, flags);
	device->read_cnt += peer_req->i.size >> 9;
	list_del(&peer_req->w.list);
	if (list_empty(&device->read_ee))
		wake_up(&device->ee_wait);
	if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
		__drbd_chk_io_error(device, DRBD_READ_ERROR);
	spin_unlock_irqrestore(&device->tconn->req_lock, flags);

	drbd_queue_work(&device->tconn->sender_work, &peer_req->w);
	put_ldev(device);
}

/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver, final stage.  */
static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
	unsigned long flags = 0;
	struct drbd_device *device = peer_req->w.device;
	struct drbd_interval i;
	int do_wake;
	u64 block_id;
	int do_al_complete_io;

	/* after we moved peer_req to done_ee,
	 * we may no longer access it,
	 * it may be freed/reused already!
	 * (as soon as we release the req_lock) */
	i = peer_req->i;
	do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
	block_id = peer_req->block_id;

	spin_lock_irqsave(&device->tconn->req_lock, flags);
	device->writ_cnt += peer_req->i.size >> 9;
	list_move_tail(&peer_req->w.list, &device->done_ee);

	/*
	 * Do not remove from the write_requests tree here: we did not send the
	 * Ack yet and did not wake possibly waiting conflicting requests.
	 * Removed from the tree from "drbd_process_done_ee" within the
	 * appropriate w.cb (e_end_block/e_end_resync_block) or from
	 * _drbd_clear_done_ee.
	 */

	do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee);

	if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
		__drbd_chk_io_error(device, DRBD_WRITE_ERROR);
	spin_unlock_irqrestore(&device->tconn->req_lock, flags);

	if (block_id == ID_SYNCER)
		drbd_rs_complete_io(device, i.sector);

	if (do_wake)
		wake_up(&device->ee_wait);

	if (do_al_complete_io)
		drbd_al_complete_io(device, &i);

	wake_asender(device->tconn);
	put_ldev(device);
}

/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver.
 */
void drbd_peer_request_endio(struct bio *bio, int error)
{
	struct drbd_peer_request *peer_req = bio->bi_private;
	struct drbd_device *device = peer_req->w.device;
	int uptodate = bio_flagged(bio, BIO_UPTODATE);
	int is_write = bio_data_dir(bio) == WRITE;

	if (error && __ratelimit(&drbd_ratelimit_state))
		dev_warn(DEV, "%s: error=%d s=%llus\n",
				is_write ? "write" : "read", error,
				(unsigned long long)peer_req->i.sector);
	if (!error && !uptodate) {
		if (__ratelimit(&drbd_ratelimit_state))
			dev_warn(DEV, "%s: setting error to -EIO s=%llus\n",
					is_write ? "write" : "read",
					(unsigned long long)peer_req->i.sector);
		/* strange behavior of some lower level drivers...
		 * fail the request by clearing the uptodate flag,
		 * but do not return any error?! */
		error = -EIO;
	}

	if (error)
		set_bit(__EE_WAS_ERROR, &peer_req->flags);

	bio_put(bio); /* no need for the bio anymore */
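	/* A peer request may span more than one bio; only the completion of
	 * the last pending bio drives the final stage below. */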
	if (atomic_dec_and_test(&peer_req->pending_bios)) {
		if (is_write)
			drbd_endio_write_sec_final(peer_req);
		else
			drbd_endio_read_sec_final(peer_req);
	}
}

/* read, readA or write requests on R_PRIMARY coming from drbd_make_request
 */
void drbd_request_endio(struct bio *bio, int error)
{
	unsigned long flags;
	struct drbd_request *req = bio->bi_private;
	struct drbd_device *device = req->w.device;
	struct bio_and_error m;
	enum drbd_req_event what;
	int uptodate = bio_flagged(bio, BIO_UPTODATE);

	if (!error && !uptodate) {
		dev_warn(DEV, "p %s: setting error to -EIO\n",
			 bio_data_dir(bio) == WRITE ? "write" : "read");
		/* strange behavior of some lower level drivers...
		 * fail the request by clearing the uptodate flag,
		 * but do not return any error?! */
		error = -EIO;
	}

	/* If this request was aborted locally before,
	 * but now was completed "successfully",
	 * chances are that this caused arbitrary data corruption.
	 *
	 * "aborting" requests, or force-detaching the disk, is intended for
	 * completely blocked/hung local backing devices which no longer
	 * complete requests at all, not even error completions.  In this
	 * situation, usually a hard-reset and failover is the only way out.
	 *
	 * By "aborting", basically faking a local error-completion,
	 * we allow for a more graceful switchover by cleanly migrating services.
	 * Still the affected node has to be rebooted "soon".
	 *
	 * By completing these requests, we allow the upper layers to re-use
	 * the associated data pages.
	 *
	 * If later the local backing device "recovers", and now DMAs some data
	 * from disk into the original request pages, in the best case it will
	 * just put random data into unused pages; but typically it will corrupt
	 * meanwhile completely unrelated data, causing all sorts of damage.
	 *
	 * Which means delayed successful completion,
	 * especially for READ requests,
	 * is a reason to panic().
	 *
	 * We assume that a delayed *error* completion is OK,
	 * though we still will complain noisily about it.
	 */
	if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) {
		if (__ratelimit(&drbd_ratelimit_state))
			dev_emerg(DEV, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");

		if (!error)
			panic("possible random memory corruption caused by delayed completion of aborted local request\n");
	}

	/* to avoid recursion in __req_mod */
	if (unlikely(error)) {
		what = (bio_data_dir(bio) == WRITE)
			? WRITE_COMPLETED_WITH_ERROR
			: (bio_rw(bio) == READ)
			  ? READ_COMPLETED_WITH_ERROR
			  : READ_AHEAD_COMPLETED_WITH_ERROR;
	} else
		what = COMPLETED_OK;

	bio_put(req->private_bio);
	req->private_bio = ERR_PTR(error);

	/* not req_mod(), we need irqsave here! */
	spin_lock_irqsave(&device->tconn->req_lock, flags);
	__req_mod(req, what, &m);
	spin_unlock_irqrestore(&device->tconn->req_lock, flags);
	put_ldev(device);

	if (m.bio)
		complete_master_bio(device, &m);
}

void drbd_csum_ee(struct drbd_device *device, struct crypto_hash *tfm,
		  struct drbd_peer_request *peer_req, void *digest)
{
	struct hash_desc desc;
	struct scatterlist sg;
	struct page *page = peer_req->pages;
	struct page *tmp;
	unsigned len;

	desc.tfm = tfm;
	desc.flags = 0;

	sg_init_table(&sg, 1);
	crypto_hash_init(&desc);

	while ((tmp = page_chain_next(page))) {
		/* all but the last page will be fully used */
		sg_set_page(&sg, page, PAGE_SIZE, 0);
		crypto_hash_update(&desc, &sg, sg.length);
		page = tmp;
	}
	/* and now the last, possibly only partially used page */
	len = peer_req->i.size & (PAGE_SIZE - 1);
	sg_set_page(&sg, page, len ?: PAGE_SIZE, 0);
	crypto_hash_update(&desc, &sg, sg.length);
	crypto_hash_final(&desc, digest);
}

void drbd_csum_bio(struct drbd_device *device, struct crypto_hash *tfm, struct bio *bio, void *digest)
{
	struct hash_desc desc;
	struct scatterlist sg;
	struct bio_vec bvec;
	struct bvec_iter iter;

	desc.tfm = tfm;
	desc.flags = 0;

	sg_init_table(&sg, 1);
	crypto_hash_init(&desc);

	bio_for_each_segment(bvec, bio, iter) {
		sg_set_page(&sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
		crypto_hash_update(&desc, &sg, sg.length);
	}
	crypto_hash_final(&desc, digest);
}

/* MAYBE merge common code with w_e_end_ov_req */
static int w_e_send_csum(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_device *device = w->device;
	int digest_size;
	void *digest;
	int err = 0;

	if (unlikely(cancel))
		goto out;

	if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0))
		goto out;

	digest_size = crypto_hash_digestsize(device->tconn->csums_tfm);
	digest = kmalloc(digest_size, GFP_NOIO);
	if (digest) {
		sector_t sector = peer_req->i.sector;
		unsigned int size = peer_req->i.size;
		drbd_csum_ee(device, device->tconn->csums_tfm, peer_req, digest);
		/* Free peer_req and pages before send.
		 * In case we block on congestion, we could otherwise run into
		 * some distributed deadlock, if the other side blocks on
		 * congestion as well, because our receiver blocks in
		 * drbd_alloc_pages due to pp_in_use > max_buffers. */
		drbd_free_peer_req(device, peer_req);
		peer_req = NULL;
		inc_rs_pending(device);
		err = drbd_send_drequest_csum(device, sector, size,
					      digest, digest_size,
					      P_CSUM_RS_REQUEST);
		kfree(digest);
	} else {
		dev_err(DEV, "kmalloc() of digest failed.\n");
		err = -ENOMEM;
	}

out:
	if (peer_req)
		drbd_free_peer_req(device, peer_req);

	if (unlikely(err))
		dev_err(DEV, "drbd_send_drequest(..., csum) failed\n");
	return err;
}

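/* Allocations with GFP_TRY may fail without a warning; callers such as
 * read_for_csum() treat that as "retry later", since it is "only"
 * background resync work. */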
#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

static int read_for_csum(struct drbd_device *device, sector_t sector, int size)
{
	struct drbd_peer_request *peer_req;

	if (!get_ldev(device))
		return -EIO;

	if (drbd_rs_should_slow_down(device, sector))
		goto defer;

	/* GFP_TRY, because if there is no memory available right now, this may
	 * be rescheduled for later. It is "only" background resync, after all. */
	peer_req = drbd_alloc_peer_req(device, ID_SYNCER /* unused */, sector,
				       size, GFP_TRY);
	if (!peer_req)
		goto defer;

	peer_req->w.cb = w_e_send_csum;
	spin_lock_irq(&device->tconn->req_lock);
	list_add(&peer_req->w.list, &device->read_ee);
	spin_unlock_irq(&device->tconn->req_lock);

	atomic_add(size >> 9, &device->rs_sect_ev);
	if (drbd_submit_peer_request(device, peer_req, READ, DRBD_FAULT_RS_RD) == 0)
		return 0;

	/* If it failed because of ENOMEM, retry should help.  If it failed
	 * because bio_add_page failed (probably broken lower level driver),
	 * retry may or may not help.
	 * If it does not, you may need to force disconnect. */
	spin_lock_irq(&device->tconn->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->tconn->req_lock);

	drbd_free_peer_req(device, peer_req);
defer:
	put_ldev(device);
	return -EAGAIN;
}

int w_resync_timer(struct drbd_work *w, int cancel)
{
	struct drbd_device *device = w->device;
	switch (device->state.conn) {
	case C_VERIFY_S:
		w_make_ov_request(w, cancel);
		break;
	case C_SYNC_TARGET:
		w_make_resync_request(w, cancel);
		break;
	}

	return 0;
}

void resync_timer_fn(unsigned long data)
{
	struct drbd_device *device = (struct drbd_device *) data;

	if (list_empty(&device->resync_work.list))
		drbd_queue_work(&device->tconn->sender_work, &device->resync_work);
}

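/* Small fixed-size ring buffer helpers for the resync controller's plan:
 * fifo_push() overwrites the oldest slot with a new value and returns what
 * was there; fifo_set() and fifo_add_val() initialize respectively shift
 * all planned values at once. */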
static void fifo_set(struct fifo_buffer *fb, int value)
{
	int i;

	for (i = 0; i < fb->size; i++)
		fb->values[i] = value;
}

static int fifo_push(struct fifo_buffer *fb, int value)
{
	int ov;

	ov = fb->values[fb->head_index];
	fb->values[fb->head_index++] = value;

	if (fb->head_index >= fb->size)
		fb->head_index = 0;

	return ov;
}

static void fifo_add_val(struct fifo_buffer *fb, int value)
{
	int i;

	for (i = 0; i < fb->size; i++)
		fb->values[i] += value;
}

struct fifo_buffer *fifo_alloc(int fifo_size)
{
	struct fifo_buffer *fb;

	fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_NOIO);
	if (!fb)
		return NULL;

	fb->head_index = 0;
	fb->size = fifo_size;
	fb->total = 0;

	return fb;
}

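/* The resync controller below is a simple feedback loop: from how many
 * sectors came back since the last turn (sect_in) it derives how many
 * sectors it wants "in flight" (want), spreads the correction over "steps"
 * future turns via the plan fifo, and requests sect_in plus this turn's
 * share of the correction.
 *
 * Worked example (illustrative numbers only): with c_fill_target = 1000
 * sectors, rs_in_flight = 600 and plan->total = 100, we get
 * correction = 1000 - 600 - 100 = 300; with steps = 10 that adds
 * cps = 30 sectors to each planned slot. */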
static int drbd_rs_controller(struct drbd_device *device)
{
	struct disk_conf *dc;
	unsigned int sect_in;  /* Number of sectors that came in since the last turn */
	unsigned int want;     /* The number of sectors we want in the proxy */
	int req_sect; /* Number of sectors to request in this turn */
	int correction; /* Number of sectors more we need in the proxy */
	int cps; /* correction per invocation of drbd_rs_controller() */
	int steps; /* Number of time steps to plan ahead */
	int curr_corr;
	int max_sect;
	struct fifo_buffer *plan;

	sect_in = atomic_xchg(&device->rs_sect_in, 0); /* Number of sectors that came in */
	device->rs_in_flight -= sect_in;

	dc = rcu_dereference(device->ldev->disk_conf);
	plan = rcu_dereference(device->rs_plan_s);

	steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */

	if (device->rs_in_flight + sect_in == 0) { /* At start of resync */
		want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
	} else { /* normal path */
		want = dc->c_fill_target ? dc->c_fill_target :
			sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10);
	}

	correction = want - device->rs_in_flight - plan->total;

	/* Plan ahead */
	cps = correction / steps;
	fifo_add_val(plan, cps);
	plan->total += cps * steps;

	/* What we do in this step */
	curr_corr = fifo_push(plan, 0);
	plan->total -= curr_corr;

	req_sect = sect_in + curr_corr;
	if (req_sect < 0)
		req_sect = 0;

	max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ;
	if (req_sect > max_sect)
		req_sect = max_sect;

	/*
	dev_warn(DEV, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
		 sect_in, device->rs_in_flight, want, correction,
		 steps, cps, device->rs_planed, curr_corr, req_sect);
	*/

	return req_sect;
}

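/* Convert the controller output (sectors per SLEEP_TIME) into a number of
 * BM_BLOCK_SIZE requests, and keep c_sync_rate in kB/s for reporting.
 * With the usual BM_BLOCK_SIZE of 4k, one block is 1 << (BM_BLOCK_SHIFT - 9)
 * = 8 sectors, hence the shift below. */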
static int drbd_rs_number_requests(struct drbd_device *device)
{
	int number;

	rcu_read_lock();
	if (rcu_dereference(device->rs_plan_s)->size) {
		number = drbd_rs_controller(device) >> (BM_BLOCK_SHIFT - 9);
		device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
	} else {
		device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate;
		number = SLEEP_TIME * device->c_sync_rate  / ((BM_BLOCK_SIZE / 1024) * HZ);
	}
	rcu_read_unlock();

	/* ignore the amount of pending requests, the resync controller should
	 * throttle down to incoming reply rate soon enough anyways. */
	return number;
}

int w_make_resync_request(struct drbd_work *w, int cancel)
{
	struct drbd_device *device = w->device;
	unsigned long bit;
	sector_t sector;
	const sector_t capacity = drbd_get_capacity(device->this_bdev);
	int max_bio_size;
	int number, rollback_i, size;
	int align, queued, sndbuf;
	int i = 0;

	if (unlikely(cancel))
		return 0;

	if (device->rs_total == 0) {
		/* empty resync? */
		drbd_resync_finished(device);
		return 0;
	}

	if (!get_ldev(device)) {
		/* Since we only need to access device->rsync, a
		   get_ldev_if_state(device, D_FAILED) would be sufficient, but
		   to continue resync with a broken disk makes no sense at
		   all */
		dev_err(DEV, "Disk broke down during resync!\n");
		return 0;
	}

	max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9;
	number = drbd_rs_number_requests(device);
	if (number == 0)
		goto requeue;

	for (i = 0; i < number; i++) {
		/* Stop generating RS requests, when half of the send buffer is filled */
		mutex_lock(&device->tconn->data.mutex);
		if (device->tconn->data.socket) {
			queued = device->tconn->data.socket->sk->sk_wmem_queued;
			sndbuf = device->tconn->data.socket->sk->sk_sndbuf;
		} else {
			queued = 1;
			sndbuf = 0;
		}
		mutex_unlock(&device->tconn->data.mutex);
		if (queued > sndbuf / 2)
			goto requeue;

next_sector:
		size = BM_BLOCK_SIZE;
		bit  = drbd_bm_find_next(device, device->bm_resync_fo);

		if (bit == DRBD_END_OF_BITMAP) {
			device->bm_resync_fo = drbd_bm_bits(device);
			put_ldev(device);
			return 0;
		}

		sector = BM_BIT_TO_SECT(bit);

		if (drbd_rs_should_slow_down(device, sector) ||
		    drbd_try_rs_begin_io(device, sector)) {
			device->bm_resync_fo = bit;
			goto requeue;
		}
		device->bm_resync_fo = bit + 1;

		if (unlikely(drbd_bm_test_bit(device, bit) == 0)) {
			drbd_rs_complete_io(device, sector);
			goto next_sector;
		}

#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
		/* try to find some adjacent bits.
		 * we stop once we already have the maximum req size.
		 *
		 * Additionally always align bigger requests, in order to
		 * be prepared for all stripe sizes of software RAIDs.
		 */
		align = 1;
		rollback_i = i;
		for (;;) {
			if (size + BM_BLOCK_SIZE > max_bio_size)
				break;

			/* Always keep the start sector aligned to the size we
			 * may grow to: 1 << (align + 3) sectors equals
			 * BM_BLOCK_SIZE << align bytes */
			if (sector & ((1<<(align+3))-1))
				break;

			/* do not cross extent boundaries */
			if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
				break;
			/* now, is it actually dirty, after all?
			 * caution, drbd_bm_test_bit is tri-state for some
			 * obscure reason; ( b == 0 ) would get the out-of-band
			 * only accidentally right because of the "oddly sized"
			 * adjustment below */
			if (drbd_bm_test_bit(device, bit+1) != 1)
				break;
			bit++;
			size += BM_BLOCK_SIZE;
			if ((BM_BLOCK_SIZE << align) <= size)
				align++;
			i++;
		}
		/* if we merged some,
		 * reset the offset to start the next drbd_bm_find_next from */
		if (size > BM_BLOCK_SIZE)
			device->bm_resync_fo = bit + 1;
#endif

		/* adjust very last sectors, in case we are oddly sized */
		if (sector + (size>>9) > capacity)
			size = (capacity-sector)<<9;
		if (device->tconn->agreed_pro_version >= 89 && device->tconn->csums_tfm) {
			switch (read_for_csum(device, sector, size)) {
			case -EIO: /* Disk failure */
				put_ldev(device);
				return -EIO;
			case -EAGAIN: /* allocation failed, or ldev busy */
				drbd_rs_complete_io(device, sector);
				device->bm_resync_fo = BM_SECT_TO_BIT(sector);
				i = rollback_i;
				goto requeue;
			case 0:
				/* everything ok */
				break;
			default:
				BUG();
			}
		} else {
			int err;

			inc_rs_pending(device);
			err = drbd_send_drequest(device, P_RS_DATA_REQUEST,
						 sector, size, ID_SYNCER);
			if (err) {
				dev_err(DEV, "drbd_send_drequest() failed, aborting...\n");
				dec_rs_pending(device);
				put_ldev(device);
				return err;
			}
		}
	}

	if (device->bm_resync_fo >= drbd_bm_bits(device)) {
		/* last syncer _request_ was sent,
		 * but the P_RS_DATA_REPLY not yet received.  sync will end (and
		 * next sync group will resume), as soon as we receive the last
		 * resync data block, and the last bit is cleared.
		 * until then resync "work" is "inactive" ...
		 */
		put_ldev(device);
		return 0;
	}

 requeue:
	device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
	mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
	put_ldev(device);
	return 0;
}

static int w_make_ov_request(struct drbd_work *w, int cancel)
{
	struct drbd_device *device = w->device;
	int number, i, size;
	sector_t sector;
	const sector_t capacity = drbd_get_capacity(device->this_bdev);
	bool stop_sector_reached = false;

	if (unlikely(cancel))
		return 1;

	number = drbd_rs_number_requests(device);

	sector = device->ov_position;
	for (i = 0; i < number; i++) {
		if (sector >= capacity)
			return 1;

		/* We check for "finished" only in the reply path:
		 * w_e_end_ov_reply().
		 * We need to send at least one request out. */
		stop_sector_reached = i > 0
			&& verify_can_do_stop_sector(device)
			&& sector >= device->ov_stop_sector;
		if (stop_sector_reached)
			break;

		size = BM_BLOCK_SIZE;

		if (drbd_rs_should_slow_down(device, sector) ||
		    drbd_try_rs_begin_io(device, sector)) {
			device->ov_position = sector;
			goto requeue;
		}

		if (sector + (size>>9) > capacity)
			size = (capacity-sector)<<9;

		inc_rs_pending(device);
		if (drbd_send_ov_request(device, sector, size)) {
			dec_rs_pending(device);
			return 0;
		}
		sector += BM_SECT_PER_BIT;
	}
	device->ov_position = sector;

 requeue:
	device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
	if (i == 0 || !stop_sector_reached)
		mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
	return 1;
}

int w_ov_finished(struct drbd_work *w, int cancel)
{
	struct drbd_device *device = w->device;
	kfree(w);
	ov_out_of_sync_print(device);
	drbd_resync_finished(device);

	return 0;
}

static int w_resync_finished(struct drbd_work *w, int cancel)
{
	struct drbd_device *device = w->device;
	kfree(w);

	drbd_resync_finished(device);

	return 0;
}

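/* Make sure the peer is still reachable (a ping was answered) before the
 * resync is finalized below; gives up waiting once the connection state
 * drops below C_CONNECTED. */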
static void ping_peer(struct drbd_device *device)
{
	struct drbd_tconn *tconn = device->tconn;

	clear_bit(GOT_PING_ACK, &tconn->flags);
	request_ping(tconn);
	wait_event(tconn->ping_wait,
		   test_bit(GOT_PING_ACK, &tconn->flags) || device->state.conn < C_CONNECTED);
}

int drbd_resync_finished(struct drbd_device *device)
{
	unsigned long db, dt, dbdt;
	unsigned long n_oos;
	union drbd_state os, ns;
	struct drbd_work *w;
	char *khelper_cmd = NULL;
	int verify_done = 0;

	/* Remove all elements from the resync LRU. Since future actions
	 * might set bits in the (main) bitmap, then the entries in the
	 * resync LRU would be wrong. */
	if (drbd_rs_del_all(device)) {
		/* In case this is not possible now, most probably because
		 * there are P_RS_DATA_REPLY Packets lingering on the worker's
		 * queue (or even the read operations for those packets
		 * is not finished by now).   Retry in 100ms. */

		schedule_timeout_interruptible(HZ / 10);
		w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC);
		if (w) {
			w->cb = w_resync_finished;
			w->device = device;
			drbd_queue_work(&device->tconn->sender_work, w);
			return 1;
		}
		dev_err(DEV, "Warn failed to drbd_rs_del_all() and to kmalloc(w).\n");
	}

	dt = (jiffies - device->rs_start - device->rs_paused) / HZ;
	if (dt <= 0)
		dt = 1;

	db = device->rs_total;
	/* adjust for verify start and stop sectors, respective reached position */
	if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
		db -= device->ov_left;

	dbdt = Bit2KB(db/dt);
	device->rs_paused /= HZ;

	if (!get_ldev(device))
		goto out;

	ping_peer(device);

	spin_lock_irq(&device->tconn->req_lock);
	os = drbd_read_state(device);

	verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);

	/* This protects us against multiple calls (that can happen in the presence
	   of application IO), and against connectivity loss just before we arrive here. */
	if (os.conn <= C_CONNECTED)
		goto out_unlock;

	ns = os;
	ns.conn = C_CONNECTED;

	dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
	     verify_done ? "Online verify" : "Resync",
	     dt + device->rs_paused, device->rs_paused, dbdt);

	n_oos = drbd_bm_total_weight(device);

	if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
		if (n_oos) {
			dev_alert(DEV, "Online verify found %lu %dk block out of sync!\n",
			      n_oos, Bit2KB(1));
			khelper_cmd = "out-of-sync";
		}
	} else {
		D_ASSERT((n_oos - device->rs_failed) == 0);

		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
			khelper_cmd = "after-resync-target";

		if (device->tconn->csums_tfm && device->rs_total) {
			const unsigned long s = device->rs_same_csum;
			const unsigned long t = device->rs_total;
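			/* Integer percentage; the (t < 100000) split keeps
			 * s * 100 from overflowing an unsigned long on
			 * 32-bit, at the cost of some precision for huge t. */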
891b411b363SPhilipp Reisner 			const int ratio =
892b411b363SPhilipp Reisner 				(t == 0)     ? 0 :
893b411b363SPhilipp Reisner 			(t < 100000) ? ((s*100)/t) : (s/(t/100));
894b411b363SPhilipp Reisner 			dev_info(DEV, "%u %% had equal checksums, eliminated: %luK; "
895b411b363SPhilipp Reisner 			     "transferred %luK total %luK\n",
896b411b363SPhilipp Reisner 			     ratio,
897b30ab791SAndreas Gruenbacher 			     Bit2KB(device->rs_same_csum),
898b30ab791SAndreas Gruenbacher 			     Bit2KB(device->rs_total - device->rs_same_csum),
899b30ab791SAndreas Gruenbacher 			     Bit2KB(device->rs_total));
900b411b363SPhilipp Reisner 		}
901b411b363SPhilipp Reisner 	}
902b411b363SPhilipp Reisner 
903b30ab791SAndreas Gruenbacher 	if (device->rs_failed) {
904b30ab791SAndreas Gruenbacher 		dev_info(DEV, "            %lu failed blocks\n", device->rs_failed);
905b411b363SPhilipp Reisner 
906b411b363SPhilipp Reisner 		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
907b411b363SPhilipp Reisner 			ns.disk = D_INCONSISTENT;
908b411b363SPhilipp Reisner 			ns.pdsk = D_UP_TO_DATE;
909b411b363SPhilipp Reisner 		} else {
910b411b363SPhilipp Reisner 			ns.disk = D_UP_TO_DATE;
911b411b363SPhilipp Reisner 			ns.pdsk = D_INCONSISTENT;
912b411b363SPhilipp Reisner 		}
913b411b363SPhilipp Reisner 	} else {
914b411b363SPhilipp Reisner 		ns.disk = D_UP_TO_DATE;
915b411b363SPhilipp Reisner 		ns.pdsk = D_UP_TO_DATE;
916b411b363SPhilipp Reisner 
917b411b363SPhilipp Reisner 		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
918b30ab791SAndreas Gruenbacher 			if (device->p_uuid) {
919b411b363SPhilipp Reisner 				int i;
920b411b363SPhilipp Reisner 				for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
921b30ab791SAndreas Gruenbacher 					_drbd_uuid_set(device, i, device->p_uuid[i]);
922b30ab791SAndreas Gruenbacher 				drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_CURRENT]);
923b30ab791SAndreas Gruenbacher 				_drbd_uuid_set(device, UI_CURRENT, device->p_uuid[UI_CURRENT]);
924b411b363SPhilipp Reisner 			} else {
925b30ab791SAndreas Gruenbacher 				dev_err(DEV, "device->p_uuid is NULL! BUG\n");
926b411b363SPhilipp Reisner 			}
927b411b363SPhilipp Reisner 		}
928b411b363SPhilipp Reisner 
92962b0da3aSLars Ellenberg 		if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
93062b0da3aSLars Ellenberg 			/* for verify runs, we don't update uuids here,
93162b0da3aSLars Ellenberg 			 * so there would be nothing to report. */
932b30ab791SAndreas Gruenbacher 			drbd_uuid_set_bm(device, 0UL);
933b30ab791SAndreas Gruenbacher 			drbd_print_uuids(device, "updated UUIDs");
934b30ab791SAndreas Gruenbacher 			if (device->p_uuid) {
935b411b363SPhilipp Reisner 				/* Now the two UUID sets are equal, update what we
936b411b363SPhilipp Reisner 				 * know of the peer. */
937b411b363SPhilipp Reisner 				int i;
938b411b363SPhilipp Reisner 				for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
939b30ab791SAndreas Gruenbacher 					device->p_uuid[i] = device->ldev->md.uuid[i];
940b411b363SPhilipp Reisner 			}
941b411b363SPhilipp Reisner 		}
94262b0da3aSLars Ellenberg 	}
943b411b363SPhilipp Reisner 
944b30ab791SAndreas Gruenbacher 	_drbd_set_state(device, ns, CS_VERBOSE, NULL);
945b411b363SPhilipp Reisner out_unlock:
946b30ab791SAndreas Gruenbacher 	spin_unlock_irq(&device->tconn->req_lock);
947b30ab791SAndreas Gruenbacher 	put_ldev(device);
948b411b363SPhilipp Reisner out:
949b30ab791SAndreas Gruenbacher 	device->rs_total  = 0;
950b30ab791SAndreas Gruenbacher 	device->rs_failed = 0;
951b30ab791SAndreas Gruenbacher 	device->rs_paused = 0;
95258ffa580SLars Ellenberg 
95358ffa580SLars Ellenberg 	/* reset start sector, if we reached end of device */
954b30ab791SAndreas Gruenbacher 	if (verify_done && device->ov_left == 0)
955b30ab791SAndreas Gruenbacher 		device->ov_start_sector = 0;
956b411b363SPhilipp Reisner 
957b30ab791SAndreas Gruenbacher 	drbd_md_sync(device);
95813d42685SLars Ellenberg 
959b411b363SPhilipp Reisner 	if (khelper_cmd)
960b30ab791SAndreas Gruenbacher 		drbd_khelper(device, khelper_cmd);
961b411b363SPhilipp Reisner 
962b411b363SPhilipp Reisner 	return 1;
963b411b363SPhilipp Reisner }
964b411b363SPhilipp Reisner 
965b411b363SPhilipp Reisner /* helper */
966b30ab791SAndreas Gruenbacher static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_request *peer_req)
967b411b363SPhilipp Reisner {
968045417f7SAndreas Gruenbacher 	if (drbd_peer_req_has_active_page(peer_req)) {
969b411b363SPhilipp Reisner 		/* This might happen if sendpage() has not finished */
970db830c46SAndreas Gruenbacher 		int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
971b30ab791SAndreas Gruenbacher 		atomic_add(i, &device->pp_in_use_by_net);
972b30ab791SAndreas Gruenbacher 		atomic_sub(i, &device->pp_in_use);
973b30ab791SAndreas Gruenbacher 		spin_lock_irq(&device->tconn->req_lock);
974b30ab791SAndreas Gruenbacher 		list_add_tail(&peer_req->w.list, &device->net_ee);
975b30ab791SAndreas Gruenbacher 		spin_unlock_irq(&device->tconn->req_lock);
976435f0740SLars Ellenberg 		wake_up(&drbd_pp_wait);
977b411b363SPhilipp Reisner 	} else
978b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
979b411b363SPhilipp Reisner }
980b411b363SPhilipp Reisner 
981b411b363SPhilipp Reisner /**
982b411b363SPhilipp Reisner  * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
983b30ab791SAndreas Gruenbacher  * @device:	DRBD device.
984b411b363SPhilipp Reisner  * @w:		work object.
985b411b363SPhilipp Reisner  * @cancel:	The connection will be closed anyways
986b411b363SPhilipp Reisner  */
98799920dc5SAndreas Gruenbacher int w_e_end_data_req(struct drbd_work *w, int cancel)
988b411b363SPhilipp Reisner {
989db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
990b30ab791SAndreas Gruenbacher 	struct drbd_device *device = w->device;
99199920dc5SAndreas Gruenbacher 	int err;
992b411b363SPhilipp Reisner 
993b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
994b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
995b30ab791SAndreas Gruenbacher 		dec_unacked(device);
99699920dc5SAndreas Gruenbacher 		return 0;
997b411b363SPhilipp Reisner 	}
998b411b363SPhilipp Reisner 
999db830c46SAndreas Gruenbacher 	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1000b30ab791SAndreas Gruenbacher 		err = drbd_send_block(device, P_DATA_REPLY, peer_req);
1001b411b363SPhilipp Reisner 	} else {
1002b411b363SPhilipp Reisner 		if (__ratelimit(&drbd_ratelimit_state))
1003b411b363SPhilipp Reisner 			dev_err(DEV, "Sending NegDReply. sector=%llus.\n",
1004db830c46SAndreas Gruenbacher 			    (unsigned long long)peer_req->i.sector);
1005b411b363SPhilipp Reisner 
1006b30ab791SAndreas Gruenbacher 		err = drbd_send_ack(device, P_NEG_DREPLY, peer_req);
1007b411b363SPhilipp Reisner 	}
1008b411b363SPhilipp Reisner 
1009b30ab791SAndreas Gruenbacher 	dec_unacked(device);
1010b411b363SPhilipp Reisner 
1011b30ab791SAndreas Gruenbacher 	move_to_net_ee_or_free(device, peer_req);
1012b411b363SPhilipp Reisner 
101399920dc5SAndreas Gruenbacher 	if (unlikely(err))
1014b411b363SPhilipp Reisner 		dev_err(DEV, "drbd_send_block() failed\n");
101599920dc5SAndreas Gruenbacher 	return err;
1016b411b363SPhilipp Reisner }
1017b411b363SPhilipp Reisner 
1018b411b363SPhilipp Reisner /**
1019a209b4aeSAndreas Gruenbacher  * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
1021b411b363SPhilipp Reisner  * @w:		work object.
1022b411b363SPhilipp Reisner  * @cancel:	The connection will be closed anyway.
1023b411b363SPhilipp Reisner  */
102499920dc5SAndreas Gruenbacher int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
1025b411b363SPhilipp Reisner {
1026db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
1027b30ab791SAndreas Gruenbacher 	struct drbd_device *device = w->device;
102899920dc5SAndreas Gruenbacher 	int err;
1029b411b363SPhilipp Reisner 
1030b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
1031b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
1032b30ab791SAndreas Gruenbacher 		dec_unacked(device);
103399920dc5SAndreas Gruenbacher 		return 0;
1034b411b363SPhilipp Reisner 	}
1035b411b363SPhilipp Reisner 
1036b30ab791SAndreas Gruenbacher 	if (get_ldev_if_state(device, D_FAILED)) {
1037b30ab791SAndreas Gruenbacher 		drbd_rs_complete_io(device, peer_req->i.sector);
1038b30ab791SAndreas Gruenbacher 		put_ldev(device);
1039b411b363SPhilipp Reisner 	}
1040b411b363SPhilipp Reisner 
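	/* While we are in Ahead mode (C_AHEAD), resync requests are not
	 * served; answer with P_RS_CANCEL instead of shipping the block. */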
1041b30ab791SAndreas Gruenbacher 	if (device->state.conn == C_AHEAD) {
1042b30ab791SAndreas Gruenbacher 		err = drbd_send_ack(device, P_RS_CANCEL, peer_req);
1043db830c46SAndreas Gruenbacher 	} else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1044b30ab791SAndreas Gruenbacher 		if (likely(device->state.pdsk >= D_INCONSISTENT)) {
1045b30ab791SAndreas Gruenbacher 			inc_rs_pending(device);
1046b30ab791SAndreas Gruenbacher 			err = drbd_send_block(device, P_RS_DATA_REPLY, peer_req);
1047b411b363SPhilipp Reisner 		} else {
1048b411b363SPhilipp Reisner 			if (__ratelimit(&drbd_ratelimit_state))
1049b411b363SPhilipp Reisner 				dev_err(DEV, "Not sending RSDataReply, "
1050b411b363SPhilipp Reisner 				    "partner DISKLESS!\n");
105199920dc5SAndreas Gruenbacher 			err = 0;
1052b411b363SPhilipp Reisner 		}
1053b411b363SPhilipp Reisner 	} else {
1054b411b363SPhilipp Reisner 		if (__ratelimit(&drbd_ratelimit_state))
1055b411b363SPhilipp Reisner 			dev_err(DEV, "Sending NegRSDReply. sector %llus.\n",
1056db830c46SAndreas Gruenbacher 			    (unsigned long long)peer_req->i.sector);
1057b411b363SPhilipp Reisner 
1058b30ab791SAndreas Gruenbacher 		err = drbd_send_ack(device, P_NEG_RS_DREPLY, peer_req);
1059b411b363SPhilipp Reisner 
1060b411b363SPhilipp Reisner 		/* update resync data with failure */
1061b30ab791SAndreas Gruenbacher 		drbd_rs_failed_io(device, peer_req->i.sector, peer_req->i.size);
1062b411b363SPhilipp Reisner 	}
1063b411b363SPhilipp Reisner 
1064b30ab791SAndreas Gruenbacher 	dec_unacked(device);
1065b411b363SPhilipp Reisner 
1066b30ab791SAndreas Gruenbacher 	move_to_net_ee_or_free(device, peer_req);
1067b411b363SPhilipp Reisner 
106899920dc5SAndreas Gruenbacher 	if (unlikely(err))
1069b411b363SPhilipp Reisner 		dev_err(DEV, "drbd_send_block() failed\n");
107099920dc5SAndreas Gruenbacher 	return err;
1071b411b363SPhilipp Reisner }
1072b411b363SPhilipp Reisner 
107399920dc5SAndreas Gruenbacher int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
1074b411b363SPhilipp Reisner {
1075db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
1076b30ab791SAndreas Gruenbacher 	struct drbd_device *device = w->device;
1077b411b363SPhilipp Reisner 	struct digest_info *di;
1078b411b363SPhilipp Reisner 	int digest_size;
1079b411b363SPhilipp Reisner 	void *digest = NULL;
108099920dc5SAndreas Gruenbacher 	int err, eq = 0;
1081b411b363SPhilipp Reisner 
1082b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
1083b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
1084b30ab791SAndreas Gruenbacher 		dec_unacked(device);
108599920dc5SAndreas Gruenbacher 		return 0;
1086b411b363SPhilipp Reisner 	}
1087b411b363SPhilipp Reisner 
1088b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
1089b30ab791SAndreas Gruenbacher 		drbd_rs_complete_io(device, peer_req->i.sector);
1090b30ab791SAndreas Gruenbacher 		put_ldev(device);
10911d53f09eSLars Ellenberg 	}
1092b411b363SPhilipp Reisner 
1093db830c46SAndreas Gruenbacher 	di = peer_req->digest;
1094b411b363SPhilipp Reisner 
1095db830c46SAndreas Gruenbacher 	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1096b411b363SPhilipp Reisner 		/* quick hack to try to avoid a race against reconfiguration.
1097b411b363SPhilipp Reisner 		 * a real fix would be much more involved,
1098b411b363SPhilipp Reisner 		 * introducing more locking mechanisms */
1099b30ab791SAndreas Gruenbacher 		if (device->tconn->csums_tfm) {
1100b30ab791SAndreas Gruenbacher 			digest_size = crypto_hash_digestsize(device->tconn->csums_tfm);
1101b411b363SPhilipp Reisner 			D_ASSERT(digest_size == di->digest_size);
1102b411b363SPhilipp Reisner 			digest = kmalloc(digest_size, GFP_NOIO);
1103b411b363SPhilipp Reisner 		}
1104b411b363SPhilipp Reisner 		if (digest) {
1105b30ab791SAndreas Gruenbacher 			drbd_csum_ee(device, device->tconn->csums_tfm, peer_req, digest);
1106b411b363SPhilipp Reisner 			eq = !memcmp(digest, di->digest, digest_size);
1107b411b363SPhilipp Reisner 			kfree(digest);
1108b411b363SPhilipp Reisner 		}
1109b411b363SPhilipp Reisner 
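		/* Equal checksums mean the block is already in sync: ack with
		 * P_RS_IS_IN_SYNC and skip the data transfer; otherwise fall
		 * back to a full P_RS_DATA_REPLY below. */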
1110b411b363SPhilipp Reisner 		if (eq) {
1111b30ab791SAndreas Gruenbacher 			drbd_set_in_sync(device, peer_req->i.sector, peer_req->i.size);
1112676396d5SLars Ellenberg 			/* rs_same_csum unit is BM_BLOCK_SIZE */
1113b30ab791SAndreas Gruenbacher 			device->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT;
1114b30ab791SAndreas Gruenbacher 			err = drbd_send_ack(device, P_RS_IS_IN_SYNC, peer_req);
1115b411b363SPhilipp Reisner 		} else {
1116b30ab791SAndreas Gruenbacher 			inc_rs_pending(device);
1117db830c46SAndreas Gruenbacher 			peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
1118db830c46SAndreas Gruenbacher 			peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */
1119204bba99SPhilipp Reisner 			kfree(di);
1120b30ab791SAndreas Gruenbacher 			err = drbd_send_block(device, P_RS_DATA_REPLY, peer_req);
1121b411b363SPhilipp Reisner 		}
1122b411b363SPhilipp Reisner 	} else {
1123b30ab791SAndreas Gruenbacher 		err = drbd_send_ack(device, P_NEG_RS_DREPLY, peer_req);
1124b411b363SPhilipp Reisner 		if (__ratelimit(&drbd_ratelimit_state))
1125b411b363SPhilipp Reisner 			dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n");
1126b411b363SPhilipp Reisner 	}
1127b411b363SPhilipp Reisner 
1128b30ab791SAndreas Gruenbacher 	dec_unacked(device);
1129b30ab791SAndreas Gruenbacher 	move_to_net_ee_or_free(device, peer_req);
1130b411b363SPhilipp Reisner 
113199920dc5SAndreas Gruenbacher 	if (unlikely(err))
1132b411b363SPhilipp Reisner 		dev_err(DEV, "drbd_send_block/ack() failed\n");
113399920dc5SAndreas Gruenbacher 	return err;
1134b411b363SPhilipp Reisner }
1135b411b363SPhilipp Reisner 
113699920dc5SAndreas Gruenbacher int w_e_end_ov_req(struct drbd_work *w, int cancel)
1137b411b363SPhilipp Reisner {
1138db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
1139b30ab791SAndreas Gruenbacher 	struct drbd_device *device = w->device;
1140db830c46SAndreas Gruenbacher 	sector_t sector = peer_req->i.sector;
1141db830c46SAndreas Gruenbacher 	unsigned int size = peer_req->i.size;
1142b411b363SPhilipp Reisner 	int digest_size;
1143b411b363SPhilipp Reisner 	void *digest;
114499920dc5SAndreas Gruenbacher 	int err = 0;
1145b411b363SPhilipp Reisner 
1146b411b363SPhilipp Reisner 	if (unlikely(cancel))
1147b411b363SPhilipp Reisner 		goto out;
1148b411b363SPhilipp Reisner 
1149b30ab791SAndreas Gruenbacher 	digest_size = crypto_hash_digestsize(device->tconn->verify_tfm);
1150b411b363SPhilipp Reisner 	digest = kmalloc(digest_size, GFP_NOIO);
11518f21420eSPhilipp Reisner 	if (!digest) {
115299920dc5SAndreas Gruenbacher 		err = 1;	/* terminate the connection in case the allocation failed */
11538f21420eSPhilipp Reisner 		goto out;
11548f21420eSPhilipp Reisner 	}
11558f21420eSPhilipp Reisner 
1156db830c46SAndreas Gruenbacher 	if (likely(!(peer_req->flags & EE_WAS_ERROR)))
1157b30ab791SAndreas Gruenbacher 		drbd_csum_ee(device, device->tconn->verify_tfm, peer_req, digest);
11588f21420eSPhilipp Reisner 	else
11598f21420eSPhilipp Reisner 		memset(digest, 0, digest_size);
11608f21420eSPhilipp Reisner 
116253ea4331SLars Ellenberg 	/* Free peer_req and pages before send.
116253ea4331SLars Ellenberg 	 * In case we block on congestion, we could otherwise run into
116353ea4331SLars Ellenberg 	 * some distributed deadlock, if the other side blocks on
116453ea4331SLars Ellenberg 	 * congestion as well, because our receiver blocks in
1165c37c8ecfSAndreas Gruenbacher 	 * drbd_alloc_pages due to pp_in_use > max_buffers. */
1166b30ab791SAndreas Gruenbacher 	drbd_free_peer_req(device, peer_req);
1167db830c46SAndreas Gruenbacher 	peer_req = NULL;
1168b30ab791SAndreas Gruenbacher 	inc_rs_pending(device);
1169b30ab791SAndreas Gruenbacher 	err = drbd_send_drequest_csum(device, sector, size, digest, digest_size, P_OV_REPLY);
117099920dc5SAndreas Gruenbacher 	if (err)
1171b30ab791SAndreas Gruenbacher 		dec_rs_pending(device);
1172b411b363SPhilipp Reisner 	kfree(digest);
1173b411b363SPhilipp Reisner 
1174b411b363SPhilipp Reisner out:
1175db830c46SAndreas Gruenbacher 	if (peer_req)
1176b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
1177b30ab791SAndreas Gruenbacher 	dec_unacked(device);
117899920dc5SAndreas Gruenbacher 	return err;
1179b411b363SPhilipp Reisner }
1180b411b363SPhilipp Reisner 
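/* Record an online-verify mismatch.  Contiguous out-of-sync ranges are
 * coalesced in ov_last_oos_start/ov_last_oos_size, so that one summary
 * can be reported per contiguous region (see ov_out_of_sync_print()). */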
1181b30ab791SAndreas Gruenbacher void drbd_ov_out_of_sync_found(struct drbd_device *device, sector_t sector, int size)
1182b411b363SPhilipp Reisner {
1183b30ab791SAndreas Gruenbacher 	if (device->ov_last_oos_start + device->ov_last_oos_size == sector) {
1184b30ab791SAndreas Gruenbacher 		device->ov_last_oos_size += size>>9;
1185b411b363SPhilipp Reisner 	} else {
1186b30ab791SAndreas Gruenbacher 		device->ov_last_oos_start = sector;
1187b30ab791SAndreas Gruenbacher 		device->ov_last_oos_size = size>>9;
1188b411b363SPhilipp Reisner 	}
1189b30ab791SAndreas Gruenbacher 	drbd_set_out_of_sync(device, sector, size);
1190b411b363SPhilipp Reisner }
1191b411b363SPhilipp Reisner 
119299920dc5SAndreas Gruenbacher int w_e_end_ov_reply(struct drbd_work *w, int cancel)
1193b411b363SPhilipp Reisner {
1194db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
1195b30ab791SAndreas Gruenbacher 	struct drbd_device *device = w->device;
1196b411b363SPhilipp Reisner 	struct digest_info *di;
1197b411b363SPhilipp Reisner 	void *digest;
1198db830c46SAndreas Gruenbacher 	sector_t sector = peer_req->i.sector;
1199db830c46SAndreas Gruenbacher 	unsigned int size = peer_req->i.size;
120053ea4331SLars Ellenberg 	int digest_size;
120199920dc5SAndreas Gruenbacher 	int err, eq = 0;
120258ffa580SLars Ellenberg 	bool stop_sector_reached = false;
1203b411b363SPhilipp Reisner 
1204b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
1205b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
1206b30ab791SAndreas Gruenbacher 		dec_unacked(device);
120799920dc5SAndreas Gruenbacher 		return 0;
1208b411b363SPhilipp Reisner 	}
1209b411b363SPhilipp Reisner 
1210b411b363SPhilipp Reisner 	/* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
1211b411b363SPhilipp Reisner 	 * the resync lru has been cleaned up already */
1212b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
1213b30ab791SAndreas Gruenbacher 		drbd_rs_complete_io(device, peer_req->i.sector);
1214b30ab791SAndreas Gruenbacher 		put_ldev(device);
12151d53f09eSLars Ellenberg 	}
1216b411b363SPhilipp Reisner 
1217db830c46SAndreas Gruenbacher 	di = peer_req->digest;
1218b411b363SPhilipp Reisner 
1219db830c46SAndreas Gruenbacher 	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1220b30ab791SAndreas Gruenbacher 		digest_size = crypto_hash_digestsize(device->tconn->verify_tfm);
1221b411b363SPhilipp Reisner 		digest = kmalloc(digest_size, GFP_NOIO);
1222b411b363SPhilipp Reisner 		if (digest) {
1223b30ab791SAndreas Gruenbacher 			drbd_csum_ee(device, device->tconn->verify_tfm, peer_req, digest);
1224b411b363SPhilipp Reisner 
1225b411b363SPhilipp Reisner 			D_ASSERT(digest_size == di->digest_size);
1226b411b363SPhilipp Reisner 			eq = !memcmp(digest, di->digest, digest_size);
1227b411b363SPhilipp Reisner 			kfree(digest);
1228b411b363SPhilipp Reisner 		}
1229b411b363SPhilipp Reisner 	}
1230b411b363SPhilipp Reisner 
12319676c760SLars Ellenberg 	/* Free peer_req and pages before send.
123253ea4331SLars Ellenberg 	 * In case we block on congestion, we could otherwise run into
123353ea4331SLars Ellenberg 	 * some distributed deadlock, if the other side blocks on
123453ea4331SLars Ellenberg 	 * congestion as well, because our receiver blocks in
1235c37c8ecfSAndreas Gruenbacher 	 * drbd_alloc_pages due to pp_in_use > max_buffers. */
1236b30ab791SAndreas Gruenbacher 	drbd_free_peer_req(device, peer_req);
1237b411b363SPhilipp Reisner 	if (!eq)
1238b30ab791SAndreas Gruenbacher 		drbd_ov_out_of_sync_found(device, sector, size);
1239b411b363SPhilipp Reisner 	else
1240b30ab791SAndreas Gruenbacher 		ov_out_of_sync_print(device);
1241b411b363SPhilipp Reisner 
1242b30ab791SAndreas Gruenbacher 	err = drbd_send_ack_ex(device, P_OV_RESULT, sector, size,
1243b411b363SPhilipp Reisner 			       eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
1244b411b363SPhilipp Reisner 
1245b30ab791SAndreas Gruenbacher 	dec_unacked(device);
1246b411b363SPhilipp Reisner 
1247b30ab791SAndreas Gruenbacher 	--device->ov_left;
1248ea5442afSLars Ellenberg 
1249ea5442afSLars Ellenberg 	/* let's advance progress step marks only for every other megabyte */
1250b30ab791SAndreas Gruenbacher 	if ((device->ov_left & 0x200) == 0x200)
1251b30ab791SAndreas Gruenbacher 		drbd_advance_rs_marks(device, device->ov_left);
1252ea5442afSLars Ellenberg 
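	/* An online verify may be limited to a sector range; check whether
	 * this reply reached the configured stop sector. */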
1253b30ab791SAndreas Gruenbacher 	stop_sector_reached = verify_can_do_stop_sector(device) &&
1254b30ab791SAndreas Gruenbacher 		(sector + (size>>9)) >= device->ov_stop_sector;
125558ffa580SLars Ellenberg 
1256b30ab791SAndreas Gruenbacher 	if (device->ov_left == 0 || stop_sector_reached) {
1257b30ab791SAndreas Gruenbacher 		ov_out_of_sync_print(device);
1258b30ab791SAndreas Gruenbacher 		drbd_resync_finished(device);
1259b411b363SPhilipp Reisner 	}
1260b411b363SPhilipp Reisner 
126199920dc5SAndreas Gruenbacher 	return err;
1262b411b363SPhilipp Reisner }
1263b411b363SPhilipp Reisner 
126499920dc5SAndreas Gruenbacher int w_prev_work_done(struct drbd_work *w, int cancel)
1265b411b363SPhilipp Reisner {
1266b411b363SPhilipp Reisner 	struct drbd_wq_barrier *b = container_of(w, struct drbd_wq_barrier, w);
126700d56944SPhilipp Reisner 
1268b411b363SPhilipp Reisner 	complete(&b->done);
126999920dc5SAndreas Gruenbacher 	return 0;
1270b411b363SPhilipp Reisner }
1271b411b363SPhilipp Reisner 
1272b6dd1a89SLars Ellenberg /* FIXME
1273b6dd1a89SLars Ellenberg  * We need to track the number of pending barrier acks,
1274b6dd1a89SLars Ellenberg  * and to be able to wait for them.
1275b6dd1a89SLars Ellenberg  * See also comment in drbd_adm_attach before drbd_suspend_io.
1276b6dd1a89SLars Ellenberg  */
1277a186e478SRashika Kheria static int drbd_send_barrier(struct drbd_tconn *tconn)
1278b411b363SPhilipp Reisner {
12799f5bdc33SAndreas Gruenbacher 	struct p_barrier *p;
1280b6dd1a89SLars Ellenberg 	struct drbd_socket *sock;
1281b411b363SPhilipp Reisner 
1282b6dd1a89SLars Ellenberg 	sock = &tconn->data;
1283b6dd1a89SLars Ellenberg 	p = conn_prepare_command(tconn, sock);
12849f5bdc33SAndreas Gruenbacher 	if (!p)
12859f5bdc33SAndreas Gruenbacher 		return -EIO;
1286b6dd1a89SLars Ellenberg 	p->barrier = tconn->send.current_epoch_nr;
1287b6dd1a89SLars Ellenberg 	p->pad = 0;
1288b6dd1a89SLars Ellenberg 	tconn->send.current_epoch_writes = 0;
1289b6dd1a89SLars Ellenberg 
1290b6dd1a89SLars Ellenberg 	return conn_send_command(tconn, sock, P_BARRIER, sizeof(*p), NULL, 0);
1291b411b363SPhilipp Reisner }
1292b411b363SPhilipp Reisner 
129399920dc5SAndreas Gruenbacher int w_send_write_hint(struct drbd_work *w, int cancel)
1294b411b363SPhilipp Reisner {
1295b30ab791SAndreas Gruenbacher 	struct drbd_device *device = w->device;
12969f5bdc33SAndreas Gruenbacher 	struct drbd_socket *sock;
12979f5bdc33SAndreas Gruenbacher 
1298b411b363SPhilipp Reisner 	if (cancel)
129999920dc5SAndreas Gruenbacher 		return 0;
1300b30ab791SAndreas Gruenbacher 	sock = &device->tconn->data;
1301b30ab791SAndreas Gruenbacher 	if (!drbd_prepare_command(device, sock))
13029f5bdc33SAndreas Gruenbacher 		return -EIO;
1303b30ab791SAndreas Gruenbacher 	return drbd_send_command(device, sock, P_UNPLUG_REMOTE, 0, NULL, 0);
1304b411b363SPhilipp Reisner }
1305b411b363SPhilipp Reisner 
13064eb9b3cbSLars Ellenberg static void re_init_if_first_write(struct drbd_tconn *tconn, unsigned int epoch)
13074eb9b3cbSLars Ellenberg {
13084eb9b3cbSLars Ellenberg 	if (!tconn->send.seen_any_write_yet) {
13094eb9b3cbSLars Ellenberg 		tconn->send.seen_any_write_yet = true;
13104eb9b3cbSLars Ellenberg 		tconn->send.current_epoch_nr = epoch;
13114eb9b3cbSLars Ellenberg 		tconn->send.current_epoch_writes = 0;
13124eb9b3cbSLars Ellenberg 	}
13134eb9b3cbSLars Ellenberg }
13144eb9b3cbSLars Ellenberg 
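/* A P_BARRIER closes the current write epoch on the wire; the peer
 * acknowledges it (P_BARRIER_ACK) once it has finished the writes of that
 * epoch.  Send one only when the epoch number changes and the epoch being
 * closed actually contained writes. */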
13154eb9b3cbSLars Ellenberg static void maybe_send_barrier(struct drbd_tconn *tconn, unsigned int epoch)
13164eb9b3cbSLars Ellenberg {
13174eb9b3cbSLars Ellenberg 	/* nothing to close yet before the first write on this connection */
13184eb9b3cbSLars Ellenberg 	if (!tconn->send.seen_any_write_yet)
13194eb9b3cbSLars Ellenberg 		return;
13204eb9b3cbSLars Ellenberg 	if (tconn->send.current_epoch_nr != epoch) {
13214eb9b3cbSLars Ellenberg 		if (tconn->send.current_epoch_writes)
13224eb9b3cbSLars Ellenberg 			drbd_send_barrier(tconn);
13234eb9b3cbSLars Ellenberg 		tconn->send.current_epoch_nr = epoch;
13244eb9b3cbSLars Ellenberg 	}
13254eb9b3cbSLars Ellenberg }
13264eb9b3cbSLars Ellenberg 
13278f7bed77SAndreas Gruenbacher int w_send_out_of_sync(struct drbd_work *w, int cancel)
132873a01a18SPhilipp Reisner {
132973a01a18SPhilipp Reisner 	struct drbd_request *req = container_of(w, struct drbd_request, w);
1330b30ab791SAndreas Gruenbacher 	struct drbd_device *device = w->device;
1331b30ab791SAndreas Gruenbacher 	struct drbd_tconn *tconn = device->tconn;
133299920dc5SAndreas Gruenbacher 	int err;
133373a01a18SPhilipp Reisner 
133473a01a18SPhilipp Reisner 	if (unlikely(cancel)) {
13358554df1cSAndreas Gruenbacher 		req_mod(req, SEND_CANCELED);
133699920dc5SAndreas Gruenbacher 		return 0;
133773a01a18SPhilipp Reisner 	}
133873a01a18SPhilipp Reisner 
1339b6dd1a89SLars Ellenberg 	/* this time, no tconn->send.current_epoch_writes++;
1340b6dd1a89SLars Ellenberg 	 * if a barrier gets sent here, it is the closing barrier for the
1341b6dd1a89SLars Ellenberg 	 * last replicated epoch, before we went into AHEAD mode.
1342b6dd1a89SLars Ellenberg 	 * No more barriers will be sent until we leave AHEAD mode again. */
13434eb9b3cbSLars Ellenberg 	maybe_send_barrier(tconn, req->epoch);
1344b6dd1a89SLars Ellenberg 
1345b30ab791SAndreas Gruenbacher 	err = drbd_send_out_of_sync(device, req);
13468554df1cSAndreas Gruenbacher 	req_mod(req, OOS_HANDED_TO_NETWORK);
134773a01a18SPhilipp Reisner 
134899920dc5SAndreas Gruenbacher 	return err;
134973a01a18SPhilipp Reisner }
135073a01a18SPhilipp Reisner 
1351b411b363SPhilipp Reisner /**
1352b411b363SPhilipp Reisner  * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
1354b411b363SPhilipp Reisner  * @w:		work object.
1355b411b363SPhilipp Reisner  * @cancel:	The connection will be closed anyway.
1356b411b363SPhilipp Reisner  */
135799920dc5SAndreas Gruenbacher int w_send_dblock(struct drbd_work *w, int cancel)
1358b411b363SPhilipp Reisner {
1359b411b363SPhilipp Reisner 	struct drbd_request *req = container_of(w, struct drbd_request, w);
1360b30ab791SAndreas Gruenbacher 	struct drbd_device *device = w->device;
1361b30ab791SAndreas Gruenbacher 	struct drbd_tconn *tconn = device->tconn;
136299920dc5SAndreas Gruenbacher 	int err;
1363b411b363SPhilipp Reisner 
1364b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
13658554df1cSAndreas Gruenbacher 		req_mod(req, SEND_CANCELED);
136699920dc5SAndreas Gruenbacher 		return 0;
1367b411b363SPhilipp Reisner 	}
1368b411b363SPhilipp Reisner 
13694eb9b3cbSLars Ellenberg 	re_init_if_first_write(tconn, req->epoch);
13704eb9b3cbSLars Ellenberg 	maybe_send_barrier(tconn, req->epoch);
1371b6dd1a89SLars Ellenberg 	tconn->send.current_epoch_writes++;
1372b6dd1a89SLars Ellenberg 
1373b30ab791SAndreas Gruenbacher 	err = drbd_send_dblock(device, req);
137499920dc5SAndreas Gruenbacher 	req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
1375b411b363SPhilipp Reisner 
137699920dc5SAndreas Gruenbacher 	return err;
1377b411b363SPhilipp Reisner }
1378b411b363SPhilipp Reisner 
1379b411b363SPhilipp Reisner /**
1380b411b363SPhilipp Reisner  * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
1383b411b363SPhilipp Reisner  * @w:		work object.
1384b411b363SPhilipp Reisner  * @cancel:	The connection will be closed anyway.
1384b411b363SPhilipp Reisner  */
138599920dc5SAndreas Gruenbacher int w_send_read_req(struct drbd_work *w, int cancel)
1386b411b363SPhilipp Reisner {
1387b411b363SPhilipp Reisner 	struct drbd_request *req = container_of(w, struct drbd_request, w);
1388b30ab791SAndreas Gruenbacher 	struct drbd_device *device = w->device;
1389b30ab791SAndreas Gruenbacher 	struct drbd_tconn *tconn = device->tconn;
139099920dc5SAndreas Gruenbacher 	int err;
1391b411b363SPhilipp Reisner 
1392b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
13938554df1cSAndreas Gruenbacher 		req_mod(req, SEND_CANCELED);
139499920dc5SAndreas Gruenbacher 		return 0;
1395b411b363SPhilipp Reisner 	}
1396b411b363SPhilipp Reisner 
1397b6dd1a89SLars Ellenberg 	/* Even read requests may close a write epoch,
1398b6dd1a89SLars Ellenberg 	 * if one has been opened already. */
13994eb9b3cbSLars Ellenberg 	maybe_send_barrier(tconn, req->epoch);
1400b6dd1a89SLars Ellenberg 
1401b30ab791SAndreas Gruenbacher 	err = drbd_send_drequest(device, P_DATA_REQUEST, req->i.sector, req->i.size,
1402b411b363SPhilipp Reisner 				 (unsigned long)req);
1403b411b363SPhilipp Reisner 
140499920dc5SAndreas Gruenbacher 	req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
1405b411b363SPhilipp Reisner 
140699920dc5SAndreas Gruenbacher 	return err;
1407b411b363SPhilipp Reisner }
1408b411b363SPhilipp Reisner 
140999920dc5SAndreas Gruenbacher int w_restart_disk_io(struct drbd_work *w, int cancel)
1410265be2d0SPhilipp Reisner {
1411265be2d0SPhilipp Reisner 	struct drbd_request *req = container_of(w, struct drbd_request, w);
1412b30ab791SAndreas Gruenbacher 	struct drbd_device *device = w->device;
1413265be2d0SPhilipp Reisner 
14140778286aSPhilipp Reisner 	if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
1415b30ab791SAndreas Gruenbacher 		drbd_al_begin_io(device, &req->i, false);
1416265be2d0SPhilipp Reisner 
1417265be2d0SPhilipp Reisner 	drbd_req_make_private_bio(req, req->master_bio);
1418b30ab791SAndreas Gruenbacher 	req->private_bio->bi_bdev = device->ldev->backing_bdev;
1419265be2d0SPhilipp Reisner 	generic_make_request(req->private_bio);
1420265be2d0SPhilipp Reisner 
142199920dc5SAndreas Gruenbacher 	return 0;
1422265be2d0SPhilipp Reisner }
1423265be2d0SPhilipp Reisner 
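/* Walk the resync-after dependency chain: this device may resync now only
 * if no device it (transitively) depends on is currently syncing or has
 * its sync paused.  Returns 1 if resync may proceed. */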
1424b30ab791SAndreas Gruenbacher static int _drbd_may_sync_now(struct drbd_device *device)
1425b411b363SPhilipp Reisner {
1426b30ab791SAndreas Gruenbacher 	struct drbd_device *odev = device;
142795f8efd0SAndreas Gruenbacher 	int resync_after;
1428b411b363SPhilipp Reisner 
1429b411b363SPhilipp Reisner 	while (1) {
1430a3f8f7dcSLars Ellenberg 		if (!odev->ldev || odev->state.disk == D_DISKLESS)
1431438c8374SPhilipp Reisner 			return 1;
1432daeda1ccSPhilipp Reisner 		rcu_read_lock();
143395f8efd0SAndreas Gruenbacher 		resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
1434daeda1ccSPhilipp Reisner 		rcu_read_unlock();
143595f8efd0SAndreas Gruenbacher 		if (resync_after == -1)
1436b411b363SPhilipp Reisner 			return 1;
1437b30ab791SAndreas Gruenbacher 		odev = minor_to_device(resync_after);
1438a3f8f7dcSLars Ellenberg 		if (!odev)
1439841ce241SAndreas Gruenbacher 			return 1;
1440b411b363SPhilipp Reisner 		if ((odev->state.conn >= C_SYNC_SOURCE &&
1441b411b363SPhilipp Reisner 		     odev->state.conn <= C_PAUSED_SYNC_T) ||
1442b411b363SPhilipp Reisner 		    odev->state.aftr_isp || odev->state.peer_isp ||
1443b411b363SPhilipp Reisner 		    odev->state.user_isp)
1444b411b363SPhilipp Reisner 			return 0;
1445b411b363SPhilipp Reisner 	}
1446b411b363SPhilipp Reisner }
1447b411b363SPhilipp Reisner 
1448b411b363SPhilipp Reisner /**
1449b411b363SPhilipp Reisner  * _drbd_pause_after() - Pause resync on all devices that may not resync now
1450b30ab791SAndreas Gruenbacher  * @device:	DRBD device.
1451b411b363SPhilipp Reisner  *
1452b411b363SPhilipp Reisner  * Called from process context only (admin command and after_state_ch).
1453b411b363SPhilipp Reisner  */
1454b30ab791SAndreas Gruenbacher static int _drbd_pause_after(struct drbd_device *device)
1455b411b363SPhilipp Reisner {
145654761697SAndreas Gruenbacher 	struct drbd_device *odev;
1457b411b363SPhilipp Reisner 	int i, rv = 0;
1458b411b363SPhilipp Reisner 
1459695d08faSPhilipp Reisner 	rcu_read_lock();
146081a5d60eSPhilipp Reisner 	idr_for_each_entry(&minors, odev, i) {
1461b411b363SPhilipp Reisner 		if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1462b411b363SPhilipp Reisner 			continue;
1463b411b363SPhilipp Reisner 		if (!_drbd_may_sync_now(odev))
1464b411b363SPhilipp Reisner 			rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL)
1465b411b363SPhilipp Reisner 			       != SS_NOTHING_TO_DO);
1466b411b363SPhilipp Reisner 	}
1467695d08faSPhilipp Reisner 	rcu_read_unlock();
1468b411b363SPhilipp Reisner 
1469b411b363SPhilipp Reisner 	return rv;
1470b411b363SPhilipp Reisner }
1471b411b363SPhilipp Reisner 
1472b411b363SPhilipp Reisner /**
1473b411b363SPhilipp Reisner  * _drbd_resume_next() - Resume resync on all devices that may resync now
1474b30ab791SAndreas Gruenbacher  * @device:	DRBD device.
1475b411b363SPhilipp Reisner  *
1476b411b363SPhilipp Reisner  * Called from process context only (admin command and worker).
1477b411b363SPhilipp Reisner  */
1478b30ab791SAndreas Gruenbacher static int _drbd_resume_next(struct drbd_device *device)
1479b411b363SPhilipp Reisner {
148054761697SAndreas Gruenbacher 	struct drbd_device *odev;
1481b411b363SPhilipp Reisner 	int i, rv = 0;
1482b411b363SPhilipp Reisner 
1483695d08faSPhilipp Reisner 	rcu_read_lock();
148481a5d60eSPhilipp Reisner 	idr_for_each_entry(&minors, odev, i) {
1485b411b363SPhilipp Reisner 		if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1486b411b363SPhilipp Reisner 			continue;
1487b411b363SPhilipp Reisner 		if (odev->state.aftr_isp) {
1488b411b363SPhilipp Reisner 			if (_drbd_may_sync_now(odev))
1489b411b363SPhilipp Reisner 				rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0),
1490b411b363SPhilipp Reisner 							CS_HARD, NULL)
1491b411b363SPhilipp Reisner 				       != SS_NOTHING_TO_DO);
1492b411b363SPhilipp Reisner 		}
1493b411b363SPhilipp Reisner 	}
1494695d08faSPhilipp Reisner 	rcu_read_unlock();
1495b411b363SPhilipp Reisner 	return rv;
1496b411b363SPhilipp Reisner }
1497b411b363SPhilipp Reisner 
1498b30ab791SAndreas Gruenbacher void resume_next_sg(struct drbd_device *device)
1499b411b363SPhilipp Reisner {
1500b411b363SPhilipp Reisner 	write_lock_irq(&global_state_lock);
1501b30ab791SAndreas Gruenbacher 	_drbd_resume_next(device);
1502b411b363SPhilipp Reisner 	write_unlock_irq(&global_state_lock);
1503b411b363SPhilipp Reisner }
1504b411b363SPhilipp Reisner 
1505b30ab791SAndreas Gruenbacher void suspend_other_sg(struct drbd_device *device)
1506b411b363SPhilipp Reisner {
1507b411b363SPhilipp Reisner 	write_lock_irq(&global_state_lock);
1508b30ab791SAndreas Gruenbacher 	_drbd_pause_after(device);
1509b411b363SPhilipp Reisner 	write_unlock_irq(&global_state_lock);
1510b411b363SPhilipp Reisner }
1511b411b363SPhilipp Reisner 
1512dc97b708SPhilipp Reisner /* caller must hold global_state_lock */
1513b30ab791SAndreas Gruenbacher enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor)
1514b411b363SPhilipp Reisner {
151554761697SAndreas Gruenbacher 	struct drbd_device *odev;
151695f8efd0SAndreas Gruenbacher 	int resync_after;
1517b411b363SPhilipp Reisner 
1518b411b363SPhilipp Reisner 	if (o_minor == -1)
1519b411b363SPhilipp Reisner 		return NO_ERROR;
1520a3f8f7dcSLars Ellenberg 	if (o_minor < -1 || o_minor > MINORMASK)
152195f8efd0SAndreas Gruenbacher 		return ERR_RESYNC_AFTER;
1522b411b363SPhilipp Reisner 
1523b411b363SPhilipp Reisner 	/* check for loops */
1524b30ab791SAndreas Gruenbacher 	odev = minor_to_device(o_minor);
1525b411b363SPhilipp Reisner 	while (1) {
1526b30ab791SAndreas Gruenbacher 		if (odev == device)
152795f8efd0SAndreas Gruenbacher 			return ERR_RESYNC_AFTER_CYCLE;
1528b411b363SPhilipp Reisner 
1529a3f8f7dcSLars Ellenberg 		/* You are free to depend on diskless, non-existing,
1530a3f8f7dcSLars Ellenberg 		 * or not yet/no longer existing minors.
1531a3f8f7dcSLars Ellenberg 		 * We only reject dependency loops.
1532a3f8f7dcSLars Ellenberg 		 * We cannot follow the dependency chain beyond a detached or
1533a3f8f7dcSLars Ellenberg 		 * missing minor.
1534a3f8f7dcSLars Ellenberg 		 */
1535a3f8f7dcSLars Ellenberg 		if (!odev || !odev->ldev || odev->state.disk == D_DISKLESS)
1536a3f8f7dcSLars Ellenberg 			return NO_ERROR;
1537a3f8f7dcSLars Ellenberg 
1538daeda1ccSPhilipp Reisner 		rcu_read_lock();
153995f8efd0SAndreas Gruenbacher 		resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
1540daeda1ccSPhilipp Reisner 		rcu_read_unlock();
1541b411b363SPhilipp Reisner 		/* dependency chain ends here, no cycles. */
154295f8efd0SAndreas Gruenbacher 		if (resync_after == -1)
1543b411b363SPhilipp Reisner 			return NO_ERROR;
1544b411b363SPhilipp Reisner 
1545b411b363SPhilipp Reisner 		/* follow the dependency chain */
1546b30ab791SAndreas Gruenbacher 		odev = minor_to_device(resync_after);
1547b411b363SPhilipp Reisner 	}
1548b411b363SPhilipp Reisner }
1549b411b363SPhilipp Reisner 
1550dc97b708SPhilipp Reisner /* caller must hold global_state_lock */
1551b30ab791SAndreas Gruenbacher void drbd_resync_after_changed(struct drbd_device *device)
1552b411b363SPhilipp Reisner {
1553b411b363SPhilipp Reisner 	int changes;
1554b411b363SPhilipp Reisner 
1555b411b363SPhilipp Reisner 	do {
1556b30ab791SAndreas Gruenbacher 		changes  = _drbd_pause_after(device);
1557b30ab791SAndreas Gruenbacher 		changes |= _drbd_resume_next(device);
1558b411b363SPhilipp Reisner 	} while (changes);
1559b411b363SPhilipp Reisner }
1560b411b363SPhilipp Reisner 
1561b30ab791SAndreas Gruenbacher void drbd_rs_controller_reset(struct drbd_device *device)
15629bd28d3cSLars Ellenberg {
1563813472ceSPhilipp Reisner 	struct fifo_buffer *plan;
1564813472ceSPhilipp Reisner 
1565b30ab791SAndreas Gruenbacher 	atomic_set(&device->rs_sect_in, 0);
1566b30ab791SAndreas Gruenbacher 	atomic_set(&device->rs_sect_ev, 0);
1567b30ab791SAndreas Gruenbacher 	device->rs_in_flight = 0;
1568813472ceSPhilipp Reisner 
1569813472ceSPhilipp Reisner 	/* Updating the RCU protected object in place is necessary since
1570813472ceSPhilipp Reisner 	   this function gets called from atomic context.
1571813472ceSPhilipp Reisner 	   It is valid since all other updates also lead to a completely
1572813472ceSPhilipp Reisner 	   empty fifo */
1573813472ceSPhilipp Reisner 	rcu_read_lock();
1574b30ab791SAndreas Gruenbacher 	plan = rcu_dereference(device->rs_plan_s);
1575813472ceSPhilipp Reisner 	plan->total = 0;
1576813472ceSPhilipp Reisner 	fifo_set(plan, 0);
1577813472ceSPhilipp Reisner 	rcu_read_unlock();
15789bd28d3cSLars Ellenberg }
15799bd28d3cSLars Ellenberg 
15801f04af33SPhilipp Reisner void start_resync_timer_fn(unsigned long data)
15811f04af33SPhilipp Reisner {
1582b30ab791SAndreas Gruenbacher 	struct drbd_device *device = (struct drbd_device *) data;
15831f04af33SPhilipp Reisner 
1584b30ab791SAndreas Gruenbacher 	drbd_queue_work(&device->tconn->sender_work, &device->start_resync_work);
15851f04af33SPhilipp Reisner }
15861f04af33SPhilipp Reisner 
158799920dc5SAndreas Gruenbacher int w_start_resync(struct drbd_work *w, int cancel)
15881f04af33SPhilipp Reisner {
1589b30ab791SAndreas Gruenbacher 	struct drbd_device *device = w->device;
159000d56944SPhilipp Reisner 
1591b30ab791SAndreas Gruenbacher 	if (atomic_read(&device->unacked_cnt) || atomic_read(&device->rs_pending_cnt)) {
15921f04af33SPhilipp Reisner 		dev_warn(DEV, "w_start_resync later...\n");
1593b30ab791SAndreas Gruenbacher 		device->start_resync_timer.expires = jiffies + HZ/10;
1594b30ab791SAndreas Gruenbacher 		add_timer(&device->start_resync_timer);
159599920dc5SAndreas Gruenbacher 		return 0;
15961f04af33SPhilipp Reisner 	}
15971f04af33SPhilipp Reisner 
1598b30ab791SAndreas Gruenbacher 	drbd_start_resync(device, C_SYNC_SOURCE);
1599b30ab791SAndreas Gruenbacher 	clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags);
160099920dc5SAndreas Gruenbacher 	return 0;
16011f04af33SPhilipp Reisner }
16021f04af33SPhilipp Reisner 
1603b411b363SPhilipp Reisner /**
1604b411b363SPhilipp Reisner  * drbd_start_resync() - Start the resync process
1605b30ab791SAndreas Gruenbacher  * @device:	DRBD device.
1606b411b363SPhilipp Reisner  * @side:	Either C_SYNC_SOURCE or C_SYNC_TARGET
1607b411b363SPhilipp Reisner  *
1608b411b363SPhilipp Reisner  * This function might bring you directly into one of the
1609b411b363SPhilipp Reisner  * C_PAUSED_SYNC_* states.
1610b411b363SPhilipp Reisner  */
1611b30ab791SAndreas Gruenbacher void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
1612b411b363SPhilipp Reisner {
1613b411b363SPhilipp Reisner 	union drbd_state ns;
1614b411b363SPhilipp Reisner 	int r;
1615b411b363SPhilipp Reisner 
1616b30ab791SAndreas Gruenbacher 	if (device->state.conn >= C_SYNC_SOURCE && device->state.conn < C_AHEAD) {
1617b411b363SPhilipp Reisner 		dev_err(DEV, "Resync already running!\n");
1618b411b363SPhilipp Reisner 		return;
1619b411b363SPhilipp Reisner 	}
1620b411b363SPhilipp Reisner 
1621b30ab791SAndreas Gruenbacher 	if (!test_bit(B_RS_H_DONE, &device->flags)) {
1622b411b363SPhilipp Reisner 		if (side == C_SYNC_TARGET) {
1623b411b363SPhilipp Reisner 			/* Since application IO was locked out during C_WF_BITMAP_T and
1624b411b363SPhilipp Reisner 			   C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
1625b411b363SPhilipp Reisner 			   we check whether we may make the data inconsistent. */
1626b30ab791SAndreas Gruenbacher 			r = drbd_khelper(device, "before-resync-target");
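			/* drbd_khelper() returns a wait()-style status;
			 * extract the handler's exit code. */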
1627b411b363SPhilipp Reisner 			r = (r >> 8) & 0xff;
1628b411b363SPhilipp Reisner 			if (r > 0) {
1629b411b363SPhilipp Reisner 				dev_info(DEV, "before-resync-target handler returned %d, "
1630b411b363SPhilipp Reisner 					 "dropping connection.\n", r);
1631b30ab791SAndreas Gruenbacher 				conn_request_state(device->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
1632b411b363SPhilipp Reisner 				return;
1633b411b363SPhilipp Reisner 			}
163409b9e797SPhilipp Reisner 		} else /* C_SYNC_SOURCE */ {
1635b30ab791SAndreas Gruenbacher 			r = drbd_khelper(device, "before-resync-source");
163609b9e797SPhilipp Reisner 			r = (r >> 8) & 0xff;
163709b9e797SPhilipp Reisner 			if (r > 0) {
163809b9e797SPhilipp Reisner 				if (r == 3) {
163909b9e797SPhilipp Reisner 					dev_info(DEV, "before-resync-source handler returned %d, "
164009b9e797SPhilipp Reisner 						 "ignoring. Old userland tools?", r);
164109b9e797SPhilipp Reisner 				} else {
164209b9e797SPhilipp Reisner 					dev_info(DEV, "before-resync-source handler returned %d, "
164309b9e797SPhilipp Reisner 						 "dropping connection.\n", r);
1644b30ab791SAndreas Gruenbacher 					conn_request_state(device->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
164509b9e797SPhilipp Reisner 					return;
164609b9e797SPhilipp Reisner 				}
164709b9e797SPhilipp Reisner 			}
1648b411b363SPhilipp Reisner 		}
1649e64a3294SPhilipp Reisner 	}
1650b411b363SPhilipp Reisner 
1651b30ab791SAndreas Gruenbacher 	if (current == device->tconn->worker.task) {
1652dad20554SPhilipp Reisner 		/* The worker should not sleep waiting for state_mutex,
1653e64a3294SPhilipp Reisner 		   as that can take a long time */
1654b30ab791SAndreas Gruenbacher 		if (!mutex_trylock(device->state_mutex)) {
1655b30ab791SAndreas Gruenbacher 			set_bit(B_RS_H_DONE, &device->flags);
1656b30ab791SAndreas Gruenbacher 			device->start_resync_timer.expires = jiffies + HZ/5;
1657b30ab791SAndreas Gruenbacher 			add_timer(&device->start_resync_timer);
1658e64a3294SPhilipp Reisner 			return;
1659e64a3294SPhilipp Reisner 		}
1660e64a3294SPhilipp Reisner 	} else {
1661b30ab791SAndreas Gruenbacher 		mutex_lock(device->state_mutex);
1662e64a3294SPhilipp Reisner 	}
1663b30ab791SAndreas Gruenbacher 	clear_bit(B_RS_H_DONE, &device->flags);
1664b411b363SPhilipp Reisner 
16650cfac5ddSPhilipp Reisner 	write_lock_irq(&global_state_lock);
1666a700471bSPhilipp Reisner 	/* Did some connection breakage or IO error race with us? */
1667b30ab791SAndreas Gruenbacher 	if (device->state.conn < C_CONNECTED
1668b30ab791SAndreas Gruenbacher 	|| !get_ldev_if_state(device, D_NEGOTIATING)) {
16690cfac5ddSPhilipp Reisner 		write_unlock_irq(&global_state_lock);
1670b30ab791SAndreas Gruenbacher 		mutex_unlock(device->state_mutex);
1671b411b363SPhilipp Reisner 		return;
1672b411b363SPhilipp Reisner 	}
1673b411b363SPhilipp Reisner 
1674b30ab791SAndreas Gruenbacher 	ns = drbd_read_state(device);
1675b411b363SPhilipp Reisner 
1676b30ab791SAndreas Gruenbacher 	ns.aftr_isp = !_drbd_may_sync_now(device);
1677b411b363SPhilipp Reisner 
1678b411b363SPhilipp Reisner 	ns.conn = side;
1679b411b363SPhilipp Reisner 
1680b411b363SPhilipp Reisner 	if (side == C_SYNC_TARGET)
1681b411b363SPhilipp Reisner 		ns.disk = D_INCONSISTENT;
1682b411b363SPhilipp Reisner 	else /* side == C_SYNC_SOURCE */
1683b411b363SPhilipp Reisner 		ns.pdsk = D_INCONSISTENT;
1684b411b363SPhilipp Reisner 
1685b30ab791SAndreas Gruenbacher 	r = __drbd_set_state(device, ns, CS_VERBOSE, NULL);
1686b30ab791SAndreas Gruenbacher 	ns = drbd_read_state(device);
1687b411b363SPhilipp Reisner 
1688b411b363SPhilipp Reisner 	if (ns.conn < C_CONNECTED)
1689b411b363SPhilipp Reisner 		r = SS_UNKNOWN_ERROR;
1690b411b363SPhilipp Reisner 
1691b411b363SPhilipp Reisner 	if (r == SS_SUCCESS) {
1692b30ab791SAndreas Gruenbacher 		unsigned long tw = drbd_bm_total_weight(device);
16931d7734a0SLars Ellenberg 		unsigned long now = jiffies;
16941d7734a0SLars Ellenberg 		int i;
16951d7734a0SLars Ellenberg 
1696b30ab791SAndreas Gruenbacher 		device->rs_failed    = 0;
1697b30ab791SAndreas Gruenbacher 		device->rs_paused    = 0;
1698b30ab791SAndreas Gruenbacher 		device->rs_same_csum = 0;
1699b30ab791SAndreas Gruenbacher 		device->rs_last_events = 0;
1700b30ab791SAndreas Gruenbacher 		device->rs_last_sect_ev = 0;
1701b30ab791SAndreas Gruenbacher 		device->rs_total     = tw;
1702b30ab791SAndreas Gruenbacher 		device->rs_start     = now;
17031d7734a0SLars Ellenberg 		for (i = 0; i < DRBD_SYNC_MARKS; i++) {
1704b30ab791SAndreas Gruenbacher 			device->rs_mark_left[i] = tw;
1705b30ab791SAndreas Gruenbacher 			device->rs_mark_time[i] = now;
17061d7734a0SLars Ellenberg 		}
1707b30ab791SAndreas Gruenbacher 		_drbd_pause_after(device);
1708b411b363SPhilipp Reisner 	}
1709b411b363SPhilipp Reisner 	write_unlock_irq(&global_state_lock);
17105a22db89SLars Ellenberg 
17116c922ed5SLars Ellenberg 	if (r == SS_SUCCESS) {
1712328e0f12SPhilipp Reisner 		/* reset rs_last_bcast when a resync or verify is started,
1713328e0f12SPhilipp Reisner 		 * to deal with potential jiffies wrap. */
1714b30ab791SAndreas Gruenbacher 		device->rs_last_bcast = jiffies - HZ;
1715328e0f12SPhilipp Reisner 
17166c922ed5SLars Ellenberg 		dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
17176c922ed5SLars Ellenberg 		     drbd_conn_str(ns.conn),
1718b30ab791SAndreas Gruenbacher 		     (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10),
1719b30ab791SAndreas Gruenbacher 		     (unsigned long) device->rs_total);
17205a22db89SLars Ellenberg 		if (side == C_SYNC_TARGET)
1721b30ab791SAndreas Gruenbacher 			device->bm_resync_fo = 0;
17225a22db89SLars Ellenberg 
17235a22db89SLars Ellenberg 		/* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
17245a22db89SLars Ellenberg 		 * with w_send_oos, or the sync target will get confused as to
17255a22db89SLars Ellenberg 		 * how many bits to resync.  We cannot do that always, because for an
17265a22db89SLars Ellenberg 		 * empty resync and protocol < 95, we need to do it here, as we call
17275a22db89SLars Ellenberg 		 * drbd_resync_finished from here in that case.
17285a22db89SLars Ellenberg 		 * We drbd_gen_and_send_sync_uuid here for protocol < 96,
17295a22db89SLars Ellenberg 		 * and from after_state_ch otherwise. */
1730b30ab791SAndreas Gruenbacher 		if (side == C_SYNC_SOURCE && device->tconn->agreed_pro_version < 96)
1731b30ab791SAndreas Gruenbacher 			drbd_gen_and_send_sync_uuid(device);
1732b411b363SPhilipp Reisner 
1733b30ab791SAndreas Gruenbacher 		if (device->tconn->agreed_pro_version < 95 && device->rs_total == 0) {
1734af85e8e8SLars Ellenberg 			/* This still has a race (about when exactly the peers
1735af85e8e8SLars Ellenberg 			 * detect connection loss) that can lead to a full sync
1736af85e8e8SLars Ellenberg 			 * on next handshake. In 8.3.9 we fixed this with explicit
1737af85e8e8SLars Ellenberg 			 * resync-finished notifications, but the fix
1738af85e8e8SLars Ellenberg 			 * introduces a protocol change.  Sleeping for some
1739af85e8e8SLars Ellenberg 			 * time longer than the ping interval + timeout on the
1740af85e8e8SLars Ellenberg 			 * SyncSource, to give the SyncTarget the chance to
1741af85e8e8SLars Ellenberg 			 * detect connection loss, then waiting for a ping
1742af85e8e8SLars Ellenberg 			 * response (implicit in drbd_resync_finished) reduces
1743af85e8e8SLars Ellenberg 			 * the race considerably, but does not solve it. */
174444ed167dSPhilipp Reisner 			if (side == C_SYNC_SOURCE) {
174544ed167dSPhilipp Reisner 				struct net_conf *nc;
174644ed167dSPhilipp Reisner 				int timeo;
174744ed167dSPhilipp Reisner 
174844ed167dSPhilipp Reisner 				rcu_read_lock();
1749b30ab791SAndreas Gruenbacher 				nc = rcu_dereference(device->tconn->net_conf);
175044ed167dSPhilipp Reisner 				timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9;
175144ed167dSPhilipp Reisner 				rcu_read_unlock();
175244ed167dSPhilipp Reisner 				schedule_timeout_interruptible(timeo);
175344ed167dSPhilipp Reisner 			}
1754b30ab791SAndreas Gruenbacher 			drbd_resync_finished(device);
1755b411b363SPhilipp Reisner 		}
1756b411b363SPhilipp Reisner 
1757b30ab791SAndreas Gruenbacher 		drbd_rs_controller_reset(device);
1758b30ab791SAndreas Gruenbacher 		/* ns.conn may already be != device->state.conn,
1759b411b363SPhilipp Reisner 		 * we may have been paused in between, or become paused until
1760b411b363SPhilipp Reisner 		 * the timer triggers.
1761b411b363SPhilipp Reisner 		 * No matter, that is handled in resync_timer_fn() */
1762b411b363SPhilipp Reisner 		if (ns.conn == C_SYNC_TARGET)
1763b30ab791SAndreas Gruenbacher 			mod_timer(&device->resync_timer, jiffies);
1764b411b363SPhilipp Reisner 
1765b30ab791SAndreas Gruenbacher 		drbd_md_sync(device);
1766b411b363SPhilipp Reisner 	}
1767b30ab791SAndreas Gruenbacher 	put_ldev(device);
1768b30ab791SAndreas Gruenbacher 	mutex_unlock(device->state_mutex);
1769b411b363SPhilipp Reisner }
1770b411b363SPhilipp Reisner 
1771b6dd1a89SLars Ellenberg /* If the resource already closed the current epoch, but we did not
1772b6dd1a89SLars Ellenberg  * (because we have not yet seen new requests), we should send the
1773b6dd1a89SLars Ellenberg  * corresponding barrier now.  Must be checked within the same spinlock
1774b6dd1a89SLars Ellenberg  * that is used to check for new requests. */
1775a186e478SRashika Kheria static bool need_to_send_barrier(struct drbd_tconn *connection)
1776b6dd1a89SLars Ellenberg {
1777b6dd1a89SLars Ellenberg 	if (!connection->send.seen_any_write_yet)
1778b6dd1a89SLars Ellenberg 		return false;
1779b6dd1a89SLars Ellenberg 
1780b6dd1a89SLars Ellenberg 	/* Skip barriers that do not contain any writes.
1781b6dd1a89SLars Ellenberg 	 * This may happen during AHEAD mode. */
1782b6dd1a89SLars Ellenberg 	if (!connection->send.current_epoch_writes)
1783b6dd1a89SLars Ellenberg 		return false;
1784b6dd1a89SLars Ellenberg 
1785b6dd1a89SLars Ellenberg 	/* ->req_lock is held when requests are queued on
1786b6dd1a89SLars Ellenberg 	 * connection->sender_work, and put into ->transfer_log.
1787b6dd1a89SLars Ellenberg 	 * It is also held when ->current_tle_nr is increased.
1788b6dd1a89SLars Ellenberg 	 * So either there are already new requests queued,
1789b6dd1a89SLars Ellenberg 	 * and corresponding barriers will be sent there.
1790b6dd1a89SLars Ellenberg 	 * Or nothing new is queued yet, so the difference will be 1.
1791b6dd1a89SLars Ellenberg 	 */
1792b6dd1a89SLars Ellenberg 	if (atomic_read(&connection->current_tle_nr) !=
1793b6dd1a89SLars Ellenberg 	    connection->send.current_epoch_nr + 1)
1794b6dd1a89SLars Ellenberg 		return false;
1795b6dd1a89SLars Ellenberg 
1796b6dd1a89SLars Ellenberg 	return true;
1797b6dd1a89SLars Ellenberg }
1798b6dd1a89SLars Ellenberg 
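/* Dequeue helpers: move either the whole queue contents (batch) or just
 * the first item onto the caller's private work_list; both return whether
 * work_list is now non-empty. */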
1799a186e478SRashika Kheria static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
18008c0785a5SLars Ellenberg {
18018c0785a5SLars Ellenberg 	spin_lock_irq(&queue->q_lock);
18028c0785a5SLars Ellenberg 	list_splice_init(&queue->q, work_list);
18038c0785a5SLars Ellenberg 	spin_unlock_irq(&queue->q_lock);
18048c0785a5SLars Ellenberg 	return !list_empty(work_list);
18058c0785a5SLars Ellenberg }
18068c0785a5SLars Ellenberg 
1807a186e478SRashika Kheria static bool dequeue_work_item(struct drbd_work_queue *queue, struct list_head *work_list)
18088c0785a5SLars Ellenberg {
18098c0785a5SLars Ellenberg 	spin_lock_irq(&queue->q_lock);
18108c0785a5SLars Ellenberg 	if (!list_empty(&queue->q))
18118c0785a5SLars Ellenberg 		list_move(queue->q.next, work_list);
18128c0785a5SLars Ellenberg 	spin_unlock_irq(&queue->q_lock);
18138c0785a5SLars Ellenberg 	return !list_empty(work_list);
18148c0785a5SLars Ellenberg }
18158c0785a5SLars Ellenberg 
1816a186e478SRashika Kheria static void wait_for_work(struct drbd_tconn *connection, struct list_head *work_list)
1817b6dd1a89SLars Ellenberg {
1818b6dd1a89SLars Ellenberg 	DEFINE_WAIT(wait);
1819b6dd1a89SLars Ellenberg 	struct net_conf *nc;
1820b6dd1a89SLars Ellenberg 	int uncork, cork;
1821b6dd1a89SLars Ellenberg 
1822b6dd1a89SLars Ellenberg 	dequeue_work_item(&connection->sender_work, work_list);
1823b6dd1a89SLars Ellenberg 	if (!list_empty(work_list))
1824b6dd1a89SLars Ellenberg 		return;
1825b6dd1a89SLars Ellenberg 
1826b6dd1a89SLars Ellenberg 	/* Still nothing to do?
1827b6dd1a89SLars Ellenberg 	 * Maybe we still need to close the current epoch,
1828b6dd1a89SLars Ellenberg 	 * even if no new requests are queued yet.
1829b6dd1a89SLars Ellenberg 	 *
1830b6dd1a89SLars Ellenberg 	 * Also, poke TCP, just in case.
1831b6dd1a89SLars Ellenberg 	 * Then wait for new work (or signal). */
1832b6dd1a89SLars Ellenberg 	rcu_read_lock();
1833b6dd1a89SLars Ellenberg 	nc = rcu_dereference(connection->net_conf);
1834b6dd1a89SLars Ellenberg 	uncork = nc ? nc->tcp_cork : 0;
1835b6dd1a89SLars Ellenberg 	rcu_read_unlock();
1836b6dd1a89SLars Ellenberg 	if (uncork) {
1837b6dd1a89SLars Ellenberg 		mutex_lock(&connection->data.mutex);
1838b6dd1a89SLars Ellenberg 		if (connection->data.socket)
1839b6dd1a89SLars Ellenberg 			drbd_tcp_uncork(connection->data.socket);
1840b6dd1a89SLars Ellenberg 		mutex_unlock(&connection->data.mutex);
1841b6dd1a89SLars Ellenberg 	}
1842b6dd1a89SLars Ellenberg 
1843b6dd1a89SLars Ellenberg 	for (;;) {
1844b6dd1a89SLars Ellenberg 		int send_barrier;
1845b6dd1a89SLars Ellenberg 		prepare_to_wait(&connection->sender_work.q_wait, &wait, TASK_INTERRUPTIBLE);
1846b6dd1a89SLars Ellenberg 		spin_lock_irq(&connection->req_lock);
1847b6dd1a89SLars Ellenberg 		spin_lock(&connection->sender_work.q_lock);	/* FIXME get rid of this one? */
1848bc317a9eSLars Ellenberg 		/* dequeue single item only,
1849bc317a9eSLars Ellenberg 		 * we still use drbd_queue_work_front() in some places */
1850bc317a9eSLars Ellenberg 		if (!list_empty(&connection->sender_work.q))
1851bc317a9eSLars Ellenberg 			list_move(connection->sender_work.q.next, work_list);
1852b6dd1a89SLars Ellenberg 		spin_unlock(&connection->sender_work.q_lock);	/* FIXME get rid of this one? */
1853b6dd1a89SLars Ellenberg 		if (!list_empty(work_list) || signal_pending(current)) {
1854b6dd1a89SLars Ellenberg 			spin_unlock_irq(&connection->req_lock);
1855b6dd1a89SLars Ellenberg 			break;
1856b6dd1a89SLars Ellenberg 		}
1857b6dd1a89SLars Ellenberg 		send_barrier = need_to_send_barrier(connection);
1858b6dd1a89SLars Ellenberg 		spin_unlock_irq(&connection->req_lock);
1859b6dd1a89SLars Ellenberg 		if (send_barrier) {
1860b6dd1a89SLars Ellenberg 			drbd_send_barrier(connection);
1861b6dd1a89SLars Ellenberg 			connection->send.current_epoch_nr++;
1862b6dd1a89SLars Ellenberg 		}
1863b6dd1a89SLars Ellenberg 		schedule();
1864b6dd1a89SLars Ellenberg 		/* We may be woken up for reasons other than new work, too,
1865b6dd1a89SLars Ellenberg 		 * e.g. if the current epoch got closed,
1866b6dd1a89SLars Ellenberg 		 * in which case we send the barrier above. */
1867b6dd1a89SLars Ellenberg 	}
1868b6dd1a89SLars Ellenberg 	finish_wait(&connection->sender_work.q_wait, &wait);
1869b6dd1a89SLars Ellenberg 
1870b6dd1a89SLars Ellenberg 	/* someone may have changed the config while we have been waiting above. */
1871b6dd1a89SLars Ellenberg 	rcu_read_lock();
1872b6dd1a89SLars Ellenberg 	nc = rcu_dereference(connection->net_conf);
1873b6dd1a89SLars Ellenberg 	cork = nc ? nc->tcp_cork : 0;
1874b6dd1a89SLars Ellenberg 	rcu_read_unlock();
1875b6dd1a89SLars Ellenberg 	mutex_lock(&connection->data.mutex);
1876b6dd1a89SLars Ellenberg 	if (connection->data.socket) {
1877b6dd1a89SLars Ellenberg 		if (cork)
1878b6dd1a89SLars Ellenberg 			drbd_tcp_cork(connection->data.socket);
1879b6dd1a89SLars Ellenberg 		else if (!uncork)
1880b6dd1a89SLars Ellenberg 			drbd_tcp_uncork(connection->data.socket);
1881b6dd1a89SLars Ellenberg 	}
1882b6dd1a89SLars Ellenberg 	mutex_unlock(&connection->data.mutex);
1883b6dd1a89SLars Ellenberg }
1884b6dd1a89SLars Ellenberg 
1885b411b363SPhilipp Reisner int drbd_worker(struct drbd_thread *thi)
1886b411b363SPhilipp Reisner {
1887392c8801SPhilipp Reisner 	struct drbd_tconn *tconn = thi->tconn;
1888b411b363SPhilipp Reisner 	struct drbd_work *w = NULL;
1889b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
1890b411b363SPhilipp Reisner 	LIST_HEAD(work_list);
18918c0785a5SLars Ellenberg 	int vnr;
1892b411b363SPhilipp Reisner 
1893e77a0a5cSAndreas Gruenbacher 	while (get_t_state(thi) == RUNNING) {
189480822284SPhilipp Reisner 		drbd_thread_current_set_cpu(thi);
1895b411b363SPhilipp Reisner 
18968c0785a5SLars Ellenberg 		/* as long as we use drbd_queue_work_front(),
18978c0785a5SLars Ellenberg 		 * we may only dequeue single work items here, not batches. */
18988c0785a5SLars Ellenberg 		if (list_empty(&work_list))
1899b6dd1a89SLars Ellenberg 			wait_for_work(tconn, &work_list);
1900b411b363SPhilipp Reisner 
19018c0785a5SLars Ellenberg 		if (signal_pending(current)) {
1902b411b363SPhilipp Reisner 			flush_signals(current);
190319393e10SPhilipp Reisner 			if (get_t_state(thi) == RUNNING) {
190419393e10SPhilipp Reisner 				conn_warn(tconn, "Worker got an unexpected signal\n");
1905b411b363SPhilipp Reisner 				continue;
190619393e10SPhilipp Reisner 			}
1907b411b363SPhilipp Reisner 			break;
1908b411b363SPhilipp Reisner 		}
1909b411b363SPhilipp Reisner 
1910e77a0a5cSAndreas Gruenbacher 		if (get_t_state(thi) != RUNNING)
1911b411b363SPhilipp Reisner 			break;
1912b411b363SPhilipp Reisner 
19138c0785a5SLars Ellenberg 		while (!list_empty(&work_list)) {
19148c0785a5SLars Ellenberg 			w = list_first_entry(&work_list, struct drbd_work, list);
1915b411b363SPhilipp Reisner 			list_del_init(&w->list);
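			/* Run the callback; the second argument is the
			 * "cancel" flag, set when the connection has already
			 * dropped below C_WF_REPORT_PARAMS. */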
19168c0785a5SLars Ellenberg 			if (w->cb(w, tconn->cstate < C_WF_REPORT_PARAMS) == 0)
19178c0785a5SLars Ellenberg 				continue;
1918bbeb641cSPhilipp Reisner 			if (tconn->cstate >= C_WF_REPORT_PARAMS)
1919bbeb641cSPhilipp Reisner 				conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
1920b411b363SPhilipp Reisner 		}
1921b411b363SPhilipp Reisner 	}
1922b411b363SPhilipp Reisner 
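	/* On the way out, drain whatever is still queued and run each
	 * callback with cancel == 1, so it cleans up instead of sending. */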
19238c0785a5SLars Ellenberg 	do {
1924b411b363SPhilipp Reisner 		while (!list_empty(&work_list)) {
19258c0785a5SLars Ellenberg 			w = list_first_entry(&work_list, struct drbd_work, list);
1926b411b363SPhilipp Reisner 			list_del_init(&w->list);
192700d56944SPhilipp Reisner 			w->cb(w, 1);
1928b411b363SPhilipp Reisner 		}
1929d5b27b01SLars Ellenberg 		dequeue_work_batch(&tconn->sender_work, &work_list);
19308c0785a5SLars Ellenberg 	} while (!list_empty(&work_list));
1931b411b363SPhilipp Reisner 
1932c141ebdaSPhilipp Reisner 	rcu_read_lock();
1933b30ab791SAndreas Gruenbacher 	idr_for_each_entry(&tconn->volumes, device, vnr) {
1934b30ab791SAndreas Gruenbacher 		D_ASSERT(device->state.disk == D_DISKLESS && device->state.conn == C_STANDALONE);
1935b30ab791SAndreas Gruenbacher 		kref_get(&device->kref);
1936c141ebdaSPhilipp Reisner 		rcu_read_unlock();
1937b30ab791SAndreas Gruenbacher 		drbd_device_cleanup(device);
1938b30ab791SAndreas Gruenbacher 		kref_put(&device->kref, &drbd_minor_destroy);
1939c141ebdaSPhilipp Reisner 		rcu_read_lock();
19400e29d163SPhilipp Reisner 	}
1941c141ebdaSPhilipp Reisner 	rcu_read_unlock();
1942b411b363SPhilipp Reisner 
1943b411b363SPhilipp Reisner 	return 0;
1944b411b363SPhilipp Reisner }
1945