xref: /openbmc/linux/drivers/block/drbd/drbd_worker.c (revision 21ae5d7f)
1b411b363SPhilipp Reisner /*
2b411b363SPhilipp Reisner    drbd_worker.c
3b411b363SPhilipp Reisner 
4b411b363SPhilipp Reisner    This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5b411b363SPhilipp Reisner 
6b411b363SPhilipp Reisner    Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7b411b363SPhilipp Reisner    Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8b411b363SPhilipp Reisner    Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9b411b363SPhilipp Reisner 
10b411b363SPhilipp Reisner    drbd is free software; you can redistribute it and/or modify
11b411b363SPhilipp Reisner    it under the terms of the GNU General Public License as published by
12b411b363SPhilipp Reisner    the Free Software Foundation; either version 2, or (at your option)
13b411b363SPhilipp Reisner    any later version.
14b411b363SPhilipp Reisner 
15b411b363SPhilipp Reisner    drbd is distributed in the hope that it will be useful,
16b411b363SPhilipp Reisner    but WITHOUT ANY WARRANTY; without even the implied warranty of
17b411b363SPhilipp Reisner    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18b411b363SPhilipp Reisner    GNU General Public License for more details.
19b411b363SPhilipp Reisner 
20b411b363SPhilipp Reisner    You should have received a copy of the GNU General Public License
21b411b363SPhilipp Reisner    along with drbd; see the file COPYING.  If not, write to
22b411b363SPhilipp Reisner    the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23b411b363SPhilipp Reisner 
24b411b363SPhilipp Reisner */
25b411b363SPhilipp Reisner 
26b411b363SPhilipp Reisner #include <linux/module.h>
27b411b363SPhilipp Reisner #include <linux/drbd.h>
28b411b363SPhilipp Reisner #include <linux/sched.h>
29b411b363SPhilipp Reisner #include <linux/wait.h>
30b411b363SPhilipp Reisner #include <linux/mm.h>
31b411b363SPhilipp Reisner #include <linux/memcontrol.h>
32b411b363SPhilipp Reisner #include <linux/mm_inline.h>
33b411b363SPhilipp Reisner #include <linux/slab.h>
34b411b363SPhilipp Reisner #include <linux/random.h>
35b411b363SPhilipp Reisner #include <linux/string.h>
36b411b363SPhilipp Reisner #include <linux/scatterlist.h>
37b411b363SPhilipp Reisner 
38b411b363SPhilipp Reisner #include "drbd_int.h"
39a3603a6eSAndreas Gruenbacher #include "drbd_protocol.h"
40b411b363SPhilipp Reisner #include "drbd_req.h"
41b411b363SPhilipp Reisner 
42d448a2e1SAndreas Gruenbacher static int make_ov_request(struct drbd_device *, int);
43d448a2e1SAndreas Gruenbacher static int make_resync_request(struct drbd_device *, int);
44b411b363SPhilipp Reisner 
45c5a91619SAndreas Gruenbacher /* endio handlers:
46c5a91619SAndreas Gruenbacher  *   drbd_md_io_complete (defined here)
47fcefa62eSAndreas Gruenbacher  *   drbd_request_endio (defined here)
48fcefa62eSAndreas Gruenbacher  *   drbd_peer_request_endio (defined here)
49c5a91619SAndreas Gruenbacher  *   bm_async_io_complete (defined in drbd_bitmap.c)
50c5a91619SAndreas Gruenbacher  *
51b411b363SPhilipp Reisner  * For all these callbacks, note the following:
52b411b363SPhilipp Reisner  * The callbacks will be called in irq context by the IDE drivers,
53b411b363SPhilipp Reisner  * and in Softirqs/Tasklets/BH context by the SCSI drivers.
54b411b363SPhilipp Reisner  * Try to get the locking right :)
55b411b363SPhilipp Reisner  *
56b411b363SPhilipp Reisner  */
57b411b363SPhilipp Reisner 
58b411b363SPhilipp Reisner 
59b411b363SPhilipp Reisner /* About the global_state_lock
60b411b363SPhilipp Reisner    Each state transition on a device holds a read lock. In case we have
6195f8efd0SAndreas Gruenbacher    to evaluate the resync after dependencies, we grab a write lock, because
62b411b363SPhilipp Reisner    we need stable states on all devices for that.  */
63b411b363SPhilipp Reisner rwlock_t global_state_lock;
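/* Illustrative sketch only (not a call site copied from this driver):
 * a state transition on a single device takes the lock shared, while
 * code that evaluates the resync-after dependencies of all devices
 * takes it exclusively to see stable states on all of them:
 *
 *	read_lock(&global_state_lock);
 *	...transition the state of one device...
 *	read_unlock(&global_state_lock);
 *
 *	write_lock(&global_state_lock);
 *	...evaluate resync-after dependencies across devices...
 *	write_unlock(&global_state_lock);
 */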
64b411b363SPhilipp Reisner 
65b411b363SPhilipp Reisner /* used for synchronous meta data and bitmap IO
66b411b363SPhilipp Reisner  * submitted by drbd_md_sync_page_io()
67b411b363SPhilipp Reisner  */
68b411b363SPhilipp Reisner void drbd_md_io_complete(struct bio *bio, int error)
69b411b363SPhilipp Reisner {
70b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
71b411b363SPhilipp Reisner 
72e37d2438SLars Ellenberg 	device = bio->bi_private;
73e37d2438SLars Ellenberg 	device->md_io.error = error;
74b411b363SPhilipp Reisner 
750cfac5ddSPhilipp Reisner 	/* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
760cfac5ddSPhilipp Reisner 	 * to timeout on the lower level device, and eventually detach from it.
770cfac5ddSPhilipp Reisner 	 * If this io completion runs after that timeout expired, this
780cfac5ddSPhilipp Reisner 	 * drbd_md_put_buffer() may allow us to finally try and re-attach.
790cfac5ddSPhilipp Reisner 	 * During normal operation, this only puts that extra reference
800cfac5ddSPhilipp Reisner 	 * down to 1 again.
810cfac5ddSPhilipp Reisner 	 * Make sure we first drop the reference, and only then signal
820cfac5ddSPhilipp Reisner 	 * completion, or we may (in drbd_al_read_log()) cycle so fast into the
830cfac5ddSPhilipp Reisner 	 * next drbd_md_sync_page_io() that we trigger the
84b30ab791SAndreas Gruenbacher 	 * ASSERT(atomic_read(&device->md_io_in_use) == 1) there.
850cfac5ddSPhilipp Reisner 	 */
86b30ab791SAndreas Gruenbacher 	drbd_md_put_buffer(device);
87e37d2438SLars Ellenberg 	device->md_io.done = 1;
88b30ab791SAndreas Gruenbacher 	wake_up(&device->misc_wait);
89cdfda633SPhilipp Reisner 	bio_put(bio);
90b30ab791SAndreas Gruenbacher 	if (device->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */
91b30ab791SAndreas Gruenbacher 		put_ldev(device);
92b411b363SPhilipp Reisner }
93b411b363SPhilipp Reisner 
94b411b363SPhilipp Reisner /* reads on behalf of the partner,
95b411b363SPhilipp Reisner  * "submitted" by the receiver
96b411b363SPhilipp Reisner  */
97a186e478SRashika Kheria static void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local)
98b411b363SPhilipp Reisner {
99b411b363SPhilipp Reisner 	unsigned long flags = 0;
1006780139cSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
1016780139cSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
102b411b363SPhilipp Reisner 
1030500813fSAndreas Gruenbacher 	spin_lock_irqsave(&device->resource->req_lock, flags);
104b30ab791SAndreas Gruenbacher 	device->read_cnt += peer_req->i.size >> 9;
105a8cd15baSAndreas Gruenbacher 	list_del(&peer_req->w.list);
106b30ab791SAndreas Gruenbacher 	if (list_empty(&device->read_ee))
107b30ab791SAndreas Gruenbacher 		wake_up(&device->ee_wait);
108db830c46SAndreas Gruenbacher 	if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
109b30ab791SAndreas Gruenbacher 		__drbd_chk_io_error(device, DRBD_READ_ERROR);
1100500813fSAndreas Gruenbacher 	spin_unlock_irqrestore(&device->resource->req_lock, flags);
111b411b363SPhilipp Reisner 
1126780139cSAndreas Gruenbacher 	drbd_queue_work(&peer_device->connection->sender_work, &peer_req->w);
113b30ab791SAndreas Gruenbacher 	put_ldev(device);
114b411b363SPhilipp Reisner }
115b411b363SPhilipp Reisner 
116b411b363SPhilipp Reisner /* writes on behalf of the partner, or resync writes,
11745bb912bSLars Ellenberg  * "submitted" by the receiver, final stage.  */
118a0fb3c47SLars Ellenberg void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
119b411b363SPhilipp Reisner {
120b411b363SPhilipp Reisner 	unsigned long flags = 0;
1216780139cSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
1226780139cSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
123181286adSLars Ellenberg 	struct drbd_interval i;
124b411b363SPhilipp Reisner 	int do_wake;
125579b57edSAndreas Gruenbacher 	u64 block_id;
126b411b363SPhilipp Reisner 	int do_al_complete_io;
127b411b363SPhilipp Reisner 
128db830c46SAndreas Gruenbacher 	/* after we moved peer_req to done_ee,
129b411b363SPhilipp Reisner 	 * we may no longer access it,
130b411b363SPhilipp Reisner 	 * it may be freed/reused already!
131b411b363SPhilipp Reisner 	 * (as soon as we release the req_lock) */
132181286adSLars Ellenberg 	i = peer_req->i;
133db830c46SAndreas Gruenbacher 	do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
134db830c46SAndreas Gruenbacher 	block_id = peer_req->block_id;
13521ae5d7fSLars Ellenberg 	peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
136b411b363SPhilipp Reisner 
1370500813fSAndreas Gruenbacher 	spin_lock_irqsave(&device->resource->req_lock, flags);
138b30ab791SAndreas Gruenbacher 	device->writ_cnt += peer_req->i.size >> 9;
139a8cd15baSAndreas Gruenbacher 	list_move_tail(&peer_req->w.list, &device->done_ee);
140b411b363SPhilipp Reisner 
141bb3bfe96SAndreas Gruenbacher 	/*
1425e472264SAndreas Gruenbacher 	 * Do not remove from the write_requests tree here: we did not send the
143bb3bfe96SAndreas Gruenbacher 	 * Ack yet and did not wake possibly waiting conflicting requests.
144bb3bfe96SAndreas Gruenbacher 	 * It is removed from the tree in "drbd_process_done_ee" within the
14584b8c06bSAndreas Gruenbacher 	 * appropriate dw.cb (e_end_block/e_end_resync_block) or from
146bb3bfe96SAndreas Gruenbacher 	 * _drbd_clear_done_ee.
147bb3bfe96SAndreas Gruenbacher 	 */
148b411b363SPhilipp Reisner 
149b30ab791SAndreas Gruenbacher 	do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee);
150b411b363SPhilipp Reisner 
151a0fb3c47SLars Ellenberg 	/* FIXME do we want to detach for failed REQ_DISCARD?
152a0fb3c47SLars Ellenberg 	 * ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */
153a0fb3c47SLars Ellenberg 	if (peer_req->flags & EE_WAS_ERROR)
154b30ab791SAndreas Gruenbacher 		__drbd_chk_io_error(device, DRBD_WRITE_ERROR);
1550500813fSAndreas Gruenbacher 	spin_unlock_irqrestore(&device->resource->req_lock, flags);
156b411b363SPhilipp Reisner 
157579b57edSAndreas Gruenbacher 	if (block_id == ID_SYNCER)
158b30ab791SAndreas Gruenbacher 		drbd_rs_complete_io(device, i.sector);
159b411b363SPhilipp Reisner 
160b411b363SPhilipp Reisner 	if (do_wake)
161b30ab791SAndreas Gruenbacher 		wake_up(&device->ee_wait);
162b411b363SPhilipp Reisner 
163b411b363SPhilipp Reisner 	if (do_al_complete_io)
164b30ab791SAndreas Gruenbacher 		drbd_al_complete_io(device, &i);
165b411b363SPhilipp Reisner 
1666780139cSAndreas Gruenbacher 	wake_asender(peer_device->connection);
167b30ab791SAndreas Gruenbacher 	put_ldev(device);
16845bb912bSLars Ellenberg }
169b411b363SPhilipp Reisner 
17045bb912bSLars Ellenberg /* writes on behalf of the partner, or resync writes,
17145bb912bSLars Ellenberg  * "submitted" by the receiver.
17245bb912bSLars Ellenberg  */
173fcefa62eSAndreas Gruenbacher void drbd_peer_request_endio(struct bio *bio, int error)
17445bb912bSLars Ellenberg {
175db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req = bio->bi_private;
176a8cd15baSAndreas Gruenbacher 	struct drbd_device *device = peer_req->peer_device->device;
17745bb912bSLars Ellenberg 	int uptodate = bio_flagged(bio, BIO_UPTODATE);
17845bb912bSLars Ellenberg 	int is_write = bio_data_dir(bio) == WRITE;
179a0fb3c47SLars Ellenberg 	int is_discard = !!(bio->bi_rw & REQ_DISCARD);
18045bb912bSLars Ellenberg 
18107194272SLars Ellenberg 	if (error && __ratelimit(&drbd_ratelimit_state))
182d0180171SAndreas Gruenbacher 		drbd_warn(device, "%s: error=%d s=%llus\n",
183a0fb3c47SLars Ellenberg 				is_write ? (is_discard ? "discard" : "write")
184a0fb3c47SLars Ellenberg 					: "read", error,
185db830c46SAndreas Gruenbacher 				(unsigned long long)peer_req->i.sector);
18645bb912bSLars Ellenberg 	if (!error && !uptodate) {
18707194272SLars Ellenberg 		if (__ratelimit(&drbd_ratelimit_state))
188d0180171SAndreas Gruenbacher 			drbd_warn(device, "%s: setting error to -EIO s=%llus\n",
18945bb912bSLars Ellenberg 					is_write ? "write" : "read",
190db830c46SAndreas Gruenbacher 					(unsigned long long)peer_req->i.sector);
19145bb912bSLars Ellenberg 		/* strange behavior of some lower level drivers...
19245bb912bSLars Ellenberg 		 * fail the request by clearing the uptodate flag,
19345bb912bSLars Ellenberg 		 * but do not return any error?! */
19445bb912bSLars Ellenberg 		error = -EIO;
19545bb912bSLars Ellenberg 	}
19645bb912bSLars Ellenberg 
19745bb912bSLars Ellenberg 	if (error)
198db830c46SAndreas Gruenbacher 		set_bit(__EE_WAS_ERROR, &peer_req->flags);
19945bb912bSLars Ellenberg 
20045bb912bSLars Ellenberg 	bio_put(bio); /* no need for the bio anymore */
201db830c46SAndreas Gruenbacher 	if (atomic_dec_and_test(&peer_req->pending_bios)) {
20245bb912bSLars Ellenberg 		if (is_write)
203db830c46SAndreas Gruenbacher 			drbd_endio_write_sec_final(peer_req);
20445bb912bSLars Ellenberg 		else
205db830c46SAndreas Gruenbacher 			drbd_endio_read_sec_final(peer_req);
20645bb912bSLars Ellenberg 	}
207b411b363SPhilipp Reisner }
208b411b363SPhilipp Reisner 
209b411b363SPhilipp Reisner /* read, readA or write requests on R_PRIMARY coming from drbd_make_request
210b411b363SPhilipp Reisner  */
211fcefa62eSAndreas Gruenbacher void drbd_request_endio(struct bio *bio, int error)
212b411b363SPhilipp Reisner {
213a115413dSLars Ellenberg 	unsigned long flags;
214b411b363SPhilipp Reisner 	struct drbd_request *req = bio->bi_private;
21584b8c06bSAndreas Gruenbacher 	struct drbd_device *device = req->device;
216a115413dSLars Ellenberg 	struct bio_and_error m;
217b411b363SPhilipp Reisner 	enum drbd_req_event what;
218b411b363SPhilipp Reisner 	int uptodate = bio_flagged(bio, BIO_UPTODATE);
219b411b363SPhilipp Reisner 
220b411b363SPhilipp Reisner 	if (!error && !uptodate) {
221d0180171SAndreas Gruenbacher 		drbd_warn(device, "p %s: setting error to -EIO\n",
222b411b363SPhilipp Reisner 			 bio_data_dir(bio) == WRITE ? "write" : "read");
223b411b363SPhilipp Reisner 		/* strange behavior of some lower level drivers...
224b411b363SPhilipp Reisner 		 * fail the request by clearing the uptodate flag,
225b411b363SPhilipp Reisner 		 * but do not return any error?! */
226b411b363SPhilipp Reisner 		error = -EIO;
227b411b363SPhilipp Reisner 	}
228b411b363SPhilipp Reisner 
2291b6dd252SPhilipp Reisner 
2301b6dd252SPhilipp Reisner 	/* If this request was aborted locally before,
2311b6dd252SPhilipp Reisner 	 * but now was completed "successfully",
2321b6dd252SPhilipp Reisner 	 * chances are that this caused arbitrary data corruption.
2331b6dd252SPhilipp Reisner 	 *
2341b6dd252SPhilipp Reisner 	 * "aborting" requests, or force-detaching the disk, is intended for
2351b6dd252SPhilipp Reisner 	 * completely blocked/hung local backing devices which no longer
2361b6dd252SPhilipp Reisner 	 * complete requests at all, not even do error completions.  In this
2371b6dd252SPhilipp Reisner 	 * situation, usually a hard-reset and failover is the only way out.
2381b6dd252SPhilipp Reisner 	 *
2391b6dd252SPhilipp Reisner 	 * By "aborting", basically faking a local error-completion,
2401b6dd252SPhilipp Reisner 	 * we allow for a more graceful switchover by cleanly migrating services.
2411b6dd252SPhilipp Reisner 	 * Still the affected node has to be rebooted "soon".
2421b6dd252SPhilipp Reisner 	 *
2431b6dd252SPhilipp Reisner 	 * By completing these requests, we allow the upper layers to re-use
2441b6dd252SPhilipp Reisner 	 * the associated data pages.
2451b6dd252SPhilipp Reisner 	 *
2461b6dd252SPhilipp Reisner 	 * If later the local backing device "recovers", and now DMAs some data
2471b6dd252SPhilipp Reisner 	 * from disk into the original request pages, in the best case it will
2481b6dd252SPhilipp Reisner 	 * just put random data into unused pages; but typically it will corrupt
2491b6dd252SPhilipp Reisner 	 * meanwhile completely unrelated data, causing all sorts of damage.
2501b6dd252SPhilipp Reisner 	 *
2511b6dd252SPhilipp Reisner 	 * Which means delayed successful completion,
2521b6dd252SPhilipp Reisner 	 * especially for READ requests,
2531b6dd252SPhilipp Reisner 	 * is a reason to panic().
2541b6dd252SPhilipp Reisner 	 *
2551b6dd252SPhilipp Reisner 	 * We assume that a delayed *error* completion is OK,
2561b6dd252SPhilipp Reisner 	 * though we still will complain noisily about it.
2571b6dd252SPhilipp Reisner 	 */
2581b6dd252SPhilipp Reisner 	if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) {
2591b6dd252SPhilipp Reisner 		if (__ratelimit(&drbd_ratelimit_state))
260d0180171SAndreas Gruenbacher 			drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");
2611b6dd252SPhilipp Reisner 
2621b6dd252SPhilipp Reisner 		if (!error)
2631b6dd252SPhilipp Reisner 			panic("possible random memory corruption caused by delayed completion of aborted local request\n");
2641b6dd252SPhilipp Reisner 	}
2651b6dd252SPhilipp Reisner 
266b411b363SPhilipp Reisner 	/* to avoid recursion in __req_mod */
267b411b363SPhilipp Reisner 	if (unlikely(error)) {
2682f632aebSLars Ellenberg 		if (bio->bi_rw & REQ_DISCARD)
2692f632aebSLars Ellenberg 			what = (error == -EOPNOTSUPP)
2702f632aebSLars Ellenberg 				? DISCARD_COMPLETED_NOTSUPP
2712f632aebSLars Ellenberg 				: DISCARD_COMPLETED_WITH_ERROR;
2722f632aebSLars Ellenberg 		else
273b411b363SPhilipp Reisner 			what = (bio_data_dir(bio) == WRITE)
2748554df1cSAndreas Gruenbacher 			? WRITE_COMPLETED_WITH_ERROR
2755c3c7e64SLars Ellenberg 			: (bio_rw(bio) == READ)
2768554df1cSAndreas Gruenbacher 			  ? READ_COMPLETED_WITH_ERROR
2778554df1cSAndreas Gruenbacher 			  : READ_AHEAD_COMPLETED_WITH_ERROR;
278b411b363SPhilipp Reisner 	} else
2798554df1cSAndreas Gruenbacher 		what = COMPLETED_OK;
280b411b363SPhilipp Reisner 
281b411b363SPhilipp Reisner 	bio_put(req->private_bio);
282b411b363SPhilipp Reisner 	req->private_bio = ERR_PTR(error);
283b411b363SPhilipp Reisner 
284a115413dSLars Ellenberg 	/* not req_mod(), we need irqsave here! */
2850500813fSAndreas Gruenbacher 	spin_lock_irqsave(&device->resource->req_lock, flags);
286a115413dSLars Ellenberg 	__req_mod(req, what, &m);
2870500813fSAndreas Gruenbacher 	spin_unlock_irqrestore(&device->resource->req_lock, flags);
288b30ab791SAndreas Gruenbacher 	put_ldev(device);
289a115413dSLars Ellenberg 
290a115413dSLars Ellenberg 	if (m.bio)
291b30ab791SAndreas Gruenbacher 		complete_master_bio(device, &m);
292b411b363SPhilipp Reisner }
293b411b363SPhilipp Reisner 
29479a3c8d3SAndreas Gruenbacher void drbd_csum_ee(struct crypto_hash *tfm, struct drbd_peer_request *peer_req, void *digest)
29545bb912bSLars Ellenberg {
29645bb912bSLars Ellenberg 	struct hash_desc desc;
29745bb912bSLars Ellenberg 	struct scatterlist sg;
298db830c46SAndreas Gruenbacher 	struct page *page = peer_req->pages;
29945bb912bSLars Ellenberg 	struct page *tmp;
30045bb912bSLars Ellenberg 	unsigned len;
30145bb912bSLars Ellenberg 
30245bb912bSLars Ellenberg 	desc.tfm = tfm;
30345bb912bSLars Ellenberg 	desc.flags = 0;
30445bb912bSLars Ellenberg 
30545bb912bSLars Ellenberg 	sg_init_table(&sg, 1);
30645bb912bSLars Ellenberg 	crypto_hash_init(&desc);
30745bb912bSLars Ellenberg 
30845bb912bSLars Ellenberg 	while ((tmp = page_chain_next(page))) {
30945bb912bSLars Ellenberg 		/* all but the last page will be fully used */
31045bb912bSLars Ellenberg 		sg_set_page(&sg, page, PAGE_SIZE, 0);
31145bb912bSLars Ellenberg 		crypto_hash_update(&desc, &sg, sg.length);
31245bb912bSLars Ellenberg 		page = tmp;
31345bb912bSLars Ellenberg 	}
31445bb912bSLars Ellenberg 	/* and now the last, possibly only partially used page */
315db830c46SAndreas Gruenbacher 	len = peer_req->i.size & (PAGE_SIZE - 1);
31645bb912bSLars Ellenberg 	sg_set_page(&sg, page, len ?: PAGE_SIZE, 0);
31745bb912bSLars Ellenberg 	crypto_hash_update(&desc, &sg, sg.length);
31845bb912bSLars Ellenberg 	crypto_hash_final(&desc, digest);
31945bb912bSLars Ellenberg }
32045bb912bSLars Ellenberg 
32179a3c8d3SAndreas Gruenbacher void drbd_csum_bio(struct crypto_hash *tfm, struct bio *bio, void *digest)
322b411b363SPhilipp Reisner {
323b411b363SPhilipp Reisner 	struct hash_desc desc;
324b411b363SPhilipp Reisner 	struct scatterlist sg;
3257988613bSKent Overstreet 	struct bio_vec bvec;
3267988613bSKent Overstreet 	struct bvec_iter iter;
327b411b363SPhilipp Reisner 
328b411b363SPhilipp Reisner 	desc.tfm = tfm;
329b411b363SPhilipp Reisner 	desc.flags = 0;
330b411b363SPhilipp Reisner 
331b411b363SPhilipp Reisner 	sg_init_table(&sg, 1);
332b411b363SPhilipp Reisner 	crypto_hash_init(&desc);
333b411b363SPhilipp Reisner 
3347988613bSKent Overstreet 	bio_for_each_segment(bvec, bio, iter) {
3357988613bSKent Overstreet 		sg_set_page(&sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
336b411b363SPhilipp Reisner 		crypto_hash_update(&desc, &sg, sg.length);
337b411b363SPhilipp Reisner 	}
338b411b363SPhilipp Reisner 	crypto_hash_final(&desc, digest);
339b411b363SPhilipp Reisner }
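/* Typical usage of the two checksum helpers above (a sketch of what
 * callers such as w_e_send_csum() below do): allocate a digest buffer
 * sized by the transform, then hash a peer request's page chain or a bio:
 *
 *	digest_size = crypto_hash_digestsize(tfm);
 *	digest = kmalloc(digest_size, GFP_NOIO);
 *	if (digest)
 *		drbd_csum_bio(tfm, bio, digest);
 */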
340b411b363SPhilipp Reisner 
3419676c760SLars Ellenberg /* MAYBE merge common code with w_e_end_ov_req */
34299920dc5SAndreas Gruenbacher static int w_e_send_csum(struct drbd_work *w, int cancel)
343b411b363SPhilipp Reisner {
344a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
3456780139cSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
3466780139cSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
347b411b363SPhilipp Reisner 	int digest_size;
348b411b363SPhilipp Reisner 	void *digest;
34999920dc5SAndreas Gruenbacher 	int err = 0;
350b411b363SPhilipp Reisner 
35153ea4331SLars Ellenberg 	if (unlikely(cancel))
35253ea4331SLars Ellenberg 		goto out;
353b411b363SPhilipp Reisner 
3549676c760SLars Ellenberg 	if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0))
35553ea4331SLars Ellenberg 		goto out;
35653ea4331SLars Ellenberg 
3576780139cSAndreas Gruenbacher 	digest_size = crypto_hash_digestsize(peer_device->connection->csums_tfm);
358b411b363SPhilipp Reisner 	digest = kmalloc(digest_size, GFP_NOIO);
359b411b363SPhilipp Reisner 	if (digest) {
360db830c46SAndreas Gruenbacher 		sector_t sector = peer_req->i.sector;
361db830c46SAndreas Gruenbacher 		unsigned int size = peer_req->i.size;
3626780139cSAndreas Gruenbacher 		drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest);
3639676c760SLars Ellenberg 		/* Free peer_req and pages before send.
36453ea4331SLars Ellenberg 		 * In case we block on congestion, we could otherwise run into
36553ea4331SLars Ellenberg 		 * some distributed deadlock, if the other side blocks on
36653ea4331SLars Ellenberg 		 * congestion as well, because our receiver blocks in
367c37c8ecfSAndreas Gruenbacher 		 * drbd_alloc_pages due to pp_in_use > max_buffers. */
368b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
369db830c46SAndreas Gruenbacher 		peer_req = NULL;
370b30ab791SAndreas Gruenbacher 		inc_rs_pending(device);
3716780139cSAndreas Gruenbacher 		err = drbd_send_drequest_csum(peer_device, sector, size,
37253ea4331SLars Ellenberg 					      digest, digest_size,
373b411b363SPhilipp Reisner 					      P_CSUM_RS_REQUEST);
374b411b363SPhilipp Reisner 		kfree(digest);
375b411b363SPhilipp Reisner 	} else {
376d0180171SAndreas Gruenbacher 		drbd_err(device, "kmalloc() of digest failed.\n");
37799920dc5SAndreas Gruenbacher 		err = -ENOMEM;
378b411b363SPhilipp Reisner 	}
379b411b363SPhilipp Reisner 
38053ea4331SLars Ellenberg out:
381db830c46SAndreas Gruenbacher 	if (peer_req)
382b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
383b411b363SPhilipp Reisner 
38499920dc5SAndreas Gruenbacher 	if (unlikely(err))
385d0180171SAndreas Gruenbacher 		drbd_err(device, "drbd_send_drequest(..., csum) failed\n");
38699920dc5SAndreas Gruenbacher 	return err;
387b411b363SPhilipp Reisner }
388b411b363SPhilipp Reisner 
389b411b363SPhilipp Reisner #define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)
390b411b363SPhilipp Reisner 
39169a22773SAndreas Gruenbacher static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector, int size)
392b411b363SPhilipp Reisner {
39369a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
394db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req;
395b411b363SPhilipp Reisner 
396b30ab791SAndreas Gruenbacher 	if (!get_ldev(device))
39780a40e43SLars Ellenberg 		return -EIO;
398b411b363SPhilipp Reisner 
399b411b363SPhilipp Reisner 	/* GFP_TRY, because if there is no memory available right now, this may
400b411b363SPhilipp Reisner 	 * be rescheduled for later. It is "only" background resync, after all. */
40169a22773SAndreas Gruenbacher 	peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector,
402a0fb3c47SLars Ellenberg 				       size, true /* has real payload */, GFP_TRY);
403db830c46SAndreas Gruenbacher 	if (!peer_req)
40480a40e43SLars Ellenberg 		goto defer;
405b411b363SPhilipp Reisner 
406a8cd15baSAndreas Gruenbacher 	peer_req->w.cb = w_e_send_csum;
4070500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
408b9ed7080SLars Ellenberg 	list_add_tail(&peer_req->w.list, &device->read_ee);
4090500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
410b411b363SPhilipp Reisner 
411b30ab791SAndreas Gruenbacher 	atomic_add(size >> 9, &device->rs_sect_ev);
412b30ab791SAndreas Gruenbacher 	if (drbd_submit_peer_request(device, peer_req, READ, DRBD_FAULT_RS_RD) == 0)
41380a40e43SLars Ellenberg 		return 0;
41445bb912bSLars Ellenberg 
41510f6d992SLars Ellenberg 	/* If it failed because of ENOMEM, retry should help.  If it failed
41610f6d992SLars Ellenberg 	 * because bio_add_page failed (probably broken lower level driver),
41710f6d992SLars Ellenberg 	 * retry may or may not help.
41810f6d992SLars Ellenberg 	 * If it does not, you may need to force disconnect. */
4190500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
420a8cd15baSAndreas Gruenbacher 	list_del(&peer_req->w.list);
4210500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
42222cc37a9SLars Ellenberg 
423b30ab791SAndreas Gruenbacher 	drbd_free_peer_req(device, peer_req);
42480a40e43SLars Ellenberg defer:
425b30ab791SAndreas Gruenbacher 	put_ldev(device);
42680a40e43SLars Ellenberg 	return -EAGAIN;
427b411b363SPhilipp Reisner }
428b411b363SPhilipp Reisner 
42999920dc5SAndreas Gruenbacher int w_resync_timer(struct drbd_work *w, int cancel)
430794abb75SPhilipp Reisner {
43184b8c06bSAndreas Gruenbacher 	struct drbd_device *device =
43284b8c06bSAndreas Gruenbacher 		container_of(w, struct drbd_device, resync_work);
43384b8c06bSAndreas Gruenbacher 
434b30ab791SAndreas Gruenbacher 	switch (device->state.conn) {
435794abb75SPhilipp Reisner 	case C_VERIFY_S:
436d448a2e1SAndreas Gruenbacher 		make_ov_request(device, cancel);
437794abb75SPhilipp Reisner 		break;
438794abb75SPhilipp Reisner 	case C_SYNC_TARGET:
439d448a2e1SAndreas Gruenbacher 		make_resync_request(device, cancel);
440794abb75SPhilipp Reisner 		break;
441794abb75SPhilipp Reisner 	}
442794abb75SPhilipp Reisner 
44399920dc5SAndreas Gruenbacher 	return 0;
444794abb75SPhilipp Reisner }
445794abb75SPhilipp Reisner 
446b411b363SPhilipp Reisner void resync_timer_fn(unsigned long data)
447b411b363SPhilipp Reisner {
448b30ab791SAndreas Gruenbacher 	struct drbd_device *device = (struct drbd_device *) data;
449b411b363SPhilipp Reisner 
45015e26f6aSLars Ellenberg 	drbd_queue_work_if_unqueued(
45115e26f6aSLars Ellenberg 		&first_peer_device(device)->connection->sender_work,
45284b8c06bSAndreas Gruenbacher 		&device->resync_work);
453b411b363SPhilipp Reisner }
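/* How the resync machinery above and below fits together (summary derived
 * from this file): make_resync_request() and make_ov_request() re-arm
 * device->resync_timer via mod_timer(..., jiffies + SLEEP_TIME) whenever
 * more work remains; when the timer fires, resync_timer_fn() queues
 * device->resync_work on the connection's sender_work, and the worker then
 * calls w_resync_timer(), which dispatches to make_ov_request() or
 * make_resync_request() depending on device->state.conn.
 */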
454b411b363SPhilipp Reisner 
455778f271dSPhilipp Reisner static void fifo_set(struct fifo_buffer *fb, int value)
456778f271dSPhilipp Reisner {
457778f271dSPhilipp Reisner 	int i;
458778f271dSPhilipp Reisner 
459778f271dSPhilipp Reisner 	for (i = 0; i < fb->size; i++)
460f10f2623SPhilipp Reisner 		fb->values[i] = value;
461778f271dSPhilipp Reisner }
462778f271dSPhilipp Reisner 
463778f271dSPhilipp Reisner static int fifo_push(struct fifo_buffer *fb, int value)
464778f271dSPhilipp Reisner {
465778f271dSPhilipp Reisner 	int ov;
466778f271dSPhilipp Reisner 
467778f271dSPhilipp Reisner 	ov = fb->values[fb->head_index];
468778f271dSPhilipp Reisner 	fb->values[fb->head_index++] = value;
469778f271dSPhilipp Reisner 
470778f271dSPhilipp Reisner 	if (fb->head_index >= fb->size)
471778f271dSPhilipp Reisner 		fb->head_index = 0;
472778f271dSPhilipp Reisner 
473778f271dSPhilipp Reisner 	return ov;
474778f271dSPhilipp Reisner }
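/* The fifo is a fixed-size ring: fifo_push() returns the value falling out
 * of the slot at head_index while the new value takes its place.  For
 * example, with size = 3, values = {4, 5, 6} and head_index = 0,
 * fifo_push(fb, 7) returns 4 and leaves {7, 5, 6} with head_index = 1.
 * Together with fifo_add_val() below, this forms the plan-ahead buffer
 * used by drbd_rs_controller().
 */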
475778f271dSPhilipp Reisner 
476778f271dSPhilipp Reisner static void fifo_add_val(struct fifo_buffer *fb, int value)
477778f271dSPhilipp Reisner {
478778f271dSPhilipp Reisner 	int i;
479778f271dSPhilipp Reisner 
480778f271dSPhilipp Reisner 	for (i = 0; i < fb->size; i++)
481778f271dSPhilipp Reisner 		fb->values[i] += value;
482778f271dSPhilipp Reisner }
483778f271dSPhilipp Reisner 
4849958c857SPhilipp Reisner struct fifo_buffer *fifo_alloc(int fifo_size)
4859958c857SPhilipp Reisner {
4869958c857SPhilipp Reisner 	struct fifo_buffer *fb;
4879958c857SPhilipp Reisner 
4888747d30aSLars Ellenberg 	fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_NOIO);
4899958c857SPhilipp Reisner 	if (!fb)
4909958c857SPhilipp Reisner 		return NULL;
4919958c857SPhilipp Reisner 
4929958c857SPhilipp Reisner 	fb->head_index = 0;
4939958c857SPhilipp Reisner 	fb->size = fifo_size;
4949958c857SPhilipp Reisner 	fb->total = 0;
4959958c857SPhilipp Reisner 
4969958c857SPhilipp Reisner 	return fb;
4979958c857SPhilipp Reisner }
4989958c857SPhilipp Reisner 
4990e49d7b0SLars Ellenberg static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in)
500778f271dSPhilipp Reisner {
501daeda1ccSPhilipp Reisner 	struct disk_conf *dc;
5027f34f614SLars Ellenberg 	unsigned int want;     /* The number of sectors we want in-flight */
503778f271dSPhilipp Reisner 	int req_sect; /* Number of sectors to request in this turn */
5047f34f614SLars Ellenberg 	int correction; /* Number of sectors more we need in-flight */
505778f271dSPhilipp Reisner 	int cps; /* correction per invocation of drbd_rs_controller() */
506778f271dSPhilipp Reisner 	int steps; /* Number of time steps to plan ahead */
507778f271dSPhilipp Reisner 	int curr_corr;
508778f271dSPhilipp Reisner 	int max_sect;
509813472ceSPhilipp Reisner 	struct fifo_buffer *plan;
510778f271dSPhilipp Reisner 
511b30ab791SAndreas Gruenbacher 	dc = rcu_dereference(device->ldev->disk_conf);
512b30ab791SAndreas Gruenbacher 	plan = rcu_dereference(device->rs_plan_s);
513778f271dSPhilipp Reisner 
514813472ceSPhilipp Reisner 	steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */
515778f271dSPhilipp Reisner 
516b30ab791SAndreas Gruenbacher 	if (device->rs_in_flight + sect_in == 0) { /* At start of resync */
517daeda1ccSPhilipp Reisner 		want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
518778f271dSPhilipp Reisner 	} else { /* normal path */
519daeda1ccSPhilipp Reisner 		want = dc->c_fill_target ? dc->c_fill_target :
520daeda1ccSPhilipp Reisner 			sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10);
521778f271dSPhilipp Reisner 	}
522778f271dSPhilipp Reisner 
523b30ab791SAndreas Gruenbacher 	correction = want - device->rs_in_flight - plan->total;
524778f271dSPhilipp Reisner 
525778f271dSPhilipp Reisner 	/* Plan ahead */
526778f271dSPhilipp Reisner 	cps = correction / steps;
527813472ceSPhilipp Reisner 	fifo_add_val(plan, cps);
528813472ceSPhilipp Reisner 	plan->total += cps * steps;
529778f271dSPhilipp Reisner 
530778f271dSPhilipp Reisner 	/* What we do in this step */
531813472ceSPhilipp Reisner 	curr_corr = fifo_push(plan, 0);
532813472ceSPhilipp Reisner 	plan->total -= curr_corr;
533778f271dSPhilipp Reisner 
534778f271dSPhilipp Reisner 	req_sect = sect_in + curr_corr;
535778f271dSPhilipp Reisner 	if (req_sect < 0)
536778f271dSPhilipp Reisner 		req_sect = 0;
537778f271dSPhilipp Reisner 
538daeda1ccSPhilipp Reisner 	max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ;
539778f271dSPhilipp Reisner 	if (req_sect > max_sect)
540778f271dSPhilipp Reisner 		req_sect = max_sect;
541778f271dSPhilipp Reisner 
542778f271dSPhilipp Reisner 	/*
543d0180171SAndreas Gruenbacher 	drbd_warn(device, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
544b30ab791SAndreas Gruenbacher 		 sect_in, device->rs_in_flight, want, correction,
545b30ab791SAndreas Gruenbacher 		 steps, cps, device->rs_planed, curr_corr, req_sect);
546778f271dSPhilipp Reisner 	*/
547778f271dSPhilipp Reisner 
548778f271dSPhilipp Reisner 	return req_sect;
549778f271dSPhilipp Reisner }
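/* Back-of-the-envelope example for the units above (a sketch, assuming
 * SLEEP_TIME is one 100ms step, i.e. HZ/10): with resync_rate = 10240 KiB/s,
 * the start-of-resync branch wants 10240 * 2 * SLEEP_TIME / HZ = 2048
 * sectors (1 MiB) in flight per step, multiplied by the number of
 * plan-ahead steps; c_max_rate bounds req_sect through max_sect in the
 * same way.
 */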
550778f271dSPhilipp Reisner 
551b30ab791SAndreas Gruenbacher static int drbd_rs_number_requests(struct drbd_device *device)
552e65f440dSLars Ellenberg {
5530e49d7b0SLars Ellenberg 	unsigned int sect_in;  /* Number of sectors that came in since the last turn */
5540e49d7b0SLars Ellenberg 	int number, mxb;
5550e49d7b0SLars Ellenberg 
5560e49d7b0SLars Ellenberg 	sect_in = atomic_xchg(&device->rs_sect_in, 0);
5570e49d7b0SLars Ellenberg 	device->rs_in_flight -= sect_in;
558813472ceSPhilipp Reisner 
559813472ceSPhilipp Reisner 	rcu_read_lock();
5600e49d7b0SLars Ellenberg 	mxb = drbd_get_max_buffers(device) / 2;
561b30ab791SAndreas Gruenbacher 	if (rcu_dereference(device->rs_plan_s)->size) {
5620e49d7b0SLars Ellenberg 		number = drbd_rs_controller(device, sect_in) >> (BM_BLOCK_SHIFT - 9);
563b30ab791SAndreas Gruenbacher 		device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
564e65f440dSLars Ellenberg 	} else {
565b30ab791SAndreas Gruenbacher 		device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate;
566b30ab791SAndreas Gruenbacher 		number = SLEEP_TIME * device->c_sync_rate  / ((BM_BLOCK_SIZE / 1024) * HZ);
567e65f440dSLars Ellenberg 	}
568813472ceSPhilipp Reisner 	rcu_read_unlock();
569e65f440dSLars Ellenberg 
5700e49d7b0SLars Ellenberg 	/* Don't have more than "max-buffers"/2 in-flight.
5710e49d7b0SLars Ellenberg 	 * Otherwise we may cause the remote site to stall on drbd_alloc_pages(),
5720e49d7b0SLars Ellenberg 	 * potentially causing a distributed deadlock on congestion during
5730e49d7b0SLars Ellenberg 	 * online-verify or (checksum-based) resync, if max-buffers,
5740e49d7b0SLars Ellenberg 	 * socket buffer sizes and resync rate settings are mis-configured. */
5757f34f614SLars Ellenberg 
5767f34f614SLars Ellenberg 	/* note that "number" is in units of "BM_BLOCK_SIZE" (which is 4k),
5777f34f614SLars Ellenberg 	 * mxb (as used here, and in drbd_alloc_pages on the peer) is
5787f34f614SLars Ellenberg 	 * "number of pages" (typically also 4k),
5797f34f614SLars Ellenberg 	 * but "rs_in_flight" is in "sectors" (512 Byte). */
5807f34f614SLars Ellenberg 	if (mxb - device->rs_in_flight/8 < number)
5817f34f614SLars Ellenberg 		number = mxb - device->rs_in_flight/8;
5820e49d7b0SLars Ellenberg 
583e65f440dSLars Ellenberg 	return number;
584e65f440dSLars Ellenberg }
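/* Example for the fixed-rate branch above (a sketch): with
 * c_sync_rate = 10240 KiB/s and BM_BLOCK_SIZE = 4k, number becomes
 * SLEEP_TIME * 10240 / (4 * HZ) = 256 requests per 100ms step, i.e. 1 MiB
 * per step; the clamp then compares that against max-buffers/2 minus
 * rs_in_flight converted from 512-byte sectors to 4k units (>> 3).
 */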
585e65f440dSLars Ellenberg 
58644a4d551SLars Ellenberg static int make_resync_request(struct drbd_device *const device, int cancel)
587b411b363SPhilipp Reisner {
58844a4d551SLars Ellenberg 	struct drbd_peer_device *const peer_device = first_peer_device(device);
58944a4d551SLars Ellenberg 	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
590b411b363SPhilipp Reisner 	unsigned long bit;
591b411b363SPhilipp Reisner 	sector_t sector;
592b30ab791SAndreas Gruenbacher 	const sector_t capacity = drbd_get_capacity(device->this_bdev);
5931816a2b4SLars Ellenberg 	int max_bio_size;
594e65f440dSLars Ellenberg 	int number, rollback_i, size;
595506afb62SLars Ellenberg 	int align, requeue = 0;
5960f0601f4SLars Ellenberg 	int i = 0;
597b411b363SPhilipp Reisner 
598b411b363SPhilipp Reisner 	if (unlikely(cancel))
59999920dc5SAndreas Gruenbacher 		return 0;
600b411b363SPhilipp Reisner 
601b30ab791SAndreas Gruenbacher 	if (device->rs_total == 0) {
602af85e8e8SLars Ellenberg 		/* empty resync? */
603b30ab791SAndreas Gruenbacher 		drbd_resync_finished(device);
60499920dc5SAndreas Gruenbacher 		return 0;
605af85e8e8SLars Ellenberg 	}
606af85e8e8SLars Ellenberg 
607b30ab791SAndreas Gruenbacher 	if (!get_ldev(device)) {
608b30ab791SAndreas Gruenbacher 		/* Since we only need to access device->rsync, a
609b30ab791SAndreas Gruenbacher 		   get_ldev_if_state(device,D_FAILED) would be sufficient, but
610b411b363SPhilipp Reisner 		   continuing resync with a broken disk makes no sense at
611b411b363SPhilipp Reisner 		   all */
612d0180171SAndreas Gruenbacher 		drbd_err(device, "Disk broke down during resync!\n");
61399920dc5SAndreas Gruenbacher 		return 0;
614b411b363SPhilipp Reisner 	}
615b411b363SPhilipp Reisner 
616b30ab791SAndreas Gruenbacher 	max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9;
617b30ab791SAndreas Gruenbacher 	number = drbd_rs_number_requests(device);
6180e49d7b0SLars Ellenberg 	if (number <= 0)
6190f0601f4SLars Ellenberg 		goto requeue;
620b411b363SPhilipp Reisner 
621b411b363SPhilipp Reisner 	for (i = 0; i < number; i++) {
622506afb62SLars Ellenberg 		/* Stop generating RS requests when half of the send buffer is filled,
623506afb62SLars Ellenberg 		 * but notify TCP that we'd like to have more space. */
62444a4d551SLars Ellenberg 		mutex_lock(&connection->data.mutex);
62544a4d551SLars Ellenberg 		if (connection->data.socket) {
626506afb62SLars Ellenberg 			struct sock *sk = connection->data.socket->sk;
627506afb62SLars Ellenberg 			int queued = sk->sk_wmem_queued;
628506afb62SLars Ellenberg 			int sndbuf = sk->sk_sndbuf;
629506afb62SLars Ellenberg 			if (queued > sndbuf / 2) {
630506afb62SLars Ellenberg 				requeue = 1;
631506afb62SLars Ellenberg 				if (sk->sk_socket)
632506afb62SLars Ellenberg 					set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
633b411b363SPhilipp Reisner 			}
634506afb62SLars Ellenberg 		} else
635506afb62SLars Ellenberg 			requeue = 1;
63644a4d551SLars Ellenberg 		mutex_unlock(&connection->data.mutex);
637506afb62SLars Ellenberg 		if (requeue)
638b411b363SPhilipp Reisner 			goto requeue;
639b411b363SPhilipp Reisner 
640b411b363SPhilipp Reisner next_sector:
641b411b363SPhilipp Reisner 		size = BM_BLOCK_SIZE;
642b30ab791SAndreas Gruenbacher 		bit  = drbd_bm_find_next(device, device->bm_resync_fo);
643b411b363SPhilipp Reisner 
6444b0715f0SLars Ellenberg 		if (bit == DRBD_END_OF_BITMAP) {
645b30ab791SAndreas Gruenbacher 			device->bm_resync_fo = drbd_bm_bits(device);
646b30ab791SAndreas Gruenbacher 			put_ldev(device);
64799920dc5SAndreas Gruenbacher 			return 0;
648b411b363SPhilipp Reisner 		}
649b411b363SPhilipp Reisner 
650b411b363SPhilipp Reisner 		sector = BM_BIT_TO_SECT(bit);
651b411b363SPhilipp Reisner 
652ad3fee79SLars Ellenberg 		if (drbd_try_rs_begin_io(device, sector)) {
653b30ab791SAndreas Gruenbacher 			device->bm_resync_fo = bit;
654b411b363SPhilipp Reisner 			goto requeue;
655b411b363SPhilipp Reisner 		}
656b30ab791SAndreas Gruenbacher 		device->bm_resync_fo = bit + 1;
657b411b363SPhilipp Reisner 
658b30ab791SAndreas Gruenbacher 		if (unlikely(drbd_bm_test_bit(device, bit) == 0)) {
659b30ab791SAndreas Gruenbacher 			drbd_rs_complete_io(device, sector);
660b411b363SPhilipp Reisner 			goto next_sector;
661b411b363SPhilipp Reisner 		}
662b411b363SPhilipp Reisner 
6631816a2b4SLars Ellenberg #if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
664b411b363SPhilipp Reisner 		/* try to find some adjacent bits.
665b411b363SPhilipp Reisner 		 * we stop if we already have the maximum req size.
666b411b363SPhilipp Reisner 		 *
667b411b363SPhilipp Reisner 		 * Additionally always align bigger requests, in order to
668b411b363SPhilipp Reisner 		 * be prepared for all stripe sizes of software RAIDs.
669b411b363SPhilipp Reisner 		 */
670b411b363SPhilipp Reisner 		align = 1;
671d207450cSPhilipp Reisner 		rollback_i = i;
6726377b923SLars Ellenberg 		while (i < number) {
6731816a2b4SLars Ellenberg 			if (size + BM_BLOCK_SIZE > max_bio_size)
674b411b363SPhilipp Reisner 				break;
675b411b363SPhilipp Reisner 
676b411b363SPhilipp Reisner 			/* Be always aligned */
677b411b363SPhilipp Reisner 			if (sector & ((1<<(align+3))-1))
678b411b363SPhilipp Reisner 				break;
679b411b363SPhilipp Reisner 
680b411b363SPhilipp Reisner 			/* do not cross extent boundaries */
681b411b363SPhilipp Reisner 			if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
682b411b363SPhilipp Reisner 				break;
683b411b363SPhilipp Reisner 			/* now, is it actually dirty, after all?
684b411b363SPhilipp Reisner 			 * caution, drbd_bm_test_bit is tri-state for some
685b411b363SPhilipp Reisner 			 * obscure reason; ( b == 0 ) would get the out-of-band
686b411b363SPhilipp Reisner 			 * only accidentally right because of the "oddly sized"
687b411b363SPhilipp Reisner 			 * adjustment below */
688b30ab791SAndreas Gruenbacher 			if (drbd_bm_test_bit(device, bit+1) != 1)
689b411b363SPhilipp Reisner 				break;
690b411b363SPhilipp Reisner 			bit++;
691b411b363SPhilipp Reisner 			size += BM_BLOCK_SIZE;
692b411b363SPhilipp Reisner 			if ((BM_BLOCK_SIZE << align) <= size)
693b411b363SPhilipp Reisner 				align++;
694b411b363SPhilipp Reisner 			i++;
695b411b363SPhilipp Reisner 		}
696b411b363SPhilipp Reisner 		/* if we merged some,
697b411b363SPhilipp Reisner 		 * reset the offset to start the next drbd_bm_find_next from */
698b411b363SPhilipp Reisner 		if (size > BM_BLOCK_SIZE)
699b30ab791SAndreas Gruenbacher 			device->bm_resync_fo = bit + 1;
700b411b363SPhilipp Reisner #endif
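		/* The merge loop above only keeps growing a request while its
		 * start sector is aligned to BM_BLOCK_SIZE << align bytes
		 * ((1 << (align+3)) sectors), bumping align once the size
		 * reaches that boundary.  E.g. a request starting at a sector
		 * that is 4k- but not 8k-aligned stays at a single
		 * BM_BLOCK_SIZE; this is what keeps larger requests naturally
		 * aligned for the RAID stripe sizes mentioned above. */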
701b411b363SPhilipp Reisner 
702b411b363SPhilipp Reisner 		/* adjust very last sectors, in case we are oddly sized */
703b411b363SPhilipp Reisner 		if (sector + (size>>9) > capacity)
704b411b363SPhilipp Reisner 			size = (capacity-sector)<<9;
705aaaba345SLars Ellenberg 
706aaaba345SLars Ellenberg 		if (device->use_csums) {
70744a4d551SLars Ellenberg 			switch (read_for_csum(peer_device, sector, size)) {
70880a40e43SLars Ellenberg 			case -EIO: /* Disk failure */
709b30ab791SAndreas Gruenbacher 				put_ldev(device);
71099920dc5SAndreas Gruenbacher 				return -EIO;
71180a40e43SLars Ellenberg 			case -EAGAIN: /* allocation failed, or ldev busy */
712b30ab791SAndreas Gruenbacher 				drbd_rs_complete_io(device, sector);
713b30ab791SAndreas Gruenbacher 				device->bm_resync_fo = BM_SECT_TO_BIT(sector);
714d207450cSPhilipp Reisner 				i = rollback_i;
715b411b363SPhilipp Reisner 				goto requeue;
71680a40e43SLars Ellenberg 			case 0:
71780a40e43SLars Ellenberg 				/* everything ok */
71880a40e43SLars Ellenberg 				break;
71980a40e43SLars Ellenberg 			default:
72080a40e43SLars Ellenberg 				BUG();
721b411b363SPhilipp Reisner 			}
722b411b363SPhilipp Reisner 		} else {
72399920dc5SAndreas Gruenbacher 			int err;
72499920dc5SAndreas Gruenbacher 
725b30ab791SAndreas Gruenbacher 			inc_rs_pending(device);
72644a4d551SLars Ellenberg 			err = drbd_send_drequest(peer_device, P_RS_DATA_REQUEST,
72799920dc5SAndreas Gruenbacher 						 sector, size, ID_SYNCER);
72899920dc5SAndreas Gruenbacher 			if (err) {
729d0180171SAndreas Gruenbacher 				drbd_err(device, "drbd_send_drequest() failed, aborting...\n");
730b30ab791SAndreas Gruenbacher 				dec_rs_pending(device);
731b30ab791SAndreas Gruenbacher 				put_ldev(device);
73299920dc5SAndreas Gruenbacher 				return err;
733b411b363SPhilipp Reisner 			}
734b411b363SPhilipp Reisner 		}
735b411b363SPhilipp Reisner 	}
736b411b363SPhilipp Reisner 
737b30ab791SAndreas Gruenbacher 	if (device->bm_resync_fo >= drbd_bm_bits(device)) {
738b411b363SPhilipp Reisner 		/* last syncer _request_ was sent,
739b411b363SPhilipp Reisner 		 * but the P_RS_DATA_REPLY not yet received.  sync will end (and
740b411b363SPhilipp Reisner 		 * next sync group will resume), as soon as we receive the last
741b411b363SPhilipp Reisner 		 * resync data block, and the last bit is cleared.
742b411b363SPhilipp Reisner 		 * until then resync "work" is "inactive" ...
743b411b363SPhilipp Reisner 		 */
744b30ab791SAndreas Gruenbacher 		put_ldev(device);
74599920dc5SAndreas Gruenbacher 		return 0;
746b411b363SPhilipp Reisner 	}
747b411b363SPhilipp Reisner 
748b411b363SPhilipp Reisner  requeue:
749b30ab791SAndreas Gruenbacher 	device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
750b30ab791SAndreas Gruenbacher 	mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
751b30ab791SAndreas Gruenbacher 	put_ldev(device);
75299920dc5SAndreas Gruenbacher 	return 0;
753b411b363SPhilipp Reisner }
754b411b363SPhilipp Reisner 
755d448a2e1SAndreas Gruenbacher static int make_ov_request(struct drbd_device *device, int cancel)
756b411b363SPhilipp Reisner {
757b411b363SPhilipp Reisner 	int number, i, size;
758b411b363SPhilipp Reisner 	sector_t sector;
759b30ab791SAndreas Gruenbacher 	const sector_t capacity = drbd_get_capacity(device->this_bdev);
76058ffa580SLars Ellenberg 	bool stop_sector_reached = false;
761b411b363SPhilipp Reisner 
762b411b363SPhilipp Reisner 	if (unlikely(cancel))
763b411b363SPhilipp Reisner 		return 1;
764b411b363SPhilipp Reisner 
765b30ab791SAndreas Gruenbacher 	number = drbd_rs_number_requests(device);
766b411b363SPhilipp Reisner 
767b30ab791SAndreas Gruenbacher 	sector = device->ov_position;
768b411b363SPhilipp Reisner 	for (i = 0; i < number; i++) {
76958ffa580SLars Ellenberg 		if (sector >= capacity)
770b411b363SPhilipp Reisner 			return 1;
77158ffa580SLars Ellenberg 
77258ffa580SLars Ellenberg 		/* We check for "finished" only in the reply path:
77358ffa580SLars Ellenberg 		 * w_e_end_ov_reply().
77458ffa580SLars Ellenberg 		 * We need to send at least one request out. */
77558ffa580SLars Ellenberg 		stop_sector_reached = i > 0
776b30ab791SAndreas Gruenbacher 			&& verify_can_do_stop_sector(device)
777b30ab791SAndreas Gruenbacher 			&& sector >= device->ov_stop_sector;
77858ffa580SLars Ellenberg 		if (stop_sector_reached)
77958ffa580SLars Ellenberg 			break;
780b411b363SPhilipp Reisner 
781b411b363SPhilipp Reisner 		size = BM_BLOCK_SIZE;
782b411b363SPhilipp Reisner 
783ad3fee79SLars Ellenberg 		if (drbd_try_rs_begin_io(device, sector)) {
784b30ab791SAndreas Gruenbacher 			device->ov_position = sector;
785b411b363SPhilipp Reisner 			goto requeue;
786b411b363SPhilipp Reisner 		}
787b411b363SPhilipp Reisner 
788b411b363SPhilipp Reisner 		if (sector + (size>>9) > capacity)
789b411b363SPhilipp Reisner 			size = (capacity-sector)<<9;
790b411b363SPhilipp Reisner 
791b30ab791SAndreas Gruenbacher 		inc_rs_pending(device);
79269a22773SAndreas Gruenbacher 		if (drbd_send_ov_request(first_peer_device(device), sector, size)) {
793b30ab791SAndreas Gruenbacher 			dec_rs_pending(device);
794b411b363SPhilipp Reisner 			return 0;
795b411b363SPhilipp Reisner 		}
796b411b363SPhilipp Reisner 		sector += BM_SECT_PER_BIT;
797b411b363SPhilipp Reisner 	}
798b30ab791SAndreas Gruenbacher 	device->ov_position = sector;
799b411b363SPhilipp Reisner 
800b411b363SPhilipp Reisner  requeue:
801b30ab791SAndreas Gruenbacher 	device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
80258ffa580SLars Ellenberg 	if (i == 0 || !stop_sector_reached)
803b30ab791SAndreas Gruenbacher 		mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
804b411b363SPhilipp Reisner 	return 1;
805b411b363SPhilipp Reisner }
806b411b363SPhilipp Reisner 
80799920dc5SAndreas Gruenbacher int w_ov_finished(struct drbd_work *w, int cancel)
808b411b363SPhilipp Reisner {
80984b8c06bSAndreas Gruenbacher 	struct drbd_device_work *dw =
81084b8c06bSAndreas Gruenbacher 		container_of(w, struct drbd_device_work, w);
81184b8c06bSAndreas Gruenbacher 	struct drbd_device *device = dw->device;
81284b8c06bSAndreas Gruenbacher 	kfree(dw);
813b30ab791SAndreas Gruenbacher 	ov_out_of_sync_print(device);
814b30ab791SAndreas Gruenbacher 	drbd_resync_finished(device);
815b411b363SPhilipp Reisner 
81699920dc5SAndreas Gruenbacher 	return 0;
817b411b363SPhilipp Reisner }
818b411b363SPhilipp Reisner 
81999920dc5SAndreas Gruenbacher static int w_resync_finished(struct drbd_work *w, int cancel)
820b411b363SPhilipp Reisner {
82184b8c06bSAndreas Gruenbacher 	struct drbd_device_work *dw =
82284b8c06bSAndreas Gruenbacher 		container_of(w, struct drbd_device_work, w);
82384b8c06bSAndreas Gruenbacher 	struct drbd_device *device = dw->device;
82484b8c06bSAndreas Gruenbacher 	kfree(dw);
825b411b363SPhilipp Reisner 
826b30ab791SAndreas Gruenbacher 	drbd_resync_finished(device);
827b411b363SPhilipp Reisner 
82899920dc5SAndreas Gruenbacher 	return 0;
829b411b363SPhilipp Reisner }
830b411b363SPhilipp Reisner 
831b30ab791SAndreas Gruenbacher static void ping_peer(struct drbd_device *device)
832af85e8e8SLars Ellenberg {
833a6b32bc3SAndreas Gruenbacher 	struct drbd_connection *connection = first_peer_device(device)->connection;
8342a67d8b9SPhilipp Reisner 
835bde89a9eSAndreas Gruenbacher 	clear_bit(GOT_PING_ACK, &connection->flags);
836bde89a9eSAndreas Gruenbacher 	request_ping(connection);
837bde89a9eSAndreas Gruenbacher 	wait_event(connection->ping_wait,
838bde89a9eSAndreas Gruenbacher 		   test_bit(GOT_PING_ACK, &connection->flags) || device->state.conn < C_CONNECTED);
839af85e8e8SLars Ellenberg }
840af85e8e8SLars Ellenberg 
841b30ab791SAndreas Gruenbacher int drbd_resync_finished(struct drbd_device *device)
842b411b363SPhilipp Reisner {
843b411b363SPhilipp Reisner 	unsigned long db, dt, dbdt;
844b411b363SPhilipp Reisner 	unsigned long n_oos;
845b411b363SPhilipp Reisner 	union drbd_state os, ns;
84684b8c06bSAndreas Gruenbacher 	struct drbd_device_work *dw;
847b411b363SPhilipp Reisner 	char *khelper_cmd = NULL;
84826525618SLars Ellenberg 	int verify_done = 0;
849b411b363SPhilipp Reisner 
850b411b363SPhilipp Reisner 	/* Remove all elements from the resync LRU. Since future actions
851b411b363SPhilipp Reisner 	 * might set bits in the (main) bitmap, the entries in the
852b411b363SPhilipp Reisner 	 * resync LRU would be wrong. */
853b30ab791SAndreas Gruenbacher 	if (drbd_rs_del_all(device)) {
854b411b363SPhilipp Reisner 		/* In case this is not possible now, most probably because
855b411b363SPhilipp Reisner 		 * there are P_RS_DATA_REPLY Packets lingering on the worker's
856b411b363SPhilipp Reisner 	 * are not finished by now).   Retry in 100ms.
857b411b363SPhilipp Reisner 		 * is not finished by now).   Retry in 100ms. */
858b411b363SPhilipp Reisner 
85920ee6390SPhilipp Reisner 		schedule_timeout_interruptible(HZ / 10);
86084b8c06bSAndreas Gruenbacher 		dw = kmalloc(sizeof(struct drbd_device_work), GFP_ATOMIC);
86184b8c06bSAndreas Gruenbacher 		if (dw) {
86284b8c06bSAndreas Gruenbacher 			dw->w.cb = w_resync_finished;
86384b8c06bSAndreas Gruenbacher 			dw->device = device;
86484b8c06bSAndreas Gruenbacher 			drbd_queue_work(&first_peer_device(device)->connection->sender_work,
86584b8c06bSAndreas Gruenbacher 					&dw->w);
866b411b363SPhilipp Reisner 			return 1;
867b411b363SPhilipp Reisner 		}
86884b8c06bSAndreas Gruenbacher 		drbd_err(device, "Warn: failed to drbd_rs_del_all() and to kmalloc(dw).\n");
869b411b363SPhilipp Reisner 	}
870b411b363SPhilipp Reisner 
871b30ab791SAndreas Gruenbacher 	dt = (jiffies - device->rs_start - device->rs_paused) / HZ;
872b411b363SPhilipp Reisner 	if (dt <= 0)
873b411b363SPhilipp Reisner 		dt = 1;
87458ffa580SLars Ellenberg 
875b30ab791SAndreas Gruenbacher 	db = device->rs_total;
87658ffa580SLars Ellenberg 	/* adjust for verify start and stop sectors, respectively the position reached */
877b30ab791SAndreas Gruenbacher 	if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
878b30ab791SAndreas Gruenbacher 		db -= device->ov_left;
87958ffa580SLars Ellenberg 
880b411b363SPhilipp Reisner 	dbdt = Bit2KB(db/dt);
881b30ab791SAndreas Gruenbacher 	device->rs_paused /= HZ;
882b411b363SPhilipp Reisner 
883b30ab791SAndreas Gruenbacher 	if (!get_ldev(device))
884b411b363SPhilipp Reisner 		goto out;
885b411b363SPhilipp Reisner 
886b30ab791SAndreas Gruenbacher 	ping_peer(device);
887af85e8e8SLars Ellenberg 
8880500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
889b30ab791SAndreas Gruenbacher 	os = drbd_read_state(device);
890b411b363SPhilipp Reisner 
89126525618SLars Ellenberg 	verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);
89226525618SLars Ellenberg 
893b411b363SPhilipp Reisner 	/* This protects us against multiple calls (that can happen in the presence
894b411b363SPhilipp Reisner 	   of application IO), and against connectivity loss just before we arrive here. */
895b411b363SPhilipp Reisner 	if (os.conn <= C_CONNECTED)
896b411b363SPhilipp Reisner 		goto out_unlock;
897b411b363SPhilipp Reisner 
898b411b363SPhilipp Reisner 	ns = os;
899b411b363SPhilipp Reisner 	ns.conn = C_CONNECTED;
900b411b363SPhilipp Reisner 
901d0180171SAndreas Gruenbacher 	drbd_info(device, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
90226525618SLars Ellenberg 	     verify_done ? "Online verify" : "Resync",
903b30ab791SAndreas Gruenbacher 	     dt + device->rs_paused, device->rs_paused, dbdt);
904b411b363SPhilipp Reisner 
905b30ab791SAndreas Gruenbacher 	n_oos = drbd_bm_total_weight(device);
906b411b363SPhilipp Reisner 
907b411b363SPhilipp Reisner 	if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
908b411b363SPhilipp Reisner 		if (n_oos) {
909d0180171SAndreas Gruenbacher 			drbd_alert(device, "Online verify found %lu %dk blocks out of sync!\n",
910b411b363SPhilipp Reisner 			      n_oos, Bit2KB(1));
911b411b363SPhilipp Reisner 			khelper_cmd = "out-of-sync";
912b411b363SPhilipp Reisner 		}
913b411b363SPhilipp Reisner 	} else {
9140b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, (n_oos - device->rs_failed) == 0);
915b411b363SPhilipp Reisner 
916b411b363SPhilipp Reisner 		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
917b411b363SPhilipp Reisner 			khelper_cmd = "after-resync-target";
918b411b363SPhilipp Reisner 
919aaaba345SLars Ellenberg 		if (device->use_csums && device->rs_total) {
920b30ab791SAndreas Gruenbacher 			const unsigned long s = device->rs_same_csum;
921b30ab791SAndreas Gruenbacher 			const unsigned long t = device->rs_total;
922b411b363SPhilipp Reisner 			const int ratio =
923b411b363SPhilipp Reisner 				(t == 0)     ? 0 :
924b411b363SPhilipp Reisner 			(t < 100000) ? ((s*100)/t) : (s/(t/100));
925d0180171SAndreas Gruenbacher 			drbd_info(device, "%u %% had equal checksums, eliminated: %luK; "
926b411b363SPhilipp Reisner 			     "transferred %luK total %luK\n",
927b411b363SPhilipp Reisner 			     ratio,
928b30ab791SAndreas Gruenbacher 			     Bit2KB(device->rs_same_csum),
929b30ab791SAndreas Gruenbacher 			     Bit2KB(device->rs_total - device->rs_same_csum),
930b30ab791SAndreas Gruenbacher 			     Bit2KB(device->rs_total));
931b411b363SPhilipp Reisner 		}
932b411b363SPhilipp Reisner 	}
933b411b363SPhilipp Reisner 
934b30ab791SAndreas Gruenbacher 	if (device->rs_failed) {
935d0180171SAndreas Gruenbacher 		drbd_info(device, "            %lu failed blocks\n", device->rs_failed);
936b411b363SPhilipp Reisner 
937b411b363SPhilipp Reisner 		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
938b411b363SPhilipp Reisner 			ns.disk = D_INCONSISTENT;
939b411b363SPhilipp Reisner 			ns.pdsk = D_UP_TO_DATE;
940b411b363SPhilipp Reisner 		} else {
941b411b363SPhilipp Reisner 			ns.disk = D_UP_TO_DATE;
942b411b363SPhilipp Reisner 			ns.pdsk = D_INCONSISTENT;
943b411b363SPhilipp Reisner 		}
944b411b363SPhilipp Reisner 	} else {
945b411b363SPhilipp Reisner 		ns.disk = D_UP_TO_DATE;
946b411b363SPhilipp Reisner 		ns.pdsk = D_UP_TO_DATE;
947b411b363SPhilipp Reisner 
948b411b363SPhilipp Reisner 		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
949b30ab791SAndreas Gruenbacher 			if (device->p_uuid) {
950b411b363SPhilipp Reisner 				int i;
951b411b363SPhilipp Reisner 				for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
952b30ab791SAndreas Gruenbacher 					_drbd_uuid_set(device, i, device->p_uuid[i]);
953b30ab791SAndreas Gruenbacher 				drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_CURRENT]);
954b30ab791SAndreas Gruenbacher 				_drbd_uuid_set(device, UI_CURRENT, device->p_uuid[UI_CURRENT]);
955b411b363SPhilipp Reisner 			} else {
956d0180171SAndreas Gruenbacher 				drbd_err(device, "device->p_uuid is NULL! BUG\n");
957b411b363SPhilipp Reisner 			}
958b411b363SPhilipp Reisner 		}
959b411b363SPhilipp Reisner 
96062b0da3aSLars Ellenberg 		if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
96162b0da3aSLars Ellenberg 			/* for verify runs, we don't update uuids here,
96262b0da3aSLars Ellenberg 			 * so there would be nothing to report. */
963b30ab791SAndreas Gruenbacher 			drbd_uuid_set_bm(device, 0UL);
964b30ab791SAndreas Gruenbacher 			drbd_print_uuids(device, "updated UUIDs");
965b30ab791SAndreas Gruenbacher 			if (device->p_uuid) {
966b411b363SPhilipp Reisner 				/* Now the two UUID sets are equal, update what we
967b411b363SPhilipp Reisner 				 * know of the peer. */
968b411b363SPhilipp Reisner 				int i;
969b411b363SPhilipp Reisner 				for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
970b30ab791SAndreas Gruenbacher 					device->p_uuid[i] = device->ldev->md.uuid[i];
971b411b363SPhilipp Reisner 			}
972b411b363SPhilipp Reisner 		}
97362b0da3aSLars Ellenberg 	}
974b411b363SPhilipp Reisner 
975b30ab791SAndreas Gruenbacher 	_drbd_set_state(device, ns, CS_VERBOSE, NULL);
976b411b363SPhilipp Reisner out_unlock:
9770500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
978b30ab791SAndreas Gruenbacher 	put_ldev(device);
979b411b363SPhilipp Reisner out:
980b30ab791SAndreas Gruenbacher 	device->rs_total  = 0;
981b30ab791SAndreas Gruenbacher 	device->rs_failed = 0;
982b30ab791SAndreas Gruenbacher 	device->rs_paused = 0;
98358ffa580SLars Ellenberg 
98458ffa580SLars Ellenberg 	/* reset start sector, if we reached end of device */
985b30ab791SAndreas Gruenbacher 	if (verify_done && device->ov_left == 0)
986b30ab791SAndreas Gruenbacher 		device->ov_start_sector = 0;
987b411b363SPhilipp Reisner 
988b30ab791SAndreas Gruenbacher 	drbd_md_sync(device);
98913d42685SLars Ellenberg 
990b411b363SPhilipp Reisner 	if (khelper_cmd)
991b30ab791SAndreas Gruenbacher 		drbd_khelper(device, khelper_cmd);
992b411b363SPhilipp Reisner 
993b411b363SPhilipp Reisner 	return 1;
994b411b363SPhilipp Reisner }
995b411b363SPhilipp Reisner 
996b411b363SPhilipp Reisner /* helper */
997b30ab791SAndreas Gruenbacher static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_request *peer_req)
998b411b363SPhilipp Reisner {
999045417f7SAndreas Gruenbacher 	if (drbd_peer_req_has_active_page(peer_req)) {
1000b411b363SPhilipp Reisner 		/* This might happen if sendpage() has not finished */
1001db830c46SAndreas Gruenbacher 		int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
1002b30ab791SAndreas Gruenbacher 		atomic_add(i, &device->pp_in_use_by_net);
1003b30ab791SAndreas Gruenbacher 		atomic_sub(i, &device->pp_in_use);
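		/* the pages are now accounted as owned by the network layer;
		 * park the request on net_ee so it is freed later, once the
		 * socket no longer references its pages */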
10040500813fSAndreas Gruenbacher 		spin_lock_irq(&device->resource->req_lock);
1005a8cd15baSAndreas Gruenbacher 		list_add_tail(&peer_req->w.list, &device->net_ee);
10060500813fSAndreas Gruenbacher 		spin_unlock_irq(&device->resource->req_lock);
1007435f0740SLars Ellenberg 		wake_up(&drbd_pp_wait);
1008b411b363SPhilipp Reisner 	} else
1009b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
1010b411b363SPhilipp Reisner }
1011b411b363SPhilipp Reisner 
1012b411b363SPhilipp Reisner /**
1013b411b363SPhilipp Reisner  * w_e_end_data_req() - Worker callback to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
1015b411b363SPhilipp Reisner  * @w:		work object.
1016b411b363SPhilipp Reisner  * @cancel:	The connection will be closed anyway
1017b411b363SPhilipp Reisner  */
101899920dc5SAndreas Gruenbacher int w_e_end_data_req(struct drbd_work *w, int cancel)
1019b411b363SPhilipp Reisner {
1020a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
10216780139cSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
10226780139cSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
102399920dc5SAndreas Gruenbacher 	int err;
1024b411b363SPhilipp Reisner 
1025b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
1026b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
1027b30ab791SAndreas Gruenbacher 		dec_unacked(device);
102899920dc5SAndreas Gruenbacher 		return 0;
1029b411b363SPhilipp Reisner 	}
1030b411b363SPhilipp Reisner 
1031db830c46SAndreas Gruenbacher 	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
10326780139cSAndreas Gruenbacher 		err = drbd_send_block(peer_device, P_DATA_REPLY, peer_req);
1033b411b363SPhilipp Reisner 	} else {
1034b411b363SPhilipp Reisner 		if (__ratelimit(&drbd_ratelimit_state))
1035d0180171SAndreas Gruenbacher 			drbd_err(device, "Sending NegDReply. sector=%llus.\n",
1036db830c46SAndreas Gruenbacher 			    (unsigned long long)peer_req->i.sector);
1037b411b363SPhilipp Reisner 
10386780139cSAndreas Gruenbacher 		err = drbd_send_ack(peer_device, P_NEG_DREPLY, peer_req);
1039b411b363SPhilipp Reisner 	}
1040b411b363SPhilipp Reisner 
1041b30ab791SAndreas Gruenbacher 	dec_unacked(device);
1042b411b363SPhilipp Reisner 
1043b30ab791SAndreas Gruenbacher 	move_to_net_ee_or_free(device, peer_req);
1044b411b363SPhilipp Reisner 
104599920dc5SAndreas Gruenbacher 	if (unlikely(err))
1046d0180171SAndreas Gruenbacher 		drbd_err(device, "drbd_send_block() failed\n");
104799920dc5SAndreas Gruenbacher 	return err;
1048b411b363SPhilipp Reisner }
1049b411b363SPhilipp Reisner 
1050b411b363SPhilipp Reisner /**
1051a209b4aeSAndreas Gruenbacher  * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
1052b411b363SPhilipp Reisner  * @w:		work object.
1053b411b363SPhilipp Reisner  * @cancel:	The connection will be closed anyway
1054b411b363SPhilipp Reisner  */
105599920dc5SAndreas Gruenbacher int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
1056b411b363SPhilipp Reisner {
1057a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
10586780139cSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
10596780139cSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
106099920dc5SAndreas Gruenbacher 	int err;
1061b411b363SPhilipp Reisner 
1062b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
1063b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
1064b30ab791SAndreas Gruenbacher 		dec_unacked(device);
106599920dc5SAndreas Gruenbacher 		return 0;
1066b411b363SPhilipp Reisner 	}
1067b411b363SPhilipp Reisner 
1068b30ab791SAndreas Gruenbacher 	if (get_ldev_if_state(device, D_FAILED)) {
1069b30ab791SAndreas Gruenbacher 		drbd_rs_complete_io(device, peer_req->i.sector);
1070b30ab791SAndreas Gruenbacher 		put_ldev(device);
1071b411b363SPhilipp Reisner 	}
1072b411b363SPhilipp Reisner 
1073b30ab791SAndreas Gruenbacher 	if (device->state.conn == C_AHEAD) {
10746780139cSAndreas Gruenbacher 		err = drbd_send_ack(peer_device, P_RS_CANCEL, peer_req);
1075db830c46SAndreas Gruenbacher 	} else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1076b30ab791SAndreas Gruenbacher 		if (likely(device->state.pdsk >= D_INCONSISTENT)) {
1077b30ab791SAndreas Gruenbacher 			inc_rs_pending(device);
10786780139cSAndreas Gruenbacher 			err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
1079b411b363SPhilipp Reisner 		} else {
1080b411b363SPhilipp Reisner 			if (__ratelimit(&drbd_ratelimit_state))
1081d0180171SAndreas Gruenbacher 				drbd_err(device, "Not sending RSDataReply, "
1082b411b363SPhilipp Reisner 				    "partner DISKLESS!\n");
108399920dc5SAndreas Gruenbacher 			err = 0;
1084b411b363SPhilipp Reisner 		}
1085b411b363SPhilipp Reisner 	} else {
1086b411b363SPhilipp Reisner 		if (__ratelimit(&drbd_ratelimit_state))
1087d0180171SAndreas Gruenbacher 			drbd_err(device, "Sending NegRSDReply. sector %llus.\n",
1088db830c46SAndreas Gruenbacher 			    (unsigned long long)peer_req->i.sector);
1089b411b363SPhilipp Reisner 
10906780139cSAndreas Gruenbacher 		err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);
1091b411b363SPhilipp Reisner 
1092b411b363SPhilipp Reisner 		/* update resync data with failure */
1093b30ab791SAndreas Gruenbacher 		drbd_rs_failed_io(device, peer_req->i.sector, peer_req->i.size);
1094b411b363SPhilipp Reisner 	}
1095b411b363SPhilipp Reisner 
1096b30ab791SAndreas Gruenbacher 	dec_unacked(device);
1097b411b363SPhilipp Reisner 
1098b30ab791SAndreas Gruenbacher 	move_to_net_ee_or_free(device, peer_req);
1099b411b363SPhilipp Reisner 
110099920dc5SAndreas Gruenbacher 	if (unlikely(err))
1101d0180171SAndreas Gruenbacher 		drbd_err(device, "drbd_send_block() failed\n");
110299920dc5SAndreas Gruenbacher 	return err;
1103b411b363SPhilipp Reisner }
1104b411b363SPhilipp Reisner 
110599920dc5SAndreas Gruenbacher int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
1106b411b363SPhilipp Reisner {
1107a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
11086780139cSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
11096780139cSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
1110b411b363SPhilipp Reisner 	struct digest_info *di;
1111b411b363SPhilipp Reisner 	int digest_size;
1112b411b363SPhilipp Reisner 	void *digest = NULL;
111399920dc5SAndreas Gruenbacher 	int err, eq = 0;
1114b411b363SPhilipp Reisner 
1115b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
1116b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
1117b30ab791SAndreas Gruenbacher 		dec_unacked(device);
111899920dc5SAndreas Gruenbacher 		return 0;
1119b411b363SPhilipp Reisner 	}
1120b411b363SPhilipp Reisner 
1121b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
1122b30ab791SAndreas Gruenbacher 		drbd_rs_complete_io(device, peer_req->i.sector);
1123b30ab791SAndreas Gruenbacher 		put_ldev(device);
11241d53f09eSLars Ellenberg 	}
1125b411b363SPhilipp Reisner 
1126db830c46SAndreas Gruenbacher 	di = peer_req->digest;
1127b411b363SPhilipp Reisner 
1128db830c46SAndreas Gruenbacher 	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1129b411b363SPhilipp Reisner 		/* quick hack to try to avoid a race against reconfiguration.
1130b411b363SPhilipp Reisner 		 * a real fix would be much more involved,
1131b411b363SPhilipp Reisner 		 * introducing more locking mechanisms */
11326780139cSAndreas Gruenbacher 		if (peer_device->connection->csums_tfm) {
11336780139cSAndreas Gruenbacher 			digest_size = crypto_hash_digestsize(peer_device->connection->csums_tfm);
11340b0ba1efSAndreas Gruenbacher 			D_ASSERT(device, digest_size == di->digest_size);
1135b411b363SPhilipp Reisner 			digest = kmalloc(digest_size, GFP_NOIO);
1136b411b363SPhilipp Reisner 		}
1137b411b363SPhilipp Reisner 		if (digest) {
11386780139cSAndreas Gruenbacher 			drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest);
1139b411b363SPhilipp Reisner 			eq = !memcmp(digest, di->digest, digest_size);
1140b411b363SPhilipp Reisner 			kfree(digest);
1141b411b363SPhilipp Reisner 		}
1142b411b363SPhilipp Reisner 
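		/* matching checksums mean the peer already has this data:
		 * acknowledge with P_RS_IS_IN_SYNC instead of resending,
		 * otherwise ship the full block as P_RS_DATA_REPLY */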
1143b411b363SPhilipp Reisner 		if (eq) {
1144b30ab791SAndreas Gruenbacher 			drbd_set_in_sync(device, peer_req->i.sector, peer_req->i.size);
1145676396d5SLars Ellenberg 			/* rs_same_csums unit is BM_BLOCK_SIZE */
1146b30ab791SAndreas Gruenbacher 			device->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT;
11476780139cSAndreas Gruenbacher 			err = drbd_send_ack(peer_device, P_RS_IS_IN_SYNC, peer_req);
1148b411b363SPhilipp Reisner 		} else {
1149b30ab791SAndreas Gruenbacher 			inc_rs_pending(device);
1150db830c46SAndreas Gruenbacher 			peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
1151db830c46SAndreas Gruenbacher 			peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */
1152204bba99SPhilipp Reisner 			kfree(di);
11536780139cSAndreas Gruenbacher 			err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
1154b411b363SPhilipp Reisner 		}
1155b411b363SPhilipp Reisner 	} else {
11566780139cSAndreas Gruenbacher 		err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);
1157b411b363SPhilipp Reisner 		if (__ratelimit(&drbd_ratelimit_state))
1158d0180171SAndreas Gruenbacher 			drbd_err(device, "Sending NegDReply. I guess it gets messy.\n");
1159b411b363SPhilipp Reisner 	}
1160b411b363SPhilipp Reisner 
1161b30ab791SAndreas Gruenbacher 	dec_unacked(device);
1162b30ab791SAndreas Gruenbacher 	move_to_net_ee_or_free(device, peer_req);
1163b411b363SPhilipp Reisner 
116499920dc5SAndreas Gruenbacher 	if (unlikely(err))
1165d0180171SAndreas Gruenbacher 		drbd_err(device, "drbd_send_block/ack() failed\n");
116699920dc5SAndreas Gruenbacher 	return err;
1167b411b363SPhilipp Reisner }
1168b411b363SPhilipp Reisner 
116999920dc5SAndreas Gruenbacher int w_e_end_ov_req(struct drbd_work *w, int cancel)
1170b411b363SPhilipp Reisner {
1171a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
11726780139cSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
11736780139cSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
1174db830c46SAndreas Gruenbacher 	sector_t sector = peer_req->i.sector;
1175db830c46SAndreas Gruenbacher 	unsigned int size = peer_req->i.size;
1176b411b363SPhilipp Reisner 	int digest_size;
1177b411b363SPhilipp Reisner 	void *digest;
117899920dc5SAndreas Gruenbacher 	int err = 0;
1179b411b363SPhilipp Reisner 
1180b411b363SPhilipp Reisner 	if (unlikely(cancel))
1181b411b363SPhilipp Reisner 		goto out;
1182b411b363SPhilipp Reisner 
11836780139cSAndreas Gruenbacher 	digest_size = crypto_hash_digestsize(peer_device->connection->verify_tfm);
1184b411b363SPhilipp Reisner 	digest = kmalloc(digest_size, GFP_NOIO);
11858f21420eSPhilipp Reisner 	if (!digest) {
118699920dc5SAndreas Gruenbacher 		err = 1;	/* terminate the connection in case the allocation failed */
11878f21420eSPhilipp Reisner 		goto out;
11888f21420eSPhilipp Reisner 	}
11898f21420eSPhilipp Reisner 
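	/* hash the block we read locally; if the local read failed, send an
	 * all-zero digest instead, which the peer's comparison will (almost
	 * certainly) report as out of sync */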
1190db830c46SAndreas Gruenbacher 	if (likely(!(peer_req->flags & EE_WAS_ERROR)))
11916780139cSAndreas Gruenbacher 		drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest);
11928f21420eSPhilipp Reisner 	else
11938f21420eSPhilipp Reisner 		memset(digest, 0, digest_size);
11948f21420eSPhilipp Reisner 
119553ea4331SLars Ellenberg 	/* Free e and pages before send.
119653ea4331SLars Ellenberg 	 * In case we block on congestion, we could otherwise run into
119753ea4331SLars Ellenberg 	 * some distributed deadlock, if the other side blocks on
119853ea4331SLars Ellenberg 	 * congestion as well, because our receiver blocks in
1199c37c8ecfSAndreas Gruenbacher 	 * drbd_alloc_pages due to pp_in_use > max_buffers. */
1200b30ab791SAndreas Gruenbacher 	drbd_free_peer_req(device, peer_req);
1201db830c46SAndreas Gruenbacher 	peer_req = NULL;
1202b30ab791SAndreas Gruenbacher 	inc_rs_pending(device);
12036780139cSAndreas Gruenbacher 	err = drbd_send_drequest_csum(peer_device, sector, size, digest, digest_size, P_OV_REPLY);
120499920dc5SAndreas Gruenbacher 	if (err)
1205b30ab791SAndreas Gruenbacher 		dec_rs_pending(device);
1206b411b363SPhilipp Reisner 	kfree(digest);
1207b411b363SPhilipp Reisner 
1208b411b363SPhilipp Reisner out:
1209db830c46SAndreas Gruenbacher 	if (peer_req)
1210b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
1211b30ab791SAndreas Gruenbacher 	dec_unacked(device);
121299920dc5SAndreas Gruenbacher 	return err;
1213b411b363SPhilipp Reisner }
1214b411b363SPhilipp Reisner 
1215b30ab791SAndreas Gruenbacher void drbd_ov_out_of_sync_found(struct drbd_device *device, sector_t sector, int size)
1216b411b363SPhilipp Reisner {
1217b30ab791SAndreas Gruenbacher 	if (device->ov_last_oos_start + device->ov_last_oos_size == sector) {
1218b30ab791SAndreas Gruenbacher 		device->ov_last_oos_size += size>>9;
1219b411b363SPhilipp Reisner 	} else {
1220b30ab791SAndreas Gruenbacher 		device->ov_last_oos_start = sector;
1221b30ab791SAndreas Gruenbacher 		device->ov_last_oos_size = size>>9;
1222b411b363SPhilipp Reisner 	}
1223b30ab791SAndreas Gruenbacher 	drbd_set_out_of_sync(device, sector, size);
1224b411b363SPhilipp Reisner }
1225b411b363SPhilipp Reisner 
122699920dc5SAndreas Gruenbacher int w_e_end_ov_reply(struct drbd_work *w, int cancel)
1227b411b363SPhilipp Reisner {
1228a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
12296780139cSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
12306780139cSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
1231b411b363SPhilipp Reisner 	struct digest_info *di;
1232b411b363SPhilipp Reisner 	void *digest;
1233db830c46SAndreas Gruenbacher 	sector_t sector = peer_req->i.sector;
1234db830c46SAndreas Gruenbacher 	unsigned int size = peer_req->i.size;
123553ea4331SLars Ellenberg 	int digest_size;
123699920dc5SAndreas Gruenbacher 	int err, eq = 0;
123758ffa580SLars Ellenberg 	bool stop_sector_reached = false;
1238b411b363SPhilipp Reisner 
1239b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
1240b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
1241b30ab791SAndreas Gruenbacher 		dec_unacked(device);
124299920dc5SAndreas Gruenbacher 		return 0;
1243b411b363SPhilipp Reisner 	}
1244b411b363SPhilipp Reisner 
1245b411b363SPhilipp Reisner 	/* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
1246b411b363SPhilipp Reisner 	 * the resync lru has been cleaned up already */
1247b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
1248b30ab791SAndreas Gruenbacher 		drbd_rs_complete_io(device, peer_req->i.sector);
1249b30ab791SAndreas Gruenbacher 		put_ldev(device);
12501d53f09eSLars Ellenberg 	}
1251b411b363SPhilipp Reisner 
1252db830c46SAndreas Gruenbacher 	di = peer_req->digest;
1253b411b363SPhilipp Reisner 
1254db830c46SAndreas Gruenbacher 	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
12556780139cSAndreas Gruenbacher 		digest_size = crypto_hash_digestsize(peer_device->connection->verify_tfm);
1256b411b363SPhilipp Reisner 		digest = kmalloc(digest_size, GFP_NOIO);
1257b411b363SPhilipp Reisner 		if (digest) {
12586780139cSAndreas Gruenbacher 			drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest);
1259b411b363SPhilipp Reisner 
12600b0ba1efSAndreas Gruenbacher 			D_ASSERT(device, digest_size == di->digest_size);
1261b411b363SPhilipp Reisner 			eq = !memcmp(digest, di->digest, digest_size);
1262b411b363SPhilipp Reisner 			kfree(digest);
1263b411b363SPhilipp Reisner 		}
1264b411b363SPhilipp Reisner 	}
1265b411b363SPhilipp Reisner 
12669676c760SLars Ellenberg 	/* Free peer_req and pages before send.
126753ea4331SLars Ellenberg 	 * In case we block on congestion, we could otherwise run into
126853ea4331SLars Ellenberg 	 * some distributed deadlock, if the other side blocks on
126953ea4331SLars Ellenberg 	 * congestion as well, because our receiver blocks in
1270c37c8ecfSAndreas Gruenbacher 	 * drbd_alloc_pages due to pp_in_use > max_buffers. */
1271b30ab791SAndreas Gruenbacher 	drbd_free_peer_req(device, peer_req);
1272b411b363SPhilipp Reisner 	if (!eq)
1273b30ab791SAndreas Gruenbacher 		drbd_ov_out_of_sync_found(device, sector, size);
1274b411b363SPhilipp Reisner 	else
1275b30ab791SAndreas Gruenbacher 		ov_out_of_sync_print(device);
1276b411b363SPhilipp Reisner 
12776780139cSAndreas Gruenbacher 	err = drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size,
1278b411b363SPhilipp Reisner 			       eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
1279b411b363SPhilipp Reisner 
1280b30ab791SAndreas Gruenbacher 	dec_unacked(device);
1281b411b363SPhilipp Reisner 
1282b30ab791SAndreas Gruenbacher 	--device->ov_left;
1283ea5442afSLars Ellenberg 
1284ea5442afSLars Ellenberg 	/* let's advance progress step marks only for every other megabyte */
1285b30ab791SAndreas Gruenbacher 	if ((device->ov_left & 0x200) == 0x200)
1286b30ab791SAndreas Gruenbacher 		drbd_advance_rs_marks(device, device->ov_left);
1287ea5442afSLars Ellenberg 
1288b30ab791SAndreas Gruenbacher 	stop_sector_reached = verify_can_do_stop_sector(device) &&
1289b30ab791SAndreas Gruenbacher 		(sector + (size>>9)) >= device->ov_stop_sector;
129058ffa580SLars Ellenberg 
1291b30ab791SAndreas Gruenbacher 	if (device->ov_left == 0 || stop_sector_reached) {
1292b30ab791SAndreas Gruenbacher 		ov_out_of_sync_print(device);
1293b30ab791SAndreas Gruenbacher 		drbd_resync_finished(device);
1294b411b363SPhilipp Reisner 	}
1295b411b363SPhilipp Reisner 
129699920dc5SAndreas Gruenbacher 	return err;
1297b411b363SPhilipp Reisner }
1298b411b363SPhilipp Reisner 
1299b6dd1a89SLars Ellenberg /* FIXME
1300b6dd1a89SLars Ellenberg  * We need to track the number of pending barrier acks,
1301b6dd1a89SLars Ellenberg  * and to be able to wait for them.
1302b6dd1a89SLars Ellenberg  * See also comment in drbd_adm_attach before drbd_suspend_io.
1303b6dd1a89SLars Ellenberg  */
1304bde89a9eSAndreas Gruenbacher static int drbd_send_barrier(struct drbd_connection *connection)
1305b411b363SPhilipp Reisner {
13069f5bdc33SAndreas Gruenbacher 	struct p_barrier *p;
1307b6dd1a89SLars Ellenberg 	struct drbd_socket *sock;
1308b411b363SPhilipp Reisner 
1309bde89a9eSAndreas Gruenbacher 	sock = &connection->data;
1310bde89a9eSAndreas Gruenbacher 	p = conn_prepare_command(connection, sock);
13119f5bdc33SAndreas Gruenbacher 	if (!p)
13129f5bdc33SAndreas Gruenbacher 		return -EIO;
1313bde89a9eSAndreas Gruenbacher 	p->barrier = connection->send.current_epoch_nr;
1314b6dd1a89SLars Ellenberg 	p->pad = 0;
1315bde89a9eSAndreas Gruenbacher 	connection->send.current_epoch_writes = 0;
1316b6dd1a89SLars Ellenberg 
1317bde89a9eSAndreas Gruenbacher 	return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0);
1318b411b363SPhilipp Reisner }
1319b411b363SPhilipp Reisner 
132099920dc5SAndreas Gruenbacher int w_send_write_hint(struct drbd_work *w, int cancel)
1321b411b363SPhilipp Reisner {
132284b8c06bSAndreas Gruenbacher 	struct drbd_device *device =
132384b8c06bSAndreas Gruenbacher 		container_of(w, struct drbd_device, unplug_work);
13249f5bdc33SAndreas Gruenbacher 	struct drbd_socket *sock;
13259f5bdc33SAndreas Gruenbacher 
1326b411b363SPhilipp Reisner 	if (cancel)
132799920dc5SAndreas Gruenbacher 		return 0;
1328a6b32bc3SAndreas Gruenbacher 	sock = &first_peer_device(device)->connection->data;
132969a22773SAndreas Gruenbacher 	if (!drbd_prepare_command(first_peer_device(device), sock))
13309f5bdc33SAndreas Gruenbacher 		return -EIO;
133169a22773SAndreas Gruenbacher 	return drbd_send_command(first_peer_device(device), sock, P_UNPLUG_REMOTE, 0, NULL, 0);
1332b411b363SPhilipp Reisner }
1333b411b363SPhilipp Reisner 
1334bde89a9eSAndreas Gruenbacher static void re_init_if_first_write(struct drbd_connection *connection, unsigned int epoch)
13354eb9b3cbSLars Ellenberg {
1336bde89a9eSAndreas Gruenbacher 	if (!connection->send.seen_any_write_yet) {
1337bde89a9eSAndreas Gruenbacher 		connection->send.seen_any_write_yet = true;
1338bde89a9eSAndreas Gruenbacher 		connection->send.current_epoch_nr = epoch;
1339bde89a9eSAndreas Gruenbacher 		connection->send.current_epoch_writes = 0;
13404eb9b3cbSLars Ellenberg 	}
13414eb9b3cbSLars Ellenberg }
13424eb9b3cbSLars Ellenberg 
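/* Close the previous write epoch with a P_BARRIER before sending a request
 * that belongs to a newer epoch, but only if that epoch actually carried
 * any writes. */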
1343bde89a9eSAndreas Gruenbacher static void maybe_send_barrier(struct drbd_connection *connection, unsigned int epoch)
13444eb9b3cbSLars Ellenberg {
13454eb9b3cbSLars Ellenberg 	/* nothing to close yet if no write has been sent on this connection */
1346bde89a9eSAndreas Gruenbacher 	if (!connection->send.seen_any_write_yet)
13474eb9b3cbSLars Ellenberg 		return;
1348bde89a9eSAndreas Gruenbacher 	if (connection->send.current_epoch_nr != epoch) {
1349bde89a9eSAndreas Gruenbacher 		if (connection->send.current_epoch_writes)
1350bde89a9eSAndreas Gruenbacher 			drbd_send_barrier(connection);
1351bde89a9eSAndreas Gruenbacher 		connection->send.current_epoch_nr = epoch;
13524eb9b3cbSLars Ellenberg 	}
13534eb9b3cbSLars Ellenberg }
13544eb9b3cbSLars Ellenberg 
13558f7bed77SAndreas Gruenbacher int w_send_out_of_sync(struct drbd_work *w, int cancel)
135673a01a18SPhilipp Reisner {
135773a01a18SPhilipp Reisner 	struct drbd_request *req = container_of(w, struct drbd_request, w);
135884b8c06bSAndreas Gruenbacher 	struct drbd_device *device = req->device;
135944a4d551SLars Ellenberg 	struct drbd_peer_device *const peer_device = first_peer_device(device);
136044a4d551SLars Ellenberg 	struct drbd_connection *const connection = peer_device->connection;
136199920dc5SAndreas Gruenbacher 	int err;
136273a01a18SPhilipp Reisner 
136373a01a18SPhilipp Reisner 	if (unlikely(cancel)) {
13648554df1cSAndreas Gruenbacher 		req_mod(req, SEND_CANCELED);
136599920dc5SAndreas Gruenbacher 		return 0;
136673a01a18SPhilipp Reisner 	}
1367e5f891b2SLars Ellenberg 	req->pre_send_jif = jiffies;
136873a01a18SPhilipp Reisner 
1369bde89a9eSAndreas Gruenbacher 	/* this time, no connection->send.current_epoch_writes++;
1370b6dd1a89SLars Ellenberg 	 * If it was sent, it was the closing barrier for the last
1371b6dd1a89SLars Ellenberg 	 * replicated epoch, before we went into AHEAD mode.
1372b6dd1a89SLars Ellenberg 	 * No more barriers will be sent, until we leave AHEAD mode again. */
1373bde89a9eSAndreas Gruenbacher 	maybe_send_barrier(connection, req->epoch);
1374b6dd1a89SLars Ellenberg 
137544a4d551SLars Ellenberg 	err = drbd_send_out_of_sync(peer_device, req);
13768554df1cSAndreas Gruenbacher 	req_mod(req, OOS_HANDED_TO_NETWORK);
137773a01a18SPhilipp Reisner 
137899920dc5SAndreas Gruenbacher 	return err;
137973a01a18SPhilipp Reisner }
138073a01a18SPhilipp Reisner 
1381b411b363SPhilipp Reisner /**
1382b411b363SPhilipp Reisner  * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
1383b411b363SPhilipp Reisner  * @w:		work object.
1384b411b363SPhilipp Reisner  * @cancel:	The connection will be closed anyway
1385b411b363SPhilipp Reisner  */
138699920dc5SAndreas Gruenbacher int w_send_dblock(struct drbd_work *w, int cancel)
1387b411b363SPhilipp Reisner {
1388b411b363SPhilipp Reisner 	struct drbd_request *req = container_of(w, struct drbd_request, w);
138984b8c06bSAndreas Gruenbacher 	struct drbd_device *device = req->device;
139044a4d551SLars Ellenberg 	struct drbd_peer_device *const peer_device = first_peer_device(device);
139144a4d551SLars Ellenberg 	struct drbd_connection *connection = peer_device->connection;
139299920dc5SAndreas Gruenbacher 	int err;
1393b411b363SPhilipp Reisner 
1394b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
13958554df1cSAndreas Gruenbacher 		req_mod(req, SEND_CANCELED);
139699920dc5SAndreas Gruenbacher 		return 0;
1397b411b363SPhilipp Reisner 	}
1398e5f891b2SLars Ellenberg 	req->pre_send_jif = jiffies;
1399b411b363SPhilipp Reisner 
1400bde89a9eSAndreas Gruenbacher 	re_init_if_first_write(connection, req->epoch);
1401bde89a9eSAndreas Gruenbacher 	maybe_send_barrier(connection, req->epoch);
1402bde89a9eSAndreas Gruenbacher 	connection->send.current_epoch_writes++;
1403b6dd1a89SLars Ellenberg 
140444a4d551SLars Ellenberg 	err = drbd_send_dblock(peer_device, req);
140599920dc5SAndreas Gruenbacher 	req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
1406b411b363SPhilipp Reisner 
140799920dc5SAndreas Gruenbacher 	return err;
1408b411b363SPhilipp Reisner }
1409b411b363SPhilipp Reisner 
1410b411b363SPhilipp Reisner /**
1411b411b363SPhilipp Reisner  * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
1412b411b363SPhilipp Reisner  * @w:		work object.
1413b411b363SPhilipp Reisner  * @cancel:	The connection will be closed anyway
1414b411b363SPhilipp Reisner  */
141599920dc5SAndreas Gruenbacher int w_send_read_req(struct drbd_work *w, int cancel)
1416b411b363SPhilipp Reisner {
1417b411b363SPhilipp Reisner 	struct drbd_request *req = container_of(w, struct drbd_request, w);
141884b8c06bSAndreas Gruenbacher 	struct drbd_device *device = req->device;
141944a4d551SLars Ellenberg 	struct drbd_peer_device *const peer_device = first_peer_device(device);
142044a4d551SLars Ellenberg 	struct drbd_connection *connection = peer_device->connection;
142199920dc5SAndreas Gruenbacher 	int err;
1422b411b363SPhilipp Reisner 
1423b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
14248554df1cSAndreas Gruenbacher 		req_mod(req, SEND_CANCELED);
142599920dc5SAndreas Gruenbacher 		return 0;
1426b411b363SPhilipp Reisner 	}
1427e5f891b2SLars Ellenberg 	req->pre_send_jif = jiffies;
1428b411b363SPhilipp Reisner 
1429b6dd1a89SLars Ellenberg 	/* Even read requests may close a write epoch,
1430b6dd1a89SLars Ellenberg 	 * if there has been any write yet. */
1431bde89a9eSAndreas Gruenbacher 	maybe_send_barrier(connection, req->epoch);
1432b6dd1a89SLars Ellenberg 
143344a4d551SLars Ellenberg 	err = drbd_send_drequest(peer_device, P_DATA_REQUEST, req->i.sector, req->i.size,
1434b411b363SPhilipp Reisner 				 (unsigned long)req);
1435b411b363SPhilipp Reisner 
143699920dc5SAndreas Gruenbacher 	req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
1437b411b363SPhilipp Reisner 
143899920dc5SAndreas Gruenbacher 	return err;
1439b411b363SPhilipp Reisner }
1440b411b363SPhilipp Reisner 
144199920dc5SAndreas Gruenbacher int w_restart_disk_io(struct drbd_work *w, int cancel)
1442265be2d0SPhilipp Reisner {
1443265be2d0SPhilipp Reisner 	struct drbd_request *req = container_of(w, struct drbd_request, w);
144484b8c06bSAndreas Gruenbacher 	struct drbd_device *device = req->device;
1445265be2d0SPhilipp Reisner 
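	/* resubmit the request to the local backing device; writes that were
	 * already in the activity log get their AL extent re-activated first */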
14460778286aSPhilipp Reisner 	if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
14474dd726f0SLars Ellenberg 		drbd_al_begin_io(device, &req->i);
1448265be2d0SPhilipp Reisner 
1449265be2d0SPhilipp Reisner 	drbd_req_make_private_bio(req, req->master_bio);
1450b30ab791SAndreas Gruenbacher 	req->private_bio->bi_bdev = device->ldev->backing_bdev;
1451265be2d0SPhilipp Reisner 	generic_make_request(req->private_bio);
1452265be2d0SPhilipp Reisner 
145399920dc5SAndreas Gruenbacher 	return 0;
1454265be2d0SPhilipp Reisner }
1455265be2d0SPhilipp Reisner 
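/* Follow the resync-after dependency chain: resync on this device may only
 * proceed if no device it (transitively) depends on is itself resyncing or
 * has its sync paused. */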
1456b30ab791SAndreas Gruenbacher static int _drbd_may_sync_now(struct drbd_device *device)
1457b411b363SPhilipp Reisner {
1458b30ab791SAndreas Gruenbacher 	struct drbd_device *odev = device;
145995f8efd0SAndreas Gruenbacher 	int resync_after;
1460b411b363SPhilipp Reisner 
1461b411b363SPhilipp Reisner 	while (1) {
1462a3f8f7dcSLars Ellenberg 		if (!odev->ldev || odev->state.disk == D_DISKLESS)
1463438c8374SPhilipp Reisner 			return 1;
1464daeda1ccSPhilipp Reisner 		rcu_read_lock();
146595f8efd0SAndreas Gruenbacher 		resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
1466daeda1ccSPhilipp Reisner 		rcu_read_unlock();
146795f8efd0SAndreas Gruenbacher 		if (resync_after == -1)
1468b411b363SPhilipp Reisner 			return 1;
1469b30ab791SAndreas Gruenbacher 		odev = minor_to_device(resync_after);
1470a3f8f7dcSLars Ellenberg 		if (!odev)
1471841ce241SAndreas Gruenbacher 			return 1;
1472b411b363SPhilipp Reisner 		if ((odev->state.conn >= C_SYNC_SOURCE &&
1473b411b363SPhilipp Reisner 		     odev->state.conn <= C_PAUSED_SYNC_T) ||
1474b411b363SPhilipp Reisner 		    odev->state.aftr_isp || odev->state.peer_isp ||
1475b411b363SPhilipp Reisner 		    odev->state.user_isp)
1476b411b363SPhilipp Reisner 			return 0;
1477b411b363SPhilipp Reisner 	}
1478b411b363SPhilipp Reisner }
1479b411b363SPhilipp Reisner 
1480b411b363SPhilipp Reisner /**
1481b411b363SPhilipp Reisner  * _drbd_pause_after() - Pause resync on all devices that may not resync now
1482b30ab791SAndreas Gruenbacher  * @device:	DRBD device.
1483b411b363SPhilipp Reisner  *
1484b411b363SPhilipp Reisner  * Called from process context only (admin command and after_state_ch).
1485b411b363SPhilipp Reisner  */
1486b30ab791SAndreas Gruenbacher static int _drbd_pause_after(struct drbd_device *device)
1487b411b363SPhilipp Reisner {
148854761697SAndreas Gruenbacher 	struct drbd_device *odev;
1489b411b363SPhilipp Reisner 	int i, rv = 0;
1490b411b363SPhilipp Reisner 
1491695d08faSPhilipp Reisner 	rcu_read_lock();
149205a10ec7SAndreas Gruenbacher 	idr_for_each_entry(&drbd_devices, odev, i) {
1493b411b363SPhilipp Reisner 		if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1494b411b363SPhilipp Reisner 			continue;
1495b411b363SPhilipp Reisner 		if (!_drbd_may_sync_now(odev))
1496b411b363SPhilipp Reisner 			rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL)
1497b411b363SPhilipp Reisner 			       != SS_NOTHING_TO_DO);
1498b411b363SPhilipp Reisner 	}
1499695d08faSPhilipp Reisner 	rcu_read_unlock();
1500b411b363SPhilipp Reisner 
1501b411b363SPhilipp Reisner 	return rv;
1502b411b363SPhilipp Reisner }
1503b411b363SPhilipp Reisner 
1504b411b363SPhilipp Reisner /**
1505b411b363SPhilipp Reisner  * _drbd_resume_next() - Resume resync on all devices that may resync now
1506b30ab791SAndreas Gruenbacher  * @device:	DRBD device.
1507b411b363SPhilipp Reisner  *
1508b411b363SPhilipp Reisner  * Called from process context only (admin command and worker).
1509b411b363SPhilipp Reisner  */
1510b30ab791SAndreas Gruenbacher static int _drbd_resume_next(struct drbd_device *device)
1511b411b363SPhilipp Reisner {
151254761697SAndreas Gruenbacher 	struct drbd_device *odev;
1513b411b363SPhilipp Reisner 	int i, rv = 0;
1514b411b363SPhilipp Reisner 
1515695d08faSPhilipp Reisner 	rcu_read_lock();
151605a10ec7SAndreas Gruenbacher 	idr_for_each_entry(&drbd_devices, odev, i) {
1517b411b363SPhilipp Reisner 		if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1518b411b363SPhilipp Reisner 			continue;
1519b411b363SPhilipp Reisner 		if (odev->state.aftr_isp) {
1520b411b363SPhilipp Reisner 			if (_drbd_may_sync_now(odev))
1521b411b363SPhilipp Reisner 				rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0),
1522b411b363SPhilipp Reisner 							CS_HARD, NULL)
1523b411b363SPhilipp Reisner 				       != SS_NOTHING_TO_DO) ;
1524b411b363SPhilipp Reisner 		}
1525b411b363SPhilipp Reisner 	}
1526695d08faSPhilipp Reisner 	rcu_read_unlock();
1527b411b363SPhilipp Reisner 	return rv;
1528b411b363SPhilipp Reisner }
1529b411b363SPhilipp Reisner 
1530b30ab791SAndreas Gruenbacher void resume_next_sg(struct drbd_device *device)
1531b411b363SPhilipp Reisner {
1532b411b363SPhilipp Reisner 	write_lock_irq(&global_state_lock);
1533b30ab791SAndreas Gruenbacher 	_drbd_resume_next(device);
1534b411b363SPhilipp Reisner 	write_unlock_irq(&global_state_lock);
1535b411b363SPhilipp Reisner }
1536b411b363SPhilipp Reisner 
1537b30ab791SAndreas Gruenbacher void suspend_other_sg(struct drbd_device *device)
1538b411b363SPhilipp Reisner {
1539b411b363SPhilipp Reisner 	write_lock_irq(&global_state_lock);
1540b30ab791SAndreas Gruenbacher 	_drbd_pause_after(device);
1541b411b363SPhilipp Reisner 	write_unlock_irq(&global_state_lock);
1542b411b363SPhilipp Reisner }
1543b411b363SPhilipp Reisner 
1544dc97b708SPhilipp Reisner /* caller must hold global_state_lock */
1545b30ab791SAndreas Gruenbacher enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor)
1546b411b363SPhilipp Reisner {
154754761697SAndreas Gruenbacher 	struct drbd_device *odev;
154895f8efd0SAndreas Gruenbacher 	int resync_after;
1549b411b363SPhilipp Reisner 
1550b411b363SPhilipp Reisner 	if (o_minor == -1)
1551b411b363SPhilipp Reisner 		return NO_ERROR;
1552a3f8f7dcSLars Ellenberg 	if (o_minor < -1 || o_minor > MINORMASK)
155395f8efd0SAndreas Gruenbacher 		return ERR_RESYNC_AFTER;
1554b411b363SPhilipp Reisner 
1555b411b363SPhilipp Reisner 	/* check for loops */
1556b30ab791SAndreas Gruenbacher 	odev = minor_to_device(o_minor);
1557b411b363SPhilipp Reisner 	while (1) {
1558b30ab791SAndreas Gruenbacher 		if (odev == device)
155995f8efd0SAndreas Gruenbacher 			return ERR_RESYNC_AFTER_CYCLE;
1560b411b363SPhilipp Reisner 
1561a3f8f7dcSLars Ellenberg 		/* You are free to depend on diskless, non-existing,
1562a3f8f7dcSLars Ellenberg 		 * or not yet/no longer existing minors.
1563a3f8f7dcSLars Ellenberg 		 * We only reject dependency loops.
1564a3f8f7dcSLars Ellenberg 		 * We cannot follow the dependency chain beyond a detached or
1565a3f8f7dcSLars Ellenberg 		 * missing minor.
1566a3f8f7dcSLars Ellenberg 		 */
1567a3f8f7dcSLars Ellenberg 		if (!odev || !odev->ldev || odev->state.disk == D_DISKLESS)
1568a3f8f7dcSLars Ellenberg 			return NO_ERROR;
1569a3f8f7dcSLars Ellenberg 
1570daeda1ccSPhilipp Reisner 		rcu_read_lock();
157195f8efd0SAndreas Gruenbacher 		resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
1572daeda1ccSPhilipp Reisner 		rcu_read_unlock();
1573b411b363SPhilipp Reisner 		/* dependency chain ends here, no cycles. */
157495f8efd0SAndreas Gruenbacher 		if (resync_after == -1)
1575b411b363SPhilipp Reisner 			return NO_ERROR;
1576b411b363SPhilipp Reisner 
1577b411b363SPhilipp Reisner 		/* follow the dependency chain */
1578b30ab791SAndreas Gruenbacher 		odev = minor_to_device(resync_after);
1579b411b363SPhilipp Reisner 	}
1580b411b363SPhilipp Reisner }
1581b411b363SPhilipp Reisner 
1582dc97b708SPhilipp Reisner /* caller must hold global_state_lock */
1583b30ab791SAndreas Gruenbacher void drbd_resync_after_changed(struct drbd_device *device)
1584b411b363SPhilipp Reisner {
1585b411b363SPhilipp Reisner 	int changes;
1586b411b363SPhilipp Reisner 
1587b411b363SPhilipp Reisner 	do {
1588b30ab791SAndreas Gruenbacher 		changes  = _drbd_pause_after(device);
1589b30ab791SAndreas Gruenbacher 		changes |= _drbd_resume_next(device);
1590b411b363SPhilipp Reisner 	} while (changes);
1591b411b363SPhilipp Reisner }
1592b411b363SPhilipp Reisner 
1593b30ab791SAndreas Gruenbacher void drbd_rs_controller_reset(struct drbd_device *device)
15949bd28d3cSLars Ellenberg {
1595813472ceSPhilipp Reisner 	struct fifo_buffer *plan;
1596813472ceSPhilipp Reisner 
1597b30ab791SAndreas Gruenbacher 	atomic_set(&device->rs_sect_in, 0);
1598b30ab791SAndreas Gruenbacher 	atomic_set(&device->rs_sect_ev, 0);
1599b30ab791SAndreas Gruenbacher 	device->rs_in_flight = 0;
1600813472ceSPhilipp Reisner 
1601813472ceSPhilipp Reisner 	/* Updating the RCU protected object in place is necessary since
1602813472ceSPhilipp Reisner 	   this function gets called from atomic context.
1603813472ceSPhilipp Reisner 	   It is valid since all other updates also lead to a completely
1604813472ceSPhilipp Reisner 	   empty fifo */
1605813472ceSPhilipp Reisner 	rcu_read_lock();
1606b30ab791SAndreas Gruenbacher 	plan = rcu_dereference(device->rs_plan_s);
1607813472ceSPhilipp Reisner 	plan->total = 0;
1608813472ceSPhilipp Reisner 	fifo_set(plan, 0);
1609813472ceSPhilipp Reisner 	rcu_read_unlock();
16109bd28d3cSLars Ellenberg }
16119bd28d3cSLars Ellenberg 
16121f04af33SPhilipp Reisner void start_resync_timer_fn(unsigned long data)
16131f04af33SPhilipp Reisner {
1614b30ab791SAndreas Gruenbacher 	struct drbd_device *device = (struct drbd_device *) data;
1615ac0acb9eSLars Ellenberg 	drbd_device_post_work(device, RS_START);
16161f04af33SPhilipp Reisner }
16171f04af33SPhilipp Reisner 
1618ac0acb9eSLars Ellenberg static void do_start_resync(struct drbd_device *device)
16191f04af33SPhilipp Reisner {
1620b30ab791SAndreas Gruenbacher 	if (atomic_read(&device->unacked_cnt) || atomic_read(&device->rs_pending_cnt)) {
1621ac0acb9eSLars Ellenberg 		drbd_warn(device, "postponing start_resync ...\n");
1622b30ab791SAndreas Gruenbacher 		device->start_resync_timer.expires = jiffies + HZ/10;
1623b30ab791SAndreas Gruenbacher 		add_timer(&device->start_resync_timer);
1624ac0acb9eSLars Ellenberg 		return;
16251f04af33SPhilipp Reisner 	}
16261f04af33SPhilipp Reisner 
1627b30ab791SAndreas Gruenbacher 	drbd_start_resync(device, C_SYNC_SOURCE);
1628b30ab791SAndreas Gruenbacher 	clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags);
16291f04af33SPhilipp Reisner }
16301f04af33SPhilipp Reisner 
1631aaaba345SLars Ellenberg static bool use_checksum_based_resync(struct drbd_connection *connection, struct drbd_device *device)
1632aaaba345SLars Ellenberg {
1633aaaba345SLars Ellenberg 	bool csums_after_crash_only;
1634aaaba345SLars Ellenberg 	rcu_read_lock();
1635aaaba345SLars Ellenberg 	csums_after_crash_only = rcu_dereference(connection->net_conf)->csums_after_crash_only;
1636aaaba345SLars Ellenberg 	rcu_read_unlock();
1637aaaba345SLars Ellenberg 	return connection->agreed_pro_version >= 89 &&		/* supported? */
1638aaaba345SLars Ellenberg 		connection->csums_tfm &&			/* configured? */
1639aaaba345SLars Ellenberg 		(csums_after_crash_only == 0			/* use for each resync? */
1640aaaba345SLars Ellenberg 		 || test_bit(CRASHED_PRIMARY, &device->flags));	/* or only after Primary crash? */
1641aaaba345SLars Ellenberg }
1642aaaba345SLars Ellenberg 
1643b411b363SPhilipp Reisner /**
1644b411b363SPhilipp Reisner  * drbd_start_resync() - Start the resync process
1645b30ab791SAndreas Gruenbacher  * @device:	DRBD device.
1646b411b363SPhilipp Reisner  * @side:	Either C_SYNC_SOURCE or C_SYNC_TARGET
1647b411b363SPhilipp Reisner  *
1648b411b363SPhilipp Reisner  * This function might bring you directly into one of the
1649b411b363SPhilipp Reisner  * C_PAUSED_SYNC_* states.
1650b411b363SPhilipp Reisner  */
1651b30ab791SAndreas Gruenbacher void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
1652b411b363SPhilipp Reisner {
165344a4d551SLars Ellenberg 	struct drbd_peer_device *peer_device = first_peer_device(device);
165444a4d551SLars Ellenberg 	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
1655b411b363SPhilipp Reisner 	union drbd_state ns;
1656b411b363SPhilipp Reisner 	int r;
1657b411b363SPhilipp Reisner 
1658b30ab791SAndreas Gruenbacher 	if (device->state.conn >= C_SYNC_SOURCE && device->state.conn < C_AHEAD) {
1659d0180171SAndreas Gruenbacher 		drbd_err(device, "Resync already running!\n");
1660b411b363SPhilipp Reisner 		return;
1661b411b363SPhilipp Reisner 	}
1662b411b363SPhilipp Reisner 
1663b30ab791SAndreas Gruenbacher 	if (!test_bit(B_RS_H_DONE, &device->flags)) {
1664b411b363SPhilipp Reisner 		if (side == C_SYNC_TARGET) {
1665b411b363SPhilipp Reisner 			/* Since application IO was locked out during C_WF_BITMAP_T and
1666b411b363SPhilipp Reisner 			   C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
1667b411b363SPhilipp Reisner 			   we check whether we are allowed to make the data inconsistent. */
1668b30ab791SAndreas Gruenbacher 			r = drbd_khelper(device, "before-resync-target");
1669b411b363SPhilipp Reisner 			r = (r >> 8) & 0xff;
1670b411b363SPhilipp Reisner 			if (r > 0) {
1671d0180171SAndreas Gruenbacher 				drbd_info(device, "before-resync-target handler returned %d, "
1672b411b363SPhilipp Reisner 					 "dropping connection.\n", r);
167344a4d551SLars Ellenberg 				conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
1674b411b363SPhilipp Reisner 				return;
1675b411b363SPhilipp Reisner 			}
167609b9e797SPhilipp Reisner 		} else /* C_SYNC_SOURCE */ {
1677b30ab791SAndreas Gruenbacher 			r = drbd_khelper(device, "before-resync-source");
167809b9e797SPhilipp Reisner 			r = (r >> 8) & 0xff;
167909b9e797SPhilipp Reisner 			if (r > 0) {
168009b9e797SPhilipp Reisner 				if (r == 3) {
1681d0180171SAndreas Gruenbacher 					drbd_info(device, "before-resync-source handler returned %d, "
168209b9e797SPhilipp Reisner 						 "ignoring. Old userland tools?", r);
168309b9e797SPhilipp Reisner 				} else {
1684d0180171SAndreas Gruenbacher 					drbd_info(device, "before-resync-source handler returned %d, "
168509b9e797SPhilipp Reisner 						 "dropping connection.\n", r);
168644a4d551SLars Ellenberg 					conn_request_state(connection,
1687a6b32bc3SAndreas Gruenbacher 							   NS(conn, C_DISCONNECTING), CS_HARD);
168809b9e797SPhilipp Reisner 					return;
168909b9e797SPhilipp Reisner 				}
169009b9e797SPhilipp Reisner 			}
1691b411b363SPhilipp Reisner 		}
1692e64a3294SPhilipp Reisner 	}
1693b411b363SPhilipp Reisner 
169444a4d551SLars Ellenberg 	if (current == connection->worker.task) {
1695dad20554SPhilipp Reisner 		/* The worker should not sleep waiting for state_mutex,
1696e64a3294SPhilipp Reisner 		   as that can take long */
1697b30ab791SAndreas Gruenbacher 		if (!mutex_trylock(device->state_mutex)) {
1698b30ab791SAndreas Gruenbacher 			set_bit(B_RS_H_DONE, &device->flags);
1699b30ab791SAndreas Gruenbacher 			device->start_resync_timer.expires = jiffies + HZ/5;
1700b30ab791SAndreas Gruenbacher 			add_timer(&device->start_resync_timer);
1701e64a3294SPhilipp Reisner 			return;
1702e64a3294SPhilipp Reisner 		}
1703e64a3294SPhilipp Reisner 	} else {
1704b30ab791SAndreas Gruenbacher 		mutex_lock(device->state_mutex);
1705e64a3294SPhilipp Reisner 	}
1706b30ab791SAndreas Gruenbacher 	clear_bit(B_RS_H_DONE, &device->flags);
1707b411b363SPhilipp Reisner 
1708074f4afeSLars Ellenberg 	/* req_lock: serialize with drbd_send_and_submit() and others
1709074f4afeSLars Ellenberg 	 * global_state_lock: for stable sync-after dependencies */
1710074f4afeSLars Ellenberg 	spin_lock_irq(&device->resource->req_lock);
1711074f4afeSLars Ellenberg 	write_lock(&global_state_lock);
1712a700471bSPhilipp Reisner 	/* Did some connection breakage or IO error race with us? */
1713b30ab791SAndreas Gruenbacher 	if (device->state.conn < C_CONNECTED
1714b30ab791SAndreas Gruenbacher 	|| !get_ldev_if_state(device, D_NEGOTIATING)) {
1715074f4afeSLars Ellenberg 		write_unlock(&global_state_lock);
1716074f4afeSLars Ellenberg 		spin_unlock_irq(&device->resource->req_lock);
1717b30ab791SAndreas Gruenbacher 		mutex_unlock(device->state_mutex);
1718b411b363SPhilipp Reisner 		return;
1719b411b363SPhilipp Reisner 	}
1720b411b363SPhilipp Reisner 
1721b30ab791SAndreas Gruenbacher 	ns = drbd_read_state(device);
1722b411b363SPhilipp Reisner 
1723b30ab791SAndreas Gruenbacher 	ns.aftr_isp = !_drbd_may_sync_now(device);
1724b411b363SPhilipp Reisner 
1725b411b363SPhilipp Reisner 	ns.conn = side;
1726b411b363SPhilipp Reisner 
1727b411b363SPhilipp Reisner 	if (side == C_SYNC_TARGET)
1728b411b363SPhilipp Reisner 		ns.disk = D_INCONSISTENT;
1729b411b363SPhilipp Reisner 	else /* side == C_SYNC_SOURCE */
1730b411b363SPhilipp Reisner 		ns.pdsk = D_INCONSISTENT;
1731b411b363SPhilipp Reisner 
1732b30ab791SAndreas Gruenbacher 	r = __drbd_set_state(device, ns, CS_VERBOSE, NULL);
1733b30ab791SAndreas Gruenbacher 	ns = drbd_read_state(device);
1734b411b363SPhilipp Reisner 
1735b411b363SPhilipp Reisner 	if (ns.conn < C_CONNECTED)
1736b411b363SPhilipp Reisner 		r = SS_UNKNOWN_ERROR;
1737b411b363SPhilipp Reisner 
1738b411b363SPhilipp Reisner 	if (r == SS_SUCCESS) {
1739b30ab791SAndreas Gruenbacher 		unsigned long tw = drbd_bm_total_weight(device);
17401d7734a0SLars Ellenberg 		unsigned long now = jiffies;
17411d7734a0SLars Ellenberg 		int i;
17421d7734a0SLars Ellenberg 
1743b30ab791SAndreas Gruenbacher 		device->rs_failed    = 0;
1744b30ab791SAndreas Gruenbacher 		device->rs_paused    = 0;
1745b30ab791SAndreas Gruenbacher 		device->rs_same_csum = 0;
1746b30ab791SAndreas Gruenbacher 		device->rs_last_events = 0;
1747b30ab791SAndreas Gruenbacher 		device->rs_last_sect_ev = 0;
1748b30ab791SAndreas Gruenbacher 		device->rs_total     = tw;
1749b30ab791SAndreas Gruenbacher 		device->rs_start     = now;
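		/* seed all progress marks with the starting values; resync
		 * speed estimates are later derived from how these change */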
17501d7734a0SLars Ellenberg 		for (i = 0; i < DRBD_SYNC_MARKS; i++) {
1751b30ab791SAndreas Gruenbacher 			device->rs_mark_left[i] = tw;
1752b30ab791SAndreas Gruenbacher 			device->rs_mark_time[i] = now;
17531d7734a0SLars Ellenberg 		}
1754b30ab791SAndreas Gruenbacher 		_drbd_pause_after(device);
17555ab7d2c0SLars Ellenberg 		/* Forget potentially stale cached per resync extent bit-counts.
17565ab7d2c0SLars Ellenberg 		 * Open-coded drbd_rs_cancel_all(device): we already have IRQs
17575ab7d2c0SLars Ellenberg 		 * disabled, and know the disk state is ok. */
17585ab7d2c0SLars Ellenberg 		spin_lock(&device->al_lock);
17595ab7d2c0SLars Ellenberg 		lc_reset(device->resync);
17605ab7d2c0SLars Ellenberg 		device->resync_locked = 0;
17615ab7d2c0SLars Ellenberg 		device->resync_wenr = LC_FREE;
17625ab7d2c0SLars Ellenberg 		spin_unlock(&device->al_lock);
1763b411b363SPhilipp Reisner 	}
1764074f4afeSLars Ellenberg 	write_unlock(&global_state_lock);
1765074f4afeSLars Ellenberg 	spin_unlock_irq(&device->resource->req_lock);
17665a22db89SLars Ellenberg 
17676c922ed5SLars Ellenberg 	if (r == SS_SUCCESS) {
17685ab7d2c0SLars Ellenberg 		wake_up(&device->al_wait); /* for lc_reset() above */
1769328e0f12SPhilipp Reisner 		/* reset rs_last_bcast when a resync or verify is started,
1770328e0f12SPhilipp Reisner 		 * to deal with potential jiffies wrap. */
1771b30ab791SAndreas Gruenbacher 		device->rs_last_bcast = jiffies - HZ;
1772328e0f12SPhilipp Reisner 
1773d0180171SAndreas Gruenbacher 		drbd_info(device, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
17746c922ed5SLars Ellenberg 		     drbd_conn_str(ns.conn),
1775b30ab791SAndreas Gruenbacher 		     (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10),
1776b30ab791SAndreas Gruenbacher 		     (unsigned long) device->rs_total);
1777aaaba345SLars Ellenberg 		if (side == C_SYNC_TARGET) {
1778b30ab791SAndreas Gruenbacher 			device->bm_resync_fo = 0;
1779aaaba345SLars Ellenberg 			device->use_csums = use_checksum_based_resync(connection, device);
1780aaaba345SLars Ellenberg 		} else {
1781aaaba345SLars Ellenberg 			device->use_csums = 0;
1782aaaba345SLars Ellenberg 		}
17835a22db89SLars Ellenberg 
17845a22db89SLars Ellenberg 		/* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
17855a22db89SLars Ellenberg 		 * with w_send_oos, or the sync target will get confused as to
17865a22db89SLars Ellenberg 		 * how many bits to resync.  We cannot do that always, because for an
17875a22db89SLars Ellenberg 		 * empty resync and protocol < 95, we need to do it here, as we call
17885a22db89SLars Ellenberg 		 * drbd_resync_finished from here in that case.
17895a22db89SLars Ellenberg 		 * We drbd_gen_and_send_sync_uuid here for protocol < 96,
17905a22db89SLars Ellenberg 		 * and from after_state_ch otherwise. */
179144a4d551SLars Ellenberg 		if (side == C_SYNC_SOURCE && connection->agreed_pro_version < 96)
179244a4d551SLars Ellenberg 			drbd_gen_and_send_sync_uuid(peer_device);
1793b411b363SPhilipp Reisner 
179444a4d551SLars Ellenberg 		if (connection->agreed_pro_version < 95 && device->rs_total == 0) {
1795af85e8e8SLars Ellenberg 			/* This still has a race (about when exactly the peers
1796af85e8e8SLars Ellenberg 			 * detect connection loss) that can lead to a full sync
1797af85e8e8SLars Ellenberg 			 * on next handshake. In 8.3.9 we fixed this with explicit
1798af85e8e8SLars Ellenberg 			 * resync-finished notifications, but the fix
1799af85e8e8SLars Ellenberg 			 * introduces a protocol change.  Sleeping for some
1800af85e8e8SLars Ellenberg 			 * time longer than the ping interval + timeout on the
1801af85e8e8SLars Ellenberg 			 * SyncSource, to give the SyncTarget the chance to
1802af85e8e8SLars Ellenberg 			 * detect connection loss, then waiting for a ping
1803af85e8e8SLars Ellenberg 			 * response (implicit in drbd_resync_finished) reduces
1804af85e8e8SLars Ellenberg 			 * the race considerably, but does not solve it. */
180544ed167dSPhilipp Reisner 			if (side == C_SYNC_SOURCE) {
180644ed167dSPhilipp Reisner 				struct net_conf *nc;
180744ed167dSPhilipp Reisner 				int timeo;
180844ed167dSPhilipp Reisner 
180944ed167dSPhilipp Reisner 				rcu_read_lock();
181044a4d551SLars Ellenberg 				nc = rcu_dereference(connection->net_conf);
181144ed167dSPhilipp Reisner 				timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9;
181244ed167dSPhilipp Reisner 				rcu_read_unlock();
181344ed167dSPhilipp Reisner 				schedule_timeout_interruptible(timeo);
181444ed167dSPhilipp Reisner 			}
1815b30ab791SAndreas Gruenbacher 			drbd_resync_finished(device);
1816b411b363SPhilipp Reisner 		}
1817b411b363SPhilipp Reisner 
1818b30ab791SAndreas Gruenbacher 		drbd_rs_controller_reset(device);
1819b30ab791SAndreas Gruenbacher 		/* ns.conn may already be != device->state.conn,
1820b411b363SPhilipp Reisner 		 * we may have been paused in between, or become paused until
1821b411b363SPhilipp Reisner 		 * the timer triggers.
1822b411b363SPhilipp Reisner 		 * No matter, that is handled in resync_timer_fn() */
1823b411b363SPhilipp Reisner 		if (ns.conn == C_SYNC_TARGET)
1824b30ab791SAndreas Gruenbacher 			mod_timer(&device->resync_timer, jiffies);
1825b411b363SPhilipp Reisner 
1826b30ab791SAndreas Gruenbacher 		drbd_md_sync(device);
1827b411b363SPhilipp Reisner 	}
1828b30ab791SAndreas Gruenbacher 	put_ldev(device);
1829b30ab791SAndreas Gruenbacher 	mutex_unlock(device->state_mutex);
1830b411b363SPhilipp Reisner }
1831b411b363SPhilipp Reisner 
1832e334f550SLars Ellenberg static void update_on_disk_bitmap(struct drbd_device *device, bool resync_done)
1833c7a58db4SLars Ellenberg {
1834c7a58db4SLars Ellenberg 	struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, };
1835c7a58db4SLars Ellenberg 	device->rs_last_bcast = jiffies;
1836c7a58db4SLars Ellenberg 
1837c7a58db4SLars Ellenberg 	if (!get_ldev(device))
1838c7a58db4SLars Ellenberg 		return;
1839c7a58db4SLars Ellenberg 
1840c7a58db4SLars Ellenberg 	drbd_bm_write_lazy(device, 0);
18415ab7d2c0SLars Ellenberg 	if (resync_done && is_sync_state(device->state.conn))
1842c7a58db4SLars Ellenberg 		drbd_resync_finished(device);
18435ab7d2c0SLars Ellenberg 
1844c7a58db4SLars Ellenberg 	drbd_bcast_event(device, &sib);
1845c7a58db4SLars Ellenberg 	/* update timestamp, in case it took a while to write out stuff */
1846c7a58db4SLars Ellenberg 	device->rs_last_bcast = jiffies;
1847c7a58db4SLars Ellenberg 	put_ldev(device);
1848c7a58db4SLars Ellenberg }
1849c7a58db4SLars Ellenberg 
1850e334f550SLars Ellenberg static void drbd_ldev_destroy(struct drbd_device *device)
1851e334f550SLars Ellenberg {
1852e334f550SLars Ellenberg 	lc_destroy(device->resync);
1853e334f550SLars Ellenberg 	device->resync = NULL;
1854e334f550SLars Ellenberg 	lc_destroy(device->act_log);
1855e334f550SLars Ellenberg 	device->act_log = NULL;
1856e334f550SLars Ellenberg 	__no_warn(local,
1857e334f550SLars Ellenberg 		drbd_free_ldev(device->ldev);
1858e334f550SLars Ellenberg 		device->ldev = NULL;);
1859e334f550SLars Ellenberg 	clear_bit(GOING_DISKLESS, &device->flags);
1860e334f550SLars Ellenberg 	wake_up(&device->misc_wait);
1861e334f550SLars Ellenberg }
1862e334f550SLars Ellenberg 
1863e334f550SLars Ellenberg static void go_diskless(struct drbd_device *device)
1864e334f550SLars Ellenberg {
1865e334f550SLars Ellenberg 	D_ASSERT(device, device->state.disk == D_FAILED);
1866e334f550SLars Ellenberg 	/* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
1867e334f550SLars Ellenberg 	 * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
1868e334f550SLars Ellenberg 	 * the protected members anymore, though, so once put_ldev reaches zero
1869e334f550SLars Ellenberg 	 * again, it will be safe to free them. */
1870e334f550SLars Ellenberg 
1871e334f550SLars Ellenberg 	/* Try to write changed bitmap pages, read errors may have just
1872e334f550SLars Ellenberg 	 * set some bits outside the area covered by the activity log.
1873e334f550SLars Ellenberg 	 *
1874e334f550SLars Ellenberg 	 * If we have an IO error during the bitmap writeout,
1875e334f550SLars Ellenberg 	 * we will want a full sync next time, just in case.
1876e334f550SLars Ellenberg 	 * (Do we want a specific meta data flag for this?)
1877e334f550SLars Ellenberg 	 *
1878e334f550SLars Ellenberg 	 * If that does not make it to stable storage either,
1879e334f550SLars Ellenberg 	 * we cannot do anything about that anymore.
1880e334f550SLars Ellenberg 	 *
1881e334f550SLars Ellenberg 	 * We still need to check if both bitmap and ldev are present, we may
1882e334f550SLars Ellenberg 	 * end up here after a failed attach, before ldev was even assigned.
1883e334f550SLars Ellenberg 	 */
1884e334f550SLars Ellenberg 	if (device->bitmap && device->ldev) {
1885e334f550SLars Ellenberg 		/* An interrupted resync or similar is allowed to recount bits
1886e334f550SLars Ellenberg 		 * while we detach.
1887e334f550SLars Ellenberg 		 * Any modifications would not be expected anymore, though.
1888e334f550SLars Ellenberg 		 */
1889e334f550SLars Ellenberg 		if (drbd_bitmap_io_from_worker(device, drbd_bm_write,
1890e334f550SLars Ellenberg 					"detach", BM_LOCKED_TEST_ALLOWED)) {
1891e334f550SLars Ellenberg 			if (test_bit(WAS_READ_ERROR, &device->flags)) {
1892e334f550SLars Ellenberg 				drbd_md_set_flag(device, MDF_FULL_SYNC);
1893e334f550SLars Ellenberg 				drbd_md_sync(device);
1894e334f550SLars Ellenberg 			}
1895e334f550SLars Ellenberg 		}
1896e334f550SLars Ellenberg 	}
1897e334f550SLars Ellenberg 
1898e334f550SLars Ellenberg 	drbd_force_state(device, NS(disk, D_DISKLESS));
1899e334f550SLars Ellenberg }
1900e334f550SLars Ellenberg 
1901ac0acb9eSLars Ellenberg static int do_md_sync(struct drbd_device *device)
1902ac0acb9eSLars Ellenberg {
1903ac0acb9eSLars Ellenberg 	drbd_warn(device, "md_sync_timer expired! Worker calls drbd_md_sync().\n");
1904ac0acb9eSLars Ellenberg 	drbd_md_sync(device);
1905ac0acb9eSLars Ellenberg 	return 0;
1906ac0acb9eSLars Ellenberg }
1907ac0acb9eSLars Ellenberg 
1908e334f550SLars Ellenberg #define WORK_PENDING(work_bit, todo)	((todo) & (1UL << (work_bit)))
1909e334f550SLars Ellenberg static void do_device_work(struct drbd_device *device, const unsigned long todo)
1910e334f550SLars Ellenberg {
1911ac0acb9eSLars Ellenberg 	if (WORK_PENDING(MD_SYNC, todo))
1912ac0acb9eSLars Ellenberg 		do_md_sync(device);
1913e334f550SLars Ellenberg 	if (WORK_PENDING(RS_DONE, todo) ||
1914e334f550SLars Ellenberg 	    WORK_PENDING(RS_PROGRESS, todo))
1915e334f550SLars Ellenberg 		update_on_disk_bitmap(device, WORK_PENDING(RS_DONE, todo));
1916e334f550SLars Ellenberg 	if (WORK_PENDING(GO_DISKLESS, todo))
1917e334f550SLars Ellenberg 		go_diskless(device);
1918e334f550SLars Ellenberg 	if (WORK_PENDING(DESTROY_DISK, todo))
1919e334f550SLars Ellenberg 		drbd_ldev_destroy(device);
1920ac0acb9eSLars Ellenberg 	if (WORK_PENDING(RS_START, todo))
1921ac0acb9eSLars Ellenberg 		do_start_resync(device);
1922e334f550SLars Ellenberg }
1923e334f550SLars Ellenberg 
1924e334f550SLars Ellenberg #define DRBD_DEVICE_WORK_MASK	\
1925e334f550SLars Ellenberg 	((1UL << GO_DISKLESS)	\
1926e334f550SLars Ellenberg 	|(1UL << DESTROY_DISK)	\
1927ac0acb9eSLars Ellenberg 	|(1UL << MD_SYNC)	\
1928ac0acb9eSLars Ellenberg 	|(1UL << RS_START)	\
1929e334f550SLars Ellenberg 	|(1UL << RS_PROGRESS)	\
1930e334f550SLars Ellenberg 	|(1UL << RS_DONE)	\
1931e334f550SLars Ellenberg 	)
1932e334f550SLars Ellenberg 
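/* Atomically fetch and clear all device-work bits from *flags.
 * The cmpxchg() loop retries whenever another CPU changed *flags between
 * the read and the compare-and-swap, so concurrently set bits (work or
 * otherwise) are never lost, and each work bit is consumed exactly once. */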
1933e334f550SLars Ellenberg static unsigned long get_work_bits(unsigned long *flags)
1934e334f550SLars Ellenberg {
1935e334f550SLars Ellenberg 	unsigned long old, new;
1936e334f550SLars Ellenberg 	do {
1937e334f550SLars Ellenberg 		old = *flags;
1938e334f550SLars Ellenberg 		new = old & ~DRBD_DEVICE_WORK_MASK;
1939e334f550SLars Ellenberg 	} while (cmpxchg(flags, old, new) != old);
1940e334f550SLars Ellenberg 	return old & DRBD_DEVICE_WORK_MASK;
1941e334f550SLars Ellenberg }
1942e334f550SLars Ellenberg 
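/* Process the pending work bits of every peer device on this connection.
 * The kref_get()/rcu_read_unlock() pairing lets the handlers sleep: the
 * device is pinned by the extra reference while the RCU read lock is
 * dropped, and the lock is re-taken before the iteration continues. */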
1943e334f550SLars Ellenberg static void do_unqueued_work(struct drbd_connection *connection)
1944c7a58db4SLars Ellenberg {
1945c7a58db4SLars Ellenberg 	struct drbd_peer_device *peer_device;
1946c7a58db4SLars Ellenberg 	int vnr;
1947c7a58db4SLars Ellenberg 
1948c7a58db4SLars Ellenberg 	rcu_read_lock();
1949c7a58db4SLars Ellenberg 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1950c7a58db4SLars Ellenberg 		struct drbd_device *device = peer_device->device;
1951e334f550SLars Ellenberg 		unsigned long todo = get_work_bits(&device->flags);
1952e334f550SLars Ellenberg 		if (!todo)
1953c7a58db4SLars Ellenberg 			continue;
19545ab7d2c0SLars Ellenberg 
1955c7a58db4SLars Ellenberg 		kref_get(&device->kref);
1956c7a58db4SLars Ellenberg 		rcu_read_unlock();
1957e334f550SLars Ellenberg 		do_device_work(device, todo);
1958c7a58db4SLars Ellenberg 		kref_put(&device->kref, drbd_destroy_device);
1959c7a58db4SLars Ellenberg 		rcu_read_lock();
1960c7a58db4SLars Ellenberg 	}
1961c7a58db4SLars Ellenberg 	rcu_read_unlock();
1962c7a58db4SLars Ellenberg }
1963c7a58db4SLars Ellenberg 
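/* Two ways to pull work off a drbd_work_queue under its q_lock:
 * dequeue_work_batch() splices the entire queue onto work_list, while
 * dequeue_work_item() moves only the first entry.  Both return true if
 * work_list ends up non-empty. */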
1964a186e478SRashika Kheria static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
19658c0785a5SLars Ellenberg {
19668c0785a5SLars Ellenberg 	spin_lock_irq(&queue->q_lock);
196715e26f6aSLars Ellenberg 	list_splice_tail_init(&queue->q, work_list);
19688c0785a5SLars Ellenberg 	spin_unlock_irq(&queue->q_lock);
19698c0785a5SLars Ellenberg 	return !list_empty(work_list);
19708c0785a5SLars Ellenberg }
19718c0785a5SLars Ellenberg 
1972a186e478SRashika Kheria static bool dequeue_work_item(struct drbd_work_queue *queue, struct list_head *work_list)
19738c0785a5SLars Ellenberg {
19748c0785a5SLars Ellenberg 	spin_lock_irq(&queue->q_lock);
19758c0785a5SLars Ellenberg 	if (!list_empty(&queue->q))
19768c0785a5SLars Ellenberg 		list_move(queue->q.next, work_list);
19778c0785a5SLars Ellenberg 	spin_unlock_irq(&queue->q_lock);
19788c0785a5SLars Ellenberg 	return !list_empty(work_list);
19798c0785a5SLars Ellenberg }
19808c0785a5SLars Ellenberg 
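/* Fill work_list with sender work, sleeping until some arrives.  While
 * idle this also uncorks the data socket so queued TCP data gets pushed
 * out, sends the epoch-separating barrier via maybe_send_barrier() once
 * the transfer log epoch has moved on, and finally restores the cork
 * state according to the current net_conf. */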
1981bde89a9eSAndreas Gruenbacher static void wait_for_work(struct drbd_connection *connection, struct list_head *work_list)
1982b6dd1a89SLars Ellenberg {
1983b6dd1a89SLars Ellenberg 	DEFINE_WAIT(wait);
1984b6dd1a89SLars Ellenberg 	struct net_conf *nc;
1985b6dd1a89SLars Ellenberg 	int uncork, cork;
1986b6dd1a89SLars Ellenberg 
1987b6dd1a89SLars Ellenberg 	dequeue_work_item(&connection->sender_work, work_list);
1988b6dd1a89SLars Ellenberg 	if (!list_empty(work_list))
1989b6dd1a89SLars Ellenberg 		return;
1990b6dd1a89SLars Ellenberg 
1991b6dd1a89SLars Ellenberg 	/* Still nothing to do?
1992b6dd1a89SLars Ellenberg 	 * Maybe we still need to close the current epoch,
1993b6dd1a89SLars Ellenberg 	 * even if no new requests are queued yet.
1994b6dd1a89SLars Ellenberg 	 *
1995b6dd1a89SLars Ellenberg 	 * Also, poke TCP, just in case.
1996b6dd1a89SLars Ellenberg 	 * Then wait for new work (or signal). */
1997b6dd1a89SLars Ellenberg 	rcu_read_lock();
1998b6dd1a89SLars Ellenberg 	nc = rcu_dereference(connection->net_conf);
1999b6dd1a89SLars Ellenberg 	uncork = nc ? nc->tcp_cork : 0;
2000b6dd1a89SLars Ellenberg 	rcu_read_unlock();
2001b6dd1a89SLars Ellenberg 	if (uncork) {
2002b6dd1a89SLars Ellenberg 		mutex_lock(&connection->data.mutex);
2003b6dd1a89SLars Ellenberg 		if (connection->data.socket)
2004b6dd1a89SLars Ellenberg 			drbd_tcp_uncork(connection->data.socket);
2005b6dd1a89SLars Ellenberg 		mutex_unlock(&connection->data.mutex);
2006b6dd1a89SLars Ellenberg 	}
2007b6dd1a89SLars Ellenberg 
2008b6dd1a89SLars Ellenberg 	for (;;) {
2009b6dd1a89SLars Ellenberg 		int send_barrier;
2010b6dd1a89SLars Ellenberg 		prepare_to_wait(&connection->sender_work.q_wait, &wait, TASK_INTERRUPTIBLE);
20110500813fSAndreas Gruenbacher 		spin_lock_irq(&connection->resource->req_lock);
2012b6dd1a89SLars Ellenberg 		spin_lock(&connection->sender_work.q_lock);	/* FIXME get rid of this one? */
2013bc317a9eSLars Ellenberg 		/* splice over the whole queue, preserving order;
2014bc317a9eSLars Ellenberg 		 * we still use drbd_queue_work_front() in some places */
2015bc317a9eSLars Ellenberg 		if (!list_empty(&connection->sender_work.q))
20164dd726f0SLars Ellenberg 			list_splice_tail_init(&connection->sender_work.q, work_list);
2017b6dd1a89SLars Ellenberg 		spin_unlock(&connection->sender_work.q_lock);	/* FIXME get rid of this one? */
2018b6dd1a89SLars Ellenberg 		if (!list_empty(work_list) || signal_pending(current)) {
20190500813fSAndreas Gruenbacher 			spin_unlock_irq(&connection->resource->req_lock);
2020b6dd1a89SLars Ellenberg 			break;
2021b6dd1a89SLars Ellenberg 		}
2022f9c78128SLars Ellenberg 
2023f9c78128SLars Ellenberg 		/* We found nothing new to do, no to-be-communicated request,
2024f9c78128SLars Ellenberg 		 * no other work item.  We may still need to close the last
2025f9c78128SLars Ellenberg 		 * epoch.  Next incoming request epoch will be connection ->
2026f9c78128SLars Ellenberg 		 * current transfer log epoch number.  If that is different
2027f9c78128SLars Ellenberg 		 * from the epoch of the last request we communicated, it is
2028f9c78128SLars Ellenberg 		 * safe to send the epoch separating barrier now.
2029f9c78128SLars Ellenberg 		 */
2030f9c78128SLars Ellenberg 		send_barrier =
2031f9c78128SLars Ellenberg 			atomic_read(&connection->current_tle_nr) !=
2032f9c78128SLars Ellenberg 			connection->send.current_epoch_nr;
20330500813fSAndreas Gruenbacher 		spin_unlock_irq(&connection->resource->req_lock);
2034f9c78128SLars Ellenberg 
2035f9c78128SLars Ellenberg 		if (send_barrier)
2036f9c78128SLars Ellenberg 			maybe_send_barrier(connection,
2037f9c78128SLars Ellenberg 					connection->send.current_epoch_nr + 1);
20385ab7d2c0SLars Ellenberg 
2039e334f550SLars Ellenberg 		if (test_bit(DEVICE_WORK_PENDING, &connection->flags))
20405ab7d2c0SLars Ellenberg 			break;
20415ab7d2c0SLars Ellenberg 
2042a80ca1aeSLars Ellenberg 		/* drbd_send() may have called flush_signals() */
2043a80ca1aeSLars Ellenberg 		if (get_t_state(&connection->worker) != RUNNING)
2044a80ca1aeSLars Ellenberg 			break;
20455ab7d2c0SLars Ellenberg 
2046b6dd1a89SLars Ellenberg 		schedule();
2047b6dd1a89SLars Ellenberg 		/* We may be woken up for things other than new work, too,
2048b6dd1a89SLars Ellenberg 		 * e.g. if the current epoch got closed.
2049b6dd1a89SLars Ellenberg 		 * In that case we send the barrier above. */
2050b6dd1a89SLars Ellenberg 	}
2051b6dd1a89SLars Ellenberg 	finish_wait(&connection->sender_work.q_wait, &wait);
2052b6dd1a89SLars Ellenberg 
2053b6dd1a89SLars Ellenberg 	/* someone may have changed the config while we have been waiting above. */
2054b6dd1a89SLars Ellenberg 	rcu_read_lock();
2055b6dd1a89SLars Ellenberg 	nc = rcu_dereference(connection->net_conf);
2056b6dd1a89SLars Ellenberg 	cork = nc ? nc->tcp_cork : 0;
2057b6dd1a89SLars Ellenberg 	rcu_read_unlock();
2058b6dd1a89SLars Ellenberg 	mutex_lock(&connection->data.mutex);
2059b6dd1a89SLars Ellenberg 	if (connection->data.socket) {
2060b6dd1a89SLars Ellenberg 		if (cork)
2061b6dd1a89SLars Ellenberg 			drbd_tcp_cork(connection->data.socket);
2062b6dd1a89SLars Ellenberg 		else if (!uncork)
2063b6dd1a89SLars Ellenberg 			drbd_tcp_uncork(connection->data.socket);
2064b6dd1a89SLars Ellenberg 	}
2065b6dd1a89SLars Ellenberg 	mutex_unlock(&connection->data.mutex);
2066b6dd1a89SLars Ellenberg }
2067b6dd1a89SLars Ellenberg 
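/* Main loop of the per-connection worker thread: alternate between
 * running queued drbd_work callbacks from sender_work and handling the
 * bit-encoded device work above, until asked to stop.  Leftover work is
 * then drained with the callbacks' cancel argument set, before the
 * per-device cleanup at the end. */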
2068b411b363SPhilipp Reisner int drbd_worker(struct drbd_thread *thi)
2069b411b363SPhilipp Reisner {
2070bde89a9eSAndreas Gruenbacher 	struct drbd_connection *connection = thi->connection;
20716db7e50aSAndreas Gruenbacher 	struct drbd_work *w = NULL;
2072c06ece6bSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
2073b411b363SPhilipp Reisner 	LIST_HEAD(work_list);
20748c0785a5SLars Ellenberg 	int vnr;
2075b411b363SPhilipp Reisner 
2076e77a0a5cSAndreas Gruenbacher 	while (get_t_state(thi) == RUNNING) {
207780822284SPhilipp Reisner 		drbd_thread_current_set_cpu(thi);
2078b411b363SPhilipp Reisner 
20798c0785a5SLars Ellenberg 		if (list_empty(&work_list))
2080bde89a9eSAndreas Gruenbacher 			wait_for_work(connection, &work_list);
2081b411b363SPhilipp Reisner 
2082e334f550SLars Ellenberg 		if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags))
2083e334f550SLars Ellenberg 			do_unqueued_work(connection);
20845ab7d2c0SLars Ellenberg 
20858c0785a5SLars Ellenberg 		if (signal_pending(current)) {
2086b411b363SPhilipp Reisner 			flush_signals(current);
208719393e10SPhilipp Reisner 			if (get_t_state(thi) == RUNNING) {
20881ec861ebSAndreas Gruenbacher 				drbd_warn(connection, "Worker got an unexpected signal\n");
2089b411b363SPhilipp Reisner 				continue;
209019393e10SPhilipp Reisner 			}
2091b411b363SPhilipp Reisner 			break;
2092b411b363SPhilipp Reisner 		}
2093b411b363SPhilipp Reisner 
2094e77a0a5cSAndreas Gruenbacher 		if (get_t_state(thi) != RUNNING)
2095b411b363SPhilipp Reisner 			break;
2096b411b363SPhilipp Reisner 
20978c0785a5SLars Ellenberg 		while (!list_empty(&work_list)) {
20986db7e50aSAndreas Gruenbacher 			w = list_first_entry(&work_list, struct drbd_work, list);
20996db7e50aSAndreas Gruenbacher 			list_del_init(&w->list);
21006db7e50aSAndreas Gruenbacher 			if (w->cb(w, connection->cstate < C_WF_REPORT_PARAMS) == 0)
21018c0785a5SLars Ellenberg 				continue;
2102bde89a9eSAndreas Gruenbacher 			if (connection->cstate >= C_WF_REPORT_PARAMS)
2103bde89a9eSAndreas Gruenbacher 				conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
2104b411b363SPhilipp Reisner 		}
2105b411b363SPhilipp Reisner 	}
2106b411b363SPhilipp Reisner 
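	/* Stopping: drain whatever is still queued.  Callbacks run with
	 * their cancel argument set to 1, and we loop until both the
	 * sender_work queue and DEVICE_WORK_PENDING stay clear. */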
21078c0785a5SLars Ellenberg 	do {
2108e334f550SLars Ellenberg 		if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags))
2109e334f550SLars Ellenberg 			do_unqueued_work(connection);
2110b411b363SPhilipp Reisner 		while (!list_empty(&work_list)) {
21116db7e50aSAndreas Gruenbacher 			w = list_first_entry(&work_list, struct drbd_work, list);
21126db7e50aSAndreas Gruenbacher 			list_del_init(&w->list);
21136db7e50aSAndreas Gruenbacher 			w->cb(w, 1);
2114b411b363SPhilipp Reisner 		}
2115bde89a9eSAndreas Gruenbacher 		dequeue_work_batch(&connection->sender_work, &work_list);
2116e334f550SLars Ellenberg 	} while (!list_empty(&work_list) || test_bit(DEVICE_WORK_PENDING, &connection->flags));
2117b411b363SPhilipp Reisner 
2118c141ebdaSPhilipp Reisner 	rcu_read_lock();
2119c06ece6bSAndreas Gruenbacher 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
2120c06ece6bSAndreas Gruenbacher 		struct drbd_device *device = peer_device->device;
21210b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, device->state.disk == D_DISKLESS && device->state.conn == C_STANDALONE);
2122b30ab791SAndreas Gruenbacher 		kref_get(&device->kref);
2123c141ebdaSPhilipp Reisner 		rcu_read_unlock();
2124b30ab791SAndreas Gruenbacher 		drbd_device_cleanup(device);
212505a10ec7SAndreas Gruenbacher 		kref_put(&device->kref, drbd_destroy_device);
2126c141ebdaSPhilipp Reisner 		rcu_read_lock();
21270e29d163SPhilipp Reisner 	}
2128c141ebdaSPhilipp Reisner 	rcu_read_unlock();
2129b411b363SPhilipp Reisner 
2130b411b363SPhilipp Reisner 	return 0;
2131b411b363SPhilipp Reisner }
2132