xref: /openbmc/linux/drivers/block/drbd/drbd_worker.c (revision e37d2438)
1b411b363SPhilipp Reisner /*
2b411b363SPhilipp Reisner    drbd_worker.c
3b411b363SPhilipp Reisner 
4b411b363SPhilipp Reisner    This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5b411b363SPhilipp Reisner 
6b411b363SPhilipp Reisner    Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7b411b363SPhilipp Reisner    Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8b411b363SPhilipp Reisner    Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9b411b363SPhilipp Reisner 
10b411b363SPhilipp Reisner    drbd is free software; you can redistribute it and/or modify
11b411b363SPhilipp Reisner    it under the terms of the GNU General Public License as published by
12b411b363SPhilipp Reisner    the Free Software Foundation; either version 2, or (at your option)
13b411b363SPhilipp Reisner    any later version.
14b411b363SPhilipp Reisner 
15b411b363SPhilipp Reisner    drbd is distributed in the hope that it will be useful,
16b411b363SPhilipp Reisner    but WITHOUT ANY WARRANTY; without even the implied warranty of
17b411b363SPhilipp Reisner    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18b411b363SPhilipp Reisner    GNU General Public License for more details.
19b411b363SPhilipp Reisner 
20b411b363SPhilipp Reisner    You should have received a copy of the GNU General Public License
21b411b363SPhilipp Reisner    along with drbd; see the file COPYING.  If not, write to
22b411b363SPhilipp Reisner    the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23b411b363SPhilipp Reisner 
24b411b363SPhilipp Reisner */
25b411b363SPhilipp Reisner 
26b411b363SPhilipp Reisner #include <linux/module.h>
27b411b363SPhilipp Reisner #include <linux/drbd.h>
28b411b363SPhilipp Reisner #include <linux/sched.h>
29b411b363SPhilipp Reisner #include <linux/wait.h>
30b411b363SPhilipp Reisner #include <linux/mm.h>
31b411b363SPhilipp Reisner #include <linux/memcontrol.h>
32b411b363SPhilipp Reisner #include <linux/mm_inline.h>
33b411b363SPhilipp Reisner #include <linux/slab.h>
34b411b363SPhilipp Reisner #include <linux/random.h>
35b411b363SPhilipp Reisner #include <linux/string.h>
36b411b363SPhilipp Reisner #include <linux/scatterlist.h>
37b411b363SPhilipp Reisner 
38b411b363SPhilipp Reisner #include "drbd_int.h"
39a3603a6eSAndreas Gruenbacher #include "drbd_protocol.h"
40b411b363SPhilipp Reisner #include "drbd_req.h"
41b411b363SPhilipp Reisner 
42d448a2e1SAndreas Gruenbacher static int make_ov_request(struct drbd_device *, int);
43d448a2e1SAndreas Gruenbacher static int make_resync_request(struct drbd_device *, int);
44b411b363SPhilipp Reisner 
45c5a91619SAndreas Gruenbacher /* endio handlers:
46c5a91619SAndreas Gruenbacher  *   drbd_md_io_complete (defined here)
47fcefa62eSAndreas Gruenbacher  *   drbd_request_endio (defined here)
48fcefa62eSAndreas Gruenbacher  *   drbd_peer_request_endio (defined here)
49c5a91619SAndreas Gruenbacher  *   bm_async_io_complete (defined in drbd_bitmap.c)
50c5a91619SAndreas Gruenbacher  *
51b411b363SPhilipp Reisner  * For all these callbacks, note the following:
52b411b363SPhilipp Reisner  * The callbacks will be called in irq context by the IDE drivers,
53b411b363SPhilipp Reisner  * and in Softirqs/Tasklets/BH context by the SCSI drivers.
54b411b363SPhilipp Reisner  * Try to get the locking right :)
55b411b363SPhilipp Reisner  *
56b411b363SPhilipp Reisner  */
57b411b363SPhilipp Reisner 
58b411b363SPhilipp Reisner 
59b411b363SPhilipp Reisner /* About the global_state_lock
60b411b363SPhilipp Reisner    Each state transition on a device holds a read lock. In case we have
6195f8efd0SAndreas Gruenbacher    to evaluate the resync-after dependencies, we grab a write lock, because
62b411b363SPhilipp Reisner    we need stable states on all devices for that.  */
63b411b363SPhilipp Reisner rwlock_t global_state_lock;
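/* Illustrative sketch only (these are not call sites from this file): a state
 * transition would take the read side, while evaluating the resync-after
 * dependencies takes the write side to get a stable view of all devices:
 *
 *	read_lock(&global_state_lock);
 *	... apply the state change on one device ...
 *	read_unlock(&global_state_lock);
 *
 *	write_lock(&global_state_lock);
 *	... walk all devices; their states cannot change underneath us ...
 *	write_unlock(&global_state_lock);
 */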
64b411b363SPhilipp Reisner 
65b411b363SPhilipp Reisner /* used for synchronous meta data and bitmap IO
66b411b363SPhilipp Reisner  * submitted by drbd_md_sync_page_io()
67b411b363SPhilipp Reisner  */
68b411b363SPhilipp Reisner void drbd_md_io_complete(struct bio *bio, int error)
69b411b363SPhilipp Reisner {
70b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
71b411b363SPhilipp Reisner 
72e37d2438SLars Ellenberg 	device = bio->bi_private;
73e37d2438SLars Ellenberg 	device->md_io.error = error;
74b411b363SPhilipp Reisner 
750cfac5ddSPhilipp Reisner 	/* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
760cfac5ddSPhilipp Reisner 	 * to timeout on the lower level device, and eventually detach from it.
770cfac5ddSPhilipp Reisner 	 * If this io completion runs after that timeout expired, this
780cfac5ddSPhilipp Reisner 	 * drbd_md_put_buffer() may allow us to finally try and re-attach.
790cfac5ddSPhilipp Reisner 	 * During normal operation, this only puts that extra reference
800cfac5ddSPhilipp Reisner 	 * down to 1 again.
810cfac5ddSPhilipp Reisner 	 * Make sure we first drop the reference, and only then signal
820cfac5ddSPhilipp Reisner 	 * completion, or we may (in drbd_al_read_log()) cycle so fast into the
830cfac5ddSPhilipp Reisner 	 * next drbd_md_sync_page_io() that we trigger the
84b30ab791SAndreas Gruenbacher 	 * ASSERT(atomic_read(&device->md_io_in_use) == 1) there.
850cfac5ddSPhilipp Reisner 	 */
86b30ab791SAndreas Gruenbacher 	drbd_md_put_buffer(device);
87e37d2438SLars Ellenberg 	device->md_io.done = 1;
88b30ab791SAndreas Gruenbacher 	wake_up(&device->misc_wait);
89cdfda633SPhilipp Reisner 	bio_put(bio);
90b30ab791SAndreas Gruenbacher 	if (device->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */
91b30ab791SAndreas Gruenbacher 		put_ldev(device);
92b411b363SPhilipp Reisner }
93b411b363SPhilipp Reisner 
94b411b363SPhilipp Reisner /* reads on behalf of the partner,
95b411b363SPhilipp Reisner  * "submitted" by the receiver
96b411b363SPhilipp Reisner  */
97a186e478SRashika Kheria static void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local)
98b411b363SPhilipp Reisner {
99b411b363SPhilipp Reisner 	unsigned long flags = 0;
1006780139cSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
1016780139cSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
102b411b363SPhilipp Reisner 
1030500813fSAndreas Gruenbacher 	spin_lock_irqsave(&device->resource->req_lock, flags);
104b30ab791SAndreas Gruenbacher 	device->read_cnt += peer_req->i.size >> 9;
105a8cd15baSAndreas Gruenbacher 	list_del(&peer_req->w.list);
106b30ab791SAndreas Gruenbacher 	if (list_empty(&device->read_ee))
107b30ab791SAndreas Gruenbacher 		wake_up(&device->ee_wait);
108db830c46SAndreas Gruenbacher 	if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
109b30ab791SAndreas Gruenbacher 		__drbd_chk_io_error(device, DRBD_READ_ERROR);
1100500813fSAndreas Gruenbacher 	spin_unlock_irqrestore(&device->resource->req_lock, flags);
111b411b363SPhilipp Reisner 
1126780139cSAndreas Gruenbacher 	drbd_queue_work(&peer_device->connection->sender_work, &peer_req->w);
113b30ab791SAndreas Gruenbacher 	put_ldev(device);
114b411b363SPhilipp Reisner }
115b411b363SPhilipp Reisner 
116b411b363SPhilipp Reisner /* writes on behalf of the partner, or resync writes,
11745bb912bSLars Ellenberg  * "submitted" by the receiver, final stage.  */
118a0fb3c47SLars Ellenberg void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
119b411b363SPhilipp Reisner {
120b411b363SPhilipp Reisner 	unsigned long flags = 0;
1216780139cSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
1226780139cSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
123181286adSLars Ellenberg 	struct drbd_interval i;
124b411b363SPhilipp Reisner 	int do_wake;
125579b57edSAndreas Gruenbacher 	u64 block_id;
126b411b363SPhilipp Reisner 	int do_al_complete_io;
127b411b363SPhilipp Reisner 
128db830c46SAndreas Gruenbacher 	/* after we moved peer_req to done_ee,
129b411b363SPhilipp Reisner 	 * we may no longer access it,
130b411b363SPhilipp Reisner 	 * it may be freed/reused already!
131b411b363SPhilipp Reisner 	 * (as soon as we release the req_lock) */
132181286adSLars Ellenberg 	i = peer_req->i;
133db830c46SAndreas Gruenbacher 	do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
134db830c46SAndreas Gruenbacher 	block_id = peer_req->block_id;
135b411b363SPhilipp Reisner 
1360500813fSAndreas Gruenbacher 	spin_lock_irqsave(&device->resource->req_lock, flags);
137b30ab791SAndreas Gruenbacher 	device->writ_cnt += peer_req->i.size >> 9;
138a8cd15baSAndreas Gruenbacher 	list_move_tail(&peer_req->w.list, &device->done_ee);
139b411b363SPhilipp Reisner 
140bb3bfe96SAndreas Gruenbacher 	/*
1415e472264SAndreas Gruenbacher 	 * Do not remove from the write_requests tree here: we did not send the
142bb3bfe96SAndreas Gruenbacher 	 * Ack yet and did not wake possibly waiting conflicting requests.
143bb3bfe96SAndreas Gruenbacher 	 * It is removed from the tree in "drbd_process_done_ee" within the
14484b8c06bSAndreas Gruenbacher 	 * appropriate dw.cb (e_end_block/e_end_resync_block) or in
145bb3bfe96SAndreas Gruenbacher 	 * _drbd_clear_done_ee.
146bb3bfe96SAndreas Gruenbacher 	 */
147b411b363SPhilipp Reisner 
148b30ab791SAndreas Gruenbacher 	do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee);
149b411b363SPhilipp Reisner 
150a0fb3c47SLars Ellenberg 	/* FIXME do we want to detach for failed REQ_DISCARD?
151a0fb3c47SLars Ellenberg 	 * ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */
152a0fb3c47SLars Ellenberg 	if (peer_req->flags & EE_WAS_ERROR)
153b30ab791SAndreas Gruenbacher 		__drbd_chk_io_error(device, DRBD_WRITE_ERROR);
1540500813fSAndreas Gruenbacher 	spin_unlock_irqrestore(&device->resource->req_lock, flags);
155b411b363SPhilipp Reisner 
156579b57edSAndreas Gruenbacher 	if (block_id == ID_SYNCER)
157b30ab791SAndreas Gruenbacher 		drbd_rs_complete_io(device, i.sector);
158b411b363SPhilipp Reisner 
159b411b363SPhilipp Reisner 	if (do_wake)
160b30ab791SAndreas Gruenbacher 		wake_up(&device->ee_wait);
161b411b363SPhilipp Reisner 
162b411b363SPhilipp Reisner 	if (do_al_complete_io)
163b30ab791SAndreas Gruenbacher 		drbd_al_complete_io(device, &i);
164b411b363SPhilipp Reisner 
1656780139cSAndreas Gruenbacher 	wake_asender(peer_device->connection);
166b30ab791SAndreas Gruenbacher 	put_ldev(device);
16745bb912bSLars Ellenberg }
168b411b363SPhilipp Reisner 
16945bb912bSLars Ellenberg /* writes on behalf of the partner, or resync writes,
17045bb912bSLars Ellenberg  * "submitted" by the receiver.
17145bb912bSLars Ellenberg  */
172fcefa62eSAndreas Gruenbacher void drbd_peer_request_endio(struct bio *bio, int error)
17345bb912bSLars Ellenberg {
174db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req = bio->bi_private;
175a8cd15baSAndreas Gruenbacher 	struct drbd_device *device = peer_req->peer_device->device;
17645bb912bSLars Ellenberg 	int uptodate = bio_flagged(bio, BIO_UPTODATE);
17745bb912bSLars Ellenberg 	int is_write = bio_data_dir(bio) == WRITE;
178a0fb3c47SLars Ellenberg 	int is_discard = !!(bio->bi_rw & REQ_DISCARD);
17945bb912bSLars Ellenberg 
18007194272SLars Ellenberg 	if (error && __ratelimit(&drbd_ratelimit_state))
181d0180171SAndreas Gruenbacher 		drbd_warn(device, "%s: error=%d s=%llus\n",
182a0fb3c47SLars Ellenberg 				is_write ? (is_discard ? "discard" : "write")
183a0fb3c47SLars Ellenberg 					: "read", error,
184db830c46SAndreas Gruenbacher 				(unsigned long long)peer_req->i.sector);
18545bb912bSLars Ellenberg 	if (!error && !uptodate) {
18607194272SLars Ellenberg 		if (__ratelimit(&drbd_ratelimit_state))
187d0180171SAndreas Gruenbacher 			drbd_warn(device, "%s: setting error to -EIO s=%llus\n",
18845bb912bSLars Ellenberg 					is_write ? "write" : "read",
189db830c46SAndreas Gruenbacher 					(unsigned long long)peer_req->i.sector);
19045bb912bSLars Ellenberg 		/* strange behavior of some lower level drivers...
19145bb912bSLars Ellenberg 		 * fail the request by clearing the uptodate flag,
19245bb912bSLars Ellenberg 		 * but do not return any error?! */
19345bb912bSLars Ellenberg 		error = -EIO;
19445bb912bSLars Ellenberg 	}
19545bb912bSLars Ellenberg 
19645bb912bSLars Ellenberg 	if (error)
197db830c46SAndreas Gruenbacher 		set_bit(__EE_WAS_ERROR, &peer_req->flags);
19845bb912bSLars Ellenberg 
19945bb912bSLars Ellenberg 	bio_put(bio); /* no need for the bio anymore */
200db830c46SAndreas Gruenbacher 	if (atomic_dec_and_test(&peer_req->pending_bios)) {
20145bb912bSLars Ellenberg 		if (is_write)
202db830c46SAndreas Gruenbacher 			drbd_endio_write_sec_final(peer_req);
20345bb912bSLars Ellenberg 		else
204db830c46SAndreas Gruenbacher 			drbd_endio_read_sec_final(peer_req);
20545bb912bSLars Ellenberg 	}
206b411b363SPhilipp Reisner }
207b411b363SPhilipp Reisner 
208b411b363SPhilipp Reisner /* read, readA or write requests on R_PRIMARY coming from drbd_make_request
209b411b363SPhilipp Reisner  */
210fcefa62eSAndreas Gruenbacher void drbd_request_endio(struct bio *bio, int error)
211b411b363SPhilipp Reisner {
212a115413dSLars Ellenberg 	unsigned long flags;
213b411b363SPhilipp Reisner 	struct drbd_request *req = bio->bi_private;
21484b8c06bSAndreas Gruenbacher 	struct drbd_device *device = req->device;
215a115413dSLars Ellenberg 	struct bio_and_error m;
216b411b363SPhilipp Reisner 	enum drbd_req_event what;
217b411b363SPhilipp Reisner 	int uptodate = bio_flagged(bio, BIO_UPTODATE);
218b411b363SPhilipp Reisner 
219b411b363SPhilipp Reisner 	if (!error && !uptodate) {
220d0180171SAndreas Gruenbacher 		drbd_warn(device, "p %s: setting error to -EIO\n",
221b411b363SPhilipp Reisner 			 bio_data_dir(bio) == WRITE ? "write" : "read");
222b411b363SPhilipp Reisner 		/* strange behavior of some lower level drivers...
223b411b363SPhilipp Reisner 		 * fail the request by clearing the uptodate flag,
224b411b363SPhilipp Reisner 		 * but do not return any error?! */
225b411b363SPhilipp Reisner 		error = -EIO;
226b411b363SPhilipp Reisner 	}
227b411b363SPhilipp Reisner 
2281b6dd252SPhilipp Reisner 
2291b6dd252SPhilipp Reisner 	/* If this request was aborted locally before,
2301b6dd252SPhilipp Reisner 	 * but now was completed "successfully",
2311b6dd252SPhilipp Reisner 	 * chances are that this caused arbitrary data corruption.
2321b6dd252SPhilipp Reisner 	 *
2331b6dd252SPhilipp Reisner 	 * "aborting" requests, or force-detaching the disk, is intended for
2341b6dd252SPhilipp Reisner 	 * completely blocked/hung local backing devices which no longer
2351b6dd252SPhilipp Reisner 	 * complete requests at all, not even do error completions.  In this
2361b6dd252SPhilipp Reisner 	 * situation, usually a hard-reset and failover is the only way out.
2371b6dd252SPhilipp Reisner 	 *
2381b6dd252SPhilipp Reisner 	 * By "aborting", basically faking a local error-completion,
2391b6dd252SPhilipp Reisner 	 * we allow for a more graceful switchover by cleanly migrating services.
2401b6dd252SPhilipp Reisner 	 * Still the affected node has to be rebooted "soon".
2411b6dd252SPhilipp Reisner 	 *
2421b6dd252SPhilipp Reisner 	 * By completing these requests, we allow the upper layers to re-use
2431b6dd252SPhilipp Reisner 	 * the associated data pages.
2441b6dd252SPhilipp Reisner 	 *
2451b6dd252SPhilipp Reisner 	 * If later the local backing device "recovers", and now DMAs some data
2461b6dd252SPhilipp Reisner 	 * from disk into the original request pages, in the best case it will
2471b6dd252SPhilipp Reisner 	 * just put random data into unused pages; but typically it will corrupt
2481b6dd252SPhilipp Reisner 	 * data that has meanwhile become completely unrelated, causing all sorts of damage.
2491b6dd252SPhilipp Reisner 	 *
2501b6dd252SPhilipp Reisner 	 * Which means delayed successful completion,
2511b6dd252SPhilipp Reisner 	 * especially for READ requests,
2521b6dd252SPhilipp Reisner 	 * is a reason to panic().
2531b6dd252SPhilipp Reisner 	 *
2541b6dd252SPhilipp Reisner 	 * We assume that a delayed *error* completion is OK,
2551b6dd252SPhilipp Reisner 	 * though we still will complain noisily about it.
2561b6dd252SPhilipp Reisner 	 */
2571b6dd252SPhilipp Reisner 	if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) {
2581b6dd252SPhilipp Reisner 		if (__ratelimit(&drbd_ratelimit_state))
259d0180171SAndreas Gruenbacher 			drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");
2601b6dd252SPhilipp Reisner 
2611b6dd252SPhilipp Reisner 		if (!error)
2621b6dd252SPhilipp Reisner 			panic("possible random memory corruption caused by delayed completion of aborted local request\n");
2631b6dd252SPhilipp Reisner 	}
2641b6dd252SPhilipp Reisner 
265b411b363SPhilipp Reisner 	/* to avoid recursion in __req_mod */
266b411b363SPhilipp Reisner 	if (unlikely(error)) {
2672f632aebSLars Ellenberg 		if (bio->bi_rw & REQ_DISCARD)
2682f632aebSLars Ellenberg 			what = (error == -EOPNOTSUPP)
2692f632aebSLars Ellenberg 				? DISCARD_COMPLETED_NOTSUPP
2702f632aebSLars Ellenberg 				: DISCARD_COMPLETED_WITH_ERROR;
2712f632aebSLars Ellenberg 		else
272b411b363SPhilipp Reisner 			what = (bio_data_dir(bio) == WRITE)
2738554df1cSAndreas Gruenbacher 			? WRITE_COMPLETED_WITH_ERROR
2745c3c7e64SLars Ellenberg 			: (bio_rw(bio) == READ)
2758554df1cSAndreas Gruenbacher 			  ? READ_COMPLETED_WITH_ERROR
2768554df1cSAndreas Gruenbacher 			  : READ_AHEAD_COMPLETED_WITH_ERROR;
277b411b363SPhilipp Reisner 	} else
2788554df1cSAndreas Gruenbacher 		what = COMPLETED_OK;
279b411b363SPhilipp Reisner 
280b411b363SPhilipp Reisner 	bio_put(req->private_bio);
281b411b363SPhilipp Reisner 	req->private_bio = ERR_PTR(error);
282b411b363SPhilipp Reisner 
283a115413dSLars Ellenberg 	/* not req_mod(), we need irqsave here! */
2840500813fSAndreas Gruenbacher 	spin_lock_irqsave(&device->resource->req_lock, flags);
285a115413dSLars Ellenberg 	__req_mod(req, what, &m);
2860500813fSAndreas Gruenbacher 	spin_unlock_irqrestore(&device->resource->req_lock, flags);
287b30ab791SAndreas Gruenbacher 	put_ldev(device);
288a115413dSLars Ellenberg 
289a115413dSLars Ellenberg 	if (m.bio)
290b30ab791SAndreas Gruenbacher 		complete_master_bio(device, &m);
291b411b363SPhilipp Reisner }
292b411b363SPhilipp Reisner 
29379a3c8d3SAndreas Gruenbacher void drbd_csum_ee(struct crypto_hash *tfm, struct drbd_peer_request *peer_req, void *digest)
29445bb912bSLars Ellenberg {
29545bb912bSLars Ellenberg 	struct hash_desc desc;
29645bb912bSLars Ellenberg 	struct scatterlist sg;
297db830c46SAndreas Gruenbacher 	struct page *page = peer_req->pages;
29845bb912bSLars Ellenberg 	struct page *tmp;
29945bb912bSLars Ellenberg 	unsigned len;
30045bb912bSLars Ellenberg 
30145bb912bSLars Ellenberg 	desc.tfm = tfm;
30245bb912bSLars Ellenberg 	desc.flags = 0;
30345bb912bSLars Ellenberg 
30445bb912bSLars Ellenberg 	sg_init_table(&sg, 1);
30545bb912bSLars Ellenberg 	crypto_hash_init(&desc);
30645bb912bSLars Ellenberg 
30745bb912bSLars Ellenberg 	while ((tmp = page_chain_next(page))) {
30845bb912bSLars Ellenberg 		/* all but the last page will be fully used */
30945bb912bSLars Ellenberg 		sg_set_page(&sg, page, PAGE_SIZE, 0);
31045bb912bSLars Ellenberg 		crypto_hash_update(&desc, &sg, sg.length);
31145bb912bSLars Ellenberg 		page = tmp;
31245bb912bSLars Ellenberg 	}
31345bb912bSLars Ellenberg 	/* and now the last, possibly only partially used page */
314db830c46SAndreas Gruenbacher 	len = peer_req->i.size & (PAGE_SIZE - 1);
31545bb912bSLars Ellenberg 	sg_set_page(&sg, page, len ?: PAGE_SIZE, 0);
31645bb912bSLars Ellenberg 	crypto_hash_update(&desc, &sg, sg.length);
31745bb912bSLars Ellenberg 	crypto_hash_final(&desc, digest);
31845bb912bSLars Ellenberg }
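/* Example for the last-page length computation above (hypothetical sizes,
 * assuming 4 KiB pages): peer_req->i.size == 9216 gives
 * len = 9216 & 4095 = 1024, so only the first 1024 bytes of the last page are
 * hashed.  If i.size is an exact multiple of PAGE_SIZE, len is 0 and
 * "len ?: PAGE_SIZE" hashes the full last page instead. */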
31945bb912bSLars Ellenberg 
32079a3c8d3SAndreas Gruenbacher void drbd_csum_bio(struct crypto_hash *tfm, struct bio *bio, void *digest)
321b411b363SPhilipp Reisner {
322b411b363SPhilipp Reisner 	struct hash_desc desc;
323b411b363SPhilipp Reisner 	struct scatterlist sg;
3247988613bSKent Overstreet 	struct bio_vec bvec;
3257988613bSKent Overstreet 	struct bvec_iter iter;
326b411b363SPhilipp Reisner 
327b411b363SPhilipp Reisner 	desc.tfm = tfm;
328b411b363SPhilipp Reisner 	desc.flags = 0;
329b411b363SPhilipp Reisner 
330b411b363SPhilipp Reisner 	sg_init_table(&sg, 1);
331b411b363SPhilipp Reisner 	crypto_hash_init(&desc);
332b411b363SPhilipp Reisner 
3337988613bSKent Overstreet 	bio_for_each_segment(bvec, bio, iter) {
3347988613bSKent Overstreet 		sg_set_page(&sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
335b411b363SPhilipp Reisner 		crypto_hash_update(&desc, &sg, sg.length);
336b411b363SPhilipp Reisner 	}
337b411b363SPhilipp Reisner 	crypto_hash_final(&desc, digest);
338b411b363SPhilipp Reisner }
339b411b363SPhilipp Reisner 
3409676c760SLars Ellenberg /* MAYBE merge common code with w_e_end_ov_req */
34199920dc5SAndreas Gruenbacher static int w_e_send_csum(struct drbd_work *w, int cancel)
342b411b363SPhilipp Reisner {
343a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
3446780139cSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
3456780139cSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
346b411b363SPhilipp Reisner 	int digest_size;
347b411b363SPhilipp Reisner 	void *digest;
34899920dc5SAndreas Gruenbacher 	int err = 0;
349b411b363SPhilipp Reisner 
35053ea4331SLars Ellenberg 	if (unlikely(cancel))
35153ea4331SLars Ellenberg 		goto out;
352b411b363SPhilipp Reisner 
3539676c760SLars Ellenberg 	if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0))
35453ea4331SLars Ellenberg 		goto out;
35553ea4331SLars Ellenberg 
3566780139cSAndreas Gruenbacher 	digest_size = crypto_hash_digestsize(peer_device->connection->csums_tfm);
357b411b363SPhilipp Reisner 	digest = kmalloc(digest_size, GFP_NOIO);
358b411b363SPhilipp Reisner 	if (digest) {
359db830c46SAndreas Gruenbacher 		sector_t sector = peer_req->i.sector;
360db830c46SAndreas Gruenbacher 		unsigned int size = peer_req->i.size;
3616780139cSAndreas Gruenbacher 		drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest);
3629676c760SLars Ellenberg 		/* Free peer_req and pages before send.
36353ea4331SLars Ellenberg 		 * In case we block on congestion, we could otherwise run into
36453ea4331SLars Ellenberg 		 * some distributed deadlock, if the other side blocks on
36553ea4331SLars Ellenberg 		 * congestion as well, because our receiver blocks in
366c37c8ecfSAndreas Gruenbacher 		 * drbd_alloc_pages due to pp_in_use > max_buffers. */
367b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
368db830c46SAndreas Gruenbacher 		peer_req = NULL;
369b30ab791SAndreas Gruenbacher 		inc_rs_pending(device);
3706780139cSAndreas Gruenbacher 		err = drbd_send_drequest_csum(peer_device, sector, size,
37153ea4331SLars Ellenberg 					      digest, digest_size,
372b411b363SPhilipp Reisner 					      P_CSUM_RS_REQUEST);
373b411b363SPhilipp Reisner 		kfree(digest);
374b411b363SPhilipp Reisner 	} else {
375d0180171SAndreas Gruenbacher 		drbd_err(device, "kmalloc() of digest failed.\n");
37699920dc5SAndreas Gruenbacher 		err = -ENOMEM;
377b411b363SPhilipp Reisner 	}
378b411b363SPhilipp Reisner 
37953ea4331SLars Ellenberg out:
380db830c46SAndreas Gruenbacher 	if (peer_req)
381b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
382b411b363SPhilipp Reisner 
38399920dc5SAndreas Gruenbacher 	if (unlikely(err))
384d0180171SAndreas Gruenbacher 		drbd_err(device, "drbd_send_drequest(..., csum) failed\n");
38599920dc5SAndreas Gruenbacher 	return err;
386b411b363SPhilipp Reisner }
387b411b363SPhilipp Reisner 
388b411b363SPhilipp Reisner #define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)
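/* Note: GFP_TRY deliberately leaves out __GFP_WAIT, so these allocations fail
 * fast instead of sleeping or triggering reclaim; callers treat failure as
 * "defer and retry later" (see read_for_csum() below). */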
389b411b363SPhilipp Reisner 
39069a22773SAndreas Gruenbacher static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector, int size)
391b411b363SPhilipp Reisner {
39269a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
393db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req;
394b411b363SPhilipp Reisner 
395b30ab791SAndreas Gruenbacher 	if (!get_ldev(device))
39680a40e43SLars Ellenberg 		return -EIO;
397b411b363SPhilipp Reisner 
398b30ab791SAndreas Gruenbacher 	if (drbd_rs_should_slow_down(device, sector))
3990f0601f4SLars Ellenberg 		goto defer;
4000f0601f4SLars Ellenberg 
401b411b363SPhilipp Reisner 	/* GFP_TRY, because if there is no memory available right now, this may
402b411b363SPhilipp Reisner 	 * be rescheduled for later. It is "only" background resync, after all. */
40369a22773SAndreas Gruenbacher 	peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector,
404a0fb3c47SLars Ellenberg 				       size, true /* has real payload */, GFP_TRY);
405db830c46SAndreas Gruenbacher 	if (!peer_req)
40680a40e43SLars Ellenberg 		goto defer;
407b411b363SPhilipp Reisner 
408a8cd15baSAndreas Gruenbacher 	peer_req->w.cb = w_e_send_csum;
4090500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
410b9ed7080SLars Ellenberg 	list_add_tail(&peer_req->w.list, &device->read_ee);
4110500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
412b411b363SPhilipp Reisner 
413b30ab791SAndreas Gruenbacher 	atomic_add(size >> 9, &device->rs_sect_ev);
414b30ab791SAndreas Gruenbacher 	if (drbd_submit_peer_request(device, peer_req, READ, DRBD_FAULT_RS_RD) == 0)
41580a40e43SLars Ellenberg 		return 0;
41645bb912bSLars Ellenberg 
41710f6d992SLars Ellenberg 	/* If it failed because of ENOMEM, retry should help.  If it failed
41810f6d992SLars Ellenberg 	 * because bio_add_page failed (probably broken lower level driver),
41910f6d992SLars Ellenberg 	 * retry may or may not help.
42010f6d992SLars Ellenberg 	 * If it does not, you may need to force disconnect. */
4210500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
422a8cd15baSAndreas Gruenbacher 	list_del(&peer_req->w.list);
4230500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
42422cc37a9SLars Ellenberg 
425b30ab791SAndreas Gruenbacher 	drbd_free_peer_req(device, peer_req);
42680a40e43SLars Ellenberg defer:
427b30ab791SAndreas Gruenbacher 	put_ldev(device);
42880a40e43SLars Ellenberg 	return -EAGAIN;
429b411b363SPhilipp Reisner }
430b411b363SPhilipp Reisner 
43199920dc5SAndreas Gruenbacher int w_resync_timer(struct drbd_work *w, int cancel)
432794abb75SPhilipp Reisner {
43384b8c06bSAndreas Gruenbacher 	struct drbd_device *device =
43484b8c06bSAndreas Gruenbacher 		container_of(w, struct drbd_device, resync_work);
43584b8c06bSAndreas Gruenbacher 
436b30ab791SAndreas Gruenbacher 	switch (device->state.conn) {
437794abb75SPhilipp Reisner 	case C_VERIFY_S:
438d448a2e1SAndreas Gruenbacher 		make_ov_request(device, cancel);
439794abb75SPhilipp Reisner 		break;
440794abb75SPhilipp Reisner 	case C_SYNC_TARGET:
441d448a2e1SAndreas Gruenbacher 		make_resync_request(device, cancel);
442794abb75SPhilipp Reisner 		break;
443794abb75SPhilipp Reisner 	}
444794abb75SPhilipp Reisner 
44599920dc5SAndreas Gruenbacher 	return 0;
446794abb75SPhilipp Reisner }
447794abb75SPhilipp Reisner 
448b411b363SPhilipp Reisner void resync_timer_fn(unsigned long data)
449b411b363SPhilipp Reisner {
450b30ab791SAndreas Gruenbacher 	struct drbd_device *device = (struct drbd_device *) data;
451b411b363SPhilipp Reisner 
45215e26f6aSLars Ellenberg 	drbd_queue_work_if_unqueued(
45315e26f6aSLars Ellenberg 		&first_peer_device(device)->connection->sender_work,
45484b8c06bSAndreas Gruenbacher 		&device->resync_work);
455b411b363SPhilipp Reisner }
456b411b363SPhilipp Reisner 
457778f271dSPhilipp Reisner static void fifo_set(struct fifo_buffer *fb, int value)
458778f271dSPhilipp Reisner {
459778f271dSPhilipp Reisner 	int i;
460778f271dSPhilipp Reisner 
461778f271dSPhilipp Reisner 	for (i = 0; i < fb->size; i++)
462f10f2623SPhilipp Reisner 		fb->values[i] = value;
463778f271dSPhilipp Reisner }
464778f271dSPhilipp Reisner 
465778f271dSPhilipp Reisner static int fifo_push(struct fifo_buffer *fb, int value)
466778f271dSPhilipp Reisner {
467778f271dSPhilipp Reisner 	int ov;
468778f271dSPhilipp Reisner 
469778f271dSPhilipp Reisner 	ov = fb->values[fb->head_index];
470778f271dSPhilipp Reisner 	fb->values[fb->head_index++] = value;
471778f271dSPhilipp Reisner 
472778f271dSPhilipp Reisner 	if (fb->head_index >= fb->size)
473778f271dSPhilipp Reisner 		fb->head_index = 0;
474778f271dSPhilipp Reisner 
475778f271dSPhilipp Reisner 	return ov;
476778f271dSPhilipp Reisner }
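/* fifo_push() both dequeues and enqueues: it returns the oldest entry (at
 * head_index) and overwrites that slot with the new value.  A small trace
 * with made-up numbers: size = 3, values = {5, 2, 7}, head_index = 0;
 * fifo_push(fb, 0) returns 5 and leaves values = {0, 2, 7}, head_index = 1. */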
477778f271dSPhilipp Reisner 
478778f271dSPhilipp Reisner static void fifo_add_val(struct fifo_buffer *fb, int value)
479778f271dSPhilipp Reisner {
480778f271dSPhilipp Reisner 	int i;
481778f271dSPhilipp Reisner 
482778f271dSPhilipp Reisner 	for (i = 0; i < fb->size; i++)
483778f271dSPhilipp Reisner 		fb->values[i] += value;
484778f271dSPhilipp Reisner }
485778f271dSPhilipp Reisner 
4869958c857SPhilipp Reisner struct fifo_buffer *fifo_alloc(int fifo_size)
4879958c857SPhilipp Reisner {
4889958c857SPhilipp Reisner 	struct fifo_buffer *fb;
4899958c857SPhilipp Reisner 
4908747d30aSLars Ellenberg 	fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_NOIO);
4919958c857SPhilipp Reisner 	if (!fb)
4929958c857SPhilipp Reisner 		return NULL;
4939958c857SPhilipp Reisner 
4949958c857SPhilipp Reisner 	fb->head_index = 0;
4959958c857SPhilipp Reisner 	fb->size = fifo_size;
4969958c857SPhilipp Reisner 	fb->total = 0;
4979958c857SPhilipp Reisner 
4989958c857SPhilipp Reisner 	return fb;
4999958c857SPhilipp Reisner }
5009958c857SPhilipp Reisner 
5010e49d7b0SLars Ellenberg static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in)
502778f271dSPhilipp Reisner {
503daeda1ccSPhilipp Reisner 	struct disk_conf *dc;
5047f34f614SLars Ellenberg 	unsigned int want;     /* The number of sectors we want in-flight */
505778f271dSPhilipp Reisner 	int req_sect; /* Number of sectors to request in this turn */
5067f34f614SLars Ellenberg 	int correction; /* Number of sectors more we need in-flight */
507778f271dSPhilipp Reisner 	int cps; /* correction per invocation of drbd_rs_controller() */
508778f271dSPhilipp Reisner 	int steps; /* Number of time steps to plan ahead */
509778f271dSPhilipp Reisner 	int curr_corr;
510778f271dSPhilipp Reisner 	int max_sect;
511813472ceSPhilipp Reisner 	struct fifo_buffer *plan;
512778f271dSPhilipp Reisner 
513b30ab791SAndreas Gruenbacher 	dc = rcu_dereference(device->ldev->disk_conf);
514b30ab791SAndreas Gruenbacher 	plan = rcu_dereference(device->rs_plan_s);
515778f271dSPhilipp Reisner 
516813472ceSPhilipp Reisner 	steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */
517778f271dSPhilipp Reisner 
518b30ab791SAndreas Gruenbacher 	if (device->rs_in_flight + sect_in == 0) { /* At start of resync */
519daeda1ccSPhilipp Reisner 		want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
520778f271dSPhilipp Reisner 	} else { /* normal path */
521daeda1ccSPhilipp Reisner 		want = dc->c_fill_target ? dc->c_fill_target :
522daeda1ccSPhilipp Reisner 			sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10);
523778f271dSPhilipp Reisner 	}
524778f271dSPhilipp Reisner 
525b30ab791SAndreas Gruenbacher 	correction = want - device->rs_in_flight - plan->total;
526778f271dSPhilipp Reisner 
527778f271dSPhilipp Reisner 	/* Plan ahead */
528778f271dSPhilipp Reisner 	cps = correction / steps;
529813472ceSPhilipp Reisner 	fifo_add_val(plan, cps);
530813472ceSPhilipp Reisner 	plan->total += cps * steps;
531778f271dSPhilipp Reisner 
532778f271dSPhilipp Reisner 	/* What we do in this step */
533813472ceSPhilipp Reisner 	curr_corr = fifo_push(plan, 0);
534813472ceSPhilipp Reisner 	plan->total -= curr_corr;
535778f271dSPhilipp Reisner 
536778f271dSPhilipp Reisner 	req_sect = sect_in + curr_corr;
537778f271dSPhilipp Reisner 	if (req_sect < 0)
538778f271dSPhilipp Reisner 		req_sect = 0;
539778f271dSPhilipp Reisner 
540daeda1ccSPhilipp Reisner 	max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ;
541778f271dSPhilipp Reisner 	if (req_sect > max_sect)
542778f271dSPhilipp Reisner 		req_sect = max_sect;
543778f271dSPhilipp Reisner 
544778f271dSPhilipp Reisner 	/*
545d0180171SAndreas Gruenbacher 	drbd_warn(device, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
546b30ab791SAndreas Gruenbacher 		 sect_in, device->rs_in_flight, want, correction,
547b30ab791SAndreas Gruenbacher 		 steps, cps, device->rs_planed, curr_corr, req_sect);
548778f271dSPhilipp Reisner 	*/
549778f271dSPhilipp Reisner 
550778f271dSPhilipp Reisner 	return req_sect;
551778f271dSPhilipp Reisner }
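/* Worked example with made-up numbers, for the c_fill_target branch
 * (steps = 10, c_fill_target = 1000 sectors, rs_in_flight = 800,
 * plan->total = 100, sect_in = 400):
 *   want       = 1000
 *   correction = 1000 - 800 - 100 = 100
 *   cps        = 100 / 10 = 10, added to every plan slot (plan->total -> 200)
 *   curr_corr  = whatever fifo_push() pops for this step
 *   req_sect   = 400 + curr_corr, then clamped to
 *                0 <= req_sect <= (c_max_rate * 2 * SLEEP_TIME) / HZ
 * i.e. we request roughly what just drained plus this step's share of the
 * correction needed to reach the fill target. */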
552778f271dSPhilipp Reisner 
553b30ab791SAndreas Gruenbacher static int drbd_rs_number_requests(struct drbd_device *device)
554e65f440dSLars Ellenberg {
5550e49d7b0SLars Ellenberg 	unsigned int sect_in;  /* Number of sectors that came in since the last turn */
5560e49d7b0SLars Ellenberg 	int number, mxb;
5570e49d7b0SLars Ellenberg 
5580e49d7b0SLars Ellenberg 	sect_in = atomic_xchg(&device->rs_sect_in, 0);
5590e49d7b0SLars Ellenberg 	device->rs_in_flight -= sect_in;
560813472ceSPhilipp Reisner 
561813472ceSPhilipp Reisner 	rcu_read_lock();
5620e49d7b0SLars Ellenberg 	mxb = drbd_get_max_buffers(device) / 2;
563b30ab791SAndreas Gruenbacher 	if (rcu_dereference(device->rs_plan_s)->size) {
5640e49d7b0SLars Ellenberg 		number = drbd_rs_controller(device, sect_in) >> (BM_BLOCK_SHIFT - 9);
565b30ab791SAndreas Gruenbacher 		device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
566e65f440dSLars Ellenberg 	} else {
567b30ab791SAndreas Gruenbacher 		device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate;
568b30ab791SAndreas Gruenbacher 		number = SLEEP_TIME * device->c_sync_rate  / ((BM_BLOCK_SIZE / 1024) * HZ);
569e65f440dSLars Ellenberg 	}
570813472ceSPhilipp Reisner 	rcu_read_unlock();
571e65f440dSLars Ellenberg 
5720e49d7b0SLars Ellenberg 	/* Don't have more than "max-buffers"/2 in-flight.
5730e49d7b0SLars Ellenberg 	 * Otherwise we may cause the remote site to stall on drbd_alloc_pages(),
5740e49d7b0SLars Ellenberg 	 * potentially causing a distributed deadlock on congestion during
5750e49d7b0SLars Ellenberg 	 * online-verify or (checksum-based) resync, if max-buffers,
5760e49d7b0SLars Ellenberg 	 * socket buffer sizes and resync rate settings are mis-configured. */
5777f34f614SLars Ellenberg 
5787f34f614SLars Ellenberg 	/* note that "number" is in units of "BM_BLOCK_SIZE" (which is 4k),
5797f34f614SLars Ellenberg 	 * mxb (as used here, and in drbd_alloc_pages on the peer) is
5807f34f614SLars Ellenberg 	 * "number of pages" (typically also 4k),
5817f34f614SLars Ellenberg 	 * but "rs_in_flight" is in "sectors" (512 Byte). */
5827f34f614SLars Ellenberg 	if (mxb - device->rs_in_flight/8 < number)
5837f34f614SLars Ellenberg 		number = mxb - device->rs_in_flight/8;
5840e49d7b0SLars Ellenberg 
585e65f440dSLars Ellenberg 	return number;
586e65f440dSLars Ellenberg }
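/* Unit check for the clamp above, with hypothetical numbers: "number" counts
 * 4 KiB resync blocks, mxb counts pages (typically also 4 KiB), and
 * rs_in_flight counts 512-byte sectors, so rs_in_flight/8 converts sectors to
 * 4 KiB units.  E.g. mxb = 2000 and rs_in_flight = 8000 sectors (= 1000
 * blocks) cap "number" at 1000. */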
587e65f440dSLars Ellenberg 
58844a4d551SLars Ellenberg static int make_resync_request(struct drbd_device *const device, int cancel)
589b411b363SPhilipp Reisner {
59044a4d551SLars Ellenberg 	struct drbd_peer_device *const peer_device = first_peer_device(device);
59144a4d551SLars Ellenberg 	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
592b411b363SPhilipp Reisner 	unsigned long bit;
593b411b363SPhilipp Reisner 	sector_t sector;
594b30ab791SAndreas Gruenbacher 	const sector_t capacity = drbd_get_capacity(device->this_bdev);
5951816a2b4SLars Ellenberg 	int max_bio_size;
596e65f440dSLars Ellenberg 	int number, rollback_i, size;
597506afb62SLars Ellenberg 	int align, requeue = 0;
5980f0601f4SLars Ellenberg 	int i = 0;
599b411b363SPhilipp Reisner 
600b411b363SPhilipp Reisner 	if (unlikely(cancel))
60199920dc5SAndreas Gruenbacher 		return 0;
602b411b363SPhilipp Reisner 
603b30ab791SAndreas Gruenbacher 	if (device->rs_total == 0) {
604af85e8e8SLars Ellenberg 		/* empty resync? */
605b30ab791SAndreas Gruenbacher 		drbd_resync_finished(device);
60699920dc5SAndreas Gruenbacher 		return 0;
607af85e8e8SLars Ellenberg 	}
608af85e8e8SLars Ellenberg 
609b30ab791SAndreas Gruenbacher 	if (!get_ldev(device)) {
610b30ab791SAndreas Gruenbacher 		/* Since we only need to access device->rsync a
611b30ab791SAndreas Gruenbacher 		   get_ldev_if_state(device,D_FAILED) would be sufficient, but
612b411b363SPhilipp Reisner 		   to continue resync with a broken disk makes no sense at
613b411b363SPhilipp Reisner 		   all */
614d0180171SAndreas Gruenbacher 		drbd_err(device, "Disk broke down during resync!\n");
61599920dc5SAndreas Gruenbacher 		return 0;
616b411b363SPhilipp Reisner 	}
617b411b363SPhilipp Reisner 
618b30ab791SAndreas Gruenbacher 	max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9;
619b30ab791SAndreas Gruenbacher 	number = drbd_rs_number_requests(device);
6200e49d7b0SLars Ellenberg 	if (number <= 0)
6210f0601f4SLars Ellenberg 		goto requeue;
622b411b363SPhilipp Reisner 
623b411b363SPhilipp Reisner 	for (i = 0; i < number; i++) {
624506afb62SLars Ellenberg 		/* Stop generating RS requests when half of the send buffer is filled,
625506afb62SLars Ellenberg 		 * but notify TCP that we'd like to have more space. */
62644a4d551SLars Ellenberg 		mutex_lock(&connection->data.mutex);
62744a4d551SLars Ellenberg 		if (connection->data.socket) {
628506afb62SLars Ellenberg 			struct sock *sk = connection->data.socket->sk;
629506afb62SLars Ellenberg 			int queued = sk->sk_wmem_queued;
630506afb62SLars Ellenberg 			int sndbuf = sk->sk_sndbuf;
631506afb62SLars Ellenberg 			if (queued > sndbuf / 2) {
632506afb62SLars Ellenberg 				requeue = 1;
633506afb62SLars Ellenberg 				if (sk->sk_socket)
634506afb62SLars Ellenberg 					set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
635b411b363SPhilipp Reisner 			}
636506afb62SLars Ellenberg 		} else
637506afb62SLars Ellenberg 			requeue = 1;
63844a4d551SLars Ellenberg 		mutex_unlock(&connection->data.mutex);
639506afb62SLars Ellenberg 		if (requeue)
640b411b363SPhilipp Reisner 			goto requeue;
641b411b363SPhilipp Reisner 
642b411b363SPhilipp Reisner next_sector:
643b411b363SPhilipp Reisner 		size = BM_BLOCK_SIZE;
644b30ab791SAndreas Gruenbacher 		bit  = drbd_bm_find_next(device, device->bm_resync_fo);
645b411b363SPhilipp Reisner 
6464b0715f0SLars Ellenberg 		if (bit == DRBD_END_OF_BITMAP) {
647b30ab791SAndreas Gruenbacher 			device->bm_resync_fo = drbd_bm_bits(device);
648b30ab791SAndreas Gruenbacher 			put_ldev(device);
64999920dc5SAndreas Gruenbacher 			return 0;
650b411b363SPhilipp Reisner 		}
651b411b363SPhilipp Reisner 
652b411b363SPhilipp Reisner 		sector = BM_BIT_TO_SECT(bit);
653b411b363SPhilipp Reisner 
654b30ab791SAndreas Gruenbacher 		if (drbd_rs_should_slow_down(device, sector) ||
655b30ab791SAndreas Gruenbacher 		    drbd_try_rs_begin_io(device, sector)) {
656b30ab791SAndreas Gruenbacher 			device->bm_resync_fo = bit;
657b411b363SPhilipp Reisner 			goto requeue;
658b411b363SPhilipp Reisner 		}
659b30ab791SAndreas Gruenbacher 		device->bm_resync_fo = bit + 1;
660b411b363SPhilipp Reisner 
661b30ab791SAndreas Gruenbacher 		if (unlikely(drbd_bm_test_bit(device, bit) == 0)) {
662b30ab791SAndreas Gruenbacher 			drbd_rs_complete_io(device, sector);
663b411b363SPhilipp Reisner 			goto next_sector;
664b411b363SPhilipp Reisner 		}
665b411b363SPhilipp Reisner 
6661816a2b4SLars Ellenberg #if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
667b411b363SPhilipp Reisner 		/* try to find some adjacent bits.
668b411b363SPhilipp Reisner 		 * we stop if we have already the maximum req size.
669b411b363SPhilipp Reisner 		 *
670b411b363SPhilipp Reisner 		 * Additionally always align bigger requests, in order to
671b411b363SPhilipp Reisner 		 * be prepared for all stripe sizes of software RAIDs.
672b411b363SPhilipp Reisner 		 */
673b411b363SPhilipp Reisner 		align = 1;
674d207450cSPhilipp Reisner 		rollback_i = i;
6756377b923SLars Ellenberg 		while (i < number) {
6761816a2b4SLars Ellenberg 			if (size + BM_BLOCK_SIZE > max_bio_size)
677b411b363SPhilipp Reisner 				break;
678b411b363SPhilipp Reisner 
679b411b363SPhilipp Reisner 			/* Be always aligned */
680b411b363SPhilipp Reisner 			if (sector & ((1<<(align+3))-1))
681b411b363SPhilipp Reisner 				break;
682b411b363SPhilipp Reisner 
683b411b363SPhilipp Reisner 			/* do not cross extent boundaries */
684b411b363SPhilipp Reisner 			if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
685b411b363SPhilipp Reisner 				break;
686b411b363SPhilipp Reisner 			/* now, is it actually dirty, after all?
687b411b363SPhilipp Reisner 			 * caution, drbd_bm_test_bit is tri-state for some
688b411b363SPhilipp Reisner 			 * obscure reason; ( b == 0 ) would get the out-of-band
689b411b363SPhilipp Reisner 			 * only accidentally right because of the "oddly sized"
690b411b363SPhilipp Reisner 			 * adjustment below */
691b30ab791SAndreas Gruenbacher 			if (drbd_bm_test_bit(device, bit+1) != 1)
692b411b363SPhilipp Reisner 				break;
693b411b363SPhilipp Reisner 			bit++;
694b411b363SPhilipp Reisner 			size += BM_BLOCK_SIZE;
695b411b363SPhilipp Reisner 			if ((BM_BLOCK_SIZE << align) <= size)
696b411b363SPhilipp Reisner 				align++;
697b411b363SPhilipp Reisner 			i++;
698b411b363SPhilipp Reisner 		}
699b411b363SPhilipp Reisner 		/* if we merged some,
700b411b363SPhilipp Reisner 		 * reset the offset to start the next drbd_bm_find_next from */
701b411b363SPhilipp Reisner 		if (size > BM_BLOCK_SIZE)
702b30ab791SAndreas Gruenbacher 			device->bm_resync_fo = bit + 1;
703b411b363SPhilipp Reisner #endif
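		/* On the alignment check in the loop above: the mask
		 * (1 << (align+3)) - 1 is in 512-byte sectors, since
		 * 1 << align blocks of 4 KiB equal 1 << (align+3) sectors.
		 * With align = 1 the request only grows past a single 4 KiB
		 * block if its start sector is 8 KiB (16-sector) aligned;
		 * once it reaches 8 KiB, align becomes 2 and further growth
		 * requires 16 KiB alignment, and so on. */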
704b411b363SPhilipp Reisner 
705b411b363SPhilipp Reisner 		/* adjust very last sectors, in case we are oddly sized */
706b411b363SPhilipp Reisner 		if (sector + (size>>9) > capacity)
707b411b363SPhilipp Reisner 			size = (capacity-sector)<<9;
708aaaba345SLars Ellenberg 
709aaaba345SLars Ellenberg 		if (device->use_csums) {
71044a4d551SLars Ellenberg 			switch (read_for_csum(peer_device, sector, size)) {
71180a40e43SLars Ellenberg 			case -EIO: /* Disk failure */
712b30ab791SAndreas Gruenbacher 				put_ldev(device);
71399920dc5SAndreas Gruenbacher 				return -EIO;
71480a40e43SLars Ellenberg 			case -EAGAIN: /* allocation failed, or ldev busy */
715b30ab791SAndreas Gruenbacher 				drbd_rs_complete_io(device, sector);
716b30ab791SAndreas Gruenbacher 				device->bm_resync_fo = BM_SECT_TO_BIT(sector);
717d207450cSPhilipp Reisner 				i = rollback_i;
718b411b363SPhilipp Reisner 				goto requeue;
71980a40e43SLars Ellenberg 			case 0:
72080a40e43SLars Ellenberg 				/* everything ok */
72180a40e43SLars Ellenberg 				break;
72280a40e43SLars Ellenberg 			default:
72380a40e43SLars Ellenberg 				BUG();
724b411b363SPhilipp Reisner 			}
725b411b363SPhilipp Reisner 		} else {
72699920dc5SAndreas Gruenbacher 			int err;
72799920dc5SAndreas Gruenbacher 
728b30ab791SAndreas Gruenbacher 			inc_rs_pending(device);
72944a4d551SLars Ellenberg 			err = drbd_send_drequest(peer_device, P_RS_DATA_REQUEST,
73099920dc5SAndreas Gruenbacher 						 sector, size, ID_SYNCER);
73199920dc5SAndreas Gruenbacher 			if (err) {
732d0180171SAndreas Gruenbacher 				drbd_err(device, "drbd_send_drequest() failed, aborting...\n");
733b30ab791SAndreas Gruenbacher 				dec_rs_pending(device);
734b30ab791SAndreas Gruenbacher 				put_ldev(device);
73599920dc5SAndreas Gruenbacher 				return err;
736b411b363SPhilipp Reisner 			}
737b411b363SPhilipp Reisner 		}
738b411b363SPhilipp Reisner 	}
739b411b363SPhilipp Reisner 
740b30ab791SAndreas Gruenbacher 	if (device->bm_resync_fo >= drbd_bm_bits(device)) {
741b411b363SPhilipp Reisner 		/* last syncer _request_ was sent,
742b411b363SPhilipp Reisner 		 * but the P_RS_DATA_REPLY not yet received.  sync will end (and
743b411b363SPhilipp Reisner 		 * next sync group will resume), as soon as we receive the last
744b411b363SPhilipp Reisner 		 * resync data block, and the last bit is cleared.
745b411b363SPhilipp Reisner 		 * until then resync "work" is "inactive" ...
746b411b363SPhilipp Reisner 		 */
747b30ab791SAndreas Gruenbacher 		put_ldev(device);
74899920dc5SAndreas Gruenbacher 		return 0;
749b411b363SPhilipp Reisner 	}
750b411b363SPhilipp Reisner 
751b411b363SPhilipp Reisner  requeue:
752b30ab791SAndreas Gruenbacher 	device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
753b30ab791SAndreas Gruenbacher 	mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
754b30ab791SAndreas Gruenbacher 	put_ldev(device);
75599920dc5SAndreas Gruenbacher 	return 0;
756b411b363SPhilipp Reisner }
757b411b363SPhilipp Reisner 
758d448a2e1SAndreas Gruenbacher static int make_ov_request(struct drbd_device *device, int cancel)
759b411b363SPhilipp Reisner {
760b411b363SPhilipp Reisner 	int number, i, size;
761b411b363SPhilipp Reisner 	sector_t sector;
762b30ab791SAndreas Gruenbacher 	const sector_t capacity = drbd_get_capacity(device->this_bdev);
76358ffa580SLars Ellenberg 	bool stop_sector_reached = false;
764b411b363SPhilipp Reisner 
765b411b363SPhilipp Reisner 	if (unlikely(cancel))
766b411b363SPhilipp Reisner 		return 1;
767b411b363SPhilipp Reisner 
768b30ab791SAndreas Gruenbacher 	number = drbd_rs_number_requests(device);
769b411b363SPhilipp Reisner 
770b30ab791SAndreas Gruenbacher 	sector = device->ov_position;
771b411b363SPhilipp Reisner 	for (i = 0; i < number; i++) {
77258ffa580SLars Ellenberg 		if (sector >= capacity)
773b411b363SPhilipp Reisner 			return 1;
77458ffa580SLars Ellenberg 
77558ffa580SLars Ellenberg 		/* We check for "finished" only in the reply path:
77658ffa580SLars Ellenberg 		 * w_e_end_ov_reply().
77758ffa580SLars Ellenberg 		 * We need to send at least one request out. */
77858ffa580SLars Ellenberg 		stop_sector_reached = i > 0
779b30ab791SAndreas Gruenbacher 			&& verify_can_do_stop_sector(device)
780b30ab791SAndreas Gruenbacher 			&& sector >= device->ov_stop_sector;
78158ffa580SLars Ellenberg 		if (stop_sector_reached)
78258ffa580SLars Ellenberg 			break;
783b411b363SPhilipp Reisner 
784b411b363SPhilipp Reisner 		size = BM_BLOCK_SIZE;
785b411b363SPhilipp Reisner 
786b30ab791SAndreas Gruenbacher 		if (drbd_rs_should_slow_down(device, sector) ||
787b30ab791SAndreas Gruenbacher 		    drbd_try_rs_begin_io(device, sector)) {
788b30ab791SAndreas Gruenbacher 			device->ov_position = sector;
789b411b363SPhilipp Reisner 			goto requeue;
790b411b363SPhilipp Reisner 		}
791b411b363SPhilipp Reisner 
792b411b363SPhilipp Reisner 		if (sector + (size>>9) > capacity)
793b411b363SPhilipp Reisner 			size = (capacity-sector)<<9;
794b411b363SPhilipp Reisner 
795b30ab791SAndreas Gruenbacher 		inc_rs_pending(device);
79669a22773SAndreas Gruenbacher 		if (drbd_send_ov_request(first_peer_device(device), sector, size)) {
797b30ab791SAndreas Gruenbacher 			dec_rs_pending(device);
798b411b363SPhilipp Reisner 			return 0;
799b411b363SPhilipp Reisner 		}
800b411b363SPhilipp Reisner 		sector += BM_SECT_PER_BIT;
801b411b363SPhilipp Reisner 	}
802b30ab791SAndreas Gruenbacher 	device->ov_position = sector;
803b411b363SPhilipp Reisner 
804b411b363SPhilipp Reisner  requeue:
805b30ab791SAndreas Gruenbacher 	device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
80658ffa580SLars Ellenberg 	if (i == 0 || !stop_sector_reached)
807b30ab791SAndreas Gruenbacher 		mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
808b411b363SPhilipp Reisner 	return 1;
809b411b363SPhilipp Reisner }
810b411b363SPhilipp Reisner 
81199920dc5SAndreas Gruenbacher int w_ov_finished(struct drbd_work *w, int cancel)
812b411b363SPhilipp Reisner {
81384b8c06bSAndreas Gruenbacher 	struct drbd_device_work *dw =
81484b8c06bSAndreas Gruenbacher 		container_of(w, struct drbd_device_work, w);
81584b8c06bSAndreas Gruenbacher 	struct drbd_device *device = dw->device;
81684b8c06bSAndreas Gruenbacher 	kfree(dw);
817b30ab791SAndreas Gruenbacher 	ov_out_of_sync_print(device);
818b30ab791SAndreas Gruenbacher 	drbd_resync_finished(device);
819b411b363SPhilipp Reisner 
82099920dc5SAndreas Gruenbacher 	return 0;
821b411b363SPhilipp Reisner }
822b411b363SPhilipp Reisner 
82399920dc5SAndreas Gruenbacher static int w_resync_finished(struct drbd_work *w, int cancel)
824b411b363SPhilipp Reisner {
82584b8c06bSAndreas Gruenbacher 	struct drbd_device_work *dw =
82684b8c06bSAndreas Gruenbacher 		container_of(w, struct drbd_device_work, w);
82784b8c06bSAndreas Gruenbacher 	struct drbd_device *device = dw->device;
82884b8c06bSAndreas Gruenbacher 	kfree(dw);
829b411b363SPhilipp Reisner 
830b30ab791SAndreas Gruenbacher 	drbd_resync_finished(device);
831b411b363SPhilipp Reisner 
83299920dc5SAndreas Gruenbacher 	return 0;
833b411b363SPhilipp Reisner }
834b411b363SPhilipp Reisner 
835b30ab791SAndreas Gruenbacher static void ping_peer(struct drbd_device *device)
836af85e8e8SLars Ellenberg {
837a6b32bc3SAndreas Gruenbacher 	struct drbd_connection *connection = first_peer_device(device)->connection;
8382a67d8b9SPhilipp Reisner 
839bde89a9eSAndreas Gruenbacher 	clear_bit(GOT_PING_ACK, &connection->flags);
840bde89a9eSAndreas Gruenbacher 	request_ping(connection);
841bde89a9eSAndreas Gruenbacher 	wait_event(connection->ping_wait,
842bde89a9eSAndreas Gruenbacher 		   test_bit(GOT_PING_ACK, &connection->flags) || device->state.conn < C_CONNECTED);
843af85e8e8SLars Ellenberg }
844af85e8e8SLars Ellenberg 
845b30ab791SAndreas Gruenbacher int drbd_resync_finished(struct drbd_device *device)
846b411b363SPhilipp Reisner {
847b411b363SPhilipp Reisner 	unsigned long db, dt, dbdt;
848b411b363SPhilipp Reisner 	unsigned long n_oos;
849b411b363SPhilipp Reisner 	union drbd_state os, ns;
85084b8c06bSAndreas Gruenbacher 	struct drbd_device_work *dw;
851b411b363SPhilipp Reisner 	char *khelper_cmd = NULL;
85226525618SLars Ellenberg 	int verify_done = 0;
853b411b363SPhilipp Reisner 
854b411b363SPhilipp Reisner 	/* Remove all elements from the resync LRU. Since future actions
855b411b363SPhilipp Reisner 	 * might set bits in the (main) bitmap, the entries in the
856b411b363SPhilipp Reisner 	 * resync LRU would otherwise be wrong. */
857b30ab791SAndreas Gruenbacher 	if (drbd_rs_del_all(device)) {
858b411b363SPhilipp Reisner 		/* In case this is not possible now, most probably because
859b411b363SPhilipp Reisner 		 * there are P_RS_DATA_REPLY Packets lingering on the worker's
860b411b363SPhilipp Reisner 		 * queue (or even the read operations for those packets
861b411b363SPhilipp Reisner 		 * are not finished by now).   Retry in 100ms. */
862b411b363SPhilipp Reisner 
86320ee6390SPhilipp Reisner 		schedule_timeout_interruptible(HZ / 10);
86484b8c06bSAndreas Gruenbacher 		dw = kmalloc(sizeof(struct drbd_device_work), GFP_ATOMIC);
86584b8c06bSAndreas Gruenbacher 		if (dw) {
86684b8c06bSAndreas Gruenbacher 			dw->w.cb = w_resync_finished;
86784b8c06bSAndreas Gruenbacher 			dw->device = device;
86884b8c06bSAndreas Gruenbacher 			drbd_queue_work(&first_peer_device(device)->connection->sender_work,
86984b8c06bSAndreas Gruenbacher 					&dw->w);
870b411b363SPhilipp Reisner 			return 1;
871b411b363SPhilipp Reisner 		}
87284b8c06bSAndreas Gruenbacher 		drbd_err(device, "Warn failed to drbd_rs_del_all() and to kmalloc(dw).\n");
873b411b363SPhilipp Reisner 	}
874b411b363SPhilipp Reisner 
875b30ab791SAndreas Gruenbacher 	dt = (jiffies - device->rs_start - device->rs_paused) / HZ;
876b411b363SPhilipp Reisner 	if (dt <= 0)
877b411b363SPhilipp Reisner 		dt = 1;
87858ffa580SLars Ellenberg 
879b30ab791SAndreas Gruenbacher 	db = device->rs_total;
88058ffa580SLars Ellenberg 	/* adjust for verify start and stop sectors, respectively the reached position */
881b30ab791SAndreas Gruenbacher 	if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
882b30ab791SAndreas Gruenbacher 		db -= device->ov_left;
88358ffa580SLars Ellenberg 
884b411b363SPhilipp Reisner 	dbdt = Bit2KB(db/dt);
885b30ab791SAndreas Gruenbacher 	device->rs_paused /= HZ;
886b411b363SPhilipp Reisner 
887b30ab791SAndreas Gruenbacher 	if (!get_ldev(device))
888b411b363SPhilipp Reisner 		goto out;
889b411b363SPhilipp Reisner 
890b30ab791SAndreas Gruenbacher 	ping_peer(device);
891af85e8e8SLars Ellenberg 
8920500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
893b30ab791SAndreas Gruenbacher 	os = drbd_read_state(device);
894b411b363SPhilipp Reisner 
89526525618SLars Ellenberg 	verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);
89626525618SLars Ellenberg 
897b411b363SPhilipp Reisner 	/* This protects us against multiple calls (that can happen in the presence
898b411b363SPhilipp Reisner 	   of application IO), and against connectivity loss just before we arrive here. */
899b411b363SPhilipp Reisner 	if (os.conn <= C_CONNECTED)
900b411b363SPhilipp Reisner 		goto out_unlock;
901b411b363SPhilipp Reisner 
902b411b363SPhilipp Reisner 	ns = os;
903b411b363SPhilipp Reisner 	ns.conn = C_CONNECTED;
904b411b363SPhilipp Reisner 
905d0180171SAndreas Gruenbacher 	drbd_info(device, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
90626525618SLars Ellenberg 	     verify_done ? "Online verify" : "Resync",
907b30ab791SAndreas Gruenbacher 	     dt + device->rs_paused, device->rs_paused, dbdt);
908b411b363SPhilipp Reisner 
909b30ab791SAndreas Gruenbacher 	n_oos = drbd_bm_total_weight(device);
910b411b363SPhilipp Reisner 
911b411b363SPhilipp Reisner 	if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
912b411b363SPhilipp Reisner 		if (n_oos) {
913d0180171SAndreas Gruenbacher 			drbd_alert(device, "Online verify found %lu %dk block out of sync!\n",
914b411b363SPhilipp Reisner 			      n_oos, Bit2KB(1));
915b411b363SPhilipp Reisner 			khelper_cmd = "out-of-sync";
916b411b363SPhilipp Reisner 		}
917b411b363SPhilipp Reisner 	} else {
9180b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, (n_oos - device->rs_failed) == 0);
919b411b363SPhilipp Reisner 
920b411b363SPhilipp Reisner 		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
921b411b363SPhilipp Reisner 			khelper_cmd = "after-resync-target";
922b411b363SPhilipp Reisner 
923aaaba345SLars Ellenberg 		if (device->use_csums && device->rs_total) {
924b30ab791SAndreas Gruenbacher 			const unsigned long s = device->rs_same_csum;
925b30ab791SAndreas Gruenbacher 			const unsigned long t = device->rs_total;
926b411b363SPhilipp Reisner 			const int ratio =
927b411b363SPhilipp Reisner 				(t == 0)     ? 0 :
928b411b363SPhilipp Reisner 			(t < 100000) ? ((s*100)/t) : (s/(t/100));
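			/* The two branches above compute the same percentage;
			 * the t >= 100000 form presumably avoids overflowing
			 * s*100 on 32-bit for very large bit counts, at the
			 * cost of a little precision. */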
929d0180171SAndreas Gruenbacher 			drbd_info(device, "%u %% had equal checksums, eliminated: %luK; "
930b411b363SPhilipp Reisner 			     "transferred %luK total %luK\n",
931b411b363SPhilipp Reisner 			     ratio,
932b30ab791SAndreas Gruenbacher 			     Bit2KB(device->rs_same_csum),
933b30ab791SAndreas Gruenbacher 			     Bit2KB(device->rs_total - device->rs_same_csum),
934b30ab791SAndreas Gruenbacher 			     Bit2KB(device->rs_total));
935b411b363SPhilipp Reisner 		}
936b411b363SPhilipp Reisner 	}
937b411b363SPhilipp Reisner 
938b30ab791SAndreas Gruenbacher 	if (device->rs_failed) {
939d0180171SAndreas Gruenbacher 		drbd_info(device, "            %lu failed blocks\n", device->rs_failed);
940b411b363SPhilipp Reisner 
941b411b363SPhilipp Reisner 		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
942b411b363SPhilipp Reisner 			ns.disk = D_INCONSISTENT;
943b411b363SPhilipp Reisner 			ns.pdsk = D_UP_TO_DATE;
944b411b363SPhilipp Reisner 		} else {
945b411b363SPhilipp Reisner 			ns.disk = D_UP_TO_DATE;
946b411b363SPhilipp Reisner 			ns.pdsk = D_INCONSISTENT;
947b411b363SPhilipp Reisner 		}
948b411b363SPhilipp Reisner 	} else {
949b411b363SPhilipp Reisner 		ns.disk = D_UP_TO_DATE;
950b411b363SPhilipp Reisner 		ns.pdsk = D_UP_TO_DATE;
951b411b363SPhilipp Reisner 
952b411b363SPhilipp Reisner 		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
953b30ab791SAndreas Gruenbacher 			if (device->p_uuid) {
954b411b363SPhilipp Reisner 				int i;
955b411b363SPhilipp Reisner 				for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
956b30ab791SAndreas Gruenbacher 					_drbd_uuid_set(device, i, device->p_uuid[i]);
957b30ab791SAndreas Gruenbacher 				drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_CURRENT]);
958b30ab791SAndreas Gruenbacher 				_drbd_uuid_set(device, UI_CURRENT, device->p_uuid[UI_CURRENT]);
959b411b363SPhilipp Reisner 			} else {
960d0180171SAndreas Gruenbacher 				drbd_err(device, "device->p_uuid is NULL! BUG\n");
961b411b363SPhilipp Reisner 			}
962b411b363SPhilipp Reisner 		}
963b411b363SPhilipp Reisner 
96462b0da3aSLars Ellenberg 		if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
96562b0da3aSLars Ellenberg 			/* for verify runs, we don't update uuids here,
96662b0da3aSLars Ellenberg 			 * so there would be nothing to report. */
967b30ab791SAndreas Gruenbacher 			drbd_uuid_set_bm(device, 0UL);
968b30ab791SAndreas Gruenbacher 			drbd_print_uuids(device, "updated UUIDs");
969b30ab791SAndreas Gruenbacher 			if (device->p_uuid) {
970b411b363SPhilipp Reisner 				/* Now the two UUID sets are equal, update what we
971b411b363SPhilipp Reisner 				 * know of the peer. */
972b411b363SPhilipp Reisner 				int i;
973b411b363SPhilipp Reisner 				for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
974b30ab791SAndreas Gruenbacher 					device->p_uuid[i] = device->ldev->md.uuid[i];
975b411b363SPhilipp Reisner 			}
976b411b363SPhilipp Reisner 		}
97762b0da3aSLars Ellenberg 	}
978b411b363SPhilipp Reisner 
979b30ab791SAndreas Gruenbacher 	_drbd_set_state(device, ns, CS_VERBOSE, NULL);
980b411b363SPhilipp Reisner out_unlock:
9810500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
982b30ab791SAndreas Gruenbacher 	put_ldev(device);
983b411b363SPhilipp Reisner out:
984b30ab791SAndreas Gruenbacher 	device->rs_total  = 0;
985b30ab791SAndreas Gruenbacher 	device->rs_failed = 0;
986b30ab791SAndreas Gruenbacher 	device->rs_paused = 0;
98758ffa580SLars Ellenberg 
98858ffa580SLars Ellenberg 	/* reset start sector, if we reached end of device */
989b30ab791SAndreas Gruenbacher 	if (verify_done && device->ov_left == 0)
990b30ab791SAndreas Gruenbacher 		device->ov_start_sector = 0;
991b411b363SPhilipp Reisner 
992b30ab791SAndreas Gruenbacher 	drbd_md_sync(device);
99313d42685SLars Ellenberg 
994b411b363SPhilipp Reisner 	if (khelper_cmd)
995b30ab791SAndreas Gruenbacher 		drbd_khelper(device, khelper_cmd);
996b411b363SPhilipp Reisner 
997b411b363SPhilipp Reisner 	return 1;
998b411b363SPhilipp Reisner }
999b411b363SPhilipp Reisner 
1000b411b363SPhilipp Reisner /* helper */
1001b30ab791SAndreas Gruenbacher static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_request *peer_req)
1002b411b363SPhilipp Reisner {
1003045417f7SAndreas Gruenbacher 	if (drbd_peer_req_has_active_page(peer_req)) {
1004b411b363SPhilipp Reisner 		/* This might happen if sendpage() has not finished */
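		/* Account the pages (rounded up to whole pages) as "in use by
		 * the network" instead of plain "in use", and park the request
		 * on net_ee until the network layer is done with those pages. */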
1005db830c46SAndreas Gruenbacher 		int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
1006b30ab791SAndreas Gruenbacher 		atomic_add(i, &device->pp_in_use_by_net);
1007b30ab791SAndreas Gruenbacher 		atomic_sub(i, &device->pp_in_use);
10080500813fSAndreas Gruenbacher 		spin_lock_irq(&device->resource->req_lock);
1009a8cd15baSAndreas Gruenbacher 		list_add_tail(&peer_req->w.list, &device->net_ee);
10100500813fSAndreas Gruenbacher 		spin_unlock_irq(&device->resource->req_lock);
1011435f0740SLars Ellenberg 		wake_up(&drbd_pp_wait);
1012b411b363SPhilipp Reisner 	} else
1013b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
1014b411b363SPhilipp Reisner }
1015b411b363SPhilipp Reisner 
1016b411b363SPhilipp Reisner /**
1017b411b363SPhilipp Reisner  * w_e_end_data_req() - Worker callback to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
1019b411b363SPhilipp Reisner  * @w:		work object.
1020b411b363SPhilipp Reisner  * @cancel:	The connection will be closed anyway
1021b411b363SPhilipp Reisner  */
102299920dc5SAndreas Gruenbacher int w_e_end_data_req(struct drbd_work *w, int cancel)
1023b411b363SPhilipp Reisner {
1024a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
10256780139cSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
10266780139cSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
102799920dc5SAndreas Gruenbacher 	int err;
1028b411b363SPhilipp Reisner 
1029b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
1030b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
1031b30ab791SAndreas Gruenbacher 		dec_unacked(device);
103299920dc5SAndreas Gruenbacher 		return 0;
1033b411b363SPhilipp Reisner 	}
1034b411b363SPhilipp Reisner 
1035db830c46SAndreas Gruenbacher 	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
10366780139cSAndreas Gruenbacher 		err = drbd_send_block(peer_device, P_DATA_REPLY, peer_req);
1037b411b363SPhilipp Reisner 	} else {
1038b411b363SPhilipp Reisner 		if (__ratelimit(&drbd_ratelimit_state))
1039d0180171SAndreas Gruenbacher 			drbd_err(device, "Sending NegDReply. sector=%llus.\n",
1040db830c46SAndreas Gruenbacher 			    (unsigned long long)peer_req->i.sector);
1041b411b363SPhilipp Reisner 
10426780139cSAndreas Gruenbacher 		err = drbd_send_ack(peer_device, P_NEG_DREPLY, peer_req);
1043b411b363SPhilipp Reisner 	}
1044b411b363SPhilipp Reisner 
1045b30ab791SAndreas Gruenbacher 	dec_unacked(device);
1046b411b363SPhilipp Reisner 
1047b30ab791SAndreas Gruenbacher 	move_to_net_ee_or_free(device, peer_req);
1048b411b363SPhilipp Reisner 
104999920dc5SAndreas Gruenbacher 	if (unlikely(err))
1050d0180171SAndreas Gruenbacher 		drbd_err(device, "drbd_send_block() failed\n");
105199920dc5SAndreas Gruenbacher 	return err;
1052b411b363SPhilipp Reisner }
1053b411b363SPhilipp Reisner 
1054b411b363SPhilipp Reisner /**
1055a209b4aeSAndreas Gruenbacher  * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
1056b411b363SPhilipp Reisner  * @w:		work object.
1057b411b363SPhilipp Reisner  * @cancel:	The connection will be closed anyway
1058b411b363SPhilipp Reisner  */
105999920dc5SAndreas Gruenbacher int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
1060b411b363SPhilipp Reisner {
1061a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
10626780139cSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
10636780139cSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
106499920dc5SAndreas Gruenbacher 	int err;
1065b411b363SPhilipp Reisner 
1066b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
1067b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
1068b30ab791SAndreas Gruenbacher 		dec_unacked(device);
106999920dc5SAndreas Gruenbacher 		return 0;
1070b411b363SPhilipp Reisner 	}
1071b411b363SPhilipp Reisner 
1072b30ab791SAndreas Gruenbacher 	if (get_ldev_if_state(device, D_FAILED)) {
1073b30ab791SAndreas Gruenbacher 		drbd_rs_complete_io(device, peer_req->i.sector);
1074b30ab791SAndreas Gruenbacher 		put_ldev(device);
1075b411b363SPhilipp Reisner 	}
1076b411b363SPhilipp Reisner 
1077b30ab791SAndreas Gruenbacher 	if (device->state.conn == C_AHEAD) {
10786780139cSAndreas Gruenbacher 		err = drbd_send_ack(peer_device, P_RS_CANCEL, peer_req);
1079db830c46SAndreas Gruenbacher 	} else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1080b30ab791SAndreas Gruenbacher 		if (likely(device->state.pdsk >= D_INCONSISTENT)) {
1081b30ab791SAndreas Gruenbacher 			inc_rs_pending(device);
10826780139cSAndreas Gruenbacher 			err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
1083b411b363SPhilipp Reisner 		} else {
1084b411b363SPhilipp Reisner 			if (__ratelimit(&drbd_ratelimit_state))
1085d0180171SAndreas Gruenbacher 				drbd_err(device, "Not sending RSDataReply, "
1086b411b363SPhilipp Reisner 				    "partner DISKLESS!\n");
108799920dc5SAndreas Gruenbacher 			err = 0;
1088b411b363SPhilipp Reisner 		}
1089b411b363SPhilipp Reisner 	} else {
1090b411b363SPhilipp Reisner 		if (__ratelimit(&drbd_ratelimit_state))
1091d0180171SAndreas Gruenbacher 			drbd_err(device, "Sending NegRSDReply. sector %llus.\n",
1092db830c46SAndreas Gruenbacher 			    (unsigned long long)peer_req->i.sector);
1093b411b363SPhilipp Reisner 
10946780139cSAndreas Gruenbacher 		err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);
1095b411b363SPhilipp Reisner 
1096b411b363SPhilipp Reisner 		/* update resync data with failure */
1097b30ab791SAndreas Gruenbacher 		drbd_rs_failed_io(device, peer_req->i.sector, peer_req->i.size);
1098b411b363SPhilipp Reisner 	}
1099b411b363SPhilipp Reisner 
1100b30ab791SAndreas Gruenbacher 	dec_unacked(device);
1101b411b363SPhilipp Reisner 
1102b30ab791SAndreas Gruenbacher 	move_to_net_ee_or_free(device, peer_req);
1103b411b363SPhilipp Reisner 
110499920dc5SAndreas Gruenbacher 	if (unlikely(err))
1105d0180171SAndreas Gruenbacher 		drbd_err(device, "drbd_send_block() failed\n");
110699920dc5SAndreas Gruenbacher 	return err;
1107b411b363SPhilipp Reisner }
1108b411b363SPhilipp Reisner 
110999920dc5SAndreas Gruenbacher int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
1110b411b363SPhilipp Reisner {
1111a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
11126780139cSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
11136780139cSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
1114b411b363SPhilipp Reisner 	struct digest_info *di;
1115b411b363SPhilipp Reisner 	int digest_size;
1116b411b363SPhilipp Reisner 	void *digest = NULL;
111799920dc5SAndreas Gruenbacher 	int err, eq = 0;
1118b411b363SPhilipp Reisner 
1119b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
1120b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
1121b30ab791SAndreas Gruenbacher 		dec_unacked(device);
112299920dc5SAndreas Gruenbacher 		return 0;
1123b411b363SPhilipp Reisner 	}
1124b411b363SPhilipp Reisner 
1125b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
1126b30ab791SAndreas Gruenbacher 		drbd_rs_complete_io(device, peer_req->i.sector);
1127b30ab791SAndreas Gruenbacher 		put_ldev(device);
11281d53f09eSLars Ellenberg 	}
1129b411b363SPhilipp Reisner 
1130db830c46SAndreas Gruenbacher 	di = peer_req->digest;
1131b411b363SPhilipp Reisner 
1132db830c46SAndreas Gruenbacher 	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1133b411b363SPhilipp Reisner 		/* quick hack to try to avoid a race against reconfiguration.
1134b411b363SPhilipp Reisner 		 * a real fix would be much more involved,
1135b411b363SPhilipp Reisner 		 * introducing more locking mechanisms */
11366780139cSAndreas Gruenbacher 		if (peer_device->connection->csums_tfm) {
11376780139cSAndreas Gruenbacher 			digest_size = crypto_hash_digestsize(peer_device->connection->csums_tfm);
11380b0ba1efSAndreas Gruenbacher 			D_ASSERT(device, digest_size == di->digest_size);
1139b411b363SPhilipp Reisner 			digest = kmalloc(digest_size, GFP_NOIO);
1140b411b363SPhilipp Reisner 		}
1141b411b363SPhilipp Reisner 		if (digest) {
11426780139cSAndreas Gruenbacher 			drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest);
1143b411b363SPhilipp Reisner 			eq = !memcmp(digest, di->digest, digest_size);
1144b411b363SPhilipp Reisner 			kfree(digest);
1145b411b363SPhilipp Reisner 		}
1146b411b363SPhilipp Reisner 
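		/* Matching digests mean the peer already holds identical data:
		 * acknowledge with P_RS_IS_IN_SYNC and skip the data transfer.
		 * Otherwise answer with the full block as P_RS_DATA_REPLY. */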
1147b411b363SPhilipp Reisner 		if (eq) {
1148b30ab791SAndreas Gruenbacher 			drbd_set_in_sync(device, peer_req->i.sector, peer_req->i.size);
1149676396d5SLars Ellenberg 			/* rs_same_csum unit is BM_BLOCK_SIZE */
1150b30ab791SAndreas Gruenbacher 			device->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT;
11516780139cSAndreas Gruenbacher 			err = drbd_send_ack(peer_device, P_RS_IS_IN_SYNC, peer_req);
1152b411b363SPhilipp Reisner 		} else {
1153b30ab791SAndreas Gruenbacher 			inc_rs_pending(device);
1154db830c46SAndreas Gruenbacher 			peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
1155db830c46SAndreas Gruenbacher 			peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */
1156204bba99SPhilipp Reisner 			kfree(di);
11576780139cSAndreas Gruenbacher 			err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
1158b411b363SPhilipp Reisner 		}
1159b411b363SPhilipp Reisner 	} else {
11606780139cSAndreas Gruenbacher 		err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);
1161b411b363SPhilipp Reisner 		if (__ratelimit(&drbd_ratelimit_state))
1162d0180171SAndreas Gruenbacher 			drbd_err(device, "Sending NegDReply. I guess it gets messy.\n");
1163b411b363SPhilipp Reisner 	}
1164b411b363SPhilipp Reisner 
1165b30ab791SAndreas Gruenbacher 	dec_unacked(device);
1166b30ab791SAndreas Gruenbacher 	move_to_net_ee_or_free(device, peer_req);
1167b411b363SPhilipp Reisner 
116899920dc5SAndreas Gruenbacher 	if (unlikely(err))
1169d0180171SAndreas Gruenbacher 		drbd_err(device, "drbd_send_block/ack() failed\n");
117099920dc5SAndreas Gruenbacher 	return err;
1171b411b363SPhilipp Reisner }
1172b411b363SPhilipp Reisner 
117399920dc5SAndreas Gruenbacher int w_e_end_ov_req(struct drbd_work *w, int cancel)
1174b411b363SPhilipp Reisner {
1175a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
11766780139cSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
11776780139cSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
1178db830c46SAndreas Gruenbacher 	sector_t sector = peer_req->i.sector;
1179db830c46SAndreas Gruenbacher 	unsigned int size = peer_req->i.size;
1180b411b363SPhilipp Reisner 	int digest_size;
1181b411b363SPhilipp Reisner 	void *digest;
118299920dc5SAndreas Gruenbacher 	int err = 0;
1183b411b363SPhilipp Reisner 
1184b411b363SPhilipp Reisner 	if (unlikely(cancel))
1185b411b363SPhilipp Reisner 		goto out;
1186b411b363SPhilipp Reisner 
11876780139cSAndreas Gruenbacher 	digest_size = crypto_hash_digestsize(peer_device->connection->verify_tfm);
1188b411b363SPhilipp Reisner 	digest = kmalloc(digest_size, GFP_NOIO);
11898f21420eSPhilipp Reisner 	if (!digest) {
119099920dc5SAndreas Gruenbacher 		err = 1;	/* terminate the connection in case the allocation failed */
11918f21420eSPhilipp Reisner 		goto out;
11928f21420eSPhilipp Reisner 	}
11938f21420eSPhilipp Reisner 
1194db830c46SAndreas Gruenbacher 	if (likely(!(peer_req->flags & EE_WAS_ERROR)))
11956780139cSAndreas Gruenbacher 		drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest);
11968f21420eSPhilipp Reisner 	else
11978f21420eSPhilipp Reisner 		memset(digest, 0, digest_size);
11988f21420eSPhilipp Reisner 
119953ea4331SLars Ellenberg 	/* Free peer_req and pages before send.
120053ea4331SLars Ellenberg 	 * In case we block on congestion, we could otherwise run into
120153ea4331SLars Ellenberg 	 * some distributed deadlock, if the other side blocks on
120253ea4331SLars Ellenberg 	 * congestion as well, because our receiver blocks in
1203c37c8ecfSAndreas Gruenbacher 	 * drbd_alloc_pages due to pp_in_use > max_buffers. */
1204b30ab791SAndreas Gruenbacher 	drbd_free_peer_req(device, peer_req);
1205db830c46SAndreas Gruenbacher 	peer_req = NULL;
1206b30ab791SAndreas Gruenbacher 	inc_rs_pending(device);
12076780139cSAndreas Gruenbacher 	err = drbd_send_drequest_csum(peer_device, sector, size, digest, digest_size, P_OV_REPLY);
120899920dc5SAndreas Gruenbacher 	if (err)
1209b30ab791SAndreas Gruenbacher 		dec_rs_pending(device);
1210b411b363SPhilipp Reisner 	kfree(digest);
1211b411b363SPhilipp Reisner 
1212b411b363SPhilipp Reisner out:
1213db830c46SAndreas Gruenbacher 	if (peer_req)
1214b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
1215b30ab791SAndreas Gruenbacher 	dec_unacked(device);
121699920dc5SAndreas Gruenbacher 	return err;
1217b411b363SPhilipp Reisner }
1218b411b363SPhilipp Reisner 
1219b30ab791SAndreas Gruenbacher void drbd_ov_out_of_sync_found(struct drbd_device *device, sector_t sector, int size)
1220b411b363SPhilipp Reisner {
1221b30ab791SAndreas Gruenbacher 	if (device->ov_last_oos_start + device->ov_last_oos_size == sector) {
1222b30ab791SAndreas Gruenbacher 		device->ov_last_oos_size += size>>9;
1223b411b363SPhilipp Reisner 	} else {
1224b30ab791SAndreas Gruenbacher 		device->ov_last_oos_start = sector;
1225b30ab791SAndreas Gruenbacher 		device->ov_last_oos_size = size>>9;
1226b411b363SPhilipp Reisner 	}
1227b30ab791SAndreas Gruenbacher 	drbd_set_out_of_sync(device, sector, size);
1228b411b363SPhilipp Reisner }
1229b411b363SPhilipp Reisner 
123099920dc5SAndreas Gruenbacher int w_e_end_ov_reply(struct drbd_work *w, int cancel)
1231b411b363SPhilipp Reisner {
1232a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
12336780139cSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
12346780139cSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
1235b411b363SPhilipp Reisner 	struct digest_info *di;
1236b411b363SPhilipp Reisner 	void *digest;
1237db830c46SAndreas Gruenbacher 	sector_t sector = peer_req->i.sector;
1238db830c46SAndreas Gruenbacher 	unsigned int size = peer_req->i.size;
123953ea4331SLars Ellenberg 	int digest_size;
124099920dc5SAndreas Gruenbacher 	int err, eq = 0;
124158ffa580SLars Ellenberg 	bool stop_sector_reached = false;
1242b411b363SPhilipp Reisner 
1243b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
1244b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
1245b30ab791SAndreas Gruenbacher 		dec_unacked(device);
124699920dc5SAndreas Gruenbacher 		return 0;
1247b411b363SPhilipp Reisner 	}
1248b411b363SPhilipp Reisner 
1249b411b363SPhilipp Reisner 	/* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
1250b411b363SPhilipp Reisner 	 * the resync lru has been cleaned up already */
1251b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
1252b30ab791SAndreas Gruenbacher 		drbd_rs_complete_io(device, peer_req->i.sector);
1253b30ab791SAndreas Gruenbacher 		put_ldev(device);
12541d53f09eSLars Ellenberg 	}
1255b411b363SPhilipp Reisner 
1256db830c46SAndreas Gruenbacher 	di = peer_req->digest;
1257b411b363SPhilipp Reisner 
1258db830c46SAndreas Gruenbacher 	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
12596780139cSAndreas Gruenbacher 		digest_size = crypto_hash_digestsize(peer_device->connection->verify_tfm);
1260b411b363SPhilipp Reisner 		digest = kmalloc(digest_size, GFP_NOIO);
1261b411b363SPhilipp Reisner 		if (digest) {
12626780139cSAndreas Gruenbacher 			drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest);
1263b411b363SPhilipp Reisner 
12640b0ba1efSAndreas Gruenbacher 			D_ASSERT(device, digest_size == di->digest_size);
1265b411b363SPhilipp Reisner 			eq = !memcmp(digest, di->digest, digest_size);
1266b411b363SPhilipp Reisner 			kfree(digest);
1267b411b363SPhilipp Reisner 		}
1268b411b363SPhilipp Reisner 	}
1269b411b363SPhilipp Reisner 
12709676c760SLars Ellenberg 	/* Free peer_req and pages before send.
127153ea4331SLars Ellenberg 	 * In case we block on congestion, we could otherwise run into
127253ea4331SLars Ellenberg 	 * some distributed deadlock, if the other side blocks on
127353ea4331SLars Ellenberg 	 * congestion as well, because our receiver blocks in
1274c37c8ecfSAndreas Gruenbacher 	 * drbd_alloc_pages due to pp_in_use > max_buffers. */
1275b30ab791SAndreas Gruenbacher 	drbd_free_peer_req(device, peer_req);
1276b411b363SPhilipp Reisner 	if (!eq)
1277b30ab791SAndreas Gruenbacher 		drbd_ov_out_of_sync_found(device, sector, size);
1278b411b363SPhilipp Reisner 	else
1279b30ab791SAndreas Gruenbacher 		ov_out_of_sync_print(device);
1280b411b363SPhilipp Reisner 
12816780139cSAndreas Gruenbacher 	err = drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size,
1282b411b363SPhilipp Reisner 			       eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
1283b411b363SPhilipp Reisner 
1284b30ab791SAndreas Gruenbacher 	dec_unacked(device);
1285b411b363SPhilipp Reisner 
1286b30ab791SAndreas Gruenbacher 	--device->ov_left;
1287ea5442afSLars Ellenberg 
1288ea5442afSLars Ellenberg 	/* let's advance progress step marks only for every other megabyte */
1289b30ab791SAndreas Gruenbacher 	if ((device->ov_left & 0x200) == 0x200)
1290b30ab791SAndreas Gruenbacher 		drbd_advance_rs_marks(device, device->ov_left);
1291ea5442afSLars Ellenberg 
1292b30ab791SAndreas Gruenbacher 	stop_sector_reached = verify_can_do_stop_sector(device) &&
1293b30ab791SAndreas Gruenbacher 		(sector + (size>>9)) >= device->ov_stop_sector;
129458ffa580SLars Ellenberg 
1295b30ab791SAndreas Gruenbacher 	if (device->ov_left == 0 || stop_sector_reached) {
1296b30ab791SAndreas Gruenbacher 		ov_out_of_sync_print(device);
1297b30ab791SAndreas Gruenbacher 		drbd_resync_finished(device);
1298b411b363SPhilipp Reisner 	}
1299b411b363SPhilipp Reisner 
130099920dc5SAndreas Gruenbacher 	return err;
1301b411b363SPhilipp Reisner }
1302b411b363SPhilipp Reisner 
1303b6dd1a89SLars Ellenberg /* FIXME
1304b6dd1a89SLars Ellenberg  * We need to track the number of pending barrier acks,
1305b6dd1a89SLars Ellenberg  * and to be able to wait for them.
1306b6dd1a89SLars Ellenberg  * See also comment in drbd_adm_attach before drbd_suspend_io.
1307b6dd1a89SLars Ellenberg  */
1308bde89a9eSAndreas Gruenbacher static int drbd_send_barrier(struct drbd_connection *connection)
1309b411b363SPhilipp Reisner {
13109f5bdc33SAndreas Gruenbacher 	struct p_barrier *p;
1311b6dd1a89SLars Ellenberg 	struct drbd_socket *sock;
1312b411b363SPhilipp Reisner 
1313bde89a9eSAndreas Gruenbacher 	sock = &connection->data;
1314bde89a9eSAndreas Gruenbacher 	p = conn_prepare_command(connection, sock);
13159f5bdc33SAndreas Gruenbacher 	if (!p)
13169f5bdc33SAndreas Gruenbacher 		return -EIO;
1317bde89a9eSAndreas Gruenbacher 	p->barrier = connection->send.current_epoch_nr;
1318b6dd1a89SLars Ellenberg 	p->pad = 0;
1319bde89a9eSAndreas Gruenbacher 	connection->send.current_epoch_writes = 0;
1320b6dd1a89SLars Ellenberg 
1321bde89a9eSAndreas Gruenbacher 	return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0);
1322b411b363SPhilipp Reisner }
1323b411b363SPhilipp Reisner 
132499920dc5SAndreas Gruenbacher int w_send_write_hint(struct drbd_work *w, int cancel)
1325b411b363SPhilipp Reisner {
132684b8c06bSAndreas Gruenbacher 	struct drbd_device *device =
132784b8c06bSAndreas Gruenbacher 		container_of(w, struct drbd_device, unplug_work);
13289f5bdc33SAndreas Gruenbacher 	struct drbd_socket *sock;
13299f5bdc33SAndreas Gruenbacher 
1330b411b363SPhilipp Reisner 	if (cancel)
133199920dc5SAndreas Gruenbacher 		return 0;
1332a6b32bc3SAndreas Gruenbacher 	sock = &first_peer_device(device)->connection->data;
133369a22773SAndreas Gruenbacher 	if (!drbd_prepare_command(first_peer_device(device), sock))
13349f5bdc33SAndreas Gruenbacher 		return -EIO;
133569a22773SAndreas Gruenbacher 	return drbd_send_command(first_peer_device(device), sock, P_UNPLUG_REMOTE, 0, NULL, 0);
1336b411b363SPhilipp Reisner }
1337b411b363SPhilipp Reisner 
1338bde89a9eSAndreas Gruenbacher static void re_init_if_first_write(struct drbd_connection *connection, unsigned int epoch)
13394eb9b3cbSLars Ellenberg {
1340bde89a9eSAndreas Gruenbacher 	if (!connection->send.seen_any_write_yet) {
1341bde89a9eSAndreas Gruenbacher 		connection->send.seen_any_write_yet = true;
1342bde89a9eSAndreas Gruenbacher 		connection->send.current_epoch_nr = epoch;
1343bde89a9eSAndreas Gruenbacher 		connection->send.current_epoch_writes = 0;
13444eb9b3cbSLars Ellenberg 	}
13454eb9b3cbSLars Ellenberg }
13464eb9b3cbSLars Ellenberg 
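/* Close the previous write epoch with a P_BARRIER if this work item belongs
 * to a newer epoch and the previous epoch actually carried any writes. */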
1347bde89a9eSAndreas Gruenbacher static void maybe_send_barrier(struct drbd_connection *connection, unsigned int epoch)
13484eb9b3cbSLars Ellenberg {
13494eb9b3cbSLars Ellenberg 	/* nothing to send before the first write on this connection */
1350bde89a9eSAndreas Gruenbacher 	if (!connection->send.seen_any_write_yet)
13514eb9b3cbSLars Ellenberg 		return;
1352bde89a9eSAndreas Gruenbacher 	if (connection->send.current_epoch_nr != epoch) {
1353bde89a9eSAndreas Gruenbacher 		if (connection->send.current_epoch_writes)
1354bde89a9eSAndreas Gruenbacher 			drbd_send_barrier(connection);
1355bde89a9eSAndreas Gruenbacher 		connection->send.current_epoch_nr = epoch;
13564eb9b3cbSLars Ellenberg 	}
13574eb9b3cbSLars Ellenberg }
13584eb9b3cbSLars Ellenberg 
13598f7bed77SAndreas Gruenbacher int w_send_out_of_sync(struct drbd_work *w, int cancel)
136073a01a18SPhilipp Reisner {
136173a01a18SPhilipp Reisner 	struct drbd_request *req = container_of(w, struct drbd_request, w);
136284b8c06bSAndreas Gruenbacher 	struct drbd_device *device = req->device;
136344a4d551SLars Ellenberg 	struct drbd_peer_device *const peer_device = first_peer_device(device);
136444a4d551SLars Ellenberg 	struct drbd_connection *const connection = peer_device->connection;
136599920dc5SAndreas Gruenbacher 	int err;
136673a01a18SPhilipp Reisner 
136773a01a18SPhilipp Reisner 	if (unlikely(cancel)) {
13688554df1cSAndreas Gruenbacher 		req_mod(req, SEND_CANCELED);
136999920dc5SAndreas Gruenbacher 		return 0;
137073a01a18SPhilipp Reisner 	}
137173a01a18SPhilipp Reisner 
1372bde89a9eSAndreas Gruenbacher 	/* this time, no connection->send.current_epoch_writes++;
1373b6dd1a89SLars Ellenberg 	 * If it was sent, it was the closing barrier for the last
1374b6dd1a89SLars Ellenberg 	 * replicated epoch, before we went into AHEAD mode.
1375b6dd1a89SLars Ellenberg 	 * No more barriers will be sent, until we leave AHEAD mode again. */
1376bde89a9eSAndreas Gruenbacher 	maybe_send_barrier(connection, req->epoch);
1377b6dd1a89SLars Ellenberg 
137844a4d551SLars Ellenberg 	err = drbd_send_out_of_sync(peer_device, req);
13798554df1cSAndreas Gruenbacher 	req_mod(req, OOS_HANDED_TO_NETWORK);
138073a01a18SPhilipp Reisner 
138199920dc5SAndreas Gruenbacher 	return err;
138273a01a18SPhilipp Reisner }
138373a01a18SPhilipp Reisner 
1384b411b363SPhilipp Reisner /**
1385b411b363SPhilipp Reisner  * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
1386b411b363SPhilipp Reisner  * @w:		work object.
1387b411b363SPhilipp Reisner  * @cancel:	The connection will be closed anyway
1388b411b363SPhilipp Reisner  */
138999920dc5SAndreas Gruenbacher int w_send_dblock(struct drbd_work *w, int cancel)
1390b411b363SPhilipp Reisner {
1391b411b363SPhilipp Reisner 	struct drbd_request *req = container_of(w, struct drbd_request, w);
139284b8c06bSAndreas Gruenbacher 	struct drbd_device *device = req->device;
139344a4d551SLars Ellenberg 	struct drbd_peer_device *const peer_device = first_peer_device(device);
139444a4d551SLars Ellenberg 	struct drbd_connection *connection = peer_device->connection;
139599920dc5SAndreas Gruenbacher 	int err;
1396b411b363SPhilipp Reisner 
1397b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
13988554df1cSAndreas Gruenbacher 		req_mod(req, SEND_CANCELED);
139999920dc5SAndreas Gruenbacher 		return 0;
1400b411b363SPhilipp Reisner 	}
1401b411b363SPhilipp Reisner 
1402bde89a9eSAndreas Gruenbacher 	re_init_if_first_write(connection, req->epoch);
1403bde89a9eSAndreas Gruenbacher 	maybe_send_barrier(connection, req->epoch);
1404bde89a9eSAndreas Gruenbacher 	connection->send.current_epoch_writes++;
1405b6dd1a89SLars Ellenberg 
140644a4d551SLars Ellenberg 	err = drbd_send_dblock(peer_device, req);
140799920dc5SAndreas Gruenbacher 	req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
1408b411b363SPhilipp Reisner 
140999920dc5SAndreas Gruenbacher 	return err;
1410b411b363SPhilipp Reisner }
1411b411b363SPhilipp Reisner 
1412b411b363SPhilipp Reisner /**
1413b411b363SPhilipp Reisner  * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
1414b411b363SPhilipp Reisner  * @w:		work object.
1415b411b363SPhilipp Reisner  * @cancel:	The connection will be closed anyway
1416b411b363SPhilipp Reisner  */
141799920dc5SAndreas Gruenbacher int w_send_read_req(struct drbd_work *w, int cancel)
1418b411b363SPhilipp Reisner {
1419b411b363SPhilipp Reisner 	struct drbd_request *req = container_of(w, struct drbd_request, w);
142084b8c06bSAndreas Gruenbacher 	struct drbd_device *device = req->device;
142144a4d551SLars Ellenberg 	struct drbd_peer_device *const peer_device = first_peer_device(device);
142244a4d551SLars Ellenberg 	struct drbd_connection *connection = peer_device->connection;
142399920dc5SAndreas Gruenbacher 	int err;
1424b411b363SPhilipp Reisner 
1425b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
14268554df1cSAndreas Gruenbacher 		req_mod(req, SEND_CANCELED);
142799920dc5SAndreas Gruenbacher 		return 0;
1428b411b363SPhilipp Reisner 	}
1429b411b363SPhilipp Reisner 
1430b6dd1a89SLars Ellenberg 	/* Even read requests may close a write epoch,
1431b6dd1a89SLars Ellenberg 	 * if there was any yet. */
1432bde89a9eSAndreas Gruenbacher 	maybe_send_barrier(connection, req->epoch);
1433b6dd1a89SLars Ellenberg 
143444a4d551SLars Ellenberg 	err = drbd_send_drequest(peer_device, P_DATA_REQUEST, req->i.sector, req->i.size,
1435b411b363SPhilipp Reisner 				 (unsigned long)req);
1436b411b363SPhilipp Reisner 
143799920dc5SAndreas Gruenbacher 	req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
1438b411b363SPhilipp Reisner 
143999920dc5SAndreas Gruenbacher 	return err;
1440b411b363SPhilipp Reisner }
1441b411b363SPhilipp Reisner 
144299920dc5SAndreas Gruenbacher int w_restart_disk_io(struct drbd_work *w, int cancel)
1443265be2d0SPhilipp Reisner {
1444265be2d0SPhilipp Reisner 	struct drbd_request *req = container_of(w, struct drbd_request, w);
144584b8c06bSAndreas Gruenbacher 	struct drbd_device *device = req->device;
1446265be2d0SPhilipp Reisner 
14470778286aSPhilipp Reisner 	if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
14484dd726f0SLars Ellenberg 		drbd_al_begin_io(device, &req->i);
1449265be2d0SPhilipp Reisner 
1450265be2d0SPhilipp Reisner 	drbd_req_make_private_bio(req, req->master_bio);
1451b30ab791SAndreas Gruenbacher 	req->private_bio->bi_bdev = device->ldev->backing_bdev;
1452265be2d0SPhilipp Reisner 	generic_make_request(req->private_bio);
1453265be2d0SPhilipp Reisner 
145499920dc5SAndreas Gruenbacher 	return 0;
1455265be2d0SPhilipp Reisner }
1456265be2d0SPhilipp Reisner 
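/* Walk the resync-after dependency chain of this device.  Returns 1 if the
 * device may resync now, 0 if some device it (transitively) depends on is
 * itself resyncing or has one of its sync-pause flags set. */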
1457b30ab791SAndreas Gruenbacher static int _drbd_may_sync_now(struct drbd_device *device)
1458b411b363SPhilipp Reisner {
1459b30ab791SAndreas Gruenbacher 	struct drbd_device *odev = device;
146095f8efd0SAndreas Gruenbacher 	int resync_after;
1461b411b363SPhilipp Reisner 
1462b411b363SPhilipp Reisner 	while (1) {
1463a3f8f7dcSLars Ellenberg 		if (!odev->ldev || odev->state.disk == D_DISKLESS)
1464438c8374SPhilipp Reisner 			return 1;
1465daeda1ccSPhilipp Reisner 		rcu_read_lock();
146695f8efd0SAndreas Gruenbacher 		resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
1467daeda1ccSPhilipp Reisner 		rcu_read_unlock();
146895f8efd0SAndreas Gruenbacher 		if (resync_after == -1)
1469b411b363SPhilipp Reisner 			return 1;
1470b30ab791SAndreas Gruenbacher 		odev = minor_to_device(resync_after);
1471a3f8f7dcSLars Ellenberg 		if (!odev)
1472841ce241SAndreas Gruenbacher 			return 1;
1473b411b363SPhilipp Reisner 		if ((odev->state.conn >= C_SYNC_SOURCE &&
1474b411b363SPhilipp Reisner 		     odev->state.conn <= C_PAUSED_SYNC_T) ||
1475b411b363SPhilipp Reisner 		    odev->state.aftr_isp || odev->state.peer_isp ||
1476b411b363SPhilipp Reisner 		    odev->state.user_isp)
1477b411b363SPhilipp Reisner 			return 0;
1478b411b363SPhilipp Reisner 	}
1479b411b363SPhilipp Reisner }
1480b411b363SPhilipp Reisner 
1481b411b363SPhilipp Reisner /**
1482b411b363SPhilipp Reisner  * _drbd_pause_after() - Pause resync on all devices that may not resync now
1483b30ab791SAndreas Gruenbacher  * @device:	DRBD device.
1484b411b363SPhilipp Reisner  *
1485b411b363SPhilipp Reisner  * Called from process context only (admin command and after_state_ch).
1486b411b363SPhilipp Reisner  */
1487b30ab791SAndreas Gruenbacher static int _drbd_pause_after(struct drbd_device *device)
1488b411b363SPhilipp Reisner {
148954761697SAndreas Gruenbacher 	struct drbd_device *odev;
1490b411b363SPhilipp Reisner 	int i, rv = 0;
1491b411b363SPhilipp Reisner 
1492695d08faSPhilipp Reisner 	rcu_read_lock();
149305a10ec7SAndreas Gruenbacher 	idr_for_each_entry(&drbd_devices, odev, i) {
1494b411b363SPhilipp Reisner 		if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1495b411b363SPhilipp Reisner 			continue;
1496b411b363SPhilipp Reisner 		if (!_drbd_may_sync_now(odev))
1497b411b363SPhilipp Reisner 			rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL)
1498b411b363SPhilipp Reisner 			       != SS_NOTHING_TO_DO);
1499b411b363SPhilipp Reisner 	}
1500695d08faSPhilipp Reisner 	rcu_read_unlock();
1501b411b363SPhilipp Reisner 
1502b411b363SPhilipp Reisner 	return rv;
1503b411b363SPhilipp Reisner }
1504b411b363SPhilipp Reisner 
1505b411b363SPhilipp Reisner /**
1506b411b363SPhilipp Reisner  * _drbd_resume_next() - Resume resync on all devices that may resync now
1507b30ab791SAndreas Gruenbacher  * @device:	DRBD device.
1508b411b363SPhilipp Reisner  *
1509b411b363SPhilipp Reisner  * Called from process context only (admin command and worker).
1510b411b363SPhilipp Reisner  */
1511b30ab791SAndreas Gruenbacher static int _drbd_resume_next(struct drbd_device *device)
1512b411b363SPhilipp Reisner {
151354761697SAndreas Gruenbacher 	struct drbd_device *odev;
1514b411b363SPhilipp Reisner 	int i, rv = 0;
1515b411b363SPhilipp Reisner 
1516695d08faSPhilipp Reisner 	rcu_read_lock();
151705a10ec7SAndreas Gruenbacher 	idr_for_each_entry(&drbd_devices, odev, i) {
1518b411b363SPhilipp Reisner 		if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1519b411b363SPhilipp Reisner 			continue;
1520b411b363SPhilipp Reisner 		if (odev->state.aftr_isp) {
1521b411b363SPhilipp Reisner 			if (_drbd_may_sync_now(odev))
1522b411b363SPhilipp Reisner 				rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0),
1523b411b363SPhilipp Reisner 							CS_HARD, NULL)
1524b411b363SPhilipp Reisner 				       != SS_NOTHING_TO_DO) ;
1525b411b363SPhilipp Reisner 		}
1526b411b363SPhilipp Reisner 	}
1527695d08faSPhilipp Reisner 	rcu_read_unlock();
1528b411b363SPhilipp Reisner 	return rv;
1529b411b363SPhilipp Reisner }
1530b411b363SPhilipp Reisner 
1531b30ab791SAndreas Gruenbacher void resume_next_sg(struct drbd_device *device)
1532b411b363SPhilipp Reisner {
1533b411b363SPhilipp Reisner 	write_lock_irq(&global_state_lock);
1534b30ab791SAndreas Gruenbacher 	_drbd_resume_next(device);
1535b411b363SPhilipp Reisner 	write_unlock_irq(&global_state_lock);
1536b411b363SPhilipp Reisner }
1537b411b363SPhilipp Reisner 
1538b30ab791SAndreas Gruenbacher void suspend_other_sg(struct drbd_device *device)
1539b411b363SPhilipp Reisner {
1540b411b363SPhilipp Reisner 	write_lock_irq(&global_state_lock);
1541b30ab791SAndreas Gruenbacher 	_drbd_pause_after(device);
1542b411b363SPhilipp Reisner 	write_unlock_irq(&global_state_lock);
1543b411b363SPhilipp Reisner }
1544b411b363SPhilipp Reisner 
1545dc97b708SPhilipp Reisner /* caller must hold global_state_lock */
1546b30ab791SAndreas Gruenbacher enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor)
1547b411b363SPhilipp Reisner {
154854761697SAndreas Gruenbacher 	struct drbd_device *odev;
154995f8efd0SAndreas Gruenbacher 	int resync_after;
1550b411b363SPhilipp Reisner 
1551b411b363SPhilipp Reisner 	if (o_minor == -1)
1552b411b363SPhilipp Reisner 		return NO_ERROR;
1553a3f8f7dcSLars Ellenberg 	if (o_minor < -1 || o_minor > MINORMASK)
155495f8efd0SAndreas Gruenbacher 		return ERR_RESYNC_AFTER;
1555b411b363SPhilipp Reisner 
1556b411b363SPhilipp Reisner 	/* check for loops */
1557b30ab791SAndreas Gruenbacher 	odev = minor_to_device(o_minor);
1558b411b363SPhilipp Reisner 	while (1) {
1559b30ab791SAndreas Gruenbacher 		if (odev == device)
156095f8efd0SAndreas Gruenbacher 			return ERR_RESYNC_AFTER_CYCLE;
1561b411b363SPhilipp Reisner 
1562a3f8f7dcSLars Ellenberg 		/* You are free to depend on diskless, non-existing,
1563a3f8f7dcSLars Ellenberg 		 * or not yet/no longer existing minors.
1564a3f8f7dcSLars Ellenberg 		 * We only reject dependency loops.
1565a3f8f7dcSLars Ellenberg 		 * We cannot follow the dependency chain beyond a detached or
1566a3f8f7dcSLars Ellenberg 		 * missing minor.
1567a3f8f7dcSLars Ellenberg 		 */
1568a3f8f7dcSLars Ellenberg 		if (!odev || !odev->ldev || odev->state.disk == D_DISKLESS)
1569a3f8f7dcSLars Ellenberg 			return NO_ERROR;
1570a3f8f7dcSLars Ellenberg 
1571daeda1ccSPhilipp Reisner 		rcu_read_lock();
157295f8efd0SAndreas Gruenbacher 		resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
1573daeda1ccSPhilipp Reisner 		rcu_read_unlock();
1574b411b363SPhilipp Reisner 		/* dependency chain ends here, no cycles. */
157595f8efd0SAndreas Gruenbacher 		if (resync_after == -1)
1576b411b363SPhilipp Reisner 			return NO_ERROR;
1577b411b363SPhilipp Reisner 
1578b411b363SPhilipp Reisner 		/* follow the dependency chain */
1579b30ab791SAndreas Gruenbacher 		odev = minor_to_device(resync_after);
1580b411b363SPhilipp Reisner 	}
1581b411b363SPhilipp Reisner }
1582b411b363SPhilipp Reisner 
1583dc97b708SPhilipp Reisner /* caller must hold global_state_lock */
1584b30ab791SAndreas Gruenbacher void drbd_resync_after_changed(struct drbd_device *device)
1585b411b363SPhilipp Reisner {
1586b411b363SPhilipp Reisner 	int changes;
1587b411b363SPhilipp Reisner 
1588b411b363SPhilipp Reisner 	do {
1589b30ab791SAndreas Gruenbacher 		changes  = _drbd_pause_after(device);
1590b30ab791SAndreas Gruenbacher 		changes |= _drbd_resume_next(device);
1591b411b363SPhilipp Reisner 	} while (changes);
1592b411b363SPhilipp Reisner }
1593b411b363SPhilipp Reisner 
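/* Reset the dynamic resync rate controller: clear the in-flight counters and
 * empty the RCU protected fifo plan.  See the comment below on why the plan
 * is updated in place rather than replaced. */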
1594b30ab791SAndreas Gruenbacher void drbd_rs_controller_reset(struct drbd_device *device)
15959bd28d3cSLars Ellenberg {
1596813472ceSPhilipp Reisner 	struct fifo_buffer *plan;
1597813472ceSPhilipp Reisner 
1598b30ab791SAndreas Gruenbacher 	atomic_set(&device->rs_sect_in, 0);
1599b30ab791SAndreas Gruenbacher 	atomic_set(&device->rs_sect_ev, 0);
1600b30ab791SAndreas Gruenbacher 	device->rs_in_flight = 0;
1601813472ceSPhilipp Reisner 
1602813472ceSPhilipp Reisner 	/* Updating the RCU protected object in place is necessary since
1603813472ceSPhilipp Reisner 	   this function gets called from atomic context.
1604813472ceSPhilipp Reisner 	   It is valid since all other updates also lead to a completely
1605813472ceSPhilipp Reisner 	   empty fifo */
1606813472ceSPhilipp Reisner 	rcu_read_lock();
1607b30ab791SAndreas Gruenbacher 	plan = rcu_dereference(device->rs_plan_s);
1608813472ceSPhilipp Reisner 	plan->total = 0;
1609813472ceSPhilipp Reisner 	fifo_set(plan, 0);
1610813472ceSPhilipp Reisner 	rcu_read_unlock();
16119bd28d3cSLars Ellenberg }
16129bd28d3cSLars Ellenberg 
16131f04af33SPhilipp Reisner void start_resync_timer_fn(unsigned long data)
16141f04af33SPhilipp Reisner {
1615b30ab791SAndreas Gruenbacher 	struct drbd_device *device = (struct drbd_device *) data;
1616ac0acb9eSLars Ellenberg 	drbd_device_post_work(device, RS_START);
16171f04af33SPhilipp Reisner }
16181f04af33SPhilipp Reisner 
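/* Posted as RS_START device work by the start_resync timer: if acks or
 * resync replies are still outstanding, re-arm the timer and retry shortly,
 * otherwise start the resync as sync source now. */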
1619ac0acb9eSLars Ellenberg static void do_start_resync(struct drbd_device *device)
16201f04af33SPhilipp Reisner {
1621b30ab791SAndreas Gruenbacher 	if (atomic_read(&device->unacked_cnt) || atomic_read(&device->rs_pending_cnt)) {
1622ac0acb9eSLars Ellenberg 		drbd_warn(device, "postponing start_resync ...\n");
1623b30ab791SAndreas Gruenbacher 		device->start_resync_timer.expires = jiffies + HZ/10;
1624b30ab791SAndreas Gruenbacher 		add_timer(&device->start_resync_timer);
1625ac0acb9eSLars Ellenberg 		return;
16261f04af33SPhilipp Reisner 	}
16271f04af33SPhilipp Reisner 
1628b30ab791SAndreas Gruenbacher 	drbd_start_resync(device, C_SYNC_SOURCE);
1629b30ab791SAndreas Gruenbacher 	clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags);
16301f04af33SPhilipp Reisner }
16311f04af33SPhilipp Reisner 
1632aaaba345SLars Ellenberg static bool use_checksum_based_resync(struct drbd_connection *connection, struct drbd_device *device)
1633aaaba345SLars Ellenberg {
1634aaaba345SLars Ellenberg 	bool csums_after_crash_only;
1635aaaba345SLars Ellenberg 	rcu_read_lock();
1636aaaba345SLars Ellenberg 	csums_after_crash_only = rcu_dereference(connection->net_conf)->csums_after_crash_only;
1637aaaba345SLars Ellenberg 	rcu_read_unlock();
1638aaaba345SLars Ellenberg 	return connection->agreed_pro_version >= 89 &&		/* supported? */
1639aaaba345SLars Ellenberg 		connection->csums_tfm &&			/* configured? */
1640aaaba345SLars Ellenberg 		(csums_after_crash_only == 0			/* use for each resync? */
1641aaaba345SLars Ellenberg 		 || test_bit(CRASHED_PRIMARY, &device->flags));	/* or only after Primary crash? */
1642aaaba345SLars Ellenberg }
1643aaaba345SLars Ellenberg 
1644b411b363SPhilipp Reisner /**
1645b411b363SPhilipp Reisner  * drbd_start_resync() - Start the resync process
1646b30ab791SAndreas Gruenbacher  * @device:	DRBD device.
1647b411b363SPhilipp Reisner  * @side:	Either C_SYNC_SOURCE or C_SYNC_TARGET
1648b411b363SPhilipp Reisner  *
1649b411b363SPhilipp Reisner  * This function might bring you directly into one of the
1650b411b363SPhilipp Reisner  * C_PAUSED_SYNC_* states.
1651b411b363SPhilipp Reisner  */
1652b30ab791SAndreas Gruenbacher void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
1653b411b363SPhilipp Reisner {
165444a4d551SLars Ellenberg 	struct drbd_peer_device *peer_device = first_peer_device(device);
165544a4d551SLars Ellenberg 	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
1656b411b363SPhilipp Reisner 	union drbd_state ns;
1657b411b363SPhilipp Reisner 	int r;
1658b411b363SPhilipp Reisner 
1659b30ab791SAndreas Gruenbacher 	if (device->state.conn >= C_SYNC_SOURCE && device->state.conn < C_AHEAD) {
1660d0180171SAndreas Gruenbacher 		drbd_err(device, "Resync already running!\n");
1661b411b363SPhilipp Reisner 		return;
1662b411b363SPhilipp Reisner 	}
1663b411b363SPhilipp Reisner 
1664b30ab791SAndreas Gruenbacher 	if (!test_bit(B_RS_H_DONE, &device->flags)) {
1665b411b363SPhilipp Reisner 		if (side == C_SYNC_TARGET) {
1666b411b363SPhilipp Reisner 			/* Since application IO was locked out during C_WF_BITMAP_T and
1667b411b363SPhilipp Reisner 			   C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
1668b411b363SPhilipp Reisner 			   we ask the before-resync-target handler whether it is acceptable to make the data inconsistent. */
1669b30ab791SAndreas Gruenbacher 			r = drbd_khelper(device, "before-resync-target");
1670b411b363SPhilipp Reisner 			r = (r >> 8) & 0xff;
1671b411b363SPhilipp Reisner 			if (r > 0) {
1672d0180171SAndreas Gruenbacher 				drbd_info(device, "before-resync-target handler returned %d, "
1673b411b363SPhilipp Reisner 					 "dropping connection.\n", r);
167444a4d551SLars Ellenberg 				conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
1675b411b363SPhilipp Reisner 				return;
1676b411b363SPhilipp Reisner 			}
167709b9e797SPhilipp Reisner 		} else /* C_SYNC_SOURCE */ {
1678b30ab791SAndreas Gruenbacher 			r = drbd_khelper(device, "before-resync-source");
167909b9e797SPhilipp Reisner 			r = (r >> 8) & 0xff;
168009b9e797SPhilipp Reisner 			if (r > 0) {
168109b9e797SPhilipp Reisner 				if (r == 3) {
1682d0180171SAndreas Gruenbacher 					drbd_info(device, "before-resync-source handler returned %d, "
168309b9e797SPhilipp Reisner 						 "ignoring. Old userland tools?\n", r);
168409b9e797SPhilipp Reisner 				} else {
1685d0180171SAndreas Gruenbacher 					drbd_info(device, "before-resync-source handler returned %d, "
168609b9e797SPhilipp Reisner 						 "dropping connection.\n", r);
168744a4d551SLars Ellenberg 					conn_request_state(connection,
1688a6b32bc3SAndreas Gruenbacher 							   NS(conn, C_DISCONNECTING), CS_HARD);
168909b9e797SPhilipp Reisner 					return;
169009b9e797SPhilipp Reisner 				}
169109b9e797SPhilipp Reisner 			}
1692b411b363SPhilipp Reisner 		}
1693e64a3294SPhilipp Reisner 	}
1694b411b363SPhilipp Reisner 
169544a4d551SLars Ellenberg 	if (current == connection->worker.task) {
1696dad20554SPhilipp Reisner 		/* The worker should not sleep waiting for state_mutex,
1697e64a3294SPhilipp Reisner 		   that can take long */
1698b30ab791SAndreas Gruenbacher 		if (!mutex_trylock(device->state_mutex)) {
1699b30ab791SAndreas Gruenbacher 			set_bit(B_RS_H_DONE, &device->flags);
1700b30ab791SAndreas Gruenbacher 			device->start_resync_timer.expires = jiffies + HZ/5;
1701b30ab791SAndreas Gruenbacher 			add_timer(&device->start_resync_timer);
1702e64a3294SPhilipp Reisner 			return;
1703e64a3294SPhilipp Reisner 		}
1704e64a3294SPhilipp Reisner 	} else {
1705b30ab791SAndreas Gruenbacher 		mutex_lock(device->state_mutex);
1706e64a3294SPhilipp Reisner 	}
1707b30ab791SAndreas Gruenbacher 	clear_bit(B_RS_H_DONE, &device->flags);
1708b411b363SPhilipp Reisner 
1709074f4afeSLars Ellenberg 	/* req_lock: serialize with drbd_send_and_submit() and others
1710074f4afeSLars Ellenberg 	 * global_state_lock: for stable sync-after dependencies */
1711074f4afeSLars Ellenberg 	spin_lock_irq(&device->resource->req_lock);
1712074f4afeSLars Ellenberg 	write_lock(&global_state_lock);
1713a700471bSPhilipp Reisner 	/* Did some connection breakage or IO error race with us? */
1714b30ab791SAndreas Gruenbacher 	if (device->state.conn < C_CONNECTED
1715b30ab791SAndreas Gruenbacher 	|| !get_ldev_if_state(device, D_NEGOTIATING)) {
1716074f4afeSLars Ellenberg 		write_unlock(&global_state_lock);
1717074f4afeSLars Ellenberg 		spin_unlock_irq(&device->resource->req_lock);
1718b30ab791SAndreas Gruenbacher 		mutex_unlock(device->state_mutex);
1719b411b363SPhilipp Reisner 		return;
1720b411b363SPhilipp Reisner 	}
1721b411b363SPhilipp Reisner 
1722b30ab791SAndreas Gruenbacher 	ns = drbd_read_state(device);
1723b411b363SPhilipp Reisner 
1724b30ab791SAndreas Gruenbacher 	ns.aftr_isp = !_drbd_may_sync_now(device);
1725b411b363SPhilipp Reisner 
1726b411b363SPhilipp Reisner 	ns.conn = side;
1727b411b363SPhilipp Reisner 
1728b411b363SPhilipp Reisner 	if (side == C_SYNC_TARGET)
1729b411b363SPhilipp Reisner 		ns.disk = D_INCONSISTENT;
1730b411b363SPhilipp Reisner 	else /* side == C_SYNC_SOURCE */
1731b411b363SPhilipp Reisner 		ns.pdsk = D_INCONSISTENT;
1732b411b363SPhilipp Reisner 
1733b30ab791SAndreas Gruenbacher 	r = __drbd_set_state(device, ns, CS_VERBOSE, NULL);
1734b30ab791SAndreas Gruenbacher 	ns = drbd_read_state(device);
1735b411b363SPhilipp Reisner 
1736b411b363SPhilipp Reisner 	if (ns.conn < C_CONNECTED)
1737b411b363SPhilipp Reisner 		r = SS_UNKNOWN_ERROR;
1738b411b363SPhilipp Reisner 
1739b411b363SPhilipp Reisner 	if (r == SS_SUCCESS) {
1740b30ab791SAndreas Gruenbacher 		unsigned long tw = drbd_bm_total_weight(device);
17411d7734a0SLars Ellenberg 		unsigned long now = jiffies;
17421d7734a0SLars Ellenberg 		int i;
17431d7734a0SLars Ellenberg 
1744b30ab791SAndreas Gruenbacher 		device->rs_failed    = 0;
1745b30ab791SAndreas Gruenbacher 		device->rs_paused    = 0;
1746b30ab791SAndreas Gruenbacher 		device->rs_same_csum = 0;
1747b30ab791SAndreas Gruenbacher 		device->rs_last_events = 0;
1748b30ab791SAndreas Gruenbacher 		device->rs_last_sect_ev = 0;
1749b30ab791SAndreas Gruenbacher 		device->rs_total     = tw;
1750b30ab791SAndreas Gruenbacher 		device->rs_start     = now;
17511d7734a0SLars Ellenberg 		for (i = 0; i < DRBD_SYNC_MARKS; i++) {
1752b30ab791SAndreas Gruenbacher 			device->rs_mark_left[i] = tw;
1753b30ab791SAndreas Gruenbacher 			device->rs_mark_time[i] = now;
17541d7734a0SLars Ellenberg 		}
1755b30ab791SAndreas Gruenbacher 		_drbd_pause_after(device);
17565ab7d2c0SLars Ellenberg 		/* Forget potentially stale cached per resync extent bit-counts.
17575ab7d2c0SLars Ellenberg 		 * Open coded drbd_rs_cancel_all(device), we already have IRQs
17585ab7d2c0SLars Ellenberg 		 * disabled, and know the disk state is ok. */
17595ab7d2c0SLars Ellenberg 		spin_lock(&device->al_lock);
17605ab7d2c0SLars Ellenberg 		lc_reset(device->resync);
17615ab7d2c0SLars Ellenberg 		device->resync_locked = 0;
17625ab7d2c0SLars Ellenberg 		device->resync_wenr = LC_FREE;
17635ab7d2c0SLars Ellenberg 		spin_unlock(&device->al_lock);
1764b411b363SPhilipp Reisner 	}
1765074f4afeSLars Ellenberg 	write_unlock(&global_state_lock);
1766074f4afeSLars Ellenberg 	spin_unlock_irq(&device->resource->req_lock);
17675a22db89SLars Ellenberg 
17686c922ed5SLars Ellenberg 	if (r == SS_SUCCESS) {
17695ab7d2c0SLars Ellenberg 		wake_up(&device->al_wait); /* for lc_reset() above */
1770328e0f12SPhilipp Reisner 		/* reset rs_last_bcast when a resync or verify is started,
1771328e0f12SPhilipp Reisner 		 * to deal with potential jiffies wrap. */
1772b30ab791SAndreas Gruenbacher 		device->rs_last_bcast = jiffies - HZ;
1773328e0f12SPhilipp Reisner 
1774d0180171SAndreas Gruenbacher 		drbd_info(device, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
17756c922ed5SLars Ellenberg 		     drbd_conn_str(ns.conn),
1776b30ab791SAndreas Gruenbacher 		     (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10),
1777b30ab791SAndreas Gruenbacher 		     (unsigned long) device->rs_total);
1778aaaba345SLars Ellenberg 		if (side == C_SYNC_TARGET) {
1779b30ab791SAndreas Gruenbacher 			device->bm_resync_fo = 0;
1780aaaba345SLars Ellenberg 			device->use_csums = use_checksum_based_resync(connection, device);
1781aaaba345SLars Ellenberg 		} else {
1782aaaba345SLars Ellenberg 			device->use_csums = 0;
1783aaaba345SLars Ellenberg 		}
17845a22db89SLars Ellenberg 
17855a22db89SLars Ellenberg 		/* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
17865a22db89SLars Ellenberg 		 * with w_send_oos, or the sync target will get confused as to
17885a22db89SLars Ellenberg 	 * how many bits to resync.  We cannot always do that, because for an
17885a22db89SLars Ellenberg 		 * empty resync and protocol < 95, we need to do it here, as we call
17895a22db89SLars Ellenberg 		 * drbd_resync_finished from here in that case.
17905a22db89SLars Ellenberg 		 * We drbd_gen_and_send_sync_uuid here for protocol < 96,
17915a22db89SLars Ellenberg 		 * and from after_state_ch otherwise. */
179244a4d551SLars Ellenberg 		if (side == C_SYNC_SOURCE && connection->agreed_pro_version < 96)
179344a4d551SLars Ellenberg 			drbd_gen_and_send_sync_uuid(peer_device);
1794b411b363SPhilipp Reisner 
179544a4d551SLars Ellenberg 		if (connection->agreed_pro_version < 95 && device->rs_total == 0) {
1796af85e8e8SLars Ellenberg 			/* This still has a race (about when exactly the peers
1797af85e8e8SLars Ellenberg 			 * detect connection loss) that can lead to a full sync
1798af85e8e8SLars Ellenberg 			 * on next handshake. In 8.3.9 we fixed this with explicit
1799af85e8e8SLars Ellenberg 			 * resync-finished notifications, but the fix
1800af85e8e8SLars Ellenberg 			 * introduces a protocol change.  Sleeping for some
1801af85e8e8SLars Ellenberg 			 * time longer than the ping interval + timeout on the
1802af85e8e8SLars Ellenberg 			 * SyncSource, to give the SyncTarget the chance to
1803af85e8e8SLars Ellenberg 			 * detect connection loss, then waiting for a ping
1804af85e8e8SLars Ellenberg 			 * response (implicit in drbd_resync_finished) reduces
1805af85e8e8SLars Ellenberg 			 * the race considerably, but does not solve it. */
180644ed167dSPhilipp Reisner 			if (side == C_SYNC_SOURCE) {
180744ed167dSPhilipp Reisner 				struct net_conf *nc;
180844ed167dSPhilipp Reisner 				int timeo;
180944ed167dSPhilipp Reisner 
181044ed167dSPhilipp Reisner 				rcu_read_lock();
181144a4d551SLars Ellenberg 				nc = rcu_dereference(connection->net_conf);
181244ed167dSPhilipp Reisner 				timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9;
181344ed167dSPhilipp Reisner 				rcu_read_unlock();
181444ed167dSPhilipp Reisner 				schedule_timeout_interruptible(timeo);
181544ed167dSPhilipp Reisner 			}
1816b30ab791SAndreas Gruenbacher 			drbd_resync_finished(device);
1817b411b363SPhilipp Reisner 		}
1818b411b363SPhilipp Reisner 
1819b30ab791SAndreas Gruenbacher 		drbd_rs_controller_reset(device);
1820b30ab791SAndreas Gruenbacher 		/* ns.conn may already be != device->state.conn,
1821b411b363SPhilipp Reisner 		 * we may have been paused in between, or become paused until
1822b411b363SPhilipp Reisner 		 * the timer triggers.
1823b411b363SPhilipp Reisner 		 * No matter, that is handled in resync_timer_fn() */
1824b411b363SPhilipp Reisner 		if (ns.conn == C_SYNC_TARGET)
1825b30ab791SAndreas Gruenbacher 			mod_timer(&device->resync_timer, jiffies);
1826b411b363SPhilipp Reisner 
1827b30ab791SAndreas Gruenbacher 		drbd_md_sync(device);
1828b411b363SPhilipp Reisner 	}
1829b30ab791SAndreas Gruenbacher 	put_ldev(device);
1830b30ab791SAndreas Gruenbacher 	mutex_unlock(device->state_mutex);
1831b411b363SPhilipp Reisner }
1832b411b363SPhilipp Reisner 
1833e334f550SLars Ellenberg static void update_on_disk_bitmap(struct drbd_device *device, bool resync_done)
1834c7a58db4SLars Ellenberg {
1835c7a58db4SLars Ellenberg 	struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, };
1836c7a58db4SLars Ellenberg 	device->rs_last_bcast = jiffies;
1837c7a58db4SLars Ellenberg 
1838c7a58db4SLars Ellenberg 	if (!get_ldev(device))
1839c7a58db4SLars Ellenberg 		return;
1840c7a58db4SLars Ellenberg 
1841c7a58db4SLars Ellenberg 	drbd_bm_write_lazy(device, 0);
18425ab7d2c0SLars Ellenberg 	if (resync_done && is_sync_state(device->state.conn))
1843c7a58db4SLars Ellenberg 		drbd_resync_finished(device);
18445ab7d2c0SLars Ellenberg 
1845c7a58db4SLars Ellenberg 	drbd_bcast_event(device, &sib);
1846c7a58db4SLars Ellenberg 	/* update timestamp, in case it took a while to write out stuff */
1847c7a58db4SLars Ellenberg 	device->rs_last_bcast = jiffies;
1848c7a58db4SLars Ellenberg 	put_ldev(device);
1849c7a58db4SLars Ellenberg }
1850c7a58db4SLars Ellenberg 
1851e334f550SLars Ellenberg static void drbd_ldev_destroy(struct drbd_device *device)
1852e334f550SLars Ellenberg {
1853e334f550SLars Ellenberg 	lc_destroy(device->resync);
1854e334f550SLars Ellenberg 	device->resync = NULL;
1855e334f550SLars Ellenberg 	lc_destroy(device->act_log);
1856e334f550SLars Ellenberg 	device->act_log = NULL;
1857e334f550SLars Ellenberg 	__no_warn(local,
1858e334f550SLars Ellenberg 		drbd_free_ldev(device->ldev);
1859e334f550SLars Ellenberg 		device->ldev = NULL;);
1860e334f550SLars Ellenberg 	clear_bit(GOING_DISKLESS, &device->flags);
1861e334f550SLars Ellenberg 	wake_up(&device->misc_wait);
1862e334f550SLars Ellenberg }
1863e334f550SLars Ellenberg 
1864e334f550SLars Ellenberg static void go_diskless(struct drbd_device *device)
1865e334f550SLars Ellenberg {
1866e334f550SLars Ellenberg 	D_ASSERT(device, device->state.disk == D_FAILED);
1867e334f550SLars Ellenberg 	/* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
1868e334f550SLars Ellenberg 	 * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
1869e334f550SLars Ellenberg 	 * the protected members anymore, though, so once put_ldev reaches zero
1870e334f550SLars Ellenberg 	 * again, it will be safe to free them. */
1871e334f550SLars Ellenberg 
1872e334f550SLars Ellenberg 	/* Try to write changed bitmap pages, read errors may have just
1873e334f550SLars Ellenberg 	 * set some bits outside the area covered by the activity log.
1874e334f550SLars Ellenberg 	 *
1875e334f550SLars Ellenberg 	 * If we have an IO error during the bitmap writeout,
1876e334f550SLars Ellenberg 	 * we will want a full sync next time, just in case.
1877e334f550SLars Ellenberg 	 * (Do we want a specific meta data flag for this?)
1878e334f550SLars Ellenberg 	 *
1879e334f550SLars Ellenberg 	 * If that does not make it to stable storage either,
1880e334f550SLars Ellenberg 	 * we cannot do anything about that anymore.
1881e334f550SLars Ellenberg 	 *
1882e334f550SLars Ellenberg 	 * We still need to check if both bitmap and ldev are present, we may
1883e334f550SLars Ellenberg 	 * end up here after a failed attach, before ldev was even assigned.
1884e334f550SLars Ellenberg 	 */
1885e334f550SLars Ellenberg 	if (device->bitmap && device->ldev) {
1886e334f550SLars Ellenberg 		/* An interrupted resync or similar is allowed to recount bits
1887e334f550SLars Ellenberg 		 * while we detach.
1888e334f550SLars Ellenberg 		 * Any modifications would not be expected anymore, though.
1889e334f550SLars Ellenberg 		 */
1890e334f550SLars Ellenberg 		if (drbd_bitmap_io_from_worker(device, drbd_bm_write,
1891e334f550SLars Ellenberg 					"detach", BM_LOCKED_TEST_ALLOWED)) {
1892e334f550SLars Ellenberg 			if (test_bit(WAS_READ_ERROR, &device->flags)) {
1893e334f550SLars Ellenberg 				drbd_md_set_flag(device, MDF_FULL_SYNC);
1894e334f550SLars Ellenberg 				drbd_md_sync(device);
1895e334f550SLars Ellenberg 			}
1896e334f550SLars Ellenberg 		}
1897e334f550SLars Ellenberg 	}
1898e334f550SLars Ellenberg 
1899e334f550SLars Ellenberg 	drbd_force_state(device, NS(disk, D_DISKLESS));
1900e334f550SLars Ellenberg }
1901e334f550SLars Ellenberg 
1902ac0acb9eSLars Ellenberg static int do_md_sync(struct drbd_device *device)
1903ac0acb9eSLars Ellenberg {
1904ac0acb9eSLars Ellenberg 	drbd_warn(device, "md_sync_timer expired! Worker calls drbd_md_sync().\n");
1905ac0acb9eSLars Ellenberg 	drbd_md_sync(device);
1906ac0acb9eSLars Ellenberg 	return 0;
1907ac0acb9eSLars Ellenberg }
1908ac0acb9eSLars Ellenberg 
1909e334f550SLars Ellenberg #define WORK_PENDING(work_bit, todo)	((todo) & (1UL << (work_bit)))
1910e334f550SLars Ellenberg static void do_device_work(struct drbd_device *device, const unsigned long todo)
1911e334f550SLars Ellenberg {
1912ac0acb9eSLars Ellenberg 	if (WORK_PENDING(MD_SYNC, todo))
1913ac0acb9eSLars Ellenberg 		do_md_sync(device);
1914e334f550SLars Ellenberg 	if (WORK_PENDING(RS_DONE, todo) ||
1915e334f550SLars Ellenberg 	    WORK_PENDING(RS_PROGRESS, todo))
1916e334f550SLars Ellenberg 		update_on_disk_bitmap(device, WORK_PENDING(RS_DONE, todo));
1917e334f550SLars Ellenberg 	if (WORK_PENDING(GO_DISKLESS, todo))
1918e334f550SLars Ellenberg 		go_diskless(device);
1919e334f550SLars Ellenberg 	if (WORK_PENDING(DESTROY_DISK, todo))
1920e334f550SLars Ellenberg 		drbd_ldev_destroy(device);
1921ac0acb9eSLars Ellenberg 	if (WORK_PENDING(RS_START, todo))
1922ac0acb9eSLars Ellenberg 		do_start_resync(device);
1923e334f550SLars Ellenberg }
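/* All device->flags bits that are treated as device work by the worker;
 * get_work_bits() claims and clears exactly this set. */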
1924e334f550SLars Ellenberg 
1925e334f550SLars Ellenberg #define DRBD_DEVICE_WORK_MASK	\
1926e334f550SLars Ellenberg 	((1UL << GO_DISKLESS)	\
1927e334f550SLars Ellenberg 	|(1UL << DESTROY_DISK)	\
1928ac0acb9eSLars Ellenberg 	|(1UL << MD_SYNC)	\
1929ac0acb9eSLars Ellenberg 	|(1UL << RS_START)	\
1930e334f550SLars Ellenberg 	|(1UL << RS_PROGRESS)	\
1931e334f550SLars Ellenberg 	|(1UL << RS_DONE)	\
1932e334f550SLars Ellenberg 	)
1933e334f550SLars Ellenberg 
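/* Atomically claim and clear all device work bits in *flags.
 * The cmpxchg() loop retries if another context modified the flags word
 * between reading it and writing back the cleared value; the caller gets
 * the set of work bits that were pending at the moment of the claim. */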
1934e334f550SLars Ellenberg static unsigned long get_work_bits(unsigned long *flags)
1935e334f550SLars Ellenberg {
1936e334f550SLars Ellenberg 	unsigned long old, new;
1937e334f550SLars Ellenberg 	do {
1938e334f550SLars Ellenberg 		old = *flags;
1939e334f550SLars Ellenberg 		new = old & ~DRBD_DEVICE_WORK_MASK;
1940e334f550SLars Ellenberg 	} while (cmpxchg(flags, old, new) != old);
1941e334f550SLars Ellenberg 	return old & DRBD_DEVICE_WORK_MASK;
1942e334f550SLars Ellenberg }
1943e334f550SLars Ellenberg 
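/* Run all per-device work that was posted via the device flag bits.
 * A rough sketch of the producer side (the exact helper lives elsewhere;
 * shown here only for illustration):
 *
 *	set_bit(RS_DONE, &device->flags);
 *	set_bit(DEVICE_WORK_PENDING, &connection->flags);
 *	wake_up(&connection->sender_work.q_wait);
 *
 * The worker notices DEVICE_WORK_PENDING, and do_unqueued_work() below
 * walks all peer devices, claims their work bits, and dispatches them.
 * The kref_get()/kref_put() pair keeps each device alive while the RCU
 * read lock is dropped for the (possibly sleeping) work functions. */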
1944e334f550SLars Ellenberg static void do_unqueued_work(struct drbd_connection *connection)
1945c7a58db4SLars Ellenberg {
1946c7a58db4SLars Ellenberg 	struct drbd_peer_device *peer_device;
1947c7a58db4SLars Ellenberg 	int vnr;
1948c7a58db4SLars Ellenberg 
1949c7a58db4SLars Ellenberg 	rcu_read_lock();
1950c7a58db4SLars Ellenberg 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1951c7a58db4SLars Ellenberg 		struct drbd_device *device = peer_device->device;
1952e334f550SLars Ellenberg 		unsigned long todo = get_work_bits(&device->flags);
1953e334f550SLars Ellenberg 		if (!todo)
1954c7a58db4SLars Ellenberg 			continue;
19555ab7d2c0SLars Ellenberg 
1956c7a58db4SLars Ellenberg 		kref_get(&device->kref);
1957c7a58db4SLars Ellenberg 		rcu_read_unlock();
1958e334f550SLars Ellenberg 		do_device_work(device, todo);
1959c7a58db4SLars Ellenberg 		kref_put(&device->kref, drbd_destroy_device);
1960c7a58db4SLars Ellenberg 		rcu_read_lock();
1961c7a58db4SLars Ellenberg 	}
1962c7a58db4SLars Ellenberg 	rcu_read_unlock();
1963c7a58db4SLars Ellenberg }
1964c7a58db4SLars Ellenberg 
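/* Two flavours of dequeueing from a drbd_work_queue:
 * dequeue_work_batch() moves the whole queue onto the caller's list,
 * dequeue_work_item() moves only the first entry.  Both return whether
 * the caller's work_list is non-empty afterwards. */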
1965a186e478SRashika Kheria static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
19668c0785a5SLars Ellenberg {
19678c0785a5SLars Ellenberg 	spin_lock_irq(&queue->q_lock);
196815e26f6aSLars Ellenberg 	list_splice_tail_init(&queue->q, work_list);
19698c0785a5SLars Ellenberg 	spin_unlock_irq(&queue->q_lock);
19708c0785a5SLars Ellenberg 	return !list_empty(work_list);
19718c0785a5SLars Ellenberg }
19728c0785a5SLars Ellenberg 
1973a186e478SRashika Kheria static bool dequeue_work_item(struct drbd_work_queue *queue, struct list_head *work_list)
19748c0785a5SLars Ellenberg {
19758c0785a5SLars Ellenberg 	spin_lock_irq(&queue->q_lock);
19768c0785a5SLars Ellenberg 	if (!list_empty(&queue->q))
19778c0785a5SLars Ellenberg 		list_move(queue->q.next, work_list);
19788c0785a5SLars Ellenberg 	spin_unlock_irq(&queue->q_lock);
19798c0785a5SLars Ellenberg 	return !list_empty(work_list);
19808c0785a5SLars Ellenberg }
19818c0785a5SLars Ellenberg 
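/* Wait until there is something for the sender to do.
 * Fast path: take one queued work item and return.
 * Slow path: uncork TCP (so already queued socket data goes out), then
 * sleep until new work arrives, a signal is pending, per-device work is
 * posted, or the thread is told to stop.  While idle, send the epoch
 * separating barrier if the transfer log has moved on to a new epoch
 * since the last communicated request.  On return, the cork state is
 * adjusted according to the (possibly changed) net_conf. */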
1982bde89a9eSAndreas Gruenbacher static void wait_for_work(struct drbd_connection *connection, struct list_head *work_list)
1983b6dd1a89SLars Ellenberg {
1984b6dd1a89SLars Ellenberg 	DEFINE_WAIT(wait);
1985b6dd1a89SLars Ellenberg 	struct net_conf *nc;
1986b6dd1a89SLars Ellenberg 	int uncork, cork;
1987b6dd1a89SLars Ellenberg 
1988b6dd1a89SLars Ellenberg 	dequeue_work_item(&connection->sender_work, work_list);
1989b6dd1a89SLars Ellenberg 	if (!list_empty(work_list))
1990b6dd1a89SLars Ellenberg 		return;
1991b6dd1a89SLars Ellenberg 
1992b6dd1a89SLars Ellenberg 	/* Still nothing to do?
1993b6dd1a89SLars Ellenberg 	 * Maybe we still need to close the current epoch,
1994b6dd1a89SLars Ellenberg 	 * even if no new requests are queued yet.
1995b6dd1a89SLars Ellenberg 	 *
1996b6dd1a89SLars Ellenberg 	 * Also, poke TCP, just in case.
1997b6dd1a89SLars Ellenberg 	 * Then wait for new work (or signal). */
1998b6dd1a89SLars Ellenberg 	rcu_read_lock();
1999b6dd1a89SLars Ellenberg 	nc = rcu_dereference(connection->net_conf);
2000b6dd1a89SLars Ellenberg 	uncork = nc ? nc->tcp_cork : 0;
2001b6dd1a89SLars Ellenberg 	rcu_read_unlock();
2002b6dd1a89SLars Ellenberg 	if (uncork) {
2003b6dd1a89SLars Ellenberg 		mutex_lock(&connection->data.mutex);
2004b6dd1a89SLars Ellenberg 		if (connection->data.socket)
2005b6dd1a89SLars Ellenberg 			drbd_tcp_uncork(connection->data.socket);
2006b6dd1a89SLars Ellenberg 		mutex_unlock(&connection->data.mutex);
2007b6dd1a89SLars Ellenberg 	}
2008b6dd1a89SLars Ellenberg 
2009b6dd1a89SLars Ellenberg 	for (;;) {
2010b6dd1a89SLars Ellenberg 		int send_barrier;
2011b6dd1a89SLars Ellenberg 		prepare_to_wait(&connection->sender_work.q_wait, &wait, TASK_INTERRUPTIBLE);
20120500813fSAndreas Gruenbacher 		spin_lock_irq(&connection->resource->req_lock);
2013b6dd1a89SLars Ellenberg 		spin_lock(&connection->sender_work.q_lock);	/* FIXME get rid of this one? */
2014bc317a9eSLars Ellenberg 		/* dequeue everything that is currently queued;
2015bc317a9eSLars Ellenberg 		 * note that drbd_queue_work_front() is still used in some places */
2016bc317a9eSLars Ellenberg 		if (!list_empty(&connection->sender_work.q))
20174dd726f0SLars Ellenberg 			list_splice_tail_init(&connection->sender_work.q, work_list);
2018b6dd1a89SLars Ellenberg 		spin_unlock(&connection->sender_work.q_lock);	/* FIXME get rid of this one? */
2019b6dd1a89SLars Ellenberg 		if (!list_empty(work_list) || signal_pending(current)) {
20200500813fSAndreas Gruenbacher 			spin_unlock_irq(&connection->resource->req_lock);
2021b6dd1a89SLars Ellenberg 			break;
2022b6dd1a89SLars Ellenberg 		}
2023f9c78128SLars Ellenberg 
2024f9c78128SLars Ellenberg 		/* We found nothing new to do, no to-be-communicated request,
2025f9c78128SLars Ellenberg 		 * no other work item.  We may still need to close the last
2026f9c78128SLars Ellenberg 		 * epoch.  Next incoming request epoch will be connection ->
2027f9c78128SLars Ellenberg 		 * current transfer log epoch number.  If that is different
2028f9c78128SLars Ellenberg 		 * from the epoch of the last request we communicated, it is
2029f9c78128SLars Ellenberg 		 * safe to send the epoch separating barrier now.
2030f9c78128SLars Ellenberg 		 */
2031f9c78128SLars Ellenberg 		send_barrier =
2032f9c78128SLars Ellenberg 			atomic_read(&connection->current_tle_nr) !=
2033f9c78128SLars Ellenberg 			connection->send.current_epoch_nr;
20340500813fSAndreas Gruenbacher 		spin_unlock_irq(&connection->resource->req_lock);
2035f9c78128SLars Ellenberg 
2036f9c78128SLars Ellenberg 		if (send_barrier)
2037f9c78128SLars Ellenberg 			maybe_send_barrier(connection,
2038f9c78128SLars Ellenberg 					connection->send.current_epoch_nr + 1);
20395ab7d2c0SLars Ellenberg 
2040e334f550SLars Ellenberg 		if (test_bit(DEVICE_WORK_PENDING, &connection->flags))
20415ab7d2c0SLars Ellenberg 			break;
20425ab7d2c0SLars Ellenberg 
2043a80ca1aeSLars Ellenberg 		/* drbd_send() may have called flush_signals() */
2044a80ca1aeSLars Ellenberg 		if (get_t_state(&connection->worker) != RUNNING)
2045a80ca1aeSLars Ellenberg 			break;
20465ab7d2c0SLars Ellenberg 
2047b6dd1a89SLars Ellenberg 		schedule();
2048b6dd1a89SLars Ellenberg 		/* We may be woken up for things other than new work, too,
2049b6dd1a89SLars Ellenberg 		 * e.g. if the current epoch got closed;
2050b6dd1a89SLars Ellenberg 		 * in that case we send the barrier above. */
2051b6dd1a89SLars Ellenberg 	}
2052b6dd1a89SLars Ellenberg 	finish_wait(&connection->sender_work.q_wait, &wait);
2053b6dd1a89SLars Ellenberg 
2054b6dd1a89SLars Ellenberg 	/* someone may have changed the config while we have been waiting above. */
2055b6dd1a89SLars Ellenberg 	rcu_read_lock();
2056b6dd1a89SLars Ellenberg 	nc = rcu_dereference(connection->net_conf);
2057b6dd1a89SLars Ellenberg 	cork = nc ? nc->tcp_cork : 0;
2058b6dd1a89SLars Ellenberg 	rcu_read_unlock();
2059b6dd1a89SLars Ellenberg 	mutex_lock(&connection->data.mutex);
2060b6dd1a89SLars Ellenberg 	if (connection->data.socket) {
2061b6dd1a89SLars Ellenberg 		if (cork)
2062b6dd1a89SLars Ellenberg 			drbd_tcp_cork(connection->data.socket);
2063b6dd1a89SLars Ellenberg 		else if (!uncork)
2064b6dd1a89SLars Ellenberg 			drbd_tcp_uncork(connection->data.socket);
2065b6dd1a89SLars Ellenberg 	}
2066b6dd1a89SLars Ellenberg 	mutex_unlock(&connection->data.mutex);
2067b6dd1a89SLars Ellenberg }
2068b6dd1a89SLars Ellenberg 
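/* Main loop of the per-connection worker thread.
 * While running: wait for work, handle posted per-device work
 * (DEVICE_WORK_PENDING), then run each queued work callback.  The second
 * argument passed to the callbacks ("cancel") is set once the connection
 * has dropped below C_WF_REPORT_PARAMS; a callback returning non-zero
 * while the connection is still established escalates to
 * C_NETWORK_FAILURE.  On shutdown, remaining work is drained with
 * cancel = 1 and the devices are cleaned up. */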
2069b411b363SPhilipp Reisner int drbd_worker(struct drbd_thread *thi)
2070b411b363SPhilipp Reisner {
2071bde89a9eSAndreas Gruenbacher 	struct drbd_connection *connection = thi->connection;
20726db7e50aSAndreas Gruenbacher 	struct drbd_work *w = NULL;
2073c06ece6bSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
2074b411b363SPhilipp Reisner 	LIST_HEAD(work_list);
20758c0785a5SLars Ellenberg 	int vnr;
2076b411b363SPhilipp Reisner 
2077e77a0a5cSAndreas Gruenbacher 	while (get_t_state(thi) == RUNNING) {
207880822284SPhilipp Reisner 		drbd_thread_current_set_cpu(thi);
2079b411b363SPhilipp Reisner 
20808c0785a5SLars Ellenberg 		if (list_empty(&work_list))
2081bde89a9eSAndreas Gruenbacher 			wait_for_work(connection, &work_list);
2082b411b363SPhilipp Reisner 
2083e334f550SLars Ellenberg 		if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags))
2084e334f550SLars Ellenberg 			do_unqueued_work(connection);
20855ab7d2c0SLars Ellenberg 
20868c0785a5SLars Ellenberg 		if (signal_pending(current)) {
2087b411b363SPhilipp Reisner 			flush_signals(current);
208819393e10SPhilipp Reisner 			if (get_t_state(thi) == RUNNING) {
20891ec861ebSAndreas Gruenbacher 				drbd_warn(connection, "Worker got an unexpected signal\n");
2090b411b363SPhilipp Reisner 				continue;
209119393e10SPhilipp Reisner 			}
2092b411b363SPhilipp Reisner 			break;
2093b411b363SPhilipp Reisner 		}
2094b411b363SPhilipp Reisner 
2095e77a0a5cSAndreas Gruenbacher 		if (get_t_state(thi) != RUNNING)
2096b411b363SPhilipp Reisner 			break;
2097b411b363SPhilipp Reisner 
20988c0785a5SLars Ellenberg 		while (!list_empty(&work_list)) {
20996db7e50aSAndreas Gruenbacher 			w = list_first_entry(&work_list, struct drbd_work, list);
21006db7e50aSAndreas Gruenbacher 			list_del_init(&w->list);
21016db7e50aSAndreas Gruenbacher 			if (w->cb(w, connection->cstate < C_WF_REPORT_PARAMS) == 0)
21028c0785a5SLars Ellenberg 				continue;
2103bde89a9eSAndreas Gruenbacher 			if (connection->cstate >= C_WF_REPORT_PARAMS)
2104bde89a9eSAndreas Gruenbacher 				conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
2105b411b363SPhilipp Reisner 		}
2106b411b363SPhilipp Reisner 	}
2107b411b363SPhilipp Reisner 
21088c0785a5SLars Ellenberg 	do {
2109e334f550SLars Ellenberg 		if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags))
2110e334f550SLars Ellenberg 			do_unqueued_work(connection);
2111b411b363SPhilipp Reisner 		while (!list_empty(&work_list)) {
21126db7e50aSAndreas Gruenbacher 			w = list_first_entry(&work_list, struct drbd_work, list);
21136db7e50aSAndreas Gruenbacher 			list_del_init(&w->list);
21146db7e50aSAndreas Gruenbacher 			w->cb(w, 1);
2115b411b363SPhilipp Reisner 		}
2116bde89a9eSAndreas Gruenbacher 		dequeue_work_batch(&connection->sender_work, &work_list);
2117e334f550SLars Ellenberg 	} while (!list_empty(&work_list) || test_bit(DEVICE_WORK_PENDING, &connection->flags));
2118b411b363SPhilipp Reisner 
2119c141ebdaSPhilipp Reisner 	rcu_read_lock();
2120c06ece6bSAndreas Gruenbacher 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
2121c06ece6bSAndreas Gruenbacher 		struct drbd_device *device = peer_device->device;
21220b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, device->state.disk == D_DISKLESS && device->state.conn == C_STANDALONE);
2123b30ab791SAndreas Gruenbacher 		kref_get(&device->kref);
2124c141ebdaSPhilipp Reisner 		rcu_read_unlock();
2125b30ab791SAndreas Gruenbacher 		drbd_device_cleanup(device);
212605a10ec7SAndreas Gruenbacher 		kref_put(&device->kref, drbd_destroy_device);
2127c141ebdaSPhilipp Reisner 		rcu_read_lock();
21280e29d163SPhilipp Reisner 	}
2129c141ebdaSPhilipp Reisner 	rcu_read_unlock();
2130b411b363SPhilipp Reisner 
2131b411b363SPhilipp Reisner 	return 0;
2132b411b363SPhilipp Reisner }
2133