xref: /openbmc/linux/drivers/block/drbd/drbd_worker.c (revision 63a7c8ad)
1b411b363SPhilipp Reisner /*
2b411b363SPhilipp Reisner    drbd_worker.c
3b411b363SPhilipp Reisner 
4b411b363SPhilipp Reisner    This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5b411b363SPhilipp Reisner 
6b411b363SPhilipp Reisner    Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7b411b363SPhilipp Reisner    Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8b411b363SPhilipp Reisner    Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9b411b363SPhilipp Reisner 
10b411b363SPhilipp Reisner    drbd is free software; you can redistribute it and/or modify
11b411b363SPhilipp Reisner    it under the terms of the GNU General Public License as published by
12b411b363SPhilipp Reisner    the Free Software Foundation; either version 2, or (at your option)
13b411b363SPhilipp Reisner    any later version.
14b411b363SPhilipp Reisner 
15b411b363SPhilipp Reisner    drbd is distributed in the hope that it will be useful,
16b411b363SPhilipp Reisner    but WITHOUT ANY WARRANTY; without even the implied warranty of
17b411b363SPhilipp Reisner    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18b411b363SPhilipp Reisner    GNU General Public License for more details.
19b411b363SPhilipp Reisner 
20b411b363SPhilipp Reisner    You should have received a copy of the GNU General Public License
21b411b363SPhilipp Reisner    along with drbd; see the file COPYING.  If not, write to
22b411b363SPhilipp Reisner    the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23b411b363SPhilipp Reisner 
24b411b363SPhilipp Reisner */
25b411b363SPhilipp Reisner 
26b411b363SPhilipp Reisner #include <linux/module.h>
27b411b363SPhilipp Reisner #include <linux/drbd.h>
28b411b363SPhilipp Reisner #include <linux/sched.h>
29b411b363SPhilipp Reisner #include <linux/wait.h>
30b411b363SPhilipp Reisner #include <linux/mm.h>
31b411b363SPhilipp Reisner #include <linux/memcontrol.h>
32b411b363SPhilipp Reisner #include <linux/mm_inline.h>
33b411b363SPhilipp Reisner #include <linux/slab.h>
34b411b363SPhilipp Reisner #include <linux/random.h>
35b411b363SPhilipp Reisner #include <linux/string.h>
36b411b363SPhilipp Reisner #include <linux/scatterlist.h>
37b411b363SPhilipp Reisner 
38b411b363SPhilipp Reisner #include "drbd_int.h"
39a3603a6eSAndreas Gruenbacher #include "drbd_protocol.h"
40b411b363SPhilipp Reisner #include "drbd_req.h"
41b411b363SPhilipp Reisner 
42d448a2e1SAndreas Gruenbacher static int make_ov_request(struct drbd_device *, int);
43d448a2e1SAndreas Gruenbacher static int make_resync_request(struct drbd_device *, int);
44b411b363SPhilipp Reisner 
45c5a91619SAndreas Gruenbacher /* endio handlers:
46ed15b795SAndreas Gruenbacher  *   drbd_md_endio (defined here)
47fcefa62eSAndreas Gruenbacher  *   drbd_request_endio (defined here)
48fcefa62eSAndreas Gruenbacher  *   drbd_peer_request_endio (defined here)
49ed15b795SAndreas Gruenbacher  *   drbd_bm_endio (defined in drbd_bitmap.c)
50c5a91619SAndreas Gruenbacher  *
51b411b363SPhilipp Reisner  * For all these callbacks, note the following:
52b411b363SPhilipp Reisner  * The callbacks will be called in irq context by the IDE drivers,
53b411b363SPhilipp Reisner  * and in Softirqs/Tasklets/BH context by the SCSI drivers.
54b411b363SPhilipp Reisner  * Try to get the locking right :)
55b411b363SPhilipp Reisner  *
56b411b363SPhilipp Reisner  */
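/* Because these handlers may run in hard-irq context (see above), the
 * completion paths below take device->resource->req_lock with
 * spin_lock_irqsave()/spin_unlock_irqrestore() rather than the plain _irq
 * variants, and anything that may sleep is deferred to worker/sender context. */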
57b411b363SPhilipp Reisner 
58b411b363SPhilipp Reisner /* used for synchronous meta data and bitmap IO
59b411b363SPhilipp Reisner  * submitted by drbd_md_sync_page_io()
60b411b363SPhilipp Reisner  */
614246a0b6SChristoph Hellwig void drbd_md_endio(struct bio *bio)
62b411b363SPhilipp Reisner {
63b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
64b411b363SPhilipp Reisner 
65e37d2438SLars Ellenberg 	device = bio->bi_private;
664246a0b6SChristoph Hellwig 	device->md_io.error = bio->bi_error;
67b411b363SPhilipp Reisner 
680cfac5ddSPhilipp Reisner 	/* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
690cfac5ddSPhilipp Reisner 	 * to timeout on the lower level device, and eventually detach from it.
700cfac5ddSPhilipp Reisner 	 * If this io completion runs after that timeout expired, this
710cfac5ddSPhilipp Reisner 	 * drbd_md_put_buffer() may allow us to finally try and re-attach.
720cfac5ddSPhilipp Reisner 	 * During normal operation, this only puts that extra reference
730cfac5ddSPhilipp Reisner 	 * down to 1 again.
740cfac5ddSPhilipp Reisner 	 * Make sure we first drop the reference, and only then signal
750cfac5ddSPhilipp Reisner 	 * completion, or we may (in drbd_al_read_log()) cycle so fast into the
760cfac5ddSPhilipp Reisner 	 * next drbd_md_sync_page_io(), that we trigger the
77b30ab791SAndreas Gruenbacher 	 * ASSERT(atomic_read(&device->md_io_in_use) == 1) there.
780cfac5ddSPhilipp Reisner 	 */
79b30ab791SAndreas Gruenbacher 	drbd_md_put_buffer(device);
80e37d2438SLars Ellenberg 	device->md_io.done = 1;
81b30ab791SAndreas Gruenbacher 	wake_up(&device->misc_wait);
82cdfda633SPhilipp Reisner 	bio_put(bio);
83b30ab791SAndreas Gruenbacher 	if (device->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */
84b30ab791SAndreas Gruenbacher 		put_ldev(device);
85b411b363SPhilipp Reisner }
86b411b363SPhilipp Reisner 
87b411b363SPhilipp Reisner /* reads on behalf of the partner,
88b411b363SPhilipp Reisner  * "submitted" by the receiver
89b411b363SPhilipp Reisner  */
90a186e478SRashika Kheria static void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local)
91b411b363SPhilipp Reisner {
92b411b363SPhilipp Reisner 	unsigned long flags = 0;
936780139cSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
946780139cSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
95b411b363SPhilipp Reisner 
960500813fSAndreas Gruenbacher 	spin_lock_irqsave(&device->resource->req_lock, flags);
97b30ab791SAndreas Gruenbacher 	device->read_cnt += peer_req->i.size >> 9;
98a8cd15baSAndreas Gruenbacher 	list_del(&peer_req->w.list);
99b30ab791SAndreas Gruenbacher 	if (list_empty(&device->read_ee))
100b30ab791SAndreas Gruenbacher 		wake_up(&device->ee_wait);
101db830c46SAndreas Gruenbacher 	if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
102b30ab791SAndreas Gruenbacher 		__drbd_chk_io_error(device, DRBD_READ_ERROR);
1030500813fSAndreas Gruenbacher 	spin_unlock_irqrestore(&device->resource->req_lock, flags);
104b411b363SPhilipp Reisner 
1056780139cSAndreas Gruenbacher 	drbd_queue_work(&peer_device->connection->sender_work, &peer_req->w);
106b30ab791SAndreas Gruenbacher 	put_ldev(device);
107b411b363SPhilipp Reisner }
108b411b363SPhilipp Reisner 
109b411b363SPhilipp Reisner /* writes on behalf of the partner, or resync writes,
11045bb912bSLars Ellenberg  * "submitted" by the receiver, final stage.  */
111a0fb3c47SLars Ellenberg void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
112b411b363SPhilipp Reisner {
113b411b363SPhilipp Reisner 	unsigned long flags = 0;
1146780139cSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
1156780139cSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
116668700b4SPhilipp Reisner 	struct drbd_connection *connection = peer_device->connection;
117181286adSLars Ellenberg 	struct drbd_interval i;
118b411b363SPhilipp Reisner 	int do_wake;
119579b57edSAndreas Gruenbacher 	u64 block_id;
120b411b363SPhilipp Reisner 	int do_al_complete_io;
121b411b363SPhilipp Reisner 
122db830c46SAndreas Gruenbacher 	/* after we moved peer_req to done_ee,
123b411b363SPhilipp Reisner 	 * we may no longer access it,
124b411b363SPhilipp Reisner 	 * it may be freed/reused already!
125b411b363SPhilipp Reisner 	 * (as soon as we release the req_lock) */
126181286adSLars Ellenberg 	i = peer_req->i;
127db830c46SAndreas Gruenbacher 	do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
128db830c46SAndreas Gruenbacher 	block_id = peer_req->block_id;
12921ae5d7fSLars Ellenberg 	peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
130b411b363SPhilipp Reisner 
1310500813fSAndreas Gruenbacher 	spin_lock_irqsave(&device->resource->req_lock, flags);
132b30ab791SAndreas Gruenbacher 	device->writ_cnt += peer_req->i.size >> 9;
133a8cd15baSAndreas Gruenbacher 	list_move_tail(&peer_req->w.list, &device->done_ee);
134b411b363SPhilipp Reisner 
135bb3bfe96SAndreas Gruenbacher 	/*
1365e472264SAndreas Gruenbacher 	 * Do not remove from the write_requests tree here: we did not send the
137bb3bfe96SAndreas Gruenbacher 	 * Ack yet and did not wake possibly waiting conflicting requests.
138bb3bfe96SAndreas Gruenbacher 	 * It is removed from the tree by "drbd_process_done_ee" within the
13984b8c06bSAndreas Gruenbacher 	 * appropriate dw.cb (e_end_block/e_end_resync_block) or by
140bb3bfe96SAndreas Gruenbacher 	 * _drbd_clear_done_ee.
141bb3bfe96SAndreas Gruenbacher 	 */
142b411b363SPhilipp Reisner 
143b30ab791SAndreas Gruenbacher 	do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee);
144b411b363SPhilipp Reisner 
145a0fb3c47SLars Ellenberg 	/* FIXME do we want to detach for failed REQ_DISCARD?
146a0fb3c47SLars Ellenberg 	 * ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */
147a0fb3c47SLars Ellenberg 	if (peer_req->flags & EE_WAS_ERROR)
148b30ab791SAndreas Gruenbacher 		__drbd_chk_io_error(device, DRBD_WRITE_ERROR);
149668700b4SPhilipp Reisner 
150668700b4SPhilipp Reisner 	if (connection->cstate >= C_WF_REPORT_PARAMS) {
151668700b4SPhilipp Reisner 		kref_get(&device->kref); /* put is in drbd_send_acks_wf() */
152668700b4SPhilipp Reisner 		if (!queue_work(connection->ack_sender, &peer_device->send_acks_work))
153668700b4SPhilipp Reisner 			kref_put(&device->kref, drbd_destroy_device);
154668700b4SPhilipp Reisner 	}
1550500813fSAndreas Gruenbacher 	spin_unlock_irqrestore(&device->resource->req_lock, flags);
156b411b363SPhilipp Reisner 
157579b57edSAndreas Gruenbacher 	if (block_id == ID_SYNCER)
158b30ab791SAndreas Gruenbacher 		drbd_rs_complete_io(device, i.sector);
159b411b363SPhilipp Reisner 
160b411b363SPhilipp Reisner 	if (do_wake)
161b30ab791SAndreas Gruenbacher 		wake_up(&device->ee_wait);
162b411b363SPhilipp Reisner 
163b411b363SPhilipp Reisner 	if (do_al_complete_io)
164b30ab791SAndreas Gruenbacher 		drbd_al_complete_io(device, &i);
165b411b363SPhilipp Reisner 
166b30ab791SAndreas Gruenbacher 	put_ldev(device);
16745bb912bSLars Ellenberg }
168b411b363SPhilipp Reisner 
16945bb912bSLars Ellenberg /* writes on behalf of the partner, or resync writes,
17045bb912bSLars Ellenberg  * "submitted" by the receiver.
17145bb912bSLars Ellenberg  */
1724246a0b6SChristoph Hellwig void drbd_peer_request_endio(struct bio *bio)
17345bb912bSLars Ellenberg {
174db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req = bio->bi_private;
175a8cd15baSAndreas Gruenbacher 	struct drbd_device *device = peer_req->peer_device->device;
17645bb912bSLars Ellenberg 	int is_write = bio_data_dir(bio) == WRITE;
177a0fb3c47SLars Ellenberg 	int is_discard = !!(bio->bi_rw & REQ_DISCARD);
17845bb912bSLars Ellenberg 
1794246a0b6SChristoph Hellwig 	if (bio->bi_error && __ratelimit(&drbd_ratelimit_state))
180d0180171SAndreas Gruenbacher 		drbd_warn(device, "%s: error=%d s=%llus\n",
181a0fb3c47SLars Ellenberg 				is_write ? (is_discard ? "discard" : "write")
1824246a0b6SChristoph Hellwig 					: "read", bio->bi_error,
183db830c46SAndreas Gruenbacher 				(unsigned long long)peer_req->i.sector);
18445bb912bSLars Ellenberg 
1854246a0b6SChristoph Hellwig 	if (bio->bi_error)
186db830c46SAndreas Gruenbacher 		set_bit(__EE_WAS_ERROR, &peer_req->flags);
18745bb912bSLars Ellenberg 
18845bb912bSLars Ellenberg 	bio_put(bio); /* no need for the bio anymore */
189db830c46SAndreas Gruenbacher 	if (atomic_dec_and_test(&peer_req->pending_bios)) {
19045bb912bSLars Ellenberg 		if (is_write)
191db830c46SAndreas Gruenbacher 			drbd_endio_write_sec_final(peer_req);
19245bb912bSLars Ellenberg 		else
193db830c46SAndreas Gruenbacher 			drbd_endio_read_sec_final(peer_req);
19445bb912bSLars Ellenberg 	}
195b411b363SPhilipp Reisner }
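/* Note: a single peer request may have been submitted as more than one bio;
 * peer_req->pending_bios counts them, and only the completion of the last
 * bio, detected by atomic_dec_and_test() above, hands the request on to
 * drbd_endio_read_sec_final() or drbd_endio_write_sec_final(). */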
196b411b363SPhilipp Reisner 
197142207f7SLars Ellenberg void drbd_panic_after_delayed_completion_of_aborted_request(struct drbd_device *device)
198142207f7SLars Ellenberg {
199142207f7SLars Ellenberg 	panic("drbd%u %s/%u potential random memory corruption caused by delayed completion of aborted local request\n",
200142207f7SLars Ellenberg 		device->minor, device->resource->name, device->vnr);
201142207f7SLars Ellenberg }
202142207f7SLars Ellenberg 
203b411b363SPhilipp Reisner /* read, readA or write requests on R_PRIMARY coming from drbd_make_request
204b411b363SPhilipp Reisner  */
2054246a0b6SChristoph Hellwig void drbd_request_endio(struct bio *bio)
206b411b363SPhilipp Reisner {
207a115413dSLars Ellenberg 	unsigned long flags;
208b411b363SPhilipp Reisner 	struct drbd_request *req = bio->bi_private;
20984b8c06bSAndreas Gruenbacher 	struct drbd_device *device = req->device;
210a115413dSLars Ellenberg 	struct bio_and_error m;
211b411b363SPhilipp Reisner 	enum drbd_req_event what;
2121b6dd252SPhilipp Reisner 
2131b6dd252SPhilipp Reisner 	/* If this request was aborted locally before,
2141b6dd252SPhilipp Reisner 	 * but now was completed "successfully",
2151b6dd252SPhilipp Reisner 	 * chances are that this caused arbitrary data corruption.
2161b6dd252SPhilipp Reisner 	 *
2171b6dd252SPhilipp Reisner 	 * "aborting" requests, or force-detaching the disk, is intended for
2181b6dd252SPhilipp Reisner 	 * completely blocked/hung local backing devices which no longer
2191b6dd252SPhilipp Reisner 	 * complete requests at all, not even error completions.  In this
2201b6dd252SPhilipp Reisner 	 * situation, usually a hard-reset and failover is the only way out.
2211b6dd252SPhilipp Reisner 	 *
2221b6dd252SPhilipp Reisner 	 * By "aborting", basically faking a local error-completion,
2231b6dd252SPhilipp Reisner 	 * we allow for a more graceful switchover by cleanly migrating services.
2241b6dd252SPhilipp Reisner 	 * Still the affected node has to be rebooted "soon".
2251b6dd252SPhilipp Reisner 	 *
2261b6dd252SPhilipp Reisner 	 * By completing these requests, we allow the upper layers to re-use
2271b6dd252SPhilipp Reisner 	 * the associated data pages.
2281b6dd252SPhilipp Reisner 	 *
2291b6dd252SPhilipp Reisner 	 * If later the local backing device "recovers", and now DMAs some data
2301b6dd252SPhilipp Reisner 	 * from disk into the original request pages, in the best case it will
2311b6dd252SPhilipp Reisner 	 * just put random data into unused pages; but typically it will corrupt
2321b6dd252SPhilipp Reisner 	 * meanwhile completely unrelated data, causing all sorts of damage.
2331b6dd252SPhilipp Reisner 	 *
2341b6dd252SPhilipp Reisner 	 * Which means delayed successful completion,
2351b6dd252SPhilipp Reisner 	 * especially for READ requests,
2361b6dd252SPhilipp Reisner 	 * is a reason to panic().
2371b6dd252SPhilipp Reisner 	 *
2381b6dd252SPhilipp Reisner 	 * We assume that a delayed *error* completion is OK,
2391b6dd252SPhilipp Reisner 	 * though we still will complain noisily about it.
2401b6dd252SPhilipp Reisner 	 */
2411b6dd252SPhilipp Reisner 	if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) {
2421b6dd252SPhilipp Reisner 		if (__ratelimit(&drbd_ratelimit_state))
243d0180171SAndreas Gruenbacher 			drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");
2441b6dd252SPhilipp Reisner 
2454246a0b6SChristoph Hellwig 		if (!bio->bi_error)
246142207f7SLars Ellenberg 			drbd_panic_after_delayed_completion_of_aborted_request(device);
2471b6dd252SPhilipp Reisner 	}
2481b6dd252SPhilipp Reisner 
249b411b363SPhilipp Reisner 	/* to avoid recursion in __req_mod */
2504246a0b6SChristoph Hellwig 	if (unlikely(bio->bi_error)) {
2512f632aebSLars Ellenberg 		if (bio->bi_rw & REQ_DISCARD)
2524246a0b6SChristoph Hellwig 			what = (bio->bi_error == -EOPNOTSUPP)
2532f632aebSLars Ellenberg 				? DISCARD_COMPLETED_NOTSUPP
2542f632aebSLars Ellenberg 				: DISCARD_COMPLETED_WITH_ERROR;
2552f632aebSLars Ellenberg 		else
256b411b363SPhilipp Reisner 			what = (bio_data_dir(bio) == WRITE)
2578554df1cSAndreas Gruenbacher 			? WRITE_COMPLETED_WITH_ERROR
2585c3c7e64SLars Ellenberg 			: (bio_rw(bio) == READ)
2598554df1cSAndreas Gruenbacher 			  ? READ_COMPLETED_WITH_ERROR
2608554df1cSAndreas Gruenbacher 			  : READ_AHEAD_COMPLETED_WITH_ERROR;
261b411b363SPhilipp Reisner 	} else
2628554df1cSAndreas Gruenbacher 		what = COMPLETED_OK;
263b411b363SPhilipp Reisner 
264b411b363SPhilipp Reisner 	bio_put(req->private_bio);
2654246a0b6SChristoph Hellwig 	req->private_bio = ERR_PTR(bio->bi_error);
266b411b363SPhilipp Reisner 
267a115413dSLars Ellenberg 	/* not req_mod(), we need irqsave here! */
2680500813fSAndreas Gruenbacher 	spin_lock_irqsave(&device->resource->req_lock, flags);
269a115413dSLars Ellenberg 	__req_mod(req, what, &m);
2700500813fSAndreas Gruenbacher 	spin_unlock_irqrestore(&device->resource->req_lock, flags);
271b30ab791SAndreas Gruenbacher 	put_ldev(device);
272a115413dSLars Ellenberg 
273a115413dSLars Ellenberg 	if (m.bio)
274b30ab791SAndreas Gruenbacher 		complete_master_bio(device, &m);
275b411b363SPhilipp Reisner }
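/* __req_mod() runs under req_lock and only records, in "struct bio_and_error
 * m", whether the master (upper layer) bio is now due for completion;
 * complete_master_bio() is then called after the lock has been dropped, so
 * the upper-layer completion never runs with the request spinlock held. */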
276b411b363SPhilipp Reisner 
27779a3c8d3SAndreas Gruenbacher void drbd_csum_ee(struct crypto_hash *tfm, struct drbd_peer_request *peer_req, void *digest)
27845bb912bSLars Ellenberg {
27945bb912bSLars Ellenberg 	struct hash_desc desc;
28045bb912bSLars Ellenberg 	struct scatterlist sg;
281db830c46SAndreas Gruenbacher 	struct page *page = peer_req->pages;
28245bb912bSLars Ellenberg 	struct page *tmp;
28345bb912bSLars Ellenberg 	unsigned len;
28445bb912bSLars Ellenberg 
28545bb912bSLars Ellenberg 	desc.tfm = tfm;
28645bb912bSLars Ellenberg 	desc.flags = 0;
28745bb912bSLars Ellenberg 
28845bb912bSLars Ellenberg 	sg_init_table(&sg, 1);
28945bb912bSLars Ellenberg 	crypto_hash_init(&desc);
29045bb912bSLars Ellenberg 
29145bb912bSLars Ellenberg 	while ((tmp = page_chain_next(page))) {
29245bb912bSLars Ellenberg 		/* all but the last page will be fully used */
29345bb912bSLars Ellenberg 		sg_set_page(&sg, page, PAGE_SIZE, 0);
29445bb912bSLars Ellenberg 		crypto_hash_update(&desc, &sg, sg.length);
29545bb912bSLars Ellenberg 		page = tmp;
29645bb912bSLars Ellenberg 	}
29745bb912bSLars Ellenberg 	/* and now the last, possibly only partially used page */
298db830c46SAndreas Gruenbacher 	len = peer_req->i.size & (PAGE_SIZE - 1);
29945bb912bSLars Ellenberg 	sg_set_page(&sg, page, len ?: PAGE_SIZE, 0);
30045bb912bSLars Ellenberg 	crypto_hash_update(&desc, &sg, sg.length);
30145bb912bSLars Ellenberg 	crypto_hash_final(&desc, digest);
30245bb912bSLars Ellenberg }
30345bb912bSLars Ellenberg 
30479a3c8d3SAndreas Gruenbacher void drbd_csum_bio(struct crypto_hash *tfm, struct bio *bio, void *digest)
305b411b363SPhilipp Reisner {
306b411b363SPhilipp Reisner 	struct hash_desc desc;
307b411b363SPhilipp Reisner 	struct scatterlist sg;
3087988613bSKent Overstreet 	struct bio_vec bvec;
3097988613bSKent Overstreet 	struct bvec_iter iter;
310b411b363SPhilipp Reisner 
311b411b363SPhilipp Reisner 	desc.tfm = tfm;
312b411b363SPhilipp Reisner 	desc.flags = 0;
313b411b363SPhilipp Reisner 
314b411b363SPhilipp Reisner 	sg_init_table(&sg, 1);
315b411b363SPhilipp Reisner 	crypto_hash_init(&desc);
316b411b363SPhilipp Reisner 
3177988613bSKent Overstreet 	bio_for_each_segment(bvec, bio, iter) {
3187988613bSKent Overstreet 		sg_set_page(&sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
319b411b363SPhilipp Reisner 		crypto_hash_update(&desc, &sg, sg.length);
320b411b363SPhilipp Reisner 	}
321b411b363SPhilipp Reisner 	crypto_hash_final(&desc, digest);
322b411b363SPhilipp Reisner }
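/* Both checksum helpers above feed the payload into the crypto_hash
 * transform one scatterlist entry at a time: drbd_csum_ee() walks the page
 * chain of a peer request (truncating the last page to i.size), while
 * drbd_csum_bio() walks the segments of a bio.  Callers are expected to pass
 * a digest buffer of crypto_hash_digestsize(tfm) bytes, as w_e_send_csum()
 * below does. */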
323b411b363SPhilipp Reisner 
3249676c760SLars Ellenberg /* MAYBE merge common code with w_e_end_ov_req */
32599920dc5SAndreas Gruenbacher static int w_e_send_csum(struct drbd_work *w, int cancel)
326b411b363SPhilipp Reisner {
327a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
3286780139cSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
3296780139cSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
330b411b363SPhilipp Reisner 	int digest_size;
331b411b363SPhilipp Reisner 	void *digest;
33299920dc5SAndreas Gruenbacher 	int err = 0;
333b411b363SPhilipp Reisner 
33453ea4331SLars Ellenberg 	if (unlikely(cancel))
33553ea4331SLars Ellenberg 		goto out;
336b411b363SPhilipp Reisner 
3379676c760SLars Ellenberg 	if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0))
33853ea4331SLars Ellenberg 		goto out;
33953ea4331SLars Ellenberg 
3406780139cSAndreas Gruenbacher 	digest_size = crypto_hash_digestsize(peer_device->connection->csums_tfm);
341b411b363SPhilipp Reisner 	digest = kmalloc(digest_size, GFP_NOIO);
342b411b363SPhilipp Reisner 	if (digest) {
343db830c46SAndreas Gruenbacher 		sector_t sector = peer_req->i.sector;
344db830c46SAndreas Gruenbacher 		unsigned int size = peer_req->i.size;
3456780139cSAndreas Gruenbacher 		drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest);
3469676c760SLars Ellenberg 		/* Free peer_req and pages before send.
34753ea4331SLars Ellenberg 		 * In case we block on congestion, we could otherwise run into
34853ea4331SLars Ellenberg 		 * some distributed deadlock, if the other side blocks on
34953ea4331SLars Ellenberg 		 * congestion as well, because our receiver blocks in
350c37c8ecfSAndreas Gruenbacher 		 * drbd_alloc_pages due to pp_in_use > max_buffers. */
351b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
352db830c46SAndreas Gruenbacher 		peer_req = NULL;
353b30ab791SAndreas Gruenbacher 		inc_rs_pending(device);
3546780139cSAndreas Gruenbacher 		err = drbd_send_drequest_csum(peer_device, sector, size,
35553ea4331SLars Ellenberg 					      digest, digest_size,
356b411b363SPhilipp Reisner 					      P_CSUM_RS_REQUEST);
357b411b363SPhilipp Reisner 		kfree(digest);
358b411b363SPhilipp Reisner 	} else {
359d0180171SAndreas Gruenbacher 		drbd_err(device, "kmalloc() of digest failed.\n");
36099920dc5SAndreas Gruenbacher 		err = -ENOMEM;
361b411b363SPhilipp Reisner 	}
362b411b363SPhilipp Reisner 
36353ea4331SLars Ellenberg out:
364db830c46SAndreas Gruenbacher 	if (peer_req)
365b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
366b411b363SPhilipp Reisner 
36799920dc5SAndreas Gruenbacher 	if (unlikely(err))
368d0180171SAndreas Gruenbacher 		drbd_err(device, "drbd_send_drequest(..., csum) failed\n");
36999920dc5SAndreas Gruenbacher 	return err;
370b411b363SPhilipp Reisner }
371b411b363SPhilipp Reisner 
372b411b363SPhilipp Reisner #define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)
373b411b363SPhilipp Reisner 
37469a22773SAndreas Gruenbacher static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector, int size)
375b411b363SPhilipp Reisner {
37669a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
377db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req;
378b411b363SPhilipp Reisner 
379b30ab791SAndreas Gruenbacher 	if (!get_ldev(device))
38080a40e43SLars Ellenberg 		return -EIO;
381b411b363SPhilipp Reisner 
382b411b363SPhilipp Reisner 	/* GFP_TRY, because if there is no memory available right now, this may
383b411b363SPhilipp Reisner 	 * be rescheduled for later. It is "only" background resync, after all. */
38469a22773SAndreas Gruenbacher 	peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector,
385a0fb3c47SLars Ellenberg 				       size, true /* has real payload */, GFP_TRY);
386db830c46SAndreas Gruenbacher 	if (!peer_req)
38780a40e43SLars Ellenberg 		goto defer;
388b411b363SPhilipp Reisner 
389a8cd15baSAndreas Gruenbacher 	peer_req->w.cb = w_e_send_csum;
3900500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
391b9ed7080SLars Ellenberg 	list_add_tail(&peer_req->w.list, &device->read_ee);
3920500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
393b411b363SPhilipp Reisner 
394b30ab791SAndreas Gruenbacher 	atomic_add(size >> 9, &device->rs_sect_ev);
395b30ab791SAndreas Gruenbacher 	if (drbd_submit_peer_request(device, peer_req, READ, DRBD_FAULT_RS_RD) == 0)
39680a40e43SLars Ellenberg 		return 0;
39745bb912bSLars Ellenberg 
39810f6d992SLars Ellenberg 	/* If it failed because of ENOMEM, retry should help.  If it failed
39910f6d992SLars Ellenberg 	 * because bio_add_page failed (probably broken lower level driver),
40010f6d992SLars Ellenberg 	 * retry may or may not help.
40110f6d992SLars Ellenberg 	 * If it does not, you may need to force disconnect. */
4020500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
403a8cd15baSAndreas Gruenbacher 	list_del(&peer_req->w.list);
4040500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
40522cc37a9SLars Ellenberg 
406b30ab791SAndreas Gruenbacher 	drbd_free_peer_req(device, peer_req);
40780a40e43SLars Ellenberg defer:
408b30ab791SAndreas Gruenbacher 	put_ldev(device);
40980a40e43SLars Ellenberg 	return -EAGAIN;
410b411b363SPhilipp Reisner }
411b411b363SPhilipp Reisner 
41299920dc5SAndreas Gruenbacher int w_resync_timer(struct drbd_work *w, int cancel)
413794abb75SPhilipp Reisner {
41484b8c06bSAndreas Gruenbacher 	struct drbd_device *device =
41584b8c06bSAndreas Gruenbacher 		container_of(w, struct drbd_device, resync_work);
41684b8c06bSAndreas Gruenbacher 
417b30ab791SAndreas Gruenbacher 	switch (device->state.conn) {
418794abb75SPhilipp Reisner 	case C_VERIFY_S:
419d448a2e1SAndreas Gruenbacher 		make_ov_request(device, cancel);
420794abb75SPhilipp Reisner 		break;
421794abb75SPhilipp Reisner 	case C_SYNC_TARGET:
422d448a2e1SAndreas Gruenbacher 		make_resync_request(device, cancel);
423794abb75SPhilipp Reisner 		break;
424794abb75SPhilipp Reisner 	}
425794abb75SPhilipp Reisner 
42699920dc5SAndreas Gruenbacher 	return 0;
427794abb75SPhilipp Reisner }
428794abb75SPhilipp Reisner 
429b411b363SPhilipp Reisner void resync_timer_fn(unsigned long data)
430b411b363SPhilipp Reisner {
431b30ab791SAndreas Gruenbacher 	struct drbd_device *device = (struct drbd_device *) data;
432b411b363SPhilipp Reisner 
43315e26f6aSLars Ellenberg 	drbd_queue_work_if_unqueued(
43415e26f6aSLars Ellenberg 		&first_peer_device(device)->connection->sender_work,
43584b8c06bSAndreas Gruenbacher 		&device->resync_work);
436b411b363SPhilipp Reisner }
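/* The resync timer and the work item above form a simple polling loop:
 * w_resync_timer() calls make_resync_request()/make_ov_request(), and their
 * "requeue" paths re-arm device->resync_timer with jiffies + SLEEP_TIME, so
 * resync proceeds in SLEEP_TIME sized turns.  drbd_queue_work_if_unqueued()
 * queues resync_work only if it is not already queued, so a late timer
 * cannot stack duplicate work items on the sender queue. */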
437b411b363SPhilipp Reisner 
438778f271dSPhilipp Reisner static void fifo_set(struct fifo_buffer *fb, int value)
439778f271dSPhilipp Reisner {
440778f271dSPhilipp Reisner 	int i;
441778f271dSPhilipp Reisner 
442778f271dSPhilipp Reisner 	for (i = 0; i < fb->size; i++)
443f10f2623SPhilipp Reisner 		fb->values[i] = value;
444778f271dSPhilipp Reisner }
445778f271dSPhilipp Reisner 
446778f271dSPhilipp Reisner static int fifo_push(struct fifo_buffer *fb, int value)
447778f271dSPhilipp Reisner {
448778f271dSPhilipp Reisner 	int ov;
449778f271dSPhilipp Reisner 
450778f271dSPhilipp Reisner 	ov = fb->values[fb->head_index];
451778f271dSPhilipp Reisner 	fb->values[fb->head_index++] = value;
452778f271dSPhilipp Reisner 
453778f271dSPhilipp Reisner 	if (fb->head_index >= fb->size)
454778f271dSPhilipp Reisner 		fb->head_index = 0;
455778f271dSPhilipp Reisner 
456778f271dSPhilipp Reisner 	return ov;
457778f271dSPhilipp Reisner }
458778f271dSPhilipp Reisner 
459778f271dSPhilipp Reisner static void fifo_add_val(struct fifo_buffer *fb, int value)
460778f271dSPhilipp Reisner {
461778f271dSPhilipp Reisner 	int i;
462778f271dSPhilipp Reisner 
463778f271dSPhilipp Reisner 	for (i = 0; i < fb->size; i++)
464778f271dSPhilipp Reisner 		fb->values[i] += value;
465778f271dSPhilipp Reisner }
466778f271dSPhilipp Reisner 
4679958c857SPhilipp Reisner struct fifo_buffer *fifo_alloc(int fifo_size)
4689958c857SPhilipp Reisner {
4699958c857SPhilipp Reisner 	struct fifo_buffer *fb;
4709958c857SPhilipp Reisner 
4718747d30aSLars Ellenberg 	fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_NOIO);
4729958c857SPhilipp Reisner 	if (!fb)
4739958c857SPhilipp Reisner 		return NULL;
4749958c857SPhilipp Reisner 
4759958c857SPhilipp Reisner 	fb->head_index = 0;
4769958c857SPhilipp Reisner 	fb->size = fifo_size;
4779958c857SPhilipp Reisner 	fb->total = 0;
4789958c857SPhilipp Reisner 
4799958c857SPhilipp Reisner 	return fb;
4809958c857SPhilipp Reisner }
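/* The fifo_buffer above is a fixed-size ring used as a delay line by the
 * resync controller: fifo_push(fb, v) returns the value currently stored at
 * head_index, overwrites that slot with v and advances the head (wrapping at
 * fb->size), so a value pushed now re-appears after fb->size further pushes.
 * fifo_add_val() spreads a correction over all planned slots, and fb->total
 * is maintained by the caller as the sum of corrections still in the plan.
 * Illustrative example (arbitrary numbers): with size == 3 and all slots 0,
 * pushing 5, 6, 7 returns 0, 0, 0; the fourth push returns the 5 again. */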
4819958c857SPhilipp Reisner 
4820e49d7b0SLars Ellenberg static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in)
483778f271dSPhilipp Reisner {
484daeda1ccSPhilipp Reisner 	struct disk_conf *dc;
4857f34f614SLars Ellenberg 	unsigned int want;     /* The number of sectors we want in-flight */
486778f271dSPhilipp Reisner 	int req_sect; /* Number of sectors to request in this turn */
4877f34f614SLars Ellenberg 	int correction; /* Number of sectors more we need in-flight */
488778f271dSPhilipp Reisner 	int cps; /* correction per invocation of drbd_rs_controller() */
489778f271dSPhilipp Reisner 	int steps; /* Number of time steps to plan ahead */
490778f271dSPhilipp Reisner 	int curr_corr;
491778f271dSPhilipp Reisner 	int max_sect;
492813472ceSPhilipp Reisner 	struct fifo_buffer *plan;
493778f271dSPhilipp Reisner 
494b30ab791SAndreas Gruenbacher 	dc = rcu_dereference(device->ldev->disk_conf);
495b30ab791SAndreas Gruenbacher 	plan = rcu_dereference(device->rs_plan_s);
496778f271dSPhilipp Reisner 
497813472ceSPhilipp Reisner 	steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */
498778f271dSPhilipp Reisner 
499b30ab791SAndreas Gruenbacher 	if (device->rs_in_flight + sect_in == 0) { /* At start of resync */
500daeda1ccSPhilipp Reisner 		want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
501778f271dSPhilipp Reisner 	} else { /* normal path */
502daeda1ccSPhilipp Reisner 		want = dc->c_fill_target ? dc->c_fill_target :
503daeda1ccSPhilipp Reisner 			sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10);
504778f271dSPhilipp Reisner 	}
505778f271dSPhilipp Reisner 
506b30ab791SAndreas Gruenbacher 	correction = want - device->rs_in_flight - plan->total;
507778f271dSPhilipp Reisner 
508778f271dSPhilipp Reisner 	/* Plan ahead */
509778f271dSPhilipp Reisner 	cps = correction / steps;
510813472ceSPhilipp Reisner 	fifo_add_val(plan, cps);
511813472ceSPhilipp Reisner 	plan->total += cps * steps;
512778f271dSPhilipp Reisner 
513778f271dSPhilipp Reisner 	/* What we do in this step */
514813472ceSPhilipp Reisner 	curr_corr = fifo_push(plan, 0);
515813472ceSPhilipp Reisner 	plan->total -= curr_corr;
516778f271dSPhilipp Reisner 
517778f271dSPhilipp Reisner 	req_sect = sect_in + curr_corr;
518778f271dSPhilipp Reisner 	if (req_sect < 0)
519778f271dSPhilipp Reisner 		req_sect = 0;
520778f271dSPhilipp Reisner 
521daeda1ccSPhilipp Reisner 	max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ;
522778f271dSPhilipp Reisner 	if (req_sect > max_sect)
523778f271dSPhilipp Reisner 		req_sect = max_sect;
524778f271dSPhilipp Reisner 
525778f271dSPhilipp Reisner 	/*
526d0180171SAndreas Gruenbacher 	drbd_warn(device, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
527b30ab791SAndreas Gruenbacher 		 sect_in, device->rs_in_flight, want, correction,
528b30ab791SAndreas Gruenbacher 		 steps, cps, device->rs_planed, curr_corr, req_sect);
529778f271dSPhilipp Reisner 	*/
530778f271dSPhilipp Reisner 
531778f271dSPhilipp Reisner 	return req_sect;
532778f271dSPhilipp Reisner }
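/* drbd_rs_controller() is a feedback controller invoked once per SLEEP_TIME
 * turn: "want" is the target amount of resync data in flight (either the
 * configured c_fill_target, or derived from the measured sect_in and
 * c_delay_target), "correction" is how far we are from that target, and the
 * correction is spread over "steps" future turns via the plan fifo.
 * Illustrative numbers (not defaults): steps = 10, want = 2000 sectors,
 * rs_in_flight = 1200, plan->total = 300  =>  correction = 500, cps = 50;
 * each planned slot grows by 50, the slot retired by fifo_push() becomes
 * curr_corr, and this turn requests sect_in + curr_corr sectors, clamped to
 * [0, (c_max_rate * 2 * SLEEP_TIME) / HZ]. */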
533778f271dSPhilipp Reisner 
534b30ab791SAndreas Gruenbacher static int drbd_rs_number_requests(struct drbd_device *device)
535e65f440dSLars Ellenberg {
5360e49d7b0SLars Ellenberg 	unsigned int sect_in;  /* Number of sectors that came in since the last turn */
5370e49d7b0SLars Ellenberg 	int number, mxb;
5380e49d7b0SLars Ellenberg 
5390e49d7b0SLars Ellenberg 	sect_in = atomic_xchg(&device->rs_sect_in, 0);
5400e49d7b0SLars Ellenberg 	device->rs_in_flight -= sect_in;
541813472ceSPhilipp Reisner 
542813472ceSPhilipp Reisner 	rcu_read_lock();
5430e49d7b0SLars Ellenberg 	mxb = drbd_get_max_buffers(device) / 2;
544b30ab791SAndreas Gruenbacher 	if (rcu_dereference(device->rs_plan_s)->size) {
5450e49d7b0SLars Ellenberg 		number = drbd_rs_controller(device, sect_in) >> (BM_BLOCK_SHIFT - 9);
546b30ab791SAndreas Gruenbacher 		device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
547e65f440dSLars Ellenberg 	} else {
548b30ab791SAndreas Gruenbacher 		device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate;
549b30ab791SAndreas Gruenbacher 		number = SLEEP_TIME * device->c_sync_rate  / ((BM_BLOCK_SIZE / 1024) * HZ);
550e65f440dSLars Ellenberg 	}
551813472ceSPhilipp Reisner 	rcu_read_unlock();
552e65f440dSLars Ellenberg 
5530e49d7b0SLars Ellenberg 	/* Don't have more than "max-buffers"/2 in-flight.
5540e49d7b0SLars Ellenberg 	 * Otherwise we may cause the remote site to stall on drbd_alloc_pages(),
5550e49d7b0SLars Ellenberg 	 * potentially causing a distributed deadlock on congestion during
5560e49d7b0SLars Ellenberg 	 * online-verify or (checksum-based) resync, if max-buffers,
5570e49d7b0SLars Ellenberg 	 * socket buffer sizes and resync rate settings are mis-configured. */
5587f34f614SLars Ellenberg 
5597f34f614SLars Ellenberg 	/* note that "number" is in units of "BM_BLOCK_SIZE" (which is 4k),
5607f34f614SLars Ellenberg 	 * mxb (as used here, and in drbd_alloc_pages on the peer) is
5617f34f614SLars Ellenberg 	 * "number of pages" (typically also 4k),
5627f34f614SLars Ellenberg 	 * but "rs_in_flight" is in "sectors" (512 Byte). */
5637f34f614SLars Ellenberg 	if (mxb - device->rs_in_flight/8 < number)
5647f34f614SLars Ellenberg 		number = mxb - device->rs_in_flight/8;
5650e49d7b0SLars Ellenberg 
566e65f440dSLars Ellenberg 	return number;
567e65f440dSLars Ellenberg }
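/* Unit bookkeeping for the function above: the controller works in 512-byte
 * sectors, while the returned "number" counts resync requests of
 * BM_BLOCK_SIZE (4k), hence the >> (BM_BLOCK_SHIFT - 9) conversion;
 * rs_in_flight/8 likewise converts sectors to 4k units before comparing
 * against mxb (pages).  Illustrative numbers: if the controller asks for
 * 2048 sectors, that is 256 requests; with mxb == 250 and 400 sectors
 * (50 requests) already in flight, the cap limits this turn to 200 requests. */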
568e65f440dSLars Ellenberg 
56944a4d551SLars Ellenberg static int make_resync_request(struct drbd_device *const device, int cancel)
570b411b363SPhilipp Reisner {
57144a4d551SLars Ellenberg 	struct drbd_peer_device *const peer_device = first_peer_device(device);
57244a4d551SLars Ellenberg 	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
573b411b363SPhilipp Reisner 	unsigned long bit;
574b411b363SPhilipp Reisner 	sector_t sector;
575b30ab791SAndreas Gruenbacher 	const sector_t capacity = drbd_get_capacity(device->this_bdev);
5761816a2b4SLars Ellenberg 	int max_bio_size;
577e65f440dSLars Ellenberg 	int number, rollback_i, size;
578506afb62SLars Ellenberg 	int align, requeue = 0;
5790f0601f4SLars Ellenberg 	int i = 0;
580b411b363SPhilipp Reisner 
581b411b363SPhilipp Reisner 	if (unlikely(cancel))
58299920dc5SAndreas Gruenbacher 		return 0;
583b411b363SPhilipp Reisner 
584b30ab791SAndreas Gruenbacher 	if (device->rs_total == 0) {
585af85e8e8SLars Ellenberg 		/* empty resync? */
586b30ab791SAndreas Gruenbacher 		drbd_resync_finished(device);
58799920dc5SAndreas Gruenbacher 		return 0;
588af85e8e8SLars Ellenberg 	}
589af85e8e8SLars Ellenberg 
590b30ab791SAndreas Gruenbacher 	if (!get_ldev(device)) {
591b30ab791SAndreas Gruenbacher 		/* Since we only need to access device->rsync a
592b30ab791SAndreas Gruenbacher 		   get_ldev_if_state(device,D_FAILED) would be sufficient, but
593b411b363SPhilipp Reisner 		   to continue resync with a broken disk makes no sense at
594b411b363SPhilipp Reisner 		   all */
595d0180171SAndreas Gruenbacher 		drbd_err(device, "Disk broke down during resync!\n");
59699920dc5SAndreas Gruenbacher 		return 0;
597b411b363SPhilipp Reisner 	}
598b411b363SPhilipp Reisner 
599b30ab791SAndreas Gruenbacher 	max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9;
600b30ab791SAndreas Gruenbacher 	number = drbd_rs_number_requests(device);
6010e49d7b0SLars Ellenberg 	if (number <= 0)
6020f0601f4SLars Ellenberg 		goto requeue;
603b411b363SPhilipp Reisner 
604b411b363SPhilipp Reisner 	for (i = 0; i < number; i++) {
605506afb62SLars Ellenberg 		/* Stop generating RS requests when half of the send buffer is filled,
606506afb62SLars Ellenberg 		 * but notify TCP that we'd like to have more space. */
60744a4d551SLars Ellenberg 		mutex_lock(&connection->data.mutex);
60844a4d551SLars Ellenberg 		if (connection->data.socket) {
609506afb62SLars Ellenberg 			struct sock *sk = connection->data.socket->sk;
610506afb62SLars Ellenberg 			int queued = sk->sk_wmem_queued;
611506afb62SLars Ellenberg 			int sndbuf = sk->sk_sndbuf;
612506afb62SLars Ellenberg 			if (queued > sndbuf / 2) {
613506afb62SLars Ellenberg 				requeue = 1;
614506afb62SLars Ellenberg 				if (sk->sk_socket)
615506afb62SLars Ellenberg 					set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
616b411b363SPhilipp Reisner 			}
617506afb62SLars Ellenberg 		} else
618506afb62SLars Ellenberg 			requeue = 1;
61944a4d551SLars Ellenberg 		mutex_unlock(&connection->data.mutex);
620506afb62SLars Ellenberg 		if (requeue)
621b411b363SPhilipp Reisner 			goto requeue;
622b411b363SPhilipp Reisner 
623b411b363SPhilipp Reisner next_sector:
624b411b363SPhilipp Reisner 		size = BM_BLOCK_SIZE;
625b30ab791SAndreas Gruenbacher 		bit  = drbd_bm_find_next(device, device->bm_resync_fo);
626b411b363SPhilipp Reisner 
6274b0715f0SLars Ellenberg 		if (bit == DRBD_END_OF_BITMAP) {
628b30ab791SAndreas Gruenbacher 			device->bm_resync_fo = drbd_bm_bits(device);
629b30ab791SAndreas Gruenbacher 			put_ldev(device);
63099920dc5SAndreas Gruenbacher 			return 0;
631b411b363SPhilipp Reisner 		}
632b411b363SPhilipp Reisner 
633b411b363SPhilipp Reisner 		sector = BM_BIT_TO_SECT(bit);
634b411b363SPhilipp Reisner 
635ad3fee79SLars Ellenberg 		if (drbd_try_rs_begin_io(device, sector)) {
636b30ab791SAndreas Gruenbacher 			device->bm_resync_fo = bit;
637b411b363SPhilipp Reisner 			goto requeue;
638b411b363SPhilipp Reisner 		}
639b30ab791SAndreas Gruenbacher 		device->bm_resync_fo = bit + 1;
640b411b363SPhilipp Reisner 
641b30ab791SAndreas Gruenbacher 		if (unlikely(drbd_bm_test_bit(device, bit) == 0)) {
642b30ab791SAndreas Gruenbacher 			drbd_rs_complete_io(device, sector);
643b411b363SPhilipp Reisner 			goto next_sector;
644b411b363SPhilipp Reisner 		}
645b411b363SPhilipp Reisner 
6461816a2b4SLars Ellenberg #if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
647b411b363SPhilipp Reisner 		/* try to find some adjacent bits.
648b411b363SPhilipp Reisner 		 * we stop if we have already the maximum req size.
649b411b363SPhilipp Reisner 		 *
650b411b363SPhilipp Reisner 		 * Additionally always align bigger requests, in order to
651b411b363SPhilipp Reisner 		 * be prepared for all stripe sizes of software RAIDs.
652b411b363SPhilipp Reisner 		 */
653b411b363SPhilipp Reisner 		align = 1;
654d207450cSPhilipp Reisner 		rollback_i = i;
6556377b923SLars Ellenberg 		while (i < number) {
6561816a2b4SLars Ellenberg 			if (size + BM_BLOCK_SIZE > max_bio_size)
657b411b363SPhilipp Reisner 				break;
658b411b363SPhilipp Reisner 
659b411b363SPhilipp Reisner 			/* Be always aligned */
660b411b363SPhilipp Reisner 			if (sector & ((1<<(align+3))-1))
661b411b363SPhilipp Reisner 				break;
662b411b363SPhilipp Reisner 
663b411b363SPhilipp Reisner 			/* do not cross extent boundaries */
664b411b363SPhilipp Reisner 			if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
665b411b363SPhilipp Reisner 				break;
666b411b363SPhilipp Reisner 			/* now, is it actually dirty, after all?
667b411b363SPhilipp Reisner 			 * caution, drbd_bm_test_bit is tri-state for some
668b411b363SPhilipp Reisner 			 * obscure reason; ( b == 0 ) would get the out-of-band
669b411b363SPhilipp Reisner 			 * only accidentally right because of the "oddly sized"
670b411b363SPhilipp Reisner 			 * adjustment below */
671b30ab791SAndreas Gruenbacher 			if (drbd_bm_test_bit(device, bit+1) != 1)
672b411b363SPhilipp Reisner 				break;
673b411b363SPhilipp Reisner 			bit++;
674b411b363SPhilipp Reisner 			size += BM_BLOCK_SIZE;
675b411b363SPhilipp Reisner 			if ((BM_BLOCK_SIZE << align) <= size)
676b411b363SPhilipp Reisner 				align++;
677b411b363SPhilipp Reisner 			i++;
678b411b363SPhilipp Reisner 		}
679b411b363SPhilipp Reisner 		/* if we merged some,
680b411b363SPhilipp Reisner 		 * reset the offset to start the next drbd_bm_find_next from */
681b411b363SPhilipp Reisner 		if (size > BM_BLOCK_SIZE)
682b30ab791SAndreas Gruenbacher 			device->bm_resync_fo = bit + 1;
683b411b363SPhilipp Reisner #endif
684b411b363SPhilipp Reisner 
685b411b363SPhilipp Reisner 		/* adjust very last sectors, in case we are oddly sized */
686b411b363SPhilipp Reisner 		if (sector + (size>>9) > capacity)
687b411b363SPhilipp Reisner 			size = (capacity-sector)<<9;
688aaaba345SLars Ellenberg 
689aaaba345SLars Ellenberg 		if (device->use_csums) {
69044a4d551SLars Ellenberg 			switch (read_for_csum(peer_device, sector, size)) {
69180a40e43SLars Ellenberg 			case -EIO: /* Disk failure */
692b30ab791SAndreas Gruenbacher 				put_ldev(device);
69399920dc5SAndreas Gruenbacher 				return -EIO;
69480a40e43SLars Ellenberg 			case -EAGAIN: /* allocation failed, or ldev busy */
695b30ab791SAndreas Gruenbacher 				drbd_rs_complete_io(device, sector);
696b30ab791SAndreas Gruenbacher 				device->bm_resync_fo = BM_SECT_TO_BIT(sector);
697d207450cSPhilipp Reisner 				i = rollback_i;
698b411b363SPhilipp Reisner 				goto requeue;
69980a40e43SLars Ellenberg 			case 0:
70080a40e43SLars Ellenberg 				/* everything ok */
70180a40e43SLars Ellenberg 				break;
70280a40e43SLars Ellenberg 			default:
70380a40e43SLars Ellenberg 				BUG();
704b411b363SPhilipp Reisner 			}
705b411b363SPhilipp Reisner 		} else {
70699920dc5SAndreas Gruenbacher 			int err;
70799920dc5SAndreas Gruenbacher 
708b30ab791SAndreas Gruenbacher 			inc_rs_pending(device);
70944a4d551SLars Ellenberg 			err = drbd_send_drequest(peer_device, P_RS_DATA_REQUEST,
71099920dc5SAndreas Gruenbacher 						 sector, size, ID_SYNCER);
71199920dc5SAndreas Gruenbacher 			if (err) {
712d0180171SAndreas Gruenbacher 				drbd_err(device, "drbd_send_drequest() failed, aborting...\n");
713b30ab791SAndreas Gruenbacher 				dec_rs_pending(device);
714b30ab791SAndreas Gruenbacher 				put_ldev(device);
71599920dc5SAndreas Gruenbacher 				return err;
716b411b363SPhilipp Reisner 			}
717b411b363SPhilipp Reisner 		}
718b411b363SPhilipp Reisner 	}
719b411b363SPhilipp Reisner 
720b30ab791SAndreas Gruenbacher 	if (device->bm_resync_fo >= drbd_bm_bits(device)) {
721b411b363SPhilipp Reisner 		/* last syncer _request_ was sent,
722b411b363SPhilipp Reisner 		 * but the P_RS_DATA_REPLY not yet received.  sync will end (and
723b411b363SPhilipp Reisner 		 * next sync group will resume), as soon as we receive the last
724b411b363SPhilipp Reisner 		 * resync data block, and the last bit is cleared.
725b411b363SPhilipp Reisner 		 * until then resync "work" is "inactive" ...
726b411b363SPhilipp Reisner 		 */
727b30ab791SAndreas Gruenbacher 		put_ldev(device);
72899920dc5SAndreas Gruenbacher 		return 0;
729b411b363SPhilipp Reisner 	}
730b411b363SPhilipp Reisner 
731b411b363SPhilipp Reisner  requeue:
732b30ab791SAndreas Gruenbacher 	device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
733b30ab791SAndreas Gruenbacher 	mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
734b30ab791SAndreas Gruenbacher 	put_ldev(device);
73599920dc5SAndreas Gruenbacher 	return 0;
736b411b363SPhilipp Reisner }
737b411b363SPhilipp Reisner 
738d448a2e1SAndreas Gruenbacher static int make_ov_request(struct drbd_device *device, int cancel)
739b411b363SPhilipp Reisner {
740b411b363SPhilipp Reisner 	int number, i, size;
741b411b363SPhilipp Reisner 	sector_t sector;
742b30ab791SAndreas Gruenbacher 	const sector_t capacity = drbd_get_capacity(device->this_bdev);
74358ffa580SLars Ellenberg 	bool stop_sector_reached = false;
744b411b363SPhilipp Reisner 
745b411b363SPhilipp Reisner 	if (unlikely(cancel))
746b411b363SPhilipp Reisner 		return 1;
747b411b363SPhilipp Reisner 
748b30ab791SAndreas Gruenbacher 	number = drbd_rs_number_requests(device);
749b411b363SPhilipp Reisner 
750b30ab791SAndreas Gruenbacher 	sector = device->ov_position;
751b411b363SPhilipp Reisner 	for (i = 0; i < number; i++) {
75258ffa580SLars Ellenberg 		if (sector >= capacity)
753b411b363SPhilipp Reisner 			return 1;
75458ffa580SLars Ellenberg 
75558ffa580SLars Ellenberg 		/* We check for "finished" only in the reply path:
75658ffa580SLars Ellenberg 		 * w_e_end_ov_reply().
75758ffa580SLars Ellenberg 		 * We need to send at least one request out. */
75858ffa580SLars Ellenberg 		stop_sector_reached = i > 0
759b30ab791SAndreas Gruenbacher 			&& verify_can_do_stop_sector(device)
760b30ab791SAndreas Gruenbacher 			&& sector >= device->ov_stop_sector;
76158ffa580SLars Ellenberg 		if (stop_sector_reached)
76258ffa580SLars Ellenberg 			break;
763b411b363SPhilipp Reisner 
764b411b363SPhilipp Reisner 		size = BM_BLOCK_SIZE;
765b411b363SPhilipp Reisner 
766ad3fee79SLars Ellenberg 		if (drbd_try_rs_begin_io(device, sector)) {
767b30ab791SAndreas Gruenbacher 			device->ov_position = sector;
768b411b363SPhilipp Reisner 			goto requeue;
769b411b363SPhilipp Reisner 		}
770b411b363SPhilipp Reisner 
771b411b363SPhilipp Reisner 		if (sector + (size>>9) > capacity)
772b411b363SPhilipp Reisner 			size = (capacity-sector)<<9;
773b411b363SPhilipp Reisner 
774b30ab791SAndreas Gruenbacher 		inc_rs_pending(device);
77569a22773SAndreas Gruenbacher 		if (drbd_send_ov_request(first_peer_device(device), sector, size)) {
776b30ab791SAndreas Gruenbacher 			dec_rs_pending(device);
777b411b363SPhilipp Reisner 			return 0;
778b411b363SPhilipp Reisner 		}
779b411b363SPhilipp Reisner 		sector += BM_SECT_PER_BIT;
780b411b363SPhilipp Reisner 	}
781b30ab791SAndreas Gruenbacher 	device->ov_position = sector;
782b411b363SPhilipp Reisner 
783b411b363SPhilipp Reisner  requeue:
784b30ab791SAndreas Gruenbacher 	device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
78558ffa580SLars Ellenberg 	if (i == 0 || !stop_sector_reached)
786b30ab791SAndreas Gruenbacher 		mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
787b411b363SPhilipp Reisner 	return 1;
788b411b363SPhilipp Reisner }
789b411b363SPhilipp Reisner 
79099920dc5SAndreas Gruenbacher int w_ov_finished(struct drbd_work *w, int cancel)
791b411b363SPhilipp Reisner {
79284b8c06bSAndreas Gruenbacher 	struct drbd_device_work *dw =
79384b8c06bSAndreas Gruenbacher 		container_of(w, struct drbd_device_work, w);
79484b8c06bSAndreas Gruenbacher 	struct drbd_device *device = dw->device;
79584b8c06bSAndreas Gruenbacher 	kfree(dw);
796b30ab791SAndreas Gruenbacher 	ov_out_of_sync_print(device);
797b30ab791SAndreas Gruenbacher 	drbd_resync_finished(device);
798b411b363SPhilipp Reisner 
79999920dc5SAndreas Gruenbacher 	return 0;
800b411b363SPhilipp Reisner }
801b411b363SPhilipp Reisner 
80299920dc5SAndreas Gruenbacher static int w_resync_finished(struct drbd_work *w, int cancel)
803b411b363SPhilipp Reisner {
80484b8c06bSAndreas Gruenbacher 	struct drbd_device_work *dw =
80584b8c06bSAndreas Gruenbacher 		container_of(w, struct drbd_device_work, w);
80684b8c06bSAndreas Gruenbacher 	struct drbd_device *device = dw->device;
80784b8c06bSAndreas Gruenbacher 	kfree(dw);
808b411b363SPhilipp Reisner 
809b30ab791SAndreas Gruenbacher 	drbd_resync_finished(device);
810b411b363SPhilipp Reisner 
81199920dc5SAndreas Gruenbacher 	return 0;
812b411b363SPhilipp Reisner }
813b411b363SPhilipp Reisner 
814b30ab791SAndreas Gruenbacher static void ping_peer(struct drbd_device *device)
815af85e8e8SLars Ellenberg {
816a6b32bc3SAndreas Gruenbacher 	struct drbd_connection *connection = first_peer_device(device)->connection;
8172a67d8b9SPhilipp Reisner 
818bde89a9eSAndreas Gruenbacher 	clear_bit(GOT_PING_ACK, &connection->flags);
819bde89a9eSAndreas Gruenbacher 	request_ping(connection);
820bde89a9eSAndreas Gruenbacher 	wait_event(connection->ping_wait,
821bde89a9eSAndreas Gruenbacher 		   test_bit(GOT_PING_ACK, &connection->flags) || device->state.conn < C_CONNECTED);
822af85e8e8SLars Ellenberg }
823af85e8e8SLars Ellenberg 
824b30ab791SAndreas Gruenbacher int drbd_resync_finished(struct drbd_device *device)
825b411b363SPhilipp Reisner {
826b411b363SPhilipp Reisner 	unsigned long db, dt, dbdt;
827b411b363SPhilipp Reisner 	unsigned long n_oos;
828b411b363SPhilipp Reisner 	union drbd_state os, ns;
82984b8c06bSAndreas Gruenbacher 	struct drbd_device_work *dw;
830b411b363SPhilipp Reisner 	char *khelper_cmd = NULL;
83126525618SLars Ellenberg 	int verify_done = 0;
832b411b363SPhilipp Reisner 
833b411b363SPhilipp Reisner 	/* Remove all elements from the resync LRU. Future actions
834b411b363SPhilipp Reisner 	 * might set bits in the (main) bitmap, which would make the
835b411b363SPhilipp Reisner 	 * entries in the resync LRU wrong. */
836b30ab791SAndreas Gruenbacher 	if (drbd_rs_del_all(device)) {
837b411b363SPhilipp Reisner 		/* In case this is not possible now, most probably because
838b411b363SPhilipp Reisner 		 * there are P_RS_DATA_REPLY Packets lingering on the worker's
839b411b363SPhilipp Reisner 		 * queue (or even the read operations for those packets
840b411b363SPhilipp Reisner 	 * are not finished yet).   Retry in 100ms. */
841b411b363SPhilipp Reisner 
84220ee6390SPhilipp Reisner 		schedule_timeout_interruptible(HZ / 10);
84384b8c06bSAndreas Gruenbacher 		dw = kmalloc(sizeof(struct drbd_device_work), GFP_ATOMIC);
84484b8c06bSAndreas Gruenbacher 		if (dw) {
84584b8c06bSAndreas Gruenbacher 			dw->w.cb = w_resync_finished;
84684b8c06bSAndreas Gruenbacher 			dw->device = device;
84784b8c06bSAndreas Gruenbacher 			drbd_queue_work(&first_peer_device(device)->connection->sender_work,
84884b8c06bSAndreas Gruenbacher 					&dw->w);
849b411b363SPhilipp Reisner 			return 1;
850b411b363SPhilipp Reisner 		}
85184b8c06bSAndreas Gruenbacher 		drbd_err(device, "Failed to drbd_rs_del_all() and to kmalloc(dw).\n");
852b411b363SPhilipp Reisner 	}
853b411b363SPhilipp Reisner 
854b30ab791SAndreas Gruenbacher 	dt = (jiffies - device->rs_start - device->rs_paused) / HZ;
855b411b363SPhilipp Reisner 	if (dt <= 0)
856b411b363SPhilipp Reisner 		dt = 1;
85758ffa580SLars Ellenberg 
858b30ab791SAndreas Gruenbacher 	db = device->rs_total;
85958ffa580SLars Ellenberg 	/* adjust for verify start and stop sectors, or the position actually reached */
860b30ab791SAndreas Gruenbacher 	if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
861b30ab791SAndreas Gruenbacher 		db -= device->ov_left;
86258ffa580SLars Ellenberg 
863b411b363SPhilipp Reisner 	dbdt = Bit2KB(db/dt);
864b30ab791SAndreas Gruenbacher 	device->rs_paused /= HZ;
865b411b363SPhilipp Reisner 
866b30ab791SAndreas Gruenbacher 	if (!get_ldev(device))
867b411b363SPhilipp Reisner 		goto out;
868b411b363SPhilipp Reisner 
869b30ab791SAndreas Gruenbacher 	ping_peer(device);
870af85e8e8SLars Ellenberg 
8710500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
872b30ab791SAndreas Gruenbacher 	os = drbd_read_state(device);
873b411b363SPhilipp Reisner 
87426525618SLars Ellenberg 	verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);
87526525618SLars Ellenberg 
876b411b363SPhilipp Reisner 	/* This protects us against multiple calls (that can happen in the presence
877b411b363SPhilipp Reisner 	   of application IO), and against connectivity loss just before we arrive here. */
878b411b363SPhilipp Reisner 	if (os.conn <= C_CONNECTED)
879b411b363SPhilipp Reisner 		goto out_unlock;
880b411b363SPhilipp Reisner 
881b411b363SPhilipp Reisner 	ns = os;
882b411b363SPhilipp Reisner 	ns.conn = C_CONNECTED;
883b411b363SPhilipp Reisner 
884d0180171SAndreas Gruenbacher 	drbd_info(device, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
88526525618SLars Ellenberg 	     verify_done ? "Online verify" : "Resync",
886b30ab791SAndreas Gruenbacher 	     dt + device->rs_paused, device->rs_paused, dbdt);
887b411b363SPhilipp Reisner 
888b30ab791SAndreas Gruenbacher 	n_oos = drbd_bm_total_weight(device);
889b411b363SPhilipp Reisner 
890b411b363SPhilipp Reisner 	if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
891b411b363SPhilipp Reisner 		if (n_oos) {
892d0180171SAndreas Gruenbacher 			drbd_alert(device, "Online verify found %lu %dk blocks out of sync!\n",
893b411b363SPhilipp Reisner 			      n_oos, Bit2KB(1));
894b411b363SPhilipp Reisner 			khelper_cmd = "out-of-sync";
895b411b363SPhilipp Reisner 		}
896b411b363SPhilipp Reisner 	} else {
8970b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, (n_oos - device->rs_failed) == 0);
898b411b363SPhilipp Reisner 
899b411b363SPhilipp Reisner 		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
900b411b363SPhilipp Reisner 			khelper_cmd = "after-resync-target";
901b411b363SPhilipp Reisner 
902aaaba345SLars Ellenberg 		if (device->use_csums && device->rs_total) {
903b30ab791SAndreas Gruenbacher 			const unsigned long s = device->rs_same_csum;
904b30ab791SAndreas Gruenbacher 			const unsigned long t = device->rs_total;
905b411b363SPhilipp Reisner 			const int ratio =
906b411b363SPhilipp Reisner 				(t == 0)     ? 0 :
907b411b363SPhilipp Reisner 			(t < 100000) ? ((s*100)/t) : (s/(t/100));
908d0180171SAndreas Gruenbacher 			drbd_info(device, "%u %% had equal checksums, eliminated: %luK; "
909b411b363SPhilipp Reisner 			     "transferred %luK total %luK\n",
910b411b363SPhilipp Reisner 			     ratio,
911b30ab791SAndreas Gruenbacher 			     Bit2KB(device->rs_same_csum),
912b30ab791SAndreas Gruenbacher 			     Bit2KB(device->rs_total - device->rs_same_csum),
913b30ab791SAndreas Gruenbacher 			     Bit2KB(device->rs_total));
914b411b363SPhilipp Reisner 		}
915b411b363SPhilipp Reisner 	}
916b411b363SPhilipp Reisner 
917b30ab791SAndreas Gruenbacher 	if (device->rs_failed) {
918d0180171SAndreas Gruenbacher 		drbd_info(device, "            %lu failed blocks\n", device->rs_failed);
919b411b363SPhilipp Reisner 
920b411b363SPhilipp Reisner 		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
921b411b363SPhilipp Reisner 			ns.disk = D_INCONSISTENT;
922b411b363SPhilipp Reisner 			ns.pdsk = D_UP_TO_DATE;
923b411b363SPhilipp Reisner 		} else {
924b411b363SPhilipp Reisner 			ns.disk = D_UP_TO_DATE;
925b411b363SPhilipp Reisner 			ns.pdsk = D_INCONSISTENT;
926b411b363SPhilipp Reisner 		}
927b411b363SPhilipp Reisner 	} else {
928b411b363SPhilipp Reisner 		ns.disk = D_UP_TO_DATE;
929b411b363SPhilipp Reisner 		ns.pdsk = D_UP_TO_DATE;
930b411b363SPhilipp Reisner 
931b411b363SPhilipp Reisner 		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
932b30ab791SAndreas Gruenbacher 			if (device->p_uuid) {
933b411b363SPhilipp Reisner 				int i;
934b411b363SPhilipp Reisner 				for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
935b30ab791SAndreas Gruenbacher 					_drbd_uuid_set(device, i, device->p_uuid[i]);
936b30ab791SAndreas Gruenbacher 				drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_CURRENT]);
937b30ab791SAndreas Gruenbacher 				_drbd_uuid_set(device, UI_CURRENT, device->p_uuid[UI_CURRENT]);
938b411b363SPhilipp Reisner 			} else {
939d0180171SAndreas Gruenbacher 				drbd_err(device, "device->p_uuid is NULL! BUG\n");
940b411b363SPhilipp Reisner 			}
941b411b363SPhilipp Reisner 		}
942b411b363SPhilipp Reisner 
94362b0da3aSLars Ellenberg 		if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
94462b0da3aSLars Ellenberg 			/* for verify runs, we don't update uuids here,
94562b0da3aSLars Ellenberg 			 * so there would be nothing to report. */
946b30ab791SAndreas Gruenbacher 			drbd_uuid_set_bm(device, 0UL);
947b30ab791SAndreas Gruenbacher 			drbd_print_uuids(device, "updated UUIDs");
948b30ab791SAndreas Gruenbacher 			if (device->p_uuid) {
949b411b363SPhilipp Reisner 				/* Now the two UUID sets are equal, update what we
950b411b363SPhilipp Reisner 				 * know of the peer. */
951b411b363SPhilipp Reisner 				int i;
952b411b363SPhilipp Reisner 				for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
953b30ab791SAndreas Gruenbacher 					device->p_uuid[i] = device->ldev->md.uuid[i];
954b411b363SPhilipp Reisner 			}
955b411b363SPhilipp Reisner 		}
95662b0da3aSLars Ellenberg 	}
957b411b363SPhilipp Reisner 
958b30ab791SAndreas Gruenbacher 	_drbd_set_state(device, ns, CS_VERBOSE, NULL);
959b411b363SPhilipp Reisner out_unlock:
9600500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
961b30ab791SAndreas Gruenbacher 	put_ldev(device);
962b411b363SPhilipp Reisner out:
963b30ab791SAndreas Gruenbacher 	device->rs_total  = 0;
964b30ab791SAndreas Gruenbacher 	device->rs_failed = 0;
965b30ab791SAndreas Gruenbacher 	device->rs_paused = 0;
96658ffa580SLars Ellenberg 
96758ffa580SLars Ellenberg 	/* reset start sector, if we reached end of device */
968b30ab791SAndreas Gruenbacher 	if (verify_done && device->ov_left == 0)
969b30ab791SAndreas Gruenbacher 		device->ov_start_sector = 0;
970b411b363SPhilipp Reisner 
971b30ab791SAndreas Gruenbacher 	drbd_md_sync(device);
97213d42685SLars Ellenberg 
973b411b363SPhilipp Reisner 	if (khelper_cmd)
974b30ab791SAndreas Gruenbacher 		drbd_khelper(device, khelper_cmd);
975b411b363SPhilipp Reisner 
976b411b363SPhilipp Reisner 	return 1;
977b411b363SPhilipp Reisner }
978b411b363SPhilipp Reisner 
979b411b363SPhilipp Reisner /* helper */
980b30ab791SAndreas Gruenbacher static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_request *peer_req)
981b411b363SPhilipp Reisner {
982045417f7SAndreas Gruenbacher 	if (drbd_peer_req_has_active_page(peer_req)) {
983b411b363SPhilipp Reisner 		/* This might happen if sendpage() has not finished */
984db830c46SAndreas Gruenbacher 		int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
985b30ab791SAndreas Gruenbacher 		atomic_add(i, &device->pp_in_use_by_net);
986b30ab791SAndreas Gruenbacher 		atomic_sub(i, &device->pp_in_use);
9870500813fSAndreas Gruenbacher 		spin_lock_irq(&device->resource->req_lock);
988a8cd15baSAndreas Gruenbacher 		list_add_tail(&peer_req->w.list, &device->net_ee);
9890500813fSAndreas Gruenbacher 		spin_unlock_irq(&device->resource->req_lock);
990435f0740SLars Ellenberg 		wake_up(&drbd_pp_wait);
991b411b363SPhilipp Reisner 	} else
992b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
993b411b363SPhilipp Reisner }
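/* Note: when the peer request's pages may still be in flight on the socket
 * (sendpage() not yet completed), the request is parked on device->net_ee
 * instead of being freed; its page count moves from pp_in_use to
 * pp_in_use_by_net, and drbd_pp_wait is woken because pp_in_use just
 * dropped, which may unblock allocations waiting in drbd_alloc_pages(). */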
994b411b363SPhilipp Reisner 
995b411b363SPhilipp Reisner /**
996b411b363SPhilipp Reisner  * w_e_end_data_req() - Worker callback to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
998b411b363SPhilipp Reisner  * @w:		work object.
999b411b363SPhilipp Reisner  * @cancel:	The connection will be closed anyway
1000b411b363SPhilipp Reisner  */
100199920dc5SAndreas Gruenbacher int w_e_end_data_req(struct drbd_work *w, int cancel)
1002b411b363SPhilipp Reisner {
1003a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
10046780139cSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
10056780139cSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
100699920dc5SAndreas Gruenbacher 	int err;
1007b411b363SPhilipp Reisner 
1008b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
1009b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
1010b30ab791SAndreas Gruenbacher 		dec_unacked(device);
101199920dc5SAndreas Gruenbacher 		return 0;
1012b411b363SPhilipp Reisner 	}
1013b411b363SPhilipp Reisner 
1014db830c46SAndreas Gruenbacher 	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
10156780139cSAndreas Gruenbacher 		err = drbd_send_block(peer_device, P_DATA_REPLY, peer_req);
1016b411b363SPhilipp Reisner 	} else {
1017b411b363SPhilipp Reisner 		if (__ratelimit(&drbd_ratelimit_state))
1018d0180171SAndreas Gruenbacher 			drbd_err(device, "Sending NegDReply. sector=%llus.\n",
1019db830c46SAndreas Gruenbacher 			    (unsigned long long)peer_req->i.sector);
1020b411b363SPhilipp Reisner 
10216780139cSAndreas Gruenbacher 		err = drbd_send_ack(peer_device, P_NEG_DREPLY, peer_req);
1022b411b363SPhilipp Reisner 	}
1023b411b363SPhilipp Reisner 
1024b30ab791SAndreas Gruenbacher 	dec_unacked(device);
1025b411b363SPhilipp Reisner 
1026b30ab791SAndreas Gruenbacher 	move_to_net_ee_or_free(device, peer_req);
1027b411b363SPhilipp Reisner 
102899920dc5SAndreas Gruenbacher 	if (unlikely(err))
1029d0180171SAndreas Gruenbacher 		drbd_err(device, "drbd_send_block() failed\n");
103099920dc5SAndreas Gruenbacher 	return err;
1031b411b363SPhilipp Reisner }
1032b411b363SPhilipp Reisner 
1033b411b363SPhilipp Reisner /**
1034a209b4aeSAndreas Gruenbacher  * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
1035b411b363SPhilipp Reisner  * @w:		work object.
1036b411b363SPhilipp Reisner  * @cancel:	The connection will be closed anyway
1037b411b363SPhilipp Reisner  */
103899920dc5SAndreas Gruenbacher int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
1039b411b363SPhilipp Reisner {
1040a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
10416780139cSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
10426780139cSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
104399920dc5SAndreas Gruenbacher 	int err;
1044b411b363SPhilipp Reisner 
1045b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
1046b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
1047b30ab791SAndreas Gruenbacher 		dec_unacked(device);
104899920dc5SAndreas Gruenbacher 		return 0;
1049b411b363SPhilipp Reisner 	}
1050b411b363SPhilipp Reisner 
1051b30ab791SAndreas Gruenbacher 	if (get_ldev_if_state(device, D_FAILED)) {
1052b30ab791SAndreas Gruenbacher 		drbd_rs_complete_io(device, peer_req->i.sector);
1053b30ab791SAndreas Gruenbacher 		put_ldev(device);
1054b411b363SPhilipp Reisner 	}
1055b411b363SPhilipp Reisner 
1056b30ab791SAndreas Gruenbacher 	if (device->state.conn == C_AHEAD) {
10576780139cSAndreas Gruenbacher 		err = drbd_send_ack(peer_device, P_RS_CANCEL, peer_req);
1058db830c46SAndreas Gruenbacher 	} else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1059b30ab791SAndreas Gruenbacher 		if (likely(device->state.pdsk >= D_INCONSISTENT)) {
1060b30ab791SAndreas Gruenbacher 			inc_rs_pending(device);
10616780139cSAndreas Gruenbacher 			err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
1062b411b363SPhilipp Reisner 		} else {
1063b411b363SPhilipp Reisner 			if (__ratelimit(&drbd_ratelimit_state))
1064d0180171SAndreas Gruenbacher 				drbd_err(device, "Not sending RSDataReply, "
1065b411b363SPhilipp Reisner 				    "partner DISKLESS!\n");
106699920dc5SAndreas Gruenbacher 			err = 0;
1067b411b363SPhilipp Reisner 		}
1068b411b363SPhilipp Reisner 	} else {
1069b411b363SPhilipp Reisner 		if (__ratelimit(&drbd_ratelimit_state))
1070d0180171SAndreas Gruenbacher 			drbd_err(device, "Sending NegRSDReply. sector %llus.\n",
1071db830c46SAndreas Gruenbacher 			    (unsigned long long)peer_req->i.sector);
1072b411b363SPhilipp Reisner 
10736780139cSAndreas Gruenbacher 		err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);
1074b411b363SPhilipp Reisner 
1075b411b363SPhilipp Reisner 		/* update resync data with failure */
1076b30ab791SAndreas Gruenbacher 		drbd_rs_failed_io(device, peer_req->i.sector, peer_req->i.size);
1077b411b363SPhilipp Reisner 	}
1078b411b363SPhilipp Reisner 
1079b30ab791SAndreas Gruenbacher 	dec_unacked(device);
1080b411b363SPhilipp Reisner 
1081b30ab791SAndreas Gruenbacher 	move_to_net_ee_or_free(device, peer_req);
1082b411b363SPhilipp Reisner 
108399920dc5SAndreas Gruenbacher 	if (unlikely(err))
1084d0180171SAndreas Gruenbacher 		drbd_err(device, "drbd_send_block() failed\n");
108599920dc5SAndreas Gruenbacher 	return err;
1086b411b363SPhilipp Reisner }
1087b411b363SPhilipp Reisner 
108899920dc5SAndreas Gruenbacher int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
1089b411b363SPhilipp Reisner {
1090a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
10916780139cSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
10926780139cSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
1093b411b363SPhilipp Reisner 	struct digest_info *di;
1094b411b363SPhilipp Reisner 	int digest_size;
1095b411b363SPhilipp Reisner 	void *digest = NULL;
109699920dc5SAndreas Gruenbacher 	int err, eq = 0;
1097b411b363SPhilipp Reisner 
1098b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
1099b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
1100b30ab791SAndreas Gruenbacher 		dec_unacked(device);
110199920dc5SAndreas Gruenbacher 		return 0;
1102b411b363SPhilipp Reisner 	}
1103b411b363SPhilipp Reisner 
1104b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
1105b30ab791SAndreas Gruenbacher 		drbd_rs_complete_io(device, peer_req->i.sector);
1106b30ab791SAndreas Gruenbacher 		put_ldev(device);
11071d53f09eSLars Ellenberg 	}
1108b411b363SPhilipp Reisner 
1109db830c46SAndreas Gruenbacher 	di = peer_req->digest;
1110b411b363SPhilipp Reisner 
1111db830c46SAndreas Gruenbacher 	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1112b411b363SPhilipp Reisner 		/* quick hack to try to avoid a race against reconfiguration.
1113b411b363SPhilipp Reisner 		 * a real fix would be much more involved,
1114b411b363SPhilipp Reisner 		 * introducing more locking mechanisms */
11156780139cSAndreas Gruenbacher 		if (peer_device->connection->csums_tfm) {
11166780139cSAndreas Gruenbacher 			digest_size = crypto_hash_digestsize(peer_device->connection->csums_tfm);
11170b0ba1efSAndreas Gruenbacher 			D_ASSERT(device, digest_size == di->digest_size);
1118b411b363SPhilipp Reisner 			digest = kmalloc(digest_size, GFP_NOIO);
1119b411b363SPhilipp Reisner 		}
1120b411b363SPhilipp Reisner 		if (digest) {
11216780139cSAndreas Gruenbacher 			drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest);
1122b411b363SPhilipp Reisner 			eq = !memcmp(digest, di->digest, digest_size);
1123b411b363SPhilipp Reisner 			kfree(digest);
1124b411b363SPhilipp Reisner 		}
1125b411b363SPhilipp Reisner 
1126b411b363SPhilipp Reisner 		if (eq) {
1127b30ab791SAndreas Gruenbacher 			drbd_set_in_sync(device, peer_req->i.sector, peer_req->i.size);
1128676396d5SLars Ellenberg 			/* rs_same_csums unit is BM_BLOCK_SIZE */
1129b30ab791SAndreas Gruenbacher 			device->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT;
11306780139cSAndreas Gruenbacher 			err = drbd_send_ack(peer_device, P_RS_IS_IN_SYNC, peer_req);
1131b411b363SPhilipp Reisner 		} else {
1132b30ab791SAndreas Gruenbacher 			inc_rs_pending(device);
1133db830c46SAndreas Gruenbacher 			peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
1134db830c46SAndreas Gruenbacher 			peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */
1135204bba99SPhilipp Reisner 			kfree(di);
11366780139cSAndreas Gruenbacher 			err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
1137b411b363SPhilipp Reisner 		}
1138b411b363SPhilipp Reisner 	} else {
11396780139cSAndreas Gruenbacher 		err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);
1140b411b363SPhilipp Reisner 		if (__ratelimit(&drbd_ratelimit_state))
1141d0180171SAndreas Gruenbacher 			drbd_err(device, "Sending NegDReply. I guess it gets messy.\n");
1142b411b363SPhilipp Reisner 	}
1143b411b363SPhilipp Reisner 
1144b30ab791SAndreas Gruenbacher 	dec_unacked(device);
1145b30ab791SAndreas Gruenbacher 	move_to_net_ee_or_free(device, peer_req);
1146b411b363SPhilipp Reisner 
114799920dc5SAndreas Gruenbacher 	if (unlikely(err))
1148d0180171SAndreas Gruenbacher 		drbd_err(device, "drbd_send_block/ack() failed\n");
114999920dc5SAndreas Gruenbacher 	return err;
1150b411b363SPhilipp Reisner }
1151b411b363SPhilipp Reisner 
115299920dc5SAndreas Gruenbacher int w_e_end_ov_req(struct drbd_work *w, int cancel)
1153b411b363SPhilipp Reisner {
1154a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
11556780139cSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
11566780139cSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
1157db830c46SAndreas Gruenbacher 	sector_t sector = peer_req->i.sector;
1158db830c46SAndreas Gruenbacher 	unsigned int size = peer_req->i.size;
1159b411b363SPhilipp Reisner 	int digest_size;
1160b411b363SPhilipp Reisner 	void *digest;
116199920dc5SAndreas Gruenbacher 	int err = 0;
1162b411b363SPhilipp Reisner 
1163b411b363SPhilipp Reisner 	if (unlikely(cancel))
1164b411b363SPhilipp Reisner 		goto out;
1165b411b363SPhilipp Reisner 
11666780139cSAndreas Gruenbacher 	digest_size = crypto_hash_digestsize(peer_device->connection->verify_tfm);
1167b411b363SPhilipp Reisner 	digest = kmalloc(digest_size, GFP_NOIO);
11688f21420eSPhilipp Reisner 	if (!digest) {
116999920dc5SAndreas Gruenbacher 		err = 1;	/* terminate the connection in case the allocation failed */
11708f21420eSPhilipp Reisner 		goto out;
11718f21420eSPhilipp Reisner 	}
11728f21420eSPhilipp Reisner 
1173db830c46SAndreas Gruenbacher 	if (likely(!(peer_req->flags & EE_WAS_ERROR)))
11746780139cSAndreas Gruenbacher 		drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest);
11758f21420eSPhilipp Reisner 	else
11768f21420eSPhilipp Reisner 		memset(digest, 0, digest_size);
11778f21420eSPhilipp Reisner 
117953ea4331SLars Ellenberg 	/* Free peer_req and pages before send.
117953ea4331SLars Ellenberg 	 * In case we block on congestion, we could otherwise run into
118053ea4331SLars Ellenberg 	 * some distributed deadlock, if the other side blocks on
118153ea4331SLars Ellenberg 	 * congestion as well, because our receiver blocks in
1182c37c8ecfSAndreas Gruenbacher 	 * drbd_alloc_pages due to pp_in_use > max_buffers. */
1183b30ab791SAndreas Gruenbacher 	drbd_free_peer_req(device, peer_req);
1184db830c46SAndreas Gruenbacher 	peer_req = NULL;
1185b30ab791SAndreas Gruenbacher 	inc_rs_pending(device);
11866780139cSAndreas Gruenbacher 	err = drbd_send_drequest_csum(peer_device, sector, size, digest, digest_size, P_OV_REPLY);
118799920dc5SAndreas Gruenbacher 	if (err)
1188b30ab791SAndreas Gruenbacher 		dec_rs_pending(device);
1189b411b363SPhilipp Reisner 	kfree(digest);
1190b411b363SPhilipp Reisner 
1191b411b363SPhilipp Reisner out:
1192db830c46SAndreas Gruenbacher 	if (peer_req)
1193b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
1194b30ab791SAndreas Gruenbacher 	dec_unacked(device);
119599920dc5SAndreas Gruenbacher 	return err;
1196b411b363SPhilipp Reisner }
1197b411b363SPhilipp Reisner 
1198b30ab791SAndreas Gruenbacher void drbd_ov_out_of_sync_found(struct drbd_device *device, sector_t sector, int size)
1199b411b363SPhilipp Reisner {
1200b30ab791SAndreas Gruenbacher 	if (device->ov_last_oos_start + device->ov_last_oos_size == sector) {
1201b30ab791SAndreas Gruenbacher 		device->ov_last_oos_size += size>>9;
1202b411b363SPhilipp Reisner 	} else {
1203b30ab791SAndreas Gruenbacher 		device->ov_last_oos_start = sector;
1204b30ab791SAndreas Gruenbacher 		device->ov_last_oos_size = size>>9;
1205b411b363SPhilipp Reisner 	}
1206b30ab791SAndreas Gruenbacher 	drbd_set_out_of_sync(device, sector, size);
1207b411b363SPhilipp Reisner }
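/* Note: drbd_ov_out_of_sync_found() coalesces adjacent out-of-sync ranges
 * found by online verify: if the new block starts exactly where the last
 * recorded range ends, only ov_last_oos_size grows (size>>9 converts bytes
 * to 512-byte sectors); otherwise a new range is started.  For example,
 * two back-to-back 4KiB blocks are reported as one 16-sector range. */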
1208b411b363SPhilipp Reisner 
120999920dc5SAndreas Gruenbacher int w_e_end_ov_reply(struct drbd_work *w, int cancel)
1210b411b363SPhilipp Reisner {
1211a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
12126780139cSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
12136780139cSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
1214b411b363SPhilipp Reisner 	struct digest_info *di;
1215b411b363SPhilipp Reisner 	void *digest;
1216db830c46SAndreas Gruenbacher 	sector_t sector = peer_req->i.sector;
1217db830c46SAndreas Gruenbacher 	unsigned int size = peer_req->i.size;
121853ea4331SLars Ellenberg 	int digest_size;
121999920dc5SAndreas Gruenbacher 	int err, eq = 0;
122058ffa580SLars Ellenberg 	bool stop_sector_reached = false;
1221b411b363SPhilipp Reisner 
1222b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
1223b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
1224b30ab791SAndreas Gruenbacher 		dec_unacked(device);
122599920dc5SAndreas Gruenbacher 		return 0;
1226b411b363SPhilipp Reisner 	}
1227b411b363SPhilipp Reisner 
1228b411b363SPhilipp Reisner 	/* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
1229b411b363SPhilipp Reisner 	 * the resync lru has been cleaned up already */
1230b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
1231b30ab791SAndreas Gruenbacher 		drbd_rs_complete_io(device, peer_req->i.sector);
1232b30ab791SAndreas Gruenbacher 		put_ldev(device);
12331d53f09eSLars Ellenberg 	}
1234b411b363SPhilipp Reisner 
1235db830c46SAndreas Gruenbacher 	di = peer_req->digest;
1236b411b363SPhilipp Reisner 
1237db830c46SAndreas Gruenbacher 	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
12386780139cSAndreas Gruenbacher 		digest_size = crypto_hash_digestsize(peer_device->connection->verify_tfm);
1239b411b363SPhilipp Reisner 		digest = kmalloc(digest_size, GFP_NOIO);
1240b411b363SPhilipp Reisner 		if (digest) {
12416780139cSAndreas Gruenbacher 			drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest);
1242b411b363SPhilipp Reisner 
12430b0ba1efSAndreas Gruenbacher 			D_ASSERT(device, digest_size == di->digest_size);
1244b411b363SPhilipp Reisner 			eq = !memcmp(digest, di->digest, digest_size);
1245b411b363SPhilipp Reisner 			kfree(digest);
1246b411b363SPhilipp Reisner 		}
1247b411b363SPhilipp Reisner 	}
1248b411b363SPhilipp Reisner 
12499676c760SLars Ellenberg 	/* Free peer_req and pages before send.
125053ea4331SLars Ellenberg 	 * In case we block on congestion, we could otherwise run into
125153ea4331SLars Ellenberg 	 * some distributed deadlock, if the other side blocks on
125253ea4331SLars Ellenberg 	 * congestion as well, because our receiver blocks in
1253c37c8ecfSAndreas Gruenbacher 	 * drbd_alloc_pages due to pp_in_use > max_buffers. */
1254b30ab791SAndreas Gruenbacher 	drbd_free_peer_req(device, peer_req);
1255b411b363SPhilipp Reisner 	if (!eq)
1256b30ab791SAndreas Gruenbacher 		drbd_ov_out_of_sync_found(device, sector, size);
1257b411b363SPhilipp Reisner 	else
1258b30ab791SAndreas Gruenbacher 		ov_out_of_sync_print(device);
1259b411b363SPhilipp Reisner 
12606780139cSAndreas Gruenbacher 	err = drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size,
1261b411b363SPhilipp Reisner 			       eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
1262b411b363SPhilipp Reisner 
1263b30ab791SAndreas Gruenbacher 	dec_unacked(device);
1264b411b363SPhilipp Reisner 
1265b30ab791SAndreas Gruenbacher 	--device->ov_left;
1266ea5442afSLars Ellenberg 
1267ea5442afSLars Ellenberg 	/* let's advance progress step marks only for every other megabyte */
1268b30ab791SAndreas Gruenbacher 	if ((device->ov_left & 0x200) == 0x200)
1269b30ab791SAndreas Gruenbacher 		drbd_advance_rs_marks(device, device->ov_left);
1270ea5442afSLars Ellenberg 
1271b30ab791SAndreas Gruenbacher 	stop_sector_reached = verify_can_do_stop_sector(device) &&
1272b30ab791SAndreas Gruenbacher 		(sector + (size>>9)) >= device->ov_stop_sector;
127358ffa580SLars Ellenberg 
1274b30ab791SAndreas Gruenbacher 	if (device->ov_left == 0 || stop_sector_reached) {
1275b30ab791SAndreas Gruenbacher 		ov_out_of_sync_print(device);
1276b30ab791SAndreas Gruenbacher 		drbd_resync_finished(device);
1277b411b363SPhilipp Reisner 	}
1278b411b363SPhilipp Reisner 
127999920dc5SAndreas Gruenbacher 	return err;
1280b411b363SPhilipp Reisner }
1281b411b363SPhilipp Reisner 
1282b6dd1a89SLars Ellenberg /* FIXME
1283b6dd1a89SLars Ellenberg  * We need to track the number of pending barrier acks,
1284b6dd1a89SLars Ellenberg  * and to be able to wait for them.
1285b6dd1a89SLars Ellenberg  * See also comment in drbd_adm_attach before drbd_suspend_io.
1286b6dd1a89SLars Ellenberg  */
1287bde89a9eSAndreas Gruenbacher static int drbd_send_barrier(struct drbd_connection *connection)
1288b411b363SPhilipp Reisner {
12899f5bdc33SAndreas Gruenbacher 	struct p_barrier *p;
1290b6dd1a89SLars Ellenberg 	struct drbd_socket *sock;
1291b411b363SPhilipp Reisner 
1292bde89a9eSAndreas Gruenbacher 	sock = &connection->data;
1293bde89a9eSAndreas Gruenbacher 	p = conn_prepare_command(connection, sock);
12949f5bdc33SAndreas Gruenbacher 	if (!p)
12959f5bdc33SAndreas Gruenbacher 		return -EIO;
1296bde89a9eSAndreas Gruenbacher 	p->barrier = connection->send.current_epoch_nr;
1297b6dd1a89SLars Ellenberg 	p->pad = 0;
1298bde89a9eSAndreas Gruenbacher 	connection->send.current_epoch_writes = 0;
129984d34f2fSLars Ellenberg 	connection->send.last_sent_barrier_jif = jiffies;
1300b6dd1a89SLars Ellenberg 
1301bde89a9eSAndreas Gruenbacher 	return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0);
1302b411b363SPhilipp Reisner }
1303b411b363SPhilipp Reisner 
130499920dc5SAndreas Gruenbacher int w_send_write_hint(struct drbd_work *w, int cancel)
1305b411b363SPhilipp Reisner {
130684b8c06bSAndreas Gruenbacher 	struct drbd_device *device =
130784b8c06bSAndreas Gruenbacher 		container_of(w, struct drbd_device, unplug_work);
13089f5bdc33SAndreas Gruenbacher 	struct drbd_socket *sock;
13099f5bdc33SAndreas Gruenbacher 
1310b411b363SPhilipp Reisner 	if (cancel)
131199920dc5SAndreas Gruenbacher 		return 0;
1312a6b32bc3SAndreas Gruenbacher 	sock = &first_peer_device(device)->connection->data;
131369a22773SAndreas Gruenbacher 	if (!drbd_prepare_command(first_peer_device(device), sock))
13149f5bdc33SAndreas Gruenbacher 		return -EIO;
131569a22773SAndreas Gruenbacher 	return drbd_send_command(first_peer_device(device), sock, P_UNPLUG_REMOTE, 0, NULL, 0);
1316b411b363SPhilipp Reisner }
1317b411b363SPhilipp Reisner 
1318bde89a9eSAndreas Gruenbacher static void re_init_if_first_write(struct drbd_connection *connection, unsigned int epoch)
13194eb9b3cbSLars Ellenberg {
1320bde89a9eSAndreas Gruenbacher 	if (!connection->send.seen_any_write_yet) {
1321bde89a9eSAndreas Gruenbacher 		connection->send.seen_any_write_yet = true;
1322bde89a9eSAndreas Gruenbacher 		connection->send.current_epoch_nr = epoch;
1323bde89a9eSAndreas Gruenbacher 		connection->send.current_epoch_writes = 0;
132484d34f2fSLars Ellenberg 		connection->send.last_sent_barrier_jif = jiffies;
13254eb9b3cbSLars Ellenberg 	}
13264eb9b3cbSLars Ellenberg }
13274eb9b3cbSLars Ellenberg 
1328bde89a9eSAndreas Gruenbacher static void maybe_send_barrier(struct drbd_connection *connection, unsigned int epoch)
13294eb9b3cbSLars Ellenberg {
13304eb9b3cbSLars Ellenberg 	/* nothing to close before the first write on this connection */
1331bde89a9eSAndreas Gruenbacher 	if (!connection->send.seen_any_write_yet)
13324eb9b3cbSLars Ellenberg 		return;
1333bde89a9eSAndreas Gruenbacher 	if (connection->send.current_epoch_nr != epoch) {
1334bde89a9eSAndreas Gruenbacher 		if (connection->send.current_epoch_writes)
1335bde89a9eSAndreas Gruenbacher 			drbd_send_barrier(connection);
1336bde89a9eSAndreas Gruenbacher 		connection->send.current_epoch_nr = epoch;
13374eb9b3cbSLars Ellenberg 	}
13384eb9b3cbSLars Ellenberg }
13394eb9b3cbSLars Ellenberg 
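/* Note on the epoch bookkeeping above: a P_BARRIER closes the previous
 * write epoch.  maybe_send_barrier() sends one only when the epoch number
 * changed and the old epoch actually contained writes, while
 * re_init_if_first_write() seeds the counters on a connection's very
 * first write so that no spurious barrier precedes it.  The send
 * callbacks below (w_send_dblock() and friends) call these helpers
 * before transmitting each request. */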
13408f7bed77SAndreas Gruenbacher int w_send_out_of_sync(struct drbd_work *w, int cancel)
134173a01a18SPhilipp Reisner {
134273a01a18SPhilipp Reisner 	struct drbd_request *req = container_of(w, struct drbd_request, w);
134384b8c06bSAndreas Gruenbacher 	struct drbd_device *device = req->device;
134444a4d551SLars Ellenberg 	struct drbd_peer_device *const peer_device = first_peer_device(device);
134544a4d551SLars Ellenberg 	struct drbd_connection *const connection = peer_device->connection;
134699920dc5SAndreas Gruenbacher 	int err;
134773a01a18SPhilipp Reisner 
134873a01a18SPhilipp Reisner 	if (unlikely(cancel)) {
13498554df1cSAndreas Gruenbacher 		req_mod(req, SEND_CANCELED);
135099920dc5SAndreas Gruenbacher 		return 0;
135173a01a18SPhilipp Reisner 	}
1352e5f891b2SLars Ellenberg 	req->pre_send_jif = jiffies;
135373a01a18SPhilipp Reisner 
1354bde89a9eSAndreas Gruenbacher 	/* this time, no connection->send.current_epoch_writes++;
1355b6dd1a89SLars Ellenberg 	 * If it was sent, it was the closing barrier for the last
1356b6dd1a89SLars Ellenberg 	 * replicated epoch, before we went into AHEAD mode.
1357b6dd1a89SLars Ellenberg 	 * No more barriers will be sent, until we leave AHEAD mode again. */
1358bde89a9eSAndreas Gruenbacher 	maybe_send_barrier(connection, req->epoch);
1359b6dd1a89SLars Ellenberg 
136044a4d551SLars Ellenberg 	err = drbd_send_out_of_sync(peer_device, req);
13618554df1cSAndreas Gruenbacher 	req_mod(req, OOS_HANDED_TO_NETWORK);
136273a01a18SPhilipp Reisner 
136399920dc5SAndreas Gruenbacher 	return err;
136473a01a18SPhilipp Reisner }
136573a01a18SPhilipp Reisner 
1366b411b363SPhilipp Reisner /**
1367b411b363SPhilipp Reisner  * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
1368b411b363SPhilipp Reisner  * @w:		work object.
1369b411b363SPhilipp Reisner  * @cancel:	The connection will be closed anyway
1370b411b363SPhilipp Reisner  */
137199920dc5SAndreas Gruenbacher int w_send_dblock(struct drbd_work *w, int cancel)
1372b411b363SPhilipp Reisner {
1373b411b363SPhilipp Reisner 	struct drbd_request *req = container_of(w, struct drbd_request, w);
137484b8c06bSAndreas Gruenbacher 	struct drbd_device *device = req->device;
137544a4d551SLars Ellenberg 	struct drbd_peer_device *const peer_device = first_peer_device(device);
137644a4d551SLars Ellenberg 	struct drbd_connection *connection = peer_device->connection;
137799920dc5SAndreas Gruenbacher 	int err;
1378b411b363SPhilipp Reisner 
1379b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
13808554df1cSAndreas Gruenbacher 		req_mod(req, SEND_CANCELED);
138199920dc5SAndreas Gruenbacher 		return 0;
1382b411b363SPhilipp Reisner 	}
1383e5f891b2SLars Ellenberg 	req->pre_send_jif = jiffies;
1384b411b363SPhilipp Reisner 
1385bde89a9eSAndreas Gruenbacher 	re_init_if_first_write(connection, req->epoch);
1386bde89a9eSAndreas Gruenbacher 	maybe_send_barrier(connection, req->epoch);
1387bde89a9eSAndreas Gruenbacher 	connection->send.current_epoch_writes++;
1388b6dd1a89SLars Ellenberg 
138944a4d551SLars Ellenberg 	err = drbd_send_dblock(peer_device, req);
139099920dc5SAndreas Gruenbacher 	req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
1391b411b363SPhilipp Reisner 
139299920dc5SAndreas Gruenbacher 	return err;
1393b411b363SPhilipp Reisner }
1394b411b363SPhilipp Reisner 
1395b411b363SPhilipp Reisner /**
1396b411b363SPhilipp Reisner  * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
1397b411b363SPhilipp Reisner  * @w:		work object.
1398b411b363SPhilipp Reisner  * @cancel:	The connection will be closed anyways
1399b411b363SPhilipp Reisner  * @cancel:	The connection will be closed anyway
140099920dc5SAndreas Gruenbacher int w_send_read_req(struct drbd_work *w, int cancel)
1401b411b363SPhilipp Reisner {
1402b411b363SPhilipp Reisner 	struct drbd_request *req = container_of(w, struct drbd_request, w);
140384b8c06bSAndreas Gruenbacher 	struct drbd_device *device = req->device;
140444a4d551SLars Ellenberg 	struct drbd_peer_device *const peer_device = first_peer_device(device);
140544a4d551SLars Ellenberg 	struct drbd_connection *connection = peer_device->connection;
140699920dc5SAndreas Gruenbacher 	int err;
1407b411b363SPhilipp Reisner 
1408b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
14098554df1cSAndreas Gruenbacher 		req_mod(req, SEND_CANCELED);
141099920dc5SAndreas Gruenbacher 		return 0;
1411b411b363SPhilipp Reisner 	}
1412e5f891b2SLars Ellenberg 	req->pre_send_jif = jiffies;
1413b411b363SPhilipp Reisner 
1414b6dd1a89SLars Ellenberg 	/* Even read requests may close a write epoch,
1415b6dd1a89SLars Ellenberg 	 * if one is still open. */
1416bde89a9eSAndreas Gruenbacher 	maybe_send_barrier(connection, req->epoch);
1417b6dd1a89SLars Ellenberg 
141844a4d551SLars Ellenberg 	err = drbd_send_drequest(peer_device, P_DATA_REQUEST, req->i.sector, req->i.size,
1419b411b363SPhilipp Reisner 				 (unsigned long)req);
1420b411b363SPhilipp Reisner 
142199920dc5SAndreas Gruenbacher 	req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
1422b411b363SPhilipp Reisner 
142399920dc5SAndreas Gruenbacher 	return err;
1424b411b363SPhilipp Reisner }
1425b411b363SPhilipp Reisner 
142699920dc5SAndreas Gruenbacher int w_restart_disk_io(struct drbd_work *w, int cancel)
1427265be2d0SPhilipp Reisner {
1428265be2d0SPhilipp Reisner 	struct drbd_request *req = container_of(w, struct drbd_request, w);
142984b8c06bSAndreas Gruenbacher 	struct drbd_device *device = req->device;
1430265be2d0SPhilipp Reisner 
14310778286aSPhilipp Reisner 	if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
14324dd726f0SLars Ellenberg 		drbd_al_begin_io(device, &req->i);
1433265be2d0SPhilipp Reisner 
1434265be2d0SPhilipp Reisner 	drbd_req_make_private_bio(req, req->master_bio);
1435b30ab791SAndreas Gruenbacher 	req->private_bio->bi_bdev = device->ldev->backing_bdev;
1436265be2d0SPhilipp Reisner 	generic_make_request(req->private_bio);
1437265be2d0SPhilipp Reisner 
143899920dc5SAndreas Gruenbacher 	return 0;
1439265be2d0SPhilipp Reisner }
1440265be2d0SPhilipp Reisner 
1441b30ab791SAndreas Gruenbacher static int _drbd_may_sync_now(struct drbd_device *device)
1442b411b363SPhilipp Reisner {
1443b30ab791SAndreas Gruenbacher 	struct drbd_device *odev = device;
144495f8efd0SAndreas Gruenbacher 	int resync_after;
1445b411b363SPhilipp Reisner 
1446b411b363SPhilipp Reisner 	while (1) {
1447a3f8f7dcSLars Ellenberg 		if (!odev->ldev || odev->state.disk == D_DISKLESS)
1448438c8374SPhilipp Reisner 			return 1;
1449daeda1ccSPhilipp Reisner 		rcu_read_lock();
145095f8efd0SAndreas Gruenbacher 		resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
1451daeda1ccSPhilipp Reisner 		rcu_read_unlock();
145295f8efd0SAndreas Gruenbacher 		if (resync_after == -1)
1453b411b363SPhilipp Reisner 			return 1;
1454b30ab791SAndreas Gruenbacher 		odev = minor_to_device(resync_after);
1455a3f8f7dcSLars Ellenberg 		if (!odev)
1456841ce241SAndreas Gruenbacher 			return 1;
1457b411b363SPhilipp Reisner 		if ((odev->state.conn >= C_SYNC_SOURCE &&
1458b411b363SPhilipp Reisner 		     odev->state.conn <= C_PAUSED_SYNC_T) ||
1459b411b363SPhilipp Reisner 		    odev->state.aftr_isp || odev->state.peer_isp ||
1460b411b363SPhilipp Reisner 		    odev->state.user_isp)
1461b411b363SPhilipp Reisner 			return 0;
1462b411b363SPhilipp Reisner 	}
1463b411b363SPhilipp Reisner }
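/* Note: _drbd_may_sync_now() walks the resync-after dependency chain
 * (each device may be configured to resync only after another minor).
 * It returns 0 as soon as a device further up the chain is actively
 * resyncing or has any sync-pause flag set, and 1 once the chain ends
 * at a diskless/missing minor or at resync_after == -1. */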
1464b411b363SPhilipp Reisner 
1465b411b363SPhilipp Reisner /**
146628bc3b8cSAndreas Gruenbacher  * drbd_pause_after() - Pause resync on all devices that may not resync now
1467b30ab791SAndreas Gruenbacher  * @device:	DRBD device.
1468b411b363SPhilipp Reisner  *
1469b411b363SPhilipp Reisner  * Called from process context only (admin command and after_state_ch).
1470b411b363SPhilipp Reisner  */
147128bc3b8cSAndreas Gruenbacher static bool drbd_pause_after(struct drbd_device *device)
1472b411b363SPhilipp Reisner {
147328bc3b8cSAndreas Gruenbacher 	bool changed = false;
147454761697SAndreas Gruenbacher 	struct drbd_device *odev;
147528bc3b8cSAndreas Gruenbacher 	int i;
1476b411b363SPhilipp Reisner 
1477695d08faSPhilipp Reisner 	rcu_read_lock();
147805a10ec7SAndreas Gruenbacher 	idr_for_each_entry(&drbd_devices, odev, i) {
1479b411b363SPhilipp Reisner 		if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1480b411b363SPhilipp Reisner 			continue;
148128bc3b8cSAndreas Gruenbacher 		if (!_drbd_may_sync_now(odev) &&
148228bc3b8cSAndreas Gruenbacher 		    _drbd_set_state(_NS(odev, aftr_isp, 1),
148328bc3b8cSAndreas Gruenbacher 				    CS_HARD, NULL) != SS_NOTHING_TO_DO)
148428bc3b8cSAndreas Gruenbacher 			changed = true;
1485b411b363SPhilipp Reisner 	}
1486695d08faSPhilipp Reisner 	rcu_read_unlock();
1487b411b363SPhilipp Reisner 
148828bc3b8cSAndreas Gruenbacher 	return changed;
1489b411b363SPhilipp Reisner }
1490b411b363SPhilipp Reisner 
1491b411b363SPhilipp Reisner /**
149228bc3b8cSAndreas Gruenbacher  * drbd_resume_next() - Resume resync on all devices that may resync now
1493b30ab791SAndreas Gruenbacher  * @device:	DRBD device.
1494b411b363SPhilipp Reisner  *
1495b411b363SPhilipp Reisner  * Called from process context only (admin command and worker).
1496b411b363SPhilipp Reisner  */
149728bc3b8cSAndreas Gruenbacher static bool drbd_resume_next(struct drbd_device *device)
1498b411b363SPhilipp Reisner {
149928bc3b8cSAndreas Gruenbacher 	bool changed = false;
150054761697SAndreas Gruenbacher 	struct drbd_device *odev;
150128bc3b8cSAndreas Gruenbacher 	int i;
1502b411b363SPhilipp Reisner 
1503695d08faSPhilipp Reisner 	rcu_read_lock();
150405a10ec7SAndreas Gruenbacher 	idr_for_each_entry(&drbd_devices, odev, i) {
1505b411b363SPhilipp Reisner 		if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1506b411b363SPhilipp Reisner 			continue;
1507b411b363SPhilipp Reisner 		if (odev->state.aftr_isp) {
150828bc3b8cSAndreas Gruenbacher 			if (_drbd_may_sync_now(odev) &&
150928bc3b8cSAndreas Gruenbacher 			    _drbd_set_state(_NS(odev, aftr_isp, 0),
151028bc3b8cSAndreas Gruenbacher 					    CS_HARD, NULL) != SS_NOTHING_TO_DO)
151128bc3b8cSAndreas Gruenbacher 				changed = true;
1512b411b363SPhilipp Reisner 		}
1513b411b363SPhilipp Reisner 	}
1514695d08faSPhilipp Reisner 	rcu_read_unlock();
151528bc3b8cSAndreas Gruenbacher 	return changed;
1516b411b363SPhilipp Reisner }
1517b411b363SPhilipp Reisner 
1518b30ab791SAndreas Gruenbacher void resume_next_sg(struct drbd_device *device)
1519b411b363SPhilipp Reisner {
152028bc3b8cSAndreas Gruenbacher 	lock_all_resources();
152128bc3b8cSAndreas Gruenbacher 	drbd_resume_next(device);
152228bc3b8cSAndreas Gruenbacher 	unlock_all_resources();
1523b411b363SPhilipp Reisner }
1524b411b363SPhilipp Reisner 
1525b30ab791SAndreas Gruenbacher void suspend_other_sg(struct drbd_device *device)
1526b411b363SPhilipp Reisner {
152728bc3b8cSAndreas Gruenbacher 	lock_all_resources();
152828bc3b8cSAndreas Gruenbacher 	drbd_pause_after(device);
152928bc3b8cSAndreas Gruenbacher 	unlock_all_resources();
1530b411b363SPhilipp Reisner }
1531b411b363SPhilipp Reisner 
153228bc3b8cSAndreas Gruenbacher /* caller must lock_all_resources() */
1533b30ab791SAndreas Gruenbacher enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor)
1534b411b363SPhilipp Reisner {
153554761697SAndreas Gruenbacher 	struct drbd_device *odev;
153695f8efd0SAndreas Gruenbacher 	int resync_after;
1537b411b363SPhilipp Reisner 
1538b411b363SPhilipp Reisner 	if (o_minor == -1)
1539b411b363SPhilipp Reisner 		return NO_ERROR;
1540a3f8f7dcSLars Ellenberg 	if (o_minor < -1 || o_minor > MINORMASK)
154195f8efd0SAndreas Gruenbacher 		return ERR_RESYNC_AFTER;
1542b411b363SPhilipp Reisner 
1543b411b363SPhilipp Reisner 	/* check for loops */
1544b30ab791SAndreas Gruenbacher 	odev = minor_to_device(o_minor);
1545b411b363SPhilipp Reisner 	while (1) {
1546b30ab791SAndreas Gruenbacher 		if (odev == device)
154795f8efd0SAndreas Gruenbacher 			return ERR_RESYNC_AFTER_CYCLE;
1548b411b363SPhilipp Reisner 
1549a3f8f7dcSLars Ellenberg 		/* You are free to depend on diskless, non-existing,
1550a3f8f7dcSLars Ellenberg 		 * or not yet/no longer existing minors.
1551a3f8f7dcSLars Ellenberg 		 * We only reject dependency loops.
1552a3f8f7dcSLars Ellenberg 		 * We cannot follow the dependency chain beyond a detached or
1553a3f8f7dcSLars Ellenberg 		 * missing minor.
1554a3f8f7dcSLars Ellenberg 		 */
1555a3f8f7dcSLars Ellenberg 		if (!odev || !odev->ldev || odev->state.disk == D_DISKLESS)
1556a3f8f7dcSLars Ellenberg 			return NO_ERROR;
1557a3f8f7dcSLars Ellenberg 
1558daeda1ccSPhilipp Reisner 		rcu_read_lock();
155995f8efd0SAndreas Gruenbacher 		resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
1560daeda1ccSPhilipp Reisner 		rcu_read_unlock();
1561b411b363SPhilipp Reisner 		/* dependency chain ends here, no cycles. */
156295f8efd0SAndreas Gruenbacher 		if (resync_after == -1)
1563b411b363SPhilipp Reisner 			return NO_ERROR;
1564b411b363SPhilipp Reisner 
1565b411b363SPhilipp Reisner 		/* follow the dependency chain */
1566b30ab791SAndreas Gruenbacher 		odev = minor_to_device(resync_after);
1567b411b363SPhilipp Reisner 	}
1568b411b363SPhilipp Reisner }
1569b411b363SPhilipp Reisner 
157028bc3b8cSAndreas Gruenbacher /* caller must lock_all_resources() */
1571b30ab791SAndreas Gruenbacher void drbd_resync_after_changed(struct drbd_device *device)
1572b411b363SPhilipp Reisner {
157328bc3b8cSAndreas Gruenbacher 	int changed;
1574b411b363SPhilipp Reisner 
1575b411b363SPhilipp Reisner 	do {
157628bc3b8cSAndreas Gruenbacher 		changed  = drbd_pause_after(device);
157728bc3b8cSAndreas Gruenbacher 		changed |= drbd_resume_next(device);
157828bc3b8cSAndreas Gruenbacher 	} while (changed);
1579b411b363SPhilipp Reisner }
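/* Note: the loop above iterates to a fixed point: pausing or resuming one
 * device may in turn change what its dependents are allowed to do, so we
 * repeat until neither drbd_pause_after() nor drbd_resume_next() changes
 * any state. */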
1580b411b363SPhilipp Reisner 
1581b30ab791SAndreas Gruenbacher void drbd_rs_controller_reset(struct drbd_device *device)
15829bd28d3cSLars Ellenberg {
1583ff8bd88bSLars Ellenberg 	struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
1584813472ceSPhilipp Reisner 	struct fifo_buffer *plan;
1585813472ceSPhilipp Reisner 
1586b30ab791SAndreas Gruenbacher 	atomic_set(&device->rs_sect_in, 0);
1587b30ab791SAndreas Gruenbacher 	atomic_set(&device->rs_sect_ev, 0);
1588b30ab791SAndreas Gruenbacher 	device->rs_in_flight = 0;
1589ff8bd88bSLars Ellenberg 	device->rs_last_events =
1590ff8bd88bSLars Ellenberg 		(int)part_stat_read(&disk->part0, sectors[0]) +
1591ff8bd88bSLars Ellenberg 		(int)part_stat_read(&disk->part0, sectors[1]);
1592813472ceSPhilipp Reisner 
1593813472ceSPhilipp Reisner 	/* Updating the RCU protected object in place is necessary since
1594813472ceSPhilipp Reisner 	   this function gets called from atomic context.
1595813472ceSPhilipp Reisner 	   It is valid since all other updates also lead to a completely
1596813472ceSPhilipp Reisner 	   empty fifo */
1597813472ceSPhilipp Reisner 	rcu_read_lock();
1598b30ab791SAndreas Gruenbacher 	plan = rcu_dereference(device->rs_plan_s);
1599813472ceSPhilipp Reisner 	plan->total = 0;
1600813472ceSPhilipp Reisner 	fifo_set(plan, 0);
1601813472ceSPhilipp Reisner 	rcu_read_unlock();
16029bd28d3cSLars Ellenberg }
16039bd28d3cSLars Ellenberg 
16041f04af33SPhilipp Reisner void start_resync_timer_fn(unsigned long data)
16051f04af33SPhilipp Reisner {
1606b30ab791SAndreas Gruenbacher 	struct drbd_device *device = (struct drbd_device *) data;
1607ac0acb9eSLars Ellenberg 	drbd_device_post_work(device, RS_START);
16081f04af33SPhilipp Reisner }
16091f04af33SPhilipp Reisner 
1610ac0acb9eSLars Ellenberg static void do_start_resync(struct drbd_device *device)
16111f04af33SPhilipp Reisner {
1612b30ab791SAndreas Gruenbacher 	if (atomic_read(&device->unacked_cnt) || atomic_read(&device->rs_pending_cnt)) {
1613ac0acb9eSLars Ellenberg 		drbd_warn(device, "postponing start_resync ...\n");
1614b30ab791SAndreas Gruenbacher 		device->start_resync_timer.expires = jiffies + HZ/10;
1615b30ab791SAndreas Gruenbacher 		add_timer(&device->start_resync_timer);
1616ac0acb9eSLars Ellenberg 		return;
16171f04af33SPhilipp Reisner 	}
16181f04af33SPhilipp Reisner 
1619b30ab791SAndreas Gruenbacher 	drbd_start_resync(device, C_SYNC_SOURCE);
1620b30ab791SAndreas Gruenbacher 	clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags);
16211f04af33SPhilipp Reisner }
16221f04af33SPhilipp Reisner 
1623aaaba345SLars Ellenberg static bool use_checksum_based_resync(struct drbd_connection *connection, struct drbd_device *device)
1624aaaba345SLars Ellenberg {
1625aaaba345SLars Ellenberg 	bool csums_after_crash_only;
1626aaaba345SLars Ellenberg 	rcu_read_lock();
1627aaaba345SLars Ellenberg 	csums_after_crash_only = rcu_dereference(connection->net_conf)->csums_after_crash_only;
1628aaaba345SLars Ellenberg 	rcu_read_unlock();
1629aaaba345SLars Ellenberg 	return connection->agreed_pro_version >= 89 &&		/* supported? */
1630aaaba345SLars Ellenberg 		connection->csums_tfm &&			/* configured? */
1631aaaba345SLars Ellenberg 		(csums_after_crash_only == 0			/* use for each resync? */
1632aaaba345SLars Ellenberg 		 || test_bit(CRASHED_PRIMARY, &device->flags));	/* or only after Primary crash? */
1633aaaba345SLars Ellenberg }
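/* Note: checksum-based resync is used only if the peer speaks protocol 89
 * or newer, a csums algorithm is configured on the connection, and it is
 * either enabled for every resync or this node is recovering from a
 * primary crash (CRASHED_PRIMARY set). */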
1634aaaba345SLars Ellenberg 
1635b411b363SPhilipp Reisner /**
1636b411b363SPhilipp Reisner  * drbd_start_resync() - Start the resync process
1637b30ab791SAndreas Gruenbacher  * @device:	DRBD device.
1638b411b363SPhilipp Reisner  * @side:	Either C_SYNC_SOURCE or C_SYNC_TARGET
1639b411b363SPhilipp Reisner  *
1640b411b363SPhilipp Reisner  * This function might bring you directly into one of the
1641b411b363SPhilipp Reisner  * C_PAUSED_SYNC_* states.
1642b411b363SPhilipp Reisner  */
1643b30ab791SAndreas Gruenbacher void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
1644b411b363SPhilipp Reisner {
164544a4d551SLars Ellenberg 	struct drbd_peer_device *peer_device = first_peer_device(device);
164644a4d551SLars Ellenberg 	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
1647b411b363SPhilipp Reisner 	union drbd_state ns;
1648b411b363SPhilipp Reisner 	int r;
1649b411b363SPhilipp Reisner 
1650b30ab791SAndreas Gruenbacher 	if (device->state.conn >= C_SYNC_SOURCE && device->state.conn < C_AHEAD) {
1651d0180171SAndreas Gruenbacher 		drbd_err(device, "Resync already running!\n");
1652b411b363SPhilipp Reisner 		return;
1653b411b363SPhilipp Reisner 	}
1654b411b363SPhilipp Reisner 
1655b30ab791SAndreas Gruenbacher 	if (!test_bit(B_RS_H_DONE, &device->flags)) {
1656b411b363SPhilipp Reisner 		if (side == C_SYNC_TARGET) {
1657b411b363SPhilipp Reisner 			/* Since application IO was locked out during C_WF_BITMAP_T and
1658b411b363SPhilipp Reisner 			   C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
1659b411b363SPhilipp Reisner 			   we check, via the before-resync-target handler, whether we may make the data inconsistent. */
1660b30ab791SAndreas Gruenbacher 			r = drbd_khelper(device, "before-resync-target");
1661b411b363SPhilipp Reisner 			r = (r >> 8) & 0xff;
1662b411b363SPhilipp Reisner 			if (r > 0) {
1663d0180171SAndreas Gruenbacher 				drbd_info(device, "before-resync-target handler returned %d, "
1664b411b363SPhilipp Reisner 					 "dropping connection.\n", r);
166544a4d551SLars Ellenberg 				conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
1666b411b363SPhilipp Reisner 				return;
1667b411b363SPhilipp Reisner 			}
166809b9e797SPhilipp Reisner 		} else /* C_SYNC_SOURCE */ {
1669b30ab791SAndreas Gruenbacher 			r = drbd_khelper(device, "before-resync-source");
167009b9e797SPhilipp Reisner 			r = (r >> 8) & 0xff;
167109b9e797SPhilipp Reisner 			if (r > 0) {
167209b9e797SPhilipp Reisner 				if (r == 3) {
1673d0180171SAndreas Gruenbacher 					drbd_info(device, "before-resync-source handler returned %d, "
167409b9e797SPhilipp Reisner 						 "ignoring. Old userland tools?\n", r);
167509b9e797SPhilipp Reisner 				} else {
1676d0180171SAndreas Gruenbacher 					drbd_info(device, "before-resync-source handler returned %d, "
167709b9e797SPhilipp Reisner 						 "dropping connection.\n", r);
167844a4d551SLars Ellenberg 					conn_request_state(connection,
1679a6b32bc3SAndreas Gruenbacher 							   NS(conn, C_DISCONNECTING), CS_HARD);
168009b9e797SPhilipp Reisner 					return;
168109b9e797SPhilipp Reisner 				}
168209b9e797SPhilipp Reisner 			}
1683b411b363SPhilipp Reisner 		}
1684e64a3294SPhilipp Reisner 	}
1685b411b363SPhilipp Reisner 
168644a4d551SLars Ellenberg 	if (current == connection->worker.task) {
1687dad20554SPhilipp Reisner 		/* The worker should not sleep waiting for state_mutex,
1688e64a3294SPhilipp Reisner 		   that can take long */
1689b30ab791SAndreas Gruenbacher 		if (!mutex_trylock(device->state_mutex)) {
1690b30ab791SAndreas Gruenbacher 			set_bit(B_RS_H_DONE, &device->flags);
1691b30ab791SAndreas Gruenbacher 			device->start_resync_timer.expires = jiffies + HZ/5;
1692b30ab791SAndreas Gruenbacher 			add_timer(&device->start_resync_timer);
1693e64a3294SPhilipp Reisner 			return;
1694e64a3294SPhilipp Reisner 		}
1695e64a3294SPhilipp Reisner 	} else {
1696b30ab791SAndreas Gruenbacher 		mutex_lock(device->state_mutex);
1697e64a3294SPhilipp Reisner 	}
1698b411b363SPhilipp Reisner 
169928bc3b8cSAndreas Gruenbacher 	lock_all_resources();
170028bc3b8cSAndreas Gruenbacher 	clear_bit(B_RS_H_DONE, &device->flags);
1701a700471bSPhilipp Reisner 	/* Did some connection breakage or IO error race with us? */
1702b30ab791SAndreas Gruenbacher 	if (device->state.conn < C_CONNECTED
1703b30ab791SAndreas Gruenbacher 	|| !get_ldev_if_state(device, D_NEGOTIATING)) {
170428bc3b8cSAndreas Gruenbacher 		unlock_all_resources();
170528bc3b8cSAndreas Gruenbacher 		goto out;
1706b411b363SPhilipp Reisner 	}
1707b411b363SPhilipp Reisner 
1708b30ab791SAndreas Gruenbacher 	ns = drbd_read_state(device);
1709b411b363SPhilipp Reisner 
1710b30ab791SAndreas Gruenbacher 	ns.aftr_isp = !_drbd_may_sync_now(device);
1711b411b363SPhilipp Reisner 
1712b411b363SPhilipp Reisner 	ns.conn = side;
1713b411b363SPhilipp Reisner 
1714b411b363SPhilipp Reisner 	if (side == C_SYNC_TARGET)
1715b411b363SPhilipp Reisner 		ns.disk = D_INCONSISTENT;
1716b411b363SPhilipp Reisner 	else /* side == C_SYNC_SOURCE */
1717b411b363SPhilipp Reisner 		ns.pdsk = D_INCONSISTENT;
1718b411b363SPhilipp Reisner 
171928bc3b8cSAndreas Gruenbacher 	r = _drbd_set_state(device, ns, CS_VERBOSE, NULL);
1720b30ab791SAndreas Gruenbacher 	ns = drbd_read_state(device);
1721b411b363SPhilipp Reisner 
1722b411b363SPhilipp Reisner 	if (ns.conn < C_CONNECTED)
1723b411b363SPhilipp Reisner 		r = SS_UNKNOWN_ERROR;
1724b411b363SPhilipp Reisner 
1725b411b363SPhilipp Reisner 	if (r == SS_SUCCESS) {
1726b30ab791SAndreas Gruenbacher 		unsigned long tw = drbd_bm_total_weight(device);
17271d7734a0SLars Ellenberg 		unsigned long now = jiffies;
17281d7734a0SLars Ellenberg 		int i;
17291d7734a0SLars Ellenberg 
1730b30ab791SAndreas Gruenbacher 		device->rs_failed    = 0;
1731b30ab791SAndreas Gruenbacher 		device->rs_paused    = 0;
1732b30ab791SAndreas Gruenbacher 		device->rs_same_csum = 0;
1733b30ab791SAndreas Gruenbacher 		device->rs_last_sect_ev = 0;
1734b30ab791SAndreas Gruenbacher 		device->rs_total     = tw;
1735b30ab791SAndreas Gruenbacher 		device->rs_start     = now;
17361d7734a0SLars Ellenberg 		for (i = 0; i < DRBD_SYNC_MARKS; i++) {
1737b30ab791SAndreas Gruenbacher 			device->rs_mark_left[i] = tw;
1738b30ab791SAndreas Gruenbacher 			device->rs_mark_time[i] = now;
17391d7734a0SLars Ellenberg 		}
174028bc3b8cSAndreas Gruenbacher 		drbd_pause_after(device);
17415ab7d2c0SLars Ellenberg 		/* Forget potentially stale cached per resync extent bit-counts.
17425ab7d2c0SLars Ellenberg 		 * Open-coded drbd_rs_cancel_all(device); we already have IRQs
17435ab7d2c0SLars Ellenberg 		 * disabled, and know the disk state is ok. */
17445ab7d2c0SLars Ellenberg 		spin_lock(&device->al_lock);
17455ab7d2c0SLars Ellenberg 		lc_reset(device->resync);
17465ab7d2c0SLars Ellenberg 		device->resync_locked = 0;
17475ab7d2c0SLars Ellenberg 		device->resync_wenr = LC_FREE;
17485ab7d2c0SLars Ellenberg 		spin_unlock(&device->al_lock);
1749b411b363SPhilipp Reisner 	}
175028bc3b8cSAndreas Gruenbacher 	unlock_all_resources();
17515a22db89SLars Ellenberg 
17526c922ed5SLars Ellenberg 	if (r == SS_SUCCESS) {
17535ab7d2c0SLars Ellenberg 		wake_up(&device->al_wait); /* for lc_reset() above */
1754328e0f12SPhilipp Reisner 		/* reset rs_last_bcast when a resync or verify is started,
1755328e0f12SPhilipp Reisner 		 * to deal with potential jiffies wrap. */
1756b30ab791SAndreas Gruenbacher 		device->rs_last_bcast = jiffies - HZ;
1757328e0f12SPhilipp Reisner 
1758d0180171SAndreas Gruenbacher 		drbd_info(device, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
17596c922ed5SLars Ellenberg 		     drbd_conn_str(ns.conn),
1760b30ab791SAndreas Gruenbacher 		     (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10),
1761b30ab791SAndreas Gruenbacher 		     (unsigned long) device->rs_total);
1762aaaba345SLars Ellenberg 		if (side == C_SYNC_TARGET) {
1763b30ab791SAndreas Gruenbacher 			device->bm_resync_fo = 0;
1764aaaba345SLars Ellenberg 			device->use_csums = use_checksum_based_resync(connection, device);
1765aaaba345SLars Ellenberg 		} else {
1766aaaba345SLars Ellenberg 			device->use_csums = 0;
1767aaaba345SLars Ellenberg 		}
17685a22db89SLars Ellenberg 
17695a22db89SLars Ellenberg 		/* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
17705a22db89SLars Ellenberg 		 * with w_send_oos, or the sync target will get confused as to
17715a22db89SLars Ellenberg 		 * how much bits to resync.  We cannot do that always, because for an
17725a22db89SLars Ellenberg 		 * empty resync and protocol < 95, we need to do it here, as we call
17735a22db89SLars Ellenberg 		 * drbd_resync_finished from here in that case.
17745a22db89SLars Ellenberg 		 * We drbd_gen_and_send_sync_uuid here for protocol < 96,
17755a22db89SLars Ellenberg 		 * and from after_state_ch otherwise. */
177644a4d551SLars Ellenberg 		if (side == C_SYNC_SOURCE && connection->agreed_pro_version < 96)
177744a4d551SLars Ellenberg 			drbd_gen_and_send_sync_uuid(peer_device);
1778b411b363SPhilipp Reisner 
177944a4d551SLars Ellenberg 		if (connection->agreed_pro_version < 95 && device->rs_total == 0) {
1780af85e8e8SLars Ellenberg 			/* This still has a race (about when exactly the peers
1781af85e8e8SLars Ellenberg 			 * detect connection loss) that can lead to a full sync
1782af85e8e8SLars Ellenberg 			 * on next handshake. In 8.3.9 we fixed this with explicit
1783af85e8e8SLars Ellenberg 			 * resync-finished notifications, but the fix
1784af85e8e8SLars Ellenberg 			 * introduces a protocol change.  Sleeping for some
1785af85e8e8SLars Ellenberg 			 * time longer than the ping interval + timeout on the
1786af85e8e8SLars Ellenberg 			 * SyncSource, to give the SyncTarget the chance to
1787af85e8e8SLars Ellenberg 			 * detect connection loss, then waiting for a ping
1788af85e8e8SLars Ellenberg 			 * response (implicit in drbd_resync_finished) reduces
1789af85e8e8SLars Ellenberg 			 * the race considerably, but does not solve it. */
179044ed167dSPhilipp Reisner 			if (side == C_SYNC_SOURCE) {
179144ed167dSPhilipp Reisner 				struct net_conf *nc;
179244ed167dSPhilipp Reisner 				int timeo;
179344ed167dSPhilipp Reisner 
179444ed167dSPhilipp Reisner 				rcu_read_lock();
179544a4d551SLars Ellenberg 				nc = rcu_dereference(connection->net_conf);
179644ed167dSPhilipp Reisner 				timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9;
179744ed167dSPhilipp Reisner 				rcu_read_unlock();
179844ed167dSPhilipp Reisner 				schedule_timeout_interruptible(timeo);
179944ed167dSPhilipp Reisner 			}
1800b30ab791SAndreas Gruenbacher 			drbd_resync_finished(device);
1801b411b363SPhilipp Reisner 		}
1802b411b363SPhilipp Reisner 
1803b30ab791SAndreas Gruenbacher 		drbd_rs_controller_reset(device);
1804b30ab791SAndreas Gruenbacher 		/* ns.conn may already be != device->state.conn,
1805b411b363SPhilipp Reisner 		 * we may have been paused in between, or become paused until
1806b411b363SPhilipp Reisner 		 * the timer triggers.
1807b411b363SPhilipp Reisner 		 * No matter, that is handled in resync_timer_fn() */
1808b411b363SPhilipp Reisner 		if (ns.conn == C_SYNC_TARGET)
1809b30ab791SAndreas Gruenbacher 			mod_timer(&device->resync_timer, jiffies);
1810b411b363SPhilipp Reisner 
1811b30ab791SAndreas Gruenbacher 		drbd_md_sync(device);
1812b411b363SPhilipp Reisner 	}
1813b30ab791SAndreas Gruenbacher 	put_ldev(device);
181428bc3b8cSAndreas Gruenbacher out:
1815b30ab791SAndreas Gruenbacher 	mutex_unlock(device->state_mutex);
1816b411b363SPhilipp Reisner }
1817b411b363SPhilipp Reisner 
1818e334f550SLars Ellenberg static void update_on_disk_bitmap(struct drbd_device *device, bool resync_done)
1819c7a58db4SLars Ellenberg {
1820c7a58db4SLars Ellenberg 	struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, };
1821c7a58db4SLars Ellenberg 	device->rs_last_bcast = jiffies;
1822c7a58db4SLars Ellenberg 
1823c7a58db4SLars Ellenberg 	if (!get_ldev(device))
1824c7a58db4SLars Ellenberg 		return;
1825c7a58db4SLars Ellenberg 
1826c7a58db4SLars Ellenberg 	drbd_bm_write_lazy(device, 0);
18275ab7d2c0SLars Ellenberg 	if (resync_done && is_sync_state(device->state.conn))
1828c7a58db4SLars Ellenberg 		drbd_resync_finished(device);
18295ab7d2c0SLars Ellenberg 
1830c7a58db4SLars Ellenberg 	drbd_bcast_event(device, &sib);
1831c7a58db4SLars Ellenberg 	/* update timestamp, in case it took a while to write out stuff */
1832c7a58db4SLars Ellenberg 	device->rs_last_bcast = jiffies;
1833c7a58db4SLars Ellenberg 	put_ldev(device);
1834c7a58db4SLars Ellenberg }
1835c7a58db4SLars Ellenberg 
1836e334f550SLars Ellenberg static void drbd_ldev_destroy(struct drbd_device *device)
1837e334f550SLars Ellenberg {
1838e334f550SLars Ellenberg 	lc_destroy(device->resync);
1839e334f550SLars Ellenberg 	device->resync = NULL;
1840e334f550SLars Ellenberg 	lc_destroy(device->act_log);
1841e334f550SLars Ellenberg 	device->act_log = NULL;
1842d1b80853SAndreas Gruenbacher 
1843d1b80853SAndreas Gruenbacher 	__acquire(local);
184463a7c8adSLars Ellenberg 	drbd_backing_dev_free(device, device->ldev);
1845d1b80853SAndreas Gruenbacher 	device->ldev = NULL;
1846d1b80853SAndreas Gruenbacher 	__release(local);
1847d1b80853SAndreas Gruenbacher 
1848e334f550SLars Ellenberg 	clear_bit(GOING_DISKLESS, &device->flags);
1849e334f550SLars Ellenberg 	wake_up(&device->misc_wait);
1850e334f550SLars Ellenberg }
1851e334f550SLars Ellenberg 
1852e334f550SLars Ellenberg static void go_diskless(struct drbd_device *device)
1853e334f550SLars Ellenberg {
1854e334f550SLars Ellenberg 	D_ASSERT(device, device->state.disk == D_FAILED);
1855e334f550SLars Ellenberg 	/* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
1856e334f550SLars Ellenberg 	 * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
1857e334f550SLars Ellenberg 	 * the protected members anymore, though, so once put_ldev reaches zero
1858e334f550SLars Ellenberg 	 * again, it will be safe to free them. */
1859e334f550SLars Ellenberg 
1860e334f550SLars Ellenberg 	/* Try to write changed bitmap pages; read errors may have just
1861e334f550SLars Ellenberg 	 * set some bits outside the area covered by the activity log.
1862e334f550SLars Ellenberg 	 *
1863e334f550SLars Ellenberg 	 * If we have an IO error during the bitmap writeout,
1864e334f550SLars Ellenberg 	 * we will want a full sync next time, just in case.
1865e334f550SLars Ellenberg 	 * (Do we want a specific meta data flag for this?)
1866e334f550SLars Ellenberg 	 *
1867e334f550SLars Ellenberg 	 * If that does not make it to stable storage either,
1868e334f550SLars Ellenberg 	 * we cannot do anything about that anymore.
1869e334f550SLars Ellenberg 	 *
1870e334f550SLars Ellenberg 	 * We still need to check whether both bitmap and ldev are present; we may
1871e334f550SLars Ellenberg 	 * end up here after a failed attach, before ldev was even assigned.
1872e334f550SLars Ellenberg 	 */
1873e334f550SLars Ellenberg 	if (device->bitmap && device->ldev) {
1874e334f550SLars Ellenberg 		/* An interrupted resync or similar is allowed to recount bits
1875e334f550SLars Ellenberg 		 * while we detach.
1876e334f550SLars Ellenberg 		 * Any modifications would not be expected anymore, though.
1877e334f550SLars Ellenberg 		 */
1878e334f550SLars Ellenberg 		if (drbd_bitmap_io_from_worker(device, drbd_bm_write,
1879e334f550SLars Ellenberg 					"detach", BM_LOCKED_TEST_ALLOWED)) {
1880e334f550SLars Ellenberg 			if (test_bit(WAS_READ_ERROR, &device->flags)) {
1881e334f550SLars Ellenberg 				drbd_md_set_flag(device, MDF_FULL_SYNC);
1882e334f550SLars Ellenberg 				drbd_md_sync(device);
1883e334f550SLars Ellenberg 			}
1884e334f550SLars Ellenberg 		}
1885e334f550SLars Ellenberg 	}
1886e334f550SLars Ellenberg 
1887e334f550SLars Ellenberg 	drbd_force_state(device, NS(disk, D_DISKLESS));
1888e334f550SLars Ellenberg }
1889e334f550SLars Ellenberg 
1890ac0acb9eSLars Ellenberg static int do_md_sync(struct drbd_device *device)
1891ac0acb9eSLars Ellenberg {
1892ac0acb9eSLars Ellenberg 	drbd_warn(device, "md_sync_timer expired! Worker calls drbd_md_sync().\n");
1893ac0acb9eSLars Ellenberg 	drbd_md_sync(device);
1894ac0acb9eSLars Ellenberg 	return 0;
1895ac0acb9eSLars Ellenberg }
1896ac0acb9eSLars Ellenberg 
1897944410e9SLars Ellenberg /* only called from drbd_worker thread, no locking */
1898944410e9SLars Ellenberg void __update_timing_details(
1899944410e9SLars Ellenberg 		struct drbd_thread_timing_details *tdp,
1900944410e9SLars Ellenberg 		unsigned int *cb_nr,
1901944410e9SLars Ellenberg 		void *cb,
1902944410e9SLars Ellenberg 		const char *fn, const unsigned int line)
1903944410e9SLars Ellenberg {
1904944410e9SLars Ellenberg 	unsigned int i = *cb_nr % DRBD_THREAD_DETAILS_HIST;
1905944410e9SLars Ellenberg 	struct drbd_thread_timing_details *td = tdp + i;
1906944410e9SLars Ellenberg 
1907944410e9SLars Ellenberg 	td->start_jif = jiffies;
1908944410e9SLars Ellenberg 	td->cb_addr = cb;
1909944410e9SLars Ellenberg 	td->caller_fn = fn;
1910944410e9SLars Ellenberg 	td->line = line;
1911944410e9SLars Ellenberg 	td->cb_nr = *cb_nr;
1912944410e9SLars Ellenberg 
1913944410e9SLars Ellenberg 	i = (i+1) % DRBD_THREAD_DETAILS_HIST;
1914944410e9SLars Ellenberg 	td = tdp + i;
1915944410e9SLars Ellenberg 	memset(td, 0, sizeof(*td));
1916944410e9SLars Ellenberg 
1917944410e9SLars Ellenberg 	++(*cb_nr);
1918944410e9SLars Ellenberg }
1919944410e9SLars Ellenberg 
1920e334f550SLars Ellenberg static void do_device_work(struct drbd_device *device, const unsigned long todo)
1921e334f550SLars Ellenberg {
1922b47a06d1SAndreas Gruenbacher 	if (test_bit(MD_SYNC, &todo))
1923ac0acb9eSLars Ellenberg 		do_md_sync(device);
1924b47a06d1SAndreas Gruenbacher 	if (test_bit(RS_DONE, &todo) ||
1925b47a06d1SAndreas Gruenbacher 	    test_bit(RS_PROGRESS, &todo))
1926b47a06d1SAndreas Gruenbacher 		update_on_disk_bitmap(device, test_bit(RS_DONE, &todo));
1927b47a06d1SAndreas Gruenbacher 	if (test_bit(GO_DISKLESS, &todo))
1928e334f550SLars Ellenberg 		go_diskless(device);
1929b47a06d1SAndreas Gruenbacher 	if (test_bit(DESTROY_DISK, &todo))
1930e334f550SLars Ellenberg 		drbd_ldev_destroy(device);
1931b47a06d1SAndreas Gruenbacher 	if (test_bit(RS_START, &todo))
1932ac0acb9eSLars Ellenberg 		do_start_resync(device);
1933e334f550SLars Ellenberg }
1934e334f550SLars Ellenberg 
1935e334f550SLars Ellenberg #define DRBD_DEVICE_WORK_MASK	\
1936e334f550SLars Ellenberg 	((1UL << GO_DISKLESS)	\
1937e334f550SLars Ellenberg 	|(1UL << DESTROY_DISK)	\
1938ac0acb9eSLars Ellenberg 	|(1UL << MD_SYNC)	\
1939ac0acb9eSLars Ellenberg 	|(1UL << RS_START)	\
1940e334f550SLars Ellenberg 	|(1UL << RS_PROGRESS)	\
1941e334f550SLars Ellenberg 	|(1UL << RS_DONE)	\
1942e334f550SLars Ellenberg 	)
1943e334f550SLars Ellenberg 
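/* Atomically fetch-and-clear the device work bits in *flags via a cmpxchg
 * loop.  Only bits in DRBD_DEVICE_WORK_MASK are cleared and returned;
 * all other flag bits are left untouched. */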
1944e334f550SLars Ellenberg static unsigned long get_work_bits(unsigned long *flags)
1945e334f550SLars Ellenberg {
1946e334f550SLars Ellenberg 	unsigned long old, new;
1947e334f550SLars Ellenberg 	do {
1948e334f550SLars Ellenberg 		old = *flags;
1949e334f550SLars Ellenberg 		new = old & ~DRBD_DEVICE_WORK_MASK;
1950e334f550SLars Ellenberg 	} while (cmpxchg(flags, old, new) != old);
1951e334f550SLars Ellenberg 	return old & DRBD_DEVICE_WORK_MASK;
1952e334f550SLars Ellenberg }
1953e334f550SLars Ellenberg 
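/* Process pending device work for all volumes of this connection.
 * A device kref is taken before the RCU read lock is dropped, so the
 * device cannot go away while do_device_work() runs. */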
1954e334f550SLars Ellenberg static void do_unqueued_work(struct drbd_connection *connection)
1955c7a58db4SLars Ellenberg {
1956c7a58db4SLars Ellenberg 	struct drbd_peer_device *peer_device;
1957c7a58db4SLars Ellenberg 	int vnr;
1958c7a58db4SLars Ellenberg 
1959c7a58db4SLars Ellenberg 	rcu_read_lock();
1960c7a58db4SLars Ellenberg 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1961c7a58db4SLars Ellenberg 		struct drbd_device *device = peer_device->device;
1962e334f550SLars Ellenberg 		unsigned long todo = get_work_bits(&device->flags);
1963e334f550SLars Ellenberg 		if (!todo)
1964c7a58db4SLars Ellenberg 			continue;
19655ab7d2c0SLars Ellenberg 
1966c7a58db4SLars Ellenberg 		kref_get(&device->kref);
1967c7a58db4SLars Ellenberg 		rcu_read_unlock();
1968e334f550SLars Ellenberg 		do_device_work(device, todo);
1969c7a58db4SLars Ellenberg 		kref_put(&device->kref, drbd_destroy_device);
1970c7a58db4SLars Ellenberg 		rcu_read_lock();
1971c7a58db4SLars Ellenberg 	}
1972c7a58db4SLars Ellenberg 	rcu_read_unlock();
1973c7a58db4SLars Ellenberg }
1974c7a58db4SLars Ellenberg 
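/* Splice everything currently queued onto @work_list in one go;
 * returns true if any work was dequeued. */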
1975a186e478SRashika Kheria static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
19768c0785a5SLars Ellenberg {
19778c0785a5SLars Ellenberg 	spin_lock_irq(&queue->q_lock);
197815e26f6aSLars Ellenberg 	list_splice_tail_init(&queue->q, work_list);
19798c0785a5SLars Ellenberg 	spin_unlock_irq(&queue->q_lock);
19808c0785a5SLars Ellenberg 	return !list_empty(work_list);
19818c0785a5SLars Ellenberg }
19828c0785a5SLars Ellenberg 
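/* Grab a batch of work from the sender queue.  If there is none, uncork
 * the data socket (when tcp_cork is configured), then sleep until new
 * work is queued, device work becomes pending, a signal arrives, or the
 * worker is told to stop.  While waiting, send the epoch separating
 * barrier if the current transfer log epoch already differs from the
 * last epoch we sent requests for.  On the way out, re-apply the corking
 * policy from the (possibly changed) net_conf. */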
1983bde89a9eSAndreas Gruenbacher static void wait_for_work(struct drbd_connection *connection, struct list_head *work_list)
1984b6dd1a89SLars Ellenberg {
1985b6dd1a89SLars Ellenberg 	DEFINE_WAIT(wait);
1986b6dd1a89SLars Ellenberg 	struct net_conf *nc;
1987b6dd1a89SLars Ellenberg 	int uncork, cork;
1988b6dd1a89SLars Ellenberg 
1989abde9cc6SLars Ellenberg 	dequeue_work_batch(&connection->sender_work, work_list);
1990b6dd1a89SLars Ellenberg 	if (!list_empty(work_list))
1991b6dd1a89SLars Ellenberg 		return;
1992b6dd1a89SLars Ellenberg 
1993b6dd1a89SLars Ellenberg 	/* Still nothing to do?
1994b6dd1a89SLars Ellenberg 	 * Maybe we still need to close the current epoch,
1995b6dd1a89SLars Ellenberg 	 * even if no new requests are queued yet.
1996b6dd1a89SLars Ellenberg 	 *
1997b6dd1a89SLars Ellenberg 	 * Also, poke TCP, just in case.
1998b6dd1a89SLars Ellenberg 	 * Then wait for new work (or signal). */
1999b6dd1a89SLars Ellenberg 	rcu_read_lock();
2000b6dd1a89SLars Ellenberg 	nc = rcu_dereference(connection->net_conf);
2001b6dd1a89SLars Ellenberg 	uncork = nc ? nc->tcp_cork : 0;
2002b6dd1a89SLars Ellenberg 	rcu_read_unlock();
2003b6dd1a89SLars Ellenberg 	if (uncork) {
2004b6dd1a89SLars Ellenberg 		mutex_lock(&connection->data.mutex);
2005b6dd1a89SLars Ellenberg 		if (connection->data.socket)
2006b6dd1a89SLars Ellenberg 			drbd_tcp_uncork(connection->data.socket);
2007b6dd1a89SLars Ellenberg 		mutex_unlock(&connection->data.mutex);
2008b6dd1a89SLars Ellenberg 	}
2009b6dd1a89SLars Ellenberg 
2010b6dd1a89SLars Ellenberg 	for (;;) {
2011b6dd1a89SLars Ellenberg 		int send_barrier;
2012b6dd1a89SLars Ellenberg 		prepare_to_wait(&connection->sender_work.q_wait, &wait, TASK_INTERRUPTIBLE);
20130500813fSAndreas Gruenbacher 		spin_lock_irq(&connection->resource->req_lock);
2014b6dd1a89SLars Ellenberg 		spin_lock(&connection->sender_work.q_lock);	/* FIXME get rid of this one? */
2015bc317a9eSLars Ellenberg 		if (!list_empty(&connection->sender_work.q))
20164dd726f0SLars Ellenberg 			list_splice_tail_init(&connection->sender_work.q, work_list);
2017b6dd1a89SLars Ellenberg 		spin_unlock(&connection->sender_work.q_lock);	/* FIXME get rid of this one? */
2018b6dd1a89SLars Ellenberg 		if (!list_empty(work_list) || signal_pending(current)) {
20190500813fSAndreas Gruenbacher 			spin_unlock_irq(&connection->resource->req_lock);
2020b6dd1a89SLars Ellenberg 			break;
2021b6dd1a89SLars Ellenberg 		}
2022f9c78128SLars Ellenberg 
2023f9c78128SLars Ellenberg 		/* We found nothing new to do: no to-be-communicated request,
2024f9c78128SLars Ellenberg 		 * no other work item.  We may still need to close the last
2025f9c78128SLars Ellenberg 		 * epoch.  The next incoming request will be tagged with the
2026f9c78128SLars Ellenberg 		 * connection's current transfer log epoch number.  If that
2027f9c78128SLars Ellenberg 		 * differs from the epoch of the last request we communicated,
2028f9c78128SLars Ellenberg 		 * it is safe to send the epoch separating barrier now.
2029f9c78128SLars Ellenberg 		 */
2030f9c78128SLars Ellenberg 		send_barrier =
2031f9c78128SLars Ellenberg 			atomic_read(&connection->current_tle_nr) !=
2032f9c78128SLars Ellenberg 			connection->send.current_epoch_nr;
20330500813fSAndreas Gruenbacher 		spin_unlock_irq(&connection->resource->req_lock);
2034f9c78128SLars Ellenberg 
2035f9c78128SLars Ellenberg 		if (send_barrier)
2036f9c78128SLars Ellenberg 			maybe_send_barrier(connection,
2037f9c78128SLars Ellenberg 					connection->send.current_epoch_nr + 1);
20385ab7d2c0SLars Ellenberg 
2039e334f550SLars Ellenberg 		if (test_bit(DEVICE_WORK_PENDING, &connection->flags))
20405ab7d2c0SLars Ellenberg 			break;
20415ab7d2c0SLars Ellenberg 
2042a80ca1aeSLars Ellenberg 		/* drbd_send() may have called flush_signals() */
2043a80ca1aeSLars Ellenberg 		if (get_t_state(&connection->worker) != RUNNING)
2044a80ca1aeSLars Ellenberg 			break;
20455ab7d2c0SLars Ellenberg 
2046b6dd1a89SLars Ellenberg 		schedule();
2047b6dd1a89SLars Ellenberg 		/* We may be woken up for things other than new work, too,
2048b6dd1a89SLars Ellenberg 		 * e.g. if the current epoch got closed.
2049b6dd1a89SLars Ellenberg 		 * In that case we send the barrier above. */
2050b6dd1a89SLars Ellenberg 	}
2051b6dd1a89SLars Ellenberg 	finish_wait(&connection->sender_work.q_wait, &wait);
2052b6dd1a89SLars Ellenberg 
2053b6dd1a89SLars Ellenberg 	/* someone may have changed the config while we have been waiting above. */
2054b6dd1a89SLars Ellenberg 	rcu_read_lock();
2055b6dd1a89SLars Ellenberg 	nc = rcu_dereference(connection->net_conf);
2056b6dd1a89SLars Ellenberg 	cork = nc ? nc->tcp_cork : 0;
2057b6dd1a89SLars Ellenberg 	rcu_read_unlock();
2058b6dd1a89SLars Ellenberg 	mutex_lock(&connection->data.mutex);
2059b6dd1a89SLars Ellenberg 	if (connection->data.socket) {
2060b6dd1a89SLars Ellenberg 		if (cork)
2061b6dd1a89SLars Ellenberg 			drbd_tcp_cork(connection->data.socket);
2062b6dd1a89SLars Ellenberg 		else if (!uncork)
2063b6dd1a89SLars Ellenberg 			drbd_tcp_uncork(connection->data.socket);
2064b6dd1a89SLars Ellenberg 	}
2065b6dd1a89SLars Ellenberg 	mutex_unlock(&connection->data.mutex);
2066b6dd1a89SLars Ellenberg }
2067b6dd1a89SLars Ellenberg 
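/* Main loop of the per-connection worker thread: alternately waits for
 * and runs queued work items and unqueued per-device work until told to
 * stop.  Each work callback is invoked with a "cancel" argument when the
 * connection has dropped below C_WF_REPORT_PARAMS.  On exit the remaining
 * work is drained, then all volumes of the connection are cleaned up. */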
2068b411b363SPhilipp Reisner int drbd_worker(struct drbd_thread *thi)
2069b411b363SPhilipp Reisner {
2070bde89a9eSAndreas Gruenbacher 	struct drbd_connection *connection = thi->connection;
20716db7e50aSAndreas Gruenbacher 	struct drbd_work *w = NULL;
2072c06ece6bSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
2073b411b363SPhilipp Reisner 	LIST_HEAD(work_list);
20748c0785a5SLars Ellenberg 	int vnr;
2075b411b363SPhilipp Reisner 
2076e77a0a5cSAndreas Gruenbacher 	while (get_t_state(thi) == RUNNING) {
207780822284SPhilipp Reisner 		drbd_thread_current_set_cpu(thi);
2078b411b363SPhilipp Reisner 
2079944410e9SLars Ellenberg 		if (list_empty(&work_list)) {
2080944410e9SLars Ellenberg 			update_worker_timing_details(connection, wait_for_work);
2081bde89a9eSAndreas Gruenbacher 			wait_for_work(connection, &work_list);
2082944410e9SLars Ellenberg 		}
2083b411b363SPhilipp Reisner 
2084944410e9SLars Ellenberg 		if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) {
2085944410e9SLars Ellenberg 			update_worker_timing_details(connection, do_unqueued_work);
2086e334f550SLars Ellenberg 			do_unqueued_work(connection);
2087944410e9SLars Ellenberg 		}
20885ab7d2c0SLars Ellenberg 
20898c0785a5SLars Ellenberg 		if (signal_pending(current)) {
2090b411b363SPhilipp Reisner 			flush_signals(current);
209119393e10SPhilipp Reisner 			if (get_t_state(thi) == RUNNING) {
20921ec861ebSAndreas Gruenbacher 				drbd_warn(connection, "Worker got an unexpected signal\n");
2093b411b363SPhilipp Reisner 				continue;
209419393e10SPhilipp Reisner 			}
2095b411b363SPhilipp Reisner 			break;
2096b411b363SPhilipp Reisner 		}
2097b411b363SPhilipp Reisner 
2098e77a0a5cSAndreas Gruenbacher 		if (get_t_state(thi) != RUNNING)
2099b411b363SPhilipp Reisner 			break;
2100b411b363SPhilipp Reisner 
2101729e8b87SLars Ellenberg 		if (!list_empty(&work_list)) {
21026db7e50aSAndreas Gruenbacher 			w = list_first_entry(&work_list, struct drbd_work, list);
21036db7e50aSAndreas Gruenbacher 			list_del_init(&w->list);
2104944410e9SLars Ellenberg 			update_worker_timing_details(connection, w->cb);
21056db7e50aSAndreas Gruenbacher 			if (w->cb(w, connection->cstate < C_WF_REPORT_PARAMS) == 0)
21068c0785a5SLars Ellenberg 				continue;
2107bde89a9eSAndreas Gruenbacher 			if (connection->cstate >= C_WF_REPORT_PARAMS)
2108bde89a9eSAndreas Gruenbacher 				conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
2109b411b363SPhilipp Reisner 		}
2110b411b363SPhilipp Reisner 	}
2111b411b363SPhilipp Reisner 
21128c0785a5SLars Ellenberg 	do {
2113944410e9SLars Ellenberg 		if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) {
2114944410e9SLars Ellenberg 			update_worker_timing_details(connection, do_unqueued_work);
2115e334f550SLars Ellenberg 			do_unqueued_work(connection);
2116944410e9SLars Ellenberg 		}
2117729e8b87SLars Ellenberg 		if (!list_empty(&work_list)) {
21186db7e50aSAndreas Gruenbacher 			w = list_first_entry(&work_list, struct drbd_work, list);
21196db7e50aSAndreas Gruenbacher 			list_del_init(&w->list);
2120944410e9SLars Ellenberg 			update_worker_timing_details(connection, w->cb);
21216db7e50aSAndreas Gruenbacher 			w->cb(w, 1);
2122729e8b87SLars Ellenberg 		} else
2123bde89a9eSAndreas Gruenbacher 			dequeue_work_batch(&connection->sender_work, &work_list);
2124e334f550SLars Ellenberg 	} while (!list_empty(&work_list) || test_bit(DEVICE_WORK_PENDING, &connection->flags));
2125b411b363SPhilipp Reisner 
2126c141ebdaSPhilipp Reisner 	rcu_read_lock();
2127c06ece6bSAndreas Gruenbacher 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
2128c06ece6bSAndreas Gruenbacher 		struct drbd_device *device = peer_device->device;
21290b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, device->state.disk == D_DISKLESS && device->state.conn == C_STANDALONE);
2130b30ab791SAndreas Gruenbacher 		kref_get(&device->kref);
2131c141ebdaSPhilipp Reisner 		rcu_read_unlock();
2132b30ab791SAndreas Gruenbacher 		drbd_device_cleanup(device);
213305a10ec7SAndreas Gruenbacher 		kref_put(&device->kref, drbd_destroy_device);
2134c141ebdaSPhilipp Reisner 		rcu_read_lock();
21350e29d163SPhilipp Reisner 	}
2136c141ebdaSPhilipp Reisner 	rcu_read_unlock();
2137b411b363SPhilipp Reisner 
2138b411b363SPhilipp Reisner 	return 0;
2139b411b363SPhilipp Reisner }
2140