xref: /openbmc/linux/drivers/block/drbd/drbd_worker.c (revision b9ed7080)
1b411b363SPhilipp Reisner /*
2b411b363SPhilipp Reisner    drbd_worker.c
3b411b363SPhilipp Reisner 
4b411b363SPhilipp Reisner    This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5b411b363SPhilipp Reisner 
6b411b363SPhilipp Reisner    Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7b411b363SPhilipp Reisner    Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8b411b363SPhilipp Reisner    Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9b411b363SPhilipp Reisner 
10b411b363SPhilipp Reisner    drbd is free software; you can redistribute it and/or modify
11b411b363SPhilipp Reisner    it under the terms of the GNU General Public License as published by
12b411b363SPhilipp Reisner    the Free Software Foundation; either version 2, or (at your option)
13b411b363SPhilipp Reisner    any later version.
14b411b363SPhilipp Reisner 
15b411b363SPhilipp Reisner    drbd is distributed in the hope that it will be useful,
16b411b363SPhilipp Reisner    but WITHOUT ANY WARRANTY; without even the implied warranty of
17b411b363SPhilipp Reisner    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18b411b363SPhilipp Reisner    GNU General Public License for more details.
19b411b363SPhilipp Reisner 
20b411b363SPhilipp Reisner    You should have received a copy of the GNU General Public License
21b411b363SPhilipp Reisner    along with drbd; see the file COPYING.  If not, write to
22b411b363SPhilipp Reisner    the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23b411b363SPhilipp Reisner 
24b411b363SPhilipp Reisner */
25b411b363SPhilipp Reisner 
26b411b363SPhilipp Reisner #include <linux/module.h>
27b411b363SPhilipp Reisner #include <linux/drbd.h>
28b411b363SPhilipp Reisner #include <linux/sched.h>
29b411b363SPhilipp Reisner #include <linux/wait.h>
30b411b363SPhilipp Reisner #include <linux/mm.h>
31b411b363SPhilipp Reisner #include <linux/memcontrol.h>
32b411b363SPhilipp Reisner #include <linux/mm_inline.h>
33b411b363SPhilipp Reisner #include <linux/slab.h>
34b411b363SPhilipp Reisner #include <linux/random.h>
35b411b363SPhilipp Reisner #include <linux/string.h>
36b411b363SPhilipp Reisner #include <linux/scatterlist.h>
37b411b363SPhilipp Reisner 
38b411b363SPhilipp Reisner #include "drbd_int.h"
39a3603a6eSAndreas Gruenbacher #include "drbd_protocol.h"
40b411b363SPhilipp Reisner #include "drbd_req.h"
41b411b363SPhilipp Reisner 
42d448a2e1SAndreas Gruenbacher static int make_ov_request(struct drbd_device *, int);
43d448a2e1SAndreas Gruenbacher static int make_resync_request(struct drbd_device *, int);
44b411b363SPhilipp Reisner 
45c5a91619SAndreas Gruenbacher /* endio handlers:
46c5a91619SAndreas Gruenbacher  *   drbd_md_io_complete (defined here)
47fcefa62eSAndreas Gruenbacher  *   drbd_request_endio (defined here)
48fcefa62eSAndreas Gruenbacher  *   drbd_peer_request_endio (defined here)
49c5a91619SAndreas Gruenbacher  *   bm_async_io_complete (defined in drbd_bitmap.c)
50c5a91619SAndreas Gruenbacher  *
51b411b363SPhilipp Reisner  * For all these callbacks, note the following:
52b411b363SPhilipp Reisner  * The callbacks will be called in irq context by the IDE drivers,
53b411b363SPhilipp Reisner  * and in Softirqs/Tasklets/BH context by the SCSI drivers.
54b411b363SPhilipp Reisner  * Try to get the locking right :)
55b411b363SPhilipp Reisner  *
56b411b363SPhilipp Reisner  */
57b411b363SPhilipp Reisner 
58b411b363SPhilipp Reisner 
59b411b363SPhilipp Reisner /* About the global_state_lock
60b411b363SPhilipp Reisner    Each state transition on an device holds a read lock. In case we have
6195f8efd0SAndreas Gruenbacher    to evaluate the resync after dependencies, we grab a write lock, because
62b411b363SPhilipp Reisner    we need stable states on all devices for that.  */
63b411b363SPhilipp Reisner rwlock_t global_state_lock;
64b411b363SPhilipp Reisner 
65b411b363SPhilipp Reisner /* used for synchronous meta data and bitmap IO
66b411b363SPhilipp Reisner  * submitted by drbd_md_sync_page_io()
67b411b363SPhilipp Reisner  */
68b411b363SPhilipp Reisner void drbd_md_io_complete(struct bio *bio, int error)
69b411b363SPhilipp Reisner {
70b411b363SPhilipp Reisner 	struct drbd_md_io *md_io;
71b30ab791SAndreas Gruenbacher 	struct drbd_device *device;
72b411b363SPhilipp Reisner 
73b411b363SPhilipp Reisner 	md_io = (struct drbd_md_io *)bio->bi_private;
74b30ab791SAndreas Gruenbacher 	device = container_of(md_io, struct drbd_device, md_io);
75cdfda633SPhilipp Reisner 
76b411b363SPhilipp Reisner 	md_io->error = error;
77b411b363SPhilipp Reisner 
780cfac5ddSPhilipp Reisner 	/* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
790cfac5ddSPhilipp Reisner 	 * to timeout on the lower level device, and eventually detach from it.
800cfac5ddSPhilipp Reisner 	 * If this io completion runs after that timeout expired, this
810cfac5ddSPhilipp Reisner 	 * drbd_md_put_buffer() may allow us to finally try and re-attach.
820cfac5ddSPhilipp Reisner 	 * During normal operation, this only puts that extra reference
830cfac5ddSPhilipp Reisner 	 * down to 1 again.
840cfac5ddSPhilipp Reisner 	 * Make sure we first drop the reference, and only then signal
850cfac5ddSPhilipp Reisner 	 * completion, or we may (in drbd_al_read_log()) cycle so fast into the
860cfac5ddSPhilipp Reisner 	 * next drbd_md_sync_page_io(), that we trigger the
87b30ab791SAndreas Gruenbacher 	 * ASSERT(atomic_read(&device->md_io_in_use) == 1) there.
880cfac5ddSPhilipp Reisner 	 */
89b30ab791SAndreas Gruenbacher 	drbd_md_put_buffer(device);
90cdfda633SPhilipp Reisner 	md_io->done = 1;
91b30ab791SAndreas Gruenbacher 	wake_up(&device->misc_wait);
92cdfda633SPhilipp Reisner 	bio_put(bio);
93b30ab791SAndreas Gruenbacher 	if (device->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */
94b30ab791SAndreas Gruenbacher 		put_ldev(device);
95b411b363SPhilipp Reisner }
96b411b363SPhilipp Reisner 
97b411b363SPhilipp Reisner /* reads on behalf of the partner,
98b411b363SPhilipp Reisner  * "submitted" by the receiver
99b411b363SPhilipp Reisner  */
100a186e478SRashika Kheria static void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local)
101b411b363SPhilipp Reisner {
102b411b363SPhilipp Reisner 	unsigned long flags = 0;
1036780139cSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
1046780139cSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
105b411b363SPhilipp Reisner 
1060500813fSAndreas Gruenbacher 	spin_lock_irqsave(&device->resource->req_lock, flags);
107b30ab791SAndreas Gruenbacher 	device->read_cnt += peer_req->i.size >> 9;
108a8cd15baSAndreas Gruenbacher 	list_del(&peer_req->w.list);
109b30ab791SAndreas Gruenbacher 	if (list_empty(&device->read_ee))
110b30ab791SAndreas Gruenbacher 		wake_up(&device->ee_wait);
111db830c46SAndreas Gruenbacher 	if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
112b30ab791SAndreas Gruenbacher 		__drbd_chk_io_error(device, DRBD_READ_ERROR);
1130500813fSAndreas Gruenbacher 	spin_unlock_irqrestore(&device->resource->req_lock, flags);
114b411b363SPhilipp Reisner 
1156780139cSAndreas Gruenbacher 	drbd_queue_work(&peer_device->connection->sender_work, &peer_req->w);
116b30ab791SAndreas Gruenbacher 	put_ldev(device);
117b411b363SPhilipp Reisner }
118b411b363SPhilipp Reisner 
119b411b363SPhilipp Reisner /* writes on behalf of the partner, or resync writes,
12045bb912bSLars Ellenberg  * "submitted" by the receiver, final stage.  */
121a0fb3c47SLars Ellenberg void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
122b411b363SPhilipp Reisner {
123b411b363SPhilipp Reisner 	unsigned long flags = 0;
1246780139cSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
1256780139cSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
126181286adSLars Ellenberg 	struct drbd_interval i;
127b411b363SPhilipp Reisner 	int do_wake;
128579b57edSAndreas Gruenbacher 	u64 block_id;
129b411b363SPhilipp Reisner 	int do_al_complete_io;
130b411b363SPhilipp Reisner 
131db830c46SAndreas Gruenbacher 	/* after we moved peer_req to done_ee,
132b411b363SPhilipp Reisner 	 * we may no longer access it,
133b411b363SPhilipp Reisner 	 * it may be freed/reused already!
134b411b363SPhilipp Reisner 	 * (as soon as we release the req_lock) */
135181286adSLars Ellenberg 	i = peer_req->i;
136db830c46SAndreas Gruenbacher 	do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
137db830c46SAndreas Gruenbacher 	block_id = peer_req->block_id;
138b411b363SPhilipp Reisner 
1390500813fSAndreas Gruenbacher 	spin_lock_irqsave(&device->resource->req_lock, flags);
140b30ab791SAndreas Gruenbacher 	device->writ_cnt += peer_req->i.size >> 9;
141a8cd15baSAndreas Gruenbacher 	list_move_tail(&peer_req->w.list, &device->done_ee);
142b411b363SPhilipp Reisner 
143bb3bfe96SAndreas Gruenbacher 	/*
1445e472264SAndreas Gruenbacher 	 * Do not remove from the write_requests tree here: we did not send the
145bb3bfe96SAndreas Gruenbacher 	 * Ack yet and did not wake possibly waiting conflicting requests.
146bb3bfe96SAndreas Gruenbacher 	 * Removed from the tree from "drbd_process_done_ee" within the
14784b8c06bSAndreas Gruenbacher 	 * appropriate dw.cb (e_end_block/e_end_resync_block) or from
148bb3bfe96SAndreas Gruenbacher 	 * _drbd_clear_done_ee.
149bb3bfe96SAndreas Gruenbacher 	 */
150b411b363SPhilipp Reisner 
151b30ab791SAndreas Gruenbacher 	do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee);
152b411b363SPhilipp Reisner 
153a0fb3c47SLars Ellenberg 	/* FIXME do we want to detach for failed REQ_DISCARD?
154a0fb3c47SLars Ellenberg 	 * ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */
155a0fb3c47SLars Ellenberg 	if (peer_req->flags & EE_WAS_ERROR)
156b30ab791SAndreas Gruenbacher 		__drbd_chk_io_error(device, DRBD_WRITE_ERROR);
1570500813fSAndreas Gruenbacher 	spin_unlock_irqrestore(&device->resource->req_lock, flags);
158b411b363SPhilipp Reisner 
159579b57edSAndreas Gruenbacher 	if (block_id == ID_SYNCER)
160b30ab791SAndreas Gruenbacher 		drbd_rs_complete_io(device, i.sector);
161b411b363SPhilipp Reisner 
162b411b363SPhilipp Reisner 	if (do_wake)
163b30ab791SAndreas Gruenbacher 		wake_up(&device->ee_wait);
164b411b363SPhilipp Reisner 
165b411b363SPhilipp Reisner 	if (do_al_complete_io)
166b30ab791SAndreas Gruenbacher 		drbd_al_complete_io(device, &i);
167b411b363SPhilipp Reisner 
1686780139cSAndreas Gruenbacher 	wake_asender(peer_device->connection);
169b30ab791SAndreas Gruenbacher 	put_ldev(device);
17045bb912bSLars Ellenberg }
171b411b363SPhilipp Reisner 
17245bb912bSLars Ellenberg /* writes on behalf of the partner, or resync writes,
17345bb912bSLars Ellenberg  * "submitted" by the receiver.
17445bb912bSLars Ellenberg  */
175fcefa62eSAndreas Gruenbacher void drbd_peer_request_endio(struct bio *bio, int error)
17645bb912bSLars Ellenberg {
177db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req = bio->bi_private;
178a8cd15baSAndreas Gruenbacher 	struct drbd_device *device = peer_req->peer_device->device;
17945bb912bSLars Ellenberg 	int uptodate = bio_flagged(bio, BIO_UPTODATE);
18045bb912bSLars Ellenberg 	int is_write = bio_data_dir(bio) == WRITE;
181a0fb3c47SLars Ellenberg 	int is_discard = !!(bio->bi_rw & REQ_DISCARD);
18245bb912bSLars Ellenberg 
18307194272SLars Ellenberg 	if (error && __ratelimit(&drbd_ratelimit_state))
184d0180171SAndreas Gruenbacher 		drbd_warn(device, "%s: error=%d s=%llus\n",
185a0fb3c47SLars Ellenberg 				is_write ? (is_discard ? "discard" : "write")
186a0fb3c47SLars Ellenberg 					: "read", error,
187db830c46SAndreas Gruenbacher 				(unsigned long long)peer_req->i.sector);
18845bb912bSLars Ellenberg 	if (!error && !uptodate) {
18907194272SLars Ellenberg 		if (__ratelimit(&drbd_ratelimit_state))
190d0180171SAndreas Gruenbacher 			drbd_warn(device, "%s: setting error to -EIO s=%llus\n",
19145bb912bSLars Ellenberg 					is_write ? "write" : "read",
192db830c46SAndreas Gruenbacher 					(unsigned long long)peer_req->i.sector);
19345bb912bSLars Ellenberg 		/* strange behavior of some lower level drivers...
19445bb912bSLars Ellenberg 		 * fail the request by clearing the uptodate flag,
19545bb912bSLars Ellenberg 		 * but do not return any error?! */
19645bb912bSLars Ellenberg 		error = -EIO;
19745bb912bSLars Ellenberg 	}
19845bb912bSLars Ellenberg 
19945bb912bSLars Ellenberg 	if (error)
200db830c46SAndreas Gruenbacher 		set_bit(__EE_WAS_ERROR, &peer_req->flags);
20145bb912bSLars Ellenberg 
20245bb912bSLars Ellenberg 	bio_put(bio); /* no need for the bio anymore */
203db830c46SAndreas Gruenbacher 	if (atomic_dec_and_test(&peer_req->pending_bios)) {
20445bb912bSLars Ellenberg 		if (is_write)
205db830c46SAndreas Gruenbacher 			drbd_endio_write_sec_final(peer_req);
20645bb912bSLars Ellenberg 		else
207db830c46SAndreas Gruenbacher 			drbd_endio_read_sec_final(peer_req);
20845bb912bSLars Ellenberg 	}
209b411b363SPhilipp Reisner }
210b411b363SPhilipp Reisner 
211b411b363SPhilipp Reisner /* read, readA or write requests on R_PRIMARY coming from drbd_make_request
212b411b363SPhilipp Reisner  */
213fcefa62eSAndreas Gruenbacher void drbd_request_endio(struct bio *bio, int error)
214b411b363SPhilipp Reisner {
215a115413dSLars Ellenberg 	unsigned long flags;
216b411b363SPhilipp Reisner 	struct drbd_request *req = bio->bi_private;
21784b8c06bSAndreas Gruenbacher 	struct drbd_device *device = req->device;
218a115413dSLars Ellenberg 	struct bio_and_error m;
219b411b363SPhilipp Reisner 	enum drbd_req_event what;
220b411b363SPhilipp Reisner 	int uptodate = bio_flagged(bio, BIO_UPTODATE);
221b411b363SPhilipp Reisner 
222b411b363SPhilipp Reisner 	if (!error && !uptodate) {
223d0180171SAndreas Gruenbacher 		drbd_warn(device, "p %s: setting error to -EIO\n",
224b411b363SPhilipp Reisner 			 bio_data_dir(bio) == WRITE ? "write" : "read");
225b411b363SPhilipp Reisner 		/* strange behavior of some lower level drivers...
226b411b363SPhilipp Reisner 		 * fail the request by clearing the uptodate flag,
227b411b363SPhilipp Reisner 		 * but do not return any error?! */
228b411b363SPhilipp Reisner 		error = -EIO;
229b411b363SPhilipp Reisner 	}
230b411b363SPhilipp Reisner 
2311b6dd252SPhilipp Reisner 
2321b6dd252SPhilipp Reisner 	/* If this request was aborted locally before,
2331b6dd252SPhilipp Reisner 	 * but now was completed "successfully",
2341b6dd252SPhilipp Reisner 	 * chances are that this caused arbitrary data corruption.
2351b6dd252SPhilipp Reisner 	 *
2361b6dd252SPhilipp Reisner 	 * "aborting" requests, or force-detaching the disk, is intended for
2371b6dd252SPhilipp Reisner 	 * completely blocked/hung local backing devices which do no longer
2381b6dd252SPhilipp Reisner 	 * complete requests at all, not even do error completions.  In this
2391b6dd252SPhilipp Reisner 	 * situation, usually a hard-reset and failover is the only way out.
2401b6dd252SPhilipp Reisner 	 *
2411b6dd252SPhilipp Reisner 	 * By "aborting", basically faking a local error-completion,
2421b6dd252SPhilipp Reisner 	 * we allow for a more graceful swichover by cleanly migrating services.
2431b6dd252SPhilipp Reisner 	 * Still the affected node has to be rebooted "soon".
2441b6dd252SPhilipp Reisner 	 *
2451b6dd252SPhilipp Reisner 	 * By completing these requests, we allow the upper layers to re-use
2461b6dd252SPhilipp Reisner 	 * the associated data pages.
2471b6dd252SPhilipp Reisner 	 *
2481b6dd252SPhilipp Reisner 	 * If later the local backing device "recovers", and now DMAs some data
2491b6dd252SPhilipp Reisner 	 * from disk into the original request pages, in the best case it will
2501b6dd252SPhilipp Reisner 	 * just put random data into unused pages; but typically it will corrupt
2511b6dd252SPhilipp Reisner 	 * meanwhile completely unrelated data, causing all sorts of damage.
2521b6dd252SPhilipp Reisner 	 *
2531b6dd252SPhilipp Reisner 	 * Which means delayed successful completion,
2541b6dd252SPhilipp Reisner 	 * especially for READ requests,
2551b6dd252SPhilipp Reisner 	 * is a reason to panic().
2561b6dd252SPhilipp Reisner 	 *
2571b6dd252SPhilipp Reisner 	 * We assume that a delayed *error* completion is OK,
2581b6dd252SPhilipp Reisner 	 * though we still will complain noisily about it.
2591b6dd252SPhilipp Reisner 	 */
2601b6dd252SPhilipp Reisner 	if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) {
2611b6dd252SPhilipp Reisner 		if (__ratelimit(&drbd_ratelimit_state))
262d0180171SAndreas Gruenbacher 			drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");
2631b6dd252SPhilipp Reisner 
2641b6dd252SPhilipp Reisner 		if (!error)
2651b6dd252SPhilipp Reisner 			panic("possible random memory corruption caused by delayed completion of aborted local request\n");
2661b6dd252SPhilipp Reisner 	}
2671b6dd252SPhilipp Reisner 
268b411b363SPhilipp Reisner 	/* to avoid recursion in __req_mod */
269b411b363SPhilipp Reisner 	if (unlikely(error)) {
2702f632aebSLars Ellenberg 		if (bio->bi_rw & REQ_DISCARD)
2712f632aebSLars Ellenberg 			what = (error == -EOPNOTSUPP)
2722f632aebSLars Ellenberg 				? DISCARD_COMPLETED_NOTSUPP
2732f632aebSLars Ellenberg 				: DISCARD_COMPLETED_WITH_ERROR;
2742f632aebSLars Ellenberg 		else
275b411b363SPhilipp Reisner 			what = (bio_data_dir(bio) == WRITE)
2768554df1cSAndreas Gruenbacher 			? WRITE_COMPLETED_WITH_ERROR
2775c3c7e64SLars Ellenberg 			: (bio_rw(bio) == READ)
2788554df1cSAndreas Gruenbacher 			  ? READ_COMPLETED_WITH_ERROR
2798554df1cSAndreas Gruenbacher 			  : READ_AHEAD_COMPLETED_WITH_ERROR;
280b411b363SPhilipp Reisner 	} else
2818554df1cSAndreas Gruenbacher 		what = COMPLETED_OK;
282b411b363SPhilipp Reisner 
283b411b363SPhilipp Reisner 	bio_put(req->private_bio);
284b411b363SPhilipp Reisner 	req->private_bio = ERR_PTR(error);
285b411b363SPhilipp Reisner 
286a115413dSLars Ellenberg 	/* not req_mod(), we need irqsave here! */
2870500813fSAndreas Gruenbacher 	spin_lock_irqsave(&device->resource->req_lock, flags);
288a115413dSLars Ellenberg 	__req_mod(req, what, &m);
2890500813fSAndreas Gruenbacher 	spin_unlock_irqrestore(&device->resource->req_lock, flags);
290b30ab791SAndreas Gruenbacher 	put_ldev(device);
291a115413dSLars Ellenberg 
292a115413dSLars Ellenberg 	if (m.bio)
293b30ab791SAndreas Gruenbacher 		complete_master_bio(device, &m);
294b411b363SPhilipp Reisner }
295b411b363SPhilipp Reisner 
29679a3c8d3SAndreas Gruenbacher void drbd_csum_ee(struct crypto_hash *tfm, struct drbd_peer_request *peer_req, void *digest)
29745bb912bSLars Ellenberg {
29845bb912bSLars Ellenberg 	struct hash_desc desc;
29945bb912bSLars Ellenberg 	struct scatterlist sg;
300db830c46SAndreas Gruenbacher 	struct page *page = peer_req->pages;
30145bb912bSLars Ellenberg 	struct page *tmp;
30245bb912bSLars Ellenberg 	unsigned len;
30345bb912bSLars Ellenberg 
30445bb912bSLars Ellenberg 	desc.tfm = tfm;
30545bb912bSLars Ellenberg 	desc.flags = 0;
30645bb912bSLars Ellenberg 
30745bb912bSLars Ellenberg 	sg_init_table(&sg, 1);
30845bb912bSLars Ellenberg 	crypto_hash_init(&desc);
30945bb912bSLars Ellenberg 
31045bb912bSLars Ellenberg 	while ((tmp = page_chain_next(page))) {
31145bb912bSLars Ellenberg 		/* all but the last page will be fully used */
31245bb912bSLars Ellenberg 		sg_set_page(&sg, page, PAGE_SIZE, 0);
31345bb912bSLars Ellenberg 		crypto_hash_update(&desc, &sg, sg.length);
31445bb912bSLars Ellenberg 		page = tmp;
31545bb912bSLars Ellenberg 	}
31645bb912bSLars Ellenberg 	/* and now the last, possibly only partially used page */
317db830c46SAndreas Gruenbacher 	len = peer_req->i.size & (PAGE_SIZE - 1);
31845bb912bSLars Ellenberg 	sg_set_page(&sg, page, len ?: PAGE_SIZE, 0);
31945bb912bSLars Ellenberg 	crypto_hash_update(&desc, &sg, sg.length);
32045bb912bSLars Ellenberg 	crypto_hash_final(&desc, digest);
32145bb912bSLars Ellenberg }
32245bb912bSLars Ellenberg 
32379a3c8d3SAndreas Gruenbacher void drbd_csum_bio(struct crypto_hash *tfm, struct bio *bio, void *digest)
324b411b363SPhilipp Reisner {
325b411b363SPhilipp Reisner 	struct hash_desc desc;
326b411b363SPhilipp Reisner 	struct scatterlist sg;
3277988613bSKent Overstreet 	struct bio_vec bvec;
3287988613bSKent Overstreet 	struct bvec_iter iter;
329b411b363SPhilipp Reisner 
330b411b363SPhilipp Reisner 	desc.tfm = tfm;
331b411b363SPhilipp Reisner 	desc.flags = 0;
332b411b363SPhilipp Reisner 
333b411b363SPhilipp Reisner 	sg_init_table(&sg, 1);
334b411b363SPhilipp Reisner 	crypto_hash_init(&desc);
335b411b363SPhilipp Reisner 
3367988613bSKent Overstreet 	bio_for_each_segment(bvec, bio, iter) {
3377988613bSKent Overstreet 		sg_set_page(&sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
338b411b363SPhilipp Reisner 		crypto_hash_update(&desc, &sg, sg.length);
339b411b363SPhilipp Reisner 	}
340b411b363SPhilipp Reisner 	crypto_hash_final(&desc, digest);
341b411b363SPhilipp Reisner }
342b411b363SPhilipp Reisner 
3439676c760SLars Ellenberg /* MAYBE merge common code with w_e_end_ov_req */
34499920dc5SAndreas Gruenbacher static int w_e_send_csum(struct drbd_work *w, int cancel)
345b411b363SPhilipp Reisner {
346a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
3476780139cSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
3486780139cSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
349b411b363SPhilipp Reisner 	int digest_size;
350b411b363SPhilipp Reisner 	void *digest;
35199920dc5SAndreas Gruenbacher 	int err = 0;
352b411b363SPhilipp Reisner 
35353ea4331SLars Ellenberg 	if (unlikely(cancel))
35453ea4331SLars Ellenberg 		goto out;
355b411b363SPhilipp Reisner 
3569676c760SLars Ellenberg 	if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0))
35753ea4331SLars Ellenberg 		goto out;
35853ea4331SLars Ellenberg 
3596780139cSAndreas Gruenbacher 	digest_size = crypto_hash_digestsize(peer_device->connection->csums_tfm);
360b411b363SPhilipp Reisner 	digest = kmalloc(digest_size, GFP_NOIO);
361b411b363SPhilipp Reisner 	if (digest) {
362db830c46SAndreas Gruenbacher 		sector_t sector = peer_req->i.sector;
363db830c46SAndreas Gruenbacher 		unsigned int size = peer_req->i.size;
3646780139cSAndreas Gruenbacher 		drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest);
3659676c760SLars Ellenberg 		/* Free peer_req and pages before send.
36653ea4331SLars Ellenberg 		 * In case we block on congestion, we could otherwise run into
36753ea4331SLars Ellenberg 		 * some distributed deadlock, if the other side blocks on
36853ea4331SLars Ellenberg 		 * congestion as well, because our receiver blocks in
369c37c8ecfSAndreas Gruenbacher 		 * drbd_alloc_pages due to pp_in_use > max_buffers. */
370b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
371db830c46SAndreas Gruenbacher 		peer_req = NULL;
372b30ab791SAndreas Gruenbacher 		inc_rs_pending(device);
3736780139cSAndreas Gruenbacher 		err = drbd_send_drequest_csum(peer_device, sector, size,
37453ea4331SLars Ellenberg 					      digest, digest_size,
375b411b363SPhilipp Reisner 					      P_CSUM_RS_REQUEST);
376b411b363SPhilipp Reisner 		kfree(digest);
377b411b363SPhilipp Reisner 	} else {
378d0180171SAndreas Gruenbacher 		drbd_err(device, "kmalloc() of digest failed.\n");
37999920dc5SAndreas Gruenbacher 		err = -ENOMEM;
380b411b363SPhilipp Reisner 	}
381b411b363SPhilipp Reisner 
38253ea4331SLars Ellenberg out:
383db830c46SAndreas Gruenbacher 	if (peer_req)
384b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
385b411b363SPhilipp Reisner 
38699920dc5SAndreas Gruenbacher 	if (unlikely(err))
387d0180171SAndreas Gruenbacher 		drbd_err(device, "drbd_send_drequest(..., csum) failed\n");
38899920dc5SAndreas Gruenbacher 	return err;
389b411b363SPhilipp Reisner }
390b411b363SPhilipp Reisner 
391b411b363SPhilipp Reisner #define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)
392b411b363SPhilipp Reisner 
39369a22773SAndreas Gruenbacher static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector, int size)
394b411b363SPhilipp Reisner {
39569a22773SAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
396db830c46SAndreas Gruenbacher 	struct drbd_peer_request *peer_req;
397b411b363SPhilipp Reisner 
398b30ab791SAndreas Gruenbacher 	if (!get_ldev(device))
39980a40e43SLars Ellenberg 		return -EIO;
400b411b363SPhilipp Reisner 
401b30ab791SAndreas Gruenbacher 	if (drbd_rs_should_slow_down(device, sector))
4020f0601f4SLars Ellenberg 		goto defer;
4030f0601f4SLars Ellenberg 
404b411b363SPhilipp Reisner 	/* GFP_TRY, because if there is no memory available right now, this may
405b411b363SPhilipp Reisner 	 * be rescheduled for later. It is "only" background resync, after all. */
40669a22773SAndreas Gruenbacher 	peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector,
407a0fb3c47SLars Ellenberg 				       size, true /* has real payload */, GFP_TRY);
408db830c46SAndreas Gruenbacher 	if (!peer_req)
40980a40e43SLars Ellenberg 		goto defer;
410b411b363SPhilipp Reisner 
411a8cd15baSAndreas Gruenbacher 	peer_req->w.cb = w_e_send_csum;
4120500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
413b9ed7080SLars Ellenberg 	list_add_tail(&peer_req->w.list, &device->read_ee);
4140500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
415b411b363SPhilipp Reisner 
416b30ab791SAndreas Gruenbacher 	atomic_add(size >> 9, &device->rs_sect_ev);
417b30ab791SAndreas Gruenbacher 	if (drbd_submit_peer_request(device, peer_req, READ, DRBD_FAULT_RS_RD) == 0)
41880a40e43SLars Ellenberg 		return 0;
41945bb912bSLars Ellenberg 
42010f6d992SLars Ellenberg 	/* If it failed because of ENOMEM, retry should help.  If it failed
42110f6d992SLars Ellenberg 	 * because bio_add_page failed (probably broken lower level driver),
42210f6d992SLars Ellenberg 	 * retry may or may not help.
42310f6d992SLars Ellenberg 	 * If it does not, you may need to force disconnect. */
4240500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
425a8cd15baSAndreas Gruenbacher 	list_del(&peer_req->w.list);
4260500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
42722cc37a9SLars Ellenberg 
428b30ab791SAndreas Gruenbacher 	drbd_free_peer_req(device, peer_req);
42980a40e43SLars Ellenberg defer:
430b30ab791SAndreas Gruenbacher 	put_ldev(device);
43180a40e43SLars Ellenberg 	return -EAGAIN;
432b411b363SPhilipp Reisner }
433b411b363SPhilipp Reisner 
43499920dc5SAndreas Gruenbacher int w_resync_timer(struct drbd_work *w, int cancel)
435794abb75SPhilipp Reisner {
43684b8c06bSAndreas Gruenbacher 	struct drbd_device *device =
43784b8c06bSAndreas Gruenbacher 		container_of(w, struct drbd_device, resync_work);
43884b8c06bSAndreas Gruenbacher 
439b30ab791SAndreas Gruenbacher 	switch (device->state.conn) {
440794abb75SPhilipp Reisner 	case C_VERIFY_S:
441d448a2e1SAndreas Gruenbacher 		make_ov_request(device, cancel);
442794abb75SPhilipp Reisner 		break;
443794abb75SPhilipp Reisner 	case C_SYNC_TARGET:
444d448a2e1SAndreas Gruenbacher 		make_resync_request(device, cancel);
445794abb75SPhilipp Reisner 		break;
446794abb75SPhilipp Reisner 	}
447794abb75SPhilipp Reisner 
44899920dc5SAndreas Gruenbacher 	return 0;
449794abb75SPhilipp Reisner }
450794abb75SPhilipp Reisner 
451b411b363SPhilipp Reisner void resync_timer_fn(unsigned long data)
452b411b363SPhilipp Reisner {
453b30ab791SAndreas Gruenbacher 	struct drbd_device *device = (struct drbd_device *) data;
454b411b363SPhilipp Reisner 
45515e26f6aSLars Ellenberg 	drbd_queue_work_if_unqueued(
45615e26f6aSLars Ellenberg 		&first_peer_device(device)->connection->sender_work,
45784b8c06bSAndreas Gruenbacher 		&device->resync_work);
458b411b363SPhilipp Reisner }
459b411b363SPhilipp Reisner 
460778f271dSPhilipp Reisner static void fifo_set(struct fifo_buffer *fb, int value)
461778f271dSPhilipp Reisner {
462778f271dSPhilipp Reisner 	int i;
463778f271dSPhilipp Reisner 
464778f271dSPhilipp Reisner 	for (i = 0; i < fb->size; i++)
465f10f2623SPhilipp Reisner 		fb->values[i] = value;
466778f271dSPhilipp Reisner }
467778f271dSPhilipp Reisner 
468778f271dSPhilipp Reisner static int fifo_push(struct fifo_buffer *fb, int value)
469778f271dSPhilipp Reisner {
470778f271dSPhilipp Reisner 	int ov;
471778f271dSPhilipp Reisner 
472778f271dSPhilipp Reisner 	ov = fb->values[fb->head_index];
473778f271dSPhilipp Reisner 	fb->values[fb->head_index++] = value;
474778f271dSPhilipp Reisner 
475778f271dSPhilipp Reisner 	if (fb->head_index >= fb->size)
476778f271dSPhilipp Reisner 		fb->head_index = 0;
477778f271dSPhilipp Reisner 
478778f271dSPhilipp Reisner 	return ov;
479778f271dSPhilipp Reisner }
480778f271dSPhilipp Reisner 
481778f271dSPhilipp Reisner static void fifo_add_val(struct fifo_buffer *fb, int value)
482778f271dSPhilipp Reisner {
483778f271dSPhilipp Reisner 	int i;
484778f271dSPhilipp Reisner 
485778f271dSPhilipp Reisner 	for (i = 0; i < fb->size; i++)
486778f271dSPhilipp Reisner 		fb->values[i] += value;
487778f271dSPhilipp Reisner }
488778f271dSPhilipp Reisner 
4899958c857SPhilipp Reisner struct fifo_buffer *fifo_alloc(int fifo_size)
4909958c857SPhilipp Reisner {
4919958c857SPhilipp Reisner 	struct fifo_buffer *fb;
4929958c857SPhilipp Reisner 
4938747d30aSLars Ellenberg 	fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_NOIO);
4949958c857SPhilipp Reisner 	if (!fb)
4959958c857SPhilipp Reisner 		return NULL;
4969958c857SPhilipp Reisner 
4979958c857SPhilipp Reisner 	fb->head_index = 0;
4989958c857SPhilipp Reisner 	fb->size = fifo_size;
4999958c857SPhilipp Reisner 	fb->total = 0;
5009958c857SPhilipp Reisner 
5019958c857SPhilipp Reisner 	return fb;
5029958c857SPhilipp Reisner }
5039958c857SPhilipp Reisner 
5040e49d7b0SLars Ellenberg static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in)
505778f271dSPhilipp Reisner {
506daeda1ccSPhilipp Reisner 	struct disk_conf *dc;
5077f34f614SLars Ellenberg 	unsigned int want;     /* The number of sectors we want in-flight */
508778f271dSPhilipp Reisner 	int req_sect; /* Number of sectors to request in this turn */
5097f34f614SLars Ellenberg 	int correction; /* Number of sectors more we need in-flight */
510778f271dSPhilipp Reisner 	int cps; /* correction per invocation of drbd_rs_controller() */
511778f271dSPhilipp Reisner 	int steps; /* Number of time steps to plan ahead */
512778f271dSPhilipp Reisner 	int curr_corr;
513778f271dSPhilipp Reisner 	int max_sect;
514813472ceSPhilipp Reisner 	struct fifo_buffer *plan;
515778f271dSPhilipp Reisner 
516b30ab791SAndreas Gruenbacher 	dc = rcu_dereference(device->ldev->disk_conf);
517b30ab791SAndreas Gruenbacher 	plan = rcu_dereference(device->rs_plan_s);
518778f271dSPhilipp Reisner 
519813472ceSPhilipp Reisner 	steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */
520778f271dSPhilipp Reisner 
521b30ab791SAndreas Gruenbacher 	if (device->rs_in_flight + sect_in == 0) { /* At start of resync */
522daeda1ccSPhilipp Reisner 		want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
523778f271dSPhilipp Reisner 	} else { /* normal path */
524daeda1ccSPhilipp Reisner 		want = dc->c_fill_target ? dc->c_fill_target :
525daeda1ccSPhilipp Reisner 			sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10);
526778f271dSPhilipp Reisner 	}
527778f271dSPhilipp Reisner 
528b30ab791SAndreas Gruenbacher 	correction = want - device->rs_in_flight - plan->total;
529778f271dSPhilipp Reisner 
530778f271dSPhilipp Reisner 	/* Plan ahead */
531778f271dSPhilipp Reisner 	cps = correction / steps;
532813472ceSPhilipp Reisner 	fifo_add_val(plan, cps);
533813472ceSPhilipp Reisner 	plan->total += cps * steps;
534778f271dSPhilipp Reisner 
535778f271dSPhilipp Reisner 	/* What we do in this step */
536813472ceSPhilipp Reisner 	curr_corr = fifo_push(plan, 0);
537813472ceSPhilipp Reisner 	plan->total -= curr_corr;
538778f271dSPhilipp Reisner 
539778f271dSPhilipp Reisner 	req_sect = sect_in + curr_corr;
540778f271dSPhilipp Reisner 	if (req_sect < 0)
541778f271dSPhilipp Reisner 		req_sect = 0;
542778f271dSPhilipp Reisner 
543daeda1ccSPhilipp Reisner 	max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ;
544778f271dSPhilipp Reisner 	if (req_sect > max_sect)
545778f271dSPhilipp Reisner 		req_sect = max_sect;
546778f271dSPhilipp Reisner 
547778f271dSPhilipp Reisner 	/*
548d0180171SAndreas Gruenbacher 	drbd_warn(device, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
549b30ab791SAndreas Gruenbacher 		 sect_in, device->rs_in_flight, want, correction,
550b30ab791SAndreas Gruenbacher 		 steps, cps, device->rs_planed, curr_corr, req_sect);
551778f271dSPhilipp Reisner 	*/
552778f271dSPhilipp Reisner 
553778f271dSPhilipp Reisner 	return req_sect;
554778f271dSPhilipp Reisner }
555778f271dSPhilipp Reisner 
556b30ab791SAndreas Gruenbacher static int drbd_rs_number_requests(struct drbd_device *device)
557e65f440dSLars Ellenberg {
5580e49d7b0SLars Ellenberg 	unsigned int sect_in;  /* Number of sectors that came in since the last turn */
5590e49d7b0SLars Ellenberg 	int number, mxb;
5600e49d7b0SLars Ellenberg 
5610e49d7b0SLars Ellenberg 	sect_in = atomic_xchg(&device->rs_sect_in, 0);
5620e49d7b0SLars Ellenberg 	device->rs_in_flight -= sect_in;
563813472ceSPhilipp Reisner 
564813472ceSPhilipp Reisner 	rcu_read_lock();
5650e49d7b0SLars Ellenberg 	mxb = drbd_get_max_buffers(device) / 2;
566b30ab791SAndreas Gruenbacher 	if (rcu_dereference(device->rs_plan_s)->size) {
5670e49d7b0SLars Ellenberg 		number = drbd_rs_controller(device, sect_in) >> (BM_BLOCK_SHIFT - 9);
568b30ab791SAndreas Gruenbacher 		device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
569e65f440dSLars Ellenberg 	} else {
570b30ab791SAndreas Gruenbacher 		device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate;
571b30ab791SAndreas Gruenbacher 		number = SLEEP_TIME * device->c_sync_rate  / ((BM_BLOCK_SIZE / 1024) * HZ);
572e65f440dSLars Ellenberg 	}
573813472ceSPhilipp Reisner 	rcu_read_unlock();
574e65f440dSLars Ellenberg 
5750e49d7b0SLars Ellenberg 	/* Don't have more than "max-buffers"/2 in-flight.
5760e49d7b0SLars Ellenberg 	 * Otherwise we may cause the remote site to stall on drbd_alloc_pages(),
5770e49d7b0SLars Ellenberg 	 * potentially causing a distributed deadlock on congestion during
5780e49d7b0SLars Ellenberg 	 * online-verify or (checksum-based) resync, if max-buffers,
5790e49d7b0SLars Ellenberg 	 * socket buffer sizes and resync rate settings are mis-configured. */
5807f34f614SLars Ellenberg 
5817f34f614SLars Ellenberg 	/* note that "number" is in units of "BM_BLOCK_SIZE" (which is 4k),
5827f34f614SLars Ellenberg 	 * mxb (as used here, and in drbd_alloc_pages on the peer) is
5837f34f614SLars Ellenberg 	 * "number of pages" (typically also 4k),
5847f34f614SLars Ellenberg 	 * but "rs_in_flight" is in "sectors" (512 Byte). */
5857f34f614SLars Ellenberg 	if (mxb - device->rs_in_flight/8 < number)
5867f34f614SLars Ellenberg 		number = mxb - device->rs_in_flight/8;
5870e49d7b0SLars Ellenberg 
588e65f440dSLars Ellenberg 	return number;
589e65f440dSLars Ellenberg }
590e65f440dSLars Ellenberg 
59144a4d551SLars Ellenberg static int make_resync_request(struct drbd_device *const device, int cancel)
592b411b363SPhilipp Reisner {
59344a4d551SLars Ellenberg 	struct drbd_peer_device *const peer_device = first_peer_device(device);
59444a4d551SLars Ellenberg 	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
595b411b363SPhilipp Reisner 	unsigned long bit;
596b411b363SPhilipp Reisner 	sector_t sector;
597b30ab791SAndreas Gruenbacher 	const sector_t capacity = drbd_get_capacity(device->this_bdev);
5981816a2b4SLars Ellenberg 	int max_bio_size;
599e65f440dSLars Ellenberg 	int number, rollback_i, size;
600506afb62SLars Ellenberg 	int align, requeue = 0;
6010f0601f4SLars Ellenberg 	int i = 0;
602b411b363SPhilipp Reisner 
603b411b363SPhilipp Reisner 	if (unlikely(cancel))
60499920dc5SAndreas Gruenbacher 		return 0;
605b411b363SPhilipp Reisner 
606b30ab791SAndreas Gruenbacher 	if (device->rs_total == 0) {
607af85e8e8SLars Ellenberg 		/* empty resync? */
608b30ab791SAndreas Gruenbacher 		drbd_resync_finished(device);
60999920dc5SAndreas Gruenbacher 		return 0;
610af85e8e8SLars Ellenberg 	}
611af85e8e8SLars Ellenberg 
612b30ab791SAndreas Gruenbacher 	if (!get_ldev(device)) {
613b30ab791SAndreas Gruenbacher 		/* Since we only need to access device->rsync a
614b30ab791SAndreas Gruenbacher 		   get_ldev_if_state(device,D_FAILED) would be sufficient, but
615b411b363SPhilipp Reisner 		   to continue resync with a broken disk makes no sense at
616b411b363SPhilipp Reisner 		   all */
617d0180171SAndreas Gruenbacher 		drbd_err(device, "Disk broke down during resync!\n");
61899920dc5SAndreas Gruenbacher 		return 0;
619b411b363SPhilipp Reisner 	}
620b411b363SPhilipp Reisner 
621b30ab791SAndreas Gruenbacher 	max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9;
622b30ab791SAndreas Gruenbacher 	number = drbd_rs_number_requests(device);
6230e49d7b0SLars Ellenberg 	if (number <= 0)
6240f0601f4SLars Ellenberg 		goto requeue;
625b411b363SPhilipp Reisner 
626b411b363SPhilipp Reisner 	for (i = 0; i < number; i++) {
627506afb62SLars Ellenberg 		/* Stop generating RS requests when half of the send buffer is filled,
628506afb62SLars Ellenberg 		 * but notify TCP that we'd like to have more space. */
62944a4d551SLars Ellenberg 		mutex_lock(&connection->data.mutex);
63044a4d551SLars Ellenberg 		if (connection->data.socket) {
631506afb62SLars Ellenberg 			struct sock *sk = connection->data.socket->sk;
632506afb62SLars Ellenberg 			int queued = sk->sk_wmem_queued;
633506afb62SLars Ellenberg 			int sndbuf = sk->sk_sndbuf;
634506afb62SLars Ellenberg 			if (queued > sndbuf / 2) {
635506afb62SLars Ellenberg 				requeue = 1;
636506afb62SLars Ellenberg 				if (sk->sk_socket)
637506afb62SLars Ellenberg 					set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
638b411b363SPhilipp Reisner 			}
639506afb62SLars Ellenberg 		} else
640506afb62SLars Ellenberg 			requeue = 1;
64144a4d551SLars Ellenberg 		mutex_unlock(&connection->data.mutex);
642506afb62SLars Ellenberg 		if (requeue)
643b411b363SPhilipp Reisner 			goto requeue;
644b411b363SPhilipp Reisner 
645b411b363SPhilipp Reisner next_sector:
646b411b363SPhilipp Reisner 		size = BM_BLOCK_SIZE;
647b30ab791SAndreas Gruenbacher 		bit  = drbd_bm_find_next(device, device->bm_resync_fo);
648b411b363SPhilipp Reisner 
6494b0715f0SLars Ellenberg 		if (bit == DRBD_END_OF_BITMAP) {
650b30ab791SAndreas Gruenbacher 			device->bm_resync_fo = drbd_bm_bits(device);
651b30ab791SAndreas Gruenbacher 			put_ldev(device);
65299920dc5SAndreas Gruenbacher 			return 0;
653b411b363SPhilipp Reisner 		}
654b411b363SPhilipp Reisner 
655b411b363SPhilipp Reisner 		sector = BM_BIT_TO_SECT(bit);
656b411b363SPhilipp Reisner 
657b30ab791SAndreas Gruenbacher 		if (drbd_rs_should_slow_down(device, sector) ||
658b30ab791SAndreas Gruenbacher 		    drbd_try_rs_begin_io(device, sector)) {
659b30ab791SAndreas Gruenbacher 			device->bm_resync_fo = bit;
660b411b363SPhilipp Reisner 			goto requeue;
661b411b363SPhilipp Reisner 		}
662b30ab791SAndreas Gruenbacher 		device->bm_resync_fo = bit + 1;
663b411b363SPhilipp Reisner 
664b30ab791SAndreas Gruenbacher 		if (unlikely(drbd_bm_test_bit(device, bit) == 0)) {
665b30ab791SAndreas Gruenbacher 			drbd_rs_complete_io(device, sector);
666b411b363SPhilipp Reisner 			goto next_sector;
667b411b363SPhilipp Reisner 		}
668b411b363SPhilipp Reisner 
6691816a2b4SLars Ellenberg #if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
670b411b363SPhilipp Reisner 		/* try to find some adjacent bits.
671b411b363SPhilipp Reisner 		 * we stop if we have already the maximum req size.
672b411b363SPhilipp Reisner 		 *
673b411b363SPhilipp Reisner 		 * Additionally always align bigger requests, in order to
674b411b363SPhilipp Reisner 		 * be prepared for all stripe sizes of software RAIDs.
675b411b363SPhilipp Reisner 		 */
676b411b363SPhilipp Reisner 		align = 1;
677d207450cSPhilipp Reisner 		rollback_i = i;
6786377b923SLars Ellenberg 		while (i < number) {
6791816a2b4SLars Ellenberg 			if (size + BM_BLOCK_SIZE > max_bio_size)
680b411b363SPhilipp Reisner 				break;
681b411b363SPhilipp Reisner 
682b411b363SPhilipp Reisner 			/* Be always aligned */
683b411b363SPhilipp Reisner 			if (sector & ((1<<(align+3))-1))
684b411b363SPhilipp Reisner 				break;
685b411b363SPhilipp Reisner 
686b411b363SPhilipp Reisner 			/* do not cross extent boundaries */
687b411b363SPhilipp Reisner 			if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
688b411b363SPhilipp Reisner 				break;
689b411b363SPhilipp Reisner 			/* now, is it actually dirty, after all?
690b411b363SPhilipp Reisner 			 * caution, drbd_bm_test_bit is tri-state for some
691b411b363SPhilipp Reisner 			 * obscure reason; ( b == 0 ) would get the out-of-band
692b411b363SPhilipp Reisner 			 * only accidentally right because of the "oddly sized"
693b411b363SPhilipp Reisner 			 * adjustment below */
694b30ab791SAndreas Gruenbacher 			if (drbd_bm_test_bit(device, bit+1) != 1)
695b411b363SPhilipp Reisner 				break;
696b411b363SPhilipp Reisner 			bit++;
697b411b363SPhilipp Reisner 			size += BM_BLOCK_SIZE;
698b411b363SPhilipp Reisner 			if ((BM_BLOCK_SIZE << align) <= size)
699b411b363SPhilipp Reisner 				align++;
700b411b363SPhilipp Reisner 			i++;
701b411b363SPhilipp Reisner 		}
702b411b363SPhilipp Reisner 		/* if we merged some,
703b411b363SPhilipp Reisner 		 * reset the offset to start the next drbd_bm_find_next from */
704b411b363SPhilipp Reisner 		if (size > BM_BLOCK_SIZE)
705b30ab791SAndreas Gruenbacher 			device->bm_resync_fo = bit + 1;
706b411b363SPhilipp Reisner #endif
707b411b363SPhilipp Reisner 
708b411b363SPhilipp Reisner 		/* adjust very last sectors, in case we are oddly sized */
709b411b363SPhilipp Reisner 		if (sector + (size>>9) > capacity)
710b411b363SPhilipp Reisner 			size = (capacity-sector)<<9;
711aaaba345SLars Ellenberg 
712aaaba345SLars Ellenberg 		if (device->use_csums) {
71344a4d551SLars Ellenberg 			switch (read_for_csum(peer_device, sector, size)) {
71480a40e43SLars Ellenberg 			case -EIO: /* Disk failure */
715b30ab791SAndreas Gruenbacher 				put_ldev(device);
71699920dc5SAndreas Gruenbacher 				return -EIO;
71780a40e43SLars Ellenberg 			case -EAGAIN: /* allocation failed, or ldev busy */
718b30ab791SAndreas Gruenbacher 				drbd_rs_complete_io(device, sector);
719b30ab791SAndreas Gruenbacher 				device->bm_resync_fo = BM_SECT_TO_BIT(sector);
720d207450cSPhilipp Reisner 				i = rollback_i;
721b411b363SPhilipp Reisner 				goto requeue;
72280a40e43SLars Ellenberg 			case 0:
72380a40e43SLars Ellenberg 				/* everything ok */
72480a40e43SLars Ellenberg 				break;
72580a40e43SLars Ellenberg 			default:
72680a40e43SLars Ellenberg 				BUG();
727b411b363SPhilipp Reisner 			}
728b411b363SPhilipp Reisner 		} else {
72999920dc5SAndreas Gruenbacher 			int err;
73099920dc5SAndreas Gruenbacher 
731b30ab791SAndreas Gruenbacher 			inc_rs_pending(device);
73244a4d551SLars Ellenberg 			err = drbd_send_drequest(peer_device, P_RS_DATA_REQUEST,
73399920dc5SAndreas Gruenbacher 						 sector, size, ID_SYNCER);
73499920dc5SAndreas Gruenbacher 			if (err) {
735d0180171SAndreas Gruenbacher 				drbd_err(device, "drbd_send_drequest() failed, aborting...\n");
736b30ab791SAndreas Gruenbacher 				dec_rs_pending(device);
737b30ab791SAndreas Gruenbacher 				put_ldev(device);
73899920dc5SAndreas Gruenbacher 				return err;
739b411b363SPhilipp Reisner 			}
740b411b363SPhilipp Reisner 		}
741b411b363SPhilipp Reisner 	}
742b411b363SPhilipp Reisner 
743b30ab791SAndreas Gruenbacher 	if (device->bm_resync_fo >= drbd_bm_bits(device)) {
744b411b363SPhilipp Reisner 		/* last syncer _request_ was sent,
745b411b363SPhilipp Reisner 		 * but the P_RS_DATA_REPLY not yet received.  sync will end (and
746b411b363SPhilipp Reisner 		 * next sync group will resume), as soon as we receive the last
747b411b363SPhilipp Reisner 		 * resync data block, and the last bit is cleared.
748b411b363SPhilipp Reisner 		 * until then resync "work" is "inactive" ...
749b411b363SPhilipp Reisner 		 */
750b30ab791SAndreas Gruenbacher 		put_ldev(device);
75199920dc5SAndreas Gruenbacher 		return 0;
752b411b363SPhilipp Reisner 	}
753b411b363SPhilipp Reisner 
754b411b363SPhilipp Reisner  requeue:
755b30ab791SAndreas Gruenbacher 	device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
756b30ab791SAndreas Gruenbacher 	mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
757b30ab791SAndreas Gruenbacher 	put_ldev(device);
75899920dc5SAndreas Gruenbacher 	return 0;
759b411b363SPhilipp Reisner }
760b411b363SPhilipp Reisner 
761d448a2e1SAndreas Gruenbacher static int make_ov_request(struct drbd_device *device, int cancel)
762b411b363SPhilipp Reisner {
763b411b363SPhilipp Reisner 	int number, i, size;
764b411b363SPhilipp Reisner 	sector_t sector;
765b30ab791SAndreas Gruenbacher 	const sector_t capacity = drbd_get_capacity(device->this_bdev);
76658ffa580SLars Ellenberg 	bool stop_sector_reached = false;
767b411b363SPhilipp Reisner 
768b411b363SPhilipp Reisner 	if (unlikely(cancel))
769b411b363SPhilipp Reisner 		return 1;
770b411b363SPhilipp Reisner 
771b30ab791SAndreas Gruenbacher 	number = drbd_rs_number_requests(device);
772b411b363SPhilipp Reisner 
773b30ab791SAndreas Gruenbacher 	sector = device->ov_position;
774b411b363SPhilipp Reisner 	for (i = 0; i < number; i++) {
77558ffa580SLars Ellenberg 		if (sector >= capacity)
776b411b363SPhilipp Reisner 			return 1;
77758ffa580SLars Ellenberg 
77858ffa580SLars Ellenberg 		/* We check for "finished" only in the reply path:
77958ffa580SLars Ellenberg 		 * w_e_end_ov_reply().
78058ffa580SLars Ellenberg 		 * We need to send at least one request out. */
78158ffa580SLars Ellenberg 		stop_sector_reached = i > 0
782b30ab791SAndreas Gruenbacher 			&& verify_can_do_stop_sector(device)
783b30ab791SAndreas Gruenbacher 			&& sector >= device->ov_stop_sector;
78458ffa580SLars Ellenberg 		if (stop_sector_reached)
78558ffa580SLars Ellenberg 			break;
786b411b363SPhilipp Reisner 
787b411b363SPhilipp Reisner 		size = BM_BLOCK_SIZE;
788b411b363SPhilipp Reisner 
789b30ab791SAndreas Gruenbacher 		if (drbd_rs_should_slow_down(device, sector) ||
790b30ab791SAndreas Gruenbacher 		    drbd_try_rs_begin_io(device, sector)) {
791b30ab791SAndreas Gruenbacher 			device->ov_position = sector;
792b411b363SPhilipp Reisner 			goto requeue;
793b411b363SPhilipp Reisner 		}
794b411b363SPhilipp Reisner 
795b411b363SPhilipp Reisner 		if (sector + (size>>9) > capacity)
796b411b363SPhilipp Reisner 			size = (capacity-sector)<<9;
797b411b363SPhilipp Reisner 
798b30ab791SAndreas Gruenbacher 		inc_rs_pending(device);
79969a22773SAndreas Gruenbacher 		if (drbd_send_ov_request(first_peer_device(device), sector, size)) {
800b30ab791SAndreas Gruenbacher 			dec_rs_pending(device);
801b411b363SPhilipp Reisner 			return 0;
802b411b363SPhilipp Reisner 		}
803b411b363SPhilipp Reisner 		sector += BM_SECT_PER_BIT;
804b411b363SPhilipp Reisner 	}
805b30ab791SAndreas Gruenbacher 	device->ov_position = sector;
806b411b363SPhilipp Reisner 
807b411b363SPhilipp Reisner  requeue:
808b30ab791SAndreas Gruenbacher 	device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
80958ffa580SLars Ellenberg 	if (i == 0 || !stop_sector_reached)
810b30ab791SAndreas Gruenbacher 		mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
811b411b363SPhilipp Reisner 	return 1;
812b411b363SPhilipp Reisner }
813b411b363SPhilipp Reisner 
81499920dc5SAndreas Gruenbacher int w_ov_finished(struct drbd_work *w, int cancel)
815b411b363SPhilipp Reisner {
81684b8c06bSAndreas Gruenbacher 	struct drbd_device_work *dw =
81784b8c06bSAndreas Gruenbacher 		container_of(w, struct drbd_device_work, w);
81884b8c06bSAndreas Gruenbacher 	struct drbd_device *device = dw->device;
81984b8c06bSAndreas Gruenbacher 	kfree(dw);
820b30ab791SAndreas Gruenbacher 	ov_out_of_sync_print(device);
821b30ab791SAndreas Gruenbacher 	drbd_resync_finished(device);
822b411b363SPhilipp Reisner 
82399920dc5SAndreas Gruenbacher 	return 0;
824b411b363SPhilipp Reisner }
825b411b363SPhilipp Reisner 
82699920dc5SAndreas Gruenbacher static int w_resync_finished(struct drbd_work *w, int cancel)
827b411b363SPhilipp Reisner {
82884b8c06bSAndreas Gruenbacher 	struct drbd_device_work *dw =
82984b8c06bSAndreas Gruenbacher 		container_of(w, struct drbd_device_work, w);
83084b8c06bSAndreas Gruenbacher 	struct drbd_device *device = dw->device;
83184b8c06bSAndreas Gruenbacher 	kfree(dw);
832b411b363SPhilipp Reisner 
833b30ab791SAndreas Gruenbacher 	drbd_resync_finished(device);
834b411b363SPhilipp Reisner 
83599920dc5SAndreas Gruenbacher 	return 0;
836b411b363SPhilipp Reisner }
837b411b363SPhilipp Reisner 
838b30ab791SAndreas Gruenbacher static void ping_peer(struct drbd_device *device)
839af85e8e8SLars Ellenberg {
840a6b32bc3SAndreas Gruenbacher 	struct drbd_connection *connection = first_peer_device(device)->connection;
8412a67d8b9SPhilipp Reisner 
842bde89a9eSAndreas Gruenbacher 	clear_bit(GOT_PING_ACK, &connection->flags);
843bde89a9eSAndreas Gruenbacher 	request_ping(connection);
844bde89a9eSAndreas Gruenbacher 	wait_event(connection->ping_wait,
845bde89a9eSAndreas Gruenbacher 		   test_bit(GOT_PING_ACK, &connection->flags) || device->state.conn < C_CONNECTED);
846af85e8e8SLars Ellenberg }
847af85e8e8SLars Ellenberg 
848b30ab791SAndreas Gruenbacher int drbd_resync_finished(struct drbd_device *device)
849b411b363SPhilipp Reisner {
850b411b363SPhilipp Reisner 	unsigned long db, dt, dbdt;
851b411b363SPhilipp Reisner 	unsigned long n_oos;
852b411b363SPhilipp Reisner 	union drbd_state os, ns;
85384b8c06bSAndreas Gruenbacher 	struct drbd_device_work *dw;
854b411b363SPhilipp Reisner 	char *khelper_cmd = NULL;
85526525618SLars Ellenberg 	int verify_done = 0;
856b411b363SPhilipp Reisner 
857b411b363SPhilipp Reisner 	/* Remove all elements from the resync LRU. Since future actions
858b411b363SPhilipp Reisner 	 * might set bits in the (main) bitmap, then the entries in the
859b411b363SPhilipp Reisner 	 * resync LRU would be wrong. */
860b30ab791SAndreas Gruenbacher 	if (drbd_rs_del_all(device)) {
861b411b363SPhilipp Reisner 		/* In case this is not possible now, most probably because
862b411b363SPhilipp Reisner 		 * there are P_RS_DATA_REPLY Packets lingering on the worker's
863b411b363SPhilipp Reisner 		 * queue (or even the read operations for those packets
864b411b363SPhilipp Reisner 		 * is not finished by now).   Retry in 100ms. */
865b411b363SPhilipp Reisner 
86620ee6390SPhilipp Reisner 		schedule_timeout_interruptible(HZ / 10);
86784b8c06bSAndreas Gruenbacher 		dw = kmalloc(sizeof(struct drbd_device_work), GFP_ATOMIC);
86884b8c06bSAndreas Gruenbacher 		if (dw) {
86984b8c06bSAndreas Gruenbacher 			dw->w.cb = w_resync_finished;
87084b8c06bSAndreas Gruenbacher 			dw->device = device;
87184b8c06bSAndreas Gruenbacher 			drbd_queue_work(&first_peer_device(device)->connection->sender_work,
87284b8c06bSAndreas Gruenbacher 					&dw->w);
873b411b363SPhilipp Reisner 			return 1;
874b411b363SPhilipp Reisner 		}
87584b8c06bSAndreas Gruenbacher 		drbd_err(device, "Warn failed to drbd_rs_del_all() and to kmalloc(dw).\n");
876b411b363SPhilipp Reisner 	}
877b411b363SPhilipp Reisner 
878b30ab791SAndreas Gruenbacher 	dt = (jiffies - device->rs_start - device->rs_paused) / HZ;
879b411b363SPhilipp Reisner 	if (dt <= 0)
880b411b363SPhilipp Reisner 		dt = 1;
88158ffa580SLars Ellenberg 
882b30ab791SAndreas Gruenbacher 	db = device->rs_total;
88358ffa580SLars Ellenberg 	/* adjust for verify start and stop sectors, respective reached position */
884b30ab791SAndreas Gruenbacher 	if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
885b30ab791SAndreas Gruenbacher 		db -= device->ov_left;
88658ffa580SLars Ellenberg 
887b411b363SPhilipp Reisner 	dbdt = Bit2KB(db/dt);
888b30ab791SAndreas Gruenbacher 	device->rs_paused /= HZ;
889b411b363SPhilipp Reisner 
890b30ab791SAndreas Gruenbacher 	if (!get_ldev(device))
891b411b363SPhilipp Reisner 		goto out;
892b411b363SPhilipp Reisner 
893b30ab791SAndreas Gruenbacher 	ping_peer(device);
894af85e8e8SLars Ellenberg 
8950500813fSAndreas Gruenbacher 	spin_lock_irq(&device->resource->req_lock);
896b30ab791SAndreas Gruenbacher 	os = drbd_read_state(device);
897b411b363SPhilipp Reisner 
89826525618SLars Ellenberg 	verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);
89926525618SLars Ellenberg 
900b411b363SPhilipp Reisner 	/* This protects us against multiple calls (that can happen in the presence
901b411b363SPhilipp Reisner 	   of application IO), and against connectivity loss just before we arrive here. */
902b411b363SPhilipp Reisner 	if (os.conn <= C_CONNECTED)
903b411b363SPhilipp Reisner 		goto out_unlock;
904b411b363SPhilipp Reisner 
905b411b363SPhilipp Reisner 	ns = os;
906b411b363SPhilipp Reisner 	ns.conn = C_CONNECTED;
907b411b363SPhilipp Reisner 
908d0180171SAndreas Gruenbacher 	drbd_info(device, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
90926525618SLars Ellenberg 	     verify_done ? "Online verify" : "Resync",
910b30ab791SAndreas Gruenbacher 	     dt + device->rs_paused, device->rs_paused, dbdt);
911b411b363SPhilipp Reisner 
912b30ab791SAndreas Gruenbacher 	n_oos = drbd_bm_total_weight(device);
913b411b363SPhilipp Reisner 
914b411b363SPhilipp Reisner 	if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
915b411b363SPhilipp Reisner 		if (n_oos) {
916d0180171SAndreas Gruenbacher 			drbd_alert(device, "Online verify found %lu %dk block out of sync!\n",
917b411b363SPhilipp Reisner 			      n_oos, Bit2KB(1));
918b411b363SPhilipp Reisner 			khelper_cmd = "out-of-sync";
919b411b363SPhilipp Reisner 		}
920b411b363SPhilipp Reisner 	} else {
9210b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, (n_oos - device->rs_failed) == 0);
922b411b363SPhilipp Reisner 
923b411b363SPhilipp Reisner 		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
924b411b363SPhilipp Reisner 			khelper_cmd = "after-resync-target";
925b411b363SPhilipp Reisner 
926aaaba345SLars Ellenberg 		if (device->use_csums && device->rs_total) {
927b30ab791SAndreas Gruenbacher 			const unsigned long s = device->rs_same_csum;
928b30ab791SAndreas Gruenbacher 			const unsigned long t = device->rs_total;
929b411b363SPhilipp Reisner 			const int ratio =
930b411b363SPhilipp Reisner 				(t == 0)     ? 0 :
931b411b363SPhilipp Reisner 			(t < 100000) ? ((s*100)/t) : (s/(t/100));
932d0180171SAndreas Gruenbacher 			drbd_info(device, "%u %% had equal checksums, eliminated: %luK; "
933b411b363SPhilipp Reisner 			     "transferred %luK total %luK\n",
934b411b363SPhilipp Reisner 			     ratio,
935b30ab791SAndreas Gruenbacher 			     Bit2KB(device->rs_same_csum),
936b30ab791SAndreas Gruenbacher 			     Bit2KB(device->rs_total - device->rs_same_csum),
937b30ab791SAndreas Gruenbacher 			     Bit2KB(device->rs_total));
938b411b363SPhilipp Reisner 		}
939b411b363SPhilipp Reisner 	}
940b411b363SPhilipp Reisner 
941b30ab791SAndreas Gruenbacher 	if (device->rs_failed) {
942d0180171SAndreas Gruenbacher 		drbd_info(device, "            %lu failed blocks\n", device->rs_failed);
943b411b363SPhilipp Reisner 
944b411b363SPhilipp Reisner 		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
945b411b363SPhilipp Reisner 			ns.disk = D_INCONSISTENT;
946b411b363SPhilipp Reisner 			ns.pdsk = D_UP_TO_DATE;
947b411b363SPhilipp Reisner 		} else {
948b411b363SPhilipp Reisner 			ns.disk = D_UP_TO_DATE;
949b411b363SPhilipp Reisner 			ns.pdsk = D_INCONSISTENT;
950b411b363SPhilipp Reisner 		}
951b411b363SPhilipp Reisner 	} else {
952b411b363SPhilipp Reisner 		ns.disk = D_UP_TO_DATE;
953b411b363SPhilipp Reisner 		ns.pdsk = D_UP_TO_DATE;
954b411b363SPhilipp Reisner 
955b411b363SPhilipp Reisner 		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
956b30ab791SAndreas Gruenbacher 			if (device->p_uuid) {
957b411b363SPhilipp Reisner 				int i;
958b411b363SPhilipp Reisner 				for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
959b30ab791SAndreas Gruenbacher 					_drbd_uuid_set(device, i, device->p_uuid[i]);
960b30ab791SAndreas Gruenbacher 				drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_CURRENT]);
961b30ab791SAndreas Gruenbacher 				_drbd_uuid_set(device, UI_CURRENT, device->p_uuid[UI_CURRENT]);
962b411b363SPhilipp Reisner 			} else {
963d0180171SAndreas Gruenbacher 				drbd_err(device, "device->p_uuid is NULL! BUG\n");
964b411b363SPhilipp Reisner 			}
965b411b363SPhilipp Reisner 		}
966b411b363SPhilipp Reisner 
96762b0da3aSLars Ellenberg 		if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
96862b0da3aSLars Ellenberg 			/* for verify runs, we don't update uuids here,
96962b0da3aSLars Ellenberg 			 * so there would be nothing to report. */
970b30ab791SAndreas Gruenbacher 			drbd_uuid_set_bm(device, 0UL);
971b30ab791SAndreas Gruenbacher 			drbd_print_uuids(device, "updated UUIDs");
972b30ab791SAndreas Gruenbacher 			if (device->p_uuid) {
973b411b363SPhilipp Reisner 				/* Now the two UUID sets are equal, update what we
974b411b363SPhilipp Reisner 				 * know of the peer. */
975b411b363SPhilipp Reisner 				int i;
976b411b363SPhilipp Reisner 				for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
977b30ab791SAndreas Gruenbacher 					device->p_uuid[i] = device->ldev->md.uuid[i];
978b411b363SPhilipp Reisner 			}
979b411b363SPhilipp Reisner 		}
98062b0da3aSLars Ellenberg 	}
981b411b363SPhilipp Reisner 
982b30ab791SAndreas Gruenbacher 	_drbd_set_state(device, ns, CS_VERBOSE, NULL);
983b411b363SPhilipp Reisner out_unlock:
9840500813fSAndreas Gruenbacher 	spin_unlock_irq(&device->resource->req_lock);
985b30ab791SAndreas Gruenbacher 	put_ldev(device);
986b411b363SPhilipp Reisner out:
987b30ab791SAndreas Gruenbacher 	device->rs_total  = 0;
988b30ab791SAndreas Gruenbacher 	device->rs_failed = 0;
989b30ab791SAndreas Gruenbacher 	device->rs_paused = 0;
99058ffa580SLars Ellenberg 
99158ffa580SLars Ellenberg 	/* reset start sector, if we reached end of device */
992b30ab791SAndreas Gruenbacher 	if (verify_done && device->ov_left == 0)
993b30ab791SAndreas Gruenbacher 		device->ov_start_sector = 0;
994b411b363SPhilipp Reisner 
995b30ab791SAndreas Gruenbacher 	drbd_md_sync(device);
99613d42685SLars Ellenberg 
997b411b363SPhilipp Reisner 	if (khelper_cmd)
998b30ab791SAndreas Gruenbacher 		drbd_khelper(device, khelper_cmd);
999b411b363SPhilipp Reisner 
1000b411b363SPhilipp Reisner 	return 1;
1001b411b363SPhilipp Reisner }
1002b411b363SPhilipp Reisner 
1003b411b363SPhilipp Reisner /* helper */
1004b30ab791SAndreas Gruenbacher static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_request *peer_req)
1005b411b363SPhilipp Reisner {
1006045417f7SAndreas Gruenbacher 	if (drbd_peer_req_has_active_page(peer_req)) {
1007b411b363SPhilipp Reisner 		/* This might happen if sendpage() has not finished */
1008db830c46SAndreas Gruenbacher 		int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
1009b30ab791SAndreas Gruenbacher 		atomic_add(i, &device->pp_in_use_by_net);
1010b30ab791SAndreas Gruenbacher 		atomic_sub(i, &device->pp_in_use);
10110500813fSAndreas Gruenbacher 		spin_lock_irq(&device->resource->req_lock);
1012a8cd15baSAndreas Gruenbacher 		list_add_tail(&peer_req->w.list, &device->net_ee);
10130500813fSAndreas Gruenbacher 		spin_unlock_irq(&device->resource->req_lock);
1014435f0740SLars Ellenberg 		wake_up(&drbd_pp_wait);
1015b411b363SPhilipp Reisner 	} else
1016b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
1017b411b363SPhilipp Reisner }
1018b411b363SPhilipp Reisner 
1019b411b363SPhilipp Reisner /**
1020b411b363SPhilipp Reisner  * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
1021b30ab791SAndreas Gruenbacher  * @device:	DRBD device.
1022b411b363SPhilipp Reisner  * @w:		work object.
1023b411b363SPhilipp Reisner  * @cancel:	The connection will be closed anyways
1024b411b363SPhilipp Reisner  */
102599920dc5SAndreas Gruenbacher int w_e_end_data_req(struct drbd_work *w, int cancel)
1026b411b363SPhilipp Reisner {
1027a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
10286780139cSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
10296780139cSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
103099920dc5SAndreas Gruenbacher 	int err;
1031b411b363SPhilipp Reisner 
1032b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
1033b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
1034b30ab791SAndreas Gruenbacher 		dec_unacked(device);
103599920dc5SAndreas Gruenbacher 		return 0;
1036b411b363SPhilipp Reisner 	}
1037b411b363SPhilipp Reisner 
1038db830c46SAndreas Gruenbacher 	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
10396780139cSAndreas Gruenbacher 		err = drbd_send_block(peer_device, P_DATA_REPLY, peer_req);
1040b411b363SPhilipp Reisner 	} else {
1041b411b363SPhilipp Reisner 		if (__ratelimit(&drbd_ratelimit_state))
1042d0180171SAndreas Gruenbacher 			drbd_err(device, "Sending NegDReply. sector=%llus.\n",
1043db830c46SAndreas Gruenbacher 			    (unsigned long long)peer_req->i.sector);
1044b411b363SPhilipp Reisner 
10456780139cSAndreas Gruenbacher 		err = drbd_send_ack(peer_device, P_NEG_DREPLY, peer_req);
1046b411b363SPhilipp Reisner 	}
1047b411b363SPhilipp Reisner 
1048b30ab791SAndreas Gruenbacher 	dec_unacked(device);
1049b411b363SPhilipp Reisner 
1050b30ab791SAndreas Gruenbacher 	move_to_net_ee_or_free(device, peer_req);
1051b411b363SPhilipp Reisner 
105299920dc5SAndreas Gruenbacher 	if (unlikely(err))
1053d0180171SAndreas Gruenbacher 		drbd_err(device, "drbd_send_block() failed\n");
105499920dc5SAndreas Gruenbacher 	return err;
1055b411b363SPhilipp Reisner }
1056b411b363SPhilipp Reisner 
1057b411b363SPhilipp Reisner /**
1058a209b4aeSAndreas Gruenbacher  * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
1059b411b363SPhilipp Reisner  * @w:		work object.
1060b411b363SPhilipp Reisner  * @cancel:	The connection will be closed anyways
1061b411b363SPhilipp Reisner  */
106299920dc5SAndreas Gruenbacher int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
1063b411b363SPhilipp Reisner {
1064a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
10656780139cSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
10666780139cSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
106799920dc5SAndreas Gruenbacher 	int err;
1068b411b363SPhilipp Reisner 
1069b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
1070b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
1071b30ab791SAndreas Gruenbacher 		dec_unacked(device);
107299920dc5SAndreas Gruenbacher 		return 0;
1073b411b363SPhilipp Reisner 	}
1074b411b363SPhilipp Reisner 
1075b30ab791SAndreas Gruenbacher 	if (get_ldev_if_state(device, D_FAILED)) {
1076b30ab791SAndreas Gruenbacher 		drbd_rs_complete_io(device, peer_req->i.sector);
1077b30ab791SAndreas Gruenbacher 		put_ldev(device);
1078b411b363SPhilipp Reisner 	}
1079b411b363SPhilipp Reisner 
1080b30ab791SAndreas Gruenbacher 	if (device->state.conn == C_AHEAD) {
10816780139cSAndreas Gruenbacher 		err = drbd_send_ack(peer_device, P_RS_CANCEL, peer_req);
1082db830c46SAndreas Gruenbacher 	} else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1083b30ab791SAndreas Gruenbacher 		if (likely(device->state.pdsk >= D_INCONSISTENT)) {
1084b30ab791SAndreas Gruenbacher 			inc_rs_pending(device);
10856780139cSAndreas Gruenbacher 			err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
1086b411b363SPhilipp Reisner 		} else {
1087b411b363SPhilipp Reisner 			if (__ratelimit(&drbd_ratelimit_state))
1088d0180171SAndreas Gruenbacher 				drbd_err(device, "Not sending RSDataReply, "
1089b411b363SPhilipp Reisner 				    "partner DISKLESS!\n");
109099920dc5SAndreas Gruenbacher 			err = 0;
1091b411b363SPhilipp Reisner 		}
1092b411b363SPhilipp Reisner 	} else {
1093b411b363SPhilipp Reisner 		if (__ratelimit(&drbd_ratelimit_state))
1094d0180171SAndreas Gruenbacher 			drbd_err(device, "Sending NegRSDReply. sector %llus.\n",
1095db830c46SAndreas Gruenbacher 			    (unsigned long long)peer_req->i.sector);
1096b411b363SPhilipp Reisner 
10976780139cSAndreas Gruenbacher 		err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);
1098b411b363SPhilipp Reisner 
1099b411b363SPhilipp Reisner 		/* update resync data with failure */
1100b30ab791SAndreas Gruenbacher 		drbd_rs_failed_io(device, peer_req->i.sector, peer_req->i.size);
1101b411b363SPhilipp Reisner 	}
1102b411b363SPhilipp Reisner 
1103b30ab791SAndreas Gruenbacher 	dec_unacked(device);
1104b411b363SPhilipp Reisner 
1105b30ab791SAndreas Gruenbacher 	move_to_net_ee_or_free(device, peer_req);
1106b411b363SPhilipp Reisner 
110799920dc5SAndreas Gruenbacher 	if (unlikely(err))
1108d0180171SAndreas Gruenbacher 		drbd_err(device, "drbd_send_block() failed\n");
110999920dc5SAndreas Gruenbacher 	return err;
1110b411b363SPhilipp Reisner }
1111b411b363SPhilipp Reisner 
111299920dc5SAndreas Gruenbacher int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
1113b411b363SPhilipp Reisner {
1114a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
11156780139cSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
11166780139cSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
1117b411b363SPhilipp Reisner 	struct digest_info *di;
1118b411b363SPhilipp Reisner 	int digest_size;
1119b411b363SPhilipp Reisner 	void *digest = NULL;
112099920dc5SAndreas Gruenbacher 	int err, eq = 0;
1121b411b363SPhilipp Reisner 
1122b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
1123b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
1124b30ab791SAndreas Gruenbacher 		dec_unacked(device);
112599920dc5SAndreas Gruenbacher 		return 0;
1126b411b363SPhilipp Reisner 	}
1127b411b363SPhilipp Reisner 
1128b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
1129b30ab791SAndreas Gruenbacher 		drbd_rs_complete_io(device, peer_req->i.sector);
1130b30ab791SAndreas Gruenbacher 		put_ldev(device);
11311d53f09eSLars Ellenberg 	}
1132b411b363SPhilipp Reisner 
1133db830c46SAndreas Gruenbacher 	di = peer_req->digest;
1134b411b363SPhilipp Reisner 
1135db830c46SAndreas Gruenbacher 	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1136b411b363SPhilipp Reisner 		/* quick hack to try to avoid a race against reconfiguration.
1137b411b363SPhilipp Reisner 		 * a real fix would be much more involved,
1138b411b363SPhilipp Reisner 		 * introducing more locking mechanisms */
11396780139cSAndreas Gruenbacher 		if (peer_device->connection->csums_tfm) {
11406780139cSAndreas Gruenbacher 			digest_size = crypto_hash_digestsize(peer_device->connection->csums_tfm);
11410b0ba1efSAndreas Gruenbacher 			D_ASSERT(device, digest_size == di->digest_size);
1142b411b363SPhilipp Reisner 			digest = kmalloc(digest_size, GFP_NOIO);
1143b411b363SPhilipp Reisner 		}
1144b411b363SPhilipp Reisner 		if (digest) {
11456780139cSAndreas Gruenbacher 			drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest);
1146b411b363SPhilipp Reisner 			eq = !memcmp(digest, di->digest, digest_size);
1147b411b363SPhilipp Reisner 			kfree(digest);
1148b411b363SPhilipp Reisner 		}
1149b411b363SPhilipp Reisner 
1150b411b363SPhilipp Reisner 		if (eq) {
1151b30ab791SAndreas Gruenbacher 			drbd_set_in_sync(device, peer_req->i.sector, peer_req->i.size);
1152676396d5SLars Ellenberg 			/* rs_same_csums unit is BM_BLOCK_SIZE */
1153b30ab791SAndreas Gruenbacher 			device->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT;
11546780139cSAndreas Gruenbacher 			err = drbd_send_ack(peer_device, P_RS_IS_IN_SYNC, peer_req);
1155b411b363SPhilipp Reisner 		} else {
1156b30ab791SAndreas Gruenbacher 			inc_rs_pending(device);
1157db830c46SAndreas Gruenbacher 			peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
1158db830c46SAndreas Gruenbacher 			peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */
1159204bba99SPhilipp Reisner 			kfree(di);
11606780139cSAndreas Gruenbacher 			err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
1161b411b363SPhilipp Reisner 		}
1162b411b363SPhilipp Reisner 	} else {
11636780139cSAndreas Gruenbacher 		err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);
1164b411b363SPhilipp Reisner 		if (__ratelimit(&drbd_ratelimit_state))
1165d0180171SAndreas Gruenbacher 			drbd_err(device, "Sending NegDReply. I guess it gets messy.\n");
1166b411b363SPhilipp Reisner 	}
1167b411b363SPhilipp Reisner 
1168b30ab791SAndreas Gruenbacher 	dec_unacked(device);
1169b30ab791SAndreas Gruenbacher 	move_to_net_ee_or_free(device, peer_req);
1170b411b363SPhilipp Reisner 
117199920dc5SAndreas Gruenbacher 	if (unlikely(err))
1172d0180171SAndreas Gruenbacher 		drbd_err(device, "drbd_send_block/ack() failed\n");
117399920dc5SAndreas Gruenbacher 	return err;
1174b411b363SPhilipp Reisner }
1175b411b363SPhilipp Reisner 
117699920dc5SAndreas Gruenbacher int w_e_end_ov_req(struct drbd_work *w, int cancel)
1177b411b363SPhilipp Reisner {
1178a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
11796780139cSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
11806780139cSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
1181db830c46SAndreas Gruenbacher 	sector_t sector = peer_req->i.sector;
1182db830c46SAndreas Gruenbacher 	unsigned int size = peer_req->i.size;
1183b411b363SPhilipp Reisner 	int digest_size;
1184b411b363SPhilipp Reisner 	void *digest;
118599920dc5SAndreas Gruenbacher 	int err = 0;
1186b411b363SPhilipp Reisner 
1187b411b363SPhilipp Reisner 	if (unlikely(cancel))
1188b411b363SPhilipp Reisner 		goto out;
1189b411b363SPhilipp Reisner 
11906780139cSAndreas Gruenbacher 	digest_size = crypto_hash_digestsize(peer_device->connection->verify_tfm);
1191b411b363SPhilipp Reisner 	digest = kmalloc(digest_size, GFP_NOIO);
11928f21420eSPhilipp Reisner 	if (!digest) {
119399920dc5SAndreas Gruenbacher 		err = 1;	/* terminate the connection in case the allocation failed */
11948f21420eSPhilipp Reisner 		goto out;
11958f21420eSPhilipp Reisner 	}
11968f21420eSPhilipp Reisner 
1197db830c46SAndreas Gruenbacher 	if (likely(!(peer_req->flags & EE_WAS_ERROR)))
11986780139cSAndreas Gruenbacher 		drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest);
11998f21420eSPhilipp Reisner 	else
12008f21420eSPhilipp Reisner 		memset(digest, 0, digest_size);
12018f21420eSPhilipp Reisner 
120253ea4331SLars Ellenberg 	/* Free e and pages before send.
120353ea4331SLars Ellenberg 	 * In case we block on congestion, we could otherwise run into
120453ea4331SLars Ellenberg 	 * some distributed deadlock, if the other side blocks on
120553ea4331SLars Ellenberg 	 * congestion as well, because our receiver blocks in
1206c37c8ecfSAndreas Gruenbacher 	 * drbd_alloc_pages due to pp_in_use > max_buffers. */
1207b30ab791SAndreas Gruenbacher 	drbd_free_peer_req(device, peer_req);
1208db830c46SAndreas Gruenbacher 	peer_req = NULL;
1209b30ab791SAndreas Gruenbacher 	inc_rs_pending(device);
12106780139cSAndreas Gruenbacher 	err = drbd_send_drequest_csum(peer_device, sector, size, digest, digest_size, P_OV_REPLY);
121199920dc5SAndreas Gruenbacher 	if (err)
1212b30ab791SAndreas Gruenbacher 		dec_rs_pending(device);
1213b411b363SPhilipp Reisner 	kfree(digest);
1214b411b363SPhilipp Reisner 
1215b411b363SPhilipp Reisner out:
1216db830c46SAndreas Gruenbacher 	if (peer_req)
1217b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
1218b30ab791SAndreas Gruenbacher 	dec_unacked(device);
121999920dc5SAndreas Gruenbacher 	return err;
1220b411b363SPhilipp Reisner }
1221b411b363SPhilipp Reisner 
1222b30ab791SAndreas Gruenbacher void drbd_ov_out_of_sync_found(struct drbd_device *device, sector_t sector, int size)
1223b411b363SPhilipp Reisner {
1224b30ab791SAndreas Gruenbacher 	if (device->ov_last_oos_start + device->ov_last_oos_size == sector) {
1225b30ab791SAndreas Gruenbacher 		device->ov_last_oos_size += size>>9;
1226b411b363SPhilipp Reisner 	} else {
1227b30ab791SAndreas Gruenbacher 		device->ov_last_oos_start = sector;
1228b30ab791SAndreas Gruenbacher 		device->ov_last_oos_size = size>>9;
1229b411b363SPhilipp Reisner 	}
1230b30ab791SAndreas Gruenbacher 	drbd_set_out_of_sync(device, sector, size);
1231b411b363SPhilipp Reisner }
1232b411b363SPhilipp Reisner 
123399920dc5SAndreas Gruenbacher int w_e_end_ov_reply(struct drbd_work *w, int cancel)
1234b411b363SPhilipp Reisner {
1235a8cd15baSAndreas Gruenbacher 	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
12366780139cSAndreas Gruenbacher 	struct drbd_peer_device *peer_device = peer_req->peer_device;
12376780139cSAndreas Gruenbacher 	struct drbd_device *device = peer_device->device;
1238b411b363SPhilipp Reisner 	struct digest_info *di;
1239b411b363SPhilipp Reisner 	void *digest;
1240db830c46SAndreas Gruenbacher 	sector_t sector = peer_req->i.sector;
1241db830c46SAndreas Gruenbacher 	unsigned int size = peer_req->i.size;
124253ea4331SLars Ellenberg 	int digest_size;
124399920dc5SAndreas Gruenbacher 	int err, eq = 0;
124458ffa580SLars Ellenberg 	bool stop_sector_reached = false;
1245b411b363SPhilipp Reisner 
1246b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
1247b30ab791SAndreas Gruenbacher 		drbd_free_peer_req(device, peer_req);
1248b30ab791SAndreas Gruenbacher 		dec_unacked(device);
124999920dc5SAndreas Gruenbacher 		return 0;
1250b411b363SPhilipp Reisner 	}
1251b411b363SPhilipp Reisner 
1252b411b363SPhilipp Reisner 	/* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
1253b411b363SPhilipp Reisner 	 * the resync lru has been cleaned up already */
1254b30ab791SAndreas Gruenbacher 	if (get_ldev(device)) {
1255b30ab791SAndreas Gruenbacher 		drbd_rs_complete_io(device, peer_req->i.sector);
1256b30ab791SAndreas Gruenbacher 		put_ldev(device);
12571d53f09eSLars Ellenberg 	}
1258b411b363SPhilipp Reisner 
1259db830c46SAndreas Gruenbacher 	di = peer_req->digest;
1260b411b363SPhilipp Reisner 
1261db830c46SAndreas Gruenbacher 	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
12626780139cSAndreas Gruenbacher 		digest_size = crypto_hash_digestsize(peer_device->connection->verify_tfm);
1263b411b363SPhilipp Reisner 		digest = kmalloc(digest_size, GFP_NOIO);
1264b411b363SPhilipp Reisner 		if (digest) {
12656780139cSAndreas Gruenbacher 			drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest);
1266b411b363SPhilipp Reisner 
12670b0ba1efSAndreas Gruenbacher 			D_ASSERT(device, digest_size == di->digest_size);
1268b411b363SPhilipp Reisner 			eq = !memcmp(digest, di->digest, digest_size);
1269b411b363SPhilipp Reisner 			kfree(digest);
1270b411b363SPhilipp Reisner 		}
1271b411b363SPhilipp Reisner 	}
1272b411b363SPhilipp Reisner 
12739676c760SLars Ellenberg 	/* Free peer_req and pages before send.
127453ea4331SLars Ellenberg 	 * In case we block on congestion, we could otherwise run into
127553ea4331SLars Ellenberg 	 * some distributed deadlock, if the other side blocks on
127653ea4331SLars Ellenberg 	 * congestion as well, because our receiver blocks in
1277c37c8ecfSAndreas Gruenbacher 	 * drbd_alloc_pages due to pp_in_use > max_buffers. */
1278b30ab791SAndreas Gruenbacher 	drbd_free_peer_req(device, peer_req);
1279b411b363SPhilipp Reisner 	if (!eq)
1280b30ab791SAndreas Gruenbacher 		drbd_ov_out_of_sync_found(device, sector, size);
1281b411b363SPhilipp Reisner 	else
1282b30ab791SAndreas Gruenbacher 		ov_out_of_sync_print(device);
1283b411b363SPhilipp Reisner 
12846780139cSAndreas Gruenbacher 	err = drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size,
1285b411b363SPhilipp Reisner 			       eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
1286b411b363SPhilipp Reisner 
1287b30ab791SAndreas Gruenbacher 	dec_unacked(device);
1288b411b363SPhilipp Reisner 
1289b30ab791SAndreas Gruenbacher 	--device->ov_left;
1290ea5442afSLars Ellenberg 
1291ea5442afSLars Ellenberg 	/* let's advance progress step marks only for every other megabyte */
1292b30ab791SAndreas Gruenbacher 	if ((device->ov_left & 0x200) == 0x200)
1293b30ab791SAndreas Gruenbacher 		drbd_advance_rs_marks(device, device->ov_left);
1294ea5442afSLars Ellenberg 
1295b30ab791SAndreas Gruenbacher 	stop_sector_reached = verify_can_do_stop_sector(device) &&
1296b30ab791SAndreas Gruenbacher 		(sector + (size>>9)) >= device->ov_stop_sector;
129758ffa580SLars Ellenberg 
1298b30ab791SAndreas Gruenbacher 	if (device->ov_left == 0 || stop_sector_reached) {
1299b30ab791SAndreas Gruenbacher 		ov_out_of_sync_print(device);
1300b30ab791SAndreas Gruenbacher 		drbd_resync_finished(device);
1301b411b363SPhilipp Reisner 	}
1302b411b363SPhilipp Reisner 
130399920dc5SAndreas Gruenbacher 	return err;
1304b411b363SPhilipp Reisner }
1305b411b363SPhilipp Reisner 
1306b6dd1a89SLars Ellenberg /* FIXME
1307b6dd1a89SLars Ellenberg  * We need to track the number of pending barrier acks,
1308b6dd1a89SLars Ellenberg  * and to be able to wait for them.
1309b6dd1a89SLars Ellenberg  * See also comment in drbd_adm_attach before drbd_suspend_io.
1310b6dd1a89SLars Ellenberg  */
1311bde89a9eSAndreas Gruenbacher static int drbd_send_barrier(struct drbd_connection *connection)
1312b411b363SPhilipp Reisner {
13139f5bdc33SAndreas Gruenbacher 	struct p_barrier *p;
1314b6dd1a89SLars Ellenberg 	struct drbd_socket *sock;
1315b411b363SPhilipp Reisner 
1316bde89a9eSAndreas Gruenbacher 	sock = &connection->data;
1317bde89a9eSAndreas Gruenbacher 	p = conn_prepare_command(connection, sock);
13189f5bdc33SAndreas Gruenbacher 	if (!p)
13199f5bdc33SAndreas Gruenbacher 		return -EIO;
1320bde89a9eSAndreas Gruenbacher 	p->barrier = connection->send.current_epoch_nr;
1321b6dd1a89SLars Ellenberg 	p->pad = 0;
1322bde89a9eSAndreas Gruenbacher 	connection->send.current_epoch_writes = 0;
1323b6dd1a89SLars Ellenberg 
1324bde89a9eSAndreas Gruenbacher 	return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0);
1325b411b363SPhilipp Reisner }
1326b411b363SPhilipp Reisner 
132799920dc5SAndreas Gruenbacher int w_send_write_hint(struct drbd_work *w, int cancel)
1328b411b363SPhilipp Reisner {
132984b8c06bSAndreas Gruenbacher 	struct drbd_device *device =
133084b8c06bSAndreas Gruenbacher 		container_of(w, struct drbd_device, unplug_work);
13319f5bdc33SAndreas Gruenbacher 	struct drbd_socket *sock;
13329f5bdc33SAndreas Gruenbacher 
1333b411b363SPhilipp Reisner 	if (cancel)
133499920dc5SAndreas Gruenbacher 		return 0;
1335a6b32bc3SAndreas Gruenbacher 	sock = &first_peer_device(device)->connection->data;
133669a22773SAndreas Gruenbacher 	if (!drbd_prepare_command(first_peer_device(device), sock))
13379f5bdc33SAndreas Gruenbacher 		return -EIO;
133869a22773SAndreas Gruenbacher 	return drbd_send_command(first_peer_device(device), sock, P_UNPLUG_REMOTE, 0, NULL, 0);
1339b411b363SPhilipp Reisner }
1340b411b363SPhilipp Reisner 
1341bde89a9eSAndreas Gruenbacher static void re_init_if_first_write(struct drbd_connection *connection, unsigned int epoch)
13424eb9b3cbSLars Ellenberg {
1343bde89a9eSAndreas Gruenbacher 	if (!connection->send.seen_any_write_yet) {
1344bde89a9eSAndreas Gruenbacher 		connection->send.seen_any_write_yet = true;
1345bde89a9eSAndreas Gruenbacher 		connection->send.current_epoch_nr = epoch;
1346bde89a9eSAndreas Gruenbacher 		connection->send.current_epoch_writes = 0;
13474eb9b3cbSLars Ellenberg 	}
13484eb9b3cbSLars Ellenberg }
13494eb9b3cbSLars Ellenberg 
1350bde89a9eSAndreas Gruenbacher static void maybe_send_barrier(struct drbd_connection *connection, unsigned int epoch)
13514eb9b3cbSLars Ellenberg {
13524eb9b3cbSLars Ellenberg 	/* re-init if first write on this connection */
1353bde89a9eSAndreas Gruenbacher 	if (!connection->send.seen_any_write_yet)
13544eb9b3cbSLars Ellenberg 		return;
1355bde89a9eSAndreas Gruenbacher 	if (connection->send.current_epoch_nr != epoch) {
1356bde89a9eSAndreas Gruenbacher 		if (connection->send.current_epoch_writes)
1357bde89a9eSAndreas Gruenbacher 			drbd_send_barrier(connection);
1358bde89a9eSAndreas Gruenbacher 		connection->send.current_epoch_nr = epoch;
13594eb9b3cbSLars Ellenberg 	}
13604eb9b3cbSLars Ellenberg }
13614eb9b3cbSLars Ellenberg 
13628f7bed77SAndreas Gruenbacher int w_send_out_of_sync(struct drbd_work *w, int cancel)
136373a01a18SPhilipp Reisner {
136473a01a18SPhilipp Reisner 	struct drbd_request *req = container_of(w, struct drbd_request, w);
136584b8c06bSAndreas Gruenbacher 	struct drbd_device *device = req->device;
136644a4d551SLars Ellenberg 	struct drbd_peer_device *const peer_device = first_peer_device(device);
136744a4d551SLars Ellenberg 	struct drbd_connection *const connection = peer_device->connection;
136899920dc5SAndreas Gruenbacher 	int err;
136973a01a18SPhilipp Reisner 
137073a01a18SPhilipp Reisner 	if (unlikely(cancel)) {
13718554df1cSAndreas Gruenbacher 		req_mod(req, SEND_CANCELED);
137299920dc5SAndreas Gruenbacher 		return 0;
137373a01a18SPhilipp Reisner 	}
137473a01a18SPhilipp Reisner 
1375bde89a9eSAndreas Gruenbacher 	/* this time, no connection->send.current_epoch_writes++;
1376b6dd1a89SLars Ellenberg 	 * If it was sent, it was the closing barrier for the last
1377b6dd1a89SLars Ellenberg 	 * replicated epoch, before we went into AHEAD mode.
1378b6dd1a89SLars Ellenberg 	 * No more barriers will be sent, until we leave AHEAD mode again. */
1379bde89a9eSAndreas Gruenbacher 	maybe_send_barrier(connection, req->epoch);
1380b6dd1a89SLars Ellenberg 
138144a4d551SLars Ellenberg 	err = drbd_send_out_of_sync(peer_device, req);
13828554df1cSAndreas Gruenbacher 	req_mod(req, OOS_HANDED_TO_NETWORK);
138373a01a18SPhilipp Reisner 
138499920dc5SAndreas Gruenbacher 	return err;
138573a01a18SPhilipp Reisner }
138673a01a18SPhilipp Reisner 
1387b411b363SPhilipp Reisner /**
1388b411b363SPhilipp Reisner  * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
1389b411b363SPhilipp Reisner  * @w:		work object.
1390b411b363SPhilipp Reisner  * @cancel:	The connection will be closed anyways
1391b411b363SPhilipp Reisner  */
139299920dc5SAndreas Gruenbacher int w_send_dblock(struct drbd_work *w, int cancel)
1393b411b363SPhilipp Reisner {
1394b411b363SPhilipp Reisner 	struct drbd_request *req = container_of(w, struct drbd_request, w);
139584b8c06bSAndreas Gruenbacher 	struct drbd_device *device = req->device;
139644a4d551SLars Ellenberg 	struct drbd_peer_device *const peer_device = first_peer_device(device);
139744a4d551SLars Ellenberg 	struct drbd_connection *connection = peer_device->connection;
139899920dc5SAndreas Gruenbacher 	int err;
1399b411b363SPhilipp Reisner 
1400b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
14018554df1cSAndreas Gruenbacher 		req_mod(req, SEND_CANCELED);
140299920dc5SAndreas Gruenbacher 		return 0;
1403b411b363SPhilipp Reisner 	}
1404b411b363SPhilipp Reisner 
1405bde89a9eSAndreas Gruenbacher 	re_init_if_first_write(connection, req->epoch);
1406bde89a9eSAndreas Gruenbacher 	maybe_send_barrier(connection, req->epoch);
1407bde89a9eSAndreas Gruenbacher 	connection->send.current_epoch_writes++;
1408b6dd1a89SLars Ellenberg 
140944a4d551SLars Ellenberg 	err = drbd_send_dblock(peer_device, req);
141099920dc5SAndreas Gruenbacher 	req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
1411b411b363SPhilipp Reisner 
141299920dc5SAndreas Gruenbacher 	return err;
1413b411b363SPhilipp Reisner }
1414b411b363SPhilipp Reisner 
1415b411b363SPhilipp Reisner /**
1416b411b363SPhilipp Reisner  * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
1417b411b363SPhilipp Reisner  * @w:		work object.
1418b411b363SPhilipp Reisner  * @cancel:	The connection will be closed anyways
1419b411b363SPhilipp Reisner  */
142099920dc5SAndreas Gruenbacher int w_send_read_req(struct drbd_work *w, int cancel)
1421b411b363SPhilipp Reisner {
1422b411b363SPhilipp Reisner 	struct drbd_request *req = container_of(w, struct drbd_request, w);
142384b8c06bSAndreas Gruenbacher 	struct drbd_device *device = req->device;
142444a4d551SLars Ellenberg 	struct drbd_peer_device *const peer_device = first_peer_device(device);
142544a4d551SLars Ellenberg 	struct drbd_connection *connection = peer_device->connection;
142699920dc5SAndreas Gruenbacher 	int err;
1427b411b363SPhilipp Reisner 
1428b411b363SPhilipp Reisner 	if (unlikely(cancel)) {
14298554df1cSAndreas Gruenbacher 		req_mod(req, SEND_CANCELED);
143099920dc5SAndreas Gruenbacher 		return 0;
1431b411b363SPhilipp Reisner 	}
1432b411b363SPhilipp Reisner 
1433b6dd1a89SLars Ellenberg 	/* Even read requests may close a write epoch,
1434b6dd1a89SLars Ellenberg 	 * if there was any yet. */
1435bde89a9eSAndreas Gruenbacher 	maybe_send_barrier(connection, req->epoch);
1436b6dd1a89SLars Ellenberg 
143744a4d551SLars Ellenberg 	err = drbd_send_drequest(peer_device, P_DATA_REQUEST, req->i.sector, req->i.size,
1438b411b363SPhilipp Reisner 				 (unsigned long)req);
1439b411b363SPhilipp Reisner 
144099920dc5SAndreas Gruenbacher 	req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
1441b411b363SPhilipp Reisner 
144299920dc5SAndreas Gruenbacher 	return err;
1443b411b363SPhilipp Reisner }
1444b411b363SPhilipp Reisner 
144599920dc5SAndreas Gruenbacher int w_restart_disk_io(struct drbd_work *w, int cancel)
1446265be2d0SPhilipp Reisner {
1447265be2d0SPhilipp Reisner 	struct drbd_request *req = container_of(w, struct drbd_request, w);
144884b8c06bSAndreas Gruenbacher 	struct drbd_device *device = req->device;
1449265be2d0SPhilipp Reisner 
14500778286aSPhilipp Reisner 	if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
14514dd726f0SLars Ellenberg 		drbd_al_begin_io(device, &req->i);
1452265be2d0SPhilipp Reisner 
1453265be2d0SPhilipp Reisner 	drbd_req_make_private_bio(req, req->master_bio);
1454b30ab791SAndreas Gruenbacher 	req->private_bio->bi_bdev = device->ldev->backing_bdev;
1455265be2d0SPhilipp Reisner 	generic_make_request(req->private_bio);
1456265be2d0SPhilipp Reisner 
145799920dc5SAndreas Gruenbacher 	return 0;
1458265be2d0SPhilipp Reisner }
1459265be2d0SPhilipp Reisner 
1460b30ab791SAndreas Gruenbacher static int _drbd_may_sync_now(struct drbd_device *device)
1461b411b363SPhilipp Reisner {
1462b30ab791SAndreas Gruenbacher 	struct drbd_device *odev = device;
146395f8efd0SAndreas Gruenbacher 	int resync_after;
1464b411b363SPhilipp Reisner 
1465b411b363SPhilipp Reisner 	while (1) {
1466a3f8f7dcSLars Ellenberg 		if (!odev->ldev || odev->state.disk == D_DISKLESS)
1467438c8374SPhilipp Reisner 			return 1;
1468daeda1ccSPhilipp Reisner 		rcu_read_lock();
146995f8efd0SAndreas Gruenbacher 		resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
1470daeda1ccSPhilipp Reisner 		rcu_read_unlock();
147195f8efd0SAndreas Gruenbacher 		if (resync_after == -1)
1472b411b363SPhilipp Reisner 			return 1;
1473b30ab791SAndreas Gruenbacher 		odev = minor_to_device(resync_after);
1474a3f8f7dcSLars Ellenberg 		if (!odev)
1475841ce241SAndreas Gruenbacher 			return 1;
1476b411b363SPhilipp Reisner 		if ((odev->state.conn >= C_SYNC_SOURCE &&
1477b411b363SPhilipp Reisner 		     odev->state.conn <= C_PAUSED_SYNC_T) ||
1478b411b363SPhilipp Reisner 		    odev->state.aftr_isp || odev->state.peer_isp ||
1479b411b363SPhilipp Reisner 		    odev->state.user_isp)
1480b411b363SPhilipp Reisner 			return 0;
1481b411b363SPhilipp Reisner 	}
1482b411b363SPhilipp Reisner }
1483b411b363SPhilipp Reisner 
1484b411b363SPhilipp Reisner /**
1485b411b363SPhilipp Reisner  * _drbd_pause_after() - Pause resync on all devices that may not resync now
1486b30ab791SAndreas Gruenbacher  * @device:	DRBD device.
1487b411b363SPhilipp Reisner  *
1488b411b363SPhilipp Reisner  * Called from process context only (admin command and after_state_ch).
1489b411b363SPhilipp Reisner  */
1490b30ab791SAndreas Gruenbacher static int _drbd_pause_after(struct drbd_device *device)
1491b411b363SPhilipp Reisner {
149254761697SAndreas Gruenbacher 	struct drbd_device *odev;
1493b411b363SPhilipp Reisner 	int i, rv = 0;
1494b411b363SPhilipp Reisner 
1495695d08faSPhilipp Reisner 	rcu_read_lock();
149605a10ec7SAndreas Gruenbacher 	idr_for_each_entry(&drbd_devices, odev, i) {
1497b411b363SPhilipp Reisner 		if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1498b411b363SPhilipp Reisner 			continue;
1499b411b363SPhilipp Reisner 		if (!_drbd_may_sync_now(odev))
1500b411b363SPhilipp Reisner 			rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL)
1501b411b363SPhilipp Reisner 			       != SS_NOTHING_TO_DO);
1502b411b363SPhilipp Reisner 	}
1503695d08faSPhilipp Reisner 	rcu_read_unlock();
1504b411b363SPhilipp Reisner 
1505b411b363SPhilipp Reisner 	return rv;
1506b411b363SPhilipp Reisner }
1507b411b363SPhilipp Reisner 
1508b411b363SPhilipp Reisner /**
1509b411b363SPhilipp Reisner  * _drbd_resume_next() - Resume resync on all devices that may resync now
1510b30ab791SAndreas Gruenbacher  * @device:	DRBD device.
1511b411b363SPhilipp Reisner  *
1512b411b363SPhilipp Reisner  * Called from process context only (admin command and worker).
1513b411b363SPhilipp Reisner  */
1514b30ab791SAndreas Gruenbacher static int _drbd_resume_next(struct drbd_device *device)
1515b411b363SPhilipp Reisner {
151654761697SAndreas Gruenbacher 	struct drbd_device *odev;
1517b411b363SPhilipp Reisner 	int i, rv = 0;
1518b411b363SPhilipp Reisner 
1519695d08faSPhilipp Reisner 	rcu_read_lock();
152005a10ec7SAndreas Gruenbacher 	idr_for_each_entry(&drbd_devices, odev, i) {
1521b411b363SPhilipp Reisner 		if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1522b411b363SPhilipp Reisner 			continue;
1523b411b363SPhilipp Reisner 		if (odev->state.aftr_isp) {
1524b411b363SPhilipp Reisner 			if (_drbd_may_sync_now(odev))
1525b411b363SPhilipp Reisner 				rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0),
1526b411b363SPhilipp Reisner 							CS_HARD, NULL)
1527b411b363SPhilipp Reisner 				       != SS_NOTHING_TO_DO) ;
1528b411b363SPhilipp Reisner 		}
1529b411b363SPhilipp Reisner 	}
1530695d08faSPhilipp Reisner 	rcu_read_unlock();
1531b411b363SPhilipp Reisner 	return rv;
1532b411b363SPhilipp Reisner }
1533b411b363SPhilipp Reisner 
1534b30ab791SAndreas Gruenbacher void resume_next_sg(struct drbd_device *device)
1535b411b363SPhilipp Reisner {
1536b411b363SPhilipp Reisner 	write_lock_irq(&global_state_lock);
1537b30ab791SAndreas Gruenbacher 	_drbd_resume_next(device);
1538b411b363SPhilipp Reisner 	write_unlock_irq(&global_state_lock);
1539b411b363SPhilipp Reisner }
1540b411b363SPhilipp Reisner 
1541b30ab791SAndreas Gruenbacher void suspend_other_sg(struct drbd_device *device)
1542b411b363SPhilipp Reisner {
1543b411b363SPhilipp Reisner 	write_lock_irq(&global_state_lock);
1544b30ab791SAndreas Gruenbacher 	_drbd_pause_after(device);
1545b411b363SPhilipp Reisner 	write_unlock_irq(&global_state_lock);
1546b411b363SPhilipp Reisner }
1547b411b363SPhilipp Reisner 
1548dc97b708SPhilipp Reisner /* caller must hold global_state_lock */
1549b30ab791SAndreas Gruenbacher enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor)
1550b411b363SPhilipp Reisner {
155154761697SAndreas Gruenbacher 	struct drbd_device *odev;
155295f8efd0SAndreas Gruenbacher 	int resync_after;
1553b411b363SPhilipp Reisner 
1554b411b363SPhilipp Reisner 	if (o_minor == -1)
1555b411b363SPhilipp Reisner 		return NO_ERROR;
1556a3f8f7dcSLars Ellenberg 	if (o_minor < -1 || o_minor > MINORMASK)
155795f8efd0SAndreas Gruenbacher 		return ERR_RESYNC_AFTER;
1558b411b363SPhilipp Reisner 
1559b411b363SPhilipp Reisner 	/* check for loops */
1560b30ab791SAndreas Gruenbacher 	odev = minor_to_device(o_minor);
1561b411b363SPhilipp Reisner 	while (1) {
1562b30ab791SAndreas Gruenbacher 		if (odev == device)
156395f8efd0SAndreas Gruenbacher 			return ERR_RESYNC_AFTER_CYCLE;
1564b411b363SPhilipp Reisner 
1565a3f8f7dcSLars Ellenberg 		/* You are free to depend on diskless, non-existing,
1566a3f8f7dcSLars Ellenberg 		 * or not yet/no longer existing minors.
1567a3f8f7dcSLars Ellenberg 		 * We only reject dependency loops.
1568a3f8f7dcSLars Ellenberg 		 * We cannot follow the dependency chain beyond a detached or
1569a3f8f7dcSLars Ellenberg 		 * missing minor.
1570a3f8f7dcSLars Ellenberg 		 */
1571a3f8f7dcSLars Ellenberg 		if (!odev || !odev->ldev || odev->state.disk == D_DISKLESS)
1572a3f8f7dcSLars Ellenberg 			return NO_ERROR;
1573a3f8f7dcSLars Ellenberg 
1574daeda1ccSPhilipp Reisner 		rcu_read_lock();
157595f8efd0SAndreas Gruenbacher 		resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
1576daeda1ccSPhilipp Reisner 		rcu_read_unlock();
1577b411b363SPhilipp Reisner 		/* dependency chain ends here, no cycles. */
157895f8efd0SAndreas Gruenbacher 		if (resync_after == -1)
1579b411b363SPhilipp Reisner 			return NO_ERROR;
1580b411b363SPhilipp Reisner 
1581b411b363SPhilipp Reisner 		/* follow the dependency chain */
1582b30ab791SAndreas Gruenbacher 		odev = minor_to_device(resync_after);
1583b411b363SPhilipp Reisner 	}
1584b411b363SPhilipp Reisner }
1585b411b363SPhilipp Reisner 
1586dc97b708SPhilipp Reisner /* caller must hold global_state_lock */
1587b30ab791SAndreas Gruenbacher void drbd_resync_after_changed(struct drbd_device *device)
1588b411b363SPhilipp Reisner {
1589b411b363SPhilipp Reisner 	int changes;
1590b411b363SPhilipp Reisner 
1591b411b363SPhilipp Reisner 	do {
1592b30ab791SAndreas Gruenbacher 		changes  = _drbd_pause_after(device);
1593b30ab791SAndreas Gruenbacher 		changes |= _drbd_resume_next(device);
1594b411b363SPhilipp Reisner 	} while (changes);
1595b411b363SPhilipp Reisner }
1596b411b363SPhilipp Reisner 
1597b30ab791SAndreas Gruenbacher void drbd_rs_controller_reset(struct drbd_device *device)
15989bd28d3cSLars Ellenberg {
1599813472ceSPhilipp Reisner 	struct fifo_buffer *plan;
1600813472ceSPhilipp Reisner 
1601b30ab791SAndreas Gruenbacher 	atomic_set(&device->rs_sect_in, 0);
1602b30ab791SAndreas Gruenbacher 	atomic_set(&device->rs_sect_ev, 0);
1603b30ab791SAndreas Gruenbacher 	device->rs_in_flight = 0;
1604813472ceSPhilipp Reisner 
1605813472ceSPhilipp Reisner 	/* Updating the RCU protected object in place is necessary since
1606813472ceSPhilipp Reisner 	   this function gets called from atomic context.
1607813472ceSPhilipp Reisner 	   It is valid since all other updates also lead to an completely
1608813472ceSPhilipp Reisner 	   empty fifo */
1609813472ceSPhilipp Reisner 	rcu_read_lock();
1610b30ab791SAndreas Gruenbacher 	plan = rcu_dereference(device->rs_plan_s);
1611813472ceSPhilipp Reisner 	plan->total = 0;
1612813472ceSPhilipp Reisner 	fifo_set(plan, 0);
1613813472ceSPhilipp Reisner 	rcu_read_unlock();
16149bd28d3cSLars Ellenberg }
16159bd28d3cSLars Ellenberg 
16161f04af33SPhilipp Reisner void start_resync_timer_fn(unsigned long data)
16171f04af33SPhilipp Reisner {
1618b30ab791SAndreas Gruenbacher 	struct drbd_device *device = (struct drbd_device *) data;
1619ac0acb9eSLars Ellenberg 	drbd_device_post_work(device, RS_START);
16201f04af33SPhilipp Reisner }
16211f04af33SPhilipp Reisner 
1622ac0acb9eSLars Ellenberg static void do_start_resync(struct drbd_device *device)
16231f04af33SPhilipp Reisner {
1624b30ab791SAndreas Gruenbacher 	if (atomic_read(&device->unacked_cnt) || atomic_read(&device->rs_pending_cnt)) {
1625ac0acb9eSLars Ellenberg 		drbd_warn(device, "postponing start_resync ...\n");
1626b30ab791SAndreas Gruenbacher 		device->start_resync_timer.expires = jiffies + HZ/10;
1627b30ab791SAndreas Gruenbacher 		add_timer(&device->start_resync_timer);
1628ac0acb9eSLars Ellenberg 		return;
16291f04af33SPhilipp Reisner 	}
16301f04af33SPhilipp Reisner 
1631b30ab791SAndreas Gruenbacher 	drbd_start_resync(device, C_SYNC_SOURCE);
1632b30ab791SAndreas Gruenbacher 	clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags);
16331f04af33SPhilipp Reisner }
16341f04af33SPhilipp Reisner 
1635aaaba345SLars Ellenberg static bool use_checksum_based_resync(struct drbd_connection *connection, struct drbd_device *device)
1636aaaba345SLars Ellenberg {
1637aaaba345SLars Ellenberg 	bool csums_after_crash_only;
1638aaaba345SLars Ellenberg 	rcu_read_lock();
1639aaaba345SLars Ellenberg 	csums_after_crash_only = rcu_dereference(connection->net_conf)->csums_after_crash_only;
1640aaaba345SLars Ellenberg 	rcu_read_unlock();
1641aaaba345SLars Ellenberg 	return connection->agreed_pro_version >= 89 &&		/* supported? */
1642aaaba345SLars Ellenberg 		connection->csums_tfm &&			/* configured? */
1643aaaba345SLars Ellenberg 		(csums_after_crash_only == 0			/* use for each resync? */
1644aaaba345SLars Ellenberg 		 || test_bit(CRASHED_PRIMARY, &device->flags));	/* or only after Primary crash? */
1645aaaba345SLars Ellenberg }
1646aaaba345SLars Ellenberg 
1647b411b363SPhilipp Reisner /**
1648b411b363SPhilipp Reisner  * drbd_start_resync() - Start the resync process
1649b30ab791SAndreas Gruenbacher  * @device:	DRBD device.
1650b411b363SPhilipp Reisner  * @side:	Either C_SYNC_SOURCE or C_SYNC_TARGET
1651b411b363SPhilipp Reisner  *
1652b411b363SPhilipp Reisner  * This function might bring you directly into one of the
1653b411b363SPhilipp Reisner  * C_PAUSED_SYNC_* states.
1654b411b363SPhilipp Reisner  */
1655b30ab791SAndreas Gruenbacher void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
1656b411b363SPhilipp Reisner {
165744a4d551SLars Ellenberg 	struct drbd_peer_device *peer_device = first_peer_device(device);
165844a4d551SLars Ellenberg 	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
1659b411b363SPhilipp Reisner 	union drbd_state ns;
1660b411b363SPhilipp Reisner 	int r;
1661b411b363SPhilipp Reisner 
1662b30ab791SAndreas Gruenbacher 	if (device->state.conn >= C_SYNC_SOURCE && device->state.conn < C_AHEAD) {
1663d0180171SAndreas Gruenbacher 		drbd_err(device, "Resync already running!\n");
1664b411b363SPhilipp Reisner 		return;
1665b411b363SPhilipp Reisner 	}
1666b411b363SPhilipp Reisner 
1667b30ab791SAndreas Gruenbacher 	if (!test_bit(B_RS_H_DONE, &device->flags)) {
1668b411b363SPhilipp Reisner 		if (side == C_SYNC_TARGET) {
1669b411b363SPhilipp Reisner 			/* Since application IO was locked out during C_WF_BITMAP_T and
1670b411b363SPhilipp Reisner 			   C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
1671b411b363SPhilipp Reisner 			   we check that we might make the data inconsistent. */
1672b30ab791SAndreas Gruenbacher 			r = drbd_khelper(device, "before-resync-target");
1673b411b363SPhilipp Reisner 			r = (r >> 8) & 0xff;
1674b411b363SPhilipp Reisner 			if (r > 0) {
1675d0180171SAndreas Gruenbacher 				drbd_info(device, "before-resync-target handler returned %d, "
1676b411b363SPhilipp Reisner 					 "dropping connection.\n", r);
167744a4d551SLars Ellenberg 				conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
1678b411b363SPhilipp Reisner 				return;
1679b411b363SPhilipp Reisner 			}
168009b9e797SPhilipp Reisner 		} else /* C_SYNC_SOURCE */ {
1681b30ab791SAndreas Gruenbacher 			r = drbd_khelper(device, "before-resync-source");
168209b9e797SPhilipp Reisner 			r = (r >> 8) & 0xff;
168309b9e797SPhilipp Reisner 			if (r > 0) {
168409b9e797SPhilipp Reisner 				if (r == 3) {
1685d0180171SAndreas Gruenbacher 					drbd_info(device, "before-resync-source handler returned %d, "
168609b9e797SPhilipp Reisner 						 "ignoring. Old userland tools?", r);
168709b9e797SPhilipp Reisner 				} else {
1688d0180171SAndreas Gruenbacher 					drbd_info(device, "before-resync-source handler returned %d, "
168909b9e797SPhilipp Reisner 						 "dropping connection.\n", r);
169044a4d551SLars Ellenberg 					conn_request_state(connection,
1691a6b32bc3SAndreas Gruenbacher 							   NS(conn, C_DISCONNECTING), CS_HARD);
169209b9e797SPhilipp Reisner 					return;
169309b9e797SPhilipp Reisner 				}
169409b9e797SPhilipp Reisner 			}
1695b411b363SPhilipp Reisner 		}
1696e64a3294SPhilipp Reisner 	}
1697b411b363SPhilipp Reisner 
169844a4d551SLars Ellenberg 	if (current == connection->worker.task) {
1699dad20554SPhilipp Reisner 		/* The worker should not sleep waiting for state_mutex,
1700e64a3294SPhilipp Reisner 		   that can take long */
1701b30ab791SAndreas Gruenbacher 		if (!mutex_trylock(device->state_mutex)) {
1702b30ab791SAndreas Gruenbacher 			set_bit(B_RS_H_DONE, &device->flags);
1703b30ab791SAndreas Gruenbacher 			device->start_resync_timer.expires = jiffies + HZ/5;
1704b30ab791SAndreas Gruenbacher 			add_timer(&device->start_resync_timer);
1705e64a3294SPhilipp Reisner 			return;
1706e64a3294SPhilipp Reisner 		}
1707e64a3294SPhilipp Reisner 	} else {
1708b30ab791SAndreas Gruenbacher 		mutex_lock(device->state_mutex);
1709e64a3294SPhilipp Reisner 	}
1710b30ab791SAndreas Gruenbacher 	clear_bit(B_RS_H_DONE, &device->flags);
1711b411b363SPhilipp Reisner 
1712074f4afeSLars Ellenberg 	/* req_lock: serialize with drbd_send_and_submit() and others
1713074f4afeSLars Ellenberg 	 * global_state_lock: for stable sync-after dependencies */
1714074f4afeSLars Ellenberg 	spin_lock_irq(&device->resource->req_lock);
1715074f4afeSLars Ellenberg 	write_lock(&global_state_lock);
1716a700471bSPhilipp Reisner 	/* Did some connection breakage or IO error race with us? */
1717b30ab791SAndreas Gruenbacher 	if (device->state.conn < C_CONNECTED
1718b30ab791SAndreas Gruenbacher 	|| !get_ldev_if_state(device, D_NEGOTIATING)) {
1719074f4afeSLars Ellenberg 		write_unlock(&global_state_lock);
1720074f4afeSLars Ellenberg 		spin_unlock_irq(&device->resource->req_lock);
1721b30ab791SAndreas Gruenbacher 		mutex_unlock(device->state_mutex);
1722b411b363SPhilipp Reisner 		return;
1723b411b363SPhilipp Reisner 	}
1724b411b363SPhilipp Reisner 
1725b30ab791SAndreas Gruenbacher 	ns = drbd_read_state(device);
1726b411b363SPhilipp Reisner 
1727b30ab791SAndreas Gruenbacher 	ns.aftr_isp = !_drbd_may_sync_now(device);
1728b411b363SPhilipp Reisner 
1729b411b363SPhilipp Reisner 	ns.conn = side;
1730b411b363SPhilipp Reisner 
1731b411b363SPhilipp Reisner 	if (side == C_SYNC_TARGET)
1732b411b363SPhilipp Reisner 		ns.disk = D_INCONSISTENT;
1733b411b363SPhilipp Reisner 	else /* side == C_SYNC_SOURCE */
1734b411b363SPhilipp Reisner 		ns.pdsk = D_INCONSISTENT;
1735b411b363SPhilipp Reisner 
1736b30ab791SAndreas Gruenbacher 	r = __drbd_set_state(device, ns, CS_VERBOSE, NULL);
1737b30ab791SAndreas Gruenbacher 	ns = drbd_read_state(device);
1738b411b363SPhilipp Reisner 
1739b411b363SPhilipp Reisner 	if (ns.conn < C_CONNECTED)
1740b411b363SPhilipp Reisner 		r = SS_UNKNOWN_ERROR;
1741b411b363SPhilipp Reisner 
1742b411b363SPhilipp Reisner 	if (r == SS_SUCCESS) {
1743b30ab791SAndreas Gruenbacher 		unsigned long tw = drbd_bm_total_weight(device);
17441d7734a0SLars Ellenberg 		unsigned long now = jiffies;
17451d7734a0SLars Ellenberg 		int i;
17461d7734a0SLars Ellenberg 
1747b30ab791SAndreas Gruenbacher 		device->rs_failed    = 0;
1748b30ab791SAndreas Gruenbacher 		device->rs_paused    = 0;
1749b30ab791SAndreas Gruenbacher 		device->rs_same_csum = 0;
1750b30ab791SAndreas Gruenbacher 		device->rs_last_events = 0;
1751b30ab791SAndreas Gruenbacher 		device->rs_last_sect_ev = 0;
1752b30ab791SAndreas Gruenbacher 		device->rs_total     = tw;
1753b30ab791SAndreas Gruenbacher 		device->rs_start     = now;
17541d7734a0SLars Ellenberg 		for (i = 0; i < DRBD_SYNC_MARKS; i++) {
1755b30ab791SAndreas Gruenbacher 			device->rs_mark_left[i] = tw;
1756b30ab791SAndreas Gruenbacher 			device->rs_mark_time[i] = now;
17571d7734a0SLars Ellenberg 		}
1758b30ab791SAndreas Gruenbacher 		_drbd_pause_after(device);
17595ab7d2c0SLars Ellenberg 		/* Forget potentially stale cached per resync extent bit-counts.
17605ab7d2c0SLars Ellenberg 		 * Open coded drbd_rs_cancel_all(device), we already have IRQs
17615ab7d2c0SLars Ellenberg 		 * disabled, and know the disk state is ok. */
17625ab7d2c0SLars Ellenberg 		spin_lock(&device->al_lock);
17635ab7d2c0SLars Ellenberg 		lc_reset(device->resync);
17645ab7d2c0SLars Ellenberg 		device->resync_locked = 0;
17655ab7d2c0SLars Ellenberg 		device->resync_wenr = LC_FREE;
17665ab7d2c0SLars Ellenberg 		spin_unlock(&device->al_lock);
1767b411b363SPhilipp Reisner 	}
1768074f4afeSLars Ellenberg 	write_unlock(&global_state_lock);
1769074f4afeSLars Ellenberg 	spin_unlock_irq(&device->resource->req_lock);
17705a22db89SLars Ellenberg 
17716c922ed5SLars Ellenberg 	if (r == SS_SUCCESS) {
17725ab7d2c0SLars Ellenberg 		wake_up(&device->al_wait); /* for lc_reset() above */
1773328e0f12SPhilipp Reisner 		/* reset rs_last_bcast when a resync or verify is started,
1774328e0f12SPhilipp Reisner 		 * to deal with potential jiffies wrap. */
1775b30ab791SAndreas Gruenbacher 		device->rs_last_bcast = jiffies - HZ;
1776328e0f12SPhilipp Reisner 
1777d0180171SAndreas Gruenbacher 		drbd_info(device, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
17786c922ed5SLars Ellenberg 		     drbd_conn_str(ns.conn),
1779b30ab791SAndreas Gruenbacher 		     (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10),
1780b30ab791SAndreas Gruenbacher 		     (unsigned long) device->rs_total);
1781aaaba345SLars Ellenberg 		if (side == C_SYNC_TARGET) {
1782b30ab791SAndreas Gruenbacher 			device->bm_resync_fo = 0;
1783aaaba345SLars Ellenberg 			device->use_csums = use_checksum_based_resync(connection, device);
1784aaaba345SLars Ellenberg 		} else {
1785aaaba345SLars Ellenberg 			device->use_csums = 0;
1786aaaba345SLars Ellenberg 		}
17875a22db89SLars Ellenberg 
17885a22db89SLars Ellenberg 		/* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
17895a22db89SLars Ellenberg 		 * with w_send_oos, or the sync target will get confused as to
17905a22db89SLars Ellenberg 		 * how much bits to resync.  We cannot do that always, because for an
17915a22db89SLars Ellenberg 		 * empty resync and protocol < 95, we need to do it here, as we call
17925a22db89SLars Ellenberg 		 * drbd_resync_finished from here in that case.
17935a22db89SLars Ellenberg 		 * We drbd_gen_and_send_sync_uuid here for protocol < 96,
17945a22db89SLars Ellenberg 		 * and from after_state_ch otherwise. */
179544a4d551SLars Ellenberg 		if (side == C_SYNC_SOURCE && connection->agreed_pro_version < 96)
179644a4d551SLars Ellenberg 			drbd_gen_and_send_sync_uuid(peer_device);
1797b411b363SPhilipp Reisner 
179844a4d551SLars Ellenberg 		if (connection->agreed_pro_version < 95 && device->rs_total == 0) {
1799af85e8e8SLars Ellenberg 			/* This still has a race (about when exactly the peers
1800af85e8e8SLars Ellenberg 			 * detect connection loss) that can lead to a full sync
1801af85e8e8SLars Ellenberg 			 * on next handshake. In 8.3.9 we fixed this with explicit
1802af85e8e8SLars Ellenberg 			 * resync-finished notifications, but the fix
1803af85e8e8SLars Ellenberg 			 * introduces a protocol change.  Sleeping for some
1804af85e8e8SLars Ellenberg 			 * time longer than the ping interval + timeout on the
1805af85e8e8SLars Ellenberg 			 * SyncSource, to give the SyncTarget the chance to
1806af85e8e8SLars Ellenberg 			 * detect connection loss, then waiting for a ping
1807af85e8e8SLars Ellenberg 			 * response (implicit in drbd_resync_finished) reduces
1808af85e8e8SLars Ellenberg 			 * the race considerably, but does not solve it. */
180944ed167dSPhilipp Reisner 			if (side == C_SYNC_SOURCE) {
181044ed167dSPhilipp Reisner 				struct net_conf *nc;
181144ed167dSPhilipp Reisner 				int timeo;
181244ed167dSPhilipp Reisner 
181344ed167dSPhilipp Reisner 				rcu_read_lock();
181444a4d551SLars Ellenberg 				nc = rcu_dereference(connection->net_conf);
181544ed167dSPhilipp Reisner 				timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9;
181644ed167dSPhilipp Reisner 				rcu_read_unlock();
181744ed167dSPhilipp Reisner 				schedule_timeout_interruptible(timeo);
181844ed167dSPhilipp Reisner 			}
1819b30ab791SAndreas Gruenbacher 			drbd_resync_finished(device);
1820b411b363SPhilipp Reisner 		}
1821b411b363SPhilipp Reisner 
1822b30ab791SAndreas Gruenbacher 		drbd_rs_controller_reset(device);
1823b30ab791SAndreas Gruenbacher 		/* ns.conn may already be != device->state.conn,
1824b411b363SPhilipp Reisner 		 * we may have been paused in between, or become paused until
1825b411b363SPhilipp Reisner 		 * the timer triggers.
1826b411b363SPhilipp Reisner 		 * No matter, that is handled in resync_timer_fn() */
1827b411b363SPhilipp Reisner 		if (ns.conn == C_SYNC_TARGET)
1828b30ab791SAndreas Gruenbacher 			mod_timer(&device->resync_timer, jiffies);
1829b411b363SPhilipp Reisner 
1830b30ab791SAndreas Gruenbacher 		drbd_md_sync(device);
1831b411b363SPhilipp Reisner 	}
1832b30ab791SAndreas Gruenbacher 	put_ldev(device);
1833b30ab791SAndreas Gruenbacher 	mutex_unlock(device->state_mutex);
1834b411b363SPhilipp Reisner }
1835b411b363SPhilipp Reisner 
1836e334f550SLars Ellenberg static void update_on_disk_bitmap(struct drbd_device *device, bool resync_done)
1837c7a58db4SLars Ellenberg {
1838c7a58db4SLars Ellenberg 	struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, };
1839c7a58db4SLars Ellenberg 	device->rs_last_bcast = jiffies;
1840c7a58db4SLars Ellenberg 
1841c7a58db4SLars Ellenberg 	if (!get_ldev(device))
1842c7a58db4SLars Ellenberg 		return;
1843c7a58db4SLars Ellenberg 
1844c7a58db4SLars Ellenberg 	drbd_bm_write_lazy(device, 0);
18455ab7d2c0SLars Ellenberg 	if (resync_done && is_sync_state(device->state.conn))
1846c7a58db4SLars Ellenberg 		drbd_resync_finished(device);
18475ab7d2c0SLars Ellenberg 
1848c7a58db4SLars Ellenberg 	drbd_bcast_event(device, &sib);
1849c7a58db4SLars Ellenberg 	/* update timestamp, in case it took a while to write out stuff */
1850c7a58db4SLars Ellenberg 	device->rs_last_bcast = jiffies;
1851c7a58db4SLars Ellenberg 	put_ldev(device);
1852c7a58db4SLars Ellenberg }
1853c7a58db4SLars Ellenberg 
1854e334f550SLars Ellenberg static void drbd_ldev_destroy(struct drbd_device *device)
1855e334f550SLars Ellenberg {
1856e334f550SLars Ellenberg 	lc_destroy(device->resync);
1857e334f550SLars Ellenberg 	device->resync = NULL;
1858e334f550SLars Ellenberg 	lc_destroy(device->act_log);
1859e334f550SLars Ellenberg 	device->act_log = NULL;
1860e334f550SLars Ellenberg 	__no_warn(local,
1861e334f550SLars Ellenberg 		drbd_free_ldev(device->ldev);
1862e334f550SLars Ellenberg 		device->ldev = NULL;);
1863e334f550SLars Ellenberg 	clear_bit(GOING_DISKLESS, &device->flags);
1864e334f550SLars Ellenberg 	wake_up(&device->misc_wait);
1865e334f550SLars Ellenberg }
1866e334f550SLars Ellenberg 
1867e334f550SLars Ellenberg static void go_diskless(struct drbd_device *device)
1868e334f550SLars Ellenberg {
1869e334f550SLars Ellenberg 	D_ASSERT(device, device->state.disk == D_FAILED);
1870e334f550SLars Ellenberg 	/* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
1871e334f550SLars Ellenberg 	 * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
1872e334f550SLars Ellenberg 	 * the protected members anymore, though, so once put_ldev reaches zero
1873e334f550SLars Ellenberg 	 * again, it will be safe to free them. */
1874e334f550SLars Ellenberg 
1875e334f550SLars Ellenberg 	/* Try to write changed bitmap pages, read errors may have just
1876e334f550SLars Ellenberg 	 * set some bits outside the area covered by the activity log.
1877e334f550SLars Ellenberg 	 *
1878e334f550SLars Ellenberg 	 * If we have an IO error during the bitmap writeout,
1879e334f550SLars Ellenberg 	 * we will want a full sync next time, just in case.
1880e334f550SLars Ellenberg 	 * (Do we want a specific meta data flag for this?)
1881e334f550SLars Ellenberg 	 *
1882e334f550SLars Ellenberg 	 * If that does not make it to stable storage either,
1883e334f550SLars Ellenberg 	 * we cannot do anything about that anymore.
1884e334f550SLars Ellenberg 	 *
1885e334f550SLars Ellenberg 	 * We still need to check if both bitmap and ldev are present, we may
1886e334f550SLars Ellenberg 	 * end up here after a failed attach, before ldev was even assigned.
1887e334f550SLars Ellenberg 	 */
1888e334f550SLars Ellenberg 	if (device->bitmap && device->ldev) {
1889e334f550SLars Ellenberg 		/* An interrupted resync or similar is allowed to recounts bits
1890e334f550SLars Ellenberg 		 * while we detach.
1891e334f550SLars Ellenberg 		 * Any modifications would not be expected anymore, though.
1892e334f550SLars Ellenberg 		 */
1893e334f550SLars Ellenberg 		if (drbd_bitmap_io_from_worker(device, drbd_bm_write,
1894e334f550SLars Ellenberg 					"detach", BM_LOCKED_TEST_ALLOWED)) {
1895e334f550SLars Ellenberg 			if (test_bit(WAS_READ_ERROR, &device->flags)) {
1896e334f550SLars Ellenberg 				drbd_md_set_flag(device, MDF_FULL_SYNC);
1897e334f550SLars Ellenberg 				drbd_md_sync(device);
1898e334f550SLars Ellenberg 			}
1899e334f550SLars Ellenberg 		}
1900e334f550SLars Ellenberg 	}
1901e334f550SLars Ellenberg 
1902e334f550SLars Ellenberg 	drbd_force_state(device, NS(disk, D_DISKLESS));
1903e334f550SLars Ellenberg }
1904e334f550SLars Ellenberg 
1905ac0acb9eSLars Ellenberg static int do_md_sync(struct drbd_device *device)
1906ac0acb9eSLars Ellenberg {
1907ac0acb9eSLars Ellenberg 	drbd_warn(device, "md_sync_timer expired! Worker calls drbd_md_sync().\n");
1908ac0acb9eSLars Ellenberg 	drbd_md_sync(device);
1909ac0acb9eSLars Ellenberg 	return 0;
1910ac0acb9eSLars Ellenberg }
1911ac0acb9eSLars Ellenberg 
1912e334f550SLars Ellenberg #define WORK_PENDING(work_bit, todo)	(todo & (1UL << work_bit))
1913e334f550SLars Ellenberg static void do_device_work(struct drbd_device *device, const unsigned long todo)
1914e334f550SLars Ellenberg {
1915ac0acb9eSLars Ellenberg 	if (WORK_PENDING(MD_SYNC, todo))
1916ac0acb9eSLars Ellenberg 		do_md_sync(device);
1917e334f550SLars Ellenberg 	if (WORK_PENDING(RS_DONE, todo) ||
1918e334f550SLars Ellenberg 	    WORK_PENDING(RS_PROGRESS, todo))
1919e334f550SLars Ellenberg 		update_on_disk_bitmap(device, WORK_PENDING(RS_DONE, todo));
1920e334f550SLars Ellenberg 	if (WORK_PENDING(GO_DISKLESS, todo))
1921e334f550SLars Ellenberg 		go_diskless(device);
1922e334f550SLars Ellenberg 	if (WORK_PENDING(DESTROY_DISK, todo))
1923e334f550SLars Ellenberg 		drbd_ldev_destroy(device);
1924ac0acb9eSLars Ellenberg 	if (WORK_PENDING(RS_START, todo))
1925ac0acb9eSLars Ellenberg 		do_start_resync(device);
1926e334f550SLars Ellenberg }
1927e334f550SLars Ellenberg 
1928e334f550SLars Ellenberg #define DRBD_DEVICE_WORK_MASK	\
1929e334f550SLars Ellenberg 	((1UL << GO_DISKLESS)	\
1930e334f550SLars Ellenberg 	|(1UL << DESTROY_DISK)	\
1931ac0acb9eSLars Ellenberg 	|(1UL << MD_SYNC)	\
1932ac0acb9eSLars Ellenberg 	|(1UL << RS_START)	\
1933e334f550SLars Ellenberg 	|(1UL << RS_PROGRESS)	\
1934e334f550SLars Ellenberg 	|(1UL << RS_DONE)	\
1935e334f550SLars Ellenberg 	)
1936e334f550SLars Ellenberg 
1937e334f550SLars Ellenberg static unsigned long get_work_bits(unsigned long *flags)
1938e334f550SLars Ellenberg {
1939e334f550SLars Ellenberg 	unsigned long old, new;
1940e334f550SLars Ellenberg 	do {
1941e334f550SLars Ellenberg 		old = *flags;
1942e334f550SLars Ellenberg 		new = old & ~DRBD_DEVICE_WORK_MASK;
1943e334f550SLars Ellenberg 	} while (cmpxchg(flags, old, new) != old);
1944e334f550SLars Ellenberg 	return old & DRBD_DEVICE_WORK_MASK;
1945e334f550SLars Ellenberg }
1946e334f550SLars Ellenberg 
1947e334f550SLars Ellenberg static void do_unqueued_work(struct drbd_connection *connection)
1948c7a58db4SLars Ellenberg {
1949c7a58db4SLars Ellenberg 	struct drbd_peer_device *peer_device;
1950c7a58db4SLars Ellenberg 	int vnr;
1951c7a58db4SLars Ellenberg 
1952c7a58db4SLars Ellenberg 	rcu_read_lock();
1953c7a58db4SLars Ellenberg 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1954c7a58db4SLars Ellenberg 		struct drbd_device *device = peer_device->device;
1955e334f550SLars Ellenberg 		unsigned long todo = get_work_bits(&device->flags);
1956e334f550SLars Ellenberg 		if (!todo)
1957c7a58db4SLars Ellenberg 			continue;
19585ab7d2c0SLars Ellenberg 
1959c7a58db4SLars Ellenberg 		kref_get(&device->kref);
1960c7a58db4SLars Ellenberg 		rcu_read_unlock();
1961e334f550SLars Ellenberg 		do_device_work(device, todo);
1962c7a58db4SLars Ellenberg 		kref_put(&device->kref, drbd_destroy_device);
1963c7a58db4SLars Ellenberg 		rcu_read_lock();
1964c7a58db4SLars Ellenberg 	}
1965c7a58db4SLars Ellenberg 	rcu_read_unlock();
1966c7a58db4SLars Ellenberg }
1967c7a58db4SLars Ellenberg 
1968a186e478SRashika Kheria static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
19698c0785a5SLars Ellenberg {
19708c0785a5SLars Ellenberg 	spin_lock_irq(&queue->q_lock);
197115e26f6aSLars Ellenberg 	list_splice_tail_init(&queue->q, work_list);
19728c0785a5SLars Ellenberg 	spin_unlock_irq(&queue->q_lock);
19738c0785a5SLars Ellenberg 	return !list_empty(work_list);
19748c0785a5SLars Ellenberg }
19758c0785a5SLars Ellenberg 
1976a186e478SRashika Kheria static bool dequeue_work_item(struct drbd_work_queue *queue, struct list_head *work_list)
19778c0785a5SLars Ellenberg {
19788c0785a5SLars Ellenberg 	spin_lock_irq(&queue->q_lock);
19798c0785a5SLars Ellenberg 	if (!list_empty(&queue->q))
19808c0785a5SLars Ellenberg 		list_move(queue->q.next, work_list);
19818c0785a5SLars Ellenberg 	spin_unlock_irq(&queue->q_lock);
19828c0785a5SLars Ellenberg 	return !list_empty(work_list);
19838c0785a5SLars Ellenberg }
19848c0785a5SLars Ellenberg 
1985bde89a9eSAndreas Gruenbacher static void wait_for_work(struct drbd_connection *connection, struct list_head *work_list)
1986b6dd1a89SLars Ellenberg {
1987b6dd1a89SLars Ellenberg 	DEFINE_WAIT(wait);
1988b6dd1a89SLars Ellenberg 	struct net_conf *nc;
1989b6dd1a89SLars Ellenberg 	int uncork, cork;
1990b6dd1a89SLars Ellenberg 
1991b6dd1a89SLars Ellenberg 	dequeue_work_item(&connection->sender_work, work_list);
1992b6dd1a89SLars Ellenberg 	if (!list_empty(work_list))
1993b6dd1a89SLars Ellenberg 		return;
1994b6dd1a89SLars Ellenberg 
1995b6dd1a89SLars Ellenberg 	/* Still nothing to do?
1996b6dd1a89SLars Ellenberg 	 * Maybe we still need to close the current epoch,
1997b6dd1a89SLars Ellenberg 	 * even if no new requests are queued yet.
1998b6dd1a89SLars Ellenberg 	 *
1999b6dd1a89SLars Ellenberg 	 * Also, poke TCP, just in case.
2000b6dd1a89SLars Ellenberg 	 * Then wait for new work (or signal). */
2001b6dd1a89SLars Ellenberg 	rcu_read_lock();
2002b6dd1a89SLars Ellenberg 	nc = rcu_dereference(connection->net_conf);
2003b6dd1a89SLars Ellenberg 	uncork = nc ? nc->tcp_cork : 0;
2004b6dd1a89SLars Ellenberg 	rcu_read_unlock();
2005b6dd1a89SLars Ellenberg 	if (uncork) {
2006b6dd1a89SLars Ellenberg 		mutex_lock(&connection->data.mutex);
2007b6dd1a89SLars Ellenberg 		if (connection->data.socket)
2008b6dd1a89SLars Ellenberg 			drbd_tcp_uncork(connection->data.socket);
2009b6dd1a89SLars Ellenberg 		mutex_unlock(&connection->data.mutex);
2010b6dd1a89SLars Ellenberg 	}
2011b6dd1a89SLars Ellenberg 
2012b6dd1a89SLars Ellenberg 	for (;;) {
2013b6dd1a89SLars Ellenberg 		int send_barrier;
2014b6dd1a89SLars Ellenberg 		prepare_to_wait(&connection->sender_work.q_wait, &wait, TASK_INTERRUPTIBLE);
20150500813fSAndreas Gruenbacher 		spin_lock_irq(&connection->resource->req_lock);
2016b6dd1a89SLars Ellenberg 		spin_lock(&connection->sender_work.q_lock);	/* FIXME get rid of this one? */
2017bc317a9eSLars Ellenberg 		/* dequeue single item only,
2018bc317a9eSLars Ellenberg 		 * we still use drbd_queue_work_front() in some places */
2019bc317a9eSLars Ellenberg 		if (!list_empty(&connection->sender_work.q))
20204dd726f0SLars Ellenberg 			list_splice_tail_init(&connection->sender_work.q, work_list);
2021b6dd1a89SLars Ellenberg 		spin_unlock(&connection->sender_work.q_lock);	/* FIXME get rid of this one? */
2022b6dd1a89SLars Ellenberg 		if (!list_empty(work_list) || signal_pending(current)) {
20230500813fSAndreas Gruenbacher 			spin_unlock_irq(&connection->resource->req_lock);
2024b6dd1a89SLars Ellenberg 			break;
2025b6dd1a89SLars Ellenberg 		}
2026f9c78128SLars Ellenberg 
2027f9c78128SLars Ellenberg 		/* We found nothing new to do, no to-be-communicated request,
2028f9c78128SLars Ellenberg 		 * no other work item.  We may still need to close the last
2029f9c78128SLars Ellenberg 		 * epoch.  Next incoming request epoch will be connection ->
2030f9c78128SLars Ellenberg 		 * current transfer log epoch number.  If that is different
2031f9c78128SLars Ellenberg 		 * from the epoch of the last request we communicated, it is
2032f9c78128SLars Ellenberg 		 * safe to send the epoch separating barrier now.
2033f9c78128SLars Ellenberg 		 */
2034f9c78128SLars Ellenberg 		send_barrier =
2035f9c78128SLars Ellenberg 			atomic_read(&connection->current_tle_nr) !=
2036f9c78128SLars Ellenberg 			connection->send.current_epoch_nr;
20370500813fSAndreas Gruenbacher 		spin_unlock_irq(&connection->resource->req_lock);
2038f9c78128SLars Ellenberg 
2039f9c78128SLars Ellenberg 		if (send_barrier)
2040f9c78128SLars Ellenberg 			maybe_send_barrier(connection,
2041f9c78128SLars Ellenberg 					connection->send.current_epoch_nr + 1);
20425ab7d2c0SLars Ellenberg 
2043e334f550SLars Ellenberg 		if (test_bit(DEVICE_WORK_PENDING, &connection->flags))
20445ab7d2c0SLars Ellenberg 			break;
20455ab7d2c0SLars Ellenberg 
2046a80ca1aeSLars Ellenberg 		/* drbd_send() may have called flush_signals() */
2047a80ca1aeSLars Ellenberg 		if (get_t_state(&connection->worker) != RUNNING)
2048a80ca1aeSLars Ellenberg 			break;
20495ab7d2c0SLars Ellenberg 
2050b6dd1a89SLars Ellenberg 		schedule();
2051b6dd1a89SLars Ellenberg 		/* may be woken up for other things but new work, too,
2052b6dd1a89SLars Ellenberg 		 * e.g. if the current epoch got closed.
2053b6dd1a89SLars Ellenberg 		 * In which case we send the barrier above. */
2054b6dd1a89SLars Ellenberg 	}
2055b6dd1a89SLars Ellenberg 	finish_wait(&connection->sender_work.q_wait, &wait);
2056b6dd1a89SLars Ellenberg 
2057b6dd1a89SLars Ellenberg 	/* someone may have changed the config while we have been waiting above. */
2058b6dd1a89SLars Ellenberg 	rcu_read_lock();
2059b6dd1a89SLars Ellenberg 	nc = rcu_dereference(connection->net_conf);
2060b6dd1a89SLars Ellenberg 	cork = nc ? nc->tcp_cork : 0;
2061b6dd1a89SLars Ellenberg 	rcu_read_unlock();
2062b6dd1a89SLars Ellenberg 	mutex_lock(&connection->data.mutex);
2063b6dd1a89SLars Ellenberg 	if (connection->data.socket) {
2064b6dd1a89SLars Ellenberg 		if (cork)
2065b6dd1a89SLars Ellenberg 			drbd_tcp_cork(connection->data.socket);
2066b6dd1a89SLars Ellenberg 		else if (!uncork)
2067b6dd1a89SLars Ellenberg 			drbd_tcp_uncork(connection->data.socket);
2068b6dd1a89SLars Ellenberg 	}
2069b6dd1a89SLars Ellenberg 	mutex_unlock(&connection->data.mutex);
2070b6dd1a89SLars Ellenberg }
2071b6dd1a89SLars Ellenberg 
2072b411b363SPhilipp Reisner int drbd_worker(struct drbd_thread *thi)
2073b411b363SPhilipp Reisner {
2074bde89a9eSAndreas Gruenbacher 	struct drbd_connection *connection = thi->connection;
20756db7e50aSAndreas Gruenbacher 	struct drbd_work *w = NULL;
2076c06ece6bSAndreas Gruenbacher 	struct drbd_peer_device *peer_device;
2077b411b363SPhilipp Reisner 	LIST_HEAD(work_list);
20788c0785a5SLars Ellenberg 	int vnr;
2079b411b363SPhilipp Reisner 
2080e77a0a5cSAndreas Gruenbacher 	while (get_t_state(thi) == RUNNING) {
208180822284SPhilipp Reisner 		drbd_thread_current_set_cpu(thi);
2082b411b363SPhilipp Reisner 
20838c0785a5SLars Ellenberg 		if (list_empty(&work_list))
2084bde89a9eSAndreas Gruenbacher 			wait_for_work(connection, &work_list);
2085b411b363SPhilipp Reisner 
2086e334f550SLars Ellenberg 		if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags))
2087e334f550SLars Ellenberg 			do_unqueued_work(connection);
20885ab7d2c0SLars Ellenberg 
20898c0785a5SLars Ellenberg 		if (signal_pending(current)) {
2090b411b363SPhilipp Reisner 			flush_signals(current);
209119393e10SPhilipp Reisner 			if (get_t_state(thi) == RUNNING) {
20921ec861ebSAndreas Gruenbacher 				drbd_warn(connection, "Worker got an unexpected signal\n");
2093b411b363SPhilipp Reisner 				continue;
209419393e10SPhilipp Reisner 			}
2095b411b363SPhilipp Reisner 			break;
2096b411b363SPhilipp Reisner 		}
2097b411b363SPhilipp Reisner 
2098e77a0a5cSAndreas Gruenbacher 		if (get_t_state(thi) != RUNNING)
2099b411b363SPhilipp Reisner 			break;
2100b411b363SPhilipp Reisner 
21018c0785a5SLars Ellenberg 		while (!list_empty(&work_list)) {
21026db7e50aSAndreas Gruenbacher 			w = list_first_entry(&work_list, struct drbd_work, list);
21036db7e50aSAndreas Gruenbacher 			list_del_init(&w->list);
21046db7e50aSAndreas Gruenbacher 			if (w->cb(w, connection->cstate < C_WF_REPORT_PARAMS) == 0)
21058c0785a5SLars Ellenberg 				continue;
2106bde89a9eSAndreas Gruenbacher 			if (connection->cstate >= C_WF_REPORT_PARAMS)
2107bde89a9eSAndreas Gruenbacher 				conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
2108b411b363SPhilipp Reisner 		}
2109b411b363SPhilipp Reisner 	}
2110b411b363SPhilipp Reisner 
21118c0785a5SLars Ellenberg 	do {
2112e334f550SLars Ellenberg 		if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags))
2113e334f550SLars Ellenberg 			do_unqueued_work(connection);
2114b411b363SPhilipp Reisner 		while (!list_empty(&work_list)) {
21156db7e50aSAndreas Gruenbacher 			w = list_first_entry(&work_list, struct drbd_work, list);
21166db7e50aSAndreas Gruenbacher 			list_del_init(&w->list);
21176db7e50aSAndreas Gruenbacher 			w->cb(w, 1);
2118b411b363SPhilipp Reisner 		}
2119bde89a9eSAndreas Gruenbacher 		dequeue_work_batch(&connection->sender_work, &work_list);
2120e334f550SLars Ellenberg 	} while (!list_empty(&work_list) || test_bit(DEVICE_WORK_PENDING, &connection->flags));
2121b411b363SPhilipp Reisner 
2122c141ebdaSPhilipp Reisner 	rcu_read_lock();
2123c06ece6bSAndreas Gruenbacher 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
2124c06ece6bSAndreas Gruenbacher 		struct drbd_device *device = peer_device->device;
21250b0ba1efSAndreas Gruenbacher 		D_ASSERT(device, device->state.disk == D_DISKLESS && device->state.conn == C_STANDALONE);
2126b30ab791SAndreas Gruenbacher 		kref_get(&device->kref);
2127c141ebdaSPhilipp Reisner 		rcu_read_unlock();
2128b30ab791SAndreas Gruenbacher 		drbd_device_cleanup(device);
212905a10ec7SAndreas Gruenbacher 		kref_put(&device->kref, drbd_destroy_device);
2130c141ebdaSPhilipp Reisner 		rcu_read_lock();
21310e29d163SPhilipp Reisner 	}
2132c141ebdaSPhilipp Reisner 	rcu_read_unlock();
2133b411b363SPhilipp Reisner 
2134b411b363SPhilipp Reisner 	return 0;
2135b411b363SPhilipp Reisner }
2136